aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.hgtags5
-rw-r--r--.rootkeys938
-rw-r--r--BitKeeper/etc/ignore90
-rw-r--r--BitKeeper/etc/logging_ok27
-rw-r--r--Config.mk33
-rw-r--r--Makefile22
-rw-r--r--buildconfigs/Rules.mk6
-rw-r--r--buildconfigs/mk.linux-2.4-xen05
-rw-r--r--buildconfigs/mk.linux-2.4-xenU5
-rw-r--r--buildconfigs/mk.linux-2.6-xen06
-rw-r--r--buildconfigs/mk.linux-2.6-xenU6
-rw-r--r--docs/misc/VMX_changes.txt90
-rw-r--r--docs/misc/crashdb.txt50
-rw-r--r--docs/misc/grant-tables.txt325
-rw-r--r--docs/misc/sedf_scheduler_mini-HOWTO.txt44
-rw-r--r--docs/src/interface.tex18
-rw-r--r--docs/src/user.tex75
-rw-r--r--extras/mini-os/Makefile42
-rw-r--r--extras/mini-os/h/hypervisor.h92
-rw-r--r--extras/mini-os/h/lib.h17
-rw-r--r--extras/mini-os/h/mm.h64
-rw-r--r--extras/mini-os/h/os.h182
-rw-r--r--extras/mini-os/h/types.h11
-rw-r--r--extras/mini-os/head.S18
-rw-r--r--extras/mini-os/kernel.c17
-rw-r--r--extras/mini-os/lib/math.c4
-rw-r--r--extras/mini-os/lib/printf.c4
-rw-r--r--extras/mini-os/minios-x86_32.lds (renamed from extras/mini-os/minios.lds)0
-rw-r--r--extras/mini-os/minios-x86_64.lds54
-rw-r--r--extras/mini-os/mm.c2
-rw-r--r--extras/mini-os/traps.c208
-rw-r--r--extras/mini-os/x86_32.S (renamed from extras/mini-os/entry.S)24
-rw-r--r--extras/mini-os/x86_64.S222
-rw-r--r--freebsd-5.3-xen-sparse/conf/files.i386-xen11
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c76
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c171
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c136
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s72
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c3
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c367
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c3
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s2
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c254
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c284
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c215
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c10
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c8
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c286
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h22
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/frame.h129
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h71
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h2
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h201
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h12
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/pmap.h9
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h105
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h2
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h111
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h3
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h14
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h132
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c283
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c18
-rw-r--r--freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c20
-rw-r--r--freebsd-5.3-xen-sparse/kern/kern_shutdown.c635
-rw-r--r--linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c513
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/Makefile2
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/config.in16
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/defconfig-xen03
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/defconfig-xenU2
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c50
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/kernel/Makefile4
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/kernel/head.S4
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/kernel/ldt.c5
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/kernel/process.c50
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/kernel/setup.c33
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/kernel/traps.c11
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/mm/fault.c4
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/mm/init.c13
-rw-r--r--linux-2.4.30-xen-sparse/arch/xen/mm/ioremap.c19
-rw-r--r--linux-2.4.30-xen-sparse/fs/exec.c1179
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/desc.h8
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/fixmap.h4
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/mmu_context.h45
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/msr.h138
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/page.h24
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/pgalloc.h25
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/pgtable-2level.h36
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/pgtable.h53
-rw-r--r--linux-2.4.30-xen-sparse/include/asm-xen/system.h2
-rwxr-xr-xlinux-2.4.30-xen-sparse/mkbuildtree15
-rw-r--r--linux-2.4.30-xen-sparse/mm/highmem.c1
-rw-r--r--linux-2.4.30-xen-sparse/mm/memory.c36
-rw-r--r--linux-2.4.30-xen-sparse/mm/mremap.c2
-rw-r--r--linux-2.4.30-xen-sparse/mm/swapfile.c1269
-rw-r--r--linux-2.4.30-xen-sparse/mm/vmalloc.c385
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/Kconfig60
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 (renamed from linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig)79
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_641023
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 (renamed from linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig)29
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64897
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig336
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/Makefile1
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile17
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile13
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c906
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c83
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c3
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S84
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S3
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c2611
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c116
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c297
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c16
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c1115
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c85
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c171
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c63
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c624
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c1437
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c122
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c40
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile12
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile4
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c9
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c423
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c25
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c26
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c226
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c232
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile7
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c81
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c1050
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile6
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c133
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c168
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c8
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c390
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c39
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c16
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig455
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/Makefile92
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile67
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile16
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c200
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c70
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/e820.c533
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S1119
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head.S207
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head64.c132
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/init_task.c49
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/io_apic.c2051
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ioport.c63
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/irq.c105
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c267
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c954
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c203
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c96
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/process.c753
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c1378
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup64.c344
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/signal.c493
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c411
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.c958
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c972
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c190
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c223
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/Makefile31
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c591
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c966
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c466
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/pageattr.c247
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile41
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile-BUS22
-rw-r--r--linux-2.6.11-xen-sparse/drivers/acpi/tables.c615
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/Makefile3
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c50
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c420
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkback/common.h48
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkback/control.c34
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkback/interface.c23
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c452
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c562
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h10
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c66
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile3
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c87
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h253
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c540
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c451
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c478
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/console/console.c94
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/evtchn/evtchn.c17
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netback/Makefile2
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netback/common.h3
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netback/control.c29
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netback/interface.c65
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netback/loopback.c16
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c156
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c36
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c59
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h85
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c61
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c252
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c1070
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c1738
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h183
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/bugs.h214
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h5
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h10
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h520
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h21
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h7
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h55
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h26
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h56
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/msr.h272
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h22
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h28
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h56
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h6
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h8
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h250
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h57
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h102
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h9
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/xor.h884
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/arch_hooks.h27
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h41
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/desc.h240
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h136
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h114
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/floppy.h204
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h505
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/io.h365
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/irq.h36
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/io_ports.h30
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h138
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_time.h122
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_timer.h48
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h47
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_pre.h5
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h55
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h76
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/page.h229
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/param.h22
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pci.h148
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pda.h (renamed from xen/include/asm-x86/pda.h)54
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h171
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h527
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/processor.h474
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h119
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/segment.h47
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/smp.h154
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/system.h403
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/timer.h64
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h97
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/vga.h20
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/xor.h328
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h10
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h72
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h536
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/multicall.h107
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/synch_bitops.h2
-rw-r--r--linux-2.6.11-xen-sparse/include/linux/skbuff.h1184
-rw-r--r--linux-2.6.11-xen-sparse/mm/mmap.c2108
-rw-r--r--linux-2.6.11-xen-sparse/net/core/dev.c3389
-rw-r--r--linux-2.6.11-xen-sparse/net/core/skbuff.c1523
-rw-r--r--netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S2
-rw-r--r--netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c8
-rw-r--r--netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h35
-rw-r--r--netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c15
-rw-r--r--netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c16
-rw-r--r--netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c12
-rw-r--r--patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch656
-rw-r--r--patches/linux-2.6.11/net-csum.patch22
-rw-r--r--patches/linux-2.6.11/nettel.patch30
-rw-r--r--patches/linux-2.6.11/x86_64-linux.patch68
-rw-r--r--tools/Makefile23
-rw-r--r--tools/Rules.mk15
-rw-r--r--tools/blktap/Makefile144
-rw-r--r--tools/blktap/README149
-rw-r--r--tools/blktap/README-PARALLAX177
-rw-r--r--tools/blktap/blkdump.c131
-rw-r--r--tools/blktap/blktaplib.c564
-rw-r--r--tools/blktap/blktaplib.h101
-rwxr-xr-xtools/blktap/block-async.c393
-rwxr-xr-xtools/blktap/block-async.h69
-rw-r--r--tools/blktap/blockstore.c1350
-rw-r--r--tools/blktap/blockstore.h134
-rw-r--r--tools/blktap/blockstored.c276
-rw-r--r--tools/blktap/bstest.c191
-rw-r--r--tools/blktap/parallax.c611
-rw-r--r--tools/blktap/radix.c631
-rw-r--r--tools/blktap/radix.h45
-rwxr-xr-xtools/blktap/requests-async.c762
-rwxr-xr-xtools/blktap/requests-async.h29
-rw-r--r--tools/blktap/snaplog.c238
-rw-r--r--tools/blktap/snaplog.h61
-rw-r--r--tools/blktap/vdi.c367
-rw-r--r--tools/blktap/vdi.h55
-rw-r--r--tools/blktap/vdi_create.c52
-rw-r--r--tools/blktap/vdi_fill.c81
-rw-r--r--tools/blktap/vdi_list.c47
-rw-r--r--tools/blktap/vdi_snap.c43
-rw-r--r--tools/blktap/vdi_snap_delete.c48
-rw-r--r--tools/blktap/vdi_snap_list.c82
-rw-r--r--tools/blktap/vdi_tree.c132
-rw-r--r--tools/blktap/vdi_unittest.c184
-rw-r--r--tools/blktap/vdi_validate.c97
-rwxr-xr-xtools/check/check_twisted46
-rw-r--r--tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in308
-rwxr-xr-xtools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure4650
-rw-r--r--tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in121
-rw-r--r--tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.srv75
-rw-r--r--tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c556
-rw-r--r--tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/server.c639
-rwxr-xr-xtools/debugger/gdb/gdb-6.2.1-xen-sparse/mkbuildtree115
-rwxr-xr-xtools/debugger/gdb/gdbbuild23
-rw-r--r--tools/debugger/libxendebug/Makefile72
-rw-r--r--tools/debugger/libxendebug/list.h186
-rw-r--r--tools/debugger/libxendebug/xendebug.c599
-rw-r--r--tools/debugger/libxendebug/xendebug.h78
-rw-r--r--tools/debugger/pdb/Domain.ml63
-rw-r--r--tools/debugger/pdb/Domain.mli38
-rw-r--r--tools/debugger/pdb/Intel.ml71
-rw-r--r--tools/debugger/pdb/Makefile56
-rw-r--r--tools/debugger/pdb/OCamlMakefile1149
-rw-r--r--tools/debugger/pdb/PDB.ml180
-rw-r--r--tools/debugger/pdb/Process.ml39
-rw-r--r--tools/debugger/pdb/Process.mli20
-rw-r--r--tools/debugger/pdb/Util.ml153
-rw-r--r--tools/debugger/pdb/debugger.ml315
-rw-r--r--tools/debugger/pdb/evtchn.ml32
-rw-r--r--tools/debugger/pdb/evtchn.mli14
-rw-r--r--tools/debugger/pdb/pdb_caml_xc.c732
-rw-r--r--tools/debugger/pdb/pdb_xen.c93
-rw-r--r--tools/debugger/pdb/server.ml219
-rw-r--r--tools/examples/Makefile3
-rwxr-xr-xtools/examples/block-enbd4
-rwxr-xr-xtools/examples/block-file4
-rw-r--r--tools/examples/bochsrc20
-rwxr-xr-xtools/examples/init.d/xend2
-rwxr-xr-xtools/examples/init.d/xendomains12
-rw-r--r--tools/examples/mem-map.sxp10
-rwxr-xr-xtools/examples/network37
-rw-r--r--tools/examples/network-nat14
-rwxr-xr-xtools/examples/vif-bridge6
-rw-r--r--tools/examples/vif-nat6
-rwxr-xr-xtools/examples/vif-route6
-rw-r--r--tools/examples/xmexample.vmx148
-rw-r--r--tools/examples/xmexample13
-rw-r--r--tools/examples/xmexample24
-rw-r--r--tools/firmware/Makefile34
-rw-r--r--tools/firmware/README88
-rw-r--r--tools/firmware/rombios/Makefile58
-rw-r--r--tools/firmware/rombios/apmbios.S329
-rw-r--r--tools/firmware/rombios/biossums.c478
-rwxr-xr-xtools/firmware/rombios/makesym.perl31
-rw-r--r--tools/firmware/rombios/rombios.c10825
-rw-r--r--tools/firmware/rombios/rombios.diffs206
-rw-r--r--tools/firmware/vgabios/BUGS3
-rw-r--r--tools/firmware/vgabios/COPYING504
-rw-r--r--tools/firmware/vgabios/ChangeLog1060
-rw-r--r--tools/firmware/vgabios/Makefile77
-rw-r--r--tools/firmware/vgabios/Notes11
-rw-r--r--tools/firmware/vgabios/README191
-rw-r--r--tools/firmware/vgabios/TODO28
-rw-r--r--tools/firmware/vgabios/biossums.c200
-rw-r--r--tools/firmware/vgabios/clext.c1587
-rwxr-xr-xtools/firmware/vgabios/dataseghack23
-rw-r--r--tools/firmware/vgabios/vbe.c1068
-rw-r--r--tools/firmware/vgabios/vbe.h302
-rw-r--r--tools/firmware/vgabios/vbe_display_api.txt227
-rw-r--r--tools/firmware/vgabios/vbetables.h1282
-rw-r--r--tools/firmware/vgabios/vgabios.c3608
-rw-r--r--tools/firmware/vgabios/vgabios.h47
-rw-r--r--tools/firmware/vgabios/vgafonts.h784
-rw-r--r--tools/firmware/vgabios/vgatables.h318
-rw-r--r--tools/firmware/vmxassist/Makefile84
-rw-r--r--tools/firmware/vmxassist/TODO8
-rw-r--r--tools/firmware/vmxassist/gen.c52
-rw-r--r--tools/firmware/vmxassist/head.S162
-rw-r--r--tools/firmware/vmxassist/machine.h203
-rwxr-xr-xtools/firmware/vmxassist/mkhex26
-rw-r--r--tools/firmware/vmxassist/setup.c338
-rw-r--r--tools/firmware/vmxassist/trap.S189
-rw-r--r--tools/firmware/vmxassist/util.c364
-rw-r--r--tools/firmware/vmxassist/util.h41
-rw-r--r--tools/firmware/vmxassist/vm86.c956
-rw-r--r--tools/firmware/vmxassist/vm86.h67
-rw-r--r--tools/firmware/vmxassist/vmxassist.ld34
-rw-r--r--tools/firmware/vmxassist/vmxloader.c110
-rw-r--r--tools/ioemu/COPYING339
-rw-r--r--tools/ioemu/COPYING.LIB504
-rw-r--r--tools/ioemu/Changelog295
-rw-r--r--tools/ioemu/Makefile116
-rw-r--r--tools/ioemu/README61
-rw-r--r--tools/ioemu/TODO66
-rw-r--r--tools/ioemu/VERSION1
-rw-r--r--tools/ioemu/block-cloop.c167
-rw-r--r--tools/ioemu/block-cow.c263
-rw-r--r--tools/ioemu/block-qcow.c677
-rw-r--r--tools/ioemu/block-vmdk.c279
-rw-r--r--tools/ioemu/block.c548
-rw-r--r--tools/ioemu/block_int.h77
-rw-r--r--tools/ioemu/bswap.h202
-rwxr-xr-xtools/ioemu/configure583
-rw-r--r--tools/ioemu/console.c731
-rw-r--r--tools/ioemu/cpu-all.h688
-rw-r--r--tools/ioemu/cpu-defs.h95
-rw-r--r--tools/ioemu/cpu.h69
-rw-r--r--tools/ioemu/create_keysym_header.sh77
-rw-r--r--tools/ioemu/exec-all.h579
-rw-r--r--tools/ioemu/exec.c461
-rw-r--r--tools/ioemu/hw/adb.c386
-rw-r--r--tools/ioemu/hw/adlib.c313
-rw-r--r--tools/ioemu/hw/cirrus_vga.c3115
-rw-r--r--tools/ioemu/hw/cirrus_vga_rop.h78
-rw-r--r--tools/ioemu/hw/cirrus_vga_rop2.h260
-rw-r--r--tools/ioemu/hw/cuda.c614
-rw-r--r--tools/ioemu/hw/dma.c535
-rw-r--r--tools/ioemu/hw/fdc.c1719
-rw-r--r--tools/ioemu/hw/fmopl.c1390
-rw-r--r--tools/ioemu/hw/fmopl.h174
-rw-r--r--tools/ioemu/hw/i8254.c525
-rw-r--r--tools/ioemu/hw/i8259.c529
-rw-r--r--tools/ioemu/hw/ide.c2318
-rw-r--r--tools/ioemu/hw/iommu.c218
-rw-r--r--tools/ioemu/hw/lance.c468
-rw-r--r--tools/ioemu/hw/m48t08.c391
-rw-r--r--tools/ioemu/hw/m48t08.h12
-rw-r--r--tools/ioemu/hw/m48t59.c602
-rw-r--r--tools/ioemu/hw/m48t59.h13
-rw-r--r--tools/ioemu/hw/magic-load.c326
-rw-r--r--tools/ioemu/hw/mc146818rtc.c463
-rw-r--r--tools/ioemu/hw/ne2000.c684
-rw-r--r--tools/ioemu/hw/openpic.c1023
-rw-r--r--tools/ioemu/hw/pc.c573
-rw-r--r--tools/ioemu/hw/pci.c1524
-rw-r--r--tools/ioemu/hw/pckbd.c919
-rw-r--r--tools/ioemu/hw/port-e9.c47
-rw-r--r--tools/ioemu/hw/ppc.c462
-rw-r--r--tools/ioemu/hw/ppc_chrp.c233
-rw-r--r--tools/ioemu/hw/ppc_prep.c548
-rw-r--r--tools/ioemu/hw/sb16.c1268
-rw-r--r--tools/ioemu/hw/sched.c268
-rw-r--r--tools/ioemu/hw/serial.c279
-rw-r--r--tools/ioemu/hw/sun4m.c113
-rw-r--r--tools/ioemu/hw/tcx.c207
-rw-r--r--tools/ioemu/hw/timer.c97
-rw-r--r--tools/ioemu/hw/vga.c2059
-rw-r--r--tools/ioemu/hw/vga_int.h168
-rw-r--r--tools/ioemu/hw/vga_template.h519
-rw-r--r--tools/ioemu/keyboard_rdesktop.c165
-rw-r--r--tools/ioemu/keymaps/ar98
-rw-r--r--tools/ioemu/keymaps/common157
-rw-r--r--tools/ioemu/keymaps/convert-map63
-rw-r--r--tools/ioemu/keymaps/da120
-rw-r--r--tools/ioemu/keymaps/de114
-rw-r--r--tools/ioemu/keymaps/de-ch169
-rw-r--r--tools/ioemu/keymaps/en-gb119
-rw-r--r--tools/ioemu/keymaps/en-us35
-rw-r--r--tools/ioemu/keymaps/es105
-rw-r--r--tools/ioemu/keymaps/et86
-rw-r--r--tools/ioemu/keymaps/fi124
-rw-r--r--tools/ioemu/keymaps/fo77
-rw-r--r--tools/ioemu/keymaps/fr181
-rw-r--r--tools/ioemu/keymaps/fr-be140
-rw-r--r--tools/ioemu/keymaps/fr-ca50
-rw-r--r--tools/ioemu/keymaps/fr-ch114
-rw-r--r--tools/ioemu/keymaps/hr125
-rw-r--r--tools/ioemu/keymaps/hu115
-rw-r--r--tools/ioemu/keymaps/is140
-rw-r--r--tools/ioemu/keymaps/it115
-rw-r--r--tools/ioemu/keymaps/ja104
-rw-r--r--tools/ioemu/keymaps/lt57
-rw-r--r--tools/ioemu/keymaps/lv128
-rw-r--r--tools/ioemu/keymaps/mk101
-rw-r--r--tools/ioemu/keymaps/modifiers17
-rw-r--r--tools/ioemu/keymaps/nl60
-rw-r--r--tools/ioemu/keymaps/nl-be3
-rw-r--r--tools/ioemu/keymaps/no119
-rw-r--r--tools/ioemu/keymaps/pl122
-rw-r--r--tools/ioemu/keymaps/pt113
-rw-r--r--tools/ioemu/keymaps/pt-br69
-rw-r--r--tools/ioemu/keymaps/ru109
-rw-r--r--tools/ioemu/keymaps/sl110
-rw-r--r--tools/ioemu/keymaps/sv82
-rw-r--r--tools/ioemu/keymaps/th131
-rw-r--r--tools/ioemu/keymaps/tr123
-rw-r--r--tools/ioemu/main.c250
-rw-r--r--tools/ioemu/monitor.c282
-rw-r--r--tools/ioemu/osdep.c499
-rw-r--r--tools/ioemu/osdep.h50
-rw-r--r--tools/ioemu/path.c147
-rw-r--r--tools/ioemu/qemu-binfmt-conf.sh34
-rw-r--r--tools/ioemu/qemu-img.c698
-rw-r--r--tools/ioemu/readline.c424
-rw-r--r--tools/ioemu/sdl.c605
-rw-r--r--tools/ioemu/target-i386-dm/Makefile399
-rwxr-xr-xtools/ioemu/target-i386-dm/device-model91
-rw-r--r--tools/ioemu/target-i386-dm/helper2.c415
-rwxr-xr-xtools/ioemu/target-i386-dm/qemu-ifup10
-rw-r--r--tools/ioemu/target-i386-dm/qemu-vgaram-bin.gzbin0 -> 3338 bytes
-rw-r--r--tools/ioemu/thunk.c243
-rw-r--r--tools/ioemu/thunk.h158
-rw-r--r--tools/ioemu/vgafont.h4611
-rw-r--r--tools/ioemu/vl.c3000
-rw-r--r--tools/ioemu/vl.h787
-rw-r--r--tools/ioemu/vnc.c549
-rw-r--r--tools/ioemu/x86_32.ld140
-rw-r--r--tools/ioemu/x86_64.ld171
-rw-r--r--tools/libxc/Makefile35
-rw-r--r--tools/libxc/linux_boot_params.h165
-rw-r--r--[-rwxr-xr-x]tools/libxc/plan9a.out.h0
-rw-r--r--tools/libxc/xc.h231
-rw-r--r--tools/libxc/xc_atropos.c51
-rw-r--r--tools/libxc/xc_core.c116
-rw-r--r--tools/libxc/xc_domain.c139
-rw-r--r--tools/libxc/xc_evtchn.c18
-rw-r--r--tools/libxc/xc_gnttab.c141
-rw-r--r--tools/libxc/xc_io.c43
-rw-r--r--tools/libxc/xc_io.h71
-rw-r--r--tools/libxc/xc_linux_build.c746
-rw-r--r--tools/libxc/xc_linux_restore.c396
-rw-r--r--tools/libxc/xc_linux_save.c388
-rw-r--r--tools/libxc/xc_load_bin.c299
-rw-r--r--tools/libxc/xc_load_elf.c310
-rw-r--r--tools/libxc/xc_misc.c61
-rw-r--r--tools/libxc/xc_physdev.c12
-rw-r--r--[-rwxr-xr-x]tools/libxc/xc_plan9_build.c191
-rw-r--r--tools/libxc/xc_private.c243
-rw-r--r--tools/libxc/xc_private.h152
-rw-r--r--tools/libxc/xc_ptrace.c396
-rw-r--r--tools/libxc/xc_ptrace_core.c295
-rw-r--r--tools/libxc/xc_rrobin.c37
-rw-r--r--tools/libxc/xc_sedf.c51
-rw-r--r--tools/libxc/xc_vmx_build.c758
-rw-r--r--tools/misc/Makefile14
-rw-r--r--tools/misc/cpuperf/Makefile51
-rw-r--r--tools/misc/cpuperf/README.txt371
-rw-r--r--tools/misc/cpuperf/cpuperf.c301
-rw-r--r--tools/misc/cpuperf/cpuperf_perfcntr.h41
-rw-r--r--tools/misc/cpuperf/cpuperf_xeno.h38
-rw-r--r--tools/misc/cpuperf/module/Makefile16
-rw-r--r--tools/misc/cpuperf/module/perfcntr.c730
-rw-r--r--tools/misc/cpuperf/p4perf.h382
-rw-r--r--tools/misc/mbootpack/GPL340
-rw-r--r--tools/misc/mbootpack/Makefile74
-rw-r--r--tools/misc/mbootpack/README77
-rw-r--r--tools/misc/mbootpack/bin2c.c356
-rw-r--r--tools/misc/mbootpack/bootsect.S136
-rw-r--r--tools/misc/mbootpack/buildimage.c174
-rw-r--r--tools/misc/mbootpack/mb_header.h90
-rw-r--r--tools/misc/mbootpack/mb_info.h217
-rw-r--r--tools/misc/mbootpack/mbootpack.c703
-rw-r--r--tools/misc/mbootpack/mbootpack.h91
-rw-r--r--tools/misc/mbootpack/setup.S1064
-rw-r--r--tools/misc/netfix1
-rw-r--r--tools/misc/xc_shadow.c70
-rw-r--r--tools/misc/xend90
-rw-r--r--tools/misc/xenperf.c29
-rwxr-xr-xtools/misc/xensv136
-rwxr-xr-xtools/misc/xm1
-rw-r--r--tools/pygrub/Makefile18
-rw-r--r--tools/pygrub/README15
-rw-r--r--tools/pygrub/setup.py37
-rw-r--r--tools/pygrub/src/GrubConf.py229
-rw-r--r--tools/pygrub/src/__init__.py (renamed from tools/python/xen/xend/util.py)0
-rw-r--r--tools/pygrub/src/fsys/__init__.py64
-rw-r--r--tools/pygrub/src/fsys/ext2/__init__.py38
-rw-r--r--tools/pygrub/src/fsys/ext2/ext2module.c365
-rw-r--r--tools/pygrub/src/fsys/ext2/test.py15
-rw-r--r--tools/pygrub/src/fsys/reiser/__init__.py39
-rw-r--r--tools/pygrub/src/fsys/reiser/reisermodule.c345
-rw-r--r--tools/pygrub/src/pygrub278
-rw-r--r--tools/python/setup.py21
-rw-r--r--tools/python/xen/lowlevel/xc/xc.c599
-rw-r--r--tools/python/xen/lowlevel/xs/xs.c617
-rw-r--r--tools/python/xen/lowlevel/xu/xu.c1031
-rw-r--r--tools/python/xen/sv/CreateDomain.py163
-rw-r--r--tools/python/xen/sv/Daemon.py110
-rwxr-xr-xtools/python/xen/sv/DomInfo.py149
-rwxr-xr-xtools/python/xen/sv/DomList.py81
-rwxr-xr-xtools/python/xen/sv/GenTabbed.py131
-rwxr-xr-xtools/python/xen/sv/HTMLBase.py63
-rwxr-xr-xtools/python/xen/sv/Main.py113
-rw-r--r--tools/python/xen/sv/MigrateDomain.py72
-rwxr-xr-xtools/python/xen/sv/NodeInfo.py64
-rw-r--r--tools/python/xen/sv/RestoreDomain.py46
-rw-r--r--tools/python/xen/sv/SaveDomain.py59
-rwxr-xr-xtools/python/xen/sv/TabView.py49
-rwxr-xr-xtools/python/xen/sv/Wizard.py269
-rwxr-xr-xtools/python/xen/sv/__init__.py1
-rw-r--r--tools/python/xen/sv/params.py3
-rwxr-xr-xtools/python/xen/sv/util.py123
-rw-r--r--tools/python/xen/util/blkif.py84
-rw-r--r--tools/python/xen/util/console_client.py28
-rw-r--r--tools/python/xen/util/ip.py82
-rw-r--r--tools/python/xen/util/mac.py11
-rw-r--r--tools/python/xen/util/memmap.py41
-rw-r--r--tools/python/xen/util/process.py37
-rw-r--r--tools/python/xen/util/xpopen.py169
-rw-r--r--tools/python/xen/web/SrvBase.py75
-rw-r--r--tools/python/xen/web/SrvDir.py (renamed from tools/python/xen/xend/server/SrvDir.py)62
-rw-r--r--tools/python/xen/web/__init__.py1
-rw-r--r--tools/python/xen/web/connection.py398
-rw-r--r--tools/python/xen/web/http.py514
-rw-r--r--tools/python/xen/web/httpserver.py342
-rw-r--r--tools/python/xen/web/protocol.py126
-rw-r--r--tools/python/xen/web/reactor.py2
-rw-r--r--tools/python/xen/web/resource.py91
-rw-r--r--tools/python/xen/web/static.py45
-rw-r--r--tools/python/xen/web/tcp.py90
-rw-r--r--tools/python/xen/web/unix.py81
-rw-r--r--tools/python/xen/xend/Blkctl.py14
-rw-r--r--tools/python/xen/xend/EventServer.py98
-rw-r--r--tools/python/xen/xend/EventTypes.py34
-rw-r--r--tools/python/xen/xend/PrettyPrint.py13
-rw-r--r--tools/python/xen/xend/Vifctl.py10
-rw-r--r--tools/python/xen/xend/XendAsynchProtocol.py94
-rw-r--r--tools/python/xen/xend/XendBootloader.py94
-rw-r--r--tools/python/xen/xend/XendCheckpoint.py141
-rw-r--r--tools/python/xen/xend/XendClient.py161
-rw-r--r--tools/python/xen/xend/XendConsole.py26
-rw-r--r--tools/python/xen/xend/XendDB.py37
-rw-r--r--tools/python/xen/xend/XendDomain.py815
-rw-r--r--tools/python/xen/xend/XendDomainInfo.py1321
-rw-r--r--tools/python/xen/xend/XendMigrate.py555
-rw-r--r--tools/python/xen/xend/XendNode.py6
-rw-r--r--tools/python/xen/xend/XendProtocol.py40
-rw-r--r--tools/python/xen/xend/XendRoot.py95
-rw-r--r--tools/python/xen/xend/XendVnet.py22
-rw-r--r--tools/python/xen/xend/encode.py4
-rw-r--r--tools/python/xen/xend/image.py339
-rw-r--r--tools/python/xen/xend/scheduler.py24
-rw-r--r--tools/python/xen/xend/server/SrvBase.py185
-rw-r--r--tools/python/xen/xend/server/SrvConsole.py30
-rw-r--r--tools/python/xen/xend/server/SrvConsoleDir.py26
-rw-r--r--tools/python/xen/xend/server/SrvDaemon.py515
-rw-r--r--tools/python/xen/xend/server/SrvDeviceDir.py9
-rw-r--r--tools/python/xen/xend/server/SrvDmesg.py24
-rw-r--r--tools/python/xen/xend/server/SrvDomain.py171
-rw-r--r--tools/python/xen/xend/server/SrvDomainDir.py51
-rw-r--r--tools/python/xen/xend/server/SrvEventDir.py41
-rw-r--r--tools/python/xen/xend/server/SrvNode.py44
-rw-r--r--tools/python/xen/xend/server/SrvRoot.py8
-rw-r--r--tools/python/xen/xend/server/SrvServer.py51
-rw-r--r--tools/python/xen/xend/server/SrvVnetDir.py26
-rw-r--r--tools/python/xen/xend/server/SrvXendLog.py9
-rwxr-xr-xtools/python/xen/xend/server/blkif.py655
-rwxr-xr-xtools/python/xen/xend/server/channel.py775
-rwxr-xr-xtools/python/xen/xend/server/console.py466
-rwxr-xr-xtools/python/xen/xend/server/controller.py865
-rw-r--r--tools/python/xen/xend/server/domain.py58
-rw-r--r--tools/python/xen/xend/server/event.py218
-rw-r--r--tools/python/xen/xend/server/messages.py121
-rwxr-xr-xtools/python/xen/xend/server/netif.py506
-rw-r--r--tools/python/xen/xend/server/params.py37
-rw-r--r--tools/python/xen/xend/server/pciif.py59
-rw-r--r--tools/python/xen/xend/server/relocate.py139
-rw-r--r--tools/python/xen/xend/server/usbif.py350
-rw-r--r--tools/python/xen/xend/sxp.py3
-rw-r--r--tools/python/xen/xend/uuid.py65
-rw-r--r--tools/python/xen/xend/xenstore/__init__.py2
-rw-r--r--tools/python/xen/xend/xenstore/xsnode.py382
-rw-r--r--tools/python/xen/xend/xenstore/xsobj.py522
-rw-r--r--tools/python/xen/xend/xenstore/xsresource.py136
-rw-r--r--tools/python/xen/xm/create.py111
-rw-r--r--tools/python/xen/xm/main.py181
-rw-r--r--tools/python/xen/xm/migrate.py3
-rwxr-xr-xtools/sv/Main.rpy3
-rwxr-xr-xtools/sv/Makefile34
-rw-r--r--tools/sv/images/destroy.pngbin2408 -> 0 bytes
-rw-r--r--tools/sv/images/finish.pngbin1189 -> 0 bytes
-rw-r--r--tools/sv/images/left-end-highlight.jpgbin535 -> 0 bytes
-rw-r--r--tools/sv/images/left-end-no-highlight.jpgbin440 -> 0 bytes
-rw-r--r--tools/sv/images/middle-highlight.jpgbin376 -> 0 bytes
-rw-r--r--tools/sv/images/middle-no-highlight.jpgbin344 -> 0 bytes
-rw-r--r--tools/sv/images/next.pngbin1270 -> 0 bytes
-rwxr-xr-xtools/sv/images/orb_01.jpgbin19864 -> 0 bytes
-rwxr-xr-xtools/sv/images/orb_02.jpgbin507 -> 0 bytes
-rw-r--r--tools/sv/images/pause.pngbin1662 -> 0 bytes
-rw-r--r--tools/sv/images/previous.pngbin1285 -> 0 bytes
-rwxr-xr-xtools/sv/images/reboot.pngbin3132 -> 0 bytes
-rw-r--r--tools/sv/images/right-end-highlight.jpgbin535 -> 0 bytes
-rw-r--r--tools/sv/images/right-end-no-highlight.jpgbin447 -> 0 bytes
-rw-r--r--tools/sv/images/seperator-left-highlight.jpgbin552 -> 0 bytes
-rw-r--r--tools/sv/images/seperator-right-highlight.jpgbin560 -> 0 bytes
-rw-r--r--tools/sv/images/seperator.jpgbin443 -> 0 bytes
-rwxr-xr-xtools/sv/images/shutdown.pngbin2901 -> 0 bytes
-rw-r--r--tools/sv/images/small-destroy.pngbin483 -> 0 bytes
-rw-r--r--tools/sv/images/small-pause.pngbin434 -> 0 bytes
-rw-r--r--tools/sv/images/small-unpause.pngbin500 -> 0 bytes
-rw-r--r--tools/sv/images/unpause.pngbin1890 -> 0 bytes
-rw-r--r--tools/sv/images/xen.pngbin10575 -> 0 bytes
-rwxr-xr-xtools/sv/inc/script.js22
-rw-r--r--tools/sv/inc/style.css32
-rw-r--r--tools/tests/Makefile22
-rw-r--r--tools/tests/test_x86_emulator.c262
-rw-r--r--tools/vnet/INSTALL31
-rw-r--r--tools/vnet/Makefile17
-rw-r--r--tools/vnet/libxutil/Makefile (renamed from tools/libxutil/Makefile)18
-rw-r--r--tools/vnet/libxutil/allocate.c (renamed from tools/libxutil/allocate.c)0
-rw-r--r--tools/vnet/libxutil/allocate.h (renamed from tools/libxutil/allocate.h)0
-rw-r--r--tools/vnet/libxutil/debug.h (renamed from tools/libxutil/debug.h)0
-rw-r--r--tools/vnet/libxutil/enum.c (renamed from tools/libxutil/enum.c)0
-rw-r--r--tools/vnet/libxutil/enum.h (renamed from tools/libxutil/enum.h)0
-rw-r--r--tools/vnet/libxutil/fd_stream.c184
-rw-r--r--tools/vnet/libxutil/fd_stream.h (renamed from tools/xfrd/lzi_stream.h)23
-rw-r--r--tools/vnet/libxutil/file_stream.c (renamed from tools/libxutil/file_stream.c)25
-rw-r--r--tools/vnet/libxutil/file_stream.h (renamed from tools/libxutil/file_stream.h)0
-rw-r--r--tools/vnet/libxutil/gzip_stream.c (renamed from tools/libxutil/gzip_stream.c)13
-rw-r--r--tools/vnet/libxutil/gzip_stream.h (renamed from tools/libxutil/gzip_stream.h)0
-rw-r--r--tools/vnet/libxutil/hash_table.c (renamed from tools/libxutil/hash_table.c)0
-rw-r--r--tools/vnet/libxutil/hash_table.h (renamed from tools/libxutil/hash_table.h)0
-rw-r--r--tools/vnet/libxutil/iostream.c (renamed from tools/libxutil/iostream.c)0
-rw-r--r--tools/vnet/libxutil/iostream.h (renamed from tools/libxutil/iostream.h)54
-rw-r--r--tools/vnet/libxutil/kernel_stream.c (renamed from tools/libxutil/kernel_stream.c)49
-rw-r--r--tools/vnet/libxutil/kernel_stream.h (renamed from tools/libxutil/kernel_stream.h)0
-rw-r--r--tools/vnet/libxutil/lexis.c (renamed from tools/libxutil/lexis.c)0
-rw-r--r--tools/vnet/libxutil/lexis.h (renamed from tools/libxutil/lexis.h)2
-rw-r--r--tools/vnet/libxutil/socket_stream.c (renamed from tools/libxutil/socket_stream.c)0
-rw-r--r--tools/vnet/libxutil/socket_stream.h (renamed from tools/libxutil/socket_stream.h)0
-rw-r--r--tools/vnet/libxutil/string_stream.c (renamed from tools/libxutil/string_stream.c)35
-rw-r--r--tools/vnet/libxutil/string_stream.h (renamed from tools/libxutil/string_stream.h)0
-rw-r--r--tools/vnet/libxutil/sxpr.c (renamed from tools/libxutil/sxpr.c)484
-rw-r--r--tools/vnet/libxutil/sxpr.h (renamed from tools/libxutil/sxpr.h)252
-rw-r--r--tools/vnet/libxutil/sxpr_parser.c (renamed from tools/libxutil/sxpr_parser.c)1119
-rw-r--r--tools/vnet/libxutil/sxpr_parser.h (renamed from tools/libxutil/sxpr_parser.h)68
-rw-r--r--tools/vnet/libxutil/sys_net.c (renamed from tools/libxutil/sys_net.c)0
-rw-r--r--tools/vnet/libxutil/sys_net.h (renamed from tools/libxutil/sys_net.h)0
-rw-r--r--tools/vnet/libxutil/sys_string.c (renamed from tools/libxutil/sys_string.c)55
-rw-r--r--tools/vnet/libxutil/sys_string.h (renamed from tools/libxutil/sys_string.h)1
-rw-r--r--tools/vnet/libxutil/util.c (renamed from tools/libxutil/util.c)0
-rw-r--r--tools/vnet/libxutil/util.h (renamed from tools/libxutil/util.h)0
-rw-r--r--tools/vnet/vnet-module/Makefile-2.64
-rw-r--r--tools/vnet/vnet-module/Makefile.vnet2
-rw-r--r--tools/vnet/vnet-module/if_varp.h4
-rw-r--r--tools/vnet/vnet-module/varp.c14
-rw-r--r--tools/vnet/vnetd/Makefile2
-rw-r--r--tools/vnet/vnetd/vcache.c14
-rw-r--r--tools/x2d2/Makefile22
-rw-r--r--tools/x2d2/cntrl_con.c457
-rw-r--r--tools/x2d2/minixend.c939
-rw-r--r--tools/x2d2/minixend.h154
-rw-r--r--tools/x2d2/util.c132
-rw-r--r--tools/xcs/Makefile49
-rw-r--r--tools/xcs/bindings.c179
-rw-r--r--tools/xcs/connection.c157
-rw-r--r--tools/xcs/ctrl_interface.c269
-rw-r--r--tools/xcs/dump.c506
-rw-r--r--tools/xcs/dump.h28
-rw-r--r--tools/xcs/evtchn.c106
-rw-r--r--tools/xcs/xcs.c973
-rw-r--r--tools/xcs/xcs.h148
-rw-r--r--tools/xcs/xcs_proto.h101
-rw-r--r--tools/xcs/xcsdump.c206
-rw-r--r--tools/xcutils/Makefile53
-rw-r--r--tools/xcutils/xc_restore.c30
-rw-r--r--tools/xcutils/xc_save.c29
-rw-r--r--tools/xenstore/.gdbinit4
-rw-r--r--tools/xenstore/Makefile102
-rw-r--r--tools/xenstore/TODO7
-rw-r--r--tools/xenstore/fake_libxc.c119
-rw-r--r--tools/xenstore/list.h508
-rw-r--r--tools/xenstore/talloc.c1143
-rw-r--r--tools/xenstore/talloc.h134
-rw-r--r--tools/xenstore/talloc_guide.txt569
-rw-r--r--tools/xenstore/testsuite/01simple.sh4
-rw-r--r--tools/xenstore/testsuite/02directory.sh31
-rw-r--r--tools/xenstore/testsuite/03write.sh17
-rw-r--r--tools/xenstore/testsuite/04rm.sh18
-rw-r--r--tools/xenstore/testsuite/05filepermissions.sh49
-rw-r--r--tools/xenstore/testsuite/06dirpermissions.sh61
-rw-r--r--tools/xenstore/testsuite/07watch.sh32
-rw-r--r--tools/xenstore/testsuite/08transaction.sh54
-rw-r--r--tools/xenstore/testsuite/09domain.sh15
-rwxr-xr-xtools/xenstore/testsuite/test.sh44
-rw-r--r--tools/xenstore/utils.c143
-rw-r--r--tools/xenstore/utils.h61
-rw-r--r--tools/xenstore/xenstored.h81
-rw-r--r--tools/xenstore/xenstored_core.c1362
-rw-r--r--tools/xenstore/xenstored_core.h126
-rw-r--r--tools/xenstore/xenstored_domain.c390
-rw-r--r--tools/xenstore/xenstored_domain.h38
-rw-r--r--tools/xenstore/xenstored_test.h37
-rw-r--r--tools/xenstore/xenstored_transaction.c284
-rw-r--r--tools/xenstore/xenstored_transaction.h50
-rw-r--r--tools/xenstore/xenstored_watch.c279
-rw-r--r--tools/xenstore/xenstored_watch.h42
-rw-r--r--tools/xenstore/xs.c551
-rw-r--r--tools/xenstore/xs.h146
-rw-r--r--tools/xenstore/xs_lib.c141
-rw-r--r--tools/xenstore/xs_lib.h63
-rw-r--r--tools/xenstore/xs_random.c1646
-rw-r--r--tools/xenstore/xs_stress.c207
-rw-r--r--tools/xenstore/xs_test.c647
-rw-r--r--tools/xentrace/Makefile4
-rw-r--r--tools/xentrace/formats50
-rw-r--r--tools/xentrace/xenctx.c83
-rw-r--r--tools/xentrace/xentrace.c62
-rw-r--r--tools/xentrace/xentrace_format4
-rw-r--r--tools/xfrd/Make.xfrd33
-rw-r--r--tools/xfrd/Makefile91
-rw-r--r--tools/xfrd/connection.c195
-rw-r--r--tools/xfrd/connection.h36
-rw-r--r--tools/xfrd/debug.h73
-rw-r--r--tools/xfrd/http.h50
-rw-r--r--tools/xfrd/lzi_stream.c533
-rw-r--r--tools/xfrd/marshal.c207
-rw-r--r--tools/xfrd/marshal.h42
-rw-r--r--tools/xfrd/select.c50
-rw-r--r--tools/xfrd/select.h16
-rw-r--r--tools/xfrd/xdr.c316
-rw-r--r--tools/xfrd/xdr.h30
-rw-r--r--tools/xfrd/xen_domain.c395
-rw-r--r--tools/xfrd/xen_domain.h22
-rw-r--r--tools/xfrd/xfrd.c1272
-rw-r--r--tools/xfrd/xfrd.h17
-rwxr-xr-xtools/xfrd/xfrdClient.py124
-rw-r--r--xen/Makefile8
-rw-r--r--xen/Rules.mk38
-rw-r--r--xen/arch/ia64/Makefile71
-rw-r--r--xen/arch/ia64/Rules.mk24
-rw-r--r--xen/arch/ia64/acpi.c678
-rw-r--r--xen/arch/ia64/asm-offsets.c276
-rw-r--r--xen/arch/ia64/dom0_ops.c52
-rw-r--r--xen/arch/ia64/dom_fw.c576
-rw-r--r--xen/arch/ia64/domain.c1255
-rw-r--r--xen/arch/ia64/hpsimserial.c23
-rw-r--r--xen/arch/ia64/hypercall.c127
-rw-r--r--xen/arch/ia64/hyperprivop.S513
-rw-r--r--xen/arch/ia64/idle0_task.c58
-rw-r--r--xen/arch/ia64/irq.c1496
-rw-r--r--xen/arch/ia64/ivt.S1898
-rw-r--r--xen/arch/ia64/lib/Makefile44
-rw-r--r--xen/arch/ia64/mm_init.c547
-rw-r--r--xen/arch/ia64/mmio.c325
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/cpumask.h12
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/efi.c50
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/entry.S237
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/entry.h37
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/gcc_intrin.h69
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/hardirq.h14
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/head.S120
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h26
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/ia64regs.h38
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/interrupt.h27
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/io.h14
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c118
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/kregs.h65
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/mca_asm.h32
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/minstate.h25
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/mm_contig.c47
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/page.h74
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/pal.S26
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/pal.h12
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/pgalloc.h76
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/processor.h37
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/ptrace.h20
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/series40
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/setup.c151
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/sn_sal.h33
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/system.h38
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/time.c56
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/tlb.c38
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/types.h44
-rw-r--r--xen/arch/ia64/patch/linux-2.6.11/unaligned.c227
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/bootmem.h12
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/current.h17
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/efi.c85
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/efi.h13
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/entry.S195
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/gcc_intrin.h20
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/hardirq.h22
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/head.S93
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/hpsim_irq.c36
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/hpsim_ssc.h26
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/hw_irq.h24
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/ide.h35
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/init_task.c35
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/init_task.h53
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/interrupt.h18
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/io.h14
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/irq.h18
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/irq_ia64.c82
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/ivt.S528
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/kregs.h13
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/lds.S17
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/linuxtime.h34
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/mca_asm.h32
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/minstate.h29
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/mm_bootmem.c92
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/mm_contig.c216
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/mmzone.h14
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/page.h84
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/page_alloc.c305
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/pal.S26
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/pgalloc.h65
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/processor.h19
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/sal.h26
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/setup.c203
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/slab.c139
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/slab.h14
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/swiotlb.c47
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/system.h43
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/time.c338
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/tlb.c48
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/types.h15
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/unaligned.c97
-rw-r--r--xen/arch/ia64/patch/linux-2.6.7/wait.h26
-rw-r--r--xen/arch/ia64/pcdp.c120
-rw-r--r--xen/arch/ia64/pdb-stub.c59
-rw-r--r--xen/arch/ia64/privop.c316
-rw-r--r--xen/arch/ia64/process.c382
-rw-r--r--xen/arch/ia64/regionreg.c125
-rw-r--r--xen/arch/ia64/smp.c43
-rw-r--r--xen/arch/ia64/smpboot.c2
-rw-r--r--xen/arch/ia64/sn_console.c84
-rw-r--r--xen/arch/ia64/tools/README.RunVT59
-rw-r--r--xen/arch/ia64/tools/README.xenia6421
-rw-r--r--xen/arch/ia64/tools/README.xenia64linux50
-rw-r--r--xen/arch/ia64/tools/mkbuildtree345
-rw-r--r--xen/arch/ia64/tools/privify/Makefile9
-rw-r--r--xen/arch/ia64/tools/privify/README.privify8
-rw-r--r--xen/arch/ia64/tools/privify/privify.c360
-rw-r--r--xen/arch/ia64/tools/privify/privify.h34
-rw-r--r--xen/arch/ia64/tools/privify/privify_elf64.c120
-rw-r--r--xen/arch/ia64/vcpu.c627
-rw-r--r--xen/arch/ia64/vhpt.c119
-rw-r--r--xen/arch/ia64/vlsapic.c471
-rw-r--r--xen/arch/ia64/vmmu.c802
-rw-r--r--xen/arch/ia64/vmx_entry.S611
-rw-r--r--xen/arch/ia64/vmx_init.c297
-rw-r--r--xen/arch/ia64/vmx_interrupt.c388
-rw-r--r--xen/arch/ia64/vmx_ivt.S978
-rw-r--r--xen/arch/ia64/vmx_minstate.h329
-rw-r--r--xen/arch/ia64/vmx_phy_mode.c393
-rw-r--r--xen/arch/ia64/vmx_process.c345
-rw-r--r--xen/arch/ia64/vmx_utility.c659
-rw-r--r--xen/arch/ia64/vmx_vcpu.c436
-rw-r--r--xen/arch/ia64/vmx_virt.c1501
-rw-r--r--xen/arch/ia64/vmx_vsa.S84
-rw-r--r--xen/arch/ia64/vtlb.c1004
-rw-r--r--xen/arch/ia64/xenasm.S69
-rw-r--r--xen/arch/ia64/xenirq.c77
-rw-r--r--xen/arch/ia64/xenmem.c101
-rw-r--r--xen/arch/ia64/xenmisc.c317
-rw-r--r--xen/arch/ia64/xensetup.c330
-rw-r--r--xen/arch/ia64/xentime.c380
-rw-r--r--xen/arch/x86/Makefile24
-rw-r--r--xen/arch/x86/Rules.mk15
-rw-r--r--xen/arch/x86/acpi.c721
-rw-r--r--xen/arch/x86/acpi/boot.c723
-rw-r--r--xen/arch/x86/apic.c589
-rw-r--r--xen/arch/x86/audit.c976
-rw-r--r--xen/arch/x86/bitops.c99
-rw-r--r--xen/arch/x86/boot/mkelf32.c1
-rw-r--r--xen/arch/x86/boot/x86_32.S103
-rw-r--r--xen/arch/x86/boot/x86_64.S213
-rw-r--r--xen/arch/x86/cdb.c414
-rw-r--r--xen/arch/x86/cpu/amd.c254
-rw-r--r--xen/arch/x86/cpu/centaur.c477
-rw-r--r--xen/arch/x86/cpu/common.c581
-rw-r--r--xen/arch/x86/cpu/cpu.h31
-rw-r--r--xen/arch/x86/cpu/cyrix.c400
-rw-r--r--xen/arch/x86/cpu/intel.c249
-rw-r--r--xen/arch/x86/cpu/intel_cacheinfo.c142
-rw-r--r--xen/arch/x86/cpu/rise.c54
-rw-r--r--xen/arch/x86/cpu/transmeta.c108
-rw-r--r--xen/arch/x86/dmi_scan.c493
-rw-r--r--xen/arch/x86/dom0_ops.c169
-rw-r--r--xen/arch/x86/domain.c1344
-rw-r--r--xen/arch/x86/domain_build.c691
-rw-r--r--xen/arch/x86/e820.c76
-rw-r--r--xen/arch/x86/extable.c14
-rw-r--r--xen/arch/x86/flushtlb.c2
-rw-r--r--xen/arch/x86/genapic/bigsmp.c52
-rw-r--r--xen/arch/x86/genapic/default.c27
-rw-r--r--xen/arch/x86/genapic/es7000.c29
-rw-r--r--xen/arch/x86/genapic/es7000.h110
-rw-r--r--xen/arch/x86/genapic/es7000plat.c302
-rw-r--r--xen/arch/x86/genapic/probe.c91
-rw-r--r--xen/arch/x86/genapic/summit.c27
-rw-r--r--xen/arch/x86/i387.c88
-rw-r--r--xen/arch/x86/i8259.c185
-rw-r--r--xen/arch/x86/idle0_task.c30
-rw-r--r--xen/arch/x86/io_apic.c2704
-rw-r--r--xen/arch/x86/irq.c154
-rw-r--r--xen/arch/x86/memory.c2400
-rw-r--r--xen/arch/x86/microcode.c14
-rw-r--r--xen/arch/x86/mm.c3124
-rw-r--r--xen/arch/x86/mpparse.c704
-rw-r--r--xen/arch/x86/mtrr/generic.c10
-rw-r--r--xen/arch/x86/mtrr/main.c27
-rw-r--r--xen/arch/x86/nmi.c170
-rw-r--r--xen/arch/x86/pci-irq.c1084
-rw-r--r--xen/arch/x86/pci-pc.c1548
-rw-r--r--xen/arch/x86/pci-x86.c402
-rw-r--r--xen/arch/x86/pci-x86.h71
-rw-r--r--xen/arch/x86/pdb-linux.c100
-rw-r--r--xen/arch/x86/pdb-stub.c1280
-rw-r--r--xen/arch/x86/physdev.c143
-rw-r--r--xen/arch/x86/setup.c612
-rw-r--r--xen/arch/x86/shadow.c3401
-rw-r--r--xen/arch/x86/smp.c302
-rw-r--r--xen/arch/x86/smpboot.c1798
-rw-r--r--xen/arch/x86/string.c63
-rw-r--r--xen/arch/x86/time.c53
-rw-r--r--xen/arch/x86/trampoline.S12
-rw-r--r--xen/arch/x86/traps.c1180
-rw-r--r--xen/arch/x86/usercopy.c139
-rw-r--r--xen/arch/x86/vmx.c1377
-rw-r--r--xen/arch/x86/vmx_intercept.c264
-rw-r--r--xen/arch/x86/vmx_io.c508
-rw-r--r--xen/arch/x86/vmx_platform.c674
-rw-r--r--xen/arch/x86/vmx_vmcs.c466
-rw-r--r--xen/arch/x86/x86_32/asm-offsets.c95
-rw-r--r--xen/arch/x86/x86_32/domain_page.c47
-rw-r--r--xen/arch/x86/x86_32/entry.S605
-rw-r--r--xen/arch/x86/x86_32/mm.c455
-rw-r--r--xen/arch/x86/x86_32/seg_fixup.c79
-rw-r--r--xen/arch/x86/x86_32/traps.c235
-rw-r--r--xen/arch/x86/x86_32/usercopy.c579
-rw-r--r--xen/arch/x86/x86_32/xen.lds8
-rw-r--r--xen/arch/x86/x86_64/asm-offsets.c96
-rw-r--r--xen/arch/x86/x86_64/entry.S592
-rw-r--r--xen/arch/x86/x86_64/mm.c590
-rw-r--r--xen/arch/x86/x86_64/traps.c192
-rw-r--r--xen/arch/x86/x86_64/usercopy.c136
-rw-r--r--xen/arch/x86/x86_64/xen.lds6
-rw-r--r--xen/arch/x86/x86_emulate.c1071
-rw-r--r--xen/common/Makefile7
-rw-r--r--xen/common/ac_timer.c110
-rw-r--r--xen/common/bitmap.c365
-rw-r--r--xen/common/debug-linux.c267
-rw-r--r--xen/common/debug.c113
-rw-r--r--xen/common/dom0_ops.c372
-rw-r--r--xen/common/dom_mem_ops.c61
-rw-r--r--xen/common/domain.c373
-rw-r--r--xen/common/elf.c162
-rw-r--r--xen/common/event_channel.c424
-rw-r--r--xen/common/grant_table.c957
-rw-r--r--xen/common/kernel.c70
-rw-r--r--xen/common/keyhandler.c74
-rw-r--r--xen/common/lib.c57
-rw-r--r--xen/common/multicall.c25
-rw-r--r--xen/common/page_alloc.c201
-rw-r--r--xen/common/perfc.c40
-rw-r--r--xen/common/physdev.c746
-rw-r--r--xen/common/resource.c319
-rw-r--r--xen/common/sched_atropos.c691
-rw-r--r--xen/common/sched_bvt.c347
-rw-r--r--xen/common/sched_rrobin.c228
-rw-r--r--xen/common/sched_sedf.c1453
-rw-r--r--xen/common/schedule.c436
-rw-r--r--xen/common/slab.c1844
-rw-r--r--xen/common/softirq.c10
-rw-r--r--xen/common/string.c10
-rw-r--r--xen/common/trace.c90
-rw-r--r--xen/common/vsprintf.c50
-rw-r--r--xen/common/xmalloc.c213
-rw-r--r--xen/drivers/Makefile2
-rw-r--r--xen/drivers/acpi/Makefile55
-rw-r--r--xen/drivers/acpi/acpi_ksyms.c157
-rw-r--r--xen/drivers/acpi/tables.c61
-rw-r--r--xen/drivers/char/console.c296
-rw-r--r--xen/drivers/char/ns16550.c297
-rw-r--r--xen/drivers/char/serial.c549
-rw-r--r--xen/drivers/pci/Makefile45
-rw-r--r--xen/drivers/pci/gen-devlist.c130
-rw-r--r--xen/drivers/pci/names.c135
-rw-r--r--xen/drivers/pci/pci.c1773
-rw-r--r--xen/drivers/pci/pci.ids7514
-rw-r--r--xen/drivers/pci/quirks.c835
-rw-r--r--xen/drivers/pci/setup-res.c241
-rw-r--r--xen/include/acpi/acconfig.h31
-rw-r--r--xen/include/acpi/acdebug.h469
-rw-r--r--xen/include/acpi/acdisasm.h402
-rw-r--r--xen/include/acpi/acdispat.h513
-rw-r--r--xen/include/acpi/acevents.h274
-rw-r--r--xen/include/acpi/acexcep.h10
-rw-r--r--xen/include/acpi/acglobal.h81
-rw-r--r--xen/include/acpi/achware.h43
-rw-r--r--xen/include/acpi/acinterp.h738
-rw-r--r--xen/include/acpi/aclocal.h52
-rw-r--r--xen/include/acpi/acmacros.h63
-rw-r--r--xen/include/acpi/acnamesp.h513
-rw-r--r--xen/include/acpi/acobject.h31
-rw-r--r--xen/include/acpi/acoutput.h4
-rw-r--r--xen/include/acpi/acparser.h347
-rw-r--r--xen/include/acpi/acpi.h2
-rw-r--r--xen/include/acpi/acpi_bus.h69
-rw-r--r--xen/include/acpi/acpi_drivers.h284
-rw-r--r--xen/include/acpi/acpiosxf.h38
-rw-r--r--xen/include/acpi/acpixf.h45
-rw-r--r--xen/include/acpi/acresrc.h391
-rw-r--r--xen/include/acpi/acstruct.h25
-rw-r--r--xen/include/acpi/actables.h233
-rw-r--r--xen/include/acpi/actbl.h33
-rw-r--r--xen/include/acpi/actbl1.h2
-rw-r--r--xen/include/acpi/actbl2.h94
-rw-r--r--xen/include/acpi/actbl71.h144
-rw-r--r--xen/include/acpi/actypes.h149
-rw-r--r--xen/include/acpi/acutils.h43
-rw-r--r--xen/include/acpi/amlcode.h506
-rw-r--r--xen/include/acpi/amlresrc.h329
-rw-r--r--xen/include/acpi/platform/acenv.h23
-rw-r--r--xen/include/acpi/platform/acgcc.h4
-rw-r--r--xen/include/acpi/platform/aclinux.h6
-rw-r--r--xen/include/asm-ia64/config.h281
-rw-r--r--xen/include/asm-ia64/debugger.h42
-rw-r--r--xen/include/asm-ia64/dom_fw.h119
-rw-r--r--xen/include/asm-ia64/domain.h164
-rw-r--r--xen/include/asm-ia64/flushtlb.h14
-rw-r--r--xen/include/asm-ia64/ia64_int.h56
-rw-r--r--xen/include/asm-ia64/init.h29
-rw-r--r--xen/include/asm-ia64/mm.h380
-rw-r--r--xen/include/asm-ia64/mmu_context.h23
-rw-r--r--xen/include/asm-ia64/multicall.h5
-rw-r--r--xen/include/asm-ia64/offsets.h2
-rw-r--r--xen/include/asm-ia64/privop.h177
-rw-r--r--xen/include/asm-ia64/regionreg.h42
-rw-r--r--xen/include/asm-ia64/regs.h3
-rw-r--r--xen/include/asm-ia64/serial.h20
-rw-r--r--xen/include/asm-ia64/slab.h3
-rw-r--r--xen/include/asm-ia64/time.h1
-rw-r--r--xen/include/asm-ia64/tlb.h51
-rw-r--r--xen/include/asm-ia64/vcpu.h154
-rw-r--r--xen/include/asm-ia64/vhpt.h494
-rw-r--r--xen/include/asm-ia64/virt_event.h114
-rw-r--r--xen/include/asm-ia64/vmmu.h344
-rw-r--r--xen/include/asm-ia64/vmx.h38
-rw-r--r--xen/include/asm-ia64/vmx_mm_def.h176
-rw-r--r--xen/include/asm-ia64/vmx_pal.h120
-rw-r--r--xen/include/asm-ia64/vmx_pal_vsa.h44
-rw-r--r--xen/include/asm-ia64/vmx_phy_mode.h126
-rw-r--r--xen/include/asm-ia64/vmx_platform.h37
-rw-r--r--xen/include/asm-ia64/vmx_ptrace.h97
-rw-r--r--xen/include/asm-ia64/vmx_vcpu.h598
-rw-r--r--xen/include/asm-ia64/vmx_vpd.h193
-rw-r--r--xen/include/asm-ia64/vtm.h67
-rw-r--r--xen/include/asm-ia64/xenprocessor.h213
-rw-r--r--xen/include/asm-ia64/xenserial.h (renamed from xen/drivers/pci/compat.c)0
-rw-r--r--xen/include/asm-ia64/xensystem.h65
-rw-r--r--xen/include/asm-x86/acpi.h77
-rw-r--r--xen/include/asm-x86/apic.h67
-rw-r--r--xen/include/asm-x86/apicdef.h279
-rw-r--r--xen/include/asm-x86/asm_defns.h2
-rw-r--r--xen/include/asm-x86/bitops.h234
-rw-r--r--xen/include/asm-x86/config.h296
-rw-r--r--xen/include/asm-x86/cpufeature.h40
-rw-r--r--xen/include/asm-x86/current.h60
-rw-r--r--xen/include/asm-x86/debugger.h93
-rw-r--r--xen/include/asm-x86/desc.h133
-rw-r--r--xen/include/asm-x86/div64.h37
-rw-r--r--xen/include/asm-x86/domain.h132
-rw-r--r--xen/include/asm-x86/domain_page.h29
-rw-r--r--xen/include/asm-x86/e820.h16
-rw-r--r--xen/include/asm-x86/fixmap.h31
-rw-r--r--xen/include/asm-x86/flushtlb.h63
-rw-r--r--xen/include/asm-x86/genapic.h115
-rw-r--r--xen/include/asm-x86/hardirq.h4
-rw-r--r--xen/include/asm-x86/i387.h37
-rw-r--r--xen/include/asm-x86/init.h29
-rw-r--r--xen/include/asm-x86/io.h9
-rw-r--r--xen/include/asm-x86/io_apic.h108
-rw-r--r--xen/include/asm-x86/irq.h150
-rw-r--r--xen/include/asm-x86/ldt.h25
-rw-r--r--xen/include/asm-x86/mach-bigsmp/mach_apic.h167
-rw-r--r--xen/include/asm-x86/mach-bigsmp/mach_apicdef.h13
-rw-r--r--xen/include/asm-x86/mach-bigsmp/mach_ipi.h25
-rw-r--r--xen/include/asm-x86/mach-default/bios_ebda.h15
-rw-r--r--xen/include/asm-x86/mach-default/io_ports.h30
-rw-r--r--xen/include/asm-x86/mach-default/irq_vectors.h96
-rw-r--r--xen/include/asm-x86/mach-default/irq_vectors_limits.h8
-rw-r--r--xen/include/asm-x86/mach-default/mach_apic.h133
-rw-r--r--xen/include/asm-x86/mach-default/mach_apicdef.h13
-rw-r--r--xen/include/asm-x86/mach-default/mach_ipi.h30
-rw-r--r--xen/include/asm-x86/mach-default/mach_mpparse.h28
-rw-r--r--xen/include/asm-x86/mach-default/mach_wakecpu.h41
-rw-r--r--xen/include/asm-x86/mach-es7000/mach_apic.h207
-rw-r--r--xen/include/asm-x86/mach-es7000/mach_apicdef.h13
-rw-r--r--xen/include/asm-x86/mach-es7000/mach_ipi.h24
-rw-r--r--xen/include/asm-x86/mach-es7000/mach_mpparse.h41
-rw-r--r--xen/include/asm-x86/mach-es7000/mach_wakecpu.h58
-rw-r--r--xen/include/asm-x86/mach-generic/mach_apic.h32
-rw-r--r--xen/include/asm-x86/mach-generic/mach_apicdef.h11
-rw-r--r--xen/include/asm-x86/mach-generic/mach_ipi.h10
-rw-r--r--xen/include/asm-x86/mach-generic/mach_mpparse.h12
-rw-r--r--xen/include/asm-x86/mach-generic/mach_mpspec.h10
-rw-r--r--xen/include/asm-x86/mach-summit/mach_apic.h189
-rw-r--r--xen/include/asm-x86/mach-summit/mach_apicdef.h13
-rw-r--r--xen/include/asm-x86/mach-summit/mach_ipi.h25
-rw-r--r--xen/include/asm-x86/mach-summit/mach_mpparse.h121
-rw-r--r--xen/include/asm-x86/mm.h239
-rw-r--r--xen/include/asm-x86/mpspec.h280
-rw-r--r--xen/include/asm-x86/mpspec_def.h188
-rw-r--r--xen/include/asm-x86/msr.h100
-rw-r--r--xen/include/asm-x86/multicall.h24
-rw-r--r--xen/include/asm-x86/page.h408
-rw-r--r--xen/include/asm-x86/pci.h35
-rw-r--r--xen/include/asm-x86/pdb.h89
-rw-r--r--xen/include/asm-x86/physdev.h17
-rw-r--r--xen/include/asm-x86/processor.h374
-rw-r--r--xen/include/asm-x86/regs.h36
-rw-r--r--xen/include/asm-x86/rwlock.h12
-rw-r--r--xen/include/asm-x86/shadow.h1605
-rw-r--r--xen/include/asm-x86/smp.h102
-rw-r--r--xen/include/asm-x86/smpboot.h116
-rw-r--r--xen/include/asm-x86/string.h398
-rw-r--r--xen/include/asm-x86/system.h71
-rw-r--r--xen/include/asm-x86/time.h7
-rw-r--r--xen/include/asm-x86/types.h28
-rw-r--r--xen/include/asm-x86/uaccess.h253
-rw-r--r--xen/include/asm-x86/vmx.h340
-rw-r--r--xen/include/asm-x86/vmx_cpu.h35
-rw-r--r--xen/include/asm-x86/vmx_intercept.h31
-rw-r--r--xen/include/asm-x86/vmx_platform.h94
-rw-r--r--xen/include/asm-x86/vmx_virpit.h42
-rw-r--r--xen/include/asm-x86/vmx_vmcs.h284
-rw-r--r--xen/include/asm-x86/x86_32/asm_defns.h166
-rw-r--r--xen/include/asm-x86/x86_32/current.h55
-rw-r--r--xen/include/asm-x86/x86_32/page-2level.h56
-rw-r--r--xen/include/asm-x86/x86_32/page-3level.h70
-rw-r--r--xen/include/asm-x86/x86_32/page.h38
-rw-r--r--xen/include/asm-x86/x86_32/regs.h59
-rw-r--r--xen/include/asm-x86/x86_32/string.h486
-rw-r--r--xen/include/asm-x86/x86_32/uaccess.h373
-rw-r--r--xen/include/asm-x86/x86_64/asm_defns.h137
-rw-r--r--xen/include/asm-x86/x86_64/current.h60
-rw-r--r--xen/include/asm-x86/x86_64/desc.h118
-rw-r--r--xen/include/asm-x86/x86_64/ldt.h39
-rw-r--r--xen/include/asm-x86/x86_64/page.h92
-rw-r--r--xen/include/asm-x86/x86_64/regs.h118
-rw-r--r--xen/include/asm-x86/x86_64/string.h69
-rw-r--r--xen/include/asm-x86/x86_64/uaccess.h324
-rw-r--r--xen/include/asm-x86/x86_emulate.h169
-rw-r--r--xen/include/public/arch-ia64.h91
-rw-r--r--xen/include/public/arch-x86_32.h113
-rw-r--r--xen/include/public/arch-x86_64.h187
-rw-r--r--xen/include/public/dom0_ops.h385
-rw-r--r--xen/include/public/event_channel.h30
-rw-r--r--xen/include/public/grant_table.h35
-rw-r--r--xen/include/public/io/blkif.h59
-rw-r--r--xen/include/public/io/domain_controller.h297
-rw-r--r--xen/include/public/io/ioreq.h61
-rw-r--r--xen/include/public/io/netif.h6
-rw-r--r--xen/include/public/io/ring.h199
-rw-r--r--xen/include/public/io/usbif.h66
-rw-r--r--xen/include/public/physdev.h83
-rw-r--r--xen/include/public/sched_ctl.h48
-rw-r--r--xen/include/public/trace.h52
-rw-r--r--xen/include/public/vmx_assist.h101
-rw-r--r--xen/include/public/xen.h312
-rw-r--r--xen/include/xen/ac_timer.h98
-rw-r--r--xen/include/xen/acpi.h126
-rw-r--r--xen/include/xen/bitmap.h250
-rw-r--r--xen/include/xen/bitops.h129
-rw-r--r--xen/include/xen/config.h6
-rw-r--r--xen/include/xen/console.h10
-rw-r--r--xen/include/xen/cpumask.h378
-rw-r--r--xen/include/xen/dmi.h38
-rw-r--r--xen/include/xen/domain.h14
-rw-r--r--xen/include/xen/domain_page.h100
-rw-r--r--xen/include/xen/elf.h10
-rw-r--r--xen/include/xen/event.h36
-rw-r--r--xen/include/xen/grant_table.h15
-rw-r--r--xen/include/xen/init.h31
-rw-r--r--xen/include/xen/inttypes.h251
-rw-r--r--xen/include/xen/ioport.h117
-rw-r--r--xen/include/xen/irq.h11
-rw-r--r--xen/include/xen/irq_cpustat.h4
-rw-r--r--xen/include/xen/kernel.h24
-rw-r--r--xen/include/xen/keyhandler.h6
-rw-r--r--xen/include/xen/lib.h52
-rw-r--r--xen/include/xen/list.h12
-rw-r--r--xen/include/xen/mm.h25
-rw-r--r--xen/include/xen/pci.h838
-rw-r--r--xen/include/xen/pci_ids.h2017
-rw-r--r--xen/include/xen/perfc.h54
-rw-r--r--xen/include/xen/perfc_defn.h144
-rw-r--r--xen/include/xen/sched-if.h59
-rw-r--r--xen/include/xen/sched.h390
-rw-r--r--xen/include/xen/serial.h116
-rw-r--r--xen/include/xen/shadow.h19
-rw-r--r--xen/include/xen/slab.h57
-rw-r--r--xen/include/xen/smp.h63
-rw-r--r--xen/include/xen/softirq.h4
-rw-r--r--xen/include/xen/spinlock.h3
-rw-r--r--xen/include/xen/time.h23
-rw-r--r--xen/include/xen/trace.h64
-rw-r--r--xen/include/xen/types.h8
-rw-r--r--xen/include/xen/xmalloc.h27
-rw-r--r--xen/tools/figlet/Makefile4
1297 files changed, 252657 insertions, 70702 deletions
diff --git a/.hgtags b/.hgtags
index 49eeffb772..a094a94287 100644
--- a/.hgtags
+++ b/.hgtags
@@ -1,9 +1,12 @@
42882b3e0dda89f3a8ec00da568f86e9b3c230f1 RELEASE-2.0.0
475a162b66e2c19b1e9468b234a4ba705334905e RELEASE-2.0.1
-82ba8bd1ceb2e03af769775fb8bc890dcab04f72 RELEASE-2.0.3
+dc2f08429f17e6614fd2f1ab88cc09ca0a850f32 RELEASE-2.0.2
+6e1bbc13911751efa0b1c018425c1b085820fa02 RELEASE-2.0.3
+fb875591fd72e15c31879c0e9034d99b80225595 RELEASE-2.0.4
1a522944f76540ea9d73fcc1b0d13d0f670183f0 RELEASE-2.0.5
2a5814ad2e5634a5fa291b703a152e7fc0b4faf0 RELEASE-2.0.6
487b2ee37d1cecb5f3e7a546b05ad097a0226f2f beta1
+da92dcde82ea3a765c16231da72454c2edcf6bbb ia64-stable
3d330e41f41ce1bc118c02346e18949ad5d67f6b latest-semistable
30c521db4c71960b0cf1d9c9e1b658e77b535a3e latest-stable
9afec5bc14aeb197ef37ea54a57eacd427463fc3 semistable
diff --git a/.rootkeys b/.rootkeys
index 4f6f54f657..f8d113f0e6 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -3,6 +3,7 @@
3ddb6b0buTaC5zg1_a8FoAR9FWi_mw BitKeeper/etc/ignore
3ddb79c9_hgSp-gsQm8HqWM_9W3B_A BitKeeper/etc/logging_ok
4177dbbfqsi01p2zgZa0geUOgScONw COPYING
+423fdd91sxkCMaKFcDsEdhsZer54vA Config.mk
3eb788d6Kleck_Cut0ouGneviGzliQ Makefile
3f5ef5a24IaQasQE2tyMxrfxskMmvw README
41880852AtdVfSsfKGtrLdajX1vEXQ buildconfigs/Rules.mk
@@ -15,15 +16,18 @@
4187c1c7IWmBinGdI19kL4MuZ6RLbQ docs/check_pkgs
3f9e7d60PWZJeVh5xdnk0nLUdxlqEA docs/figs/xenlogo.eps
418a3248xjIqmNKo0v_XQSfAvlBGFw docs/html.sty
+41c0c4116itF389v0CEWcmzue6zJkA docs/misc/VMX_changes.txt
4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc/XenDebugger-HOWTO
412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt
+420b949cy9ZGzED74Fz_DaWlK7tT4g docs/misc/crashdb.txt
+4251a1f82AexscYEiF4Iku8Gc_kWfQ docs/misc/grant-tables.txt
+424d462b5GuApQ_NyMsRFt9LbrsWow docs/misc/sedf_scheduler_mini-HOWTO.txt
40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html
410a4c2bAO_m_l4RsiiPHnZ4ixHWbQ docs/misc/xend.tex
3f9e7d564bWFB-Czjv1qdmE6o0GqNg docs/src/interface.tex
410144afnSd2Yw68AHGO5gXu2m3y6A docs/src/user.tex
3f815144d1vI2777JI-dO4wk49Iw7g extras/mini-os/Makefile
3f815144zTnCV5591ulIJQrpe5b-5Q extras/mini-os/README
-3f815144wiiDekmfMl9LIPIvhR83Uw extras/mini-os/entry.S
3f815144r7AHj8GPvc3Nl1L9OSsWIg extras/mini-os/events.c
3f815144h-Chna6E38yo40jqU95G1Q extras/mini-os/h/events.h
3f8151445bYdgThGHQPeOW49PsrJ_A extras/mini-os/h/hypervisor.h
@@ -32,17 +36,19 @@
3f815144nbSjjT1h4m99-QPbeSWY0Q extras/mini-os/h/os.h
3f815144L1t0AevJt2JDXPegv6JTrw extras/mini-os/h/time.h
3f815144UxddtL0ICCKisN-NDHNFaA extras/mini-os/h/types.h
-3f815145W2mamPMclRLOzm5B38vWUQ extras/mini-os/head.S
3f815145LqcH11TCEZbAvcjarckkJw extras/mini-os/hypervisor.c
3f815145vwnmxhCwN7dMRWv_XFtXbg extras/mini-os/kernel.c
3f8151451k5emQAlRe80JdIvfSN4VA extras/mini-os/lib/malloc.c
3f815145Mb9WSKjOPsYTLsPIvPyy4Q extras/mini-os/lib/math.c
3f8151454rEuPjN74V2Bcu65RLnM-Q extras/mini-os/lib/printf.c
3f815145MQZrUJV0iRmTK2KIhwB2wg extras/mini-os/lib/string.c
-3f815145AYE58Kpmsj5U7oHDpVDZJA extras/mini-os/minios.lds
+3f815145AYE58Kpmsj5U7oHDpVDZJA extras/mini-os/minios-x86_32.lds
+420c983dKBm6a2-sM04QzaaTy55crw extras/mini-os/minios-x86_64.lds
3f815145CB8XdPUqsmhAjSDFuwOoqA extras/mini-os/mm.c
3f815145vGYx1WY79voKkZB9yKwJKQ extras/mini-os/time.c
3f815145xlKBAQmal9oces3G_Mvxqw extras/mini-os/traps.c
+3f815145W2mamPMclRLOzm5B38vWUQ extras/mini-os/x86_32.S
+420c983dAE5qEtJMI6wHAQnHVJ1r2g extras/mini-os/x86_64.S
423e7e86yUUeeOvTAmjIahrpk1ksaQ freebsd-5.3-xen-sparse/conf/Makefile.i386-xen
423e7e86CSWbA9G8OftmMbfhStuQ6Q freebsd-5.3-xen-sparse/conf/files.i386-xen
42642db9m5qcqIrsHX8QlcBGeKSAmw freebsd-5.3-xen-sparse/conf/kern.mk
@@ -65,6 +71,7 @@
423e7e88B5vxFblc-MlhxKk9e4ieBw freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
423e7e88z_BrFu1O71-Ya4pXJpjAPQ freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s
423e7e88uDvAZLmABMkqOpmemyVRyw freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c
+4266317eeOLpvRxIjmOYQVlL4WWQsg freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c
423e7e88yr5NFQudubMnkvdb_y-Gtg freebsd-5.3-xen-sparse/i386-xen/i386-xen/hypervisor.c
423e7e88Y-e-4RRf9nrgkVn5PXUv3Q freebsd-5.3-xen-sparse/i386-xen/i386-xen/i686_mem.c
423e7e88b8m2cuGtOxVvs4Sok4Vk7Q freebsd-5.3-xen-sparse/i386-xen/i386-xen/initcpu.c
@@ -88,7 +95,7 @@
423e7e8aVYTynjpZsJxUsFSlIDhpJw freebsd-5.3-xen-sparse/i386-xen/include/cpufunc.h
423e7e8avrrUxDugrwq_GJp499DkJw freebsd-5.3-xen-sparse/i386-xen/include/ctrl_if.h
423e7e8apY1r9Td-S0eZITNZZbfNTQ freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h
-423e7e8aL9DsObEegCwtILrF6SWcAQ freebsd-5.3-xen-sparse/i386-xen/include/frame.h
+4266317eOVvN00XdcqRfDRFIrbqgvg freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h
423e7e8btv8Gojq50ggnP5A1Dkc4kA freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h
423e7e8buhTLVFLZ33-5s8-UdADSZg freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
423e7e8bnHT1kMD-FPC7zHZR7l3VXw freebsd-5.3-xen-sparse/i386-xen/include/md_var.h
@@ -100,7 +107,6 @@
423e7e8bVOoPguCLyNj7pil-PT7Vcw freebsd-5.3-xen-sparse/i386-xen/include/segments.h
423e7e8c9AuwksRrt0ptRKHnNVWuNQ freebsd-5.3-xen-sparse/i386-xen/include/synch_bitops.h
423e7e8csdWimnMBI2HxEDJ30L42kQ freebsd-5.3-xen-sparse/i386-xen/include/trap.h
-423e7e8cgVgn9W8sZWwfh_4938fSJQ freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h
423e7e8cdsEhPyad2ppDoSiBR4eB9g freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h
423e7e8ccGI7kzIlRcEVziGZzm46wg freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h
423e7e8cVSqLIOp5vH4ADvAL_MF6Qg freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h
@@ -113,6 +119,7 @@
423e7e8dVX2QkuzWwB2rtZDxD5Y_-w freebsd-5.3-xen-sparse/i386-xen/xen/misc/npx.c
423e7e8d_PdWXjQeRg75twh7TleJhQ freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
423e7e8dlsc1oCW_ul57w0AHY5jZjQ freebsd-5.3-xen-sparse/kern/kern_fork.c
+427346bfHJQldVgD-acDy_2toKMhTA freebsd-5.3-xen-sparse/kern/kern_shutdown.c
423e7e8dVDL1WLfbmQWuXMbetYk4jA freebsd-5.3-xen-sparse/mkbuildtree
423e7e8dBrOrAbydK6h49bY0VvDgPw freebsd-5.3-xen-sparse/xenfbsd_kernel_build
4187ca95_eQN62ugV1zliQcfzXrHnw install.sh
@@ -158,7 +165,6 @@
3f108aeaLcGDgQdFAANLTUEid0a05w linux-2.4.30-xen-sparse/drivers/char/mem.c
3e5a4e66rw65CxyolW9PKz4GG42RcA linux-2.4.30-xen-sparse/drivers/char/tty_io.c
40c9c0c1pPwYE3-4i-oI3ubUu7UgvQ linux-2.4.30-xen-sparse/drivers/scsi/aic7xxx/Makefile
-3e5a4e669uzIE54VwucPYtGwXLAbzA linux-2.4.30-xen-sparse/fs/exec.c
3e5a4e66wbeCpsJgVf_U8Jde-CNcsA linux-2.4.30-xen-sparse/include/asm-xen/bugs.h
3e5a4e66HdSkvIV6SJ1evG_xmTmXHA linux-2.4.30-xen-sparse/include/asm-xen/desc.h
3e5a4e66SYp_UpAVcF8Lc1wa3Qtgzw linux-2.4.30-xen-sparse/include/asm-xen/fixmap.h
@@ -169,7 +175,6 @@
40d70c240tW7TWArl1VUgIFH2nVO1A linux-2.4.30-xen-sparse/include/asm-xen/keyboard.h
3e5a4e678ddsQOpbSiRdy1GRcDc9WA linux-2.4.30-xen-sparse/include/asm-xen/mmu_context.h
40d06e5b2YWInUX1Xv9amVANwd_2Xg linux-2.4.30-xen-sparse/include/asm-xen/module.h
-3f8707e7ZmZ6TxyX0ZUEfvhA2Pb_xQ linux-2.4.30-xen-sparse/include/asm-xen/msr.h
3e5a4e67mnQfh-R8KcQCaVo2Oho6yg linux-2.4.30-xen-sparse/include/asm-xen/page.h
409ba2e7ZfV5hqTvIzxLtpClnxtIzg linux-2.4.30-xen-sparse/include/asm-xen/pci.h
3e5a4e67uTYU5oEnIDjxuaez8njjqg linux-2.4.30-xen-sparse/include/asm-xen/pgalloc.h
@@ -197,18 +202,21 @@
3f108af5VxPkLv13tXpXgoRKALQtXQ linux-2.4.30-xen-sparse/mm/mprotect.c
3e5a4e681xMPdF9xCMwpyfuYMySU5g linux-2.4.30-xen-sparse/mm/mremap.c
409ba2e7akOFqQUg6Qyg2s28xcXiMg linux-2.4.30-xen-sparse/mm/page_alloc.c
-3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.30-xen-sparse/mm/swapfile.c
-41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.30-xen-sparse/mm/vmalloc.c
41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.30-xen-sparse/net/core/skbuff.c
40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.11-xen-sparse/arch/xen/Kconfig
40f56237utH41NPukqHksuNf29IC9A linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers
40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.11-xen-sparse/arch/xen/Makefile
40f56237JTc60m1FRlUxkUaGSQKrNw linux-2.6.11-xen-sparse/arch/xen/boot/Makefile
-40f56237hRxbacU_3PdoAl6DjZ3Jnw linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig
-40f56237wubfjJKlfIzZlI3ZM2VgGA linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig
+40f56237hRxbacU_3PdoAl6DjZ3Jnw linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
+424f001dsaMEQ1wWQnPmu0ejo6pgPA linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
+40f56237wubfjJKlfIzZlI3ZM2VgGA linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
+424f001dsBzCezYZD8vAn-h5D9ZRtQ linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
40f56237Mta0yHNaMS_qtM2rge0qYA linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig
40f56238u2CJdXNpjsZgHBxeVyY-2g linux-2.6.11-xen-sparse/arch/xen/i386/Makefile
40f56238eczveJ86k_4hNxCLRQIF-g linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile
+42778a68_kGyflDnRbar9WAmb4CKYw linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile
+42778a68_2OruSz7lwFPBiGhl3y-FA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
+42778a69h76S5SCnDonnxnIt9nDGFQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c
40f56238rXVTJQKbBuXXLH52qEArcg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/Makefile
40f562385s4lr6Zg92gExe7UQ4A76Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c
41ab440bnpxZdWShZrGgM9pPaz5rmA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile
@@ -216,39 +224,80 @@
40f56238XDtHSijkAFlbv1PT8Bhw_Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S
40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
+42778a69_lodTzZVlojib1-pZF030g linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c
40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c
+41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
41d54a76YMCA67S8J-TBT3J62Wx6yA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/microcode.c
+42778a69obEqvR75wSKCWPk9QnHo-w linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c
4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c
40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c
40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/signal.c
+41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
+41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c
40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile
40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
40f562389xNa78YBZciUibQjyRU_Lg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c
40f56238JypKAUG01ZojFwH7qnZ5uA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.S
+427245dboQBkhq841wIPqlRD-AG9Jw linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile
40f56238a3w6-byOzexIlMgni76Lcg linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile
40f56238ILx8xlbywNbzTdv5Zr4xXQ linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c
4118cc35CbY8rfGVspF5O-7EkXBEAA linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c
40f562383SKvDStdtrvzr5fyCbW4rw linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c
40f56239xcNylAxuGsQHwi1AyMLV8w linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c
41062ab7CjxC1UBaFhOMWWdhHkIUyg linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c
-413b5ab8LIowAnQrEmaOJSdmqm96jQ linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c
40f5623906UYHv1rsVUeRc0tFT0dWw linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c
4107adf12ndy94MidCaivDibJ3pPAg linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile
-4107adf1WcCgkhsdLTRGX52cOG1vJg linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c
4107adf1s5u6249DNPUViX1YNagbUQ linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c
40f56239zOksGg_H4XD4ye6iZNtoZA linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile
40f56239bvOjuuuViZ0XMlNiREFC0A linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c
41ab6fa06JdF7jxUsuDcjN3UhuIAxg linux-2.6.11-xen-sparse/arch/xen/kernel/devmem.c
40f56238xFQe9T7M_U_FItM-bZIpLw linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c
4110f478aeQWllIN7J4kouAHiAqrPw linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c
+412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c
414c113396tK1HTVeUalm3u-1DF16g linux-2.6.11-xen-sparse/arch/xen/kernel/skbuff.c
+418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c
3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c
+424efaa6xahU2q85_dT-SjUJEaivfg linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig
+424efaa6kKleWe45IrqsG8gkejgEQA linux-2.6.11-xen-sparse/arch/xen/x86_64/Makefile
+424efaa6HSyuVodl6SxFGj39vlp6MA linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile
+428f0973_moB26LYt56xXKYCTqEdXw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile
+428f0973NBdgINmWOEJjoIDL73SDbQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c
+424efaa7bVAw3Z_q0SdFivfNVavyIg linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c
+424efaa7ddTVabh547Opf0u9vKmUXw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/e820.c
428868bbQust_FkSdkerMqYBWfrVKg linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/early_printk.c
+424efaa7B_BWrAkLPJNoKk4EQY2a7w linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S
+424efaa7vhgi7th5QVICjfuHmEWOkw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head.S
+424efaa7tiMEZSAYepwyjaNWxyXF7Q linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head64.c
+424efaa6M6AGf53TJa2y9cl6coos0g linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/init_task.c
+428f0973EY3nNGYatQn7IO61RHO0oQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
+424efaa6wHXXaloZygAv6ywDb7u7nQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ioport.c
+424efaa6gOkc9_uHCLgvY_DXPqh_sg linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/irq.c
+424efaa6ibN3xXEeXoxURmKfJF_CUA linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c
+428f09737NE-9XTvvA58TXLP0j54Uw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
+424efaa6aX4JkXAzBf4nqxRmLUfhqQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c
+424efaa7e8nVw04q-pK8XRFaHPVx_A linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c
+424efaa7CxY9cbhqapUfqVYnD7T9LQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/process.c
+424efaa7I-DPzj1fkZeYPJS7rA4FAw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c
+424efaa7DIVTR1U4waPGHucha9Xilg linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup64.c
+424efaa6L1lrzwCIadTNxogSvljFwg linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/signal.c
+424efaa61XzweJyW3v5Lb9egpe3rtw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c
+424efaa778MkpdkAIq0An1FjQENN_Q linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
+424efaa7szEu90xkjpXk5TufZxxa4g linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
+424efaa6sJsuHdGIGxm0r-ugsss3OQ linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
+424efaa6xbX9LkKyaXvgbL9s_39Trw linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
+424efaa670zlQTtnOYK_aNgqhmSx-Q linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/Makefile
+424efaa6HUC68-hBHgiWOMDfKZogIA linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c
+424efaa65ELRJ3JfgQQKLzW6y0ECYQ linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c
+424efaa60dTbHfv65JBLVhNLcNPcRA linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c
+424efaa6uMX8YJASAVJT8ral74dz9Q linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/pageattr.c
+424efaa629XgfZi3vvTAuQmhCqmvIA linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile
+424efaa64SRL9FZhtQovFJAVh9sZlQ linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile-BUS
41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.11-xen-sparse/drivers/Makefile
+42778a69QJ93x9p93ALrTV5QELHF-Q linux-2.6.11-xen-sparse/drivers/acpi/tables.c
4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.11-xen-sparse/drivers/char/mem.c
4111308bZAIzwf_Kzu6x1TZYZ3E0_Q linux-2.6.11-xen-sparse/drivers/char/tty_io.c
40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.11-xen-sparse/drivers/xen/Makefile
@@ -265,6 +314,12 @@
40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
+41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile
+41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c
+41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h
+41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
+41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
+41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.11-xen-sparse/drivers/xen/console/Makefile
3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.11-xen-sparse/drivers/xen/console/console.c
40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.11-xen-sparse/drivers/xen/evtchn/Makefile
@@ -280,20 +335,27 @@
405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.11-xen-sparse/drivers/xen/privcmd/Makefile
3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
+41ee5e8bYDQkjRVKnFn5uFyy0KreCw linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h
+41ee5e8bt7xeBUJqG5XJS-ofukdsgA linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c
+41ee5e8bSs3BGC7yegM_ek2Tn0Ahvw linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c
+41ee5e8bglvqKvZSY5uJ5JGQejEwyQ linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
+41ee5e8ckZ9xVNvu9NHIZDK7JqApmQ linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c
+41ee5e8ck9scpGirfqEZRARbGDyTXA linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h
412f47e4RKD-R5IS5gEXvcT8L4v8gA linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h
42400318xlBIV46qyxLTaDepPLNyhg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/agp.h
-42a7252dZgup3r7qUeJGriyN5o9rUg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/bugs.h
40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h
4107adf1E5O4ztGHNGMzCCNhcvqNow linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
40f5623akIoBsQ3KxSB2kufkbgONXQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h
41979925z1MsKU1SfuuheM1IFDQ_bA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/floppy.h
4118b6a418gnL6AZsTdglC92YGqYTg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h
+42539fb5A9hsS3NFQ-2VY4y1TONZZQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io.h
40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
+41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
+426fa4d7RzvcFMqff_M76HrvRQZHSg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h
4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
-40f5623aFTyFTR-vdiA-KaGxk5JOKQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/msr.h
40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h
40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/param.h
41137cc1kkvg0cg7uxddcEfjL7L67w linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pci.h
@@ -305,29 +367,66 @@
412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/ptrace.h
40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h
40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/setup.h
+4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h
40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/vga.h
-40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/xor.h
+424f001delsctIT-_5gdbHsN9VfaQA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/arch_hooks.h
+424efa21QfpO4QqQf9ADB4U_2zo8dQ linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h
+424efa21riixePBPesLRsVnhFxfEfQ linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/desc.h
+424efa21iAXuoKIT3-zDni6aryFlPQ linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h
+424efa21QCdU7W3An0BM0bboJZ6f4Q linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h
+424efa21S7Ruo0JzTFH1qwezpdtCbw linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/floppy.h
+424f001ds3cL9WAgSH5Nja1BAkZfDg linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
+424efa20tMbuEQuxvPjow-wkBx83rA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/io.h
+424efa20meDrUt6I2XWbpuf72e4gEw linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/irq.h
+424f001d3cpZoX9SZD_zjTapOs-ZIQ linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/io_ports.h
+424f001eirTAXdX_1gCugGtzSGJUXw linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h
+424f001eTD7ATy8MC71Lm2rOHHyUCA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_time.h
+424f001ew4jIwfKeZUNa_U54UAaJcw linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_timer.h
+424f001ePIPWhBJGeTgj-KmiHOYvqw linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
+424f001e0S9hTGOoEN8pgheQJ76yqQ linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_pre.h
+424f001eQPBrY1621DbCPKn9wK36ZQ linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h
+424efa21FvJNdHFfm2w2TOWohNsqDQ linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
+424efa214neirHds4zbtwaefvG5PYA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/page.h
+424efa21-7jaHj-W-T4E9oM3kqFA7Q linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/param.h
+424efa20I76WtOlPh71MaXtai3-qZA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pci.h
+424efa20Fs7EHhAV6Hz_UtifwEfczg linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pda.h
+424efa20CGx-5HD8ahpdHxPW2KlrtA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
+424efa21YaMjX7hz7eCkVNcNWRK42A linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
+424efa21wPKwwFR1fcqrPD0_o3GKWA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/processor.h
+424efa20fTFqmaE0stH6lfB_4yN_lA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h
+424efa21fY4IvK0luYgDJHKV-MD3eQ linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/segment.h
+424efa21KcupuJlHgmPiTk_T214FrA linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/smp.h
+424efa210ZRt2U_8WmtyI7g74Nz-4Q linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/system.h
+424f001eBp9fMbZ0Mo2kRJQ84gMgRw linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/timer.h
+424efa21Xk2acvaHYnpyTCLE6nU6hw linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h
+424efa21Ey6Q4L4AsXxcEwH3vMDeiw linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/vga.h
+424efa214gNhOfFimFJHq4in24Yp1g linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/xor.h
41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.11-xen-sparse/include/asm-xen/balloon.h
40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.11-xen-sparse/include/asm-xen/ctrl_if.h
40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h
419b4e9367PjTEvdjwavWN12BeBBXg linux-2.6.11-xen-sparse/include/asm-xen/foreign_page.h
+412dfaeazclyNDM0cpnp60Yo4xulpQ linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h
40f5623aGPlsm0u1LTO-NVZ6AGzNRQ linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h
3f108af1ylCIm82H052FVTfXACBHrw linux-2.6.11-xen-sparse/include/asm-xen/linux-public/privcmd.h
3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.11-xen-sparse/include/asm-xen/linux-public/suspend.h
-40f5623cndVUFlkxpf7Lfx7xu8madQ linux-2.6.11-xen-sparse/include/asm-xen/multicall.h
4122466356eIBnC9ot44WSVVIFyhQA linux-2.6.11-xen-sparse/include/asm-xen/queues.h
+42a885cawNQ18_b7i5-G7ekMsZ48hw linux-2.6.11-xen-sparse/include/asm-xen/synch_bitops.h
3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.11-xen-sparse/include/asm-xen/xen_proc.h
419b4e93z2S0gR17XTy8wg09JEwAhg linux-2.6.11-xen-sparse/include/linux/gfp.h
42305f545Vc5SLCUewZ2-n-P9JJhEQ linux-2.6.11-xen-sparse/include/linux/highmem.h
419dfc609zbti8rqL60tL2dHXQ_rvQ linux-2.6.11-xen-sparse/include/linux/irq.h
+428f8747dtEZ4CfC5tb6Loe9h0Ivpg linux-2.6.11-xen-sparse/include/linux/skbuff.h
419dfc6awx7w88wk6cG9P3mPidX6LQ linux-2.6.11-xen-sparse/kernel/irq/manage.c
40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.11-xen-sparse/mkbuildtree
42305f54Q6xJ1bXcQJlCQq1m-e2C8g linux-2.6.11-xen-sparse/mm/highmem.c
412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.11-xen-sparse/mm/memory.c
+426fa4d7ooLYmFcFjJMF_ut4GFVh2Q linux-2.6.11-xen-sparse/mm/mmap.c
410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.11-xen-sparse/mm/page_alloc.c
+428f8747Gp_X2UtgwcL0-YeYkCXxvQ linux-2.6.11-xen-sparse/net/core/dev.c
+428f8747vBdkOrip6rhWK_eEvVc8dA linux-2.6.11-xen-sparse/net/core/skbuff.c
413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile
413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree
413cb1e5kY_Zil7-b0kI6hvCIxBEYg netbsd-2.0-xen-sparse/nbconfig-xen
@@ -367,13 +466,45 @@
422e4430-gOD358H8nGGnNWes08Nng netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c
413cb3b53nyOv1OIeDSsCXhBFDXvJA netbsd-2.0-xen-sparse/sys/nfs/files.nfs
413aa1d0oNP8HXLvfPuMe6cSroUfSA patches/linux-2.6.11/agpgart.patch
+427261074Iy1MkbbqIV6zdZDWWx_Jg patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch
42372652KCUP-IOH9RN19YQmGhs4aA patches/linux-2.6.11/iomap.patch
428359d4b3fDYtazwXi4UUmSWaOUew patches/linux-2.6.11/linux-2.6.11.12.patch
-418abc69J3F638vPO9MYoDGeYilxoQ patches/linux-2.6.11/nettel.patch
+4296fb998LGSWCcljGKbOCUv3h9uRQ patches/linux-2.6.11/net-csum.patch
429ae875I9ZrqrRDjGD34IC2kzDREw patches/linux-2.6.11/rcu-nohz.patch
429ba3007184K-y6WHQ6KgY65-lEIQ patches/linux-2.6.11/udp-frag.patch
+424f001e_M1Tnxc52rDrmCLelnDWMQ patches/linux-2.6.11/x86_64-linux.patch
3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rules.mk
+4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile
+4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README
+42277b02mYXxgijE7MFeUe9d8eldMw tools/blktap/README-PARALLAX
+4209033fHgtGpb_K16_xC9CpkjNZLw tools/blktap/blkdump.c
+42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
+42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
+428df8fdkg84W8yveE50EbkbTUZgjQ tools/blktap/block-async.c
+428df8feTrgGFZEBMA_dYijy9DNs1g tools/blktap/block-async.h
+42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c
+42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h
+42371b8aL1JsxAXOd4bBhmZKDyjiJg tools/blktap/blockstored.c
+42371b8aD_x3L9MKsXciMNqkuk58eQ tools/blktap/bstest.c
+42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c
+42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c
+42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h
+428df8fe5RYONloDWVMkM-CfHfB1vA tools/blktap/requests-async.c
+428df8feWeKJ-9HJb5_rFqdm_xqErg tools/blktap/requests-async.h
+42277b03U_wLHL-alMA0bfxGlqldXg tools/blktap/snaplog.c
+42277b04Ryya-z662BEx8HnxNN0dGQ tools/blktap/snaplog.h
+42277b04LxFjptgZ75Z98DUAso4Prg tools/blktap/vdi.c
+42277b04tt5QkIvs8She8CQqH5kwpg tools/blktap/vdi.h
+42277b04zMAhB0_946sHQ_H2vwnt0Q tools/blktap/vdi_create.c
+42277b04xB_iUmiSm6nKcy8OV8bckA tools/blktap/vdi_fill.c
+42277b045CJGD_rKH-ZT_-0X4knhWA tools/blktap/vdi_list.c
+42277b043ZKx0NJSbcgptQctQ5rerg tools/blktap/vdi_snap.c
+423f270c_QDjGLQ_YdaOtyBM5n9BDg tools/blktap/vdi_snap_delete.c
+42277b043Fjy5-H7LyBtUPyDlZFo6A tools/blktap/vdi_snap_list.c
+42277b04vhqD6Lq3WmGbaESoAAKdhw tools/blktap/vdi_tree.c
+42277b04RnFo07c1LcdmLn-FtRJEmw tools/blktap/vdi_unittest.c
+42277b047H8fTVyUf75BWAjh6Zpsqg tools/blktap/vdi_validate.c
4124b307nRyK3dhn1hAsvrY76NuV3g tools/check/Makefile
4124b307vHLUWbfpemVefmaWDcdfag tools/check/README
4124b307jt7T3CHysgl9LijNHSe1tA tools/check/check_brctl
@@ -382,16 +513,44 @@
42642813SYRkwr07qVZ9eCI5QTZANg tools/check/check_iproute
4124b307u-FeKvFP9kZnh0rLV0XjGg tools/check/check_logging
4124b307tRTjLqzRy60QrUoqN2Fhuw tools/check/check_python
-4124b307XdznSNCv97lrT3RpOdMM1A tools/check/check_twisted
4124b307lnAATmulpXYa0M-dzxLBDA tools/check/check_zlib_devel
4124b308ly20ptMKQoiztPyP_X68Mw tools/check/check_zlib_lib
4124b308O8yPHMKbj4YPR_grPGZmdA tools/check/chk
+423d3a7bpoTFd0vqFaocQ-FqC8RuPA tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in
+423d3a7b_HtKYGocoTS1adeOpqDFnw tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure
+423d3a7b2vJq86I8FbYm6up5BsCwfA tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in
+423d3a7bQPownmVb63qOoyq89ebBVA tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.srv
+423d3a7bHtqhyOgiRWhjWt-S-6wbYg tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
+4273458dYPghQKVnj_xu5-fC38CcOg tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/server.c
+423d3a7b2ENk2IskDZYZ98pe5NsvIA tools/debugger/gdb/gdb-6.2.1-xen-sparse/mkbuildtree
+423d3a7buANO_q-kgxIRffUu7lMnUw tools/debugger/gdb/gdbbuild
+42a1a777Dt8l7bna7fm1vKmTEX1FCQ tools/debugger/libxendebug/Makefile
+42a0c8d8qbLfvuvDUA0tFB9nHMh-zg tools/debugger/libxendebug/list.h
+42a0c8d98XtmbhyddBgIyyHllz5WTw tools/debugger/libxendebug/xendebug.c
+42a0c8d9ucRxWO41IHTfYI7xYGoKrw tools/debugger/libxendebug/xendebug.h
+42a0c8d9zuGuWoaTux5NW4N3wOw8pg tools/debugger/pdb/Domain.ml
+42a0c8d9pigEXFFtdut3R99jbf73NA tools/debugger/pdb/Domain.mli
+42a0c8d93wnR_hcSAa7VHgn8CSrWEA tools/debugger/pdb/Intel.ml
+42a0c8d95glt-jkgXe8GDOPT6TYN6Q tools/debugger/pdb/Makefile
+42a0c8d9UueJDF0IRX3OozEvUhSTmw tools/debugger/pdb/OCamlMakefile
+42a0c8d9PgBvaWPzTHSFb9ngii7c7w tools/debugger/pdb/PDB.ml
+42a0c8danHHGiNywdeer6j4jzxAc2A tools/debugger/pdb/Process.ml
+42a0c8dav_08OtySI4kYP1lahlVrpQ tools/debugger/pdb/Process.mli
+42a0c8da51EqubQT5PJ4sxCKLF3xSw tools/debugger/pdb/Util.ml
+42a0c8daxftpiXuvLmc9fOOEhdFWiQ tools/debugger/pdb/debugger.ml
+42a0c8da81tzhpvIAfkx9nZqUNrQvg tools/debugger/pdb/evtchn.ml
+42a0c8dasiso9c-2sCvHBzP6YVjATA tools/debugger/pdb/evtchn.mli
+42a0c8daXD_6Y62A_u5-PO_Klrhi0w tools/debugger/pdb/pdb_caml_xc.c
+42a0c8danJXun9ay5SPBhhkKvuUPfg tools/debugger/pdb/pdb_xen.c
+42a0c8dbjK6Du89D2SUcxsuAdlUu3w tools/debugger/pdb/server.ml
401d7e160vaxMBAUSLSicuZ7AQjJ3w tools/examples/Makefile
401d7e16UgeqroJQTIhwkrDVkoWgZQ tools/examples/README
41597996VhTbNuHbuscYSfRb-WR6fA tools/examples/block-enbd
41597996GHP2_yVih2UspXh328fgMQ tools/examples/block-file
+41dde8af16Hulg1pgW8aOnbbxyrl7w tools/examples/bochsrc
405ff55dawQyCHFEnJ067ChPRoXBBA tools/examples/init.d/xend
40278d94cIUWl2eRgnwZtr4hTyWT1Q tools/examples/init.d/xendomains
+41dde8afTUuvdtFUlOx0ZRusKxyd8w tools/examples/mem-map.sxp
40ee75a9xFz6S05sDKu-JCLqyVTkDA tools/examples/network
41fc0c18hVgK5rKJyZUsqybux9D9Dg tools/examples/network-nat
41e661e1giIEKbJ25qfiP-ke8u8hFA tools/examples/network-route
@@ -401,77 +560,242 @@
423ab2eaNCzxk3c-9yU1BwzxWvsDCQ tools/examples/vnc/Xservers
423ab2ea7ajZLdZOI-8Z-bpNdNhhAQ tools/examples/vnc/Xvnc-xen
40ee75a93cqxHp6MiYXxxwR5j2_8QQ tools/examples/xend-config.sxp
+41dde8af6M2Pm1Rrv_f5jEFC_BIOIA tools/examples/xmexample.vmx
41090ec8Pj_bkgCBpg2W7WfmNkumEA tools/examples/xmexample1
40cf2937oKlROYOJTN8GWwWM5AmjBg tools/examples/xmexample2
41fc0c18_k4iL81hu4pMIWQu9dKpKA tools/examples/xmexample3
+42a6b4b7KssGzTDVN-XG2FM1gCEnnw tools/firmware/Makefile
+42a6b4b7qP95OSsEL8XWKKZ1p1myjQ tools/firmware/README
+42a6b4b78PWdYzKYvLt_EHhvQCl9ig tools/firmware/rombios/Makefile
+42a6b4b75sz5KF9Lry2EGnPMhOdnUA tools/firmware/rombios/apmbios.S
+42a6b4b7YwP9rl3AJRTmZbBoal_c6Q tools/firmware/rombios/biossums.c
+42a6b4b83gANosDYd43YaK7ATQvBEg tools/firmware/rombios/makesym.perl
+42a6b4b8qcIQIBXDeOY3JRwsLM6lhw tools/firmware/rombios/rombios.c
+42a6b4b8K7yqnU3-QxndYNZUgHpniw tools/firmware/rombios/rombios.diffs
+42a6b4b86GMM969Y82nK3HuUi6eP9g tools/firmware/vgabios/BUGS
+42a6b4b8J_MHMVmmF_igI7zeDxSiwA tools/firmware/vgabios/COPYING
+42a6b4b8SYW5q21pPPuQt88Bkpqc2Q tools/firmware/vgabios/ChangeLog
+42a6b4b8INe7qe20YYlwATaAADEMQA tools/firmware/vgabios/Makefile
+42a6b4b8AYFCsoAeqqQ8dibmgxkfLA tools/firmware/vgabios/Notes
+42a6b4b8NUXHh1hudvvNCuqgo9cB-Q tools/firmware/vgabios/README
+42a6b4b8MM0Pj6uDwdJ4Eyg6hB-oEA tools/firmware/vgabios/TODO
+42a6b4b8AL0YrgudjmQr7QvJ3we1Cg tools/firmware/vgabios/biossums.c
+42a6b4b8Zce-r8OtpctwvqHBS8cHEw tools/firmware/vgabios/clext.c
+42a6b4b8fIyMd0d8tIPV4JDAvB5l1A tools/firmware/vgabios/dataseghack
+42a6b4b8M4BsNDRAJMHpY8H2iRu0qA tools/firmware/vgabios/vbe.c
+42a6b4b8Z2pSU4e5qrUR5r1vEKNbKQ tools/firmware/vgabios/vbe.h
+42a6b4b8EyiklW2C9eD9_t0OmRfmFQ tools/firmware/vgabios/vbe_display_api.txt
+42a6b4b8oXcw5CgLj-mBVT4dUc-Umw tools/firmware/vgabios/vbetables.h
+42a6b4b85jkZnCar41YreYVUAY7IDQ tools/firmware/vgabios/vgabios.c
+42a6b4b8xxpRYh1BesaSgW3gpgMsaQ tools/firmware/vgabios/vgabios.h
+42a6b4b8WSA5xHF-R5F8iBcB6BC5wA tools/firmware/vgabios/vgafonts.h
+42a6b4b9C66bPuUTaLjCnJ0I-kGz9w tools/firmware/vgabios/vgatables.h
+42a6b4b969QLJRt3TU_v3yYhZI45Gg tools/firmware/vmxassist/Makefile
+42a6b4b95iuk7M2s-edoSFrWcdoYcw tools/firmware/vmxassist/TODO
+42a6b4b9Q6VB27GxRNCARsDN2ZuKNw tools/firmware/vmxassist/gen.c
+42a6b4b9NmLjb36-sXiiWzcGHjTOJA tools/firmware/vmxassist/head.S
+42a6b4b9jmF9m22iiwu8XwEm1j5fnQ tools/firmware/vmxassist/machine.h
+42a6b4b9ABmGHA1LzYjpq63FBs4hcw tools/firmware/vmxassist/mkhex
+42a6b4b9xmj4TLHJtV-DhnwT9mMpfw tools/firmware/vmxassist/setup.c
+42a6b4b9PjgANTP8Y8JFTToBrV9ssg tools/firmware/vmxassist/trap.S
+42a6b4b9GlymU0VmQyan23pagDaRTQ tools/firmware/vmxassist/util.c
+42a6b4b9mmqUyFn487gP4spU_R6xtg tools/firmware/vmxassist/util.h
+42a6b4b9JssxvlpcV_-QcGRMDGgL_w tools/firmware/vmxassist/vm86.c
+42a6b4b92oUAJMzCE-YcVlA2Z-2zyg tools/firmware/vmxassist/vm86.h
+42a6b4b9TlkVUYTkLd_Bvq9vlrEx6g tools/firmware/vmxassist/vmxassist.ld
+42a6b4b92L-2zFg-Qal6YweeE-pMiA tools/firmware/vmxassist/vmxloader.c
+428d0d82yOaUzYQuYQxH7VzQytKo-g tools/ioemu/COPYING
+428d0d82EdPp1TqJBembLgyB1y413w tools/ioemu/COPYING.LIB
+428d0d82fd6-QydvFfHmeQBGrKnrrA tools/ioemu/Changelog
+428d0d82xvTj4yzPYiurazyGj1PaEw tools/ioemu/Makefile
+428d0d82HvgRPoyU3f60_u_t1L28Ag tools/ioemu/README
+428d0d82aoWewa_6Z5kNUTgkRw0wNg tools/ioemu/TODO
+428d0d82WYi8vrG7RKKyIJw01DAnGg tools/ioemu/VERSION
+428d0d82wB05ibBxTCSsAhz3qRO7Gg tools/ioemu/block-cloop.c
+428d0d82cucBBZFks3aMSL0-C3L9Nw tools/ioemu/block-cow.c
+428d0d82s5FM7xmnj1XLAMlt_DdRIA tools/ioemu/block-qcow.c
+428d0d83yWYa6mIH2mplo1L_3Cqadw tools/ioemu/block-vmdk.c
+428d0d83nfcgHvu37hviRYwAAAAxSQ tools/ioemu/block.c
+428d0d83LrXLfgm9h2RPNBRM_vkqsA tools/ioemu/block_int.h
+428d0d83zt7CgVsTa-CIorpIGVWe7g tools/ioemu/bswap.h
+428d0d83-I9bQJ8EduVO0OmP_YMtVg tools/ioemu/configure
+428d0d83sUjdDRZnfykBaWd_uGjVQQ tools/ioemu/console.c
+428d0d83Rsv-Pq8iGrvA0ChVTD-KEQ tools/ioemu/cpu-all.h
+428d0d830tCm2-QC3iLTo-yS2D7azQ tools/ioemu/cpu-defs.h
+428d0d83bOFEAX7Kc_lt7pm_ItnYOg tools/ioemu/cpu.h
+428d0d83wJqNCht75GfVfWqGzaDBGA tools/ioemu/create_keysym_header.sh
+428d0d83warJp9F3aKU4moRRVfTmFg tools/ioemu/exec-all.h
+428d0d83m3Kwp8vJKycK1n5a_LygfA tools/ioemu/exec.c
+428d0d83G-F1mvFyzCEMNhiU6ts8lQ tools/ioemu/hw/adb.c
+428d0d83EE1hpyfMfr667s4aFK42hg tools/ioemu/hw/adlib.c
+428d0d83AoBht7yFAmAUWoi-ZZS2Tw tools/ioemu/hw/cirrus_vga.c
+428d0d83lD5ovmJG_Q1VfIIjw1Fm-A tools/ioemu/hw/cirrus_vga_rop.h
+428d0d83SCwX65BPgonBcgYCxdKDNA tools/ioemu/hw/cirrus_vga_rop2.h
+428d0d83zAKLZ8JX7_D6RMGcml3jRA tools/ioemu/hw/cuda.c
+428d0d83OLV-aQor-LfByakKvo-1-g tools/ioemu/hw/dma.c
+428d0d83P1VkKtXn90RMN8eBsvPFQA tools/ioemu/hw/fdc.c
+428d0d849AqxX6FsPHv0ovjaFyNMVg tools/ioemu/hw/fmopl.c
+428d0d84-hHRu7PVXjfc7oLfrDxY6g tools/ioemu/hw/fmopl.h
+428d0d84zbtT2C8Xci_SqMP5bZ-wcQ tools/ioemu/hw/i8254.c
+428d0d84KlR61OwSzjF0-L4iz58dfQ tools/ioemu/hw/i8259.c
+428d0d84auhZx6c5Kv3WrfM2UZvqHA tools/ioemu/hw/ide.c
+428d0d84WSlhNzdrcb-f-Lg-W9dniQ tools/ioemu/hw/iommu.c
+428d0d84ri8ZtvhB6RJr1YNejjNWIQ tools/ioemu/hw/lance.c
+428d0d84cxFFgDv5fBFrlxGoCiy6Nw tools/ioemu/hw/m48t08.c
+428d0d84MQYDhAOLnBnag1BZWcW6JA tools/ioemu/hw/m48t08.h
+428d0d84sE4ghX33RQ5kDSuyoLdhFg tools/ioemu/hw/m48t59.c
+428d0d8465kZWTT4mVgf-VonglDOxw tools/ioemu/hw/m48t59.h
+428d0d84OY7tvE-PKrBfjf2vEQXyMA tools/ioemu/hw/magic-load.c
+428d0d84U-PYPR_GMVJoQsbCAVAQow tools/ioemu/hw/mc146818rtc.c
+428d0d84jtSXGjQYKd_xvSiMM4C_7Q tools/ioemu/hw/ne2000.c
+428d0d84SMHPk0cRnrZgUYkMxFXMMQ tools/ioemu/hw/openpic.c
+428d0d84lyG0XDg5MxLMSee3MWgq3g tools/ioemu/hw/pc.c
+428d0d84HWR3Q7dEESycfJ7hSWdGig tools/ioemu/hw/pci.c
+428d0d84Noyn4ik0UX1E7OdfuFdrIw tools/ioemu/hw/pckbd.c
+4294307e0KIA9jaU_1OMIGCcNeLdeQ tools/ioemu/hw/port-e9.c
+428d0d840SMURRjsz9V96rwt-naynw tools/ioemu/hw/ppc.c
+428d0d84MI7kZftH_c0FK1qiiyQBZg tools/ioemu/hw/ppc_chrp.c
+428d0d859-xwA89jmzFk6x9UyXjAeA tools/ioemu/hw/ppc_prep.c
+428d0d85YS1n4Fr_EK7B01EWSmrYRg tools/ioemu/hw/sb16.c
+428d0d85GrUXL_p0ppOUIfWf8--hvw tools/ioemu/hw/sched.c
+428d0d85wP3aLdHYJ-hDAImDP2sj_g tools/ioemu/hw/serial.c
+428d0d85mOfwFqDCO76K6bc4IQOxQA tools/ioemu/hw/sun4m.c
+428d0d852OCpAsfS1PNoJOfnHhFPSQ tools/ioemu/hw/tcx.c
+428d0d85gCUCX0nbuRAt28QJgQ5P8w tools/ioemu/hw/timer.c
+428d0d85hp-zgN40hVYXWRjhInkUkg tools/ioemu/hw/vga.c
+428d0d85G_4S-hpRyrhV4yGjSrS-cQ tools/ioemu/hw/vga_int.h
+428d0d85oWl1ONX_gIZWS1fXjeXGlA tools/ioemu/hw/vga_template.h
+428d0d85_mNnFPE8hnoC3VvBD9CCuA tools/ioemu/keyboard_rdesktop.c
+428d0d85SyOIeDg3SoxH2BiBpXWWkA tools/ioemu/keymaps/ar
+428d0d85ToGTVvPrl8hKAi2QxCzp2w tools/ioemu/keymaps/common
+428d0d85fmdxRplWI5Jp54NNZy5Mmw tools/ioemu/keymaps/convert-map
+428d0d85t5IBwlnttPreCS0UX3nbOw tools/ioemu/keymaps/da
+428d0d85XRNojuUlkCgvea0I_fdJEg tools/ioemu/keymaps/de
+428d0d85QPup3ixECEpa7Pzr9lLEyw tools/ioemu/keymaps/de-ch
+428d0d86r5UpNhOSALGJUUDaGv-vnQ tools/ioemu/keymaps/en-gb
+428d0d86ylUT-4Skjnwa27vxIeBqYw tools/ioemu/keymaps/en-us
+428d0d86vcHusn3XzWTLjKLDdNhZxw tools/ioemu/keymaps/es
+428d0d86UVS0Km-9J94RAQM7iAbBzw tools/ioemu/keymaps/et
+428d0d86hS47OlX4USgPPWk6RFWKLQ tools/ioemu/keymaps/fi
+428d0d86kOcjaVVZqDgV2JDGcXQ8rg tools/ioemu/keymaps/fo
+428d0d86c4GgMp1hDU2MFxiZ1Pz9Lg tools/ioemu/keymaps/fr
+428d0d86BdbSM5PxuMaSf8vBv6rXQg tools/ioemu/keymaps/fr-be
+428d0d86dQk_p9io2QdI9SGC6FVidg tools/ioemu/keymaps/fr-ca
+428d0d86JpfLBZmnrv7Yp0tuezgzng tools/ioemu/keymaps/fr-ch
+428d0d861RURctgJ3cgtnq0chW6JOA tools/ioemu/keymaps/hr
+428d0d86mqzqw70FkLHZFzIkvTJBpw tools/ioemu/keymaps/hu
+428d0d86O3ruSBL8ZyRBeLF7Ow67Og tools/ioemu/keymaps/is
+428d0d87pcCatuZLYpVWtUu2Da9sgw tools/ioemu/keymaps/it
+428d0d87M3Hy7ubCu27ZO-zWDk-YhQ tools/ioemu/keymaps/ja
+428d0d87CqrbJBUI28UxJCIduSJ4rQ tools/ioemu/keymaps/lt
+428d0d87jIV_V1YwET59i-Py3h0ILA tools/ioemu/keymaps/lv
+428d0d87T3KIxrywXSAkRu-AiQQgIQ tools/ioemu/keymaps/mk
+428d0d87_wmWi_IBHfpmZzhCKU-Baw tools/ioemu/keymaps/modifiers
+428d0d87GgUuEd4Mz9p3mUGkdMdOsg tools/ioemu/keymaps/nl
+428d0d87E1NtUwguKl72ifCTjDQ5rQ tools/ioemu/keymaps/nl-be
+428d0d87lKhQOfn5yQ0tq3u7hfIgpw tools/ioemu/keymaps/no
+428d0d87iD3aff-LOlaA4CmOUVct3Q tools/ioemu/keymaps/pl
+428d0d870CMCzI7c6gcGZMNuIYGbnQ tools/ioemu/keymaps/pt
+428d0d87gCs2M4A4P1ITzW86lm_-JA tools/ioemu/keymaps/pt-br
+428d0d87nzQ8eK1b9_Zs1Z82dOuX1Q tools/ioemu/keymaps/ru
+428d0d87uHdsh15a5mAD-HyWni8QDw tools/ioemu/keymaps/sl
+428d0d87gsUMIP42oFecYrdZAJDAuw tools/ioemu/keymaps/sv
+428d0d87OcfLjKuhg6p2uuiAPvJBqQ tools/ioemu/keymaps/th
+428d0d87QbRtHJUft9qBkNXcl4pbzw tools/ioemu/keymaps/tr
+428d0d88CJoMejkmBh6pWaqKMvQF8A tools/ioemu/main.c
+428d0d88Fcan7gQZ6axXOmokBDLe7g tools/ioemu/monitor.c
+428d0d88lVaOC64YBZ1Wzt-WV4JaSw tools/ioemu/osdep.c
+428d0d885W7r27CDEJCW6Jlbxggc9g tools/ioemu/osdep.h
+428d0d88CiP9tVdIdLWAzOnCOSdafg tools/ioemu/path.c
+428d0d8908B65zMmhdGVME3jv7gpww tools/ioemu/qemu-binfmt-conf.sh
+428d0d89taY6NPlnIyOAMQd_Ww5qUw tools/ioemu/qemu-img.c
+428d0d89FY-g4UPH-ZW7t5ZCqvQVTQ tools/ioemu/readline.c
+428d0d89dLURbktZFufDKSHan01GFg tools/ioemu/sdl.c
+428d0d82dUmXkgIy11G-hoKTkhvkfQ tools/ioemu/target-i386-dm/Makefile
+428d0d8atdIE_8ACJPPii5_asQNafw tools/ioemu/target-i386-dm/device-model
+428d0d8ahpRAYl6s_itBxnTcxyMHaQ tools/ioemu/target-i386-dm/helper2.c
+428d0d8aU3Moaq4zNW5QMV_NxD-4XA tools/ioemu/target-i386-dm/qemu-ifup
+428d0d8aqidj8n5H2_2qhBV0mIIJzA tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz
+428d0d8bMq0ZpccpHb1iVvSNbJjRxg tools/ioemu/thunk.c
+428d0d8b2PYfwKLLShlnWcM3VWq9ag tools/ioemu/thunk.h
+428d0d8bfvbYQwj6MgDr958m4_SfRA tools/ioemu/vgafont.h
+428d0d8bgAojEQcAcTV2gj2E_eG4Lw tools/ioemu/vl.c
+428d0d8bXiCY4iTjoSPxGry8jXdAtg tools/ioemu/vl.h
+428d0d8bQVKedvN5EIPm39s33TXkpA tools/ioemu/vnc.c
+428d0d85d831iQvvCD3LcaOD9rYGkg tools/ioemu/x86_32.ld
+428f0763_67jCiHbdgfGlgAOJqfg9A tools/ioemu/x86_64.ld
3fbba6dbDfYvJSsw9500b4SZyUhxjQ tools/libxc/Makefile
+41dde8afKYRKxS4XtLv1KUegGQy_bg tools/libxc/linux_boot_params.h
41cc934abX-QLXJXW_clV_wRjM0zYg tools/libxc/plan9a.out.h
3fbba6dc1uU7U3IFeF6A-XEOYF2MkQ tools/libxc/rpm.spec
3fbba6dcrNxtygEcgJYAJJ1gCQqfsA tools/libxc/xc.h
-40589968oCfoUlXd460CjVAkBE8IBA tools/libxc/xc_atropos.c
3fbba6dbEVkVMX0JuDFzap9jeaucGA tools/libxc/xc_bvtsched.c
+4273458dyF2_sKA6CFkNJQYb8eY2dA tools/libxc/xc_core.c
3fbba6dbasJQV-MVElDC0DGSHMiL5w tools/libxc/xc_domain.c
40278d99BLsfUv3qxv0I8C1sClZ0ow tools/libxc/xc_elf.h
403e0977Bjsm_e82pwvl9VvaJxh8Gg tools/libxc/xc_evtchn.c
-40e03333Eegw8czSWvHsbKxrRZJjRA tools/libxc/xc_io.c
-40e03333vrWGbLAhyJjXlqCHaJt7eA tools/libxc/xc_io.h
+4227c129ZKjJPNYooHVzBCyinf7Y6Q tools/libxc/xc_gnttab.c
3fbba6dbNCU7U6nsMYiXzKkp3ztaJg tools/libxc/xc_linux_build.c
3fbba6dbl267zZOAVHYLOdLCdhcZMw tools/libxc/xc_linux_restore.c
3fbba6db7li3FJiABYtCmuGxOJxEGw tools/libxc/xc_linux_save.c
+42a40bc3vE3p9fPSJZQZK0MdQF9B8g tools/libxc/xc_load_bin.c
+42a40bc4diWfFsPGf0RW7qXMufU4YQ tools/libxc/xc_load_elf.c
3fbba6db7WnnJr0KFrIFrqNlSKvFYg tools/libxc/xc_misc.c
4051bce6CHAsYh8P5t2OHDtRWOP9og tools/libxc/xc_physdev.c
41cc934aO1m6NxEh_8eDr9bJIMoLFA tools/libxc/xc_plan9_build.c
3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/libxc/xc_private.c
3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxc/xc_private.h
-40589968UQFnJeOMn8UIFLbXBuwXjw tools/libxc/xc_rrobin.c
-40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/libxutil/Makefile
-40e033325Sjqs-_4TuzeUEprP_gYFg tools/libxutil/allocate.c
-40e03332KYz7o1bn2MG_KPbBlyoIMA tools/libxutil/allocate.h
-41a216cav5JJbtDQnusfuMa_1x_Xpw tools/libxutil/debug.h
-40e9808eyjiahG5uF6AMelNVujBzCg tools/libxutil/enum.c
-40e9808eZpbdn9q2KSSMGCNvY_ZgpQ tools/libxutil/enum.h
-40e03332p5Dc_owJQRuN72ymJZddFQ tools/libxutil/file_stream.c
-40e03332jWfB2viAhLSkq1WK0r_iDQ tools/libxutil/file_stream.h
-40e03332rUjNMGg11n2rN6V4DCrvOg tools/libxutil/gzip_stream.c
-40e033321O5Qg22haLoq5lpmk4tooQ tools/libxutil/gzip_stream.h
-40e9808easXCzzAZQodEfKAhgUXSPA tools/libxutil/hash_table.c
-40e9808e94BNXIVVKBFHC3rnkvwtJg tools/libxutil/hash_table.h
-40e03332ihnBGzHykVwZnFmkAppb4g tools/libxutil/iostream.c
-40e03332UGwbLR4wsw4ft14p0Yw5pg tools/libxutil/iostream.h
-40e0333245DLDzJemeSVBLuutHtzEQ tools/libxutil/kernel_stream.c
-40e03332aK0GkgpDdc-PVTkWKTeOBg tools/libxutil/kernel_stream.h
-40e9808epW9iHcLXuO3QfUfLzB7onw tools/libxutil/lexis.c
-40e9808egccMhCizayQRGtpBA3L5MQ tools/libxutil/lexis.h
-41a216caM4z39Fzjb91rv9Ed_4By1A tools/libxutil/socket_stream.c
-41a216caqinvF1I5FQMHA4HTRz8MSA tools/libxutil/socket_stream.h
-40e03332KT_tnnoAMbPVAZBB7kSOAQ tools/libxutil/string_stream.c
-40e03332-VtK6_OZa1vMHXFil8uq6w tools/libxutil/string_stream.h
-40e9808e5_PLdodqVOSx0b4T_f5aeg tools/libxutil/sxpr.c
-40e9808e0O4sHZtkDv5hlSqjYcdQAQ tools/libxutil/sxpr.h
-40ec1cc6SIiGbynOi-1NtPesOlzF-Q tools/libxutil/sxpr_parser.c
-40ec1cc6wpvvGxZiq4EFvNOcw0tUFg tools/libxutil/sxpr_parser.h
-40e03332Rkvq6nn_UNjzAAK_Tk9v1g tools/libxutil/sys_net.c
-40e03332lQHvQHw4Rh7VsT1_sui29A tools/libxutil/sys_net.h
-40e033321smklZd7bDSdWvQCeIshtg tools/libxutil/sys_string.c
-40e03332h5V611rRWURRLqb1Ekatxg tools/libxutil/sys_string.h
-41a216cayFe2FQroFuzvNPw1AvNiqQ tools/libxutil/util.c
-41a216ca7mgVSnCBHPCLkGOIqPS1CQ tools/libxutil/util.h
+42337174PxyzzPk62raDiYCIsfStDg tools/libxc/xc_ptrace.c
+4273458duzL--nsTfT6e_q6Kfij48g tools/libxc/xc_ptrace_core.c
+41ebbfe9U0b0kI-HgjK7VEY4EvW7_w tools/libxc/xc_sedf.c
+41dde8b0pLfAKMs_L9Uri2hnzHiCRQ tools/libxc/xc_vmx_build.c
3f776bd2Xd-dUcPKlPN2vG89VGtfvQ tools/misc/Makefile
+4225f56d7sa9aEARfjNeCVTMYDAmZA tools/misc/cpuperf/Makefile
+4225f56dS5TGdKojmuBnrV3PzbE6Rg tools/misc/cpuperf/README.txt
+4225f56dcodvBSPoWYS6kvwZCQhgzg tools/misc/cpuperf/cpuperf.c
+4225f56dMjZK14EWd8K0gq4v5Diwjg tools/misc/cpuperf/cpuperf_perfcntr.h
+4225f56d_XjSY1297IiH96qeqD4sCA tools/misc/cpuperf/cpuperf_xeno.h
+4225f56dqlGC_UZ681F95mCgLbOeHQ tools/misc/cpuperf/module/Makefile
+4225f56dnmms-VFr1MiDVG_dYoM7IQ tools/misc/cpuperf/module/perfcntr.c
+4225f56dYhIGQRD_kKVJ6xQrkqO0YQ tools/misc/cpuperf/p4perf.h
40ab2cfawIw8tsYo0dQKtp83h4qfTQ tools/misc/fakei386xen
+4249273cDOw6_uLUPvvUwWU1ZrJxnQ tools/misc/mbootpack/GPL
+4249273cSmj2h8Fj3UpTg0g-k6CLsA tools/misc/mbootpack/Makefile
+4249273c8gKIttF1QPiczvGo5AEOeA tools/misc/mbootpack/README
+4249273c4N4PAkvt3trNlto4h76k8A tools/misc/mbootpack/bin2c.c
+4249273cISg5nhW1Pt7OJ0jFu343ig tools/misc/mbootpack/bootsect.S
+4249273cUiz8CgLqnG7XYFa8x5-MoQ tools/misc/mbootpack/buildimage.c
+4249273c_gZ2yI_h-ci66E1Y5oSEPA tools/misc/mbootpack/mb_header.h
+4249273cWnlW0-lOIYua1bkKirn6vA tools/misc/mbootpack/mb_info.h
+4249273cA8LI3IMaSuhLOjykuMeQJA tools/misc/mbootpack/mbootpack.c
+4249273cVTgyv2HYd-mC29IDaz0-mg tools/misc/mbootpack/mbootpack.h
+4249273cLXQbRWFp_v-FqcyOm0sYtg tools/misc/mbootpack/setup.S
3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile
3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README
3f6dc142IHaf6XIcAYGmhV9nNSIHFQ tools/misc/miniterm/miniterm.c
40c9c469kT0H9COWzA4XzPBjWK0WsA tools/misc/netfix
4022a73cEKvrYe_DVZW2JlAxobg9wg tools/misc/nsplitd/Makefile
4022a73cKms4Oq030x2JBzUB426lAQ tools/misc/nsplitd/nsplitd.c
+42308df9dv_ZuP49nNPIROEMQ3F_LA tools/misc/xc_shadow.c
3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/misc/xen-clone
3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README
405eedf6_nnNhFQ1I85lhCkLK6jFGA tools/misc/xencons
40c9c4697z76HDfkCLdMhmaEwzFoNQ tools/misc/xend
41adc641dV-0cDLSyzMs5BT8nL7v3Q tools/misc/xenperf.c
-4107986eMWVdBoz4tXYoOscpN_BCYg tools/misc/xensv
4056f5155QYZdsk-1fLdjsZPFTnlhg tools/misc/xensymoops
40cf2937dqM1jWW87O5OoOYND8leuA tools/misc/xm
+4270cc81g3nSNYCZ1ryCMDEbLtMtbQ tools/pygrub/Makefile
+4270deeccyRsJn6jLnRh9odRtMW9SA tools/pygrub/README
+4270cc81EIl7NyaS3Av6IPRk2c2a6Q tools/pygrub/setup.py
+4270cc81t7eNCDp4Bhbh58p1CNxaCQ tools/pygrub/src/GrubConf.py
+4270d6c2fWF4r5-zF1pSuAFwUZS0aA tools/pygrub/src/__init__.py
+4270cc81CzKMiujDPWcaYhu709vGXw tools/pygrub/src/fsys/__init__.py
+4270cc81RTIiq9si0dI4YRTRE4KRMw tools/pygrub/src/fsys/ext2/__init__.py
+4270cc81YCYa4pexivBD2NdLE2F_Pg tools/pygrub/src/fsys/ext2/ext2module.c
+4270cc81o4BL5e8Cs87aSi8EXA5NtQ tools/pygrub/src/fsys/ext2/test.py
+4294fab3_A8gB1E3T-8fDt0x0eGRqw tools/pygrub/src/fsys/reiser/__init__.py
+4294fab3On_kRmhm1lwm4SDteFP_7Q tools/pygrub/src/fsys/reiser/reisermodule.c
+4270cc81TS6L_tEO6wSp5wcURcpldQ tools/pygrub/src/pygrub
40c9c468icGyC5RAF1bRKsCXPDCvsA tools/python/Makefile
40ffc44dOwe1CcYXGCkYHdG_NxcccA tools/python/logging/logging-0.4.9.2/PKG-INFO
40ffc44dpqpgqgrnLfR70PsiBc3liA tools/python/logging/logging-0.4.9.2/README.txt
@@ -528,36 +852,38 @@
40dc4076hGpwa8-sWRN0jtXZeQJuKg tools/python/xen/__init__.py
40dfd40aMOhnw_cQLve9462UR5yYxQ tools/python/xen/lowlevel/__init__.py
3fbd0a42l40lM0IICw2jXbQBVZSdZg tools/python/xen/lowlevel/xc/xc.c
+42a59f20JpCmm9DsCoVZowGafnhBuw tools/python/xen/lowlevel/xs/xs.c
40dc4076St6AmPTmQPrtQ6LGHPxGmw tools/python/xen/lowlevel/xu/__init__.py
40dc4076CwBYRTUQDdbdU1L6KcLgSw tools/python/xen/lowlevel/xu/xu.c
-41052eb84_irpx0E9N_kqBp9eoin5g tools/python/xen/sv/CreateDomain.py
-4107986egkTAMIHW7n-i4ShvCGWpLQ tools/python/xen/sv/Daemon.py
-40fcefb2qm13BbRZBydAatOavaS0fQ tools/python/xen/sv/DomInfo.py
-40fcefb2-RIU8GB67mJMRzybME9bxw tools/python/xen/sv/DomList.py
-40fcefb23FfQn-ZBCbcHqA0cPGqQxw tools/python/xen/sv/GenTabbed.py
-40fcefb2QZAn3u3sX-M7NXBjOv5HGg tools/python/xen/sv/HTMLBase.py
-40fcefb2vnfDbl4w_yCTedROPuqs0g tools/python/xen/sv/Main.py
-4186e24fZMp7_bX4f50MvUscdrST9Q tools/python/xen/sv/MigrateDomain.py
-40fcefb24h-04WaHag-Tg4nxWPhTig tools/python/xen/sv/NodeInfo.py
-4186e24fb4YtJw155tNtSXXN6nEWqA tools/python/xen/sv/RestoreDomain.py
-4186e24fVXt0lfeQSAy1eiFKnPCHTg tools/python/xen/sv/SaveDomain.py
-40fcefb2Sif__6AqrANeBQZZfvP-6w tools/python/xen/sv/TabView.py
-41052eb8UrgtUkuJPg7oY1tutVQHsg tools/python/xen/sv/Wizard.py
-40fcefb2DqteqCCZYDCvvh4Q5jBd0w tools/python/xen/sv/__init__.py
-4107986e6qN1IdvIDdId0AYFmDMkiQ tools/python/xen/sv/params.py
-40fcefb4rnaZNjqsBu7A5V2rlLyqRw tools/python/xen/sv/util.py
40d8915cyoVA0hJxiBFNymL7YvDaRg tools/python/xen/util/Brctl.py
40dfd40aGqGkiopOOgJxSF4iCbHM0Q tools/python/xen/util/__init__.py
+4270e4efFg3wHCCxXpA0h6yoMTkeSQ tools/python/xen/util/blkif.py
4055ee4dwy4l0MghZosxoiu6zmhc9Q tools/python/xen/util/console_client.py
40c9c468IienauFHQ_xJIcqnPJ8giQ tools/python/xen/util/ip.py
+42a4a80aiq_AT5whiSw-fKhNhRKITw tools/python/xen/util/mac.py
+41dde8b0yuJX-S79w4xJKxBQ-Mhp1A tools/python/xen/util/memmap.py
+4288c6fcB1kUAqX0gzU85GGxmamS4Q tools/python/xen/util/process.py
4059c6a0pnxhG8hwSOivXybbGOwuXw tools/python/xen/util/tempfile.py
+4292565fDy2PaatawinIckKB0cKusg tools/python/xen/util/xpopen.py
+4267a9b16u4IEPhjRryesk6A17sobA tools/python/xen/web/SrvBase.py
+4267a9b1FfCUjW7m9anLERcx9lwhJg tools/python/xen/web/SrvDir.py
+4267a9b1uMXIfzB6-81ZLqMCyTgJmw tools/python/xen/web/__init__.py
+4267a9b1i_zVq36tt2iQejVuR6DGFw tools/python/xen/web/connection.py
+4267a9b1KzSWZwWKYrGRc9bUhow_7Q tools/python/xen/web/http.py
+4267a9b1KWNZhhmZnySe_nLASwO47g tools/python/xen/web/httpserver.py
+4267a9b21miObgEJLAgtLTAKRBK8uQ tools/python/xen/web/protocol.py
+4267a9b2pA22-lF37dB7XfapMNroGw tools/python/xen/web/reactor.py
+4267a9b2AbH-azu7SXIUETXC39tu-A tools/python/xen/web/resource.py
+4267a9b21XhDCpkVXtgea3ko8uS16g tools/python/xen/web/static.py
+4267a9b2q7UA0cU5-KATCWX6O-TKsA tools/python/xen/web/tcp.py
+4267a9b2XqvzKDWxfAdV22c3mO6NHA tools/python/xen/web/unix.py
40c9c468SNuObE_YWARyS0hzTPSzKg tools/python/xen/xend/Args.py
41597996WNvJA-DVCBmc0xU9w_XmoA tools/python/xen/xend/Blkctl.py
40c9c468Um_qc66OQeLEceIz1pgD5g tools/python/xen/xend/EventServer.py
-40c9c468U8EVl0d3G--8YXVg6VJD3g tools/python/xen/xend/EventTypes.py
40c9c468QJTEuk9g4qHxGpmIi70PEQ tools/python/xen/xend/PrettyPrint.py
40e15b7eeQxWE_hUPB2YTgM9fsZ1PQ tools/python/xen/xend/Vifctl.py
-4151594bBq8h-bwTfEt8dbBuojMtcA tools/python/xen/xend/XendAsynchProtocol.py
+4270cc81xbweGYhsM4326N3dX1bGHQ tools/python/xen/xend/XendBootloader.py
+42944ee8FQaAdZMF56O_WkWyBdCalA tools/python/xen/xend/XendCheckpoint.py
40c9c4688m3eqnC8fhLu1APm36VOVA tools/python/xen/xend/XendClient.py
40c9c468t6iIKTjwuYoe-UMCikDcOQ tools/python/xen/xend/XendConsole.py
40c9c468WnXs6eOUSff23IIGI4kMfQ tools/python/xen/xend/XendDB.py
@@ -566,23 +892,20 @@
40c9c4685ykq87_n1kVUbMr9flx9fg tools/python/xen/xend/XendDomainInfo.py
40f50d99YiiaMI1fZBh1VCDFLD57qg tools/python/xen/xend/XendError.py
40ffc44eGsgTEY355E3nN4mPLZHhMQ tools/python/xen/xend/XendLogging.py
-40c9c46854nsHmuxHQHncKk5rAs5NA tools/python/xen/xend/XendMigrate.py
40c9c468M96gA1EYDvNa5w5kQNYLFA tools/python/xen/xend/XendNode.py
4151594bhib4aUerB2SMKDl-iCtc4Q tools/python/xen/xend/XendProtocol.py
40c9c4686jruMyZIqiaZRMiMoqMJtg tools/python/xen/xend/XendRoot.py
40c9c468xzANp6o2D_MeCYwNmOIUsQ tools/python/xen/xend/XendVnet.py
40c9c468x191zetrVlMnExfsQWHxIQ tools/python/xen/xend/__init__.py
40c9c468S2YnCEKmk4ey8XQIST7INg tools/python/xen/xend/encode.py
-40c9c468DCpMe542varOolW1Xc68ew tools/python/xen/xend/server/SrvBase.py
+42a475165HuglqWwNi2fjqNOIHbIKQ tools/python/xen/xend/image.py
+4266169ezWIlXSfY50n6HSoVFbosmw tools/python/xen/xend/scheduler.py
40c9c468IxQabrKJSWs0aEjl-27mRQ tools/python/xen/xend/server/SrvConsole.py
40c9c4689Io5bxfbYIfRiUvsiLX0EQ tools/python/xen/xend/server/SrvConsoleDir.py
40c9c468woSmBByfeXA4o_jGf2gCgA tools/python/xen/xend/server/SrvDaemon.py
-40c9c468kACsmkqjxBWKHRo071L26w tools/python/xen/xend/server/SrvDeviceDir.py
-40c9c468EQZJVkCLds-OhesJVVyZbQ tools/python/xen/xend/server/SrvDir.py
40eee3a0m38EwYXfCSFIjWNwG6jx_A tools/python/xen/xend/server/SrvDmesg.py
40c9c468TyHZUq8sk0FF_vxM6Sozrg tools/python/xen/xend/server/SrvDomain.py
40c9c469WzajDjutou3X7FmL9hMf3g tools/python/xen/xend/server/SrvDomainDir.py
-40c9c469-8mYEJJTAR6w_ClrJRAfwQ tools/python/xen/xend/server/SrvEventDir.py
40c9c4694eu5759Dehr4Uhakei0EMg tools/python/xen/xend/server/SrvNode.py
40c9c469TaZ83ypsrktmPSHLEZiP5w tools/python/xen/xend/server/SrvRoot.py
40c9c469W3sgDMbBJYQdz5wbQweL0Q tools/python/xen/xend/server/SrvServer.py
@@ -593,12 +916,19 @@
40c9c469N2-b3GqpLHHHPZykJPLVvA tools/python/xen/xend/server/channel.py
40c9c469hJ_IlatRne-9QEa0-wlquw tools/python/xen/xend/server/console.py
40c9c469UcNJh_NuLU0ytorM0Lk5Ow tools/python/xen/xend/server/controller.py
-40d83983OXjt-y3HjSCcuoPp9rzvmw tools/python/xen/xend/server/domain.py
+4266169exkN9o3hA8vxe8Er0BZv1Xw tools/python/xen/xend/server/event.py
40c9c469yrm31i60pGKslTi2Zgpotg tools/python/xen/xend/server/messages.py
40c9c46925x-Rjb0Cv2f1-l2jZrPYg tools/python/xen/xend/server/netif.py
40c9c469ZqILEQ8x6yWy0_51jopiCg tools/python/xen/xend/server/params.py
+4266169eI_oX3YBjwaeC0V-THBRnjg tools/python/xen/xend/server/pciif.py
+4294a1bf8rMUcddot-B2-pOxORimOg tools/python/xen/xend/server/relocate.py
+41ee5e8dq9NtihbL4nWKjuSLOhXPUg tools/python/xen/xend/server/usbif.py
40c9c469LNxLVizOUpOjEaTKKCm8Aw tools/python/xen/xend/sxp.py
-4189125cL90jKSOcBJ3Vx4nWGiXXvA tools/python/xen/xend/util.py
+42a48d152jkT7ykQT_LWKnS-ojV_ZA tools/python/xen/xend/uuid.py
+42a5a2c0ik9zrQvwjTUKDVVEQmvO2Q tools/python/xen/xend/xenstore/__init__.py
+42a5a2c04xNCYAUXD0b9IDf4XekXRg tools/python/xen/xend/xenstore/xsnode.py
+42a5a2c0-aP98db2PJIDxQJfTEMZ-A tools/python/xen/xend/xenstore/xsobj.py
+42a5a2c0gxfQiAH_oVTShNPeG0LG2Q tools/python/xen/xend/xenstore/xsresource.py
40d05079aFRp6NQdo5wIh5Ly31c0cg tools/python/xen/xm/__init__.py
40cf2937gKQcATgXKGtNeWb1PDH5nA tools/python/xen/xm/create.py
40f552eariuUSB9TWqCPnDLz5zvxMw tools/python/xen/xm/destroy.py
@@ -608,35 +938,11 @@
40cf2937PSslwBliN1g7ofDy2H_RhA tools/python/xen/xm/opts.py
40cf2937Z8WCNOnO2FcWdubvEAF9QQ tools/python/xen/xm/shutdown.py
41b88ba6_C4---jeA895Efg9YFZgKA tools/python/xen/xm/sysrq.py
-40fcefb2K1xqVVT4D-p7nL2GzS4scg tools/sv/Main.rpy
-40ffbcb66Dj5F-1kCK9BcgSqCWkt1w tools/sv/Makefile
-4120b0e5L_nW-u0MWRfIdXg4ng4OjA tools/sv/images/destroy.png
-4107c921_OR9NTSv2dKFiLCXxrXoxA tools/sv/images/finish.png
-40fcefb3wXQMsl9WkgQAVtdrupm4sw tools/sv/images/left-end-highlight.jpg
-40fcefb3K6ESt5sQhD9aCQRscQIlXQ tools/sv/images/left-end-no-highlight.jpg
-40fcefb3BUT98zPzW8kAFKuxGdh4XA tools/sv/images/middle-highlight.jpg
-40fcefb38OTgsUKHBpwshLLIsiIaCA tools/sv/images/middle-no-highlight.jpg
-41052eb9SDUqSLGtG6rxk6Ep5fOhFA tools/sv/images/next.png
-40fcefb32SPtrw36c4S6YGFlLvkKuw tools/sv/images/orb_01.jpg
-40fcefb3Ok5qkX3iM7ZEPVkRInrUpg tools/sv/images/orb_02.jpg
-4104ffca9_GhWOxRE-83uZIad2Z1gg tools/sv/images/pause.png
-41052eb9NQqHe_f9-ev1CaA3y5YYZg tools/sv/images/previous.png
-41013a82ILk71xLqWFH5ZO5VmOIvBw tools/sv/images/reboot.png
-40fcefb3JnT5XeKTuVF4yUMGOtuNZg tools/sv/images/right-end-highlight.jpg
-40fcefb3-DuYOS7noo2W7b_0p7TOUg tools/sv/images/right-end-no-highlight.jpg
-40fcefb3qNbAZR5FYGPAZ9sFPVMTDA tools/sv/images/seperator-left-highlight.jpg
-40fcefb3dgsa24WLk_BJeYQHrDLuOg tools/sv/images/seperator-right-highlight.jpg
-40fcefb3FtiX4Pd2kT8wDlp8u8xRhQ tools/sv/images/seperator.jpg
-41013a82sUdUqBv8EoAUJii3gsZ-4g tools/sv/images/shutdown.png
-4120b0e5RyNoIQNMjUs4A2kshovjaQ tools/sv/images/small-destroy.png
-4120b0e6vW66wW6WvjQyFD0AZH2tng tools/sv/images/small-pause.png
-4120b0e6USof7ieyGxEvtCdTMpxaQw tools/sv/images/small-unpause.png
-4104ffca-jPHLVOrW0n0VghEXXtKxg tools/sv/images/unpause.png
-40fcefb3yMSrZvApO9ToIi-iQwnchA tools/sv/images/xen.png
-41013a83z27rKvWIxAfUBMVZ1eDCDg tools/sv/inc/script.js
-40fcefb3zGC9XNBkSwTEobCoq8YClA tools/sv/inc/style.css
+422f27c8MDeRoOWZNdcRC5VDTcj3TQ tools/tests/Makefile
+422f27c81CCtXt4Lthf7JF3Ajr0fUA tools/tests/test_x86_emulator.c
420b963dK3yGNtqxRM8npGZtrCQd1g tools/vnet/00INSTALL
41a21888_WlknVWjSxb32Fo13_ujsw tools/vnet/00README
+420a9b706I-bN_uPdiy0m3rmDifNNg tools/vnet/INSTALL
41a21888bOiOJc7blzRbe4MNJoaYTw tools/vnet/Makefile
41a21888mg2k5HeiVjlQYEtJBZT4Qg tools/vnet/doc/vnet-module.txt
41a21888cuxfT8wjCdRR6V1lqf5NtA tools/vnet/doc/vnet-xend.txt
@@ -645,6 +951,40 @@
41a21888QPgKrulCfR9SY_pxZKU0KA tools/vnet/examples/vnet97.sxp
41a21888Gm0UBs1i7HqveT7Yz0u8DQ tools/vnet/examples/vnet98.sxp
41a21888r4oGPuGv2Lxl-thgV3H54w tools/vnet/examples/vnet99.sxp
+40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/vnet/libxutil/Makefile
+40e033325Sjqs-_4TuzeUEprP_gYFg tools/vnet/libxutil/allocate.c
+40e03332KYz7o1bn2MG_KPbBlyoIMA tools/vnet/libxutil/allocate.h
+41a216cav5JJbtDQnusfuMa_1x_Xpw tools/vnet/libxutil/debug.h
+40e9808eyjiahG5uF6AMelNVujBzCg tools/vnet/libxutil/enum.c
+40e9808eZpbdn9q2KSSMGCNvY_ZgpQ tools/vnet/libxutil/enum.h
+4284c2ecWyadIhHF1u_QSgWqIXkaLA tools/vnet/libxutil/fd_stream.c
+4284c2ecEOOcF6fZUf_NsZzYAoNo-w tools/vnet/libxutil/fd_stream.h
+40e03332p5Dc_owJQRuN72ymJZddFQ tools/vnet/libxutil/file_stream.c
+40e03332jWfB2viAhLSkq1WK0r_iDQ tools/vnet/libxutil/file_stream.h
+40e03332rUjNMGg11n2rN6V4DCrvOg tools/vnet/libxutil/gzip_stream.c
+40e033321O5Qg22haLoq5lpmk4tooQ tools/vnet/libxutil/gzip_stream.h
+40e9808easXCzzAZQodEfKAhgUXSPA tools/vnet/libxutil/hash_table.c
+40e9808e94BNXIVVKBFHC3rnkvwtJg tools/vnet/libxutil/hash_table.h
+40e03332ihnBGzHykVwZnFmkAppb4g tools/vnet/libxutil/iostream.c
+40e03332UGwbLR4wsw4ft14p0Yw5pg tools/vnet/libxutil/iostream.h
+40e0333245DLDzJemeSVBLuutHtzEQ tools/vnet/libxutil/kernel_stream.c
+40e03332aK0GkgpDdc-PVTkWKTeOBg tools/vnet/libxutil/kernel_stream.h
+40e9808epW9iHcLXuO3QfUfLzB7onw tools/vnet/libxutil/lexis.c
+40e9808egccMhCizayQRGtpBA3L5MQ tools/vnet/libxutil/lexis.h
+41a216caM4z39Fzjb91rv9Ed_4By1A tools/vnet/libxutil/socket_stream.c
+41a216caqinvF1I5FQMHA4HTRz8MSA tools/vnet/libxutil/socket_stream.h
+40e03332KT_tnnoAMbPVAZBB7kSOAQ tools/vnet/libxutil/string_stream.c
+40e03332-VtK6_OZa1vMHXFil8uq6w tools/vnet/libxutil/string_stream.h
+40e9808e5_PLdodqVOSx0b4T_f5aeg tools/vnet/libxutil/sxpr.c
+40e9808e0O4sHZtkDv5hlSqjYcdQAQ tools/vnet/libxutil/sxpr.h
+40ec1cc6SIiGbynOi-1NtPesOlzF-Q tools/vnet/libxutil/sxpr_parser.c
+40ec1cc6wpvvGxZiq4EFvNOcw0tUFg tools/vnet/libxutil/sxpr_parser.h
+40e03332Rkvq6nn_UNjzAAK_Tk9v1g tools/vnet/libxutil/sys_net.c
+40e03332lQHvQHw4Rh7VsT1_sui29A tools/vnet/libxutil/sys_net.h
+40e033321smklZd7bDSdWvQCeIshtg tools/vnet/libxutil/sys_string.c
+40e03332h5V611rRWURRLqb1Ekatxg tools/vnet/libxutil/sys_string.h
+41a216cayFe2FQroFuzvNPw1AvNiqQ tools/vnet/libxutil/util.c
+41a216ca7mgVSnCBHPCLkGOIqPS1CQ tools/vnet/libxutil/util.h
41a21888c9TCRlUwJS9WBjB3e9aWgg tools/vnet/vnet-module/00README
41a21888K2ItolEkksc1MUqyTDI_Kg tools/vnet/vnet-module/Makefile
41a21888mJsFJD7bVMm-nrnWnalGBw tools/vnet/vnet-module/Makefile-2.4
@@ -694,64 +1034,236 @@
41a2188ar6_vOO3_tEJQjmFVU3409A tools/vnet/vnetd/vcache.h
41a2188aETrGU60X9WtGhYVfU7z0Pw tools/vnet/vnetd/vnetd.c
41a2188ahYjemudGyB7078AWMFR-0w tools/vnet/vnetd/vnetd.h
-4194e861IgTabTt8HOuh143QIJFD1Q tools/x2d2/Makefile
-4194e861M2gcBz4i94cQYpqzi8n6UA tools/x2d2/cntrl_con.c
-4194e8612TrrMvC8ZlA4h2ZYCPWz4g tools/x2d2/minixend.c
-4194e861x2eqNCD61RYPCUEBVdMYuw tools/x2d2/minixend.h
-4194e861A4V9VbD_FYmgXpYEj5YwVg tools/x2d2/util.c
+41d58ba63w1WfBmd6Cr_18nhLNv7PA tools/xcs/Makefile
+41d58ba6NxgkfzD_rmsGjgd_zJ3H_w tools/xcs/bindings.c
+41d58ba6I2umi60mShq4Pl0RDg7lzQ tools/xcs/connection.c
+41d58ba6YyYu53bFuoIAw9hNNmneEg tools/xcs/ctrl_interface.c
+423d82c7ZKf2bDOxRcR4Nc1kN5StNQ tools/xcs/dump.c
+423d82c7U__LHy9dvkUNUvSIhOqnBQ tools/xcs/dump.h
+41d58ba6Ru9ZbhTjgYX_oiszSIwCww tools/xcs/evtchn.c
+41d58ba6x9KO1CQBT7kKOKq_pJYC3g tools/xcs/xcs.c
+41d58ba6R6foSMtSFEcu-yxWFrT8VQ tools/xcs/xcs.h
+41d58ba6qyr2BkTcH2WlNBYLRyl2Yw tools/xcs/xcs_proto.h
+41d58ba6ijEF6fedqRO5vFu7uCirZg tools/xcs/xcsdump.c
+4292540couq-V0TPwyQ6bspNEWNcvw tools/xcutils/Makefile
+42925407VysDb9O06OK_RUzTZxfLoA tools/xcutils/xc_restore.c
+42936745WTLYamYsmXm_JGJ72JX-_Q tools/xcutils/xc_save.c
+42a57d97mxMTlPnxBKep6R4ViI5rjg tools/xenstore/.gdbinit
+42a57d97ZEoHuhMAFTuBMlLzA9v_ng tools/xenstore/Makefile
+42a57d97ccA4uY-RxONvIH0P8U0gqg tools/xenstore/TODO
+42a57d972RzmyLgsoH9b8qqk-UjcCA tools/xenstore/fake_libxc.c
+42a57d97IjoPvbIVc4BUzwoKyM0VSw tools/xenstore/list.h
+42a57d97fKgtf0HQLiQkAkVsOvuSyA tools/xenstore/talloc.c
+42a57d98U3p0XP6xzCybTuaVQscUdw tools/xenstore/talloc.h
+42a57d98LFN6Mug-uR4xgAxCE7lwUg tools/xenstore/talloc_guide.txt
+42a57d98S69vKJYwO_WUjoFQZ6KzQg tools/xenstore/testsuite/01simple.sh
+42a57d98BHcFpZz_fXHweylUEUU97Q tools/xenstore/testsuite/02directory.sh
+42a57d98ua4Xeb6pmtbFNTAI833dyw tools/xenstore/testsuite/03write.sh
+42a57d98nbuCUsVT0RJj1zA1JyMDsw tools/xenstore/testsuite/04rm.sh
+42a57d98_ULKHP3_uX1PK2nPMTzWSQ tools/xenstore/testsuite/05filepermissions.sh
+42a57d98YGCLyTDSGmoyFqRqQUlagQ tools/xenstore/testsuite/06dirpermissions.sh
+42a57d98fdO519YyATk4_Zwr1STNfQ tools/xenstore/testsuite/07watch.sh
+42a57d98zZUtvirUMjmHxFphJjmO7Q tools/xenstore/testsuite/08transaction.sh
+42a57d98sn9RbpBgHRv1D99Kt7LwYA tools/xenstore/testsuite/09domain.sh
+42a57d98tSuoFCHnnM2GgENXJrRQmw tools/xenstore/testsuite/test.sh
+42a57d98zxDP2Ti7dTznGROi66rUGw tools/xenstore/utils.c
+42a57d98SDvOYCEjmCjwHSk6390GLA tools/xenstore/utils.h
+42a57d98hFKbOY9D0mCE4H4NDoKr1w tools/xenstore/xenstored.h
+42a57d981KFHLmJ0CjKkn1_gZhYvdw tools/xenstore/xenstored_core.c
+42a57d98bcgE13vYaFxGTusmWbrFDA tools/xenstore/xenstored_core.h
+42a57d98cD9wOFyRYfaEP0QgtqL1Xw tools/xenstore/xenstored_domain.c
+42a57d98noLWvXU8ePbcqvvmu4p2Gw tools/xenstore/xenstored_domain.h
+42a57d98kxHaQ1ApS7RpqmFoEnDmbg tools/xenstore/xenstored_test.h
+42a57d981c9P3aFkWtxWEIRUapt_FQ tools/xenstore/xenstored_transaction.c
+42a57d99pVo__10bbckp_b_rm6i59A tools/xenstore/xenstored_transaction.h
+42a57d99izTIjWfG-IjQAPqYlDWJNg tools/xenstore/xenstored_watch.c
+42a57d99-zLxBjzC7rfj_perV-orUg tools/xenstore/xenstored_watch.h
+42a57d99BnkhISKgCCRcUqhteyuxCw tools/xenstore/xs.c
+42a57d99FyiYSz9AkKKROrRydnA-gQ tools/xenstore/xs.h
+42a57d99SrtsJCDUlKyRPf3EX86A1Q tools/xenstore/xs_lib.c
+42a57d99L2pYeMFyjQ_4Rnb17xTSMg tools/xenstore/xs_lib.h
+42a57d99Kl6Ba8oCHv2fggl7QN9QZA tools/xenstore/xs_random.c
+42a57d99SHYR1lQOD0shuErPDg9NKQ tools/xenstore/xs_stress.c
+42a57d996aBawpkQNOWkNWXD6LrhPg tools/xenstore/xs_test.c
403a3edbrr8RE34gkbR40zep98SXbg tools/xentrace/Makefile
40a107afN60pFdURgBv9KwEzgRl5mQ tools/xentrace/formats
+420d52d2_znVbT4JAPIU36vQOme83g tools/xentrace/xenctx.c
4050c413PhhLNAYk3TEwP37i_iLw9Q tools/xentrace/xentrace.8
403a3edbVpV2E_wq1zeEkJ_n4Uu2eg tools/xentrace/xentrace.c
403a3edblCUrzSj0mmKhO5HOPrOrSQ tools/xentrace/xentrace_format
4050c413NtuyIq5lsYJV4P7KIjujXw tools/xentrace/xentrace_format.1
-40e9808eHO3QprCFKg9l2JJzgt2voA tools/xfrd/Make.xfrd
-40e9808epTR4zWrYjGUnaaynK20Q5A tools/xfrd/Makefile
-40e9808eysqT4VNDlJFqsZB2rdg4Qw tools/xfrd/connection.c
-40e9808eyXfJUi4E0C3WSgrEXqQ1sQ tools/xfrd/connection.h
-40e9808eULGwffNOE4kBrAfZ9YAVMA tools/xfrd/debug.h
-411b5139tfKZfWs1LQHmwDR_wjKoxQ tools/xfrd/http.h
-40e9808ePADCSKL1YgGCt2TbYPnYkw tools/xfrd/lzi_stream.c
-40e9808eDNAdpF71o5teYb9DTT-PRw tools/xfrd/lzi_stream.h
-40e9808eQxi0EzTcPJtosrzxEIjA-Q tools/xfrd/marshal.c
-40e9808etg13xfRm0Lqd8vY-jHOoTg tools/xfrd/marshal.h
-40e9808eCsmywryb036TdtRMJHDMmQ tools/xfrd/select.c
-40e9808e99OcM547cKMTfmCVSoWVAw tools/xfrd/select.h
-40e9808eF3NVldqRNS5IHM8gbFAvpw tools/xfrd/xdr.c
-40e9808ezXzoRHm7pybXU69NtnjimA tools/xfrd/xdr.h
-40e9808edpUtf4bJ8IbqClPJj_OvbA tools/xfrd/xen_domain.c
-40e9808eHviFFIwdUKOA234uIeifjA tools/xfrd/xen_domain.h
-40e9808eIFeV-MDCNyVTNt5NfMPKeQ tools/xfrd/xfrd.c
-40e9808eGIbOoSNJRiwWK2C3mjGWaA tools/xfrd/xfrd.h
-40e9808eHXvs_5eggj9McD_J90mhNw tools/xfrd/xfrdClient.py
3f72f1bdJPsV3JCnBqs9ddL9tr6D2g xen/COPYING
3ddb79bcbOVHh38VJzc97-JEGD4dJQ xen/Makefile
3ddb79bcWnTwYsQRWl_PaneJfa6p0w xen/Rules.mk
+421098b25A0RvuYN3rP28ga3_FN3_Q xen/arch/ia64/Makefile
+421098b2okIeYXS9w9avmSozls61xA xen/arch/ia64/Rules.mk
+421098b21p12UcKjHBrLh_LjlvNEwA xen/arch/ia64/acpi.c
+421098b26C_0yoypoHqjDcJA9UrG_g xen/arch/ia64/asm-offsets.c
+421098b2PHgzf_Gg4R65YRNi_QzMKQ xen/arch/ia64/dom0_ops.c
+421098b2O7jsNfzQXA1v3rbAc1QhpA xen/arch/ia64/dom_fw.c
+421098b2ZlaBcyiuuPr3WpzaSDwg6Q xen/arch/ia64/domain.c
+42a08294zRikvZk_CR1iVojHjcVFZw xen/arch/ia64/hpsimserial.c
+4239e98a_HX-FCIcXtVqY0BbrDqVug xen/arch/ia64/hypercall.c
+4295e18f42gf1T-8W97A3KSlBaY1tA xen/arch/ia64/hyperprivop.S
+421098b3LYAS8xJkQiGP7tiTlyBt0Q xen/arch/ia64/idle0_task.c
+421098b3ys5GAr4z6_H1jD33oem82g xen/arch/ia64/irq.c
+4272a8e4lavI6DrTvqaIhXeR5RuKBw xen/arch/ia64/ivt.S
+421098b3Heh72KuoVlND3CH6c0B0aA xen/arch/ia64/lib/Makefile
+421098b3O0MYMUsmYVFy84VV_1gFwQ xen/arch/ia64/mm_init.c
+428b9f38Gp0KcPokG9Nq5v1rGk2FkA xen/arch/ia64/mmio.c
+425ae516maKAsHBJVSzs19cdRgt3Nw xen/arch/ia64/patch/linux-2.6.11/cpumask.h
+425ae516cGqvMzGtihTEsQXAXsuOhQ xen/arch/ia64/patch/linux-2.6.11/efi.c
+425ae516Y1A4q4_Kfre3qnDj7lbHJg xen/arch/ia64/patch/linux-2.6.11/entry.S
+428bb037eJ4qs48I-tUdhht5_95obA xen/arch/ia64/patch/linux-2.6.11/entry.h
+428bb037jPbybWNkNymaqkFr83vT6Q xen/arch/ia64/patch/linux-2.6.11/gcc_intrin.h
+425ae516txAP-owjzpTJ7ThfzWR8nw xen/arch/ia64/patch/linux-2.6.11/hardirq.h
+425ae516PDO1ESDHXHVeDNvlqUfmdQ xen/arch/ia64/patch/linux-2.6.11/head.S
+425ae516JR7HWvt1zxJ-wLvEWmJGgg xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h
+428bb037UxfxIhZaslk-qHazO4w0yg xen/arch/ia64/patch/linux-2.6.11/ia64regs.h
+425ae516AHRNmaVuZjJY-9YjmKRDqg xen/arch/ia64/patch/linux-2.6.11/interrupt.h
+425ae516U2wFUzrUJQUpy3z38jZHsQ xen/arch/ia64/patch/linux-2.6.11/io.h
+425ae516GGRmXijPBLC5ii6yWOn0rg xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c
+425ae516atiECmpn_6nZDw4kkmbJ6g xen/arch/ia64/patch/linux-2.6.11/kregs.h
+425ae516Je2zI-Iw30_uGhvUYdlCZQ xen/arch/ia64/patch/linux-2.6.11/mca_asm.h
+425ae5160-9wHxh0tOnIjavEjt6W0A xen/arch/ia64/patch/linux-2.6.11/minstate.h
+425ae516N7SaORdbodDr90tmtCzYXw xen/arch/ia64/patch/linux-2.6.11/mm_contig.c
+425ae516WDLrfEA4zr40d00z0VIWPg xen/arch/ia64/patch/linux-2.6.11/page.h
+425ae516pVQ75NhdItT593SiWI0lbQ xen/arch/ia64/patch/linux-2.6.11/pal.S
+428bb037THuiyhERFP8RhRgapNkWXg xen/arch/ia64/patch/linux-2.6.11/pal.h
+425ae516QfmjiF_a-mabAXqV8Imzkg xen/arch/ia64/patch/linux-2.6.11/pgalloc.h
+425ae516EWaNOBEnc1xnphTbRmNZsw xen/arch/ia64/patch/linux-2.6.11/processor.h
+428bb037KSxe7_UyqseK5bWhGe3KwA xen/arch/ia64/patch/linux-2.6.11/ptrace.h
+425ae516LecDyXlwh3NLBtHZKXmMcA xen/arch/ia64/patch/linux-2.6.11/series
+425ae516RFiPn2CGkpJ21LM-1lJcQg xen/arch/ia64/patch/linux-2.6.11/setup.c
+42a8bcc8E6zmTKC5xgOcFLcnzbhVEw xen/arch/ia64/patch/linux-2.6.11/sn_sal.h
+425ae516p4ICTkjqNYEfYFxqULj4dw xen/arch/ia64/patch/linux-2.6.11/system.h
+425ae516juUB257qrwUdsL9AsswrqQ xen/arch/ia64/patch/linux-2.6.11/time.c
+425ae5167zQn7zYcgKtDUDX2v-e8mw xen/arch/ia64/patch/linux-2.6.11/tlb.c
+425ae5162bIl2Dgd19x-FceB4L9oGw xen/arch/ia64/patch/linux-2.6.11/types.h
+425ae516cFUNY2jHD46bujcF5NJheA xen/arch/ia64/patch/linux-2.6.11/unaligned.c
+421098b39QFMC-1t1r38CA7NxAYBPA xen/arch/ia64/patch/linux-2.6.7/bootmem.h
+421098b3SIA1vZX9fFUjo1T3o_jMCQ xen/arch/ia64/patch/linux-2.6.7/current.h
+421098b3ZBl80iPuSeDU_Id5AgZl0w xen/arch/ia64/patch/linux-2.6.7/efi.c
+421098b3VUmGT2Jdy4SWeDTwcCHaqg xen/arch/ia64/patch/linux-2.6.7/efi.h
+421098b3dPmLXyvKEmvH_2XALeIYlg xen/arch/ia64/patch/linux-2.6.7/entry.S
+421098b3eoimqDUiVw9p_RADfvICwQ xen/arch/ia64/patch/linux-2.6.7/gcc_intrin.h
+421098b3ZcvjJahWCTvmpNb1RWArww xen/arch/ia64/patch/linux-2.6.7/hardirq.h
+421098b3gZO0kxetbOVLlpsFkf0PWQ xen/arch/ia64/patch/linux-2.6.7/head.S
+421098b3Hz4y9vxFo6rZ03PXkFF6-w xen/arch/ia64/patch/linux-2.6.7/hpsim_irq.c
+421098b3mn7maohx9UTPjTZEVov-kg xen/arch/ia64/patch/linux-2.6.7/hpsim_ssc.h
+421098b4HWTbzGFd8fAT27GIavt61g xen/arch/ia64/patch/linux-2.6.7/hw_irq.h
+421098b4wVriEglxpLtvD9NMUr76Ew xen/arch/ia64/patch/linux-2.6.7/ide.h
+421098b4ckKw7I-p3APMhFOuefMWMA xen/arch/ia64/patch/linux-2.6.7/init_task.c
+421098b4CSuWMM-4vHvAa4F4luDOLQ xen/arch/ia64/patch/linux-2.6.7/init_task.h
+421098b4x5Hnxgvf22nhvxzPMszw1g xen/arch/ia64/patch/linux-2.6.7/interrupt.h
+421098b4BgHuG3PiGY2QOQCNEqMYsA xen/arch/ia64/patch/linux-2.6.7/io.h
+421098b4JnNHXkW2732slXwxMX79RA xen/arch/ia64/patch/linux-2.6.7/irq.h
+421098b4H-Upf_mxF2apXBffvYadUw xen/arch/ia64/patch/linux-2.6.7/irq_ia64.c
+421098b4C0Lc3xag4Nm-_yC9IMTDqA xen/arch/ia64/patch/linux-2.6.7/ivt.S
+421098b4weyd0AQTjPLmooChUJm13Q xen/arch/ia64/patch/linux-2.6.7/kregs.h
+421098b4vHCejAUPem4w8p5V-AD1Ig xen/arch/ia64/patch/linux-2.6.7/lds.S
+421098b4uooGl5X8zZM96qpmS0Furg xen/arch/ia64/patch/linux-2.6.7/linuxtime.h
+424dad01Txy4dcgKHGkTx1L2z7GuQA xen/arch/ia64/patch/linux-2.6.7/mca_asm.h
+421098b4awnw3Jf23gohJWoK8s7-Qg xen/arch/ia64/patch/linux-2.6.7/minstate.h
+421098b5hIfMbZlQTfrOKN4BtzJgDQ xen/arch/ia64/patch/linux-2.6.7/mm_bootmem.c
+421098b53IVBoQPcDjFciZy86YEhRQ xen/arch/ia64/patch/linux-2.6.7/mm_contig.c
+421098b5pZw41QuBTvhjvSol6aAHDw xen/arch/ia64/patch/linux-2.6.7/mmzone.h
+421098b5B_dClZDGuPYeY3IXo8Hlbw xen/arch/ia64/patch/linux-2.6.7/page.h
+421098b5saClfxPj36l47H9Um7h1Fw xen/arch/ia64/patch/linux-2.6.7/page_alloc.c
+424dab78_JGGpJDaAb6ZtkPJAkAKOA xen/arch/ia64/patch/linux-2.6.7/pal.S
+4241ed05l9ZdG7Aj0tygIxIwPRXhog xen/arch/ia64/patch/linux-2.6.7/pgalloc.h
+421098b5OkmcjMBq8gxs7ZrTa4Ao6g xen/arch/ia64/patch/linux-2.6.7/processor.h
+421098b51RLB6jWr6rIlpB2SNObxZg xen/arch/ia64/patch/linux-2.6.7/sal.h
+421098b5WFeRnwGtZnHkSvHVzA4blg xen/arch/ia64/patch/linux-2.6.7/setup.c
+421098b5Jm2i8abzb0mpT6mlEiKZDg xen/arch/ia64/patch/linux-2.6.7/slab.c
+421098b5w6MBnluEpQJAWDTBFrbWSQ xen/arch/ia64/patch/linux-2.6.7/slab.h
+4241eb584dcZqssR_Uuz2-PgMJXZ5Q xen/arch/ia64/patch/linux-2.6.7/swiotlb.c
+421098b5Cg7nbIXm3RhUF-uG3SKaUA xen/arch/ia64/patch/linux-2.6.7/system.h
+421098b5XrkDYW_Nd9lg5CDgNzHLmg xen/arch/ia64/patch/linux-2.6.7/time.c
+421098b5_kFbvZIIPM3bdCES1Ocqnw xen/arch/ia64/patch/linux-2.6.7/tlb.c
+421098b5DWbgK-tBR4um8PEAqPwqTA xen/arch/ia64/patch/linux-2.6.7/types.h
+421098b5il9YfZM0HpeCnaMgVN_q9g xen/arch/ia64/patch/linux-2.6.7/unaligned.c
+421098b65M5cPramsLGbODg8lQwUjQ xen/arch/ia64/patch/linux-2.6.7/wait.h
+42a0d69cCiNxr2Y1GY1khO7qRiNkbw xen/arch/ia64/pcdp.c
+421098b6cYDwzXP86ViTLlTO2x7ovA xen/arch/ia64/pdb-stub.c
41a26ebcqaSGVQ8qTMwpPwOJSJ7qSw xen/arch/ia64/privop.c
41a26ebc4BOHDUsT0TSnryPeV2xfRA xen/arch/ia64/process.c
41a26ebcJ30TFl1v2kR8rqpEBvOtVw xen/arch/ia64/regionreg.c
+421098b69pUiIJrqu_w0JMUnZ2uc2A xen/arch/ia64/smp.c
+421098b6_ToSGrf6Pk1Uwg5aMAIBxg xen/arch/ia64/smpboot.c
+42a8bd43dIEIsS-EoQqt5Df1RTr5Hg xen/arch/ia64/sn_console.c
+428b9f38JJDW35iDn5DlfXTu700rkQ xen/arch/ia64/tools/README.RunVT
+421098b6AUdbxR3wyn1ATcmNuTao_Q xen/arch/ia64/tools/README.xenia64
+42376c6dfyY0eq8MS2dK3BW2rFuEGg xen/arch/ia64/tools/README.xenia64linux
+421098b6rQ2BQ103qu1n1HNofbS2Og xen/arch/ia64/tools/mkbuildtree
+4252ace7eQQmDdwOqsKWdHo8JpKqnQ xen/arch/ia64/tools/privify/Makefile
+4252ace76fKAIizJRS6S84KbK6yXYw xen/arch/ia64/tools/privify/README.privify
+4252ace7uR0Th8eEXiLyafNPTDYrOg xen/arch/ia64/tools/privify/privify.c
+4252ace7H2dIMPFeFwczAVoP4yAHxA xen/arch/ia64/tools/privify/privify.h
+4252ace74lKUPFnO8PmF0Dtpk7Xkng xen/arch/ia64/tools/privify/privify_elf64.c
41a26ebc--sjlYZQxmIxyCx3jw70qA xen/arch/ia64/vcpu.c
+421098b6M2WhsJ_ZMzFamAQcdc5gzw xen/arch/ia64/vhpt.c
+428b9f38PglyXM-mJJfo19ycuQrEhw xen/arch/ia64/vlsapic.c
+428b9f38EmpBsMHL3WbOZoieteBGdQ xen/arch/ia64/vmmu.c
+428b9f38hU-X5aX0MIY3EU0Yw4PjcA xen/arch/ia64/vmx_entry.S
+428b9f38S76bWI96g7uPLmE-uAcmdg xen/arch/ia64/vmx_init.c
+428b9f385AMSyCRYBsckQClQY4ZgHA xen/arch/ia64/vmx_interrupt.c
+428b9f380IOjPmj0N6eelH-WJjl1xg xen/arch/ia64/vmx_ivt.S
+428b9f38Y7tp9uyNRdru3lPDXLjOCA xen/arch/ia64/vmx_minstate.h
+428b9f38H9Pz0ZhRUT0-11A6jceE1w xen/arch/ia64/vmx_phy_mode.c
+428b9f38pXU56r2OjoFW2Z8H1XY17w xen/arch/ia64/vmx_process.c
+428b9f38GmZxD-GMDnQB3m7tOoukTA xen/arch/ia64/vmx_utility.c
+428b9f38Pflg6Z4CtXeVGv7dyEOM4g xen/arch/ia64/vmx_vcpu.c
+428b9f38Y7p7hXHWx9QF_oYUjdD__g xen/arch/ia64/vmx_virt.c
+428b9f38EL7qKbbKkhBNr0KzMLS4Gg xen/arch/ia64/vmx_vsa.S
+428b9f3805WejQ1E-OqAPANPAu8vPw xen/arch/ia64/vtlb.c
41a26ebc4jSBGQOuyNIPDST58mNbBw xen/arch/ia64/xenasm.S
+4272adaeit9raZ9KnjO_wR4Ii9LJNQ xen/arch/ia64/xenirq.c
+427162263zDUiPmTj-lP4eGyXs5eIg xen/arch/ia64/xenmem.c
+421098b6mWyFPtkhPz9h1LCmKpoCLg xen/arch/ia64/xenmisc.c
+421098b6lY2JzrV1oFDbrt7XQhtElg xen/arch/ia64/xensetup.c
+427664f5eygrc3nEhI3RKf0Y37PzyA xen/arch/ia64/xentime.c
3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/x86/Makefile
3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk
-3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c
+3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi/boot.c
3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c
+42360b3244-Q6BpEKhR_A1YtG1wPNQ xen/arch/x86/audit.c
+4299ca46lrYcJPWxWgB4KTNkRQ7CwQ xen/arch/x86/bitops.c
3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen/arch/x86/boot/mkelf32.c
3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/x86/boot/x86_32.S
40e42bdbNu4MjI750THP_8J1S-Sa0g xen/arch/x86/boot/x86_64.S
+4107c15e-VmEcLsE-7JCXZaabI8C7A xen/arch/x86/cdb.c
+4295ecb1Ynez_TseZvDdjD7PzVMDiw xen/arch/x86/cpu/amd.c
+4295ecb1KPPNny26nBEJzK4pAG-KXQ xen/arch/x86/cpu/centaur.c
+4295ecb1QnJx9cbqCJQ1o4TTFQL5Vg xen/arch/x86/cpu/common.c
+4295ecb1ZIJLN5uklV1xompN7DN1WQ xen/arch/x86/cpu/cpu.h
+4295ecb1g6Ye-zy_oXVQQaKw4AtDmw xen/arch/x86/cpu/cyrix.c
+4295ecb1MOdQxXznHu3g-p5DzhMv8g xen/arch/x86/cpu/intel.c
+4295ecb1LsW7ov9JOtRP8euvJKbgbQ xen/arch/x86/cpu/intel_cacheinfo.c
+4295ecb1AeClyruqwLz-xDthMZ5eoA xen/arch/x86/cpu/rise.c
+4295ecb1GO92quFeyoVz2LsPQcFuHg xen/arch/x86/cpu/transmeta.c
3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c
+4294b5ee34eGSh5YNDKMSxBIOycluw xen/arch/x86/dmi_scan.c
40e34414WiQO4h2m3tcpaCPn7SyYyg xen/arch/x86/dom0_ops.c
3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/domain.c
+4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/domain_build.c
41d3eaae6GSDo3ZJDfK3nvQsJux-PQ xen/arch/x86/e820.c
3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c
3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c
+4294b5ee0qd8iX0Z4a6XpmbS-7r9CQ xen/arch/x86/genapic/bigsmp.c
+4294b5eeRyEW29Ue9ykTGCgG4PD2OQ xen/arch/x86/genapic/default.c
+4294b5eeGvaKRkeAfnvIbNqPn__sLA xen/arch/x86/genapic/es7000.c
+4294b5eezzXwm3k_PG72kjEidxESjA xen/arch/x86/genapic/es7000.h
+4294b5eeUsoC73al4Bsg2E1NKy0oYQ xen/arch/x86/genapic/es7000plat.c
+4294b5ee8T56zBzx90toTSftqiKoVA xen/arch/x86/genapic/probe.c
+4294b5ee2PhCf6SsLxck58bGLR8hYA xen/arch/x86/genapic/summit.c
3ddb79bcesE5E-lS4QhRhlqXxqj9cA xen/arch/x86/i387.c
3ddb79bcCAq6IpdkHueChoVTfXqEQQ xen/arch/x86/i8259.c
3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen/arch/x86/idle0_task.c
3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen/arch/x86/io_apic.c
3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen/arch/x86/irq.c
-40ec29ffuOa1ZvmJHzFKyZn4k_RcXg xen/arch/x86/memory.c
41d54a76qfpO0VnbL2tYs0Jgt3W3XA xen/arch/x86/microcode.c
+40ec29ffuOa1ZvmJHzFKyZn4k_RcXg xen/arch/x86/mm.c
3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen/arch/x86/mpparse.c
41aaf566Z4sTDgJ77eEg0TzzQ1ka6Q xen/arch/x86/mtrr/amd.c
41aaf566TOpOBXT00wwQGUh20f1rlA xen/arch/x86/mtrr/centaur.c
@@ -761,36 +1273,38 @@
41aaf567a36esU-rUK7twPiv-yTFyw xen/arch/x86/mtrr/mtrr.h
41aaf567DcTL6pqVtLZJI5cSryyA1A xen/arch/x86/mtrr/state.c
3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/x86/nmi.c
-3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/x86/pci-irq.c
-3ddb79bcZ_2FxINljqNSkqa17ISyJw xen/arch/x86/pci-pc.c
-3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen/arch/x86/pci-x86.c
-3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/x86/pci-x86.h
-40a4dfced2dnSzbKgJFlD3chKHexjQ xen/arch/x86/pdb-linux.c
-4022a73czgX7d-2zfF_cb33oVemApQ xen/arch/x86/pdb-stub.c
+4051bcecFeq4DE70p4zGO5setf47CA xen/arch/x86/physdev.c
3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/x86/rwlock.c
3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/x86/setup.c
405b8599xI_PoEr3zZoJ2on-jdn7iw xen/arch/x86/shadow.c
3ddb79bcSx2e8JSR3pdSGa8x1ScYzA xen/arch/x86/smp.c
3ddb79bcfUN3-UBCPzX26IU8bq-3aw xen/arch/x86/smpboot.c
+4266673dBje6CS6CwQ3lEdvWbf5Dcw xen/arch/x86/string.c
3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen/arch/x86/time.c
3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/x86/trampoline.S
3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/x86/traps.c
+40e96d3ahBTZqbTViInnq0lM03vs7A xen/arch/x86/usercopy.c
+41c0c411tD3C7TpfDMiFTf7BaNd_Dg xen/arch/x86/vmx.c
+420951dcf1rSGnCH0AEYN2KjWGLG6A xen/arch/x86/vmx_intercept.c
+41c0c411ODt8uEmV-yUxpQLpqimE5Q xen/arch/x86/vmx_io.c
+41f97ef5139vN42cOYHfX_Ac8WOOjA xen/arch/x86/vmx_platform.c
+41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c
419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c
3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c
40f92331jfOlE7MfKwpdkEb1CEf23g xen/arch/x86/x86_32/seg_fixup.c
-3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c
+42000d3ckiFc1qxa4AWqsd0t3lxuyw xen/arch/x86/x86_32/traps.c
3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds
41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c
40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S
41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c
-40e96d3ahBTZqbTViInnq0lM03vs7A xen/arch/x86/x86_64/usercopy.c
+42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c
40e96d3akN3Hu_J5Bk-WXD8OGscrYQ xen/arch/x86/x86_64/xen.lds
+422f27c8J9DQfCpegccMid59XhSmGA xen/arch/x86/x86_emulate.c
3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile
3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c
-4022a73c_BbDFd2YJ_NQYVvKX5Oz7w xen/common/debug-linux.c
-3fa152581E5KhrAtqZef2Sr5NKTz4w xen/common/debug.c
+427fa2d0J0LU2s5oKbsM0nTZ2iyd2Q xen/common/bitmap.c
3ddb79bdLX_P6iB7ILiblRLWvebapg xen/common/dom0_ops.c
3e6377e4i0c9GtKN65e99OtRbw3AZw xen/common/dom_mem_ops.c
3ddb79bdYO5D8Av12NHqPeSviav7cg xen/common/domain.c
@@ -803,66 +1317,78 @@
41a61536SZbR6cj1ukWTb0DYU-vz9w xen/common/multicall.c
3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c
3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c
-4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c
-3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
-4064773cJ31vZt-zhbSoxqft1Jaw0w xen/common/sched_atropos.c
40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c
-40589968be_t_n0-w6ggceW7h-sx0w xen/common/sched_rrobin.c
+41ebbfe9oF1BF3cH5v7yE3eOL9uPbA xen/common/sched_sedf.c
3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
-3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen/common/slab.c
3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c
3e7f358awXBC3Vw-wFRwPw18qL1khg xen/common/string.c
403a3edbejm33XLTGMuinKEwQBrOIg xen/common/trace.c
3ddb79bd3zgV33PHdt-cgh3sxcb1hw xen/common/vsprintf.c
+4203fb92Qcy7mGpauBdq09J-WAqfoA xen/common/xmalloc.c
3ddb79c0ppNeJtjC4va8j41ADCnchA xen/drivers/Makefile
40715b2bi9gU43-cYzlmPDgreYQchw xen/drivers/acpi/Makefile
-40715b2bDxNCz5LFV8FAXihmYJZFUQ xen/drivers/acpi/acpi_ksyms.c
40715b2cNVOegtvyft_AHFKJYRprfA xen/drivers/acpi/tables.c
3e4a8cb7alzQCDKS7MlioPoHBKYkdQ xen/drivers/char/Makefile
4049e6bfNSIq7s7OV-Bd69QD0RpR2Q xen/drivers/char/console.c
+4298e018XQtZkCdufpyFimOGZqqsFA xen/drivers/char/ns16550.c
3e4a8cb7nMChlro4wvOBo76n__iCFA xen/drivers/char/serial.c
-3ddb79beUWngyIhMHgyPtuTem4o4JA xen/drivers/pci/Makefile
-3ddb79beU9td0Mnm0VUMklerBa37qQ xen/drivers/pci/compat.c
-3ddb79beHkGQE58z5t5gyUCYiwOxvw xen/drivers/pci/gen-devlist.c
-3ddb79bfoQcFKLf5P6wZlDl36alWdQ xen/drivers/pci/names.c
-3ddb79bfyX7-pD6XdxY_mdNrJR20iw xen/drivers/pci/pci.c
-3ddb79bf2AS7YBGwooE_Kbv7XgUqNQ xen/drivers/pci/pci.ids
-3ddb79bf7sTn85WtP_8Nc2YEmmVExQ xen/drivers/pci/quirks.c
-3ddb79bfJaf0bkE1Y67bnll8-kjEPg xen/drivers/pci/setup-res.c
40715b2cFpte_UNWnBZW0Du7z9AhTQ xen/include/acpi/acconfig.h
-40715b2cEQWX-PaxEH30qI48K1krnQ xen/include/acpi/acdebug.h
-40715b2c_7j-oy3ZNAuqE3IFNPzArg xen/include/acpi/acdisasm.h
-40715b2cFdcPx-2FHGM1Q-M-592xYQ xen/include/acpi/acdispat.h
-40715b2cKqD_Lihnlmvnyl4AAl3EFw xen/include/acpi/acevents.h
40715b2ctNvVZ058w8eM8DR9hOat_A xen/include/acpi/acexcep.h
40715b2com8I01qcHcAw47e93XsCqQ xen/include/acpi/acglobal.h
40715b2cS1t4uI3sMsu-c0M4qqAIrw xen/include/acpi/achware.h
-40715b2chSz545A4Tq-y3WAtanzPMQ xen/include/acpi/acinterp.h
40715b2cGf23lRI58NphiaDQl698-w xen/include/acpi/aclocal.h
40715b2cdG7tCF2NMk0j1RCQQPzPXg xen/include/acpi/acmacros.h
-40715b2ckP2XZZDkSTehu6riuMogDA xen/include/acpi/acnamesp.h
40715b2c4AvHYn2-2YIyt3mx-Mm5tw xen/include/acpi/acobject.h
40715b2cPUXsHzmchvXx7QHAfW0nMw xen/include/acpi/acoutput.h
-40715b2cNbYpt9CHmCHg3RG7fBdACA xen/include/acpi/acparser.h
40715b2cWM_6zR14U9Tp0s_q8D002A xen/include/acpi/acpi.h
40715b2dcJDTiROgyMk9swD_veWktA xen/include/acpi/acpi_bus.h
40715b2dRFlZK6apnH7WkUCBdyFXWA xen/include/acpi/acpi_drivers.h
40715b2dtgZhNGAzlyBMe3kqve3mqw xen/include/acpi/acpiosxf.h
40715b2dpW5TY7n5rzCufsDhJVWuMQ xen/include/acpi/acpixf.h
-40715b2dp9-_D9LTjcvtf-Yopih5mQ xen/include/acpi/acresrc.h
40715b2djvd97KbIpt4wyJgxwqCqmg xen/include/acpi/acstruct.h
-40715b2dRIU9cQgmTjtCRNleIJDAYg xen/include/acpi/actables.h
40715b2dy8ECRkSo9x0tRRueAjPx1g xen/include/acpi/actbl.h
40715b2ds4J-XWn9Ix-lgBiJffNgxw xen/include/acpi/actbl1.h
40715b2d_aMKMjKKNImJR4km52KRHA xen/include/acpi/actbl2.h
-40715b2dxC03aemijgL-iDH_-BkKRw xen/include/acpi/actbl71.h
40715b2d0oQUijKwEw6SDJ4LhD8c4g xen/include/acpi/actypes.h
40715b2dBByvcAtRpN5mafyEYLcBWA xen/include/acpi/acutils.h
-40715b2d_iPcZ0uUVGblPvjsHOwE5Q xen/include/acpi/amlcode.h
-40715b2d3CdS6dIpZDTiCJRlDG3LCA xen/include/acpi/amlresrc.h
40715b2dKRW7A71SNaeV6zfrEzYxPw xen/include/acpi/platform/acenv.h
40715b2d8fYydJMcODFrV1ocLklGDg xen/include/acpi/platform/acgcc.h
40715b2d1yZkqyAt0kgx2xEwsatuuA xen/include/acpi/platform/aclinux.h
+421098b6Y3xqcv873Gvg1rQ5CChfFw xen/include/asm-ia64/config.h
+421098b6ZcIrn_gdqjUtdJyCE0YkZQ xen/include/asm-ia64/debugger.h
+421098b6z0zSuW1rcSJK1gR8RUi-fw xen/include/asm-ia64/dom_fw.h
+421098b6Nn0I7hGB8Mkd1Cis0KMkhA xen/include/asm-ia64/domain.h
+4241e880hAyo_dk0PPDYj3LsMIvf-Q xen/include/asm-ia64/flushtlb.h
+421098b6X3Fs2yht42TE2ufgKqt2Fw xen/include/asm-ia64/ia64_int.h
+421098b7psFAn8kbeR-vcRCdc860Vw xen/include/asm-ia64/init.h
+421098b7XC1A5PhA-lrU9pIO3sSSmA xen/include/asm-ia64/mm.h
+421098b7c0Dx0ABuW_yHQdAqKhUoiQ xen/include/asm-ia64/mmu_context.h
+421098b7C2dr3O7lgc_oeC9TEE9GKw xen/include/asm-ia64/multicall.h
+421098b7dX_56NCV9zjftqm1yIqC8w xen/include/asm-ia64/offsets.h
+421098b72bPUyviWloEAIB85dGCm2Q xen/include/asm-ia64/privop.h
+421098b7Z6OwjZnrTZkh34DoDfcjrA xen/include/asm-ia64/regionreg.h
+421098b707cY5YluUcWK5Pc-71ETVw xen/include/asm-ia64/regs.h
+4214e2f3fbO_n9Z1kIcBR83d7W4OJw xen/include/asm-ia64/serial.h
+429fb3bc53qJOyKJCBfhDNmTasj8Gw xen/include/asm-ia64/slab.h
+421098b7GkWOnlzSmPvNAhByOSZ1Dw xen/include/asm-ia64/time.h
+421098b7FK3xgShpnH0I0Ou3O4fJ2Q xen/include/asm-ia64/tlb.h
+421098b78IGdFOGUlPmpS7h_QBmoFg xen/include/asm-ia64/vcpu.h
+421098b7PiAencgmBFGAqALU-V5rqQ xen/include/asm-ia64/vhpt.h
+428b9f38_b0DgWwkJcBEsTdEmO9WNQ xen/include/asm-ia64/virt_event.h
+428b9f38B0KbUj3o2FBQJ5tmIIMDHg xen/include/asm-ia64/vmmu.h
+428b9f38ewjoJ-RL-2lsXFT04H2aag xen/include/asm-ia64/vmx.h
+428b9f38coGlYeXx-7hpvfCTAPOd7w xen/include/asm-ia64/vmx_mm_def.h
+428b9f387tov0OtOEeF8fVWSR2v5Pg xen/include/asm-ia64/vmx_pal.h
+428b9f38is0zTsIm96_BKo4MLw0SzQ xen/include/asm-ia64/vmx_pal_vsa.h
+428b9f38iDqbugHUheJrcTCD7zlb4g xen/include/asm-ia64/vmx_phy_mode.h
+428b9f38grd_B0AGB1yp0Gi2befHaQ xen/include/asm-ia64/vmx_platform.h
+428b9f38lm0ntDBusHggeQXkx1-1HQ xen/include/asm-ia64/vmx_ptrace.h
+428b9f38XgwHchZEpOzRtWfz0agFNQ xen/include/asm-ia64/vmx_vcpu.h
+428b9f38tDTTJbkoONcAB9ODP8CiVg xen/include/asm-ia64/vmx_vpd.h
+428b9f38_o0U5uJqmxZf_bqi6_PqVw xen/include/asm-ia64/vtm.h
+428e120a-H-bqn10zOlnhlzlVEuW8A xen/include/asm-ia64/xenprocessor.h
+421098b7LfwIHQ2lRYWhO4ruEXqIuQ xen/include/asm-ia64/xenserial.h
+428e120esS-Tp1mX5VoUrsGJDNY_ow xen/include/asm-ia64/xensystem.h
40715b2dWe0tDhx9LkLXzTQkvD49RA xen/include/asm-x86/acpi.h
3ddb79c3l4IiQtf6MS2jIzcd-hJS8g xen/include/asm-x86/apic.h
3ddb79c3QJYWr8LLGdonLbWmNb9pQQ xen/include/asm-x86/apicdef.h
@@ -878,26 +1404,53 @@
3ddb79c3r9-31dIsewPV3P3i8HALsQ xen/include/asm-x86/delay.h
3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen/include/asm-x86/desc.h
40715b2dTokMLYGSuD58BnxOqyWVew xen/include/asm-x86/div64.h
-3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/asm-x86/domain_page.h
+4204e7acwzqgXyTAPKa1nM-L7Ec0Qw xen/include/asm-x86/domain.h
41d3eaaeIBzW621S1oa0c2yk7X43qQ xen/include/asm-x86/e820.h
3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen/include/asm-x86/fixmap.h
3e2d29944GI24gf7vOP_7x8EyuqxeA xen/include/asm-x86/flushtlb.h
+4294b5eep4lWuDtYUR74gYwt-_FnHA xen/include/asm-x86/genapic.h
3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen/include/asm-x86/hardirq.h
3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen/include/asm-x86/i387.h
+4204e7acwXDo-5iAAiO2eQbtDeYZXA xen/include/asm-x86/init.h
3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen/include/asm-x86/io.h
3ddb79c2TKeScYHQZreTdHqYNLbehQ xen/include/asm-x86/io_apic.h
3ddb79c2L7rTlFzazOLW1XuSZefpFw xen/include/asm-x86/irq.h
404f1b93OjLO4bFfBXYNaJdIqlNz-Q xen/include/asm-x86/ldt.h
+4294b5eeeAE-U0umBauOpcfs9bOixw xen/include/asm-x86/mach-bigsmp/mach_apic.h
+4294b5ee5qY2lHkA2hcNVFnZkHBVQw xen/include/asm-x86/mach-bigsmp/mach_apicdef.h
+4294b5eeq6ore4EePanoutorWtvS1w xen/include/asm-x86/mach-bigsmp/mach_ipi.h
+427fa2d0suK9Av7vsAXhsQxZjqpc_Q xen/include/asm-x86/mach-default/bios_ebda.h
+427fa2d0-SWcuwbdSypo4953bc2JdQ xen/include/asm-x86/mach-default/io_ports.h
+427fa2d0eyAl7LAeO-SVV4IW7lZPGQ xen/include/asm-x86/mach-default/irq_vectors.h
+427fa2d0df7VWG4KKpnKbKR2Cbd1_w xen/include/asm-x86/mach-default/irq_vectors_limits.h
+4260510aYPj2kr6rMbBfMxcvvmXndQ xen/include/asm-x86/mach-default/mach_apic.h
+427fa2d0I3FWjE2tWdOhlEOJn7stcg xen/include/asm-x86/mach-default/mach_apicdef.h
+427fa2d093fDS2gOBLcl7Yndzl7HmA xen/include/asm-x86/mach-default/mach_ipi.h
+427fa2d0Y7bD35d-FvDAeiJDIdRw2A xen/include/asm-x86/mach-default/mach_mpparse.h
+427fa2d0OfglYyfpDTD5DII4M0uZRw xen/include/asm-x86/mach-default/mach_wakecpu.h
+4294b5eeTwL8TeEI5pEzxvGD5obZsA xen/include/asm-x86/mach-es7000/mach_apic.h
+4294b5efhhBHJ81dsuLfJxWuN9PcDQ xen/include/asm-x86/mach-es7000/mach_apicdef.h
+4294b5efvb29X4mFAhUBdeGUPTFoBw xen/include/asm-x86/mach-es7000/mach_ipi.h
+4294b5efLlV3WvmctnQsCPAte4Bf6A xen/include/asm-x86/mach-es7000/mach_mpparse.h
+4294b5efC90I55FIDtKg8jyW8FVffA xen/include/asm-x86/mach-es7000/mach_wakecpu.h
+4294b5efqI--HHz7d7QVcVOi635jgw xen/include/asm-x86/mach-generic/mach_apic.h
+4294b5efHFX7nHDP4ch4NGAPmCsp_w xen/include/asm-x86/mach-generic/mach_apicdef.h
+4294b5efaen_warQx_kSN54AgXgBtg xen/include/asm-x86/mach-generic/mach_ipi.h
+4294b5efq7CDZzdxl-Rxu2K_6cIePQ xen/include/asm-x86/mach-generic/mach_mpparse.h
+4294b5efsEtawAifmsBZAjFagr8Z6Q xen/include/asm-x86/mach-generic/mach_mpspec.h
+4294b5efz5xMcRrYJfcH-wTylihXMA xen/include/asm-x86/mach-summit/mach_apic.h
+4294b5efmKbMzT7YOGp4Jn-5xoB3Uw xen/include/asm-x86/mach-summit/mach_apicdef.h
+4294b5efyUK3aZFqxp7BVF_GXCx56g xen/include/asm-x86/mach-summit/mach_ipi.h
+4294b5efjw2vUbiP4dQX6S6xZvAmZA xen/include/asm-x86/mach-summit/mach_mpparse.h
3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen/include/asm-x86/mc146818rtc.h
40ec25fd7cSvbP7Biw91zaU_g0xsEQ xen/include/asm-x86/mm.h
3ddb79c3n_UbPuxlkNxvvLycClIkxA xen/include/asm-x86/mpspec.h
+427fa2d1eJRenftJJnRyLsHKl1ghtA xen/include/asm-x86/mpspec_def.h
3ddb79c2wa0dA_LGigxOelSGbJ284Q xen/include/asm-x86/msr.h
41aaf567Mi3OishhvrCtET1y-mxQBg xen/include/asm-x86/mtrr.h
41a61536MFhNalgbVmYGXAhQsPTZNw xen/include/asm-x86/multicall.h
3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen/include/asm-x86/page.h
-3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen/include/asm-x86/pci.h
-404f1bb41Yl-5ZjIWnG66HDCj6OIWA xen/include/asm-x86/pda.h
-4022a73diKn2Ax4-R4gzk59lm1YdDg xen/include/asm-x86/pdb.h
+42422fb0FVX-TJkSvAXnbfwMf19XFA xen/include/asm-x86/physdev.h
3ddb79c2QF5-pZGzuX4QukPCDAl59A xen/include/asm-x86/processor.h
40cf1596bim9F9DNdV75klgRSZ6Y2A xen/include/asm-x86/regs.h
3ddb79c2plf7ciNgoNjU-RsbUzawsw xen/include/asm-x86/rwlock.h
@@ -905,23 +1458,30 @@
3ddb79c3Hgbb2g8CyWLMCK-6_ZVQSQ xen/include/asm-x86/smp.h
3ddb79c3jn8ALV_S9W5aeTYUQRKBpg xen/include/asm-x86/smpboot.h
3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen/include/asm-x86/spinlock.h
-40e1966akOHWvvunCED7x3HPv35QvQ xen/include/asm-x86/string.h
+3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-x86/string.h
3ddb79c3ezddh34MdelJpa5tNR00Dw xen/include/asm-x86/system.h
+42033fc1Bb8ffTshBYFGouGkiAMoUQ xen/include/asm-x86/time.h
3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen/include/asm-x86/types.h
40cf1596saFaHD5DC5zvrSn7CDCWGQ xen/include/asm-x86/uaccess.h
+41c0c412k6GHYF3cJtDdw37ee3TVaw xen/include/asm-x86/vmx.h
+41c0c412hck3QX-6_MaXaISGkngQuA xen/include/asm-x86/vmx_cpu.h
+420951dcGoqsqnmjjAtEtm6-3dM9KA xen/include/asm-x86/vmx_intercept.h
+41c0c41243jC1mcArZx_t3YkBL4lTA xen/include/asm-x86/vmx_platform.h
+420951dcqyUCe_gXA_XJPu1ix_poKg xen/include/asm-x86/vmx_virpit.h
+41c0c412lQ0NVVN9PsOSznQ-qhOiPA xen/include/asm-x86/vmx_vmcs.h
418fbcfe_WliJPToeVM-9VStvym-hw xen/include/asm-x86/x86_32/asm_defns.h
-3ddb79c2ADvRmdexd9y3AYK9_NTx-Q xen/include/asm-x86/x86_32/current.h
+429c852fi3pvfa9kIjryYK5AGBmXAg xen/include/asm-x86/x86_32/page-2level.h
+429c852fskvSOgcD5EC25_m9um9t4g xen/include/asm-x86/x86_32/page-3level.h
+4208e2a3ZNFroNXbX9OYaOB-xtUyDQ xen/include/asm-x86/x86_32/page.h
3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen/include/asm-x86/x86_32/regs.h
-3e7f358aG11EvMI9VJ4_9hD4LUO7rQ xen/include/asm-x86/x86_32/string.h
3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen/include/asm-x86/x86_32/uaccess.h
41bf1717bML6GxpclTWJabiaO5W5vg xen/include/asm-x86/x86_64/asm_defns.h
-404f1b9ceJeGVaPNIENm2FkK0AgEOQ xen/include/asm-x86/x86_64/current.h
-404f1b9fl6AQ_a-T1TDK3fuwTPXmHw xen/include/asm-x86/x86_64/desc.h
-404f1badfXZJZ2sU8sh9PS2EZvd19Q xen/include/asm-x86/x86_64/ldt.h
+4208e2a3Fktw4ZttKdDxbhvTQ6brfQ xen/include/asm-x86/x86_64/page.h
404f1bb86rAXB3aLS1vYdcqpJiEcyg xen/include/asm-x86/x86_64/regs.h
-40e1966azOJZfNI6Ilthe6Q-T3Hewg xen/include/asm-x86/x86_64/string.h
404f1bc4tWkB9Qr8RkKtZGW5eMQzhw xen/include/asm-x86/x86_64/uaccess.h
+422f27c8RHFkePhD34VIEpMMqofZcA xen/include/asm-x86/x86_emulate.h
400304fcmRQmDdFYEzDh0wcBba9alg xen/include/public/COPYING
+421098b7OKb9YH_EUA_UpCxBjaqtgA xen/include/public/arch-ia64.h
404f1bc68SXxmv0zQpXBWGrCzSyp8w xen/include/public/arch-x86_32.h
404f1bc7IwU-qnH8mJeVu0YsNGMrcw xen/include/public/arch-x86_64.h
3ddb79c2PMeWTK86y4C3F4MzHw4A1g xen/include/public/dom0_ops.h
@@ -929,27 +1489,36 @@
4121d149udGfSUGhn3k1ECz0bM31nQ xen/include/public/grant_table.h
40f5623bqoi4GEoBiiUc6TZk1HjsMg xen/include/public/io/blkif.h
40dc4076pVeE1kEEWzcUaNZin65kCA xen/include/public/io/domain_controller.h
+41c0c412FLc0gunlJl91qMYscFtXVA xen/include/public/io/ioreq.h
40f5623cTZ80EwjWUBlh44A9F9i_Lg xen/include/public/io/netif.h
+41d40e9b8zCk5VDqhVbuQyhc7G3lqA xen/include/public/io/ring.h
+41ee5e8c6mLxIx82KPsbpt_uts_vSA xen/include/public/io/usbif.h
4051db79512nOCGweabrFWO2M2h5ng xen/include/public/physdev.h
40589968wmhPmV5-ENbBYmMjnedgKw xen/include/public/sched_ctl.h
404f3d2eR2Owk-ZcGOx9ULGHg3nrww xen/include/public/trace.h
+4266bd01Ul-pC01ZVvBkhBnv5eqzvw xen/include/public/vmx_assist.h
3ddb79c25UE59iu4JJcbRalx95mvcg xen/include/public/xen.h
3e397e66m2tO3s-J8Jnr7Ws_tGoPTg xen/include/xen/ac_timer.h
40715b2epYl2jBbxzz9CI2rgIca7Zg xen/include/xen/acpi.h
+427fa2d1wyoVbvCyZRLposYjA_D_4g xen/include/xen/bitmap.h
+427fa2d1ItcC_yWuBUkhc7adedP5ow xen/include/xen/bitops.h
3ddb79c0c0cX_DZE209-Bb-Rx1v-Aw xen/include/xen/cache.h
41f2cea7Yna7xc0X9fyavIjoSFFeVg xen/include/xen/compile.h.in
3f840f12CkbYSlwMrY2S11Mpyxg7Nw xen/include/xen/compiler.h
3ddb79c259jh8hE7vre_8NuE7nwNSA xen/include/xen/config.h
3eb165e0eawr3R-p2ZQtSdLWtLRN_A xen/include/xen/console.h
+427fa2d1bQCWgEQqTTh5MjG4MPEH9g xen/include/xen/cpumask.h
3ddb79c1V44RD26YqCUm-kqIupM37A xen/include/xen/ctype.h
3ddb79c05DdHQ0UxX_jKsXdR4QlMCA xen/include/xen/delay.h
+4294b5efxcDdUVp4XMEE__IFw7nPow xen/include/xen/dmi.h
40f2b4a2hC3HtChu-ArD8LyojxWMjg xen/include/xen/domain.h
+3e20b82fl1jmQiKdLy7fxMcutfpjWA xen/include/xen/domain_page.h
3ddb79c2O729EttZTYu1c8LcsUO_GQ xen/include/xen/elf.h
3ddb79c0HIghfBF8zFUdmXhOU8i6hA xen/include/xen/errno.h
3ddb79c1W0lQca8gRV7sN6j3iY4Luw xen/include/xen/event.h
41262590CyJy4vd42dnqzsn8-eeGvw xen/include/xen/grant_table.h
3ddb79c0GurNF9tDWqQbAwJFH8ugfA xen/include/xen/init.h
-3ddb79c1nzaWu8NoF4xCCMSFJR4MlA xen/include/xen/ioport.h
+428084e41zemtCAtYLcD9bUzwE35SA xen/include/xen/inttypes.h
3ddb79c2qAxCOABlkKtD8Txohe-qEw xen/include/xen/irq.h
3ddb79c2b3qe-6Ann09FqZBF4IrJaQ xen/include/xen/irq_cpustat.h
3e4540ccPHqIIv2pvnQ1gV8LUnoHIg xen/include/xen/kernel.h
@@ -959,8 +1528,6 @@
3ddb79c1gs2VbLbQlw0dcDUXYIepDA xen/include/xen/mm.h
3ddb79c1ieLZfGSFwfvvSQ2NK1BMSg xen/include/xen/multiboot.h
41a61536ii6j2lJ2rXwMOLaG1CHPvw xen/include/xen/multicall.h
-3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen/include/xen/pci.h
-3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen/include/xen/pci_ids.h
3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xen/perfc.h
3e54c38de9SUSYSAwxDf_DwkpAnQFA xen/include/xen/perfc_defn.h
3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen/include/xen/prefetch.h
@@ -968,7 +1535,7 @@
40589969nPq3DMzv24RDb5LXE9brHw xen/include/xen/sched-if.h
3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen/include/xen/sched.h
403a06a7H0hpHcKpAiDe5BPnaXWTlA xen/include/xen/serial.h
-3ddb79c14dXIhP7C2ahnoD08K90G_w xen/include/xen/slab.h
+4252c315hw0xXYMKIfFzhomi1M1yNA xen/include/xen/shadow.h
3ddb79c09xbS-xxfKxuV3JETIhBzmg xen/include/xen/smp.h
3ddb79c1Vi5VleJAOKHAlY0G2zAsgw xen/include/xen/softirq.h
3ddb79c2iIcESrDAB8samy_yAh6olQ xen/include/xen/spinlock.h
@@ -976,6 +1543,7 @@
3ddb79c0BnA20PbgmuMPSGIBljNRQw xen/include/xen/time.h
403a3edbG9K5uZjuY19_LORbQGmFbA xen/include/xen/trace.h
3ddb79c1-kVvF8cVa0k3ZHDdBMj01Q xen/include/xen/types.h
+3ddb79c14dXIhP7C2ahnoD08K90G_w xen/include/xen/xmalloc.h
41d291f5u3J3HYViXLs3cNuFGTvzNg xen/tools/Makefile
3eb3c87fc79FXLA6R9TvdBJNTvQDwA xen/tools/figlet/LICENSE
3eb3c87fPL2T_zBb0bHlbZY-ACEKRw xen/tools/figlet/Makefile
diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore
index f87c654e98..33edc625d6 100644
--- a/BitKeeper/etc/ignore
+++ b/BitKeeper/etc/ignore
@@ -8,11 +8,14 @@
*.tar.bz2
*.tar.gz
*~
+BLOG
BitKeeper/*/*
+Makefile.rej
PENDING/*
TAGS
Twisted-1.3.0.tar.gz
Twisted-1.3.0/*
+dist/*
docs/*.aux
docs/*.dvi
docs/*.log
@@ -43,39 +46,114 @@ docs/user/labels.pl
docs/user/user.css
docs/user/user.html
extras/mini-os/h/hypervisor-ifs
-dist/*
+extras/mini-os/h/xen-public
+extras/mini-os/mini-os.*
+install/*
linux-*-xen0/*
linux-*-xenU/*
-linux-xen-sparse
linux-*.patch
+linux-2.6.10-xen-sparse/include/asm-xen/hypervisor.h.smh
+linux-xen-sparse
+mkddbxen
netbsd-*-tools/*
netbsd-*-xen0/*
netbsd-*-xenU/*
netbsd-*.patch
patches/*/.makedep
+patches/ebtables-brnf-5_vs_2.4.25.diff
+patches/ebtables.diff
patches/tmp/*
pristine-*
ref-*
tools/*/build/lib*/*.py
+tools/Makefile.smh
tools/balloon/balloon
+tools/blktap/Makefile.smh
+tools/blktap/blkcow
+tools/blktap/blkcowgnbd
+tools/blktap/blkcowimg
+tools/blktap/blkdump
+tools/blktap/blkgnbd
+tools/blktap/blkimg
+tools/blktap/blockstore.dat
+tools/blktap/blockstored
+tools/blktap/bstest
+tools/blktap/parallax
+tools/blktap/vdi.dot
+tools/blktap/vdi.ps
+tools/blktap/vdi_create
+tools/blktap/vdi_fill
+tools/blktap/vdi_list
+tools/blktap/vdi_snap
+tools/blktap/vdi_snap_list
+tools/blktap/vdi_tree
+tools/blktap/vdi_validate
+tools/blktap/xen/*
tools/check/.*
+tools/cmdline/*
+tools/cmdline/xen/*
+tools/firmware/*.bin
+tools/firmware/*.sym
+tools/firmware/*/biossums
+tools/firmware/*bios/*bios*.txt
+tools/firmware/rombios/BIOS-bochs-latest
+tools/firmware/rombios/_rombios_.c
+tools/firmware/rombios/rombios.s
+tools/firmware/vmxassist/gen
+tools/firmware/vmxassist/offsets.h
+tools/firmware/vmxassist/roms.h
+tools/firmware/vmxassist/vmxassist
+tools/firmware/vmxassist/vmxloader
+tools/gdb/gdb-6.2.1-linux-i386-xen/*
+tools/gdb/gdb-6.2.1/*
+tools/ioemu/config-host.*
+tools/ioemu/keysym_adapter_sdl.h
+tools/ioemu/target-*/Makefile
+tools/ioemu/target-*/config.*
+tools/ioemu/target-*/qemu-dm
+tools/ioemu/target-*/qemu-vgaram-bin
tools/libxc/xen/*
+tools/misc/cpuperf/cpuperf-perfcntr
+tools/misc/cpuperf/cpuperf-xen
+tools/misc/mbootpack/bin2c
+tools/misc/mbootpack/bootsect
+tools/misc/mbootpack/bzimage_header.c
+tools/misc/mbootpack/mbootpack
+tools/misc/mbootpack/setup
tools/misc/miniterm/miniterm
+tools/misc/xc_shadow
+tools/misc/xen_cpuperf
tools/misc/xenperf
+tools/pygrub/build/*
+tools/python/build/*
+tools/tests/test_x86_emulator
tools/vnet/gc
tools/vnet/gc*/*
-tools/vnet/vnet-module/.tmp_versions/*
-tools/vnet/vnet-module/.*.cmd
tools/vnet/vnet-module/*.ko
+tools/vnet/vnet-module/.*.cmd
+tools/vnet/vnet-module/.tmp_versions/*
tools/vnet/vnet-module/vnet_module.mod.*
tools/vnetd/vnetd
tools/web-shutdown.tap
+tools/x2d2/minixend
+tools/xcs/xcs
+tools/xcs/xcsdump
+tools/xcutils/xc_restore
+tools/xcutils/xc_save
+tools/xenstore/testsuite/tmp/*
+tools/xenstore/xen
+tools/xenstore/xenstored
+tools/xenstore/xenstored_test
+tools/xenstore/xs_random
+tools/xenstore/xs_stress
+tools/xenstore/xs_test
tools/xentrace/xentrace
tools/xfrd/xfrd
-xen/tags
+xen/BLOG
xen/TAGS
xen/arch/x86/asm-offsets.s
xen/arch/x86/boot/mkelf32
+xen/ddb/*
xen/drivers/pci/classlist.h
xen/drivers/pci/devlist.h
xen/drivers/pci/gen-devlist
@@ -83,9 +161,11 @@ xen/figlet/figlet
xen/include/asm
xen/include/asm-*/asm-offsets.h
xen/include/hypervisor-ifs/arch
+xen/include/public/public
xen/include/xen/*.new
xen/include/xen/banner.h
xen/include/xen/compile.h
+xen/tags
xen/tools/elf-reloc
xen/tools/figlet/figlet
xen/xen
diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok
index 59eac58efc..afc6cd647f 100644
--- a/BitKeeper/etc/logging_ok
+++ b/BitKeeper/etc/logging_ok
@@ -1,78 +1,105 @@
+ach61@arcadians.cl.cam.ac.uk
ach61@boulderdash.cl.cam.ac.uk
ach61@labyrinth.cl.cam.ac.uk
ach61@soar.cl.cam.ac.uk
+adsharma@linux-t08.sc.intel.com
+akw27@arcadians.cl.cam.ac.uk
akw27@boulderdash.cl.cam.ac.uk
akw27@labyrinth.cl.cam.ac.uk
akw27@plucky.localdomain
+arun.sharma@intel.com
bd240@boulderdash.cl.cam.ac.uk
bd240@labyrinth.cl.cam.ac.uk
br260@br260.wolfson.cam.ac.uk
br260@labyrinth.cl.cam.ac.uk
br260@laudney.cl.cam.ac.uk
bren@anvil.research
+bren@br260.wolfson.cam.ac.uk
ccoffing@novell.com
+chrisw@osdl.org
cl349@arcadians.cl.cam.ac.uk
cl349@firebug.cl.cam.ac.uk
cl349@freefall.cl.cam.ac.uk
cl349@labyrinth.cl.cam.ac.uk
+cw@f00f.org
cwc22@centipede.cl.cam.ac.uk
+cwc22@donkeykong.cl.cam.ac.uk
+djm@djmnc4000.(none)
djm@kirby.fc.hp.com
+djm@sportsman.spdomain
doogie@brainfood.com
gm281@boulderdash.cl.cam.ac.uk
gm281@tetrapod.cl.cam.ac.uk
+harry@dory.(none)
hollisb@us.ibm.com
+iap10@firebug.cl.cam.ac.uk
iap10@freefall.cl.cam.ac.uk
iap10@labyrinth.cl.cam.ac.uk
iap10@nidd.cl.cam.ac.uk
+iap10@pb001.cl.cam.ac.uk
iap10@pb007.cl.cam.ac.uk
iap10@striker.cl.cam.ac.uk
iap10@tetris.cl.cam.ac.uk
jrb44@plym.cl.cam.ac.uk
+jrb44@swoop.cl.cam.ac.uk
jws22@gauntlet.cl.cam.ac.uk
jws@cairnwell.research
kaf24@camelot.eng.3leafnetworks.com
kaf24@firebug.cl.cam.ac.uk
kaf24@freefall.cl.cam.ac.uk
kaf24@labyrinth.cl.cam.ac.uk
+kaf24@pb001.cl.cam.ac.uk
kaf24@penguin.local
+kaf24@planb.cl.cam.ac.uk
kaf24@plym.cl.cam.ac.uk
kaf24@scramble.cl.cam.ac.uk
kaf24@striker.cl.cam.ac.uk
kaf24@viper.(none)
katzj@redhat.com
+kmacy@netapp.com
+kmacy@shemp.lab.netapp.com
kraxel@bytesex.org
laudney@eclipse.(none)
+leendert@watson.ibm.com
lists-xen@pimb.org
lynx@idefix.cl.cam.ac.uk
+maf46@burn.cl.cam.ac.uk
mafetter@fleming.research
mark@maw48.kings.cam.ac.uk
maw48@labyrinth.cl.cam.ac.uk
mjw@wray-m-3.hpl.hp.com
+mulix@mulix.org
mwilli2@anvil.research
mwilli2@equilibrium.research
mwilli2@equilibrium.research.intel-research.net
mwilli2@pug.(none)
rac61@labyrinth.cl.cam.ac.uk
rgr22@boulderdash.cl.cam.ac.uk
+riel@redhat.com
rminnich@lanl.gov
rn@wyvis.camb.intel-research.net
rn@wyvis.research.intel-research.net
rneugeba@wyvis.research
rneugeba@wyvis.research.intel-research.net
+rusty@rustcorp.com.au
ryanh@us.ibm.com
sd386@font.cl.cam.ac.uk
shand@spidean.research.intel-research.net
smh22@boulderdash.cl.cam.ac.uk
+smh22@firebug.cl.cam.ac.uk
smh22@labyrinth.cl.cam.ac.uk
smh22@tempest.cl.cam.ac.uk
smh22@uridium.cl.cam.ac.uk
+sos22@donkeykong.cl.cam.ac.uk
sos22@douglas.cl.cam.ac.uk
sos22@labyrinth.cl.cam.ac.uk
+tjd21@arcadians.cl.cam.ac.uk
tlh20@elite.cl.cam.ac.uk
tlh20@labyrinth.cl.cam.ac.uk
tw275@labyrinth.cl.cam.ac.uk
tw275@striker.cl.cam.ac.uk
vh249@airwolf.cl.cam.ac.uk
vh249@arcadians.cl.cam.ac.uk
+xen-ia64.adm@bkbits.net
xenbk@gandalf.hpl.hp.com
ydroneaud@mandriva.com
diff --git a/Config.mk b/Config.mk
new file mode 100644
index 0000000000..b3320b2711
--- /dev/null
+++ b/Config.mk
@@ -0,0 +1,33 @@
+# -*- mode: Makefile; -*-
+
+# Currently supported architectures: x86_32, x86_64
+XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/)
+XEN_TARGET_ARCH ?= $(XEN_COMPILE_ARCH)
+
+# Tools to run on system hosting the build
+HOSTCC = gcc
+HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+
+AS = $(CROSS_COMPILE)as
+LD = $(CROSS_COMPILE)ld
+CC = $(CROSS_COMPILE)gcc
+CPP = $(CROSS_COMPILE)gcc -E
+AR = $(CROSS_COMPILE)ar
+NM = $(CROSS_COMPILE)nm
+STRIP = $(CROSS_COMPILE)strip
+OBJCOPY = $(CROSS_COMPILE)objcopy
+OBJDUMP = $(CROSS_COMPILE)objdump
+
+ifeq ($(XEN_TARGET_ARCH),x86_64)
+LIBDIR = lib64
+else
+LIBDIR = lib
+endif
+
+ifneq ($(EXTRA_PREFIX),)
+EXTRA_INCLUDES += $(EXTRA_PREFIX)/include
+EXTRA_LIB += $(EXTRA_PREFIX)/$(LIBDIR)
+endif
+
+LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i))
+CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i))
diff --git a/Makefile b/Makefile
index 6782496885..264cc2015e 100644
--- a/Makefile
+++ b/Makefile
@@ -20,18 +20,20 @@ XKERNELS := $(foreach kernel, $(KERNELS), $(patsubst buildconfigs/mk.%,%,$(wildc
export DESTDIR
# Export target architecture overrides to Xen and Linux sub-trees.
-ifneq ($(TARGET_ARCH),)
-SUBARCH := $(subst x86_32,i386,$(TARGET_ARCH))
-export TARGET_ARCH SUBARCH
+ifneq ($(XEN_TARGET_ARCH),)
+SUBARCH := $(subst x86_32,i386,$(XEN_TARGET_ARCH))
+export XEN_TARGET_ARCH SUBARCH
endif
+# Default target must appear before any include lines
+all: dist
+
+include Config.mk
include buildconfigs/Rules.mk
.PHONY: all dist install xen tools kernels docs world clean mkpatches mrproper
.PHONY: kbuild kdelete kclean
-all: dist
-
# build and install everything into the standard system directories
install: install-xen install-tools install-kernels install-docs
@@ -159,15 +161,19 @@ uninstall: D=$(DESTDIR)
uninstall:
[ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-$(date +%s)
rm -rf $(D)/etc/init.d/xend*
- rm -rf $(D)/usr/lib/libxc* $(D)/usr/lib/libxutil*
- rm -rf $(D)/usr/lib/python/xen $(D)/usr/include/xen
+ rm -rf $(D)/usr/$(LIBDIR)/libxc* $(D)/usr/$(LIBDIR)/libxutil*
+ rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/include/xen
+ rm -rf $(D)/usr/$(LIBDIR)/share/xen $(D)/usr/$(LIBDIR)/libxenstore*
+ rm -rf $(D)/var/run/xen* $(D)/var/lib/xen*
rm -rf $(D)/usr/include/xcs_proto.h $(D)/usr/include/xc.h
+ rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
rm -rf $(D)/usr/sbin/xcs $(D)/usr/sbin/xcsdump $(D)/usr/sbin/xen*
rm -rf $(D)/usr/sbin/netfix
- rm -rf $(D)/usr/sbin/xfrd $(D)/usr/sbin/xm $(D)/var/lib/xen
+ rm -rf $(D)/usr/sbin/xfrd $(D)/usr/sbin/xm
rm -rf $(D)/usr/share/doc/xen $(D)/usr/man/man*/xentrace*
rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm
rm -rf $(D)/boot/*xen*
+ rm -rf $(D)/lib/modules/*xen*
# Legacy targets for compatibility
linux24:
diff --git a/buildconfigs/Rules.mk b/buildconfigs/Rules.mk
index 11678ed9db..fe02b1e11b 100644
--- a/buildconfigs/Rules.mk
+++ b/buildconfigs/Rules.mk
@@ -1,4 +1,6 @@
+include Config.mk
+
# We expect these two to already be set if people
# are using the top-level Makefile
DISTDIR ?= $(CURDIR)/dist
@@ -53,6 +55,7 @@ endif
$(patsubst %,pristine-%/.valid-pristine,$(ALLSPARSETREES)) : pristine-%/.valid-pristine: %.tar.bz2
rm -rf tmp-pristine-$* $(@D)
mkdir -p tmp-pristine-$*
+ touch tmp-pristine-$*/.bk_skip
tar -C tmp-pristine-$* -jxf $<
mv tmp-pristine-$*/* $(@D)
@rm -rf tmp-pristine-$*
@@ -84,6 +87,9 @@ ref-%/.valid-ref: pristine-%/.valid-pristine
%-clean:
$(MAKE) -f buildconfigs/mk.$* clean
+%-config:
+ $(MAKE) -f buildconfigs/mk.$* config
+
%-xen.patch: ref-%/.valid-ref
rm -rf tmp-$@
cp -al $(<D) tmp-$@
diff --git a/buildconfigs/mk.linux-2.4-xen0 b/buildconfigs/mk.linux-2.4-xen0
index 1b36b53a84..408cb15a1d 100644
--- a/buildconfigs/mk.linux-2.4-xen0
+++ b/buildconfigs/mk.linux-2.4-xen0
@@ -45,6 +45,11 @@ $(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref
make -C $(LINUX_DIR) ARCH=xen oldconfig
make -C $(LINUX_DIR) ARCH=xen dep
+config: CONFIGMODE = menuconfig
+config: $(LINUX_DIR)/include/linux/autoconf.h
+ $(MAKE) -C $(LINUX_DIR) ARCH=xen $(CONFIGMODE)
+ $(MAKE) -C $(LINUX_DIR) ARCH=xen dep
+
clean::
$(MAKE) -C $(LINUX_DIR) ARCH=xen clean
diff --git a/buildconfigs/mk.linux-2.4-xenU b/buildconfigs/mk.linux-2.4-xenU
index f5c3b5a4a2..da38873d91 100644
--- a/buildconfigs/mk.linux-2.4-xenU
+++ b/buildconfigs/mk.linux-2.4-xenU
@@ -38,6 +38,11 @@ $(LINUX_DIR)/include/linux/autoconf.h: ref-linux-$(LINUX_VER)/.valid-ref
make -C $(LINUX_DIR) ARCH=xen oldconfig
make -C $(LINUX_DIR) ARCH=xen dep
+config: CONFIGMODE = menuconfig
+config: $(LINUX_DIR)/include/linux/autoconf.h
+ $(MAKE) -C $(LINUX_DIR) ARCH=xen $(CONFIGMODE)
+ $(MAKE) -C $(LINUX_DIR) ARCH=xen dep
+
clean::
$(MAKE) -C $(LINUX_DIR) ARCH=xen clean
diff --git a/buildconfigs/mk.linux-2.6-xen0 b/buildconfigs/mk.linux-2.6-xen0
index f273d49701..1034cccf4c 100644
--- a/buildconfigs/mk.linux-2.6-xen0
+++ b/buildconfigs/mk.linux-2.6-xen0
@@ -29,7 +29,7 @@ $(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref
CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \
[ -r $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \
cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \
- || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig \
+ || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \
$(LINUX_DIR)/.config
# Patch kernel Makefile to set EXTRAVERSION
( cd $(LINUX_DIR) ; \
@@ -37,6 +37,10 @@ $(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref
rm -f Makefile ; mv Mk.tmp Makefile )
$(MAKE) -C $(LINUX_DIR) ARCH=xen oldconfig
+config: CONFIGMODE = menuconfig
+config: $(LINUX_DIR)/include/linux/autoconf.h
+ $(MAKE) -C $(LINUX_DIR) ARCH=xen $(CONFIGMODE)
+
clean::
$(MAKE) -C $(LINUX_DIR) ARCH=xen clean
diff --git a/buildconfigs/mk.linux-2.6-xenU b/buildconfigs/mk.linux-2.6-xenU
index dd979745c0..9292b78232 100644
--- a/buildconfigs/mk.linux-2.6-xenU
+++ b/buildconfigs/mk.linux-2.6-xenU
@@ -29,7 +29,7 @@ $(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref
CONFIG_VERSION=$$(sed -ne 's/^EXTRAVERSION = //p' $(LINUX_DIR)/Makefile); \
[ -r $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) ] && \
cp $(DESTDIR)/boot/config-$(LINUX_VER)$$CONFIG_VERSION-$(EXTRAVERSION) $(LINUX_DIR)/.config \
- || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig \
+ || cp $(LINUX_DIR)/arch/xen/configs/$(EXTRAVERSION)_defconfig_$(XEN_TARGET_ARCH) \
$(LINUX_DIR)/.config
# Patch kernel Makefile to set EXTRAVERSION
( cd $(LINUX_DIR) ; \
@@ -37,6 +37,10 @@ $(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref
rm -f Makefile ; mv Mk.tmp Makefile )
$(MAKE) -C $(LINUX_DIR) ARCH=xen oldconfig
+config: CONFIGMODE = menuconfig
+config: $(LINUX_DIR)/include/linux/autoconf.h
+ $(MAKE) -C $(LINUX_DIR) ARCH=xen $(CONFIGMODE)
+
clean::
$(MAKE) -C $(LINUX_DIR) ARCH=xen clean
diff --git a/docs/misc/VMX_changes.txt b/docs/misc/VMX_changes.txt
new file mode 100644
index 0000000000..739d315e79
--- /dev/null
+++ b/docs/misc/VMX_changes.txt
@@ -0,0 +1,90 @@
+Changes to Xen in support of Intel(R) Vanderpool Technology
+-------------------------------------------------------------
+
+Our VT extensions to the Xen hypervisor provide full platform
+virtualization, including CPU(s), memory, and I/O infrastructure. The
+generic code in Xen handles and schedules those virtual machines as it
+does for the existing para-virtualized domains.
+
+Full virtualization required by the OS guests requires full device
+virtualization as well. The device models in BOCHS
+(http://bochs.sourceforge.net/) were decoupled from the CPU
+virtualization, and are used to virtualize the legacy devices (such as
+keyboard, mouse, VGA, IDE) in the PC platform. At this point, the
+device models run in user mode on domain 0, not in the Xen hypervisor.
+
+We would like to thank Ian Pratt and Keir Fraser for reviewing our
+design and code intensively, and for providing numerous useful
+suggestions to improve the architecture and code.
+
+We have a list of Intel team members who take credit for making this
+release happen: Yunhong Jiang, Nitin Kamble, Chengyuan Li, Xin Li,
+Xiaofeng Ling, Benjamin Liu, Asit Mallick, Jun Nakajima, Sunil Saxena,
+Arun Sharma, Edwin Zhai, Jeff Zheng, and Louis Zhuang. We'll continue
+to add more features to complete full virtualization in Xen using VT.
+
+The notes document the changes to the Xen hypervisor in order to add
+VT support. The changes to other areas, such as Control Panel will be
+added as we deliver the code.
+
+Summary of changes for the first release
+----------------------------------------
+December 15, 2004
+
+ * VT specific event handling and domain management were added.
+
+ * Shadow mode was extended to support full 32-bit guests
+
+ * Domain switching code was extended to support VT domain
+
+ * I/O request handling was added to communicate with the device model
+
+ * Domain builder was extended to provide the environment when the
+ guest enters the protected mode, including E820 memory and VGA
+ info, typically obtained by BIOS calls.
+
+New code:
+---------
+ VT (Vanderpool Technology) is based on the new VMX (Virtual
+ Machine Extensions) architecture. The current release of the
+ software supports 32-bit only.
+
+ * arch/x86/vmx.[ch] and arch/x86/vmx_*.[ch]: created to handle
+ VMX-specific events in order to provide virtual machine.
+
+ * arch/x86/x86_32/entry.S: new code path was added to have the
+ first-level handler from VM exits. The first-level handler calls
+ the second-level handler in arch/x86/vmx.c.
+
+ * arch/x86/setup.c: new function start_vmx() to init_intel() to
+ enable VMX mode.
+
+ * include/asm-x86/config.h: #ifdef CONFIG_VMX was added.
+
+ * arch/x86/domain.c: new code patch was added to create a VMX
+ domain given the flag from the control panel.
+
+ * include/public/io/ioreq.h: A new data structure was added to
+ define the I/O requests between the Xen hypervisor and the
+ device models.
+
+Changes to the existing code:
+-----------------------------
+
+ * arch/x86/shadow.[ch]: new mode SHM_full_32 was added to support
+ full virtualization. The current Xen code assumes that the guest
+ page directory and tables have _machine_ (or host) physical page
+ frame numbers, and the new code allows to support _guest_
+ physical page frame numbers
+
+ * include/asm-x86/processor.h: struct arch_vmx_struct arch_vmx has
+ been added to the thread_struct data structure. The arch_vmx has
+ the addtional VMX-related CPU context.
+
+ * arch/x86/io_apic.c: reverse mapping between vector and irq has
+ been added. We will revisit this code when considering MSI
+ support.
+
+--- Jun
+
+
diff --git a/docs/misc/crashdb.txt b/docs/misc/crashdb.txt
new file mode 100644
index 0000000000..a366f72f5d
--- /dev/null
+++ b/docs/misc/crashdb.txt
@@ -0,0 +1,50 @@
+Xen crash debugger notes
+------------------------
+
+Xen has a simple gdb stub for doing post-mortem debugging i.e. once
+you've crashed it, you get to poke around and find out why. There's
+also a special key handler for making it crash, which is handy.
+
+You need to have crash_debug=y set when compiling to enable the crash
+debugger (so go ``export crash_debug=y; make'', or ``crash_debug=y
+make'' or ``make crash_debug=y''), and you also need to enable it on
+the Xen command line, by going e.g. cdb=com1. If you need to have a
+serial port shared between cdb and the console, try cdb=com1H. CDB
+will then set the high bit on every byte it sends, and only respond to
+bytes with the high bit set. Similarly for com2.
+
+The next step depends on your individual setup. This is how to do
+it for a normal test box in the SRG:
+
+-- Make your test machine crash. Either a normal panic or hitting
+ 'C-A C-A C-A %' on the serial console will do.
+-- Start gdb as ``gdb ./xen-syms''
+-- Go ``target remote serial.srg:12331'', where 12331 is the second port
+ reported for that machine by xenuse. (In this case, the machine is
+ bombjack)
+-- Go ``add-symbol-file vmlinux''
+-- Debug as if you had a core file
+-- When you're finished, go and reboot your test box. Hitting 'R' on the
+ serial console won't work.
+
+At one stage, it was sometimes possible to resume after entering the
+debugger from the serial console. This seems to have rotted, however,
+and I'm not terribly interested in putting it back.
+
+As soon as you reach the debugger, we disable interrupts, the
+watchdog, and every other CPU, so the state of the world shouldn't
+change too much behind your back.
+
+
+Reasons why we might fail to reach the debugger:
+-----------------------------------------------
+
+-- In order to stop the other processors, we need to acquire the SMP
+ call lock. If you happen to have crashed in the middle of that,
+ you're screwed.
+-- If the page tables are wrong, you're screwed
+-- If the serial port setup is wrong, badness happens
+-- We acquire the console lock at one stage XXX this is unnecessary and
+ stupid
+-- Obviously, the low level processor state can be screwed in any
+ number of wonderful ways
diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
new file mode 100644
index 0000000000..19db4ec415
--- /dev/null
+++ b/docs/misc/grant-tables.txt
@@ -0,0 +1,325 @@
+********************************************************************************
+ A Rough Introduction to Using Grant Tables
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Christopher Clark, March, 2005.
+
+Grant tables are a mechanism for sharing and transferring frames between
+domains, without requiring the participating domains to be privileged.
+
+The first mode of use allows domA to grant domB access to a specific frame,
+whilst retaining ownership. The block front driver uses this to grant memory
+access to the block back driver, so that it may read or write as requested.
+
+ 1. domA creates a grant access reference, and transmits the ref id to domB.
+ 2. domB uses the reference to map the granted frame.
+ 3. domB performs the memory access.
+ 4. domB unmaps the granted frame.
+ 5. domA removes its grant.
+
+
+The second mode allows domA to accept a transfer of ownership of a frame from
+domB. The net front and back driver will use this for packet tx/rx. This
+mechanism is still being implemented, though the xen<->guest interface design
+is complete.
+
+ 1. domA creates an accept transfer grant reference, and transmits it to domB.
+ 2. domB uses the ref to hand over a frame it owns.
+ 3. domA accepts the transfer
+ 4. domA clears the used reference.
+
+
+********************************************************************************
+ Data structures
+ ~~~~~~~~~~~~~~~
+
+ The following data structures are used by Xen and the guests to implement
+ grant tables:
+
+ 1. Shared grant entries
+ 2. Active grant entries
+ 3. Map tracking
+
+ These are not the users primary interface to grant tables, but are discussed
+ because an understanding of how they work may be useful. Each of these is a
+ finite resource.
+
+ Shared grant entries
+ ~~~~~~~~~~~~~~~~~~~~
+
+ A set of pages are shared between Xen and a guest, holding the shared grant
+ entries. The guest writes into these entries to create grant references. The
+ index of the entry is transmitted to the remote domain: this is the
+ reference used to activate an entry. Xen will write into a shared entry to
+ indicate to a guest that its grant is in use.
+ sha->domid : remote domain being granted rights
+ sha->frame : machine frame being granted
+ sha->flags : allow access, allow transfer, remote is reading/writing, etc.
+
+ Active grant entries
+ ~~~~~~~~~~~~~~~~~~~~
+
+ Xen maintains a set of private frames per domain, holding the active grant
+ entries for safety, and to reference count mappings.
+ act->domid : remote domain being granted rights
+ act->frame : machine frame being granted
+ act->pin : used to hold reference counts
+
+ Map tracking
+ ~~~~~~~~~~~~
+
+ Every time a frame is mapped, a map track entry is stored in the metadata of
+ the mapping domain. The index of this entry is returned from the map call,
+ and is used to unmap the frame. Map track entries are also searched whenever a
+ page table entry containing a foreign frame number is overwritten: the first
+ matching map track entry is then removed, as if unmap had been invoked.
+ These are not used by the transfer mechanism.
+ map->domid : owner of the mapped frame
+ map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+
+********************************************************************************
+
+ Granting a foreign domain access to frames
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ domA [frame]--> domB
+
+
+ domA: #include <asm-xen/gnttab.h>
+ grant_ref_t gref[BATCH_SIZE];
+
+ for ( i = 0; i < BATCH_SIZE; i++ )
+ gref[i] = gnttab_grant_foreign_access( domBid, mfn, (readonly ? 1 : 0) );
+
+
+ .. gref is then somehow transmitted to domB for use.
+
+
+ Mapping foreign frames
+ ~~~~~~~~~~~~~~~~~~~~~~
+
+ domB: #include <asm-xen/hypervisor.h>
+ unsigned long mmap_vstart;
+ gnttab_op_t aop[BATCH_SIZE];
+ grant_ref_t mapped_handle[BATCH_SIZE];
+
+ if ( (mmap_vstart = allocate_empty_lowmem_region(BATCH_SIZE)) == 0 )
+ BUG();
+
+ for ( i = 0; i < BATCH_SIZE; i++ )
+ {
+ aop[i].u.map_grant_ref.host_virt_addr =
+ mmap_vstart + (i * PAGE_SIZE);
+ aop[i].u.map_grant_ref.dom = domAid;
+ aop[i].u.map_grant_ref.ref = gref[i];
+ aop[i].u.map_grant_ref.flags = ( GNTMAP_host_map | GNTMAP_readonly );
+ }
+
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, aop, BATCH_SIZE)))
+ BUG();
+
+ for ( i = 0; i < BATCH_SIZE; i++ )
+ {
+ if ( unlikely(aop[i].u.map_grant_ref.handle < 0) )
+ {
+ tidyup_all(aop, i);
+ goto panic;
+ }
+
+ phys_to_machine_mapping[__pa(mmap_vstart + (i * PAGE_SIZE))>>PAGE_SHIFT] =
+ FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr);
+
+ mapped_handle[i] = aop[i].u.map_grant_ref.handle;
+ }
+
+
+
+ Unmapping foreign frames
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+ domB:
+ for ( i = 0; i < BATCH_SIZE; i++ )
+ {
+ aop[i].u.unmap_grant_ref.host_virt_addr = mmap_vstart + (i * PAGE_SIZE);
+ aop[i].u.unmap_grant_ref.dev_bus_addr = 0;
+ aop[i].u.unmap_grant_ref.handle = mapped_handle[i];
+ }
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, aop, BATCH_SIZE)))
+ BUG();
+
+
+ Ending foreign access
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ Note that this only prevents further mappings; it does _not_ revoke access.
+ Should _only_ be used when the remote domain has unmapped the frame.
+ gnttab_query_foreign_access( gref ) will indicate the state of any mapping.
+
+ domA:
+ if ( gnttab_query_foreign_access( gref[i] ) == 0 )
+ gnttab_end_foreign_access( gref[i], readonly );
+
+ TODO: readonly yet to be implemented.
+
+
+********************************************************************************
+
+ Transferring ownership of a frame to another domain
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ [ XXX: Transfer mechanism is alpha-calibre code, untested, use at own risk XXX ]
+ [ XXX: show use of batch operations below, rather than single frame XXX ]
+ [ XXX: linux internal interface could/should be wrapped to be tidier XXX ]
+
+
+ Prepare to accept a frame from a foreign domain
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ domA:
+ if ( (p = alloc_page(GFP_HIGHUSER)) == NULL )
+ {
+ printk("Cannot alloc a frame to surrender\n");
+ break;
+ }
+ pfn = p - mem_map;
+ mfn = phys_to_machine_mapping[pfn];
+
+ if ( !PageHighMem(p) )
+ {
+ v = phys_to_virt(pfn << PAGE_SHIFT);
+ scrub_pages(v, 1);
+ queue_l1_entry_update(get_ptep((unsigned long)v), 0);
+ }
+
+ /* Ensure that ballooned highmem pages don't have cached mappings. */
+ kmap_flush_unused();
+
+ /* Flush updates through and flush the TLB. */
+ xen_tlb_flush();
+
+ phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+
+ if ( HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation, &mfn, 1, 0) != 1 )
+ {
+ printk("MEMOP_decrease_reservation failed\n");
+ /* er... ok. free the page then */
+ __free_page(p);
+ break;
+ }
+
+ accepting_pfn = pfn;
+ ref = gnttab_grant_foreign_transfer( (domid_t) args.arg[0], pfn );
+ printk("Accepting dom %lu frame at ref (%d)\n", args.arg[0], ref);
+
+
+ Transfer a frame to a foreign domain
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ domB:
+ mmu_update_t update;
+ domid_t domid;
+ grant_ref_t gref;
+ unsigned long pfn, mfn, *v;
+ struct page *transfer_page = 0;
+
+ /* alloc a page and grant access.
+ * alloc page returns a page struct. */
+ if ( (transfer_page = alloc_page(GFP_HIGHUSER)) == NULL )
+ return -ENOMEM;
+
+ pfn = transfer_page - mem_map;
+ mfn = phys_to_machine_mapping[pfn];
+
+ /* need to remove all references to this page */
+ if ( !PageHighMem(transfer_page) )
+ {
+ v = phys_to_virt(pfn << PAGE_SHIFT);
+ scrub_pages(v, 1);
+ sprintf((char *)v, "This page (%lx) was transferred.\n", mfn);
+ queue_l1_entry_update(get_ptep((unsigned long)v), 0);
+ }
+#ifdef CONFIG_XEN_SCRUB_PAGES
+ else
+ {
+ v = kmap(transfer_page);
+ scrub_pages(v, 1);
+ sprintf((char *)v, "This page (%lx) was transferred.\n", mfn);
+ kunmap(transfer_page);
+ }
+#endif
+ /* Delete any cached kmappings */
+ kmap_flush_unused();
+
+ /* Flush updates through and flush the TLB */
+ xen_tlb_flush();
+
+ /* invalidate in P2M */
+ phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+
+ domid = (domid_t)args.arg[0];
+ gref = (grant_ref_t)args.arg[1];
+
+ update.ptr = MMU_EXTENDED_COMMAND;
+ update.ptr |= ((gref & 0x00FF) << 2);
+ update.ptr |= mfn << PAGE_SHIFT;
+
+ update.val = MMUEXT_TRANSFER_PAGE;
+ update.val |= (domid << 16);
+ update.val |= (gref & 0xFF00);
+
+ ret = HYPERVISOR_mmu_update(&update, 1, NULL);
+
+
+ Map a transferred frame
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+ TODO:
+
+
+ Clear the used transfer reference
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ TODO:
+
+
+********************************************************************************
+
+ Using a private reserve of grant references
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Where it is known in advance how many grant references are required, and
+failure to allocate them on demand would cause difficulty, a batch can be
+allocated and held in a private reserve.
+
+To reserve a private batch:
+
+ /* housekeeping data - treat as opaque: */
+ grant_ref_t gref_head, gref_terminal;
+
+ if ( 0 > gnttab_alloc_grant_references( number_to_reserve,
+ &gref_head, &gref_terminal ))
+ return -ENOSPC;
+
+
+To release a batch back to the shared pool:
+
+ gnttab_free_grant_references( number_reserved, gref_head );
+
+
+To claim a reserved reference:
+
+ ref = gnttab_claim_grant_reference( &gref_head, gref_terminal );
+
+
+To release a claimed reference back to the reserve pool:
+
+ gnttab_release_grant_reference( &gref_head, gref );
+
+
+To use a claimed reference to grant access, use these alternative functions
+that take an additional parameter of the grant reference to use:
+
+ gnttab_grant_foreign_access_ref
+ gnttab_grant_foreign_transfer_ref
diff --git a/docs/misc/sedf_scheduler_mini-HOWTO.txt b/docs/misc/sedf_scheduler_mini-HOWTO.txt
new file mode 100644
index 0000000000..ad3cc62e1f
--- /dev/null
+++ b/docs/misc/sedf_scheduler_mini-HOWTO.txt
@@ -0,0 +1,44 @@
+sEDF scheduler
+--------------
+Author:
+ Stephan.Diestelhorst@{cl.cam.ac.uk, inf.tu-dresden.de}
+
+Overview:
+ This scheduler provides weighted CPU sharing in an intuitive way and
+ uses real-time algorithms to ensure time guarantees.
+
+Usage:
+   -add "sched=sedf" on Xen's boot command-line
+   -create domains as usual
+   -use "xm sedf <dom-id> <period> <slice> <latency-hint> <extra> <weight>"
+  Where:
+  -period/slice are the normal EDF scheduling parameters in nanosecs
+  -latency-hint is the scaled period in case the domain is doing heavy I/O
+ (unused by the currently compiled version)
+  -extra is a flag (0/1), which controls whether the domain can run in
+ extra-time
+  -weight is mutually exclusive with period/slice and specifies another
+ way of setting a domain's cpu slice
+
+Examples:
+ normal EDF (20ms/5ms):
+  xm sedf <dom-id> 20000000 5000000 0 0 0
+
+ best-effort domains (i.e. non-realtime):
+  xm sedf <dom-id> 20000000 0 0 1 0
+ normal EDF (20ms/5ms) + share of extra-time:
+  xm sedf <dom-id> 20000000 5000000 0 1 0
+
+ 4 domains with weights 2:3:4:2
+  xm sedf <d1> 0 0 0 0 2
+  xm sedf <d2> 0 0 0 0 3
+  xm sedf <d3> 0 0 0 0 4
+  xm sedf <d4> 0 0 0 0 2
+
+ 1 fully-specified (10ms/3ms) domain, 3 other domains share
+ available rest in 2:7:3 ratio:
+  xm sedf <d1> 10000000 3000000 0 0 0
+  xm sedf <d2> 0 0 0 0 2
+  xm sedf <d3> 0 0 0 0 7
+  xm sedf <d4> 0 0 0 0 3 \ No newline at end of file
diff --git a/docs/src/interface.tex b/docs/src/interface.tex
index f06313a20c..1f2ee72470 100644
--- a/docs/src/interface.tex
+++ b/docs/src/interface.tex
@@ -721,24 +721,6 @@ stack-frame layout is used.
\end{quote}
-Finally, as an optimization it is possible for each guest OS
-to install one ``fast trap'': this is a trap gate which will
-allow direct transfer of control from ring 3 into ring 1 without
-indirecting via Xen. In most cases this is suitable for use by
-the guest OS system call mechanism, although it may be used for
-any purpose.
-
-
-\begin{quote}
-\hypercall{set\_fast\_trap(int idx)}
-
-Install the handler for exception vector {\tt idx} as the ``fast
-trap'' for this domain. Note that this installs the current handler
-(i.e. that which has been installed more recently via a call
-to {\tt set\_trap\_table()}).
-
-\end{quote}
-
\section{Scheduling and Timer}
diff --git a/docs/src/user.tex b/docs/src/user.tex
index 408d127df6..b3a2786979 100644
--- a/docs/src/user.tex
+++ b/docs/src/user.tex
@@ -491,7 +491,7 @@ title Xen 2.0 / XenLinux 2.6
The kernel line tells GRUB where to find Xen itself and what boot
parameters should be passed to it (in this case, setting domain 0's
-memory allocation and the settings for the serial port). For more
+memory allocation in kilobytes and the settings for the serial port). For more
details on the various Xen boot parameters see Section~\ref{s:xboot}.
The module line of the configuration describes the location of the
@@ -536,7 +536,7 @@ with:
\begin{verbatim}
kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
\end{verbatim}}
-\end{quote}
+\end{quote}
This configures Xen to output on COM1 at 115,200 baud, 8 data bits,
1 stop bit and no parity. Modify these parameters for your set up.
@@ -1101,7 +1101,7 @@ features.
To initialise a partition to support LVM volumes:
\begin{quote}
\begin{verbatim}
-# pvcreate /dev/sda10
+# pvcreate /dev/sda10
\end{verbatim}
\end{quote}
@@ -1680,12 +1680,6 @@ should be appended to Xen's command line, either manually or by
editing \path{grub.conf}.
\begin{description}
-\item [ignorebiostables ]
- Disable parsing of BIOS-supplied tables. This may help with some
- chipsets that aren't fully supported by Xen. If you specify this
- option then ACPI tables are also ignored, and SMP support is
- disabled.
-
\item [noreboot ]
Don't reboot the machine automatically on errors. This is
useful to catch debug output if you aren't catching console messages
@@ -1695,16 +1689,9 @@ editing \path{grub.conf}.
Disable SMP support.
This option is implied by `ignorebiostables'.
-\item [noacpi ]
- Disable ACPI tables, which confuse Xen on some chipsets.
- This option is implied by `ignorebiostables'.
-
\item [watchdog ]
Enable NMI watchdog which can report certain failures.
-\item [noht ]
- Disable Hyperthreading.
-
\item [badpage=$<$page number$>$,$<$page number$>$, \ldots ]
Specify a list of pages not to be allocated for use
because they contain bad bytes. For example, if your
@@ -1739,6 +1726,13 @@ editing \path{grub.conf}.
transmitted/received character.
[NB. Default for this option is `com1,vga']
+\item [sync\_console ]
+ Force synchronous console output. This is useful if your system fails
+ unexpectedly before it has sent all available output to the
+ console. In most cases Xen will automatically enter synchronous mode
+ when an exceptional event occurs, but this option provides a manual
+ fallback.
+
\item [conswitch=$<$switch-char$><$auto-switch-char$>$ ]
Specify how to switch serial-console input between
Xen and DOM0. The required sequence is CTRL-$<$switch-char$>$
@@ -1756,8 +1750,19 @@ editing \path{grub.conf}.
`nmi=dom0': Inform DOM0 of the NMI. \\
`nmi=ignore': Ignore the NMI.
+\item [mem=xxx ]
+ Set the physical RAM address limit. Any RAM appearing beyond this
+ physical address in the memory map will be ignored. This parameter
+ may be specified with a B, K, M or G suffix, representing bytes,
+ kilobytes, megabytes and gigabytes respectively. The
+ default unit, if no suffix is specified, is bytes.
+
\item [dom0\_mem=xxx ]
- Set the amount of memory (in kB) to be allocated to domain0.
+ Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
+ may be specified with a B, K, M or G suffix, representing bytes,
+ kilobytes, megabytes and gigabytes respectively; if no suffix is specified,
+ the parameter defaults to kilobytes. In previous versions of Xen, suffixes
+ were not supported and the value was always interpreted as kilobytes.
\item [tbuf\_size=xxx ]
Set the size of the per-cpu trace buffers, in pages
@@ -1770,15 +1775,39 @@ editing \path{grub.conf}.
possibilities are `bvt' (default), `atropos' and `rrobin'.
For more information see Section~\ref{s:sched}.
-\item [physdev\_dom0\_hide=(xx:xx.x)(yy:yy.y)\ldots ]
-Hide selected PCI devices from domain 0 (for instance, to stop it
-taking ownership of them so that they can be driven by another
-domain). Device IDs should be given in hex format. Bridge devices do
-not need to be hidden --- they are hidden implicitly, since guest OSes
-do not need to configure them.
+\item [apic\_verbosity=debug,verbose ]
+ Print more detailed information about local APIC and IOAPIC configuration.
+
+\item [lapic ]
+ Force use of local APIC even when left disabled by uniprocessor BIOS.
+
+\item [nolapic ]
+ Ignore local APIC in a uniprocessor system, even if enabled by the BIOS.
+
+\item [apic=bigsmp,default,es7000,summit ]
+ Specify NUMA platform. This can usually be probed automatically.
+
\end{description}
+In addition, the following options may be specified on the Xen command
+line. Since domain 0 shares responsibility for booting the platform,
+Xen will automatically propagate these options to its command
+line. These options are taken from Linux's command-line syntax with
+unchanged semantics.
+
+\begin{description}
+\item [acpi=off,force,strict,ht,noirq,\ldots ]
+ Modify how Xen (and domain 0) parses the BIOS ACPI tables.
+
+\item [acpi\_skip\_timer\_override ]
+ Instruct Xen (and domain 0) to ignore timer-interrupt override
+ instructions specified by the BIOS ACPI tables.
+\item [noapic ]
+ Instruct Xen (and domain 0) to ignore any IOAPICs that are present in
+ the system, and instead continue to use the legacy PIC.
+
+\end{description}
\section{XenLinux Boot Options}
diff --git a/extras/mini-os/Makefile b/extras/mini-os/Makefile
index 606454b6bb..56bc7f9e5f 100644
--- a/extras/mini-os/Makefile
+++ b/extras/mini-os/Makefile
@@ -2,31 +2,44 @@
CC := gcc
LD := ld
-# Linker should relocate monitor to this address
-MONITOR_BASE := 0xE0100000
+TARGET_ARCH := $(shell uname -m | sed -e s/i.86/x86_32/)
# NB. '-Wcast-qual' is nasty, so I omitted it.
-CFLAGS := -fno-builtin -O3 -Wall -Ih/ -Wredundant-decls
-CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline -ansi
+CFLAGS := -fno-builtin -O3 -Wall -Ih/ -Wredundant-decls -Wno-format
+CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline
+
+ifeq ($(TARGET_ARCH),x86_32)
+CFLAGS += -m32 -march=i686
+LDFLAGS := -m elf_i386
+endif
+
+ifeq ($(TARGET_ARCH),x86_64)
+CFLAGS += -m64 -mno-red-zone -fpic -fno-reorder-blocks
+CFLAGS += -fno-asynchronous-unwind-tables
+LDFLAGS := -m elf_x86_64
+endif
TARGET := mini-os
-LOBJS := lib/malloc.o lib/math.o lib/printf.o lib/string.o
-OBJS := entry.o kernel.o traps.o hypervisor.o mm.o events.o time.o ${LOBJS}
+OBJS := $(TARGET_ARCH).o
+OBJS += $(patsubst %.c,%.o,$(wildcard *.c))
+OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c))
-HINTF := h/xen-public/xen.h
-HDRS := h/os.h h/types.h h/hypervisor.h h/mm.h h/events.h h/time.h h/lib.h
-HDRS += $(HINTF)
+OBJS := $(subst events.o,,$(OBJS))
+OBJS := $(subst hypervisor.o,,$(OBJS))
+OBJS := $(subst time.o,,$(OBJS))
+
+HDRS := $(wildcard h/*.h)
+HDRS += $(wildcard h/xen-public/*.h)
default: $(TARGET)
xen-public:
- [ -e h/xen-public] || ln -sf ../../../xen/include/public h/xen-public
+ [ -e h/xen-public ] || ln -sf ../../../xen/include/public h/xen-public
-$(TARGET): xen-public head.o $(OBJS)
- $(LD) -N -T minios.lds head.o $(OBJS) -o $@.elf
- objcopy -R .note -R .comment $@.elf $@
- gzip -f -9 -c $@ >$@.gz
+$(TARGET): xen-public $(OBJS)
+ $(LD) -N -T minios-$(TARGET_ARCH).lds $(OBJS) -o $@.elf
+ gzip -f -9 -c $@.elf >$@.gz
clean:
find . -type f -name '*.o' | xargs rm -f
@@ -38,4 +51,3 @@ clean:
%.o: %.S $(HDRS) Makefile
$(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
-
diff --git a/extras/mini-os/h/hypervisor.h b/extras/mini-os/h/hypervisor.h
index 3a127e1290..c167850d55 100644
--- a/extras/mini-os/h/hypervisor.h
+++ b/extras/mini-os/h/hypervisor.h
@@ -36,13 +36,25 @@ void ack_hypervisor_event(unsigned int ev);
* Assembler stubs for hyper-calls.
*/
+#ifdef __i386__
+#define _a1 "b"
+#define _a2 "c"
+#define _a3 "d"
+#define _a4 "S"
+#else
+#define _a1 "D"
+#define _a2 "S"
+#define _a3 "d"
+#define _a4 "b"
+#endif
+
static __inline__ int HYPERVISOR_set_trap_table(trap_info_t *table)
{
int ret;
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_set_trap_table),
- "b" (table) : "memory" );
+ _a1 (table) : "memory" );
return ret;
}
@@ -55,7 +67,7 @@ static __inline__ int HYPERVISOR_mmu_update(mmu_update_t *req,
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_mmu_update),
- "b" (req), "c" (count), "d" (success_count) : "memory" );
+ _a1 (req), _a2 (count), _a3 (success_count) : "memory" );
return ret;
}
@@ -66,7 +78,7 @@ static __inline__ int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_set_gdt),
- "b" (frame_list), "c" (entries) : "memory" );
+ _a1 (frame_list), _a2 (entries) : "memory" );
return ret;
@@ -78,11 +90,12 @@ static __inline__ int HYPERVISOR_stack_switch(unsigned long ss, unsigned long es
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_stack_switch),
- "b" (ss), "c" (esp) : "memory" );
+ _a1 (ss), _a2 (esp) : "memory" );
return ret;
}
+#ifdef __i386__
static __inline__ int HYPERVISOR_set_callbacks(
unsigned long event_selector, unsigned long event_address,
unsigned long failsafe_selector, unsigned long failsafe_address)
@@ -91,18 +104,40 @@ static __inline__ int HYPERVISOR_set_callbacks(
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_set_callbacks),
- "b" (event_selector), "c" (event_address),
- "d" (failsafe_selector), "S" (failsafe_address) : "memory" );
+ _a1 (event_selector), _a2 (event_address),
+ _a3 (failsafe_selector), _a4 (failsafe_address) : "memory" );
+
+ return ret;
+}
+#else
+static __inline__ int HYPERVISOR_set_callbacks(
+ unsigned long event_address,
+ unsigned long failsafe_address,
+ unsigned long syscall_address)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks),
+ _a1 (event_address), _a2 (failsafe_address),
+ _a3 (syscall_address) : "memory" );
return ret;
}
+#endif
-static __inline__ int HYPERVISOR_fpu_taskswitch(void)
+static __inline__ int
+HYPERVISOR_fpu_taskswitch(
+ int set)
{
int ret;
+ unsigned long ign;
+
__asm__ __volatile__ (
TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" );
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set)
+ : "memory" );
return ret;
}
@@ -113,7 +148,7 @@ static __inline__ int HYPERVISOR_yield(void)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_yield) : "memory" );
+ _a1 (SCHEDOP_yield) : "memory" );
return ret;
}
@@ -124,7 +159,7 @@ static __inline__ int HYPERVISOR_block(void)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_block) : "memory" );
+ _a1 (SCHEDOP_block) : "memory" );
return ret;
}
@@ -135,7 +170,7 @@ static __inline__ int HYPERVISOR_shutdown(void)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
+ _a1 (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
: "memory" );
return ret;
@@ -147,7 +182,7 @@ static __inline__ int HYPERVISOR_reboot(void)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
+ _a1 (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
: "memory" );
return ret;
@@ -160,7 +195,7 @@ static __inline__ int HYPERVISOR_suspend(unsigned long srec)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
+ _a1 (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
"S" (srec) : "memory" );
return ret;
@@ -172,7 +207,7 @@ static __inline__ long HYPERVISOR_set_timer_op(void *timer_arg)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_set_timer_op),
- "b" (timer_arg) : "memory" );
+ _a1 (timer_arg) : "memory" );
return ret;
}
@@ -183,7 +218,7 @@ static __inline__ int HYPERVISOR_dom0_op(void *dom0_op)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_dom0_op),
- "b" (dom0_op) : "memory" );
+ _a1 (dom0_op) : "memory" );
return ret;
}
@@ -194,7 +229,7 @@ static __inline__ int HYPERVISOR_set_debugreg(int reg, unsigned long value)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_set_debugreg),
- "b" (reg), "c" (value) : "memory" );
+ _a1 (reg), _a2 (value) : "memory" );
return ret;
}
@@ -205,7 +240,7 @@ static __inline__ unsigned long HYPERVISOR_get_debugreg(int reg)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_get_debugreg),
- "b" (reg) : "memory" );
+ _a1 (reg) : "memory" );
return ret;
}
@@ -217,18 +252,7 @@ static __inline__ int HYPERVISOR_update_descriptor(
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_update_descriptor),
- "b" (pa), "c" (word1), "d" (word2) : "memory" );
-
- return ret;
-}
-
-static __inline__ int HYPERVISOR_set_fast_trap(int idx)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_fast_trap),
- "b" (idx) : "memory" );
+ _a1 (pa), _a2 (word1), _a3 (word2) : "memory" );
return ret;
}
@@ -239,7 +263,7 @@ static __inline__ int HYPERVISOR_dom_mem_op(void *dom_mem_op)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op),
- "b" (dom_mem_op) : "memory" );
+ _a1 (dom_mem_op) : "memory" );
return ret;
}
@@ -250,7 +274,7 @@ static __inline__ int HYPERVISOR_multicall(void *call_list, int nr_calls)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_multicall),
- "b" (call_list), "c" (nr_calls) : "memory" );
+ _a1 (call_list), _a2 (nr_calls) : "memory" );
return ret;
}
@@ -262,7 +286,7 @@ static __inline__ int HYPERVISOR_update_va_mapping(
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping),
- "b" (page_nr), "c" (new_val), "d" (flags) : "memory" );
+ _a1 (page_nr), _a2 (new_val), _a3 (flags) : "memory" );
return ret;
}
@@ -273,7 +297,7 @@ static __inline__ int HYPERVISOR_xen_version(int cmd)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_xen_version),
- "b" (cmd) : "memory" );
+ _a1 (cmd) : "memory" );
return ret;
}
@@ -284,7 +308,7 @@ static __inline__ int HYPERVISOR_console_io(int cmd, int count, char *str)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_console_io),
- "b" (cmd), "c" (count), "d" (str) : "memory" );
+ _a1 (cmd), _a2 (count), _a3 (str) : "memory" );
return ret;
}
diff --git a/extras/mini-os/h/lib.h b/extras/mini-os/h/lib.h
index d9996a2f85..0b405ee006 100644
--- a/extras/mini-os/h/lib.h
+++ b/extras/mini-os/h/lib.h
@@ -55,22 +55,7 @@
#ifndef _LIB_H_
#define _LIB_H_
-
-/* variadic function support */
-typedef char *va_list;
-#define __va_size(type) \
- (((sizeof(type) + sizeof(int) - 1) / sizeof(int)) * sizeof(int))
-#ifdef __GNUC__
-#define va_start(ap, last) \
- ((ap) = (va_list)__builtin_next_arg(last))
-#else
-#define va_start(ap, last) \
- ((ap) = (va_list)&(last) + __va_size(last))
-#endif
-#define va_arg(ap, type) \
- (*(type *)((ap) += __va_size(type), (ap) - __va_size(type)))
-#define va_end(ap)
-
+#include <stdarg.h>
/* printing */
#define printk printf
diff --git a/extras/mini-os/h/mm.h b/extras/mini-os/h/mm.h
index e12b56495d..c5f6ad4788 100644
--- a/extras/mini-os/h/mm.h
+++ b/extras/mini-os/h/mm.h
@@ -1,20 +1,8 @@
/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
- ****************************************************************************
*
- * File: mm.h
- * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
- * Changes:
- *
- * Date: Aug 2003
- *
- * Environment:
- * Description:
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ * Copyright (c) 2005, Keir A Fraser
*
- ****************************************************************************
- * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
- ****************************************************************************
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
@@ -37,6 +25,48 @@
#ifndef _MM_H_
#define _MM_H_
+#ifdef __x86_64__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define L4_PAGETABLE_SHIFT 39
+
+#define L1_PAGETABLE_ENTRIES 512
+#define L2_PAGETABLE_ENTRIES 512
+#define L3_PAGETABLE_ENTRIES 512
+#define L4_PAGETABLE_ENTRIES 512
+
+/* These are page-table limitations. Current CPUs support only 40-bit phys. */
+#define PADDR_BITS 52
+#define VADDR_BITS 48
+#define PADDR_MASK ((1UL << PADDR_BITS)-1)
+#define VADDR_MASK ((1UL << VADDR_BITS)-1)
+
+#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
+#define l2_table_offset(_a) \
+ (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#define l3_table_offset(_a) \
+ (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#define l4_table_offset(_a) \
+ (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
+#endif
+
+#define _PAGE_PRESENT 0x001UL
+#define _PAGE_RW 0x002UL
+#define _PAGE_USER 0x004UL
+#define _PAGE_PWT 0x008UL
+#define _PAGE_PCD 0x010UL
+#define _PAGE_ACCESSED 0x020UL
+#define _PAGE_DIRTY 0x040UL
+#define _PAGE_PAT 0x080UL
+#define _PAGE_PSE 0x080UL
+#define _PAGE_GLOBAL 0x100UL
+
#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
@@ -64,10 +94,16 @@ static __inline__ unsigned long machine_to_phys(unsigned long machine)
return phys;
}
+#ifdef __x86_64__
+#define VIRT_START 0xFFFFFFFF00000000UL
+#else
#define VIRT_START 0xC0000000UL
+#endif
#define to_phys(x) ((unsigned long)(x)-VIRT_START)
#define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START))
+#define __va to_virt
+#define __pa to_phys
void init_mm(void);
unsigned long alloc_pages(int order);
diff --git a/extras/mini-os/h/os.h b/extras/mini-os/h/os.h
index cf15d92126..434dd992f6 100644
--- a/extras/mini-os/h/os.h
+++ b/extras/mini-os/h/os.h
@@ -9,116 +9,130 @@
#define NULL 0
-/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented
- a mechanism by which the user can annotate likely branch directions and
- expect the blocks to be reordered appropriately. Define __builtin_expect
- to nothing for earlier compilers. */
-
#if __GNUC__ == 2 && __GNUC_MINOR__ < 96
#define __builtin_expect(x, expected_value) (x)
#endif
+#define unlikely(x) __builtin_expect((x),0)
-/*
- * These are the segment descriptors provided for us by the hypervisor.
- * For now, these are hardwired -- guest OSes cannot update the GDT
- * or LDT.
- *
- * It shouldn't be hard to support descriptor-table frobbing -- let me
- * know if the BSD or XP ports require flexibility here.
- */
-
+#define smp_processor_id() 0
+#define preempt_disable() ((void)0)
+#define preempt_enable() ((void)0)
-/*
- * these are also defined in xen-public/xen.h but can't be pulled in as
- * they are used in start of day assembly. Need to clean up the .h files
- * a bit more...
- */
+#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0))
-#ifndef FLAT_RING1_CS
-#define FLAT_RING1_CS 0x0819
-#define FLAT_RING1_DS 0x0821
-#define FLAT_RING3_CS 0x082b
-#define FLAT_RING3_DS 0x0833
+#ifndef __ASSEMBLY__
+#include <types.h>
#endif
+#include <xen-public/xen.h>
-#define __KERNEL_CS FLAT_RING1_CS
-#define __KERNEL_DS FLAT_RING1_DS
+#define __KERNEL_CS FLAT_KERNEL_CS
+#define __KERNEL_DS FLAT_KERNEL_DS
+#define __KERNEL_SS FLAT_KERNEL_SS
+
+#define TRAP_divide_error 0
+#define TRAP_debug 1
+#define TRAP_nmi 2
+#define TRAP_int3 3
+#define TRAP_overflow 4
+#define TRAP_bounds 5
+#define TRAP_invalid_op 6
+#define TRAP_no_device 7
+#define TRAP_double_fault 8
+#define TRAP_copro_seg 9
+#define TRAP_invalid_tss 10
+#define TRAP_no_segment 11
+#define TRAP_stack_error 12
+#define TRAP_gp_fault 13
+#define TRAP_page_fault 14
+#define TRAP_spurious_int 15
+#define TRAP_copro_error 16
+#define TRAP_alignment_check 17
+#define TRAP_machine_check 18
+#define TRAP_simd_error 19
+#define TRAP_deferred_nmi 31
/* Everything below this point is not included by assembler (.S) files. */
#ifndef __ASSEMBLY__
-#include <types.h>
-#include <xen-public/xen.h>
+#define pt_regs xen_regs
-
-/* this struct defines the way the registers are stored on the
- stack during an exception or interrupt. */
-struct pt_regs {
- long ebx;
- long ecx;
- long edx;
- long esi;
- long edi;
- long ebp;
- long eax;
- int xds;
- int xes;
- long orig_eax;
- long eip;
- int xcs;
- long eflags;
- long esp;
- int xss;
-};
-
-/* some function prototypes */
void trap_init(void);
void dump_regs(struct pt_regs *regs);
-
-/*
- * STI/CLI equivalents. These basically set and clear the virtual
- * event_enable flag in teh shared_info structure. Note that when
- * the enable bit is set, there may be pending events to be handled.
- * We may therefore call into do_hypervisor_callback() directly.
+/*
+ * The use of 'barrier' in the following reflects their use as local-lock
+ * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
+ * critical operations are executed. All critical operations must complete
+ * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
+ * includes these barriers, for example.
*/
-#define unlikely(x) __builtin_expect((x),0)
-#define __save_flags(x) \
-do { \
- (x) = test_bit(EVENTS_MASTER_ENABLE_BIT, \
- &HYPERVISOR_shared_info->events_mask); \
- barrier(); \
+
+#define __cli() \
+do { \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
+} while (0)
+
+#define __sti() \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 0; \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
} while (0)
-#define __restore_flags(x) \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- if (x) set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask); \
- barrier(); \
- if ( unlikely(_shared->events) && (x) ) do_hypervisor_callback(NULL); \
+#define __save_flags(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
} while (0)
-#define __cli() \
-do { \
- clear_bit(EVENTS_MASTER_ENABLE_BIT, &HYPERVISOR_shared_info->events_mask);\
- barrier(); \
+#define __restore_flags(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
+ } else \
+ preempt_enable_no_resched(); \
} while (0)
-#define __sti() \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- set_bit(EVENTS_MASTER_ENABLE_BIT, &_shared->events_mask); \
- barrier(); \
- if ( unlikely(_shared->events) ) do_hypervisor_callback(NULL); \
+#define safe_halt() ((void)0)
+
+#define __save_and_cli(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
} while (0)
-#define cli() __cli()
-#define sti() __sti()
-#define save_flags(x) __save_flags(x)
-#define restore_flags(x) __restore_flags(x)
-#define save_and_cli(x) __save_and_cli(x)
-#define save_and_sti(x) __save_and_sti(x)
+#define local_irq_save(x) __save_and_cli(x)
+#define local_irq_restore(x) __restore_flags(x)
+#define local_save_flags(x) __save_flags(x)
+#define local_irq_disable() __cli()
+#define local_irq_enable() __sti()
+#define irqs_disabled() \
+ HYPERVISOR_shared_info->vcpu_data[smp_processor_id()].evtchn_upcall_mask
/* This is a barrier for the compiler only, NOT the processor! */
#define barrier() __asm__ __volatile__("": : :"memory")
diff --git a/extras/mini-os/h/types.h b/extras/mini-os/h/types.h
index 85ede91c13..7bf103ab9c 100644
--- a/extras/mini-os/h/types.h
+++ b/extras/mini-os/h/types.h
@@ -26,8 +26,13 @@ typedef signed short s16;
typedef unsigned short u16;
typedef signed int s32;
typedef unsigned int u32;
+#ifdef __i386__
typedef signed long long s64;
typedef unsigned long long u64;
+#elif defined(__x86_64__)
+typedef signed long s64;
+typedef unsigned long u64;
+#endif
typedef unsigned int size_t;
@@ -35,7 +40,13 @@ typedef unsigned int size_t;
typedef unsigned char u_char;
typedef unsigned int u_int;
typedef unsigned long u_long;
+#ifdef __i386__
typedef long long quad_t;
typedef unsigned long long u_quad_t;
typedef unsigned int uintptr_t;
+#elif defined(__x86_64__)
+typedef long quad_t;
+typedef unsigned long u_quad_t;
+typedef unsigned long uintptr_t;
+#endif
#endif /* _TYPES_H_ */
diff --git a/extras/mini-os/head.S b/extras/mini-os/head.S
deleted file mode 100644
index 52eae8f818..0000000000
--- a/extras/mini-os/head.S
+++ /dev/null
@@ -1,18 +0,0 @@
-#include <os.h>
-
-.globl _start, shared_info
-
-_start:
- cld
- lss stack_start,%esp
- push %esi
- call start_kernel
-
-stack_start:
- .long stack+8192, __KERNEL_DS
-
- /* Unpleasant -- the PTE that maps this page is actually overwritten */
- /* to map the real shared-info page! :-) */
- .org 0x1000
-shared_info:
- .org 0x2000
diff --git a/extras/mini-os/kernel.c b/extras/mini-os/kernel.c
index a9f423c192..b6f89b8bbb 100644
--- a/extras/mini-os/kernel.c
+++ b/extras/mini-os/kernel.c
@@ -64,8 +64,8 @@ extern char shared_info[PAGE_SIZE];
static shared_info_t *map_shared_info(unsigned long pa)
{
- if ( HYPERVISOR_update_va_mapping((unsigned long)shared_info >> PAGE_SHIFT,
- pa | 3, UVMF_INVLPG) )
+ if ( HYPERVISOR_update_va_mapping(
+ (unsigned long)shared_info, pa | 7, UVMF_INVLPG) )
{
printk("Failed to map shared_info!!\n");
*(int*)0=0;
@@ -79,6 +79,9 @@ static shared_info_t *map_shared_info(unsigned long pa)
*/
void start_kernel(start_info_t *si)
{
+ static char hello[] = "Bootstrapping...\n";
+ (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(hello), hello);
+
/* Copy the start_info struct to a globally-accessible area. */
memcpy(&start_info, si, sizeof(*si));
@@ -86,9 +89,15 @@ void start_kernel(start_info_t *si)
HYPERVISOR_shared_info = map_shared_info(start_info.shared_info);
/* Set up event and failsafe callback addresses. */
+#ifdef __i386__
HYPERVISOR_set_callbacks(
__KERNEL_CS, (unsigned long)hypervisor_callback,
__KERNEL_CS, (unsigned long)failsafe_callback);
+#else
+ HYPERVISOR_set_callbacks(
+ (unsigned long)hypervisor_callback,
+ (unsigned long)failsafe_callback, 0);
+#endif
trap_init();
@@ -117,7 +126,7 @@ void start_kernel(start_info_t *si)
init_mm();
/* set up events */
- init_events();
+// init_events();
/*
* These need to be replaced with event-channel/control-interface
@@ -135,7 +144,7 @@ void start_kernel(start_info_t *si)
#endif
/* init time and timers */
- init_time();
+// init_time();
/* do nothing */
for ( ; ; ) HYPERVISOR_yield();
diff --git a/extras/mini-os/lib/math.c b/extras/mini-os/lib/math.c
index be08740965..8e97be6d18 100644
--- a/extras/mini-os/lib/math.c
+++ b/extras/mini-os/lib/math.c
@@ -96,14 +96,14 @@ union uu {
* (sizeof(long)*CHAR_BIT/2).
*/
#define HHALF(x) ((x) >> HALF_BITS)
-#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1))
+#define LHALF(x) ((x) & ((1UL << HALF_BITS) - 1))
#define LHUP(x) ((x) << HALF_BITS)
/*
* Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
* section 4.3.1, pp. 257--259.
*/
-#define B (1 << HALF_BITS) /* digit base */
+#define B (1UL << HALF_BITS) /* digit base */
/* Combine two `digits' to make a single two-digit number. */
#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
diff --git a/extras/mini-os/lib/printf.c b/extras/mini-os/lib/printf.c
index f6232c4044..a08bb20e6c 100644
--- a/extras/mini-os/lib/printf.c
+++ b/extras/mini-os/lib/printf.c
@@ -341,7 +341,9 @@ reswitch: switch (ch = (u_char)*fmt++) {
case 'p':
ul = (uintptr_t)va_arg(ap, void *);
base = 16;
- sharpflag = (width == 0);
+ sharpflag = 0;
+ padc = '0';
+ width = sizeof(uintptr_t)*2;
goto nosign;
case 'q':
qflag = 1;
diff --git a/extras/mini-os/minios.lds b/extras/mini-os/minios-x86_32.lds
index a53504e9a2..a53504e9a2 100644
--- a/extras/mini-os/minios.lds
+++ b/extras/mini-os/minios-x86_32.lds
diff --git a/extras/mini-os/minios-x86_64.lds b/extras/mini-os/minios-x86_64.lds
new file mode 100644
index 0000000000..71b61131b8
--- /dev/null
+++ b/extras/mini-os/minios-x86_64.lds
@@ -0,0 +1,54 @@
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SECTIONS
+{
+ . = 0xFFFFFFFF00000000;
+ _text = .; /* Text and read-only data */
+ .text : {
+ *(.text)
+ *(.gnu.warning)
+ } = 0x9090
+
+ _etext = .; /* End of text section */
+
+ .rodata : { *(.rodata) *(.rodata.*) }
+
+ .data : { /* Data */
+ *(.data)
+ CONSTRUCTORS
+ }
+
+ _edata = .; /* End of data section */
+
+ . = ALIGN(8192); /* init_task */
+ .data.init_task : { *(.data.init_task) }
+
+ . = ALIGN(4096);
+ .data.page_aligned : { *(.data.idt) }
+
+ . = ALIGN(32);
+ .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+ __bss_start = .; /* BSS */
+ .bss : {
+ *(.bss)
+ }
+ _end = . ;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.text.exit)
+ *(.data.exit)
+ *(.exitcall.exit)
+ }
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+}
diff --git a/extras/mini-os/mm.c b/extras/mini-os/mm.c
index 2bc87a5165..40a80e2ccb 100644
--- a/extras/mini-os/mm.c
+++ b/extras/mini-os/mm.c
@@ -84,6 +84,7 @@ void init_mm(void)
*/
max_free_pfn = PFN_DOWN(to_phys(pgd));
+#ifdef __i386__
{
unsigned long *pgd = (unsigned long *)start_info.pt_base;
unsigned long pte;
@@ -110,6 +111,7 @@ void init_mm(void)
(u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn),
(u_long)to_virt(PFN_PHYS(max_free_pfn)), PFN_PHYS(max_free_pfn));
init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_free_pfn));
+#endif
/* Now initialise the physical->machine mapping table. */
diff --git a/extras/mini-os/traps.c b/extras/mini-os/traps.c
index c0ef335025..7000128230 100644
--- a/extras/mini-os/traps.c
+++ b/extras/mini-os/traps.c
@@ -15,7 +15,6 @@ void overflow(void);
void bounds(void);
void invalid_op(void);
void device_not_available(void);
-void double_fault(void);
void coprocessor_segment_overrun(void);
void invalid_TSS(void);
void segment_not_present(void);
@@ -33,74 +32,68 @@ extern void do_exit(void);
void dump_regs(struct pt_regs *regs)
{
- int in_kernel = 1;
- unsigned long esp;
- unsigned short ss;
-
- esp = (unsigned long) (&regs->esp);
- ss = __KERNEL_DS;
- if (regs->xcs & 2) {
- in_kernel = 0;
- esp = regs->esp;
- ss = regs->xss & 0xffff;
- }
- printf("EIP: %04x:[<%08lx>]\n",
- 0xffff & regs->xcs, regs->eip);
- printf("EFLAGS: %08lx\n",regs->eflags);
- printf("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- regs->eax, regs->ebx, regs->ecx, regs->edx);
- printf("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- regs->esi, regs->edi, regs->ebp, esp);
- printf("ds: %04x es: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, ss);
- printf("\n");
+ unsigned long esp;
+ unsigned short ss;
+
+#ifdef __x86_64__
+ esp = regs->rsp;
+ ss = regs->ss;
+#else
+ esp = (unsigned long) (&regs->esp);
+ ss = __KERNEL_DS;
+ if (regs->cs & 2) {
+ esp = regs->esp;
+ ss = regs->ss & 0xffff;
+ }
+#endif
+ printf("EIP: %04x:[<%p>] %08x\n",
+ 0xffff & regs->cs , regs->eip, regs->error_code);
+ printf("EFLAGS: %p\n",regs->eflags);
+ printf("eax: %p ebx: %p ecx: %p edx: %p\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printf("esi: %p edi: %p ebp: %p esp: %p\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+#ifdef __x86_64__
+ printf("r8 : %p r9 : %p r10: %p r11: %p\n",
+ regs->r8, regs->r9, regs->r10, regs->r11);
+ printf("r12: %p r13: %p r14: %p r15: %p\n",
+ regs->r12, regs->r13, regs->r14, regs->r15);
+#endif
+ printf("ds: %04x es: %04x ss: %04x\n",
+ regs->ds & 0xffff, regs->es & 0xffff, ss);
}
-static __inline__ void dump_code(unsigned eip)
+static __inline__ void dump_code(unsigned long eip)
{
- unsigned *ptr = (unsigned *)eip;
- int x;
-
- printk("Bytes at eip:\n");
- for (x = -4; x < 5; x++)
- printf("%x", ptr[x]);
+ unsigned char *ptr = (unsigned char *)eip;
+ int x;
+
+ printk("Bytes at eip: ");
+ for ( x = -4; x < 5; x++ )
+ printf("%02x ", ptr[x]);
+ printk("\n");
}
-
-/*
- * C handlers here have their parameter-list constructed by the
- * assembler stubs above. Each one gets a pointer to a list
- * of register values (to be restored at end of exception).
- * Some will also receive an error code -- this is the code that
- * was generated by the processor for the underlying real exception.
- *
- * Note that the page-fault exception is special. It also receives
- * the faulting linear address. Normally this would be found in
- * register CR2, but that is not accessible in a virtualised OS.
- */
-
static void __inline__ do_trap(int trapnr, char *str,
- struct pt_regs * regs, long error_code)
+ struct pt_regs * regs)
{
- printk("FATAL: Unhandled Trap (see mini-os:traps.c)");
- printf("%d %s", trapnr, str);
- dump_regs(regs);
- dump_code(regs->eip);
-
- do_exit();
+ printk("FATAL: Unhandled Trap %d (%s)\n", trapnr, str);
+ dump_regs(regs);
+ dump_code(regs->eip);
+ do_exit();
}
#define DO_ERROR(trapnr, str, name) \
-void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs) \
{ \
- do_trap(trapnr, str, regs, error_code); \
+ do_trap(trapnr, str, regs); \
}
#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) \
-void do_##name(struct pt_regs * regs, long error_code) \
+void do_##name(struct pt_regs * regs) \
{ \
- do_trap(trapnr, str, regs, error_code); \
+ do_trap(trapnr, str, regs); \
}
DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip)
@@ -109,7 +102,6 @@ DO_ERROR( 4, "overflow", overflow)
DO_ERROR( 5, "bounds", bounds)
DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
DO_ERROR( 7, "device not available", device_not_available)
-DO_ERROR( 8, "double fault", double_fault)
DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
@@ -117,29 +109,57 @@ DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0)
DO_ERROR(18, "machine check", machine_check)
-void do_page_fault(struct pt_regs *regs, long error_code,
- unsigned long address)
+extern unsigned long virt_cr2;
+void do_page_fault(struct pt_regs *regs)
{
- printk("Page fault\n");
- printk("Address: 0x%lx", address);
- printk("Error Code: 0x%lx", error_code);
- printk("eip: \t 0x%lx", regs->eip);
+ unsigned long addr = virt_cr2;
+ printk("Page fault at linear address %p\n", addr);
+ dump_regs(regs);
+ dump_code(regs->eip);
+#ifdef __x86_64__
+ {
+ unsigned long *tab = (unsigned long *)start_info.pt_base;
+ unsigned long page;
+
+ printk("Pagetable walk from %p:\n", tab);
+
+ page = tab[l4_table_offset(addr)];
+ tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
+ printk(" L4 = %p (%p)\n", page, tab);
+ if ( !(page & _PAGE_PRESENT) )
+ goto out;
+
+ page = tab[l3_table_offset(addr)];
+ tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
+ printk(" L3 = %p (%p)\n", page, tab);
+ if ( !(page & _PAGE_PRESENT) )
+ goto out;
+
+ page = tab[l2_table_offset(addr)];
+ tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT);
+ printk(" L2 = %p (%p) %s\n", page, tab,
+ (page & _PAGE_PSE) ? "(2MB)" : "");
+ if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
+ goto out;
+
+ page = tab[l1_table_offset(addr)];
+ printk(" L1 = %p\n", page);
+ }
+#endif
+ out:
do_exit();
}
-void do_general_protection(struct pt_regs * regs, long error_code)
+void do_general_protection(struct pt_regs *regs)
{
-
- HYPERVISOR_shared_info->events_mask = 0;
- printk("GPF\n");
- printk("Error Code: 0x%lx", error_code);
- dump_regs(regs);
- dump_code(regs->eip);
- do_exit();
+ printk("GPF\n");
+ dump_regs(regs);
+ dump_code(regs->eip);
+ do_exit();
}
-void do_debug(struct pt_regs * regs, long error_code)
+void do_debug(struct pt_regs * regs)
{
printk("Debug exception\n");
#define TF_MASK 0x100
@@ -148,9 +168,7 @@ void do_debug(struct pt_regs * regs, long error_code)
do_exit();
}
-
-
-void do_coprocessor_error(struct pt_regs * regs, long error_code)
+void do_coprocessor_error(struct pt_regs * regs)
{
printk("Copro error\n");
dump_regs(regs);
@@ -163,14 +181,12 @@ void simd_math_error(void *eip)
printk("SIMD error\n");
}
-void do_simd_coprocessor_error(struct pt_regs * regs,
- long error_code)
+void do_simd_coprocessor_error(struct pt_regs * regs)
{
printk("SIMD copro error\n");
}
-void do_spurious_interrupt_bug(struct pt_regs * regs,
- long error_code)
+void do_spurious_interrupt_bug(struct pt_regs * regs)
{
}
@@ -180,26 +196,28 @@ void do_spurious_interrupt_bug(struct pt_regs * regs,
* The 'privilege ring' field specifies the least-privileged ring that
* can trap to that vector using a software-interrupt instruction (INT).
*/
+#ifdef __x86_64__
+#define _P 0,
+#endif
static trap_info_t trap_table[] = {
- { 0, 0, __KERNEL_CS, (unsigned long)divide_error },
- { 1, 0, __KERNEL_CS, (unsigned long)debug },
- { 3, 3, __KERNEL_CS, (unsigned long)int3 },
- { 4, 3, __KERNEL_CS, (unsigned long)overflow },
- { 5, 3, __KERNEL_CS, (unsigned long)bounds },
- { 6, 0, __KERNEL_CS, (unsigned long)invalid_op },
- { 7, 0, __KERNEL_CS, (unsigned long)device_not_available },
- { 8, 0, __KERNEL_CS, (unsigned long)double_fault },
- { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
- { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS },
- { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present },
- { 12, 0, __KERNEL_CS, (unsigned long)stack_segment },
- { 13, 0, __KERNEL_CS, (unsigned long)general_protection },
- { 14, 0, __KERNEL_CS, (unsigned long)page_fault },
- { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug },
- { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error },
- { 17, 0, __KERNEL_CS, (unsigned long)alignment_check },
- { 18, 0, __KERNEL_CS, (unsigned long)machine_check },
- { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
+ { 0, 0, __KERNEL_CS, _P (unsigned long)divide_error },
+ { 1, 0, __KERNEL_CS, _P (unsigned long)debug },
+ { 3, 3, __KERNEL_CS, _P (unsigned long)int3 },
+ { 4, 3, __KERNEL_CS, _P (unsigned long)overflow },
+ { 5, 3, __KERNEL_CS, _P (unsigned long)bounds },
+ { 6, 0, __KERNEL_CS, _P (unsigned long)invalid_op },
+ { 7, 0, __KERNEL_CS, _P (unsigned long)device_not_available },
+ { 9, 0, __KERNEL_CS, _P (unsigned long)coprocessor_segment_overrun },
+ { 10, 0, __KERNEL_CS, _P (unsigned long)invalid_TSS },
+ { 11, 0, __KERNEL_CS, _P (unsigned long)segment_not_present },
+ { 12, 0, __KERNEL_CS, _P (unsigned long)stack_segment },
+ { 13, 0, __KERNEL_CS, _P (unsigned long)general_protection },
+ { 14, 0, __KERNEL_CS, _P (unsigned long)page_fault },
+ { 15, 0, __KERNEL_CS, _P (unsigned long)spurious_interrupt_bug },
+ { 16, 0, __KERNEL_CS, _P (unsigned long)coprocessor_error },
+ { 17, 0, __KERNEL_CS, _P (unsigned long)alignment_check },
+ { 18, 0, __KERNEL_CS, _P (unsigned long)machine_check },
+ { 19, 0, __KERNEL_CS, _P (unsigned long)simd_coprocessor_error },
{ 0, 0, 0, 0 }
};
diff --git a/extras/mini-os/entry.S b/extras/mini-os/x86_32.S
index 8565caf745..c02048f9a0 100644
--- a/extras/mini-os/entry.S
+++ b/extras/mini-os/x86_32.S
@@ -1,6 +1,26 @@
-
#include <os.h>
+.section __xen_guest
+ .asciz "XEN_VER=3.0,LOADER=generic,PT_MODE_WRITABLE"
+.text
+
+.globl _start, shared_info
+
+_start:
+ cld
+ lss stack_start,%esp
+ push %esi
+ call start_kernel
+
+stack_start:
+ .long stack+8192, __KERNEL_SS
+
+ /* Unpleasant -- the PTE that maps this page is actually overwritten */
+ /* to map the real shared-info page! :-) */
+ .org 0x1000
+shared_info:
+ .org 0x2000
+
ES = 0x20
ORIG_EAX = 0x24
EIP = 0x28
@@ -92,7 +112,7 @@ ENTRY(hypervisor_callback)
cmpl $ecrit,%eax
jb critical_region_fixup
11: push %esp
- call do_hypervisor_callback
+# call do_hypervisor_callback
add $4,%esp
movl HYPERVISOR_shared_info,%esi
xorl %eax,%eax
diff --git a/extras/mini-os/x86_64.S b/extras/mini-os/x86_64.S
new file mode 100644
index 0000000000..d5cf9badd7
--- /dev/null
+++ b/extras/mini-os/x86_64.S
@@ -0,0 +1,222 @@
+#include <os.h>
+
+.section __xen_guest
+ .asciz "XEN_VER=3.0,LOADER=generic,PT_MODE_WRITABLE"
+.text
+
+#define ENTRY(X) .globl X ; X :
+.globl _start, shared_info
+
+#define SAVE_ALL \
+ cld; \
+ pushq %rdi; \
+ pushq %rsi; \
+ pushq %rdx; \
+ pushq %rcx; \
+ pushq %rax; \
+ pushq %r8; \
+ pushq %r9; \
+ pushq %r10; \
+ pushq %r11; \
+ pushq %rbx; \
+ pushq %rbp; \
+ pushq %r12; \
+ pushq %r13; \
+ pushq %r14; \
+ pushq %r15;
+
+#define RESTORE_ALL \
+ popq %r15; \
+ popq %r14; \
+ popq %r13; \
+ popq %r12; \
+ popq %rbp; \
+ popq %rbx; \
+ popq %r11; \
+ popq %r10; \
+ popq %r9; \
+ popq %r8; \
+ popq %rax; \
+ popq %rcx; \
+ popq %rdx; \
+ popq %rsi; \
+ popq %rdi
+
+_start:
+ cld
+ movq stack_start(%rip),%rsp
+ movq %rsi,%rdi
+ call start_kernel
+
+stack_start:
+ .quad stack+8192
+
+ /* Unpleasant -- the PTE that maps this page is actually overwritten */
+ /* to map the real shared-info page! :-) */
+ .org 0x1000
+shared_info:
+ .org 0x2000
+
+ENTRY(hypervisor_callback)
+ popq %rcx
+ popq %r11
+ iretq
+
+ENTRY(failsafe_callback)
+ popq %rcx
+ popq %r11
+ iretq
+
+error_code:
+ SAVE_ALL
+ movq %rsp,%rdi
+ movl 15*8+4(%rsp),%eax
+ leaq exception_table(%rip),%rdx
+ callq *(%rdx,%rax,8)
+ RESTORE_ALL
+ addq $8,%rsp
+ iretq
+
+ENTRY(divide_error)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_divide_error,4(%rsp)
+ jmp error_code
+
+ENTRY(coprocessor_error)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_copro_error,4(%rsp)
+ jmp error_code
+
+ENTRY(simd_coprocessor_error)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_simd_error,4(%rsp)
+ jmp error_code
+
+ENTRY(device_not_available)
+ popq %rcx
+ popq %r11
+ movl $TRAP_no_device,4(%rsp)
+ jmp error_code
+
+ENTRY(debug)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_debug,4(%rsp)
+ jmp error_code
+
+ENTRY(int3)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_int3,4(%rsp)
+ jmp error_code
+
+ENTRY(overflow)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_overflow,4(%rsp)
+ jmp error_code
+
+ENTRY(bounds)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_bounds,4(%rsp)
+ jmp error_code
+
+ENTRY(invalid_op)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_invalid_op,4(%rsp)
+ jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_copro_seg,4(%rsp)
+ jmp error_code
+
+ENTRY(invalid_TSS)
+ popq %rcx
+ popq %r11
+ movl $TRAP_invalid_tss,4(%rsp)
+ jmp error_code
+
+ENTRY(segment_not_present)
+ popq %rcx
+ popq %r11
+ movl $TRAP_no_segment,4(%rsp)
+ jmp error_code
+
+ENTRY(stack_segment)
+ popq %rcx
+ popq %r11
+ movl $TRAP_stack_error,4(%rsp)
+ jmp error_code
+
+ENTRY(general_protection)
+ popq %rcx
+ popq %r11
+ movl $TRAP_gp_fault,4(%rsp)
+ jmp error_code
+
+ENTRY(alignment_check)
+ popq %rcx
+ popq %r11
+ movl $TRAP_alignment_check,4(%rsp)
+ jmp error_code
+
+ENTRY(virt_cr2)
+ .quad 0
+ENTRY(page_fault)
+ popq %rcx
+ popq %r11
+ popq virt_cr2(%rip)
+ movl $TRAP_page_fault,4(%rsp)
+ jmp error_code
+
+ENTRY(machine_check)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_machine_check,4(%rsp)
+ jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+ popq %rcx
+ popq %r11
+ pushq $0
+ movl $TRAP_spurious_int,4(%rsp)
+ jmp error_code
+
+ENTRY(exception_table)
+ .quad do_divide_error
+ .quad do_debug
+ .quad 0 # nmi
+ .quad do_int3
+ .quad do_overflow
+ .quad do_bounds
+ .quad do_invalid_op
+ .quad 0
+ .quad 0
+ .quad do_coprocessor_segment_overrun
+ .quad do_invalid_TSS
+ .quad do_segment_not_present
+ .quad do_stack_segment
+ .quad do_general_protection
+ .quad do_page_fault
+ .quad do_spurious_interrupt_bug
+ .quad do_coprocessor_error
+ .quad do_alignment_check
+ .quad do_machine_check
+ .quad do_simd_coprocessor_error
diff --git a/freebsd-5.3-xen-sparse/conf/files.i386-xen b/freebsd-5.3-xen-sparse/conf/files.i386-xen
index 189378d469..0c6aea90a2 100644
--- a/freebsd-5.3-xen-sparse/conf/files.i386-xen
+++ b/freebsd-5.3-xen-sparse/conf/files.i386-xen
@@ -202,18 +202,19 @@ i386/i386/perfmon.c optional perfmon profiling-routine
i386-xen/i386-xen/pmap.c standard
i386-xen/i386-xen/support.s standard
i386-xen/i386-xen/swtch.s standard
-i386-xen/i386-xen/sys_machdep.c standard
+i386-xen/i386-xen/sys_machdep.c standard
i386-xen/i386-xen/trap.c standard
i386/i386/tsc.c standard
-i386-xen/i386-xen/vm_machdep.c standard
+i386-xen/i386-xen/vm_machdep.c standard
i386-xen/i386-xen/clock.c standard
# xen specific arch-dep files
i386-xen/i386-xen/hypervisor.c standard
i386-xen/i386-xen/xen_machdep.c standard
-i386-xen/i386-xen/xen_bus.c standard
-i386-xen/i386-xen/evtchn.c standard
-i386-xen/i386-xen/ctrl_if.c standard
+i386-xen/i386-xen/xen_bus.c standard
+i386-xen/i386-xen/evtchn.c standard
+i386-xen/i386-xen/ctrl_if.c standard
+i386-xen/i386-xen/gnttab.c standard
i386/isa/asc.c count asc
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
index 393e091986..902ae965e8 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/clock.c
@@ -87,6 +87,12 @@ __FBSDID("$FreeBSD: src/sys/i386/isa/clock.c,v 1.207 2003/11/13 10:02:12 phk Exp
/* XEN specific defines */
#include <machine/xen_intr.h>
+#include <vm/vm.h> /* needed by machine/pmap.h */
+#include <vm/pmap.h> /* needed by machine/pmap.h */
+#include <machine/pmap.h> /* needed by xen-os.h */
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen-os.h> /* needed by xenfunc.h */
+#include <machine/xenfunc.h>
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -105,6 +111,8 @@ int statclock_disable;
#define TIMER_FREQ 1193182
#endif
u_int timer_freq = TIMER_FREQ;
+struct mtx clock_lock;
+
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
@@ -127,7 +135,15 @@ static uint64_t shadow_system_time;
static uint32_t shadow_time_version;
static struct timeval shadow_tv;
+#define DEFINE_PER_CPU(type, name) \
+ __typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var))
+
+
static uint64_t processed_system_time;/* System time (ns) at last processing. */
+static DEFINE_PER_CPU(uint64_t, processed_system_time);
+
#define NS_PER_TICK (1000000000ULL/hz)
@@ -200,18 +216,19 @@ static struct timecounter xen_timecounter = {
static void
clkintr(struct clockframe *frame)
{
- int64_t delta;
+ int64_t cpu_delta, delta;
+ int cpu = smp_processor_id();
long ticks = 0;
-
do {
__get_time_values_from_xen();
- delta = (int64_t)(shadow_system_time +
- xen_get_offset() * 1000 -
- processed_system_time);
+ delta = cpu_delta = (int64_t)shadow_system_time +
+ (int64_t)xen_get_offset() * 1000;
+ delta -= processed_system_time;
+ cpu_delta -= per_cpu(processed_system_time, cpu);
} while (!TIME_VALUES_UP_TO_DATE);
- if (unlikely(delta < 0)) {
+ if (unlikely(delta < 0) || unlikely(cpu_delta < 0)) {
printk("Timer ISR: Time went backwards: %lld\n", delta);
return;
}
@@ -223,15 +240,28 @@ clkintr(struct clockframe *frame)
delta -= NS_PER_TICK;
processed_system_time += NS_PER_TICK;
}
-
- if (ticks > 0) {
- if (frame)
- timer_func(frame);
-#ifdef SMP
- if (timer_func == hardclock && frame)
- forward_hardclock();
+ /* Local CPU jiffy work. */
+ while (cpu_delta >= NS_PER_TICK) {
+ cpu_delta -= NS_PER_TICK;
+ per_cpu(processed_system_time, cpu) += NS_PER_TICK;
+#if 0
+ update_process_times(user_mode(regs));
+ profile_tick(CPU_PROFILING, regs);
#endif
}
+ if (ticks > 0) {
+ if (frame) timer_func(frame);
+ }
+
+ if (cpu != 0)
+ return;
+ /*
+ * Take synchronised time from Xen once a minute if we're not
+ * synchronised ourselves, and we haven't chosen to keep an independent
+ * time base.
+ */
+
+ /* XXX TODO */
}
#include "opt_ddb.h"
@@ -427,7 +457,7 @@ resettodr()
* Start clocks running.
*/
void
-cpu_initclocks()
+cpu_initclocks(void)
{
int diag;
int time_irq = bind_virq_to_irq(VIRQ_TIMER);
@@ -443,7 +473,25 @@ cpu_initclocks()
/* initialize xen values */
__get_time_values_from_xen();
processed_system_time = shadow_system_time;
+ per_cpu(processed_system_time, 0) = processed_system_time;
+
+}
+
+#ifdef SMP
+void
+ap_cpu_initclocks(void)
+{
+ int irq;
+ int cpu = smp_processor_id();
+
+ per_cpu(processed_system_time, cpu) = shadow_system_time;
+
+ irq = bind_virq_to_irq(VIRQ_TIMER);
+ PCPU_SET(time_irq, irq);
+ PANIC_IF(intr_add_handler("clk", irq, (driver_intr_t *)clkintr,
+ NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
}
+#endif
void
cpu_startprofclock(void)
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c
index 8e8ce9fde7..133734d028 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/ctrl_if.c
@@ -39,6 +39,18 @@
#include <machine/evtchn.h>
/*
+ * Extra ring macros to sync a consumer index up to the public producer index.
+ * Generally UNSAFE, but we use it for recovery and shutdown in some cases.
+ */
+#define RING_DROP_PENDING_REQUESTS(_r) \
+ do { \
+ (_r)->req_cons = (_r)->sring->req_prod; \
+ } while (0)
+#define RING_DROP_PENDING_RESPONSES(_r) \
+ do { \
+ (_r)->rsp_cons = (_r)->sring->rsp_prod; \
+ } while (0)
+/*
* Only used by initial domain which must create its own control-interface
* event channel. This value is picked up by the user-space domain controller
* via an ioctl.
@@ -51,8 +63,8 @@ static struct mtx ctrl_if_lock;
static int * ctrl_if_wchan = &ctrl_if_evtchn;
-static CONTROL_RING_IDX ctrl_if_tx_resp_cons;
-static CONTROL_RING_IDX ctrl_if_rx_req_cons;
+static ctrl_front_ring_t ctrl_if_tx_ring;
+static ctrl_back_ring_t ctrl_if_rx_ring;
/* Incoming message requests. */
/* Primary message type -> message handler. */
@@ -85,7 +97,7 @@ TASKQUEUE_DECLARE(ctrl_if_txB);
TASKQUEUE_DEFINE(ctrl_if_txB, NULL, NULL, {});
struct taskqueue **taskqueue_ctrl_if_tx[2] = { &taskqueue_ctrl_if_txA,
&taskqueue_ctrl_if_txB };
-int ctrl_if_idx;
+static int ctrl_if_idx = 0;
static struct task ctrl_if_rx_tasklet;
static struct task ctrl_if_tx_tasklet;
@@ -95,8 +107,6 @@ static struct task ctrl_if_rxmsg_deferred_task;
#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048))
-#define TX_FULL(_c) \
- (((_c)->tx_req_prod - ctrl_if_tx_resp_cons) == CONTROL_RING_SIZE)
static void
ctrl_if_notify_controller(void)
@@ -114,13 +124,17 @@ ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id)
static void
__ctrl_if_tx_tasklet(void *context __unused, int pending __unused)
{
- control_if_t *ctrl_if = get_ctrl_if();
ctrl_msg_t *msg;
- int was_full = TX_FULL(ctrl_if);
+ int was_full = RING_FULL(&ctrl_if_tx_ring);
+ RING_IDX i, rp;
+
+ i = ctrl_if_tx_ring.rsp_cons;
+ rp = ctrl_if_tx_ring.sring->rsp_prod;
+ rmb(); /* Ensure we see all requests up to 'rp'. */
- while ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
+ for ( ; i != rp; i++ )
{
- msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if_tx_resp_cons)];
+ msg = RING_GET_RESPONSE(&ctrl_if_tx_ring, i);
/* Execute the callback handler, if one was specified. */
if ( msg->id != 0xFF )
@@ -131,77 +145,102 @@ __ctrl_if_tx_tasklet(void *context __unused, int pending __unused)
ctrl_if_txmsg_id_mapping[msg->id].fn = NULL;
}
- /*
- * Step over the message in the ring /after/ finishing reading it. As
- * soon as the index is updated then the message may get blown away.
- */
- smp_mb();
- ctrl_if_tx_resp_cons++;
}
- if ( was_full && !TX_FULL(ctrl_if) )
+ /*
+ * Step over the message in the ring /after/ finishing reading it. As
+ * soon as the index is updated then the message may get blown away.
+ */
+ smp_mb();
+ ctrl_if_tx_ring.rsp_cons = i;
+
+ if ( was_full && !RING_FULL(&ctrl_if_tx_ring) )
{
wakeup(ctrl_if_wchan);
/* bump idx so future enqueues will occur on the next taskq
* process any currently pending tasks
*/
- ctrl_if_idx++;
+ ctrl_if_idx++;
taskqueue_run(*taskqueue_ctrl_if_tx[(ctrl_if_idx-1) & 1]);
}
+
}
static void
__ctrl_if_rxmsg_deferred_task(void *context __unused, int pending __unused)
{
ctrl_msg_t *msg;
+ CONTROL_RING_IDX dp;
- while ( ctrl_if_rxmsg_deferred_cons != ctrl_if_rxmsg_deferred_prod )
+ dp = ctrl_if_rxmsg_deferred_prod;
+ rmb(); /* Ensure we see all deferred requests up to 'dp'. */
+
+ while ( ctrl_if_rxmsg_deferred_cons != dp )
{
msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
ctrl_if_rxmsg_deferred_cons++)];
(*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
}
+
}
static void
__ctrl_if_rx_tasklet(void *context __unused, int pending __unused)
{
- control_if_t *ctrl_if = get_ctrl_if();
ctrl_msg_t msg, *pmsg;
+ CONTROL_RING_IDX dp;
+ RING_IDX rp, i;
+
+ i = ctrl_if_rx_ring.req_cons;
+ rp = ctrl_if_rx_ring.sring->req_prod;
+ dp = ctrl_if_rxmsg_deferred_prod;
- while ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
+ rmb(); /* Ensure we see all requests up to 'rp'. */
+
+ for ( ; i != rp; i++)
{
- pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)];
+ pmsg = RING_GET_REQUEST(&ctrl_if_rx_ring, i);
memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
+
+ if ( msg.length > sizeof(msg.msg))
+ msg.length = sizeof(msg.msg);
if ( msg.length != 0 )
memcpy(msg.msg, pmsg->msg, msg.length);
if ( test_bit(msg.type, &ctrl_if_rxmsg_blocking_context) )
{
- pmsg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
- ctrl_if_rxmsg_deferred_prod++)];
- memcpy(pmsg, &msg, offsetof(ctrl_msg_t, msg) + msg.length);
- taskqueue_enqueue(taskqueue_thread, &ctrl_if_rxmsg_deferred_task);
+ memcpy(&ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(dp++)],
+ &msg, offsetof(ctrl_msg_t, msg) + msg.length);
}
else
{
(*ctrl_if_rxmsg_handler[msg.type])(&msg, 0);
}
}
+ ctrl_if_rx_ring.req_cons = i;
+
+ if ( dp != ctrl_if_rxmsg_deferred_prod )
+ {
+ wmb();
+ ctrl_if_rxmsg_deferred_prod = dp;
+ taskqueue_enqueue(taskqueue_thread, &ctrl_if_rxmsg_deferred_task);
+ }
+
}
static void
ctrl_if_interrupt(void *ctrl_sc)
/* (int irq, void *dev_id, struct pt_regs *regs) */
{
- control_if_t *ctrl_if = get_ctrl_if();
- if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
+
+ if ( RING_HAS_UNCONSUMED_RESPONSES(&ctrl_if_tx_ring) )
taskqueue_enqueue(taskqueue_swi, &ctrl_if_tx_tasklet);
- if ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
+ if ( RING_HAS_UNCONSUMED_REQUESTS(&ctrl_if_rx_ring) )
taskqueue_enqueue(taskqueue_swi, &ctrl_if_rx_tasklet);
+
}
int
@@ -210,13 +249,13 @@ ctrl_if_send_message_noblock(
ctrl_msg_handler_t hnd,
unsigned long id)
{
- control_if_t *ctrl_if = get_ctrl_if();
unsigned long flags;
+ ctrl_msg_t *dmsg;
int i;
mtx_lock_irqsave(&ctrl_if_lock, flags);
- if ( TX_FULL(ctrl_if) )
+ if ( RING_FULL(&ctrl_if_tx_ring) )
{
mtx_unlock_irqrestore(&ctrl_if_lock, flags);
return EAGAIN;
@@ -232,10 +271,11 @@ ctrl_if_send_message_noblock(
msg->id = i;
}
- memcpy(&ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if->tx_req_prod)],
- msg, sizeof(*msg));
- wmb(); /* Write the message before letting the controller peek at it. */
- ctrl_if->tx_req_prod++;
+ dmsg = RING_GET_REQUEST(&ctrl_if_tx_ring,
+ ctrl_if_tx_ring.req_prod_pvt);
+ memcpy(dmsg, msg, sizeof(*msg));
+ ctrl_if_tx_ring.req_prod_pvt++;
+ RING_PUSH_REQUESTS(&ctrl_if_tx_ring);
mtx_unlock_irqrestore(&ctrl_if_lock, flags);
@@ -252,34 +292,35 @@ ctrl_if_send_message_block(
long wait_state)
{
int rc, sst = 0;
-
+
/* Fast path. */
- if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
- return rc;
-
-
+ if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
+ goto done;
+
for ( ; ; )
{
if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != EAGAIN )
break;
- if ( sst != 0)
- return EINTR;
+ if ( sst != 0) {
+ rc = EINTR;
+ goto done;
+ }
sst = tsleep(ctrl_if_wchan, PWAIT|PCATCH, "ctlrwt", 10);
}
-
+ done:
+
return rc;
}
int
ctrl_if_enqueue_space_callback(struct task *task)
{
- control_if_t *ctrl_if = get_ctrl_if();
/* Fast path. */
- if ( !TX_FULL(ctrl_if) )
+ if ( !RING_FULL(&ctrl_if_tx_ring) )
return 0;
(void)taskqueue_enqueue(*taskqueue_ctrl_if_tx[(ctrl_if_idx & 1)], task);
@@ -290,13 +331,12 @@ ctrl_if_enqueue_space_callback(struct task *task)
* certainly return 'not full'.
*/
smp_mb();
- return TX_FULL(ctrl_if);
+ return RING_FULL(&ctrl_if_tx_ring);
}
void
ctrl_if_send_response(ctrl_msg_t *msg)
{
- control_if_t *ctrl_if = get_ctrl_if();
unsigned long flags;
ctrl_msg_t *dmsg;
@@ -305,11 +345,14 @@ ctrl_if_send_response(ctrl_msg_t *msg)
* In this situation we may have src==dst, so no copying is required.
*/
mtx_lock_irqsave(&ctrl_if_lock, flags);
- dmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if->rx_resp_prod)];
+ dmsg = RING_GET_RESPONSE(&ctrl_if_rx_ring,
+ ctrl_if_rx_ring.rsp_prod_pvt);
if ( dmsg != msg )
memcpy(dmsg, msg, sizeof(*msg));
- wmb(); /* Write the message before letting the controller peek at it. */
- ctrl_if->rx_resp_prod++;
+
+ ctrl_if_rx_ring.rsp_prod_pvt++;
+ RING_PUSH_RESPONSES(&ctrl_if_rx_ring);
+
mtx_unlock_irqrestore(&ctrl_if_lock, flags);
ctrl_if_notify_controller();
@@ -323,7 +366,7 @@ ctrl_if_register_receiver(
{
unsigned long _flags;
int inuse;
-
+
mtx_lock_irqsave(&ctrl_if_lock, _flags);
inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler);
@@ -344,7 +387,7 @@ ctrl_if_register_receiver(
}
mtx_unlock_irqrestore(&ctrl_if_lock, _flags);
-
+
return !inuse;
}
@@ -382,6 +425,7 @@ ctrl_if_suspend(void)
unbind_evtchn_from_irq(ctrl_if_evtchn);
}
+#if 0
/** Reset the control interface progress pointers.
* Marks the queues empty if 'clear' non-zero.
*/
@@ -398,10 +442,13 @@ ctrl_if_reset(int clear)
ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod;
}
-
+#endif
void
ctrl_if_resume(void)
{
+ control_if_t *ctrl_if = get_ctrl_if();
+
+ TRACE_ENTER;
if ( xen_start_info->flags & SIF_INITDOMAIN )
{
/*
@@ -421,7 +468,10 @@ ctrl_if_resume(void)
initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2;
}
- ctrl_if_reset(0);
+
+ /* Sync up with shared indexes. */
+ FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM);
+ BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM);
ctrl_if_evtchn = xen_start_info->domain_controller_evtchn;
ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn);
@@ -433,17 +483,24 @@ ctrl_if_resume(void)
*/
intr_add_handler("ctrl-if", ctrl_if_irq, (driver_intr_t*)ctrl_if_interrupt,
- NULL, INTR_TYPE_NET | INTR_MPSAFE, NULL);
+ NULL, INTR_TYPE_NET, NULL);
+ TRACE_EXIT;
+ /* XXX currently assuming not MPSAFE */
}
static void
ctrl_if_init(void *dummy __unused)
{
+ control_if_t *ctrl_if = get_ctrl_if();
+
int i;
for ( i = 0; i < 256; i++ )
ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
+ FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM);
+ BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM);
+
mtx_init(&ctrl_if_lock, "ctrlif", NULL, MTX_SPIN | MTX_NOWITNESS);
TASK_INIT(&ctrl_if_tx_tasklet, 0, __ctrl_if_tx_tasklet, NULL);
@@ -452,7 +509,7 @@ ctrl_if_init(void *dummy __unused)
TASK_INIT(&ctrl_if_rxmsg_deferred_task, 0, __ctrl_if_rxmsg_deferred_task, NULL);
- ctrl_if_reset(1);
+
ctrl_if_resume();
}
@@ -464,13 +521,13 @@ ctrl_if_init(void *dummy __unused)
int
ctrl_if_transmitter_empty(void)
{
- return (get_ctrl_if()->tx_req_prod == ctrl_if_tx_resp_cons);
+ return (ctrl_if_tx_ring.sring->req_prod == ctrl_if_tx_ring.rsp_cons);
}
void
ctrl_if_discard_responses(void)
{
- ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod;
+ RING_DROP_PENDING_RESPONSES(&ctrl_if_tx_ring);
}
SYSINIT(ctrl_if_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, ctrl_if_init, NULL);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
index 635a3bfe4e..fde7fda330 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/evtchn.c
@@ -31,8 +31,9 @@ static struct mtx irq_mapping_update_lock;
static int evtchn_to_irq[NR_EVENT_CHANNELS];
static int irq_to_evtchn[NR_IRQS];
-/* IRQ <-> VIRQ mapping. */
-static int virq_to_irq[NR_VIRQS];
+static int virq_to_irq[MAX_VIRT_CPUS][NR_VIRQS];
+static int ipi_to_evtchn[MAX_VIRT_CPUS][NR_VIRQS];
+
/* Reference counts for bindings to IRQs. */
static int irq_bindcount[NR_IRQS];
@@ -57,6 +58,7 @@ evtchn_do_upcall(struct intrframe *frame)
int irq, owned;
unsigned long flags;
shared_info_t *s = HYPERVISOR_shared_info;
+ vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
local_irq_save(flags);
@@ -64,7 +66,7 @@ evtchn_do_upcall(struct intrframe *frame)
{
s->vcpu_data[0].evtchn_upcall_pending = 0;
/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
- l1 = xen_xchg(&s->evtchn_pending_sel, 0);
+ l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0);
while ( (l1i = ffs(l1)) != 0 )
{
l1i--;
@@ -77,17 +79,21 @@ evtchn_do_upcall(struct intrframe *frame)
l2 &= ~(1 << l2i);
port = (l1i << 5) + l2i;
- if ((owned = mtx_owned(&sched_lock)) != 0)
- mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
- if ( (irq = evtchn_to_irq[port]) != -1 ) {
+ irq = evtchn_to_irq[port];
+#ifdef SMP
+ if (irq == PCPU_GET(cpuast))
+ continue;
+#endif
+ if ( (owned = mtx_owned(&sched_lock)) != 0 )
+ mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
+ if ( irq != -1 ) {
struct intsrc *isrc = intr_lookup_source(irq);
intr_execute_handlers(isrc, frame);
-
} else {
evtchn_device_upcall(port);
}
- if (owned)
- mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
+ if ( owned )
+ mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
}
}
}
@@ -120,7 +126,7 @@ bind_virq_to_irq(int virq)
mtx_lock(&irq_mapping_update_lock);
- if ( (irq = virq_to_irq[virq]) == -1 )
+ if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 )
{
op.cmd = EVTCHNOP_bind_virq;
op.u.bind_virq.virq = virq;
@@ -132,7 +138,7 @@ bind_virq_to_irq(int virq)
evtchn_to_irq[evtchn] = irq;
irq_to_evtchn[irq] = evtchn;
- virq_to_irq[virq] = irq;
+ PCPU_GET(virq_to_irq)[virq] = irq;
}
irq_bindcount[irq]++;
@@ -146,7 +152,7 @@ void
unbind_virq_from_irq(int virq)
{
evtchn_op_t op;
- int irq = virq_to_irq[virq];
+ int irq = PCPU_GET(virq_to_irq)[virq];
int evtchn = irq_to_evtchn[irq];
mtx_lock(&irq_mapping_update_lock);
@@ -161,7 +167,64 @@ unbind_virq_from_irq(int virq)
evtchn_to_irq[evtchn] = -1;
irq_to_evtchn[irq] = -1;
- virq_to_irq[virq] = -1;
+ PCPU_GET(virq_to_irq)[virq] = -1;
+ }
+
+ mtx_unlock(&irq_mapping_update_lock);
+}
+
+
+int
+bind_ipi_on_cpu_to_irq(int cpu, int ipi)
+{
+ evtchn_op_t op;
+ int evtchn, irq;
+
+ mtx_lock(&irq_mapping_update_lock);
+
+ if ( (evtchn = PCPU_GET(ipi_to_evtchn)[ipi]) == 0 )
+ {
+ op.cmd = EVTCHNOP_bind_ipi;
+ op.u.bind_ipi.ipi_edom = cpu;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, cpu);
+ evtchn = op.u.bind_ipi.port;
+
+ irq = find_unbound_irq();
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ PCPU_GET(ipi_to_evtchn)[ipi] = evtchn;
+ } else
+ irq = evtchn_to_irq[evtchn];
+
+ irq_bindcount[irq]++;
+
+ mtx_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+
+void
+unbind_ipi_on_cpu_from_irq(int cpu, int ipi)
+{
+ evtchn_op_t op;
+ int evtchn = PCPU_GET(ipi_to_evtchn)[ipi];
+ int irq = irq_to_evtchn[evtchn];
+
+ mtx_lock(&irq_mapping_update_lock);
+
+ if ( --irq_bindcount[irq] == 0 )
+ {
+ op.cmd = EVTCHNOP_close;
+ op.u.close.dom = DOMID_SELF;
+ op.u.close.port = evtchn;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to unbind virtual IPI %d on cpu %d\n", ipi, cpu);
+
+ evtchn_to_irq[evtchn] = -1;
+ irq_to_evtchn[irq] = -1;
+ PCPU_GET(ipi_to_evtchn)[ipi] = 0;
}
mtx_unlock(&irq_mapping_update_lock);
@@ -451,12 +514,12 @@ static struct hw_interrupt_type pirq_type = {
};
#endif
-
+#if 0
static void
misdirect_interrupt(void *sc)
{
}
-
+#endif
void irq_suspend(void)
{
int virq, irq, evtchn;
@@ -464,7 +527,7 @@ void irq_suspend(void)
/* Unbind VIRQs from event channels. */
for ( virq = 0; virq < NR_VIRQS; virq++ )
{
- if ( (irq = virq_to_irq[virq]) == -1 )
+ if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 )
continue;
evtchn = irq_to_evtchn[irq];
@@ -493,7 +556,7 @@ void irq_resume(void)
for ( virq = 0; virq < NR_VIRQS; virq++ )
{
- if ( (irq = virq_to_irq[virq]) == -1 )
+ if ( (irq = PCPU_GET(virq_to_irq)[virq]) == -1 )
continue;
/* Get a new binding from Xen. */
@@ -512,6 +575,21 @@ void irq_resume(void)
}
}
+void
+ap_evtchn_init(int cpu)
+{
+ int i;
+
+ /* XXX -- expedience hack */
+ PCPU_SET(virq_to_irq, (int *)&virq_to_irq[cpu]);
+ PCPU_SET(ipi_to_evtchn, (int *)&ipi_to_evtchn[cpu]);
+
+ /* No VIRQ -> IRQ mappings. */
+ for ( i = 0; i < NR_VIRQS; i++ )
+ PCPU_GET(virq_to_irq)[i] = -1;
+}
+
+
static void
evtchn_init(void *dummy __unused)
{
@@ -519,17 +597,14 @@ evtchn_init(void *dummy __unused)
struct xenpic *xp;
struct xenpic_intsrc *pin;
- /*
- * xenpic_lock: in order to allow an interrupt to occur in a critical
- * section, to set pcpu->ipending (etc...) properly, we
- * must be able to get the icu lock, so it can't be
- * under witness.
- */
- mtx_init(&irq_mapping_update_lock, "xp", NULL, MTX_DEF);
+
+ /* XXX -- expedience hack */
+ PCPU_SET(virq_to_irq, (int *)&virq_to_irq[0]);
+ PCPU_SET(ipi_to_evtchn, (int *)&ipi_to_evtchn[0]);
/* No VIRQ -> IRQ mappings. */
for ( i = 0; i < NR_VIRQS; i++ )
- virq_to_irq[i] = -1;
+ PCPU_GET(virq_to_irq)[i] = -1;
/* No event-channel -> IRQ mappings. */
for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
@@ -572,9 +647,20 @@ evtchn_init(void *dummy __unused)
}
#endif
+#if 0
(void) intr_add_handler("xb_mis", bind_virq_to_irq(VIRQ_MISDIRECT),
(driver_intr_t *)misdirect_interrupt,
NULL, INTR_TYPE_MISC, NULL);
+
+#endif
}
SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL);
+ /*
+ * xenpic_lock: in order to allow an interrupt to occur in a critical
+ * section, to set pcpu->ipending (etc...) properly, we
+ * must be able to get the icu lock, so it can't be
+ * under witness.
+ */
+
+MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_DEF|MTX_NOWITNESS);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s
index 4adb61a350..670d2809fa 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/exception.s
@@ -91,47 +91,52 @@ MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)
IDTVEC(div)
- pushl $0; pushl $0; TRAP(T_DIVIDE)
+ pushl $0; TRAP(T_DIVIDE)
IDTVEC(dbg)
- pushl $0; pushl $0; TRAP(T_TRCTRAP)
+ pushl $0; TRAP(T_TRCTRAP)
IDTVEC(nmi)
- pushl $0; pushl $0; TRAP(T_NMI)
+ pushl $0; TRAP(T_NMI)
IDTVEC(bpt)
- pushl $0; pushl $0; TRAP(T_BPTFLT)
+ pushl $0; TRAP(T_BPTFLT)
IDTVEC(ofl)
- pushl $0; pushl $0; TRAP(T_OFLOW)
+ pushl $0; TRAP(T_OFLOW)
IDTVEC(bnd)
- pushl $0; pushl $0; TRAP(T_BOUND)
+ pushl $0; TRAP(T_BOUND)
IDTVEC(ill)
- pushl $0; pushl $0; TRAP(T_PRIVINFLT)
+ pushl $0; TRAP(T_PRIVINFLT)
IDTVEC(dna)
- pushl $0; pushl $0; TRAP(T_DNA)
+ pushl $0; TRAP(T_DNA)
IDTVEC(fpusegm)
- pushl $0; pushl $0; TRAP(T_FPOPFLT)
+ pushl $0; TRAP(T_FPOPFLT)
IDTVEC(tss)
- pushl $0; TRAP(T_TSSFLT)
+ TRAP(T_TSSFLT)
IDTVEC(missing)
- pushl $0; TRAP(T_SEGNPFLT)
+ TRAP(T_SEGNPFLT)
IDTVEC(stk)
- pushl $0; TRAP(T_STKFLT)
+ TRAP(T_STKFLT)
IDTVEC(prot)
- pushl $0; TRAP(T_PROTFLT)
+ TRAP(T_PROTFLT)
IDTVEC(page)
+ pushl %eax
+ movl 4(%esp),%eax
+ movl %eax,-44(%esp) # move cr2 after trap frame
+ popl %eax
+ addl $4,%esp
TRAP(T_PAGEFLT)
IDTVEC(mchk)
- pushl $0; pushl $0; TRAP(T_MCHK)
+ pushl $0; TRAP(T_MCHK)
IDTVEC(rsvd)
- pushl $0; pushl $0; TRAP(T_RESERVED)
+ pushl $0; TRAP(T_RESERVED)
IDTVEC(fpu)
- pushl $0; pushl $0; TRAP(T_ARITHTRAP)
+ pushl $0; TRAP(T_ARITHTRAP)
IDTVEC(align)
- pushl $0; TRAP(T_ALIGNFLT)
+ TRAP(T_ALIGNFLT)
IDTVEC(xmm)
- pushl $0; pushl $0; TRAP(T_XMMFLT)
+ pushl $0; TRAP(T_XMMFLT)
IDTVEC(hypervisor_callback)
- pushl $T_HYPCALLBACK; pushl %eax; TRAP(T_HYPCALLBACK)
+ pushl %eax; TRAP(T_HYPCALLBACK)
hypervisor_callback_pending:
movl $T_HYPCALLBACK,TF_TRAPNO(%esp)
@@ -161,6 +166,12 @@ alltraps_with_regs_pushed:
movl $KPSEL,%eax
movl %eax,%fs
FAKE_MCOUNT(TF_EIP(%esp))
+save_cr2:
+ movl TF_TRAPNO(%esp),%eax
+ cmpl $T_PAGEFLT,%eax
+ jne calltrap
+ movl -4(%esp),%eax
+ movl %eax,PCPU(CR2)
calltrap:
movl TF_EIP(%esp),%eax
cmpl $scrit,%eax
@@ -217,8 +228,7 @@ IDTVEC(lcall_syscall)
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
pushl $2 /* sizeof "int 0x80" */
- pushl $0xCAFE
- pushl $0xDEAD
+ pushl $0xBEEF
pushal
pushl %ds
pushl %es
@@ -324,7 +334,7 @@ doreti_popl_es:
doreti_popl_ds:
popl %ds
POPA
- addl $12,%esp
+ addl $8,%esp
.globl doreti_iret
doreti_iret:
iret
@@ -341,7 +351,7 @@ ecrit:
ALIGN_TEXT
.globl doreti_iret_fault
doreti_iret_fault:
- subl $12,%esp
+ subl $8,%esp
pushal
pushl %ds
.globl doreti_popl_ds_fault
@@ -376,7 +386,7 @@ critical_region_fixup:
movl %esp,%esi
add %eax,%esi # %esi points at end of src region
movl %esp,%edi
- add $0x44,%edi # %edi points at end of dst region
+ add $0x40,%edi # %edi points at end of dst region
movl %eax,%ecx
shr $2,%ecx # convert bytes to words
je 16f # skip loop if nothing to copy
@@ -403,8 +413,8 @@ critical_fixup_table:
.byte 0x20 #pop %edx
.byte 0x24 #pop %ecx
.byte 0x28 #pop %eax
-.byte 0x2c,0x2c,0x2c #add $0xc,%esp
-.byte 0x38 #iret
+.byte 0x2c,0x2c,0x2c #add $0x8,%esp
+.byte 0x34 #iret
/* # Hypervisor uses this for application faults while it executes.*/
@@ -412,17 +422,17 @@ ENTRY(failsafe_callback)
pushal
call xen_failsafe_handler
/*# call install_safe_pf_handler */
- movl 32(%esp),%ebx
+ movl 28(%esp),%ebx
1: movl %ebx,%ds
- movl 36(%esp),%ebx
+ movl 32(%esp),%ebx
2: movl %ebx,%es
- movl 40(%esp),%ebx
+ movl 36(%esp),%ebx
3: movl %ebx,%fs
- movl 44(%esp),%ebx
+ movl 40(%esp),%ebx
4: movl %ebx,%gs
/*# call install_normal_pf_handler */
popal
- addl $16,%esp
+ addl $12,%esp
iret
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c
index 1e9df732c7..8b320fb200 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/genassym.c
@@ -200,8 +200,7 @@ ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt));
ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt));
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
-ASSYM(PC_TRAP_NESTING, offsetof(struct pcpu, pc_trap_nesting));
-
+ASSYM(PC_CR2, offsetof(struct pcpu, pc_cr2));
ASSYM(PC_CR3, offsetof(struct pcpu, pc_pdir));
#ifdef DEV_APIC
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c
new file mode 100644
index 0000000000..97ff0d129d
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/gnttab.c
@@ -0,0 +1,367 @@
+/******************************************************************************
+ * gnttab.c
+ *
+ * Two sets of functionality:
+ * 1. Granting foreign access to our memory reservation.
+ * 2. Accessing others' memory reservations via grant references.
+ * (i.e., mechanisms for both sender and recipient of grant references)
+ *
+ * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include "opt_pmap.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+
+#include <machine/gnttab.h>
+#include <machine/pmap.h>
+
+#include <machine/hypervisor-ifs.h>
+
+#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c))
+
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+ __asm__ __volatile__ ( "rep;nop" : : : "memory" );
+}
+#define cpu_relax() rep_nop()
+
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s': line %d, file %s\n", \
+ #_p , __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define WPRINTK(fmt, args...) \
+ printk("xen_grant: " fmt, ##args)
+
+static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
+static grant_ref_t gnttab_free_head;
+
+static grant_entry_t *shared;
+#if 0
+/* /proc/xen/grant */
+static struct proc_dir_entry *grant_pde;
+#endif
+
+/*
+ * Lock-free grant-entry allocator
+ */
+
+static inline int
+get_free_entry(void)
+{
+ grant_ref_t fh, nfh = gnttab_free_head;
+ do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
+ while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
+ gnttab_free_list[fh])) != fh) );
+ return fh;
+}
+
+static inline void
+put_free_entry(grant_ref_t ref)
+{
+ grant_ref_t fh, nfh = gnttab_free_head;
+ do { gnttab_free_list[ref] = fh = nfh; wmb(); }
+ while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
+}
+
+/*
+ * Public grant-issuing interface functions
+ */
+
+int
+gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly)
+{
+ int ref;
+
+ if ( unlikely((ref = get_free_entry()) == -1) )
+ return -ENOSPC;
+
+ shared[ref].frame = frame;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
+
+ return ref;
+}
+
+void
+gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int readonly)
+{
+ shared[ref].frame = frame;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
+}
+
+
+int
+gnttab_query_foreign_access(grant_ref_t ref)
+{
+ uint16_t nflags;
+
+ nflags = shared[ref].flags;
+
+ return (nflags & (GTF_reading|GTF_writing));
+}
+
+void
+gnttab_end_foreign_access(grant_ref_t ref, int readonly)
+{
+ uint16_t flags, nflags;
+
+ nflags = shared[ref].flags;
+ do {
+ if ( (flags = nflags) & (GTF_reading|GTF_writing) )
+ printk("WARNING: g.e. still in use!\n");
+ }
+ while ( (nflags = cmpxchg(&shared[ref].flags, flags, 0)) != flags );
+
+ put_free_entry(ref);
+}
+
+int
+gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
+{
+ int ref;
+
+ if ( unlikely((ref = get_free_entry()) == -1) )
+ return -ENOSPC;
+
+ shared[ref].frame = pfn;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = GTF_accept_transfer;
+
+ return ref;
+}
+
+void
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+ unsigned long pfn)
+{
+ shared[ref].frame = pfn;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = GTF_accept_transfer;
+}
+
+unsigned long
+gnttab_end_foreign_transfer(grant_ref_t ref)
+{
+ unsigned long frame = 0;
+ uint16_t flags;
+
+ flags = shared[ref].flags;
+ ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
+
+ /*
+ * If a transfer is committed then wait for the frame address to appear.
+ * Otherwise invalidate the grant entry against future use.
+ */
+ if ( likely(flags != GTF_accept_transfer) ||
+ (cmpxchg(&shared[ref].flags, flags, 0) != GTF_accept_transfer) )
+ while ( unlikely((frame = shared[ref].frame) == 0) )
+ cpu_relax();
+
+ put_free_entry(ref);
+
+ return frame;
+}
+
+void
+gnttab_free_grant_references(uint16_t count, grant_ref_t head)
+{
+ /* TODO: O(N)...? */
+ grant_ref_t to_die = 0, next = head;
+ int i;
+
+ for ( i = 0; i < count; i++ )
+ to_die = next;
+ next = gnttab_free_list[next];
+ put_free_entry( to_die );
+}
+
+int
+gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head,
+ grant_ref_t *terminal)
+{
+ int i;
+ grant_ref_t h = gnttab_free_head;
+
+ for ( i = 0; i < count; i++ )
+ if ( unlikely(get_free_entry() == -1) )
+ goto not_enough_refs;
+
+ *head = h;
+ *terminal = gnttab_free_head;
+
+ return 0;
+
+not_enough_refs:
+ gnttab_free_head = h;
+ return -ENOSPC;
+}
+
+int
+gnttab_claim_grant_reference(grant_ref_t *private_head, grant_ref_t terminal )
+{
+ grant_ref_t g;
+ if ( unlikely((g = *private_head) == terminal) )
+ return -ENOSPC;
+ *private_head = gnttab_free_list[g];
+ return g;
+}
+
+void
+gnttab_release_grant_reference( grant_ref_t *private_head,
+ grant_ref_t release )
+{
+ gnttab_free_list[release] = *private_head;
+ *private_head = release;
+}
+#ifdef notyet
+static int
+grant_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
+ int flag, struct thread *td)
+{
+
+ int ret;
+ privcmd_hypercall_t hypercall;
+
+ /* XXX Need safety checks here if using for anything other
+ * than debugging */
+ return -ENOSYS;
+
+ if ( cmd != IOCTL_PRIVCMD_HYPERCALL )
+ return -ENOSYS;
+
+ if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
+ return -EFAULT;
+
+ if ( hypercall.op != __HYPERVISOR_grant_table_op )
+ return -ENOSYS;
+
+ /* hypercall-invoking asm taken from privcmd.c */
+ __asm__ __volatile__ (
+ "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
+ "movl 4(%%eax),%%ebx ;"
+ "movl 8(%%eax),%%ecx ;"
+ "movl 12(%%eax),%%edx ;"
+ "movl 16(%%eax),%%esi ;"
+ "movl 20(%%eax),%%edi ;"
+ "movl (%%eax),%%eax ;"
+ TRAP_INSTR "; "
+ "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
+ : "=a" (ret) : "0" (&hypercall) : "memory" );
+
+ return ret;
+
+}
+
+static struct cdevsw gnttab_cdevsw = {
+ d_ioctl: grant_ioctl,
+};
+
+static int
+grant_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len;
+ unsigned int i;
+ grant_entry_t *gt;
+
+ gt = (grant_entry_t *)shared;
+ len = 0;
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ /* TODO: safety catch here until this can handle >PAGE_SIZE output */
+ if (len > (PAGE_SIZE - 200))
+ {
+ len += sprintf( page + len, "Truncated.\n");
+ break;
+ }
+
+ if ( gt[i].flags )
+ len += sprintf( page + len,
+ "Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame (0x%x)\n",
+ i,
+ gt[i].flags,
+ gt[i].domid,
+ gt[i].frame );
+
+ *eof = 1;
+ return len;
+}
+
+static int
+grant_write(struct file *file, const char __user *buffer,
+ unsigned long count, void *data)
+{
+ /* TODO: implement this */
+ return -ENOSYS;
+}
+#endif
+static int
+gnttab_init(void *unused)
+{
+ gnttab_setup_table_t setup;
+ unsigned long frames[NR_GRANT_FRAMES];
+ int i;
+
+ setup.dom = DOMID_SELF;
+ setup.nr_frames = NR_GRANT_FRAMES;
+ setup.frame_list = frames;
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
+ panic("grant table setup failed\n");
+ if (setup.status != 0)
+ panic("non-zero status in grant table setup\n");
+ shared = (grant_entry_t *)kmem_alloc_nofault(kernel_map, NR_GRANT_FRAMES);
+
+ for (i = 0; i < NR_GRANT_FRAMES; i++)
+ pmap_kenter_ma((vm_offset_t)(shared + (i*PAGE_SIZE)), frames[i] << PAGE_SHIFT);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ gnttab_free_list[i] = i + 1;
+#if 0
+ /*
+ * /proc/xen/grant : used by libxc to access grant tables
+ */
+ if ( (grant_pde = create_xen_proc_entry("grant", 0600)) == NULL )
+ {
+ WPRINTK("Unable to create grant xen proc entry\n");
+ return -1;
+ }
+
+ grant_file_ops.read = grant_pde->proc_fops->read;
+ grant_file_ops.write = grant_pde->proc_fops->write;
+
+ grant_pde->proc_fops = &grant_file_ops;
+
+ grant_pde->read_proc = &grant_read;
+ grant_pde->write_proc = &grant_write;
+#endif
+ printk("Grant table initialized\n");
+ return 0;
+}
+
+SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c
index 8fb7f9f12e..7b8fa1b2e5 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/local_apic.c
@@ -545,7 +545,6 @@ apic_register_enumerator(struct apic_enumerator *enumerator)
static void
apic_init(void *dummy __unused)
{
- struct apic_enumerator *enumerator;
uint64_t apic_base;
int retval, best;
@@ -560,6 +559,7 @@ apic_init(void *dummy __unused)
/* First, probe all the enumerators to find the best match. */
best_enum = NULL;
best = 0;
+#ifndef XEN
SLIST_FOREACH(enumerator, &enumerators, apic_next) {
retval = enumerator->apic_probe();
if (retval > 0)
@@ -569,6 +569,7 @@ apic_init(void *dummy __unused)
best = retval;
}
}
+#endif
if (best_enum == NULL) {
if (bootverbose)
printf("APIC: Could not find any APICs.\n");
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s
index 5146169162..427af5e628 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/locore.s
@@ -56,7 +56,7 @@
#include "assym.s"
.section __xen_guest
- .asciz "LOADER=generic,GUEST_VER=5.2.1,XEN_VER=2.0,BSD_SYMTAB"
+ .asciz "LOADER=generic,GUEST_VER=5.3,XEN_VER=3.0,BSD_SYMTAB"
/*
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
index ea813b897c..3bdfd7a4b4 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/machdep.c
@@ -78,6 +78,7 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/machdep.c,v 1.584 2003/12/03 21:12:09 jhb
#include <sys/sched.h>
#include <sys/sysent.h>
#include <sys/sysctl.h>
+#include <sys/smp.h>
#include <sys/ucontext.h>
#include <sys/vmmeter.h>
#include <sys/bus.h>
@@ -214,19 +215,7 @@ static struct trapframe proc0_tf;
#ifndef SMP
static struct pcpu __pcpu;
#endif
-
-static void
-map_range(void *physptr, unsigned long physptrindex,
- unsigned long physindex, int count, unsigned int flags) {
- int i;
- unsigned long pte, ppa;
- for (i = 0; i < count; i++) {
- pte = ((unsigned long)physptr) + (physptrindex << 2) + (i << 2);
- ppa = (PTOM(physindex + i) << PAGE_SHIFT) | flags | PG_V | PG_A;
- xpq_queue_pt_update((pt_entry_t *)pte, ppa);
- }
- mcl_flush_queue();
-}
+struct mtx icu_lock;
struct mem_range_softc mem_range_softc;
@@ -236,7 +225,6 @@ cpu_startup(void *dummy)
/*
* Good {morning,afternoon,evening,night}.
*/
- /* XXX need to write clock driver */
startrtclock();
printcpuinfo();
@@ -896,14 +884,6 @@ SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
static void
cpu_idle_default(void)
{
-#if 0
- /*
- * we must absolutely guarentee that hlt is the
- * absolute next instruction after sti or we
- * introduce a timing window.
- */
- __asm __volatile("sti; hlt");
-#endif
idle_block();
enable_intr();
}
@@ -1377,20 +1357,19 @@ getmemsize(void)
pmap_bootstrap((init_first)<< PAGE_SHIFT, 0);
for (i = 0; i < 10; i++)
phys_avail[i] = 0;
-#ifdef MAXMEM
- if (MAXMEM/4 < Maxmem)
- Maxmem = MAXMEM/4;
-#endif
physmem = Maxmem;
avail_end = ptoa(Maxmem) - round_page(MSGBUF_SIZE);
phys_avail[0] = init_first << PAGE_SHIFT;
phys_avail[1] = avail_end;
}
-extern pt_entry_t *KPTphys;
-extern int kernbase;
+extern unsigned long cpu0prvpage;
+extern unsigned long *SMPpt;
pteinfo_t *pteinfo_list;
unsigned long *xen_machine_phys = ((unsigned long *)VADDR(1008, 0));
+int preemptable;
+int gdt_set;
+static int ncpus;
/* Linux infection */
#define PAGE_OFFSET KERNBASE
@@ -1400,22 +1379,35 @@ void
initvalues(start_info_t *startinfo)
{
int i;
+ vm_paddr_t pdir_shadow_ma, KPTphys;
+ vm_offset_t *pdir_shadow;
+#ifdef SMP
+ int j;
+#endif
+
+#ifdef WRITABLE_PAGETABLES
+ printk("using writable pagetables\n");
+ HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+#endif
+
xen_start_info = startinfo;
xen_phys_machine = (unsigned long *)startinfo->mfn_list;
unsigned long tmpindex = ((__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + xen_start_info->nr_pt_frames) + 3 /* number of pages allocated after the pts + 1*/;
xendebug_flags = 0xffffffff;
/* pre-zero unused mapped pages */
bzero((char *)(KERNBASE + (tmpindex << PAGE_SHIFT)), (1024 - tmpindex)*PAGE_SIZE);
-
- KPTphys = (pt_entry_t *)xpmap_ptom(__pa(startinfo->pt_base + PAGE_SIZE));
IdlePTD = (pd_entry_t *)xpmap_ptom(__pa(startinfo->pt_base));
+ KPTphys = xpmap_ptom(__pa(startinfo->pt_base + PAGE_SIZE));
XENPRINTF("IdlePTD %p\n", IdlePTD);
XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
"mod_start: 0x%lx mod_len: 0x%lx\n",
xen_start_info->nr_pages, xen_start_info->shared_info,
xen_start_info->flags, xen_start_info->pt_base,
xen_start_info->mod_start, xen_start_info->mod_len);
-
+
+
+
+
/* Map proc0's UPAGES */
proc0uarea = (struct user *)(KERNBASE + (tmpindex << PAGE_SHIFT));
tmpindex += UAREA_PAGES;
@@ -1432,6 +1424,49 @@ initvalues(start_info_t *startinfo)
ldt = (union descriptor *)(KERNBASE + (tmpindex << PAGE_SHIFT));
tmpindex++;
+ /* initialize page directory shadow page */
+ pdir_shadow = (vm_offset_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+ i686_pagezero(pdir_shadow);
+ pdir_shadow_ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
+ PT_SET_MA(pdir_shadow, pdir_shadow_ma | PG_V | PG_A);
+ tmpindex++;
+
+ /* setup shadow mapping first so vtomach will work */
+ xen_pt_pin((vm_paddr_t)pdir_shadow_ma);
+ xen_queue_pt_update((vm_paddr_t)(IdlePTD + PTDPTDI),
+ pdir_shadow_ma | PG_V | PG_A | PG_RW | PG_M);
+ xen_queue_pt_update(pdir_shadow_ma + PTDPTDI*sizeof(vm_paddr_t),
+ ((vm_paddr_t)IdlePTD) | PG_V | PG_A);
+ xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
+ KPTphys | PG_V | PG_A);
+
+ xen_flush_queue();
+ /* allocate remainder of NKPT pages */
+
+
+#ifdef SMP
+#if 0
+ /* allocate cpu0 private page */
+ cpu0prvpage = (KERNBASE + (tmpindex << PAGE_SHIFT));
+ tmpindex++;
+#endif
+ /* allocate SMP page table */
+ SMPpt = (unsigned long *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+#if 0
+ /* Map the private page into the SMP page table */
+ SMPpt[0] = vtomach(cpu0prvpage) | PG_RW | PG_M | PG_V | PG_A;
+#endif
+ /* map SMP page table RO */
+ PT_SET_MA(SMPpt, *vtopte((vm_offset_t)SMPpt) & ~PG_RW);
+
+ /* put the page table into the page directory */
+ xen_queue_pt_update((vm_paddr_t)(IdlePTD + MPPTDI),
+ xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_M | PG_RW | PG_V | PG_A);
+ xen_queue_pt_update(pdir_shadow_ma + MPPTDI*sizeof(vm_paddr_t),
+ xpmap_ptom((tmpindex << PAGE_SHIFT))| PG_V | PG_A);
+ tmpindex++;
+#endif
+
#ifdef PMAP_DEBUG
pteinfo_list = (pteinfo_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
tmpindex += ((xen_start_info->nr_pages >> 10) + 1)*(1 + XPQ_CALL_DEPTH*XPQ_CALL_COUNT);
@@ -1441,57 +1476,80 @@ initvalues(start_info_t *startinfo)
#endif
/* unmap remaining pages from initial 4MB chunk */
for (i = tmpindex; i%1024 != 0; i++)
- PT_CLEAR(KERNBASE + (i << PAGE_SHIFT), TRUE);
-
+ xen_queue_pt_update(KPTphys + i*sizeof(vm_paddr_t), 0);
+ xen_flush_queue();
+
/* allocate remainder of NKPT pages */
- map_range(IdlePTD, KPTDI + 1, tmpindex, NKPT-1, PG_U | PG_M | PG_RW);
+ for (i = 0; i < NKPT-1; i++, tmpindex++) {
+ xen_queue_pt_update((vm_paddr_t)(IdlePTD + KPTDI + i + 1),
+ xpmap_ptom((tmpindex << PAGE_SHIFT)| PG_M | PG_RW | PG_V | PG_A));
+ xen_queue_pt_update(pdir_shadow_ma + (KPTDI + i + 1)*sizeof(vm_paddr_t),
+ xpmap_ptom((tmpindex << PAGE_SHIFT)| PG_V | PG_A));
+ }
tmpindex += NKPT-1;
- map_range(IdlePTD, PTDPTDI, __pa(xen_start_info->pt_base) >> PAGE_SHIFT, 1, 0);
+ PT_UPDATES_FLUSH();
- xpq_queue_pt_update(KPTphys + tmpindex, xen_start_info->shared_info | PG_A | PG_V | PG_RW);
HYPERVISOR_shared_info = (shared_info_t *)(KERNBASE + (tmpindex << PAGE_SHIFT));
+ PT_SET_MA(HYPERVISOR_shared_info,
+ xen_start_info->shared_info | PG_A | PG_V | PG_RW | PG_M);
tmpindex++;
- mcl_flush_queue();
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = (unsigned long)xen_phys_machine;
- HYPERVISOR_shared_info->arch.mfn_to_pfn_start = (unsigned long)xen_machine_phys;
+ ncpus = HYPERVISOR_shared_info->n_vcpu;
+#ifdef SMP
+ for (i = 0; i < ncpus; i++) {
+ int npages = (sizeof(struct privatespace) + 1)/PAGE_SIZE;
+ for (j = 0; j < npages; j++) {
+ vm_paddr_t ma = xpmap_ptom(tmpindex << PAGE_SHIFT);
+ tmpindex++;
+ PT_SET_VA_MA(SMPpt + i*npages + j, ma | PG_A | PG_V | PG_RW | PG_M, FALSE);
+ }
+ }
+ xen_flush_queue();
+#endif
init_first = tmpindex;
}
+
+trap_info_t trap_table[] = {
+ { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
+ { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
+ { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
+ { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
+ /* This is UPL on Linux and KPL on BSD */
+ { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
+ { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
+ { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
+ /*
+ * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
+ * no handler for double fault
+ */
+ { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
+ {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
+ {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
+ {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
+ {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
+ {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
+ {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
+ {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
+ {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
+ {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
+ {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
+ {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
+ { 0, 0, 0, 0 }
+};
+
void
init386(void)
{
int gsel_tss, metadata_missing, off, x, error;
struct pcpu *pc;
- trap_info_t trap_table[] = {
- { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
- { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
- { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
- { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
- /* This is UPL on Linux and KPL on BSD */
- { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
- { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
- { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
- /*
- * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
- * no handler for double fault
- */
- { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
- {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
- {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
- {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
- {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
- {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
- {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
- {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
- {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
- {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
- {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
- {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
- { 0, 0, 0, 0 }
- };
+ unsigned long gdtmachpfn;
+#ifdef SMP
+ int i;
+#endif
proc0.p_uarea = proc0uarea;
thread0.td_kstack = proc0kstack;
thread0.td_pcb = (struct pcb *)
@@ -1541,36 +1599,45 @@ init386(void)
gdt_segs[GDATA_SEL].ssd_limit = atop(0 - ((1 << 26) - (1 << 22) + (1 << 16)));
#endif
#ifdef SMP
+ /* XXX this will blow up if there are more than 512/NGDT vcpus */
pc = &SMP_prvspace[0].pcpu;
- gdt_segs[GPRIV_SEL].ssd_limit =
- atop(sizeof(struct privatespace) - 1);
+ for (i = 0; i < ncpus; i++) {
+ cpu_add(i, (i == 0));
+
+ gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[i];
+ gdt_segs[GPRIV_SEL].ssd_limit =
+ atop(sizeof(struct privatespace) - 1);
+ gdt_segs[GPROC0_SEL].ssd_base =
+ (int) &SMP_prvspace[i].pcpu.pc_common_tss;
+ SMP_prvspace[i].pcpu.pc_prvspace =
+ &SMP_prvspace[i].pcpu;
+
+ for (x = 0; x < NGDT; x++) {
+ ssdtosd(&gdt_segs[x], &gdt[i * NGDT + x].sd);
+ }
+ }
#else
pc = &__pcpu;
gdt_segs[GPRIV_SEL].ssd_limit =
atop(sizeof(struct pcpu) - 1);
-#endif
gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
for (x = 0; x < NGDT; x++)
ssdtosd(&gdt_segs[x], &gdt[x].sd);
- /* re-map GDT read-only */
- {
- unsigned long gdtindex = (((unsigned long)gdt - KERNBASE) >> PAGE_SHIFT);
- unsigned long gdtphys = PTOM(gdtindex);
- map_range(KPTphys, gdtindex, gdtindex, 1, 0);
- mcl_flush_queue();
- if (HYPERVISOR_set_gdt(&gdtphys, LAST_RESERVED_GDT_ENTRY + 1)) {
- panic("set_gdt failed\n");
- }
- lgdt_finish();
- }
+#endif
+
+
+ PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~PG_RW);
+ gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
+ PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
+
+
+ lgdt_finish();
+ gdt_set = 1;
if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
panic("set_trap_table failed - error %d\n", error);
}
- if ((error = HYPERVISOR_set_fast_trap(0x80)) != 0) {
- panic("set_fast_trap failed - error %d\n", error);
- }
HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback,
GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
@@ -1580,7 +1647,6 @@ init386(void)
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
PCPU_SET(curpcb, thread0.td_pcb);
- PCPU_SET(trap_nesting, 0);
PCPU_SET(pdir, (unsigned long)IdlePTD);
/*
* Initialize mutexes.
@@ -1588,6 +1654,11 @@ init386(void)
*/
mutex_init();
+ mtx_init(&clock_lock, "clk", NULL, MTX_SPIN);
+ mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
+
+
+
/* make ldt memory segments */
/*
* XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it
@@ -1600,14 +1671,11 @@ init386(void)
default_proc_ldt.ldt_base = (caddr_t)ldt;
default_proc_ldt.ldt_len = 6;
_default_ldt = (int)&default_proc_ldt;
- PCPU_SET(currentldt, _default_ldt);
- {
- unsigned long ldtindex = (((unsigned long)ldt - KERNBASE) >> PAGE_SHIFT);
- map_range(KPTphys, ldtindex, ldtindex, 1, 0);
- mcl_flush_queue();
- xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
- }
-
+ PCPU_SET(currentldt, _default_ldt);
+ PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW);
+ xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
+
+
/*
* Initialize the console before we print anything out.
*/
@@ -1638,12 +1706,15 @@ init386(void)
KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+#if 0
private_tss = 0;
PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+#endif
HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), PCPU_GET(common_tss.tss_esp0));
+
dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
@@ -1667,7 +1738,6 @@ init386(void)
PT_UPDATES_FLUSH();
/* safe to enable xen page queue locking */
- xpq_init();
msgbufinit(msgbufp, MSGBUF_SIZE);
/* XXX KMM I don't think we need call gates */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
index b975c9e491..d084a54303 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/mp_machdep.c
@@ -83,7 +83,16 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.235.2.3 2004/09/24 15:02:
#include <machine/specialreg.h>
#include <machine/privatespace.h>
+
+/* XEN includes */
#include <machine/xenfunc.h>
+#include <machine/xen_intr.h>
+
+void Xhypervisor_callback(void);
+void failsafe_callback(void);
+
+/***************/
+
#define WARMBOOT_TARGET 0
#define WARMBOOT_OFF (KERNBASE + 0x0467)
@@ -94,6 +103,10 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/mp_machdep.c,v 1.235.2.3 2004/09/24 15:02:
#define BIOS_RESET (0x0f)
#define BIOS_WARM (0x0a)
+
+#undef POSTCODE
+#define POSTCODE(x)
+
/*
* this code MUST be enabled here and in mpboot.s.
* it follows the very early stages of AP boot by placing values in CMOS ram.
@@ -175,6 +188,8 @@ extern pt_entry_t *KPTphys;
/* SMP page table page */
extern pt_entry_t *SMPpt;
+extern trap_info_t trap_table[];
+
struct pcb stoppcbs[MAXCPU];
/* Variables needed for SMP tlb shootdown. */
@@ -208,7 +223,9 @@ static u_int boot_address;
static void set_logical_apic_ids(void);
static int start_all_aps(void);
+#if 0
static void install_ap_tramp(void);
+#endif
static int start_ap(int apic_id);
static void release_aps(void *dummy);
@@ -314,6 +331,7 @@ int
cpu_mp_probe(void)
{
+ mp_ncpus = HYPERVISOR_shared_info->n_vcpu;
/*
* Always record BSP in CPU map so that the mbuf init code works
* correctly.
@@ -342,20 +360,24 @@ cpu_mp_probe(void)
return (1);
}
-/*
- * Initialize the IPI handlers and start up the AP's.
- */
-void
-cpu_mp_start(void)
+static void
+cpu_mp_ipi_init(void)
{
- int i;
-
- POSTCODE(MP_START_POST);
-
- /* Initialize the logical ID to APIC ID table. */
- for (i = 0; i < MAXCPU; i++)
- cpu_apic_ids[i] = -1;
-
+ int irq;
+ int cpu = smp_processor_id();
+ /*
+ * these are not needed by XenFreeBSD - from Keir:
+ * For TLB-flush related IPIs, Xen has hypercalls
+ * you should use instead. You can pass a pointer
+ * to a vcpu bitmap to update_va_mapping(), and to
+ * MMUEXT_flush_tlb_multi and MMUEXT_invlpg_multi.
+ * Xen will then make sure that those vcpus get
+ * flushed appropriately before returning to the
+ * caller.
+ * There is also no indication that we need to forward
+ * clock interrupts.
+ */
+#if 0
/* Install an inter-CPU IPI for TLB invalidation */
setidt(IPI_INVLTLB, IDTVEC(invltlb),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
@@ -371,22 +393,69 @@ cpu_mp_start(void)
/* Install an inter-CPU IPI for forwarding statclock() */
setidt(IPI_STATCLOCK, IDTVEC(statclock),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-
+#endif
+
+ /*
+ * These can all be consolidated. For now leaving
+ * as individual IPIs.
+ *
+ */
+#if 0
/* Install an inter-CPU IPI for lazy pmap release */
setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else
+ irq = bind_ipi_on_cpu_to_irq(cpu, IPI_LAZYPMAP);
+ PCPU_SET(lazypmap, irq);
+ PANIC_IF(intr_add_handler("pmap_lazyfix", irq,
+ (driver_intr_t *)pmap_lazyfix_action,
+ NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
+#endif
+#if 0
/* Install an inter-CPU IPI for all-CPU rendezvous */
setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else
+ irq = bind_ipi_on_cpu_to_irq(cpu, IPI_RENDEZVOUS);
+ PCPU_SET(rendezvous, irq);
+ PANIC_IF(intr_add_handler("smp_rendezvous", irq,
+ (driver_intr_t *)smp_rendezvous_action,
+ NULL, INTR_TYPE_CLK | INTR_FAST, NULL));
+#endif
+#if 0
/* Install an inter-CPU IPI for forcing an additional software trap */
setidt(IPI_AST, IDTVEC(cpuast),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
-
+#else
+ irq = bind_ipi_on_cpu_to_irq(cpu, IPI_AST);
+ PCPU_SET(cpuast, irq);
+#endif
+ /* XXX ignore for now */
+#if 0
/* Install an inter-CPU IPI for CPU stop/restart */
setidt(IPI_STOP, IDTVEC(cpustop),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#endif
+
+}
+
+SYSINIT(ipi_setup, SI_SUB_INTR, SI_ORDER_ANY, cpu_mp_ipi_init, NULL);
+
+/*
+ * Initialize the IPI handlers and start up the AP's.
+ */
+void
+cpu_mp_start(void) /* --- Start here --- */
+{
+ int i;
+
+ POSTCODE(MP_START_POST);
+
+ /* Initialize the logical ID to APIC ID table. */
+ for (i = 0; i < MAXCPU; i++)
+ cpu_apic_ids[i] = -1;
/* Set boot_cpu_id if needed. */
@@ -437,35 +506,44 @@ cpu_mp_announce(void)
void
init_secondary(void)
{
- int gsel_tss;
- int x, myid;
+ int myid;
+ unsigned long gdtmachpfn;
+ printk("MADE IT!!");
+
#if 0
u_int cr0;
#endif
+ /* Steps to booting SMP on xen as gleaned from XenLinux:
+ * - cpu_init() - processor specific initialization
+ * - smp_callin()
+ * - wait 2s for BP to finish its startup sequence
+ * - map_cpu_to_logical_apicid()
+ * - save cpuid info
+ * - set bit in callin map to let master (BP?) continue
+ * - local setup timer() - per cpu timer initialization
+ * - ldebug_setup() - bind debug IRQ to local CPU.
+ * - smp_intr_init() - IPI setup that we do in cpu_mp_start
+ * - local_irq_enable() - enable interrupts locally
+ * - cpu_set(id, map) - announce that we're up
+ * - cpu_idle() - make us schedulable
+ */
+
+
/* bootAP is set in start_ap() to our ID. */
myid = bootAP;
- gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
- gdt_segs[GPROC0_SEL].ssd_base =
- (int) &SMP_prvspace[myid].pcpu.pc_common_tss;
- SMP_prvspace[myid].pcpu.pc_prvspace =
- &SMP_prvspace[myid].pcpu;
-
- for (x = 0; x < NGDT; x++) {
- ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
- }
-#if 0
- r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
- r_gdt.rd_base = (int) &gdt[myid * NGDT];
- lgdt(&r_gdt); /* does magic intra-segment return */
+ gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
+ PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0);
- lidt(&r_idt);
- lldt(_default_ldt);
-#endif
+
+ lgdt_finish();
+
+ PCPU_SET(cpuid, myid);
+
+
+ set_user_ldt((struct mdproc *)_default_ldt);
PCPU_SET(currentldt, _default_ldt);
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
@@ -557,6 +635,13 @@ init_secondary(void)
while (smp_started == 0)
ia32_pause();
+ /* need to wait until now to setup the IPIs as SI_SUB_CPU is
+ * much earlier than SI_SUB_INTR
+ */
+ ap_evtchn_init(myid);
+ ap_cpu_initclocks();
+ cpu_mp_ipi_init();
+
/* ok, now grab sched_lock and enter the scheduler */
mtx_lock_spin(&sched_lock);
@@ -610,28 +695,35 @@ set_logical_apic_ids(void)
static int
start_all_aps(void)
{
-#ifndef PC98
- u_char mpbiosreason;
-#endif
- u_long mpbioswarmvec;
struct pcpu *pc;
char *stack;
- uintptr_t kptbase;
- int i, pg, apic_id, cpu;
+ int i, apic_id, cpu;
+
+ /*
+ * This function corresponds most closely to
+ * smp_boot_cpus in XenLinux - the sequence there
+ * is:
+ * - check if SMP config is found - if not:
+ * - clear the I/O APIC IRQs
+ * - map cpu to logical apicid
+ * - exit
+ * - smp_intr_init - IPI initialization
+ * - map cpu to logical apicid
+ * - boot each of the vcpus
+ * - clear and then construct the cpu sibling [logical CPUs] map.
+ *
+ */
POSTCODE(START_ALL_APS_POST);
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
-
+#if 0
/* install the AP 1st level boot code */
install_ap_tramp();
/* save the current value of the warm-start vector */
mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
-#ifndef PC98
- outb(CMOS_REG, BIOS_RESET);
- mpbiosreason = inb(CMOS_DATA);
-#endif
+
/* set up temporary P==V mapping for AP boot */
/* XXX this is a hack, we should boot the AP on its own stack/PTD */
@@ -640,7 +732,7 @@ start_all_aps(void)
PTD[i] = (pd_entry_t)(PG_V | PG_RW |
((kptbase + i * PAGE_SIZE) & PG_FRAME));
invltlb();
-
+#endif
/* start each AP */
for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
if (!cpu_info[apic_id].cpu_present ||
@@ -650,7 +742,7 @@ start_all_aps(void)
/* save APIC ID for this logical ID */
cpu_apic_ids[cpu] = apic_id;
-
+#if 0
/* first page of AP's private space */
pg = cpu * i386_btop(sizeof(struct privatespace));
@@ -665,11 +757,14 @@ start_all_aps(void)
for (i = 0; i < KSTACK_PAGES; i++)
SMPpt[pg + 1 + i] = (pt_entry_t)
(PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
+#endif
+ pc = &SMP_prvspace[cpu].pcpu;
/* prime data page for it to use */
pcpu_init(pc, cpu, sizeof(struct pcpu));
pc->pc_apic_id = apic_id;
+#if 0
/* setup a vector to our boot code */
*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
@@ -677,7 +772,7 @@ start_all_aps(void)
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
#endif
-
+#endif
bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
PAGE_SIZE];
bootAP = cpu;
@@ -700,13 +795,10 @@ start_all_aps(void)
/* build our map of 'other' CPUs */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+#if 0
/* restore the warmstart vector */
*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
-#ifndef PC98
- outb(CMOS_REG, BIOS_RESET);
- outb(CMOS_DATA, mpbiosreason);
#endif
-
/*
* Set up the idle context for the BSP. Similar to above except
* that some was done by locore, some by pmap.c and some is implicit
@@ -739,7 +831,7 @@ extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;
-
+#if 0
static void
install_ap_tramp(void)
{
@@ -791,6 +883,19 @@ install_ap_tramp(void)
*dst16 = (u_int) boot_address & 0xffff;
*dst8 = ((u_int) boot_address >> 16) & 0xff;
}
+#endif
+
+static void
+cpu_mp_trap_init(trap_info_t *trap_ctxt)
+{
+ trap_info_t *t = trap_table;
+
+ for (t = trap_table; t->address; t++) {
+ trap_ctxt[t->vector].flags = t->flags;
+ trap_ctxt[t->vector].cs = t->cs;
+ trap_ctxt[t->vector].address = t->address;
+ }
+}
/*
* This function starts the AP (application processor) identified
@@ -802,8 +907,25 @@ install_ap_tramp(void)
static int
start_ap(int apic_id)
{
- int vector, ms;
- int cpus;
+ int vector, ms, i;
+ int cpus, boot_error;
+ vcpu_guest_context_t ctxt;
+
+ /*
+ * This is the FreeBSD equivalent to do_boot_cpu(apicid) in
+ * smpboot.c.
+ * its initialization sequence consists of:
+ * - fork_idle(cpu) to create separate idle context
+ * - initialization of idle's context to start_secondary
+ * - initialization of cpu ctxt to start in startup_32_smp
+ * - then we call HYPERVISOR_boot_vcpu with the cpu index and
+ * a pointer to the context.
+ * - on boot success we:
+ * - set ourselves in the callout_map
+ * - wait up to 5 seconds for us to be set in the callin map
+ * - set x86_cpu_to_apicid[cpu] = apicid;
+ *
+ */
POSTCODE(START_AP_POST);
@@ -813,6 +935,55 @@ start_ap(int apic_id)
/* used as a watchpoint to signal AP startup */
cpus = mp_naps;
+ memset(&ctxt, 0, sizeof(ctxt));
+
+ ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
+ ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
+ ctxt.user_regs.fs = 0;
+ ctxt.user_regs.gs = 0;
+ ctxt.user_regs.ss = __KERNEL_DS;
+ ctxt.user_regs.cs = __KERNEL_CS;
+ ctxt.user_regs.eip = (unsigned long)init_secondary;
+ ctxt.user_regs.esp = (unsigned long)bootSTK;
+#ifdef notyet
+ ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+#else
+ ctxt.user_regs.eflags = (1<<9) | (1<<2);
+#endif
+ /* FPU is set up to default initial state. */
+ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+ /* Virtual IDT is empty at start-of-day. */
+ for ( i = 0; i < 256; i++ )
+ {
+ ctxt.trap_ctxt[i].vector = i;
+ ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
+ }
+ cpu_mp_trap_init(ctxt.trap_ctxt);
+
+ /* No LDT. */
+ ctxt.ldt_ents = 0;
+
+ /* Ring 1 stack is the initial stack. */
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_sp = (unsigned long)bootSTK;
+
+ /* Callback handlers. */
+ ctxt.event_callback_cs = __KERNEL_CS;
+ ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback;
+ ctxt.failsafe_callback_cs = __KERNEL_CS;
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+ ctxt.pt_base = (vm_paddr_t)IdlePTD;
+
+ boot_error = HYPERVISOR_boot_vcpu(bootAP, &ctxt);
+
+
+ if (boot_error)
+ printk("Houston we have a problem\n");
+ else
+ printk("boot_vcpu succeeded\n");
+#if 0
/*
* first we do an INIT/RESET IPI this INIT IPI might be run, reseting
* and running the target CPU. OR this INIT IPI might be latched (P5
@@ -862,6 +1033,7 @@ start_ap(int apic_id)
APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
vector, apic_id);
lapic_ipi_wait(-1);
+#endif
DELAY(200); /* wait ~200uS */
/* Wait up to 5 seconds for it to start. */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
index ee61e80ed9..6529602eb7 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/pmap.c
@@ -273,6 +273,7 @@ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
vm_offset_t va);
+static void pmap_copy_ma(vm_paddr_t src, vm_paddr_t dst);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
@@ -300,6 +301,32 @@ static void pmap_mark_unprivileged(unsigned long pa);
static void pmap_dec_ref_page(vm_page_t m);
int pmap_pid_dump(int pid);
#endif
+
+void
+pd_set(struct pmap *pmap, vm_paddr_t *ptr, vm_paddr_t val, int type)
+{
+ vm_paddr_t shadow_pdir_ma = pmap->pm_pdir[PTDPTDI] & ~0xFFF;
+ vm_paddr_t shadow_offset = (vm_paddr_t)(ptr - pmap->pm_pdir)*sizeof(vm_paddr_t);
+
+ switch (type) {
+ case SH_PD_SET_VA:
+ xen_queue_pt_update(shadow_pdir_ma + shadow_offset,
+ xpmap_ptom(val & ~(PG_RW|PG_M)));
+ xen_queue_pt_update(vtomach(ptr),
+ xpmap_ptom(val));
+ break;
+ case SH_PD_SET_VA_MA:
+ xen_queue_pt_update(shadow_pdir_ma + shadow_offset,
+ val & ~(PG_RW|PG_M));
+ xen_queue_pt_update(vtomach(ptr), val);
+ break;
+ case SH_PD_SET_VA_CLEAR:
+ xen_queue_pt_update(shadow_pdir_ma + shadow_offset, 0);
+ xen_queue_pt_update(vtomach(ptr), 0);
+ break;
+ }
+}
+
/*
* Move the kernel virtual free pointer to the next
* 4MB. This is used to help improve performance
@@ -335,7 +362,6 @@ pmap_bootstrap(firstaddr, loadaddr)
{
vm_offset_t va;
pt_entry_t *pte, *unused;
- int i;
/*
* XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
@@ -381,9 +407,10 @@ pmap_bootstrap(firstaddr, loadaddr)
* CMAP1/CMAP2 are used for zeroing and copying pages.
* CMAP3 is used for the idle process page zeroing.
*/
- SYSMAP(caddr_t, CMAP1, CADDR1, 1)
- SYSMAP(caddr_t, CMAP2, CADDR2, 1)
- SYSMAP(caddr_t, CMAP3, CADDR3, 1)
+ SYSMAP(caddr_t, CMAP1, CADDR1, 1);
+ SYSMAP(caddr_t, CMAP2, CADDR2, 1);
+ SYSMAP(caddr_t, CMAP3, CADDR3, 1);
+
PT_CLEAR_VA(CMAP3, TRUE);
mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF);
@@ -415,8 +442,6 @@ pmap_bootstrap(firstaddr, loadaddr)
PT_CLEAR_VA(CMAP1, FALSE);
PT_CLEAR_VA(CMAP2, FALSE);
- for (i = 0; i < NKPT; i++)
- PT_CLEAR_VA(&PTD[i], FALSE);
PT_UPDATES_FLUSH();
#ifdef XEN_UNNEEDED
/* Turn on PG_G on kernel page(s) */
@@ -642,6 +667,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
mtx_unlock_spin(&smp_rv_mtx);
else
critical_exit();
+ PT_UPDATES_FLUSH();
}
void
@@ -681,6 +707,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
mtx_unlock_spin(&smp_rv_mtx);
else
critical_exit();
+ PT_UPDATES_FLUSH();
}
void
@@ -716,6 +743,7 @@ pmap_invalidate_all(pmap_t pmap)
mtx_unlock_spin(&smp_rv_mtx);
else
critical_exit();
+ PT_UPDATES_FLUSH();
}
#else /* !SMP */
/*
@@ -763,7 +791,7 @@ pmap_invalidate_all(pmap_t pmap)
static __inline int
pmap_is_current(pmap_t pmap)
{
-
+ /* XXX validate */
return (pmap == kernel_pmap ||
(pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
(pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
@@ -790,7 +818,7 @@ pmap_pte(pmap_t pmap, vm_offset_t va)
newpf = PT_GET(pde) & PG_FRAME;
tmppf = PT_GET(PMAP2) & PG_FRAME;
if (tmppf != newpf) {
- PT_SET_VA(PMAP2, newpf | PG_V | PG_A, FALSE);
+ PT_SET_VA(PMAP2, newpf | PG_V | PG_A, FALSE);
pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
}
return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
@@ -952,7 +980,23 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
- PT_SET(va, pa | PG_RW | PG_V | pgeflag, TRUE);
+ pt_entry_t *pte;
+
+ pte = vtopte(va);
+ pte_store(pte, pa | PG_RW | PG_V | pgeflag);
+}
+
+/*
+ * Add a wired page to the kva.
+ * Note: not SMP coherent.
+ */
+PMAP_INLINE void
+pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
+{
+ pt_entry_t *pte;
+
+ pte = vtopte(va);
+ PT_SET_VA_MA(pte, ma | PG_RW | PG_V | pgeflag, TRUE);
}
/*
@@ -962,7 +1006,10 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa)
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
- PT_CLEAR(va, TRUE);
+ pt_entry_t *pte;
+
+ pte = vtopte(va);
+ pte_clear(pte);
}
/*
@@ -981,12 +1028,10 @@ vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
vm_offset_t va, sva;
- pt_entry_t *pte;
va = sva = *virt;
while (start < end) {
- pte = vtopte(va);
- PT_SET_VA(pte, start | PG_RW | PG_V | pgeflag, FALSE);
+ pmap_kenter(va, start);
va += PAGE_SIZE;
start += PAGE_SIZE;
}
@@ -1013,8 +1058,7 @@ pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
va = sva;
while (count-- > 0) {
- PT_SET(va, VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag,
- FALSE);
+ pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
va += PAGE_SIZE;
m++;
}
@@ -1034,7 +1078,7 @@ pmap_qremove(vm_offset_t sva, int count)
va = sva;
while (count-- > 0) {
- PT_CLEAR(va, FALSE);
+ pmap_kremove(va);
va += PAGE_SIZE;
}
/* invalidate will flush the update queue */
@@ -1067,8 +1111,8 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
/*
* unmap the page table page
*/
- xpq_queue_unpin_table(pmap->pm_pdir[m->pindex]);
- PT_CLEAR_VA(&pmap->pm_pdir[m->pindex], TRUE);
+ xen_pt_unpin(pmap->pm_pdir[m->pindex]);
+ PD_CLEAR_VA(pmap, &pmap->pm_pdir[m->pindex], TRUE);
--pmap->pm_stats.resident_count;
/*
@@ -1126,8 +1170,8 @@ pmap_pinit0(pmap)
void
pmap_pinit(struct pmap *pmap)
{
- vm_page_t m, ptdpg[NPGPTD];
- vm_paddr_t ma;
+ vm_page_t m, ptdpg[NPGPTD*2];
+ vm_paddr_t ma, ma_shadow;
static int color;
int i;
@@ -1153,7 +1197,7 @@ pmap_pinit(struct pmap *pmap)
/*
* allocate the page directory page(s)
*/
- for (i = 0; i < NPGPTD;) {
+ for (i = 0; i < NPGPTD*2;) {
m = vm_page_alloc(NULL, color++,
VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
VM_ALLOC_ZERO);
@@ -1164,38 +1208,51 @@ pmap_pinit(struct pmap *pmap)
ptdpg[i++] = m;
}
}
+#ifdef PAE
+ #error "missing shadow handling for PAE"
+#endif
pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
- for (i = 0; i < NPGPTD; i++) {
- if ((ptdpg[i]->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
- }
-
mtx_lock_spin(&allpmaps_lock);
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
mtx_unlock_spin(&allpmaps_lock);
/* Wire in kernel global address entries. */
/* XXX copies current process, does not fill in MPPTDI */
- bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
+ bcopy(kernel_pmap->pm_pdir + KPTDI, pmap->pm_pdir + KPTDI,
+ nkpt * sizeof(pd_entry_t));
+ /* XXX need to copy global address entries to page directory's L1 shadow */
+ ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD]));
+ /* L1 pin shadow page director{y,ies} */
+ for (i = 0; i < NPGPTD; i++) {
+ ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD + i]));
+ pmap_copy_ma(kernel_pmap->pm_pdir[PTDPTDI + i] & ~(PG_RW|PG_M), ma);
+ xen_pt_pin(ma);
+ }
+
#ifdef SMP
- pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
+ pmap->pm_pdir[MPPTDI] = kernel_pmap->pm_pdir[MPPTDI];
#endif
- /* install self-referential address mapping entry(s) */
+ /* pin and install L1 shadow */
for (i = 0; i < NPGPTD; i++) {
ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
- pmap->pm_pdir[PTDPTDI + i] = ma | PG_V | PG_A;
+ ma_shadow = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD+i]));
+ /* re-map page directory read-only and pin */
+ PT_SET_MA(pmap->pm_pdir + i*PAGE_SIZE, ma | PG_V | PG_A);
+ xen_pgd_pin(ma);
+ /* add L1 shadow of L2 */
+ xen_queue_pt_update(vtomach(&pmap->pm_pdir[PTDPTDI + i]),
+ ma_shadow | PG_V | PG_A);
+ xen_queue_pt_update(ma_shadow + PTDPTDI*sizeof(vm_paddr_t),
+ vtomach(pmap->pm_pdir) | PG_V | PG_A);
+
#ifdef PAE
+ #error "unsupported currently"
pmap->pm_pdpt[i] = ma | PG_V;
#endif
-#ifndef PAE
- PT_SET_MA(pmap->pm_pdir, ma | PG_V | PG_A, TRUE);
-#else
- panic("FIX ME!");
-#endif
- xpq_queue_pin_table(ma, XPQ_PIN_L2_TABLE);
}
+ xen_flush_queue();
pmap->pm_active = 0;
TAILQ_INIT(&pmap->pm_pvlist);
@@ -1246,9 +1303,9 @@ _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
pmap->pm_stats.resident_count++;
ptepa = VM_PAGE_TO_PHYS(m);
- xpq_queue_pin_table(xpmap_ptom(ptepa), XPQ_PIN_L1_TABLE);
- PT_SET_VA(&pmap->pm_pdir[ptepindex],
- (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
+ xen_pt_pin(xpmap_ptom(ptepa));
+ PD_SET_VA(pmap, &pmap->pm_pdir[ptepindex],
+ (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
return m;
}
@@ -1317,7 +1374,6 @@ static u_int *lazymask;
static u_int lazyptd;
static volatile u_int lazywait;
-void pmap_lazyfix_action(void);
void
pmap_lazyfix_action(void)
@@ -1405,7 +1461,7 @@ pmap_lazyfix(pmap_t pmap)
void
pmap_release(pmap_t pmap)
{
- vm_page_t m, ptdpg[NPGPTD];
+ vm_page_t m, ptdpg[NPGPTD + 1];
vm_paddr_t ma;
int i;
@@ -1418,27 +1474,28 @@ pmap_release(pmap_t pmap)
LIST_REMOVE(pmap, pm_list);
mtx_unlock_spin(&allpmaps_lock);
- for (i = 0; i < NPGPTD; i++)
+ for (i = 0; i < NPGPTD; i++) {
ptdpg[i] = PHYS_TO_VM_PAGE(PT_GET(&pmap->pm_pdir[PTDPTDI + i]));
-
- for (i = 0; i < nkpt + NPGPTD; i++)
- PT_CLEAR_VA(&pmap->pm_pdir[PTDPTDI + i], FALSE);
+ }
+ ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir));
+ for (i = 0; i < nkpt + NPGPTD; i++)
+ PD_CLEAR_VA(pmap, &pmap->pm_pdir[PTDPTDI + i], FALSE);
bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
sizeof(*pmap->pm_pdir));
#ifdef SMP
- PT_CLEAR_VA(&pmap->pm_pdir[MPPTDI], FALSE);
+ PD_CLEAR_VA(pmap, &pmap->pm_pdir[MPPTDI], FALSE);
#endif
PT_UPDATES_FLUSH();
pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
vm_page_lock_queues();
- for (i = 0; i < NPGPTD; i++) {
+ for (i = 0; i < NPGPTD + 1; i++) {
m = ptdpg[i];
ma = xpmap_ptom(VM_PAGE_TO_PHYS(m));
- xpq_queue_unpin_table(ma);
- pmap_zero_page(m);
+ /* unpinning L1 and L2 treated the same */
+ xen_pgd_unpin(ma);
#ifdef PAE
KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
("pmap_release: got wrong ptd page"));
@@ -1513,12 +1570,12 @@ pmap_growkernel(vm_offset_t addr)
pmap_zero_page(nkpg);
ptppaddr = VM_PAGE_TO_PHYS(nkpg);
newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
- PT_SET_VA(&pdir_pde(PTD, kernel_vm_end), newpdir, TRUE);
+ PD_SET_VA(kernel_pmap, &pdir_pde(kernel_pmap->pm_pdir, kernel_vm_end), newpdir, TRUE);
mtx_lock_spin(&allpmaps_lock);
LIST_FOREACH(pmap, &allpmaps, pm_list) {
pde = pmap_pde(pmap, kernel_vm_end);
- PT_SET_VA(pde, newpdir, FALSE);
+ PD_SET_VA(pmap, pde, newpdir, FALSE);
}
PT_UPDATES_FLUSH();
mtx_unlock_spin(&allpmaps_lock);
@@ -1735,7 +1792,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- PT_CLEAR_VA(pmap->pm_pdir[pdirindex], TRUE);
+ PD_CLEAR_VA(pmap, &pmap->pm_pdir[pdirindex], TRUE);
pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
anyvalid = 1;
continue;
@@ -2219,9 +2276,9 @@ retry:
* Now validate mapping with RO protection
*/
if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
- PT_SET(va, pa | PG_V | PG_U, TRUE);
+ pte_store(pte, pa | PG_V | PG_U);
else
- PT_SET(va, pa | PG_V | PG_U | PG_MANAGED, TRUE);
+ pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
out:
vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
@@ -2309,7 +2366,7 @@ retry:
pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
npdes = size >> PDRSHIFT;
for(i = 0; i < npdes; i++) {
- PT_SET_VA(&pmap->pm_pdir[ptepindex],
+ PD_SET_VA(pmap, &pmap->pm_pdir[ptepindex],
ptepa | PG_U | PG_RW | PG_V | PG_PS, FALSE);
ptepa += NBPDR;
ptepindex += 1;
@@ -2327,7 +2384,7 @@ pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
for (i = 0; i < npages; i++) {
pt_entry_t *pte;
pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
- PT_SET_MA(va + i*PAGE_SIZE, *pte & ~(PG_RW|PG_M), FALSE);
+ pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M)));
PMAP_MARK_PRIV(xpmap_mtop(*pte));
pmap_pte_release(pte);
}
@@ -2342,7 +2399,7 @@ pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
pt_entry_t *pte;
pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
- PT_SET_MA(va + i*PAGE_SIZE, *pte | (PG_RW|PG_M), FALSE);
+ pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M));
pmap_pte_release(pte);
}
PT_UPDATES_FLUSH();
@@ -2441,7 +2498,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
if (srcptepaddr & PG_PS) {
if (dst_pmap->pm_pdir[ptepindex] == 0) {
- PT_SET_VA(&dst_pmap->pm_pdir[ptepindex], srcptepaddr, TRUE);
+ PD_SET_VA(dst_pmap, &dst_pmap->pm_pdir[ptepindex], srcptepaddr, TRUE);
dst_pmap->pm_stats.resident_count +=
NBPDR / PAGE_SIZE;
}
@@ -2612,6 +2669,32 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
mtx_unlock(&CMAPCADDR12_lock);
}
+void
+pmap_copy_ma(vm_paddr_t src, vm_paddr_t dst)
+{
+
+ mtx_lock(&CMAPCADDR12_lock);
+ if (*CMAP1)
+ panic("pmap_copy_ma: CMAP1 busy");
+ if (*CMAP2)
+ panic("pmap_copy_ma: CMAP2 busy");
+ sched_pin();
+#ifdef I386_CPU
+ invltlb();
+#else
+ invlpg((u_int)CADDR1);
+ invlpg((u_int)CADDR2);
+#endif
+ PT_SET_VA_MA(CMAP1, PG_V | src | PG_A, FALSE);
+ PT_SET_VA_MA(CMAP2, PG_V | PG_RW | dst | PG_A | PG_M, TRUE);
+
+ bcopy(CADDR1, CADDR2, PAGE_SIZE);
+ PT_CLEAR_VA(CMAP1, FALSE);
+ PT_CLEAR_VA(CMAP2, TRUE);
+ sched_unpin();
+ mtx_unlock(&CMAPCADDR12_lock);
+}
+
/*
* Returns true if the pmap's pv is one of the first
* 16 pvs linked to from this page. This count may
@@ -2790,7 +2873,11 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
boolean_t rv;
rv = FALSE;
-
+ /* XXX
+ * in order for writable pagetables to help,
+ * this has to work - check if we aren't doing
+ * an invlpg on the page tables linear mappings
+ */
return (rv);
PMAP_LOCK(pmap);
if (pmap_pde(pmap, addr)) {
@@ -3007,7 +3094,7 @@ pmap_mapdev(pa, size)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
for (tmpva = va; size > 0; ) {
- PT_SET(tmpva, pa | PG_RW | PG_V | pgeflag, FALSE);
+ pmap_kenter(tmpva, pa);
size -= PAGE_SIZE;
tmpva += PAGE_SIZE;
pa += PAGE_SIZE;
@@ -3029,7 +3116,7 @@ pmap_unmapdev(va, size)
offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE);
for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
- PT_CLEAR(tmpva, FALSE);
+ pmap_kremove(tmpva);
pmap_invalidate_range(kernel_pmap, va, tmpva);
kmem_free(kernel_map, base, size);
}
@@ -3379,3 +3466,9 @@ pmap_pvdump(pa)
printf(" ");
}
#endif
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c
index a74986ed18..662f94c456 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/trap.c
@@ -181,11 +181,6 @@ trap(struct trapframe frame)
u_int sticks = 0;
int i = 0, ucode = 0, type, code;
vm_offset_t eva;
-#ifdef STACK_DEBUGGING
- int nesting, current_sp;
- static int prev_csp = 0, prev_ssp = 0;
- nesting = PCPU_GET(trap_nesting);
-#endif
#ifdef POWERFAIL_NMI
static int lastalert = 0;
@@ -227,7 +222,7 @@ trap(struct trapframe frame)
* kernel can print out a useful trap message and even get
* to the debugger.
*/
- eva = frame.tf_cr2;
+ eva = PCPU_GET(cr2);
if (td->td_critnest != 0)
trap_fatal(&frame, eva);
@@ -613,9 +608,6 @@ user:
mtx_assert(&Giant, MA_NOTOWNED);
userout:
out:
-#ifdef STACK_DEBUGGING
- PCPU_SET(trap_nesting, nesting);
-#endif
return;
}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c
index cff67833f7..7f04666723 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/vm_machdep.c
@@ -94,12 +94,13 @@ __FBSDID("$FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.219 2003/11/17 18:22:24 a
#endif
#include <machine/xenfunc.h>
-
+#if 0
#ifdef SMP
static void cpu_reset_proxy(void);
static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
+#endif
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
@@ -462,6 +463,7 @@ kvtop(void *addr)
* Force reset the processor by invalidating the entire address space!
*/
+#if 0
#ifdef SMP
static void
cpu_reset_proxy()
@@ -473,10 +475,10 @@ cpu_reset_proxy()
stop_cpus((1<<cpu_reset_proxyid));
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
- cpu_reset_real();
+ cpu_reset();
}
#endif
-
+#endif
void
cpu_reset()
{
diff --git a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
index dd24a206b1..2d7f184ac7 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
@@ -1,8 +1,7 @@
-/* $NetBSD:$ */
-
/*
*
* Copyright (c) 2004 Christian Limpach.
+ * Copyright (c) 2004,2005 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -381,245 +380,164 @@ printk(const char *fmt, ...)
(void)HYPERVISOR_console_write(buf, ret);
}
-#define XPQUEUE_SIZE 2048
-
-typedef struct xpq_queue {
- uint32_t ptr;
- uint32_t val;
-} xpq_queue_t;
-#define MCLQUEUE_SIZE 512
-static multicall_entry_t mcl_queue[MCLQUEUE_SIZE];
-static int mcl_idx = 0;
+#define XPQUEUE_SIZE 128
+#ifdef SMP
+/* per-cpu queues and indices */
+static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];
+static int xpq_idx[MAX_VIRT_CPUS];
-static xpq_queue_t xpq_queue[XPQUEUE_SIZE];
-static boolean_t xpq_initialized;
-static struct mtx update_lock;
+#define XPQ_QUEUE xpq_queue[vcpu]
+#define XPQ_IDX xpq_idx[vcpu]
+#define SET_VCPU() int vcpu = smp_processor_id()
+#else
+static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int xpq_idx = 0;
-/*
- * Don't attempt to lock until after lock & memory initialization
- */
-#define XPQ_LOCK(lock, flags) \
- if (likely(xpq_initialized)) \
- mtx_lock_irqsave(lock, flags)
-#define XPQ_UNLOCK(lock, flags) \
- if (likely(xpq_initialized)) \
- mtx_unlock_irqrestore(lock, flags)
+#define XPQ_QUEUE xpq_queue
+#define XPQ_IDX xpq_idx
+#define SET_VCPU()
+#endif
+#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
-void
-xpq_init(void)
-{
- xpq_initialized = TRUE;
- mtx_init(&update_lock, "mmu", "MMU LOCK", MTX_SPIN);
-}
static __inline void
-_xpq_flush_queue(void)
+_xen_flush_queue(void)
{
- int _xpq_idx = xpq_idx;
- int error, i;
-
- xpq_idx = 0;
- /* Make sure index is cleared first to avoid double updates. */
- error = HYPERVISOR_mmu_update((mmu_update_t *)xpq_queue, _xpq_idx,
- NULL);
-
- if (__predict_false(error < 0)) {
- for (i = 0; i < _xpq_idx; i++)
- printk("val: %x ptr: %p\n", xpq_queue[i].val, xpq_queue[i].ptr);
- panic("Failed to execute MMU updates: %d", error);
- }
+ SET_VCPU();
+ int _xpq_idx = XPQ_IDX;
+ int error, i;
+ /* window of vulnerability here? */
-}
-static void
-xpq_flush_queue(void)
-{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- if (xpq_idx != 0) _xpq_flush_queue();
- XPQ_UNLOCK(&update_lock, flags);
-}
+ XPQ_IDX = 0;
+ /* Make sure index is cleared first to avoid double updates. */
+ error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
+ _xpq_idx, NULL, DOMID_SELF);
+
+ if (__predict_false(error < 0)) {
+ for (i = 0; i < _xpq_idx; i++)
+ printk("val: %x ptr: %p\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
+ panic("Failed to execute MMU updates: %d", error);
+ }
-static __inline void
-_mcl_flush_queue(void)
-{
- int _mcl_idx = mcl_idx;
- mcl_idx = 0;
- (void)HYPERVISOR_multicall(mcl_queue, _mcl_idx);
}
void
-mcl_flush_queue(void)
+xen_flush_queue(void)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- if (__predict_true(mcl_idx != 0)) _mcl_flush_queue();
- XPQ_UNLOCK(&update_lock, flags);
- /* XXX: until we can remove the pervasive
- * __HYPERVISOR_update_va_mapping calls, we have 2 queues. In order
- * to ensure that they never get out of sync, only 1 flush interface
- * is provided.
- */
- xpq_flush_queue();
+ SET_VCPU();
+ if (XPQ_IDX != 0) _xen_flush_queue();
}
-
static __inline void
-xpq_increment_idx(void)
+xen_increment_idx(void)
{
- xpq_idx++;
- if (__predict_false(xpq_idx == XPQUEUE_SIZE))
- xpq_flush_queue();
-}
+ SET_VCPU();
-static __inline void
-mcl_increment_idx(void)
-{
- mcl_idx++;
- if (__predict_false(mcl_idx == MCLQUEUE_SIZE))
- mcl_flush_queue();
+ XPQ_IDX++;
+ if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
+ xen_flush_queue();
}
void
-xpq_queue_invlpg(vm_offset_t va)
+xen_invlpg(vm_offset_t va)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_INVLPG_LOCAL;
+ op.linear_addr = va & ~PAGE_MASK;
+ xen_flush_queue();
+ PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
void
load_cr3(uint32_t val)
{
- xpq_queue_pt_switch(val);
- xpq_flush_queue();
+ struct mmuext_op op;
+ op.cmd = MMUEXT_NEW_BASEPTR;
+ op.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
+ xen_flush_queue();
+ PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void
-xen_set_ldt(vm_offset_t base, uint32_t entries)
-{
- xpq_queue_set_ldt(base, entries);
- _xpq_flush_queue();
-}
void
xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
- unsigned long flags = 0;
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- xpq_queue[xpq_idx].val = pfn;
- xpq_increment_idx();
- _xpq_flush_queue();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ XPQ_QUEUE[XPQ_IDX].val = pfn;
+ xen_increment_idx();
+ _xen_flush_queue();
}
void
-xpq_queue_pt_update(pt_entry_t *ptr, pt_entry_t val)
+xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = (uint32_t)ptr;
- xpq_queue[xpq_idx].val = val;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ SET_VCPU();
+
+ XPQ_QUEUE[XPQ_IDX].ptr = (memory_t)ptr;
+ XPQ_QUEUE[XPQ_IDX].val = (memory_t)val;
+ xen_increment_idx();
}
void
-mcl_queue_pt_update(vm_offset_t va, vm_paddr_t ma)
+xen_pgd_pin(unsigned long ma)
{
-#if 0
- printf("setting va %x to ma %x\n", va, ma);
-#endif
- unsigned long flags = 0;
- XPQ_LOCK(&update_lock, flags);
- mcl_queue[mcl_idx].op = __HYPERVISOR_update_va_mapping;
- mcl_queue[mcl_idx].args[0] = (unsigned long)(va >> PAGE_SHIFT);
- mcl_queue[mcl_idx].args[1] = (unsigned long)ma;
- mcl_queue[mcl_idx].args[2] = 0;
- mcl_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PIN_L2_TABLE;
+ op.mfn = ma >> PAGE_SHIFT;
+ xen_flush_queue();
+ PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-
-
-void
-xpq_queue_pt_switch(uint32_t val)
+void
+xen_pgd_unpin(unsigned long ma)
{
- unsigned long flags = 0;
- vm_paddr_t ma = xpmap_ptom(val) & PG_FRAME;
-
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = ma | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = ma >> PAGE_SHIFT;
+ xen_flush_queue();
+ PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-
-void
-xpq_queue_pin_table(uint32_t pa, int type)
+void
+xen_pt_pin(unsigned long ma)
{
- unsigned long flags = 0;
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
- switch (type) {
- case XPQ_PIN_L1_TABLE:
- xpq_queue[xpq_idx].val = MMUEXT_PIN_L1_TABLE;
- break;
- case XPQ_PIN_L2_TABLE:
- xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
- break;
- }
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PIN_L1_TABLE;
+ op.mfn = ma >> PAGE_SHIFT;
+ xen_flush_queue();
+ PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void
-xpq_queue_unpin_table(uint32_t pa)
+void
+xen_pt_unpin(unsigned long ma)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = ma >> PAGE_SHIFT;
+ xen_flush_queue();
+ PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void
-xpq_queue_set_ldt(vm_offset_t va, uint32_t entries)
+void
+xen_set_ldt(unsigned long ptr, unsigned long len)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
- KASSERT(va == (va & PG_FRAME), ("ldt not page aligned"));
- xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
- xpq_queue[xpq_idx].val = MMUEXT_SET_LDT |
- (entries << MMUEXT_CMD_SHIFT);
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_SET_LDT;
+ op.linear_addr = ptr;
+ op.nr_ents = len;
+ xen_flush_queue();
+ PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void
-xpq_queue_tlb_flush()
+void xen_tlb_flush(void)
{
- unsigned long flags = 0;
-
- XPQ_LOCK(&update_lock, flags);
-
- xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
- xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
- xpq_increment_idx();
- XPQ_UNLOCK(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+ xen_flush_queue();
+ PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h b/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h
index 3e962e3014..22e960652d 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/evtchn.h
@@ -9,11 +9,28 @@
#ifndef __ASM_EVTCHN_H__
#define __ASM_EVTCHN_H__
-
+#include <machine/pcpu.h>
#include <machine/hypervisor.h>
#include <machine/synch_bitops.h>
#include <machine/hypervisor-ifs.h>
+#ifdef SMP
+#include <sys/param.h> /* XXX for time.h */
+#include <sys/time.h> /* XXX for pcpu.h */
+#include <sys/pcpu.h> /* XXX for PCPU_GET */
+extern int gdt_set;
+static inline int
+smp_processor_id(void)
+{
+ if (likely(gdt_set))
+ return PCPU_GET(cpuid);
+ return 0;
+}
+
+#else
+#define smp_processor_id() 0
+#endif
+
/*
* LOW-LEVEL DEFINITIONS
*/
@@ -38,6 +55,7 @@ static inline void
unmask_evtchn(int port)
{
shared_info_t *s = HYPERVISOR_shared_info;
+ vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
synch_clear_bit(port, &s->evtchn_mask[0]);
@@ -46,7 +64,7 @@ unmask_evtchn(int port)
* a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
*/
if ( synch_test_bit (port, &s->evtchn_pending[0]) &&
- !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
+ !synch_test_and_set_bit(port>>5, &vcpu_info->evtchn_pending_sel) )
{
s->vcpu_data[0].evtchn_upcall_pending = 1;
if ( !s->vcpu_data[0].evtchn_upcall_mask )
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/frame.h b/freebsd-5.3-xen-sparse/i386-xen/include/frame.h
deleted file mode 100644
index a6572d85a9..0000000000
--- a/freebsd-5.3-xen-sparse/i386-xen/include/frame.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)frame.h 5.2 (Berkeley) 1/18/91
- * $FreeBSD: src/sys/i386/include/frame.h,v 1.23 2003/07/22 08:11:15 peter Exp $
- */
-
-#ifndef _MACHINE_FRAME_H_
-#define _MACHINE_FRAME_H_ 1
-
-/*
- * System stack frames.
- */
-
-/*
- * Exception/Trap Stack Frame
- */
-
-struct trapframe {
- int tf_fs;
- int tf_es;
- int tf_ds;
- int tf_edi;
- int tf_esi;
- int tf_ebp;
- int tf_isp;
- int tf_ebx;
- int tf_edx;
- int tf_ecx;
- int tf_eax;
- int tf_trapno;
- int tf_cr2;
- /* below portion defined in 386 hardware */
- int tf_err;
- int tf_eip;
- int tf_cs;
- int tf_eflags;
- /* below only when crossing rings (e.g. user to kernel) */
- int tf_esp;
- int tf_ss;
-};
-
-/* Interrupt stack frame */
-
-struct intrframe {
- int if_fs;
- int if_es;
- int if_ds;
- int if_edi;
- int if_esi;
- int if_ebp;
- int :32;
- int if_ebx;
- int if_edx;
- int if_ecx;
- int if_eax;
- int :32; /* for compat with trap frame - trapno */
- int if_vec; /* cr2 in trap frame */
- int :32; /* for compat with trap frame - err */
- /* below portion defined in 386 hardware */
- int if_eip;
- int if_cs;
- int if_eflags;
- /* below only when crossing rings (e.g. user to kernel) */
- int if_esp;
- int if_ss;
-};
-
-/* frame of clock (same as interrupt frame) */
-
-struct clockframe {
- int cf_fs;
- int cf_es;
- int cf_ds;
- int cf_edi;
- int cf_esi;
- int cf_ebp;
- int :32;
- int cf_ebx;
- int cf_edx;
- int cf_ecx;
- int cf_eax;
- int :32; /* for compat with trap frame - trapno */
- int cf_vec; /* cr2 in trap frame */
- int :32; /* for compat with trap frame - err */
- /* below portion defined in 386 hardware */
- int cf_eip;
- int cf_cs;
- int cf_eflags;
- /* below only when crossing rings (e.g. user to kernel) */
- int cf_esp;
- int cf_ss;
-};
-
-#define INTR_TO_TRAPFRAME(frame) ((struct trapframe *)&(frame)->if_fs)
-
-#endif /* _MACHINE_FRAME_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h b/freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h
new file mode 100644
index 0000000000..76733a0400
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/gnttab.h
@@ -0,0 +1,71 @@
+/******************************************************************************
+ * gnttab.h
+ *
+ * Two sets of functionality:
+ * 1. Granting foreign access to our memory reservation.
+ * 2. Accessing others' memory reservations via grant references.
+ * (i.e., mechanisms for both sender and recipient of grant references)
+ *
+ * Copyright (c) 2004, K A Fraser
+ * Copyright (c) 2005, Christopher Clark
+ */
+
+#ifndef __ASM_GNTTAB_H__
+#define __ASM_GNTTAB_H__
+
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+
+/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
+#define NR_GRANT_FRAMES 4
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+
+int
+gnttab_grant_foreign_access(
+ domid_t domid, unsigned long frame, int readonly);
+
+void
+gnttab_end_foreign_access(
+ grant_ref_t ref, int readonly);
+
+int
+gnttab_grant_foreign_transfer(
+ domid_t domid, unsigned long pfn);
+
+unsigned long
+gnttab_end_foreign_transfer(
+ grant_ref_t ref);
+
+int
+gnttab_query_foreign_access(
+ grant_ref_t ref );
+
+/*
+ * operations on reserved batches of grant references
+ */
+int
+gnttab_alloc_grant_references(
+ uint16_t count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal );
+
+void
+gnttab_free_grant_references(
+ uint16_t count, grant_ref_t private_head );
+
+int
+gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
+);
+
+void
+gnttab_release_grant_reference(
+ grant_ref_t *private_head, grant_ref_t release );
+
+void
+gnttab_grant_foreign_access_ref(
+ grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
+
+void
+gnttab_grant_foreign_transfer_ref(
+ grant_ref_t, domid_t domid, unsigned long pfn);
+
+
+#endif /* __ASM_GNTTAB_H__ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h
index 4f75d27a9a..a77ee4a6be 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor-ifs.h
@@ -11,6 +11,7 @@
#define u32 uint32_t
#define u64 uint64_t
+#define CONFIG_XEN_BLKDEV_GRANT
#include <machine/xen-public/xen.h>
#include <machine/xen-public/io/domain_controller.h>
#include <machine/xen-public/io/netif.h>
@@ -19,6 +20,7 @@
#include <machine/xen-public/event_channel.h>
#include <machine/xen-public/sched_ctl.h>
#include <machine/xen-public/physdev.h>
+#include <machine/xen-public/grant_table.h>
#undef blkif_sector_t /* XXX pre-processor didn't do the */
#define blkif_sector_t uint64_t /* right thing */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
index 95ee85f352..0a290cef19 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/hypervisor.h
@@ -22,6 +22,7 @@ extern start_info_t *xen_start_info;
* be MACHINE addresses.
*/
+static inline void HYPERVISOR_crash(void) __dead2;
void MULTICALL_flush_page_update_queue(void);
@@ -38,7 +39,8 @@ typedef struct { unsigned long pte_low, pte_high; } pte_t;
* Assembler stubs for hyper-calls.
*/
-static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
+static inline int
+HYPERVISOR_set_trap_table(trap_info_t *table)
{
int ret;
__asm__ __volatile__ (
@@ -49,32 +51,57 @@ static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
return ret;
}
-static inline int HYPERVISOR_mmu_update(mmu_update_t *req,
- int count,
- int *success_count)
+static inline int
+HYPERVISOR_mmu_update(mmu_update_t *req, int count,
+ int *success_count, domid_t domid)
{
int ret;
+ unsigned long ign1, ign2, ign3, ign4;
__asm__ __volatile__ (
TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_mmu_update),
- "b" (req), "c" (count), "d" (success_count) : "memory" );
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
+ "3" (success_count), "4" (domid)
+ : "memory" );
return ret;
}
-static inline int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
+static inline int
+HYPERVISOR_mmuext_op(
+ struct mmuext_op *op, int count, int *success_count, domid_t domid)
{
int ret;
+ unsigned long ign1, ign2, ign3, ign4;
__asm__ __volatile__ (
TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_gdt),
- "b" (frame_list), "c" (entries) : "memory" );
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
+ "3" (success_count), "4" (domid)
+ : "memory" );
+
+ return ret;
+}
+
+
+static inline int
+HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
+ : "memory" );
return ret;
}
-static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
+static inline int
+HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
{
int ret;
__asm__ __volatile__ (
@@ -85,7 +112,8 @@ static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
return ret;
}
-static inline int HYPERVISOR_set_callbacks(
+static inline int
+HYPERVISOR_set_callbacks(
unsigned long event_selector, unsigned long event_address,
unsigned long failsafe_selector, unsigned long failsafe_address)
{
@@ -99,7 +127,8 @@ static inline int HYPERVISOR_set_callbacks(
return ret;
}
-static inline int HYPERVISOR_fpu_taskswitch(void)
+static inline int
+HYPERVISOR_fpu_taskswitch(void)
{
int ret;
__asm__ __volatile__ (
@@ -109,7 +138,8 @@ static inline int HYPERVISOR_fpu_taskswitch(void)
return ret;
}
-static inline int HYPERVISOR_yield(void)
+static inline int
+HYPERVISOR_yield(void)
{
int ret;
__asm__ __volatile__ (
@@ -120,7 +150,8 @@ static inline int HYPERVISOR_yield(void)
return ret;
}
-static inline int HYPERVISOR_block(void)
+static inline int
+HYPERVISOR_block(void)
{
int ret;
__asm__ __volatile__ (
@@ -131,7 +162,8 @@ static inline int HYPERVISOR_block(void)
return ret;
}
-static inline int HYPERVISOR_shutdown(void)
+static inline int
+HYPERVISOR_shutdown(void)
{
int ret;
__asm__ __volatile__ (
@@ -143,7 +175,8 @@ static inline int HYPERVISOR_shutdown(void)
return ret;
}
-static inline int HYPERVISOR_reboot(void)
+static inline int
+HYPERVISOR_reboot(void)
{
int ret;
__asm__ __volatile__ (
@@ -155,7 +188,8 @@ static inline int HYPERVISOR_reboot(void)
return ret;
}
-static inline int HYPERVISOR_suspend(unsigned long srec)
+static inline int
+HYPERVISOR_suspend(unsigned long srec)
{
int ret;
/* NB. On suspend, control software expects a suspend record in %esi. */
@@ -168,7 +202,25 @@ static inline int HYPERVISOR_suspend(unsigned long srec)
return ret;
}
-static inline long HYPERVISOR_set_timer_op(uint64_t timeout)
+
+static inline void
+HYPERVISOR_crash(void)
+{
+ int ret;
+ unsigned long ign1;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
+ : "memory" );
+
+ for (;;) ; /* eliminate noreturn error */
+
+}
+
+static inline long
+HYPERVISOR_set_timer_op(uint64_t timeout)
{
int ret;
unsigned long timeout_hi = (unsigned long)(timeout>>32);
@@ -176,12 +228,13 @@ static inline long HYPERVISOR_set_timer_op(uint64_t timeout)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_set_timer_op),
- "b" (timeout_hi), "c" (timeout_lo) : "memory" );
+ "b" (timeout_lo), "c" (timeout_hi) : "memory" );
return ret;
}
-static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
+static inline int
+HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
{
int ret;
dom0_op->interface_version = DOM0_INTERFACE_VERSION;
@@ -193,7 +246,8 @@ static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
return ret;
}
-static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value)
+static inline int
+HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
int ret;
__asm__ __volatile__ (
@@ -204,7 +258,8 @@ static inline int HYPERVISOR_set_debugreg(int reg, unsigned long value)
return ret;
}
-static inline unsigned long HYPERVISOR_get_debugreg(int reg)
+static inline unsigned long
+HYPERVISOR_get_debugreg(int reg)
{
unsigned long ret;
__asm__ __volatile__ (
@@ -215,7 +270,8 @@ static inline unsigned long HYPERVISOR_get_debugreg(int reg)
return ret;
}
-static inline int HYPERVISOR_update_descriptor(
+static inline int
+HYPERVISOR_update_descriptor(
unsigned long pa, unsigned long word1, unsigned long word2)
{
int ret;
@@ -227,20 +283,10 @@ static inline int HYPERVISOR_update_descriptor(
return ret;
}
-static inline int HYPERVISOR_set_fast_trap(int idx)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_fast_trap),
- "b" (idx) : "memory" );
-
- return ret;
-}
-
-static inline int HYPERVISOR_dom_mem_op(unsigned int op,
- unsigned long *pages,
- unsigned long nr_pages)
+static inline int
+HYPERVISOR_dom_mem_op(unsigned int op,
+ unsigned long *pages,
+ unsigned long nr_pages)
{
int ret;
__asm__ __volatile__ (
@@ -250,7 +296,8 @@ static inline int HYPERVISOR_dom_mem_op(unsigned int op,
return ret;
}
-static inline int HYPERVISOR_multicall(void *call_list, int nr_calls)
+static inline int
+HYPERVISOR_multicall(void *call_list, int nr_calls)
{
int ret;
__asm__ __volatile__ (
@@ -261,14 +308,15 @@ static inline int HYPERVISOR_multicall(void *call_list, int nr_calls)
return ret;
}
-static inline int HYPERVISOR_update_va_mapping(
- unsigned long page_nr, pte_t new_val, unsigned long flags)
+static inline int
+HYPERVISOR_update_va_mapping(
+ unsigned long page_nr, unsigned long new_val, unsigned long flags)
{
int ret;
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping),
- "b" (page_nr), "c" ((new_val).pte_low), "d" (flags):
+ "b" (page_nr), "c" (new_val), "d" (flags):
"memory" );
/* XXX */
#if 0
@@ -279,7 +327,8 @@ static inline int HYPERVISOR_update_va_mapping(
return ret;
}
-static inline int HYPERVISOR_event_channel_op(void *op)
+static inline int
+HYPERVISOR_event_channel_op(void *op)
{
int ret;
__asm__ __volatile__ (
@@ -290,7 +339,8 @@ static inline int HYPERVISOR_event_channel_op(void *op)
return ret;
}
-static inline int HYPERVISOR_xen_version(int cmd)
+static inline int
+HYPERVISOR_xen_version(int cmd)
{
int ret;
__asm__ __volatile__ (
@@ -301,7 +351,8 @@ static inline int HYPERVISOR_xen_version(int cmd)
return ret;
}
-static inline int HYPERVISOR_console_io(int cmd, int count, char *str)
+static inline int
+HYPERVISOR_console_io(int cmd, int count, char *str)
{
int ret;
__asm__ __volatile__ (
@@ -312,36 +363,62 @@ static inline int HYPERVISOR_console_io(int cmd, int count, char *str)
return ret;
}
-static __inline int HYPERVISOR_console_write(char *str, int count)
+static inline int
+HYPERVISOR_console_write(char *str, int count)
{
return HYPERVISOR_console_io(CONSOLEIO_write, count, str);
}
-static inline int HYPERVISOR_physdev_op(void *physdev_op)
+static inline int
+HYPERVISOR_physdev_op(void *physdev_op)
+{
+ int ret;
+ unsigned long ign;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_grant_table_op(
+ unsigned int cmd, void *uop, unsigned int count)
{
int ret;
+ unsigned long ign1, ign2, ign3;
+
__asm__ __volatile__ (
TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_physdev_op),
- "b" (physdev_op) : "memory" );
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count)
+ : "memory" );
return ret;
}
-static inline int HYPERVISOR_update_va_mapping_otherdomain(
- unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid)
+static inline int
+HYPERVISOR_update_va_mapping_otherdomain(
+ unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
{
int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
__asm__ __volatile__ (
TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
- "b" (page_nr), "c" ((new_val).pte_low), "d" (flags), "S" (domid) :
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
+ "1" (va), "2" ((new_val).pte_low), "3" (flags), "4" (domid) :
"memory" );
-
+
return ret;
}
-static inline int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
+static inline int
+HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
{
int ret;
__asm__ __volatile__ (
@@ -352,4 +429,20 @@ static inline int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
return ret;
}
+static inline int
+HYPERVISOR_boot_vcpu(
+ unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
+ : "memory");
+
+ return ret;
+}
+
#endif /* __HYPERVISOR_H__ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
index 80a675cd4a..42941d5a91 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pcpu.h
@@ -50,9 +50,15 @@
int pc_currentldt; \
u_int pc_acpi_id; \
u_int pc_apic_id; \
- u_int pc_faultaddr; \
- u_int pc_trap_nesting; \
- u_int pc_pdir
+ int *pc_ipi_to_evtchn; \
+ int *pc_virq_to_irq; \
+ u_int pc_cr2; \
+ u_int pc_pdir; \
+ u_int pc_lazypmap; \
+ u_int pc_rendezvous; \
+ u_int pc_cpuast; \
+ u_int pc_time_irq; \
+ uint64_t pc_processed_system_time;
#if defined(lint)
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
index 9e838b9bd4..d7f1923234 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/pmap.h
@@ -149,8 +149,8 @@
*/
#ifdef SMP
-#define MPPTDI (NPDEPTD-1) /* per cpu ptd entry */
-#define KPTDI (MPPTDI-NKPDE-XEN_PAGES /* start of kernel virtual pde's */
+#define MPPTDI (NPDEPTD-1-XEN_PAGES) /* per cpu ptd entry */
+#define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */
#else
#define KPTDI (NPDEPTD-NKPDE-XEN_PAGES) /* start of kernel virtual pde's */
#endif /* SMP */
@@ -253,8 +253,8 @@ pte_load_store(pt_entry_t *ptep, pt_entry_t v)
return (r);
}
-#define pte_store(ptep, pte) PT_SET_VA_MA(ptep, pte, TRUE);
-#define pte_clear(pte) PT_CLEAR_VA(pte, TRUE);
+#define pte_store(ptep, pte) PT_SET_VA(ptep, pte, TRUE)
+#define pte_clear(pte) PT_CLEAR_VA(pte, TRUE)
#endif /* _KERNEL */
@@ -343,6 +343,7 @@ void pmap_set_pg(void);
void pmap_invalidate_page(pmap_t, vm_offset_t);
void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
+void pmap_lazyfix_action(void);
void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len);
void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len);
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h b/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h
deleted file mode 100644
index eda584b62e..0000000000
--- a/freebsd-5.3-xen-sparse/i386-xen/include/ucontext.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*-
- * Copyright (c) 1999 Marcel Moolenaar
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer
- * in this position and unchanged.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/i386/include/ucontext.h,v 1.10 2002/12/02 19:58:55 deischen Exp $
- */
-
-#ifndef _MACHINE_UCONTEXT_H_
-#define _MACHINE_UCONTEXT_H_
-
-typedef struct __mcontext {
- /*
- * The first 20 fields must match the definition of
- * sigcontext. So that we can support sigcontext
- * and ucontext_t at the same time.
- */
- int mc_onstack; /* XXX - sigcontext compat. */
- int mc_gs; /* machine state (struct trapframe) */
- int mc_fs;
- int mc_es;
- int mc_ds;
- int mc_edi;
- int mc_esi;
- int mc_ebp;
- int mc_isp;
- int mc_ebx;
- int mc_edx;
- int mc_ecx;
- int mc_eax;
- int mc_trapno;
- int mc_cr2;
- int mc_err;
- int mc_eip;
- int mc_cs;
- int mc_eflags;
- int mc_esp;
- int mc_ss;
-
- int mc_len; /* sizeof(mcontext_t) */
-#define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */
-#define _MC_FPFMT_387 0x10001
-#define _MC_FPFMT_XMM 0x10002
- int mc_fpformat;
-#define _MC_FPOWNED_NONE 0x20000 /* FP state not used */
-#define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */
-#define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */
- int mc_ownedfp;
- /*
- * See <machine/npx.h> for the internals of mc_fpstate[].
- */
- int mc_fpstate[128] __aligned(16);
- int mc_spare2[8];
-} mcontext_t;
-
-#if defined(_KERNEL) && defined(COMPAT_FREEBSD4)
-struct mcontext4 {
- int mc_onstack; /* XXX - sigcontext compat. */
- int mc_gs; /* machine state (struct trapframe) */
- int mc_fs;
- int mc_es;
- int mc_ds;
- int mc_edi;
- int mc_esi;
- int mc_ebp;
- int mc_isp;
- int mc_ebx;
- int mc_edx;
- int mc_ecx;
- int mc_eax;
- int mc_trapno;
- int mc_err;
- int mc_eip;
- int mc_cs;
- int mc_eflags;
- int mc_esp; /* machine state */
- int mc_ss;
- int mc_fpregs[28]; /* env87 + fpacc87 + u_long */
- int __spare__[17];
-};
-#endif
-
-#endif /* !_MACHINE_UCONTEXT_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h b/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h
index 7fa9af3c68..9315c606af 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/vmparam.h
@@ -105,7 +105,7 @@
#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0)
-#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI-1, 0)
+#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0)
#define USRSTACK VM_MAXUSER_ADDRESS
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h b/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h
index e483fc535c..47d726a040 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xen-os.h
@@ -6,6 +6,7 @@
#ifndef _OS_H_
#define _OS_H_
+#include <machine/param.h>
#ifndef NULL
#define NULL (void *)0
@@ -58,6 +59,11 @@ void printk(const char *fmt, ...);
/* some function prototypes */
void trap_init(void);
+extern int preemptable;
+#define preempt_disable() (preemptable = 0)
+#define preempt_enable() (preemptable = 1)
+#define preempt_enable_no_resched() (preemptable = 1)
+
/*
* STI/CLI equivalents. These basically set and clear the virtual
@@ -68,70 +74,74 @@ void trap_init(void);
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
-#define __cli() \
-do { \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
- barrier(); \
+
+
+#define __cli() \
+do { \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
} while (0)
-#define __sti() \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- barrier(); \
- _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
+#define __sti() \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 0; \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
} while (0)
+
#define __save_flags(x) \
do { \
- (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
+ vcpu_info_t *vcpu; \
+ vcpu = HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
} while (0)
-#define __restore_flags(x) \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- barrier(); \
- if ( (_shared->vcpu_data[0].evtchn_upcall_mask = (x)) == 0 ) { \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
- } \
+#define __restore_flags(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
+ } else \
+ preempt_enable_no_resched(); \
} while (0)
-#define __save_and_cli(x) \
-do { \
- (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
- barrier(); \
-} while (0)
-#define __save_and_sti(x) \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- barrier(); \
- (x) = _shared->vcpu_data[0].evtchn_upcall_mask; \
- _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
+#define __save_and_cli(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
} while (0)
-#ifdef SMP
-/* extra macros need for the SMP case */
-#error "global_irq_* not defined"
-#endif
#define cli() __cli()
#define sti() __sti()
#define save_flags(x) __save_flags(x)
#define restore_flags(x) __restore_flags(x)
#define save_and_cli(x) __save_and_cli(x)
-#define save_and_sti(x) __save_and_sti(x)
#define local_irq_save(x) __save_and_cli(x)
-#define local_irq_set(x) __save_and_sti(x)
#define local_irq_restore(x) __restore_flags(x)
#define local_irq_disable() __cli()
#define local_irq_enable() __sti()
@@ -141,9 +151,20 @@ do { \
#define mb()
#define rmb()
-#define smp_mb()
#define wmb()
-
+#ifdef SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_read_barrier_depends() read_barrier_depends()
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_read_barrier_depends() do { } while(0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
/* This is a barrier for the compiler only, NOT the processor! */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h b/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h
index e35eafa5d2..f4e32823df 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xen_intr.h
@@ -31,6 +31,9 @@ extern int bind_virq_to_irq(int virq);
extern void unbind_virq_from_irq(int virq);
extern int bind_evtchn_to_irq(int evtchn);
extern void unbind_evtchn_from_irq(int evtchn);
+extern int bind_ipi_on_cpu_to_irq(int cpu, int ipi);
+extern void unbind_ipi_on_cpu_from_irq(int cpu, int ipi);
+extern void ap_evtchn_init(int cpu);
static __inline__ int irq_cannonicalize(int irq)
{
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
index 93ffd7853a..e2b682265f 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenfunc.h
@@ -1,8 +1,7 @@
-/* $NetBSD:$ */
-
/*
*
* Copyright (c) 2004 Christian Limpach.
+ * Copyright (c) 2004,2005 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,10 +58,12 @@ extern pteinfo_t *pteinfo_list;
char *xen_setbootenv(char *cmd_line);
int xen_boothowto(char *envp);
void load_cr3(uint32_t val);
-void xen_set_ldt(vm_offset_t, uint32_t);
void xen_machphys_update(unsigned long, unsigned long);
void xen_update_descriptor(union descriptor *, union descriptor *);
void lldt(u_short sel);
+void ap_cpu_initclocks(void);
+
+
/*
* Invalidate a patricular VA on all cpus
*
@@ -71,15 +72,16 @@ void lldt(u_short sel);
static __inline void
invlpg(u_int addr)
{
- xpq_queue_invlpg(addr);
+ xen_invlpg(addr);
}
static __inline void
invltlb(void)
{
- xpq_queue_tlb_flush();
- mcl_flush_queue();
+ xen_tlb_flush();
+
}
+#define PANIC_IF(exp) if (unlikely(exp)) {printk("%s failed\n",#exp); panic("%s: %s:%d", #exp, __FILE__, __LINE__);}
#endif /* _XEN_XENFUNC_H_ */
diff --git a/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h b/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h
index f445096228..4d5e73f9b0 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h
+++ b/freebsd-5.3-xen-sparse/i386-xen/include/xenpmap.h
@@ -1,8 +1,7 @@
-/* $NetBSD:$ */
-
/*
*
* Copyright (c) 2004 Christian Limpach.
+ * Copyright (c) 2004,2005 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,17 +34,16 @@
#ifndef _XEN_XENPMAP_H_
#define _XEN_XENPMAP_H_
#include <machine/xenvar.h>
-void xpq_physbcopy(const unsigned long *, unsigned long, size_t);
-void xpq_queue_invlpg(vm_offset_t);
-void xpq_queue_pt_update(pt_entry_t *, pt_entry_t);
-void xpq_queue_pt_switch(uint32_t);
-void xpq_queue_set_ldt(vm_offset_t, uint32_t);
-void xpq_queue_tlb_flush(void);
-void xpq_queue_pin_table(uint32_t, int);
-void xpq_queue_unpin_table(uint32_t);
-void xpq_record(unsigned long, unsigned long);
-void mcl_queue_pt_update(vm_offset_t, vm_offset_t);
-void mcl_flush_queue(void);
+void xen_invlpg(vm_offset_t);
+void xen_queue_pt_update(vm_paddr_t, vm_paddr_t);
+void xen_pt_switch(uint32_t);
+void xen_set_ldt(unsigned long, unsigned long);
+void xen_tlb_flush(void);
+void xen_pgd_pin(unsigned long);
+void xen_pgd_unpin(unsigned long);
+void xen_pt_pin(unsigned long);
+void xen_pt_unpin(unsigned long);
+void xen_flush_queue(void);
void pmap_ref(pt_entry_t *pte, unsigned long ma);
@@ -61,58 +59,108 @@ void pmap_ref(pt_entry_t *pte, unsigned long ma);
#define PMAP_DEC_REF_PAGE(a)
#endif
+#if 0
+#define WRITABLE_PAGETABLES
+#endif
#define ALWAYS_SYNC 0
+#ifdef PT_DEBUG
+#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__)
+#else
+#define PT_LOG()
+#endif
+
#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */
-#define XPQ_PIN_L1_TABLE 1
-#define XPQ_PIN_L2_TABLE 2
+#define SH_PD_SET_VA 1
+#define SH_PD_SET_VA_MA 2
+#define SH_PD_SET_VA_CLEAR 3
+
+struct pmap;
+void pd_set(struct pmap *pmap, vm_paddr_t *ptr, vm_paddr_t val, int type);
#define PT_GET(_ptp) \
(pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : *(_ptp))
+
+#ifdef WRITABLE_PAGETABLES
#define PT_SET_VA(_ptp,_npte,sync) do { \
PMAP_REF((_ptp), xpmap_ptom(_npte)); \
- xpq_queue_pt_update((pt_entry_t *)vtomach((_ptp)), \
- xpmap_ptom((_npte))); \
- if (sync || ALWAYS_SYNC) \
- mcl_flush_queue(); \
+ PT_LOG(); \
+ *(_ptp) = xpmap_ptom((_npte)); \
} while (/*CONSTCOND*/0)
-#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
+#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
PMAP_REF((_ptp), (_npte)); \
- xpq_queue_pt_update((pt_entry_t *)vtomach((_ptp)), (_npte)); \
- if (sync || ALWAYS_SYNC) \
- mcl_flush_queue(); \
+ PT_LOG(); \
+ *(_ptp) = (_npte); \
} while (/*CONSTCOND*/0)
#define PT_CLEAR_VA(_ptp, sync) do { \
PMAP_REF((pt_entry_t *)(_ptp), 0); \
- xpq_queue_pt_update((pt_entry_t *)vtomach(_ptp), 0); \
- if (sync || ALWAYS_SYNC) \
- mcl_flush_queue(); \
+ PT_LOG(); \
+ *(_ptp) = 0; \
} while (/*CONSTCOND*/0)
-#define PT_CLEAR(_ptp, sync) do { \
- PMAP_REF((pt_entry_t *)(vtopte(_ptp)), 0); \
- mcl_queue_pt_update((unsigned long)_ptp, 0); \
- if (sync || ALWAYS_SYNC) \
- mcl_flush_queue(); \
+
+#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \
+ PMAP_REF((_ptp), xpmap_ptom(_npte)); \
+ pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
-#define PT_SET_MA(_va,_ma,sync) do { \
- PMAP_REF(vtopte((unsigned long)_va), (_ma)); \
- mcl_queue_pt_update((vm_offset_t )(_va), (_ma)); \
- if (sync || ALWAYS_SYNC) \
- mcl_flush_queue(); \
+#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \
+ PMAP_REF((_ptp), (_npte)); \
+ pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
} while (/*CONSTCOND*/0)
-#define PT_SET(_va,_pa,sync) do { \
- PMAP_REF((pt_entry_t *)(vtopte(_va)), xpmap_ptom(_pa)); \
- mcl_queue_pt_update((vm_offset_t)(_va), \
- xpmap_ptom((_pa))); \
+#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
+ PMAP_REF((pt_entry_t *)(_ptp), 0); \
+ pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+
+
+#else /* !WRITABLE_PAGETABLES */
+
+#define PT_SET_VA(_ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), xpmap_ptom(_npte)); \
+ xen_queue_pt_update(vtomach(_ptp), \
+ xpmap_ptom(_npte)); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_SET_VA_MA(_ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), (_npte)); \
+ xen_queue_pt_update(vtomach(_ptp), _npte); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR_VA(_ptp, sync) do { \
+ PMAP_REF((pt_entry_t *)(_ptp), 0); \
+ xen_queue_pt_update(vtomach(_ptp), 0); \
if (sync || ALWAYS_SYNC) \
- mcl_flush_queue(); \
+ xen_flush_queue(); \
} while (/*CONSTCOND*/0)
+#define PD_SET_VA(_pmap, _ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), xpmap_ptom(_npte)); \
+ pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PD_SET_VA_MA(_pmap, _ptp,_npte,sync) do { \
+ PMAP_REF((_ptp), (_npte)); \
+ pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \
+ PMAP_REF((pt_entry_t *)(_ptp), 0); \
+ pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \
+ if (sync || ALWAYS_SYNC) xen_flush_queue(); \
+} while (/*CONSTCOND*/0)
+
+#endif
+#define PT_SET_MA(_va, _ma) \
+ HYPERVISOR_update_va_mapping(((unsigned long)_va), \
+ ((unsigned long)_ma), \
+ UVMF_INVLPG| UVMF_LOCAL)\
#define PT_UPDATES_FLUSH() do { \
- mcl_flush_queue(); \
+ xen_flush_queue(); \
} while (/*CONSTCOND*/0)
diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c b/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c
index 66c80f3ece..cd7f9d60a2 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/xen/blkfront/xb_blkfront.c
@@ -46,12 +46,19 @@
#include <machine/ctrl_if.h>
#include <machine/xenfunc.h>
+
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <machine/gnttab.h>
+#endif
+
/* prototypes */
struct xb_softc;
static void xb_startio(struct xb_softc *sc);
static void xb_vbdinit(void);
static void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
+static void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
struct xb_softc {
device_t xb_dev;
@@ -61,6 +68,7 @@ struct xb_softc {
void *xb_resp_handler;
int xb_unit;
int xb_flags;
+ struct xb_softc *xb_next_blocked;
#define XB_OPEN (1<<0) /* drive is open (can't shut down) */
};
@@ -100,8 +108,23 @@ static unsigned int blkif_irq;
static int blkif_control_rsp_valid;
static blkif_response_t blkif_control_rsp;
-static unsigned long xb_rec_ring_free;
-blkif_request_t xb_rec_ring[BLKIF_RING_SIZE]; /* shadow recovery ring */
+static blkif_front_ring_t blk_ring;
+
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+static domid_t rdomid = 0;
+static grant_ref_t gref_head, gref_terminal;
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
+#endif
+
+static struct xb_softc *xb_kick_pending_head = NULL;
+static struct xb_softc *xb_kick_pending_tail = NULL;
+static struct mtx blkif_io_block_lock;
+
+static unsigned long rec_ring_free;
+blkif_request_t rec_ring[BLK_RING_SIZE];
/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64
@@ -115,16 +138,10 @@ static unsigned int xb_kick_pending;
static struct mtx blkif_io_lock;
-static blkif_ring_t *xb_blk_ring;
-static BLKIF_RING_IDX xb_resp_cons; /* Response consumer for comms ring. */
-static BLKIF_RING_IDX xb_req_prod; /* Private request producer */
static int xb_recovery = 0; /* "Recovery in progress" flag. Protected
* by the blkif_io_lock */
-/* We plug the I/O ring if the driver is suspended or if the ring is full. */
-#define BLKIF_RING_FULL (((xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE) || \
- (blkif_state != BLKIF_STATE_CONNECTED))
void blkif_completion(blkif_request_t *req);
void xb_response_intr(void *);
@@ -135,13 +152,13 @@ void xb_response_intr(void *);
static inline int
GET_ID_FROM_FREELIST( void )
{
- unsigned long free = xb_rec_ring_free;
+ unsigned long free = rec_ring_free;
- KASSERT(free <= BLKIF_RING_SIZE, ("free %lu > BLKIF_RING_SIZE", free));
+ KASSERT(free <= BLK_RING_SIZE, ("free %lu > RING_SIZE", free));
- xb_rec_ring_free = xb_rec_ring[free].id;
+ rec_ring_free = rec_ring[free].id;
- xb_rec_ring[free].id = 0x0fffffee; /* debug */
+ rec_ring[free].id = 0x0fffffee; /* debug */
return free;
}
@@ -149,12 +166,13 @@ GET_ID_FROM_FREELIST( void )
static inline void
ADD_ID_TO_FREELIST( unsigned long id )
{
- xb_rec_ring[id].id = xb_rec_ring_free;
- xb_rec_ring_free = id;
+ rec_ring[id].id = rec_ring_free;
+ rec_ring_free = id;
}
-static inline void translate_req_to_pfn(blkif_request_t *xreq,
- blkif_request_t *req)
+static inline void
+translate_req_to_pfn(blkif_request_t *xreq,
+ blkif_request_t *req)
{
int i;
@@ -165,7 +183,11 @@ static inline void translate_req_to_pfn(blkif_request_t *xreq,
xreq->sector_number = req->sector_number;
for ( i = 0; i < req->nr_segments; i++ ){
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ xreq->frame_and_sects[i] = req->frame_and_sects[i];
+#else
xreq->frame_and_sects[i] = xpmap_mtop(req->frame_and_sects[i]);
+#endif
}
}
@@ -181,14 +203,18 @@ static inline void translate_req_to_mfn(blkif_request_t *xreq,
xreq->sector_number = req->sector_number;
for ( i = 0; i < req->nr_segments; i++ ){
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ xreq->frame_and_sects[i] = req->frame_and_sects[i];
+#else
xreq->frame_and_sects[i] = xpmap_ptom(req->frame_and_sects[i]);
+#endif
}
}
static inline void flush_requests(void)
{
- xb_blk_ring->req_prod = xb_req_prod;
+ RING_PUSH_REQUESTS(&blk_ring);
notify_via_evtchn(blkif_evtchn);
}
@@ -207,12 +233,9 @@ xb_response_intr(void *xsc)
struct xb_softc *sc = NULL;
struct bio *bp;
blkif_response_t *bret;
- BLKIF_RING_IDX i, rp;
+ RING_IDX i, rp;
unsigned long flags;
- if (blkif_state == BLKIF_STATE_CLOSED)
- return;
-
mtx_lock_irqsave(&blkif_io_lock, flags);
if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
@@ -221,20 +244,21 @@ xb_response_intr(void *xsc)
return;
}
- rp = xb_blk_ring->resp_prod;
+ rp = blk_ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
/* sometimes we seem to lose i/o. stay in the interrupt handler while
* there is stuff to process: continually recheck the response producer.
*/
- for ( i = xb_resp_cons; i != (rp = xb_blk_ring->resp_prod); i++ ) {
+ process_rcvd:
+ for ( i = blk_ring.rsp_cons; i != (rp = blk_ring.sring->rsp_prod); i++ ) {
unsigned long id;
- bret = &xb_blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
+ bret = RING_GET_RESPONSE(&blk_ring, i);
id = bret->id;
- bp = (struct bio *)xb_rec_ring[id].id;
+ bp = (struct bio *)rec_ring[id].id;
- blkif_completion(&xb_rec_ring[id]);
+ blkif_completion(&rec_ring[id]);
ADD_ID_TO_FREELIST(id); /* overwrites req */
@@ -277,11 +301,30 @@ xb_response_intr(void *xsc)
}
}
- xb_resp_cons = i;
+ blk_ring.rsp_cons = i;
+
+ if (xb_kick_pending) {
+ unsigned long flags;
+ mtx_lock_irqsave(&blkif_io_block_lock, flags);
+ xb_kick_pending = FALSE;
+ /* Run as long as there are blocked devs or queue fills again */
+ while ((NULL != xb_kick_pending_head) && (FALSE == xb_kick_pending)) {
+ struct xb_softc *xb_cur = xb_kick_pending_head;
+ xb_kick_pending_head = xb_cur->xb_next_blocked;
+ if(NULL == xb_kick_pending_head) {
+ xb_kick_pending_tail = NULL;
+ }
+ xb_cur->xb_next_blocked = NULL;
+ mtx_unlock_irqrestore(&blkif_io_block_lock, flags);
+ xb_startio(xb_cur);
+ mtx_lock_irqsave(&blkif_io_block_lock, flags);
+ }
+ mtx_unlock_irqrestore(&blkif_io_block_lock, flags);
- if (sc && xb_kick_pending) {
- xb_kick_pending = FALSE;
- xb_startio(sc);
+ if(blk_ring.rsp_cons != blk_ring.sring->rsp_prod) {
+ /* Consume those, too */
+ goto process_rcvd;
+ }
}
mtx_unlock_irqrestore(&blkif_io_lock, flags);
@@ -323,8 +366,6 @@ xb_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
{
struct xb_softc *sc = (struct xb_softc *)dp->d_drv1;
- TRACE_ENTER;
-
if (sc == NULL)
return (ENXIO);
@@ -347,6 +388,9 @@ xb_startio(struct xb_softc *sc)
int s, queued = 0;
unsigned long id;
unsigned int fsect, lsect;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int ref;
+#endif
if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
@@ -355,8 +399,8 @@ xb_startio(struct xb_softc *sc)
s = splbio();
for (bp = bioq_first(&sc->xb_bioq);
- bp && !BLKIF_RING_FULL;
- xb_req_prod++, queued++, bp = bioq_first(&sc->xb_bioq)) {
+ bp && !RING_FULL(&blk_ring);
+ blk_ring.req_prod_pvt++, queued++, bp = bioq_first(&sc->xb_bioq)) {
/* Check if the buffer is properly aligned */
if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
@@ -388,9 +432,10 @@ xb_startio(struct xb_softc *sc)
buffer_ma &= ~PAGE_MASK;
/* Fill out a communications ring structure. */
- req = &xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req;
+ req = RING_GET_REQUEST(&blk_ring,
+ blk_ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST();
- xb_rec_ring[id].id= (unsigned long)bp;
+ rec_ring[id].id= (unsigned long)bp;
req->id = id;
req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
@@ -402,19 +447,47 @@ xb_startio(struct xb_softc *sc)
req->nr_segments = 1; /* not doing scatter/gather since buffer
* chaining is not supported.
*/
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ KASSERT( ref != -ENOSPC, ("grant_reference failed") );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ rdomid,
+ buffer_ma >> PAGE_SHIFT,
+ req->operation & 1 ); /* ??? */
+
+ req->frame_and_sects[0] =
+ (((uint32_t) ref) << 16) | (fsect << 3) | lsect;
+#else
/*
* upper bits represent the machine address of the buffer and the
* lower bits is the number of sectors to be read/written.
*/
req->frame_and_sects[0] = buffer_ma | (fsect << 3) | lsect;
-
+#endif
/* Keep a private copy so we can reissue requests when recovering. */
- translate_req_to_pfn( &xb_rec_ring[id], req);
+ translate_req_to_pfn( &rec_ring[id], req);
}
- if (BLKIF_RING_FULL)
+ if (RING_FULL(&blk_ring)) {
+ unsigned long flags;
+ mtx_lock_irqsave(&blkif_io_block_lock, flags);
xb_kick_pending = TRUE;
+ /* If we are not already on blocked list, add us */
+ if((NULL == sc->xb_next_blocked) && (xb_kick_pending_tail != sc)) {
+
+ if(NULL == xb_kick_pending_head) {
+ xb_kick_pending_head = xb_kick_pending_tail = sc;
+ } else {
+ xb_kick_pending_tail->xb_next_blocked = sc;
+ xb_kick_pending_tail = sc;
+ }
+ }
+ mtx_unlock_irqrestore(&blkif_io_block_lock, flags);
+ }
if (queued != 0)
flush_requests();
@@ -466,6 +539,7 @@ xb_create(int unit)
sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK);
sc->xb_unit = unit;
+ sc->xb_next_blocked = NULL;
memset(&sc->xb_disk, 0, sizeof(sc->xb_disk));
sc->xb_disk.d_unit = unit;
@@ -503,17 +577,20 @@ xb_vbdinit(void)
blkif_response_t rsp;
vdisk_t *buf;
- TRACE_ENTER;
-
buf = (vdisk_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
/* Probe for disk information. */
memset(&req, 0, sizeof(req));
req.operation = BLKIF_OP_PROBE;
req.nr_segments = 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ blkif_control_probe_send(&req, &rsp,
+ (unsigned long)(vtomach(buf)));
+
+#else
req.frame_and_sects[0] = vtomach(buf) | 7;
blkif_control_send(&req, &rsp);
-
+#endif
if ( rsp.status <= 0 ) {
printk("xb_identify: Could not identify disks (%d)\n", rsp.status);
free(buf, M_DEVBUF);
@@ -534,32 +611,51 @@ xb_vbdinit(void)
/***************************** COMMON CODE *******************************/
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+static void
+blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
+ unsigned long address)
+{
+ int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ KASSERT( ref != -ENOSPC, ("couldn't get grant reference") );
+
+ gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
+
+ req->frame_and_sects[0] = (((uint32_t) ref) << 16) | 7;
+
+ blkif_control_send(req, rsp);
+}
+#endif
+
void
blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
unsigned long flags, id;
+ blkif_request_t *req_d;
retry:
- while ( (xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE ) {
+ while ( RING_FULL(&blk_ring) )
+ {
tsleep( req, PWAIT | PCATCH, "blkif", hz);
}
mtx_lock_irqsave(&blkif_io_lock, flags);
- if ( (xb_req_prod - xb_resp_cons) == BLKIF_RING_SIZE )
+ if ( RING_FULL(&blk_ring) )
{
mtx_unlock_irqrestore(&blkif_io_lock, flags);
goto retry;
}
- xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req = *req;
+ req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+ *req_d = *req;
id = GET_ID_FROM_FREELIST();
- xb_blk_ring->ring[MASK_BLKIF_IDX(xb_req_prod)].req.id = id;
- xb_rec_ring[id].id = (unsigned long) req;
+ req_d->id = id;
+ rec_ring[id].id = (unsigned long) req;
- translate_req_to_pfn( &xb_rec_ring[id], req );
+ translate_req_to_pfn( &rec_ring[id], req );
- xb_req_prod++;
+ blk_ring.req_prod_pvt++;
flush_requests();
mtx_unlock_irqrestore(&blkif_io_lock, flags);
@@ -602,7 +698,7 @@ blkif_send_interface_connect(void)
blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
msg->handle = 0;
- msg->shmem_frame = (vtomach(xb_blk_ring) >> PAGE_SHIFT);
+ msg->shmem_frame = (vtomach(blk_ring.sring) >> PAGE_SHIFT);
ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
@@ -622,9 +718,9 @@ blkif_free(void)
mtx_unlock_irqrestore(&blkif_io_lock, flags);
/* Free resources associated with old device channel. */
- if (xb_blk_ring) {
- free(xb_blk_ring, M_DEVBUF);
- xb_blk_ring = NULL;
+ if (blk_ring.sring != NULL) {
+ free(blk_ring.sring, M_DEVBUF);
+ blk_ring.sring = NULL;
}
/* free_irq(blkif_irq, NULL);*/
blkif_irq = 0;
@@ -642,10 +738,10 @@ blkif_close(void)
static void
blkif_disconnect(void)
{
- if (xb_blk_ring) free(xb_blk_ring, M_DEVBUF);
- xb_blk_ring = (blkif_ring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
- xb_blk_ring->req_prod = xb_blk_ring->resp_prod = 0;
- xb_resp_cons = xb_req_prod = 0;
+ if (blk_ring.sring) free(blk_ring.sring, M_DEVBUF);
+ blk_ring.sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+ SHARED_RING_INIT(blk_ring.sring);
+ FRONT_RING_INIT(&blk_ring, blk_ring.sring, PAGE_SIZE);
blkif_state = BLKIF_STATE_DISCONNECTED;
blkif_send_interface_connect();
}
@@ -663,36 +759,39 @@ blkif_recover(void)
{
int i;
+ blkif_request_t *req;
/* Hmm, requests might be re-ordered when we re-issue them.
* This will need to be fixed once we have barriers */
/* Stage 1 : Find active and move to safety. */
- for ( i = 0; i < BLKIF_RING_SIZE; i++ ) {
- if ( xb_rec_ring[i].id >= KERNBASE ) {
- translate_req_to_mfn(
- &xb_blk_ring->ring[xb_req_prod].req, &xb_rec_ring[i]);
- xb_req_prod++;
+ for ( i = 0; i < BLK_RING_SIZE; i++ ) {
+ if ( rec_ring[i].id >= KERNBASE ) {
+ req = RING_GET_REQUEST(&blk_ring,
+ blk_ring.req_prod_pvt);
+ translate_req_to_mfn(req, &rec_ring[i]);
+ blk_ring.req_prod_pvt++;
}
}
- printk("blkfront: recovered %d descriptors\n",xb_req_prod);
+ printk("blkfront: recovered %d descriptors\n",blk_ring.req_prod_pvt);
/* Stage 2 : Set up shadow list. */
- for ( i = 0; i < xb_req_prod; i++ ) {
- xb_rec_ring[i].id = xb_blk_ring->ring[i].req.id;
- xb_blk_ring->ring[i].req.id = i;
- translate_req_to_pfn(&xb_rec_ring[i], &xb_blk_ring->ring[i].req);
+ for ( i = 0; i < blk_ring.req_prod_pvt; i++ ) {
+ req = RING_GET_REQUEST(&blk_ring, i);
+ rec_ring[i].id = req->id;
+ req->id = i;
+ translate_req_to_pfn(&rec_ring[i], req);
}
/* Stage 3 : Set up free list. */
- for ( ; i < BLKIF_RING_SIZE; i++ ){
- xb_rec_ring[i].id = i+1;
+ for ( ; i < BLK_RING_SIZE; i++ ){
+ rec_ring[i].id = i+1;
}
- xb_rec_ring_free = xb_req_prod;
- xb_rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+ rec_ring_free = blk_ring.req_prod_pvt;
+ rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
- /* xb_blk_ring->req_prod will be set when we flush_requests().*/
+ /* blk_ring.req_prod will be set when we flush_requests().*/
wmb();
/* Switch off recovery mode, using a memory barrier to ensure that
@@ -715,6 +814,10 @@ blkif_connect(blkif_fe_interface_status_t *status)
blkif_evtchn = status->evtchn;
blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ rdomid = status->domid;
+#endif
+
err = intr_add_handler("xbd", blkif_irq,
(driver_intr_t *)xb_response_intr, NULL,
@@ -877,11 +980,22 @@ xb_init(void *unused)
printk("[XEN] Initialising virtual block device driver\n");
- xb_rec_ring_free = 0;
- for (i = 0; i < BLKIF_RING_SIZE; i++) {
- xb_rec_ring[i].id = i+1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS,
+ &gref_head, &gref_terminal ))
+ return;
+ printk("Blkif frontend is using grant tables.\n");
+#endif
+
+ xb_kick_pending = FALSE;
+ xb_kick_pending_head = NULL;
+ xb_kick_pending_tail = NULL;
+
+ rec_ring_free = 0;
+ for (i = 0; i < BLK_RING_SIZE; i++) {
+ rec_ring[i].id = i+1;
}
- xb_rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+ rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
(void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, 0);
@@ -901,13 +1015,21 @@ blkdev_resume(void)
}
#endif
-/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
-
void
blkif_completion(blkif_request_t *req)
{
int i;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ grant_ref_t gref;
+
+ for ( i = 0; i < req->nr_segments; i++ )
+ {
+ gref = blkif_gref_from_fas(req->frame_and_sects[i]);
+ gnttab_release_grant_reference(&gref_head, gref);
+ }
+#else
+ /* This is a hack to get the dirty logging bits set */
switch ( req->operation )
{
case BLKIF_OP_READ:
@@ -919,7 +1041,8 @@ blkif_completion(blkif_request_t *req)
}
break;
}
-
+#endif
}
-MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_SPIN);
+MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_SPIN | MTX_NOWITNESS); /* XXX how does one enroll a lock? */
+ MTX_SYSINIT(ioreq_block, &blkif_io_block_lock, "BIO BLOCK LOCK", MTX_SPIN | MTX_NOWITNESS);
SYSINIT(xbdev, SI_SUB_PSEUDO, SI_ORDER_ANY, xb_init, NULL)
diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c
index de379b6bf9..fa06fb855e 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/xen/misc/evtchn_dev.c
@@ -46,8 +46,10 @@ static devfs_handle_t xen_dev_dir;
static unsigned long evtchn_dev_inuse;
/* Notification ring, accessed via /dev/xen/evtchn. */
-#define RING_SIZE 2048 /* 2048 16-bit entries */
-#define RING_MASK(_i) ((_i)&(RING_SIZE-1))
+
+#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */
+
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
static uint16_t *ring;
static unsigned int ring_cons, ring_prod, ring_overflow;
@@ -76,8 +78,8 @@ evtchn_device_upcall(int port)
clear_evtchn(port);
if ( ring != NULL ) {
- if ( (ring_prod - ring_cons) < RING_SIZE ) {
- ring[RING_MASK(ring_prod)] = (uint16_t)port;
+ if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) {
+ ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port;
if ( ring_cons == ring_prod++ ) {
wakeup(evtchn_waddr);
}
@@ -136,9 +138,9 @@ evtchn_read(struct cdev *dev, struct uio *uio, int ioflag)
}
/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
- if ( ((c ^ p) & RING_SIZE) != 0 ) {
- bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(uint16_t);
- bytes2 = RING_MASK(p) * sizeof(uint16_t);
+ if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) {
+ bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t);
+ bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t);
}
else {
bytes1 = (p - c) * sizeof(uint16_t);
@@ -154,7 +156,7 @@ evtchn_read(struct cdev *dev, struct uio *uio, int ioflag)
bytes2 = count - bytes1;
}
- if ( uiomove(&ring[RING_MASK(c)], bytes1, uio) ||
+ if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) ||
((bytes2 != 0) && uiomove(&ring[0], bytes2, uio)))
/* keeping this around as its replacement is not equivalent
* copyout(&ring[0], &buf[bytes1], bytes2)
diff --git a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
index e25f218eb3..a4ee3fbba0 100644
--- a/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
+++ b/freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
@@ -264,7 +264,7 @@ static int
netctrl_connected(void)
{
int ok;
-
+ XENPRINTF("err %d up %d\n", netctrl.err, netctrl.up);
if (netctrl.err)
ok = netctrl.err;
else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
@@ -424,8 +424,7 @@ xn_alloc_rx_buffers(struct xn_softc *sc)
= INVALID_P2M_ENTRY;
xn_rx_mcl[i].op = __HYPERVISOR_update_va_mapping;
- xn_rx_mcl[i].args[0] = (unsigned long)mtod(m_new,vm_offset_t)
- >> PAGE_SHIFT;
+ xn_rx_mcl[i].args[0] = (unsigned long)mtod(m_new,vm_offset_t);
xn_rx_mcl[i].args[1] = 0;
xn_rx_mcl[i].args[2] = 0;
@@ -441,11 +440,11 @@ xn_alloc_rx_buffers(struct xn_softc *sc)
PT_UPDATES_FLUSH();
/* After all PTEs have been zapped we blow away stale TLB entries. */
- xn_rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
+ xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
/* Give away a batch of pages. */
xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
- xn_rx_mcl[i].args[0] = (unsigned long) MEMOP_decrease_reservation;
+ xn_rx_mcl[i].args[0] = MEMOP_decrease_reservation;
xn_rx_mcl[i].args[1] = (unsigned long)xn_rx_pfns;
xn_rx_mcl[i].args[2] = (unsigned long)i;
xn_rx_mcl[i].args[3] = 0;
@@ -455,7 +454,7 @@ xn_alloc_rx_buffers(struct xn_softc *sc)
(void)HYPERVISOR_multicall(xn_rx_mcl, i+1);
/* Check return status of HYPERVISOR_dom_mem_op(). */
- if ( xn_rx_mcl[i].args[5] != i )
+ if (unlikely(xn_rx_mcl[i].result != i))
panic("Unable to reduce memory reservation\n");
/* Above is a suitable barrier to ensure backend will see requests. */
@@ -520,7 +519,7 @@ xn_rxeof(struct xn_softc *sc)
mmu->val = (unsigned long)m->m_ext.ext_args >> PAGE_SHIFT;
mmu++;
mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = (unsigned long)m->m_data >> PAGE_SHIFT;
+ mcl->args[0] = (unsigned long)m->m_data;
mcl->args[1] = (rx->addr & ~PAGE_MASK) | PG_KERNEL;
mcl->args[2] = 0;
mcl++;
@@ -545,6 +544,7 @@ xn_rxeof(struct xn_softc *sc)
mcl->args[0] = (unsigned long)xn_rx_mmu;
mcl->args[1] = mmu - xn_rx_mmu;
mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
mcl++;
(void)HYPERVISOR_multicall(xn_rx_mcl, mcl - xn_rx_mcl);
}
@@ -1303,7 +1303,6 @@ netif_driver_status(netif_fe_driver_status_t *status)
static void
netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
-
switch ( msg->subtype )
{
case CMSG_NETIF_FE_INTERFACE_STATUS:
@@ -1326,7 +1325,7 @@ netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
break;
}
- ctrl_if_send_response(msg);
+ ctrl_if_send_response(msg);
}
#if 1
@@ -1338,7 +1337,6 @@ static int probe_interfaces(void)
{
int err = 0, conn = 0;
int wait_i, wait_n = 100;
-
for ( wait_i = 0; wait_i < wait_n; wait_i++)
{
XENPRINTF("> wait_i=%d\n", wait_i);
@@ -1421,7 +1419,7 @@ xn_init(void *unused)
{
int err = 0;
-
+
netctrl_init();
(void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
CALLBACK_IN_BLOCKING_CONTEXT);
diff --git a/freebsd-5.3-xen-sparse/kern/kern_shutdown.c b/freebsd-5.3-xen-sparse/kern/kern_shutdown.c
new file mode 100644
index 0000000000..0143bd7446
--- /dev/null
+++ b/freebsd-5.3-xen-sparse/kern/kern_shutdown.c
@@ -0,0 +1,635 @@
+/*-
+ * Copyright (c) 1986, 1988, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/kern/kern_shutdown.c,v 1.163.2.2 2004/09/10 00:04:17 scottl Exp $");
+
+#include "opt_kdb.h"
+#include "opt_hw_wdog.h"
+#include "opt_mac.h"
+#include "opt_panic.h"
+#include "opt_show_busybufs.h"
+#include "opt_sched.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/cons.h>
+#include <sys/eventhandler.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/mac.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/reboot.h>
+#include <sys/resourcevar.h>
+#include <sys/smp.h> /* smp_active */
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/vnode.h>
+
+#include <machine/cpu.h>
+#include <machine/pcb.h>
+#include <machine/smp.h>
+
+#include <sys/signalvar.h>
+
+#ifndef PANIC_REBOOT_WAIT_TIME
+#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
+#endif
+
+/*
+ * Note that stdarg.h and the ANSI style va_start macro is used for both
+ * ANSI and traditional C compilers.
+ */
+#include <machine/stdarg.h>
+
+#ifdef KDB
+#ifdef KDB_UNATTENDED
+int debugger_on_panic = 0;
+#else
+int debugger_on_panic = 1;
+#endif
+SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
+ &debugger_on_panic, 0, "Run debugger on kernel panic");
+
+#ifdef KDB_TRACE
+int trace_on_panic = 1;
+#else
+int trace_on_panic = 0;
+#endif
+SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW,
+ &trace_on_panic, 0, "Print stack trace on kernel panic");
+#endif /* KDB */
+
+int sync_on_panic = 0;
+SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW,
+ &sync_on_panic, 0, "Do a sync before rebooting from a panic");
+
+SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
+
+#ifdef HW_WDOG
+/*
+ * If there is a hardware watchdog, point this at the function needed to
+ * hold it off.
+ * It's needed when the kernel needs to do some lengthy operations.
+ * e.g. in wd.c when dumping core.. It's most annoying to have
+ * your precious core-dump only half written because the wdog kicked in.
+ */
+watchdog_tickle_fn wdog_tickler = NULL;
+#endif /* HW_WDOG */
+
+/*
+ * Variable panicstr contains argument to first call to panic; used as flag
+ * to indicate that the kernel has already called panic.
+ */
+const char *panicstr;
+
+int dumping; /* system is dumping */
+static struct dumperinfo dumper; /* our selected dumper */
+
+/* Context information for dump-debuggers. */
+static struct pcb dumppcb; /* Registers. */
+static lwpid_t dumptid; /* Thread ID. */
+
+static void boot(int) __dead2;
+static void poweroff_wait(void *, int);
+static void shutdown_halt(void *junk, int howto);
+static void shutdown_panic(void *junk, int howto);
+static void shutdown_reset(void *junk, int howto);
+
+/* register various local shutdown events */
+static void
+shutdown_conf(void *unused)
+{
+
+ EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
+ SHUTDOWN_PRI_FIRST);
+ EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
+ SHUTDOWN_PRI_LAST + 100);
+ EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
+ SHUTDOWN_PRI_LAST + 100);
+ EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
+ SHUTDOWN_PRI_LAST + 200);
+}
+
+SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL)
+
+/*
+ * The system call that results in a reboot
+ *
+ * MPSAFE
+ */
+/* ARGSUSED */
+int
+reboot(struct thread *td, struct reboot_args *uap)
+{
+ int error;
+
+ error = 0;
+#ifdef MAC
+ error = mac_check_system_reboot(td->td_ucred, uap->opt);
+#endif
+ if (error == 0)
+ error = suser(td);
+ if (error == 0) {
+ mtx_lock(&Giant);
+ boot(uap->opt);
+ mtx_unlock(&Giant);
+ }
+ return (error);
+}
+
+/*
+ * Called by events that want to shut down, e.g. <CTL><ALT><DEL> on a PC
+ */
+static int shutdown_howto = 0;
+
+void
+shutdown_nice(int howto)
+{
+
+ shutdown_howto = howto;
+
+ /* Send a signal to init(8) and have it shutdown the world */
+ if (initproc != NULL) {
+ PROC_LOCK(initproc);
+ psignal(initproc, SIGINT);
+ PROC_UNLOCK(initproc);
+ } else {
+ /* No init(8) running, so simply reboot */
+ boot(RB_NOSYNC);
+ }
+ return;
+}
+static int waittime = -1;
+
+static void
+print_uptime(void)
+{
+ int f;
+ struct timespec ts;
+
+ getnanouptime(&ts);
+ printf("Uptime: ");
+ f = 0;
+ if (ts.tv_sec >= 86400) {
+ printf("%ldd", (long)ts.tv_sec / 86400);
+ ts.tv_sec %= 86400;
+ f = 1;
+ }
+ if (f || ts.tv_sec >= 3600) {
+ printf("%ldh", (long)ts.tv_sec / 3600);
+ ts.tv_sec %= 3600;
+ f = 1;
+ }
+ if (f || ts.tv_sec >= 60) {
+ printf("%ldm", (long)ts.tv_sec / 60);
+ ts.tv_sec %= 60;
+ f = 1;
+ }
+ printf("%lds\n", (long)ts.tv_sec);
+}
+
+static void
+doadump(void)
+{
+
+ /*
+ * Sometimes people have to call this from the kernel debugger.
+ * (if 'panic' can not dump)
+ * Give them a clue as to why they can't dump.
+ */
+ if (dumper.dumper == NULL) {
+ printf("Cannot dump. No dump device defined.\n");
+ return;
+ }
+
+ savectx(&dumppcb);
+ dumptid = curthread->td_tid;
+ dumping++;
+ dumpsys(&dumper);
+}
+
+/*
+ * Go through the rigmarole of shutting down..
+ * this used to be in machdep.c but I'll be damned if I could see
+ * anything machine dependent in it.
+ */
+static void
+boot(int howto)
+{
+ static int first_buf_printf = 1;
+
+ /* collect extra flags that shutdown_nice might have set */
+ howto |= shutdown_howto;
+
+ /* We are out of the debugger now. */
+ kdb_active = 0;
+
+#ifdef SMP
+ if (smp_active)
+ printf("boot() called on cpu#%d\n", PCPU_GET(cpuid));
+#endif
+ /*
+ * Do any callouts that should be done BEFORE syncing the filesystems.
+ */
+ EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
+
+ /*
+ * Now sync filesystems
+ */
+ if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
+ register struct buf *bp;
+ int iter, nbusy, pbusy;
+#ifndef PREEMPTION
+ int subiter;
+#endif
+
+ waittime = 0;
+
+ sync(&thread0, NULL);
+
+ /*
+ * With soft updates, some buffers that are
+ * written will be remarked as dirty until other
+ * buffers are written.
+ */
+ for (iter = pbusy = 0; iter < 20; iter++) {
+ nbusy = 0;
+ for (bp = &buf[nbuf]; --bp >= buf; ) {
+ if ((bp->b_flags & B_INVAL) == 0 &&
+ BUF_REFCNT(bp) > 0) {
+ nbusy++;
+ } else if ((bp->b_flags & (B_DELWRI | B_INVAL))
+ == B_DELWRI) {
+ /* bawrite(bp);*/
+ nbusy++;
+ }
+ }
+ if (nbusy == 0) {
+ if (first_buf_printf)
+ printf("No buffers busy after final sync");
+ break;
+ }
+ if (first_buf_printf) {
+ printf("Syncing disks, buffers remaining... ");
+ first_buf_printf = 0;
+ }
+ printf("%d ", nbusy);
+ if (nbusy < pbusy)
+ iter = 0;
+ pbusy = nbusy;
+ sync(&thread0, NULL);
+
+#ifdef PREEMPTION
+ /*
+ * Drop Giant and spin for a while to allow
+ * interrupt threads to run.
+ */
+ DROP_GIANT();
+ DELAY(50000 * iter);
+ PICKUP_GIANT();
+#else
+ /*
+ * Drop Giant and context switch several times to
+ * allow interrupt threads to run.
+ */
+ DROP_GIANT();
+ for (subiter = 0; subiter < 50 * iter; subiter++) {
+ mtx_lock_spin(&sched_lock);
+ mi_switch(SW_VOL, NULL);
+ mtx_unlock_spin(&sched_lock);
+ DELAY(1000);
+ }
+ PICKUP_GIANT();
+#endif
+ }
+ printf("\n");
+ /*
+ * Count only busy local buffers to prevent forcing
+ * a fsck if we're just a client of a wedged NFS server
+ */
+ nbusy = 0;
+ for (bp = &buf[nbuf]; --bp >= buf; ) {
+ if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) ||
+ ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
+ if (bp->b_dev == NULL) {
+ TAILQ_REMOVE(&mountlist,
+ bp->b_vp->v_mount, mnt_list);
+ continue;
+ }
+ nbusy++;
+#if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
+ printf(
+ "%d: dev:%s, flags:%0x, blkno:%ld, lblkno:%ld\n",
+ nbusy, devtoname(bp->b_dev),
+ bp->b_flags, (long)bp->b_blkno,
+ (long)bp->b_lblkno);
+#endif
+ }
+ }
+ if (nbusy) {
+ /*
+ * Failed to sync all blocks. Indicate this and don't
+ * unmount filesystems (thus forcing an fsck on reboot).
+ */
+ printf("Giving up on %d buffers\n", nbusy);
+ DELAY(5000000); /* 5 seconds */
+ } else {
+ if (!first_buf_printf)
+ printf("Final sync complete\n");
+ /*
+ * Unmount filesystems
+ */
+ if (panicstr == 0)
+ vfs_unmountall();
+ }
+ DELAY(100000); /* wait for console output to finish */
+ }
+
+ print_uptime();
+
+ /*
+ * Ok, now do things that assume all filesystem activity has
+ * been completed.
+ */
+ EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
+ splhigh();
+ if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
+ doadump();
+
+ /* Now that we're going to really halt the system... */
+ EVENTHANDLER_INVOKE(shutdown_final, howto);
+
+ for(;;) ; /* safety against shutdown_reset not working */
+ /* NOTREACHED */
+}
+
+/*
+ * If the shutdown was a clean halt, behave accordingly.
+ */
+static void
+shutdown_halt(void *junk, int howto)
+{
+
+ if (howto & RB_HALT) {
+ printf("\n");
+ printf("The operating system has halted.\n");
+ printf("Please press any key to reboot.\n\n");
+ switch (cngetc()) {
+ case -1: /* No console, just die */
+ cpu_halt();
+ /* NOTREACHED */
+ default:
+ howto &= ~RB_HALT;
+ break;
+ }
+ }
+}
+
+/*
+ * Check to see if the system panicked, pause and then reboot
+ * according to the specified delay.
+ */
+static void
+shutdown_panic(void *junk, int howto)
+{
+ int loop;
+
+ if (howto & RB_DUMP) {
+ if (PANIC_REBOOT_WAIT_TIME != 0) {
+ if (PANIC_REBOOT_WAIT_TIME != -1) {
+ printf("Automatic reboot in %d seconds - "
+ "press a key on the console to abort\n",
+ PANIC_REBOOT_WAIT_TIME);
+ for (loop = PANIC_REBOOT_WAIT_TIME * 10;
+ loop > 0; --loop) {
+ DELAY(1000 * 100); /* 1/10th second */
+ /* Did user type a key? */
+ if (cncheckc() != -1)
+ break;
+ }
+ if (!loop)
+ return;
+ }
+ } else { /* zero time specified - reboot NOW */
+ return;
+ }
+ printf("--> Press a key on the console to reboot,\n");
+ printf("--> or switch off the system now.\n");
+ cngetc();
+ }
+}
+
+/*
+ * Everything done, now reset
+ */
+static void
+shutdown_reset(void *junk, int howto)
+{
+
+ printf("Rebooting...\n");
+ DELAY(1000000); /* wait 1 sec for printf's to complete and be read */
+ /* cpu_boot(howto); */ /* doesn't do anything at the moment */
+ cpu_reset();
+ /* NOTREACHED */ /* assuming reset worked */
+}
+
+#ifdef SMP
+static u_int panic_cpu = NOCPU;
+#endif
+
+/*
+ * Panic is called on unresolvable fatal errors. It prints "panic: mesg",
+ * and then reboots. If we are called twice, then we avoid trying to sync
+ * the disks as this often leads to recursive panics.
+ *
+ * MPSAFE
+ */
+void
+panic(const char *fmt, ...)
+{
+ struct thread *td = curthread;
+ int bootopt, newpanic;
+ va_list ap;
+ static char buf[256];
+
+#ifdef SMP
+ /*
+ * We don't want multiple CPU's to panic at the same time, so we
+ * use panic_cpu as a simple spinlock. We have to keep checking
+ * panic_cpu if we are spinning in case the panic on the first
+ * CPU is canceled.
+ */
+ if (panic_cpu != PCPU_GET(cpuid))
+ while (atomic_cmpset_int(&panic_cpu, NOCPU,
+ PCPU_GET(cpuid)) == 0)
+ while (panic_cpu != NOCPU)
+ ; /* nothing */
+#endif
+
+ bootopt = RB_AUTOBOOT | RB_DUMP;
+ newpanic = 0;
+ if (panicstr)
+ bootopt |= RB_NOSYNC;
+ else {
+ panicstr = fmt;
+ newpanic = 1;
+ }
+
+ va_start(ap, fmt);
+ if (newpanic) {
+ (void)vsnprintf(buf, sizeof(buf), fmt, ap);
+ panicstr = buf;
+ printf("panic: %s\n", buf);
+ } else {
+ printf("panic: ");
+ vprintf(fmt, ap);
+ printf("\n");
+ }
+ va_end(ap);
+#ifdef SMP
+ printf("cpuid = %d\n", PCPU_GET(cpuid));
+#endif
+
+#ifdef KDB
+ if (newpanic && trace_on_panic)
+ kdb_backtrace();
+ if (debugger_on_panic)
+ kdb_enter("panic");
+#ifdef RESTARTABLE_PANICS
+ /* See if the user aborted the panic, in which case we continue. */
+ if (panicstr == NULL) {
+#ifdef SMP
+ atomic_store_rel_int(&panic_cpu, NOCPU);
+#endif
+ return;
+ }
+#endif
+#endif
+ mtx_lock_spin(&sched_lock);
+ td->td_flags |= TDF_INPANIC;
+ mtx_unlock_spin(&sched_lock);
+ if (!sync_on_panic)
+ bootopt |= RB_NOSYNC;
+#ifdef XEN
+ HYPERVISOR_crash();
+#else
+ boot(bootopt);
+#endif
+}
+
+/*
+ * Support for poweroff delay.
+ */
+#ifndef POWEROFF_DELAY
+# define POWEROFF_DELAY 5000
+#endif
+static int poweroff_delay = POWEROFF_DELAY;
+
+SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
+ &poweroff_delay, 0, "");
+
+static void
+poweroff_wait(void *junk, int howto)
+{
+
+ if (!(howto & RB_POWEROFF) || poweroff_delay <= 0)
+ return;
+ DELAY(poweroff_delay * 1000);
+}
+
+/*
+ * Some system processes (e.g. syncer) need to be stopped at appropriate
+ * points in their main loops prior to a system shutdown, so that they
+ * won't interfere with the shutdown process (e.g. by holding a disk buf
+ * to cause sync to fail). For each of these system processes, register
+ * shutdown_kproc() as a handler for one of shutdown events.
+ */
+static int kproc_shutdown_wait = 60;
+SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
+ &kproc_shutdown_wait, 0, "");
+
+void
+kproc_shutdown(void *arg, int howto)
+{
+ struct proc *p;
+ char procname[MAXCOMLEN + 1];
+ int error;
+
+ if (panicstr)
+ return;
+
+ p = (struct proc *)arg;
+ strlcpy(procname, p->p_comm, sizeof(procname));
+ printf("Waiting (max %d seconds) for system process `%s' to stop...",
+ kproc_shutdown_wait, procname);
+ error = kthread_suspend(p, kproc_shutdown_wait * hz);
+
+ if (error == EWOULDBLOCK)
+ printf("timed out\n");
+ else
+ printf("done\n");
+}
+
+/* Registration of dumpers */
+int
+set_dumper(struct dumperinfo *di)
+{
+
+ if (di == NULL) {
+ bzero(&dumper, sizeof dumper);
+ return (0);
+ }
+ if (dumper.dumper != NULL)
+ return (EBUSY);
+ dumper = *di;
+ return (0);
+}
+
+#if defined(__powerpc__)
+void
+dumpsys(struct dumperinfo *di __unused)
+{
+
+ printf("Kernel dumps not implemented on this architecture\n");
+}
+#endif
diff --git a/linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c b/linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c
deleted file mode 100644
index 27ca6e83dd..0000000000
--- a/linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c
+++ /dev/null
@@ -1,513 +0,0 @@
-/******************************************************************************
- * balloon.c
- *
- * Xen balloon driver - enables returning/claiming memory to/from Xen.
- *
- * Copyright (c) 2003, B Dragovic
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <asm/xen_proc.h>
-
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/smp_lock.h>
-#include <linux/pagemap.h>
-#include <linux/bootmem.h>
-#include <linux/highmem.h>
-#include <linux/vmalloc.h>
-
-#include <asm/hypervisor.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/tlb.h>
-
-/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
-#define USER_INFLATE_BALLOON 1 /* return mem to hypervisor */
-#define USER_DEFLATE_BALLOON 2 /* claim mem from hypervisor */
-typedef struct user_balloon_op {
- unsigned int op;
- unsigned long size;
-} user_balloon_op_t;
-/* END OF USER DEFINE */
-
-static struct proc_dir_entry *balloon_pde;
-unsigned long credit;
-static unsigned long current_pages, most_seen_pages;
-
-/*
- * Dead entry written into balloon-owned entries in the PMT.
- * It is deliberately different to INVALID_P2M_ENTRY.
- */
-#define DEAD 0xdead1234
-
-static inline pte_t *get_ptep(unsigned long addr)
-{
- pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
- pgd = pgd_offset_k(addr);
-
- if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
-
- pmd = pmd_offset(pgd, addr);
- if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
-
- ptep = pte_offset(pmd, addr);
-
- return ptep;
-}
-
-/* Main function for relinquishing memory. */
-static unsigned long inflate_balloon(unsigned long num_pages)
-{
- unsigned long *parray;
- unsigned long *currp;
- unsigned long curraddr;
- unsigned long ret = 0;
- unsigned long i, j;
-
- parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
- if ( parray == NULL )
- {
- printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
- return -EFAULT;
- }
-
- currp = parray;
-
- for ( i = 0; i < num_pages; i++, currp++ )
- {
- struct page *page = alloc_page(GFP_HIGHUSER);
- unsigned long pfn = page - mem_map;
-
- /* If allocation fails then free all reserved pages. */
- if ( page == NULL )
- {
- printk(KERN_ERR "Unable to inflate balloon by %ld, only"
- " %ld pages free.", num_pages, i);
- currp = parray;
- for ( j = 0; j < i; j++, currp++ )
- __free_page((struct page *) (mem_map + *currp));
- ret = -EFAULT;
- goto cleanup;
- }
-
- *currp = pfn;
- }
-
-
- for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
- {
- unsigned long mfn = phys_to_machine_mapping[*currp];
- curraddr = (unsigned long)page_address(mem_map + *currp);
- /* Blow away page contents for security, and also p.t. ref if any. */
- if ( curraddr != 0 )
- {
- scrub_pages(curraddr, 1);
- queue_l1_entry_update(get_ptep(curraddr), 0);
- }
-#ifdef CONFIG_XEN_SCRUB_PAGES
- else
- {
- void *p = kmap(&mem_map[*currp]);
- scrub_pages(p, 1);
- kunmap(&mem_map[*currp]);
- }
-#endif
- phys_to_machine_mapping[*currp] = DEAD;
- *currp = mfn;
- }
-
- /* Flush updates through and flush the TLB. */
- xen_tlb_flush();
-
- ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
- parray, num_pages, 0);
- if ( unlikely(ret != num_pages) )
- {
- printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
- goto cleanup;
- }
-
- credit += num_pages;
- ret = num_pages;
-
- cleanup:
- vfree(parray);
-
- return ret;
-}
-
-/*
- * Install new mem pages obtained by deflate_balloon. function walks
- * phys->machine mapping table looking for DEAD entries and populates
- * them.
- */
-static unsigned long process_returned_pages(unsigned long * parray,
- unsigned long num)
-{
- /* currently, this function is rather simplistic as
- * it is assumed that domain reclaims only number of
- * pages previously released. this is to change soon
- * and the code to extend page tables etc. will be
- * incorporated here.
- */
-
- unsigned long tot_pages = most_seen_pages;
- unsigned long * curr = parray;
- unsigned long num_installed;
- unsigned long i;
-
- num_installed = 0;
- for ( i = 0; (i < tot_pages) && (num_installed < num); i++ )
- {
- if ( phys_to_machine_mapping[i] == DEAD )
- {
- phys_to_machine_mapping[i] = *curr;
- queue_machphys_update(*curr, i);
- if (i<max_low_pfn)
- queue_l1_entry_update(
- get_ptep((unsigned long)__va(i << PAGE_SHIFT)),
- ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
-
- __free_page(mem_map + i);
-
- curr++;
- num_installed++;
- }
- }
-
- return num_installed;
-}
-
-unsigned long deflate_balloon(unsigned long num_pages)
-{
- unsigned long ret;
- unsigned long * parray;
-
- if ( num_pages > credit )
- {
- printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n",
- num_pages, credit);
- return -EAGAIN;
- }
-
- parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
- if ( parray == NULL )
- {
- printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
- return 0;
- }
-
- ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
- parray, num_pages, 0);
- if ( unlikely(ret != num_pages) )
- {
- printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
- ret);
- goto cleanup;
- }
-
- if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
- {
- printk(KERN_WARNING
- "deflate_balloon: restored only %lx of %lx pages.\n",
- ret, num_pages);
- goto cleanup;
- }
-
- ret = num_pages;
- credit -= num_pages;
-
- cleanup:
- vfree(parray);
-
- return ret;
-}
-
-#define PAGE_TO_MB_SHIFT 8
-
-/*
- * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c
- * The loops do go through all of low memory (ZONE_NORMAL). The
- * old pages have _PAGE_PRESENT set and so get skipped.
- * If low memory is not full, the new pages are used to fill it, going
- * from cur_low_pfn to low_pfn. high memory is not direct mapped so
- * no extension is needed for new high memory.
- */
-
-static void pagetable_extend (int cur_low_pfn, int newpages)
-{
- unsigned long vaddr, end;
- pgd_t *kpgd, *pgd, *pgd_base;
- int i, j, k;
- pmd_t *kpmd, *pmd;
- pte_t *kpte, *pte, *pte_base;
- int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn);
-
- /*
- * This can be zero as well - no problem, in that case we exit
- * the loops anyway due to the PTRS_PER_* conditions.
- */
- end = (unsigned long)__va(low_pfn*PAGE_SIZE);
-
- pgd_base = init_mm.pgd;
- i = __pgd_offset(PAGE_OFFSET);
- pgd = pgd_base + i;
-
- for (; i < PTRS_PER_PGD; pgd++, i++) {
- vaddr = i*PGDIR_SIZE;
- if (end && (vaddr >= end))
- break;
- pmd = (pmd_t *)pgd;
- for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
- vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
- if (end && (vaddr >= end))
- break;
-
- /* Filled in for us already? */
- if ( pmd_val(*pmd) & _PAGE_PRESENT )
- continue;
-
- pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
-
- for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
- vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
- if (end && (vaddr >= end))
- break;
- *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
- }
- kpgd = pgd_offset_k((unsigned long)pte_base);
- kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
- kpte = pte_offset(kpmd, (unsigned long)pte_base);
- queue_l1_entry_update(kpte,
- (*(unsigned long *)kpte)&~_PAGE_RW);
- set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
- XEN_flush_page_update_queue();
- }
- }
-}
-
-/*
- * claim_new_pages() asks xen to increase this domain's memory reservation
- * and return a list of the new pages of memory. This new pages are
- * added to the free list of the memory manager.
- *
- * Available RAM does not normally change while Linux runs. To make this work,
- * the linux mem= boottime command line param must say how big memory could
- * possibly grow. Then setup_arch() in arch/xen/kernel/setup.c
- * sets max_pfn, max_low_pfn and the zones according to
- * this max memory size. The page tables themselves can only be
- * extended after xen has assigned new pages to this domain.
- */
-
-static unsigned long
-claim_new_pages(unsigned long num_pages)
-{
- unsigned long new_page_cnt, pfn;
- unsigned long * parray, *curr;
-
- if (most_seen_pages+num_pages> max_pfn)
- num_pages = max_pfn-most_seen_pages;
- if (num_pages==0) return 0;
-
- parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
- if ( parray == NULL )
- {
- printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
- return 0;
- }
-
- new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
- parray, num_pages, 0);
- if ( new_page_cnt != num_pages )
- {
- printk(KERN_WARNING
- "claim_new_pages: xen granted only %lu of %lu requested pages\n",
- new_page_cnt, num_pages);
-
- /*
- * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
- * usually can dribble out a few pages and then hangs.
- */
- if ( new_page_cnt < 1000 )
- {
- printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
- HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
- parray, new_page_cnt, 0);
- return -EFAULT;
- }
- }
- memcpy(phys_to_machine_mapping+most_seen_pages, parray,
- new_page_cnt * sizeof(unsigned long));
-
- pagetable_extend(most_seen_pages,new_page_cnt);
-
- for ( pfn = most_seen_pages, curr = parray;
- pfn < most_seen_pages+new_page_cnt;
- pfn++, curr++ )
- {
- struct page *page = mem_map + pfn;
-
-#ifndef CONFIG_HIGHMEM
- if ( pfn>=max_low_pfn )
- {
- printk(KERN_WARNING "Warning only %ldMB will be used.\n",
- pfn>>PAGE_TO_MB_SHIFT);
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
- break;
- }
-#endif
- queue_machphys_update(*curr, pfn);
- if ( pfn < max_low_pfn )
- queue_l1_entry_update(
- get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
- ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
-
- XEN_flush_page_update_queue();
-
- /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
- ClearPageReserved(page);
- if ( pfn >= max_low_pfn )
- set_bit(PG_highmem, &page->flags);
- set_page_count(page, 1);
- __free_page(page);
- }
-
- vfree(parray);
-
- return new_page_cnt;
-}
-
-static int balloon_write(struct file *file, const char *buffer,
- u_long count, void *data)
-{
- char memstring[64], *endchar;
- int len, i;
- unsigned long target;
- unsigned long long targetbytes;
-
- /* Only admin can play with the balloon :) */
- if ( !capable(CAP_SYS_ADMIN) )
- return -EPERM;
-
- if ( count > sizeof(memstring) )
- return -EFBIG;
-
- len = strnlen_user(buffer, count);
- if ( len == 0 ) return -EBADMSG;
- if ( len == 1 ) return 1; /* input starts with a NUL char */
- if ( strncpy_from_user(memstring, buffer, len) < 0 )
- return -EFAULT;
-
- endchar = memstring;
- for ( i = 0; i < len; ++i, ++endchar )
- if ( (memstring[i] < '0') || (memstring[i] > '9') )
- break;
- if ( i == 0 )
- return -EBADMSG;
-
- targetbytes = memparse(memstring,&endchar);
- target = targetbytes >> PAGE_SHIFT;
-
- if ( target < current_pages )
- {
- int change = inflate_balloon(current_pages-target);
- if ( change <= 0 )
- return change;
-
- current_pages -= change;
- printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
- change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
- }
- else if ( target > current_pages )
- {
- int change, reclaim = min(target,most_seen_pages) - current_pages;
-
- if ( reclaim )
- {
- change = deflate_balloon( reclaim);
- if ( change <= 0 )
- return change;
- current_pages += change;
- printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
- change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
- }
-
- if ( most_seen_pages < target )
- {
- int growth = claim_new_pages(target-most_seen_pages);
- if ( growth <= 0 )
- return growth;
- most_seen_pages += growth;
- current_pages += growth;
- printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
- growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
- }
- }
-
-
- return len;
-}
-
-
-static int balloon_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len;
- len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT);
-
- if (len <= off+count) *eof = 1;
- *start = page + off;
- len -= off;
- if (len>count) len = count;
- if (len<0) len = 0;
- return len;
-}
-
-static int __init init_module(void)
-{
- printk(KERN_ALERT "Starting Xen Balloon driver\n");
-
- most_seen_pages = current_pages = min(xen_start_info.nr_pages,max_pfn);
- if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
- {
- printk(KERN_ALERT "Unable to create balloon driver proc entry!");
- return -1;
- }
-
- balloon_pde->write_proc = balloon_write;
- balloon_pde->read_proc = balloon_read;
-
- /*
- * make a new phys map if mem= says xen can give us memory to grow
- */
- if ( max_pfn > xen_start_info.nr_pages )
- {
- extern unsigned long *phys_to_machine_mapping;
- unsigned long *newmap;
- newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
- memset(newmap, ~0, max_pfn * sizeof(unsigned long));
- memcpy(newmap, phys_to_machine_mapping,
- xen_start_info.nr_pages * sizeof(unsigned long));
- phys_to_machine_mapping = newmap;
- }
-
- return 0;
-}
-
-static void __exit cleanup_module(void)
-{
- if ( balloon_pde != NULL )
- {
- remove_xen_proc_entry("balloon");
- balloon_pde = NULL;
- }
-}
-
-module_init(init_module);
-module_exit(cleanup_module);
diff --git a/linux-2.4.30-xen-sparse/arch/xen/Makefile b/linux-2.4.30-xen-sparse/arch/xen/Makefile
index e8e161a395..77da37bfaf 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/Makefile
+++ b/linux-2.4.30-xen-sparse/arch/xen/Makefile
@@ -61,6 +61,7 @@ SUBDIRS += arch/xen/drivers/console
SUBDIRS += arch/xen/drivers/evtchn
SUBDIRS += arch/xen/drivers/blkif
SUBDIRS += arch/xen/drivers/netif
+SUBDIRS += arch/xen/drivers/usbif
SUBDIRS += arch/xen/drivers/balloon
ifdef CONFIG_XEN_PRIVILEGED_GUEST
SUBDIRS += arch/xen/drivers/dom0
@@ -71,6 +72,7 @@ CORE_FILES += arch/xen/drivers/evtchn/drv.o
CORE_FILES += arch/xen/drivers/console/drv.o
DRIVERS += arch/xen/drivers/blkif/drv.o
DRIVERS += arch/xen/drivers/netif/drv.o
+DRIVERS += arch/xen/drivers/usbif/drv.o
ifdef CONFIG_XEN_PRIVILEGED_GUEST
CORE_FILES += arch/xen/drivers/dom0/drv.o
endif
diff --git a/linux-2.4.30-xen-sparse/arch/xen/config.in b/linux-2.4.30-xen-sparse/arch/xen/config.in
index b69fbf5930..23492fb5c8 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/config.in
+++ b/linux-2.4.30-xen-sparse/arch/xen/config.in
@@ -16,14 +16,19 @@ mainmenu_option next_comment
comment 'Xen'
bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST
bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+ bool 'USB-device backend driver' CONFIG_XEN_USB_BACKEND
+fi
bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES
bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND
bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND
+bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT
+bool 'USB-device frontend driver' CONFIG_XEN_USB_FRONTEND
endmenu
# The IBM S/390 patch needs this.
define_bool CONFIG_NO_IDLE_HZ y
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" == "y" ]; then
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
define_bool CONFIG_FOREIGN_PAGES y
else
define_bool CONFIG_FOREIGN_PAGES n
@@ -262,7 +267,7 @@ fi
source drivers/char/Config.in
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ]; then
source drivers/media/Config.in
fi
@@ -295,9 +300,16 @@ if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
source drivers/sound/Config.in
fi
endmenu
+fi
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" -o "$CONFIG_XEN_USB_FRONTEND" = "y" ]; then
+ if [ "$CONFIG_XEN_USB_FRONTEND" = "y" -o "$CONFIG_XEN_USB_BACKEND" = "y" ]; then
+ define_bool CONFIG_USB y
+ fi
source drivers/usb/Config.in
+fi
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
source net/bluetooth/Config.in
fi
diff --git a/linux-2.4.30-xen-sparse/arch/xen/defconfig-xen0 b/linux-2.4.30-xen-sparse/arch/xen/defconfig-xen0
index f9d953db6d..78e93e900b 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/defconfig-xen0
+++ b/linux-2.4.30-xen-sparse/arch/xen/defconfig-xen0
@@ -12,9 +12,12 @@ CONFIG_UID16=y
#
CONFIG_XEN_PRIVILEGED_GUEST=y
CONFIG_XEN_PHYSDEV_ACCESS=y
+# CONFIG_XEN_USB_BACKEND is not set
CONFIG_XEN_SCRUB_PAGES=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_GRANT=y
+# CONFIG_XEN_USB_FRONTEND is not set
CONFIG_NO_IDLE_HZ=y
CONFIG_FOREIGN_PAGES=y
diff --git a/linux-2.4.30-xen-sparse/arch/xen/defconfig-xenU b/linux-2.4.30-xen-sparse/arch/xen/defconfig-xenU
index 9678a2c3fc..3640bfc19b 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/defconfig-xenU
+++ b/linux-2.4.30-xen-sparse/arch/xen/defconfig-xenU
@@ -15,6 +15,8 @@ CONFIG_UID16=y
CONFIG_XEN_SCRUB_PAGES=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_BLKDEV_GRANT=y
+# CONFIG_XEN_USB_FRONTEND is not set
CONFIG_NO_IDLE_HZ=y
# CONFIG_FOREIGN_PAGES is not set
CONFIG_NETDEVICES=y
diff --git a/linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
index d3f43f9426..682906bf66 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
+++ b/linux-2.4.30-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
@@ -67,9 +67,14 @@ static int xlvbd_get_vbd_info(vdisk_t *disk_info)
memset(&req, 0, sizeof(req));
req.operation = BLKIF_OP_PROBE;
req.nr_segments = 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ blkif_control_probe_send(&req, &rsp,
+ (unsigned long)(virt_to_machine(buf)));
+#else
req.frame_and_sects[0] = virt_to_machine(buf) | 7;
blkif_control_send(&req, &rsp);
+#endif
if ( rsp.status <= 0 )
{
@@ -114,12 +119,6 @@ static int xlvbd_init_device(vdisk_t *xd)
if ( (bd = bdget(device)) == NULL )
return -1;
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
{
printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
@@ -137,11 +136,6 @@ static int xlvbd_init_device(vdisk_t *xd)
major_name = XLSCSI_MAJOR_NAME;
max_part = XLSCSI_MAX_PART;
- } else if (VDISK_VIRTUAL(xd->info)) {
-
- major_name = XLVBD_MAJOR_NAME;
- max_part = XLVBD_MAX_PART;
-
} else {
/* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
@@ -248,8 +242,8 @@ static int xlvbd_init_device(vdisk_t *xd)
blk_size[major] = gd->sizes;
}
- if ( VDISK_READONLY(xd->info) )
- set_device_ro(device, 1);
+ if ( xd->info & VDISK_READONLY )
+ set_device_ro(device, 1);
gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
@@ -298,20 +292,16 @@ static int xlvbd_init_device(vdisk_t *xd)
gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
/* Some final fix-ups depending on the device type */
- switch ( VDISK_TYPE(xd->info) )
+ if ( xd->info & VDISK_REMOVABLE )
{
- case VDISK_TYPE_CDROM:
- case VDISK_TYPE_FLOPPY:
- case VDISK_TYPE_TAPE:
gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE;
printk(KERN_ALERT
"Skipping partition check on %s /dev/%s\n",
- VDISK_TYPE(xd->info)==VDISK_TYPE_CDROM ? "cdrom" :
- (VDISK_TYPE(xd->info)==VDISK_TYPE_TAPE ? "tape" :
- "floppy"), disk_name(gd, MINOR(device), buf));
- break;
-
- case VDISK_TYPE_DISK:
+ (xd->info & VDISK_CDROM) ? "cdrom" : "removable",
+ disk_name(gd, MINOR(device), buf));
+ }
+ else
+ {
/* Only check partitions on real discs (not virtual!). */
if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
{
@@ -321,17 +311,10 @@ static int xlvbd_init_device(vdisk_t *xd)
break;
}
register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
- break;
-
- default:
- printk(KERN_ALERT "XenoLinux: unknown device type %d\n",
- VDISK_TYPE(xd->info));
- break;
}
}
out:
- up(&bd->bd_sem);
bdput(bd);
return rc;
}
@@ -356,12 +339,6 @@ static int xlvbd_remove_device(int device)
if ( (bd = bdget(device)) == NULL )
return -1;
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
if ( ((gd = get_gendisk(device)) == NULL) ||
((disk = xldev_to_xldisk(device)) == NULL) )
BUG();
@@ -423,7 +400,6 @@ static int xlvbd_remove_device(int device)
}
out:
- up(&bd->bd_sem);
bdput(bd);
return rc;
}
diff --git a/linux-2.4.30-xen-sparse/arch/xen/kernel/Makefile b/linux-2.4.30-xen-sparse/arch/xen/kernel/Makefile
index b17b430bf3..3eb0701958 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/kernel/Makefile
+++ b/linux-2.4.30-xen-sparse/arch/xen/kernel/Makefile
@@ -6,12 +6,12 @@ all: kernel.o head.o init_task.o
O_TARGET := kernel.o
-export-objs := i386_ksyms.o skbuff.o ctrl_if.o
+export-objs := i386_ksyms.o gnttab.o skbuff.o ctrl_if.o
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \
i386_ksyms.o i387.o evtchn.o ctrl_if.o pci-dma.o \
- reboot.o fixup.o skbuff.o
+ reboot.o fixup.o gnttab.o skbuff.o
ifdef CONFIG_PCI
obj-y += pci-i386.o pci-pc.o
diff --git a/linux-2.4.30-xen-sparse/arch/xen/kernel/head.S b/linux-2.4.30-xen-sparse/arch/xen/kernel/head.S
index d48bb7098d..e8c563572b 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/kernel/head.S
+++ b/linux-2.4.30-xen-sparse/arch/xen/kernel/head.S
@@ -1,6 +1,8 @@
.section __xen_guest
- .asciz "GUEST_OS=linux,GUEST_VER=2.4,XEN_VER=2.0,VIRT_BASE=0xC0000000"
+ .ascii "GUEST_OS=linux,GUEST_VER=2.4,XEN_VER=3.0,VIRT_BASE=0xC0000000"
+ .ascii ",LOADER=generic"
+ .byte 0
.text
#include <linux/config.h>
diff --git a/linux-2.4.30-xen-sparse/arch/xen/kernel/ldt.c b/linux-2.4.30-xen-sparse/arch/xen/kernel/ldt.c
index 374c9b6c30..6235778493 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/kernel/ldt.c
+++ b/linux-2.4.30-xen-sparse/arch/xen/kernel/ldt.c
@@ -14,6 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <asm/mmu_context.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/ldt.h>
@@ -58,7 +59,6 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
pc->ldt,
(pc->size*LDT_ENTRY_SIZE)/PAGE_SIZE);
load_LDT(pc);
- flush_page_update_queue();
#ifdef CONFIG_SMP
if (current->mm->cpu_vm_mask != (1<<smp_processor_id()))
smp_call_function(flush_ldt, 0, 1, 1);
@@ -66,6 +66,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
}
wmb();
if (oldsize) {
+ make_pages_writable(
+ oldldt, (oldsize*LDT_ENTRY_SIZE)/PAGE_SIZE);
if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
@@ -118,7 +120,6 @@ void destroy_context(struct mm_struct *mm)
make_pages_writable(
mm->context.ldt,
(mm->context.size*LDT_ENTRY_SIZE)/PAGE_SIZE);
- flush_page_update_queue();
if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
diff --git a/linux-2.4.30-xen-sparse/arch/xen/kernel/process.c b/linux-2.4.30-xen-sparse/arch/xen/kernel/process.c
index ad7e82dc79..c9d553627f 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/kernel/process.c
+++ b/linux-2.4.30-xen-sparse/arch/xen/kernel/process.c
@@ -43,8 +43,7 @@
#include <asm/i387.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
-#include <asm/multicall.h>
-#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/xen-public/physdev.h>
#include <linux/irq.h>
@@ -214,7 +213,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
struct task_struct * p, struct pt_regs * regs)
{
struct pt_regs * childregs;
- unsigned long eflags;
childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
struct_cpy(childregs, regs);
@@ -232,9 +230,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
unlazy_fpu(current);
struct_cpy(&p->thread.i387, &current->thread.i387);
-
- __asm__ __volatile__ ( "pushfl; popl %0" : "=r" (eflags) : );
- p->thread.io_pl = (eflags >> 12) & 3;
+ p->thread.io_pl = current->thread.io_pl;
return 0;
}
@@ -307,19 +303,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
void fastcall __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *next = &next_p->thread;
-
- __cli();
-
- /*
- * We clobber FS and GS here so that we avoid a GPF when restoring previous
- * task's FS/GS values in Xen when the LDT is switched. If we don't do this
- * then we can end up erroneously re-flushing the page-update queue when
- * we 'execute_multicall_list'.
- */
- __asm__ __volatile__ (
- "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs" : : : "eax" );
-
- MULTICALL_flush_page_update_queue();
+ physdev_op_t op;
+ multicall_entry_t _mcl[8], *mcl = _mcl;
/*
* This is basically 'unlazy_fpu', except that we queue a multicall to
@@ -334,23 +319,26 @@ void fastcall __switch_to(struct task_struct *prev_p, struct task_struct *next_p
asm volatile( "fnsave %0 ; fwait"
: "=m" (prev_p->thread.i387.fsave) );
prev_p->flags &= ~PF_USEDFPU;
- queue_multicall0(__HYPERVISOR_fpu_taskswitch);
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 1;
+ mcl++;
}
- queue_multicall2(__HYPERVISOR_stack_switch, __KERNEL_DS, next->esp0);
- if ( xen_start_info.flags & SIF_PRIVILEGED )
+ mcl->op = __HYPERVISOR_stack_switch;
+ mcl->args[0] = __KERNEL_DS;
+ mcl->args[1] = next->esp0;
+ mcl++;
+
+ if ( prev_p->thread.io_pl != next->io_pl )
{
- dom0_op_t op;
- op.cmd = DOM0_IOPL;
- op.u.iopl.domain = DOMID_SELF;
- op.u.iopl.iopl = next->io_pl;
- op.interface_version = DOM0_INTERFACE_VERSION;
- queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op);
+ op.cmd = PHYSDEVOP_SET_IOPL;
+ op.u.set_iopl.iopl = next->io_pl;
+ mcl->op = __HYPERVISOR_physdev_op;
+ mcl->args[0] = (unsigned long)&op;
+ mcl++;
}
- /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */
- execute_multicall_list();
- __sti();
+ (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
/*
* Restore %fs and %gs.
diff --git a/linux-2.4.30-xen-sparse/arch/xen/kernel/setup.c b/linux-2.4.30-xen-sparse/arch/xen/kernel/setup.c
index 50fc7c1b13..230e0bb66a 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/kernel/setup.c
+++ b/linux-2.4.30-xen-sparse/arch/xen/kernel/setup.c
@@ -48,7 +48,7 @@ static int errno;
#include <asm/mmu_context.h>
#include <asm/ctrl_if.h>
#include <asm/hypervisor.h>
-#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/xen-public/physdev.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/tqueue.h>
@@ -60,10 +60,7 @@ static int errno;
*/
shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
-unsigned long *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
-
-multicall_entry_t multicall_list[8];
-int nr_multicall_ents = 0;
+unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
/*
* Machine setup..
@@ -116,7 +113,7 @@ int enable_acpi_smp_table;
/* Raw start-of-day parameters from the hypervisor. */
union xen_start_info_union xen_start_info_union;
-#define COMMAND_LINE_SIZE 256
+#define COMMAND_LINE_SIZE MAX_GUEST_CMDLINE
static char command_line[COMMAND_LINE_SIZE];
char saved_command_line[COMMAND_LINE_SIZE];
@@ -206,6 +203,7 @@ void __init setup_arch(char **cmdline_p)
unsigned long bootmap_size, start_pfn, lmax_low_pfn;
int mem_param; /* user specified memory size in pages */
int boot_pfn; /* low pages available for bootmem */
+ physdev_op_t op;
extern void hypervisor_callback(void);
extern void failsafe_callback(void);
@@ -230,8 +228,10 @@ void __init setup_arch(char **cmdline_p)
blk_nohighio = 1;
#endif
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_4gb_segments);
+ HYPERVISOR_vm_assist(
+ VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
+ HYPERVISOR_vm_assist(
+ VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
HYPERVISOR_set_callbacks(
__KERNEL_CS, (unsigned long)hypervisor_callback,
@@ -327,7 +327,7 @@ void __init setup_arch(char **cmdline_p)
}
#endif
- phys_to_machine_mapping = (unsigned long *)xen_start_info.mfn_list;
+ phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
cur_pgd = init_mm.pgd = (pgd_t *)xen_start_info.pt_base;
start_pfn = (__pa(xen_start_info.pt_base) >> PAGE_SHIFT) +
@@ -416,17 +416,9 @@ void __init setup_arch(char **cmdline_p)
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
- /* If we are a privileged guest OS then we should request IO privileges. */
- if ( xen_start_info.flags & SIF_PRIVILEGED )
- {
- dom0_op_t op;
- op.cmd = DOM0_IOPL;
- op.u.iopl.domain = DOMID_SELF;
- op.u.iopl.iopl = 1;
- if( HYPERVISOR_dom0_op(&op) != 0 )
- panic("Unable to obtain IOPL, despite being SIF_PRIVILEGED");
- current->thread.io_pl = 1;
- }
+ op.cmd = PHYSDEVOP_SET_IOPL;
+ op.u.set_iopl.iopl = current->thread.io_pl = 1;
+ HYPERVISOR_physdev_op(&op);
if (xen_start_info.flags & SIF_INITDOMAIN )
{
@@ -1213,7 +1205,6 @@ void __init cpu_init (void)
HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0);
load_LDT(&init_mm.context);
- flush_page_update_queue();
/* Force FPU initialization. */
current->flags &= ~PF_USEDFPU;
diff --git a/linux-2.4.30-xen-sparse/arch/xen/kernel/traps.c b/linux-2.4.30-xen-sparse/arch/xen/kernel/traps.c
index ada06dd973..b87fc3804c 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/kernel/traps.c
+++ b/linux-2.4.30-xen-sparse/arch/xen/kernel/traps.c
@@ -316,15 +316,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
__asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
if ( ldt == 0 )
{
- mmu_update_t u;
- u.ptr = MMU_EXTENDED_COMMAND;
- u.ptr |= (unsigned long)&default_ldt[0];
- u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
- if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0) )
- {
- show_trace(NULL);
- panic("Failed to install default LDT");
- }
+ xen_set_ldt((unsigned long)&default_ldt[0], 5);
return;
}
}
@@ -613,7 +605,6 @@ static trap_info_t trap_table[] = {
void __init trap_init(void)
{
HYPERVISOR_set_trap_table(trap_table);
- HYPERVISOR_set_fast_trap(SYSCALL_VECTOR);
/*
* The default LDT is a single-entry callgate to lcall7 for iBCS and a
diff --git a/linux-2.4.30-xen-sparse/arch/xen/mm/fault.c b/linux-2.4.30-xen-sparse/arch/xen/mm/fault.c
index 76d95ff03a..7db6463e09 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/mm/fault.c
+++ b/linux-2.4.30-xen-sparse/arch/xen/mm/fault.c
@@ -84,9 +84,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs,
error_code &= 3;
error_code |= (regs->xcs & 2) << 1;
- if ( flush_page_update_queue() != 0 )
- return;
-
/*
* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
@@ -296,7 +293,6 @@ vmalloc_fault:
if (!pmd_present(*pmd_k))
goto no_context;
set_pmd(pmd, *pmd_k);
- XEN_flush_page_update_queue(); /* flush PMD update */
pte_k = pte_offset(pmd_k, address);
if (!pte_present(*pte_k))
diff --git a/linux-2.4.30-xen-sparse/arch/xen/mm/init.c b/linux-2.4.30-xen-sparse/arch/xen/mm/init.c
index 40a5af9273..88d775bcd4 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/mm/init.c
+++ b/linux-2.4.30-xen-sparse/arch/xen/mm/init.c
@@ -142,7 +142,7 @@ static inline void set_pte_phys (unsigned long vaddr,
}
pte = pte_offset(pmd, vaddr);
- queue_l1_entry_update(pte, phys | pgprot_val(prot));
+ set_pte(pte, (pte_t) { phys | pgprot_val(prot) });
/*
* It's enough to flush this one mapping.
@@ -201,17 +201,13 @@ static void __init fixrange_init (unsigned long start,
kpgd = pgd_offset_k((unsigned long)pte);
kpmd = pmd_offset(kpgd, (unsigned long)pte);
kpte = pte_offset(kpmd, (unsigned long)pte);
- queue_l1_entry_update(kpte,
- (*(unsigned long *)kpte)&~_PAGE_RW);
-
+ set_pte(kpte, pte_wrprotect(*kpte));
set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
}
vaddr += PMD_SIZE;
}
j = 0;
}
-
- XEN_flush_page_update_queue();
}
@@ -257,10 +253,8 @@ static void __init pagetable_init (void)
kpgd = pgd_offset_k((unsigned long)pte_base);
kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
kpte = pte_offset(kpmd, (unsigned long)pte_base);
- queue_l1_entry_update(kpte,
- (*(unsigned long *)kpte)&~_PAGE_RW);
+ set_pte(kpte, pte_wrprotect(*kpte));
set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
- XEN_flush_page_update_queue();
}
}
@@ -311,6 +305,7 @@ void __init paging_init(void)
pagetable_init();
zone_sizes_init();
+
/* Switch to the real shared_info page, and clear the dummy page. */
set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
diff --git a/linux-2.4.30-xen-sparse/arch/xen/mm/ioremap.c b/linux-2.4.30-xen-sparse/arch/xen/mm/ioremap.c
index 34c95c84b5..2f3db057d9 100644
--- a/linux-2.4.30-xen-sparse/arch/xen/mm/ioremap.c
+++ b/linux-2.4.30-xen-sparse/arch/xen/mm/ioremap.c
@@ -113,12 +113,7 @@ int direct_remap_area_pages(struct mm_struct *mm,
int i;
unsigned long start_address;
#define MAX_DIRECTMAP_MMU_QUEUE 130
- mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
-
- u[0].ptr = MMU_EXTENDED_COMMAND;
- u[0].val = MMUEXT_SET_FOREIGNDOM;
- u[0].val |= (unsigned long)domid << 16;
- v = w = &u[1];
+ mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
start_address = address;
@@ -130,11 +125,11 @@ int direct_remap_area_pages(struct mm_struct *mm,
__direct_remap_area_pages( mm,
start_address,
address-start_address,
- w);
+ u);
- if ( HYPERVISOR_mmu_update(u, v - u, NULL) < 0 )
+ if ( HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0 )
return -EFAULT;
- v = w;
+ v = u;
start_address = address;
}
@@ -149,14 +144,14 @@ int direct_remap_area_pages(struct mm_struct *mm,
v++;
}
- if ( v != w )
+ if ( v != u )
{
/* get the ptep's filled in */
__direct_remap_area_pages(mm,
start_address,
address-start_address,
- w);
- if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0) )
+ u);
+ if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) )
return -EFAULT;
}
diff --git a/linux-2.4.30-xen-sparse/fs/exec.c b/linux-2.4.30-xen-sparse/fs/exec.c
deleted file mode 100644
index 8a114151a9..0000000000
--- a/linux-2.4.30-xen-sparse/fs/exec.c
+++ /dev/null
@@ -1,1179 +0,0 @@
-/*
- * linux/fs/exec.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- */
-
-/*
- * #!-checking implemented by tytso.
- */
-/*
- * Demand-loading implemented 01.12.91 - no need to read anything but
- * the header into memory. The inode of the executable is put into
- * "current->executable", and page faults do the actual loading. Clean.
- *
- * Once more I can proudly say that linux stood up to being changed: it
- * was less than 2 hours work to get demand-loading completely implemented.
- *
- * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
- * current->executable is only used by the procfs. This allows a dispatch
- * table to check for several different types of binary formats. We keep
- * trying until we recognize the file or we run out of supported binary
- * formats.
- */
-
-#include <linux/config.h>
-#include <linux/slab.h>
-#include <linux/file.h>
-#include <linux/mman.h>
-#include <linux/a.out.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/smp_lock.h>
-#include <linux/init.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/spinlock.h>
-#include <linux/personality.h>
-#include <linux/swap.h>
-#include <linux/utsname.h>
-#define __NO_VERSION__
-#include <linux/module.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-
-#ifdef CONFIG_KMOD
-#include <linux/kmod.h>
-#endif
-
-int core_uses_pid;
-char core_pattern[65] = "core";
-int core_setuid_ok = 0;
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
-static struct linux_binfmt *formats;
-static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
-
-int register_binfmt(struct linux_binfmt * fmt)
-{
- struct linux_binfmt ** tmp = &formats;
-
- if (!fmt)
- return -EINVAL;
- if (fmt->next)
- return -EBUSY;
- write_lock(&binfmt_lock);
- while (*tmp) {
- if (fmt == *tmp) {
- write_unlock(&binfmt_lock);
- return -EBUSY;
- }
- tmp = &(*tmp)->next;
- }
- fmt->next = formats;
- formats = fmt;
- write_unlock(&binfmt_lock);
- return 0;
-}
-
-int unregister_binfmt(struct linux_binfmt * fmt)
-{
- struct linux_binfmt ** tmp = &formats;
-
- write_lock(&binfmt_lock);
- while (*tmp) {
- if (fmt == *tmp) {
- *tmp = fmt->next;
- write_unlock(&binfmt_lock);
- return 0;
- }
- tmp = &(*tmp)->next;
- }
- write_unlock(&binfmt_lock);
- return -EINVAL;
-}
-
-static inline void put_binfmt(struct linux_binfmt * fmt)
-{
- if (fmt->module)
- __MOD_DEC_USE_COUNT(fmt->module);
-}
-
-/*
- * Note that a shared library must be both readable and executable due to
- * security reasons.
- *
- * Also note that we take the address to load from from the file itself.
- */
-asmlinkage long sys_uselib(const char * library)
-{
- struct file * file;
- struct nameidata nd;
- int error;
-
- error = user_path_walk(library, &nd);
- if (error)
- goto out;
-
- error = -EINVAL;
- if (!S_ISREG(nd.dentry->d_inode->i_mode))
- goto exit;
-
- error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
- if (error)
- goto exit;
-
- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
- error = PTR_ERR(file);
- if (IS_ERR(file))
- goto out;
-
- error = -ENOEXEC;
- if(file->f_op && file->f_op->read) {
- struct linux_binfmt * fmt;
-
- read_lock(&binfmt_lock);
- for (fmt = formats ; fmt ; fmt = fmt->next) {
- if (!fmt->load_shlib)
- continue;
- if (!try_inc_mod_count(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- error = fmt->load_shlib(file);
- read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (error != -ENOEXEC)
- break;
- }
- read_unlock(&binfmt_lock);
- }
- fput(file);
-out:
- return error;
-exit:
- path_release(&nd);
- goto out;
-}
-
-/*
- * count() counts the number of arguments/envelopes
- */
-static int count(char ** argv, int max)
-{
- int i = 0;
-
- if (argv != NULL) {
- for (;;) {
- char * p;
-
- if (get_user(p, argv))
- return -EFAULT;
- if (!p)
- break;
- argv++;
- if(++i > max)
- return -E2BIG;
- }
- }
- return i;
-}
-
-/*
- * 'copy_strings()' copies argument/envelope strings from user
- * memory to free pages in kernel mem. These are in a format ready
- * to be put directly into the top of new user memory.
- */
-int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
-{
- struct page *kmapped_page = NULL;
- char *kaddr = NULL;
- int ret;
-
- while (argc-- > 0) {
- char *str;
- int len;
- unsigned long pos;
-
- if (get_user(str, argv+argc) ||
- !(len = strnlen_user(str, bprm->p))) {
- ret = -EFAULT;
- goto out;
- }
-
- if (bprm->p < len) {
- ret = -E2BIG;
- goto out;
- }
-
- bprm->p -= len;
- /* XXX: add architecture specific overflow check here. */
- pos = bprm->p;
-
- while (len > 0) {
- int i, new, err;
- int offset, bytes_to_copy;
- struct page *page;
-
- offset = pos % PAGE_SIZE;
- i = pos/PAGE_SIZE;
- page = bprm->page[i];
- new = 0;
- if (!page) {
- page = alloc_page(GFP_HIGHUSER);
- bprm->page[i] = page;
- if (!page) {
- ret = -ENOMEM;
- goto out;
- }
- new = 1;
- }
-
- if (page != kmapped_page) {
- if (kmapped_page)
- kunmap(kmapped_page);
- kmapped_page = page;
- kaddr = kmap(kmapped_page);
- }
- if (new && offset)
- memset(kaddr, 0, offset);
- bytes_to_copy = PAGE_SIZE - offset;
- if (bytes_to_copy > len) {
- bytes_to_copy = len;
- if (new)
- memset(kaddr+offset+len, 0,
- PAGE_SIZE-offset-len);
- }
- err = copy_from_user(kaddr+offset, str, bytes_to_copy);
- if (err) {
- ret = -EFAULT;
- goto out;
- }
-
- pos += bytes_to_copy;
- str += bytes_to_copy;
- len -= bytes_to_copy;
- }
- }
- ret = 0;
-out:
- if (kmapped_page)
- kunmap(kmapped_page);
- return ret;
-}
-
-/*
- * Like copy_strings, but get argv and its values from kernel memory.
- */
-int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
-{
- int r;
- mm_segment_t oldfs = get_fs();
- set_fs(KERNEL_DS);
- r = copy_strings(argc, argv, bprm);
- set_fs(oldfs);
- return r;
-}
-
-/*
- * This routine is used to map in a page into an address space: needed by
- * execve() for the initial stack and environment pages.
- *
- * tsk->mmap_sem is held for writing.
- */
-void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
-{
- pgd_t * pgd;
- pmd_t * pmd;
- pte_t * pte;
- struct vm_area_struct *vma;
- pgprot_t prot = PAGE_COPY;
-
- if (page_count(page) != 1)
- printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
- pgd = pgd_offset(tsk->mm, address);
-
- spin_lock(&tsk->mm->page_table_lock);
- pmd = pmd_alloc(tsk->mm, pgd, address);
- if (!pmd)
- goto out;
- pte = pte_alloc(tsk->mm, pmd, address);
- if (!pte)
- goto out;
- if (!pte_none(*pte))
- goto out;
- lru_cache_add(page);
- flush_dcache_page(page);
- flush_page_to_ram(page);
- /* lookup is cheap because there is only a single entry in the list */
- vma = find_vma(tsk->mm, address);
- if (vma)
- prot = vma->vm_page_prot;
- set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
- XEN_flush_page_update_queue();
- tsk->mm->rss++;
- spin_unlock(&tsk->mm->page_table_lock);
-
- /* no need for flush_tlb */
- return;
-out:
- spin_unlock(&tsk->mm->page_table_lock);
- __free_page(page);
- force_sig(SIGKILL, tsk);
- return;
-}
-
-int setup_arg_pages(struct linux_binprm *bprm)
-{
- unsigned long stack_base;
- struct vm_area_struct *mpnt;
- int i, ret;
-
- stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
-
- bprm->p += stack_base;
- if (bprm->loader)
- bprm->loader += stack_base;
- bprm->exec += stack_base;
-
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!mpnt)
- return -ENOMEM;
-
- down_write(&current->mm->mmap_sem);
- {
- mpnt->vm_mm = current->mm;
- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
- mpnt->vm_end = STACK_TOP;
- mpnt->vm_flags = VM_STACK_FLAGS;
- mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7];
- mpnt->vm_ops = NULL;
- mpnt->vm_pgoff = 0;
- mpnt->vm_file = NULL;
- mpnt->vm_private_data = (void *) 0;
- if ((ret = insert_vm_struct(current->mm, mpnt))) {
- up_write(&current->mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, mpnt);
- return ret;
- }
- current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
- }
-
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page *page = bprm->page[i];
- if (page) {
- bprm->page[i] = NULL;
- put_dirty_page(current,page,stack_base);
- }
- stack_base += PAGE_SIZE;
- }
- up_write(&current->mm->mmap_sem);
-
- return 0;
-}
-
-struct file *open_exec(const char *name)
-{
- struct nameidata nd;
- struct inode *inode;
- struct file *file;
- int err = 0;
-
- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
- file = ERR_PTR(err);
- if (!err) {
- inode = nd.dentry->d_inode;
- file = ERR_PTR(-EACCES);
- if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
- S_ISREG(inode->i_mode)) {
- int err = permission(inode, MAY_EXEC);
- if (!err && !(inode->i_mode & 0111))
- err = -EACCES;
- file = ERR_PTR(err);
- if (!err) {
- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
- if (!IS_ERR(file)) {
- err = deny_write_access(file);
- if (err) {
- fput(file);
- file = ERR_PTR(err);
- }
- }
-out:
- return file;
- }
- }
- path_release(&nd);
- }
- goto out;
-}
-
-int kernel_read(struct file *file, unsigned long offset,
- char * addr, unsigned long count)
-{
- mm_segment_t old_fs;
- loff_t pos = offset;
- int result = -ENOSYS;
-
- if (!file->f_op->read)
- goto fail;
- old_fs = get_fs();
- set_fs(get_ds());
- result = file->f_op->read(file, addr, count, &pos);
- set_fs(old_fs);
-fail:
- return result;
-}
-
-static int exec_mmap(void)
-{
- struct mm_struct * mm, * old_mm;
-
- old_mm = current->mm;
-
- if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
- mm_release();
- down_write(&old_mm->mmap_sem);
- exit_mmap(old_mm);
- up_write(&old_mm->mmap_sem);
- return 0;
- }
-
-
- mm = mm_alloc();
- if (mm) {
- struct mm_struct *active_mm;
-
- if (init_new_context(current, mm)) {
- mmdrop(mm);
- return -ENOMEM;
- }
-
- /* Add it to the list of mm's */
- spin_lock(&mmlist_lock);
- list_add(&mm->mmlist, &init_mm.mmlist);
- mmlist_nr++;
- spin_unlock(&mmlist_lock);
-
- task_lock(current);
- active_mm = current->active_mm;
- current->mm = mm;
- current->active_mm = mm;
- task_unlock(current);
- activate_mm(active_mm, mm);
- mm_release();
- if (old_mm) {
- if (active_mm != old_mm) BUG();
- mmput(old_mm);
- return 0;
- }
- mmdrop(active_mm);
- return 0;
- }
- return -ENOMEM;
-}
-
-/*
- * This function makes sure the current process has its own signal table,
- * so that flush_signal_handlers can later reset the handlers without
- * disturbing other processes. (Other processes might share the signal
- * table via the CLONE_SIGNAL option to clone().)
- */
-
-static inline int make_private_signals(void)
-{
- struct signal_struct * newsig;
-
- if (atomic_read(&current->sig->count) <= 1)
- return 0;
- newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
- if (newsig == NULL)
- return -ENOMEM;
- spin_lock_init(&newsig->siglock);
- atomic_set(&newsig->count, 1);
- memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
- spin_lock_irq(&current->sigmask_lock);
- current->sig = newsig;
- spin_unlock_irq(&current->sigmask_lock);
- return 0;
-}
-
-/*
- * If make_private_signals() made a copy of the signal table, decrement the
- * refcount of the original table, and free it if necessary.
- * We don't do that in make_private_signals() so that we can back off
- * in flush_old_exec() if an error occurs after calling make_private_signals().
- */
-
-static inline void release_old_signals(struct signal_struct * oldsig)
-{
- if (current->sig == oldsig)
- return;
- if (atomic_dec_and_test(&oldsig->count))
- kmem_cache_free(sigact_cachep, oldsig);
-}
-
-/*
- * These functions flushes out all traces of the currently running executable
- * so that a new one can be started
- */
-
-static inline void flush_old_files(struct files_struct * files)
-{
- long j = -1;
-
- write_lock(&files->file_lock);
- for (;;) {
- unsigned long set, i;
-
- j++;
- i = j * __NFDBITS;
- if (i >= files->max_fds || i >= files->max_fdset)
- break;
- set = files->close_on_exec->fds_bits[j];
- if (!set)
- continue;
- files->close_on_exec->fds_bits[j] = 0;
- write_unlock(&files->file_lock);
- for ( ; set ; i++,set >>= 1) {
- if (set & 1) {
- sys_close(i);
- }
- }
- write_lock(&files->file_lock);
-
- }
- write_unlock(&files->file_lock);
-}
-
-/*
- * An execve() will automatically "de-thread" the process.
- * Note: we don't have to hold the tasklist_lock to test
- * whether we migth need to do this. If we're not part of
- * a thread group, there is no way we can become one
- * dynamically. And if we are, we only need to protect the
- * unlink - even if we race with the last other thread exit,
- * at worst the list_del_init() might end up being a no-op.
- */
-static inline void de_thread(struct task_struct *tsk)
-{
- if (!list_empty(&tsk->thread_group)) {
- write_lock_irq(&tasklist_lock);
- list_del_init(&tsk->thread_group);
- write_unlock_irq(&tasklist_lock);
- }
-
- /* Minor oddity: this might stay the same. */
- tsk->tgid = tsk->pid;
-}
-
-void get_task_comm(char *buf, struct task_struct *tsk)
-{
- /* buf must be at least sizeof(tsk->comm) in size */
- task_lock(tsk);
- memcpy(buf, tsk->comm, sizeof(tsk->comm));
- task_unlock(tsk);
-}
-
-void set_task_comm(struct task_struct *tsk, char *buf)
-{
- task_lock(tsk);
- strncpy(tsk->comm, buf, sizeof(tsk->comm));
- tsk->comm[sizeof(tsk->comm)-1]='\0';
- task_unlock(tsk);
-}
-
-int flush_old_exec(struct linux_binprm * bprm)
-{
- char * name;
- int i, ch, retval;
- struct signal_struct * oldsig;
- struct files_struct * files;
- char tcomm[sizeof(current->comm)];
-
- /*
- * Make sure we have a private signal table
- */
- oldsig = current->sig;
- retval = make_private_signals();
- if (retval) goto flush_failed;
-
- /*
- * Make sure we have private file handles. Ask the
- * fork helper to do the work for us and the exit
- * helper to do the cleanup of the old one.
- */
-
- files = current->files; /* refcounted so safe to hold */
- retval = unshare_files();
- if(retval)
- goto flush_failed;
-
- /*
- * Release all of the old mmap stuff
- */
- retval = exec_mmap();
- if (retval) goto mmap_failed;
-
- /* This is the point of no return */
- steal_locks(files);
- put_files_struct(files);
- release_old_signals(oldsig);
-
- current->sas_ss_sp = current->sas_ss_size = 0;
-
- if (current->euid == current->uid && current->egid == current->gid) {
- current->mm->dumpable = 1;
- current->task_dumpable = 1;
- }
- name = bprm->filename;
- for (i=0; (ch = *(name++)) != '\0';) {
- if (ch == '/')
- i = 0;
- else
- if (i < (sizeof(tcomm) - 1))
- tcomm[i++] = ch;
- }
- tcomm[i] = '\0';
- set_task_comm(current, tcomm);
-
- flush_thread();
-
- de_thread(current);
-
- if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
- permission(bprm->file->f_dentry->d_inode,MAY_READ))
- current->mm->dumpable = 0;
-
- /* An exec changes our domain. We are no longer part of the thread
- group */
-
- current->self_exec_id++;
-
- flush_signal_handlers(current);
- flush_old_files(current->files);
-
- return 0;
-
-mmap_failed:
- put_files_struct(current->files);
- current->files = files;
-flush_failed:
- spin_lock_irq(&current->sigmask_lock);
- if (current->sig != oldsig) {
- kmem_cache_free(sigact_cachep, current->sig);
- current->sig = oldsig;
- }
- spin_unlock_irq(&current->sigmask_lock);
- return retval;
-}
-
-/*
- * We mustn't allow tracing of suid binaries, unless
- * the tracer has the capability to trace anything..
- */
-static inline int must_not_trace_exec(struct task_struct * p)
-{
- return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP);
-}
-
-/*
- * Fill the binprm structure from the inode.
- * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
- */
-int prepare_binprm(struct linux_binprm *bprm)
-{
- int mode;
- struct inode * inode = bprm->file->f_dentry->d_inode;
-
- mode = inode->i_mode;
- /*
- * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
- * vfs_permission lets a non-executable through
- */
- if (!(mode & 0111)) /* with at least _one_ execute bit set */
- return -EACCES;
- if (bprm->file->f_op == NULL)
- return -EACCES;
-
- bprm->e_uid = current->euid;
- bprm->e_gid = current->egid;
-
- if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
- /* Set-uid? */
- if (mode & S_ISUID)
- bprm->e_uid = inode->i_uid;
-
- /* Set-gid? */
- /*
- * If setgid is set but no group execute bit then this
- * is a candidate for mandatory locking, not a setgid
- * executable.
- */
- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
- bprm->e_gid = inode->i_gid;
- }
-
- /* We don't have VFS support for capabilities yet */
- cap_clear(bprm->cap_inheritable);
- cap_clear(bprm->cap_permitted);
- cap_clear(bprm->cap_effective);
-
- /* To support inheritance of root-permissions and suid-root
- * executables under compatibility mode, we raise all three
- * capability sets for the file.
- *
- * If only the real uid is 0, we only raise the inheritable
- * and permitted sets of the executable file.
- */
-
- if (!issecure(SECURE_NOROOT)) {
- if (bprm->e_uid == 0 || current->uid == 0) {
- cap_set_full(bprm->cap_inheritable);
- cap_set_full(bprm->cap_permitted);
- }
- if (bprm->e_uid == 0)
- cap_set_full(bprm->cap_effective);
- }
-
- memset(bprm->buf,0,BINPRM_BUF_SIZE);
- return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
-}
-
-/*
- * This function is used to produce the new IDs and capabilities
- * from the old ones and the file's capabilities.
- *
- * The formula used for evolving capabilities is:
- *
- * pI' = pI
- * (***) pP' = (fP & X) | (fI & pI)
- * pE' = pP' & fE [NB. fE is 0 or ~0]
- *
- * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
- * ' indicates post-exec(), and X is the global 'cap_bset'.
- *
- */
-
-void compute_creds(struct linux_binprm *bprm)
-{
- kernel_cap_t new_permitted, working;
- int do_unlock = 0;
-
- new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
- working = cap_intersect(bprm->cap_inheritable,
- current->cap_inheritable);
- new_permitted = cap_combine(new_permitted, working);
-
- if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
- !cap_issubset(new_permitted, current->cap_permitted)) {
- current->mm->dumpable = 0;
-
- lock_kernel();
- if (must_not_trace_exec(current)
- || atomic_read(&current->fs->count) > 1
- || atomic_read(&current->files->count) > 1
- || atomic_read(&current->sig->count) > 1) {
- if(!capable(CAP_SETUID)) {
- bprm->e_uid = current->uid;
- bprm->e_gid = current->gid;
- }
- if(!capable(CAP_SETPCAP)) {
- new_permitted = cap_intersect(new_permitted,
- current->cap_permitted);
- }
- }
- do_unlock = 1;
- }
-
-
- /* For init, we want to retain the capabilities set
- * in the init_task struct. Thus we skip the usual
- * capability rules */
- if (current->pid != 1) {
- current->cap_permitted = new_permitted;
- current->cap_effective =
- cap_intersect(new_permitted, bprm->cap_effective);
- }
-
- /* AUD: Audit candidate if current->cap_effective is set */
-
- current->suid = current->euid = current->fsuid = bprm->e_uid;
- current->sgid = current->egid = current->fsgid = bprm->e_gid;
-
- if(do_unlock)
- unlock_kernel();
- current->keep_capabilities = 0;
-}
-
-
-void remove_arg_zero(struct linux_binprm *bprm)
-{
- if (bprm->argc) {
- unsigned long offset;
- char * kaddr;
- struct page *page;
-
- offset = bprm->p % PAGE_SIZE;
- goto inside;
-
- while (bprm->p++, *(kaddr+offset++)) {
- if (offset != PAGE_SIZE)
- continue;
- offset = 0;
- kunmap(page);
-inside:
- page = bprm->page[bprm->p/PAGE_SIZE];
- kaddr = kmap(page);
- }
- kunmap(page);
- bprm->argc--;
- }
-}
-
-/*
- * cycle the list of binary formats handler, until one recognizes the image
- */
-int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
-{
- int try,retval=0;
- struct linux_binfmt *fmt;
-#ifdef __alpha__
- /* handle /sbin/loader.. */
- {
- struct exec * eh = (struct exec *) bprm->buf;
-
- if (!bprm->loader && eh->fh.f_magic == 0x183 &&
- (eh->fh.f_flags & 0x3000) == 0x3000)
- {
- struct file * file;
- unsigned long loader;
-
- allow_write_access(bprm->file);
- fput(bprm->file);
- bprm->file = NULL;
-
- loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
-
- file = open_exec("/sbin/loader");
- retval = PTR_ERR(file);
- if (IS_ERR(file))
- return retval;
-
- /* Remember if the application is TASO. */
- bprm->sh_bang = eh->ah.entry < 0x100000000;
-
- bprm->file = file;
- bprm->loader = loader;
- retval = prepare_binprm(bprm);
- if (retval<0)
- return retval;
- /* should call search_binary_handler recursively here,
- but it does not matter */
- }
- }
-#endif
- /* kernel module loader fixup */
- /* so we don't try to load run modprobe in kernel space. */
- set_fs(USER_DS);
- for (try=0; try<2; try++) {
- read_lock(&binfmt_lock);
- for (fmt = formats ; fmt ; fmt = fmt->next) {
- int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
- if (!fn)
- continue;
- if (!try_inc_mod_count(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- retval = fn(bprm, regs);
- if (retval >= 0) {
- put_binfmt(fmt);
- allow_write_access(bprm->file);
- if (bprm->file)
- fput(bprm->file);
- bprm->file = NULL;
- current->did_exec = 1;
- return retval;
- }
- read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (retval != -ENOEXEC)
- break;
- if (!bprm->file) {
- read_unlock(&binfmt_lock);
- return retval;
- }
- }
- read_unlock(&binfmt_lock);
- if (retval != -ENOEXEC) {
- break;
-#ifdef CONFIG_KMOD
- }else{
-#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
- char modname[20];
- if (printable(bprm->buf[0]) &&
- printable(bprm->buf[1]) &&
- printable(bprm->buf[2]) &&
- printable(bprm->buf[3]))
- break; /* -ENOEXEC */
- sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
- request_module(modname);
-#endif
- }
- }
- return retval;
-}
-
-
-/*
- * sys_execve() executes a new program.
- */
-int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
-{
- struct linux_binprm bprm;
- struct file *file;
- int retval;
- int i;
-
- file = open_exec(filename);
-
- retval = PTR_ERR(file);
- if (IS_ERR(file))
- return retval;
-
- bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
- memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));
-
- bprm.file = file;
- bprm.filename = filename;
- bprm.sh_bang = 0;
- bprm.loader = 0;
- bprm.exec = 0;
- if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
- allow_write_access(file);
- fput(file);
- return bprm.argc;
- }
-
- if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
- allow_write_access(file);
- fput(file);
- return bprm.envc;
- }
-
- retval = prepare_binprm(&bprm);
- if (retval < 0)
- goto out;
-
- retval = copy_strings_kernel(1, &bprm.filename, &bprm);
- if (retval < 0)
- goto out;
-
- bprm.exec = bprm.p;
- retval = copy_strings(bprm.envc, envp, &bprm);
- if (retval < 0)
- goto out;
-
- retval = copy_strings(bprm.argc, argv, &bprm);
- if (retval < 0)
- goto out;
-
- retval = search_binary_handler(&bprm,regs);
- if (retval >= 0)
- /* execve success */
- return retval;
-
-out:
- /* Something went wrong, return the inode and free the argument pages*/
- allow_write_access(bprm.file);
- if (bprm.file)
- fput(bprm.file);
-
- for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
- struct page * page = bprm.page[i];
- if (page)
- __free_page(page);
- }
-
- return retval;
-}
-
-void set_binfmt(struct linux_binfmt *new)
-{
- struct linux_binfmt *old = current->binfmt;
- if (new && new->module)
- __MOD_INC_USE_COUNT(new->module);
- current->binfmt = new;
- if (old && old->module)
- __MOD_DEC_USE_COUNT(old->module);
-}
-
-#define CORENAME_MAX_SIZE 64
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-void format_corename(char *corename, const char *pattern, long signr)
-{
- const char *pat_ptr = pattern;
- char *out_ptr = corename;
- char *const out_end = corename + CORENAME_MAX_SIZE;
- int rc;
- int pid_in_pattern = 0;
-
- /* Repeat as long as we have more pattern to process and more output
- space */
- while (*pat_ptr) {
- if (*pat_ptr != '%') {
- if (out_ptr == out_end)
- goto out;
- *out_ptr++ = *pat_ptr++;
- } else {
- switch (*++pat_ptr) {
- case 0:
- goto out;
- /* Double percent, output one percent */
- case '%':
- if (out_ptr == out_end)
- goto out;
- *out_ptr++ = '%';
- break;
- /* pid */
- case 'p':
- pid_in_pattern = 1;
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%d", current->pid);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
- break;
- /* uid */
- case 'u':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%d", current->uid);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
- break;
- /* gid */
- case 'g':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%d", current->gid);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
- break;
- /* signal that caused the coredump */
- case 's':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%ld", signr);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
- break;
- /* UNIX time of coredump */
- case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%ld", tv.tv_sec);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
- break;
- }
- /* hostname */
- case 'h':
- down_read(&uts_sem);
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%s", system_utsname.nodename);
- up_read(&uts_sem);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
- break;
- /* executable */
- case 'e':
- rc = snprintf(out_ptr, out_end - out_ptr,
- "%s", current->comm);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
- break;
- default:
- break;
- }
- ++pat_ptr;
- }
- }
- /* Backward compatibility with core_uses_pid:
- *
- * If core_pattern does not include a %p (as is the default)
- * and core_uses_pid is set, then .%pid will be appended to
- * the filename */
- if (!pid_in_pattern
- && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
- rc = snprintf(out_ptr, out_end - out_ptr,
- ".%d", current->pid);
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
- }
- out:
- *out_ptr = 0;
-}
-
-int do_coredump(long signr, struct pt_regs * regs)
-{
- struct linux_binfmt * binfmt;
- char corename[CORENAME_MAX_SIZE + 1];
- struct file * file;
- struct inode * inode;
- int retval = 0;
- int fsuid = current->fsuid;
-
- lock_kernel();
- binfmt = current->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!is_dumpable(current))
- {
- if(!core_setuid_ok || !current->task_dumpable)
- goto fail;
- current->fsuid = 0;
- }
- current->mm->dumpable = 0;
- if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
- goto fail;
-
- format_corename(corename, core_pattern, signr);
- file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
- if (IS_ERR(file))
- goto fail;
- inode = file->f_dentry->d_inode;
- if (inode->i_nlink > 1)
- goto close_fail; /* multiple links - don't dump */
- if (d_unhashed(file->f_dentry))
- goto close_fail;
-
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- if (!file->f_op)
- goto close_fail;
- if (!file->f_op->write)
- goto close_fail;
- if (do_truncate(file->f_dentry, 0) != 0)
- goto close_fail;
-
- retval = binfmt->core_dump(signr, regs, file);
-
-close_fail:
- filp_close(file, NULL);
-fail:
- if (fsuid != current->fsuid)
- current->fsuid = fsuid;
- unlock_kernel();
- return retval;
-}
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/desc.h b/linux-2.4.30-xen-sparse/include/asm-xen/desc.h
index 33309a9671..b59b998d95 100644
--- a/linux-2.4.30-xen-sparse/include/asm-xen/desc.h
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/desc.h
@@ -18,11 +18,7 @@ extern struct desc_struct default_ldt[];
static inline void clear_LDT(void)
{
- /*
- * NB. We load the default_ldt for lcall7/27 handling on demand, as
- * it slows down context switching. Noone uses it anyway.
- */
- queue_set_ldt(0, 0);
+ xen_set_ldt(0, 0);
}
static inline void load_LDT(mm_context_t *pc)
@@ -33,7 +29,7 @@ static inline void load_LDT(mm_context_t *pc)
if ( count == 0 )
segments = NULL;
- queue_set_ldt((unsigned long)segments, count);
+ xen_set_ldt((unsigned long)segments, count);
}
#endif /* __ASSEMBLY__ */
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/fixmap.h b/linux-2.4.30-xen-sparse/include/asm-xen/fixmap.h
index bc6e2c2004..255ac4a468 100644
--- a/linux-2.4.30-xen-sparse/include/asm-xen/fixmap.h
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/fixmap.h
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <asm/apicdef.h>
#include <asm/page.h>
+#include <asm-xen/gnttab.h>
#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -52,7 +53,8 @@ enum fixed_addresses {
FIX_NETRING2_BASE,
FIX_NETRING3_BASE,
FIX_SHARED_INFO,
- FIX_GNTTAB,
+ FIX_GNTTAB_BEGIN,
+ FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
#ifdef CONFIG_VGA_CONSOLE
#define NR_FIX_BTMAPS 32 /* 128KB For the Dom0 VGA Console A0000-C0000 */
#else
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/mmu_context.h b/linux-2.4.30-xen-sparse/include/asm-xen/mmu_context.h
index 7972ce7d74..74004c8d46 100644
--- a/linux-2.4.30-xen-sparse/include/asm-xen/mmu_context.h
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/mmu_context.h
@@ -31,44 +31,29 @@ extern pgd_t *cur_pgd;
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
{
+ struct mmuext_op _op[2], *op = _op;
if (prev != next) {
/* stop flush ipis for the previous mm */
clear_bit(cpu, &prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
- cpu_tlbstate[cpu].state = TLBSTATE_OK;
- cpu_tlbstate[cpu].active_mm = next;
-#endif
-
/* Re-load page tables */
cur_pgd = next->pgd;
- queue_pt_switch(__pa(cur_pgd));
- /* load_LDT, if either the previous or next thread
- * has a non-default LDT.
- */
- if (next->context.size+prev->context.size)
- load_LDT(&next->context);
- }
-#ifdef CONFIG_SMP
- else {
- cpu_tlbstate[cpu].state = TLBSTATE_OK;
- if(cpu_tlbstate[cpu].active_mm != next)
- out_of_line_bug();
- if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
- /* We were in lazy tlb mode and leave_mm disabled
- * tlb flush IPI delivery. We must reload %cr3.
- */
- cur_pgd = next->pgd;
- queue_pt_switch(__pa(cur_pgd));
- load_LDT(next);
+ op->cmd = MMUEXT_NEW_BASEPTR;
+ op->mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
+ op++;
+ /* load_LDT, if either the previous or next thread
+ * has a non-default LDT.
+ */
+ if (next->context.size+prev->context.size) {
+ op->cmd = MMUEXT_SET_LDT;
+ op->linear_addr = (unsigned long)next->context.ldt;
+ op->nr_ents = next->context.size;
+ op++;
}
+ BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
}
-#endif
}
-#define activate_mm(prev, next) \
-do { \
- switch_mm((prev),(next),NULL,smp_processor_id()); \
- flush_page_update_queue(); \
-} while ( 0 )
+#define activate_mm(prev, next) \
+ switch_mm((prev),(next),NULL,smp_processor_id())
#endif
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/msr.h b/linux-2.4.30-xen-sparse/include/asm-xen/msr.h
deleted file mode 100644
index 1a2c8765a8..0000000000
--- a/linux-2.4.30-xen-sparse/include/asm-xen/msr.h
+++ /dev/null
@@ -1,138 +0,0 @@
-#ifndef __ASM_MSR_H
-#define __ASM_MSR_H
-
-/*
- * Access to machine-specific registers (available on 586 and better only)
- * Note: the rd* operations modify the parameters directly (without using
- * pointer indirection), this allows gcc to optimize better
- */
-
-#define rdmsr(msr,val1,val2) \
-{ \
- dom0_op_t op; \
- op.cmd = DOM0_MSR; \
- op.u.msr.write = 0; \
- op.u.msr.msr = msr; \
- op.u.msr.cpu_mask = (1 << current->processor); \
- HYPERVISOR_dom0_op(&op); \
- val1 = op.u.msr.out1; \
- val2 = op.u.msr.out2; \
-}
-
-#define wrmsr(msr,val1,val2) \
-{ \
- dom0_op_t op; \
- op.cmd = DOM0_MSR; \
- op.u.msr.write = 1; \
- op.u.msr.cpu_mask = (1 << current->processor); \
- op.u.msr.msr = msr; \
- op.u.msr.in1 = val1; \
- op.u.msr.in2 = val2; \
- HYPERVISOR_dom0_op(&op); \
-}
-
-#define rdtsc(low,high) \
- __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-#define rdtscl(low) \
- __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
-
-#define rdtscll(val) \
- __asm__ __volatile__("rdtsc" : "=A" (val))
-
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
-#define rdpmc(counter,low,high) \
- __asm__ __volatile__("rdpmc" \
- : "=a" (low), "=d" (high) \
- : "c" (counter))
-
-/* symbolic names for some interesting MSRs */
-/* Intel defined MSRs. */
-#define MSR_IA32_P5_MC_ADDR 0
-#define MSR_IA32_P5_MC_TYPE 1
-#define MSR_IA32_PLATFORM_ID 0x17
-#define MSR_IA32_EBL_CR_POWERON 0x2a
-
-#define MSR_IA32_APICBASE 0x1b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-
-#define MSR_IA32_UCODE_WRITE 0x79
-#define MSR_IA32_UCODE_REV 0x8b
-
-#define MSR_IA32_BBL_CR_CTL 0x119
-
-#define MSR_IA32_MCG_CAP 0x179
-#define MSR_IA32_MCG_STATUS 0x17a
-#define MSR_IA32_MCG_CTL 0x17b
-
-#define MSR_IA32_THERM_CONTROL 0x19a
-#define MSR_IA32_THERM_INTERRUPT 0x19b
-#define MSR_IA32_THERM_STATUS 0x19c
-#define MSR_IA32_MISC_ENABLE 0x1a0
-
-#define MSR_IA32_DEBUGCTLMSR 0x1d9
-#define MSR_IA32_LASTBRANCHFROMIP 0x1db
-#define MSR_IA32_LASTBRANCHTOIP 0x1dc
-#define MSR_IA32_LASTINTFROMIP 0x1dd
-#define MSR_IA32_LASTINTTOIP 0x1de
-
-#define MSR_IA32_MC0_CTL 0x400
-#define MSR_IA32_MC0_STATUS 0x401
-#define MSR_IA32_MC0_ADDR 0x402
-#define MSR_IA32_MC0_MISC 0x403
-
-#define MSR_P6_PERFCTR0 0xc1
-#define MSR_P6_PERFCTR1 0xc2
-#define MSR_P6_EVNTSEL0 0x186
-#define MSR_P6_EVNTSEL1 0x187
-
-#define MSR_IA32_PERF_STATUS 0x198
-#define MSR_IA32_PERF_CTL 0x199
-
-/* AMD Defined MSRs */
-#define MSR_K6_EFER 0xC0000080
-#define MSR_K6_STAR 0xC0000081
-#define MSR_K6_WHCR 0xC0000082
-#define MSR_K6_UWCCR 0xC0000085
-#define MSR_K6_EPMR 0xC0000086
-#define MSR_K6_PSOR 0xC0000087
-#define MSR_K6_PFIR 0xC0000088
-
-#define MSR_K7_EVNTSEL0 0xC0010000
-#define MSR_K7_PERFCTR0 0xC0010004
-#define MSR_K7_HWCR 0xC0010015
-#define MSR_K7_CLK_CTL 0xC001001b
-#define MSR_K7_FID_VID_CTL 0xC0010041
-#define MSR_K7_VID_STATUS 0xC0010042
-
-/* Centaur-Hauls/IDT defined MSRs. */
-#define MSR_IDT_FCR1 0x107
-#define MSR_IDT_FCR2 0x108
-#define MSR_IDT_FCR3 0x109
-#define MSR_IDT_FCR4 0x10a
-
-#define MSR_IDT_MCR0 0x110
-#define MSR_IDT_MCR1 0x111
-#define MSR_IDT_MCR2 0x112
-#define MSR_IDT_MCR3 0x113
-#define MSR_IDT_MCR4 0x114
-#define MSR_IDT_MCR5 0x115
-#define MSR_IDT_MCR6 0x116
-#define MSR_IDT_MCR7 0x117
-#define MSR_IDT_MCR_CTRL 0x120
-
-/* VIA Cyrix defined MSRs*/
-#define MSR_VIA_FCR 0x1107
-#define MSR_VIA_LONGHAUL 0x110a
-#define MSR_VIA_BCR2 0x1147
-
-/* Transmeta defined MSRs */
-#define MSR_TMTA_LONGRUN_CTRL 0x80868010
-#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
-#define MSR_TMTA_LRTI_READOUT 0x80868018
-#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
-
-#endif /* __ASM_MSR_H */
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/page.h b/linux-2.4.30-xen-sparse/include/asm-xen/page.h
index ca73ccfc31..901d9acfc5 100644
--- a/linux-2.4.30-xen-sparse/include/asm-xen/page.h
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/page.h
@@ -43,9 +43,9 @@
#define copy_user_page(to, from, vaddr) copy_page(to, from)
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
-extern unsigned long *phys_to_machine_mapping;
-#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
-#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)])
+extern unsigned int *phys_to_machine_mapping;
+#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)]))
+#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)]))
static inline unsigned long phys_to_machine(unsigned long phys)
{
unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
@@ -86,22 +86,18 @@ typedef struct { unsigned long pgprot; } pgprot_t;
static inline unsigned long pmd_val(pmd_t x)
{
unsigned long ret = x.pmd;
- if ( (ret & 1) ) ret = machine_to_phys(ret);
+ if ( ret ) ret = machine_to_phys(ret) | 1;
return ret;
}
+#define pmd_val_ma(x) ((x).pmd)
#define pgd_val(x) ({ BUG(); (unsigned long)0; })
#define pgprot_val(x) ((x).pgprot)
-static inline pte_t __pte(unsigned long x)
-{
- if ( (x & 1) ) x = phys_to_machine(x);
- return ((pte_t) { (x) });
-}
-static inline pmd_t __pmd(unsigned long x)
-{
- if ( (x & 1) ) x = phys_to_machine(x);
- return ((pmd_t) { (x) });
-}
+#define __pte(x) ({ unsigned long _x = (x); \
+ (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pte_ma(x) ((pte_t) { (x) } )
+#define __pmd(x) ({ unsigned long _x = (x); \
+ (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); })
#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; })
#define __pgprot(x) ((pgprot_t) { (x) } )
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/pgalloc.h b/linux-2.4.30-xen-sparse/include/asm-xen/pgalloc.h
index f6bee4d689..3f8f388774 100644
--- a/linux-2.4.30-xen-sparse/include/asm-xen/pgalloc.h
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/pgalloc.h
@@ -22,7 +22,6 @@
#define pmd_populate(mm, pmd, pte) \
do { \
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
- XEN_flush_page_update_queue(); \
} while ( 0 )
/*
@@ -79,8 +78,8 @@ static inline pgd_t *get_pgd_slow(void)
memcpy(pgd + USER_PTRS_PER_PGD,
init_mm.pgd + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
- __make_page_readonly(pgd);
- queue_pgd_pin(__pa(pgd));
+ __make_page_readonly(pgd);
+ xen_pgd_pin(__pa(pgd));
}
return pgd;
}
@@ -110,8 +109,8 @@ static inline void free_pgd_slow(pgd_t *pgd)
free_page((unsigned long)__va(pgd_val(pgd[i])-1));
kmem_cache_free(pae_pgd_cachep, pgd);
#else
- queue_pgd_unpin(__pa(pgd));
- __make_page_writable(pgd);
+ xen_pgd_unpin(__pa(pgd));
+ __make_page_writable(pgd);
free_page((unsigned long)pgd);
#endif
}
@@ -134,7 +133,7 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
clear_page(pte);
__make_page_readonly(pte);
- queue_pte_pin(__pa(pte));
+ xen_pte_pin(__pa(pte));
}
return pte;
@@ -153,7 +152,7 @@ static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
static __inline__ void pte_free_slow(pte_t *pte)
{
- queue_pte_unpin(__pa(pte));
+ xen_pte_unpin(__pa(pte));
__make_page_writable(pte);
free_page((unsigned long)pte);
}
@@ -208,26 +207,23 @@ extern int do_check_pgt_cache(int, int);
static inline void flush_tlb_mm(struct mm_struct *mm)
{
- if (mm == current->active_mm) queue_tlb_flush();
- XEN_flush_page_update_queue();
+ if (mm == current->active_mm) xen_tlb_flush();
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr)
{
- if (vma->vm_mm == current->active_mm) queue_invlpg(addr);
- XEN_flush_page_update_queue();
+ if (vma->vm_mm == current->active_mm) xen_invlpg(addr);
}
static inline void flush_tlb_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
- if (mm == current->active_mm) queue_tlb_flush();
- XEN_flush_page_update_queue();
+ if (mm == current->active_mm) xen_tlb_flush();
}
#else
-#error no guestos SMP support yet...
+#error no kernel SMP support yet...
#include <asm/smp.h>
#define local_flush_tlb() \
@@ -261,7 +257,6 @@ static inline void flush_tlb_pgtables(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
/* i386 does not keep any page table caches in TLB */
- XEN_flush_page_update_queue();
}
/*
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/pgtable-2level.h b/linux-2.4.30-xen-sparse/include/asm-xen/pgtable-2level.h
index 750ebfeae0..70f8356fb1 100644
--- a/linux-2.4.30-xen-sparse/include/asm-xen/pgtable-2level.h
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/pgtable-2level.h
@@ -34,9 +34,19 @@ static inline int pgd_bad(pgd_t pgd) { return 0; }
static inline int pgd_present(pgd_t pgd) { return 1; }
#define pgd_clear(xp) do { } while (0)
-#define set_pte(pteptr, pteval) queue_l1_entry_update(pteptr, (pteval).pte_low)
-#define set_pte_atomic(pteptr, pteval) queue_l1_entry_update(pteptr, (pteval).pte_low)
-#define set_pmd(pmdptr, pmdval) queue_l2_entry_update((pmdptr), (pmdval))
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
+#define set_pte_atomic(pteptr, pteval) (*(pteptr) = pteval)
+
+/*
+ * (pmds are folded into pgds so this doesnt get actually called,
+ * but the define is needed for a generic inline function.)
+ */
+#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
#define set_pgd(pgdptr, pgdval) ((void)0)
#define pgd_page(pgd) \
@@ -47,6 +57,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
return (pmd_t *) dir;
}
+#define ptep_get_and_clear(xp) __pte_ma(xchg(&(xp)->pte_low, 0))
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
/*
@@ -69,7 +80,7 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
* require. In all the cases we care about, the high bit gets shifted out
* (e.g., phys_to_machine()) so behaviour there is correct.
*/
-#define INVALID_P2M_ENTRY (~0UL)
+#define INVALID_P2M_ENTRY (~0U)
#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
#define pte_page(_pte) \
({ \
@@ -83,21 +94,4 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
#define pte_none(x) (!(x).pte_low)
#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
-/*
- * A note on implementation of this atomic 'get-and-clear' operation.
- * This is actually very simple because XenoLinux can only run on a single
- * processor. Therefore, we cannot race other processors setting the 'accessed'
- * or 'dirty' bits on a page-table entry.
- * Even if pages are shared between domains, that is not a problem because
- * each domain will have separate page tables, with their own versions of
- * accessed & dirty state.
- */
-static inline pte_t ptep_get_and_clear(pte_t *xp)
-{
- pte_t pte = *xp;
- if ( !pte_none(pte) )
- queue_l1_entry_update(xp, 0);
- return pte;
-}
-
#endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/pgtable.h b/linux-2.4.30-xen-sparse/include/asm-xen/pgtable.h
index c15f0e9509..19947a9aae 100644
--- a/linux-2.4.30-xen-sparse/include/asm-xen/pgtable.h
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/pgtable.h
@@ -38,11 +38,11 @@ extern void paging_init(void);
extern unsigned long pgkern_mask;
-#define __flush_tlb() ({ queue_tlb_flush(); XEN_flush_page_update_queue(); })
+#define __flush_tlb() xen_tlb_flush()
#define __flush_tlb_global() __flush_tlb()
#define __flush_tlb_all() __flush_tlb_global()
-#define __flush_tlb_one(addr) ({ queue_invlpg(addr); XEN_flush_page_update_queue(); })
-#define __flush_tlb_single(addr) ({ queue_invlpg(addr); XEN_flush_page_update_queue(); })
+#define __flush_tlb_one(addr) xen_invlpg(addr)
+#define __flush_tlb_single(addr) xen_invlpg(addr)
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -179,12 +179,14 @@ extern void * high_memory;
#define __S111 PAGE_SHARED
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
-#define pte_clear(xp) queue_l1_entry_update(xp, 0)
+#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0)
-#define pmd_none(x) (!(x).pmd)
-#define pmd_present(x) ((x).pmd & _PAGE_PRESENT)
+#define pmd_none(x) (!pmd_val(x))
+/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
+ can temporarily clear it. */
+#define pmd_present(x) (pmd_val(x))
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
-#define pmd_bad(x) (((x).pmd & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
@@ -212,29 +214,28 @@ static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return p
static inline int ptep_test_and_clear_dirty(pte_t *ptep)
{
- unsigned long pteval = *(unsigned long *)ptep;
- int ret = pteval & _PAGE_DIRTY;
- if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_DIRTY);
- return ret;
+ if (!pte_dirty(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
}
-static inline int ptep_test_and_clear_young(pte_t *ptep)
+
+static inline int ptep_test_and_clear_young(pte_t *ptep)
{
- unsigned long pteval = *(unsigned long *)ptep;
- int ret = pteval & _PAGE_ACCESSED;
- if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_ACCESSED);
- return ret;
+ if (!pte_young(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
}
+
static inline void ptep_set_wrprotect(pte_t *ptep)
{
- unsigned long pteval = *(unsigned long *)ptep;
- if ( (pteval & _PAGE_RW) )
- queue_l1_entry_update(ptep, pteval & ~_PAGE_RW);
+ if (pte_write(*ptep))
+ clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
}
+
static inline void ptep_mkdirty(pte_t *ptep)
{
- unsigned long pteval = *(unsigned long *)ptep;
- if ( !(pteval & _PAGE_DIRTY) )
- queue_l1_entry_update(ptep, pteval | _PAGE_DIRTY);
+ if (!pte_dirty(*ptep))
+ set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
}
/*
@@ -299,7 +300,7 @@ static inline void __make_page_readonly(void *va)
pgd_t *pgd = pgd_offset_k((unsigned long)va);
pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
pte_t *pte = pte_offset(pmd, (unsigned long)va);
- queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW);
+ set_pte(pte, pte_wrprotect(*pte));
}
static inline void __make_page_writable(void *va)
@@ -307,7 +308,7 @@ static inline void __make_page_writable(void *va)
pgd_t *pgd = pgd_offset_k((unsigned long)va);
pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
pte_t *pte = pte_offset(pmd, (unsigned long)va);
- queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW);
+ set_pte(pte, pte_mkwrite(*pte));
}
static inline void make_page_readonly(void *va)
@@ -315,7 +316,7 @@ static inline void make_page_readonly(void *va)
pgd_t *pgd = pgd_offset_k((unsigned long)va);
pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
pte_t *pte = pte_offset(pmd, (unsigned long)va);
- queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW);
+ set_pte(pte, pte_wrprotect(*pte));
if ( (unsigned long)va >= VMALLOC_START )
__make_page_readonly(machine_to_virt(
*(unsigned long *)pte&PAGE_MASK));
@@ -326,7 +327,7 @@ static inline void make_page_writable(void *va)
pgd_t *pgd = pgd_offset_k((unsigned long)va);
pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
pte_t *pte = pte_offset(pmd, (unsigned long)va);
- queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW);
+ set_pte(pte, pte_mkwrite(*pte));
if ( (unsigned long)va >= VMALLOC_START )
__make_page_writable(machine_to_virt(
*(unsigned long *)pte&PAGE_MASK));
diff --git a/linux-2.4.30-xen-sparse/include/asm-xen/system.h b/linux-2.4.30-xen-sparse/include/asm-xen/system.h
index fda33efd3a..f694674233 100644
--- a/linux-2.4.30-xen-sparse/include/asm-xen/system.h
+++ b/linux-2.4.30-xen-sparse/include/asm-xen/system.h
@@ -109,7 +109,7 @@ static inline unsigned long _get_base(char * addr)
/* NB. 'clts' is done for us by Xen during virtual trap. */
#define clts() ((void)0)
-#define stts() (HYPERVISOR_fpu_taskswitch())
+#define stts() (HYPERVISOR_fpu_taskswitch(1))
#endif /* __KERNEL__ */
diff --git a/linux-2.4.30-xen-sparse/mkbuildtree b/linux-2.4.30-xen-sparse/mkbuildtree
index 7e8177802c..714d85e69f 100755
--- a/linux-2.4.30-xen-sparse/mkbuildtree
+++ b/linux-2.4.30-xen-sparse/mkbuildtree
@@ -163,6 +163,7 @@ ln -sf ../asm-i386/mmu.h
ln -sf ../asm-i386/mmx.h
ln -sf ../asm-i386/mpspec.h
ln -sf ../asm-i386/msgbuf.h
+ln -sf ../asm-i386/msr.h
ln -sf ../asm-i386/mtrr.h
ln -sf ../asm-i386/namei.h
ln -sf ../asm-i386/param.h
@@ -207,10 +208,11 @@ ln -sf ../asm-i386/vm86.h
ln -sf ../../${LINUX_26}/include/asm-xen/balloon.h
ln -sf ../../${LINUX_26}/include/asm-xen/ctrl_if.h
ln -sf ../../${LINUX_26}/include/asm-xen/evtchn.h
+ln -sf ../../${LINUX_26}/include/asm-xen/gnttab.h
ln -sf ../../${LINUX_26}/include/asm-xen/hypervisor.h
-ln -sf ../../${LINUX_26}/include/asm-xen/multicall.h
ln -sf ../../${LINUX_26}/include/asm-xen/xen_proc.h
ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/synch_bitops.h
+ln -sf ../../${LINUX_26}/include/asm-xen/asm-i386/hypercall.h
mkdir -p linux-public && cd linux-public
ln -sf ../../../${LINUX_26}/include/asm-xen/linux-public/privcmd.h
@@ -227,10 +229,9 @@ ln -sf ../../i386/kernel/sys_i386.c
ln -sf ../../../${LINUX_26}/arch/xen/kernel/ctrl_if.c
ln -sf ../../../${LINUX_26}/arch/xen/kernel/evtchn.c
ln -sf ../../../${LINUX_26}/arch/xen/kernel/fixup.c
+ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c
ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c
ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c
-ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/ioport.c
-ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/pci-dma.c
cd ${AD}/arch/xen/lib
ln -sf ../../i386/lib/checksum.S
@@ -280,4 +281,12 @@ ln -sf ../../../../../${LINUX_26}/drivers/xen/blkback/vbd.c
cd ${AD}/arch/xen/drivers/blkif/frontend
ln -sf ../../../../../${LINUX_26}/drivers/xen/blkfront/blkfront.c
+cd ${AD}/arch/xen/drivers/usbif/frontend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbfront/usbfront.c main.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbfront/xhci.h
+cd ${AD}/arch/xen/drivers/usbif/backend
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/common.h
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/control.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/interface.c
+ln -sf ../../../../../${LINUX_26}/drivers/xen/usbback/usbback.c main.c
diff --git a/linux-2.4.30-xen-sparse/mm/highmem.c b/linux-2.4.30-xen-sparse/mm/highmem.c
index 341e6e29a9..f8182820ac 100644
--- a/linux-2.4.30-xen-sparse/mm/highmem.c
+++ b/linux-2.4.30-xen-sparse/mm/highmem.c
@@ -122,7 +122,6 @@ start:
}
vaddr = PKMAP_ADDR(last_pkmap_nr);
set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
- XEN_flush_page_update_queue();
pkmap_count[last_pkmap_nr] = 1;
page->virtual = (void *) vaddr;
diff --git a/linux-2.4.30-xen-sparse/mm/memory.c b/linux-2.4.30-xen-sparse/mm/memory.c
index 8f61e6657a..6b7c807fd3 100644
--- a/linux-2.4.30-xen-sparse/mm/memory.c
+++ b/linux-2.4.30-xen-sparse/mm/memory.c
@@ -153,7 +153,6 @@ void clear_page_tables(struct mm_struct *mm, unsigned long first, int nr)
free_one_pgd(page_dir);
page_dir++;
} while (--nr);
- XEN_flush_page_update_queue();
spin_unlock(&mm->page_table_lock);
/* keep the page table cache within bounds */
@@ -249,10 +248,8 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
/* If it's a COW mapping, write protect it both in the parent and the child */
if (cow && pte_write(pte)) {
- /* XEN modification: modified ordering here to avoid RaW hazard. */
- pte = *src_pte;
- pte = pte_wrprotect(pte);
ptep_set_wrprotect(src_pte);
+ pte = *src_pte;
}
/* If it's a shared mapping, mark it clean in the child */
@@ -916,8 +913,7 @@ static inline void establish_pte(struct vm_area_struct * vma, unsigned long addr
{
#ifdef CONFIG_XEN
if ( likely(vma->vm_mm == current->mm) ) {
- XEN_flush_page_update_queue();
- HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, UVMF_INVLPG);
+ HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG|UVMF_LOCAL);
} else {
set_pte(page_table, entry);
flush_tlb_page(vma, address);
@@ -1191,13 +1187,10 @@ static int do_swap_page(struct mm_struct * mm,
flush_page_to_ram(page);
flush_icache_page(vma, page);
#ifdef CONFIG_XEN
- if ( likely(vma->vm_mm == current->mm) ) {
- XEN_flush_page_update_queue();
- HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, pte, 0);
- } else {
+ if ( likely(vma->vm_mm == current->mm) )
+ HYPERVISOR_update_va_mapping(address, pte, 0);
+ else
set_pte(page_table, pte);
- XEN_flush_page_update_queue();
- }
#else
set_pte(page_table, pte);
#endif
@@ -1247,13 +1240,10 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
}
#ifdef CONFIG_XEN
- if ( likely(vma->vm_mm == current->mm) ) {
- XEN_flush_page_update_queue();
- HYPERVISOR_update_va_mapping(addr>>PAGE_SHIFT, entry, 0);
- } else {
+ if ( likely(vma->vm_mm == current->mm) )
+ HYPERVISOR_update_va_mapping(addr, entry, 0);
+ else
set_pte(page_table, entry);
- XEN_flush_page_update_queue();
- }
#else
set_pte(page_table, entry);
#endif
@@ -1333,13 +1323,10 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
if (write_access)
entry = pte_mkwrite(pte_mkdirty(entry));
#ifdef CONFIG_XEN
- if ( likely(vma->vm_mm == current->mm) ) {
- XEN_flush_page_update_queue();
- HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, 0);
- } else {
+ if ( likely(vma->vm_mm == current->mm) )
+ HYPERVISOR_update_va_mapping(address, entry, 0);
+ else
set_pte(page_table, entry);
- XEN_flush_page_update_queue();
- }
#else
set_pte(page_table, entry);
#endif
@@ -1486,7 +1473,6 @@ pte_t fastcall *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long addres
/* "fast" allocation can happen without dropping the lock.. */
new = pte_alloc_one_fast(mm, address);
if (!new) {
- XEN_flush_page_update_queue();
spin_unlock(&mm->page_table_lock);
new = pte_alloc_one(mm, address);
spin_lock(&mm->page_table_lock);
diff --git a/linux-2.4.30-xen-sparse/mm/mremap.c b/linux-2.4.30-xen-sparse/mm/mremap.c
index 330e194bae..475c308b1b 100644
--- a/linux-2.4.30-xen-sparse/mm/mremap.c
+++ b/linux-2.4.30-xen-sparse/mm/mremap.c
@@ -119,11 +119,9 @@ static int move_page_tables(struct mm_struct * mm,
* the old page tables)
*/
oops_we_failed:
- XEN_flush_page_update_queue();
flush_cache_range(mm, new_addr, new_addr + len);
while ((offset += PAGE_SIZE) < len)
move_one_page(mm, new_addr + offset, old_addr + offset);
- XEN_flush_page_update_queue();
zap_page_range(mm, new_addr, len);
return -1;
}
diff --git a/linux-2.4.30-xen-sparse/mm/swapfile.c b/linux-2.4.30-xen-sparse/mm/swapfile.c
deleted file mode 100644
index 26d4aa4f67..0000000000
--- a/linux-2.4.30-xen-sparse/mm/swapfile.c
+++ /dev/null
@@ -1,1269 +0,0 @@
-/*
- * linux/mm/swapfile.c
- *
- * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- * Swap reorganised 29.12.95, Stephen Tweedie
- */
-
-#include <linux/slab.h>
-#include <linux/smp_lock.h>
-#include <linux/kernel_stat.h>
-#include <linux/swap.h>
-#include <linux/swapctl.h>
-#include <linux/blkdev.h> /* for blk_size */
-#include <linux/vmalloc.h>
-#include <linux/pagemap.h>
-#include <linux/shm.h>
-
-#include <asm/pgtable.h>
-
-spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
-unsigned int nr_swapfiles;
-int total_swap_pages;
-static int swap_overflow;
-
-static const char Bad_file[] = "Bad swap file entry ";
-static const char Unused_file[] = "Unused swap file entry ";
-static const char Bad_offset[] = "Bad swap offset entry ";
-static const char Unused_offset[] = "Unused swap offset entry ";
-
-struct swap_list_t swap_list = {-1, -1};
-
-struct swap_info_struct swap_info[MAX_SWAPFILES];
-
-#define SWAPFILE_CLUSTER 256
-
-static inline int scan_swap_map(struct swap_info_struct *si)
-{
- unsigned long offset;
- /*
- * We try to cluster swap pages by allocating them
- * sequentially in swap. Once we've allocated
- * SWAPFILE_CLUSTER pages this way, however, we resort to
- * first-free allocation, starting a new cluster. This
- * prevents us from scattering swap pages all over the entire
- * swap partition, so that we reduce overall disk seek times
- * between swap pages. -- sct */
- if (si->cluster_nr) {
- while (si->cluster_next <= si->highest_bit) {
- offset = si->cluster_next++;
- if (si->swap_map[offset])
- continue;
- si->cluster_nr--;
- goto got_page;
- }
- }
- si->cluster_nr = SWAPFILE_CLUSTER;
-
- /* try to find an empty (even not aligned) cluster. */
- offset = si->lowest_bit;
- check_next_cluster:
- if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit)
- {
- int nr;
- for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++)
- if (si->swap_map[nr])
- {
- offset = nr+1;
- goto check_next_cluster;
- }
- /* We found a completly empty cluster, so start
- * using it.
- */
- goto got_page;
- }
- /* No luck, so now go finegrined as usual. -Andrea */
- for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
- if (si->swap_map[offset])
- continue;
- si->lowest_bit = offset+1;
- got_page:
- if (offset == si->lowest_bit)
- si->lowest_bit++;
- if (offset == si->highest_bit)
- si->highest_bit--;
- if (si->lowest_bit > si->highest_bit) {
- si->lowest_bit = si->max;
- si->highest_bit = 0;
- }
- si->swap_map[offset] = 1;
- nr_swap_pages--;
- si->cluster_next = offset+1;
- return offset;
- }
- si->lowest_bit = si->max;
- si->highest_bit = 0;
- return 0;
-}
-
-swp_entry_t get_swap_page(void)
-{
- struct swap_info_struct * p;
- unsigned long offset;
- swp_entry_t entry;
- int type, wrapped = 0;
-
- entry.val = 0; /* Out of memory */
- swap_list_lock();
- type = swap_list.next;
- if (type < 0)
- goto out;
- if (nr_swap_pages <= 0)
- goto out;
-
- while (1) {
- p = &swap_info[type];
- if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
- swap_device_lock(p);
- offset = scan_swap_map(p);
- swap_device_unlock(p);
- if (offset) {
- entry = SWP_ENTRY(type,offset);
- type = swap_info[type].next;
- if (type < 0 ||
- p->prio != swap_info[type].prio) {
- swap_list.next = swap_list.head;
- } else {
- swap_list.next = type;
- }
- goto out;
- }
- }
- type = p->next;
- if (!wrapped) {
- if (type < 0 || p->prio != swap_info[type].prio) {
- type = swap_list.head;
- wrapped = 1;
- }
- } else
- if (type < 0)
- goto out; /* out of swap space */
- }
-out:
- swap_list_unlock();
- return entry;
-}
-
-static struct swap_info_struct * swap_info_get(swp_entry_t entry)
-{
- struct swap_info_struct * p;
- unsigned long offset, type;
-
- if (!entry.val)
- goto out;
- type = SWP_TYPE(entry);
- if (type >= nr_swapfiles)
- goto bad_nofile;
- p = & swap_info[type];
- if (!(p->flags & SWP_USED))
- goto bad_device;
- offset = SWP_OFFSET(entry);
- if (offset >= p->max)
- goto bad_offset;
- if (!p->swap_map[offset])
- goto bad_free;
- swap_list_lock();
- if (p->prio > swap_info[swap_list.next].prio)
- swap_list.next = type;
- swap_device_lock(p);
- return p;
-
-bad_free:
- printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
- goto out;
-bad_offset:
- printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
- goto out;
-bad_device:
- printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
- goto out;
-bad_nofile:
- printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
-out:
- return NULL;
-}
-
-static void swap_info_put(struct swap_info_struct * p)
-{
- swap_device_unlock(p);
- swap_list_unlock();
-}
-
-static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
-{
- int count = p->swap_map[offset];
-
- if (count < SWAP_MAP_MAX) {
- count--;
- p->swap_map[offset] = count;
- if (!count) {
- if (offset < p->lowest_bit)
- p->lowest_bit = offset;
- if (offset > p->highest_bit)
- p->highest_bit = offset;
- nr_swap_pages++;
- }
- }
- return count;
-}
-
-/*
- * Caller has made sure that the swapdevice corresponding to entry
- * is still around or has not been recycled.
- */
-void swap_free(swp_entry_t entry)
-{
- struct swap_info_struct * p;
-
- p = swap_info_get(entry);
- if (p) {
- swap_entry_free(p, SWP_OFFSET(entry));
- swap_info_put(p);
- }
-}
-
-/*
- * Check if we're the only user of a swap page,
- * when the page is locked.
- */
-static int exclusive_swap_page(struct page *page)
-{
- int retval = 0;
- struct swap_info_struct * p;
- swp_entry_t entry;
-
- entry.val = page->index;
- p = swap_info_get(entry);
- if (p) {
- /* Is the only swap cache user the cache itself? */
- if (p->swap_map[SWP_OFFSET(entry)] == 1) {
- /* Recheck the page count with the pagecache lock held.. */
- spin_lock(&pagecache_lock);
- if (page_count(page) - !!page->buffers == 2)
- retval = 1;
- spin_unlock(&pagecache_lock);
- }
- swap_info_put(p);
- }
- return retval;
-}
-
-/*
- * We can use this swap cache entry directly
- * if there are no other references to it.
- *
- * Here "exclusive_swap_page()" does the real
- * work, but we opportunistically check whether
- * we need to get all the locks first..
- */
-int fastcall can_share_swap_page(struct page *page)
-{
- int retval = 0;
-
- if (!PageLocked(page))
- BUG();
- switch (page_count(page)) {
- case 3:
- if (!page->buffers)
- break;
- /* Fallthrough */
- case 2:
- if (!PageSwapCache(page))
- break;
- retval = exclusive_swap_page(page);
- break;
- case 1:
- if (PageReserved(page))
- break;
- retval = 1;
- }
- return retval;
-}
-
-/*
- * Work out if there are any other processes sharing this
- * swap cache page. Free it if you can. Return success.
- */
-int fastcall remove_exclusive_swap_page(struct page *page)
-{
- int retval;
- struct swap_info_struct * p;
- swp_entry_t entry;
-
- if (!PageLocked(page))
- BUG();
- if (!PageSwapCache(page))
- return 0;
- if (page_count(page) - !!page->buffers != 2) /* 2: us + cache */
- return 0;
-
- entry.val = page->index;
- p = swap_info_get(entry);
- if (!p)
- return 0;
-
- /* Is the only swap cache user the cache itself? */
- retval = 0;
- if (p->swap_map[SWP_OFFSET(entry)] == 1) {
- /* Recheck the page count with the pagecache lock held.. */
- spin_lock(&pagecache_lock);
- if (page_count(page) - !!page->buffers == 2) {
- __delete_from_swap_cache(page);
- SetPageDirty(page);
- retval = 1;
- }
- spin_unlock(&pagecache_lock);
- }
- swap_info_put(p);
-
- if (retval) {
- block_flushpage(page, 0);
- swap_free(entry);
- page_cache_release(page);
- }
-
- return retval;
-}
-
-/*
- * Free the swap entry like above, but also try to
- * free the page cache entry if it is the last user.
- */
-void free_swap_and_cache(swp_entry_t entry)
-{
- struct swap_info_struct * p;
- struct page *page = NULL;
-
- p = swap_info_get(entry);
- if (p) {
- if (swap_entry_free(p, SWP_OFFSET(entry)) == 1)
- page = find_trylock_page(&swapper_space, entry.val);
- swap_info_put(p);
- }
- if (page) {
- page_cache_get(page);
- /* Only cache user (+us), or swap space full? Free it! */
- if (page_count(page) - !!page->buffers == 2 || vm_swap_full()) {
- delete_from_swap_cache(page);
- SetPageDirty(page);
- }
- UnlockPage(page);
- page_cache_release(page);
- }
-}
-
-/*
- * The swap entry has been read in advance, and we return 1 to indicate
- * that the page has been used or is no longer needed.
- *
- * Always set the resulting pte to be nowrite (the same as COW pages
- * after one process has exited). We don't know just how many PTEs will
- * share this swap entry, so be cautious and let do_wp_page work out
- * what to do if a write is requested later.
- */
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
-static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
- pte_t *dir, swp_entry_t entry, struct page* page)
-{
- pte_t pte = *dir;
-
- if (likely(pte_to_swp_entry(pte).val != entry.val))
- return;
- if (unlikely(pte_none(pte) || pte_present(pte)))
- return;
- get_page(page);
- set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
- swap_free(entry);
- ++vma->vm_mm->rss;
-}
-
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
-static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
- unsigned long address, unsigned long size, unsigned long offset,
- swp_entry_t entry, struct page* page)
-{
- pte_t * pte;
- unsigned long end;
-
- if (pmd_none(*dir))
- return;
- if (pmd_bad(*dir)) {
- pmd_ERROR(*dir);
- pmd_clear(dir);
- return;
- }
- pte = pte_offset(dir, address);
- offset += address & PMD_MASK;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- do {
- unuse_pte(vma, offset+address-vma->vm_start, pte, entry, page);
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
-}
-
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
-static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
- unsigned long address, unsigned long size,
- swp_entry_t entry, struct page* page)
-{
- pmd_t * pmd;
- unsigned long offset, end;
-
- if (pgd_none(*dir))
- return;
- if (pgd_bad(*dir)) {
- pgd_ERROR(*dir);
- pgd_clear(dir);
- return;
- }
- pmd = pmd_offset(dir, address);
- offset = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- if (address >= end)
- BUG();
- do {
- unuse_pmd(vma, pmd, address, end - address, offset, entry,
- page);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
-}
-
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
-static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
- swp_entry_t entry, struct page* page)
-{
- unsigned long start = vma->vm_start, end = vma->vm_end;
-
- if (start >= end)
- BUG();
- do {
- unuse_pgd(vma, pgdir, start, end - start, entry, page);
- start = (start + PGDIR_SIZE) & PGDIR_MASK;
- pgdir++;
- } while (start && (start < end));
-}
-
-static void unuse_process(struct mm_struct * mm,
- swp_entry_t entry, struct page* page)
-{
- struct vm_area_struct* vma;
-
- /*
- * Go through process' page directory.
- */
- spin_lock(&mm->page_table_lock);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- pgd_t * pgd = pgd_offset(mm, vma->vm_start);
- unuse_vma(vma, pgd, entry, page);
- }
- XEN_flush_page_update_queue();
- spin_unlock(&mm->page_table_lock);
- return;
-}
-
-/*
- * Scan swap_map from current position to next entry still in use.
- * Recycle to start on reaching the end, returning 0 when empty.
- */
-static int find_next_to_unuse(struct swap_info_struct *si, int prev)
-{
- int max = si->max;
- int i = prev;
- int count;
-
- /*
- * No need for swap_device_lock(si) here: we're just looking
- * for whether an entry is in use, not modifying it; false
- * hits are okay, and sys_swapoff() has already prevented new
- * allocations from this area (while holding swap_list_lock()).
- */
- for (;;) {
- if (++i >= max) {
- if (!prev) {
- i = 0;
- break;
- }
- /*
- * No entries in use at top of swap_map,
- * loop back to start and recheck there.
- */
- max = prev + 1;
- prev = 0;
- i = 1;
- }
- count = si->swap_map[i];
- if (count && count != SWAP_MAP_BAD)
- break;
- }
- return i;
-}
-
-/*
- * We completely avoid races by reading each swap page in advance,
- * and then search for the process using it. All the necessary
- * page table adjustments can then be made atomically.
- */
-static int try_to_unuse(unsigned int type)
-{
- struct swap_info_struct * si = &swap_info[type];
- struct mm_struct *start_mm;
- unsigned short *swap_map;
- unsigned short swcount;
- struct page *page;
- swp_entry_t entry;
- int i = 0;
- int retval = 0;
- int reset_overflow = 0;
- int shmem;
-
- /*
- * When searching mms for an entry, a good strategy is to
- * start at the first mm we freed the previous entry from
- * (though actually we don't notice whether we or coincidence
- * freed the entry). Initialize this start_mm with a hold.
- *
- * A simpler strategy would be to start at the last mm we
- * freed the previous entry from; but that would take less
- * advantage of mmlist ordering (now preserved by swap_out()),
- * which clusters forked address spaces together, most recent
- * child immediately after parent. If we race with dup_mmap(),
- * we very much want to resolve parent before child, otherwise
- * we may miss some entries: using last mm would invert that.
- */
- start_mm = &init_mm;
- atomic_inc(&init_mm.mm_users);
-
- /*
- * Keep on scanning until all entries have gone. Usually,
- * one pass through swap_map is enough, but not necessarily:
- * mmput() removes mm from mmlist before exit_mmap() and its
- * zap_page_range(). That's not too bad, those entries are
- * on their way out, and handled faster there than here.
- * do_munmap() behaves similarly, taking the range out of mm's
- * vma list before zap_page_range(). But unfortunately, when
- * unmapping a part of a vma, it takes the whole out first,
- * then reinserts what's left after (might even reschedule if
- * open() method called) - so swap entries may be invisible
- * to swapoff for a while, then reappear - but that is rare.
- */
- while ((i = find_next_to_unuse(si, i))) {
- /*
- * Get a page for the entry, using the existing swap
- * cache page if there is one. Otherwise, get a clean
- * page and read the swap into it.
- */
- swap_map = &si->swap_map[i];
- entry = SWP_ENTRY(type, i);
- page = read_swap_cache_async(entry);
- if (!page) {
- /*
- * Either swap_duplicate() failed because entry
- * has been freed independently, and will not be
- * reused since sys_swapoff() already disabled
- * allocation from here, or alloc_page() failed.
- */
- if (!*swap_map)
- continue;
- retval = -ENOMEM;
- break;
- }
-
- /*
- * Don't hold on to start_mm if it looks like exiting.
- */
- if (atomic_read(&start_mm->mm_users) == 1) {
- mmput(start_mm);
- start_mm = &init_mm;
- atomic_inc(&init_mm.mm_users);
- }
-
- /*
- * Wait for and lock page. When do_swap_page races with
- * try_to_unuse, do_swap_page can handle the fault much
- * faster than try_to_unuse can locate the entry. This
- * apparently redundant "wait_on_page" lets try_to_unuse
- * defer to do_swap_page in such a case - in some tests,
- * do_swap_page and try_to_unuse repeatedly compete.
- */
- wait_on_page(page);
- lock_page(page);
-
- /*
- * Remove all references to entry, without blocking.
- * Whenever we reach init_mm, there's no address space
- * to search, but use it as a reminder to search shmem.
- */
- shmem = 0;
- swcount = *swap_map;
- if (swcount > 1) {
- flush_page_to_ram(page);
- if (start_mm == &init_mm)
- shmem = shmem_unuse(entry, page);
- else
- unuse_process(start_mm, entry, page);
- }
- if (*swap_map > 1) {
- int set_start_mm = (*swap_map >= swcount);
- struct list_head *p = &start_mm->mmlist;
- struct mm_struct *new_start_mm = start_mm;
- struct mm_struct *mm;
-
- spin_lock(&mmlist_lock);
- while (*swap_map > 1 &&
- (p = p->next) != &start_mm->mmlist) {
- mm = list_entry(p, struct mm_struct, mmlist);
- swcount = *swap_map;
- if (mm == &init_mm) {
- set_start_mm = 1;
- spin_unlock(&mmlist_lock);
- shmem = shmem_unuse(entry, page);
- spin_lock(&mmlist_lock);
- } else
- unuse_process(mm, entry, page);
- if (set_start_mm && *swap_map < swcount) {
- new_start_mm = mm;
- set_start_mm = 0;
- }
- }
- atomic_inc(&new_start_mm->mm_users);
- spin_unlock(&mmlist_lock);
- mmput(start_mm);
- start_mm = new_start_mm;
- }
-
- /*
- * How could swap count reach 0x7fff when the maximum
- * pid is 0x7fff, and there's no way to repeat a swap
- * page within an mm (except in shmem, where it's the
- * shared object which takes the reference count)?
- * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
- *
- * If that's wrong, then we should worry more about
- * exit_mmap() and do_munmap() cases described above:
- * we might be resetting SWAP_MAP_MAX too early here.
- * We know "Undead"s can happen, they're okay, so don't
- * report them; but do report if we reset SWAP_MAP_MAX.
- */
- if (*swap_map == SWAP_MAP_MAX) {
- swap_list_lock();
- swap_device_lock(si);
- nr_swap_pages++;
- *swap_map = 1;
- swap_device_unlock(si);
- swap_list_unlock();
- reset_overflow = 1;
- }
-
- /*
- * If a reference remains (rare), we would like to leave
- * the page in the swap cache; but try_to_swap_out could
- * then re-duplicate the entry once we drop page lock,
- * so we might loop indefinitely; also, that page could
- * not be swapped out to other storage meanwhile. So:
- * delete from cache even if there's another reference,
- * after ensuring that the data has been saved to disk -
- * since if the reference remains (rarer), it will be
- * read from disk into another page. Splitting into two
- * pages would be incorrect if swap supported "shared
- * private" pages, but they are handled by tmpfs files.
- *
- * Note shmem_unuse already deleted swappage from cache,
- * unless corresponding filepage found already in cache:
- * in which case it left swappage in cache, lowered its
- * swap count to pass quickly through the loops above,
- * and now we must reincrement count to try again later.
- */
- if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
- rw_swap_page(WRITE, page);
- lock_page(page);
- }
- if (PageSwapCache(page)) {
- if (shmem)
- swap_duplicate(entry);
- else
- delete_from_swap_cache(page);
- }
-
- /*
- * So we could skip searching mms once swap count went
- * to 1, we did not mark any present ptes as dirty: must
- * mark page dirty so try_to_swap_out will preserve it.
- */
- SetPageDirty(page);
- UnlockPage(page);
- page_cache_release(page);
-
- /*
- * Make sure that we aren't completely killing
- * interactive performance. Interruptible check on
- * signal_pending() would be nice, but changes the spec?
- */
- if (current->need_resched)
- schedule();
- }
-
- mmput(start_mm);
- if (reset_overflow) {
- printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
- swap_overflow = 0;
- }
- return retval;
-}
-
-asmlinkage long sys_swapoff(const char * specialfile)
-{
- struct swap_info_struct * p = NULL;
- unsigned short *swap_map;
- struct nameidata nd;
- int i, type, prev;
- int err;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- err = user_path_walk(specialfile, &nd);
- if (err)
- goto out;
-
- lock_kernel();
- prev = -1;
- swap_list_lock();
- for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
- p = swap_info + type;
- if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
- if (p->swap_file == nd.dentry ||
- (S_ISBLK(nd.dentry->d_inode->i_mode) &&
- p->swap_device == nd.dentry->d_inode->i_rdev))
- break;
- }
- prev = type;
- }
- err = -EINVAL;
- if (type < 0) {
- swap_list_unlock();
- goto out_dput;
- }
-
- if (prev < 0) {
- swap_list.head = p->next;
- } else {
- swap_info[prev].next = p->next;
- }
- if (type == swap_list.next) {
- /* just pick something that's safe... */
- swap_list.next = swap_list.head;
- }
- nr_swap_pages -= p->pages;
- total_swap_pages -= p->pages;
- p->flags = SWP_USED;
- swap_list_unlock();
- unlock_kernel();
- err = try_to_unuse(type);
- lock_kernel();
- if (err) {
- /* re-insert swap space back into swap_list */
- swap_list_lock();
- for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next)
- if (p->prio >= swap_info[i].prio)
- break;
- p->next = i;
- if (prev < 0)
- swap_list.head = swap_list.next = p - swap_info;
- else
- swap_info[prev].next = p - swap_info;
- nr_swap_pages += p->pages;
- total_swap_pages += p->pages;
- p->flags = SWP_WRITEOK;
- swap_list_unlock();
- goto out_dput;
- }
- if (p->swap_device)
- blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP);
- path_release(&nd);
-
- swap_list_lock();
- swap_device_lock(p);
- nd.mnt = p->swap_vfsmnt;
- nd.dentry = p->swap_file;
- p->swap_vfsmnt = NULL;
- p->swap_file = NULL;
- p->swap_device = 0;
- p->max = 0;
- swap_map = p->swap_map;
- p->swap_map = NULL;
- p->flags = 0;
- swap_device_unlock(p);
- swap_list_unlock();
- vfree(swap_map);
- err = 0;
-
-out_dput:
- unlock_kernel();
- path_release(&nd);
-out:
- return err;
-}
-
-int get_swaparea_info(char *buf)
-{
- char * page = (char *) __get_free_page(GFP_KERNEL);
- struct swap_info_struct *ptr = swap_info;
- int i, j, len = 0, usedswap;
-
- if (!page)
- return -ENOMEM;
-
- len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
- for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
- if ((ptr->flags & SWP_USED) && ptr->swap_map) {
- char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt,
- page, PAGE_SIZE);
-
- len += sprintf(buf + len, "%-31s ", path);
-
- if (!ptr->swap_device)
- len += sprintf(buf + len, "file\t\t");
- else
- len += sprintf(buf + len, "partition\t");
-
- usedswap = 0;
- for (j = 0; j < ptr->max; ++j)
- switch (ptr->swap_map[j]) {
- case SWAP_MAP_BAD:
- case 0:
- continue;
- default:
- usedswap++;
- }
- len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10),
- usedswap << (PAGE_SHIFT - 10), ptr->prio);
- }
- }
- free_page((unsigned long) page);
- return len;
-}
-
-int is_swap_partition(kdev_t dev) {
- struct swap_info_struct *ptr = swap_info;
- int i;
-
- for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
- if (ptr->flags & SWP_USED)
- if (ptr->swap_device == dev)
- return 1;
- }
- return 0;
-}
-
-/*
- * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
- *
- * The swapon system call
- */
-asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
-{
- struct swap_info_struct * p;
- struct nameidata nd;
- struct inode * swap_inode;
- unsigned int type;
- int i, j, prev;
- int error;
- static int least_priority = 0;
- union swap_header *swap_header = 0;
- int swap_header_version;
- int nr_good_pages = 0;
- unsigned long maxpages = 1;
- int swapfilesize;
- struct block_device *bdev = NULL;
- unsigned short *swap_map;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- lock_kernel();
- swap_list_lock();
- p = swap_info;
- for (type = 0 ; type < nr_swapfiles ; type++,p++)
- if (!(p->flags & SWP_USED))
- break;
- error = -EPERM;
- if (type >= MAX_SWAPFILES) {
- swap_list_unlock();
- goto out;
- }
- if (type >= nr_swapfiles)
- nr_swapfiles = type+1;
- p->flags = SWP_USED;
- p->swap_file = NULL;
- p->swap_vfsmnt = NULL;
- p->swap_device = 0;
- p->swap_map = NULL;
- p->lowest_bit = 0;
- p->highest_bit = 0;
- p->cluster_nr = 0;
- p->sdev_lock = SPIN_LOCK_UNLOCKED;
- p->next = -1;
- if (swap_flags & SWAP_FLAG_PREFER) {
- p->prio =
- (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
- } else {
- p->prio = --least_priority;
- }
- swap_list_unlock();
- error = user_path_walk(specialfile, &nd);
- if (error)
- goto bad_swap_2;
-
- p->swap_file = nd.dentry;
- p->swap_vfsmnt = nd.mnt;
- swap_inode = nd.dentry->d_inode;
- error = -EINVAL;
-
- if (S_ISBLK(swap_inode->i_mode)) {
- kdev_t dev = swap_inode->i_rdev;
- struct block_device_operations *bdops;
- devfs_handle_t de;
-
- if (is_mounted(dev)) {
- error = -EBUSY;
- goto bad_swap_2;
- }
-
- p->swap_device = dev;
- set_blocksize(dev, PAGE_SIZE);
-
- bd_acquire(swap_inode);
- bdev = swap_inode->i_bdev;
- de = devfs_get_handle_from_inode(swap_inode);
- bdops = devfs_get_ops(de); /* Increments module use count */
- if (bdops) bdev->bd_op = bdops;
-
- error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP);
- devfs_put_ops(de);/*Decrement module use count now we're safe*/
- if (error)
- goto bad_swap_2;
- set_blocksize(dev, PAGE_SIZE);
- error = -ENODEV;
- if (!dev || (blk_size[MAJOR(dev)] &&
- !blk_size[MAJOR(dev)][MINOR(dev)]))
- goto bad_swap;
- swapfilesize = 0;
- if (blk_size[MAJOR(dev)])
- swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
- >> (PAGE_SHIFT - 10);
- } else if (S_ISREG(swap_inode->i_mode))
- swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
- else
- goto bad_swap;
-
- error = -EBUSY;
- for (i = 0 ; i < nr_swapfiles ; i++) {
- struct swap_info_struct *q = &swap_info[i];
- if (i == type || !q->swap_file)
- continue;
- if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
- goto bad_swap;
- }
-
- swap_header = (void *) __get_free_page(GFP_USER);
- if (!swap_header) {
- printk("Unable to start swapping: out of memory :-)\n");
- error = -ENOMEM;
- goto bad_swap;
- }
-
- lock_page(virt_to_page(swap_header));
- rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header);
-
- if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
- swap_header_version = 1;
- else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
- swap_header_version = 2;
- else {
- printk("Unable to find swap-space signature\n");
- error = -EINVAL;
- goto bad_swap;
- }
-
- switch (swap_header_version) {
- case 1:
- memset(((char *) swap_header)+PAGE_SIZE-10,0,10);
- j = 0;
- p->lowest_bit = 0;
- p->highest_bit = 0;
- for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
- if (test_bit(i,(char *) swap_header)) {
- if (!p->lowest_bit)
- p->lowest_bit = i;
- p->highest_bit = i;
- maxpages = i+1;
- j++;
- }
- }
- nr_good_pages = j;
- p->swap_map = vmalloc(maxpages * sizeof(short));
- if (!p->swap_map) {
- error = -ENOMEM;
- goto bad_swap;
- }
- for (i = 1 ; i < maxpages ; i++) {
- if (test_bit(i,(char *) swap_header))
- p->swap_map[i] = 0;
- else
- p->swap_map[i] = SWAP_MAP_BAD;
- }
- break;
-
- case 2:
- /* Check the swap header's sub-version and the size of
- the swap file and bad block lists */
- if (swap_header->info.version != 1) {
- printk(KERN_WARNING
- "Unable to handle swap header version %d\n",
- swap_header->info.version);
- error = -EINVAL;
- goto bad_swap;
- }
-
- p->lowest_bit = 1;
- maxpages = SWP_OFFSET(SWP_ENTRY(0,~0UL)) - 1;
- if (maxpages > swap_header->info.last_page)
- maxpages = swap_header->info.last_page;
- p->highest_bit = maxpages - 1;
-
- error = -EINVAL;
- if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
- goto bad_swap;
-
- /* OK, set up the swap map and apply the bad block list */
- if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
- error = -ENOMEM;
- goto bad_swap;
- }
-
- error = 0;
- memset(p->swap_map, 0, maxpages * sizeof(short));
- for (i=0; i<swap_header->info.nr_badpages; i++) {
- int page = swap_header->info.badpages[i];
- if (page <= 0 || page >= swap_header->info.last_page)
- error = -EINVAL;
- else
- p->swap_map[page] = SWAP_MAP_BAD;
- }
- nr_good_pages = swap_header->info.last_page -
- swap_header->info.nr_badpages -
- 1 /* header page */;
- if (error)
- goto bad_swap;
- }
-
- if (swapfilesize && maxpages > swapfilesize) {
- printk(KERN_WARNING
- "Swap area shorter than signature indicates\n");
- error = -EINVAL;
- goto bad_swap;
- }
- if (!nr_good_pages) {
- printk(KERN_WARNING "Empty swap-file\n");
- error = -EINVAL;
- goto bad_swap;
- }
- p->swap_map[0] = SWAP_MAP_BAD;
- swap_list_lock();
- swap_device_lock(p);
- p->max = maxpages;
- p->flags = SWP_WRITEOK;
- p->pages = nr_good_pages;
- nr_swap_pages += nr_good_pages;
- total_swap_pages += nr_good_pages;
- printk(KERN_INFO "Adding Swap: %dk swap-space (priority %d)\n",
- nr_good_pages<<(PAGE_SHIFT-10), p->prio);
-
- /* insert swap space into swap_list: */
- prev = -1;
- for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
- if (p->prio >= swap_info[i].prio) {
- break;
- }
- prev = i;
- }
- p->next = i;
- if (prev < 0) {
- swap_list.head = swap_list.next = p - swap_info;
- } else {
- swap_info[prev].next = p - swap_info;
- }
- swap_device_unlock(p);
- swap_list_unlock();
- error = 0;
- goto out;
-bad_swap:
- if (bdev)
- blkdev_put(bdev, BDEV_SWAP);
-bad_swap_2:
- swap_list_lock();
- swap_map = p->swap_map;
- nd.mnt = p->swap_vfsmnt;
- nd.dentry = p->swap_file;
- p->swap_device = 0;
- p->swap_file = NULL;
- p->swap_vfsmnt = NULL;
- p->swap_map = NULL;
- p->flags = 0;
- if (!(swap_flags & SWAP_FLAG_PREFER))
- ++least_priority;
- swap_list_unlock();
- if (swap_map)
- vfree(swap_map);
- path_release(&nd);
-out:
- if (swap_header)
- free_page((long) swap_header);
- unlock_kernel();
- return error;
-}
-
-void si_swapinfo(struct sysinfo *val)
-{
- unsigned int i;
- unsigned long nr_to_be_unused = 0;
-
- swap_list_lock();
- for (i = 0; i < nr_swapfiles; i++) {
- unsigned int j;
- if (swap_info[i].flags != SWP_USED)
- continue;
- for (j = 0; j < swap_info[i].max; ++j) {
- switch (swap_info[i].swap_map[j]) {
- case 0:
- case SWAP_MAP_BAD:
- continue;
- default:
- nr_to_be_unused++;
- }
- }
- }
- val->freeswap = nr_swap_pages + nr_to_be_unused;
- val->totalswap = total_swap_pages + nr_to_be_unused;
- swap_list_unlock();
-}
-
-/*
- * Verify that a swap entry is valid and increment its swap map count.
- *
- * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
- * "permanent", but will be reclaimed by the next swapoff.
- */
-int swap_duplicate(swp_entry_t entry)
-{
- struct swap_info_struct * p;
- unsigned long offset, type;
- int result = 0;
-
- type = SWP_TYPE(entry);
- if (type >= nr_swapfiles)
- goto bad_file;
- p = type + swap_info;
- offset = SWP_OFFSET(entry);
-
- swap_device_lock(p);
- if (offset < p->max && p->swap_map[offset]) {
- if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
- p->swap_map[offset]++;
- result = 1;
- } else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
- if (swap_overflow++ < 5)
- printk(KERN_WARNING "swap_dup: swap entry overflow\n");
- p->swap_map[offset] = SWAP_MAP_MAX;
- result = 1;
- }
- }
- swap_device_unlock(p);
-out:
- return result;
-
-bad_file:
- printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
- goto out;
-}
-
-/*
- * Prior swap_duplicate protects against swap device deletion.
- */
-void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
- kdev_t *dev, struct inode **swapf)
-{
- unsigned long type;
- struct swap_info_struct *p;
-
- type = SWP_TYPE(entry);
- if (type >= nr_swapfiles) {
- printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val);
- return;
- }
-
- p = &swap_info[type];
- *offset = SWP_OFFSET(entry);
- if (*offset >= p->max && *offset != 0) {
- printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val);
- return;
- }
- if (p->swap_map && !p->swap_map[*offset]) {
- printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val);
- return;
- }
- if (!(p->flags & SWP_USED)) {
- printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val);
- return;
- }
-
- if (p->swap_device) {
- *dev = p->swap_device;
- } else if (p->swap_file) {
- *swapf = p->swap_file->d_inode;
- } else {
- printk(KERN_ERR "rw_swap_page: no swap file or device\n");
- }
- return;
-}
-
-/*
- * swap_device_lock prevents swap_map being freed. Don't grab an extra
- * reference on the swaphandle, it doesn't matter if it becomes unused.
- */
-int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
-{
- int ret = 0, i = 1 << page_cluster;
- unsigned long toff;
- struct swap_info_struct *swapdev = SWP_TYPE(entry) + swap_info;
-
- if (!page_cluster) /* no readahead */
- return 0;
- toff = (SWP_OFFSET(entry) >> page_cluster) << page_cluster;
- if (!toff) /* first page is swap header */
- toff++, i--;
- *offset = toff;
-
- swap_device_lock(swapdev);
- do {
- /* Don't read-ahead past the end of the swap area */
- if (toff >= swapdev->max)
- break;
- /* Don't read in free or bad pages */
- if (!swapdev->swap_map[toff])
- break;
- if (swapdev->swap_map[toff] == SWAP_MAP_BAD)
- break;
- toff++;
- ret++;
- } while (--i);
- swap_device_unlock(swapdev);
- return ret;
-}
diff --git a/linux-2.4.30-xen-sparse/mm/vmalloc.c b/linux-2.4.30-xen-sparse/mm/vmalloc.c
deleted file mode 100644
index df02fcbf7a..0000000000
--- a/linux-2.4.30-xen-sparse/mm/vmalloc.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * linux/mm/vmalloc.c
- *
- * Copyright (C) 1993 Linus Torvalds
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
- * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
- */
-
-#include <linux/config.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/spinlock.h>
-#include <linux/highmem.h>
-#include <linux/smp_lock.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-
-rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
-struct vm_struct * vmlist;
-
-static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size)
-{
- pte_t * pte;
- unsigned long end;
-
- if (pmd_none(*pmd))
- return;
- if (pmd_bad(*pmd)) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return;
- }
- pte = pte_offset(pmd, address);
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- do {
- pte_t page;
- page = ptep_get_and_clear(pte);
- address += PAGE_SIZE;
- pte++;
- if (pte_none(page))
- continue;
- if (pte_present(page)) {
- struct page *ptpage = pte_page(page);
- if (VALID_PAGE(ptpage) && (!PageReserved(ptpage)))
- __free_page(ptpage);
- continue;
- }
- printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
- } while (address < end);
-}
-
-static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size)
-{
- pmd_t * pmd;
- unsigned long end;
-
- if (pgd_none(*dir))
- return;
- if (pgd_bad(*dir)) {
- pgd_ERROR(*dir);
- pgd_clear(dir);
- return;
- }
- pmd = pmd_offset(dir, address);
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- do {
- free_area_pte(pmd, address, end - address);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address < end);
-}
-
-void vmfree_area_pages(unsigned long address, unsigned long size)
-{
- pgd_t * dir;
- unsigned long end = address + size;
-
- dir = pgd_offset_k(address);
- flush_cache_all();
- do {
- free_area_pmd(dir, address, end - address);
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
- } while (address && (address < end));
- flush_tlb_all();
-}
-
-static inline int alloc_area_pte (pte_t * pte, unsigned long address,
- unsigned long size, int gfp_mask,
- pgprot_t prot, struct page ***pages)
-{
- unsigned long end;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- do {
- struct page * page;
-
- if (!pages) {
- spin_unlock(&init_mm.page_table_lock);
- page = alloc_page(gfp_mask);
- spin_lock(&init_mm.page_table_lock);
- } else {
- page = (**pages);
- (*pages)++;
-
- /* Add a reference to the page so we can free later */
- if (page)
- atomic_inc(&page->count);
-
- }
- if (!pte_none(*pte))
- printk(KERN_ERR "alloc_area_pte: page already exists\n");
- if (!page)
- return -ENOMEM;
- set_pte(pte, mk_pte(page, prot));
- address += PAGE_SIZE;
- pte++;
- } while (address < end);
- return 0;
-}
-
-static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address,
- unsigned long size, int gfp_mask,
- pgprot_t prot, struct page ***pages)
-{
- unsigned long end;
-
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- do {
- pte_t * pte = pte_alloc(&init_mm, pmd, address);
- if (!pte)
- return -ENOMEM;
- if (alloc_area_pte(pte, address, end - address,
- gfp_mask, prot, pages))
- return -ENOMEM;
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address < end);
- return 0;
-}
-
-/*static inline*/ int __vmalloc_area_pages (unsigned long address,
- unsigned long size,
- int gfp_mask,
- pgprot_t prot,
- struct page ***pages)
-{
- pgd_t * dir;
- unsigned long start = address;
- unsigned long end = address + size;
-
- dir = pgd_offset_k(address);
- spin_lock(&init_mm.page_table_lock);
- do {
- pmd_t *pmd;
-
- pmd = pmd_alloc(&init_mm, dir, address);
- if (!pmd)
- goto err;
-
- if (alloc_area_pmd(pmd, address, end - address, gfp_mask, prot, pages))
- goto err; // The kernel NEVER reclaims pmds, so no need to undo pmd_alloc() here
-
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
- } while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
- flush_cache_all();
- XEN_flush_page_update_queue();
- return 0;
-err:
- spin_unlock(&init_mm.page_table_lock);
- flush_cache_all();
- if (address > start)
- vmfree_area_pages(start, address - start);
- return -ENOMEM;
-}
-
-int vmalloc_area_pages(unsigned long address, unsigned long size,
- int gfp_mask, pgprot_t prot)
-{
- return __vmalloc_area_pages(address, size, gfp_mask, prot, NULL);
-}
-
-struct vm_struct * get_vm_area(unsigned long size, unsigned long flags)
-{
- unsigned long addr, next;
- struct vm_struct **p, *tmp, *area;
-
- area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
- if (!area)
- return NULL;
-
- size += PAGE_SIZE;
- if (!size) {
- kfree (area);
- return NULL;
- }
-
- addr = VMALLOC_START;
- write_lock(&vmlist_lock);
- for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
- if ((size + addr) < addr)
- goto out;
- if (size + addr <= (unsigned long) tmp->addr)
- break;
- next = tmp->size + (unsigned long) tmp->addr;
- if (next > addr)
- addr = next;
- if (addr > VMALLOC_END-size)
- goto out;
- }
- area->flags = flags;
- area->addr = (void *)addr;
- area->size = size;
- area->next = *p;
- *p = area;
- write_unlock(&vmlist_lock);
- return area;
-
-out:
- write_unlock(&vmlist_lock);
- kfree(area);
- return NULL;
-}
-
-void __vfree(void * addr, int free_area_pages)
-{
- struct vm_struct **p, *tmp;
-
- if (!addr)
- return;
- if ((PAGE_SIZE-1) & (unsigned long) addr) {
- printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
- return;
- }
- write_lock(&vmlist_lock);
- for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
- if (tmp->addr == addr) {
- *p = tmp->next;
- if (free_area_pages)
- vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
- write_unlock(&vmlist_lock);
- kfree(tmp);
- return;
- }
- }
- write_unlock(&vmlist_lock);
- printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", addr);
-}
-
-void vfree(void * addr)
-{
- __vfree(addr,1);
-}
-
-void * __vmalloc (unsigned long size, int gfp_mask, pgprot_t prot)
-{
- void * addr;
- struct vm_struct *area;
-
- size = PAGE_ALIGN(size);
- if (!size || (size >> PAGE_SHIFT) > num_physpages)
- return NULL;
- area = get_vm_area(size, VM_ALLOC);
- if (!area)
- return NULL;
- addr = area->addr;
- if (__vmalloc_area_pages(VMALLOC_VMADDR(addr), size, gfp_mask,
- prot, NULL)) {
- __vfree(addr, 0);
- return NULL;
- }
- return addr;
-}
-
-void * vmap(struct page **pages, int count,
- unsigned long flags, pgprot_t prot)
-{
- void * addr;
- struct vm_struct *area;
- unsigned long size = count << PAGE_SHIFT;
-
- if (!size || size > (max_mapnr << PAGE_SHIFT))
- return NULL;
- area = get_vm_area(size, flags);
- if (!area) {
- return NULL;
- }
- addr = area->addr;
- if (__vmalloc_area_pages(VMALLOC_VMADDR(addr), size, 0,
- prot, &pages)) {
- __vfree(addr, 0);
- return NULL;
- }
- return addr;
-}
-
-long vread(char *buf, char *addr, unsigned long count)
-{
- struct vm_struct *tmp;
- char *vaddr, *buf_start = buf;
- unsigned long n;
-
- /* Don't allow overflow */
- if ((unsigned long) addr + count < count)
- count = -(unsigned long) addr;
-
- read_lock(&vmlist_lock);
- for (tmp = vmlist; tmp; tmp = tmp->next) {
- vaddr = (char *) tmp->addr;
- if (addr >= vaddr + tmp->size - PAGE_SIZE)
- continue;
- while (addr < vaddr) {
- if (count == 0)
- goto finished;
- *buf = '\0';
- buf++;
- addr++;
- count--;
- }
- n = vaddr + tmp->size - PAGE_SIZE - addr;
- do {
- if (count == 0)
- goto finished;
- *buf = *addr;
- buf++;
- addr++;
- count--;
- } while (--n > 0);
- }
-finished:
- read_unlock(&vmlist_lock);
- return buf - buf_start;
-}
-
-long vwrite(char *buf, char *addr, unsigned long count)
-{
- struct vm_struct *tmp;
- char *vaddr, *buf_start = buf;
- unsigned long n;
-
- /* Don't allow overflow */
- if ((unsigned long) addr + count < count)
- count = -(unsigned long) addr;
-
- read_lock(&vmlist_lock);
- for (tmp = vmlist; tmp; tmp = tmp->next) {
- vaddr = (char *) tmp->addr;
- if (addr >= vaddr + tmp->size - PAGE_SIZE)
- continue;
- while (addr < vaddr) {
- if (count == 0)
- goto finished;
- buf++;
- addr++;
- count--;
- }
- n = vaddr + tmp->size - PAGE_SIZE - addr;
- do {
- if (count == 0)
- goto finished;
- *addr = *buf;
- buf++;
- addr++;
- count--;
- } while (--n > 0);
- }
-finished:
- read_unlock(&vmlist_lock);
- return buf - buf_start;
-}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
index 27eac46739..480c4e8fd1 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
@@ -48,6 +48,28 @@ config XEN_BLKDEV_BACKEND
block devices to other guests via a high-performance shared-memory
interface.
+config XEN_BLKDEV_TAP_BE
+ bool "Block Tap support for backend driver (DANGEROUS)"
+ depends on XEN_BLKDEV_BACKEND
+ default n
+ help
+ If you intend to use the block tap driver, the backend domain will
+ not know the domain id of the real frontend, and so will not be able
+ to map its data pages. This modifies the backend to attempt to map
+ from both the tap domain and the real frontend. This presents a
+ security risk, and so should ONLY be used for development
+ with the blktap. This option will be removed as the block drivers are
+ modified to use grant tables.
+
+config XEN_BLKDEV_GRANT
+ bool "Grant table substrate for block drivers"
+ depends on !XEN_BLKDEV_TAP_BE
+ default y
+ help
+ This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend block drivers. This currently
+ conflicts with the block tap.
+
config XEN_NETDEV_BACKEND
bool "Network-device backend driver"
depends on XEN_PHYSDEV_ACCESS
@@ -92,9 +114,21 @@ config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
are unsure; or if you experience network hangs when this option is
enabled; then you must say N here.
-config XEN_WRITABLE_PAGETABLES
- bool
- default y
+config XEN_BLKDEV_TAP
+ bool "Block device tap driver"
+ default n
+ help
+ This driver allows a VM to interact on block device channels
+ to other VMs. Block messages may be passed through or redirected
+ to a character device, allowing device prototyping in application
+ space. Odds are that you want to say N here.
+
+config XEN_SHADOW_MODE
+ bool "Fake shadow mode"
+ default n
+ help
+ Fakes out a shadow mode kernel.
+
config XEN_SCRUB_PAGES
bool "Scrub memory before freeing it to Xen"
@@ -109,17 +143,17 @@ config XEN_SCRUB_PAGES
choice
prompt "Processor Type"
- default X86
+ default XEN_X86
-config X86
+config XEN_X86
bool "X86"
help
Choose this option if your computer is a X86 architecture.
-config X86_64
+config XEN_X86_64
bool "X86_64"
help
- Choose this option if your computer is a X86 architecture.
+ Choose this option if your computer is an X86_64 architecture.
endchoice
@@ -131,10 +165,14 @@ config HAVE_ARCH_DEV_ALLOC_SKB
source "init/Kconfig"
-if X86
+if XEN_X86
source "arch/xen/i386/Kconfig"
endif
+if XEN_X86_64
+source "arch/xen/x86_64/Kconfig"
+endif
+
menu "Executable file formats"
source "fs/Kconfig.binfmt"
@@ -143,6 +181,12 @@ endmenu
source "arch/xen/Kconfig.drivers"
+if XEN_PRIVILEGED_GUEST
+menu "Power management options"
+source "drivers/acpi/Kconfig"
+endmenu
+endif
+
source "fs/Kconfig"
source "security/Kconfig"
diff --git a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
index 70f8cd69ca..b869de31c7 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.11-xen0
-# Tue May 3 13:22:55 2005
+# Wed May 4 17:11:56 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -13,14 +13,17 @@ CONFIG_NO_IDLE_HZ=y
CONFIG_XEN_PRIVILEGED_GUEST=y
CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
+CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_NETDEV_BACKEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
-CONFIG_XEN_WRITABLE_PAGETABLES=y
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SHADOW_MODE is not set
CONFIG_XEN_SCRUB_PAGES=y
-CONFIG_X86=y
-# CONFIG_X86_64 is not set
+CONFIG_XEN_X86=y
+# CONFIG_XEN_X86_64 is not set
CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
#
@@ -75,6 +78,7 @@ CONFIG_KMOD=y
# X86 Processor Configuration
#
CONFIG_XENARCH="i386"
+CONFIG_X86=y
CONFIG_MMU=y
CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
@@ -129,12 +133,23 @@ CONFIG_NOHIGHMEM=y
CONFIG_MTRR=y
CONFIG_HAVE_DEC_LOCK=y
# CONFIG_REGPARM is not set
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
#
# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
#
+CONFIG_X86_UP_APIC=y
+CONFIG_X86_UP_IOAPIC=y
CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GOMMCONFIG is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
+# CONFIG_PCI_BIOS is not set
CONFIG_PCI_DIRECT=y
+# CONFIG_PCIEPORTBUS is not set
+# CONFIG_PCI_MSI is not set
CONFIG_PCI_LEGACY_PROC=y
# CONFIG_PCI_NAMES is not set
CONFIG_ISA=y
@@ -161,17 +176,25 @@ CONFIG_PCMCIA_PROBE=y
# Kernel hacking
#
CONFIG_DEBUG_KERNEL=y
-CONFIG_EARLY_PRINTK=y
-# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_SLAB is not set
CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
# CONFIG_FRAME_POINTER is not set
+CONFIG_EARLY_PRINTK=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_4KSTACKS is not set
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
@@ -595,7 +618,7 @@ CONFIG_NETDEVICES=y
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
+CONFIG_TUN=y
#
# ARCnet devices
@@ -838,6 +861,7 @@ CONFIG_DRM_MGA=m
CONFIG_DRM_SIS=m
# CONFIG_MWAVE is not set
# CONFIG_RAW_DRIVER is not set
+# CONFIG_HPET is not set
# CONFIG_HANGCHECK_TIMER is not set
#
@@ -1004,6 +1028,37 @@ CONFIG_USB_HIDINPUT=y
# CONFIG_INFINIBAND is not set
#
+# Power management options
+#
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI=y
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_AC=m
+CONFIG_ACPI_BATTERY=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_VIDEO=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_ASUS=m
+CONFIG_ACPI_IBM=m
+CONFIG_ACPI_TOSHIBA=m
+# CONFIG_ACPI_CUSTOM_DSDT is not set
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+# CONFIG_X86_PM_TIMER is not set
+# CONFIG_ACPI_CONTAINER is not set
+
+#
# File systems
#
CONFIG_EXT2_FS=y
@@ -1030,7 +1085,7 @@ CONFIG_REISERFS_FS=y
# CONFIG_QUOTA is not set
CONFIG_DNOTIFY=y
CONFIG_AUTOFS_FS=y
-# CONFIG_AUTOFS4_FS is not set
+CONFIG_AUTOFS4_FS=y
#
# CD-ROM/DVD Filesystems
diff --git a/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
new file mode 100644
index 0000000000..7df45e168c
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
@@ -0,0 +1,1023 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.11.1-xen0
+# Tue May 10 11:07:02 2005
+#
+CONFIG_XEN=y
+CONFIG_ARCH_XEN=y
+CONFIG_NO_IDLE_HZ=y
+
+#
+# XEN
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+CONFIG_XEN_PHYSDEV_ACCESS=y
+CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
+CONFIG_XEN_BLKDEV_GRANT=y
+CONFIG_XEN_NETDEV_BACKEND=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SHADOW_MODE is not set
+CONFIG_XEN_SCRUB_PAGES=y
+# CONFIG_XEN_X86 is not set
+CONFIG_XEN_X86_64=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+# CONFIG_CLEAN_COMPILE is not set
+CONFIG_BROKEN=y
+CONFIG_BROKEN_ON_SMP=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_HOTPLUG is not set
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_XENARCH="x86_64"
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_GOOD_APIC=y
+# CONFIG_HPET_TIMER is not set
+# CONFIG_SMP is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_MICROCODE is not set
+# CONFIG_X86_CPUID is not set
+# CONFIG_NUMA is not set
+# CONFIG_MTRR is not set
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+CONFIG_PCI=y
+CONFIG_PCI_DIRECT=y
+# CONFIG_PCI_MMCONFIG is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+
+#
+# X86_64 processor configuration
+#
+CONFIG_X86_64=y
+CONFIG_64BIT=y
+
+#
+# Processor type and features
+#
+# CONFIG_MPSC is not set
+CONFIG_GENERIC_CPU=y
+CONFIG_X86_L1_CACHE_BYTES=128
+# CONFIG_X86_TSC is not set
+# CONFIG_X86_MSR is not set
+# CONFIG_GART_IOMMU is not set
+CONFIG_DUMMY_IOMMU=y
+# CONFIG_X86_MCE is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Bus options (PCI etc.)
+#
+# CONFIG_UNORDERED_IO is not set
+
+#
+# Executable file formats / Emulations
+#
+# CONFIG_IA32_EMULATION is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=y
+# CONFIG_BLK_CPQ_DA is not set
+CONFIG_BLK_CPQ_CISS_DA=y
+# CONFIG_CISS_SCSI_TAPE is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_LBD is not set
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+# CONFIG_BLK_DEV_CMD640 is not set
+CONFIG_BLK_DEV_IDEPCI=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+# CONFIG_BLK_DEV_RZ1000 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_ATIIXP is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+CONFIG_BLK_DEV_SVWKS=y
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SIS5513 is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+CONFIG_BLK_DEV_3W_XXXX_RAID=y
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+CONFIG_SCSI_AACRAID=y
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set
+CONFIG_AIC7XXX_DEBUG_ENABLE=y
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+CONFIG_AIC79XX_DEBUG_ENABLE=y
+CONFIG_AIC79XX_DEBUG_MASK=0
+CONFIG_AIC79XX_REG_PRETTY_PRINT=y
+# CONFIG_SCSI_ADVANSYS is not set
+CONFIG_MEGARAID_NEWGEN=y
+# CONFIG_MEGARAID_MM is not set
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_SATA_AHCI is not set
+# CONFIG_SCSI_SATA_SVW is not set
+CONFIG_SCSI_ATA_PIIX=y
+# CONFIG_SCSI_SATA_NV is not set
+CONFIG_SCSI_SATA_PROMISE=y
+# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_SX4=y
+CONFIG_SCSI_SATA_SIL=y
+# CONFIG_SCSI_SATA_SIS is not set
+# CONFIG_SCSI_SATA_ULI is not set
+# CONFIG_SCSI_SATA_VIA is not set
+# CONFIG_SCSI_SATA_VITESSE is not set
+CONFIG_SCSI_BUSLOGIC=y
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_CPQFCTS is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+CONFIG_SCSI_QLA2XXX=y
+# CONFIG_SCSI_QLA21XX is not set
+# CONFIG_SCSI_QLA22XX is not set
+# CONFIG_SCSI_QLA2300 is not set
+# CONFIG_SCSI_QLA2322 is not set
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=y
+CONFIG_FUSION_MAX_SGE=40
+# CONFIG_FUSION_CTL is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+# CONFIG_NETLINK_DEV is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_BRIDGE_NETFILTER=y
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_CT_ACCT=y
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_CT_PROTO_SCTP is not set
+CONFIG_IP_NF_FTP=m
+# CONFIG_IP_NF_IRC is not set
+# CONFIG_IP_NF_TFTP is not set
+# CONFIG_IP_NF_AMANDA is not set
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=m
+# CONFIG_IP_NF_MATCH_LIMIT is not set
+CONFIG_IP_NF_MATCH_IPRANGE=m
+# CONFIG_IP_NF_MATCH_MAC is not set
+# CONFIG_IP_NF_MATCH_PKTTYPE is not set
+# CONFIG_IP_NF_MATCH_MARK is not set
+# CONFIG_IP_NF_MATCH_MULTIPORT is not set
+# CONFIG_IP_NF_MATCH_TOS is not set
+# CONFIG_IP_NF_MATCH_RECENT is not set
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+# CONFIG_IP_NF_MATCH_LENGTH is not set
+# CONFIG_IP_NF_MATCH_TTL is not set
+# CONFIG_IP_NF_MATCH_TCPMSS is not set
+# CONFIG_IP_NF_MATCH_HELPER is not set
+# CONFIG_IP_NF_MATCH_STATE is not set
+# CONFIG_IP_NF_MATCH_CONNTRACK is not set
+# CONFIG_IP_NF_MATCH_OWNER is not set
+# CONFIG_IP_NF_MATCH_PHYSDEV is not set
+# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
+# CONFIG_IP_NF_MATCH_REALM is not set
+# CONFIG_IP_NF_MATCH_SCTP is not set
+# CONFIG_IP_NF_MATCH_COMMENT is not set
+# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+# CONFIG_IP_NF_TARGET_LOG is not set
+# CONFIG_IP_NF_TARGET_ULOG is not set
+# CONFIG_IP_NF_TARGET_TCPMSS is not set
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+# CONFIG_IP_NF_TARGET_REDIRECT is not set
+# CONFIG_IP_NF_TARGET_NETMAP is not set
+# CONFIG_IP_NF_TARGET_SAME is not set
+# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
+CONFIG_IP_NF_NAT_FTP=m
+# CONFIG_IP_NF_MANGLE is not set
+# CONFIG_IP_NF_RAW is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+# Bridge: Netfilter Configuration
+#
+# CONFIG_BRIDGE_NF_EBTABLES is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+CONFIG_BRIDGE=y
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=y
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+CONFIG_NET_VENDOR_3COM=y
+CONFIG_VORTEX=y
+# CONFIG_TYPHOON is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_HP100 is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=y
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+CONFIG_E100=y
+# CONFIG_E100_NAPI is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+CONFIG_NE2K_PCI=y
+# CONFIG_8139CP is not set
+CONFIG_8139TOO=y
+CONFIG_8139TOO_PIO=y
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+CONFIG_VIA_RHINE=y
+# CONFIG_VIA_RHINE_MMIO is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+CONFIG_ACENIC=y
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_TIGON3=y
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+CONFIG_RTC=y
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+CONFIG_AGP=m
+CONFIG_AGP_AMD64=m
+CONFIG_AGP_INTEL_MCH=m
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+# CONFIG_DRM_GAMMA is not set
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_SIS=m
+# CONFIG_MWAVE is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HPET is not set
+# CONFIG_HANGCHECK_TIMER is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+# CONFIG_IBM_ASM is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+# CONFIG_VIDEO_SELECT is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB is not set
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# Power management options
+#
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI=y
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_AC=m
+CONFIG_ACPI_BATTERY=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_VIDEO=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_ASUS=m
+CONFIG_ACPI_IBM=m
+CONFIG_ACPI_TOSHIBA=m
+CONFIG_ACPI_BLACKLIST_YEAR=0
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+# CONFIG_ACPI_CONTAINER is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_REISERFS_FS_XATTR is not set
+# CONFIG_JFS_FS is not set
+
+#
+# XFS support
+#
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_ZISOFS_FS=y
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_XATTR is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V4 is not set
+CONFIG_NFSD_TCP=y
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=m
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=m
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+CONFIG_CRYPTO_CRC32C=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=y
+CONFIG_ZLIB_INFLATE=y
diff --git a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
index 37fce4937e..478d412669 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.11-xenU
-# Wed Apr 13 23:18:37 2005
+# Wed May 4 17:14:10 2005
#
CONFIG_XEN=y
CONFIG_ARCH_XEN=y
@@ -12,13 +12,15 @@ CONFIG_NO_IDLE_HZ=y
#
# CONFIG_XEN_PRIVILEGED_GUEST is not set
# CONFIG_XEN_PHYSDEV_ACCESS is not set
+CONFIG_XEN_BLKDEV_GRANT=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
-CONFIG_XEN_WRITABLE_PAGETABLES=y
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SHADOW_MODE is not set
CONFIG_XEN_SCRUB_PAGES=y
-CONFIG_X86=y
-# CONFIG_X86_64 is not set
+CONFIG_XEN_X86=y
+# CONFIG_XEN_X86_64 is not set
CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
#
@@ -72,6 +74,7 @@ CONFIG_KMOD=y
# X86 Processor Configuration
#
CONFIG_XENARCH="i386"
+CONFIG_X86=y
CONFIG_MMU=y
CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
@@ -129,16 +132,22 @@ CONFIG_HAVE_DEC_LOCK=y
# Kernel hacking
#
CONFIG_DEBUG_KERNEL=y
-CONFIG_EARLY_PRINTK=y
-# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_SLAB is not set
CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
# CONFIG_FRAME_POINTER is not set
+CONFIG_EARLY_PRINTK=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_4KSTACKS is not set
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
diff --git a/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
new file mode 100644
index 0000000000..e28efc0d5b
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
@@ -0,0 +1,897 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.11.10-xenU
+# Mon May 23 15:07:58 2005
+#
+CONFIG_XEN=y
+CONFIG_ARCH_XEN=y
+CONFIG_NO_IDLE_HZ=y
+
+#
+# XEN
+#
+# CONFIG_XEN_PRIVILEGED_GUEST is not set
+# CONFIG_XEN_PHYSDEV_ACCESS is not set
+CONFIG_XEN_BLKDEV_GRANT=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
+# CONFIG_XEN_BLKDEV_TAP is not set
+# CONFIG_XEN_SHADOW_MODE is not set
+CONFIG_XEN_SCRUB_PAGES=y
+# CONFIG_XEN_X86 is not set
+CONFIG_XEN_X86_64=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_BROKEN_ON_SMP=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+CONFIG_SYSCTL=y
+CONFIG_AUDIT=y
+CONFIG_AUDITSYSCALL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+CONFIG_MODVERSIONS=y
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_XENARCH="x86_64"
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_X86_GOOD_APIC=y
+# CONFIG_HPET_TIMER is not set
+# CONFIG_SMP is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_MICROCODE is not set
+CONFIG_X86_CPUID=y
+# CONFIG_NUMA is not set
+# CONFIG_MTRR is not set
+# CONFIG_X86_LOCAL_APIC is not set
+# CONFIG_X86_IO_APIC is not set
+# CONFIG_PCI is not set
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+
+#
+# X86_64 processor configuration
+#
+CONFIG_X86_64=y
+CONFIG_64BIT=y
+
+#
+# Processor type and features
+#
+CONFIG_MPSC=y
+# CONFIG_GENERIC_CPU is not set
+CONFIG_X86_L1_CACHE_BYTES=128
+# CONFIG_X86_TSC is not set
+# CONFIG_X86_MSR is not set
+CONFIG_DUMMY_IOMMU=y
+# CONFIG_X86_MCE is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
+
+#
+# Bus options (PCI etc.)
+#
+# CONFIG_UNORDERED_IO is not set
+
+#
+# Executable file formats / Emulations
+#
+# CONFIG_IA32_EMULATION is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=m
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_LBD=y
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=m
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=m
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=m
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID5=m
+CONFIG_MD_RAID6=m
+CONFIG_MD_MULTIPATH=m
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_NETLINK_DEV=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_TUNNEL=m
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+
+#
+# IP: Virtual Server Configuration
+#
+CONFIG_IP_VS=m
+# CONFIG_IP_VS_DEBUG is not set
+CONFIG_IP_VS_TAB_BITS=12
+
+#
+# IPVS transport protocol load balancing support
+#
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+
+#
+# IPVS scheduler
+#
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+
+#
+# IPVS application helper
+#
+CONFIG_IP_VS_FTP=m
+CONFIG_IPV6=m
+CONFIG_IPV6_PRIVACY=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_TUNNEL=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_BRIDGE_NETFILTER=y
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_CT_ACCT=y
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+CONFIG_IP_NF_CT_PROTO_SCTP=m
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_LIMIT=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_MAC=m
+CONFIG_IP_NF_MATCH_PKTTYPE=m
+CONFIG_IP_NF_MATCH_MARK=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_DSCP=m
+CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_LENGTH=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_MATCH_TCPMSS=m
+CONFIG_IP_NF_MATCH_HELPER=m
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+CONFIG_IP_NF_MATCH_OWNER=m
+CONFIG_IP_NF_MATCH_PHYSDEV=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_REALM=m
+CONFIG_IP_NF_MATCH_SCTP=m
+CONFIG_IP_NF_MATCH_COMMENT=m
+# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+CONFIG_IP_NF_NAT_SNMP_BASIC=m
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_DSCP=m
+CONFIG_IP_NF_TARGET_MARK=m
+CONFIG_IP_NF_TARGET_CLASSIFY=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_NOTRACK=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+
+#
+# IPv6: Netfilter Configuration
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_LIMIT=m
+CONFIG_IP6_NF_MATCH_MAC=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_MARK=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_LENGTH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_PHYSDEV=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_MARK=m
+CONFIG_IP6_NF_RAW=m
+
+#
+# Bridge: Netfilter Configuration
+#
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+# CONFIG_BRIDGE_EBT_ULOG is not set
+CONFIG_XFRM=y
+CONFIG_XFRM_USER=y
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+CONFIG_IP_SCTP=m
+# CONFIG_SCTP_DBG_MSG is not set
+# CONFIG_SCTP_DBG_OBJCNT is not set
+# CONFIG_SCTP_HMAC_NONE is not set
+# CONFIG_SCTP_HMAC_SHA1 is not set
+CONFIG_SCTP_HMAC_MD5=y
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+# CONFIG_ATM_CLIP_NO_ICMP is not set
+CONFIG_ATM_LANE=m
+# CONFIG_ATM_MPOA is not set
+CONFIG_ATM_BR2684=m
+# CONFIG_ATM_BR2684_IPFILTER is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+# CONFIG_DECNET is not set
+CONFIG_LLC=m
+# CONFIG_LLC2 is not set
+CONFIG_IPX=m
+# CONFIG_IPX_INTERN is not set
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=y
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+CONFIG_NET_DIVERT=y
+# CONFIG_ECONET is not set
+CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CLK_JIFFIES=y
+# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
+# CONFIG_NET_SCH_CLK_CPU is not set
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_QOS=y
+CONFIG_NET_ESTIMATOR=y
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_NET_CLS_IND=y
+# CONFIG_CLS_U32_MARK is not set
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+# CONFIG_NET_CLS_ACT is not set
+CONFIG_NET_CLS_POLICE=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+CONFIG_NETPOLL_TRAP=y
+CONFIG_NET_POLL_CONTROLLER=y
+# CONFIG_HAMRADIO is not set
+CONFIG_IRDA=m
+
+#
+# IrDA protocols
+#
+CONFIG_IRLAN=m
+CONFIG_IRNET=m
+CONFIG_IRCOMM=m
+# CONFIG_IRDA_ULTRA is not set
+
+#
+# IrDA options
+#
+CONFIG_IRDA_CACHE_LAST_LSAP=y
+CONFIG_IRDA_FAST_RR=y
+# CONFIG_IRDA_DEBUG is not set
+
+#
+# Infrared-port device drivers
+#
+
+#
+# SIR device drivers
+#
+CONFIG_IRTTY_SIR=m
+
+#
+# Dongle support
+#
+CONFIG_DONGLE=y
+CONFIG_ESI_DONGLE=m
+CONFIG_ACTISYS_DONGLE=m
+CONFIG_TEKRAM_DONGLE=m
+CONFIG_LITELINK_DONGLE=m
+CONFIG_MA600_DONGLE=m
+CONFIG_GIRBIL_DONGLE=m
+CONFIG_MCP2120_DONGLE=m
+CONFIG_OLD_BELKIN_DONGLE=m
+CONFIG_ACT200L_DONGLE=m
+
+#
+# Old SIR device drivers
+#
+# CONFIG_IRPORT_SIR is not set
+
+#
+# Old Serial dongle support
+#
+
+#
+# FIR device drivers
+#
+CONFIG_BT=m
+CONFIG_BT_L2CAP=m
+CONFIG_BT_SCO=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_BCSP_TXCRC=y
+CONFIG_BT_HCIVHCI=m
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+CONFIG_EQUALIZER=m
+CONFIG_TUN=m
+CONFIG_ETHERTAP=m
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+
+#
+# Ethernet (1000 Mbit)
+#
+
+#
+# Ethernet (10000 Mbit)
+#
+
+#
+# Token Ring devices
+#
+
+#
+# Wireless LAN (non-hamradio)
+#
+CONFIG_NET_RADIO=y
+
+#
+# Obsolete Wireless cards support (pre-802.11)
+#
+# CONFIG_STRIP is not set
+CONFIG_ATMEL=m
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+
+#
+# ATM drivers
+#
+CONFIG_ATM_TCP=m
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_PPP_DEFLATE=m
+# CONFIG_PPP_BSDCOMP is not set
+CONFIG_PPPOE=m
+CONFIG_PPPOATM=m
+# CONFIG_SLIP is not set
+# CONFIG_SHAPER is not set
+CONFIG_NETCONSOLE=m
+CONFIG_INPUT=m
+CONFIG_UNIX98_PTYS=y
+# CONFIG_LEGACY_PTYS is not set
+
+#
+# Character devices
+#
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=m
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JBD=m
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=m
+# CONFIG_REISERFS_CHECK is not set
+CONFIG_REISERFS_PROC_INFO=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+# CONFIG_JFS_SECURITY is not set
+# CONFIG_JFS_DEBUG is not set
+# CONFIG_JFS_STATISTICS is not set
+CONFIG_FS_POSIX_ACL=y
+
+#
+# XFS support
+#
+CONFIG_XFS_FS=m
+CONFIG_XFS_EXPORT=y
+# CONFIG_XFS_RT is not set
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_SECURITY=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_QUOTA=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_QUOTACTL=y
+CONFIG_DNOTIFY=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_ZISOFS_FS=y
+CONFIG_UDF_FS=m
+CONFIG_UDF_NLS=y
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+CONFIG_DEVPTS_FS_XATTR=y
+CONFIG_DEVPTS_FS_SECURITY=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_TMPFS_SECURITY=y
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+CONFIG_AFFS_FS=m
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+# CONFIG_BEFS_DEBUG is not set
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_CRAMFS=m
+CONFIG_VXFS_FS=m
+# CONFIG_HPFS_FS is not set
+CONFIG_QNX4FS_FS=m
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+# CONFIG_UFS_FS_WRITE is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=m
+CONFIG_SUNRPC_GSS=m
+CONFIG_RPCSEC_GSS_KRB5=m
+CONFIG_RPCSEC_GSS_SPKM3=m
+CONFIG_SMB_FS=m
+# CONFIG_SMB_NLS_DEFAULT is not set
+CONFIG_CIFS=m
+# CONFIG_CIFS_STATS is not set
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_EXPERIMENTAL is not set
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_SMALLDOS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+CONFIG_OSF_PARTITION=y
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+# CONFIG_LDM_PARTITION is not set
+CONFIG_SGI_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
+CONFIG_SUN_PARTITION=y
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_CAPABILITIES=y
+# CONFIG_SECURITY_SECLVL is not set
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_SECURITY_SELINUX_DEVELOP=y
+CONFIG_SECURITY_SELINUX_AVC_STATS=y
+# CONFIG_SECURITY_SELINUX_MLS is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_SERPENT=m
+# CONFIG_CRYPTO_AES is not set
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_KHAZAD=m
+# CONFIG_CRYPTO_ANUBIS is not set
+CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_CRC32C=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
+
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig
index fb421d855d..d1ffcb05d1 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig
@@ -9,6 +9,15 @@ config XENARCH
string
default i386
+config X86
+ bool
+ default y
+ help
+ This is Linux's home port. Linux was originally native to the Intel
+ 386, and runs on all the later x86 processors including the Intel
+ 486, 586, Pentiums, and various instruction-set-compatible chips by
+ AMD, Cyrix, and others.
+
config MMU
bool
default y
@@ -335,36 +344,33 @@ config HPET_EMULATE_RTC
def_bool HPET_TIMER && RTC=y
config SMP
- bool
- default n
-#config SMP
-# bool "Symmetric multi-processing support"
-# ---help---
-# This enables support for systems with more than one CPU. If you have
-# a system with only one CPU, like most personal computers, say N. If
-# you have a system with more than one CPU, say Y.
-#
-# If you say N here, the kernel will run on single and multiprocessor
-# machines, but will use only one CPU of a multiprocessor machine. If
-# you say Y here, the kernel will run on many, but not all,
-# singleprocessor machines. On a singleprocessor machine, the kernel
-# will run faster if you say N here.
-#
-# Note that if you say Y here and choose architecture "586" or
-# "Pentium" under "Processor family", the kernel will not work on 486
-# architectures. Similarly, multiprocessor kernels for the "PPro"
-# architecture may not work on all Pentium based boards.
-#
-# People using multiprocessor machines who say Y here should also say
-# Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
-# Management" code will be disabled if you say Y here.
-#
-# See also the <file:Documentation/smp.txt>,
-# <file:Documentation/i386/IO-APIC.txt>,
-# <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
-# <http://www.tldp.org/docs.html#howto>.
-#
-# If you don't know what to do here, say N.
+ bool "Symmetric multi-processing support"
+ ---help---
+ This enables support for systems with more than one CPU. If you have
+ a system with only one CPU, like most personal computers, say N. If
+ you have a system with more than one CPU, say Y.
+
+ If you say N here, the kernel will run on single and multiprocessor
+ machines, but will use only one CPU of a multiprocessor machine. If
+ you say Y here, the kernel will run on many, but not all,
+ singleprocessor machines. On a singleprocessor machine, the kernel
+ will run faster if you say N here.
+
+ Note that if you say Y here and choose architecture "586" or
+ "Pentium" under "Processor family", the kernel will not work on 486
+ architectures. Similarly, multiprocessor kernels for the "PPro"
+ architecture may not work on all Pentium based boards.
+
+ People using multiprocessor machines who say Y here should also say
+ Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+ Management" code will be disabled if you say Y here.
+
+ See also the <file:Documentation/smp.txt>,
+ <file:Documentation/i386/IO-APIC.txt>,
+ <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+ <http://www.tldp.org/docs.html#howto>.
+
+ If you don't know what to do here, say N.
config NR_CPUS
int "Maximum number of CPUs (2-255)"
@@ -630,7 +636,7 @@ config MTRR
config IRQBALANCE
bool "Enable kernel irq balancing"
- depends on SMP && X86_IO_APIC
+ depends on SMP && X86_IO_APIC && !XEN
default y
help
The default yes will allow the kernel to do irq load balancing.
@@ -661,6 +667,25 @@ config REGPARM
generate incorrect output with certain kernel constructs when
-mregparm=3 is used.
+config X86_LOCAL_APIC
+ bool
+ depends on !SMP && X86_UP_APIC
+ default y
+
+config X86_IO_APIC
+ bool
+ depends on !SMP && X86_UP_IOAPIC
+ default y
+
+config HOTPLUG_CPU
+ bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+ depends on SMP && HOTPLUG && EXPERIMENTAL
+ ---help---
+ Say Y here to experiment with turning CPUs off and on. CPUs
+ can be controlled through /sys/devices/system/cpu.
+
+ Say N.
+
if XEN_PHYSDEV_ACCESS
@@ -676,6 +701,36 @@ config X86_LOCAL_APIC
depends on (X86_VISWS || SMP) && !X86_VOYAGER
default y
+config X86_UP_APIC
+ bool "Local APIC support on uniprocessors" if !SMP
+ depends on !(X86_VISWS || X86_VOYAGER)
+ ---help---
+ A local APIC (Advanced Programmable Interrupt Controller) is an
+ integrated interrupt controller in the CPU. If you have a single-CPU
+ system which has a processor with a local APIC, you can say Y here to
+ enable and use it. If you say Y here even though your machine doesn't
+ have a local APIC, then the kernel will still run with no slowdown at
+ all. The local APIC supports CPU-generated self-interrupts (timer,
+ performance counters), and the NMI watchdog which detects hard
+ lockups.
+
+ If you have a system with several CPUs, you do not need to say Y
+ here: the local APIC will be used automatically.
+
+config X86_UP_IOAPIC
+ bool "IO-APIC support on uniprocessors"
+ depends on !SMP && X86_UP_APIC
+ help
+ An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
+ SMP-capable replacement for PC-style interrupt controllers. Most
+ SMP systems and a small number of uniprocessor systems have one.
+ If you have a single-CPU system with an IO-APIC, you can say Y here
+ to use it. If you say Y here even though your machine doesn't have
+ an IO-APIC, then the kernel will still run with no slowdown at all.
+
+ If you have a system with several CPUs, you do not need to say Y
+ here: the IO-APIC will be used automatically.
+
config X86_IO_APIC
bool
depends on SMP && !(X86_VISWS || X86_VOYAGER)
@@ -696,52 +751,53 @@ config PCI
information about which PCI hardware does work under Linux and which
doesn't.
-#choice
-# prompt "PCI access mode"
-# depends on PCI && !X86_VISWS
-# default PCI_GOANY
-# ---help---
-# On PCI systems, the BIOS can be used to detect the PCI devices and
-# determine their configuration. However, some old PCI motherboards
-# have BIOS bugs and may crash if this is done. Also, some embedded
-# PCI-based systems don't have any BIOS at all. Linux can also try to
-# detect the PCI hardware directly without using the BIOS.
-#
-# With this option, you can specify how Linux should detect the
-# PCI devices. If you choose "BIOS", the BIOS will be used,
-# if you choose "Direct", the BIOS won't be used, and if you
-# choose "MMConfig", then PCI Express MMCONFIG will be used.
-# If you choose "Any", the kernel will try MMCONFIG, then the
-# direct access method and falls back to the BIOS if that doesn't
-# work. If unsure, go with the default, which is "Any".
-#
-#config PCI_GOBIOS
-# bool "BIOS"
-#
-#config PCI_GOMMCONFIG
-# bool "MMConfig"
-#
-#config PCI_GODIRECT
-# bool "Direct"
-#
-#config PCI_GOANY
-# bool "Any"
-#
-#endchoice
-#
-#config PCI_BIOS
-# bool
-# depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
-# default y
-#
-#config PCI_DIRECT
-# bool
-# depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
-# default y
+choice
+ prompt "PCI access mode"
+ depends on PCI && !X86_VISWS
+ default PCI_GOANY
+ ---help---
+ On PCI systems, the BIOS can be used to detect the PCI devices and
+ determine their configuration. However, some old PCI motherboards
+ have BIOS bugs and may crash if this is done. Also, some embedded
+ PCI-based systems don't have any BIOS at all. Linux can also try to
+ detect the PCI hardware directly without using the BIOS.
+
+ With this option, you can specify how Linux should detect the
+ PCI devices. If you choose "BIOS", the BIOS will be used,
+ if you choose "Direct", the BIOS won't be used, and if you
+ choose "MMConfig", then PCI Express MMCONFIG will be used.
+ If you choose "Any", the kernel will try MMCONFIG, then the
+ direct access method and falls back to the BIOS if that doesn't
+ work. If unsure, go with the default, which is "Any".
+
+config PCI_GOBIOS
+ bool "BIOS"
+
+config PCI_GOMMCONFIG
+ bool "MMConfig"
+
+config PCI_GODIRECT
+ bool "Direct"
+
+config PCI_GOANY
+ bool "Any"
+
+endchoice
+
+config PCI_BIOS
+ bool
+ depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+ default y
config PCI_DIRECT
bool
- depends on PCI
+ depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
+ default y
+
+config PCI_MMCONFIG
+ bool
+ depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI))
+ select ACPI_BOOT
default y
source "drivers/pci/pcie/Kconfig"
@@ -811,129 +867,7 @@ endmenu
endif
-menu "Kernel hacking"
-
-config DEBUG_KERNEL
- bool "Kernel debugging"
- help
- Say Y here if you are developing drivers or trying to debug and
- identify kernel problems.
-
-config EARLY_PRINTK
- bool "Early printk" if EMBEDDED
- default y
- help
- Write kernel log output directly into the VGA buffer or to a serial
- port.
-
- This is useful for kernel debugging when your machine crashes very
- early before the console code is initialized. For normal operation
- it is not recommended because it looks ugly and doesn't cooperate
- with klogd/syslogd or the X server. You should normally N here,
- unless you want to debug such a crash.
-
-config DEBUG_STACKOVERFLOW
- bool "Check for stack overflows"
- depends on DEBUG_KERNEL
-
-config DEBUG_STACK_USAGE
- bool "Stack utilization instrumentation"
- depends on DEBUG_KERNEL
- help
- Enables the display of the minimum amount of free stack which each
- task has ever had available in the sysrq-T and sysrq-P debug output.
-
- This option will slow down process creation somewhat.
-
-config DEBUG_SLAB
- bool "Debug memory allocations"
- depends on DEBUG_KERNEL
- help
- Say Y here to have the kernel do limited verification on memory
- allocation as well as poisoning memory on free to catch use of freed
- memory.
-
-config MAGIC_SYSRQ
- bool "Magic SysRq key"
- depends on DEBUG_KERNEL
- help
- If you say Y here, you will have some control over the system even
- if the system crashes for example during kernel debugging (e.g., you
- will be able to flush the buffer cache to disk, reboot the system
- immediately or dump some status information). This is accomplished
- by pressing various keys while holding SysRq (Alt+PrintScreen). It
- also works on a serial console (on PC hardware at least), if you
- send a BREAK and then within 5 seconds a command keypress. The
- keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
- unless you really know what this hack does.
-
-config DEBUG_SPINLOCK
- bool "Spinlock debugging"
- depends on DEBUG_KERNEL
- help
- Say Y here and build SMP to catch missing spinlock initialization
- and certain other kinds of spinlock errors commonly made. This is
- best used in conjunction with the NMI watchdog so that spinlock
- deadlocks are also debuggable.
-
-config DEBUG_PAGEALLOC
- bool "Page alloc debugging"
- depends on DEBUG_KERNEL
- help
- Unmap pages from the kernel linear mapping after free_pages().
- This results in a large slowdown, but helps to find certain types
- of memory corruptions.
-
-config DEBUG_HIGHMEM
- bool "Highmem debugging"
- depends on DEBUG_KERNEL && HIGHMEM
- help
- This options enables addition error checking for high memory systems.
- Disable for production systems.
-
-config DEBUG_INFO
- bool "Compile the kernel with debug info"
- depends on DEBUG_KERNEL
- help
- If you say Y here the resulting kernel image will include
- debugging info resulting in a larger kernel image.
- Say Y here only if you plan to use gdb to debug the kernel.
- If you don't debug the kernel, you can say N.
-
-config DEBUG_SPINLOCK_SLEEP
- bool "Sleep-inside-spinlock checking"
- help
- If you say Y here, various routines which may sleep will become very
- noisy if they are called with a spinlock held.
-
-config FRAME_POINTER
- bool "Compile the kernel with frame pointers"
- help
- If you say Y here the resulting kernel image will be slightly larger
- and slower, but it will give very useful debugging information.
- If you don't debug the kernel, you can say N, but we may not be able
- to solve problems without frame pointers.
-
-config 4KSTACKS
- bool "Use 4Kb for kernel stacks instead of 8Kb"
- help
- If you say Y here the kernel will use a 4Kb stacksize for the
- kernel stack attached to each process/thread. This facilitates
- running more threads on a system and also reduces the pressure
- on the VM subsystem for higher order allocations. This option
- will also use IRQ stacks to compensate for the reduced stackspace.
-
-config X86_FIND_SMP_CONFIG
- bool
- depends on X86_LOCAL_APIC || X86_VOYAGER
- default y
-
-config X86_MPPARSE
- bool
- depends on X86_LOCAL_APIC && !X86_VISWS
- default y
-
-endmenu
+source "arch/i386/Kconfig.debug"
#
# Use the generic interrupt handling code in kernel/irq/:
@@ -951,10 +885,10 @@ config X86_SMP
depends on SMP && !X86_VOYAGER
default y
-config X86_HT
- bool
- depends on SMP && !(X86_VISWS || X86_VOYAGER)
- default y
+#config X86_HT
+# bool
+# depends on SMP && !(X86_VISWS || X86_VOYAGER)
+# default y
config X86_BIOS_REBOOT
bool
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile
index 8af1059853..053c0984ac 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile
@@ -72,6 +72,7 @@ head-y := arch/xen/i386/kernel/head.o arch/xen/i386/kernel/init_task.o
libs-y += arch/i386/lib/
core-y += arch/xen/i386/kernel/ \
arch/xen/i386/mm/ \
+ arch/xen/i386/mach-default/ \
arch/i386/crypto/
# \
# arch/xen/$(mcore-y)/
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile
index b91b7faac6..3f2fef4a94 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile
@@ -10,9 +10,9 @@ extra-y := head.o init_task.o
obj-y := process.o signal.o entry.o traps.o \
time.o ioport.o ldt.o setup.o \
- pci-dma.o i386_ksyms.o
+ pci-dma.o i386_ksyms.o irq.o
-c-obj-y := semaphore.o irq.o vm86.o \
+c-obj-y := semaphore.o vm86.o \
ptrace.o sys_i386.o \
i387.o dmi_scan.o bootflag.o \
doublefault.o quirks.o
@@ -20,18 +20,19 @@ s-obj-y :=
obj-y += cpu/
obj-y += timers/
-c-obj-$(CONFIG_ACPI_BOOT) += acpi/
+obj-$(CONFIG_ACPI_BOOT) += acpi/
#c-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
c-obj-$(CONFIG_MCA) += mca.o
c-obj-$(CONFIG_X86_MSR) += msr.o
c-obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
c-obj-$(CONFIG_APM) += apm.o
-c-obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
-c-obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
-c-obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-c-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
-c-obj-$(CONFIG_X86_IO_APIC) += io_apic.o
+obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
+#obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
+obj-$(CONFIG_X86_MPPARSE) += mpparse.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
+c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
+obj-$(CONFIG_X86_IO_APIC) += io_apic.o
c-obj-$(CONFIG_X86_NUMAQ) += numaq.o
c-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
c-obj-$(CONFIG_MODULES) += module.o
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile
new file mode 100644
index 0000000000..9254f472f2
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/Makefile
@@ -0,0 +1,13 @@
+obj-$(CONFIG_ACPI_BOOT) := boot.o
+c-obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
+c-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+ @ln -fsn $(srctree)/arch/i386/kernel/acpi/$(notdir $@) $@
+
+obj-y += $(c-obj-y) $(s-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
+clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
new file mode 100644
index 0000000000..86ad650024
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/acpi/boot.c
@@ -0,0 +1,906 @@
+/*
+ * boot.c - Architecture-Specific Low-Level ACPI Boot Support
+ *
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include <asm/pgtable.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_XEN
+#include <asm/fixmap.h>
+#endif
+
+void (*pm_power_off)(void) = NULL;
+
+#ifdef CONFIG_X86_64
+
+static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
+extern void __init clustered_apic_check(void);
+static inline int ioapic_setup_disabled(void) { return 0; }
+#include <asm/proto.h>
+
+#else /* X86 */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#endif /* X86 */
+
+#define BAD_MADT_ENTRY(entry, end) ( \
+ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
+ ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+
+#define PREFIX "ACPI: "
+
+#ifdef CONFIG_ACPI_PCI
+int acpi_noirq __initdata; /* skip ACPI IRQ initialization */
+int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
+#else
+int acpi_noirq __initdata = 1;
+int acpi_pci_disabled __initdata = 1;
+#endif
+int acpi_ht __initdata = 1; /* enable HT */
+
+int acpi_lapic;
+int acpi_ioapic;
+int acpi_strict;
+EXPORT_SYMBOL(acpi_strict);
+
+acpi_interrupt_flags acpi_sci_flags __initdata;
+int acpi_sci_override_gsi __initdata;
+int acpi_skip_timer_override __initdata;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+#endif
+
+#ifndef __HAVE_ARCH_CMPXCHG
+#warning ACPI uses CMPXCHG, i486 and later hardware
+#endif
+
+#define MAX_MADT_ENTRIES 256
+u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
+ { [0 ... MAX_MADT_ENTRIES-1] = 0xff };
+EXPORT_SYMBOL(x86_acpiid_to_apicid);
+
+/* --------------------------------------------------------------------------
+ Boot-time Configuration
+ -------------------------------------------------------------------------- */
+
+/*
+ * The default interrupt routing model is PIC (8259). This gets
+ * overriden if IOAPICs are enumerated (below).
+ */
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+
+/* rely on all ACPI tables being in the direct mapping */
+char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+{
+ if (!phys_addr || !size)
+ return NULL;
+
+ if (phys_addr < (end_pfn_map << PAGE_SHIFT))
+ return __va(phys_addr);
+
+ return NULL;
+}
+
+#else
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __va_range() call.
+ *
+ * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+char *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+ unsigned long base, offset, mapped_size;
+ int idx;
+
+#ifndef CONFIG_XEN
+ if (phys + size < 8*1024*1024)
+ return __va(phys);
+#endif
+
+ offset = phys & (PAGE_SIZE - 1);
+ mapped_size = PAGE_SIZE - offset;
+ set_fixmap(FIX_ACPI_END, phys);
+ base = fix_to_virt(FIX_ACPI_END);
+
+ /*
+ * Most cases can be covered by the below.
+ */
+ idx = FIX_ACPI_END;
+ while (mapped_size < size) {
+ if (--idx < FIX_ACPI_BEGIN)
+ return NULL; /* cannot handle this */
+ phys += PAGE_SIZE;
+ set_fixmap(idx, phys);
+ mapped_size += PAGE_SIZE;
+ }
+
+ return ((unsigned char *) base + offset);
+}
+#endif
+
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+
+ if (mcfg->base_reserved) {
+ printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+ return -ENODEV;
+ }
+
+ pci_mmcfg_base_addr = mcfg->base_address;
+
+ return 0;
+}
+#else
+#define acpi_parse_mcfg NULL
+#endif /* !CONFIG_PCI_MMCONFIG */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static int __init
+acpi_parse_madt (
+ unsigned long phys_addr,
+ unsigned long size)
+{
+ struct acpi_table_madt *madt = NULL;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
+ if (!madt) {
+ printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+ return -ENODEV;
+ }
+
+ if (madt->lapic_address) {
+ acpi_lapic_addr = (u64) madt->lapic_address;
+
+ printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+ madt->lapic_address);
+ }
+
+ acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_lapic (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic *processor = NULL;
+
+ processor = (struct acpi_table_lapic*) header;
+
+ if (BAD_MADT_ENTRY(processor, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* no utility in registering a disabled processor */
+ if (processor->flags.enabled == 0)
+ return 0;
+
+ x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
+
+ mp_register_lapic (
+ processor->id, /* APIC ID */
+ processor->flags.enabled); /* Enabled? */
+
+ return 0;
+}
+
+static int __init
+acpi_parse_lapic_addr_ovr (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+
+ lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
+
+ if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
+ return -EINVAL;
+
+ acpi_lapic_addr = lapic_addr_ovr->address;
+
+ return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+
+ lapic_nmi = (struct acpi_table_lapic_nmi*) header;
+
+ if (BAD_MADT_ENTRY(lapic_nmi, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (lapic_nmi->lint != 1)
+ printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+ return 0;
+}
+
+
+#endif /*CONFIG_X86_LOCAL_APIC*/
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+
+static int __init
+acpi_parse_ioapic (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_ioapic *ioapic = NULL;
+
+ ioapic = (struct acpi_table_ioapic*) header;
+
+ if (BAD_MADT_ENTRY(ioapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ mp_register_ioapic (
+ ioapic->id,
+ ioapic->address,
+ ioapic->global_irq_base);
+
+ return 0;
+}
+
+/*
+ * Parse Interrupt Source Override for the ACPI SCI
+ */
+static void
+acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
+{
+ if (trigger == 0) /* compatible SCI trigger is level */
+ trigger = 3;
+
+ if (polarity == 0) /* compatible SCI polarity is low */
+ polarity = 3;
+
+ /* Command-line over-ride via acpi_sci= */
+ if (acpi_sci_flags.trigger)
+ trigger = acpi_sci_flags.trigger;
+
+ if (acpi_sci_flags.polarity)
+ polarity = acpi_sci_flags.polarity;
+
+ /*
+ * mp_config_acpi_legacy_irqs() already setup IRQs < 16
+ * If GSI is < 16, this will update its flags,
+ * else it will create a new mp_irqs[] entry.
+ */
+ mp_override_legacy_irq(gsi, polarity, trigger, gsi);
+
+ /*
+ * stash over-ride to indicate we've been here
+ * and for later update of acpi_fadt
+ */
+ acpi_sci_override_gsi = gsi;
+ return;
+}
+
+static int __init
+acpi_parse_int_src_ovr (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_int_src_ovr *intsrc = NULL;
+
+ intsrc = (struct acpi_table_int_src_ovr*) header;
+
+ if (BAD_MADT_ENTRY(intsrc, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (intsrc->bus_irq == acpi_fadt.sci_int) {
+ acpi_sci_ioapic_setup(intsrc->global_irq,
+ intsrc->flags.polarity, intsrc->flags.trigger);
+ return 0;
+ }
+
+ if (acpi_skip_timer_override &&
+ intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+ printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ return 0;
+ }
+
+ mp_override_legacy_irq (
+ intsrc->bus_irq,
+ intsrc->flags.polarity,
+ intsrc->flags.trigger,
+ intsrc->global_irq);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_nmi_src (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_nmi_src *nmi_src = NULL;
+
+ nmi_src = (struct acpi_table_nmi_src*) header;
+
+ if (BAD_MADT_ENTRY(nmi_src, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* TBD: Support nimsrc entries? */
+
+ return 0;
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+
+#ifdef CONFIG_ACPI_BUS
+
+/*
+ * acpi_pic_sci_set_trigger()
+ *
+ * use ELCR to set PIC-mode trigger type for SCI
+ *
+ * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
+ * it may require Edge Trigger -- use "acpi_sci=edge"
+ *
+ * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
+ * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
+ * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
+ * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
+ */
+
+void __init
+acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
+{
+ unsigned int mask = 1 << irq;
+ unsigned int old, new;
+
+ /* Real old ELCR mask */
+ old = inb(0x4d0) | (inb(0x4d1) << 8);
+
+ /*
+ * If we use ACPI to set PCI irq's, then we should clear ELCR
+ * since we will set it correctly as we enable the PCI irq
+ * routing.
+ */
+ new = acpi_noirq ? old : 0;
+
+ /*
+ * Update SCI information in the ELCR, it isn't in the PCI
+ * routing tables..
+ */
+ switch (trigger) {
+ case 1: /* Edge - clear */
+ new &= ~mask;
+ break;
+ case 3: /* Level - set */
+ new |= mask;
+ break;
+ }
+
+ if (old == new)
+ return;
+
+ printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
+ outb(new, 0x4d0);
+ outb(new >> 8, 0x4d1);
+}
+
+
+#endif /* CONFIG_ACPI_BUS */
+
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+ if (use_pci_vector() && !platform_legacy_irq(gsi))
+ *irq = IO_APIC_VECTOR(gsi);
+ else
+#endif
+ *irq = gsi;
+ return 0;
+}
+
+unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
+{
+ unsigned int irq;
+ unsigned int plat_gsi = gsi;
+
+#ifdef CONFIG_X86_IO_APIC
+ if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
+ plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
+ }
+#endif
+ acpi_gsi_to_irq(plat_gsi, &irq);
+ return irq;
+}
+EXPORT_SYMBOL(acpi_register_gsi);
+
+/*
+ * ACPI based hotplug support for CPU
+ */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int
+acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+ /* TBD */
+ return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_map_lsapic);
+
+
+int
+acpi_unmap_lsapic(int cpu)
+{
+ /* TBD */
+ return -EINVAL;
+}
+EXPORT_SYMBOL(acpi_unmap_lsapic);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
+static unsigned long __init
+acpi_scan_rsdp (
+ unsigned long start,
+ unsigned long length)
+{
+ unsigned long offset = 0;
+ unsigned long sig_len = sizeof("RSD PTR ") - 1;
+ unsigned long vstart = (unsigned long)isa_bus_to_virt(start);
+
+ /*
+ * Scan all 16-byte boundaries of the physical memory region for the
+ * RSDP signature.
+ */
+ for (offset = 0; offset < length; offset += 16) {
+ if (strncmp((char *) (vstart + offset), "RSD PTR ", sig_len))
+ continue;
+ return (start + offset);
+ }
+
+ return 0;
+}
+
+static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_sbf *sb;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
+ if (!sb) {
+ printk(KERN_WARNING PREFIX "Unable to map SBF\n");
+ return -ENODEV;
+ }
+
+ sbf_port = sb->sbf_cmos; /* Save CMOS port */
+
+ return 0;
+}
+
+
+#ifdef CONFIG_HPET_TIMER
+
+static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+{
+ struct acpi_table_hpet *hpet_tbl;
+
+ if (!phys || !size)
+ return -EINVAL;
+
+ hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
+ if (!hpet_tbl) {
+ printk(KERN_WARNING PREFIX "Unable to map HPET\n");
+ return -ENODEV;
+ }
+
+ if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
+ printk(KERN_WARNING PREFIX "HPET timers must be located in "
+ "memory.\n");
+ return -1;
+ }
+
+#ifdef CONFIG_X86_64
+ vxtime.hpet_address = hpet_tbl->addr.addrl |
+ ((long) hpet_tbl->addr.addrh << 32);
+
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, vxtime.hpet_address);
+#else /* X86 */
+ {
+ extern unsigned long hpet_address;
+
+ hpet_address = hpet_tbl->addr.addrl;
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, hpet_address);
+ }
+#endif /* X86 */
+
+ return 0;
+}
+#else
+#define acpi_parse_hpet NULL
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+extern u32 pmtmr_ioport;
+#endif
+
+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+{
+ struct fadt_descriptor_rev2 *fadt = NULL;
+
+ fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
+ if(!fadt) {
+ printk(KERN_WARNING PREFIX "Unable to map FADT\n");
+ return 0;
+ }
+
+#ifdef CONFIG_ACPI_INTERPRETER
+ /* initialize sci_int early for INT_SRC_OVR MADT parsing */
+ acpi_fadt.sci_int = fadt->sci_int;
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+ /* detect the location of the ACPI PM Timer */
+ if (fadt->revision >= FADT2_REVISION_ID) {
+ /* FADT rev. 2 */
+ if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
+ return 0;
+
+ pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ } else {
+ /* FADT rev. 1 */
+ pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ }
+ if (pmtmr_ioport)
+ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
+#endif
+ return 0;
+}
+
+
+unsigned long __init
+acpi_find_rsdp (void)
+{
+ unsigned long rsdp_phys = 0;
+
+ if (efi_enabled) {
+ if (efi.acpi20)
+ return __pa(efi.acpi20);
+ else if (efi.acpi)
+ return __pa(efi.acpi);
+ }
+ /*
+ * Scan memory looking for the RSDP signature. First search EBDA (low
+ * memory) paragraphs and then search upper memory (E0000-FFFFF).
+ */
+ rsdp_phys = acpi_scan_rsdp (0, 0x400);
+ if (!rsdp_phys)
+ rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
+
+ set_fixmap(FIX_ACPI_RSDP_PAGE, rsdp_phys);
+
+ return rsdp_phys;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Parse LAPIC entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init
+acpi_parse_madt_lapic_entries(void)
+{
+ int count;
+
+ /*
+ * Note that the LAPIC address is obtained from the MADT (32-bit value)
+ * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+ */
+
+ count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+ return count;
+ }
+
+ mp_register_lapic_address(acpi_lapic_addr);
+
+ count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
+ MAX_APICS);
+ if (!count) {
+ printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return -ENODEV;
+ }
+ else if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+ count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+ return 0;
+}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+/*
+ * Parse IOAPIC related entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init
+acpi_parse_madt_ioapic_entries(void)
+{
+ int count;
+
+ /*
+ * ACPI interpreter is required to complete interrupt setup,
+ * so if it is off, don't enumerate the io-apics with ACPI.
+ * If MPS is present, it will handle them,
+ * otherwise the system will stay in PIC mode
+ */
+ if (acpi_disabled || acpi_noirq) {
+ return -ENODEV;
+ }
+
+ /*
+ * if "noapic" boot option, don't look for IO-APICs
+ */
+ if (skip_ioapic_setup) {
+ printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
+ "due to 'noapic' option.\n");
+ return -ENODEV;
+ }
+
+ count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
+ if (!count) {
+ printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+ return -ENODEV;
+ }
+ else if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+ return count;
+ }
+
+ count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+ /*
+ * If BIOS did not supply an INT_SRC_OVR for the SCI
+ * pretend we got one so we can set the SCI flags.
+ */
+ if (!acpi_sci_override_gsi)
+ acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+
+ /* Fill in identity legacy mapings where no override */
+ mp_config_acpi_legacy_irqs();
+
+ count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+ return 0;
+}
+#else
+static inline int acpi_parse_madt_ioapic_entries(void)
+{
+ return -1;
+}
+#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */
+
+
+static void __init
+acpi_process_madt(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ int count, error;
+
+ count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
+ if (count >= 1) {
+
+ /*
+ * Parse MADT LAPIC entries
+ */
+ error = acpi_parse_madt_lapic_entries();
+ if (!error) {
+ acpi_lapic = 1;
+
+ /*
+ * Parse MADT IO-APIC entries
+ */
+ error = acpi_parse_madt_ioapic_entries();
+ if (!error) {
+ acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+ acpi_irq_balance_set(NULL);
+ acpi_ioapic = 1;
+
+ smp_found_config = 1;
+ clustered_apic_check();
+ }
+ }
+ if (error == -EINVAL) {
+ /*
+ * Dell Precision Workstation 410, 610 come here.
+ */
+ printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n");
+ disable_acpi();
+ }
+ }
+#endif
+ return;
+}
+
+/*
+ * acpi_boot_table_init() and acpi_boot_init()
+ * called from setup_arch(), always.
+ * 1. checksums all tables
+ * 2. enumerates lapics
+ * 3. enumerates io-apics
+ *
+ * acpi_table_init() is separate to allow reading SRAT without
+ * other side effects.
+ *
+ * side effects of acpi_boot_init:
+ * acpi_lapic = 1 if LAPIC found
+ * acpi_ioapic = 1 if IOAPIC found
+ * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ * if acpi_blacklisted() acpi_disabled = 1;
+ * acpi_irq_model=...
+ * ...
+ *
+ * return value: (currently ignored)
+ * 0: success
+ * !0: failure
+ */
+
+int __init
+acpi_boot_table_init(void)
+{
+ int error;
+
+ /*
+ * If acpi_disabled, bail out
+ * One exception: acpi=ht continues far enough to enumerate LAPICs
+ */
+ if (acpi_disabled && !acpi_ht)
+ return 1;
+
+ /*
+ * Initialize the ACPI boot-time table parser.
+ */
+ error = acpi_table_init();
+ if (error) {
+ disable_acpi();
+ return error;
+ }
+
+#ifdef __i386__
+ check_acpi_pci();
+#endif
+
+ acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+ /*
+ * blacklist may disable ACPI entirely
+ */
+ error = acpi_blacklisted();
+ if (error) {
+ extern int acpi_force;
+
+ if (acpi_force) {
+ printk(KERN_WARNING PREFIX "acpi=force override\n");
+ } else {
+ printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
+ disable_acpi();
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+
+int __init acpi_boot_init(void)
+{
+ /*
+ * If acpi_disabled, bail out
+ * One exception: acpi=ht continues far enough to enumerate LAPICs
+ */
+ if (acpi_disabled && !acpi_ht)
+ return 1;
+
+ acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+ /*
+ * set sci_int and PM timer address
+ */
+ acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+
+ /*
+ * Process the Multiple APIC Description Table (MADT), if present
+ */
+ acpi_process_madt();
+
+ acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+ acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+
+ return 0;
+}
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c
new file mode 100644
index 0000000000..5f071a5956
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/apic.c
@@ -0,0 +1,83 @@
+/*
+ * Local APIC handling, local APIC timers
+ *
+ * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively.
+ * Maciej W. Rozycki : Various updates and fixes.
+ * Mikael Pettersson : Power Management for UP-APIC.
+ * Pavel Machek and
+ * Mikael Pettersson : PM converted to driver model.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+
+#include <mach_apic.h>
+
+#include "io_ports.h"
+
+/*
+ * Debug level
+ */
+int apic_verbosity;
+
+int get_physical_broadcast(void)
+{
+ return 0xff;
+}
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ */
+ ack_APIC_irq();
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ if (smp_found_config)
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+#endif
+
+ return 0;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c
index e4092fbc67..197225266d 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c
@@ -554,7 +554,7 @@ void __init early_cpu_init(void)
void __init cpu_gdt_init(struct Xgt_desc_struct *gdt_descr)
{
- unsigned long frames[gdt_descr->size >> PAGE_SHIFT];
+ unsigned long frames[16];
unsigned long va;
int f;
@@ -564,7 +564,6 @@ void __init cpu_gdt_init(struct Xgt_desc_struct *gdt_descr)
frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
make_page_readonly((void *)va);
}
- flush_page_update_queue();
if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
BUG();
lgdt_finish();
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
index af2fad5236..064be004e7 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
@@ -80,23 +80,42 @@ VM_MASK = 0x00020000
#define evtchn_upcall_pending /* 0 */
#define evtchn_upcall_mask 1
-#define XEN_GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg
-#define XEN_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
-#define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
-#define XEN_SAVE_UPCALL_MASK(reg,tmp,off) \
- movb evtchn_upcall_mask(reg), tmp; \
- movb tmp, off(%esp)
+#define sizeof_vcpu_shift 3
+
+#ifdef CONFIG_SMP
+#define preempt_disable(reg) incl TI_preempt_count(reg)
+#define preempt_enable(reg) decl TI_preempt_count(reg)
+#define XEN_GET_VCPU_INFO(reg) preempt_disable(%ebp) ; \
+ movl TI_cpu(%ebp),reg ; \
+ shl $sizeof_vcpu_shift,reg ; \
+ addl HYPERVISOR_shared_info,reg
+#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%ebp)
+#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
+#else
+#define XEN_GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg
+#define XEN_PUT_VCPU_INFO(reg)
+#define XEN_PUT_VCPU_INFO_fixup
+#endif
+#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
+#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
+#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \
+ XEN_LOCKED_BLOCK_EVENTS(reg) ; \
+ XEN_PUT_VCPU_INFO(reg)
+#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \
+ XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \
+ XEN_PUT_VCPU_INFO(reg)
#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
#ifdef CONFIG_PREEMPT
-#define preempt_stop XEN_BLOCK_EVENTS(%esi)
+#define preempt_stop GET_THREAD_INFO(%ebp) ; \
+ XEN_BLOCK_EVENTS(%esi)
#else
#define preempt_stop
#define resume_kernel restore_all
#endif
-#define SAVE_ALL_NO_EVENTMASK \
+#define SAVE_ALL \
cld; \
pushl %es; \
pushl %ds; \
@@ -111,11 +130,6 @@ VM_MASK = 0x00020000
movl %edx, %ds; \
movl %edx, %es;
-#define SAVE_ALL \
- SAVE_ALL_NO_EVENTMASK; \
- XEN_GET_VCPU_INFO(%esi); \
- XEN_SAVE_UPCALL_MASK(%esi,%dl,EVENT_MASK)
-
#define RESTORE_INT_REGS \
popl %ebx; \
popl %ecx; \
@@ -164,7 +178,6 @@ ENTRY(ret_from_fork)
call schedule_tail
GET_THREAD_INFO(%ebp)
popl %eax
- XEN_GET_VCPU_INFO(%esi)
jmp syscall_exit
/*
@@ -185,7 +198,6 @@ ret_from_intr:
testl $(VM_MASK | 2), %eax
jz resume_kernel # returning to kernel or vm86-space
ENTRY(resume_userspace)
- XEN_GET_VCPU_INFO(%esi)
XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
@@ -197,7 +209,6 @@ ENTRY(resume_userspace)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
- XEN_GET_VCPU_INFO(%esi)
XEN_BLOCK_EVENTS(%esi)
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_all
@@ -284,9 +295,11 @@ restore_all:
jnz resume_vm86
movb EVENT_MASK(%esp), %al
notb %al # %al == ~saved_mask
+ XEN_GET_VCPU_INFO(%esi)
andb evtchn_upcall_mask(%esi),%al
andb $1,%al # %al == mask & ~saved_mask
jnz restore_all_enable_events # != 0 => reenable event delivery
+ XEN_PUT_VCPU_INFO(%esi)
RESTORE_ALL
resume_vm86:
@@ -436,8 +449,6 @@ error_code:
movl %ecx, %ds
movl %ecx, %es
movl %esp,%eax # pt_regs pointer
- XEN_GET_VCPU_INFO(%esi)
- XEN_SAVE_UPCALL_MASK(%esi,%bl,EVENT_MASK)
call *%edi
jmp ret_from_exception
@@ -454,27 +465,27 @@ error_code:
# activation and restart the handler using the previous one.
ENTRY(hypervisor_callback)
pushl %eax
- SAVE_ALL_NO_EVENTMASK
+ SAVE_ALL
movl EIP(%esp),%eax
cmpl $scrit,%eax
jb 11f
cmpl $ecrit,%eax
jb critical_region_fixup
-11: XEN_GET_VCPU_INFO(%esi)
- movb $0, EVENT_MASK(%esp)
- push %esp
+11: push %esp
call evtchn_do_upcall
add $4,%esp
jmp ret_from_intr
ALIGN
restore_all_enable_events:
- XEN_UNBLOCK_EVENTS(%esi)
+ XEN_LOCKED_UNBLOCK_EVENTS(%esi)
scrit: /**** START OF CRITICAL REGION ****/
XEN_TEST_PENDING(%esi)
jnz 14f # process more events if necessary...
+ XEN_PUT_VCPU_INFO(%esi)
RESTORE_ALL
-14: XEN_BLOCK_EVENTS(%esi)
+14: XEN_LOCKED_BLOCK_EVENTS(%esi)
+ XEN_PUT_VCPU_INFO(%esi)
jmp 11b
ecrit: /**** END OF CRITICAL REGION ****/
# [How we do the fixup]. We want to merge the current stack frame with the
@@ -487,24 +498,30 @@ ecrit: /**** END OF CRITICAL REGION ****/
critical_region_fixup:
addl $critical_fixup_table-scrit,%eax
movzbl (%eax),%eax # %eax contains num bytes popped
- mov %esp,%esi
+ cmpb $0xff,%al # 0xff => vcpu_info critical region
+ jne 15f
+ GET_THREAD_INFO(%ebp)
+ XEN_PUT_VCPU_INFO(%esi) # abort vcpu_info critical region
+ xorl %eax,%eax
+15: mov %esp,%esi
add %eax,%esi # %esi points at end of src region
mov %esp,%edi
add $0x34,%edi # %edi points at end of dst region
mov %eax,%ecx
shr $2,%ecx # convert words to bytes
- je 16f # skip loop if nothing to copy
-15: subl $4,%esi # pre-decrementing copy loop
+ je 17f # skip loop if nothing to copy
+16: subl $4,%esi # pre-decrementing copy loop
subl $4,%edi
movl (%esi),%eax
movl %eax,(%edi)
- loop 15b
-16: movl %edi,%esp # final %edi is top of merged stack
+ loop 16b
+17: movl %edi,%esp # final %edi is top of merged stack
jmp 11b
critical_fixup_table:
- .byte 0x00,0x00,0x00 # testb $0xff,(%esi) = XEN_TEST_PENDING
- .byte 0x00,0x00 # jnz 14f
+ .byte 0xff,0xff,0xff # testb $0xff,(%esi) = XEN_TEST_PENDING
+ .byte 0xff,0xff # jnz 14f
+ XEN_PUT_VCPU_INFO_fixup
.byte 0x00 # pop %ebx
.byte 0x04 # pop %ecx
.byte 0x08 # pop %edx
@@ -516,7 +533,8 @@ critical_fixup_table:
.byte 0x20 # pop %es
.byte 0x24,0x24,0x24 # add $4,%esp
.byte 0x28 # iret
- .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi)
+ .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi)
+ XEN_PUT_VCPU_INFO_fixup
.byte 0x00,0x00 # jmp 11b
# Hypervisor uses this for application faults while it executes.
@@ -720,8 +738,6 @@ ENTRY(page_fault)
movl %eax, %ds
movl %eax, %es
movl %esp,%eax /* pt_regs pointer */
- XEN_GET_VCPU_INFO(%esi)
- XEN_SAVE_UPCALL_MASK(%esi,%bl,EVENT_MASK)
call do_page_fault
jmp ret_from_exception
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S
index 0564db4c26..91036572bc 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S
@@ -3,10 +3,9 @@
.section __xen_guest
.ascii "GUEST_OS=linux,GUEST_VER=2.6"
- .ascii ",XEN_VER=2.0"
+ .ascii ",XEN_VER=3.0"
.ascii ",VIRT_BASE=0xC0000000"
.ascii ",LOADER=generic"
- .ascii ",PT_MODE_WRITABLE"
.byte 0
.text
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c
new file mode 100644
index 0000000000..882ff3fe9c
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/io_apic.c
@@ -0,0 +1,2611 @@
+/*
+ * Intel IO-APIC support for multi-Pentium hosts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ * Many thanks to Stig Venaas for trying out countless experimental
+ * patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
+ * and Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively
+ * Paul Diefenbaugh : Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/config.h>
+#include <linux/smp_lock.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+
+#include <linux/sysdev.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/timer.h>
+
+#include <mach_apic.h>
+
+#include "io_ports.h"
+
+#ifdef CONFIG_XEN
+
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/xen-public/physdev.h>
+
+/* Fake i8259 */
+#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
+#define disable_8259A_irq(_irq) ((void)0)
+#define i8259A_irq_pending(_irq) (0)
+
+unsigned long io_apic_irqs;
+
+static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
+{
+ physdev_op_t op;
+ int ret;
+
+ op.cmd = PHYSDEVOP_APIC_READ;
+ op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
+ op.u.apic_op.offset = reg;
+ ret = HYPERVISOR_physdev_op(&op);
+ if (ret)
+ return ret;
+ return op.u.apic_op.value;
+}
+
+static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ physdev_op_t op;
+
+ op.cmd = PHYSDEVOP_APIC_WRITE;
+ op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
+ op.u.apic_op.offset = reg;
+ op.u.apic_op.value = value;
+ HYPERVISOR_physdev_op(&op);
+}
+
+#define io_apic_read(a,r) xen_io_apic_read(a,r)
+#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
+
+#endif /* CONFIG_XEN */
+
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+atomic_t irq_mis_count;
+
+static DEFINE_SPINLOCK(ioapic_lock);
+
+/*
+ * Is the SiS APIC rmw bug present ?
+ * -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+ int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+#ifdef CONFIG_PCI_MSI
+#define vector_to_irq(vector) \
+ (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#else
+#define vector_to_irq(vector) (vector)
+#endif
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+ static int first_free_entry = NR_IRQS;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (entry->next)
+ entry = irq_2_pin + entry->next;
+
+ if (entry->pin != -1) {
+ entry->next = first_free_entry;
+ entry = irq_2_pin + entry->next;
+ if (++first_free_entry >= PIN_MAP_SIZE)
+ panic("io_apic.c: whoops");
+ }
+ entry->apic = apic;
+ entry->pin = pin;
+}
+
+#ifndef CONFIG_XEN
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+ int oldapic, int oldpin,
+ int newapic, int newpin)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (1) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ }
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
+static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
+{
+ struct irq_pin_list *entry = irq_2_pin + irq;
+ unsigned int pin, reg;
+
+ for (;;) {
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ reg &= ~disable;
+ reg |= enable;
+ io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0x00010000, 0);
+}
+
+/* mask = 0 */
+static void __unmask_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0, 0x00010000);
+}
+
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
+{
+ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
+}
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+ *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 1;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
+}
+
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
+{
+ unsigned long flags;
+ int pin;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+ unsigned int apicid_value;
+
+ apicid_value = cpu_mask_to_apicid(cpumask);
+ /* Prepare to do the io_apic_write */
+ apicid_value = apicid_value << 24;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ for (;;) {
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+ io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+#else
+#define clear_IO_APIC() ((void)0)
+#endif
+
+#if defined(CONFIG_IRQBALANCE)
+# include <asm/processor.h> /* kernel_thread() */
+# include <linux/kernel_stat.h> /* kstat */
+# include <linux/slab.h> /* kmalloc() */
+# include <linux/timer.h> /* time_after() */
+
+# ifdef CONFIG_BALANCED_IRQ_DEBUG
+# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
+# define Dprintk(x...) do { TDprintk(x); } while (0)
+# else
+# define TDprintk(x...)
+# define Dprintk(x...)
+# endif
+
+cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
+
+#define IRQBALANCE_CHECK_ARCH -999
+static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
+static int physical_balance = 0;
+
+struct irq_cpu_info {
+ unsigned long * last_irq;
+ unsigned long * irq_delta;
+ unsigned long irq;
+} irq_cpu_data[NR_CPUS];
+
+#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
+#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq])
+#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq])
+
+#define IDLE_ENOUGH(cpu,now) \
+ (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
+
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+
+#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
+#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
+#define BALANCED_IRQ_MORE_DELTA (HZ/10)
+#define BALANCED_IRQ_LESS_DELTA (HZ)
+
+long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
+
+static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
+ unsigned long now, int direction)
+{
+ int search_idle = 1;
+ int cpu = curr_cpu;
+
+ goto inside;
+
+ do {
+ if (unlikely(cpu == curr_cpu))
+ search_idle = 0;
+inside:
+ if (direction == 1) {
+ cpu++;
+ if (cpu >= NR_CPUS)
+ cpu = 0;
+ } else {
+ cpu--;
+ if (cpu == -1)
+ cpu = NR_CPUS-1;
+ }
+ } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
+ (search_idle && !IDLE_ENOUGH(cpu,now)));
+
+ return cpu;
+}
+
+static inline void balance_irq(int cpu, int irq)
+{
+ unsigned long now = jiffies;
+ cpumask_t allowed_mask;
+ unsigned int new_cpu;
+
+ if (irqbalance_disabled)
+ return;
+
+ cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
+ new_cpu = move(cpu, allowed_mask, now, 1);
+ if (cpu != new_cpu) {
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+}
+
+static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
+{
+ int i, j;
+ Dprintk("Rotating IRQs among CPUs.\n");
+ for (i = 0; i < NR_CPUS; i++) {
+ for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+ if (!irq_desc[j].action)
+ continue;
+ /* Is it a significant load ? */
+ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
+ useful_load_threshold)
+ continue;
+ balance_irq(i, j);
+ }
+ }
+ balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
+ return;
+}
+
+/*
+ * One pass of the kirqd balancing algorithm:
+ *  1. Accumulate per-CPU/per-IRQ activity deltas since the last pass
+ *     (per package when HT siblings share a package).
+ *  2. Find the least- and most-loaded packages.
+ *  3. Pick the IRQ on the heavy package whose load best fills the
+ *     imbalance without exceeding it, and mark it for migration to the
+ *     lighter package via pending_irq_balance_cpumask[].
+ * Falls back to rotate_irqs_among_cpus() when pairing fails, and backs
+ * off (lengthens the polling interval) when nothing is worth moving.
+ */
+static void do_irq_balance(void)
+{
+	int i, j;
+	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
+	unsigned long move_this_load = 0;
+	int max_loaded = 0, min_loaded = 0;
+	int load;
+	unsigned long useful_load_threshold = balanced_irq_interval + 10;
+	int selected_irq;
+	int tmp_loaded, first_attempt = 1;
+	unsigned long tmp_cpu_irq;
+	unsigned long imbalance = 0;
+	cpumask_t allowed_mask, target_cpu_mask, tmp;
+
+	/* Phase 1: gather activity deltas for every online CPU. */
+	for (i = 0; i < NR_CPUS; i++) {
+		int package_index;
+		CPU_IRQ(i) = 0;
+		if (!cpu_online(i))
+			continue;
+		package_index = CPU_TO_PACKAGEINDEX(i);
+		for (j = 0; j < NR_IRQS; j++) {
+			unsigned long value_now, delta;
+			/* Is this an active IRQ? */
+			if (!irq_desc[j].action)
+				continue;
+			/* Lowest-numbered sibling resets the package total. */
+			if ( package_index == i )
+				IRQ_DELTA(package_index,j) = 0;
+			/* Determine the total count per processor per IRQ */
+			value_now = (unsigned long) kstat_cpu(i).irqs[j];
+
+			/* Determine the activity per processor per IRQ */
+			delta = value_now - LAST_CPU_IRQ(i,j);
+
+			/* Update last_cpu_irq[][] for the next time */
+			LAST_CPU_IRQ(i,j) = value_now;
+
+			/* Ignore IRQs whose rate is less than the clock */
+			if (delta < useful_load_threshold)
+				continue;
+			/* update the load for the processor or package total */
+			IRQ_DELTA(package_index,j) += delta;
+
+			/* Keep track of the higher numbered sibling as well */
+			if (i != package_index)
+				CPU_IRQ(i) += delta;
+			/*
+			 * We have sibling A and sibling B in the package
+			 *
+			 * cpu_irq[A] = load for cpu A + load for cpu B
+			 * cpu_irq[B] = load for cpu B
+			 */
+			CPU_IRQ(package_index) += delta;
+		}
+	}
+	/* Find the least loaded processor package */
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		/* Only the lowest-numbered sibling represents a package. */
+		if (i != CPU_TO_PACKAGEINDEX(i))
+			continue;
+		if (min_cpu_irq > CPU_IRQ(i)) {
+			min_cpu_irq = CPU_IRQ(i);
+			min_loaded = i;
+		}
+	}
+	/* Start the "heaviest" search with no upper bound. */
+	max_cpu_irq = ULONG_MAX;
+
+tryanothercpu:
+	/* Look for heaviest loaded processor.
+	 * We may come back to get the next heaviest loaded processor.
+	 * Skip processors with trivial loads.
+	 */
+	tmp_cpu_irq = 0;
+	tmp_loaded = -1;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		if (i != CPU_TO_PACKAGEINDEX(i))
+			continue;
+		/* Already tried this package (or heavier) on a prior pass. */
+		if (max_cpu_irq <= CPU_IRQ(i))
+			continue;
+		if (tmp_cpu_irq < CPU_IRQ(i)) {
+			tmp_cpu_irq = CPU_IRQ(i);
+			tmp_loaded = i;
+		}
+	}
+
+	if (tmp_loaded == -1) {
+	 /* In the case of small number of heavy interrupt sources,
+	  * loading some of the cpus too much. We use Ingo's original
+	  * approach to rotate them around.
+	  */
+		if (!first_attempt && imbalance >= useful_load_threshold) {
+			rotate_irqs_among_cpus(useful_load_threshold);
+			return;
+		}
+		goto not_worth_the_effort;
+	}
+
+	first_attempt = 0;		/* heaviest search */
+	max_cpu_irq = tmp_cpu_irq;	/* load */
+	max_loaded = tmp_loaded;	/* processor */
+	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
+
+	Dprintk("max_loaded cpu = %d\n", max_loaded);
+	Dprintk("min_loaded cpu = %d\n", min_loaded);
+	Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
+	Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
+	Dprintk("load imbalance = %lu\n", imbalance);
+
+	/* if imbalance is less than approx 10% of max load, then
+	 * observe diminishing returns action. - quit
+	 */
+	if (imbalance < (max_cpu_irq >> 3)) {
+		Dprintk("Imbalance too trivial\n");
+		goto not_worth_the_effort;
+	}
+
+tryanotherirq:
+	/* if we select an IRQ to move that can't go where we want, then
+	 * see if there is another one to try.
+	 */
+	move_this_load = 0;
+	selected_irq = -1;
+	for (j = 0; j < NR_IRQS; j++) {
+		/* Is this an active IRQ? */
+		if (!irq_desc[j].action)
+			continue;
+		/* Skip IRQs whose load would overshoot the imbalance. */
+		if (imbalance <= IRQ_DELTA(max_loaded,j))
+			continue;
+		/* Try to find the IRQ that is closest to the imbalance
+		 * without going over.
+		 */
+		if (move_this_load < IRQ_DELTA(max_loaded,j)) {
+			move_this_load = IRQ_DELTA(max_loaded,j);
+			selected_irq = j;
+		}
+	}
+	if (selected_irq == -1) {
+		goto tryanothercpu;
+	}
+
+	imbalance = move_this_load;
+
+	/* For physical_balance case, we accumlated both load
+	 * values in the one of the siblings cpu_irq[],
+	 * to use the same code for physical and logical processors
+	 * as much as possible.
+	 *
+	 * NOTE: the cpu_irq[] array holds the sum of the load for
+	 * sibling A and sibling B in the slot for the lowest numbered
+	 * sibling (A), _AND_ the load for sibling B in the slot for
+	 * the higher numbered sibling.
+	 *
+	 * We seek the least loaded sibling by making the comparison
+	 * (A+B)/2 vs B
+	 */
+	load = CPU_IRQ(min_loaded) >> 1;
+	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+		if (load > CPU_IRQ(j)) {
+			/* This won't change cpu_sibling_map[min_loaded] */
+			load = CPU_IRQ(j);
+			min_loaded = j;
+		}
+	}
+
+	/* Only migrate if the target CPU is in the IRQ's allowed mask. */
+	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
+	target_cpu_mask = cpumask_of_cpu(min_loaded);
+	cpus_and(tmp, target_cpu_mask, allowed_mask);
+
+	if (!cpus_empty(tmp)) {
+		irq_desc_t *desc = irq_desc + selected_irq;
+		unsigned long flags;
+
+		Dprintk("irq = %d moved to cpu = %d\n",
+				selected_irq, min_loaded);
+		/* mark for change destination */
+		spin_lock_irqsave(&desc->lock, flags);
+		pending_irq_balance_cpumask[selected_irq] =
+					cpumask_of_cpu(min_loaded);
+		spin_unlock_irqrestore(&desc->lock, flags);
+		/* Since we made a change, come back sooner to
+		 * check for more variation.
+		 */
+		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
+		return;
+	}
+	goto tryanotherirq;
+
+not_worth_the_effort:
+	/*
+	 * if we did not find an IRQ to move, then adjust the time interval
+	 * upward
+	 */
+	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
+		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
+	Dprintk("IRQ worth rotating not found\n");
+	return;
+}
+
+/*
+ * Main loop of the "kirqd" kernel thread: sleep for (adaptive)
+ * balanced_irq_interval jiffies, then run one balancing pass.
+ * Never returns in practice; the trailing return silences the
+ * kernel_thread() prototype.
+ */
+static int balanced_irq(void *unused)
+{
+	int i;
+	unsigned long prev_balance_time = jiffies;
+	long time_remaining = balanced_irq_interval;
+
+	daemonize("kirqd");
+
+	/* push everything to CPU 0 to give us a starting point. */
+	for (i = 0 ; i < NR_IRQS ; i++) {
+		pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
+	}
+
+	for ( ; ; ) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		time_remaining = schedule_timeout(time_remaining);
+		/* Honor suspend/resume freezing requests. */
+		try_to_freeze(PF_FREEZE);
+		if (time_after(jiffies,
+				prev_balance_time+balanced_irq_interval)) {
+			do_irq_balance();
+			prev_balance_time = jiffies;
+			time_remaining = balanced_irq_interval;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Late-initcall setup for the IRQ balancing daemon: decide whether
+ * balancing applies (multi-CPU, not disabled on the command line or by
+ * the subarchitecture), allocate the per-CPU delta/last-count arrays,
+ * and spawn the kirqd thread.  Always returns 0 (initcall success),
+ * even when balancing is left disabled or allocation fails.
+ */
+static int __init balanced_irq_init(void)
+{
+	int i;
+	struct cpuinfo_x86 *c;
+	cpumask_t tmp;
+
+	/* tmp non-empty => CPUs numbered >= 4 exist (used for the
+	 * physical-package heuristic below). */
+	cpus_shift_right(tmp, cpu_online_map, 2);
+	c = &boot_cpu_data;
+	/* When not overwritten by the command line ask subarchitecture. */
+	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
+		irqbalance_disabled = NO_BALANCE_IRQ;
+	if (irqbalance_disabled)
+		return 0;
+
+	 /* disable irqbalance completely if there is only one processor online */
+	if (num_online_cpus() < 2) {
+		irqbalance_disabled = 1;
+		return 0;
+	}
+	/*
+	 * Enable physical balance only if more than 1 physical processor
+	 * is present
+	 */
+	if (smp_num_siblings > 1 && !cpus_empty(tmp))
+		physical_balance = 1;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+		irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
+			printk(KERN_ERR "balanced_irq_init: out of memory");
+			goto failed;
+		}
+		memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
+		memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
+	}
+
+	printk(KERN_INFO "Starting balanced_irq\n");
+	if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
+		return 0;
+	else
+		printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
+failed:
+	/* Unwind: free whatever per-CPU arrays were allocated. */
+	for (i = 0; i < NR_CPUS; i++) {
+		if(irq_cpu_data[i].irq_delta)
+			kfree(irq_cpu_data[i].irq_delta);
+		if(irq_cpu_data[i].last_irq)
+			kfree(irq_cpu_data[i].last_irq);
+	}
+	return 0;
+}
+
+/* Handler for the "noirqbalance" boot parameter: force balancing off. */
+int __init irqbalance_disable(char *str)
+{
+	irqbalance_disabled = 1;
+	return 0;
+}
+
+__setup("noirqbalance", irqbalance_disable);
+
+/*
+ * Apply a migration previously requested by the balancer: reprogram the
+ * IO-APIC affinity and clear the pending mask.  Called from the IRQ
+ * path with desc->lock already held (see comment below).
+ */
+static inline void move_irq(int irq)
+{
+	/* note - we hold the desc->lock */
+	if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
+		set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
+		cpus_clear(pending_irq_balance_cpumask[irq]);
+	}
+}
+
+late_initcall(balanced_irq_init);
+
+#else /* !CONFIG_IRQBALANCE */
+static inline void move_irq(int irq) { }
+#endif /* CONFIG_IRQBALANCE */
+
+#ifndef CONFIG_SMP
+/*
+ * Send an IPI to the local CPU (UP build).  Under Xen the local APIC is
+ * not directly accessible, so this is compiled to a no-op.
+ */
+void fastcall send_IPI_self(int vector)
+{
+#ifndef CONFIG_XEN
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+	cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	apic_write_around(APIC_ICR, cfg);
+#endif
+}
+#endif /* !CONFIG_SMP */
+
+
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+int pirq_entries [MAX_PIRQS];
+int pirqs_enabled;
+int skip_ioapic_setup;
+
+/* Handler for the "noapic" boot parameter: skip IO-APIC setup entirely. */
+static int __init ioapic_setup(char *str)
+{
+	skip_ioapic_setup = 1;
+	return 1;
+}
+
+__setup("noapic", ioapic_setup);
+
+/*
+ * Handler for the "pirq=" boot parameter: parse up to MAX_PIRQS IRQ
+ * numbers and record them in pirq_entries[] so broken MP-BIOS PIRQ
+ * routing can be overridden by hand.  ints[0] holds the count parsed
+ * by get_options(); the values are stored in reverse slot order.
+ */
+static int __init ioapic_pirq_setup(char *str)
+{
+	int i, max;
+	int ints[MAX_PIRQS+1];
+
+	get_options(str, ARRAY_SIZE(ints), ints);
+
+	/* -1 marks "no override" for a slot. */
+	for (i = 0; i < MAX_PIRQS; i++)
+		pirq_entries[i] = -1;
+
+	pirqs_enabled = 1;
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"PIRQ redirection, working around broken MP-BIOS.\n");
+	max = MAX_PIRQS;
+	if (ints[0] < MAX_PIRQS)
+		max = ints[0];
+
+	for (i = 0; i < max; i++) {
+		apic_printk(APIC_VERBOSE, KERN_DEBUG
+				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+		/*
+		 * PIRQs are mapped upside down, usually.
+		 */
+		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+	}
+	return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+/*
+ * Find the mp_irqs[] entry index for a given (IO-APIC, pin, type)
+ * triple, or -1 if the MP table has no matching entry.  MP_APIC_ALL
+ * entries match any IO-APIC.
+ */
+static int find_irq_entry(int apic, int pin, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++)
+		if (mp_irqs[i].mpc_irqtype == type &&
+		    (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+		     mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+		    mp_irqs[i].mpc_dstirq == pin)
+			return i;
+
+	return -1;
+}
+
+#ifndef CONFIG_XEN
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected.
+ * Scans the MP table for an entry of the given type whose source bus is
+ * a legacy bus (ISA/EISA/MCA/NEC98) and whose source IRQ matches;
+ * returns the destination pin, or -1 if none is found.
+ */
+static int find_isa_irq_pin(int irq, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].mpc_srcbus;
+
+		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+		    ) &&
+		    (mp_irqs[i].mpc_irqtype == type) &&
+		    (mp_irqs[i].mpc_srcbusirq == irq))
+
+			return mp_irqs[i].mpc_dstirq;
+	}
+	return -1;
+}
+#endif
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+/*
+ * Map a PCI (bus, slot, pin) to its IRQ number via the MP table.
+ * Returns an exact pin match when one exists; otherwise falls back to
+ * the first entry matching everything but the pin (broken mptables);
+ * -1 when the bus is unknown or nothing matches.
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+	int apic, i, best_guess = -1;
+
+	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
+		"slot:%d, pin:%d.\n", bus, slot, pin);
+	if (mp_bus_id_to_pci_bus[bus] == -1) {
+		printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		return -1;
+	}
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].mpc_srcbus;
+
+		/* Resolve which IO-APIC this entry targets. */
+		for (apic = 0; apic < nr_ioapics; apic++)
+			if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+			    mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+				break;
+
+		/* mpc_irqtype == 0 means an INT (vectored) entry;
+		 * slot is encoded in bits 2..6 of mpc_srcbusirq. */
+		if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+		    !mp_irqs[i].mpc_irqtype &&
+		    (bus == lbus) &&
+		    (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+			if (!(apic || IO_APIC_IRQ(irq)))
+				continue;
+
+			if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+				return irq;
+			/*
+			 * Use the first all-but-pin matching entry as a
+			 * best-guess fuzzy result for broken mptables.
+			 */
+			if (best_guess < 0)
+				best_guess = irq;
+		}
+	}
+	return best_guess;
+}
+
+#ifndef CONFIG_XEN
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+void __init setup_ioapic_dest(void)
+{
+	int pin, ioapic, irq, irq_entry;
+
+	if (skip_ioapic_setup == 1)
+		return;
+
+	/* Re-target every routed pin on every IO-APIC to TARGET_CPUS. */
+	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+			if (irq_entry == -1)
+				continue;
+			irq = pin_2_irq(irq_entry, ioapic, pin);
+			set_ioapic_affinity_irq(irq, TARGET_CPUS);
+		}
+
+	}
+}
+#endif /* !CONFIG_XEN */
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+/*
+ * EISA Edge/Level control register, ELCR: read the trigger bit for one
+ * ISA IRQ from I/O ports 0x4d0/0x4d1 (one bit per IRQ, 8 per port).
+ * IRQs >= 16 are invalid here; complain and default to edge (0).
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+	if (irq < 16) {
+		unsigned int port = 0x4d0 + (irq >> 3);
+		return (inb(port) >> (irq & 7)) & 1;
+	}
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"Broken MPtable reports ISA irq %d\n", irq);
+	return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx) (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx) (0)
+#define default_ISA_polarity(idx) (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx) (1)
+#define default_PCI_polarity(idx) (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx) (1)
+#define default_MCA_polarity(idx) (0)
+
+/* NEC98 interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_NEC98_trigger(idx) (0)
+#define default_NEC98_polarity(idx) (0)
+
+/*
+ * Decode the polarity for MP-table entry idx from the low two bits of
+ * mpc_irqflag.  Returns 0 for active-high, 1 for active-low.
+ * "Conforms" (0) defers to the per-bus default; reserved/invalid
+ * encodings warn and fall back to active-low.
+ */
+static int __init MPBIOS_polarity(int idx)
+{
+	int bus = mp_irqs[idx].mpc_srcbus;
+	int polarity;
+
+	/*
+	 * Determine IRQ line polarity (high active or low active):
+	 */
+	switch (mp_irqs[idx].mpc_irqflag & 3)
+	{
+		case 0: /* conforms, ie. bus-type dependent polarity */
+		{
+			switch (mp_bus_id_to_type[bus])
+			{
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					polarity = default_ISA_polarity(idx);
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					polarity = default_EISA_polarity(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					polarity = default_PCI_polarity(idx);
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					polarity = default_MCA_polarity(idx);
+					break;
+				}
+				case MP_BUS_NEC98: /* NEC 98 pin */
+				{
+					polarity = default_NEC98_polarity(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					polarity = 1;
+					break;
+				}
+			}
+			break;
+		}
+		case 1: /* high active */
+		{
+			polarity = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+		case 3: /* low active */
+		{
+			polarity = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+	}
+	return polarity;
+}
+
+/*
+ * Decode the trigger mode for MP-table entry idx from bits 2-3 of
+ * mpc_irqflag.  Returns 0 for edge, 1 for level.  "Conforms" (0)
+ * defers to the per-bus default (EISA consults the ELCR register).
+ * NOTE(review): the invalid case falls back to edge (0) while the
+ * reserved case falls back to level (1) — mirrors upstream behavior.
+ */
+static int MPBIOS_trigger(int idx)
+{
+	int bus = mp_irqs[idx].mpc_srcbus;
+	int trigger;
+
+	/*
+	 * Determine IRQ trigger mode (edge or level sensitive):
+	 */
+	switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+	{
+		case 0: /* conforms, ie. bus-type dependent */
+		{
+			switch (mp_bus_id_to_type[bus])
+			{
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					trigger = default_ISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					trigger = default_PCI_trigger(idx);
+					break;
+				}
+				case MP_BUS_MCA: /* MCA pin */
+				{
+					trigger = default_MCA_trigger(idx);
+					break;
+				}
+				case MP_BUS_NEC98: /* NEC 98 pin */
+				{
+					trigger = default_NEC98_trigger(idx);
+					break;
+				}
+				default:
+				{
+					printk(KERN_WARNING "broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+			break;
+		}
+		case 1: /* edge */
+		{
+			trigger = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			trigger = 1;
+			break;
+		}
+		case 3: /* level */
+		{
+			trigger = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			printk(KERN_WARNING "broken BIOS!!\n");
+			trigger = 0;
+			break;
+		}
+	}
+	return trigger;
+}
+
+/* Thin wrappers so callers don't reference MPBIOS_* decoding directly. */
+static inline int irq_polarity(int idx)
+{
+	return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+	return MPBIOS_trigger(idx);
+}
+
+/*
+ * Compute the IRQ number for MP-table entry idx targeting (apic, pin).
+ * Legacy buses use the source-bus IRQ directly; PCI pins are numbered
+ * sequentially across IO-APICs.  Finally applies any "pirq=" command
+ * line override for pins 16-23.
+ */
+static int pin_2_irq(int idx, int apic, int pin)
+{
+	int irq, i;
+	int bus = mp_irqs[idx].mpc_srcbus;
+
+	/*
+	 * Debugging check, we are in big trouble if this message pops up!
+	 */
+	if (mp_irqs[idx].mpc_dstirq != pin)
+		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+	switch (mp_bus_id_to_type[bus])
+	{
+		case MP_BUS_ISA: /* ISA pin */
+		case MP_BUS_EISA:
+		case MP_BUS_MCA:
+		case MP_BUS_NEC98:
+		{
+			irq = mp_irqs[idx].mpc_srcbusirq;
+			break;
+		}
+		case MP_BUS_PCI: /* PCI pin */
+		{
+			/*
+			 * PCI IRQs are mapped in order
+			 */
+			i = irq = 0;
+			while (i < apic)
+				irq += nr_ioapic_registers[i++];
+			irq += pin;
+
+			/*
+			 * For MPS mode, so far only needed by ES7000 platform
+			 */
+			if (ioapic_renumber_irq)
+				irq = ioapic_renumber_irq(apic, irq);
+
+			break;
+		}
+		default:
+		{
+			printk(KERN_ERR "unknown bus type %d.\n",bus);
+			irq = 0;
+			break;
+		}
+	}
+
+	/*
+	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
+	 */
+	if ((pin >= 16) && (pin <= 23)) {
+		if (pirq_entries[pin-16] != -1) {
+			if (!pirq_entries[pin-16]) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"disabling PIRQ%d\n", pin-16);
+			} else {
+				irq = pirq_entries[pin-16];
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"using PIRQ%d -> IRQ %d\n",
+						pin-16, irq);
+			}
+		}
+	}
+	return irq;
+}
+
+/*
+ * Determine the trigger mode for an IRQ number by scanning every
+ * (apic, pin) whose computed IRQ matches.  Returns the MP-table
+ * trigger (0 = edge, 1 = level); unrouted IRQs default to edge.
+ */
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	int apic, idx, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			idx = find_irq_entry(apic,pin,mp_INT);
+			if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+				return irq_trigger(idx);
+		}
+	}
+	/*
+	 * nonexistent IRQs are edge default
+	 */
+	return 0;
+}
+
+/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQ_VECTORS]; /* = { FIRST_DEVICE_VECTOR , 0 }; */
+
+/*
+ * Assign an interrupt vector for irq.  Under Xen the hypervisor owns
+ * vector allocation, so this asks it via PHYSDEVOP_ASSIGN_VECTOR
+ * instead of walking the vector space locally.  Returns the vector or
+ * -ENOSPC if the hypercall fails.  irq == AUTO_ASSIGN requests a
+ * vector without recording an irq->vector binding.
+ */
+int assign_irq_vector(int irq)
+{
+	static int current_vector = FIRST_DEVICE_VECTOR;
+	physdev_op_t op;
+
+	BUG_ON(irq >= NR_IRQ_VECTORS);
+	/* Already assigned?  Reuse the existing vector. */
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+		return IO_APIC_VECTOR(irq);
+
+	op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
+	op.u.irq_op.irq = irq;
+	if (HYPERVISOR_physdev_op(&op))
+		return -ENOSPC;
+	current_vector = op.u.irq_op.vector;
+
+	vector_irq[current_vector] = irq;
+	if (irq != AUTO_ASSIGN)
+		IO_APIC_VECTOR(irq) = current_vector;
+
+	return current_vector;
+}
+
+#ifndef CONFIG_XEN
+static struct hw_interrupt_type ioapic_level_type;
+static struct hw_interrupt_type ioapic_edge_type;
+
+#define IOAPIC_AUTO -1
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
+
+/*
+ * Install the level- or edge-type IRQ handler for (irq, vector) and set
+ * its IDT gate.  With vector-based indexing (use_pci_vector()) the
+ * irq_desc[] slot is the vector; otherwise it is the irq number.
+ * IOAPIC_AUTO consults the MP table via IO_APIC_irq_trigger().
+ */
+static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+{
+	if (use_pci_vector() && !platform_legacy_irq(irq)) {
+		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+				trigger == IOAPIC_LEVEL)
+			irq_desc[vector].handler = &ioapic_level_type;
+		else
+			irq_desc[vector].handler = &ioapic_edge_type;
+		set_intr_gate(vector, interrupt[vector]);
+	} else {
+		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+				trigger == IOAPIC_LEVEL)
+			irq_desc[irq].handler = &ioapic_level_type;
+		else
+			irq_desc[irq].handler = &ioapic_edge_type;
+		set_intr_gate(vector, interrupt[irq]);
+	}
+}
+#else
+#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
+#endif
+
+/*
+ * Program every IO-APIC redirection table entry from the MP table:
+ * decode trigger/polarity, map each pin to an IRQ, assign a vector,
+ * register the handler and write the 64-bit route entry (two 32-bit
+ * registers, high word first).  Unconnected pins are only logged.
+ */
+void __init setup_IO_APIC_irqs(void)
+{
+	struct IO_APIC_route_entry entry;
+	int apic, pin, idx, irq, first_notcon = 1, vector;
+	unsigned long flags;
+
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+		/*
+		 * add it to the IO-APIC irq-routing table:
+		 */
+		memset(&entry,0,sizeof(entry));
+
+		entry.delivery_mode = INT_DELIVERY_MODE;
+		entry.dest_mode = INT_DEST_MODE;
+		entry.mask = 0;				/* enable IRQ */
+		entry.dest.logical.logical_dest =
+					cpu_mask_to_apicid(TARGET_CPUS);
+
+		idx = find_irq_entry(apic,pin,mp_INT);
+		if (idx == -1) {
+			/* No MP-table route for this pin; just report it. */
+			if (first_notcon) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						" IO-APIC (apicid-pin) %d-%d",
+						mp_ioapics[apic].mpc_apicid,
+						pin);
+				first_notcon = 0;
+			} else
+				apic_printk(APIC_VERBOSE, ", %d-%d",
+					mp_ioapics[apic].mpc_apicid, pin);
+			continue;
+		}
+
+		entry.trigger = irq_trigger(idx);
+		entry.polarity = irq_polarity(idx);
+
+		/* Level-triggered entries start masked. */
+		if (irq_trigger(idx)) {
+			entry.trigger = 1;
+			entry.mask = 1;
+		}
+
+		irq = pin_2_irq(idx, apic, pin);
+		/*
+		 * skip adding the timer int on secondary nodes, which causes
+		 * a small but painful rift in the time-space continuum
+		 */
+		if (multi_timer_check(apic, irq))
+			continue;
+		else
+			add_pin_to_irq(irq, apic, pin);
+
+		if (/*!apic &&*/ !IO_APIC_IRQ(irq))
+			continue;
+
+		if (IO_APIC_IRQ(irq)) {
+			vector = assign_irq_vector(irq);
+			entry.vector = vector;
+			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+
+			/* Legacy IRQs are now handled by the IO-APIC. */
+			if (!apic && (irq < 16))
+				disable_8259A_irq(irq);
+		}
+		spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+	}
+
+	if (!first_notcon)
+		apic_printk(APIC_VERBOSE, " not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin:
+ */
+#ifndef CONFIG_XEN
+/*
+ * Route the timer IRQ (8259A master output) through IO-APIC pin `pin`
+ * with the given vector: mask LVT0, program an edge/active-high
+ * logically-delivered entry, install the edge handler for IRQ0 and
+ * re-enable it on the PIC.
+ */
+void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+{
+	struct IO_APIC_route_entry entry;
+	unsigned long flags;
+
+	memset(&entry,0,sizeof(entry));
+
+	disable_8259A_irq(0);
+
+	/* mask LVT0 */
+	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+	/*
+	 * We use logical delivery to get the timer IRQ
+	 * to the first CPU.
+	 */
+	entry.dest_mode = INT_DEST_MODE;
+	entry.mask = 0;					/* unmask IRQ now */
+	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.delivery_mode = INT_DELIVERY_MODE;
+	entry.polarity = 0;
+	entry.trigger = 0;
+	entry.vector = vector;
+
+	/*
+	 * The timer IRQ doesn't have to know that behind the
+	 * scene we have a 8259A-master in AEOI mode ...
+	 */
+	irq_desc[0].handler = &ioapic_edge_type;
+
+	/*
+	 * Add it to the IO-APIC irq-routing table:
+	 */
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+	io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	enable_8259A_irq(0);
+}
+
+/* Hook for flagging IO-APIC register values outside the expected set;
+ * intentionally a no-op (print_IO_APIC already logs the raw values). */
+static inline void UNEXPECTED_IO_APIC(void)
+{
+}
+
+/*
+ * Debug dump of every IO-APIC: registers 00-03 (guarded by the version
+ * the chip reports), the full redirection table, and the IRQ->pin
+ * mapping chains.  No-op when apic_verbosity == APIC_QUIET.
+ */
+void __init print_IO_APIC(void)
+{
+	int apic, i;
+	union IO_APIC_reg_00 reg_00;
+	union IO_APIC_reg_01 reg_01;
+	union IO_APIC_reg_02 reg_02;
+	union IO_APIC_reg_03 reg_03;
+	unsigned long flags;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+ 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+	for (i = 0; i < nr_ioapics; i++)
+		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+		       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+	/*
+	 * We are a bit conservative about what we expect.  We have to
+	 * know about every hardware change ASAP.
+	 */
+	printk(KERN_INFO "testing the IO APIC.......................\n");
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+
+	/* Snapshot the registers under the lock; regs 02/03 only exist
+	 * on sufficiently new IO-APIC versions. */
+	spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(apic, 0);
+	reg_01.raw = io_apic_read(apic, 1);
+	if (reg_01.bits.version >= 0x10)
+		reg_02.raw = io_apic_read(apic, 2);
+	if (reg_01.bits.version >= 0x20)
+		reg_03.raw = io_apic_read(apic, 3);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
+	if (reg_00.bits.ID >= get_physical_broadcast())
+		UNEXPECTED_IO_APIC();
+	if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+		UNEXPECTED_IO_APIC();
+
+	printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+	printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
+	if (	(reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+		(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+		(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+		(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+		(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+		(reg_01.bits.entries != 0x2E) &&
+		(reg_01.bits.entries != 0x3F)
+	)
+		UNEXPECTED_IO_APIC();
+
+	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
+	printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
+	if (	(reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+		(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+		(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+		(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+		(reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
+	)
+		UNEXPECTED_IO_APIC();
+	if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+		UNEXPECTED_IO_APIC();
+
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+	 * but the value of reg_02 is read as the previous read register
+	 * value, so ignore it if reg_02 == reg_01.
+	 */
+	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
+		if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+			UNEXPECTED_IO_APIC();
+	}
+
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+	 * or reg_03, but the value of reg_0[23] is read as the previous read
+	 * register value, so ignore it if reg_03 == reg_0[12].
+	 */
+	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+	    reg_03.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+		if (reg_03.bits.__reserved_1)
+			UNEXPECTED_IO_APIC();
+	}
+
+	printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+	printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+			  " Stat Dest Deli Vect:   \n");
+
+	for (i = 0; i <= reg_01.bits.entries; i++) {
+		struct IO_APIC_route_entry entry;
+
+		/* Read one 64-bit route entry as two 32-bit words. */
+		spin_lock_irqsave(&ioapic_lock, flags);
+		*(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+		*(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		printk(KERN_DEBUG " %02x %03X %02X  ",
+			i,
+			entry.dest.logical.logical_dest,
+			entry.dest.physical.physical_dest
+		);
+
+		printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
+			entry.mask,
+			entry.trigger,
+			entry.irr,
+			entry.polarity,
+			entry.delivery_status,
+			entry.dest_mode,
+			entry.delivery_mode,
+			entry.vector
+		);
+	}
+	}
+	if (use_pci_vector())
+		printk(KERN_INFO "Using vector-based indexing\n");
+	printk(KERN_DEBUG "IRQ to pin mappings:\n");
+	for (i = 0; i < NR_IRQS; i++) {
+		struct irq_pin_list *entry = irq_2_pin + i;
+		if (entry->pin < 0)
+			continue;
+		if (use_pci_vector() && !platform_legacy_irq(i))
+			printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+		else
+			printk(KERN_DEBUG "IRQ%d ", i);
+		/* Walk the shared-pin chain for this IRQ. */
+		for (;;) {
+			printk("-> %d:%d", entry->apic, entry->pin);
+			if (!entry->next)
+				break;
+			entry = irq_2_pin + entry->next;
+		}
+		printk("\n");
+	}
+
+	printk(KERN_INFO ".................................... done.\n");
+
+	return;
+}
+
+/*
+ * Dump a 256-bit local-APIC bitfield (e.g. ISR/TMR/IRR) as eight rows
+ * of 32 '0'/'1' characters, reading eight consecutive registers
+ * starting at `base`.  No-op when apic_verbosity == APIC_QUIET.
+ */
+static void print_APIC_bitfield (int base)
+{
+	unsigned int v;
+	int i, j;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+	for (i = 0; i < 8; i++) {
+		v = apic_read(base + i*0x10);
+		for (j = 0; j < 32; j++) {
+			if (v & (1<<j))
+				printk("1");
+			else
+				printk("0");
+		}
+		printk("\n");
+	}
+}
+
+/*
+ * Debug dump of the calling CPU's local APIC registers.  The dummy
+ * argument exists so it can be run via on_each_cpu().  LVT entries
+ * beyond the base set are guarded by maxlvt; some registers are only
+ * read on integrated (non-82489DX) APICs.
+ */
+void /*__init*/ print_local_APIC(void * dummy)
+{
+	unsigned int v, ver, maxlvt;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+		smp_processor_id(), hard_smp_processor_id());
+	v = apic_read(APIC_ID);
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
+	v = apic_read(APIC_LVR);
+	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+	ver = GET_APIC_VERSION(v);
+	maxlvt = get_maxlvt();
+
+	v = apic_read(APIC_TASKPRI);
+	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+	if (APIC_INTEGRATED(ver)) {			/* !82489DX */
+		v = apic_read(APIC_ARBPRI);
+		printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+			v & APIC_ARBPRI_MASK);
+		v = apic_read(APIC_PROCPRI);
+		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+	}
+
+	v = apic_read(APIC_EOI);
+	printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+	v = apic_read(APIC_RRR);
+	printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+	v = apic_read(APIC_LDR);
+	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+	v = apic_read(APIC_DFR);
+	printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+	v = apic_read(APIC_SPIV);
+	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+	printk(KERN_DEBUG "... APIC ISR field:\n");
+	print_APIC_bitfield(APIC_ISR);
+	printk(KERN_DEBUG "... APIC TMR field:\n");
+	print_APIC_bitfield(APIC_TMR);
+	printk(KERN_DEBUG "... APIC IRR field:\n");
+	print_APIC_bitfield(APIC_IRR);
+
+	if (APIC_INTEGRATED(ver)) {		/* !82489DX */
+		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+			apic_write(APIC_ESR, 0);
+		v = apic_read(APIC_ESR);
+		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_ICR);
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+	v = apic_read(APIC_ICR2);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+	v = apic_read(APIC_LVTT);
+	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+	if (maxlvt > 3) {                       /* PC is LVT#4. */
+		v = apic_read(APIC_LVTPC);
+		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+	}
+	v = apic_read(APIC_LVT0);
+	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+	v = apic_read(APIC_LVT1);
+	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+	if (maxlvt > 2) {			/* ERR is LVT#3. */
+		v = apic_read(APIC_LVTERR);
+		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_TMICT);
+	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+	v = apic_read(APIC_TMCCT);
+	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+	v = apic_read(APIC_TDCR);
+	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+	printk("\n");
+}
+
+/* Dump the local APIC of every online CPU (runs print_local_APIC on each). */
+void print_all_local_APICs (void)
+{
+	on_each_cpu(print_local_APIC, NULL, 1, 1);
+}
+
+/*
+ * Debug dump of the legacy 8259A PIC pair: IMR, IRR, ISR (read via
+ * OCW3 commands 0x0b/0x0a) and the ELCR edge/level register.  Master
+ * PIC is at ports 0x20/0x21, slave at 0xa0/0xa1.
+ */
+void /*__init*/ print_PIC(void)
+{
+	extern spinlock_t i8259A_lock;
+	unsigned int v;
+	unsigned long flags;
+
+	if (apic_verbosity == APIC_QUIET)
+		return;
+
+	printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	v = inb(0xa1) << 8 | inb(0x21);
+	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
+
+	v = inb(0xa0) << 8 | inb(0x20);
+	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
+
+	/* Select ISR (0x0b), read it, then restore IRR reads (0x0a). */
+	outb(0x0b,0xa0);
+	outb(0x0b,0x20);
+	v = inb(0xa0) << 8 | inb(0x20);
+	outb(0x0a,0xa0);
+	outb(0x0a,0x20);
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
+
+	v = inb(0x4d1) << 8 | inb(0x4d0);
+	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+#else
+void __init print_IO_APIC(void) { }
+#endif /* !CONFIG_XEN */
+
+/*
+ * Early IO-APIC initialization: reset the irq->pin mapping table and
+ * PIRQ overrides, read each IO-APIC's pin count from register 01, and
+ * clear all redirection entries (the BIOS may have left stale state).
+ */
+static void __init enable_IO_APIC(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	int i;
+	unsigned long flags;
+
+	for (i = 0; i < PIN_MAP_SIZE; i++) {
+		irq_2_pin[i].pin = -1;
+		irq_2_pin[i].next = 0;
+	}
+	/* "pirq=" was not given, so no overrides apply. */
+	if (!pirqs_enabled)
+		for (i = 0; i < MAX_PIRQS; i++)
+			pirq_entries[i] = -1;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (i = 0; i < nr_ioapics; i++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(i, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[i] = reg_01.bits.entries+1;
+	}
+
+	/*
+	 * Do not trust the IO-APIC being empty at bootup
+	 */
+	clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+/*
+ * Not an __init, needed by the reboot code: clear all IO-APIC routing
+ * and (on bare metal) reconnect the BSP's APIC to virtual-wire mode so
+ * legacy interrupts work after reboot.  Under Xen the hypervisor owns
+ * the APIC, so only the clearing is done.
+ */
+void disable_IO_APIC(void)
+{
+	/*
+	 * Clear the IO-APIC before rebooting:
+	 */
+	clear_IO_APIC();
+
+#ifndef CONFIG_XEN
+	disconnect_bsp_APIC();
+#endif
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
+ */
+
+#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
+/*
+ * Walk all IO-APICs, resolve APIC-ID conflicts / out-of-range IDs from
+ * the MP table, rewrite the hardware ID register to match, and fix up
+ * mp_irqs[] routing entries when an ID had to change.
+ * Stubbed out for Xen and NUMAQ builds (see #else below).
+ */
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+ union IO_APIC_reg_00 reg_00;
+ physid_mask_t phys_id_present_map;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ /*
+ * This is broken; anything with a real cpu count has to
+ * circumvent this idiocy regardless.
+ */
+ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mp_ioapics[apic].mpc_apicid;
+
+ /* MP-table ID out of range: fall back to the ID the hardware reports. */
+ if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ reg_00.bits.ID);
+ mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+ }
+
+ /* Don't check I/O APIC IDs for some xAPIC systems. They have
+ * no meaning without the serial APIC bus. */
+ if (NO_IOAPIC_CHECK)
+ continue;
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(phys_id_present_map,
+ mp_ioapics[apic].mpc_apicid)) {
+ /* Conflict: pick the first unused physical APIC ID instead. */
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ for (i = 0; i < get_physical_broadcast(); i++)
+ if (!physid_isset(i, phys_id_present_map))
+ break;
+ if (i >= get_physical_broadcast())
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ i);
+ physid_set(i, phys_id_present_map);
+ mp_ioapics[apic].mpc_apicid = i;
+ } else {
+ physid_mask_t tmp;
+ tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+ apic_printk(APIC_VERBOSE, "Setting %d in the "
+ "phys_id_present_map\n",
+ mp_ioapics[apic].mpc_apicid);
+ physids_or(phys_id_present_map, phys_id_present_map, tmp);
+ }
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mpc_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_dstapic == old_id)
+ mp_irqs[i].mpc_dstapic
+ = mp_ioapics[apic].mpc_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mpc_apicid);
+
+ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, reg_00.raw);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check: read the ID back; warn (but continue) on mismatch.
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+ printk("could not set ID!\n");
+ else
+ apic_printk(APIC_VERBOSE, " ok.\n");
+ }
+}
+#else
+static void __init setup_ioapic_ids_from_mpc(void) { }
+#endif
+
+#ifndef CONFIG_XEN
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+/*
+ * Probe whether timer interrupts are actually arriving: busy-wait for
+ * roughly ten ticks with IRQs enabled and check that jiffies advanced.
+ * Returns 1 if more than 4 ticks were observed, 0 otherwise.
+ */
+static int __init timer_irq_works(void)
+{
+ unsigned long t1 = jiffies;
+
+ local_irq_enable();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after one or two first
+ * ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+ if (jiffies - t1 > 4)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up a edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+/*
+ * Start an edge-triggered IO-APIC IRQ. Returns 1 if the IRQ was already
+ * pending on the 8259A (so the caller knows an edge may have been missed),
+ * 0 otherwise. See the comment block above for why this matters.
+ */
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+ int was_pending = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ /* Legacy ISA range: quiesce the 8259A and sample its pending state. */
+ if (irq < 16) {
+ disable_8259A_irq(irq);
+ if (i8259A_irq_pending(irq))
+ was_pending = 1;
+ }
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return was_pending;
+}
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+/*
+ * Ack an edge-triggered IRQ. If the IRQ is both pending and disabled,
+ * mask it at the IO-APIC to stop an IRQ storm from an unhandled device
+ * (see the comment above), then EOI the local APIC.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+ move_irq(irq);
+ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+ == (IRQ_PENDING | IRQ_DISABLED))
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+/* Level-triggered startup: just unmask; always reports "not pending". */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+ unmask_IO_APIC_irq(irq);
+
+ return 0; /* don't check for pending */
+}
+
+/*
+ * End-of-interrupt for a level-triggered IO-APIC IRQ: EOI the local APIC
+ * and, if the 82093AA erratum described below fired, manually simulate
+ * the missing EOI by bouncing the pin through edge mode.
+ */
+static void end_level_ioapic_irq (unsigned int irq)
+{
+ unsigned long v;
+ int i;
+
+ move_irq(irq);
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
+ i = IO_APIC_VECTOR(irq);
+
+ /* Fetch the local-APIC TMR word holding this vector's trigger bit. */
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+ ack_APIC_irq();
+
+ /* TMR bit clear => delivered as edge: apply the erratum workaround. */
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+ spin_lock(&ioapic_lock);
+ __mask_and_edge_IO_APIC_irq(irq);
+ __unmask_and_level_IO_APIC_irq(irq);
+ spin_unlock(&ioapic_lock);
+ }
+}
+
+#ifdef CONFIG_PCI_MSI
+/*
+ * Vector-indexed wrappers for PCI MSI: each translates a vector back to
+ * its IRQ and forwards to the corresponding irq-based operation above.
+ */
+static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ return startup_edge_ioapic_irq(irq);
+}
+
+static void ack_edge_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ ack_edge_ioapic_irq(irq);
+}
+
+static unsigned int startup_level_ioapic_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ return startup_level_ioapic_irq (irq);
+}
+
+static void end_level_ioapic_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ end_level_ioapic_irq(irq);
+}
+
+static void mask_IO_APIC_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ mask_IO_APIC_irq(irq);
+}
+
+static void unmask_IO_APIC_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ unmask_IO_APIC_irq(irq);
+}
+
+static void set_ioapic_affinity_vector (unsigned int vector,
+ cpumask_t cpu_mask)
+{
+ int irq = vector_to_irq(vector);
+
+ set_ioapic_affinity_irq(irq, cpu_mask);
+}
+#endif
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+/* Edge IRQs: acked in .ack (ack_edge_ioapic), .end is a no-op path. */
+static struct hw_interrupt_type ioapic_edge_type = {
+	.typename 	= "IO-APIC-edge",
+	.startup 	= startup_edge_ioapic,
+	.shutdown 	= shutdown_edge_ioapic,
+	.enable 	= enable_edge_ioapic,
+	.disable 	= disable_edge_ioapic,
+	.ack 		= ack_edge_ioapic,
+	.end 		= end_edge_ioapic,
+	.set_affinity 	= set_ioapic_affinity,
+};
+
+/* Level IRQs: masked-and-acked up front, APIC EOI happens in .end. */
+static struct hw_interrupt_type ioapic_level_type = {
+	.typename 	= "IO-APIC-level",
+	.startup 	= startup_level_ioapic,
+	.shutdown 	= shutdown_level_ioapic,
+	.enable 	= enable_level_ioapic,
+	.disable 	= disable_level_ioapic,
+	.ack 		= mask_and_ack_level_ioapic,
+	.end 		= end_level_ioapic,
+	.set_affinity 	= set_ioapic_affinity,
+};
+#endif /* !CONFIG_XEN */
+
+/*
+ * Route every IRQ that should go through the IO-APIC but got no vector
+ * assigned back to the legacy 8259A (for IRQs < 16); on native kernels
+ * any other such IRQ is parked on no_irq_type.
+ */
+static inline void init_IO_APIC_traps(void)
+{
+ int irq;
+
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ int tmp = irq;
+ /* With PCI vectors enabled, map vector -> irq first; skip holes. */
+ if (use_pci_vector()) {
+ if (!platform_legacy_irq(tmp))
+ if ((tmp = vector_to_irq(tmp)) == -1)
+ continue;
+ }
+ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+ * interrupt if we can..
+ */
+ if (irq < 16)
+ make_8259A_irq(irq);
+#ifndef CONFIG_XEN
+ else
+ /* Strange. Oh, well.. */
+ irq_desc[irq].handler = &no_irq_type;
+#endif
+ }
+ }
+}
+
+#ifndef CONFIG_XEN
+/* Enable the local-APIC LVT0 "IRQ" by clearing its mask bit. */
+static void enable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+/* Disable the local-APIC LVT0 "IRQ" by setting its mask bit. */
+static void disable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+/* Plain local-APIC EOI; nothing IO-APIC related to do here. */
+static void ack_lapic_irq (unsigned int irq)
+{
+ ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+/* Controller used when the timer is wired straight to LVT0 ("Virtual Wire"). */
+static struct hw_interrupt_type lapic_irq_type = {
+	.typename 	= "local-APIC-edge",
+	.startup 	= NULL, /* startup_irq() not used for IRQ0 */
+	.shutdown 	= NULL, /* shutdown_irq() not used for IRQ0 */
+	.enable 	= enable_lapic_irq,
+	.disable 	= disable_lapic_irq,
+	.ack 		= ack_lapic_irq,
+	.end 		= end_lapic_irq
+};
+
+/* Arm the NMI watchdog: route 8259A output to all CPUs' LVT0 as NMI. */
+static void setup_nmi (void)
+{
+ /*
+ * Dirty trick to enable the NMI watchdog ...
+ * We put the 8259A master into AEOI mode and
+ * unmask on all local APICs LVT0 as NMI.
+ *
+ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+ * is from Maciej W. Rozycki - so we do not have to EOI from
+ * the NMI handler or the timer interrupt.
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
+
+ on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
+
+ apic_printk(APIC_VERBOSE, " done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only one way of sending
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
+ * not support the ExtINT mode, unfortunately. We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+/*
+ * Temporarily repurpose the RTC pin (ISA IRQ8) as an ExtINT source and
+ * fire a burst of RTC periodic interrupts at it, so the 8259A receives
+ * the INTA cycles it may be waiting for; then restore the original
+ * redirection entry and RTC state. See the comment block above.
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+ int pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+ unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1)
+ return;
+
+ /* Save the current RTC-pin redirection entry (two 32-bit halves). */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ clear_IO_APIC_pin(0, pin);
+
+ /* Build a temporary ExtINT entry targeting this CPU. */
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest.physical.physical_dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Enable RTC periodic interrupts at a fast rate. */
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ /* Spin up to ~1s; each observed periodic-interrupt flag shortens the wait. */
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
+
+ /* Restore RTC state and the original redirection entry. */
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(0, pin);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+/*
+ * Verify that timer (IRQ0) interrupts actually work, trying in order:
+ * the IO-APIC pin from the MP table, the ExtINT pin, the local-APIC
+ * "Virtual Wire" LVT0 route, and finally plain 8259A ExtINT mode.
+ * Panics if no route delivers ticks. See the comment block above.
+ */
+static inline void check_timer(void)
+{
+ int pin1, pin2;
+ int vector;
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+ vector = assign_irq_vector(0);
+ set_intr_gate(vector, interrupt[0]);
+
+ /*
+ * Subtle, code in do_timer_interrupt() expects an AEOI
+ * mode for the 8259A whenever interrupts are routed
+ * through I/O APICs. Also IRQ0 has to be enabled in
+ * the 8259A which implies the virtual wire has to be
+ * disabled in the local APIC.
+ */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ timer_ack = 1;
+ enable_8259A_irq(0);
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+ printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+ /* Attempt 1: IRQ0 through the IO-APIC pin the MP table claims. */
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ unmask_IO_APIC_irq(0);
+ if (timer_irq_works()) {
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ check_nmi_watchdog();
+ }
+ return;
+ }
+ clear_IO_APIC_pin(0, pin1);
+ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+ }
+
+ /* Attempt 2: IRQ0 via the 8259A cascaded through the ExtINT pin. */
+ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+ if (pin2 != -1) {
+ printk("\n..... (found pin %d) ...", pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ setup_ExtINT_IRQ0_pin(pin2, vector);
+ if (timer_irq_works()) {
+ printk("works.\n");
+ if (pin1 != -1)
+ replace_pin_at_irq(0, 0, pin1, 0, pin2);
+ else
+ add_pin_to_irq(0, 0, pin2);
+ if (nmi_watchdog == NMI_IO_APIC) {
+ setup_nmi();
+ check_nmi_watchdog();
+ }
+ return;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ clear_IO_APIC_pin(0, pin2);
+ }
+ printk(" failed.\n");
+
+ if (nmi_watchdog == NMI_IO_APIC) {
+ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+ nmi_watchdog = 0;
+ }
+
+ /* Attempt 3: deliver the timer via local-APIC LVT0 in fixed mode. */
+ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+ disable_8259A_irq(0);
+ irq_desc[0].handler = &lapic_irq_type;
+ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ enable_8259A_irq(0);
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ printk(" failed.\n");
+
+ /* Attempt 4: last resort - classic 8259A ExtINT delivery. */
+ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+ timer_ack = 0;
+ init_8259A(0);
+ make_8259A_irq(0);
+ apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+ unlock_ExtINT_logic();
+
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ printk(" failed :(.\n");
+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
+ "report. Then try booting with the 'noapic' option");
+}
+#else
+#define check_timer() ((void)0)
+#endif
+
+/*
+ *
+ * IRQ's that are handled by the PIC in the MPS IOAPIC case.
+ * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
+ * Linux doesn't really care, as it's not actually used
+ * for any interrupt handling anyway.
+ */
+#define PIC_IRQS (1 << PIC_CASCADE_IR)
+
+/*
+ * Top-level IO-APIC bring-up: enable the IO-APICs, decide which IRQs
+ * they own (all under ACPI, all but the PIC cascade otherwise), fix up
+ * APIC IDs from the MP table when not using ACPI, program the pins,
+ * install the traps, and verify the timer works.
+ */
+void __init setup_IO_APIC(void)
+{
+ enable_IO_APIC();
+
+ if (acpi_ioapic)
+ io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
+ else
+ io_apic_irqs = ~PIC_IRQS;
+
+ printk("ENABLING IO-APIC IRQs\n");
+
+ /*
+ * Set up IO-APIC IRQ routing.
+ */
+ if (!acpi_ioapic)
+ setup_ioapic_ids_from_mpc();
+#ifndef CONFIG_XEN
+ sync_Arb_IDs();
+#endif
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ check_timer();
+ if (!acpi_ioapic)
+ print_IO_APIC();
+}
+
+/*
+ * Called after all the initialization is done. If we didn't find any
+ * APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+ /* -1 appears to mean "undetermined" - TODO confirm against the flag's definition. */
+ if(sis_apic_bug == -1)
+ sis_apic_bug = 0;
+ return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+/* Per-IO-APIC suspend image: sysdev handle plus a trailing array of
+ * saved redirection entries, sized at allocation time. */
+struct sysfs_ioapic_data {
+	struct sys_device dev;
+	struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+
+/* Suspend hook: snapshot every redirection entry of this IO-APIC into
+ * its sysfs_ioapic_data buffer (two 32-bit reads per entry). */
+static int ioapic_suspend(struct sys_device *dev, u32 state)
+{
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+ unsigned long flags;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+ *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+ *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+/* Resume hook: restore the IO-APIC's ID (if it no longer matches the MP
+ * table) and replay every redirection entry saved by ioapic_suspend(). */
+static int ioapic_resume(struct sys_device *dev)
+{
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+ unsigned long flags;
+ union IO_APIC_reg_00 reg_00;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(dev->id, 0);
+ if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
+ reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+ io_apic_write(dev->id, 0, reg_00.raw);
+ }
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+ io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+ io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+/* sysdev class binding the suspend/resume hooks above to "ioapic" devices. */
+static struct sysdev_class ioapic_sysdev_class = {
+	set_kset_name("ioapic"),
+	.suspend = ioapic_suspend,
+	.resume = ioapic_resume,
+};
+
+/*
+ * Register one sysdev per IO-APIC so its redirection entries survive
+ * suspend/resume. Failure for an individual IO-APIC is logged and
+ * tolerated (that IO-APIC simply won't be saved/restored).
+ */
+static int __init ioapic_init_sysfs(void)
+{
+ struct sys_device * dev;
+ int i, size, error = 0;
+
+ error = sysdev_class_register(&ioapic_sysdev_class);
+ if (error)
+ return error;
+
+ for (i = 0; i < nr_ioapics; i++ ) {
+ /* Header plus one saved route entry per redirection register. */
+ size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+ * sizeof(struct IO_APIC_route_entry);
+ mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+ if (!mp_ioapic_data[i]) {
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+ continue;
+ }
+ memset(mp_ioapic_data[i], 0, size);
+ dev = &mp_ioapic_data[i]->dev;
+ dev->id = i;
+ dev->cls = &ioapic_sysdev_class;
+ error = sysdev_register(dev);
+ if (error) {
+ kfree(mp_ioapic_data[i]);
+ mp_ioapic_data[i] = NULL;
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+ continue;
+ }
+ }
+
+ return 0;
+}
+
+/* --------------------------------------------------------------------------
+ ACPI-based IOAPIC Configuration
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+/*
+ * Pick a unique physical APIC ID for the given IO-APIC, preferring the
+ * requested apic_id, falling back to the hardware's current ID or the
+ * first free ID on conflict, and program it into the chip.
+ * Returns the ID actually in use. Under Xen the whole body is compiled
+ * out and the requested apic_id is returned unchanged.
+ */
+int __init io_apic_get_unique_id (int ioapic, int apic_id)
+{
+#ifndef CONFIG_XEN
+ union IO_APIC_reg_00 reg_00;
+ static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+ physid_mask_t tmp;
+ unsigned long flags;
+ int i = 0;
+
+ /*
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
+ * supports up to 16 on one shared APIC bus.
+ *
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+ * advantage of new APIC bus architecture.
+ */
+
+ /* Lazily seed the used-ID map from the present CPUs on first call. */
+ if (physids_empty(apic_id_map))
+ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (apic_id >= get_physical_broadcast()) {
+ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+ "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ apic_id = reg_00.bits.ID;
+ }
+
+ /*
+ * Every APIC in a system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(apic_id_map, apic_id)) {
+
+ for (i = 0; i < get_physical_broadcast(); i++) {
+ if (!check_apicid_used(apic_id_map, i))
+ break;
+ }
+
+ if (i == get_physical_broadcast())
+ panic("Max apic_id exceeded!\n");
+
+ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+ "trying %d\n", ioapic, apic_id, i);
+
+ apic_id = i;
+ }
+
+ /* Reserve the chosen ID in the map. */
+ tmp = apicid_to_cpu_present(apic_id);
+ physids_or(apic_id_map, apic_id_map, tmp);
+
+ /* Program the hardware only if its current ID differs, then verify. */
+ if (reg_00.bits.ID != apic_id) {
+ reg_00.bits.ID = apic_id;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Sanity check */
+ if (reg_00.bits.ID != apic_id)
+ panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
+ }
+
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+#endif /* !CONFIG_XEN */
+
+ return apic_id;
+}
+
+
+/* Return the version field from the IO-APIC's register 1. */
+int __init io_apic_get_version (int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.version;
+}
+
+
+/* Return the raw "entries" field (redirection-entry count minus one)
+ * from the IO-APIC's register 1. */
+int __init io_apic_get_redir_entries (int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.entries;
+}
+
+
+/*
+ * Program one IO-APIC pin for a PCI IRQ (trigger + polarity from ACPI),
+ * assign a vector, register the edge/level handler, and write the entry
+ * masked; the driver enables it later. Returns 0 or -EINVAL.
+ */
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ /* NOTE(review): message hardcodes "IRQ 0" but fires for any !IO_APIC_IRQ(irq). */
+ if (!IO_APIC_IRQ(irq)) {
+ printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ ioapic);
+ return -EINVAL;
+ }
+
+ /*
+ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+ * Note that we mask (disable) IRQs now -- these get enabled when the
+ * corresponding device driver registers for this IRQ.
+ */
+
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.trigger = edge_level;
+ entry.polarity = active_high_low;
+ entry.mask = 1;
+
+ /*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= 16)
+ add_pin_to_irq(irq, ioapic, pin);
+
+ entry.vector = assign_irq_vector(irq);
+
+ apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
+ "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
+ mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+ edge_level, active_high_low);
+
+ ioapic_register_intr(irq, entry.vector, edge_level);
+
+ /* Legacy IRQ on IO-APIC #0: take it away from the 8259A. */
+ if (!ioapic && (irq < 16))
+ disable_8259A_irq(irq);
+
+ /* Write high half first so the entry stays masked until fully written. */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+#endif /*CONFIG_ACPI_BOOT*/
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c
index 89c1c7e38f..3aa6c5a4cf 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c
@@ -1,19 +1,110 @@
+/*
+ * linux/arch/i386/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/ioport.h>
-#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/slab.h>
-#include <asm-xen/xen-public/dom0_ops.h>
+#include <linux/thread_info.h>
+#include <asm-xen/xen-public/physdev.h>
+
+/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
+{
+ unsigned long mask;
+ unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
+ unsigned int low_index = base & (BITS_PER_LONG-1);
+ int length = low_index + extent;
+
+ /* Partial leading word: build a mask covering [low_index, end-of-range). */
+ if (low_index != 0) {
+ mask = (~0UL << low_index);
+ if (length < BITS_PER_LONG)
+ mask &= ~(~0UL << length);
+ if (new_value)
+ *bitmap_base++ |= mask;
+ else
+ *bitmap_base++ &= ~mask;
+ length -= BITS_PER_LONG;
+ }
+
+ /* Whole words in the middle: set or clear in one store each. */
+ mask = (new_value ? ~0UL : 0UL);
+ while (length >= BITS_PER_LONG) {
+ *bitmap_base++ = mask;
+ length -= BITS_PER_LONG;
+ }
+
+ /* Partial trailing word, if any bits remain. */
+ if (length > 0) {
+ mask = ~(~0UL << length);
+ if (new_value)
+ *bitmap_base++ |= mask;
+ else
+ *bitmap_base++ &= ~mask;
+ }
+}
+
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ *
+ * The bitmap is allocated lazily, initialised to all-ones (all ports
+ * blocked - x86 I/O-bitmap convention: set bit = access trapped; TODO
+ * confirm Xen uses the same polarity), and registered with the
+ * hypervisor. Granting access (turn_on) therefore CLEARS bits, hence
+ * the !turn_on inversion in the set_bitmap() call below.
+ */
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+ struct thread_struct * t = &current->thread;
+ unsigned long *bitmap;
+ physdev_op_t op;
+
+ /* Range check; "from + num <= from" also catches wraparound. */
+ if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
+ return -EPERM;
+ if (turn_on && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ /*
+ * If it's the first ioperm() call in this thread's lifetime, set the
+ * IO bitmap up. ioperm() is much less timing critical than clone(),
+ * this is why we delay this operation until now:
+ */
+ if (!t->io_bitmap_ptr) {
+ bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+ if (!bitmap)
+ return -ENOMEM;
+
+ memset(bitmap, 0xff, IO_BITMAP_BYTES);
+ t->io_bitmap_ptr = bitmap;
+
+ /* NOTE(review): hypercall result ignored; failure leaves the
+ * kernel-side bitmap out of sync with the hypervisor. */
+ op.cmd = PHYSDEVOP_SET_IOBITMAP;
+ op.u.set_iobitmap.bitmap = (unsigned long)bitmap;
+ op.u.set_iobitmap.nr_ports = IO_BITMAP_BITS;
+ HYPERVISOR_physdev_op(&op);
+ }
+
+ set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+
+ return 0;
+}
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
asmlinkage long sys_iopl(unsigned int new_io_pl)
{
unsigned int old_io_pl = current->thread.io_pl;
- dom0_op_t op;
+ physdev_op_t op;
if (new_io_pl > 3)
return -EINVAL;
@@ -22,9 +113,6 @@ asmlinkage long sys_iopl(unsigned int new_io_pl)
if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
return -EPERM;
- if (!(xen_start_info.flags & SIF_PRIVILEGED))
- return -EPERM;
-
/* Maintain OS privileges even if user attempts to relinquish them. */
if (new_io_pl == 0)
new_io_pl = 1;
@@ -33,19 +121,9 @@ asmlinkage long sys_iopl(unsigned int new_io_pl)
current->thread.io_pl = new_io_pl;
/* Force the change at ring 0. */
- op.cmd = DOM0_IOPL;
- op.u.iopl.domain = DOMID_SELF;
- op.u.iopl.iopl = new_io_pl;
- HYPERVISOR_dom0_op(&op);
+ op.cmd = PHYSDEVOP_SET_IOPL;
+ op.u.set_iopl.iopl = new_io_pl;
+ HYPERVISOR_physdev_op(&op);
return 0;
}
-
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-{
-#if 0
- printk(KERN_INFO "ioperm not fully supported - %s\n",
- turn_on ? "set iopl to 3" : "ignore resource release");
-#endif
- return turn_on ? sys_iopl(3) : 0;
-}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
new file mode 100644
index 0000000000..f31697ecb1
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
@@ -0,0 +1,297 @@
+/*
+ * linux/arch/i386/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+
+#ifndef CONFIG_X86_LOCAL_APIC
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * per-CPU IRQ handling contexts (thread information and stack)
+ */
+union irq_ctx {
+ struct thread_info tinfo;
+ u32 stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS];
+static union irq_ctx *softirq_ctx[NR_CPUS];
+#endif
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int do_IRQ(struct pt_regs *regs)
+{
+ /* high bits used in ret_from_ code */
+ int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
+#ifdef CONFIG_4KSTACKS
+ union irq_ctx *curctx, *irqctx;
+ u32 *isp;
+#endif
+
+ irq_enter();
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+ {
+ long esp;
+
+ __asm__ __volatile__("andl %%esp,%0" :
+ "=r" (esp) : "0" (THREAD_SIZE - 1));
+ if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ esp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+#ifdef CONFIG_4KSTACKS
+
+ curctx = (union irq_ctx *) current_thread_info();
+ irqctx = hardirq_ctx[smp_processor_id()];
+
+ /*
+ * this is where we switch to the IRQ stack. However, if we are
+ * already using the IRQ stack (because we interrupted a hardirq
+ * handler) we can't do that and just have to keep using the
+ * current stack (which is the irq stack already after all)
+ */
+ if (curctx != irqctx) {
+ int arg1, arg2, ebx;
+
+ /* build the stack frame on the IRQ stack */
+ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+ irqctx->tinfo.task = curctx->tinfo.task;
+ irqctx->tinfo.previous_esp = current_stack_pointer;
+
+ asm volatile(
+ " xchgl %%ebx,%%esp \n"
+ " call __do_IRQ \n"
+ " movl %%ebx,%%esp \n"
+ : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+ : "0" (irq), "1" (regs), "2" (isp)
+ : "memory", "cc", "ecx"
+ );
+ } else
+#endif
+ __do_IRQ(irq, regs);
+
+ irq_exit();
+
+ return 1;
+}
+
+#ifdef CONFIG_4KSTACKS
+
+/*
+ * These should really be __section__(".bss.page_aligned") as well, but
+ * gcc's 3.0 and earlier don't handle that correctly.
+ */
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+ __attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+ __attribute__((__aligned__(THREAD_SIZE)));
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+ union irq_ctx *irqctx;
+
+ if (hardirq_ctx[cpu])
+ return;
+
+ irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+ irqctx->tinfo.task = NULL;
+ irqctx->tinfo.exec_domain = NULL;
+ irqctx->tinfo.cpu = cpu;
+ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
+ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
+
+ hardirq_ctx[cpu] = irqctx;
+
+ irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+ irqctx->tinfo.task = NULL;
+ irqctx->tinfo.exec_domain = NULL;
+ irqctx->tinfo.cpu = cpu;
+ irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET;
+ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
+
+ softirq_ctx[cpu] = irqctx;
+
+ printk("CPU %u irqstacks, hard=%p soft=%p\n",
+ cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+ unsigned long flags;
+ struct thread_info *curctx;
+ union irq_ctx *irqctx;
+ u32 *isp;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ curctx = current_thread_info();
+ irqctx = softirq_ctx[smp_processor_id()];
+ irqctx->tinfo.task = curctx->task;
+ irqctx->tinfo.previous_esp = current_stack_pointer;
+
+ /* build the stack frame on the softirq stack */
+ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+
+ asm volatile(
+ " xchgl %%ebx,%%esp \n"
+ " call __do_softirq \n"
+ " movl %%ebx,%%esp \n"
+ : "=b"(isp)
+ : "0"(isp)
+ : "memory", "cc", "edx", "ecx", "eax"
+ );
+ }
+
+ local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(do_softirq);
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *) v, j;
+ struct irqaction * action;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_printf(p, " ");
+ for_each_cpu(j)
+ seq_printf(p, "CPU%d ",j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (!action)
+ goto skip;
+ seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+ seq_printf(p, " %14s", irq_desc[i].handler->typename);
+ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ } else if (i == NR_IRQS) {
+ seq_printf(p, "NMI: ");
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", nmi_count(j));
+ seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+ seq_printf(p, "LOC: ");
+ for_each_cpu(j)
+ seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
+ seq_putc(p, '\n');
+#endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+ }
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(cpumask_t map)
+{
+ unsigned int irq;
+ static int warned;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ cpumask_t mask;
+ if (irq == 2)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ printk("Breaking affinity for irq %i\n", irq);
+ mask = map;
+ }
+ if (irq_desc[irq].handler->set_affinity)
+ irq_desc[irq].handler->set_affinity(irq, mask);
+ else if (irq_desc[irq].action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+#if 0
+ barrier();
+ /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+ [note the nop - the interrupt-enable boundary on x86 is two
+ instructions from sti] - to flush out pending hardirqs and
+ IPIs. After this point nothing is supposed to reach this CPU." */
+ __asm__ __volatile__("sti; nop; cli");
+ barrier();
+#else
+ /* That doesn't seem sufficient. Give it 1ms. */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+#endif
+}
+#endif
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
index a5a80e4ea7..363010f1ed 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
@@ -18,6 +18,7 @@
#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/desc.h>
+#include <asm/mmu_context.h>
#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
static void flush_ldt(void *null)
@@ -72,7 +73,6 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
if (oldsize) {
make_pages_writable(oldldt, (oldsize * LDT_ENTRY_SIZE) /
PAGE_SIZE);
- flush_page_update_queue();
if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
@@ -89,7 +89,6 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
make_pages_readonly(new->ldt, (new->size * LDT_ENTRY_SIZE) /
PAGE_SIZE);
- flush_page_update_queue();
return 0;
}
@@ -102,14 +101,19 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
struct mm_struct * old_mm;
int retval = 0;
+ memset(&mm->context, 0, sizeof(mm->context));
init_MUTEX(&mm->context.sem);
- mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
down(&old_mm->context.sem);
retval = copy_ldt(&mm->context, &old_mm->context);
up(&old_mm->context.sem);
}
+ if (retval == 0) {
+ spin_lock(&mm_unpinned_lock);
+ list_add(&mm->context.unpinned, &mm_unpinned);
+ spin_unlock(&mm_unpinned_lock);
+ }
return retval;
}
@@ -124,13 +128,17 @@ void destroy_context(struct mm_struct *mm)
make_pages_writable(mm->context.ldt,
(mm->context.size * LDT_ENTRY_SIZE) /
PAGE_SIZE);
- flush_page_update_queue();
if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
kfree(mm->context.ldt);
mm->context.size = 0;
}
+ if (!mm->context.pinned) {
+ spin_lock(&mm_unpinned_lock);
+ list_del(&mm->context.unpinned);
+ spin_unlock(&mm_unpinned_lock);
+ }
}
static int read_ldt(void __user * ptr, unsigned long bytecount)
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c
new file mode 100644
index 0000000000..16f2ee8c80
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/mpparse.c
@@ -0,0 +1,1115 @@
+/*
+ * Intel Multiprocessor Specification 1.1 and 1.4
+ * compliant MP-table parsing routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Alan Cox : Added EBDA scanning
+ * Ingo Molnar : various cleanups and rewrites
+ * Maciej W. Rozycki: Bits for default MP configurations
+ * Paul Diefenbaugh: Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bitops.h>
+
+#include <asm/smp.h>
+#include <asm/acpi.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#include <bios_ebda.h>
+
+/* Have we found an MP table */
+int smp_found_config;
+unsigned int __initdata maxcpus = NR_CPUS;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES];
+int mp_bus_id_to_node [MAX_MP_BUSSES];
+int mp_bus_id_to_local [MAX_MP_BUSSES];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+int mp_current_pci_id;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_logical_apicid = -1U;
+/* Internal processor count */
+static unsigned int __initdata num_processors;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
+
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+
+static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
+
+#ifdef CONFIG_X86_NUMAQ
+static int MP_valid_apicid(int apicid, int version)
+{
+ return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
+}
+#elif !defined(CONFIG_XEN)
+static int MP_valid_apicid(int apicid, int version)
+{
+ if (version >= 0x14)
+ return apicid < 0xff;
+ else
+ return apicid < 0xf;
+}
+#endif
+
+#ifndef CONFIG_XEN
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ int ver, apicid;
+ physid_mask_t tmp;
+
+ if (!(m->mpc_cpuflag & CPU_ENABLED))
+ return;
+
+ apicid = mpc_apic_id(m, translation_table[mpc_record]);
+
+ if (m->mpc_featureflag&(1<<0))
+ Dprintk(" Floating point unit present.\n");
+ if (m->mpc_featureflag&(1<<7))
+ Dprintk(" Machine Exception supported.\n");
+ if (m->mpc_featureflag&(1<<8))
+ Dprintk(" 64 bit compare & exchange supported.\n");
+ if (m->mpc_featureflag&(1<<9))
+ Dprintk(" Internal APIC present.\n");
+ if (m->mpc_featureflag&(1<<11))
+ Dprintk(" SEP present.\n");
+ if (m->mpc_featureflag&(1<<12))
+ Dprintk(" MTRR present.\n");
+ if (m->mpc_featureflag&(1<<13))
+ Dprintk(" PGE present.\n");
+ if (m->mpc_featureflag&(1<<14))
+ Dprintk(" MCA present.\n");
+ if (m->mpc_featureflag&(1<<15))
+ Dprintk(" CMOV present.\n");
+ if (m->mpc_featureflag&(1<<16))
+ Dprintk(" PAT present.\n");
+ if (m->mpc_featureflag&(1<<17))
+ Dprintk(" PSE present.\n");
+ if (m->mpc_featureflag&(1<<18))
+ Dprintk(" PSN present.\n");
+ if (m->mpc_featureflag&(1<<19))
+ Dprintk(" Cache Line Flush Instruction present.\n");
+ /* 20 Reserved */
+ if (m->mpc_featureflag&(1<<21))
+ Dprintk(" Debug Trace and EMON Store present.\n");
+ if (m->mpc_featureflag&(1<<22))
+ Dprintk(" ACPI Thermal Throttle Registers present.\n");
+ if (m->mpc_featureflag&(1<<23))
+ Dprintk(" MMX present.\n");
+ if (m->mpc_featureflag&(1<<24))
+ Dprintk(" FXSR present.\n");
+ if (m->mpc_featureflag&(1<<25))
+ Dprintk(" XMM present.\n");
+ if (m->mpc_featureflag&(1<<26))
+ Dprintk(" Willamette New Instructions present.\n");
+ if (m->mpc_featureflag&(1<<27))
+ Dprintk(" Self Snoop present.\n");
+ if (m->mpc_featureflag&(1<<28))
+ Dprintk(" HT present.\n");
+ if (m->mpc_featureflag&(1<<29))
+ Dprintk(" Thermal Monitor present.\n");
+ /* 30, 31 Reserved */
+
+
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ Dprintk(" Bootup CPU\n");
+ boot_cpu_physical_apicid = m->mpc_apicid;
+ boot_cpu_logical_apicid = apicid;
+ }
+
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ " Processor ignored.\n", NR_CPUS);
+ return;
+ }
+
+ if (num_processors >= maxcpus) {
+ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+ " Processor ignored.\n", maxcpus);
+ return;
+ }
+ num_processors++;
+ ver = m->mpc_apicver;
+
+ if (!MP_valid_apicid(apicid, ver)) {
+ printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
+ m->mpc_apicid, MAX_APICS);
+ --num_processors;
+ return;
+ }
+
+ tmp = apicid_to_cpu_present(apicid);
+ physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
+
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+ ver = 0x10;
+ }
+ apic_version[m->mpc_apicid] = ver;
+ bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+}
+#else
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ num_processors++;
+}
+#endif /* CONFIG_XEN */
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+ char str[7];
+
+ memcpy(str, m->mpc_bustype, 6);
+ str[6] = 0;
+
+ mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+
+ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+ mpc_oem_pci_bus(m, translation_table[mpc_record]);
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+ mp_current_pci_id++;
+ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
+ } else {
+ printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+ }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+ if (!(m->mpc_flags & MPC_APIC_USABLE))
+ return;
+
+ printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
+ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
+ MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+ }
+ if (!m->mpc_apicaddr) {
+ printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+ " found in MP table, skipping!\n");
+ return;
+ }
+ mp_ioapics[nr_ioapics] = *m;
+ nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+ mp_irqs [mp_irq_entries] = *m;
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+ Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+ /*
+ * Well it seems all SMP boards in existence
+ * use ExtINT/LVT1 == LINT0 and
+ * NMI/LVT2 == LINT1 - the following check
+ * will show us if this assumptions is false.
+ * Until then we do not have to add baggage.
+ */
+ if ((m->mpc_irqtype == mp_ExtINT) &&
+ (m->mpc_destapiclint != 0))
+ BUG();
+ if ((m->mpc_irqtype == mp_NMI) &&
+ (m->mpc_destapiclint != 1))
+ BUG();
+}
+
+#ifdef CONFIG_X86_NUMAQ
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+ printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+
+ if (mpc_record >= MAX_MPC_ENTRY)
+ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+ else
+ translation_table[mpc_record] = m; /* stash this for later */
+ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+ node_set_online(m->trans_quad);
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+ unsigned short oemsize)
+{
+ int count = sizeof (*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+
+ mpc_record = 0;
+ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+ if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+ {
+ printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->oem_signature[0],
+ oemtable->oem_signature[1],
+ oemtable->oem_signature[2],
+ oemtable->oem_signature[3]);
+ return;
+ }
+ if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+ {
+ printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+ return;
+ }
+ while (count < oemtable->oem_length) {
+ switch (*oemptr) {
+ case MP_TRANSLATION:
+ {
+ struct mpc_config_translation *m=
+ (struct mpc_config_translation *)oemptr;
+ MP_translation_info(m);
+ oemptr += sizeof(*m);
+ count += sizeof(*m);
+ ++mpc_record;
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+ return;
+ }
+ }
+ }
+}
+
+static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+ char *productid)
+{
+ if (strncmp(oem, "IBM NUMA", 8))
+ printk("Warning! May not be a NUMA-Q system!\n");
+ if (mpc->mpc_oemptr)
+ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+ mpc->mpc_oemsize);
+}
+#endif /* CONFIG_X86_NUMAQ */
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+ char str[16];
+ char oem[10];
+ int count=sizeof(*mpc);
+ unsigned char *mpt=((unsigned char *)mpc)+count;
+
+ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+ printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+ *(u32 *)mpc->mpc_signature);
+ return 0;
+ }
+ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+ printk(KERN_ERR "SMP mptable: checksum error!\n");
+ return 0;
+ }
+ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+ mpc->mpc_spec);
+ return 0;
+ }
+ if (!mpc->mpc_lapic) {
+ printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+ return 0;
+ }
+ memcpy(oem,mpc->mpc_oem,8);
+ oem[8]=0;
+ printk(KERN_INFO "OEM ID: %s ",oem);
+
+ memcpy(str,mpc->mpc_productid,12);
+ str[12]=0;
+ printk("Product ID: %s ",str);
+
+ mps_oem_check(mpc, oem, str);
+
+ printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+ /*
+ * Save the local APIC address (it might be non-default) -- but only
+ * if we're not using ACPI.
+ */
+ if (!acpi_lapic)
+ mp_lapic_addr = mpc->mpc_lapic;
+
+ /*
+ * Now process the configuration blocks.
+ */
+ mpc_record = 0;
+ while (count < mpc->mpc_length) {
+ switch(*mpt) {
+ case MP_PROCESSOR:
+ {
+ struct mpc_config_processor *m=
+ (struct mpc_config_processor *)mpt;
+ /* ACPI may have already provided this data */
+ if (!acpi_lapic)
+ MP_processor_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_BUS:
+ {
+ struct mpc_config_bus *m=
+ (struct mpc_config_bus *)mpt;
+ MP_bus_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_IOAPIC:
+ {
+ struct mpc_config_ioapic *m=
+ (struct mpc_config_ioapic *)mpt;
+ MP_ioapic_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_INTSRC:
+ {
+ struct mpc_config_intsrc *m=
+ (struct mpc_config_intsrc *)mpt;
+
+ MP_intsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_LINTSRC:
+ {
+ struct mpc_config_lintsrc *m=
+ (struct mpc_config_lintsrc *)mpt;
+ MP_lintsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ default:
+ {
+ count = mpc->mpc_length;
+ break;
+ }
+ }
+ ++mpc_record;
+ }
+ clustered_apic_check();
+ if (!num_processors)
+ printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+ unsigned int port;
+
+ port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+}
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+ struct mpc_config_intsrc intsrc;
+ int i;
+ int ELCR_fallback = 0;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* conforming */
+ intsrc.mpc_srcbus = 0;
+ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+ intsrc.mpc_irqtype = mp_INT;
+
+ /*
+ * If true, we have an ISA/PCI system with no IRQ entries
+ * in the MP table. To prevent the PCI interrupts from being set up
+ * incorrectly, we try to use the ELCR. The sanity check to see if
+ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+ * never be level sensitive, so we simply see if the ELCR agrees.
+ * If it does, we assume it's valid.
+ */
+ if (mpc_default_type == 5) {
+ printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+ printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
+ else {
+ printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+ ELCR_fallback = 1;
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ switch (mpc_default_type) {
+ case 2:
+ if (i == 0 || i == 13)
+ continue; /* IRQ0 & IRQ13 not connected */
+ /* fall through */
+ default:
+ if (i == 2)
+ continue; /* IRQ2 is never connected */
+ }
+
+ if (ELCR_fallback) {
+ /*
+ * If the ELCR indicates a level-sensitive interrupt, we
+ * copy that information over to the MP table in the
+ * irqflag field (level sensitive, active high polarity).
+ */
+ if (ELCR_trigger(i))
+ intsrc.mpc_irqflag = 13;
+ else
+ intsrc.mpc_irqflag = 0;
+ }
+
+ intsrc.mpc_srcbusirq = i;
+ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+ MP_intsrc_info(&intsrc);
+ }
+
+ intsrc.mpc_irqtype = mp_ExtINT;
+ intsrc.mpc_srcbusirq = 0;
+ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+ MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+ struct mpc_config_processor processor;
+ struct mpc_config_bus bus;
+ struct mpc_config_ioapic ioapic;
+ struct mpc_config_lintsrc lintsrc;
+ int linttypes[2] = { mp_ExtINT, mp_NMI };
+ int i;
+
+ /*
+ * local APIC has default address
+ */
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /*
+ * 2 CPUs, numbered 0 & 1.
+ */
+ processor.mpc_type = MP_PROCESSOR;
+ /* Either an integrated APIC or a discrete 82489DX. */
+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ processor.mpc_cpuflag = CPU_ENABLED;
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ for (i = 0; i < 2; i++) {
+ processor.mpc_apicid = i;
+ MP_processor_info(&processor);
+ }
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ switch (mpc_default_type) {
+ default:
+ printk("???\n");
+ printk(KERN_ERR "Unknown standard configuration %d\n",
+ mpc_default_type);
+ /* fall through */
+ case 1:
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+ case 2:
+ case 6:
+ case 3:
+ memcpy(bus.mpc_bustype, "EISA ", 6);
+ break;
+ case 4:
+ case 7:
+ memcpy(bus.mpc_bustype, "MCA ", 6);
+ }
+ MP_bus_info(&bus);
+ if (mpc_default_type > 4) {
+ bus.mpc_busid = 1;
+ memcpy(bus.mpc_bustype, "PCI ", 6);
+ MP_bus_info(&bus);
+ }
+
+ ioapic.mpc_type = MP_IOAPIC;
+ ioapic.mpc_apicid = 2;
+ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ ioapic.mpc_flags = MPC_APIC_USABLE;
+ ioapic.mpc_apicaddr = 0xFEC00000;
+ MP_ioapic_info(&ioapic);
+
+ /*
+ * We set up most of the low 16 IO-APIC pins according to MPS rules.
+ */
+ construct_default_ioirq_mptable(mpc_default_type);
+
+ lintsrc.mpc_type = MP_LINTSRC;
+ lintsrc.mpc_irqflag = 0; /* conforming */
+ lintsrc.mpc_srcbusid = 0;
+ lintsrc.mpc_srcbusirq = 0;
+ lintsrc.mpc_destapic = MP_APIC_ALL;
+ for (i = 0; i < 2; i++) {
+ lintsrc.mpc_irqtype = linttypes[i];
+ lintsrc.mpc_destapiclint = i;
+ MP_lintsrc_info(&lintsrc);
+ }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+ struct intel_mp_floating *mpf = mpf_found;
+
+ /*
+ * ACPI may be used to obtain the entire SMP configuration or just to
+ * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that
+ * ACPI supports both logical (e.g. Hyper-Threading) and physical
+ * processors, where MPS only supports physical.
+ */
+ if (acpi_lapic && acpi_ioapic) {
+ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+ return;
+ }
+ else if (acpi_lapic)
+ printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+
+ printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+ if (mpf->mpf_feature2 & (1<<7)) {
+ printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
+ pic_mode = 1;
+ } else {
+ printk(KERN_INFO " Virtual Wire compatibility mode.\n");
+ pic_mode = 0;
+ }
+
+ /*
+ * Now see if we need to read further.
+ */
+ if (mpf->mpf_feature1 != 0) {
+
+ printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+ construct_default_ISA_mptable(mpf->mpf_feature1);
+
+ } else if (mpf->mpf_physptr) {
+
+ /*
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+ if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+ return;
+ }
+ /*
+ * If there are no explicit MP IRQ entries, then we are
+ * broken. We set up most of the low 16 IO-APIC pins to
+ * ISA defaults and hope it will work.
+ */
+ if (!mp_irq_entries) {
+ struct mpc_config_bus bus;
+
+ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ MP_bus_info(&bus);
+
+ construct_default_ioirq_mptable(0);
+ }
+
+ } else
+ BUG();
+
+ printk(KERN_INFO "Processors: %d\n", num_processors);
+ /*
+ * Only use the first configuration found.
+ */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+ unsigned long *bp = isa_bus_to_virt(base);
+ struct intel_mp_floating *mpf;
+
+ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+ if (sizeof(*mpf) != 16)
+ printk("Error: MPF size\n");
+
+ while (length > 0) {
+ mpf = (struct intel_mp_floating *)bp;
+ if ((*bp == SMP_MAGIC_IDENT) &&
+ (mpf->mpf_length == 1) &&
+ !mpf_checksum((unsigned char *)bp, 16) &&
+ ((mpf->mpf_specification == 1)
+ || (mpf->mpf_specification == 4)) ) {
+
+ smp_found_config = 1;
+ printk(KERN_INFO "found SMP MP-table at %08lx\n",
+ virt_to_phys(mpf));
+ if (mpf->mpf_physptr) {
+ /*
+ * We cannot access to MPC table to compute
+ * table size yet, as only few megabytes from
+ * the bottom is mapped now.
+ * PC-9800's MPC table places on the very last
+ * of physical memory; so that simply reserving
+ * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+ * in reserve_bootmem.
+ */
+ unsigned long size = PAGE_SIZE;
+ unsigned long end = max_low_pfn * PAGE_SIZE;
+ if (mpf->mpf_physptr + size > end)
+ size = end - mpf->mpf_physptr;
+ reserve_bootmem(mpf->mpf_physptr, size);
+ }
+
+ mpf_found = mpf;
+ return 1;
+ }
+ bp += 4;
+ length -= 16;
+ }
+ return 0;
+}
+
+void __init find_smp_config (void)
+{
+ unsigned int address;
+
+ /*
+ * FIXME: Linux assumes you have 640K of base ram..
+ * this continues the error...
+ *
+ * 1) Scan the bottom 1K for a signature
+ * 2) Scan the top 1K of base RAM
+ * 3) Scan the 64K of bios
+ */
+ if (smp_scan_config(0x0,0x400) ||
+ smp_scan_config(639*0x400,0x400) ||
+ smp_scan_config(0xF0000,0x10000))
+ return;
+ /*
+ * If it is an SMP machine we should know now, unless the
+ * configuration is in an EISA/MCA bus machine with an
+ * extended bios data area.
+ *
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E, calculate and scan it here.
+ *
+ * NOTE! There are Linux loaders that will corrupt the EBDA
+ * area, and as such this kind of SMP config may be less
+ * trustworthy, simply because the SMP table may have been
+ * stomped on during early boot. These loaders are buggy and
+ * should be fixed.
+ *
+ * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
+ */
+
+ address = get_bios_ebda();
+ if (address)
+ smp_scan_config(address, 0x400);
+}
+
+/* --------------------------------------------------------------------------
+ ACPI-based MP Configuration
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+void __init mp_register_lapic_address (
+ u64 address)
+{
+#ifndef CONFIG_XEN
+ mp_lapic_addr = (unsigned long) address;
+
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+ Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+#endif
+}
+
+
+void __init mp_register_lapic (
+ u8 id,
+ u8 enabled)
+{
+ struct mpc_config_processor processor;
+ int boot_cpu = 0;
+
+ if (MAX_APICS - id <= 0) {
+ printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+ id, MAX_APICS);
+ return;
+ }
+
+ if (id == boot_cpu_physical_apicid)
+ boot_cpu = 1;
+
+#ifndef CONFIG_XEN
+ processor.mpc_type = MP_PROCESSOR;
+ processor.mpc_apicid = id;
+ processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
+ processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+ processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+#endif
+
+ MP_processor_info(&processor);
+}
+
+#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
+
+#define MP_ISA_BUS 0
+#define MP_MAX_IOAPIC_PIN 127
+
+struct mp_ioapic_routing {
+ int apic_id;
+ int gsi_base;
+ int gsi_end;
+ u32 pin_programmed[4];
+} mp_ioapic_routing[MAX_IO_APICS];
+
+
+static int mp_find_ioapic (
+ int gsi)
+{
+ int i = 0;
+
+ /* Find the IOAPIC that manages this GSI. */
+ for (i = 0; i < nr_ioapics; i++) {
+ if ((gsi >= mp_ioapic_routing[i].gsi_base)
+ && (gsi <= mp_ioapic_routing[i].gsi_end))
+ return i;
+ }
+
+ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+
+ return -1;
+}
+
+
+void __init mp_register_ioapic (
+ u8 id,
+ u32 address,
+ u32 gsi_base)
+{
+ int idx = 0;
+
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+ }
+ if (!address) {
+ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+ " found in MADT table, skipping!\n");
+ return;
+ }
+
+ idx = nr_ioapics++;
+
+ mp_ioapics[idx].mpc_type = MP_IOAPIC;
+ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+ mp_ioapics[idx].mpc_apicaddr = address;
+
+ mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+ mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+
+ /*
+ * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+ * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+ */
+ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+ mp_ioapic_routing[idx].gsi_base = gsi_base;
+ mp_ioapic_routing[idx].gsi_end = gsi_base +
+ io_apic_get_redir_entries(idx);
+
+ printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+ mp_ioapic_routing[idx].gsi_base,
+ mp_ioapic_routing[idx].gsi_end);
+
+ return;
+}
+
+
+void __init mp_override_legacy_irq (
+ u8 bus_irq,
+ u8 polarity,
+ u8 trigger,
+ u32 gsi)
+{
+ struct mpc_config_intsrc intsrc;
+ int ioapic = -1;
+ int pin = -1;
+
+ /*
+ * Convert 'gsi' to 'ioapic.pin'.
+ */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return;
+ pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+ /*
+ * TBD: This check is for faulty timer entries, where the override
+ * erroneously sets the trigger to level, resulting in a HUGE
+ * increase of timer interrupts!
+ */
+ if ((bus_irq == 0) && (trigger == 3))
+ trigger = 1;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqtype = mp_INT;
+ intsrc.mpc_irqflag = (trigger << 2) | polarity;
+ intsrc.mpc_srcbus = MP_ISA_BUS;
+ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
+ intsrc.mpc_dstirq = pin; /* INTIN# */
+
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+ intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+
+ return;
+}
+
+
+void __init mp_config_acpi_legacy_irqs (void)
+{
+ struct mpc_config_intsrc intsrc;
+ int i = 0;
+ int ioapic = -1;
+
+ /*
+ * Fabricate the legacy ISA bus (bus #31).
+ */
+ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+ /*
+ * ES7000 has no legacy identity mappings
+ */
+ if (es7000_plat)
+ return;
+
+ /*
+ * Locate the IOAPIC that manages the ISA IRQs (0-15).
+ */
+ ioapic = mp_find_ioapic(0);
+ if (ioapic < 0)
+ return;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* Conforming */
+ intsrc.mpc_srcbus = MP_ISA_BUS;
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+
+ /*
+ * Use the default configuration for the IRQs 0-15. Unless
+ * overridden by (MADT) interrupt source override entries.
+ */
+ for (i = 0; i < 16; i++) {
+ int idx;
+
+ for (idx = 0; idx < mp_irq_entries; idx++) {
+ struct mpc_config_intsrc *irq = mp_irqs + idx;
+
+ /* Do we already have a mapping for this ISA IRQ? */
+ if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+ break;
+
+ /* Do we already have a mapping for this IOAPIC pin */
+ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+ (irq->mpc_dstirq == i))
+ break;
+ }
+
+ if (idx != mp_irq_entries) {
+ printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+ continue; /* IRQ already used */
+ }
+
+ intsrc.mpc_irqtype = mp_INT;
+ intsrc.mpc_srcbusirq = i; /* Identity mapped */
+ intsrc.mpc_dstirq = i;
+
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+ "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
+ intsrc.mpc_dstirq);
+
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+ }
+}
+
+int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
+{
+ int ioapic = -1;
+ int ioapic_pin = 0;
+ int idx, bit = 0;
+
+#ifdef CONFIG_ACPI_BUS
+ /* Don't set up the ACPI SCI because it's already set up */
+ if (acpi_fadt.sci_int == gsi)
+ return gsi;
+#endif
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+ return gsi;
+ }
+
+ ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+ if (ioapic_renumber_irq)
+ gsi = ioapic_renumber_irq(ioapic, gsi);
+
+ /*
+ * Avoid pin reprogramming. PRTs typically include entries
+ * with redundant pin->gsi mappings (but unique PCI devices);
+ * we only program the IOAPIC on the first.
+ */
+ bit = ioapic_pin % 32;
+ idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+ if (idx > 3) {
+ printk(KERN_ERR "Invalid reference to IOAPIC pin "
+ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+ ioapic_pin);
+ return gsi;
+ }
+ if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+ return gsi;
+ }
+
+ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+ edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
+ active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
+ return gsi;
+}
+
+#endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/
+#endif /*CONFIG_ACPI_BOOT*/
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c
index 9df49e66ae..dc51c7972a 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c
@@ -14,12 +14,7 @@
#include <linux/version.h>
#include <asm/io.h>
#include <asm-xen/balloon.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#define pte_offset_kernel pte_offset
-#define pud_t pgd_t
-#define pud_offset(d, va) d
-#endif
+#include <asm/tlbflush.h>
struct dma_coherent_mem {
void *virt_base;
@@ -29,78 +24,13 @@ struct dma_coherent_mem {
unsigned long *bitmap;
};
-static void
-xen_contig_memory(unsigned long vstart, unsigned int order)
-{
- /*
- * Ensure multi-page extents are contiguous in machine memory.
- * This code could be cleaned up some, and the number of
- * hypercalls reduced.
- */
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- unsigned long pfn, i, flags;
-
- scrub_pages(vstart, 1 << order);
-
- balloon_lock(flags);
-
- /* 1. Zap current PTEs, giving away the underlying pages. */
- for (i = 0; i < (1<<order); i++) {
- pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
- pud = pud_offset(pgd, vstart + (i*PAGE_SIZE));
- pmd = pmd_offset(pud, vstart + (i*PAGE_SIZE));
- pte = pte_offset_kernel(pmd, vstart + (i*PAGE_SIZE));
- pfn = pte_val_ma(*pte) >> PAGE_SHIFT;
- queue_l1_entry_update(pte, 0);
- phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
- INVALID_P2M_ENTRY;
- flush_page_update_queue();
- if (HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
- &pfn, 1, 0) != 1) BUG();
- }
- /* 2. Get a new contiguous memory extent. */
- if (HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
- &pfn, 1, order) != 1) BUG();
- /* 3. Map the new extent in place of old pages. */
- for (i = 0; i < (1<<order); i++) {
- pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
- pud = pud_offset(pgd, vstart + (i*PAGE_SIZE));
- pmd = pmd_offset(pud, vstart + (i*PAGE_SIZE));
- pte = pte_offset_kernel(pmd, vstart + (i*PAGE_SIZE));
- queue_l1_entry_update(pte,
- ((pfn+i)<<PAGE_SHIFT)|__PAGE_KERNEL);
- queue_machphys_update(pfn+i, (__pa(vstart)>>PAGE_SHIFT)+i);
- phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = pfn+i;
- }
- /* Flush updates through and flush the TLB. */
- xen_tlb_flush();
-
- balloon_unlock(flags);
-}
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
- dma_addr_t *dma_handle)
-#else
void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, int gfp)
-#endif
{
void *ret;
+ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
unsigned int order = get_order(size);
unsigned long vstart;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- int gfp = GFP_ATOMIC;
-
- if (hwdev == NULL || ((u32)hwdev->dma_mask < 0xffffffff))
- gfp |= GFP_DMA;
-#else
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
@@ -119,7 +49,6 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
gfp |= GFP_DMA;
-#endif
vstart = __get_free_pages(gfp, order);
ret = (void *)vstart;
@@ -133,14 +62,6 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
return ret;
}
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-void pci_free_consistent(struct pci_dev *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
-{
- free_pages((unsigned long)vaddr, get_order(size));
-}
-#else
-
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
@@ -231,5 +152,3 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
return mem->virt_base + (pos << PAGE_SHIFT);
}
EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
-#endif
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
index 0ef40b7035..d428b2305f 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
@@ -13,6 +13,7 @@
#include <stdarg.h>
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
@@ -46,8 +47,7 @@
#include <asm/i387.h>
#include <asm/irq.h>
#include <asm/desc.h>
-#include <asm-xen/multicall.h>
-#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/xen-public/physdev.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif
@@ -55,6 +55,9 @@
#include <linux/irq.h>
#include <linux/err.h>
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
int hlt_counter;
@@ -106,6 +109,33 @@ void xen_idle(void)
}
}
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /* We shouldn't have to disable interrupts while dead, but
+ * some interrupts just don't seem to go away, and this makes
+ * it "work" for testing purposes. */
+ /* Death loop */
+ while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+ HYPERVISOR_yield();
+
+ local_irq_disable();
+ __flush_tlb_all();
+ cpu_set(smp_processor_id(), cpu_online_map);
+ local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
@@ -124,6 +154,9 @@ void cpu_idle (void)
cpu_clear(cpu, cpu_idle_map);
rmb();
+ if (cpu_is_offline(cpu))
+ play_dead();
+
irq_stat[cpu].idle_timestamp = jiffies;
xen_idle();
}
@@ -221,20 +254,11 @@ void exit_thread(void)
/* The process may have allocated an io port bitmap... nuke it. */
if (unlikely(NULL != t->io_bitmap_ptr)) {
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
+ physdev_op_t op = { 0 };
+ op.cmd = PHYSDEVOP_SET_IOBITMAP;
+ HYPERVISOR_physdev_op(&op);
kfree(t->io_bitmap_ptr);
t->io_bitmap_ptr = NULL;
- /*
- * Careful, clear this in the TSS too:
- */
- memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
- t->io_bitmap_max = 0;
- tss->io_bitmap_owner = NULL;
- tss->io_bitmap_max = 0;
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
- put_cpu();
}
}
@@ -283,7 +307,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
struct pt_regs * childregs;
struct task_struct *tsk;
int err;
- unsigned long eflags;
childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
*childregs = *regs;
@@ -333,9 +356,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
desc->b = LDT_entry_b(&info);
}
-
- __asm__ __volatile__ ( "pushfl; popl %0" : "=r" (eflags) : );
- p->thread.io_pl = (eflags >> 12) & 3;
+ p->thread.io_pl = current->thread.io_pl;
err = 0;
out:
@@ -408,37 +429,6 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
return 1;
}
-static inline void
-handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
-{
- if (!next->io_bitmap_ptr) {
- /*
- * Disable the bitmap via an invalid offset. We still cache
- * the previous bitmap owner and the IO bitmap contents:
- */
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
- return;
- }
- if (likely(next == tss->io_bitmap_owner)) {
- /*
- * Previous owner of the bitmap (hence the bitmap content)
- * matches the next task, we dont have to do anything but
- * to set a valid offset in the TSS:
- */
- tss->io_bitmap_base = IO_BITMAP_OFFSET;
- return;
- }
- /*
- * Lazy TSS's I/O bitmap copy. We set an invalid offset here
- * and we let the task to get a GPF in case an I/O instruction
- * is performed. The handler of the GPF will verify that the
- * faulting task has a valid I/O bitmap and, it true, does the
- * real copy and restart the instruction. This will save us
- * redundant copies when the currently switched task does not
- * perform any I/O during its timeslice.
- */
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
-}
/*
* This special macro can be used to load a debugging register
*/
@@ -479,29 +469,10 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
- dom0_op_t op;
-
- /* NB. No need to disable interrupts as already done in sched.c */
- /* __cli(); */
-
- /*
- * Save away %fs and %gs. No need to save %es and %ds, as
- * those are always kernel segments while inside the kernel.
- */
- asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
- asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
+ physdev_op_t iopl_op, iobmp_op;
+ multicall_entry_t _mcl[8], *mcl = _mcl;
- /*
- * We clobber FS and GS here so that we avoid a GPF when restoring
- * previous task's FS/GS values in Xen when the LDT is switched.
- */
- __asm__ __volatile__ (
- "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs" : : :
- "eax" );
-
- MULTICALL_flush_page_update_queue();
-
- /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+ /* XEN NOTE: FS/GS saved in switch_mm(), not here. */
/*
* This is basically '__unlazy_fpu', except that we queue a
@@ -510,7 +481,9 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
*/
if (prev_p->thread_info->status & TS_USEDFPU) {
__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
- queue_multicall0(__HYPERVISOR_fpu_taskswitch);
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 1;
+ mcl++;
}
/*
@@ -518,35 +491,50 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
* This is load_esp0(tss, next) with a multicall.
*/
tss->esp0 = next->esp0;
- queue_multicall2(__HYPERVISOR_stack_switch, tss->ss0, tss->esp0);
+ mcl->op = __HYPERVISOR_stack_switch;
+ mcl->args[0] = tss->ss0;
+ mcl->args[1] = tss->esp0;
+ mcl++;
/*
* Load the per-thread Thread-Local Storage descriptor.
* This is load_TLS(next, cpu) with multicalls.
*/
-#define C(i) do { \
- if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
- next->tls_array[i].b != prev->tls_array[i].b)) \
- queue_multicall3(__HYPERVISOR_update_descriptor, \
- virt_to_machine(&get_cpu_gdt_table(cpu) \
- [GDT_ENTRY_TLS_MIN + i]), \
- ((u32 *)&next->tls_array[i])[0], \
- ((u32 *)&next->tls_array[i])[1]); \
+#define C(i) do { \
+ if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
+ next->tls_array[i].b != prev->tls_array[i].b)) { \
+ mcl->op = __HYPERVISOR_update_descriptor; \
+ mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu) \
+ [GDT_ENTRY_TLS_MIN + i]); \
+ mcl->args[1] = ((u32 *)&next->tls_array[i])[0]; \
+ mcl->args[2] = ((u32 *)&next->tls_array[i])[1]; \
+ mcl++; \
+ } \
} while (0)
C(0); C(1); C(2);
#undef C
- if (xen_start_info.flags & SIF_PRIVILEGED) {
- op.cmd = DOM0_IOPL;
- op.u.iopl.domain = DOMID_SELF;
- op.u.iopl.iopl = next->io_pl;
- op.interface_version = DOM0_INTERFACE_VERSION;
- queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op);
+ if (unlikely(prev->io_pl != next->io_pl)) {
+ iopl_op.cmd = PHYSDEVOP_SET_IOPL;
+ iopl_op.u.set_iopl.iopl = next->io_pl;
+ mcl->op = __HYPERVISOR_physdev_op;
+ mcl->args[0] = (unsigned long)&iopl_op;
+ mcl++;
}
- /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */
- execute_multicall_list();
- /* __sti(); */
+ if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
+ iobmp_op.cmd =
+ PHYSDEVOP_SET_IOBITMAP;
+ iobmp_op.u.set_iobitmap.bitmap =
+ (unsigned long)next->io_bitmap_ptr;
+ iobmp_op.u.set_iobitmap.nr_ports =
+ next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
+ mcl->op = __HYPERVISOR_physdev_op;
+ mcl->args[0] = (unsigned long)&iobmp_op;
+ mcl++;
+ }
+
+ (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
/*
* Restore %fs and %gs if needed.
@@ -569,9 +557,6 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
loaddebug(next, 7);
}
- if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
- handle_io_bitmap(next, tss);
-
return prev_p;
}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c
index f6096f03b7..938bcabd86 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c
@@ -41,6 +41,7 @@
#include <linux/init.h>
#include <linux/edd.h>
#include <linux/kernel.h>
+#include <linux/percpu.h>
#include <linux/notifier.h>
#include <video/edid.h>
#include <asm/e820.h>
@@ -52,6 +53,7 @@
#include <asm/ist.h>
#include <asm/io.h>
#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/physdev.h>
#include "setup_arch_pre.h"
#include <bios_ebda.h>
@@ -287,6 +289,10 @@ static void __init probe_roms(void)
unsigned char *rom;
int i;
+ /* Nothing to do if not running in dom0. */
+ if (!(xen_start_info.flags & SIF_INITDOMAIN))
+ return;
+
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
@@ -357,9 +363,6 @@ EXPORT_SYMBOL(HYPERVISOR_shared_info);
unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
EXPORT_SYMBOL(phys_to_machine_mapping);
-multicall_entry_t multicall_list[8];
-int nr_multicall_ents = 0;
-
/* Raw start-of-day parameters from the hypervisor. */
union xen_start_info_union xen_start_info_union;
@@ -696,12 +699,14 @@ static inline void copy_edd(void)
static void __init parse_cmdline_early (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = saved_command_line;
- int len = 0;
+ int len = 0, max_cmdline;
int userdef = 0;
- memcpy(saved_command_line, xen_start_info.cmd_line, MAX_CMDLINE);
+ if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
+ max_cmdline = COMMAND_LINE_SIZE;
+ memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
/* Save unparsed command line copy for /proc/cmdline */
- saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+ saved_command_line[max_cmdline-1] = '\0';
for (;;) {
if (c != ' ')
@@ -780,7 +785,7 @@ static void __init parse_cmdline_early (char ** cmdline_p)
noexec_setup(from + 7);
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_X86_MPPARSE
/*
* If the BIOS enumerates physical processors before logical,
* maxcpus=N at enumeration-time can be used to disable HT.
@@ -1134,12 +1139,6 @@ static unsigned long __init setup_memory(void)
*/
acpi_reserve_bootmem();
#endif
-#ifdef CONFIG_X86_FIND_SMP_CONFIG
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
-#endif
#ifdef CONFIG_BLK_DEV_INITRD
if (xen_start_info.mod_start) {
@@ -1220,8 +1219,9 @@ static void __init register_memory(void)
else
legacy_init_iomem_resources(&code_resource, &data_resource);
- /* EFI systems may still have VGA */
- request_resource(&iomem_resource, &video_ram_resource);
+ if (xen_start_info.flags & SIF_INITDOMAIN)
+ /* EFI systems may still have VGA */
+ request_resource(&iomem_resource, &video_ram_resource);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < STANDARD_IO_RESOURCES; i++)
@@ -1397,6 +1397,7 @@ static void set_mca_bus(int x) { }
void __init setup_arch(char **cmdline_p)
{
int i, j;
+ physdev_op_t op;
unsigned long max_low_pfn;
/* Force a quick death if the kernel panics. */
@@ -1408,6 +1409,8 @@ void __init setup_arch(char **cmdline_p)
notifier_chain_register(&panic_notifier_list, &xen_panic_block);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_writable_pagetables);
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
early_cpu_init();
@@ -1501,6 +1504,13 @@ void __init setup_arch(char **cmdline_p)
#endif
paging_init();
+#ifdef CONFIG_X86_FIND_SMP_CONFIG
+ /*
+ * Find and reserve possible boot-time SMP configuration:
+ */
+ find_smp_config();
+#endif
+
/* Make sure we have a correctly sized P->M table. */
if (max_pfn != xen_start_info.nr_pages) {
phys_to_machine_mapping = alloc_bootmem_low_pages(
@@ -1564,6 +1574,18 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled)
efi_map_memmap();
+ op.cmd = PHYSDEVOP_SET_IOPL;
+ op.u.set_iopl.iopl = current->thread.io_pl = 1;
+ HYPERVISOR_physdev_op(&op);
+
+#ifdef CONFIG_ACPI_BOOT
+ if (!(xen_start_info.flags & SIF_INITDOMAIN)) {
+ printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
+ acpi_disabled = 1;
+ acpi_ht = 0;
+ }
+#endif
+
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
@@ -1581,17 +1603,6 @@ void __init setup_arch(char **cmdline_p)
register_memory();
- /* If we are a privileged guest OS then we should request IO privs. */
- if (xen_start_info.flags & SIF_PRIVILEGED) {
- dom0_op_t op;
- op.cmd = DOM0_IOPL;
- op.u.iopl.domain = DOMID_SELF;
- op.u.iopl.iopl = 1;
- if (HYPERVISOR_dom0_op(&op) != 0)
- panic("Unable to obtain IOPL, despite SIF_PRIVILEGED");
- current->thread.io_pl = 1;
- }
-
if (xen_start_info.flags & SIF_INITDOMAIN) {
if (!(xen_start_info.flags & SIF_PRIVILEGED))
panic("Xen granted us console access "
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
new file mode 100644
index 0000000000..fddadbba25
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
@@ -0,0 +1,624 @@
+/*
+ * Intel SMP support routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#if 0
+#include <mach_apic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
+/*
+ * Some notes on x86 processor bugs affecting SMP operation:
+ *
+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ * The Linux implications for SMP are handled as follows:
+ *
+ * Pentium III / [Xeon]
+ * None of the E1AP-E3AP errata are visible to the user.
+ *
+ * E1AP. see PII A1AP
+ * E2AP. see PII A2AP
+ * E3AP. see PII A3AP
+ *
+ * Pentium II / [Xeon]
+ * None of the A1AP-A3AP errata are visible to the user.
+ *
+ * A1AP. see PPro 1AP
+ * A2AP. see PPro 2AP
+ * A3AP. see PPro 7AP
+ *
+ * Pentium Pro
+ * None of 1AP-9AP errata are visible to the normal user,
+ * except occasional delivery of 'spurious interrupt' as trap #15.
+ * This is very rare and a non-problem.
+ *
+ * 1AP. Linux maps APIC as non-cacheable
+ * 2AP. worked around in hardware
+ * 3AP. fixed in C0 and above steppings microcode update.
+ * Linux does not use excessive STARTUP_IPIs.
+ * 4AP. worked around in hardware
+ * 5AP. symmetric IO mode (normal Linux operation) not affected.
+ * 'noapic' mode has vector 0xf filled out properly.
+ * 6AP. 'noapic' mode might be affected - fixed in later steppings
+ * 7AP. We do not assume writes to the LVT deasserting IRQs
+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
+ * 9AP. We do not use mixed mode
+ *
+ * Pentium
+ * There is a marginal case where REP MOVS on 100MHz SMP
+ * machines with B stepping processors can fail. XXX should provide
+ * an L1cache=Writethrough or L1cache=off option.
+ *
+ * B stepping CPUs may hang. There are hardware work arounds
+ * for this. We warn about it in case your board doesn't have the work
+ * arounds. Basically that's so I can tell anyone with a B stepping
+ * CPU and SMP problems "tough".
+ *
+ * Specific items [From Pentium Processor Specification Update]
+ *
+ * 1AP. Linux doesn't use remote read
+ * 2AP. Linux doesn't trust APIC errors
+ * 3AP. We work around this
+ * 4AP. Linux never generated 3 interrupts of the same priority
+ * to cause a lost local interrupt.
+ * 5AP. Remote read is never used
+ * 6AP. not affected - worked around in hardware
+ * 7AP. not affected - worked around in hardware
+ * 8AP. worked around in hardware - we get explicit CS errors if not
+ * 9AP. only 'noapic' mode affected. Might generate spurious
+ * interrupts, we log only the first one and count the
+ * rest silently.
+ * 10AP. not affected - worked around in hardware
+ * 11AP. Linux reads the APIC between writes to avoid this, as per
+ * the documentation. Make sure you preserve this as it affects
+ * the C stepping chips too.
+ * 12AP. not affected - worked around in hardware
+ * 13AP. not affected - worked around in hardware
+ * 14AP. we always deassert INIT during bootup
+ * 15AP. not affected - worked around in hardware
+ * 16AP. not affected - worked around in hardware
+ * 17AP. not affected - worked around in hardware
+ * 18AP. not affected - worked around in hardware
+ * 19AP. not affected - worked around in BIOS
+ *
+ * If this sounds worrying believe me these bugs are either ___RARE___,
+ * or are signal timing bugs worked around in hardware and there's
+ * about nothing of note with C stepping upwards.
+ */
+
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+ return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+ return SET_APIC_DEST_FIELD(mask);
+}
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+ unsigned int evtchn;
+
+ evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+ // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
+ if (evtchn) {
+#if 0
+ shared_info_t *s = HYPERVISOR_shared_info;
+ while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
+ synch_test_bit(evtchn, &s->evtchn_mask[0]))
+ ;
+#endif
+ notify_via_evtchn(evtchn);
+ } else
+ printk("send_IPI to unbound port %d/%d",
+ cpu, vector);
+}
+
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+ int cpu;
+
+ switch (shortcut) {
+ case APIC_DEST_SELF:
+ __send_IPI_one(smp_processor_id(), vector);
+ break;
+ case APIC_DEST_ALLBUT:
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ if (cpu_isset(cpu, cpu_online_map)) {
+ __send_IPI_one(cpu, vector);
+ }
+ }
+ break;
+ default:
+ printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+ vector);
+ break;
+ }
+}
+
+void fastcall send_IPI_self(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+/*
+ * This is only used on smaller machines.
+ */
+void send_IPI_mask_bitmask(cpumask_t mask, int vector)
+{
+ unsigned long flags;
+ unsigned int cpu;
+
+ local_irq_save(flags);
+ WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
+
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (cpu_isset(cpu, mask)) {
+ __send_IPI_one(cpu, vector);
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+{
+
+ send_IPI_mask_bitmask(mask, vector);
+}
+
+#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
+
+#if 0 /* XEN */
+/*
+ * Smarter SMP flushing macros.
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+ * writing to user space from interrupts. (Its not allowed anyway).
+ *
+ * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static cpumask_t flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static DEFINE_SPINLOCK(tlbstate_lock);
+#define FLUSH_ALL 0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+ BUG();
+ cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+ load_cr3(swapper_pg_dir);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ * Stop ipi delivery for the old mm. This is not synchronized with
+ * the other cpus, but smp_invalidate_interrupt ignore flush ipis
+ * for the wrong mm, and in the worst case we perform a superfluous
+ * tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ * was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ * Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ * Now the other cpus will send tlb flush ipis.
+ * 1a4) change cr3.
+ * 1b) thread switch without mm change
+ * cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ * flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ * Atomically set the bit [other cpus will start sending flush ipis],
+ * and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ * runs in kernel space, the cpu could load tlb entries for user space
+ * pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+ unsigned long cpu;
+
+ cpu = get_cpu();
+
+ if (!cpu_isset(cpu, flush_cpumask))
+ goto out;
+ /*
+ * This was a BUG() but until someone can quote me the
+ * line from the intel manual that guarantees an IPI to
+ * multiple CPUs is retried _only_ on the erroring CPUs
+ * it's staying as a return
+ *
+ * BUG();
+ */
+
+ if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+ if (flush_va == FLUSH_ALL)
+ local_flush_tlb();
+ else
+ __flush_tlb_one(flush_va);
+ } else
+ leave_mm(cpu);
+ }
+ smp_mb__before_clear_bit();
+ cpu_clear(cpu, flush_cpumask);
+ smp_mb__after_clear_bit();
+out:
+ put_cpu_no_resched();
+
+ return IRQ_HANDLED;
+}
+
+static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+ unsigned long va)
+{
+ /*
+ * A couple of (to be removed) sanity checks:
+ *
+ * - current CPU must not be in mask
+ * - mask must exist :)
+ */
+ BUG_ON(cpus_empty(cpumask));
+ BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+ BUG_ON(!mm);
+
+ /* If a CPU which we ran on has gone down, OK. */
+ cpus_and(cpumask, cpumask, cpu_online_map);
+ if (cpus_empty(cpumask))
+ return;
+
+ /*
+ * i'm not happy about this global shared spinlock in the
+ * MM hot path, but we'll see how contended it is.
+ * Temporarily this turns IRQs off, so that lockups are
+ * detected by the NMI watchdog.
+ */
+ spin_lock(&tlbstate_lock);
+
+ flush_mm = mm;
+ flush_va = va;
+#if NR_CPUS <= BITS_PER_LONG
+ atomic_set_mask(cpumask, &flush_cpumask);
+#else
+ {
+ int k;
+ unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
+ unsigned long *cpu_mask = (unsigned long *)&cpumask;
+ for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
+ atomic_set_mask(cpu_mask[k], &flush_mask[k]);
+ }
+#endif
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+ while (!cpus_empty(flush_cpumask))
+ /* nothing. lockup detection does not belong here */
+ mb();
+
+ flush_mm = NULL;
+ flush_va = 0;
+ spin_unlock(&tlbstate_lock);
+}
+
+void flush_tlb_current_task(void)
+{
+ struct mm_struct *mm = current->mm;
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ local_flush_tlb();
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ preempt_enable();
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ if (current->active_mm == mm) {
+ if (current->mm)
+ local_flush_tlb();
+ else
+ leave_mm(smp_processor_id());
+ }
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+
+ preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ if (current->active_mm == mm) {
+ if(current->mm)
+ __flush_tlb_one(va);
+ else
+ leave_mm(smp_processor_id());
+ }
+
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, va);
+
+ preempt_enable();
+}
+
+static void do_flush_tlb_all(void* info)
+{
+ unsigned long cpu = smp_processor_id();
+
+ __flush_tlb_all();
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+ leave_mm(cpu);
+}
+
+void flush_tlb_all(void)
+{
+ on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+}
+
+#else
+
+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{ return 0; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
+void flush_tlb_mm(struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+
+#endif /* XEN */
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+ WARN_ON(cpu_is_offline(cpu));
+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static DEFINE_SPINLOCK(call_lock);
+
+struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>> or have already executed it.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+{
+ struct call_data_struct data;
+ int cpus;
+
+ /* Holding any lock stops cpus from going down. */
+ spin_lock(&call_lock);
+ cpus = num_online_cpus()-1;
+
+ if (!cpus) {
+ spin_unlock(&call_lock);
+ return 0;
+ }
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ mb();
+
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ barrier();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ barrier();
+ spin_unlock(&call_lock);
+
+ return 0;
+}
+
+static void stop_this_cpu (void * dummy)
+{
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ local_irq_disable();
+#if 1
+ xxprint("stop_this_cpu disable_local_APIC\n");
+#else
+ disable_local_APIC();
+#endif
+ if (cpu_data[smp_processor_id()].hlt_works_ok)
+ for(;;) __asm__("hlt");
+ for (;;);
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+ smp_call_function(stop_this_cpu, NULL, 1, 0);
+
+ local_irq_disable();
+#if 1
+ xxprint("smp_send_stop disable_local_APIC\n");
+#else
+ disable_local_APIC();
+#endif
+ local_irq_enable();
+}
+
+/*
+ * Reschedule call back. Nothing to do,
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+
+ return IRQ_HANDLED;
+}
+
+#include <linux/kallsyms.h>
+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+ */
+ mb();
+ atomic_inc(&call_data->started);
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
+ irq_enter();
+ (*func)(info);
+ irq_exit();
+
+ if (wait) {
+ mb();
+ atomic_inc(&call_data->finished);
+ }
+
+ return IRQ_HANDLED;
+}
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
new file mode 100644
index 0000000000..a716095376
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
@@ -0,0 +1,1437 @@
+/*
+ * x86 SMP booting functions
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Much of the core SMP work is based on previous work by Thomas Radke, to
+ * whom a great many thanks are extended.
+ *
+ * Thanks to Intel for making available several different Pentium,
+ * Pentium Pro and Pentium-II/Xeon MP machines.
+ * Original development of Linux SMP code supported by Caldera.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ *
+ * Fixes
+ * Felix Koop : NR_CPUS used properly
+ * Jose Renau : Handle single CPU case.
+ * Alan Cox : By repeated request 8) - Total BogoMIPS report.
+ * Greg Wright : Fix for kernel stacks panic.
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Matthias Sattler : Changes for 2.1 kernel map.
+ * Michel Lespinasse : Changes for 2.1 kernel map.
+ * Michael Chastain : Change trampoline.S to gnu as.
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine
+ * Ingo Molnar : Added APIC timers, based on code
+ * from Jose Renau
+ * Ingo Molnar : various cleanups and rewrites
+ * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs
+ * Martin J. Bligh : Added support for multi-quad systems
+ * Dave Jones : Report invalid combinations of Athlon CPUs.
+* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+
+#ifndef CONFIG_X86_IO_APIC
+#define Dprintk(args...)
+#endif
+#include <mach_wakecpu.h>
+#include <smpboot_hooks.h>
+
+/* Set if we find a B stepping CPU */
+static int __initdata smp_b_stepping;
+
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+EXPORT_SYMBOL(phys_proc_id);
+
+/* bitmap of online cpus */
+cpumask_t cpu_online_map;
+
+cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+static cpumask_t smp_commenced_mask;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+u8 x86_cpu_to_apicid[NR_CPUS] =
+ { [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+#if 0
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end [];
+static unsigned char *trampoline_base;
+static int trampoline_exec;
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+#endif
+
+#if 0
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+ memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
+}
+#endif
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+#if 1
+ int cpu;
+
+ for (cpu = 1; cpu < NR_CPUS; cpu++) {
+ cpu_gdt_descr[cpu].address = (unsigned long)
+ alloc_bootmem_low_pages(PAGE_SIZE);
+ /* XXX free unused pages later */
+ }
+#else
+ trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ if (__pa(trampoline_base) >= 0x9F000)
+ BUG();
+ /*
+ * Make the SMP trampoline executable:
+ */
+ trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
+#endif
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+static void __init smp_store_cpu_info(int id)
+{
+ struct cpuinfo_x86 *c = cpu_data + id;
+
+ *c = boot_cpu_data;
+ if (id!=0)
+ identify_cpu(c);
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3)
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ smp_b_stepping = 1;
+
+ /*
+ * Certain Athlons might work (for various values of 'work') in SMP
+ * but they are not certified as MP capable.
+ */
+ if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+
+ /* Athlon 660/661 is valid. */
+ if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
+ goto valid_k7;
+
+ /* Duron 670 is valid */
+ if ((c->x86_model==7) && (c->x86_mask==0))
+ goto valid_k7;
+
+ /*
+ * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
+ * It's worth noting that the A5 stepping (662) of some Athlon XP's
+ * have the MP bit set.
+ * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
+ */
+ if (((c->x86_model==6) && (c->x86_mask>=2)) ||
+ ((c->x86_model==7) && (c->x86_mask>=1)) ||
+ (c->x86_model> 7))
+ if (cpu_has_mp)
+ goto valid_k7;
+
+ /* If we get here, it's not a certified SMP capable AMD system. */
+ tainted |= TAINT_UNSAFE_SMP;
+ }
+
+valid_k7:
+ ;
+}
+
+#if 0
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSC's synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+static void __init synchronize_tsc_bp (void)
+{
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ unsigned long one_usec;
+ int buggy = 0;
+
+ printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
+
+ /* convert from kcyc/sec to cyc/usec */
+ one_usec = cpu_khz / 1000;
+
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * all APs synchronize but they loop on '== num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
+ mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * this lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
+ mb();
+ atomic_set(&tsc_count_start, 0);
+ wmb();
+ atomic_inc(&tsc_count_stop);
+ }
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_isset(i, cpu_callout_map)) {
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ }
+ avg = sum;
+ do_div(avg, num_booting_cpus());
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report bigger than 2 microseconds clock differences.
+ */
+ if (delta > 2*one_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = delta;
+ do_div(realdelta, one_usec);
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;
+
+ printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+ }
+
+ sum += delta;
+ }
+ if (!buggy)
+ printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+ int i;
+
+ /*
+ * Not every cpu is online at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);
+ while (atomic_read(&tsc_count_start) != num_booting_cpus())
+ mb();
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ }
+}
+#undef NR_LOOPS
+#endif
+
+extern void calibrate_delay(void);
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+ int cpuid, phys_id;
+ unsigned long timeout;
+
+#if 0
+ /*
+ * If waken up by an INIT in an 82489DX configuration
+ * we may get here before an INIT-deassert IPI reaches
+ * our local APIC. We have to wait for the IPI or we'll
+ * lock up on an APIC access.
+ */
+ wait_for_init_deassert(&init_deasserted);
+#endif
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ phys_id = smp_processor_id();
+ cpuid = smp_processor_id();
+ if (cpu_isset(cpuid, cpu_callin_map)) {
+ printk("huh, phys CPU#%d, CPU#%d already present??\n",
+ phys_id, cpuid);
+ BUG();
+ }
+ Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+ /*
+ * STARTUP IPIs are fragile beasts as they might sometimes
+ * trigger some glue motherboard logic. Complete APIC bus
+ * silence for 1 second, this overestimates the time the
+ * boot CPU is spending to send the up to 2 STARTUP IPIs
+ * by a factor of two. This should be enough.
+ */
+
+ /*
+ * Waiting 2s total for startup (udelay is not yet working)
+ */
+ timeout = jiffies + 2*HZ;
+ while (time_before(jiffies, timeout)) {
+ /*
+ * Has the boot CPU finished its STARTUP sequence?
+ */
+ if (cpu_isset(cpuid, cpu_callout_map))
+ break;
+ rep_nop();
+ }
+
+ if (!time_before(jiffies, timeout)) {
+ printk("BUG: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ BUG();
+ }
+
+#if 0
+ /*
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
+ */
+
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+ smp_callin_clear_local_apic();
+ setup_local_APIC();
+#endif
+ map_cpu_to_logical_apicid();
+
+ /*
+ * Get our bogomips.
+ */
+ calibrate_delay();
+ Dprintk("Stack at about %p\n",&cpuid);
+
+ /*
+ * Save our processor parameters
+ */
+ smp_store_cpu_info(cpuid);
+
+#if 0
+ disable_APIC_timer();
+#endif
+
+ /*
+ * Allow the master to continue.
+ */
+ cpu_set(cpuid, cpu_callin_map);
+
+#if 0
+ /*
+ * Synchronize the TSC with the BP
+ */
+ if (cpu_has_tsc && cpu_khz)
+ synchronize_tsc_ap();
+#endif
+}
+
+int cpucount;
+
+
+static irqreturn_t ldebug_interrupt(
+ int irq, void *dev_id, struct pt_regs *regs)
+{
+ return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+void ldebug_setup(void)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+ sprintf(ldebug_name[cpu], "ldebug%d", cpu);
+ BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
+ SA_INTERRUPT, ldebug_name[cpu], NULL));
+}
+
+
+extern void local_setup_timer(void);
+
+/*
+ * Activate a secondary processor.
+ */
+static void __init start_secondary(void *unused)
+{
+ /*
+ * Don't put anything before smp_callin(); SMP
+ * booting is so fragile that we want to limit the
+ * things done here to the most necessary things.
+ */
+ cpu_init();
+ smp_callin();
+ while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+ rep_nop();
+ local_setup_timer();
+ ldebug_setup();
+ smp_intr_init();
+ local_irq_enable();
+ /*
+ * low-memory mappings have been cleared, flush them from
+ * the local TLBs too.
+ */
+ local_flush_tlb();
+ cpu_set(smp_processor_id(), cpu_online_map);
+
+ /* We can take interrupts now: we're officially "up". */
+ local_irq_enable();
+
+ wmb();
+ cpu_idle();
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure
+ * This function must not return.
+ */
+void __init initialize_secondary(void)
+{
+ /*
+ * We don't actually need to load the full TSS,
+ * basically just the stack pointer and the eip.
+ */
+
+ asm volatile(
+ "movl %0,%%esp\n\t"
+ "jmp *%1"
+ :
+ :"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+extern struct {
+ void * esp;
+ unsigned short ss;
+} stack_start;
+
+#ifdef CONFIG_NUMA
+
+/* which logical CPUs are on which nodes */
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+ { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+/* which node each logical CPU is on */
+int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_2_node);
+
+/* set up a mapping between cpu and node. */
+static inline void map_cpu_to_node(int cpu, int node)
+{
+ printk("Mapping cpu %d to node %d\n", cpu, node);
+ cpu_set(cpu, node_2_cpu_mask[node]);
+ cpu_2_node[cpu] = node;
+}
+
+/* undo a mapping between cpu and node. */
+static inline void unmap_cpu_to_node(int cpu)
+{
+ int node;
+
+ printk("Unmapping cpu %d from all nodes\n", cpu);
+ for (node = 0; node < MAX_NUMNODES; node ++)
+ cpu_clear(cpu, node_2_cpu_mask[node]);
+ cpu_2_node[cpu] = 0;
+}
+#else /* !CONFIG_NUMA */
+
+#define map_cpu_to_node(cpu, node) ({})
+#define unmap_cpu_to_node(cpu) ({})
+
+#endif /* CONFIG_NUMA */
+
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+void map_cpu_to_logical_apicid(void)
+{
+ int cpu = smp_processor_id();
+ int apicid = smp_processor_id();
+
+ cpu_2_logical_apicid[cpu] = apicid;
+ map_cpu_to_node(cpu, apicid_to_node(apicid));
+}
+
+void unmap_cpu_to_logical_apicid(int cpu)
+{
+ cpu_2_logical_apicid[cpu] = BAD_APICID;
+ unmap_cpu_to_node(cpu);
+}
+
+#if APIC_DEBUG
+static inline void __inquire_remote_apic(int apicid)
+{
+ int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ char *names[] = { "ID", "VERSION", "SPIV" };
+ int timeout, status;
+
+ printk("Inquiring remote APIC #%d...\n", apicid);
+
+ for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ printk("... APIC #%d %s: ", apicid, names[i]);
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+ timeout = 0;
+ do {
+ udelay(100);
+ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+ switch (status) {
+ case APIC_ICR_RR_VALID:
+ status = apic_read(APIC_RRR);
+ printk("%08x\n", status);
+ break;
+ default:
+ printk("failed\n");
+ }
+ }
+}
+#endif
+
+#if 0
+#ifdef WAKE_SECONDARY_VIA_NMI
+/*
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ */
+static int __init
+wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int timeout, maxlvt;
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ maxlvt = get_maxlvt();
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ Dprintk("NMI sent.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_NMI */
+
+#ifdef WAKE_SECONDARY_VIA_INIT
+static int __init
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ Dprintk("Asserting INIT.\n");
+
+ /*
+ * Turn INIT on target chip
+ */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /*
+ * Send IPI
+ */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ mdelay(10);
+
+ Dprintk("Deasserting INIT.\n");
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Send IPI */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ atomic_set(&init_deasserted, 1);
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+ Dprintk("#startup loops: %d.\n", num_starts);
+
+ maxlvt = get_maxlvt();
+
+ for (j = 1; j <= num_starts; j++) {
+ Dprintk("Sending STARTUP #%d.\n",j);
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ Dprintk("After apic_write.\n");
+
+ /*
+ * STARTUP IPI
+ */
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_STARTUP
+ | (start_eip >> 12));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (send_status || accept_status)
+ break;
+ }
+ Dprintk("After Startup.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_INIT */
+#endif
+
+extern cpumask_t cpu_initialized;
+
+static int __init do_boot_cpu(int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
+ */
+{
+ struct task_struct *idle;
+ unsigned long boot_error;
+ int timeout, cpu;
+ unsigned long start_eip;
+#if 0
+ unsigned short nmi_high = 0, nmi_low = 0;
+#endif
+ vcpu_guest_context_t ctxt;
+ extern void startup_32_smp(void);
+ extern void hypervisor_callback(void);
+ extern void failsafe_callback(void);
+ extern void smp_trap_init(trap_info_t *);
+ int i;
+
+ cpu = ++cpucount;
+ /*
+ * We can't use kernel_thread since we must avoid
+ * rescheduling the child.
+ */
+ idle = fork_idle(cpu);
+ if (IS_ERR(idle))
+ panic("failed fork for CPU %d", cpu);
+ idle->thread.eip = (unsigned long) start_secondary;
+ /* start_eip had better be page-aligned! */
+ start_eip = (unsigned long)startup_32_smp;
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+ /* Stack for startup_32 can be just as for start_secondary onwards */
+ stack_start.esp = (void *) idle->thread.esp;
+
+ irq_ctx_init(cpu);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+#if 1
+ if (cpu_gdt_descr[0].size > PAGE_SIZE)
+ BUG();
+ cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+ memcpy((void *)cpu_gdt_descr[cpu].address,
+ (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+ memset(&ctxt, 0, sizeof(ctxt));
+
+ ctxt.user_regs.ds = __USER_DS;
+ ctxt.user_regs.es = __USER_DS;
+ ctxt.user_regs.fs = 0;
+ ctxt.user_regs.gs = 0;
+ ctxt.user_regs.ss = __KERNEL_DS;
+ ctxt.user_regs.cs = __KERNEL_CS;
+ ctxt.user_regs.eip = start_eip;
+ ctxt.user_regs.esp = idle->thread.esp;
+ ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+
+ /* FPU is set up to default initial state. */
+ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+ /* Virtual IDT is empty at start-of-day. */
+ for ( i = 0; i < 256; i++ )
+ {
+ ctxt.trap_ctxt[i].vector = i;
+ ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
+ }
+ smp_trap_init(ctxt.trap_ctxt);
+
+ /* No LDT. */
+ ctxt.ldt_ents = 0;
+
+ {
+ unsigned long va;
+ int f;
+
+ for (va = cpu_gdt_descr[cpu].address, f = 0;
+ va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+ va += PAGE_SIZE, f++) {
+ ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ make_page_readonly((void *)va);
+ }
+ ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
+ }
+
+ /* Ring 1 stack is the initial stack. */
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_sp = idle->thread.esp;
+
+ /* Callback handlers. */
+ ctxt.event_callback_cs = __KERNEL_CS;
+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+ ctxt.failsafe_callback_cs = __KERNEL_CS;
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+ ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
+
+ boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ printk("CPU%d: ", cpu);
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_logical_apicid(cpu);
+ cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+ cpucount--;
+ }
+
+#else
+ Dprintk("Setting warm reset code and vector.\n");
+
+ store_NMI_vector(&nmi_high, &nmi_low);
+
+ smpboot_setup_warm_reset_vector(start_eip);
+
+ /*
+ * Starting actual IPI sequence...
+ */
+ boot_error = wakeup_secondary_cpu(apicid, start_eip);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ printk("CPU%d: ", cpu);
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ if (*((volatile unsigned char *)trampoline_base)
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+ else
+ /* trampoline code not run */
+ printk("Not responding.\n");
+ inquire_remote_apic(apicid);
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_logical_apicid(cpu);
+ cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+ cpucount--;
+ }
+
+ /* mark "stuck" area as not stuck */
+ *((volatile unsigned long *)trampoline_base) = 0;
+#endif
+
+ return boot_error;
+}
+
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+ unsigned long cachesize; /* kB */
+ unsigned long bandwidth = 350; /* MB/s */
+ /*
+ * Rough estimation for SMP scheduling, this is the number of
+ * cycles it takes for a fully memory-limited process to flush
+ * the SMP-local cache.
+ *
+ * (For a P5 this pretty much means we will choose another idle
+ * CPU almost always at wakeup time (this is due to the small
+ * L1 cache), on PIIs it's around 50-100 usecs, depending on
+ * the cache size)
+ */
+
+ if (!cpu_khz) {
+ /*
+ * this basically disables processor-affinity
+ * scheduling on SMP without a TSC.
+ */
+ cacheflush_time = 0;
+ return;
+ } else {
+ cachesize = boot_cpu_data.x86_cache_size;
+ if (cachesize == -1) {
+ cachesize = 16; /* Pentiums, 2x8kB cache */
+ bandwidth = 100;
+ }
+
+ cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+ }
+
+ cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
+
+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+ (long)cacheflush_time/(cpu_khz/1000),
+ ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+ printk("task migration cache decay timeout: %ld msecs.\n",
+ cache_decay_ticks);
+}
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+#if 0
+static int boot_cpu_logical_apicid;
+#endif
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+
+static void __init smp_boot_cpus(unsigned int max_cpus)
+{
+ int cpu, kicked;
+ unsigned long bogosum = 0;
+#if 0
+ int apicid, bit;
+#endif
+
+ /*
+ * Setup boot CPU information
+ */
+ smp_store_cpu_info(0); /* Final full version of the data */
+ printk("CPU%d: ", 0);
+ print_cpu_info(&cpu_data[0]);
+
+#if 0
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+#else
+ // boot_cpu_physical_apicid = 0;
+ // boot_cpu_logical_apicid = 0;
+ x86_cpu_to_apicid[0] = 0;
+#endif
+
+ current_thread_info()->cpu = 0;
+ smp_tune_scheduling();
+ cpus_clear(cpu_sibling_map[0]);
+ cpu_set(0, cpu_sibling_map[0]);
+
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * If we couldn't find an SMP configuration at boot time,
+ * get out of here now!
+ */
+ if (!smp_found_config && !acpi_lapic) {
+ printk(KERN_NOTICE "SMP motherboard not detected.\n");
+ smpboot_clear_io_apic_irqs();
+#if 0
+ phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (APIC_init_uniprocessor())
+ printk(KERN_NOTICE "Local APIC not detected."
+ " Using dummy APIC emulation.\n");
+#endif
+ map_cpu_to_logical_apicid();
+ return;
+ }
+#endif
+
+#if 0
+ /*
+ * Should not be necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ * Makes no sense to do this check in clustered apic mode, so skip it
+ */
+ if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_physical_apicid);
+ physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+ }
+
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+ printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+ smpboot_clear_io_apic_irqs();
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ return;
+ }
+
+ verify_local_APIC();
+#endif
+
+ /*
+ * If SMP should be disabled, then really disable it!
+ */
+ if (!max_cpus) {
+ HYPERVISOR_shared_info->n_vcpu = 1;
+ printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+ smpboot_clear_io_apic_irqs();
+#if 0
+ phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
+ return;
+ }
+
+ smp_intr_init();
+
+#if 0
+ connect_bsp_APIC();
+ setup_local_APIC();
+#endif
+ map_cpu_to_logical_apicid();
+#if 0
+
+
+ setup_portio_remap();
+
+ /*
+ * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+ *
+ * In clustered apic mode, phys_cpu_present_map is a constructed thus:
+ * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+ * clustered apic ID.
+ */
+ Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+#endif
+ Dprintk("CPU present map: %lx\n",
+ (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
+
+ kicked = 1;
+ for (cpu = 1; kicked < NR_CPUS &&
+ cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
+ if (max_cpus <= cpucount+1)
+ continue;
+
+ if (do_boot_cpu(cpu))
+ printk("CPU #%d not responding - cannot use it.\n",
+ cpu);
+ else
+ ++kicked;
+ }
+
+#if 0
+ /*
+ * Cleanup possible dangling ends...
+ */
+ smpboot_restore_warm_reset_vector();
+#endif
+
+ /*
+ * Allow the user to impress friends.
+ */
+ Dprintk("Before bogomips.\n");
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (cpu_isset(cpu, cpu_callout_map))
+ bogosum += cpu_data[cpu].loops_per_jiffy;
+ printk(KERN_INFO
+ "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ cpucount+1,
+ bogosum/(500000/HZ),
+ (bogosum/(5000/HZ))%100);
+
+ Dprintk("Before bogocount - setting activated=1.\n");
+
+ if (smp_b_stepping)
+ printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+
+ /*
+ * Don't taint if we are running SMP kernel on a single non-MP
+ * approved Athlon
+ */
+ if (tainted & TAINT_UNSAFE_SMP) {
+ if (cpucount)
+ printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
+ else
+ tainted &= ~TAINT_UNSAFE_SMP;
+ }
+
+ Dprintk("Boot done.\n");
+
+ /*
+ * construct cpu_sibling_map[], so that we can tell sibling CPUs
+ * efficiently.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ cpus_clear(cpu_sibling_map[cpu]);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ int siblings = 0;
+ int i;
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+
+ if (smp_num_siblings > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ siblings++;
+ cpu_set(i, cpu_sibling_map[cpu]);
+ }
+ }
+ } else {
+ siblings++;
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
+
+ if (siblings != smp_num_siblings)
+ printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+ }
+
+#if 0
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ check_nmi_watchdog();
+#endif
+
+ smpboot_setup_io_apic();
+
+#if 0
+ setup_boot_APIC_clock();
+
+ /*
+ * Synchronize the TSC with the AP
+ */
+ if (cpu_has_tsc && cpucount && cpu_khz)
+ synchronize_tsc_bp();
+#endif
+}
+
+/* These are wrappers to interface to the new boot process. Someone
+ who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_commenced_mask = cpumask_of_cpu(0);
+ cpu_callin_map = cpumask_of_cpu(0);
+ mb();
+ smp_boot_cpus(max_cpus);
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+ cpu_set(smp_processor_id(), cpu_online_map);
+ cpu_set(smp_processor_id(), cpu_callout_map);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* must be called with the cpucontrol mutex held */
+static int __devinit cpu_enable(unsigned int cpu)
+{
+ /* get the target out of its holding state */
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+ wmb();
+
+ /* wait for the processor to ack it. timeout? */
+ while (!cpu_online(cpu))
+ cpu_relax();
+
+ fixup_irqs(cpu_online_map);
+ /* counter the disable in fixup_irqs() */
+ local_irq_enable();
+ return 0;
+}
+
+int __cpu_disable(void)
+{
+ cpumask_t map = cpu_online_map;
+ int cpu = smp_processor_id();
+
+ /*
+ * Perhaps use cpufreq to drop frequency, but that could go
+ * into generic code.
+ *
+ * We won't take down the boot processor on i386 due to some
+ * interrupts only being able to be serviced by the BSP.
+ * Especially so if we're not using an IOAPIC -zwane
+ */
+ if (cpu == 0)
+ return -EBUSY;
+
+ /* Allow any queued timer interrupts to get serviced */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
+ cpu_clear(cpu, map);
+ fixup_irqs(map);
+ /* It's now safe to remove this processor from the online map */
+ cpu_clear(cpu, cpu_online_map);
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ /* We don't do anything here: idle task is faking death itself. */
+ unsigned int i;
+
+ for (i = 0; i < 10; i++) {
+ /* They ack this in play_dead by setting CPU_DEAD */
+ if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+ return;
+ current->state = TASK_UNINTERRUPTIBLE;
+ schedule_timeout(HZ/10);
+ }
+ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+ return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ /* We said "no" in __cpu_disable */
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+ /* In case one didn't come up */
+ if (!cpu_isset(cpu, cpu_callin_map)) {
+ printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
+ local_irq_enable();
+ return -EIO;
+ }
+
+#ifdef CONFIG_HOTPLUG_CPU
+ /* Already up, and in cpu_quiescent now? */
+ if (cpu_isset(cpu, smp_commenced_mask)) {
+ cpu_enable(cpu);
+ return 0;
+ }
+#endif
+
+ local_irq_enable();
+ /* Unleash the CPU! */
+ cpu_set(cpu, smp_commenced_mask);
+ while (!cpu_isset(cpu, cpu_online_map))
+ mb();
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#if 1
+#else
+#ifdef CONFIG_X86_IO_APIC
+ setup_ioapic_dest();
+#endif
+ zap_low_mappings();
+ /*
+ * Disable executability of the SMP trampoline:
+ */
+ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
+}
+
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+void __init smp_intr_init(void)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(resched_irq, cpu) =
+ bind_ipi_on_cpu_to_irq(cpu, RESCHEDULE_VECTOR);
+ sprintf(resched_name[cpu], "resched%d", cpu);
+ BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+ SA_INTERRUPT, resched_name[cpu], NULL));
+
+ per_cpu(callfunc_irq, cpu) =
+ bind_ipi_on_cpu_to_irq(cpu, CALL_FUNCTION_VECTOR);
+ sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+ BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+ smp_call_function_interrupt,
+ SA_INTERRUPT, callfunc_name[cpu], NULL));
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c
index acf5dd3130..821d6905b0 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c
@@ -47,6 +47,7 @@
#include <linux/efi.h>
#include <linux/mca.h>
#include <linux/sysctl.h>
+#include <linux/percpu.h>
#include <asm/io.h>
#include <asm/smp.h>
@@ -76,7 +77,20 @@ u64 jiffies_64 = INITIAL_JIFFIES;
EXPORT_SYMBOL(jiffies_64);
+#if defined(__x86_64__)
+unsigned long vxtime_hz = PIT_TICK_RATE;
+struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
+volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
+struct timespec __xtime __section_xtime;
+struct timezone __sys_tz __section_sys_tz;
+#endif
+
+#if defined(__x86_64__)
+unsigned int cpu_khz; /* Detected as we calibrate the TSC */
+#else
unsigned long cpu_khz; /* Detected as we calibrate the TSC */
+#endif
extern unsigned long wall_jiffies;
@@ -94,7 +108,6 @@ u32 shadow_tsc_stamp;
u64 shadow_system_time;
static u32 shadow_time_version;
static struct timeval shadow_tv;
-extern u64 processed_system_time;
/*
* We use this to ensure that gettimeofday() is monotonically increasing. We
@@ -111,7 +124,8 @@ static long last_rtc_update, last_update_to_xen;
static long last_update_from_xen; /* UTC seconds when last read Xen clock. */
/* Keep track of last time we did processing/updating of jiffies and xtime. */
-u64 processed_system_time; /* System time (ns) at last processing. */
+static u64 processed_system_time; /* System time (ns) at last processing. */
+static DEFINE_PER_CPU(u64, processed_system_time);
#define NS_PER_TICK (1000000000ULL/HZ)
@@ -378,39 +392,52 @@ static inline void do_timer_interrupt(int irq, void *dev_id,
struct pt_regs *regs)
{
time_t wtm_sec, sec;
- s64 delta, nsec;
+ s64 delta, delta_cpu, nsec;
long sec_diff, wtm_nsec;
+ int cpu = smp_processor_id();
do {
__get_time_values_from_xen();
- delta = (s64)(shadow_system_time +
- ((s64)cur_timer->get_offset() *
- (s64)NSEC_PER_USEC) -
- processed_system_time);
+ delta = delta_cpu = (s64)shadow_system_time +
+ ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
+ delta -= processed_system_time;
+ delta_cpu -= per_cpu(processed_system_time, cpu);
}
while (!TIME_VALUES_UP_TO_DATE);
- if (unlikely(delta < 0)) {
- printk("Timer ISR: Time went backwards: %lld %lld %lld %lld\n",
- delta, shadow_system_time,
+ if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
+ printk("Timer ISR/%d: Time went backwards: "
+ "delta=%lld cpu_delta=%lld shadow=%lld "
+ "off=%lld processed=%lld cpu_processed=%lld\n",
+ cpu, delta, delta_cpu, shadow_system_time,
((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC),
- processed_system_time);
+ processed_system_time,
+ per_cpu(processed_system_time, cpu));
+ for (cpu = 0; cpu < num_online_cpus(); cpu++)
+ printk(" %d: %lld\n", cpu,
+ per_cpu(processed_system_time, cpu));
return;
}
- /* Process elapsed jiffies since last call. */
+ /* System-wide jiffy work. */
while (delta >= NS_PER_TICK) {
delta -= NS_PER_TICK;
processed_system_time += NS_PER_TICK;
do_timer(regs);
-#ifndef CONFIG_SMP
+ }
+
+ /* Local CPU jiffy work. */
+ while (delta_cpu >= NS_PER_TICK) {
+ delta_cpu -= NS_PER_TICK;
+ per_cpu(processed_system_time, cpu) += NS_PER_TICK;
update_process_times(user_mode(regs));
-#endif
- if (regs)
- profile_tick(CPU_PROFILING, regs);
+ profile_tick(CPU_PROFILING, regs);
}
+ if (cpu != 0)
+ return;
+
/*
* Take synchronised time from Xen once a minute if we're not
* synchronised ourselves, and we haven't chosen to keep an independent
@@ -618,10 +645,10 @@ void __init hpet_time_init(void)
#endif
/* Dynamically-mapped IRQ. */
-static int TIMER_IRQ;
+static DEFINE_PER_CPU(int, timer_irq);
static struct irqaction irq_timer = {
- timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer",
+ timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
NULL, NULL
};
@@ -643,25 +670,43 @@ void __init time_init(void)
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
processed_system_time = shadow_system_time;
+ per_cpu(processed_system_time, 0) = processed_system_time;
if (timer_tsc_init.init(NULL) != 0)
BUG();
printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
- TIMER_IRQ = bind_virq_to_irq(VIRQ_TIMER);
+#if defined(__x86_64__)
+ vxtime.mode = VXTIME_TSC;
+ vxtime.quot = (1000000L << 32) / vxtime_hz;
+ vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+ vxtime.hz = vxtime_hz;
+ sync_core();
+ rdtscll(vxtime.last_tsc);
+#endif
- (void)setup_irq(TIMER_IRQ, &irq_timer);
+ per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
+ (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
}
-/* Convert jiffies to system time. Call with xtime_lock held for reading. */
-static inline u64 __jiffies_to_st(unsigned long j)
+/* Convert jiffies to system time. */
+static inline u64 jiffies_to_st(unsigned long j)
{
- long delta = j - jiffies;
- /* NB. The next check can trigger in some wrap-around cases, but
- * that's ok -- we'll just end up with a shorter timeout. */
- if (delta < 1)
- delta = 1;
- return processed_system_time + (delta * NS_PER_TICK);
+ unsigned long seq;
+ long delta;
+ u64 st;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ delta = j - jiffies;
+ /* NB. The next check can trigger in some wrap-around cases,
+ * but that's ok: we'll just end up with a shorter timeout. */
+ if (delta < 1)
+ delta = 1;
+ st = processed_system_time + (delta * NS_PER_TICK);
+ } while (read_seqretry(&xtime_lock, seq));
+
+ return st;
}
/*
@@ -684,7 +729,7 @@ void stop_hz_timer(void)
j = next_timer_interrupt();
}
- BUG_ON(HYPERVISOR_set_timer_op(__jiffies_to_st(j)) != 0);
+ BUG_ON(HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0);
}
void start_hz_timer(void)
@@ -708,6 +753,7 @@ void time_resume(void)
/* Reset our own concept of passage of system time. */
processed_system_time = shadow_system_time;
+ per_cpu(processed_system_time, 0) = processed_system_time;
/* Accept a warp in UTC (wall-clock) time. */
last_seen_tv.tv_sec = 0;
@@ -716,6 +762,24 @@ void time_resume(void)
last_update_from_xen = 0;
}
+#ifdef CONFIG_SMP
+static char timer_name[NR_CPUS][15];
+void local_setup_timer(void)
+{
+ int seq, cpu = smp_processor_id();
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ per_cpu(processed_system_time, cpu) = shadow_system_time;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
+ sprintf(timer_name[cpu], "timer%d", cpu);
+ BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
+ SA_INTERRUPT, timer_name[cpu], NULL));
+}
+#endif
+
/*
* /proc/sys/xen: This really belongs in another file. It can stay here for
* now however.
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c
index 47396aa186..539c1d5b7d 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c
@@ -465,14 +465,7 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code)
unsigned long ldt;
__asm__ __volatile__ ("sldt %0" : "=r" (ldt));
if (ldt == 0) {
- mmu_update_t u;
- u.ptr = MMU_EXTENDED_COMMAND;
- u.ptr |= (unsigned long)&default_ldt[0];
- u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
- if (unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0)) {
- show_trace(NULL, (unsigned long *)&u);
- panic("Failed to install default LDT");
- }
+ xen_set_ldt((unsigned long)&default_ldt[0], 5);
return;
}
}
@@ -616,6 +609,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
nmi_enter();
cpu = smp_processor_id();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (!cpu_online(cpu)) {
+ nmi_exit();
+ return;
+ }
+#endif
+
++nmi_count(cpu);
if (!nmi_callback(regs, cpu))
@@ -893,15 +894,7 @@ asmlinkage void math_state_restore(struct pt_regs regs)
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;
- /*
- * A trap in kernel mode can be ignored. It'll be the fast XOR or
- * copying libraries, which will correctly save/restore state and
- * reset the TS bit in CR0.
- */
- if ((regs.xcs & 2) == 0)
- return;
-
- clts(); /* Allow maths ops (or we recurse) */
+ /* NB. 'clts' is done for us by Xen during virtual trap. */
if (!tsk_used_math(tsk))
init_fpu(tsk);
restore_fpu(tsk);
@@ -964,17 +957,26 @@ static trap_info_t trap_table[] = {
void __init trap_init(void)
{
HYPERVISOR_set_trap_table(trap_table);
- HYPERVISOR_set_fast_trap(SYSCALL_VECTOR);
/*
* default LDT is a single-entry callgate to lcall7 for iBCS
* and a callgate to lcall27 for Solaris/x86 binaries
*/
make_lowmem_page_readonly(&default_ldt[0]);
- xen_flush_page_update_queue();
/*
* Should be a barrier for any external CPU state.
*/
cpu_init();
}
+
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+ trap_info_t *t = trap_table;
+
+ for (t = trap_table; t->address; t++) {
+ trap_ctxt[t->vector].flags = t->flags;
+ trap_ctxt[t->vector].cs = t->cs;
+ trap_ctxt[t->vector].address = t->address;
+ }
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile
new file mode 100644
index 0000000000..7d50b2926e
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mach-default/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the linux kernel.
+#
+
+c-obj-y := topology.o
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y)):
+ @ln -fsn $(srctree)/arch/i386/mach-default/$(notdir $@) $@
+
+obj-y += $(c-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile
index 016d205d60..50d99c2d3d 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile
@@ -6,8 +6,8 @@ XENARCH := $(subst ",,$(CONFIG_XENARCH))
CFLAGS += -Iarch/$(XENARCH)/mm
-obj-y := init.o pgtable.o fault.o ioremap.o pageattr.o hypervisor.o
-c-obj-y := extable.o mmap.o
+obj-y := init.o pgtable.o fault.o ioremap.o hypervisor.o
+c-obj-y := extable.o mmap.o pageattr.o
c-obj-$(CONFIG_DISCONTIGMEM) += discontig.o
c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c
index 478c20fe8d..99e2cccc40 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c
@@ -21,6 +21,7 @@
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/percpu.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -29,7 +30,7 @@
extern void die(const char *,struct pt_regs *,long);
-pgd_t *cur_pgd; /* XXXsmp */
+DEFINE_PER_CPU(pgd_t *, cur_pgd);
/*
* Unlock any spinlocks which will prevent us from getting the
@@ -453,7 +454,8 @@ no_context:
printk(" at virtual address %08lx\n",address);
printk(KERN_ALERT " printing eip:\n");
printk("%08lx\n", regs->eip);
- page = ((unsigned long *) cur_pgd)[address >> 22];
+ page = ((unsigned long *) per_cpu(cur_pgd, smp_processor_id()))
+ [address >> 22];
printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
machine_to_phys(page));
/*
@@ -529,7 +531,7 @@ vmalloc_fault:
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
- pgd = index + cur_pgd;
+ pgd = index + per_cpu(cur_pgd, smp_processor_id());
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k))
@@ -551,7 +553,6 @@ vmalloc_fault:
if (!pmd_present(*pmd_k))
goto no_context;
set_pmd(pmd, *pmd_k);
- xen_flush_page_update_queue(); /* flush PMD update */
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c
index 6c223c79e1..cf0488175f 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c
@@ -34,324 +34,272 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm-xen/hypervisor.h>
-#include <asm-xen/multicall.h>
#include <asm-xen/balloon.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#include <linux/percpu.h>
+#include <asm/tlbflush.h>
+#endif
-/*
- * This suffices to protect us if we ever move to SMP domains.
- * Further, it protects us against interrupts. At the very least, this is
- * required for the network driver which flushes the update queue before
- * pushing new receive buffers.
- */
-static spinlock_t update_lock = SPIN_LOCK_UNLOCKED;
-
-/* Linux 2.6 isn't using the traditional batched interface. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#define QUEUE_SIZE 2048
#define pte_offset_kernel pte_offset
-#define pmd_val_ma(v) (v).pmd;
#define pud_t pgd_t
#define pud_offset(d, va) d
+#elif defined(CONFIG_X86_64)
+#define pmd_val_ma(v) (v).pmd
#else
-#define QUEUE_SIZE 128
-#define pmd_val_ma(v) (v).pud.pgd.pgd;
+#define pmd_val_ma(v) (v).pud.pgd.pgd
#endif
-static mmu_update_t update_queue[QUEUE_SIZE];
-unsigned int mmu_update_queue_idx = 0;
-#define idx mmu_update_queue_idx
-
-/*
- * MULTICALL_flush_page_update_queue:
- * This is a version of the flush which queues as part of a multicall.
- */
-void MULTICALL_flush_page_update_queue(void)
+#ifndef CONFIG_XEN_SHADOW_MODE
+void xen_l1_entry_update(pte_t *ptr, unsigned long val)
{
- unsigned long flags;
- unsigned int _idx;
- spin_lock_irqsave(&update_lock, flags);
- if ( (_idx = idx) != 0 )
- {
- idx = 0;
- wmb(); /* Make sure index is cleared first to avoid double updates. */
- queue_multicall3(__HYPERVISOR_mmu_update,
- (unsigned long)update_queue,
- (unsigned long)_idx,
- (unsigned long)NULL);
- }
- spin_unlock_irqrestore(&update_lock, flags);
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = val;
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
-static inline void __flush_page_update_queue(void)
+void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
- unsigned int _idx = idx;
- idx = 0;
- wmb(); /* Make sure index is cleared first to avoid double updates. */
- if ( unlikely(HYPERVISOR_mmu_update(update_queue, _idx, NULL) < 0) )
- {
- printk(KERN_ALERT "Failed to execute MMU updates.\n");
- BUG();
- }
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = pmd_val_ma(val);
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
-void _flush_page_update_queue(void)
+#ifdef CONFIG_X86_64
+void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- if ( idx != 0 ) __flush_page_update_queue();
- spin_unlock_irqrestore(&update_lock, flags);
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = val.pud;
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
-static inline void increment_index(void)
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
- idx++;
- if ( unlikely(idx == QUEUE_SIZE) ) __flush_page_update_queue();
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = val.pgd;
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_XEN_SHADOW_MODE */
-static inline void increment_index_and_flush(void)
+void xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
- idx++;
- __flush_page_update_queue();
+ mmu_update_t u;
+ u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ u.val = pfn;
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
-void queue_l1_entry_update(pte_t *ptr, unsigned long val)
+void xen_pt_switch(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = virt_to_machine(ptr);
- update_queue[idx].val = val;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_NEW_BASEPTR;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void queue_l2_entry_update(pmd_t *ptr, pmd_t val)
+void xen_new_user_pt(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = virt_to_machine(ptr);
- update_queue[idx].val = pmd_val_ma(val);
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_NEW_USER_BASEPTR;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void queue_pt_switch(unsigned long ptr)
+void xen_tlb_flush(void)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_NEW_BASEPTR;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void queue_tlb_flush(void)
+void xen_invlpg(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_TLB_FLUSH;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_INVLPG_LOCAL;
+ op.linear_addr = ptr & PAGE_MASK;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void queue_invlpg(unsigned long ptr)
-{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
- update_queue[idx].ptr |= ptr & PAGE_MASK;
- update_queue[idx].val = MMUEXT_INVLPG;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
-}
+#ifdef CONFIG_SMP
-void queue_pgd_pin(unsigned long ptr)
+void xen_tlb_flush_all(void)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_PIN_L2_TABLE;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_TLB_FLUSH_ALL;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void queue_pgd_unpin(unsigned long ptr)
+void xen_tlb_flush_mask(cpumask_t *mask)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_UNPIN_TABLE;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ if ( cpus_empty(*mask) )
+ return;
+ op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+ op.vcpumask = mask->bits;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void queue_pte_pin(unsigned long ptr)
+void xen_invlpg_all(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_PIN_L1_TABLE;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_INVLPG_ALL;
+ op.linear_addr = ptr & PAGE_MASK;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void queue_pte_unpin(unsigned long ptr)
+void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_UNPIN_TABLE;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ if ( cpus_empty(*mask) )
+ return;
+ op.cmd = MMUEXT_INVLPG_MULTI;
+ op.vcpumask = mask->bits;
+ op.linear_addr = ptr & PAGE_MASK;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void queue_set_ldt(unsigned long ptr, unsigned long len)
-{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = MMU_EXTENDED_COMMAND | ptr;
- update_queue[idx].val = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT);
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
-}
+#endif /* CONFIG_SMP */
-void queue_machphys_update(unsigned long mfn, unsigned long pfn)
+#ifndef CONFIG_XEN_SHADOW_MODE
+void xen_pgd_pin(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- update_queue[idx].val = pfn;
- increment_index();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+#ifdef CONFIG_X86_64
+ op.cmd = MMUEXT_PIN_L4_TABLE;
+#else
+ op.cmd = MMUEXT_PIN_L2_TABLE;
+#endif
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-/* queue and flush versions of the above */
-void xen_l1_entry_update(pte_t *ptr, unsigned long val)
+void xen_pgd_unpin(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = virt_to_machine(ptr);
- update_queue[idx].val = val;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
+void xen_pte_pin(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = virt_to_machine(ptr);
- update_queue[idx].val = pmd_val_ma(val);
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PIN_L1_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_pt_switch(unsigned long ptr)
+void xen_pte_unpin(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_NEW_BASEPTR;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_tlb_flush(void)
+#ifdef CONFIG_X86_64
+void xen_pud_pin(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_TLB_FLUSH;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PIN_L3_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_invlpg(unsigned long ptr)
+void xen_pud_unpin(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
- update_queue[idx].ptr |= ptr & PAGE_MASK;
- update_queue[idx].val = MMUEXT_INVLPG;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_pgd_pin(unsigned long ptr)
+void xen_pmd_pin(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_PIN_L2_TABLE;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PIN_L2_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_pgd_unpin(unsigned long ptr)
+void xen_pmd_unpin(unsigned long ptr)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_UNPIN_TABLE;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_UNPIN_TABLE;
+ op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_XEN_SHADOW_MODE */
-void xen_pte_pin(unsigned long ptr)
+void xen_set_ldt(unsigned long ptr, unsigned long len)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_PIN_L1_TABLE;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ struct mmuext_op op;
+ op.cmd = MMUEXT_SET_LDT;
+ op.linear_addr = ptr;
+ op.nr_ents = len;
+ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_pte_unpin(unsigned long ptr)
+void xen_contig_memory(unsigned long vstart, unsigned int order)
{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = phys_to_machine(ptr);
- update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
- update_queue[idx].val = MMUEXT_UNPIN_TABLE;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
-}
+ /*
+ * Ensure multi-page extents are contiguous in machine memory. This code
+ * could be cleaned up some, and the number of hypercalls reduced.
+ */
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long mfn, i, flags;
-void xen_set_ldt(unsigned long ptr, unsigned long len)
-{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = MMU_EXTENDED_COMMAND | ptr;
- update_queue[idx].val = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT);
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
-}
+ scrub_pages(vstart, 1 << order);
-void xen_machphys_update(unsigned long mfn, unsigned long pfn)
-{
- unsigned long flags;
- spin_lock_irqsave(&update_lock, flags);
- update_queue[idx].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- update_queue[idx].val = pfn;
- increment_index_and_flush();
- spin_unlock_irqrestore(&update_lock, flags);
+ balloon_lock(flags);
+
+ /* 1. Zap current PTEs, giving away the underlying pages. */
+ for (i = 0; i < (1<<order); i++) {
+ pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
+ pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
+ pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
+ pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+ mfn = pte_mfn(*pte);
+ HYPERVISOR_update_va_mapping(
+ vstart + (i*PAGE_SIZE), __pte_ma(0), 0);
+ phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
+ INVALID_P2M_ENTRY;
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+ }
+
+ /* 2. Get a new contiguous memory extent. */
+ BUG_ON(HYPERVISOR_dom_mem_op(
+ MEMOP_increase_reservation, &mfn, 1, order) != 1);
+
+ /* 3. Map the new extent in place of old pages. */
+ for (i = 0; i < (1<<order); i++) {
+ HYPERVISOR_update_va_mapping(
+ vstart + (i*PAGE_SIZE),
+ __pte_ma(((mfn+i)<<PAGE_SHIFT)|__PAGE_KERNEL), 0);
+ xen_machphys_update(mfn+i, (__pa(vstart)>>PAGE_SHIFT)+i);
+ phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn+i;
+ }
+
+ flush_tlb_all();
+
+ balloon_unlock(flags);
}
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
unsigned long allocate_empty_lowmem_region(unsigned long pages)
{
- pgd_t *pgd;
+ pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -376,14 +324,17 @@ unsigned long allocate_empty_lowmem_region(unsigned long pages)
pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
- pfn_array[i] = pte->pte_low >> PAGE_SHIFT;
- queue_l1_entry_update(pte, 0);
+ pfn_array[i] = pte_mfn(*pte);
+#ifdef CONFIG_X86_64
+ xen_l1_entry_update(pte, 0);
+#else
+ HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), __pte_ma(0), 0);
+#endif
phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
INVALID_P2M_ENTRY;
}
- /* Flush updates through and flush the TLB. */
- xen_tlb_flush();
+ flush_tlb_all();
balloon_put_pages(pfn_array, 1 << order);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c
index 6c4387ac3a..044568c42b 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c
@@ -192,7 +192,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
}
pte_ofs = 0;
}
- flush_page_update_queue();
}
pmd_idx = 0;
}
@@ -356,12 +355,13 @@ static void __init pagetable_init (void)
*/
memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
make_page_readonly(pgd_base);
- queue_pgd_pin(__pa(pgd_base));
+ xen_pgd_pin(__pa(pgd_base));
load_cr3(pgd_base);
- queue_pgd_unpin(__pa(old_pgd));
+ xen_pgd_unpin(__pa(old_pgd));
make_page_writable(old_pgd);
__flush_tlb_all();
free_bootmem(__pa(old_pgd), PAGE_SIZE);
+ init_mm.context.pinned = 1;
kernel_physical_mapping_init(pgd_base);
remap_numa_kva();
@@ -563,8 +563,7 @@ void __init paging_init(void)
zone_sizes_init();
/* Switch to the real shared_info page, and clear the dummy page. */
- flush_page_update_queue();
- set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info);
+ set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
memset(empty_zero_page, 0, sizeof(empty_zero_page));
@@ -572,10 +571,11 @@ void __init paging_init(void)
/* Setup mapping of lower 1st MB */
for (i = 0; i < NR_FIX_ISAMAPS; i++)
if (xen_start_info.flags & SIF_PRIVILEGED)
- set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
+ set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
else
- set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i,
- virt_to_machine(empty_zero_page));
+ __set_fixmap(FIX_ISAMAP_BEGIN - i,
+ virt_to_machine(empty_zero_page),
+ PAGE_KERNEL_RO);
#endif
}
@@ -715,18 +715,9 @@ void __init mem_init(void)
kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;
-kmem_cache_t *pte_cache;
void __init pgtable_cache_init(void)
{
- pte_cache = kmem_cache_create("pte",
- PTRS_PER_PTE*sizeof(pte_t),
- PTRS_PER_PTE*sizeof(pte_t),
- 0,
- pte_ctor,
- pte_dtor);
- if (!pte_cache)
- panic("pgtable_cache_init(): Cannot create pte cache");
if (PTRS_PER_PMD > 1) {
pmd_cache = kmem_cache_create("pmd",
PTRS_PER_PMD*sizeof(pmd_t),
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c
index 8a0df417ed..86a3672e33 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c
@@ -108,7 +108,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
if(!PageReserved(page))
return NULL;
- domid = DOMID_LOCAL;
+ domid = DOMID_SELF;
}
/*
@@ -256,7 +256,7 @@ void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
*/
idx = FIX_BTMAP_BEGIN;
while (nrpages > 0) {
- set_fixmap_ma(idx, phys_addr);
+ set_fixmap(idx, phys_addr);
phys_addr += PAGE_SIZE;
--idx;
--nrpages;
@@ -393,15 +393,7 @@ int direct_remap_area_pages(struct mm_struct *mm,
int i;
unsigned long start_address;
#define MAX_DIRECTMAP_MMU_QUEUE 130
- mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
-
- v = w = &u[0];
- if (domid != DOMID_LOCAL) {
- u[0].ptr = MMU_EXTENDED_COMMAND;
- u[0].val = MMUEXT_SET_FOREIGNDOM;
- u[0].val |= (unsigned long)domid << 16;
- v = w = &u[1];
- }
+ mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
start_address = address;
@@ -413,11 +405,11 @@ int direct_remap_area_pages(struct mm_struct *mm,
__direct_remap_area_pages(mm,
start_address,
address-start_address,
- w);
+ u);
- if (HYPERVISOR_mmu_update(u, v - u, NULL) < 0)
+ if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
return -EFAULT;
- v = w;
+ v = u;
start_address = address;
}
@@ -432,13 +424,13 @@ int direct_remap_area_pages(struct mm_struct *mm,
v++;
}
- if (v != w) {
+ if (v != u) {
/* get the ptep's filled in */
__direct_remap_area_pages(mm,
start_address,
address-start_address,
- w);
- if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0))
+ u);
+ if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
return -EFAULT;
}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c
deleted file mode 100644
index 1b79c7e684..0000000000
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Thanks to Ben LaHaise for precious feedback.
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-
-static DEFINE_SPINLOCK(cpa_lock);
-static struct list_head df_list = LIST_HEAD_INIT(df_list);
-
-
-pte_t *lookup_address(unsigned long address)
-{
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
- if (pgd_none(*pgd))
- return NULL;
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- return NULL;
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd))
- return NULL;
- if (pmd_large(*pmd))
- return (pte_t *)pmd;
- return pte_offset_kernel(pmd, address);
-}
-
-static struct page *split_large_page(unsigned long address, pgprot_t prot)
-{
- int i;
- unsigned long addr;
- struct page *base;
- pte_t *pbase;
-
- spin_unlock_irq(&cpa_lock);
- base = alloc_pages(GFP_KERNEL, 0);
- spin_lock_irq(&cpa_lock);
- if (!base)
- return NULL;
-
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
- pbase = (pte_t *)page_address(base);
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : PAGE_KERNEL);
- }
- return base;
-}
-
-static void flush_kernel_map(void *dummy)
-{
- /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
- if (boot_cpu_data.x86_model >= 4)
- wbinvd();
- /* Flush all to work around Errata in early athlons regarding
- * large page flushing.
- */
- __flush_tlb_all();
-}
-
-static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
-{
- struct page *page;
- unsigned long flags;
-
- set_pte_atomic(kpte, pte); /* change init_mm */
- if (PTRS_PER_PMD > 1)
- return;
-
- spin_lock_irqsave(&pgd_lock, flags);
- for (page = pgd_list; page; page = (struct page *)page->index) {
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- pud = pud_offset(pgd, address);
- pmd = pmd_offset(pud, address);
- set_pte_atomic((pte_t *)pmd, pte);
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-/*
- * No more special protections in this 2/4MB area - revert to a
- * large page again.
- */
-static inline void revert_page(struct page *kpte_page, unsigned long address)
-{
- pte_t *linear = (pte_t *)
- pmd_offset(pud_offset(pgd_offset_k(address), address), address);
- set_pmd_pte(linear, address,
- pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
- PAGE_KERNEL_LARGE));
-}
-
-static int
-__change_page_attr(struct page *page, pgprot_t prot)
-{
- pte_t *kpte;
- unsigned long address;
- struct page *kpte_page;
-
- BUG_ON(PageHighMem(page));
- address = (unsigned long)page_address(page);
-
- kpte = lookup_address(address);
- if (!kpte)
- return -EINVAL;
- kpte_page = virt_to_page(kpte);
- if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
- if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
- set_pte_batched(kpte, mk_pte(page, prot));
- } else {
- struct page *split = split_large_page(address, prot);
- if (!split)
- return -ENOMEM;
- set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
- kpte_page = split;
- }
- get_page(kpte_page);
- } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
- set_pte_batched(kpte, mk_pte(page, PAGE_KERNEL));
- __put_page(kpte_page);
- } else
- BUG();
-
- /*
- * If the pte was reserved, it means it was created at boot
- * time (not via split_large_page) and in turn we must not
- * replace it with a largepage.
- */
- if (!PageReserved(kpte_page)) {
- /* memleak and potential failed 2M page regeneration */
- BUG_ON(!page_count(kpte_page));
-
- if (cpu_has_pse && (page_count(kpte_page) == 1)) {
- list_add(&kpte_page->lru, &df_list);
- revert_page(kpte_page, address);
- }
- }
- return 0;
-}
-
-static inline void flush_map(void)
-{
- on_each_cpu(flush_kernel_map, NULL, 1, 1);
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere.
- * This function only deals with the kernel linear map.
- *
- * Caller must call global_flush_tlb() after this.
- */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
- int err = 0;
- int i;
- unsigned long flags;
-
- spin_lock_irqsave(&cpa_lock, flags);
- for (i = 0; i < numpages; i++, page++) {
- err = __change_page_attr(page, prot);
- if (err)
- break;
- }
- flush_page_update_queue();
- spin_unlock_irqrestore(&cpa_lock, flags);
- return err;
-}
-
-void global_flush_tlb(void)
-{
- LIST_HEAD(l);
- struct list_head* n;
-
- BUG_ON(irqs_disabled());
-
- spin_lock_irq(&cpa_lock);
- list_splice_init(&df_list, &l);
- spin_unlock_irq(&cpa_lock);
- flush_map();
- n = l.next;
- while (n != &l) {
- struct page *pg = list_entry(n, struct page, lru);
- n = n->next;
- __free_page(pg);
- }
-}
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-void kernel_map_pages(struct page *page, int numpages, int enable)
-{
- if (PageHighMem(page))
- return;
- /* the return value is ignored - the calls cannot fail,
- * large pages are disabled at boot time.
- */
- change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
- /* we should perform an IPI and flush all tlbs,
- * but that can deadlock->flush only current cpu.
- */
- __flush_tlb_all();
-}
-#endif
-
-EXPORT_SYMBOL(change_page_attr);
-EXPORT_SYMBOL(global_flush_tlb);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c
index 336d109981..f3756654c3 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c
@@ -22,6 +22,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
+#include <asm/mmu_context.h>
#include <asm-xen/foreign_page.h>
@@ -176,85 +177,57 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
BUG();
return;
}
- set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
-}
-
-void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
-{
- unsigned long address = __fix_to_virt(idx);
-
- if (idx >= __end_of_fixed_addresses) {
- BUG();
- return;
+ switch (idx) {
+ case FIX_WP_TEST:
+ case FIX_VSYSCALL:
+#ifdef CONFIG_X86_F00F_BUG
+ case FIX_F00F_IDT:
+#endif
+ set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+ break;
+ default:
+ set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
+ break;
}
- set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
}
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
- if (pte) {
+ if (pte)
make_page_readonly(pte);
- xen_flush_page_update_queue();
- }
return pte;
}
-void pte_ctor(void *pte, kmem_cache_t *cache, unsigned long unused)
-{
- struct page *page = virt_to_page(pte);
- SetPageForeign(page, pte_free);
- set_page_count(page, 1);
-
- clear_page(pte);
- make_page_readonly(pte);
- queue_pte_pin(__pa(pte));
- flush_page_update_queue();
-}
-
-void pte_dtor(void *pte, kmem_cache_t *cache, unsigned long unused)
-{
- struct page *page = virt_to_page(pte);
- ClearPageForeign(page);
-
- queue_pte_unpin(__pa(pte));
- make_page_writable(pte);
- flush_page_update_queue();
-}
-
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *ptep;
-
-#ifdef CONFIG_HIGHPTE
struct page *pte;
+#ifdef CONFIG_HIGHPTE
pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
- if (pte == NULL)
- return pte;
- if (PageHighMem(pte))
- return pte;
- /* not a highmem page -- free page and grab one from the cache */
- __free_page(pte);
+#else
+ pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ if (pte) {
+ SetPageForeign(pte, pte_free);
+ set_page_count(pte, 1);
+ }
#endif
- ptep = kmem_cache_alloc(pte_cache, GFP_KERNEL);
- if (ptep)
- return virt_to_page(ptep);
- return NULL;
+
+ return pte;
}
void pte_free(struct page *pte)
{
+ unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
+
+ if (!pte_write(*virt_to_ptep(va)))
+ HYPERVISOR_update_va_mapping(
+ va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0);
+
+ ClearPageForeign(pte);
set_page_count(pte, 1);
-#ifdef CONFIG_HIGHPTE
- if (!PageHighMem(pte))
-#endif
- kmem_cache_free(pte_cache,
- phys_to_virt(page_to_pseudophys(pte)));
-#ifdef CONFIG_HIGHPTE
- else
- __free_page(pte);
-#endif
+
+ __free_page(pte);
}
void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
@@ -309,15 +282,11 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
if (PTRS_PER_PMD > 1)
- goto out;
+ return;
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- out:
- make_page_readonly(pgd);
- queue_pgd_pin(__pa(pgd));
- flush_page_update_queue();
}
/* never called when PTRS_PER_PMD > 1 */
@@ -325,10 +294,6 @@ void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
- queue_pgd_unpin(__pa(pgd));
- make_page_writable(pgd);
- flush_page_update_queue();
-
if (PTRS_PER_PMD > 1)
return;
@@ -363,6 +328,15 @@ out_oom:
void pgd_free(pgd_t *pgd)
{
int i;
+ pte_t *ptep = virt_to_ptep(pgd);
+
+ if (!pte_write(*ptep)) {
+ xen_pgd_unpin(__pa(pgd));
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)pgd,
+ pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
+ 0);
+ }
/* in the PAE case user pgd entries are overwritten before usage */
if (PTRS_PER_PMD > 1)
@@ -372,22 +346,23 @@ void pgd_free(pgd_t *pgd)
kmem_cache_free(pgd_cache, pgd);
}
+#ifndef CONFIG_XEN_SHADOW_MODE
void make_lowmem_page_readonly(void *va)
{
pte_t *pte = virt_to_ptep(va);
- queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW);
+ set_pte(pte, pte_wrprotect(*pte));
}
void make_lowmem_page_writable(void *va)
{
pte_t *pte = virt_to_ptep(va);
- queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW);
+ set_pte(pte, pte_mkwrite(*pte));
}
void make_page_readonly(void *va)
{
pte_t *pte = virt_to_ptep(va);
- queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW);
+ set_pte(pte, pte_wrprotect(*pte));
if ( (unsigned long)va >= (unsigned long)high_memory )
{
unsigned long phys;
@@ -402,7 +377,7 @@ void make_page_readonly(void *va)
void make_page_writable(void *va)
{
pte_t *pte = virt_to_ptep(va);
- queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW);
+ set_pte(pte, pte_mkwrite(*pte));
if ( (unsigned long)va >= (unsigned long)high_memory )
{
unsigned long phys;
@@ -431,3 +406,118 @@ void make_pages_writable(void *va, unsigned int nr)
va = (void *)((unsigned long)va + PAGE_SIZE);
}
}
+#endif /* CONFIG_XEN_SHADOW_MODE */
+
+LIST_HEAD(mm_unpinned);
+DEFINE_SPINLOCK(mm_unpinned_lock);
+
+static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
+{
+ struct page *page = virt_to_page(pt);
+ unsigned long pfn = page_to_pfn(page);
+
+ if (PageHighMem(page))
+ return;
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ pfn_pte(pfn, flags), 0);
+}
+
+static void mm_walk(struct mm_struct *mm, pgprot_t flags)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ int g,u,m;
+
+ pgd = mm->pgd;
+ for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
+ if (pgd_none(*pgd))
+ continue;
+ pud = pud_offset(pgd, 0);
+ if (PTRS_PER_PUD > 1) /* not folded */
+ mm_walk_set_prot(pud,flags);
+ for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+ if (pud_none(*pud))
+ continue;
+ pmd = pmd_offset(pud, 0);
+ if (PTRS_PER_PMD > 1) /* not folded */
+ mm_walk_set_prot(pmd,flags);
+ for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+ if (pmd_none(*pmd))
+ continue;
+ pte = pte_offset_kernel(pmd,0);
+ mm_walk_set_prot(pte,flags);
+ }
+ }
+ }
+}
+
+void mm_pin(struct mm_struct *mm)
+{
+ spin_lock(&mm->page_table_lock);
+
+ mm_walk(mm, PAGE_KERNEL_RO);
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)mm->pgd,
+ pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO), 0);
+ xen_pgd_pin(__pa(mm->pgd));
+ mm->context.pinned = 1;
+ spin_lock(&mm_unpinned_lock);
+ list_del(&mm->context.unpinned);
+ spin_unlock(&mm_unpinned_lock);
+
+ spin_unlock(&mm->page_table_lock);
+}
+
+void mm_unpin(struct mm_struct *mm)
+{
+ spin_lock(&mm->page_table_lock);
+
+ xen_pgd_unpin(__pa(mm->pgd));
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)mm->pgd,
+ pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0);
+ mm_walk(mm, PAGE_KERNEL);
+ mm->context.pinned = 0;
+ spin_lock(&mm_unpinned_lock);
+ list_add(&mm->context.unpinned, &mm_unpinned);
+ spin_unlock(&mm_unpinned_lock);
+
+ spin_unlock(&mm->page_table_lock);
+}
+
+void mm_pin_all(void)
+{
+ while (!list_empty(&mm_unpinned))
+ mm_pin(list_entry(mm_unpinned.next, struct mm_struct,
+ context.unpinned));
+}
+
+void _arch_exit_mmap(struct mm_struct *mm)
+{
+ struct task_struct *tsk = current;
+
+ task_lock(tsk);
+
+ /*
+ * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+ * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+ */
+ if ( tsk->active_mm == mm )
+ {
+ tsk->active_mm = &init_mm;
+ atomic_inc(&init_mm.mm_count);
+
+ switch_mm(mm, &init_mm, tsk);
+
+ atomic_dec(&mm->mm_count);
+ BUG_ON(atomic_read(&mm->mm_count) == 0);
+ }
+
+ task_unlock(tsk);
+
+ if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) )
+ mm_unpin(mm);
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile
index 175f5f4819..76c024abfd 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile
@@ -6,12 +6,13 @@ c-obj-y := i386.o
c-obj-$(CONFIG_PCI_BIOS) += pcbios.o
c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
-obj-$(CONFIG_PCI_DIRECT) += direct.o
+c-obj-$(CONFIG_PCI_DIRECT) += direct.o
c-pci-y := fixup.o
c-pci-$(CONFIG_ACPI_PCI) += acpi.o
c-pci-y += legacy.o
-pci-y += irq.o
+# Make sure irq.o gets linked in after legacy.o
+l-pci-y += irq.o
c-pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
pci-$(CONFIG_X86_VISWS) :=
@@ -26,6 +27,6 @@ c-link :=
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
@ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
-obj-y += $(c-obj-y)
+obj-y += $(c-obj-y) $(l-pci-y)
clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c
deleted file mode 100644
index 88c6692a9a..0000000000
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * direct.c - Low-level direct PCI config space access
- */
-
-#include <linux/pci.h>
-#include <linux/init.h>
-#include "pci.h"
-
-#include <asm-xen/xen-public/xen.h>
-#include <asm-xen/xen-public/physdev.h>
-
-/*
- * Functions for accessing PCI configuration space with type xen accesses
- */
-
-static int pci_conf_read (int seg, int bus, int devfn, int reg, int len, u32 *value)
-{
- unsigned long flags;
- physdev_op_t op;
- int ret;
-
- if (!value || (bus > 255) || (devfn > 255) || (reg > 255))
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- op.cmd = PHYSDEVOP_PCI_CFGREG_READ;
- op.u.pci_cfgreg_read.bus = bus;
- op.u.pci_cfgreg_read.dev = (devfn & ~0x7) >> 3;
- op.u.pci_cfgreg_read.func = devfn & 0x7;
- op.u.pci_cfgreg_read.reg = reg;
- op.u.pci_cfgreg_read.len = len;
-
- ret = HYPERVISOR_physdev_op(&op);
- if (ret == 0)
- *value = op.u.pci_cfgreg_read.value;
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return ret;
-}
-
-static int pci_conf_write (int seg, int bus, int devfn, int reg, int len, u32 value)
-{
- unsigned long flags;
- physdev_op_t op;
- int ret;
-
- if ((bus > 255) || (devfn > 255) || (reg > 255))
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- op.cmd = PHYSDEVOP_PCI_CFGREG_WRITE;
- op.u.pci_cfgreg_write.bus = bus;
- op.u.pci_cfgreg_write.dev = (devfn & ~0x7) >> 3;
- op.u.pci_cfgreg_write.func = devfn & 0x7;
- op.u.pci_cfgreg_write.reg = reg;
- op.u.pci_cfgreg_write.len = len;
- op.u.pci_cfgreg_write.value = value;
-
- ret = HYPERVISOR_physdev_op(&op);
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return ret;
-}
-
-struct pci_raw_ops pci_direct_xen = {
- .read = pci_conf_read,
- .write = pci_conf_write,
-};
-
-static int __init pci_direct_init(void)
-{
- printk(KERN_INFO "PCI: Using configuration type Xen\n");
- raw_pci_ops = &pci_direct_xen;
- return 0;
-}
-
-arch_initcall(pci_direct_init);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c
index 816d439b2c..7eeea04f72 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/dmi.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/io_apic.h>
@@ -20,8 +21,15 @@
#include "pci.h"
-#include <asm-xen/xen-public/xen.h>
-#include <asm-xen/xen-public/physdev.h>
+#define DBG printk
+
+#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
+#define PIRQ_VERSION 0x0100
+
+static int broken_hp_bios_irq9;
+static int acer_tm360_irqrouting;
+
+static struct irq_routing_table *pirq_table;
static int pirq_enable_irq(struct pci_dev *dev);
@@ -37,33 +45,963 @@ static int pirq_penalty[16] = {
0, 0, 0, 0, 1000, 100000, 100000, 100000
};
+struct irq_router {
+ char *name;
+ u16 vendor, device;
+ int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
+ int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+};
+
+struct irq_router_handler {
+ u16 vendor;
+ int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
+};
+
int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL;
+/*
+ * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
+ */
-static int __init pcibios_irq_init(void)
+static struct irq_routing_table * __init pirq_find_routing_table(void)
{
- int bus;
- physdev_op_t op;
+ u8 *addr;
+ struct irq_routing_table *rt;
+ int i;
+ u8 sum;
- DBG("PCI: IRQ init\n");
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ for(addr = (u8 *) isa_bus_to_virt(0xf0000); addr < (u8 *) isa_bus_to_virt(0x100000); addr += 16) {
+ rt = (struct irq_routing_table *) addr;
+ if (rt->signature != PIRQ_SIGNATURE ||
+ rt->version != PIRQ_VERSION ||
+ rt->size % 16 ||
+ rt->size < sizeof(struct irq_routing_table))
+ continue;
+ sum = 0;
+ for(i=0; i<rt->size; i++)
+ sum += addr[i];
+ if (!sum) {
+ DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+ return rt;
+ }
+ }
+#endif
+
+ return NULL;
+}
- if (pcibios_enable_irq || raw_pci_ops == NULL)
+/*
+ * If we have a IRQ routing table, use it to search for peer host
+ * bridges. It's a gross hack, but since there are no other known
+ * ways how to get a list of buses, we have to go this way.
+ */
+
+static void __init pirq_peer_trick(void)
+{
+ struct irq_routing_table *rt = pirq_table;
+ u8 busmap[256];
+ int i;
+ struct irq_info *e;
+
+ memset(busmap, 0, sizeof(busmap));
+ for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
+ e = &rt->slots[i];
+#ifdef DEBUG
+ {
+ int j;
+ DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+ for(j=0; j<4; j++)
+ DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
+ DBG("\n");
+ }
+#endif
+ busmap[e->bus] = 1;
+ }
+ for(i = 1; i < 256; i++) {
+ if (!busmap[i] || pci_find_bus(0, i))
+ continue;
+ if (pci_scan_bus(i, &pci_root_ops, NULL))
+ printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
+ }
+ pcibios_last_bus = -1;
+}
+
+/*
+ * Code for querying and setting of IRQ routes on various interrupt routers.
+ */
+
+void eisa_set_level_irq(unsigned int irq)
+{
+ unsigned char mask = 1 << (irq & 7);
+ unsigned int port = 0x4d0 + (irq >> 3);
+ unsigned char val;
+ static u16 eisa_irq_mask;
+
+ if (irq >= 16 || (1 << irq) & eisa_irq_mask)
+ return;
+
+ eisa_irq_mask |= (1 << irq);
+ printk("PCI: setting IRQ %u as level-triggered\n", irq);
+ val = inb(port);
+ if (!(val & mask)) {
+ DBG(" -> edge");
+ outb(val | mask, port);
+ }
+}
+
+/*
+ * Common IRQ routing practice: nybbles in config space,
+ * offset by some magic constant.
+ */
+static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
+{
+ u8 x;
+ unsigned reg = offset + (nr >> 1);
+
+ pci_read_config_byte(router, reg, &x);
+ return (nr & 1) ? (x >> 4) : (x & 0xf);
+}
+
+static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+{
+ u8 x;
+ unsigned reg = offset + (nr >> 1);
+
+ pci_read_config_byte(router, reg, &x);
+ x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
+ pci_write_config_byte(router, reg, x);
+}
+
+/*
+ * ALI pirq entries are damn ugly, and completely undocumented.
+ * This has been figured out from pirq tables, and it's not a pretty
+ * picture.
+ */
+static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+
+ return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+}
+
+static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+ unsigned int val = irqmap[irq];
+
+ if (val) {
+ write_config_nybble(router, 0x48, pirq-1, val);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
+ * just a pointer to the config space.
+ */
+static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+
+ pci_read_config_byte(router, pirq, &x);
+ return (x < 16) ? x : 0;
+}
+
+static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ pci_write_config_byte(router, pirq, irq);
+ return 1;
+}
+
+/*
+ * The VIA pirq rules are nibble-based, like ALI,
+ * but without the ugly irq number munging.
+ * However, PIRQD is in the upper instead of lower 4 bits.
+ */
+static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq);
+}
+
+static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq);
+ return 1;
+}
+
+/*
+ * ITE 8330G pirq rules are nibble-based
+ * FIXME: pirqmap may be { 1, 0, 3, 2 },
+ * 2+3 are both mapped to irq 9 on my system
+ */
+static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ return read_config_nybble(router,0x43, pirqmap[pirq-1]);
+}
+
+static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
+ return 1;
+}
+
+/*
+ * OPTI: high four bits are nibble pointer..
+ * I wonder what the low bits do?
+ */
+static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0xb8, pirq >> 4);
+}
+
+static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0xb8, pirq >> 4, irq);
+ return 1;
+}
+
+/*
+ * Cyrix: nibble offset 0x5C
+ * 0x5C bits 7:4 is INTB bits 3:0 is INTA
+ * 0x5D bits 7:4 is INTD bits 3:0 is INTC
+ */
+static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ return read_config_nybble(router, 0x5C, (pirq-1)^1);
+}
+
+static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ write_config_nybble(router, 0x5C, (pirq-1)^1, irq);
+ return 1;
+}
+
+/*
+ * PIRQ routing for SiS 85C503 router used in several SiS chipsets.
+ * We have to deal with the following issues here:
+ * - vendors have different ideas about the meaning of link values
+ * - some onboard devices (integrated in the chipset) have special
+ * links and are thus routed differently (i.e. not via PCI INTA-INTD)
+ * - different revision of the router have a different layout for
+ * the routing registers, particularly for the onchip devices
+ *
+ * For all routing registers the common thing is we have one byte
+ * per routeable link which is defined as:
+ * bit 7 IRQ mapping enabled (0) or disabled (1)
+ * bits [6:4] reserved (sometimes used for onchip devices)
+ * bits [3:0] IRQ to map to
+ * allowed: 3-7, 9-12, 14-15
+ * reserved: 0, 1, 2, 8, 13
+ *
+ * The config-space registers located at 0x41/0x42/0x43/0x44 are
+ * always used to route the normal PCI INT A/B/C/D respectively.
+ * Apparently there are systems implementing PCI routing table using
+ * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
+ * We try our best to handle both link mappings.
+ *
+ * Currently (2003-05-21) it appears most SiS chipsets follow the
+ * definition of routing registers from the SiS-5595 southbridge.
+ * According to the SiS 5595 datasheets the revision id's of the
+ * router (ISA-bridge) should be 0x01 or 0xb0.
+ *
+ * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1.
+ * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets.
+ * They seem to work with the current routing code. However there is
+ * some concern because of the two USB-OHCI HCs (original SiS 5595
+ * had only one). YMMV.
+ *
+ * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1:
+ *
+ * 0x61: IDEIRQ:
+ * bits [6:5] must be written 01
+ * bit 4 channel-select primary (0), secondary (1)
+ *
+ * 0x62: USBIRQ:
+ * bit 6 OHCI function disabled (0), enabled (1)
+ *
+ * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved
+ *
+ * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved
+ *
+ * We support USBIRQ (in addition to INTA-INTD) and keep the
+ * IDE, ACPI and DAQ routing untouched as set by the BIOS.
+ *
+ * Currently the only reported exception is the new SiS 65x chipset
+ * which includes the SiS 69x southbridge. Here we have the 85C503
+ * router revision 0x04 and there are changes in the register layout
+ * mostly related to the different USB HCs with USB 2.0 support.
+ *
+ * Onchip routing for router rev-id 0x04 (try-and-error observation)
+ *
+ * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs
+ * bit 6-4 are probably unused, not like 5595
+ */
+
+#define PIRQ_SIS_IRQ_MASK 0x0f
+#define PIRQ_SIS_IRQ_DISABLE 0x80
+#define PIRQ_SIS_USB_ENABLE 0x40
+
+static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 x;
+ int reg;
+
+ reg = pirq;
+ if (reg >= 0x01 && reg <= 0x04)
+ reg += 0x40;
+ pci_read_config_byte(router, reg, &x);
+ return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
+}
+
+static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ u8 x;
+ int reg;
+
+ reg = pirq;
+ if (reg >= 0x01 && reg <= 0x04)
+ reg += 0x40;
+ pci_read_config_byte(router, reg, &x);
+ x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE);
+ x |= irq ? irq: PIRQ_SIS_IRQ_DISABLE;
+ pci_write_config_byte(router, reg, x);
+ return 1;
+}
+
+
+/*
+ * VLSI: nibble offset 0x74 - educated guess due to routing table and
+ * config space of VLSI 82C534 PCI-bridge/router (1004:0102)
+ * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
+ * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
+ * for the busbridge to the docking station.
+ */
+
+static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ if (pirq > 8) {
+ printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ return read_config_nybble(router, 0x74, pirq-1);
+}
+
+static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ if (pirq > 8) {
+ printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+ return 0;
+ }
+ write_config_nybble(router, 0x74, pirq-1, irq);
+ return 1;
+}
+
+/*
+ * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
+ * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register
+ * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect
+ * register is a straight binary coding of desired PIC IRQ (low nibble).
+ *
+ * The 'link' value in the PIRQ table is already in the correct format
+ * for the Index register. There are some special index values:
+ * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
+ * and 0x03 for SMBus.
+ */
+static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ outb_p(pirq, 0xc00);
+ return inb(0xc01) & 0xf;
+}
+
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ outb_p(pirq, 0xc00);
+ outb_p(irq, 0xc01);
+ return 1;
+}
+
+/* Support for AMD756 PCI IRQ Routing
+ * Jhon H. Caicedo <jhcaiced@osso.org.co>
+ * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
+ * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
+ * The AMD756 pirq rules are nibble-based
+ * offset 0x56 0-3 PIRQA 4-7 PIRQB
+ * offset 0x57 0-3 PIRQC 4-7 PIRQD
+ */
+static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+ u8 irq;
+ irq = 0;
+ if (pirq <= 4)
+ {
+ irq = read_config_nybble(router, 0x56, pirq - 1);
+ }
+ printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
+ dev->vendor, dev->device, pirq, irq);
+ return irq;
+}
+
+static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
+ dev->vendor, dev->device, pirq, irq);
+ if (pirq <= 4)
+ {
+ write_config_nybble(router, 0x56, pirq - 1, irq);
+ }
+ return 1;
+}
+
+#ifdef CONFIG_PCI_BIOS
+
+static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+ struct pci_dev *bridge;
+ int pin = pci_get_interrupt_pin(dev, &bridge);
+ return pcibios_set_irq_routing(bridge, pin, irq);
+}
+
+#endif
+
+static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ static struct pci_device_id pirq_440gx[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) },
+ { },
+ };
+
+ /* 440GX has a proprietary PIRQ router -- don't use it */
+ if (pci_dev_present(pirq_440gx))
+ return 0;
+
+ switch(device)
+ {
+ case PCI_DEVICE_ID_INTEL_82371FB_0:
+ case PCI_DEVICE_ID_INTEL_82371SB_0:
+ case PCI_DEVICE_ID_INTEL_82371AB_0:
+ case PCI_DEVICE_ID_INTEL_82371MX:
+ case PCI_DEVICE_ID_INTEL_82443MX_0:
+ case PCI_DEVICE_ID_INTEL_82801AA_0:
+ case PCI_DEVICE_ID_INTEL_82801AB_0:
+ case PCI_DEVICE_ID_INTEL_82801BA_0:
+ case PCI_DEVICE_ID_INTEL_82801BA_10:
+ case PCI_DEVICE_ID_INTEL_82801CA_0:
+ case PCI_DEVICE_ID_INTEL_82801CA_12:
+ case PCI_DEVICE_ID_INTEL_82801DB_0:
+ case PCI_DEVICE_ID_INTEL_82801E_0:
+ case PCI_DEVICE_ID_INTEL_82801EB_0:
+ case PCI_DEVICE_ID_INTEL_ESB_1:
+ case PCI_DEVICE_ID_INTEL_ICH6_0:
+ case PCI_DEVICE_ID_INTEL_ICH6_1:
+ case PCI_DEVICE_ID_INTEL_ICH7_0:
+ case PCI_DEVICE_ID_INTEL_ICH7_1:
+ r->name = "PIIX/ICH";
+ r->get = pirq_piix_get;
+ r->set = pirq_piix_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ /* FIXME: We should move some of the quirk fixup stuff here */
+ switch(device)
+ {
+ case PCI_DEVICE_ID_VIA_82C586_0:
+ case PCI_DEVICE_ID_VIA_82C596:
+ case PCI_DEVICE_ID_VIA_82C686:
+ case PCI_DEVICE_ID_VIA_8231:
+ /* FIXME: add new ones for 8233/5 */
+ r->name = "VIA";
+ r->get = pirq_via_get;
+ r->set = pirq_via_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_VLSI_82C534:
+ r->name = "VLSI 82C534";
+ r->get = pirq_vlsi_get;
+ r->set = pirq_vlsi_set;
+ return 1;
+ }
+ return 0;
+}
+
+
+static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_SERVERWORKS_OSB4:
+ case PCI_DEVICE_ID_SERVERWORKS_CSB5:
+ r->name = "ServerWorks";
+ r->get = pirq_serverworks_get;
+ r->set = pirq_serverworks_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ if (device != PCI_DEVICE_ID_SI_503)
return 0;
+
+ r->name = "SIS";
+ r->get = pirq_sis_get;
+ r->set = pirq_sis_set;
+ return 1;
+}
+
+static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_CYRIX_5520:
+ r->name = "NatSemi";
+ r->get = pirq_cyrix_get;
+ r->set = pirq_cyrix_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_OPTI_82C700:
+ r->name = "OPTI";
+ r->get = pirq_opti_get;
+ r->set = pirq_opti_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_ITE_IT8330G_0:
+ r->name = "ITE";
+ r->get = pirq_ite_get;
+ r->set = pirq_ite_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_AL_M1533:
+ case PCI_DEVICE_ID_AL_M1563:
+ printk("PCI: Using ALI IRQ Router\n");
+ r->name = "ALI";
+ r->get = pirq_ali_get;
+ r->set = pirq_ali_set;
+ return 1;
+ }
+ return 0;
+}
+
+static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+{
+ switch(device)
+ {
+ case PCI_DEVICE_ID_AMD_VIPER_740B:
+ r->name = "AMD756";
+ break;
+ case PCI_DEVICE_ID_AMD_VIPER_7413:
+ r->name = "AMD766";
+ break;
+ case PCI_DEVICE_ID_AMD_VIPER_7443:
+ r->name = "AMD768";
+ break;
+ default:
+ return 0;
+ }
+ r->get = pirq_amd756_get;
+ r->set = pirq_amd756_set;
+ return 1;
+}
+
+static __initdata struct irq_router_handler pirq_routers[] = {
+ { PCI_VENDOR_ID_INTEL, intel_router_probe },
+ { PCI_VENDOR_ID_AL, ali_router_probe },
+ { PCI_VENDOR_ID_ITE, ite_router_probe },
+ { PCI_VENDOR_ID_VIA, via_router_probe },
+ { PCI_VENDOR_ID_OPTI, opti_router_probe },
+ { PCI_VENDOR_ID_SI, sis_router_probe },
+ { PCI_VENDOR_ID_CYRIX, cyrix_router_probe },
+ { PCI_VENDOR_ID_VLSI, vlsi_router_probe },
+ { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe },
+ { PCI_VENDOR_ID_AMD, amd_router_probe },
+ /* Someone with docs needs to add the ATI Radeon IGP */
+ { 0, NULL }
+};
+static struct irq_router pirq_router;
+static struct pci_dev *pirq_router_dev;
+
+
+/*
+ * FIXME: should we have an option to say "generic for
+ * chipset" ?
+ */
+
+static void __init pirq_find_router(struct irq_router *r)
+{
+ struct irq_routing_table *rt = pirq_table;
+ struct irq_router_handler *h;
+
+#ifdef CONFIG_PCI_BIOS
+ if (!rt->signature) {
+ printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
+ r->set = pirq_bios_set;
+ r->name = "BIOS";
+ return;
+ }
+#endif
+
+ /* Default unless a driver reloads it */
+ r->name = "default";
+ r->get = NULL;
+ r->set = NULL;
+
+ DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
+ rt->rtr_vendor, rt->rtr_device);
+
+ pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
+ if (!pirq_router_dev) {
+ DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
+ return;
+ }
+
+ for( h = pirq_routers; h->vendor; h++) {
+ /* First look for a router match */
+ if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
+ break;
+ /* Fall back to a device match */
+ if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
+ break;
+ }
+ printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
+ pirq_router.name,
+ pirq_router_dev->vendor,
+ pirq_router_dev->device,
+ pci_name(pirq_router_dev));
+}
+
+static struct irq_info *pirq_get_info(struct pci_dev *dev)
+{
+ struct irq_routing_table *rt = pirq_table;
+ int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+ struct irq_info *info;
+
+ for (info = rt->slots; entries--; info++)
+ if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+ return info;
+ return NULL;
+}
+
+static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
+{
+ u8 pin;
+ struct irq_info *info;
+ int i, pirq, newirq;
+ int irq = 0;
+ u32 mask;
+ struct irq_router *r = &pirq_router;
+ struct pci_dev *dev2 = NULL;
+ char *msg = NULL;
+
+ /* Find IRQ pin */
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (!pin) {
+ DBG(" -> no interrupt pin\n");
+ return 0;
+ }
+ pin = pin - 1;
- op.cmd = PHYSDEVOP_PCI_PROBE_ROOT_BUSES;
- if (HYPERVISOR_physdev_op(&op) != 0) {
- printk(KERN_WARNING "PCI: System does not support PCI\n");
+ /* Find IRQ routing entry */
+
+ if (!pirq_table)
+ return 0;
+
+ DBG("IRQ for %s[%c]", pci_name(dev), 'A' + pin);
+ info = pirq_get_info(dev);
+ if (!info) {
+ DBG(" -> not found in routing table\n");
return 0;
}
+ pirq = info->irq[pin].link;
+ mask = info->irq[pin].bitmap;
+ if (!pirq) {
+ DBG(" -> not routed\n");
+ return 0;
+ }
+ DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+ mask &= pcibios_irq_mask;
+
+ /* Work around broken HP Pavilion Notebooks which assign USB to
+ IRQ 9 even though it is actually wired to IRQ 11 */
+
+ if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) {
+ dev->irq = 11;
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
+ r->set(pirq_router_dev, dev, pirq, 11);
+ }
+
+ /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
+ if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
+ pirq = 0x68;
+ mask = 0x400;
+ dev->irq = r->get(pirq_router_dev, dev, pirq);
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+ }
+
+ /*
+ * Find the best IRQ to assign: use the one
+ * reported by the device if possible.
+ */
+ newirq = dev->irq;
+ if (!((1 << newirq) & mask)) {
+ if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0;
+ else printk(KERN_WARNING "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n", newirq, pci_name(dev));
+ }
+ if (!newirq && assign) {
+ for (i = 0; i < 16; i++) {
+ if (!(mask & (1 << i)))
+ continue;
+ if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, SA_SHIRQ))
+ newirq = i;
+ }
+ }
+ DBG(" -> newirq=%d", newirq);
+
+ /* Check if it is hardcoded */
+ if ((pirq & 0xf0) == 0xf0) {
+ irq = pirq & 0xf;
+ DBG(" -> hardcoded IRQ %d\n", irq);
+ msg = "Hardcoded";
+ } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
+ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) {
+ DBG(" -> got IRQ %d\n", irq);
+ msg = "Found";
+ } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+ DBG(" -> assigning IRQ %d", newirq);
+ if (r->set(pirq_router_dev, dev, pirq, newirq)) {
+ eisa_set_level_irq(newirq);
+ DBG(" ... OK\n");
+ msg = "Assigned";
+ irq = newirq;
+ }
+ }
+
+ if (!irq) {
+ DBG(" ... failed\n");
+ if (newirq && mask == (1 << newirq)) {
+ msg = "Guessed";
+ irq = newirq;
+ } else
+ return 0;
+ }
+ printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
+
+ /* Update IRQ for all devices with the same pirq value */
+ while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
+ pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
+ if (!pin)
+ continue;
+ pin--;
+ info = pirq_get_info(dev2);
+ if (!info)
+ continue;
+ if (info->irq[pin].link == pirq) {
+ /* We refuse to override the dev->irq information. Give a warning! */
+ if ( dev2->irq && dev2->irq != irq && \
+ (!(pci_probe & PCI_USE_PIRQ_MASK) || \
+ ((1 << dev2->irq) & mask)) ) {
+#ifndef CONFIG_PCI_MSI
+ printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
+ pci_name(dev2), dev2->irq, irq);
+#endif
+ continue;
+ }
+ dev2->irq = irq;
+ pirq_penalty[irq]++;
+ if (dev != dev2)
+ printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
+ }
+ }
+ return 1;
+}
+
+static void __init pcibios_fixup_irqs(void)
+{
+ struct pci_dev *dev = NULL;
+ u8 pin;
+
+ DBG("PCI: IRQ fixup\n");
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ /*
+ * If the BIOS has set an out of range IRQ number, just ignore it.
+ * Also keep track of which IRQ's are already in use.
+ */
+ if (dev->irq >= 16) {
+ DBG("%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
+ dev->irq = 0;
+ }
+ /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
+ if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+ pirq_penalty[dev->irq] = 0;
+ pirq_penalty[dev->irq]++;
+ }
+
+ dev = NULL;
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Recalculate IRQ numbers if we use the I/O APIC.
+ */
+ if (io_apic_assign_pci_irqs)
+ {
+ int irq;
- printk(KERN_INFO "PCI: Probing PCI hardware\n");
- for (bus = 0; bus < 256; bus++)
- if (test_bit(bus, (unsigned long *)
- &op.u.pci_probe_root_buses.busmask[0]))
- (void)pcibios_scan_root(bus);
+ if (pin) {
+ pin--; /* interrupt pins are numbered starting from 1 */
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ /*
+ * Busses behind bridges are typically not listed in the MP-table.
+ * In this case we have to look up the IRQ based on the parent bus,
+ * parent slot, and pin number. The SMP code detects such bridged
+ * busses itself so we should get into this branch reliably.
+ */
+ if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+ struct pci_dev * bridge = dev->bus->self;
+
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn), pin);
+ if (irq >= 0)
+ printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
+ pci_name(bridge), 'A' + pin, irq);
+ }
+ if (irq >= 0) {
+ if (use_pci_vector() &&
+ !platform_legacy_irq(irq))
+ irq = IO_APIC_VECTOR(irq);
+
+ printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+ pci_name(dev), 'A' + pin, irq);
+ dev->irq = irq;
+ }
+ }
+ }
+#endif
+ /*
+ * Still no IRQ? Try to lookup one...
+ */
+ if (pin && !dev->irq)
+ pcibios_lookup_irq(dev, 0);
+ }
+}
+
+/*
+ * Work around broken HP Pavilion Notebooks which assign USB to
+ * IRQ 9 even though it is actually wired to IRQ 11
+ */
+static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d)
+{
+ if (!broken_hp_bios_irq9) {
+ broken_hp_bios_irq9 = 1;
+ printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+ }
+ return 0;
+}
+
+/*
+ * Work around broken Acer TravelMate 360 Notebooks which assign
+ * Cardbus to IRQ 11 even though it is actually wired to IRQ 10
+ */
+static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d)
+{
+ if (!acer_tm360_irqrouting) {
+ acer_tm360_irqrouting = 1;
+ printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+ }
+ return 0;
+}
+
+static struct dmi_system_id __initdata pciirq_dmi_table[] = {
+ {
+ .callback = fix_broken_hp_bios_irq9,
+ .ident = "HP Pavilion N5400 Series Laptop",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
+ DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
+ },
+ },
+ {
+ .callback = fix_acer_tm360_irqrouting,
+ .ident = "Acer TravelMate 36x Laptop",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+ },
+ },
+ { }
+};
+
+static int __init pcibios_irq_init(void)
+{
+ DBG("PCI: IRQ init\n");
+
+ if (pcibios_enable_irq || raw_pci_ops == NULL)
+ return 0;
+
+ dmi_check_system(pciirq_dmi_table);
+
+ pirq_table = pirq_find_routing_table();
+
+#ifdef CONFIG_PCI_BIOS
+ if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
+ pirq_table = pcibios_get_irq_routing_table();
+#endif
+ if (pirq_table) {
+ pirq_peer_trick();
+ pirq_find_router(&pirq_router);
+ if (pirq_table->exclusive_irqs) {
+ int i;
+ for (i=0; i<16; i++)
+ if (!(pirq_table->exclusive_irqs & (1 << i)))
+ pirq_penalty[i] += 100;
+ }
+ /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+ if (io_apic_assign_pci_irqs)
+ pirq_table = NULL;
+ }
pcibios_enable_irq = pirq_enable_irq;
+ pcibios_fixup_irqs();
return 0;
}
@@ -92,35 +1030,67 @@ void pcibios_penalize_isa_irq(int irq)
static int pirq_enable_irq(struct pci_dev *dev)
{
- int err;
u8 pin;
- physdev_op_t op;
-
- /* Inform Xen that we are going to use this device. */
- op.cmd = PHYSDEVOP_PCI_INITIALISE_DEVICE;
- op.u.pci_initialise_device.bus = dev->bus->number;
- op.u.pci_initialise_device.dev = PCI_SLOT(dev->devfn);
- op.u.pci_initialise_device.func = PCI_FUNC(dev->devfn);
- if ( (err = HYPERVISOR_physdev_op(&op)) != 0 )
- return err;
-
- /* Now we can bind to the very final IRQ line. */
- pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &pin);
- dev->irq = pin;
-
- /* Sanity-check that an interrupt-producing device is routed
- * to an IRQ. */
+ extern int via_interrupt_line_quirk;
+ struct pci_dev *temp_dev;
+
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (pin != 0) {
- if (dev->irq != 0)
- printk(KERN_INFO "PCI: Obtained IRQ %d for device %s\n",
- dev->irq, dev->slot_name);
+ if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+ char *msg;
+ msg = "";
+ if (io_apic_assign_pci_irqs) {
+ int irq;
+
+ if (pin) {
+ pin--; /* interrupt pins are numbered starting from 1 */
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ /*
+ * Busses behind bridges are typically not listed in the MP-table.
+ * In this case we have to look up the IRQ based on the parent bus,
+ * parent slot, and pin number. The SMP code detects such bridged
+ * busses itself so we should get into this branch reliably.
+ */
+ temp_dev = dev;
+ while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+ struct pci_dev * bridge = dev->bus->self;
+
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn), pin);
+ if (irq >= 0)
+ printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
+ pci_name(bridge), 'A' + pin, irq);
+ dev = bridge;
+ }
+ dev = temp_dev;
+ if (irq >= 0) {
+#ifdef CONFIG_PCI_MSI
+ if (!platform_legacy_irq(irq))
+ irq = IO_APIC_VECTOR(irq);
+#endif
+ printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+ pci_name(dev), 'A' + pin, irq);
+ dev->irq = irq;
+ return 0;
+ } else
+ msg = " Probably buggy MP table.";
+ }
+ } else if (pci_probe & PCI_BIOS_IRQ_SCAN)
+ msg = "";
else
- printk(KERN_WARNING "PCI: No IRQ known for interrupt "
- "pin %c of device %s.\n", 'A' + pin - 1,
- dev->slot_name);
+ msg = " Please try using pci=biosirq.";
+
+ /* With IDE legacy devices the IRQ lookup failure is not a problem.. */
+ if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+ return 0;
+
+ printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+ 'A' + pin - 1, pci_name(dev), msg);
}
-
+ /* VIA bridges use interrupt line for apic/pci steering across
+ the V-Link */
+ else if (via_interrupt_line_quirk)
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq & 15);
return 0;
}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile b/linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile
index c2fad2a8dd..7f7f3b173a 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile
@@ -11,4 +11,8 @@ $(obj)/vmlinux.lds.S:
extra-y += vmlinux.lds
-obj-y := ctrl_if.o evtchn.o fixup.o reboot.o xen_proc.o skbuff.o devmem.o
+obj-y := ctrl_if.o evtchn.o fixup.o reboot.o gnttab.o devmem.o
+
+obj-$(CONFIG_PROC_FS) += xen_proc.o
+obj-$(CONFIG_NET) += skbuff.o
+obj-$(CONFIG_SMP) += smp.o
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c
index 16852cb02a..c1cfb82dbd 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c
@@ -47,6 +47,19 @@
#endif
/*
+ * Extra ring macros to sync a consumer index up to the public producer index.
+ * Generally UNSAFE, but we use it for recovery and shutdown in some cases.
+ */
+#define RING_DROP_PENDING_REQUESTS(_r) \
+ do { \
+ (_r)->req_cons = (_r)->sring->req_prod; \
+ } while (0)
+#define RING_DROP_PENDING_RESPONSES(_r) \
+ do { \
+ (_r)->rsp_cons = (_r)->sring->rsp_prod; \
+ } while (0)
+
+/*
* Only used by initial domain which must create its own control-interface
* event channel. This value is picked up by the user-space domain controller
* via an ioctl.
@@ -59,8 +72,8 @@ static spinlock_t ctrl_if_lock;
static struct irqaction ctrl_if_irq_action;
-static CONTROL_RING_IDX ctrl_if_tx_resp_cons;
-static CONTROL_RING_IDX ctrl_if_rx_req_cons;
+static ctrl_front_ring_t ctrl_if_tx_ring;
+static ctrl_back_ring_t ctrl_if_rx_ring;
/* Incoming message requests. */
/* Primary message type -> message handler. */
@@ -97,8 +110,6 @@ static void __ctrl_if_rx_tasklet(unsigned long data);
static DECLARE_TASKLET(ctrl_if_rx_tasklet, __ctrl_if_rx_tasklet, 0);
#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048))
-#define TX_FULL(_c) \
- (((_c)->tx_req_prod - ctrl_if_tx_resp_cons) == CONTROL_RING_SIZE)
static void ctrl_if_notify_controller(void)
{
@@ -113,21 +124,20 @@ static void ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id)
static void __ctrl_if_tx_tasklet(unsigned long data)
{
- control_if_t *ctrl_if = get_ctrl_if();
- ctrl_msg_t *msg;
- int was_full = TX_FULL(ctrl_if);
- CONTROL_RING_IDX rp;
+ ctrl_msg_t *msg;
+ int was_full = RING_FULL(&ctrl_if_tx_ring);
+ RING_IDX i, rp;
- rp = ctrl_if->tx_resp_prod;
+ i = ctrl_if_tx_ring.rsp_cons;
+ rp = ctrl_if_tx_ring.sring->rsp_prod;
rmb(); /* Ensure we see all requests up to 'rp'. */
- while ( ctrl_if_tx_resp_cons != rp )
+ for ( ; i != rp; i++ )
{
- msg = &ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if_tx_resp_cons)];
-
- DPRINTK("Rx-Rsp %u/%u :: %d/%d\n",
- ctrl_if_tx_resp_cons,
- ctrl_if->tx_resp_prod,
+ msg = RING_GET_RESPONSE(&ctrl_if_tx_ring, i);
+
+ DPRINTK("Rx-Rsp %u/%u :: %d/%d\n", i-1,
+ ctrl_if_tx_ring.sring->rsp_prod,
msg->type, msg->subtype);
/* Execute the callback handler, if one was specified. */
@@ -138,16 +148,16 @@ static void __ctrl_if_tx_tasklet(unsigned long data)
smp_mb(); /* Execute, /then/ free. */
ctrl_if_txmsg_id_mapping[msg->id].fn = NULL;
}
-
- /*
- * Step over the message in the ring /after/ finishing reading it. As
- * soon as the index is updated then the message may get blown away.
- */
- smp_mb();
- ctrl_if_tx_resp_cons++;
}
- if ( was_full && !TX_FULL(ctrl_if) )
+ /*
+ * Step over messages in the ring /after/ finishing reading them. As soon
+ * as the index is updated then the message may get blown away.
+ */
+ smp_mb();
+ ctrl_if_tx_ring.rsp_cons = i;
+
+ if ( was_full && !RING_FULL(&ctrl_if_tx_ring) )
{
wake_up(&ctrl_if_tx_wait);
run_task_queue(&ctrl_if_tx_tq);
@@ -172,24 +182,27 @@ static void __ctrl_if_rxmsg_deferred(void *unused)
static void __ctrl_if_rx_tasklet(unsigned long data)
{
- control_if_t *ctrl_if = get_ctrl_if();
ctrl_msg_t msg, *pmsg;
- CONTROL_RING_IDX rp, dp;
+ CONTROL_RING_IDX dp;
+ RING_IDX rp, i;
+ i = ctrl_if_rx_ring.req_cons;
+ rp = ctrl_if_rx_ring.sring->req_prod;
dp = ctrl_if_rxmsg_deferred_prod;
- rp = ctrl_if->rx_req_prod;
rmb(); /* Ensure we see all requests up to 'rp'. */
-
- while ( ctrl_if_rx_req_cons != rp )
+
+ for ( ; i != rp; i++)
{
- pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)];
+ pmsg = RING_GET_REQUEST(&ctrl_if_rx_ring, i);
memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
- DPRINTK("Rx-Req %u/%u :: %d/%d\n",
- ctrl_if_rx_req_cons-1,
- ctrl_if->rx_req_prod,
+ DPRINTK("Rx-Req %u/%u :: %d/%d\n", i-1,
+ ctrl_if_rx_ring.sring->req_prod,
msg.type, msg.subtype);
+ if ( msg.length > sizeof(msg.msg) )
+ msg.length = sizeof(msg.msg);
+
if ( msg.length != 0 )
memcpy(msg.msg, pmsg->msg, msg.length);
@@ -201,6 +214,8 @@ static void __ctrl_if_rx_tasklet(unsigned long data)
(*ctrl_if_rxmsg_handler[msg.type])(&msg, 0);
}
+ ctrl_if_rx_ring.req_cons = i;
+
if ( dp != ctrl_if_rxmsg_deferred_prod )
{
wmb();
@@ -212,12 +227,10 @@ static void __ctrl_if_rx_tasklet(unsigned long data)
static irqreturn_t ctrl_if_interrupt(int irq, void *dev_id,
struct pt_regs *regs)
{
- control_if_t *ctrl_if = get_ctrl_if();
-
- if ( ctrl_if_tx_resp_cons != ctrl_if->tx_resp_prod )
+ if ( RING_HAS_UNCONSUMED_RESPONSES(&ctrl_if_tx_ring) )
tasklet_schedule(&ctrl_if_tx_tasklet);
- if ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
+ if ( RING_HAS_UNCONSUMED_REQUESTS(&ctrl_if_rx_ring) )
tasklet_schedule(&ctrl_if_rx_tasklet);
return IRQ_HANDLED;
@@ -229,13 +242,13 @@ ctrl_if_send_message_noblock(
ctrl_msg_handler_t hnd,
unsigned long id)
{
- control_if_t *ctrl_if = get_ctrl_if();
unsigned long flags;
+ ctrl_msg_t *dmsg;
int i;
spin_lock_irqsave(&ctrl_if_lock, flags);
- if ( TX_FULL(ctrl_if) )
+ if ( RING_FULL(&ctrl_if_tx_ring) )
{
spin_unlock_irqrestore(&ctrl_if_lock, flags);
return -EAGAIN;
@@ -252,14 +265,15 @@ ctrl_if_send_message_noblock(
}
DPRINTK("Tx-Req %u/%u :: %d/%d\n",
- ctrl_if->tx_req_prod,
- ctrl_if_tx_resp_cons,
+ ctrl_if_tx_ring.req_prod_pvt,
+ ctrl_if_tx_ring.rsp_cons,
msg->type, msg->subtype);
- memcpy(&ctrl_if->tx_ring[MASK_CONTROL_IDX(ctrl_if->tx_req_prod)],
- msg, sizeof(*msg));
- wmb(); /* Write the message before letting the controller peek at it. */
- ctrl_if->tx_req_prod++;
+ dmsg = RING_GET_REQUEST(&ctrl_if_tx_ring,
+ ctrl_if_tx_ring.req_prod_pvt);
+ memcpy(dmsg, msg, sizeof(*msg));
+ ctrl_if_tx_ring.req_prod_pvt++;
+ RING_PUSH_REQUESTS(&ctrl_if_tx_ring);
spin_unlock_irqrestore(&ctrl_if_lock, flags);
@@ -358,10 +372,8 @@ int
ctrl_if_enqueue_space_callback(
struct tq_struct *task)
{
- control_if_t *ctrl_if = get_ctrl_if();
-
/* Fast path. */
- if ( !TX_FULL(ctrl_if) )
+ if ( !RING_FULL(&ctrl_if_tx_ring) )
return 0;
(void)queue_task(task, &ctrl_if_tx_tq);
@@ -372,14 +384,13 @@ ctrl_if_enqueue_space_callback(
* certainly return 'not full'.
*/
smp_mb();
- return TX_FULL(ctrl_if);
+ return RING_FULL(&ctrl_if_tx_ring);
}
void
ctrl_if_send_response(
ctrl_msg_t *msg)
{
- control_if_t *ctrl_if = get_ctrl_if();
unsigned long flags;
ctrl_msg_t *dmsg;
@@ -390,15 +401,16 @@ ctrl_if_send_response(
spin_lock_irqsave(&ctrl_if_lock, flags);
DPRINTK("Tx-Rsp %u :: %d/%d\n",
- ctrl_if->rx_resp_prod,
+ ctrl_if_rx_ring.rsp_prod_pvt,
msg->type, msg->subtype);
- dmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if->rx_resp_prod)];
+ dmsg = RING_GET_RESPONSE(&ctrl_if_rx_ring,
+ ctrl_if_rx_ring.rsp_prod_pvt);
if ( dmsg != msg )
memcpy(dmsg, msg, sizeof(*msg));
- wmb(); /* Write the message before letting the controller peek at it. */
- ctrl_if->rx_resp_prod++;
+ ctrl_if_rx_ring.rsp_prod_pvt++;
+ RING_PUSH_RESPONSES(&ctrl_if_rx_ring);
spin_unlock_irqrestore(&ctrl_if_lock, flags);
@@ -491,8 +503,8 @@ void ctrl_if_resume(void)
}
/* Sync up with shared indexes. */
- ctrl_if_tx_resp_cons = ctrl_if->tx_resp_prod;
- ctrl_if_rx_req_cons = ctrl_if->rx_resp_prod;
+ FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM);
+ BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM);
ctrl_if_evtchn = xen_start_info.domain_controller_evtchn;
ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn);
@@ -505,11 +517,15 @@ void ctrl_if_resume(void)
void __init ctrl_if_init(void)
{
- int i;
+ control_if_t *ctrl_if = get_ctrl_if();
+ int i;
for ( i = 0; i < 256; i++ )
ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
+ FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM);
+ BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM);
+
spin_lock_init(&ctrl_if_lock);
ctrl_if_resume();
@@ -532,12 +548,13 @@ __initcall(ctrl_if_late_setup);
int ctrl_if_transmitter_empty(void)
{
- return (get_ctrl_if()->tx_req_prod == ctrl_if_tx_resp_cons);
+ return (ctrl_if_tx_ring.sring->req_prod == ctrl_if_tx_ring.rsp_cons);
+
}
void ctrl_if_discard_responses(void)
{
- ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod;
+ RING_DROP_PENDING_RESPONSES(&ctrl_if_tx_ring);
}
EXPORT_SYMBOL(ctrl_if_send_message_noblock);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c
index 22885955c9..4e8db3e003 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c
@@ -37,11 +37,12 @@
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/ptrace.h>
-#include <asm/synch_bitops.h>
+#include <asm-xen/synch_bitops.h>
#include <asm-xen/xen-public/event_channel.h>
#include <asm-xen/xen-public/physdev.h>
#include <asm-xen/ctrl_if.h>
#include <asm-xen/hypervisor.h>
+#include <asm-xen/evtchn.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
EXPORT_SYMBOL(force_evtchn_callback);
@@ -59,7 +60,13 @@ static int evtchn_to_irq[NR_EVENT_CHANNELS];
static int irq_to_evtchn[NR_IRQS];
/* IRQ <-> VIRQ mapping. */
-static int virq_to_irq[NR_VIRQS];
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+
+/* evtchn <-> IPI mapping. */
+#ifndef NR_IPIS
+#define NR_IPIS 1
+#endif
+DEFINE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
/* Reference counts for bindings to IRQs. */
static int irq_bindcount[NR_IRQS];
@@ -67,6 +74,33 @@ static int irq_bindcount[NR_IRQS];
/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)];
+#ifdef CONFIG_SMP
+
+static u8 cpu_evtchn[NR_EVENT_CHANNELS];
+static u32 cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/32];
+
+#define active_evtchns(cpu,sh,idx) \
+ ((sh)->evtchn_pending[idx] & \
+ cpu_evtchn_mask[cpu][idx] & \
+ ~(sh)->evtchn_mask[idx])
+
+static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
+{
+ clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
+ set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
+ cpu_evtchn[chn] = cpu;
+}
+
+#else
+
+#define active_evtchns(cpu,sh,idx) \
+ ((sh)->evtchn_pending[idx] & \
+ ~(sh)->evtchn_mask[idx])
+
+#define bind_evtchn_to_cpu(chn,cpu) ((void)0)
+
+#endif
+
/* Upcall to generic IRQ layer. */
#ifdef CONFIG_X86
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,9)
@@ -74,8 +108,13 @@ extern fastcall unsigned int do_IRQ(struct pt_regs *regs);
#else
extern asmlinkage unsigned int do_IRQ(struct pt_regs *regs);
#endif
+#if defined (__i386__)
+#define IRQ_REG orig_eax
+#elif defined (__x86_64__)
+#define IRQ_REG orig_rax
+#endif
#define do_IRQ(irq, regs) do { \
- (regs)->orig_eax = (irq); \
+ (regs)->IRQ_REG = (irq); \
do_IRQ((regs)); \
} while (0)
#endif
@@ -95,22 +134,22 @@ void force_evtchn_callback(void)
/* NB. Interrupts are disabled on entry. */
asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
{
- unsigned long l1, l2;
+ u32 l1, l2;
unsigned int l1i, l2i, port;
- int irq;
+ int irq, cpu = smp_processor_id();
shared_info_t *s = HYPERVISOR_shared_info;
+ vcpu_info_t *vcpu_info = &s->vcpu_data[cpu];
- s->vcpu_data[0].evtchn_upcall_pending = 0;
-
+ vcpu_info->evtchn_upcall_pending = 0;
+
/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
- l1 = xchg(&s->evtchn_pending_sel, 0);
+ l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
while ( l1 != 0 )
{
l1i = __ffs(l1);
l1 &= ~(1 << l1i);
- l2 = s->evtchn_pending[l1i] & ~s->evtchn_mask[l1i];
- while ( l2 != 0 )
+ while ( (l2 = active_evtchns(cpu, s, l1i)) != 0 )
{
l2i = __ffs(l2);
l2 &= ~(1 << l2i);
@@ -142,10 +181,11 @@ int bind_virq_to_irq(int virq)
{
evtchn_op_t op;
int evtchn, irq;
+ int cpu = smp_processor_id();
spin_lock(&irq_mapping_update_lock);
- if ( (irq = virq_to_irq[virq]) == -1 )
+ if ( (irq = per_cpu(virq_to_irq, cpu)[virq]) == -1 )
{
op.cmd = EVTCHNOP_bind_virq;
op.u.bind_virq.virq = virq;
@@ -157,7 +197,9 @@ int bind_virq_to_irq(int virq)
evtchn_to_irq[evtchn] = irq;
irq_to_evtchn[irq] = evtchn;
- virq_to_irq[virq] = irq;
+ per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
}
irq_bindcount[irq]++;
@@ -170,7 +212,8 @@ int bind_virq_to_irq(int virq)
void unbind_virq_from_irq(int virq)
{
evtchn_op_t op;
- int irq = virq_to_irq[virq];
+ int cpu = smp_processor_id();
+ int irq = per_cpu(virq_to_irq, cpu)[virq];
int evtchn = irq_to_evtchn[irq];
spin_lock(&irq_mapping_update_lock);
@@ -185,7 +228,66 @@ void unbind_virq_from_irq(int virq)
evtchn_to_irq[evtchn] = -1;
irq_to_evtchn[irq] = -1;
- virq_to_irq[virq] = -1;
+ per_cpu(virq_to_irq, cpu)[virq] = -1;
+ }
+
+ spin_unlock(&irq_mapping_update_lock);
+}
+
+int bind_ipi_on_cpu_to_irq(int cpu, int ipi)
+{
+ evtchn_op_t op;
+ int evtchn, irq;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ if ( (evtchn = per_cpu(ipi_to_evtchn, cpu)[ipi]) == 0 )
+ {
+ op.cmd = EVTCHNOP_bind_ipi;
+ op.u.bind_ipi.ipi_vcpu = cpu;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, cpu);
+ evtchn = op.u.bind_ipi.port;
+
+ irq = find_unbound_irq();
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
+ }
+ else
+ {
+ irq = evtchn_to_irq[evtchn];
+ }
+
+ irq_bindcount[irq]++;
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ return irq;
+}
+
+void unbind_ipi_on_cpu_from_irq(int cpu, int ipi)
+{
+ evtchn_op_t op;
+ int evtchn = per_cpu(ipi_to_evtchn, cpu)[ipi];
+    int irq = evtchn_to_irq[evtchn];
+
+ spin_lock(&irq_mapping_update_lock);
+
+ if ( --irq_bindcount[irq] == 0 )
+ {
+ op.cmd = EVTCHNOP_close;
+ op.u.close.dom = DOMID_SELF;
+ op.u.close.port = evtchn;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to unbind virtual IPI %d on cpu %d\n", ipi, cpu);
+
+ evtchn_to_irq[evtchn] = -1;
+ irq_to_evtchn[irq] = -1;
+ per_cpu(ipi_to_evtchn, cpu)[ipi] = 0;
}
spin_unlock(&irq_mapping_update_lock);
@@ -415,30 +517,15 @@ static struct hw_interrupt_type pirq_type = {
NULL
};
-static irqreturn_t misdirect_interrupt(int irq, void *dev_id,
- struct pt_regs *regs)
-{
- /* nothing */
- return IRQ_HANDLED;
-}
-
-static struct irqaction misdirect_action = {
- misdirect_interrupt,
- SA_INTERRUPT,
- CPU_MASK_NONE,
- "misdirect",
- NULL,
- NULL
-};
-
void irq_suspend(void)
{
int pirq, virq, irq, evtchn;
+ int cpu = smp_processor_id(); /* XXX */
/* Unbind VIRQs from event channels. */
for ( virq = 0; virq < NR_VIRQS; virq++ )
{
- if ( (irq = virq_to_irq[virq]) == -1 )
+ if ( (irq = per_cpu(virq_to_irq, cpu)[virq]) == -1 )
continue;
evtchn = irq_to_evtchn[irq];
@@ -458,13 +545,14 @@ void irq_resume(void)
{
evtchn_op_t op;
int virq, irq, evtchn;
+ int cpu = smp_processor_id(); /* XXX */
for ( evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++ )
mask_evtchn(evtchn); /* New event-channel space is not 'live' yet. */
for ( virq = 0; virq < NR_VIRQS; virq++ )
{
- if ( (irq = virq_to_irq[virq]) == -1 )
+ if ( (irq = per_cpu(virq_to_irq, cpu)[virq]) == -1 )
continue;
/* Get a new binding from Xen. */
@@ -486,14 +574,22 @@ void irq_resume(void)
void __init init_IRQ(void)
{
int i;
+ int cpu;
irq_ctx_init(0);
spin_lock_init(&irq_mapping_update_lock);
- /* No VIRQ -> IRQ mappings. */
- for ( i = 0; i < NR_VIRQS; i++ )
- virq_to_irq[i] = -1;
+#ifdef CONFIG_SMP
+ /* By default all event channels notify CPU#0. */
+ memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
+#endif
+
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ ) {
+ /* No VIRQ -> IRQ mappings. */
+ for ( i = 0; i < NR_VIRQS; i++ )
+ per_cpu(virq_to_irq, cpu)[i] = -1;
+ }
/* No event-channel -> IRQ mappings. */
for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
@@ -528,8 +624,6 @@ void __init init_IRQ(void)
irq_desc[pirq_to_irq(i)].handler = &pirq_type;
}
- (void)setup_irq(bind_virq_to_irq(VIRQ_MISDIRECT), &misdirect_action);
-
/* This needs to be done early, but after the IRQ subsystem is alive. */
ctrl_if_init();
}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c
index a9d9e511c2..64a98e62a7 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c
@@ -35,7 +35,7 @@
#include <linux/delay.h>
#include <linux/version.h>
-#define DP(_f) printk(KERN_ALERT " " _f "\n")
+#define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args )
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
#define __LINKAGE fastcall
@@ -46,12 +46,15 @@
__LINKAGE void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
{
static unsigned long printed = 0;
+ char info[100];
int i;
if ( !test_and_set_bit(0, &printed) )
{
HYPERVISOR_vm_assist(VMASST_CMD_disable,
- VMASST_TYPE_4gb_segments_notify);
+ VMASST_TYPE_4gb_segments_notify);
+
+ sprintf(info, "%s (pid=%d)", current->comm, current->tgid);
DP("");
DP("***************************************************************");
@@ -61,6 +64,7 @@ __LINKAGE void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
DP("** slow. To ensure full performance you should **");
DP("** execute the following as root: **");
DP("** mv /lib/tls /lib/tls.disabled **");
+ DP("** Offending process: %-38.38s **", info);
DP("***************************************************************");
DP("***************************************************************");
DP("");
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
new file mode 100644
index 0000000000..838b53cd1f
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
@@ -0,0 +1,390 @@
+/******************************************************************************
+ * gnttab.c
+ *
+ * Two sets of functionality:
+ * 1. Granting foreign access to our memory reservation.
+ * 2. Accessing others' memory reservations via grant references.
+ * (i.e., mechanisms for both sender and recipient of grant references)
+ *
+ * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/uaccess.h>
+#include <asm-xen/xen_proc.h>
+#include <asm-xen/linux-public/privcmd.h>
+#include <asm-xen/gnttab.h>
+#include <asm-xen/synch_bitops.h>
+
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk(KERN_ALERT"Assertion '%s': line %d, file %s\n", \
+ #_p , __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define WPRINTK(fmt, args...) \
+ printk(KERN_WARNING "xen_grant: " fmt, ##args)
+
+
+EXPORT_SYMBOL(gnttab_grant_foreign_access);
+EXPORT_SYMBOL(gnttab_end_foreign_access);
+EXPORT_SYMBOL(gnttab_query_foreign_access);
+EXPORT_SYMBOL(gnttab_grant_foreign_transfer);
+EXPORT_SYMBOL(gnttab_end_foreign_transfer);
+EXPORT_SYMBOL(gnttab_alloc_grant_references);
+EXPORT_SYMBOL(gnttab_free_grant_references);
+EXPORT_SYMBOL(gnttab_claim_grant_reference);
+EXPORT_SYMBOL(gnttab_release_grant_reference);
+EXPORT_SYMBOL(gnttab_grant_foreign_access_ref);
+EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref);
+
+static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
+static grant_ref_t gnttab_free_head;
+
+static grant_entry_t *shared;
+
+/*
+ * Lock-free grant-entry allocator
+ */
+
+static inline int
+get_free_entry(
+ void)
+{
+ grant_ref_t fh, nfh = gnttab_free_head;
+ do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
+ while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
+ gnttab_free_list[fh])) != fh) );
+ return fh;
+}
+
+static inline void
+put_free_entry(
+ grant_ref_t ref)
+{
+ grant_ref_t fh, nfh = gnttab_free_head;
+ do { gnttab_free_list[ref] = fh = nfh; wmb(); }
+ while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) );
+}
+
+/*
+ * Public grant-issuing interface functions
+ */
+
+int
+gnttab_grant_foreign_access(
+ domid_t domid, unsigned long frame, int readonly)
+{
+ int ref;
+
+ if ( unlikely((ref = get_free_entry()) == -1) )
+ return -ENOSPC;
+
+ shared[ref].frame = frame;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
+
+ return ref;
+}
+
+void
+gnttab_grant_foreign_access_ref(
+ grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
+{
+ shared[ref].frame = frame;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
+}
+
+
+int
+gnttab_query_foreign_access( grant_ref_t ref )
+{
+ u16 nflags;
+
+ nflags = shared[ref].flags;
+
+ return ( nflags & (GTF_reading|GTF_writing) );
+}
+
+void
+gnttab_end_foreign_access( grant_ref_t ref, int readonly )
+{
+ u16 flags, nflags;
+
+ nflags = shared[ref].flags;
+ do {
+ if ( (flags = nflags) & (GTF_reading|GTF_writing) )
+ printk(KERN_ALERT "WARNING: g.e. still in use!\n");
+ }
+ while ( (nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != flags );
+
+ put_free_entry(ref);
+}
+
+int
+gnttab_grant_foreign_transfer(
+ domid_t domid, unsigned long pfn )
+{
+ int ref;
+
+ if ( unlikely((ref = get_free_entry()) == -1) )
+ return -ENOSPC;
+
+ shared[ref].frame = pfn;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = GTF_accept_transfer;
+
+ return ref;
+}
+
+void
+gnttab_grant_foreign_transfer_ref(
+ grant_ref_t ref, domid_t domid, unsigned long pfn )
+{
+ shared[ref].frame = pfn;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = GTF_accept_transfer;
+}
+
+unsigned long
+gnttab_end_foreign_transfer(
+ grant_ref_t ref)
+{
+ unsigned long frame = 0;
+ u16 flags;
+
+ flags = shared[ref].flags;
+ ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
+
+ /*
+ * If a transfer is committed then wait for the frame address to appear.
+ * Otherwise invalidate the grant entry against future use.
+ */
+ if ( likely(flags != GTF_accept_transfer) ||
+ (synch_cmpxchg(&shared[ref].flags, flags, 0) != GTF_accept_transfer) )
+ while ( unlikely((frame = shared[ref].frame) == 0) )
+ cpu_relax();
+
+ put_free_entry(ref);
+
+ return frame;
+}
+
+void
+gnttab_free_grant_references( u16 count, grant_ref_t head )
+{
+ /* TODO: O(N)...? */
+ grant_ref_t to_die = 0, next = head;
+ int i;
+
+ for ( i = 0; i < count; i++ )
+ {
+ to_die = next;
+ next = gnttab_free_list[next];
+ put_free_entry( to_die );
+ }
+}
+
+int
+gnttab_alloc_grant_references( u16 count,
+ grant_ref_t *head,
+ grant_ref_t *terminal )
+{
+ int i;
+ grant_ref_t h = gnttab_free_head;
+
+ for ( i = 0; i < count; i++ )
+ if ( unlikely(get_free_entry() == -1) )
+ goto not_enough_refs;
+
+ *head = h;
+ *terminal = gnttab_free_head;
+
+ return 0;
+
+not_enough_refs:
+ gnttab_free_head = h;
+ return -ENOSPC;
+}
+
+int
+gnttab_claim_grant_reference( grant_ref_t *private_head,
+ grant_ref_t terminal )
+{
+ grant_ref_t g;
+ if ( unlikely((g = *private_head) == terminal) )
+ return -ENOSPC;
+ *private_head = gnttab_free_list[g];
+ return g;
+}
+
+void
+gnttab_release_grant_reference( grant_ref_t *private_head,
+ grant_ref_t release )
+{
+ gnttab_free_list[release] = *private_head;
+ *private_head = release;
+}
+
+/*
+ * ProcFS operations
+ */
+
+#ifdef CONFIG_PROC_FS
+
+static struct proc_dir_entry *grant_pde;
+
+static int grant_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long data)
+{
+ int ret;
+ privcmd_hypercall_t hypercall;
+
+ /* XXX Need safety checks here if using for anything other
+ * than debugging */
+ return -ENOSYS;
+
+ if ( cmd != IOCTL_PRIVCMD_HYPERCALL )
+ return -ENOSYS;
+
+ if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
+ return -EFAULT;
+
+ if ( hypercall.op != __HYPERVISOR_grant_table_op )
+ return -ENOSYS;
+
+ /* hypercall-invoking asm taken from privcmd.c */
+ __asm__ __volatile__ (
+ "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
+ "movl 4(%%eax),%%ebx ;"
+ "movl 8(%%eax),%%ecx ;"
+ "movl 12(%%eax),%%edx ;"
+ "movl 16(%%eax),%%esi ;"
+ "movl 20(%%eax),%%edi ;"
+ "movl (%%eax),%%eax ;"
+ TRAP_INSTR "; "
+ "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
+ : "=a" (ret) : "0" (&hypercall) : "memory" );
+
+ return ret;
+}
+
+static struct file_operations grant_file_ops = {
+ ioctl: grant_ioctl,
+};
+
+static int grant_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len;
+ unsigned int i;
+ grant_entry_t *gt;
+
+ gt = (grant_entry_t *)shared;
+ len = 0;
+
+    for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) {
+        /* TODO: safety catch here until this can handle >PAGE_SIZE output */
+        if (len > (PAGE_SIZE - 200))
+        {
+            len += sprintf( page + len, "Truncated.\n");
+            break;
+        }
+
+        if ( gt[i].flags )
+            len += sprintf( page + len,
+                    "Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame (0x%x)\n",
+                    i, gt[i].flags,
+                    gt[i].domid,
+                    gt[i].frame );
+    }
+
+ *eof = 1;
+ return len;
+}
+
+static int grant_write(struct file *file, const char __user *buffer,
+ unsigned long count, void *data)
+{
+ /* TODO: implement this */
+ return -ENOSYS;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+int gnttab_resume(void)
+{
+ gnttab_setup_table_t setup;
+ unsigned long frames[NR_GRANT_FRAMES];
+ int i;
+
+ setup.dom = DOMID_SELF;
+ setup.nr_frames = NR_GRANT_FRAMES;
+ setup.frame_list = frames;
+
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0);
+ BUG_ON(setup.status != 0);
+
+ for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+ set_fixmap(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
+
+ return 0;
+}
+
+int gnttab_suspend(void)
+{
+ int i;
+
+ for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+ clear_fixmap(FIX_GNTTAB_END - i);
+
+ return 0;
+}
+
+static int __init gnttab_init(void)
+{
+ int i;
+
+ BUG_ON(gnttab_resume());
+
+ shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ gnttab_free_list[i] = i + 1;
+
+#ifdef CONFIG_PROC_FS
+ /*
+ * /proc/xen/grant : used by libxc to access grant tables
+ */
+ if ( (grant_pde = create_xen_proc_entry("grant", 0600)) == NULL )
+ {
+ WPRINTK("Unable to create grant xen proc entry\n");
+ return -1;
+ }
+
+ grant_file_ops.read = grant_pde->proc_fops->read;
+ grant_file_ops.write = grant_pde->proc_fops->write;
+
+ grant_pde->proc_fops = &grant_file_ops;
+
+ grant_pde->read_proc = &grant_read;
+ grant_pde->write_proc = &grant_write;
+#endif
+
+ printk("Grant table initialized\n");
+ return 0;
+}
+
+__initcall(gnttab_init);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c
index 9dce9e2abc..572b1a71b3 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c
@@ -76,6 +76,20 @@ static void __do_suspend(void)
#define netif_resume() do{}while(0)
#endif
+#ifdef CONFIG_XEN_USB_FRONTEND
+ extern void usbif_resume();
+#else
+#define usbif_resume() do{}while(0)
+#endif
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ extern int gnttab_suspend(void);
+ extern int gnttab_resume(void);
+#else
+#define gnttab_suspend() do{}while(0)
+#define gnttab_resume() do{}while(0)
+#endif
+
extern void time_suspend(void);
extern void time_resume(void);
extern unsigned long max_pfn;
@@ -89,6 +103,11 @@ static void __do_suspend(void)
__cli();
+#ifdef __i386__
+ mm_pin_all();
+ kmem_cache_shrink(pgd_cache);
+#endif
+
netif_suspend();
blkdev_suspend();
@@ -99,31 +118,22 @@ static void __do_suspend(void)
irq_suspend();
+ gnttab_suspend();
+
HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
clear_fixmap(FIX_SHARED_INFO);
memcpy(&suspend_record->resume_info, &xen_start_info,
- sizeof(xen_start_info));
+ sizeof(xen_start_info));
HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_4gb_segments);
-#ifdef CONFIG_XEN_WRITABLE_PAGETABLES
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_writable_pagetables);
-#endif
-
shutting_down = -1;
memcpy(&xen_start_info, &suspend_record->resume_info,
- sizeof(xen_start_info));
+ sizeof(xen_start_info));
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info);
-#else
set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
-#endif
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
@@ -137,6 +147,7 @@ static void __do_suspend(void)
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
+ gnttab_resume();
irq_resume();
@@ -148,6 +159,8 @@ static void __do_suspend(void)
netif_resume();
+ usbif_resume();
+
__sti();
out:
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c
new file mode 100644
index 0000000000..fb2a6eb6fa
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c
@@ -0,0 +1,16 @@
+/* Copyright (C) 2004, Christian Limpach */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/threads.h>
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+ printk("setup_profiling_timer\n");
+
+ return 0;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig
new file mode 100644
index 0000000000..6785cf8e8a
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/Kconfig
@@ -0,0 +1,455 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+# Note: ISA is disabled and will hopefully never be enabled.
+# If you managed to buy an ISA x86-64 box you'll have to fix all the
+# ISA drivers you need yourself.
+#
+
+menu "X86_64 processor configuration"
+
+config XENARCH
+ string
+ default x86_64
+
+config X86_64
+ bool
+ default y
+ help
+ Port to the x86-64 architecture. x86-64 is a 64-bit extension to the
+ classical 32-bit x86 architecture. For details see
+ <http://www.x86-64.org/>.
+
+config X86
+ bool
+ default y
+
+config 64BIT
+ def_bool y
+
+config MMU
+ bool
+ default y
+
+config ISA
+ bool
+
+config SBUS
+ bool
+
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+
+config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config X86_CMPXCHG
+ bool
+ default y
+
+config EARLY_PRINTK
+ bool "Early Printk"
+ default n
+ help
+ Write kernel log output directly into the VGA buffer or to a serial
+ port.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized. For normal operation
+ it is not recommended because it looks ugly and doesn't cooperate
+ with klogd/syslogd or the X server. You should normally N here,
+ unless you want to debug such a crash.
+
+config HPET_TIMER
+ bool
+ default n
+ help
+ Use the IA-PC HPET (High Precision Event Timer) to manage
+ time in preference to the PIT and RTC, if a HPET is
+ present. The HPET provides a stable time base on SMP
+ systems, unlike the RTC, but it is more expensive to access,
+ as it is off-chip. You can find the HPET spec at
+ <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
+
+ If unsure, say Y.
+
+config HPET_EMULATE_RTC
+ bool "Provide RTC interrupt"
+ depends on HPET_TIMER && RTC=y
+
+config GENERIC_ISA_DMA
+ bool
+ default y
+
+config GENERIC_IOMAP
+ bool
+ default y
+
+#source "init/Kconfig"
+
+
+menu "Processor type and features"
+
+choice
+ prompt "Processor family"
+ default MK8
+
+#config MK8
+# bool "AMD-Opteron/Athlon64"
+# help
+# Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs.
+
+config MPSC
+ bool "Intel x86-64"
+ help
+ Optimize for Intel IA32 with 64bit extension CPUs
+ (Prescott/Nocona/Potomac)
+
+config GENERIC_CPU
+ bool "Generic-x86-64"
+ help
+ Generic x86-64 CPU.
+
+endchoice
+
+#
+# Define implied options from the CPU selection here
+#
+config X86_L1_CACHE_BYTES
+ int
+ default "128" if GENERIC_CPU || MPSC
+ default "64" if MK8
+
+config X86_L1_CACHE_SHIFT
+ int
+ default "7" if GENERIC_CPU || MPSC
+ default "6" if MK8
+
+config X86_TSC
+ bool
+ default n
+
+config X86_GOOD_APIC
+ bool
+ default y
+
+config X86_IO_APIC
+ bool
+ default XEN_PRIVILEGED_GUEST
+
+config X86_LOCAL_APIC
+ bool
+ default XEN_PRIVILEGED_GUEST
+
+config MICROCODE
+ tristate "/dev/cpu/microcode - Intel CPU microcode support"
+ ---help---
+ If you say Y here the 'File systems' section, you will be
+ able to update the microcode on Intel processors. You will
+ obviously need the actual microcode binary data itself which is
+ not shipped with the Linux kernel.
+
+ For latest news and information on obtaining all the required
+ ingredients for this driver, check:
+ <http://www.urbanmyth.org/microcode/>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called microcode.
+ If you use modprobe or kmod you may also want to add the line
+ 'alias char-major-10-184 microcode' to your /etc/modules.conf file.
+
+config X86_MSR
+ tristate "/dev/cpu/*/msr - Model-specific register support"
+ help
+ This device gives privileged processes access to the x86
+ Model-Specific Registers (MSRs). It is a character device with
+ major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
+ MSR accesses are directed to a specific CPU on multi-processor
+ systems.
+
+config X86_CPUID
+ tristate "/dev/cpu/*/cpuid - CPU information support"
+ help
+ This device gives processes access to the x86 CPUID instruction to
+ be executed on a specific processor. It is a character device
+ with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
+ /dev/cpu/31/cpuid.
+
+# disable it for opteron optimized builds because it pulls in ACPI_BOOT
+config X86_HT
+ bool
+ depends on SMP && !MK8
+ default y
+
+config MATH_EMULATION
+ bool
+
+config MCA
+ bool
+
+config EISA
+ bool
+
+config MTRR
+ bool "MTRR (Memory Type Range Register) support"
+ ---help---
+ On Intel P6 family processors (Pentium Pro, Pentium II and later)
+ the Memory Type Range Registers (MTRRs) may be used to control
+ processor access to memory ranges. This is most useful if you have
+ a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+ allows bus write transfers to be combined into a larger transfer
+ before bursting over the PCI/AGP bus. This can increase performance
+ of image write operations 2.5 times or more. Saying Y here creates a
+ /proc/mtrr file which may be used to manipulate your processor's
+ MTRRs. Typically the X server should use this.
+
+ This code has a reasonably generic interface so that similar
+ control registers on other processors can be easily supported
+ as well.
+
+ Saying Y here also fixes a problem with buggy SMP BIOSes which only
+ set the MTRRs for the boot CPU and not for the secondary CPUs. This
+ can lead to all sorts of problems, so it's good to say Y here.
+
+ Just say Y here, all x86-64 machines support MTRRs.
+
+ See <file:Documentation/mtrr.txt> for more information.
+
+config SMP
+ bool "Symmetric multi-processing support"
+ ---help---
+ This enables support for systems with more than one CPU. If you have
+ a system with only one CPU, like most personal computers, say N. If
+ you have a system with more than one CPU, say Y.
+
+ If you say N here, the kernel will run on single and multiprocessor
+ machines, but will use only one CPU of a multiprocessor machine. If
+ you say Y here, the kernel will run on many, but not all,
+ singleprocessor machines. On a singleprocessor machine, the kernel
+ will run faster if you say N here.
+
+ If you don't know what to do here, say N.
+
+config PREEMPT
+ bool "Preemptible Kernel"
+ ---help---
+ This option reduces the latency of the kernel when reacting to
+ real-time or interactive events by allowing a low priority process to
+ be preempted even if it is in kernel mode executing a system call.
+ This allows applications to run more reliably even when the system is
+ under load. On contrary it may also break your drivers and add
+ priority inheritance problems to your system. Don't select it if
+ you rely on a stable system or have slightly obscure hardware.
+ It's also not very well tested on x86-64 currently.
+ You have been warned.
+
+ Say Y here if you are feeling brave and building a kernel for a
+ desktop, embedded or real-time system. Say N if you are unsure.
+
+config SCHED_SMT
+ bool "SMT (Hyperthreading) scheduler support"
+ depends on SMP
+ default off
+ help
+ SMT scheduler support improves the CPU scheduler's decision making
+ when dealing with Intel Pentium 4 chips with HyperThreading at a
+ cost of slightly increased overhead in some places. If unsure say
+ N here.
+
+config K8_NUMA
+ bool "K8 NUMA support"
+ select NUMA
+ depends on SMP
+ help
+ Enable NUMA (Non Unified Memory Architecture) support for
+ AMD Opteron Multiprocessor systems. The kernel will try to allocate
+ memory used by a CPU on the local memory controller of the CPU
+ and add some more NUMA awareness to the kernel.
+ This code is recommended on all multiprocessor Opteron systems
+ and normally doesn't hurt on others.
+
+config NUMA_EMU
+ bool "NUMA emulation support"
+ select NUMA
+ depends on SMP
+ help
+ Enable NUMA emulation. A flat machine will be split
+ into virtual nodes when booted with "numa=fake=N", where N is the
+ number of nodes. This is only useful for debugging.
+
+config DISCONTIGMEM
+ bool
+ depends on NUMA
+ default y
+
+config NUMA
+ bool
+ default n
+
+config HAVE_DEC_LOCK
+ bool
+ depends on SMP
+ default y
+
+# actually 64 maximum, but you need to fix the APIC code first
+# to use clustered mode or whatever your big iron needs
+config NR_CPUS
+ int "Maximum number of CPUs (2-8)"
+ range 2 8
+ depends on SMP
+ default "8"
+ help
+ This allows you to specify the maximum number of CPUs which this
+ kernel will support. The maximum supported value is 32 and the
+ minimum value which makes sense is 2.
+
+ This is purely to save memory - each supported CPU requires
+ memory in the static kernel configuration.
+
+config GART_IOMMU
+ bool "IOMMU support"
+ depends on PCI
+ help
+ Support the K8 IOMMU. Needed to run systems with more than 4GB of memory
+ properly with 32-bit PCI devices that do not support DAC (Double Address
+ Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter.
+ Normally the kernel will take the right choice by itself.
+ If unsure, say Y.
+
+# need this always enabled with GART_IOMMU for the VIA workaround
+config SWIOTLB
+ bool
+ depends on GART_IOMMU
+ default y
+
+config DUMMY_IOMMU
+ bool
+ depends on !GART_IOMMU && !SWIOTLB
+ default y
+ help
+ Don't use IOMMU code. This will cause problems when you have more than 4GB
+ of memory and any 32-bit devices. Don't turn on unless you know what you
+ are doing.
+
+config X86_MCE
+ bool "Machine check support" if EMBEDDED
+ default n
+ help
+ Include a machine check error handler to report hardware errors.
+ This version will require the mcelog utility to decode some
+ machine check error logs. See
+ ftp://ftp.x86-64.org/pub/linux/tools/mcelog
+
+endmenu
+
+#
+# Use the generic interrupt handling code in kernel/irq/:
+#
+config GENERIC_HARDIRQS
+ bool
+ default y
+
+config GENERIC_IRQ_PROBE
+ bool
+ default y
+
+menu "Power management options"
+
+source kernel/power/Kconfig
+
+source "arch/x86_64/kernel/cpufreq/Kconfig"
+
+endmenu
+
+menu "Bus options (PCI etc.)"
+
+config PCI
+ bool "PCI support"
+
+# x86-64 doesn't support PCI BIOS access from long mode so always go direct.
+config PCI_DIRECT
+ bool
+ depends on PCI
+ default y
+
+config PCI_MMCONFIG
+ bool "Support mmconfig PCI config space access"
+ depends on PCI
+ select ACPI_BOOT
+
+config UNORDERED_IO
+ bool "Unordered IO mapping access"
+ depends on EXPERIMENTAL
+ help
+ Use unordered stores to access IO memory mappings in device drivers.
+ Still very experimental. When a driver works on IA64/ppc64/pa-risc it should
+ work with this option, but it makes the drivers behave differently
+ from i386. Requires that the driver writer used memory barriers
+ properly.
+
+#source "drivers/pci/Kconfig"
+
+#source "drivers/pcmcia/Kconfig"
+
+#source "drivers/pci/hotplug/Kconfig"
+
+endmenu
+
+
+menu "Executable file formats / Emulations"
+
+# source "fs/Kconfig.binfmt"
+
+config IA32_EMULATION
+ bool "IA32 Emulation"
+ help
+ Include code to run 32-bit programs under a 64-bit kernel. You should likely
+ turn this on, unless you're 100% sure that you don't have any 32-bit programs
+ left.
+
+config IA32_AOUT
+ bool "IA32 a.out support"
+ depends on IA32_EMULATION
+ help
+ Support old a.out binaries in the 32bit emulation.
+
+config COMPAT
+ bool
+ depends on IA32_EMULATION
+ default y
+
+config SYSVIPC_COMPAT
+ bool
+ depends on COMPAT && SYSVIPC
+ default y
+
+config UID16
+ bool
+ depends on IA32_EMULATION
+ default y
+
+endmenu
+
+# source drivers/Kconfig
+
+# source "drivers/firmware/Kconfig"
+
+# source fs/Kconfig
+
+#source "arch/x86_64/oprofile/Kconfig"
+
+#source "arch/x86_64/Kconfig.debug"
+
+# source "security/Kconfig"
+
+# source "crypto/Kconfig"
+
+# source "lib/Kconfig"
+
+endmenu
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/Makefile
new file mode 100644
index 0000000000..9f506b3203
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/Makefile
@@ -0,0 +1,92 @@
+#
+# x86_64/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+# 19990713 Artur Skawina <skawina@geocities.com>
+# Added '-march' and '-mpreferred-stack-boundary' support
+# 20000913 Pavel Machek <pavel@suse.cz>
+# Converted for x86_64 architecture
+# 20010105 Andi Kleen, add IA32 compiler.
+# ....and later removed it again....
+# 20050205 Jun Nakajima <jun.nakajima@intel.com>
+# Modified for Xen
+#
+# $Id: Makefile,v 1.31 2002/03/22 15:56:07 ak Exp $
+
+#
+# early bootup linking needs 32bit. You can either use real 32bit tools
+# here or 64bit tools in 32bit mode.
+#
+XENARCH := $(subst ",,$(CONFIG_XENARCH))
+
+IA32_CC := $(CC) $(CPPFLAGS) -m32 -O2 -fomit-frame-pointer
+IA32_LD := $(LD) -m elf_i386
+IA32_AS := $(CC) $(AFLAGS) -m32 -Wa,--32 -traditional -c
+IA32_OBJCOPY := $(CROSS_COMPILE)objcopy
+IA32_CPP := $(CROSS_COMPILE)gcc -m32 -E
+export IA32_CC IA32_LD IA32_AS IA32_OBJCOPY IA32_CPP
+
+
+LDFLAGS := -m elf_x86_64
+#LDFLAGS_vmlinux := -e stext
+
+CHECKFLAGS += -D__x86_64__ -m64
+
+cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
+cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+CFLAGS += $(cflags-y)
+
+CFLAGS += -mno-red-zone
+CFLAGS += -mcmodel=kernel
+CFLAGS += -pipe
+# this makes reading assembly source easier, but produces worse code
+# actually it makes the kernel smaller too.
+CFLAGS += -fno-reorder-blocks
+CFLAGS += -Wno-sign-compare
+ifneq ($(CONFIG_DEBUG_INFO),y)
+CFLAGS += -fno-asynchronous-unwind-tables
+# -fweb shrinks the kernel a bit, but the difference is very small
+# it also messes up debugging, so don't use it for now.
+#CFLAGS += $(call cc-option,-fweb)
+endif
+# -funit-at-a-time shrinks the kernel .text considerably
+# unfortunately it makes reading oopses harder.
+CFLAGS += $(call cc-option,-funit-at-a-time,)
+
+head-y := arch/xen/x86_64/kernel/head.o arch/xen/x86_64/kernel/head64.o arch/xen/x86_64/kernel/init_task.o
+
+libs-y += arch/x86_64/lib/
+core-y += arch/xen/x86_64/kernel/ arch/xen/x86_64/mm/
+core-$(CONFIG_IA32_EMULATION) += arch/xen/x86_64/ia32/
+drivers-$(CONFIG_PCI) += arch/xen/x86_64/pci/
+drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
+
+# for clean
+obj- += kernel/ mm/ pci/
+
+xenflags-y += -Iinclude/asm-xen/asm-x86_64/mach-xen
+
+CFLAGS += $(xenflags-y)
+AFLAGS += $(xenflags-y)
+
+prepare: include/asm-$(XENARCH)/asm_offset.h
+CLEAN_FILES += include/asm-$(XENARCH)/asm_offset.h
+
+arch/$(XENARCH)/kernel/asm-offsets.s: include/asm include/.asm-ignore \
+ include/linux/version.h include/config/MARKER
+
+
+include/asm-$(XENARCH)/asm_offset.h: arch/xen/x86_64/kernel/asm-offsets.s
+ $(call filechk,gen-asm-offsets)
+ ln -fsn asm_offset.h include/asm-$(XENARCH)/offset.h
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile
new file mode 100644
index 0000000000..63c89d7f23
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/Makefile
@@ -0,0 +1,67 @@
+#
+# Makefile for the linux kernel.
+#
+XENARCH := $(subst ",,$(CONFIG_XENARCH))
+
+CFLAGS += -Iarch/$(XENARCH)/kernel
+
+extra-y := head.o head64.o init_task.o
+
+obj-y := process.o signal.o entry.o traps.o \
+ ioport.o ldt.o setup.o \
+ x8664_ksyms.o vsyscall.o \
+ setup64.o e820.o irq.o early_printk.o
+c-obj-y := semaphore.o i387.o sys_x86_64.o \
+ ptrace.o quirks.o syscall.o bootflag.o
+
+i386-obj-y := time.o
+obj-y += ../../i386/kernel/timers/
+
+s-obj-y :=
+
+#obj-$(CONFIG_X86_MCE) += mce.o
+#obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
+obj-$(CONFIG_ACPI_BOOT) += acpi/
+obj-$(CONFIG_X86_MSR) += msr.o
+obj-$(CONFIG_MICROCODE) += microcode.o
+obj-$(CONFIG_X86_CPUID) += cpuid.o
+#obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
+c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
+obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
+c-obj-$(CONFIG_X86_IO_APIC) += genapic.o genapic_cluster.o genapic_flat.o
+#obj-$(CONFIG_PM) += suspend.o
+#obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
+#obj-$(CONFIG_CPU_FREQ) += cpufreq/
+#obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+#obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
+c-obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
+#obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+
+c-obj-$(CONFIG_MODULES) += module.o
+
+#obj-y += topology.o
+c-obj-y += intel_cacheinfo.o
+
+bootflag-y += ../../../i386/kernel/bootflag.o
+cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../../i386/kernel/cpuid.o
+topology-y += ../../../i386/mach-default/topology.o
+swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../../i386/kernel/microcode.o
+intel_cacheinfo-y += ../../../i386/kernel/cpu/intel_cacheinfo.o
+quirks-y += ../../../i386/kernel/quirks.o
+
+c-link := init_task.o
+s-link := vsyscall.o
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
+ ln -fsn $(srctree)/arch/x86_64/kernel/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(i386-obj-y)):
+ ln -fsn $(srctree)/arch/xen/i386/kernel/$(notdir $@) $@
+
+obj-y += $(c-obj-y) $(s-obj-y) $(i386-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link) $(i386-obj-y))
+clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile
new file mode 100644
index 0000000000..0824a570e6
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile
@@ -0,0 +1,16 @@
+i386-obj-$(CONFIG_ACPI_BOOT) := boot.o
+c-obj-$(CONFIG_X86_IO_APIC) := earlyquirk.o
+c-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+ @ln -fsn $(srctree)/arch/i386/kernel/acpi/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(i386-obj-y)):
+ ln -fsn $(srctree)/arch/xen/i386/kernel/acpi/$(notdir $@) $@
+
+obj-y += $(c-obj-y) $(s-obj-y) $(i386-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
+clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c
new file mode 100644
index 0000000000..f383328018
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/apic.c
@@ -0,0 +1,200 @@
+/*
+ * Local APIC handling, local APIC timers
+ *
+ * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively.
+ * Maciej W. Rozycki : Various updates and fixes.
+ * Mikael Pettersson : Power Management for UP-APIC.
+ * Pavel Machek and
+ * Mikael Pettersson : PM converted to driver model.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+
+#include "io_ports.h"
+
+/*
+ * Debug level
+ */
+int apic_verbosity;
+int disable_apic;
+
+void smp_local_timer_interrupt(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ profile_tick(CPU_PROFILING, regs);
+#if 0
+ if (--per_cpu(prof_counter, cpu) <= 0) {
+ /*
+ * The multiplier may have changed since the last time we got
+ * to this point as a result of the user writing to
+ * /proc/profile. In this case we need to adjust the APIC
+ * timer accordingly.
+ *
+ * Interrupts are already masked off at this point.
+ */
+ per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
+ if (per_cpu(prof_counter, cpu) !=
+ per_cpu(prof_old_multiplier, cpu)) {
+ __setup_APIC_LVTT(calibration_result/
+ per_cpu(prof_counter, cpu));
+ per_cpu(prof_old_multiplier, cpu) =
+ per_cpu(prof_counter, cpu);
+ }
+
+#ifdef CONFIG_SMP
+ update_process_times(user_mode(regs));
+#endif
+ }
+#endif
+
+ /*
+ * We take the 'long' return path, and there every subsystem
+ * grabs the appropriate locks (kernel lock/ irq lock).
+ *
+ * we might want to decouple profiling from the 'long path',
+ * and do the profiling totally in assembly.
+ *
+ * Currently this isn't too much of an issue (performance wise),
+ * we can take more than 100K local irqs per second on a 100 MHz P5.
+ */
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ * interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+void smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+ add_pda(apic_timer_irqs, 1);
+
+ /*
+ * NOTE! We'd better ACK the irq immediately,
+ * because timer handling can be slow.
+ */
+ ack_APIC_irq();
+ /*
+ * update_process_times() expects us to have done irq_enter().
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
+ */
+ irq_enter();
+ smp_local_timer_interrupt(regs);
+ irq_exit();
+}
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+ unsigned int v;
+ irq_enter();
+ /*
+ * Check if this really is a spurious interrupt and ACK it
+ * if it is a vectored one. Just in case...
+ * Spurious interrupts should not be ACKed.
+ */
+ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+ if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+ ack_APIC_irq();
+
+#if 0
+ static unsigned long last_warning;
+ static unsigned long skipped;
+
+ /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+ if (time_before(last_warning+30*HZ,jiffies)) {
+ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
+ smp_processor_id(), skipped);
+ last_warning = jiffies;
+ skipped = 0;
+ } else {
+ skipped++;
+ }
+#endif
+ irq_exit();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+
+asmlinkage void smp_error_interrupt(void)
+{
+ unsigned int v, v1;
+
+ irq_enter();
+ /* First tickle the hardware, only then report what went on. -- REW */
+ v = apic_read(APIC_ESR);
+ apic_write(APIC_ESR, 0);
+ v1 = apic_read(APIC_ESR);
+ ack_APIC_irq();
+ atomic_inc(&irq_err_count);
+
+ /* Here is what the APIC error bits mean:
+ 0: Send CS error
+ 1: Receive CS error
+ 2: Send accept error
+ 3: Receive accept error
+ 4: Reserved
+ 5: Send illegal vector
+ 6: Received illegal vector
+ 7: Illegal register address
+ */
+ printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+ smp_processor_id(), v , v1);
+ irq_exit();
+}
+
+int get_physical_broadcast(void)
+{
+ return 0xff;
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ if (smp_found_config)
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+#endif
+
+ return 0;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c
new file mode 100644
index 0000000000..b965d6d1eb
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c
@@ -0,0 +1,70 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/suspend.h>
+#include <asm/pda.h>
+#include <asm/processor.h>
+#include <asm/segment.h>
+#include <asm/thread_info.h>
+#include <asm/ia32.h>
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+int main(void)
+{
+#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
+ ENTRY(state);
+ ENTRY(flags);
+ ENTRY(thread);
+ ENTRY(pid);
+ BLANK();
+#undef ENTRY
+#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
+ ENTRY(flags);
+ ENTRY(addr_limit);
+ ENTRY(preempt_count);
+ BLANK();
+#undef ENTRY
+#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
+ ENTRY(kernelstack);
+ ENTRY(oldrsp);
+ ENTRY(pcurrent);
+ ENTRY(irqrsp);
+ ENTRY(irqcount);
+ ENTRY(cpunumber);
+ ENTRY(irqstackptr);
+ ENTRY(kernel_mode);
+ BLANK();
+#undef ENTRY
+#ifdef CONFIG_IA32_EMULATION
+#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
+ ENTRY(eax);
+ ENTRY(ebx);
+ ENTRY(ecx);
+ ENTRY(edx);
+ ENTRY(esi);
+ ENTRY(edi);
+ ENTRY(ebp);
+ ENTRY(esp);
+ ENTRY(eip);
+ BLANK();
+#undef ENTRY
+ DEFINE(IA32_RT_SIGFRAME_sigcontext,
+ offsetof (struct rt_sigframe32, uc.uc_mcontext));
+ BLANK();
+#endif
+ DEFINE(SIZEOF_PBE, sizeof(struct pbe));
+ DEFINE(pbe_address, offsetof(struct pbe, address));
+ DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+ return 0;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/e820.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/e820.c
new file mode 100644
index 0000000000..7c56a9f53c
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/e820.c
@@ -0,0 +1,533 @@
+/*
+ * Handle the memory map.
+ * The functions here do the job until bootmem takes over.
+ * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/string.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <asm/proto.h>
+#include <asm/bootsetup.h>
+
+/*
+ * PFN of last memory page.
+ */
+unsigned long end_pfn;
+unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
+unsigned long end_pfn_map;
+
+/*
+ * Add a memory region to the kernel e820 map.
+ */
+void __init add_memory_region(unsigned long start, unsigned long size, int type)
+{
+ int x = e820.nr_map;
+
+ if (x == E820MAX) {
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ return;
+ }
+
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
+}
+
+#if 0
+extern char _end[];
+
+/*
+ * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
+ * The direct mapping extends to end_pfn_map, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+
+/*
+ * Last pfn which the user wants to use.
+ */
+
+extern struct resource code_resource, data_resource;
+
+/* Check for some hardcoded bad areas that early boot is not allowed to touch */
/*
 * NOTE: this function lives inside the "#if 0" region above — the whole
 * native early-boot allocator path is compiled out in this Xen port.
 *
 * Check whether [*addrp, *addrp+size) collides with a region that early
 * boot must not touch.  On collision, advance *addrp past the region and
 * return 1 so the caller can retry; return 0 when the range is usable.
 * (table_start/table_end are the kernel's direct-mapping page tables,
 * defined elsewhere — not visible in this file chunk.)
 */
static inline int bad_addr(unsigned long *addrp, unsigned long size)
{
	unsigned long addr = *addrp, last = addr + size;

	/* various gunk below that needed for SMP startup */
	if (addr < 0x8000) {
		*addrp = 0x8000;
		return 1;
	}

	/* direct mapping tables of the kernel */
	if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
		*addrp = table_end << PAGE_SHIFT;
		return 1;
	}

	/* initrd */
#ifdef CONFIG_BLK_DEV_INITRD
	if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
	    addr < INITRD_START+INITRD_SIZE) {
		*addrp = INITRD_START + INITRD_SIZE;
		return 1;
	}
#endif
	/* kernel code + 640k memory hole (later should not be needed, but
	   be paranoid for now) */
	if (last >= 640*1024 && addr < __pa_symbol(&_end)) {
		*addrp = __pa_symbol(&_end);
		return 1;
	}
	/* XXX ramdisk image here? */
	return 0;
}
+
+int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
+{
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ if (type && ei->type != type)
+ continue;
+ if (ei->addr >= end || ei->addr + ei->size < start)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Find a free area in a specific range.
+ */
/*
 * Find a free area in a specific range.
 *
 * Scan the e820 RAM entries for 'size' bytes inside [start, end) that do
 * not collide with early-boot reserved areas (see bad_addr()).  Returns
 * the address found, or -1UL when no entry can satisfy the request.
 */
unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size)
{
	int i;
	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long addr = ei->addr, last;
		if (ei->type != E820_RAM)
			continue;
		if (addr < start)
			addr = start;
		if (addr > ei->addr + ei->size)
			continue;
		/*
		 * bad_addr() bumps addr past each conflicting region and
		 * returns 1, so this empty-bodied loop terminates either on
		 * a clean address or when addr runs off the entry's end.
		 */
		while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size)
			;
		last = addr + size;
		if (last > ei->addr + ei->size)
			continue;
		if (last > end)
			continue;
		return addr;
	}
	return -1UL;
}
+
+/*
+ * Free bootmem based on the e820 table for a node.
+ */
+void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
+{
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ unsigned long last, addr;
+
+ if (ei->type != E820_RAM ||
+ ei->addr+ei->size <= start ||
+ ei->addr > end)
+ continue;
+
+ addr = round_up(ei->addr, PAGE_SIZE);
+ if (addr < start)
+ addr = start;
+
+ last = round_down(ei->addr + ei->size, PAGE_SIZE);
+ if (last >= end)
+ last = end;
+
+ if (last > addr && last-addr >= PAGE_SIZE)
+ free_bootmem_node(pgdat, addr, last-addr);
+ }
+}
+
+/*
+ * Find the highest page frame number we have available
+ */
/*
 * Find the highest page frame number we have available.
 *
 * Returns the clamped end-of-RAM pfn; also updates the global end_pfn_map
 * (highest pfn of ANY e820 entry, RAM or not) as a side effect.  Note the
 * local end_pfn deliberately shadows the global of the same name — the
 * caller is expected to assign the return value to the global.
 */
unsigned long __init e820_end_of_ram(void)
{
	int i;
	unsigned long end_pfn = 0;	/* shadows the file-scope end_pfn */

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		unsigned long start, end;

		/* Round edges inward to whole pages. */
		start = round_up(ei->addr, PAGE_SIZE);
		end = round_down(ei->addr + ei->size, PAGE_SIZE);
		if (start >= end)
			continue;
		if (ei->type == E820_RAM) {
			if (end > end_pfn<<PAGE_SHIFT)
				end_pfn = end>>PAGE_SHIFT;
		} else {
			if (end > end_pfn_map<<PAGE_SHIFT)
				end_pfn_map = end>>PAGE_SHIFT;
		}
	}

	/* end_pfn_map covers everything end_pfn does, capped at MAXMEM. */
	if (end_pfn > end_pfn_map)
		end_pfn_map = end_pfn;
	if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
		end_pfn_map = MAXMEM>>PAGE_SHIFT;
	/* Honour a user-supplied mem= limit, then the map cap. */
	if (end_pfn > end_user_pfn)
		end_pfn = end_user_pfn;
	if (end_pfn > end_pfn_map)
		end_pfn = end_pfn_map;

	return end_pfn;
}
+
+/*
+ * Mark e820 reserved areas as busy for the resource manager.
+ */
/*
 * Mark e820 reserved areas as busy for the resource manager.
 *
 * Registers every e820 entry below 4GB with the iomem resource tree.
 * RAM entries additionally get the kernel code/data resources nested
 * under them.  (alloc_bootmem_low() panics rather than returning NULL
 * in this kernel, so its result is not checked here.)
 */
void __init e820_reserve_resources(void)
{
	int i;
	for (i = 0; i < e820.nr_map; i++) {
		struct resource *res;
		/* Entries crossing or above 4GB are skipped entirely. */
		if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
			continue;
		res = alloc_bootmem_low(sizeof(struct resource));
		switch (e820.map[i].type) {
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		res->start = e820.map[i].addr;
		res->end = res->start + e820.map[i].size - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		request_resource(&iomem_resource, res);
		if (e820.map[i].type == E820_RAM) {
			/*
			 * We don't know which RAM region contains kernel data,
			 * so we try it repeatedly and let the resource manager
			 * test it.
			 */
			request_resource(res, &code_resource);
			request_resource(res, &data_resource);
		}
	}
}
+
+
+void __init e820_print_map(char *who)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ printk(" %s: %016Lx - %016Lx ", who,
+ (unsigned long long) e820.map[i].addr,
+ (unsigned long long) (e820.map[i].addr + e820.map[i].size));
+ switch (e820.map[i].type) {
+ case E820_RAM: printk("(usable)\n");
+ break;
+ case E820_RESERVED:
+ printk("(reserved)\n");
+ break;
+ case E820_ACPI:
+ printk("(ACPI data)\n");
+ break;
+ case E820_NVS:
+ printk("(ACPI NVS)\n");
+ break;
+ default: printk("type %u\n", e820.map[i].type);
+ break;
+ }
+ }
+}
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries. The following
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
/*
 * Rebuild the BIOS map without overlaps using a change-point sweep:
 * every entry contributes a "start" and an "end" change point; sweeping
 * them in address order while tracking the set of currently-open entries
 * yields non-overlapping output runs, each typed with the highest
 * (most restrictive) type among the overlapping entries.
 *
 * Returns 0 on success, -1 if the map is too small to need sanitizing or
 * contains an entry whose addr+size wraps.  NOTE(review): *pnr_map is a
 * plain char — assumes E820MAX fits in a (possibly signed) char; confirm
 * against the E820MAX definition.
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	static struct change_member change_point_list[2*E820MAX] __initdata;
	static struct change_member *change_point[2*E820MAX] __initdata;
	static struct e820entry *overlap_list[E820MAX] __initdata;
	static struct e820entry new_bios[E820MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr;
	int i;

	/*
	   Visually we're performing the following (1,2,3,4 = memory types)...

	   Sample memory map (w/overlaps):
	      ____22__________________
	      ______________________4_
	      ____1111________________
	      _44_____________________
	      11111111________________
	      ____________________33__
	      ___________44___________
	      __________33333_________
	      ______________22________
	      ___________________2222_
	      _________111111111______
	      _____________________11_
	      _________________4______

	   Sanitized equivalent (no overlap):
	      1_______________________
	      _44_____________________
	      ___1____________________
	      ____22__________________
	      ______11________________
	      _________1______________
	      __________3_____________
	      ___________44___________
	      _____________33_________
	      _______________2________
	      ________________1_______
	      _________________4______
	      ___________________2____
	      ____________________33__
	      ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses) */
	chgidx = 0;
	for (i=0; i < old_nr; i++) {
		change_point[chgidx]->addr = biosmap[i].addr;
		change_point[chgidx++]->pbios = &biosmap[i];
		change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
		change_point[chgidx++]->pbios = &biosmap[i];
	}

	/* sort change-point list by memory addresses (low -> high) */
	/* (simple bubble sort; 2*E820MAX elements, boot-time only) */
	still_changing = 1;
	while (still_changing) {
		still_changing = 0;
		for (i=1; i < 2*old_nr; i++) {
			/* if <current_addr> > <last_addr>, swap */
			/* or, if current=<start_addr> & last=<end_addr>, swap */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   )
			{
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing=1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries=0;	 /* number of entries in the overlap table */
	new_bios_entry=0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */
	/* loop through change-points, determining effect on the new bios map */
	for (chgidx=0; chgidx < 2*old_nr; chgidx++)
	{
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
		{
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
		}
		else
		{
			/* remove entry from list (order independent, so swap with last) */
			for (i=0; i<overlap_entries; i++)
			{
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i=0; i<overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type) {
			if (last_type != 0) {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break; 	/* no more space left for new bios entries */
			}
			if (current_type != 0) {
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr=change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry;   /* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
+
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it..
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory. If we aren't, we'll fake a memory map.
+ *
+ * We check to see that the memory map contains at least 2 elements
+ * before we'll use it, because the detection code in setup.S may
+ * not be perfect and most every PC known to man has two memory
+ * regions: one from 0 to 640k, and one from 1mb up. (The IBM
+ * thinkpad 560x, for example, does not cooperate with the memory
+ * detection code.)
+ */
/*
 * Copy nr_map entries from the BIOS-provided map into the kernel e820
 * map via add_memory_region().  Returns -1 (map rejected) when there
 * are fewer than two entries or an entry's addr+size wraps; 0 otherwise.
 * RAM entries claiming the 640k-1M legacy hole are split/trimmed around it.
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	do {
		unsigned long start = biosmap->addr;
		unsigned long size = biosmap->size;
		unsigned long end = start + size;
		unsigned long type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up.
		 *
		 * This should be removed on Hammer which is supposed to not
		 * have non e820 covered ISA mappings there, but I had some strange
		 * problems so it stays for now.  -AK
		 */
		if (type == E820_RAM) {
			if (start < 0x100000ULL && end > 0xA0000ULL) {
				/* Keep only the sub-640k part, if any... */
				if (start < 0xA0000ULL)
					add_memory_region(start, 0xA0000ULL-start, type);
				/* ...and the part above 1M, if any. */
				if (end <= 0x100000ULL)
					continue;
				start = 0x100000ULL;
				size = end - start;
			}
		}

		add_memory_region(start, size, type);
	} while (biosmap++,--nr_map);
	return 0;
}
+
/*
 * Build the kernel's e820 map at boot: sanitize and copy the firmware
 * map; if that is unusable, synthesize a two-entry map (0-640k plus
 * 1M upward) from the BIOS-88/e801 size probes.  Finally log the result.
 */
void __init setup_memory_region(void)
{
	char *who = "BIOS-e820";	/* label for the log; changed on fallback */

	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	sanitize_e820_map(E820_MAP, &E820_MAP_NR);
	if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
		unsigned long mem_size;

		/* compare results from other methods and take the greater */
		if (ALT_MEM_K < EXT_MEM_K) {
			mem_size = EXT_MEM_K;
			who = "BIOS-88";
		} else {
			mem_size = ALT_MEM_K;
			who = "BIOS-e801";
		}

		e820.nr_map = 0;
		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
		add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
	}
	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map(who);
}
+#endif
+
+extern unsigned long xen_override_max_pfn;
+
/*
 * Parse the "mem=" boot option.  Sets the global end_user_pfn cap and
 * — Xen-specific — mirrors it into xen_override_max_pfn so the Xen
 * memory setup honours the same limit.
 */
void __init parse_memopt(char *p, char **from)
{
	/*
	 * mem=XXX[kKmM] limits kernel memory to XXX+1MB
	 *
	 * It would be more logical to count from 0 instead of from
	 * HIGH_MEMORY, but we keep that for now for i386 compatibility.
	 *
	 * No support for custom mapping like i386.  The reason is
	 * that we need to read the e820 map anyways to handle the
	 * ACPI mappings in the direct map.  Also on x86-64 there
	 * should be always a good e820 map.  This is only an upper
	 * limit, you cannot force usage of memory not in e820.
	 *
	 * -AK
	 */
	end_user_pfn = memparse(p, from) + HIGH_MEMORY;	/* bytes first... */
	end_user_pfn >>= PAGE_SHIFT;			/* ...then pfn */
	xen_override_max_pfn = (unsigned long) end_user_pfn;
}
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S
new file mode 100644
index 0000000000..3f28d0e04e
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S
@@ -0,0 +1,1119 @@
+/*
+ * linux/arch/x86_64/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
+ * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+ *
+ * $Id$
+ *
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Asit Mallick <asit.k.mallick@intel.com>
+ * Modified for Xen
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after an interrupt and after each system call.
+ *
+ * Normal syscalls and interrupts don't save a full stack frame, this is
+ * only done for syscall tracing, signals or fork/exec et.al.
+ *
+ * A note on terminology:
+ * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * at the top of the kernel process stack.
+ * - partial stack frame: partially saved registers up to R11.
+ * - full stack frame: Like partial stack frame, but all register saved.
+ *
+ * TODO:
+ * - schedule it carefully for the final hardware.
+ */
+
+#define ASSEMBLY 1
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/dwarf2.h>
+#include <asm/calling.h>
+#include <asm/asm_offset.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/hw_irq.h>
+#include <asm/errno.h>
+#include <asm-xen/xen-public/arch-x86_64.h>
+
+#include "irq_vectors.h"
+
/* Offset of the saved upcall mask within the exception frame, and the Xen
 * guest-context flag meaning "return via syscall path". */
EVENT_MASK = (CS+4)
VGCF_IN_SYSCALL = (1<<8)

/*
 * Copied from arch/xen/i386/kernel/entry.S
 */
/* Offsets into shared_info_t. */
#define evtchn_upcall_pending		0
#define evtchn_upcall_mask		1

/* log2(sizeof(vcpu_info)) — used to index the per-CPU vcpu_info array. */
#define sizeof_vcpu_shift		3

#ifdef CONFIG_SMP
/* SMP: locate this CPU's vcpu_info with preemption disabled around the
 * access; the _fixup bytes are patched at runtime. */
#define XEN_GET_VCPU_INFO(reg)
#define preempt_disable(reg)	incl TI_preempt_count(reg)
#define preempt_enable(reg)	decl TI_preempt_count(reg)
#define XEN_LOCK_VCPU_INFO_SMP(reg) preempt_disable(%rbp)		; \
				movl TI_cpu(%rbp),reg			; \
				shl  $sizeof_vcpu_shift,reg		; \
				addl HYPERVISOR_shared_info,reg
#define XEN_UNLOCK_VCPU_INFO_SMP(reg) preempt_enable(%rbp)
#define XEN_UNLOCK_VCPU_INFO_SMP_fixup .byte 0xff,0xff,0xff
#define Ux00 0xff
#define XEN_LOCKED_BLOCK_EVENTS(reg)	movb $1,evtchn_upcall_mask(reg)
#define XEN_BLOCK_EVENTS(reg)	XEN_LOCK_VCPU_INFO_SMP(reg)		; \
				XEN_LOCKED_BLOCK_EVENTS(reg)		; \
				XEN_UNLOCK_VCPU_INFO_SMP(reg)
#define XEN_UNBLOCK_EVENTS(reg)	XEN_LOCK_VCPU_INFO_SMP(reg)		; \
				movb $0,evtchn_upcall_mask(reg)		; \
				XEN_UNLOCK_VCPU_INFO_SMP(reg)
#define XEN_SAVE_UPCALL_MASK(reg,tmp,off) GET_THREAD_INFO(%ebp)		; \
				XEN_LOCK_VCPU_INFO_SMP(reg)		; \
				movb evtchn_upcall_mask(reg), tmp	; \
				movb tmp, off(%rsp)			; \
				XEN_UNLOCK_VCPU_INFO_SMP(reg)
#else
/* UP: the shared info pointer is enough; block/unblock events by writing
 * the upcall mask byte directly (the Xen analogue of cli/sti). */
#define XEN_GET_VCPU_INFO(reg)	movq HYPERVISOR_shared_info,reg
#define XEN_LOCK_VCPU_INFO_SMP(reg) movq HYPERVISOR_shared_info,reg
#define XEN_UNLOCK_VCPU_INFO_SMP(reg)
#define XEN_UNLOCK_VCPU_INFO_SMP_fixup
#define Ux00 0x00
#define XEN_LOCKED_BLOCK_EVENTS(reg)	movb $1,evtchn_upcall_mask(reg)
#define XEN_BLOCK_EVENTS(reg)	XEN_LOCKED_BLOCK_EVENTS(reg)
#define XEN_UNBLOCK_EVENTS(reg)	movb $0,evtchn_upcall_mask(reg)
#define XEN_SAVE_UPCALL_MASK(reg,tmp,off) \
	movb evtchn_upcall_mask(reg), tmp; \
	movb tmp, off(%rsp)
#endif

#define XEN_TEST_PENDING(reg)	testb $0xFF,evtchn_upcall_pending(reg)

	.code64

#ifdef CONFIG_PREEMPT
#define preempt_stop XEN_BLOCK_EVENTS(%rsi)
#else
#define preempt_stop
#define retint_kernel retint_restore_args
#endif
+
+/*
+ * C code is not supposed to know about undefined top of stack. Every time
+ * a C function with a pt_regs argument is called from the SYSCALL based
+ * fast path FIXUP_TOP_OF_STACK is needed.
+ * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
+ * manipulation.
+ */
+
	/* %rsp:at FRAMEEND */
	/* Make the partial SYSCALL frame look like a full pt_regs to C code:
	 * fake CS and mark RCX as unknown (-1). */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	.endm

	/* No-op here: nothing to undo for the Xen syscall path. */
	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	.endm

	/* Build a fake interrupt frame (ss,rsp,eflags,cs,rip,orig_rax) so a
	 * kernel thread can exit through the normal return path. */
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorq	%rax, %rax
	pushq	%rax /* ss */
	CFI_ADJUST_CFA_OFFSET	8
	pushq	%rax /* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_OFFSET	rip,0
	pushq	$(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	pushq	$__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET	8
	pushq	\child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_OFFSET	rip,0
	pushq	%rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

	/* Declare the standard pt_regs register locations for the unwinder. */
	.macro CFI_DEFAULT_STACK
	CFI_ADJUST_CFA_OFFSET	(SS)
	CFI_OFFSET	r15,R15-SS
	CFI_OFFSET	r14,R14-SS
	CFI_OFFSET	r13,R13-SS
	CFI_OFFSET	r12,R12-SS
	CFI_OFFSET	rbp,RBP-SS
	CFI_OFFSET	rbx,RBX-SS
	CFI_OFFSET	r11,R11-SS
	CFI_OFFSET	r10,R10-SS
	CFI_OFFSET	r9,R9-SS
	CFI_OFFSET	r8,R8-SS
	CFI_OFFSET	rax,RAX-SS
	CFI_OFFSET	rcx,RCX-SS
	CFI_OFFSET	rdx,RDX-SS
	CFI_OFFSET	rsi,RSI-SS
	CFI_OFFSET	rdi,RDI-SS
	CFI_OFFSET	rsp,RSP-SS
	CFI_OFFSET	rip,RIP-SS
	.endm

	/*
	 * Must be consistent with the definition in arch_x86_64.h:
	 *	struct switch_to_user {
	 *		u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
	 *	} PACKED;
	 * #define VGCF_IN_SYSCALL (1<<8)
	 *
	 * Return to user space via the HYPERVISOR_switch_to_user hypercall,
	 * handing Xen a switch_to_user structure built on the stack.
	 */
	.macro SWITCH_TO_USER flag
	movl $0,%gs:pda_kernel_mode	# change to user mode
	subq $8*4,%rsp			# reuse rip, cs, rflags, rsp, ss in the stack
	movq %rax,(%rsp)
	movq %r11,1*8(%rsp)
	movq %rcx,2*8(%rsp)		# we saved %rcx upon exceptions
	movq $\flag,3*8(%rsp)
	movq $__USER_CS,5*8(%rsp)
	movq $__USER_DS,8*8(%rsp)
	movq $__HYPERVISOR_switch_to_user,%rax
	syscall
	.endm

	/* Mark the PDA as kernel mode; if it already was, fix up the saved
	 * SS selector in the frame instead. */
	.macro SWITCH_TO_KERNEL ssoff,adjust=0
	btsq $0,%gs:pda_kernel_mode
	jc  1f
	orb  $1,\ssoff-\adjust+4(%rsp)
1:
	.endm
+
+/*
+ * A newly forked process directly context switches into this.
+ */
+/* rdi: prev */
/* rdi: prev task (argument to schedule_tail).
 * First code run by a newly forked task; finishes the context switch and
 * exits through the appropriate syscall-return path. */
ENTRY(ret_from_fork)
	CFI_STARTPROC
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call	# kernel threads use the IRET path
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call	# so do 32-bit compat tasks
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call		# 64-bit user task: fast SYSRET path
rff_trace:
	/* Report the fork to the tracer, then retry the action dispatch. */
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
+
+/*
+ * System call entry. Up to 6 arguments in registers are supported.
+ *
+ * SYSCALL does not save anything on the stack and does not change the
+ * stack pointer.
+ */
+
+/*
+ * Register setup:
+ * rax system call number
+ * rdi arg0
+ * rcx return address for syscall/sysret, C arg3
+ * rsi arg1
+ * rdx arg2
+ * r10 arg3 (--> moved to rcx for C)
+ * r8 arg4
+ * r9 arg5
+ * r11 eflags for syscall/sysret, temporary for C
+ * r12-r15,rbp,rbx saved by C code, not touched.
+ *
+ * Interrupts are off on entry.
+ * Only called from user space.
+ *
+ * XXX if we had a free scratch register we could save the RSP into the stack frame
+ * and report it properly in ps. Unfortunately we haven't.
+ */
+
/* 64-bit syscall entry.  Under Xen the upcall mask plays the role of the
 * interrupt flag: it is saved into the frame (EVENT_MASK slot) and events
 * are re-enabled before dispatching the syscall. */
ENTRY(system_call)
	CFI_STARTPROC
	SAVE_ARGS -8,0
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	XEN_GET_VCPU_INFO(%r11)
	XEN_SAVE_UPCALL_MASK(%r11,%cl,EVENT_MASK-ARGOFFSET)	# saved %rcx
	XEN_UNBLOCK_EVENTS(%r11)
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx			# arg3: r10 (ABI) -> rcx (C)
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_WORK_MASK,%edi
	/* edi: flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)		# Xen equivalent of cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz sysret_careful
	XEN_UNBLOCK_EVENTS(%rsi)
	RESTORE_ARGS 0,8,0
	SWITCH_TO_USER VGCF_IN_SYSCALL	# return via hypercall, not sysret

	/* Handle reschedules */
	/* edx: work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)
	pushq %rdi
	call schedule
	popq %rdi
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
/* sti */
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz 1f

	/* Really a signal */
	/* edx: work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common		# needs a full frame; reuse helper
1:	movl $_TIF_NEED_RESCHED,%edi
	jmp sysret_check

	/* Do syscall tracing */
tracesys:
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja 1f
	movq %r10,%rcx		/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call

badsys:
	/* Unknown syscall number: fail with -ENOSYS through the normal path. */
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call
+
+/*
+ * Syscall return path ending with IRET.
+ * Has correct top of stack, but partial stack frame.
+ */
/* Slow syscall-return path: full work-flag processing, exits via IRET
 * (through retint_restore_args) instead of the SYSRET fast path. */
ENTRY(int_ret_from_sys_call)
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)
	testb $3,CS-ARGOFFSET(%rsp)
	jnz 1f
	/* Need to set the proper %ss (not NULL) for ring 3 iretq */
	movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
	jmp retint_restore_args		# return from ring-3 kernel
1:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	jmp retint_restore_args

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
/* sti */
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	pushq %rdi
	call schedule
	popq %rdi
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
/* sti */
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	/* Clear the trace bits from the workmask so we don't loop on them. */
	btr $TIF_SYSCALL_TRACE,%edi
	btr $TIF_SYSCALL_AUDIT,%edi
	btr $TIF_SINGLESTEP,%edi
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	jmp int_with_check
	CFI_ENDPROC
+
+/*
+ * Certain special system calls that need to save a complete full stack frame.
+ */
+
	/* Generate a stub that passes &pt_regs in \arg and jumps to the
	 * shared full-frame call helper below. */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp ptregscall_common
	.endm

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

/* Shared tail for the PTREGSCALL stubs: pop the return address into r11,
 * complete the stack frame, call the handler in %rax, then undo. */
ENTRY(ptregscall_common)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %r11, %r15		# stash return address across the call
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	pushq %r11		# re-push return address for ret
	CFI_ADJUST_CFA_OFFSET 8
	ret
	CFI_ENDPROC
+
/* execve needs a full frame (it rewrites the register state); a task that
 * becomes 32-bit must additionally leave through the IRET path. */
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %r11, %r15		# stash return address
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_THREAD_INFO(%rcx)
	bt $TIF_IA32,threadinfo_flags(%rcx)
	jc exec_32bit		# new image is 32-bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	RESTORE_REST
	push %r11
	ret

exec_32bit:
	CFI_ADJUST_CFA_OFFSET REST_SKIP
	movq %rax,RAX(%rsp)	# propagate the return value into the frame
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
+
+/*
+ * sigreturn is special because it needs to restore all registers on return.
+ * This cannot be done with SYSRET, so use the IRET return path instead.
+ */
/* rt_sigreturn restores the complete register set from the signal frame,
 * so it must exit through the IRET path rather than SYSRET. */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp		# drop the return address; we never 'ret'
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
+
+
+/*
+ * Interrupt entry/exit.
+ *
+ * Interrupt entry points save only callee clobbered registers in fast path.
+ *
+ * Entry runs with interrupts off.
+ */
+
+/* 0(%rsp): interrupt number */
/* 0(%rsp): interrupt number.
 * Common interrupt prologue: save registers, switch to the per-CPU IRQ
 * stack if not already on it (irqcount/cmove trick), then call \func with
 * &pt_regs in %rdi. */
	.macro interrupt func
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,(SS-RDI)
	CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
	CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Setup a stack frame pointer.  This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
#if 0 /* For Xen we don't need to do this */
	testl $3,CS(%rdi)
	je 1f
	swapgs
#endif
1:	addl $1,%gs:pda_irqcount	# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp		# only switch if irqcount was 0
	pushq %rdi			# save old stack
	call \func
	.endm
+
/* Interrupt/exception return path.  Decides between delivering pending
 * work (reschedule/signals), re-enabling event delivery, and the final
 * return to ring-3 kernel (iretq) or user space (hypercall). */
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz retint_careful
retint_restore_args:
	movb EVENT_MASK-REST_SKIP(%rsp), %al
	notb %al			# %al == ~saved_mask
	XEN_LOCK_VCPU_INFO_SMP(%rsi)
	andb evtchn_upcall_mask(%rsi),%al
	andb $1,%al			# %al == mask & ~saved_mask
	jnz restore_all_enable_events	# != 0 => reenable event delivery
	XEN_UNLOCK_VCPU_INFO_SMP(%rsi)

	RESTORE_ARGS 0,8,0
	testb $3,8(%rsp)		# check CS
	jnz user_mode
kernel_mode:
	orb $3,1*8(%rsp)		# force RPL 3 on CS for the guest iretq
	iretq
user_mode:
	SWITCH_TO_USER 0

	/* edi: workmask, edx: work */
retint_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
/* sti */
	pushq %rdi
	call schedule
	popq %rdi
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)
	GET_THREAD_INFO(%rcx)
/* cli */
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz retint_restore_args
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorq %rsi,%rsi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	XEN_GET_VCPU_INFO(%rsi)
	XEN_BLOCK_EVENTS(%rsi)
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jc retint_restore_args
	movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx)
/* sti */
	XEN_GET_VCPU_INFO(%rsi)
	XEN_UNBLOCK_EVENTS(%rsi)
	call schedule
	XEN_GET_VCPU_INFO(%rsi) /* %esi can be different */
	XEN_BLOCK_EVENTS(%rsi)
/* cli */
	GET_THREAD_INFO(%rcx)
	movl $0,threadinfo_preempt_count(%rcx)
	jmp retint_kernel	/* check again */
#endif
	CFI_ENDPROC
+
+/*
+ * APIC interrupts.
+ */
	/* APIC interrupt entry: push the (vector - 256) marker so the common
	 * code can distinguish interrupts from exceptions, then enter via
	 * the shared interrupt prologue and exit through error_entry. */
	.macro apicinterrupt num,func
	pushq $\num-256
	interrupt \func
	jmp error_entry
	CFI_ENDPROC
	.endm

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

ENTRY(invalidate_interrupt)
	apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
+
+/*
+ * Exception entry points.
+ */
	/* Exception without a hardware error code: Xen pushed rcx/r11 on the
	 * stack, so recover them, push a fake 0 error code, and hand the
	 * handler address to error_entry in %rax. */
	.macro zeroentry sym
	movq (%rsp),%rcx
	movq 8(%rsp),%r11
	addq $0x10,%rsp /* skip rcx and r11 */
	pushq $0 /* push error code/oldrax */
	pushq %rax /* push real oldrax to the rdi slot */
	leaq \sym(%rip),%rax
	jmp error_entry
	.endm

	/* Exception with an error code already on the stack. */
	.macro errorentry sym
	movq (%rsp),%rcx
	movq 8(%rsp),%r11
	addq $0x10,%rsp /* rsp points to the error code */
	pushq %rax
	leaq \sym(%rip),%rax
	jmp error_entry
	.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	.macro paranoidentry sym
	movq (%rsp),%rcx
	movq 8(%rsp),%r11
	addq $0x10,%rsp /* skip rcx and r11 */
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx		# negative GS base => already kernel GS
	js 1f
/* swapgs */
	xorl %ebx,%ebx
1:	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	call \sym
	.endm
+
+/*
+ * Exception entry point. This expects an error code/orig_rax on the stack
+ * and the exception handler in %rax.
+ */
+ENTRY(error_entry)
+ CFI_STARTPROC simple
+ CFI_DEF_CFA rsp,(SS-RDI)
+ CFI_REL_OFFSET rsp,(RSP-RDI)
+ CFI_REL_OFFSET rip,(RIP-RDI)
+ /* rdi slot contains rax, oldrax contains error code */
+ cld
+ subq $14*8,%rsp
+ CFI_ADJUST_CFA_OFFSET (14*8)
+ movq %rsi,13*8(%rsp)
+ CFI_REL_OFFSET rsi,RSI
+ movq 14*8(%rsp),%rsi /* load rax from rdi slot */
+ movq %rdx,12*8(%rsp)
+ CFI_REL_OFFSET rdx,RDX
+ movq %rcx,11*8(%rsp)
+ CFI_REL_OFFSET rcx,RCX
+ movq %rsi,10*8(%rsp) /* store rax */
+ CFI_REL_OFFSET rax,RAX
+ movq %r8, 9*8(%rsp)
+ CFI_REL_OFFSET r8,R8
+ movq %r9, 8*8(%rsp)
+ CFI_REL_OFFSET r9,R9
+ movq %r10,7*8(%rsp)
+ CFI_REL_OFFSET r10,R10
+ movq %r11,6*8(%rsp)
+ CFI_REL_OFFSET r11,R11
+ movq %rbx,5*8(%rsp)
+ CFI_REL_OFFSET rbx,RBX
+ movq %rbp,4*8(%rsp)
+ CFI_REL_OFFSET rbp,RBP
+ movq %r12,3*8(%rsp)
+ CFI_REL_OFFSET r12,R12
+ movq %r13,2*8(%rsp)
+ CFI_REL_OFFSET r13,R13
+ movq %r14,1*8(%rsp)
+ CFI_REL_OFFSET r14,R14
+ movq %r15,(%rsp)
+ CFI_REL_OFFSET r15,R15
+#if 0
+ cmpl $__KERNEL_CS,CS(%rsp)
+ je error_kernelspace
+#endif
+error_call_handler:
+ movq %rdi, RDI(%rsp)
+ movq %rsp,%rdi
+ movq ORIG_RAX(%rsp),%rsi # get error code
+ movq $-1,ORIG_RAX(%rsp)
+ leaq do_hypervisor_callback,%rcx
+ cmpq %rax,%rcx
+ je 0f # don't save event mask for callbacks
+ XEN_GET_VCPU_INFO(%r11)
+ XEN_SAVE_UPCALL_MASK(%r11,%cl,EVENT_MASK)
+0:
+ call *%rax
+error_exit:
+ RESTORE_REST
+/* cli */
+ XEN_GET_VCPU_INFO(%rsi)
+ XEN_BLOCK_EVENTS(%rsi)
+ GET_THREAD_INFO(%rcx)
+ testb $3,CS-ARGOFFSET(%rsp)
+ jz retint_kernel
+ movl threadinfo_flags(%rcx),%edx
+ movl $_TIF_WORK_MASK,%edi
+ andl %edi,%edx
+ jnz retint_careful
+ jmp retint_restore_args
+
+error_kernelspace:
+ /*
+ * We need to re-write the logic here because we don't do iretq
+ * to return to user mode. It's still possible that we get trap/fault
+ * in the kernel (when accessing buffers pointed to by system calls,
+ * for example).
+ *
+ */
+#if 0
+ incl %ebx
+ /* There are two places in the kernel that can potentially fault with
+ usergs. Handle them here. The exception handlers after
+ iret run with kernel gs again, so don't set the user space flag.
+ B stepping K8s sometimes report a truncated RIP for IRET
+ exceptions returning to compat mode. Check for these here too. */
+ leaq iret_label(%rip),%rbp
+ cmpq %rbp,RIP(%rsp)
+ je error_swapgs
+ movl %ebp,%ebp /* zero extend */
+ cmpq %rbp,RIP(%rsp)
+ je error_swapgs
+ cmpq $gs_change,RIP(%rsp)
+ je error_swapgs
+ jmp error_sti
+#endif
+
+ENTRY(hypervisor_callback)
+ zeroentry do_hypervisor_callback
+
+/*
+ * Copied from arch/xen/i386/kernel/entry.S
+ */
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
+# Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
+# see the correct pointer to the pt_regs
+ addq $8, %rsp # we don't return, adjust the stack frame
+11: movb $0, EVENT_MASK(%rsp)
+ call evtchn_do_upcall
+ jmp error_exit
+
+ ALIGN
+restore_all_enable_events:
+ XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
+
+scrit: /**** START OF CRITICAL REGION ****/
+ XEN_TEST_PENDING(%rsi)
+ jnz 14f # process more events if necessary...
+ XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
+ RESTORE_ARGS 0,8,0
+ testb $3,8(%rsp) # check CS
+ jnz crit_user_mode
+ orb $3,1*8(%rsp)
+ iretq
+crit_user_mode:
+ SWITCH_TO_USER 0
+
+14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
+ XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
+ SAVE_REST
+ movq %rsp,%rdi # set the argument again
+ jmp 11b
+ecrit: /**** END OF CRITICAL REGION ****/
+# At this point, unlike on x86-32, we don't do the fixup to simplify the
+# code and the stack frame is more complex on x86-64.
+# When the kernel is interrupted in the critical section, the kernel
+# will do IRET in that case, and everything will be restored at that point,
+# i.e. it just resumes from the next instruction interrupted with the same context.
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+ addq $0x10,%rsp /* skip rcx and r11 */
+1: movl (%rsp),%ds
+2: movl 8(%rsp),%es
+3: movl 16(%rsp),%fs
+4: movl 24(%rsp),%gs
+ addq $0x20,%rsp /* skip the above selectors */
+ SAVE_ALL
+ jmp error_exit
+.section .fixup,"ax"; \
+6: movq $0,(%rsp); \
+ jmp 1b; \
+7: movq $0,(%rsp); \
+ jmp 2b; \
+8: movq $0,(%rsp); \
+ jmp 3b; \
+9: movq $0,(%rsp); \
+ jmp 4b; \
+.previous; \
+.section __ex_table,"a";\
+ .align 16; \
+ .quad 1b,6b; \
+ .quad 2b,7b; \
+ .quad 3b,8b; \
+ .quad 4b,9b; \
+.previous
+
+#if 0
+ .section __ex_table,"a"
+ .align 8
+ .quad gs_change,bad_gs
+ .previous
+ .section .fixup,"ax"
+ /* running with kernelgs */
+bad_gs:
+/* swapgs */ /* switch back to user gs */
+ xorl %eax,%eax
+ movl %eax,%gs
+ jmp 2b
+ .previous
+#endif
+
+/*
+ * Create a kernel thread.
+ *
+ * C extern interface:
+ * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+ *
+ * asm input arguments:
+ * rdi: fn, rsi: arg, rdx: flags
+ */
+ENTRY(kernel_thread)
+ CFI_STARTPROC
+ FAKE_STACK_FRAME $child_rip
+ SAVE_ALL
+
+ # rdi: flags, rsi: usp, rdx: will be &pt_regs
+ movq %rdx,%rdi
+ orq kernel_thread_flags(%rip),%rdi
+ movq $-1, %rsi
+ movq %rsp, %rdx
+
+ xorl %r8d,%r8d
+ xorl %r9d,%r9d
+
+ # clone now
+ call do_fork
+ movq %rax,RAX(%rsp)
+ xorl %edi,%edi
+
+ /*
+ * It isn't worth to check for reschedule here,
+ * so internally to the x86_64 port you can rely on kernel_thread()
+ * not to reschedule the child before returning, this avoids the need
+ * of hacks for example to fork off the per-CPU idle tasks.
+ * [Hopefully no generic code relies on the reschedule -AK]
+ */
+ RESTORE_ALL
+ UNFAKE_STACK_FRAME
+ ret
+ CFI_ENDPROC
+
+
+child_rip:
+ /*
+ * Here we are in the child and the registers are set as they were
+ * at kernel_thread() invocation in the parent.
+ */
+ movq %rdi, %rax
+ movq %rsi, %rdi
+ call *%rax
+ # exit
+ xorq %rdi, %rdi
+ call do_exit
+
+/*
+ * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
+ *
+ * C extern interface:
+ * extern long execve(char *name, char **argv, char **envp)
+ *
+ * asm input arguments:
+ * rdi: name, rsi: argv, rdx: envp
+ *
+ * We want to fallback into:
+ * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
+ *
+ * do_sys_execve asm fallback arguments:
+ * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ */
+ENTRY(execve)
+ CFI_STARTPROC
+ FAKE_STACK_FRAME $0
+ SAVE_ALL
+ call sys_execve
+ movq %rax, RAX(%rsp)
+ RESTORE_REST
+ testq %rax,%rax
+ jne 1f
+ jmp int_ret_from_sys_call
+1: RESTORE_ARGS
+ UNFAKE_STACK_FRAME
+ ret
+ CFI_ENDPROC
+
+
+ /*
+ * Copy error_entry because of the different stack frame
+ */
+ENTRY(page_fault)
+ movq (%rsp),%rcx
+ movq 8(%rsp),%r11
+ addq $0x10,%rsp # now %rsp points to %cr2
+ pushq %rax
+ leaq do_page_fault(%rip),%rax
+ cld
+ subq $13*8,%rsp
+ movq %rdx,12*8(%rsp) # save %rdx
+ movq 13*8(%rsp),%rdx # load rax
+ movq %rcx,11*8(%rsp)
+ movq %rdx,10*8(%rsp) # store rax
+ movq %rsi,13*8(%rsp) # now save %rsi
+ movq 14*8(%rsp),%rdx # load %cr2, 3rd argument
+ movq %r8, 9*8(%rsp)
+ movq %r9, 8*8(%rsp)
+ movq %r10,7*8(%rsp)
+ movq %r11,6*8(%rsp)
+ movq %rbx,5*8(%rsp)
+ movq %rbp,4*8(%rsp)
+ movq %r12,3*8(%rsp)
+ movq %r13,2*8(%rsp)
+ movq %r14,1*8(%rsp)
+ movq %r15,(%rsp)
+#if 0
+ cmpl $__KERNEL_CS,CS(%rsp)
+ je error_kernelspace
+#endif
+ /*
+ * 1st and 2nd arguments are set by error_call_handler
+ */
+ jmp error_call_handler
+
+ENTRY(coprocessor_error)
+ zeroentry do_coprocessor_error
+
+ENTRY(simd_coprocessor_error)
+ zeroentry do_simd_coprocessor_error
+
+ENTRY(device_not_available)
+ zeroentry math_state_restore
+
+ /* runs on exception stack */
+ENTRY(debug)
+ CFI_STARTPROC
+ pushq $0
+ CFI_ADJUST_CFA_OFFSET 8
+ paranoidentry do_debug
+ /* switch back to process stack to restore the state ptrace touched */
+ movq %rax,%rsp
+ jmp paranoid_exit
+ CFI_ENDPROC
+
+#if 0
+ /* runs on exception stack */
+ENTRY(nmi)
+ CFI_STARTPROC
+ pushq $-1
+ CFI_ADJUST_CFA_OFFSET 8
+ paranoidentry do_nmi
+ /* ebx: no swapgs flag */
+#endif
+paranoid_exit:
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz paranoid_restore
+paranoid_swapgs:
+/* cli
+ swapgs */
+paranoid_restore:
+ RESTORE_ALL 8
+/* iretq */
+paranoid_userspace:
+/* cli */
+ GET_THREAD_INFO(%rcx)
+ movl threadinfo_flags(%rcx),%edx
+ testl $_TIF_NEED_RESCHED,%edx
+ jnz paranoid_resched
+ testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+ jnz paranoid_signal
+ jmp paranoid_swapgs
+paranoid_resched:
+/* sti */
+ call schedule
+ jmp paranoid_exit
+paranoid_signal:
+/* sti */
+ xorl %esi,%esi /* oldset */
+ movq %rsp,%rdi /* &pt_regs */
+ call do_notify_resume
+ jmp paranoid_exit
+ CFI_ENDPROC
+
+ENTRY(int3)
+ zeroentry do_int3
+
+ENTRY(overflow)
+ zeroentry do_overflow
+
+ENTRY(bounds)
+ zeroentry do_bounds
+
+ENTRY(invalid_op)
+ zeroentry do_invalid_op
+
+ENTRY(coprocessor_segment_overrun)
+ zeroentry do_coprocessor_segment_overrun
+
+ENTRY(reserved)
+ zeroentry do_reserved
+
+ /* runs on exception stack */
+ENTRY(double_fault)
+ CFI_STARTPROC
+ paranoidentry do_double_fault
+ movq %rax,%rsp
+ jmp paranoid_exit
+ CFI_ENDPROC
+
+ENTRY(invalid_TSS)
+ errorentry do_invalid_TSS
+
+ENTRY(segment_not_present)
+ errorentry do_segment_not_present
+
+ /* runs on exception stack */
+ENTRY(stack_segment)
+ CFI_STARTPROC
+ paranoidentry do_stack_segment
+ movq %rax,%rsp
+ jmp paranoid_exit
+ CFI_ENDPROC
+
+ENTRY(general_protection)
+ errorentry do_general_protection
+
+ENTRY(alignment_check)
+ errorentry do_alignment_check
+
+ENTRY(divide_error)
+ zeroentry do_divide_error
+
+ENTRY(spurious_interrupt_bug)
+ zeroentry do_spurious_interrupt_bug
+
+#ifdef CONFIG_X86_MCE
+ /* runs on exception stack */
+ENTRY(machine_check)
+ CFI_STARTPROC
+ pushq $0
+ CFI_ADJUST_CFA_OFFSET 8
+ paranoidentry do_machine_check
+ jmp paranoid_exit
+ CFI_ENDPROC
+#endif
+
+ENTRY(call_debug)
+ zeroentry do_call_debug
+
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head.S b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head.S
new file mode 100644
index 0000000000..8a9caf951a
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head.S
@@ -0,0 +1,207 @@
+/*
+ * linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
+ *
+ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
+ * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
+ *
+ * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
+ *
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Modified for Xen
+ */
+
+
+#include <linux/linkage.h>
+
+.section __xen_guest
+ .ascii "GUEST_OS=linux,GUEST_VER=2.6,XEN_VER=3.0,VIRT_BASE=0xffffffff80100000"
+ .ascii ",LOADER=generic"
+/* .ascii ",PT_MODE_WRITABLE" */
+ .byte 0
+
+
+#include <linux/threads.h>
+#include <asm/desc.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/msr.h>
+#include <asm/cache.h>
+/* #include <asm/thread_info.h> */
+
+
+/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
+ * because we need identity-mapped pages on setup so define __START_KERNEL to
+ * 0x100000 for this stage
+ *
+ */
+
+ .text
+ .code64
+ENTRY(_start)
+ cld
+ movq init_rsp(%rip),%rsp
+ /* Copy the necessary stuff from xen_start_info structure. */
+ movq $xen_start_info_union,%rdi
+ movq $64,%rcx /* sizeof (union xen_start_info_union) / sizeof (long) */
+ rep movsq
+
+#ifdef CONFIG_SMP
+ ENTRY(startup_64_smp)
+ cld
+#endif /* CONFIG_SMP */
+
+ /* zero EFLAGS after setting rsp */
+ pushq $0
+ popfq
+ movq initial_code(%rip),%rax
+ jmp *%rax
+
+ /* SMP bootup changes these two */
+ .globl initial_code
+initial_code:
+ .quad x86_64_start_kernel
+ .globl init_rsp
+init_rsp:
+ .quad init_thread_union+THREAD_SIZE-8
+
+ENTRY(early_idt_handler)
+ xorl %eax,%eax
+ movq 8(%rsp),%rsi # get rip
+ movq (%rsp),%rdx
+ leaq early_idt_msg(%rip),%rdi
+1: hlt # generate #GP
+ jmp 1b
+
+early_idt_msg:
+ .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n"
+
+#if 0
+ENTRY(lgdt_finish)
+ movl $(__USER_DS),%eax # DS/ES contains default USER segment
+ movw %ax,%ds
+ movw %ax,%es
+ movl $(__KERNEL_DS),%eax
+ movw %ax,%ss # after changing gdt.
+ popq %rax # get the return address
+ pushq $(__KERNEL_CS)
+ pushq %rax
+ lretq
+#endif
+
+ENTRY(stext)
+ENTRY(_stext)
+
+ /*
+ * This default setting generates an ident mapping at address 0x100000
+ * and a mapping for the kernel that precisely maps virtual address
+ * 0xffffffff80000000 to physical address 0x000000. (always using
+ * 2Mbyte large pages provided by PAE mode)
+ */
+.org 0x1000
+ENTRY(init_level4_pgt)
+ .fill 512,8,0
+
+ /*
+ * We update two pgd entries to make kernel and user pgd consistent
+ * at pgd_populate(). It can be used for kernel modules. So we place
+ * this page here for those cases to avoid memory corruption.
+ * We also use this page to establish the initial mapping for
+ * vsyscall area.
+ */
+.org 0x2000
+ENTRY(init_level4_user_pgt)
+ .fill 512,8,0
+
+ /*
+ * This is used for vsyscall area mapping as we have a different
+ * level4 page table for user.
+ */
+.org 0x3000
+ENTRY(level3_user_pgt)
+ .fill 512,8,0
+
+.org 0x4000
+ENTRY(cpu_gdt_table)
+/* The TLS descriptors are currently at a different place compared to i386.
+ Hopefully nobody expects them at a fixed place (Wine?) */
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x008ffa000000ffff /* __KERNEL_COMPAT32_CS */
+ .quad 0x00affa000000ffff /* __KERNEL_CS */
+ .quad 0x00cff2000000ffff /* __KERNEL_DS */
+
+ .quad 0x00cffa000000ffff /* __USER32_CS */
+ .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
+ .quad 0x00affa000000ffff /* __USER_CS */
+ .quad 0x00cffa000000ffff /* __KERNEL32_CS */
+ .quad 0,0 /* TSS */
+ .quad 0 /* LDT */
+ .quad 0,0,0 /* three TLS descriptors */
+ .quad 0 /* unused now */
+
+gdt_end:
+ /* asm/segment.h:GDT_ENTRIES must match this */
+ /* This should be a multiple of the cache line size */
+ /* GDTs of other CPUs: */
+ .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
+
+.org 0x5000
+ENTRY(empty_zero_page)
+
+.org 0x6000
+ENTRY(empty_bad_page)
+
+.org 0x7000
+ENTRY(empty_bad_pte_table)
+
+.org 0x8000
+ENTRY(empty_bad_pmd_table)
+
+ .org 0x9000
+#ifdef CONFIG_ACPI_SLEEP
+ENTRY(wakeup_level4_pgt)
+ .quad 0x0000000000102007 /* -> level3_ident_pgt */
+ .fill 255,8,0
+ .quad 0x000000000010a007
+ .fill 254,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad 0x0000000000103007 /* -> level3_kernel_pgt */
+#endif
+
+ .data
+
+ .align 16
+ .globl cpu_gdt_descr
+cpu_gdt_descr:
+ .word gdt_end-cpu_gdt_table
+gdt:
+ .quad cpu_gdt_table
+#ifdef CONFIG_SMP
+ .rept NR_CPUS-1
+ .word 0
+ .quad 0
+ .endr
+#endif
+
+ENTRY(gdt_table32)
+ .quad 0x0000000000000000 /* This one is magic */
+ .quad 0x0000000000000000 /* unused */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+gdt32_end:
+
+/* We need valid kernel segments for data and code in long mode too
+ * IRET will check the segment types kkeil 2000/10/28
+ * Also sysret mandates a special GDT layout
+ */
+
+#if 0
+.align L1_CACHE_BYTES
+#endif
+ .align L1_CACHE_BYTES
+ENTRY(idt_table)
+ .rept 256
+ .quad 0
+ .quad 0
+ .endr
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head64.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head64.c
new file mode 100644
index 0000000000..a4304edf6d
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/head64.c
@@ -0,0 +1,132 @@
+/*
+ * linux/arch/x86_64/kernel/head64.c -- prepare to run common code
+ *
+ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *
+ * $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $
+ *
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Modified for Xen.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/percpu.h>
+
+#include <asm/processor.h>
+#include <asm/proto.h>
+#include <asm/smp.h>
+#include <asm/bootsetup.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+
+unsigned long start_pfn;
+
+/* Don't add a printk in there. printk relies on the PDA which is not initialized
+ yet. */
+#if 0
+static void __init clear_bss(void)
+{
+ extern char __bss_start[], __bss_end[];
+ memset(__bss_start, 0,
+ (unsigned long) __bss_end - (unsigned long) __bss_start);
+}
+#endif
+
+extern char x86_boot_params[2048];
+
+#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
+#define OLD_CL_MAGIC_ADDR 0x90020
+#define OLD_CL_MAGIC 0xA33F
+#define OLD_CL_BASE_ADDR 0x90000
+#define OLD_CL_OFFSET 0x90022
+
+extern char saved_command_line[];
+
+#if 0
+static void __init copy_bootdata(char *real_mode_data)
+{
+ int new_data;
+ char * command_line;
+
+ memcpy(x86_boot_params, real_mode_data, 2048);
+ new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+ if (!new_data) {
+ if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+ printk("so old bootloader that it does not support commandline?!\n");
+ return;
+ }
+ new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+ printk("old bootloader convention, maybe loadlin?\n");
+ }
+ command_line = (char *) ((u64)(new_data));
+ memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
+ printk("Bootdata ok (command line is %s)\n", saved_command_line);
+}
+#endif
+
+static void __init setup_boot_cpu_data(void)
+{
+ int dummy, eax;
+
+ /* get vendor info */
+ cpuid(0, &boot_cpu_data.cpuid_level,
+ (int *)&boot_cpu_data.x86_vendor_id[0],
+ (int *)&boot_cpu_data.x86_vendor_id[8],
+ (int *)&boot_cpu_data.x86_vendor_id[4]);
+
+ /* get cpu type */
+ cpuid(1, &eax, &dummy, &dummy,
+ (unsigned int *) &boot_cpu_data.x86_capability);
+ boot_cpu_data.x86 = (eax >> 8) & 0xf;
+ boot_cpu_data.x86_model = (eax >> 4) & 0xf;
+ boot_cpu_data.x86_mask = eax & 0xf;
+}
+
+extern char _end[];
+
+void __init x86_64_start_kernel(char * real_mode_data)
+{
+ int i;
+
+ phys_to_machine_mapping = (u32 *)xen_start_info.mfn_list;
+ start_pfn = (__pa(xen_start_info.pt_base) >> PAGE_SHIFT) + xen_start_info.nr_pt_frames;
+
+ for (i = 0; i < 256; i++)
+ set_intr_gate(i, early_idt_handler);
+#if 0
+ asm volatile("lidt %0" :: "m" (idt_descr));
+#endif
+ pda_init(0);
+ /* copy_bootdata(real_mode_data); */
+#ifdef CONFIG_SMP
+ cpu_set(0, cpu_online_map);
+#endif
+ /* default console: */
+ if (!strstr(saved_command_line, "console="))
+ strcat(saved_command_line, " console=tty0");
+#if 0
+ s = strstr(saved_command_line, "earlyprintk=");
+ if (s != NULL)
+ setup_early_printk(s);
+#endif
+
+#ifdef CONFIG_DISCONTIGMEM
+ s = strstr(saved_command_line, "numa=");
+ if (s != NULL)
+ numa_setup(s+5);
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ if (strstr(saved_command_line, "disableapic"))
+ disable_apic = 1;
+#endif
+ /* You need early console to see that */
+ if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
+ panic("Kernel too big for kernel mapping\n");
+
+ setup_boot_cpu_data();
+ start_kernel();
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/init_task.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/init_task.c
new file mode 100644
index 0000000000..c4dc914911
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/init_task.c
@@ -0,0 +1,49 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union
+ __attribute__((__section__(".data.init_task"))) =
+ { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp;
+
+#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/io_apic.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
new file mode 100644
index 0000000000..647f09b32e
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/io_apic.c
@@ -0,0 +1,2051 @@
+/*
+ * Intel IO-APIC support for multi-Pentium hosts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ * Many thanks to Stig Venaas for trying out countless experimental
+ * patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
+ * and Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
+ * thanks to Eric Gilmore
+ * and Rolf G. Tews
+ * for testing these extensively
+ * Paul Diefenbaugh : Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/config.h>
+#include <linux/smp_lock.h>
+#include <linux/mc146818rtc.h>
+#include <linux/acpi.h>
+#include <linux/sysdev.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/mach_apic.h>
+
+#define __apicdebuginit __init
+
+int sis_apic_bug; /* not actually supported, dummy for compile */
+
+static DEFINE_SPINLOCK(ioapic_lock);
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+ short apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+#ifdef CONFIG_PCI_MSI
+#define vector_to_irq(vector) \
+ (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#else
+#define vector_to_irq(vector) (vector)
+#endif
+
+#ifdef CONFIG_XEN
+
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/xen-public/physdev.h>
+
+/* Fake i8259 */
+#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
+#define disable_8259A_irq(_irq) ((void)0)
+#define i8259A_irq_pending(_irq) (0)
+
+unsigned long io_apic_irqs;
+
+static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
+{
+ physdev_op_t op;
+ int ret;
+
+ op.cmd = PHYSDEVOP_APIC_READ;
+ op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
+ op.u.apic_op.offset = reg;
+ ret = HYPERVISOR_physdev_op(&op);
+ if (ret)
+ return ret;
+ return op.u.apic_op.value;
+}
+
+static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ physdev_op_t op;
+
+ op.cmd = PHYSDEVOP_APIC_WRITE;
+ op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
+ op.u.apic_op.offset = reg;
+ op.u.apic_op.value = value;
+ HYPERVISOR_physdev_op(&op);
+}
+
+#define io_apic_read(a,r) xen_io_apic_read(a,r)
+#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
+
+#define clear_IO_APIC() ((void)0)
+
+#endif /* CONFIG_XEN */
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+ static int first_free_entry = NR_IRQS;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ while (entry->next)
+ entry = irq_2_pin + entry->next;
+
+ if (entry->pin != -1) {
+ entry->next = first_free_entry;
+ entry = irq_2_pin + entry->next;
+ if (++first_free_entry >= PIN_MAP_SIZE)
+ panic("io_apic.c: whoops");
+ }
+ entry->apic = apic;
+ entry->pin = pin;
+}
+
+#ifndef CONFIG_XEN
+#define __DO_ACTION(R, ACTION, FINAL) \
+ \
+{ \
+ int pin; \
+ struct irq_pin_list *entry = irq_2_pin + irq; \
+ \
+ for (;;) { \
+ unsigned int reg; \
+ pin = entry->pin; \
+ if (pin == -1) \
+ break; \
+ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
+ reg ACTION; \
+ io_apic_modify(entry->apic, reg); \
+ if (!entry->next) \
+ break; \
+ entry = irq_2_pin + entry->next; \
+ } \
+ FINAL; \
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL) \
+ \
+ static void name##_IO_APIC_irq (unsigned int irq) \
+ __DO_ACTION(R, ACTION, FINAL)
+
+DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
+ /* mask = 1 */
+DO_ACTION( __unmask, 0, &= 0xfffeffff, )
+ /* mask = 0 */
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+ *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 1;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
+}
+
+#endif /* !CONFIG_XEN */
+
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+int pirq_entries [MAX_PIRQS];
+int pirqs_enabled;
+int skip_ioapic_setup;
+int ioapic_force;
+
+/* dummy parsing: see setup.c */
+
+static int __init disable_ioapic_setup(char *str)
+{
+ skip_ioapic_setup = 1;
+ return 1;
+}
+
+static int __init enable_ioapic_setup(char *str)
+{
+ ioapic_force = 1;
+ skip_ioapic_setup = 0;
+ return 1;
+}
+
+
+__setup("noapic", disable_ioapic_setup);
+__setup("apic", enable_ioapic_setup);
+
+
+#include <asm/pci-direct.h>
+#include <linux/pci_ids.h>
+#include <linux/pci.h>
+
+/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
+ off. Check for an Nvidia or VIA PCI bridge and turn it off.
+ Use pci direct infrastructure because this runs before the PCI subsystem.
+
+ Can be overwritten with "apic"
+
+ And another hack to disable the IOMMU on VIA chipsets.
+
+ Kludge-O-Rama. */
+void __init check_ioapic(void)
+{
+ int num,slot,func;
+ if (ioapic_force)
+ return;
+
+ /* Poor man's PCI discovery */
+ for (num = 0; num < 32; num++) {
+ for (slot = 0; slot < 32; slot++) {
+ for (func = 0; func < 8; func++) {
+ u32 class;
+ u32 vendor;
+ u8 type;
+ class = read_pci_config(num,slot,func,
+ PCI_CLASS_REVISION);
+ if (class == 0xffffffff)
+ break;
+
+ if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
+ continue;
+
+ vendor = read_pci_config(num, slot, func,
+ PCI_VENDOR_ID);
+ vendor &= 0xffff;
+ switch (vendor) {
+ case PCI_VENDOR_ID_VIA:
+#ifdef CONFIG_GART_IOMMU
+ if ((end_pfn >= (0xffffffff>>PAGE_SHIFT) ||
+ force_iommu) &&
+ !iommu_aperture_allowed) {
+ printk(KERN_INFO
+ "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n");
+ iommu_aperture_disabled = 1;
+ }
+#endif
+ return;
+ case PCI_VENDOR_ID_NVIDIA:
+#ifdef CONFIG_ACPI
+ /* All timer overrides on Nvidia
+ seem to be wrong. Skip them. */
+ acpi_skip_timer_override = 1;
+ printk(KERN_INFO
+ "Nvidia board detected. Ignoring ACPI timer override.\n");
+#endif
+ /* RED-PEN skip them on mptables too? */
+ return;
+ }
+
+ /* No multi-function device? */
+ type = read_pci_config_byte(num,slot,func,
+ PCI_HEADER_TYPE);
+ if (!(type & 0x80))
+ break;
+ }
+ }
+ }
+}
+
+static int __init ioapic_pirq_setup(char *str)
+{
+ int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
+
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ pirqs_enabled = 1;
+ apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
+
+ for (i = 0; i < max; i++) {
+ apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+ }
+ return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int apic, int pin, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_irqtype == type &&
+ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].mpc_dstirq == pin)
+ return i;
+
+ return -1;
+}
+
+#ifndef CONFIG_XEN
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+
+ return mp_irqs[i].mpc_dstirq;
+ }
+ return -1;
+}
+#endif
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ *
+ * Given a PCI (bus, slot, pin) triple, return the Linux IRQ number the
+ * MP table routes it to, or -1 if the bus is unknown / no entry matches.
+ * If no exact pin match is found, the first slot-only match is returned
+ * as a fuzzy best guess (works around broken mptables).
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+ int apic, i, best_guess = -1;
+
+ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if (mp_bus_id_to_pci_bus[bus] == -1) {
+ apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ /* Resolve which IO-APIC this MP entry targets. */
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ break;
+
+ /* mpc_irqtype == 0 means mp_INT; slot is encoded in bits 2..6
+ * of mpc_srcbusirq, the INTx pin in bits 0..1. */
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+ !mp_irqs[i].mpc_irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+ return irq;
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0)
+ best_guess = irq;
+ }
+ }
+ return best_guess;
+}
+
+/*
+ * EISA Edge/Level control register, ELCR
+ *
+ * Returns the trigger bit for @irq from the ELCR at I/O ports 0x4d0/0x4d1
+ * (1 = level, 0 = edge). IRQs >= 16 cannot be in the ELCR; warn and
+ * default to edge (0).
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+ if (irq < 16) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx) (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx) (0)
+#define default_ISA_polarity(idx) (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx) (1)
+#define default_PCI_polarity(idx) (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx) (1)
+#define default_MCA_polarity(idx) (0)
+
+/*
+ * Decode the polarity of MP-table entry @idx from bits 0..1 of
+ * mpc_irqflag: 0 = bus-default, 1 = active high, 3 = active low.
+ * Returns 0 for active-high, 1 for active-low; broken/reserved
+ * values fall back to active-low (1) with a warning.
+ */
+static int __init MPBIOS_polarity(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int polarity;
+
+ /*
+ * Determine IRQ line polarity (high active or low active):
+ */
+ switch (mp_irqs[idx].mpc_irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ polarity = default_ISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ polarity = default_EISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ polarity = default_PCI_polarity(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ polarity = default_MCA_polarity(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* high active */
+ {
+ polarity = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ case 3: /* low active */
+ {
+ polarity = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ return polarity;
+}
+
+/*
+ * Decode the trigger mode of MP-table entry @idx from bits 2..3 of
+ * mpc_irqflag: 0 = bus-default, 1 = edge, 3 = level.
+ * Returns 0 for edge, 1 for level.
+ * NOTE(review): the inner bus-default fallback returns 1 (level) but
+ * the outer invalid case returns 0 (edge) — matches the historical
+ * upstream code, preserved as-is.
+ */
+static int MPBIOS_trigger(int idx)
+{
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int trigger;
+
+ /*
+ * Determine IRQ trigger mode (edge or level sensitive):
+ */
+ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ trigger = default_ISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ trigger = default_PCI_trigger(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* edge */
+ {
+ trigger = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ case 3: /* level */
+ {
+ trigger = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 0;
+ break;
+ }
+ }
+ return trigger;
+}
+
+/* Thin wrappers: the MP-BIOS table is currently the only source of
+ * polarity/trigger information, so these just forward to the decoders. */
+static inline int irq_polarity(int idx)
+{
+ return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+ return MPBIOS_trigger(idx);
+}
+
+/*
+ * Map MP-table entry @idx at (IO-APIC @apic, @pin) to a Linux IRQ number.
+ * ISA-like buses use the source-bus IRQ directly; PCI pins are numbered
+ * sequentially across all IO-APICs. Pins 16..23 may additionally be
+ * redirected (or disabled) via the "pirq=" command-line table.
+ */
+static int pin_2_irq(int idx, int apic, int pin)
+{
+ int irq, i;
+ int bus = mp_irqs[idx].mpc_srcbus;
+
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].mpc_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ case MP_BUS_EISA:
+ case MP_BUS_MCA:
+ {
+ irq = mp_irqs[idx].mpc_srcbusirq;
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /*
+ * PCI IRQs are mapped in order
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
+ break;
+ }
+ default:
+ {
+ printk(KERN_ERR "unknown bus type %d.\n",bus);
+ irq = 0;
+ break;
+ }
+ }
+
+ /*
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
+ */
+ if ((pin >= 16) && (pin <= 23)) {
+ if (pirq_entries[pin-16] != -1) {
+ if (!pirq_entries[pin-16]) {
+ apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
+ } else {
+ irq = pirq_entries[pin-16];
+ apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
+ pin-16, irq);
+ }
+ }
+ }
+ return irq;
+}
+
+/*
+ * Determine the trigger mode of @irq by scanning every IO-APIC pin for
+ * an MP-table mp_INT entry that maps to it. Returns the decoded trigger
+ * (0 = edge, 1 = level); IRQs with no entry default to edge.
+ */
+static inline int IO_APIC_irq_trigger(int irq)
+{
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
+}
+
+/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQ_VECTORS];
+
+/*
+ * Assign an interrupt vector to @irq. Unlike native Linux, the vector
+ * is obtained from the Xen hypervisor via PHYSDEVOP_ASSIGN_VECTOR
+ * rather than chosen locally. Returns the vector, or -ENOSPC if the
+ * hypercall fails. Pass AUTO_ASSIGN to get a vector without binding
+ * it to an IRQ's cached IO_APIC_VECTOR slot.
+ */
+int assign_irq_vector(int irq)
+{
+ static int current_vector = FIRST_DEVICE_VECTOR;
+ physdev_op_t op;
+
+ BUG_ON(irq >= NR_IRQ_VECTORS);
+ /* Reuse a previously assigned vector for this IRQ, if any. */
+ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+ return IO_APIC_VECTOR(irq);
+
+ op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
+ op.u.irq_op.irq = irq;
+ if (HYPERVISOR_physdev_op(&op))
+ return -ENOSPC;
+ current_vector = op.u.irq_op.vector;
+
+ vector_irq[current_vector] = irq;
+ if (irq != AUTO_ASSIGN)
+ IO_APIC_VECTOR(irq) = current_vector;
+
+ return current_vector;
+}
+
+extern void (*interrupt[NR_IRQS])(void);
+#ifndef CONFIG_XEN
+static struct hw_interrupt_type ioapic_level_type;
+static struct hw_interrupt_type ioapic_edge_type;
+
+#define IOAPIC_AUTO -1
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
+
+/*
+ * Install the edge or level IO-APIC handler for (@irq, @vector) and set
+ * the IDT gate. With vector-based indexing (CONFIG_PCI_MSI) the
+ * irq_desc[] is indexed by vector for non-legacy IRQs; otherwise by IRQ.
+ * @trigger may be IOAPIC_AUTO to consult the MP table.
+ */
+static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+{
+ if (use_pci_vector() && !platform_legacy_irq(irq)) {
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ irq_desc[vector].handler = &ioapic_level_type;
+ else
+ irq_desc[vector].handler = &ioapic_edge_type;
+ set_intr_gate(vector, interrupt[vector]);
+ } else {
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ irq_desc[irq].handler = &ioapic_level_type;
+ else
+ irq_desc[irq].handler = &ioapic_edge_type;
+ set_intr_gate(vector, interrupt[irq]);
+ }
+}
+#else
+/* Under Xen, event-channel code owns the handlers; nothing to register. */
+#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
+#endif /* !CONFIG_XEN */
+
+/*
+ * Program a routing-table entry for every IO-APIC pin that has an
+ * mp_INT entry in the MP table: decode trigger/polarity, assign a
+ * vector, register the Linux handler and write the two 32-bit RTE
+ * halves. Pins with no MP entry are reported as "not connected".
+ */
+void __init setup_IO_APIC_irqs(void)
+{
+ struct IO_APIC_route_entry entry;
+ int apic, pin, idx, irq, first_notcon = 1, vector;
+ unsigned long flags;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+ entry.mask = 0; /* enable IRQ */
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if (idx == -1) {
+ if (first_notcon) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ first_notcon = 0;
+ } else
+ apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+ continue;
+ }
+
+ entry.trigger = irq_trigger(idx);
+ entry.polarity = irq_polarity(idx);
+
+ /* Level-triggered entries start masked; they are unmasked
+ * later when the IRQ is started up. */
+ if (irq_trigger(idx)) {
+ entry.trigger = 1;
+ entry.mask = 1;
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ }
+
+ irq = pin_2_irq(idx, apic, pin);
+ add_pin_to_irq(irq, apic, pin);
+
+ if (/* !apic && */ !IO_APIC_IRQ(irq))
+ continue;
+
+ if (IO_APIC_IRQ(irq)) {
+ vector = assign_irq_vector(irq);
+ entry.vector = vector;
+
+ ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
+ /* Write high dword (0x11) before low (0x10): the low dword
+ * carries the mask bit, so the entry only goes live last. */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ }
+
+ if (!first_notcon)
+ apic_printk(APIC_VERBOSE," not connected.\n");
+}
+
+#ifndef CONFIG_XEN
+/*
+ * Set up the 8259A-master output pin as broadcast to all
+ * CPUs.
+ *
+ * Routes the timer (IRQ0) through IO-APIC 0 pin @pin with the given
+ * @vector, using logical delivery so the first CPU receives it, and
+ * installs the edge handler for irq_desc[0].
+ */
+void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry,0,sizeof(entry));
+
+ disable_8259A_irq(0);
+
+ /* mask LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+ /*
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+ entry.dest_mode = INT_DEST_MODE;
+ entry.mask = 0; /* unmask IRQ now */
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+
+ /*
+ * The timer IRQ doesn't have to know that behind the
+ * scene we have a 8259A-master in AEOI mode ...
+ */
+ irq_desc[0].handler = &ioapic_edge_type;
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ enable_8259A_irq(0);
+}
+
+/* Hook called when print_IO_APIC() sees register values outside the
+ * known-hardware whitelist; intentionally a no-op (breakpoint anchor). */
+void __init UNEXPECTED_IO_APIC(void)
+{
+}
+
+/*
+ * Dump the full state of every IO-APIC: registers 0x00-0x02, the whole
+ * IRQ redirection table, and the IRQ->pin mapping list. Diagnostic only;
+ * suppressed when apic_verbosity == APIC_QUIET. Values outside the
+ * known-board whitelist trip UNEXPECTED_IO_APIC().
+ */
+void __apicdebuginit print_IO_APIC(void)
+{
+ int apic, i;
+ union IO_APIC_reg_00 reg_00;
+ union IO_APIC_reg_01 reg_01;
+ union IO_APIC_reg_02 reg_02;
+ unsigned long flags;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (i = 0; i < nr_ioapics; i++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+ /*
+ * We are a bit conservative about what we expect. We have to
+ * know about every hardware change ASAP.
+ */
+ printk(KERN_INFO "testing the IO APIC.......................\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ reg_01.raw = io_apic_read(apic, 1);
+ /* Register 0x02 (arbitration) only exists on version >= 0x10. */
+ if (reg_01.bits.version >= 0x10)
+ reg_02.raw = io_apic_read(apic, 2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk("\n");
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
+ if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
+ if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+ (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+ (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+ (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+ (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+ (reg_01.bits.entries != 0x2E) &&
+ (reg_01.bits.entries != 0x3F) &&
+ (reg_01.bits.entries != 0x03)
+ )
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
+ if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+ (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
+ (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+ (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+ (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+ (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
+ )
+ UNEXPECTED_IO_APIC();
+ if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ if (reg_01.bits.version >= 0x10) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
+ if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
+ }
+
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+ printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+ " Stat Dest Deli Vect: \n");
+
+ for (i = 0; i <= reg_01.bits.entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ printk(KERN_DEBUG " %02x %03X %02X ",
+ i,
+ entry.dest.logical.logical_dest,
+ entry.dest.physical.physical_dest
+ );
+
+ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector
+ );
+ }
+ }
+ if (use_pci_vector())
+ printk(KERN_INFO "Using vector-based indexing\n");
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for (i = 0; i < NR_IRQS; i++) {
+ struct irq_pin_list *entry = irq_2_pin + i;
+ if (entry->pin < 0)
+ continue;
+ if (use_pci_vector() && !platform_legacy_irq(i))
+ printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+ else
+ printk(KERN_DEBUG "IRQ%d ", i);
+ /* Follow the chained irq_2_pin list for IRQs on multiple pins. */
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ printk("\n");
+ }
+
+ printk(KERN_INFO ".................................... done.\n");
+
+ return;
+}
+
+/*
+ * Print a 256-bit local-APIC bitfield (ISR/TMR/IRR) starting at register
+ * offset @base as eight rows of 32 '0'/'1' characters. Debug only.
+ */
+static __apicdebuginit void print_APIC_bitfield (int base)
+{
+ unsigned int v;
+ int i, j;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+ for (i = 0; i < 8; i++) {
+ v = apic_read(base + i*0x10);
+ for (j = 0; j < 32; j++) {
+ if (v & (1<<j))
+ printk("1");
+ else
+ printk("0");
+ }
+ printk("\n");
+ }
+}
+
+/*
+ * Dump the calling CPU's local-APIC registers. @dummy exists only to
+ * satisfy the on_each_cpu() callback signature. Debug only; suppressed
+ * at APIC_QUIET verbosity. Integrated-APIC-only registers (ARBPRI,
+ * PROCPRI, ESR) are skipped on the external 82489DX.
+ */
+void __apicdebuginit print_local_APIC(void * dummy)
+{
+ unsigned int v, ver, maxlvt;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+ v = apic_read(APIC_ID);
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+ maxlvt = get_maxlvt();
+
+ v = apic_read(APIC_TASKPRI);
+ printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ v = apic_read(APIC_ARBPRI);
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+ v & APIC_ARBPRI_MASK);
+ v = apic_read(APIC_PROCPRI);
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+ }
+
+ v = apic_read(APIC_EOI);
+ printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+ v = apic_read(APIC_RRR);
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+ v = apic_read(APIC_LDR);
+ printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+ v = apic_read(APIC_DFR);
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+ v = apic_read(APIC_SPIV);
+ printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+ printk(KERN_DEBUG "... APIC ISR field:\n");
+ print_APIC_bitfield(APIC_ISR);
+ printk(KERN_DEBUG "... APIC TMR field:\n");
+ print_APIC_bitfield(APIC_TMR);
+ printk(KERN_DEBUG "... APIC IRR field:\n");
+ print_APIC_bitfield(APIC_IRR);
+
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ v = apic_read(APIC_ESR);
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_ICR);
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+ v = apic_read(APIC_ICR2);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+ v = apic_read(APIC_LVTT);
+ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+ if (maxlvt > 3) { /* PC is LVT#4. */
+ v = apic_read(APIC_LVTPC);
+ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+ }
+ v = apic_read(APIC_LVT0);
+ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+ v = apic_read(APIC_LVT1);
+ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+ if (maxlvt > 2) { /* ERR is LVT#3. */
+ v = apic_read(APIC_LVTERR);
+ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+ }
+
+ v = apic_read(APIC_TMICT);
+ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+ v = apic_read(APIC_TMCCT);
+ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+ v = apic_read(APIC_TDCR);
+ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+ printk("\n");
+}
+
+/* Run print_local_APIC() on every online CPU (synchronously). */
+void print_all_local_APICs (void)
+{
+ on_each_cpu(print_local_APIC, NULL, 1, 1);
+}
+
+/*
+ * Dump the 8259A PIC state: IMR, IRR, ISR (read via OCW3 0x0b, then
+ * restored to IRR mode with 0x0a) and the ELCR. Debug only.
+ */
+void __apicdebuginit print_PIC(void)
+{
+ extern spinlock_t i8259A_lock;
+ unsigned int v;
+ unsigned long flags;
+
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ v = inb(0xa1) << 8 | inb(0x21);
+ printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
+
+ v = inb(0xa0) << 8 | inb(0x20);
+ printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
+
+ /* OCW3 0x0b: select ISR for the next read on each PIC. */
+ outb(0x0b,0xa0);
+ outb(0x0b,0x20);
+ v = inb(0xa0) << 8 | inb(0x20);
+ /* OCW3 0x0a: switch back to reading IRR. */
+ outb(0x0a,0xa0);
+ outb(0x0a,0x20);
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+
+ v = inb(0x4d1) << 8 | inb(0x4d0);
+ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+#else
+/* Xen: debug dumps are unavailable; provide an empty stub. */
+void __init print_IO_APIC(void) { }
+#endif /* !CONFIG_XEN */
+
+/*
+ * Early IO-APIC initialisation: reset the irq->pin mapping table and
+ * (unless "pirq=" was given) the PIRQ redirection table, read each
+ * IO-APIC's pin count from register 1, then clear all RTEs since the
+ * BIOS may have left stale entries behind.
+ */
+static void __init enable_IO_APIC(void)
+{
+ union IO_APIC_reg_01 reg_01;
+ int i;
+ unsigned long flags;
+
+ for (i = 0; i < PIN_MAP_SIZE; i++) {
+ irq_2_pin[i].pin = -1;
+ irq_2_pin[i].next = 0;
+ }
+ if (!pirqs_enabled)
+ for (i = 0; i < MAX_PIRQS; i++)
+ pirq_entries[i] = -1;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (i = 0; i < nr_ioapics; i++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(i, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[i] = reg_01.bits.entries+1;
+ }
+
+ /*
+ * Do not trust the IO-APIC being empty at bootup
+ */
+ clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ *
+ * Clears every IO-APIC RTE; on native hardware it also reconnects the
+ * BSP's local APIC to the 8259A (skipped under Xen, where the
+ * hypervisor owns the local APIC).
+ */
+void disable_IO_APIC(void)
+{
+ /*
+ * Clear the IO-APIC before rebooting:
+ */
+ clear_IO_APIC();
+#ifndef CONFIG_XEN
+ disconnect_bsp_APIC();
+#endif
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
+ *
+ * For each IO-APIC: write the MP-table APIC ID into register 0, fix up
+ * any mp_irqs[] destinations that referenced the old ID, and read the
+ * register back to verify the write took effect.
+ */
+#ifndef CONFIG_XEN
+static void __init setup_ioapic_ids_from_mpc (void)
+{
+ union IO_APIC_reg_00 reg_00;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ old_id = mp_ioapics[apic].mpc_apicid;
+
+
+ printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mpc_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_dstapic == old_id)
+ mp_irqs[i].mpc_dstapic
+ = mp_ioapics[apic].mpc_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mpc_apicid);
+
+ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, reg_00.raw);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+ printk("could not set ID!\n");
+ else
+ apic_printk(APIC_VERBOSE," ok.\n");
+ }
+}
+#else
+/* Xen: the hypervisor manages IO-APIC IDs; nothing to do in the guest. */
+static void __init setup_ioapic_ids_from_mpc(void) { }
+#endif
+
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ * - timer IRQ defaults to IO-APIC IRQ
+ * - if this function detects that timer IRQs are defunct, then we fall
+ * back to ISA timer IRQs
+ */
+#ifndef CONFIG_XEN
+/*
+ * Busy-wait ~10 ticks with interrupts enabled and report whether
+ * jiffies advanced by more than 4 (1 = timer works, 0 = dead).
+ */
+static int __init timer_irq_works(void)
+{
+ unsigned long t1 = jiffies;
+
+ local_irq_enable();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after one or two first
+ * ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+
+ /* jiffies wrap? */
+ if (jiffies - t1 > 4)
+ return 1;
+ return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up a edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+ int was_pending = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ /* Legacy ISA range: take the line away from the 8259A and record
+ * whether an edge was already pending there. */
+ if (irq < 16) {
+ disable_8259A_irq(irq);
+ if (i8259A_irq_pending(irq))
+ was_pending = 1;
+ }
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return was_pending;
+}
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+ if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+ == (IRQ_PENDING | IRQ_DISABLED))
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+ unmask_IO_APIC_irq(irq);
+
+ return 0; /* don't check for pending */
+}
+
+/* EOI the local APIC at end-of-interrupt for level-triggered lines. */
+static void end_level_ioapic_irq (unsigned int irq)
+{
+ ack_APIC_irq();
+}
+
+/*
+ * Retarget @irq to the CPUs in @mask by rewriting the logical
+ * destination field of every RTE mapped to it (via the __DO_ACTION
+ * iteration macro), under ioapic_lock.
+ */
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ unsigned long flags;
+ unsigned int dest;
+
+ dest = cpu_mask_to_apicid(mask);
+
+ /*
+ * Only the high 8 bits are valid.
+ */
+ dest = SET_APIC_LOGICAL_ID(dest);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __DO_ACTION(1, = dest, )
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#ifdef CONFIG_PCI_MSI
+/*
+ * With CONFIG_PCI_MSI the irq_desc[] table is indexed by vector, so
+ * each hw_interrupt_type callback gets the vector; translate it back
+ * to the IRQ and forward to the IRQ-based implementation above.
+ */
+static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ return startup_edge_ioapic_irq(irq);
+}
+
+static void ack_edge_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ ack_edge_ioapic_irq(irq);
+}
+
+static unsigned int startup_level_ioapic_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ return startup_level_ioapic_irq (irq);
+}
+
+static void end_level_ioapic_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ end_level_ioapic_irq(irq);
+}
+
+static void mask_IO_APIC_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ mask_IO_APIC_irq(irq);
+}
+
+static void unmask_IO_APIC_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ unmask_IO_APIC_irq(irq);
+}
+
+static void set_ioapic_affinity_vector (unsigned int vector,
+ cpumask_t cpu_mask)
+{
+ int irq = vector_to_irq(vector);
+
+ set_ioapic_affinity_irq(irq, cpu_mask);
+}
+#endif
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ *
+ * NOTE(review): the .startup/.ack/etc. names here are macros that
+ * resolve to the *_irq or *_vector variants depending on
+ * CONFIG_PCI_MSI — presumably defined in an out-of-view header.
+ */
+
+static struct hw_interrupt_type ioapic_edge_type = {
+ .typename = "IO-APIC-edge",
+ .startup = startup_edge_ioapic,
+ .shutdown = shutdown_edge_ioapic,
+ .enable = enable_edge_ioapic,
+ .disable = disable_edge_ioapic,
+ .ack = ack_edge_ioapic,
+ .end = end_edge_ioapic,
+ .set_affinity = set_ioapic_affinity,
+};
+
+static struct hw_interrupt_type ioapic_level_type = {
+ .typename = "IO-APIC-level",
+ .startup = startup_level_ioapic,
+ .shutdown = shutdown_level_ioapic,
+ .enable = enable_level_ioapic,
+ .disable = disable_level_ioapic,
+ .ack = mask_and_ack_level_ioapic,
+ .end = end_level_ioapic,
+ .set_affinity = set_ioapic_affinity,
+};
+#endif /* !CONFIG_XEN */
+
+/*
+ * Give every IO-APIC-capable IRQ that did not get a vector assigned a
+ * fallback handler: legacy IRQs (< 16) go back to the 8259A; the rest
+ * get no_irq_type (native only — under Xen there is no irq_desc fixup).
+ */
+static inline void init_IO_APIC_traps(void)
+{
+ int irq;
+
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ int tmp = irq;
+ if (use_pci_vector()) {
+ if (!platform_legacy_irq(tmp))
+ if ((tmp = vector_to_irq(tmp)) == -1)
+ continue;
+ }
+ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+ * interrupt if we can..
+ */
+ if (irq < 16)
+ make_8259A_irq(irq);
+#ifndef CONFIG_XEN
+ else
+ /* Strange. Oh, well.. */
+ irq_desc[irq].handler = &no_irq_type;
+#endif
+ }
+ }
+}
+
+#ifndef CONFIG_XEN
+/*
+ * "local-APIC-edge" interrupt type: used by check_timer() when the
+ * timer is wired straight into the local APIC's LVT0 as a virtual-wire
+ * fallback. Enable/disable toggle the LVT0 mask bit.
+ */
+static void enable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+ unsigned long v;
+
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+ ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type = {
+ .typename = "local-APIC-edge",
+ .startup = NULL, /* startup_irq() not used for IRQ0 */
+ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
+ .enable = enable_lapic_irq,
+ .disable = disable_lapic_irq,
+ .ack = ack_lapic_irq,
+ .end = end_lapic_irq,
+};
+
+/*
+ * Enable the NMI watchdog by routing LVT0 as NMI on all local APICs
+ * (the 8259A master runs in AEOI mode — '8259A Virtual Wire').
+ */
+static void setup_nmi (void)
+{
+ /*
+ * Dirty trick to enable the NMI watchdog ...
+ * We put the 8259A master into AEOI mode and
+ * unmask on all local APICs LVT0 as NMI.
+ *
+ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+ * is from Maciej W. Rozycki - so we do not have to EOI from
+ * the NMI handler or the timer interrupt.
+ */
+ printk(KERN_INFO "activating NMI Watchdog ...");
+
+ enable_NMI_through_LVT0(NULL);
+
+ printk(" done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only one way of sending
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
+ * not support the ExtINT mode, unfortunately. We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ *
+ * Mechanism: temporarily reroute the RTC pin (ISA IRQ 8) as ExtINT to
+ * this CPU, fire ~1s worth of RTC periodic interrupts through it, then
+ * restore both the RTC and the original RTE.
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+ int pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+ unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1)
+ return;
+
+ /* Save the current RTE for IRQ 8 so it can be restored below. */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ clear_IO_APIC_pin(0, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest.physical.physical_dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Enable RTC periodic interrupts at a fixed rate. */
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
+
+ /* Restore RTC settings and the original IO-APIC entry. */
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(0, pin);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ *
+ * Fallback ladder: IO-APIC pin -> ExtINT pin -> local-APIC virtual
+ * wire -> 8259A ExtINT; panics if none of these produce timer ticks.
+ */
+static inline void check_timer(void)
+{
+ int pin1, pin2;
+ int vector;
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+ vector = assign_irq_vector(0);
+ set_intr_gate(vector, interrupt[0]);
+
+ /*
+ * Subtle, code in do_timer_interrupt() expects an AEOI
+ * mode for the 8259A whenever interrupts are routed
+ * through I/O APICs. Also IRQ0 has to be enabled in
+ * the 8259A which implies the virtual wire has to be
+ * disabled in the local APIC.
+ */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ enable_8259A_irq(0);
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+ apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ unmask_IO_APIC_irq(0);
+ if (timer_irq_works()) {
+ nmi_watchdog_default();
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ check_nmi_watchdog();
+ }
+ return;
+ }
+ clear_IO_APIC_pin(0, pin1);
+ apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+ }
+
+ apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+ if (pin2 != -1) {
+ apic_printk(APIC_VERBOSE,"\n..... (found pin %d) ...", pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ setup_ExtINT_IRQ0_pin(pin2, vector);
+ if (timer_irq_works()) {
+ printk("works.\n");
+ nmi_watchdog_default();
+ if (nmi_watchdog == NMI_IO_APIC) {
+ setup_nmi();
+ check_nmi_watchdog();
+ }
+ return;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ clear_IO_APIC_pin(0, pin2);
+ }
+ printk(" failed.\n");
+
+ if (nmi_watchdog) {
+ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+ nmi_watchdog = 0;
+ }
+
+ apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+ disable_8259A_irq(0);
+ irq_desc[0].handler = &lapic_irq_type;
+ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ enable_8259A_irq(0);
+
+ if (timer_irq_works()) {
+ apic_printk(APIC_QUIET, " works.\n");
+ return;
+ }
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ apic_printk(APIC_VERBOSE," failed.\n");
+
+ apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+ init_8259A(0);
+ make_8259A_irq(0);
+ apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+ unlock_ExtINT_logic();
+
+ if (timer_irq_works()) {
+ apic_printk(APIC_VERBOSE," works.\n");
+ return;
+ }
+ apic_printk(APIC_VERBOSE," failed :(.\n");
+ panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
+}
+#else
+/* Xen: the hypervisor owns the timer; no 8254/IO-APIC probing needed. */
+#define check_timer() ((void)0)
+#endif /* !CONFIG_XEN */
+
+/*
+ *
+ * IRQ's that are handled by the PIC in the MPS IOAPIC case.
+ * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
+ * Linux doesn't really care, as it's not actually used
+ * for any interrupt handling anyway.
+ */
+#define PIC_IRQS (1<<2)
+
+void __init setup_IO_APIC(void)
+{
+ enable_IO_APIC();
+
+ if (acpi_ioapic)
+ io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
+ else
+ io_apic_irqs = ~PIC_IRQS;
+
+ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+
+ /*
+ * Set up the IO-APIC IRQ routing table.
+ */
+ if (!acpi_ioapic)
+ setup_ioapic_ids_from_mpc();
+#ifndef CONFIG_XEN
+ sync_Arb_IDs();
+#endif /* !CONFIG_XEN */
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ check_timer();
+ if (!acpi_ioapic)
+ print_IO_APIC();
+}
+
+struct sysfs_ioapic_data {
+ struct sys_device dev;
+ struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+
+static int ioapic_suspend(struct sys_device *dev, u32 state)
+{
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+ unsigned long flags;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+ *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+ *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+static int ioapic_resume(struct sys_device *dev)
+{
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+ unsigned long flags;
+ union IO_APIC_reg_00 reg_00;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(dev->id, 0);
+ if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
+ reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+ io_apic_write(dev->id, 0, reg_00.raw);
+ }
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+ io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+ io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+static struct sysdev_class ioapic_sysdev_class = {
+ set_kset_name("ioapic"),
+ .suspend = ioapic_suspend,
+ .resume = ioapic_resume,
+};
+
+static int __init ioapic_init_sysfs(void)
+{
+ struct sys_device * dev;
+ int i, size, error = 0;
+
+ error = sysdev_class_register(&ioapic_sysdev_class);
+ if (error)
+ return error;
+
+ for (i = 0; i < nr_ioapics; i++ ) {
+ size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+ * sizeof(struct IO_APIC_route_entry);
+ mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+ if (!mp_ioapic_data[i]) {
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+ continue;
+ }
+ memset(mp_ioapic_data[i], 0, size);
+ dev = &mp_ioapic_data[i]->dev;
+ dev->id = i;
+ dev->cls = &ioapic_sysdev_class;
+ error = sysdev_register(dev);
+ if (error) {
+ kfree(mp_ioapic_data[i]);
+ mp_ioapic_data[i] = NULL;
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+ continue;
+ }
+ }
+
+ return 0;
+}
+
+device_initcall(ioapic_init_sysfs);
+
+/* --------------------------------------------------------------------------
+ ACPI-based IOAPIC Configuration
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+#define IO_APIC_MAX_ID 0xFE
+
+int __init io_apic_get_unique_id (int ioapic, int apic_id)
+{
+#ifndef CONFIG_XEN
+ union IO_APIC_reg_00 reg_00;
+ static physid_mask_t apic_id_map;
+ unsigned long flags;
+ int i = 0;
+
+ /*
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
+ * support up to 16 on one shared APIC bus.
+ *
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+ * advantage of new APIC bus architecture.
+ */
+
+ if (physids_empty(apic_id_map))
+ apic_id_map = phys_cpu_present_map;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (apic_id >= IO_APIC_MAX_ID) {
+ apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+ "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ apic_id = reg_00.bits.ID;
+ }
+
+ /*
+ * Every APIC in a system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (physid_isset(apic_id, apic_id_map)) {
+
+ for (i = 0; i < IO_APIC_MAX_ID; i++) {
+ if (!physid_isset(i, apic_id_map))
+ break;
+ }
+
+ if (i == IO_APIC_MAX_ID)
+ panic("Max apic_id exceeded!\n");
+
+ apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+ "trying %d\n", ioapic, apic_id, i);
+
+ apic_id = i;
+ }
+
+ physid_set(apic_id, apic_id_map);
+
+ if (reg_00.bits.ID != apic_id) {
+ reg_00.bits.ID = apic_id;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /* Sanity check */
+ if (reg_00.bits.ID != apic_id)
+ panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
+ }
+
+ apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+#endif /* !CONFIG_XEN */
+
+ return apic_id;
+}
+
+
+int __init io_apic_get_version (int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.version;
+}
+
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return reg_01.bits.entries;
+}
+
+
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+{
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ if (!IO_APIC_IRQ(irq)) {
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ ioapic);
+ return -EINVAL;
+ }
+
+ /*
+ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+ * Note that we mask (disable) IRQs now -- these get enabled when the
+ * corresponding device driver registers for this IRQ.
+ */
+
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.trigger = edge_level;
+ entry.polarity = active_high_low;
+ entry.mask = 1; /* Disabled (masked) */
+
+ /*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= 16)
+ add_pin_to_irq(irq, ioapic, pin);
+
+ entry.vector = assign_irq_vector(irq);
+
+ apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
+ "IRQ %d Mode:%i Active:%i)\n", ioapic,
+ mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+ edge_level, active_high_low);
+
+ ioapic_register_intr(irq, entry.vector, edge_level);
+
+ if (!ioapic && (irq < 16))
+ disable_8259A_irq(irq);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+}
+
+#endif /*CONFIG_ACPI_BOOT*/
+
+
+#ifndef CONFIG_XEN
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be TARGET_CPUS
+ */
+void __init setup_ioapic_dest(void)
+{
+ int pin, ioapic, irq, irq_entry;
+
+ if (skip_ioapic_setup == 1)
+ return;
+
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
+ set_ioapic_affinity_irq(irq, TARGET_CPUS);
+ }
+
+ }
+}
+#endif /* !CONFIG_XEN */
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ioport.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ioport.c
new file mode 100644
index 0000000000..ff96e22341
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ioport.c
@@ -0,0 +1,63 @@
+/*
+ * linux/arch/x86_64/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <asm-xen/xen-public/physdev.h>
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ */
+
+// asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
+asmlinkage long sys_iopl(unsigned int new_io_pl)
+{
+ unsigned int old_io_pl = current->thread.io_pl;
+ physdev_op_t op;
+
+
+ if (new_io_pl > 3)
+ return -EINVAL;
+
+ /* Need "raw I/O" privileges for direct port access. */
+ if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ /* Maintain OS privileges even if user attempts to relinquish them. */
+ if (new_io_pl == 0)
+ new_io_pl = 1;
+
+ /* Change our version of the privilege levels. */
+ current->thread.io_pl = new_io_pl;
+
+ /* Force the change at ring 0. */
+ op.cmd = PHYSDEVOP_SET_IOPL;
+ op.u.set_iopl.iopl = new_io_pl;
+ HYPERVISOR_physdev_op(&op);
+
+ return 0;
+}
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+ return turn_on ? sys_iopl(3) : 0;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/irq.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/irq.c
new file mode 100644
index 0000000000..08effa3bc1
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/irq.c
@@ -0,0 +1,105 @@
+/*
+ * linux/arch/x86_64/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86_64-specific interrupt
+ * entry and irq statistics code. All the remaining irq logic is
+ * done by the generic kernel/irq/ code and in the
+ * x86_64-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *) v, j;
+ struct irqaction * action;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_printf(p, " ");
+ for (j=0; j<NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "CPU%d ",j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (!action)
+ goto skip;
+ seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+ for (j=0; j<NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ",
+ kstat_cpu(j).irqs[i]);
+#endif
+ seq_printf(p, " %14s", irq_desc[i].handler->typename);
+
+ seq_printf(p, " %s", action->name);
+ for (action=action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ } else if (i == NR_IRQS) {
+ seq_printf(p, "NMI: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", cpu_pda[j].__nmi_count);
+ seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+ seq_printf(p, "LOC: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", cpu_pda[j].apic_timer_irqs);
+ seq_putc(p, '\n');
+#endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+#endif
+ }
+ return 0;
+}
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+{
+ /* high bits used in ret_from_ code */
+ int irq = regs->orig_rax & __IRQ_MASK(HARDIRQ_BITS);
+
+ irq_enter();
+
+ __do_IRQ(irq, regs);
+ irq_exit();
+
+ return 1;
+}
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c
new file mode 100644
index 0000000000..9111fb80d0
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/ldt.c
@@ -0,0 +1,267 @@
+/*
+ * linux/arch/x86_64/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2002 Andi Kleen
+ *
+ * This handles calls from both 32bit and 64bit mode.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/pgalloc.h>
+
+#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
+static void flush_ldt(void *null)
+{
+ if (current->active_mm)
+ load_LDT(&current->active_mm->context);
+}
+#endif
+
+static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
+{
+ void *oldldt;
+ void *newldt;
+ unsigned oldsize;
+
+ if (mincount <= (unsigned)pc->size)
+ return 0;
+ oldsize = pc->size;
+ mincount = (mincount+511)&(~511);
+ if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+ newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+ else
+ newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+ if (!newldt)
+ return -ENOMEM;
+
+ if (oldsize)
+ memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+ oldldt = pc->ldt;
+ memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+ wmb();
+ pc->ldt = newldt;
+ wmb();
+ pc->size = mincount;
+ wmb();
+ if (reload) {
+#ifdef CONFIG_SMP
+ cpumask_t mask;
+
+ preempt_disable();
+#endif
+ make_pages_readonly(pc->ldt, (pc->size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
+ load_LDT(pc);
+#ifdef CONFIG_SMP
+ mask = cpumask_of_cpu(smp_processor_id());
+ if (!cpus_equal(current->mm->cpu_vm_mask, mask))
+ smp_call_function(flush_ldt, NULL, 1, 1);
+ preempt_enable();
+#else
+ load_LDT(pc);
+#endif
+ }
+ if (oldsize) {
+ make_pages_writable(oldldt, (oldsize * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
+ if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(oldldt);
+ else
+ kfree(oldldt);
+ }
+ return 0;
+}
+
+static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+{
+ int err = alloc_ldt(new, old->size, 0);
+ if (err < 0)
+ return err;
+ memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+ make_pages_readonly(new->ldt, (new->size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
+ return 0;
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ struct mm_struct * old_mm;
+ int retval = 0;
+
+ init_MUTEX(&mm->context.sem);
+ mm->context.size = 0;
+ old_mm = current->mm;
+ if (old_mm && old_mm->context.size > 0) {
+ down(&old_mm->context.sem);
+ retval = copy_ldt(&mm->context, &old_mm->context);
+ up(&old_mm->context.sem);
+ }
+ return retval;
+}
+
+/*
+ *
+ * Don't touch the LDT register - we're already in the next thread.
+ */
+void destroy_context(struct mm_struct *mm)
+{
+ if (mm->context.size) {
+ if (mm == current->active_mm)
+ clear_LDT();
+ make_pages_writable(mm->context.ldt,
+ (mm->context.size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
+ if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(mm->context.ldt);
+ else
+ kfree(mm->context.ldt);
+ mm->context.size = 0;
+ }
+}
+
+static int read_ldt(void __user * ptr, unsigned long bytecount)
+{
+ int err;
+ unsigned long size;
+ struct mm_struct * mm = current->mm;
+
+ if (!mm->context.size)
+ return 0;
+ if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+ bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+
+ down(&mm->context.sem);
+ size = mm->context.size*LDT_ENTRY_SIZE;
+ if (size > bytecount)
+ size = bytecount;
+
+ err = 0;
+ if (copy_to_user(ptr, mm->context.ldt, size))
+ err = -EFAULT;
+ up(&mm->context.sem);
+ if (err < 0)
+ goto error_return;
+ if (size != bytecount) {
+ /* zero-fill the rest */
+ if (clear_user(ptr+size, bytecount-size) != 0) {
+ err = -EFAULT;
+ goto error_return;
+ }
+ }
+ return bytecount;
+error_return:
+ return err;
+}
+
+static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+{
+ /* Arbitrary number */
+ /* x86-64 default LDT is all zeros */
+ if (bytecount > 128)
+ bytecount = 128;
+ if (clear_user(ptr, bytecount))
+ return -EFAULT;
+ return bytecount;
+}
+
+static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+{
+ struct task_struct *me = current;
+ struct mm_struct * mm = me->mm;
+ unsigned long entry = 0, *lp;
+ unsigned long mach_lp;
+ int error;
+ struct user_desc ldt_info;
+
+ error = -EINVAL;
+
+ if (bytecount != sizeof(ldt_info))
+ goto out;
+ error = -EFAULT;
+ if (copy_from_user(&ldt_info, ptr, bytecount))
+ goto out;
+
+ error = -EINVAL;
+ if (ldt_info.entry_number >= LDT_ENTRIES)
+ goto out;
+ if (ldt_info.contents == 3) {
+ if (oldmode)
+ goto out;
+ if (ldt_info.seg_not_present == 0)
+ goto out;
+ }
+
+ down(&mm->context.sem);
+ if (ldt_info.entry_number >= (unsigned)mm->context.size) {
+ error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+ if (error < 0)
+ goto out_unlock;
+ }
+
+ lp = (unsigned long *)((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+ mach_lp = arbitrary_virt_to_machine(lp);
+
+ /* Allow LDTs to be cleared by the user. */
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+ if (oldmode || LDT_empty(&ldt_info)) {
+ entry = 0;
+ goto install;
+ }
+ }
+
+#if 0
+ entry = LDT_entry(&ldt_info);
+#endif
+ if (oldmode)
+ entry &= ~(1 << 20);
+
+ /* Install the new entry ... */
+install:
+ error = HYPERVISOR_update_descriptor(mach_lp, entry);
+
+out_unlock:
+ up(&mm->context.sem);
+out:
+ return error;
+}
+
+asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+{
+ int ret = -ENOSYS;
+
+ switch (func) {
+ case 0:
+ ret = read_ldt(ptr, bytecount);
+ break;
+ case 1:
+ ret = write_ldt(ptr, bytecount, 1);
+ break;
+ case 2:
+ ret = read_default_ldt(ptr, bytecount);
+ break;
+ case 0x11:
+ ret = write_ldt(ptr, bytecount, 0);
+ break;
+ }
+ return ret;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
new file mode 100644
index 0000000000..c2aff7edad
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
@@ -0,0 +1,954 @@
+/*
+ * Intel Multiprocessor Specification 1.1 and 1.4
+ * compliant MP-table parsing routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Fixes
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Alan Cox : Added EBDA scanning
+ * Ingo Molnar : various cleanups and rewrites
+ * Maciej W. Rozycki: Bits for default MP configurations
+ * Paul Diefenbaugh: Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/acpi.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+#include <asm/io_apic.h>
+#include <asm/proto.h>
+
+/* Have we found an MP table */
+int smp_found_config;
+unsigned int __initdata maxcpus = NR_CPUS;
+
+int acpi_found_madt;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+cpumask_t pci_bus_to_cpumask [256] = { [0 ... 255] = CPU_MASK_ALL };
+
+int mp_current_pci_id = 0;
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+int pic_mode;
+unsigned long mp_lapic_addr = 0;
+
+
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_id = -1U;
+/* Internal processor count */
+static unsigned int num_processors = 0;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
+
+/* ACPI MADT entry parsing functions */
+#ifdef CONFIG_ACPI_BOOT
+extern struct acpi_boot_flags acpi_boot;
+#ifdef CONFIG_X86_LOCAL_APIC
+extern int acpi_parse_lapic (acpi_table_entry_header *header);
+extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
+extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
+#endif /*CONFIG_X86_LOCAL_APIC*/
+#ifdef CONFIG_X86_IO_APIC
+extern int acpi_parse_ioapic (acpi_table_entry_header *header);
+#endif /*CONFIG_X86_IO_APIC*/
+#endif /*CONFIG_ACPI_BOOT*/
+
+u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+ int sum = 0;
+
+ while (len--)
+ sum += *mp++;
+
+ return sum & 0xFF;
+}
+
+#ifndef CONFIG_XEN
+static void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ int ver;
+
+ if (!(m->mpc_cpuflag & CPU_ENABLED))
+ return;
+
+ printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
+ m->mpc_apicver);
+
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ Dprintk(" Bootup CPU\n");
+ boot_cpu_id = m->mpc_apicid;
+ }
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ " Processor ignored.\n", NR_CPUS);
+ return;
+ }
+ if (num_processors >= maxcpus) {
+ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+ " Processor ignored.\n", maxcpus);
+ return;
+ }
+
+ num_processors++;
+
+ if (m->mpc_apicid > MAX_APICS) {
+ printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
+ m->mpc_apicid, MAX_APICS);
+ return;
+ }
+ ver = m->mpc_apicver;
+
+ physid_set(m->mpc_apicid, phys_cpu_present_map);
+ /*
+ * Validate version
+ */
+ if (ver == 0x0) {
+ printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+ ver = 0x10;
+ }
+ apic_version[m->mpc_apicid] = ver;
+ bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+}
+#else
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+ num_processors++;
+}
+#endif /* CONFIG_XEN */
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+ char str[7];
+
+ memcpy(str, m->mpc_bustype, 6);
+ str[6] = 0;
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+
+ if (strncmp(str, "ISA", 3) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+ } else if (strncmp(str, "EISA", 4) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+ } else if (strncmp(str, "PCI", 3) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+ mp_current_pci_id++;
+ } else if (strncmp(str, "MCA", 3) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else {
+ printk(KERN_ERR "Unknown bustype %s\n", str);
+ }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+ if (!(m->mpc_flags & MPC_APIC_USABLE))
+ return;
+
+ printk("I/O APIC #%d Version %d at 0x%X.\n",
+ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
+ MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+ }
+ if (!m->mpc_apicaddr) {
+ printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+ " found in MP table, skipping!\n");
+ return;
+ }
+ mp_ioapics[nr_ioapics] = *m;
+ nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+ mp_irqs [mp_irq_entries] = *m;
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+ Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+ /*
+ * Well it seems all SMP boards in existence
+ * use ExtINT/LVT1 == LINT0 and
+ * NMI/LVT2 == LINT1 - the following check
+ * will show us if this assumption is false.
+ * Until then we do not have to add baggage.
+ */
+ if ((m->mpc_irqtype == mp_ExtINT) &&
+ (m->mpc_destapiclint != 0))
+ BUG();
+ if ((m->mpc_irqtype == mp_NMI) &&
+ (m->mpc_destapiclint != 1))
+ BUG();
+}
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+ char str[16];
+ int count=sizeof(*mpc);
+ unsigned char *mpt=((unsigned char *)mpc)+count;
+
+ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+ printk("SMP mptable: bad signature [%c%c%c%c]!\n",
+ mpc->mpc_signature[0],
+ mpc->mpc_signature[1],
+ mpc->mpc_signature[2],
+ mpc->mpc_signature[3]);
+ return 0;
+ }
+ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+ printk("SMP mptable: checksum error!\n");
+ return 0;
+ }
+ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+ printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+ mpc->mpc_spec);
+ return 0;
+ }
+ if (!mpc->mpc_lapic) {
+ printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+ return 0;
+ }
+ memcpy(str,mpc->mpc_oem,8);
+ str[8]=0;
+ printk(KERN_INFO "OEM ID: %s ",str);
+
+ memcpy(str,mpc->mpc_productid,12);
+ str[12]=0;
+ printk(KERN_INFO "Product ID: %s ",str);
+
+ printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic);
+
+ /* save the local APIC address, it might be non-default */
+ if (!acpi_lapic)
+ mp_lapic_addr = mpc->mpc_lapic;
+
+ /*
+ * Now process the configuration blocks.
+ */
+ while (count < mpc->mpc_length) {
+ switch(*mpt) {
+ case MP_PROCESSOR:
+ {
+ struct mpc_config_processor *m=
+ (struct mpc_config_processor *)mpt;
+ if (!acpi_lapic)
+ MP_processor_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_BUS:
+ {
+ struct mpc_config_bus *m=
+ (struct mpc_config_bus *)mpt;
+ MP_bus_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+ }
+ case MP_IOAPIC:
+ {
+ struct mpc_config_ioapic *m=
+ (struct mpc_config_ioapic *)mpt;
+ MP_ioapic_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_INTSRC:
+ {
+ struct mpc_config_intsrc *m=
+ (struct mpc_config_intsrc *)mpt;
+
+ MP_intsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ case MP_LINTSRC:
+ {
+ struct mpc_config_lintsrc *m=
+ (struct mpc_config_lintsrc *)mpt;
+ MP_lintsrc_info(m);
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
+ }
+ }
+ }
+ clustered_apic_check();
+ if (!num_processors)
+ printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+ unsigned int port;
+
+ port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+}
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+ struct mpc_config_intsrc intsrc;
+ int i;
+ int ELCR_fallback = 0;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* conforming */
+ intsrc.mpc_srcbus = 0;
+ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+ intsrc.mpc_irqtype = mp_INT;
+
+ /*
+ * If true, we have an ISA/PCI system with no IRQ entries
+ * in the MP table. To prevent the PCI interrupts from being set up
+ * incorrectly, we try to use the ELCR. The sanity check to see if
+ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+ * never be level sensitive, so we simply see if the ELCR agrees.
+ * If it does, we assume it's valid.
+ */
+ if (mpc_default_type == 5) {
+ printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+ printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
+ else {
+ printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+ ELCR_fallback = 1;
+ }
+ }
+
+ for (i = 0; i < 16; i++) {
+ switch (mpc_default_type) {
+ case 2:
+ if (i == 0 || i == 13)
+ continue; /* IRQ0 & IRQ13 not connected */
+ /* fall through */
+ default:
+ if (i == 2)
+ continue; /* IRQ2 is never connected */
+ }
+
+ if (ELCR_fallback) {
+ /*
+ * If the ELCR indicates a level-sensitive interrupt, we
+ * copy that information over to the MP table in the
+ * irqflag field (level sensitive, active high polarity).
+ */
+ if (ELCR_trigger(i))
+ intsrc.mpc_irqflag = 13;
+ else
+ intsrc.mpc_irqflag = 0;
+ }
+
+ intsrc.mpc_srcbusirq = i;
+ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+ MP_intsrc_info(&intsrc);
+ }
+
+ intsrc.mpc_irqtype = mp_ExtINT;
+ intsrc.mpc_srcbusirq = 0;
+ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+ MP_intsrc_info(&intsrc);
+}
+
+/*
+ * Fabricate a complete minimal MP configuration from one of the MPS
+ * "default configuration" type numbers: two CPUs, one (or two) buses,
+ * a single IO-APIC at 0xFEC00000, the legacy IRQ routing, and the two
+ * local-interrupt (ExtINT/NMI) entries.
+ */
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+ struct mpc_config_processor processor;
+ struct mpc_config_bus bus;
+ struct mpc_config_ioapic ioapic;
+ struct mpc_config_lintsrc lintsrc;
+ int linttypes[2] = { mp_ExtINT, mp_NMI };
+ int i;
+
+ /*
+ * local APIC has default address
+ */
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /*
+ * 2 CPUs, numbered 0 & 1.
+ */
+ processor.mpc_type = MP_PROCESSOR;
+ /* Either an integrated APIC or a discrete 82489DX. */
+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ processor.mpc_cpuflag = CPU_ENABLED;
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) |
+ boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ for (i = 0; i < 2; i++) {
+ processor.mpc_apicid = i;
+ MP_processor_info(&processor);
+ }
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ switch (mpc_default_type) {
+ default:
+ printk(KERN_ERR "???\nUnknown standard configuration %d\n",
+ mpc_default_type);
+ /* fall through */
+ case 1:
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+ case 2:
+ case 6:
+ case 3:
+ memcpy(bus.mpc_bustype, "EISA ", 6);
+ break;
+ case 4:
+ case 7:
+ memcpy(bus.mpc_bustype, "MCA ", 6);
+ }
+ MP_bus_info(&bus);
+ /* Default types > 4 additionally have a PCI bus (bus id 1). */
+ if (mpc_default_type > 4) {
+ bus.mpc_busid = 1;
+ memcpy(bus.mpc_bustype, "PCI ", 6);
+ MP_bus_info(&bus);
+ }
+
+ ioapic.mpc_type = MP_IOAPIC;
+ ioapic.mpc_apicid = 2;
+ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ ioapic.mpc_flags = MPC_APIC_USABLE;
+ ioapic.mpc_apicaddr = 0xFEC00000;
+ MP_ioapic_info(&ioapic);
+
+ /*
+ * We set up most of the low 16 IO-APIC pins according to MPS rules.
+ */
+ construct_default_ioirq_mptable(mpc_default_type);
+
+ lintsrc.mpc_type = MP_LINTSRC;
+ lintsrc.mpc_irqflag = 0; /* conforming */
+ lintsrc.mpc_srcbusid = 0;
+ lintsrc.mpc_srcbusirq = 0;
+ lintsrc.mpc_destapic = MP_APIC_ALL;
+ for (i = 0; i < 2; i++) {
+ lintsrc.mpc_irqtype = linttypes[i];
+ lintsrc.mpc_destapiclint = i;
+ MP_lintsrc_info(&lintsrc);
+ }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+/*
+ * Consume the MP floating pointer located earlier by smp_scan_config()
+ * and build the SMP configuration, unless ACPI (MADT) already provided
+ * the full configuration.
+ *
+ * NOTE(review): mpf_found is dereferenced unconditionally below --
+ * this relies on the caller only invoking get_smp_config() after a
+ * successful scan (smp_found_config set); verify against callers.
+ */
+void __init get_smp_config (void)
+{
+ struct intel_mp_floating *mpf = mpf_found;
+
+ /*
+ * ACPI may be used to obtain the entire SMP configuration or just to
+ * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that
+ * ACPI supports both logical (e.g. Hyper-Threading) and physical
+ * processors, where MPS only supports physical.
+ */
+ if (acpi_lapic && acpi_ioapic) {
+ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+ return;
+ }
+ else if (acpi_lapic)
+ printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+
+ printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+ /* Feature byte 2 bit 7: IMCR present => PIC compatibility mode. */
+ if (mpf->mpf_feature2 & (1<<7)) {
+ printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
+ pic_mode = 1;
+ } else {
+ printk(KERN_INFO " Virtual Wire compatibility mode.\n");
+ pic_mode = 0;
+ }
+
+ /*
+ * Now see if we need to read further.
+ */
+ if (mpf->mpf_feature1 != 0) {
+
+ printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+ construct_default_ISA_mptable(mpf->mpf_feature1);
+
+ } else if (mpf->mpf_physptr) {
+
+ /*
+ * Read the physical hardware table. Anything here will
+ * override the defaults.
+ */
+ if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+ smp_found_config = 0;
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+ return;
+ }
+ /*
+ * If there are no explicit MP IRQ entries, then we are
+ * broken. We set up most of the low 16 IO-APIC pins to
+ * ISA defaults and hope it will work.
+ */
+ if (!mp_irq_entries) {
+ struct mpc_config_bus bus;
+
+ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+ bus.mpc_type = MP_BUS;
+ bus.mpc_busid = 0;
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ MP_bus_info(&bus);
+
+ construct_default_ioirq_mptable(0);
+ }
+
+ } else
+ BUG();
+
+ printk(KERN_INFO "Processors: %d\n", num_processors);
+ /*
+ * Only use the first configuration found.
+ */
+}
+
+/*
+ * Scan [base, base+length) in 16-byte steps for the MP floating
+ * pointer structure ("_MP_" signature, length 1 paragraph, valid
+ * checksum, spec revision 1.1 or 1.4).  On success sets
+ * smp_found_config and mpf_found and returns 1; otherwise returns 0.
+ */
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+ extern void __bad_mpf_size(void);
+ unsigned int *bp = isa_bus_to_virt(base);
+ struct intel_mp_floating *mpf;
+
+ Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+ /* Link-time assertion: struct must be exactly 16 bytes. */
+ if (sizeof(*mpf) != 16)
+ __bad_mpf_size();
+
+ while (length > 0) {
+ mpf = (struct intel_mp_floating *)bp;
+ if ((*bp == SMP_MAGIC_IDENT) &&
+ (mpf->mpf_length == 1) &&
+ !mpf_checksum((unsigned char *)bp, 16) &&
+ ((mpf->mpf_specification == 1)
+ || (mpf->mpf_specification == 4)) ) {
+
+ smp_found_config = 1;
+ mpf_found = mpf;
+ return 1;
+ }
+ bp += 4;
+ length -= 16;
+ }
+ return 0;
+}
+
+/*
+ * Search the architecturally-defined locations for the MP floating
+ * pointer: bottom 1K, top 1K of base RAM, the 64K BIOS area, then the
+ * EBDA (located via the real-mode pointer at 0x40E).
+ */
+void __init find_intel_smp (void)
+{
+ unsigned int address;
+
+ /*
+ * FIXME: Linux assumes you have 640K of base ram..
+ * this continues the error...
+ *
+ * 1) Scan the bottom 1K for a signature
+ * 2) Scan the top 1K of base RAM
+ * 3) Scan the 64K of bios
+ */
+ if (smp_scan_config(0x0,0x400) ||
+ smp_scan_config(639*0x400,0x400) ||
+ smp_scan_config(0xF0000,0x10000))
+ return;
+ /*
+ * If it is an SMP machine we should know now, unless the
+ * configuration is in an EISA/MCA bus machine with an
+ * extended bios data area.
+ *
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E, calculate and scan it here.
+ *
+ * NOTE! There are Linux loaders that will corrupt the EBDA
+ * area, and as such this kind of SMP config may be less
+ * trustworthy, simply because the SMP table may have been
+ * stomped on during early boot. These loaders are buggy and
+ * should be fixed.
+ */
+
+ address = *(unsigned short *)phys_to_virt(0x40E);
+ address <<= 4; /* segment -> physical address */
+ if (smp_scan_config(address, 0x1000))
+ return;
+
+ /* If we have come this far, we did not find an MP table */
+ printk(KERN_INFO "No mptable found.\n");
+}
+
+/*
+ * - Intel MP Configuration Table
+ */
+/* Entry point: locate the Intel MP configuration (no-op without a local APIC). */
+void __init find_smp_config (void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ find_intel_smp();
+#endif
+}
+
+
+/* --------------------------------------------------------------------------
+ ACPI-based MP Configuration
+ -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_BOOT
+
+/*
+ * Record the local-APIC MMIO address reported by ACPI and determine the
+ * boot CPU's APIC id.  Deliberately a no-op under Xen, where the guest
+ * does not touch the physical local APIC.
+ */
+void __init mp_register_lapic_address (
+	u64			address)
+{
+#ifndef CONFIG_XEN
+ mp_lapic_addr = (unsigned long) address;
+
+ if (boot_cpu_id == -1U)
+ boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+
+ Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+#endif
+}
+
+
+/*
+ * Register a local APIC (i.e. a processor) discovered via the ACPI
+ * MADT with the MP-table processor bookkeeping.
+ */
+void __init mp_register_lapic (
+	u8			id, 
+	u8			enabled)
+{
+ struct mpc_config_processor processor;
+ int boot_cpu = 0;
+
+ if (id >= MAX_APICS) {
+ printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+ id, MAX_APICS);
+ return;
+ }
+
+ if (id == boot_cpu_physical_apicid)
+ boot_cpu = 1;
+
+#ifndef CONFIG_XEN
+ processor.mpc_type = MP_PROCESSOR;
+ processor.mpc_apicid = id;
+ processor.mpc_apicver = 0x10; /* TBD: lapic version */
+ processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+ processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+#endif
+
+ /*
+ * NOTE(review): with CONFIG_XEN defined, 'processor' is passed to
+ * MP_processor_info() uninitialised -- presumably the Xen build of
+ * MP_processor_info() ignores its contents; confirm.
+ */
+ MP_processor_info(&processor);
+}
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define MP_ISA_BUS 0
+#define MP_MAX_IOAPIC_PIN 127
+
+struct mp_ioapic_routing {
+ int apic_id;
+ int gsi_start;
+ int gsi_end;
+ u32 pin_programmed[4];
+} mp_ioapic_routing[MAX_IO_APICS];
+
+
+/*
+ * Map a global system interrupt (GSI) to the index of the IO-APIC
+ * whose [gsi_start, gsi_end] range covers it.  Returns -1 (with an
+ * error printk) if no IO-APIC manages the GSI.
+ */
+static int mp_find_ioapic (
+	int			gsi)
+{
+ int i = 0;
+
+ /* Find the IOAPIC that manages this GSI. */
+ for (i = 0; i < nr_ioapics; i++) {
+ if ((gsi >= mp_ioapic_routing[i].gsi_start)
+ && (gsi <= mp_ioapic_routing[i].gsi_end))
+ return i;
+ }
+
+ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+
+ return -1;
+}
+
+
+/*
+ * Register an IO-APIC discovered via ACPI: record its MMIO address,
+ * obtain a unique APIC id and version, and fill in the GSI routing
+ * range used by mp_find_ioapic().  Panics if MAX_IO_APICS is exceeded;
+ * silently skips entries with a zero address.
+ */
+void __init mp_register_ioapic (
+	u8			id, 
+	u32			address,
+	u32			gsi_base)
+{
+ int idx = 0;
+
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+ }
+ if (!address) {
+ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+ " found in MADT table, skipping!\n");
+ return;
+ }
+
+ idx = nr_ioapics++;
+
+ mp_ioapics[idx].mpc_type = MP_IOAPIC;
+ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+ mp_ioapics[idx].mpc_apicaddr = address;
+
+ mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+ mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+
+ /*
+ * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
+ * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
+ */
+ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+ mp_ioapic_routing[idx].gsi_start = gsi_base;
+ mp_ioapic_routing[idx].gsi_end = gsi_base +
+ io_apic_get_redir_entries(idx);
+
+ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+ mp_ioapic_routing[idx].gsi_start,
+ mp_ioapic_routing[idx].gsi_end);
+
+ return;
+}
+
+
+/*
+ * Apply an ACPI (MADT) interrupt source override: record that ISA
+ * 'bus_irq' is wired to the IO-APIC pin corresponding to 'gsi', with
+ * the given polarity and trigger mode, as an MP-table intsrc entry.
+ */
+void __init mp_override_legacy_irq (
+	u8			bus_irq,
+	u8			polarity, 
+	u8			trigger, 
+	u32			gsi)
+{
+ struct mpc_config_intsrc intsrc;
+ int ioapic = -1;
+ int pin = -1;
+
+ /*
+ * Convert 'gsi' to 'ioapic.pin'.
+ */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return;
+ pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
+
+ /*
+ * TBD: This check is for faulty timer entries, where the override
+ * erroneously sets the trigger to level, resulting in a HUGE
+ * increase of timer interrupts!
+ */
+ if ((bus_irq == 0) && (trigger == 3))
+ trigger = 1;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqtype = mp_INT;
+ /* irqflag layout: bits 0-1 polarity, bits 2-3 trigger mode. */
+ intsrc.mpc_irqflag = (trigger << 2) | polarity;
+ intsrc.mpc_srcbus = MP_ISA_BUS;
+ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
+ intsrc.mpc_dstirq = pin; /* INTIN# */
+
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", 
+ intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+
+ return;
+}
+
+
+/*
+ * Populate identity-mapped MP-table entries for ISA IRQs 0-15 on the
+ * IO-APIC that manages GSI 0, skipping any IRQ or pin that an earlier
+ * (MADT) interrupt source override already claimed.
+ */
+void __init mp_config_acpi_legacy_irqs (void)
+{
+ struct mpc_config_intsrc intsrc;
+ int i = 0;
+ int ioapic = -1;
+
+ /* 
+ * Fabricate the legacy ISA bus (bus #31).
+ */
+ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+ /* 
+ * Locate the IOAPIC that manages the ISA IRQs (0-15). 
+ */
+ ioapic = mp_find_ioapic(0);
+ if (ioapic < 0)
+ return;
+
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* Conforming */
+ intsrc.mpc_srcbus = MP_ISA_BUS;
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+
+ /* 
+ * Use the default configuration for the IRQs 0-15. Unless
+ * overridden by (MADT) interrupt source override entries.
+ */
+ for (i = 0; i < 16; i++) {
+ int idx;
+
+ for (idx = 0; idx < mp_irq_entries; idx++) {
+ struct mpc_config_intsrc *irq = mp_irqs + idx;
+
+ /* Do we already have a mapping for this ISA IRQ? */
+ if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+ break;
+
+ /* Do we already have a mapping for this IOAPIC pin */
+ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+ (irq->mpc_dstirq == i))
+ break;
+ }
+
+ if (idx != mp_irq_entries) {
+ printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+ continue; /* IRQ already used */
+ }
+
+ intsrc.mpc_irqtype = mp_INT;
+ intsrc.mpc_srcbusirq = i; /* Identity mapped */
+ intsrc.mpc_dstirq = i;
+
+ Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+ "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
+ intsrc.mpc_dstirq);
+
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+ }
+
+ return;
+}
+
+/*
+ * Program the IO-APIC routing for a GSI reported by ACPI (typically a
+ * PCI interrupt) and return the GSI.  Uses the per-IOAPIC
+ * pin_programmed bitmap (4 x 32 bits, pins 0-127) so each pin is only
+ * programmed once even though PRTs contain redundant pin->gsi entries.
+ * Returns the GSI unchanged when not in IOAPIC mode or on any error.
+ */
+int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
+{
+ int ioapic = -1;
+ int ioapic_pin = 0;
+ int idx, bit = 0;
+
+ if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+ return gsi;
+
+#ifdef CONFIG_ACPI_BUS
+ /* Don't set up the ACPI SCI because it's already set up */
+ if (acpi_fadt.sci_int == gsi)
+ return gsi;
+#endif
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+ return gsi;
+ }
+
+ ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
+
+ /* 
+ * Avoid pin reprogramming. PRTs typically include entries  
+ * with redundant pin->gsi mappings (but unique PCI devices);
+ * we only program the IOAPIC on the first.
+ */
+ bit = ioapic_pin % 32;
+ idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+ if (idx > 3) {
+ printk(KERN_ERR "Invalid reference to IOAPIC pin "
+ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
+ ioapic_pin);
+ return gsi;
+ }
+ if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+ return gsi;
+ }
+
+ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+ edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
+ active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
+ return gsi;
+}
+
+#endif /*CONFIG_X86_IO_APIC*/
+#endif /*CONFIG_ACPI_BOOT*/
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c
new file mode 100644
index 0000000000..4a66a273f6
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c
@@ -0,0 +1,203 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm-xen/balloon.h>
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA. This is the scatter-gather version of the
+ * above pci_map_single interface. Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length. They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+/*
+ * Map each scatterlist segment for DMA by translating its kernel
+ * virtual address with virt_to_bus().  No IOMMU involvement: the
+ * mapping is 1:1 per segment, so exactly 'nents' pairs are produced.
+ */
+int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
+	       int nents, int direction)
+{
+ int i;
+
+ BUG_ON(direction == DMA_NONE);
+ for (i = 0; i < nents; i++ ) {
+ struct scatterlist *s = &sg[i];
+ BUG_ON(!s->page); 
+ s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
+ s->dma_length = s->length;
+ }
+ return nents;
+}
+
+EXPORT_SYMBOL(dma_map_sg);
+
+/* Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+/* Unmap each scatterlist segment by delegating to dma_unmap_single(). */
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+		  int nents, int dir)
+{
+ int i;
+ for (i = 0; i < nents; i++) { 
+ struct scatterlist *s = &sg[i];
+ BUG_ON(s->page == NULL); 
+ BUG_ON(s->dma_address == 0); 
+ dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
+ } 
+}
+
+struct dma_coherent_mem {
+ void *virt_base;
+ u32 device_base;
+ int size;
+ int flags;
+ unsigned long *bitmap;
+};
+
+/*
+ * Allocate a zeroed, DMA-coherent buffer.  Tries the device's declared
+ * coherent memory pool first (if any); otherwise falls back to the
+ * page allocator and asks Xen to make the pages machine-contiguous
+ * via xen_contig_memory() so virt_to_bus() yields a usable address.
+ */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *dma_handle, unsigned gfp)
+{
+ void *ret;
+ unsigned int order = get_order(size);
+ unsigned long vstart;
+
+ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+
+ /* ignore region specifiers */
+ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+ if (mem) {
+ int page = bitmap_find_free_region(mem->bitmap, mem->size,
+ order);
+ if (page >= 0) {
+ *dma_handle = mem->device_base + (page << PAGE_SHIFT);
+ ret = mem->virt_base + (page << PAGE_SHIFT);
+ memset(ret, 0, size);
+ return ret;
+ }
+ if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+ return NULL;
+ }
+
+ /* A narrow coherent mask (or no device) forces the DMA zone. */
+ if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+ gfp |= GFP_DMA;
+
+ vstart = __get_free_pages(gfp, order);
+ ret = (void *)vstart;
+ if (ret == NULL)
+ return ret;
+
+ xen_contig_memory(vstart, order);
+
+ memset(ret, 0, size);
+ *dma_handle = virt_to_bus(ret);
+
+ return ret;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * Release a buffer from dma_alloc_coherent(): back to the device's
+ * declared pool if the address falls inside it, otherwise back to the
+ * page allocator.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle)
+{
+ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+ int order = get_order(size);
+ 
+ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+ int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+
+ bitmap_release_region(mem->bitmap, page, order);
+ } else
+ free_pages((unsigned long)vaddr, order);
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+#if 0
+/*
+ * (Compiled out under "#if 0".)  Declare a bus-address range as the
+ * device's private coherent-memory pool: ioremap it, allocate the
+ * tracking struct and allocation bitmap, and record size/flags.
+ * Returns DMA_MEMORY_MAP or DMA_MEMORY_IO on success, 0 on failure.
+ */
+int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+				dma_addr_t device_addr, size_t size, int flags)
+{
+ void __iomem *mem_base;
+ int pages = size >> PAGE_SHIFT;
+ int bitmap_size = (pages + 31)/32;
+
+ if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
+ goto out;
+ if (!size)
+ goto out;
+ if (dev->dma_mem)
+ goto out;
+
+ /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
+
+ mem_base = ioremap(bus_addr, size);
+ if (!mem_base)
+ goto out;
+
+ dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
+ if (!dev->dma_mem)
+ goto out;
+ memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
+ dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
+ if (!dev->dma_mem->bitmap)
+ goto free1_out;
+ memset(dev->dma_mem->bitmap, 0, bitmap_size);
+
+ dev->dma_mem->virt_base = mem_base;
+ dev->dma_mem->device_base = device_addr;
+ dev->dma_mem->size = pages;
+ dev->dma_mem->flags = flags;
+
+ if (flags & DMA_MEMORY_MAP)
+ return DMA_MEMORY_MAP;
+
+ return DMA_MEMORY_IO;
+
+ free1_out:
+ kfree(dev->dma_mem->bitmap); 
+ out:
+ return 0;
+}
+EXPORT_SYMBOL(dma_declare_coherent_memory);
+
+/*
+ * (Compiled out under "#if 0".)  Tear down a pool created by
+ * dma_declare_coherent_memory(): unmap and free its bookkeeping.
+ */
+void dma_release_declared_memory(struct device *dev)
+{
+ struct dma_coherent_mem *mem = dev->dma_mem;
+ 
+ if(!mem)
+ return;
+ dev->dma_mem = NULL;
+ iounmap(mem->virt_base);
+ kfree(mem->bitmap);
+ kfree(mem);
+}
+
+/*
+ * (Compiled out under "#if 0".)  Reserve the pool pages covering
+ * [device_addr, device_addr+size) and return the corresponding kernel
+ * virtual address, or an ERR_PTR on failure.
+ */
+void *dma_mark_declared_memory_occupied(struct device *dev,
+					dma_addr_t device_addr, size_t size)
+{
+ int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ int pos, err;
+
+ if (!dev->dma_mem)
+ return ERR_PTR(-EINVAL);
+
+ pos = (device_addr - dev->dma_mem->device_base) >> PAGE_SHIFT;
+ err = bitmap_allocate_region(dev->dma_mem->bitmap, pos, get_order(pages));
+ if (err != 0)
+ return ERR_PTR(err);
+ return dev->dma_mem->virt_base + (pos << PAGE_SHIFT);
+}
+#endif
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c
new file mode 100644
index 0000000000..b359ae80bb
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c
@@ -0,0 +1,96 @@
+/* Fallback functions when the main IOMMU code is not compiled in. This
+ code is roughly equivalent to i386. */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <asm/proto.h>
+#include <asm/processor.h>
+
+int iommu_merge = 0;
+EXPORT_SYMBOL(iommu_merge);
+
+dma_addr_t bad_dma_address;
+EXPORT_SYMBOL(bad_dma_address);
+
+int iommu_bio_merge = 0;
+EXPORT_SYMBOL(iommu_bio_merge);
+
+int iommu_sac_force = 0;
+EXPORT_SYMBOL(iommu_sac_force);
+
+#if 0
+/*
+ * Dummy IO MMU functions
+ */
+
+/*
+ * (Compiled out under "#if 0".)  No-IOMMU coherent allocation: grab
+ * pages and retry from the DMA zone until the bus address fits the
+ * device's coherent mask.
+ */
+void *dma_alloc_coherent(struct device *hwdev, size_t size,
+			 dma_addr_t *dma_handle, unsigned gfp)
+{
+ void *ret;
+ u64 mask;
+ int order = get_order(size);
+
+ if (hwdev)
+ mask = hwdev->coherent_dma_mask & *hwdev->dma_mask;
+ else
+ mask = 0xffffffff;
+ for (;;) {
+ ret = (void *)__get_free_pages(gfp, order);
+ if (ret == NULL)
+ return NULL;
+ *dma_handle = virt_to_bus(ret);
+ if ((*dma_handle & ~mask) == 0)
+ break;
+ free_pages((unsigned long)ret, order);
+ /* Already tried the DMA zone and still out of range: give up. */
+ if (gfp & GFP_DMA)
+ return NULL;
+ gfp |= GFP_DMA;
+ }
+
+ memset(ret, 0, size);
+ return ret;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/* (Compiled out under "#if 0".)  Free pages from dma_alloc_coherent(). */
+void dma_free_coherent(struct device *hwdev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle)
+{
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+#endif
+
+/*
+ * Report whether a DMA mask is usable without an IOMMU.  Anything
+ * tighter than 24 bits cannot be satisfied (the GFP_DMA fallback zone
+ * is the narrowest guarantee available).
+ */
+int dma_supported(struct device *hwdev, u64 mask)
+{
+ /*
+ * we fall back to GFP_DMA when the mask isn't all 1s,
+ * so we can't guarantee allocations that must be
+ * within a tighter range than GFP_DMA..
+ * RED-PEN this won't work for pci_map_single. Caller has to
+ * use GFP_DMA in the first place.
+ */
+ if (mask < 0x00ffffff)
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(dma_supported);
+
+/* DMA buffers must be aligned to the CPU's CLFLUSH line size. */
+int dma_get_cache_alignment(void)
+{
+ return boot_cpu_data.x86_clflush_size;
+}
+EXPORT_SYMBOL(dma_get_cache_alignment);
+
+/*
+ * Boot-time sanity check: warn if there is RAM above 4GB but no IOMMU
+ * support was compiled in (32-bit PCI devices may then misbehave).
+ */
+static int __init check_ram(void) 
+{ 
+ if (end_pfn >= 0xffffffff>>PAGE_SHIFT) { 
+ printk(
+ KERN_ERR "WARNING more than 4GB of memory but IOMMU not compiled in.\n"
+ KERN_ERR "WARNING 32bit PCI may malfunction.\n");
+ } 
+ return 0;
+} 
+__initcall(check_ram);
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/process.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/process.c
new file mode 100644
index 0000000000..99210e91cb
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/process.c
@@ -0,0 +1,753 @@
+/*
+ * linux/arch/x86-64/kernel/process.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * X86-64 port
+ * Andi Kleen.
+ *
+ * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
+ *
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Modified for Xen
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#include <stdarg.h>
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elfcore.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/module.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/ptrace.h>
+#include <linux/utsname.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/mmu_context.h>
+#include <asm/pda.h>
+#include <asm/prctl.h>
+#include <asm/kdebug.h>
+#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/xen-public/physdev.h>
+#include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/hardirq.h>
+#include <asm/ia32.h>
+
+asmlinkage extern void ret_from_fork(void);
+
+unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
+
+atomic_t hlt_counter = ATOMIC_INIT(0);
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+
+/*
+ * Powermanagement idle function, if any..
+ */
+void (*pm_idle)(void);
+static cpumask_t cpu_idle_map;
+
+/* Reference-counted veto on halting in the idle loop. */
+void disable_hlt(void)
+{
+ atomic_inc(&hlt_counter);
+}
+
+EXPORT_SYMBOL(disable_hlt);
+
+/* Drop one reference taken by disable_hlt(). */
+void enable_hlt(void)
+{
+ atomic_dec(&hlt_counter);
+}
+
+EXPORT_SYMBOL(enable_hlt);
+
+/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */
+extern void stop_hz_timer(void);
+extern void start_hz_timer(void);
+/*
+ * Xen replacement for default_idle()/poll_idle(): stop the periodic
+ * tick and block in the hypervisor until an event arrives, unless a
+ * reschedule is already pending.
+ */
+void xen_idle(void)
+{
+ local_irq_disable();
+
+ if (need_resched()) {
+ local_irq_enable();
+ } else {
+ stop_hz_timer();
+ HYPERVISOR_block(); /* implicit local_irq_enable() */
+ start_hz_timer();
+ }
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+/*
+ * "Offline" a CPU under Xen: mark it CPU_DEAD and yield to the
+ * hypervisor until cpu_state flips back to CPU_UP_PREPARE, then
+ * flush the TLB and rejoin the online map.
+ */
+static inline void play_dead(void)
+{
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /* We shouldn't have to disable interrupts while dead, but
+ * some interrupts just don't seem to go away, and this makes
+ * it "work" for testing purposes. */
+ /* Death loop */
+ while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+ HYPERVISOR_yield();
+
+ local_irq_disable();
+ __flush_tlb_all();
+ cpu_set(smp_processor_id(), cpu_online_map);
+ local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+ int cpu = smp_processor_id();
+
+ /* endless idle loop with no priority at all */
+ while (1) {
+ while (!need_resched()) {
+ /* Ack cpu_idle_wait()'s request before idling. */
+ if (cpu_isset(cpu, cpu_idle_map))
+ cpu_clear(cpu, cpu_idle_map);
+ rmb();
+
+ if (cpu_is_offline(cpu))
+ play_dead();
+
+ __IRQ_STAT(cpu,idle_timestamp) = jiffies;
+ xen_idle();
+ }
+ schedule();
+ }
+}
+
+/*
+ * Wait until every online CPU has passed through the idle loop at
+ * least once: set each CPU's bit in cpu_idle_map and sleep until the
+ * idle loops have cleared them all.
+ */
+void cpu_idle_wait(void)
+{
+ int cpu;
+ cpumask_t map;
+
+ for_each_online_cpu(cpu)
+ cpu_set(cpu, cpu_idle_map);
+
+ wmb();
+ do {
+ ssleep(1);
+ cpus_and(map, cpu_idle_map, cpu_online_map);
+ } while (!cpus_empty(map));
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */
+/* Always use xen_idle() instead. */
+void __init select_idle_routine(const struct cpuinfo_x86 *c) {}
+
+/* Prints also some state that isn't saved in the pt_regs */
+/*
+ * Dump the full register state: the pt_regs contents plus live segment
+ * selectors and the FS/GS/shadow-GS base MSRs, which pt_regs does not
+ * capture.
+ */
+void __show_regs(struct pt_regs * regs)
+{
+ unsigned long fs, gs, shadowgs;
+ unsigned int fsindex,gsindex;
+ unsigned int ds,cs,es; 
+
+ printk("\n");
+ print_modules();
+ printk("Pid: %d, comm: %.20s %s %s\n", 
+ current->pid, current->comm, print_tainted(), system_utsname.release);
+ printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
+ printk_address(regs->rip); 
+ printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+ printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+ regs->rax, regs->rbx, regs->rcx);
+ printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+ regs->rdx, regs->rsi, regs->rdi); 
+ printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+ regs->rbp, regs->r8, regs->r9); 
+ printk("R10: %016lx R11: %016lx R12: %016lx\n",
+ regs->r10, regs->r11, regs->r12); 
+ printk("R13: %016lx R14: %016lx R15: %016lx\n",
+ regs->r13, regs->r14, regs->r15); 
+
+ /* Read the current segment selectors directly from the CPU. */
+ asm("movl %%ds,%0" : "=r" (ds)); 
+ asm("movl %%cs,%0" : "=r" (cs)); 
+ asm("movl %%es,%0" : "=r" (es)); 
+ asm("movl %%fs,%0" : "=r" (fsindex));
+ asm("movl %%gs,%0" : "=r" (gsindex));
+
+ rdmsrl(MSR_FS_BASE, fs);
+ rdmsrl(MSR_GS_BASE, gs); 
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
+
+ printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
+ fs,fsindex,gs,gsindex,shadowgs); 
+ printk("CS: %04x DS: %04x ES: %04x\n", cs, ds, es); 
+
+}
+
+/* Full register dump plus a stack backtrace from the saved RSP. */
+void show_regs(struct pt_regs *regs)
+{
+ __show_regs(regs);
+ show_trace(&regs->rsp);
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+/*
+ * Free current thread data structures etc..
+ * Releases the per-thread I/O permission bitmap, if one was allocated,
+ * and invalidates the corresponding TSS copy.
+ */
+void exit_thread(void)
+{
+ struct task_struct *me = current;
+ struct thread_struct *t = &me->thread;
+ if (me->thread.io_bitmap_ptr) { 
+ struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+
+ kfree(t->io_bitmap_ptr);
+ t->io_bitmap_ptr = NULL;
+ /*
+ * Careful, clear this in the TSS too:
+ */
+ memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
+ t->io_bitmap_max = 0;
+ put_cpu();
+ }
+}
+
+/* Set the user GS selector via a Xen hypercall instead of mov %gs. */
+void load_gs_index(unsigned gs)
+{
+ HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs);
+}
+
+/*
+ * Reset the current thread's state for exec: resolve a pending 32/64
+ * ABI switch, clear all debug registers and TLS entries, and forget
+ * any FPU state.
+ */
+void flush_thread(void)
+{
+ struct task_struct *tsk = current;
+ struct thread_info *t = current_thread_info();
+
+ if (t->flags & _TIF_ABI_PENDING)
+ t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
+
+ tsk->thread.debugreg0 = 0;
+ tsk->thread.debugreg1 = 0;
+ tsk->thread.debugreg2 = 0;
+ tsk->thread.debugreg3 = 0;
+ tsk->thread.debugreg6 = 0;
+ tsk->thread.debugreg7 = 0;
+ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
+ /*
+ * Forget coprocessor state..
+ */
+ clear_fpu(tsk);
+ clear_used_math();
+}
+
+/*
+ * Final sanity check when reaping a task: a dead process must not
+ * still own an LDT.
+ */
+void release_thread(struct task_struct *dead_task)
+{
+ if (dead_task->mm) {
+ if (dead_task->mm->context.size) {
+ printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+ dead_task->comm,
+ dead_task->mm->context.ldt,
+ dead_task->mm->context.size);
+ BUG();
+ }
+ }
+}
+
+/*
+ * Install a 32-bit flat user-data TLS descriptor (4GB limit, page
+ * granularity) with the given base address into slot 'tls' of the
+ * task's TLS array.
+ */
+static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
+{
+ struct user_desc ud = { 
+ .base_addr = addr,
+ .limit = 0xfffff,
+ .contents = (3 << 3), /* user */
+ .seg_32bit = 1,
+ .limit_in_pages = 1,
+ .useable = 1,
+ };
+ struct n_desc_struct *desc = (void *)t->thread.tls_array;
+ desc += tls;
+ desc->a = LDT_entry_a(&ud); 
+ desc->b = LDT_entry_b(&ud); 
+}
+
+/*
+ * Extract the 32-bit base address from TLS descriptor slot 'tls'
+ * (reassembled from the descriptor's split base0/base1/base2 fields).
+ */
+static inline u32 read_32bit_tls(struct task_struct *t, int tls)
+{
+ struct desc_struct *desc = (void *)t->thread.tls_array;
+ desc += tls;
+ return desc->base0 | 
+ (((u32)desc->base1) << 16) | 
+ (((u32)desc->base2) << 24);
+}
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+/* Flush lazy FPU state to memory before the task is duplicated. */
+void prepare_to_copy(struct task_struct *tsk)
+{
+ unlazy_fpu(tsk);
+}
+
+/*
+ * Set up the architecture state of a newly forked task: copy the
+ * parent's pt_regs (the child returns 0 in rax), record the kernel
+ * and user stack pointers, snapshot segment selectors, duplicate the
+ * I/O permission bitmap, handle CLONE_SETTLS, and propagate io_pl.
+ * rsp == ~0UL makes the child run on its own pt_regs -- presumably
+ * the kernel-thread case; confirm against callers.
+ */
+int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, 
+		unsigned long unused,
+	struct task_struct * p, struct pt_regs * regs)
+{
+ int err;
+ struct pt_regs * childregs;
+ struct task_struct *me = current;
+
+ childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+
+ *childregs = *regs;
+
+ childregs->rax = 0;
+ childregs->rsp = rsp;
+ if (rsp == ~0UL) {
+ childregs->rsp = (unsigned long)childregs;
+ }
+
+ p->thread.rsp = (unsigned long) childregs;
+ p->thread.rsp0 = (unsigned long) (childregs+1);
+ p->thread.userrsp = me->thread.userrsp; 
+
+ set_ti_thread_flag(p->thread_info, TIF_FORK);
+
+ p->thread.fs = me->thread.fs;
+ p->thread.gs = me->thread.gs;
+
+ asm("movl %%gs,%0" : "=m" (p->thread.gsindex));
+ asm("movl %%fs,%0" : "=m" (p->thread.fsindex));
+ asm("movl %%es,%0" : "=m" (p->thread.es));
+ asm("movl %%ds,%0" : "=m" (p->thread.ds));
+
+ if (unlikely(me->thread.io_bitmap_ptr != NULL)) { 
+ p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+ if (!p->thread.io_bitmap_ptr) {
+ p->thread.io_bitmap_max = 0;
+ return -ENOMEM;
+ }
+ memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+ } 
+
+ /*
+ * Set a new TLS for the child thread?
+ */
+ if (clone_flags & CLONE_SETTLS) {
+#ifdef CONFIG_IA32_EMULATION
+ if (test_thread_flag(TIF_IA32))
+ err = ia32_child_tls(p, childregs); 
+ else 			
+#endif	 
+ err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
+ if (err) 
+ goto out;
+ } 
+ p->thread.io_pl = current->thread.io_pl;
+
+ err = 0;
+out:
+ if (err && p->thread.io_bitmap_ptr) {
+ kfree(p->thread.io_bitmap_ptr);
+ p->thread.io_bitmap_max = 0;
+ }
+ return err;
+}
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+ HYPERVISOR_set_debugreg((register), \
+ (thread->debugreg ## register))
+
+
+/*
+ * Save the task's FPU/SSE state with fxsave, clear pending FP
+ * exceptions (fnclex), and mark the FPU as no longer in use.
+ */
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+ asm volatile( "rex64 ; fxsave %0 ; fnclex"
+ : "=m" (tsk->thread.i387.fxsave));
+ tsk->thread_info->status &= ~TS_USEDFPU;
+}
+
+/*
+ * switch_to(x,y) should switch tasks from x to y.
+ *
+ * This could still be optimized:
+ * - fold all the options into a flag word and test it with a single test.
+ * - could test fs/gs bitsliced
+ */
+struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ struct thread_struct *prev = &prev_p->thread,
+ *next = &next_p->thread;
+ int cpu = smp_processor_id();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ physdev_op_t iopl_op, iobmp_op;
+ multicall_entry_t _mcl[8], *mcl = _mcl;
+
+ /*
+ * This is basically '__unlazy_fpu', except that we queue a
+ * multicall to indicate FPU task switch, rather than
+ * synchronously trapping to Xen.
+ */
+ if (prev_p->thread_info->status & TS_USEDFPU) {
+ __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 1;
+ mcl++;
+ }
+
+ /*
+ * Reload esp0, LDT and the page table pointer:
+ */
+ tss->rsp0 = next->rsp0;
+ mcl->op = __HYPERVISOR_stack_switch;
+ mcl->args[0] = __KERNEL_DS;
+ mcl->args[1] = tss->rsp0;
+ mcl++;
+
+ /*
+ * Load the per-thread Thread-Local Storage descriptor.
+ * This is load_TLS(next, cpu) with multicalls.
+ */
+#define C(i) do { \
+ if (unlikely(next->tls_array[i] != prev->tls_array[i])) { \
+ mcl->op = __HYPERVISOR_update_descriptor; \
+ mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu) \
+ [GDT_ENTRY_TLS_MIN + i]); \
+ mcl->args[1] = next->tls_array[i]; \
+ mcl++; \
+ } \
+} while (0)
+ C(0); C(1); C(2);
+#undef C
+
+ if (unlikely(prev->io_pl != next->io_pl)) {
+ iopl_op.cmd = PHYSDEVOP_SET_IOPL;
+ iopl_op.u.set_iopl.iopl = next->io_pl;
+ mcl->op = __HYPERVISOR_physdev_op;
+ mcl->args[0] = (unsigned long)&iopl_op;
+ mcl++;
+ }
+
+ if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
+ iobmp_op.cmd =
+ PHYSDEVOP_SET_IOBITMAP;
+ iobmp_op.u.set_iobitmap.bitmap =
+ (unsigned long)next->io_bitmap_ptr;
+ iobmp_op.u.set_iobitmap.nr_ports =
+ next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
+ mcl->op = __HYPERVISOR_physdev_op;
+ mcl->args[0] = (unsigned long)&iobmp_op;
+ mcl++;
+ }
+
+ (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
+ /*
+ * Switch DS and ES.
+ * This won't pick up thread selector changes, but I guess that is ok.
+ */
+ asm volatile("movl %%es,%0" : "=m" (prev->es));
+ if (unlikely(next->es | prev->es))
+ loadsegment(es, next->es);
+
+ asm volatile ("movl %%ds,%0" : "=m" (prev->ds));
+ if (unlikely(next->ds | prev->ds))
+ loadsegment(ds, next->ds);
+
+ /*
+ * Switch FS and GS.
+ */
+ {
+ unsigned fsindex;
+ asm volatile("movl %%fs,%0" : "=g" (fsindex));
+ /* segment register != 0 always requires a reload.
+ also reload when it has changed.
+ when prev process used 64bit base always reload
+ to avoid an information leak. */
+ if (unlikely(fsindex | next->fsindex | prev->fs)) {
+ loadsegment(fs, next->fsindex);
+ /* check if the user used a selector != 0
+ * if yes clear 64bit base, since overloaded base
+ * is always mapped to the Null selector
+ */
+ if (fsindex)
+ prev->fs = 0;
+ }
+ /* when next process has a 64bit base use it */
+ if (next->fs) {
+ HYPERVISOR_set_segment_base(SEGBASE_FS, next->fs);
+ }
+ prev->fsindex = fsindex;
+ }
+ {
+ unsigned gsindex;
+ asm volatile("movl %%gs,%0" : "=g" (gsindex));
+ if (unlikely(gsindex | next->gsindex | prev->gs)) {
+ load_gs_index(next->gsindex);
+ if (gsindex)
+ prev->gs = 0;
+ }
+ if (next->gs)
+ HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs);
+ prev->gsindex = gsindex;
+ }
+
+ /*
+ * Switch the PDA context.
+ */
+ prev->userrsp = read_pda(oldrsp);
+ write_pda(oldrsp, next->userrsp);
+ write_pda(pcurrent, next_p);
+ write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+
+ /*
+ * Now maybe reload the debug registers
+ */
+ if (unlikely(next->debugreg7)) {
+ loaddebug(next, 0);
+ loaddebug(next, 1);
+ loaddebug(next, 2);
+ loaddebug(next, 3);
+ /* no 4 and 5 */
+ loaddebug(next, 6);
+ loaddebug(next, 7);
+ }
+
+ return prev_p;
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage
+long sys_execve(char __user *name, char __user * __user *argv,
+ char __user * __user *envp, struct pt_regs regs)
+{
+ long error;
+ char * filename;
+
+ filename = getname(name);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ return error;
+ error = do_execve(filename, argv, envp, &regs);
+ if (error == 0) {
+ task_lock(current);
+ current->ptrace &= ~PT_DTRACE;
+ task_unlock(current);
+ }
+ putname(filename);
+ return error;
+}
+
+void set_personality_64bit(void)
+{
+ /* inherit personality from parent */
+
+ /* Make sure to be in 64bit mode */
+ clear_thread_flag(TIF_IA32);
+
+ /* TBD: overwrites user setup. Should have two bits.
+ But 64bit processes have always behaved this way,
+ so it's not too bad. The main problem is just that
+	   32bit children are affected again. */
+ current->personality &= ~READ_IMPLIES_EXEC;
+}
+
+asmlinkage long sys_fork(struct pt_regs *regs)
+{
+ return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
+}
+
+asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+{
+ if (!newsp)
+ newsp = regs->rsp;
+ return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage long sys_vfork(struct pt_regs *regs)
+{
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
+ NULL, NULL);
+}
+
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long stack;
+ u64 fp,rip;
+ int count = 0;
+
+ if (!p || p == current || p->state==TASK_RUNNING)
+ return 0;
+ stack = (unsigned long)p->thread_info;
+ if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
+ return 0;
+ fp = *(u64 *)(p->thread.rsp);
+ do {
+ if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+ return 0;
+ rip = *(u64 *)(fp+8);
+ if (!in_sched_functions(rip))
+ return rip;
+ fp = *(u64 *)fp;
+ } while (count++ < 16);
+ return 0;
+}
+
+long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+{
+ int ret = 0;
+ int doit = task == current;
+ int cpu;
+
+ switch (code) {
+ case ARCH_SET_GS:
+ if (addr >= TASK_SIZE)
+ return -EPERM;
+ cpu = get_cpu();
+ /* handle small bases via the GDT because that's faster to
+ switch. */
+ if (addr <= 0xffffffff) {
+ set_32bit_tls(task, GS_TLS, addr);
+ if (doit) {
+ load_TLS(&task->thread, cpu);
+ load_gs_index(GS_TLS_SEL);
+ }
+ task->thread.gsindex = GS_TLS_SEL;
+ task->thread.gs = 0;
+ } else {
+ task->thread.gsindex = 0;
+ task->thread.gs = addr;
+ if (doit) {
+ load_gs_index(0);
+ ret = HYPERVISOR_set_segment_base(SEGBASE_GS_USER, addr);
+ printk("do_arch_prctl: SET_SET: addr = %lx\n", addr);
+ }
+ }
+ put_cpu();
+ break;
+ case ARCH_SET_FS:
+ /* Not strictly needed for fs, but do it for symmetry
+ with gs */
+ if (addr >= TASK_SIZE)
+ return -EPERM;
+ cpu = get_cpu();
+ /* handle small bases via the GDT because that's faster to
+ switch. */
+ if (addr <= 0xffffffff) {
+ set_32bit_tls(task, FS_TLS, addr);
+ if (doit) {
+ load_TLS(&task->thread, cpu);
+ asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
+ }
+ task->thread.fsindex = FS_TLS_SEL;
+ task->thread.fs = 0;
+ } else {
+ task->thread.fsindex = 0;
+ task->thread.fs = addr;
+ if (doit) {
+ /* set the selector to 0 to not confuse
+ __switch_to */
+ asm volatile("movl %0,%%fs" :: "r" (0));
+ ret = HYPERVISOR_set_segment_base(SEGBASE_FS, addr);
+
+ }
+ }
+ put_cpu();
+ break;
+ case ARCH_GET_FS: {
+ unsigned long base;
+ if (task->thread.fsindex == FS_TLS_SEL)
+ base = read_32bit_tls(task, FS_TLS);
+ else if (doit) {
+ rdmsrl(MSR_FS_BASE, base);
+ printk("do_arch_prctl: GET_FS: addr = %lx\n", addr);
+ } else
+ base = task->thread.fs;
+ ret = put_user(base, (unsigned long __user *)addr);
+ break;
+ }
+ case ARCH_GET_GS: {
+ unsigned long base;
+ if (task->thread.gsindex == GS_TLS_SEL)
+ base = read_32bit_tls(task, GS_TLS);
+ else if (doit) {
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ printk("do_arch_prctl: GET_GS: addr = %lx\n", addr);
+ } else
+ base = task->thread.gs;
+ ret = put_user(base, (unsigned long __user *)addr);
+ break;
+ }
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+long sys_arch_prctl(int code, unsigned long addr)
+{
+ return do_arch_prctl(current, code, addr);
+}
+
+/*
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+ struct pt_regs *pp, ptregs;
+
+ pp = (struct pt_regs *)(tsk->thread.rsp0);
+ --pp;
+
+ ptregs = *pp;
+ ptregs.cs &= 0xffff;
+ ptregs.ss &= 0xffff;
+
+ elf_core_copy_regs(regs, &ptregs);
+
+ boot_option_idle_override = 1;
+ return 1;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c
new file mode 100644
index 0000000000..8951d61c2a
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup.c
@@ -0,0 +1,1378 @@
+/*
+ * linux/arch/x86-64/kernel/setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Nov 2001 Dave Jones <davej@suse.de>
+ * Forked from i386 setup code.
+ *
+ * $Id$
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <linux/console.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/kallsyms.h>
+#include <linux/edd.h>
+#include <linux/percpu.h>
+#include <asm/mtrr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/msr.h>
+#include <asm/desc.h>
+#include <video/edid.h>
+#include <asm/e820.h>
+#include <asm/dma.h>
+#include <asm/mpspec.h>
+#include <asm/mmu_context.h>
+#include <asm/bootsetup.h>
+#include <asm/proto.h>
+#include <asm/setup.h>
+#include <asm/mach_apic.h>
+#include <asm/numa.h>
+#include <asm-xen/xen-public/physdev.h>
+#include "setup_arch_pre.h"
+#include <asm-xen/hypervisor.h>
+
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
+
+#include <asm/mach-xen/setup_arch_post.h>
+
+extern unsigned long start_pfn;
+
+#if 0
+struct edid_info {
+ unsigned char dummy[128];
+};
+#endif
+
+extern struct edid_info edid_info;
+
+/* Allows setting of maximum possible memory size */
+unsigned long xen_override_max_pfn;
+/*
+ * Machine setup..
+ */
+
+struct cpuinfo_x86 boot_cpu_data;
+
+unsigned long mmu_cr4_features;
+EXPORT_SYMBOL_GPL(mmu_cr4_features);
+
+int acpi_disabled;
+EXPORT_SYMBOL(acpi_disabled);
+#ifdef CONFIG_ACPI_BOOT
+extern int __initdata acpi_ht;
+extern acpi_interrupt_flags acpi_sci_flags;
+int __initdata acpi_force = 0;
+#endif
+
+int acpi_numa __initdata;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+
+/* Boot loader ID as an integer, for the benefit of proc_dointvec */
+int bootloader_type;
+
+unsigned long saved_video_mode;
+
+#ifdef CONFIG_SWIOTLB
+int swiotlb;
+EXPORT_SYMBOL(swiotlb);
+#endif
+
+/*
+ * Setup options
+ */
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+struct sys_desc_table_struct {
+ unsigned short length;
+ unsigned char table[0];
+};
+
+struct edid_info edid_info;
+struct e820map e820;
+
+unsigned char aux_device_present;
+
+extern int root_mountflags;
+extern char _text, _etext, _edata, _end;
+
+char command_line[COMMAND_LINE_SIZE];
+
+struct resource standard_io_resources[] = {
+ { .name = "dma1", .start = 0x00, .end = 0x1f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "pic1", .start = 0x20, .end = 0x21,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "timer0", .start = 0x40, .end = 0x43,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "timer1", .start = 0x50, .end = 0x53,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "keyboard", .start = 0x60, .end = 0x6f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "dma page reg", .start = 0x80, .end = 0x8f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "pic2", .start = 0xa0, .end = 0xa1,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "dma2", .start = 0xc0, .end = 0xdf,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "fpu", .start = 0xf0, .end = 0xff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO }
+};
+
+#define STANDARD_IO_RESOURCES \
+ (sizeof standard_io_resources / sizeof standard_io_resources[0])
+
+#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
+
+struct resource data_resource = {
+ .name = "Kernel data",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_RAM,
+};
+struct resource code_resource = {
+ .name = "Kernel code",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_RAM,
+};
+
+#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+static struct resource system_rom_resource = {
+ .name = "System ROM",
+ .start = 0xf0000,
+ .end = 0xfffff,
+ .flags = IORESOURCE_ROM,
+};
+
+static struct resource extension_rom_resource = {
+ .name = "Extension ROM",
+ .start = 0xe0000,
+ .end = 0xeffff,
+ .flags = IORESOURCE_ROM,
+};
+
+static struct resource adapter_rom_resources[] = {
+ { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
+ .flags = IORESOURCE_ROM },
+ { .name = "Adapter ROM", .start = 0, .end = 0,
+ .flags = IORESOURCE_ROM },
+ { .name = "Adapter ROM", .start = 0, .end = 0,
+ .flags = IORESOURCE_ROM },
+ { .name = "Adapter ROM", .start = 0, .end = 0,
+ .flags = IORESOURCE_ROM },
+ { .name = "Adapter ROM", .start = 0, .end = 0,
+ .flags = IORESOURCE_ROM },
+ { .name = "Adapter ROM", .start = 0, .end = 0,
+ .flags = IORESOURCE_ROM }
+};
+#endif
+
+#define ADAPTER_ROM_RESOURCES \
+ (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+
+static struct resource video_rom_resource = {
+ .name = "Video ROM",
+ .start = 0xc0000,
+ .end = 0xc7fff,
+ .flags = IORESOURCE_ROM,
+};
+
+static struct resource video_ram_resource = {
+ .name = "Video RAM area",
+ .start = 0xa0000,
+ .end = 0xbffff,
+ .flags = IORESOURCE_RAM,
+};
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+
+static int __init romchecksum(unsigned char *rom, unsigned long length)
+{
+ unsigned char *p, sum = 0;
+
+ for (p = rom; p < rom + length; p++)
+ sum += *p;
+ return sum == 0;
+}
+
+static void __init probe_roms(void)
+{
+ unsigned long start, length, upper;
+ unsigned char *rom;
+ int i;
+
+ /* video rom */
+ upper = adapter_rom_resources[0].start;
+ for (start = video_rom_resource.start; start < upper; start += 2048) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+
+ video_rom_resource.start = start;
+
+ /* 0 < length <= 0x7f * 512, historically */
+ length = rom[2] * 512;
+
+ /* if checksum okay, trust length byte */
+ if (length && romchecksum(rom, length))
+ video_rom_resource.end = start + length - 1;
+
+ request_resource(&iomem_resource, &video_rom_resource);
+ break;
+ }
+
+ start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+ if (start < upper)
+ start = upper;
+
+ /* system rom */
+ request_resource(&iomem_resource, &system_rom_resource);
+ upper = system_rom_resource.start;
+
+ /* check for extension rom (ignore length byte!) */
+ rom = isa_bus_to_virt(extension_rom_resource.start);
+ if (romsignature(rom)) {
+ length = extension_rom_resource.end - extension_rom_resource.start + 1;
+ if (romchecksum(rom, length)) {
+ request_resource(&iomem_resource, &extension_rom_resource);
+ upper = extension_rom_resource.start;
+ }
+ }
+
+ /* check for adapter roms on 2k boundaries */
+ for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+
+ /* 0 < length <= 0x7f * 512, historically */
+ length = rom[2] * 512;
+
+ /* but accept any length that fits if checksum okay */
+ if (!length || start + length > upper || !romchecksum(rom, length))
+ continue;
+
+ adapter_rom_resources[i].start = start;
+ adapter_rom_resources[i].end = start + length - 1;
+ request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+ start = adapter_rom_resources[i++].end & ~2047UL;
+ }
+}
+#endif
+
+/*
+ * Point at the empty zero page to start with. We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
+
+u32 *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+
+EXPORT_SYMBOL(phys_to_machine_mapping);
+
+DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
+DEFINE_PER_CPU(int, nr_multicall_ents);
+
+/* Raw start-of-day parameters from the hypervisor. */
+union xen_start_info_union xen_start_info_union;
+
+static __init void parse_cmdline_early (char ** cmdline_p)
+{
+ char c = ' ', *to = command_line, *from = COMMAND_LINE;
+ int len = 0, max_cmdline;
+
+ if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
+ max_cmdline = COMMAND_LINE_SIZE;
+ memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
+ /* Save unparsed command line copy for /proc/cmdline */
+ saved_command_line[max_cmdline-1] = '\0';
+
+ for (;;) {
+ if (c != ' ')
+ goto next_char;
+
+#ifdef CONFIG_SMP
+ /*
+ * If the BIOS enumerates physical processors before logical,
+ * maxcpus=N at enumeration-time can be used to disable HT.
+ */
+ else if (!memcmp(from, "maxcpus=", 8)) {
+ extern unsigned int maxcpus;
+
+ maxcpus = simple_strtoul(from + 8, NULL, 0);
+ }
+#endif
+#ifdef CONFIG_ACPI_BOOT
+ /* "acpi=off" disables both ACPI table parsing and interpreter init */
+ if (!memcmp(from, "acpi=off", 8))
+ disable_acpi();
+
+ if (!memcmp(from, "acpi=force", 10)) {
+ /* add later when we do DMI horrors: */
+ acpi_force = 1;
+ acpi_disabled = 0;
+ }
+
+ /* acpi=ht just means: do ACPI MADT parsing
+ at bootup, but don't enable the full ACPI interpreter */
+ if (!memcmp(from, "acpi=ht", 7)) {
+ if (!acpi_force)
+ disable_acpi();
+ acpi_ht = 1;
+ }
+ else if (!memcmp(from, "pci=noacpi", 10))
+ acpi_disable_pci();
+ else if (!memcmp(from, "acpi=noirq", 10))
+ acpi_noirq_set();
+
+ else if (!memcmp(from, "acpi_sci=edge", 13))
+ acpi_sci_flags.trigger = 1;
+ else if (!memcmp(from, "acpi_sci=level", 14))
+ acpi_sci_flags.trigger = 3;
+ else if (!memcmp(from, "acpi_sci=high", 13))
+ acpi_sci_flags.polarity = 1;
+ else if (!memcmp(from, "acpi_sci=low", 12))
+ acpi_sci_flags.polarity = 3;
+
+ /* acpi=strict disables out-of-spec workarounds */
+ else if (!memcmp(from, "acpi=strict", 11)) {
+ acpi_strict = 1;
+ }
+#endif
+
+#if 0
+ if (!memcmp(from, "nolapic", 7) ||
+ !memcmp(from, "disableapic", 11))
+ disable_apic = 1;
+
+ if (!memcmp(from, "noapic", 6))
+ skip_ioapic_setup = 1;
+
+ if (!memcmp(from, "apic", 4)) {
+ skip_ioapic_setup = 0;
+ ioapic_force = 1;
+ }
+#endif
+
+ if (!memcmp(from, "mem=", 4))
+ parse_memopt(from+4, &from);
+
+#ifdef CONFIG_DISCONTIGMEM
+ if (!memcmp(from, "numa=", 5))
+ numa_setup(from+5);
+#endif
+
+#ifdef CONFIG_GART_IOMMU
+ if (!memcmp(from,"iommu=",6)) {
+ iommu_setup(from+6);
+ }
+#endif
+
+ if (!memcmp(from,"oops=panic", 10))
+ panic_on_oops = 1;
+
+ if (!memcmp(from, "noexec=", 7))
+ nonx_setup(from + 7);
+
+ next_char:
+ c = *(from++);
+ if (!c)
+ break;
+ if (COMMAND_LINE_SIZE <= ++len)
+ break;
+ *(to++) = c;
+ }
+ *to = '\0';
+ *cmdline_p = command_line;
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+static void __init contig_initmem_init(void)
+{
+ unsigned long bootmap_size, bootmap;
+
+ /*
+ * partially used pages are not usable - thus
+ * we are rounding upwards:
+ */
+
+ bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
+ bootmap = start_pfn;
+ bootmap_size = init_bootmem(bootmap, end_pfn);
+ reserve_bootmem(bootmap, bootmap_size);
+
+ free_bootmem(start_pfn << PAGE_SHIFT, (end_pfn - start_pfn) << PAGE_SHIFT);
+ printk("Registering memory for bootmem: from %lx, size = %lx\n",
+ start_pfn << PAGE_SHIFT, (end_pfn - start_pfn) << PAGE_SHIFT);
+ /*
+ * This should cover kernel_end
+ */
+#if 0
+ reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+ bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+#endif
+ reserve_bootmem(0, (PFN_PHYS(start_pfn) +
+ bootmap_size + PAGE_SIZE-1));
+
+}
+#endif
+
+/* Use inline assembly to define this because the nops are defined
+ as inline assembly strings in the include files and we cannot
+ get them easily into strings. */
+asm("\t.data\nk8nops: "
+ K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+ K8_NOP7 K8_NOP8);
+
+extern unsigned char k8nops[];
+static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
+ NULL,
+ k8nops,
+ k8nops + 1,
+ k8nops + 1 + 2,
+ k8nops + 1 + 2 + 3,
+ k8nops + 1 + 2 + 3 + 4,
+ k8nops + 1 + 2 + 3 + 4 + 5,
+ k8nops + 1 + 2 + 3 + 4 + 5 + 6,
+ k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+
+/* Replace instructions with better alternatives for this CPU type.
+
+ This runs before SMP is initialized to avoid SMP problems with
+   self modifying code. This implies that asymmetric systems where
+ APs have less capabilities than the boot processor are not handled.
+ In this case boot with "noreplacement". */
+void apply_alternatives(void *start, void *end)
+{
+ struct alt_instr *a;
+ int diff, i, k;
+ for (a = start; (void *)a < end; a++) {
+ if (!boot_cpu_has(a->cpuid))
+ continue;
+
+ BUG_ON(a->replacementlen > a->instrlen);
+ __inline_memcpy(a->instr, a->replacement, a->replacementlen);
+ diff = a->instrlen - a->replacementlen;
+
+ /* Pad the rest with nops */
+ for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
+ k = diff;
+ if (k > ASM_NOP_MAX)
+ k = ASM_NOP_MAX;
+ __inline_memcpy(a->instr + i, k8_nops[k], k);
+ }
+ }
+}
+
+static int no_replacement __initdata = 0;
+
+void __init alternative_instructions(void)
+{
+ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+ if (no_replacement)
+ return;
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+}
+
+static int __init noreplacement_setup(char *s)
+{
+ no_replacement = 1;
+ return 0;
+}
+
+__setup("noreplacement", noreplacement_setup);
+
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+struct edd edd;
+#ifdef CONFIG_EDD_MODULE
+EXPORT_SYMBOL(edd);
+#endif
+/**
+ * copy_edd() - Copy the BIOS EDD information
+ * from boot_params into a safe place.
+ *
+ */
+static inline void copy_edd(void)
+{
+ memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
+ memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
+ edd.mbr_signature_nr = EDD_MBR_SIG_NR;
+ edd.edd_info_nr = EDD_NR;
+}
+#else
+static inline void copy_edd(void)
+{
+}
+#endif
+
+#if 0
+#define EBDA_ADDR_POINTER 0x40E
+static void __init reserve_ebda_region(void)
+{
+ unsigned int addr;
+ /**
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E
+ */
+ addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
+ addr <<= 4;
+ if (addr)
+ reserve_bootmem_generic(addr, PAGE_SIZE);
+}
+#endif
+
+/*
+ * Guest physical starts from 0.
+ */
+
+unsigned long __init xen_end_of_ram(void)
+{
+ unsigned long max_end_pfn = xen_start_info.nr_pages;
+
+ if ( xen_override_max_pfn < max_end_pfn)
+ xen_override_max_pfn = max_end_pfn;
+
+ return xen_override_max_pfn;
+}
+
+static void __init print_memory_map(char *who)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ early_printk(" %s: %016Lx - %016Lx ", who,
+ e820.map[i].addr,
+ e820.map[i].addr + e820.map[i].size);
+ switch (e820.map[i].type) {
+ case E820_RAM: early_printk("(usable)\n");
+ break;
+ case E820_RESERVED:
+ early_printk("(reserved)\n");
+ break;
+ case E820_ACPI:
+ early_printk("(ACPI data)\n");
+ break;
+ case E820_NVS:
+ early_printk("(ACPI NVS)\n");
+ break;
+ default: early_printk("type %u\n", e820.map[i].type);
+ break;
+ }
+ }
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+ unsigned long low_mem_size;
+ int i, j;
+ physdev_op_t op;
+
+#if 0
+ ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+#else
+ ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
+#endif
+ drive_info = DRIVE_INFO;
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+ screen_info = SCREEN_INFO;
+#endif
+ edid_info = EDID_INFO;
+ aux_device_present = AUX_DEVICE_INFO;
+ saved_video_mode = SAVED_VIDEO_MODE;
+ bootloader_type = LOADER_TYPE;
+
+#ifdef CONFIG_BLK_DEV_RAM
+ rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+ rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+ rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+/* register_console(&xen_console); */
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+ /* This is drawn from a dump from vgacon:startup in standard Linux. */
+ screen_info.orig_video_mode = 3;
+ screen_info.orig_video_isVGA = 1;
+ screen_info.orig_video_lines = 25;
+ screen_info.orig_video_cols = 80;
+ screen_info.orig_video_ega_bx = 3;
+ screen_info.orig_video_points = 16;
+#endif
+ ARCH_SETUP
+ print_memory_map(machine_specific_memory_setup());
+
+ /* copy_edd(); */
+
+ if (!MOUNT_ROOT_RDONLY)
+ root_mountflags &= ~MS_RDONLY;
+ init_mm.start_code = (unsigned long) &_text;
+ init_mm.end_code = (unsigned long) &_etext;
+ init_mm.end_data = (unsigned long) &_edata;
+/* init_mm.brk = (unsigned long) &_end; */
+ init_mm.brk = start_pfn << PAGE_SHIFT;
+
+
+#if 0 /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
+ code_resource.start = virt_to_phys(&_text);
+ code_resource.end = virt_to_phys(&_etext)-1;
+ data_resource.start = virt_to_phys(&_etext);
+ data_resource.end = virt_to_phys(&_edata)-1;
+#endif
+ parse_cmdline_early(cmdline_p);
+
+ early_identify_cpu(&boot_cpu_data);
+
+ /*
+ * partially used pages are not usable - thus
+ * we are rounding upwards:
+ */
+#if 0
+ end_pfn = e820_end_of_ram();
+#else
+ end_pfn = xen_end_of_ram();
+#endif
+
+ check_efer();
+
+ init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
+
+#ifdef CONFIG_ACPI_NUMA
+ /*
+ * Parse SRAT to discover nodes.
+ */
+ acpi_numa_init();
+#endif
+
+#ifdef CONFIG_DISCONTIGMEM
+ numa_initmem_init(0, end_pfn);
+#else
+ contig_initmem_init();
+#endif
+
+ /* Reserve direct mapping and shared info etc. */
+// reserve_bootmem_generic(table_start << PAGE_SHIFT, (table_end + 1 - table_start) << PAGE_SHIFT);
+
+// reserve_bootmem_generic(0, (table_end + 1) << PAGE_SHIFT);
+
+ /* reserve kernel */
+// kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
+
+#if 0
+ /*
+ * reserve physical page 0 - it's a special BIOS page on many boxes,
+ * enabling clean reboots, SMP operation, laptop functions.
+ */
+ reserve_bootmem_generic(0, PAGE_SIZE);
+#endif
+
+ /* reserve ebda region */
+/* reserve_ebda_region(); */
+
+#ifdef CONFIG_SMP
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
+
+ /* Reserve SMP trampoline */
+ reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
+#endif
+
+#ifdef CONFIG_ACPI_SLEEP
+ /*
+ * Reserve low memory region for sleep support.
+ */
+ acpi_reserve_bootmem();
+#endif
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (xen_start_info.mod_start) {
+ if (LOADER_TYPE && INITRD_START) {
+ if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
+ /* reserve_bootmem_generic(INITRD_START, INITRD_SIZE); */
+ initrd_start = INITRD_START + PAGE_OFFSET;
+ initrd_end = initrd_start+INITRD_SIZE;
+ initrd_below_start_ok = 1;
+ }
+ else {
+ printk(KERN_ERR "initrd extends beyond end of memory "
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+ (unsigned long)(INITRD_START + INITRD_SIZE),
+ (unsigned long)(end_pfn << PAGE_SHIFT));
+ initrd_start = 0;
+ }
+ }
+ }
+#endif
+ paging_init();
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Find and reserve possible boot-time SMP configuration:
+ */
+ find_smp_config();
+#endif
+ /* Make sure we have a large enough P->M table. */
+ if (end_pfn > xen_start_info.nr_pages) {
+ phys_to_machine_mapping = alloc_bootmem(
+ max_pfn * sizeof(unsigned long));
+ memset(phys_to_machine_mapping, ~0,
+ max_pfn * sizeof(unsigned long));
+ memcpy(phys_to_machine_mapping,
+ (unsigned long *)xen_start_info.mfn_list,
+ xen_start_info.nr_pages * sizeof(unsigned long));
+ free_bootmem(
+ __pa(xen_start_info.mfn_list),
+ PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+ sizeof(unsigned long))));
+ }
+
+ pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE);
+
+ for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+ {
+ pfn_to_mfn_frame_list[j] =
+ virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+ }
+
+#if 0
+ check_ioapic();
+#endif
+
+#ifdef CONFIG_ACPI_BOOT
+ /*
+ * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
+ * Call this early for SRAT node setup.
+ */
+ acpi_boot_table_init();
+
+ /*
+ * Read APIC and some other early information from ACPI tables.
+ */
+ acpi_boot_init();
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * get boot-time SMP configuration:
+ */
+ if (smp_found_config)
+ get_smp_config();
+#ifndef CONFIG_XEN
+ init_apic_mappings();
+#endif
+#endif
+
+ /* XXX Disable irqdebug until we have a way to avoid interrupt
+ * conflicts. */
+/* noirqdebug_setup(""); */
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ /*
+ * Request address space for all standard RAM and ROM resources
+ * and also for regions reported as reserved by the e820.
+ */
+ probe_roms();
+#endif
+/* e820_reserve_resources(); */
+
+ request_resource(&iomem_resource, &video_ram_resource);
+
+ {
+ unsigned i;
+ /* request I/O space for devices used on all i[345]86 PCs */
+ for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+ request_resource(&ioport_resource, &standard_io_resources[i]);
+ }
+
+ /* Will likely break when you have unassigned resources with more
+ than 4GB memory and bridges that don't support more than 4GB.
+ Doing it properly would require to use pci_alloc_consistent
+ in this case. */
+ low_mem_size = ((end_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
+ if (low_mem_size > pci_mem_start)
+ pci_mem_start = low_mem_size;
+
+#ifdef CONFIG_GART_IOMMU
+ iommu_hole_init();
+#endif
+
+ op.cmd = PHYSDEVOP_SET_IOPL;
+ op.u.set_iopl.iopl = current->thread.io_pl = 1;
+ HYPERVISOR_physdev_op(&op);
+
+ if (xen_start_info.flags & SIF_INITDOMAIN) {
+ if (!(xen_start_info.flags & SIF_PRIVILEGED))
+ panic("Xen granted us console access "
+ "but not privileged status");
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+ conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+ conswitchp = &dummy_con;
+#endif
+#endif
+ } else {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ extern const struct consw xennull_con;
+ extern int console_use_vt;
+#if defined(CONFIG_VGA_CONSOLE)
+ /* disable VGA driver */
+ ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
+#endif
+ conswitchp = &xennull_con;
+ console_use_vt = 0;
+#endif
+ }
+}
+
+static int __init get_model_name(struct cpuinfo_x86 *c)
+{
+ unsigned int *v;
+
+ if (c->x86_cpuid_level < 0x80000004)
+ return 0;
+
+ v = (unsigned int *) c->x86_model_id;
+ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+ c->x86_model_id[48] = 0;
+ return 1;
+}
+
+
+static void __init display_cacheinfo(struct cpuinfo_x86 *c)
+{
+ unsigned int n, dummy, eax, ebx, ecx, edx;
+
+ n = c->x86_cpuid_level;
+
+ if (n >= 0x80000005) {
+ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
+ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+ c->x86_cache_size=(ecx>>24)+(edx>>24);
+ /* On K8 L1 TLB is inclusive, so don't count it */
+ c->x86_tlbsize = 0;
+ }
+
+ if (n >= 0x80000006) {
+ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
+ ecx = cpuid_ecx(0x80000006);
+ c->x86_cache_size = ecx >> 16;
+ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
+
+ printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+ c->x86_cache_size, ecx & 0xFF);
+ }
+
+ if (n >= 0x80000007)
+ cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
+ if (n >= 0x80000008) {
+ cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
+ }
+}
+
+
+static int __init init_amd(struct cpuinfo_x86 *c)
+{
+ int r;
+ int level;
+#ifdef CONFIG_NUMA
+ int cpu;
+#endif
+
+ /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+ clear_bit(0*32+31, &c->x86_capability);
+
+ /* C-stepping K8? */
+ level = cpuid_eax(1);
+ if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
+ set_bit(X86_FEATURE_K8_C, &c->x86_capability);
+
+ r = get_model_name(c);
+ if (!r) {
+ switch (c->x86) {
+ case 15:
+ /* Should distinguish Models here, but this is only
+ a fallback anyways. */
+ strcpy(c->x86_model_id, "Hammer");
+ break;
+ }
+ }
+ display_cacheinfo(c);
+
+ if (c->x86_cpuid_level >= 0x80000008) {
+ c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+ if (c->x86_num_cores & (c->x86_num_cores - 1))
+ c->x86_num_cores = 1;
+
+#ifdef CONFIG_NUMA
+ /* On a dual core setup the lower bits of apic id
+ distingush the cores. Fix up the CPU<->node mappings
+ here based on that.
+ Assumes number of cores is a power of two.
+ When using SRAT use mapping from SRAT. */
+ cpu = c->x86_apicid;
+ if (acpi_numa <= 0 && c->x86_num_cores > 1) {
+ cpu_to_node[cpu] = cpu >> hweight32(c->x86_num_cores - 1);
+ if (!node_online(cpu_to_node[cpu]))
+ cpu_to_node[cpu] = first_node(node_online_map);
+ }
+ printk(KERN_INFO "CPU %d(%d) -> Node %d\n",
+ cpu, c->x86_num_cores, cpu_to_node[cpu]);
+#endif
+ }
+
+ return r;
+}
+
+static void __init detect_ht(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ u32 eax, ebx, ecx, edx;
+ int index_lsb, index_msb, tmp;
+ int cpu = smp_processor_id();
+
+ if (!cpu_has(c, X86_FEATURE_HT))
+ return;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+ if (smp_num_siblings == 1) {
+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+ } else if (smp_num_siblings > 1) {
+ index_lsb = 0;
+ index_msb = 31;
+ /*
+ * At this point we only support two siblings per
+ * processor package.
+ */
+ if (smp_num_siblings > NR_CPUS) {
+ printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
+ smp_num_siblings = 1;
+ return;
+ }
+ tmp = smp_num_siblings;
+ while ((tmp & 1) == 0) {
+ tmp >>=1 ;
+ index_lsb++;
+ }
+ tmp = smp_num_siblings;
+ while ((tmp & 0x80000000 ) == 0) {
+ tmp <<=1 ;
+ index_msb--;
+ }
+ if (index_lsb != index_msb )
+ index_msb++;
+ phys_proc_id[cpu] = phys_pkg_id(index_msb);
+
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ phys_proc_id[cpu]);
+ }
+#endif
+}
+
+static void __init sched_cmp_hack(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ /* AMD dual core looks like HT but isn't really. Hide it from the
+ scheduler. This works around problems with the domain scheduler.
+ Also probably gives slightly better scheduling and disables
+ SMT nice which is harmful on dual core.
+ TBD tune the domain scheduler for dual core. */
+ if (c->x86_vendor == X86_VENDOR_AMD && cpu_has(c, X86_FEATURE_CMP_LEGACY))
+ smp_num_siblings = 1;
+#endif
+}
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+ /* Cache sizes */
+ unsigned n;
+
+ init_intel_cacheinfo(c);
+ n = c->x86_cpuid_level;
+ if (n >= 0x80000008) {
+ unsigned eax = cpuid_eax(0x80000008);
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
+ }
+
+ if (c->x86 == 15)
+ c->x86_cache_alignment = c->x86_clflush_size * 2;
+}
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+ char *v = c->x86_vendor_id;
+
+ if (!strcmp(v, "AuthenticAMD"))
+ c->x86_vendor = X86_VENDOR_AMD;
+ else if (!strcmp(v, "GenuineIntel"))
+ c->x86_vendor = X86_VENDOR_INTEL;
+ else
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+struct cpu_model_info {
+ int vendor;
+ int family;
+ char *model_names[16];
+};
+
+/* Do some early cpuid on the boot CPU to get some parameter that are
+ needed before check_bugs. Everything advanced is in identify_cpu
+ below. */
+void __init early_identify_cpu(struct cpuinfo_x86 *c)
+{
+ u32 tfms;
+
+ c->loops_per_jiffy = loops_per_jiffy;
+ c->x86_cache_size = -1;
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+ c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_vendor_id[0] = '\0'; /* Unset */
+ c->x86_model_id[0] = '\0'; /* Unset */
+ c->x86_clflush_size = 64;
+ c->x86_cache_alignment = c->x86_clflush_size;
+ c->x86_num_cores = 1;
+ c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
+ c->x86_cpuid_level = 0;
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+ /* Get vendor name */
+ cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
+ (unsigned int *)&c->x86_vendor_id[0],
+ (unsigned int *)&c->x86_vendor_id[8],
+ (unsigned int *)&c->x86_vendor_id[4]);
+
+ get_cpu_vendor(c);
+
+ /* Initialize the standard set of capabilities */
+ /* Note that the vendor-specific code below might override */
+
+ /* Intel-defined flags: level 0x00000001 */
+ if (c->cpuid_level >= 0x00000001) {
+ __u32 misc;
+ cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
+ &c->x86_capability[0]);
+ c->x86 = (tfms >> 8) & 0xf;
+ c->x86_model = (tfms >> 4) & 0xf;
+ c->x86_mask = tfms & 0xf;
+ if (c->x86 == 0xf) {
+ c->x86 += (tfms >> 20) & 0xff;
+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
+ }
+ if (c->x86_capability[0] & (1<<19))
+ c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+ c->x86_apicid = misc >> 24;
+ } else {
+ /* Have CPUID level 0 only - unheard of */
+ c->x86 = 4;
+ }
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+ int i;
+ u32 xlvl;
+
+ early_identify_cpu(c);
+
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ c->x86_cpuid_level = xlvl;
+ if ((xlvl & 0xffff0000) == 0x80000000) {
+ if (xlvl >= 0x80000001) {
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ c->x86_capability[5] = cpuid_ecx(0x80000001);
+ }
+ if (xlvl >= 0x80000004)
+ get_model_name(c); /* Default name */
+ }
+
+ /* Transmeta-defined flags: level 0x80860001 */
+ xlvl = cpuid_eax(0x80860000);
+ if ((xlvl & 0xffff0000) == 0x80860000) {
+ /* Don't set x86_cpuid_level here for now to not confuse. */
+ if (xlvl >= 0x80860001)
+ c->x86_capability[2] = cpuid_edx(0x80860001);
+ }
+
+ /*
+ * Vendor-specific initialization. In this section we
+ * canonicalize the feature flags, meaning if there are
+ * features a certain CPU supports which CPUID doesn't
+ * tell us, CPUID claiming incorrect flags, or other bugs,
+ * we handle them here.
+ *
+ * At the end of this section, c->x86_capability better
+ * indicate the features this CPU genuinely supports!
+ */
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ init_amd(c);
+ break;
+
+ case X86_VENDOR_INTEL:
+ init_intel(c);
+ break;
+
+ case X86_VENDOR_UNKNOWN:
+ default:
+ display_cacheinfo(c);
+ break;
+ }
+
+ select_idle_routine(c);
+ detect_ht(c);
+ sched_cmp_hack(c);
+
+ /*
+ * On SMP, boot_cpu_data holds the common feature set between
+ * all CPUs; so make sure that we indicate which features are
+ * common between the CPUs. The first time this routine gets
+ * executed, c == &boot_cpu_data.
+ */
+ if (c != &boot_cpu_data) {
+ /* AND the already accumulated flags with these */
+ for (i = 0 ; i < NCAPINTS ; i++)
+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+ }
+
+#ifdef CONFIG_X86_MCE
+ mcheck_init(c);
+#endif
+#ifdef CONFIG_NUMA
+ if (c != &boot_cpu_data)
+ numa_add_cpu(c - cpu_data);
+#endif
+}
+
+
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+ if (c->x86_model_id[0])
+ printk("%s", c->x86_model_id);
+
+ if (c->x86_mask || c->cpuid_level >= 0)
+ printk(" stepping %02x\n", c->x86_mask);
+ else
+ printk("\n");
+}
+
+/*
+ * Get CPU information for use by the procfs.
+ */
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ struct cpuinfo_x86 *c = v;
+
+ /*
+ * These flag bits must match the definitions in <asm/cpufeature.h>.
+ * NULL means this bit is undefined or reserved; either way it doesn't
+ * have meaning as far as Linux is concerned. Note that it's important
+ * to realize there is a difference between this table and CPUID -- if
+ * applications want to get the raw CPUID data, they should access
+ * /dev/cpu/<cpu_nr>/cpuid instead.
+ */
+ static char *x86_cap_flags[] = {
+ /* Intel-defined */
+ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
+
+ /* AMD-defined */
+ "pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
+ NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
+
+ /* Transmeta-defined */
+ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Other (Linux-defined) */
+ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Intel-defined (#2) */
+ "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est",
+ "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* AMD-defined (#2) */
+ "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+ };
+ static char *x86_power_flags[] = {
+ "ts", /* temperature sensor */
+ "fid", /* frequency id control */
+ "vid", /* voltage id control */
+ "ttp", /* thermal trip */
+ };
+
+
+#ifdef CONFIG_SMP
+ if (!cpu_online(c-cpu_data))
+ return 0;
+#endif
+
+ seq_printf(m,"processor\t: %u\n"
+ "vendor_id\t: %s\n"
+ "cpu family\t: %d\n"
+ "model\t\t: %d\n"
+ "model name\t: %s\n",
+ (unsigned)(c-cpu_data),
+ c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+ c->x86,
+ (int)c->x86_model,
+ c->x86_model_id[0] ? c->x86_model_id : "unknown");
+
+ if (c->x86_mask || c->cpuid_level >= 0)
+ seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+ else
+ seq_printf(m, "stepping\t: unknown\n");
+
+ if (cpu_has(c,X86_FEATURE_TSC)) {
+ seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
+ cpu_khz / 1000, (cpu_khz % 1000));
+ }
+
+ /* Cache size */
+ if (c->x86_cache_size >= 0)
+ seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+
+#ifdef CONFIG_SMP
+ seq_printf(m, "physical id\t: %d\n", phys_proc_id[c - cpu_data]);
+ seq_printf(m, "siblings\t: %d\n", c->x86_num_cores * smp_num_siblings);
+#endif
+
+ seq_printf(m,
+ "fpu\t\t: yes\n"
+ "fpu_exception\t: yes\n"
+ "cpuid level\t: %d\n"
+ "wp\t\t: yes\n"
+ "flags\t\t:",
+ c->cpuid_level);
+
+ {
+ int i;
+ for ( i = 0 ; i < 32*NCAPINTS ; i++ )
+ if ( test_bit(i, &c->x86_capability) &&
+ x86_cap_flags[i] != NULL )
+ seq_printf(m, " %s", x86_cap_flags[i]);
+ }
+
+ seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+ c->loops_per_jiffy/(500000/HZ),
+ (c->loops_per_jiffy/(5000/HZ)) % 100);
+
+ if (c->x86_tlbsize > 0)
+ seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
+ seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
+ seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
+
+ seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
+ c->x86_phys_bits, c->x86_virt_bits);
+
+ seq_printf(m, "power management:");
+ {
+ unsigned i;
+ for (i = 0; i < 32; i++)
+ if (c->x86_power & (1 << i)) {
+ if (i < ARRAY_SIZE(x86_power_flags))
+ seq_printf(m, " %s", x86_power_flags[i]);
+ else
+ seq_printf(m, " [%d]", i);
+ }
+ }
+ seq_printf(m, "\n");
+
+ if (c->x86_num_cores > 1)
+ seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+
+ seq_printf(m, "\n\n");
+
+ return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations cpuinfo_op = {
+ .start =c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup64.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup64.c
new file mode 100644
index 0000000000..03452e1bf3
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/setup64.c
@@ -0,0 +1,344 @@
+/*
+ * X86-64 specific CPU setup.
+ * Copyright (C) 1995 Linus Torvalds
+ * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
+ * See setup.c for older changelog.
+ * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
+ *
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Modified for Xen
+ *
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/bootmem.h>
+#include <linux/bitops.h>
+#include <asm/pda.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/atomic.h>
+#include <asm/mmu_context.h>
+#include <asm/smp.h>
+#include <asm/i387.h>
+#include <asm/percpu.h>
+#include <asm/mtrr.h>
+#include <asm/proto.h>
+#include <asm/mman.h>
+#include <asm/numa.h>
+
+#include <asm-xen/hypervisor.h>
+
+char x86_boot_params[2048] __initdata = {0,};
+
+cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+
+struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;
+
+extern struct task_struct init_task;
+
+extern unsigned char __per_cpu_start[], __per_cpu_end[];
+
+extern struct desc_ptr cpu_gdt_descr[];
+struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
+
+char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
+
+unsigned long __supported_pte_mask = ~0UL;
+static int do_not_nx __initdata = 0;
+
+/* noexec=on|off
+Control non executable mappings for 64bit processes.
+
+on Enable(default)
+off Disable
+*/
+int __init nonx_setup(char *str)
+{
+ if (!strncmp(str, "on", 2)) {
+ __supported_pte_mask |= _PAGE_NX;
+ do_not_nx = 0;
+ } else if (!strncmp(str, "off", 3)) {
+ do_not_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
+ }
+ return 0;
+}
+__setup("noexec=", nonx_setup); /* parsed early actually */
+
+int force_personality32 = READ_IMPLIES_EXEC;
+
+/* noexec32=on|off
+Control non executable heap for 32bit processes.
+To control the stack too use noexec=off
+
+on PROT_READ does not imply PROT_EXEC for 32bit processes
+off PROT_READ implies PROT_EXEC (default)
+*/
+static int __init nonx32_setup(char *str)
+{
+ if (!strcmp(str, "on"))
+ force_personality32 &= ~READ_IMPLIES_EXEC;
+ else if (!strcmp(str, "off"))
+ force_personality32 |= READ_IMPLIES_EXEC;
+ return 0;
+}
+__setup("noexec32=", nonx32_setup);
+
+/*
+ * Great future plan:
+ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
+ * Always point %gs to its beginning
+ */
+void __init setup_per_cpu_areas(void)
+{
+ int i;
+ unsigned long size;
+
+ /* Copy section for each CPU (we discard the original) */
+ size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+#ifdef CONFIG_MODULES
+ if (size < PERCPU_ENOUGH_ROOM)
+ size = PERCPU_ENOUGH_ROOM;
+#endif
+
+ for (i = 0; i < NR_CPUS; i++) {
+ unsigned char *ptr;
+
+ if (!NODE_DATA(cpu_to_node(i))) {
+ printk("cpu with no node %d, num_online_nodes %d\n",
+ i, num_online_nodes());
+ ptr = alloc_bootmem(size);
+ } else {
+ ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
+ }
+ if (!ptr)
+ panic("Cannot allocate cpu data for CPU %d\n", i);
+ cpu_pda[i].data_offset = ptr - __per_cpu_start;
+ memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ }
+}
+
+void pda_init(int cpu)
+{
+ pgd_t *old_level4 = (pgd_t *)xen_start_info.pt_base;
+ struct x8664_pda *pda = &cpu_pda[cpu];
+
+ /* Setup up data that may be needed in __get_free_pages early */
+ asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
+ HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
+ (unsigned long)(cpu_pda + cpu));
+
+ pda->me = pda;
+ pda->cpunumber = cpu;
+ pda->irqcount = -1;
+ pda->kernelstack =
+ (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
+ pda->active_mm = &init_mm;
+ pda->mmu_state = 0;
+ pda->kernel_mode = 1;
+
+ if (cpu == 0) {
+ memcpy((void *)init_level4_pgt,
+ (void *) xen_start_info.pt_base, PAGE_SIZE);
+ /* others are initialized in smpboot.c */
+ pda->pcurrent = &init_task;
+ pda->irqstackptr = boot_cpu_stack;
+ make_page_readonly(init_level4_pgt);
+ make_page_readonly(init_level4_user_pgt);
+ make_page_readonly(level3_user_pgt); /* for vsyscall stuff */
+ xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
+ xen_pud_pin(__pa_symbol(level3_user_pgt));
+ set_pgd((pgd_t *)(init_level4_user_pgt + 511),
+ mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
+ } else {
+ pda->irqstackptr = (char *)
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+ if (!pda->irqstackptr)
+ panic("cannot allocate irqstack for cpu %d", cpu);
+ }
+
+ xen_pt_switch(__pa(init_level4_pgt));
+ xen_new_user_pt(__pa(init_level4_user_pgt));
+
+ if (cpu == 0) {
+ xen_pgd_unpin(__pa(old_level4));
+#if 0
+ early_printk("__pa: %x, <machine_phys> old_level 4 %x\n",
+ __pa(xen_start_info.pt_base),
+ pfn_to_mfn(__pa(old_level4) >> PAGE_SHIFT));
+#endif
+// make_page_writable(old_level4);
+// free_bootmem(__pa(old_level4), PAGE_SIZE);
+ }
+
+ pda->irqstackptr += IRQSTACKSIZE-64;
+}
+
+char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ]
+__attribute__((section(".bss.page_aligned")));
+
+/* May not be marked __init: used by software suspend */
+void syscall_init(void)
+{
+#ifdef CONFIG_IA32_EMULATION
+ syscall32_cpu_init ();
+#endif
+}
+
+void __init check_efer(void)
+{
+ unsigned long efer;
+
+ /* rdmsrl(MSR_EFER, efer); */
+
+ /*
+ * At this point, Xen does not like the bit 63.
+ * So NX is not supported. Come back later.
+ */
+ efer = 0;
+
+ if (!(efer & EFER_NX) || do_not_nx) {
+ __supported_pte_mask &= ~_PAGE_NX;
+ }
+}
+
+void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
+{
+ unsigned long frames[16];
+ unsigned long va;
+ int f;
+
+ for (va = gdt_descr->address, f = 0;
+ va < gdt_descr->address + gdt_descr->size;
+ va += PAGE_SIZE, f++) {
+ frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ make_page_readonly((void *)va);
+ }
+ if (HYPERVISOR_set_gdt(frames, gdt_descr->size /
+ sizeof (struct desc_struct)))
+ BUG();
+}
+
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ * A lot of state is already set up in PDA init.
+ */
+void __init cpu_init (void)
+{
+#ifdef CONFIG_SMP
+ int cpu = stack_smp_processor_id();
+#else
+ int cpu = smp_processor_id();
+#endif
+ struct tss_struct *t = &per_cpu(init_tss, cpu);
+ unsigned long v;
+ char *estacks = NULL;
+ struct task_struct *me;
+ int i;
+
+ /* CPU 0 is initialised in head64.c */
+ if (cpu != 0) {
+ pda_init(cpu);
+ } else
+ estacks = boot_exception_stacks;
+
+ me = current;
+
+ if (test_and_set_bit(cpu, &cpu_initialized))
+ panic("CPU#%d already initialized!\n", cpu);
+
+ printk("Initializing CPU#%d\n", cpu);
+
+#if 0
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+#endif
+ /*
+ * Initialize the per-CPU GDT with the boot GDT,
+ * and set up the GDT descriptor:
+ */
+ if (cpu) {
+ memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
+ }
+
+ cpu_gdt_descr[cpu].size = GDT_SIZE;
+ cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
+#if 0
+ asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
+ asm volatile("lidt %0" :: "m" (idt_descr));
+#endif
+ cpu_gdt_init(&cpu_gdt_descr[cpu]);
+
+#if 0
+ memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
+
+#endif
+ memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
+ GDT_ENTRY_TLS_ENTRIES * 8);
+
+ /*
+ * Delete NT
+ */
+
+ asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");
+
+ if (cpu == 0)
+ early_identify_cpu(&boot_cpu_data);
+
+ syscall_init();
+
+ barrier();
+ check_efer();
+
+ /*
+ * set up and load the per-CPU TSS
+ */
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (cpu) {
+ estacks = (char *)__get_free_pages(GFP_ATOMIC,
+ EXCEPTION_STACK_ORDER);
+ if (!estacks)
+ panic("Cannot allocate exception stack %ld %d\n",
+ v, cpu);
+ }
+ estacks += EXCEPTION_STKSZ;
+ t->ist[v] = (unsigned long)estacks;
+ }
+
+ t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ /*
+ * <= is required because the CPU will access up to
+ * 8 bits beyond the end of the IO permission bitmap.
+ */
+ for (i = 0; i <= IO_BITMAP_LONGS; i++)
+ t->io_bitmap[i] = ~0UL;
+
+ atomic_inc(&init_mm.mm_count);
+ me->active_mm = &init_mm;
+ if (me->mm)
+ BUG();
+ enter_lazy_tlb(&init_mm, me);
+
+ load_LDT(&init_mm.context);
+
+ /*
+ * Clear all 6 debug registers:
+ */
+#define CD(register) HYPERVISOR_set_debugreg(register, 0)
+
+ CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
+
+#undef CD
+ fpu_init();
+
+#ifdef CONFIG_NUMA
+ numa_add_cpu(cpu);
+#endif
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/signal.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/signal.c
new file mode 100644
index 0000000000..2282257d98
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/signal.c
@@ -0,0 +1,493 @@
+/*
+ * linux/arch/x86_64/kernel/signal.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
+ *
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
+ * 2000-2002 x86-64 support by Andi Kleen
+ *
+ * $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/compiler.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+#include <asm/proto.h>
+
+/* #define DEBUG_SIG 1 */
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs * regs);
+void ia32_setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs * regs);
+
+asmlinkage long
+sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs)
+{
+ sigset_t saveset, newset;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (copy_from_user(&newset, unewset, sizeof(newset)))
+ return -EFAULT;
+ sigdelsetmask(&newset, ~_BLOCKABLE);
+
+ spin_lock_irq(&current->sighand->siglock);
+ saveset = current->blocked;
+ current->blocked = newset;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+#ifdef DEBUG_SIG
+ printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
+ saveset, newset, regs, regs->rip);
+#endif
+ regs->rax = -EINTR;
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ if (do_signal(regs, &saveset))
+ return -EINTR;
+ }
+}
+
+asmlinkage long
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+ struct pt_regs *regs)
+{
+ return do_sigaltstack(uss, uoss, regs->rsp);
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+struct rt_sigframe
+{
+ char *pretcode;
+ struct ucontext uc;
+ struct siginfo info;
+};
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *prax)
+{
+ unsigned int err = 0;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+#define COPY(x) err |= __get_user(regs->x, &sc->x)
+
+ COPY(rdi); COPY(rsi); COPY(rbp); COPY(rsp); COPY(rbx);
+ COPY(rdx); COPY(rcx); COPY(rip);
+ COPY(r8);
+ COPY(r9);
+ COPY(r10);
+ COPY(r11);
+ COPY(r12);
+ COPY(r13);
+ COPY(r14);
+ COPY(r15);
+
+ {
+ unsigned int tmpflags;
+ err |= __get_user(tmpflags, &sc->eflags);
+ regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+ regs->orig_rax = -1; /* disable syscall checks */
+ }
+
+ {
+ struct _fpstate __user * buf;
+ err |= __get_user(buf, &sc->fpstate);
+
+ if (buf) {
+ if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+ goto badframe;
+ err |= restore_i387(buf);
+ } else {
+ struct task_struct *me = current;
+ if (used_math()) {
+ clear_fpu(me);
+ clear_used_math();
+ }
+ }
+ }
+
+ err |= __get_user(*prax, &sc->rax);
+ return err;
+
+badframe:
+ return 1;
+}
+
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ sigset_t set;
+ unsigned long eax;
+
+ frame = (struct rt_sigframe __user *)(regs->rsp - 8);
+ if (verify_area(VERIFY_READ, frame, sizeof(*frame))) {
+ goto badframe;
+ }
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) {
+ goto badframe;
+ }
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) {
+ goto badframe;
+ }
+
+#ifdef DEBUG_SIG
+ printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax);
+#endif
+
+ if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT)
+ goto badframe;
+
+ return eax;
+
+badframe:
+ signal_fault(regs,frame,"sigreturn");
+ return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+static inline int
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
+{
+ int err = 0;
+ unsigned long eflags;
+
+ err |= __put_user(0, &sc->gs);
+ err |= __put_user(0, &sc->fs);
+
+ err |= __put_user(regs->rdi, &sc->rdi);
+ err |= __put_user(regs->rsi, &sc->rsi);
+ err |= __put_user(regs->rbp, &sc->rbp);
+ err |= __put_user(regs->rsp, &sc->rsp);
+ err |= __put_user(regs->rbx, &sc->rbx);
+ err |= __put_user(regs->rdx, &sc->rdx);
+ err |= __put_user(regs->rcx, &sc->rcx);
+ err |= __put_user(regs->rax, &sc->rax);
+ err |= __put_user(regs->r8, &sc->r8);
+ err |= __put_user(regs->r9, &sc->r9);
+ err |= __put_user(regs->r10, &sc->r10);
+ err |= __put_user(regs->r11, &sc->r11);
+ err |= __put_user(regs->r12, &sc->r12);
+ err |= __put_user(regs->r13, &sc->r13);
+ err |= __put_user(regs->r14, &sc->r14);
+ err |= __put_user(regs->r15, &sc->r15);
+ err |= __put_user(me->thread.trap_no, &sc->trapno);
+ err |= __put_user(me->thread.error_code, &sc->err);
+ err |= __put_user(regs->rip, &sc->rip);
+ eflags = regs->eflags;
+ if (current->ptrace & PT_PTRACED) {
+ eflags &= ~TF_MASK;
+ }
+ err |= __put_user(eflags, &sc->eflags);
+ err |= __put_user(mask, &sc->oldmask);
+ err |= __put_user(me->thread.cr2, &sc->cr2);
+
+ return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+
+static void __user *
+get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
+{
+ unsigned long rsp;
+
+ /* Default to using normal stack - redzone*/
+ rsp = regs->rsp - 128;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ /* RED-PEN: redzone on that stack? */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (sas_ss_flags(rsp) == 0)
+ rsp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ return (void __user *)round_down(rsp - size, 16);
+}
+
+static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs * regs)
+{
+ struct rt_sigframe __user *frame;
+ struct _fpstate __user *fp = NULL;
+ int err = 0;
+ struct task_struct *me = current;
+
+ if (used_math()) {
+ fp = get_stack(ka, regs, sizeof(struct _fpstate));
+ frame = (void __user *)round_down((unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
+
+ if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) {
+ goto give_sigsegv;
+ }
+
+ if (save_i387(fp) < 0)
+ err |= -1;
+ } else {
+ frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
+ }
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) {
+ goto give_sigsegv;
+ }
+
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ err |= copy_siginfo_to_user(&frame->info, info);
+ if (err) {
+ goto give_sigsegv;
+ }
+ }
+
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->rsp),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
+ err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
+ if (sizeof(*set) == 16) {
+ __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
+ __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
+ } else {
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ }
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ /* x86-64 should always use SA_RESTORER. */
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+ } else {
+ /* could use a vstub here */
+ goto give_sigsegv;
+ }
+
+ if (err) {
+ goto give_sigsegv;
+ }
+
+#ifdef DEBUG_SIG
+ printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
+#endif
+
+ /* Set up registers for signal handler */
+ {
+ struct exec_domain *ed = current_thread_info()->exec_domain;
+ if (unlikely(ed && ed->signal_invmap && sig < 32))
+ sig = ed->signal_invmap[sig];
+ }
+ regs->rdi = sig;
+ /* In case the signal handler was declared without prototypes */
+ regs->rax = 0;
+
+ /* This also works for non SA_SIGINFO handlers because they expect the
+ next argument after the signal number on the stack. */
+ regs->rsi = (unsigned long)&frame->info;
+ regs->rdx = (unsigned long)&frame->uc;
+ regs->rip = (unsigned long) ka->sa.sa_handler;
+
+ regs->rsp = (unsigned long)frame;
+
+ set_fs(USER_DS);
+ if (regs->eflags & TF_MASK) {
+ if ((current->ptrace & (PT_PTRACED | PT_DTRACE)) == (PT_PTRACED | PT_DTRACE)) {
+ ptrace_notify(SIGTRAP);
+ } else {
+ regs->eflags &= ~TF_MASK;
+ }
+ }
+
+#ifdef DEBUG_SIG
+ printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+ current->comm, current->pid, frame, regs->rip, frame->pretcode);
+#endif
+
+ return;
+
+give_sigsegv:
+ force_sigsegv(sig, current);
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+
+static void
+handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
+ sigset_t *oldset, struct pt_regs *regs)
+{
+#ifdef DEBUG_SIG
+ printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig,
+ regs->rip, regs->rsp, regs);
+#endif
+
+ /* Are we from a system call? */
+ if ((long)regs->orig_rax >= 0) {
+ /* If so, check system call restarting.. */
+ switch (regs->rax) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->rax = -EINTR;
+ break;
+
+ case -ERESTARTSYS:
+ if (!(ka->sa.sa_flags & SA_RESTART)) {
+ regs->rax = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ regs->rax = regs->orig_rax;
+ regs->rip -= 2;
+ }
+ }
+
+#ifdef CONFIG_IA32_EMULATION
+ if (test_thread_flag(TIF_IA32)) {
+ if (ka->sa.sa_flags & SA_SIGINFO)
+ ia32_setup_rt_frame(sig, ka, info, oldset, regs);
+ else
+ ia32_setup_frame(sig, ka, oldset, regs);
+ } else
+#endif
+ setup_rt_frame(sig, ka, info, oldset, regs);
+
+ if (!(ka->sa.sa_flags & SA_NODEFER)) {
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+ sigaddset(&current->blocked,sig);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ }
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+int do_signal(struct pt_regs *regs, sigset_t *oldset)
+{
+ struct k_sigaction ka;
+ siginfo_t info;
+ int signr;
+
+ /*
+ * We want the common case to go fast, which
+ * is why we may in certain cases get here from
+ * kernel mode. Just return without doing anything
+ * if so.
+ */
+ if ((regs->cs & 2) != 2) {
+ return 1;
+ }
+
+ if (try_to_freeze(0))
+ goto no_signal;
+
+ if (!oldset)
+ oldset = &current->blocked;
+
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+ if (signr > 0) {
+ /* Reenable any watchpoints before delivering the
+ * signal to user space. The processor register will
+ * have been cleared if the watchpoint triggered
+ * inside the kernel.
+ */
+ if (current->thread.debugreg7) {
+ HYPERVISOR_set_debugreg(7,
+ current->thread.debugreg7);
+ }
+
+ /* Whee! Actually deliver the signal. */
+ handle_signal(signr, &info, &ka, oldset, regs);
+ return 1;
+ }
+
+ no_signal:
+ /* Did we come from a system call? */
+ if ((long)regs->orig_rax >= 0) {
+ /* Restart the system call - no handlers present */
+ long res = regs->rax;
+ if (res == -ERESTARTNOHAND ||
+ res == -ERESTARTSYS ||
+ res == -ERESTARTNOINTR) {
+ regs->rax = regs->orig_rax;
+ regs->rip -= 2;
+ }
+ if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
+ regs->rax = __NR_restart_syscall;
+ regs->rip -= 2;
+ }
+ }
+ return 0;
+}
+
+void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
+{
+#ifdef DEBUG_SIG
+ printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
+ thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current));
+#endif
+
+ /* Pending single-step? */
+ if (thread_info_flags & _TIF_SINGLESTEP) {
+ regs->eflags |= TF_MASK;
+ clear_thread_flag(TIF_SINGLESTEP);
+ }
+
+ /* deal with pending signal delivery */
+ if (thread_info_flags & _TIF_SIGPENDING)
+ do_signal(regs,oldset);
+}
+
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+{
+ struct task_struct *me = current;
+ if (exception_trace)
+ printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n",
+ me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax);
+
+ force_sig(SIGSEGV, me);
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c
new file mode 100644
index 0000000000..b183fafacd
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smp.c
@@ -0,0 +1,411 @@
+/*
+ * Intel SMP support routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ * (c) 2002,2003 Andi Kleen, SuSE Labs.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/smp.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/interrupt.h>
+
+#include <asm/mtrr.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/mach_apic.h>
+#include <asm/proto.h>
+
+/*
+ * Smarter SMP flushing macros.
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+ * writing to user space from interrupts. (It's not allowed anyway).
+ *
+ * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static cpumask_t flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static DEFINE_SPINLOCK(tlbstate_lock);
+#define FLUSH_ALL 0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+ if (read_pda(mmu_state) == TLBSTATE_OK)
+ BUG();
+ clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask);
+ __flush_tlb();
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask);
+ * Stop ipi delivery for the old mm. This is not synchronized with
+ * the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ * for the wrong mm, and in the worst case we perform a superfluous
+ * tlb flush.
+ * 1a2) set cpu mmu_state to TLBSTATE_OK
+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ * was in lazy tlb mode.
+ * 1a3) update cpu active_mm
+ * Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask);
+ * Now the other cpus will send tlb flush ipis.
+ * 1a4) change cr3.
+ * 1b) thread switch without mm change
+ * cpu active_mm is correct, cpu0 already handles
+ * flush ipis.
+ * 1b1) set cpu mmu_state to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ * Atomically set the bit [other cpus will start sending flush ipis],
+ * and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ * runs in kernel space, the cpu could load tlb entries for user space
+ * pages.
+ *
+ * The good news is that cpu mmu_state is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+asmlinkage void smp_invalidate_interrupt (void)
+{
+ unsigned long cpu;
+
+ cpu = get_cpu();
+
+ if (!cpu_isset(cpu, flush_cpumask))
+ goto out;
+ /*
+ * This was a BUG() but until someone can quote me the
+ * line from the intel manual that guarantees an IPI to
+ * multiple CPUs is retried _only_ on the erroring CPUs
+ * its staying as a return
+ *
+ * BUG();
+ */
+
+ if (flush_mm == read_pda(active_mm)) {
+ if (read_pda(mmu_state) == TLBSTATE_OK) {
+ if (flush_va == FLUSH_ALL)
+ local_flush_tlb();
+ else
+ __flush_tlb_one(flush_va);
+ } else
+ leave_mm(cpu);
+ }
+ ack_APIC_irq();
+ cpu_clear(cpu, flush_cpumask);
+
+out:
+ put_cpu_no_resched();
+}
+
+static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+ unsigned long va)
+{
+ cpumask_t tmp;
+ /*
+ * A couple of (to be removed) sanity checks:
+ *
+ * - we do not send IPIs to not-yet booted CPUs.
+ * - current CPU must not be in mask
+ * - mask must exist :)
+ */
+ BUG_ON(cpus_empty(cpumask));
+ cpus_and(tmp, cpumask, cpu_online_map);
+ BUG_ON(!cpus_equal(tmp, cpumask));
+ BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+ if (!mm)
+ BUG();
+
+ /*
+ * I'm not happy about this global shared spinlock in the
+ * MM hot path, but we'll see how contended it is.
+ * Temporarily this turns IRQs off, so that lockups are
+ * detected by the NMI watchdog.
+ */
+ spin_lock(&tlbstate_lock);
+
+ flush_mm = mm;
+ flush_va = va;
+ cpus_or(flush_cpumask, cpumask, flush_cpumask);
+
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+ while (!cpus_empty(flush_cpumask))
+ mb(); /* nothing. lockup detection does not belong here */;
+
+ flush_mm = NULL;
+ flush_va = 0;
+ spin_unlock(&tlbstate_lock);
+}
+
+void flush_tlb_current_task(void)
+{
+ struct mm_struct *mm = current->mm;
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ local_flush_tlb();
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ preempt_enable();
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ if (current->active_mm == mm) {
+ if (current->mm)
+ local_flush_tlb();
+ else
+ leave_mm(smp_processor_id());
+ }
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+
+ preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ if (current->active_mm == mm) {
+ if(current->mm)
+ __flush_tlb_one(va);
+ else
+ leave_mm(smp_processor_id());
+ }
+
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, va);
+
+ preempt_enable();
+}
+
+static void do_flush_tlb_all(void* info)
+{
+ unsigned long cpu = smp_processor_id();
+
+ __flush_tlb_all();
+ if (read_pda(mmu_state) == TLBSTATE_LAZY)
+ leave_mm(cpu);
+}
+
+void flush_tlb_all(void)
+{
+ on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+}
+
+void smp_kdb_stop(void)
+{
+ send_IPI_allbutself(KDB_VECTOR);
+}
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+
+void smp_send_reschedule(int cpu)
+{
+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static DEFINE_SPINLOCK(call_lock);
+
+struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+static void __smp_call_function (void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ struct call_data_struct data;
+ int cpus = num_online_cpus()-1;
+
+ if (!cpus)
+ return;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ wmb();
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (!wait)
+ return;
+
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+}
+
+/*
+ * smp_call_function - run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: currently unused.
+ * @wait: If true, wait (atomically) until function has completed on other
+ * CPUs.
+ *
+ * Returns 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute func, or have already executed it.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ * Actually there are a few legal cases, like panic.
+ */
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+{
+ spin_lock(&call_lock);
+ __smp_call_function(func,info,nonatomic,wait);
+ spin_unlock(&call_lock);
+ return 0;
+}
+
+void smp_stop_cpu(void)
+{
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ local_irq_disable();
+ disable_local_APIC();
+ local_irq_enable();
+}
+
+static void smp_really_stop_cpu(void *dummy)
+{
+ smp_stop_cpu();
+ for (;;)
+ asm("hlt");
+}
+
+void smp_send_stop(void)
+{
+ int nolock = 0;
+ if (reboot_force)
+ return;
+ /* Don't deadlock on the call lock in panic */
+ if (!spin_trylock(&call_lock)) {
+ /* ignore locking because we have paniced anyways */
+ nolock = 1;
+ }
+ __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
+ if (!nolock)
+ spin_unlock(&call_lock);
+ smp_stop_cpu();
+}
+
+/*
+ * Reschedule call back. Nothing to do,
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+asmlinkage void smp_reschedule_interrupt(void)
+{
+ ack_APIC_irq();
+}
+
+asmlinkage void smp_call_function_interrupt(void)
+{
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
+ ack_APIC_irq();
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+ */
+ mb();
+ atomic_inc(&call_data->started);
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
+ irq_enter();
+ (*func)(info);
+ irq_exit();
+ if (wait) {
+ mb();
+ atomic_inc(&call_data->finished);
+ }
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
new file mode 100644
index 0000000000..a7e2c3e95e
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
@@ -0,0 +1,958 @@
+/*
+ * x86 SMP booting functions
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ * Copyright 2001 Andi Kleen, SuSE Labs.
+ *
+ * Much of the core SMP work is based on previous work by Thomas Radke, to
+ * whom a great many thanks are extended.
+ *
+ * Thanks to Intel for making available several different Pentium,
+ * Pentium Pro and Pentium-II/Xeon MP machines.
+ * Original development of Linux SMP code supported by Caldera.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ *
+ * Fixes
+ * Felix Koop : NR_CPUS used properly
+ * Jose Renau : Handle single CPU case.
+ * Alan Cox : By repeated request 8) - Total BogoMIP report.
+ * Greg Wright : Fix for kernel stacks panic.
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Matthias Sattler : Changes for 2.1 kernel map.
+ * Michel Lespinasse : Changes for 2.1 kernel map.
+ * Michael Chastain : Change trampoline.S to gnu as.
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine
+ * Ingo Molnar : Added APIC timers, based on code
+ * from Jose Renau
+ * Ingo Molnar : various cleanups and rewrites
+ * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs
+ * Andi Kleen : Changed for SMP boot into long mode.
+ * Rusty Russell : Hacked into shape for new "hotplug" boot process.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/thread_info.h>
+#include <linux/module.h>
+
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <asm/mtrr.h>
+#include <asm/pgalloc.h>
+#include <asm/desc.h>
+#include <asm/kdebug.h>
+#include <asm/tlbflush.h>
+#include <asm/proto.h>
+
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+/* Package ID of each logical CPU */
+u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(phys_proc_id);
+
+/* Bitmask of currently online CPUs */
+cpumask_t cpu_online_map;
+
+cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+static cpumask_t smp_commenced_mask;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end [];
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+ void *tramp = __va(SMP_TRAMPOLINE_BASE);
+ extern volatile __u32 tramp_gdt_ptr;
+ tramp_gdt_ptr = __pa_symbol(&cpu_gdt_table);
+ memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(tramp);
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+static void __init smp_store_cpu_info(int id)
+{
+ struct cpuinfo_x86 *c = cpu_data + id;
+
+ *c = boot_cpu_data;
+ identify_cpu(c);
+}
+
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSC's synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+extern unsigned int fast_gettimeoffset_quotient;
+
+static void __init synchronize_tsc_bp (void)
+{
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ long one_usec;
+ int buggy = 0;
+
+ printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",num_booting_cpus());
+
+ one_usec = cpu_khz;
+
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * all APs synchronize but they loop on '== num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * this lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ sync_core();
+ rdtscll(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) mb();
+ atomic_set(&tsc_count_start, 0);
+ wmb();
+ atomic_inc(&tsc_count_stop);
+ }
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_isset(i, cpu_callout_map)) {
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ }
+ avg = sum / num_booting_cpus();
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report bigger than 2 microseconds clock differences.
+ */
+ if (delta > 2*one_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = delta / one_usec;
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;
+
+ printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
+ i, realdelta);
+ }
+
+ sum += delta;
+ }
+ if (!buggy)
+ printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+ int i;
+
+ /*
+ * Not every cpu is online at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);
+ while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb();
+
+ sync_core();
+ rdtscll(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ }
+}
+#undef NR_LOOPS
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+ int cpuid, phys_id;
+ unsigned long timeout;
+
+ /*
+ * If waken up by an INIT in an 82489DX configuration
+ * we may get here before an INIT-deassert IPI reaches
+ * our local APIC. We have to wait for the IPI or we'll
+ * lock up on an APIC access.
+ */
+ while (!atomic_read(&init_deasserted));
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ cpuid = smp_processor_id();
+ if (cpu_isset(cpuid, cpu_callin_map)) {
+ panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
+ phys_id, cpuid);
+ }
+ Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+ /*
+ * STARTUP IPIs are fragile beasts as they might sometimes
+ * trigger some glue motherboard logic. Complete APIC bus
+ * silence for 1 second, this overestimates the time the
+ * boot CPU is spending to send the up to 2 STARTUP IPIs
+ * by a factor of two. This should be enough.
+ */
+
+ /*
+ * Waiting 2s total for startup (udelay is not yet working)
+ */
+ timeout = jiffies + 2*HZ;
+ while (time_before(jiffies, timeout)) {
+ /*
+ * Has the boot CPU finished its STARTUP sequence?
+ */
+ if (cpu_isset(cpuid, cpu_callout_map))
+ break;
+ rep_nop();
+ }
+
+ if (!time_before(jiffies, timeout)) {
+ panic("smp_callin: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ }
+
+ /*
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
+ */
+
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+ setup_local_APIC();
+
+ local_irq_enable();
+
+ /*
+ * Get our bogomips.
+ */
+ calibrate_delay();
+ Dprintk("Stack at about %p\n",&cpuid);
+
+ disable_APIC_timer();
+
+ /*
+ * Save our processor parameters
+ */
+ smp_store_cpu_info(cpuid);
+
+ local_irq_disable();
+
+ /*
+ * Allow the master to continue.
+ */
+ cpu_set(cpuid, cpu_callin_map);
+
+ /*
+ * Synchronize the TSC with the BP
+ */
+ if (cpu_has_tsc)
+ synchronize_tsc_ap();
+}
+
+int cpucount;
+
+/*
+ * Activate a secondary processor.
+ */
+void __init start_secondary(void)
+{
+ /*
+ * Don't put anything before smp_callin(); SMP
+ * booting is so fragile that we want to limit the
+ * things done here to the bare minimum.
+ */
+ cpu_init();
+ smp_callin();
+
+ /* otherwise gcc will move up the smp_processor_id before the cpu_init */
+ barrier();
+
+ Dprintk("cpu %d: waiting for commence\n", smp_processor_id());
+ while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+ rep_nop();
+
+ Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
+ setup_secondary_APIC_clock();
+
+ Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
+
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ enable_NMI_through_LVT0(NULL);
+ enable_8259A_irq(0);
+ }
+
+
+ enable_APIC_timer();
+
+ /*
+ * low-memory mappings have been cleared, flush them from
+ * the local TLBs too.
+ */
+ local_flush_tlb();
+
+ Dprintk("cpu %d: setting cpu_online_map\n", smp_processor_id());
+ cpu_set(smp_processor_id(), cpu_online_map);
+ wmb();
+
+ cpu_idle();
+}
+
+extern volatile unsigned long init_rsp;
+extern void (*initial_code)(void);
+
+#if APIC_DEBUG
+static inline void inquire_remote_apic(int apicid)
+{
+ unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ char *names[] = { "ID", "VERSION", "SPIV" };
+ int timeout, status;
+
+ printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
+
+ for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ printk("... APIC #%d %s: ", apicid, names[i]);
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+ timeout = 0;
+ do {
+ udelay(100);
+ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+ switch (status) {
+ case APIC_ICR_RR_VALID:
+ status = apic_read(APIC_RRR);
+ printk("%08x\n", status);
+ break;
+ default:
+ printk("failed\n");
+ }
+ }
+}
+#endif
+
+static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
+
+ Dprintk("Asserting INIT.\n");
+
+ /*
+ * Turn INIT on target chip
+ */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /*
+ * Send IPI
+ */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ mdelay(10);
+
+ Dprintk("Deasserting INIT.\n");
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Send IPI */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ atomic_set(&init_deasserted, 1);
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+ Dprintk("#startup loops: %d.\n", num_starts);
+
+ maxlvt = get_maxlvt();
+
+ for (j = 1; j <= num_starts; j++) {
+ Dprintk("Sending STARTUP #%d.\n",j);
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ Dprintk("After apic_write.\n");
+
+ /*
+ * STARTUP IPI
+ */
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_STARTUP
+ | (start_rip >> 12));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (send_status || accept_status)
+ break;
+ }
+ Dprintk("After Startup.\n");
+
+ if (send_status)
+ printk(KERN_ERR "APIC never delivered???\n");
+ if (accept_status)
+ printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+
+static void __init do_boot_cpu (int apicid)
+{
+ struct task_struct *idle;
+ unsigned long boot_error;
+ int timeout, cpu;
+ unsigned long start_rip;
+
+ cpu = ++cpucount;
+ /*
+ * We can't use kernel_thread since we must avoid to
+ * reschedule the child.
+ */
+ idle = fork_idle(cpu);
+ if (IS_ERR(idle))
+ panic("failed fork for CPU %d", cpu);
+ x86_cpu_to_apicid[cpu] = apicid;
+
+ cpu_pda[cpu].pcurrent = idle;
+
+ start_rip = setup_trampoline();
+
+ init_rsp = idle->thread.rsp;
+ per_cpu(init_tss,cpu).rsp0 = init_rsp;
+ initial_code = start_secondary;
+ clear_ti_thread_flag(idle->thread_info, TIF_FORK);
+
+ printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
+ start_rip, init_rsp);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+ Dprintk("Setting warm reset code and vector.\n");
+
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
+ Dprintk("3.\n");
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (APIC_INTEGRATED(apic_version[apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ /*
+ * Status is now clean
+ */
+ boot_error = 0;
+
+ /*
+ * Starting actual IPI sequence...
+ */
+ boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error = 1;
+ if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+ else
+ /* trampoline code not run */
+ printk("Not responding.\n");
+#if APIC_DEBUG
+ inquire_remote_apic(apicid);
+#endif
+ }
+ }
+ if (boot_error) {
+ cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+ cpucount--;
+ x86_cpu_to_apicid[cpu] = BAD_APICID;
+ x86_cpu_to_log_apicid[cpu] = BAD_APICID;
+ }
+}
+
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+ int cachesize; /* kB */
+ unsigned long bandwidth = 1000; /* MB/s */
+ /*
+ * Rough estimation for SMP scheduling, this is the number of
+ * cycles it takes for a fully memory-limited process to flush
+ * the SMP-local cache.
+ *
+ * (For a P5 this pretty much means we will choose another idle
+ * CPU almost always at wakeup time (this is due to the small
+ * L1 cache), on PIIs it's around 50-100 usecs, depending on
+ * the cache size)
+ */
+
+ if (!cpu_khz) {
+ /*
+ * this basically disables processor-affinity
+ * scheduling on SMP without a TSC.
+ */
+ cacheflush_time = 0;
+ return;
+ } else {
+ cachesize = boot_cpu_data.x86_cache_size;
+ if (cachesize == -1) {
+ cachesize = 16; /* Pentiums, 2x8kB cache */
+ bandwidth = 100;
+ }
+
+ cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+ }
+
+ cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
+
+ printk(KERN_INFO "per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+ (long)cacheflush_time/(cpu_khz/1000),
+ ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+ printk(KERN_INFO "task migration cache decay timeout: %ld msecs.\n",
+ (cache_decay_ticks + 1) * 1000 / HZ);
+}
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+static void __init smp_boot_cpus(unsigned int max_cpus)
+{
+ unsigned apicid, cpu, bit, kicked;
+
+ nmi_watchdog_default();
+
+ /*
+ * Setup boot CPU information
+ */
+ smp_store_cpu_info(0); /* Final full version of the data */
+ printk(KERN_INFO "CPU%d: ", 0);
+ print_cpu_info(&cpu_data[0]);
+
+ current_thread_info()->cpu = 0;
+ smp_tune_scheduling();
+
+ if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ hard_smp_processor_id());
+ physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+ }
+
+ /*
+ * If we couldn't find an SMP configuration at boot time,
+ * get out of here now!
+ */
+ if (!smp_found_config) {
+ printk(KERN_NOTICE "SMP motherboard not detected.\n");
+ io_apic_irqs = 0;
+ cpu_online_map = cpumask_of_cpu(0);
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ if (APIC_init_uniprocessor())
+ printk(KERN_NOTICE "Local APIC not detected."
+ " Using dummy APIC emulation.\n");
+ goto smp_done;
+ }
+
+ /*
+ * Should not be necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ */
+ if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
+ printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_id);
+ physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+ }
+
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+ if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
+ printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_id);
+ printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+ io_apic_irqs = 0;
+ cpu_online_map = cpumask_of_cpu(0);
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ disable_apic = 1;
+ goto smp_done;
+ }
+
+ verify_local_APIC();
+
+ /*
+ * If SMP should be disabled, then really disable it!
+ */
+ if (!max_cpus) {
+ smp_found_config = 0;
+ printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+ io_apic_irqs = 0;
+ cpu_online_map = cpumask_of_cpu(0);
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ disable_apic = 1;
+ goto smp_done;
+ }
+
+ connect_bsp_APIC();
+ setup_local_APIC();
+
+ if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
+ BUG();
+
+ x86_cpu_to_apicid[0] = boot_cpu_id;
+
+ /*
+ * Now scan the CPU present map and fire up the other CPUs.
+ */
+ Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+
+ kicked = 1;
+ for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
+ apicid = cpu_present_to_apicid(bit);
+ /*
+ * Don't even attempt to start the boot CPU!
+ */
+ if (apicid == boot_cpu_id || (apicid == BAD_APICID))
+ continue;
+
+ if (!physid_isset(apicid, phys_cpu_present_map))
+ continue;
+ if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
+ continue;
+
+ do_boot_cpu(apicid);
+ ++kicked;
+ }
+
+ /*
+ * Cleanup possible dangling ends...
+ */
+ {
+ /*
+ * Install writable page 0 entry to set BIOS data area.
+ */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
+
+ *((volatile int *) phys_to_virt(0x467)) = 0;
+ }
+
+ /*
+ * Allow the user to impress friends.
+ */
+
+ Dprintk("Before bogomips.\n");
+ if (!cpucount) {
+ printk(KERN_INFO "Only one processor found.\n");
+ } else {
+ unsigned long bogosum = 0;
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (cpu_isset(cpu, cpu_callout_map))
+ bogosum += cpu_data[cpu].loops_per_jiffy;
+ printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ cpucount+1,
+ bogosum/(500000/HZ),
+ (bogosum/(5000/HZ))%100);
+ Dprintk("Before bogocount - setting activated=1.\n");
+ }
+
+ /*
+ * Construct cpu_sibling_map[], so that we can tell the
+ * sibling CPU efficiently.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ cpus_clear(cpu_sibling_map[cpu]);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ int siblings = 0;
+ int i;
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+
+ if (smp_num_siblings > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ siblings++;
+ cpu_set(i, cpu_sibling_map[cpu]);
+ }
+ }
+ } else {
+ siblings++;
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
+
+ if (siblings != smp_num_siblings) {
+ printk(KERN_WARNING
+ "WARNING: %d siblings found for CPU%d, should be %d\n",
+ siblings, cpu, smp_num_siblings);
+ smp_num_siblings = siblings;
+ }
+ }
+
+ Dprintk("Boot done.\n");
+
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+ else
+ nr_ioapics = 0;
+
+ setup_boot_APIC_clock();
+
+ /*
+ * Synchronize the TSC with the AP
+ */
+ if (cpu_has_tsc && cpucount)
+ synchronize_tsc_bp();
+
+ smp_done:
+ time_init_smp();
+}
+
+/* These are wrappers to interface to the new boot process. Someone
+ who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_boot_cpus(max_cpus);
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+ cpu_set(smp_processor_id(), cpu_online_map);
+ cpu_set(smp_processor_id(), cpu_callout_map);
+}
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+ /* This only works at boot for x86. See "rewrite" above. */
+ if (cpu_isset(cpu, smp_commenced_mask)) {
+ local_irq_enable();
+ return -ENOSYS;
+ }
+
+ /* In case one didn't come up */
+ if (!cpu_isset(cpu, cpu_callin_map)) {
+ local_irq_enable();
+ return -EIO;
+ }
+ local_irq_enable();
+
+ /* Unleash the CPU! */
+ Dprintk("waiting for cpu %d\n", cpu);
+
+ cpu_set(cpu, smp_commenced_mask);
+ while (!cpu_isset(cpu, cpu_online_map))
+ mb();
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#ifdef CONFIG_X86_IO_APIC
+ setup_ioapic_dest();
+#endif
+ zap_low_mappings();
+}
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
new file mode 100644
index 0000000000..973d7350c6
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
@@ -0,0 +1,972 @@
+/*
+ * linux/arch/x86-64/traps.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'entry.S'.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/debugreg.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/kdebug.h>
+#include <asm/processor.h>
+
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+#include <asm/pda.h>
+#include <asm/proto.h>
+#include <asm/nmi.h>
+
+#include <linux/irq.h>
+
+
+extern struct gate_struct idt_table[256];
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void reserved(void);
+asmlinkage void alignment_check(void);
+asmlinkage void machine_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void call_debug(void);
+
+struct notifier_block *die_chain;
+static DEFINE_SPINLOCK(die_notifier_lock);
+
+/*
+ * Add @nb to the die notifier chain, serialized by die_notifier_lock.
+ * Returns whatever notifier_chain_register() reports.
+ */
+int register_die_notifier(struct notifier_block *nb)
+{
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&die_notifier_lock, flags);
+	rc = notifier_chain_register(&die_chain, nb);
+	spin_unlock_irqrestore(&die_notifier_lock, flags);
+	return rc;
+}
+
+/* Re-enable interrupts only if they were enabled in the trapped context. */
+static inline void conditional_sti(struct pt_regs *regs)
+{
+	if (!(regs->eflags & X86_EFLAGS_IF))
+		return;
+	local_irq_enable();
+}
+
+static int kstack_depth_to_print = 10;
+
+#ifdef CONFIG_KALLSYMS
+#include <linux/kallsyms.h>
+/*
+ * Print a kernel text address, symbolically when kallsyms can resolve
+ * it (<addr>{module:symbol+offset}), raw otherwise.  Returns the
+ * character count reported by printk().
+ */
+int printk_address(unsigned long address)
+{
+	unsigned long offset = 0, symsize;
+	const char *symname;
+	char *modname;
+	char *delim = ":";
+	char namebuf[128];
+
+	symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf);
+	if (!symname)
+		return printk("[<%016lx>]", address);
+	if (!modname)
+		modname = delim = "";	/* built-in symbol: suppress the module part */
+	return printk("<%016lx>{%s%s%s%s%+ld}",
+		address,delim,modname,delim,symname,offset);
+}
+#else
+/* No kallsyms configured: print the raw address only. */
+int printk_address(unsigned long address)
+{
+	return printk("[<%016lx>]", address);
+}
+#endif
+
+/*
+ * If @stack falls inside one of this CPU's per-exception (IST) stacks,
+ * return a pointer to the top of that stack; otherwise NULL.
+ */
+unsigned long *in_exception_stack(int cpu, unsigned long stack)
+{
+	int k;
+	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
+		struct tss_struct *tss = &per_cpu(init_tss, cpu);
+		unsigned long end = tss->ist[k] + EXCEPTION_STKSZ;
+
+		if (stack >= tss->ist[k] && stack <= end)
+			return (unsigned long *)end;
+	}
+	return NULL;
+}
+
+/*
+ * x86-64 can have upto three kernel stacks:
+ * process stack
+ * interrupt stack
+ * severe exception (double fault, nmi, stack fault) hardware stack
+ * Check and process them in order.
+ */
+
+/*
+ * Walk and print a backtrace starting at @stack, hopping from the
+ * exception (IST) stack to the IRQ stack to the process stack as the
+ * saved link pointers dictate (see the comment block above).  Output is
+ * wrapped at roughly 50 columns via the running count 'i'.
+ */
+void show_trace(unsigned long *stack)
+{
+	unsigned long addr;
+	unsigned long *irqstack, *irqstack_end, *estack_end;
+	const int cpu = safe_smp_processor_id();
+	int i;
+
+	printk("\nCall Trace:");
+	i = 0;
+
+	/* Stage 1: dump the exception stack, if we're on one. */
+	estack_end = in_exception_stack(cpu, (unsigned long)stack);
+	if (estack_end) {
+		while (stack < estack_end) {
+			addr = *stack++;
+			if (__kernel_text_address(addr)) {
+				i += printk_address(addr);
+				i += printk(" ");
+				if (i > 50) {
+					printk("\n");
+					i = 0;
+				}
+			}
+		}
+		i += printk(" <EOE> ");
+		i += 7;
+		/* Continue from the stack pointer saved at the top of the
+		 * exception stack -- presumably [-2] is the previous rsp;
+		 * TODO confirm against the entry code. */
+		stack = (unsigned long *) estack_end[-2];
+	}
+
+	/* Stage 2: dump the IRQ stack, if we're inside it. */
+	irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
+	irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64);
+
+	if (stack >= irqstack && stack < irqstack_end) {
+		printk("<IRQ> ");
+		while (stack < irqstack_end) {
+			addr = *stack++;
+			/*
+			 * If the address is either in the text segment of the
+			 * kernel, or in the region which contains vmalloc'ed
+			 * memory, it *may* be the address of a calling
+			 * routine; if so, print it so that someone tracing
+			 * down the cause of the crash will be able to figure
+			 * out the call path that was taken.
+			 */
+			if (__kernel_text_address(addr)) {
+				i += printk_address(addr);
+				i += printk(" ");
+				if (i > 50) {
+					printk("\n   ");
+					i = 0;
+				}
+			}
+		}
+		stack = (unsigned long *) (irqstack_end[-1]);
+		printk(" <EOI> ");
+		i += 7;
+	}
+
+	/* Stage 3: the process stack, until the THREAD_SIZE boundary. */
+	while (((long) stack & (THREAD_SIZE-1)) != 0) {
+		addr = *stack++;
+		if (__kernel_text_address(addr)) {
+			i += printk_address(addr);
+			i += printk(" ");
+			if (i > 50) {
+				printk("\n   ");
+				i = 0;
+			}
+		}
+	}
+	printk("\n");
+}
+
+/*
+ * Print up to kstack_depth_to_print raw stack words starting at @rsp
+ * (following the IRQ-stack link if needed), then a symbolic backtrace.
+ * @tsk/@rsp may be NULL: NULL rsp with a task uses that task's saved
+ * rsp, NULL/NULL traces the current CPU's own stack.
+ */
+void show_stack(struct task_struct *tsk, unsigned long * rsp)
+{
+	unsigned long *stack;
+	int i;
+	const int cpu = safe_smp_processor_id();
+	unsigned long *irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
+	unsigned long *irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE);
+
+	// debugging aid: "show_stack(NULL, NULL);" prints the
+	// back trace for this cpu.
+
+	if (rsp == NULL) {
+		if (tsk)
+			rsp = (unsigned long *)tsk->thread.rsp;
+		else
+			rsp = (unsigned long *)&rsp;
+	}
+
+	stack = rsp;
+	for(i=0; i < kstack_depth_to_print; i++) {
+		if (stack >= irqstack && stack <= irqstack_end) {
+			if (stack == irqstack_end) {
+				/* End of IRQ stack: continue on the stack it interrupted. */
+				stack = (unsigned long *) (irqstack_end[-1]);
+				printk(" <EOI> ");
+			}
+		} else {
+			if (((long) stack & (THREAD_SIZE-1)) == 0)
+				break;
+		}
+		if (i && ((i % 4) == 0))
+			printk("\n       ");
+		printk("%016lx ", *stack++);
+	}
+	show_trace((unsigned long *)rsp);
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+	unsigned long dummy;
+	/* &dummy is simply an address on the current stack frame. */
+	show_trace(&dummy);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+/*
+ * Dump CPU state for an oops: registers, owning process, and -- when
+ * the fault was in the kernel (CPL 0) -- the stack plus up to 20 code
+ * bytes at the faulting RIP.
+ */
+void show_registers(struct pt_regs *regs)
+{
+	int i;
+	int in_kernel = (regs->cs & 3) == 0;	/* CPL in low bits of CS */
+	unsigned long rsp;
+	const int cpu = safe_smp_processor_id();
+	struct task_struct *cur = cpu_pda[cpu].pcurrent;
+
+	rsp = regs->rsp;
+
+	printk("CPU %d ", cpu);
+	__show_regs(regs);
+	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
+		cur->comm, cur->pid, cur->thread_info, cur);
+
+	/*
+	 * When in-kernel, we also print out the stack and code at the
+	 * time of the fault..
+	 */
+	if (in_kernel) {
+
+		printk("Stack: ");
+		show_stack(NULL, (unsigned long*)rsp);
+
+		printk("\nCode: ");
+		if(regs->rip < PAGE_OFFSET)
+			goto bad;
+
+		for(i=0;i<20;i++)
+		{
+			unsigned char c;
+			/* __get_user so an unmapped RIP can't fault us again */
+			if(__get_user(c, &((unsigned char*)regs->rip)[i])) {
+bad:
+				printk(" Bad RIP value.");
+				break;
+			}
+			printk("%02x ", c);
+		}
+	}
+	printk("\n");
+}
+
+/*
+ * Decode a BUG() site at regs->rip (a ud2 instruction followed by a
+ * struct bug_frame) and print its file/line.  Silently returns for
+ * user-mode faults, unreadable frames, or anything not marked with the
+ * ud2 opcode bytes (0x0f 0x0b).
+ */
+void handle_BUG(struct pt_regs *regs)
+{
+	struct bug_frame f;
+	char tmp;
+
+	if (regs->cs & 3)
+		return;
+	if (__copy_from_user(&f, (struct bug_frame *) regs->rip,
+			     sizeof(struct bug_frame)))
+		return;
+	if ((unsigned long)f.filename < __PAGE_OFFSET ||
+	    f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
+		return;
+	/* Probe the filename pointer; substitute if it is unmapped. */
+	if (__get_user(tmp, f.filename))
+		f.filename = "unmapped filename";
+	printk("----------- [cut here ] --------- [please bite here ] ---------\n");
+	printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
+}
+
+/* Out-of-line trampoline that just triggers BUG(). */
+void out_of_line_bug(void)
+{
+	BUG();
+}
+
+static DEFINE_SPINLOCK(die_lock);
+static int die_owner = -1;
+
+/*
+ * Enter oops context: take die_lock (tolerating nested oopses on the
+ * same CPU), record the owner, and make the console verbose.  Paired
+ * with oops_end().
+ */
+void oops_begin(void)
+{
+	int cpu = safe_smp_processor_id();
+	/* racy, but better than risking deadlock. */
+	local_irq_disable();
+	if (!spin_trylock(&die_lock)) {
+		if (cpu == die_owner)
+			/* nested oops. should stop eventually */;
+		else
+			spin_lock(&die_lock);
+	}
+	die_owner = cpu;
+	console_verbose();
+	bust_spinlocks(1);
+}
+
+/* Leave oops context: release die_lock and optionally panic. */
+void oops_end(void)
+{
+	die_owner = -1;
+	bust_spinlocks(0);
+	spin_unlock(&die_lock);
+	local_irq_enable();	/* make sure back scroll still works */
+	if (panic_on_oops)
+		panic("Oops");
+}
+
+/*
+ * Core oops printer: banner with error code and a per-boot counter,
+ * kernel-config tags, die-chain notification, full register dump, and
+ * a one-line RIP/RSP summary in case the rest scrolled away.
+ */
+void __die(const char * str, struct pt_regs * regs, long err)
+{
+	static int die_counter;
+	printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
+#ifdef CONFIG_PREEMPT
+	printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+	printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	printk("DEBUG_PAGEALLOC");
+#endif
+	printk("\n");
+	notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
+	show_registers(regs);
+	/* Executive summary in case the oops scrolled away */
+	printk(KERN_ALERT "RIP ");
+	printk_address(regs->rip);
+	printk(" RSP <%016lx>\n", regs->rsp);
+}
+
+/* Full oops-and-kill path: print the oops and terminate the task. */
+void die(const char * str, struct pt_regs * regs, long err)
+{
+	oops_begin();
+	handle_BUG(regs);	/* decode BUG() file/line if rip is a BUG site */
+	__die(str, regs, err);
+	oops_end();
+	do_exit(SIGSEGV);
+}
+/* die() only when the trapped context was kernel code (not vm86/user). */
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+	if (!(regs->eflags & VM_MASK) && (regs->cs == __KERNEL_CS))
+		die(str, regs, err);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Oops path for NMI-watchdog lockups: print what we can, panic when
+ * configured to, otherwise kill the current task.
+ */
+void die_nmi(char *str, struct pt_regs *regs)
+{
+	oops_begin();
+	/*
+	 * We are in trouble anyway, lets at least try
+	 * to get a message out.
+	 */
+	printk(str, safe_smp_processor_id());
+	show_registers(regs);
+	if (panic_on_timeout || panic_on_oops)
+		panic("nmi watchdog");
+	printk("console shuts up ...\n");
+	oops_end();
+	do_exit(SIGSEGV);
+}
+#endif
+
+/*
+ * Common trap dispatcher.  User-mode faults deliver @signr (with @info
+ * when supplied) to the current task; kernel-mode faults are routed
+ * through the exception fixup table, and die() if no fixup exists.
+ */
+static void do_trap(int trapnr, int signr, char *str,
+		    struct pt_regs * regs, long error_code, siginfo_t *info)
+{
+	conditional_sti(regs);
+
+#ifdef CONFIG_CHECKING
+	{
+		/* Debug check: verify %gs still points at this CPU's PDA. */
+		unsigned long gs;
+		struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
+		rdmsrl(MSR_GS_BASE, gs);
+		if (gs != (unsigned long)pda) {
+			wrmsrl(MSR_GS_BASE, pda);
+			printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
+			      regs->rip);
+		}
+	}
+#endif
+
+	if ((regs->cs & 3) != 0) {
+		/* Trap came from user mode: signal the task. */
+		struct task_struct *tsk = current;
+
+		if (exception_trace && unhandled_signal(tsk, signr))
+			printk(KERN_INFO
+			       "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
+			       tsk->comm, tsk->pid, str,
+			       regs->rip,regs->rsp,error_code);
+
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = trapnr;
+		if (info)
+			force_sig_info(signr, info, tsk);
+		else
+			force_sig(signr, tsk);
+		return;
+	}
+
+
+	/* kernel trap */
+	{
+		const struct exception_table_entry *fixup;
+		fixup = search_exception_tables(regs->rip);
+		if (fixup) {
+			regs->rip = fixup->fixup;
+		} else
+			die(str, regs, error_code);
+		return;
+	}
+}
+
+/*
+ * Generate a trap handler that offers the fault to the die notifier
+ * chain and then delegates to do_trap() with signal @signr.
+ */
+#define DO_ERROR(trapnr, signr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+							== NOTIFY_STOP) \
+		return; \
+	do_trap(trapnr, signr, str, regs, error_code, NULL); \
+}
+
+/* Same as DO_ERROR but attaches a prefilled siginfo (code + address). */
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	siginfo_t info; \
+	info.si_signo = signr; \
+	info.si_errno = 0; \
+	info.si_code = sicode; \
+	info.si_addr = (void __user *)siaddr; \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+							== NOTIFY_STOP) \
+		return; \
+	do_trap(trapnr, signr, str, regs, error_code, &info); \
+}
+
+/* Instantiate handlers for the straightforward exception vectors. */
+DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->rip)
+DO_ERROR( 4, SIGSEGV, "overflow", overflow)
+DO_ERROR( 5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
+DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
+DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR(18, SIGSEGV, "reserved", reserved)
+
+/*
+ * Handler generator for traps that run on their own (IST) stack.  For
+ * user-mode faults the pt_regs are copied onto the process kernel
+ * stack so signal delivery sees them in the usual place; the handler
+ * returns the (possibly relocated) pt_regs pointer.
+ */
+#define DO_ERROR_STACK(trapnr, signr, str, name) \
+asmlinkage void *do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+							== NOTIFY_STOP) \
+		return regs; \
+	if (regs->cs & 3) { \
+		memcpy(pr, regs, sizeof(struct pt_regs)); \
+		regs = pr; \
+	} \
+	do_trap(trapnr, signr, str, regs, error_code, NULL); \
+	return regs; \
+}
+
+DO_ERROR_STACK(12, SIGBUS,  "stack segment", stack_segment)
+DO_ERROR_STACK( 8, SIGSEGV, "double fault", double_fault)
+
+/*
+ * #GP (vector 13) handler.  User-mode faults get SIGSEGV; kernel-mode
+ * faults try the exception fixup table, then the die chain, then die().
+ */
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+	conditional_sti(regs);
+
+#ifdef CONFIG_CHECKING
+	{
+		/* Debug check: verify %gs still points at this CPU's PDA. */
+		unsigned long gs;
+		struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
+		rdmsrl(MSR_GS_BASE, gs);
+		if (gs != (unsigned long)pda) {
+			wrmsrl(MSR_GS_BASE, pda);
+			oops_in_progress++;
+			printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
+			oops_in_progress--;
+		}
+	}
+#endif
+
+	if ((regs->cs & 3)!=0) {
+		/* Fault came from user mode: deliver SIGSEGV. */
+		struct task_struct *tsk = current;
+
+		if (exception_trace && unhandled_signal(tsk, SIGSEGV))
+			printk(KERN_INFO
+		       "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
+			       tsk->comm, tsk->pid,
+			       regs->rip,regs->rsp,error_code);
+
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_no = 13;
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	/* kernel gp */
+	{
+		const struct exception_table_entry *fixup;
+		fixup = search_exception_tables(regs->rip);
+		if (fixup) {
+			regs->rip = fixup->fixup;
+			return;
+		}
+		if (notify_die(DIE_GPF, "general protection fault", regs,
+					error_code, 13, SIGSEGV) == NOTIFY_STOP)
+			return;
+		die("general protection fault", regs, error_code);
+	}
+}
+
+/* NMI reason bit 0x80: memory parity error.  Log it and acknowledge
+ * via system control port B (0x61). */
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+	printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+	printk("You probably have a hardware problem with your RAM chips\n");
+
+	/* Clear and disable the memory parity error line. */
+	reason = (reason & 0xf) | 4;
+	outb(reason, 0x61);
+}
+
+/* NMI reason bit 0x40: I/O check (IOCHK) error.  Log, then pulse the
+ * IOCK-disable bit on port 0x61 to re-arm the line. */
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+	printk("NMI: IOCK error (debug interrupt?)\n");
+	show_registers(regs);
+
+	/* Re-enable the IOCK line, wait for a few seconds */
+	reason = (reason & 0xf) | 8;
+	outb(reason, 0x61);
+	mdelay(2000);
+	reason &= ~8;
+	outb(reason, 0x61);
+}
+
+/* NMI with no recognised reason bits set: log it and keep going. */
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+	printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+	printk("Dazed and confused, but trying to continue\n");
+	printk("Do you have a strange power saving mode enabled?\n");
+}
+
+/*
+ * Top-level NMI dispatcher: classify by the chipset reason byte (BSP
+ * only), route to the die chain / NMI watchdog / parity / IOCK
+ * handlers, then re-arm the edge-triggered NMI via the RTC index port.
+ */
+asmlinkage void default_do_nmi(struct pt_regs *regs)
+{
+	unsigned char reason = 0;
+
+	/* Only the BSP gets external NMIs from the system. */
+	if (!smp_processor_id())
+		reason = get_nmi_reason();
+
+	if (!(reason & 0xc0)) {
+		/* No parity/IOCK bits: IPI, watchdog, or unknown source. */
+		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
+								== NOTIFY_STOP)
+			return;
+#ifdef CONFIG_X86_LOCAL_APIC
+		/*
+		 * Ok, so this is none of the documented NMI sources,
+		 * so it must be the NMI watchdog.
+		 */
+		if (nmi_watchdog > 0) {
+			nmi_watchdog_tick(regs,reason);
+			return;
+		}
+#endif
+		unknown_nmi_error(reason, regs);
+		return;
+	}
+	if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
+		return;
+
+	/* AK: following checks seem to be broken on modern chipsets. FIXME */
+
+	if (reason & 0x80)
+		mem_parity_error(reason, regs);
+	if (reason & 0x40)
+		io_check_error(reason, regs);
+
+	/*
+	 * Reassert NMI in case it became active meanwhile
+	 * as it's edge-triggered.
+	 */
+	outb(0x8f, 0x70);
+	inb(0x71);		/* dummy */
+	outb(0x0f, 0x70);
+	inb(0x71);		/* dummy */
+}
+
+/*
+ * Breakpoint (int3, vector 3) handler: give the die chain first
+ * refusal, then deliver SIGTRAP via do_trap().
+ */
+asmlinkage void do_int3(struct pt_regs * regs, long error_code)
+{
+	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) != NOTIFY_STOP)
+		do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
+}
+
+/* runs on IST stack. */
+/*
+ * #DB (vector 1) handler.  Copies user-mode pt_regs to the process
+ * stack, reads DR6 to classify the trap, filters spurious DR7/TF
+ * events, and delivers SIGTRAP.  Returns the (possibly relocated)
+ * pt_regs pointer because it runs on an IST stack.
+ */
+asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
+{
+	struct pt_regs *pr;
+	unsigned long condition;
+	struct task_struct *tsk = current;
+	siginfo_t info;
+
+	pr = (struct pt_regs *)(current->thread.rsp0)-1;
+	if (regs->cs & 3) {
+		memcpy(pr, regs, sizeof(struct pt_regs));
+		regs = pr;
+	}
+
+#ifdef CONFIG_CHECKING
+	{
+		/* RED-PEN interaction with debugger - could destroy gs */
+		unsigned long gs;
+		struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
+		rdmsrl(MSR_GS_BASE, gs);
+		if (gs != (unsigned long)pda) {
+			wrmsrl(MSR_GS_BASE, pda);
+			printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
+		}
+	}
+#endif
+
+	/* DR6 holds the debug status: which breakpoint/step fired. */
+	asm("movq %%db6,%0" : "=r" (condition));
+
+	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
+						SIGTRAP) == NOTIFY_STOP) {
+		return regs;
+	}
+	conditional_sti(regs);
+
+	/* Mask out spurious debug traps due to lazy DR7 setting */
+	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+		if (!tsk->thread.debugreg7) {
+			goto clear_dr7;
+		}
+	}
+
+	tsk->thread.debugreg6 = condition;
+
+	/* Mask out spurious TF errors due to lazy TF clearing */
+	if ((condition & DR_STEP) &&
+	    (notify_die(DIE_DEBUGSTEP, "debugstep", regs, condition,
+			1, SIGTRAP) != NOTIFY_STOP)) {
+		/*
+		 * The TF error should be masked out only if the current
+		 * process is not traced and if the TRAP flag has been set
+		 * previously by a tracing process (condition detected by
+		 * the PT_DTRACE flag); remember that the i386 TRAP flag
+		 * can be modified by the process itself in user mode,
+		 * allowing programs to debug themselves without the ptrace()
+		 * interface.
+		 */
+		if ((regs->cs & 3) == 0)
+			goto clear_TF_reenable;
+		if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
+			goto clear_TF;
+	}
+
+	/* Ok, finally something we can handle */
+	tsk->thread.trap_no = 1;
+	tsk->thread.error_code = error_code;
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code = TRAP_BRKPT;
+	if ((regs->cs & 3) == 0)
+		goto clear_dr7;
+
+	info.si_addr = (void __user *)regs->rip;
+	force_sig_info(SIGTRAP, &info, tsk);
+clear_dr7:
+	/* Disable all hardware breakpoints by zeroing DR7. */
+	asm volatile("movq %0,%%db7"::"r"(0UL));
+	notify_die(DIE_DEBUG, "debug", regs, condition, 1, SIGTRAP);
+	return regs;
+
+clear_TF_reenable:
+	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+
+clear_TF:
+	/* RED-PEN could cause spurious errors */
+	if (notify_die(DIE_DEBUG, "debug2", regs, condition, 1, SIGTRAP)
+								!= NOTIFY_STOP)
+		regs->eflags &= ~TF_MASK;
+	return regs;
+}
+
+/*
+ * Handle an FPU/SIMD fault raised in kernel mode.  Returns 1 when an
+ * exception-table fixup absorbed it; otherwise warns (deliberately not
+ * a die() yet, per the #if 0 below) and returns 0.
+ */
+static int kernel_math_error(struct pt_regs *regs, char *str)
+{
+	const struct exception_table_entry *fixup;
+	fixup = search_exception_tables(regs->rip);
+	if (fixup) {
+		regs->rip = fixup->fixup;
+		return 1;
+	}
+	notify_die(DIE_GPF, str, regs, 0, 16, SIGFPE);
+#if 0
+	/* This should be a die, but warn only for now */
+	die(str, regs, 0);
+#else
+	printk(KERN_DEBUG "%s: %s at ", current->comm, str);
+	printk_address(regs->rip);
+	printk("\n");
+#endif
+	return 0;
+}
+
+/*
+ * Note that we play around with the 'TS' bit in an attempt to get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 behaviour
+ */
+/*
+ * x87 FPU exception (vector 16).  Kernel faults go through
+ * kernel_math_error(); user faults decode the control/status words
+ * into a specific FPE_* si_code and deliver SIGFPE.
+ */
+asmlinkage void do_coprocessor_error(struct pt_regs *regs)
+{
+	void __user *rip = (void __user *)(regs->rip);
+	struct task_struct * task;
+	siginfo_t info;
+	unsigned short cwd, swd;
+
+	conditional_sti(regs);
+	if ((regs->cs & 3) == 0 &&
+	    kernel_math_error(regs, "kernel x87 math error"))
+		return;
+
+	/*
+	 * Save the info for the exception handler and clear the error.
+	 */
+	task = current;
+	save_init_fpu(task);
+	task->thread.trap_no = 16;
+	task->thread.error_code = 0;
+	info.si_signo = SIGFPE;
+	info.si_errno = 0;
+	info.si_code = __SI_FAULT;
+	info.si_addr = rip;
+	/*
+	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
+	 * status.  0x3f is the exception bits in these regs, 0x200 is the
+	 * C1 reg you need in case of a stack fault, 0x040 is the stack
+	 * fault bit.  We should only be taking one exception at a time,
+	 * so if this combination doesn't produce any single exception,
+	 * then we have a bad program that isn't synchronizing its FPU usage
+	 * and it will suffer the consequences since we won't be able to
+	 * fully reproduce the context of the exception
+	 */
+	cwd = get_fpu_cwd(task);
+	swd = get_fpu_swd(task);
+	switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+		case 0x000:
+		default:
+			break;
+		case 0x001: /* Invalid Op */
+		case 0x041: /* Stack Fault */
+		case 0x241: /* Stack Fault | Direction */
+			info.si_code = FPE_FLTINV;
+			break;
+		case 0x002: /* Denormalize */
+		case 0x010: /* Underflow */
+			info.si_code = FPE_FLTUND;
+			break;
+		case 0x004: /* Zero Divide */
+			info.si_code = FPE_FLTDIV;
+			break;
+		case 0x008: /* Overflow */
+			info.si_code = FPE_FLTOVF;
+			break;
+		case 0x020: /* Precision */
+			info.si_code = FPE_FLTRES;
+			break;
+	}
+	force_sig_info(SIGFPE, &info, task);
+}
+
+/*
+ * Catch-all handler for unexpected interrupt vectors: just log.
+ * Fix: terminate the message with '\n' so it forms its own log line
+ * instead of being glued onto the next printk.
+ */
+asmlinkage void bad_intr(void)
+{
+	printk("bad interrupt\n");
+}
+
+/*
+ * SIMD (SSE) FPU exception (vector 19).  Kernel faults go through
+ * kernel_math_error(); user faults decode MXCSR into a specific
+ * FPE_* si_code and deliver SIGFPE.
+ */
+asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
+{
+	void __user *rip = (void __user *)(regs->rip);
+	struct task_struct * task;
+	siginfo_t info;
+	unsigned short mxcsr;
+
+	conditional_sti(regs);
+	if ((regs->cs & 3) == 0 &&
+        	kernel_math_error(regs, "simd math error"))
+		return;
+
+	/*
+	 * Save the info for the exception handler and clear the error.
+	 */
+	task = current;
+	save_init_fpu(task);
+	task->thread.trap_no = 19;
+	task->thread.error_code = 0;
+	info.si_signo = SIGFPE;
+	info.si_errno = 0;
+	info.si_code = __SI_FAULT;
+	info.si_addr = rip;
+	/*
+	 * The SIMD FPU exceptions are handled a little differently, as there
+	 * is only a single status/control register.  Thus, to determine which
+	 * unmasked exception was caught we must mask the exception mask bits
+	 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+	 */
+	mxcsr = get_fpu_mxcsr(task);
+	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+		case 0x000:
+		default:
+			break;
+		case 0x001: /* Invalid Op */
+			info.si_code = FPE_FLTINV;
+			break;
+		case 0x002: /* Denormalize */
+		case 0x010: /* Underflow */
+			info.si_code = FPE_FLTUND;
+			break;
+		case 0x004: /* Zero Divide */
+			info.si_code = FPE_FLTDIV;
+			break;
+		case 0x008: /* Overflow */
+			info.si_code = FPE_FLTOVF;
+			break;
+		case 0x020: /* Precision */
+			info.si_code = FPE_FLTRES;
+			break;
+	}
+	force_sig_info(SIGFPE, &info, task);
+}
+
+/* Vector 15 (spurious interrupt bug): deliberately a no-op. */
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
+{
+}
+
+#if 0
+asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
+{
+}
+#endif
+
+/*
+ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ */
+asmlinkage void math_state_restore(void)
+{
+	struct task_struct *me = current;
+
+	/* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
+
+	/* First FPU use in this task's lifetime: start from a clean state. */
+	if (!used_math())
+		init_fpu(me);
+	restore_fpu_checking(&me->thread.i387.fxsave);
+	me->thread_info->status |= TS_USEDFPU;
+}
+
+/* Handler for the debug-call vector: just notify the die chain. */
+void do_call_debug(struct pt_regs *regs)
+{
+	notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT);
+}
+
+
+/*
+ * Xen virtual trap table, registered via HYPERVISOR_set_trap_table()
+ * in trap_init() instead of loading a native IDT.  Each entry carries
+ * the vector, the lowest ring allowed to raise it, the kernel code
+ * selector (with RPL bits), and the handler address -- see trap_info_t
+ * for exact field layout.  Terminated by an all-zero entry.
+ */
+static trap_info_t trap_table[] = {
+        {  0, 0, (__KERNEL_CS|0x3), 0, (unsigned long)divide_error               },
+        {  1, 0, (__KERNEL_CS|0x3), 0, (unsigned long)debug                      },
+        {  3, 3, (__KERNEL_CS|0x3), 0, (unsigned long)int3                       },
+        {  4, 3, (__KERNEL_CS|0x3), 0, (unsigned long)overflow                   },
+        {  5, 3, (__KERNEL_CS|0x3), 0, (unsigned long)bounds                     },
+        {  6, 0, (__KERNEL_CS|0x3), 0, (unsigned long)invalid_op                 },
+        {  9, 0, (__KERNEL_CS|0x3), 0, (unsigned long)coprocessor_segment_overrun},
+        { 10, 0, (__KERNEL_CS|0x3), 0, (unsigned long)invalid_TSS                },
+        { 11, 0, (__KERNEL_CS|0x3), 0, (unsigned long)segment_not_present        },
+        { 12, 0, (__KERNEL_CS|0x3), 0, (unsigned long)stack_segment              },
+        { 13, 0, (__KERNEL_CS|0x3), 0, (unsigned long)general_protection         },
+        { 14, 0, (__KERNEL_CS|0x3), 0, (unsigned long)page_fault                 },
+        { 15, 0, (__KERNEL_CS|0x3), 0, (unsigned long)spurious_interrupt_bug     },
+        { 16, 0, (__KERNEL_CS|0x3), 0, (unsigned long)coprocessor_error          },
+        { 17, 0, (__KERNEL_CS|0x3), 0, (unsigned long)alignment_check            },
+#ifdef CONFIG_X86_MCE
+        { 18, 0, (__KERNEL_CS|0x3), 0, (unsigned long)machine_check              },
+#endif
+        { 19, 0, (__KERNEL_CS|0x3), 0, (unsigned long)simd_coprocessor_error     },
+        { SYSCALL_VECTOR,  3, (__KERNEL_CS|0x3), 0, (unsigned long)system_call   },
+        {  0, 0,           0, 0,                  0                              }
+};
+
+/*
+ * Register the virtual trap table with the Xen hypervisor (replacing
+ * the native IDT load), hook the ia32 syscall gate when emulation is
+ * built in, and finish per-CPU init.  Fixes the "faild" typo in the
+ * failure message.
+ */
+void __init trap_init(void)
+{
+	int ret;
+
+	ret = HYPERVISOR_set_trap_table(trap_table);
+
+	if (ret)
+		printk("HYPERVISOR_set_trap_table failed: error %d\n",
+		       ret);
+
+#ifdef CONFIG_IA32_EMULATION
+	set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+#endif
+
+	/*
+	 * Should be a barrier for any external CPU state.
+	 */
+	cpu_init();
+}
+
+
+/* Actual parsing is done early in setup.c. */
+/* "oops=..." boot option: any value enables panic-on-oops here. */
+static int __init oops_dummy(char *s)
+{
+	panic_on_oops = 1;
+	return -1;
+}
+__setup("oops=", oops_dummy);
+
+/* "kstack=N" boot option: number of raw stack words show_stack prints. */
+static int __init kstack_setup(char *s)
+{
+	kstack_depth_to_print = simple_strtoul(s,NULL,0);
+	return 0;
+}
+__setup("kstack=", kstack_setup);
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
new file mode 100644
index 0000000000..f980cdefff
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
@@ -0,0 +1,190 @@
+/*
+ * linux/arch/x86_64/kernel/vsyscall.c
+ *
+ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ *
+ * Thanks to hpa@transmeta.com for some useful hint.
+ * Special thanks to Ingo Molnar for his early experience with
+ * a different vsyscall implementation for Linux/IA32 and for the name.
+ *
+ * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
+ * at virtual address -10Mbyte+1024bytes etc... There are at max 8192
+ * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
+ * jumping out of line if necessary.
+ *
+ * Note: the concept clashes with user mode linux. If you use UML just
+ * set the kernel.vsyscall sysctl to 0.
+ */
+
+/*
+ * TODO 2001-03-20:
+ *
+ * 1) make page fault handler detect faults on page1-page-last of the vsyscall
+ * virtual space, and make it increase %rip and write -ENOSYS in %rax (so
+ * we'll be able to upgrade to a new glibc without upgrading kernel after
+ * we add more vsyscalls.
+ * 2) Possibly we need a fixmap table for the vsyscalls too if we want
+ * to avoid SIGSEGV and we want to return -EFAULT from the vsyscalls as well.
+ * Can we segfault inside a "syscall"? We can fix this anytime and those fixes
+ * won't be visible for userspace. Not fixing this is a noop for correct programs,
+ * broken programs will segfault and there's no security risk until we choose to
+ * fix it.
+ *
+ * These are not urgent things that we need to address only before shipping the first
+ * production binary kernels.
+ */
+
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/seqlock.h>
+#include <linux/jiffies.h>
+
+#include <asm/vsyscall.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/fixmap.h>
+#include <asm/errno.h>
+#include <asm/io.h>
+
+#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define force_inline __attribute__((always_inline)) inline
+
+int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
+seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+
+#include <asm/unistd.h>
+
+/* Fold whole seconds out of tv_usec so that 0 <= tv_usec < 1000000. */
+static force_inline void timeval_normalize(struct timeval * tv)
+{
+	time_t carry = tv->tv_usec / 1000000;
+
+	if (carry) {
+		tv->tv_usec %= 1000000;
+		tv->tv_sec += carry;
+	}
+}
+
+/*
+ * Userspace gettimeofday fast path: sample xtime/jiffies under the
+ * __xtime_lock seqlock and interpolate microseconds from the TSC or
+ * the HPET counter depending on __vxtime.mode, retrying the whole read
+ * if the kernel updated the clock mid-sample.
+ */
+static force_inline void do_vgettimeofday(struct timeval * tv)
+{
+	long sequence, t;
+	unsigned long sec, usec;
+
+	do {
+		sequence = read_seqbegin(&__xtime_lock);
+
+		sec = __xtime.tv_sec;
+		usec = (__xtime.tv_nsec / 1000) +
+			(__jiffies - __wall_jiffies) * (1000000 / HZ);
+
+		if (__vxtime.mode == VXTIME_TSC) {
+			sync_core();
+			rdtscll(t);
+			/* Clamp: never let the TSC appear to run backwards. */
+			if (t < __vxtime.last_tsc) t = __vxtime.last_tsc;
+			usec += ((t - __vxtime.last_tsc) *
+				 __vxtime.tsc_quot) >> 32;
+			/* See comment in x86_64 do_gettimeofday. */
+		} else {
+			usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+				  __vxtime.last) * __vxtime.quot) >> 32;
+		}
+	} while (read_seqretry(&__xtime_lock, sequence));
+
+	tv->tv_sec = sec + usec / 1000000;
+	tv->tv_usec = usec % 1000000;
+}
+
+/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
+/* Copy the cached timezone; deliberately lockless (see note above). */
+static force_inline void do_get_tz(struct timezone * tz)
+{
+	*tz = __sys_tz;
+}
+
+
+/* Fallback: issue the real gettimeofday(2) via the syscall instruction. */
+static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	int ret;
+	asm volatile("syscall"
+		: "=a" (ret)
+		: "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
+	return ret;
+}
+
+/* Fallback: issue the real time(2) via the syscall instruction. */
+static force_inline long time_syscall(long *t)
+{
+	long secs;
+	asm volatile("syscall"
+		: "=a" (secs)
+		: "0" (__NR_time),"D" (t) : __syscall_clobber);
+	return secs;
+}
+
+/*
+ * vsyscall slot 0: gettimeofday().  Falls back to the real syscall when
+ * the vsyscall sysctl is off; otherwise fills @tv and/or @tz from the
+ * kernel-exported vsyscall data without entering the kernel.
+ */
+static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
+{
+	if (unlikely(!__sysctl_vsyscall))
+		return gettimeofday(tv,tz);
+
+	if (tv != NULL)
+		do_vgettimeofday(tv);
+	if (tz != NULL)
+		do_get_tz(tz);
+
+	return 0;
+}
+
+/* This will break when the xtime seconds get inaccurate, but that is
+ * unlikely */
+/* vsyscall slot 1: time().  Reads xtime seconds directly (see caveat above). */
+static time_t __vsyscall(1) vtime(time_t *t)
+{
+	if (unlikely(!__sysctl_vsyscall))
+		return time_syscall(t);
+	else if (t)
+		*t = __xtime.tv_sec;
+	return __xtime.tv_sec;
+}
+
+/* vsyscall slot 2: reserved, always returns -ENOSYS. */
+static long __vsyscall(2) venosys_0(void)
+{
+	return -ENOSYS;
+}
+
+/* vsyscall slot 3: reserved, always returns -ENOSYS. */
+static long __vsyscall(3) venosys_1(void)
+{
+	return -ENOSYS;
+}
+
+/* Map the first vsyscall page into the kernel fixmap. */
+static void __init map_vsyscall(void)
+{
+	extern char __vsyscall_0;
+	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+
+	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+}
+
+extern void __set_fixmap_user (enum fixed_addresses, unsigned long, pgprot_t);
+
+/* Map the same vsyscall page into the user-visible fixmap as well. */
+static void __init map_vsyscall_user(void)
+{
+	extern char __vsyscall_0;
+	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+
+	__set_fixmap_user(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+}
+
+/*
+ * Sanity-check that the vsyscall entry points were linked at their
+ * architected addresses, then map the vsyscall page for both kernel
+ * and user address spaces.  The dynamic vsyscall path is disabled for
+ * now (sysctl_vsyscall = 0).
+ */
+static int __init vsyscall_init(void)
+{
+	BUG_ON(((unsigned long) &vgettimeofday !=
+			VSYSCALL_ADDR(__NR_vgettimeofday)));
+	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
+	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
+	map_vsyscall();
+	map_vsyscall_user();    /* establish translation for user address space */
+	sysctl_vsyscall = 0;    /* TBD */
+
+	return 0;
+}
+
+__initcall(vsyscall_init);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
new file mode 100644
index 0000000000..8ec2c0fdae
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c
@@ -0,0 +1,223 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/user.h>
+#include <linux/sched.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/pm.h>
+#include <linux/pci.h>
+#include <linux/apm_bios.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/syscalls.h>
+#include <linux/tty.h>
+#include <linux/ioctl32.h>
+
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/mmx.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/nmi.h>
+#include <asm/kdebug.h>
+#include <asm/unistd.h>
+#include <asm/tlbflush.h>
+#include <asm/kdebug.h>
+
+extern spinlock_t rtc_lock;
+
+#ifdef CONFIG_SMP
+extern void __write_lock_failed(rwlock_t *rw);
+extern void __read_lock_failed(rwlock_t *rw);
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+
+extern unsigned long get_cmos_time(void);
+
+/* platform dependent support */
+EXPORT_SYMBOL(boot_cpu_data);
+//EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(ioremap_nocache);
+EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(disable_irq_nosync);
+EXPORT_SYMBOL(probe_irq_mask);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(pm_idle);
+// EXPORT_SYMBOL(pm_power_off);
+EXPORT_SYMBOL(get_cmos_time);
+
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_failed_interruptible);
+EXPORT_SYMBOL(__down_failed_trylock);
+EXPORT_SYMBOL(__up_wakeup);
+/* Networking helper routines. */
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+EXPORT_SYMBOL(ip_compute_csum);
+/* Delay loops */
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__ndelay);
+EXPORT_SYMBOL(__delay);
+EXPORT_SYMBOL(__const_udelay);
+
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__get_user_8);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strstr);
+
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(copy_user_generic);
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(copy_in_user);
+EXPORT_SYMBOL(strnlen_user);
+
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pci_alloc_consistent);
+EXPORT_SYMBOL(pci_free_consistent);
+#endif
+
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pcibios_penalize_isa_irq);
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+EXPORT_SYMBOL(copy_page);
+EXPORT_SYMBOL(clear_page);
+
+EXPORT_SYMBOL(cpu_pda);
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_data);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(__write_lock_failed);
+EXPORT_SYMBOL(__read_lock_failed);
+
+EXPORT_SYMBOL(synchronize_irq);
+EXPORT_SYMBOL(smp_call_function);
+EXPORT_SYMBOL(cpu_callout_map);
+#endif
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(screen_info);
+#endif
+
+EXPORT_SYMBOL(get_wchan);
+
+EXPORT_SYMBOL(rtc_lock);
+
+/* EXPORT_SYMBOL_GPL(set_nmi_callback);
+ EXPORT_SYMBOL_GPL(unset_nmi_callback); */
+
+/* Export string functions. We normally rely on gcc builtin for most of these,
+ but gcc sometimes decides not to inline them. */
+#undef memcpy
+#undef memset
+#undef memmove
+#undef memchr
+#undef strlen
+#undef strcpy
+#undef strncmp
+#undef strncpy
+#undef strchr
+#undef strcmp
+#undef strcpy
+#undef strcat
+#undef memcmp
+
+extern void * memset(void *,int,__kernel_size_t);
+extern size_t strlen(const char *);
+extern void * memmove(void * dest,const void *src,size_t count);
+extern char * strcpy(char * dest,const char *src);
+extern int strcmp(const char * cs,const char * ct);
+extern void *memchr(const void *s, int c, size_t n);
+extern void * memcpy(void *,const void *,__kernel_size_t);
+extern void * __memcpy(void *,const void *,__kernel_size_t);
+extern char * strcat(char *, const char *);
+extern int memcmp(const void * cs,const void * ct,size_t count);
+
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strcat);
+EXPORT_SYMBOL(strncat);
+EXPORT_SYMBOL(memchr);
+EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(memscan);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(memcmp);
+
+#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
+/* prototypes are wrong, these are assembly with custom calling functions */
+extern void rwsem_down_read_failed_thunk(void);
+extern void rwsem_wake_thunk(void);
+extern void rwsem_downgrade_thunk(void);
+extern void rwsem_down_write_failed_thunk(void);
+EXPORT_SYMBOL(rwsem_down_read_failed_thunk);
+EXPORT_SYMBOL(rwsem_wake_thunk);
+EXPORT_SYMBOL(rwsem_downgrade_thunk);
+EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
+#endif
+
+EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_HAVE_DEC_LOCK
+EXPORT_SYMBOL(_atomic_dec_and_lock);
+#endif
+
+EXPORT_SYMBOL(die_chain);
+EXPORT_SYMBOL(register_die_notifier);
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_SYMBOL(smp_num_siblings);
+#endif
+
+extern void do_softirq_thunk(void);
+EXPORT_SYMBOL(do_softirq_thunk);
+
+void out_of_line_bug(void);
+EXPORT_SYMBOL(out_of_line_bug);
+
+EXPORT_SYMBOL(init_level4_pgt);
+
+extern unsigned long __supported_pte_mask;
+EXPORT_SYMBOL(__supported_pte_mask);
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(flush_tlb_page);
+EXPORT_SYMBOL_GPL(flush_tlb_all);
+#endif
+
+EXPORT_SYMBOL(cpu_khz);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/Makefile
new file mode 100644
index 0000000000..48c16ea11b
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/Makefile
@@ -0,0 +1,31 @@
+#
+# Makefile for the linux x86_64-specific parts of the memory manager.
+#
+
+XENARCH := $(subst ",,$(CONFIG_XENARCH))
+
+CFLAGS += -Iarch/$(XENARCH)/mm
+
+obj-y := init.o fault.o ioremap.o pageattr.o
+c-obj-y := extable.o
+
+i386-obj-y := hypervisor.o
+
+#obj-y := init.o fault.o ioremap.o extable.o pageattr.o
+#c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+c-obj-$(CONFIG_DISCONTIGMEM) += numa.o
+c-obj-$(CONFIG_K8_NUMA) += k8topology.o
+
+hugetlbpage-y = ../../../i386/mm/hugetlbpage.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+ @ln -fsn $(srctree)/arch/x86_64/mm/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(i386-obj-y)):
+ ln -fsn $(srctree)/arch/xen/i386/mm/$(notdir $@) $@
+
+obj-y += $(c-obj-y) $(i386-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link) $(i386-obj-y))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c
new file mode 100644
index 0000000000..636cd1873f
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/fault.c
@@ -0,0 +1,591 @@
+/*
+ * linux/arch/x86-64/mm/fault.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/kprobes.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/smp.h>
+#include <asm/tlbflush.h>
+#include <asm/proto.h>
+#include <asm/kdebug.h>
+#include <asm-generic/sections.h>
+#include <asm/kdebug.h>
+
+DEFINE_PER_CPU(pgd_t *, cur_pgd);
+
+void bust_spinlocks(int yes)
+{
+ int loglevel_save = console_loglevel;
+ if (yes) {
+ oops_in_progress = 1;
+ } else {
+#ifdef CONFIG_VT
+ unblank_screen();
+#endif
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk will give klogd
+ * a poke. Hold onto your hats...
+ */
+ console_loglevel = 15; /* NMI oopser may have shut the console up */
+ printk(" ");
+ console_loglevel = loglevel_save;
+ }
+}
+
+/* Sometimes the CPU reports invalid exceptions on prefetch.
+ Check that here and ignore.
+ Opcode checker based on code by Richard Brunner */
+static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
+ unsigned long error_code)
+{
+ unsigned char *instr = (unsigned char *)(regs->rip);
+ int scan_more = 1;
+ int prefetch = 0;
+ unsigned char *max_instr = instr + 15;
+
+ /* If it was a exec fault ignore */
+ if (error_code & (1<<4))
+ return 0;
+
+ /* Code segments in LDT could have a non zero base. Don't check
+ when that's possible */
+ if (regs->cs & (1<<2))
+ return 0;
+
+ if ((regs->cs & 3) != 0 && regs->rip >= TASK_SIZE)
+ return 0;
+
+ while (scan_more && instr < max_instr) {
+ unsigned char opcode;
+ unsigned char instr_hi;
+ unsigned char instr_lo;
+
+ if (__get_user(opcode, instr))
+ break;
+
+ instr_hi = opcode & 0xf0;
+ instr_lo = opcode & 0x0f;
+ instr++;
+
+ switch (instr_hi) {
+ case 0x20:
+ case 0x30:
+ /* Values 0x26,0x2E,0x36,0x3E are valid x86
+ prefixes. In long mode, the CPU will signal
+ invalid opcode if some of these prefixes are
+ present so we will never get here anyway */
+ scan_more = ((instr_lo & 7) == 0x6);
+ break;
+
+ case 0x40:
+ /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
+ Need to figure out under what instruction mode the
+ instruction was issued ... */
+ /* Could check the LDT for lm, but for now it's good
+ enough to assume that long mode only uses well known
+ segments or kernel. */
+ scan_more = ((regs->cs & 3) == 0) || (regs->cs == __USER_CS);
+ break;
+
+ case 0x60:
+ /* 0x64 thru 0x67 are valid prefixes in all modes. */
+ scan_more = (instr_lo & 0xC) == 0x4;
+ break;
+ case 0xF0:
+ /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
+ scan_more = !instr_lo || (instr_lo>>1) == 1;
+ break;
+ case 0x00:
+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
+ scan_more = 0;
+ if (__get_user(opcode, instr))
+ break;
+ prefetch = (instr_lo == 0xF) &&
+ (opcode == 0x0D || opcode == 0x18);
+ break;
+ default:
+ scan_more = 0;
+ break;
+ }
+ }
+ return prefetch;
+}
+
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+ return __get_user(dummy, (unsigned long *)p);
+}
+
+void dump_pagetable(unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
+ pgd += pgd_index(address);
+
+ printk("PGD %lx ", pgd_val(*pgd));
+ if (bad_address(pgd)) goto bad;
+ if (!pgd_present(*pgd)) goto ret;
+
+ pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address);
+ if (bad_address(pud)) goto bad;
+ printk("PUD %lx ", pud_val(*pud));
+ if (!pud_present(*pud)) goto ret;
+
+ pmd = pmd_offset(pud, address);
+ if (bad_address(pmd)) goto bad;
+ printk("PMD %lx ", pmd_val(*pmd));
+ if (!pmd_present(*pmd)) goto ret;
+
+ pte = pte_offset_kernel(pmd, address);
+ if (bad_address(pte)) goto bad;
+ printk("PTE %lx", pte_val(*pte));
+ret:
+ printk("\n");
+ return;
+bad:
+ printk("BAD\n");
+}
+
+static const char errata93_warning[] =
+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
+KERN_ERR "******* Please consider a BIOS update.\n"
+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+
+/* Workaround for K8 erratum #93 & buggy BIOS.
+ BIOS SMM functions are required to use a specific workaround
+ to avoid corruption of the 64bit RIP register on C stepping K8.
+ A lot of BIOS that didn't get tested properly miss this.
+ The OS sees this as a page fault with the upper 32bits of RIP cleared.
+ Try to work around it here.
+ Note we only handle faults in kernel here. */
+
+static int is_errata93(struct pt_regs *regs, unsigned long address)
+{
+ static int warned;
+ if (address != regs->rip)
+ return 0;
+ if ((address >> 32) != 0)
+ return 0;
+ address |= 0xffffffffUL << 32;
+ if ((address >= (u64)_stext && address <= (u64)_etext) ||
+ (address >= MODULES_VADDR && address <= MODULES_END)) {
+ if (!warned) {
+ printk(errata93_warning);
+ warned = 1;
+ }
+ regs->rip = address;
+ return 1;
+ }
+ return 0;
+}
+
+int unhandled_signal(struct task_struct *tsk, int sig)
+{
+ if (tsk->pid == 1)
+ return 1;
+ /* Warn for strace, but not for gdb */
+ if (!test_ti_thread_flag(tsk->thread_info, TIF_SYSCALL_TRACE) &&
+ (tsk->ptrace & PT_PTRACED))
+ return 0;
+ return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
+ (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
+}
+
+static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
+ unsigned long error_code)
+{
+ oops_begin();
+ printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
+ current->comm, address);
+ dump_pagetable(address);
+ __die("Bad pagetable", regs, error_code);
+ oops_end();
+ do_exit(SIGKILL);
+}
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ */
+static int vmalloc_fault(unsigned long address)
+{
+ pgd_t *pgd, *pgd_ref;
+ pud_t *pud, *pud_ref;
+ pmd_t *pmd, *pmd_ref;
+ pte_t *pte, *pte_ref;
+
+ /* Copy kernel mappings over when needed. This can also
+ happen within a race in page table update. In the later
+ case just flush. */
+
+ pgd = pgd_offset(current->mm ?: &init_mm, address);
+ pgd_ref = pgd_offset_k(address);
+ if (pgd_none(*pgd_ref))
+ return -1;
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+
+ /* Below here mismatches are bugs because these lower tables
+ are shared */
+
+ pud = pud_offset(pgd, address);
+ pud_ref = pud_offset(pgd_ref, address);
+ if (pud_none(*pud_ref))
+ return -1;
+ if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
+ BUG();
+ pmd = pmd_offset(pud, address);
+ pmd_ref = pmd_offset(pud_ref, address);
+ if (pmd_none(*pmd_ref))
+ return -1;
+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ BUG();
+ pte_ref = pte_offset_kernel(pmd_ref, address);
+ if (!pte_present(*pte_ref))
+ return -1;
+ pte = pte_offset_kernel(pmd, address);
+ if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref))
+ BUG();
+ __flush_tlb_all();
+ return 0;
+}
+
+int page_fault_trace = 0;
+int exception_trace = 1;
+
+
+#define MEM_VERBOSE 1
+
+#ifdef MEM_VERBOSE
+#define MEM_LOG(_f, _a...) \
+ printk("fault.c:[%d]-> " _f "\n", \
+ __LINE__ , ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * error_code:
+ * bit 0 == 0 means no page found, 1 means protection fault
+ * bit 1 == 0 means read, 1 means write
+ * bit 2 == 0 means kernel, 1 means user-mode
+ * bit 3 == 1 means fault was an instruction fetch
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ struct vm_area_struct * vma;
+ const struct exception_table_entry *fixup;
+ int write;
+ siginfo_t info;
+
+ if (!user_mode(regs))
+ error_code &= ~4; /* means kernel */
+
+#ifdef CONFIG_CHECKING
+ {
+ unsigned long gs;
+ struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
+ rdmsrl(MSR_GS_BASE, gs);
+ if (gs != (unsigned long)pda) {
+ wrmsrl(MSR_GS_BASE, pda);
+ printk("page_fault: wrong gs %lx expected %p\n", gs, pda);
+ }
+ }
+#endif
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (likely(regs->eflags & X86_EFLAGS_IF))
+ local_irq_enable();
+
+ if (unlikely(page_fault_trace))
+ printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+ regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+
+ tsk = current;
+ mm = tsk->mm;
+ info.si_code = SEGV_MAPERR;
+
+
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ *
+ * This verifies that the fault happens in kernel space
+ * (error_code & 4) == 0, and that the fault was not a
+ * protection error (error_code & 1) == 0.
+ */
+ if (unlikely(address >= TASK_SIZE)) {
+ if (!(error_code & 5)) {
+ if (vmalloc_fault(address) < 0)
+ goto bad_area_nosemaphore;
+ return;
+ }
+ /*
+ * Don't take the mm semaphore here. If we fixup a prefetch
+ * fault we could otherwise deadlock.
+ */
+ goto bad_area_nosemaphore;
+ }
+
+ if (unlikely(error_code & (1 << 3)))
+ pgtable_bad(address, regs, error_code);
+
+ /*
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+ if (unlikely(in_atomic() || !mm))
+ goto bad_area_nosemaphore;
+
+ again:
+ /* When running in the kernel we expect faults to occur only to
+ * addresses in user space. All other faults represent errors in the
+ * kernel and should generate an OOPS. Unfortunately, in the case of an
+ * erroneous fault occurring in a code path which already holds mmap_sem
+ * we will deadlock attempting to validate the fault against the
+ * address space. Luckily the kernel only validly references user
+ * space from well defined areas of code, which are listed in the
+ * exceptions table.
+ *
+ * As the vast majority of faults will be valid we will only perform
+ * the source reference check when there is a possibility of a deadlock.
+ * Attempt to lock the address space, if we cannot we then validate the
+ * source. If this is invalid we can skip the address space check,
+ * thus avoiding the deadlock.
+ */
+ if (!down_read_trylock(&mm->mmap_sem)) {
+ if ((error_code & 4) == 0 &&
+ !search_exception_tables(regs->rip))
+ goto bad_area_nosemaphore;
+ down_read(&mm->mmap_sem);
+ }
+
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (likely(vma->vm_start <= address))
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (error_code & 4) {
+ // XXX: align red zone size with ABI
+ if (address + 128 < regs->rsp)
+ goto bad_area;
+ }
+ if (expand_stack(vma, address))
+ goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+ info.si_code = SEGV_ACCERR;
+ write = 0;
+ switch (error_code & 3) {
+ default: /* 3: write, present */
+ /* fall through */
+ case 2: /* write, not present */
+ if (!(vma->vm_flags & VM_WRITE))
+ goto bad_area;
+ write++;
+ break;
+ case 1: /* read, present */
+ goto bad_area;
+ case 0: /* read, not present */
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ goto bad_area;
+ }
+
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ switch (handle_mm_fault(mm, vma, address, write)) {
+ case 1:
+ tsk->min_flt++;
+ break;
+ case 2:
+ tsk->maj_flt++;
+ break;
+ case 0:
+ goto do_sigbus;
+ default:
+ goto out_of_memory;
+ }
+
+ up_read(&mm->mmap_sem);
+ return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+ up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+
+#ifdef CONFIG_IA32_EMULATION
+ /* 32bit vsyscall. map on demand. */
+ if (test_thread_flag(TIF_IA32) &&
+ address >= VSYSCALL32_BASE && address < VSYSCALL32_END) {
+ if (map_syscall32(mm, address) < 0)
+ goto out_of_memory2;
+ return;
+ }
+#endif
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & 4) {
+ if (is_prefetch(regs, address, error_code))
+ return;
+
+ /* Work around K8 erratum #100 K8 in compat mode
+ occasionally jumps to illegal addresses >4GB. We
+ catch this here in the page fault handler because
+ these addresses are not reachable. Just detect this
+ case and return. Any code segment in LDT is
+ compatibility mode. */
+ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
+ (address >> 32))
+ return;
+
+ if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
+ printk(
+ "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
+ tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
+ tsk->comm, tsk->pid, address, regs->rip,
+ regs->rsp, error_code);
+ }
+
+ tsk->thread.cr2 = address;
+ /* Kernel addresses are always protection faults */
+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.trap_no = 14;
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ /* info.si_code has been set above */
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGSEGV, &info, tsk);
+ return;
+ }
+
+no_context:
+
+ /* Are we prepared to handle this kernel fault? */
+ fixup = search_exception_tables(regs->rip);
+ if (fixup) {
+ regs->rip = fixup->fixup;
+ return;
+ }
+
+ /*
+ * Hall of shame of CPU/BIOS bugs.
+ */
+
+ if (is_prefetch(regs, address, error_code))
+ return;
+
+ if (is_errata93(regs, address))
+ return;
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+ oops_begin();
+
+ if (address < PAGE_SIZE)
+ printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+ else
+ printk(KERN_ALERT "Unable to handle kernel paging request");
+ printk(" at %016lx RIP: \n" KERN_ALERT,address);
+ printk_address(regs->rip);
+ printk("\n");
+ dump_pagetable(address);
+ __die("Oops", regs, error_code);
+ /* Executive summary in case the body of the oops scrolled away */
+ printk(KERN_EMERG "CR2: %016lx\n", address);
+ oops_end();
+ do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+ up_read(&mm->mmap_sem);
+ if (current->pid == 1) {
+ yield();
+ goto again;
+ }
+ printk("VM: killing process %s\n", tsk->comm);
+ if (error_code & 4)
+ do_exit(SIGKILL);
+ goto no_context;
+
+do_sigbus:
+ up_read(&mm->mmap_sem);
+
+ /* Kernel mode? Handle exceptions or die */
+ if (!(error_code & 4))
+ goto no_context;
+
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGBUS, &info, tsk);
+ return;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c
new file mode 100644
index 0000000000..0e85f182b2
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/init.c
@@ -0,0 +1,966 @@
+/*
+ * linux/arch/x86_64/mm/init.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
+ *
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Modified for Xen.
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/fixmap.h>
+#include <asm/e820.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+#include <asm/mmu_context.h>
+#include <asm/proto.h>
+#include <asm/smp.h>
+
+#ifndef Dprintk
+#define Dprintk(x...)
+#endif
+
+#ifdef CONFIG_GART_IOMMU
+extern int swiotlb;
+#endif
+
+extern char _stext[];
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+extern unsigned long start_pfn;
+
+static int init_mapping_done;
+
+/*
+ * Use this until direct mapping is established, i.e. before __va() is
+ * available in init_memory_mapping().
+ */
+
+#define addr_to_page(addr, page) \
+ (addr) &= PHYSICAL_PAGE_MASK; \
+ (page) = ((unsigned long *) ((unsigned long)(((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + __START_KERNEL_map)))
+
+static void __make_page_readonly(unsigned long va)
+{
+ unsigned long addr;
+ pte_t *pte;
+ unsigned long *page = (unsigned long *) init_level4_pgt;
+
+ addr = (unsigned long) page[pgd_index(va)];
+ addr_to_page(addr, page);
+
+ addr = page[pud_index(va)];
+ addr_to_page(addr, page);
+
+ addr = page[pmd_index(va)];
+ addr_to_page(addr, page);
+
+ pte = (pte_t *) &page[pte_index(va)];
+ xen_l1_entry_update(pte, (*(unsigned long*)pte) & ~_PAGE_RW);
+ __flush_tlb_one(addr);
+}
+
+static void __make_page_writable(unsigned long va)
+{
+ unsigned long addr;
+ pte_t *pte;
+ unsigned long *page = (unsigned long *) init_level4_pgt;
+
+ addr = (unsigned long) page[pgd_index(va)];
+ addr_to_page(addr, page);
+
+ addr = page[pud_index(va)];
+ addr_to_page(addr, page);
+
+ addr = page[pmd_index(va)];
+ addr_to_page(addr, page);
+
+ pte = (pte_t *) &page[pte_index(va)];
+ xen_l1_entry_update(pte, (*(unsigned long*)pte)| _PAGE_RW);
+ __flush_tlb_one(addr);
+}
+
+
+/*
+ * Assume the translation is already established.
+ */
+void make_page_readonly(void *va)
+{
+ pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t *pte;
+ unsigned long addr = (unsigned long) va;
+
+ if (!init_mapping_done) {
+ __make_page_readonly(addr);
+ return;
+ }
+
+ pgd = pgd_offset_k(addr);
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ pte = pte_offset_kernel(pmd, addr);
+ xen_l1_entry_update(pte, (*(unsigned long*)pte)&~_PAGE_RW);
+ __flush_tlb_one(addr);
+}
+
+void make_page_writable(void *va)
+{
+ pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t *pte;
+ unsigned long addr = (unsigned long) va;
+
+ if (!init_mapping_done) {
+ __make_page_writable(addr);
+ return;
+ }
+
+ pgd = pgd_offset_k(addr);
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ pte = pte_offset_kernel(pmd, addr);
+ xen_l1_entry_update(pte, (*(unsigned long*)pte)|_PAGE_RW);
+ __flush_tlb_one(addr);
+}
+
+void make_pages_readonly(void* va, unsigned nr)
+{
+ while ( nr-- != 0 ) {
+ make_page_readonly(va);
+ va = (void*)((unsigned long)va + PAGE_SIZE);
+ }
+}
+
+void make_pages_writable(void* va, unsigned nr)
+{
+ while ( nr-- != 0 ) {
+ make_page_writable(va);
+ va = (void*)((unsigned long)va + PAGE_SIZE);
+ }
+}
+
+/*
+ * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in the
+ * physical space so we can cache the place of the first one and move
+ * around without checking the pgd every time.
+ */
+
+void show_mem(void)
+{
+ int i, total = 0, reserved = 0;
+ int shared = 0, cached = 0;
+ pg_data_t *pgdat;
+ struct page *page;
+
+ printk("Mem-info:\n");
+ show_free_areas();
+ printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+
+ for_each_pgdat(pgdat) {
+ for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+ page = pfn_to_page(pgdat->node_start_pfn + i);
+ total++;
+ if (PageReserved(page))
+ reserved++;
+ else if (PageSwapCache(page))
+ cached++;
+ else if (page_count(page))
+ shared += page_count(page) - 1;
+ }
+ }
+ printk("%d pages of RAM\n", total);
+ printk("%d reserved pages\n",reserved);
+ printk("%d pages shared\n",shared);
+ printk("%d pages swap cached\n",cached);
+}
+
+/* References to section boundaries */
+
+extern char _text, _etext, _edata, __bss_start, _end[];
+extern char __init_begin, __init_end;
+
+int after_bootmem;
+
+static void *spp_getpage(void)
+{
+ void *ptr;
+ if (after_bootmem)
+ ptr = (void *) get_zeroed_page(GFP_ATOMIC);
+ else
+ ptr = alloc_bootmem_pages(PAGE_SIZE);
+ if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
+ panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
+
+ Dprintk("spp_getpage %p\n", ptr);
+ return ptr;
+}
+
+#define pgd_offset_u(address) (pgd_t *)(init_level4_user_pgt + pgd_index(address))
+
+static inline pud_t *pud_offset_u(unsigned long address)
+{
+ pud_t *pud = level3_user_pgt;
+
+ return pud + pud_index(address);
+}
+
+static void set_pte_phys(unsigned long vaddr,
+ unsigned long phys, pgprot_t prot, int user_mode)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte, new_pte;
+
+ Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
+
+ pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
+
+ if (pgd_none(*pgd)) {
+ printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
+ return;
+ }
+
+ pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
+
+ if (pud_none(*pud)) {
+ pmd = (pmd_t *) spp_getpage();
+
+ make_page_readonly(pmd);
+ xen_pmd_pin(__pa(pmd));
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
+ if (pmd != pmd_offset(pud, 0)) {
+ printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
+ return;
+ }
+ }
+
+ pmd = pmd_offset(pud, vaddr);
+
+ if (pmd_none(*pmd)) {
+ pte = (pte_t *) spp_getpage();
+ make_page_readonly(pte);
+
+ xen_pte_pin(__pa(pte));
+ set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
+ if (pte != pte_offset_kernel(pmd, 0)) {
+ printk("PAGETABLE BUG #02!\n");
+ return;
+ }
+ }
+ new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
+
+ pte = pte_offset_kernel(pmd, vaddr);
+
+ if (!pte_none(*pte) &&
+ pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
+ pte_ERROR(*pte);
+ xen_l1_entry_update(pte, new_pte.pte);
+
+ /*
+ * It's enough to flush this one mapping.
+ * (PGE mappings get flushed as well)
+ */
+ __flush_tlb_one(vaddr);
+}
+
+static void set_pte_phys_ma(unsigned long vaddr,
+ unsigned long phys, pgprot_t prot)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte, new_pte;
+
+ Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
+
+ pgd = pgd_offset_k(vaddr);
+ if (pgd_none(*pgd)) {
+ printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
+ return;
+ }
+ pud = pud_offset(pgd, vaddr);
+ if (pud_none(*pud)) {
+
+ pmd = (pmd_t *) spp_getpage();
+ make_page_readonly(pmd);
+ xen_pmd_pin(__pa(pmd));
+
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
+
+ if (pmd != pmd_offset(pud, 0)) {
+ printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
+ return;
+ }
+ }
+ pmd = pmd_offset(pud, vaddr);
+
+ if (pmd_none(*pmd)) {
+ pte = (pte_t *) spp_getpage();
+ make_page_readonly(pte);
+ xen_pte_pin(__pa(pte));
+
+ set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
+ if (pte != pte_offset_kernel(pmd, 0)) {
+ printk("PAGETABLE BUG #02!\n");
+ return;
+ }
+ }
+
+ new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
+ pte = pte_offset_kernel(pmd, vaddr);
+
+ if (!pte_none(*pte) &&
+ pte_val_ma(*pte) != (pte_val_ma(new_pte) & __supported_pte_mask))
+ pte_ERROR(*pte);
+
+ /*
+ * Note that the pte page is already RO, thus we want to use
+ * xen_l1_entry_update(), not set_pte().
+ */
+ xen_l1_entry_update(pte,
+ (pfn_pte_ma(phys >> PAGE_SHIFT, prot).pte));
+
+ /*
+ * It's enough to flush this one mapping.
+ * (PGE mappings get flushed as well)
+ */
+ __flush_tlb_one(vaddr);
+}
+
+#define SET_FIXMAP_KERNEL 0
+#define SET_FIXMAP_USER 1
+
+/* NOTE: this is meant to be run only at boot */
+void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ printk("Invalid __set_fixmap\n");
+ return;
+ }
+ switch (idx) {
+ case VSYSCALL_FIRST_PAGE:
+ set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL);
+ break;
+ default:
+ set_pte_phys_ma(address, phys, prot);
+ break;
+ }
+}
+
+
+/*
+ * At this point it only supports vsyscall area.
+ */
+void __set_fixmap_user (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ printk("Invalid __set_fixmap\n");
+ return;
+ }
+
+ set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
+}
+
+unsigned long __initdata table_start, table_end, tables_space;
+
+unsigned long get_machine_pfn(unsigned long addr)
+{
+ pud_t* pud = pud_offset_k(addr);
+ pmd_t* pmd = pmd_offset(pud, addr);
+ pte_t *pte = pte_offset_kernel(pmd, addr);
+
+ return pte_mfn(*pte);
+}
+
+#define ALIGN_TO_4K __attribute__((section(".data.page_aligned")))
+#define MAX_LOW_PAGES 0x20
+static unsigned long __init_pgt[MAX_LOW_PAGES][512] ALIGN_TO_4K;
+static int __init_pgt_index;
+
+/*
+ * We start using from start_pfn
+ */
+static __init void *alloc_static_page(unsigned long *phys)
+{
+ int i = __init_pgt_index++;
+
+ if (__init_pgt_index >= MAX_LOW_PAGES) {
+ printk("Need to increase MAX_LOW_PAGES");
+ BUG();
+ }
+
+ *phys = __pa(__init_pgt[i]);
+
+ return (void *) __init_pgt[i];
+}
+
+/*
+ * Get RO page
+ */
+static void __init *alloc_low_page(unsigned long *phys)
+{
+ unsigned long pfn = table_end++;
+
+ *phys = (pfn << PAGE_SHIFT);
+ memset((void *) ((pfn << PAGE_SHIFT) + __START_KERNEL_map), 0, PAGE_SIZE);
+ return (void *)((pfn << PAGE_SHIFT) + __START_KERNEL_map);
+}
+
+#define PTE_SIZE PAGE_SIZE
+
+static inline void __set_pte(pte_t *dst, pte_t val)
+{
+ *dst = val;
+}
+
+void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+ long i, j, k;
+ unsigned long paddr;
+
+ i = pud_index(address);
+ pud = pud + i;
+
+ for (; i < PTRS_PER_PUD; pud++, i++) {
+ unsigned long pmd_phys;
+ pmd_t *pmd;
+
+ paddr = address + i*PUD_SIZE;
+ if (paddr >= end) {
+ for (; i < PTRS_PER_PUD; i++, pud++)
+ set_pud(pud, __pud(0));
+ break;
+ }
+
+ pmd = alloc_low_page(&pmd_phys);
+ make_page_readonly(pmd);
+ xen_pmd_pin(pmd_phys);
+ set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
+
+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+ unsigned long pte_phys;
+ pte_t *pte, *pte_save;
+
+ if (paddr >= end) {
+ for (; j < PTRS_PER_PMD; j++, pmd++)
+ set_pmd(pmd, __pmd(0));
+ break;
+ }
+ pte = alloc_low_page(&pte_phys);
+ pte_save = pte;
+ for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
+ if (paddr < (table_start << PAGE_SHIFT)
+ + tables_space)
+ {
+ __set_pte(pte,
+ __pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW)));
+ continue;
+ }
+ if (paddr >= end) {
+ for (; k < PTRS_PER_PTE; k++, pte++)
+ __set_pte(pte, __pte(0));
+ break;
+ }
+ __set_pte(pte, __pte(paddr | _KERNPG_TABLE));
+ }
+ pte = pte_save;
+ make_page_readonly(pte);
+ xen_pte_pin(pte_phys);
+ set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
+ }
+ }
+ __flush_tlb();
+}
+
+static void __init find_early_table_space(unsigned long end)
+{
+ unsigned long puds, pmds, ptes;
+
+ puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
+ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
+
+ tables_space = round_up(puds * 8, PAGE_SIZE) +
+ round_up(pmds * 8, PAGE_SIZE) +
+ round_up(ptes * 8, PAGE_SIZE);
+}
+
+
+/*
+ * Extend kernel mapping to access pages for page tables. The initial
+ * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
+ * mapping for early initialization.
+ */
+
+#define MIN_INIT_SIZE 0x800000
+static unsigned long current_size, extended_size;
+
+void __init extend_init_mapping(void)
+{
+ unsigned long va = __START_KERNEL_map;
+ unsigned long addr, *pte_page;
+
+ unsigned long phys;
+ pmd_t *pmd;
+ pte_t *pte, new_pte;
+ unsigned long *page = (unsigned long *) init_level4_pgt;
+ int i;
+
+ addr = (unsigned long) page[pgd_index(va)];
+ addr_to_page(addr, page);
+
+ addr = page[pud_index(va)];
+ addr_to_page(addr, page);
+
+ for (;;) {
+ pmd = (pmd_t *) &page[pmd_index(va)];
+ if (pmd_present(*pmd)) {
+ /*
+ * if pmd is valid, check pte.
+ */
+ addr = page[pmd_index(va)];
+ addr_to_page(addr, pte_page);
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte = (pte_t *) &pte_page[pte_index(va)];
+
+ if (pte_present(*pte)) {
+ va += PAGE_SIZE;
+ current_size += PAGE_SIZE;
+ } else
+ break;
+ }
+
+ } else
+ break;
+ }
+
+ for (; va < __START_KERNEL_map + current_size + tables_space; ) {
+ pmd = (pmd_t *) &page[pmd_index(va)];
+
+ if (pmd_none(*pmd)) {
+ pte_page = (unsigned long *) alloc_static_page(&phys);
+ make_page_readonly(pte_page);
+ xen_pte_pin(phys);
+ set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+
+ for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
+ new_pte = pfn_pte((va - __START_KERNEL_map) >> PAGE_SHIFT,
+ __pgprot(_KERNPG_TABLE | _PAGE_USER));
+
+ pte = (pte_t *) &pte_page[pte_index(va)];
+ xen_l1_entry_update(pte, new_pte.pte);
+ extended_size += PAGE_SIZE;
+ }
+ }
+ }
+}
+
+
+/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ This runs before bootmem is initialized and gets pages directly from the
+ physical memory. To access them they are temporarily mapped. */
+void __init init_memory_mapping(unsigned long start, unsigned long end)
+{
+ unsigned long next;
+
+ Dprintk("init_memory_mapping\n");
+
+ find_early_table_space(end);
+ extend_init_mapping();
+ start_pfn = current_size >> PAGE_SHIFT;
+
+ table_start = start_pfn;
+ table_end = table_start;
+
+ start = (unsigned long)__va(start);
+ end = (unsigned long)__va(end);
+
+ for (; start < end; start = next) {
+ unsigned long pud_phys;
+ pud_t *pud = alloc_low_page(&pud_phys);
+ make_page_readonly(pud);
+ xen_pud_pin(pud_phys);
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ phys_pud_init(pud, __pa(start), __pa(next));
+ set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+ }
+
+ printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
+ table_start<<PAGE_SHIFT,
+ table_end<<PAGE_SHIFT);
+
+ start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);
+
+ /*
+ * TBD: Need to calculate at runtime
+ */
+
+ __flush_tlb_all();
+ init_mapping_done = 1;
+}
+
+extern struct x8664_pda cpu_pda[NR_CPUS];
+
+void zap_low_mappings(void)
+{
+ /* this is not required for Xen */
+#if 0
+ swap_low_mappings();
+#endif
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+void __init paging_init(void)
+{
+ int i;
+
+ {
+ unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ /* unsigned int max_dma; */
+ /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
+ /* if (end_pfn < max_dma) */
+ zones_size[ZONE_DMA] = end_pfn;
+#if 0
+ else {
+ zones_size[ZONE_DMA] = max_dma;
+ zones_size[ZONE_NORMAL] = end_pfn - max_dma;
+ }
+#endif
+ free_area_init(zones_size);
+ }
+
+ set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+ /* Setup mapping of lower 1st MB */
+ for (i = 0; i < NR_FIX_ISAMAPS; i++)
+ if (xen_start_info.flags & SIF_PRIVILEGED)
+ set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
+ else
+ __set_fixmap(FIX_ISAMAP_BEGIN - i,
+ virt_to_machine(empty_zero_page),
+ PAGE_KERNEL_RO);
+#endif
+
+}
+#endif
+
+/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
+ from the CPU leading to inconsistent cache lines. address and size
+ must be aligned to 2MB boundaries.
+ Does nothing when the mapping doesn't exist. */
+void __init clear_kernel_mapping(unsigned long address, unsigned long size)
+{
+ unsigned long end = address + size;
+
+ BUG_ON(address & ~LARGE_PAGE_MASK);
+ BUG_ON(size & ~LARGE_PAGE_MASK);
+
+ for (; address < end; address += LARGE_PAGE_SIZE) {
+ pgd_t *pgd = pgd_offset_k(address);
+ pud_t *pud;
+ pmd_t *pmd;
+ if (pgd_none(*pgd))
+ continue;
+ pud = pud_offset(pgd, address);
+ if (pud_none(*pud))
+ continue;
+ pmd = pmd_offset(pud, address);
+ if (!pmd || pmd_none(*pmd))
+ continue;
+ if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
+ /* Could handle this, but it should not happen currently. */
+ printk(KERN_ERR
+ "clear_kernel_mapping: mapping has been split. will leak memory\n");
+ pmd_ERROR(*pmd);
+ }
+ set_pmd(pmd, __pmd(0));
+ }
+ __flush_tlb_all();
+}
+
+static inline int page_is_ram (unsigned long pagenr)
+{
+ if (pagenr < start_pfn || pagenr >= end_pfn)
+ return 0;
+
+ return 1;
+}
+
+extern int swiotlb_force;
+
+static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
+ kcore_vsyscall;
+
+void __init mem_init(void)
+{
+ int codesize, reservedpages, datasize, initsize;
+ int tmp;
+
+#ifdef CONFIG_SWIOTLB
+ if (swiotlb_force)
+ swiotlb = 1;
+ if (!iommu_aperture &&
+ (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu))
+ swiotlb = 1;
+ if (swiotlb)
+ swiotlb_init();
+#endif
+
+ /* How many end-of-memory variables you have, grandma! */
+ max_low_pfn = end_pfn;
+ max_pfn = end_pfn;
+ num_physpages = end_pfn;
+ high_memory = (void *) __va(end_pfn * PAGE_SIZE);
+
+ /* clear the zero-page */
+ memset(empty_zero_page, 0, PAGE_SIZE);
+
+ reservedpages = 0;
+
+ /* this will put all low memory onto the freelists */
+#ifdef CONFIG_DISCONTIGMEM
+ totalram_pages += numa_free_all_bootmem();
+ tmp = 0;
+ /* should count reserved pages here for all nodes */
+#else
+ max_mapnr = end_pfn;
+ if (!mem_map) BUG();
+
+ totalram_pages += free_all_bootmem();
+
+ for (tmp = 0; tmp < end_pfn; tmp++)
+ /*
+ * Only count reserved RAM pages
+ */
+ if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
+ reservedpages++;
+#endif
+
+ after_bootmem = 1;
+
+ codesize = (unsigned long) &_etext - (unsigned long) &_text;
+ datasize = (unsigned long) &_edata - (unsigned long) &_etext;
+ initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+ /* Register memory areas for /proc/kcore */
+ kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+ kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
+ VMALLOC_END-VMALLOC_START);
+ kclist_add(&kcore_kernel, &_stext, _end - _stext);
+ kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
+ VSYSCALL_END - VSYSCALL_START);
+
+ printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
+ (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ end_pfn << (PAGE_SHIFT-10),
+ codesize >> 10,
+ reservedpages << (PAGE_SHIFT-10),
+ datasize >> 10,
+ initsize >> 10);
+
+ /*
+ * Subtle. SMP is doing its boot stuff late (because it has to
+ * fork idle threads) - but it also needs low mappings for the
+ * protected-mode entry to work. We zap these entries only after
+ * the WP-bit has been tested.
+ */
+#ifndef CONFIG_SMP
+ zap_low_mappings();
+#endif
+}
+
+extern char __initdata_begin[], __initdata_end[];
+
+void free_initmem(void)
+{
+#ifdef __DO_LATER__
+ /*
+ * Some pages can be pinned, but some are not. Unpinning such pages
+ * triggers BUG().
+ */
+ unsigned long addr;
+
+ addr = (unsigned long)(&__init_begin);
+ for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ set_page_count(virt_to_page(addr), 1);
+ memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
+ xen_pte_unpin(__pa(addr));
+ make_page_writable(__va(__pa(addr)));
+ /*
+ * Make pages from __PAGE_OFFSET address as well
+ */
+ make_page_writable((void *)addr);
+ free_page(addr);
+ totalram_pages++;
+ }
+ memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
+ printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
+#endif
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ if (start < (unsigned long)&_end)
+ return;
+ printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ for (; start < end; start += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(start));
+ set_page_count(virt_to_page(start), 1);
+ free_page(start);
+ totalram_pages++;
+ }
+}
+#endif
+
+void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
+{
+ /* Should check here against the e820 map to avoid double free */
+#ifdef CONFIG_DISCONTIGMEM
+ int nid = phys_to_nid(phys);
+ reserve_bootmem_node(NODE_DATA(nid), phys, len);
+#else
+ reserve_bootmem(phys, len);
+#endif
+}
+
+int kern_addr_valid(unsigned long addr)
+{
+ unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (above != 0 && above != -1UL)
+ return 0;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd))
+ return 0;
+
+ pud = pud_offset_k(addr);
+ if (pud_none(*pud))
+ return 0;
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return 0;
+ if (pmd_large(*pmd))
+ return pfn_valid(pmd_pfn(*pmd));
+
+ pte = pte_offset_kernel(pmd, addr);
+ if (pte_none(*pte))
+ return 0;
+ return pfn_valid(pte_pfn(*pte));
+}
+
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+
+extern int exception_trace, page_fault_trace;
+
+static ctl_table debug_table2[] = {
+ { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
+ proc_dointvec },
+#ifdef CONFIG_CHECKING
+ { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
+ proc_dointvec },
+#endif
+ { 0, }
+};
+
+static ctl_table debug_root_table2[] = {
+ { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
+ .child = debug_table2 },
+ { 0 },
+};
+
+static __init int x8664_sysctl_init(void)
+{
+ register_sysctl_table(debug_root_table2, 1);
+ return 0;
+}
+__initcall(x8664_sysctl_init);
+#endif
+
+/* Pseudo VMAs to allow ptrace access for the vsyscall pages. x86-64 has two
+ different ones: one for 32bit and one for 64bit. Use the appropriate
+ for the target task. */
+
+static struct vm_area_struct gate_vma = {
+ .vm_start = VSYSCALL_START,
+ .vm_end = VSYSCALL_END,
+ .vm_page_prot = PAGE_READONLY
+};
+
+static struct vm_area_struct gate32_vma = {
+ .vm_start = VSYSCALL32_BASE,
+ .vm_end = VSYSCALL32_END,
+ .vm_page_prot = PAGE_READONLY
+};
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+#ifdef CONFIG_IA32_EMULATION
+ if (test_tsk_thread_flag(tsk, TIF_IA32)) {
+ /* lookup code assumes the pages are present. set them up
+ now */
+ if (__map_syscall32(tsk->mm, VSYSCALL32_BASE) < 0)
+ return NULL;
+ return &gate32_vma;
+ }
+#endif
+ return &gate_vma;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+ struct vm_area_struct *vma = get_gate_vma(task);
+ return (addr >= vma->vm_start) && (addr < vma->vm_end);
+}
+
+/* Use this when you have no reliable task/vma, typically from interrupt
+ * context. It is less reliable than using the task's vma and may give
+ * false positives.
+ */
+int in_gate_area_no_task(unsigned long addr)
+{
+ return (((addr >= VSYSCALL_START) && (addr < VSYSCALL_END)) ||
+ ((addr >= VSYSCALL32_BASE) && (addr < VSYSCALL32_END)));
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c
new file mode 100644
index 0000000000..3714880f72
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/ioremap.c
@@ -0,0 +1,466 @@
+/*
+ * arch/x86_64/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/fixmap.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+
+/*
+ * Reuse arch/xen/i386/mm/ioremap.c. Need to merge later
+ */
+#ifndef CONFIG_XEN_PHYSDEV_ACCESS
+
+void * __ioremap(unsigned long phys_addr, unsigned long size,
+ unsigned long flags)
+{
+ return NULL;
+}
+
+void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+ return NULL;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+}
+
+void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+{
+ return NULL;
+}
+
+void __init bt_iounmap(void *addr, unsigned long size)
+{
+}
+
+#else
+
+#if defined(__i386__)
+/*
+ * Does @address reside within a non-highmem page that is local to this virtual
+ * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
+ * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
+ * why this works.
+ */
+static inline int is_local_lowmem(unsigned long address)
+{
+ extern unsigned long max_low_pfn;
+ unsigned long mfn = address >> PAGE_SHIFT;
+ unsigned long pfn = mfn_to_pfn(mfn);
+ return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
+}
+#elif defined(__x86_64__)
+/*
+ * x86-64: no lowmem-reuse check yet — always returns 0, so the
+ * caller in __ioremap() never selects DOMID_LOCAL here.
+ */
+static inline int is_local_lowmem(unsigned long address)
+{
+ return 0;
+}
+#endif
+
+/*
+ * Generic mapping function (not visible outside):
+ */
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+ void __iomem * addr;
+ struct vm_struct * area;
+ unsigned long offset, last_addr;
+ domid_t domid = DOMID_IO;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ /*
+ * Don't remap the low PCI/ISA area, it's always mapped..
+ */
+ if (phys_addr >= 0x0 && last_addr < 0x100000)
+ return isa_bus_to_virt(phys_addr);
+#endif
+
+ /*
+ * Don't allow anybody to remap normal RAM that we're using..
+ */
+ if (is_local_lowmem(phys_addr)) {
+ char *t_addr, *t_end;
+ struct page *page;
+
+ t_addr = bus_to_virt(phys_addr);
+ t_end = t_addr + (size - 1);
+
+ for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
+ if(!PageReserved(page))
+ return NULL;
+
+ domid = DOMID_LOCAL;
+ }
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+ /*
+ * Ok, go for it..
+ */
+ area = get_vm_area(size, VM_IOREMAP | (flags << 20));
+ if (!area)
+ return NULL;
+ area->phys_addr = phys_addr;
+ addr = (void __iomem *) area->addr;
+ if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
+ size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
+ _PAGE_DIRTY | _PAGE_ACCESSED
+#if defined(__x86_64__)
+ | _PAGE_USER
+#endif
+ | flags), domid)) {
+ vunmap((void __force *) addr);
+ return NULL;
+ }
+ return (void __iomem *) (offset + (char __iomem *)addr);
+}
+
+
+/**
+ * ioremap_nocache - map bus memory into CPU space
+ * @offset: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked uncachable
+ * on the CPU as well as honouring existing caching rules from things like
+ * the PCI bus. Note that there are other caches and buffers on many
+ * busses. In particular driver authors should read up on PCI writes
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ *
+ * Must be freed with iounmap.
+ */
+
+void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+ unsigned long last_addr;
+ void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
+ if (!p)
+ return p;
+
+ /* Guaranteed to be > phys_addr, as per __ioremap() */
+ last_addr = phys_addr + size - 1;
+
+ if (is_local_lowmem(last_addr)) {
+ struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
+ unsigned long npages;
+
+ phys_addr &= PAGE_MASK;
+
+ /* This might overflow and become zero.. */
+ last_addr = PAGE_ALIGN(last_addr);
+
+ /* .. but that's ok, because modulo-2**n arithmetic will make
+ * the page-aligned "last - first" come out right.
+ */
+ npages = (last_addr - phys_addr) >> PAGE_SHIFT;
+
+ if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
+ iounmap(p);
+ p = NULL;
+ }
+ global_flush_tlb();
+ }
+
+ return p;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+ struct vm_struct *p;
+ if ((void __force *) addr <= high_memory)
+ return;
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
+ return;
+#endif
+ p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
+ if (!p) {
+ printk("__iounmap: bad address %p\n", addr);
+ return;
+ }
+
+ if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
+ /* p->size includes the guard page, but cpa doesn't like that */
+ change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
+ (p->size - PAGE_SIZE) >> PAGE_SHIFT,
+ PAGE_KERNEL);
+ global_flush_tlb();
+ }
+ kfree(p);
+}
+
+#if defined(__i386__)
+void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+{
+ unsigned long offset, last_addr;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ /*
+ * Don't remap the low PCI/ISA area, it's always mapped..
+ */
+ if (phys_addr >= 0x0 && last_addr < 0x100000)
+ return isa_bus_to_virt(phys_addr);
+#endif
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr) - phys_addr;
+
+ /*
+ * Mappings have to fit in the FIX_BTMAP area.
+ */
+ nrpages = size >> PAGE_SHIFT;
+ if (nrpages > NR_FIX_BTMAPS)
+ return NULL;
+
+ /*
+ * Ok, go for it..
+ */
+ idx = FIX_BTMAP_BEGIN;
+ while (nrpages > 0) {
+ set_fixmap(idx, phys_addr);
+ phys_addr += PAGE_SIZE;
+ --idx;
+ --nrpages;
+ }
+ return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
+}
+
+void __init bt_iounmap(void *addr, unsigned long size)
+{
+ unsigned long virt_addr;
+ unsigned long offset;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+
+ virt_addr = (unsigned long)addr;
+ if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
+ return;
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
+ return;
+#endif
+ offset = virt_addr & ~PAGE_MASK;
+ nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
+
+ idx = FIX_BTMAP_BEGIN;
+ while (nrpages > 0) {
+ clear_fixmap(idx);
+ --idx;
+ --nrpages;
+ }
+}
+#endif /* defined(__i386__) */
+
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+
+/* These hacky macros avoid phys->machine translations. */
+#define __direct_pte(x) ((pte_t) { (x) } )
+#define __direct_mk_pte(page_nr,pgprot) \
+ __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+#define direct_mk_pte_phys(physpage, pgprot) \
+ __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+static inline void direct_remap_area_pte(pte_t *pte,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t **v)
+{
+ unsigned long end;
+
+ address &= ~PMD_MASK;
+ end = address + size;
+ if (end > PMD_SIZE)
+ end = PMD_SIZE;
+ if (address >= end)
+ BUG();
+
+ do {
+ (*v)->ptr = virt_to_machine(pte);
+ (*v)++;
+ address += PAGE_SIZE;
+ pte++;
+ } while (address && (address < end));
+}
+
+static inline int direct_remap_area_pmd(struct mm_struct *mm,
+ pmd_t *pmd,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t **v)
+{
+ unsigned long end;
+
+ address &= ~PGDIR_MASK;
+ end = address + size;
+ if (end > PGDIR_SIZE)
+ end = PGDIR_SIZE;
+ if (address >= end)
+ BUG();
+ do {
+ pte_t *pte = (mm == &init_mm) ?
+ pte_alloc_kernel(mm, pmd, address) :
+ pte_alloc_map(mm, pmd, address);
+ if (!pte)
+ return -ENOMEM;
+ direct_remap_area_pte(pte, address, end - address, v);
+ pte_unmap(pte);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ pmd++;
+ } while (address && (address < end));
+ return 0;
+}
+
+int __direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t *v)
+{
+ pgd_t * dir;
+ unsigned long end = address + size;
+ int error;
+
+#if defined(__i386__)
+ dir = pgd_offset(mm, address);
+#elif defined(__x86_64__)
+ dir = (mm == &init_mm) ?
+ pgd_offset_k(address):
+ pgd_offset(mm, address);
+#endif
+ if (address >= end)
+ BUG();
+ spin_lock(&mm->page_table_lock);
+ do {
+ pud_t *pud;
+ pmd_t *pmd;
+
+ error = -ENOMEM;
+ pud = pud_alloc(mm, dir, address);
+ if (!pud)
+ break;
+ pmd = pmd_alloc(mm, pud, address);
+ if (!pmd)
+ break;
+ error = 0;
+ direct_remap_area_pmd(mm, pmd, address, end - address, &v);
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ dir++;
+
+ } while (address && (address < end));
+ spin_unlock(&mm->page_table_lock);
+ return error;
+}
+
+
+int direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long machine_addr,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid)
+{
+ int i;
+ unsigned long start_address;
+#define MAX_DIRECTMAP_MMU_QUEUE 130
+ mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
+
+ start_address = address;
+
+ flush_cache_all();
+
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
+ /* Fill in the PTE pointers. */
+ __direct_remap_area_pages(mm,
+ start_address,
+ address-start_address,
+ u);
+
+ if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
+ return -EFAULT;
+ v = u;
+ start_address = address;
+ }
+
+ /*
+ * Fill in the machine address: PTE ptr is done later by
+ * __direct_remap_area_pages().
+ */
+ v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
+
+ machine_addr += PAGE_SIZE;
+ address += PAGE_SIZE;
+ v++;
+ }
+
+ if (v != u) {
+ /* get the ptep's filled in */
+ __direct_remap_area_pages(mm,
+ start_address,
+ address-start_address,
+ u);
+ if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
+ return -EFAULT;
+ }
+
+ flush_tlb_all();
+
+ return 0;
+}
+
+EXPORT_SYMBOL(direct_remap_area_pages);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/pageattr.c b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/pageattr.c
new file mode 100644
index 0000000000..63e9a38a17
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/mm/pageattr.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Thanks to Ben LaHaise for precious feedback.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
+
+void pte_free(struct page *pte)
+{
+ pte_t *ptep;
+
+ ptep = pfn_to_kaddr(page_to_pfn(pte));
+
+ xen_pte_unpin(__pa(ptep));
+ make_page_writable(ptep);
+ __free_page(pte);
+}
+
+static inline pte_t *lookup_address(unsigned long address)
+{
+ pgd_t *pgd = pgd_offset_k(address);
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ if (pgd_none(*pgd))
+ return NULL;
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ return NULL;
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(*pmd))
+ return NULL;
+ if (pmd_large(*pmd))
+ return (pte_t *)pmd;
+ pte = pte_offset_kernel(pmd, address);
+ if (pte && !pte_present(*pte))
+ pte = NULL;
+ return pte;
+}
+
+static struct page *split_large_page(unsigned long address, pgprot_t prot,
+ pgprot_t ref_prot)
+{
+ int i;
+ unsigned long addr;
+ struct page *base = alloc_pages(GFP_KERNEL, 0);
+ pte_t *pbase;
+ if (!base)
+ return NULL;
+ address = __pa(address);
+ addr = address & LARGE_PAGE_MASK;
+ pbase = (pte_t *)page_address(base);
+ for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
+ pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
+ addr == address ? prot : ref_prot);
+ }
+ return base;
+}
+
+
+static void flush_kernel_map(void *address)
+{
+ if (0 && address && cpu_has_clflush) {
+ /* is this worth it? */
+ int i;
+ for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
+ asm volatile("clflush (%0)" :: "r" (address + i));
+ } else
+ asm volatile("wbinvd":::"memory");
+ if (address)
+ __flush_tlb_one((unsigned long) address);
+ else
+ __flush_tlb_all();
+}
+
+
+static inline void flush_map(unsigned long address)
+{
+ on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
+}
+
+struct deferred_page {
+ struct deferred_page *next;
+ struct page *fpage;
+ unsigned long address;
+};
+static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
+
+static inline void save_page(unsigned long address, struct page *fpage)
+{
+ struct deferred_page *df;
+ df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
+ if (!df) {
+ flush_map(address);
+ __free_page(fpage);
+ } else {
+ df->next = df_list;
+ df->fpage = fpage;
+ df->address = address;
+ df_list = df;
+ }
+}
+
+/*
+ * No more special protections in this 2/4MB area - revert to a
+ * large page again.
+ */
+static void revert_page(unsigned long address, pgprot_t ref_prot)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t large_pte;
+
+ pgd = pgd_offset_k(address);
+ BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd,address);
+ BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, address);
+ BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
+ pgprot_val(ref_prot) |= _PAGE_PSE;
+ large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
+ set_pte((pte_t *)pmd, large_pte);
+}
+
+static int
+__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
+ pgprot_t ref_prot)
+{
+ pte_t *kpte;
+ struct page *kpte_page;
+ unsigned kpte_flags;
+ kpte = lookup_address(address);
+ if (!kpte) return 0;
+ kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
+ kpte_flags = pte_val(*kpte);
+ if (pgprot_val(prot) != pgprot_val(ref_prot)) {
+ if ((kpte_flags & _PAGE_PSE) == 0) {
+ set_pte(kpte, pfn_pte(pfn, prot));
+ } else {
+ /*
+ * split_large_page will take the reference for this change_page_attr
+ * on the split page.
+ */
+ struct page *split = split_large_page(address, prot, ref_prot);
+ if (!split)
+ return -ENOMEM;
+ set_pte(kpte,mk_pte(split, ref_prot));
+ kpte_page = split;
+ }
+ get_page(kpte_page);
+ } else if ((kpte_flags & _PAGE_PSE) == 0) {
+ set_pte(kpte, pfn_pte(pfn, ref_prot));
+ __put_page(kpte_page);
+ } else
+ BUG();
+
+ /* on x86-64 the direct mapping set at boot is not using 4k pages */
+ BUG_ON(PageReserved(kpte_page));
+
+ switch (page_count(kpte_page)) {
+ case 1:
+ save_page(address, kpte_page);
+ revert_page(address, ref_prot);
+ break;
+ case 0:
+ BUG(); /* memleak and failed 2M page regeneration */
+ }
+ return 0;
+}
+
+/*
+ * Change the page attributes of an page in the linear mapping.
+ *
+ * This should be used when a page is mapped with a different caching policy
+ * than write-back somewhere - some CPUs do not like it when mappings with
+ * different caching policies exist. This changes the page attributes of the
+ * in kernel linear mapping too.
+ *
+ * The caller needs to ensure that there are no conflicting mappings elsewhere.
+ * This function only deals with the kernel linear map.
+ *
+ * Caller must call global_flush_tlb() after this.
+ */
+int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
+{
+ int err = 0;
+ int i;
+
+ down_write(&init_mm.mmap_sem);
+ for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
+ unsigned long pfn = __pa(address) >> PAGE_SHIFT;
+
+ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
+ if (err)
+ break;
+ /* Handle kernel mapping too which aliases part of the
+ * lowmem */
+ if (__pa(address) < KERNEL_TEXT_SIZE) {
+ unsigned long addr2;
+ pgprot_t prot2 = prot;
+ addr2 = __START_KERNEL_map + __pa(address);
+ pgprot_val(prot2) &= ~_PAGE_NX;
+ err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
+ }
+ }
+ up_write(&init_mm.mmap_sem);
+ return err;
+}
+
+/* Don't call this for MMIO areas that may not have a mem_map entry */
+int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{
+ unsigned long addr = (unsigned long)page_address(page);
+ return change_page_attr_addr(addr, numpages, prot);
+}
+
+void global_flush_tlb(void)
+{
+ struct deferred_page *df, *next_df;
+
+ down_read(&init_mm.mmap_sem);
+ df = xchg(&df_list, NULL);
+ up_read(&init_mm.mmap_sem);
+ if (!df)
+ return;
+ flush_map((df && !df->next) ? df->address : 0);
+ for (; df; df = next_df) {
+ next_df = df->next;
+ if (df->fpage)
+ __free_page(df->fpage);
+ kfree(df);
+ }
+}
+
+EXPORT_SYMBOL(change_page_attr);
+EXPORT_SYMBOL(global_flush_tlb);
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile
new file mode 100644
index 0000000000..47dbc45daa
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile
@@ -0,0 +1,41 @@
+#
+# Makefile for X86_64 specific PCI routines
+#
+# Reuse the i386 PCI subsystem
+#
+XENARCH := $(subst ",,$(CONFIG_XENARCH))
+CFLAGS += -Iarch/$(XENARCH)/pci
+
+CFLAGS += -Iarch/i386/pci
+
+c-obj-y := i386.o
+c-obj-y += fixup.o
+c-obj-$(CONFIG_ACPI_PCI) += acpi.o
+c-obj-y += legacy.o common.o
+c-obj-$(CONFIG_PCI_DIRECT)+= direct.o
+c-xen-obj-y += irq.o
+# mmconfig has a 64bit special
+c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
+
+c-obj-$(CONFIG_NUMA) += k8-bus.o
+
+c-direct-y += ../../i386/pci/direct.o
+c-acpi-y += ../../i386/pci/acpi.o
+c-legacy-y += ../../i386/pci/legacy.o
+c-irq-y += ../../i386/pci/irq.o
+c-common-y += ../../i386/pci/common.o
+c-fixup-y += ../../i386/pci/fixup.o
+c-i386-y += ../../i386/pci/i386.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-xen-obj-y) $(c-link)):
+ @ln -fsn $(srctree)/arch/xen/i386/pci/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+ @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
+
+obj-y += $(c-obj-y)
+obj-y += $(c-xen-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff --git a/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile-BUS b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile-BUS
new file mode 100644
index 0000000000..291985f0d2
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/arch/xen/x86_64/pci/Makefile-BUS
@@ -0,0 +1,22 @@
+#
+# Makefile for X86_64 specific PCI routines
+#
+# Reuse the i386 PCI subsystem
+#
+CFLAGS += -I arch/i386/pci
+
+obj-y := i386.o
+obj-$(CONFIG_PCI_DIRECT)+= direct.o
+obj-y += fixup.o
+obj-$(CONFIG_ACPI_PCI) += acpi.o
+obj-y += legacy.o irq.o common.o
+# mmconfig has a 64bit special
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
+
+direct-y += ../../i386/pci/direct.o
+acpi-y += ../../i386/pci/acpi.o
+legacy-y += ../../i386/pci/legacy.o
+irq-y += ../../i386/pci/irq.o
+common-y += ../../i386/pci/common.o
+fixup-y += ../../i386/pci/fixup.o
+i386-y += ../../i386/pci/i386.o
diff --git a/linux-2.6.11-xen-sparse/drivers/acpi/tables.c b/linux-2.6.11-xen-sparse/drivers/acpi/tables.c
new file mode 100644
index 0000000000..745fcec789
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/acpi/tables.c
@@ -0,0 +1,615 @@
+/*
+ * acpi_tables.c - ACPI Boot-Time Table Parsing
+ *
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/errno.h>
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+
+#define PREFIX "ACPI: "
+
+#define ACPI_MAX_TABLES 256
+
+static char *acpi_table_signatures[ACPI_TABLE_COUNT] = {
+ [ACPI_TABLE_UNKNOWN] = "????",
+ [ACPI_APIC] = "APIC",
+ [ACPI_BOOT] = "BOOT",
+ [ACPI_DBGP] = "DBGP",
+ [ACPI_DSDT] = "DSDT",
+ [ACPI_ECDT] = "ECDT",
+ [ACPI_ETDT] = "ETDT",
+ [ACPI_FADT] = "FACP",
+ [ACPI_FACS] = "FACS",
+ [ACPI_OEMX] = "OEM",
+ [ACPI_PSDT] = "PSDT",
+ [ACPI_SBST] = "SBST",
+ [ACPI_SLIT] = "SLIT",
+ [ACPI_SPCR] = "SPCR",
+ [ACPI_SRAT] = "SRAT",
+ [ACPI_SSDT] = "SSDT",
+ [ACPI_SPMI] = "SPMI",
+ [ACPI_HPET] = "HPET",
+ [ACPI_MCFG] = "MCFG",
+};
+
+static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
+static char *mps_inti_flags_trigger[] = { "dfl", "edge", "res", "level" };
+
+/* System Description Table (RSDT/XSDT) */
+struct acpi_table_sdt {
+ unsigned long pa;
+ enum acpi_table_id id;
+ unsigned long size;
+} __attribute__ ((packed));
+
+static unsigned long sdt_pa; /* Physical Address */
+static unsigned long sdt_count; /* Table count */
+
+static struct acpi_table_sdt sdt_entry[ACPI_MAX_TABLES];
+
+void
+acpi_table_print (
+ struct acpi_table_header *header,
+ unsigned long phys_addr)
+{
+ char *name = NULL;
+
+ if (!header)
+ return;
+
+ /* Some table signatures aren't good table names */
+
+ if (!strncmp((char *) &header->signature,
+ acpi_table_signatures[ACPI_APIC],
+ sizeof(header->signature))) {
+ name = "MADT";
+ }
+ else if (!strncmp((char *) &header->signature,
+ acpi_table_signatures[ACPI_FADT],
+ sizeof(header->signature))) {
+ name = "FADT";
+ }
+ else
+ name = header->signature;
+
+ printk(KERN_DEBUG PREFIX "%.4s (v%3.3d %6.6s %8.8s 0x%08x %.4s 0x%08x) @ 0x%p\n",
+ name, header->revision, header->oem_id,
+ header->oem_table_id, header->oem_revision,
+ header->asl_compiler_id, header->asl_compiler_revision,
+ (void *) phys_addr);
+}
+
+
+void
+acpi_table_print_madt_entry (
+ acpi_table_entry_header *header)
+{
+ if (!header)
+ return;
+
+ switch (header->type) {
+
+ case ACPI_MADT_LAPIC:
+ {
+ struct acpi_table_lapic *p =
+ (struct acpi_table_lapic*) header;
+ printk(KERN_INFO PREFIX "LAPIC (acpi_id[0x%02x] lapic_id[0x%02x] %s)\n",
+ p->acpi_id, p->id, p->flags.enabled?"enabled":"disabled");
+ }
+ break;
+
+ case ACPI_MADT_IOAPIC:
+ {
+ struct acpi_table_ioapic *p =
+ (struct acpi_table_ioapic*) header;
+ printk(KERN_INFO PREFIX "IOAPIC (id[0x%02x] address[0x%08x] gsi_base[%d])\n",
+ p->id, p->address, p->global_irq_base);
+ }
+ break;
+
+ case ACPI_MADT_INT_SRC_OVR:
+ {
+ struct acpi_table_int_src_ovr *p =
+ (struct acpi_table_int_src_ovr*) header;
+ printk(KERN_INFO PREFIX "INT_SRC_OVR (bus %d bus_irq %d global_irq %d %s %s)\n",
+ p->bus, p->bus_irq, p->global_irq,
+ mps_inti_flags_polarity[p->flags.polarity],
+ mps_inti_flags_trigger[p->flags.trigger]);
+ if(p->flags.reserved)
+ printk(KERN_INFO PREFIX "INT_SRC_OVR unexpected reserved flags: 0x%x\n",
+ p->flags.reserved);
+
+ }
+ break;
+
+ case ACPI_MADT_NMI_SRC:
+ {
+ struct acpi_table_nmi_src *p =
+ (struct acpi_table_nmi_src*) header;
+ printk(KERN_INFO PREFIX "NMI_SRC (%s %s global_irq %d)\n",
+ mps_inti_flags_polarity[p->flags.polarity],
+ mps_inti_flags_trigger[p->flags.trigger], p->global_irq);
+ }
+ break;
+
+ case ACPI_MADT_LAPIC_NMI:
+ {
+ struct acpi_table_lapic_nmi *p =
+ (struct acpi_table_lapic_nmi*) header;
+ printk(KERN_INFO PREFIX "LAPIC_NMI (acpi_id[0x%02x] %s %s lint[0x%x])\n",
+ p->acpi_id,
+ mps_inti_flags_polarity[p->flags.polarity],
+ mps_inti_flags_trigger[p->flags.trigger], p->lint);
+ }
+ break;
+
+ case ACPI_MADT_LAPIC_ADDR_OVR:
+ {
+ struct acpi_table_lapic_addr_ovr *p =
+ (struct acpi_table_lapic_addr_ovr*) header;
+ printk(KERN_INFO PREFIX "LAPIC_ADDR_OVR (address[%p])\n",
+ (void *) (unsigned long) p->address);
+ }
+ break;
+
+ case ACPI_MADT_IOSAPIC:
+ {
+ struct acpi_table_iosapic *p =
+ (struct acpi_table_iosapic*) header;
+ printk(KERN_INFO PREFIX "IOSAPIC (id[0x%x] address[%p] gsi_base[%d])\n",
+ p->id, (void *) (unsigned long) p->address, p->global_irq_base);
+ }
+ break;
+
+ case ACPI_MADT_LSAPIC:
+ {
+ struct acpi_table_lsapic *p =
+ (struct acpi_table_lsapic*) header;
+ printk(KERN_INFO PREFIX "LSAPIC (acpi_id[0x%02x] lsapic_id[0x%02x] lsapic_eid[0x%02x] %s)\n",
+ p->acpi_id, p->id, p->eid, p->flags.enabled?"enabled":"disabled");
+ }
+ break;
+
+ case ACPI_MADT_PLAT_INT_SRC:
+ {
+ struct acpi_table_plat_int_src *p =
+ (struct acpi_table_plat_int_src*) header;
+ printk(KERN_INFO PREFIX "PLAT_INT_SRC (%s %s type[0x%x] id[0x%04x] eid[0x%x] iosapic_vector[0x%x] global_irq[0x%x]\n",
+ mps_inti_flags_polarity[p->flags.polarity],
+ mps_inti_flags_trigger[p->flags.trigger],
+ p->type, p->id, p->eid, p->iosapic_vector, p->global_irq);
+ }
+ break;
+
+ default:
+ printk(KERN_WARNING PREFIX "Found unsupported MADT entry (type = 0x%x)\n",
+ header->type);
+ break;
+ }
+}
+
+
+static int
+acpi_table_compute_checksum (
+ void *table_pointer,
+ unsigned long length)
+{
+ u8 *p = (u8 *) table_pointer;
+ unsigned long remains = length;
+ unsigned long sum = 0;
+
+ if (!p || !length)
+ return -EINVAL;
+
+ while (remains--)
+ sum += *p++;
+
+ return (sum & 0xFF);
+}
+
+/*
+ * acpi_get_table_header_early()
+ * for acpi_blacklisted(), acpi_table_get_sdt()
+ */
+int __init
+acpi_get_table_header_early (
+ enum acpi_table_id id,
+ struct acpi_table_header **header)
+{
+ unsigned int i;
+ enum acpi_table_id temp_id;
+
+ /* DSDT is different from the rest */
+ if (id == ACPI_DSDT)
+ temp_id = ACPI_FADT;
+ else
+ temp_id = id;
+
+ /* Locate the table. */
+
+ for (i = 0; i < sdt_count; i++) {
+ if (sdt_entry[i].id != temp_id)
+ continue;
+ *header = (void *)
+ __acpi_map_table(sdt_entry[i].pa, sdt_entry[i].size);
+ if (!*header) {
+ printk(KERN_WARNING PREFIX "Unable to map %s\n",
+ acpi_table_signatures[temp_id]);
+ return -ENODEV;
+ }
+ break;
+ }
+
+ if (!*header) {
+ printk(KERN_WARNING PREFIX "%s not present\n",
+ acpi_table_signatures[id]);
+ return -ENODEV;
+ }
+
+ /* Map the DSDT header via the pointer in the FADT */
+ if (id == ACPI_DSDT) {
+ struct fadt_descriptor_rev2 *fadt = (struct fadt_descriptor_rev2 *) *header;
+
+ if (fadt->revision == 3 && fadt->Xdsdt) {
+ *header = (void *) __acpi_map_table(fadt->Xdsdt,
+ sizeof(struct acpi_table_header));
+ } else if (fadt->V1_dsdt) {
+ *header = (void *) __acpi_map_table(fadt->V1_dsdt,
+ sizeof(struct acpi_table_header));
+ } else
+ *header = NULL;
+
+ if (!*header) {
+ printk(KERN_WARNING PREFIX "Unable to map DSDT\n");
+ return -ENODEV;
+ }
+ }
+
+ return 0;
+}
+
+
+int __init
+acpi_table_parse_madt_family (
+ enum acpi_table_id id,
+ unsigned long madt_size,
+ int entry_id,
+ acpi_madt_entry_handler handler,
+ unsigned int max_entries)
+{
+ void *madt = NULL;
+ acpi_table_entry_header *entry;
+ unsigned int count = 0;
+ unsigned long madt_end;
+ unsigned int i;
+
+ if (!handler)
+ return -EINVAL;
+
+ /* Locate the MADT (if exists). There should only be one. */
+
+ for (i = 0; i < sdt_count; i++) {
+ if (sdt_entry[i].id != id)
+ continue;
+ madt = (void *)
+ __acpi_map_table(sdt_entry[i].pa, sdt_entry[i].size);
+ if (!madt) {
+ printk(KERN_WARNING PREFIX "Unable to map %s\n",
+ acpi_table_signatures[id]);
+ return -ENODEV;
+ }
+ break;
+ }
+
+ if (!madt) {
+ printk(KERN_WARNING PREFIX "%s not present\n",
+ acpi_table_signatures[id]);
+ return -ENODEV;
+ }
+
+ madt_end = (unsigned long) madt + sdt_entry[i].size;
+
+ /* Parse all entries looking for a match. */
+
+ entry = (acpi_table_entry_header *)
+ ((unsigned long) madt + madt_size);
+
+ while (((unsigned long) entry) + sizeof(acpi_table_entry_header) < madt_end) {
+ if (entry->type == entry_id &&
+ (!max_entries || count++ < max_entries))
+ if (handler(entry, madt_end))
+ return -EINVAL;
+
+ entry = (acpi_table_entry_header *)
+ ((unsigned long) entry + entry->length);
+ }
+ if (max_entries && count > max_entries) {
+ printk(KERN_WARNING PREFIX "[%s:0x%02x] ignored %i entries of "
+ "%i found\n", acpi_table_signatures[id], entry_id,
+ count - max_entries, count);
+ }
+
+ return count;
+}
+
+
+int __init
+acpi_table_parse_madt (
+ enum acpi_madt_entry_id id,
+ acpi_madt_entry_handler handler,
+ unsigned int max_entries)
+{
+ return acpi_table_parse_madt_family(ACPI_APIC, sizeof(struct acpi_table_madt),
+ id, handler, max_entries);
+}
+
+
+int __init
+acpi_table_parse (
+ enum acpi_table_id id,
+ acpi_table_handler handler)
+{
+ int count = 0;
+ unsigned int i = 0;
+
+ if (!handler)
+ return -EINVAL;
+
+ for (i = 0; i < sdt_count; i++) {
+ if (sdt_entry[i].id != id)
+ continue;
+ count++;
+ if (count == 1)
+ handler(sdt_entry[i].pa, sdt_entry[i].size);
+
+ else
+ printk(KERN_WARNING PREFIX "%d duplicate %s table ignored.\n",
+ count, acpi_table_signatures[id]);
+ }
+
+ return count;
+}
+
+
+static int __init
+acpi_table_get_sdt (
+ struct acpi_table_rsdp *rsdp)
+{
+ struct acpi_table_header *header = NULL;
+ unsigned int i, id = 0;
+
+ if (!rsdp)
+ return -EINVAL;
+
+ /* First check XSDT (but only on ACPI 2.0-compatible systems) */
+
+ if ((rsdp->revision >= 2) &&
+ (((struct acpi20_table_rsdp*)rsdp)->xsdt_address)) {
+
+ struct acpi_table_xsdt *mapped_xsdt = NULL;
+
+ sdt_pa = ((struct acpi20_table_rsdp*)rsdp)->xsdt_address;
+
+ /* map in just the header */
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
+
+ if (!header) {
+ printk(KERN_WARNING PREFIX "Unable to map XSDT header\n");
+ return -ENODEV;
+ }
+
+ /* remap in the entire table before processing */
+ mapped_xsdt = (struct acpi_table_xsdt *)
+ __acpi_map_table(sdt_pa, header->length);
+ if (!mapped_xsdt) {
+ printk(KERN_WARNING PREFIX "Unable to map XSDT\n");
+ return -ENODEV;
+ }
+ header = &mapped_xsdt->header;
+
+ if (strncmp(header->signature, "XSDT", 4)) {
+ printk(KERN_WARNING PREFIX "XSDT signature incorrect\n");
+ return -ENODEV;
+ }
+
+ if (acpi_table_compute_checksum(header, header->length)) {
+ printk(KERN_WARNING PREFIX "Invalid XSDT checksum\n");
+ return -ENODEV;
+ }
+
+ sdt_count = (header->length - sizeof(struct acpi_table_header)) >> 3;
+ if (sdt_count > ACPI_MAX_TABLES) {
+ printk(KERN_WARNING PREFIX "Truncated %lu XSDT entries\n",
+ (sdt_count - ACPI_MAX_TABLES));
+ sdt_count = ACPI_MAX_TABLES;
+ }
+
+ for (i = 0; i < sdt_count; i++)
+ sdt_entry[i].pa = (unsigned long) mapped_xsdt->entry[i];
+ }
+
+ /* Then check RSDT */
+
+ else if (rsdp->rsdt_address) {
+
+ struct acpi_table_rsdt *mapped_rsdt = NULL;
+
+ sdt_pa = rsdp->rsdt_address;
+
+ /* map in just the header */
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
+ if (!header) {
+ printk(KERN_WARNING PREFIX "Unable to map RSDT header\n");
+ return -ENODEV;
+ }
+
+ /* remap in the entire table before processing */
+ mapped_rsdt = (struct acpi_table_rsdt *)
+ __acpi_map_table(sdt_pa, header->length);
+ if (!mapped_rsdt) {
+ printk(KERN_WARNING PREFIX "Unable to map RSDT\n");
+ return -ENODEV;
+ }
+ header = &mapped_rsdt->header;
+
+ if (strncmp(header->signature, "RSDT", 4)) {
+ printk(KERN_WARNING PREFIX "RSDT signature incorrect\n");
+ return -ENODEV;
+ }
+
+ if (acpi_table_compute_checksum(header, header->length)) {
+ printk(KERN_WARNING PREFIX "Invalid RSDT checksum\n");
+ return -ENODEV;
+ }
+
+ sdt_count = (header->length - sizeof(struct acpi_table_header)) >> 2;
+ if (sdt_count > ACPI_MAX_TABLES) {
+ printk(KERN_WARNING PREFIX "Truncated %lu RSDT entries\n",
+ (sdt_count - ACPI_MAX_TABLES));
+ sdt_count = ACPI_MAX_TABLES;
+ }
+
+ for (i = 0; i < sdt_count; i++)
+ sdt_entry[i].pa = (unsigned long) mapped_rsdt->entry[i];
+ }
+
+ else {
+ printk(KERN_WARNING PREFIX "No System Description Table (RSDT/XSDT) specified in RSDP\n");
+ return -ENODEV;
+ }
+
+ acpi_table_print(header, sdt_pa);
+
+ for (i = 0; i < sdt_count; i++) {
+
+ /* map in just the header */
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt_entry[i].pa,
+ sizeof(struct acpi_table_header));
+ if (!header)
+ continue;
+
+ /* remap in the entire table before processing */
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt_entry[i].pa,
+ header->length);
+ if (!header)
+ continue;
+
+ acpi_table_print(header, sdt_entry[i].pa);
+
+ if (acpi_table_compute_checksum(header, header->length)) {
+ printk(KERN_WARNING " >>> ERROR: Invalid checksum\n");
+ continue;
+ }
+
+ sdt_entry[i].size = header->length;
+
+ for (id = 0; id < ACPI_TABLE_COUNT; id++) {
+ if (!strncmp((char *) &header->signature,
+ acpi_table_signatures[id],
+ sizeof(header->signature))) {
+ sdt_entry[i].id = id;
+ }
+ }
+ }
+
+ /*
+ * The DSDT is *not* in the RSDT (why not? no idea.) but we want
+ * to print its info, because this is what people usually blacklist
+ * against. Unfortunately, we don't know the phys_addr, so just
+ * print 0. Maybe no one will notice.
+ */
+ if(!acpi_get_table_header_early(ACPI_DSDT, &header))
+ acpi_table_print(header, 0);
+
+ return 0;
+}
+
+/*
+ * acpi_table_init()
+ *
+ * find RSDP, find and checksum SDT/XSDT.
+ * checksum all tables, print SDT/XSDT
+ *
+ * result: sdt_entry[] is initialized
+ */
+#if CONFIG_XEN
+#define acpi_rsdp_phys_to_va(rsdp_phys) (__fix_to_virt(FIX_ACPI_RSDP_PAGE) + \
+ (rsdp_phys & ~PAGE_MASK))
+#else
+#define acpi_rsdp_phys_to_va(rsdp_phys) __va(rsdp_phys)
+#endif
+
+int __init
+acpi_table_init (void)
+{
+ struct acpi_table_rsdp *rsdp = NULL;
+ unsigned long rsdp_phys = 0;
+ int result = 0;
+
+ /* Locate and map the Root System Description Table (RSDP) */
+
+ rsdp_phys = acpi_find_rsdp();
+ if (!rsdp_phys) {
+ printk(KERN_ERR PREFIX "Unable to locate RSDP\n");
+ return -ENODEV;
+ }
+
+ rsdp = (struct acpi_table_rsdp *) acpi_rsdp_phys_to_va(rsdp_phys);
+ if (!rsdp) {
+ printk(KERN_WARNING PREFIX "Unable to map RSDP\n");
+ return -ENODEV;
+ }
+
+ printk(KERN_DEBUG PREFIX "RSDP (v%3.3d %6.6s ) @ 0x%p\n",
+ rsdp->revision, rsdp->oem_id, (void *) rsdp_phys);
+
+ if (rsdp->revision < 2)
+ result = acpi_table_compute_checksum(rsdp, sizeof(struct acpi_table_rsdp));
+ else
+ result = acpi_table_compute_checksum(rsdp, ((struct acpi20_table_rsdp *)rsdp)->length);
+
+ if (result) {
+ printk(KERN_WARNING " >>> ERROR: Invalid checksum\n");
+ return -ENODEV;
+ }
+
+ /* Locate and map the System Description table (RSDT/XSDT) */
+
+ if (acpi_table_get_sdt(rsdp))
+ return -ENODEV;
+
+ return 0;
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/Makefile
index 0bfb5a50c3..50e13067a3 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/Makefile
+++ b/linux-2.6.11-xen-sparse/drivers/xen/Makefile
@@ -3,10 +3,11 @@
obj-y += console/
obj-y += evtchn/
obj-y += balloon/
+obj-y += privcmd/
-obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += privcmd/
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/
obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/
+obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c
index ff82cd2f0a..649f64c402 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c
@@ -139,24 +139,6 @@ static struct page *balloon_retrieve(void)
return page;
}
-static inline pte_t *get_ptep(unsigned long addr)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-
- pgd = pgd_offset_k(addr);
- if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
-
- pud = pud_offset(pgd, addr);
- if ( pud_none(*pud) || pud_bad(*pud) ) BUG();
-
- pmd = pmd_offset(pud, addr);
- if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
-
- return pte_offset_kernel(pmd, addr);
-}
-
static void balloon_alarm(unsigned long unused)
{
schedule_work(&balloon_worker);
@@ -220,14 +202,18 @@ static void balloon_process(void *unused)
/* Update P->M and M->P tables. */
phys_to_machine_mapping[pfn] = mfn_list[i];
- queue_machphys_update(mfn_list[i], pfn);
+ xen_machphys_update(mfn_list[i], pfn);
/* Link back into the page tables if it's not a highmem page. */
if ( pfn < max_low_pfn )
- queue_l1_entry_update(
- get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
- (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
-
+ {
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ __pte_ma((mfn_list[i] << PAGE_SHIFT) |
+ pgprot_val(PAGE_KERNEL)),
+ 0);
+ }
+
/* Finally, relinquish the memory back to the system allocator. */
ClearPageReserved(page);
set_page_count(page, 1);
@@ -259,7 +245,8 @@ static void balloon_process(void *unused)
{
v = phys_to_virt(pfn << PAGE_SHIFT);
scrub_pages(v, 1);
- queue_l1_entry_update(get_ptep((unsigned long)v), 0);
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)v, __pte_ma(0), 0);
}
#ifdef CONFIG_XEN_SCRUB_PAGES
else
@@ -273,9 +260,7 @@ static void balloon_process(void *unused)
/* Ensure that ballooned highmem pages don't have cached mappings. */
kmap_flush_unused();
-
- /* Flush updates through and flush the TLB. */
- xen_tlb_flush();
+ flush_tlb_all();
/* No more mappings: invalidate pages in P2M and add to balloon. */
for ( i = 0; i < debt; i++ )
@@ -319,22 +304,17 @@ static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
case CMSG_MEM_REQUEST_SET:
{
mem_request_t *req = (mem_request_t *)&msg->msg[0];
- if ( msg->length != sizeof(mem_request_t) )
- goto parse_error;
set_new_target(req->target);
req->status = 0;
}
break;
+
default:
- goto parse_error;
+ msg->length = 0;
+ break;
}
ctrl_if_send_response(msg);
- return;
-
- parse_error:
- msg->length = 0;
- ctrl_if_send_response(msg);
}
static int balloon_write(struct file *file, const char __user *buffer,
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
index 80db9e1752..3b5b7ff2d8 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
@@ -8,9 +8,14 @@
* arch/xen/drivers/blkif/frontend
*
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Copyright (c) 2005, Christopher Clark
*/
#include "common.h"
+#include <asm-xen/evtchn.h>
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xen-public/grant_table.h>
+#endif
/*
* These are rather arbitrary. They are fairly large because adjacent requests
@@ -25,13 +30,11 @@
#define BATCH_PER_DOMAIN 16
static unsigned long mmap_vstart;
-#define MMAP_PAGES_PER_REQUEST \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
((_seg) * PAGE_SIZE))
/*
@@ -47,8 +50,6 @@ typedef struct {
atomic_t pendcnt;
unsigned short operation;
int status;
- void *bounce_page;
- unsigned int bounce_off, bounce_len;
} pending_req_t;
/*
@@ -83,6 +84,29 @@ static inline void flush_plugged_queue(void)
}
#endif
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+/* When using grant tables to map a frame for device access then the
+ * handle returned must be used to unmap the frame. This is needed to
+ * drop the ref count on the frame.
+ */
+static u16 pending_grant_handles[MMAP_PAGES];
+#define pending_handle(_idx, _i) \
+ (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
+#define BLKBACK_INVALID_HANDLE (0xFFFF)
+#endif
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+/*
+ * If the tap driver is used, we may get pages belonging to either the tap
+ * or (more likely) the real frontend. The backend must specify which domain
+ * a given page belongs to in update_va_mapping though. For the moment,
+ * the tap rewrites the ID field of the request to contain the request index
+ * and the id of the real front end domain.
+ */
+#define BLKTAP_COOKIE 0xbeadfeed
+static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
+#endif
+
static int do_block_io_op(blkif_t *blkif, int max_to_do);
static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
@@ -91,20 +115,42 @@ static void make_response(blkif_t *blkif, unsigned long id,
static void fast_flush_area(int idx, int nr_pages)
{
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ gnttab_op_t aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int i, invcount = 0;
+ u16 handle;
+
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
+ {
+ aop[i].u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(idx, i);
+ aop[i].u.unmap_grant_ref.dev_bus_addr = 0;
+ aop[i].u.unmap_grant_ref.handle = handle;
+ pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+ invcount++;
+ }
+ }
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, aop, invcount)))
+ BUG();
+#else
+
+ multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int i;
for ( i = 0; i < nr_pages; i++ )
{
mcl[i].op = __HYPERVISOR_update_va_mapping;
- mcl[i].args[0] = MMAP_VADDR(idx, i) >> PAGE_SHIFT;
+ mcl[i].args[0] = MMAP_VADDR(idx, i);
mcl[i].args[1] = 0;
mcl[i].args[2] = 0;
}
- mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
+ mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
BUG();
+#endif
}
@@ -234,15 +280,6 @@ static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
if ( atomic_dec_and_test(&pending_req->pendcnt) )
{
int pending_idx = pending_req - pending_reqs;
- if ( unlikely(pending_req->bounce_page != NULL) )
- {
- memcpy((void *)(MMAP_VADDR(pending_idx, 0) +
- pending_req->bounce_off),
- (void *)((unsigned long)pending_req->bounce_page +
- pending_req->bounce_off),
- pending_req->bounce_len);
- free_page((unsigned long)pending_req->bounce_page);
- }
fast_flush_area(pending_idx, pending_req->nr_pages);
make_response(pending_req->blkif, pending_req->id,
pending_req->operation, pending_req->status);
@@ -292,17 +329,16 @@ irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
- blkif_ring_t *blk_ring = blkif->blk_ring_base;
+ blkif_back_ring_t *blk_ring = &blkif->blk_ring;
blkif_request_t *req;
- BLKIF_RING_IDX i, rp;
+ RING_IDX i, rp;
int more_to_do = 0;
- rp = blk_ring->req_prod;
+ rp = blk_ring->sring->req_prod;
rmb(); /* Ensure we see queued requests up to 'rp'. */
- /* Take items off the comms ring, taking care not to overflow. */
- for ( i = blkif->blk_req_cons;
- (i != rp) && ((i-blkif->blk_resp_prod) != BLKIF_RING_SIZE);
+ for ( i = blk_ring->req_cons;
+ (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
i++ )
{
if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
@@ -311,7 +347,7 @@ static int do_block_io_op(blkif_t *blkif, int max_to_do)
break;
}
- req = &blk_ring->ring[MASK_BLKIF_IDX(i)].req;
+ req = RING_GET_REQUEST(blk_ring, i);
switch ( req->operation )
{
case BLKIF_OP_READ:
@@ -325,14 +361,13 @@ static int do_block_io_op(blkif_t *blkif, int max_to_do)
default:
DPRINTK("error: unknown block io operation [%d]\n",
- blk_ring->ring[i].req.operation);
- make_response(blkif, blk_ring->ring[i].req.id,
- blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR);
+ req->operation);
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
break;
}
}
- blkif->blk_req_cons = i;
+ blk_ring->req_cons = i;
return more_to_do;
}
@@ -350,12 +385,50 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
(blkif_last_sect(req->frame_and_sects[0]) != 7) )
goto out;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ {
+ gnttab_op_t op;
+
+ op.u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, 0);
+ op.u.map_grant_ref.flags = GNTMAP_host_map;
+ op.u.map_grant_ref.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
+ op.u.map_grant_ref.dom = blkif->domid;
+
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, &op, 1)))
+ BUG();
+
+ if ( op.u.map_grant_ref.handle < 0 )
+ goto out;
+
+ pending_handle(pending_idx, 0) = op.u.map_grant_ref.handle;
+ }
+#else /* else CONFIG_XEN_BLKDEV_GRANT */
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+ /* Grab the real frontend out of the probe message. */
+ if (req->frame_and_sects[1] == BLKTAP_COOKIE)
+ blkif->is_blktap = 1;
+#endif
+
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
if ( HYPERVISOR_update_va_mapping_otherdomain(
- MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT,
+ MMAP_VADDR(pending_idx, 0),
(pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
- 0, blkif->domid) )
+ 0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) )
+
goto out;
-
+#else
+ if ( HYPERVISOR_update_va_mapping_otherdomain(
+ MMAP_VADDR(pending_idx, 0),
+ (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
+ 0, blkif->domid) )
+
+ goto out;
+#endif
+#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
+
rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
PAGE_SIZE / sizeof(vdisk_t));
@@ -368,114 +441,152 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
- short nr_sects;
- unsigned long buffer, fas;
- int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ unsigned long fas = 0;
+ int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
pending_req_t *pending_req;
- unsigned long remap_prot;
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-
- /* We map virtual scatter/gather segments to physical segments. */
- int new_segs, nr_psegs = 0;
- phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST + 1];
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ gnttab_op_t aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#else
+ unsigned long remap_prot;
+ multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#endif
+ struct phys_req preq;
+ struct {
+ unsigned long buf; unsigned int nsec;
+ } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int nseg;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ struct buffer_head *bh;
+#else
+ struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int nbio = 0;
+ request_queue_t *q;
+#endif
/* Check that number of segments is sane. */
- if ( unlikely(req->nr_segments == 0) ||
- unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
+ nseg = req->nr_segments;
+ if ( unlikely(nseg == 0) ||
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
{
- DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
+ DPRINTK("Bad number of segments in request (%d)\n", nseg);
goto bad_descriptor;
}
- /*
- * Check each address/size pair is sane, and convert into a
- * physical device and block offset. Note that if the offset and size
- * crosses a virtual extent boundary, we may end up with more
- * physical scatter/gather segments than virtual segments.
- */
- for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
+ preq.dev = req->device;
+ preq.sector_number = req->sector_number;
+ preq.nr_sects = 0;
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ for ( i = 0; i < nseg; i++ )
{
- fas = req->frame_and_sects[i];
- buffer = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
- nr_sects = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
+ fas = req->frame_and_sects[i];
+ seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
- if ( nr_sects <= 0 )
+ if ( seg[i].nsec <= 0 )
goto bad_descriptor;
+ preq.nr_sects += seg[i].nsec;
+
+ aop[i].u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, i);
+ aop[i].u.map_grant_ref.dom = blkif->domid;
+ aop[i].u.map_grant_ref.ref = blkif_gref_from_fas(fas);
+ aop[i].u.map_grant_ref.flags = GNTMAP_host_map;
+ if ( operation == WRITE )
+ aop[i].u.map_grant_ref.flags |= GNTMAP_readonly;
+ }
- phys_seg[nr_psegs].dev = req->device;
- phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
- phys_seg[nr_psegs].buffer = buffer;
- phys_seg[nr_psegs].nr_sects = nr_sects;
-
- /* Translate the request into the relevant 'physical device' */
- new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
- if ( new_segs < 0 )
- {
- DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
- operation == READ ? "read" : "write",
- req->sector_number + tot_sects,
- req->sector_number + tot_sects + nr_sects,
- req->device);
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, aop, nseg)))
+ BUG();
+
+ for ( i = 0; i < nseg; i++ )
+ {
+ if ( unlikely(aop[i].u.map_grant_ref.handle < 0) )
+ {
+ DPRINTK("invalid buffer -- could not remap it\n");
+ fast_flush_area(pending_idx, nseg);
goto bad_descriptor;
}
-
- nr_psegs += new_segs;
- ASSERT(nr_psegs <= (BLKIF_MAX_SEGMENTS_PER_REQUEST+1));
+
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+ FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr);
+
+ pending_handle(pending_idx, i) = aop[i].u.map_grant_ref.handle;
}
+#endif
- /* Nonsensical zero-sized request? */
- if ( unlikely(nr_psegs == 0) )
+ for ( i = 0; i < nseg; i++ )
+ {
+ fas = req->frame_and_sects[i];
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ seg[i].buf = (aop[i].u.map_grant_ref.dev_bus_addr << PAGE_SHIFT) |
+ (blkif_first_sect(fas) << 9);
+#else
+ seg[i].buf = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
+ seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
+ if ( seg[i].nsec <= 0 )
+ goto bad_descriptor;
+ preq.nr_sects += seg[i].nsec;
+#endif
+ }
+
+ if ( vbd_translate(&preq, blkif, operation) != 0 )
+ {
+ DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
+ operation == READ ? "read" : "write", preq.sector_number,
+ preq.sector_number + preq.nr_sects, preq.dev);
goto bad_descriptor;
+ }
+#ifndef CONFIG_XEN_BLKDEV_GRANT
if ( operation == READ )
remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
else
remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;
- for ( i = 0; i < nr_psegs; i++ )
+ for ( i = 0; i < nseg; i++ )
{
mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
- mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT;
- mcl[i].args[1] = (phys_seg[i].buffer & PAGE_MASK) | remap_prot;
+ mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
+ mcl[i].args[1] = (seg[i].buf & PAGE_MASK) | remap_prot;
mcl[i].args[2] = 0;
mcl[i].args[3] = blkif->domid;
-
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+ if ( blkif->is_blktap )
+ mcl[i].args[3] = ID_TO_DOM(req->id);
+#endif
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- FOREIGN_FRAME(phys_seg[i].buffer >> PAGE_SHIFT);
+ FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT);
}
- if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) )
- BUG();
+ BUG_ON(HYPERVISOR_multicall(mcl, nseg) != 0);
- for ( i = 0; i < nr_psegs; i++ )
+ for ( i = 0; i < nseg; i++ )
{
- if ( unlikely(mcl[i].args[5] != 0) )
+ if ( unlikely(mcl[i].result != 0) )
{
DPRINTK("invalid buffer -- could not remap it\n");
- fast_flush_area(pending_idx, nr_psegs);
+ fast_flush_area(pending_idx, nseg);
goto bad_descriptor;
}
}
+#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */
pending_req = &pending_reqs[pending_idx];
pending_req->blkif = blkif;
pending_req->id = req->id;
pending_req->operation = operation;
pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nr_psegs;
- pending_req->bounce_page = NULL;
- atomic_set(&pending_req->pendcnt, nr_psegs);
- pending_cons++;
+ pending_req->nr_pages = nseg;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ atomic_set(&pending_req->pendcnt, nseg);
+ pending_cons++;
blkif_get(blkif);
- /* Now we pass each segment down to the real blkdev layer. */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- for ( i = 0; i < nr_psegs; i++ )
+ for ( i = 0; i < nseg; i++ )
{
- struct buffer_head *bh;
-
- bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
+ bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
if ( unlikely(bh == NULL) )
{
__end_block_io_op(pending_req, 0);
@@ -485,12 +596,12 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
memset(bh, 0, sizeof (struct buffer_head));
init_waitqueue_head(&bh->b_wait);
- bh->b_size = phys_seg[i].nr_sects << 9;
- bh->b_dev = phys_seg[i].dev;
- bh->b_rdev = phys_seg[i].dev;
- bh->b_rsector = (unsigned long)phys_seg[i].sector_number;
+ bh->b_size = seg[i].nsec << 9;
+ bh->b_dev = preq.dev;
+ bh->b_rdev = preq.dev;
+ bh->b_rsector = (unsigned long)preq.sector_number;
bh->b_data = (char *)MMAP_VADDR(pending_idx, i) +
- (phys_seg[i].buffer & ~PAGE_MASK);
+ (seg[i].buf & ~PAGE_MASK);
bh->b_page = virt_to_page(MMAP_VADDR(pending_idx, i));
bh->b_end_io = end_block_io_op;
bh->b_private = pending_req;
@@ -504,67 +615,60 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
/* Dispatch a single request. We'll flush it to disc later. */
generic_make_request(operation, bh);
+
+ preq.sector_number += seg[i].nsec;
}
+
#else
- for ( i = 0; i < nr_psegs; i++ )
- {
- struct bio *bio;
- request_queue_t *q;
- bio = bio_alloc(GFP_ATOMIC, 1);
- if ( unlikely(bio == NULL) )
+ for ( i = 0; i < nseg; i++ )
+ {
+ if ( ((int)preq.sector_number|(int)seg[i].nsec) &
+ ((bdev_hardsect_size(preq.bdev) >> 9) - 1) )
{
- __end_block_io_op(pending_req, 0);
- continue;
+ DPRINTK("Misaligned I/O request from domain %d", blkif->domid);
+ goto cleanup_and_fail;
}
- bio->bi_bdev = phys_seg[i].bdev;
- bio->bi_private = pending_req;
- bio->bi_end_io = end_block_io_op;
- bio->bi_sector = phys_seg[i].sector_number;
-
- /* Is the request misaligned with respect to hardware sector size? */
- if ( ((bio->bi_sector | phys_seg[i].nr_sects) &
- ((bdev_hardsect_size(bio->bi_bdev) >> 9) - 1)) )
+ while ( (bio == NULL) ||
+ (bio_add_page(bio,
+ virt_to_page(MMAP_VADDR(pending_idx, i)),
+ seg[i].nsec << 9,
+ seg[i].buf & ~PAGE_MASK) == 0) )
{
- /* We can't bounce scatter-gather requests. */
- if ( (nr_psegs != 1) ||
- ((pending_req->bounce_page = (void *)
- __get_free_page(GFP_KERNEL)) == NULL) )
+ bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
+ if ( unlikely(bio == NULL) )
{
- printk("xen_blk: Unaligned scatter-gather request!\n");
- bio_put(bio);
- __end_block_io_op(pending_req, 0);
- continue;
+ cleanup_and_fail:
+ for ( i = 0; i < (nbio-1); i++ )
+ bio_put(biolist[i]);
+ fast_flush_area(pending_idx, nseg);
+ goto bad_descriptor;
}
-
- /* Record offset and length within a bounce page. */
- pending_req->bounce_off = (bio->bi_sector << 9) & ~PAGE_MASK;
- pending_req->bounce_len = phys_seg[i].nr_sects << 9;
-
- /* Submit a page-aligned I/O. */
- bio->bi_sector &= ~((PAGE_SIZE >> 9) - 1);
- bio_add_page(
- bio, virt_to_page(pending_req->bounce_page), PAGE_SIZE, 0);
- }
- else
- {
- bio_add_page(
- bio,
- virt_to_page(MMAP_VADDR(pending_idx, i)),
- phys_seg[i].nr_sects << 9,
- phys_seg[i].buffer & ~PAGE_MASK);
+
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = preq.sector_number;
}
- if ( (q = bdev_get_queue(bio->bi_bdev)) != plugged_queue )
- {
- flush_plugged_queue();
- blk_get_queue(q);
- plugged_queue = q;
- }
+ preq.sector_number += seg[i].nsec;
+ }
- submit_bio(operation, bio);
+ if ( (q = bdev_get_queue(bio->bi_bdev)) != plugged_queue )
+ {
+ flush_plugged_queue();
+ blk_get_queue(q);
+ plugged_queue = q;
}
+
+ atomic_set(&pending_req->pendcnt, nbio);
+ pending_cons++;
+ blkif_get(blkif);
+
+ for ( i = 0; i < nbio; i++ )
+ submit_bio(operation, biolist[i]);
+
#endif
return;
@@ -585,16 +689,17 @@ static void make_response(blkif_t *blkif, unsigned long id,
{
blkif_response_t *resp;
unsigned long flags;
+ blkif_back_ring_t *blk_ring = &blkif->blk_ring;
/* Place on the response ring for the relevant domain. */
spin_lock_irqsave(&blkif->blk_ring_lock, flags);
- resp = &blkif->blk_ring_base->
- ring[MASK_BLKIF_IDX(blkif->blk_resp_prod)].resp;
+ resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
resp->id = id;
resp->operation = op;
resp->status = st;
wmb(); /* Ensure other side can see the response fields. */
- blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
+ blk_ring->rsp_prod_pvt++;
+ RING_PUSH_RESPONSES(blk_ring);
spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
/* Kick the relevant domain. */
@@ -638,6 +743,15 @@ static int __init blkif_init(void)
#endif
blkif_ctrlif_init();
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES );
+ printk(KERN_ALERT "Blkif backend is using grant tables.\n");
+#endif
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+ printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
+#endif
return 0;
}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/blkback/common.h
index 4a12ca8fe9..a698e01c64 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/common.h
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/common.h
@@ -15,6 +15,7 @@
#include <asm-xen/ctrl_if.h>
#include <asm-xen/hypervisor.h>
#include <asm-xen/xen-public/io/blkif.h>
+#include <asm-xen/xen-public/io/ring.h>
#if 0
#define ASSERT(_p) \
@@ -36,19 +37,17 @@ struct block_device;
typedef struct blkif_st {
/* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
+ domid_t domid;
+ unsigned int handle;
/* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- int irq;
+ unsigned long shmem_frame;
+ unsigned int evtchn;
+ int irq;
/* Comms information. */
- blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
- BLKIF_RING_IDX blk_req_cons; /* Request consumer. */
- BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */
+ blkif_back_ring_t blk_ring;
/* VBDs attached to this interface. */
- rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */
- spinlock_t vbd_lock; /* Protects VBD mapping. */
+ rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs.*/
+ spinlock_t vbd_lock; /* Protects VBD mapping. */
/* Private fields. */
enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
/*
@@ -56,6 +55,10 @@ typedef struct blkif_st {
* We therefore need to store the id from the original request.
*/
u8 disconnect_rspid;
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+ /* Is this a blktap frontend */
+ unsigned int is_blktap;
+#endif
struct blkif_st *hash_next;
struct list_head blkdev_list;
spinlock_t blk_ring_lock;
@@ -77,38 +80,19 @@ blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
blkif_disconnect_complete(_b); \
} while (0)
-/* An entry in a list of xen_extents. */
-typedef struct _blkif_extent_le {
- blkif_extent_t extent; /* an individual extent */
- struct _blkif_extent_le *next; /* and a pointer to the next */
- struct block_device *bdev;
-} blkif_extent_le_t;
-
-typedef struct _vbd {
- blkif_vdev_t vdevice; /* what the domain refers to this vbd as */
- unsigned char readonly; /* Non-zero -> read-only */
- unsigned char type; /* VDISK_TYPE_xxx */
- blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */
- rb_node_t rb; /* for linking into R-B tree lookup struct */
-} vbd_t;
-
void vbd_create(blkif_be_vbd_create_t *create);
-void vbd_grow(blkif_be_vbd_grow_t *grow);
-void vbd_shrink(blkif_be_vbd_shrink_t *shrink);
void vbd_destroy(blkif_be_vbd_destroy_t *delete);
int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
void destroy_all_vbds(blkif_t *blkif);
-/* Describes a [partial] disk extent (part of a block io request) */
-typedef struct {
+struct phys_req {
unsigned short dev;
unsigned short nr_sects;
struct block_device *bdev;
- unsigned long buffer;
blkif_sector_t sector_number;
-} phys_seg_t;
+};
-int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation);
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
void blkif_interface_init(void);
void blkif_ctrlif_init(void);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/control.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/control.c
index b55bae7b1f..cedfcf7565 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/control.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/control.c
@@ -15,58 +15,32 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
switch ( msg->subtype )
{
case CMSG_BLKIF_BE_CREATE:
- if ( msg->length != sizeof(blkif_be_create_t) )
- goto parse_error;
blkif_create((blkif_be_create_t *)&msg->msg[0]);
break;
case CMSG_BLKIF_BE_DESTROY:
- if ( msg->length != sizeof(blkif_be_destroy_t) )
- goto parse_error;
blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
break;
case CMSG_BLKIF_BE_CONNECT:
- if ( msg->length != sizeof(blkif_be_connect_t) )
- goto parse_error;
blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
break;
case CMSG_BLKIF_BE_DISCONNECT:
- if ( msg->length != sizeof(blkif_be_disconnect_t) )
- goto parse_error;
if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
return; /* Sending the response is deferred until later. */
break;
case CMSG_BLKIF_BE_VBD_CREATE:
- if ( msg->length != sizeof(blkif_be_vbd_create_t) )
- goto parse_error;
vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]);
break;
case CMSG_BLKIF_BE_VBD_DESTROY:
- if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
- goto parse_error;
vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]);
break;
- case CMSG_BLKIF_BE_VBD_GROW:
- if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
- goto parse_error;
- vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]);
- break;
- case CMSG_BLKIF_BE_VBD_SHRINK:
- if ( msg->length != sizeof(blkif_be_vbd_shrink_t) )
- goto parse_error;
- vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]);
- break;
default:
- goto parse_error;
+ DPRINTK("Parse error while reading message subtype %d, len %d\n",
+ msg->subtype, msg->length);
+ msg->length = 0;
+ break;
}
ctrl_if_send_response(msg);
- return;
-
- parse_error:
- DPRINTK("Parse error while reading message subtype %d, len %d\n",
- msg->subtype, msg->length);
- msg->length = 0;
- ctrl_if_send_response(msg);
}
void blkif_ctrlif_init(void)
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/interface.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/interface.c
index 4196014597..46d55d1fd4 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/interface.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/interface.c
@@ -39,7 +39,7 @@ static void __blkif_disconnect_complete(void *arg)
* must still be notified to the remote driver.
*/
unbind_evtchn_from_irq(blkif->evtchn);
- vfree(blkif->blk_ring_base);
+ vfree(blkif->blk_ring.sring);
/* Construct the deferred response message. */
cmsg.type = CMSG_BLKIF_BE;
@@ -149,14 +149,15 @@ void blkif_destroy(blkif_be_destroy_t *destroy)
void blkif_connect(blkif_be_connect_t *connect)
{
- domid_t domid = connect->domid;
- unsigned int handle = connect->blkif_handle;
- unsigned int evtchn = connect->evtchn;
- unsigned long shmem_frame = connect->shmem_frame;
+ domid_t domid = connect->domid;
+ unsigned int handle = connect->blkif_handle;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long shmem_frame = connect->shmem_frame;
struct vm_struct *vma;
- pgprot_t prot;
- int error;
- blkif_t *blkif;
+ pgprot_t prot;
+ int error;
+ blkif_t *blkif;
+ blkif_sring_t *sring;
blkif = blkif_find_by_handle(domid, handle);
if ( unlikely(blkif == NULL) )
@@ -195,11 +196,13 @@ void blkif_connect(blkif_be_connect_t *connect)
vfree(vma->addr);
return;
}
-
+ sring = (blkif_sring_t *)vma->addr;
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+
blkif->evtchn = evtchn;
blkif->irq = bind_evtchn_to_irq(evtchn);
blkif->shmem_frame = shmem_frame;
- blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
blkif->status = CONNECTED;
blkif_get(blkif);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c
index 493a2a7268..2fcc42273e 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c
@@ -7,21 +7,35 @@
* in vbd_translate. All other lookups are implicitly protected because the
* only caller (the control message dispatch routine) serializes the calls.
*
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
*/
#include "common.h"
+struct vbd {
+ blkif_vdev_t vdevice; /* what the domain refers to this vbd as */
+ unsigned char readonly; /* Non-zero -> read-only */
+ unsigned char type; /* VDISK_xxx */
+ blkif_pdev_t pdevice; /* phys device that this vbd maps to */
+ struct block_device *bdev;
+ rb_node_t rb; /* for linking into R-B tree lookup struct */
+};
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static dev_t vbd_map_devnum(blkif_pdev_t);
+static inline dev_t vbd_map_devnum(blkif_pdev_t cookie)
+{ return MKDEV(cookie>>8, cookie&0xff); }
+#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
+ (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
#define bdev_put(_b) blkdev_put(_b)
#else
+#define vbd_sz(_v) (blk_size[MAJOR((_v)->pdevice)][MINOR((_v)->pdevice)]*2)
#define bdev_put(_b) ((void)0)
+#define bdev_hardsect_size(_b) 512
#endif
void vbd_create(blkif_be_vbd_create_t *create)
{
- vbd_t *vbd;
+ struct vbd *vbd;
rb_node_t **rb_p, *rb_parent = NULL;
blkif_t *blkif;
blkif_vdev_t vdevice = create->vdevice;
@@ -39,7 +53,7 @@ void vbd_create(blkif_be_vbd_create_t *create)
while ( *rb_p != NULL )
{
rb_parent = *rb_p;
- vbd = rb_entry(rb_parent, vbd_t, rb);
+ vbd = rb_entry(rb_parent, struct vbd, rb);
if ( vdevice < vbd->vdevice )
{
rb_p = &rb_parent->rb_left;
@@ -56,7 +70,7 @@ void vbd_create(blkif_be_vbd_create_t *create)
}
}
- if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
+ if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
{
DPRINTK("vbd_create: out of memory\n");
create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
@@ -65,216 +79,60 @@ void vbd_create(blkif_be_vbd_create_t *create)
vbd->vdevice = vdevice;
vbd->readonly = create->readonly;
- vbd->type = VDISK_TYPE_DISK;
- vbd->extents = NULL;
-
- spin_lock(&blkif->vbd_lock);
- rb_link_node(&vbd->rb, rb_parent, rb_p);
- rb_insert_color(&vbd->rb, &blkif->vbd_rb);
- spin_unlock(&blkif->vbd_lock);
-
- DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
- vdevice, create->domid);
- create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-
-/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
-void vbd_grow(blkif_be_vbd_grow_t *grow)
-{
- blkif_t *blkif;
- blkif_extent_le_t **px, *x;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- blkif_vdev_t vdevice = grow->vdevice;
- unsigned long sz;
-
- blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("vbd_grow attempted for non-existent blkif (%u,%u)\n",
- grow->domid, grow->blkif_handle);
- grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, vbd_t, rb);
- if ( vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- break;
- }
-
- if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
- {
- DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
- grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
- return;
- }
-
- if ( grow->extent.sector_start > 0 )
- {
- DPRINTK("vbd_grow: dev %08x start not zero.\n", grow->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- return;
- }
-
- if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t),
- GFP_KERNEL)) == NULL) )
- {
- DPRINTK("vbd_grow: out of memory\n");
- grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
+ vbd->type = 0;
/* Mask to 16-bit for compatibility with old tools */
- x->extent.device = grow->extent.device & 0xffff;
- x->extent.sector_start = grow->extent.sector_start;
- x->extent.sector_length = grow->extent.sector_length;
- x->next = (blkif_extent_le_t *)NULL;
+ vbd->pdevice = create->pdevice & 0xffff;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- x->bdev = open_by_devnum(vbd_map_devnum(x->extent.device),
- vbd->readonly ? FMODE_READ : FMODE_WRITE);
- if ( IS_ERR(x->bdev) )
- {
- DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- goto out;
- }
- /* XXXcl maybe bd_claim? */
-
- if ( (x->bdev->bd_disk == NULL) )
- {
- DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- bdev_put(x->bdev);
- goto out;
- }
-
- /* get size in sectors */
- if ( x->bdev->bd_part )
- sz = x->bdev->bd_part->nr_sects;
- else
- sz = x->bdev->bd_disk->capacity;
-
- vbd->type = (x->bdev->bd_disk->flags & GENHD_FL_CD) ?
- VDISK_TYPE_CDROM : VDISK_TYPE_DISK;
-
-#else
- if( !blk_size[MAJOR(x->extent.device)] )
- {
- DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- goto out;
- }
-
- /* convert blocks (1KB) to sectors */
- sz = blk_size[MAJOR(x->extent.device)][MINOR(x->extent.device)] * 2;
-
- if ( sz == 0 )
- {
- DPRINTK("vbd_grow: device %08x zero size!\n", x->extent.device);
- grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
- goto out;
- }
-#endif
-
- /*
- * NB. This test assumes sector_start == 0, which is always the case
- * in Xen 1.3. In fact the whole grow/shrink interface could do with
- * some simplification.
- */
- if ( x->extent.sector_length > sz )
- x->extent.sector_length = sz;
-
- DPRINTK("vbd_grow: requested_len %llu actual_len %lu\n",
- x->extent.sector_length, sz);
-
- for ( px = &vbd->extents; *px != NULL; px = &(*px)->next )
- continue;
-
- *px = x; /* ATOMIC: no need for vbd_lock. */
-
- DPRINTK("Successful grow of vdev=%04x (dom=%u)\n",
- vdevice, grow->domid);
-
- grow->status = BLKIF_BE_STATUS_OKAY;
- return;
-
- out:
- kfree(x);
-}
-
-
-void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
-{
- blkif_t *blkif;
- blkif_extent_le_t **px, *x;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- blkif_vdev_t vdevice = shrink->vdevice;
-
- blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
- if ( unlikely(blkif == NULL) )
+ vbd->bdev = open_by_devnum(
+ vbd_map_devnum(vbd->pdevice),
+ vbd->readonly ? FMODE_READ : FMODE_WRITE);
+ if ( IS_ERR(vbd->bdev) )
{
- DPRINTK("vbd_shrink attempted for non-existent blkif (%u,%u)\n",
- shrink->domid, shrink->blkif_handle);
- shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+ create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
return;
}
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
+ if ( (vbd->bdev->bd_disk == NULL) )
{
- vbd = rb_entry(rb, vbd_t, rb);
- if ( vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- break;
- }
-
- if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
- {
- shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+ DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+ create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
+ bdev_put(vbd->bdev);
return;
}
- if ( unlikely(vbd->extents == NULL) )
+ if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
+ vbd->type |= VDISK_CDROM;
+ if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
+ vbd->type |= VDISK_REMOVABLE;
+
+#else
+ if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
{
- shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
+ DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+ create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
return;
}
-
- /* Find the last extent. We now know that there is at least one. */
- for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
- continue;
-
- x = *px;
- *px = x->next; /* ATOMIC: no need for vbd_lock. */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- bdev_put(x->bdev);
#endif
- kfree(x);
- shrink->status = BLKIF_BE_STATUS_OKAY;
+ spin_lock(&blkif->vbd_lock);
+ rb_link_node(&vbd->rb, rb_parent, rb_p);
+ rb_insert_color(&vbd->rb, &blkif->vbd_rb);
+ spin_unlock(&blkif->vbd_lock);
+
+ DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
+ vdevice, create->domid);
+ create->status = BLKIF_BE_STATUS_OKAY;
}
void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
{
blkif_t *blkif;
- vbd_t *vbd;
+ struct vbd *vbd;
rb_node_t *rb;
- blkif_extent_le_t *x, *t;
blkif_vdev_t vdevice = destroy->vdevice;
blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
@@ -289,7 +147,7 @@ void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
rb = blkif->vbd_rb.rb_node;
while ( rb != NULL )
{
- vbd = rb_entry(rb, vbd_t, rb);
+ vbd = rb_entry(rb, struct vbd, rb);
if ( vdevice < vbd->vdevice )
rb = rb->rb_left;
else if ( vdevice > vbd->vdevice )
@@ -305,66 +163,39 @@ void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
spin_lock(&blkif->vbd_lock);
rb_erase(rb, &blkif->vbd_rb);
spin_unlock(&blkif->vbd_lock);
-
- x = vbd->extents;
+ bdev_put(vbd->bdev);
kfree(vbd);
-
- while ( x != NULL )
- {
- t = x->next;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- bdev_put(x->bdev);
-#endif
- kfree(x);
- x = t;
- }
}
void destroy_all_vbds(blkif_t *blkif)
{
- vbd_t *vbd;
- rb_node_t *rb;
- blkif_extent_le_t *x, *t;
+ struct vbd *vbd;
+ rb_node_t *rb;
spin_lock(&blkif->vbd_lock);
while ( (rb = blkif->vbd_rb.rb_node) != NULL )
{
- vbd = rb_entry(rb, vbd_t, rb);
-
+ vbd = rb_entry(rb, struct vbd, rb);
rb_erase(rb, &blkif->vbd_rb);
- x = vbd->extents;
+ spin_unlock(&blkif->vbd_lock);
+ bdev_put(vbd->bdev);
kfree(vbd);
-
- while ( x != NULL )
- {
- t = x->next;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- bdev_put(x->bdev);
-#endif
- kfree(x);
- x = t;
- }
+ spin_lock(&blkif->vbd_lock);
}
spin_unlock(&blkif->vbd_lock);
}
-static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd)
+static void vbd_probe_single(
+ blkif_t *blkif, vdisk_t *vbd_info, struct vbd *vbd)
{
- blkif_extent_le_t *x;
-
- vbd_info->device = vbd->vdevice;
- vbd_info->info = vbd->type;
- if ( vbd->readonly )
- vbd_info->info |= VDISK_FLAG_RO;
- vbd_info->capacity = 0ULL;
- for ( x = vbd->extents; x != NULL; x = x->next )
- vbd_info->capacity += x->extent.sector_length;
-
- return 0;
+ vbd_info->device = vbd->vdevice;
+ vbd_info->info = vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
+ vbd_info->capacity = vbd_sz(vbd);
+ vbd_info->sector_size = bdev_hardsect_size(vbd->bdev);
}
@@ -386,9 +217,8 @@ int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
for ( ; ; )
{
/* STEP 2. Dealt with left subtree. Now process current node. */
- if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds],
- rb_entry(rb, vbd_t, rb))) != 0 )
- goto out;
+ vbd_probe_single(blkif, &vbd_info[nr_vbds],
+ rb_entry(rb, struct vbd, rb));
if ( ++nr_vbds == max_vbds )
goto out;
@@ -421,13 +251,11 @@ int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
}
-int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
{
- blkif_extent_le_t *x;
- vbd_t *vbd;
- rb_node_t *rb;
- blkif_sector_t sec_off;
- unsigned long nr_secs;
+ struct vbd *vbd;
+ rb_node_t *rb;
+ int rc = -EACCES;
/* Take the vbd_lock because another thread could be updating the tree. */
spin_lock(&blkif->vbd_lock);
@@ -435,10 +263,10 @@ int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
rb = blkif->vbd_rb.rb_node;
while ( rb != NULL )
{
- vbd = rb_entry(rb, vbd_t, rb);
- if ( pseg->dev < vbd->vdevice )
+ vbd = rb_entry(rb, struct vbd, rb);
+ if ( req->dev < vbd->vdevice )
rb = rb->rb_left;
- else if ( pseg->dev > vbd->vdevice )
+ else if ( req->dev > vbd->vdevice )
rb = rb->rb_right;
else
goto found;
@@ -446,138 +274,22 @@ int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
DPRINTK("vbd_translate; domain %u attempted to access "
"non-existent VBD.\n", blkif->domid);
-
- spin_unlock(&blkif->vbd_lock);
- return -ENODEV;
+ rc = -ENODEV;
+ goto out;
found:
if ( (operation == WRITE) && vbd->readonly )
- {
- spin_unlock(&blkif->vbd_lock);
- return -EACCES;
- }
+ goto out;
- /*
- * Now iterate through the list of blkif_extents, working out which should
- * be used to perform the translation.
- */
- sec_off = pseg->sector_number;
- nr_secs = pseg->nr_sects;
- for ( x = vbd->extents; x != NULL; x = x->next )
- {
- if ( sec_off < x->extent.sector_length )
- {
- pseg->dev = x->extent.device;
- pseg->bdev = x->bdev;
- pseg->sector_number = x->extent.sector_start + sec_off;
- if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) )
- goto overrun;
- spin_unlock(&blkif->vbd_lock);
- return 1;
- }
- sec_off -= x->extent.sector_length;
- }
+ if ( unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)) )
+ goto out;
- DPRINTK("vbd_translate: end of vbd.\n");
- spin_unlock(&blkif->vbd_lock);
- return -EACCES;
-
- /*
- * Here we deal with overrun onto the following extent. We don't deal with
- * overrun of more than one boundary since each request is restricted to
- * 2^9 512-byte sectors, so it should be trivial for control software to
- * ensure that extents are large enough to prevent excessive overrun.
- */
- overrun:
-
- /* Adjust length of first chunk to run to end of first extent. */
- pseg[0].nr_sects = x->extent.sector_length - sec_off;
-
- /* Set second chunk buffer and length to start where first chunk ended. */
- pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9);
- pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
-
- /* Now move to the next extent. Check it exists and is long enough! */
- if ( unlikely((x = x->next) == NULL) ||
- unlikely(x->extent.sector_length < pseg[1].nr_sects) )
- {
- DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
- spin_unlock(&blkif->vbd_lock);
- return -EACCES;
- }
+ req->dev = vbd->pdevice;
+ req->bdev = vbd->bdev;
+ rc = 0;
- /* Store the real device and start sector for the second chunk. */
- pseg[1].dev = x->extent.device;
- pseg[1].bdev = x->bdev;
- pseg[1].sector_number = x->extent.sector_start;
-
+ out:
spin_unlock(&blkif->vbd_lock);
- return 2;
-}
-
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-
-#define MAJOR_XEN(dev) ((dev)>>8)
-#define MINOR_XEN(dev) ((dev) & 0xff)
-
-#ifndef FANCY_REMAPPING
-static dev_t vbd_map_devnum(blkif_pdev_t cookie)
-{
- int major = MAJOR_XEN(cookie);
- int minor = MINOR_XEN(cookie);
-
- return MKDEV(major, minor);
-}
-#else
-#define XEN_IDE0_MAJOR IDE0_MAJOR
-#define XEN_IDE1_MAJOR IDE1_MAJOR
-#define XEN_IDE2_MAJOR IDE2_MAJOR
-#define XEN_IDE3_MAJOR IDE3_MAJOR
-#define XEN_IDE4_MAJOR IDE4_MAJOR
-#define XEN_IDE5_MAJOR IDE5_MAJOR
-#define XEN_IDE6_MAJOR IDE6_MAJOR
-#define XEN_IDE7_MAJOR IDE7_MAJOR
-#define XEN_IDE8_MAJOR IDE8_MAJOR
-#define XEN_IDE9_MAJOR IDE9_MAJOR
-#define XEN_SCSI_DISK0_MAJOR SCSI_DISK0_MAJOR
-#define XEN_SCSI_DISK1_MAJOR SCSI_DISK1_MAJOR
-#define XEN_SCSI_DISK2_MAJOR SCSI_DISK2_MAJOR
-#define XEN_SCSI_DISK3_MAJOR SCSI_DISK3_MAJOR
-#define XEN_SCSI_DISK4_MAJOR SCSI_DISK4_MAJOR
-#define XEN_SCSI_DISK5_MAJOR SCSI_DISK5_MAJOR
-#define XEN_SCSI_DISK6_MAJOR SCSI_DISK6_MAJOR
-#define XEN_SCSI_DISK7_MAJOR SCSI_DISK7_MAJOR
-#define XEN_SCSI_CDROM_MAJOR SCSI_CDROM_MAJOR
-
-static dev_t vbd_map_devnum(blkif_pdev_t cookie)
-{
- int new_major;
- int major = MAJOR_XEN(cookie);
- int minor = MINOR_XEN(cookie);
-
- switch (major) {
- case XEN_IDE0_MAJOR: new_major = IDE0_MAJOR; break;
- case XEN_IDE1_MAJOR: new_major = IDE1_MAJOR; break;
- case XEN_IDE2_MAJOR: new_major = IDE2_MAJOR; break;
- case XEN_IDE3_MAJOR: new_major = IDE3_MAJOR; break;
- case XEN_IDE4_MAJOR: new_major = IDE4_MAJOR; break;
- case XEN_IDE5_MAJOR: new_major = IDE5_MAJOR; break;
- case XEN_IDE6_MAJOR: new_major = IDE6_MAJOR; break;
- case XEN_IDE7_MAJOR: new_major = IDE7_MAJOR; break;
- case XEN_IDE8_MAJOR: new_major = IDE8_MAJOR; break;
- case XEN_IDE9_MAJOR: new_major = IDE9_MAJOR; break;
- case XEN_SCSI_DISK0_MAJOR: new_major = SCSI_DISK0_MAJOR; break;
- case XEN_SCSI_DISK1_MAJOR ... XEN_SCSI_DISK7_MAJOR:
- new_major = SCSI_DISK1_MAJOR + major - XEN_SCSI_DISK1_MAJOR;
- break;
- case XEN_SCSI_CDROM_MAJOR: new_major = SCSI_CDROM_MAJOR; break;
- default: new_major = 0; break;
- }
-
- return MKDEV(new_major, minor);
+ return rc;
}
-#endif
-
-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION_CODE(2,6,0) */
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
index 4bf1fdabcd..7d1fb03ab6 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -6,6 +6,8 @@
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
* Modifications by Mark A. Williamson are (c) Intel Research Cambridge
* Copyright (c) 2004, Christian Limpach
+ * Copyright (c) 2004, Andrew Warfield
+ * Copyright (c) 2005, Christopher Clark
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
@@ -29,6 +31,14 @@
* IN THE SOFTWARE.
*/
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p)
+#endif
+
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -44,6 +54,11 @@
#include <linux/interrupt.h>
#include <scsi/scsi.h>
#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#endif
typedef unsigned char byte; /* from linux/ide.h */
@@ -68,44 +83,47 @@ static unsigned int blkif_irq = 0;
static int blkif_control_rsp_valid;
static blkif_response_t blkif_control_rsp;
-static blkif_ring_t *blk_ring = NULL;
-static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
-static BLKIF_RING_IDX req_prod; /* Private request producer. */
+static blkif_front_ring_t blk_ring;
+
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-unsigned long rec_ring_free;
-blkif_request_t rec_ring[BLKIF_RING_SIZE];
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+static domid_t rdomid = 0;
+static grant_ref_t gref_head, gref_terminal;
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
+#define GRANTREF_INVALID (1<<15)
+#endif
-static int recovery = 0; /* "Recovery in progress" flag. Protected
- * by the blkif_io_lock */
+static struct blk_shadow {
+ blkif_request_t req;
+ unsigned long request;
+ unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} blk_shadow[BLK_RING_SIZE];
+unsigned long blk_shadow_free;
-/* We plug the I/O ring if the driver is suspended or if the ring is full. */
-#define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
- (blkif_state != BLKIF_STATE_CONNECTED))
+static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
static void kick_pending_request_queues(void);
int __init xlblk_init(void);
-void blkif_completion( blkif_request_t *req );
+static void blkif_completion(struct blk_shadow *s);
-static inline int GET_ID_FROM_FREELIST( void )
+static inline int GET_ID_FROM_FREELIST(void)
{
- unsigned long free = rec_ring_free;
-
- if ( free > BLKIF_RING_SIZE )
- BUG();
-
- rec_ring_free = rec_ring[free].id;
-
- rec_ring[free].id = 0x0fffffee; /* debug */
-
+ unsigned long free = blk_shadow_free;
+ BUG_ON(free > BLK_RING_SIZE);
+ blk_shadow_free = blk_shadow[free].req.id;
+ blk_shadow[free].req.id = 0x0fffffee; /* debug */
return free;
}
-static inline void ADD_ID_TO_FREELIST( unsigned long id )
+static inline void ADD_ID_TO_FREELIST(unsigned long id)
{
- rec_ring[id].id = rec_ring_free;
- rec_ring_free = id;
+ blk_shadow[id].req.id = blk_shadow_free;
+ blk_shadow[id].request = 0;
+ blk_shadow_free = id;
}
@@ -119,48 +137,43 @@ static int sg_operation = -1;
#define DISABLE_SCATTERGATHER() (sg_operation = -1)
#endif
-static inline void translate_req_to_pfn(blkif_request_t *xreq,
- blkif_request_t *req)
+static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
{
+#ifndef CONFIG_XEN_BLKDEV_GRANT
int i;
+#endif
- xreq->operation = req->operation;
- xreq->nr_segments = req->nr_segments;
- xreq->device = req->device;
- /* preserve id */
- xreq->sector_number = req->sector_number;
+ s->req = *r;
- for ( i = 0; i < req->nr_segments; i++ )
- xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+ for ( i = 0; i < r->nr_segments; i++ )
+ s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]);
+#endif
}
-static inline void translate_req_to_mfn(blkif_request_t *xreq,
- blkif_request_t *req)
+static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
{
+#ifndef CONFIG_XEN_BLKDEV_GRANT
int i;
+#endif
- xreq->operation = req->operation;
- xreq->nr_segments = req->nr_segments;
- xreq->device = req->device;
- xreq->id = req->id; /* copy id (unlike above) */
- xreq->sector_number = req->sector_number;
+ *r = s->req;
- for ( i = 0; i < req->nr_segments; i++ )
- xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+ for ( i = 0; i < s->req.nr_segments; i++ )
+ r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]);
+#endif
}
static inline void flush_requests(void)
{
DISABLE_SCATTERGATHER();
- wmb(); /* Ensure that the frontend can see the requests. */
- blk_ring->req_prod = req_prod;
+ RING_PUSH_REQUESTS(&blk_ring);
notify_via_evtchn(blkif_evtchn);
}
-
-
/************************** KERNEL VERSION 2.6 **************************/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -180,22 +193,21 @@ static void vbd_update(void)
}
#endif /* ENABLE_VBD_UPDATE */
+static struct xlbd_disk_info *head_waiting = NULL;
static void kick_pending_request_queues(void)
{
-
- if ( (xlbd_blk_queue != NULL) &&
- test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
+ struct xlbd_disk_info *di;
+ while ( ((di = head_waiting) != NULL) && !RING_FULL(&blk_ring) )
{
- blk_start_queue(xlbd_blk_queue);
- /* XXXcl call to request_fn should not be needed but
- * we get stuck without... needs investigating
- */
- xlbd_blk_queue->request_fn(xlbd_blk_queue);
+ head_waiting = di->next_waiting;
+ di->next_waiting = NULL;
+ /* Re-enable calldowns. */
+ blk_start_queue(di->rq);
+ /* Kick things off immediately. */
+ do_blkif_request(di->rq);
}
-
}
-
int blkif_open(struct inode *inode, struct file *filep)
{
struct gendisk *gd = inode->i_bdev->bd_disk;
@@ -217,9 +229,8 @@ int blkif_release(struct inode *inode, struct file *filep)
* When usage drops to zero it may allow more VBD updates to occur.
* Update of usage count is protected by a per-device semaphore.
*/
- if (--di->mi->usage == 0) {
+ if ( --di->mi->usage == 0 )
vbd_update();
- }
return 0;
}
@@ -228,14 +239,13 @@ int blkif_release(struct inode *inode, struct file *filep)
int blkif_ioctl(struct inode *inode, struct file *filep,
unsigned command, unsigned long argument)
{
- int i;
- /* struct gendisk *gd = inode->i_bdev->bd_disk; */
+ int i;
DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
command, (long)argument, inode->i_rdev);
- switch (command) {
-
+ switch ( command )
+ {
case HDIO_GETGEO:
/* return ENOSYS to use defaults */
return -ENOSYS;
@@ -255,65 +265,6 @@ int blkif_ioctl(struct inode *inode, struct file *filep,
return 0;
}
-#if 0
-/* check media change: should probably do something here in some cases :-) */
-int blkif_check(kdev_t dev)
-{
- DPRINTK("blkif_check\n");
- return 0;
-}
-
-int blkif_revalidate(kdev_t dev)
-{
- struct block_device *bd;
- struct gendisk *gd;
- xen_block_t *disk;
- unsigned long capacity;
- int i, rc = 0;
-
- if ( (bd = bdget(dev)) == NULL )
- return -EINVAL;
-
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
- if ( ((gd = get_gendisk(dev)) == NULL) ||
- ((disk = xldev_to_xldisk(dev)) == NULL) ||
- ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
- {
- rc = -EINVAL;
- goto out;
- }
-
- if ( disk->usage > 1 )
- {
- rc = -EBUSY;
- goto out;
- }
-
- /* Only reread partition table if VBDs aren't mapped to partitions. */
- if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
- {
- for ( i = gd->max_p - 1; i >= 0; i-- )
- {
- invalidate_device(dev+i, 1);
- gd->part[MINOR(dev+i)].start_sect = 0;
- gd->part[MINOR(dev+i)].nr_sects = 0;
- gd->sizes[MINOR(dev+i)] = 0;
- }
-
- grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
- }
-
- out:
- up(&bd->bd_sem);
- bdput(bd);
- return rc;
-}
-#endif
/*
* blkif_queue_request
@@ -327,8 +278,7 @@ int blkif_revalidate(kdev_t dev)
*/
static int blkif_queue_request(struct request *req)
{
- struct xlbd_disk_info *di =
- (struct xlbd_disk_info *)req->rq_disk->private_data;
+ struct xlbd_disk_info *di = req->rq_disk->private_data;
unsigned long buffer_ma;
blkif_request_t *ring_req;
struct bio *bio;
@@ -336,14 +286,17 @@ static int blkif_queue_request(struct request *req)
int idx;
unsigned long id;
unsigned int fsect, lsect;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int ref;
+#endif
if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
return 1;
/* Fill out a communications ring structure. */
- ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
+ ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST();
- rec_ring[id].id = (unsigned long) req;
+ blk_shadow[id].request = (unsigned long)req;
ring_req->id = id;
ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
@@ -361,15 +314,34 @@ static int blkif_queue_request(struct request *req)
buffer_ma = page_to_phys(bvec->bv_page);
fsect = bvec->bv_offset >> 9;
lsect = fsect + (bvec->bv_len >> 9) - 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ASSERT( ref != -ENOSPC );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ rdomid,
+ buffer_ma >> PAGE_SHIFT,
+ rq_data_dir(req) );
+
+ blk_shadow[id].frame[ring_req->nr_segments] =
+ buffer_ma >> PAGE_SHIFT;
+
+ ring_req->frame_and_sects[ring_req->nr_segments++] =
+ (((u32) ref) << 16) | (fsect << 3) | lsect;
+
+#else
ring_req->frame_and_sects[ring_req->nr_segments++] =
buffer_ma | (fsect << 3) | lsect;
+#endif
}
}
- req_prod++;
-
+ blk_ring.req_prod_pvt++;
+
/* Keep a private copy so we can reissue requests when recovering. */
- translate_req_to_pfn(&rec_ring[id], ring_req);
+ pickle_request(&blk_shadow[id], ring_req);
return 0;
}
@@ -381,6 +353,7 @@ static int blkif_queue_request(struct request *req)
*/
void do_blkif_request(request_queue_t *rq)
{
+ struct xlbd_disk_info *di;
struct request *req;
int queued;
@@ -388,30 +361,41 @@ void do_blkif_request(request_queue_t *rq)
queued = 0;
- while ((req = elv_next_request(rq)) != NULL) {
- if (!blk_fs_request(req)) {
+ while ( (req = elv_next_request(rq)) != NULL )
+ {
+ if ( !blk_fs_request(req) )
+ {
end_request(req, 0);
continue;
}
- if ( BLKIF_RING_FULL )
- {
- blk_stop_queue(rq);
- break;
- }
+ if ( RING_FULL(&blk_ring) )
+ goto wait;
+
DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
req, req->cmd, req->sector, req->current_nr_sectors,
req->nr_sectors, req->buffer,
rq_data_dir(req) ? "write" : "read");
+
blkdev_dequeue_request(req);
- if (blkif_queue_request(req)) {
- blk_stop_queue(rq);
+ if ( blkif_queue_request(req) )
+ {
+ wait:
+ di = req->rq_disk->private_data;
+ if ( di->next_waiting == NULL )
+ {
+ di->next_waiting = head_waiting;
+ head_waiting = di;
+ /* Avoid pointless unplugs. */
+ blk_stop_queue(rq);
+ }
break;
}
+
queued++;
}
- if (queued != 0)
+ if ( queued != 0 )
flush_requests();
}
@@ -420,9 +404,9 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
struct request *req;
blkif_response_t *bret;
- BLKIF_RING_IDX i, rp;
+ RING_IDX i, rp;
unsigned long flags;
-
+
spin_lock_irqsave(&blkif_io_lock, flags);
if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
@@ -431,21 +415,21 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
spin_unlock_irqrestore(&blkif_io_lock, flags);
return IRQ_HANDLED;
}
-
- rp = blk_ring->resp_prod;
+
+ rp = blk_ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for ( i = resp_cons; i != rp; i++ )
+ for ( i = blk_ring.rsp_cons; i != rp; i++ )
{
unsigned long id;
- bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
- id = bret->id;
- req = (struct request *)rec_ring[id].id;
+ bret = RING_GET_RESPONSE(&blk_ring, i);
+ id = bret->id;
+ req = (struct request *)blk_shadow[id].request;
- blkif_completion( &rec_ring[id] );
+ blkif_completion(&blk_shadow[id]);
- ADD_ID_TO_FREELIST(id); /* overwrites req */
+ ADD_ID_TO_FREELIST(id);
switch ( bret->operation )
{
@@ -454,7 +438,7 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
DPRINTK("Bad return from blkdev data request: %x\n",
bret->status);
-
+
if ( unlikely(end_that_request_first
(req,
(bret->status == BLKIF_RSP_OKAY),
@@ -471,8 +455,8 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
BUG();
}
}
-
- resp_cons = i;
+
+ blk_ring.rsp_cons = i;
kick_pending_request_queues();
@@ -522,15 +506,14 @@ static void vbd_update(void)
#endif /* ENABLE_VBD_UPDATE */
/*============================================================================*/
-
static void kick_pending_request_queues(void)
{
/* We kick pending request queues if the ring is reasonably empty. */
if ( (nr_pending != 0) &&
- ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
+ (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) )
{
/* Attempt to drain the queue, but bail if the ring becomes full. */
- while ( (nr_pending != 0) && !BLKIF_RING_FULL )
+ while ( (nr_pending != 0) && !RING_FULL(&blk_ring) )
do_blkif_request(pending_queues[--nr_pending]);
}
}
@@ -783,6 +766,9 @@ static int blkif_queue_request(unsigned long id,
blkif_request_t *req;
struct buffer_head *bh;
unsigned int fsect, lsect;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int ref;
+#endif
fsect = (buffer_ma & ~PAGE_MASK) >> 9;
lsect = fsect + nr_sectors - 1;
@@ -824,28 +810,44 @@ static int blkif_queue_request(unsigned long id,
(sg_dev == device) &&
(sg_next_sect == sector_number) )
{
-
- req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req;
+ req = RING_GET_REQUEST(&blk_ring,
+ blk_ring.req_prod_pvt - 1);
bh = (struct buffer_head *)id;
- bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
-
+ bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
+ blk_shadow[req->id].request = (unsigned long)id;
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ASSERT( ref != -ENOSPC );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ rdomid,
+ buffer_ma >> PAGE_SHIFT,
+ ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
- rec_ring[req->id].id = id;
+ blk_shadow[req->id].frame[req->nr_segments] =
+ buffer_ma >> PAGE_SHIFT;
- req->frame_and_sects[req->nr_segments] =
- buffer_ma | (fsect<<3) | lsect;
+ req->frame_and_sects[req->nr_segments] =
+ (((u32) ref ) << 16) | (fsect << 3) | lsect;
+#else
+ req->frame_and_sects[req->nr_segments] =
+ buffer_ma | (fsect << 3) | lsect;
+#endif
if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
sg_next_sect += nr_sectors;
else
DISABLE_SCATTERGATHER();
/* Update the copy of the request in the recovery ring. */
- translate_req_to_pfn(&rec_ring[req->id], req );
+ pickle_request(&blk_shadow[req->id], req );
return 0;
}
- else if ( BLKIF_RING_FULL )
+ else if ( RING_FULL(&blk_ring) )
{
return 1;
}
@@ -862,23 +864,39 @@ static int blkif_queue_request(unsigned long id,
}
/* Fill out a communications ring structure. */
- req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
+ req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
xid = GET_ID_FROM_FREELIST();
- rec_ring[xid].id = id;
+ blk_shadow[xid].request = (unsigned long)id;
req->id = xid;
req->operation = operation;
req->sector_number = (blkif_sector_t)sector_number;
req->device = device;
req->nr_segments = 1;
- req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ASSERT( ref != -ENOSPC );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ rdomid,
+ buffer_ma >> PAGE_SHIFT,
+ ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
- req_prod++;
+ blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT;
+
+ req->frame_and_sects[0] = (((u32) ref)<<16) | (fsect<<3) | lsect;
+#else
+ req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
+#endif
/* Keep a private copy so we can reissue requests when recovering. */
- translate_req_to_pfn(&rec_ring[xid], req );
+ pickle_request(&blk_shadow[xid], req);
+ blk_ring.req_prod_pvt++;
+
return 0;
}
@@ -967,7 +985,7 @@ void do_blkif_request(request_queue_t *rq)
static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
- BLKIF_RING_IDX i, rp;
+ RING_IDX i, rp;
unsigned long flags;
struct buffer_head *bh, *next_bh;
@@ -979,18 +997,19 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
return;
}
- rp = blk_ring->resp_prod;
+ rp = blk_ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for ( i = resp_cons; i != rp; i++ )
+ for ( i = blk_ring.rsp_cons; i != rp; i++ )
{
unsigned long id;
- blkif_response_t *bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
-
+ blkif_response_t *bret;
+
+ bret = RING_GET_RESPONSE(&blk_ring, i);
id = bret->id;
- bh = (struct buffer_head *)rec_ring[id].id;
+ bh = (struct buffer_head *)blk_shadow[id].request;
- blkif_completion( &rec_ring[id] );
+ blkif_completion(&blk_shadow[id]);
ADD_ID_TO_FREELIST(id);
@@ -1016,10 +1035,10 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
default:
BUG();
}
+
}
+ blk_ring.rsp_cons = i;
- resp_cons = i;
-
kick_pending_request_queues();
spin_unlock_irqrestore(&io_request_lock, flags);
@@ -1029,35 +1048,51 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
/***************************** COMMON CODE *******************************/
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
+ unsigned long address)
+{
+ int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ASSERT( ref != -ENOSPC );
+
+ gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
+
+ req->frame_and_sects[0] = (((u32) ref) << 16) | 7;
+
+ blkif_control_send(req, rsp);
+}
+#endif
void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
unsigned long flags, id;
+ blkif_request_t *req_d;
retry:
- while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
+ while ( RING_FULL(&blk_ring) )
{
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
spin_lock_irqsave(&blkif_io_lock, flags);
- if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
+ if ( RING_FULL(&blk_ring) )
{
spin_unlock_irqrestore(&blkif_io_lock, flags);
goto retry;
}
DISABLE_SCATTERGATHER();
- blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req;
+ req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
+ *req_d = *req;
id = GET_ID_FROM_FREELIST();
- blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
- rec_ring[id].id = (unsigned long) req;
+ req_d->id = id;
+ blk_shadow[id].request = (unsigned long)req;
- translate_req_to_pfn( &rec_ring[id], req );
+ pickle_request(&blk_shadow[id], req);
- req_prod++;
+ blk_ring.req_prod_pvt++;
flush_requests();
spin_unlock_irqrestore(&blkif_io_lock, flags);
@@ -1099,7 +1134,7 @@ static void blkif_send_interface_connect(void)
blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
msg->handle = 0;
- msg->shmem_frame = (virt_to_machine(blk_ring) >> PAGE_SHIFT);
+ msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT);
ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
@@ -1113,10 +1148,10 @@ static void blkif_free(void)
spin_unlock_irq(&blkif_io_lock);
/* Free resources associated with old device channel. */
- if ( blk_ring != NULL )
+ if ( blk_ring.sring != NULL )
{
- free_page((unsigned long)blk_ring);
- blk_ring = NULL;
+ free_page((unsigned long)blk_ring.sring);
+ blk_ring.sring = NULL;
}
free_irq(blkif_irq, NULL);
blkif_irq = 0;
@@ -1132,10 +1167,14 @@ static void blkif_close(void)
/* Move from CLOSED to DISCONNECTED state. */
static void blkif_disconnect(void)
{
- if ( blk_ring != NULL )
- free_page((unsigned long)blk_ring);
- blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
- blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+ blkif_sring_t *sring;
+
+ if ( blk_ring.sring != NULL )
+ free_page((unsigned long)blk_ring.sring);
+
+ sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
blkif_state = BLKIF_STATE_DISCONNECTED;
blkif_send_interface_connect();
}
@@ -1149,48 +1188,70 @@ static void blkif_reset(void)
static void blkif_recover(void)
{
int i;
+ blkif_request_t *req;
+ struct blk_shadow *copy;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int j;
+#endif
+
+ /* Stage 1: Make a safe copy of the shadow state. */
+ copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL);
+ BUG_ON(copy == NULL);
+ memcpy(copy, blk_shadow, sizeof(blk_shadow));
- /* Hmm, requests might be re-ordered when we re-issue them.
- * This will need to be fixed once we have barriers */
+ /* Stage 2: Set up free list. */
+ memset(&blk_shadow, 0, sizeof(blk_shadow));
+ for ( i = 0; i < BLK_RING_SIZE; i++ )
+ blk_shadow[i].req.id = i+1;
+ blk_shadow_free = blk_ring.req_prod_pvt;
+ blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
- /* Stage 1 : Find active and move to safety. */
- for ( i = 0; i < BLKIF_RING_SIZE; i++ )
+ /* Stage 3: Find pending requests and requeue them. */
+ for ( i = 0; i < BLK_RING_SIZE; i++ )
{
- if ( rec_ring[i].id >= PAGE_OFFSET )
+ /* Not in use? */
+ if ( copy[i].request == 0 )
+ continue;
+
+ /* Grab a request slot and unpickle shadow state into it. */
+ req = RING_GET_REQUEST(
+ &blk_ring, blk_ring.req_prod_pvt);
+ unpickle_request(req, &copy[i]);
+
+ /* We get a new request id, and must reset the shadow state. */
+ req->id = GET_ID_FROM_FREELIST();
+ memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i]));
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* Rewrite any grant references invalidated by suspend/resume. */
+ for ( j = 0; j < req->nr_segments; j++ )
{
- translate_req_to_mfn(
- &blk_ring->ring[req_prod].req, &rec_ring[i]);
- req_prod++;
+ if ( req->frame_and_sects[j] & GRANTREF_INVALID )
+ gnttab_grant_foreign_access_ref(
+ blkif_gref_from_fas(req->frame_and_sects[j]),
+ rdomid,
+ blk_shadow[req->id].frame[j],
+ rq_data_dir((struct request *)
+ blk_shadow[req->id].request));
+ req->frame_and_sects[j] &= ~GRANTREF_INVALID;
}
- }
+ blk_shadow[req->id].req = *req;
+#endif
- /* Stage 2 : Set up shadow list. */
- for ( i = 0; i < req_prod; i++ )
- {
- rec_ring[i].id = blk_ring->ring[i].req.id;
- blk_ring->ring[i].req.id = i;
- translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req);
+ blk_ring.req_prod_pvt++;
}
- /* Stage 3 : Set up free list. */
- for ( ; i < BLKIF_RING_SIZE; i++ )
- rec_ring[i].id = i+1;
- rec_ring_free = req_prod;
- rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+ kfree(copy);
- /* blk_ring->req_prod will be set when we flush_requests().*/
- wmb();
-
- /* Switch off recovery mode, using a memory barrier to ensure that
- * it's seen before we flush requests - we don't want to miss any
- * interrupts. */
recovery = 0;
+
+ /* blk_ring->req_prod will be set when we flush_requests().*/
wmb();
/* Kicks things back into life. */
flush_requests();
- /* Now safe to left other peope use interface. */
+ /* Now safe to let other people use the interface. */
blkif_state = BLKIF_STATE_CONNECTED;
}
@@ -1200,6 +1261,9 @@ static void blkif_connect(blkif_fe_interface_status_t *status)
blkif_evtchn = status->evtchn;
blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ rdomid = status->domid;
+#endif
err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
if ( err )
@@ -1238,7 +1302,8 @@ static void blkif_status(blkif_fe_interface_status_t *status)
{
if ( status->handle != blkif_handle )
{
- WPRINTK(" Invalid blkif: handle=%u", status->handle);
+ WPRINTK(" Invalid blkif: handle=%u\n", status->handle);
+ unexpected(status);
return;
}
@@ -1315,21 +1380,15 @@ static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
switch ( msg->subtype )
{
case CMSG_BLKIF_FE_INTERFACE_STATUS:
- if ( msg->length != sizeof(blkif_fe_interface_status_t) )
- goto parse_error;
blkif_status((blkif_fe_interface_status_t *)
&msg->msg[0]);
- break;
+ break;
default:
- goto parse_error;
+ msg->length = 0;
+ break;
}
ctrl_if_send_response(msg);
- return;
-
- parse_error:
- msg->length = 0;
- ctrl_if_send_response(msg);
}
int wait_for_blkif(void)
@@ -1360,17 +1419,25 @@ int wait_for_blkif(void)
int __init xlblk_init(void)
{
int i;
-
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS,
+ &gref_head, &gref_terminal ))
+ return 1;
+ printk(KERN_ALERT "Blkif frontend is using grant tables.\n");
+#endif
+
if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
return 0;
IPRINTK("Initialising virtual block device driver\n");
- rec_ring_free = 0;
- for ( i = 0; i < BLKIF_RING_SIZE; i++ )
- rec_ring[i].id = i+1;
- rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+ blk_shadow_free = 0;
+ memset(blk_shadow, 0, sizeof(blk_shadow));
+ for ( i = 0; i < BLK_RING_SIZE; i++ )
+ blk_shadow[i].req.id = i+1;
+ blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
(void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
CALLBACK_IN_BLOCKING_CONTEXT);
@@ -1386,25 +1453,32 @@ void blkdev_suspend(void)
void blkdev_resume(void)
{
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int i, j;
+ for ( i = 0; i < BLK_RING_SIZE; i++ )
+ for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
+ blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
+#endif
send_driver_status(1);
}
-/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
-
-void blkif_completion(blkif_request_t *req)
+static void blkif_completion(struct blk_shadow *s)
{
int i;
-
- switch ( req->operation )
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ for ( i = 0; i < s->req.nr_segments; i++ )
+ gnttab_release_grant_reference(
+ &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
+#else
+ /* This is a hack to get the dirty logging bits set */
+ if ( s->req.operation == BLKIF_OP_READ )
{
- case BLKIF_OP_READ:
- for ( i = 0; i < req->nr_segments; i++ )
+ for ( i = 0; i < s->req.nr_segments; i++ )
{
- unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
+ unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT;
unsigned long mfn = phys_to_machine_mapping[pfn];
xen_machphys_update(mfn, pfn);
}
- break;
}
-
+#endif
}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
index 1a7eaf8f60..412d9a4394 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
@@ -46,6 +46,7 @@
#include <linux/devfs_fs_kernel.h>
#include <asm-xen/xen-public/xen.h>
#include <asm-xen/xen-public/io/blkif.h>
+#include <asm-xen/xen-public/io/ring.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
@@ -98,13 +99,16 @@ struct xlbd_major_info {
struct xlbd_disk_info {
int xd_device;
struct xlbd_major_info *mi;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ struct xlbd_disk_info *next_waiting;
+ request_queue_t *rq;
+#endif
};
typedef struct xen_block {
int usage;
} xen_block_t;
-extern struct request_queue *xlbd_blk_queue;
extern spinlock_t blkif_io_lock;
extern int blkif_open(struct inode *inode, struct file *filep);
@@ -114,6 +118,10 @@ extern int blkif_ioctl(struct inode *inode, struct file *filep,
extern int blkif_check(dev_t dev);
extern int blkif_revalidate(dev_t dev);
extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+extern void blkif_control_probe_send(
+ blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
+#endif
extern void do_blkif_request (request_queue_t *rq);
extern void xlvbd_update_vbds(void);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
index e2faa59999..233aeda16c 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
@@ -87,8 +87,6 @@ static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
#define MAX_VBDS 64
struct list_head vbds_list;
-struct request_queue *xlbd_blk_queue = NULL;
-
#define MAJOR_XEN(dev) ((dev)>>8)
#define MINOR_XEN(dev) ((dev) & 0xff)
@@ -233,35 +231,33 @@ static struct xlbd_major_info *xlbd_get_major_info(int device)
xlbd_alloc_major_info(major, minor, index));
}
-static int xlvbd_blk_queue_alloc(struct xlbd_type_info *type)
+static int xlvbd_init_blk_queue(struct gendisk *gd, vdisk_t *disk)
{
- xlbd_blk_queue = blk_init_queue(do_blkif_request, &blkif_io_lock);
- if (xlbd_blk_queue == NULL)
- return -1;
+ request_queue_t *rq;
- elevator_init(xlbd_blk_queue, "noop");
+ rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
+ if (rq == NULL)
+ return -1;
- /*
- * Turn off barking 'headactive' mode. We dequeue
- * buffer heads as soon as we pass them to back-end
- * driver.
- */
- blk_queue_headactive(xlbd_blk_queue, 0);
+ elevator_init(rq, "noop");
/* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_hardsect_size(xlbd_blk_queue, 512);
- blk_queue_max_sectors(xlbd_blk_queue, 512);
+ blk_queue_hardsect_size(rq, disk->sector_size);
+ blk_queue_max_sectors(rq, 512);
/* Each segment in a request is up to an aligned page in size. */
- blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
- blk_queue_max_segment_size(xlbd_blk_queue, PAGE_SIZE);
+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+ blk_queue_max_segment_size(rq, PAGE_SIZE);
/* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_phys_segments(xlbd_blk_queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
- blk_queue_max_hw_segments(xlbd_blk_queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
/* Make sure buffer addresses are sector-aligned. */
- blk_queue_dma_alignment(xlbd_blk_queue, 511);
+ blk_queue_dma_alignment(rq, 511);
+
+ gd->queue = rq;
+
return 0;
}
@@ -274,7 +270,8 @@ struct gendisk *xlvbd_alloc_gendisk(
di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
if (di == NULL)
- goto out;
+ return NULL;
+ memset(di, 0, sizeof(*di));
di->mi = mi;
di->xd_device = disk->device;
@@ -282,7 +279,7 @@ struct gendisk *xlvbd_alloc_gendisk(
nr_minors = 1 << mi->type->partn_shift;
gd = alloc_disk(nr_minors);
- if ( !gd )
+ if (gd == NULL)
goto out;
if (nr_minors > 1)
@@ -301,21 +298,26 @@ struct gendisk *xlvbd_alloc_gendisk(
gd->private_data = di;
set_capacity(gd, disk->capacity);
- if ((xlbd_blk_queue == NULL) && xlvbd_blk_queue_alloc(mi->type))
- goto out_gendisk;
+ if (xlvbd_init_blk_queue(gd, disk)) {
+ del_gendisk(gd);
+ goto out;
+ }
+
+ di->rq = gd->queue;
- if (VDISK_READONLY(disk->info))
+ if (disk->info & VDISK_READONLY)
set_disk_ro(gd, 1);
- if (VDISK_TYPE(disk->info) == VDISK_TYPE_CDROM)
- gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD;
+ if (disk->info & VDISK_REMOVABLE)
+ gd->flags |= GENHD_FL_REMOVABLE;
+
+ if (disk->info & VDISK_CDROM)
+ gd->flags |= GENHD_FL_CD;
- gd->queue = xlbd_blk_queue;
add_disk(gd);
+
return gd;
-out_gendisk:
- del_gendisk(gd);
out:
kfree(di);
return NULL;
@@ -366,6 +368,7 @@ static int xlvbd_device_del(struct lvdisk *disk)
struct gendisk *gd;
struct xlbd_disk_info *di;
int ret = 0, unused;
+ request_queue_t *rq;
device = MKDEV(MAJOR_XEN(disk->device), MINOR_XEN(disk->device));
@@ -382,7 +385,10 @@ static int xlvbd_device_del(struct lvdisk *disk)
goto out;
}
+ rq = gd->queue;
del_gendisk(gd);
+ put_disk(gd);
+ blk_cleanup_queue(rq);
xlvbd_device_free(disk);
out:
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile
new file mode 100644
index 0000000000..80b7ca0627
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile
@@ -0,0 +1,3 @@
+
+obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o
+
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c
new file mode 100644
index 0000000000..a9a00677bc
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c
@@ -0,0 +1,87 @@
+/******************************************************************************
+ * blktap.c
+ *
+ * XenLinux virtual block-device tap.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ * Based on the original split block driver:
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ *
+ * Note that unlike the split block driver code, this driver has been developed
+ * strictly for Linux 2.6
+ */
+
+#include "blktap.h"
+
+int __init xlblktap_init(void)
+{
+ ctrl_msg_t cmsg;
+ blkif_fe_driver_status_t fe_st;
+ blkif_be_driver_status_t be_st;
+
+ printk(KERN_INFO "Initialising Xen block tap device\n");
+
+ DPRINTK(" tap - Backend connection init:\n");
+
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_FE;
+ cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
+ cmsg.length = sizeof(blkif_fe_driver_status_t);
+ fe_st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+ DPRINTK(" tap - Frontend connection init:\n");
+
+ active_reqs_init();
+ blkif_interface_init();
+ blkdev_schedule_init();
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS;
+ cmsg.length = sizeof(blkif_be_driver_status_t);
+ be_st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &be_st, sizeof(be_st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+ DPRINTK(" tap - Userland channel init:\n");
+
+ blktap_init();
+
+ DPRINTK("Blkif tap device initialized.\n");
+
+ return 0;
+}
+
+#if 0 /* tap doesn't handle suspend/resume */
+void blkdev_suspend(void)
+{
+}
+
+void blkdev_resume(void)
+{
+ ctrl_msg_t cmsg;
+ blkif_fe_driver_status_t st;
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_FE;
+ cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
+ cmsg.length = sizeof(blkif_fe_driver_status_t);
+ st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+#endif
+
+__initcall(xlblktap_init);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h
new file mode 100644
index 0000000000..dac4b0a676
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h
@@ -0,0 +1,253 @@
+/*
+ * blktap.h
+ *
+ * Interfaces for the Xen block tap driver.
+ *
+ * (c) 2004, Andrew Warfield, University of Cambridge
+ *
+ */
+
+#ifndef __BLKTAP_H__
+#define __BLKTAP_H__
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <asm-xen/ctrl_if.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/io/blkif.h>
+#include <asm-xen/xen-public/io/ring.h>
+
+/* Used to signal to the backend that this is a tap domain. */
+#define BLKTAP_COOKIE 0xbeadfeed
+
+/* -------[ debug / pretty printing ]--------------------------------- */
+
+#define PRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#if 0
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
+
+
+/* -------[ state descriptors ]--------------------------------------- */
+
+#define BLKIF_STATE_CLOSED 0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED 2
+
+/* -------[ connection tracking ]------------------------------------- */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#endif
+
+extern spinlock_t blkif_io_lock;
+
+typedef struct blkif_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned long shmem_frame;
+ unsigned int evtchn;
+ int irq;
+ /* Comms information. */
+ blkif_back_ring_t blk_ring;
+
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */
+ u8 disconnect_rspid;
+ struct blkif_st *hash_next;
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
+ struct work_struct work;
+} blkif_t;
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+void blkif_disconnect_complete(blkif_t *blkif);
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define blkif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ blkif_disconnect_complete(_b); \
+ } while (0)
+
+
+/* -------[ active request tracking ]--------------------------------- */
+
+typedef struct {
+ blkif_t *blkif;
+ unsigned long id;
+ int nr_pages;
+ unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int next_free;
+} active_req_t;
+
+typedef unsigned int ACTIVE_RING_IDX;
+
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx);
+
+extern inline unsigned int ID_TO_IDX(unsigned long id)
+{
+ return ( id & 0x0000ffff );
+}
+
+extern inline domid_t ID_TO_DOM(unsigned long id)
+{
+ return (id >> 16);
+}
+
+void active_reqs_init(void);
+
+/* -------[ interposition -> character device interface ]------------- */
+
+/* /dev/xen/blktap resides at device number major=10, minor=200 */
+#define BLKTAP_MINOR 202
+
+/* size of the extra VMA area to map in attached pages. */
+#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
+
+/* blktap IOCTLs: */
+#define BLKTAP_IOCTL_KICK_FE 1
+#define BLKTAP_IOCTL_KICK_BE 2
+#define BLKTAP_IOCTL_SETMODE 3
+#define BLKTAP_IOCTL_PRINT_IDXS 100
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
+#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
+#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
+#define BLKTAP_MODE_COPY_FE 0x00000004
+#define BLKTAP_MODE_COPY_BE 0x00000008
+#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
+#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
+
+#define BLKTAP_MODE_INTERPOSE \
+ (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+#define BLKTAP_MODE_COPY_BOTH \
+ (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
+
+#define BLKTAP_MODE_COPY_BOTH_PAGES \
+ (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
+
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+ return (
+ ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
+ ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+ ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
+ ( arg == BLKTAP_MODE_INTERPOSE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
+ );
+}
+
+
+
+/* -------[ Mappings to User VMA ]------------------------------------ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+extern struct vm_area_struct *blktap_vma;
+
+/* The following are from blkback.c and should probably be put in a
+ * header and included from there.
+ * The mmap area described here is where attached data pages eill be mapped.
+ */
+
+extern unsigned long mmap_vstart;
+#define MMAP_PAGES_PER_REQUEST \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
+
+/* immediately before the mmap area, we have a bunch of pages reserved
+ * for shared memory rings.
+ */
+
+#define RING_PAGES 3 /* Ctrl, Front, and Back */
+extern unsigned long rings_vstart;
+
+
+/* -------[ Here be globals ]----------------------------------------- */
+extern unsigned long blktap_mode;
+
+/* Connection to a single backend domain. */
+extern blkif_front_ring_t blktap_be_ring;
+extern unsigned int blktap_be_evtchn;
+extern unsigned int blktap_be_state;
+
+/* User ring status. */
+extern unsigned long blktap_ring_ok;
+
+/* -------[ ...and function prototypes. ]----------------------------- */
+
+/* init function for character device interface. */
+int blktap_init(void);
+
+/* init function for the blkif cache. */
+void __init blkif_interface_init(void);
+void __init blkdev_schedule_init(void);
+void blkif_deschedule(blkif_t *blkif);
+
+/* interfaces to the char driver, passing messages to and from apps. */
+void blktap_kick_user(void);
+
+/* user ring access functions: */
+int blktap_write_fe_ring(blkif_request_t *req);
+int blktap_write_be_ring(blkif_response_t *rsp);
+int blktap_write_ctrl_ring(ctrl_msg_t *msg);
+
+/* fe/be ring access functions: */
+int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp);
+int write_req_to_be_ring(blkif_request_t *req);
+
+/* event notification functions */
+void kick_fe_domain(blkif_t *blkif);
+void kick_be_domain(void);
+
+/* Interrupt handlers. */
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
+ struct pt_regs *ptregs);
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
+
+/* Control message receiver. */
+extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
+
+/* debug */
+void print_fe_ring_idxs(void);
+void print_be_ring_idxs(void);
+
+#define __BLKINT_H__
+#endif
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
new file mode 100644
index 0000000000..e31fc8f6cd
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
@@ -0,0 +1,540 @@
+/******************************************************************************
+ * blktap_controlmsg.c
+ *
+ * XenLinux virtual block-device tap.
+ * Control interfaces to the frontend and backend drivers.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include "blktap.h"
+
+static char *blkif_state_name[] = {
+ [BLKIF_STATE_CLOSED] = "closed",
+ [BLKIF_STATE_DISCONNECTED] = "disconnected",
+ [BLKIF_STATE_CONNECTED] = "connected",
+};
+
+static char * blkif_status_name[] = {
+ [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
+ [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+ [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
+ [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
+};
+
+static unsigned blktap_be_irq;
+unsigned int blktap_be_state = BLKIF_STATE_CLOSED;
+unsigned int blktap_be_evtchn;
+
+/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+static kmem_cache_t *blkif_cachep;
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+ blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif != NULL) &&
+ ((blkif->domid != domid) || (blkif->handle != handle)) )
+ blkif = blkif->hash_next;
+ return blkif;
+}
+
+static void __blkif_disconnect_complete(void *arg)
+{
+ blkif_t *blkif = (blkif_t *)arg;
+ ctrl_msg_t cmsg;
+ blkif_be_disconnect_t disc;
+
+ /*
+ * These can't be done in blkif_disconnect() because at that point there
+ * may be outstanding requests at the disc whose asynchronous responses
+ * must still be notified to the remote driver.
+ */
+ unbind_evtchn_from_irq(blkif->evtchn);
+ vfree(blkif->blk_ring.sring);
+
+ /* Construct the deferred response message. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT;
+ cmsg.id = blkif->disconnect_rspid;
+ cmsg.length = sizeof(blkif_be_disconnect_t);
+ disc.domid = blkif->domid;
+ disc.blkif_handle = blkif->handle;
+ disc.status = BLKIF_BE_STATUS_OKAY;
+ memcpy(cmsg.msg, &disc, sizeof(disc));
+
+ /*
+ * Make sure message is constructed /before/ status change, because
+ * after the status change the 'blkif' structure could be deallocated at
+ * any time. Also make sure we send the response /after/ status change,
+ * as otherwise a subsequent CONNECT request could spuriously fail if
+ * another CPU doesn't see the status change yet.
+ */
+ mb();
+ if ( blkif->status != DISCONNECTING )
+ BUG();
+ blkif->status = DISCONNECTED;
+ mb();
+
+ /* Send the successful response. */
+ ctrl_if_send_response(&cmsg);
+}
+
+void blkif_disconnect_complete(blkif_t *blkif)
+{
+ INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
+ schedule_work(&blkif->work);
+}
+
+void blkif_ptfe_create(blkif_be_create_t *create)
+{
+ blkif_t *blkif, **pblkif;
+ domid_t domid = create->domid;
+ unsigned int handle = create->blkif_handle;
+
+
+ /* May want to store info on the connecting domain here. */
+
+ DPRINTK("PT got BE_CREATE\n");
+
+ if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
+ {
+ WPRINTK("Could not create blkif: out of memory\n");
+ create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ /* blkif struct init code from blkback.c */
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ blkif->handle = handle;
+ blkif->status = DISCONNECTED;
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 0);
+
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( *pblkif != NULL )
+ {
+ if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+ {
+ WPRINTK("Could not create blkif: already exists\n");
+ create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
+ kmem_cache_free(blkif_cachep, blkif);
+ return;
+ }
+ pblkif = &(*pblkif)->hash_next;
+ }
+
+ blkif->hash_next = *pblkif;
+ *pblkif = blkif;
+
+ create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+
+void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
+{
+ /* Clear anything that we initialized above. */
+
+ domid_t domid = destroy->domid;
+ unsigned int handle = destroy->blkif_handle;
+ blkif_t **pblkif, *blkif;
+
+ DPRINTK("PT got BE_DESTROY\n");
+
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif = *pblkif) != NULL )
+ {
+ if ( (blkif->domid == domid) && (blkif->handle == handle) )
+ {
+ if ( blkif->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
+ }
+ pblkif = &blkif->hash_next;
+ }
+
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+
+ still_connected:
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
+ *pblkif = blkif->hash_next;
+ kmem_cache_free(blkif_cachep, blkif);
+ destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+void blkif_ptfe_connect(blkif_be_connect_t *connect)
+{
+ domid_t domid = connect->domid;
+ unsigned int handle = connect->blkif_handle;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long shmem_frame = connect->shmem_frame;
+ struct vm_struct *vma;
+ pgprot_t prot;
+ int error;
+ blkif_t *blkif;
+ blkif_sring_t *sring;
+
+ DPRINTK("PT got BE_CONNECT\n");
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ WPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n",
+ connect->domid, connect->blkif_handle);
+ connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ {
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+ error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+ shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+ prot, domid);
+ if ( error != 0 )
+ {
+ WPRINTK("BE_CONNECT: error! (%d)\n", error);
+ if ( error == -ENOMEM )
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ else if ( error == -EFAULT ) {
+ connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+ WPRINTK("BE_CONNECT: MAPPING error!\n");
+ }
+ else
+ connect->status = BLKIF_BE_STATUS_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ if ( blkif->status != DISCONNECTED )
+ {
+ connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ vfree(vma->addr);
+ return;
+ }
+
+ sring = (blkif_sring_t *)vma->addr;
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+
+ blkif->evtchn = evtchn;
+ blkif->irq = bind_evtchn_to_irq(evtchn);
+ blkif->shmem_frame = shmem_frame;
+ blkif->status = CONNECTED;
+ blkif_get(blkif);
+
+ request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
+
+ connect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+ domid_t domid = disconnect->domid;
+ unsigned int handle = disconnect->blkif_handle;
+ blkif_t *blkif;
+
+ DPRINTK("PT got BE_DISCONNECT\n");
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ WPRINTK("blkif_disconnect attempted for non-existent blkif"
+ " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle);
+ disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return 1; /* Caller will send response error message. */
+ }
+
+ if ( blkif->status == CONNECTED )
+ {
+ blkif->status = DISCONNECTING;
+ blkif->disconnect_rspid = rsp_id;
+ wmb(); /* Let other CPUs see the status change. */
+ free_irq(blkif->irq, blkif);
+ blkif_deschedule(blkif);
+ blkif_put(blkif);
+ return 0; /* Caller should not send response message. */
+ }
+
+ disconnect->status = BLKIF_BE_STATUS_OKAY;
+ return 1;
+}
+
+/*-----[ Control Messages to/from Backend VM ]----------------------------*/
+
+/* Tell the controller to bring up the interface. */
+static void blkif_ptbe_send_interface_connect(void)
+{
+ ctrl_msg_t cmsg = {
+ .type = CMSG_BLKIF_FE,
+ .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
+ .length = sizeof(blkif_fe_interface_connect_t),
+ };
+ blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
+ msg->handle = 0;
+ msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+static void blkif_ptbe_close(void)
+{
+}
+
+/* Move from CLOSED to DISCONNECTED state. */
+static void blkif_ptbe_disconnect(void)
+{
+ blkif_sring_t *sring;
+
+ sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&blktap_be_ring, sring, PAGE_SIZE);
+ blktap_be_state = BLKIF_STATE_DISCONNECTED;
+ DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
+ blkif_ptbe_send_interface_connect();
+}
+
+static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
+{
+ int err = 0;
+
+ blktap_be_evtchn = status->evtchn;
+ blktap_be_irq = bind_evtchn_to_irq(blktap_be_evtchn);
+
+ err = request_irq(blktap_be_irq, blkif_ptbe_int,
+ SA_SAMPLE_RANDOM, "blkif", NULL);
+ if ( err ) {
+ WPRINTK("blkfront request_irq failed (%d)\n", err);
+ return;
+ } else {
+ /* transtion to connected in case we need to do a
+ a partion probe on a whole disk */
+ blktap_be_state = BLKIF_STATE_CONNECTED;
+ }
+}
+
+static void unexpected(blkif_fe_interface_status_t *status)
+{
+ WPRINTK(" TAP: Unexpected blkif status %s in state %s\n",
+ blkif_status_name[status->status],
+ blkif_state_name[blktap_be_state]);
+}
+
+static void blkif_ptbe_status(
+ blkif_fe_interface_status_t *status)
+{
+ if ( status->handle != 0 )
+ {
+ DPRINTK("Status change on unsupported blkif %d\n",
+ status->handle);
+ return;
+ }
+
+ DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
+
+ switch ( status->status )
+ {
+ case BLKIF_INTERFACE_STATUS_CLOSED:
+ switch ( blktap_be_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ unexpected(status);
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_ptbe_close();
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+ switch ( blktap_be_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ blkif_ptbe_disconnect();
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ case BLKIF_STATE_CONNECTED:
+ printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
+ unexpected(status);
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CONNECTED:
+ switch ( blktap_be_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ unexpected(status);
+ blkif_ptbe_disconnect();
+ blkif_ptbe_connect(status);
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ blkif_ptbe_connect(status);
+ break;
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_ptbe_connect(status);
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CHANGED:
+ switch ( blktap_be_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ case BLKIF_STATE_DISCONNECTED:
+ unexpected(status);
+ break;
+ case BLKIF_STATE_CONNECTED:
+ /* vbd_update(); */
+ /* tap doesn't really get state changes... */
+ unexpected(status);
+ break;
+ }
+ break;
+
+ default:
+ DPRINTK("Status change to unknown value %d\n", status->status);
+ break;
+ }
+}
+
+/*-----[ All control messages enter here: ]-------------------------------*/
+
+void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->type )
+ {
+ case CMSG_BLKIF_FE:
+
+ switch ( msg->subtype )
+ {
+ case CMSG_BLKIF_FE_INTERFACE_STATUS:
+ blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
+ break;
+
+ default:
+ goto parse_error;
+ }
+
+ break;
+
+ case CMSG_BLKIF_BE:
+
+ /* send a copy of the message to user if wanted */
+
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+ blktap_write_ctrl_ring(msg);
+ blktap_kick_user();
+ }
+
+ switch ( msg->subtype )
+ {
+ case CMSG_BLKIF_BE_CREATE:
+ blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_DESTROY:
+ blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_CONNECT:
+ blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_DISCONNECT:
+ if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0],
+ msg->id) )
+ return;
+ break;
+
+ /* We just ignore anything to do with vbds for now. */
+
+ case CMSG_BLKIF_BE_VBD_CREATE:
+ DPRINTK("PT got VBD_CREATE\n");
+ ((blkif_be_vbd_create_t *)&msg->msg[0])->status
+ = BLKIF_BE_STATUS_OKAY;
+ break;
+ case CMSG_BLKIF_BE_VBD_DESTROY:
+ DPRINTK("PT got VBD_DESTROY\n");
+ ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
+ = BLKIF_BE_STATUS_OKAY;
+ break;
+ default:
+ goto parse_error;
+ }
+
+ break;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+/*-----[ Initialization ]-------------------------------------------------*/
+
+void __init blkif_interface_init(void)
+{
+ blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
+ 0, 0, NULL, NULL);
+ memset(blkif_hash, 0, sizeof(blkif_hash));
+
+ blktap_be_ring.sring = NULL;
+}
+
+
+
+/* Debug : print the current ring indices. */
+
+void print_fe_ring_idxs(void)
+{
+ int i;
+ blkif_t *blkif;
+
+ WPRINTK("FE Rings: \n---------\n");
+ for ( i = 0; i < BLKIF_HASHSZ; i++) {
+ blkif = blkif_hash[i];
+ while (blkif != NULL) {
+ if (blkif->status == DISCONNECTED) {
+ WPRINTK("(%2d,%2d) DISCONNECTED\n",
+ blkif->domid, blkif->handle);
+ } else if (blkif->status == DISCONNECTING) {
+ WPRINTK("(%2d,%2d) DISCONNECTING\n",
+ blkif->domid, blkif->handle);
+ } else if (blkif->blk_ring.sring == NULL) {
+ WPRINTK("(%2d,%2d) CONNECTED, but null sring!\n",
+ blkif->domid, blkif->handle);
+ } else {
+ blkif_get(blkif);
+ WPRINTK("(%2d,%2d): req_cons: %2d, rsp_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blkif->domid, blkif->handle,
+ blkif->blk_ring.req_cons,
+ blkif->blk_ring.rsp_prod_pvt,
+ blkif->blk_ring.sring->req_prod,
+ blkif->blk_ring.sring->rsp_prod);
+ blkif_put(blkif);
+ }
+ blkif = blkif->hash_next;
+ }
+ }
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
new file mode 100644
index 0000000000..56bf59d718
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
@@ -0,0 +1,451 @@
+/******************************************************************************
+ * blktap_datapath.c
+ *
+ * XenLinux virtual block-device tap.
+ * Block request routing data path.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ * -- see full header in blktap.c
+ */
+
+#include "blktap.h"
+#include <asm-xen/evtchn.h>
+
+/*-----[ The data paths ]-------------------------------------------------*/
+
+/* Connection to a single backend domain. */
+blkif_front_ring_t blktap_be_ring;
+
+/*-----[ Tracking active requests ]---------------------------------------*/
+
+/* this must be the same as MAX_PENDING_REQS in blkback.c */
+#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
+
+active_req_t active_reqs[MAX_ACTIVE_REQS];
+ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS];
+spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED;
+ACTIVE_RING_IDX active_prod, active_cons;
+#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+#define ACTIVE_IDX(_ar) (_ar - active_reqs)
+#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
+
+inline active_req_t *get_active_req(void)
+{
+ ACTIVE_RING_IDX idx;
+ active_req_t *ar;
+ unsigned long flags;
+
+ ASSERT(active_cons != active_prod);
+
+ spin_lock_irqsave(&active_req_lock, flags);
+ idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
+ ar = &active_reqs[idx];
+ spin_unlock_irqrestore(&active_req_lock, flags);
+
+ return ar;
+}
+
+inline void free_active_req(active_req_t *ar)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&active_req_lock, flags);
+ active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
+ spin_unlock_irqrestore(&active_req_lock, flags);
+}
+
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
+{
+ return &active_reqs[idx];
+}
+
+void active_reqs_init(void)
+{
+ ACTIVE_RING_IDX i;
+
+ active_cons = 0;
+ active_prod = MAX_ACTIVE_REQS;
+ memset(active_reqs, 0, sizeof(active_reqs));
+ for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
+ active_req_ring[i] = i;
+}
+
+/* Requests passing through the tap to the backend hijack the id field
+ * in the request message. In it we put the AR index _AND_ the fe domid.
+ * the domid is used by the backend to map the pages properly.
+ */
+
+static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
+ return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
+}
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
+static void maybe_trigger_blktap_schedule(void);
+
+inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
+{
+ blkif_response_t *resp_d;
+ active_req_t *ar;
+
+ ar = &active_reqs[ID_TO_IDX(rsp->id)];
+ rsp->id = ar->id;
+
+ resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
+ blkif->blk_ring.rsp_prod_pvt);
+ memcpy(resp_d, rsp, sizeof(blkif_response_t));
+ wmb();
+ blkif->blk_ring.rsp_prod_pvt++;
+
+ blkif_put(ar->blkif);
+ free_active_req(ar);
+
+ return 0;
+}
+
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+ blkif_request_t *req_d;
+
+ if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
+ WPRINTK("Tap trying to access an unconnected backend!\n");
+ return 0;
+ }
+
+ req_d = RING_GET_REQUEST(&blktap_be_ring,
+ blktap_be_ring.req_prod_pvt);
+ memcpy(req_d, req, sizeof(blkif_request_t));
+ wmb();
+ blktap_be_ring.req_prod_pvt++;
+
+ return 0;
+}
+
+void kick_fe_domain(blkif_t *blkif)
+{
+ RING_PUSH_RESPONSES(&blkif->blk_ring);
+ notify_via_evtchn(blkif->evtchn);
+ DPRINTK("notified FE(dom %u)\n", blkif->domid);
+
+ /* We just feed up a batch of request slots... */
+ maybe_trigger_blktap_schedule();
+
+}
+
+void kick_be_domain(void)
+{
+ if ( blktap_be_state != BLKIF_STATE_CONNECTED )
+ return;
+
+ wmb(); /* Ensure that the frontend can see the requests. */
+ RING_PUSH_REQUESTS(&blktap_be_ring);
+ notify_via_evtchn(blktap_be_evtchn);
+ DPRINTK("notified BE\n");
+}
+
+/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+
+/*-----[ Scheduler list maint -from blkback ]--- */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+ return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+ unsigned long flags;
+ if ( !__on_blkdev_list(blkif) ) return;
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if ( __on_blkdev_list(blkif) )
+ {
+ list_del(&blkif->blkdev_list);
+ blkif->blkdev_list.next = NULL;
+ blkif_put(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+ unsigned long flags;
+ if ( __on_blkdev_list(blkif) ) return;
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+ {
+ list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+ blkif_get(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/*-----[ Scheduler functions - from blkback ]--- */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+
+static int blkio_schedule(void *arg)
+{
+ DECLARE_WAITQUEUE(wq, current);
+
+ blkif_t *blkif;
+ struct list_head *ent;
+
+ daemonize(
+ "xentapd"
+ );
+
+ for ( ; ; )
+ {
+ /* Wait for work to do. */
+ add_wait_queue(&blkio_schedule_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) ||
+ list_empty(&blkio_schedule_list) )
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&blkio_schedule_wait, &wq);
+
+ /* Queue up a batch of requests. */
+ while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
+ !list_empty(&blkio_schedule_list) )
+ {
+ ent = blkio_schedule_list.next;
+ blkif = list_entry(ent, blkif_t, blkdev_list);
+ blkif_get(blkif);
+ remove_from_blkdev_list(blkif);
+ if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+ add_to_blkdev_list_tail(blkif);
+ blkif_put(blkif);
+ }
+ }
+}
+
+static void maybe_trigger_blktap_schedule(void)
+{
+ /*
+ * Needed so that two processes, who together make the following predicate
+ * true, don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
+ if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS/2)) &&
+ !list_empty(&blkio_schedule_list) )
+ wake_up(&blkio_schedule_wait);
+}
+
+void blkif_deschedule(blkif_t *blkif)
+{
+ remove_from_blkdev_list(blkif);
+}
+
+void __init blkdev_schedule_init(void)
+{
+ spin_lock_init(&blkio_schedule_list_lock);
+ INIT_LIST_HEAD(&blkio_schedule_list);
+
+ if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+ BUG();
+}
+
+/*-----[ Interrupt entry from a frontend ]------ */
+
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ blkif_t *blkif = dev_id;
+
+ add_to_blkdev_list_tail(blkif);
+ maybe_trigger_blktap_schedule();
+ return IRQ_HANDLED;
+}
+
+/*-----[ Other Frontend Ring functions ]-------- */
+
+/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+ /* we have pending messages from the real frontend. */
+
+ blkif_request_t *req_s;
+ RING_IDX i, rp;
+ unsigned long flags;
+ active_req_t *ar;
+ int more_to_do = 0;
+ int notify_be = 0, notify_user = 0;
+
+ if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
+
+ /* lock both rings */
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ rp = blkif->blk_ring.sring->req_prod;
+ rmb();
+
+ for ( i = blkif->blk_ring.req_cons;
+ (i != rp) &&
+ !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
+ i++ )
+ {
+
+ if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS))
+ {
+ more_to_do = 1;
+ break;
+ }
+
+ req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
+ /* This is a new request:
+ * Assign an active request record, and remap the id.
+ */
+ ar = get_active_req();
+ ar->id = req_s->id;
+ ar->nr_pages = req_s->nr_segments;
+ blkif_get(blkif);
+ ar->blkif = blkif;
+ req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
+ /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
+
+ /* FE -> BE interposition point is here. */
+
+ /* ------------------------------------------------------------- */
+ /* BLKIF_OP_PROBE_HACK: */
+ /* Signal to the backend that we are a tap domain. */
+
+ if (req_s->operation == BLKIF_OP_PROBE) {
+ DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
+ req_s->frame_and_sects[1] = BLKTAP_COOKIE;
+ }
+
+ /* ------------------------------------------------------------- */
+
+ /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+ /* Copy the response message to UFERing */
+ /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+ /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+
+ DPRINTK("req->UFERing\n");
+ blktap_write_fe_ring(req_s);
+ notify_user = 1;
+ }
+
+ /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+ if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+
+ /* be included to prevent noise from the fe when its off */
+ /* copy the request message to the BERing */
+
+ DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
+ (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
+ (unsigned)blktap_be_ring.req_prod_pvt &
+ (RING_SIZE(&blktap_be_ring)-1));
+
+ write_req_to_be_ring(req_s);
+ notify_be = 1;
+ }
+ }
+
+ blkif->blk_ring.req_cons = i;
+
+ /* unlock rings */
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ if (notify_user)
+ blktap_kick_user();
+ if (notify_be)
+ kick_be_domain();
+
+ return more_to_do;
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
+ struct pt_regs *ptregs)
+{
+ blkif_response_t *resp_s;
+ blkif_t *blkif;
+ RING_IDX rp, i;
+ unsigned long flags;
+
+ DPRINTK("PT got BE interrupt.\n");
+
+ /* lock both rings */
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ rp = blktap_be_ring.sring->rsp_prod;
+ rmb();
+
+ for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
+ {
+ resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
+
+ /* BE -> FE interposition point is here. */
+
+ blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
+
+ /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+ /* Copy the response message to UBERing */
+ /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+ /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+
+ DPRINTK("rsp->UBERing\n");
+ blktap_write_be_ring(resp_s);
+ blktap_kick_user();
+
+ }
+
+ /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+ if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+
+ /* (fe included to prevent random interference from the BE) */
+ /* Copy the response message to FERing */
+
+ DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
+ (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
+ (unsigned)blkif->blk_ring.rsp_prod_pvt &
+ (RING_SIZE(&blkif->blk_ring)-1));
+
+ write_resp_to_fe_ring(blkif, resp_s);
+ kick_fe_domain(blkif);
+
+ }
+ }
+
+ blktap_be_ring.rsp_cons = i;
+
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+/* Debug : print the current ring indices. */
+
+void print_be_ring_idxs(void)
+{
+ if (blktap_be_ring.sring != NULL) {
+ WPRINTK("BE Ring: \n--------\n");
+ WPRINTK("BE: rsp_cons: %2d, req_prod_pvt: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_be_ring.rsp_cons,
+ blktap_be_ring.req_prod_pvt,
+ blktap_be_ring.sring->req_prod,
+ blktap_be_ring.sring->rsp_prod);
+ }
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
new file mode 100644
index 0000000000..c01818b3d2
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
@@ -0,0 +1,478 @@
+/******************************************************************************
+ * blktap_userdev.c
+ *
+ * XenLinux virtual block-device tap.
+ * Control interface between the driver and a character device.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
+
+#include "blktap.h"
+
+
+unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
+
+/* Only one process may open /dev/xen/blktap at any time. */
+static unsigned long blktap_dev_inuse;
+unsigned long blktap_ring_ok; /* make this ring->state */
+
+/* for poll: */
+static wait_queue_head_t blktap_wait;
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma = NULL;
+unsigned long mmap_vstart;
+unsigned long rings_vstart;
+
+/* Rings up to user space. */
+static blkif_front_ring_t blktap_ufe_ring;
+static blkif_back_ring_t blktap_ube_ring;
+static ctrl_front_ring_t blktap_uctrl_ring;
+
+/* local prototypes */
+static int blktap_read_fe_ring(void);
+static int blktap_read_be_ring(void);
+
+/* -------[ blktap vm ops ]------------------------------------------- */
+
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ /*
+ * if the page has not been mapped in by the driver then generate
+ * a SIGBUS to the domain.
+ */
+
+ force_sig(SIGBUS, current);
+
+ return 0;
+}
+
+struct vm_operations_struct blktap_vm_ops = {
+ nopage: blktap_nopage,
+};
+
+/* -------[ blktap file ops ]----------------------------------------- */
+
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+ blkif_sring_t *sring;
+ ctrl_sring_t *csring;
+
+ if ( test_and_set_bit(0, &blktap_dev_inuse) )
+ return -EBUSY;
+
+ printk(KERN_ALERT "blktap open.\n");
+
+ /* Allocate the ctrl ring. */
+ csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (csring == NULL)
+ goto fail_nomem;
+
+ SetPageReserved(virt_to_page(csring));
+
+ SHARED_RING_INIT(csring);
+ FRONT_RING_INIT(&blktap_uctrl_ring, csring, PAGE_SIZE);
+
+ /* Allocate the fe ring. */
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
+ goto fail_free_ctrl;
+
+ SetPageReserved(virt_to_page(sring));
+
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
+
+ /* Allocate the be ring. */
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
+ goto fail_free_fe;
+
+ SetPageReserved(virt_to_page(sring));
+
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blktap_ube_ring, sring, PAGE_SIZE);
+
+ DPRINTK(KERN_ALERT "blktap open.\n");
+
+ return 0;
+
+ fail_free_fe:
+ free_page( (unsigned long) blktap_ufe_ring.sring);
+
+ fail_free_ctrl:
+ free_page( (unsigned long) blktap_uctrl_ring.sring);
+
+ fail_nomem:
+ blktap_dev_inuse = 0; return -ENOMEM;
+}
+
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+ blktap_dev_inuse = 0;
+ blktap_ring_ok = 0;
+
+ printk(KERN_ALERT "blktap closed.\n");
+
+ /* Free the ring page. */
+ ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
+ free_page((unsigned long) blktap_uctrl_ring.sring);
+
+ ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+ free_page((unsigned long) blktap_ufe_ring.sring);
+
+ ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
+ free_page((unsigned long) blktap_ube_ring.sring);
+
+ /* Clear any active mappings. */
+ if (blktap_vma != NULL) {
+ zap_page_range(blktap_vma, blktap_vma->vm_start,
+ blktap_vma->vm_end - blktap_vma->vm_start, NULL);
+ blktap_vma = NULL;
+ }
+
+ return 0;
+}
+
+/* Note on mmap:
+ * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio
+ * work to do direct page access from userspace, this ended up being a
+ * problem. The bigger issue seems to be that there is no way to map
+ * a foreign page in to user space and have the virtual address of that
+ * page map sanely down to a mfn.
+ * Removing the VM_IO flag results in a loop in get_user_pages, as
+ * pfn_valid() always fails on a foreign page.
+ */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ int size;
+
+ printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+ vma->vm_start, vma->vm_end);
+
+ vma->vm_ops = &blktap_vm_ops;
+
+ size = vma->vm_end - vma->vm_start;
+ if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+ printk(KERN_INFO
+ "blktap: you _must_ map exactly %d pages!\n",
+ MMAP_PAGES + RING_PAGES);
+ return -EAGAIN;
+ }
+
+ size >>= PAGE_SHIFT;
+ printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
+
+ rings_vstart = vma->vm_start;
+ mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+
+ /* Map the ring pages to the start of the region and reserve it. */
+
+ /* not sure if I really need to do this... */
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start,
+ __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
+ }
+
+
+ DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE,
+ __pa(blktap_ube_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("be_ring: remap_pfn_range failure!\n");
+ }
+
+ DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ),
+ __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("fe_ring: remap_pfn_range failure!\n");
+ }
+
+ blktap_vma = vma;
+ blktap_ring_ok = 1;
+
+ return 0;
+}
+
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ switch(cmd) {
+ case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+ return blktap_read_fe_ring();
+
+ case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
+ return blktap_read_be_ring();
+
+ case BLKTAP_IOCTL_SETMODE:
+ if (BLKTAP_MODE_VALID(arg)) {
+ blktap_mode = arg;
+ /* XXX: may need to flush rings here. */
+ printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+ return 0;
+ } /* NOTE(review): invalid mode falls through to PRINT_IDXS -- confirm intended */
+ case BLKTAP_IOCTL_PRINT_IDXS:
+ {
+ print_be_ring_idxs();
+ print_fe_ring_idxs();
+ WPRINTK("User Rings: \n-----------\n");
+ WPRINTK("UF: rsp_cons: %2d, req_prod_pvt: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ufe_ring.rsp_cons,
+ blktap_ufe_ring.req_prod_pvt,
+ blktap_ufe_ring.sring->req_prod,
+ blktap_ufe_ring.sring->rsp_prod);
+ WPRINTK("UB: req_cons: %2d, rsp_prod_pvt: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ube_ring.req_cons,
+ blktap_ube_ring.rsp_prod_pvt,
+ blktap_ube_ring.sring->req_prod,
+ blktap_ube_ring.sring->rsp_prod);
+
+ }
+ }
+ return -ENOIOCTLCMD;
+}
+
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+ poll_wait(file, &blktap_wait, wait);
+
+ if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
+ RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ||
+ RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
+
+ RING_PUSH_REQUESTS(&blktap_uctrl_ring);
+ RING_PUSH_REQUESTS(&blktap_ufe_ring);
+ RING_PUSH_RESPONSES(&blktap_ube_ring);
+ return POLLIN | POLLRDNORM;
+ }
+
+ return 0;
+}
+
+void blktap_kick_user(void)
+{
+ /* blktap_ring->req_prod = blktap_req_prod; */
+ wake_up_interruptible(&blktap_wait);
+}
+
+static struct file_operations blktap_fops = {
+ owner: THIS_MODULE,
+ poll: blktap_poll,
+ ioctl: blktap_ioctl,
+ open: blktap_open,
+ release: blktap_release,
+ mmap: blktap_mmap,
+};
+
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+ blkif_request_t *target;
+ int error, i;
+
+ /*
+ * This is called to pass a request from the real frontend domain's
+ * blkif ring to the character device.
+ */
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: ufe_ring not ready for a request!\n");
+ return 0;
+ }
+
+ if ( RING_FULL(&blktap_ufe_ring) ) {
+ PRINTK("blktap: fe_ring is full, can't add.\n");
+ return 0;
+ }
+
+ target = RING_GET_REQUEST(&blktap_ufe_ring,
+ blktap_ufe_ring.req_prod_pvt);
+ memcpy(target, req, sizeof(*req));
+
+ /* Attempt to map the foreign pages directly in to the application */
+ for (i=0; i<target->nr_segments; i++) {
+
+ error = direct_remap_area_pages(blktap_vma->vm_mm,
+ MMAP_VADDR(ID_TO_IDX(req->id), i),
+ target->frame_and_sects[i] & PAGE_MASK,
+ PAGE_SIZE,
+ blktap_vma->vm_page_prot,
+ ID_TO_DOM(req->id));
+ if ( error != 0 ) {
+ printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+ /* the request is now dropped on the floor. */
+ return 0;
+ }
+ }
+
+ blktap_ufe_ring.req_prod_pvt++;
+
+ return 0;
+}
+
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+ blkif_response_t *target;
+
+ /*
+ * This is called to pass a request from the real backend domain's
+ * blkif ring to the character device.
+ */
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: be_ring not ready for a request!\n");
+ return 0;
+ }
+
+ /* No test for fullness in the response direction. */
+
+ target = RING_GET_RESPONSE(&blktap_ube_ring,
+ blktap_ube_ring.rsp_prod_pvt);
+ memcpy(target, rsp, sizeof(*rsp));
+
+ /* no mapping -- pages were mapped in blktap_write_fe_ring() */
+
+ blktap_ube_ring.rsp_prod_pvt++;
+
+ return 0;
+}
+
+static int blktap_read_fe_ring(void)
+{
+ /* This is called to read responses from the UFE ring. */
+
+ RING_IDX i, rp;
+ blkif_response_t *resp_s;
+ blkif_t *blkif;
+ active_req_t *ar;
+
+ DPRINTK("blktap_read_fe_ring()\n");
+
+ /* if we are forwarding from UFERring to FERing */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+ /* for each outstanding message on the UFEring */
+ rp = blktap_ufe_ring.sring->rsp_prod;
+ rmb();
+
+ for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
+ {
+ resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+
+ DPRINTK("resp->fe_ring\n");
+ ar = lookup_active_req(ID_TO_IDX(resp_s->id));
+ blkif = ar->blkif;
+ zap_page_range(blktap_vma, MMAP_VADDR(ID_TO_IDX(resp_s->id), 0),
+ ar->nr_pages << PAGE_SHIFT, NULL);
+ write_resp_to_fe_ring(blkif, resp_s);
+ blktap_ufe_ring.rsp_cons = i + 1;
+ kick_fe_domain(blkif);
+ }
+ }
+ return 0;
+}
+
+static int blktap_read_be_ring(void)
+{
+ /* This is called to read requests from the UBE ring. */
+
+ RING_IDX i, rp;
+ blkif_request_t *req_s;
+
+ DPRINTK("blktap_read_be_ring()\n");
+
+ /* if we are forwarding from UFERring to FERing */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
+
+ /* for each outstanding message on the UFEring */
+ rp = blktap_ube_ring.sring->req_prod;
+ rmb();
+ for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
+ {
+ req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
+
+ DPRINTK("req->be_ring\n");
+ write_req_to_be_ring(req_s);
+ kick_be_domain();
+ }
+
+ blktap_ube_ring.req_cons = i;
+ }
+
+ return 0;
+}
+
+int blktap_write_ctrl_ring(ctrl_msg_t *msg)
+{
+ ctrl_msg_t *target;
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: be_ring not ready for a request!\n");
+ return 0;
+ }
+
+ /* No test for fullness in the response direction. */
+
+ target = RING_GET_REQUEST(&blktap_uctrl_ring,
+ blktap_uctrl_ring.req_prod_pvt);
+ memcpy(target, msg, sizeof(*msg));
+
+ blktap_uctrl_ring.req_prod_pvt++;
+
+ /* currently treat the ring as unidirectional. */
+ blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
+
+ return 0;
+
+}
+
+/* -------[ blktap module setup ]------------------------------------- */
+
+static struct miscdevice blktap_miscdev = {
+ .minor = BLKTAP_MINOR,
+ .name = "blktap",
+ .fops = &blktap_fops,
+ .devfs_name = "misc/blktap",
+};
+
+int blktap_init(void)
+{
+ int err;
+
+ err = misc_register(&blktap_miscdev);
+ if ( err != 0 )
+ {
+ printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+ return err;
+ }
+
+ init_waitqueue_head(&blktap_wait);
+
+
+ return 0;
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/console/console.c b/linux-2.6.11-xen-sparse/drivers/xen/console/console.c
index a524688ec3..142ca2baaa 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/console/console.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/console/console.c
@@ -64,15 +64,34 @@
* warnings from standard distro startup scripts.
*/
static enum { XC_OFF, XC_DEFAULT, XC_TTY, XC_SERIAL } xc_mode = XC_DEFAULT;
+static int xc_num = -1;
static int __init xencons_setup(char *str)
{
- if ( !strcmp(str, "tty") )
- xc_mode = XC_TTY;
- else if ( !strcmp(str, "ttyS") )
+ char *q;
+ int n;
+
+ if ( !strncmp(str, "ttyS", 4) )
xc_mode = XC_SERIAL;
- else if ( !strcmp(str, "off") )
+ else if ( !strncmp(str, "tty", 3) )
+ xc_mode = XC_TTY;
+ else if ( !strncmp(str, "off", 3) )
xc_mode = XC_OFF;
+
+ switch ( xc_mode )
+ {
+ case XC_SERIAL:
+ n = simple_strtol( str+4, &q, 10 );
+ if ( q > (str + 4) ) xc_num = n;
+ break;
+ case XC_TTY:
+ n = simple_strtol( str+3, &q, 10 );
+ if ( q > (str + 3) ) xc_num = n;
+ break;
+ default:
+ break;
+ }
+
return 1;
}
__setup("xencons=", xencons_setup);
@@ -141,16 +160,12 @@ static void kcons_write_dom0(
{
int rc;
- while ( count > 0 )
+ while ( (count > 0) &&
+ ((rc = HYPERVISOR_console_io(
+ CONSOLEIO_write, count, (char *)s)) > 0) )
{
- if ( (rc = HYPERVISOR_console_io(CONSOLEIO_write,
- count, (char *)s)) > 0 )
- {
- count -= rc;
- s += rc;
- }
- else
- break;
+ count -= rc;
+ s += rc;
}
}
@@ -187,8 +202,8 @@ void xen_console_init(void)
xc_mode = XC_SERIAL;
kcons_info.write = kcons_write_dom0;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- if ( xc_mode == XC_SERIAL )
- kcons_info.flags |= CON_ENABLED;
+ if ( xc_mode == XC_SERIAL )
+ kcons_info.flags |= CON_ENABLED;
#endif
}
else
@@ -198,17 +213,26 @@ void xen_console_init(void)
kcons_info.write = kcons_write;
}
- if ( xc_mode == XC_OFF )
- return __RETCODE;
-
- if ( xc_mode == XC_SERIAL )
+ switch ( xc_mode )
+ {
+ case XC_SERIAL:
strcpy(kcons_info.name, "ttyS");
- else
+ if ( xc_num == -1 ) xc_num = 0;
+ break;
+
+ case XC_TTY:
strcpy(kcons_info.name, "tty");
+ if ( xc_num == -1 ) xc_num = 1;
+ break;
+
+ default:
+ return __RETCODE;
+ }
wbuf = alloc_bootmem(wbuf_size);
register_console(&kcons_info);
+
return __RETCODE;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -247,7 +271,7 @@ void xencons_force_flush(void)
* We use dangerous control-interface functions that require a quiescent
* system and no interrupts. Try to ensure this with a global cli().
*/
- local_irq_disable(); /* XXXsmp */
+ local_irq_disable(); /* XXXsmp */
/* Spin until console data is flushed through to the domain controller. */
while ( (wc != wp) && !ctrl_if_transmitter_empty() )
@@ -488,8 +512,10 @@ static inline int __xencons_put_char(int ch)
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static int xencons_write(struct tty_struct *tty, const unsigned char *buf,
- int count)
+static int xencons_write(
+ struct tty_struct *tty,
+ const unsigned char *buf,
+ int count)
{
int i;
unsigned long flags;
@@ -511,8 +537,11 @@ static int xencons_write(struct tty_struct *tty, const unsigned char *buf,
return i;
}
#else
-static int xencons_write(struct tty_struct *tty, int from_user,
- const u_char *buf, int count)
+static int xencons_write(
+ struct tty_struct *tty,
+ int from_user,
+ const u_char *buf,
+ int count)
{
int i;
unsigned long flags;
@@ -655,7 +684,7 @@ static int xennullcon_dummy(void)
return 0;
}
-#define DUMMY (void *)xennullcon_dummy
+#define DUMMY (void *)xennullcon_dummy
/*
* The console `switch' structure for the dummy console
@@ -718,14 +747,14 @@ static int __init xencons_init(void)
if ( xc_mode == XC_SERIAL )
{
DRV(xencons_driver)->name = "ttyS";
- DRV(xencons_driver)->minor_start = 64;
- DRV(xencons_driver)->name_base = 0;
+ DRV(xencons_driver)->minor_start = 64 + xc_num;
+ DRV(xencons_driver)->name_base = 0 + xc_num;
}
else
{
DRV(xencons_driver)->name = "tty";
- DRV(xencons_driver)->minor_start = 1;
- DRV(xencons_driver)->name_base = 1;
+ DRV(xencons_driver)->minor_start = xc_num;
+ DRV(xencons_driver)->name_base = xc_num;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -772,8 +801,9 @@ static int __init xencons_init(void)
(void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0);
}
- printk("Xen virtual console successfully installed as %s\n",
- DRV(xencons_driver)->name);
+ printk("Xen virtual console successfully installed as %s%d\n",
+ DRV(xencons_driver)->name,
+ DRV(xencons_driver)->name_base );
return 0;
}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/evtchn/evtchn.c b/linux-2.6.11-xen-sparse/drivers/xen/evtchn/evtchn.c
index 97d229001d..f5da4283d1 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/evtchn/evtchn.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/evtchn/evtchn.c
@@ -44,6 +44,7 @@
#include <linux/poll.h>
#include <linux/irq.h>
#include <linux/init.h>
+#define XEN_EVTCHN_MASK_OPS
#include <asm-xen/evtchn.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
@@ -60,8 +61,8 @@ static devfs_handle_t xen_dev_dir;
struct per_user_data {
/* Notification ring, accessed via /dev/xen/evtchn. */
-# define RING_SIZE 2048 /* 2048 16-bit entries */
-# define RING_MASK(_i) ((_i)&(RING_SIZE-1))
+# define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */
+# define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
u16 *ring;
unsigned int ring_cons, ring_prod, ring_overflow;
@@ -85,9 +86,9 @@ void evtchn_device_upcall(int port)
if ( (u = port_user[port]) != NULL )
{
- if ( (u->ring_prod - u->ring_cons) < RING_SIZE )
+ if ( (u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE )
{
- u->ring[RING_MASK(u->ring_prod)] = (u16)port;
+ u->ring[EVTCHN_RING_MASK(u->ring_prod)] = (u16)port;
if ( u->ring_cons == u->ring_prod++ )
{
wake_up_interruptible(&u->evtchn_wait);
@@ -153,10 +154,10 @@ static ssize_t evtchn_read(struct file *file, char *buf,
}
/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
- if ( ((c ^ p) & RING_SIZE) != 0 )
+ if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 )
{
- bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(u16);
- bytes2 = RING_MASK(p) * sizeof(u16);
+ bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(u16);
+ bytes2 = EVTCHN_RING_MASK(p) * sizeof(u16);
}
else
{
@@ -175,7 +176,7 @@ static ssize_t evtchn_read(struct file *file, char *buf,
bytes2 = count - bytes1;
}
- if ( copy_to_user(buf, &u->ring[RING_MASK(c)], bytes1) ||
+ if ( copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
((bytes2 != 0) && copy_to_user(&buf[bytes1], &u->ring[0], bytes2)) )
{
rc = -EFAULT;
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/netback/Makefile
index 3279442145..5085bf034d 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/Makefile
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/Makefile
@@ -1,2 +1,2 @@
-obj-y := netback.o control.o interface.o
+obj-y := netback.o control.o interface.o loopback.o
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
index 0831cbe311..dfb750ee36 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
@@ -35,6 +35,8 @@ typedef struct netif_st {
domid_t domid;
unsigned int handle;
+ u8 fe_dev_addr[6];
+
/* Physical parameters of the comms window. */
unsigned long tx_shmem_frame;
unsigned long rx_shmem_frame;
@@ -76,6 +78,7 @@ typedef struct netif_st {
void netif_create(netif_be_create_t *create);
void netif_destroy(netif_be_destroy_t *destroy);
+void netif_creditlimit(netif_be_creditlimit_t *creditlimit);
void netif_connect(netif_be_connect_t *connect);
int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id);
void netif_disconnect_complete(netif_t *netif);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c
index 6ca0ba2c75..9392d5a3d2 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c
@@ -10,41 +10,34 @@
static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
+ DPRINTK("Received netif backend message, subtype=%d\n", msg->subtype);
+
switch ( msg->subtype )
{
case CMSG_NETIF_BE_CREATE:
- if ( msg->length != sizeof(netif_be_create_t) )
- goto parse_error;
netif_create((netif_be_create_t *)&msg->msg[0]);
break;
case CMSG_NETIF_BE_DESTROY:
- if ( msg->length != sizeof(netif_be_destroy_t) )
- goto parse_error;
netif_destroy((netif_be_destroy_t *)&msg->msg[0]);
- break;
+ break;
+ case CMSG_NETIF_BE_CREDITLIMIT:
+ netif_creditlimit((netif_be_creditlimit_t *)&msg->msg[0]);
+ break;
case CMSG_NETIF_BE_CONNECT:
- if ( msg->length != sizeof(netif_be_connect_t) )
- goto parse_error;
netif_connect((netif_be_connect_t *)&msg->msg[0]);
- break;
+ break;
case CMSG_NETIF_BE_DISCONNECT:
- if ( msg->length != sizeof(netif_be_disconnect_t) )
- goto parse_error;
if ( !netif_disconnect((netif_be_disconnect_t *)&msg->msg[0],msg->id) )
return; /* Sending the response is deferred until later. */
break;
default:
- goto parse_error;
+ DPRINTK("Parse error while reading message subtype %d, len %d\n",
+ msg->subtype, msg->length);
+ msg->length = 0;
+ break;
}
ctrl_if_send_response(msg);
- return;
-
- parse_error:
- DPRINTK("Parse error while reading message subtype %d, len %d\n",
- msg->subtype, msg->length);
- msg->length = 0;
- ctrl_if_send_response(msg);
}
void netif_ctrlif_init(void)
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/interface.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/interface.c
index 1d1a5923c4..98c6cfa98b 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/interface.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/interface.c
@@ -3,7 +3,7 @@
*
* Network-device interface management.
*
- * Copyright (c) 2004, Keir Fraser
+ * Copyright (c) 2004-2005, Keir Fraser
*/
#include "common.h"
@@ -140,7 +140,7 @@ void netif_create(netif_be_create_t *create)
netif->credit_bytes = netif->remaining_credit = ~0UL;
netif->credit_usec = 0UL;
- /*init_ac_timer(&new_vif->credit_timeout);*/
+ init_timer(&netif->credit_timeout);
pnetif = &netif_hash[NETIF_HASH(domid, handle)];
while ( *pnetif != NULL )
@@ -159,17 +159,29 @@ void netif_create(netif_be_create_t *create)
dev->get_stats = netif_be_get_stats;
dev->open = net_open;
dev->stop = net_close;
+ dev->features = NETIF_F_NO_CSUM;
/* Disable queuing. */
dev->tx_queue_len = 0;
- /*
- * Initialise a dummy MAC address. We choose the numerically largest
- * non-broadcast address to prevent the address getting stolen by an
- * Ethernet bridge for STP purposes. (FE:FF:FF:FF:FF:FF)
- */
- memset(dev->dev_addr, 0xFF, ETH_ALEN);
- dev->dev_addr[0] &= ~0x01;
+ if ( (create->be_mac[0] == 0) && (create->be_mac[1] == 0) &&
+ (create->be_mac[2] == 0) && (create->be_mac[3] == 0) &&
+ (create->be_mac[4] == 0) && (create->be_mac[5] == 0) )
+ {
+ /*
+ * Initialise a dummy MAC address. We choose the numerically largest
+ * non-broadcast address to prevent the address getting stolen by an
+ * Ethernet bridge for STP purposes. (FE:FF:FF:FF:FF:FF)
+ */
+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
+ dev->dev_addr[0] &= ~0x01;
+ }
+ else
+ {
+ memcpy(dev->dev_addr, create->be_mac, ETH_ALEN);
+ }
+
+ memcpy(netif->fe_dev_addr, create->mac, ETH_ALEN);
rtnl_lock();
err = register_netdevice(dev);
@@ -223,6 +235,38 @@ void netif_destroy(netif_be_destroy_t *destroy)
destroy->status = NETIF_BE_STATUS_OKAY;
}
+void netif_creditlimit(netif_be_creditlimit_t *creditlimit)
+{
+ domid_t domid = creditlimit->domid;
+ unsigned int handle = creditlimit->netif_handle;
+ netif_t *netif;
+
+ netif = netif_find_by_handle(domid, handle);
+ if ( unlikely(netif == NULL) )
+ {
+ DPRINTK("netif_creditlimit attempted for non-existent netif"
+ " (%u,%u)\n", creditlimit->domid, creditlimit->netif_handle);
+ creditlimit->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ /* Set the credit limit (reset remaining credit to new limit). */
+ netif->credit_bytes = netif->remaining_credit = creditlimit->credit_bytes;
+ netif->credit_usec = creditlimit->period_usec;
+
+ if ( netif->status == CONNECTED )
+ {
+ /*
+ * Schedule work so that any packets waiting under previous credit
+ * limit are dealt with (acts like a replenishment point).
+ */
+ netif->credit_timeout.expires = jiffies;
+ netif_schedule_work(netif);
+ }
+
+ creditlimit->status = NETIF_BE_STATUS_OKAY;
+}
+
void netif_connect(netif_be_connect_t *connect)
{
domid_t domid = connect->domid;
@@ -234,9 +278,6 @@ void netif_connect(netif_be_connect_t *connect)
pgprot_t prot;
int error;
netif_t *netif;
-#if 0
- struct net_device *eth0_dev;
-#endif
netif = netif_find_by_handle(domid, handle);
if ( unlikely(netif == NULL) )
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/loopback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/loopback.c
index ebada3721a..fa19e600c2 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/loopback.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/loopback.c
@@ -67,6 +67,17 @@ static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
np->stats.rx_bytes += skb->len;
np->stats.rx_packets++;
+ if ( skb->ip_summed == CHECKSUM_HW )
+ {
+ /* Defer checksum calculation. */
+ skb->proto_csum_blank = 1;
+ /* Must be a local packet: assert its integrity. */
+ skb->proto_csum_valid = 1;
+ }
+
+ skb->ip_summed = skb->proto_csum_valid ?
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
+
skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
skb->protocol = eth_type_trans(skb, dev);
skb->dev = dev;
@@ -95,6 +106,8 @@ static void loopback_construct(struct net_device *dev, struct net_device *lo)
dev->tx_queue_len = 0;
+ dev->features = NETIF_F_HIGHDMA | NETIF_F_LLTX;
+
/*
* We do not set a jumbo MTU on the interface. Otherwise the network
* stack will try to send large packets that will get dropped by the
@@ -118,6 +131,9 @@ static int __init loopback_init(void)
loopback_construct(dev1, dev2);
loopback_construct(dev2, dev1);
+ dev1->features |= NETIF_F_NO_CSUM;
+ dev2->features |= NETIF_F_IP_CSUM;
+
/*
* Initialise a dummy MAC address for the 'dummy backend' interface. We
* choose the numerically largest non-broadcast address to prevent the
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
index 6d29cc262f..02f0c2fc51 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
@@ -7,11 +7,16 @@
* reference front-end implementation can be found in:
* drivers/xen/netfront/netfront.c
*
- * Copyright (c) 2002-2004, K A Fraser
+ * Copyright (c) 2002-2005, K A Fraser
*/
#include "common.h"
#include <asm-xen/balloon.h>
+#include <asm-xen/evtchn.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#include <linux/delay.h>
+#endif
static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
@@ -22,7 +27,8 @@ static int make_rx_response(netif_t *netif,
u16 id,
s8 st,
memory_t addr,
- u16 size);
+ u16 size,
+ u16 csum_valid);
static void net_tx_action(unsigned long unused);
static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
@@ -33,8 +39,9 @@ static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
static struct timer_list net_timer;
static struct sk_buff_head rx_queue;
-static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2];
-static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3];
+static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
+static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
static unsigned char rx_notify[NR_EVENT_CHANNELS];
/* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -146,8 +153,10 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
goto drop;
skb_reserve(nskb, hlen);
__skb_put(nskb, skb->len);
- (void)skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen);
+ if (skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen))
+ BUG();
nskb->dev = skb->dev;
+ nskb->proto_csum_valid = skb->proto_csum_valid;
dev_kfree_skb(skb);
skb = nskb;
}
@@ -190,8 +199,9 @@ static void net_rx_action(unsigned long unused)
netif_t *netif;
s8 status;
u16 size, id, evtchn;
- mmu_update_t *mmu;
multicall_entry_t *mcl;
+ mmu_update_t *mmu;
+ struct mmuext_op *mmuext;
unsigned long vdata, mdata, new_mfn;
struct sk_buff_head rxq;
struct sk_buff *skb;
@@ -202,6 +212,7 @@ static void net_rx_action(unsigned long unused)
mcl = rx_mcl;
mmu = rx_mmu;
+ mmuext = rx_mmuext;
while ( (skb = skb_dequeue(&rx_queue)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -224,25 +235,26 @@ static void net_rx_action(unsigned long unused)
*/
phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
- mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- mmu[0].val = __pa(vdata) >> PAGE_SHIFT;
- mmu[1].ptr = MMU_EXTENDED_COMMAND;
- mmu[1].val = MMUEXT_SET_FOREIGNDOM;
- mmu[1].val |= (unsigned long)netif->domid << 16;
- mmu[2].ptr = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND;
- mmu[2].val = MMUEXT_REASSIGN_PAGE;
+ mcl->op = __HYPERVISOR_update_va_mapping;
+ mcl->args[0] = vdata;
+ mcl->args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+ mcl->args[2] = 0;
+ mcl++;
- mcl[0].op = __HYPERVISOR_update_va_mapping;
- mcl[0].args[0] = vdata >> PAGE_SHIFT;
- mcl[0].args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
- mcl[0].args[2] = 0;
- mcl[1].op = __HYPERVISOR_mmu_update;
- mcl[1].args[0] = (unsigned long)mmu;
- mcl[1].args[1] = 3;
- mcl[1].args[2] = 0;
+ mcl->op = __HYPERVISOR_mmuext_op;
+ mcl->args[0] = (unsigned long)mmuext;
+ mcl->args[1] = 1;
+ mcl->args[2] = 0;
+ mcl->args[3] = netif->domid;
+ mcl++;
- mcl += 2;
- mmu += 3;
+ mmuext->cmd = MMUEXT_REASSIGN_PAGE;
+ mmuext->mfn = mdata >> PAGE_SHIFT;
+ mmuext++;
+
+ mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ mmu->val = __pa(vdata) >> PAGE_SHIFT;
+ mmu++;
__skb_queue_tail(&rxq, skb);
@@ -254,12 +266,19 @@ static void net_rx_action(unsigned long unused)
if ( mcl == rx_mcl )
return;
- mcl[-2].args[2] = UVMF_FLUSH_TLB;
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = mmu - rx_mmu;
+ mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
+ mcl++;
+
+ mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
BUG();
mcl = rx_mcl;
- mmu = rx_mmu;
+ mmuext = rx_mmuext;
while ( (skb = __skb_dequeue(&rxq)) != NULL )
{
netif = netdev_priv(skb->dev);
@@ -267,7 +286,7 @@ static void net_rx_action(unsigned long unused)
/* Rederive the machine addresses. */
new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
- mdata = ((mmu[2].ptr & PAGE_MASK) |
+ mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
((unsigned long)skb->data & ~PAGE_MASK));
atomic_set(&(skb_shinfo(skb)->dataref), 1);
@@ -278,12 +297,12 @@ static void net_rx_action(unsigned long unused)
netif->stats.tx_packets++;
/* The update_va_mapping() must not fail. */
- if ( unlikely(mcl[0].args[5] != 0) )
+ if ( unlikely(mcl[0].result != 0) )
BUG();
/* Check the reassignment error code. */
status = NETIF_RSP_OKAY;
- if ( unlikely(mcl[1].args[5] != 0) )
+ if ( unlikely(mcl[1].result != 0) )
{
DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
free_mfn(mdata >> PAGE_SHIFT);
@@ -292,7 +311,8 @@ static void net_rx_action(unsigned long unused)
evtchn = netif->evtchn;
id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
- if ( make_rx_response(netif, id, status, mdata, size) &&
+ if ( make_rx_response(netif, id, status, mdata,
+ size, skb->proto_csum_valid) &&
(rx_notify[evtchn] == 0) )
{
rx_notify[evtchn] = 1;
@@ -303,7 +323,7 @@ static void net_rx_action(unsigned long unused)
dev_kfree_skb(skb);
mcl += 2;
- mmu += 3;
+ mmuext += 1;
}
while ( notify_nr != 0 )
@@ -379,14 +399,13 @@ void netif_deschedule_work(netif_t *netif)
remove_from_net_schedule_list(netif);
}
-#if 0
+
static void tx_credit_callback(unsigned long data)
{
netif_t *netif = (netif_t *)data;
netif->remaining_credit = netif->credit_bytes;
netif_schedule_work(netif);
}
-#endif
static void net_tx_action(unsigned long unused)
{
@@ -408,13 +427,13 @@ static void net_tx_action(unsigned long unused)
{
pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
mcl[0].op = __HYPERVISOR_update_va_mapping;
- mcl[0].args[0] = MMAP_VADDR(pending_idx) >> PAGE_SHIFT;
+ mcl[0].args[0] = MMAP_VADDR(pending_idx);
mcl[0].args[1] = 0;
mcl[0].args[2] = 0;
mcl++;
}
- mcl[-1].args[2] = UVMF_FLUSH_TLB;
+ mcl[-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
BUG();
@@ -422,7 +441,7 @@ static void net_tx_action(unsigned long unused)
while ( dealloc_cons != dp )
{
/* The update_va_mapping() must not fail. */
- if ( unlikely(mcl[0].args[5] != 0) )
+ if ( unlikely(mcl[0].result != 0) )
BUG();
pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
@@ -470,42 +489,52 @@ static void net_tx_action(unsigned long unused)
continue;
}
- netif->tx->req_cons = ++netif->tx_req_cons;
-
- /*
- * 1. Ensure that we see the request when we copy it.
- * 2. Ensure that frontend sees updated req_cons before we check
- * for more work to schedule.
- */
- mb();
-
+ rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
sizeof(txreq));
-#if 0
/* Credit-based scheduling. */
- if ( tx.size > netif->remaining_credit )
+ if ( txreq.size > netif->remaining_credit )
{
- s_time_t now = NOW(), next_credit =
- netif->credit_timeout.expires + MICROSECS(netif->credit_usec);
- if ( next_credit <= now )
+ unsigned long now = jiffies;
+ unsigned long next_credit =
+ netif->credit_timeout.expires +
+ msecs_to_jiffies(netif->credit_usec / 1000);
+
+ /* Timer could already be pending in some rare cases. */
+ if ( timer_pending(&netif->credit_timeout) )
+ break;
+
+ /* Already passed the point at which we can replenish credit? */
+ if ( time_after_eq(now, next_credit) )
{
netif->credit_timeout.expires = now;
netif->remaining_credit = netif->credit_bytes;
}
- else
+
+ /* Still too big to send right now? Then set a timer callback. */
+ if ( txreq.size > netif->remaining_credit )
{
netif->remaining_credit = 0;
netif->credit_timeout.expires = next_credit;
netif->credit_timeout.data = (unsigned long)netif;
netif->credit_timeout.function = tx_credit_callback;
- netif->credit_timeout.cpu = smp_processor_id();
- add_ac_timer(&netif->credit_timeout);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ add_timer_on(&netif->credit_timeout, smp_processor_id());
+#else
+ add_timer(&netif->credit_timeout);
+#endif
break;
}
}
- netif->remaining_credit -= tx.size;
-#endif
+ netif->remaining_credit -= txreq.size;
+
+ /*
+ * Why the barrier? It ensures that the frontend sees updated req_cons
+ * before we check for more work to schedule.
+ */
+ netif->tx->req_cons = ++netif->tx_req_cons;
+ mb();
netif_schedule_work(netif);
@@ -545,7 +574,7 @@ static void net_tx_action(unsigned long unused)
skb_reserve(skb, 16);
mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
- mcl[0].args[0] = MMAP_VADDR(pending_idx) >> PAGE_SHIFT;
+ mcl[0].args[0] = MMAP_VADDR(pending_idx);
mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
mcl[0].args[2] = 0;
mcl[0].args[3] = netif->domid;
@@ -578,7 +607,7 @@ static void net_tx_action(unsigned long unused)
memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
/* Check the remap error code. */
- if ( unlikely(mcl[0].args[5] != 0) )
+ if ( unlikely(mcl[0].result != 0) )
{
DPRINTK("Bad page frame\n");
make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
@@ -621,6 +650,11 @@ static void net_tx_action(unsigned long unused)
skb->dev = netif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
+ /* No checking needed on localhost, but remember the field is blank. */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->proto_csum_valid = 1;
+ skb->proto_csum_blank = txreq.csum_blank;
+
netif->stats.rx_bytes += txreq.size;
netif->stats.rx_packets++;
@@ -686,15 +720,17 @@ static int make_rx_response(netif_t *netif,
u16 id,
s8 st,
memory_t addr,
- u16 size)
+ u16 size,
+ u16 csum_valid)
{
NETIF_RING_IDX i = netif->rx_resp_prod;
netif_rx_response_t *resp;
resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
- resp->addr = addr;
- resp->id = id;
- resp->status = (s16)size;
+ resp->addr = addr;
+ resp->csum_valid = csum_valid;
+ resp->id = id;
+ resp->status = (s16)size;
if ( st < 0 )
resp->status = (s16)st;
wmb();
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
index 55d2670944..c1cf253510 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
@@ -40,6 +40,7 @@
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/proc_fs.h>
+#include <linux/ethtool.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/arp.h>
@@ -51,6 +52,7 @@
#include <asm-xen/xen-public/io/netif.h>
#include <asm-xen/balloon.h>
#include <asm/page.h>
+#include <asm/uaccess.h>
#ifndef __GFP_NOWARN
#define __GFP_NOWARN 0
@@ -394,19 +396,13 @@ static void network_alloc_rx_buffers(struct net_device *dev)
= INVALID_P2M_ENTRY;
rx_mcl[i].op = __HYPERVISOR_update_va_mapping;
- rx_mcl[i].args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
+ rx_mcl[i].args[0] = (unsigned long)skb->head;
rx_mcl[i].args[1] = 0;
rx_mcl[i].args[2] = 0;
}
- /*
- * We may have allocated buffers which have entries outstanding in the page
- * update queue -- make sure we flush those first!
- */
- flush_page_update_queue();
-
/* After all PTEs have been zapped we blow away stale TLB entries. */
- rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
+ rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
/* Give away a batch of pages. */
rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
@@ -423,7 +419,7 @@ static void network_alloc_rx_buffers(struct net_device *dev)
(void)HYPERVISOR_multicall(rx_mcl, i+1);
/* Check return status of HYPERVISOR_dom_mem_op(). */
- if (unlikely(rx_mcl[i].args[5] != i))
+ if (unlikely(rx_mcl[i].result != i))
panic("Unable to reduce memory reservation\n");
/* Above is a suitable barrier to ensure backend will see requests. */
@@ -478,6 +474,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx->id = id;
tx->addr = virt_to_machine(skb->data);
tx->size = skb->len;
+ tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
wmb(); /* Ensure that backend will see the request. */
np->tx->req_prod = i + 1;
@@ -578,6 +575,9 @@ static int netif_poll(struct net_device *dev, int *pbudget)
skb->len = rx->status;
skb->tail = skb->data + skb->len;
+ if ( rx->csum_valid )
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
np->stats.rx_packets++;
np->stats.rx_bytes += rx->status;
@@ -586,7 +586,7 @@ static int netif_poll(struct net_device *dev, int *pbudget)
mmu->val = __pa(skb->head) >> PAGE_SHIFT;
mmu++;
mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
+ mcl->args[0] = (unsigned long)skb->head;
mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
mcl->args[2] = 0;
mcl++;
@@ -606,6 +606,7 @@ static int netif_poll(struct net_device *dev, int *pbudget)
mcl->args[0] = (unsigned long)rx_mmu;
mcl->args[1] = mmu - rx_mmu;
mcl->args[2] = 0;
+ mcl->args[3] = DOMID_SELF;
mcl++;
(void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
}
@@ -928,6 +929,11 @@ vif_connect(struct net_private *np, netif_fe_interface_status_t *status)
vif_show(np);
}
+static struct ethtool_ops network_ethtool_ops =
+{
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = ethtool_op_set_tx_csum,
+};
/** Create a network device.
* @param handle device handle
@@ -971,7 +977,10 @@ static int create_netdev(int handle, struct net_device **val)
dev->get_stats = network_get_stats;
dev->poll = netif_poll;
dev->weight = 64;
-
+ dev->features = NETIF_F_IP_CSUM;
+
+ SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
if ((err = register_netdev(dev)) != 0) {
printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
goto exit;
@@ -1122,18 +1131,13 @@ static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
switch (msg->subtype) {
case CMSG_NETIF_FE_INTERFACE_STATUS:
- if (msg->length != sizeof(netif_fe_interface_status_t))
- goto error;
netif_interface_status((netif_fe_interface_status_t *) &msg->msg[0]);
break;
case CMSG_NETIF_FE_DRIVER_STATUS:
- if (msg->length != sizeof(netif_fe_driver_status_t))
- goto error;
netif_driver_status((netif_fe_driver_status_t *) &msg->msg[0]);
break;
- error:
default:
msg->length = 0;
break;
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
index 98e7e92ff4..2f9d5fde38 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
@@ -28,6 +28,11 @@
#include <asm-xen/xen-public/dom0_ops.h>
#include <asm-xen/xen_proc.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#define pud_t pgd_t
+#define pud_offset(d, va) d
+#endif
+
static struct proc_dir_entry *privcmd_intf;
static int privcmd_ioctl(struct inode *inode, struct file *file,
@@ -44,6 +49,7 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
return -EFAULT;
+#if defined(__i386__)
__asm__ __volatile__ (
"pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
"movl 4(%%eax),%%ebx ;"
@@ -55,7 +61,18 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
TRAP_INSTR "; "
"popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
: "=a" (ret) : "0" (&hypercall) : "memory" );
-
+#elif defined (__x86_64__)
+ __asm__ __volatile__ (
+ "movq %5,%%r10; movq %6,%%r8;" TRAP_INSTR
+ : "=a" (ret)
+ : "a" ((unsigned long)hypercall.op),
+ "D" ((unsigned long)hypercall.arg[0]),
+ "S" ((unsigned long)hypercall.arg[1]),
+ "d" ((unsigned long)hypercall.arg[2]),
+ "g" ((unsigned long)hypercall.arg[3]),
+ "g" ((unsigned long)hypercall.arg[4])
+ : "r11","rcx","r8","r10","memory");
+#endif
}
break;
@@ -83,6 +100,8 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
{
int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
+
+
if ( copy_from_user(&msg, p, n*sizeof(privcmd_mmap_entry_t)) )
return -EFAULT;
@@ -115,8 +134,7 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
case IOCTL_PRIVCMD_MMAPBATCH:
{
-#define MAX_DIRECTMAP_MMU_QUEUE 130
- mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
+ mmu_update_t u;
privcmd_mmapbatch_t m;
struct vm_area_struct *vma = NULL;
unsigned long *p, addr;
@@ -137,11 +155,6 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
if ( (m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end )
{ ret = -EFAULT; goto batch_err; }
- u[0].ptr = MMU_EXTENDED_COMMAND;
- u[0].val = MMUEXT_SET_FOREIGNDOM;
- u[0].val |= (unsigned long)m.dom << 16;
- v = w = &u[1];
-
p = m.arr;
addr = m.addr;
for ( i = 0; i < m.num; i++, addr += PAGE_SIZE, p++ )
@@ -149,24 +162,24 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
if ( get_user(mfn, p) )
return -EFAULT;
- v->val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
+ u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
__direct_remap_area_pages(vma->vm_mm,
addr,
PAGE_SIZE,
- v);
-
- if ( unlikely(HYPERVISOR_mmu_update(u, v - u + 1, NULL) < 0) )
- put_user( 0xF0000000 | mfn, p );
+ &u);
- v = w;
+ if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
+ put_user(0xF0000000 | mfn, p);
}
+
ret = 0;
break;
batch_err:
printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%p %lx-%lx\n",
- ret, vma, m.addr, m.num, m.arr, vma->vm_start, vma->vm_end);
+ ret, vma, m.addr, m.num, m.arr,
+ vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
break;
}
break;
@@ -174,13 +187,12 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN:
{
- unsigned long m2p_start_mfn =
- HYPERVISOR_shared_info->arch.mfn_to_pfn_start;
-
- if( put_user( m2p_start_mfn, (unsigned long *) data ) )
- ret = -EFAULT;
- else
- ret = 0;
+ unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
+ pgd_t *pgd = pgd_offset_k(m2pv);
+ pud_t *pud = pud_offset(pgd, m2pv);
+ pmd_t *pmd = pmd_offset(pud, m2pv);
+ unsigned long m2p_start_mfn = (*(unsigned long *)pmd) >> PAGE_SHIFT;
+ ret = put_user(m2p_start_mfn, (unsigned long *)data) ? -EFAULT: 0;
}
break;
@@ -207,9 +219,6 @@ static struct file_operations privcmd_file_ops = {
static int __init privcmd_init(void)
{
- if ( !(xen_start_info.flags & SIF_PRIVILEGED) )
- return 0;
-
privcmd_intf = create_xen_proc_entry("privcmd", 0400);
if ( privcmd_intf != NULL )
privcmd_intf->proc_fops = &privcmd_file_ops;
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h
new file mode 100644
index 0000000000..bcab2041bc
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h
@@ -0,0 +1,85 @@
+
+#ifndef __USBIF__BACKEND__COMMON_H__
+#define __USBIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/ctrl_if.h>
+#include <asm-xen/hypervisor.h>
+
+#include <asm-xen/xen-public/io/usbif.h>
+
+#if 0
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#else
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+typedef struct usbif_priv_st usbif_priv_t;
+
+struct usbif_priv_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned long shmem_frame;
+ unsigned int evtchn;
+ int irq;
+ /* Comms Information */
+ usbif_back_ring_t usb_ring;
+ /* Private fields. */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */
+ u8 disconnect_rspid;
+ usbif_priv_t *hash_next;
+ struct list_head usbif_list;
+ spinlock_t usb_ring_lock;
+ atomic_t refcnt;
+
+ struct work_struct work;
+};
+
+void usbif_create(usbif_be_create_t *create);
+void usbif_destroy(usbif_be_destroy_t *destroy);
+void usbif_connect(usbif_be_connect_t *connect);
+int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id);
+void usbif_disconnect_complete(usbif_priv_t *up);
+
+void usbif_release_port(usbif_be_release_port_t *msg);
+int usbif_claim_port(usbif_be_claim_port_t *msg);
+void usbif_release_ports(usbif_priv_t *up);
+
+usbif_priv_t *usbif_find(domid_t domid);
+#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define usbif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ usbif_disconnect_complete(_b); \
+ } while (0)
+
+
+void usbif_interface_init(void);
+void usbif_ctrlif_init(void);
+
+void usbif_deschedule(usbif_priv_t *up);
+void remove_from_usbif_list(usbif_priv_t *up);
+
+irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+#endif /* __USBIF__BACKEND__COMMON_H__ */
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c
new file mode 100644
index 0000000000..b46b16d8bc
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c
@@ -0,0 +1,61 @@
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/control.c
+ *
+ * Routines for interfacing with the control plane.
+ *
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype);
+
+ switch ( msg->subtype )
+ {
+ case CMSG_USBIF_BE_CREATE:
+ usbif_create((usbif_be_create_t *)&msg->msg[0]);
+ break;
+ case CMSG_USBIF_BE_DESTROY:
+ usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]);
+ break;
+ case CMSG_USBIF_BE_CONNECT:
+ usbif_connect((usbif_be_connect_t *)&msg->msg[0]);
+ break;
+ case CMSG_USBIF_BE_DISCONNECT:
+ if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) )
+ return; /* Sending the response is deferred until later. */
+ break;
+ case CMSG_USBIF_BE_CLAIM_PORT:
+ usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]);
+ break;
+ case CMSG_USBIF_BE_RELEASE_PORT:
+ usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]);
+ break;
+ default:
+ DPRINTK("Parse error while reading message subtype %d, len %d\n",
+ msg->subtype, msg->length);
+ msg->length = 0;
+ break;
+ }
+
+ ctrl_if_send_response(msg);
+}
+
+void usbif_ctrlif_init(void)
+{
+ ctrl_msg_t cmsg;
+ usbif_be_driver_status_changed_t st;
+
+ (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_USBIF_BE;
+ cmsg.subtype = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(usbif_be_driver_status_changed_t);
+ st.status = USBIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c
new file mode 100644
index 0000000000..c1a16e8000
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c
@@ -0,0 +1,252 @@
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/interface.c
+ *
+ * USB device interface management.
+ *
+ * by Mark Williamson, Copyright (c) 2004
+ */
+
+
+/*
+ * Based on arch/xen/drivers/blkif/backend/interface.c
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+#define USBIF_HASHSZ 1024
+#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1))
+
+static kmem_cache_t *usbif_priv_cachep;
+static usbif_priv_t *usbif_priv_hash[USBIF_HASHSZ];
+
+/* Look up the usbif_priv_t for a given domain id in the hash table.
+ * Returns NULL if no interface exists for that domain. */
+usbif_priv_t *usbif_find(domid_t domid)
+{
+    usbif_priv_t *up;
+
+    for ( up = usbif_priv_hash[USBIF_HASH(domid)];
+          (up != NULL) && (up->domid != domid);
+          up = up->hash_next )
+        continue;
+
+    return up;
+}
+
+/*
+ * Deferred-work body for completing a DISCONNECT: tears down the event
+ * channel and shared ring, then sends the response that usbif_disconnect()
+ * deferred.  Runs from the workqueue (see usbif_disconnect_complete()).
+ */
+static void __usbif_disconnect_complete(void *arg)
+{
+    usbif_priv_t *usbif = (usbif_priv_t *)arg;
+    ctrl_msg_t cmsg;
+    usbif_be_disconnect_t disc;
+
+    /*
+     * These can't be done in usbif_disconnect() because at that point there
+     * may be outstanding requests at the device whose asynchronous responses
+     * must still be notified to the remote driver.
+     */
+    unbind_evtchn_from_irq(usbif->evtchn);
+    vfree(usbif->usb_ring.sring);
+
+    /* Construct the deferred response message. */
+    cmsg.type         = CMSG_USBIF_BE;
+    cmsg.subtype      = CMSG_USBIF_BE_DISCONNECT;
+    cmsg.id           = usbif->disconnect_rspid;
+    cmsg.length       = sizeof(usbif_be_disconnect_t);
+    disc.domid        = usbif->domid;
+    disc.status       = USBIF_BE_STATUS_OKAY;
+    memcpy(cmsg.msg, &disc, sizeof(disc));
+
+    /*
+     * Make sure message is constructed /before/ status change, because
+     * after the status change the 'usbif' structure could be deallocated at
+     * any time.  Also make sure we send the response /after/ status change,
+     * as otherwise a subsequent CONNECT request could spuriously fail if
+     * another CPU doesn't see the status change yet.
+     */
+    mb();
+    if ( usbif->status != DISCONNECTING )
+        BUG();
+    usbif->status = DISCONNECTED;
+    mb();
+
+    /* Send the successful response. */
+    ctrl_if_send_response(&cmsg);
+}
+
+/* Schedule the deferred part of a disconnect to run in process context. */
+void usbif_disconnect_complete(usbif_priv_t *up)
+{
+    INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up);
+    schedule_work(&up->work);
+}
+
+/*
+ * usbif_create: allocate a new backend interface for 'create->domid' and
+ * insert it into the hash table in the DISCONNECTED state.  The outcome is
+ * reported in create->status (OKAY, OUT_OF_MEMORY, or INTERFACE_EXISTS).
+ */
+void usbif_create(usbif_be_create_t *create)
+{
+    domid_t       domid = create->domid;
+    usbif_priv_t **pup, *up;
+
+    if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL )
+    {
+        DPRINTK("Could not create usbif: out of memory\n");
+        create->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    memset(up, 0, sizeof(*up));
+    up->domid  = domid;
+    up->status = DISCONNECTED;
+    spin_lock_init(&up->usb_ring_lock);
+    atomic_set(&up->refcnt, 0);
+
+    /* Walk the hash chain; reject a duplicate interface for this domain. */
+    pup = &usbif_priv_hash[USBIF_HASH(domid)];
+    while ( *pup != NULL )
+    {
+        if ( (*pup)->domid == domid )
+        {
+            create->status = USBIF_BE_STATUS_INTERFACE_EXISTS;
+            kmem_cache_free(usbif_priv_cachep, up);
+            return;
+        }
+        pup = &(*pup)->hash_next;
+    }
+
+    /* Link at the tail of the chain. */
+    up->hash_next = *pup;
+    *pup = up;
+
+    create->status = USBIF_BE_STATUS_OKAY;
+}
+
+/*
+ * usbif_destroy: unlink and free the interface for 'destroy->domid'.
+ * Refuses while the interface is still connected.  Outcome is reported in
+ * destroy->status.
+ */
+void usbif_destroy(usbif_be_destroy_t *destroy)
+{
+    domid_t       domid = destroy->domid;
+    usbif_priv_t **pup, *up;
+
+    pup = &usbif_priv_hash[USBIF_HASH(domid)];
+    while ( (up = *pup) != NULL )
+    {
+        if ( up->domid == domid )
+        {
+            if ( up->status != DISCONNECTED )
+                goto still_connected;
+            goto destroy;
+        }
+        pup = &up->hash_next;
+    }
+
+    destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+    return;
+
+ still_connected:
+    destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
+    return;
+
+ destroy:
+    /* Unlink from the hash chain, release owned ports, then free. */
+    *pup = up->hash_next;
+    usbif_release_ports(up);
+    kmem_cache_free(usbif_priv_cachep, up);
+    destroy->status = USBIF_BE_STATUS_OKAY;
+}
+
+/*
+ * usbif_connect: map the frontend's shared ring page into our address
+ * space, bind its event channel / IRQ, and move the interface to the
+ * CONNECTED state.  The outcome is reported in connect->status.
+ */
+void usbif_connect(usbif_be_connect_t *connect)
+{
+    domid_t        domid       = connect->domid;
+    unsigned int   evtchn      = connect->evtchn;
+    unsigned long  shmem_frame = connect->shmem_frame;
+    struct vm_struct *vma;
+    pgprot_t       prot;
+    int            error;
+    usbif_priv_t  *up;
+    usbif_sring_t *sring;
+
+    up = usbif_find(domid);
+    if ( unlikely(up == NULL) )
+    {
+        DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n",
+                connect->domid);
+        connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    /* Reserve a page of kernel virtual address space for the ring. */
+    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+    {
+        connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    /* Map the frontend's shared frame into the reserved area. */
+    prot  = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+                                    prot, domid);
+    if ( error != 0 )
+    {
+        if ( error == -ENOMEM )
+            connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        else if ( error == -EFAULT )
+            connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
+        else
+            connect->status = USBIF_BE_STATUS_ERROR;
+        vfree(vma->addr);
+        return;
+    }
+
+    if ( up->status != DISCONNECTED )
+    {
+        connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
+        vfree(vma->addr);
+        return;
+    }
+
+    sring = (usbif_sring_t *)vma->addr;
+    SHARED_RING_INIT(sring);
+    BACK_RING_INIT(&up->usb_ring, sring, PAGE_SIZE);
+
+    up->evtchn      = evtchn;
+    up->irq         = bind_evtchn_to_irq(evtchn);
+    up->shmem_frame = shmem_frame;
+
+    /* Bug fix: request_irq() can fail; previously its return value was
+     * ignored and the interface was reported CONNECTED with no interrupt
+     * handler bound.  Unwind and report an error instead. */
+    if ( request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up) != 0 )
+    {
+        DPRINTK("usbif_connect: request_irq failed for dom %u\n", domid);
+        unbind_evtchn_from_irq(evtchn);
+        vfree(vma->addr);
+        connect->status = USBIF_BE_STATUS_ERROR;
+        return;
+    }
+
+    /* Commit: take a reference on behalf of the connection. */
+    up->status = CONNECTED;
+    usbif_get(up);
+
+    connect->status = USBIF_BE_STATUS_OKAY;
+}
+
+/* Remove URBs for this interface before destroying it.
+ * Simply drops the interface from the io-scheduler run list. */
+void usbif_deschedule(usbif_priv_t *up)
+{
+    remove_from_usbif_list(up);
+}
+
+/*
+ * usbif_disconnect: begin tearing down the interface for the given domain.
+ * Returns 1 if the caller should send the response message itself, or 0 if
+ * the response is deferred until outstanding I/O has drained (see
+ * usbif_disconnect_complete()).
+ */
+int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+    domid_t       domid = disconnect->domid;
+    usbif_priv_t *up;
+
+    up = usbif_find(domid);
+    if ( unlikely(up == NULL) )
+    {
+        DPRINTK("usbif_disconnect attempted for non-existent usbif"
+                " (%u)\n", disconnect->domid);
+        disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return 1; /* Caller will send response error message. */
+    }
+
+    if ( up->status == CONNECTED )
+    {
+        up->status           = DISCONNECTING;
+        up->disconnect_rspid = rsp_id;
+        wmb(); /* Let other CPUs see the status change. */
+        free_irq(up->irq, up);
+        usbif_deschedule(up);
+        /* Drop the connection reference; final teardown runs when the
+         * refcount hits zero. */
+        usbif_put(up);
+        return 0; /* Caller should not send response message. */
+    }
+
+    disconnect->status = USBIF_BE_STATUS_OKAY;
+    return 1;
+}
+
+/*
+ * usbif_interface_init: create the slab cache for per-interface state and
+ * clear the interface hash table.  Called once at driver start-up.
+ */
+void __init usbif_interface_init(void)
+{
+    usbif_priv_cachep = kmem_cache_create("usbif_priv_cache",
+                                          sizeof(usbif_priv_t),
+                                          0, 0, NULL, NULL);
+    /* Bug fix: kmem_cache_create() can return NULL; previously this was
+     * unchecked and a later kmem_cache_alloc() would dereference NULL. */
+    if ( usbif_priv_cachep == NULL )
+        BUG();
+    memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash));
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
new file mode 100644
index 0000000000..0a4cf8b4ea
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
@@ -0,0 +1,1070 @@
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/main.c
+ *
+ * Backend for the Xen virtual USB driver - provides an abstraction of a
+ * USB host controller to the corresponding frontend driver.
+ *
+ * by Mark Williamson
+ * Copyright (c) 2004 Intel Research Cambridge
+ * Copyright (c) 2004, 2005 Mark Williamson
+ *
+ * Based on arch/xen/drivers/blkif/backend/main.c
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+
+#include <linux/list.h>
+#include <linux/usb.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/tqueue.h>
+
+/*
+ * This is rather arbitrary.
+ */
+#define MAX_PENDING_REQS 4
+#define BATCH_PER_DOMAIN 1
+
+static unsigned long mmap_vstart;
+
+/* Needs to be sufficiently large that we can map the (large) buffers
+ * the USB mass storage driver wants. */
+#define MMAP_PAGES_PER_REQUEST \
+ (128)
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
+
+
+static spinlock_t owned_ports_lock;
+LIST_HEAD(owned_ports);
+
+/* A list of these structures is used to track ownership of physical USB
+ * ports. */
+typedef struct
+{
+ usbif_priv_t *usbif_priv;
+ char path[16];
+ int guest_port;
+ int enabled;
+ struct list_head list;
+ unsigned long guest_address; /* The USB device address that has been
+ * assigned by the guest. */
+ int dev_present; /* Is there a device present? */
+ struct usb_device * dev;
+ unsigned long ifaces; /* What interfaces are present on this device? */
+} owned_port_t;
+
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. The request is complete, the specified
+ * domain has a response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+ usbif_priv_t *usbif_priv;
+ unsigned long id;
+ int nr_pages;
+ unsigned short operation;
+ int status;
+} pending_req_t;
+
+/*
+ * We can't allocate pending_req's in order, since they may complete out of
+ * order. We therefore maintain an allocation ring. This ring also indicates
+ * when enough work has been passed down -- at that point the allocation ring
+ * will be empty.
+ */
+static pending_req_t pending_reqs[MAX_PENDING_REQS];
+static unsigned char pending_ring[MAX_PENDING_REQS];
+static spinlock_t pend_prod_lock;
+
+/* NB. We use a different index type to differentiate from shared usb rings. */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do);
+static void make_response(usbif_priv_t *usbif, unsigned long id,
+ unsigned short op, int st, int inband,
+ unsigned long actual_length);
+static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long port);
+static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req);
+static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid);
+static owned_port_t *usbif_find_port(char *);
+
+/******************************************************************
+ * PRIVATE DEBUG FUNCTIONS
+ */
+
+/* Debug dumping helpers.  Compiled out unless DEBUG is defined below; the
+ * #else branch provides no-op stand-ins so call sites need no #ifdefs. */
+#undef DEBUG
+#ifdef DEBUG
+
+/* Dump the full state of an owned physical port. */
+static void dump_port(owned_port_t *p)
+{
+    printk(KERN_DEBUG "owned_port_t @ %p\n"
+           "  usbif_priv @ %p\n"
+           "  path: %s\n"
+           "  guest_port: %d\n"
+           "  guest_address: %ld\n"
+           "  dev_present: %d\n"
+           "  dev @ %p\n"
+           "  ifaces: 0x%lx\n",
+           p, p->usbif_priv, p->path, p->guest_port, p->guest_address,
+           p->dev_present, p->dev, p->ifaces);
+}
+
+
+/* Dump an incoming ring request from the frontend. */
+static void dump_request(usbif_request_t *req)
+{
+    printk(KERN_DEBUG "id = 0x%lx\n"
+           "devnum %d\n"
+           "endpoint 0x%x\n"
+           "direction %d\n"
+           "speed %d\n"
+           "pipe_type 0x%x\n"
+           "transfer_buffer 0x%lx\n"
+           "length 0x%lx\n"
+           "transfer_flags 0x%lx\n"
+           "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n"
+           "iso_schedule = 0x%lx\n"
+           "num_iso %ld\n",
+           req->id, req->devnum, req->endpoint, req->direction, req->speed,
+           req->pipe_type, req->transfer_buffer, req->length,
+           req->transfer_flags, req->setup[0], req->setup[1], req->setup[2],
+           req->setup[3], req->setup[4], req->setup[5], req->setup[6],
+           req->setup[7], req->iso_schedule, req->num_iso);
+}
+
+/* Dump the interesting fields of a host-side URB. */
+static void dump_urb(struct urb *urb)
+{
+    printk(KERN_DEBUG "dumping urb @ %p\n", urb);
+
+#define DUMP_URB_FIELD(name, format) \
+    printk(KERN_DEBUG "  " # name " " format "\n", urb-> name)
+
+    DUMP_URB_FIELD(pipe, "0x%x");
+    DUMP_URB_FIELD(status, "%d");
+    DUMP_URB_FIELD(transfer_flags, "0x%x");
+    DUMP_URB_FIELD(transfer_buffer, "%p");
+    DUMP_URB_FIELD(transfer_buffer_length, "%d");
+    DUMP_URB_FIELD(actual_length, "%d");
+}
+
+/* Dump an outgoing ring response before it is pushed to the frontend.
+ * NOTE(review): resp->id is printed with %x — confirm the field width
+ * matches usbif_response_t's declaration if DEBUG is ever re-enabled. */
+static void dump_response(usbif_response_t *resp)
+{
+    printk(KERN_DEBUG "usbback: Sending response:\n"
+           "  id = 0x%x\n"
+           "  op = %d\n"
+           "  status = %d\n"
+           "  data = %d\n"
+           "  length = %d\n",
+           resp->id, resp->op, resp->status, resp->data, resp->length);
+}
+
+#else /* DEBUG */
+
+#define dump_port(blah)     ((void)0)
+#define dump_request(blah)  ((void)0)
+#define dump_urb(blah)      ((void)0)
+#define dump_response(blah) ((void)0)
+
+#endif /* DEBUG */
+
+/******************************************************************
+ * MEMORY MANAGEMENT
+ */
+
+/*
+ * fast_flush_area: unmap the first nr_pages of request slot 'idx' via a
+ * single hypervisor multicall, flushing the TLB on the last entry only.
+ */
+static void fast_flush_area(int idx, int nr_pages)
+{
+    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+    int               i;
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        mcl[i].op      = __HYPERVISOR_update_va_mapping;
+        mcl[i].args[0] = MMAP_VADDR(idx, i);
+        mcl[i].args[1] = 0;
+        mcl[i].args[2] = 0;
+    }
+
+    /* One global TLB flush covers all the invalidations above. */
+    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+        BUG();
+}
+
+
+/******************************************************************
+ * USB INTERFACE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head usbio_schedule_list;
+static spinlock_t usbio_schedule_list_lock;
+
+/* Non-NULL 'next' is used as the on-list marker (cleared on removal), so
+ * this check can be made cheaply without the list lock. */
+static int __on_usbif_list(usbif_priv_t *up)
+{
+    return up->usbif_list.next != NULL;
+}
+
+/*
+ * Remove 'up' from the io-scheduler run list (dropping the list's
+ * reference).  The unlocked pre-check is an optimisation; membership is
+ * re-tested under the lock before the actual removal.
+ */
+void remove_from_usbif_list(usbif_priv_t *up)
+{
+    unsigned long flags;
+    if ( !__on_usbif_list(up) ) return;
+    spin_lock_irqsave(&usbio_schedule_list_lock, flags);
+    if ( __on_usbif_list(up) )
+    {
+        list_del(&up->usbif_list);
+        up->usbif_list.next = NULL;  /* mark as off-list */
+        usbif_put(up);
+    }
+    spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
+}
+
+/*
+ * Queue 'up' at the tail of the io-scheduler run list (taking a reference)
+ * unless it is already queued or no longer connected.  Membership is
+ * re-tested under the lock, mirroring remove_from_usbif_list().
+ */
+static void add_to_usbif_list_tail(usbif_priv_t *up)
+{
+    unsigned long flags;
+    if ( __on_usbif_list(up) ) return;
+    spin_lock_irqsave(&usbio_schedule_list_lock, flags);
+    if ( !__on_usbif_list(up) && (up->status == CONNECTED) )
+    {
+        list_add_tail(&up->usbif_list, &usbio_schedule_list);
+        usbif_get(up);
+    }
+    spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
+}
+
+/* Return a pending-request slot index to the allocation ring. */
+void free_pending(int pending_idx)
+{
+    unsigned long flags;
+
+    /* Free the pending request. */
+    spin_lock_irqsave(&pend_prod_lock, flags);
+    pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+    spin_unlock_irqrestore(&pend_prod_lock, flags);
+}
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as urb->complete()
+ */
+
+static void maybe_trigger_usbio_schedule(void);
+
+/*
+ * URB completion callback (runs as purb->complete).  Copies back the iso
+ * schedule if needed, unmaps the guest buffer pages, queues a response on
+ * the frontend ring, and releases the pending-request slot and reference.
+ */
+static void __end_usb_io_op(struct urb *purb)
+{
+    pending_req_t *pending_req;
+    int pending_idx;
+
+    pending_req = purb->context;
+
+    /* Slot index recovered from the pointer's offset in pending_reqs[]. */
+    pending_idx = pending_req - pending_reqs;
+
+    ASSERT(purb->actual_length <= purb->transfer_buffer_length);
+    ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE);
+
+    /* An error fails the entire request. */
+    if ( purb->status )
+    {
+        printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb, purb->status);
+    }
+
+    /* Pipe type 0 denotes an isochronous pipe here (see dispatch_usb_io). */
+    if ( usb_pipetype(purb->pipe) == 0 )
+    {
+        int i;
+        usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, pending_req->nr_pages - 1);
+
+        /* If we're dealing with an iso pipe, we need to copy back the schedule. */
+        for ( i = 0; i < purb->number_of_packets; i++ )
+        {
+            sched[i].length = purb->iso_frame_desc[i].actual_length;
+            ASSERT(sched[i].buffer_offset ==
+                   purb->iso_frame_desc[i].offset);
+            sched[i].status = purb->iso_frame_desc[i].status;
+        }
+    }
+
+    fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages);
+
+    /* Setup packet was kmalloc'd in dispatch_usb_io(). */
+    kfree(purb->setup_packet);
+
+    make_response(pending_req->usbif_priv, pending_req->id,
+                  pending_req->operation, pending_req->status, 0, purb->actual_length);
+    usbif_put(pending_req->usbif_priv);
+
+    usb_free_urb(purb);
+
+    free_pending(pending_idx);
+
+    rmb();
+
+    /* Check for anything still waiting in the rings, having freed a request... */
+    maybe_trigger_usbio_schedule();
+}
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait);
+
+/*
+ * Kernel-thread body: the backend I/O scheduler.  Sleeps until there is
+ * both queued work and a free pending-request slot, then services each
+ * interface round-robin, BATCH_PER_DOMAIN requests at a time.  Never
+ * returns.
+ */
+static int usbio_schedule(void *arg)
+{
+    DECLARE_WAITQUEUE(wq, current);
+
+    usbif_priv_t     *up;
+    struct list_head *ent;
+
+    daemonize();
+
+    for ( ; ; )
+    {
+        /* Wait for work to do. */
+        add_wait_queue(&usbio_schedule_wait, &wq);
+        set_current_state(TASK_INTERRUPTIBLE);
+        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
+             list_empty(&usbio_schedule_list) )
+            schedule();
+        __set_current_state(TASK_RUNNING);
+        remove_wait_queue(&usbio_schedule_wait, &wq);
+
+        /* Queue up a batch of requests. */
+        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+                !list_empty(&usbio_schedule_list) )
+        {
+            ent = usbio_schedule_list.next;
+            up  = list_entry(ent, usbif_priv_t, usbif_list);
+            usbif_get(up);
+            remove_from_usbif_list(up);
+            /* Re-queue the interface if it still has work outstanding. */
+            if ( do_usb_io_op(up, BATCH_PER_DOMAIN) )
+                add_to_usbif_list_tail(up);
+            usbif_put(up);
+        }
+    }
+}
+
+/* Wake the scheduler thread if there is queued work to service. */
+static void maybe_trigger_usbio_schedule(void)
+{
+    /*
+     * Needed so that two processes, who together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( !list_empty(&usbio_schedule_list) )
+        wake_up(&usbio_schedule_wait);
+}
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+/*
+ * Event-channel interrupt handler: the frontend has pushed new requests.
+ * Queue the interface for the scheduler thread and kick it.
+ */
+irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    usbif_priv_t *up = dev_id;
+
+    smp_mb();
+
+    add_to_usbif_list_tail(up);
+
+    /* Will in fact /always/ trigger an io schedule in this case. */
+    maybe_trigger_usbio_schedule();
+
+    return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the usb-device layer proper.
+ */
+
+/*
+ * Consume up to max_to_do requests from the interface's shared ring and
+ * dispatch each to the appropriate handler.  Returns nonzero if requests
+ * remain (so the scheduler should re-queue this interface).
+ */
+static int do_usb_io_op(usbif_priv_t *up, int max_to_do)
+{
+    usbif_back_ring_t *usb_ring = &up->usb_ring;
+    usbif_request_t   *req;
+    RING_IDX           i, rp;
+    int                more_to_do = 0;
+
+    rp = usb_ring->sring->req_prod;
+    rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+    /* Take items off the comms ring, taking care not to overflow. */
+    for ( i = usb_ring->req_cons;
+          (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i);
+          i++ )
+    {
+        /* Stop early if the batch quota or slot pool is exhausted. */
+        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
+        {
+            more_to_do = 1;
+            break;
+        }
+
+        req = RING_GET_REQUEST(usb_ring, i);
+
+        switch ( req->operation )
+        {
+        case USBIF_OP_PROBE:
+            dispatch_usb_probe(up, req->id, req->port);
+            break;
+
+        case USBIF_OP_IO:
+            /* Assemble an appropriate URB. */
+            dispatch_usb_io(up, req);
+            break;
+
+        case USBIF_OP_RESET:
+            dispatch_usb_reset(up, req->port);
+            break;
+
+        default:
+            DPRINTK("error: unknown USB io operation [%d]\n",
+                    req->operation);
+            make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+            break;
+        }
+    }
+
+    usb_ring->req_cons = i;
+
+    return more_to_do;
+}
+
+/*
+ * Find the owned port that this interface exposes to the guest as virtual
+ * port number 'port'.  Returns NULL if no such port is owned.
+ */
+static owned_port_t *find_guest_port(usbif_priv_t *up, int port)
+{
+    unsigned long     flags;
+    struct list_head *l;
+
+    spin_lock_irqsave(&owned_ports_lock, flags);
+    list_for_each(l, &owned_ports)
+    {
+        owned_port_t *p = list_entry(l, owned_port_t, list);
+        if(p->usbif_priv == up && p->guest_port == port)
+        {
+            spin_unlock_irqrestore(&owned_ports_lock, flags);
+            return p;
+        }
+    }
+    spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+    return NULL;
+}
+
+/*
+ * Handle a guest port-reset request.  The reset is faked in software (a
+ * real device reset happens only when the interface is destroyed); the
+ * port's guest-visible address is cleared and it is re-enabled iff a
+ * device is attached.
+ */
+static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid)
+{
+    owned_port_t *port = find_guest_port(up, portid);
+    int ret = 0;
+
+    /* Bug fix: find_guest_port() returns NULL for an unknown port and the
+     * code below used to dereference it unconditionally.  Mirror the
+     * error handling used by dispatch_usb_probe(). */
+    if ( port == NULL )
+    {
+        printk(KERN_INFO "dispatch_usb_reset(): invalid port reset request "
+               "(port %ld)\n", portid);
+        make_response(up, 0, USBIF_OP_RESET, -EINVAL, 0, 0);
+        return;
+    }
+
+    /* Allowing the guest to actually reset the device causes more problems
+     * than it's worth.  We just fake it out in software but we will do a real
+     * reset when the interface is destroyed. */
+
+    dump_port(port);
+
+    port->guest_address = 0;
+    /* If there's an attached device then the port is now enabled. */
+    if ( port->dev_present )
+        port->enabled = 1;
+    else
+        port->enabled = 0;
+
+    make_response(up, 0, USBIF_OP_RESET, ret, 0, 0);
+}
+
+/*
+ * Handle a guest port-probe request: report (in-band) whether a device is
+ * present on the given virtual port, or -EINVAL for an unknown port.
+ */
+static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long portid)
+{
+    owned_port_t *port = find_guest_port(up, portid);
+    int ret;
+
+    if ( port != NULL )
+        ret = port->dev_present;
+    else
+    {
+        ret = -EINVAL;
+        printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request "
+               "(port %ld)\n", portid);
+    }
+
+    /* Probe result is sent back in-band.  Probes don't have an associated id
+     * right now... */
+    make_response(up, id, USBIF_OP_PROBE, ret, portid, 0);
+}
+
+/**
+ * check_iso_schedule - safety check the isochronous schedule for an URB
+ * @purb : the URB in question
+ */
+/**
+ * check_iso_schedule - safety check the isochronous schedule for an URB
+ * @purb : the URB in question
+ *
+ * Verifies every iso packet lies wholly within the transfer buffer and
+ * that the packet lengths do not sum past it.  Returns 0 if the schedule
+ * is safe, -EINVAL otherwise.
+ */
+static int check_iso_schedule(struct urb *purb)
+{
+    int i;
+    unsigned long total_length = 0;
+
+    for ( i = 0; i < purb->number_of_packets; i++ )
+    {
+        struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i];
+
+        if ( desc->offset >= purb->transfer_buffer_length
+             || ( desc->offset + desc->length) > purb->transfer_buffer_length )
+            return -EINVAL;
+
+        total_length += desc->length;
+
+        if ( total_length > purb->transfer_buffer_length )
+            return -EINVAL;
+    }
+
+    return 0;
+}
+
+owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req);
+
+/*
+ * Handle a guest USB_OP_IO request: validate the request, intercept and
+ * emulate the control transfers we must virtualise (SET_ADDRESS,
+ * SET_CONFIGURATION, SET_INTERFACE), map the guest's transfer buffer (and
+ * iso schedule page, if any) into our address space, then build and submit
+ * an URB.  Completion is reported asynchronously via __end_usb_io_op().
+ * On any failure a response carrying a negative errno is queued instead.
+ */
+static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req)
+{
+    unsigned long buffer_mach;
+    int i = 0, offset = 0,
+        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+    pending_req_t *pending_req;
+    unsigned long  remap_prot;
+    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+    struct urb *purb = NULL;
+    owned_port_t *port;
+    unsigned char *setup;
+
+    dump_request(req);
+
+    if ( NR_PENDING_REQS == MAX_PENDING_REQS )
+    {
+        printk(KERN_WARNING "usbback: Max requests already queued. "
+               "Giving up!\n");
+
+        return;
+    }
+
+    port = find_port_for_request(up, req);
+
+    if ( port == NULL )
+    {
+        printk(KERN_WARNING "No such device! (%d)\n", req->devnum);
+        dump_request(req);
+
+        make_response(up, req->id, req->operation, -ENODEV, 0, 0);
+        return;
+    }
+    else if ( !port->dev_present )
+    {
+        /* In normal operation, we'll only get here if a device is unplugged
+         * and the frontend hasn't noticed yet. */
+        make_response(up, req->id, req->operation, -ENODEV, 0, 0);
+        return;
+    }
+
+
+    /* 8 bytes: the standard USB SETUP packet.  Freed by __end_usb_io_op()
+     * on the normal path, or on the error paths below. */
+    setup = kmalloc(8, GFP_KERNEL);
+
+    if ( setup == NULL )
+        goto no_mem;
+
+    /* Copy request out for safety. */
+    memcpy(setup, req->setup, 8);
+
+    if( setup[0] == 0x0 && setup[1] == 0x5)
+    {
+        /* To virtualise the USB address space, we need to intercept
+         * set_address messages and emulate.  From the USB specification:
+         * bmRequestType = 0x0;
+         * Brequest = SET_ADDRESS (i.e. 0x5)
+         * wValue = device address
+         * wIndex = 0
+         * wLength = 0
+         * data = None
+         */
+        /* Store into the guest transfer buffer using cpu_to_le16 */
+        port->guest_address = le16_to_cpu(*(u16 *)(setup + 2));
+        /* Make a successful response.  That was easy! */
+
+        make_response(up, req->id, req->operation, 0, 0, 0);
+
+        kfree(setup);
+        return;
+    }
+    else if ( setup[0] == 0x0 && setup[1] == 0x9 )
+    {
+        /* The host kernel needs to know what device configuration is in use
+         * because various error checks get confused otherwise.  We just do
+         * configuration settings here, under controlled conditions.
+         */
+
+        /* Ignore configuration setting and hope that the host kernel
+           did it right. */
+        /* usb_set_configuration(port->dev, setup[2]); */
+
+        make_response(up, req->id, req->operation, 0, 0, 0);
+
+        kfree(setup);
+        return;
+    }
+    else if ( setup[0] == 0x1 && setup[1] == 0xB )
+    {
+        /* The host kernel needs to know what device interface is in use
+         * because various error checks get confused otherwise.  We just do
+         * configuration settings here, under controlled conditions.
+         */
+        usb_set_interface(port->dev, (setup[4] | setup[5] << 8),
+                          (setup[2] | setup[3] << 8) );
+
+        make_response(up, req->id, req->operation, 0, 0, 0);
+
+        kfree(setup);
+        return;
+    }
+
+    /* Reject transfers that cannot fit in one request's mapping window. */
+    if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK)
+           + req->length )
+         > MMAP_PAGES_PER_REQUEST * PAGE_SIZE )
+    {
+        printk(KERN_WARNING "usbback: request of %lu bytes too large\n",
+               req->length);
+        make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+        kfree(setup);
+        return;
+    }
+
+    buffer_mach = req->transfer_buffer;
+
+    if( buffer_mach == 0 )
+        goto no_remap;
+
+    ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST);
+    ASSERT(buffer_mach);
+
+    /* Always map writeable for now. */
+    remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
+
+    /* Build one multicall entry per guest page to be mapped. */
+    for ( i = 0, offset = 0; offset < req->length;
+          i++, offset += PAGE_SIZE )
+    {
+        mcl[i].op      = __HYPERVISOR_update_va_mapping_otherdomain;
+        mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
+        mcl[i].args[1] = ((buffer_mach & PAGE_MASK) + offset) | remap_prot;
+        mcl[i].args[2] = 0;
+        mcl[i].args[3] = up->domid;
+
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+            FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
+
+        /* NOTE(review): '+' binds tighter than '<<', so this asserts
+         * (buffer_mach + i) << PAGE_SHIFT; the intent looks like
+         * buffer_mach + (i << PAGE_SHIFT) — confirm before enabling
+         * ASSERT-ful builds. */
+        ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
+               == buffer_mach + i << PAGE_SHIFT);
+    }
+
+    if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
+    {
+        /* Map in ISO schedule, if necessary. */
+        mcl[i].op      = __HYPERVISOR_update_va_mapping_otherdomain;
+        mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
+        mcl[i].args[1] = (req->iso_schedule & PAGE_MASK) | remap_prot;
+        mcl[i].args[2] = 0;
+        mcl[i].args[3] = up->domid;
+
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+            FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT);
+
+        i++;
+    }
+
+    if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) )
+        BUG();
+
+    /* Check each per-page mapping succeeded; unmap everything on failure. */
+    {
+        int j;
+        for ( j = 0; j < i; j++ )
+        {
+            if ( unlikely(mcl[j].result != 0) )
+            {
+                printk(KERN_WARNING
+                       "invalid buffer %d -- could not remap it\n", j);
+                fast_flush_area(pending_idx, i);
+                goto bad_descriptor;
+            }
+        }
+    }
+
+ no_remap:
+
+    ASSERT(i <= MMAP_PAGES_PER_REQUEST);
+    ASSERT(i * PAGE_SIZE >= req->length);
+
+    /* We have to do this because some things might complete out of order. */
+    pending_req = &pending_reqs[pending_idx];
+    pending_req->usbif_priv= up;
+    pending_req->id        = req->id;
+    pending_req->operation = req->operation;
+    pending_req->nr_pages  = i;
+
+    pending_cons++;
+
+    usbif_get(up);
+
+    /* Fill out an actual request for the USB layer. */
+    purb = usb_alloc_urb(req->num_iso);
+
+    if ( purb == NULL )
+    {
+        usbif_put(up);
+        free_pending(pending_idx);
+        goto no_mem;
+    }
+
+    purb->dev = port->dev;
+    purb->context = pending_req;
+    purb->transfer_buffer =
+        (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK));
+    if(buffer_mach == 0)
+        purb->transfer_buffer = NULL;
+    purb->complete = __end_usb_io_op;
+    purb->transfer_buffer_length = req->length;
+    purb->transfer_flags = req->transfer_flags;
+
+    /* Reconstruct the pipe word from the request's decomposed fields. */
+    purb->pipe = 0;
+    purb->pipe |= req->direction << 7;
+    purb->pipe |= port->dev->devnum << 8;
+    purb->pipe |= req->speed << 26;
+    purb->pipe |= req->pipe_type << 30;
+    purb->pipe |= req->endpoint << 15;
+
+    purb->number_of_packets = req->num_iso;
+
+    /* The iso schedule must fit in the single page mapped for it. */
+    if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE )
+        goto urb_error;
+
+    /* Make sure there's always some kind of timeout. */
+    purb->timeout = ( req->timeout > 0 ) ?  (req->timeout * HZ) / 1000
+                    :  1000;
+
+    purb->setup_packet = setup;
+
+    if ( req->pipe_type == 0 ) /* ISO */
+    {
+        int j;
+        usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1);
+
+        /* If we're dealing with an iso pipe, we need to copy in a schedule. */
+        for ( j = 0; j < purb->number_of_packets; j++ )
+        {
+            purb->iso_frame_desc[j].length = iso_sched[j].length;
+            purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset;
+            iso_sched[j].status = 0;
+        }
+    }
+
+    if ( check_iso_schedule(purb) != 0 )
+        goto urb_error;
+
+    if ( usb_submit_urb(purb) != 0 )
+        goto urb_error;
+
+    return;
+
+ urb_error:
+    dump_urb(purb);
+    usbif_put(up);
+    free_pending(pending_idx);
+
+ bad_descriptor:
+    kfree ( setup );
+    if ( purb != NULL )
+        usb_free_urb(purb);
+    make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+    return;
+
+ no_mem:
+    if ( setup != NULL )
+        kfree(setup);
+    make_response(up, req->id, req->operation, -ENOMEM, 0, 0);
+    return;
+}
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+/*
+ * Queue a response on the interface's shared ring and notify the frontend
+ * via its event channel.  'inband' carries operation-specific in-band data
+ * (e.g. the probed port for USBIF_OP_PROBE).
+ */
+static void make_response(usbif_priv_t *up, unsigned long id,
+                          unsigned short op, int st, int inband,
+                          unsigned long length)
+{
+    usbif_response_t  *resp;
+    unsigned long      flags;
+    usbif_back_ring_t *usb_ring = &up->usb_ring;
+
+    /* Place on the response ring for the relevant domain. */
+    spin_lock_irqsave(&up->usb_ring_lock, flags);
+    resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt);
+    resp->id        = id;
+    resp->operation = op;
+    resp->status    = st;
+    resp->data      = inband;
+    resp->length    = length;
+    wmb(); /* Ensure other side can see the response fields. */
+
+    dump_response(resp);
+
+    usb_ring->rsp_prod_pvt++;
+    RING_PUSH_RESPONSES(usb_ring);
+    spin_unlock_irqrestore(&up->usb_ring_lock, flags);
+
+    /* Kick the relevant domain. */
+    notify_via_evtchn(up->evtchn);
+}
+
+/**
+ * usbif_claim_port - claim devices on a port on behalf of guest
+ *
+ * Once completed, this will ensure that any device attached to that
+ * port is claimed by this driver for use by the guest.
+ */
+/**
+ * usbif_claim_port - claim devices on a port on behalf of guest
+ *
+ * Once completed, this will ensure that any device attached to that
+ * port is claimed by this driver for use by the guest.
+ * Returns 0 on success or a negative errno.
+ */
+int usbif_claim_port(usbif_be_claim_port_t *msg)
+{
+    owned_port_t *o_p;
+
+    /* Sanity... */
+    if ( usbif_find_port(msg->path) != NULL )
+    {
+        printk(KERN_WARNING "usbback: Attempted to claim USB port "
+               "we already own!\n");
+        return -EINVAL;
+    }
+
+    /* Bug fix: the path used to be strcpy()'d into the fixed 16-byte
+     * o_p->path buffer with no length check, overflowing on long device
+     * paths.  Reject paths that will not fit (including the NUL). */
+    if ( strlen(msg->path) >= sizeof(o_p->path) )
+    {
+        printk(KERN_WARNING "usbback: Port path too long to claim (%s)\n",
+               msg->path);
+        return -EINVAL;
+    }
+
+    /* No need for a slab cache - this should be infrequent. */
+    o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL);
+
+    if ( o_p == NULL )
+        return -ENOMEM;
+
+    o_p->enabled = 0;
+    /* NOTE(review): usbif_find() may return NULL if the domain has no
+     * interface yet; such a port simply never matches in later lookups —
+     * confirm this is the intended behaviour. */
+    o_p->usbif_priv = usbif_find(msg->domid);
+    o_p->guest_port = msg->usbif_port;
+    o_p->dev_present = 0;
+    o_p->guest_address = 0; /* Default address. */
+
+    strcpy(o_p->path, msg->path); /* Length validated above. */
+
+    spin_lock_irq(&owned_ports_lock);
+
+    list_add(&o_p->list, &owned_ports);
+
+    spin_unlock_irq(&owned_ports_lock);
+
+    printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path,
+           msg->domid, msg->usbif_port);
+
+    /* Force a reprobe for unclaimed devices. */
+    usb_scan_devices();
+
+    return 0;
+}
+
+/*
+ * Find the enabled owned port whose guest-assigned USB address matches the
+ * request's device number for this interface.  Returns NULL if none.
+ */
+owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req)
+{
+    unsigned long     flags;
+    struct list_head *port;
+
+    /* I'm assuming this is not called from IRQ context - correct?  I think
+     * it's probably only called in response to control messages or plug events
+     * in the USB hub kernel thread, so should be OK. */
+    spin_lock_irqsave(&owned_ports_lock, flags);
+    list_for_each(port, &owned_ports)
+    {
+        owned_port_t *p = list_entry(port, owned_port_t, list);
+        if(p->usbif_priv == up && p->guest_address == req->devnum && p->enabled )
+        {
+            dump_port(p);
+
+            spin_unlock_irqrestore(&owned_ports_lock, flags);
+            return p;
+        }
+    }
+    spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+    return NULL;
+}
+
+/* Find the owned port whose device path matches exactly, or NULL if we do
+ * not own that port.  Caller must hold owned_ports_lock. */
+owned_port_t *__usbif_find_port(char *path)
+{
+    struct list_head *ent;
+    owned_port_t     *candidate;
+
+    list_for_each(ent, &owned_ports)
+    {
+        candidate = list_entry(ent, owned_port_t, list);
+        if ( strcmp(path, candidate->path) == 0 )
+            return candidate;
+    }
+
+    return NULL;
+}
+
+/* Locking wrapper around __usbif_find_port(). */
+owned_port_t *usbif_find_port(char *path)
+{
+    unsigned long flags;
+    owned_port_t *found;
+
+    spin_lock_irqsave(&owned_ports_lock, flags);
+    found = __usbif_find_port(path);
+    spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+    return found;
+}
+
+
+/*
+ * usb_driver probe callback: claim any device that appears on a port we
+ * own, regardless of what it is.  Returns the owning interface's private
+ * data to take the device, or NULL to decline it.
+ */
+static void *probe(struct usb_device *dev, unsigned iface,
+                   const struct usb_device_id *id)
+{
+    owned_port_t *p;
+
+    /* We don't care what the device is - if we own the port, we want it.  We
+     * don't deal with device-specifics in this driver, so we don't care what
+     * the device actually is ;-) */
+    if ( ( p = usbif_find_port(dev->devpath) ) != NULL )
+    {
+        printk(KERN_INFO "usbback: claimed device attached to owned port\n");
+
+        p->dev_present = 1;
+        p->dev         = dev;
+        set_bit(iface, &p->ifaces);
+
+        return p->usbif_priv;
+    }
+    else
+        printk(KERN_INFO "usbback: hotplug for non-owned port (%s), ignoring\n",
+               dev->devpath);
+
+
+    return NULL;
+}
+
+/*
+ * usb_driver disconnect callback: record that the device on this owned
+ * port has gone so the guest learns of it on its next probe.
+ */
+static void disconnect(struct usb_device *dev, void *usbif)
+{
+    /* Note the device is removed so we can tell the guest when it probes. */
+    owned_port_t *port = usbif_find_port(dev->devpath);
+
+    /* Bug fix: usbif_find_port() returns NULL if the port is no longer
+     * owned (e.g. released concurrently); previously this dereferenced
+     * NULL unconditionally. */
+    if ( port == NULL )
+        return;
+
+    port->dev_present = 0;
+    port->dev         = NULL;
+    port->ifaces      = 0;
+}
+
+
+/* Catch-all USB driver: id_table is NULL so probe() is offered every
+ * device; it claims only those on ports we own. */
+struct usb_driver driver =
+{
+    .owner      = THIS_MODULE,
+    .name       = "Xen USB Backend",
+    .probe      = probe,
+    .disconnect = disconnect,
+    .id_table   = NULL,
+};
+
+/* __usbif_release_port - internal mechanics for releasing a port */
+void __usbif_release_port(owned_port_t *p)
+{
+ int i;
+
+ for ( i = 0; p->ifaces != 0; i++)
+ if ( p->ifaces & 1 << i )
+ {
+ usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i));
+ clear_bit(i, &p->ifaces);
+ }
+ list_del(&p->list);
+
+ /* Reset the real device. We don't simulate disconnect / probe for other
+ * drivers in this kernel because we assume the device is completely under
+ * the control of ourselves (i.e. the guest!). This should ensure that the
+ * device is in a sane state for the next customer ;-) */
+
+ /* MAW NB: we're not resetting the real device here. This looks perfectly
+ * valid to me but it causes memory corruption. We seem to get away with not
+ * resetting for now, although it'd be nice to have this tracked down. */
+/* if ( p->dev != NULL) */
+/* usb_reset_device(p->dev); */
+
+ kfree(p);
+}
+
+
+/**
+ * usbif_release_port - stop claiming devices on a port on behalf of guest
+ */
+/**
+ * usbif_release_port - stop claiming devices on a port on behalf of guest
+ */
+void usbif_release_port(usbif_be_release_port_t *msg)
+{
+    owned_port_t *p;
+
+    spin_lock_irq(&owned_ports_lock);
+    p = __usbif_find_port(msg->path);
+    /* Bug fix: __usbif_find_port() returns NULL for a path we don't own;
+     * previously that NULL was passed straight to __usbif_release_port(),
+     * which dereferences it. */
+    if ( p != NULL )
+        __usbif_release_port(p);
+    else
+        printk(KERN_WARNING "usbback: release requested for unowned port "
+               "(%s)\n", msg->path);
+    spin_unlock_irq(&owned_ports_lock);
+}
+
+/* Release every port owned on behalf of the given interface (used when the
+ * interface is destroyed).  Uses the _safe iterator since releasing a port
+ * unlinks it from the list. */
+void usbif_release_ports(usbif_priv_t *up)
+{
+    struct list_head *port, *tmp;
+    unsigned long     flags;
+
+    spin_lock_irqsave(&owned_ports_lock, flags);
+    list_for_each_safe(port, tmp, &owned_ports)
+    {
+        owned_port_t *p = list_entry(port, owned_port_t, list);
+        if ( p->usbif_priv == up )
+            __usbif_release_port(p);
+    }
+    spin_unlock_irqrestore(&owned_ports_lock, flags);
+}
+
+/*
+ * Driver entry point.  Only runs in a domain privileged to be a USB
+ * backend; sets up the mapping window, pending-request pool, scheduler
+ * thread and control interface, then registers the catch-all USB driver.
+ */
+static int __init usbif_init(void)
+{
+    int i;
+
+    /* Only initialise in a domain entitled to act as the USB backend. */
+    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
+         !(xen_start_info.flags & SIF_USB_BE_DOMAIN) )
+        return 0;
+
+    /* Reserve the virtual-address window used to map guest buffers. */
+    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
+        BUG();
+
+    /* All pending-request slots start free. */
+    pending_cons = 0;
+    pending_prod = MAX_PENDING_REQS;
+    memset(pending_reqs, 0, sizeof(pending_reqs));
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
+
+    spin_lock_init(&pend_prod_lock);
+
+    spin_lock_init(&owned_ports_lock);
+    INIT_LIST_HEAD(&owned_ports);
+
+    spin_lock_init(&usbio_schedule_list_lock);
+    INIT_LIST_HEAD(&usbio_schedule_list);
+
+    if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+        BUG();
+
+    usbif_interface_init();
+
+    usbif_ctrlif_init();
+
+    /* NOTE(review): usb_register()'s return value is ignored — confirm
+     * failure here is impossible or acceptable. */
+    usb_register(&driver);
+
+    /* Bug fix: the message previously lacked a trailing newline, leaving
+     * the log line unterminated. */
+    printk(KERN_INFO "Xen USB Backend Initialised\n");
+
+    return 0;
+}
+
+__initcall(usbif_init);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c
new file mode 100644
index 0000000000..6a517b13bc
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c
@@ -0,0 +1,1738 @@
+/*
+ * Xen Virtual USB Frontend Driver
+ *
+ * This file contains the first version of the Xen virtual USB hub
+ * that I've managed not to delete by mistake (3rd time lucky!).
+ *
+ * Based on Linux's uhci.c, original copyright notices are displayed
+ * below. Portions also (c) 2004 Intel Research Cambridge
+ * and (c) 2004, 2005 Mark Williamson
+ *
+ * Contact <mark.williamson@cl.cam.ac.uk> or
+ * <xen-devel@lists.sourceforge.net> regarding this code.
+ *
+ * Still to be (maybe) implemented:
+ * - migration / backend restart support?
+ * - support for building / using as a module
+ */
+
+/*
+ * Universal Host Controller Interface driver for USB.
+ *
+ * Maintainer: Johannes Erdfelt <johannes@erdfelt.com>
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@erdfelt.com
+ * (C) Copyright 1999 Randy Dunlap
+ * (C) Copyright 1999 Georg Acher, acher@in.tum.de
+ * (C) Copyright 1999 Deti Fliegl, deti@fliegl.de
+ * (C) Copyright 1999 Thomas Sailer, sailer@ife.ee.ethz.ch
+ * (C) Copyright 1999 Roman Weissgaerber, weissg@vienna.at
+ * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
+ * support from usb-ohci.c by Adam Richter, adam@yggdrasil.com).
+ * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
+ *
+ * Intel documents this fairly well, and as far as I know there
+ * are no royalties or anything like that, but even so there are
+ * people who decided that they want to do the same thing in a
+ * completely different way.
+ *
+ * WARNING! The USB documentation is downright evil. Most of it
+ * is just crap, written by a committee. You're better off ignoring
+ * most of it, the important stuff is:
+ * - the low-level protocol (fairly simple but lots of small details)
+ * - working around the horridness of the rest
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#ifdef CONFIG_USB_DEBUG
+#define DEBUG
+#else
+#undef DEBUG
+#endif
+#include <linux/usb.h>
+
+#include <asm/irq.h>
+#include <asm/system.h>
+
+#include "xhci.h"
+
+#include "../../../../../drivers/usb/hcd.h"
+
+#include <asm-xen/xen-public/io/usbif.h>
+#include <asm/ctrl_if.h>
+#include <asm/xen-public/io/domain_controller.h>
+
+/*
+ * Version Information
+ */
+#define DRIVER_VERSION "v1.0"
+#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \
+ "Randy Dunlap, Georg Acher, Deti Fliegl, " \
+ "Thomas Sailer, Roman Weissgaerber, Mark Williamson"
+#define DRIVER_DESC "Xen Virtual USB Host Controller Interface"
+
+/*
+ * debug = 0, no debugging messages
+ * debug = 1, dump failed URB's except for stalls
+ * debug = 2, dump all failed URB's (including stalls)
+ */
+#ifdef DEBUG
+static int debug = 1;
+#else
+static int debug = 0;
+#endif
+MODULE_PARM(debug, "i");
+MODULE_PARM_DESC(debug, "Debug level");
+static char *errbuf;
+#define ERRBUF_LEN (PAGE_SIZE * 8)
+
+static int rh_submit_urb(struct urb *urb);
+static int rh_unlink_urb(struct urb *urb);
+static int xhci_unlink_urb(struct urb *urb);
+static void xhci_call_completion(struct urb *urb);
+static void xhci_drain_ring(void);
+static void xhci_transfer_result(struct xhci *xhci, struct urb *urb);
+static void xhci_finish_completion(void);
+
+#define MAX_URB_LOOP 2048 /* Maximum number of linked URB's */
+
+static kmem_cache_t *xhci_up_cachep; /* urb_priv cache */
+static struct xhci *xhci; /* XHCI structure for the interface */
+
+/******************************************************************************
+ * DEBUGGING
+ */
+
+#ifdef DEBUG
+
+static void dump_urb(struct urb *urb)
+{
+ printk(KERN_DEBUG "dumping urb @ %p\n"
+ " hcpriv = %p\n"
+ " next = %p\n"
+ " dev = %p\n"
+ " pipe = 0x%lx\n"
+ " status = %d\n"
+ " transfer_flags = 0x%lx\n"
+ " transfer_buffer = %p\n"
+ " transfer_buffer_length = %d\n"
+ " actual_length = %d\n"
+ " bandwidth = %d\n"
+ " setup_packet = %p\n",
+ urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status,
+ urb->transfer_flags, urb->transfer_buffer,
+ urb->transfer_buffer_length, urb->actual_length, urb->bandwidth,
+ urb->setup_packet);
+ if ( urb->setup_packet != NULL )
+ printk(KERN_DEBUG
+ "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n",
+ urb->setup_packet[0], urb->setup_packet[1],
+ urb->setup_packet[2], urb->setup_packet[3],
+ urb->setup_packet[4], urb->setup_packet[5],
+ urb->setup_packet[6], urb->setup_packet[7]);
+ printk(KERN_DEBUG "complete = %p\n"
+ "interval = %d\n", urb->complete, urb->interval);
+
+}
+
+static void xhci_show_resp(usbif_response_t *r)
+{
+ printk(KERN_DEBUG "dumping response @ %p\n"
+ " id=0x%lx\n"
+ " op=0x%x\n"
+ " data=0x%x\n"
+ " status=0x%x\n"
+ " length=0x%lx\n",
+ r->id, r->operation, r->data, r->status, r->length);
+}
+
+#define DPRINTK(...) printk(KERN_DEBUG __VA_ARGS__)
+
+#else /* DEBUG */
+
+#define dump_urb(blah) ((void)0)
+#define xhci_show_resp(blah) ((void)0)
+#define DPRINTK(blah,...) ((void)0)
+
+#endif /* DEBUG */
+
+/******************************************************************************
+ * RING REQUEST HANDLING
+ */
+
+#define RING_PLUGGED(_hc) ( RING_FULL(&(_hc)->usb_ring) || (_hc)->recovery )
+
+/**
+ * xhci_construct_isoc - add isochronous information to a request
+ */
+static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb)
+{
+ usbif_iso_t *schedule;
+ int i;
+ struct urb_priv *urb_priv = urb->hcpriv;
+
+ req->num_iso = urb->number_of_packets;
+ schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL);
+
+ if ( schedule == NULL )
+ return -ENOMEM;
+
+ for ( i = 0; i < req->num_iso; i++ )
+ {
+ schedule[i].buffer_offset = urb->iso_frame_desc[i].offset;
+ schedule[i].length = urb->iso_frame_desc[i].length;
+ }
+
+ urb_priv->schedule = schedule;
+ req->iso_schedule = virt_to_machine(schedule);
+
+ return 0;
+}
+
+/**
+ * xhci_queue_req - construct and queue request for an URB
+ */
+static int xhci_queue_req(struct urb *urb)
+{
+ unsigned long flags;
+ usbif_request_t *req;
+ usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+#if 0 /* was "#if DEBUG": errors when DEBUG is defined empty, and dump references undefined 'usbif' */
+ printk(KERN_DEBUG
+ "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
+ usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
+ usbif->resp_prod, xhci->usb_resp_cons);
+#endif
+
+ spin_lock_irqsave(&xhci->ring_lock, flags);
+
+ if ( RING_PLUGGED(xhci) )
+ {
+ printk(KERN_WARNING
+ "xhci_queue_req(): USB ring plugged, not queuing request\n");
+ spin_unlock_irqrestore(&xhci->ring_lock, flags);
+ return -ENOBUFS;
+ }
+
+ /* Stick something in the shared communications ring. */
+ req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+ req->operation = USBIF_OP_IO;
+ req->port = 0; /* We don't care what the port is. */
+ req->id = (unsigned long) urb->hcpriv;
+ req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
+ req->devnum = usb_pipedevice(urb->pipe);
+ req->direction = usb_pipein(urb->pipe);
+ req->speed = usb_pipeslow(urb->pipe);
+ req->pipe_type = usb_pipetype(urb->pipe);
+ req->length = urb->transfer_buffer_length;
+ req->transfer_flags = urb->transfer_flags;
+ req->endpoint = usb_pipeendpoint(urb->pipe);
+        /* note: req->speed already assigned above; duplicate assignment removed */
+ req->timeout = urb->timeout * (1000 / HZ);
+
+ if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */
+ {
+ int ret = xhci_construct_isoc(req, urb);
+        if ( ret != 0 )
+        { spin_unlock_irqrestore(&xhci->ring_lock, flags); return ret; }
+ }
+
+ if(urb->setup_packet != NULL)
+ memcpy(req->setup, urb->setup_packet, 8);
+ else
+ memset(req->setup, 0, 8);
+
+ usb_ring->req_prod_pvt++;
+ RING_PUSH_REQUESTS(usb_ring);
+
+ spin_unlock_irqrestore(&xhci->ring_lock, flags);
+
+ notify_via_evtchn(xhci->evtchn);
+
+ DPRINTK("Queued request for an URB.\n");
+ dump_urb(urb);
+
+ return -EINPROGRESS;
+}
+
+/**
+ * xhci_queue_probe - queue a probe request for a particular port
+ */
+static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port)
+{
+ usbif_request_t *req;
+ usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+#if 0 /* was "#if DEBUG": errors when DEBUG is defined empty, and dump references undefined 'usbif' */
+ printk(KERN_DEBUG
+ "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
+ "resp_cons = %d\n", usbif->req_prod,
+ virt_to_machine(&usbif->req_prod),
+ usbif->resp_prod, xhci->usb_resp_cons);
+#endif
+
+ /* This is always called from the timer interrupt. */
+ spin_lock(&xhci->ring_lock);
+
+ if ( RING_PLUGGED(xhci) )
+ {
+ printk(KERN_WARNING
+ "xhci_queue_probe(): ring full, not queuing request\n");
+ spin_unlock(&xhci->ring_lock);
+ return NULL;
+ }
+
+ /* Stick something in the shared communications ring. */
+ req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+ memset(req, 0, sizeof(*req));
+
+ req->operation = USBIF_OP_PROBE;
+ req->port = port;
+
+ usb_ring->req_prod_pvt++;
+ RING_PUSH_REQUESTS(usb_ring);
+
+ spin_unlock(&xhci->ring_lock);
+
+ notify_via_evtchn(xhci->evtchn);
+
+ return req;
+}
+
+/**
+ * xhci_port_reset - queue a reset request for a particular port
+ */
+static int xhci_port_reset(usbif_vdev_t port)
+{
+ usbif_request_t *req;
+ usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+ /* Only ever happens from process context (hub thread). */
+ spin_lock_irq(&xhci->ring_lock);
+
+ if ( RING_PLUGGED(xhci) )
+ {
+ printk(KERN_WARNING
+ "xhci_port_reset(): ring plugged, not queuing request\n");
+ spin_unlock_irq(&xhci->ring_lock);
+ return -ENOBUFS;
+ }
+
+ /* We only reset one port at a time, so we only need one variable per
+ * hub. */
+ xhci->awaiting_reset = 1;
+
+ /* Stick something in the shared communications ring. */
+ req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+ memset(req, 0, sizeof(*req));
+
+ req->operation = USBIF_OP_RESET;
+ req->port = port;
+
+ usb_ring->req_prod_pvt++;
+ RING_PUSH_REQUESTS(usb_ring);
+
+ spin_unlock_irq(&xhci->ring_lock);
+
+ notify_via_evtchn(xhci->evtchn);
+
+ while ( xhci->awaiting_reset > 0 )
+ {
+ mdelay(1);
+ xhci_drain_ring();
+ }
+
+ xhci->rh.ports[port].pe = 1;
+ xhci->rh.ports[port].pe_chg = 1;
+
+ return xhci->awaiting_reset;
+}
+
+
+/******************************************************************************
+ * RING RESPONSE HANDLING
+ */
+
+static void receive_usb_reset(usbif_response_t *resp)
+{
+ xhci->awaiting_reset = resp->status;
+ rmb();
+
+}
+
+static void receive_usb_probe(usbif_response_t *resp)
+{
+ spin_lock(&xhci->rh.port_state_lock);
+
+ if ( resp->status >= 0 )
+ {
+ if ( resp->status == 1 )
+ {
+            /* If there's a device there and there wasn't one before there must
+ * have been a connection status change. */
+ if( xhci->rh.ports[resp->data].cs == 0 )
+ {
+ xhci->rh.ports[resp->data].cs = 1;
+ xhci->rh.ports[resp->data].cs_chg = 1;
+ }
+ }
+ else if ( resp->status == 0 )
+ {
+ if(xhci->rh.ports[resp->data].cs == 1 )
+ {
+ xhci->rh.ports[resp->data].cs = 0;
+ xhci->rh.ports[resp->data].cs_chg = 1;
+ xhci->rh.ports[resp->data].pe = 0;
+ /* According to USB Spec v2.0, 11.24.2.7.2.2, we don't need
+ * to set pe_chg since an error has not occurred. */
+ }
+ }
+ else
+ printk(KERN_WARNING "receive_usb_probe(): unexpected status %d "
+ "for port %d\n", resp->status, resp->data);
+ }
+ else if ( resp->status < 0)
+ printk(KERN_WARNING "receive_usb_probe(): got error status %d\n",
+ resp->status);
+
+ spin_unlock(&xhci->rh.port_state_lock);
+}
+
+static void receive_usb_io(usbif_response_t *resp)
+{
+ struct urb_priv *urbp = (struct urb_priv *)resp->id;
+ struct urb *urb = urbp->urb;
+
+ urb->actual_length = resp->length;
+ urbp->in_progress = 0;
+
+ if( usb_pipetype(urb->pipe) == 0 ) /* ISO */
+ {
+ int i;
+
+ /* Copy ISO schedule results back in. */
+ for ( i = 0; i < urb->number_of_packets; i++ )
+ {
+ urb->iso_frame_desc[i].status
+ = urbp->schedule[i].status;
+ urb->iso_frame_desc[i].actual_length
+ = urbp->schedule[i].length;
+ }
+ free_page((unsigned long)urbp->schedule);
+ }
+
+ /* Only set status if it's not been changed since submission. It might
+ * have been changed if the URB has been unlinked asynchronously, for
+ * instance. */
+ if ( urb->status == -EINPROGRESS )
+ urbp->status = urb->status = resp->status;
+}
+
+/**
+ * xhci_drain_ring - drain responses from the ring, calling handlers
+ *
+ * This may be called from interrupt context when an event is received from the
+ * backend domain, or sometimes in process context whilst waiting for a port
+ * reset or URB completion.
+ */
+static void xhci_drain_ring(void)
+{
+ struct list_head *tmp, *head;
+ usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+ usbif_response_t *resp;
+ RING_IDX i, rp;
+
+ /* Walk the ring here to get responses, updating URBs to show what
+ * completed. */
+
+ rp = usb_ring->sring->rsp_prod;
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ /* Take items off the comms ring, taking care not to overflow. */
+ for ( i = usb_ring->rsp_cons; i != rp; i++ )
+ {
+ resp = RING_GET_RESPONSE(usb_ring, i);
+
+ /* May need to deal with batching and with putting a ceiling on
+ the number dispatched for performance and anti-dos reasons */
+
+ xhci_show_resp(resp);
+
+ switch ( resp->operation )
+ {
+ case USBIF_OP_PROBE:
+ receive_usb_probe(resp);
+ break;
+
+ case USBIF_OP_IO:
+ receive_usb_io(resp);
+ break;
+
+ case USBIF_OP_RESET:
+ receive_usb_reset(resp);
+ break;
+
+ default:
+ printk(KERN_WARNING
+ "error: unknown USB io operation response [%d]\n",
+ resp->operation);
+ break;
+ }
+ }
+
+ usb_ring->rsp_cons = i;
+
+ /* Walk the list of pending URB's to see which ones completed and do
+ * callbacks, etc. */
+ spin_lock(&xhci->urb_list_lock);
+ head = &xhci->urb_list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb *urb = list_entry(tmp, struct urb, urb_list);
+
+ tmp = tmp->next;
+
+ /* Checks the status and does all of the magic necessary */
+ xhci_transfer_result(xhci, urb);
+ }
+ spin_unlock(&xhci->urb_list_lock);
+
+ xhci_finish_completion();
+}
+
+
+static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs)
+{
+ xhci_drain_ring();
+}
+
+/******************************************************************************
+ * HOST CONTROLLER FUNCTIONALITY
+ */
+
+/**
+ * no-op implementation of private device alloc / free routines
+ */
+static int xhci_do_nothing_dev(struct usb_device *dev)
+{
+ return 0;
+}
+
+static inline void xhci_add_complete(struct urb *urb)
+{
+ struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xhci->complete_list_lock, flags);
+ list_add_tail(&urbp->complete_list, &xhci->complete_list);
+ spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+}
+
+/* When this returns, the owner of the URB may free its
+ * storage.
+ *
+ * We spin and wait for the URB to complete before returning.
+ *
+ * Call with urb->lock acquired.
+ */
+static void xhci_delete_urb(struct urb *urb)
+{
+ struct urb_priv *urbp;
+
+ urbp = urb->hcpriv;
+
+ /* If there's no urb_priv structure for this URB then it can't have
+ * been submitted at all. */
+ if ( urbp == NULL )
+ return;
+
+ /* For now we just spin until the URB completes. It shouldn't take too
+ * long and we don't expect to have to do this very often. */
+ while ( urb->status == -EINPROGRESS )
+ {
+ xhci_drain_ring();
+ mdelay(1);
+ }
+
+ /* Now we know that further transfers to the buffer won't
+ * occur, so we can safely return. */
+}
+
+static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb)
+{
+ struct urb_priv *urbp;
+
+ urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC);
+ if (!urbp) {
+ err("xhci_alloc_urb_priv: couldn't allocate memory for urb_priv\n");
+ return NULL;
+ }
+
+ memset((void *)urbp, 0, sizeof(*urbp));
+
+ urbp->inserttime = jiffies;
+ urbp->urb = urb;
+ urbp->dev = urb->dev;
+
+ INIT_LIST_HEAD(&urbp->complete_list);
+
+ urb->hcpriv = urbp;
+
+ return urbp;
+}
+
+/*
+ * MUST be called with urb->lock acquired
+ */
+/* When is this called? Do we need to stop the transfer (as we
+ * currently do)? */
+static void xhci_destroy_urb_priv(struct urb *urb)
+{
+ struct urb_priv *urbp;
+
+ urbp = (struct urb_priv *)urb->hcpriv;
+ if (!urbp)
+ return;
+
+ if (!list_empty(&urb->urb_list))
+ warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb);
+
+ if (!list_empty(&urbp->complete_list))
+ warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list", urb);
+
+ kmem_cache_free(xhci_up_cachep, urb->hcpriv);
+
+ urb->hcpriv = NULL;
+}
+
+/**
+ * Try to find URBs in progress on the same pipe to the same device.
+ *
+ * MUST be called with xhci->urb_list_lock acquired
+ */
+static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb)
+{
+ struct list_head *tmp, *head;
+
+ /* We don't match Isoc transfers since they are special */
+ if (usb_pipeisoc(urb->pipe))
+ return NULL;
+
+ head = &xhci->urb_list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb *u = list_entry(tmp, struct urb, urb_list);
+
+ tmp = tmp->next;
+
+ if (u->dev == urb->dev && u->pipe == urb->pipe &&
+ u->status == -EINPROGRESS)
+ return u;
+ }
+
+ return NULL;
+}
+
+static int xhci_submit_urb(struct urb *urb)
+{
+ int ret = -EINVAL;
+ unsigned long flags;
+ struct urb *eurb;
+ int bustime;
+
+ DPRINTK("URB submitted to XHCI driver.\n");
+ dump_urb(urb);
+
+ if (!urb)
+ return -EINVAL;
+
+ if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) {
+ warn("xhci_submit_urb: urb %p belongs to disconnected device or bus?", urb);
+ return -ENODEV;
+ }
+
+ if ( urb->dev->devpath == NULL )
+ BUG();
+
+ usb_inc_dev_use(urb->dev);
+
+ spin_lock_irqsave(&xhci->urb_list_lock, flags);
+ spin_lock(&urb->lock);
+
+ if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET ||
+ urb->status == -ECONNABORTED) {
+ dbg("xhci_submit_urb: urb not available to submit (status = %d)", urb->status);
+ /* Since we can have problems on the out path */
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+ usb_dec_dev_use(urb->dev);
+
+ return ret;
+ }
+
+ INIT_LIST_HEAD(&urb->urb_list);
+ if (!xhci_alloc_urb_priv(urb)) {
+ ret = -ENOMEM;
+
+ goto out;
+ }
+
+ ( (struct urb_priv *)urb->hcpriv )->in_progress = 1;
+
+ eurb = xhci_find_urb_ep(xhci, urb);
+ if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) {
+ ret = -ENXIO;
+
+ goto out;
+ }
+
+ /* Short circuit the virtual root hub */
+ if (urb->dev == xhci->rh.dev) {
+ ret = rh_submit_urb(urb);
+
+ goto out;
+ }
+
+ switch (usb_pipetype(urb->pipe)) {
+ case PIPE_CONTROL:
+ case PIPE_BULK:
+ ret = xhci_queue_req(urb);
+ break;
+
+ case PIPE_INTERRUPT:
+ if (urb->bandwidth == 0) { /* not yet checked/allocated */
+ bustime = usb_check_bandwidth(urb->dev, urb);
+ if (bustime < 0)
+ ret = bustime;
+ else {
+ ret = xhci_queue_req(urb);
+ if (ret == -EINPROGRESS)
+ usb_claim_bandwidth(urb->dev, urb,
+ bustime, 0);
+ }
+ } else /* bandwidth is already set */
+ ret = xhci_queue_req(urb);
+ break;
+
+ case PIPE_ISOCHRONOUS:
+ if (urb->bandwidth == 0) { /* not yet checked/allocated */
+ if (urb->number_of_packets <= 0) {
+ ret = -EINVAL;
+ break;
+ }
+ bustime = usb_check_bandwidth(urb->dev, urb);
+ if (bustime < 0) {
+ ret = bustime;
+ break;
+ }
+
+ ret = xhci_queue_req(urb);
+ if (ret == -EINPROGRESS)
+ usb_claim_bandwidth(urb->dev, urb, bustime, 1);
+ } else /* bandwidth is already set */
+ ret = xhci_queue_req(urb);
+ break;
+ }
+out:
+ urb->status = ret;
+
+ if (ret == -EINPROGRESS) {
+ /* We use _tail to make find_urb_ep more efficient */
+ list_add_tail(&urb->urb_list, &xhci->urb_list);
+
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ return 0;
+ }
+
+ xhci_delete_urb(urb);
+
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ /* Only call completion if it was successful */
+ if (!ret)
+ xhci_call_completion(urb);
+
+ return ret;
+}
+
+/*
+ * Return the result of a transfer
+ *
+ * MUST be called with urb_list_lock acquired
+ */
+static void xhci_transfer_result(struct xhci *xhci, struct urb *urb)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct urb_priv *urbp;
+
+ /* The root hub is special */
+ if (urb->dev == xhci->rh.dev)
+ return;
+
+ spin_lock_irqsave(&urb->lock, flags);
+
+ urbp = (struct urb_priv *)urb->hcpriv;
+
+ if ( ( (struct urb_priv *)urb->hcpriv )->in_progress )
+ ret = -EINPROGRESS;
+
+ if (urb->actual_length < urb->transfer_buffer_length) {
+ if (urb->transfer_flags & USB_DISABLE_SPD) {
+ ret = -EREMOTEIO;
+ }
+ }
+
+ if (urb->status == -EPIPE)
+ {
+ ret = urb->status;
+ /* endpoint has stalled - mark it halted */
+ usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe),
+ usb_pipeout(urb->pipe));
+ }
+
+ if ((debug == 1 && ret != 0 && ret != -EPIPE) ||
+ (ret != 0 && debug > 1)) {
+ /* Some debugging code */
+ dbg("xhci_result_interrupt/bulk() failed with status %x",
+			ret);
+ }
+
+ if (ret == -EINPROGRESS)
+ goto out;
+
+ switch (usb_pipetype(urb->pipe)) {
+ case PIPE_CONTROL:
+ case PIPE_BULK:
+ case PIPE_ISOCHRONOUS:
+ /* Release bandwidth for Interrupt or Isoc. transfers */
+ /* Spinlock needed ? */
+ if (urb->bandwidth)
+ usb_release_bandwidth(urb->dev, urb, 1);
+ xhci_delete_urb(urb);
+ break;
+ case PIPE_INTERRUPT:
+ /* Interrupts are an exception */
+ if (urb->interval)
+ goto out_complete;
+
+ /* Release bandwidth for Interrupt or Isoc. transfers */
+ /* Spinlock needed ? */
+ if (urb->bandwidth)
+ usb_release_bandwidth(urb->dev, urb, 0);
+ xhci_delete_urb(urb);
+ break;
+ default:
+ info("xhci_transfer_result: unknown pipe type %d for urb %p\n",
+ usb_pipetype(urb->pipe), urb);
+ }
+
+ /* Remove it from xhci->urb_list */
+ list_del_init(&urb->urb_list);
+
+out_complete:
+ xhci_add_complete(urb);
+
+out:
+ spin_unlock_irqrestore(&urb->lock, flags);
+}
+
+static int xhci_unlink_urb(struct urb *urb)
+{
+ unsigned long flags;
+ struct urb_priv *urbp = urb->hcpriv;
+
+ if (!urb)
+ return -EINVAL;
+
+ if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv)
+ return -ENODEV;
+
+ spin_lock_irqsave(&xhci->urb_list_lock, flags);
+ spin_lock(&urb->lock);
+
+ /* Release bandwidth for Interrupt or Isoc. transfers */
+ /* Spinlock needed ? */
+ if (urb->bandwidth) {
+ switch (usb_pipetype(urb->pipe)) {
+ case PIPE_INTERRUPT:
+ usb_release_bandwidth(urb->dev, urb, 0);
+ break;
+ case PIPE_ISOCHRONOUS:
+ usb_release_bandwidth(urb->dev, urb, 1);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (urb->status != -EINPROGRESS) {
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+ return 0;
+ }
+
+ list_del_init(&urb->urb_list);
+
+ /* Short circuit the virtual root hub */
+ if (urb->dev == xhci->rh.dev) {
+ rh_unlink_urb(urb);
+
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ xhci_call_completion(urb);
+ } else {
+ if (urb->transfer_flags & USB_ASYNC_UNLINK) {
+			/* We don't currently attempt to cancel URBs
+ * that have been queued in the ring. We handle async
+ * unlinked URBs when they complete. */
+ urbp->status = urb->status = -ECONNABORTED;
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+ } else {
+ urb->status = -ENOENT;
+
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ if (in_interrupt()) { /* wait at least 1 frame */
+ static int errorcount = 10;
+
+ if (errorcount--)
+ dbg("xhci_unlink_urb called from interrupt for urb %p", urb);
+ udelay(1000);
+ } else
+ schedule_timeout(1+1*HZ/1000);
+
+ xhci_delete_urb(urb);
+
+ xhci_call_completion(urb);
+ }
+ }
+
+ return 0;
+}
+
+static void xhci_call_completion(struct urb *urb)
+{
+ struct urb_priv *urbp;
+ struct usb_device *dev = urb->dev;
+ int is_ring = 0, killed, resubmit_interrupt, status;
+ struct urb *nurb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&urb->lock, flags);
+
+ urbp = (struct urb_priv *)urb->hcpriv;
+ if (!urbp || !urb->dev) {
+ spin_unlock_irqrestore(&urb->lock, flags);
+ return;
+ }
+
+ killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED ||
+ urb->status == -ECONNRESET);
+ resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT &&
+ urb->interval);
+
+ nurb = urb->next;
+ if (nurb && !killed) {
+ int count = 0;
+
+ while (nurb && nurb != urb && count < MAX_URB_LOOP) {
+ if (nurb->status == -ENOENT ||
+ nurb->status == -ECONNABORTED ||
+ nurb->status == -ECONNRESET) {
+ killed = 1;
+ break;
+ }
+
+ nurb = nurb->next;
+ count++;
+ }
+
+ if (count == MAX_URB_LOOP)
+ err("xhci_call_completion: too many linked URB's, loop? (first loop)");
+
+ /* Check to see if chain is a ring */
+ is_ring = (nurb == urb);
+ }
+
+ status = urbp->status;
+ if (!resubmit_interrupt || killed)
+ /* We don't need urb_priv anymore */
+ xhci_destroy_urb_priv(urb);
+
+ if (!killed)
+ urb->status = status;
+
+ spin_unlock_irqrestore(&urb->lock, flags);
+
+ if (urb->complete)
+ urb->complete(urb);
+
+ if (resubmit_interrupt)
+ /* Recheck the status. The completion handler may have */
+ /* unlinked the resubmitting interrupt URB */
+ killed = (urb->status == -ENOENT ||
+ urb->status == -ECONNABORTED ||
+ urb->status == -ECONNRESET);
+
+ if (resubmit_interrupt && !killed) {
+ if ( urb->dev != xhci->rh.dev )
+ xhci_queue_req(urb); /* XXX What if this fails? */
+ /* Don't need to resubmit URBs for the virtual root dev. */
+ } else {
+ if (is_ring && !killed) {
+ urb->dev = dev;
+ xhci_submit_urb(urb);
+ } else {
+ /* We decrement the usage count after we're done */
+ /* with everything */
+ usb_dec_dev_use(dev);
+ }
+ }
+}
+
+static void xhci_finish_completion(void)
+{
+ struct list_head *tmp, *head;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xhci->complete_list_lock, flags);
+ head = &xhci->complete_list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb_priv *urbp = list_entry(tmp, struct urb_priv,
+ complete_list);
+ struct urb *urb = urbp->urb;
+
+ list_del_init(&urbp->complete_list);
+ spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+
+ xhci_call_completion(urb);
+
+ spin_lock_irqsave(&xhci->complete_list_lock, flags);
+ head = &xhci->complete_list;
+ tmp = head->next;
+ }
+ spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+}
+
+static struct usb_operations xhci_device_operations = {
+ .allocate = xhci_do_nothing_dev,
+ .deallocate = xhci_do_nothing_dev,
+ /* It doesn't look like any drivers actually care what the frame number
+ * is at the moment! If necessary, we could approximate the current
+         * frame number by passing it from the backend in response messages. */
+ .get_frame_number = NULL,
+ .submit_urb = xhci_submit_urb,
+ .unlink_urb = xhci_unlink_urb
+};
+
+/******************************************************************************
+ * VIRTUAL ROOT HUB EMULATION
+ */
+
+static __u8 root_hub_dev_des[] =
+{
+ 0x12, /* __u8 bLength; */
+ 0x01, /* __u8 bDescriptorType; Device */
+ 0x00, /* __u16 bcdUSB; v1.0 */
+ 0x01,
+ 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */
+ 0x00, /* __u8 bDeviceSubClass; */
+ 0x00, /* __u8 bDeviceProtocol; */
+ 0x08, /* __u8 bMaxPacketSize0; 8 Bytes */
+ 0x00, /* __u16 idVendor; */
+ 0x00,
+ 0x00, /* __u16 idProduct; */
+ 0x00,
+ 0x00, /* __u16 bcdDevice; */
+ 0x00,
+ 0x00, /* __u8 iManufacturer; */
+ 0x02, /* __u8 iProduct; */
+ 0x01, /* __u8 iSerialNumber; */
+ 0x01 /* __u8 bNumConfigurations; */
+};
+
+
+/* Configuration descriptor */
+static __u8 root_hub_config_des[] =
+{
+ 0x09, /* __u8 bLength; */
+ 0x02, /* __u8 bDescriptorType; Configuration */
+ 0x19, /* __u16 wTotalLength; */
+ 0x00,
+ 0x01, /* __u8 bNumInterfaces; */
+ 0x01, /* __u8 bConfigurationValue; */
+ 0x00, /* __u8 iConfiguration; */
+ 0x40, /* __u8 bmAttributes;
+ Bit 7: Bus-powered, 6: Self-powered,
+ Bit 5 Remote-wakeup, 4..0: resvd */
+ 0x00, /* __u8 MaxPower; */
+
+ /* interface */
+ 0x09, /* __u8 if_bLength; */
+ 0x04, /* __u8 if_bDescriptorType; Interface */
+ 0x00, /* __u8 if_bInterfaceNumber; */
+ 0x00, /* __u8 if_bAlternateSetting; */
+ 0x01, /* __u8 if_bNumEndpoints; */
+ 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */
+ 0x00, /* __u8 if_bInterfaceSubClass; */
+ 0x00, /* __u8 if_bInterfaceProtocol; */
+ 0x00, /* __u8 if_iInterface; */
+
+ /* endpoint */
+ 0x07, /* __u8 ep_bLength; */
+ 0x05, /* __u8 ep_bDescriptorType; Endpoint */
+ 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */
+ 0x03, /* __u8 ep_bmAttributes; Interrupt */
+ 0x08, /* __u16 ep_wMaxPacketSize; 8 Bytes */
+ 0x00,
+ 0xff /* __u8 ep_bInterval; 255 ms */
+};
+
+static __u8 root_hub_hub_des[] =
+{
+ 0x09, /* __u8 bLength; */
+ 0x29, /* __u8 bDescriptorType; Hub-descriptor */
+ 0x02, /* __u8 bNbrPorts; */
+ 0x00, /* __u16 wHubCharacteristics; */
+ 0x00,
+ 0x01, /* __u8 bPwrOn2pwrGood; 2ms */
+ 0x00, /* __u8 bHubContrCurrent; 0 mA */
+ 0x00, /* __u8 DeviceRemovable; *** 7 Ports max *** */
+ 0xff /* __u8 PortPwrCtrlMask; *** 7 ports max *** */
+};
+
+/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT */
+static int rh_send_irq(struct urb *urb)
+{
+ struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
+ xhci_port_t *ports = xhci->rh.ports;
+ unsigned long flags;
+ int i, len = 1;
+ __u16 data = 0;
+
+ spin_lock_irqsave(&urb->lock, flags);
+ for (i = 0; i < xhci->rh.numports; i++) {
+ /* Set a bit if anything at all has changed on the port, as per
+ * USB spec 11.12 */
+ data |= (ports[i].cs_chg || ports[i].pe_chg )
+ ? (1 << (i + 1))
+ : 0;
+
+ len = (i + 1) / 8 + 1;
+ }
+
+ *(__u16 *) urb->transfer_buffer = cpu_to_le16(data);
+ urb->actual_length = len;
+ urbp->status = 0;
+
+ spin_unlock_irqrestore(&urb->lock, flags);
+
+ if ((data > 0) && (xhci->rh.send != 0)) {
+ dbg("root-hub INT complete: data: %x", data);
+ xhci_call_completion(urb);
+ }
+
+ return 0;
+}
+
+/* Virtual Root Hub INTs are polled by this timer every "interval" ms */
+static int rh_init_int_timer(struct urb *urb);
+
+static void rh_int_timer_do(unsigned long ptr)
+{
+ struct urb *urb = (struct urb *)ptr;
+ struct list_head list, *tmp, *head;
+ unsigned long flags;
+ int i;
+
+ for ( i = 0; i < xhci->rh.numports; i++)
+ xhci_queue_probe(i);
+
+ if (xhci->rh.send)
+ rh_send_irq(urb);
+
+ INIT_LIST_HEAD(&list);
+
+ spin_lock_irqsave(&xhci->urb_list_lock, flags);
+ head = &xhci->urb_list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb *u = list_entry(tmp, struct urb, urb_list);
+ struct urb_priv *up = (struct urb_priv *)u->hcpriv;
+
+ tmp = tmp->next;
+
+ spin_lock(&u->lock);
+
+ /* Check if the URB timed out */
+ if (u->timeout && time_after_eq(jiffies,
+ up->inserttime + u->timeout)) {
+ list_del(&u->urb_list);
+ list_add_tail(&u->urb_list, &list);
+ }
+
+ spin_unlock(&u->lock);
+ }
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ head = &list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb *u = list_entry(tmp, struct urb, urb_list);
+
+ tmp = tmp->next;
+
+ u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED;
+ xhci_unlink_urb(u);
+ }
+
+ rh_init_int_timer(urb);
+}
+
+/* Root Hub INTs are polled by this timer */
+static int rh_init_int_timer(struct urb *urb)
+{
+ xhci->rh.interval = urb->interval;
+ init_timer(&xhci->rh.rh_int_timer);
+ xhci->rh.rh_int_timer.function = rh_int_timer_do;
+ xhci->rh.rh_int_timer.data = (unsigned long)urb;
+ xhci->rh.rh_int_timer.expires = jiffies
+ + (HZ * (urb->interval < 30 ? 30 : urb->interval)) / 1000;
+ add_timer(&xhci->rh.rh_int_timer);
+
+ return 0;
+}
+
+#define OK(x) len = (x); break
+
+/* Root Hub Control Pipe */
+static int rh_submit_urb(struct urb *urb)
+{
+ unsigned int pipe = urb->pipe;
+ struct usb_ctrlrequest *cmd =
+ (struct usb_ctrlrequest *)urb->setup_packet;
+ void *data = urb->transfer_buffer;
+ int leni = urb->transfer_buffer_length;
+ int len = 0;
+ xhci_port_t *status;
+ int stat = 0;
+ int i;
+ int retstatus;
+ unsigned long flags;
+
+ __u16 cstatus;
+ __u16 bmRType_bReq;
+ __u16 wValue;
+ __u16 wIndex;
+ __u16 wLength;
+
+ if (usb_pipetype(pipe) == PIPE_INTERRUPT) {
+ xhci->rh.urb = urb;
+ xhci->rh.send = 1;
+ xhci->rh.interval = urb->interval;
+ rh_init_int_timer(urb);
+
+ return -EINPROGRESS;
+ }
+
+ bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8;
+ wValue = le16_to_cpu(cmd->wValue);
+ wIndex = le16_to_cpu(cmd->wIndex);
+ wLength = le16_to_cpu(cmd->wLength);
+
+ for (i = 0; i < 8; i++)
+ xhci->rh.c_p_r[i] = 0;
+
+ status = &xhci->rh.ports[wIndex - 1];
+
+ spin_lock_irqsave(&xhci->rh.port_state_lock, flags);
+
+ switch (bmRType_bReq) {
+ /* Request Destination:
+ without flags: Device,
+ RH_INTERFACE: interface,
+ RH_ENDPOINT: endpoint,
+ RH_CLASS means HUB here,
+ RH_OTHER | RH_CLASS almost always means HUB_PORT here
+ */
+
+ case RH_GET_STATUS:
+ *(__u16 *)data = cpu_to_le16(1);
+ OK(2);
+ case RH_GET_STATUS | RH_INTERFACE:
+ *(__u16 *)data = cpu_to_le16(0);
+ OK(2);
+ case RH_GET_STATUS | RH_ENDPOINT:
+ *(__u16 *)data = cpu_to_le16(0);
+ OK(2);
+ case RH_GET_STATUS | RH_CLASS:
+ *(__u32 *)data = cpu_to_le32(0);
+ OK(4); /* hub power */
+ case RH_GET_STATUS | RH_OTHER | RH_CLASS:
+ cstatus = (status->cs_chg) |
+ (status->pe_chg << 1) |
+ (xhci->rh.c_p_r[wIndex - 1] << 4);
+ retstatus = (status->cs) |
+ (status->pe << 1) |
+ (status->susp << 2) |
+ (1 << 8) | /* power on */
+ (status->lsda << 9);
+ *(__u16 *)data = cpu_to_le16(retstatus);
+ *(__u16 *)(data + 2) = cpu_to_le16(cstatus);
+ OK(4);
+ case RH_CLEAR_FEATURE | RH_ENDPOINT:
+ switch (wValue) {
+ case RH_ENDPOINT_STALL:
+ OK(0);
+ }
+ break;
+ case RH_CLEAR_FEATURE | RH_CLASS:
+ switch (wValue) {
+ case RH_C_HUB_OVER_CURRENT:
+ OK(0); /* hub power over current */
+ }
+ break;
+ case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS:
+ switch (wValue) {
+ case RH_PORT_ENABLE:
+ status->pe = 0;
+ OK(0);
+ case RH_PORT_SUSPEND:
+ status->susp = 0;
+ OK(0);
+ case RH_PORT_POWER:
+ OK(0); /* port power */
+ case RH_C_PORT_CONNECTION:
+ status->cs_chg = 0;
+ OK(0);
+ case RH_C_PORT_ENABLE:
+ status->pe_chg = 0;
+ OK(0);
+ case RH_C_PORT_SUSPEND:
+ /*** WR_RH_PORTSTAT(RH_PS_PSSC); */
+ OK(0);
+ case RH_C_PORT_OVER_CURRENT:
+ OK(0); /* port power over current */
+ case RH_C_PORT_RESET:
+ xhci->rh.c_p_r[wIndex - 1] = 0;
+ OK(0);
+ }
+ break;
+ case RH_SET_FEATURE | RH_OTHER | RH_CLASS:
+ switch (wValue) {
+ case RH_PORT_SUSPEND:
+ status->susp = 1;
+ OK(0);
+ case RH_PORT_RESET:
+ {
+ int ret;
+ xhci->rh.c_p_r[wIndex - 1] = 1;
+ status->pr = 0;
+ status->pe = 1;
+ ret = xhci_port_reset(wIndex - 1);
+ /* XXX MAW: should probably cancel queued transfers during reset... */
+ if ( ret == 0 ) { OK(0); }
+ else { return ret; }
+ }
+ break;
+ case RH_PORT_POWER:
+ OK(0); /* port power ** */
+ case RH_PORT_ENABLE:
+ status->pe = 1;
+ OK(0);
+ }
+ break;
+ case RH_SET_ADDRESS:
+ xhci->rh.devnum = wValue;
+ OK(0);
+ case RH_GET_DESCRIPTOR:
+ switch ((wValue & 0xff00) >> 8) {
+ case 0x01: /* device descriptor */
+ len = min_t(unsigned int, leni,
+ min_t(unsigned int,
+ sizeof(root_hub_dev_des), wLength));
+ memcpy(data, root_hub_dev_des, len);
+ OK(len);
+ case 0x02: /* configuration descriptor */
+ len = min_t(unsigned int, leni,
+ min_t(unsigned int,
+ sizeof(root_hub_config_des), wLength));
+ memcpy (data, root_hub_config_des, len);
+ OK(len);
+ case 0x03: /* string descriptors */
+ len = usb_root_hub_string (wValue & 0xff,
+ 0, "XHCI-alt",
+ data, wLength);
+ if (len > 0) {
+ OK(min_t(int, leni, len));
+ } else
+ stat = -EPIPE;
+ }
+ break;
+ case RH_GET_DESCRIPTOR | RH_CLASS:
+ root_hub_hub_des[2] = xhci->rh.numports;
+ len = min_t(unsigned int, leni,
+ min_t(unsigned int, sizeof(root_hub_hub_des), wLength));
+ memcpy(data, root_hub_hub_des, len);
+ OK(len);
+ case RH_GET_CONFIGURATION:
+ *(__u8 *)data = 0x01;
+ OK(1);
+ case RH_SET_CONFIGURATION:
+ OK(0);
+ case RH_GET_INTERFACE | RH_INTERFACE:
+ *(__u8 *)data = 0x00;
+ OK(1);
+ case RH_SET_INTERFACE | RH_INTERFACE:
+ OK(0);
+ default:
+ stat = -EPIPE;
+ }
+
+ spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags);
+
+ urb->actual_length = len;
+
+ return stat;
+}
+
+/*
+ * MUST be called with urb->lock acquired
+ */
+static int rh_unlink_urb(struct urb *urb)
+{
+ if (xhci->rh.urb == urb) {
+ urb->status = -ENOENT;
+ xhci->rh.send = 0;
+ xhci->rh.urb = NULL;
+ del_timer(&xhci->rh.rh_int_timer);
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * CONTROL PLANE FUNCTIONALITY
+ */
+
+/**
+ * alloc_xhci - initialise a new virtual root hub for a new USB device channel
+ */
+static int alloc_xhci(void)
+{
+ int retval;
+ struct usb_bus *bus;
+
+ retval = -EBUSY;
+
+ xhci = kmalloc(sizeof(*xhci), GFP_KERNEL);
+ if (!xhci) {
+ err("couldn't allocate xhci structure");
+ retval = -ENOMEM;
+ goto err_alloc_xhci;
+ }
+
+ xhci->state = USBIF_STATE_CLOSED;
+
+ spin_lock_init(&xhci->urb_list_lock);
+ INIT_LIST_HEAD(&xhci->urb_list);
+
+ spin_lock_init(&xhci->complete_list_lock);
+ INIT_LIST_HEAD(&xhci->complete_list);
+
+ spin_lock_init(&xhci->frame_list_lock);
+
+ bus = usb_alloc_bus(&xhci_device_operations);
+
+ if (!bus) {
+ err("unable to allocate bus");
+ goto err_alloc_bus;
+ }
+
+ xhci->bus = bus;
+ bus->bus_name = "XHCI";
+ bus->hcpriv = xhci;
+
+ usb_register_bus(xhci->bus);
+
+ /* Initialize the root hub */
+
+ xhci->rh.numports = 0;
+
+ xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus);
+ if (!xhci->rh.dev) {
+ err("unable to allocate root hub");
+ goto err_alloc_root_hub;
+ }
+
+ xhci->state = 0;
+
+ return 0;
+
+/*
+ * error exits:
+ */
+err_alloc_root_hub:
+ usb_deregister_bus(xhci->bus);
+ usb_free_bus(xhci->bus);
+ xhci->bus = NULL;
+
+err_alloc_bus:
+ kfree(xhci);
+
+err_alloc_xhci:
+ return retval;
+}
+
+/**
+ * usbif_status_change - handle an incoming interface-status-changed message
+ */
+static void usbif_status_change(usbif_fe_interface_status_changed_t *status)
+{
+ ctrl_msg_t cmsg;
+ usbif_fe_interface_connect_t up;
+ long rc;
+ usbif_sring_t *sring;
+
+ switch ( status->status )
+ {
+ case USBIF_INTERFACE_STATUS_DESTROYED:
+ printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n",
+ xhci->state);
+ break;
+
+ case USBIF_INTERFACE_STATUS_DISCONNECTED:
+ if ( xhci->state != USBIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message"
+ " in state %d\n", xhci->state);
+ break;
+ /* NOTE(review): the `break` above exits the switch, so the recovery
+ * code below is unreachable — confirm this is intended for now. */
+
+ spin_lock_irq(&xhci->ring_lock);
+
+ /* Clean up resources. */
+ free_page((unsigned long)xhci->usb_ring.sring);
+ free_irq(xhci->irq, xhci);
+ unbind_evtchn_from_irq(xhci->evtchn);
+
+ /* Plug the ring. */
+ xhci->recovery = 1;
+ wmb();
+
+ spin_unlock_irq(&xhci->ring_lock);
+ }
+
+ /* Move from CLOSED to DISCONNECTED state. */
+ sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&xhci->usb_ring, sring, PAGE_SIZE);
+ xhci->state = USBIF_STATE_DISCONNECTED;
+
+ /* Construct an interface-CONNECT message for the domain controller. */
+ cmsg.type = CMSG_USBIF_FE;
+ cmsg.subtype = CMSG_USBIF_FE_INTERFACE_CONNECT;
+ cmsg.length = sizeof(usbif_fe_interface_connect_t);
+ up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
+ memcpy(cmsg.msg, &up, sizeof(up));
+
+ /* Tell the controller to bring up the interface. */
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ break;
+
+ case USBIF_INTERFACE_STATUS_CONNECTED:
+ if ( xhci->state == USBIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected usbif-CONNECTED message"
+ " in state %d\n", xhci->state);
+ break;
+ }
+
+ xhci->evtchn = status->evtchn;
+ xhci->irq = bind_evtchn_to_irq(xhci->evtchn);
+ xhci->bandwidth = status->bandwidth;
+ xhci->rh.numports = status->num_ports;
+
+ xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports, GFP_KERNEL);
+
+ if ( xhci->rh.ports == NULL )
+ goto alloc_ports_nomem;
+
+ memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports);
+
+ usb_connect(xhci->rh.dev);
+
+ if (usb_new_device(xhci->rh.dev) != 0) {
+ err("unable to start root hub");
+ }
+
+ /* Allocate the appropriate USB bandwidth here... Need to
+ * somehow know what the total available is thought to be so we
+ * can calculate the reservation correctly. */
+ usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
+ 1000 - xhci->bandwidth, 0);
+
+ if ( (rc = request_irq(xhci->irq, xhci_interrupt,
+ SA_SAMPLE_RANDOM, "usbif", xhci)) )
+ printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
+
+ DPRINTK(KERN_INFO __FILE__
+ ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n",
+ xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
+ xhci->evtchn, xhci->irq);
+
+ xhci->state = USBIF_STATE_CONNECTED;
+
+ break;
+
+ default:
+ printk(KERN_WARNING "Status change to unknown value %d\n",
+ status->status);
+ break;
+ }
+
+ return;
+
+ alloc_ports_nomem:
+ printk(KERN_WARNING "Failed to allocate port memory, XHCI failed to connect.\n");
+ return;
+}
+
+/**
+ * usbif_ctrlif_rx - demux control messages by subtype
+ */
+static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->subtype )
+ {
+ case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED:
+ usbif_status_change((usbif_fe_interface_status_changed_t *)
+ &msg->msg[0]);
+ break;
+
+ /* New interface...? */
+ default:
+ msg->length = 0;
+ break;
+ }
+
+ ctrl_if_send_response(msg);
+}
+
+static void send_driver_up(void)
+{
+ control_msg_t cmsg;
+ usbif_fe_interface_status_changed_t st;
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_USBIF_FE;
+ cmsg.subtype = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(usbif_fe_driver_status_changed_t);
+ st.status = USBIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+void usbif_resume(void)
+{
+ int i;
+
+ /* Fake disconnection on all virtual USB ports (suspending / migrating
+ * will destroy hard state associated with the USB devices anyhow). */
+ /* No need to lock here. */
+ for ( i = 0; i < xhci->rh.numports; i++ )
+ {
+ xhci->rh.ports[i].cs = 0;
+ xhci->rh.ports[i].cs_chg = 1;
+ xhci->rh.ports[i].pe = 0;
+ }
+
+ send_driver_up();
+}
+
+static int __init xhci_hcd_init(void)
+{
+ int retval = -ENOMEM, i;
+
+ if ( (xen_start_info.flags & SIF_INITDOMAIN)
+ || (xen_start_info.flags & SIF_USB_BE_DOMAIN) )
+ return 0;
+
+ info(DRIVER_DESC " " DRIVER_VERSION);
+
+ if (debug) {
+ errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL);
+ if (!errbuf)
+ goto errbuf_failed;
+ }
+
+ xhci_up_cachep = kmem_cache_create("xhci_urb_priv",
+ sizeof(struct urb_priv), 0, 0, NULL, NULL);
+ if (!xhci_up_cachep)
+ goto up_failed;
+
+ /* Let the domain controller know we're here. For now we wait until
+ * connection, as for the block and net drivers. This is only strictly
+ * necessary if we're going to boot off a USB device. */
+ printk(KERN_INFO "Initialising Xen virtual USB hub\n");
+
+ (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ alloc_xhci();
+
+ send_driver_up();
+
+ /*
+ * We should read 'nr_interfaces' from response message and wait
+ * for notifications before proceeding. For now we assume that we
+ * will be notified of exactly one interface.
+ */
+ for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ if (xhci->state != USBIF_STATE_CONNECTED)
+ printk(KERN_WARNING "Timeout connecting USB frontend driver!\n");
+
+ return 0;
+
+up_failed:
+ if (errbuf)
+ kfree(errbuf);
+
+errbuf_failed:
+ return retval;
+}
+
+module_init(xhci_hcd_init);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h
new file mode 100644
index 0000000000..b42e860e2c
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h
@@ -0,0 +1,183 @@
+/******************************************************************************
+ * xhci.h
+ *
+ * Private definitions for the Xen Virtual USB Controller. Based on
+ * drivers/usb/host/uhci.h from Linux. Copyright for the imported content is
+ * retained by the original authors.
+ *
+ * Modifications are:
+ * Copyright (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2004, 2005 Mark Williamson
+ */
+
+#ifndef __LINUX_XHCI_H
+#define __LINUX_XHCI_H
+
+#include <linux/list.h>
+#include <linux/usb.h>
+#include <asm-xen/xen-public/io/usbif.h>
+#include <linux/spinlock.h>
+
+/* xhci_port_t - current known state of a virtual hub port */
+typedef struct {
+ unsigned int cs :1; /* Connection status. */
+ unsigned int cs_chg :1; /* Connection status change. */
+ unsigned int pe :1; /* Port enable. */
+ unsigned int pe_chg :1; /* Port enable change. */
+ unsigned int susp :1; /* Suspended. */
+ unsigned int lsda :1; /* Low speed device attached. */
+ unsigned int pr :1; /* Port reset. */
+} xhci_port_t;
+
+/* struct virt_root_hub - state related to the virtual root hub */
+struct virt_root_hub {
+ struct usb_device *dev;
+ int devnum; /* Address of Root Hub endpoint */
+ struct urb *urb;
+ void *int_addr;
+ int send;
+ int interval;
+ int numports;
+ int c_p_r[8];
+ struct timer_list rh_int_timer;
+ spinlock_t port_state_lock;
+ xhci_port_t *ports;
+};
+
+/* struct xhci - contains the state associated with a single USB interface */
+struct xhci {
+
+#ifdef CONFIG_PROC_FS
+ /* procfs */
+ int num;
+ struct proc_dir_entry *proc_entry;
+#endif
+
+ int evtchn; /* Interdom channel to backend */
+ int irq; /* Bound to evtchn */
+ enum {
+ USBIF_STATE_CONNECTED = 2,
+ USBIF_STATE_DISCONNECTED = 1,
+ USBIF_STATE_CLOSED = 0
+ } state; /* State of this USB interface */
+ unsigned long recovery; /* boolean recovery in progress flag */
+
+ unsigned long bandwidth;
+
+ struct usb_bus *bus;
+
+ /* Main list of URB's currently controlled by this HC */
+ spinlock_t urb_list_lock;
+ struct list_head urb_list; /* P: xhci->urb_list_lock */
+
+ /* List of URB's awaiting completion callback */
+ spinlock_t complete_list_lock;
+ struct list_head complete_list; /* P: xhci->complete_list_lock */
+
+ struct virt_root_hub rh; /* private data of the virtual root hub */
+
+ spinlock_t ring_lock;
+ usbif_front_ring_t usb_ring;
+
+ int awaiting_reset;
+};
+
+/* per-URB private data structure for the host controller */
+struct urb_priv {
+ struct urb *urb;
+ usbif_iso_t *schedule;
+ struct usb_device *dev;
+
+ int in_progress : 1; /* QH was queued (not linked in) */
+ int short_control_packet : 1; /* If we get a short packet during */
+ /* a control transfer, retrigger */
+ /* the status phase */
+
+ int status; /* Final status */
+
+ unsigned long inserttime; /* In jiffies */
+
+ struct list_head complete_list; /* P: xhci->complete_list_lock */
+};
+
+/*
+ * Locking in xhci.c
+ *
+ * spinlocks are used extensively to protect the many lists and data
+ * structures we have. It's not that pretty, but it's necessary. We
+ * need to be done with all of the locks (except complete_list_lock) when
+ * we call urb->complete. I've tried to make it simple enough so I don't
+ * have to spend hours racking my brain trying to figure out if the
+ * locking is safe.
+ *
+ * Here's the safe locking order to prevent deadlocks:
+ *
+ * #1 xhci->urb_list_lock
+ * #2 urb->lock
+ * #3 xhci->urb_remove_list_lock
+ * #4 xhci->complete_list_lock
+ *
+ * If you're going to grab 2 or more locks at once, ALWAYS grab the lock
+ * at the lowest level FIRST and NEVER grab locks at the same level at the
+ * same time.
+ *
+ * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock
+ */
+
+/* -------------------------------------------------------------------------
+ Virtual Root HUB
+ ------------------------------------------------------------------------- */
+/* destination of request */
+#define RH_DEVICE 0x00
+#define RH_INTERFACE 0x01
+#define RH_ENDPOINT 0x02
+#define RH_OTHER 0x03
+
+#define RH_CLASS 0x20
+#define RH_VENDOR 0x40
+
+/* Requests: bRequest << 8 | bmRequestType */
+#define RH_GET_STATUS 0x0080
+#define RH_CLEAR_FEATURE 0x0100
+#define RH_SET_FEATURE 0x0300
+#define RH_SET_ADDRESS 0x0500
+#define RH_GET_DESCRIPTOR 0x0680
+#define RH_SET_DESCRIPTOR 0x0700
+#define RH_GET_CONFIGURATION 0x0880
+#define RH_SET_CONFIGURATION 0x0900
+#define RH_GET_STATE 0x0280
+#define RH_GET_INTERFACE 0x0A80
+#define RH_SET_INTERFACE 0x0B00
+#define RH_SYNC_FRAME 0x0C80
+/* Our Vendor Specific Request */
+#define RH_SET_EP 0x2000
+
+/* Hub port features */
+#define RH_PORT_CONNECTION 0x00
+#define RH_PORT_ENABLE 0x01
+#define RH_PORT_SUSPEND 0x02
+#define RH_PORT_OVER_CURRENT 0x03
+#define RH_PORT_RESET 0x04
+#define RH_PORT_POWER 0x08
+#define RH_PORT_LOW_SPEED 0x09
+#define RH_C_PORT_CONNECTION 0x10
+#define RH_C_PORT_ENABLE 0x11
+#define RH_C_PORT_SUSPEND 0x12
+#define RH_C_PORT_OVER_CURRENT 0x13
+#define RH_C_PORT_RESET 0x14
+
+/* Hub features */
+#define RH_C_HUB_LOCAL_POWER 0x00
+#define RH_C_HUB_OVER_CURRENT 0x01
+#define RH_DEVICE_REMOTE_WAKEUP 0x00
+#define RH_ENDPOINT_STALL 0x01
+
+/* Our Vendor Specific feature */
+#define RH_REMOVE_EP 0x00
+
+#define RH_ACK 0x01
+#define RH_REQ_ERR -1
+#define RH_NACK 0x00
+
+#endif
+
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/bugs.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/bugs.h
deleted file mode 100644
index dde78e3616..0000000000
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/bugs.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * include/asm-i386/bugs.h
- *
- * Copyright (C) 1994 Linus Torvalds
- *
- * Cyrix stuff, June 1998 by:
- * - Rafael R. Reilova (moved everything from head.S),
- * <rreilova@ececs.uc.edu>
- * - Channing Corn (tests & fixes),
- * - Andrew D. Balsa (code cleanup).
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- * void check_bugs(void);
- */
-
-#include <linux/config.h>
-#include <linux/init.h>
-#include <asm/processor.h>
-#include <asm/i387.h>
-#include <asm/msr.h>
-
-static int __init no_halt(char *s)
-{
- boot_cpu_data.hlt_works_ok = 0;
- return 1;
-}
-
-__setup("no-hlt", no_halt);
-
-static int __init mca_pentium(char *s)
-{
- mca_pentium_flag = 1;
- return 1;
-}
-
-__setup("mca-pentium", mca_pentium);
-
-static int __init no_387(char *s)
-{
- boot_cpu_data.hard_math = 0;
- write_cr0(0xE | read_cr0());
- return 1;
-}
-
-__setup("no387", no_387);
-
-static double __initdata x = 4195835.0;
-static double __initdata y = 3145727.0;
-
-/*
- * This used to check for exceptions..
- * However, it turns out that to support that,
- * the XMM trap handlers basically had to
- * be buggy. So let's have a correct XMM trap
- * handler, and forget about printing out
- * some status at boot.
- *
- * We should really only care about bugs here
- * anyway. Not features.
- */
-static void __init check_fpu(void)
-{
- if (!boot_cpu_data.hard_math) {
-#ifndef CONFIG_MATH_EMULATION
- printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
- printk(KERN_EMERG "Giving up.\n");
- for (;;) ;
-#endif
- return;
- }
-
-/* Enable FXSR and company _before_ testing for FP problems. */
- /*
- * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
- */
- if (offsetof(struct task_struct, thread.i387.fxsave) & 15) {
- extern void __buggy_fxsr_alignment(void);
- __buggy_fxsr_alignment();
- }
- if (cpu_has_fxsr) {
- printk(KERN_INFO "Enabling fast FPU save and restore... ");
- set_in_cr4(X86_CR4_OSFXSR);
- printk("done.\n");
- }
- if (cpu_has_xmm) {
- printk(KERN_INFO "Enabling unmasked SIMD FPU exception support... ");
- set_in_cr4(X86_CR4_OSXMMEXCPT);
- printk("done.\n");
- }
-
- /* Test for the divl bug.. */
- __asm__("fninit\n\t"
- "fldl %1\n\t"
- "fdivl %2\n\t"
- "fmull %2\n\t"
- "fldl %1\n\t"
- "fsubp %%st,%%st(1)\n\t"
- "fistpl %0\n\t"
- "fwait\n\t"
- "fninit"
- : "=m" (*&boot_cpu_data.fdiv_bug)
- : "m" (*&x), "m" (*&y));
- stts();
- if (boot_cpu_data.fdiv_bug)
- printk("Hmm, FPU with FDIV bug.\n");
-}
-
-static void __init check_hlt(void)
-{
- printk(KERN_INFO "Checking 'hlt' instruction... ");
- if (!boot_cpu_data.hlt_works_ok) {
- printk("disabled\n");
- return;
- }
- __asm__ __volatile__("hlt ; hlt ; hlt ; hlt");
- printk("OK.\n");
-}
-
-/*
- * Most 386 processors have a bug where a POPAD can lock the
- * machine even from user space.
- */
-
-static void __init check_popad(void)
-{
-#ifndef CONFIG_X86_POPAD_OK
- int res, inp = (int) &res;
-
- printk(KERN_INFO "Checking for popad bug... ");
- __asm__ __volatile__(
- "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
- : "=&a" (res)
- : "d" (inp)
- : "ecx", "edi" );
- /* If this fails, it means that any user program may lock the CPU hard. Too bad. */
- if (res != 12345678) printk( "Buggy.\n" );
- else printk( "OK.\n" );
-#endif
-}
-
-/*
- * Check whether we are able to run this kernel safely on SMP.
- *
- * - In order to run on a i386, we need to be compiled for i386
- * (for due to lack of "invlpg" and working WP on a i386)
- * - In order to run on anything without a TSC, we need to be
- * compiled for a i486.
- * - In order to support the local APIC on a buggy Pentium machine,
- * we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
- * which happens implicitly if compiled for a Pentium or lower
- * (unless an advanced selection of CPU features is used) as an
- * otherwise config implies a properly working local APIC without
- * the need to do extra reads from the APIC.
-*/
-
-static void __init check_config(void)
-{
-/*
- * We'd better not be a i386 if we're configured to use some
- * i486+ only features! (WP works in supervisor mode and the
- * new "invlpg" and "bswap" instructions)
- */
-#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
- if (boot_cpu_data.x86 == 3)
- panic("Kernel requires i486+ for 'invlpg' and other features");
-#endif
-
-/*
- * If we configured ourselves for a TSC, we'd better have one!
- */
-#ifdef CONFIG_X86_TSC
- if (!cpu_has_tsc)
- panic("Kernel compiled for Pentium+, requires TSC feature!");
-#endif
-
-/*
- * If we were told we had a good local APIC, check for buggy Pentia,
- * i.e. all B steppings and the C2 stepping of P54C when using their
- * integrated APIC (see 11AP erratum in "Pentium Processor
- * Specification Update").
- */
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
- && cpu_has_apic
- && boot_cpu_data.x86 == 5
- && boot_cpu_data.x86_model == 2
- && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
- panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
-#endif
-}
-
-extern void alternative_instructions(void);
-
-static void __init check_bugs(void)
-{
- identify_cpu(&boot_cpu_data);
-#ifndef CONFIG_SMP
- printk("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
- check_config();
- check_fpu();
- check_hlt();
- check_popad();
- system_utsname.machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
- alternative_instructions();
-}
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h
index af9c5b2a9e..85f022109c 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h
@@ -103,7 +103,7 @@ static inline void clear_LDT(void)
* it slows down context switching. Noone uses it anyway.
*/
cpu = cpu; /* XXX avoid compiler warning */
- queue_set_ldt(0UL, 0);
+ xen_set_ldt(0UL, 0);
put_cpu();
}
@@ -118,14 +118,13 @@ static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
if (likely(!count))
segments = NULL;
- queue_set_ldt((unsigned long)segments, count);
+ xen_set_ldt((unsigned long)segments, count);
}
static inline void load_LDT(mm_context_t *pc)
{
int cpu = get_cpu();
load_LDT_nolock(pc, cpu);
- flush_page_update_queue();
put_cpu();
}
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h
index ee7e4aeeba..2bd859ff5f 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h
@@ -27,6 +27,7 @@
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
+#include <asm-xen/gnttab.h>
#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -79,11 +80,14 @@ enum fixed_addresses {
#ifdef CONFIG_ACPI_BOOT
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+ FIX_ACPI_RSDP_PAGE,
#endif
#ifdef CONFIG_PCI_MMCONFIG
FIX_PCIE_MCFG,
#endif
FIX_SHARED_INFO,
+ FIX_GNTTAB_BEGIN,
+ FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
#define NR_FIX_ISAMAPS 256
FIX_ISAMAP_END,
@@ -100,15 +104,9 @@ enum fixed_addresses {
extern void __set_fixmap (enum fixed_addresses idx,
unsigned long phys, pgprot_t flags);
-extern void __set_fixmap_ma (enum fixed_addresses idx,
- unsigned long mach, pgprot_t flags);
#define set_fixmap(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL)
-#define set_fixmap_ma(idx, phys) \
- __set_fixmap_ma(idx, phys, PAGE_KERNEL)
-#define set_fixmap_ma_ro(idx, phys) \
- __set_fixmap_ma(idx, phys, PAGE_KERNEL_RO)
/*
* Some hardware wants to get fixmapped without caching.
*/
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
new file mode 100644
index 0000000000..13ab9c3fde
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
@@ -0,0 +1,520 @@
+/******************************************************************************
+ * hypercall.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __HYPERCALL_H__
+#define __HYPERCALL_H__
+#include <asm-xen/xen-public/xen.h>
+
+/*
+ * Assembler stubs for hyper-calls.
+ */
+
+static inline int
+HYPERVISOR_set_trap_table(
+ trap_info_t *table)
+{
+ int ret;
+ unsigned long ignore;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ignore)
+ : "0" (__HYPERVISOR_set_trap_table), "1" (table)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_mmu_update(
+ mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
+ "3" (success_count), "4" (domid)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_mmuext_op(
+ struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
+ "3" (success_count), "4" (domid)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_gdt(
+ unsigned long *frame_list, int entries)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
+ : "memory" );
+
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_stack_switch(
+ unsigned long ss, unsigned long esp)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_callbacks(
+ unsigned long event_selector, unsigned long event_address,
+ unsigned long failsafe_selector, unsigned long failsafe_address)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector),
+ "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+ int set)
+{
+ int ret;
+ unsigned long ign;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_yield(
+ void)
+{
+ int ret;
+ unsigned long ign;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_block(
+ void)
+{
+ int ret;
+ unsigned long ign1;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_shutdown(
+ void)
+{
+ int ret;
+ unsigned long ign1;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_reboot(
+ void)
+{
+ int ret;
+ unsigned long ign1;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_suspend(
+ unsigned long srec)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ /* NB. On suspend, control software expects a suspend record in %esi. */
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=S" (ign2)
+ : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
+ "S" (srec) : "memory");
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_crash(
+ void)
+{
+ int ret;
+ unsigned long ign1;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
+ : "memory" );
+
+ return ret;
+}
+
+static inline long
+HYPERVISOR_set_timer_op(
+ u64 timeout)
+{
+ int ret;
+ unsigned long timeout_hi = (unsigned long)(timeout>>32);
+ unsigned long timeout_lo = (unsigned long)timeout;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi)
+ : "memory");
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_dom0_op(
+ dom0_op_t *dom0_op)
+{
+ int ret;
+ unsigned long ign1;
+
+ dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op)
+ : "memory");
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_debugreg(
+ int reg, unsigned long value)
+{
+ int ret;
+ unsigned long ign1, ign2;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value)
+ : "memory" );
+
+ return ret;
+}
+
+static inline unsigned long
+HYPERVISOR_get_debugreg(
+ int reg)
+{
+ unsigned long ret;
+ unsigned long ign;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_get_debugreg), "1" (reg)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_descriptor(
+ unsigned long ma, unsigned long word1, unsigned long word2)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_update_descriptor), "1" (ma), "2" (word1),
+ "3" (word2)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_dom_mem_op(
+ unsigned int op, unsigned long *extent_list,
+ unsigned long nr_extents, unsigned int extent_order)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4, ign5;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4),
+ "=D" (ign5)
+ : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list),
+ "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_multicall(
+ void *call_list, int nr_calls)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(
+ unsigned long va, pte_t new_val, unsigned long flags)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_update_va_mapping),
+ "1" (va), "2" ((new_val).pte_low), "3" (flags)
+ : "memory" );
+
+ if ( unlikely(ret < 0) )
+ {
+ printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n",
+ va, (new_val).pte_low, flags);
+ BUG();
+ }
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_event_channel_op(
+ void *op)
+{
+ int ret;
+ unsigned long ignore;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ignore)
+ : "0" (__HYPERVISOR_event_channel_op), "1" (op)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_xen_version(
+ int cmd)
+{
+ int ret;
+ unsigned long ignore;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ignore)
+ : "0" (__HYPERVISOR_xen_version), "1" (cmd)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_console_io(
+ int cmd, int count, char *str)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_physdev_op(
+ void *physdev_op)
+{
+ int ret;
+ unsigned long ign;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_grant_table_op(
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_va_mapping_otherdomain(
+ unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
+ "1" (va), "2" ((new_val).pte_low), "3" (flags), "4" (domid) :
+ "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_vm_assist(
+ unsigned int cmd, unsigned int type)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_boot_vcpu(
+ unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
+ : "memory");
+
+ return ret;
+}
+
+#endif /* __HYPERCALL_H__ */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
index 30f4d88b62..88c5faaf84 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
@@ -34,6 +34,7 @@
* Vectors 0x20-0x2f are used for ISA interrupts.
*/
+#if 0
/*
* Special IRQ vectors used by the SMP architecture, 0xf0-0xff
*
@@ -56,6 +57,10 @@
* sources per level' errata.
*/
#define LOCAL_TIMER_VECTOR 0xef
+#endif
+
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
/*
* First APIC vector available to drivers: (vectors 0x30-0xee)
@@ -65,8 +70,6 @@
#define FIRST_DEVICE_VECTOR 0x31
#define FIRST_SYSTEM_VECTOR 0xef
-#define TIMER_IRQ timer_irq
-
/*
* 16 8259A IRQ's, 208 potential APIC interrupt sources.
* Right now the APIC is mostly only used for SMP.
@@ -77,14 +80,18 @@
* the usable vector space is 0x20-0xff (224 vectors)
*/
-#if 0
+#define NR_IPIS 8
+
+#define RESCHEDULE_VECTOR 1
+#define INVALIDATE_TLB_VECTOR 2
+#define CALL_FUNCTION_VECTOR 3
+
/*
* The maximum number of vectors supported by i386 processors
* is limited to 256. For processors other than i386, NR_VECTORS
* should be changed accordingly.
*/
#define NR_VECTORS 256
-#endif
#define FPU_IRQ 13
@@ -103,10 +110,10 @@
*/
#define PIRQ_BASE 0
-#define NR_PIRQS 128
+#define NR_PIRQS 256
#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
-#define NR_DYNIRQS 128
+#define NR_DYNIRQS 256
#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
#define NR_IRQ_VECTORS NR_IRQS
@@ -121,6 +128,8 @@
/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
extern int bind_virq_to_irq(int virq);
extern void unbind_virq_from_irq(int virq);
+extern int bind_ipi_on_cpu_to_irq(int cpu, int ipi);
+extern void unbind_ipi_on_cpu_from_irq(int cpu, int ipi);
extern int bind_evtchn_to_irq(int evtchn);
extern void unbind_evtchn_from_irq(int evtchn);
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
index 2bfd4df069..78886d339c 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
@@ -13,7 +13,12 @@ static char * __init machine_specific_memory_setup(void)
who = "Xen";
- start_pfn = 0;
+ /* In dom0, we have to start the fake e820 map above the first
+ * 1MB, in other domains, it can start at 0. */
+ if (xen_start_info.flags & SIF_INITDOMAIN)
+ start_pfn = 0x100;
+ else
+ start_pfn = 0;
max_pfn = xen_start_info.nr_pages;
e820.nr_map = 0;
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
new file mode 100644
index 0000000000..28adeaf244
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
@@ -0,0 +1,55 @@
+/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
+ * which needs to alter them. */
+
+static inline void smpboot_clear_io_apic_irqs(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ io_apic_irqs = 0;
+#endif
+}
+
+static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
+{
+#if 1
+ printk("smpboot_setup_warm_reset_vector\n");
+#else
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ Dprintk("3.\n");
+#endif
+}
+
+static inline void smpboot_restore_warm_reset_vector(void)
+{
+ /*
+ * Install writable page 0 entry to set BIOS data area.
+ */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
+
+ *((volatile long *) phys_to_virt(0x467)) = 0;
+}
+
+static inline void smpboot_setup_io_apic(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+#endif
+}
+
+
+#define smp_found_config (HYPERVISOR_shared_info->n_vcpu > 1)
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h
new file mode 100644
index 0000000000..b628b46f3b
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h
@@ -0,0 +1,26 @@
+#ifndef __i386_MMU_H
+#define __i386_MMU_H
+
+#include <asm/semaphore.h>
+/*
+ * The i386 doesn't have a mmu context, but
+ * we put the segment information here.
+ *
+ * cpu_vm_mask is used to optimize ldt flushing.
+ */
+typedef struct {
+ int size;
+ struct semaphore sem;
+ void *ldt;
+ unsigned pinned:1;
+ struct list_head unpinned;
+} mm_context_t;
+
+extern struct list_head mm_unpinned;
+extern spinlock_t mm_unpinned_lock;
+
+/* mm/memory.c:exit_mmap hook */
+extern void _arch_exit_mmap(struct mm_struct *mm);
+#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
index a815fad09d..f46144e37f 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
@@ -16,38 +16,74 @@ void destroy_context(struct mm_struct *mm);
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
-#ifdef CONFIG_SMP
+#if 0 /* XEN: no lazy tlb */
unsigned cpu = smp_processor_id();
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY;
#endif
}
+#define prepare_arch_switch(rq,next) __prepare_arch_switch()
+#define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock)
+#define task_running(rq, p) ((rq)->curr == (p))
+
+static inline void __prepare_arch_switch(void)
+{
+ /*
+ * Save away %fs and %gs. No need to save %es and %ds, as those
+ * are always kernel segments while inside the kernel. Must
+ * happen before reload of cr3/ldt (i.e., not in __switch_to).
+ */
+ __asm__ __volatile__ ( "movl %%fs,%0 ; movl %%gs,%1"
+ : "=m" (*(int *)&current->thread.fs),
+ "=m" (*(int *)&current->thread.gs));
+ __asm__ __volatile__ ( "movl %0,%%fs ; movl %0,%%gs"
+ : : "r" (0) );
+}
+
+extern void mm_pin(struct mm_struct *mm);
+extern void mm_unpin(struct mm_struct *mm);
+void mm_pin_all(void);
+
static inline void switch_mm(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
{
int cpu = smp_processor_id();
+ struct mmuext_op _op[2], *op = _op;
if (likely(prev != next)) {
+ if (!next->context.pinned)
+ mm_pin(next);
+
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
+#if 0 /* XEN: no lazy tlb */
per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
per_cpu(cpu_tlbstate, cpu).active_mm = next;
#endif
cpu_set(cpu, next->cpu_vm_mask);
- /* Re-load page tables */
- load_cr3(next->pgd);
+ /* Re-load page tables: load_cr3(next->pgd) */
+ per_cpu(cur_pgd, cpu) = next->pgd;
+ op->cmd = MMUEXT_NEW_BASEPTR;
+ op->mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
+ op++;
/*
* load the LDT, if the LDT is different:
*/
- if (unlikely(prev->context.ldt != next->context.ldt))
- load_LDT_nolock(&next->context, cpu);
+ if (unlikely(prev->context.ldt != next->context.ldt)) {
+ /* load_LDT_nolock(&next->context, cpu) */
+ op->cmd = MMUEXT_SET_LDT;
+ op->linear_addr = (unsigned long)next->context.ldt;
+ op->nr_ents = next->context.size;
+ op++;
+ }
+
+ BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
}
-#ifdef CONFIG_SMP
+#if 0 /* XEN: no lazy tlb */
else {
per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
@@ -66,9 +102,7 @@ static inline void switch_mm(struct mm_struct *prev,
#define deactivate_mm(tsk, mm) \
asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
-#define activate_mm(prev, next) do { \
- switch_mm((prev),(next),NULL); \
- flush_page_update_queue(); \
-} while (0)
+#define activate_mm(prev, next) \
+ switch_mm((prev),(next),NULL)
#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/msr.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/msr.h
deleted file mode 100644
index 4cc4f318ba..0000000000
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/msr.h
+++ /dev/null
@@ -1,272 +0,0 @@
-#ifndef __ASM_MSR_H
-#define __ASM_MSR_H
-
-#include <linux/smp.h>
-#include <asm-xen/hypervisor.h>
-
-/*
- * Access to machine-specific registers (available on 586 and better only)
- * Note: the rd* operations modify the parameters directly (without using
- * pointer indirection), this allows gcc to optimize better
- */
-
-#define rdmsr(_msr,_val1,_val2) do { \
- dom0_op_t op; \
- op.cmd = DOM0_MSR; \
- op.u.msr.write = 0; \
- op.u.msr.msr = (_msr); \
- op.u.msr.cpu_mask = (1 << smp_processor_id()); \
- HYPERVISOR_dom0_op(&op); \
- (_val1) = op.u.msr.out1; \
- (_val2) = op.u.msr.out2; \
-} while(0)
-
-#define wrmsr(_msr,_val1,_val2) do { \
- dom0_op_t op; \
- op.cmd = DOM0_MSR; \
- op.u.msr.write = 1; \
- op.u.msr.cpu_mask = (1 << smp_processor_id()); \
- op.u.msr.msr = (_msr); \
- op.u.msr.in1 = (_val1); \
- op.u.msr.in2 = (_val2); \
- HYPERVISOR_dom0_op(&op); \
-} while(0)
-
-#define rdmsrl(msr,val) do { \
- unsigned long l__,h__; \
- rdmsr (msr, l__, h__); \
- val = l__; \
- val |= ((u64)h__<<32); \
-} while(0)
-
-static inline void wrmsrl (unsigned long msr, unsigned long long val)
-{
- unsigned long lo, hi;
- lo = (unsigned long) val;
- hi = val >> 32;
- wrmsr (msr, lo, hi);
-}
-
-#define rdtsc(low,high) \
- __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
-
-#define rdtscl(low) \
- __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
-
-#define rdtscll(val) \
- __asm__ __volatile__("rdtsc" : "=A" (val))
-
-#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
-
-#define rdpmc(counter,low,high) \
- __asm__ __volatile__("rdpmc" \
- : "=a" (low), "=d" (high) \
- : "c" (counter))
-
-/* symbolic names for some interesting MSRs */
-/* Intel defined MSRs. */
-#define MSR_IA32_P5_MC_ADDR 0
-#define MSR_IA32_P5_MC_TYPE 1
-#define MSR_IA32_PLATFORM_ID 0x17
-#define MSR_IA32_EBL_CR_POWERON 0x2a
-
-#define MSR_IA32_APICBASE 0x1b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-
-#define MSR_IA32_UCODE_WRITE 0x79
-#define MSR_IA32_UCODE_REV 0x8b
-
-#define MSR_P6_PERFCTR0 0xc1
-#define MSR_P6_PERFCTR1 0xc2
-
-#define MSR_IA32_BBL_CR_CTL 0x119
-
-#define MSR_IA32_SYSENTER_CS 0x174
-#define MSR_IA32_SYSENTER_ESP 0x175
-#define MSR_IA32_SYSENTER_EIP 0x176
-
-#define MSR_IA32_MCG_CAP 0x179
-#define MSR_IA32_MCG_STATUS 0x17a
-#define MSR_IA32_MCG_CTL 0x17b
-
-/* P4/Xeon+ specific */
-#define MSR_IA32_MCG_EAX 0x180
-#define MSR_IA32_MCG_EBX 0x181
-#define MSR_IA32_MCG_ECX 0x182
-#define MSR_IA32_MCG_EDX 0x183
-#define MSR_IA32_MCG_ESI 0x184
-#define MSR_IA32_MCG_EDI 0x185
-#define MSR_IA32_MCG_EBP 0x186
-#define MSR_IA32_MCG_ESP 0x187
-#define MSR_IA32_MCG_EFLAGS 0x188
-#define MSR_IA32_MCG_EIP 0x189
-#define MSR_IA32_MCG_RESERVED 0x18A
-
-#define MSR_P6_EVNTSEL0 0x186
-#define MSR_P6_EVNTSEL1 0x187
-
-#define MSR_IA32_PERF_STATUS 0x198
-#define MSR_IA32_PERF_CTL 0x199
-
-#define MSR_IA32_THERM_CONTROL 0x19a
-#define MSR_IA32_THERM_INTERRUPT 0x19b
-#define MSR_IA32_THERM_STATUS 0x19c
-#define MSR_IA32_MISC_ENABLE 0x1a0
-
-#define MSR_IA32_DEBUGCTLMSR 0x1d9
-#define MSR_IA32_LASTBRANCHFROMIP 0x1db
-#define MSR_IA32_LASTBRANCHTOIP 0x1dc
-#define MSR_IA32_LASTINTFROMIP 0x1dd
-#define MSR_IA32_LASTINTTOIP 0x1de
-
-#define MSR_IA32_MC0_CTL 0x400
-#define MSR_IA32_MC0_STATUS 0x401
-#define MSR_IA32_MC0_ADDR 0x402
-#define MSR_IA32_MC0_MISC 0x403
-
-/* Pentium IV performance counter MSRs */
-#define MSR_P4_BPU_PERFCTR0 0x300
-#define MSR_P4_BPU_PERFCTR1 0x301
-#define MSR_P4_BPU_PERFCTR2 0x302
-#define MSR_P4_BPU_PERFCTR3 0x303
-#define MSR_P4_MS_PERFCTR0 0x304
-#define MSR_P4_MS_PERFCTR1 0x305
-#define MSR_P4_MS_PERFCTR2 0x306
-#define MSR_P4_MS_PERFCTR3 0x307
-#define MSR_P4_FLAME_PERFCTR0 0x308
-#define MSR_P4_FLAME_PERFCTR1 0x309
-#define MSR_P4_FLAME_PERFCTR2 0x30a
-#define MSR_P4_FLAME_PERFCTR3 0x30b
-#define MSR_P4_IQ_PERFCTR0 0x30c
-#define MSR_P4_IQ_PERFCTR1 0x30d
-#define MSR_P4_IQ_PERFCTR2 0x30e
-#define MSR_P4_IQ_PERFCTR3 0x30f
-#define MSR_P4_IQ_PERFCTR4 0x310
-#define MSR_P4_IQ_PERFCTR5 0x311
-#define MSR_P4_BPU_CCCR0 0x360
-#define MSR_P4_BPU_CCCR1 0x361
-#define MSR_P4_BPU_CCCR2 0x362
-#define MSR_P4_BPU_CCCR3 0x363
-#define MSR_P4_MS_CCCR0 0x364
-#define MSR_P4_MS_CCCR1 0x365
-#define MSR_P4_MS_CCCR2 0x366
-#define MSR_P4_MS_CCCR3 0x367
-#define MSR_P4_FLAME_CCCR0 0x368
-#define MSR_P4_FLAME_CCCR1 0x369
-#define MSR_P4_FLAME_CCCR2 0x36a
-#define MSR_P4_FLAME_CCCR3 0x36b
-#define MSR_P4_IQ_CCCR0 0x36c
-#define MSR_P4_IQ_CCCR1 0x36d
-#define MSR_P4_IQ_CCCR2 0x36e
-#define MSR_P4_IQ_CCCR3 0x36f
-#define MSR_P4_IQ_CCCR4 0x370
-#define MSR_P4_IQ_CCCR5 0x371
-#define MSR_P4_ALF_ESCR0 0x3ca
-#define MSR_P4_ALF_ESCR1 0x3cb
-#define MSR_P4_BPU_ESCR0 0x3b2
-#define MSR_P4_BPU_ESCR1 0x3b3
-#define MSR_P4_BSU_ESCR0 0x3a0
-#define MSR_P4_BSU_ESCR1 0x3a1
-#define MSR_P4_CRU_ESCR0 0x3b8
-#define MSR_P4_CRU_ESCR1 0x3b9
-#define MSR_P4_CRU_ESCR2 0x3cc
-#define MSR_P4_CRU_ESCR3 0x3cd
-#define MSR_P4_CRU_ESCR4 0x3e0
-#define MSR_P4_CRU_ESCR5 0x3e1
-#define MSR_P4_DAC_ESCR0 0x3a8
-#define MSR_P4_DAC_ESCR1 0x3a9
-#define MSR_P4_FIRM_ESCR0 0x3a4
-#define MSR_P4_FIRM_ESCR1 0x3a5
-#define MSR_P4_FLAME_ESCR0 0x3a6
-#define MSR_P4_FLAME_ESCR1 0x3a7
-#define MSR_P4_FSB_ESCR0 0x3a2
-#define MSR_P4_FSB_ESCR1 0x3a3
-#define MSR_P4_IQ_ESCR0 0x3ba
-#define MSR_P4_IQ_ESCR1 0x3bb
-#define MSR_P4_IS_ESCR0 0x3b4
-#define MSR_P4_IS_ESCR1 0x3b5
-#define MSR_P4_ITLB_ESCR0 0x3b6
-#define MSR_P4_ITLB_ESCR1 0x3b7
-#define MSR_P4_IX_ESCR0 0x3c8
-#define MSR_P4_IX_ESCR1 0x3c9
-#define MSR_P4_MOB_ESCR0 0x3aa
-#define MSR_P4_MOB_ESCR1 0x3ab
-#define MSR_P4_MS_ESCR0 0x3c0
-#define MSR_P4_MS_ESCR1 0x3c1
-#define MSR_P4_PMH_ESCR0 0x3ac
-#define MSR_P4_PMH_ESCR1 0x3ad
-#define MSR_P4_RAT_ESCR0 0x3bc
-#define MSR_P4_RAT_ESCR1 0x3bd
-#define MSR_P4_SAAT_ESCR0 0x3ae
-#define MSR_P4_SAAT_ESCR1 0x3af
-#define MSR_P4_SSU_ESCR0 0x3be
-#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */
-#define MSR_P4_TBPU_ESCR0 0x3c2
-#define MSR_P4_TBPU_ESCR1 0x3c3
-#define MSR_P4_TC_ESCR0 0x3c4
-#define MSR_P4_TC_ESCR1 0x3c5
-#define MSR_P4_U2L_ESCR0 0x3b0
-#define MSR_P4_U2L_ESCR1 0x3b1
-
-/* AMD Defined MSRs */
-#define MSR_K6_EFER 0xC0000080
-#define MSR_K6_STAR 0xC0000081
-#define MSR_K6_WHCR 0xC0000082
-#define MSR_K6_UWCCR 0xC0000085
-#define MSR_K6_EPMR 0xC0000086
-#define MSR_K6_PSOR 0xC0000087
-#define MSR_K6_PFIR 0xC0000088
-
-#define MSR_K7_EVNTSEL0 0xC0010000
-#define MSR_K7_EVNTSEL1 0xC0010001
-#define MSR_K7_EVNTSEL2 0xC0010002
-#define MSR_K7_EVNTSEL3 0xC0010003
-#define MSR_K7_PERFCTR0 0xC0010004
-#define MSR_K7_PERFCTR1 0xC0010005
-#define MSR_K7_PERFCTR2 0xC0010006
-#define MSR_K7_PERFCTR3 0xC0010007
-#define MSR_K7_HWCR 0xC0010015
-#define MSR_K7_CLK_CTL 0xC001001b
-#define MSR_K7_FID_VID_CTL 0xC0010041
-#define MSR_K7_FID_VID_STATUS 0xC0010042
-
-/* extended feature register */
-#define MSR_EFER 0xc0000080
-
-/* EFER bits: */
-
-/* Execute Disable enable */
-#define _EFER_NX 11
-#define EFER_NX (1<<_EFER_NX)
-
-/* Centaur-Hauls/IDT defined MSRs. */
-#define MSR_IDT_FCR1 0x107
-#define MSR_IDT_FCR2 0x108
-#define MSR_IDT_FCR3 0x109
-#define MSR_IDT_FCR4 0x10a
-
-#define MSR_IDT_MCR0 0x110
-#define MSR_IDT_MCR1 0x111
-#define MSR_IDT_MCR2 0x112
-#define MSR_IDT_MCR3 0x113
-#define MSR_IDT_MCR4 0x114
-#define MSR_IDT_MCR5 0x115
-#define MSR_IDT_MCR6 0x116
-#define MSR_IDT_MCR7 0x117
-#define MSR_IDT_MCR_CTRL 0x120
-
-/* VIA Cyrix defined MSRs*/
-#define MSR_VIA_FCR 0x1107
-#define MSR_VIA_LONGHAUL 0x110a
-#define MSR_VIA_RNG 0x110b
-#define MSR_VIA_BCR2 0x1147
-
-/* Transmeta defined MSRs */
-#define MSR_TMTA_LONGRUN_CTRL 0x80868010
-#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
-#define MSR_TMTA_LRTI_READOUT 0x80868018
-#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
-
-#endif /* __ASM_MSR_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
index 520e98d072..404da2640b 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
@@ -11,12 +11,23 @@
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
-#define pmd_populate(mm, pmd, pte) do { \
- set_pmd(pmd, __pmd(_PAGE_TABLE + \
- ((unsigned long long)page_to_pfn(pte) << \
- (unsigned long long) PAGE_SHIFT))); \
- flush_page_update_queue(); \
+#define pmd_populate(mm, pmd, pte) \
+do { \
+ if (unlikely((mm)->context.pinned)) { \
+ if (!PageHighMem(pte)) \
+ HYPERVISOR_update_va_mapping( \
+ (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT),\
+ pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0);\
+ set_pmd(pmd, __pmd(_PAGE_TABLE + \
+ ((unsigned long long)page_to_pfn(pte) << \
+ (unsigned long long) PAGE_SHIFT))); \
+ } else { \
+ *(pmd) = __pmd(_PAGE_TABLE + \
+ ((unsigned long long)page_to_pfn(pte) << \
+ (unsigned long long) PAGE_SHIFT)); \
+ } \
} while (0)
+
/*
* Allocate and free page tables.
*/
@@ -30,7 +41,6 @@ static inline void pte_free_kernel(pte_t *pte)
{
free_page((unsigned long)pte);
make_page_writable(pte);
- flush_page_update_queue();
}
extern void pte_free(struct page *pte);
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
index 5e070af2ab..9eddbd8012 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
@@ -13,29 +13,16 @@
* within a page table are directly modified. Thus, the following
* hook is made available.
*/
-#define set_pte_batched(pteptr, pteval) \
- queue_l1_entry_update(pteptr, (pteval).pte_low)
#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
-#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
-/*
- * A note on implementation of this atomic 'get-and-clear' operation.
- * This is actually very simple because Xen Linux can only run on a single
- * processor. Therefore, we cannot race other processors setting the 'accessed'
- * or 'dirty' bits on a page-table entry.
- * Even if pages are shared between domains, that is not a problem because
- * each domain will have separate page tables, with their own versions of
- * accessed & dirty state.
- */
-static inline pte_t ptep_get_and_clear(pte_t *xp)
-{
- pte_t pte = *xp;
- if (pte.pte_low)
- set_pte(xp, __pte_ma(0));
- return pte;
-}
+#ifndef CONFIG_XEN_SHADOW_MODE
+#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
+#else
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#endif
+#define ptep_get_and_clear(xp) __pte_ma(xchg(&(xp)->pte_low, 0))
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
/*
* We detect special mappings in one of two ways:
@@ -59,9 +46,10 @@ static inline pte_t ptep_get_and_clear(pte_t *xp)
*/
#define INVALID_P2M_ENTRY (~0U)
#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
+#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
#define pte_pfn(_pte) \
({ \
- unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \
+ unsigned long mfn = pte_mfn(_pte); \
unsigned long pfn = mfn_to_pfn(mfn); \
if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
pfn = max_mapnr; /* special: force !pfn_valid() */ \
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h
index e008baf6fb..f611f04781 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h
@@ -35,12 +35,9 @@ extern unsigned long empty_zero_page[1024];
extern pgd_t swapper_pg_dir[1024];
extern kmem_cache_t *pgd_cache;
extern kmem_cache_t *pmd_cache;
-extern kmem_cache_t *pte_cache;
extern spinlock_t pgd_lock;
extern struct page *pgd_list;
-void pte_ctor(void *, kmem_cache_t *, unsigned long);
-void pte_dtor(void *, kmem_cache_t *, unsigned long);
void pmd_ctor(void *, kmem_cache_t *, unsigned long);
void pgd_ctor(void *, kmem_cache_t *, unsigned long);
void pgd_dtor(void *, kmem_cache_t *, unsigned long);
@@ -89,9 +86,6 @@ void paging_init(void);
# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE)
#endif
-extern void *high_memory;
-extern unsigned long vmalloc_earlyreserve;
-
/*
* The 4MB page is guessing.. Detailed in the infamous "Chapter H"
* of the Pentium details, but assuming intel did the straightforward
@@ -214,7 +208,7 @@ extern unsigned long pg0[];
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
can temporarily clear it. */
#define pmd_present(x) (pmd_val(x))
-/* pmd_clear below */
+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
@@ -254,34 +248,28 @@ static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return p
static inline int ptep_test_and_clear_dirty(pte_t *ptep)
{
- pte_t pte = *ptep;
- int ret = pte_dirty(pte);
- if (ret)
- xen_l1_entry_update(ptep, pte_mkclean(pte).pte_low);
- return ret;
+ if (!pte_dirty(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
}
static inline int ptep_test_and_clear_young(pte_t *ptep)
{
- pte_t pte = *ptep;
- int ret = pte_young(pte);
- if (ret)
- xen_l1_entry_update(ptep, pte_mkold(pte).pte_low);
- return ret;
+ if (!pte_young(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
}
static inline void ptep_set_wrprotect(pte_t *ptep)
{
- pte_t pte = *ptep;
- if (pte_write(pte))
- set_pte(ptep, pte_wrprotect(pte));
+ if (pte_write(*ptep))
+ clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
}
static inline void ptep_mkdirty(pte_t *ptep)
{
- pte_t pte = *ptep;
- if (!pte_dirty(pte))
- xen_l1_entry_update(ptep, pte_mkdirty(pte).pte_low);
+ if (!pte_dirty(*ptep))
+ set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
}
/*
@@ -317,11 +305,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define page_pte(page) page_pte_prot(page, __pgprot(0))
-#define pmd_clear(xp) do { \
- set_pmd(xp, __pmd(0)); \
- xen_flush_page_update_queue(); \
-} while (0)
-
#define pmd_large(pmd) \
((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
@@ -421,8 +404,7 @@ extern void noexec_setup(const char *str);
do { \
if (__dirty) { \
if ( likely((__vma)->vm_mm == current->mm) ) { \
- xen_flush_page_update_queue(); \
- HYPERVISOR_update_va_mapping((__address)>>PAGE_SHIFT, (__entry), UVMF_INVLPG); \
+ HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
} else { \
xen_l1_entry_update((__ptep), (__entry).pte_low); \
flush_tlb_page((__vma), (__address)); \
@@ -440,21 +422,28 @@ do { \
#define ptep_establish_new(__vma, __address, __ptep, __entry) \
do { \
if (likely((__vma)->vm_mm == current->mm)) { \
- xen_flush_page_update_queue(); \
- HYPERVISOR_update_va_mapping((__address)>>PAGE_SHIFT, \
+ HYPERVISOR_update_va_mapping((__address), \
__entry, 0); \
} else { \
xen_l1_entry_update((__ptep), (__entry).pte_low); \
} \
} while (0)
-/* NOTE: make_page* callers must call flush_page_update_queue() */
+#ifndef CONFIG_XEN_SHADOW_MODE
void make_lowmem_page_readonly(void *va);
void make_lowmem_page_writable(void *va);
void make_page_readonly(void *va);
void make_page_writable(void *va);
void make_pages_readonly(void *va, unsigned int nr);
void make_pages_writable(void *va, unsigned int nr);
+#else
+#define make_lowmem_page_readonly(_va) ((void)0)
+#define make_lowmem_page_writable(_va) ((void)0)
+#define make_page_readonly(_va) ((void)0)
+#define make_page_writable(_va) ((void)0)
+#define make_pages_readonly(_va, _nr) ((void)0)
+#define make_pages_writable(_va, _nr) ((void)0)
+#endif
#define virt_to_ptep(__va) \
({ \
@@ -477,7 +466,6 @@ void make_pages_writable(void *va, unsigned int nr);
#define kern_addr_valid(addr) (1)
#endif /* !CONFIG_DISCONTIGMEM */
-#define DOMID_LOCAL (0xFFFFU)
int direct_remap_area_pages(struct mm_struct *mm,
unsigned long address,
unsigned long machine_addr,
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h
index a6a5a9cf83..fd54b409e2 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h
@@ -88,7 +88,7 @@ extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
extern struct tss_struct doublefault_tss;
DECLARE_PER_CPU(struct tss_struct, init_tss);
-extern pgd_t *cur_pgd; /* XXXsmp */
+DECLARE_PER_CPU(pgd_t *, cur_pgd);
#ifdef CONFIG_SMP
extern struct cpuinfo_x86 cpu_data[];
@@ -193,8 +193,8 @@ static inline unsigned int cpuid_edx(unsigned int op)
}
#define load_cr3(pgdir) do { \
- queue_pt_switch(__pa(pgdir)); \
- cur_pgd = pgdir; /* XXXsmp */ \
+ xen_pt_switch(__pa(pgdir)); \
+ per_cpu(cur_pgd, smp_processor_id()) = pgdir; \
} while (/* CONSTCOND */0)
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h
index 288243f05a..5496d69023 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h
@@ -74,17 +74,17 @@
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
/*
- * The GDT has LAST_RESERVED_GDT_ENTRY + 1 entries
+ * The GDT has 32 entries
*/
-#define GDT_ENTRIES (LAST_RESERVED_GDT_ENTRY + 1)
+#define GDT_ENTRIES 32
#define GDT_SIZE (GDT_ENTRIES * 8)
/* Simple and small GDT entries for booting only */
-#define __BOOT_CS FLAT_GUESTOS_CS
+#define __BOOT_CS FLAT_KERNEL_CS
-#define __BOOT_DS FLAT_GUESTOS_DS
+#define __BOOT_DS FLAT_KERNEL_DS
/*
* The interrupt descriptor table has room for 256 idt's,
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
new file mode 100644
index 0000000000..d7189a7c28
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
@@ -0,0 +1,250 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <linux/config.h>
+#include <linux/compiler.h>
+
+asmlinkage int printk(const char * fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+ volatile unsigned int slock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+ unsigned int break_lock;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0)
+#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "jns 3f\n" \
+ "2:\t" \
+ "rep;nop\n\t" \
+ "cmpb $0,%0\n\t" \
+ "jle 2b\n\t" \
+ "jmp 1b\n" \
+ "3:\n\t"
+
+#define spin_lock_string_flags \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "jns 4f\n\t" \
+ "2:\t" \
+ "testl $0x200, %1\n\t" \
+ "jz 3f\n\t" \
+ "#sti\n\t" \
+ "3:\t" \
+ "rep;nop\n\t" \
+ "cmpb $0, %0\n\t" \
+ "jle 3b\n\t" \
+ "#cli\n\t" \
+ "jmp 1b\n" \
+ "4:\n\t"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+ "movb $1,%0" \
+ :"=m" (lock->slock) : : "memory"
+
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ BUG_ON(lock->magic != SPINLOCK_MAGIC);
+ BUG_ON(!spin_is_locked(lock));
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#else
+
+#define spin_unlock_string \
+ "xchgb %b0, %1" \
+ :"=q" (oldval), "=m" (lock->slock) \
+ :"0" (oldval) : "memory"
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+ char oldval = 1;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ BUG_ON(lock->magic != SPINLOCK_MAGIC);
+ BUG_ON(!spin_is_locked(lock));
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#endif
+
+static inline int _raw_spin_trylock(spinlock_t *lock)
+{
+ char oldval;
+ __asm__ __volatile__(
+ "xchgb %b0,%1"
+ :"=q" (oldval), "=m" (lock->slock)
+ :"0" (0) : "memory");
+ return oldval > 0;
+}
+
+static inline void _raw_spin_lock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+ printk("eip: %p\n", __builtin_return_address(0));
+ BUG();
+ }
+#endif
+ __asm__ __volatile__(
+ spin_lock_string
+ :"=m" (lock->slock) : : "memory");
+}
+
+static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+ printk("eip: %p\n", __builtin_return_address(0));
+ BUG();
+ }
+#endif
+ __asm__ __volatile__(
+ spin_lock_string_flags
+ :"=m" (lock->slock) : "r" (flags) : "memory");
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+ volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+ unsigned int break_lock;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC 0xdeaf1eed
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define read_can_lock(x) ((int)(x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores. See
+ * semaphore.h for details. -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+static inline void _raw_read_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+ __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void _raw_write_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+ __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int _raw_read_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ atomic_dec(count);
+ if (atomic_read(count) >= 0)
+ return 1;
+ atomic_inc(count);
+ return 0;
+}
+
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+ return 1;
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+}
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
index 8093de0ac9..11f88c2507 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
@@ -60,6 +60,63 @@ static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
return oldbit;
}
+struct __synch_xchg_dummy { unsigned long a[100]; };
+#define __synch_xg(x) ((struct __synch_xchg_dummy *)(x))
+
+#define synch_cmpxchg(ptr, old, new) \
+((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\
+ (unsigned long)(old), \
+ (unsigned long)(new), \
+ sizeof(*(ptr))))
+
+static inline unsigned long __synch_cmpxchg(volatile void *ptr,
+ unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("lock; cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__("lock; cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+#ifdef CONFIG_X86_64
+ case 4:
+ __asm__ __volatile__("lock; cmpxchgl %k1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+ __asm__ __volatile__("lock; cmpxchgq %1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+#else
+ case 4:
+ __asm__ __volatile__("lock; cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__synch_xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+#endif
+ }
+ return old;
+}
+
static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
{
return ((1UL << (nr & 31)) &
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h
index ed4f1b5673..021acbd159 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h
@@ -4,11 +4,10 @@
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
-#include <asm/synch_bitops.h>
+#include <asm-xen/synch_bitops.h>
#include <asm/segment.h>
#include <asm/cpufeature.h>
#include <asm-xen/hypervisor.h>
-#include <asm-xen/evtchn.h>
#ifdef __KERNEL__
@@ -107,28 +106,32 @@ static inline unsigned long _get_base(char * addr)
/*
* Clear and set 'TS' bit respectively
*/
-/* NB. 'clts' is done for us by Xen during virtual trap. */
-#define clts() ((void)0)
-#define read_cr0() \
- BUG();
+#define clts() (HYPERVISOR_fpu_taskswitch(0))
+#define read_cr0() ({ \
+ unsigned int __dummy; \
+ __asm__( \
+ "movl %%cr0,%0\n\t" \
+ :"=r" (__dummy)); \
+ __dummy; \
+})
#define write_cr0(x) \
- BUG();
-
-#define read_cr4() \
- BUG();
+ __asm__("movl %0,%%cr0": :"r" (x));
+
+#define read_cr4() ({ \
+ unsigned int __dummy; \
+ __asm__( \
+ "movl %%cr4,%0\n\t" \
+ :"=r" (__dummy)); \
+ __dummy; \
+})
#define write_cr4(x) \
- BUG();
-#define stts() (HYPERVISOR_fpu_taskswitch())
+ __asm__("movl %0,%%cr4": :"r" (x));
+#define stts() (HYPERVISOR_fpu_taskswitch(1))
#endif /* __KERNEL__ */
-static inline void wbinvd(void)
-{
- mmu_update_t u;
- u.ptr = MMU_EXTENDED_COMMAND;
- u.val = MMUEXT_FLUSH_CACHE;
- (void)HYPERVISOR_mmu_update(&u, 1, NULL);
-}
+#define wbinvd() \
+ __asm__ __volatile__ ("wbinvd": : :"memory");
static inline unsigned long get_limit(unsigned long segment)
{
@@ -451,63 +454,70 @@ struct alt_instr {
#define __cli() \
do { \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
barrier(); \
} while (0)
#define __sti() \
do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
+ vcpu_info_t *_vcpu; \
barrier(); \
- _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 0; \
barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
} while (0)
#define __save_flags(x) \
do { \
- (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
+ vcpu_info_t *_vcpu; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
} while (0)
#define __restore_flags(x) \
do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
+ vcpu_info_t *_vcpu; \
barrier(); \
- if ( (_shared->vcpu_data[0].evtchn_upcall_mask = (x)) == 0 ) { \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
- } \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
+ } else \
+ preempt_enable_no_resched(); \
} while (0)
-#define safe_halt() ((void)0)
+#define safe_halt() ((void)0)
#define __save_and_cli(x) \
do { \
- (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \
- HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
barrier(); \
} while (0)
-#define __save_and_sti(x) \
-do { \
- shared_info_t *_shared = HYPERVISOR_shared_info; \
- barrier(); \
- (x) = _shared->vcpu_data[0].evtchn_upcall_mask; \
- _shared->vcpu_data[0].evtchn_upcall_mask = 0; \
- barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \
- force_evtchn_callback(); \
-} while (0)
-
#define local_irq_save(x) __save_and_cli(x)
#define local_irq_restore(x) __restore_flags(x)
#define local_save_flags(x) __save_flags(x)
#define local_irq_disable() __cli()
#define local_irq_enable() __sti()
-#define irqs_disabled() HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask
+#define irqs_disabled() \
+ HYPERVISOR_shared_info->vcpu_data[smp_processor_id()].evtchn_upcall_mask
/*
* disable hlt during certain critical i/o operations
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
index 4d93bb317a..4d13a650a2 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
@@ -7,17 +7,10 @@
#define __flush_tlb() xen_tlb_flush()
#define __flush_tlb_global() xen_tlb_flush()
+#define __flush_tlb_all() xen_tlb_flush()
extern unsigned long pgkern_mask;
-# define __flush_tlb_all() \
- do { \
- if (cpu_has_pge) \
- __flush_tlb_global(); \
- else \
- __flush_tlb(); \
- } while (0)
-
#define cpu_has_invlpg (boot_cpu_data.x86 > 3)
#define __flush_tlb_single(addr) xen_invlpg(addr)
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/xor.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/xor.h
deleted file mode 100644
index 79a45debc6..0000000000
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/xor.h
+++ /dev/null
@@ -1,884 +0,0 @@
-/*
- * include/asm-i386/xor.h
- *
- * Optimized RAID-5 checksumming functions for MMX and SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * High-speed RAID5 checksumming functions utilizing MMX instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n"
-#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n"
-#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n"
-#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n"
-#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n"
-#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n"
-
-#include <asm/i387.h>
-
-static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned long lines = bytes >> 7;
-
- kernel_fpu_begin();
-
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- XO1(i,0) \
- ST(i,0) \
- XO1(i+1,1) \
- ST(i+1,1) \
- XO1(i+2,2) \
- ST(i+2,2) \
- XO1(i+3,3) \
- ST(i+3,3)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned long lines = bytes >> 7;
-
- kernel_fpu_begin();
-
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- XO2(i,0) \
- ST(i,0) \
- XO2(i+1,1) \
- ST(i+1,1) \
- XO2(i+2,2) \
- ST(i+2,2) \
- XO2(i+3,3) \
- ST(i+3,3)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned long lines = bytes >> 7;
-
- kernel_fpu_begin();
-
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- XO3(i,0) \
- ST(i,0) \
- XO3(i+1,1) \
- ST(i+1,1) \
- XO3(i+2,2) \
- ST(i+2,2) \
- XO3(i+3,3) \
- ST(i+3,3)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " addl $128, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-
-static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned long lines = bytes >> 7;
-
- kernel_fpu_begin();
-
- /* Make sure GCC forgets anything it knows about p4 or p5,
- such that it won't pass to the asm volatile below a
- register that is shared with any other variable. That's
- because we modify p4 and p5 there, but we can't mark them
- as read/write, otherwise we'd overflow the 10-asm-operands
- limit of GCC < 3.1. */
- __asm__ ("" : "+r" (p4), "+r" (p5));
-
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- XO3(i,0) \
- XO3(i+1,1) \
- XO3(i+2,2) \
- XO3(i+3,3) \
- XO4(i,0) \
- ST(i,0) \
- XO4(i+1,1) \
- ST(i+1,1) \
- XO4(i+2,2) \
- ST(i+2,2) \
- XO4(i+3,3) \
- ST(i+3,3)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $128, %1 ;\n"
- " addl $128, %2 ;\n"
- " addl $128, %3 ;\n"
- " addl $128, %4 ;\n"
- " addl $128, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- : "r" (p4), "r" (p5)
- : "memory");
-
- /* p4 and p5 were modified, and now the variables are dead.
- Clobber them just to be sure nobody does something stupid
- like assuming they have some legal value. */
- __asm__ ("" : "=r" (p4), "=r" (p5));
-
- kernel_fpu_end();
-}
-
-#undef LD
-#undef XO1
-#undef XO2
-#undef XO3
-#undef XO4
-#undef ST
-#undef BLOCK
-
-static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned long lines = bytes >> 6;
-
- kernel_fpu_begin();
-
- __asm__ __volatile__ (
- " .align 32 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned long lines = bytes >> 6;
-
- kernel_fpu_begin();
-
- __asm__ __volatile__ (
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- :
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned long lines = bytes >> 6;
-
- kernel_fpu_begin();
-
- __asm__ __volatile__ (
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor (%4), %%mm0 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " pxor 8(%4), %%mm1 ;\n"
- " movq %%mm0, (%1) ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " pxor 16(%4), %%mm2 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 24(%4), %%mm3 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " pxor 32(%4), %%mm4 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " pxor 40(%4), %%mm5 ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%4), %%mm6 ;\n"
- " pxor 56(%4), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " addl $64, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned long lines = bytes >> 6;
-
- kernel_fpu_begin();
-
- /* Make sure GCC forgets anything it knows about p4 or p5,
- such that it won't pass to the asm volatile below a
- register that is shared with any other variable. That's
- because we modify p4 and p5 there, but we can't mark them
- as read/write, otherwise we'd overflow the 10-asm-operands
- limit of GCC < 3.1. */
- __asm__ ("" : "+r" (p4), "+r" (p5));
-
- __asm__ __volatile__ (
- " .align 32,0x90 ;\n"
- " 1: ;\n"
- " movq (%1), %%mm0 ;\n"
- " movq 8(%1), %%mm1 ;\n"
- " pxor (%2), %%mm0 ;\n"
- " pxor 8(%2), %%mm1 ;\n"
- " movq 16(%1), %%mm2 ;\n"
- " pxor (%3), %%mm0 ;\n"
- " pxor 8(%3), %%mm1 ;\n"
- " pxor 16(%2), %%mm2 ;\n"
- " pxor (%4), %%mm0 ;\n"
- " pxor 8(%4), %%mm1 ;\n"
- " pxor 16(%3), %%mm2 ;\n"
- " movq 24(%1), %%mm3 ;\n"
- " pxor (%5), %%mm0 ;\n"
- " pxor 8(%5), %%mm1 ;\n"
- " movq %%mm0, (%1) ;\n"
- " pxor 16(%4), %%mm2 ;\n"
- " pxor 24(%2), %%mm3 ;\n"
- " movq %%mm1, 8(%1) ;\n"
- " pxor 16(%5), %%mm2 ;\n"
- " pxor 24(%3), %%mm3 ;\n"
- " movq 32(%1), %%mm4 ;\n"
- " movq %%mm2, 16(%1) ;\n"
- " pxor 24(%4), %%mm3 ;\n"
- " pxor 32(%2), %%mm4 ;\n"
- " movq 40(%1), %%mm5 ;\n"
- " pxor 24(%5), %%mm3 ;\n"
- " pxor 32(%3), %%mm4 ;\n"
- " pxor 40(%2), %%mm5 ;\n"
- " movq %%mm3, 24(%1) ;\n"
- " pxor 32(%4), %%mm4 ;\n"
- " pxor 40(%3), %%mm5 ;\n"
- " movq 48(%1), %%mm6 ;\n"
- " movq 56(%1), %%mm7 ;\n"
- " pxor 32(%5), %%mm4 ;\n"
- " pxor 40(%4), %%mm5 ;\n"
- " pxor 48(%2), %%mm6 ;\n"
- " pxor 56(%2), %%mm7 ;\n"
- " movq %%mm4, 32(%1) ;\n"
- " pxor 48(%3), %%mm6 ;\n"
- " pxor 56(%3), %%mm7 ;\n"
- " pxor 40(%5), %%mm5 ;\n"
- " pxor 48(%4), %%mm6 ;\n"
- " pxor 56(%4), %%mm7 ;\n"
- " movq %%mm5, 40(%1) ;\n"
- " pxor 48(%5), %%mm6 ;\n"
- " pxor 56(%5), %%mm7 ;\n"
- " movq %%mm6, 48(%1) ;\n"
- " movq %%mm7, 56(%1) ;\n"
-
- " addl $64, %1 ;\n"
- " addl $64, %2 ;\n"
- " addl $64, %3 ;\n"
- " addl $64, %4 ;\n"
- " addl $64, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- : "r" (p4), "r" (p5)
- : "memory");
-
- /* p4 and p5 were modified, and now the variables are dead.
- Clobber them just to be sure nobody does something stupid
- like assuming they have some legal value. */
- __asm__ ("" : "=r" (p4), "=r" (p5));
-
- kernel_fpu_end();
-}
-
-static struct xor_block_template xor_block_pII_mmx = {
- .name = "pII_mmx",
- .do_2 = xor_pII_mmx_2,
- .do_3 = xor_pII_mmx_3,
- .do_4 = xor_pII_mmx_4,
- .do_5 = xor_pII_mmx_5,
-};
-
-static struct xor_block_template xor_block_p5_mmx = {
- .name = "p5_mmx",
- .do_2 = xor_p5_mmx_2,
- .do_3 = xor_p5_mmx_3,
- .do_4 = xor_p5_mmx_4,
- .do_5 = xor_p5_mmx_5,
-};
-
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-#define XMMS_SAVE do { \
- preempt_disable(); \
- if (!(current_thread_info()->status & TS_USEDFPU)) \
- clts(); \
- __asm__ __volatile__ ( \
- "movups %%xmm0,(%1) ;\n\t" \
- "movups %%xmm1,0x10(%1) ;\n\t" \
- "movups %%xmm2,0x20(%1) ;\n\t" \
- "movups %%xmm3,0x30(%1) ;\n\t" \
- : "=&r" (cr0) \
- : "r" (xmm_save) \
- : "memory"); \
-} while(0)
-
-#define XMMS_RESTORE do { \
- __asm__ __volatile__ ( \
- "sfence ;\n\t" \
- "movups (%1),%%xmm0 ;\n\t" \
- "movups 0x10(%1),%%xmm1 ;\n\t" \
- "movups 0x20(%1),%%xmm2 ;\n\t" \
- "movups 0x30(%1),%%xmm3 ;\n\t" \
- : \
- : "r" (cr0), "r" (xmm_save) \
- : "memory"); \
- if (!(current_thread_info()->status & TS_USEDFPU)) \
- stts(); \
- preempt_enable(); \
-} while(0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
-#define OFFS(x) "16*("#x")"
-#define PF_OFFS(x) "256+16*("#x")"
-#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
-#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
-#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
-#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
-#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
-#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
-#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
-#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
-#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
-#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
-#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
-#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
-#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned long lines = bytes >> 8;
- char xmm_save[16*4] ALIGN16;
- int cr0;
-
- XMMS_SAVE;
-
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- LD(i,0) \
- LD(i+1,1) \
- PF1(i) \
- PF1(i+2) \
- LD(i+2,2) \
- LD(i+3,3) \
- PF0(i+4) \
- PF0(i+6) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- ST(i,0) \
- ST(i+1,1) \
- ST(i+2,2) \
- ST(i+3,3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2)
- :
- : "memory");
-
- XMMS_RESTORE;
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned long lines = bytes >> 8;
- char xmm_save[16*4] ALIGN16;
- int cr0;
-
- XMMS_SAVE;
-
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i+2) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- PF2(i) \
- PF2(i+2) \
- PF0(i+4) \
- PF0(i+6) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- ST(i,0) \
- ST(i+1,1) \
- ST(i+2,2) \
- ST(i+3,3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r"(p2), "+r"(p3)
- :
- : "memory" );
-
- XMMS_RESTORE;
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned long lines = bytes >> 8;
- char xmm_save[16*4] ALIGN16;
- int cr0;
-
- XMMS_SAVE;
-
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i+2) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- PF2(i) \
- PF2(i+2) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- PF3(i) \
- PF3(i+2) \
- PF0(i+4) \
- PF0(i+6) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- XO3(i,0) \
- XO3(i+1,1) \
- XO3(i+2,2) \
- XO3(i+3,3) \
- ST(i,0) \
- ST(i+1,1) \
- ST(i+2,2) \
- ST(i+3,3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
- :
- : "memory" );
-
- XMMS_RESTORE;
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned long lines = bytes >> 8;
- char xmm_save[16*4] ALIGN16;
- int cr0;
-
- XMMS_SAVE;
-
- /* Make sure GCC forgets anything it knows about p4 or p5,
- such that it won't pass to the asm volatile below a
- register that is shared with any other variable. That's
- because we modify p4 and p5 there, but we can't mark them
- as read/write, otherwise we'd overflow the 10-asm-operands
- limit of GCC < 3.1. */
- __asm__ ("" : "+r" (p4), "+r" (p5));
-
- __asm__ __volatile__ (
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i+2) \
- LD(i,0) \
- LD(i+1,1) \
- LD(i+2,2) \
- LD(i+3,3) \
- PF2(i) \
- PF2(i+2) \
- XO1(i,0) \
- XO1(i+1,1) \
- XO1(i+2,2) \
- XO1(i+3,3) \
- PF3(i) \
- PF3(i+2) \
- XO2(i,0) \
- XO2(i+1,1) \
- XO2(i+2,2) \
- XO2(i+3,3) \
- PF4(i) \
- PF4(i+2) \
- PF0(i+4) \
- PF0(i+6) \
- XO3(i,0) \
- XO3(i+1,1) \
- XO3(i+2,2) \
- XO3(i+3,3) \
- XO4(i,0) \
- XO4(i+1,1) \
- XO4(i+2,2) \
- XO4(i+3,3) \
- ST(i,0) \
- ST(i+1,1) \
- ST(i+2,2) \
- ST(i+3,3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " addl $256, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- : "r" (p4), "r" (p5)
- : "memory");
-
- /* p4 and p5 were modified, and now the variables are dead.
- Clobber them just to be sure nobody does something stupid
- like assuming they have some legal value. */
- __asm__ ("" : "=r" (p4), "=r" (p5));
-
- XMMS_RESTORE;
-}
-
-static struct xor_block_template xor_block_pIII_sse = {
- .name = "pIII_sse",
- .do_2 = xor_sse_2,
- .do_3 = xor_sse_3,
- .do_4 = xor_sse_4,
- .do_5 = xor_sse_5,
-};
-
-/* Also try the generic routines. */
-#include <asm-generic/xor.h>
-
-#undef XOR_TRY_TEMPLATES
-#define XOR_TRY_TEMPLATES \
- do { \
- xor_speed(&xor_block_8regs); \
- xor_speed(&xor_block_8regs_p); \
- xor_speed(&xor_block_32regs); \
- xor_speed(&xor_block_32regs_p); \
- if (cpu_has_xmm) \
- xor_speed(&xor_block_pIII_sse); \
- if (cpu_has_mmx) { \
- xor_speed(&xor_block_pII_mmx); \
- xor_speed(&xor_block_p5_mmx); \
- } \
- } while (0)
-
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/arch_hooks.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/arch_hooks.h
new file mode 100644
index 0000000000..28b96a6fb9
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/arch_hooks.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_ARCH_HOOKS_H
+#define _ASM_ARCH_HOOKS_H
+
+#include <linux/interrupt.h>
+
+/*
+ * linux/include/asm/arch_hooks.h
+ *
+ * define the architecture specific hooks
+ */
+
+/* these aren't arch hooks, they are generic routines
+ * that can be used by the hooks */
+extern void init_ISA_irqs(void);
+extern void apic_intr_init(void);
+extern void smp_intr_init(void);
+extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+/* these are the defined hooks */
+extern void intr_init_hook(void);
+extern void pre_intr_init_hook(void);
+extern void pre_setup_arch_hook(void);
+extern void trap_init_hook(void);
+extern void time_init_hook(void);
+extern void mca_nmi_hook(void);
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h
new file mode 100644
index 0000000000..731d8678ab
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h
@@ -0,0 +1,41 @@
+
+#ifndef _X86_64_BOOTSETUP_H
+#define _X86_64_BOOTSETUP_H 1
+
+extern char x86_boot_params[2048];
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+#define PARAM ((unsigned char *)x86_boot_params)
+#define SCREEN_INFO (*(struct screen_info *) (PARAM+0))
+#define EXT_MEM_K (*(unsigned short *) (PARAM+2))
+#define ALT_MEM_K (*(unsigned int *) (PARAM+0x1e0))
+#define E820_MAP_NR (*(char*) (PARAM+E820NR))
+#define E820_MAP ((struct e820entry *) (PARAM+E820MAP))
+#define APM_BIOS_INFO (*(struct apm_bios_info *) (PARAM+0x40))
+#define DRIVE_INFO (*(struct drive_info_struct *) (PARAM+0x80))
+#define SYS_DESC_TABLE (*(struct sys_desc_table_struct*)(PARAM+0xa0))
+#define MOUNT_ROOT_RDONLY (*(unsigned short *) (PARAM+0x1F2))
+#define RAMDISK_FLAGS (*(unsigned short *) (PARAM+0x1F8))
+#define SAVED_VIDEO_MODE (*(unsigned short *) (PARAM+0x1FA))
+#define ORIG_ROOT_DEV (*(unsigned short *) (PARAM+0x1FC))
+#define AUX_DEVICE_INFO (*(unsigned char *) (PARAM+0x1FF))
+#define LOADER_TYPE (*(unsigned char *) (PARAM+0x210))
+#define KERNEL_START (*(unsigned int *) (PARAM+0x214))
+
+#define INITRD_START (__pa(xen_start_info.mod_start))
+#define INITRD_SIZE (xen_start_info.mod_len)
+#define EDID_INFO (*(struct edid_info *) (PARAM+0x440))
+
+#define EDD_NR (*(unsigned char *) (PARAM+EDDNR))
+#define EDD_MBR_SIG_NR (*(unsigned char *) (PARAM+EDD_MBR_SIG_NR_BUF))
+#define EDD_MBR_SIGNATURE ((unsigned int *) (PARAM+EDD_MBR_SIG_BUF))
+#define EDD_BUF ((struct edd_info *) (PARAM+EDDBUF))
+#define COMMAND_LINE saved_command_line
+
+#define RAMDISK_IMAGE_START_MASK 0x07FF
+#define RAMDISK_PROMPT_FLAG 0x8000
+#define RAMDISK_LOAD_FLAG 0x4000
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/desc.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/desc.h
new file mode 100644
index 0000000000..15a10a135d
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/desc.h
@@ -0,0 +1,240 @@
+/* Written 2000 by Andi Kleen */
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#include <linux/threads.h>
+#include <asm/ldt.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/string.h>
+#include <asm/segment.h>
+#include <asm/mmu.h>
+
+// 8 byte segment descriptor
+struct desc_struct {
+ u16 limit0;
+ u16 base0;
+ unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
+ unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
+} __attribute__((packed));
+
+struct n_desc_struct {
+ unsigned int a,b;
+};
+
+enum {
+ GATE_INTERRUPT = 0xE,
+ GATE_TRAP = 0xF,
+ GATE_CALL = 0xC,
+};
+
+// 16byte gate
+struct gate_struct {
+ u16 offset_low;
+ u16 segment;
+ unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
+ u16 offset_middle;
+ u32 offset_high;
+ u32 zero1;
+} __attribute__((packed));
+
+#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
+#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
+#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
+
+enum {
+ DESC_TSS = 0x9,
+ DESC_LDT = 0x2,
+};
+
+// LDT or TSS descriptor in the GDT. 16 bytes.
+struct ldttss_desc {
+ u16 limit0;
+ u16 base0;
+ unsigned base1 : 8, type : 5, dpl : 2, p : 1;
+ unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
+ u32 base3;
+ u32 zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+ unsigned short size;
+ unsigned long address;
+} __attribute__((packed)) ;
+
+extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS];
+
+extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+
+#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)(cpu_gdt_descr[(_cpu)].address))
+
+#define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
+
+static inline void clear_LDT(void)
+{
+ int cpu = get_cpu();
+
+ /*
+ * NB. We load the default_ldt for lcall7/27 handling on demand, as
+ * it slows down context switching. Noone uses it anyway.
+ */
+ cpu = cpu; /* XXX avoid compiler warning */
+ xen_set_ldt(0UL, 0);
+ put_cpu();
+}
+
+/*
+ * This is the ldt that every process will get unless we need
+ * something other than this.
+ */
+extern struct desc_struct default_ldt[];
+extern struct gate_struct idt_table[];
+
+static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)
+{
+ struct gate_struct s;
+ s.offset_low = PTR_LOW(func);
+ s.segment = __KERNEL_CS;
+ s.ist = ist;
+ s.p = 1;
+ s.dpl = dpl;
+ s.zero0 = 0;
+ s.zero1 = 0;
+ s.type = type;
+ s.offset_middle = PTR_MIDDLE(func);
+ s.offset_high = PTR_HIGH(func);
+ /* does not need to be atomic because it is only done once at setup time */
+ memcpy(adr, &s, 16);
+}
+
+static inline void set_intr_gate(int nr, void *func)
+{
+ _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0);
+}
+
+static inline void set_intr_gate_ist(int nr, void *func, unsigned ist)
+{
+ _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist);
+}
+
+static inline void set_system_gate(int nr, void *func)
+{
+ _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0);
+}
+
+static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type,
+ unsigned size)
+{
+ struct ldttss_desc d;
+ memset(&d,0,sizeof(d));
+ d.limit0 = size & 0xFFFF;
+ d.base0 = PTR_LOW(tss);
+ d.base1 = PTR_MIDDLE(tss) & 0xFF;
+ d.type = type;
+ d.p = 1;
+ d.limit1 = (size >> 16) & 0xF;
+ d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF;
+ d.base3 = PTR_HIGH(tss);
+ memcpy(ptr, &d, 16);
+}
+
+static inline void set_tss_desc(unsigned cpu, void *addr)
+{
+ set_tssldt_descriptor((struct ldttss_desc *)&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS],
+ (unsigned long)addr,
+ DESC_TSS,
+ sizeof(struct tss_struct) - 1);
+}
+
+static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
+{
+ set_tssldt_descriptor((struct ldttss_desc *)&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT],
+ (unsigned long)addr,
+ DESC_LDT, size * 8 - 1);
+}
+
+static inline void set_seg_base(unsigned cpu, int entry, void *base)
+{
+ struct desc_struct *d = (struct desc_struct *)&get_cpu_gdt_table(cpu)[entry];
+ u32 addr = (u32)(u64)base;
+ BUG_ON((u64)base >> 32);
+ d->base0 = addr & 0xffff;
+ d->base1 = (addr >> 16) & 0xff;
+ d->base2 = (addr >> 24) & 0xff;
+}
+
+#define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
+/* Don't allow setting of the lm bit. It is useless anyways because
+ 64bit system calls require __USER_CS. */
+#define LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ (((info)->base_addr & 0x00ff0000) >> 16) | \
+ ((info)->limit & 0xf0000) | \
+ (((info)->read_exec_only ^ 1) << 9) | \
+ ((info)->contents << 10) | \
+ (((info)->seg_not_present ^ 1) << 15) | \
+ ((info)->seg_32bit << 22) | \
+ ((info)->limit_in_pages << 23) | \
+ ((info)->useable << 20) | \
+ /* ((info)->lm << 21) | */ \
+ 0x7000)
+
+#define LDT_empty(info) (\
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 && \
+ (info)->lm == 0)
+
+#if TLS_SIZE != 24
+# error update this code.
+#endif
+
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
+{
+#if 0
+ u64 *gdt = (u64 *)(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN);
+ gdt[0] = t->tls_array[0];
+ gdt[1] = t->tls_array[1];
+ gdt[2] = t->tls_array[2];
+#endif
+#define C(i) \
+ HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), t->tls_array[i])
+
+ C(0); C(1); C(2);
+#undef C
+}
+
+/*
+ * load one particular LDT into the current CPU
+ */
+extern inline void load_LDT_nolock (mm_context_t *pc, int cpu)
+{
+ void *segments = pc->ldt;
+ int count = pc->size;
+
+ if (likely(!count))
+ segments = NULL;
+
+ xen_set_ldt((unsigned long)segments, count);
+}
+
+static inline void load_LDT(mm_context_t *pc)
+{
+ int cpu = get_cpu();
+ load_LDT_nolock(pc, cpu);
+ put_cpu();
+}
+
+extern struct desc_ptr idt_descr;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h
new file mode 100644
index 0000000000..8d4e666cf4
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h
@@ -0,0 +1,136 @@
+#ifndef _X8664_DMA_MAPPING_H
+#define _X8664_DMA_MAPPING_H 1
+
+/*
+ * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
+ * documentation.
+ */
+
+#include <linux/config.h>
+
+#include <asm/scatterlist.h>
+#include <asm/io.h>
+#include <asm/swiotlb.h>
+
+extern dma_addr_t bad_dma_address;
+#define dma_mapping_error(x) \
+ (swiotlb ? swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address))
+
+void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ unsigned gfp);
+void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle);
+
+#ifdef CONFIG_GART_IOMMU
+
+extern dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size,
+ int direction);
+extern void dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
+ int direction);
+
+#else
+
+/* No IOMMU */
+
+static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr,
+ size_t size, int direction)
+{
+ dma_addr_t addr;
+
+ if (direction == DMA_NONE)
+ out_of_line_bug();
+ addr = virt_to_machine(ptr);
+
+ if ((addr+size) & ~*hwdev->dma_mask)
+ out_of_line_bug();
+ return addr;
+}
+
+static inline void dma_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
+ size_t size, int direction)
+{
+ if (direction == DMA_NONE)
+ out_of_line_bug();
+ /* Nothing to do */
+}
+#endif
+
+#define dma_map_page(dev,page,offset,size,dir) \
+ dma_map_single((dev), page_address(page)+(offset), (size), (dir))
+
+static inline void dma_sync_single_for_cpu(struct device *hwdev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ if (direction == DMA_NONE)
+ out_of_line_bug();
+
+ if (swiotlb)
+ return swiotlb_sync_single_for_cpu(hwdev,dma_handle,size,direction);
+
+ flush_write_buffers();
+}
+
+static inline void dma_sync_single_for_device(struct device *hwdev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ if (direction == DMA_NONE)
+ out_of_line_bug();
+
+ if (swiotlb)
+ return swiotlb_sync_single_for_device(hwdev,dma_handle,size,direction);
+
+ flush_write_buffers();
+}
+
+static inline void dma_sync_sg_for_cpu(struct device *hwdev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+ if (direction == DMA_NONE)
+ out_of_line_bug();
+
+ if (swiotlb)
+ return swiotlb_sync_sg_for_cpu(hwdev,sg,nelems,direction);
+
+ flush_write_buffers();
+}
+
+static inline void dma_sync_sg_for_device(struct device *hwdev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+ if (direction == DMA_NONE)
+ out_of_line_bug();
+
+ if (swiotlb)
+ return swiotlb_sync_sg_for_device(hwdev,sg,nelems,direction);
+
+ flush_write_buffers();
+}
+
+extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, int direction);
+extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+ int nents, int direction);
+
+#define dma_unmap_page dma_unmap_single
+
+extern int dma_supported(struct device *hwdev, u64 mask);
+extern int dma_get_cache_alignment(void);
+#define dma_is_consistent(h) 1
+
+static inline int dma_set_mask(struct device *dev, u64 mask)
+{
+ if (!dev->dma_mask || !dma_supported(dev, mask))
+ return -EIO;
+ *dev->dma_mask = mask;
+ return 0;
+}
+
+static inline void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir)
+{
+ flush_write_buffers();
+}
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h
new file mode 100644
index 0000000000..6d429e1175
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h
@@ -0,0 +1,114 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/apicdef.h>
+#include <asm-xen/gnttab.h>
+#include <asm/page.h>
+#include <asm/vsyscall.h>
+#include <asm/vsyscall32.h>
+#include <asm/acpi.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * highger than 1) use fixmap_set(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+enum fixed_addresses {
+ VSYSCALL_LAST_PAGE,
+ VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
+ VSYSCALL_HPET,
+ FIX_HPET_BASE,
+#ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+#endif
+#ifdef CONFIG_ACPI_BOOT
+ FIX_ACPI_BEGIN,
+ FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+ FIX_ACPI_RSDP_PAGE,
+#endif
+ FIX_SHARED_INFO,
+ FIX_GNTTAB_BEGIN,
+ FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+#define NR_FIX_ISAMAPS 256
+ FIX_ISAMAP_END,
+ FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
+#endif
+ __end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+ unsigned long phys, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL)
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
+#define clear_fixmap(idx) \
+ __set_fixmap(idx, 0, __pgprot(0))
+
+#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
+#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
+#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-deference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+extern inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ /*
+ * this branch gets completely eliminated after inlining,
+ * except when someone tries to use fixaddr indices in an
+ * illegal way. (such as mixing up address types or using
+ * out-of-range indices).
+ *
+ * If it doesn't get removed, the linker will complain
+ * loudly with a reasonably clear error message..
+ */
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+ return __fix_to_virt(idx);
+}
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/floppy.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/floppy.h
new file mode 100644
index 0000000000..dd6a76bc06
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/floppy.h
@@ -0,0 +1,204 @@
+/*
+ * Architecture specific parts of the Floppy driver
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995
+ *
+ * Modifications for Xen are Copyright (c) 2004, Keir Fraser.
+ */
+#ifndef __ASM_XEN_X86_64_FLOPPY_H
+#define __ASM_XEN_X86_64_FLOPPY_H
+
+#include <linux/vmalloc.h>
+
+
+/*
+ * The DMA channel used by the floppy controller cannot access data at
+ * addresses >= 16MB
+ *
+ * Went back to the 1MB limit, as some people had problems with the floppy
+ * driver otherwise. It doesn't matter much for performance anyway, as most
+ * floppy accesses go through the track buffer.
+ */
+#define _CROSS_64KB(a,s,vdma) \
+(!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
+
+#include <linux/vmalloc.h>
+
+/* XEN: Hit DMA paths on the head. This trick from asm-m68k/floppy.h. */
+#include <asm/dma.h>
+#undef MAX_DMA_ADDRESS
+#define MAX_DMA_ADDRESS 0
+#define CROSS_64KB(a,s) (0)
+
+#define fd_inb(port) inb_p(port)
+#define fd_outb(value,port) outb_p(value,port)
+
+#define fd_request_dma() (0)
+#define fd_free_dma() ((void)0)
+#define fd_enable_irq() enable_irq(FLOPPY_IRQ)
+#define fd_disable_irq() disable_irq(FLOPPY_IRQ)
+#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL)
+#define fd_get_dma_residue() vdma_get_dma_residue(FLOPPY_DMA)
+#define fd_dma_mem_alloc(size) vdma_mem_alloc(size)
+#define fd_dma_mem_free(addr, size) vdma_mem_free(addr, size)
+#define fd_dma_setup(addr, size, mode, io) vdma_dma_setup(addr, size, mode, io)
+
+static int virtual_dma_count;
+static int virtual_dma_residue;
+static char *virtual_dma_addr;
+static int virtual_dma_mode;
+static int doing_pdma;
+
+static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
+{
+ register unsigned char st;
+
+#undef TRACE_FLPY_INT
+
+#ifdef TRACE_FLPY_INT
+ static int calls=0;
+ static int bytes=0;
+ static int dma_wait=0;
+#endif
+ if (!doing_pdma)
+ return floppy_interrupt(irq, dev_id, regs);
+
+#ifdef TRACE_FLPY_INT
+ if(!calls)
+ bytes = virtual_dma_count;
+#endif
+
+ {
+ register int lcount;
+ register char *lptr;
+
+ st = 1;
+ for(lcount=virtual_dma_count, lptr=virtual_dma_addr;
+ lcount; lcount--, lptr++) {
+ st=inb(virtual_dma_port+4) & 0xa0 ;
+ if(st != 0xa0)
+ break;
+ if(virtual_dma_mode)
+ outb_p(*lptr, virtual_dma_port+5);
+ else
+ *lptr = inb_p(virtual_dma_port+5);
+ }
+ virtual_dma_count = lcount;
+ virtual_dma_addr = lptr;
+ st = inb(virtual_dma_port+4);
+ }
+
+#ifdef TRACE_FLPY_INT
+ calls++;
+#endif
+ if(st == 0x20)
+ return IRQ_HANDLED;
+ if(!(st & 0x20)) {
+ virtual_dma_residue += virtual_dma_count;
+ virtual_dma_count=0;
+#ifdef TRACE_FLPY_INT
+ printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
+ virtual_dma_count, virtual_dma_residue, calls, bytes,
+ dma_wait);
+ calls = 0;
+ dma_wait=0;
+#endif
+ doing_pdma = 0;
+ floppy_interrupt(irq, dev_id, regs);
+ return IRQ_HANDLED;
+ }
+#ifdef TRACE_FLPY_INT
+ if(!virtual_dma_count)
+ dma_wait++;
+#endif
+ return IRQ_HANDLED;
+}
+
+static void fd_disable_dma(void)
+{
+ doing_pdma = 0;
+ virtual_dma_residue += virtual_dma_count;
+ virtual_dma_count=0;
+}
+
+static int vdma_get_dma_residue(unsigned int dummy)
+{
+ return virtual_dma_count + virtual_dma_residue;
+}
+
+
+static int fd_request_irq(void)
+{
+ return request_irq(FLOPPY_IRQ, floppy_hardint,SA_INTERRUPT,
+ "floppy", NULL);
+}
+
+
+static unsigned long vdma_mem_alloc(unsigned long size)
+{
+ return (unsigned long) vmalloc(size);
+
+}
+
+static void vdma_mem_free(unsigned long addr, unsigned long size)
+{
+ vfree((void *)addr);
+}
+
+static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+ doing_pdma = 1;
+ virtual_dma_port = io;
+ virtual_dma_mode = (mode == DMA_MODE_WRITE);
+ virtual_dma_addr = addr;
+ virtual_dma_count = size;
+ virtual_dma_residue = 0;
+ return 0;
+}
+
+/* XEN: This trick to force 'virtual DMA' is from include/asm-m68k/floppy.h. */
+#define FDC1 xen_floppy_init()
+static int FDC2 = -1;
+
+static int xen_floppy_init(void)
+{
+ use_virtual_dma = 1;
+ can_use_virtual_dma = 1;
+ return 0x340;
+}
+
+/*
+ * Floppy types are stored in the rtc's CMOS RAM and so rtc_lock
+ * is needed to prevent corrupted CMOS RAM in case "insmod floppy"
+ * coincides with another rtc CMOS user. Paul G.
+ */
+#define FLOPPY0_TYPE ({ \
+ unsigned long flags; \
+ unsigned char val; \
+ spin_lock_irqsave(&rtc_lock, flags); \
+ val = (CMOS_READ(0x10) >> 4) & 15; \
+ spin_unlock_irqrestore(&rtc_lock, flags); \
+ val; \
+})
+
+#define FLOPPY1_TYPE ({ \
+ unsigned long flags; \
+ unsigned char val; \
+ spin_lock_irqsave(&rtc_lock, flags); \
+ val = CMOS_READ(0x10) & 15; \
+ spin_unlock_irqrestore(&rtc_lock, flags); \
+ val; \
+})
+
+#define N_FDC 2
+#define N_DRIVE 8
+
+#define FLOPPY_MOTOR_MASK 0xf0
+
+#define EXTRA_FLOPPY_PARAMS
+
+#endif /* __ASM_XEN_X86_64_FLOPPY_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
new file mode 100644
index 0000000000..e57c54769a
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
@@ -0,0 +1,505 @@
+/******************************************************************************
+ * hypercall.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+/*
+ * Benjamin Liu <benjamin.liu@intel.com>
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Ported to x86-64.
+ *
+ */
+
+#ifndef __HYPERCALL_H__
+#define __HYPERCALL_H__
+#include <asm-xen/xen-public/xen.h>
+
+#define __syscall_clobber "r11","rcx","memory"
+
+/*
+ * Assembler stubs for hyper-calls.
+ */
+static inline int
+HYPERVISOR_set_trap_table(
+ trap_info_t *table)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_set_trap_table), "D" (table)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_mmu_update(
+ mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ "movq %5, %%r10;" TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_mmu_update), "D" (req), "S" ((long)count),
+ "d" (success_count), "g" ((unsigned long)domid)
+ : __syscall_clobber, "r10" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_mmuext_op(
+ struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ "movq %5, %%r10;" TRAP_INSTR
+ : "=a" (ret)
+ : "0" (__HYPERVISOR_mmuext_op), "D" (op), "S" ((long)count),
+ "d" (success_count), "g" ((unsigned long)domid)
+ : __syscall_clobber, "r10" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_gdt(
+ unsigned long *frame_list, int entries)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_set_gdt), "D" (frame_list), "S" ((long)entries)
+ : __syscall_clobber );
+
+
+ return ret;
+}
+static inline int
+HYPERVISOR_stack_switch(
+ unsigned long ss, unsigned long esp)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_stack_switch), "D" (ss), "S" (esp)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_callbacks(
+ unsigned long event_address, unsigned long failsafe_address,
+ unsigned long syscall_address)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_set_callbacks), "D" (event_address),
+ "S" (failsafe_address), "d" (syscall_address)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+ int set)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch),
+ "D" ((unsigned long) set) : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_yield(
+ void)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_yield)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_block(
+ void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_block)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_shutdown(
+ void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_sched_op),
+ "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)))
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_reboot(
+ void)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_sched_op),
+ "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)))
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_suspend(
+ unsigned long srec)
+{
+ int ret;
+
+ /* NB. On suspend, control software expects a suspend record in %esi. */
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_sched_op),
+ "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift))),
+ "S" (srec)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+/*
+ * We can have the timeout value in a single argument for the hypercall, but
+ * that will break the common code.
+ */
+static inline long
+HYPERVISOR_set_timer_op(
+ u64 timeout)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_set_timer_op),
+ "D" (timeout)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_dom0_op(
+ dom0_op_t *dom0_op)
+{
+ int ret;
+
+ dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_dom0_op), "D" (dom0_op)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_debugreg(
+ int reg, unsigned long value)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_set_debugreg), "D" ((unsigned long)reg), "S" (value)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline unsigned long
+HYPERVISOR_get_debugreg(
+ int reg)
+{
+ unsigned long ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_get_debugreg), "D" ((unsigned long)reg)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_descriptor(
+ unsigned long ma, unsigned long word)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_update_descriptor), "D" (ma),
+ "S" (word)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_dom_mem_op(
+ unsigned int op, unsigned long *extent_list,
+ unsigned long nr_extents, unsigned int extent_order)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ "movq %5,%%r10; movq %6,%%r8;" TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_dom_mem_op), "D" ((unsigned long)op), "S" (extent_list),
+ "d" (nr_extents), "g" ((unsigned long) extent_order), "g" ((unsigned long) DOMID_SELF)
+ : __syscall_clobber,"r8","r10");
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_multicall(
+ void *call_list, int nr_calls)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_multicall), "D" (call_list), "S" ((unsigned long)nr_calls)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(
+ unsigned long page_nr, pte_t new_val, unsigned long flags)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_update_va_mapping),
+ "D" (page_nr), "S" (new_val.pte), "d" (flags)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_event_channel_op(
+ void *op)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_event_channel_op), "D" (op)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_xen_version(
+ int cmd)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_xen_version), "D" ((unsigned long)cmd)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_console_io(
+ int cmd, int count, char *str)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_console_io), "D" ((unsigned long)cmd), "S" ((unsigned long)count), "d" (str)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_physdev_op(
+ void *physdev_op)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_physdev_op), "D" (physdev_op)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_grant_table_op(
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_grant_table_op), "D" ((unsigned long)cmd), "S" ((unsigned long)uop), "d" (count)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_va_mapping_otherdomain(
+ unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ "movq %5, %%r10;" TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_update_va_mapping_otherdomain),
+ "D" (page_nr), "S" (new_val.pte), "d" (flags), "g" ((unsigned long)domid)
+ : __syscall_clobber,"r10");
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_vm_assist(
+ unsigned int cmd, unsigned int type)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_vm_assist), "D" ((unsigned long)cmd), "S" ((unsigned long)type)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_switch_to_user(void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_switch_to_user) : __syscall_clobber );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_boot_vcpu(
+ unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" (__HYPERVISOR_boot_vcpu), "D" (vcpu), "S" (ctxt)
+ : __syscall_clobber);
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_segment_base(
+ int reg, unsigned long value)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret)
+ : "0" ((unsigned long)__HYPERVISOR_set_segment_base), "D" ((unsigned long)reg), "S" (value)
+ : __syscall_clobber );
+
+ return ret;
+}
+
+#endif /* __HYPERCALL_H__ */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/io.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/io.h
new file mode 100644
index 0000000000..90466f397b
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/io.h
@@ -0,0 +1,365 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <linux/config.h>
+#include <asm/fixmap.h>
+/*
+ * This file contains the definitions for the x86 IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated
+ * to (a) handle it all in a way that makes gcc able to optimize it
+ * as well as possible and (b) trying to avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ */
+
+/*
+ * Thanks to James van Artsdalen for a better timing-fix than
+ * the two short jumps: using outb's to a nonexistent port seems
+ * to guarantee better timings even on fast machines.
+ *
+ * On the other hand, I'd like to be sure of a non-existent port:
+ * I feel a bit unsafe about using 0x80 (should be safe, though)
+ *
+ * Linus
+ */
+
+ /*
+ * Bit simplified and optimized by Jan Hubicka
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
+ *
+ * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
+ * isa_read[wl] and isa_write[wl] fixed
+ * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ */
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
+#else
+#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#endif
+
+#ifdef REALLY_SLOW_IO
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#else
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+
+/*
+ * Talk about misusing macros..
+ */
+#define __OUT1(s,x) \
+extern inline void out##s(unsigned x value, unsigned short port) {
+
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+
+#define __IN1(s) \
+extern inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+
+#define __INS(s) \
+extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; ins" #s \
+: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define __OUTS(s) \
+extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__INS(b)
+__INS(w)
+__INS(l)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
+
+#define IO_SPACE_LIMIT 0xffff
+
+#if defined(__KERNEL__) && __x86_64__
+
+#include <linux/vmalloc.h>
+
+#ifndef __i386__
+/*
+ * Change virtual addresses to physical addresses and vv.
+ * These are pretty trivial
+ */
+extern inline unsigned long virt_to_phys(volatile void * address)
+{
+ return __pa(address);
+}
+
+extern inline void * phys_to_virt(unsigned long address)
+{
+ return __va(address);
+}
+
+
+#define virt_to_bus(_x) phys_to_machine(__pa(_x))
+#define bus_to_virt(_x) __va(machine_to_phys(_x))
+#endif
+
+/*
+ * Change "struct page" to physical address.
+ */
+#ifdef CONFIG_DISCONTIGMEM
+#include <asm/mmzone.h>
+#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page)))
+
+#define bio_to_pseudophys(bio) (page_to_pseudophys(bio_page((bio))) + \
+ (unsigned long) bio_offset((bio)))
+#define bvec_to_pseudophys(bv) (page_to_pseudophys((bv)->bv_page) + \
+ (unsigned long) (bv)->bv_offset)
+
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
+ (((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) && \
+ ((bvec_to_pseudophys((vec1)) + (vec1)->bv_len) == \
+ bvec_to_pseudophys((vec2))))
+#else
+// #define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT)
+#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page)))
+
+#define bio_to_pseudophys(bio) (page_to_pseudophys(bio_page((bio))) + \
+ (unsigned long) bio_offset((bio)))
+#define bvec_to_pseudophys(bv) (page_to_pseudophys((bv)->bv_page) + \
+ (unsigned long) (bv)->bv_offset)
+
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
+ (((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) && \
+ ((bvec_to_pseudophys((vec1)) + (vec1)->bv_len) == \
+ bvec_to_pseudophys((vec2))))
+#endif
+
+#include <asm-generic/iomap.h>
+
+extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+
+extern inline void __iomem * ioremap (unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size, 0);
+}
+
+/*
+ * This one maps high address device memory and turns off caching for that area.
+ * it's useful if some control registers are in such an area and write combining
+ * or read caching is not desirable:
+ */
+extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
+extern void iounmap(volatile void __iomem *addr);
+
+/*
+ * ISA I/O bus memory addresses are 1:1 with the physical address.
+ */
+
+#define isa_virt_to_bus(_x) isa_virt_to_bus_is_UNSUPPORTED->x
+#define isa_page_to_bus(_x) isa_page_to_bus_is_UNSUPPORTED->x
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+#define isa_bus_to_virt(_x) (void *)(__fix_to_virt(FIX_ISAMAP_BEGIN) + (_x))
+#else
+#define isa_bus_to_virt(_x) isa_bus_to_virt_needs_PRIVILEGED_BUILD
+#endif
+/*
+ * However PCI ones are not necessarily 1:1 and therefore these interfaces
+ * are forbidden in portable PCI drivers.
+ *
+ * Allow them on x86 for legacy drivers, though.
+ */
+#define virt_to_bus(_x) phys_to_machine(__pa(_x))
+#define bus_to_virt(_x) __va(machine_to_phys(_x))
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+
+static inline __u8 __readb(const volatile void __iomem *addr)
+{
+ return *(__force volatile __u8 *)addr;
+}
+static inline __u16 __readw(const volatile void __iomem *addr)
+{
+ return *(__force volatile __u16 *)addr;
+}
+static inline __u32 __readl(const volatile void __iomem *addr)
+{
+ return *(__force volatile __u32 *)addr;
+}
+static inline __u64 __readq(const volatile void __iomem *addr)
+{
+ return *(__force volatile __u64 *)addr;
+}
+#define readb(x) __readb(x)
+#define readw(x) __readw(x)
+#define readl(x) __readl(x)
+#define readq(x) __readq(x)
+#define readb_relaxed(a) readb(a)
+#define readw_relaxed(a) readw(a)
+#define readl_relaxed(a) readl(a)
+#define readq_relaxed(a) readq(a)
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+#define __raw_readq readq
+
+#define mmiowb()
+
+#ifdef CONFIG_UNORDERED_IO
+static inline void __writel(__u32 val, volatile void __iomem *addr)
+{
+ volatile __u32 __iomem *target = addr;
+ asm volatile("movnti %1,%0"
+ : "=m" (*target)
+ : "r" (val) : "memory");
+}
+
+static inline void __writeq(__u64 val, volatile void __iomem *addr)
+{
+ volatile __u64 __iomem *target = addr;
+ asm volatile("movnti %1,%0"
+ : "=m" (*target)
+ : "r" (val) : "memory");
+}
+#else
+static inline void __writel(__u32 b, volatile void __iomem *addr)
+{
+ *(__force volatile __u32 *)addr = b;
+}
+static inline void __writeq(__u64 b, volatile void __iomem *addr)
+{
+ *(__force volatile __u64 *)addr = b;
+}
+#endif
+static inline void __writeb(__u8 b, volatile void __iomem *addr)
+{
+ *(__force volatile __u8 *)addr = b;
+}
+static inline void __writew(__u16 b, volatile void __iomem *addr)
+{
+ *(__force volatile __u16 *)addr = b;
+}
+#define writeq(val,addr) __writeq((val),(addr))
+#define writel(val,addr) __writel((val),(addr))
+#define writew(val,addr) __writew((val),(addr))
+#define writeb(val,addr) __writeb((val),(addr))
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+#define __raw_writeq writeq
+
+void __memcpy_fromio(void*,unsigned long,unsigned);
+void __memcpy_toio(unsigned long,const void*,unsigned);
+
+static inline void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned len)
+{
+ __memcpy_fromio(to,(unsigned long)from,len);
+}
+static inline void memcpy_toio(volatile void __iomem *to, const void *from, unsigned len)
+{
+ __memcpy_toio((unsigned long)to,from,len);
+}
+
+void memset_io(volatile void __iomem *a, int b, size_t c);
+
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI is quite large):
+ */
+#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+
+/*
+ * Again, x86-64 does not require memory-mapped-I/O-specific functions.
+ */
+
+#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
+#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(__ISA_IO_base + (b)),(c),(d))
+
+/**
+ * check_signature - find BIOS signatures
+ * @io_addr: mmio address to check
+ * @signature: signature block
+ * @length: length of signature
+ *
+ * Perform a signature comparison with the mmio address io_addr. This
+ * address should have been obtained by ioremap.
+ * Returns 1 on a match.
+ */
+
+static inline int check_signature(void __iomem *io_addr,
+ const unsigned char *signature, int length)
+{
+ int retval = 0;
+ do {
+ if (readb(io_addr) != *signature)
+ goto out;
+ io_addr++;
+ signature++;
+ length--;
+ } while (length);
+ retval = 1;
+out:
+ return retval;
+}
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size) do { } while (0)
+#define dma_cache_wback(_start,_size) do { } while (0)
+#define dma_cache_wback_inv(_start,_size) do { } while (0)
+
+#define flush_write_buffers()
+
+extern int iommu_bio_merge;
+#define BIO_VMERGE_BOUNDARY iommu_bio_merge
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/irq.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/irq.h
new file mode 100644
index 0000000000..ccd85685fd
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/irq.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+/*
+ * linux/include/asm/irq.h
+ *
+ * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ * IRQ/IPI changes taken from work by Thomas Radke
+ * <tomsoft@informatik.tu-chemnitz.de>
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+/* include comes from machine specific directory */
+#include "irq_vectors.h"
+#include <asm/thread_info.h>
+
+static __inline__ int irq_canonicalize(int irq)
+{
+ return ((irq == 2) ? 9 : irq);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
+#endif
+
+#define KDB_VECTOR 0xf9
+
+# define irq_ctx_init(cpu) do { } while (0)
+
+struct irqaction;
+struct pt_regs;
+int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+
+#endif /* _ASM_IRQ_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/io_ports.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/io_ports.h
new file mode 100644
index 0000000000..a96d9f6604
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/io_ports.h
@@ -0,0 +1,30 @@
+/*
+ * arch/i386/mach-generic/io_ports.h
+ *
+ * Machine specific IO port address definition for generic.
+ * Written by Osamu Tomita <tomita@cinet.co.jp>
+ */
+#ifndef _MACH_IO_PORTS_H
+#define _MACH_IO_PORTS_H
+
+/* i8253A PIT registers */
+#define PIT_MODE 0x43
+#define PIT_CH0 0x40
+#define PIT_CH2 0x42
+
+/* i8259A PIC registers */
+#define PIC_MASTER_CMD 0x20
+#define PIC_MASTER_IMR 0x21
+#define PIC_MASTER_ISR PIC_MASTER_CMD
+#define PIC_MASTER_POLL PIC_MASTER_ISR
+#define PIC_MASTER_OCW3 PIC_MASTER_ISR
+#define PIC_SLAVE_CMD 0xa0
+#define PIC_SLAVE_IMR 0xa1
+
+/* i8259A PIC related value */
+#define PIC_CASCADE_IR 2
+#define MASTER_ICW4_DEFAULT 0x01
+#define SLAVE_ICW4_DEFAULT 0x01
+#define PIC_ICW4_AEOI 2
+
+#endif /* !_MACH_IO_PORTS_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h
new file mode 100644
index 0000000000..8f19c41675
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h
@@ -0,0 +1,138 @@
+/*
+ * This file should contain #defines for all of the interrupt vector
+ * numbers used by this architecture.
+ *
+ * In addition, there are some standard defines:
+ *
+ * FIRST_EXTERNAL_VECTOR:
+ * The first free place for external interrupts
+ *
+ * SYSCALL_VECTOR:
+ * The IRQ vector a syscall makes the user to kernel transition
+ * under.
+ *
+ * TIMER_IRQ:
+ * The IRQ number the timer interrupt comes in at.
+ *
+ * NR_IRQS:
+ * The total number of interrupt vectors (including all the
+ * architecture specific interrupts) needed.
+ *
+ */
+#ifndef _ASM_IRQ_VECTORS_H
+#define _ASM_IRQ_VECTORS_H
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+#define FIRST_EXTERNAL_VECTOR 0x20
+
+#define SYSCALL_VECTOR 0x80
+
+/*
+ * Vectors 0x20-0x2f are used for ISA interrupts.
+ */
+
+#if 0
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ * some of the following vectors are 'rare', they are merged
+ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ * TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ * Vectors 0xf0-0xfa are free (reserved for future Linux use).
+ */
+#define INVALIDATE_TLB_VECTOR 0xfd
+#define RESCHEDULE_VECTOR 0xfc
+#define CALL_FUNCTION_VECTOR 0xfb
+
+#define THERMAL_APIC_VECTOR 0xf0
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+#endif
+
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
+
+/*
+ * First APIC vector available to drivers: (vectors 0x30-0xee)
+ * we start at 0x31 to spread out vectors evenly between priority
+ * levels. (0x80 is the syscall vector)
+ */
+#define FIRST_DEVICE_VECTOR 0x31
+#define FIRST_SYSTEM_VECTOR 0xef
+
+/*
+ * 16 8259A IRQ's, 208 potential APIC interrupt sources.
+ * Right now the APIC is mostly only used for SMP.
+ * 256 vectors is an architectural limit. (we can have
+ * more than 256 devices theoretically, but they will
+ * have to use shared interrupts)
+ * Since vectors 0x00-0x1f are used/reserved for the CPU,
+ * the usable vector space is 0x20-0xff (224 vectors)
+ */
+
+#define NR_IPIS 8
+
+#define RESCHEDULE_VECTOR 1
+#define INVALIDATE_TLB_VECTOR 2
+#define CALL_FUNCTION_VECTOR 3
+
+/*
+ * The maximum number of vectors supported by i386 processors
+ * is limited to 256. For processors other than i386, NR_VECTORS
+ * should be changed accordingly.
+ */
+#define NR_VECTORS 256
+
+#define FPU_IRQ 13
+
+#define FIRST_VM86_IRQ 3
+#define LAST_VM86_IRQ 15
+#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
+
+/*
+ * The flat IRQ space is divided into two regions:
+ * 1. A one-to-one mapping of real physical IRQs. This space is only used
+ * if we have physical device-access privilege. This region is at the
+ * start of the IRQ space so that existing device drivers do not need
+ * to be modified to translate physical IRQ numbers into our IRQ space.
+ * 2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
+ * are bound using the provided bind/unbind functions.
+ */
+
+#define PIRQ_BASE 0
+#define NR_PIRQS 256
+
+#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
+#define NR_DYNIRQS 256
+
+#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
+#define NR_IRQ_VECTORS NR_IRQS
+
+#define pirq_to_irq(_x) ((_x) + PIRQ_BASE)
+#define irq_to_pirq(_x) ((_x) - PIRQ_BASE)
+
+#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
+#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
+
+#ifndef __ASSEMBLY__
+/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */
+extern int bind_virq_to_irq(int virq);
+extern void unbind_virq_from_irq(int virq);
+extern int bind_ipi_on_cpu_to_irq(int cpu, int ipi);
+extern void unbind_ipi_on_cpu_from_irq(int cpu, int ipi);
+extern int bind_evtchn_to_irq(int evtchn);
+extern void unbind_evtchn_from_irq(int evtchn);
+
+extern void irq_suspend(void);
+extern void irq_resume(void);
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IRQ_VECTORS_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_time.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_time.h
new file mode 100644
index 0000000000..b749aa44a8
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_time.h
@@ -0,0 +1,122 @@
+/*
+ * include/asm-i386/mach-default/mach_time.h
+ *
+ * Machine specific set RTC function for generic.
+ * Split out from time.c by Osamu Tomita <tomita@cinet.co.jp>
+ */
+#ifndef _MACH_TIME_H
+#define _MACH_TIME_H
+
+#include <linux/mc146818rtc.h>
+
+/* for check timing call set_rtc_mmss() 500ms */
+/* used in arch/i386/time.c::do_timer_interrupt() */
+#define USEC_AFTER 500000
+#define USEC_BEFORE 500000
+
+/*
+ * In order to set the CMOS clock precisely, set_rtc_mmss has to be
+ * called 500 ms after the second nowtime has started, because when
+ * nowtime is written into the registers of the CMOS clock, it will
+ * jump to the next second precisely 500 ms later. Check the Motorola
+ * MC146818A or Dallas DS12887 data sheet for details.
+ *
+ * BUG: This routine does not handle hour overflow properly; it just
+ * sets the minutes. Usually you'll only notice that after reboot!
+ */
+static inline int mach_set_rtc_mmss(unsigned long nowtime)
+{
+ int retval = 0;
+ int real_seconds, real_minutes, cmos_minutes;
+ unsigned char save_control, save_freq_select;
+
+ save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */
+ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */
+ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+ cmos_minutes = CMOS_READ(RTC_MINUTES);
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ BCD_TO_BIN(cmos_minutes);
+
+ /*
+ * since we're only adjusting minutes and seconds,
+ * don't interfere with hour overflow. This avoids
+ * messing with unknown time zones but requires your
+ * RTC not to be off by more than 15 minutes
+ */
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+ if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
+ real_minutes += 30; /* correct for half hour time zone */
+ real_minutes %= 60;
+
+ if (abs(real_minutes - cmos_minutes) < 30) {
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ BIN_TO_BCD(real_seconds);
+ BIN_TO_BCD(real_minutes);
+ }
+ CMOS_WRITE(real_seconds,RTC_SECONDS);
+ CMOS_WRITE(real_minutes,RTC_MINUTES);
+ } else {
+ printk(KERN_WARNING
+ "set_rtc_mmss: can't update from %d to %d\n",
+ cmos_minutes, real_minutes);
+ retval = -1;
+ }
+
+ /* The following flags have to be released exactly in this order,
+ * otherwise the DS12887 (popular MC146818A clone with integrated
+ * battery and quartz) will not reset the oscillator and will not
+ * update precisely 500 ms later. You won't find this mentioned in
+ * the Dallas Semiconductor data sheets, but who believes data
+ * sheets anyway ... -- Markus Kuhn
+ */
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+
+ return retval;
+}
+
+static inline unsigned long mach_get_cmos_time(void)
+{
+ unsigned int year, mon, day, hour, min, sec;
+ int i;
+
+ /* The Linux interpretation of the CMOS clock register contents:
+ * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
+ * RTC registers show the second which has precisely just started.
+ * Let's hope other operating systems interpret the RTC the same way.
+ */
+ /* read RTC exactly on falling edge of update flag */
+ for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
+ break;
+ for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
+ if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+ break;
+ do { /* Isn't this overkill ? UIP above should guarantee consistency */
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
+ } while (sec != CMOS_READ(RTC_SECONDS));
+ if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+ if ((year += 1900) < 1970)
+ year += 100;
+
+ return mktime(year, mon, day, hour, min, sec);
+}
+
+#endif /* !_MACH_TIME_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_timer.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_timer.h
new file mode 100644
index 0000000000..4b9703bb02
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_timer.h
@@ -0,0 +1,48 @@
+/*
+ * include/asm-i386/mach-default/mach_timer.h
+ *
+ * Machine specific calibrate_tsc() for generic.
+ * Split out from timer_tsc.c by Osamu Tomita <tomita@cinet.co.jp>
+ */
+/* ------ Calibrate the TSC -------
+ * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
+ * Too much 64-bit arithmetic here to do this cleanly in C, and for
+ * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
+ * output busy loop as low as possible. We avoid reading the CTC registers
+ * directly because of the awkward 8-bit access mechanism of the 82C54
+ * device.
+ */
+#ifndef _MACH_TIMER_H
+#define _MACH_TIMER_H
+
+#define CALIBRATE_LATCH (5 * LATCH)
+
+static inline void mach_prepare_counter(void)
+{
+ /* Set the Gate high, disable speaker */
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+ /*
+ * Now let's take care of CTC channel 2
+ *
+ * Set the Gate high, program CTC channel 2 for mode 0,
+ * (interrupt on terminal count mode), binary count,
+ * load 5 * LATCH count, (LSB and MSB) to begin countdown.
+ *
+ * Some devices need a delay here.
+ */
+ outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb_p(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
+ outb_p(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
+}
+
+static inline void mach_countup(unsigned long *count_p)
+{
+ unsigned long count = 0;
+ do {
+ count++;
+ } while ((inb_p(0x61) & 0x20) == 0);
+ *count_p = count;
+}
+
+#endif /* !_MACH_TIMER_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
new file mode 100644
index 0000000000..5761edd144
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h
@@ -0,0 +1,47 @@
+/**
+ * machine_specific_memory_setup - Hook for machine specific memory setup.
+ *
+ * Description:
+ * This is included late in kernel/setup.c so that it can make
+ * use of all of the static functions.
+ **/
+
+static char * __init machine_specific_memory_setup(void)
+{
+ char *who;
+ unsigned long start_pfn, max_pfn;
+
+ who = "Xen";
+
+ start_pfn = 0;
+ max_pfn = xen_start_info.nr_pages;
+
+ e820.nr_map = 0;
+ add_memory_region(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn) - PFN_PHYS(start_pfn), E820_RAM);
+
+ return who;
+}
+
+void __init machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c)
+{
+ clear_bit(X86_FEATURE_VME, c->x86_capability);
+ clear_bit(X86_FEATURE_DE, c->x86_capability);
+ clear_bit(X86_FEATURE_PSE, c->x86_capability);
+ clear_bit(X86_FEATURE_PGE, c->x86_capability);
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
+ if (!(xen_start_info.flags & SIF_PRIVILEGED))
+ clear_bit(X86_FEATURE_MTRR, c->x86_capability);
+}
+
+extern void hypervisor_callback(void);
+extern void failsafe_callback(void);
+
+static void __init machine_specific_arch_setup(void)
+{
+ HYPERVISOR_set_callbacks(
+ (unsigned long) hypervisor_callback,
+ (unsigned long) failsafe_callback,
+ (unsigned long) system_call);
+
+ machine_specific_modify_cpu_capabilities(&boot_cpu_data);
+}
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_pre.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_pre.h
new file mode 100644
index 0000000000..b18df6896c
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_pre.h
@@ -0,0 +1,5 @@
+/* Hook to call BIOS initialisation function */
+
+#define ARCH_SETUP machine_specific_arch_setup();
+
+static void __init machine_specific_arch_setup(void);
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h
new file mode 100644
index 0000000000..28adeaf244
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h
@@ -0,0 +1,55 @@
+/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
+ * which needs to alter them. */
+
+static inline void smpboot_clear_io_apic_irqs(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ io_apic_irqs = 0;
+#endif
+}
+
+static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
+{
+#if 1
+ printk("smpboot_setup_warm_reset_vector\n");
+#else
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ Dprintk("3.\n");
+#endif
+}
+
+static inline void smpboot_restore_warm_reset_vector(void)
+{
+ /*
+ * Install writable page 0 entry to set BIOS data area.
+ */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
+
+ *((volatile long *) phys_to_virt(0x467)) = 0;
+}
+
+static inline void smpboot_setup_io_apic(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+#endif
+}
+
+
+#define smp_found_config (HYPERVISOR_shared_info->n_vcpu > 1)
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
new file mode 100644
index 0000000000..4e487a06d6
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
@@ -0,0 +1,76 @@
+#ifndef __X86_64_MMU_CONTEXT_H
+#define __X86_64_MMU_CONTEXT_H
+
+#include <linux/config.h>
+#include <asm/desc.h>
+#include <asm/atomic.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/pda.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/*
+ * possibly do the LDT unload here?
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context(struct mm_struct *mm);
+
+#ifdef CONFIG_SMP
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ if (read_pda(mmu_state) == TLBSTATE_OK)
+ write_pda(mmu_state, TLBSTATE_LAZY);
+}
+#else
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+#endif
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned cpu = smp_processor_id();
+ if (likely(prev != next)) {
+ /* stop flush ipis for the previous mm */
+ clear_bit(cpu, &prev->cpu_vm_mask);
+#ifdef CONFIG_SMP
+ write_pda(mmu_state, TLBSTATE_OK);
+ write_pda(active_mm, next);
+#endif
+ set_bit(cpu, &next->cpu_vm_mask);
+ load_cr3(next->pgd);
+ xen_new_user_pt(__pa(__user_pgd(next->pgd)));
+ if (unlikely(next->context.ldt != prev->context.ldt))
+ load_LDT_nolock(&next->context, cpu);
+ }
+#ifdef CONFIG_SMP
+ else {
+ write_pda(mmu_state, TLBSTATE_OK);
+ if (read_pda(active_mm) != next)
+ out_of_line_bug();
+ if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
+ /* We were in lazy tlb mode and leave_mm disabled
+ * tlb flush IPI delivery. We must reload CR3
+ * to make sure to use no freed page tables.
+ */
+ load_cr3(next->pgd);
+ xen_new_user_pt(__pa(__user_pgd(next->pgd)));
+ load_LDT_nolock(&next->context, cpu);
+ }
+ }
+#endif
+}
+
+#define deactivate_mm(tsk,mm) do { \
+ load_gs_index(0); \
+ asm volatile("movl %0,%%fs"::"r"(0)); \
+} while(0)
+
+#define activate_mm(prev, next) do { \
+ switch_mm((prev),(next),NULL); \
+} while (0)
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/page.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/page.h
new file mode 100644
index 0000000000..8acd7990dc
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/page.h
@@ -0,0 +1,229 @@
+#ifndef _X86_64_PAGE_H
+#define _X86_64_PAGE_H
+
+#include <linux/config.h>
+/* #include <linux/string.h> */
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/foreign_page.h>
+
+#define arch_free_page(_page,_order) \
+({ int foreign = PageForeign(_page); \
+ if (foreign) \
+ (PageForeignDestructor(_page))(_page); \
+ foreign; \
+})
+#define HAVE_ARCH_FREE_PAGE
+
+#ifdef CONFIG_XEN_SCRUB_PAGES
+#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+#else
+#define scrub_pages(_p,_n) ((void)0)
+#endif
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT 12
+#ifdef __ASSEMBLY__
+#define PAGE_SIZE (0x1 << PAGE_SHIFT)
+#else
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#endif
+#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT))
+
+#define THREAD_ORDER 1
+#ifdef __ASSEMBLY__
+#define THREAD_SIZE (1 << (PAGE_SHIFT + THREAD_ORDER))
+#else
+#define THREAD_SIZE (1UL << (PAGE_SHIFT + THREAD_ORDER))
+#endif
+#define CURRENT_MASK (~(THREAD_SIZE-1))
+
+#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
+#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
+
+#define HPAGE_SHIFT PMD_SHIFT
+#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+void clear_page(void *);
+void copy_page(void *, void *);
+
+#define clear_user_page(page, vaddr, pg) clear_page(page)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
+extern u32 *phys_to_machine_mapping;
+#define pfn_to_mfn(_pfn) ((unsigned long) phys_to_machine_mapping[(unsigned int)(_pfn)])
+#define mfn_to_pfn(_mfn) ((unsigned long) machine_to_phys_mapping[(unsigned int)(_mfn)])
+static inline unsigned long phys_to_machine(unsigned long phys)
+{
+ unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+ machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
+ return machine;
+}
+
+static inline unsigned long machine_to_phys(unsigned long machine)
+{
+ unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+ phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
+ return phys;
+}
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long pgd; } pgd_t;
+#define PTE_MASK PHYSICAL_PAGE_MASK
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+#define pte_val(x) (((x).pte & 1) ? machine_to_phys((x).pte) : \
+ (x).pte)
+#define pte_val_ma(x) ((x).pte)
+
+static inline unsigned long pmd_val(pmd_t x)
+{
+ unsigned long ret = x.pmd;
+ if (ret) ret = machine_to_phys(ret);
+ return ret;
+}
+
+static inline unsigned long pud_val(pud_t x)
+{
+ unsigned long ret = x.pud;
+ if (ret) ret = machine_to_phys(ret);
+ return ret;
+}
+
+static inline unsigned long pgd_val(pgd_t x)
+{
+ unsigned long ret = x.pgd;
+ if (ret) ret = machine_to_phys(ret);
+ return ret;
+}
+
+#define pgprot_val(x) ((x).pgprot)
+
+#define __pte_ma(x) ((pte_t) { (x) } )
+
+static inline pte_t __pte(unsigned long x)
+{
+ if (x & 1) x = phys_to_machine(x);
+ return ((pte_t) { (x) });
+}
+
+static inline pmd_t __pmd(unsigned long x)
+{
+ if ((x & 1)) x = phys_to_machine(x);
+ return ((pmd_t) { (x) });
+}
+
+static inline pud_t __pud(unsigned long x)
+{
+ if ((x & 1)) x = phys_to_machine(x);
+ return ((pud_t) { (x) });
+}
+
+static inline pgd_t __pgd(unsigned long x)
+{
+ if ((x & 1)) x = phys_to_machine(x);
+ return ((pgd_t) { (x) });
+}
+
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+extern unsigned long vm_stack_flags, vm_stack_flags32;
+extern unsigned long vm_data_default_flags, vm_data_default_flags32;
+extern unsigned long vm_force_exec32;
+
+#define __START_KERNEL 0xffffffff80100000UL
+#define __START_KERNEL_map 0xffffffff80000000UL
+#define __PAGE_OFFSET 0xffff880000000000UL
+
+#else
+#define __START_KERNEL 0xffffffff80100000
+#define __START_KERNEL_map 0xffffffff80000000
+#define __PAGE_OFFSET 0xffff880000000000
+#endif /* !__ASSEMBLY__ */
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+#define __PHYSICAL_MASK_SHIFT 46
+#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1)
+#define __VIRTUAL_MASK_SHIFT 48
+#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
+
+#define KERNEL_TEXT_SIZE (40UL*1024*1024)
+#define KERNEL_TEXT_START 0xffffffff80000000UL
+
+#ifndef __ASSEMBLY__
+
+#include <asm/bug.h>
+
+/* Pure 2^n version of get_order */
+extern __inline__ int get_order(unsigned long size)
+{
+ int order;
+
+ size = (size-1) >> (PAGE_SHIFT-1);
+ order = -1;
+ do {
+ size >>= 1;
+ order++;
+ } while (size);
+ return order;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+
+/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol.
+ Otherwise you risk miscompilation. */
+#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET)
+/* __pa_symbol should be used for C visible symbols.
+ This seems to be the official gcc blessed way to do such arithmetic. */
+#define __pa_symbol(x) \
+ ({unsigned long v; \
+ asm("" : "=r" (v) : "0" (x)); \
+ __pa(v); })
+
+#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#ifndef CONFIG_DISCONTIGMEM
+#define pfn_to_page(pfn) (mem_map + (pfn))
+#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
+#define pfn_valid(pfn) ((pfn) < max_mapnr)
+#endif
+
+#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+
+/* VIRT <-> MACHINE conversion */
+#define virt_to_machine(_a) (phys_to_machine(__pa(_a)))
+#define machine_to_virt(_m) (__va(machine_to_phys(_m)))
+
+#define VM_DATA_DEFAULT_FLAGS \
+ (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
+ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define __HAVE_ARCH_GATE_AREA 1
+
+#endif /* __KERNEL__ */
+
+#endif /* _X86_64_PAGE_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/param.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/param.h
new file mode 100644
index 0000000000..5145e63610
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/param.h
@@ -0,0 +1,22 @@
+#ifndef _ASMx86_64_PARAM_H
+#define _ASMx86_64_PARAM_H
+
+#ifdef __KERNEL__
+# define HZ 100 /* Internal kernel timer frequency */
+# define USER_HZ 100 /* .. some user interfaces are in "ticks" */
+# define CLOCKS_PER_SEC (USER_HZ) /* like times() */
+#endif
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#define EXEC_PAGESIZE 4096
+
+#ifndef NOGROUP
+#define NOGROUP (-1)
+#endif
+
+#define MAXHOSTNAMELEN 64 /* max length of hostname */
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pci.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pci.h
new file mode 100644
index 0000000000..039046fdc8
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pci.h
@@ -0,0 +1,148 @@
+#ifndef __x8664_PCI_H
+#define __x8664_PCI_H
+
+#include <linux/config.h>
+#include <asm/io.h>
+
+#ifdef __KERNEL__
+
+#include <linux/mm.h> /* for struct page */
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+ already-configured bus numbers - to be used for buggy BIOSes
+ or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses() 0
+#endif
+#define pcibios_scan_all_fns(a, b) 0
+
+extern int no_iommu, force_iommu;
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO 0x1000
+#define PCIBIOS_MIN_MEM (pci_mem_start)
+
+#define PCIBIOS_MIN_CARDBUS_IO 0x4000
+
+void pcibios_config_init(void);
+struct pci_bus * pcibios_scan_root(int bus);
+extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
+extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+#include <linux/string.h>
+#include <asm/page.h>
+
+extern int iommu_setup(char *opt);
+
+#ifdef CONFIG_GART_IOMMU
+/* The PCI address space does equal the physical memory
+ * address space. The networking and block device layers use
+ * this boolean for bounce buffer decisions
+ *
+ * On AMD64 it mostly equals, but we set it to zero to tell some subsystems
+ * that an IOMMU is available.
+ */
+#define PCI_DMA_BUS_IS_PHYS (no_iommu ? 1 : 0)
+
+/*
+ * x86-64 always supports DAC, but sometimes it is useful to force
+ * devices through the IOMMU to get automatic sg list merging.
+ * Optional right now.
+ */
+extern int iommu_sac_force;
+#define pci_dac_dma_supported(pci_dev, mask) (!iommu_sac_force)
+
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
+ dma_addr_t ADDR_NAME;
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
+ __u32 LEN_NAME;
+#define pci_unmap_addr(PTR, ADDR_NAME) \
+ ((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
+ (((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME) \
+ ((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
+ (((PTR)->LEN_NAME) = (VAL))
+
+#else
+/* No IOMMU */
+
+#define PCI_DMA_BUS_IS_PHYS 1
+#define pci_dac_dma_supported(pci_dev, mask) 1
+
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME) (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME) (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
+
+#endif
+
+#include <asm-generic/pci-dma-compat.h>
+
+static inline dma64_addr_t
+pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
+{
+ return ((dma64_addr_t) page_to_phys(page) +
+ (dma64_addr_t) offset);
+}
+
+static inline struct page *
+pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+ return virt_to_page(__va(dma_addr));
+}
+
+static inline unsigned long
+pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+ return (dma_addr & ~PAGE_MASK);
+}
+
+static inline void
+pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+}
+
+static inline void
+pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+ flush_write_buffers();
+}
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine);
+
+static inline void pcibios_add_platform_entries(struct pci_dev *dev)
+{
+}
+
+#endif /* __KERNEL__ */
+
+/* generic pci stuff */
+#ifdef CONFIG_PCI
+#include <asm-generic/pci.h>
+#endif
+
+/* On Xen we have to scan all functions since Xen hides bridges from
+ * us. If a bridge is at fn=0 and that slot has a multifunction
+ * device, we won't find the additional devices without scanning all
+ * functions. */
+#undef pcibios_scan_all_fns
+#define pcibios_scan_all_fns(a, b) 1
+
+#endif /* __x8664_PCI_H */
diff --git a/xen/include/asm-x86/pda.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pda.h
index dcecc48f20..76dac1c98b 100644
--- a/xen/include/asm-x86/pda.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pda.h
@@ -1,22 +1,31 @@
#ifndef X86_64_PDA_H
#define X86_64_PDA_H
-#include <xen/cache.h>
+#ifndef __ASSEMBLY__
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/cache.h>
/* Per processor datastructure. %gs points to it while the kernel runs */
-/* To use a new field with the *_pda macros it needs to be added to tools/offset.c */
struct x8664_pda {
- unsigned long kernelstack; /* TOS for current process */
+ struct task_struct *pcurrent; /* Current process */
+ unsigned long data_offset; /* Per cpu data offset from linker address */
+ struct x8664_pda *me; /* Pointer to itself */
+ unsigned long kernelstack; /* top of kernel stack for current */
unsigned long oldrsp; /* user rsp for system call */
unsigned long irqrsp; /* Old rsp for interrupts. */
- struct domain *pcurrent; /* Current process */
int irqcount; /* Irq nesting counter. Starts with -1 */
int cpunumber; /* Logical CPU number */
char *irqstackptr; /* top of irqstack */
- unsigned long volatile *level4_pgt;
-} __cacheline_aligned;
+ unsigned int __softirq_pending;
+ unsigned int __nmi_count; /* number of NMIs on this CPU */
+ unsigned long idle_timestamp;
+ struct mm_struct *active_mm;
+ int mmu_state;
+ unsigned apic_timer_irqs;
+ int kernel_mode; /* kernel or user mode */
+} ____cacheline_aligned;
-#define PDA_STACKOFFSET (5*8)
#define IRQSTACK_ORDER 2
#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
@@ -31,25 +40,34 @@ extern struct x8664_pda cpu_pda[];
#define typeof_field(type,field) typeof(((type *)0)->field)
extern void __bad_pda_field(void);
-/* Don't use offsetof because it requires too much infrastructure */
-#define pda_offset(field) ((unsigned long)&((struct x8664_pda *)0)->field)
+
+#define pda_offset(field) offsetof(struct x8664_pda, field)
#define pda_to_op(op,field,val) do { \
switch (sizeof_field(struct x8664_pda, field)) { \
- case 2: asm volatile(op "w %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
- case 4: asm volatile(op "l %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
- case 8: asm volatile(op "q %0,%%gs:%P1" :: "r" (val), "i"(pda_offset(field)):"memory"); break; \
+case 2: \
+asm volatile(op "w %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
+case 4: \
+asm volatile(op "l %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
+case 8: \
+asm volatile(op "q %0,%%gs:%P1"::"r" (val),"i"(pda_offset(field)):"memory"); break; \
default: __bad_pda_field(); \
} \
} while (0)
-
+/*
+ * AK: PDA read accesses should be neither volatile nor have a memory clobber.
+ * Unfortunately removing them causes all hell to break loose currently.
+ */
#define pda_from_op(op,field) ({ \
typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \
switch (sizeof_field(struct x8664_pda, field)) { \
- case 2: asm volatile(op "w %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
- case 4: asm volatile(op "l %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
- case 8: asm volatile(op "q %%gs:%P1,%0":"=r" (ret__): "i" (pda_offset(field)):"memory"); break; \
+case 2: \
+asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
+case 4: \
+asm volatile(op "l %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
+case 8: \
+asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\
default: __bad_pda_field(); \
} \
ret__; })
@@ -61,3 +79,7 @@ extern void __bad_pda_field(void);
#define sub_pda(field,val) pda_to_op("sub",field,val)
#endif
+
+#define PDA_STACKOFFSET (5*8)
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
new file mode 100644
index 0000000000..325d700c3b
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
@@ -0,0 +1,171 @@
+#ifndef _X86_64_PGALLOC_H
+#define _X86_64_PGALLOC_H
+
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/pda.h>
+#include <linux/threads.h>
+#include <linux/mm.h>
+#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
+
+void make_page_readonly(void *va);
+void make_page_writable(void *va);
+void make_pages_readonly(void *va, unsigned int nr);
+void make_pages_writable(void *va, unsigned int nr);
+
+#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+{
+ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)));
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+{
+ set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+}
+
+/*
+ * We need to use the batch mode here, but pgd_populate() won't
+ * be called frequently.
+ */
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+ set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+ set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
+}
+
+extern __inline__ pmd_t *get_pmd(void)
+{
+ pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+ if (!pmd)
+ return NULL;
+ make_page_readonly(pmd);
+ xen_pmd_pin(__pa(pmd));
+ return pmd;
+}
+
+extern __inline__ void pmd_free(pmd_t *pmd)
+{
+ BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+ xen_pmd_unpin(__pa(pmd));
+ make_page_writable(pmd);
+ free_page((unsigned long)pmd);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!pmd)
+ return NULL;
+ make_page_readonly(pmd);
+ xen_pmd_pin(__pa(pmd));
+ return pmd;
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!pud)
+ return NULL;
+ make_page_readonly(pud);
+ xen_pud_pin(__pa(pud));
+ return pud;
+}
+
+static inline void pud_free(pud_t *pud)
+{
+ BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+ xen_pud_unpin(__pa(pud));
+ make_page_writable(pud);
+ free_page((unsigned long)pud);
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ /*
+ * We allocate two contiguous pages for kernel and user.
+ */
+ unsigned boundary;
+ pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
+
+ if (!pgd)
+ return NULL;
+ /*
+ * Copy kernel pointers in from init.
+ * Could keep a freelist or slab cache of those because the kernel
+ * part never changes.
+ */
+ boundary = pgd_index(__PAGE_OFFSET);
+ memset(pgd, 0, boundary * sizeof(pgd_t));
+ memcpy(pgd + boundary,
+ init_level4_pgt + boundary,
+ (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
+
+ memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
+ make_pages_readonly(pgd, 2);
+
+ xen_pgd_pin(__pa(pgd)); /* kernel */
+ xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
+ /*
+ * Set level3_user_pgt for vsyscall area
+ */
+ set_pgd(__user_pgd(pgd) + pgd_index(VSYSCALL_START),
+ mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
+ return pgd;
+}
+
+static inline void pgd_free(pgd_t *pgd)
+{
+ BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
+ xen_pgd_unpin(__pa(pgd));
+ xen_pgd_unpin(__pa(__user_pgd(pgd)));
+ make_pages_writable(pgd, 2);
+ free_pages((unsigned long)pgd, 1);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+ pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!pte)
+ return NULL;
+ make_page_readonly(pte);
+ xen_pte_pin(__pa(pte));
+ return pte;
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ pte_t *pte = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!pte)
+ return NULL;
+ make_page_readonly(pte);
+ xen_pte_pin(__pa(pte));
+ return virt_to_page((unsigned long)pte);
+}
+
+/* Should really implement gc for free page table pages. This could be
+ done with a reference count in struct page. */
+
+extern __inline__ void pte_free_kernel(pte_t *pte)
+{
+ BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
+ xen_pte_unpin(__pa(pte));
+ make_page_writable(pte);
+ free_page((unsigned long)pte);
+}
+
+extern void pte_free(struct page *pte);
+
+//#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
+
+#define __pte_free_tlb(tlb,x) pte_free((x))
+#define __pmd_free_tlb(tlb,x) pmd_free((x))
+#define __pud_free_tlb(tlb,x) pud_free((x))
+
+#endif /* _X86_64_PGALLOC_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
new file mode 100644
index 0000000000..5b86bd7978
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
@@ -0,0 +1,527 @@
+#ifndef _X86_64_PGTABLE_H
+#define _X86_64_PGTABLE_H
+
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the x86-64 page table tree.
+ *
+ * x86-64 has a 4 level table setup. Generic linux MM only supports
+ * three levels. The fourth level is currently a single static page that
+ * is shared by everybody and just contains a pointer to the current
+ * three level page setup on the beginning and some kernel mappings at
+ * the end. For more details see Documentation/x86_64/mm.txt
+ */
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <linux/threads.h>
+#include <asm/pda.h>
+#include <asm-xen/hypervisor.h>
+extern pud_t level3_user_pgt[512];
+extern pud_t init_level4_pgt[];
+extern pud_t init_level4_user_pgt[];
+extern unsigned long __supported_pte_mask;
+
+#define swapper_pg_dir NULL
+
+extern int nonx_setup(char *str);
+extern void paging_init(void);
+extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
+
+extern unsigned long pgkern_mask;
+
+#define arbitrary_virt_to_machine(__va) ({0;})
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#define PGDIR_SHIFT 39
+#define PTRS_PER_PGD 512
+
+/*
+ * PUD_SHIFT determines what a top-level page table entry can map
+ */
+#define PUD_SHIFT 30
+#define PTRS_PER_PUD 512
+
+/*
+ * PMD_SHIFT determines the size of the area a middle-level
+ * page table can map
+ */
+#define PMD_SHIFT 21
+#define PTRS_PER_PMD 512
+
+/*
+ * entries per page directory level
+ */
+#define PTRS_PER_PTE 512
+
+#define pte_ERROR(e) \
+ printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e))
+#define pmd_ERROR(e) \
+ printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), pmd_val(e))
+#define pud_ERROR(e) \
+ printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), pud_val(e))
+#define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
+
+#define pgd_none(x) (!pgd_val(x))
+#define pud_none(x) (!pud_val(x))
+
+#define set_pte_batched(pteptr, pteval) \
+ queue_l1_entry_update(pteptr, (pteval))
+
+extern inline int pud_present(pud_t pud) { return !pud_none(pud); }
+
+#ifdef CONFIG_SMP
+#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval).pte)
+
+#else
+#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval.pte))
+#if 0
+static inline void set_pte(pte_t *dst, pte_t val)
+{
+ *dst = val;
+}
+#endif
+#endif
+
+#define set_pmd(pmdptr, pmdval) xen_l2_entry_update(pmdptr, (pmdval))
+#define set_pud(pudptr, pudval) xen_l3_entry_update(pudptr, (pudval))
+#define set_pgd(pgdptr, pgdval) xen_l4_entry_update(pgdptr, (pgdval))
+
+extern inline void pud_clear (pud_t * pud)
+{
+ set_pud(pud, __pud(0));
+}
+
+#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+
+extern inline void pgd_clear (pgd_t * pgd)
+{
+ set_pgd(pgd, __pgd(0));
+ set_pgd(__user_pgd(pgd), __pgd(0));
+}
+
+#define pud_page(pud) \
+ ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
+
+/*
+ * A note on implementation of this atomic 'get-and-clear' operation.
+ * This is actually very simple because Xen Linux can only run on a single
+ * processor. Therefore, we cannot race other processors setting the 'accessed'
+ * or 'dirty' bits on a page-table entry.
+ * Even if pages are shared between domains, that is not a problem because
+ * each domain will have separate page tables, with their own versions of
+ * accessed & dirty state.
+ */
+static inline pte_t ptep_get_and_clear(pte_t *xp)
+{
+ pte_t pte = *xp;
+ if (pte.pte)
+ set_pte(xp, __pte_ma(0));
+ return pte;
+}
+
+#define pte_same(a, b) ((a).pte == (b).pte)
+
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
+#define FIRST_USER_PGD_NR 0
+
+#ifndef __ASSEMBLY__
+#define MAXMEM 0x3fffffffffffUL
+#define VMALLOC_START 0xffffc20000000000UL
+#define VMALLOC_END 0xffffe1ffffffffffUL
+#define MODULES_VADDR 0xffffffff88000000UL
+#define MODULES_END 0xfffffffffff00000UL
+#define MODULES_LEN (MODULES_END - MODULES_VADDR)
+
+#define _PAGE_BIT_PRESENT 0
+#define _PAGE_BIT_RW 1
+#define _PAGE_BIT_USER 2
+#define _PAGE_BIT_PWT 3
+#define _PAGE_BIT_PCD 4
+#define _PAGE_BIT_ACCESSED 5
+#define _PAGE_BIT_DIRTY 6
+#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
+#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
+#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
+
+#define _PAGE_PRESENT 0x001
+#define _PAGE_RW 0x002
+#define _PAGE_USER 0x004
+#define _PAGE_PWT 0x008
+#define _PAGE_PCD 0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY 0x040
+#define _PAGE_PSE 0x080 /* 2MB page */
+#define _PAGE_FILE 0x040 /* set:pagecache, unset:swap */
+#define _PAGE_GLOBAL 0x100 /* Global TLB entry */
+
+#define _PAGE_PROTNONE 0x080 /* If not present */
+#define _PAGE_NX (1UL<<_PAGE_BIT_NX)
+
+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE _PAGE_TABLE
+
+#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_COPY PAGE_COPY_NOEXEC
+#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define __PAGE_KERNEL \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+#define __PAGE_KERNEL_EXEC \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER )
+#define __PAGE_KERNEL_NOCACHE \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+#define __PAGE_KERNEL_RO \
+ (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+#define __PAGE_KERNEL_VSYSCALL \
+ (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_USER )
+#define __PAGE_KERNEL_VSYSCALL_NOCACHE \
+ (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_USER )
+#define __PAGE_KERNEL_LARGE \
+ (__PAGE_KERNEL | _PAGE_PSE | _PAGE_USER )
+
+
+/*
+ * We don't support GLOBAL page in xenolinux64
+ */
+#define MAKE_GLOBAL(x) __pgprot((x))
+
+#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
+#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
+#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
+
+/* xwr */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+static inline unsigned long pgd_bad(pgd_t pgd)
+{
+ unsigned long val = pgd_val(pgd);
+ val &= ~PTE_MASK;
+ val &= ~(_PAGE_USER | _PAGE_DIRTY);
+ return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
+}
+
+static inline unsigned long pud_bad(pud_t pud)
+{
+ unsigned long val = pud_val(pud);
+ val &= ~PTE_MASK;
+ val &= ~(_PAGE_USER | _PAGE_DIRTY);
+ return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
+}
+
+#define pte_none(x) (!(x).pte)
+#define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0)
+
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+
+/*
+ * We detect special mappings in one of two ways:
+ * 1. If the MFN is an I/O page then Xen will set the m2p entry
+ * to be outside our maximum possible pseudophys range.
+ * 2. If the MFN belongs to a different domain then we will certainly
+ * not have MFN in our p2m table. Conversely, if the page is ours,
+ * then we'll have p2m(m2p(MFN))==MFN.
+ * If we detect a special mapping then it doesn't have a 'struct page'.
+ * We force !pfn_valid() by returning an out-of-range pointer.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
+ * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ *
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ * require. In all the cases we care about, the high bit gets shifted out
+ * (e.g., phys_to_machine()) so behaviour there is correct.
+ */
+#define INVALID_P2M_ENTRY (~0U)
+#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
+#define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
+#define pte_pfn(_pte) \
+({ \
+ unsigned long mfn = pte_mfn(_pte); \
+ unsigned pfn = mfn_to_pfn(mfn); \
+ if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
+ pfn = max_mapnr; /* special: force !pfn_valid() */ \
+ pfn; \
+})
+
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+{
+ pte_t pte;
+
+ (pte).pte = (pfn_to_mfn(page_nr) << PAGE_SHIFT);
+ (pte).pte |= pgprot_val(pgprot);
+ (pte).pte &= __supported_pte_mask;
+ return pte;
+}
+
+#define pfn_pte_ma(pfn, prot) __pte_ma((((pfn) << PAGE_SHIFT) | pgprot_val(prot)) & __supported_pte_mask)
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+#define __pte_val(x) ((x).pte)
+
+static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
+extern inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
+extern inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
+extern inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
+extern inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
+extern inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
+static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; }
+
+extern inline pte_t pte_rdprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
+extern inline pte_t pte_exprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
+extern inline pte_t pte_mkclean(pte_t pte) { __pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
+extern inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+extern inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
+extern inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
+extern inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
+extern inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
+extern inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+extern inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
+
+/*
+ * Test the dirty bit of *ptep and, if set, clear it.
+ * Returns nonzero iff the pte was dirty.  The clear goes through
+ * xen_l1_entry_update() — presumably a hypervisor-mediated PTE write,
+ * since direct stores to live page tables are not allowed under Xen
+ * (TODO confirm against the hypercall interface).
+ * Note: read-then-update, so not atomic against concurrent updaters.
+ */
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ int ret = pte_dirty(pte);
+ if (ret)
+ xen_l1_entry_update(ptep, pte_mkclean(pte).pte);
+ return ret;
+}
+
+/*
+ * Test the accessed (young) bit of *ptep and, if set, clear it.
+ * Returns nonzero iff the pte was young.  As with the dirty variant,
+ * the update is routed through xen_l1_entry_update() rather than a
+ * direct store, and is not atomic against concurrent updaters.
+ */
+static inline int ptep_test_and_clear_young(pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ int ret = pte_young(pte);
+ if (ret)
+ xen_l1_entry_update(ptep, pte_mkold(pte).pte);
+ return ret;
+}
+
+/*
+ * Remove write permission from *ptep if it is currently writable.
+ * Uses set_pte() here (contrast with the dirty/young helpers above,
+ * which call xen_l1_entry_update() directly).
+ */
+static inline void ptep_set_wrprotect(pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ if (pte_write(pte))
+ set_pte(ptep, pte_wrprotect(pte));
+}
+/* Set the dirty bit in *ptep (no-op if already dirty), via the Xen
+ PTE-update path rather than a direct store. */
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ if (!pte_dirty(pte))
+ xen_l1_entry_update(ptep, pte_mkdirty(pte).pte);
+}
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
+
+#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
+static inline int pmd_large(pmd_t pte) {
+ return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE;
+}
+
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+/*
+ * Level 4 access.
+ * Never use these in the common code.
+ */
+#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
+#define pgd_offset_k(address) (pgd_t *)(init_level4_pgt + pgd_index(address))
+#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT)
+#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
+
+/* PUD - Level3 access */
+/* to find an entry in a page-table-directory. */
+#define pud_index(address) ((address >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
+/* Return the entry for 'address' within the pud table starting at 'pud'. */
+static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
+{
+ return pud + pud_index(address);
+}
+
+/* Find correct pud via the hidden fourth level page level: */
+
+/* This accesses the reference page table of the boot cpu.
+ Other CPUs get synced lazily via the page fault handler. */
+static inline pud_t *pud_offset_k(unsigned long address)
+{
+ unsigned long addr;
+
+ /* Under Xen the level-4 entry holds a machine address: mask off the
+ low bits, then translate machine -> pseudo-physical before turning
+ it into a kernel virtual address.
+ NOTE(review): the level-4 table is indexed with pud_index() rather
+ than pgd_index() — matches the surrounding code, but worth
+ confirming the shift is intentional. */
+ addr = pud_val(init_level4_pgt[pud_index(address)]);
+ addr &= PHYSICAL_PAGE_MASK; /* machine physical */
+ addr = machine_to_phys(addr);
+ return __pud_offset_k((pud_t *)__va(addr), address);
+}
+
+/* PMD - Level 2 access */
+#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
+#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
+ pmd_index(address))
+#define pmd_none(x) (!pmd_val(x))
+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+#define pmd_bad(x) ((pmd_val(x) & ~PTE_MASK) != _KERNPG_TABLE )
+#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+
+#define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
+#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
+
+/* PTE - Level 1 access. */
+
+/* page, protection -> pte */
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+#define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE)
+
+/* physical address -> PTE */
+/*
+ * Build a pte directly from a physical address and protection bits.
+ * Unlike pfn_pte() above, no pfn_to_mfn() translation and no
+ * __supported_pte_mask filtering is applied — the caller supplies the
+ * address exactly as it should appear in the pte.
+ */
+static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
+{
+ pte_t pte;
+ (pte).pte = physpage | pgprot_val(pgprot);
+ return pte;
+}
+
+/* Change flags of a PTE */
+/*
+ * Replace the protection bits of 'pte' with 'newprot': keep only the
+ * bits in _PAGE_CHG_MASK (frame address plus preserved flags), OR in
+ * the new protection, and drop unsupported bits.
+ */
+extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ (pte).pte &= _PAGE_CHG_MASK;
+ (pte).pte |= pgprot_val(newprot);
+ (pte).pte &= __supported_pte_mask;
+ return pte;
+}
+
+#define pte_index(address) \
+ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
+ pte_index(address))
+
+/* x86-64 always has all page tables mapped. */
+#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
+#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
+#define pte_unmap(pte) /* NOP */
+#define pte_unmap_nested(pte) /* NOP */
+
+#define update_mmu_cache(vma,address,pte) do { } while (0)
+
+/* We only update the dirty/accessed state if we set
+ * the dirty bit by hand in the kernel, since the hardware
+ * will do the accessed bit for us, and we don't want to
+ * race with other CPU's that might be updating the dirty
+ * bit at the same time. */
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+ do { \
+ if (__dirty) { \
+ set_pte(__ptep, __entry); \
+ flush_tlb_page(__vma, __address); \
+ } \
+ } while (0)
+
+/* Encode and de-code a swap entry */
+#define __swp_type(x) (((x).val >> 1) & 0x3f)
+#define __swp_offset(x) ((x).val >> 8)
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+#endif /* !__ASSEMBLY__ */
+
+extern int kern_addr_valid(unsigned long addr);
+
+#define DOMID_LOCAL (0xFFFFU)
+
+int direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long machine_addr,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid);
+int __direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t *v);
+
+#define io_remap_page_range(vma, vaddr, paddr, size, prot) \
+ remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+
+#define HAVE_ARCH_UNMAPPED_AREA
+
+#define pgtable_cache_init() do { } while (0)
+#define check_pgt_cache() do { } while (0)
+
+#define PAGE_AGP PAGE_KERNEL_NOCACHE
+#define HAVE_PAGE_AGP 1
+
+/* fs/proc/kcore.c */
+#define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
+#define kc_offset_to_vaddr(o) \
+ (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define __HAVE_ARCH_PTEP_MKDIRTY
+#define __HAVE_ARCH_PTE_SAME
+#include <asm-generic/pgtable.h>
+
+#endif /* _X86_64_PGTABLE_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/processor.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/processor.h
new file mode 100644
index 0000000000..e4a683206f
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/processor.h
@@ -0,0 +1,474 @@
+/*
+ * include/asm-x86_64/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_X86_64_PROCESSOR_H
+#define __ASM_X86_64_PROCESSOR_H
+
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/sigcontext.h>
+#include <asm/cpufeature.h>
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/msr.h>
+#include <asm/current.h>
+#include <asm/system.h>
+#include <asm/mmsegment.h>
+#include <asm/percpu.h>
+#include <linux/personality.h>
+
+#define TF_MASK 0x00000100
+#define IF_MASK 0x00000200
+#define IOPL_MASK 0x00003000
+#define NT_MASK 0x00004000
+#define VM_MASK 0x00020000
+#define AC_MASK 0x00040000
+#define VIF_MASK 0x00080000 /* virtual interrupt flag */
+#define VIP_MASK 0x00100000 /* virtual interrupt pending */
+#define ID_MASK 0x00200000
+
+#define desc_empty(desc) \
+ (!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+ (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; })
+
+/*
+ * CPU type and hardware bug flags. Kept separately for each CPU.
+ */
+
+struct cpuinfo_x86 {
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ __u32 x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+ int x86_cache_size; /* in KB */
+ int x86_clflush_size;
+ int x86_cache_alignment;
+ int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/
+ __u8 x86_virt_bits, x86_phys_bits;
+ __u8 x86_num_cores;
+ __u8 x86_apicid;
+ __u32 x86_power;
+ __u32 x86_cpuid_level; /* Max CPUID function supported */
+ unsigned long loops_per_jiffy;
+} ____cacheline_aligned;
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_NUM 8
+#define X86_VENDOR_UNKNOWN 0xff
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+extern char ignore_irq13;
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void dodgy_tsc(void);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
+#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+#define X86_CR4_MCE 0x0040 /* Machine check enable */
+#define X86_CR4_PGE 0x0080 /* enable global pages */
+#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+
+/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+/*
+ * Record 'mask' in the mmu_cr4_features bookkeeping.  Under Xen a guest
+ * cannot write %cr4 itself: only the FPU/SSE enable bits (OSFXSR,
+ * OSXMMEXCPT) are accepted silently here; any other bit writes a
+ * diagnostic to the Xen console via HYPERVISOR_console_io() and BUG()s.
+ */
+static inline void set_in_cr4 (unsigned long mask)
+{
+ mmu_cr4_features |= mask;
+ switch (mask) {
+ case X86_CR4_OSFXSR:
+ case X86_CR4_OSXMMEXCPT:
+ break;
+ default:
+ do {
+ const char *msg = "Xen unsupported cr4 update\n";
+ (void)HYPERVISOR_console_io(
+ CONSOLEIO_write, __builtin_strlen(msg),
+ (char *)msg);
+ BUG();
+ } while (0);
+ }
+}
+
+#define load_cr3(pgdir) do { \
+ xen_pt_switch(__pa(pgdir)); \
+ per_cpu(cur_pgd, smp_processor_id()) = pgdir; \
+} while (/* CONSTCOND */0)
+
+/*
+ * Bus types
+ */
+#define MCA_bus 0
+#define MCA_bus__is_a_macro
+
+
+/*
+ * User space process size. 47bits.
+ */
+#define TASK_SIZE (0x800000000000UL)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
+#define TASK_UNMAPPED_32 PAGE_ALIGN(IA32_PAGE_OFFSET/3)
+#define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3)
+#define TASK_UNMAPPED_BASE \
+ (test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
+
+/*
+ * Size of io_bitmap.
+ */
+#define IO_BITMAP_BITS 65536
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fxsave_struct {
+ u16 cwd;
+ u16 swd;
+ u16 twd;
+ u16 fop;
+ u64 rip;
+ u64 rdp;
+ u32 mxcsr;
+ u32 mxcsr_mask;
+ u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */
+ u32 padding[24];
+} __attribute__ ((aligned (16)));
+
+union i387_union {
+ struct i387_fxsave_struct fxsave;
+};
+
+struct tss_struct {
+ u32 reserved1;
+ u64 rsp0;
+ u64 rsp1;
+ u64 rsp2;
+ u64 reserved2;
+ u64 ist[7];
+ u32 reserved3;
+ u32 reserved4;
+ u16 reserved5;
+ u16 io_bitmap_base;
+ /*
+ * The extra 1 is there because the CPU will access an
+ * additional byte beyond the end of the IO permission
+ * bitmap. The extra byte must be all 1 bits, and must
+ * be within the limit. Thus we have:
+ *
+ * 128 bytes, the bitmap itself, for ports 0..0x3ff
+ * 8 bytes, for an extra "long" of ~0UL
+ */
+ unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+} __attribute__((packed)) ____cacheline_aligned;
+
+extern struct cpuinfo_x86 boot_cpu_data;
+DECLARE_PER_CPU(struct tss_struct,init_tss);
+DECLARE_PER_CPU(pgd_t *, cur_pgd);
+
+#define ARCH_MIN_TASKALIGN 16
+
+struct thread_struct {
+ unsigned long rsp0;
+ unsigned long rsp;
+ unsigned long userrsp; /* Copy from PDA */
+ unsigned long fs;
+ unsigned long gs;
+ unsigned int io_pl;
+ unsigned short es, ds, fsindex, gsindex;
+/* Hardware debugging registers */
+ unsigned long debugreg0;
+ unsigned long debugreg1;
+ unsigned long debugreg2;
+ unsigned long debugreg3;
+ unsigned long debugreg6;
+ unsigned long debugreg7;
+/* fault info */
+ unsigned long cr2, trap_no, error_code;
+/* floating point info */
+ union i387_union i387 __attribute__((aligned(16)));
+/* IO permissions. the bitmap could be moved into the GDT, that would make
+ switch faster for a limited number of ioperm using tasks. -AK */
+ int ioperm;
+ unsigned long *io_bitmap_ptr;
+ unsigned io_bitmap_max;
+/* cached TLS descriptors. */
+ u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
+} __attribute__((aligned(16)));
+
+#define INIT_THREAD {}
+
+#define INIT_MMAP \
+{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
+
+#define STACKFAULT_STACK 1
+#define DOUBLEFAULT_STACK 2
+#define NMI_STACK 3
+#define DEBUG_STACK 4
+#define MCE_STACK 5
+#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
+#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+#define EXCEPTION_STACK_ORDER 0
+
+#define start_thread(regs,new_rip,new_rsp) do { \
+ asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \
+ load_gs_index(0); \
+ (regs)->rip = (new_rip); \
+ (regs)->rsp = (new_rsp); \
+ write_pda(oldrsp, (new_rsp)); \
+ (regs)->cs = __USER_CS; \
+ (regs)->ss = __USER_DS; \
+ (regs)->eflags = 0x200; \
+ set_fs(USER_DS); \
+} while(0)
+
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+
+/* Prepare to copy thread state - unlazy all lazy status */
+extern void prepare_to_copy(struct task_struct *tsk);
+
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.rsp - 8))
+
+extern unsigned long get_wchan(struct task_struct *p);
+#define KSTK_EIP(tsk) \
+ (((struct pt_regs *)(tsk->thread.rsp0 - sizeof(struct pt_regs)))->rip)
+#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
+
+
+struct microcode_header {
+ unsigned int hdrver;
+ unsigned int rev;
+ unsigned int date;
+ unsigned int sig;
+ unsigned int cksum;
+ unsigned int ldrver;
+ unsigned int pf;
+ unsigned int datasize;
+ unsigned int totalsize;
+ unsigned int reserved[3];
+};
+
+struct microcode {
+ struct microcode_header hdr;
+ unsigned int bits[0];
+};
+
+typedef struct microcode microcode_t;
+typedef struct microcode_header microcode_header_t;
+
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+ unsigned int sig;
+ unsigned int pf;
+ unsigned int cksum;
+};
+
+struct extended_sigtable {
+ unsigned int count;
+ unsigned int cksum;
+ unsigned int reserved[3];
+ struct extended_signature sigs[0];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE _IO('6',0)
+
+
+#define ASM_NOP1 K8_NOP1
+#define ASM_NOP2 K8_NOP2
+#define ASM_NOP3 K8_NOP3
+#define ASM_NOP4 K8_NOP4
+#define ASM_NOP5 K8_NOP5
+#define ASM_NOP6 K8_NOP6
+#define ASM_NOP7 K8_NOP7
+#define ASM_NOP8 K8_NOP8
+
+/* Opteron nops */
+#define K8_NOP1 ".byte 0x90\n"
+#define K8_NOP2 ".byte 0x66,0x90\n"
+#define K8_NOP3 ".byte 0x66,0x66,0x90\n"
+#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n"
+#define K8_NOP5 K8_NOP3 K8_NOP2
+#define K8_NOP6 K8_NOP3 K8_NOP3
+#define K8_NOP7 K8_NOP4 K8_NOP3
+#define K8_NOP8 K8_NOP4 K8_NOP4
+
+#define ASM_NOP_MAX 8
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+extern inline void rep_nop(void)
+{
+ /* "rep;nop" encodes the PAUSE hint; "memory" clobber stops the
+ compiler caching memory reads across spin iterations. */
+ __asm__ __volatile__("rep;nop": : :"memory");
+}
+
+/* Stop speculative execution */
+/* Stop speculative execution: CPUID is used here as a serializing
+ instruction; the output and clobbers keep the compiler honest about
+ the registers CPUID overwrites. */
+extern inline void sync_core(void)
+{
+ int tmp;
+ asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
+}
+
+#define cpu_has_fpu 1
+
+#define ARCH_HAS_PREFETCH
+/* Hint the CPU to fetch the cache line at 'x' into all cache levels
+ (prefetcht0).  Purely a hint; no architectural side effects. */
+static inline void prefetch(void *x)
+{
+ asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
+}
+
+#define ARCH_HAS_PREFETCHW 1
+/* Prefetch the line at 'x' with intent to write.  alternative_input()
+ patches in the 3DNow! PREFETCHW instruction when X86_FEATURE_3DNOW is
+ set, and leaves a 5-byte NOP otherwise. */
+static inline void prefetchw(void *x)
+{
+ alternative_input(ASM_NOP5,
+ "prefetchw (%1)",
+ X86_FEATURE_3DNOW,
+ "r" (x));
+}
+
+#define ARCH_HAS_SPINLOCK_PREFETCH 1
+
+#define spin_lock_prefetch(x) prefetchw(x)
+
+#define cpu_relax() rep_nop()
+
+/*
+ * NSC/Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ * NSC/Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+ outb((reg), 0x22); \
+ outb((data), 0x23); \
+} while (0)
+
+/* Arm MONITOR on the address in eax, with ecx/edx hints.  The opcode
+ bytes are hand-encoded — presumably because contemporary assemblers
+ did not know the mnemonic (TODO confirm). */
+static inline void __monitor(const void *eax, unsigned long ecx,
+ unsigned long edx)
+{
+ /* "monitor %eax,%ecx,%edx;" */
+ asm volatile(
+ ".byte 0x0f,0x01,0xc8;"
+ : :"a" (eax), "c" (ecx), "d"(edx));
+}
+
+/* MWAIT: wait for a write to the previously MONITORed line, with
+ eax/ecx extensions/hints.  Opcode bytes hand-encoded as with
+ __monitor() above. */
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+ /* "mwait %eax,%ecx;" */
+ asm volatile(
+ ".byte 0x0f,0x01,0xc9;"
+ : :"a" (eax), "c" (ecx));
+}
+
+#define stack_current() \
+({ \
+ struct thread_info *ti; \
+ asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
+ ti->task; \
+})
+
+#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
+
+extern unsigned long boot_option_idle_override;
+/* Boot loader type from the setup header */
+extern int bootloader_type;
+
+#endif /* __ASM_X86_64_PROCESSOR_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h
new file mode 100644
index 0000000000..2af8edd82f
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h
@@ -0,0 +1,119 @@
+#ifndef _X86_64_PTRACE_H
+#define _X86_64_PTRACE_H
+
+#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+#define R15 0
+#define R14 8
+#define R13 16
+#define R12 24
+#define RBP 32
+#define RBX 40
+/* arguments: interrupts/non tracing syscalls only save upto here*/
+#define R11 48
+#define R10 56
+#define R9 64
+#define R8 72
+#define RAX 80
+#define RCX 88
+#define RDX 96
+#define RSI 104
+#define RDI 112
+#define ORIG_RAX 120 /* = ERROR */
+/* end of arguments */
+/* cpu exception frame or undefined in case of fast syscall. */
+#define RIP 128
+#define CS 136
+#define EFLAGS 144
+#define RSP 152
+#define SS 160
+#define ARGOFFSET R11
+#endif /* __ASSEMBLY__ */
+
+/* top of stack page */
+#define FRAME_SIZE 168
+
+#define PTRACE_OLDSETOPTIONS 21
+
+#ifndef __ASSEMBLY__
+
+struct pt_regs {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long rbp;
+ unsigned long rbx;
+/* arguments: non interrupts/non tracing syscalls only save upto here*/
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long rax;
+ unsigned long rcx;
+ unsigned long rdx;
+ unsigned long rsi;
+ unsigned long rdi;
+ unsigned long orig_rax;
+/* end of arguments */
+/* cpu exception frame or undefined */
+ unsigned long rip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long rsp;
+ unsigned long ss;
+/* top of stack page */
+};
+
+#endif
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+#define PTRACE_GETFPXREGS 18
+#define PTRACE_SETFPXREGS 19
+
+/* only useful for access 32bit programs */
+#define PTRACE_GET_THREAD_AREA 25
+#define PTRACE_SET_THREAD_AREA 26
+
+#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#define user_mode(regs) (!!((regs)->cs & 3))
+#define instruction_pointer(regs) ((regs)->rip)
+#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
+extern unsigned long profile_pc(struct pt_regs *regs);
+#else
+#define profile_pc(regs) instruction_pointer(regs)
+#endif
+
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+
+enum {
+ EF_CF = 0x00000001,
+ EF_PF = 0x00000004,
+ EF_AF = 0x00000010,
+ EF_ZF = 0x00000040,
+ EF_SF = 0x00000080,
+ EF_TF = 0x00000100,
+ EF_IE = 0x00000200,
+ EF_DF = 0x00000400,
+ EF_OF = 0x00000800,
+ EF_IOPL = 0x00003000,
+ EF_IOPL_RING0 = 0x00000000,
+ EF_IOPL_RING1 = 0x00001000,
+ EF_IOPL_RING2 = 0x00002000,
+ EF_NT = 0x00004000, /* nested task */
+ EF_RF = 0x00010000, /* resume */
+ EF_VM = 0x00020000, /* virtual mode */
+ EF_AC = 0x00040000, /* alignment */
+ EF_VIF = 0x00080000, /* virtual interrupt */
+ EF_VIP = 0x00100000, /* virtual interrupt pending */
+ EF_ID = 0x00200000, /* id */
+};
+
+#endif
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/segment.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/segment.h
new file mode 100644
index 0000000000..db5926a999
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/segment.h
@@ -0,0 +1,47 @@
+#ifndef _ASM_SEGMENT_H
+#define _ASM_SEGMENT_H
+
+#include <asm/cache.h>
+
+#define __KERNEL_CS 0x10
+#define __KERNEL_DS 0x1b
+
+#define __KERNEL32_CS 0x3b
+
+/*
+ * we cannot use the same code segment descriptor for user and kernel
+ * -- not even in the long flat mode, because of different DPL /kkeil
+ * The segment offset needs to contain a RPL. Grr. -AK
+ * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ */
+
+#define __USER32_CS 0x23 /* 4*8+3 */
+#define __USER_DS 0x2b /* 5*8+3 */
+#define __USER_CS 0x33 /* 6*8+3 */
+#define __USER32_DS __USER_DS
+#define __KERNEL16_CS (GDT_ENTRY_KERNELCS16 * 8)
+#define __KERNEL_COMPAT32_CS 0x8
+
+#define GDT_ENTRY_TLS 1
+#define GDT_ENTRY_TSS 8 /* needs two entries */
+#define GDT_ENTRY_LDT 10
+#define GDT_ENTRY_TLS_MIN 11
+#define GDT_ENTRY_TLS_MAX 13
+/* 14 free */
+#define GDT_ENTRY_KERNELCS16 15
+
+#define GDT_ENTRY_TLS_ENTRIES 3
+
+/* TLS indexes for 64bit - hardcoded in arch_prctl */
+#define FS_TLS 0
+#define GS_TLS 1
+
+#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
+
+#define IDT_ENTRIES 256
+#define GDT_ENTRIES 16
+#define GDT_SIZE (GDT_ENTRIES * 8)
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/smp.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/smp.h
new file mode 100644
index 0000000000..82b5cc2b9f
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/smp.h
@@ -0,0 +1,154 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+/*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
+#ifndef __ASSEMBLY__
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/bitops.h>
+extern int disable_apic;
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#ifndef __ASSEMBLY__
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif
+#include <asm/apic.h>
+#include <asm/thread_info.h>
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+#ifndef ASSEMBLY
+
+#include <asm/pda.h>
+
+struct pt_regs;
+
+/*
+ * Private routines/data
+ */
+
+extern void smp_alloc_memory(void);
+extern cpumask_t cpu_online_map;
+extern volatile unsigned long smp_invalidate_needed;
+extern int pic_mode;
+extern int smp_num_siblings;
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void smp_send_reschedule(int cpu);
+extern void smp_invalidate_rcv(void); /* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings(void);
+void smp_stop_cpu(void);
+extern cpumask_t cpu_sibling_map[NR_CPUS];
+extern u8 phys_proc_id[NR_CPUS];
+
+#define SMP_TRAMPOLINE_BASE 0x6000
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space.
+ * This simplifies scheduling and IPI sending and
+ * compresses data structures.
+ */
+
+extern cpumask_t cpu_callout_map;
+extern cpumask_t cpu_callin_map;
+#define cpu_possible_map cpu_callout_map
+
+/* Number of CPUs that have been called out for boot: the population
+ count of cpu_callout_map. */
+static inline int num_booting_cpus(void)
+{
+ return cpus_weight(cpu_callout_map);
+}
+
+#define __smp_processor_id() read_pda(cpunumber)
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/* Physical APIC ID of the executing CPU, read from the memory-mapped
+ local APIC ID register. */
+extern __inline int hard_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
+}
+#endif
+
+#define safe_smp_processor_id() (disable_apic ? 0 : x86_apicid_to_cpu(hard_smp_processor_id()))
+
+#endif /* !ASSEMBLY */
+
+#define NO_PROC_ID 0xFF /* No processor magic marker */
+
+#endif
+
+#ifndef ASSEMBLY
+/*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+ */
+extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */
+extern u8 x86_cpu_to_log_apicid[NR_CPUS];
+extern u8 bios_cpu_apicid[];
+#ifdef CONFIG_X86_LOCAL_APIC
+/* Convert a cpumask to an APIC destination value by returning its raw
+ first word — presumably relies on a flat/logical destination mode
+ where CPU bits map 1:1 onto APIC bits (TODO confirm). */
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ return cpus_addr(cpumask)[0];
+}
+
+/*
+ * Reverse-map a physical APIC ID to a logical CPU number by linear
+ * search of x86_cpu_to_apicid[].  Returns the CPU number on a match,
+ * 0 when the table is unpopulated (entry 0 == BAD_APICID: no MPS/ACPI,
+ * or called too early, so we must be CPU 0), and -1 otherwise.
+ */
+static inline int x86_apicid_to_cpu(u8 apicid)
+{
+ int i;
+
+ for (i = 0; i < NR_CPUS; ++i)
+ if (x86_cpu_to_apicid[i] == apicid)
+ return i;
+
+ /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
+ * or called too early. Either way, we must be CPU 0. */
+ if (x86_cpu_to_apicid[0] == BAD_APICID)
+ return 0;
+
+ return -1;
+}
+
+/* Translate an MPS CPU number to the BIOS-reported APIC ID, or
+ BAD_APICID when the index is out of range. */
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < NR_CPUS)
+ return (int)bios_cpu_apicid[mps_cpu];
+ else
+ return BAD_APICID;
+}
+#endif
+
+#endif /* !ASSEMBLY */
+
+#ifndef CONFIG_SMP
+#define stack_smp_processor_id() 0
+#define safe_smp_processor_id() 0
+#define cpu_logical_map(x) (x)
+#else
+#include <asm/thread_info.h>
+#define stack_smp_processor_id() \
+({ \
+ struct thread_info *ti; \
+ __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
+ ti->cpu; \
+})
+#endif
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_X86_LOCAL_APIC
+/* Logical APIC ID of the executing CPU, read from the memory-mapped
+ local APIC LDR register. */
+static __inline int logical_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+}
+#endif
+#endif
+
+#endif
+
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/system.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/system.h
new file mode 100644
index 0000000000..17a8fec17c
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/system.h
@@ -0,0 +1,403 @@
+#ifndef __ASM_SYSTEM_H
+#define __ASM_SYSTEM_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/segment.h>
+#include <asm-xen/synch_bitops.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/arch-x86_64.h>
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
+#define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
+
+/* frame pointer must be last for get_wchan */
+#define SAVE_CONTEXT "pushfq ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popfq\n\t"
+
+#define __EXTRA_CLOBBER \
+ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
+
+#define switch_to(prev,next,last) \
+ asm volatile(SAVE_CONTEXT \
+ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
+ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
+ "call __switch_to\n\t" \
+ ".globl thread_return\n" \
+ "thread_return:\n\t" \
+ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
+ "movq %P[thread_info](%%rsi),%%r8\n\t" \
+ LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "movq %%rax,%%rdi\n\t" \
+ "jc ret_from_fork\n\t" \
+ RESTORE_CONTEXT \
+ : "=a" (last) \
+ : [next] "S" (next), [prev] "D" (prev), \
+ [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
+ [ti_flags] "i" (offsetof(struct thread_info, flags)),\
+ [tif_fork] "i" (TIF_FORK), \
+ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \
+ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
+ : "memory", "cc" __EXTRA_CLOBBER)
+
+
+extern void load_gs_index(unsigned);
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value) \
+ asm volatile("\n" \
+ "1:\t" \
+ "movl %k0,%%" #seg "\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3:\t" \
+ "movl %1,%%" #seg "\n\t" \
+ "jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".align 8\n\t" \
+ ".quad 1b,3b\n" \
+ ".previous" \
+ : :"r" (value), "r" (0))
+
+#define set_debug(value,register) \
+ __asm__("movq %0,%%db" #register \
+ : /* no output */ \
+ :"r" ((unsigned long) value))
+
+
+#ifdef __KERNEL__
+struct alt_instr {
+ __u8 *instr; /* original instruction */
+ __u8 *replacement;
+ __u8 cpuid; /* cpuid bit set for replacement */
+ __u8 instrlen; /* length of original instruction */
+ __u8 replacementlen; /* length of new instruction, <= instrlen */
+ __u8 pad[5];
+};
+#endif
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows the use of optimized instructions even on generic binary
+ * kernels.
+ *
+ * length of oldinstr must be longer or equal the length of newinstr
+ * It can be padded with nops as needed.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661b\n" /* label */ \
+ " .quad 663f\n" /* new instruction */ \
+ " .byte %c0\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" :: "i" (feature) : "memory")
+
+/*
+ * Alternative inline assembly with input.
+ *
+ * Peculiarities:
+ * No memory clobber here.
+ * Argument numbers start with 1.
+ * Best is to use constraints that are fixed size (like (%1) ... "r")
+ * If you use variable sized constraints like "m" or "g" in the
+ * replacement make sure to pad to the worst case length.
+ */
+#define alternative_input(oldinstr, newinstr, feature, input...) \
+ asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+ ".section .altinstructions,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 661b\n" /* label */ \
+ " .quad 663f\n" /* new instruction */ \
+ " .byte %c0\n" /* feature bit */ \
+ " .byte 662b-661b\n" /* sourcelen */ \
+ " .byte 664f-663f\n" /* replacementlen */ \
+ ".previous\n" \
+ ".section .altinstr_replacement,\"ax\"\n" \
+ "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+ ".previous" :: "i" (feature), ##input)
+
+/*
+ * Clear and set 'TS' bit respectively
+ */
+#define clts() (HYPERVISOR_fpu_taskswitch(0))
+
+static inline unsigned long read_cr0(void)
+{
+ unsigned long cr0;
+ asm volatile("movq %%cr0,%0" : "=r" (cr0));
+ return cr0;
+}
+
+static inline void write_cr0(unsigned long val)
+{
+ asm volatile("movq %0,%%cr0" :: "r" (val));
+}
+
+static inline unsigned long read_cr3(void)
+{
+ unsigned long cr3;
+ asm("movq %%cr3,%0" : "=r" (cr3));
+ return cr3;
+}
+
+static inline unsigned long read_cr4(void)
+{
+ unsigned long cr4;
+ asm("movq %%cr4,%0" : "=r" (cr4));
+ return cr4;
+}
+
+static inline void write_cr4(unsigned long val)
+{
+ asm volatile("movq %0,%%cr4" :: "r" (val));
+}
+
+#define stts() (HYPERVISOR_fpu_taskswitch(1))
+
+#define wbinvd() \
+ __asm__ __volatile__ ("wbinvd": : :"memory");
+
+#endif /* __KERNEL__ */
+
+#define nop() __asm__ __volatile__ ("nop")
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+
+#define tas(ptr) (xchg((ptr),1))
+
+#define __xg(x) ((volatile long *)(x))
+
+extern inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
+{
+ *ptr = val;
+}
+
+#define _set_64bit set_64bit
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has side effect, so that attribute volatile is necessary,
+ * but generally the primitive is invalid, *ptr is output argument. --ANK
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+{
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("xchgb %b0,%1"
+ :"=q" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 2:
+ __asm__ __volatile__("xchgw %w0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 4:
+ __asm__ __volatile__("xchgl %k0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ case 8:
+ __asm__ __volatile__("xchgq %0,%1"
+ :"=r" (x)
+ :"m" (*__xg(ptr)), "0" (x)
+ :"memory");
+ break;
+ }
+ return x;
+}
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ }
+ return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_read_barrier_depends() do {} while(0)
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_read_barrier_depends() do {} while(0)
+#endif
+
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+#define mb() asm volatile("mfence":::"memory")
+#define rmb() asm volatile("lfence":::"memory")
+
+#ifdef CONFIG_UNORDERED_IO
+#define wmb() asm volatile("sfence" ::: "memory")
+#else
+#define wmb() asm volatile("" ::: "memory")
+#endif
+#define read_barrier_depends() do {} while(0)
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+
+#define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
+
+
+/*
+ * The use of 'barrier' in the following reflects their use as local-lock
+ * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
+ * critical operations are executed. All critical operations must complete
+ * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
+ * includes these barriers, for example.
+ */
+
+#define __cli() \
+do { \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
+} while (0)
+
+#define __sti() \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ _vcpu->evtchn_upcall_mask = 0; \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
+} while (0)
+
+#define __save_flags(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+} while (0)
+
+#define __restore_flags(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ barrier(); \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
+ barrier(); /* unmask then check (avoid races) */ \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
+ force_evtchn_callback(); \
+ preempt_enable(); \
+ } else \
+ preempt_enable_no_resched(); \
+} while (0)
+
+#define safe_halt() ((void)0)
+
+#define __save_and_cli(x) \
+do { \
+ vcpu_info_t *_vcpu; \
+ preempt_disable(); \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ _vcpu->evtchn_upcall_mask = 1; \
+ preempt_enable_no_resched(); \
+ barrier(); \
+} while (0)
+
+void cpu_idle_wait(void);
+
+#define local_irq_save(x) __save_and_cli(x)
+#define local_irq_restore(x) __restore_flags(x)
+#define local_save_flags(x) __save_flags(x)
+#define local_irq_disable() __cli()
+#define local_irq_enable() __sti()
+
+#define irqs_disabled() \
+ HYPERVISOR_shared_info->vcpu_data[smp_processor_id()].evtchn_upcall_mask
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+void disable_hlt(void);
+void enable_hlt(void);
+
+#define HAVE_EAT_KEY
+void eat_key(void);
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/timer.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/timer.h
new file mode 100644
index 0000000000..40c54f6978
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/timer.h
@@ -0,0 +1,64 @@
+#ifndef _ASMi386_TIMER_H
+#define _ASMi386_TIMER_H
+#include <linux/init.h>
+
+/**
+ * struct timer_ops - used to define a timer source
+ *
+ * @name: name of the timer.
+ * @init: Probes and initializes the timer. Takes clock= override
+ * string as an argument. Returns 0 on success, anything else
+ * on failure.
+ * @mark_offset: called by the timer interrupt.
+ * @get_offset: called by gettimeofday(). Returns the number of microseconds
+ * since the last timer interrupt.
+ * @monotonic_clock: returns the number of nanoseconds since the init of the
+ * timer.
+ * @delay: delays this many clock cycles.
+ */
+struct timer_opts {
+ char* name;
+ void (*mark_offset)(void);
+ unsigned long (*get_offset)(void);
+ unsigned long long (*monotonic_clock)(void);
+ void (*delay)(unsigned long);
+};
+
+struct init_timer_opts {
+ int (*init)(char *override);
+ struct timer_opts *opts;
+};
+
+#define TICK_SIZE (tick_nsec / 1000)
+
+extern struct timer_opts* __init select_timer(void);
+extern void clock_fallback(void);
+void setup_pit_timer(void);
+
+/* Modifiers for buggy PIT handling */
+
+extern int pit_latch_buggy;
+
+extern struct timer_opts *cur_timer;
+extern int timer_ack;
+
+/* list of externed timers */
+extern struct timer_opts timer_none;
+extern struct timer_opts timer_pit;
+extern struct init_timer_opts timer_pit_init;
+extern struct init_timer_opts timer_tsc_init;
+#ifdef CONFIG_X86_CYCLONE_TIMER
+extern struct init_timer_opts timer_cyclone_init;
+#endif
+
+extern unsigned long calibrate_tsc(void);
+extern void init_cpu_khz(void);
+#ifdef CONFIG_HPET_TIMER
+extern struct init_timer_opts timer_hpet_init;
+extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr);
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+extern struct init_timer_opts timer_pmtmr_init;
+#endif
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h
new file mode 100644
index 0000000000..35fd9b530d
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h
@@ -0,0 +1,97 @@
+#ifndef _X8664_TLBFLUSH_H
+#define _X8664_TLBFLUSH_H
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+
+#define __flush_tlb() xen_tlb_flush()
+
+/*
+ * Global pages have to be flushed a bit differently. Not a real
+ * performance problem because this does not happen often.
+ */
+#define __flush_tlb_global() xen_tlb_flush()
+
+
+extern unsigned long pgkern_mask;
+
+#define __flush_tlb_all() __flush_tlb_global()
+
+#define __flush_tlb_one(addr) xen_invlpg(addr)
+
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but the x86_64 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (mm == current->active_mm)
+ __flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ if (vma->vm_mm == current->active_mm)
+ __flush_tlb_one(addr);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (vma->vm_mm == current->active_mm)
+ __flush_tlb();
+}
+
+#else
+
+#include <asm/smp.h>
+
+#define local_flush_tlb() \
+ __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+#define flush_tlb() flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+{
+ flush_tlb_mm(vma->vm_mm);
+}
+
+#define TLBSTATE_OK 1
+#define TLBSTATE_LAZY 2
+
+#endif
+
+#define flush_tlb_kernel_range(start, end) flush_tlb_all()
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ /* x86_64 does not keep any page table caches in TLB */
+}
+
+#endif /* _X8664_TLBFLUSH_H */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/vga.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/vga.h
new file mode 100644
index 0000000000..14b8209600
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/vga.h
@@ -0,0 +1,20 @@
+/*
+ * Access to VGA videoram
+ *
+ * (c) 1998 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _LINUX_ASM_VGA_H_
+#define _LINUX_ASM_VGA_H_
+
+/*
+ * On the PC, we can just recalculate addresses and then
+ * access the videoram directly without any black magic.
+ */
+
+#define VGA_MAP_MEM(x) (unsigned long)isa_bus_to_virt(x)
+
+#define vga_readb(x) (*(x))
+#define vga_writeb(x,y) (*(y) = (x))
+
+#endif
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/xor.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/xor.h
new file mode 100644
index 0000000000..6ec68fd4ff
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/xor.h
@@ -0,0 +1,328 @@
+/*
+ * x86-64 changes / gcc fixes from Andi Kleen.
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ *
+ * This hasn't been optimized for the hammer yet, but there are likely
+ * no advantages to be gotten from x86-64 here anyway.
+ */
+
+typedef struct { unsigned long a,b; } __attribute__((aligned(16))) xmm_store_t;
+
+/* Doesn't use gcc to save the XMM registers, because there is no easy way to
+ tell it to do a clts before the register saving. */
+#define XMMS_SAVE do { \
+ preempt_disable(); \
+ if (!(current_thread_info()->status & TS_USEDFPU)) \
+ clts(); \
+ __asm__ __volatile__ ( \
+ "movups %%xmm0,(%1) ;\n\t" \
+ "movups %%xmm1,0x10(%1) ;\n\t" \
+ "movups %%xmm2,0x20(%1) ;\n\t" \
+ "movups %%xmm3,0x30(%1) ;\n\t" \
+ : "=&r" (cr0) \
+ : "r" (xmm_save) \
+ : "memory"); \
+} while(0)
+
+#define XMMS_RESTORE do { \
+ asm volatile ( \
+ "sfence ;\n\t" \
+ "movups (%1),%%xmm0 ;\n\t" \
+ "movups 0x10(%1),%%xmm1 ;\n\t" \
+ "movups 0x20(%1),%%xmm2 ;\n\t" \
+ "movups 0x30(%1),%%xmm3 ;\n\t" \
+ : \
+ : "r" (cr0), "r" (xmm_save) \
+ : "memory"); \
+ if (!(current_thread_info()->status & TS_USEDFPU)) \
+ stts(); \
+ preempt_enable(); \
+} while(0)
+
+#define OFFS(x) "16*("#x")"
+#define PF_OFFS(x) "256+16*("#x")"
+#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
+#define LD(x,y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
+#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
+#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
+#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
+#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
+#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
+#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
+#define XO1(x,y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
+#define XO2(x,y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
+#define XO3(x,y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
+#define XO4(x,y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
+#define XO5(x,y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
+
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned int lines = bytes >> 8;
+ unsigned long cr0;
+ xmm_store_t xmm_save[4];
+
+ XMMS_SAVE;
+
+ asm volatile (
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i,0) \
+ LD(i+1,1) \
+ PF1(i) \
+ PF1(i+2) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ PF0(i+4) \
+ PF0(i+6) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ ST(i,0) \
+ ST(i+1,1) \
+ ST(i+2,2) \
+ ST(i+3,3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addq %[inc], %[p1] ;\n"
+ " addq %[inc], %[p2] ;\n"
+ " decl %[cnt] ; jnz 1b"
+ : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
+ : [inc] "r" (256UL)
+ : "memory");
+
+ XMMS_RESTORE;
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned int lines = bytes >> 8;
+ xmm_store_t xmm_save[4];
+ unsigned long cr0;
+
+ XMMS_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i+2) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ PF2(i) \
+ PF2(i+2) \
+ PF0(i+4) \
+ PF0(i+6) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ XO2(i,0) \
+ XO2(i+1,1) \
+ XO2(i+2,2) \
+ XO2(i+3,3) \
+ ST(i,0) \
+ ST(i+1,1) \
+ ST(i+2,2) \
+ ST(i+3,3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addq %[inc], %[p1] ;\n"
+ " addq %[inc], %[p2] ;\n"
+ " addq %[inc], %[p3] ;\n"
+ " decl %[cnt] ; jnz 1b"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+ : [inc] "r" (256UL)
+ : "memory");
+ XMMS_RESTORE;
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned int lines = bytes >> 8;
+ xmm_store_t xmm_save[4];
+ unsigned long cr0;
+
+ XMMS_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i+2) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ PF2(i) \
+ PF2(i+2) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ PF3(i) \
+ PF3(i+2) \
+ PF0(i+4) \
+ PF0(i+6) \
+ XO2(i,0) \
+ XO2(i+1,1) \
+ XO2(i+2,2) \
+ XO2(i+3,3) \
+ XO3(i,0) \
+ XO3(i+1,1) \
+ XO3(i+2,2) \
+ XO3(i+3,3) \
+ ST(i,0) \
+ ST(i+1,1) \
+ ST(i+2,2) \
+ ST(i+3,3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addq %[inc], %[p1] ;\n"
+ " addq %[inc], %[p2] ;\n"
+ " addq %[inc], %[p3] ;\n"
+ " addq %[inc], %[p4] ;\n"
+ " decl %[cnt] ; jnz 1b"
+ : [cnt] "+c" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+ : [inc] "r" (256UL)
+ : "memory" );
+
+ XMMS_RESTORE;
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned int lines = bytes >> 8;
+ xmm_store_t xmm_save[4];
+ unsigned long cr0;
+
+ XMMS_SAVE;
+
+ __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i+2) \
+ LD(i,0) \
+ LD(i+1,1) \
+ LD(i+2,2) \
+ LD(i+3,3) \
+ PF2(i) \
+ PF2(i+2) \
+ XO1(i,0) \
+ XO1(i+1,1) \
+ XO1(i+2,2) \
+ XO1(i+3,3) \
+ PF3(i) \
+ PF3(i+2) \
+ XO2(i,0) \
+ XO2(i+1,1) \
+ XO2(i+2,2) \
+ XO2(i+3,3) \
+ PF4(i) \
+ PF4(i+2) \
+ PF0(i+4) \
+ PF0(i+6) \
+ XO3(i,0) \
+ XO3(i+1,1) \
+ XO3(i+2,2) \
+ XO3(i+3,3) \
+ XO4(i,0) \
+ XO4(i+1,1) \
+ XO4(i+2,2) \
+ XO4(i+3,3) \
+ ST(i,0) \
+ ST(i+1,1) \
+ ST(i+2,2) \
+ ST(i+3,3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " addq %[inc], %[p1] ;\n"
+ " addq %[inc], %[p2] ;\n"
+ " addq %[inc], %[p3] ;\n"
+ " addq %[inc], %[p4] ;\n"
+ " addq %[inc], %[p5] ;\n"
+ " decl %[cnt] ; jnz 1b"
+ : [cnt] "+c" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
+ [p5] "+r" (p5)
+ : [inc] "r" (256UL)
+ : "memory");
+
+ XMMS_RESTORE;
+}
+
+static struct xor_block_template xor_block_sse = {
+ .name = "generic_sse",
+ .do_2 = xor_sse_2,
+ .do_3 = xor_sse_3,
+ .do_4 = xor_sse_4,
+ .do_5 = xor_sse_5,
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_sse); \
+ } while (0)
+
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
+#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h b/linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h
index ed41c4a98d..a1801c1981 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h
@@ -34,8 +34,9 @@
#include <linux/config.h>
#include <asm-xen/hypervisor.h>
#include <asm/ptrace.h>
-#include <asm/synch_bitops.h>
+#include <asm-xen/synch_bitops.h>
#include <asm-xen/xen-public/event_channel.h>
+#include <linux/smp.h>
/*
* LOW-LEVEL DEFINITIONS
@@ -56,6 +57,7 @@ static inline void mask_evtchn(int port)
static inline void unmask_evtchn(int port)
{
shared_info_t *s = HYPERVISOR_shared_info;
+ vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
synch_clear_bit(port, &s->evtchn_mask[0]);
@@ -64,10 +66,10 @@ static inline void unmask_evtchn(int port)
* a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
*/
if ( synch_test_bit (port, &s->evtchn_pending[0]) &&
- !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
+ !synch_test_and_set_bit(port>>5, &vcpu_info->evtchn_pending_sel) )
{
- s->vcpu_data[0].evtchn_upcall_pending = 1;
- if ( !s->vcpu_data[0].evtchn_upcall_mask )
+ vcpu_info->evtchn_upcall_pending = 1;
+ if ( !vcpu_info->evtchn_upcall_mask )
force_evtchn_callback();
}
}
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h b/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h
new file mode 100644
index 0000000000..642a74dbf9
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h
@@ -0,0 +1,72 @@
+/******************************************************************************
+ * gnttab.h
+ *
+ * Two sets of functionality:
+ * 1. Granting foreign access to our memory reservation.
+ * 2. Accessing others' memory reservations via grant references.
+ * (i.e., mechanisms for both sender and recipient of grant references)
+ *
+ * Copyright (c) 2004, K A Fraser
+ * Copyright (c) 2005, Christopher Clark
+ */
+
+#ifndef __ASM_GNTTAB_H__
+#define __ASM_GNTTAB_H__
+
+#include <linux/config.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/grant_table.h>
+
+/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
+#define NR_GRANT_FRAMES 4
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+
+int
+gnttab_grant_foreign_access(
+ domid_t domid, unsigned long frame, int readonly);
+
+void
+gnttab_end_foreign_access(
+ grant_ref_t ref, int readonly);
+
+int
+gnttab_grant_foreign_transfer(
+ domid_t domid, unsigned long pfn);
+
+unsigned long
+gnttab_end_foreign_transfer(
+ grant_ref_t ref);
+
+int
+gnttab_query_foreign_access(
+ grant_ref_t ref );
+
+/*
+ * operations on reserved batches of grant references
+ */
+int
+gnttab_alloc_grant_references(
+ u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal );
+
+void
+gnttab_free_grant_references(
+ u16 count, grant_ref_t private_head );
+
+int
+gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
+);
+
+void
+gnttab_release_grant_reference(
+ grant_ref_t *private_head, grant_ref_t release );
+
+void
+gnttab_grant_foreign_access_ref(
+ grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
+
+void
+gnttab_grant_foreign_transfer_ref(
+ grant_ref_t, domid_t domid, unsigned long pfn);
+
+
+#endif /* __ASM_GNTTAB_H__ */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h
index 5dc7a4e4ae..449dba9745 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h
@@ -73,510 +73,70 @@ void lgdt_finish(void);
* be MACHINE addresses.
*/
-extern unsigned int mmu_update_queue_idx;
-
-void queue_l1_entry_update(pte_t *ptr, unsigned long val);
-void queue_l2_entry_update(pmd_t *ptr, pmd_t val);
-void queue_pt_switch(unsigned long ptr);
-void queue_tlb_flush(void);
-void queue_invlpg(unsigned long ptr);
-void queue_pgd_pin(unsigned long ptr);
-void queue_pgd_unpin(unsigned long ptr);
-void queue_pte_pin(unsigned long ptr);
-void queue_pte_unpin(unsigned long ptr);
-void queue_set_ldt(unsigned long ptr, unsigned long bytes);
-void queue_machphys_update(unsigned long mfn, unsigned long pfn);
-void xen_l1_entry_update(pte_t *ptr, unsigned long val);
-void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
void xen_pt_switch(unsigned long ptr);
+void xen_new_user_pt(unsigned long ptr); /* x86_64 only */
+void xen_load_gs(unsigned int selector); /* x86_64 only */
void xen_tlb_flush(void);
void xen_invlpg(unsigned long ptr);
+
+#ifndef CONFIG_XEN_SHADOW_MODE
+void xen_l1_entry_update(pte_t *ptr, unsigned long val);
+void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
+#ifdef __x86_64__
+void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64 only */
+#endif
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */
void xen_pgd_pin(unsigned long ptr);
void xen_pgd_unpin(unsigned long ptr);
+void xen_pud_pin(unsigned long ptr); /* x86_64 only */
+void xen_pud_unpin(unsigned long ptr); /* x86_64 only */
+void xen_pmd_pin(unsigned long ptr); /* x86_64 only */
+void xen_pmd_unpin(unsigned long ptr); /* x86_64 only */
void xen_pte_pin(unsigned long ptr);
void xen_pte_unpin(unsigned long ptr);
+#else
+#define xen_l1_entry_update(_p, _v) set_pte((_p), (pte_t){(_v)})
+#define xen_l2_entry_update(_p, _v) set_pgd((_p), (pgd_t){(_v)})
+#define xen_pgd_pin(_p) ((void)0)
+#define xen_pgd_unpin(_p) ((void)0)
+#define xen_pte_pin(_p) ((void)0)
+#define xen_pte_unpin(_p) ((void)0)
+#endif
+
void xen_set_ldt(unsigned long ptr, unsigned long bytes);
void xen_machphys_update(unsigned long mfn, unsigned long pfn);
-void _flush_page_update_queue(void);
-static inline int flush_page_update_queue(void)
-{
- unsigned int idx = mmu_update_queue_idx;
- if ( idx != 0 ) _flush_page_update_queue();
- return idx;
-}
-#define xen_flush_page_update_queue() (_flush_page_update_queue())
-#define XEN_flush_page_update_queue() (_flush_page_update_queue())
-void MULTICALL_flush_page_update_queue(void);
-
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
-/* Allocate a contiguous empty region of low memory. Return virtual start. */
-unsigned long allocate_empty_lowmem_region(unsigned long pages);
+#ifdef CONFIG_SMP
+#include <linux/cpumask.h>
+void xen_tlb_flush_all(void);
+void xen_invlpg_all(unsigned long ptr);
+void xen_tlb_flush_mask(cpumask_t *mask);
+void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr);
#endif
-/*
- * Assembler stubs for hyper-calls.
- */
-
-static inline int
-HYPERVISOR_set_trap_table(
- trap_info_t *table)
-{
- int ret;
- unsigned long ignore;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ignore)
- : "0" (__HYPERVISOR_set_trap_table), "1" (table)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_mmu_update(
- mmu_update_t *req, int count, int *success_count)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
- "3" (success_count)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_gdt(
- unsigned long *frame_list, int entries)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
- : "memory" );
-
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_stack_switch(
- unsigned long ss, unsigned long esp)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_callbacks(
- unsigned long event_selector, unsigned long event_address,
- unsigned long failsafe_selector, unsigned long failsafe_address)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector),
- "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_fpu_taskswitch(
- void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_yield(
- void)
-{
- int ret;
- unsigned long ign;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_block(
- void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_shutdown(
- void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
- : "memory" );
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+/*
+** XXX SMH: 2.4 doesn't have percpu.h (or support SMP guests) so just
+** include sufficient #defines to allow the below to build.
+*/
+#define DEFINE_PER_CPU(type, name) \
+ __typeof__(type) per_cpu__##name
- return ret;
-}
+#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var))
+#define __get_cpu_var(var) per_cpu__##var
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-static inline int
-HYPERVISOR_reboot(
- void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_suspend(
- unsigned long srec)
-{
- int ret;
- unsigned long ign1, ign2;
-
- /* NB. On suspend, control software expects a suspend record in %esi. */
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=S" (ign2)
- : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
- "S" (srec) : "memory");
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_crash(
- void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
- : "memory" );
-
- return ret;
-}
-
-static inline long
-HYPERVISOR_set_timer_op(
- u64 timeout)
-{
- int ret;
- unsigned long timeout_hi = (unsigned long)(timeout>>32);
- unsigned long timeout_lo = (unsigned long)timeout;
- unsigned long ign1, ign2;
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+#endif /* linux < 2.6.0 */
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_hi), "c" (timeout_lo)
- : "memory");
+void xen_contig_memory(unsigned long vstart, unsigned int order);
- return ret;
-}
-
-static inline int
-HYPERVISOR_dom0_op(
- dom0_op_t *dom0_op)
-{
- int ret;
- unsigned long ign1;
-
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op)
- : "memory");
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_debugreg(
- int reg, unsigned long value)
-{
- int ret;
- unsigned long ign1, ign2;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value)
- : "memory" );
-
- return ret;
-}
-
-static inline unsigned long
-HYPERVISOR_get_debugreg(
- int reg)
-{
- unsigned long ret;
- unsigned long ign;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_get_debugreg), "1" (reg)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_descriptor(
- unsigned long ma, unsigned long word1, unsigned long word2)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_update_descriptor), "1" (ma), "2" (word1),
- "3" (word2)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_fast_trap(
- int idx)
-{
- int ret;
- unsigned long ign;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_set_fast_trap), "1" (idx)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_dom_mem_op(
- unsigned int op, unsigned long *extent_list,
- unsigned long nr_extents, unsigned int extent_order)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4, ign5;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4),
- "=D" (ign5)
- : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list),
- "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_multicall(
- void *call_list, int nr_calls)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_va_mapping(
- unsigned long page_nr, pte_t new_val, unsigned long flags)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_update_va_mapping),
- "1" (page_nr), "2" ((new_val).pte_low), "3" (flags)
- : "memory" );
-
- if ( unlikely(ret < 0) )
- {
- printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n",
- page_nr, (new_val).pte_low, flags);
- BUG();
- }
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_event_channel_op(
- void *op)
-{
- int ret;
- unsigned long ignore;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ignore)
- : "0" (__HYPERVISOR_event_channel_op), "1" (op)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_xen_version(
- int cmd)
-{
- int ret;
- unsigned long ignore;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ignore)
- : "0" (__HYPERVISOR_xen_version), "1" (cmd)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_console_io(
- int cmd, int count, char *str)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_physdev_op(
- void *physdev_op)
-{
- int ret;
- unsigned long ign;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_grant_table_op(
- unsigned int cmd, void *uop, unsigned int count)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (count), "3" (uop)
- : "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_update_va_mapping_otherdomain(
- unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
- "1" (page_nr), "2" ((new_val).pte_low), "3" (flags), "4" (domid) :
- "memory" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_vm_assist(
- unsigned int cmd, unsigned int type)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type)
- : "memory" );
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+/* Allocate a contiguous empty region of low memory. Return virtual start. */
+unsigned long allocate_empty_lowmem_region(unsigned long pages);
+#endif
- return ret;
-}
+#include <asm/hypercall.h>
#endif /* __HYPERVISOR_H__ */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/multicall.h b/linux-2.6.11-xen-sparse/include/asm-xen/multicall.h
deleted file mode 100644
index ca169b57b9..0000000000
--- a/linux-2.6.11-xen-sparse/include/asm-xen/multicall.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/******************************************************************************
- * multicall.h
- *
- * Copyright (c) 2003-2004, K A Fraser
- *
- * This file may be distributed separately from the Linux kernel, or
- * incorporated into other software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __MULTICALL_H__
-#define __MULTICALL_H__
-
-#include <asm-xen/hypervisor.h>
-
-extern multicall_entry_t multicall_list[];
-extern int nr_multicall_ents;
-
-static inline void queue_multicall0(unsigned long op)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- nr_multicall_ents = i+1;
-}
-
-static inline void queue_multicall1(unsigned long op, unsigned long arg1)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- nr_multicall_ents = i+1;
-}
-
-static inline void queue_multicall2(
- unsigned long op, unsigned long arg1, unsigned long arg2)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- multicall_list[i].args[1] = arg2;
- nr_multicall_ents = i+1;
-}
-
-static inline void queue_multicall3(
- unsigned long op, unsigned long arg1, unsigned long arg2,
- unsigned long arg3)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- multicall_list[i].args[1] = arg2;
- multicall_list[i].args[2] = arg3;
- nr_multicall_ents = i+1;
-}
-
-static inline void queue_multicall4(
- unsigned long op, unsigned long arg1, unsigned long arg2,
- unsigned long arg3, unsigned long arg4)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- multicall_list[i].args[1] = arg2;
- multicall_list[i].args[2] = arg3;
- multicall_list[i].args[3] = arg4;
- nr_multicall_ents = i+1;
-}
-
-static inline void queue_multicall5(
- unsigned long op, unsigned long arg1, unsigned long arg2,
- unsigned long arg3, unsigned long arg4, unsigned long arg5)
-{
- int i = nr_multicall_ents;
- multicall_list[i].op = op;
- multicall_list[i].args[0] = arg1;
- multicall_list[i].args[1] = arg2;
- multicall_list[i].args[2] = arg3;
- multicall_list[i].args[3] = arg4;
- multicall_list[i].args[4] = arg5;
- nr_multicall_ents = i+1;
-}
-
-static inline void execute_multicall_list(void)
-{
- if ( unlikely(nr_multicall_ents == 0) ) return;
- (void)HYPERVISOR_multicall(multicall_list, nr_multicall_ents);
- nr_multicall_ents = 0;
-}
-
-#endif /* __MULTICALL_H__ */
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/synch_bitops.h b/linux-2.6.11-xen-sparse/include/asm-xen/synch_bitops.h
new file mode 100644
index 0000000000..9b5d468c26
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/synch_bitops.h
@@ -0,0 +1,2 @@
+
+#include <asm-i386/synch_bitops.h>
diff --git a/linux-2.6.11-xen-sparse/include/linux/skbuff.h b/linux-2.6.11-xen-sparse/include/linux/skbuff.h
new file mode 100644
index 0000000000..dad5d9bc1b
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/include/linux/skbuff.h
@@ -0,0 +1,1184 @@
+/*
+ * Definitions for the 'struct sk_buff' memory handlers.
+ *
+ * Authors:
+ * Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * Florian La Roche, <rzsfl@rz.uni-sb.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_SKBUFF_H
+#define _LINUX_SKBUFF_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/time.h>
+#include <linux/cache.h>
+
+#include <asm/atomic.h>
+#include <asm/types.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/poll.h>
+#include <linux/net.h>
+#include <net/checksum.h>
+
+#define HAVE_ALLOC_SKB /* For the drivers to know */
+#define HAVE_ALIGNABLE_SKB /* Ditto 8) */
+#define SLAB_SKB /* Slabified skbuffs */
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_HW 1
+#define CHECKSUM_UNNECESSARY 2
+
+#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
+ ~(SMP_CACHE_BYTES - 1))
+#define SKB_MAX_ORDER(X, ORDER) (((PAGE_SIZE << (ORDER)) - (X) - \
+ sizeof(struct skb_shared_info)) & \
+ ~(SMP_CACHE_BYTES - 1))
+#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
+#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
+
+/* A. Checksumming of received packets by device.
+ *
+ * NONE: device failed to checksum this packet.
+ * skb->csum is undefined.
+ *
+ * UNNECESSARY: device parsed packet and wouldbe verified checksum.
+ * skb->csum is undefined.
+ * It is bad option, but, unfortunately, many of vendors do this.
+ * Apparently with secret goal to sell you new device, when you
+ * will add new protocol to your host. F.e. IPv6. 8)
+ *
+ * HW: the most generic way. Device supplied checksum of _all_
+ * the packet as seen by netif_rx in skb->csum.
+ * NOTE: Even if device supports only some protocols, but
+ * is able to produce some skb->csum, it MUST use HW,
+ * not UNNECESSARY.
+ *
+ * B. Checksumming on output.
+ *
+ * NONE: skb is checksummed by protocol or csum is not required.
+ *
+ * HW: device is required to csum packet as seen by hard_start_xmit
+ * from skb->h.raw to the end and to record the checksum
+ * at skb->h.raw+skb->csum.
+ *
+ * Device must show its capabilities in dev->features, set
+ * at device setup time.
+ * NETIF_F_HW_CSUM - it is clever device, it is able to checksum
+ * everything.
+ * NETIF_F_NO_CSUM - loopback or reliable single hop media.
+ * NETIF_F_IP_CSUM - device is dumb. It is able to csum only
+ * TCP/UDP over IPv4. Sigh. Vendors like this
+ * way by an unknown reason. Though, see comment above
+ * about CHECKSUM_UNNECESSARY. 8)
+ *
+ * Any questions? No questions, good. --ANK
+ */
+
+#ifdef __i386__
+#define NET_CALLER(arg) (*(((void **)&arg) - 1))
+#else
+#define NET_CALLER(arg) __builtin_return_address(0)
+#endif
+
+struct net_device;
+
+#ifdef CONFIG_NETFILTER
+struct nf_conntrack {
+ atomic_t use;
+ void (*destroy)(struct nf_conntrack *);
+};
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+struct nf_bridge_info {
+ atomic_t use;
+ struct net_device *physindev;
+ struct net_device *physoutdev;
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+ struct net_device *netoutdev;
+#endif
+ unsigned int mask;
+ unsigned long data[32 / sizeof(unsigned long)];
+};
+#endif
+
+#endif
+
+struct sk_buff_head {
+ /* These two members must be first. */
+ struct sk_buff *next;
+ struct sk_buff *prev;
+
+ __u32 qlen;
+ spinlock_t lock;
+};
+
+struct sk_buff;
+
+/* To allow 64K frame to be packed as single skb without frag_list */
+#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
+
+typedef struct skb_frag_struct skb_frag_t;
+
+struct skb_frag_struct {
+ struct page *page;
+ __u16 page_offset;
+ __u16 size;
+};
+
+/* This data is invariant across clones and lives at
+ * the end of the header data, ie. at skb->end.
+ */
+struct skb_shared_info {
+ atomic_t dataref;
+ unsigned int nr_frags;
+ unsigned short tso_size;
+ unsigned short tso_segs;
+ struct sk_buff *frag_list;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+/**
+ * struct sk_buff - socket buffer
+ * @next: Next buffer in list
+ * @prev: Previous buffer in list
+ * @list: List we are on
+ * @sk: Socket we are owned by
+ * @stamp: Time we arrived
+ * @dev: Device we arrived on/are leaving by
+ * @input_dev: Device we arrived on
+ * @real_dev: The real device we are using
+ * @h: Transport layer header
+ * @nh: Network layer header
+ * @mac: Link layer header
+ * @dst: FIXME: Describe this field
+ * @cb: Control buffer. Free for use by every layer. Put private vars here
+ * @len: Length of actual data
+ * @data_len: Data length
+ * @mac_len: Length of link layer header
+ * @csum: Checksum
+ * @__unused: Dead field, may be reused
+ * @cloned: Head may be cloned (check refcnt to be sure)
+ * @proto_csum_valid: Protocol csum validated since arriving at localhost
+ * @proto_csum_blank: Protocol csum must be added before leaving localhost
+ * @pkt_type: Packet class
+ * @ip_summed: Driver fed us an IP checksum
+ * @priority: Packet queueing priority
+ * @users: User count - see {datagram,tcp}.c
+ * @protocol: Packet protocol from driver
+ * @security: Security level of packet
+ * @truesize: Buffer size
+ * @head: Head of buffer
+ * @data: Data head pointer
+ * @tail: Tail pointer
+ * @end: End pointer
+ * @destructor: Destruct function
+ * @nfmark: Can be used for communication between hooks
+ * @nfcache: Cache info
+ * @nfct: Associated connection, if any
+ * @nfctinfo: Relationship of this skb to the connection
+ * @nf_debug: Netfilter debugging
+ * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
+ * @private: Data which is private to the HIPPI implementation
+ * @tc_index: Traffic control index
+ */
+
+struct sk_buff {
+ /* These two members must be first. */
+ struct sk_buff *next;
+ struct sk_buff *prev;
+
+ struct sk_buff_head *list;
+ struct sock *sk;
+ struct timeval stamp;
+ struct net_device *dev;
+ struct net_device *input_dev;
+ struct net_device *real_dev;
+
+ union {
+ struct tcphdr *th;
+ struct udphdr *uh;
+ struct icmphdr *icmph;
+ struct igmphdr *igmph;
+ struct iphdr *ipiph;
+ struct ipv6hdr *ipv6h;
+ unsigned char *raw;
+ } h;
+
+ union {
+ struct iphdr *iph;
+ struct ipv6hdr *ipv6h;
+ struct arphdr *arph;
+ unsigned char *raw;
+ } nh;
+
+ union {
+ unsigned char *raw;
+ } mac;
+
+ struct dst_entry *dst;
+ struct sec_path *sp;
+
+ /*
+ * This is the control buffer. It is free to use for every
+ * layer. Please put your private variables there. If you
+ * want to keep them across layers you have to do a skb_clone()
+ * first. This is owned by whoever has the skb queued ATM.
+ */
+ char cb[40];
+
+ unsigned int len,
+ data_len,
+ mac_len,
+ csum;
+ unsigned char local_df,
+ cloned:1,
+ proto_csum_valid:1,
+ proto_csum_blank:1,
+ pkt_type,
+ ip_summed;
+ __u32 priority;
+ unsigned short protocol,
+ security;
+
+ void (*destructor)(struct sk_buff *skb);
+#ifdef CONFIG_NETFILTER
+ unsigned long nfmark;
+ __u32 nfcache;
+ __u32 nfctinfo;
+ struct nf_conntrack *nfct;
+#ifdef CONFIG_NETFILTER_DEBUG
+ unsigned int nf_debug;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+ struct nf_bridge_info *nf_bridge;
+#endif
+#endif /* CONFIG_NETFILTER */
+#if defined(CONFIG_HIPPI)
+ union {
+ __u32 ifield;
+ } private;
+#endif
+#ifdef CONFIG_NET_SCHED
+ __u32 tc_index; /* traffic control index */
+#ifdef CONFIG_NET_CLS_ACT
+ __u32 tc_verd; /* traffic control verdict */
+ __u32 tc_classid; /* traffic control classid */
+#endif
+
+#endif
+
+
+ /* These elements must be at the end, see alloc_skb() for details. */
+ unsigned int truesize;
+ atomic_t users;
+ unsigned char *head,
+ *data,
+ *tail,
+ *end;
+};
+
+#ifdef __KERNEL__
+/*
+ * Handling routines are only of interest to the kernel
+ */
+#include <linux/slab.h>
+
+#include <asm/system.h>
+
+extern void __kfree_skb(struct sk_buff *skb);
+extern struct sk_buff *alloc_skb(unsigned int size, int priority);
+extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+ unsigned int size, int priority);
+extern void kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority);
+extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
+extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask);
+extern int pskb_expand_head(struct sk_buff *skb,
+ int nhead, int ntail, int gfp_mask);
+extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
+ unsigned int headroom);
+extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+ int newheadroom, int newtailroom,
+ int priority);
+extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad);
+#define dev_kfree_skb(a) kfree_skb(a)
+extern void skb_over_panic(struct sk_buff *skb, int len,
+ void *here);
+extern void skb_under_panic(struct sk_buff *skb, int len,
+ void *here);
+
+/* Internal */
+#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end))
+
+/**
+ * skb_queue_empty - check if a queue is empty
+ * @list: queue head
+ *
+ * Returns true if the queue is empty, false otherwise.
+ */
+static inline int skb_queue_empty(const struct sk_buff_head *list)
+{
+ return list->next == (struct sk_buff *)list;
+}
+
+/**
+ * skb_get - reference buffer
+ * @skb: buffer to reference
+ *
+ * Makes another reference to a socket buffer and returns a pointer
+ * to the buffer.
+ */
+static inline struct sk_buff *skb_get(struct sk_buff *skb)
+{
+ atomic_inc(&skb->users);
+ return skb;
+}
+
+/*
+ * If users == 1, we are the only owner and are can avoid redundant
+ * atomic change.
+ */
+
+/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+static inline void kfree_skb(struct sk_buff *skb)
+{
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
+ __kfree_skb(skb);
+}
+
+/**
+ * skb_cloned - is the buffer a clone
+ * @skb: buffer to check
+ *
+ * Returns true if the buffer was generated with skb_clone() and is
+ * one of multiple shared copies of the buffer. Cloned buffers are
+ * shared data so must not be written to under normal circumstances.
+ */
+static inline int skb_cloned(const struct sk_buff *skb)
+{
+ return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
+}
+
+/**
+ * skb_shared - is the buffer shared
+ * @skb: buffer to check
+ *
+ * Returns true if more than one person has a reference to this
+ * buffer.
+ */
+static inline int skb_shared(const struct sk_buff *skb)
+{
+ return atomic_read(&skb->users) != 1;
+}
+
+/**
+ * skb_share_check - check if buffer is shared and if so clone it
+ * @skb: buffer to check
+ * @pri: priority for memory allocation
+ *
+ * If the buffer is shared the buffer is cloned and the old copy
+ * drops a reference. A new clone with a single reference is returned.
+ * If the buffer is not shared the original buffer is returned. When
+ * being called from interrupt status or with spinlocks held pri must
+ * be GFP_ATOMIC.
+ *
+ * NULL is returned on a memory allocation failure.
+ */
+static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
+{
+ might_sleep_if(pri & __GFP_WAIT);
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb = skb_clone(skb, pri);
+ kfree_skb(skb);
+ skb = nskb;
+ }
+ return skb;
+}
+
+/*
+ * Copy shared buffers into a new sk_buff. We effectively do COW on
+ * packets to handle cases where we have a local reader and forward
+ * and a couple of other messy ones. The normal one is tcpdumping
+ * a packet thats being forwarded.
+ */
+
+/**
+ * skb_unshare - make a copy of a shared buffer
+ * @skb: buffer to check
+ * @pri: priority for memory allocation
+ *
+ * If the socket buffer is a clone then this function creates a new
+ * copy of the data, drops a reference count on the old copy and returns
+ * the new copy with the reference count at 1. If the buffer is not a clone
+ * the original buffer is returned. When called with a spinlock held or
+ * from interrupt state @pri must be %GFP_ATOMIC
+ *
+ * %NULL is returned on a memory allocation failure.
+ */
+static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
+{
+ might_sleep_if(pri & __GFP_WAIT);
+ if (skb_cloned(skb)) {
+ struct sk_buff *nskb = skb_copy(skb, pri);
+ kfree_skb(skb); /* Free our shared copy */
+ skb = nskb;
+ }
+ return skb;
+}
+
+/**
+ * skb_peek
+ * @list_: list to peek at
+ *
+ * Peek an &sk_buff. Unlike most other operations you _MUST_
+ * be careful with this one. A peek leaves the buffer on the
+ * list and someone else may run off with it. You must hold
+ * the appropriate locks or have a private queue to do this.
+ *
+ * Returns %NULL for an empty list or a pointer to the head element.
+ * The reference count is not incremented and the reference is therefore
+ * volatile. Use with caution.
+ */
+static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
+{
+ struct sk_buff *list = ((struct sk_buff *)list_)->next;
+ if (list == (struct sk_buff *)list_)
+ list = NULL;
+ return list;
+}
+
+/**
+ * skb_peek_tail
+ * @list_: list to peek at
+ *
+ * Peek an &sk_buff. Unlike most other operations you _MUST_
+ * be careful with this one. A peek leaves the buffer on the
+ * list and someone else may run off with it. You must hold
+ * the appropriate locks or have a private queue to do this.
+ *
+ * Returns %NULL for an empty list or a pointer to the tail element.
+ * The reference count is not incremented and the reference is therefore
+ * volatile. Use with caution.
+ */
+static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
+{
+ struct sk_buff *list = ((struct sk_buff *)list_)->prev;
+ if (list == (struct sk_buff *)list_)
+ list = NULL;
+ return list;
+}
+
+/**
+ * skb_queue_len - get queue length
+ * @list_: list to measure
+ *
+ * Return the length of an &sk_buff queue.
+ */
+static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
+{
+ return list_->qlen;
+}
+
+static inline void skb_queue_head_init(struct sk_buff_head *list)
+{
+ spin_lock_init(&list->lock);
+ list->prev = list->next = (struct sk_buff *)list;
+ list->qlen = 0;
+}
+
+/*
+ * Insert an sk_buff at the start of a list.
+ *
+ * The "__skb_xxxx()" functions are the non-atomic ones that
+ * can only be called with interrupts disabled.
+ */
+
+/**
+ * __skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+static inline void __skb_queue_head(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ prev = (struct sk_buff *)list;
+ next = prev->next;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+}
+
+/**
+ * __skb_queue_tail - queue a buffer at the list tail
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the end of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
+static inline void __skb_queue_tail(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ next = (struct sk_buff *)list;
+ prev = next->prev;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+}
+
+
+/**
+ * __skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The head item is
+ * returned or %NULL if the list is empty.
+ */
+extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);
+static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev, *result;
+
+ prev = (struct sk_buff *) list;
+ next = prev->next;
+ result = NULL;
+ if (next != prev) {
+ result = next;
+ next = next->next;
+ list->qlen--;
+ next->prev = prev;
+ prev->next = next;
+ result->next = result->prev = NULL;
+ result->list = NULL;
+ }
+ return result;
+}
+
+
+/*
+ * Insert a packet on a list.
+ */
+extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk);
+static inline void __skb_insert(struct sk_buff *newsk,
+ struct sk_buff *prev, struct sk_buff *next,
+ struct sk_buff_head *list)
+{
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+ newsk->list = list;
+ list->qlen++;
+}
+
+/*
+ * Place a packet after a given packet in a list.
+ */
+extern void skb_append(struct sk_buff *old, struct sk_buff *newsk);
+static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+ __skb_insert(newsk, old, old->next, old->list);
+}
+
+/*
+ * remove sk_buff from list. _Must_ be called atomically, and with
+ * the list known..
+ */
+extern void skb_unlink(struct sk_buff *skb);
+static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev;
+
+ list->qlen--;
+ next = skb->next;
+ prev = skb->prev;
+ skb->next = skb->prev = NULL;
+ skb->list = NULL;
+ next->prev = prev;
+ prev->next = next;
+}
+
+
+/* XXX: more streamlined implementation */
+
+/**
+ * __skb_dequeue_tail - remove from the tail of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the tail of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The tail item is
+ * returned or %NULL if the list is empty.
+ */
+extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
+static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
+{
+ struct sk_buff *skb = skb_peek_tail(list);
+ if (skb)
+ __skb_unlink(skb, list);
+ return skb;
+}
+
+
+static inline int skb_is_nonlinear(const struct sk_buff *skb)
+{
+ return skb->data_len;
+}
+
+static inline unsigned int skb_headlen(const struct sk_buff *skb)
+{
+ return skb->len - skb->data_len;
+}
+
+static inline int skb_pagelen(const struct sk_buff *skb)
+{
+ int i, len = 0;
+
+ for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
+ len += skb_shinfo(skb)->frags[i].size;
+ return len + skb_headlen(skb);
+}
+
+static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off, int size)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ frag->page = page;
+ frag->page_offset = off;
+ frag->size = size;
+ skb_shinfo(skb)->nr_frags = i + 1;
+}
+
+#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags)
+#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list)
+#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
+
+/*
+ * Add data to an sk_buff
+ */
+static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp = skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail += len;
+ skb->len += len;
+ return tmp;
+}
+
+/**
+ * skb_put - add data to a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer. If this would
+ * exceed the total buffer size the kernel will panic. A pointer to the
+ * first byte of the extra data is returned.
+ */
+static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp = skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail += len;
+ skb->len += len;
+ if (unlikely(skb->tail>skb->end))
+ skb_over_panic(skb, len, current_text_addr());
+ return tmp;
+}
+
+static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data -= len;
+ skb->len += len;
+ return skb->data;
+}
+
+/**
+ * skb_push - add data to the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer at the buffer
+ * start. If this would exceed the total buffer headroom the kernel will
+ * panic. A pointer to the first byte of the extra data is returned.
+ */
+static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data -= len;
+ skb->len += len;
+ if (unlikely(skb->data<skb->head))
+ skb_under_panic(skb, len, current_text_addr());
+ return skb->data;
+}
+
+static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ skb->len -= len;
+ BUG_ON(skb->len < skb->data_len);
+ return skb->data += len;
+}
+
+/**
+ * skb_pull - remove data from the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to remove
+ *
+ * This function removes data from the start of a buffer, returning
+ * the memory to the headroom. A pointer to the next data in the buffer
+ * is returned. Once the data has been pulled future pushes will overwrite
+ * the old data.
+ */
+static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+}
+
+extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
+
+static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len > skb_headlen(skb) &&
+ !__pskb_pull_tail(skb, len-skb_headlen(skb)))
+ return NULL;
+ skb->len -= len;
+ return skb->data += len;
+}
+
+static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
+}
+
+static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (likely(len <= skb_headlen(skb)))
+ return 1;
+ if (unlikely(len > skb->len))
+ return 0;
+ return __pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL;
+}
+
+/**
+ * skb_headroom - bytes at buffer head
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the head of an &sk_buff.
+ */
+static inline int skb_headroom(const struct sk_buff *skb)
+{
+ return skb->data - skb->head;
+}
+
+/**
+ * skb_tailroom - bytes at buffer end
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the tail of an sk_buff
+ */
+static inline int skb_tailroom(const struct sk_buff *skb)
+{
+ return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
+}
+
+/**
+ * skb_reserve - adjust headroom
+ * @skb: buffer to alter
+ * @len: bytes to move
+ *
+ * Increase the headroom of an empty &sk_buff by reducing the tail
+ * room. This is only allowed for an empty buffer.
+ */
+static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
+{
+ skb->data += len;
+ skb->tail += len;
+}
+
+/*
+ * CPUs often take a performance hit when accessing unaligned memory
+ * locations. The actual performance hit varies, it can be small if the
+ * hardware handles it or large if we have to take an exception and fix it
+ * in software.
+ *
+ * Since an ethernet header is 14 bytes network drivers often end up with
+ * the IP header at an unaligned offset. The IP header can be aligned by
+ * shifting the start of the packet by 2 bytes. Drivers should do this
+ * with:
+ *
+ * skb_reserve(NET_IP_ALIGN);
+ *
+ * The downside to this alignment of the IP header is that the DMA is now
+ * unaligned. On some architectures the cost of an unaligned DMA is high
+ * and this cost outweighs the gains made by aligning the IP header.
+ *
+ * Since this trade off varies between architectures, we allow NET_IP_ALIGN
+ * to be overridden.
+ */
+#ifndef NET_IP_ALIGN
+#define NET_IP_ALIGN 2
+#endif
+
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data + len;
+ } else
+ ___pskb_trim(skb, len, 0);
+}
+
+/**
+ * skb_trim - remove end from a buffer
+ * @skb: buffer to alter
+ * @len: new length
+ *
+ * Cut the length of a buffer down by removing data from the tail. If
+ * the buffer is already under the length specified it is not modified.
+ */
+static inline void skb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (skb->len > len)
+ __skb_trim(skb, len);
+}
+
+
+static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data+len;
+ return 0;
+ }
+ return ___pskb_trim(skb, len, 1);
+}
+
+static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+ return (len < skb->len) ? __pskb_trim(skb, len) : 0;
+}
+
+/**
+ * skb_orphan - orphan a buffer
+ * @skb: buffer to orphan
+ *
+ * If a buffer currently has an owner then we call the owner's
+ * destructor function and make the @skb unowned. The buffer continues
+ * to exist but is no longer charged to its former owner.
+ */
+static inline void skb_orphan(struct sk_buff *skb)
+{
+ if (skb->destructor)
+ skb->destructor(skb);
+ skb->destructor = NULL;
+ skb->sk = NULL;
+}
+
+/**
+ * __skb_queue_purge - empty a list
+ * @list: list to empty
+ *
+ * Delete all buffers on an &sk_buff list. Each buffer is removed from
+ * the list and one reference dropped. This function does not take the
+ * list lock and the caller must hold the relevant locks to use it.
+ */
+extern void skb_queue_purge(struct sk_buff_head *list);
+static inline void __skb_queue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+ while ((skb = __skb_dequeue(list)) != NULL)
+ kfree_skb(skb);
+}
+
+/**
+ * __dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB
+static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
+ int gfp_mask)
+{
+ struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);
+ if (likely(skb))
+ skb_reserve(skb, 16);
+ return skb;
+}
+#else
+extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask);
+#endif
+
+/**
+ * dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory. Although this function
+ * allocates memory it can be called from an interrupt.
+ */
+static inline struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+ return __dev_alloc_skb(length, GFP_ATOMIC);
+}
+
+/**
+ * skb_cow - copy header of skb when it is required
+ * @skb: buffer to cow
+ * @headroom: needed headroom
+ *
+ * If the skb passed lacks sufficient headroom or its data part
+ * is shared, data is reallocated. If reallocation fails, an error
+ * is returned and original skb is not changed.
+ *
+ * The result is skb with writable area skb->head...skb->tail
+ * and at least @headroom of space at head.
+ */
+static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
+{
+ int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+
+ if (delta < 0)
+ delta = 0;
+
+ if (delta || skb_cloned(skb))
+ return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
+ return 0;
+}
+
+/**
+ * skb_padto - pad an skbuff up to a minimal size
+ * @skb: buffer to pad
+ * @len: minimal length
+ *
+ * Pads up a buffer to ensure the trailing bytes exist and are
+ * blanked. If the buffer already contains sufficient data it
+ * is untouched. Returns the buffer, which may be a replacement
+ * for the original, or NULL for out of memory - in which case
+ * the original buffer is still freed.
+ */
+
+static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len)
+{
+ unsigned int size = skb->len;
+ if (likely(size >= len))
+ return skb;
+ return skb_pad(skb, len-size);
+}
+
+static inline int skb_add_data(struct sk_buff *skb,
+ char __user *from, int copy)
+{
+ const int off = skb->len;
+
+ if (skb->ip_summed == CHECKSUM_NONE) {
+ int err = 0;
+ unsigned int csum = csum_and_copy_from_user(from,
+ skb_put(skb, copy),
+ copy, 0, &err);
+ if (!err) {
+ skb->csum = csum_block_add(skb->csum, csum, off);
+ return 0;
+ }
+ } else if (!copy_from_user(skb_put(skb, copy), from, copy))
+ return 0;
+
+ __skb_trim(skb, off);
+ return -EFAULT;
+}
+
+static inline int skb_can_coalesce(struct sk_buff *skb, int i,
+ struct page *page, int off)
+{
+ if (i) {
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+
+ return page == frag->page &&
+ off == frag->page_offset + frag->size;
+ }
+ return 0;
+}
+
+/**
+ * skb_linearize - convert paged skb to linear one
+ * @skb: buffer to linearize
+ * @gfp: allocation mode
+ *
+ * If there is no free memory -ENOMEM is returned, otherwise zero
+ * is returned and the old skb data released.
+ */
+extern int __skb_linearize(struct sk_buff *skb, int gfp);
+static inline int skb_linearize(struct sk_buff *skb, int gfp)
+{
+ return __skb_linearize(skb, gfp);
+}
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+ BUG_ON(in_irq());
+
+ local_bh_disable();
+#endif
+ return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+ kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
+#ifdef CONFIG_HIGHMEM
+ local_bh_enable();
+#endif
+}
+
+#define skb_queue_walk(queue, skb) \
+ for (skb = (queue)->next; \
+ prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \
+ skb = skb->next)
+
+
+extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+ int noblock, int *err);
+extern unsigned int datagram_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait);
+extern int skb_copy_datagram_iovec(const struct sk_buff *from,
+ int offset, struct iovec *to,
+ int size);
+extern int skb_copy_and_csum_datagram_iovec(const
+ struct sk_buff *skb,
+ int hlen,
+ struct iovec *iov);
+extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+extern unsigned int skb_checksum(const struct sk_buff *skb, int offset,
+ int len, unsigned int csum);
+extern int skb_copy_bits(const struct sk_buff *skb, int offset,
+ void *to, int len);
+extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb,
+ int offset, u8 *to, int len,
+ unsigned int csum);
+extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+extern void skb_split(struct sk_buff *skb,
+ struct sk_buff *skb1, const u32 len);
+
+static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
+ int len, void *buffer)
+{
+ int hlen = skb_headlen(skb);
+
+ if (offset + len <= hlen)
+ return skb->data + offset;
+
+ if (skb_copy_bits(skb, offset, buffer, len) < 0)
+ return NULL;
+
+ return buffer;
+}
+
+extern void skb_init(void);
+extern void skb_add_mtu(int mtu);
+
+struct skb_iter {
+ /* Iteration functions set these */
+ unsigned char *data;
+ unsigned int len;
+
+ /* Private to iteration */
+ unsigned int nextfrag;
+ struct sk_buff *fraglist;
+};
+
+/* Keep iterating until skb_iter_next returns false. */
+extern void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i);
+extern int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i);
+/* Call this if aborting loop before !skb_iter_next */
+extern void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i);
+
+#ifdef CONFIG_NETFILTER
+static inline void nf_conntrack_put(struct nf_conntrack *nfct)
+{
+ if (nfct && atomic_dec_and_test(&nfct->use))
+ nfct->destroy(nfct);
+}
+static inline void nf_conntrack_get(struct nf_conntrack *nfct)
+{
+ if (nfct)
+ atomic_inc(&nfct->use);
+}
+static inline void nf_reset(struct sk_buff *skb)
+{
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug = 0;
+#endif
+}
+static inline void nf_reset_debug(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER_DEBUG
+ skb->nf_debug = 0;
+#endif
+}
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
+{
+ if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
+ kfree(nf_bridge);
+}
+static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
+{
+ if (nf_bridge)
+ atomic_inc(&nf_bridge->use);
+}
+#endif /* CONFIG_BRIDGE_NETFILTER */
+#else /* CONFIG_NETFILTER */
+static inline void nf_reset(struct sk_buff *skb) {}
+#endif /* CONFIG_NETFILTER */
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_SKBUFF_H */
diff --git a/linux-2.6.11-xen-sparse/mm/mmap.c b/linux-2.6.11-xen-sparse/mm/mmap.c
new file mode 100644
index 0000000000..848200e1b8
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/mm/mmap.c
@@ -0,0 +1,2108 @@
+/*
+ * mm/mmap.c
+ *
+ * Written by obz.
+ *
+ * Address space accounting code <alan@redhat.com>
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/mman.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/init.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/personality.h>
+#include <linux/security.h>
+#include <linux/hugetlb.h>
+#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/acct.h>
+#include <linux/mount.h>
+#include <linux/mempolicy.h>
+#include <linux/rmap.h>
+
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/tlb.h>
+
+/*
+ * WARNING: the debugging will use recursive algorithms so never enable this
+ * unless you know what you are doing.
+ */
+#undef DEBUG_MM_RB
+
+/* description of effects of mapping type and prot in current implementation.
+ * this is due to the limited x86 page protection hardware. The expected
+ * behavior is in parens:
+ *
+ * map_type prot
+ * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
+ * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (yes) yes w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (copy) copy w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ */
+pgprot_t protection_map[16] = {
+ __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
+ __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
+};
+
+int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
+int sysctl_overcommit_ratio = 50; /* default is 50% */
+int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+atomic_t vm_committed_space = ATOMIC_INIT(0);
+
+/*
+ * Check that a process has enough memory to allocate a new virtual
+ * mapping. 0 means there is enough memory for the allocation to
+ * succeed and -ENOMEM implies there is not.
+ *
+ * We currently support three overcommit policies, which are set via the
+ * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
+ *
+ * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
+ * Additional code 2002 Jul 20 by Robert Love.
+ *
+ * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
+ *
+ * Note this is a helper function intended to be used by LSMs which
+ * wish to use this logic.
+ */
+int __vm_enough_memory(long pages, int cap_sys_admin)
+{
+ unsigned long free, allowed;
+
+ vm_acct_memory(pages);
+
+ /*
+ * Sometimes we want to use more memory than we have
+ */
+ if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
+ return 0;
+
+ if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
+ unsigned long n;
+
+ free = get_page_cache_size();
+ free += nr_swap_pages;
+
+ /*
+ * Any slabs which are created with the
+ * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+ * which are reclaimable, under pressure. The dentry
+ * cache and most inode caches should fall into this
+ */
+ free += atomic_read(&slab_reclaim_pages);
+
+ /*
+ * Leave the last 3% for root
+ */
+ if (!cap_sys_admin)
+ free -= free / 32;
+
+ if (free > pages)
+ return 0;
+
+ /*
+ * nr_free_pages() is very expensive on large systems,
+ * only call if we're about to fail.
+ */
+ n = nr_free_pages();
+ if (!cap_sys_admin)
+ n -= n / 32;
+ free += n;
+
+ if (free > pages)
+ return 0;
+ vm_unacct_memory(pages);
+ return -ENOMEM;
+ }
+
+ allowed = (totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100;
+ /*
+ * Leave the last 3% for root
+ */
+ if (!cap_sys_admin)
+ allowed -= allowed / 32;
+ allowed += total_swap_pages;
+
+ /* Don't let a single process grow too big:
+ leave 3% of the size of this process for other processes */
+ allowed -= current->mm->total_vm / 32;
+
+ if (atomic_read(&vm_committed_space) < allowed)
+ return 0;
+
+ vm_unacct_memory(pages);
+
+ return -ENOMEM;
+}
+
+EXPORT_SYMBOL(sysctl_overcommit_memory);
+EXPORT_SYMBOL(sysctl_overcommit_ratio);
+EXPORT_SYMBOL(sysctl_max_map_count);
+EXPORT_SYMBOL(vm_committed_space);
+EXPORT_SYMBOL(__vm_enough_memory);
+
+/*
+ * Requires inode->i_mapping->i_mmap_lock
+ */
+static void __remove_shared_vm_struct(struct vm_area_struct *vma,
+ struct file *file, struct address_space *mapping)
+{
+ if (vma->vm_flags & VM_DENYWRITE)
+ atomic_inc(&file->f_dentry->d_inode->i_writecount);
+ if (vma->vm_flags & VM_SHARED)
+ mapping->i_mmap_writable--;
+
+ flush_dcache_mmap_lock(mapping);
+ if (unlikely(vma->vm_flags & VM_NONLINEAR))
+ list_del_init(&vma->shared.vm_set.list);
+ else
+ vma_prio_tree_remove(vma, &mapping->i_mmap);
+ flush_dcache_mmap_unlock(mapping);
+}
+
+/*
+ * Remove one vm structure and free it.
+ */
+static void remove_vm_struct(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+
+ might_sleep();
+ if (file) {
+ struct address_space *mapping = file->f_mapping;
+ spin_lock(&mapping->i_mmap_lock);
+ __remove_shared_vm_struct(vma, file, mapping);
+ spin_unlock(&mapping->i_mmap_lock);
+ }
+ if (vma->vm_ops && vma->vm_ops->close)
+ vma->vm_ops->close(vma);
+ if (file)
+ fput(file);
+ anon_vma_unlink(vma);
+ mpol_free(vma_policy(vma));
+ kmem_cache_free(vm_area_cachep, vma);
+}
+
+/*
+ * sys_brk() for the most part doesn't need the global kernel
+ * lock, except when an application is doing something nasty
+ * like trying to un-brk an area that has already been mapped
+ * to a regular file. in this case, the unmapping will need
+ * to invoke file system routines that need the global lock.
+ */
+asmlinkage unsigned long sys_brk(unsigned long brk)
+{
+ unsigned long rlim, retval;
+ unsigned long newbrk, oldbrk;
+ struct mm_struct *mm = current->mm;
+
+ down_write(&mm->mmap_sem);
+
+ if (brk < mm->end_code)
+ goto out;
+ newbrk = PAGE_ALIGN(brk);
+ oldbrk = PAGE_ALIGN(mm->brk);
+ if (oldbrk == newbrk)
+ goto set_brk;
+
+ /* Always allow shrinking brk. */
+ if (brk <= mm->brk) {
+ if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+ goto set_brk;
+ goto out;
+ }
+
+ /* Check against rlimit.. */
+ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ goto out;
+
+ /* Check against existing mmap mappings. */
+ if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+ goto out;
+
+ /* Ok, looks good - let it rip. */
+ if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+ goto out;
+set_brk:
+ mm->brk = brk;
+out:
+ retval = mm->brk;
+ up_write(&mm->mmap_sem);
+ return retval;
+}
+
+#ifdef DEBUG_MM_RB
+static int browse_rb(struct rb_root *root)
+{
+ int i = 0, j;
+ struct rb_node *nd, *pn = NULL;
+ unsigned long prev = 0, pend = 0;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct vm_area_struct *vma;
+ vma = rb_entry(nd, struct vm_area_struct, vm_rb);
+ if (vma->vm_start < prev)
+ printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
+ if (vma->vm_start < pend)
+ printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
+ if (vma->vm_start > vma->vm_end)
+ printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
+ i++;
+ pn = nd;
+ }
+ j = 0;
+ for (nd = pn; nd; nd = rb_prev(nd)) {
+ j++;
+ }
+ if (i != j)
+ printk("backwards %d, forwards %d\n", j, i), i = 0;
+ return i;
+}
+
+void validate_mm(struct mm_struct *mm)
+{
+ int bug = 0;
+ int i = 0;
+ struct vm_area_struct *tmp = mm->mmap;
+ while (tmp) {
+ tmp = tmp->vm_next;
+ i++;
+ }
+ if (i != mm->map_count)
+ printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
+ i = browse_rb(&mm->mm_rb);
+ if (i != mm->map_count)
+ printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
+ if (bug)
+ BUG();
+}
+#else
+#define validate_mm(mm) do { } while (0)
+#endif
+
+static struct vm_area_struct *
+find_vma_prepare(struct mm_struct *mm, unsigned long addr,
+ struct vm_area_struct **pprev, struct rb_node ***rb_link,
+ struct rb_node ** rb_parent)
+{
+ struct vm_area_struct * vma;
+ struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
+
+ __rb_link = &mm->mm_rb.rb_node;
+ rb_prev = __rb_parent = NULL;
+ vma = NULL;
+
+ while (*__rb_link) {
+ struct vm_area_struct *vma_tmp;
+
+ __rb_parent = *__rb_link;
+ vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
+
+ if (vma_tmp->vm_end > addr) {
+ vma = vma_tmp;
+ if (vma_tmp->vm_start <= addr)
+ return vma;
+ __rb_link = &__rb_parent->rb_left;
+ } else {
+ rb_prev = __rb_parent;
+ __rb_link = &__rb_parent->rb_right;
+ }
+ }
+
+ *pprev = NULL;
+ if (rb_prev)
+ *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
+ *rb_link = __rb_link;
+ *rb_parent = __rb_parent;
+ return vma;
+}
+
+static inline void
+__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct vm_area_struct *prev, struct rb_node *rb_parent)
+{
+ if (prev) {
+ vma->vm_next = prev->vm_next;
+ prev->vm_next = vma;
+ } else {
+ mm->mmap = vma;
+ if (rb_parent)
+ vma->vm_next = rb_entry(rb_parent,
+ struct vm_area_struct, vm_rb);
+ else
+ vma->vm_next = NULL;
+ }
+}
+
+void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct rb_node **rb_link, struct rb_node *rb_parent)
+{
+ rb_link_node(&vma->vm_rb, rb_parent, rb_link);
+ rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+}
+
+static inline void __vma_link_file(struct vm_area_struct *vma)
+{
+ struct file * file;
+
+ file = vma->vm_file;
+ if (file) {
+ struct address_space *mapping = file->f_mapping;
+
+ if (vma->vm_flags & VM_DENYWRITE)
+ atomic_dec(&file->f_dentry->d_inode->i_writecount);
+ if (vma->vm_flags & VM_SHARED)
+ mapping->i_mmap_writable++;
+
+ flush_dcache_mmap_lock(mapping);
+ if (unlikely(vma->vm_flags & VM_NONLINEAR))
+ vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
+ else
+ vma_prio_tree_insert(vma, &mapping->i_mmap);
+ flush_dcache_mmap_unlock(mapping);
+ }
+}
+
+static void
+__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct vm_area_struct *prev, struct rb_node **rb_link,
+ struct rb_node *rb_parent)
+{
+ __vma_link_list(mm, vma, prev, rb_parent);
+ __vma_link_rb(mm, vma, rb_link, rb_parent);
+ __anon_vma_link(vma);
+}
+
+static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct vm_area_struct *prev, struct rb_node **rb_link,
+ struct rb_node *rb_parent)
+{
+ struct address_space *mapping = NULL;
+
+ if (vma->vm_file)
+ mapping = vma->vm_file->f_mapping;
+
+ if (mapping) {
+ spin_lock(&mapping->i_mmap_lock);
+ vma->vm_truncate_count = mapping->truncate_count;
+ }
+ anon_vma_lock(vma);
+
+ __vma_link(mm, vma, prev, rb_link, rb_parent);
+ __vma_link_file(vma);
+
+ anon_vma_unlock(vma);
+ if (mapping)
+ spin_unlock(&mapping->i_mmap_lock);
+
+ mm->map_count++;
+ validate_mm(mm);
+}
+
+/*
+ * Helper for vma_adjust in the split_vma insert case:
+ * insert vm structure into list and rbtree and anon_vma,
+ * but it has already been inserted into prio_tree earlier.
+ */
+static void
+__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+ struct vm_area_struct * __vma, * prev;
+ struct rb_node ** rb_link, * rb_parent;
+
+ __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
+ if (__vma && __vma->vm_start < vma->vm_end)
+ BUG();
+ __vma_link(mm, vma, prev, rb_link, rb_parent);
+ mm->map_count++;
+}
+
+static inline void
+__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct vm_area_struct *prev)
+{
+ prev->vm_next = vma->vm_next;
+ rb_erase(&vma->vm_rb, &mm->mm_rb);
+ if (mm->mmap_cache == vma)
+ mm->mmap_cache = prev;
+}
+
+/*
+ * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
+ * is already present in an i_mmap tree without adjusting the tree.
+ * The following helper function should be used when such adjustments
+ * are necessary. The "insert" vma (if any) is to be inserted
+ * before we drop the necessary locks.
+ */
+void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ struct vm_area_struct *next = vma->vm_next;
+ struct vm_area_struct *importer = NULL;
+ struct address_space *mapping = NULL;
+ struct prio_tree_root *root = NULL;
+ struct file *file = vma->vm_file;
+ struct anon_vma *anon_vma = NULL;
+ long adjust_next = 0;
+ int remove_next = 0;
+
+ if (next && !insert) {
+ if (end >= next->vm_end) {
+ /*
+ * vma expands, overlapping all the next, and
+ * perhaps the one after too (mprotect case 6).
+ */
+again: remove_next = 1 + (end > next->vm_end);
+ end = next->vm_end;
+ anon_vma = next->anon_vma;
+ importer = vma;
+ } else if (end > next->vm_start) {
+ /*
+ * vma expands, overlapping part of the next:
+ * mprotect case 5 shifting the boundary up.
+ */
+ adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
+ anon_vma = next->anon_vma;
+ importer = vma;
+ } else if (end < vma->vm_end) {
+ /*
+ * vma shrinks, and !insert tells it's not
+ * split_vma inserting another: so it must be
+ * mprotect case 4 shifting the boundary down.
+ */
+ adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
+ anon_vma = next->anon_vma;
+ importer = next;
+ }
+ }
+
+ if (file) {
+ mapping = file->f_mapping;
+ if (!(vma->vm_flags & VM_NONLINEAR))
+ root = &mapping->i_mmap;
+ spin_lock(&mapping->i_mmap_lock);
+ if (importer &&
+ vma->vm_truncate_count != next->vm_truncate_count) {
+ /*
+ * unmap_mapping_range might be in progress:
+ * ensure that the expanding vma is rescanned.
+ */
+ importer->vm_truncate_count = 0;
+ }
+ if (insert) {
+ insert->vm_truncate_count = vma->vm_truncate_count;
+ /*
+ * Put into prio_tree now, so instantiated pages
+ * are visible to arm/parisc __flush_dcache_page
+ * throughout; but we cannot insert into address
+ * space until vma start or end is updated.
+ */
+ __vma_link_file(insert);
+ }
+ }
+
+ /*
+ * When changing only vma->vm_end, we don't really need
+ * anon_vma lock: but is that case worth optimizing out?
+ */
+ if (vma->anon_vma)
+ anon_vma = vma->anon_vma;
+ if (anon_vma) {
+ spin_lock(&anon_vma->lock);
+ /*
+ * Easily overlooked: when mprotect shifts the boundary,
+ * make sure the expanding vma has anon_vma set if the
+ * shrinking vma had, to cover any anon pages imported.
+ */
+ if (importer && !importer->anon_vma) {
+ importer->anon_vma = anon_vma;
+ __anon_vma_link(importer);
+ }
+ }
+
+ if (root) {
+ flush_dcache_mmap_lock(mapping);
+ vma_prio_tree_remove(vma, root);
+ if (adjust_next)
+ vma_prio_tree_remove(next, root);
+ }
+
+ vma->vm_start = start;
+ vma->vm_end = end;
+ vma->vm_pgoff = pgoff;
+ if (adjust_next) {
+ next->vm_start += adjust_next << PAGE_SHIFT;
+ next->vm_pgoff += adjust_next;
+ }
+
+ if (root) {
+ if (adjust_next)
+ vma_prio_tree_insert(next, root);
+ vma_prio_tree_insert(vma, root);
+ flush_dcache_mmap_unlock(mapping);
+ }
+
+ if (remove_next) {
+ /*
+ * vma_merge has merged next into vma, and needs
+ * us to remove next before dropping the locks.
+ */
+ __vma_unlink(mm, next, vma);
+ if (file)
+ __remove_shared_vm_struct(next, file, mapping);
+ if (next->anon_vma)
+ __anon_vma_merge(vma, next);
+ } else if (insert) {
+ /*
+ * split_vma has split insert from vma, and needs
+ * us to insert it before dropping the locks
+ * (it may either follow vma or precede it).
+ */
+ __insert_vm_struct(mm, insert);
+ }
+
+ if (anon_vma)
+ spin_unlock(&anon_vma->lock);
+ if (mapping)
+ spin_unlock(&mapping->i_mmap_lock);
+
+ if (remove_next) {
+ if (file)
+ fput(file);
+ mm->map_count--;
+ mpol_free(vma_policy(next));
+ kmem_cache_free(vm_area_cachep, next);
+ /*
+ * In mprotect's case 6 (see comments on vma_merge),
+ * we must remove another next too. It would clutter
+ * up the code too much to do both in one go.
+ */
+ if (remove_next == 2) {
+ next = vma->vm_next;
+ goto again;
+ }
+ }
+
+ validate_mm(mm);
+}
+
+/*
+ * If the vma has a ->close operation then the driver probably needs to release
+ * per-vma resources, so we don't attempt to merge those.
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED)
+
+static inline int is_mergeable_vma(struct vm_area_struct *vma,
+ struct file *file, unsigned long vm_flags)
+{
+ if (vma->vm_flags != vm_flags)
+ return 0;
+ if (vma->vm_file != file)
+ return 0;
+ if (vma->vm_ops && vma->vm_ops->close)
+ return 0;
+ return 1;
+}
+
+static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
+ struct anon_vma *anon_vma2)
+{
+ return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
+}
+
+/*
+ * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
+ * in front of (at a lower virtual address and file offset than) the vma.
+ *
+ * We cannot merge two vmas if they have differently assigned (non-NULL)
+ * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
+ *
+ * We don't check here for the merged mmap wrapping around the end of pagecache
+ * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
+ * wrap, nor mmaps which cover the final page at index -1UL.
+ */
+static int
+can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
+	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+{
+	if (!is_mergeable_vma(vma, file, vm_flags))
+		return 0;
+	if (!is_mergeable_anon_vma(anon_vma, vma->anon_vma))
+		return 0;
+	/* Mergeable in front only if the file offsets line up exactly. */
+	return vma->vm_pgoff == vm_pgoff;
+}
+
+/*
+ * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
+ * beyond (at a higher virtual address and file offset than) the vma.
+ *
+ * We cannot merge two vmas if they have differently assigned (non-NULL)
+ * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
+ */
+static int
+can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
+	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+{
+	pgoff_t vm_pglen;
+
+	if (!is_mergeable_vma(vma, file, vm_flags))
+		return 0;
+	if (!is_mergeable_anon_vma(anon_vma, vma->anon_vma))
+		return 0;
+	/* Mergeable behind only if the request continues vma's file range. */
+	vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	return vma->vm_pgoff + vm_pglen == vm_pgoff;
+}
+
+/*
+ * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
+ * whether that can be merged with its predecessor or its successor.
+ * Or both (it neatly fills a hole).
+ *
+ * In most cases - when called for mmap, brk or mremap - [addr,end) is
+ * certain not to be mapped by the time vma_merge is called; but when
+ * called for mprotect, it is certain to be already mapped (either at
+ * an offset within prev, or at the start of next), and the flags of
+ * this area are about to be changed to vm_flags - and the no-change
+ * case has already been eliminated.
+ *
+ * The following mprotect cases have to be considered, where AAAA is
+ * the area passed down from mprotect_fixup, never extending beyond one
+ * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
+ *
+ *     AAAA             AAAA                AAAA          AAAA
+ *    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPNNNNXXXX
+ *    cannot merge    might become    might become    might become
+ *    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPPPPP 6 or
+ *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPXXXX 7 or
+ *    mremap move:                                    PPPPNNNNNNNN 8
+ *        AAAA
+ *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
+ *    might become    case 1 below    case 2 below    case 3 below
+ *
+ * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
+ * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
+ *
+ * Returns the vma that now covers the request on success, or NULL when
+ * no merge was possible and the caller must allocate a fresh vma.
+ */
+struct vm_area_struct *vma_merge(struct mm_struct *mm,
+			struct vm_area_struct *prev, unsigned long addr,
+			unsigned long end, unsigned long vm_flags,
+			struct anon_vma *anon_vma, struct file *file,
+			pgoff_t pgoff, struct mempolicy *policy)
+{
+	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
+	struct vm_area_struct *area, *next;
+
+	/*
+	 * We later require that vma->vm_flags == vm_flags,
+	 * so this tests vma->vm_flags & VM_SPECIAL, too.
+	 */
+	if (vm_flags & VM_SPECIAL)
+		return NULL;
+
+	if (prev)
+		next = prev->vm_next;
+	else
+		next = mm->mmap;
+	area = next;
+	/*
+	 * If [addr,end) ends exactly where next ends (the request swallows
+	 * next entirely), the vma to merge *beyond* is the one after it.
+	 */
+	if (next && next->vm_end == end)		/* cases 6, 7, 8 */
+		next = next->vm_next;
+
+	/*
+	 * Can it merge with the predecessor?
+	 */
+	if (prev && prev->vm_end == addr &&
+			mpol_equal(vma_policy(prev), policy) &&
+			can_vma_merge_after(prev, vm_flags,
+						anon_vma, file, pgoff)) {
+		/*
+		 * OK, it can.  Can we now merge in the successor as well?
+		 */
+		if (next && end == next->vm_start &&
+				mpol_equal(policy, vma_policy(next)) &&
+				can_vma_merge_before(next, vm_flags,
+					anon_vma, file, pgoff+pglen) &&
+				is_mergeable_anon_vma(prev->anon_vma,
+						      next->anon_vma)) {
+							/* cases 1, 6 */
+			vma_adjust(prev, prev->vm_start,
+				next->vm_end, prev->vm_pgoff, NULL);
+		} else					/* cases 2, 5, 7 */
+			vma_adjust(prev, prev->vm_start,
+				end, prev->vm_pgoff, NULL);
+		return prev;
+	}
+
+	/*
+	 * Can this new request be merged in front of next?
+	 */
+	if (next && end == next->vm_start &&
+			mpol_equal(policy, vma_policy(next)) &&
+			can_vma_merge_before(next, vm_flags,
+					anon_vma, file, pgoff+pglen)) {
+		if (prev && addr < prev->vm_end)	/* case 4 */
+			vma_adjust(prev, prev->vm_start,
+				addr, prev->vm_pgoff, NULL);
+		else					/* cases 3, 8 */
+			vma_adjust(area, addr, next->vm_end,
+				next->vm_pgoff - pglen, NULL);
+		return area;
+	}
+
+	/* No neighbour is compatible: caller must create a new vma. */
+	return NULL;
+}
+
+/*
+ * find_mergeable_anon_vma is used by anon_vma_prepare, to check
+ * neighbouring vmas for a suitable anon_vma, before it goes off
+ * to allocate a new anon_vma.  It checks because a repetitive
+ * sequence of mprotects and faults may otherwise lead to distinct
+ * anon_vmas being allocated, preventing vma merge in subsequent
+ * mprotect.
+ *
+ * Returns a neighbour's anon_vma if one is reusable, else NULL.
+ */
+struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
+{
+	struct vm_area_struct *near;
+	unsigned long vm_flags;
+
+	near = vma->vm_next;
+	if (!near)
+		goto try_prev;
+
+	/*
+	 * Since only mprotect tries to remerge vmas, match flags
+	 * which might be mprotected into each other later on.
+	 * Neither mlock nor madvise tries to remerge at present,
+	 * so leave their flags as obstructing a merge.
+	 */
+	/* Substitute near's protection bits into vma's flags for the test. */
+	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
+	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
+
+	if (near->anon_vma && vma->vm_end == near->vm_start &&
+			mpol_equal(vma_policy(vma), vma_policy(near)) &&
+			can_vma_merge_before(near, vm_flags,
+				NULL, vma->vm_file, vma->vm_pgoff +
+				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
+		return near->anon_vma;
+try_prev:
+	/*
+	 * It is potentially slow to have to call find_vma_prev here.
+	 * But it's only on the first write fault on the vma, not
+	 * every time, and we could devise a way to avoid it later
+	 * (e.g. stash info in next's anon_vma_node when assigning
+	 * an anon_vma, or when trying vma_merge).  Another time.
+	 */
+	if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
+		BUG();
+	if (!near)
+		goto none;
+
+	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
+	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
+
+	if (near->anon_vma && near->vm_end == vma->vm_start &&
+			mpol_equal(vma_policy(near), vma_policy(vma)) &&
+			can_vma_merge_after(near, vm_flags,
+				NULL, vma->vm_file, vma->vm_pgoff))
+		return near->anon_vma;
+none:
+	/*
+	 * There's no absolute need to look only at touching neighbours:
+	 * we could search further afield for "compatible" anon_vmas.
+	 * But it would probably just be a waste of time searching,
+	 * or lead to too many vmas hanging off the same anon_vma.
+	 * We're trying to allow mprotect remerging later on,
+	 * not trying to minimize memory used for anon_vmas.
+	 */
+	return NULL;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Fold "pages" into the mm's per-category VM statistics, classified by
+ * the mapping's flags and whether it is file-backed.  "pages" may be
+ * negative when a mapping shrinks or goes away.
+ */
+void __vm_stat_account(struct mm_struct *mm, unsigned long flags,
+						struct file *file, long pages)
+{
+	const unsigned long growsmask
+		= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
+
+#ifdef CONFIG_HUGETLB
+	/* Hugetlb mappings only ever count towards shared_vm. */
+	if (flags & VM_HUGETLB) {
+		if (!(flags & VM_DONTCOPY))
+			mm->shared_vm += pages;
+		return;
+	}
+#endif /* CONFIG_HUGETLB */
+
+	if (file)
+		mm->shared_vm += pages;
+	if (file && (flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
+		mm->exec_vm += pages;
+	if (!file && (flags & growsmask))
+		mm->stack_vm += pages;
+	if (flags & (VM_RESERVED|VM_IO))
+		mm->reserved_vm += pages;
+}
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * do_mmap_pgoff - the guts of mmap(2): validate the request, find an
+ * address range, account the memory, create (or merge) the vma, and
+ * call the file's ->mmap method if file-backed.
+ *
+ * The caller must hold down_write(current->mm->mmap_sem).
+ *
+ * Returns the mapped address on success, or a negative errno (the
+ * low bits distinguish errors from page-aligned addresses).
+ */
+
+unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
+			unsigned long len, unsigned long prot,
+			unsigned long flags, unsigned long pgoff)
+{
+	struct mm_struct * mm = current->mm;
+	struct vm_area_struct * vma, * prev;
+	struct inode *inode;
+	unsigned int vm_flags;
+	int correct_wcount = 0;
+	int error;
+	struct rb_node ** rb_link, * rb_parent;
+	int accountable = 1;
+	unsigned long charged = 0;
+
+	if (file) {
+		/* hugetlbfs does its own reservation accounting */
+		if (is_file_hugepages(file))
+			accountable = 0;
+
+		if (!file->f_op || !file->f_op->mmap)
+			return -ENODEV;
+
+		if ((prot & PROT_EXEC) &&
+		    (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
+			return -EPERM;
+	}
+	/*
+	 * Does the application expect PROT_READ to imply PROT_EXEC?
+	 *
+	 * (the exception is when the underlying filesystem is noexec
+	 *  mounted, in which case we dont add PROT_EXEC.)
+	 */
+	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
+		if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
+			prot |= PROT_EXEC;
+
+	if (!len)
+		return addr;
+
+	/* Careful about overflows.. */
+	len = PAGE_ALIGN(len);
+	if (!len || len > TASK_SIZE)
+		return -EINVAL;
+
+	/* offset overflow? */
+	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
+               return -EINVAL;
+
+	/* Too many mappings? */
+	if (mm->map_count > sysctl_max_map_count)
+		return -ENOMEM;
+
+	/* Obtain the address to map to. we verify (or select) it and ensure
+	 * that it represents a valid section of the address space.
+	 */
+	addr = get_unmapped_area(file, addr, len, pgoff, flags);
+	if (addr & ~PAGE_MASK)
+		return addr;
+
+	/* Do simple checking here so the lower-level routines won't have
+	 * to. we assume access permissions have been handled by the open
+	 * of the memory object, so we don't do any here.
+	 */
+	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
+	if (flags & MAP_LOCKED) {
+		if (!can_do_mlock())
+			return -EPERM;
+		vm_flags |= VM_LOCKED;
+	}
+	/* mlock MCL_FUTURE? */
+	if (vm_flags & VM_LOCKED) {
+		unsigned long locked, lock_limit;
+		locked = mm->locked_vm << PAGE_SHIFT;
+		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		locked += len;
+		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+			return -EAGAIN;
+	}
+
+	inode = file ? file->f_dentry->d_inode : NULL;
+
+	/* Validate MAP_SHARED/MAP_PRIVATE against the file mode (or lack
+	 * of a file) and derive the remaining vm_flags bits. */
+	if (file) {
+		switch (flags & MAP_TYPE) {
+		case MAP_SHARED:
+			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
+				return -EACCES;
+
+			/*
+			 * Make sure we don't allow writing to an append-only
+			 * file..
+			 */
+			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
+				return -EACCES;
+
+			/*
+			 * Make sure there are no mandatory locks on the file.
+			 */
+			if (locks_verify_locked(inode))
+				return -EAGAIN;
+
+			vm_flags |= VM_SHARED | VM_MAYSHARE;
+			if (!(file->f_mode & FMODE_WRITE))
+				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
+
+			/* fall through */
+		case MAP_PRIVATE:
+			if (!(file->f_mode & FMODE_READ))
+				return -EACCES;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	} else {
+		switch (flags & MAP_TYPE) {
+		case MAP_SHARED:
+			vm_flags |= VM_SHARED | VM_MAYSHARE;
+			break;
+		case MAP_PRIVATE:
+			/*
+			 * Set pgoff according to addr for anon_vma.
+			 */
+			pgoff = addr >> PAGE_SHIFT;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	error = security_file_mmap(file, prot, flags);
+	if (error)
+		return error;
+
+	/* Clear old maps: anything overlapping [addr, addr+len) is
+	 * munmapped first; retry until the range is free. */
+	error = -ENOMEM;
+munmap_back:
+	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+	if (vma && vma->vm_start < addr + len) {
+		if (do_munmap(mm, addr, len))
+			return -ENOMEM;
+		goto munmap_back;
+	}
+
+	/* Check against address space limit. */
+	if ((mm->total_vm << PAGE_SHIFT) + len
+	    > current->signal->rlim[RLIMIT_AS].rlim_cur)
+		return -ENOMEM;
+
+	/* Charge the mapping against the overcommit accounting, unless
+	 * MAP_NORESERVE asked us not to (and policy allows that). */
+	if (accountable && (!(flags & MAP_NORESERVE) ||
+			sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
+		if (vm_flags & VM_SHARED) {
+			/* Check memory availability in shmem_file_setup? */
+			vm_flags |= VM_ACCOUNT;
+		} else if (vm_flags & VM_WRITE) {
+			/*
+			 * Private writable mapping: check memory availability
+			 */
+			charged = len >> PAGE_SHIFT;
+			if (security_vm_enough_memory(charged))
+				return -ENOMEM;
+			vm_flags |= VM_ACCOUNT;
+		}
+	}
+
+	/*
+	 * Can we just expand an old private anonymous mapping?
+	 * The VM_SHARED test is necessary because shmem_zero_setup
+	 * will create the file object for a shared anonymous map below.
+	 */
+	if (!file && !(vm_flags & VM_SHARED) &&
+	    vma_merge(mm, prev, addr, addr + len, vm_flags,
+					NULL, NULL, pgoff, NULL))
+		goto out;
+
+	/*
+	 * Determine the object being mapped and call the appropriate
+	 * specific mapper. the address has already been validated, but
+	 * not unmapped, but the maps are removed from the list.
+	 */
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma) {
+		error = -ENOMEM;
+		goto unacct_error;
+	}
+	memset(vma, 0, sizeof(*vma));
+
+	vma->vm_mm = mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + len;
+	vma->vm_flags = vm_flags;
+	vma->vm_page_prot = protection_map[vm_flags & 0x0f];
+	vma->vm_pgoff = pgoff;
+
+	if (file) {
+		error = -EINVAL;
+		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+			goto free_vma;
+		if (vm_flags & VM_DENYWRITE) {
+			error = deny_write_access(file);
+			if (error)
+				goto free_vma;
+			correct_wcount = 1;
+		}
+		vma->vm_file = file;
+		get_file(file);
+		error = file->f_op->mmap(file, vma);
+		if (error)
+			goto unmap_and_free_vma;
+	} else if (vm_flags & VM_SHARED) {
+		error = shmem_zero_setup(vma);
+		if (error)
+			goto free_vma;
+	}
+
+	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
+	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
+	 * that memory reservation must be checked; but that reservation
+	 * belongs to shared memory object, not to vma: so now clear it.
+	 */
+	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
+		vma->vm_flags &= ~VM_ACCOUNT;
+
+	/* Can addr have changed??
+	 *
+	 * Answer: Yes, several device drivers can do it in their
+	 *         f_op->mmap method. -DaveM
+	 */
+	addr = vma->vm_start;
+	pgoff = vma->vm_pgoff;
+	vm_flags = vma->vm_flags;
+
+	/* Either link the new vma into the tree/list, or — if the driver's
+	 * ->mmap made it mergeable after all — merge and discard it. */
+	if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
+			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
+		file = vma->vm_file;
+		vma_link(mm, vma, prev, rb_link, rb_parent);
+		if (correct_wcount)
+			atomic_inc(&inode->i_writecount);
+	} else {
+		if (file) {
+			if (correct_wcount)
+				atomic_inc(&inode->i_writecount);
+			fput(file);
+		}
+		mpol_free(vma_policy(vma));
+		kmem_cache_free(vm_area_cachep, vma);
+	}
+out:
+	mm->total_vm += len >> PAGE_SHIFT;
+	__vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+	if (vm_flags & VM_LOCKED) {
+		mm->locked_vm += len >> PAGE_SHIFT;
+		make_pages_present(addr, addr + len);
+	}
+	if (flags & MAP_POPULATE) {
+		up_write(&mm->mmap_sem);
+		sys_remap_file_pages(addr, len, 0,
+					pgoff, flags & MAP_NONBLOCK);
+		down_write(&mm->mmap_sem);
+	}
+	acct_update_integrals();
+	update_mem_hiwater();
+	return addr;
+
+unmap_and_free_vma:
+	if (correct_wcount)
+		atomic_inc(&inode->i_writecount);
+	vma->vm_file = NULL;
+	fput(file);
+
+	/* Undo any partial mapping done by a device driver. */
+	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+free_vma:
+	kmem_cache_free(vm_area_cachep, vma);
+unacct_error:
+	if (charged)
+		vm_unacct_memory(charged);
+	return error;
+}
+
+EXPORT_SYMBOL(do_mmap_pgoff);
+
+/* Get an address range which is currently unmapped.
+ * For shmat() with addr=0.
+ *
+ * Ugly calling convention alert:
+ * Return value with the low bits set means error value,
+ * ie
+ *	if (ret & ~PAGE_MASK)
+ *		error = ret;
+ *
+ * This function "knows" that -ENOMEM has the bits set.
+ */
+#ifndef HAVE_ARCH_UNMAPPED_AREA
+/*
+ * Generic bottom-up search: try the caller's hint first, then scan
+ * upward from mm->free_area_cache for the first hole of "len" bytes.
+ */
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start_addr;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	/* Honour an explicit hint if the hole there is big enough. */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+	start_addr = addr = mm->free_area_cache;
+
+full_search:
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr) {
+			/*
+			 * Start a new search - just in case we missed
+			 * some holes.
+			 */
+			if (start_addr != TASK_UNMAPPED_BASE) {
+				start_addr = addr = TASK_UNMAPPED_BASE;
+				goto full_search;
+			}
+			return -ENOMEM;
+		}
+		if (!vma || addr + len <= vma->vm_start) {
+			/*
+			 * Remember the place where we stopped the search:
+			 */
+			mm->free_area_cache = addr + len;
+			return addr;
+		}
+		addr = vma->vm_end;
+	}
+}
+#endif
+
+/*
+ * Bottom-up allocator hook, called when an area is unmapped: if the
+ * freed range opens a hole below the current search cache (and at or
+ * above TASK_UNMAPPED_BASE), restart future searches from it.
+ */
+void arch_unmap_area(struct vm_area_struct *area)
+{
+	struct mm_struct *mm = area->vm_mm;
+	unsigned long start = area->vm_start;
+
+	if (start >= TASK_UNMAPPED_BASE && start < mm->free_area_cache)
+		mm->free_area_cache = start;
+}
+
+/*
+ * This mmap-allocator allocates new areas top-down from below the
+ * stack's low limit (the base):
+ */
+#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+/*
+ * Top-down search: honour the hint if possible, otherwise walk down
+ * from mm->free_area_cache looking for a "len"-byte hole.  Falls back
+ * to the bottom-up allocator if the top-down space is exhausted.
+ */
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct vm_area_struct *vma, *prev_vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long base = mm->mmap_base, addr = addr0;
+	int first_time = 1;
+
+	/* requested length too big for entire address space */
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	/* dont allow allocations above current base */
+	if (mm->free_area_cache > base)
+		mm->free_area_cache = base;
+
+	/* requesting a specific address */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+				(!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+try_again:
+	/* make sure it can fit in the remaining address space */
+	if (mm->free_area_cache < len)
+		goto fail;
+
+	/* either no address requested or cant fit in requested address hole */
+	addr = (mm->free_area_cache - len) & PAGE_MASK;
+	do {
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * i.e. return with success:
+		 */
+		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
+			return addr;
+
+		/*
+		 * new region fits between prev_vma->vm_end and
+		 * vma->vm_start, use it:
+		 */
+		if (addr+len <= vma->vm_start &&
+				(!prev_vma || (addr >= prev_vma->vm_end)))
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr);
+		else
+			/* pull free_area_cache down to the first hole */
+			if (mm->free_area_cache == vma->vm_end)
+				mm->free_area_cache = vma->vm_start;
+
+		/* try just below the current vma->vm_start */
+		addr = vma->vm_start-len;
+	} while (len <= vma->vm_start);
+	/* NOTE: the loop guard above also prevents addr underflowing. */
+
+fail:
+	/*
+	 * if hint left us with no space for the requested
+	 * mapping then try again:
+	 */
+	if (first_time) {
+		mm->free_area_cache = base;
+		first_time = 0;
+		goto try_again;
+	}
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+	/*
+	 * Restore the topdown base:
+	 */
+	mm->free_area_cache = base;
+
+	return addr;
+}
+#endif
+
+/*
+ * Top-down allocator hook: a vma ending above the cached search point
+ * has been removed, so future top-down searches may start at its end.
+ */
+void arch_unmap_area_topdown(struct vm_area_struct *area)
+{
+	struct mm_struct *mm = area->vm_mm;
+
+	if (mm->free_area_cache < area->vm_end)
+		mm->free_area_cache = area->vm_end;
+}
+
+/*
+ * Top-level dispatcher for choosing a mapping address: validates
+ * MAP_FIXED requests, otherwise defers to the file's or the mm's
+ * get_unmapped_area strategy.  Returns an address, or a negative
+ * errno with low bits set.
+ */
+unsigned long
+get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	if (flags & MAP_FIXED) {
+		unsigned long ret;
+
+		if (addr > TASK_SIZE - len)
+			return -ENOMEM;
+		if (addr & ~PAGE_MASK)
+			return -EINVAL;
+		if (file && is_file_hugepages(file))  {
+			/*
+			 * Check if the given range is hugepage aligned, and
+			 * can be made suitable for hugepages.
+			 */
+			ret = prepare_hugepage_range(addr, len);
+		} else {
+			/*
+			 * Ensure that a normal request is not falling in a
+			 * reserved hugepage range.  For some archs like IA-64,
+			 * there is a separate region for hugepages.
+			 */
+			ret = is_hugepage_only_range(addr, len);
+		}
+		if (ret)
+			return -EINVAL;
+		return addr;
+	}
+
+	if (file && file->f_op && file->f_op->get_unmapped_area)
+		return file->f_op->get_unmapped_area(file, addr, len,
+						pgoff, flags);
+
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+EXPORT_SYMBOL(get_unmapped_area);
+
+/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
+struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
+{
+	struct vm_area_struct *found;
+	struct rb_node *node;
+
+	if (!mm)
+		return NULL;
+
+	/* Try the single-entry lookup cache first (hit rate ~35%). */
+	found = mm->mmap_cache;
+	if (found && found->vm_end > addr && found->vm_start <= addr)
+		return found;
+
+	/* Cache miss: descend the rbtree for the lowest vma ending
+	 * beyond addr. */
+	found = NULL;
+	node = mm->mm_rb.rb_node;
+	while (node) {
+		struct vm_area_struct *cur =
+			rb_entry(node, struct vm_area_struct, vm_rb);
+
+		if (cur->vm_end <= addr) {
+			node = node->rb_right;
+			continue;
+		}
+		/* cur ends beyond addr: best candidate so far. */
+		found = cur;
+		if (cur->vm_start <= addr)
+			break;
+		node = node->rb_left;
+	}
+	if (found)
+		mm->mmap_cache = found;
+	return found;
+}
+
+EXPORT_SYMBOL(find_vma);
+
+/* Same as find_vma, but also return a pointer to the previous VMA in *pprev.
+ * With a prev found, returns prev->vm_next (which may be NULL when addr is
+ * beyond every vma); with no prev, returns the first vma in the mm, which
+ * guards the case of addr lying below the first VMA.
+ */
+struct vm_area_struct *
+find_vma_prev(struct mm_struct *mm, unsigned long addr,
+			struct vm_area_struct **pprev)
+{
+	struct vm_area_struct *vma = NULL, *prev = NULL;
+	struct rb_node * rb_node;
+	if (!mm)
+		goto out;
+
+	/* Guard against addr being lower than the first VMA */
+	vma = mm->mmap;
+
+	/* Go through the RB tree quickly. */
+	rb_node = mm->mm_rb.rb_node;
+
+	while (rb_node) {
+		struct vm_area_struct *vma_tmp;
+		vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+		if (addr < vma_tmp->vm_end) {
+			rb_node = rb_node->rb_left;
+		} else {
+			prev = vma_tmp;
+			/* prev found: stop once its successor covers addr
+			 * (or there is no successor). */
+			if (!prev->vm_next || (addr < prev->vm_next->vm_end))
+				break;
+			rb_node = rb_node->rb_right;
+		}
+	}
+
+out:
+	*pprev = prev;
+	return prev ? prev->vm_next : vma;
+}
+
+/*
+ * Verify that the stack growth is acceptable and
+ * update accounting. This is shared with both the
+ * grow-up and grow-down cases.
+ *
+ * "size" is the stack vma's total size after growth; "grow" is the
+ * number of pages being added.  Returns 0 on success, -ENOMEM if any
+ * rlimit or the overcommit policy forbids the growth.
+ */
+static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct rlimit *rlim = current->signal->rlim;
+
+	/* address space limit tests */
+	if (mm->total_vm + grow > rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT)
+		return -ENOMEM;
+
+	/* Stack limit test */
+	if (size > rlim[RLIMIT_STACK].rlim_cur)
+		return -ENOMEM;
+
+	/* mlock limit tests */
+	if (vma->vm_flags & VM_LOCKED) {
+		unsigned long locked;
+		unsigned long limit;
+		locked = mm->locked_vm + grow;
+		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+		if (locked > limit && !capable(CAP_IPC_LOCK))
+			return -ENOMEM;
+	}
+
+	/*
+	 * Overcommit..  This must be the final test, as it will
+	 * update security statistics.
+	 */
+	if (security_vm_enough_memory(grow))
+		return -ENOMEM;
+
+	/* Ok, everything looks good - let it rip */
+	mm->total_vm += grow;
+	if (vma->vm_flags & VM_LOCKED)
+		mm->locked_vm += grow;
+	__vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
+	acct_update_integrals();
+	update_mem_hiwater();
+	return 0;
+}
+
+#ifdef CONFIG_STACK_GROWSUP
+/*
+ * vma is the first one with  address > vma->vm_end.  Have to extend vma.
+ * Grows the stack upward to cover "address"; returns 0 or -ENOMEM/-EFAULT.
+ */
+int expand_stack(struct vm_area_struct * vma, unsigned long address)
+{
+	int error;
+
+	if (!(vma->vm_flags & VM_GROWSUP))
+		return -EFAULT;
+
+	/*
+	 * We must make sure the anon_vma is allocated
+	 * so that the anon_vma locking is not a noop.
+	 */
+	if (unlikely(anon_vma_prepare(vma)))
+		return -ENOMEM;
+	anon_vma_lock(vma);
+
+	/*
+	 * vma->vm_start/vm_end cannot change under us because the caller
+	 * is required to hold the mmap_sem in read mode.  We need the
+	 * anon_vma lock to serialize against concurrent expand_stacks.
+	 */
+	/* Round up to the next page boundary; the +4 appears to leave room
+	 * for one word beyond the faulting address — TODO confirm rationale. */
+	address += 4 + PAGE_SIZE - 1;
+	address &= PAGE_MASK;
+	error = 0;
+
+	/* Somebody else might have raced and expanded it already */
+	if (address > vma->vm_end) {
+		unsigned long size, grow;
+
+		size = address - vma->vm_start;
+		grow = (address - vma->vm_end) >> PAGE_SHIFT;
+
+		error = acct_stack_growth(vma, size, grow);
+		if (!error)
+			vma->vm_end = address;
+	}
+	anon_vma_unlock(vma);
+	return error;
+}
+
+/*
+ * Find the vma covering addr, growing the preceding stack vma up to
+ * reach it if necessary.  Returns the covering vma or NULL.
+ */
+struct vm_area_struct *
+find_extend_vma(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma, *prev;
+
+	addr &= PAGE_MASK;
+	vma = find_vma_prev(mm, addr, &prev);
+	if (vma && (vma->vm_start <= addr))
+		return vma;
+	if (!prev || expand_stack(prev, addr))
+		return NULL;
+	/* Newly grown pages of an mlocked stack must be faulted in now. */
+	if (prev->vm_flags & VM_LOCKED) {
+		make_pages_present(addr, prev->vm_end);
+	}
+	return prev;
+}
+#else
+/*
+ * vma is the first one with  address < vma->vm_start.  Have to extend vma.
+ * Grows the stack downward to cover "address"; returns 0 or -ENOMEM.
+ */
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+	int error;
+
+	/*
+	 * We must make sure the anon_vma is allocated
+	 * so that the anon_vma locking is not a noop.
+	 */
+	if (unlikely(anon_vma_prepare(vma)))
+		return -ENOMEM;
+	anon_vma_lock(vma);
+
+	/*
+	 * vma->vm_start/vm_end cannot change under us because the caller
+	 * is required to hold the mmap_sem in read mode.  We need the
+	 * anon_vma lock to serialize against concurrent expand_stacks.
+	 */
+	address &= PAGE_MASK;
+	error = 0;
+
+	/* Somebody else might have raced and expanded it already */
+	if (address < vma->vm_start) {
+		unsigned long size, grow;
+
+		size = vma->vm_end - address;
+		grow = (vma->vm_start - address) >> PAGE_SHIFT;
+
+		error = acct_stack_growth(vma, size, grow);
+		if (!error) {
+			vma->vm_start = address;
+			/* Keep file offset consistent with the new start. */
+			vma->vm_pgoff -= grow;
+		}
+	}
+	anon_vma_unlock(vma);
+	return error;
+}
+
+/*
+ * Find the vma covering addr, growing the following GROWSDOWN stack
+ * vma downward to reach it if necessary.  Returns the covering vma
+ * or NULL.
+ */
+struct vm_area_struct *
+find_extend_vma(struct mm_struct * mm, unsigned long addr)
+{
+	struct vm_area_struct * vma;
+	unsigned long start;
+
+	addr &= PAGE_MASK;
+	vma = find_vma(mm,addr);
+	if (!vma)
+		return NULL;
+	if (vma->vm_start <= addr)
+		return vma;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		return NULL;
+	start = vma->vm_start;
+	if (expand_stack(vma, addr))
+		return NULL;
+	/* Fault in the newly grown range [addr, old vm_start) if mlocked. */
+	if (vma->vm_flags & VM_LOCKED) {
+		make_pages_present(addr, start);
+	}
+	return vma;
+}
+#endif
+
+/*
+ * Try to free as many page directory entries as we can,
+ * without having to work very hard at actually scanning
+ * the page tables themselves.
+ *
+ * Right now we try to free page tables if we have a nice
+ * PGDIR-aligned area that got free'd up. We could be more
+ * granular if we want to, but this is fast and simple,
+ * and covers the bad cases.
+ *
+ * "prev", if it exists, points to a vma before the one
+ * we just free'd - but there's no telling how much before.
+ */
+static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
+	unsigned long start, unsigned long end)
+{
+	unsigned long first = start & PGDIR_MASK;
+	unsigned long last = end + PGDIR_SIZE - 1;
+	struct mm_struct *mm = tlb->mm;
+
+	/* The "last < end" test catches wraparound of the addition above. */
+	if (last > MM_VM_SIZE(mm) || last < end)
+		last = MM_VM_SIZE(mm);
+
+	if (!prev) {
+		prev = mm->mmap;
+		if (!prev)
+			goto no_mmaps;
+		if (prev->vm_end > start) {
+			if (last > prev->vm_start)
+				last = prev->vm_start;
+			goto no_mmaps;
+		}
+	}
+	/* Clamp [first,last) so it does not overlap any live vma. */
+	for (;;) {
+		struct vm_area_struct *next = prev->vm_next;
+
+		if (next) {
+			if (next->vm_start < start) {
+				prev = next;
+				continue;
+			}
+			if (last > next->vm_start)
+				last = next->vm_start;
+		}
+		if (prev->vm_end > first)
+			first = prev->vm_end;
+		break;
+	}
+no_mmaps:
+	if (last < first)	/* for arches with discontiguous pgd indices */
+		return;
+	if (first < FIRST_USER_PGD_NR * PGDIR_SIZE)
+		first = FIRST_USER_PGD_NR * PGDIR_SIZE;
+	/* No point trying to free anything if we're in the same pte page */
+	if ((first & PMD_MASK) < (last & PMD_MASK)) {
+		clear_page_range(tlb, first, last);
+		flush_tlb_pgtables(mm, first, last);
+	}
+}
+
+/*
+ * Default fixup for a single vma being unmapped: drop its pages from
+ * the mm-wide accounting, let the arch hook update its free-area
+ * cache, and finally free the vma itself.  By the time this runs the
+ * vma has already been removed from the process mapping list, and may
+ * be used as part of a more specific unmap routine.
+ */
+static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
+{
+	size_t nr_pages = (area->vm_end - area->vm_start) >> PAGE_SHIFT;
+
+	area->vm_mm->total_vm -= nr_pages;
+	if (area->vm_flags & VM_LOCKED)
+		area->vm_mm->locked_vm -= nr_pages;
+	vm_stat_unaccount(area);
+	area->vm_mm->unmap_area(area);
+	remove_vm_struct(area);
+}
+
+/*
+ * Release every vma on the (non-empty) singly-linked 'free' list,
+ * updating the mm's accounting and share lists as we go, then
+ * re-validate the mm.
+ */
+static void unmap_vma_list(struct mm_struct *mm,
+	struct vm_area_struct *mpnt)
+{
+	for (;;) {
+		struct vm_area_struct *next = mpnt->vm_next;
+
+		unmap_vma(mm, mpnt);
+		if (!(mpnt = next))
+			break;
+	}
+	validate_mm(mm);
+}
+
+/*
+ * Get rid of page table information in the indicated region.
+ *
+ * Called with the page table lock held.  Tears down the ptes for
+ * [start,end), un-charges any VM_ACCOUNT pages that were freed, and
+ * then prunes now-empty page tables.
+ */
+static void unmap_region(struct mm_struct *mm,
+	struct vm_area_struct *vma,
+	struct vm_area_struct *prev,
+	unsigned long start,
+	unsigned long end)
+{
+	struct mmu_gather *tlb;
+	unsigned long nr_accounted = 0;
+
+	lru_add_drain();
+	tlb = tlb_gather_mmu(mm, 0);
+	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
+	vm_unacct_memory(nr_accounted);
+
+	/* hugepage regions need the arch-specific pagetable teardown */
+	if (is_hugepage_only_range(start, end - start))
+		hugetlb_free_pgtables(tlb, prev, start, end);
+	else
+		free_pgtables(tlb, prev, start, end);
+	tlb_finish_mmu(tlb, start, end);
+}
+
+/*
+ * Create a list of vma's touched by the unmap, removing them from the mm's
+ * vma list as we go..
+ *
+ * On return, "vma" heads a NULL-terminated chain of detached vmas and
+ * the mm's list/tree no longer reference them.
+ */
+static void
+detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
+	struct vm_area_struct *prev, unsigned long end)
+{
+	struct vm_area_struct **insertion_point;
+	struct vm_area_struct *tail_vma = NULL;
+
+	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+	do {
+		rb_erase(&vma->vm_rb, &mm->mm_rb);
+		mm->map_count--;
+		tail_vma = vma;
+		vma = vma->vm_next;
+	} while (vma && vma->vm_start < end);
+	/* Splice the survivors back together and terminate the dead chain. */
+	*insertion_point = vma;
+	tail_vma->vm_next = NULL;
+	mm->mmap_cache = NULL;		/* Kill the cache. */
+}
+
+/*
+ * Split a vma into two pieces at address 'addr', a new vma is allocated
+ * either for the first part or the tail.
+ *
+ * With new_below set, the new vma covers [vm_start, addr); otherwise it
+ * covers [addr, vm_end).  Returns 0 on success or a negative errno.
+ */
+int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+	      unsigned long addr, int new_below)
+{
+	struct mempolicy *pol;
+	struct vm_area_struct *new;
+
+	/* hugepage vmas may only be split on hugepage boundaries */
+	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+		return -EINVAL;
+
+	if (mm->map_count >= sysctl_max_map_count)
+		return -ENOMEM;
+
+	new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	/* most fields are the same, copy all, and then fixup */
+	*new = *vma;
+
+	if (new_below)
+		new->vm_end = addr;
+	else {
+		new->vm_start = addr;
+		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
+	}
+
+	pol = mpol_copy(vma_policy(vma));
+	if (IS_ERR(pol)) {
+		kmem_cache_free(vm_area_cachep, new);
+		return PTR_ERR(pol);
+	}
+	vma_set_policy(new, pol);
+
+	/* The new vma holds its own reference on the backing file. */
+	if (new->vm_file)
+		get_file(new->vm_file);
+
+	if (new->vm_ops && new->vm_ops->open)
+		new->vm_ops->open(new);
+
+	/* Shrink the old vma to its half and insert the new one. */
+	if (new_below)
+		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+			((addr - new->vm_start) >> PAGE_SHIFT), new);
+	else
+		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+
+	return 0;
+}
+
+/* Munmap is split into 2 main parts -- this part which finds
+ * what needs doing, and the areas themselves, which do the
+ * work.  This now handles partial unmappings.
+ * Jeremy Fitzhardinge <jeremy@goop.org>
+ *
+ * Caller must hold mmap_sem for writing.  Returns 0 on success (also
+ * when nothing overlapped the range) or a negative errno.
+ */
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+{
+	unsigned long end;
+	struct vm_area_struct *mpnt, *prev, *last;
+
+	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+		return -EINVAL;
+
+	if ((len = PAGE_ALIGN(len)) == 0)
+		return -EINVAL;
+
+	/* Find the first overlapping VMA */
+	mpnt = find_vma_prev(mm, start, &prev);
+	if (!mpnt)
+		return 0;
+	/* we have  start < mpnt->vm_end  */
+
+	/* if it doesn't overlap, we have nothing.. */
+	end = start + len;
+	if (mpnt->vm_start >= end)
+		return 0;
+
+	/*
+	 * If we need to split any vma, do it now to save pain later.
+	 *
+	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
+	 * unmapped vm_area_struct will remain in use: so lower split_vma
+	 * places tmp vma above, and higher split_vma places tmp vma below.
+	 */
+	if (start > mpnt->vm_start) {
+		int error = split_vma(mm, mpnt, start, 0);
+		if (error)
+			return error;
+		prev = mpnt;
+	}
+
+	/* Does it split the last one? */
+	last = find_vma(mm, end);
+	if (last && end > last->vm_start) {
+		int error = split_vma(mm, last, end, 1);
+		if (error)
+			return error;
+	}
+	mpnt = prev? prev->vm_next: mm->mmap;
+
+	/*
+	 * Remove the vma's, and unmap the actual pages
+	 */
+	detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
+	spin_lock(&mm->page_table_lock);
+	unmap_region(mm, mpnt, prev, start, end);
+	spin_unlock(&mm->page_table_lock);
+
+	/* Fix up all other VM information */
+	unmap_vma_list(mm, mpnt);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(do_munmap);
+
+asmlinkage long sys_munmap(unsigned long addr, size_t len)
+{
+ int ret;
+ struct mm_struct *mm = current->mm;
+
+ profile_munmap(addr);
+
+ down_write(&mm->mmap_sem);
+ ret = do_munmap(mm, addr, len);
+ up_write(&mm->mmap_sem);
+ return ret;
+}
+
+static inline void verify_mm_writelocked(struct mm_struct *mm)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+ if (unlikely(down_read_trylock(&mm->mmap_sem))) {
+ WARN_ON(1);
+ up_read(&mm->mmap_sem);
+ }
+#endif
+}
+
+/*
+ * this is really a simplified "do_mmap". it only handles
+ * anonymous maps. eventually we may be able to do some
+ * brk-specific accounting here.
+ */
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+ struct mm_struct * mm = current->mm;
+ struct vm_area_struct * vma, * prev;
+ unsigned long flags;
+ struct rb_node ** rb_link, * rb_parent;
+ pgoff_t pgoff = addr >> PAGE_SHIFT;
+
+ len = PAGE_ALIGN(len);
+ if (!len)
+ return addr;
+
+ if ((addr + len) > TASK_SIZE || (addr + len) < addr)
+ return -EINVAL;
+
+ /*
+ * mlock MCL_FUTURE?
+ */
+ if (mm->def_flags & VM_LOCKED) {
+ unsigned long locked, lock_limit;
+ locked = mm->locked_vm << PAGE_SHIFT;
+ lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+ locked += len;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ return -EAGAIN;
+ }
+
+ /*
+ * mm->mmap_sem is required to protect against another thread
+ * changing the mappings in case we sleep.
+ */
+ verify_mm_writelocked(mm);
+
+ /*
+ * Clear old maps. this also does some error checking for us
+ */
+ munmap_back:
+ vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+ if (vma && vma->vm_start < addr + len) {
+ if (do_munmap(mm, addr, len))
+ return -ENOMEM;
+ goto munmap_back;
+ }
+
+ /* Check against address space limits *after* clearing old maps... */
+ if ((mm->total_vm << PAGE_SHIFT) + len
+ > current->signal->rlim[RLIMIT_AS].rlim_cur)
+ return -ENOMEM;
+
+ if (mm->map_count > sysctl_max_map_count)
+ return -ENOMEM;
+
+ if (security_vm_enough_memory(len >> PAGE_SHIFT))
+ return -ENOMEM;
+
+ flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
+ /* Can we just expand an old private anonymous mapping? */
+ if (vma_merge(mm, prev, addr, addr + len, flags,
+ NULL, NULL, pgoff, NULL))
+ goto out;
+
+ /*
+ * create a vma struct for an anonymous mapping
+ */
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!vma) {
+ vm_unacct_memory(len >> PAGE_SHIFT);
+ return -ENOMEM;
+ }
+ memset(vma, 0, sizeof(*vma));
+
+ vma->vm_mm = mm;
+ vma->vm_start = addr;
+ vma->vm_end = addr + len;
+ vma->vm_pgoff = pgoff;
+ vma->vm_flags = flags;
+ vma->vm_page_prot = protection_map[flags & 0x0f];
+ vma_link(mm, vma, prev, rb_link, rb_parent);
+out:
+ mm->total_vm += len >> PAGE_SHIFT;
+ if (flags & VM_LOCKED) {
+ mm->locked_vm += len >> PAGE_SHIFT;
+ make_pages_present(addr, addr + len);
+ }
+ acct_update_integrals();
+ update_mem_hiwater();
+ return addr;
+}
+
+EXPORT_SYMBOL(do_brk);
+
+/* Release all mmaps. */
+void exit_mmap(struct mm_struct *mm)
+{
+ struct mmu_gather *tlb;
+ struct vm_area_struct *vma;
+ unsigned long nr_accounted = 0;
+
+#ifdef arch_exit_mmap
+ arch_exit_mmap(mm);
+#endif
+
+ lru_add_drain();
+
+ spin_lock(&mm->page_table_lock);
+
+ tlb = tlb_gather_mmu(mm, 1);
+ flush_cache_mm(mm);
+ /* Use ~0UL here to ensure all VMAs in the mm are unmapped */
+ mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
+ ~0UL, &nr_accounted, NULL);
+ vm_unacct_memory(nr_accounted);
+ BUG_ON(mm->map_count); /* This is just debugging */
+ clear_page_range(tlb, FIRST_USER_PGD_NR * PGDIR_SIZE, MM_VM_SIZE(mm));
+
+ tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
+
+ vma = mm->mmap;
+ mm->mmap = mm->mmap_cache = NULL;
+ mm->mm_rb = RB_ROOT;
+ mm->rss = 0;
+ mm->total_vm = 0;
+ mm->locked_vm = 0;
+
+ spin_unlock(&mm->page_table_lock);
+
+ /*
+ * Walk the list again, actually closing and freeing it
+ * without holding any MM locks.
+ */
+ while (vma) {
+ struct vm_area_struct *next = vma->vm_next;
+ remove_vm_struct(vma);
+ vma = next;
+ }
+}
+
+/* Insert vm structure into process list sorted by address
+ * and into the inode's i_mmap tree. If vm_file is non-NULL
+ * then i_mmap_lock is taken here.
+ */
+int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+ struct vm_area_struct * __vma, * prev;
+ struct rb_node ** rb_link, * rb_parent;
+
+ /*
+ * The vm_pgoff of a purely anonymous vma should be irrelevant
+ * until its first write fault, when page's anon_vma and index
+ * are set. But now set the vm_pgoff it will almost certainly
+ * end up with (unless mremap moves it elsewhere before that
+ * first wfault), so /proc/pid/maps tells a consistent story.
+ *
+ * By setting it to reflect the virtual start address of the
+ * vma, merges and splits can happen in a seamless way, just
+ * using the existing file pgoff checks and manipulations.
+ * Similarly in do_mmap_pgoff and in do_brk.
+ */
+ if (!vma->vm_file) {
+ BUG_ON(vma->anon_vma);
+ vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
+ }
+ __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
+ if (__vma && __vma->vm_start < vma->vm_end)
+ return -ENOMEM;
+ vma_link(mm, vma, prev, rb_link, rb_parent);
+ return 0;
+}
+
+/*
+ * Copy the vma structure to a new location in the same mm,
+ * prior to moving page table entries, to effect an mremap move.
+ */
+struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
+ unsigned long addr, unsigned long len, pgoff_t pgoff)
+{
+ struct vm_area_struct *vma = *vmap;
+ unsigned long vma_start = vma->vm_start;
+ struct mm_struct *mm = vma->vm_mm;
+ struct vm_area_struct *new_vma, *prev;
+ struct rb_node **rb_link, *rb_parent;
+ struct mempolicy *pol;
+
+ /*
+ * If anonymous vma has not yet been faulted, update new pgoff
+ * to match new location, to increase its chance of merging.
+ */
+ if (!vma->vm_file && !vma->anon_vma)
+ pgoff = addr >> PAGE_SHIFT;
+
+ find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+ new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
+ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+ if (new_vma) {
+ /*
+ * Source vma may have been merged into new_vma
+ */
+ if (vma_start >= new_vma->vm_start &&
+ vma_start < new_vma->vm_end)
+ *vmap = new_vma;
+ } else {
+ new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (new_vma) {
+ *new_vma = *vma;
+ pol = mpol_copy(vma_policy(vma));
+ if (IS_ERR(pol)) {
+ kmem_cache_free(vm_area_cachep, new_vma);
+ return NULL;
+ }
+ vma_set_policy(new_vma, pol);
+ new_vma->vm_start = addr;
+ new_vma->vm_end = addr + len;
+ new_vma->vm_pgoff = pgoff;
+ if (new_vma->vm_file)
+ get_file(new_vma->vm_file);
+ if (new_vma->vm_ops && new_vma->vm_ops->open)
+ new_vma->vm_ops->open(new_vma);
+ vma_link(mm, new_vma, prev, rb_link, rb_parent);
+ }
+ }
+ return new_vma;
+}
diff --git a/linux-2.6.11-xen-sparse/net/core/dev.c b/linux-2.6.11-xen-sparse/net/core/dev.c
new file mode 100644
index 0000000000..b5e12b06ec
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/net/core/dev.c
@@ -0,0 +1,3389 @@
+/*
+ * NET3 Protocol independent device support routines.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Derived from the non IP parts of dev.c 1.0.19
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *
+ * Additional Authors:
+ * Florian la Roche <rzsfl@rz.uni-sb.de>
+ * Alan Cox <gw4pts@gw4pts.ampr.org>
+ * David Hinds <dahinds@users.sourceforge.net>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ * Adam Sulmicki <adam@cfar.umd.edu>
+ * Pekka Riikonen <priikone@poesidon.pspt.fi>
+ *
+ * Changes:
+ * D.J. Barrow : Fixed bug where dev->refcnt gets set
+ * to 2 if register_netdev gets called
+ * before net_dev_init & also removed a
+ * few lines of code in the process.
+ * Alan Cox : device private ioctl copies fields back.
+ * Alan Cox : Transmit queue code does relevant
+ * stunts to keep the queue safe.
+ * Alan Cox : Fixed double lock.
+ * Alan Cox : Fixed promisc NULL pointer trap
+ * ???????? : Support the full private ioctl range
+ * Alan Cox : Moved ioctl permission check into
+ * drivers
+ * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
+ * Alan Cox : 100 backlog just doesn't cut it when
+ * you start doing multicast video 8)
+ * Alan Cox : Rewrote net_bh and list manager.
+ * Alan Cox : Fix ETH_P_ALL echoback lengths.
+ * Alan Cox : Took out transmit every packet pass
+ * Saved a few bytes in the ioctl handler
+ * Alan Cox : Network driver sets packet type before
+ * calling netif_rx. Saves a function
+ * call a packet.
+ * Alan Cox : Hashed net_bh()
+ * Richard Kooijman: Timestamp fixes.
+ * Alan Cox : Wrong field in SIOCGIFDSTADDR
+ * Alan Cox : Device lock protection.
+ * Alan Cox : Fixed nasty side effect of device close
+ * changes.
+ * Rudi Cilibrasi : Pass the right thing to
+ * set_mac_address()
+ * Dave Miller : 32bit quantity for the device lock to
+ * make it work out on a Sparc.
+ * Bjorn Ekwall : Added KERNELD hack.
+ * Alan Cox : Cleaned up the backlog initialise.
+ * Craig Metz : SIOCGIFCONF fix if space for under
+ * 1 device.
+ * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
+ * is no device open function.
+ * Andi Kleen : Fix error reporting for SIOCGIFCONF
+ * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
+ * Cyrus Durgin : Cleaned for KMOD
+ * Adam Sulmicki : Bug Fix : Network Device Unload
+ * A network device unload needs to purge
+ * the backlog queue.
+ * Paul Rusty Russell : SIOCSIFNAME
+ * Pekka Riikonen : Netdev boot-time settings code
+ * Andrew Morton : Make unregister_netdevice wait
+ * indefinitely on dev->refcnt
+ * J Hadi Salim : - Backlog queue sampling
+ * - netif_rx() feedback
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/if_bridge.h>
+#include <linux/divert.h>
+#include <net/dst.h>
+#include <net/pkt_sched.h>
+#include <net/checksum.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/netpoll.h>
+#include <linux/rcupdate.h>
+#include <linux/delay.h>
+#ifdef CONFIG_NET_RADIO
+#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
+#include <net/iw_handler.h>
+#endif /* CONFIG_NET_RADIO */
+#include <asm/current.h>
+
+#include <net/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+
+/* This define, if set, will randomly drop a packet when congestion
+ * is more than moderate. It helps fairness in the multi-interface
+ * case when one of them is a hog, but it kills performance for the
+ * single interface case so it is off now by default.
+ */
+#undef RAND_LIE
+
+/* Setting this will sample the queue lengths and thus congestion
+ * via a timer instead of as each packet is received.
+ */
+#undef OFFLINE_SAMPLE
+
+/*
+ * The list of packet types we will receive (as opposed to discard)
+ * and the routines to invoke.
+ *
+ * Why 16. Because with 16 the only overlap we get on a hash of the
+ * low nibble of the protocol value is RARP/SNAP/X.25.
+ *
+ * NOTE: That is no longer true with the addition of VLAN tags. Not
+ * sure which should go first, but I bet it won't make much
+ * difference if we are running VLANs. The good news is that
+ * this protocol won't be in the list unless compiled in, so
+ * the average user (w/out VLANs) will not be adversely affected.
+ * --BLG
+ *
+ * 0800 IP
+ * 8100 802.1Q VLAN
+ * 0001 802.3
+ * 0002 AX.25
+ * 0004 802.2
+ * 8035 RARP
+ * 0005 SNAP
+ * 0805 X.25
+ * 0806 ARP
+ * 8137 IPX
+ * 0009 Localtalk
+ * 86DD IPv6
+ */
+
+static DEFINE_SPINLOCK(ptype_lock);
+static struct list_head ptype_base[16]; /* 16 way hashed list */
+static struct list_head ptype_all; /* Taps */
+
+#ifdef OFFLINE_SAMPLE
+static void sample_queue(unsigned long dummy);
+static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
+#endif
+
+/*
+ * The @dev_base list is protected by @dev_base_lock and the rtnl
+ * semaphore.
+ *
+ * Pure readers hold dev_base_lock for reading.
+ *
+ * Writers must hold the rtnl semaphore while they loop through the
+ * dev_base list, and hold dev_base_lock for writing when they do the
+ * actual updates. This allows pure readers to access the list even
+ * while a writer is preparing to update it.
+ *
+ * To put it another way, dev_base_lock is held for writing only to
+ * protect against pure readers; the rtnl semaphore provides the
+ * protection against other writers.
+ *
+ * See, for example usages, register_netdevice() and
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
+struct net_device *dev_base;
+static struct net_device **dev_tail = &dev_base;
+DEFINE_RWLOCK(dev_base_lock);
+
+EXPORT_SYMBOL(dev_base);
+EXPORT_SYMBOL(dev_base_lock);
+
+#define NETDEV_HASHBITS 8
+static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
+static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
+
+static inline struct hlist_head *dev_name_hash(const char *name)
+{
+ unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+ return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
+}
+
+static inline struct hlist_head *dev_index_hash(int ifindex)
+{
+ return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
+}
+
+/*
+ * Our notifier list
+ */
+
+static struct notifier_block *netdev_chain;
+
+/*
+ * Device drivers call our routines to queue packets here. We empty the
+ * queue in the local softnet handler.
+ */
+DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
+
+#ifdef CONFIG_SYSFS
+extern int netdev_sysfs_init(void);
+extern int netdev_register_sysfs(struct net_device *);
+extern void netdev_unregister_sysfs(struct net_device *);
+#else
+#define netdev_sysfs_init() (0)
+#define netdev_register_sysfs(dev) (0)
+#define netdev_unregister_sysfs(dev) do { } while(0)
+#endif
+
+
+/*******************************************************************************
+
+ Protocol management and registration routines
+
+*******************************************************************************/
+
+/*
+ * For efficiency
+ */
+
+int netdev_nit;
+
+/*
+ * Add a protocol ID to the list. Now that the input handler is
+ * smarter we can dispense with all the messy stuff that used to be
+ * here.
+ *
+ * BEWARE!!! Protocol handlers, mangling input packets,
+ * MUST BE last in hash buckets and checking protocol handlers
+ * MUST start from promiscuous ptype_all chain in net_bh.
+ * It is true now, do not change it.
+ * Explanation follows: if protocol handler, mangling packet, will
+ * be the first on list, it is not able to sense, that packet
+ * is cloned and should be copied-on-write, so that it will
+ * change it and subsequent readers will get broken packet.
+ * --ANK (980803)
+ */
+
+/**
+ * dev_add_pack - add packet handler
+ * @pt: packet type declaration
+ *
+ * Add a protocol handler to the networking stack. The passed &packet_type
+ * is linked into kernel lists and may not be freed until it has been
+ * removed from the kernel lists.
+ *
+ * This call does not sleep therefore it can not
+ * guarantee all CPU's that are in middle of receiving packets
+ * will see the new packet type (until the next received packet).
+ */
+
+void dev_add_pack(struct packet_type *pt)
+{
+ int hash;
+
+ spin_lock_bh(&ptype_lock);
+ if (pt->type == htons(ETH_P_ALL)) {
+ netdev_nit++;
+ list_add_rcu(&pt->list, &ptype_all);
+ } else {
+ hash = ntohs(pt->type) & 15;
+ list_add_rcu(&pt->list, &ptype_base[hash]);
+ }
+ spin_unlock_bh(&ptype_lock);
+}
+
+extern void linkwatch_run_queue(void);
+
+
+
+/**
+ * __dev_remove_pack - remove packet handler
+ * @pt: packet type declaration
+ *
+ * Remove a protocol handler that was previously added to the kernel
+ * protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ * from the kernel lists and can be freed or reused once this function
+ * returns.
+ *
+ * The packet type might still be in use by receivers
+ * and must not be freed until after all the CPU's have gone
+ * through a quiescent state.
+ */
+void __dev_remove_pack(struct packet_type *pt)
+{
+ struct list_head *head;
+ struct packet_type *pt1;
+
+ spin_lock_bh(&ptype_lock);
+
+ if (pt->type == htons(ETH_P_ALL)) {
+ netdev_nit--;
+ head = &ptype_all;
+ } else
+ head = &ptype_base[ntohs(pt->type) & 15];
+
+ list_for_each_entry(pt1, head, list) {
+ if (pt == pt1) {
+ list_del_rcu(&pt->list);
+ goto out;
+ }
+ }
+
+ printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
+out:
+ spin_unlock_bh(&ptype_lock);
+}
+/**
+ * dev_remove_pack - remove packet handler
+ * @pt: packet type declaration
+ *
+ * Remove a protocol handler that was previously added to the kernel
+ * protocol handlers by dev_add_pack(). The passed &packet_type is removed
+ * from the kernel lists and can be freed or reused once this function
+ * returns.
+ *
+ * This call sleeps to guarantee that no CPU is looking at the packet
+ * type after return.
+ */
+void dev_remove_pack(struct packet_type *pt)
+{
+ __dev_remove_pack(pt);
+
+ synchronize_net();
+}
+
+/******************************************************************************
+
+ Device Boot-time Settings Routines
+
+*******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ * netdev_boot_setup_add - add new setup entry
+ * @name: name of the device
+ * @map: configured settings for the device
+ *
+ * Adds new setup entry to the dev_boot_setup list. The function
+ * returns 0 on error and 1 on success. This is a generic routine to
+ * all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+ struct netdev_boot_setup *s;
+ int i;
+
+ s = dev_boot_setup;
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+ if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+ memset(s[i].name, 0, sizeof(s[i].name));
+ strcpy(s[i].name, name);
+ memcpy(&s[i].map, map, sizeof(s[i].map));
+ break;
+ }
+ }
+
+ return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ * netdev_boot_setup_check - check boot time settings
+ * @dev: the netdevice
+ *
+ * Check boot time settings for the device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+ struct netdev_boot_setup *s = dev_boot_setup;
+ int i;
+
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+ if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+ !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
+ dev->irq = s[i].map.irq;
+ dev->base_addr = s[i].map.base_addr;
+ dev->mem_start = s[i].map.mem_start;
+ dev->mem_end = s[i].map.mem_end;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * netdev_boot_base - get address from boot time settings
+ * @prefix: prefix for network device
+ * @unit: id for network device
+ *
+ * Check boot time settings for the base address of device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found.
+ */
+unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+ const struct netdev_boot_setup *s = dev_boot_setup;
+ char name[IFNAMSIZ];
+ int i;
+
+ sprintf(name, "%s%d", prefix, unit);
+
+ /*
+ * If device already registered then return base of 1
+ * to indicate not to probe for this interface
+ */
+ if (__dev_get_by_name(name))
+ return 1;
+
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+ if (!strcmp(name, s[i].name))
+ return s[i].map.base_addr;
+ return 0;
+}
+
+/*
+ * Saves at boot time configured settings for any netdevice.
+ */
+int __init netdev_boot_setup(char *str)
+{
+ int ints[5];
+ struct ifmap map;
+
+ str = get_options(str, ARRAY_SIZE(ints), ints);
+ if (!str || !*str)
+ return 0;
+
+ /* Save settings */
+ memset(&map, 0, sizeof(map));
+ if (ints[0] > 0)
+ map.irq = ints[1];
+ if (ints[0] > 1)
+ map.base_addr = ints[2];
+ if (ints[0] > 2)
+ map.mem_start = ints[3];
+ if (ints[0] > 3)
+ map.mem_end = ints[4];
+
+ /* Add new entry to the list */
+ return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+/*******************************************************************************
+
+ Device Interface Subroutines
+
+*******************************************************************************/
+
+/**
+ * __dev_get_by_name - find a device by its name
+ * @name: name to find
+ *
+ * Find an interface by name. Must be called under RTNL semaphore
+ * or @dev_base_lock. If the name is found a pointer to the device
+ * is returned. If the name is not found then %NULL is returned. The
+ * reference counters are not incremented so the caller must be
+ * careful with locks.
+ */
+
+struct net_device *__dev_get_by_name(const char *name)
+{
+ struct hlist_node *p;
+
+ hlist_for_each(p, dev_name_hash(name)) {
+ struct net_device *dev
+ = hlist_entry(p, struct net_device, name_hlist);
+ if (!strncmp(dev->name, name, IFNAMSIZ))
+ return dev;
+ }
+ return NULL;
+}
+
+/**
+ * dev_get_by_name - find a device by its name
+ * @name: name to find
+ *
+ * Find an interface by name. This can be called from any
+ * context and does its own locking. The returned handle has
+ * the usage count incremented and the caller must use dev_put() to
+ * release it when it is no longer needed. %NULL is returned if no
+ * matching device is found.
+ */
+
+struct net_device *dev_get_by_name(const char *name)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_name(name);
+ if (dev)
+ dev_hold(dev);
+ read_unlock(&dev_base_lock);
+ return dev;
+}
+
+/**
+ * __dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold either the RTNL semaphore
+ * or @dev_base_lock.
+ */
+
+struct net_device *__dev_get_by_index(int ifindex)
+{
+ struct hlist_node *p;
+
+ hlist_for_each(p, dev_index_hash(ifindex)) {
+ struct net_device *dev
+ = hlist_entry(p, struct net_device, index_hlist);
+ if (dev->ifindex == ifindex)
+ return dev;
+ }
+ return NULL;
+}
+
+
+/**
+ * dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns NULL if the device
+ * is not found or a pointer to the device. The device returned has
+ * had a reference added and the pointer is safe until the user calls
+ * dev_put to indicate they have finished with it.
+ */
+
+struct net_device *dev_get_by_index(int ifindex)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_index(ifindex);
+ if (dev)
+ dev_hold(dev);
+ read_unlock(&dev_base_lock);
+ return dev;
+}
+
+/**
+ * dev_getbyhwaddr - find a device by its hardware address
+ * @type: media type of device
+ * @ha: hardware address
+ *
+ * Search for an interface by MAC address. Returns NULL if the device
+ * is not found or a pointer to the device. The caller must hold the
+ * rtnl semaphore. The returned device has not had its ref count increased
+ * and the caller must therefore be careful about locking
+ *
+ * BUGS:
+ * If the API was consistent this would be __dev_get_by_hwaddr
+ */
+
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+ struct net_device *dev;
+
+ ASSERT_RTNL();
+
+ for (dev = dev_base; dev; dev = dev->next)
+ if (dev->type == type &&
+ !memcmp(dev->dev_addr, ha, dev->addr_len))
+ break;
+ return dev;
+}
+
+struct net_device *dev_getfirstbyhwtype(unsigned short type)
+{
+ struct net_device *dev;
+
+ rtnl_lock();
+ for (dev = dev_base; dev; dev = dev->next) {
+ if (dev->type == type) {
+ dev_hold(dev);
+ break;
+ }
+ }
+ rtnl_unlock();
+ return dev;
+}
+
+EXPORT_SYMBOL(dev_getfirstbyhwtype);
+
+/**
+ * dev_get_by_flags - find any device with given flags
+ * @if_flags: IFF_* values
+ * @mask: bitmask of bits in if_flags to check
+ *
+ * Search for any interface with the given flags. Returns NULL if a device
+ * is not found or a pointer to the device. The device returned has
+ * had a reference added and the pointer is safe until the user calls
+ * dev_put to indicate they have finished with it.
+ */
+
+struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ if (((dev->flags ^ if_flags) & mask) == 0) {
+ dev_hold(dev);
+ break;
+ }
+ }
+ read_unlock(&dev_base_lock);
+ return dev;
+}
+
+/**
+ * dev_valid_name - check if name is okay for network device
+ * @name: name string
+ *
+ * Network device names need to be valid file names to
+ * allow sysfs to work
+ */
+static int dev_valid_name(const char *name)
+{
+ return !(*name == '\0'
+ || !strcmp(name, ".")
+ || !strcmp(name, "..")
+ || strchr(name, '/'));
+}
+
+/**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+ * @name: name format string
+ *
+ * Passed a format string - eg "lt%d" it will try and find a suitable
+ * id. Not efficient for many devices, not called a lot. The caller
+ * must hold the dev_base or rtnl lock while allocating the name and
+ * adding the device in order to avoid duplicates. Returns the number
+ * of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+ int i = 0;
+ char buf[IFNAMSIZ];
+ const char *p;
+ const int max_netdevices = 8*PAGE_SIZE;
+ long *inuse;
+ struct net_device *d;
+
+ p = strnchr(name, IFNAMSIZ-1, '%');
+ if (p) {
+ /*
+ * Verify the string as this thing may have come from
+ * the user. There must be either one "%d" and no other "%"
+ * characters.
+ */
+ if (p[1] != 'd' || strchr(p + 2, '%'))
+ return -EINVAL;
+
+ /* Use one page as a bit array of possible slots */
+ inuse = (long *) get_zeroed_page(GFP_ATOMIC);
+ if (!inuse)
+ return -ENOMEM;
+
+ for (d = dev_base; d; d = d->next) {
+ if (!sscanf(d->name, name, &i))
+ continue;
+ if (i < 0 || i >= max_netdevices)
+ continue;
+
+ /* avoid cases where sscanf is not exact inverse of printf */
+ snprintf(buf, sizeof(buf), name, i);
+ if (!strncmp(buf, d->name, IFNAMSIZ))
+ set_bit(i, inuse);
+ }
+
+ i = find_first_zero_bit(inuse, max_netdevices);
+ free_page((unsigned long) inuse);
+ }
+
+ snprintf(buf, sizeof(buf), name, i);
+ if (!__dev_get_by_name(buf)) {
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return i;
+ }
+
+ /* It is possible to run out of possible slots
+ * when the name is long and there isn't enough space left
+ * for the digits, or if all bits are used.
+ */
+ return -ENFILE;
+}
+
+
+/**
+ * dev_change_name - change name of a device
+ * @dev: device
+ * @newname: name (or format string) must be at least IFNAMSIZ
+ *
+ * Change name of a device, can pass format strings "eth%d".
+ * for wildcarding.
+ */
+int dev_change_name(struct net_device *dev, char *newname)
+{
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ if (dev->flags & IFF_UP)
+ return -EBUSY;
+
+ if (!dev_valid_name(newname))
+ return -EINVAL;
+
+ if (strchr(newname, '%')) {
+ err = dev_alloc_name(dev, newname);
+ if (err < 0)
+ return err;
+ strcpy(newname, dev->name);
+ }
+ else if (__dev_get_by_name(newname))
+ return -EEXIST;
+ else
+ strlcpy(dev->name, newname, IFNAMSIZ);
+
+ err = class_device_rename(&dev->class_dev, dev->name);
+ if (!err) {
+ hlist_del(&dev->name_hlist);
+ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ }
+
+ return err;
+}
+
+/**
+ * netdev_state_change - device changes state
+ * @dev: device to cause notification
+ *
+ * Called to indicate a device has changed state. This function calls
+ * the notifier chains for netdev_chain and sends a NEWLINK message
+ * to the routing socket.
+ */
+void netdev_state_change(struct net_device *dev)
+{
+ if (dev->flags & IFF_UP) {
+ notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+ }
+}
+
+/**
+ * dev_load - load a network module
+ * @name: name of interface
+ *
+ * If a network interface is not present and the process has suitable
+ * privileges this function loads the module. If module loading is not
+ * available in this kernel then it becomes a nop.
+ */
+
+void dev_load(const char *name)
+{
+ struct net_device *dev;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_name(name);
+ read_unlock(&dev_base_lock);
+
+ if (!dev && capable(CAP_SYS_MODULE))
+ request_module("%s", name);
+}
+
+static int default_rebuild_header(struct sk_buff *skb)
+{
+ printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
+ skb->dev ? skb->dev->name : "NULL!!!");
+ kfree_skb(skb);
+ return 1;
+}
+
+
+/**
+ * dev_open - prepare an interface for use.
+ * @dev: device to open
+ *
+ * Takes a device from down to up state. The device's private open
+ * function is invoked and then the multicast lists are loaded. Finally
+ * the device is moved into the up state and a %NETDEV_UP message is
+ * sent to the netdev notifier chain.
+ *
+ * Calling this function on an active interface is a nop. On a failure
+ * a negative errno code is returned.
+ */
+int dev_open(struct net_device *dev)
+{
+ int ret = 0;
+
+ /*
+ * Is it already up?
+ */
+
+ if (dev->flags & IFF_UP)
+ return 0;
+
+ /*
+ * Is it even present?
+ */
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ /*
+ * Call device private open method
+ */
+ set_bit(__LINK_STATE_START, &dev->state);
+ if (dev->open) {
+ ret = dev->open(dev);
+ if (ret)
+ clear_bit(__LINK_STATE_START, &dev->state);
+ }
+
+ /*
+ * If it went open OK then:
+ */
+
+ if (!ret) {
+ /*
+ * Set the flags.
+ */
+ dev->flags |= IFF_UP;
+
+ /*
+ * Initialize multicasting status
+ */
+ dev_mc_upload(dev);
+
+ /*
+ * Wakeup transmit queue engine
+ */
+ dev_activate(dev);
+
+ /*
+ * ... and announce new interface.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+ }
+ return ret;
+}
+
+/**
+ * dev_close - shutdown an interface.
+ * @dev: device to shutdown
+ *
+ * This function moves an active device into down state. A
+ * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ * chain.
+ *
+ * Returns 0; calling it on an already-down device is a no-op.
+ */
+int dev_close(struct net_device *dev)
+{
+ if (!(dev->flags & IFF_UP))
+ return 0;
+
+ /*
+ * Tell people we are going down, so that they can
+ * prepare to death, when device is still operating.
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+
+ dev_deactivate(dev);
+
+ clear_bit(__LINK_STATE_START, &dev->state);
+
+ /* Synchronize to scheduled poll. We cannot touch poll list,
+ * it can be even on different cpu. So just clear netif_running(),
+ * and wait when poll really will happen. Actually, the best place
+ * for this is inside dev->stop() after device stopped its irq
+ * engine, but this requires more changes in devices. */
+
+ smp_mb__after_clear_bit(); /* Commit netif_running(). */
+ /* Sleep in 1-jiffy steps until any in-flight NAPI poll on any cpu
+ * has observed !netif_running() and cleared RX_SCHED. */
+ while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
+ /* No hurry. */
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+
+ /*
+ * Call the device specific close. This cannot fail.
+ * Only if device is UP
+ *
+ * We allow it to be called even after a DETACH hot-plug
+ * event.
+ */
+ if (dev->stop)
+ dev->stop(dev);
+
+ /*
+ * Device is now down.
+ */
+
+ dev->flags &= ~IFF_UP;
+
+ /*
+ * Tell people we are down
+ */
+ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+ return 0;
+}
+
+
+/*
+ * Device change register/unregister. These are not inline or static
+ * as we export them to the world.
+ */
+
+/**
+ * register_netdevice_notifier - register a network notifier block
+ * @nb: notifier
+ *
+ * Register a notifier to be called when network device events occur.
+ * The notifier passed is linked into the kernel structures and must
+ * not be reused until it has been unregistered. A negative errno code
+ * is returned on a failure.
+ *
+ * When registered all registration and up events are replayed
+ * to the new notifier to allow device to have a race free
+ * view of the network device list.
+ */
+
+int register_netdevice_notifier(struct notifier_block *nb)
+{
+ struct net_device *dev;
+ int err;
+
+ rtnl_lock();
+ err = notifier_chain_register(&netdev_chain, nb);
+ if (!err) {
+ /* Replay REGISTER (and UP for running devices) under the
+ * RTNL so the new listener sees every existing device. */
+ for (dev = dev_base; dev; dev = dev->next) {
+ nb->notifier_call(nb, NETDEV_REGISTER, dev);
+
+ if (dev->flags & IFF_UP)
+ nb->notifier_call(nb, NETDEV_UP, dev);
+ }
+ }
+ rtnl_unlock();
+ return err;
+}
+
+/**
+ * unregister_netdevice_notifier - unregister a network notifier block
+ * @nb: notifier
+ *
+ * Remove a notifier previously added with register_netdevice_notifier().
+ * Afterwards the block is unlinked from the kernel structures and may
+ * be reused. A negative errno code is returned on a failure.
+ */
+
+int unregister_netdevice_notifier(struct notifier_block *nb)
+{
+ int err;
+
+ err = notifier_chain_unregister(&netdev_chain, nb);
+ return err;
+}
+
+/**
+ * call_netdevice_notifiers - call all network notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @v: pointer passed unmodified to notifier function
+ *
+ * Invoke every notifier registered on the netdev chain. Parameters
+ * and return value follow notifier_call_chain().
+ */
+
+int call_netdevice_notifiers(unsigned long val, void *v)
+{
+ int rc;
+
+ rc = notifier_call_chain(&netdev_chain, val, v);
+ return rc;
+}
+
+/* When > 0 there are consumers of rx skb time stamps */
+static atomic_t netstamp_needed = ATOMIC_INIT(0);
+
+/* Register interest in rx timestamps (e.g. a packet tap attaching). */
+void net_enable_timestamp(void)
+{
+ atomic_inc(&netstamp_needed);
+}
+
+/* Drop one interest reference taken by net_enable_timestamp(). */
+void net_disable_timestamp(void)
+{
+ atomic_dec(&netstamp_needed);
+}
+
+/* Fill *stamp with the current time if anyone wants timestamps,
+ * otherwise zero it so consumers can tell it was never taken. */
+static inline void net_timestamp(struct timeval *stamp)
+{
+ if (atomic_read(&netstamp_needed))
+ do_gettimeofday(stamp);
+ else {
+ stamp->tv_sec = 0;
+ stamp->tv_usec = 0;
+ }
+}
+
+/*
+ * Support routine. Sends outgoing frames to any network
+ * taps currently in use. Called with the RCU read side taken
+ * internally; each matching tap gets its own clone of @skb.
+ */
+
+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct packet_type *ptype;
+
+ /* Stamp the frame so taps (e.g. sniffers) see a transmit time. */
+ net_timestamp(&skb->stamp);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ /* Never send packets back to the socket
+ * they originated from - MvS (miquels@drinkel.ow.org)
+ */
+ if ((ptype->dev == dev || !ptype->dev) &&
+ (ptype->af_packet_priv == NULL ||
+ (struct sock *)ptype->af_packet_priv != skb->sk)) {
+ struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ break;
+
+ /* skb->nh should be correctly
+ set by sender, so that the second statement is
+ just protection against buggy protocols.
+ */
+ skb2->mac.raw = skb2->data;
+
+ if (skb2->nh.raw < skb2->data ||
+ skb2->nh.raw > skb2->tail) {
+ if (net_ratelimit())
+ /* protocol is stored big-endian; convert
+ * so the logged id matches ETH_P_* values. */
+ printk(KERN_CRIT "protocol %04x is "
+ "buggy, dev %s\n",
+ ntohs(skb2->protocol), dev->name);
+ skb2->nh.raw = skb2->data;
+ }
+
+ skb2->h.raw = skb2->nh.raw;
+ skb2->pkt_type = PACKET_OUTGOING;
+ ptype->func(skb2, skb->dev, ptype);
+ }
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Invalidate hardware checksum when packet is to be mangled, and
+ * complete checksum manually on outgoing path.
+ * On the outgoing (!inward) path skb->csum holds the offset of the
+ * checksum field inside the transport header (see dev_queue_xmit()).
+ * Returns 0 on success or a negative errno from pskb_expand_head().
+ */
+int skb_checksum_help(struct sk_buff *skb, int inward)
+{
+ unsigned int csum;
+ int ret = 0, offset = skb->h.raw - skb->data;
+
+ if (inward) {
+ skb->ip_summed = CHECKSUM_NONE;
+ goto out;
+ }
+
+ /* We are about to write into the data area: take a private copy
+ * if the skb still shares its data with a clone. */
+ if (skb_cloned(skb)) {
+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (ret)
+ goto out;
+ }
+
+ if (offset > (int)skb->len)
+ BUG();
+ csum = skb_checksum(skb, offset, skb->len-offset, 0);
+
+ /* The 16-bit checksum field must lie inside the linear area. */
+ offset = skb->tail - skb->h.raw;
+ if (offset <= 0)
+ BUG();
+ if (skb->csum + 2 > offset)
+ BUG();
+
+ *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+out:
+ return ret;
+}
+
+#ifdef CONFIG_HIGHMEM
+/* Actually, we should eliminate this check as soon as we know, that:
+ * 1. IOMMU is present and allows to map all the memory.
+ * 2. No high memory really exists on this machine.
+ */
+
+/* Return non-zero when @skb carries a highmem fragment that @dev
+ * cannot DMA from (device lacks NETIF_F_HIGHDMA). */
+static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+{
+ int i;
+
+ if (dev->features & NETIF_F_HIGHDMA)
+ return 0;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+ return 1;
+
+ return 0;
+}
+#else
+#define illegal_highdma(dev, skb) (0)
+#endif
+
+extern void skb_release_data(struct sk_buff *);
+
+/* Keep head the same: replace data */
+/* Collapse a fragmented/nonlinear skb into one freshly allocated linear
+ * data area, preserving headroom and total length. The skb header object
+ * itself is reused; only the data buffer is swapped. Returns 0 or
+ * -ENOMEM. Must not be called on a shared skb. */
+int __skb_linearize(struct sk_buff *skb, int gfp_mask)
+{
+ unsigned int size;
+ u8 *data;
+ long offset;
+ struct skb_shared_info *ninfo;
+ int headerlen = skb->data - skb->head;
+ int expand = (skb->tail + skb->data_len) - skb->end;
+
+ if (skb_shared(skb))
+ BUG();
+
+ /* Grow only if the paged data would not fit behind skb->tail. */
+ if (expand <= 0)
+ expand = 0;
+
+ size = skb->end - skb->head + expand;
+ size = SKB_DATA_ALIGN(size);
+ data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ if (!data)
+ return -ENOMEM;
+
+ /* Copy entire thing */
+ if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
+ BUG();
+
+ /* Set up shinfo */
+ ninfo = (struct skb_shared_info*)(data + size);
+ atomic_set(&ninfo->dataref, 1);
+ ninfo->tso_size = skb_shinfo(skb)->tso_size;
+ ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
+ ninfo->nr_frags = 0;
+ ninfo->frag_list = NULL;
+
+ /* Offset between the two in bytes */
+ offset = data - skb->head;
+
+ /* Free old data. */
+ skb_release_data(skb);
+
+ skb->head = data;
+ skb->end = data + size;
+
+ /* Set up new pointers */
+ skb->h.raw += offset;
+ skb->nh.raw += offset;
+ skb->mac.raw += offset;
+ skb->tail += offset;
+ skb->data += offset;
+
+ /* We are no longer a clone, even if we were. */
+ skb->cloned = 0;
+
+ /* All formerly-paged bytes are now behind tail in the linear area. */
+ skb->tail += skb->data_len;
+ skb->data_len = 0;
+ return 0;
+}
+
+/* Take dev->xmit_lock and record the owning cpu -- unless the driver
+ * does its own fine-grained tx locking (NETIF_F_LLTX), in which case
+ * this is a no-op. Comments must stay outside the macro body because
+ * of the backslash continuations. */
+#define HARD_TX_LOCK(dev, cpu) { \
+ if ((dev->features & NETIF_F_LLTX) == 0) { \
+ spin_lock(&dev->xmit_lock); \
+ dev->xmit_lock_owner = cpu; \
+ } \
+}
+
+/* Release pair of HARD_TX_LOCK: clear the owner before unlocking so a
+ * racing reader never sees "locked by my cpu" on a free lock. */
+#define HARD_TX_UNLOCK(dev) { \
+ if ((dev->features & NETIF_F_LLTX) == 0) { \
+ dev->xmit_lock_owner = -1; \
+ spin_unlock(&dev->xmit_lock); \
+ } \
+}
+
+/**
+ * dev_queue_xmit - transmit a buffer
+ * @skb: buffer to transmit
+ *
+ * Queue a buffer for transmission to a network device. The caller must
+ * have set the device and priority and built the buffer before calling
+ * this function. The function can be called from an interrupt.
+ *
+ * A negative errno code is returned on a failure. A success does not
+ * guarantee the frame will be transmitted as it may be dropped due
+ * to congestion or traffic shaping.
+ */
+
+int dev_queue_xmit(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct Qdisc *q;
+ int rc = -ENOMEM;
+
+ if (skb_shinfo(skb)->frag_list &&
+ !(dev->features & NETIF_F_FRAGLIST) &&
+ __skb_linearize(skb, GFP_ATOMIC))
+ goto out_kfree_skb;
+
+ /* Fragmented skb is linearized if device does not support SG,
+ * or if at least one of fragments is in highmem and device
+ * does not support DMA from it.
+ */
+ if (skb_shinfo(skb)->nr_frags &&
+ (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
+ __skb_linearize(skb, GFP_ATOMIC))
+ goto out_kfree_skb;
+
+ /* If a checksum-deferred packet is forwarded to a device that needs a
+ * checksum, correct the pointers and force checksumming.
+ */
+ /* NOTE(review): proto_csum_blank looks Xen-specific (checksum left
+ * blank when the frame crossed from another domain) -- confirm
+ * against the sparse tree's skbuff changes. IPv4 TCP/UDP only. */
+ if (skb->proto_csum_blank) {
+ if (skb->protocol != htons(ETH_P_IP))
+ goto out_kfree_skb;
+ skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+ if (skb->h.raw >= skb->tail)
+ goto out_kfree_skb;
+ switch (skb->nh.iph->protocol) {
+ case IPPROTO_TCP:
+ skb->csum = offsetof(struct tcphdr, check);
+ break;
+ case IPPROTO_UDP:
+ skb->csum = offsetof(struct udphdr, check);
+ break;
+ default:
+ goto out_kfree_skb;
+ }
+ if ((skb->h.raw + skb->csum + 2) > skb->tail)
+ goto out_kfree_skb;
+ skb->ip_summed = CHECKSUM_HW;
+ }
+
+ /* If packet is not checksummed and device does not support
+ * checksumming for this protocol, complete checksumming here.
+ */
+ if (skb->ip_summed == CHECKSUM_HW &&
+ (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
+ (!(dev->features & NETIF_F_IP_CSUM) ||
+ skb->protocol != htons(ETH_P_IP))))
+ if (skb_checksum_help(skb, 0))
+ goto out_kfree_skb;
+
+ /* Disable soft irqs for various locks below. Also
+ * stops preemption for RCU.
+ */
+ local_bh_disable();
+
+ /* Updates of qdisc are serialized by queue_lock.
+ * The struct Qdisc which is pointed to by qdisc is now a
+ * rcu structure - it may be accessed without acquiring
+ * a lock (but the structure may be stale.) The freeing of the
+ * qdisc will be deferred until it's known that there are no
+ * more references to it.
+ *
+ * If the qdisc has an enqueue function, we still need to
+ * hold the queue_lock before calling it, since queue_lock
+ * also serializes access to the device queue.
+ */
+
+ q = rcu_dereference(dev->qdisc);
+#ifdef CONFIG_NET_CLS_ACT
+ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+#endif
+ if (q->enqueue) {
+ /* Grab device queue */
+ spin_lock(&dev->queue_lock);
+
+ rc = q->enqueue(skb, q);
+
+ qdisc_run(dev);
+
+ spin_unlock(&dev->queue_lock);
+ rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+ goto out;
+ }
+
+ /* The device has no queue. Common case for software devices:
+ loopback, all the sorts of tunnels...
+
+ Really, it is unlikely that xmit_lock protection is necessary here.
+ (f.e. loopback and IP tunnels are clean ignoring statistics
+ counters.)
+ However, it is possible, that they rely on protection
+ made by us here.
+
+ Check this and shot the lock. It is not prone from deadlocks.
+ Either shot noqueue qdisc, it is even simpler 8)
+ */
+ if (dev->flags & IFF_UP) {
+ int cpu = smp_processor_id(); /* ok because BHs are off */
+
+ if (dev->xmit_lock_owner != cpu) {
+
+ HARD_TX_LOCK(dev, cpu);
+
+ if (!netif_queue_stopped(dev)) {
+ if (netdev_nit)
+ dev_queue_xmit_nit(skb, dev);
+
+ rc = 0;
+ if (!dev->hard_start_xmit(skb, dev)) {
+ HARD_TX_UNLOCK(dev);
+ goto out;
+ }
+ }
+ HARD_TX_UNLOCK(dev);
+ if (net_ratelimit())
+ printk(KERN_CRIT "Virtual device %s asks to "
+ "queue packet!\n", dev->name);
+ } else {
+ /* Recursion is detected! It is possible,
+ * unfortunately */
+ if (net_ratelimit())
+ printk(KERN_CRIT "Dead loop on virtual device "
+ "%s, fix it urgently!\n", dev->name);
+ }
+ }
+
+ /* Device down, stopped queue, or tx recursion: drop the frame. */
+ rc = -ENETDOWN;
+ local_bh_enable();
+
+out_kfree_skb:
+ kfree_skb(skb);
+ return rc;
+out:
+ local_bh_enable();
+ return rc;
+}
+
+
+/*=======================================================================
+ Receiver routines
+ =======================================================================*/
+
+/* Per-cpu input queue length cap; also the budget for net_rx_action(). */
+int netdev_max_backlog = 300;
+int weight_p = 64; /* old backlog weight */
+/* These numbers are selected based on intuition and some
+ * experimentatiom, if you have more scientific way of doing this
+ * please go ahead and fix things.
+ */
+/* Average-backlog thresholds used by get_sample_stats() to grade
+ * congestion (NET_RX_SUCCESS .. NET_RX_CN_HIGH). */
+int no_cong_thresh = 10;
+int no_cong = 20;
+int lo_cong = 100;
+int mod_cong = 290;
+
+DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+
+
+/* Update @cpu's congestion level from an exponential moving average of
+ * its input queue length; the result feeds netif_rx()'s return value.
+ * With RAND_LIE, randomly over-report congestion to apply early drop. */
+static void get_sample_stats(int cpu)
+{
+#ifdef RAND_LIE
+ unsigned long rd;
+ int rq;
+#endif
+ struct softnet_data *sd = &per_cpu(softnet_data, cpu);
+ int blog = sd->input_pkt_queue.qlen;
+ int avg_blog = sd->avg_blog;
+
+ /* EWMA with equal weight on history and the current sample. */
+ avg_blog = (avg_blog >> 1) + (blog >> 1);
+
+ if (avg_blog > mod_cong) {
+ /* Above moderate congestion levels. */
+ sd->cng_level = NET_RX_CN_HIGH;
+#ifdef RAND_LIE
+ rd = net_random();
+ rq = rd % netdev_max_backlog;
+ if (rq < avg_blog) /* unlucky bastard */
+ sd->cng_level = NET_RX_DROP;
+#endif
+ } else if (avg_blog > lo_cong) {
+ sd->cng_level = NET_RX_CN_MOD;
+#ifdef RAND_LIE
+ rd = net_random();
+ rq = rd % netdev_max_backlog;
+ if (rq < avg_blog) /* unlucky bastard */
+ sd->cng_level = NET_RX_CN_HIGH;
+#endif
+ } else if (avg_blog > no_cong)
+ sd->cng_level = NET_RX_CN_LOW;
+ else /* no congestion */
+ sd->cng_level = NET_RX_SUCCESS;
+
+ sd->avg_blog = avg_blog;
+}
+
+#ifdef OFFLINE_SAMPLE
+/* Timer callback: sample this cpu's backlog once per jiffy instead of
+ * sampling on every netif_rx() (the OFFLINE_SAMPLE build option). */
+static void sample_queue(unsigned long dummy)
+{
+/* 10 ms 0r 1ms -- i don't care -- JHS */
+ int next_tick = 1;
+ int cpu = smp_processor_id();
+
+ get_sample_stats(cpu);
+ next_tick += jiffies;
+ mod_timer(&samp_timer, next_tick);
+}
+#endif
+
+
+/**
+ * netif_rx - post buffer to the network code
+ * @skb: buffer to post
+ *
+ * This function receives a packet from a device driver and queues it for
+ * the upper (protocol) levels to process. It always succeeds. The buffer
+ * may be dropped during processing for congestion control or by the
+ * protocol layers.
+ *
+ * return values:
+ * NET_RX_SUCCESS (no congestion)
+ * NET_RX_CN_LOW (low congestion)
+ * NET_RX_CN_MOD (moderate congestion)
+ * NET_RX_CN_HIGH (high congestion)
+ * NET_RX_DROP (packet was dropped)
+ *
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+ int this_cpu;
+ struct softnet_data *queue;
+ unsigned long flags;
+
+#ifdef CONFIG_NETPOLL
+ if (skb->dev->netpoll_rx && netpoll_rx(skb)) {
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+#endif
+
+ if (!skb->stamp.tv_sec)
+ net_timestamp(&skb->stamp);
+
+ /*
+ * The code is rearranged so that the path is the most
+ * short when CPU is congested, but is still operating.
+ */
+ local_irq_save(flags);
+ this_cpu = smp_processor_id();
+ queue = &__get_cpu_var(softnet_data);
+
+ __get_cpu_var(netdev_rx_stat).total++;
+ if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+ if (queue->input_pkt_queue.qlen) {
+ if (queue->throttle)
+ goto drop;
+
+enqueue:
+ /* Reference released by process_backlog() after
+ * the packet has been delivered. */
+ dev_hold(skb->dev);
+ __skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifndef OFFLINE_SAMPLE
+ get_sample_stats(this_cpu);
+#endif
+ local_irq_restore(flags);
+ return queue->cng_level;
+ }
+
+ if (queue->throttle)
+ queue->throttle = 0;
+
+ /* Queue was empty: kick the backlog NAPI device, then
+ * jump back to the common enqueue path. */
+ netif_rx_schedule(&queue->backlog_dev);
+ goto enqueue;
+ }
+
+ if (!queue->throttle) {
+ queue->throttle = 1;
+ __get_cpu_var(netdev_rx_stat).throttled++;
+ }
+
+drop:
+ __get_cpu_var(netdev_rx_stat).dropped++;
+ local_irq_restore(flags);
+
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
+/* Process-context variant of netif_rx(): disables preemption around the
+ * enqueue and then runs any softirq it raised, so the packet is handled
+ * without waiting for the next interrupt. */
+int netif_rx_ni(struct sk_buff *skb)
+{
+ int err;
+
+ preempt_disable();
+ err = netif_rx(skb);
+ if (local_softirq_pending())
+ do_softirq();
+ preempt_enable();
+
+ return err;
+}
+
+EXPORT_SYMBOL(netif_rx_ni);
+
+/* If the receiving device is enslaved to a bonding master, attribute
+ * the skb to the master while remembering the real ingress device. */
+static __inline__ void skb_bond(struct sk_buff *skb)
+{
+ struct net_device *master = skb->dev->master;
+
+ if (!master)
+ return;
+
+ skb->real_dev = skb->dev;
+ skb->dev = master;
+}
+
+/* NET_TX_SOFTIRQ handler: free skbs queued for deferred destruction and
+ * run qdiscs for devices queued by netif_schedule(). Both per-cpu lists
+ * are detached with irqs off, then processed with irqs on. */
+static void net_tx_action(struct softirq_action *h)
+{
+ struct softnet_data *sd = &__get_cpu_var(softnet_data);
+
+ if (sd->completion_queue) {
+ struct sk_buff *clist;
+
+ local_irq_disable();
+ clist = sd->completion_queue;
+ sd->completion_queue = NULL;
+ local_irq_enable();
+
+ while (clist) {
+ struct sk_buff *skb = clist;
+ clist = clist->next;
+
+ BUG_TRAP(!atomic_read(&skb->users));
+ __kfree_skb(skb);
+ }
+ }
+
+ if (sd->output_queue) {
+ struct net_device *head;
+
+ local_irq_disable();
+ head = sd->output_queue;
+ sd->output_queue = NULL;
+ local_irq_enable();
+
+ while (head) {
+ struct net_device *dev = head;
+ head = head->next_sched;
+
+ smp_mb__before_clear_bit();
+ clear_bit(__LINK_STATE_SCHED, &dev->state);
+
+ /* If the queue is busy, requeue instead of spinning. */
+ if (spin_trylock(&dev->queue_lock)) {
+ qdisc_run(dev);
+ spin_unlock(&dev->queue_lock);
+ } else {
+ netif_schedule(dev);
+ }
+ }
+ }
+}
+
+/* Hand @skb to one packet_type handler, taking an extra reference first
+ * because the handler consumes one. Returns the handler's result. */
+static __inline__ int deliver_skb(struct sk_buff *skb,
+ struct packet_type *pt_prev)
+{
+ atomic_inc(&skb->users);
+ return pt_prev->func(skb, skb->dev, pt_prev);
+}
+
+#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
+/* Hook filled in by the bridge module; non-zero return means the frame
+ * was consumed by bridging. */
+int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+
+/* Divert the skb to the bridge if its device is a bridge port. Any
+ * pending protocol delivery (@pt_prev) is flushed first so ordering
+ * relative to taps is preserved. Returns non-zero if consumed. */
+static __inline__ int handle_bridge(struct sk_buff **pskb,
+ struct packet_type **pt_prev, int *ret)
+{
+ struct net_bridge_port *port;
+
+ if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
+ (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
+ return 0;
+
+ if (*pt_prev) {
+ *ret = deliver_skb(*pskb, *pt_prev);
+ *pt_prev = NULL;
+ }
+
+ return br_handle_frame_hook(port, pskb);
+}
+#else
+#define handle_bridge(skb, pt_prev, ret) (0)
+#endif
+
+#ifdef CONFIG_NET_CLS_ACT
+/* TODO: Maybe we should just force sch_ingress to be compiled in
+ * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
+ * a compare and 2 stores extra right now if we dont have it on
+ * but have CONFIG_NET_CLS_ACT
+ * NOTE: This doesnt stop any functionality; if you dont have
+ * the ingress scheduler, you just cant add policies on ingress.
+ *
+ */
+/* Run the device's ingress qdisc over @skb. Returns a TC_ACT_* verdict;
+ * TC_ACT_SHOT is also used to break redirect loops once the per-skb
+ * round-trip counter exceeds MAX_RED_LOOP. */
+static int ing_filter(struct sk_buff *skb)
+{
+ struct Qdisc *q;
+ struct net_device *dev = skb->dev;
+ int result = TC_ACT_OK;
+
+ if (dev->qdisc_ingress) {
+ __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
+ if (MAX_RED_LOOP < ttl++) {
+ printk("Redir loop detected Dropping packet (%s->%s)\n",
+ skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
+ return TC_ACT_SHOT;
+ }
+
+ skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
+
+ skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
+ if (NULL == skb->input_dev) {
+ skb->input_dev = skb->dev;
+ printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name);
+ }
+ /* Re-check the qdisc under the lock; it may have been
+ * removed since the unlocked test above. */
+ spin_lock(&dev->ingress_lock);
+ if ((q = dev->qdisc_ingress) != NULL)
+ result = q->enqueue(skb, q);
+ spin_unlock(&dev->ingress_lock);
+
+ }
+
+ return result;
+}
+#endif
+
+/* Core receive path: deliver @skb to taps (ptype_all), the ingress
+ * classifier, the bridge, and finally the protocol handlers hashed by
+ * skb->protocol. The last matching handler receives the original skb;
+ * earlier matches get an extra reference via deliver_skb(). Returns the
+ * last handler's verdict or NET_RX_DROP if nobody wanted the frame. */
+int netif_receive_skb(struct sk_buff *skb)
+{
+ struct packet_type *ptype, *pt_prev;
+ int ret = NET_RX_DROP;
+ unsigned short type;
+
+#ifdef CONFIG_NETPOLL
+ if (skb->dev->netpoll_rx && skb->dev->poll && netpoll_rx(skb)) {
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+#endif
+
+ if (!skb->stamp.tv_sec)
+ net_timestamp(&skb->stamp);
+
+ skb_bond(skb);
+
+ __get_cpu_var(netdev_rx_stat).total++;
+
+ skb->h.raw = skb->nh.raw = skb->data;
+ skb->mac_len = skb->nh.raw - skb->mac.raw;
+
+ pt_prev = NULL;
+
+ rcu_read_lock();
+
+#ifdef CONFIG_NET_CLS_ACT
+ if (skb->tc_verd & TC_NCLS) {
+ skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+ goto ncls;
+ }
+#endif
+
+ /* NOTE(review): proto_csum_valid appears Xen-specific -- records
+ * whether the checksum can be trusted without re-verification. */
+ switch (skb->ip_summed) {
+ case CHECKSUM_UNNECESSARY:
+ skb->proto_csum_valid = 1;
+ break;
+ case CHECKSUM_HW:
+ /* XXX Implement me. */
+ default:
+ skb->proto_csum_valid = 0;
+ break;
+ }
+
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ if (!ptype->dev || ptype->dev == skb->dev) {
+ if (pt_prev)
+ ret = deliver_skb(skb, pt_prev);
+ pt_prev = ptype;
+ }
+ }
+
+#ifdef CONFIG_NET_CLS_ACT
+ if (pt_prev) {
+ ret = deliver_skb(skb, pt_prev);
+ pt_prev = NULL; /* noone else should process this after*/
+ } else {
+ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+ }
+
+ ret = ing_filter(skb);
+
+ if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
+ kfree_skb(skb);
+ goto out;
+ }
+
+ skb->tc_verd = 0;
+ncls:
+#endif
+
+ handle_diverter(skb);
+
+ if (handle_bridge(&skb, &pt_prev, &ret))
+ goto out;
+
+ /* Protocol handlers, hashed on the low 4 bits of the host-order
+ * protocol id. */
+ type = skb->protocol;
+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
+ if (ptype->type == type &&
+ (!ptype->dev || ptype->dev == skb->dev)) {
+ if (pt_prev)
+ ret = deliver_skb(skb, pt_prev);
+ pt_prev = ptype;
+ }
+ }
+
+ if (pt_prev) {
+ ret = pt_prev->func(skb, skb->dev, pt_prev);
+ } else {
+ kfree_skb(skb);
+ /* Jamal, now you will not able to escape explaining
+ * me how you were going to use this. :-)
+ */
+ ret = NET_RX_DROP;
+ }
+
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+/* NAPI poll method of the per-cpu backlog pseudo-device: drain packets
+ * queued by netif_rx() into netif_receive_skb(), bounded by the quota,
+ * the caller's budget, and one jiffy of wall time. Returns -1 while
+ * work remains, 0 once the queue is empty (and polling is re-enabled).
+ * Dequeue happens with irqs off; delivery with irqs on. */
+static int process_backlog(struct net_device *backlog_dev, int *budget)
+{
+ int work = 0;
+ int quota = min(backlog_dev->quota, *budget);
+ struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ unsigned long start_time = jiffies;
+
+ for (;;) {
+ struct sk_buff *skb;
+ struct net_device *dev;
+
+ local_irq_disable();
+ skb = __skb_dequeue(&queue->input_pkt_queue);
+ if (!skb)
+ goto job_done;
+ local_irq_enable();
+
+ dev = skb->dev;
+
+ netif_receive_skb(skb);
+
+ /* Drops the reference taken at enqueue time in netif_rx(). */
+ dev_put(dev);
+
+ work++;
+
+ if (work >= quota || jiffies - start_time > 1)
+ break;
+
+ }
+
+ backlog_dev->quota -= work;
+ *budget -= work;
+ return -1;
+
+job_done:
+ /* Reached with irqs still disabled from the empty dequeue above. */
+ backlog_dev->quota -= work;
+ *budget -= work;
+
+ list_del(&backlog_dev->poll_list);
+ smp_mb__before_clear_bit();
+ netif_poll_enable(backlog_dev);
+
+ if (queue->throttle)
+ queue->throttle = 0;
+ local_irq_enable();
+ return 0;
+}
+
+/* NET_RX_SOFTIRQ handler: round-robin the devices on this cpu's poll
+ * list, calling each ->poll() until the shared budget or a jiffy is
+ * spent; devices with work left are rotated to the tail and the softirq
+ * is re-raised. Entered and exited with irqs toggled around list ops. */
+static void net_rx_action(struct softirq_action *h)
+{
+ struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ unsigned long start_time = jiffies;
+ int budget = netdev_max_backlog;
+
+
+ local_irq_disable();
+
+ while (!list_empty(&queue->poll_list)) {
+ struct net_device *dev;
+
+ if (budget <= 0 || jiffies - start_time > 1)
+ goto softnet_break;
+
+ local_irq_enable();
+
+ dev = list_entry(queue->poll_list.next,
+ struct net_device, poll_list);
+
+ /* Non-zero ->poll() means the device still has work: refill
+ * its quota and move it to the back of the rotation. */
+ if (dev->quota <= 0 || dev->poll(dev, &budget)) {
+ local_irq_disable();
+ list_del(&dev->poll_list);
+ list_add_tail(&dev->poll_list, &queue->poll_list);
+ if (dev->quota < 0)
+ dev->quota += dev->weight;
+ else
+ dev->quota = dev->weight;
+ } else {
+ /* Poll finished: drop the netif_rx_schedule reference. */
+ dev_put(dev);
+ local_irq_disable();
+ }
+ }
+out:
+ local_irq_enable();
+ return;
+
+softnet_break:
+ __get_cpu_var(netdev_rx_stat).time_squeeze++;
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ goto out;
+}
+
+/* Per-address-family SIOCGIFCONF dump handlers, indexed by family. */
+static gifconf_func_t * gifconf_list [NPROTO];
+
+/**
+ * register_gifconf - register a SIOCGIF handler
+ * @family: Address family
+ * @gifconf: Function handler
+ *
+ * Register protocol dependent address dumping routines. The handler
+ * that is passed must not be freed or reused until it has been replaced
+ * by another handler.
+ */
+int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
+{
+ if (family >= NPROTO)
+ return -EINVAL;
+ gifconf_list[family] = gifconf;
+ return 0;
+}
+
+
+/*
+ * Map an interface index to its name (SIOCGIFNAME)
+ */
+
+/*
+ * We need this ioctl for efficient implementation of the
+ * if_indextoname() function required by the IPv6 API. Without
+ * it, we would have to search all the interfaces to find a
+ * match. --pb
+ */
+
+/* SIOCGIFNAME ioctl: map an interface index to its name. Copies the
+ * user's ifreq in, resolves ifr_ifindex under dev_base_lock, fills in
+ * ifr_name and copies the result back. Returns 0, -EFAULT or -ENODEV. */
+static int dev_ifname(struct ifreq __user *arg)
+{
+ struct net_device *dev;
+ struct ifreq ifr;
+
+ /*
+ * Fetch the caller's info block.
+ */
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
+
+ read_lock(&dev_base_lock);
+ dev = __dev_get_by_index(ifr.ifr_ifindex);
+ if (!dev) {
+ read_unlock(&dev_base_lock);
+ return -ENODEV;
+ }
+
+ strcpy(ifr.ifr_name, dev->name);
+ read_unlock(&dev_base_lock);
+
+ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ return 0;
+}
+
+/*
+ * Perform a SIOCGIFCONF call. This structure will change
+ * size eventually, and there is nothing I can do about it.
+ * Thus we will need a 'compatibility mode'.
+ */
+
+/* SIOCGIFCONF ioctl: for every device and every registered gifconf
+ * handler, append an info block to the user buffer (or, with a NULL
+ * buffer, just measure how much space would be needed). The updated
+ * length is written back in ifc_len. Returns 0 or -EFAULT. */
+static int dev_ifconf(char __user *arg)
+{
+ struct ifconf ifc;
+ struct net_device *dev;
+ char __user *pos;
+ int len;
+ int total;
+ int i;
+
+ /*
+ * Fetch the caller's info block.
+ */
+
+ if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
+ return -EFAULT;
+
+ pos = ifc.ifc_buf;
+ len = ifc.ifc_len;
+
+ /*
+ * Loop over the interfaces, and write an info block for each.
+ */
+
+ total = 0;
+ for (dev = dev_base; dev; dev = dev->next) {
+ for (i = 0; i < NPROTO; i++) {
+ if (gifconf_list[i]) {
+ int done;
+ if (!pos)
+ done = gifconf_list[i](dev, NULL, 0);
+ else
+ done = gifconf_list[i](dev, pos + total,
+ len - total);
+ if (done < 0)
+ return -EFAULT;
+ total += done;
+ }
+ }
+ }
+
+ /*
+ * All done. Write the updated control block back to the caller.
+ */
+ ifc.ifc_len = total;
+
+ /*
+ * Both BSD and Solaris return 0 here, so we do too.
+ */
+ return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * This is invoked by the /proc filesystem handler to display a device
+ * in detail.
+ */
+/* Return the pos'th entry of the dev_base list, or NULL when the list
+ * holds fewer than pos+1 devices. Caller must hold dev_base_lock. */
+static __inline__ struct net_device *dev_get_idx(loff_t pos)
+{
+ struct net_device *walk = dev_base;
+ loff_t seen = 0;
+
+ while (walk && seen < pos) {
+ walk = walk->next;
+ ++seen;
+ }
+
+ return seen == pos ? walk : NULL;
+}
+
+/* seq_file iterator for /proc/net/dev. start() takes dev_base_lock,
+ * which stays held across next() calls until stop() releases it.
+ * Position 0 yields SEQ_START_TOKEN so show() can print the header. */
+void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ read_lock(&dev_base_lock);
+ return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ++*pos;
+ return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
+}
+
+void dev_seq_stop(struct seq_file *seq, void *v)
+{
+ read_unlock(&dev_base_lock);
+}
+
+/* Emit one /proc/net/dev line for @dev, aggregating the driver's
+ * net_device_stats into the classic 16-column layout; devices without
+ * a get_stats method get a placeholder line instead. */
+static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
+{
+ if (dev->get_stats) {
+ struct net_device_stats *stats = dev->get_stats(dev);
+
+ seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+ "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+ dev->name, stats->rx_bytes, stats->rx_packets,
+ stats->rx_errors,
+ stats->rx_dropped + stats->rx_missed_errors,
+ stats->rx_fifo_errors,
+ stats->rx_length_errors + stats->rx_over_errors +
+ stats->rx_crc_errors + stats->rx_frame_errors,
+ stats->rx_compressed, stats->multicast,
+ stats->tx_bytes, stats->tx_packets,
+ stats->tx_errors, stats->tx_dropped,
+ stats->tx_fifo_errors, stats->collisions,
+ stats->tx_carrier_errors +
+ stats->tx_aborted_errors +
+ stats->tx_window_errors +
+ stats->tx_heartbeat_errors,
+ stats->tx_compressed);
+ } else
+ seq_printf(seq, "%6s: No statistics available.\n", dev->name);
+}
+
+/*
+ * Called from the PROCfs module. This now uses the new arbitrary sized
+ * /proc/net interface to create /proc/net/dev
+ */
+/* show() callback: the SEQ_START_TOKEN slot prints the two header
+ * lines, every real slot prints one device's statistics. */
+static int dev_seq_show(struct seq_file *seq, void *v)
+{
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq, "Inter-| Receive "
+ " | Transmit\n"
+ " face |bytes packets errs drop fifo frame "
+ "compressed multicast|bytes packets errs "
+ "drop fifo colls carrier compressed\n");
+ else
+ dev_seq_printf_stats(seq, v);
+ return 0;
+}
+
+/* Advance *pos to the next online cpu (treating *pos as a cpu number)
+ * and return its per-cpu netdev_rx_stat, or NULL past the last cpu. */
+static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+{
+ struct netif_rx_stats *rc = NULL;
+
+ while (*pos < NR_CPUS)
+ if (cpu_online(*pos)) {
+ rc = &per_cpu(netdev_rx_stat, *pos);
+ break;
+ } else
+ ++*pos;
+ return rc;
+}
+
+/* seq_file iterators for /proc/net/softnet_stat: one record per
+ * online cpu; no locking is required for these per-cpu counters. */
+static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return softnet_get_online(pos);
+}
+
+static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ++*pos;
+ return softnet_get_online(pos);
+}
+
+static void softnet_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+/* Emit one cpu's softnet counters as nine hex fields; the final field
+ * reuses the slot of the retired fastroute latency counter. */
+static int softnet_seq_show(struct seq_file *seq, void *v)
+{
+ struct netif_rx_stats *s = v;
+
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ s->total, s->dropped, s->time_squeeze, s->throttled,
+ s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
+ s->fastroute_deferred_out,
+#if 0
+ s->fastroute_latency_reduction
+#else
+ s->cpu_collision
+#endif
+ );
+ return 0;
+}
+
+/* seq_file plumbing for /proc/net/dev. */
+static struct seq_operations dev_seq_ops = {
+ .start = dev_seq_start,
+ .next = dev_seq_next,
+ .stop = dev_seq_stop,
+ .show = dev_seq_show,
+};
+
+static int dev_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &dev_seq_ops);
+}
+
+static struct file_operations dev_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = dev_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/* seq_file plumbing for /proc/net/softnet_stat. */
+static struct seq_operations softnet_seq_ops = {
+ .start = softnet_seq_start,
+ .next = softnet_seq_next,
+ .stop = softnet_seq_stop,
+ .show = softnet_seq_show,
+};
+
+static int softnet_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &softnet_seq_ops);
+}
+
+static struct file_operations softnet_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = softnet_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+#ifdef WIRELESS_EXT
+extern int wireless_proc_init(void);
+#else
+#define wireless_proc_init() 0
+#endif
+
+/* Create the networking /proc entries (dev, softnet_stat, wireless).
+ * Uses the classic goto-unwind pattern: a later failure removes the
+ * entries created before it. Returns 0 or -ENOMEM. */
+static int __init dev_proc_init(void)
+{
+ int rc = -ENOMEM;
+
+ if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+ goto out;
+ if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
+ goto out_dev;
+ if (wireless_proc_init())
+ goto out_softnet;
+ rc = 0;
+out:
+ return rc;
+out_softnet:
+ proc_net_remove("softnet_stat");
+out_dev:
+ proc_net_remove("dev");
+ goto out;
+}
+#else
+#define dev_proc_init() 0
+#endif /* CONFIG_PROC_FS */
+
+
+/**
+ * netdev_set_master - set up master/slave pair
+ * @slave: slave device
+ * @master: new master device
+ *
+ * Changes the master device of the slave. Pass %NULL to break the
+ * bonding. The caller must hold the RTNL semaphore. On a failure
+ * a negative errno code is returned. On success the reference counts
+ * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
+ * function returns zero.
+ */
+int netdev_set_master(struct net_device *slave, struct net_device *master)
+{
+ struct net_device *old = slave->master;
+
+ ASSERT_RTNL();
+
+ if (master) {
+ /* Refuse to steal an already-enslaved device. */
+ if (old)
+ return -EBUSY;
+ dev_hold(master);
+ }
+
+ slave->master = master;
+
+ /* Wait out concurrent readers of the old master pointer before
+ * dropping its reference. */
+ synchronize_net();
+
+ if (old)
+ dev_put(old);
+
+ if (master)
+ slave->flags |= IFF_SLAVE;
+ else
+ slave->flags &= ~IFF_SLAVE;
+
+ rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
+ return 0;
+}
+
+/**
+ * dev_set_promiscuity - update promiscuity count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove promsicuity from a device. While the count in the device
+ * remains above zero the interface remains promiscuous. Once it hits zero
+ * the device reverts back to normal filtering operation. A negative inc
+ * value is used to drop promiscuity on the device.
+ */
+void dev_set_promiscuity(struct net_device *dev, int inc)
+{
+ unsigned short old_flags = dev->flags;
+
+ /* NOTE(review): no underflow guard -- an unbalanced negative inc
+ * would wrap dev->promiscuity; callers must keep inc/dec paired. */
+ dev->flags |= IFF_PROMISC;
+ if ((dev->promiscuity += inc) == 0)
+ dev->flags &= ~IFF_PROMISC;
+ if (dev->flags ^ old_flags) {
+ dev_mc_upload(dev);
+ printk(KERN_INFO "device %s %s promiscuous mode\n",
+ dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
+ "left");
+ }
+}
+
+/**
+ * dev_set_allmulti - update allmulti count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove reception of all multicast frames to a device. While the
+ * count in the device remains above zero the interface remains listening
+ * to all interfaces. Once it hits zero the device reverts back to normal
+ * filtering operation. A negative @inc value is used to drop the counter
+ * when releasing a resource needing all multicasts.
+ */
+
+void dev_set_allmulti(struct net_device *dev, int inc)
+{
+ unsigned short old_flags = dev->flags;
+
+ /* Same counted-reference scheme as dev_set_promiscuity(), but the
+ * ALLMULTI transition is not logged. */
+ dev->flags |= IFF_ALLMULTI;
+ if ((dev->allmulti += inc) == 0)
+ dev->flags &= ~IFF_ALLMULTI;
+ if (dev->flags ^ old_flags)
+ dev_mc_upload(dev);
+}
+
+/* Compose the user-visible flag word for @dev: kernel-maintained bits
+ * from dev->flags (excluding the three we synthesise), PROMISC and
+ * ALLMULTI from the user-requested gflags, and IFF_RUNNING derived
+ * from the live running/carrier state. */
+unsigned dev_get_flags(const struct net_device *dev)
+{
+ const unsigned synthesised = IFF_PROMISC | IFF_ALLMULTI | IFF_RUNNING;
+ unsigned result;
+
+ result = dev->flags & ~synthesised;
+ result |= dev->gflags & (IFF_PROMISC | IFF_ALLMULTI);
+
+ if (netif_running(dev) && netif_carrier_ok(dev))
+ result |= IFF_RUNNING;
+
+ return result;
+}
+
+/*
+ * Apply a userspace-requested flag word to @dev (SIOCSIFFLAGS path).
+ * Returns 0 or the error from bringing the interface up/down.  Must be
+ * called under the RTNL semaphore.  Note the deliberate ordering below:
+ * promiscuity is synchronized before allmulti.
+ */
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+	int ret;
+	int old_flags = dev->flags;
+
+	/*
+	 * Set the flags on our device.
+	 */
+
+	/* Only the freely user-settable flags are taken from @flags;
+	 * IFF_UP/IFF_VOLATILE/IFF_PROMISC/IFF_ALLMULTI keep their kernel-
+	 * managed values and are handled explicitly further down. */
+	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
+			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
+			       IFF_AUTOMEDIA)) |
+		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
+				    IFF_ALLMULTI));
+
+	/*
+	 * Load in the correct multicast list now the flags have changed.
+	 */
+
+	dev_mc_upload(dev);
+
+	/*
+	 * Have we downed the interface. We handle IFF_UP ourselves
+	 * according to user attempts to set it, rather than blindly
+	 * setting it.
+	 */
+
+	ret = 0;
+	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
+		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+
+		if (!ret)
+			dev_mc_upload(dev);
+	}
+
+	/* Tell interested parties about any change other than up/down and
+	 * the promisc/allmulti/volatile bits handled separately. */
+	if (dev->flags & IFF_UP &&
+	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
+					  IFF_VOLATILE)))
+		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+
+	/* gflags tracks what the user asked for; translate a toggle into
+	 * an increment/decrement of the reference-counted state. */
+	if ((flags ^ dev->gflags) & IFF_PROMISC) {
+		int inc = (flags & IFF_PROMISC) ? +1 : -1;
+		dev->gflags ^= IFF_PROMISC;
+		dev_set_promiscuity(dev, inc);
+	}
+
+	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
+	   is important. Some (broken) drivers set IFF_PROMISC, when
+	   IFF_ALLMULTI is requested not asking us and not reporting.
+	 */
+	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
+		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
+		dev->gflags ^= IFF_ALLMULTI;
+		dev_set_allmulti(dev, inc);
+	}
+
+	/* Broadcast the net effect (if any) over rtnetlink. */
+	if (old_flags ^ dev->flags)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
+
+	return ret;
+}
+
+/*
+ * Change the MTU of a device.  Returns 0 on success (including the
+ * no-op case where the MTU is unchanged), -EINVAL for a negative MTU,
+ * -ENODEV if the device is not present, or the driver's own error.
+ * Interested parties are notified via NETDEV_CHANGEMTU when the device
+ * is up and the change succeeded.
+ */
+int dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	int rc = 0;
+
+	if (new_mtu == dev->mtu)
+		return 0;
+
+	/* MTU must be positive. */
+	if (new_mtu < 0)
+		return -EINVAL;
+
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	/* Let the driver veto or apply the change itself; otherwise just
+	 * record the new value. */
+	if (dev->change_mtu)
+		rc = dev->change_mtu(dev, new_mtu);
+	else
+		dev->mtu = new_mtu;
+
+	if (!rc && (dev->flags & IFF_UP))
+		notifier_call_chain(&netdev_chain,
+				    NETDEV_CHANGEMTU, dev);
+	return rc;
+}
+
+
+/*
+ *	Perform the SIOCxIFxxx calls.
+ *
+ *	Looks up the device named in @ifr and executes the per-interface
+ *	ioctl @cmd against it.  Locking is supplied by the caller
+ *	(dev_ioctl): dev_base_lock for the read-only gets, the RTNL
+ *	semaphore for everything that mutates device state.
+ */
+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+{
+	int err;
+	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+
+	if (!dev)
+		return -ENODEV;
+
+	switch (cmd) {
+		case SIOCGIFFLAGS:	/* Get interface flags */
+			ifr->ifr_flags = dev_get_flags(dev);
+			return 0;
+
+		case SIOCSIFFLAGS:	/* Set interface flags */
+			return dev_change_flags(dev, ifr->ifr_flags);
+
+		case SIOCGIFMETRIC:	/* Get the metric on the interface
+					   (currently unused) */
+			ifr->ifr_metric = 0;
+			return 0;
+
+		case SIOCSIFMETRIC:	/* Set the metric on the interface
+					   (currently unused) */
+			return -EOPNOTSUPP;
+
+		case SIOCGIFMTU:	/* Get the MTU of a device */
+			ifr->ifr_mtu = dev->mtu;
+			return 0;
+
+		case SIOCSIFMTU:	/* Set the MTU of a device */
+			return dev_set_mtu(dev, ifr->ifr_mtu);
+
+		case SIOCGIFHWADDR:
+			/* Zero-fill when the device has no hardware address
+			 * so no uninitialized bytes reach userspace. */
+			if (!dev->addr_len)
+				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
+			else
+				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
+				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+			ifr->ifr_hwaddr.sa_family = dev->type;
+			return 0;
+
+		case SIOCSIFHWADDR:
+			if (!dev->set_mac_address)
+				return -EOPNOTSUPP;
+			/* Address family must match the link type. */
+			if (ifr->ifr_hwaddr.sa_family != dev->type)
+				return -EINVAL;
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
+			if (!err)
+				notifier_call_chain(&netdev_chain,
+						    NETDEV_CHANGEADDR, dev);
+			return err;
+
+		case SIOCSIFHWBROADCAST:
+			if (ifr->ifr_hwaddr.sa_family != dev->type)
+				return -EINVAL;
+			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+			notifier_call_chain(&netdev_chain,
+					    NETDEV_CHANGEADDR, dev);
+			return 0;
+
+		case SIOCGIFMAP:
+			ifr->ifr_map.mem_start = dev->mem_start;
+			ifr->ifr_map.mem_end   = dev->mem_end;
+			ifr->ifr_map.base_addr = dev->base_addr;
+			ifr->ifr_map.irq       = dev->irq;
+			ifr->ifr_map.dma       = dev->dma;
+			ifr->ifr_map.port      = dev->if_port;
+			return 0;
+
+		case SIOCSIFMAP:
+			if (dev->set_config) {
+				if (!netif_device_present(dev))
+					return -ENODEV;
+				return dev->set_config(dev, &ifr->ifr_map);
+			}
+			return -EOPNOTSUPP;
+
+		case SIOCADDMULTI:
+			/* Multicast addresses are passed as AF_UNSPEC. */
+			if (!dev->set_multicast_list ||
+			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+				return -EINVAL;
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
+					  dev->addr_len, 1);
+
+		case SIOCDELMULTI:
+			if (!dev->set_multicast_list ||
+			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+				return -EINVAL;
+			if (!netif_device_present(dev))
+				return -ENODEV;
+			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
+					     dev->addr_len, 1);
+
+		case SIOCGIFINDEX:
+			ifr->ifr_ifindex = dev->ifindex;
+			return 0;
+
+		case SIOCGIFTXQLEN:
+			ifr->ifr_qlen = dev->tx_queue_len;
+			return 0;
+
+		case SIOCSIFTXQLEN:
+			if (ifr->ifr_qlen < 0)
+				return -EINVAL;
+			dev->tx_queue_len = ifr->ifr_qlen;
+			return 0;
+
+		case SIOCSIFNAME:
+			/* Force NUL termination before using as a string. */
+			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
+			return dev_change_name(dev, ifr->ifr_newname);
+
+		/*
+		 *	Unknown or private ioctl
+		 */
+
+		default:
+			/* Device-private, bonding, MII, bridge and WAN
+			 * commands are delegated to the driver's do_ioctl
+			 * hook; everything else is rejected. */
+			if ((cmd >= SIOCDEVPRIVATE &&
+			    cmd <= SIOCDEVPRIVATE + 15) ||
+			    cmd == SIOCBONDENSLAVE ||
+			    cmd == SIOCBONDRELEASE ||
+			    cmd == SIOCBONDSETHWADDR ||
+			    cmd == SIOCBONDSLAVEINFOQUERY ||
+			    cmd == SIOCBONDINFOQUERY ||
+			    cmd == SIOCBONDCHANGEACTIVE ||
+			    cmd == SIOCGMIIPHY ||
+			    cmd == SIOCGMIIREG ||
+			    cmd == SIOCSMIIREG ||
+			    cmd == SIOCBRADDIF ||
+			    cmd == SIOCBRDELIF ||
+			    cmd == SIOCWANDEV) {
+				err = -EOPNOTSUPP;
+				if (dev->do_ioctl) {
+					if (netif_device_present(dev))
+						err = dev->do_ioctl(dev, ifr,
+								    cmd);
+					else
+						err = -ENODEV;
+				}
+			} else
+				err = -EINVAL;
+
+	}
+	return err;
+}
+
+/*
+ *	This function handles all "interface"-type I/O control requests. The actual
+ *	'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ *	dev_ioctl	-	network device ioctl
+ *	@cmd: command to issue
+ *	@arg: pointer to a struct ifreq in user space
+ *
+ *	Issue ioctl functions to devices. This is normally called by the
+ *	user space syscall interfaces but can sometimes be useful for
+ *	other purposes. The return value is the return from the syscall if
+ *	positive or a negative errno code on error.
+ *
+ *	Commands are grouped by the locking they need: read-only gets run
+ *	under dev_base_lock, everything that mutates state takes the RTNL
+ *	semaphore and (where appropriate) requires %CAP_NET_ADMIN.
+ */
+
+int dev_ioctl(unsigned int cmd, void __user *arg)
+{
+	struct ifreq ifr;
+	int ret;
+	char *colon;
+
+	/* One special case: SIOCGIFCONF takes ifconf argument
+	   and requires shared lock, because it sleeps writing
+	   to user space.
+	 */
+
+	if (cmd == SIOCGIFCONF) {
+		rtnl_shlock();
+		ret = dev_ifconf((char __user *) arg);
+		rtnl_shunlock();
+		return ret;
+	}
+	if (cmd == SIOCGIFNAME)
+		return dev_ifname((struct ifreq __user *)arg);
+
+	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+		return -EFAULT;
+
+	/* Guarantee NUL termination of the user-supplied name. */
+	ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+	/* Strip an alias suffix ("eth0:1" -> "eth0"); the colon is put
+	 * back before copying results out so userspace sees its own
+	 * spelling of the name. */
+	colon = strchr(ifr.ifr_name, ':');
+	if (colon)
+		*colon = 0;
+
+	/*
+	 *	See which interface the caller is talking about.
+	 */
+
+	switch (cmd) {
+		/*
+		 *	These ioctl calls:
+		 *	- can be done by all.
+		 *	- atomic and do not require locking.
+		 *	- return a value
+		 */
+		case SIOCGIFFLAGS:
+		case SIOCGIFMETRIC:
+		case SIOCGIFMTU:
+		case SIOCGIFHWADDR:
+		case SIOCGIFSLAVE:
+		case SIOCGIFMAP:
+		case SIOCGIFINDEX:
+		case SIOCGIFTXQLEN:
+			dev_load(ifr.ifr_name);
+			read_lock(&dev_base_lock);
+			ret = dev_ifsioc(&ifr, cmd);
+			read_unlock(&dev_base_lock);
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		case SIOCETHTOOL:
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ethtool(&ifr);
+			rtnl_unlock();
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		/*
+		 *	These ioctl calls:
+		 *	- require superuser power.
+		 *	- require strict serialization.
+		 *	- return a value
+		 */
+		case SIOCGMIIPHY:
+		case SIOCGMIIREG:
+		case SIOCSIFNAME:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			if (!ret) {
+				if (colon)
+					*colon = ':';
+				if (copy_to_user(arg, &ifr,
+						 sizeof(struct ifreq)))
+					ret = -EFAULT;
+			}
+			return ret;
+
+		/*
+		 *	These ioctl calls:
+		 *	- require superuser power.
+		 *	- require strict serialization.
+		 *	- do not return a value
+		 */
+		case SIOCSIFFLAGS:
+		case SIOCSIFMETRIC:
+		case SIOCSIFMTU:
+		case SIOCSIFMAP:
+		case SIOCSIFHWADDR:
+		case SIOCSIFSLAVE:
+		case SIOCADDMULTI:
+		case SIOCDELMULTI:
+		case SIOCSIFHWBROADCAST:
+		case SIOCSIFTXQLEN:
+		case SIOCSMIIREG:
+		case SIOCBONDENSLAVE:
+		case SIOCBONDRELEASE:
+		case SIOCBONDSETHWADDR:
+		case SIOCBONDSLAVEINFOQUERY:
+		case SIOCBONDINFOQUERY:
+		case SIOCBONDCHANGEACTIVE:
+		case SIOCBRADDIF:
+		case SIOCBRDELIF:
+			if (!capable(CAP_NET_ADMIN))
+				return -EPERM;
+			dev_load(ifr.ifr_name);
+			rtnl_lock();
+			ret = dev_ifsioc(&ifr, cmd);
+			rtnl_unlock();
+			return ret;
+
+		case SIOCGIFMEM:
+			/* Get the per device memory space. We can add this but
+			 * currently do not support it */
+		case SIOCSIFMEM:
+			/* Set the per device memory buffer space.
+			 * Not applicable in our case */
+		case SIOCSIFLINK:
+			return -EINVAL;
+
+		/*
+		 *	Unknown or private ioctl.
+		 */
+		default:
+			/* Driver-private and WAN commands: serialize and
+			 * hand down to dev_ifsioc, which forwards to the
+			 * driver's do_ioctl hook. */
+			if (cmd == SIOCWANDEV ||
+			    (cmd >= SIOCDEVPRIVATE &&
+			     cmd <= SIOCDEVPRIVATE + 15)) {
+				dev_load(ifr.ifr_name);
+				rtnl_lock();
+				ret = dev_ifsioc(&ifr, cmd);
+				rtnl_unlock();
+				if (!ret && copy_to_user(arg, &ifr,
+							 sizeof(struct ifreq)))
+					ret = -EFAULT;
+				return ret;
+			}
+#ifdef WIRELESS_EXT
+			/* Take care of Wireless Extensions */
+			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+				/* If command is `set a parameter', or
+				 * `get the encoding parameters', check if
+				 * the user has the right to do it */
+				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
+					if (!capable(CAP_NET_ADMIN))
+						return -EPERM;
+				}
+				dev_load(ifr.ifr_name);
+				rtnl_lock();
+				/* Follow me in net/core/wireless.c */
+				ret = wireless_process_ioctl(&ifr, cmd);
+				rtnl_unlock();
+				if (IW_IS_GET(cmd) &&
+				    copy_to_user(arg, &ifr,
+					    	 sizeof(struct ifreq)))
+					ret = -EFAULT;
+				return ret;
+			}
+#endif	/* WIRELESS_EXT */
+			return -EINVAL;
+	}
+}
+
+
+/**
+ *	dev_new_index	-	allocate an ifindex
+ *
+ *	Returns a suitable unique value for a new device interface
+ *	number.  The caller must hold the rtnl semaphore or the
+ *	dev_base_lock to be sure it remains unique.
+ */
+static int dev_new_index(void)
+{
+	static int ifindex;
+
+	/* Keep bumping the counter -- skipping non-positive values when
+	 * it wraps -- until we hit an index no live device is using. */
+	do {
+		ifindex++;
+		if (ifindex <= 0)
+			ifindex = 1;
+	} while (__dev_get_by_index(ifindex));
+
+	return ifindex;
+}
+
+/* Cleared by net_dev_init(); (un)registration before that is a bug. */
+static int dev_boot_phase = 1;
+
+/* Delayed registration/unregisteration */
+static DEFINE_SPINLOCK(net_todo_list_lock);
+static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
+
+/* Queue @dev for deferred finalization; the list is drained by
+ * netdev_run_todo() after the RTNL semaphore has been dropped. */
+static inline void net_set_todo(struct net_device *dev)
+{
+	spin_lock(&net_todo_list_lock);
+	list_add_tail(&dev->todo_list, &net_todo_list);
+	spin_unlock(&net_todo_list_lock);
+}
+
+/**
+ *	register_netdevice	- register a network device
+ *	@dev: device to register
+ *
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	Callers must hold the rtnl semaphore. You may want
+ *	register_netdev() instead of this.
+ *
+ *	BUGS:
+ *	The locking appears insufficient to guarantee two parallel registers
+ *	will not get the same name.
+ */
+
+int register_netdevice(struct net_device *dev)
+{
+	struct hlist_head *head;
+	struct hlist_node *p;
+	int ret;
+
+	BUG_ON(dev_boot_phase);
+	ASSERT_RTNL();
+
+	/* When net_device's are persistent, this will be fatal. */
+	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+
+	spin_lock_init(&dev->queue_lock);
+	spin_lock_init(&dev->xmit_lock);
+	dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_CLS_ACT
+	spin_lock_init(&dev->ingress_lock);
+#endif
+
+	ret = alloc_divert_blk(dev);
+	if (ret)
+		goto out;
+
+	dev->iflink = -1;
+
+	/* Init, if this function is available */
+	if (dev->init) {
+		ret = dev->init(dev);
+		if (ret) {
+			/* Normalize positive driver returns to -EIO. */
+			if (ret > 0)
+				ret = -EIO;
+			goto out_err;
+		}
+	}
+ 
+	if (!dev_valid_name(dev->name)) {
+		ret = -EINVAL;
+		goto out_err;
+	}
+
+	dev->ifindex = dev_new_index();
+	if (dev->iflink == -1)
+		dev->iflink = dev->ifindex;
+
+	/* Check for existence of name */
+	head = dev_name_hash(dev->name);
+	hlist_for_each(p, head) {
+		struct net_device *d
+			= hlist_entry(p, struct net_device, name_hlist);
+		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
+			ret = -EEXIST;
+ 			goto out_err;
+		}
+ 	}
+
+	/* Fix illegal SG+CSUM combinations. */
+	if ((dev->features & NETIF_F_SG) &&
+	    !(dev->features & (NETIF_F_IP_CSUM |
+			       NETIF_F_NO_CSUM |
+			       NETIF_F_HW_CSUM))) {
+		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
+		       dev->name);
+		dev->features &= ~NETIF_F_SG;
+	}
+
+	/* TSO requires that SG is present as well. */
+	if ((dev->features & NETIF_F_TSO) &&
+	    !(dev->features & NETIF_F_SG)) {
+		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
+		       dev->name);
+		dev->features &= ~NETIF_F_TSO;
+	}
+
+	/*
+	 *	nil rebuild_header routine,
+	 *	that should be never called and used as just bug trap.
+	 */
+
+	if (!dev->rebuild_header)
+		dev->rebuild_header = default_rebuild_header;
+
+	/*
+	 *	Default initial state at registry is that the
+	 *	device is present.
+	 */
+
+	set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+	dev->next = NULL;
+	dev_init_scheduler(dev);
+	/* Publish the device on the global list and both hash chains
+	 * atomically with respect to readers of dev_base_lock. */
+	write_lock_bh(&dev_base_lock);
+	*dev_tail = dev;
+	dev_tail = &dev->next;
+	hlist_add_head(&dev->name_hlist, head);
+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+	dev_hold(dev);
+	dev->reg_state = NETREG_REGISTERING;
+	write_unlock_bh(&dev_base_lock);
+
+	/* Notify protocols, that a new device appeared. */
+	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+
+	/* Finish registration after unlock */
+	net_set_todo(dev);
+	ret = 0;
+
+out:
+	return ret;
+out_err:
+	free_divert_blk(dev);
+	goto out;
+}
+
+/**
+ *	register_netdev	- register a network device
+ *	@dev: device to register
+ *
+ *	Take a completed network device structure and add it to the kernel
+ *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *	chain. 0 is returned on success. A negative errno code is returned
+ *	on a failure to set up the device, or if the name is a duplicate.
+ *
+ *	This is a wrapper around register_netdevice that takes the rtnl
+ *	semaphore and expands the device name if you passed a format string
+ *	to alloc_netdev.
+ */
+int register_netdev(struct net_device *dev)
+{
+	int err;
+
+	rtnl_lock();
+
+	/*
+	 * If the name is a format string the caller wants us to do a
+	 * name allocation.
+	 */
+	if (strchr(dev->name, '%')) {
+		err = dev_alloc_name(dev, dev->name);
+		if (err < 0)
+			goto out;
+	}
+	
+	/*
+	 * Back compatibility hook. Kill this one in 2.5
+	 */
+	if (dev->name[0] == 0 || dev->name[0] == ' ') {
+		err = dev_alloc_name(dev, "eth%d");
+		if (err < 0)
+			goto out;
+	}
+
+	err = register_netdevice(dev);
+out:
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL(register_netdev);
+
+/*
+ * netdev_wait_allrefs - wait until all references are gone.
+ *
+ * This is called when unregistering network devices.
+ *
+ * Any protocol or device that holds a reference should register
+ * for netdevice notification, and cleanup and put back the
+ * reference if they receive an UNREGISTER event.
+ * We can get stuck here if buggy protocols don't correctly
+ * call dev_put.
+ */
+static void netdev_wait_allrefs(struct net_device *dev)
+{
+	unsigned long rebroadcast_time, warning_time;
+
+	rebroadcast_time = warning_time = jiffies;
+	while (atomic_read(&dev->refcnt) != 0) {
+		/* Every second: re-nag the reference holders. */
+		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
+			rtnl_shlock();
+
+			/* Rebroadcast unregister notification */
+			notifier_call_chain(&netdev_chain,
+					    NETDEV_UNREGISTER, dev);
+
+			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
+				     &dev->state)) {
+				/* We must not have linkwatch events
+				 * pending on unregister. If this
+				 * happens, we simply run the queue
+				 * unscheduled, resulting in a noop
+				 * for this device.
+				 */
+				linkwatch_run_queue();
+			}
+
+			rtnl_shunlock();
+
+			rebroadcast_time = jiffies;
+		}
+
+		msleep(250);
+
+		/* Every ten seconds: complain to the log so a leaked
+		 * reference (missing dev_put) is diagnosable. */
+		if (time_after(jiffies, warning_time + 10 * HZ)) {
+			printk(KERN_EMERG "unregister_netdevice: "
+			       "waiting for %s to become free. Usage "
+			       "count = %d\n",
+			       dev->name, atomic_read(&dev->refcnt));
+			warning_time = jiffies;
+		}
+	}
+}
+
+/* The sequence is:
+ *
+ *	rtnl_lock();
+ *	...
+ *	register_netdevice(x1);
+ *	register_netdevice(x2);
+ *	...
+ *	unregister_netdevice(y1);
+ *	unregister_netdevice(y2);
+ *      ...
+ *	rtnl_unlock();
+ *	free_netdev(y1);
+ *	free_netdev(y2);
+ *
+ * We are invoked by rtnl_unlock() after it drops the semaphore.
+ * This allows us to deal with problems:
+ * 1) We can create/delete sysfs objects which invoke hotplug
+ *    without deadlocking with linkwatch via keventd.
+ * 2) Since we run with the RTNL semaphore not held, we can sleep
+ *    safely in order to wait for the netdev refcnt to drop to zero.
+ */
+static DECLARE_MUTEX(net_todo_run_mutex);
+void netdev_run_todo(void)
+{
+	struct list_head list = LIST_HEAD_INIT(list);
+	int err;
+
+
+	/* Need to guard against multiple cpu's getting out of order. */
+	down(&net_todo_run_mutex);
+
+	/* Not safe to do outside the semaphore.  We must not return
+	 * until all unregister events invoked by the local processor
+	 * have been completed (either by this todo run, or one on
+	 * another cpu).
+	 */
+	if (list_empty(&net_todo_list))
+		goto out;
+
+	/* Snapshot list, allow later requests */
+	spin_lock(&net_todo_list_lock);
+	list_splice_init(&net_todo_list, &list);
+	spin_unlock(&net_todo_list_lock);
+		
+	while (!list_empty(&list)) {
+		struct net_device *dev
+			= list_entry(list.next, struct net_device, todo_list);
+		list_del(&dev->todo_list);
+
+		switch(dev->reg_state) {
+		case NETREG_REGISTERING:
+			/* Second half of register_netdevice(): create the
+			 * sysfs entries now that we may sleep. */
+			err = netdev_register_sysfs(dev);
+			if (err)
+				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
+				       dev->name, err);
+			dev->reg_state = NETREG_REGISTERED;
+			break;
+
+		case NETREG_UNREGISTERING:
+			netdev_unregister_sysfs(dev);
+			dev->reg_state = NETREG_UNREGISTERED;
+
+			/* Block until every holder has called dev_put(). */
+			netdev_wait_allrefs(dev);
+
+			/* paranoia */
+			BUG_ON(atomic_read(&dev->refcnt));
+			BUG_TRAP(!dev->ip_ptr);
+			BUG_TRAP(!dev->ip6_ptr);
+			BUG_TRAP(!dev->dn_ptr);
+
+
+			/* It must be the very last action, 
+			 * after this 'dev' may point to freed up memory.
+			 */
+			if (dev->destructor)
+				dev->destructor(dev);
+			break;
+
+		default:
+			printk(KERN_ERR "network todo '%s' but state %d\n",
+			       dev->name, dev->reg_state);
+			break;
+		}
+	}
+
+out:
+	up(&net_todo_run_mutex);
+}
+
+/**
+ *	alloc_netdev - allocate network device
+ *	@sizeof_priv:	size of private data to allocate space for
+ *	@name:		device name format string
+ *	@setup:		callback to initialize device
+ *
+ *	Allocates a struct net_device with private data area for driver use
+ *	and performs basic initialization.  Returns %NULL on allocation
+ *	failure.  The caller's @setup callback runs before the name is
+ *	copied in.
+ */
+struct net_device *alloc_netdev(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *))
+{
+	struct net_device *dev;
+	char *raw;
+	int total;
+
+	/* ensure 32-byte alignment of both the device and private area */
+	total  = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+	total += sizeof_priv + NETDEV_ALIGN_CONST;
+
+	raw = kmalloc(total, GFP_KERNEL);
+	if (raw == NULL) {
+		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+		return NULL;
+	}
+	memset(raw, 0, total);
+
+	/* Round the struct up to the next aligned boundary inside the
+	 * allocation; remember the slack so free_netdev() can recover
+	 * the original kmalloc pointer. */
+	dev = (struct net_device *)
+		(((long)raw + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+	dev->padded = (char *)dev - raw;
+
+	if (sizeof_priv)
+		dev->priv = netdev_priv(dev);
+
+	setup(dev);
+	strcpy(dev->name, name);
+	return dev;
+}
+EXPORT_SYMBOL(alloc_netdev);
+
+/**
+ *	free_netdev - free network device
+ *	@dev: device
+ *
+ *	This function does the last stage of destroying an allocated device
+ * 	interface. The reference to the device object is released.
+ *	If this is the last reference then it will be freed.
+ */
+void free_netdev(struct net_device *dev)
+{
+#ifdef CONFIG_SYSFS
+	/* Compatibility with error handling in drivers: a device that
+	 * never completed registration has no sysfs object, so free the
+	 * raw allocation (undoing alloc_netdev's padding) directly. */
+	if (dev->reg_state == NETREG_UNINITIALIZED) {
+		kfree((char *)dev - dev->padded);
+		return;
+	}
+
+	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
+	dev->reg_state = NETREG_RELEASED;
+
+	/* will free via class release */
+	class_device_put(&dev->class_dev);
+#else
+	kfree((char *)dev - dev->padded);
+#endif
+}
+
+/* Synchronize with packet receive processing: blocks (may sleep)
+ * until every CPU has passed a quiescent state, so no softirq can
+ * still be using state that was changed before the call. */
+void synchronize_net(void) 
+{
+	might_sleep();
+	synchronize_kernel();
+}
+
+/**
+ *	unregister_netdevice - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables. On success 0 is returned, on a failure
+ *	a negative errno code is returned.
+ *
+ *	Callers must hold the rtnl semaphore.  You may want
+ *	unregister_netdev() instead of this.
+ */
+
+int unregister_netdevice(struct net_device *dev)
+{
+	struct net_device *d, **dp;
+
+	BUG_ON(dev_boot_phase);
+	ASSERT_RTNL();
+
+	/* Some devices call without registering for initialization unwind. */
+	if (dev->reg_state == NETREG_UNINITIALIZED) {
+		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
+				  "was registered\n", dev->name, dev);
+		return -ENODEV;
+	}
+
+	BUG_ON(dev->reg_state != NETREG_REGISTERED);
+
+	/* If device is running, close it first. */
+	if (dev->flags & IFF_UP)
+		dev_close(dev);
+
+	/* And unlink it from device chain. */
+	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
+		if (d == dev) {
+			/* Unhook from the list and both hash chains under
+			 * the writer lock so readers never see a partially
+			 * removed device. */
+			write_lock_bh(&dev_base_lock);
+			hlist_del(&dev->name_hlist);
+			hlist_del(&dev->index_hlist);
+			if (dev_tail == &dev->next)
+				dev_tail = dp;
+			*dp = d->next;
+			write_unlock_bh(&dev_base_lock);
+			break;
+		}
+	}
+	if (!d) {
+		printk(KERN_ERR "unregister net_device: '%s' not found\n",
+		       dev->name);
+		return -ENODEV;
+	}
+
+	dev->reg_state = NETREG_UNREGISTERING;
+
+	/* Make sure no packet path still holds a pointer obtained from
+	 * the lists before we start tearing the device down. */
+	synchronize_net();
+
+	/* Shutdown queueing discipline. */
+	dev_shutdown(dev);
+
+	
+	/* Notify protocols, that we are about to destroy
+	   this device. They should clean all the things.
+	*/
+	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+	
+	/*
+	 *	Flush the multicast chain
+	 */
+	dev_mc_discard(dev);
+
+	if (dev->uninit)
+		dev->uninit(dev);
+
+	/* Notifier chain MUST detach us from master device. */
+	BUG_TRAP(!dev->master);
+
+	free_divert_blk(dev);
+
+	/* Finish processing unregister after unlock */
+	net_set_todo(dev);
+
+	synchronize_net();
+
+	/* Drop the registration reference; the device is actually freed
+	 * later by netdev_run_todo() / free_netdev(). */
+	dev_put(dev);
+	return 0;
+}
+
+/**
+ *	unregister_netdev - remove device from the kernel
+ *	@dev: device
+ *
+ *	This function shuts down a device interface and removes it
+ *	from the kernel tables.
+ *
+ *	This is just a wrapper for unregister_netdevice that takes
+ *	the rtnl semaphore.  In general you want to use this and not
+ *	unregister_netdevice.  Note that unlike unregister_netdevice
+ *	it returns nothing; the wrapped call's error code is dropped.
+ */
+void unregister_netdev(struct net_device *dev)
+{
+	rtnl_lock();
+	unregister_netdevice(dev);
+	rtnl_unlock();
+}
+
+EXPORT_SYMBOL(unregister_netdev);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * CPU hotplug notifier: when a CPU goes offline, migrate its per-cpu
+ * softnet queues (completion queue, output queue, input packet queue)
+ * onto the current CPU so no skbs or pending transmits are stranded.
+ */
+static int dev_cpu_callback(struct notifier_block *nfb,
+			    unsigned long action,
+			    void *ocpu)
+{
+	struct sk_buff **list_skb;
+	struct net_device **list_net;
+	struct sk_buff *skb;
+	unsigned int cpu, oldcpu = (unsigned long)ocpu;
+	struct softnet_data *sd, *oldsd;
+
+	if (action != CPU_DEAD)
+		return NOTIFY_OK;
+
+	/* Queues are manipulated with irqs off so the local softirq
+	 * cannot run concurrently with the splice. */
+	local_irq_disable();
+	cpu = smp_processor_id();
+	sd = &per_cpu(softnet_data, cpu);
+	oldsd = &per_cpu(softnet_data, oldcpu);
+
+	/* Find end of our completion_queue. */
+	list_skb = &sd->completion_queue;
+	while (*list_skb)
+		list_skb = &(*list_skb)->next;
+	/* Append completion queue from offline CPU. */
+	*list_skb = oldsd->completion_queue;
+	oldsd->completion_queue = NULL;
+
+	/* Find end of our output_queue. */
+	list_net = &sd->output_queue;
+	while (*list_net)
+		list_net = &(*list_net)->next_sched;
+	/* Append output queue from offline CPU. */
+	*list_net = oldsd->output_queue;
+	oldsd->output_queue = NULL;
+
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_enable();
+
+	/* Process offline CPU's input_pkt_queue */
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+		netif_rx(skb);
+
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+
+/*
+ *	Initialize the DEV module. At boot time this walks the device list and
+ *	unhooks any devices that fail to initialise (normally hardware not
+ *	present) and leaves us with a valid list of present and active devices.
+ *
+ */
+
+/*
+ *       This is called single threaded during boot, so no need
+ *       to take the rtnl semaphore.
+ */
+static int __init net_dev_init(void)
+{
+	int i, rc = -ENOMEM;
+
+	BUG_ON(!dev_boot_phase);
+
+	net_random_init();
+
+	if (dev_proc_init())
+		goto out;
+
+	if (netdev_sysfs_init())
+		goto out;
+
+	/* Packet-type dispatch tables. */
+	INIT_LIST_HEAD(&ptype_all);
+	for (i = 0; i < 16; i++) 
+		INIT_LIST_HEAD(&ptype_base[i]);
+
+	/* Name and ifindex hash tables for device lookup. */
+	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
+		INIT_HLIST_HEAD(&dev_name_head[i]);
+
+	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
+		INIT_HLIST_HEAD(&dev_index_head[i]);
+
+	/*
+	 *	Initialise the packet receive queues.
+	 */
+
+	for (i = 0; i < NR_CPUS; i++) {
+		struct softnet_data *queue;
+
+		queue = &per_cpu(softnet_data, i);
+		skb_queue_head_init(&queue->input_pkt_queue);
+		queue->throttle = 0;
+		queue->cng_level = 0;
+		queue->avg_blog = 10; /* arbitrary non-zero */
+		queue->completion_queue = NULL;
+		INIT_LIST_HEAD(&queue->poll_list);
+		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
+		queue->backlog_dev.weight = weight_p;
+		queue->backlog_dev.poll = process_backlog;
+		atomic_set(&queue->backlog_dev.refcnt, 1);
+	}
+
+#ifdef OFFLINE_SAMPLE
+	samp_timer.expires = jiffies + (10 * HZ);
+	add_timer(&samp_timer);
+#endif
+
+	/* From here on register_netdevice() etc. are legal. */
+	dev_boot_phase = 0;
+
+	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
+	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+
+	hotcpu_notifier(dev_cpu_callback, 0);
+	dst_init();
+	dev_mcast_init();
+	rc = 0;
+out:
+	return rc;
+}
+
+subsys_initcall(net_dev_init);
+
+EXPORT_SYMBOL(__dev_get_by_index);
+EXPORT_SYMBOL(__dev_get_by_name);
+EXPORT_SYMBOL(__dev_remove_pack);
+EXPORT_SYMBOL(__skb_linearize);
+EXPORT_SYMBOL(dev_add_pack);
+EXPORT_SYMBOL(dev_alloc_name);
+EXPORT_SYMBOL(dev_close);
+EXPORT_SYMBOL(dev_get_by_flags);
+EXPORT_SYMBOL(dev_get_by_index);
+EXPORT_SYMBOL(dev_get_by_name);
+EXPORT_SYMBOL(dev_ioctl);
+EXPORT_SYMBOL(dev_open);
+EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(dev_remove_pack);
+EXPORT_SYMBOL(dev_set_allmulti);
+EXPORT_SYMBOL(dev_set_promiscuity);
+EXPORT_SYMBOL(dev_change_flags);
+EXPORT_SYMBOL(dev_set_mtu);
+EXPORT_SYMBOL(free_netdev);
+EXPORT_SYMBOL(netdev_boot_setup_check);
+EXPORT_SYMBOL(netdev_set_master);
+EXPORT_SYMBOL(netdev_state_change);
+EXPORT_SYMBOL(netif_receive_skb);
+EXPORT_SYMBOL(netif_rx);
+EXPORT_SYMBOL(register_gifconf);
+EXPORT_SYMBOL(register_netdevice);
+EXPORT_SYMBOL(register_netdevice_notifier);
+EXPORT_SYMBOL(skb_checksum_help);
+EXPORT_SYMBOL(synchronize_net);
+EXPORT_SYMBOL(unregister_netdevice);
+EXPORT_SYMBOL(unregister_netdevice_notifier);
+EXPORT_SYMBOL(net_enable_timestamp);
+EXPORT_SYMBOL(net_disable_timestamp);
+
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+EXPORT_SYMBOL(br_handle_frame_hook);
+#endif
+
+#ifdef CONFIG_KMOD
+EXPORT_SYMBOL(dev_load);
+#endif
+
+EXPORT_PER_CPU_SYMBOL(softnet_data);
diff --git a/linux-2.6.11-xen-sparse/net/core/skbuff.c b/linux-2.6.11-xen-sparse/net/core/skbuff.c
new file mode 100644
index 0000000000..be2801e883
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/net/core/skbuff.c
@@ -0,0 +1,1523 @@
+/*
+ * Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
+ * Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
+ *
+ * Fixes:
+ * Alan Cox : Fixed the worst of the load
+ * balancer bugs.
+ * Dave Platt : Interrupt stacking fix.
+ * Richard Kooijman : Timestamp fixes.
+ * Alan Cox : Changed buffer format.
+ * Alan Cox : destructor hook for AF_UNIX etc.
+ * Linus Torvalds : Better skb_clone.
+ * Alan Cox : Added skb_copy.
+ * Alan Cox : Added all the changed routines Linus
+ * only put in the headers
+ * Ray VanTassle : Fixed --skb->lock in free
+ * Alan Cox : skb_copy copy arp field
+ * Andi Kleen : slabified it.
+ * Robert Olsson : Removed skb_head_pool
+ *
+ * NOTE:
+ * The __skb_ routines should be called with interrupts
+ * disabled, or you better be *real* sure that the operation is atomic
+ * with respect to whatever list is being frobbed (e.g. via lock_sock()
+ * or via disabling bottom half handlers, etc).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#ifdef CONFIG_NET_CLS_ACT
+#include <net/pkt_sched.h>
+#endif
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <net/protocol.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/checksum.h>
+#include <net/xfrm.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+static kmem_cache_t *skbuff_head_cache;
+
+/*
+ * Keep out-of-line to prevent kernel bloat.
+ * __builtin_return_address is not used because it is not always
+ * reliable.
+ */
+
+/**
+ * skb_over_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_put(). Not user callable.
+ */
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+	/* NOTE(review): format string has no trailing newline; message may
+	 * merge with the following console output before BUG() fires. */
+	printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
+	       here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/**
+ * skb_under_panic - private function
+ * @skb: buffer
+ * @sz: size
+ * @here: address
+ *
+ * Out of line support code for skb_push(). Not user callable.
+ */
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+	/* NOTE(review): no trailing newline in the format string (same as
+	 * skb_over_panic); kept byte-identical to the original. */
+	printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
+	       here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+	BUG();
+}
+
+/* Allocate a new skbuff. We do this ourselves so we can fill in a few
+ * 'private' fields and also do memory statistics to find all the
+ * [BEEP] leaks.
+ *
+ */
+
+/**
+ * alloc_skb - allocate a network buffer
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ *
+ * Allocate a new &sk_buff. The returned buffer has no headroom and a
+ * tail room of size bytes. The object has a reference count of one.
+ * The return is the buffer. On a failure the return is %NULL.
+ *
+ * Buffers may only be allocated from interrupts using a @gfp_mask of
+ * %GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+{
+	struct sk_buff *skb;
+	u8 *data;
+
+	/* Get the HEAD.  __GFP_DMA is stripped here: only the data buffer
+	 * allocation below honours the caller's DMA-zone request. */
+	skb = kmem_cache_alloc(skbuff_head_cache,
+			       gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	/* Get the DATA. Size must match skb_add_mtu().
+	 * The skb_shared_info block lives immediately past skb->end. */
+	size = SKB_DATA_ALIGN(size);
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data)
+		goto nodata;
+
+	/* Zero every sk_buff field that precedes ->truesize; everything
+	 * from ->truesize onward is initialised explicitly below. */
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end = data + size;
+
+	/* Fresh shared-info area: one data reference, no frags, no TSO. */
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->tso_size = 0;
+	skb_shinfo(skb)->tso_segs = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+out:
+	return skb;
+nodata:
+	kmem_cache_free(skbuff_head_cache, skb);
+	skb = NULL;
+	goto out;
+}
+
+/**
+ * alloc_skb_from_cache - allocate a network buffer
+ * @cp: kmem_cache from which to allocate the data area
+ * (object size must be big enough for @size bytes + skb overheads)
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ *
+ * Allocate a new &sk_buff. The returned buffer has no headroom and
+ * tail room of size bytes. The object has a reference count of one.
+ * The return is the buffer. On a failure the return is %NULL.
+ *
+ * Buffers may only be allocated from interrupts using a @gfp_mask of
+ * %GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
+				     unsigned int size, int gfp_mask)
+{
+	struct sk_buff *skb;
+	u8 *data;
+
+	/* Get the HEAD (never from a DMA zone; only the data area is). */
+	skb = kmem_cache_alloc(skbuff_head_cache,
+			       gfp_mask & ~__GFP_DMA);
+	if (!skb)
+		goto out;
+
+	/* Get the DATA from the caller-supplied cache.  The cache object
+	 * must hold SKB_DATA_ALIGN(size) + skb_shared_info, per the
+	 * kerneldoc above — this function does not verify that. */
+	size = SKB_DATA_ALIGN(size);
+	data = kmem_cache_alloc(cp, gfp_mask);
+	if (!data)
+		goto nodata;
+
+	/* Same initialisation as alloc_skb(): zero up to ->truesize,
+	 * then set the pointers and shared-info fields explicitly. */
+	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb->tail = data;
+	skb->end = data + size;
+
+	atomic_set(&(skb_shinfo(skb)->dataref), 1);
+	skb_shinfo(skb)->nr_frags = 0;
+	skb_shinfo(skb)->tso_size = 0;
+	skb_shinfo(skb)->tso_segs = 0;
+	skb_shinfo(skb)->frag_list = NULL;
+out:
+	return skb;
+nodata:
+	kmem_cache_free(skbuff_head_cache, skb);
+	skb = NULL;
+	goto out;
+}
+
+
+/* Detach skb's frag_list chain and drop one reference on each skb in it. */
+static void skb_drop_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+	skb_shinfo(skb)->frag_list = NULL;
+
+	do {
+		struct sk_buff *this = list;
+		list = list->next;
+		kfree_skb(this);
+	} while (list);
+}
+
+/* Take an extra reference on every skb in skb's frag_list chain, so the
+ * chain can be shared by another owner of the same data area. */
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+	struct sk_buff *list;
+
+	for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
+		skb_get(list);
+}
+
+/*
+ * Release the data buffer attached to @skb: drop page references on all
+ * paged fragments, free any frag_list chain, then kfree() the head
+ * buffer.  For a cloned skb the data is shared, so it is only freed when
+ * this caller drops the last shinfo->dataref reference.
+ */
+void skb_release_data(struct sk_buff *skb)
+{
+	if (!skb->cloned ||
+	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
+		if (skb_shinfo(skb)->nr_frags) {
+			int i;
+			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+				put_page(skb_shinfo(skb)->frags[i].page);
+		}
+
+		if (skb_shinfo(skb)->frag_list)
+			skb_drop_fraglist(skb);
+
+		kfree(skb->head);
+	}
+}
+
+/*
+ * Free an skbuff by memory without cleaning the state.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+	/* Release the (possibly shared) data, then the sk_buff head itself. */
+	skb_release_data(skb);
+	kmem_cache_free(skbuff_head_cache, skb);
+}
+
+/**
+ * __kfree_skb - private function
+ * @skb: buffer
+ *
+ * Free an sk_buff. Release anything attached to the buffer.
+ * Clean the state. This is an internal helper function. Users should
+ * always call kfree_skb
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+	/* Freeing an skb still linked on a queue is a caller bug. */
+	if (skb->list) {
+		printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
+		       "on a list (from %p).\n", NET_CALLER(skb));
+		BUG();
+	}
+
+	/* Drop all attached state before releasing the buffer memory. */
+	dst_release(skb->dst);
+#ifdef CONFIG_XFRM
+	secpath_put(skb->sp);
+#endif
+	if(skb->destructor) {
+		/* Destructors (e.g. socket accounting) are not hard-IRQ safe;
+		 * warn but still invoke it, as the original code does. */
+		if (in_irq())
+			printk(KERN_WARNING "Warning: kfree_skb on "
+					    "hard IRQ %p\n", NET_CALLER(skb));
+		skb->destructor(skb);
+	}
+#ifdef CONFIG_NETFILTER
+	nf_conntrack_put(skb->nfct);
+#ifdef CONFIG_BRIDGE_NETFILTER
+	nf_bridge_put(skb->nf_bridge);
+#endif
+#endif
+/* XXX: IS this still necessary? - JHS */
+#ifdef CONFIG_NET_SCHED
+	skb->tc_index = 0;
+#ifdef CONFIG_NET_CLS_ACT
+	skb->tc_verd = 0;
+	skb->tc_classid = 0;
+#endif
+#endif
+
+	kfree_skbmem(skb);
+}
+
+/**
+ * skb_clone - duplicate an sk_buff
+ * @skb: buffer to clone
+ * @gfp_mask: allocation priority
+ *
+ * Duplicate an &sk_buff. The new one is not owned by a socket. Both
+ * copies share the same packet data but not structure. The new
+ * buffer has a reference count of 1. If the allocation fails the
+ * function returns %NULL otherwise the new buffer is returned.
+ *
+ * If this function is called from an interrupt gfp_mask() must be
+ * %GFP_ATOMIC.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+{
+	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+
+	if (!n)
+		return NULL;
+
+/* Shorthand: copy field x from the original skb into the clone. */
+#define C(x) n->x = skb->x
+
+	/* The clone starts unqueued and unowned by any socket. */
+	n->next = n->prev = NULL;
+	n->list = NULL;
+	n->sk = NULL;
+	C(stamp);
+	C(dev);
+	C(real_dev);
+	C(h);
+	C(nh);
+	C(mac);
+	C(dst);
+	dst_clone(skb->dst);	/* clone holds its own dst reference */
+	C(sp);
+#ifdef CONFIG_INET
+	secpath_get(skb->sp);	/* ...and its own secpath reference */
+#endif
+	memcpy(n->cb, skb->cb, sizeof(skb->cb));
+	C(len);
+	C(data_len);
+	C(csum);
+	C(local_df);
+	n->cloned = 1;
+	/* proto_csum_valid/proto_csum_blank: checksum-offload state carried
+	 * across the clone — presumably Xen-sparse additions to struct
+	 * sk_buff; confirm against this tree's skbuff.h. */
+	C(proto_csum_valid);
+	C(proto_csum_blank);
+	C(pkt_type);
+	C(ip_summed);
+	C(priority);
+	C(protocol);
+	C(security);
+	n->destructor = NULL;
+#ifdef CONFIG_NETFILTER
+	C(nfmark);
+	C(nfcache);
+	C(nfct);
+	nf_conntrack_get(skb->nfct);
+	C(nfctinfo);
+#ifdef CONFIG_NETFILTER_DEBUG
+	C(nf_debug);
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	C(nf_bridge);
+	nf_bridge_get(skb->nf_bridge);
+#endif
+#endif /*CONFIG_NETFILTER*/
+#if defined(CONFIG_HIPPI)
+	C(private);
+#endif
+#ifdef CONFIG_NET_SCHED
+	C(tc_index);
+#ifdef CONFIG_NET_CLS_ACT
+	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
+	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
+	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+	C(input_dev);
+	C(tc_classid);
+#endif
+
+#endif
+	C(truesize);
+	atomic_set(&n->users, 1);
+	C(head);
+	C(data);
+	C(tail);
+	C(end);
+
+	/* Both skbs now share the same data buffer: bump dataref and mark
+	 * the original cloned as well. */
+	atomic_inc(&(skb_shinfo(skb)->dataref));
+	skb->cloned = 1;
+
+	return n;
+}
+
+/*
+ * Copy the metadata of @old into @new after @new has been given its own
+ * data area.  The h/nh/mac header pointers are rebased by the offset
+ * between the two data areas; @new is left unqueued, socketless and with
+ * no destructor, holding fresh dst/conntrack/secpath references.
+ */
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+	/*
+	 *	Shift between the two data areas in bytes
+	 */
+	unsigned long offset = new->data - old->data;
+
+	new->list	= NULL;
+	new->sk		= NULL;
+	new->dev	= old->dev;
+	new->real_dev	= old->real_dev;
+	new->priority	= old->priority;
+	new->protocol	= old->protocol;
+	new->dst	= dst_clone(old->dst);
+#ifdef CONFIG_INET
+	new->sp		= secpath_get(old->sp);
+#endif
+	new->h.raw	= old->h.raw + offset;
+	new->nh.raw	= old->nh.raw + offset;
+	new->mac.raw	= old->mac.raw + offset;
+	memcpy(new->cb, old->cb, sizeof(old->cb));
+	new->local_df	= old->local_df;
+	new->pkt_type	= old->pkt_type;
+	new->stamp	= old->stamp;
+	new->destructor = NULL;
+	new->security	= old->security;
+#ifdef CONFIG_NETFILTER
+	new->nfmark	= old->nfmark;
+	new->nfcache	= old->nfcache;
+	new->nfct	= old->nfct;
+	nf_conntrack_get(old->nfct);
+	new->nfctinfo	= old->nfctinfo;
+#ifdef CONFIG_NETFILTER_DEBUG
+	new->nf_debug	= old->nf_debug;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	new->nf_bridge	= old->nf_bridge;
+	nf_bridge_get(old->nf_bridge);
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+#ifdef CONFIG_NET_CLS_ACT
+	new->tc_verd = old->tc_verd;
+#endif
+	new->tc_index	= old->tc_index;
+#endif
+	atomic_set(&new->users, 1);
+	skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
+	skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+}
+
+/**
+ * skb_copy - create private copy of an sk_buff
+ * @skb: buffer to copy
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an &sk_buff and its data. This is used when the
+ * caller wishes to modify the data and needs a private copy of the
+ * data to alter. Returns %NULL on failure or the pointer to the buffer
+ * on success. The returned buffer has a reference count of 1.
+ *
+ * As by-product this function converts non-linear &sk_buff to linear
+ * one, so that &sk_buff becomes completely private and caller is allowed
+ * to modify all the data of returned buffer. This means that this
+ * function is not recommended for use in circumstances when only
+ * header is going to be modified. Use pskb_copy() instead.
+ */
+
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+	int headerlen = skb->data - skb->head;
+	/*
+	 *	Allocate the copy buffer.  Linear part sized to the original
+	 *	head buffer plus all fragment data, so the copy is fully linear.
+	 */
+	struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
+				      gfp_mask);
+	if (!n)
+		return NULL;
+
+	/* Set the data pointer */
+	skb_reserve(n, headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+	n->csum	     = skb->csum;
+	n->ip_summed = skb->ip_summed;
+
+	/* Negative offset copies the headroom bytes too. */
+	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
+		BUG();
+
+	copy_skb_header(n, skb);
+	return n;
+}
+
+
+/**
+ * pskb_copy - create copy of an sk_buff with private head.
+ * @skb: buffer to copy
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an &sk_buff and part of its data, located
+ * in header. Fragmented data remain shared. This is used when
+ * the caller wishes to modify only header of &sk_buff and needs
+ * private copy of the header to alter. Returns %NULL on failure
+ * or the pointer to the buffer on success.
+ * The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+{
+	/*
+	 *	Allocate the copy buffer: only the linear head is duplicated;
+	 *	paged fragments and the frag_list stay shared by reference.
+	 */
+	struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
+
+	if (!n)
+		goto out;
+
+	/* Set the data pointer */
+	skb_reserve(n, skb->data - skb->head);
+	/* Set the tail pointer and length */
+	skb_put(n, skb_headlen(skb));
+	/* Copy the bytes */
+	memcpy(n->data, skb->data, n->len);
+	n->csum	     = skb->csum;
+	n->ip_summed = skb->ip_summed;
+
+	n->data_len  = skb->data_len;
+	n->len	     = skb->len;
+
+	/* Share the page fragments: copy the descriptors, take page refs. */
+	if (skb_shinfo(skb)->nr_frags) {
+		int i;
+
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+			get_page(skb_shinfo(n)->frags[i].page);
+		}
+		skb_shinfo(n)->nr_frags = i;
+	}
+
+	/* Share the frag_list chain by taking a reference on each member. */
+	if (skb_shinfo(skb)->frag_list) {
+		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
+		skb_clone_fraglist(n);
+	}
+
+	copy_skb_header(n, skb);
+out:
+	return n;
+}
+
+/**
+ * pskb_expand_head - reallocate header of &sk_buff
+ * @skb: buffer to reallocate
+ * @nhead: room to add at head
+ * @ntail: room to add at tail
+ * @gfp_mask: allocation priority
+ *
+ * Expands (or creates identical copy, if &nhead and &ntail are zero)
+ * header of skb. &sk_buff itself is not changed. &sk_buff MUST have
+ * reference count of 1. Returns zero in the case of success or error,
+ * if expansion failed. In the last case, &sk_buff is not changed.
+ *
+ * All the pointers pointing into skb header may change and must be
+ * reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+{
+	int i;
+	u8 *data;
+	int size = nhead + (skb->end - skb->head) + ntail;
+	long off;
+
+	/* Caller must hold the only reference (see kerneldoc above). */
+	if (skb_shared(skb))
+		BUG();
+
+	size = SKB_DATA_ALIGN(size);
+
+	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+	if (!data)
+		goto nodata;
+
+	/* Copy only real data... and, alas, header. This should be
+	 * optimized for the cases when header is void.
+	 * The skb_shared_info block (at skb->end) is copied as well. */
+	memcpy(data + nhead, skb->head, skb->tail - skb->head);
+	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+
+	/* The new head inherits the frags/frag_list by reference, so take
+	 * the extra refs before releasing the old data area. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		get_page(skb_shinfo(skb)->frags[i].page);
+
+	if (skb_shinfo(skb)->frag_list)
+		skb_clone_fraglist(skb);
+
+	skb_release_data(skb);
+
+	/* Rebase every pointer into the head by the head-growth offset. */
+	off = (data + nhead) - skb->head;
+
+	skb->head     = data;
+	skb->end      = data + size;
+	skb->data    += off;
+	skb->tail    += off;
+	skb->mac.raw += off;
+	skb->h.raw   += off;
+	skb->nh.raw  += off;
+	skb->cloned   = 0;
+	atomic_set(&skb_shinfo(skb)->dataref, 1);
+	return 0;
+
+nodata:
+	return -ENOMEM;
+}
+
+/* Make private copy of skb with writable head and some headroom */
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+	struct sk_buff *skb2;
+	int delta = headroom - skb_headroom(skb);
+
+	/* Enough headroom already: a plain private copy of the head will do.
+	 * Otherwise clone and grow the head by the (aligned) shortfall. */
+	if (delta <= 0)
+		skb2 = pskb_copy(skb, GFP_ATOMIC);
+	else {
+		skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
+					     GFP_ATOMIC)) {
+			kfree_skb(skb2);
+			skb2 = NULL;
+		}
+	}
+	return skb2;	/* NULL on allocation failure; @skb is untouched */
+}
+
+
+/**
+ * skb_copy_expand - copy and expand sk_buff
+ * @skb: buffer to copy
+ * @newheadroom: new free bytes at head
+ * @newtailroom: new free bytes at tail
+ * @gfp_mask: allocation priority
+ *
+ * Make a copy of both an &sk_buff and its data and while doing so
+ * allocate additional space.
+ *
+ * This is used when the caller wishes to modify the data and needs a
+ * private copy of the data to alter as well as more space for new fields.
+ * Returns %NULL on failure or the pointer to the buffer
+ * on success. The returned buffer has a reference count of 1.
+ *
+ * You must pass %GFP_ATOMIC as the allocation priority if this function
+ * is called from an interrupt.
+ *
+ * BUG ALERT: ip_summed is not copied. Why does this work? Is it used
+ * only by netfilter in the cases when checksum is recalculated? --ANK
+ */
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+				int newheadroom, int newtailroom, int gfp_mask)
+{
+	/*
+	 *	Allocate the copy buffer
+	 */
+	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
+				      gfp_mask);
+	int head_copy_len, head_copy_off;
+
+	if (!n)
+		return NULL;
+
+	skb_reserve(n, newheadroom);
+
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+
+	/* Copy as much of the old headroom as fits in the new one;
+	 * if the new headroom is larger, offset the copy destination. */
+	head_copy_len = skb_headroom(skb);
+	head_copy_off = 0;
+	if (newheadroom <= head_copy_len)
+		head_copy_len = newheadroom;
+	else
+		head_copy_off = newheadroom - head_copy_len;
+
+	/* Copy the linear header and data. */
+	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+			  skb->len + head_copy_len))
+		BUG();
+
+	copy_skb_header(n, skb);
+
+	return n;
+}
+
+/**
+ * skb_pad - zero pad the tail of an skb
+ * @skb: buffer to pad
+ * @pad: space to pad
+ *
+ * Ensure that a buffer is followed by a padding area that is zero
+ * filled. Used by network drivers which may DMA or transfer data
+ * beyond the buffer end onto the wire.
+ *
+ * May return NULL in out of memory cases.
+ */
+
+struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+{
+	struct sk_buff *nskb;
+
+	/* If the skbuff is non linear tailroom is always zero.. */
+	if (skb_tailroom(skb) >= pad) {
+		memset(skb->data+skb->len, 0, pad);
+		return skb;
+	}
+
+	/* Not enough tailroom: copy into a larger buffer.  Note @skb is
+	 * always freed on this path, even when the copy fails and NULL
+	 * is returned — callers must not touch @skb afterwards. */
+	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+	kfree_skb(skb);
+	if (nskb)
+		memset(nskb->data+nskb->len, 0, pad);
+	return nskb;
+}
+
+/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without change of data,
+ * it is BUG().
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+	int offset = skb_headlen(skb);
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int i;
+
+	/* Walk the paged fragments; drop or shrink any that extend past
+	 * the new length.  A cloned skb must be made private first
+	 * (pskb_expand_head), which is only allowed when realloc != 0. */
+	for (i = 0; i < nfrags; i++) {
+		int end = offset + skb_shinfo(skb)->frags[i].size;
+		if (end > len) {
+			if (skb_cloned(skb)) {
+				if (!realloc)
+					BUG();
+				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+					return -ENOMEM;
+			}
+			if (len <= offset) {
+				/* Fragment lies entirely past len: drop it. */
+				put_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb)->nr_frags--;
+			} else {
+				/* Fragment straddles len: shrink it. */
+				skb_shinfo(skb)->frags[i].size = len - offset;
+			}
+		}
+		offset = end;
+	}
+
+	/* Fix up len/data_len/tail; drop the frag_list when the skb
+	 * becomes fully linear and the data is not shared. */
+	if (offset < len) {
+		skb->data_len -= skb->len - len;
+		skb->len       = len;
+	} else {
+		if (len <= skb_headlen(skb)) {
+			skb->len      = len;
+			skb->data_len = 0;
+			skb->tail     = skb->data + len;
+			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+				skb_drop_fraglist(skb);
+		} else {
+			skb->data_len -= skb->len - len;
+			skb->len       = len;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * __pskb_pull_tail - advance tail of skb header
+ * @skb: buffer to reallocate
+ * @delta: number of bytes to advance tail
+ *
+ * The function makes a sense only on a fragmented &sk_buff,
+ * it expands header moving its tail forward and copying necessary
+ * data from fragmented part.
+ *
+ * &sk_buff MUST have reference count of 1.
+ *
+ * Returns %NULL (and &sk_buff does not change) if pull failed
+ * or value of new tail of skb in the case of success.
+ *
+ * All the pointers pointing into skb header may change and must be
+ * reloaded after call to this function.
+ */
+
+/* Moves tail of skb head forward, copying data from fragmented part,
+ * when it is necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+	/* If skb has not enough free space at tail, get new one
+	 * plus 128 bytes for future expansions. If we have enough
+	 * room at tail, reallocate without expansion only if skb is cloned.
+	 */
+	int i, k, eat = (skb->tail + delta) - skb->end;
+
+	if (eat > 0 || skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
+				     GFP_ATOMIC))
+			return NULL;
+	}
+
+	/* Copy @delta bytes of fragment data into the (now writable) tail. */
+	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+		BUG();
+
+	/* Optimization: no fragments, no reasons to preestimate
+	 * size of pulled pages. Superb.
+	 */
+	if (!skb_shinfo(skb)->frag_list)
+		goto pull_pages;
+
+	/* Estimate size of pulled pages. */
+	eat = delta;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size >= eat)
+			goto pull_pages;
+		eat -= skb_shinfo(skb)->frags[i].size;
+	}
+
+	/* If we need update frag list, we are in troubles.
+	 * Certainly, it is possible to add an offset to skb data,
+	 * but taking into account that pulling is expected to
+	 * be very rare operation, it is worth to fight against
+	 * further bloating skb head and crucify ourselves here instead.
+	 * Pure masochism, indeed. 8)8)
+	 */
+	if (eat) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+		struct sk_buff *clone = NULL;
+		struct sk_buff *insp = NULL;
+
+		do {
+			if (!list)
+				BUG();
+
+			if (list->len <= eat) {
+				/* Eaten as whole. */
+				eat -= list->len;
+				list = list->next;
+				insp = list;
+			} else {
+				/* Eaten partially. */
+
+				if (skb_shared(list)) {
+					/* Sucks! We need to fork list. :-( */
+					clone = skb_clone(list, GFP_ATOMIC);
+					if (!clone)
+						return NULL;
+					insp = list->next;
+					list = clone;
+				} else {
+					/* This may be pulled without
+					 * problems. */
+					insp = list;
+				}
+				if (!pskb_pull(list, eat)) {
+					if (clone)
+						kfree_skb(clone);
+					return NULL;
+				}
+				break;
+			}
+		} while (eat);
+
+		/* Free pulled out fragments. */
+		while ((list = skb_shinfo(skb)->frag_list) != insp) {
+			skb_shinfo(skb)->frag_list = list->next;
+			kfree_skb(list);
+		}
+		/* And insert new clone at head. */
+		if (clone) {
+			clone->next = list;
+			skb_shinfo(skb)->frag_list = clone;
+		}
+	}
+	/* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+	/* Compact the frag array: drop fully-consumed frags (index i),
+	 * keep the rest (re-packed at index k), trimming the first
+	 * survivor by any remaining 'eat'. */
+	eat = delta;
+	k = 0;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size <= eat) {
+			put_page(skb_shinfo(skb)->frags[i].page);
+			eat -= skb_shinfo(skb)->frags[i].size;
+		} else {
+			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			if (eat) {
+				skb_shinfo(skb)->frags[k].page_offset += eat;
+				skb_shinfo(skb)->frags[k].size -= eat;
+				eat = 0;
+			}
+			k++;
+		}
+	}
+	skb_shinfo(skb)->nr_frags = k;
+
+	skb->tail     += delta;
+	skb->data_len -= delta;
+
+	return skb->tail;
+}
+
+/* Copy some data bits from skb to kernel buffer. */
+
+/* Copy some data bits from skb to kernel buffer.  A negative @offset
+ * reaches into the headroom (see skb_copy()).  Walks the linear head,
+ * then the paged frags (kmapped one at a time), then recurses into the
+ * frag_list.  Returns 0 on success, -EFAULT if the range is out of
+ * bounds. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+	int i, copy;
+	int start = skb_headlen(skb);
+
+	if (offset > (int)skb->len - len)
+		goto fault;
+
+	/* Copy header. */
+	if ((copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+		memcpy(to, skb->data + offset, copy);
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to     += copy;
+	}
+
+	/* Paged fragments: 'start'/'end' track each frag's logical offset
+	 * within the skb's data. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+			memcpy(to,
+			       vaddr + skb_shinfo(skb)->frags[i].page_offset+
+			       offset - start, copy);
+			kunmap_skb_frag(vaddr);
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to     += copy;
+		}
+		start = end;
+	}
+
+	/* frag_list members are handled by recursion. */
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (skb_copy_bits(list, offset - start,
+						  to, copy))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to     += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+/* Keep iterating until skb_iter_next returns false. */
+/* Keep iterating until skb_iter_next returns false. */
+/* Start an iteration over skb's data areas: the iterator initially
+ * points at the linear head (data/len), with no frag mapped yet. */
+void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i)
+{
+	i->len = skb_headlen(skb);
+	i->data = (unsigned char *)skb->data;
+	i->nextfrag = 0;
+	i->fraglist = NULL;
+}
+
+/*
+ * Advance the iterator to the next data area: the skb's paged frags
+ * (kmapped on entry, unmapped when stepped past), then each frag_list
+ * member's head and frags in turn.  Returns 1 while i->data/i->len are
+ * valid, 0 when iteration is complete (i->data is NULLed as a bug trap).
+ */
+int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i)
+{
+	/* Unmap previous, if not head fragment. */
+	if (i->nextfrag)
+		kunmap_skb_frag(i->data);
+
+	if (i->fraglist) {
+	fraglist:
+		/* We're iterating through fraglist. */
+		if (i->nextfrag < skb_shinfo(i->fraglist)->nr_frags) {
+			i->data = kmap_skb_frag(&skb_shinfo(i->fraglist)
+						->frags[i->nextfrag]);
+			i->len = skb_shinfo(i->fraglist)->frags[i->nextfrag]
+				.size;
+			i->nextfrag++;
+			return 1;
+		}
+		/* Fragments with fragments? Too hard! */
+		BUG_ON(skb_shinfo(i->fraglist)->frag_list);
+		i->fraglist = i->fraglist->next;
+		if (!i->fraglist)
+			goto end;
+
+		i->len = skb_headlen(i->fraglist);
+		i->data = i->fraglist->data;
+		i->nextfrag = 0;
+		return 1;
+	}
+
+	if (i->nextfrag < skb_shinfo(skb)->nr_frags) {
+		i->data = kmap_skb_frag(&skb_shinfo(skb)->frags[i->nextfrag]);
+		i->len = skb_shinfo(skb)->frags[i->nextfrag].size;
+		i->nextfrag++;
+		return 1;
+	}
+
+	i->fraglist = skb_shinfo(skb)->frag_list;
+	if (i->fraglist)
+		goto fraglist;
+
+end:
+	/* Bug trap for callers */
+	i->data = NULL;
+	return 0;
+}
+
+/* Abandon an iteration early: unmap the currently-kmapped frag (if the
+ * iterator is not on the linear head) and poison i->data. */
+void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i)
+{
+	/* Unmap previous, if not head fragment. */
+	if (i->data && i->nextfrag)
+		kunmap_skb_frag(i->data);
+	/* Bug trap for callers */
+	i->data = NULL;
+}
+
+/* Checksum skb data. */
+
+/* Checksum @len bytes of skb data starting at @offset, folding into
+ * @csum.  Same walk order as skb_copy_bits(): linear head, paged frags
+ * (via kmap), then recursion over the frag_list.  'pos' tracks how many
+ * bytes precede each partial checksum so csum_block_add() can rotate
+ * odd-offset sums correctly.  BUGs if the range exceeds the skb. */
+
+unsigned int skb_checksum(const struct sk_buff *skb, int offset,
+			  int len, unsigned int csum)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	int pos = 0;
+
+	/* Checksum header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial(skb->data + offset, copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		pos	= copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial(vaddr + frag->page_offset +
+					     offset - start, copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				unsigned int csum2;
+				if (copy > len)
+					copy = len;
+				csum2 = skb_checksum(list, offset - start,
+						     copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				pos    += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+
+	return csum;
+}
+
+/* Both of above in one bottle. */
+
+/* Both of above in one bottle: copy @len bytes at @offset into @to while
+ * checksumming them, using csum_partial_copy_nocheck per area.  Walk
+ * order and 'pos' bookkeeping mirror skb_checksum(). */
+
+unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
+				    u8 *to, int len, unsigned int csum)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	int pos = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		csum = csum_partial_copy_nocheck(skb->data + offset, to,
+						 copy, csum);
+		if ((len -= copy) == 0)
+			return csum;
+		offset += copy;
+		to     += copy;
+		pos	= copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			unsigned int csum2;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap_skb_frag(frag);
+			csum2 = csum_partial_copy_nocheck(vaddr +
+							  frag->page_offset +
+							  offset - start, to,
+							  copy, 0);
+			kunmap_skb_frag(vaddr);
+			csum = csum_block_add(csum, csum2, pos);
+			if (!(len -= copy))
+				return csum;
+			offset += copy;
+			to     += copy;
+			pos    += copy;
+		}
+		start = end;
+	}
+
+	/* frag_list members handled by recursion, as in skb_checksum(). */
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			unsigned int csum2;
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				csum2 = skb_copy_and_csum_bits(list,
+							       offset - start,
+							       to, copy, 0);
+				csum = csum_block_add(csum, csum2, pos);
+				if ((len -= copy) == 0)
+					return csum;
+				offset += copy;
+				to     += copy;
+				pos    += copy;
+			}
+			start = end;
+		}
+	}
+	if (len)
+		BUG();
+	return csum;
+}
+
+/*
+ * Copy the whole skb into @to for a device that cannot checksum.
+ * For CHECKSUM_HW skbs the bytes up to h.raw are copied verbatim and
+ * the rest is copied-and-checksummed, with the folded checksum stored
+ * at h.raw + skb->csum inside the output buffer.
+ */
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
+{
+	unsigned int csum;
+	long csstart;
+
+	if (skb->ip_summed == CHECKSUM_HW)
+		csstart = skb->h.raw - skb->data;
+	else
+		csstart = skb_headlen(skb);
+
+	if (csstart > skb_headlen(skb))
+		BUG();
+
+	memcpy(to, skb->data, csstart);
+
+	csum = 0;
+	if (csstart != skb->len)
+		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
+					      skb->len - csstart, 0);
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		/* skb->csum is the offset of the checksum field here. */
+		long csstuff = csstart + skb->csum;
+
+		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
+	}
+}
+
+/**
+ * skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. The list lock is taken so the function
+ * may be used safely with other locking list functions. The head item is
+ * returned or %NULL if the list is empty.
+ */
+
+struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+	unsigned long flags;
+	struct sk_buff *result;
+
+	/* IRQ-safe wrapper around the lockless __skb_dequeue(). */
+	spin_lock_irqsave(&list->lock, flags);
+	result = __skb_dequeue(list);
+	spin_unlock_irqrestore(&list->lock, flags);
+	return result;
+}
+
+/**
+ * skb_dequeue_tail - remove from the tail of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the tail of the list. The list lock is taken so the function
+ * may be used safely with other locking list functions. The tail item is
+ * returned or %NULL if the list is empty.
+ */
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+	unsigned long flags;
+	struct sk_buff *result;
+
+	/* IRQ-safe wrapper around the lockless __skb_dequeue_tail(). */
+	spin_lock_irqsave(&list->lock, flags);
+	result = __skb_dequeue_tail(list);
+	spin_unlock_irqrestore(&list->lock, flags);
+	return result;
+}
+
+/**
+ * skb_queue_purge - empty a list
+ * @list: list to empty
+ *
+ * Delete all buffers on an &sk_buff list. Each buffer is removed from
+ * the list and one reference dropped. This function takes the list
+ * lock and is atomic with respect to other list locking functions.
+ */
+void skb_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+	/* Lock taken/released per buffer by skb_dequeue(); the queue may
+	 * be refilled concurrently, we just drain until it reads empty. */
+	while ((skb = skb_dequeue(list)) != NULL)
+		kfree_skb(skb);
+}
+
+/**
+ * skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of the list. This function takes the
+ * list lock and can be used safely with other locking &sk_buff functions
+ * safely.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+	unsigned long flags;
+
+	/* IRQ-safe wrapper around the lockless __skb_queue_head(). */
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_queue_head(list, newsk);
+	spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ * skb_queue_tail - queue a buffer at the list tail
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the tail of the list. This function takes the
+ * list lock and can be used safely with other locking &sk_buff
+ * functions.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_queue_tail(list, newsk);
+ spin_unlock_irqrestore(&list->lock, flags);
+}
+/**
+ * skb_unlink - remove a buffer from a list
+ * @skb: buffer to remove
+ *
+ * Remove a packet from its list. The list locks are taken and this
+ * function is atomic with respect to other list locked calls.
+ *
+ * Works even without knowing the list it is sitting on, which can be
+ * handy at times. It also means that THE LIST MUST EXIST when you
+ * unlink. Thus a list must have its contents unlinked before it is
+ * destroyed.
+ */
+void skb_unlink(struct sk_buff *skb)
+{
+ struct sk_buff_head *list = skb->list;
+
+ if (list) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+ if (skb->list == list)
+ __skb_unlink(skb, skb->list);
+ spin_unlock_irqrestore(&list->lock, flags);
+ }
+}
+
+
+/**
+ * skb_append - append a buffer
+ * @old: buffer to insert after
+ * @newsk: buffer to insert
+ *
+ * Place a packet after a given packet in a list. The list locks are taken
+ * and this function is atomic with respect to other list locked calls.
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&old->list->lock, flags);
+ __skb_append(old, newsk);
+ spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+
+/**
+ * skb_insert - insert a buffer
+ * @old: buffer to insert before
+ * @newsk: buffer to insert
+ *
+ * Place a packet before a given packet in a list. The list locks are taken
+ * and this function is atomic with respect to other list locked calls
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&old->list->lock, flags);
+ __skb_insert(newsk, old->prev, old, old->list);
+ spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+#if 0
+/*
+ * Tune the memory allocator for a new MTU size.
+ */
+void skb_add_mtu(int mtu)
+{
+ /* Must match allocation in alloc_skb */
+ mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
+
+ kmem_add_cache_size(mtu);
+}
+#endif
+
+static inline void skb_split_inside_header(struct sk_buff *skb,
+ struct sk_buff* skb1,
+ const u32 len, const int pos)
+{
+ int i;
+
+ memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
+
+ /* And move data appendix as is. */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
+
+ skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+ skb_shinfo(skb)->nr_frags = 0;
+ skb1->data_len = skb->data_len;
+ skb1->len += skb1->data_len;
+ skb->data_len = 0;
+ skb->len = len;
+ skb->tail = skb->data + len;
+}
+
+static inline void skb_split_no_header(struct sk_buff *skb,
+ struct sk_buff* skb1,
+ const u32 len, int pos)
+{
+ int i, k = 0;
+ const int nfrags = skb_shinfo(skb)->nr_frags;
+
+ skb_shinfo(skb)->nr_frags = 0;
+ skb1->len = skb1->data_len = skb->len - len;
+ skb->len = len;
+ skb->data_len = len - pos;
+
+ for (i = 0; i < nfrags; i++) {
+ int size = skb_shinfo(skb)->frags[i].size;
+
+ if (pos + size > len) {
+ skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
+
+ if (pos < len) {
+ /* Split frag.
+ * We have two variants in this case:
+ * 1. Move all the frag to the second
+ * part, if it is possible. F.e.
+ * this approach is mandatory for TUX,
+ * where splitting is expensive.
+ * 2. Split accurately. This is what we do here.
+ */
+ get_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+ skb_shinfo(skb1)->frags[0].size -= len - pos;
+ skb_shinfo(skb)->frags[i].size = len - pos;
+ skb_shinfo(skb)->nr_frags++;
+ }
+ k++;
+ } else
+ skb_shinfo(skb)->nr_frags++;
+ pos += size;
+ }
+ skb_shinfo(skb1)->nr_frags = k;
+}
+
+/**
+ * skb_split - Split fragmented skb to two parts at length len.
+ */
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
+{
+ int pos = skb_headlen(skb);
+
+ if (len < pos) /* Split line is inside header. */
+ skb_split_inside_header(skb, skb1, len, pos);
+ else /* Second chunk has no header, nothing to copy. */
+ skb_split_no_header(skb, skb1, len, pos);
+}
+
+void __init skb_init(void)
+{
+ skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+ sizeof(struct sk_buff),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (!skbuff_head_cache)
+ panic("cannot create skbuff cache");
+}
+
+EXPORT_SYMBOL(___pskb_trim);
+EXPORT_SYMBOL(__kfree_skb);
+EXPORT_SYMBOL(__pskb_pull_tail);
+EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(pskb_copy);
+EXPORT_SYMBOL(pskb_expand_head);
+EXPORT_SYMBOL(skb_checksum);
+EXPORT_SYMBOL(skb_clone);
+EXPORT_SYMBOL(skb_clone_fraglist);
+EXPORT_SYMBOL(skb_copy);
+EXPORT_SYMBOL(skb_copy_and_csum_bits);
+EXPORT_SYMBOL(skb_copy_and_csum_dev);
+EXPORT_SYMBOL(skb_copy_bits);
+EXPORT_SYMBOL(skb_copy_expand);
+EXPORT_SYMBOL(skb_over_panic);
+EXPORT_SYMBOL(skb_pad);
+EXPORT_SYMBOL(skb_realloc_headroom);
+EXPORT_SYMBOL(skb_under_panic);
+EXPORT_SYMBOL(skb_dequeue);
+EXPORT_SYMBOL(skb_dequeue_tail);
+EXPORT_SYMBOL(skb_insert);
+EXPORT_SYMBOL(skb_queue_purge);
+EXPORT_SYMBOL(skb_queue_head);
+EXPORT_SYMBOL(skb_queue_tail);
+EXPORT_SYMBOL(skb_unlink);
+EXPORT_SYMBOL(skb_append);
+EXPORT_SYMBOL(skb_split);
+EXPORT_SYMBOL(skb_iter_first);
+EXPORT_SYMBOL(skb_iter_next);
+EXPORT_SYMBOL(skb_iter_abort);
diff --git a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S
index ba26df3a57..d018b06a64 100644
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S
+++ b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S
@@ -180,7 +180,7 @@
* Xen guest identifier and loader selection
*/
.section __xen_guest
- .ascii "GUEST_OS=netbsd,GUEST_VER=2.0,XEN_VER=2.0"
+ .ascii "GUEST_OS=netbsd,GUEST_VER=2.0,XEN_VER=3.0"
.ascii ",LOADER=generic"
#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE)
.ascii ",BSD_SYMTAB"
diff --git a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c
index 801edd9dcb..b27d3c8caf 100644
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c
+++ b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c
@@ -412,7 +412,7 @@ i386_proc0_tss_ldt_init()
ltr(lwp0.l_md.md_tss_sel);
lldt(pcb->pcb_ldt_sel);
#else
- HYPERVISOR_fpu_taskswitch();
+ HYPERVISOR_fpu_taskswitch(1);
XENPRINTF(("lwp tss sp %p ss %04x/%04x\n",
(void *)pcb->pcb_tss.tss_esp0,
pcb->pcb_tss.tss_ss0, IDXSEL(pcb->pcb_tss.tss_ss0)));
@@ -455,7 +455,7 @@ i386_switch_context(struct pcb *new)
ci = curcpu();
if (ci->ci_fpused) {
- HYPERVISOR_fpu_taskswitch();
+ HYPERVISOR_fpu_taskswitch(1);
ci->ci_fpused = 0;
}
@@ -1430,8 +1430,8 @@ initgdt()
pmap_kenter_pa((vaddr_t)gdt, (uint32_t)gdt - KERNBASE,
VM_PROT_READ);
XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT,
- LAST_RESERVED_GDT_ENTRY + 1));
- if (HYPERVISOR_set_gdt(frames, LAST_RESERVED_GDT_ENTRY + 1))
+ NGDT));
+ if (HYPERVISOR_set_gdt(frames, NGDT))
panic("HYPERVISOR_set_gdt failed!\n");
lgdt_finish();
#endif
diff --git a/netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h b/netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h
index 035495d4be..683f9640ce 100644
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h
+++ b/netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h
@@ -180,12 +180,16 @@ HYPERVISOR_set_callbacks(
}
static inline int
-HYPERVISOR_fpu_taskswitch(void)
+HYPERVISOR_fpu_taskswitch(int set)
{
int ret;
+ unsigned long ign;
+
__asm__ __volatile__ (
TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" );
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set)
+ : "memory" );
return ret;
}
@@ -280,7 +284,7 @@ HYPERVISOR_set_timer_op(uint64_t timeout)
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_hi), "c" (timeout_lo)
+ : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi)
: "memory");
return ret;
@@ -350,21 +354,6 @@ HYPERVISOR_update_descriptor(unsigned long pa, unsigned long word1,
}
static inline int
-HYPERVISOR_set_fast_trap(int idx)
-{
- int ret;
- unsigned long ign1;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_set_fast_trap), "1" (idx)
- : "memory" );
-
- return ret;
-}
-
-static inline int
HYPERVISOR_dom_mem_op(unsigned int op, unsigned long *extent_list,
unsigned long nr_extents, unsigned int extent_order)
{
@@ -398,7 +387,7 @@ HYPERVISOR_multicall(void *call_list, int nr_calls)
}
static inline int
-HYPERVISOR_update_va_mapping(unsigned long page_nr, unsigned long new_val,
+HYPERVISOR_update_va_mapping(unsigned long va, unsigned long new_val,
unsigned long flags)
{
int ret;
@@ -408,12 +397,12 @@ HYPERVISOR_update_va_mapping(unsigned long page_nr, unsigned long new_val,
TRAP_INSTR
: "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
: "0" (__HYPERVISOR_update_va_mapping),
- "1" (page_nr), "2" (new_val), "3" (flags)
+ "1" (va), "2" (new_val), "3" (flags)
: "memory" );
if (__predict_false(ret < 0))
panic("Failed update VA mapping: %08lx, %08lx, %08lx",
- page_nr, new_val, flags);
+ va, new_val, flags);
return ret;
}
@@ -494,7 +483,7 @@ HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
}
static inline int
-HYPERVISOR_update_va_mapping_otherdomain(unsigned long page_nr,
+HYPERVISOR_update_va_mapping_otherdomain(unsigned long va,
unsigned long new_val, unsigned long flags, domid_t domid)
{
int ret;
@@ -504,7 +493,7 @@ HYPERVISOR_update_va_mapping_otherdomain(unsigned long page_nr,
TRAP_INSTR
: "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
: "0" (__HYPERVISOR_update_va_mapping_otherdomain),
- "1" (page_nr), "2" (new_val), "3" (flags), "4" (domid) :
+ "1" (va), "2" (new_val), "3" (flags), "4" (domid) :
"memory" );
return ret;
diff --git a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c
index 0f275484dd..6eb5331301 100644
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c
+++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c
@@ -80,7 +80,6 @@ static int irq_bindcount[NR_IRQS];
static int xen_die_handler(void *);
#endif
static int xen_debug_handler(void *);
-static int xen_misdirect_handler(void *);
void
events_default_setup()
@@ -111,10 +110,6 @@ init_events()
event_set_handler(irq, &xen_debug_handler, NULL, IPL_DEBUG);
hypervisor_enable_irq(irq);
- irq = bind_virq_to_irq(VIRQ_MISDIRECT);
- event_set_handler(irq, &xen_misdirect_handler, NULL, IPL_DIE);
- hypervisor_enable_irq(irq);
-
/* This needs to be done early, but after the IRQ subsystem is
* alive. */
ctrl_if_init();
@@ -370,13 +365,3 @@ xen_debug_handler(void *arg)
printf("debug event\n");
return 0;
}
-
-static int
-xen_misdirect_handler(void *arg)
-{
-#if 0
- char *msg = "misdirect\n";
- (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg);
-#endif
- return 0;
-}
diff --git a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c
index 9d8618923c..3c229b644a 100644
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c
+++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c
@@ -580,7 +580,7 @@ xennet_rx_push_buffer(struct xennet_softc *sc, int id)
INVALID_P2M_ENTRY;
rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
- rx_mcl[nr_pfns].args[0] = sc->sc_rx_bufa[id].xb_rx.xbrx_va >> PAGE_SHIFT;
+ rx_mcl[nr_pfns].args[0] = sc->sc_rx_bufa[id].xb_rx.xbrx_va;
rx_mcl[nr_pfns].args[1] = 0;
rx_mcl[nr_pfns].args[2] = 0;
@@ -598,7 +598,7 @@ xennet_rx_push_buffer(struct xennet_softc *sc, int id)
xpq_flush_queue();
/* After all PTEs have been zapped we blow away stale TLB entries. */
- rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB;
+ rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
/* Give away a batch of pages. */
rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
@@ -612,7 +612,7 @@ xennet_rx_push_buffer(struct xennet_softc *sc, int id)
(void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1);
/* Check return status of HYPERVISOR_dom_mem_op(). */
- if ( rx_mcl[nr_pfns].args[5] != nr_pfns )
+ if ( rx_mcl[nr_pfns].result != nr_pfns )
panic("Unable to reduce memory reservation\n");
/* Above is a suitable barrier to ensure backend will see requests. */
@@ -679,9 +679,9 @@ xen_network_handler(void *arg)
mmu->val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
mmu++;
mcl->op = __HYPERVISOR_update_va_mapping;
- mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va >> PAGE_SHIFT;
+ mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va;
mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW;
- mcl->args[2] = UVMF_FLUSH_TLB; // 0;
+ mcl->args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; // 0;
mcl++;
xpmap_phys_to_machine_mapping
@@ -872,7 +872,7 @@ network_alloc_rx_buffers(struct xennet_softc *sc)
INVALID_P2M_ENTRY;
rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
- rx_mcl[nr_pfns].args[0] = va >> PAGE_SHIFT;
+ rx_mcl[nr_pfns].args[0] = va;
rx_mcl[nr_pfns].args[1] = 0;
rx_mcl[nr_pfns].args[2] = 0;
@@ -898,7 +898,7 @@ network_alloc_rx_buffers(struct xennet_softc *sc)
xpq_flush_queue();
/* After all PTEs have been zapped we blow away stale TLB entries. */
- rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB;
+ rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
/* Give away a batch of pages. */
rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
@@ -912,7 +912,7 @@ network_alloc_rx_buffers(struct xennet_softc *sc)
(void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1);
/* Check return status of HYPERVISOR_dom_mem_op(). */
- if (rx_mcl[nr_pfns].args[5] != nr_pfns)
+ if (rx_mcl[nr_pfns].result != nr_pfns)
panic("Unable to reduce memory reservation\n");
/* Above is a suitable barrier to ensure backend will see requests. */
diff --git a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c
index 23a96241d3..101332691c 100644
--- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c
+++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/xbd.c
@@ -476,17 +476,11 @@ get_xbda(vdisk_t *xd)
case XEN_IDE7_MAJOR:
case XEN_IDE8_MAJOR:
case XEN_IDE9_MAJOR:
- switch (VDISK_TYPE(xd->info)) {
- case VDISK_TYPE_CDROM:
+ if (xd->info & VDISK_CDROM)
return &cd_ata;
- case VDISK_TYPE_DISK:
- if (xd->capacity == 0)
- return NULL;
- return &wd_ata;
- default:
+ if (xd->capacity == 0)
return NULL;
- }
- break;
+ return &wd_ata;
#endif
default:
if (xd->capacity == 0)
diff --git a/patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch b/patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch
new file mode 100644
index 0000000000..ec39143743
--- /dev/null
+++ b/patches/linux-2.6.11/i386-cpu-hotplug-updated-for-mm.patch
@@ -0,0 +1,656 @@
+
+From: Zwane Mwaikambo <zwane@linuxpower.ca>
+
+Find attached the i386 cpu hotplug patch updated for Ingo's latest round of
+goodies. In order to avoid dumping cpu hotplug code into kernel/irq/* i
+dropped the cpu_online check in do_IRQ() by modifying fixup_irqs(). The
+difference being that on cpu offline, fixup_irqs() is called before we
+clear the cpu from cpu_online_map and a long delay in order to ensure that
+we never have any queued external interrupts on the APICs. Due to my usual
+test victims being in boxes a continent away this hasn't been tested, but
+i'll cover bug reports (nudge, Nathan! ;)
+
+1) Add CONFIG_HOTPLUG_CPU
+2) disable local APIC timer on dead cpus.
+3) Disable preempt around irq balancing to prevent CPUs going down.
+4) Print irq stats for all possible cpus.
+5) Debugging check for interrupts on offline cpus.
+6) Hacky fixup_irqs() to redirect irqs when cpus go off/online.
+7) play_dead() for offline cpus to spin inside.
+8) Handle offline cpus set in flush_tlb_others().
+9) Grab lock earlier in smp_call_function() to prevent CPUs going down.
+10) Implement __cpu_disable() and __cpu_die().
+11) Enable local interrupts in cpu_enable() after fixup_irqs()
+12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus.
+13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline.
+
+Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>
+DESC
+ppc64: fix hotplug cpu
+EDESC
+From: Zwane Mwaikambo <zwane@fsmlabs.com>
+
+I seem to have broken this when I moved the clearing of the dying cpu to
+arch specific code.
+
+Signed-off-by: Zwane Mwaikambo <zwane@fsmlabs.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+---
+
+ 25-akpm/arch/i386/Kconfig | 9 ++
+ 25-akpm/arch/i386/kernel/apic.c | 3
+ 25-akpm/arch/i386/kernel/io_apic.c | 2
+ 25-akpm/arch/i386/kernel/irq.c | 66 +++++++++++++++++----
+ 25-akpm/arch/i386/kernel/msr.c | 2
+ 25-akpm/arch/i386/kernel/process.c | 35 +++++++++++
+ 25-akpm/arch/i386/kernel/smp.c | 25 +++++---
+ 25-akpm/arch/i386/kernel/smpboot.c | 98 ++++++++++++++++++++++++++++++--
+ 25-akpm/arch/i386/kernel/traps.c | 8 ++
+ 25-akpm/arch/ia64/kernel/smpboot.c | 3
+ 25-akpm/arch/ppc64/kernel/pSeries_smp.c | 5 +
+ 25-akpm/arch/s390/kernel/smp.c | 4 -
+ 25-akpm/include/asm-i386/cpu.h | 2
+ 25-akpm/include/asm-i386/irq.h | 4 +
+ 25-akpm/include/asm-i386/smp.h | 3
+ 25-akpm/kernel/cpu.c | 14 +---
+ arch/ppc64/kernel/smp.c | 0
+ 17 files changed, 242 insertions(+), 41 deletions(-)
+
+diff -puN arch/i386/Kconfig~i386-cpu-hotplug-updated-for-mm arch/i386/Kconfig
+--- 25/arch/i386/Kconfig~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/Kconfig 2005-02-23 02:20:06.000000000 -0800
+@@ -1205,6 +1205,15 @@ config SCx200
+ This support is also available as a module. If compiled as a
+ module, it will be called scx200.
+
++config HOTPLUG_CPU
++ bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
++ depends on SMP && HOTPLUG && EXPERIMENTAL
++ ---help---
++ Say Y here to experiment with turning CPUs off and on. CPUs
++ can be controlled through /sys/devices/system/cpu.
++
++ Say N.
++
+ source "drivers/pcmcia/Kconfig"
+
+ source "drivers/pci/hotplug/Kconfig"
+diff -puN arch/i386/kernel/apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/apic.c
+--- 25/arch/i386/kernel/apic.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/kernel/apic.c 2005-02-23 02:20:06.000000000 -0800
+@@ -26,6 +26,7 @@
+ #include <linux/mc146818rtc.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/sysdev.h>
++#include <linux/cpu.h>
+
+ #include <asm/atomic.h>
+ #include <asm/smp.h>
+@@ -1048,7 +1049,7 @@ void __init setup_secondary_APIC_clock(v
+ setup_APIC_timer(calibration_result);
+ }
+
+-void __init disable_APIC_timer(void)
++void __devinit disable_APIC_timer(void)
+ {
+ if (using_apic_timer) {
+ unsigned long v;
+diff -puN arch/i386/kernel/io_apic.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/io_apic.c
+--- 25/arch/i386/kernel/io_apic.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/kernel/io_apic.c 2005-02-23 02:20:06.000000000 -0800
+@@ -576,9 +576,11 @@ static int balanced_irq(void *unused)
+ try_to_freeze(PF_FREEZE);
+ if (time_after(jiffies,
+ prev_balance_time+balanced_irq_interval)) {
++ preempt_disable();
+ do_irq_balance();
+ prev_balance_time = jiffies;
+ time_remaining = balanced_irq_interval;
++ preempt_enable();
+ }
+ }
+ return 0;
+diff -puN arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/irq.c
+--- 25/arch/i386/kernel/irq.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/kernel/irq.c 2005-02-23 02:20:06.000000000 -0800
+@@ -15,6 +15,9 @@
+ #include <linux/seq_file.h>
+ #include <linux/interrupt.h>
+ #include <linux/kernel_stat.h>
++#include <linux/notifier.h>
++#include <linux/cpu.h>
++#include <linux/delay.h>
+
+ #ifndef CONFIG_X86_LOCAL_APIC
+ /*
+@@ -209,9 +212,8 @@ int show_interrupts(struct seq_file *p,
+
+ if (i == 0) {
+ seq_printf(p, " ");
+- for (j=0; j<NR_CPUS; j++)
+- if (cpu_online(j))
+- seq_printf(p, "CPU%d ",j);
++ for_each_cpu(j)
++ seq_printf(p, "CPU%d ",j);
+ seq_putc(p, '\n');
+ }
+
+@@ -224,9 +226,8 @@ int show_interrupts(struct seq_file *p,
+ #ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+ #else
+- for (j = 0; j < NR_CPUS; j++)
+- if (cpu_online(j))
+- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
++ for_each_cpu(j)
++ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ #endif
+ seq_printf(p, " %14s", irq_desc[i].handler->typename);
+ seq_printf(p, " %s", action->name);
+@@ -239,16 +240,13 @@ skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ } else if (i == NR_IRQS) {
+ seq_printf(p, "NMI: ");
+- for (j = 0; j < NR_CPUS; j++)
+- if (cpu_online(j))
+- seq_printf(p, "%10u ", nmi_count(j));
++ for_each_cpu(j)
++ seq_printf(p, "%10u ", nmi_count(j));
+ seq_putc(p, '\n');
+ #ifdef CONFIG_X86_LOCAL_APIC
+ seq_printf(p, "LOC: ");
+- for (j = 0; j < NR_CPUS; j++)
+- if (cpu_online(j))
+- seq_printf(p, "%10u ",
+- irq_stat[j].apic_timer_irqs);
++ for_each_cpu(j)
++ seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
+ seq_putc(p, '\n');
+ #endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+@@ -258,3 +256,45 @@ skip:
+ }
+ return 0;
+ }
++
++#ifdef CONFIG_HOTPLUG_CPU
++#include <mach_apic.h>
++
++void fixup_irqs(cpumask_t map)
++{
++ unsigned int irq;
++ static int warned;
++
++ for (irq = 0; irq < NR_IRQS; irq++) {
++ cpumask_t mask;
++ if (irq == 2)
++ continue;
++
++ cpus_and(mask, irq_affinity[irq], map);
++ if (any_online_cpu(mask) == NR_CPUS) {
++ printk("Breaking affinity for irq %i\n", irq);
++ mask = map;
++ }
++ if (irq_desc[irq].handler->set_affinity)
++ irq_desc[irq].handler->set_affinity(irq, mask);
++ else if (irq_desc[irq].action && !(warned++))
++ printk("Cannot set affinity for irq %i\n", irq);
++ }
++
++#if 0
++ barrier();
++ /* Ingo Molnar says: "after the IO-APIC masks have been redirected
++ [note the nop - the interrupt-enable boundary on x86 is two
++ instructions from sti] - to flush out pending hardirqs and
++ IPIs. After this point nothing is supposed to reach this CPU." */
++ __asm__ __volatile__("sti; nop; cli");
++ barrier();
++#else
++ /* That doesn't seem sufficient. Give it 1ms. */
++ local_irq_enable();
++ mdelay(1);
++ local_irq_disable();
++#endif
++}
++#endif
++
+diff -puN arch/i386/kernel/msr.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/msr.c
+--- 25/arch/i386/kernel/msr.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/kernel/msr.c 2005-02-23 02:20:06.000000000 -0800
+@@ -260,7 +260,7 @@ static struct file_operations msr_fops =
+ .open = msr_open,
+ };
+
+-static int msr_class_simple_device_add(int i)
++static int __devinit msr_class_simple_device_add(int i)
+ {
+ int err = 0;
+ struct class_device *class_err;
+diff -puN arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/process.c
+--- 25/arch/i386/kernel/process.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/kernel/process.c 2005-02-23 02:20:06.000000000 -0800
+@@ -13,6 +13,7 @@
+
+ #include <stdarg.h>
+
++#include <linux/cpu.h>
+ #include <linux/errno.h>
+ #include <linux/sched.h>
+ #include <linux/fs.h>
+@@ -55,6 +56,9 @@
+ #include <linux/irq.h>
+ #include <linux/err.h>
+
++#include <asm/tlbflush.h>
++#include <asm/cpu.h>
++
+ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+ int hlt_counter;
+@@ -139,6 +143,34 @@ static void poll_idle (void)
+ }
+ }
+
++#ifdef CONFIG_HOTPLUG_CPU
++#include <asm/nmi.h>
++/* We don't actually take CPU down, just spin without interrupts. */
++static inline void play_dead(void)
++{
++ /* Ack it */
++ __get_cpu_var(cpu_state) = CPU_DEAD;
++
++ /* We shouldn't have to disable interrupts while dead, but
++ * some interrupts just don't seem to go away, and this makes
++ * it "work" for testing purposes. */
++ /* Death loop */
++ while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
++ cpu_relax();
++
++ local_irq_disable();
++ __flush_tlb_all();
++ cpu_set(smp_processor_id(), cpu_online_map);
++ enable_APIC_timer();
++ local_irq_enable();
++}
++#else
++static inline void play_dead(void)
++{
++ BUG();
++}
++#endif /* CONFIG_HOTPLUG_CPU */
++
+ /*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+@@ -162,6 +194,9 @@ void cpu_idle (void)
+ if (!idle)
+ idle = default_idle;
+
++ if (cpu_is_offline(cpu))
++ play_dead();
++
+ irq_stat[cpu].idle_timestamp = jiffies;
+ idle();
+ }
+diff -puN arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/smpboot.c
+--- 25/arch/i386/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/kernel/smpboot.c 2005-02-23 02:20:06.000000000 -0800
+@@ -44,6 +44,9 @@
+ #include <linux/smp_lock.h>
+ #include <linux/irq.h>
+ #include <linux/bootmem.h>
++#include <linux/notifier.h>
++#include <linux/cpu.h>
++#include <linux/percpu.h>
+
+ #include <linux/delay.h>
+ #include <linux/mc146818rtc.h>
+@@ -89,6 +92,9 @@ extern unsigned char trampoline_end [];
+ static unsigned char *trampoline_base;
+ static int trampoline_exec;
+
++/* State of each CPU. */
++DEFINE_PER_CPU(int, cpu_state) = { 0 };
++
+ /*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+@@ -1095,6 +1101,9 @@ static void __init smp_boot_cpus(unsigne
+ who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+ void __init smp_prepare_cpus(unsigned int max_cpus)
+ {
++ smp_commenced_mask = cpumask_of_cpu(0);
++ cpu_callin_map = cpumask_of_cpu(0);
++ mb();
+ smp_boot_cpus(max_cpus);
+ }
+
+@@ -1104,20 +1113,99 @@ void __devinit smp_prepare_boot_cpu(void
+ cpu_set(smp_processor_id(), cpu_callout_map);
+ }
+
+-int __devinit __cpu_up(unsigned int cpu)
++#ifdef CONFIG_HOTPLUG_CPU
++
++/* must be called with the cpucontrol mutex held */
++static int __devinit cpu_enable(unsigned int cpu)
+ {
+- /* This only works at boot for x86. See "rewrite" above. */
+- if (cpu_isset(cpu, smp_commenced_mask)) {
+- local_irq_enable();
+- return -ENOSYS;
++ /* get the target out of its holding state */
++ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
++ wmb();
++
++ /* wait for the processor to ack it. timeout? */
++ while (!cpu_online(cpu))
++ cpu_relax();
++
++ fixup_irqs(cpu_online_map);
++ /* counter the disable in fixup_irqs() */
++ local_irq_enable();
++ return 0;
++}
++
++int __cpu_disable(void)
++{
++ cpumask_t map = cpu_online_map;
++ int cpu = smp_processor_id();
++
++ /*
++ * Perhaps use cpufreq to drop frequency, but that could go
++ * into generic code.
++ *
++ * We won't take down the boot processor on i386 due to some
++ * interrupts only being able to be serviced by the BSP.
++ * Especially so if we're not using an IOAPIC -zwane
++ */
++ if (cpu == 0)
++ return -EBUSY;
++
++ /* We enable the timer again on the exit path of the death loop */
++ disable_APIC_timer();
++ /* Allow any queued timer interrupts to get serviced */
++ local_irq_enable();
++ mdelay(1);
++ local_irq_disable();
++
++ cpu_clear(cpu, map);
++ fixup_irqs(map);
++ /* It's now safe to remove this processor from the online map */
++ cpu_clear(cpu, cpu_online_map);
++ return 0;
++}
++
++void __cpu_die(unsigned int cpu)
++{
++ /* We don't do anything here: idle task is faking death itself. */
++ unsigned int i;
++
++ for (i = 0; i < 10; i++) {
++ /* They ack this in play_dead by setting CPU_DEAD */
++ if (per_cpu(cpu_state, cpu) == CPU_DEAD)
++ return;
++ current->state = TASK_UNINTERRUPTIBLE;
++ schedule_timeout(HZ/10);
+ }
++ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
++}
++#else /* ... !CONFIG_HOTPLUG_CPU */
++int __cpu_disable(void)
++{
++ return -ENOSYS;
++}
+
++void __cpu_die(unsigned int cpu)
++{
++ /* We said "no" in __cpu_disable */
++ BUG();
++}
++#endif /* CONFIG_HOTPLUG_CPU */
++
++int __devinit __cpu_up(unsigned int cpu)
++{
+ /* In case one didn't come up */
+ if (!cpu_isset(cpu, cpu_callin_map)) {
++ printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
+ local_irq_enable();
+ return -EIO;
+ }
+
++#ifdef CONFIG_HOTPLUG_CPU
++ /* Already up, and in cpu_quiescent now? */
++ if (cpu_isset(cpu, smp_commenced_mask)) {
++ cpu_enable(cpu);
++ return 0;
++ }
++#endif
++
+ local_irq_enable();
+ /* Unleash the CPU! */
+ cpu_set(cpu, smp_commenced_mask);
+diff -puN arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/smp.c
+--- 25/arch/i386/kernel/smp.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/kernel/smp.c 2005-02-23 02:20:06.000000000 -0800
+@@ -19,6 +19,7 @@
+ #include <linux/mc146818rtc.h>
+ #include <linux/cache.h>
+ #include <linux/interrupt.h>
++#include <linux/cpu.h>
+
+ #include <asm/mtrr.h>
+ #include <asm/tlbflush.h>
+@@ -163,7 +164,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
+ unsigned long flags;
+
+ local_irq_save(flags);
+-
++ WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+ /*
+ * Wait for idle.
+ */
+@@ -345,21 +346,21 @@ out:
+ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+ unsigned long va)
+ {
+- cpumask_t tmp;
+ /*
+ * A couple of (to be removed) sanity checks:
+ *
+- * - we do not send IPIs to not-yet booted CPUs.
+ * - current CPU must not be in mask
+ * - mask must exist :)
+ */
+ BUG_ON(cpus_empty(cpumask));
+-
+- cpus_and(tmp, cpumask, cpu_online_map);
+- BUG_ON(!cpus_equal(cpumask, tmp));
+ BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+ BUG_ON(!mm);
+
++ /* If a CPU which we ran on has gone down, OK. */
++ cpus_and(cpumask, cpumask, cpu_online_map);
++ if (cpus_empty(cpumask))
++ return;
++
+ /*
+ * i'm not happy about this global shared spinlock in the
+ * MM hot path, but we'll see how contended it is.
+@@ -484,6 +485,7 @@ void smp_send_nmi_allbutself(void)
+ */
+ void smp_send_reschedule(int cpu)
+ {
++ WARN_ON(cpu_is_offline(cpu));
+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+ }
+
+@@ -524,10 +526,16 @@ int smp_call_function (void (*func) (voi
+ */
+ {
+ struct call_data_struct data;
+- int cpus = num_online_cpus()-1;
++ int cpus;
+
+- if (!cpus)
++ /* Holding any lock stops cpus from going down. */
++ spin_lock(&call_lock);
++ cpus = num_online_cpus()-1;
++
++ if (!cpus) {
++ spin_unlock(&call_lock);
+ return 0;
++ }
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+@@ -539,7 +547,6 @@ int smp_call_function (void (*func) (voi
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+- spin_lock(&call_lock);
+ call_data = &data;
+ mb();
+
+diff -puN arch/i386/kernel/traps.c~i386-cpu-hotplug-updated-for-mm arch/i386/kernel/traps.c
+--- 25/arch/i386/kernel/traps.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/i386/kernel/traps.c 2005-02-23 02:20:06.000000000 -0800
+@@ -669,6 +669,14 @@ fastcall void do_nmi(struct pt_regs * re
+ nmi_enter();
+
+ cpu = smp_processor_id();
++
++#ifdef CONFIG_HOTPLUG_CPU
++ if (!cpu_online(cpu)) {
++ nmi_exit();
++ return;
++ }
++#endif
++
+ ++nmi_count(cpu);
+
+ if (!nmi_callback(regs, cpu))
+diff -puN arch/ia64/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm arch/ia64/kernel/smpboot.c
+--- 25/arch/ia64/kernel/smpboot.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/ia64/kernel/smpboot.c 2005-02-23 02:20:06.000000000 -0800
+@@ -590,9 +590,10 @@ int __cpu_disable(void)
+ if (cpu == 0)
+ return -EBUSY;
+
++ cpu_clear(cpu, cpu_online_map);
+ fixup_irqs();
+ local_flush_tlb_all();
+- printk ("Disabled cpu %u\n", smp_processor_id());
++ printk("Disabled cpu %u\n", cpu);
+ return 0;
+ }
+
+diff -puN arch/ppc64/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/ppc64/kernel/smp.c
+diff -puN arch/s390/kernel/smp.c~i386-cpu-hotplug-updated-for-mm arch/s390/kernel/smp.c
+--- 25/arch/s390/kernel/smp.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/arch/s390/kernel/smp.c 2005-02-23 02:20:06.000000000 -0800
+@@ -679,12 +679,14 @@ __cpu_disable(void)
+ {
+ unsigned long flags;
+ ec_creg_mask_parms cr_parms;
++ int cpu = smp_processor_id();
+
+ spin_lock_irqsave(&smp_reserve_lock, flags);
+- if (smp_cpu_reserved[smp_processor_id()] != 0) {
++ if (smp_cpu_reserved[cpu] != 0) {
+ spin_unlock_irqrestore(&smp_reserve_lock, flags);
+ return -EBUSY;
+ }
++ cpu_clear(cpu, cpu_online_map);
+
+ #ifdef CONFIG_PFAULT
+ /* Disable pfault pseudo page faults on this cpu. */
+diff -puN include/asm-i386/cpu.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/cpu.h
+--- 25/include/asm-i386/cpu.h~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/include/asm-i386/cpu.h 2005-02-23 02:20:06.000000000 -0800
+@@ -5,6 +5,7 @@
+ #include <linux/cpu.h>
+ #include <linux/topology.h>
+ #include <linux/nodemask.h>
++#include <linux/percpu.h>
+
+ #include <asm/node.h>
+
+@@ -17,4 +18,5 @@ extern int arch_register_cpu(int num);
+ extern void arch_unregister_cpu(int);
+ #endif
+
++DECLARE_PER_CPU(int, cpu_state);
+ #endif /* _ASM_I386_CPU_H_ */
+diff -puN include/asm-i386/irq.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/irq.h
+--- 25/include/asm-i386/irq.h~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/include/asm-i386/irq.h 2005-02-23 02:20:06.000000000 -0800
+@@ -38,4 +38,8 @@ extern void release_vm86_irqs(struct tas
+ extern int irqbalance_disable(char *str);
+ #endif
+
++#ifdef CONFIG_HOTPLUG_CPU
++extern void fixup_irqs(cpumask_t map);
++#endif
++
+ #endif /* _ASM_IRQ_H */
+diff -puN include/asm-i386/smp.h~i386-cpu-hotplug-updated-for-mm include/asm-i386/smp.h
+--- 25/include/asm-i386/smp.h~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/include/asm-i386/smp.h 2005-02-23 02:20:06.000000000 -0800
+@@ -85,6 +85,9 @@ static __inline int logical_smp_processo
+ }
+
+ #endif
++
++extern int __cpu_disable(void);
++extern void __cpu_die(unsigned int cpu);
+ #endif /* !__ASSEMBLY__ */
+
+ #define NO_PROC_ID 0xFF /* No processor magic marker */
+diff -puN kernel/cpu.c~i386-cpu-hotplug-updated-for-mm kernel/cpu.c
+--- 25/kernel/cpu.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:06.000000000 -0800
++++ 25-akpm/kernel/cpu.c 2005-02-23 02:20:06.000000000 -0800
+@@ -63,19 +63,15 @@ static int take_cpu_down(void *unused)
+ {
+ int err;
+
+- /* Take offline: makes arch_cpu_down somewhat easier. */
+- cpu_clear(smp_processor_id(), cpu_online_map);
+-
+ /* Ensure this CPU doesn't handle any more interrupts. */
+ err = __cpu_disable();
+ if (err < 0)
+- cpu_set(smp_processor_id(), cpu_online_map);
+- else
+- /* Force idle task to run as soon as we yield: it should
+- immediately notice cpu is offline and die quickly. */
+- sched_idle_next();
++ return err;
+
+- return err;
++ /* Force idle task to run as soon as we yield: it should
++ immediately notice cpu is offline and die quickly. */
++ sched_idle_next();
++ return 0;
+ }
+
+ int cpu_down(unsigned int cpu)
+diff -puN arch/ppc64/kernel/pSeries_smp.c~i386-cpu-hotplug-updated-for-mm arch/ppc64/kernel/pSeries_smp.c
+--- 25/arch/ppc64/kernel/pSeries_smp.c~i386-cpu-hotplug-updated-for-mm 2005-02-23 02:20:08.000000000 -0800
++++ 25-akpm/arch/ppc64/kernel/pSeries_smp.c 2005-02-23 02:20:08.000000000 -0800
+@@ -86,10 +86,13 @@ static int query_cpu_stopped(unsigned in
+
+ int pSeries_cpu_disable(void)
+ {
++ int cpu = smp_processor_id();
++
++ cpu_clear(cpu, cpu_online_map);
+ systemcfg->processorCount--;
+
+ /*fix boot_cpuid here*/
+- if (smp_processor_id() == boot_cpuid)
++ if (cpu == boot_cpuid)
+ boot_cpuid = any_online_cpu(cpu_online_map);
+
+ /* FIXME: abstract this to not be platform specific later on */
+_
diff --git a/patches/linux-2.6.11/net-csum.patch b/patches/linux-2.6.11/net-csum.patch
new file mode 100644
index 0000000000..115cc1ed13
--- /dev/null
+++ b/patches/linux-2.6.11/net-csum.patch
@@ -0,0 +1,22 @@
+diff -ur linux-2.6.11/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.11-csum/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+--- linux-2.6.11/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2005-05-27 11:47:48 +01:00
++++ linux-2.6.11-csum/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2005-05-27 11:48:07 +01:00
+@@ -803,6 +803,7 @@
+ */
+ /* FIXME: Source route IP option packets --RR */
+ if (hooknum == NF_IP_PRE_ROUTING
++ && skb->ip_summed != CHECKSUM_UNNECESSARY
+ && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
+ skb->ip_summed == CHECKSUM_HW ? skb->csum
+ : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {
+diff -ur linux-2.6.11/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.11-csum/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+--- linux-2.6.11/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2005-05-27 11:47:48 +01:00
++++ linux-2.6.11-csum/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2005-05-27 11:48:07 +01:00
+@@ -120,6 +120,7 @@
+ * and moreover root might send raw packets.
+ * FIXME: Source route IP option packets --RR */
+ if (hooknum == NF_IP_PRE_ROUTING
++ && skb->ip_summed != CHECKSUM_UNNECESSARY
+ && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
+ skb->ip_summed == CHECKSUM_HW ? skb->csum
+ : skb_checksum(skb, iph->ihl*4, udplen, 0))) {
diff --git a/patches/linux-2.6.11/nettel.patch b/patches/linux-2.6.11/nettel.patch
deleted file mode 100644
index 319023e54b..0000000000
--- a/patches/linux-2.6.11/nettel.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-diff -rup pristine-linux-2.6.11/drivers/mtd/maps/nettel.c linux-2.6.11/drivers/mtd/maps/nettel.c
---- pristine-linux-2.6.11/drivers/mtd/maps/nettel.c 2005-03-02 07:37:50.000000000 +0000
-+++ linux-2.6.11/drivers/mtd/maps/nettel.c 2005-03-11 00:52:55.635506733 +0000
-@@ -270,7 +270,7 @@ int __init nettel_init(void)
- maxsize = AMD_WINDOW_MAXSIZE;
-
- *amdpar = SC520_PAR(SC520_PAR_BOOTCS, amdaddr, maxsize);
-- __asm__ ("wbinvd");
-+ wbinvd();
-
- nettel_amd_map.phys = amdaddr;
- nettel_amd_map.virt = ioremap_nocache(amdaddr, maxsize);
-@@ -380,7 +380,7 @@ int __init nettel_init(void)
- */
- intel1addr = intel0addr + intel0size;
- *intel1par = SC520_PAR(intel1cs, intel1addr, maxsize);
-- __asm__ ("wbinvd");
-+ wbinvd();
-
- maxsize += intel0size;
-
-@@ -405,7 +405,7 @@ int __init nettel_init(void)
- intel1size = intel_mtd->size - intel0size;
- if (intel1size > 0) {
- *intel1par = SC520_PAR(intel1cs, intel1addr, intel1size);
-- __asm__ ("wbinvd");
-+ wbinvd();
- } else {
- *intel1par = 0;
- }
diff --git a/patches/linux-2.6.11/x86_64-linux.patch b/patches/linux-2.6.11/x86_64-linux.patch
new file mode 100644
index 0000000000..57d4f07a06
--- /dev/null
+++ b/patches/linux-2.6.11/x86_64-linux.patch
@@ -0,0 +1,68 @@
+diff -urN linux-2.6.10-orig/include/asm-x86_64/hw_irq.h linux-2.6.10/include/asm-x86_64/hw_irq.h
+--- linux-2.6.10-orig/include/asm-x86_64/hw_irq.h 2005-01-06 00:34:38.000000000 -0500
++++ linux-2.6.10/include/asm-x86_64/hw_irq.h 2005-02-25 17:45:37.181518088 -0500
+@@ -48,6 +48,7 @@
+ *
+ * Vectors 0xf0-0xf9 are free (reserved for future Linux use).
+ */
++#ifndef CONFIG_XEN
+ #define SPURIOUS_APIC_VECTOR 0xff
+ #define ERROR_APIC_VECTOR 0xfe
+ #define INVALIDATE_TLB_VECTOR 0xfd
+@@ -57,7 +58,7 @@
+ #define KDB_VECTOR 0xf9
+
+ #define THERMAL_APIC_VECTOR 0xf0
+-
++#endif
+
+ /*
+ * Local APIC timer IRQ vector is on a different priority level,
+diff -urN linux-2.6.10-orig/include/asm-x86_64/irq.h linux-2.6.10/include/asm-x86_64/irq.h
+--- linux-2.6.10-orig/include/asm-x86_64/irq.h 2005-01-06 00:34:38.000000000 -0500
++++ linux-2.6.10/include/asm-x86_64/irq.h 2005-02-25 17:45:37.181518088 -0500
+@@ -10,6 +10,9 @@
+ * <tomsoft@informatik.tu-chemnitz.de>
+ */
+
++#ifdef CONFIG_XEN
++#include "irq_vectors.h"
++#endif
+ #define TIMER_IRQ 0
+
+ /*
+@@ -22,6 +25,7 @@
+ * the usable vector space is 0x20-0xff (224 vectors)
+ */
+
++#ifndef CONFIG_XEN
+ /*
+ * The maximum number of vectors supported by x86_64 processors
+ * is limited to 256. For processors other than x86_64, NR_VECTORS
+@@ -38,6 +42,7 @@
+ #define NR_IRQS 224
+ #define NR_IRQ_VECTORS 1024
+ #endif
++#endif
+
+ static __inline__ int irq_canonicalize(int irq)
+ {
+diff -urN linux-2.6.10-orig/include/asm-x86_64/posix_types.h linux-2.6.10/include/asm-x86_64/posix_types.h
+--- linux-2.6.10-orig/include/asm-x86_64/posix_types.h 2004-10-18 17:55:29.000000000 -0400
++++ linux-2.6.10/include/asm-x86_64/posix_types.h 2005-02-25 17:45:37.183517784 -0500
+@@ -6,7 +6,7 @@
+ * be a little careful about namespace pollution etc. Also, we cannot
+ * assume GCC is being used.
+ */
+-
++#ifndef __ASSEMBLY__
+ typedef unsigned long __kernel_ino_t;
+ typedef unsigned int __kernel_mode_t;
+ typedef unsigned long __kernel_nlink_t;
+@@ -115,5 +115,5 @@
+ }
+
+ #endif /* defined(__KERNEL__) */
+-
++#endif
+ #endif
diff --git a/tools/Makefile b/tools/Makefile
index 0308c7512f..b122ba465a 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -2,33 +2,50 @@ XEN_ROOT = ../
include $(XEN_ROOT)/tools/Rules.mk
SUBDIRS :=
-SUBDIRS += libxutil
SUBDIRS += libxc
+SUBDIRS += xenstore
SUBDIRS += misc
SUBDIRS += examples
SUBDIRS += xentrace
SUBDIRS += python
-SUBDIRS += xfrd
+SUBDIRS += xcs
+SUBDIRS += xcutils
+SUBDIRS += xenstore
+SUBDIRS += pygrub
+SUBDIRS += firmware
-.PHONY: all install clean check check_clean
+.PHONY: all install clean check check_clean ioemu ioemuinstall ioemuclean
all: check
@set -e; for subdir in $(SUBDIRS); do \
$(MAKE) -C $$subdir $@; \
done
+ $(MAKE) ioemu
install: check
@set -e; for subdir in $(SUBDIRS); do \
$(MAKE) -C $$subdir $@; \
done
+ $(MAKE) ioemuinstall
clean: check_clean
@set -e; for subdir in $(SUBDIRS); do \
$(MAKE) -C $$subdir $@; \
done
+ $(MAKE) ioemuclean
check:
$(MAKE) -C check
check_clean:
$(MAKE) -C check clean
+
+ifndef XEN_NO_IOEMU
+ioemu ioemuinstall ioemuclean:
+ [ -f ioemu/config-host.h ] || \
+ (cd ioemu; ./configure --prefix=usr)
+ $(MAKE) -C ioemu $(patsubst ioemu%,%,$@)
+else
+ioemu ioemuinstall ioemuclean:
+endif
+
diff --git a/tools/Rules.mk b/tools/Rules.mk
index eb89b3be05..5b79d37ff7 100644
--- a/tools/Rules.mk
+++ b/tools/Rules.mk
@@ -1,24 +1,27 @@
# -*- mode: Makefile; -*-
+include $(XEN_ROOT)/Config.mk
+
XEN_XC = $(XEN_ROOT)/tools/python/xen/lowlevel/xc
XEN_LIBXC = $(XEN_ROOT)/tools/libxc
-XEN_LIBXUTIL = $(XEN_ROOT)/tools/libxutil
-
-COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/)
-TARGET_ARCH ?= $(COMPILE_ARCH)
-ifeq ($(TARGET_ARCH),x86_32)
+ifeq ($(XEN_TARGET_ARCH),x86_32)
CFLAGS += -m32 -march=i686
LDFLAGS += -m elf_i386
endif
-ifeq ($(TARGET_ARCH),x86_64)
+ifeq ($(XEN_TARGET_ARCH),x86_64)
CFLAGS += -m64
LDFLAGS += -m elf_x86_64
endif
+X11_LDPATH = -L/usr/X11R6/$(LIBDIR)
+
%.opic: %.c
$(CC) $(CPPFLAGS) -DPIC $(CFLAGS) -fPIC -c -o $@ $<
%.o: %.c
$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
+
+%.o: %.cc
+ $(CC) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
diff --git a/tools/blktap/Makefile b/tools/blktap/Makefile
new file mode 100644
index 0000000000..6ee79ada9c
--- /dev/null
+++ b/tools/blktap/Makefile
@@ -0,0 +1,144 @@
+MAJOR = 2.0
+MINOR = 0
+SONAME = libblktap.so.$(MAJOR)
+
+CC = gcc
+
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+BLKTAP_INSTALL_DIR = /usr/sbin
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+INCLUDES +=
+
+LIBS := -lpthread -lz
+
+SRCS :=
+SRCS += blktaplib.c
+
+PLX_SRCS :=
+PLX_SRCS += vdi.c
+PLX_SRCS += radix.c
+PLX_SRCS += snaplog.c
+PLX_SRCS += blockstore.c
+PLX_SRCS += block-async.c
+PLX_SRCS += requests-async.c
+VDI_SRCS := $(PLX_SRCS)
+PLX_SRCS += parallax.c
+
+VDI_TOOLS :=
+VDI_TOOLS += vdi_create
+VDI_TOOLS += vdi_list
+VDI_TOOLS += vdi_snap
+VDI_TOOLS += vdi_snap_list
+VDI_TOOLS += vdi_snap_delete
+VDI_TOOLS += vdi_fill
+VDI_TOOLS += vdi_tree
+VDI_TOOLS += vdi_validate
+
+CFLAGS += -Wall
+CFLAGS += -Werror
+CFLAGS += -Wno-unused
+#CFLAGS += -O3
+CFLAGS += -g3
+CFLAGS += -fno-strict-aliasing
+CFLAGS += -I $(XEN_LIBXC)
+CFLAGS += $(INCLUDES) -I.
+CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+# Get gcc to generate the dependencies for us.
+CFLAGS += -Wp,-MD,.$(@F).d
+DEPS = .*.d
+
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+IBINS = blkdump parallax $(VDI_TOOLS)
+
+LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
+
+all: mk-symlinks blkdump $(VDI_TOOLS) parallax blockstored
+ $(MAKE) $(LIB)
+
+LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
+mk-symlinks:
+ [ -e xen/linux ] || mkdir -p xen/linux
+ [ -e xen/io ] || mkdir -p xen/io
+ ( cd xen >/dev/null ; \
+ ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . )
+ ( cd xen/io >/dev/null ; \
+ ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . )
+ ( cd xen/linux >/dev/null ; \
+ ln -sf ../../$(LINUX_ROOT)/include/asm-xen/linux-public/*.h . )
+
+install: all
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/include
+ $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
+ $(INSTALL_PROG) $(IBINS) $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
+
+clean:
+ rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump $(VDI_TOOLS) parallax vdi_unittest
+
+rpm: all
+ rm -rf staging
+ mkdir staging
+ mkdir staging/i386
+ rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
+ --define "_rpmdir$$PWD/staging" -bb rpm.spec
+ mv staging/i386/*.rpm .
+ rm -rf staging
+
+libblktap.so:
+ ln -sf libblktap.so.$(MAJOR) $@
+libblktap.so.$(MAJOR):
+ ln -sf libblktap.so.$(MAJOR).$(MINOR) $@
+libblktap.so.$(MAJOR).$(MINOR): $(OBJS)
+ $(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ $(LIBS)
+
+blkdump: $(LIB)
+ $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. -l blktap blkdump.c
+
+parallax: $(LIB) $(PLX_SRCS)
+ $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L. -lblktap $(LIBS) $(PLX_SRCS)
+
+vdi_list: $(LIB) vdi_list.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c $(LIBS) $(VDI_SRCS)
+
+vdi_create: $(LIB) vdi_create.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_create vdi_create.c $(LIBS) $(VDI_SRCS)
+
+vdi_snap: $(LIB) vdi_snap.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_snap vdi_snap.c $(LIBS) $(VDI_SRCS)
+
+vdi_snap_list: $(LIB) vdi_snap_list.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_snap_list vdi_snap_list.c $(LIBS) $(VDI_SRCS)
+
+vdi_snap_delete: $(LIB) vdi_snap_delete.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_snap_delete vdi_snap_delete.c $(LIBS) $(VDI_SRCS)
+
+vdi_tree: $(LIB) vdi_tree.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_tree vdi_tree.c $(LIBS) $(VDI_SRCS)
+
+vdi_fill: $(LIB) vdi_fill.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_fill vdi_fill.c $(LIBS) $(VDI_SRCS)
+
+vdi_validate: $(LIB) vdi_validate.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_validate vdi_validate.c $(LIBS) $(VDI_SRCS)
+
+vdi_unittest: $(LIB) vdi_unittest.c $(VDI_SRCS)
+ $(CC) $(CFLAGS) -g3 -o vdi_unittest vdi_unittest.c $(LIBS) $(VDI_SRCS)
+
+blockstored: blockstored.c
+ $(CC) $(CFLAGS) -g3 -o blockstored $(LIBS) blockstored.c
+bstest: bstest.c blockstore.c
+ $(CC) $(CFLAGS) -g3 -o bstest bstest.c $(LIBS) blockstore.c
+
+.PHONY: TAGS clean install mk-symlinks rpm
+TAGS:
+ etags -t $(SRCS) *.h
+
+-include $(DEPS)
+
diff --git a/tools/blktap/README b/tools/blktap/README
new file mode 100644
index 0000000000..cca9a28fd9
--- /dev/null
+++ b/tools/blktap/README
@@ -0,0 +1,149 @@
+Block Tap User-level Interfaces
+Andrew Warfield
+andrew.warfield@cl.cam.ac.uk
+February 8, 2005
+
+NOTE #1: The blktap is _experimental_ code. It works for me. Your
+mileage may vary. Don't use it for anything important. Please. ;)
+
+NOTE #2: All of the interfaces here are likely to change. This is all
+early code, and I am checking it in because others want to play with
+it. If you use it for anything, please let me know!
+
+Overview:
+---------
+
+This directory contains a library and set of example applications for
+the block tap device. The block tap hooks into the split block device
+interfaces above Xen allowing them to be extended. This extension can
+be done in userspace with the help of a library.
+
+The tap can be installed either as an interposition domain in between
+a frontend and backend driver pair, or as a terminating backend, in
+which case it is responsible for serving all requests itself.
+
+There are two reasons that you might want to use the tap,
+corresponding to these configurations:
+
+ 1. To examine or modify a stream of block requests while they are
+ in-flight (e.g. to encrypt data, or add data-driven watchpoints)
+
+ 2. To prototype a new backend driver, serving requests from the tap
+ rather than passing them along to the XenLinux blkback driver.
+ (e.g. to forward block requests to a remote host)
+
+
+Interface:
+----------
+
+At the moment, the tap interface is similar in spirit to that of the
+Linux netfilter. Requests are messages from a client (frontend)
+domain to a disk (backend) domain. Responses are messages travelling
+back, acknowledging the completion of a request. The library allows
+chains of functions to be attached to these events. In addition,
+hooks may be attached to handle control messages, which signify things
+like connections from new domains.
+
+At present the control messages especially expose a lot of the
+underlying driver interfaces. This may change in the future in order
+to simplify writing hooks.
+
+Here are the public interfaces:
+
+These allow hook functions to be chained:
+
+ void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
+ void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
+ void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
+
+This allows a response to be injected, in the case where a request has
+been removed using BLKTAP_STOLEN.
+
+ void blktap_inject_response(blkif_response_t *);
+
+These let you add file descriptors and handlers to the main poll loop:
+
+ int blktap_attach_poll(int fd, short events, int (*func)(int));
+ void blktap_detach_poll(int fd);
+
+This starts the main poll loop:
+
+ int blktap_listen(void);
+
+Example:
+--------
+
+blkimage.c uses an image on the local file system to serve requests to
+a domain. Here's what it looks like:
+
+---[blkimg.c]---
+
+/* blkimg.c
+ *
+ * file-backed disk.
+ */
+
+#include "blktaplib.h"
+#include "blkimglib.h"
+
+
+int main(int argc, char *argv[])
+{
+ image_init();
+
+ blktap_register_ctrl_hook("image_control", image_control);
+ blktap_register_request_hook("image_request", image_request);
+ blktap_listen();
+
+ return 0;
+}
+
+----------------
+
+All of the real work is in blkimglib.c, but this illustrates the
+actual tap interface well enough. image_control() will be called with
+all control messages. image_request() handles requests. As it reads
+from an on-disk image file, no requests are ever passed on to a
+backend, and so there will be no responses to process -- so there is
+nothing registered as a response hook.
+
+Other examples:
+---------------
+
+Here is a list of other examples in the directory:
+
+Things that terminate a block request stream:
+
+ blkimg - Use a image file/device to serve requests
+ blkgnbd - Use a remote gnbd server to serve requests
+ blkaio - Use libaio... (DOES NOT WORK)
+
+Things that don't:
+
+ blkdump - Print in-flight requests.
+ blkcow - Really inefficient copy-on-write disks using libdb to store
+ writes.
+
+There are examples of plugging these things together, for instance
+blkcowgnbd is a read-only gnbd device with copy-on-write to a local
+file.
+
+TODO:
+-----
+
+- Make session tracking work. At the moment these generally just handle a
+ single front-end client at a time.
+
+- Integrate with Xend. Need to cleanly pass a image identifier in the connect
+ message.
+
+- Make an asynchronous file-io terminator. The libaio attempt is
+ tragically stalled because mapped foreign pages make pfn_valid fail
+ (they are VM_IO), and so cannot be passed to aio as targets. A
+ better solution may be to tear the disk interfaces out of the real
+ backend and expose them somehow.
+
+- Make CoW suck less.
+
+- Do something more along the lines of dynamic linking for the
+ plugins, so that they don't all need a new main().
diff --git a/tools/blktap/README-PARALLAX b/tools/blktap/README-PARALLAX
new file mode 100644
index 0000000000..ace05fdb41
--- /dev/null
+++ b/tools/blktap/README-PARALLAX
@@ -0,0 +1,177 @@
+Parallax Quick Overview
+March 3, 2005
+
+This is intended to provide a quick set of instructions to let you
+guys play with the current parallax source. In it's current form, the
+code will let you run an arbitrary number of VMs off of a single disk
+image, doing copy-on-write as they make updates. Each domain is
+assigned a virtual disk image (VDI), which may be based on a snapshot
+of an existing image. All of the VDI and snapshot management should
+currently work.
+
+The current implementation uses a single file as a blockstore for
+_everything_; this will soon be replaced by the fancier backend code
+and the local cache. As it stands, Parallax will create
+"blockstore.dat" in the directory that you run it from, and use
+largefile support to make this grow to unfathomable girth. So, you
+probably want to run the daemon off of a local disk, with a lot of
+free space.
+
+Here's how to get going:
+
+0. Setup:
+---------
+
+Pick a local directory on a disk with lots of room. You should be
+running from a privileged domain (e.g. dom0) with the blocktap
+configured in and block backend NOT.
+
+For convenience (for the moment) copy all of the vdi tools (vdi_*) and
+the parallax daemon from tools/blktap into this directory.
+
+1. Populate the blockstore:
+---------------------------
+
+First you need to put at least one image into the blockstore. You
+will need a disk image, either as a file or local partition. My
+general approach has been to
+
+(a) make a really big sparse file with
+
+ dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
+
+(b) put a filesystem into it
+
+ mkfs.ext3 ./image
+
+(c) mount it using loopback
+
+ mkdir ./mnt
+ mount -o loop ./image ./mnt
+
+(d) cd into it and untar one of the image files from srg-roots.
+
+ cd mnt
+ tar ...
+
+NOTE: Beware if your system is FC3. mkfs is not compatible with old
+versions of fedora, and so you don't have much choice but to install
+further fc3 images if you have used the fc3 version of mkfs.
+
+(e) unmount the image
+
+ cd ..
+ umount mnt
+
+(f) now, create a new VDI to hold the image
+
+ ./vdi_create "My new FC3 VDI"
+
+(g) get the id of the new VDI.
+
+ ./vdi_list
+
+ | 0 My new FC3 VDI
+
+(0 is the VDI id... create a few more if you want.)
+
+(h) hoover your image into the new VDI.
+
+ ./vdi_fill 0 ./image
+
+This will pull the entire image into the blockstore and set up a
+mapping tree for it for VDI 0. Passing a device (e.g. /dev/sda3)
+should also work, but vdi_fill has NO notion of sparseness yet, so you
+are going to pump a block into the store for each block you read.
+
+vdi_fill will count up until it is done, and you should be ready to
+go. If you want to be anal, you can use vdi_validate to test the VDI
+against the original image.
+
+2. Create some extra VDIs
+-------------------------
+
+VDIs are actually a list of snapshots, and each snapshot is a full
+image of mappings. So, to preserve an immutable copy of a current
+VDI, do this:
+
+(a) Snapshot your new VDI.
+
+ ./vdi_snap 0
+
+Snapshotting writes the current radix root to the VDI's snapshot log,
+and assigns it a new writable root.
+
+(b) look at the VDI's snapshot log.
+
+ ./vdi_snap_list 0
+
+ | 16 0 Thu Mar 3 19:27:48 2005 565111 31
+
+The first two columns constitute a snapshot id and represent the
+(block, offset) of the snapshot record. The Date tells you when the
+snapshot was made, and 31 is the radix root node of the snapshot.
+
+(c) Create a new VDI, based on that snapshot, and look at the list.
+
+ ./vdi_create "FC3 - Copy 1" 16 0
+ ./vdi_list
+
+ | 0 My new FC3 VDI
+ | 1 FC3 - Copy 1
+
+NOTE: If you have Graphviz installed on your system, you can use
+vdi_tree to generate a postscript of your current set of VDIs and
+snapshots.
+
+
+Create as many VDIs as you need for the VMs that you want to run.
+
+3. Boot some VMs:
+-----------------
+
+Parallax currently uses a hack in xend to pass the VDI id, you need to
+modify the disk line of the VM config that is going to mount it.
+
+(a) set up your vm config, by using the following disk line:
+
+ disk = ['parallax:1,sda1,w,0' ]
+
+This example uses VDI 1 (from vdi_list above), presents it as sda1
+(writable), and uses dom 0 as the backend. If you were running the
+daemon (and tap driver) in some domain other than 0, you would change
+this last parameter.
+
+NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
+
+(b) Run parallax in the backend domain.
+
+ ./parallax
+
+(c) create your new domain.
+
+ xm create ...
+
+---
+
+That's pretty much all there is to it at the moment. Hope this is
+clear enough to get you going. Now, a few serious caveats that will
+be sorted out in the almost immediate future:
+
+WARNINGS:
+---------
+
+1. There is NO locking in the VDI tools at the moment, so I'd avoid
+running them in parallel, or more importantly, running them while the
+daemon is running.
+
+2. I doubt that xend will be very happy about restarting if you have
+parallax-using domains. So if it dies while there are active parallax
+doms, you may need to reboot.
+
+3. I've turned off write-in-place. So at the moment, EVERY block
+write is a log append on the blockstore. I've been having some probs
+with the radix tree's marking of writable blocks after snapshots and
+will sort this out very soon.
+
+
diff --git a/tools/blktap/blkdump.c b/tools/blktap/blkdump.c
new file mode 100644
index 0000000000..0cf087ff02
--- /dev/null
+++ b/tools/blktap/blkdump.c
@@ -0,0 +1,131 @@
+/* blkdump.c
+ *
+ * show a running trace of block requests as they fly by.
+ *
+ * (c) 2004 Andrew Warfield.
+ */
+
+#include <stdio.h>
+#include "blktaplib.h"
+
+int control_print(control_msg_t *msg)
+{
+ if (msg->type != CMSG_BLKIF_BE)
+ {
+ printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
+ return 0;
+ }
+
+ switch(msg->subtype)
+ {
+ case CMSG_BLKIF_BE_CREATE:
+ if ( msg->length != sizeof(blkif_be_create_t) )
+ goto parse_error;
+ printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
+ ((blkif_be_create_t *)msg->msg)->domid,
+ ((blkif_be_create_t *)msg->msg)->blkif_handle);
+ break;
+ case CMSG_BLKIF_BE_DESTROY:
+ if ( msg->length != sizeof(blkif_be_destroy_t) )
+ goto parse_error;
+ printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
+ ((blkif_be_destroy_t *)msg->msg)->domid,
+ ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
+ break;
+ case CMSG_BLKIF_BE_CONNECT:
+ if ( msg->length != sizeof(blkif_be_connect_t) )
+ goto parse_error;
+ printf("[CONTROL_MSG] CMSG_BLKIF_BE_CONNECT(d:%d,h:%d)\n",
+ ((blkif_be_connect_t *)msg->msg)->domid,
+ ((blkif_be_connect_t *)msg->msg)->blkif_handle);
+ break;
+ case CMSG_BLKIF_BE_DISCONNECT:
+ if ( msg->length != sizeof(blkif_be_disconnect_t) )
+ goto parse_error;
+ printf("[CONTROL_MSG] CMSG_BLKIF_BE_DISCONNECT(d:%d,h:%d)\n",
+ ((blkif_be_disconnect_t *)msg->msg)->domid,
+ ((blkif_be_disconnect_t *)msg->msg)->blkif_handle);
+ break;
+ case CMSG_BLKIF_BE_VBD_CREATE:
+ if ( msg->length != sizeof(blkif_be_vbd_create_t) )
+ goto parse_error;
+ printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_CREATE(d:%d,h:%d,v:%d)\n",
+ ((blkif_be_vbd_create_t *)msg->msg)->domid,
+ ((blkif_be_vbd_create_t *)msg->msg)->blkif_handle,
+ ((blkif_be_vbd_create_t *)msg->msg)->vdevice);
+ break;
+ case CMSG_BLKIF_BE_VBD_DESTROY:
+ if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
+ goto parse_error;
+ printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_DESTROY(d:%d,h:%d,v:%d)\n",
+ ((blkif_be_vbd_destroy_t *)msg->msg)->domid,
+ ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle,
+ ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice);
+ break;
+ default:
+ goto parse_error;
+ }
+
+ return 0;
+
+parse_error:
+ printf("[CONTROL_MSG] Bad message type or length!\n");
+ return 0;
+}
+
+int request_print(blkif_request_t *req)
+{
+ int i;
+ unsigned long fas;
+
+ if ( req->operation == BLKIF_OP_PROBE ) {
+ printf("[%2u:%2u<%s]\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id),
+ blkif_op_name[req->operation]);
+ return BLKTAP_PASS;
+ } else {
+ printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n",
+ ID_TO_DOM(req->id), ID_TO_IDX(req->id),
+ blkif_op_name[req->operation],
+ req->nr_segments, req->device,
+ req->sector_number);
+
+
+ for (i=0; i < req->nr_segments; i++) {
+ fas = req->frame_and_sects[i];
+ printf(" (pf: 0x%8lx start: %lu stop: %lu)\n",
+ (fas & PAGE_MASK),
+ blkif_first_sect(fas),
+ blkif_last_sect(fas)
+ );
+ }
+
+ }
+
+ return BLKTAP_PASS;
+}
+
+int response_print(blkif_response_t *rsp)
+{
+ if ( rsp->operation == BLKIF_OP_PROBE ) {
+ printf("[%2u:%2u>%s]\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
+ blkif_op_name[rsp->operation]);
+ return BLKTAP_PASS;
+ } else {
+ printf("[%2u:%2u>%5s] (status: %d)\n",
+ ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
+ blkif_op_name[rsp->operation],
+ rsp->status);
+
+ }
+ return BLKTAP_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ blktap_register_ctrl_hook("control_print", control_print);
+ blktap_register_request_hook("request_print", request_print);
+ blktap_register_response_hook("response_print", response_print);
+ blktap_listen();
+
+ return 0;
+}
diff --git a/tools/blktap/blktaplib.c b/tools/blktap/blktaplib.c
new file mode 100644
index 0000000000..8db175a019
--- /dev/null
+++ b/tools/blktap/blktaplib.c
@@ -0,0 +1,564 @@
+/*
+ * blktaplib.c
+ *
+ * userspace interface routines for the blktap driver.
+ *
+ * (threadsafe(r) version)
+ *
+ * (c) 2004 Andrew Warfield.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#include <err.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <linux/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+
+
+#define __COMPILING_BLKTAP_LIB
+#include "blktaplib.h"
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+#define DEBUG_RING_IDXS 0
+
+#define POLLRDNORM 0x040
+
+#define BLKTAP_IOCTL_KICK 1
+
+void got_sig_bus();
+void got_sig_int();
+
+/* in kernel these are opposite, but we are a consumer now. */
+blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */
+blkif_front_ring_t be_ring;
+ctrl_back_ring_t ctrl_ring;
+
+unsigned long mmap_vstart = 0;
+char *blktap_mem;
+int fd = 0;
+
+#define BLKTAP_RING_PAGES 3 /* Ctrl, Back, Front */
+/*#define BLKTAP_MMAP_PAGES ((11 + 1) * 64)*/
+#define BLKTAP_MMAP_PAGES \
+ ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE)
+#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES)
+
+int bad_count = 0;
+void bad(void)
+{
+ bad_count ++;
+ if (bad_count > 50) exit(0);
+}
+/*-----[ ID Manipulation from tap driver code ]--------------------------*/
+
+#define ACTIVE_RING_IDX unsigned short
+
+inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
+ return ( (fe_dom << 16) | idx );
+}
+
+inline unsigned int ID_TO_IDX(unsigned long id)
+{
+ return ( id & 0x0000ffff );
+}
+
+inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
+/*
+static int (*request_hook)(blkif_request_t *req) = NULL;
+static int (*response_hook)(blkif_response_t *req) = NULL;
+*/
+
+/*-----[ Request/Response hook chains.]----------------------------------*/
+
+#define HOOK_NAME_MAX 50
+
+typedef struct ctrl_hook_st {
+ char name[HOOK_NAME_MAX];
+ int (*func)(control_msg_t *);
+ struct ctrl_hook_st *next;
+} ctrl_hook_t;
+
+typedef struct request_hook_st {
+ char name[HOOK_NAME_MAX];
+ int (*func)(blkif_request_t *);
+ struct request_hook_st *next;
+} request_hook_t;
+
+typedef struct response_hook_st {
+ char name[HOOK_NAME_MAX];
+ int (*func)(blkif_response_t *);
+ struct response_hook_st *next;
+} response_hook_t;
+
+static ctrl_hook_t *ctrl_hook_chain = NULL;
+static request_hook_t *request_hook_chain = NULL;
+static response_hook_t *response_hook_chain = NULL;
+
+void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *))
+{
+ ctrl_hook_t *ch_ent, **c;
+
+ ch_ent = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t));
+ if (!ch_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
+
+ ch_ent->func = ch;
+ ch_ent->next = NULL;
+ strncpy(ch_ent->name, name, HOOK_NAME_MAX);
+ ch_ent->name[HOOK_NAME_MAX-1] = '\0';
+
+ c = &ctrl_hook_chain;
+ while (*c != NULL) {
+ c = &(*c)->next;
+ }
+ *c = ch_ent;
+}
+
+void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *))
+{
+ request_hook_t *rh_ent, **c;
+
+ rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
+ if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
+
+ rh_ent->func = rh;
+ rh_ent->next = NULL;
+ strncpy(rh_ent->name, name, HOOK_NAME_MAX);
+
+ c = &request_hook_chain;
+ while (*c != NULL) {
+ c = &(*c)->next;
+ }
+ *c = rh_ent;
+}
+
+void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *))
+{
+ response_hook_t *rh_ent, **c;
+
+ rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
+ if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
+
+ rh_ent->func = rh;
+ rh_ent->next = NULL;
+ strncpy(rh_ent->name, name, HOOK_NAME_MAX);
+
+ c = &response_hook_chain;
+ while (*c != NULL) {
+ c = &(*c)->next;
+ }
+ *c = rh_ent;
+}
+
+void print_hooks(void)
+{
+ request_hook_t *req_hook;
+ response_hook_t *rsp_hook;
+ ctrl_hook_t *ctrl_hook;
+
+ DPRINTF("Control Hooks:\n");
+ ctrl_hook = ctrl_hook_chain;
+ while (ctrl_hook != NULL)
+ {
+ DPRINTF(" [0x%p] %s\n", ctrl_hook->func, ctrl_hook->name);
+ ctrl_hook = ctrl_hook->next;
+ }
+
+ DPRINTF("Request Hooks:\n");
+ req_hook = request_hook_chain;
+ while (req_hook != NULL)
+ {
+ DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name);
+ req_hook = req_hook->next;
+ }
+
+ DPRINTF("Response Hooks:\n");
+ rsp_hook = response_hook_chain;
+ while (rsp_hook != NULL)
+ {
+ DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
+ rsp_hook = rsp_hook->next;
+ }
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+ blkif_request_t *req_d;
+ static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+ pthread_mutex_lock(&be_prod_mutex);
+ req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
+ memcpy(req_d, req, sizeof(blkif_request_t));
+ wmb();
+ be_ring.req_prod_pvt++;
+ pthread_mutex_unlock(&be_prod_mutex);
+
+ return 0;
+}
+
+inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
+{
+ blkif_response_t *rsp_d;
+ static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+ pthread_mutex_lock(&fe_prod_mutex);
+ rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
+ memcpy(rsp_d, rsp, sizeof(blkif_response_t));
+ wmb();
+ fe_ring.rsp_prod_pvt++;
+ pthread_mutex_unlock(&fe_prod_mutex);
+
+ return 0;
+}
+
+static void apply_rsp_hooks(blkif_response_t *rsp)
+{
+ response_hook_t *rsp_hook;
+
+ rsp_hook = response_hook_chain;
+ while (rsp_hook != NULL)
+ {
+ switch(rsp_hook->func(rsp))
+ {
+ case BLKTAP_PASS:
+ break;
+ default:
+ printf("Only PASS is supported for resp hooks!\n");
+ }
+ rsp_hook = rsp_hook->next;
+ }
+}
+
+static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+void blktap_inject_response(blkif_response_t *rsp)
+{
+
+ apply_rsp_hooks(rsp);
+
+ write_rsp_to_fe_ring(rsp);
+
+ pthread_mutex_lock(&push_mutex);
+
+ RING_PUSH_RESPONSES(&fe_ring);
+ ioctl(fd, BLKTAP_IOCTL_KICK_FE);
+
+ pthread_mutex_unlock(&push_mutex);
+}
+
+/*-----[ Polling fd listeners ]------------------------------------------*/
+
+#define MAX_POLLFDS 64
+
+typedef struct {
+ int (*func)(int fd);
+ struct pollfd *pfd;
+ int fd;
+ short events;
+ int active;
+} pollhook_t;
+
+static struct pollfd pfd[MAX_POLLFDS+1];
+static pollhook_t pollhooks[MAX_POLLFDS];
+static unsigned int ph_freelist[MAX_POLLFDS];
+static unsigned int ph_cons, ph_prod;
+#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
+#define PH_IDX(x) (x % MAX_POLLFDS)
+
+int blktap_attach_poll(int fd, short events, int (*func)(int fd))
+{
+ pollhook_t *ph;
+
+ if (nr_pollhooks() == MAX_POLLFDS) {
+ printf("Too many pollhooks!\n");
+ return -1;
+ }
+
+ ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
+
+ ph->func = func;
+ ph->fd = fd;
+ ph->events = events;
+ ph->active = 1;
+
+ DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1,
+ nr_pollhooks());
+
+ return 0;
+}
+
+void blktap_detach_poll(int fd)
+{
+ int i;
+
+ for (i=0; i<MAX_POLLFDS; i++)
+ if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
+ ph_freelist[PH_IDX(ph_prod++)] = i;
+ pollhooks[i].pfd->fd = -1;
+ pollhooks[i].active = 0;
+ break;
+ }
+
+ DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i,
+ nr_pollhooks());
+}
+
+void pollhook_init(void)
+{
+ int i;
+
+ for (i=0; i < MAX_POLLFDS; i++) {
+ ph_freelist[i] = (i+1) % MAX_POLLFDS;
+ pollhooks[i].active = 0;
+ }
+
+ ph_cons = 0;
+ ph_prod = MAX_POLLFDS;
+}
+
+void __attribute__ ((constructor)) blktaplib_init(void)
+{
+ pollhook_init();
+}
+
+/*-----[ The main listen loop ]------------------------------------------*/
+
+int blktap_listen(void)
+{
+ int notify_be, notify_fe, tap_pfd;
+
+ /* comms rings: */
+ blkif_request_t *req;
+ blkif_response_t *rsp;
+ control_msg_t *msg;
+ blkif_sring_t *sring;
+ ctrl_sring_t *csring;
+ RING_IDX rp, i, pfd_count;
+
+ /* pending rings */
+ blkif_request_t req_pending[BLKIF_RING_SIZE];
+ blkif_response_t rsp_pending[BLKIF_RING_SIZE];
+
+ /* handler hooks: */
+ request_hook_t *req_hook;
+ response_hook_t *rsp_hook;
+ ctrl_hook_t *ctrl_hook;
+
+ signal (SIGBUS, got_sig_bus);
+ signal (SIGINT, got_sig_int);
+
+ print_hooks();
+
+ fd = open("/dev/blktap", O_RDWR);
+ if (fd == -1) {
+ printf("open failed! (%d)\n", errno);
+ goto open_failed;
+ }
+
+ blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+ if ((int)blktap_mem == -1) {
+ printf("mmap failed! (%d)\n", errno);
+ goto mmap_failed;
+ }
+
+ /* assign the rings to the mapped memory */
+ csring = (ctrl_sring_t *)blktap_mem;
+ BACK_RING_INIT(&ctrl_ring, csring, PAGE_SIZE);
+
+ sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
+ FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
+
+ sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE));
+ BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
+
+ mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
+
+ ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
+
+ while(1) {
+ int ret;
+
+ /* build the poll list */
+
+ DPRINTF("Building poll list.\n");
+
+ pfd_count = 0;
+ for ( i=0; i < MAX_POLLFDS; i++ ) {
+ pollhook_t *ph = &pollhooks[i];
+
+ if (ph->active) {
+ pfd[pfd_count].fd = ph->fd;
+ pfd[pfd_count].events = ph->events;
+ ph->pfd = &pfd[pfd_count];
+ pfd_count++;
+ }
+ }
+
+ tap_pfd = pfd_count;
+ pfd[tap_pfd].fd = fd;
+ pfd[tap_pfd].events = POLLIN;
+
+ DPRINTF("poll() %d fds.\n", pfd_count);
+
+ if ( (ret = (poll(pfd, pfd_count+1, 10000)) == 0) ) {
+ if (DEBUG_RING_IDXS)
+ ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
+ continue;
+ }
+
+ DPRINTF("poll returned %d\n", ret);
+
+ for (i=0; i < MAX_POLLFDS; i++) {
+ if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
+ pollhooks[i].func(pollhooks[i].pfd->fd);
+ }
+
+ if (pfd[tap_pfd].revents) {
+
+ /* empty the control ring */
+ rp = ctrl_ring.sring->req_prod;
+ rmb();
+ for (i = ctrl_ring.req_cons; i < rp; i++)
+ {
+ msg = RING_GET_REQUEST(&ctrl_ring, i);
+
+ ctrl_hook = ctrl_hook_chain;
+ while (ctrl_hook != NULL)
+ {
+ DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name);
+ /* We currently don't respond to ctrl messages. */
+ ctrl_hook->func(msg);
+ ctrl_hook = ctrl_hook->next;
+ }
+ }
+ /* Using this as a unidirectional ring. */
+ ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i;
+pthread_mutex_lock(&push_mutex);
+ RING_PUSH_RESPONSES(&ctrl_ring);
+pthread_mutex_unlock(&push_mutex);
+
+ /* empty the fe_ring */
+ notify_fe = 0;
+ notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
+ rp = fe_ring.sring->req_prod;
+ rmb();
+ for (i = fe_ring.req_cons; i != rp; i++)
+ {
+ int done = 0; /* stop forwarding this request */
+
+ req = RING_GET_REQUEST(&fe_ring, i);
+ memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
+ req = &req_pending[ID_TO_IDX(req->id)];
+
+ DPRINTF("copying an fe request\n");
+
+ req_hook = request_hook_chain;
+ while (req_hook != NULL)
+ {
+ DPRINTF("REQ_HOOK: %s\n", req_hook->name);
+ switch(req_hook->func(req))
+ {
+ case BLKTAP_RESPOND:
+ apply_rsp_hooks((blkif_response_t *)req);
+ write_rsp_to_fe_ring((blkif_response_t *)req);
+ notify_fe = 1;
+ done = 1;
+ break;
+ case BLKTAP_STOLEN:
+ done = 1;
+ break;
+ case BLKTAP_PASS:
+ break;
+ default:
+ printf("Unknown request hook return value!\n");
+ }
+ if (done) break;
+ req_hook = req_hook->next;
+ }
+
+ if (done == 0) write_req_to_be_ring(req);
+
+ }
+ fe_ring.req_cons = i;
+
+ /* empty the be_ring */
+ notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
+ rp = be_ring.sring->rsp_prod;
+ rmb();
+ for (i = be_ring.rsp_cons; i != rp; i++)
+ {
+
+ rsp = RING_GET_RESPONSE(&be_ring, i);
+ memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
+ rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
+
+ DPRINTF("copying a be request\n");
+
+ apply_rsp_hooks(rsp);
+ write_rsp_to_fe_ring(rsp);
+ }
+ be_ring.rsp_cons = i;
+
+ /* notify the domains */
+
+ if (notify_be) {
+ DPRINTF("notifying be\n");
+pthread_mutex_lock(&push_mutex);
+ RING_PUSH_REQUESTS(&be_ring);
+ ioctl(fd, BLKTAP_IOCTL_KICK_BE);
+pthread_mutex_unlock(&push_mutex);
+ }
+
+ if (notify_fe) {
+ DPRINTF("notifying fe\n");
+pthread_mutex_lock(&push_mutex);
+ RING_PUSH_RESPONSES(&fe_ring);
+ ioctl(fd, BLKTAP_IOCTL_KICK_FE);
+pthread_mutex_unlock(&push_mutex);
+ }
+ }
+ }
+
+
+ munmap(blktap_mem, PAGE_SIZE);
+
+ mmap_failed:
+ close(fd);
+
+ open_failed:
+ return 0;
+}
+
+void got_sig_bus() {
+ printf("Attempted to access a page that isn't.\n");
+ exit(-1);
+}
+
+void got_sig_int() {
+ DPRINTF("quitting -- returning to passthrough mode.\n");
+ if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
+ close(fd);
+ fd = 0;
+ exit(0);
+}
diff --git a/tools/blktap/blktaplib.h b/tools/blktap/blktaplib.h
new file mode 100644
index 0000000000..67ebca64cc
--- /dev/null
+++ b/tools/blktap/blktaplib.h
@@ -0,0 +1,101 @@
+/* blktaplib.h
+ *
+ * userland accessors to the block tap.
+ *
+ */
+
+#ifndef __BLKTAPLIB_H__
+#define __BLKTAPLIB_H__
+
+#include <xc.h>
+#include <sys/user.h>
+#include <xen/xen.h>
+#include <xen/io/blkif.h>
+#include <xen/io/ring.h>
+#include <xen/io/domain_controller.h>
+
+/* /dev/xen/blktap resides at device number major=10, minor=202 */
+#define BLKTAP_MINOR 202
+
+/* size of the extra VMA area to map in attached pages. */
+#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
+
+/* blktap IOCTLs: */
+#define BLKTAP_IOCTL_KICK_FE 1
+#define BLKTAP_IOCTL_KICK_BE 2
+#define BLKTAP_IOCTL_SETMODE 3
+#define BLKTAP_IOCTL_PRINT_IDXS 100
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
+#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
+#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
+#define BLKTAP_MODE_COPY_FE 0x00000004
+#define BLKTAP_MODE_COPY_BE 0x00000008
+#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
+#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
+
+#define BLKTAP_MODE_INTERPOSE \
+ (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+#define BLKTAP_MODE_COPY_BOTH \
+ (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
+
+#define BLKTAP_MODE_COPY_BOTH_PAGES \
+ (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
+
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+ return (
+ ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
+ ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+ ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
+ ( arg == BLKTAP_MODE_INTERPOSE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
+ );
+}
+
+/* Return values for handling messages in hooks. */
+#define BLKTAP_PASS 0 /* Keep passing this request as normal. */
+#define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */
+#define BLKTAP_STOLEN 2 /* Hook has stolen request. */
+
+#define domid_t unsigned short
+
+inline unsigned int ID_TO_IDX(unsigned long id);
+inline domid_t ID_TO_DOM(unsigned long id);
+
+void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
+void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
+void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
+void blktap_inject_response(blkif_response_t *);
+int blktap_attach_poll(int fd, short events, int (*func)(int));
+void blktap_detach_poll(int fd);
+int blktap_listen(void);
+
+/* Accessing attached data page mappings */
+#define MMAP_PAGES_PER_REQUEST \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
+
+extern unsigned long mmap_vstart;
+
+
+/* Defines that are only used by library clients */
+
+#ifndef __COMPILING_BLKTAP_LIB
+
+static char *blkif_op_name[] = {
+ [BLKIF_OP_READ] = "READ",
+ [BLKIF_OP_WRITE] = "WRITE",
+ [BLKIF_OP_PROBE] = "PROBE",
+};
+
+#endif /* __COMPILING_BLKTAP_LIB */
+
+#endif /* __BLKTAPLIB_H__ */
diff --git a/tools/blktap/block-async.c b/tools/blktap/block-async.c
new file mode 100755
index 0000000000..a0460de6fc
--- /dev/null
+++ b/tools/blktap/block-async.c
@@ -0,0 +1,393 @@
+/* block-async.c
+ *
+ * Asynchronous block wrappers for parallax.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include "block-async.h"
+#include "blockstore.h"
+#include "vdi.h"
+
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* We have a queue of outstanding I/O requests implemented as a
+ * circular producer-consumer ring with free-running buffers.
+ * to allow reordering, this ring indirects to indexes in an
+ * ring of io_structs.
+ *
+ * the block_* calls may either add an entry to this ring and return,
+ * or satisfy the request immediately and call the callback directly.
+ * None of the io calls in parallax should be nested enough to worry
+ * about stack problems with this approach.
+ */
+
+struct read_args {
+ u64 addr;
+};
+
+struct write_args {
+ u64 addr;
+ char *block;
+};
+
+struct alloc_args {
+ char *block;
+};
+
+struct pending_io_req {
+ enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
+ union {
+ struct read_args r;
+ struct write_args w;
+ struct alloc_args a;
+ } u;
+ io_cb_t cb;
+ void *param;
+};
+
+void radix_lock_init(struct radix_lock *r)
+{
+ int i;
+
+ pthread_mutex_init(&r->lock, NULL);
+ for (i=0; i < 1024; i++) {
+ r->lines[i] = 0;
+ r->waiters[i] = NULL;
+ r->state[i] = ANY;
+ }
+}
+
+/* maximum outstanding I/O requests issued asynchronously */
+/* must be a power of 2.*/
+#define MAX_PENDING_IO 1024
+
+/* how many threads to concurrently issue I/O to the disk. */
+#define IO_POOL_SIZE 10
+
+static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
+static int pending_io_list[MAX_PENDING_IO];
+static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
+#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
+#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
+#define PENDING_IO_ENT(_x) \
+ (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
+#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
+#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
+static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t pending_io_cond = PTHREAD_COND_INITIALIZER;
+
+static void init_pending_io(void)
+{
+ int i;
+
+ for (i=0; i<MAX_PENDING_IO; i++)
+ pending_io_list[i] = i;
+
+}
+
+void block_read(u64 addr, io_cb_t cb, void *param)
+{
+ struct pending_io_req *req;
+
+ pthread_mutex_lock(&pending_io_lock);
+ assert(CAN_PRODUCE_PENDING_IO);
+
+ req = PENDING_IO_ENT(io_prod++);
+ DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
+ req->op = IO_READ;
+ req->u.r.addr = addr;
+ req->cb = cb;
+ req->param = param;
+
+ pthread_cond_signal(&pending_io_cond);
+ pthread_mutex_unlock(&pending_io_lock);
+}
+
+
+void block_write(u64 addr, char *block, io_cb_t cb, void *param)
+{
+ struct pending_io_req *req;
+
+ pthread_mutex_lock(&pending_io_lock);
+ assert(CAN_PRODUCE_PENDING_IO);
+
+ req = PENDING_IO_ENT(io_prod++);
+ DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
+ req->op = IO_WRITE;
+ req->u.w.addr = addr;
+ req->u.w.block = block;
+ req->cb = cb;
+ req->param = param;
+
+ pthread_cond_signal(&pending_io_cond);
+ pthread_mutex_unlock(&pending_io_lock);
+}
+
+
+void block_alloc(char *block, io_cb_t cb, void *param)
+{
+ struct pending_io_req *req;
+
+ pthread_mutex_lock(&pending_io_lock);
+ assert(CAN_PRODUCE_PENDING_IO);
+
+ req = PENDING_IO_ENT(io_prod++);
+ req->op = IO_ALLOC;
+ req->u.a.block = block;
+ req->cb = cb;
+ req->param = param;
+
+ pthread_cond_signal(&pending_io_cond);
+ pthread_mutex_unlock(&pending_io_lock);
+}
+
+void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
+{
+ struct io_ret ret;
+ pthread_mutex_lock(&r->lock);
+
+ if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
+ r->lines[row]++;
+ r->state[row] = READ;
+ DPRINTF("RLOCK : %3d (row: %d)\n", r->lines[row], row);
+ pthread_mutex_unlock(&r->lock);
+ ret.type = IO_INT_T;
+ ret.u.i = 0;
+ cb(ret, param);
+ } else {
+ struct radix_wait **rwc;
+ struct radix_wait *rw =
+ (struct radix_wait *) malloc (sizeof(struct radix_wait));
+ DPRINTF("RLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
+ rw->type = RLOCK;
+ rw->param = param;
+ rw->cb = cb;
+ rw->next = NULL;
+ /* append to waiters list. */
+ rwc = &r->waiters[row];
+ while (*rwc != NULL) rwc = &(*rwc)->next;
+ *rwc = rw;
+ pthread_mutex_unlock(&r->lock);
+ return;
+ }
+}
+
+
+void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
+{
+ struct io_ret ret;
+ pthread_mutex_lock(&r->lock);
+
+ /* the second check here is redundant -- just here for debugging now. */
+ if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
+ r->state[row] = STOP;
+ r->lines[row] = -1;
+ DPRINTF("WLOCK : %3d (row: %d)\n", r->lines[row], row);
+ pthread_mutex_unlock(&r->lock);
+ ret.type = IO_INT_T;
+ ret.u.i = 0;
+ cb(ret, param);
+ } else {
+ struct radix_wait **rwc;
+ struct radix_wait *rw =
+ (struct radix_wait *) malloc (sizeof(struct radix_wait));
+ DPRINTF("WLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
+ rw->type = WLOCK;
+ rw->param = param;
+ rw->cb = cb;
+ rw->next = NULL;
+ /* append to waiters list. */
+ rwc = &r->waiters[row];
+ while (*rwc != NULL) rwc = &(*rwc)->next;
+ *rwc = rw;
+ pthread_mutex_unlock(&r->lock);
+ return;
+ }
+
+}
+
+/* called with radix_lock locked and lock count of zero. */
+static void wake_waiters(struct radix_lock *r, int row)
+{
+ struct pending_io_req *req;
+ struct radix_wait *rw;
+
+ if (r->lines[row] != 0) return;
+ if (r->waiters[row] == NULL) return;
+
+ if (r->waiters[row]->type == WLOCK) {
+
+ rw = r->waiters[row];
+ pthread_mutex_lock(&pending_io_lock);
+ assert(CAN_PRODUCE_PENDING_IO);
+
+ req = PENDING_IO_ENT(io_prod++);
+ req->op = IO_WWAKE;
+ req->cb = rw->cb;
+ req->param = rw->param;
+ r->lines[row] = -1; /* write lock the row. */
+ r->state[row] = STOP;
+ r->waiters[row] = rw->next;
+ free(rw);
+ pthread_mutex_unlock(&pending_io_lock);
+
+ } else /* RLOCK */ {
+
+ while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
+ rw = r->waiters[row];
+ pthread_mutex_lock(&pending_io_lock);
+ assert(CAN_PRODUCE_PENDING_IO);
+
+ req = PENDING_IO_ENT(io_prod++);
+ req->op = IO_RWAKE;
+ req->cb = rw->cb;
+ req->param = rw->param;
+ r->lines[row]++; /* read lock the row. */
+ r->state[row] = READ;
+ r->waiters[row] = rw->next;
+ free(rw);
+ pthread_mutex_unlock(&pending_io_lock);
+ }
+
+ if (r->waiters[row] != NULL) /* There is a write queued still */
+ r->state[row] = STOP;
+ }
+
+ pthread_mutex_lock(&pending_io_lock);
+ pthread_cond_signal(&pending_io_cond);
+ pthread_mutex_unlock(&pending_io_lock);
+}
+
+void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
+{
+ struct io_ret ret;
+
+ pthread_mutex_lock(&r->lock);
+ assert(r->lines[row] > 0); /* try to catch misuse. */
+ r->lines[row]--;
+ if (r->lines[row] == 0) {
+ r->state[row] = ANY;
+ wake_waiters(r, row);
+ }
+ pthread_mutex_unlock(&r->lock);
+ cb(ret, param);
+}
+
+void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
+{
+ struct io_ret ret;
+
+ pthread_mutex_lock(&r->lock);
+ assert(r->lines[row] == -1); /* try to catch misuse. */
+ r->lines[row] = 0;
+ r->state[row] = ANY;
+ wake_waiters(r, row);
+ pthread_mutex_unlock(&r->lock);
+ cb(ret, param);
+}
+
+/* consumer calls */
+static void do_next_io_req(struct pending_io_req *req)
+{
+ struct io_ret ret;
+ void *param;
+
+ switch (req->op) {
+ case IO_READ:
+ ret.type = IO_BLOCK_T;
+ ret.u.b = readblock(req->u.r.addr);
+ break;
+ case IO_WRITE:
+ ret.type = IO_INT_T;
+ ret.u.i = writeblock(req->u.w.addr, req->u.w.block);
+ DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
+ break;
+ case IO_ALLOC:
+ ret.type = IO_ADDR_T;
+ ret.u.a = allocblock(req->u.a.block);
+ break;
+ case IO_RWAKE:
+ DPRINTF("WAKE DEFERRED RLOCK!\n");
+ ret.type = IO_INT_T;
+ ret.u.i = 0;
+ break;
+ case IO_WWAKE:
+ DPRINTF("WAKE DEFERRED WLOCK!\n");
+ ret.type = IO_INT_T;
+ ret.u.i = 0;
+ break;
+ default:
+ DPRINTF("Unknown IO operation on pending list!\n");
+ return;
+ }
+
+ param = req->param;
+ pthread_mutex_lock(&pending_io_lock);
+ pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
+ pthread_mutex_unlock(&pending_io_lock);
+
+ assert(req->cb != NULL);
+ req->cb(ret, param);
+
+}
+
+void *io_thread(void *param)
+{
+ int tid;
+ struct pending_io_req *req;
+
+ /* Set this thread's tid. */
+ tid = *(int *)param;
+ free(param);
+
+start:
+ pthread_mutex_lock(&pending_io_lock);
+ while (io_prod == io_cons) {
+ pthread_cond_wait(&pending_io_cond, &pending_io_lock);
+ }
+
+ if (io_prod == io_cons) {
+ /* unnecessary wakeup. */
+ pthread_mutex_unlock(&pending_io_lock);
+ goto start;
+ }
+
+ req = PENDING_IO_ENT(io_cons++);
+ pthread_mutex_unlock(&pending_io_lock);
+
+ do_next_io_req(req);
+
+ goto start;
+
+}
+
+static pthread_t io_pool[IO_POOL_SIZE];
+void start_io_threads(void)
+
+{
+ int i, tid=0;
+
+ for (i=0; i < IO_POOL_SIZE; i++) {
+ int ret, *t;
+ t = (int *)malloc(sizeof(int));
+ *t = tid++;
+ ret = pthread_create(&io_pool[i], NULL, io_thread, t);
+ if (ret != 0) printf("Error starting thread %d\n", i);
+ }
+
+}
+
+void init_block_async(void)
+{
+ init_pending_io();
+ start_io_threads();
+}
diff --git a/tools/blktap/block-async.h b/tools/blktap/block-async.h
new file mode 100755
index 0000000000..022eea5da1
--- /dev/null
+++ b/tools/blktap/block-async.h
@@ -0,0 +1,69 @@
+/* block-async.h
+ *
+ * Asynchronous block wrappers for parallax.
+ */
+
+#ifndef _BLOCKASYNC_H_
+#define _BLOCKASYNC_H_
+
+#include <assert.h>
+#include <xc.h>
+#include "vdi.h"
+
+struct io_ret
+{
+ enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
+ union {
+ u64 a;
+ char *b;
+ int i;
+ } u;
+};
+
+typedef void (*io_cb_t)(struct io_ret r, void *param);
+
+/* per-vdi lock structures to make sure requests run in a safe order. */
+struct radix_wait {
+ enum {RLOCK, WLOCK} type;
+ io_cb_t cb;
+ void *param;
+ struct radix_wait *next;
+};
+
+struct radix_lock {
+ pthread_mutex_t lock;
+ int lines[1024];
+ struct radix_wait *waiters[1024];
+ enum {ANY, READ, STOP} state[1024];
+};
+void radix_lock_init(struct radix_lock *r);
+
+void block_read(u64 addr, io_cb_t cb, void *param);
+void block_write(u64 addr, char *block, io_cb_t cb, void *param);
+void block_alloc(char *block, io_cb_t cb, void *param);
+void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
+void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
+void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
+void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
+void init_block_async(void);
+
+static inline u64 IO_ADDR(struct io_ret r)
+{
+ assert(r.type == IO_ADDR_T);
+ return r.u.a;
+}
+
+static inline char *IO_BLOCK(struct io_ret r)
+{
+ assert(r.type == IO_BLOCK_T);
+ return r.u.b;
+}
+
+static inline int IO_INT(struct io_ret r)
+{
+ assert(r.type == IO_INT_T);
+ return r.u.i;
+}
+
+
+#endif //_BLOCKASYNC_H_
diff --git a/tools/blktap/blockstore.c b/tools/blktap/blockstore.c
new file mode 100644
index 0000000000..e15ddcd760
--- /dev/null
+++ b/tools/blktap/blockstore.c
@@ -0,0 +1,1350 @@
+/**************************************************************************
+ *
+ * blockstore.c
+ *
+ * Simple block store interface
+ *
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdarg.h>
+#include "blockstore.h"
+#include <pthread.h>
+
+//#define BLOCKSTORE_REMOTE
+//#define BSDEBUG
+
+#define RETRY_TIMEOUT 1000000 /* microseconds */
+
+/*****************************************************************************
+ * Debugging
+ */
+#ifdef BSDEBUG
+void DB(char *format, ...)
+{
+ va_list args;
+ fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key));
+ va_start(args, format);
+ vfprintf(stderr, format, args);
+ va_end(args);
+}
+#else
+#define DB(format, ...) (void)0
+#endif
+
+#ifdef BLOCKSTORE_REMOTE
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+/*****************************************************************************
+ * Network state *
+ *****************************************************************************/
+
+/* The individual disk servers we talks to. These will be referenced by
+ * an integer index into bsservers[].
+ */
+bsserver_t bsservers[MAX_SERVERS];
+
+/* The cluster map. This is indexed by an integer cluster number.
+ */
+bscluster_t bsclusters[MAX_CLUSTERS];
+
+/* Local socket.
+ */
+struct sockaddr_in sin_local;
+int bssock = 0;
+
+/*****************************************************************************
+ * Notification *
+ *****************************************************************************/
+
+typedef struct pool_thread_t_struct {
+ pthread_mutex_t ptmutex;
+ pthread_cond_t ptcv;
+ int newdata;
+} pool_thread_t;
+
+pool_thread_t pool_thread[READ_POOL_SIZE+1];
+
+#define RECV_NOTIFY(tid) { \
+ pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
+ pool_thread[tid].newdata = 1; \
+ DB("CV Waking %u", tid); \
+ pthread_cond_signal(&(pool_thread[tid].ptcv)); \
+ pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
+#define RECV_AWAIT(tid) { \
+ pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
+ if (pool_thread[tid].newdata) { \
+ pool_thread[tid].newdata = 0; \
+ DB("CV Woken %u", tid); \
+ } \
+ else { \
+ DB("CV Waiting %u", tid); \
+ pthread_cond_wait(&(pool_thread[tid].ptcv), \
+ &(pool_thread[tid].ptmutex)); \
+ } \
+ pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
+
+/*****************************************************************************
+ * Message queue management *
+ *****************************************************************************/
+
+/* Protects the queue manipulation critcal regions.
+ */
+pthread_mutex_t ptmutex_queue;
+#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
+#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
+
+pthread_mutex_t ptmutex_recv;
+#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
+#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
+
+/* A message queue entry. We allocate one of these for every request we send.
+ * Asynchronous reply reception also used one of these.
+ */
+typedef struct bsq_t_struct {
+ struct bsq_t_struct *prev;
+ struct bsq_t_struct *next;
+ int status;
+ int server;
+ int length;
+ struct msghdr msghdr;
+ struct iovec iov[2];
+ int tid;
+ struct timeval tv_sent;
+ bshdr_t message;
+ void *block;
+} bsq_t;
+
+#define BSQ_STATUS_MATCHED 1
+
+pthread_mutex_t ptmutex_luid;
+#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
+#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
+
+static u64 luid_cnt = 0x1000ULL;
+u64 new_luid(void) {
+ u64 luid;
+ ENTER_LUID_CR;
+ luid = luid_cnt++;
+ LEAVE_LUID_CR;
+ return luid;
+}
+
+/* Queue of outstanding requests.
+ */
+bsq_t *bs_head = NULL;
+bsq_t *bs_tail = NULL;
+int bs_qlen = 0;
+
+/*
+ */
+void queuedebug(char *msg) {
+ bsq_t *q;
+ ENTER_QUEUE_CR;
+ fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
+ for (q = bs_head; q; q = q->next) {
+ fprintf(stderr, " luid=%016llx server=%u\n",
+ q->message.luid, q->server);
+ }
+ LEAVE_QUEUE_CR;
+}
+
+int enqueue(bsq_t *qe) {
+ ENTER_QUEUE_CR;
+ qe->next = NULL;
+ qe->prev = bs_tail;
+ if (!bs_head)
+ bs_head = qe;
+ else
+ bs_tail->next = qe;
+ bs_tail = qe;
+ bs_qlen++;
+ LEAVE_QUEUE_CR;
+#ifdef BSDEBUG
+ queuedebug("enqueue");
+#endif
+ return 0;
+}
+
+/* Remove qe from the outstanding-request queue.
+ *
+ * @return: 1 if qe was found and unlinked, 0 if it was not queued.
+ */
+int dequeue(bsq_t *qe) {
+    bsq_t *q;
+    ENTER_QUEUE_CR;
+    for (q = bs_head; q; q = q->next) {
+        if (q == qe) {
+            /* Unlink q, patching head/tail when it is at either end. */
+            if (q->prev)
+                q->prev->next = q->next;
+            else
+                bs_head = q->next;
+            if (q->next)
+                q->next->prev = q->prev;
+            else
+                bs_tail = q->prev;
+            bs_qlen--;
+            goto found;
+        }
+    }
+
+    LEAVE_QUEUE_CR;
+#ifdef BSDEBUG
+    queuedebug("dequeue not found");
+#endif
+    return 0;
+
+ found:
+    LEAVE_QUEUE_CR;
+#ifdef BSDEBUG
+    /* Fixed: this path previously logged "dequeue not found". */
+    queuedebug("dequeue found");
+#endif
+    return 1;
+}
+
+bsq_t *queuesearch(bsq_t *qe) {
+ bsq_t *q;
+ ENTER_QUEUE_CR;
+ for (q = bs_head; q; q = q->next) {
+ if ((qe->server == q->server) &&
+ (qe->message.operation == q->message.operation) &&
+ (qe->message.luid == q->message.luid)) {
+
+ if ((q->message.operation == BSOP_READBLOCK) &&
+ ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
+ q->block = qe->block;
+ qe->block = NULL;
+ }
+ q->length = qe->length;
+ q->message.flags = qe->message.flags;
+ q->message.id = qe->message.id;
+ q->status |= BSQ_STATUS_MATCHED;
+
+ if (q->prev)
+ q->prev->next = q->next;
+ else
+ bs_head = q->next;
+ if (q->next)
+ q->next->prev = q->prev;
+ else
+ bs_tail = q->prev;
+ q->next = NULL;
+ q->prev = NULL;
+ bs_qlen--;
+ goto found;
+ }
+ }
+
+ LEAVE_QUEUE_CR;
+#ifdef BSDEBUG
+ queuedebug("queuesearch not found");
+#endif
+ return NULL;
+
+ found:
+ LEAVE_QUEUE_CR;
+#ifdef BSDEBUG
+ queuedebug("queuesearch found");
+#endif
+ return q;
+}
+
+/*****************************************************************************
+ * Network communication *
+ *****************************************************************************/
+
+int send_message(bsq_t *qe) {
+ int rc;
+
+ qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
+ qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
+ qe->msghdr.msg_iov = qe->iov;
+ if (qe->block)
+ qe->msghdr.msg_iovlen = 2;
+ else
+ qe->msghdr.msg_iovlen = 1;
+ qe->msghdr.msg_control = NULL;
+ qe->msghdr.msg_controllen = 0;
+ qe->msghdr.msg_flags = 0;
+
+ qe->iov[0].iov_base = (void *)&(qe->message);
+ qe->iov[0].iov_len = MSGBUFSIZE_ID;
+
+ if (qe->block) {
+ qe->iov[1].iov_base = qe->block;
+ qe->iov[1].iov_len = BLOCK_SIZE;
+ }
+
+ qe->message.luid = new_luid();
+
+ qe->status = 0;
+ qe->tid = (int)pthread_getspecific(tid_key);
+ if (enqueue(qe) < 0) {
+ fprintf(stderr, "Error enqueuing request.\n");
+ return -1;
+ }
+
+ gettimeofday(&(qe->tv_sent), NULL);
+ DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
+ rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
+ //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
+ // (struct sockaddr *)&(bsservers[qe->server].sin),
+ // sizeof(struct sockaddr_in));
+ if (rc < 0)
+ return rc;
+
+ return rc;
+}
+
+int recv_message(bsq_t *qe) {
+ struct sockaddr_in from;
+ //int flen = sizeof(from);
+ int rc;
+
+ qe->msghdr.msg_name = &from;
+ qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
+ qe->msghdr.msg_iov = qe->iov;
+ if (qe->block)
+ qe->msghdr.msg_iovlen = 2;
+ else
+ qe->msghdr.msg_iovlen = 1;
+ qe->msghdr.msg_control = NULL;
+ qe->msghdr.msg_controllen = 0;
+ qe->msghdr.msg_flags = 0;
+
+ qe->iov[0].iov_base = (void *)&(qe->message);
+ qe->iov[0].iov_len = MSGBUFSIZE_ID;
+ if (qe->block) {
+ qe->iov[1].iov_base = qe->block;
+ qe->iov[1].iov_len = BLOCK_SIZE;
+ }
+
+ rc = recvmsg(bssock, &(qe->msghdr), 0);
+
+ //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
+ // (struct sockaddr *)&from, &flen);
+ return rc;
+}
+
+int get_server_number(struct sockaddr_in *sin) {
+ int i;
+
+#ifdef BSDEBUG2
+ fprintf(stderr,
+ "get_server_number(%u.%u.%u.%u/%u)\n",
+ (unsigned int)sin->sin_addr.s_addr & 0xff,
+ ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
+ ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
+ ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
+ (unsigned int)sin->sin_port);
+#endif
+
+ for (i = 0; i < MAX_SERVERS; i++) {
+ if (bsservers[i].hostname) {
+#ifdef BSDEBUG2
+ fprintf(stderr,
+ "get_server_number check %u.%u.%u.%u/%u\n",
+ (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
+ ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
+ ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff,
+ ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff,
+ (unsigned int)bsservers[i].sin.sin_port);
+#endif
+ if ((sin->sin_family == bsservers[i].sin.sin_family) &&
+ (sin->sin_port == bsservers[i].sin.sin_port) &&
+ (memcmp((void *)&(sin->sin_addr),
+ (void *)&(bsservers[i].sin.sin_addr),
+ sizeof(struct in_addr)) == 0)) {
+ return i;
+ }
+ }
+ }
+
+ return -1;
+}
+
+void *rx_buffer = NULL;
+bsq_t rx_qe;
+bsq_t *recv_any(void) {
+ struct sockaddr_in from;
+ int rc;
+
+ DB("ENTER recv_any\n");
+
+ rx_qe.msghdr.msg_name = &from;
+ rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
+ rx_qe.msghdr.msg_iov = rx_qe.iov;
+ if (!rx_buffer) {
+ rx_buffer = malloc(BLOCK_SIZE);
+ if (!rx_buffer) {
+ perror("recv_any malloc");
+ return NULL;
+ }
+ }
+ rx_qe.block = rx_buffer;
+ rx_buffer = NULL;
+ rx_qe.msghdr.msg_iovlen = 2;
+ rx_qe.msghdr.msg_control = NULL;
+ rx_qe.msghdr.msg_controllen = 0;
+ rx_qe.msghdr.msg_flags = 0;
+
+ rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
+ rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
+ rx_qe.iov[1].iov_base = rx_qe.block;
+ rx_qe.iov[1].iov_len = BLOCK_SIZE;
+
+ rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
+ if (rc < 0) {
+ perror("recv_any");
+ return NULL;
+ }
+
+ rx_qe.length = rc;
+ rx_qe.server = get_server_number(&from);
+
+ DB("recv_any from %d luid=%016llx len=%u\n",
+ rx_qe.server, rx_qe.message.luid, rx_qe.length);
+
+ return &rx_qe;
+}
+
+void recv_recycle_buffer(bsq_t *q) {
+ if (q->block) {
+ rx_buffer = q->block;
+ q->block = NULL;
+ }
+}
+
+// cycle through reading any incoming, searching for a match in the
+// queue, until we have all we need.
+int wait_recv(bsq_t **reqs, int numreqs) {
+ bsq_t *q, *m;
+ unsigned int x, i;
+ int tid = (int)pthread_getspecific(tid_key);
+
+ DB("ENTER wait_recv %u\n", numreqs);
+
+ checkmatch:
+ x = 0xffffffff;
+ for (i = 0; i < numreqs; i++) {
+ x &= reqs[i]->status;
+ }
+ if ((x & BSQ_STATUS_MATCHED)) {
+ DB("LEAVE wait_recv\n");
+ return numreqs;
+ }
+
+ RECV_AWAIT(tid);
+
+ /*
+ rxagain:
+ ENTER_RECV_CR;
+ q = recv_any();
+ LEAVE_RECV_CR;
+ if (!q)
+ return -1;
+
+ m = queuesearch(q);
+ recv_recycle_buffer(q);
+ if (!m) {
+ fprintf(stderr, "Unmatched RX\n");
+ goto rxagain;
+ }
+ */
+
+ goto checkmatch;
+
+}
+
+/* retry
+ */
+static int retry_count = 0;
+int retry(bsq_t *qe)
+{
+ int rc;
+ gettimeofday(&(qe->tv_sent), NULL);
+ DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
+ retry_count++;
+ rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
+ if (rc < 0)
+ return rc;
+ return 0;
+}
+
+/* queue runner
+ */
+void *queue_runner(void *arg)
+{
+ for (;;) {
+ struct timeval now;
+ long long nowus, sus;
+ bsq_t *q;
+ int r;
+
+ sleep(1);
+
+ gettimeofday(&now, NULL);
+ nowus = now.tv_usec + now.tv_sec * 1000000;
+ ENTER_QUEUE_CR;
+ r = retry_count;
+ for (q = bs_head; q; q = q->next) {
+ sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000;
+ if ((nowus - sus) > RETRY_TIMEOUT) {
+ if (retry(q) < 0) {
+ fprintf(stderr, "Error on sendmsg retry.\n");
+ }
+ }
+ }
+ if (r != retry_count) {
+ fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count);
+ }
+ LEAVE_QUEUE_CR;
+ }
+}
+
+/* receive loop
+ */
+void *receive_loop(void *arg)
+{
+ bsq_t *q, *m;
+
+ for(;;) {
+ q = recv_any();
+ if (!q) {
+ fprintf(stderr, "recv_any error\n");
+ }
+ else {
+ m = queuesearch(q);
+ recv_recycle_buffer(q);
+ if (!m) {
+ fprintf(stderr, "Unmatched RX\n");
+ }
+ else {
+ DB("RX MATCH");
+ RECV_NOTIFY(m->tid);
+ }
+ }
+ }
+}
+pthread_t pthread_recv;
+
+/*****************************************************************************
+ * Reading *
+ *****************************************************************************/
+
+/* Read block `id` from a single server.
+ *
+ * Sends a BSOP_READBLOCK request and blocks in wait_recv() until the
+ * receive thread matches the reply; the reply path attaches the data
+ * buffer to qe->block.
+ *
+ * @return: malloc'd BLOCK_SIZE buffer (ownership passes to the caller),
+ *          NULL on error.
+ */
+void *readblock_indiv(int server, u64 id) {
+    void *block;
+    bsq_t *qe;
+    int rc;
+
+    qe = (bsq_t *)malloc(sizeof(bsq_t));
+    if (!qe) {
+        perror("readblock qe malloc");
+        return NULL;
+    }
+    qe->block = NULL; /* receive path supplies the data buffer */
+
+    qe->server = server;
+
+    qe->message.operation = BSOP_READBLOCK;
+    qe->message.flags = 0;
+    qe->message.id = id;
+    qe->length = MSGBUFSIZE_ID;
+
+    if (send_message(qe) < 0) {
+        perror("readblock sendto");
+        goto err;
+    }
+
+    rc = wait_recv(&qe, 1);
+    if (rc < 0) {
+        perror("readblock recv");
+        goto err;
+    }
+
+    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
+        fprintf(stderr, "readblock server error\n");
+        goto err;
+    }
+    if (qe->length < MSGBUFSIZE_BLOCK) {
+        /* Fixed: previously printed the uninitialized local `len`. */
+        fprintf(stderr, "readblock recv short (%d)\n", qe->length);
+        goto err;
+    }
+
+    block = qe->block;
+    free((void *)qe);
+    return block;
+
+ err:
+    if (qe->block)
+        free(qe->block);
+    free((void *)qe);
+    return NULL;
+}
+
+/**
+ * readblock: read a block from disk
+ * @id: block id to read
+ *
+ * @return: pointer to block, NULL on error
+ */
+/**
+ * readblock: read a block from disk
+ * @id: block id to read
+ *
+ * Round-robins reads across the replicas of the block's cluster.
+ * @return: pointer to block, NULL on error
+ */
+void *readblock(u64 id) {
+    int map = (int)BSID_MAP(id);
+    u64 xid;
+    /* NOTE(review): static round-robin index is shared across threads
+     * without locking; a race only skews replica choice, not data. */
+    static int i = CLUSTER_MAX_REPLICAS - 1;
+    void *block = NULL;
+
+    /* Special case for the "superblock": always use the first block on
+     * the first replica. (Extended to blocks < 6 for a vdi bug.) */
+    if (id < 6) {
+        block = readblock_indiv(bsclusters[map].servers[0], id);
+        goto out;
+    }
+
+    i++;
+    if (i >= CLUSTER_MAX_REPLICAS)
+        i = 0;
+    switch (i) {
+    case 1:
+        xid = BSID_REPLICA1(id);
+        break;
+    case 2:
+        xid = BSID_REPLICA2(id);
+        break;
+    case 0:
+    default:
+        /* Fixed: xid was left uninitialized if i fell outside 0..2. */
+        xid = BSID_REPLICA0(id);
+        break;
+    }
+
+    block = readblock_indiv(bsclusters[map].servers[i], xid);
+
+ out:
+#ifdef BSDEBUG
+    if (block)
+        fprintf(stderr, "READ: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
+                id,
+                (unsigned int)((unsigned char *)block)[0],
+                (unsigned int)((unsigned char *)block)[1],
+                (unsigned int)((unsigned char *)block)[2],
+                (unsigned int)((unsigned char *)block)[3],
+                (unsigned int)((unsigned char *)block)[4],
+                (unsigned int)((unsigned char *)block)[5],
+                (unsigned int)((unsigned char *)block)[6],
+                (unsigned int)((unsigned char *)block)[7]);
+    else
+        fprintf(stderr, "READ: %016llx NULL\n", id);
+#endif
+    return block;
+}
+
+/*****************************************************************************
+ * Writing *
+ *****************************************************************************/
+
+bsq_t *writeblock_indiv(int server, u64 id, void *block) {
+
+ bsq_t *qe;
+ int len;
+
+ qe = (bsq_t *)malloc(sizeof(bsq_t));
+ if (!qe) {
+ perror("writeblock qe malloc");
+ goto err;
+ }
+ qe->server = server;
+
+ qe->message.operation = BSOP_WRITEBLOCK;
+ qe->message.flags = 0;
+ qe->message.id = id;
+ //memcpy(qe->message.block, block, BLOCK_SIZE);
+ qe->block = block;
+ qe->length = MSGBUFSIZE_BLOCK;
+
+ if (send_message(qe) < 0) {
+ perror("writeblock sendto");
+ goto err;
+ }
+
+ return qe;
+
+ err:
+ free((void *)qe);
+ return NULL;
+}
+
+
+/**
+ * writeblock: write an existing block to disk
+ * @id: block id
+ * @block: pointer to block
+ *
+ * @return: zero on success, -1 on failure
+ */
+int writeblock(u64 id, void *block) {
+
+ int map = (int)BSID_MAP(id);
+ int rep0 = bsclusters[map].servers[0];
+ int rep1 = bsclusters[map].servers[1];
+ int rep2 = bsclusters[map].servers[2];
+ bsq_t *reqs[3];
+ int rc;
+
+ reqs[0] = reqs[1] = reqs[2] = NULL;
+
+#ifdef BSDEBUG
+ fprintf(stderr,
+ "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
+ id,
+ (unsigned int)((unsigned char *)block)[0],
+ (unsigned int)((unsigned char *)block)[1],
+ (unsigned int)((unsigned char *)block)[2],
+ (unsigned int)((unsigned char *)block)[3],
+ (unsigned int)((unsigned char *)block)[4],
+ (unsigned int)((unsigned char *)block)[5],
+ (unsigned int)((unsigned char *)block)[6],
+ (unsigned int)((unsigned char *)block)[7]);
+#endif
+
+ /* special case for the "superblock" just use the first block on the
+ * first replica. (extend to blocks < 6 for vdi bug)
+ */
+ if (id < 6) {
+ reqs[0] = writeblock_indiv(rep0, id, block);
+ if (!reqs[0])
+ return -1;
+ rc = wait_recv(reqs, 1);
+ return rc;
+ }
+
+ reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
+ if (!reqs[0])
+ goto err;
+ reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
+ if (!reqs[1])
+ goto err;
+ reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
+ if (!reqs[2])
+ goto err;
+
+ rc = wait_recv(reqs, 3);
+ if (rc < 0) {
+ perror("writeblock recv");
+ goto err;
+ }
+ if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
+ fprintf(stderr, "writeblock server0 error\n");
+ goto err;
+ }
+ if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
+ fprintf(stderr, "writeblock server1 error\n");
+ goto err;
+ }
+ if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
+ fprintf(stderr, "writeblock server2 error\n");
+ goto err;
+ }
+
+
+ free((void *)reqs[0]);
+ free((void *)reqs[1]);
+ free((void *)reqs[2]);
+ return 0;
+
+ err:
+ if (reqs[0]) {
+ dequeue(reqs[0]);
+ free((void *)reqs[0]);
+ }
+ if (reqs[1]) {
+ dequeue(reqs[1]);
+ free((void *)reqs[1]);
+ }
+ if (reqs[2]) {
+ dequeue(reqs[2]);
+ free((void *)reqs[2]);
+ }
+ return -1;
+}
+
+/*****************************************************************************
+ * Allocation *
+ *****************************************************************************/
+
+/**
+ * allocblock: write a new block to disk
+ * @block: pointer to block
+ *
+ * @return: new id of block on disk
+ */
+u64 allocblock(void *block) {
+ return allocblock_hint(block, 0);
+}
+
+bsq_t *allocblock_hint_indiv(int server, void *block, u64 hint) {
+ bsq_t *qe;
+ int len;
+
+ qe = (bsq_t *)malloc(sizeof(bsq_t));
+ if (!qe) {
+ perror("allocblock_hint qe malloc");
+ goto err;
+ }
+ qe->server = server;
+
+ qe->message.operation = BSOP_ALLOCBLOCK;
+ qe->message.flags = 0;
+ qe->message.id = hint;
+ //memcpy(qe->message.block, block, BLOCK_SIZE);
+ qe->block = block;
+ qe->length = MSGBUFSIZE_BLOCK;
+
+ if (send_message(qe) < 0) {
+ perror("allocblock_hint sendto");
+ goto err;
+ }
+
+ return qe;
+
+ err:
+ free((void *)qe);
+ return NULL;
+}
+
+/**
+ * allocblock_hint: write a new block to disk
+ * @block: pointer to block
+ * @hint: allocation hint
+ *
+ * @return: new id of block on disk
+ */
+u64 allocblock_hint(void *block, u64 hint) {
+ int map = (int)hint;
+ int rep0 = bsclusters[map].servers[0];
+ int rep1 = bsclusters[map].servers[1];
+ int rep2 = bsclusters[map].servers[2];
+ bsq_t *reqs[3];
+ int rc;
+ u64 id0, id1, id2;
+
+ reqs[0] = reqs[1] = reqs[2] = NULL;
+
+ DB("ENTER allocblock\n");
+
+ reqs[0] = allocblock_hint_indiv(rep0, block, hint);
+ if (!reqs[0])
+ goto err;
+ reqs[1] = allocblock_hint_indiv(rep1, block, hint);
+ if (!reqs[1])
+ goto err;
+ reqs[2] = allocblock_hint_indiv(rep2, block, hint);
+ if (!reqs[2])
+ goto err;
+
+ rc = wait_recv(reqs, 3);
+ if (rc < 0) {
+ perror("allocblock recv");
+ goto err;
+ }
+ if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
+ fprintf(stderr, "allocblock server0 error\n");
+ goto err;
+ }
+ if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
+ fprintf(stderr, "allocblock server1 error\n");
+ goto err;
+ }
+ if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
+ fprintf(stderr, "allocblock server2 error\n");
+ goto err;
+ }
+
+ id0 = reqs[0]->message.id;
+ id1 = reqs[1]->message.id;
+ id2 = reqs[2]->message.id;
+
+#ifdef BSDEBUG
+ fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
+ BSID(map, id0, id1, id2),
+ (unsigned int)((unsigned char *)block)[0],
+ (unsigned int)((unsigned char *)block)[1],
+ (unsigned int)((unsigned char *)block)[2],
+ (unsigned int)((unsigned char *)block)[3],
+ (unsigned int)((unsigned char *)block)[4],
+ (unsigned int)((unsigned char *)block)[5],
+ (unsigned int)((unsigned char *)block)[6],
+ (unsigned int)((unsigned char *)block)[7]);
+#endif
+
+ free((void *)reqs[0]);
+ free((void *)reqs[1]);
+ free((void *)reqs[2]);
+ return BSID(map, id0, id1, id2);
+
+ err:
+ if (reqs[0]) {
+ dequeue(reqs[0]);
+ free((void *)reqs[0]);
+ }
+ if (reqs[1]) {
+ dequeue(reqs[1]);
+ free((void *)reqs[1]);
+ }
+ if (reqs[2]) {
+ dequeue(reqs[2]);
+ free((void *)reqs[2]);
+ }
+ return 0;
+}
+
+#else /* /BLOCKSTORE_REMOTE */
+
+/*****************************************************************************
+ * Local storage version *
+ *****************************************************************************/
+
+/**
+ * readblock: read a block from disk
+ * @id: block id to read
+ *
+ * @return: pointer to block, NULL on error
+ */
+
+void *readblock(u64 id) {
+ void *block;
+ int block_fp;
+
+//printf("readblock(%llu)\n", id);
+ block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
+
+ if (block_fp < 0) {
+ perror("open");
+ return NULL;
+ }
+
+ if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
+ printf ("%Ld ", id);
+ printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
+ perror("readblock lseek");
+ goto err;
+ }
+ if ((block = malloc(BLOCK_SIZE)) == NULL) {
+ perror("readblock malloc");
+ goto err;
+ }
+ if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
+ perror("readblock read");
+ free(block);
+ goto err;
+ }
+ close(block_fp);
+ return block;
+
+err:
+ close(block_fp);
+ return NULL;
+}
+
+/**
+ * writeblock: write an existing block to disk
+ * @id: block id
+ * @block: pointer to block
+ *
+ * @return: zero on success, -1 on failure
+ */
+/**
+ * writeblock: write an existing block to disk
+ * @id: block id (1-based; block n lives at offset (n-1)*BLOCK_SIZE)
+ * @block: pointer to block
+ *
+ * @return: zero on success, -1 on failure
+ */
+int writeblock(u64 id, void *block) {
+
+    int block_fp;
+
+    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
+
+    if (block_fp < 0) {
+        perror("open");
+        return -1;
+    }
+
+    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
+        perror("writeblock lseek");
+        goto err;
+    }
+    /* Fixed: a short write previously went undetected (only rc < 0 was
+     * treated as failure); use the same full-block check as allocblock. */
+    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
+        perror("writeblock write");
+        goto err;
+    }
+    close(block_fp);
+    return 0;
+
+err:
+    close(block_fp);
+    return -1;
+}
+
+/**
+ * allocblock: write a new block to disk
+ * @block: pointer to block
+ *
+ * @return: new id of block on disk
+ */
+
+u64 allocblock(void *block) {
+ u64 lb;
+ off64_t pos;
+ int block_fp;
+
+ block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
+
+ if (block_fp < 0) {
+ perror("open");
+ return 0;
+ }
+
+ pos = lseek64(block_fp, 0, SEEK_END);
+ if (pos == (off64_t)-1) {
+ perror("allocblock lseek");
+ goto err;
+ }
+ if (pos % BLOCK_SIZE != 0) {
+ fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
+ goto err;
+ }
+ if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
+ perror("allocblock write");
+ goto err;
+ }
+ lb = pos / BLOCK_SIZE + 1;
+//printf("alloc(%Ld)\n", lb);
+ close(block_fp);
+ return lb;
+
+err:
+ close(block_fp);
+ return 0;
+
+}
+
+/**
+ * allocblock_hint: write a new block to disk
+ * @block: pointer to block
+ * @hint: allocation hint
+ *
+ * @return: new id of block on disk
+ */
+u64 allocblock_hint(void *block, u64 hint) {
+ return allocblock(block);
+}
+
+#endif /* BLOCKSTORE_REMOTE */
+
+/*****************************************************************************
+ * Memory management *
+ *****************************************************************************/
+
+/**
+ * newblock: get a new in-memory block set to zeros
+ *
+ * @return: pointer to new block, NULL on error
+ */
+/**
+ * newblock: get a new in-memory block set to zeros
+ *
+ * calloc zero-fills, replacing the previous malloc + memset pair.
+ *
+ * @return: pointer to new block, NULL on error
+ */
+void *newblock() {
+    void *block = calloc(1, BLOCK_SIZE);
+    if (block == NULL)
+        perror("newblock");
+    return block;
+}
+
+
+/**
+ * freeblock: unallocate an in-memory block
+ * @id: block id (zero if this is only in-memory)
+ * @block: block to be freed
+ */
+/**
+ * freeblock: unallocate an in-memory block
+ * @block: block to be freed (may be NULL; free(NULL) is a no-op, so the
+ *         previous explicit NULL guard was redundant)
+ */
+void freeblock(void *block) {
+    free(block);
+}
+
+static freeblock_t *new_freeblock(void)
+{
+ freeblock_t *fb;
+
+ fb = newblock();
+
+ if (fb == NULL) return NULL;
+
+ fb->magic = FREEBLOCK_MAGIC;
+ fb->next = 0ULL;
+ fb->count = 0ULL;
+ memset(fb->list, 0, sizeof fb->list);
+
+ return fb;
+}
+
+void releaseblock(u64 id)
+{
+ blockstore_super_t *bs_super;
+ freeblock_t *fl_current;
+
+ /* get superblock */
+ bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
+
+ /* get freeblock_current */
+ if (bs_super->freelist_current == 0ULL)
+ {
+ fl_current = new_freeblock();
+ bs_super->freelist_current = allocblock(fl_current);
+ writeblock(BLOCKSTORE_SUPER, bs_super);
+ } else {
+ fl_current = readblock(bs_super->freelist_current);
+ }
+
+ /* if full, chain to superblock and allocate new current */
+
+ if (fl_current->count == FREEBLOCK_SIZE) {
+ fl_current->next = bs_super->freelist_full;
+ writeblock(bs_super->freelist_current, fl_current);
+ bs_super->freelist_full = bs_super->freelist_current;
+ freeblock(fl_current);
+ fl_current = new_freeblock();
+ bs_super->freelist_current = allocblock(fl_current);
+ writeblock(BLOCKSTORE_SUPER, bs_super);
+ }
+
+ /* append id to current */
+ fl_current->list[fl_current->count++] = id;
+ writeblock(bs_super->freelist_current, fl_current);
+
+ freeblock(fl_current);
+ freeblock(bs_super);
+
+
+}
+
+/* freelist debug functions: */
+/* Freelist debug helper: walk the freelist and report how many block
+ * ids it holds; if print_each is 1, print every id as well.
+ */
+void freelist_count(int print_each)
+{
+    blockstore_super_t *bs_super;
+    freeblock_t *fb;
+    u64 total = 0, next;
+
+    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
+
+    if (bs_super->freelist_current == 0ULL) {
+        printf("freelist is empty!\n");
+        freeblock(bs_super); /* fixed: bs_super leaked on this path */
+        return;
+    }
+
+    fb = readblock(bs_super->freelist_current);
+    printf("%Ld entries on current.\n", fb->count); /* fixed typo "entires" */
+    total += fb->count;
+    if (print_each == 1)
+    {
+        int i;
+        for (i = 0; i < fb->count; i++)
+            printf(" %Ld\n", fb->list[i]);
+    }
+
+    freeblock(fb);
+
+    if (bs_super->freelist_full == 0ULL) {
+        printf("freelist_full is empty!\n");
+        freeblock(bs_super); /* fixed: bs_super leaked on this path */
+        return;
+    }
+
+    next = bs_super->freelist_full;
+    freeblock(bs_super); /* fixed: bs_super leaked on the full walk too */
+    for (;;) {
+        fb = readblock(next);
+        total += fb->count;
+        if (print_each == 1)
+        {
+            int i;
+            for (i = 0; i < fb->count; i++)
+                printf(" %Ld\n", fb->list[i]);
+        }
+        next = fb->next;
+        freeblock(fb);
+        if (next == 0ULL) break;
+    }
+    printf("Total of %Ld ids on freelist.\n", total);
+}
+
+/*****************************************************************************
+ * Initialisation *
+ *****************************************************************************/
+
+/* Initialise the blockstore.
+ *
+ * Remote build: init mutexes/condvars, resolve the disk-server names,
+ * build the cluster map, bind the local UDP socket, and start the
+ * receive and retry threads.
+ * Local build: create blockstore.dat if needed and validate the
+ * superblock magic (exits the process on corruption).
+ *
+ * @return: 0 on success, -1 on error.
+ */
+int __init_blockstore(void)
+{
+#ifdef BLOCKSTORE_REMOTE
+    int i;
+    struct hostent *addr;
+
+    pthread_mutex_init(&ptmutex_queue, NULL);
+    pthread_mutex_init(&ptmutex_luid, NULL);
+    pthread_mutex_init(&ptmutex_recv, NULL);
+    for (i = 0; i <= READ_POOL_SIZE; i++) {
+        pool_thread[i].newdata = 0;
+        pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
+        pthread_cond_init(&(pool_thread[i].ptcv), NULL);
+    }
+
+    /* Hard-coded server list; unused slots stay NULL. */
+    bsservers[0].hostname = "firebug.cl.cam.ac.uk";
+    bsservers[1].hostname = "planb.cl.cam.ac.uk";
+    bsservers[2].hostname = "simcity.cl.cam.ac.uk";
+    bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
+    bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
+    bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
+    bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
+    bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
+    bsservers[8].hostname = NULL;
+    bsservers[9].hostname = NULL;
+    bsservers[10].hostname = NULL;
+    bsservers[11].hostname = NULL;
+    bsservers[12].hostname = NULL;
+    bsservers[13].hostname = NULL;
+    bsservers[14].hostname = NULL;
+    bsservers[15].hostname = NULL;
+
+    for (i = 0; i < MAX_SERVERS; i++) {
+        if (!bsservers[i].hostname)
+            continue;
+        addr = gethostbyname(bsservers[i].hostname);
+        if (!addr) {
+            perror("bad hostname");
+            return -1;
+        }
+        bsservers[i].sin.sin_family = addr->h_addrtype;
+        bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
+        bsservers[i].sin.sin_addr.s_addr =
+            ((struct in_addr *)(addr->h_addr))->s_addr;
+    }
+
+    /* Cluster map: cluster n is replicated on servers n, n+1, n+2 mod 8. */
+    bsclusters[0].servers[0] = 0;
+    bsclusters[0].servers[1] = 1;
+    bsclusters[0].servers[2] = 2;
+    bsclusters[1].servers[0] = 1;
+    bsclusters[1].servers[1] = 2;
+    bsclusters[1].servers[2] = 3;
+    bsclusters[2].servers[0] = 2;
+    bsclusters[2].servers[1] = 3;
+    bsclusters[2].servers[2] = 4;
+    bsclusters[3].servers[0] = 3;
+    bsclusters[3].servers[1] = 4;
+    bsclusters[3].servers[2] = 5;
+    bsclusters[4].servers[0] = 4;
+    bsclusters[4].servers[1] = 5;
+    bsclusters[4].servers[2] = 6;
+    bsclusters[5].servers[0] = 5;
+    bsclusters[5].servers[1] = 6;
+    bsclusters[5].servers[2] = 7;
+    bsclusters[6].servers[0] = 6;
+    bsclusters[6].servers[1] = 7;
+    bsclusters[6].servers[2] = 0;
+    bsclusters[7].servers[0] = 7;
+    bsclusters[7].servers[1] = 0;
+    bsclusters[7].servers[2] = 1;
+
+    /* Local socket set up */
+    bssock = socket(AF_INET, SOCK_DGRAM, 0);
+    if (bssock < 0) {
+        perror("Bad socket");
+        return -1;
+    }
+    memset(&sin_local, 0, sizeof(sin_local));
+    sin_local.sin_family = AF_INET;
+    sin_local.sin_port = htons(BLOCKSTORED_PORT);
+    sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
+    if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
+        perror("bind");
+        close(bssock);
+        return -1;
+    }
+
+    /* NOTE(review): both handles land in pthread_recv, so the receive
+     * thread's handle is overwritten; neither thread is ever joined. */
+    pthread_create(&pthread_recv, NULL, receive_loop, NULL);
+    pthread_create(&pthread_recv, NULL, queue_runner, NULL);
+
+#else /* /BLOCKSTORE_REMOTE */
+    blockstore_super_t *bs_super;
+    int block_fp;
+
+    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
+
+    if (block_fp < 0) {
+        perror("open");
+        return -1; /* fixed: unreachable exit(-1) after return removed */
+    }
+
+    if (lseek(block_fp, 0, SEEK_END) == 0) {
+        /* Fresh file: write an initial, empty superblock. */
+        bs_super = newblock();
+        if (bs_super == NULL) { /* fixed: NULL deref on allocation failure */
+            close(block_fp);
+            return -1;
+        }
+        bs_super->magic = BLOCKSTORE_MAGIC;
+        bs_super->freelist_full = 0LL;
+        bs_super->freelist_current = 0LL;
+
+        allocblock(bs_super);
+
+        freeblock(bs_super);
+    } else {
+        bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
+        if (bs_super->magic != BLOCKSTORE_MAGIC)
+        {
+            printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
+            exit(-1);
+        }
+        freeblock(bs_super);
+    }
+
+    close(block_fp);
+
+#endif /* BLOCKSTORE_REMOTE */
+    return 0;
+}
+
+void __exit_blockstore(void)
+{
+ int i;
+#ifdef BLOCKSTORE_REMOTE
+ pthread_mutex_destroy(&ptmutex_recv);
+ pthread_mutex_destroy(&ptmutex_luid);
+ pthread_mutex_destroy(&ptmutex_queue);
+ /*pthread_mutex_destroy(&ptmutex_notify);
+ pthread_cond_destroy(&ptcv_notify);*/
+ for (i = 0; i <= READ_POOL_SIZE; i++) {
+ pthread_mutex_destroy(&(pool_thread[i].ptmutex));
+ pthread_cond_destroy(&(pool_thread[i].ptcv));
+ }
+#endif
+}
diff --git a/tools/blktap/blockstore.h b/tools/blktap/blockstore.h
new file mode 100644
index 0000000000..8415786511
--- /dev/null
+++ b/tools/blktap/blockstore.h
@@ -0,0 +1,134 @@
+/**************************************************************************
+ *
+ * blockstore.h
+ *
+ * Simple block store interface
+ *
+ */
+
+#ifndef __BLOCKSTORE_H__
+#define __BLOCKSTORE_H__
+
+#include <netinet/in.h>
+#include <xc.h>
+
+/* Fundamental storage unit: every block is BLOCK_SIZE bytes.
+ * BLOCK_SHIFT/BLOCK_MASK are the matching log2 and 64-bit address mask. */
+#define BLOCK_SIZE 4096
+#define BLOCK_SHIFT 12
+#define BLOCK_MASK 0xfffffffffffff000LL
+
+/* XXX SMH: where is the below supposed to be defined???? */
+#ifndef SECTOR_SHIFT
+#define SECTOR_SHIFT 9
+#endif
+
+/* Capacity (in u64 entries) of a freeblock_t's list[].
+ * NOTE(review): this expands to 512 - 24 = 488 because the three header
+ * words are subtracted in BYTES while the division yields u64 SLOTS, so
+ * sizeof(freeblock_t) is 3928, not BLOCK_SIZE.  If a block-sized struct
+ * was intended the macro should read ((BLOCK_SIZE / sizeof(u64)) - 3);
+ * confirm before changing -- it alters the on-disk freelist layout. */
+#define FREEBLOCK_SIZE (BLOCK_SIZE / sizeof(u64)) - (3 * sizeof(u64))
+#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
+
+/* One node of the chained on-disk free-block list. */
+typedef struct {
+ u64 magic;
+ u64 next;
+ u64 count;
+ u64 list[FREEBLOCK_SIZE];
+} freeblock_t;
+
+#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
+#define BLOCKSTORE_SUPER 1ULL
+
+/* Superblock, stored at block id BLOCKSTORE_SUPER. */
+typedef struct {
+ u64 magic;
+ u64 freelist_full;
+ u64 freelist_current;
+} blockstore_super_t;
+
+/* Core block operations.  Blocks returned by newblock()/readblock()
+ * are released with freeblock(); ids come from allocblock*(). */
+extern void *newblock();
+extern void *readblock(u64 id);
+extern u64 allocblock(void *block);
+extern u64 allocblock_hint(void *block, u64 hint);
+extern int writeblock(u64 id, void *block);
+
+/* Add this blockid to a freelist, to be recycled by the allocator. */
+extern void releaseblock(u64 id);
+
+/* this is a memory free() operation for block-sized allocations */
+extern void freeblock(void *block);
+extern int __init_blockstore(void);
+
+/* debug for freelist. */
+void freelist_count(int print_each);
+/* Failure sentinel for allocblock*(): all-ones, never a valid id. */
+#define ALLOCFAIL (((u64)(-1)))
+
+/* Distribution
+ */
+#define BLOCKSTORED_PORT 9346
+
+/* Wire header preceding every blockstore datagram (packed layout). */
+struct bshdr_t_struct {
+ u32 operation;
+ u32 flags;
+ u64 id;
+ u64 luid;
+} __attribute__ ((packed));
+typedef struct bshdr_t_struct bshdr_t;
+
+/* Full wire message: header plus one data block. */
+struct bsmsg_t_struct {
+ bshdr_t hdr;
+ unsigned char block[BLOCK_SIZE];
+} __attribute__ ((packed));
+
+typedef struct bsmsg_t_struct bsmsg_t;
+
+/* Minimum datagram sizes for messages carrying up to the named field. */
+#define MSGBUFSIZE_OP sizeof(u32)
+#define MSGBUFSIZE_FLAGS (sizeof(u32) + sizeof(u32))
+#define MSGBUFSIZE_ID (sizeof(u32) + sizeof(u32) + sizeof(u64) + sizeof(u64))
+#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
+
+#define BSOP_READBLOCK 0x01
+#define BSOP_WRITEBLOCK 0x02
+#define BSOP_ALLOCBLOCK 0x03
+#define BSOP_FREEBLOCK 0x04
+
+#define BSOP_FLAG_ERROR 0x01
+
+#define BS_ALLOC_SKIP 10
+#define BS_ALLOC_HACK
+
+/* Remote hosts and cluster map - XXX need to generalise
+ */
+
+/*
+
+ Interim ID format is
+
+ 63 60 59 40 39 20 19 0
+ +----+--------------------+--------------------+--------------------+
+ |map | replica 2 | replica 1 | replica 0 |
+ +----+--------------------+--------------------+--------------------+
+
+ The map is an index into a table detailing which machines form the
+ cluster.
+
+ */
+
+/* Extract the three 20-bit replica block numbers / 4-bit map index. */
+#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
+#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
+#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
+#define BSID_MAP(_id) (((_id)>>60)&0xfULL)
+
+/* Compose an id from a map index and three replica block numbers. */
+#define BSID(_map, _rep0, _rep1, _rep2) ((((u64)(_map))<<60) | \
+ (((u64)(_rep2))<<40) | \
+ (((u64)(_rep1))<<20) | ((u64)(_rep0)))
+
+/* One storage server: resolved hostname plus its UDP address. */
+typedef struct bsserver_t_struct {
+ char *hostname;
+ struct sockaddr_in sin;
+} bsserver_t;
+
+#define MAX_SERVERS 16
+
+#define CLUSTER_MAX_REPLICAS 3
+/* A cluster entry: indices into the server table for each replica. */
+typedef struct bscluster_t_struct {
+ int servers[CLUSTER_MAX_REPLICAS];
+} bscluster_t;
+
+#define MAX_CLUSTERS 16
+
+#endif /* __BLOCKSTORE_H__ */
diff --git a/tools/blktap/blockstored.c b/tools/blktap/blockstored.c
new file mode 100644
index 0000000000..6b86cb182f
--- /dev/null
+++ b/tools/blktap/blockstored.c
@@ -0,0 +1,276 @@
+/**************************************************************************
+ *
+ * blockstored.c
+ *
+ * Block store daemon.
+ *
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include "blockstore.h"
+
+//#define BSDEBUG
+
+int readblock_into(u64 id, void *block);
+
+/*
+ * open_socket: create a UDP socket bound to INADDR_ANY on 'port'.
+ *
+ * Returns the socket descriptor on success, or -1 after printing the
+ * reason via perror().
+ */
+int open_socket(u16 port) {
+
+    struct sockaddr_in bind_addr;
+    int fd;
+
+    fd = socket(AF_INET, SOCK_DGRAM, 0);
+    if (fd < 0) {
+        perror("Bad socket");
+        return -1;
+    }
+
+    memset(&bind_addr, 0, sizeof(bind_addr));
+    bind_addr.sin_family      = AF_INET;
+    bind_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+    bind_addr.sin_port        = htons(port);
+
+    if (bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr)) < 0) {
+        perror("bind");
+        close(fd);
+        return -1;
+    }
+
+    return fd;
+}
+
+/* Daemon-wide state: backing file of the store and the UDP socket.
+ * -1 means "not yet opened". */
+static int block_fp = -1;
+static int bssock = -1;
+
+/*
+ * send_reply: transmit 'len' bytes of 'buffer' to 'peer' over the
+ * daemon's global UDP socket (bssock).
+ *
+ * Returns 0 on success, 1 if sendto() failed (error reported via
+ * perror()).
+ */
+int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
+
+ int rc;
+
+#ifdef BSDEBUG
+ fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
+ len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
+#endif
+ rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
+ if (rc < 0) {
+ perror("send_reply");
+ return 1;
+ }
+
+
+ return 0;
+}
+
+/* Single request/reply staging buffer -- the daemon services one
+ * datagram at a time, so one static bsmsg_t suffices. */
+static bsmsg_t msgbuf;
+
+/*
+ * service_loop: main request loop of the daemon.  Never returns.
+ *
+ * Blocks on the global UDP socket (bssock), interprets each datagram
+ * as a bsmsg_t, performs READ/WRITE/ALLOC against the local block file
+ * and replies to the sender.  On per-operation failure the header is
+ * echoed back with BSOP_FLAG_ERROR set.
+ *
+ * Fixes over the original:
+ *  - recvfrom()'s address-length argument is a socklen_t (size_t has a
+ *    different width/signedness on some 64-bit ABIs);
+ *  - 'len' is a signed int, so it is printed with %d, not %u;
+ *  - unknown operations (including the unimplemented BSOP_FREEBLOCK)
+ *    are now reported instead of silently dropped.
+ */
+void service_loop(void) {
+
+    for (;;) {
+        int rc, len;
+        struct sockaddr_in from;
+        socklen_t slen = sizeof(from);
+        u64 bid;
+
+        len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
+                       (struct sockaddr *)&from, &slen);
+
+        if (len < 0) {
+            perror("recvfrom");
+            continue;
+        }
+
+        if (len < MSGBUFSIZE_OP) {
+            fprintf(stderr, "Short packet.\n");
+            continue;
+        }
+
+#ifdef BSDEBUG
+        fprintf(stdout, "RX: %d bytes op=%u id=0x%llx\n",
+                len, msgbuf.hdr.operation, msgbuf.hdr.id);
+#endif
+
+        switch (msgbuf.hdr.operation) {
+        case BSOP_READBLOCK:
+            /* Request must carry the full (op, flags, id, luid) header. */
+            if (len < MSGBUFSIZE_ID) {
+                fprintf(stderr, "Short packet (readblock %d).\n", len);
+                continue;
+            }
+            rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
+            if (rc < 0) {
+                fprintf(stderr, "readblock error\n");
+                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
+                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
+                continue;
+            }
+            msgbuf.hdr.flags = 0;
+            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
+            break;
+        case BSOP_WRITEBLOCK:
+            /* Write requests must carry a whole data block. */
+            if (len < MSGBUFSIZE_BLOCK) {
+                fprintf(stderr, "Short packet (writeblock %d).\n", len);
+                continue;
+            }
+            rc = writeblock(msgbuf.hdr.id, msgbuf.block);
+            if (rc < 0) {
+                fprintf(stderr, "writeblock error\n");
+                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
+                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
+                continue;
+            }
+            msgbuf.hdr.flags = 0;
+            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
+            break;
+        case BSOP_ALLOCBLOCK:
+            if (len < MSGBUFSIZE_BLOCK) {
+                fprintf(stderr, "Short packet (allocblock %d).\n", len);
+                continue;
+            }
+            bid = allocblock(msgbuf.block);
+            if (bid == ALLOCFAIL) {
+                fprintf(stderr, "allocblock error\n");
+                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
+                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
+                continue;
+            }
+            msgbuf.hdr.id = bid;
+            msgbuf.hdr.flags = 0;
+            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
+            break;
+        default:
+            /* BSOP_FREEBLOCK is not implemented here yet; log rather
+             * than dropping the request silently. */
+            fprintf(stderr, "Unknown operation %u.\n", msgbuf.hdr.operation);
+            break;
+        }
+
+    }
+}
+
+/**
+ * readblock_into: read a block from disk
+ * @id: 1-based block id to read
+ * @block: pointer to a buffer of at least BLOCK_SIZE bytes
+ *
+ * @return: 0 if OK, -1 on error (short reads count as errors)
+ */
+
+int readblock_into(u64 id, void *block) {
+    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
+        /* "%lld" + explicit cast replaces the non-standard GNU "%Ld". */
+        printf("%lld\n", (long long)((id - 1) * BLOCK_SIZE));
+        perror("readblock lseek");
+        return -1;
+    }
+    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
+        perror("readblock read");
+        return -1;
+    }
+    return 0;
+}
+
+/**
+ * writeblock: write an existing block to disk
+ * @id: 1-based block id
+ * @block: pointer to the BLOCK_SIZE-byte block
+ *
+ * @return: zero on success, -1 on failure
+ */
+int writeblock(u64 id, void *block) {
+    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
+        perror("writeblock lseek");
+        return -1;
+    }
+    /* A short write would silently corrupt the store: require all
+     * BLOCK_SIZE bytes (the original only tested for < 0). */
+    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
+        perror("writeblock write");
+        return -1;
+    }
+    return 0;
+}
+
+/**
+ * allocblock: append a new block to the store file
+ * @block: pointer to the BLOCK_SIZE-byte block to write
+ *
+ * @return: new 1-based id of the block on disk, ALLOCFAIL on error
+ */
+static u64 lastblock = 0;  /* highest id handed out; detects id reuse */
+
+u64 allocblock(void *block) {
+    u64 lb;
+    off64_t pos;
+
+ retry:
+    pos = lseek64(block_fp, 0, SEEK_END);
+    if (pos == (off64_t)-1) {
+        perror("allocblock lseek");
+        return ALLOCFAIL;
+    }
+    if (pos % BLOCK_SIZE != 0) {
+        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
+        return ALLOCFAIL;
+    }
+    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
+        perror("allocblock write");
+        return ALLOCFAIL;
+    }
+    lb = pos / BLOCK_SIZE + 1;
+
+#ifdef BS_ALLOC_HACK
+    /* Reserve the first BS_ALLOC_SKIP ids by burning blocks until we
+     * are past them. */
+    if (lb < BS_ALLOC_SKIP)
+        goto retry;
+#endif
+
+    /* Ids must be handed out monotonically; anything else indicates a
+     * concurrent writer or file truncation.
+     * ("%lld" + cast replaces non-standard "%Ld"; "alredy" typo fixed.) */
+    if (lb <= lastblock)
+        printf("[*** %lld already allocated! ***]\n", (long long)lb);
+
+    lastblock = lb;
+    return lb;
+}
+
+/**
+ * newblock: get a new in-memory block set to zeros
+ *
+ * @return: pointer to new BLOCK_SIZE-byte block, NULL on error
+ */
+void *newblock(void) {
+    /* calloc() zero-fills for us and cannot overflow for these args. */
+    void *block = calloc(1, BLOCK_SIZE);
+    if (block == NULL)
+        perror("newblock");
+    return block;
+}
+
+
+/**
+ * freeblock: release an in-memory block obtained from newblock()
+ * @block: block to be freed (may be NULL; free(NULL) is a no-op, so
+ *         the original's explicit guard was redundant)
+ */
+void freeblock(void *block) {
+    free(block);
+}
+
+
+/*
+ * main: open the backing store file, bind the daemon socket and enter
+ * the service loop.  Returns -1 on start-up failure.
+ */
+int main(int argc, char **argv)
+{
+    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
+
+    if (block_fp < 0) {
+        perror("open");
+        return -1;
+    }
+
+    bssock = open_socket(BLOCKSTORED_PORT);
+    if (bssock < 0) {
+        close(block_fp);  /* don't leak the store fd on failure */
+        return -1;
+    }
+
+    service_loop();
+
+    /* Not normally reached -- service_loop() loops forever. */
+    close(bssock);
+    close(block_fp);
+
+    return 0;
+}
diff --git a/tools/blktap/bstest.c b/tools/blktap/bstest.c
new file mode 100644
index 0000000000..5476ea1b74
--- /dev/null
+++ b/tools/blktap/bstest.c
@@ -0,0 +1,191 @@
+/**************************************************************************
+ *
+ * bstest.c
+ *
+ * Block store daemon test program.
+ *
+ * usage: bstest <host>|X {r|w|a} ID
+ *
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <errno.h>
+#include "blockstore.h"
+
+/*
+ * direct: send one blockstore request datagram to 'host' and print the
+ * reply.
+ * @host: hostname of a remote blockstored
+ * @op:   BSOP_* operation code
+ * @id:   block id to operate on
+ * @len:  request size in bytes (one of the MSGBUFSIZE_* values)
+ *
+ * Returns 0; exits the process on any resolution/socket error.
+ *
+ * Fixes over the original: message fields are addressed through the
+ * 'hdr' member (bsmsg_t has no top-level operation/flags/id fields, so
+ * the original did not match the header), recvfrom()'s length argument
+ * is a socklen_t, the 64-bit id is printed with an explicit cast, and
+ * the socket is closed unconditionally (the old "sock > 0" guard would
+ * leak descriptor 0).
+ */
+int direct(char *host, u32 op, u64 id, int len) {
+    struct sockaddr_in sn, peer;
+    int sock;
+    bsmsg_t msgbuf;
+    int rc;
+    socklen_t slen;
+    struct hostent *addr;
+
+    addr = gethostbyname(host);
+    if (!addr) {
+        perror("bad hostname");
+        exit(1);
+    }
+    peer.sin_family = addr->h_addrtype;
+    peer.sin_port = htons(BLOCKSTORED_PORT);
+    peer.sin_addr.s_addr = ((struct in_addr *)(addr->h_addr))->s_addr;
+    fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
+            (unsigned int)(unsigned char)addr->h_addr[0],
+            (unsigned int)(unsigned char)addr->h_addr[1],
+            (unsigned int)(unsigned char)addr->h_addr[2],
+            (unsigned int)(unsigned char)addr->h_addr[3]);
+
+    sock = socket(AF_INET, SOCK_DGRAM, 0);
+    if (sock < 0) {
+        perror("Bad socket");
+        exit(1);
+    }
+    memset(&sn, 0, sizeof(sn));
+    sn.sin_family = AF_INET;
+    sn.sin_port = htons(BLOCKSTORED_PORT);
+    sn.sin_addr.s_addr = htonl(INADDR_ANY);
+    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
+        perror("bind");
+        close(sock);
+        exit(1);
+    }
+
+    memset((void *)&msgbuf, 0, sizeof(msgbuf));
+    msgbuf.hdr.operation = op;
+    msgbuf.hdr.id = id;
+
+    rc = sendto(sock, (void *)&msgbuf, len, 0,
+                (struct sockaddr *)&peer, sizeof(peer));
+    if (rc < 0) {
+        perror("sendto");
+        exit(1);
+    }
+
+    slen = sizeof(peer);
+    len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
+                   (struct sockaddr *)&peer, &slen);
+    if (len < 0) {
+        perror("recvfrom");
+        exit(1);
+    }
+
+    /* Print only the fields the reply is long enough to contain. */
+    printf("Reply %d bytes:\n", len);
+    if (len >= MSGBUFSIZE_OP)
+        printf(" operation: %u\n", msgbuf.hdr.operation);
+    if (len >= MSGBUFSIZE_FLAGS)
+        printf(" flags: 0x%x\n", msgbuf.hdr.flags);
+    if (len >= MSGBUFSIZE_ID)
+        printf(" id: %llu\n", (unsigned long long)msgbuf.hdr.id);
+    if (len >= (MSGBUFSIZE_ID + 4))
+        printf(" data: %02x %02x %02x %02x...\n",
+               (unsigned int)msgbuf.block[0],
+               (unsigned int)msgbuf.block[1],
+               (unsigned int)msgbuf.block[2],
+               (unsigned int)msgbuf.block[3]);
+
+    close(sock);
+
+    return 0;
+}
+
+/*
+ * main: bstest entry point.
+ *
+ * usage: bstest <host>|X {r|w|a} ID
+ *
+ * With a hostname, sends the request to a remote blockstored via
+ * direct(); with "X", exercises the linked-in blockstore code locally.
+ */
+int main (int argc, char **argv) {
+
+ u32 op = 0;
+ u64 id = 0;
+ int len = 0, rc;
+ void *block;
+
+ if (argc < 3) {
+ fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
+ return 1;
+ }
+
+ /* Map the action letter to an opcode and its request length. */
+ switch (argv[2][0]) {
+ case 'r':
+ case 'R':
+ op = BSOP_READBLOCK;
+ len = MSGBUFSIZE_ID;
+ break;
+ case 'w':
+ case 'W':
+ op = BSOP_WRITEBLOCK;
+ len = MSGBUFSIZE_BLOCK;
+ break;
+ case 'a':
+ case 'A':
+ op = BSOP_ALLOCBLOCK;
+ len = MSGBUFSIZE_BLOCK;
+ break;
+ default:
+ fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
+ return 1;
+ }
+
+ /* NOTE(review): atoll() reports no errors; bad input yields id 0. */
+ if (argc >= 4)
+ id = atoll(argv[3]);
+
+ if (strcmp(argv[1], "X") == 0) {
+ /* Local mode: drive the blockstore library directly. */
+ rc = __init_blockstore();
+ if (rc < 0) {
+ fprintf(stderr, "blockstore init failed.\n");
+ return 1;
+ }
+ switch(op) {
+ case BSOP_READBLOCK:
+ block = readblock(id);
+ if (block) {
+ printf("data: %02x %02x %02x %02x...\n",
+ (unsigned int)((unsigned char*)block)[0],
+ (unsigned int)((unsigned char*)block)[1],
+ (unsigned int)((unsigned char*)block)[2],
+ (unsigned int)((unsigned char*)block)[3]);
+ }
+ break;
+ case BSOP_WRITEBLOCK:
+ block = malloc(BLOCK_SIZE);
+ if (!block) {
+ perror("bstest malloc");
+ return 1;
+ }
+ memset(block, 0, BLOCK_SIZE);
+ rc = writeblock(id, block);
+ if (rc != 0) {
+ printf("error\n");
+ }
+ else {
+ printf("OK\n");
+ }
+ break;
+ case BSOP_ALLOCBLOCK:
+ block = malloc(BLOCK_SIZE);
+ if (!block) {
+ perror("bstest malloc");
+ return 1;
+ }
+ memset(block, 0, BLOCK_SIZE);
+ id = allocblock_hint(block, id);
+ /* NOTE(review): allocblock() signals failure with ALLOCFAIL
+ * ((u64)-1), not 0 -- confirm allocblock_hint's failure value;
+ * this test may be checking the wrong sentinel. */
+ if (id == 0) {
+ printf("error\n");
+ }
+ else {
+ printf("ID: %llu\n", id);
+ }
+ break;
+ }
+ }
+ else {
+ /* Remote mode: single datagram round-trip. */
+ direct(argv[1], op, id, len);
+ }
+
+
+ return 0;
+}
diff --git a/tools/blktap/parallax.c b/tools/blktap/parallax.c
new file mode 100644
index 0000000000..46cdec4496
--- /dev/null
+++ b/tools/blktap/parallax.c
@@ -0,0 +1,611 @@
+/**************************************************************************
+ *
+ * parallax.c
+ *
+ * The Parallax Storage Server
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include "blktaplib.h"
+#include "blockstore.h"
+#include "vdi.h"
+#include "block-async.h"
+#include "requests-async.h"
+
+#define PARALLAX_DEV 61440
+#define SECTS_PER_NODE 8
+
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* ------[ session records ]----------------------------------------------- */
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+#define VDI_HASHSZ 16
+#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
+
+typedef struct blkif {
+ domid_t domid;
+ unsigned int handle;
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ vdi_t *vdi_hash[VDI_HASHSZ];
+ struct blkif *hash_next;
+} blkif_t;
+
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+/*
+ * blkif_find_by_handle: look up the session record for (domid, handle)
+ * in the blkif hash table.  Returns NULL if no such session exists.
+ * Only handle 0 is supported at present (a warning is printed for
+ * anything else, but the lookup still proceeds).
+ */
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+ if ( handle != 0 )
+ printf("blktap/parallax don't currently support non-0 dev handles!\n");
+
+ /* Walk the hash chain for this bucket. (C99 mixed declaration.) */
+ blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif != NULL) &&
+ ((blkif->domid != domid) || (blkif->handle != handle)) )
+ blkif = blkif->hash_next;
+ return blkif;
+}
+
+/*
+ * blkif_get_vdi: return the VDI attached to virtual device 'device' on
+ * this blkif, or NULL if none is attached.
+ */
+vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
+{
+    vdi_t *cursor;
+
+    for (cursor = blkif->vdi_hash[VDI_HASH(device)];
+         (cursor != NULL) && (cursor->vdevice != device);
+         cursor = cursor->next)
+        ;
+
+    return cursor;
+}
+
+/* ------[ control message handling ]-------------------------------------- */
+
+/*
+ * blkif_create: handle a CMSG_BLKIF_BE_CREATE control message.
+ *
+ * Allocates a new blkif session for (domid, blkif_handle) and inserts
+ * it into the hash table.  The outcome is reported in create->status:
+ * OKAY, OUT_OF_MEMORY, or INTERFACE_EXISTS for duplicates.
+ */
+void blkif_create(blkif_be_create_t *create)
+{
+ domid_t domid = create->domid;
+ unsigned int handle = create->blkif_handle;
+ blkif_t **pblkif, *blkif;
+
+ DPRINTF("parallax (blkif_create): create is %p\n", create);
+
+ if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
+ {
+ DPRINTF("Could not create blkif: out of memory\n");
+ create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ blkif->handle = handle;
+ blkif->status = DISCONNECTED;
+
+ /* Walk the bucket to reject duplicates, ending at the tail link. */
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( *pblkif != NULL )
+ {
+ if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+ {
+ DPRINTF("Could not create blkif: already exists (%d,%d)\n",
+ domid, handle);
+ create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
+ free(blkif);
+ return;
+ }
+ pblkif = &(*pblkif)->hash_next;
+ }
+
+ /* Append the new session at the end of the chain. */
+ blkif->hash_next = *pblkif;
+ *pblkif = blkif;
+
+ DPRINTF("Successfully created blkif\n");
+ create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * blkif_destroy: handle a CMSG_BLKIF_BE_DESTROY control message.
+ *
+ * Unlinks and frees the blkif for (domid, blkif_handle).  Refuses to
+ * destroy a session that is still connected.  Outcome is reported in
+ * destroy->status: OKAY, INTERFACE_NOT_FOUND, or INTERFACE_CONNECTED.
+ */
+void blkif_destroy(blkif_be_destroy_t *destroy)
+{
+ domid_t domid = destroy->domid;
+ unsigned int handle = destroy->blkif_handle;
+ blkif_t **pblkif, *blkif;
+
+ DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy);
+
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif = *pblkif) != NULL )
+ {
+ if ( (blkif->domid == domid) && (blkif->handle == handle) )
+ {
+ if ( blkif->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
+ }
+ pblkif = &blkif->hash_next;
+ }
+
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+
+ still_connected:
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
+ /* pblkif still points at the link that references blkif. */
+ *pblkif = blkif->hash_next;
+ free(blkif);
+ destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * vbd_create: handle a CMSG_BLKIF_BE_VBD_CREATE control message.
+ *
+ * Looks up the VDI named by create->dev_handle and attaches it to the
+ * target blkif under virtual device create->vdevice.  Outcome is
+ * reported in create->status: OKAY, INTERFACE_NOT_FOUND, or
+ * VBD_NOT_FOUND when the VDI does not exist.
+ *
+ * Fix over the original: the success DPRINTF wrongly said
+ * "blkif_create succeeded".
+ */
+void vbd_create(blkif_be_vbd_create_t *create)
+{
+    blkif_t *blkif;
+    vdi_t *vdi, **vdip;
+    blkif_vdev_t vdevice = create->vdevice;
+
+    DPRINTF("parallax (vbd_create): create=%p\n", create);
+
+    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
+    if ( blkif == NULL )
+    {
+        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n",
+                create->domid, create->blkif_handle);
+        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    /* The VDI identifier arrives in create->dev_handle. */
+    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n",
+            (unsigned long)create->dev_handle);
+
+    vdi = vdi_get(create->dev_handle);
+    if (vdi == NULL)
+    {
+        printf("parallax (vbd_create): VDI %lx not found.\n",
+               (unsigned long)create->dev_handle);
+        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+        return;
+    }
+
+    /* Append the VDI to the tail of this device's hash chain. */
+    vdi->next = NULL;
+    vdi->vdevice = vdevice;
+    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
+    while (*vdip != NULL)
+        vdip = &(*vdip)->next;
+    *vdip = vdi;
+
+    DPRINTF("vbd_create succeeded\n");
+    create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * vbd_destroy: handle a CMSG_BLKIF_BE_VBD_DESTROY control message.
+ *
+ * Detaches the VDI bound to destroy->vdevice from the target blkif and
+ * drops its reference via vdi_put().
+ *
+ * Bug fix: the original never set destroy->status on the success path,
+ * nor when the VBD was not attached, leaving the caller to read
+ * whatever was already in the message buffer.
+ */
+void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
+{
+    blkif_t *blkif;
+    vdi_t *vdi, **vdip;
+    blkif_vdev_t vdevice = destroy->vdevice;
+
+    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
+    if ( blkif == NULL )
+    {
+        DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
+                destroy->domid, destroy->blkif_handle);
+        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    /* Find the link pointing at the VDI for this virtual device. */
+    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
+    while ((*vdip != NULL) && ((*vdip)->vdevice != vdevice))
+        vdip = &(*vdip)->next;
+
+    if (*vdip == NULL)
+    {
+        destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
+        return;
+    }
+
+    vdi = *vdip;
+    *vdip = vdi->next;
+    vdi_put(vdi);
+    destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * parallax_control: control-message hook registered with blktap.
+ *
+ * Validates the message type/length and dispatches backend interface
+ * and VBD create/destroy operations.  CONNECT/DISCONNECT are handled
+ * by the tap itself, so they are deliberately ignored here.  Always
+ * returns 0 (the hook contract does not propagate errors).
+ *
+ * Cleanup over the original: removed the unused locals 'domid' and
+ * 'ret'.
+ */
+int parallax_control(control_msg_t *msg)
+{
+    DPRINTF("parallax_control: msg is %p\n", msg);
+
+    if (msg->type != CMSG_BLKIF_BE)
+    {
+        printf("Unexpected control message (%d)\n", msg->type);
+        return 0;
+    }
+
+    switch(msg->subtype)
+    {
+    case CMSG_BLKIF_BE_CREATE:
+        if ( msg->length != sizeof(blkif_be_create_t) )
+            goto parse_error;
+        blkif_create((blkif_be_create_t *)msg->msg);
+        break;
+
+    case CMSG_BLKIF_BE_DESTROY:
+        if ( msg->length != sizeof(blkif_be_destroy_t) )
+            goto parse_error;
+        blkif_destroy((blkif_be_destroy_t *)msg->msg);
+        break;
+
+    case CMSG_BLKIF_BE_VBD_CREATE:
+        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
+            goto parse_error;
+        vbd_create((blkif_be_vbd_create_t *)msg->msg);
+        break;
+
+    case CMSG_BLKIF_BE_VBD_DESTROY:
+        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
+            goto parse_error;
+        vbd_destroy((blkif_be_vbd_destroy_t *)msg->msg);
+        break;
+
+    case CMSG_BLKIF_BE_CONNECT:
+    case CMSG_BLKIF_BE_DISCONNECT:
+        /* we don't manage the device channel, the tap does. */
+        break;
+
+    default:
+        goto parse_error;
+    }
+    return 0;
+parse_error:
+    printf("Bad control message!\n");
+    return 0;
+
+}
+
+/*
+ * parallax_probe: service a BLKIF_OP_PROBE request.
+ *
+ * Fills the single page-sized probe buffer with a vdisk_t entry for
+ * every VDI attached to this blkif, and turns the request in place
+ * into a response whose status carries the number of disks (or
+ * BLKIF_RSP_ERROR on a malformed request).  Returns BLKTAP_RESPOND.
+ */
+int parallax_probe(blkif_request_t *req, blkif_t *blkif)
+{
+ blkif_response_t *rsp;
+ vdisk_t *img_info;
+ vdi_t *vdi;
+ int i, nr_vdis = 0;
+
+ DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif);
+
+ /* We expect one buffer only. */
+ if ( req->nr_segments != 1 )
+ goto err;
+
+ /* Make sure the buffer is page-sized. */
+ if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
+ (blkif_last_sect (req->frame_and_sects[0]) != 7) )
+ goto err;
+
+ /* fill the list of devices */
+ for (i=0; i<VDI_HASHSZ; i++) {
+ vdi = blkif->vdi_hash[i];
+ while (vdi) {
+ img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
+ img_info[nr_vdis].device = vdi->vdevice;
+ img_info[nr_vdis].info = 0;
+ /* The -1 here accounts for the LSB in the radix tree */
+ img_info[nr_vdis].capacity =
+ ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
+ nr_vdis++;
+ vdi = vdi->next;
+ }
+ }
+
+
+ /* Responses are written over the request in the shared ring. */
+ rsp = (blkif_response_t *)req;
+ rsp->id = req->id;
+ rsp->operation = BLKIF_OP_PROBE;
+ rsp->status = nr_vdis; /* number of disks */
+
+ DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
+ return BLKTAP_RESPOND;
+err:
+ rsp = (blkif_response_t *)req;
+ rsp->id = req->id;
+ rsp->operation = BLKIF_OP_PROBE;
+ rsp->status = BLKIF_RSP_ERROR;
+
+ DPRINTF("parallax_probe: send error response\n");
+ return BLKTAP_RESPOND;
+}
+
+/* Per-request completion record: counts outstanding per-segment async
+ * operations and accumulates errors; the mutex serialises callbacks. */
+typedef struct {
+ blkif_request_t *req;
+ int count;
+ int error;
+ pthread_mutex_t mutex;
+} pending_t;
+
+/* One slot per in-flight request, indexed by ID_TO_IDX(req->id). */
+#define MAX_REQUESTS 64
+pending_t pending_list[MAX_REQUESTS];
+
+/* Per-segment context handed to the async read/write callbacks. */
+struct cb_param {
+ pending_t *pent;
+ int segment;
+ u64 sector;
+ u64 vblock; /* for debug printing -- can be removed. */
+};
+
+/*
+ * read_cb: per-segment completion callback for parallax_read().
+ *
+ * Copies the sector range of this segment out of the block returned by
+ * the store into the request's mapped page, then decrements the
+ * pending count under the mutex; the callback that brings the count to
+ * zero injects the blkif response (OKAY unless any segment errored).
+ */
+static void read_cb(struct io_ret r, void *in_param)
+{
+ struct cb_param *param = (struct cb_param *)in_param;
+ pending_t *p = param->pent;
+ int segment = param->segment;
+ blkif_request_t *req = p->req;
+ unsigned long size, offset, start;
+ char *dpage, *spage;
+
+ spage = IO_BLOCK(r);
+ /* NOTE(review): p->error is incremented here before the mutex is
+ * taken -- racy if segment callbacks run concurrently; confirm the
+ * async layer's callback threading. */
+ if (spage == NULL) { p->error++; goto finish; }
+ dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
+
+ /* Calculate read size and offset within the read block. */
+
+ offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
+ size = ( blkif_last_sect (req->frame_and_sects[segment]) -
+ blkif_first_sect(req->frame_and_sects[segment]) + 1
+ ) << SECTOR_SHIFT;
+ start = blkif_first_sect(req->frame_and_sects[segment])
+ << SECTOR_SHIFT;
+
+ DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), "
+ "vblock %llx, "
+ "size %lx\n",
+ param->sector, blkif_first_sect(p->req->frame_and_sects[segment]),
+ blkif_last_sect (p->req->frame_and_sects[segment]),
+ param->vblock, size);
+
+ memcpy(dpage + start, spage + offset, size);
+ freeblock(spage);
+
+ /* Done the read. Now update the pending record. */
+ finish:
+ pthread_mutex_lock(&p->mutex);
+ p->count--;
+
+ if (p->count == 0) {
+ blkif_response_t *rsp;
+
+ rsp = (blkif_response_t *)req;
+ rsp->id = req->id;
+ rsp->operation = BLKIF_OP_READ;
+ if (p->error == 0) {
+ rsp->status = BLKIF_RSP_OKAY;
+ } else {
+ rsp->status = BLKIF_RSP_ERROR;
+ }
+ blktap_inject_response(rsp);
+ }
+
+ pthread_mutex_unlock(&p->mutex);
+
+ free(param); /* TODO: replace with cached alloc/dealloc */
+}
+
+/*
+ * parallax_read: service a BLKIF_OP_READ request asynchronously.
+ *
+ * Issues one vdi_read() per segment; read_cb() assembles the data and
+ * injects the response when the final segment completes.  Returns
+ * BLKTAP_STOLEN once the request is queued, or BLKTAP_RESPOND with an
+ * error response if the VDI is unknown.
+ *
+ * Fixes over the original: the pending slot's error counter is reset
+ * (slots are reused, so a stale error would fail every later request
+ * mapped to the same slot), and the unused locals (gblock, dpage,
+ * spage, tid, ret) are gone.
+ */
+int parallax_read(blkif_request_t *req, blkif_t *blkif)
+{
+    blkif_response_t *rsp;
+    u64 vblock;
+    vdi_t *vdi;
+    u64 sector;
+    int i;
+    pending_t *pent;
+
+    vdi = blkif_get_vdi(blkif, req->device);
+
+    if ( vdi == NULL )
+        goto err;
+
+    pent = &pending_list[ID_TO_IDX(req->id)];
+    pent->count = req->nr_segments;
+    pent->error = 0;
+    pent->req = req;
+    pthread_mutex_init(&pent->mutex, NULL);
+
+    for (i = 0; i < req->nr_segments; i++) {
+        struct cb_param *p;
+
+        /* Round the requested segment to a block address. */
+        sector = req->sector_number + (8*i);
+        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
+
+        /* TODO: Replace this call to malloc with a cached allocation,
+         * and handle allocation failure. */
+        p = (struct cb_param *)malloc(sizeof(struct cb_param));
+        p->pent = pent;
+        p->sector = sector;
+        p->segment = i;
+        p->vblock = vblock; /* dbg */
+
+        /* Get that block from the store. */
+        vdi_read(vdi, vblock, read_cb, (void *)p);
+    }
+
+    return BLKTAP_STOLEN;
+
+err:
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = BLKIF_OP_READ;
+    rsp->status = BLKIF_RSP_ERROR;
+
+    return BLKTAP_RESPOND;
+}
+
+/*
+ * write_cb: per-segment completion callback for parallax_write().
+ *
+ * Records any error reported by the block layer, then decrements the
+ * pending count under the mutex; the callback that brings the count to
+ * zero injects the blkif response (OKAY unless any segment errored).
+ */
+static void write_cb(struct io_ret r, void *in_param)
+{
+ struct cb_param *param = (struct cb_param *)in_param;
+ pending_t *p = param->pent;
+ blkif_request_t *req = p->req;
+
+ /* catch errors from the block code. */
+ /* NOTE(review): incremented outside the mutex -- racy if callbacks
+ * run concurrently; confirm the async layer's threading. */
+ if (IO_INT(r) < 0) p->error++;
+
+ pthread_mutex_lock(&p->mutex);
+ p->count--;
+
+ if (p->count == 0) {
+ blkif_response_t *rsp;
+
+ rsp = (blkif_response_t *)req;
+ rsp->id = req->id;
+ rsp->operation = BLKIF_OP_WRITE;
+ if (p->error == 0) {
+ rsp->status = BLKIF_RSP_OKAY;
+ } else {
+ rsp->status = BLKIF_RSP_ERROR;
+ }
+ blktap_inject_response(rsp);
+ }
+
+ pthread_mutex_unlock(&p->mutex);
+
+ free(param); /* TODO: replace with cached alloc/dealloc */
+}
+
+/*
+ * parallax_write: service a BLKIF_OP_WRITE request asynchronously.
+ *
+ * Issues one vdi_write() per segment; write_cb() injects the response
+ * when the final segment completes.  Only whole, block-aligned pages
+ * are accepted for now.  Returns BLKTAP_STOLEN once queued, or
+ * BLKTAP_RESPOND with an error response on a bad VDI or odd geometry.
+ *
+ * Fixes over the original: 'gblock' was declared, never assigned, and
+ * then read in the debug printf (undefined behaviour when BSDEBUG-style
+ * tracing is on) -- removed from the trace; the unused 'writable' local
+ * is gone; and the pending slot's error counter is reset because slots
+ * are reused across requests.
+ */
+int parallax_write(blkif_request_t *req, blkif_t *blkif)
+{
+    blkif_response_t *rsp;
+    u64 sector;
+    int i;
+    u64 vblock;
+    char *spage;
+    unsigned long size, offset, start;
+    vdi_t *vdi;
+    pending_t *pent;
+
+    vdi = blkif_get_vdi(blkif, req->device);
+
+    if ( vdi == NULL )
+        goto err;
+
+    pent = &pending_list[ID_TO_IDX(req->id)];
+    pent->count = req->nr_segments;
+    pent->error = 0;
+    pent->req = req;
+    pthread_mutex_init(&pent->mutex, NULL);
+
+    for (i = 0; i < req->nr_segments; i++) {
+        struct cb_param *p;
+
+        spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
+
+        /* Round the requested segment to a block address. */
+
+        sector = req->sector_number + (8*i);
+        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
+
+        /* Calculate read size and offset within the read block. */
+
+        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
+        size = ( blkif_last_sect (req->frame_and_sects[i]) -
+                 blkif_first_sect(req->frame_and_sects[i]) + 1
+               ) << SECTOR_SHIFT;
+        start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
+
+        DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld), "
+                "vblock %llx, "
+                "size %lx\n",
+                sector, blkif_first_sect(req->frame_and_sects[i]),
+                blkif_last_sect (req->frame_and_sects[i]),
+                vblock, size);
+
+        /* XXX: For now we just freak out if they try to write a */
+        /* non block-sized, block-aligned page. */
+
+        if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
+            printf("]\n] STRANGE WRITE!\n]\n");
+            goto err;
+        }
+
+        /* TODO: Replace this call to malloc with a cached allocation,
+         * and handle allocation failure. */
+        p = (struct cb_param *)malloc(sizeof(struct cb_param));
+        p->pent = pent;
+        p->sector = sector;
+        p->segment = i;
+        p->vblock = vblock; /* dbg */
+
+        /* Issue the write to the store. */
+        vdi_write(vdi, vblock, spage, write_cb, (void *)p);
+    }
+
+    return BLKTAP_STOLEN;
+
+err:
+    rsp = (blkif_response_t *)req;
+    rsp->id = req->id;
+    rsp->operation = BLKIF_OP_WRITE;
+    rsp->status = BLKIF_RSP_ERROR;
+
+    return BLKTAP_RESPOND;
+}
+
+/*
+ * parallax_request: request hook registered with blktap.
+ *
+ * Resolves the originating domain's blkif (handle 0 only) and
+ * dispatches PROBE/READ/WRITE.  Unknown operations and unknown
+ * domains get an in-place error response (BLKTAP_RESPOND); READ and
+ * WRITE normally return BLKTAP_STOLEN to answer asynchronously.
+ */
+int parallax_request(blkif_request_t *req)
+{
+ blkif_response_t *rsp;
+ domid_t dom = ID_TO_DOM(req->id);
+ blkif_t *blkif = blkif_find_by_handle(dom, 0);
+
+ if (blkif == NULL)
+ goto err;
+
+ if ( req->operation == BLKIF_OP_PROBE ) {
+
+ return parallax_probe(req, blkif);
+
+ } else if ( req->operation == BLKIF_OP_READ ) {
+
+ return parallax_read(req, blkif);
+
+ } else if ( req->operation == BLKIF_OP_WRITE ) {
+
+ return parallax_write(req, blkif);
+
+ } else {
+ printf("Unknown request message type!\n");
+ /* Unknown operation */
+ goto err;
+ }
+
+err:
+ rsp = (blkif_response_t *)req;
+ rsp->operation = req->operation;
+ rsp->id = req->id;
+ rsp->status = BLKIF_RSP_ERROR;
+ return BLKTAP_RESPOND;
+}
+
+/* Reset the blkif session hash table so every bucket starts empty. */
+void __init_parallax(void)
+{
+    int i;
+
+    for (i = 0; i < BLKIF_HASHSZ; i++)
+        blkif_hash[i] = NULL;
+}
+
+
+
+/*
+ * main: parallax server entry point.
+ *
+ * Initialises the blockstore, async block layer, VDI registry and the
+ * local session table, registers the control and request hooks with
+ * blktap, then hands control to blktap_listen() (normally forever).
+ */
+int main(int argc, char *argv[])
+{
+ DPRINTF("parallax: starting.\n");
+ __init_blockstore();
+ DPRINTF("parallax: initialized blockstore...\n");
+ init_block_async();
+ DPRINTF("parallax: initialized async blocks...\n");
+ __init_vdi();
+ DPRINTF("parallax: initialized vdi registry etc...\n");
+ __init_parallax();
+ DPRINTF("parallax: initialized local stuff..\n");
+
+ blktap_register_ctrl_hook("parallax_control", parallax_control);
+ blktap_register_request_hook("parallax_request", parallax_request);
+ DPRINTF("parallax: added ctrl + request hooks, starting listen...\n");
+ blktap_listen();
+
+ return 0;
+}
diff --git a/tools/blktap/radix.c b/tools/blktap/radix.c
new file mode 100644
index 0000000000..a9f148e336
--- /dev/null
+++ b/tools/blktap/radix.c
@@ -0,0 +1,631 @@
+/*
+ * Radix tree for mapping (up to) 63-bit virtual block IDs to
+ * 63-bit global block IDs
+ *
+ * Pointers within the tree set aside the least significant bit to indicate
+ * whether or not the target block is writable from this node.
+ *
+ * The block with ID 0 is assumed to be an empty block of all zeros
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <pthread.h>
+#include "blockstore.h"
+#include "radix.h"
+
+#define RADIX_TREE_MAP_SHIFT 9
+#define RADIX_TREE_MAP_MASK 0x1ff
+#define RADIX_TREE_MAP_ENTRIES 512
+
+/*
+#define DEBUG
+*/
+
+/* Experimental radix cache. */
+
+static pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
+static int rcache_count = 0;
+#define RCACHE_MAX 1024
+
+/* One cached radix node.  Each entry lives on one hash-bucket chain and
+ * on a global doubly-linked LRU list (head = most recently used). */
+typedef struct rcache_st {
+    radix_tree_node *node;        /* private BLOCK_SIZE copy of the block */
+    u64 id;                       /* global block id -- the hash key      */
+    struct rcache_st *hash_next;  /* next entry in this hash bucket       */
+    struct rcache_st *cache_next; /* LRU list, towards the tail (colder)  */
+    struct rcache_st *cache_prev; /* LRU list, towards the head (hotter)  */
+} rcache_t;
+
+static rcache_t *rcache_head = NULL;
+static rcache_t *rcache_tail = NULL;
+
+#define RCHASH_SIZE 512ULL
+rcache_t *rcache[RCHASH_SIZE];
+#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1))
+
+/* Empty all hash buckets.  Must run before any rcache_read/rcache_write. */
+void __rcache_init(void)
+{
+    int i;
+
+    for (i=0; i<RCHASH_SIZE; i++)
+        rcache[i] = NULL;
+}
+
+
+/* rcache_write: insert or refresh the cached copy of radix node 'id'.
+ *
+ * A private copy of the BLOCK_SIZE buffer is kept (the caller retains
+ * ownership of 'node'); the entry is moved to the head of the LRU list.
+ * When the cache already holds RCACHE_MAX entries the LRU tail is
+ * evicted and its slot reused.  Serialized by rcache_mutex.
+ */
+void rcache_write(u64 id, radix_tree_node *node)
+{
+    rcache_t *r, *tmp, **curs;
+
+    pthread_mutex_lock(&rcache_mutex);
+
+    /* Is it already in the cache? */
+    r = rcache[RCACHE_HASH(id)];
+
+    for (;;) {
+        if (r == NULL)
+            break;
+        if (r->id == id)
+        {
+            /* Hit: refresh the cached copy in place. */
+            memcpy(r->node, node, BLOCK_SIZE);
+
+            /* bring to front. */
+            if (r != rcache_head) {
+
+                if (r == rcache_tail) {
+                    if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
+                    rcache_tail->cache_next = NULL;
+                }
+
+                tmp = r->cache_next;
+                if (r->cache_next != NULL) r->cache_next->cache_prev
+                    = r->cache_prev;
+                if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
+
+                r->cache_prev = NULL;
+                r->cache_next = rcache_head;
+                if (rcache_head != NULL) rcache_head->cache_prev = r;
+                rcache_head = r;
+            }
+
+//printf("Update (%Ld)\n", r->id);
+            goto done;
+        }
+        r = r->hash_next;
+    }
+
+    if ( rcache_count == RCACHE_MAX )
+    {
+        /* Remove an entry */
+        /* NOTE(review): the head/tail fixup below assumes RCACHE_MAX >= 2;
+         * with a single-entry cache the reused node would self-link. */
+
+        r = rcache_tail;
+        if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
+        rcache_tail->cache_next = NULL;
+        freeblock(r->node);
+
+        /* Unlink the victim from its hash chain. */
+        curs = &rcache[RCACHE_HASH(r->id)];
+        while ((*curs) != r)
+            curs = &(*curs)->hash_next;
+        *curs = r->hash_next;
+//printf("Evict (%Ld)\n", r->id);
+
+    } else {
+
+        r = (rcache_t *)malloc(sizeof(rcache_t));
+        rcache_count++;
+    }
+
+    /* (Re)initialize the entry with a private copy of the block and
+     * link it onto its hash chain and the head of the LRU list. */
+    r->node = newblock();
+    memcpy(r->node, node, BLOCK_SIZE);
+    r->id = id;
+
+    r->hash_next = rcache[RCACHE_HASH(id)];
+    rcache[RCACHE_HASH(id)] = r;
+
+    r->cache_prev = NULL;
+    r->cache_next = rcache_head;
+    if (rcache_head != NULL) rcache_head->cache_prev = r;
+    rcache_head = r;
+    if (rcache_tail == NULL) rcache_tail = r;
+
+//printf("Added (%Ld, %p)\n", id, r->node);
+done:
+    pthread_mutex_unlock(&rcache_mutex);
+}
+
+/* rcache_read: look up radix node 'id' in the cache.
+ *
+ * On a hit the entry is promoted to the LRU head and a freshly
+ * allocated copy of the block is returned (caller frees with
+ * freeblock()).  Returns NULL on a miss.  Serialized by rcache_mutex.
+ */
+radix_tree_node *rcache_read(u64 id)
+{
+    rcache_t *r, *tmp;
+    radix_tree_node *node = NULL;
+
+    pthread_mutex_lock(&rcache_mutex);
+
+    r = rcache[RCACHE_HASH(id)];
+
+    for (;;) {
+        if (r == NULL) {
+//printf("Miss (%Ld)\n", id);
+            goto done;
+        }
+        if (r->id == id) break;
+        r = r->hash_next;
+    }
+
+    /* bring to front. */
+    if (r != rcache_head)
+    {
+        if (r == rcache_tail) {
+            if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
+            rcache_tail->cache_next = NULL;
+        }
+        tmp = r->cache_next;
+        if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev;
+        if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
+
+        r->cache_prev = NULL;
+        r->cache_next = rcache_head;
+        if (rcache_head != NULL) rcache_head->cache_prev = r;
+        rcache_head = r;
+    }
+
+    /* Hand back a private copy so the cached block cannot be mutated. */
+    node = newblock();
+    memcpy(node, r->node, BLOCK_SIZE);
+
+//printf("Hit (%Ld, %p)\n", id, r->node);
+done:
+    pthread_mutex_unlock(&rcache_mutex);
+
+    return(node);
+}
+
+
+/* rc_readblock: cache-aware readblock().  Serves from the radix cache
+ * when possible; otherwise reads from the store and populates the
+ * cache.  Caller frees the returned block. */
+void *rc_readblock(u64 id)
+{
+    void *ret;
+
+    ret = (void *)rcache_read(id);
+
+    if (ret != NULL) return ret;
+
+    ret = readblock(id);
+
+    if (ret != NULL)
+        rcache_write(id, ret);
+
+    return(ret);
+}
+
+/* rc_allocblock: cache-aware allocblock().  Newly allocated blocks are
+ * immediately cached under their new id.  Returns ZERO on failure. */
+u64 rc_allocblock(void *block)
+{
+    u64 ret;
+
+    ret = allocblock(block);
+
+    if (ret != ZERO)
+        rcache_write(ret, block);
+
+    return(ret);
+}
+
+/* rc_writeblock: cache-aware writeblock().  The cache copy is updated
+ * unconditionally, even if the store write reports failure -- callers
+ * must check the returned status. */
+int rc_writeblock(u64 id, void *block)
+{
+    int ret;
+
+    ret = writeblock(id, block);
+    rcache_write(id, block);
+
+    return(ret);
+}
+
+
+/*
+ * block device interface and other helper functions
+ * with these functions, block id is just a 63-bit number, with
+ * no special consideration for the LSB
+ */
+radix_tree_node cloneblock(radix_tree_node block);
+
+/*
+ * main api
+ * with these functions, the LSB of root always indicates
+ * whether or not the block is writable, including the return
+ * values of update and snapshot
+ */
+u64 lookup(int height, u64 root, u64 key);
+u64 update(int height, u64 root, u64 key, u64 val);
+u64 snapshot(u64 root);
+
+/**
+ * cloneblock: clone an existing block in memory
+ * @block: the old block
+ *
+ * @return: new block, with LSB cleared for every entry
+ */
+/**
+ * cloneblock: clone an existing block in memory
+ * @block: the old block
+ *
+ * @return: new block, with LSB cleared for every entry
+ *
+ * NOTE(review): correctness relies on ONEMASK (radix.h) clearing only
+ * the writable bit of each entry -- verify its width there.
+ */
+radix_tree_node cloneblock(radix_tree_node block) {
+    radix_tree_node node = (radix_tree_node) malloc(BLOCK_SIZE);
+    int i;
+    if (node == NULL) {
+        perror("cloneblock malloc");
+        return NULL;
+    }
+    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
+        node[i] = block[i] & ONEMASK;
+    return node;
+}
+
+/**
+ * lookup: find a value given a key
+ * @height: height in bits of the radix tree
+ * @root: root node id, with set LSB indicating writable node
+ * @key: key to lookup
+ *
+ * @return: value on success, zero on error
+ */
+
+/**
+ * lookup: find a value given a key
+ * @height: height in bits of the radix tree
+ * @root: root node id, with set LSB indicating writable node
+ * @key: key to lookup
+ *
+ * @return: value on success, zero on error.  The returned value's LSB
+ * is set only if every node on the path (and the value) was writable.
+ */
+u64 lookup(int height, u64 root, u64 key) {
+    radix_tree_node node;
+    u64 mask = ONE;
+
+    assert(key >> height == 0);
+
+    /* the root block may be smaller to ensure all leaves are full */
+    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
+
+    /* now carve off equal sized chunks at each step */
+    for (;;) {
+        u64 oldroot;
+
+#ifdef DEBUG
+        printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", height, root,
+               (int) ((key >> height) & RADIX_TREE_MAP_MASK),
+               (iswritable(root) ? "" : " (readonly)"));
+#endif
+
+        if (getid(root) == ZERO)
+            return ZERO;
+
+        oldroot = root;
+        node = (radix_tree_node) rc_readblock(getid(root));
+        if (node == NULL)
+            return ZERO;
+
+        /* Descend one level; AND the entry's w-bit into the path mask. */
+        root = node[(key >> height) & RADIX_TREE_MAP_MASK];
+        mask &= root;
+        freeblock(node);
+
+        if (height == 0)
+            return ( root & ONEMASK ) | mask;
+
+        height -= RADIX_TREE_MAP_SHIFT;
+    }
+
+    return ZERO;
+}
+
+/*
+ * update: set a radix tree entry, doing copy-on-write as necessary
+ * @height: height in bits of the radix tree
+ * @root: root node id, with set LSB indicating writable node
+ * @key: key to set
+ * @val: value to set, s.t. radix(key)=val
+ *
+ * @returns: (possibly new) root id on success (with LSB=1), 0 on failure
+ */
+
+/*
+ * update: set a radix tree entry, doing copy-on-write as necessary
+ * @height: height in bits of the radix tree
+ * @root: root node id, with set LSB indicating writable node
+ * @key: key to set
+ * @val: value to set, s.t. radix(key)=val
+ *
+ * @returns: (possibly new) root id on success (with LSB=1), 0 on failure
+ */
+u64 update(int height, u64 root, u64 key, u64 val) {
+    int offset;
+    u64 child;
+    radix_tree_node node;
+
+    /* base case--return val */
+    if (height == 0)
+        return val;
+
+    /* the root block may be smaller to ensure all leaves are full */
+    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
+    offset = (key >> height) & RADIX_TREE_MAP_MASK;
+
+#ifdef DEBUG
+    printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root,
+           offset, (iswritable(root)?"":" (clone)"));
+#endif
+
+    /* load a block, or create a new one */
+    if (root == ZERO) {
+        node = (radix_tree_node) newblock();
+    } else {
+        node = (radix_tree_node) rc_readblock(getid(root));
+
+        if (!iswritable(root)) {
+            /* need to clone this node -- root=ZERO marks it for
+             * reallocation under a fresh id below. */
+            radix_tree_node oldnode = node;
+            node = cloneblock(node);
+            freeblock(oldnode);
+            root = ZERO;
+        }
+    }
+
+    if (node == NULL) {
+#ifdef DEBUG
+        printf("update: node is null!\n");
+#endif
+        return ZERO;
+    }
+
+    child = update(height, node[offset], key, val);
+
+    if (child == ZERO) {
+        freeblock(node);
+        return ZERO;
+    } else if (child == node[offset]) {
+        /* no change, so we already owned the child */
+        assert(iswritable(root));
+
+        freeblock(node);
+        return root;
+    }
+
+    node[offset] = child;
+
+    /* new/cloned blocks need to be saved */
+    if (root == ZERO) {
+        /* mark this as an owned block */
+        root = rc_allocblock(node);
+        if (root)
+            root = writable(root);
+    } else if (rc_writeblock(getid(root), node) < 0) {
+        freeblock(node);
+        return ZERO;
+    }
+
+    freeblock(node);
+    return root;
+}
+
+/**
+ * snapshot: create a snapshot
+ * @root: old root node
+ *
+ * @return: new root node, 0 on error
+ */
+/**
+ * snapshot: create a snapshot
+ * @root: old root node
+ *
+ * @return: new root node (writable), 0 on error.  The new root is a
+ * clone of the old one with all child w-bits cleared, so every child
+ * is subsequently copied on write.
+ */
+u64 snapshot(u64 root) {
+    radix_tree_node node, newnode;
+
+    if ((node = rc_readblock(getid(root))) == NULL)
+        return ZERO;
+
+    newnode = cloneblock(node);
+    freeblock(node);
+    if (newnode == NULL)
+        return ZERO;
+
+    root = rc_allocblock(newnode);
+    freeblock(newnode);
+
+    if (root == ZERO)
+        return ZERO;
+    else
+        return writable(root);
+}
+
+/**
+ * collapse: collapse a parent onto a child.
+ *
+ * NOTE: This assumes that parent and child really are, and further that
+ * there are no other children forked from this parent. (children of the
+ * child are okay...)
+ */
+
+/**
+ * collapse: collapse a parent onto a child.
+ *
+ * NOTE: This assumes that parent and child really are, and further that
+ * there are no other children forked from this parent. (children of the
+ * child are okay...)
+ *
+ * @return: number of blocks reclaimed, or -1 on error.
+ */
+int collapse(int height, u64 proot, u64 croot)
+{
+    /* Fix: 'ret' was read uninitialized below when no writable child
+     * was recursed into; default to 0 (success). */
+    int i, numlinks, ret = 0, total = 0;
+    radix_tree_node pnode, cnode;
+
+    if (height == 0) {
+        height = -1; /* terminate recursion */
+    } else {
+        height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
+    }
+    numlinks = (1UL << RADIX_TREE_MAP_SHIFT);
+
+    /* Terminal cases: */
+
+    if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) )
+        return -1;
+
+    /* get roots */
+    if ((pnode = readblock(getid(proot))) == NULL)
+        return -1;
+
+    if ((cnode = readblock(getid(croot))) == NULL)
+    {
+        freeblock(pnode);
+        return -1;
+    }
+
+    /* For each writable link in proot */
+    for (i=0; i<numlinks; i++)
+    {
+        if ( pnode[i] == cnode[i] ) continue;
+
+        /* collapse (next level) */
+        /* if height != 0 and writable... */
+        if (( height >= 0 ) && ( iswritable(pnode[i]) ) )
+        {
+            ret = collapse(height, pnode[i], cnode[i]);
+            if (ret == -1)
+            {
+                total = -1;
+            } else {
+                total += ret;
+            }
+        }
+    }
+
+    /* if plink is writable, AND clink is writable -> free plink block */
+    if ( ( iswritable(proot) ) && ( iswritable(croot) ) )
+    {
+        releaseblock(getid(proot));
+        if (ret >= 0) total++;
+    }
+
+    /* Fix: both node buffers were previously leaked on this path. */
+    freeblock(pnode);
+    freeblock(cnode);
+
+    return total;
+}
+
+
+/* print_root: emit a Graphviz (dot) representation of the radix tree
+ * rooted at 'root'.  The top-level caller passes dot_f == NULL, which
+ * creates "radix.dot" and writes the digraph pre/postamble; recursive
+ * calls pass the already-open stream. */
+void print_root(u64 root, int height, FILE *dot_f)
+{
+    /* Fix: 'f' was uninitialized on every recursive call (dot_f != NULL
+     * branch never assigned it) -- undefined behavior on first fprintf. */
+    FILE *f = dot_f;
+    int i;
+    radix_tree_node node;
+    char *style[2] = { "", "style=bold,color=blue," };
+
+    if (dot_f == NULL) {
+        f = fopen("radix.dot", "w");
+        if (f == NULL) {
+            perror("print_root: open");
+            return;
+        }
+
+        /* write graph preamble */
+        fprintf(f, "digraph G {\n");
+
+        /* add a node for this root. */
+        fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n",
+                getid(root), style[iswritable(root)], getid(root));
+    }
+
+    printf("print_root(%Ld)\n", getid(root));
+
+    /* base case */
+    if (height == 0) {
+        /* add a node and edge for each child root */
+        node = (radix_tree_node) readblock(getid(root));
+        if (node == NULL)
+            return; /* NOTE(review): leaves f open when dot_f == NULL */
+
+        for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
+            if (node[i] != ZERO) {
+                fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n",
+                        getid(node[i]), style[iswritable(node[i])],
+                        getid(node[i]));
+                fprintf(f, " n%Ld -> n%Ld [label=\"%d\"]\n", getid(root),
+                        getid(node[i]), i);
+            }
+        }
+        freeblock(node);
+        return;
+    }
+
+    /* the root block may be smaller to ensure all leaves are full */
+    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
+
+    if (getid(root) == ZERO)
+        return;
+
+    node = (radix_tree_node) readblock(getid(root));
+    if (node == NULL)
+        return;
+
+    /* add a node and edge for each child root */
+    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
+        if (node[i] != ZERO) {
+            fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n",
+                    getid(node[i]), style[iswritable(node[i])],
+                    getid(node[i]));
+
+            print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f);
+            fprintf(f, " n%Ld -> n%Ld [label=\"%d\"]\n", getid(root),
+                    getid(node[i]), i);
+        }
+
+    freeblock(node);
+
+    /* write graph postamble */
+    if (dot_f == NULL) {
+        fprintf(f, "}\n");
+        fclose(f);
+    }
+}
+
+#ifdef RADIX_STANDALONE
+
+/* Interactive test harness for the radix tree (compiled only with
+ * -DRADIX_STANDALONE).  Reads commands from stdin; see the help text
+ * below for the command set. */
+int main(int argc, char **argv) {
+    u64 key = ZERO, val = ZERO;
+    u64 root = writable(2ULL);
+    u64 p = ZERO, c = ZERO;
+    int v;
+    char buff[4096];
+
+    __init_blockstore();
+
+    memset(buff, 0, 4096);
+    /*fp = open("radix.dat", O_RDWR | O_CREAT, 0644);
+
+    if (fp < 3) {
+        perror("open");
+        return -1;
+    }
+    if (lseek(fp, 0, SEEK_END) == 0) {
+        write(fp, buff, 4096);
+    }*/
+
+    /* Block 1 is reserved as the initial (all-zero) root block. */
+    allocblock(buff);
+
+    printf("Recognized commands:\n"
+           "Note: the LSB of a node number indicates if it is writable\n"
+           "  root <node>          set root to <node>\n"
+           "  snapshot             take a snapshot of the root\n"
+           "  set <key> <val>      set key=val\n"
+           "  get <key>            query key\n"
+           "  c <proot> <croot>    collapse\n"
+           "  pr                   print tree to dot\n"
+           "  pf <1=verbose>       print freelist\n"
+           "  quit\n"
+           "\nroot = %Ld\n", root);
+    for (;;) {
+        //print_root(root, 34, NULL);
+        //system("dot radix.dot -Tps -o radix.ps");
+
+        printf("> ");
+        fflush(stdout);
+        fgets(buff, 1024, stdin);
+        if (feof(stdin))
+            break;
+        if (sscanf(buff, " root %Ld", &root) == 1) {
+            printf("root set to %Ld\n", root);
+        } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
+            root = update(34, root, key, val);
+            printf("root = %Ld\n", root);
+        } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) {
+            v = collapse(34, p, c);
+            printf("reclaimed %d blocks.\n", v);
+        } else if (sscanf(buff, " get %Ld", &key) == 1) {
+            val = lookup(34, root, key);
+            printf("value = %Ld\n", val);
+        } else if (!strcmp(buff, "quit\n")) {
+            break;
+        } else if (!strcmp(buff, "snapshot\n")) {
+            root = snapshot(root);
+            printf("new root = %Ld\n", root);
+        } else if (sscanf(buff, " pr %Ld", &root) == 1) {
+            print_root(root, 34, NULL);
+        } else if (sscanf(buff, " pf %d", &v) == 1) {
+            freelist_count(v);
+        } else if (!strcmp(buff, "pf\n")) {
+            freelist_count(0);
+        } else {
+            printf("command not recognized\n");
+        }
+    }
+    return 0;
+}
+
+#endif
diff --git a/tools/blktap/radix.h b/tools/blktap/radix.h
new file mode 100644
index 0000000000..61ea2205f8
--- /dev/null
+++ b/tools/blktap/radix.h
@@ -0,0 +1,45 @@
+/*
+ * Radix tree for mapping (up to) 63-bit virtual block IDs to
+ * 63-bit global block IDs
+ *
+ * Pointers within the tree set aside the least significant bit to indicate
+ * whether or not the target block is writable from this node.
+ *
+ * The block with ID 0 is assumed to be an empty block of all zeros
+ */
+
+#ifndef __RADIX_H__
+#define __RADIX_H__
+
+/* I don't really like exposing these, but... */
+#define getid(x) (((x)>>1)&0x7fffffffffffffffLL)
+#define putid(x) ((x)<<1)
+#define writable(x) (((x)<<1)|1LL)
+#define iswritable(x) ((x)&1LL)
+#define ZERO 0LL
+#define ONE 1LL
+#define ONEMASK 0xfffffffffffffffeULL /* fix: was 15 hex digits, which also cleared bits 60-63 */
+
+#define RADIX_TREE_MAP_SHIFT 9
+#define RADIX_TREE_MAP_MASK 0x1ff
+#define RADIX_TREE_MAP_ENTRIES 512
+
+typedef u64 *radix_tree_node;
+
+
+/*
+ * main api
+ * with these functions, the LSB of root always indicates
+ * whether or not the block is writable, including the return
+ * values of update and snapshot
+ */
+u64 lookup(int height, u64 root, u64 key);
+u64 update(int height, u64 root, u64 key, u64 val);
+u64 snapshot(u64 root);
+int collapse(int height, u64 proot, u64 croot);
+int isprivate(int height, u64 root, u64 key);
+
+
+void __rcache_init(void);
+
+#endif /* __RADIX_H__ */
diff --git a/tools/blktap/requests-async.c b/tools/blktap/requests-async.c
new file mode 100755
index 0000000000..f1e0bc8425
--- /dev/null
+++ b/tools/blktap/requests-async.c
@@ -0,0 +1,762 @@
+/* requests-async.c
+ *
+ * asynchronous request dispatcher for radix access in parallax.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+#include <pthread.h>
+#include <err.h>
+#include <zlib.h> /* for crc32() */
+#include "requests-async.h"
+#include "vdi.h"
+#include "radix.h"
+
+#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18)
+#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9)
+#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL))
+
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* Per-block metadata stored in the odd slots of L3 radix nodes. */
+struct block_info {
+    u32        crc;     /* crc32 of the 4K data block */
+    u32        unused;  /* debug tag identifying the writing code path */
+};
+
+/* One in-flight read or write request; carries the state machine
+ * driven by read_cb()/write_cb(). */
+struct io_req {
+    enum { IO_OP_READ, IO_OP_WRITE } op;
+    u64        root;    /* radix root of the target VDI */
+    u64        vaddr;   /* virtual block address (pre-shifted, see vdi_read) */
+    int        state;   /* current 'enum states' value */
+    io_cb_t    cb;      /* caller's completion callback */
+    void      *param;   /* opaque argument for cb */
+    struct radix_lock *lock;
+
+    /* internal stuff: */
+    struct io_ret     retval;/* holds the return while we unlock. */
+    char             *block; /* the block to write */
+    radix_tree_node   radix[3];      /* cached L1/L2/L3 nodes, or NULL */
+    u64               radix_addr[3]; /* on-store ids of those nodes */
+    struct block_info bi;
+};
+
+/* Clear the writable (LSB) bit of every entry in a radix node. */
+void clear_w_bits(radix_tree_node node)
+{
+    int i;
+    for (i=0; i<RADIX_TREE_MAP_ENTRIES; i++)
+        node[i] = node[i] & ONEMASK;
+    return;
+}
+
+/* As clear_w_bits(), but steps by 2: in L3 nodes the odd slots hold
+ * struct block_info (crc etc.), not tree entries. */
+void clear_L3_w_bits(radix_tree_node node)
+{
+    int i;
+    for (i=0; i<RADIX_TREE_MAP_ENTRIES; i+=2)
+        node[i] = node[i] & ONEMASK;
+    return;
+}
+
+enum states {
+ /* both */
+ READ_L1,
+ READ_L2,
+ READ_L3,
+
+ /* read */
+ READ_LOCKED,
+ READ_DATA,
+ READ_UNLOCKED,
+ RETURN_ZERO,
+
+ /* write */
+ WRITE_LOCKED,
+ WRITE_DATA,
+ WRITE_L3,
+ WRITE_UNLOCKED,
+
+ /* L3 Zero Path */
+ ALLOC_DATA_L3z,
+ WRITE_L3_L3z,
+
+ /* L3 Fault Path */
+ ALLOC_DATA_L3f,
+ WRITE_L3_L3f,
+
+ /* L2 Zero Path */
+ ALLOC_DATA_L2z,
+ WRITE_L2_L2z,
+ ALLOC_L3_L2z,
+ WRITE_L2_L3z,
+
+ /* L2 Fault Path */
+ READ_L3_L2f,
+ ALLOC_DATA_L2f,
+ WRITE_L2_L2f,
+ ALLOC_L3_L2f,
+ WRITE_L2_L3f,
+
+ /* L1 Zero Path */
+ ALLOC_DATA_L1z,
+ ALLOC_L3_L1z,
+ ALLOC_L2_L1z,
+ WRITE_L1_L1z,
+
+ /* L1 Fault Path */
+ READ_L2_L1f,
+ READ_L3_L1f,
+ ALLOC_DATA_L1f,
+ ALLOC_L3_L1f,
+ ALLOC_L2_L1f,
+ WRITE_L1_L1f,
+
+};
+
+enum radix_offsets {
+ L1 = 0,
+ L2 = 1,
+ L3 = 2
+};
+
+
+static void read_cb(struct io_ret ret, void *param);
+static void write_cb(struct io_ret ret, void *param);
+
+/* vdi_read: issue an asynchronous read of the block at 'vaddr'.
+ *
+ * 'cb' fires with the block (or error) once the radix walk and data
+ * read complete.  Returns 0 on successful submission, or a negative
+ * ERR_* code -- in which case 'cb' is never called. */
+int vdi_read(vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param)
+{
+    struct io_req *req;
+
+    if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR;
+    /* Every second entry in the bottom-level radix tree is used to  */
+    /* store crc32 values etc.  We shift the vaddr here to achieve this. */
+    vaddr <<= 1;
+
+    req = (struct io_req *)malloc(sizeof (struct io_req));
+    if (req == NULL) return ERR_NOMEM;
+
+    req->radix[0] = req->radix[1] = req->radix[2] = NULL;
+    req->op    = IO_OP_READ;
+    req->root  = vdi->radix_root;
+    req->lock  = vdi->radix_lock;
+    req->vaddr = vaddr;
+    req->cb    = cb;
+    req->param = param;
+    req->state = READ_LOCKED;
+
+    /* Take a read lock on this vaddr's L1 slot; read_cb drives the
+     * remainder of the state machine. */
+    block_rlock(req->lock, L1_IDX(vaddr), read_cb, req);
+
+    return 0;
+}
+
+
+/* vdi_write: issue an asynchronous write of 'block' at 'vaddr'.
+ *
+ * The block's crc32 is computed up front and stored alongside the L3
+ * entry.  'cb' fires when the write (and any copy-on-write of radix
+ * nodes) completes.  Returns 0 on submission, negative ERR_* on
+ * failure (cb never called).  'block' must stay valid until cb runs. */
+int vdi_write(vdi_t *vdi, u64 vaddr, char *block,
+              io_cb_t cb, void *param)
+{
+    struct io_req *req;
+
+    if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR;
+    /* Every second entry in the bottom-level radix tree is used to  */
+    /* store crc32 values etc.  We shift the vaddr here to achieve this. */
+    vaddr <<= 1;
+
+    req = (struct io_req *)malloc(sizeof (struct io_req));
+    if (req == NULL) return ERR_NOMEM;
+
+    req->radix[0] = req->radix[1] = req->radix[2] = NULL;
+    req->op    = IO_OP_WRITE;
+    req->root  = vdi->radix_root;
+    req->lock  = vdi->radix_lock;
+    req->vaddr = vaddr;
+    req->block = block;
+    /* Todo: add a pseudoheader to the block to include some location */
+    /* information in the CRC as well.                                */
+    req->bi.crc = (u32) crc32(0L, Z_NULL, 0);
+    req->bi.crc = (u32) crc32(req->bi.crc, block, BLOCK_SIZE);
+    req->bi.unused = 0xdeadbeef;
+
+    req->cb     = cb;
+    req->param  = param;
+    req->radix_addr[L1] = getid(req->root); /* for consistency */
+    req->state  = WRITE_LOCKED;
+
+    /* Take a write lock on this vaddr's L1 slot; write_cb drives the
+     * remainder of the state machine. */
+    block_wlock(req->lock, L1_IDX(vaddr), write_cb, req);
+
+
+    return 0;
+}
+
+/* read_cb: state machine driving an asynchronous VDI read.
+ *
+ * Path: READ_LOCKED -> READ_L1 -> READ_L2 -> READ_L3 -> READ_DATA ->
+ * READ_UNLOCKED, finally invoking the caller's callback.  A ZERO entry
+ * at any level short-circuits to RETURN_ZERO (a fresh zero block).
+ * Each block_read/block_runlock call re-enters here with the next
+ * state.  'req' is freed before the final callback fires. */
+static void read_cb(struct io_ret ret, void *param)
+{
+    struct io_req *req = (struct io_req *)param;
+    radix_tree_node node;
+    u64 idx;
+    char *block;
+    void *req_param;
+
+    DPRINTF("read_cb\n");
+    /* get record */
+    switch(req->state) {
+
+    case READ_LOCKED:
+
+        DPRINTF("READ_LOCKED\n");
+        req->state = READ_L1;
+        block_read(getid(req->root), read_cb, req);
+        break;
+
+    case READ_L1: /* block is the radix root */
+
+        DPRINTF("READ_L1\n");
+        block = IO_BLOCK(ret);
+        if (block == NULL) goto fail;
+        node = (radix_tree_node) block;
+        idx = getid( node[L1_IDX(req->vaddr)] );
+        free(block);
+        if ( idx == ZERO ) {
+            req->state = RETURN_ZERO;
+            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
+        } else {
+            req->state = READ_L2;
+            block_read(idx, read_cb, req);
+        }
+        break;
+
+    case READ_L2:
+
+        DPRINTF("READ_L2\n");
+        block = IO_BLOCK(ret);
+        if (block == NULL) goto fail;
+        node = (radix_tree_node) block;
+        idx = getid( node[L2_IDX(req->vaddr)] );
+        free(block);
+        if ( idx == ZERO ) {
+            req->state = RETURN_ZERO;
+            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
+        } else {
+            req->state = READ_L3;
+            block_read(idx, read_cb, req);
+        }
+        break;
+
+    case READ_L3:
+    {
+        struct block_info *bi;
+
+        DPRINTF("READ_L3\n");
+        block = IO_BLOCK(ret);
+        if (block == NULL) goto fail;
+        node = (radix_tree_node) block;
+        idx = getid( node[L3_IDX(req->vaddr)] );
+        /* vaddr was shifted left by one, so L3_IDX is even and the
+         * following odd slot holds this block's metadata. */
+        bi = (struct block_info *) &node[L3_IDX(req->vaddr) + 1];
+        req->bi = *bi;
+        free(block);
+        if ( idx == ZERO )  {
+            req->state = RETURN_ZERO;
+            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
+        } else {
+            req->state = READ_DATA;
+            block_read(idx, read_cb, req);
+        }
+        break;
+    }
+    case READ_DATA:
+    {
+        u32 crc;
+
+        DPRINTF("READ_DATA\n");
+        block = IO_BLOCK(ret);
+        if (block == NULL) goto fail;
+
+        /* crc check */
+        crc = (u32) crc32(0L, Z_NULL, 0);
+        crc = (u32) crc32(crc, block, BLOCK_SIZE);
+        if (crc != req->bi.crc) {
+            /* TODO: add a retry loop here.                          */
+            /* Do this after the cache is added -- make sure to      */
+            /* invalidate the bad page before reissuing the read.    */
+
+            warn("Bad CRC on vaddr (%Lu:%d)\n", req->vaddr, req->bi.unused);
+#ifdef PRINT_BADCRC_PAGES
+            {
+                int j;
+                for (j=0; j<BLOCK_SIZE; j++) {
+                    /* NOTE(review): 'if isprint(...)' only compiles
+                     * because glibc's isprint macro expands to a
+                     * parenthesized expression -- add parens. */
+                    if isprint(block[j]) {
+                        printf("%c", block[j]);
+                    } else {
+                        printf(".");
+                    }
+                    if ((j % 64) == 0) printf("\n");
+                }
+            }
+#endif /* PRINT_BADCRC_PAGES */
+
+            /* fast and loose for the moment. */
+            /* goto fail;                     */
+        }
+
+        req->retval = ret;
+        req->state = READ_UNLOCKED;
+        block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
+        break;
+    }
+    case READ_UNLOCKED:
+    {
+        struct io_ret r;
+        io_cb_t cb;
+        DPRINTF("READ_UNLOCKED\n");
+        /* Copy everything we need out of req before freeing it. */
+        req_param = req->param;
+        r         = req->retval;
+        cb        = req->cb;
+        free(req);
+        cb(r, req_param);
+        break;
+    }
+
+    case RETURN_ZERO:
+    {
+        struct io_ret r;
+        io_cb_t cb;
+        DPRINTF("RETURN_ZERO\n");
+        req_param = req->param;
+        cb = req->cb;
+        free(req);
+        r.type = IO_BLOCK_T;
+        r.u.b = newblock();
+        cb(r, req_param);
+        break;
+    }
+
+    default:
+        DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
+        goto fail;
+    }
+
+    return;
+
+ fail:
+    {
+        struct io_ret r;
+        io_cb_t cb;
+        DPRINTF("asyn_read had a read error.\n");
+        req_param = req->param;
+        r         = ret;
+        cb        = req->cb;
+        free(req);
+        cb(r, req_param);
+    }
+
+
+}
+
+/* write_cb: state machine driving an asynchronous VDI write.
+ *
+ * The common path is WRITE_LOCKED -> READ_L1/L2/L3 -> WRITE_DATA ->
+ * WRITE_L3 -> WRITE_UNLOCKED.  The *_Lnz paths allocate missing
+ * subtrees ("zero" faults); the *_Lnf paths copy-on-write read-only
+ * subtrees ("faults"), clearing child w-bits as nodes are cloned.
+ * Cached radix nodes in req->radix[] are freed once the final node
+ * write completes; 'req' is freed before the caller's callback fires.
+ *
+ * Fix over the original: the three fault-path cases checked the block
+ * pointer for NULL only *after* passing it to clear_w_bits()/
+ * clear_L3_w_bits(), dereferencing a potential NULL.
+ */
+static void write_cb(struct io_ret r, void *param)
+{
+    struct io_req *req = (struct io_req *)param;
+    radix_tree_node node;
+    u64 a, addr;
+    void *req_param;
+    struct block_info *bi;
+
+    switch(req->state) {
+
+    case WRITE_LOCKED:
+
+        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));
+        req->state = READ_L1;
+        block_read(getid(req->root), write_cb, req);
+        break;
+
+    case READ_L1: /* block is the radix root */
+
+        DPRINTF("READ_L1\n");
+        node = (radix_tree_node) IO_BLOCK(r);
+        if (node == NULL) goto fail;
+        a    = node[L1_IDX(req->vaddr)];
+        addr = getid(a);
+
+        req->radix_addr[L2] = addr;
+        req->radix[L1]      = node;
+
+        if ( addr == ZERO ) {
+            /* L1 empty subtree: */
+            req->state = ALLOC_DATA_L1z;
+            block_alloc( req->block, write_cb, req );
+        } else if ( !iswritable(a) ) {
+            /* L1 fault: */
+            req->state = READ_L2_L1f;
+            block_read( addr, write_cb, req );
+        } else {
+            req->state = READ_L2;
+            block_read( addr, write_cb, req );
+        }
+        break;
+
+    case READ_L2:
+
+        DPRINTF("READ_L2\n");
+        node = (radix_tree_node) IO_BLOCK(r);
+        if (node == NULL) goto fail;
+        a    = node[L2_IDX(req->vaddr)];
+        addr = getid(a);
+
+        req->radix_addr[L3] = addr;
+        req->radix[L2]      = node;
+
+        if ( addr == ZERO ) {
+            /* L2 empty subtree: */
+            req->state = ALLOC_DATA_L2z;
+            block_alloc( req->block, write_cb, req );
+        } else if ( !iswritable(a) ) {
+            /* L2 fault: */
+            req->state = READ_L3_L2f;
+            block_read( addr, write_cb, req );
+        } else {
+            req->state = READ_L3;
+            block_read( addr, write_cb, req );
+        }
+        break;
+
+    case READ_L3:
+
+        DPRINTF("READ_L3\n");
+        node = (radix_tree_node) IO_BLOCK(r);
+        if (node == NULL) goto fail;
+        a    = node[L3_IDX(req->vaddr)];
+        addr = getid(a);
+
+        req->radix[L3] = node;
+
+        if ( addr == ZERO ) {
+            /* L3 fault: */
+            req->state = ALLOC_DATA_L3z;
+            block_alloc( req->block, write_cb, req );
+        } else if ( !iswritable(a) ) {
+            /* L3 fault: */
+            req->state = ALLOC_DATA_L3f;
+            block_alloc( req->block, write_cb, req );
+        } else {
+            req->state = WRITE_DATA;
+            block_write( addr, req->block, write_cb, req );
+        }
+        break;
+
+    case WRITE_DATA:
+
+        DPRINTF("WRITE_DATA\n");
+        /* The L3 radix points to the correct block, we just need to */
+        /* update the crc.                                           */
+        if (IO_INT(r) < 0) goto fail;
+        bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
+        req->bi.unused = 101;
+        *bi = req->bi;
+        req->state = WRITE_L3;
+        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
+        break;
+
+    /* L3 Zero Path: */
+
+    case ALLOC_DATA_L3z:
+
+        DPRINTF("ALLOC_DATA_L3z\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L3][L3_IDX(req->vaddr)] = a;
+        bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
+        req->bi.unused = 102;
+        *bi = req->bi;
+        req->state = WRITE_L3_L3z;
+        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
+        break;
+
+    /* L3 Fault Path: */
+
+    case ALLOC_DATA_L3f:
+
+        DPRINTF("ALLOC_DATA_L3f\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L3][L3_IDX(req->vaddr)] = a;
+        bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
+        req->bi.unused = 103;
+        *bi = req->bi;
+        req->state = WRITE_L3_L3f;
+        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
+        break;
+
+    /* L2 Zero Path: */
+
+    case ALLOC_DATA_L2z:
+
+        DPRINTF("ALLOC_DATA_L2z\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L3] = newblock();
+        req->radix[L3][L3_IDX(req->vaddr)] = a;
+        bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
+        req->bi.unused = 104;
+        *bi = req->bi;
+        req->state = ALLOC_L3_L2z;
+        block_alloc( (char*)req->radix[L3], write_cb, req );
+        break;
+
+    case ALLOC_L3_L2z:
+
+        DPRINTF("ALLOC_L3_L2z\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L2][L2_IDX(req->vaddr)] = a;
+        req->state = WRITE_L2_L2z;
+        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
+        break;
+
+    /* L2 Fault Path: */
+
+    case READ_L3_L2f:
+
+        DPRINTF("READ_L3_L2f\n");
+        node = (radix_tree_node) IO_BLOCK(r);
+        if (node == NULL) goto fail;  /* fix: NULL check before deref */
+        clear_L3_w_bits(node);
+        /* NOTE(review): 'a'/'addr' are unused on this path, and index
+         * an L3 node with L2_IDX -- confirm before relying on them. */
+        a = node[L2_IDX(req->vaddr)];
+        addr = getid(a);
+
+        req->radix[L3] = node;
+        req->state = ALLOC_DATA_L2f;
+        block_alloc( req->block, write_cb, req );
+        break;
+
+    case ALLOC_DATA_L2f:
+
+        DPRINTF("ALLOC_DATA_L2f\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L3][L3_IDX(req->vaddr)] = a;
+        bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
+        req->bi.unused = 105;
+        *bi = req->bi;
+        req->state = ALLOC_L3_L2f;
+        block_alloc( (char*)req->radix[L3], write_cb, req );
+        break;
+
+    case ALLOC_L3_L2f:
+
+        DPRINTF("ALLOC_L3_L2f\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L2][L2_IDX(req->vaddr)] = a;
+        req->state = WRITE_L2_L2f;
+        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
+        break;
+
+    /* L1 Zero Path: */
+
+    case ALLOC_DATA_L1z:
+
+        DPRINTF("ALLOC_DATA_L1z\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L3] = newblock();
+        req->radix[L3][L3_IDX(req->vaddr)] = a;
+        bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
+        req->bi.unused = 106;
+        *bi = req->bi;
+        req->state = ALLOC_L3_L1z;
+        block_alloc( (char*)req->radix[L3], write_cb, req );
+        break;
+
+    case ALLOC_L3_L1z:
+
+        DPRINTF("ALLOC_L3_L1z\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L2] = newblock();
+        req->radix[L2][L2_IDX(req->vaddr)] = a;
+        req->state = ALLOC_L2_L1z;
+        block_alloc( (char*)req->radix[L2], write_cb, req );
+        break;
+
+    case ALLOC_L2_L1z:
+
+        DPRINTF("ALLOC_L2_L1z\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L1][L1_IDX(req->vaddr)] = a;
+        req->state = WRITE_L1_L1z;
+        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
+        break;
+
+    /* L1 Fault Path: */
+
+    case READ_L2_L1f:
+
+        DPRINTF("READ_L2_L1f\n");
+        node = (radix_tree_node) IO_BLOCK(r);
+        if (node == NULL) goto fail;  /* fix: NULL check before deref */
+        clear_w_bits(node);
+        a = node[L2_IDX(req->vaddr)];
+        addr = getid(a);
+
+        req->radix_addr[L3] = addr;
+        req->radix[L2]      = node;
+
+        if (addr == ZERO) {
+            /* nothing below L2, create an empty L3 and alloc data. */
+            /* (So skip READ_L3_L1f.)                               */
+            req->radix[L3] = newblock();
+            req->state = ALLOC_DATA_L1f;
+            block_alloc( req->block, write_cb, req );
+        } else {
+            req->state = READ_L3_L1f;
+            block_read( addr, write_cb, req );
+        }
+        break;
+
+    case READ_L3_L1f:
+
+        DPRINTF("READ_L3_L1f\n");
+        node = (radix_tree_node) IO_BLOCK(r);
+        if (node == NULL) goto fail;  /* fix: NULL check before deref */
+        clear_L3_w_bits(node);
+        /* NOTE(review): 'a'/'addr' are unused on this path, and index
+         * an L3 node with L2_IDX -- confirm before relying on them. */
+        a = node[L2_IDX(req->vaddr)];
+        addr = getid(a);
+
+        req->radix[L3] = node;
+        req->state = ALLOC_DATA_L1f;
+        block_alloc( req->block, write_cb, req );
+        break;
+
+    case ALLOC_DATA_L1f:
+
+        DPRINTF("ALLOC_DATA_L1f\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L3][L3_IDX(req->vaddr)] = a;
+        bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
+        req->bi.unused = 107;
+        *bi = req->bi;
+        req->state = ALLOC_L3_L1f;
+        block_alloc( (char*)req->radix[L3], write_cb, req );
+        break;
+
+    case ALLOC_L3_L1f:
+
+        DPRINTF("ALLOC_L3_L1f\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L2][L2_IDX(req->vaddr)] = a;
+        req->state = ALLOC_L2_L1f;
+        block_alloc( (char*)req->radix[L2], write_cb, req );
+        break;
+
+    case ALLOC_L2_L1f:
+
+        DPRINTF("ALLOC_L2_L1f\n");
+        addr = IO_ADDR(r);
+        a = writable(addr);
+        req->radix[L1][L1_IDX(req->vaddr)] = a;
+        req->state = WRITE_L1_L1f;
+        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
+        break;
+
+    case WRITE_L3:
+    case WRITE_L3_L3z:
+    case WRITE_L3_L3f:
+    case WRITE_L2_L2z:
+    case WRITE_L2_L2f:
+    case WRITE_L1_L1z:
+    case WRITE_L1_L1f:
+    {
+        int i;
+        DPRINTF("DONE\n");
+        /* free any saved node vals. */
+        for (i=0; i<3; i++)
+            if (req->radix[i] != 0) free(req->radix[i]);
+        req->retval = r;
+        req->state = WRITE_UNLOCKED;
+        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
+        break;
+    }
+    case WRITE_UNLOCKED:
+    {
+        struct io_ret r;
+        io_cb_t cb;
+        DPRINTF("WRITE_UNLOCKED!\n");
+        req_param = req->param;
+        r         = req->retval;
+        cb        = req->cb;
+        free(req);
+        cb(r, req_param);
+        break;
+    }
+
+    default:
+        DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
+        goto fail;
+    }
+
+    return;
+
+ fail:
+    {
+        struct io_ret r;
+        io_cb_t cb;
+        int i;
+
+        DPRINTF("asyn_write had a read error mid-way.\n");
+        req_param = req->param;
+        cb        = req->cb;
+        r.type = IO_INT_T;
+        r.u.i  = -1;
+        /* free any saved node vals. */
+        for (i=0; i<3; i++)
+            if (req->radix[i] != 0) free(req->radix[i]);
+        free(req);
+        cb(r, req_param);
+    }
+}
+
+/* vdi_read_s: synchronous wrapper around vdi_read().
+ *
+ * Uses a GNU C nested function as the completion callback and a local
+ * mutex as a completion latch.  Returns the block (caller frees) or
+ * NULL on submission failure / read error. */
+char *vdi_read_s(vdi_t *vdi, u64 vaddr)
+{
+    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
+    char *block = NULL;
+    int ret;
+
+    void reads_cb(struct io_ret r, void *param)
+    {
+        block = IO_BLOCK(r);
+        pthread_mutex_unlock((pthread_mutex_t *)param);
+    }
+
+    pthread_mutex_lock(&m);
+    ret = vdi_read(vdi, vaddr, reads_cb, &m);
+
+    /* The second lock blocks until reads_cb unlocks the mutex. */
+    if (ret == 0) pthread_mutex_lock(&m);
+
+    return block;
+}
+
+
+/* vdi_write_s: synchronous wrapper around vdi_write().
+ *
+ * Blocks on a local mutex until the write callback fires.  Returns the
+ * callback's integer result, or the negative ERR_* code from
+ * vdi_write() if the request could not be issued. */
+int vdi_write_s(vdi_t *vdi, u64 vaddr, char *block)
+{
+    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
+    int ret, result;
+
+    void writes_cb(struct io_ret r, void *param)
+    {
+        result = IO_INT(r);
+        pthread_mutex_unlock((pthread_mutex_t *)param);
+    }
+
+    pthread_mutex_lock(&m);
+    ret = vdi_write(vdi, vaddr, block, writes_cb, &m);
+
+    /* Fix: 'result' was returned uninitialized when vdi_write() failed
+     * (the callback never runs in that case). */
+    if (ret != 0) return ret;
+
+    /* Blocks until writes_cb unlocks the mutex. */
+    pthread_mutex_lock(&m);
+
+    return result;
+}
diff --git a/tools/blktap/requests-async.h b/tools/blktap/requests-async.h
new file mode 100755
index 0000000000..451f211bd5
--- /dev/null
+++ b/tools/blktap/requests-async.h
@@ -0,0 +1,29 @@
+#ifndef _REQUESTSASYNC_H_
+#define _REQUESTSASYNC_H_
+
+#include "block-async.h"
+#include "blockstore.h" /* for newblock etc. */
+
+/*
+#define BLOCK_SIZE 4096
+#define ZERO 0ULL
+#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)
+#define iswritable(x) (((x) & 1LLU) != 0)
+#define writable(x) (((x) << 1) | 1LLU)
+#define readonly(x) ((u64)((x) << 1))
+*/
+
+#define VADDR_MASK 0x0000000003ffffffLLU /* 26-bits = 256Gig */
+#define VALID_VADDR(x) (((x) & VADDR_MASK) == (x))
+
+int vdi_read (vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param);
+int vdi_write(vdi_t *vdi, u64 vaddr, char *block, io_cb_t cb, void *param);
+
+/* synchronous versions: */
+char *vdi_read_s (vdi_t *vdi, u64 vaddr);
+int vdi_write_s(vdi_t *vdi, u64 vaddr, char *block);
+
+#define ERR_BAD_VADDR -1
+#define ERR_NOMEM -2
+
+#endif //_REQUESTSASYNC_H_
diff --git a/tools/blktap/snaplog.c b/tools/blktap/snaplog.c
new file mode 100644
index 0000000000..072497fe72
--- /dev/null
+++ b/tools/blktap/snaplog.c
@@ -0,0 +1,238 @@
+/**************************************************************************
+ *
+ * snaplog.c
+ *
+ * Snapshot log on-disk data structure.
+ *
+ */
+
+ /* VDI histories are made from chains of snapshot logs. These logs record
+ * the (radix) root and timestamp of individual snapshots.
+ *
+ * creation of a new VDI involves 'forking' a snapshot log, by creating a
+ * new, empty log (in a new VDI) and parenting it off of a record in an
+ * existing snapshot log.
+ *
+ * snapshot log blocks have at most one writer.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include "blockstore.h"
+#include "snaplog.h"
+
+
+
+snap_block_t *snap_get_block(u64 block)
+{
+ snap_block_t *blk = (snap_block_t *)readblock(block);
+
+ if ( blk == NULL)
+ return NULL;
+ if ( blk->hdr.magic != SNAP_MAGIC ) {
+ freeblock(blk);
+ return NULL;
+ }
+
+ return blk;
+}
+
+int snap_get_id(snap_id_t *id, snap_rec_t *target)
+{
+ snap_block_t *blk;
+
+ if ( id == NULL )
+ return -1;
+
+ blk = snap_get_block(id->block);
+
+ if ( blk == NULL )
+ return -1;
+
+ if ( id->index > blk->hdr.nr_entries ) {
+ freeblock(blk);
+ return -1;
+ }
+
+ *target = blk->snaps[id->index];
+ freeblock(blk);
+ return 0;
+}
+
+int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id,
+ snap_id_t *new_id)
+{
+ snap_rec_t parent_rec, fork_rec;
+ snap_block_t *blk, *pblk;
+ /*
+ if ( (parent_id != NULL) && (snap_get_id(parent_id, &parent_rec) != 0) )
+ return -1;
+
+ if ( (fork_id != NULL) && (snap_get_id(fork_id, &fork_rec) != 0) )
+ return -1;
+*/
+ blk = (snap_block_t *)newblock();
+ blk->hdr.magic = SNAP_MAGIC;
+ blk->hdr.nr_entries = 0;
+ blk->hdr.log_entries = 0;
+ blk->hdr.immutable = 0;
+
+ if ( (parent_id != NULL)
+ && (parent_id->block != fork_id->block)
+ && (parent_id->block != 0)) {
+
+ pblk = snap_get_block(parent_id->block);
+ blk->hdr.log_entries = pblk->hdr.log_entries;
+ freeblock(pblk);
+ }
+
+ if (parent_id != NULL) {
+ blk->hdr.parent_block = *parent_id;
+ blk->hdr.fork_block = *fork_id;
+ } else {
+ blk->hdr.parent_block = null_snap_id;
+ blk->hdr.fork_block = null_snap_id;
+ }
+
+ new_id->index = 0;
+ new_id->block = allocblock(blk);
+ freeblock(blk);
+ if (new_id->block == 0)
+ return -1;
+
+ return 0;
+}
+
+int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id)
+{
+ return __snap_block_create(parent_id, parent_id, new_id);
+}
+
+int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id)
+{
+ snap_id_t id = *old_id;
+ snap_block_t *blk = snap_get_block(id.block);
+
+ if ( rec->deleted == 1 ) {
+ printf("Attempt to append a deleted snapshot!\n");
+ return -1;
+ }
+
+ if ( blk->hdr.immutable != 0 ) {
+ printf("Attempt to snap an immutable snap block!\n");
+ return -1;
+ }
+
+ new_id->block = id.block;
+
+ if (blk->hdr.nr_entries == SNAPS_PER_BLOCK) {
+ int ret;
+
+ id.index--; /* make id point to the last full record */
+
+ ret = __snap_block_create(&id, &blk->hdr.fork_block, new_id);
+ if ( ret != 0 ) {
+ freeblock(blk);
+ return -1;
+ }
+
+ blk->hdr.immutable = 1;
+ writeblock(id.block, blk);
+ freeblock(blk);
+ blk = snap_get_block(new_id->block);
+ id = *new_id;
+ }
+
+ blk->snaps[blk->hdr.nr_entries] = *rec;
+ blk->hdr.nr_entries++;
+ blk->hdr.log_entries++;
+ new_id->index = blk->hdr.nr_entries;
+ //printf("snap: %u %u\n", blk->hdr.nr_entries, blk->hdr.log_entries);
+ writeblock(id.block, blk);
+ freeblock(blk);
+ return 0;
+}
+
+int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id)
+{
+ snap_block_t *p_blk, *c_blk, *blk;
+ snap_rec_t *p_rec, *c_rec;
+ int ret = -1;
+
+ p_blk = snap_get_block(p_id->block);
+
+ if (p_blk == NULL) return(-1);
+
+ if (c_id->block == p_id->block)
+ {
+ c_blk = p_blk;
+ } else {
+ c_blk = snap_get_block(c_id->block);
+ }
+
+ if (p_blk == NULL) {
+ freeblock(p_blk);
+ return(-1);
+ }
+
+ /* parent and child must not be deleted. */
+ p_rec = &p_blk->snaps[p_id->index];
+ c_rec = &c_blk->snaps[c_id->index];
+ /*
+ if ( (p_rec->deleted == 1) || (c_rec->deleted == 1) ) {
+ printf("One of those snaps is already deleted.\n");
+ goto done;
+ }
+ */
+ /* first non-deleted thing in the log before child must be parent. */
+
+    /* XXX todo: test the range here for delete (and eventually fork) bits */
+ /* for now, snaps must be consecutive, on the same log page: */
+
+ if ((p_id->block != c_id->block) || (p_id->index != c_id->index-1))
+ {
+ printf("Deleting non-consecutive snaps is not done yet.\n");
+ goto done;
+ }
+
+ /* mark parent as deleted XXX: may need to lock parent block here.*/
+ p_rec->deleted = 1;
+ writeblock(p_id->block, p_blk);
+
+ /* delete the parent */
+ printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root);
+ ret = collapse(height, p_rec->radix_root, c_rec->radix_root);
+
+ /* return the number of blocks reclaimed. */
+
+done:
+ if (c_blk != p_blk) freeblock(c_blk);
+ freeblock(p_blk);
+
+ return(ret);
+}
+
+void snap_print_history(snap_id_t *snap_id)
+{
+ snap_id_t id = *snap_id;
+ unsigned int idx = id.index;
+ snap_block_t *new_blk, *blk = snap_get_block(id.block);
+
+ while ( blk ) {
+ printf("[Snap block %Ld]:\n", id.block);
+ do {
+ printf(" %03u: root: %Ld ts: %ld.%ld\n", idx,
+ blk->snaps[idx].radix_root,
+ blk->snaps[idx].timestamp.tv_sec,
+ blk->snaps[idx].timestamp.tv_usec);
+ } while (idx-- != 0);
+
+ id = blk->hdr.parent_block;
+ if (id.block != 0) {
+ new_blk = snap_get_block(id.block);
+ }
+ freeblock(blk);
+        blk = (id.block != 0) ? new_blk : NULL;
+ }
+}
diff --git a/tools/blktap/snaplog.h b/tools/blktap/snaplog.h
new file mode 100644
index 0000000000..02748bdedb
--- /dev/null
+++ b/tools/blktap/snaplog.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * snaplog.h
+ *
+ * Snapshot log on-disk data structure.
+ *
+ */
+
+#include "radix.h"
+#include "blockstore.h" /* for BLOCK_SIZE */
+
+#ifndef __SNAPLOG_H__
+#define __SNAPLOG_H__
+
+typedef struct snap_id {
+ u64 block;
+ unsigned int index;
+} snap_id_t;
+
+typedef struct snap_rec {
+ u64 radix_root;
+ struct timeval timestamp;
+ /* flags: */
+ unsigned deleted:1;
+} snap_rec_t;
+
+
+int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
+int snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
+int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id);
+void snap_print_history(snap_id_t *snap_id);
+int snap_get_id(snap_id_t *id, snap_rec_t *target);
+
+
+/* exported for vdi debugging */
+#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL
+
+static const snap_id_t null_snap_id = { 0, 0 };
+
+typedef struct snap_block_hdr {
+ u64 magic;
+ snap_id_t parent_block; /* parent block within this chain */
+ snap_id_t fork_block; /* where this log was forked */
+ unsigned log_entries; /* total entries since forking */
+ unsigned short nr_entries; /* entries in snaps[] */
+ unsigned short immutable; /* has this snap page become immutable? */
+} snap_block_hdr_t;
+
+
+#define SNAPS_PER_BLOCK \
+ ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t))
+
+typedef struct snap_block {
+ snap_block_hdr_t hdr;
+ snap_rec_t snaps[SNAPS_PER_BLOCK];
+} snap_block_t;
+
+
+snap_block_t *snap_get_block(u64 block);
+
+#endif /* __SNAPLOG_H__ */
diff --git a/tools/blktap/vdi.c b/tools/blktap/vdi.c
new file mode 100644
index 0000000000..f3181b86b2
--- /dev/null
+++ b/tools/blktap/vdi.c
@@ -0,0 +1,367 @@
+/**************************************************************************
+ *
+ * vdi.c
+ *
+ * Virtual Disk Image (VDI) Interfaces
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include "blockstore.h"
+#include "block-async.h"
+#include "requests-async.h"
+#include "radix.h"
+#include "vdi.h"
+
+#define VDI_REG_BLOCK 2LL
+#define VDI_RADIX_ROOT writable(3)
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* I haven't decided about this registry stuff, so this is just a really
+ * quick lash-up so that there is some way to track VDIs.
+ *
+ * (Most vdi access should be with a direct handle to the block, so this
+ * registry is just for start-of-day lookup and other control operations.)
+ */
+
+vdi_registry_t *create_vdi_registry(void)
+{
+ vdi_registry_t *reg = (vdi_registry_t *)newblock();
+
+ if (reg == NULL)
+ return NULL;
+
+ /* zero-fill the vdi radix root while we have an empty block. */
+ writeblock(VDI_RADIX_ROOT, (void *)reg);
+
+
+ DPRINTF("[vdi.c] Creating VDI registry!\n");
+ reg->magic = VDI_REG_MAGIC;
+ reg->nr_vdis = 0;
+
+ writeblock(VDI_REG_BLOCK, (void *)reg);
+
+ return reg;
+}
+
+vdi_registry_t *get_vdi_registry(void)
+{
+ vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK);
+
+ if ( vdi_reg == NULL )
+ vdi_reg = create_vdi_registry();
+
+ if ( vdi_reg->magic != VDI_REG_MAGIC ) {
+ freeblock(vdi_reg);
+ return NULL;
+ }
+
+ return vdi_reg;
+}
+
+
+vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
+{
+ int ret;
+ vdi_t *vdi;
+ vdi_registry_t *vdi_reg;
+ snap_rec_t snap_rec;
+
+ /* create a vdi struct */
+ vdi = newblock();
+ if (vdi == NULL)
+ return NULL;
+
+ if ( snap_get_id(parent_snap, &snap_rec) == 0 ) {
+ vdi->radix_root = snapshot(snap_rec.radix_root);
+ } else {
+ vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */
+ vdi->radix_root = writable(vdi->radix_root); /* grr. */
+ }
+
+ /* create a snapshot log, and add it to the vdi struct */
+
+ ret = snap_block_create(parent_snap, &vdi->snap);
+ if ( ret != 0 ) {
+ DPRINTF("Error getting snap block in vdi_create.\n");
+ freeblock(vdi);
+ return NULL;
+ }
+
+ /* append the vdi to the registry, fill block and id. */
+ /* implicit allocation means we have to write the vdi twice here. */
+ vdi_reg = get_vdi_registry();
+ if ( vdi_reg == NULL ) {
+ freeblock(vdi);
+ return NULL;
+ }
+
+ vdi->block = allocblock((void *)vdi);
+ vdi->id = vdi_reg->nr_vdis++;
+ strncpy(vdi->name, name, VDI_NAME_SZ);
+    vdi->name[VDI_NAME_SZ-1] = '\0';
+ vdi->radix_lock = NULL; /* for tidiness */
+ writeblock(vdi->block, (void *)vdi);
+
+ update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
+ writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
+ freeblock(vdi_reg);
+
+ vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
+ if (vdi->radix_lock == NULL)
+ {
+ perror("couldn't malloc radix_lock for new vdi!");
+ freeblock(vdi);
+ return NULL;
+ }
+ radix_lock_init(vdi->radix_lock);
+
+ return vdi;
+}
+
+/* vdi_get and vdi_put currently act more like alloc/free -- they don't
+ * do refcount-based allocation.
+ */
+vdi_t *vdi_get(u64 vdi_id)
+{
+ u64 vdi_blk;
+ vdi_t *vdi;
+
+ vdi_blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id);
+
+ if ( vdi_blk == 0 )
+ return NULL;
+
+ vdi = (vdi_t *)readblock(vdi_blk);
+
+ vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
+ if (vdi->radix_lock == NULL)
+ {
+ perror("couldn't malloc radix_lock for new vdi!");
+ freeblock(vdi);
+ return NULL;
+ }
+ radix_lock_init(vdi->radix_lock);
+
+ return vdi;
+}
+
+void vdi_put(vdi_t *vdi)
+{
+ free(vdi->radix_lock);
+ freeblock(vdi);
+}
+
+void vdi_snapshot(vdi_t *vdi)
+{
+ snap_rec_t rec;
+ int ret;
+
+ rec.radix_root = vdi->radix_root;
+ gettimeofday(&rec.timestamp, NULL);
+ rec.deleted = 0;
+
+ vdi->radix_root = snapshot(vdi->radix_root);
+ ret = snap_append(&vdi->snap, &rec, &vdi->snap);
+ if ( ret != 0 ) {
+ printf("snap_append returned failure\n");
+ return;
+ }
+ writeblock(vdi->block, vdi);
+}
+
+int __init_vdi()
+{
+ /* sneak this in here for the moment. */
+ __rcache_init();
+
+ /* force the registry to be created if it doesn't exist. */
+ vdi_registry_t *vdi_reg = get_vdi_registry();
+ if (vdi_reg == NULL) {
+ printf("[vdi.c] Couldn't get/create a VDI registry!\n");
+ return -1;
+ }
+ freeblock(vdi_reg);
+
+
+ return 0;
+}
+
+#ifdef VDI_STANDALONE
+
+#define TEST_VDIS 50
+#define NR_ITERS 50000
+#define FORK_POINTS 200
+#define INIT_VDIS 3
+#define INIT_SNAPS 40
+
+/* These must be of decreasing size: */
+#define NEW_FORK (RAND_MAX-(RAND_MAX/1000))
+#define NEW_ROOT_VDI (RAND_MAX-((RAND_MAX/1000)*2))
+#define NEW_FORK_VDI (RAND_MAX-((RAND_MAX/1000)*3))
+
+#define GRAPH_DOT_FILE "vdi.dot"
+#define GRAPH_PS_FILE "vdi.ps"
+
+
+typedef struct sh_st {
+ snap_id_t id;
+ struct sh_st *next;
+} sh_t;
+
+#define SNAP_HASHSZ 1024
+sh_t *node_hash[SNAP_HASHSZ];
+#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
+
+#define SNAPID_EQUAL(_a,_b) \
+ (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
+int sh_check_and_add(snap_id_t *id)
+{
+ sh_t **s = &node_hash[SNAP_HASH(id)];
+
+ while (*s != NULL) {
+ if (SNAPID_EQUAL(&((*s)->id), id))
+ return 1;
+        s = &(*s)->next;
+ }
+
+ *s = (sh_t *)malloc(sizeof(sh_t));
+ (*s)->id = *id;
+ (*s)->next = NULL;
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ vdi_t *vdi_list[TEST_VDIS];
+ snap_id_t id, fork_points[FORK_POINTS];
+ int nr_vdis = 0, nr_forks = 0;
+ int i, j, r;
+ FILE *f;
+ char name[VDI_NAME_SZ];
+
+ __init_blockstore();
+ __init_vdi();
+
+ printf("[o] Generating seed VDIs. (%d VDIs)\n", INIT_VDIS);
+
+ for (i=0; i<INIT_VDIS; i++) {
+ r=rand();
+
+ sprintf(name, "VDI Number %d", nr_vdis);
+ vdi_list[i] = vdi_create(NULL, name);
+ for (j=0; j<(r%INIT_SNAPS); j++)
+ vdi_snapshot(vdi_list[i]);
+ fork_points[i] = vdi_list[i]->snap;
+ nr_vdis++;
+ nr_forks++;
+ }
+
+ printf("[o] Running a random workload. (%d iterations)\n", NR_ITERS);
+
+ for (i=0; i<NR_ITERS; i++) {
+ r = rand();
+
+ if ( r > NEW_FORK ) {
+ if ( nr_forks > FORK_POINTS )
+ continue;
+ id = vdi_list[r%nr_vdis]->snap;
+ if ( ( id.block == 0 ) || ( id.index == 0 ) )
+ continue;
+ id.index--;
+ fork_points[nr_forks++] = id;
+
+ } else if ( r > NEW_ROOT_VDI ) {
+
+ if ( nr_vdis == TEST_VDIS )
+ continue;
+
+ sprintf(name, "VDI Number %d.", nr_vdis);
+ vdi_list[nr_vdis++] = vdi_create(NULL, name);
+
+ } else if ( r > NEW_FORK_VDI ) {
+
+ if ( nr_vdis == TEST_VDIS )
+ continue;
+
+ sprintf(name, "VDI Number %d.", nr_vdis);
+ vdi_list[nr_vdis++] = vdi_create(&fork_points[r%nr_forks], name);
+
+ } else /* SNAPSHOT */ {
+
+ vdi_snapshot(vdi_list[r%nr_vdis]);
+
+ }
+ }
+
+ /* now dump it out to a dot file. */
+ printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
+
+ f = fopen(GRAPH_DOT_FILE, "w");
+
+ /* write graph preamble */
+ fprintf(f, "digraph G {\n");
+ fprintf(f, " rankdir=LR\n");
+
+ for (i=0; i<nr_vdis; i++) {
+ char oldnode[255];
+ snap_block_t *blk;
+ snap_id_t id = vdi_list[i]->snap;
+ int nr_snaps, done=0;
+
+ /* add a node for the id */
+printf("vdi: %d\n", i);
+ fprintf(f, " n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n",
+ id.block, id.index, vdi_list[i]->name,
+ id.block, id.index);
+ sprintf(oldnode, "n%Ld%d", id.block, id.index);
+
+ while (id.block != 0) {
+ blk = snap_get_block(id.block);
+ nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
+ id = blk->hdr.fork_block;
+
+ done = sh_check_and_add(&id);
+
+ /* add a node for the fork_id */
+ if (!done) {
+ fprintf(f, " n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n",
+ id.block, id.index,
+ id.block, id.index);
+ }
+
+ /* add an edge between them */
+ fprintf(f, " n%Ld%d -> %s [label=\"%u snapshots\"]\n",
+ id.block, id.index, oldnode, nr_snaps);
+ sprintf(oldnode, "n%Ld%d", id.block, id.index);
+ freeblock(blk);
+
+ if (done) break;
+ }
+ }
+
+ /* write graph postamble */
+ fprintf(f, "}\n");
+ fclose(f);
+
+ printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
+ {
+ char cmd[255];
+ sprintf(cmd, "dot %s -Tps -o %s", GRAPH_DOT_FILE, GRAPH_PS_FILE);
+ system(cmd);
+ }
+ return 0;
+}
+
+#endif
diff --git a/tools/blktap/vdi.h b/tools/blktap/vdi.h
new file mode 100644
index 0000000000..462fa03299
--- /dev/null
+++ b/tools/blktap/vdi.h
@@ -0,0 +1,55 @@
+#ifndef _VDI_H_
+#define _VDI_H_
+/**************************************************************************
+ *
+ * vdi.h
+ *
+ * Virtual Disk Image (VDI) Interfaces
+ *
+ */
+
+#ifndef __VDI_H__
+#define __VDI_H__
+
+#include "blktaplib.h"
+#include "snaplog.h"
+
+#define VDI_HEIGHT 27 /* Note that these are now hard-coded */
+#define VDI_REG_HEIGHT 27 /* in the async lookup code */
+
+#define VDI_NAME_SZ 256
+
+
+typedef struct vdi {
+ u64 id; /* unique vdi id -- used by the registry */
+ u64 block; /* block where this vdi lives (also unique)*/
+ u64 radix_root; /* radix root node for block mappings */
+ snap_id_t snap; /* next snapshot slot for this VDI */
+ struct vdi *next; /* used to hash-chain in blkif. */
+ blkif_vdev_t vdevice; /* currently mounted as... */
+ struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs */
+ char name[VDI_NAME_SZ];/* human readable vdi name */
+} vdi_t;
+
+#define VDI_REG_MAGIC 0xff00ff0bb0ff00ffLL
+
+typedef struct vdi_registry {
+ u64 magic;
+ u64 nr_vdis;
+} vdi_registry_t;
+
+
+int __init_vdi(void);
+
+vdi_t *vdi_get(u64 vdi_id);
+void vdi_put(vdi_t *vdi);
+vdi_registry_t *get_vdi_registry(void);
+vdi_t *vdi_create(snap_id_t *parent_snap, char *name);
+u64 vdi_lookup_block(vdi_t *vdi, u64 vdi_block, int *writable);
+void vdi_update_block(vdi_t *vdi, u64 vdi_block, u64 g_block);
+void vdi_snapshot(vdi_t *vdi);
+
+
+#endif /* __VDI_H__ */
+
+#endif //_VDI_H_
diff --git a/tools/blktap/vdi_create.c b/tools/blktap/vdi_create.c
new file mode 100644
index 0000000000..1d40b86c24
--- /dev/null
+++ b/tools/blktap/vdi_create.c
@@ -0,0 +1,52 @@
+/**************************************************************************
+ *
+ * vdi_create.c
+ *
+ * Create a new vdi.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "blockstore.h"
+#include "radix.h"
+#include "vdi.h"
+
+int main(int argc, char *argv[])
+{
+ vdi_t *vdi;
+ char name[VDI_NAME_SZ] = "";
+ snap_id_t id;
+ int from_snap = 0;
+
+ __init_blockstore();
+ __init_vdi();
+
+ if ( argc == 1 ) {
+ printf("usage: %s <VDI Name> [<snap block> <snap idx>]\n", argv[0]);
+ exit(-1);
+ }
+
+ strncpy( name, argv[1], VDI_NAME_SZ);
+    name[VDI_NAME_SZ-1] = '\0';
+
+ if ( argc > 3 ) {
+ id.block = (u64) atoll(argv[2]);
+ id.index = (unsigned int) atol (argv[3]);
+ from_snap = 1;
+ }
+
+ vdi = vdi_create( from_snap ? &id : NULL, name);
+
+ if ( vdi == NULL ) {
+ printf("Failed to create VDI!\n");
+ freeblock(vdi);
+ exit(-1);
+ }
+
+ freeblock(vdi);
+
+ return (0);
+}
diff --git a/tools/blktap/vdi_fill.c b/tools/blktap/vdi_fill.c
new file mode 100644
index 0000000000..61025862f7
--- /dev/null
+++ b/tools/blktap/vdi_fill.c
@@ -0,0 +1,81 @@
+/**************************************************************************
+ *
+ * vdi_fill.c
+ *
+ * Hoover a file or device into a vdi.
+ * You must first create the vdi with vdi_create.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "blockstore.h"
+#include "radix.h"
+#include "requests-async.h"
+#include "vdi.h"
+
+int main(int argc, char *argv[])
+{
+ vdi_t *vdi;
+ u64 id;
+ int fd;
+ struct stat st;
+ u64 tot_size;
+ char spage[BLOCK_SIZE];
+ char *dpage;
+ u64 vblock = 0, count=0;
+
+ __init_blockstore();
+ init_block_async();
+ __init_vdi();
+
+ if ( argc < 3 ) {
+ printf("usage: %s <VDI id> <filename>\n", argv[0]);
+ exit(-1);
+ }
+
+ id = (u64) atoll(argv[1]);
+
+ vdi = vdi_get( id );
+
+ if ( vdi == NULL ) {
+ printf("Failed to retreive VDI %Ld!\n", id);
+ exit(-1);
+ }
+
+ fd = open(argv[2], O_RDONLY | O_LARGEFILE);
+
+ if (fd < 0) {
+ printf("Couldn't open %s!\n", argv[2]);
+ exit(-1);
+ }
+
+ if ( fstat(fd, &st) != 0 ) {
+ printf("Couldn't stat %s!\n", argv[2]);
+ exit(-1);
+ }
+
+ tot_size = (u64) st.st_size;
+ printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size);
+
+ printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE);
+ printf(" ");
+ while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
+ vdi_write_s(vdi, vblock, spage);
+
+ vblock++;
+ if ((vblock % 512) == 0)
+ printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
+ fflush(stdout);
+ }
+ printf("\n");
+
+ freeblock(vdi);
+
+ return (0);
+}
diff --git a/tools/blktap/vdi_list.c b/tools/blktap/vdi_list.c
new file mode 100644
index 0000000000..c08abe27b6
--- /dev/null
+++ b/tools/blktap/vdi_list.c
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * vdi_list.c
+ *
+ * Print a list of VDIs on the block store.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "blockstore.h"
+#include "radix.h"
+#include "vdi.h"
+
+int main(int argc, char *argv[])
+{
+ vdi_registry_t *reg;
+ vdi_t *vdi;
+ int i;
+
+ __init_blockstore();
+ __init_vdi();
+
+ reg = get_vdi_registry();
+
+ if ( reg == NULL ) {
+ printf("couldn't get VDI registry.\n");
+ exit(-1);
+ }
+
+ for (i=0; i < reg->nr_vdis; i++) {
+ vdi = vdi_get(i);
+
+ if ( vdi != NULL ) {
+
+ printf("%10Ld %60s\n", vdi->id, vdi->name);
+ freeblock(vdi);
+
+ }
+ }
+
+ freeblock(reg);
+
+ return 0;
+}
diff --git a/tools/blktap/vdi_snap.c b/tools/blktap/vdi_snap.c
new file mode 100644
index 0000000000..3b09898dc6
--- /dev/null
+++ b/tools/blktap/vdi_snap.c
@@ -0,0 +1,43 @@
+/**************************************************************************
+ *
+ * vdi_snap.c
+ *
+ * Snapshot a vdi.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "blockstore.h"
+#include "radix.h"
+#include "vdi.h"
+
+int main(int argc, char *argv[])
+{
+ vdi_t *vdi;
+ u64 id;
+
+ __init_blockstore();
+ __init_vdi();
+
+ if ( argc == 1 ) {
+ printf("usage: %s <VDI id>\n", argv[0]);
+ exit(-1);
+ }
+
+ id = (u64) atoll(argv[1]);
+
+ vdi = vdi_get(id);
+
+ if ( vdi == NULL ) {
+ printf("couldn't find the requested VDI.\n");
+ freeblock(vdi);
+ exit(-1);
+ }
+
+ vdi_snapshot(vdi);
+
+ return 0;
+}
diff --git a/tools/blktap/vdi_snap_delete.c b/tools/blktap/vdi_snap_delete.c
new file mode 100644
index 0000000000..0160ccad83
--- /dev/null
+++ b/tools/blktap/vdi_snap_delete.c
@@ -0,0 +1,48 @@
+/**************************************************************************
+ *
+ * vdi_snap_delete.c
+ *
+ * Delete a snapshot.
+ *
+ * This is not finished: right now it takes a snap n and calls
+ * snap_collapse(n,n+1).
+ *
+ * TODO: support for non-consecutive, non-same-block snaps
+ * Avoid forking probs.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "blockstore.h"
+#include "snaplog.h"
+#include "radix.h"
+#include "vdi.h"
+
+int main(int argc, char *argv[])
+{
+ snap_id_t id, c_id;
+ int ret;
+
+ __init_blockstore();
+ __init_vdi();
+
+ if ( argc != 3 ) {
+ printf("usage: %s <snap block> <snap idx>\n", argv[0]);
+ exit(-1);
+ }
+
+ id.block = (u64) atoll(argv[1]);
+ id.index = (unsigned int) atol (argv[2]);
+
+ c_id = id;
+ c_id.index++;
+
+ ret = snap_collapse(VDI_HEIGHT, &id, &c_id);
+
+ printf("Freed %d blocks.\n", ret);
+
+ return 0;
+}
diff --git a/tools/blktap/vdi_snap_list.c b/tools/blktap/vdi_snap_list.c
new file mode 100644
index 0000000000..044397495d
--- /dev/null
+++ b/tools/blktap/vdi_snap_list.c
@@ -0,0 +1,82 @@
+/**************************************************************************
+ *
+ * vdi_snap_list.c
+ *
+ * Print a list of snapshots for the specified vdi.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/time.h>
+#include "blockstore.h"
+#include "radix.h"
+#include "vdi.h"
+
+int main(int argc, char *argv[])
+{
+ vdi_t *vdi;
+ u64 id;
+ int i, max_snaps = -1;
+ snap_block_t *blk;
+ snap_id_t sid;
+ char *t;
+
+ __init_blockstore();
+ __init_vdi();
+
+ if ( argc == 1 ) {
+ printf("usage: %s <VDI id> [max snaps]\n", argv[0]);
+ exit(-1);
+ }
+
+ id = (u64) atoll(argv[1]);
+
+ if ( argc > 2 ) {
+ max_snaps = atoi(argv[2]);
+ }
+
+ vdi = vdi_get(id);
+
+ if ( vdi == NULL ) {
+ printf("couldn't find the requested VDI.\n");
+ freeblock(vdi);
+ exit(-1);
+ }
+
+ sid = vdi->snap;
+ sid.index--;
+
+ //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp",
+ // "radix root", "d");
+ printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp",
+ "radix root", "d");
+
+ while (sid.block != 0) {
+ blk = snap_get_block(sid.block);
+ for (i = sid.index; i >= 0; i--) {
+ if ( max_snaps == 0 ) {
+ freeblock(blk);
+ goto done;
+ }
+ t = ctime(&blk->snaps[i].timestamp.tv_sec);
+ t[strlen(t)-1] = '\0';
+ //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n",
+ printf("%8Ld%4u%30s %06lu %12Ld %1s\n",
+ sid.block, i,
+ //blk->snaps[i].timestamp.tv_sec,
+ t,
+ blk->snaps[i].timestamp.tv_usec,
+ blk->snaps[i].radix_root,
+ blk->snaps[i].deleted ? "*" : " ");
+ if ( max_snaps != -1 )
+ max_snaps--;
+ }
+ sid = blk->hdr.parent_block;
+ freeblock(blk);
+ }
+done:
+ return 0;
+}
diff --git a/tools/blktap/vdi_tree.c b/tools/blktap/vdi_tree.c
new file mode 100644
index 0000000000..d43abccef4
--- /dev/null
+++ b/tools/blktap/vdi_tree.c
@@ -0,0 +1,132 @@
+/**************************************************************************
+ *
+ * vdi_tree.c
+ *
+ * Output current vdi tree to dot and postscript.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include "blockstore.h"
+#include "radix.h"
+#include "vdi.h"
+
+#define GRAPH_DOT_FILE "vdi.dot"
+#define GRAPH_PS_FILE "vdi.ps"
+
+typedef struct sh_st {
+ snap_id_t id;
+ struct sh_st *next;
+} sh_t;
+
+#define SNAP_HASHSZ 1024
+sh_t *node_hash[SNAP_HASHSZ];
+#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
+
+#define SNAPID_EQUAL(_a,_b) \
+ (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
+int sh_check_and_add(snap_id_t *id)
+{
+ sh_t **s = &node_hash[SNAP_HASH(id)];
+
+ while (*s != NULL) {
+ if (SNAPID_EQUAL(&((*s)->id), id))
+ return 1;
+        s = &(*s)->next;
+ }
+
+ *s = (sh_t *)malloc(sizeof(sh_t));
+ (*s)->id = *id;
+ (*s)->next = NULL;
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ FILE *f;
+ char dot_file[255] = GRAPH_DOT_FILE;
+ char ps_file[255] = GRAPH_PS_FILE;
+ int nr_vdis = 0, nr_forks = 0;
+ vdi_registry_t *reg;
+ vdi_t *vdi;
+ int i;
+
+ __init_blockstore();
+ __init_vdi();
+
+ reg = get_vdi_registry();
+
+ if ( reg == NULL ) {
+ printf("couldn't get VDI registry.\n");
+ exit(-1);
+ }
+
+ if ( argc > 1 ) {
+ strncpy(ps_file, argv[1], 255);
+        ps_file[254] = '\0';
+ }
+
+ /* now dump it out to a dot file. */
+ printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
+
+ f = fopen(dot_file, "w");
+
+ /* write graph preamble */
+ fprintf(f, "digraph G {\n");
+ fprintf(f, " rankdir=LR\n");
+
+ for (i=0; i<reg->nr_vdis; i++) {
+ char oldnode[255];
+ snap_block_t *blk;
+ snap_id_t id;
+ int nr_snaps, done=0;
+
+ vdi = vdi_get(i);
+ id = vdi->snap;
+ /* add a node for the id */
+printf("vdi: %d\n", i);
+ fprintf(f, " n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n",
+ id.block, id.index, vdi->name,
+ id.block, id.index);
+ sprintf(oldnode, "n%Ld%d", id.block, id.index);
+
+ while (id.block != 0) {
+ blk = snap_get_block(id.block);
+ nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
+ id = blk->hdr.fork_block;
+
+ done = sh_check_and_add(&id);
+
+ /* add a node for the fork_id */
+ if (!done) {
+ fprintf(f, " n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n",
+ id.block, id.index,
+ id.block, id.index);
+ }
+
+ /* add an edge between them */
+ fprintf(f, " n%Ld%d -> %s [label=\"%u snapshots\"]\n",
+ id.block, id.index, oldnode, nr_snaps);
+ sprintf(oldnode, "n%Ld%d", id.block, id.index);
+ freeblock(blk);
+
+ if (done) break;
+ }
+ }
+
+ /* write graph postamble */
+ fprintf(f, "}\n");
+ fclose(f);
+
+ printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
+ {
+ char cmd[255];
+ sprintf(cmd, "dot %s -Tps -o %s", dot_file, ps_file);
+ system(cmd);
+ }
+ return 0;
+}
diff --git a/tools/blktap/vdi_unittest.c b/tools/blktap/vdi_unittest.c
new file mode 100644
index 0000000000..77ecc833be
--- /dev/null
+++ b/tools/blktap/vdi_unittest.c
@@ -0,0 +1,184 @@
+/**************************************************************************
+ *
+ * vdi_unittest.c
+ *
+ * Run a small test workload to ensure that data access through a vdi
+ * is (at least superficially) correct.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "requests-async.h"
+#include "blockstore.h"
+#include "radix.h"
+#include "vdi.h"
+
+#define TEST_PAGES 32
+static char *zero_page;
+static char pages[TEST_PAGES][BLOCK_SIZE];
+static int next_page = 0;
+
+void fill_test_pages(void)
+{
+ int i, j;
+ long *page;
+
+ for (i=0; i< TEST_PAGES; i++) {
+ page = (unsigned long *)pages[i];
+ for (j=0; j<(BLOCK_SIZE/4); j++) {
+ page[j] = random();
+ }
+ }
+
+ zero_page = newblock();
+}
+
+inline u64 make_vaddr(u64 L1, u64 L2, u64 L3)
+{
+ u64 ret = L1;
+
+ ret = (ret << 9) | L2;
+ ret = (ret << 9) | L3;
+
+ return ret;
+}
+
+void touch_block(vdi_t *vdi, u64 L1, u64 L2, u64 L3)
+{
+ u64 vaddr;
+ char *page = pages[next_page++];
+ char *rpage = NULL;
+
+ printf("TOUCH (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
+
+ vaddr = make_vaddr(L1, L2, L3);
+ vdi_write_s(vdi, vaddr, page);
+ rpage = vdi_read_s(vdi, vaddr);
+
+ if (rpage == NULL)
+ {
+ printf( "read %Lu returned NULL\n", vaddr);
+ return;
+ }
+
+ if (memcmp(page, rpage, BLOCK_SIZE) != 0)
+ {
+ printf( "read %Lu returned a different page\n", vaddr);
+ return;
+ }
+
+ freeblock(rpage);
+}
+
+void test_block(vdi_t *vdi, u64 L1, u64 L2, u64 L3, char *page)
+{
+ u64 vaddr;
+ char *rpage = NULL;
+
+ printf("TEST (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
+
+ vaddr = make_vaddr(L1, L2, L3);
+ rpage = vdi_read_s(vdi, vaddr);
+
+ if (rpage == NULL)
+ {
+ printf( "read %Lu returned NULL\n", vaddr);
+ return;
+ }
+
+ if (memcmp(page, rpage, BLOCK_SIZE) != 0)
+ {
+ printf( "read %Lu returned a different page\n", vaddr);
+ return;
+ }
+
+ freeblock(rpage);
+}
+
+void coverage_test(vdi_t *vdi)
+{
+ u64 vaddr;
+ int i, j, k;
+
+ /* Do a series of writes and reads to test all paths through the
+ * async radix code. The radix request code will dump CRC warnings
+ * if there are data problems here as well.
+ */
+
+ /* L1 Zero */
+ touch_block(vdi, 0, 0, 0);
+
+ /* L2 Zero */
+ i = next_page;
+ touch_block(vdi, 0, 1, 0);
+
+ /* L3 Zero */
+ j = next_page;
+ touch_block(vdi, 0, 0, 1);
+ k = next_page;
+ touch_block(vdi, 0, 1, 1);
+
+ /* Direct write */
+ touch_block(vdi, 0, 0, 0);
+
+ vdi_snapshot(vdi);
+
+ /* L1 fault */
+ touch_block(vdi, 0, 0, 0);
+ /* test the read-only branches that should have been copied over. */
+ test_block(vdi, 0, 1, 0, pages[i]);
+ test_block(vdi, 0, 0, 1, pages[j]);
+
+ /* L2 fault */
+ touch_block(vdi, 0, 1, 0);
+ test_block(vdi, 0, 1, 1, pages[k]);
+
+ /* L3 fault */
+ touch_block(vdi, 0, 0, 1);
+
+ /* read - L1 zero */
+ test_block(vdi, 1, 0, 0, zero_page);
+
+ /* read - L2 zero */
+ test_block(vdi, 0, 2, 0, zero_page);
+
+ /* read - L3 zero */
+ test_block(vdi, 0, 0, 2, zero_page);
+}
+
+int main(int argc, char *argv[])
+{
+ vdi_t *vdi;
+ u64 id;
+ int fd;
+ struct stat st;
+ u64 tot_size;
+ char spage[BLOCK_SIZE];
+ char *dpage;
+ u64 vblock = 0, count=0;
+
+ __init_blockstore();
+ init_block_async();
+ __init_vdi();
+
+ vdi = vdi_create( NULL, "UNIT TEST VDI");
+
+ if ( vdi == NULL ) {
+ printf("Failed to create VDI!\n");
+ freeblock(vdi);
+ exit(-1);
+ }
+
+ fill_test_pages();
+ coverage_test(vdi);
+
+ freeblock(vdi);
+
+ return (0);
+}
diff --git a/tools/blktap/vdi_validate.c b/tools/blktap/vdi_validate.c
new file mode 100644
index 0000000000..de7a62d3e9
--- /dev/null
+++ b/tools/blktap/vdi_validate.c
@@ -0,0 +1,97 @@
+/**************************************************************************
+ *
+ * vdi_validate.c
+ *
+ * Intended to sanity-check vm_fill and the underlying vdi code.
+ *
+ * Block-by-block compare of a vdi with a file/device on the disk.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "blockstore.h"
+#include "radix.h"
+#include "vdi.h"
+#include "requests-async.h"
+
+int main(int argc, char *argv[])
+{
+ vdi_t *vdi;
+ u64 id;
+ int fd;
+ struct stat st;
+ u64 tot_size;
+ char spage[BLOCK_SIZE], *dpage;
+ char *vpage;
+ u64 vblock = 0, count=0;
+
+ __init_blockstore();
+ init_block_async();
+ __init_vdi();
+
+ if ( argc < 3 ) {
+ printf("usage: %s <VDI id> <filename>\n", argv[0]);
+ exit(-1);
+ }
+
+ id = (u64) atoll(argv[1]);
+
+ vdi = vdi_get( id );
+
+ if ( vdi == NULL ) {
+ printf("Failed to retreive VDI %Ld!\n", id);
+ exit(-1);
+ }
+
+ fd = open(argv[2], O_RDONLY | O_LARGEFILE);
+
+ if (fd < 0) {
+ printf("Couldn't open %s!\n", argv[2]);
+ exit(-1);
+ }
+
+ if ( fstat(fd, &st) != 0 ) {
+ printf("Couldn't stat %s!\n", argv[2]);
+ exit(-1);
+ }
+
+ tot_size = (u64) st.st_size;
+ printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size);
+
+ printf(" ");
+ while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
+
+ dpage = vdi_read_s(vdi, vblock);
+
+ if (dpage == NULL) {
+ printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock);
+ exit(0);
+ }
+
+ if (memcmp(spage, dpage, BLOCK_SIZE) != 0) {
+ printf("\n\nblocks don't match! (%Ld)\n", vblock);
+ exit(0);
+ }
+
+ freeblock(dpage);
+
+ vblock++;
+ if ((vblock % 1024) == 0) {
+ printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
+ fflush(stdout);
+ }
+ }
+ printf("\n");
+
+ printf("VDI %Ld looks good!\n", id);
+
+ freeblock(vdi);
+
+ return (0);
+}
diff --git a/tools/check/check_twisted b/tools/check/check_twisted
deleted file mode 100755
index 06d70b4692..0000000000
--- a/tools/check/check_twisted
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env python
-# CHECK-INSTALL
-# -*- mode: python; -*-
-
-import os
-import sys
-
-def hline():
- print >>sys.stderr, "*" * 70
-
-def msg(message):
- print >>sys.stderr, "*" * 3, message
-
-def check_twisted_version():
- """Check twisted is installed with a supported version and print a warning if not.
- Raises an error if twisted is not installed.
- """
- # Supported twisted release and major version.
- RELEASE = 1
- MAJOR = 3
- try:
- from twisted.copyright import version
- except ImportError:
- hline()
- msg("The Twisted framework is not installed.")
- msg("Use 'make install-twisted' at the xen root to install.")
- msg("")
- msg("Alternatively download and install version %d.%d or higher" % (RELEASE, MAJOR))
- msg("from http://www.twistedmatrix.com/products")
- hline()
- sys.exit(1)
-
- (release, major, minor) = version.split('.')
- release = int(release)
- major = int(major)
- if release > RELEASE: return
- if release == RELEASE and major >= MAJOR: return
- hline()
- msg("Warning: Twisted version not supported: %s" % version)
- msg("Use Twisted version %d.%d.0 or higher" % (RELEASE, MAJOR))
- hline()
- sys.exit(1)
-
-if __name__ == '__main__':
- check_twisted_version()
-
diff --git a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in
new file mode 100644
index 0000000000..e470217934
--- /dev/null
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in
@@ -0,0 +1,308 @@
+# Copyright 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+# 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+# This file is part of GDB.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+host_alias = @host_alias@
+target_alias = @target_alias@
+program_transform_name = @program_transform_name@
+bindir = @bindir@
+libdir = @libdir@
+tooldir = $(libdir)/$(target_alias)
+
+datadir = @datadir@
+mandir = @mandir@
+man1dir = $(mandir)/man1
+man2dir = $(mandir)/man2
+man3dir = $(mandir)/man3
+man4dir = $(mandir)/man4
+man5dir = $(mandir)/man5
+man6dir = $(mandir)/man6
+man7dir = $(mandir)/man7
+man8dir = $(mandir)/man8
+man9dir = $(mandir)/man9
+infodir = @infodir@
+htmldir = $(prefix)/html
+includedir = @includedir@
+
+SHELL = /bin/sh
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+
+CC = @CC@
+
+# Directory containing source files. Don't clean up the spacing,
+# this exact string is matched for by the "configure" script.
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+# It is also possible that you will need to add -I/usr/include/sys to the
+# CFLAGS section if your system doesn't have fcntl.h in /usr/include (which
+# is where it should be according to Posix).
+
+# Set this up with gcc if you have gnu ld and the loader will print out
+# line numbers for undefinded refs.
+#CC-LD=gcc -static
+CC-LD=${CC}
+
+# Where is the "include" directory? Traditionally ../include or ./include
+INCLUDE_DIR = ${srcdir}/../../include
+INCLUDE_DEP = $$(INCLUDE_DIR)
+
+# Where are the BFD library?
+BFD_DIR = ../../bfd
+BFD = $(BFD_DIR)/libbfd.a
+BFD_SRC = $(srcdir)/$(BFD_DIR)
+BFD_CFLAGS = -I$(BFD_DIR) -I$(BFD_SRC)
+
+# Where is the source dir for the READLINE library? Traditionally in .. or .
+# (For the binary library built from it, we use ${READLINE_DIR}${subdir}.)
+READLINE_DIR = ${srcdir}/../readline
+READLINE_DEP = $$(READLINE_DIR)
+
+# All the includes used for CFLAGS and for lint.
+# -I. for config files.
+# -I${srcdir} for our headers.
+# -I$(srcdir)/../regformats for regdef.h.
+INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR) -I../../../../../libxc/
+
+# M{H,T}_CFLAGS, if defined, has host- and target-dependent CFLAGS
+# from the config/ directory.
+GLOBAL_CFLAGS = ${MT_CFLAGS} ${MH_CFLAGS}
+#PROFILE_CFLAGS = -pg
+
+WARN_CFLAGS = -Wall
+
+# CFLAGS is specifically reserved for setting from the command line
+# when running make. I.E. "make CFLAGS=-Wmissing-prototypes".
+CFLAGS = @CFLAGS@
+
+# INTERNAL_CFLAGS is the aggregate of all other *CFLAGS macros.
+INTERNAL_CFLAGS = $(WARN_CFLAGS) ${CFLAGS} ${GLOBAL_CFLAGS} \
+ ${PROFILE_CFLAGS} ${INCLUDE_CFLAGS} ${BFD_CFLAGS}
+
+# LDFLAGS is specifically reserved for setting from the command line
+# when running make.
+LDFLAGS = @LDFLAGS@
+
+# Perhaps should come from parent Makefile
+VERSION = gdbserver-4.12.3
+DIST=gdb
+
+LINT=/usr/5bin/lint
+LINTFLAGS= $(BFD_CFLAGS)
+
+# All source files that go into linking GDB remote server.
+
+SFILES= $(srcdir)/gdbreplay.c $(srcdir)/inferiors.c \
+ $(srcdir)/mem-break.c $(srcdir)/proc-service.c $(srcdir)/regcache.c \
+ $(srcdir)/remote-utils.c $(srcdir)/server.c $(srcdir)/target.c \
+ $(srcdir)/thread-db.c $(srcdir)/utils.c \
+ $(srcdir)/linux-arm-low.c $(srcdir)/linux-i386-low.c \
+ $(srcdir)/i387-fp.c \
+ $(srcdir)/linux-ia64-low.c $(srcdir)/linux-low.c \
+ $(srcdir)/linux-m68k-low.c $(srcdir)/linux-mips-low.c \
+ $(srcdir)/linux-ppc-low.c $(srcdir)/linux-s390-low.c \
+ $(srcdir)/linux-sh-low.c $(srcdir)/linux-x86-64-low.c
+
+DEPFILES = @GDBSERVER_DEPFILES@
+
+SOURCES = $(SFILES)
+TAGFILES = $(SOURCES) ${HFILES} ${ALLPARAM} ${POSSLIBS}
+
+OBS = inferiors.o regcache.o remote-utils.o server.o signals.o target.o \
+ utils.o \
+ mem-break.o \
+ $(DEPFILES)
+GDBSERVER_LIBS = @GDBSERVER_LIBS@
+
+# Prevent Sun make from putting in the machine type. Setting
+# TARGET_ARCH to nothing works for SunOS 3, 4.0, but not for 4.1.
+.c.o:
+ ${CC} -c ${INTERNAL_CFLAGS} $<
+
+all: gdbserver-xen gdbreplay
+
+# Traditionally "install" depends on "all". But it may be useful
+# not to; for example, if the user has made some trivial change to a
+# source file and doesn't care about rebuilding or just wants to save the
+# time it takes for make to check that all is up to date.
+# install-only is intended to address that need.
+install: all install-only
+install-only:
+ n=`echo gdbserver-xen | sed '$(program_transform_name)'`; \
+ if [ x$$n = x ]; then n=gdbserver-xen; else true; fi; \
+ $(SHELL) $(srcdir)/../../mkinstalldirs $(DESTDIR)$(bindir); \
+ $(INSTALL_PROGRAM) gdbserver-xen $(DESTDIR)$(bindir)/$$n; \
+ $(SHELL) $(srcdir)/../../mkinstalldirs $(DESTDIR)$(man1dir); \
+ $(INSTALL_DATA) $(srcdir)/gdbserver.1 $(DESTDIR)$(man1dir)/$$n.1
+
+uninstall: force
+ n=`echo gdbserver-xen | sed '$(program_transform_name)'`; \
+ if [ x$$n = x ]; then n=gdbserver-xen; else true; fi; \
+ rm -f $(bindir)/$$n $(DESTDIR)$(man1dir)/$$n.1
+
+installcheck:
+check:
+info dvi:
+install-info:
+html:
+install-html:
+clean-info:
+
+gdbserver-xen: $(OBS) ${ADD_DEPS} ${CDEPS}
+ rm -f gdbserver-xen
+ ${CC-LD} $(GLOBAL_CFLAGS) $(LDFLAGS) -o gdbserver-xen $(OBS) \
+ $(GDBSERVER_LIBS) $(XM_CLIBS)
+
+gdbreplay: gdbreplay.o
+ rm -f gdbreplay
+ ${CC-LD} $(GLOBAL_CFLAGS) $(LDFLAGS) -o gdbreplay gdbreplay.o \
+ $(XM_CLIBS)
+
+# Put the proper machine-specific files first, so M-. on a machine
+# specific routine gets the one for the correct machine.
+# The xyzzy stuff below deals with empty DEPFILES
+TAGS: ${TAGFILES}
+ etags `find ${srcdir}/../config -name $(TM_FILE) -print` \
+ `find ${srcdir}/../config -name ${XM_FILE} -print` \
+ `find ${srcdir}/../config -name ${NAT_FILE} -print` \
+ `for i in yzzy ${DEPFILES}; do \
+ if [ x$$i != xyzzy ]; then \
+ echo ${srcdir}/$$i | sed -e 's/\.o$$/\.c/' ; \
+ fi; \
+ done` \
+ ${TAGFILES}
+tags: TAGS
+
+clean:
+ rm -f *.o ${ADD_FILES} *~
+ rm -f gdbserver gdbreplay core make.log
+ rm -f reg-arm.c reg-i386.c reg-ia64.c reg-m68k.c reg-mips.c
+ rm -f reg-ppc.c reg-sh.c reg-x86-64.c reg-i386-linux.c
+
+maintainer-clean realclean distclean: clean
+ rm -f nm.h tm.h xm.h config.status config.h stamp-h config.log
+ rm -f Makefile
+
+STAGESTUFF=${OBS} ${TSOBS} ${NTSOBS} ${ADD_FILES} init.c init.o version.c gdb
+
+config.h: stamp-h ; @true
+stamp-h: config.in config.status
+ CONFIG_FILES="" $(SHELL) ./config.status
+
+Makefile: Makefile.in config.status
+ CONFIG_HEADERS="" $(SHELL) ./config.status
+
+config.status: configure configure.srv
+ $(SHELL) ./config.status --recheck
+
+force:
+
+version.c: Makefile
+ echo 'char *version = "$(VERSION)";' >version.c
+
+# GNU Make has an annoying habit of putting *all* the Makefile variables
+# into the environment, unless you include this target as a circumvention.
+# Rumor is that this will be fixed (and this target can be removed)
+# in GNU Make 4.0.
+.NOEXPORT:
+
+# GNU Make 3.63 has a different problem: it keeps tacking command line
+# overrides onto the definition of $(MAKE). This variable setting
+# will remove them.
+MAKEOVERRIDES=
+
+gdb_proc_service_h = $(srcdir)/../gdb_proc_service.h $(srcdir)/../gregset.h
+regdat_sh = $(srcdir)/../regformats/regdat.sh
+regdef_h = $(srcdir)/../regformats/regdef.h
+regcache_h = $(srcdir)/regcache.h
+server_h = $(srcdir)/server.h $(regcache_h) config.h $(srcdir)/target.h \
+ $(srcdir)/mem-break.h
+
+inferiors.o: inferiors.c $(server_h)
+mem-break.o: mem-break.c $(server_h)
+proc-service.o: proc-service.c $(server_h) $(gdb_proc_service_h)
+regcache.o: regcache.c $(server_h) $(regdef_h)
+remote-utils.o: remote-utils.c terminal.h $(server_h)
+server.o: server.c $(server_h)
+target.o: target.c $(server_h)
+thread-db.o: thread-db.c $(server_h) $(gdb_proc_service_h)
+utils.o: utils.c $(server_h)
+
+signals.o: ../signals/signals.c $(server_h)
+ $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< -DGDBSERVER
+
+i387-fp.o: i387-fp.c $(server_h)
+
+linux_low_h = $(srcdir)/linux-low.h
+
+linux-low.o: linux-low.c $(linux_low_h) $(server_h)
+ $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< @USE_THREAD_DB@
+
+linux-xen-low.o: linux-xen-low.c $(linux_low_h) $(server_h)
+ $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< @USE_THREAD_DB@
+
+linux-arm-low.o: linux-arm-low.c $(linux_low_h) $(server_h)
+linux-i386-low.o: linux-i386-low.c $(linux_low_h) $(server_h)
+linux-ia64-low.o: linux-ia64-low.c $(linux_low_h) $(server_h)
+linux-mips-low.o: linux-mips-low.c $(linux_low_h) $(server_h)
+linux-ppc-low.o: linux-ppc-low.c $(linux_low_h) $(server_h)
+linux-s390-low.o: linux-s390-low.c $(linux_low_h) $(server_h)
+linux-sh-low.o: linux-sh-low.c $(linux_low_h) $(server_h)
+linux-x86-64-low.o: linux-x86-64-low.c $(linux_low_h) $(server_h)
+
+reg-arm.o : reg-arm.c $(regdef_h)
+reg-arm.c : $(srcdir)/../regformats/reg-arm.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-arm.dat reg-arm.c
+reg-i386.o : reg-i386.c $(regdef_h)
+reg-i386.c : $(srcdir)/../regformats/reg-i386.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-i386.dat reg-i386.c
+reg-i386-linux.o : reg-i386-linux.c $(regdef_h)
+reg-i386-linux.c : $(srcdir)/../regformats/reg-i386-linux.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-i386-linux.dat reg-i386-linux.c
+reg-ia64.o : reg-ia64.c $(regdef_h)
+reg-ia64.c : $(srcdir)/../regformats/reg-ia64.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-ia64.dat reg-ia64.c
+reg-m68k.o : reg-m68k.c $(regdef_h)
+reg-m68k.c : $(srcdir)/../regformats/reg-m68k.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-m68k.dat reg-m68k.c
+reg-mips.o : reg-mips.c $(regdef_h)
+reg-mips.c : $(srcdir)/../regformats/reg-mips.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-mips.dat reg-mips.c
+reg-ppc.o : reg-ppc.c $(regdef_h)
+reg-ppc.c : $(srcdir)/../regformats/reg-ppc.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-ppc.dat reg-ppc.c
+reg-s390.o : reg-s390.c $(regdef_h)
+reg-s390.c : $(srcdir)/../regformats/reg-s390.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-s390.dat reg-s390.c
+reg-s390x.o : reg-s390x.c $(regdef_h)
+reg-s390x.c : $(srcdir)/../regformats/reg-s390x.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-s390x.dat reg-s390x.c
+reg-sh.o : reg-sh.c $(regdef_h)
+reg-sh.c : $(srcdir)/../regformats/reg-sh.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-sh.dat reg-sh.c
+reg-x86-64.o : reg-x86-64.c $(regdef_h)
+reg-x86-64.c : $(srcdir)/../regformats/reg-x86-64.dat $(regdat_sh)
+ sh $(regdat_sh) $(srcdir)/../regformats/reg-x86-64.dat reg-x86-64.c
+
+# This is the end of "Makefile.in".
diff --git a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure
new file mode 100755
index 0000000000..3cb8c841bd
--- /dev/null
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure
@@ -0,0 +1,4650 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.57.
+#
+# Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+ set -o posix
+fi
+
+# Support unset when possible.
+if (FOO=FOO; unset FOO) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+ LC_TELEPHONE LC_TIME
+do
+ if (set +x; test -n "`(eval $as_var=C; export $as_var) 2>&1`"); then
+ eval $as_var=C; export $as_var
+ else
+ $as_unset $as_var
+ fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)$' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
+ /^X\/\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conf$$.sh
+ echo "exit 0" >>conf$$.sh
+ chmod +x conf$$.sh
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conf$$.sh
+fi
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" || {
+ # Find who we are. Look in the path if we contain no path at all
+ # relative or not.
+ case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+ ;;
+ esac
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
+ # in which case we are not to be found in the path.
+ if test "x$as_myself" = x; then
+ as_myself=$0
+ fi
+ if test ! -f "$as_myself"; then
+ { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2
+ { (exit 1); exit 1; }; }
+ fi
+ case $CONFIG_SHELL in
+ '')
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for as_base in sh bash ksh sh5; do
+ case $as_dir in
+ /*)
+ if ("$as_dir/$as_base" -c '
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
+ $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+ $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+ CONFIG_SHELL=$as_dir/$as_base
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+ fi;;
+ esac
+ done
+done
+;;
+ esac
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line before each line; the second 'sed' does the real
+ # work. The second script uses 'N' to pair each line-number line
+ # with the numbered line, and appends trailing '-' during
+ # substitution so that $LINENO is not a special case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
+ sed '=' <$as_myself |
+ sed '
+ N
+ s,$,-,
+ : loop
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+ t loop
+ s,-$,,
+ s,^['$as_cr_digits']*\n,,
+ ' >$as_me.lineno &&
+ chmod +x $as_me.lineno ||
+ { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensible to this).
+ . ./$as_me.lineno
+ # Exit status is that of the last command.
+ exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+ *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T=' ' ;;
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ # We could just check for DJGPP; but this test a) works b) is more generic
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+ if test -f conf$$.exe; then
+ # Don't use ln at all; we don't have any links
+ as_ln_s='cp -p'
+ else
+ as_ln_s='ln -s'
+ fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p=:
+else
+ as_mkdir_p=false
+fi
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="sed y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="sed y%*+%pp%;s%[^_$as_cr_alnum]%_%g"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" $as_nl"
+
+# CDPATH.
+$as_unset CDPATH
+
+
+# Name of the host.
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+exec 6>&1
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_config_libobj_dir=.
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Maximum number of lines to put in a shell here document.
+# This variable seems obsolete. It should probably be removed, and
+# only ac_max_sed_lines should be used.
+: ${ac_max_here_lines=38}
+
+# Identity of this package.
+PACKAGE_NAME=
+PACKAGE_TARNAME=
+PACKAGE_VERSION=
+PACKAGE_STRING=
+PACKAGE_BUGREPORT=
+
+ac_unique_file="server.c"
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#if STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# if HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
+#endif
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CPP EGREP GDBSERVER_DEPFILES GDBSERVER_LIBS USE_THREAD_DB LIBOBJS LTLIBOBJS'
+ac_subst_files=''
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datadir='${prefix}/share'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+libdir='${exec_prefix}/lib'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+infodir='${prefix}/info'
+mandir='${prefix}/man'
+
+ac_prev=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case $ac_option in
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir=$ac_optarg ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build_alias ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build_alias=$ac_optarg ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file=$ac_optarg ;;
+
+ --config-cache | -C)
+ cache_file=config.cache ;;
+
+ -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
+ | --da=*)
+ datadir=$ac_optarg ;;
+
+ -disable-* | --disable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+ eval "enable_$ac_feature=no" ;;
+
+ -enable-* | --enable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
+ case $ac_option in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "enable_$ac_feature='$ac_optarg'" ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix=$ac_optarg ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=long ;;
+ -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+ ac_init_help=recursive ;;
+ -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+ ac_init_help=short ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host_alias ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host_alias=$ac_optarg ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir=$ac_optarg ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir=$ac_optarg ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir=$ac_optarg ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir=$ac_optarg ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst \
+ | --locals | --local | --loca | --loc | --lo)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* \
+ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+ localstatedir=$ac_optarg ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir=$ac_optarg ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir=$ac_optarg ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=$ac_optarg ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix=$ac_optarg ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix=$ac_optarg ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name=$ac_optarg ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir=$ac_optarg ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir=$ac_optarg ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site=$ac_optarg ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir=$ac_optarg ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir=$ac_optarg ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target_alias ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target_alias=$ac_optarg ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ case $ac_option in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "with_$ac_package='$ac_optarg'" ;;
+
+ -without-* | --without-*)
+ ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package | sed 's/-/_/g'`
+ eval "with_$ac_package=no" ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes=$ac_optarg ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries=$ac_optarg ;;
+
+ -*) { echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; }
+ ;;
+
+ *=*)
+ ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+ { (exit 1); exit 1; }; }
+ ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
+ eval "$ac_envvar='$ac_optarg'"
+ export $ac_envvar ;;
+
+ *)
+ # FIXME: should be removed in autoconf 3.0.
+ echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+ expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+ : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+ { echo "$as_me: error: missing argument to $ac_option" >&2
+ { (exit 1); exit 1; }; }
+fi
+
+# Be sure to have absolute paths.
+for ac_var in exec_prefix prefix
+do
+ eval ac_val=$`echo $ac_var`
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
+ *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# Be sure to have absolute paths.
+for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
+ localstatedir libdir includedir oldincludedir infodir mandir
+do
+ eval ac_val=$`echo $ac_var`
+ case $ac_val in
+ [\\/$]* | ?:[\\/]* ) ;;
+ *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+ if test "x$build_alias" = x; then
+ cross_compiling=maybe
+ echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used." >&2
+ elif test "x$build_alias" != "x$host_alias"; then
+ cross_compiling=yes
+ fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then its parent.
+ ac_confdir=`(dirname "$0") 2>/dev/null ||
+$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$0" : 'X\(//\)[^/]' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$0" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ srcdir=$ac_confdir
+ if test ! -r $srcdir/$ac_unique_file; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r $srcdir/$ac_unique_file; then
+ if test "$ac_srcdir_defaulted" = yes; then
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
+ { (exit 1); exit 1; }; }
+ else
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+ { (exit 1); exit 1; }; }
+ fi
+fi
+(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null ||
+ { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2
+ { (exit 1); exit 1; }; }
+srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
+ac_env_build_alias_set=${build_alias+set}
+ac_env_build_alias_value=$build_alias
+ac_cv_env_build_alias_set=${build_alias+set}
+ac_cv_env_build_alias_value=$build_alias
+ac_env_host_alias_set=${host_alias+set}
+ac_env_host_alias_value=$host_alias
+ac_cv_env_host_alias_set=${host_alias+set}
+ac_cv_env_host_alias_value=$host_alias
+ac_env_target_alias_set=${target_alias+set}
+ac_env_target_alias_value=$target_alias
+ac_cv_env_target_alias_set=${target_alias+set}
+ac_cv_env_target_alias_value=$target_alias
+ac_env_CC_set=${CC+set}
+ac_env_CC_value=$CC
+ac_cv_env_CC_set=${CC+set}
+ac_cv_env_CC_value=$CC
+ac_env_CFLAGS_set=${CFLAGS+set}
+ac_env_CFLAGS_value=$CFLAGS
+ac_cv_env_CFLAGS_set=${CFLAGS+set}
+ac_cv_env_CFLAGS_value=$CFLAGS
+ac_env_LDFLAGS_set=${LDFLAGS+set}
+ac_env_LDFLAGS_value=$LDFLAGS
+ac_cv_env_LDFLAGS_set=${LDFLAGS+set}
+ac_cv_env_LDFLAGS_value=$LDFLAGS
+ac_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_env_CPPFLAGS_value=$CPPFLAGS
+ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_cv_env_CPPFLAGS_value=$CPPFLAGS
+ac_env_CPP_set=${CPP+set}
+ac_env_CPP_value=$CPP
+ac_cv_env_CPP_set=${CPP+set}
+ac_cv_env_CPP_value=$CPP
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<_ACEOF
+\`configure' configures this package to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+_ACEOF
+
+ cat <<_ACEOF
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --datadir=DIR read-only architecture-independent data [PREFIX/share]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --infodir=DIR info documentation [PREFIX/info]
+ --mandir=DIR man documentation [PREFIX/man]
+_ACEOF
+
+ cat <<\_ACEOF
+
+System types:
+ --build=BUILD configure for building on BUILD [guessed]
+ --host=HOST cross-compile to build programs to run on HOST [BUILD]
+ --target=TARGET configure for building compilers for TARGET [HOST]
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+
+ cat <<\_ACEOF
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have
+ headers in a nonstandard directory <include dir>
+ CPP C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+_ACEOF
+fi
+
+if test "$ac_init_help" = "recursive"; then
+ # If there are subdirs, report their specific --help.
+ ac_popdir=`pwd`
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+ test -d $ac_dir || continue
+ ac_builddir=.
+
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be
+# absolute.
+ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd`
+ac_abs_top_builddir=`cd "$ac_dir" && cd ${ac_top_builddir}. && pwd`
+ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd`
+ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd`
+
+ cd $ac_dir
+ # Check for guested configure; otherwise get Cygnus style configure.
+ if test -f $ac_srcdir/configure.gnu; then
+ echo
+ $SHELL $ac_srcdir/configure.gnu --help=recursive
+ elif test -f $ac_srcdir/configure; then
+ echo
+ $SHELL $ac_srcdir/configure --help=recursive
+ elif test -f $ac_srcdir/configure.ac ||
+ test -f $ac_srcdir/configure.in; then
+ echo
+ $ac_configure --help
+ else
+ echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+ fi
+ cd $ac_popdir
+ done
+fi
+
+test -n "$ac_init_help" && exit 0
+if $ac_init_version; then
+ cat <<\_ACEOF
+
+Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
+Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+ exit 0
+fi
+exec 5>config.log
+cat >&5 <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by $as_me, which was
+generated by GNU Autoconf 2.57. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+hostinfo = `(hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ echo "PATH: $as_dir"
+done
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_sep=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+ for ac_arg
+ do
+ case $ac_arg in
+ -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ continue ;;
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ case $ac_pass in
+ 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
+ 2)
+ ac_configure_args1="$ac_configure_args1 '$ac_arg'"
+ if test $ac_must_keep_next = true; then
+ ac_must_keep_next=false # Got value, back to normal.
+ else
+ case $ac_arg in
+ *=* | --config-cache | -C | -disable-* | --disable-* \
+ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+ | -with-* | --with-* | -without-* | --without-* | --x)
+ case "$ac_configure_args0 " in
+ "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+ esac
+ ;;
+ -* ) ac_must_keep_next=true ;;
+ esac
+ fi
+ ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
+ # Get rid of the leading space.
+ ac_sep=" "
+ ;;
+ esac
+ done
+done
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Be sure not to use single quotes in there, as some shells,
+# such as our DU 5.0 friend, will then `close' the trap.
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+
+ cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+{
+ (set) 2>&1 |
+ case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ sed -n \
+ "s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
+ ;;
+ *)
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+}
+ echo
+
+ cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_vars
+ do
+ eval ac_val=$`echo $ac_var`
+ echo "$ac_var='"'"'$ac_val'"'"'"
+ done | sort
+ echo
+
+ if test -n "$ac_subst_files"; then
+ cat <<\_ASBOX
+## ------------- ##
+## Output files. ##
+## ------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_files
+ do
+ eval ac_val=$`echo $ac_var`
+ echo "$ac_var='"'"'$ac_val'"'"'"
+ done | sort
+ echo
+ fi
+
+ if test -s confdefs.h; then
+ cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+ echo
+ sed "/^$/d" confdefs.h | sort
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ echo "$as_me: caught signal $ac_signal"
+ echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core core.* *.core &&
+ rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+ ' 0
+for ac_signal in 1 2 13 15; do
+ trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo >confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer explicitly selected file to automatically selected ones.
+if test -z "$CONFIG_SITE"; then
+ if test "x$prefix" != xNONE; then
+ CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+ else
+ CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+ fi
+fi
+for ac_site_file in $CONFIG_SITE; do
+ if test -r "$ac_site_file"; then
+ { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+echo "$as_me: loading site script $ac_site_file" >&6;}
+ sed 's/^/| /' "$ac_site_file" >&5
+ . "$ac_site_file"
+ fi
+done
+
+if test -r "$cache_file"; then
+ # Some versions of bash will fail to source /dev/null (special
+ # files actually), so we avoid doing that.
+ if test -f "$cache_file"; then
+ { echo "$as_me:$LINENO: loading cache $cache_file" >&5
+echo "$as_me: loading cache $cache_file" >&6;}
+ case $cache_file in
+ [\\/]* | ?:[\\/]* ) . $cache_file;;
+ *) . ./$cache_file;;
+ esac
+ fi
+else
+ { echo "$as_me:$LINENO: creating cache $cache_file" >&5
+echo "$as_me: creating cache $cache_file" >&6;}
+ >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in `(set) 2>&1 |
+ sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
+ eval ac_old_set=\$ac_cv_env_${ac_var}_set
+ eval ac_new_set=\$ac_env_${ac_var}_set
+ eval ac_old_val="\$ac_cv_env_${ac_var}_value"
+ eval ac_new_val="\$ac_env_${ac_var}_value"
+ case $ac_old_set,$ac_new_set in
+ set,)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,set)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,);;
+ *)
+ if test "x$ac_old_val" != "x$ac_new_val"; then
+ { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
+echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+ { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
+echo "$as_me: former value: $ac_old_val" >&2;}
+ { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
+echo "$as_me: current value: $ac_new_val" >&2;}
+ ac_cache_corrupted=:
+ fi;;
+ esac
+ # Pass precious variables to config.status.
+ if test "$ac_new_set" = set; then
+ case $ac_new_val in
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+ *) ac_arg=$ac_var=$ac_new_val ;;
+ esac
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+ esac
+ fi
+done
+if $ac_cache_corrupted; then
+ { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
+echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+ { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ac_config_headers="$ac_config_headers config.h:config.in"
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ CC=$ac_ct_CC
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ CC=$ac_ct_CC
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+fi
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ shift
+ ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+fi
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$CC" && break
+ done
+fi
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6
+else
+ echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6
+fi
+
+ test -n "$ac_ct_CC" && break
+done
+
+ CC=$ac_ct_CC
+fi
+
+fi
+
+
+test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+
+# Provide some information about the compiler.
+echo "$as_me:$LINENO:" \
+ "checking for C compiler version" >&5
+ac_compiler=`set X $ac_compile; echo $2`
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5
+ (eval $ac_compiler --version </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5
+ (eval $ac_compiler -v </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5
+ (eval $ac_compiler -V </dev/null >&5) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and finding out an intuition
+# of exeext.
+echo "$as_me:$LINENO: checking for C compiler default output" >&5
+echo $ECHO_N "checking for C compiler default output... $ECHO_C" >&6
+ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5
+ (eval $ac_link_default) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # Find the output, starting from the most likely. This scheme is
+# not robust to junk in `.', hence go to wildcards (a.*) only as a last
+# resort.
+
+# Be careful to initialize this variable, since it used to be cached.
+# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile.
+ac_cv_exeext=
+# b.out is created by i960 compilers.
+for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out
+do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj )
+ ;;
+ conftest.$ac_ext )
+ # This is the source file.
+ ;;
+ [ab].out )
+ # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* )
+ ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ # FIXME: I believe we export ac_cv_exeext for Libtool,
+ # but it would be cool to find out if it's true. Does anybody
+ # maintain Libtool? --akim.
+ export ac_cv_exeext
+ break;;
+ * )
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: C compiler cannot create executables
+See \`config.log' for more details." >&5
+echo "$as_me: error: C compiler cannot create executables
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }
+fi
+
+ac_exeext=$ac_cv_exeext
+echo "$as_me:$LINENO: result: $ac_file" >&5
+echo "${ECHO_T}$ac_file" >&6
+
+# Check the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+echo "$as_me:$LINENO: checking whether the C compiler works" >&5
+echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+ if { ac_try='./$ac_file'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { echo "$as_me:$LINENO: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ fi
+fi
+echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6
+
+rm -f a.out a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+# Check the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6
+echo "$as_me:$LINENO: result: $cross_compiling" >&5
+echo "${ECHO_T}$cross_compiling" >&6
+
+echo "$as_me:$LINENO: checking for suffix of executables" >&5
+echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ export ac_cv_exeext
+ break;;
+ * ) break;;
+ esac
+done
+else
+ { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest$ac_cv_exeext
+echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
+echo "${ECHO_T}$ac_cv_exeext" >&6
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+echo "$as_me:$LINENO: checking for suffix of object files" >&5
+echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6
+if test "${ac_cv_objext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;;
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
+echo "${ECHO_T}$ac_cv_objext" >&6
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
+echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_compiler_gnu=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_compiler_gnu=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
+echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6
+GCC=`test $ac_compiler_gnu = yes && echo yes`
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+CFLAGS="-g"
+echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
+echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6
+if test "${ac_cv_prog_cc_g+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_prog_cc_g=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_g" >&6
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5
+echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6
+if test "${ac_cv_prog_cc_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_prog_cc_stdc=no
+ac_save_CC=$CC
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+# Don't try gcc -ansi; that turns off useful extensions and
+# breaks some systems' header files.
+# AIX -qlanglvl=ansi
+# Ultrix and OSF/1 -std1
+# HP-UX 10.20 and later -Ae
+# HP-UX older versions -Aa -D_HPUX_SOURCE
+# SVR4 -Xc -D__EXTENSIONS__
+for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_stdc=$ac_arg
+break
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext
+done
+rm -f conftest.$ac_ext conftest.$ac_objext
+CC=$ac_save_CC
+
+fi
+
+case "x$ac_cv_prog_cc_stdc" in
+ x|xno)
+ echo "$as_me:$LINENO: result: none needed" >&5
+echo "${ECHO_T}none needed" >&6 ;;
+ *)
+ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6
+ CC="$CC $ac_cv_prog_cc_stdc" ;;
+esac
+
+# Some people use a C++ compiler to compile C. Since we use `exit',
+# in C++ we need to declare it. In case someone uses the same compiler
+# for both compiling C and C++ we need to have the C++ compiler decide
+# the declaration of exit, since it's the most demanding environment.
+cat >conftest.$ac_ext <<_ACEOF
+#ifndef __cplusplus
+ choke me
+#endif
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ for ac_declaration in \
+ ''\
+ '#include <stdlib.h>' \
+ 'extern "C" void std::exit (int) throw (); using std::exit;' \
+ 'extern "C" void std::exit (int); using std::exit;' \
+ 'extern "C" void exit (int) throw ();' \
+ 'extern "C" void exit (int);' \
+ 'void exit (int);'
+do
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+$ac_declaration
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+continue
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_declaration
+int
+main ()
+{
+exit (42);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ break
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+done
+rm -f conftest*
+if test -n "$ac_declaration"; then
+ echo '#ifdef __cplusplus' >>confdefs.h
+ echo $ac_declaration >>confdefs.h
+ echo '#endif' >>confdefs.h
+fi
+
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+ac_aux_dir=
+for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
+ if test -f $ac_dir/install-sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f $ac_dir/install.sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ elif test -f $ac_dir/shtool; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/shtool install -c"
+ break
+ fi
+done
+if test -z "$ac_aux_dir"; then
+ { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." >&5
+echo "$as_me: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"
+ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure.
+
+# Make sure we can run config.sub.
+$ac_config_sub sun4 >/dev/null 2>&1 ||
+ { { echo "$as_me:$LINENO: error: cannot run $ac_config_sub" >&5
+echo "$as_me: error: cannot run $ac_config_sub" >&2;}
+ { (exit 1); exit 1; }; }
+
+echo "$as_me:$LINENO: checking build system type" >&5
+echo $ECHO_N "checking build system type... $ECHO_C" >&6
+if test "${ac_cv_build+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_build_alias=$build_alias
+test -z "$ac_cv_build_alias" &&
+ ac_cv_build_alias=`$ac_config_guess`
+test -z "$ac_cv_build_alias" &&
+ { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5
+echo "$as_me: error: cannot guess build type; you must specify one" >&2;}
+ { (exit 1); exit 1; }; }
+ac_cv_build=`$ac_config_sub $ac_cv_build_alias` ||
+ { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_build_alias failed" >&5
+echo "$as_me: error: $ac_config_sub $ac_cv_build_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_build" >&5
+echo "${ECHO_T}$ac_cv_build" >&6
+build=$ac_cv_build
+build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+
+echo "$as_me:$LINENO: checking host system type" >&5
+echo $ECHO_N "checking host system type... $ECHO_C" >&6
+if test "${ac_cv_host+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_host_alias=$host_alias
+test -z "$ac_cv_host_alias" &&
+ ac_cv_host_alias=$ac_cv_build_alias
+ac_cv_host=`$ac_config_sub $ac_cv_host_alias` ||
+ { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_host_alias failed" >&5
+echo "$as_me: error: $ac_config_sub $ac_cv_host_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_host" >&5
+echo "${ECHO_T}$ac_cv_host" >&6
+host=$ac_cv_host
+host_cpu=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+host_vendor=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+host_os=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+
+echo "$as_me:$LINENO: checking target system type" >&5
+echo $ECHO_N "checking target system type... $ECHO_C" >&6
+if test "${ac_cv_target+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_target_alias=$target_alias
+test "x$ac_cv_target_alias" = "x" &&
+ ac_cv_target_alias=$ac_cv_host_alias
+ac_cv_target=`$ac_config_sub $ac_cv_target_alias` ||
+ { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_target_alias failed" >&5
+echo "$as_me: error: $ac_config_sub $ac_cv_target_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+
+fi
+echo "$as_me:$LINENO: result: $ac_cv_target" >&5
+echo "${ECHO_T}$ac_cv_target" >&6
+target=$ac_cv_target
+target_cpu=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+target_vendor=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+target_os=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+
+# The aliases save the names the user supplied, while $host etc.
+# will get canonicalized.
+test -n "$target_alias" &&
+ test "$program_prefix$program_suffix$program_transform_name" = \
+ NONENONEs,x,x, &&
+ program_prefix=${target_alias}-
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# ./install, which can be erroneously created by make from ./install.sh.
+echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5
+echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6
+if test -z "$INSTALL"; then
+if test "${ac_cv_path_install+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in
+ ./ | .// | /cC/* | \
+ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+ /usr/ucb/* ) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ # Don't use installbsd from OSF since it installs stuff as root
+ # by default.
+ for ac_prog in ginstall scoinst install; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
+ if test $ac_prog = install &&
+ grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ :
+ elif test $ac_prog = install &&
+ grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+ # program-specific install script used by HP pwplus--don't use.
+ :
+ else
+ ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+ break 3
+ fi
+ fi
+ done
+ done
+ ;;
+esac
+done
+
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL=$ac_cv_path_install
+ else
+ # As a last resort, use the slow shell script. We don't cache a
+ # path for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the path is relative.
+ INSTALL=$ac_install_sh
+ fi
+fi
+echo "$as_me:$LINENO: result: $INSTALL" >&5
+echo "${ECHO_T}$INSTALL" >&6
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5
+echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+ if test "${ac_cv_prog_CPP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Double quotes because CPP needs to be expanded
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether non-existent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ break
+fi
+
+ done
+ ac_cv_prog_CPP=$CPP
+
+fi
+ CPP=$ac_cv_prog_CPP
+else
+ ac_cv_prog_CPP=$CPP
+fi
+echo "$as_me:$LINENO: result: $CPP" >&5
+echo "${ECHO_T}$CPP" >&6
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether non-existent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ :
+else
+ { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&5
+echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+echo "$as_me:$LINENO: checking for egrep" >&5
+echo $ECHO_N "checking for egrep... $ECHO_C" >&6
+if test "${ac_cv_prog_egrep+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if echo a | (grep -E '(a|b)') >/dev/null 2>&1
+ then ac_cv_prog_egrep='grep -E'
+ else ac_cv_prog_egrep='egrep'
+ fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5
+echo "${ECHO_T}$ac_cv_prog_egrep" >&6
+ EGREP=$ac_cv_prog_egrep
+
+
+echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_header_stdc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_cv_header_stdc=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ctype.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ exit(2);
+ exit (0);
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+fi
+echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_Header=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+eval "$as_ac_Header=no"
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
+
+
+
+
+
+
+
+
+for ac_header in sgtty.h termio.h termios.h sys/reg.h string.h proc_service.h sys/procfs.h thread_db.h linux/elf.h stdlib.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+else
+ # Is the header compilable?
+echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ac_header_compiler=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6
+
+# Is the header present?
+echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6
+cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
+ (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+rm -f conftest.err conftest.$ac_ext
+echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc in
+ yes:no )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ (
+ cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to bug-autoconf@gnu.org. ##
+## ------------------------------------ ##
+_ASBOX
+ ) |
+ sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+ no:yes )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ (
+ cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to bug-autoconf@gnu.org. ##
+## ------------------------------------ ##
+_ASBOX
+ ) |
+ sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
+if eval "test \"\${$as_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=$ac_header_preproc"
+fi
+echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
+echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+echo "$as_me:$LINENO: checking whether strerror must be declared" >&5
+echo $ECHO_N "checking whether strerror must be declared... $ECHO_C" >&6
+if test "${bfd_cv_decl_needed_strerror+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#include <stdio.h>
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+int
+main ()
+{
+char *(*pfn) = (char *(*)) strerror
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ bfd_cv_decl_needed_strerror=no
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+bfd_cv_decl_needed_strerror=yes
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+
+echo "$as_me:$LINENO: result: $bfd_cv_decl_needed_strerror" >&5
+echo "${ECHO_T}$bfd_cv_decl_needed_strerror" >&6
+if test $bfd_cv_decl_needed_strerror = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define NEED_DECLARATION_STRERROR 1
+_ACEOF
+
+fi
+
+
+. ${srcdir}/configure.srv
+
+if test "${srv_linux_usrregs}" = "yes"; then
+ cat >>confdefs.h <<\_ACEOF
+#define HAVE_LINUX_USRREGS 1
+_ACEOF
+
+fi
+
+if test "${srv_linux_regsets}" = "yes"; then
+ echo "$as_me:$LINENO: checking for PTRACE_GETREGS" >&5
+echo $ECHO_N "checking for PTRACE_GETREGS... $ECHO_C" >&6
+ if test "${gdbsrv_cv_have_ptrace_getregs+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/ptrace.h>
+int
+main ()
+{
+PTRACE_GETREGS;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ gdbsrv_cv_have_ptrace_getregs=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+gdbsrv_cv_have_ptrace_getregs=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+
+ echo "$as_me:$LINENO: result: $gdbsrv_cv_have_ptrace_getregs" >&5
+echo "${ECHO_T}$gdbsrv_cv_have_ptrace_getregs" >&6
+ if test "${gdbsrv_cv_have_ptrace_getregs}" = "yes"; then
+ cat >>confdefs.h <<\_ACEOF
+#define HAVE_LINUX_REGSETS 1
+_ACEOF
+
+ fi
+
+ echo "$as_me:$LINENO: checking for PTRACE_GETFPXREGS" >&5
+echo $ECHO_N "checking for PTRACE_GETFPXREGS... $ECHO_C" >&6
+ if test "${gdbsrv_cv_have_ptrace_getfpxregs+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/ptrace.h>
+int
+main ()
+{
+PTRACE_GETFPXREGS;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ gdbsrv_cv_have_ptrace_getfpxregs=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+gdbsrv_cv_have_ptrace_getfpxregs=no
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+
+ echo "$as_me:$LINENO: result: $gdbsrv_cv_have_ptrace_getfpxregs" >&5
+echo "${ECHO_T}$gdbsrv_cv_have_ptrace_getfpxregs" >&6
+ if test "${gdbsrv_cv_have_ptrace_getfpxregs}" = "yes"; then
+ cat >>confdefs.h <<\_ACEOF
+#define HAVE_PTRACE_GETFPXREGS 1
+_ACEOF
+
+ fi
+fi
+
+if test "$ac_cv_header_sys_procfs_h" = yes; then
+ echo "$as_me:$LINENO: checking for lwpid_t in sys/procfs.h" >&5
+echo $ECHO_N "checking for lwpid_t in sys/procfs.h... $ECHO_C" >&6
+ if test "${bfd_cv_have_sys_procfs_type_lwpid_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#define _SYSCALL32
+#include <sys/procfs.h>
+int
+main ()
+{
+lwpid_t avar
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ bfd_cv_have_sys_procfs_type_lwpid_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+bfd_cv_have_sys_procfs_type_lwpid_t=no
+
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+
+ if test $bfd_cv_have_sys_procfs_type_lwpid_t = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_LWPID_T 1
+_ACEOF
+
+ fi
+ echo "$as_me:$LINENO: result: $bfd_cv_have_sys_procfs_type_lwpid_t" >&5
+echo "${ECHO_T}$bfd_cv_have_sys_procfs_type_lwpid_t" >&6
+
+ echo "$as_me:$LINENO: checking for psaddr_t in sys/procfs.h" >&5
+echo $ECHO_N "checking for psaddr_t in sys/procfs.h... $ECHO_C" >&6
+ if test "${bfd_cv_have_sys_procfs_type_psaddr_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#define _SYSCALL32
+#include <sys/procfs.h>
+int
+main ()
+{
+psaddr_t avar
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ bfd_cv_have_sys_procfs_type_psaddr_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+bfd_cv_have_sys_procfs_type_psaddr_t=no
+
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+
+ if test $bfd_cv_have_sys_procfs_type_psaddr_t = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PSADDR_T 1
+_ACEOF
+
+ fi
+ echo "$as_me:$LINENO: result: $bfd_cv_have_sys_procfs_type_psaddr_t" >&5
+echo "${ECHO_T}$bfd_cv_have_sys_procfs_type_psaddr_t" >&6
+
+ echo "$as_me:$LINENO: checking for prgregset_t in sys/procfs.h" >&5
+echo $ECHO_N "checking for prgregset_t in sys/procfs.h... $ECHO_C" >&6
+ if test "${bfd_cv_have_sys_procfs_type_prgregset_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#define _SYSCALL32
+#include <sys/procfs.h>
+int
+main ()
+{
+prgregset_t avar
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ bfd_cv_have_sys_procfs_type_prgregset_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+bfd_cv_have_sys_procfs_type_prgregset_t=no
+
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+
+ if test $bfd_cv_have_sys_procfs_type_prgregset_t = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PRGREGSET_T 1
+_ACEOF
+
+ fi
+ echo "$as_me:$LINENO: result: $bfd_cv_have_sys_procfs_type_prgregset_t" >&5
+echo "${ECHO_T}$bfd_cv_have_sys_procfs_type_prgregset_t" >&6
+
+ echo "$as_me:$LINENO: checking for prfpregset_t in sys/procfs.h" >&5
+echo $ECHO_N "checking for prfpregset_t in sys/procfs.h... $ECHO_C" >&6
+ if test "${bfd_cv_have_sys_procfs_type_prfpregset_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#define _SYSCALL32
+#include <sys/procfs.h>
+int
+main ()
+{
+prfpregset_t avar
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ bfd_cv_have_sys_procfs_type_prfpregset_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+bfd_cv_have_sys_procfs_type_prfpregset_t=no
+
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+
+ if test $bfd_cv_have_sys_procfs_type_prfpregset_t = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PRFPREGSET_T 1
+_ACEOF
+
+ fi
+ echo "$as_me:$LINENO: result: $bfd_cv_have_sys_procfs_type_prfpregset_t" >&5
+echo "${ECHO_T}$bfd_cv_have_sys_procfs_type_prfpregset_t" >&6
+
+
+
+
+ if test $bfd_cv_have_sys_procfs_type_prfpregset_t = yes; then
+ echo "$as_me:$LINENO: checking whether prfpregset_t type is broken" >&5
+echo $ECHO_N "checking whether prfpregset_t type is broken... $ECHO_C" >&6
+ if test "${gdb_cv_prfpregset_t_broken+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$cross_compiling" = yes; then
+ gdb_cv_prfpregset_t_broken=yes
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/procfs.h>
+ int main ()
+ {
+ if (sizeof (prfpregset_t) == sizeof (void *))
+ return 1;
+ return 0;
+ }
+_ACEOF
+rm -f conftest$ac_exeext
+if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ gdb_cv_prfpregset_t_broken=no
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+gdb_cv_prfpregset_t_broken=yes
+fi
+rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+fi
+
+ echo "$as_me:$LINENO: result: $gdb_cv_prfpregset_t_broken" >&5
+echo "${ECHO_T}$gdb_cv_prfpregset_t_broken" >&6
+ if test $gdb_cv_prfpregset_t_broken = yes; then
+ cat >>confdefs.h <<\_ACEOF
+#define PRFPREGSET_T_BROKEN 1
+_ACEOF
+
+ fi
+ fi
+
+ echo "$as_me:$LINENO: checking for elf_fpregset_t in sys/procfs.h" >&5
+echo $ECHO_N "checking for elf_fpregset_t in sys/procfs.h... $ECHO_C" >&6
+ if test "${bfd_cv_have_sys_procfs_type_elf_fpregset_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+#line $LINENO "configure"
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#define _SYSCALL32
+#include <sys/procfs.h>
+int
+main ()
+{
+elf_fpregset_t avar
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ bfd_cv_have_sys_procfs_type_elf_fpregset_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+bfd_cv_have_sys_procfs_type_elf_fpregset_t=no
+
+fi
+rm -f conftest.$ac_objext conftest.$ac_ext
+fi
+
+ if test $bfd_cv_have_sys_procfs_type_elf_fpregset_t = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_ELF_FPREGSET_T 1
+_ACEOF
+
+ fi
+ echo "$as_me:$LINENO: result: $bfd_cv_have_sys_procfs_type_elf_fpregset_t" >&5
+echo "${ECHO_T}$bfd_cv_have_sys_procfs_type_elf_fpregset_t" >&6
+
+fi
+
+srv_thread_depfiles=
+srv_libs=
+USE_THREAD_DB=
+
+
+GDBSERVER_DEPFILES="$srv_regobj $srv_tgtobj $srv_thread_depfiles"
+GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxc"
+
+
+
+
+
+ ac_config_files="$ac_config_files Makefile"
+ ac_config_commands="$ac_config_commands default"
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems. If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+{
+ (set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ "s/'/'\\\\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+} |
+ sed '
+ t clear
+ : clear
+ s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+ t end
+ /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+ : end' >>confcache
+if diff $cache_file confcache >/dev/null 2>&1; then :; else
+ if test -w $cache_file; then
+ test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file"
+ cat confcache >$cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=/{
+s/:*\$(srcdir):*/:/;
+s/:*\${srcdir}:*/:/;
+s/:*@srcdir@:*/:/;
+s/^\([^=]*=[ ]*\):*/\1/;
+s/:*$//;
+s/^[^=]*=[ ]*$//;
+}'
+fi
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+ # 1. Remove the extension, and $U if already installed.
+ ac_i=`echo "$ac_i" |
+ sed 's/\$U\././;s/\.o$//;s/\.obj$//'`
+ # 2. Add them.
+ ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext"
+ ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+: ${CONFIG_STATUS=./config.status}
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
+echo "$as_me: creating $CONFIG_STATUS" >&6;}
+cat >$CONFIG_STATUS <<_ACEOF
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+SHELL=\${CONFIG_SHELL-$SHELL}
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be Bourne compatible
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
+ set -o posix
+fi
+
+# Support unset when possible.
+if (FOO=FOO; unset FOO) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+
+# Work around bugs in pre-3.0 UWIN ksh.
+$as_unset ENV MAIL MAILPATH
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+ LC_TELEPHONE LC_TIME
+do
+ if (set +x; test -n "`(eval $as_var=C; export $as_var) 2>&1`"); then
+ eval $as_var=C; export $as_var
+ else
+ $as_unset $as_var
+ fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)$' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
+ /^X\/\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+
+
+# PATH needs CR, and LINENO needs CR and PATH.
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conf$$.sh
+ echo "exit 0" >>conf$$.sh
+ chmod +x conf$$.sh
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conf$$.sh
+fi
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" || {
+ # Find who we are. Look in the path if we contain no path at all
+ # relative or not.
+ case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+
+ ;;
+ esac
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
+ # in which case we are not to be found in the path.
+ if test "x$as_myself" = x; then
+ as_myself=$0
+ fi
+ if test ! -f "$as_myself"; then
+ { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
+echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ case $CONFIG_SHELL in
+ '')
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for as_base in sh bash ksh sh5; do
+ case $as_dir in
+ /*)
+ if ("$as_dir/$as_base" -c '
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
+ $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
+ $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
+ CONFIG_SHELL=$as_dir/$as_base
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"}
+ fi;;
+ esac
+ done
+done
+;;
+ esac
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line before each line; the second 'sed' does the real
+ # work. The second script uses 'N' to pair each line-number line
+ # with the numbered line, and appends trailing '-' during
+ # substitution so that $LINENO is not a special case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
+ sed '=' <$as_myself |
+ sed '
+ N
+ s,$,-,
+ : loop
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
+ t loop
+ s,-$,,
+ s,^['$as_cr_digits']*\n,,
+ ' >$as_me.lineno &&
+ chmod +x $as_me.lineno ||
+ { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
+echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensible to this).
+ . ./$as_me.lineno
+ # Exit status is that of the last command.
+ exit
+}
+
+
+case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
+ *c*,-n*) ECHO_N= ECHO_C='
+' ECHO_T=' ' ;;
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ # We could just check for DJGPP; but this test a) works b) is more generic
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
+ if test -f conf$$.exe; then
+ # Don't use ln at all; we don't have any links
+ as_ln_s='cp -p'
+ else
+ as_ln_s='ln -s'
+ fi
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.file
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p=:
+else
+ as_mkdir_p=false
+fi
+
+as_executable_p="test -f"
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="sed y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="sed y%*+%pp%;s%[^_$as_cr_alnum]%_%g"
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.
+as_nl='
+'
+IFS=" $as_nl"
+
+# CDPATH.
+$as_unset CDPATH
+
+exec 6>&1
+
+# Open the log real soon, to keep \$[0] and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling. Logging --version etc. is OK.
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+} >&5
+cat >&5 <<_CSEOF
+
+This file was extended by $as_me, which was
+generated by GNU Autoconf 2.57. Invocation command line was
+
+ CONFIG_FILES = $CONFIG_FILES
+ CONFIG_HEADERS = $CONFIG_HEADERS
+ CONFIG_LINKS = $CONFIG_LINKS
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
+ $ $0 $@
+
+_CSEOF
+echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
+echo >&5
+_ACEOF
+
+# Files that config.status was made for.
+if test -n "$ac_config_files"; then
+ echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_headers"; then
+ echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_links"; then
+ echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS
+fi
+
+if test -n "$ac_config_commands"; then
+ echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+ac_cs_usage="\
+\`$as_me' instantiates files from templates according to the
+current configuration.
+
+Usage: $0 [OPTIONS] [FILE]...
+
+ -h, --help print this help, then exit
+ -V, --version print version number, then exit
+ -q, --quiet do not print progress messages
+ -d, --debug don't remove temporary files
+ --recheck update $as_me by reconfiguring in the same conditions
+ --file=FILE[:TEMPLATE]
+ instantiate the configuration file FILE
+ --header=FILE[:TEMPLATE]
+ instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Configuration commands:
+$config_commands
+
+Report bugs to <bug-autoconf@gnu.org>."
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+ac_cs_version="\\
+config.status
+configured by $0, generated by GNU Autoconf 2.57,
+ with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
+
+Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001
+Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+srcdir=$srcdir
+INSTALL="$INSTALL"
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+# If no files are specified by the user, then we need to provide a default
+# value. But we need to know if files were specified by the user.
+ac_need_defaults=:
+while test $# != 0
+do
+ case $1 in
+ --*=*)
+ ac_option=`expr "x$1" : 'x\([^=]*\)='`
+ ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
+ ac_shift=:
+ ;;
+ -*)
+ ac_option=$1
+ ac_optarg=$2
+ ac_shift=shift
+ ;;
+ *) # This is not an option, so the user has probably given explicit
+ # arguments.
+ ac_option=$1
+ ac_need_defaults=false;;
+ esac
+
+ case $ac_option in
+ # Handling of the options.
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ ac_cs_recheck=: ;;
+ --version | --vers* | -V )
+ echo "$ac_cs_version"; exit 0 ;;
+ --he | --h)
+ # Conflict between --help and --header
+ { { echo "$as_me:$LINENO: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2;}
+ { (exit 1); exit 1; }; };;
+ --help | --hel | -h )
+ echo "$ac_cs_usage"; exit 0 ;;
+ --debug | --d* | -d )
+ debug=: ;;
+ --file | --fil | --fi | --f )
+ $ac_shift
+ CONFIG_FILES="$CONFIG_FILES $ac_optarg"
+ ac_need_defaults=false;;
+ --header | --heade | --head | --hea )
+ $ac_shift
+ CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
+ ac_need_defaults=false;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil | --si | --s)
+ ac_cs_silent=: ;;
+
+ # This is an error.
+ -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&5
+echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2;}
+ { (exit 1); exit 1; }; } ;;
+
+ *) ac_config_targets="$ac_config_targets $1" ;;
+
+ esac
+ shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+ exec 6>/dev/null
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+if \$ac_cs_recheck; then
+ echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
+ exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+fi
+
+_ACEOF
+
+
+
+
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_config_target in $ac_config_targets
+do
+ case "$ac_config_target" in
+ # Handling of arguments.
+ "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+ "default" ) CONFIG_COMMANDS="$CONFIG_COMMANDS default" ;;
+ "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h:config.in" ;;
+ *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+ test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+ test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
+fi
+
+# Have a temporary directory for convenience. Make it in the build tree
+# simply because there is no reason to put it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Create a temporary directory, and hook for its removal unless debugging.
+$debug ||
+{
+ trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
+ trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+
+# Create a (secure) tmp directory for tmp files.
+
+{
+ tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
+ test -n "$tmp" && test -d "$tmp"
+} ||
+{
+ tmp=./confstat$$-$RANDOM
+ (umask 077 && mkdir $tmp)
+} ||
+{
+ echo "$me: cannot create a temporary directory in ." >&2
+ { (exit 1); exit 1; }
+}
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+
+#
+# CONFIG_FILES section.
+#
+
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "\$CONFIG_FILES"; then
+ # Protect against being on the right side of a sed subst in config.status.
+ sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g;
+ s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF
+s,@SHELL@,$SHELL,;t t
+s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t
+s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t
+s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t
+s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t
+s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t
+s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t
+s,@exec_prefix@,$exec_prefix,;t t
+s,@prefix@,$prefix,;t t
+s,@program_transform_name@,$program_transform_name,;t t
+s,@bindir@,$bindir,;t t
+s,@sbindir@,$sbindir,;t t
+s,@libexecdir@,$libexecdir,;t t
+s,@datadir@,$datadir,;t t
+s,@sysconfdir@,$sysconfdir,;t t
+s,@sharedstatedir@,$sharedstatedir,;t t
+s,@localstatedir@,$localstatedir,;t t
+s,@libdir@,$libdir,;t t
+s,@includedir@,$includedir,;t t
+s,@oldincludedir@,$oldincludedir,;t t
+s,@infodir@,$infodir,;t t
+s,@mandir@,$mandir,;t t
+s,@build_alias@,$build_alias,;t t
+s,@host_alias@,$host_alias,;t t
+s,@target_alias@,$target_alias,;t t
+s,@DEFS@,$DEFS,;t t
+s,@ECHO_C@,$ECHO_C,;t t
+s,@ECHO_N@,$ECHO_N,;t t
+s,@ECHO_T@,$ECHO_T,;t t
+s,@LIBS@,$LIBS,;t t
+s,@CC@,$CC,;t t
+s,@CFLAGS@,$CFLAGS,;t t
+s,@LDFLAGS@,$LDFLAGS,;t t
+s,@CPPFLAGS@,$CPPFLAGS,;t t
+s,@ac_ct_CC@,$ac_ct_CC,;t t
+s,@EXEEXT@,$EXEEXT,;t t
+s,@OBJEXT@,$OBJEXT,;t t
+s,@build@,$build,;t t
+s,@build_cpu@,$build_cpu,;t t
+s,@build_vendor@,$build_vendor,;t t
+s,@build_os@,$build_os,;t t
+s,@host@,$host,;t t
+s,@host_cpu@,$host_cpu,;t t
+s,@host_vendor@,$host_vendor,;t t
+s,@host_os@,$host_os,;t t
+s,@target@,$target,;t t
+s,@target_cpu@,$target_cpu,;t t
+s,@target_vendor@,$target_vendor,;t t
+s,@target_os@,$target_os,;t t
+s,@INSTALL_PROGRAM@,$INSTALL_PROGRAM,;t t
+s,@INSTALL_SCRIPT@,$INSTALL_SCRIPT,;t t
+s,@INSTALL_DATA@,$INSTALL_DATA,;t t
+s,@CPP@,$CPP,;t t
+s,@EGREP@,$EGREP,;t t
+s,@GDBSERVER_DEPFILES@,$GDBSERVER_DEPFILES,;t t
+s,@GDBSERVER_LIBS@,$GDBSERVER_LIBS,;t t
+s,@USE_THREAD_DB@,$USE_THREAD_DB,;t t
+s,@LIBOBJS@,$LIBOBJS,;t t
+s,@LTLIBOBJS@,$LTLIBOBJS,;t t
+CEOF
+
+_ACEOF
+
+ cat >>$CONFIG_STATUS <<\_ACEOF
+ # Split the substitutions into bite-sized pieces for seds with
+ # small command number limits, like on Digital OSF/1 and HP-UX.
+ ac_max_sed_lines=48
+ ac_sed_frag=1 # Number of current file.
+ ac_beg=1 # First line for current file.
+ ac_end=$ac_max_sed_lines # Line after last line for current file.
+ ac_more_lines=:
+ ac_sed_cmds=
+ while $ac_more_lines; do
+ if test $ac_beg -gt 1; then
+ sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+ else
+ sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
+ fi
+ if test ! -s $tmp/subs.frag; then
+ ac_more_lines=false
+ else
+ # The purpose of the label and of the branching condition is to
+ # speed up the sed processing (if there are no `@' at all, there
+ # is no need to browse any of the substitutions).
+ # These are the two extra sed commands mentioned above.
+ (echo ':t
+ /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
+ else
+ ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
+ fi
+ ac_sed_frag=`expr $ac_sed_frag + 1`
+ ac_beg=$ac_end
+ ac_end=`expr $ac_end + $ac_max_sed_lines`
+ fi
+ done
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds=cat
+ fi
+fi # test -n "$CONFIG_FILES"
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case $ac_file in
+ - | *:- | *:-:* ) # input from stdin
+ cat >$tmp/stdin
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ * ) ac_file_in=$ac_file.in ;;
+ esac
+
+ # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ { if $as_mkdir_p; then
+ mkdir -p "$ac_dir"
+ else
+ as_dir="$ac_dir"
+ as_dirs=
+ while test ! -d "$as_dir"; do
+ as_dirs="$as_dir $as_dirs"
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ done
+ test ! -n "$as_dirs" || mkdir $as_dirs
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+ { (exit 1); exit 1; }; }; }
+
+ ac_builddir=.
+
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be
+# absolute.
+ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd`
+ac_abs_top_builddir=`cd "$ac_dir" && cd ${ac_top_builddir}. && pwd`
+ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd`
+ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd`
+
+
+ case $INSTALL in
+ [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+ *) ac_INSTALL=$ac_top_builddir$INSTALL ;;
+ esac
+
+ if test x"$ac_file" != x-; then
+ { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+ rm -f "$ac_file"
+ fi
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ if test x"$ac_file" = x-; then
+ configure_input=
+ else
+ configure_input="$ac_file. "
+ fi
+ configure_input=$configure_input"Generated from `echo $ac_file_in |
+ sed 's,.*/,,'` by configure."
+
+ # First look for the input files in the build tree, otherwise in the
+ # src tree.
+ ac_file_inputs=`IFS=:
+ for f in $ac_file_in; do
+ case $f in
+ -) echo $tmp/stdin ;;
+ [\\/$]*)
+ # Absolute (can't be DOS-style, as IFS=:)
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ echo $f;;
+ *) # Relative
+ if test -f "$f"; then
+ # Build tree
+ echo $f
+ elif test -f "$srcdir/$f"; then
+ # Source tree
+ echo $srcdir/$f
+ else
+ # /dev/null tree
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ fi;;
+ esac
+ done` || { (exit 1); exit 1; }
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+ sed "$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s,@configure_input@,$configure_input,;t t
+s,@srcdir@,$ac_srcdir,;t t
+s,@abs_srcdir@,$ac_abs_srcdir,;t t
+s,@top_srcdir@,$ac_top_srcdir,;t t
+s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
+s,@builddir@,$ac_builddir,;t t
+s,@abs_builddir@,$ac_abs_builddir,;t t
+s,@top_builddir@,$ac_top_builddir,;t t
+s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
+s,@INSTALL@,$ac_INSTALL,;t t
+" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
+ rm -f $tmp/stdin
+ if test x"$ac_file" != x-; then
+ mv $tmp/out $ac_file
+ else
+ cat $tmp/out
+ rm -f $tmp/out
+ fi
+
+done
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+#
+# CONFIG_HEADER section.
+#
+
+# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
+# NAME is the cpp macro being defined and VALUE is the value it is being given.
+#
+# ac_d sets the value in "#define NAME VALUE" lines.
+ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)'
+ac_dB='[ ].*$,\1#\2'
+ac_dC=' '
+ac_dD=',;t'
+# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ac_uB='$,\1#\2define\3'
+ac_uC=' '
+ac_uD=',;t'
+
+for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case $ac_file in
+ - | *:- | *:-:* ) # input from stdin
+ cat >$tmp/stdin
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
+ * ) ac_file_in=$ac_file.in ;;
+ esac
+
+ test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+
+ # First look for the input files in the build tree, otherwise in the
+ # src tree.
+ ac_file_inputs=`IFS=:
+ for f in $ac_file_in; do
+ case $f in
+ -) echo $tmp/stdin ;;
+ [\\/$]*)
+ # Absolute (can't be DOS-style, as IFS=:)
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ echo $f;;
+ *) # Relative
+ if test -f "$f"; then
+ # Build tree
+ echo $f
+ elif test -f "$srcdir/$f"; then
+ # Source tree
+ echo $srcdir/$f
+ else
+ # /dev/null tree
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
+echo "$as_me: error: cannot find input file: $f" >&2;}
+ { (exit 1); exit 1; }; }
+ fi;;
+ esac
+ done` || { (exit 1); exit 1; }
+ # Remove the trailing spaces.
+ sed 's/[ ]*$//' $ac_file_inputs >$tmp/in
+
+_ACEOF
+
+# Transform confdefs.h into two sed scripts, `conftest.defines' and
+# `conftest.undefs', that substitutes the proper values into
+# config.h.in to produce config.h. The first handles `#define'
+# templates, and the second `#undef' templates.
+# And first: Protect against being on the right side of a sed subst in
+# config.status. Protect against being in an unquoted here document
+# in config.status.
+rm -f conftest.defines conftest.undefs
+# Using a here document instead of a string reduces the quoting nightmare.
+# Putting comments in sed scripts is not portable.
+#
+# `end' is used to avoid that the second main sed command (meant for
+# 0-ary CPP macros) applies to n-ary macro definitions.
+# See the Autoconf documentation for `clear'.
+cat >confdef2sed.sed <<\_ACEOF
+s/[\\&,]/\\&/g
+s,[\\$`],\\&,g
+t clear
+: clear
+s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp
+t end
+s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp
+: end
+_ACEOF
+# If some macros were called several times there might be several times
+# the same #defines, which is useless. Nevertheless, we may not want to
+# sort them, since we want the *last* AC-DEFINE to be honored.
+uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines
+sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs
+rm -f confdef2sed.sed
+
+# This sed command replaces #undef with comments. This is necessary, for
+# example, in the case of _POSIX_SOURCE, which is predefined and required
+# on some systems where configure will not decide to define it.
+cat >>conftest.undefs <<\_ACEOF
+s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
+_ACEOF
+
+# Break up conftest.defines because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS
+echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS
+echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS
+echo ' :' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.defines >/dev/null
+do
+ # Write a limited-size here document to $tmp/defines.sed.
+ echo ' cat >$tmp/defines.sed <<CEOF' >>$CONFIG_STATUS
+ # Speed up: don't consider the non `#define' lines.
+ echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS
+ # Work around the forget-to-reset-the-flag bug.
+ echo 't clr' >>$CONFIG_STATUS
+ echo ': clr' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $tmp/defines.sed $tmp/in >$tmp/out
+ rm -f $tmp/in
+ mv $tmp/out $tmp/in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail
+ rm -f conftest.defines
+ mv conftest.tail conftest.defines
+done
+rm -f conftest.defines
+echo ' fi # grep' >>$CONFIG_STATUS
+echo >>$CONFIG_STATUS
+
+# Break up conftest.undefs because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #undef templates' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.undefs >/dev/null
+do
+ # Write a limited-size here document to $tmp/undefs.sed.
+ echo ' cat >$tmp/undefs.sed <<CEOF' >>$CONFIG_STATUS
+  # Speed up: don't consider the non `#undef' lines.
+ echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS
+ # Work around the forget-to-reset-the-flag bug.
+ echo 't clr' >>$CONFIG_STATUS
+ echo ': clr' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $tmp/undefs.sed $tmp/in >$tmp/out
+ rm -f $tmp/in
+ mv $tmp/out $tmp/in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail
+ rm -f conftest.undefs
+ mv conftest.tail conftest.undefs
+done
+rm -f conftest.undefs
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ if test x"$ac_file" = x-; then
+ echo "/* Generated by configure. */" >$tmp/config.h
+ else
+ echo "/* $ac_file. Generated by configure. */" >$tmp/config.h
+ fi
+ cat $tmp/in >>$tmp/config.h
+ rm -f $tmp/in
+ if test x"$ac_file" != x-; then
+ if diff $ac_file $tmp/config.h >/dev/null 2>&1; then
+ { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
+echo "$as_me: $ac_file is unchanged" >&6;}
+ else
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ { if $as_mkdir_p; then
+ mkdir -p "$ac_dir"
+ else
+ as_dir="$ac_dir"
+ as_dirs=
+ while test ! -d "$as_dir"; do
+ as_dirs="$as_dir $as_dirs"
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ done
+ test ! -n "$as_dirs" || mkdir $as_dirs
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
+echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
+ { (exit 1); exit 1; }; }; }
+
+ rm -f $ac_file
+ mv $tmp/config.h $ac_file
+ fi
+ else
+ cat $tmp/config.h
+ rm -f $tmp/config.h
+ fi
+done
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+#
+# CONFIG_COMMANDS section.
+#
+for ac_file in : $CONFIG_COMMANDS; do test "x$ac_file" = x: && continue
+ ac_dest=`echo "$ac_file" | sed 's,:.*,,'`
+ ac_source=`echo "$ac_file" | sed 's,[^:]*:,,'`
+ ac_dir=`(dirname "$ac_dest") 2>/dev/null ||
+$as_expr X"$ac_dest" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_dest" : 'X\(//\)[^/]' \| \
+ X"$ac_dest" : 'X\(//\)$' \| \
+ X"$ac_dest" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$ac_dest" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+ ac_builddir=.
+
+if test "$ac_dir" != .; then
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+ ac_dir_suffix= ac_top_builddir=
+fi
+
+case $srcdir in
+ .) # No --srcdir option. We are building in place.
+ ac_srcdir=.
+ if test -z "$ac_top_builddir"; then
+ ac_top_srcdir=.
+ else
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+ fi ;;
+ [\\/]* | ?:[\\/]* ) # Absolute path.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir ;;
+ *) # Relative path.
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be
+# absolute.
+ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd`
+ac_abs_top_builddir=`cd "$ac_dir" && cd ${ac_top_builddir}. && pwd`
+ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd`
+ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd`
+
+
+ { echo "$as_me:$LINENO: executing $ac_dest commands" >&5
+echo "$as_me: executing $ac_dest commands" >&6;}
+ case $ac_dest in
+ default ) case x$CONFIG_HEADERS in
+xconfig.h:config.in)
+echo > stamp-h ;;
+esac
+ ;;
+ esac
+done
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+{ (exit 0); exit 0; }
+_ACEOF
+chmod +x $CONFIG_STATUS
+ac_clean_files=$ac_clean_files_save
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded. So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status. When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+ ac_cs_success=:
+ ac_config_status_args=
+ test "$silent" = yes &&
+ ac_config_status_args="$ac_config_status_args --quiet"
+ exec 5>/dev/null
+ $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+ exec 5>>config.log
+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+ # would make configure fail if this is the last instruction.
+ $ac_cs_success || { (exit 1); exit 1; }
+fi
+
diff --git a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in
new file mode 100644
index 0000000000..bf88ae6dcd
--- /dev/null
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in
@@ -0,0 +1,121 @@
+dnl Autoconf configure script for GDB server.
+dnl Copyright 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl This file is part of GDB.
+dnl
+dnl This program is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 2 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+dnl Process this file with autoconf to produce a configure script.
+
+AC_INIT(server.c)
+AC_CONFIG_HEADER(config.h:config.in)
+
+AC_PROG_CC
+
+AC_CANONICAL_SYSTEM
+
+AC_PROG_INSTALL
+
+AC_HEADER_STDC
+
+AC_CHECK_HEADERS(sgtty.h termio.h termios.h sys/reg.h string.h dnl
+ proc_service.h sys/procfs.h thread_db.h linux/elf.h dnl
+ stdlib.h unistd.h)
+
+BFD_NEED_DECLARATION(strerror)
+
+. ${srcdir}/configure.srv
+
+if test "${srv_linux_usrregs}" = "yes"; then
+ AC_DEFINE(HAVE_LINUX_USRREGS)
+fi
+
+if test "${srv_linux_regsets}" = "yes"; then
+ AC_MSG_CHECKING(for PTRACE_GETREGS)
+ AC_CACHE_VAL(gdbsrv_cv_have_ptrace_getregs,
+ [AC_TRY_COMPILE([#include <sys/ptrace.h>],
+ [PTRACE_GETREGS;],
+ [gdbsrv_cv_have_ptrace_getregs=yes],
+ [gdbsrv_cv_have_ptrace_getregs=no])])
+ AC_MSG_RESULT($gdbsrv_cv_have_ptrace_getregs)
+ if test "${gdbsrv_cv_have_ptrace_getregs}" = "yes"; then
+ AC_DEFINE(HAVE_LINUX_REGSETS)
+ fi
+
+ AC_MSG_CHECKING(for PTRACE_GETFPXREGS)
+ AC_CACHE_VAL(gdbsrv_cv_have_ptrace_getfpxregs,
+ [AC_TRY_COMPILE([#include <sys/ptrace.h>],
+ [PTRACE_GETFPXREGS;],
+ [gdbsrv_cv_have_ptrace_getfpxregs=yes],
+ [gdbsrv_cv_have_ptrace_getfpxregs=no])])
+ AC_MSG_RESULT($gdbsrv_cv_have_ptrace_getfpxregs)
+ if test "${gdbsrv_cv_have_ptrace_getfpxregs}" = "yes"; then
+ AC_DEFINE(HAVE_PTRACE_GETFPXREGS)
+ fi
+fi
+
+if test "$ac_cv_header_sys_procfs_h" = yes; then
+ BFD_HAVE_SYS_PROCFS_TYPE(lwpid_t)
+ BFD_HAVE_SYS_PROCFS_TYPE(psaddr_t)
+ BFD_HAVE_SYS_PROCFS_TYPE(prgregset_t)
+ BFD_HAVE_SYS_PROCFS_TYPE(prfpregset_t)
+
+ dnl Check for broken prfpregset_t type
+
+ dnl For Linux/i386, glibc 2.1.3 was released with a bogus
+ dnl prfpregset_t type (it's a typedef for the pointer to a struct
+ dnl instead of the struct itself). We detect this here, and work
+ dnl around it in gdb_proc_service.h.
+
+ if test $bfd_cv_have_sys_procfs_type_prfpregset_t = yes; then
+ AC_MSG_CHECKING(whether prfpregset_t type is broken)
+ AC_CACHE_VAL(gdb_cv_prfpregset_t_broken,
+ [AC_TRY_RUN([#include <sys/procfs.h>
+ int main ()
+ {
+ if (sizeof (prfpregset_t) == sizeof (void *))
+ return 1;
+ return 0;
+ }],
+ gdb_cv_prfpregset_t_broken=no,
+ gdb_cv_prfpregset_t_broken=yes,
+ gdb_cv_prfpregset_t_broken=yes)])
+ AC_MSG_RESULT($gdb_cv_prfpregset_t_broken)
+ if test $gdb_cv_prfpregset_t_broken = yes; then
+ AC_DEFINE(PRFPREGSET_T_BROKEN)
+ fi
+ fi
+
+ BFD_HAVE_SYS_PROCFS_TYPE(elf_fpregset_t)
+fi
+
+srv_thread_depfiles=
+srv_libs=
+USE_THREAD_DB=
+
+
+GDBSERVER_DEPFILES="$srv_regobj $srv_tgtobj $srv_thread_depfiles"
+GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxc"
+
+AC_SUBST(GDBSERVER_DEPFILES)
+AC_SUBST(GDBSERVER_LIBS)
+AC_SUBST(USE_THREAD_DB)
+
+AC_OUTPUT(Makefile,
+[case x$CONFIG_HEADERS in
+xconfig.h:config.in)
+echo > stamp-h ;;
+esac
+])
diff --git a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.srv b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.srv
new file mode 100644
index 0000000000..a586a51db9
--- /dev/null
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.srv
@@ -0,0 +1,75 @@
+# Mappings from configuration triplets to gdbserver build options.
+# This is invoked from the autoconf-generated configure script, to
+# produce the appropriate Makefile substitutions.
+
+# This file sets the following shell variables:
+# srv_regobj The register protocol appropriate for this target.
+# srv_tgtobj Any other target-specific modules appropriate
+# for this target.
+#
+# In addition, on GNU/Linux the following shell variables will be set:
+# srv_linux_regsets Set to "yes" if ptrace(PTRACE_GETREGS) and friends
+# may be available on this platform; unset otherwise.
+# They will only be used if <sys/ptrace.h> defines
+# PTRACE_GETREGS.
+# srv_linux_usrregs Set to "yes" if we can get at registers via
+# PTRACE_PEEKUSR / PTRACE_POKEUSR.
+
+# Input is taken from the "${target}" variable.
+
+case "${target}" in
+ arm*-*-linux*) srv_regobj=reg-arm.o
+ srv_tgtobj="linux-xen-low.o linux-arm-low.o"
+ srv_linux_usrregs=yes
+ srv_linux_thread_db=yes
+ ;;
+ i[34567]86-*-linux*) srv_regobj=reg-i386-linux.o
+ srv_tgtobj="linux-xen-low.o linux-i386-low.o i387-fp.o"
+ srv_linux_usrregs=yes
+ srv_linux_regsets=yes
+ srv_linux_thread_db=yes
+ ;;
+ ia64-*-linux*) srv_regobj=reg-ia64.o
+ srv_tgtobj="linux-low.o linux-ia64-low.o"
+ srv_linux_usrregs=yes
+ ;;
+ m68*-*-linux*) srv_regobj=reg-m68k.o
+ srv_tgtobj="linux-low.o linux-m68k-low.o"
+ srv_linux_usrregs=yes
+ ;;
+ mips*-*-linux*) srv_regobj=reg-mips.o
+ srv_tgtobj="linux-low.o linux-mips-low.o"
+ srv_linux_usrregs=yes
+ srv_linux_thread_db=yes
+ ;;
+ powerpc*-*-linux*) srv_regobj=reg-ppc.o
+ srv_tgtobj="linux-low.o linux-ppc-low.o"
+ srv_linux_usrregs=yes
+ srv_linux_thread_db=yes
+ ;;
+ s390-*-linux*) srv_regobj=reg-s390.o
+ srv_tgtobj="linux-low.o linux-s390-low.o"
+ srv_linux_usrregs=yes
+ ;;
+ s390x-*-linux*) srv_regobj=reg-s390x.o
+ srv_tgtobj="linux-low.o linux-s390-low.o"
+ srv_linux_usrregs=yes
+ ;;
+ sh*-*-linux*) srv_regobj=reg-sh.o
+ srv_tgtobj="linux-low.o linux-sh-low.o"
+ srv_linux_usrregs=yes
+ srv_linux_thread_db=yes
+ ;;
+ x86_64-*-linux*) srv_regobj=reg-x86-64.o
+ srv_tgtobj="linux-low.o linux-x86-64-low.o i387-fp.o"
+ srv_linux_regsets=yes
+ ;;
+ xscale*-*-linux*) srv_regobj=reg-arm.o
+ srv_tgtobj="linux-low.o linux-arm-low.o"
+ srv_linux_usrregs=yes
+ srv_linux_thread_db=yes
+ ;;
+ *) echo "Error: target not supported by gdbserver."
+ exit 1
+ ;;
+esac
diff --git a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
new file mode 100644
index 0000000000..015c4f1938
--- /dev/null
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
@@ -0,0 +1,556 @@
+/* Low level interface to ptrace, for the remote server for GDB.
+ Copyright 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004
+ Free Software Foundation, Inc.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include "server.h"
+#include "linux-low.h"
+
+#include <sys/wait.h>
+#include <stdio.h>
+#include <sys/param.h>
+#include <sys/dir.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <xc.h>
+#define TRACE_ENTER /* printf("enter %s\n", __FUNCTION__) */
+long (*myptrace)(enum __ptrace_request, pid_t, long, long);
+int (*myxcwait)(int domain, int *status, int options) ;
+
+
+#define DOMFLAGS_DYING (1<<0) /* Domain is scheduled to die. */
+#define DOMFLAGS_SHUTDOWN (1<<2) /* The guest OS has shut down. */
+#define DOMFLAGS_PAUSED (1<<3) /* Currently paused by control software. */
+#define DOMFLAGS_BLOCKED (1<<4) /* Currently blocked pending an event. */
+#define DOMFLAGS_RUNNING (1<<5) /* Domain is currently running. */
+
+
+
+struct inferior_list all_processes;
+
+
+static int current_domain;
+static int expect_signal = 0;
+static int signal_to_send = 0;
+static void linux_resume (struct thread_resume *resume_info);
+
+int debug_threads;
+int using_threads;
+extern int isfile;
+
+struct pending_signals
+{
+ int signal;
+ struct pending_signals *prev;
+};
+
+#define PTRACE_ARG3_TYPE long
+#define PTRACE_XFER_TYPE long
+
+static int use_regsets_p = 1;
+
+
+#define pid_of(proc) ((proc)->head.id)
+
+/* FIXME: Delete eventually. */
+#define inferior_pid (pid_of (get_thread_process (current_inferior)))
+
+/* This function should only be called if the process got a SIGTRAP.
+ The SIGTRAP could mean several things.
+
+ On i386, where decr_pc_after_break is non-zero:
+ If we were single-stepping this process using PTRACE_SINGLESTEP,
+ we will get only the one SIGTRAP (even if the instruction we
+ stepped over was a breakpoint). The value of $eip will be the
+ next instruction.
+ If we continue the process using PTRACE_CONT, we will get a
+ SIGTRAP when we hit a breakpoint. The value of $eip will be
+ the instruction after the breakpoint (i.e. needs to be
+ decremented). If we report the SIGTRAP to GDB, we must also
+ report the undecremented PC. If we cancel the SIGTRAP, we
+ must resume at the decremented PC.
+
+ (Presumably, not yet tested) On a non-decr_pc_after_break machine
+ with hardware or kernel single-step:
+ If we single-step over a breakpoint instruction, our PC will
+ point at the following instruction. If we continue and hit a
+ breakpoint instruction, our PC will point at the breakpoint
+ instruction. */
+#if 0
+static CORE_ADDR
+get_stop_pc (void)
+{
+ CORE_ADDR stop_pc = (*the_low_target.get_pc) ();
+
+ if (get_thread_process (current_inferior)->stepping)
+ return stop_pc;
+ else
+ return stop_pc - the_low_target.decr_pc_after_break;
+}
+#endif
+static void *
+add_process (int pid)
+{
+ struct process_info *process;
+
+ process = (struct process_info *) malloc (sizeof (*process));
+ memset (process, 0, sizeof (*process));
+
+ process->head.id = pid;
+
+ /* Default to tid == lwpid == pid. */
+ process->tid = pid;
+ process->lwpid = pid;
+
+ add_inferior_to_list (&all_processes, &process->head);
+
+ return process;
+}
+
+/* Start an inferior process and returns its pid.
+ ALLARGS is a vector of program-name and args. */
+
+static int
+linux_create_inferior (char *program, char **allargs)
+{
+
+ fprintf (stderr, "Cannot exec %s: %s.\n", program,
+ strerror (errno));
+ fflush (stderr);
+ _exit (0177);
+ /* NOT REACHED */
+ return -1;
+}
+
+int
+linux_attach (int domain)
+{
+ struct process_info *new_process;
+ current_domain = domain;
+ if (myptrace (PTRACE_ATTACH, domain, 0, 0) != 0) {
+ fprintf (stderr, "Cannot attach to domain %d: %s (%d)\n", domain,
+ strerror (errno), errno);
+ fflush (stderr);
+ _exit (0177);
+ }
+
+ new_process = (struct process_info *) add_process (domain);
+ add_thread (domain, new_process);
+
+ /* Don't ignore the initial SIGSTOP if we just attached to this process. */
+ new_process->stop_expected = 0;
+
+ return 0;
+}
+
+/* Kill the inferior process. Make us have no inferior. */
+
+static void
+linux_kill_one_process (struct inferior_list_entry *entry)
+{
+ struct thread_info *thread = (struct thread_info *) entry;
+ struct process_info *process = get_thread_process (thread);
+ myptrace (PTRACE_KILL, pid_of (process), 0, 0);
+
+}
+
+static void
+linux_kill (void)
+{
+ for_each_inferior (&all_threads, linux_kill_one_process);
+}
+
+
+static void
+linux_detach_one_process (struct inferior_list_entry *entry)
+{
+ struct thread_info *thread = (struct thread_info *) entry;
+ struct process_info *process = get_thread_process (thread);
+
+ myptrace (PTRACE_DETACH, pid_of (process), 0, 0);
+}
+
+
+static void
+linux_detach (void)
+{
+ for_each_inferior (&all_threads, linux_detach_one_process);
+}
+
+/* Return nonzero if the given thread is still alive. */
+static int
+linux_thread_alive (int tid)
+{
+ if (find_inferior_id (&all_threads, tid) != NULL)
+ return 1;
+ else
+ return 0;
+}
+
+/* Wait for process, returns status. */
+
+static unsigned char
+linux_wait (char *status)
+{
+ int w;
+ if (myxcwait(current_domain, &w, 0))
+ return -1;
+
+ if (w & (DOMFLAGS_SHUTDOWN|DOMFLAGS_DYING)) {
+ *status = 'W';
+ return 0;
+ }
+
+
+ *status = 'T';
+ if (expect_signal)
+ return expect_signal;
+ else
+ return SIGTRAP;
+
+}
+
+static void
+linux_resume (struct thread_resume *resume_info)
+{
+ int step = resume_info->step;
+ TRACE_ENTER;
+ expect_signal = resume_info->sig;
+ for_each_inferior(&all_threads, regcache_invalidate_one);
+
+ myptrace (step ? PTRACE_SINGLESTEP : PTRACE_CONT, current_domain, 0, 0);
+
+}
+
+
+static int
+regsets_fetch_inferior_registers ()
+{
+ struct regset_info *regset;
+ TRACE_ENTER;
+ regset = target_regsets;
+
+ while (regset->size >= 0)
+ {
+ void *buf;
+ int res;
+
+ if (regset->size == 0)
+ {
+ regset ++;
+ continue;
+ }
+
+ buf = malloc (regset->size);
+ res = myptrace (regset->get_request, inferior_pid, 0, (PTRACE_XFER_TYPE)buf);
+ if (res < 0)
+ {
+ if (errno == EIO)
+ {
+ /* If we get EIO on the first regset, do not try regsets again.
+ If we get EIO on a later regset, disable that regset. */
+ if (regset == target_regsets)
+ {
+ use_regsets_p = 0;
+ return -1;
+ }
+ else
+ {
+ regset->size = 0;
+ continue;
+ }
+ }
+ else
+ {
+ char s[256];
+ sprintf (s, "ptrace(regsets_fetch_inferior_registers) PID=%d",
+ inferior_pid);
+ perror (s);
+ }
+ }
+ regset->store_function (buf);
+ regset ++;
+ }
+ return 0;
+}
+
+static int
+regsets_store_inferior_registers ()
+{
+ struct regset_info *regset;
+ TRACE_ENTER;
+ regset = target_regsets;
+
+ while (regset->size >= 0)
+ {
+ void *buf;
+ int res;
+
+ if (regset->size == 0)
+ {
+ regset ++;
+ continue;
+ }
+
+ buf = malloc (regset->size);
+ regset->fill_function (buf);
+ res = myptrace (regset->set_request, inferior_pid, 0, (PTRACE_XFER_TYPE)buf);
+ if (res < 0)
+ {
+ if (errno == EIO)
+ {
+ /* If we get EIO on the first regset, do not try regsets again.
+ If we get EIO on a later regset, disable that regset. */
+ if (regset == target_regsets)
+ {
+ use_regsets_p = 0;
+ return -1;
+ }
+ else
+ {
+ regset->size = 0;
+ continue;
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ perror ("Warning: ptrace(regsets_store_inferior_registers)");
+#endif
+ }
+ }
+ regset ++;
+ free (buf);
+ }
+ return 0;
+}
+
+
+
+
+void
+linux_fetch_registers (int regno)
+{
+ if (use_regsets_p)
+ {
+ if (regsets_fetch_inferior_registers () == 0)
+ return;
+ }
+
+}
+
+void
+linux_store_registers (int regno)
+{
+ if (use_regsets_p)
+ {
+ if (regsets_store_inferior_registers () == 0)
+ return;
+ }
+}
+
+
+/* Copy LEN bytes from inferior's memory starting at MEMADDR
+ to debugger memory starting at MYADDR. */
+
+static int
+linux_read_memory (CORE_ADDR memaddr, char *myaddr, int len)
+{
+ register int i;
+ /* Round starting address down to longword boundary. */
+ register CORE_ADDR addr = memaddr & -(CORE_ADDR) sizeof (PTRACE_XFER_TYPE);
+ /* Round ending address up; get number of longwords that makes. */
+ register int count
+ = (((memaddr + len) - addr) + sizeof (PTRACE_XFER_TYPE) - 1)
+ / sizeof (PTRACE_XFER_TYPE);
+ /* Allocate buffer of that many longwords. */
+ register PTRACE_XFER_TYPE *buffer
+ = (PTRACE_XFER_TYPE *) alloca (count * sizeof (PTRACE_XFER_TYPE));
+
+ TRACE_ENTER;
+ /* Read all the longwords */
+ for (i = 0; i < count; i++, addr += sizeof (PTRACE_XFER_TYPE))
+ {
+ errno = 0;
+ buffer[i] = myptrace (PTRACE_PEEKTEXT, inferior_pid, (PTRACE_ARG3_TYPE) addr, 0);
+ if (errno)
+ return errno;
+ }
+
+ /* Copy appropriate bytes out of the buffer. */
+ memcpy (myaddr, (char *) buffer + (memaddr & (sizeof (PTRACE_XFER_TYPE) - 1)), len);
+
+ return 0;
+}
+
+/* Copy LEN bytes of data from debugger memory at MYADDR
+ to inferior's memory at MEMADDR.
+ On failure (cannot write the inferior)
+ returns the value of errno. */
+
+static int
+linux_write_memory (CORE_ADDR memaddr, const char *myaddr, int len)
+{
+ register int i;
+ /* Round starting address down to longword boundary. */
+ register CORE_ADDR addr = memaddr & -(CORE_ADDR) sizeof (PTRACE_XFER_TYPE);
+ /* Round ending address up; get number of longwords that makes. */
+ register int count
+ = (((memaddr + len) - addr) + sizeof (PTRACE_XFER_TYPE) - 1) / sizeof (PTRACE_XFER_TYPE);
+ /* Allocate buffer of that many longwords. */
+ register PTRACE_XFER_TYPE *buffer = (PTRACE_XFER_TYPE *) alloca (count * sizeof (PTRACE_XFER_TYPE));
+ extern int errno;
+
+ TRACE_ENTER;
+
+ /* Fill start and end extra bytes of buffer with existing memory data. */
+
+ buffer[0] = myptrace (PTRACE_PEEKTEXT, inferior_pid,
+ (PTRACE_ARG3_TYPE) addr, 0);
+
+ if (count > 1)
+ {
+ buffer[count - 1]
+ = myptrace (PTRACE_PEEKTEXT, inferior_pid,
+ (PTRACE_ARG3_TYPE) (addr + (count - 1)
+ * sizeof (PTRACE_XFER_TYPE)),
+ 0);
+ }
+
+ /* Copy data to be written over corresponding part of buffer */
+
+ memcpy ((char *) buffer + (memaddr & (sizeof (PTRACE_XFER_TYPE) - 1)), myaddr, len);
+
+ /* Write the entire buffer. */
+ for (i = 0; i < count; i++, addr += sizeof (PTRACE_XFER_TYPE))
+ {
+ errno = 0;
+ myptrace (PTRACE_POKETEXT, inferior_pid, (PTRACE_ARG3_TYPE) addr, buffer[i]);
+ if (errno)
+ return errno;
+ }
+
+ return 0;
+}
+
+static void
+linux_look_up_symbols (void)
+{
+#if 0
+ using_threads = thread_db_init ();
+#endif
+}
+
+static void
+linux_send_signal (int signum)
+{
+ extern int signal_pid;
+
+ TRACE_ENTER;
+ signal_to_send = signum;
+ psignal(signum, "need to send ");
+ if (cont_thread > 0)
+ {
+ struct process_info *process;
+
+ process = get_thread_process (current_inferior);
+ kill (process->lwpid, signum);
+ }
+ else
+ kill (signal_pid, signum);
+}
+
+/* Copy LEN bytes from inferior's auxiliary vector starting at OFFSET
+ to debugger memory starting at MYADDR. */
+
+static int
+linux_read_auxv (CORE_ADDR offset, char *myaddr, unsigned int len)
+{
+ char filename[PATH_MAX];
+ int fd, n;
+
+ TRACE_ENTER;
+ snprintf (filename, sizeof filename, "/proc/%d/auxv", inferior_pid);
+
+ fd = open (filename, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ if (offset != (CORE_ADDR) 0
+ && lseek (fd, (off_t) offset, SEEK_SET) != (off_t) offset)
+ n = -1;
+ else
+ n = read (fd, myaddr, len);
+
+ close (fd);
+
+ return n;
+}
+
+
+static struct target_ops linux_xen_target_ops = {
+ linux_create_inferior,
+ linux_attach,
+ linux_kill,
+ linux_detach,
+ linux_thread_alive,
+ linux_resume,
+ linux_wait,
+ linux_fetch_registers,
+ linux_store_registers,
+ linux_read_memory,
+ linux_write_memory,
+ linux_look_up_symbols,
+ linux_send_signal,
+ linux_read_auxv,
+};
+
+static void
+linux_init_signals ()
+{
+ /* FIXME drow/2002-06-09: As above, we should check with LinuxThreads
+ to find what the cancel signal actually is. */
+ signal (__SIGRTMIN+1, SIG_IGN);
+}
+
+void
+initialize_low (void)
+{
+
+ set_target_ops (&linux_xen_target_ops);
+ set_breakpoint_data (the_low_target.breakpoint,
+ the_low_target.breakpoint_len);
+ init_registers ();
+ linux_init_signals ();
+ if (isfile) {
+ myptrace = xc_ptrace_core;
+ myxcwait = xc_waitdomain_core;
+ } else {
+ myptrace = xc_ptrace;
+ myxcwait = xc_waitdomain;
+ }
+
+}
diff --git a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/server.c b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/server.c
new file mode 100644
index 0000000000..54f508a186
--- /dev/null
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/server.c
@@ -0,0 +1,639 @@
+/* Main code for remote server for GDB.
+ Copyright 1989, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2002, 2003, 2004
+ Free Software Foundation, Inc.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include "server.h"
+
+#include <unistd.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int cont_thread;
+int general_thread;
+int step_thread;
+int thread_from_wait;
+int old_thread_from_wait;
+int extended_protocol;
+int server_waiting;
+int isfile = 0;
+
+jmp_buf toplevel;
+
+/* The PID of the originally created or attached inferior. Used to
+ send signals to the process when GDB sends us an asynchronous interrupt
+ (user hitting Control-C in the client), and to wait for the child to exit
+ when no longer debugging it. */
+
+int signal_pid;
+
+static unsigned char
+start_inferior (char *argv[], char *statusptr)
+{
+ signal (SIGTTOU, SIG_DFL);
+ signal (SIGTTIN, SIG_DFL);
+
+ signal_pid = create_inferior (argv[0], argv);
+
+ fprintf (stderr, "Process %s created; pid = %d\n", argv[0],
+ signal_pid);
+
+ signal (SIGTTOU, SIG_IGN);
+ signal (SIGTTIN, SIG_IGN);
+ tcsetpgrp (fileno (stderr), signal_pid);
+
+ /* Wait till we are at 1st instruction in program, return signal number. */
+ return mywait (statusptr, 0);
+}
+
+static int
+attach_inferior (int pid, char *statusptr, unsigned char *sigptr)
+{
+ /* myattach should return -1 if attaching is unsupported,
+ 0 if it succeeded, and call error() otherwise. */
+
+ if (myattach (pid) != 0)
+ return -1;
+
+ fprintf (stderr, "Attached; pid = %d\n", pid);
+
+ /* FIXME - It may be that we should get the SIGNAL_PID from the
+ attach function, so that it can be the main thread instead of
+ whichever we were told to attach to. */
+ signal_pid = pid;
+
+ *sigptr = mywait (statusptr, 0);
+
+ return 0;
+}
+
+extern int remote_debug;
+
+/* Handle all of the extended 'q' packets. */
+void
+handle_query (char *own_buf)
+{
+ static struct inferior_list_entry *thread_ptr;
+
+ if (strcmp ("qSymbol::", own_buf) == 0)
+ {
+ if (the_target->look_up_symbols != NULL)
+ (*the_target->look_up_symbols) ();
+
+ strcpy (own_buf, "OK");
+ return;
+ }
+
+ if (strcmp ("qfThreadInfo", own_buf) == 0)
+ {
+ thread_ptr = all_threads.head;
+ sprintf (own_buf, "m%x", thread_ptr->id);
+ thread_ptr = thread_ptr->next;
+ return;
+ }
+
+ if (strcmp ("qsThreadInfo", own_buf) == 0)
+ {
+ if (thread_ptr != NULL)
+ {
+ sprintf (own_buf, "m%x", thread_ptr->id);
+ thread_ptr = thread_ptr->next;
+ return;
+ }
+ else
+ {
+ sprintf (own_buf, "l");
+ return;
+ }
+ }
+
+ if (the_target->read_auxv != NULL
+ && strncmp ("qPart:auxv:read::", own_buf, 17) == 0)
+ {
+ char data[(PBUFSIZ - 1) / 2];
+ CORE_ADDR ofs;
+ unsigned int len;
+ int n;
+ decode_m_packet (&own_buf[17], &ofs, &len); /* "OFS,LEN" */
+ if (len > sizeof data)
+ len = sizeof data;
+ n = (*the_target->read_auxv) (ofs, data, len);
+ if (n == 0)
+ write_ok (own_buf);
+ else if (n < 0)
+ write_enn (own_buf);
+ else
+ convert_int_to_ascii (data, own_buf, n);
+ return;
+ }
+
+ /* Otherwise we didn't know what packet it was. Say we didn't
+ understand it. */
+ own_buf[0] = 0;
+}
+
+/* Parse vCont packets. */
+void
+handle_v_cont (char *own_buf, char *status, unsigned char *signal)
+{
+ char *p, *q;
+ int n = 0, i = 0;
+ struct thread_resume *resume_info, default_action;
+
+ /* Count the number of semicolons in the packet. There should be one
+ for every action. */
+ p = &own_buf[5];
+ while (p)
+ {
+ n++;
+ p++;
+ p = strchr (p, ';');
+ }
+ /* Allocate room for one extra action, for the default remain-stopped
+ behavior; if no default action is in the list, we'll need the extra
+ slot. */
+ resume_info = malloc ((n + 1) * sizeof (resume_info[0]));
+
+ default_action.thread = -1;
+ default_action.leave_stopped = 1;
+ default_action.step = 0;
+ default_action.sig = 0;
+
+ p = &own_buf[5];
+ i = 0;
+ while (*p)
+ {
+ p++;
+
+ resume_info[i].leave_stopped = 0;
+
+ if (p[0] == 's' || p[0] == 'S')
+ resume_info[i].step = 1;
+ else if (p[0] == 'c' || p[0] == 'C')
+ resume_info[i].step = 0;
+ else
+ goto err;
+
+ if (p[0] == 'S' || p[0] == 'C')
+ {
+ int sig;
+ sig = strtol (p + 1, &q, 16);
+ if (p == q)
+ goto err;
+ p = q;
+
+ if (!target_signal_to_host_p (sig))
+ goto err;
+ resume_info[i].sig = target_signal_to_host (sig);
+ }
+ else
+ {
+ resume_info[i].sig = 0;
+ p = p + 1;
+ }
+
+ if (p[0] == 0)
+ {
+ resume_info[i].thread = -1;
+ default_action = resume_info[i];
+
+ /* Note: we don't increment i here, we'll overwrite this entry
+ the next time through. */
+ }
+ else if (p[0] == ':')
+ {
+ resume_info[i].thread = strtol (p + 1, &q, 16);
+ if (p == q)
+ goto err;
+ p = q;
+ if (p[0] != ';' && p[0] != 0)
+ goto err;
+
+ i++;
+ }
+ }
+
+ resume_info[i] = default_action;
+
+ /* Still used in occasional places in the backend. */
+ if (n == 1 && resume_info[0].thread != -1)
+ cont_thread = resume_info[0].thread;
+ else
+ cont_thread = -1;
+ set_desired_inferior (0);
+
+ (*the_target->resume) (resume_info);
+
+ free (resume_info);
+
+ *signal = mywait (status, 1);
+ prepare_resume_reply (own_buf, *status, *signal);
+ return;
+
+err:
+ /* No other way to report an error... */
+ strcpy (own_buf, "");
+ free (resume_info);
+ return;
+}
+
+/* Handle all of the extended 'v' packets. */
+void
+handle_v_requests (char *own_buf, char *status, unsigned char *signal)
+{
+ if (strncmp (own_buf, "vCont;", 6) == 0)
+ {
+ handle_v_cont (own_buf, status, signal);
+ return;
+ }
+
+ if (strncmp (own_buf, "vCont?", 6) == 0)
+ {
+ strcpy (own_buf, "vCont;c;C;s;S");
+ return;
+ }
+
+ /* Otherwise we didn't know what packet it was. Say we didn't
+ understand it. */
+ own_buf[0] = 0;
+ return;
+}
+
+void
+myresume (int step, int sig)
+{
+ struct thread_resume resume_info[2];
+ int n = 0;
+
+ if (step || sig || cont_thread > 0)
+ {
+ resume_info[0].thread
+ = ((struct inferior_list_entry *) current_inferior)->id;
+ resume_info[0].step = step;
+ resume_info[0].sig = sig;
+ resume_info[0].leave_stopped = 0;
+ n++;
+ }
+ resume_info[n].thread = -1;
+ resume_info[n].step = 0;
+ resume_info[n].sig = 0;
+ resume_info[n].leave_stopped = (cont_thread > 0);
+
+ (*the_target->resume) (resume_info);
+}
+
+static int attached;
+
+static void
+gdbserver_usage (void)
+{
+ error ("Usage:\tgdbserver COMM PROG [ARGS ...]\n"
+ "\tgdbserver COMM --attach PID\n"
+ "\tgdbserver COMM --file COREFILE\n"
+ "\n"
+ "COMM may either be a tty device (for serial debugging), or \n"
+ "HOST:PORT to listen for a TCP connection.\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+ char ch, status, *own_buf, mem_buf[2000];
+ int i = 0;
+ unsigned char signal;
+ unsigned int len;
+ CORE_ADDR mem_addr;
+ int bad_attach;
+ int pid;
+ char *arg_end;
+
+ if (setjmp (toplevel))
+ {
+ fprintf (stderr, "Exiting\n");
+ exit (1);
+ }
+
+ bad_attach = 0;
+ pid = 0;
+ attached = 0;
+ if (argc >= 3 && strcmp (argv[2], "--attach") == 0)
+ {
+ if (argc == 4
+ && argv[3] != '\0'
+ && (pid = strtoul (argv[3], &arg_end, 10)) != 0
+ && *arg_end == '\0')
+ {
+ ;
+ }
+ else
+ bad_attach = 1;
+ }
+ else if (argc >= 3 && strcmp (argv[2], "--file") == 0)
+ {
+ if (argc == 4
+ && argv[3] != '\0')
+ {
+ if ((pid = open(argv[3], O_RDONLY)) <= 0)
+ bad_attach = 1;
+ else
+ isfile = 1;
+ }
+ else
+ bad_attach = 1;
+ }
+
+ if (argc < 3 || bad_attach)
+ gdbserver_usage();
+
+ initialize_low ();
+
+ own_buf = malloc (PBUFSIZ);
+
+ if (pid == 0)
+ {
+ /* Wait till we are at first instruction in program. */
+ signal = start_inferior (&argv[2], &status);
+
+ /* We are now stopped at the first instruction of the target process */
+ }
+ else
+ {
+ switch (attach_inferior (pid, &status, &signal))
+ {
+ case -1:
+ error ("Attaching not supported on this target");
+ break;
+ default:
+ attached = 1;
+ break;
+ }
+ }
+
+ while (1)
+ {
+ remote_open (argv[1]);
+
+ restart:
+ setjmp (toplevel);
+ while (getpkt (own_buf) > 0)
+ {
+ unsigned char sig;
+ i = 0;
+ ch = own_buf[i++];
+ switch (ch)
+ {
+ case 'q':
+ handle_query (own_buf);
+ break;
+ case 'd':
+ remote_debug = !remote_debug;
+ break;
+ case 'D':
+ fprintf (stderr, "Detaching from inferior\n");
+ detach_inferior ();
+ write_ok (own_buf);
+ putpkt (own_buf);
+ remote_close ();
+
+ /* If we are attached, then we can exit. Otherwise, we need to
+ hang around doing nothing, until the child is gone. */
+ if (!attached)
+ {
+ int status, ret;
+
+ do {
+ ret = waitpid (signal_pid, &status, 0);
+ if (WIFEXITED (status) || WIFSIGNALED (status))
+ break;
+ } while (ret != -1 || errno != ECHILD);
+ }
+
+ exit (0);
+
+ case '!':
+ if (attached == 0)
+ {
+ extended_protocol = 1;
+ prepare_resume_reply (own_buf, status, signal);
+ }
+ else
+ {
+ /* We can not use the extended protocol if we are
+ attached, because we can not restart the running
+ program. So return unrecognized. */
+ own_buf[0] = '\0';
+ }
+ break;
+ case '?':
+ prepare_resume_reply (own_buf, status, signal);
+ break;
+ case 'H':
+ switch (own_buf[1])
+ {
+ case 'g':
+ general_thread = strtol (&own_buf[2], NULL, 16);
+ write_ok (own_buf);
+ set_desired_inferior (1);
+ break;
+ case 'c':
+ cont_thread = strtol (&own_buf[2], NULL, 16);
+ write_ok (own_buf);
+ break;
+ case 's':
+ step_thread = strtol (&own_buf[2], NULL, 16);
+ write_ok (own_buf);
+ break;
+ default:
+ /* Silently ignore it so that gdb can extend the protocol
+ without compatibility headaches. */
+ own_buf[0] = '\0';
+ break;
+ }
+ break;
+ case 'g':
+ set_desired_inferior (1);
+ registers_to_string (own_buf);
+ break;
+ case 'G':
+ set_desired_inferior (1);
+ registers_from_string (&own_buf[1]);
+ write_ok (own_buf);
+ break;
+ case 'm':
+ decode_m_packet (&own_buf[1], &mem_addr, &len);
+ if (read_inferior_memory (mem_addr, mem_buf, len) == 0)
+ convert_int_to_ascii (mem_buf, own_buf, len);
+ else
+ write_enn (own_buf);
+ break;
+ case 'M':
+ decode_M_packet (&own_buf[1], &mem_addr, &len, mem_buf);
+ if (write_inferior_memory (mem_addr, mem_buf, len) == 0)
+ write_ok (own_buf);
+ else
+ write_enn (own_buf);
+ break;
+ case 'C':
+ convert_ascii_to_int (own_buf + 1, &sig, 1);
+ if (target_signal_to_host_p (sig))
+ signal = target_signal_to_host (sig);
+ else
+ signal = 0;
+ set_desired_inferior (0);
+ myresume (0, signal);
+ signal = mywait (&status, 1);
+ prepare_resume_reply (own_buf, status, signal);
+ break;
+ case 'S':
+ convert_ascii_to_int (own_buf + 1, &sig, 1);
+ if (target_signal_to_host_p (sig))
+ signal = target_signal_to_host (sig);
+ else
+ signal = 0;
+ set_desired_inferior (0);
+ myresume (1, signal);
+ signal = mywait (&status, 1);
+ prepare_resume_reply (own_buf, status, signal);
+ break;
+ case 'c':
+ set_desired_inferior (0);
+ myresume (0, 0);
+ signal = mywait (&status, 1);
+ prepare_resume_reply (own_buf, status, signal);
+ break;
+ case 's':
+ set_desired_inferior (0);
+ myresume (1, 0);
+ signal = mywait (&status, 1);
+ prepare_resume_reply (own_buf, status, signal);
+ break;
+ case 'k':
+ fprintf (stderr, "Killing inferior\n");
+ kill_inferior ();
+ /* When using the extended protocol, we start up a new
+ debugging session. The traditional protocol will
+ exit instead. */
+ if (extended_protocol)
+ {
+ write_ok (own_buf);
+ fprintf (stderr, "GDBserver restarting\n");
+
+ /* Wait till we are at 1st instruction in prog. */
+ signal = start_inferior (&argv[2], &status);
+ goto restart;
+ break;
+ }
+ else
+ {
+ exit (0);
+ break;
+ }
+ case 'T':
+ if (mythread_alive (strtol (&own_buf[1], NULL, 16)))
+ write_ok (own_buf);
+ else
+ write_enn (own_buf);
+ break;
+ case 'R':
+ /* Restarting the inferior is only supported in the
+ extended protocol. */
+ if (extended_protocol)
+ {
+ kill_inferior ();
+ write_ok (own_buf);
+ fprintf (stderr, "GDBserver restarting\n");
+
+ /* Wait till we are at 1st instruction in prog. */
+ signal = start_inferior (&argv[2], &status);
+ goto restart;
+ break;
+ }
+ else
+ {
+ /* It is a request we don't understand. Respond with an
+ empty packet so that gdb knows that we don't support this
+ request. */
+ own_buf[0] = '\0';
+ break;
+ }
+ case 'v':
+ /* Extended (long) request. */
+ handle_v_requests (own_buf, &status, &signal);
+ break;
+ default:
+ /* It is a request we don't understand. Respond with an
+ empty packet so that gdb knows that we don't support this
+ request. */
+ own_buf[0] = '\0';
+ break;
+ }
+
+ putpkt (own_buf);
+
+ if (status == 'W')
+ fprintf (stderr,
+ "\nChild exited with status %d\n", signal);
+ if (status == 'X')
+ fprintf (stderr, "\nChild terminated with signal = 0x%x\n",
+ signal);
+ if (status == 'W' || status == 'X')
+ {
+ if (extended_protocol)
+ {
+ fprintf (stderr, "Killing inferior\n");
+ kill_inferior ();
+ write_ok (own_buf);
+ fprintf (stderr, "GDBserver restarting\n");
+
+ /* Wait till we are at 1st instruction in prog. */
+ signal = start_inferior (&argv[2], &status);
+ goto restart;
+ break;
+ }
+ else
+ {
+ fprintf (stderr, "GDBserver exiting\n");
+ exit (0);
+ }
+ }
+ }
+
+ /* We come here when getpkt fails.
+
+ For the extended remote protocol we exit (and this is the only
+ way we gracefully exit!).
+
+ For the traditional remote protocol close the connection,
+ and re-open it at the top of the loop. */
+ if (extended_protocol)
+ {
+ remote_close ();
+ exit (0);
+ }
+ else
+ {
+ fprintf (stderr, "Remote side has terminated connection. "
+ "GDBserver will reopen the connection.\n");
+ remote_close ();
+ }
+ }
+}
diff --git a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/mkbuildtree b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/mkbuildtree
new file mode 100755
index 0000000000..6be1df1753
--- /dev/null
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/mkbuildtree
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+# mkbuildtree <build tree>
+#
+# Creates symbolic links in <build tree> for the sparse tree
+# in the current directory.
+
+# Script to determine the relative path between two directories.
+# Copyright (c) D. J. Hawkey Jr. 2002
+# Fixed for Xen project by K. Fraser in 2003.
+abs_to_rel ()
+{
+ local CWD SRCPATH
+
+ if [ "$1" != "/" -a "${1##*[^/]}" = "/" ]; then
+ SRCPATH=${1%?}
+ else
+ SRCPATH=$1
+ fi
+ if [ "$2" != "/" -a "${2##*[^/]}" = "/" ]; then
+ DESTPATH=${2%?}
+ else
+ DESTPATH=$2
+ fi
+
+ CWD=$PWD
+ [ "${1%%[^/]*}" != "/" ] && cd $1 && SRCPATH=$PWD
+ [ "${2%%[^/]*}" != "/" ] && cd $2 && DESTPATH=$PWD
+ [ "$CWD" != "$PWD" ] && cd $CWD
+
+ BASEPATH=$SRCPATH
+
+ [ "$SRCPATH" = "$DESTPATH" ] && DESTPATH="." && return
+ [ "$SRCPATH" = "/" ] && DESTPATH=${DESTPATH#?} && return
+
+ while [ "$BASEPATH/" != "${DESTPATH%${DESTPATH#$BASEPATH/}}" ]; do
+ BASEPATH=${BASEPATH%/*}
+ done
+
+ SRCPATH=${SRCPATH#$BASEPATH}
+ DESTPATH=${DESTPATH#$BASEPATH}
+ DESTPATH=${DESTPATH#?}
+ while [ -n "$SRCPATH" ]; do
+ SRCPATH=${SRCPATH%/*}
+ DESTPATH="../$DESTPATH"
+ done
+
+ [ -z "$BASEPATH" ] && BASEPATH="/"
+ [ "${DESTPATH##*[^/]}" = "/" ] && DESTPATH=${DESTPATH%?}
+}
+
+# relative_lndir <target_dir>
+# Creates a tree of symlinks in the current working directory that mirror
+# real files in <target_dir>. <target_dir> should be relative to the current
+# working directory. Symlinks in <target_dir> are ignored. Source-control files
+# are ignored.
+relative_lndir ()
+{
+ local SYMLINK_DIR REAL_DIR pref i j
+ SYMLINK_DIR=$PWD
+ REAL_DIR=$1
+ (
+ cd $REAL_DIR
+ for i in `find . -type d | grep -v SCCS`; do
+ [ -d $SYMLINK_DIR/$i ] || mkdir -p $SYMLINK_DIR/$i
+ (
+ cd $i
+ pref=`echo $i | sed -e 's#/[^/]*#../#g' -e 's#^\.##'`
+ for j in `find . -type f -o -type l -maxdepth 1`; do
+ ln -sf ${pref}${REAL_DIR}/$i/$j ${SYMLINK_DIR}/$i/$j
+ done
+ )
+ done
+ )
+}
+
+[ "$1" == "" ] && { echo "Syntax: $0 <linux tree to xenify>"; exit 1; }
+
+# Get absolute path to the destination directory
+pushd . >/dev/null
+cd ${1}
+AD=$PWD
+popd >/dev/null
+
+# Get absolute path to the source directory
+AS=`pwd`
+
+# Get name of sparse directory
+SDN=$(basename $AS)
+
+# Get path to source, relative to destination
+abs_to_rel ${AD} ${AS}
+RS=$DESTPATH
+
+# We now work from the destination directory
+cd ${AD}
+
+# Remove old symlinks
+find sys -type l | while read f
+do
+ case $(readlink $f) in
+ */$SDN/*)
+ rm -f $f
+ ;;
+ esac
+done
+
+if [ -f ${AD}/BUILDING ]; then
+ # Create symlinks of files and directories which exist in the sparse source
+ (cd sys && relative_lndir ../${RS}/sys)
+else
+ # Create symlinks of files and directories which exist in the sparse source
+ relative_lndir ${RS}
+ rm -f mkbuildtree
+fi
diff --git a/tools/debugger/gdb/gdbbuild b/tools/debugger/gdb/gdbbuild
new file mode 100755
index 0000000000..1c1c9a0242
--- /dev/null
+++ b/tools/debugger/gdb/gdbbuild
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+XENROOT=`bk root`
+export XENROOT
+
+cd $XENROOT/tools/debugger/gdb
+rm -rf gdb-6.2.1 gdb-6.2.1-linux-i386-xen
+# FIXME:cw this should be smarter
+wget -c ftp://ftp.gnu.org/gnu/gdb/gdb-6.2.1.tar.bz2
+tar xjf gdb-6.2.1.tar.bz2
+
+cd $XENROOT/tools/debugger/gdb/gdb-6.2.1-xen-sparse
+./mkbuildtree ../gdb-6.2.1
+
+mkdir $XENROOT/tools/debugger/gdb/gdb-6.2.1-linux-i386-xen
+cd $XENROOT/tools/debugger/gdb/gdb-6.2.1-linux-i386-xen
+../gdb-6.2.1/configure
+# some people don't have gmake
+if which gmake ; then
+ gmake -j4
+else
+ make -j4
+fi
diff --git a/tools/debugger/libxendebug/Makefile b/tools/debugger/libxendebug/Makefile
new file mode 100644
index 0000000000..ed478658a4
--- /dev/null
+++ b/tools/debugger/libxendebug/Makefile
@@ -0,0 +1,72 @@
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DATA = $(INSTALL) -m0644
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+MAJOR = 3.0
+MINOR = 0
+
+CC = gcc
+
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+SRCS := xendebug.c
+
+CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing
+CFLAGS += $(INCLUDES) -I. -I$(XEN_ROOT)/tools/libxc
+# Get gcc to generate the dependencies for us.
+CFLAGS += -Wp,-MD,.$(@F).d
+DEPS = .*.d
+
+LDFLAGS += -L$(XEN_ROOT)/tools/libxc -lxc
+
+LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
+PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
+
+LIB := libxendebug.a libxendebug.so
+LIB += libxendebug.so.$(MAJOR) libxendebug.so.$(MAJOR).$(MINOR)
+
+all: build
+build:
+ $(MAKE) $(LIB)
+
+install: build
+ [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
+ [ -d $(DESTDIR)/usr/include ] || $(INSTALL_DIR) $(DESTDIR)/usr/include
+ $(INSTALL_PROG) libxendebug.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxendebug.a $(DESTDIR)/usr/$(LIBDIR)
+ ln -sf libxendebug.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxendebug.so.$(MAJOR)
+ ln -sf libxendebug.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxendebug.so
+ $(INSTALL_DATA) xendebug.h $(DESTDIR)/usr/include
+
+.PHONY: TAGS clean rpm install all
+
+TAGS:
+ etags -t $(SRCS) *.h
+
+clean:
+ rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen
+
+rpm: build
+ rm -rf staging
+ mkdir staging
+ mkdir staging/i386
+	rpmbuild --define "staging $$PWD/staging" --define '_builddir .' \
+	--define "_rpmdir $$PWD/staging" -bb rpm.spec
+ mv staging/i386/*.rpm .
+ rm -rf staging
+
+libxendebug.a: $(LIB_OBJS)
+ $(AR) rc $@ $^
+
+libxendebug.so: libxendebug.so.$(MAJOR)
+ ln -sf $< $@
+libxendebug.so.$(MAJOR): libxendebug.so.$(MAJOR).$(MINOR)
+ ln -sf $< $@
+
+libxendebug.so.$(MAJOR).$(MINOR): $(PIC_OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxendebug.so.$(MAJOR) -shared -o $@ $^
+
+-include $(DEPS)
diff --git a/tools/debugger/libxendebug/list.h b/tools/debugger/libxendebug/list.h
new file mode 100644
index 0000000000..d2ee720f34
--- /dev/null
+++ b/tools/debugger/libxendebug/list.h
@@ -0,0 +1,186 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_del(struct list_head * prev,
+ struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static __inline__ int list_empty(struct list_head *head)
+{
+ return head->next == head;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+ struct list_head *first = list->next;
+
+ if (first != list) {
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+ }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ prefetch(pos->member.next); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member), \
+ prefetch(pos->member.next))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+#endif /* _LINUX_LIST_H */
+
diff --git a/tools/debugger/libxendebug/xendebug.c b/tools/debugger/libxendebug/xendebug.c
new file mode 100644
index 0000000000..844cdf0e03
--- /dev/null
+++ b/tools/debugger/libxendebug/xendebug.c
@@ -0,0 +1,599 @@
+/*
+ * xendebug.c
+ *
+ * alex ho
+ * http://www.cl.cam.ac.uk/netos/pdb
+ *
+ * xendebug_memory_page adapted from xc_ptrace.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <xc.h>
+#include "list.h"
+
+#if defined(__i386__)
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+#elif defined(__x86_64__)
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define L4_PAGETABLE_SHIFT 39
+#endif
+
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define PAGE_SIZE (1UL<<PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+/* from xen/include/asm-x86/processor.h */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+
+typedef int boolean;
+#define true 1
+#define false 0
+
+
+typedef struct bwcpoint /* break/watch/catch point */
+{
+ struct list_head list;
+ memory_t address;
+ u32 domain;
+ u16 vcpu;
+ u8 old_value; /* old value for software bkpt */
+} bwcpoint_t, *bwcpoint_p;
+
+static bwcpoint_t bwcpoint_list;
+
+
+
+typedef struct domain_context /* local cache of domain state */
+{
+ struct list_head list;
+ u32 domid;
+ boolean valid[MAX_VIRT_CPUS];
+ vcpu_guest_context_t context[MAX_VIRT_CPUS];
+
+ long total_pages;
+ unsigned long *page_array;
+
+ unsigned long cr3_phys[MAX_VIRT_CPUS];
+ unsigned long *cr3_virt[MAX_VIRT_CPUS];
+ unsigned long pde_phys[MAX_VIRT_CPUS];
+ unsigned long *pde_virt[MAX_VIRT_CPUS];
+ unsigned long page_phys[MAX_VIRT_CPUS];
+ unsigned long *page_virt[MAX_VIRT_CPUS];
+ int page_perm[MAX_VIRT_CPUS];
+} domain_context_t, *domain_context_p;
+
+static domain_context_t domain_context_list;
+
+/* initialization */
+
+static boolean xendebug_initialized = false;
+
+static __inline__ void
+xendebug_initialize()
+{
+ if ( !xendebug_initialized )
+ {
+ memset((void *) &domain_context_list, 0, sizeof(domain_context_t));
+ INIT_LIST_HEAD(&domain_context_list.list);
+
+ memset((void *) &bwcpoint_list, 0, sizeof(bwcpoint_t));
+ INIT_LIST_HEAD(&bwcpoint_list.list);
+
+ xendebug_initialized = true;
+ }
+}
+
+/**************/
+
+static domain_context_p
+xendebug_domain_context_search (u32 domid)
+{
+ struct list_head *entry;
+ domain_context_p ctxt;
+
+ list_for_each(entry, &domain_context_list.list)
+ {
+ ctxt = list_entry(entry, domain_context_t, list);
+ if ( domid == ctxt->domid )
+ return ctxt;
+ }
+ return (domain_context_p)NULL;
+}
+
+static __inline__ domain_context_p
+xendebug_get_context (int xc_handle, u32 domid, u32 vcpu)
+{
+ int rc;
+ domain_context_p ctxt;
+
+ xendebug_initialize();
+
+ if ( (ctxt = xendebug_domain_context_search(domid)) == NULL)
+ return NULL;
+
+ if ( !ctxt->valid[vcpu] )
+ {
+ if ( (rc = xc_domain_get_vcpu_context(xc_handle, domid, vcpu,
+ &ctxt->context[vcpu])) )
+ return NULL;
+
+ ctxt->valid[vcpu] = true;
+ }
+
+ return ctxt;
+}
+
+static __inline__ int
+xendebug_set_context (int xc_handle, domain_context_p ctxt, u32 vcpu)
+{
+ dom0_op_t op;
+ int rc;
+
+ if ( !ctxt->valid[vcpu] )
+ return -EINVAL;
+
+ op.interface_version = DOM0_INTERFACE_VERSION;
+ op.cmd = DOM0_SETDOMAININFO;
+ op.u.setdomaininfo.domain = ctxt->domid;
+ op.u.setdomaininfo.vcpu = vcpu;
+ op.u.setdomaininfo.ctxt = &ctxt->context[vcpu];
+
+ if ( (rc = mlock(&ctxt->context[vcpu], sizeof(vcpu_guest_context_t))) )
+ return rc;
+
+ rc = xc_dom0_op(xc_handle, &op);
+ (void) munlock(&ctxt->context[vcpu], sizeof(vcpu_guest_context_t));
+
+ return rc;
+}
+
+/**************/
+
+int
+xendebug_attach(int xc_handle,
+ u32 domid,
+ u32 vcpu)
+{
+ domain_context_p ctxt;
+
+ xendebug_initialize();
+
+ if ( (ctxt = malloc(sizeof(domain_context_t))) == NULL )
+ return -1;
+ memset(ctxt, 0, sizeof(domain_context_t));
+
+ ctxt->domid = domid;
+ list_add(&ctxt->list, &domain_context_list.list);
+
+ return xc_domain_pause(xc_handle, domid);
+}
+
+int
+xendebug_detach(int xc_handle,
+ u32 domid,
+ u32 vcpu)
+{
+ domain_context_p ctxt;
+
+ xendebug_initialize();
+
+ if ( (ctxt = xendebug_domain_context_search (domid)) == NULL)
+ return -EINVAL;
+
+ list_del(&ctxt->list);
+
+ if ( ctxt->page_array ) free(ctxt->page_array);
+
+ free(ctxt);
+
+ return xc_domain_unpause(xc_handle, domid);
+}
+
+int
+xendebug_read_registers(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ cpu_user_regs_t **regs)
+{
+ domain_context_p ctxt;
+ int rc = -1;
+
+ xendebug_initialize();
+
+ ctxt = xendebug_get_context(xc_handle, domid, vcpu);
+ if (ctxt)
+ {
+ *regs = &ctxt->context[vcpu].user_regs;
+ rc = 0;
+ }
+
+ return rc;
+}
+
+int
+xendebug_read_fpregisters (int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ char **regs)
+{
+ domain_context_p ctxt;
+ int rc = -1;
+
+ xendebug_initialize();
+
+ ctxt = xendebug_get_context(xc_handle, domid, vcpu);
+ if (ctxt)
+ {
+ *regs = ctxt->context[vcpu].fpu_ctxt.x;
+ rc = 0;
+ }
+
+ return rc;
+}
+
+int
+xendebug_write_registers(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ cpu_user_regs_t *regs)
+{
+ domain_context_p ctxt;
+ int rc = -1;
+
+ xendebug_initialize();
+
+ ctxt = xendebug_get_context(xc_handle, domid, vcpu);
+ if (ctxt)
+ {
+ memcpy(&ctxt->context[vcpu].user_regs, regs, sizeof(cpu_user_regs_t));
+ rc = xendebug_set_context(xc_handle, ctxt, vcpu);
+ }
+
+ return rc;
+}
+
+int
+xendebug_step(int xc_handle,
+ u32 domid,
+ u32 vcpu)
+{
+ domain_context_p ctxt;
+ int rc;
+
+ xendebug_initialize();
+
+ ctxt = xendebug_get_context(xc_handle, domid, vcpu);
+ if (!ctxt) return -EINVAL;
+
+ ctxt->context[vcpu].user_regs.eflags |= X86_EFLAGS_TF;
+
+ if ( (rc = xendebug_set_context(xc_handle, ctxt, vcpu)) )
+ return rc;
+
+ ctxt->valid[vcpu] = false;
+ return xc_domain_unpause(xc_handle, domid);
+}
+
+int
+xendebug_continue(int xc_handle,
+ u32 domid,
+ u32 vcpu)
+{
+ domain_context_p ctxt;
+ int rc;
+
+ xendebug_initialize();
+
+ ctxt = xendebug_get_context(xc_handle, domid, vcpu);
+ if (!ctxt) return -EINVAL;
+
+ if ( ctxt->context[vcpu].user_regs.eflags & X86_EFLAGS_TF )
+ {
+ ctxt->context[vcpu].user_regs.eflags &= ~X86_EFLAGS_TF;
+ if ( (rc = xendebug_set_context(xc_handle, ctxt, vcpu)) )
+ return rc;
+ }
+ ctxt->valid[vcpu] = false;
+ return xc_domain_unpause(xc_handle, domid);
+}
+
+/*************************************************/
+
+#define vtopdi(va) ((va) >> L2_PAGETABLE_SHIFT)
+#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
+
+/* access to one page */
+static int
+xendebug_memory_page (domain_context_p ctxt, int xc_handle, u32 vcpu,
+ int protection, memory_t address, int length, u8 *buffer)
+{
+ vcpu_guest_context_t *vcpu_ctxt = &ctxt->context[vcpu];
+ unsigned long pde, page;
+ unsigned long va = (unsigned long)address;
+ void *ptr;
+ long pages;
+
+ pages = xc_get_tot_pages(xc_handle, ctxt->domid);
+
+ if ( ctxt->total_pages != pages )
+ {
+ if ( ctxt->total_pages > 0 ) free( ctxt->page_array );
+ ctxt->total_pages = pages;
+
+ ctxt->page_array = malloc(pages * sizeof(unsigned long));
+ if ( ctxt->page_array == NULL )
+ {
+ printf("Could not allocate memory\n");
+ return 0;
+ }
+
+ if ( xc_get_pfn_list(xc_handle, ctxt->domid, ctxt->page_array,pages) !=
+ pages )
+ {
+ printf("Could not get the page frame list\n");
+ return 0;
+ }
+ }
+
+ if ( vcpu_ctxt->pt_base != ctxt->cr3_phys[vcpu])
+ {
+ ctxt->cr3_phys[vcpu] = vcpu_ctxt->pt_base;
+ if ( ctxt->cr3_virt[vcpu] )
+ munmap(ctxt->cr3_virt[vcpu], PAGE_SIZE);
+ ctxt->cr3_virt[vcpu] = xc_map_foreign_range(xc_handle, ctxt->domid,
+ PAGE_SIZE, PROT_READ, ctxt->cr3_phys[vcpu] >> PAGE_SHIFT);
+ if ( ctxt->cr3_virt[vcpu] == NULL )
+ return 0;
+ }
+
+
+ if ( (pde = ctxt->cr3_virt[vcpu][vtopdi(va)]) == 0) /* logical address */
+ return 0;
+ if (ctxt->context[vcpu].flags & VGCF_VMX_GUEST)
+ pde = ctxt->page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
+ if (pde != ctxt->pde_phys[vcpu])
+ {
+ ctxt->pde_phys[vcpu] = pde;
+ if ( ctxt->pde_virt[vcpu])
+ munmap(ctxt->pde_virt[vcpu], PAGE_SIZE);
+ ctxt->pde_virt[vcpu] = xc_map_foreign_range(xc_handle, ctxt->domid,
+ PAGE_SIZE, PROT_READ, ctxt->pde_phys[vcpu] >> PAGE_SHIFT);
+ if ( ctxt->pde_virt[vcpu] == NULL )
+ return 0;
+ }
+
+ if ((page = ctxt->pde_virt[vcpu][vtopti(va)]) == 0) /* logical address */
+ return 0;
+ if (ctxt->context[vcpu].flags & VGCF_VMX_GUEST)
+ page = ctxt->page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
+ if (page != ctxt->page_phys[vcpu] || protection != ctxt->page_perm[vcpu])
+ {
+ ctxt->page_phys[vcpu] = page;
+ if (ctxt->page_virt[vcpu])
+ munmap(ctxt->page_virt[vcpu], PAGE_SIZE);
+ ctxt->page_virt[vcpu] = xc_map_foreign_range(xc_handle, ctxt->domid,
+ PAGE_SIZE, protection, ctxt->page_phys[vcpu] >> PAGE_SHIFT);
+ if ( ctxt->page_virt[vcpu] == NULL )
+ {
+ printf("cr3 %lx pde %lx page %lx pti %lx\n",
+ vcpu_ctxt->pt_base, pde, page, vtopti(va));
+ ctxt->page_phys[vcpu] = 0;
+ return 0;
+ }
+ ctxt->page_perm[vcpu] = protection;
+ }
+
+ ptr = (void *)( (unsigned long)ctxt->page_virt[vcpu] |
+ (va & ~PAGE_MASK) );
+
+ if ( protection & PROT_WRITE )
+ {
+ memcpy(ptr, buffer, length);
+ }
+ else
+ {
+ memcpy(buffer, ptr, length);
+ }
+
+ return length;
+}
+
+/* divide a memory operation into accesses to individual pages */
+static int
+xendebug_memory_op (domain_context_p ctxt, int xc_handle, u32 vcpu,
+ int protection, memory_t address, int length, u8 *buffer)
+{
+ int remain; /* number of bytes to touch past this page */
+ int bytes = 0;
+
+ while ( (remain = (address + length - 1) - (address | (PAGE_SIZE-1))) > 0)
+ {
+ bytes += xendebug_memory_page(ctxt, xc_handle, vcpu, protection,
+ address, length - remain, buffer);
+ buffer += (length - remain);
+ length = remain;
+ address = (address | (PAGE_SIZE - 1)) + 1;
+ }
+
+ bytes += xendebug_memory_page(ctxt, xc_handle, vcpu, protection,
+ address, length, buffer);
+
+ return bytes;
+}
+
+int
+xendebug_read_memory(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ memory_t address,
+ u32 length,
+ u8 *data)
+{
+ domain_context_p ctxt;
+
+ xendebug_initialize();
+
+ ctxt = xendebug_get_context(xc_handle, domid, vcpu);
+    if ( ctxt == NULL ) return -EINVAL;
+ xendebug_memory_op(ctxt, xc_handle, vcpu, PROT_READ,
+ address, length, data);
+
+ return 0;
+}
+
+int
+xendebug_write_memory(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ memory_t address,
+ u32 length,
+ u8 *data)
+{
+ domain_context_p ctxt;
+
+ xendebug_initialize();
+
+ ctxt = xendebug_get_context(xc_handle, domid, vcpu);
+    if ( ctxt == NULL ) return -EINVAL;
+    xendebug_memory_op(ctxt, xc_handle, vcpu, PROT_READ | PROT_WRITE,
+                       address, length, data);
+
+ return 0;
+}
+
+int
+xendebug_insert_memory_breakpoint(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ memory_t address,
+ u32 length)
+{
+ bwcpoint_p bkpt;
+ u8 breakpoint_opcode = 0xcc;
+
+ printf("insert breakpoint %d:%lx %d\n",
+ domid, address, length);
+
+ xendebug_initialize();
+
+ bkpt = malloc(sizeof(bwcpoint_t));
+ if ( bkpt == NULL )
+ {
+        printf("error: unable to allocate memory for breakpoint\n");
+ return -1;
+ }
+
+ if ( length != 1 )
+ {
+ printf("error: breakpoint length should be 1\n");
+ free(bkpt);
+ return -1;
+ }
+
+ bkpt->address = address;
+ bkpt->domain = domid;
+
+ xendebug_read_memory(xc_handle, domid, vcpu, address, 1,
+ &bkpt->old_value);
+
+ xendebug_write_memory(xc_handle, domid, vcpu, address, 1,
+ &breakpoint_opcode);
+
+ list_add(&bkpt->list, &bwcpoint_list.list);
+
+ printf("breakpoint_set %d:%lx 0x%x\n",
+ domid, address, bkpt->old_value);
+
+ return 0;
+}
+
+int
+xendebug_remove_memory_breakpoint(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ memory_t address,
+ u32 length)
+{
+ bwcpoint_p bkpt = NULL;
+
+ printf ("remove breakpoint %d:%lx\n",
+ domid, address);
+
+ struct list_head *entry;
+ list_for_each(entry, &bwcpoint_list.list)
+ {
+ bkpt = list_entry(entry, bwcpoint_t, list);
+ if ( domid == bkpt->domain && address == bkpt->address )
+ break;
+ }
+
+    if (entry == &bwcpoint_list.list || bkpt == NULL)
+ {
+ printf ("error: no breakpoint found\n");
+ return -1;
+ }
+
+ list_del(&bkpt->list);
+
+ xendebug_write_memory(xc_handle, domid, vcpu, address, 1,
+ &bkpt->old_value);
+
+ free(bkpt);
+ return 0;
+}
+
+int
+xendebug_query_domain_stop(int xc_handle, int *dom_list, int dom_list_size)
+{
+ xc_dominfo_t *info;
+ u32 first_dom = 0;
+ int max_doms = 1024;
+ int nr_doms, loop;
+ int count = 0;
+
+ if ( (info = malloc(max_doms * sizeof(xc_dominfo_t))) == NULL )
+ return -ENOMEM;
+
+ nr_doms = xc_domain_getinfo(xc_handle, first_dom, max_doms, info);
+
+ for (loop = 0; loop < nr_doms; loop++)
+ {
+ printf ("domid: %d", info[loop].domid);
+ printf (" %c%c%c%c%c%c",
+ info[loop].dying ? 'D' : '-',
+ info[loop].crashed ? 'C' : '-',
+ info[loop].shutdown ? 'S' : '-',
+ info[loop].paused ? 'P' : '-',
+ info[loop].blocked ? 'B' : '-',
+ info[loop].running ? 'R' : '-');
+ printf (" pages: %ld, vcpus %d",
+ info[loop].nr_pages, info[loop].vcpus);
+ printf ("\n");
+
+ if ( info[loop].paused && count < dom_list_size)
+ {
+ dom_list[count++] = info[loop].domid;
+ }
+ }
+
+ free(info);
+
+ return count;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/debugger/libxendebug/xendebug.h b/tools/debugger/libxendebug/xendebug.h
new file mode 100644
index 0000000000..66a45104ba
--- /dev/null
+++ b/tools/debugger/libxendebug/xendebug.h
@@ -0,0 +1,78 @@
+/*
+ * xendebug.h
+ *
+ * alex ho
+ * http://www.cl.cam.ac.uk/netos/pdb
+ *
+ */
+
+#ifndef _XENDEBUG_H_DEFINED
+#define _XENDEBUG_H_DEFINED
+
+#include <xc.h>
+
+int xendebug_attach(int xc_handle,
+ u32 domid,
+ u32 vcpu);
+
+int xendebug_detach(int xc_handle,
+ u32 domid,
+ u32 vcpu);
+
+int xendebug_read_registers(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ cpu_user_regs_t **regs);
+
+int xendebug_read_fpregisters (int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ char **regs);
+
+int xendebug_write_registers(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ cpu_user_regs_t *regs);
+
+int xendebug_step(int xc_handle,
+ u32 domid,
+ u32 vcpu);
+
+int xendebug_continue(int xc_handle,
+ u32 domid,
+ u32 vcpu);
+
+int xendebug_read_memory(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ memory_t address,
+ u32 length,
+ u8 *data);
+
+
+int xendebug_write_memory(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ memory_t address,
+ u32 length,
+ u8 *data);
+
+
+int xendebug_insert_memory_breakpoint(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ memory_t address,
+ u32 length);
+
+int xendebug_remove_memory_breakpoint(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ memory_t address,
+ u32 length);
+
+int xendebug_query_domain_stop(int xc_handle,
+ int *dom_list,
+ int dom_list_size);
+
+
+#endif /* _XENDEBUG_H_DEFINED */
diff --git a/tools/debugger/pdb/Domain.ml b/tools/debugger/pdb/Domain.ml
new file mode 100644
index 0000000000..700699a958
--- /dev/null
+++ b/tools/debugger/pdb/Domain.ml
@@ -0,0 +1,63 @@
+(** Domain.ml
+ *
+ * domain context implementation
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+open Int32
+open Intel
+
+type context_t =
+{
+ mutable domain : int;
+ mutable execution_domain : int
+}
+
+let default_context = { domain = 0; execution_domain = 0 }
+
+let new_context dom exec_dom = {domain = dom; execution_domain = exec_dom}
+
+let set_domain ctx value =
+ ctx.domain <- value;
+ print_endline (Printf.sprintf "ctx.domain <- %d" ctx.domain)
+
+let set_execution_domain ctx value =
+ ctx.execution_domain <- value;
+ print_endline (Printf.sprintf "ctx.execution_domain <- %d"
+ ctx.execution_domain)
+
+let get_domain ctx =
+ ctx.domain
+
+let get_execution_domain ctx =
+ ctx.execution_domain
+
+let string_of_context ctx =
+ Printf.sprintf "{domain} domain: %d, execution_domain: %d"
+ ctx.domain ctx.execution_domain
+
+external read_registers : context_t -> registers = "read_registers"
+external write_register : context_t -> register -> int32 -> unit =
+ "write_register"
+external read_memory : context_t -> int32 -> int -> int list =
+ "read_memory"
+external write_memory : context_t -> int32 -> int list -> unit =
+ "write_memory"
+
+external continue : context_t -> unit = "continue_target"
+external step : context_t -> unit = "step_target"
+
+external insert_memory_breakpoint : context_t -> int32 -> int -> unit =
+ "insert_memory_breakpoint"
+external remove_memory_breakpoint : context_t -> int32 -> int -> unit =
+ "remove_memory_breakpoint"
+
+external attach_debugger : int -> int -> unit = "attach_debugger"
+external detach_debugger : int -> int -> unit = "detach_debugger"
+external pause_target : int -> unit = "pause_target"
+
+let pause ctx =
+ pause_target ctx.domain
diff --git a/tools/debugger/pdb/Domain.mli b/tools/debugger/pdb/Domain.mli
new file mode 100644
index 0000000000..456d19489d
--- /dev/null
+++ b/tools/debugger/pdb/Domain.mli
@@ -0,0 +1,38 @@
+(** Domain.mli
+ *
+ * domain context interface
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+open Int32
+open Intel
+
+type context_t
+
+val default_context : context_t
+val new_context : int -> int -> context_t
+
+val set_domain : context_t -> int -> unit
+val get_domain : context_t -> int
+val set_execution_domain : context_t -> int -> unit
+val get_execution_domain : context_t -> int
+
+val string_of_context : context_t -> string
+
+val read_registers : context_t -> registers
+val write_register : context_t -> register -> int32 -> unit
+val read_memory : context_t -> int32 -> int -> int list
+val write_memory : context_t -> int32 -> int list -> unit
+
+val continue : context_t -> unit
+val step : context_t -> unit
+
+val insert_memory_breakpoint : context_t -> int32 -> int -> unit
+val remove_memory_breakpoint : context_t -> int32 -> int -> unit
+
+val attach_debugger : int -> int -> unit
+val detach_debugger : int -> int -> unit
+val pause : context_t -> unit
diff --git a/tools/debugger/pdb/Intel.ml b/tools/debugger/pdb/Intel.ml
new file mode 100644
index 0000000000..d82ef8b527
--- /dev/null
+++ b/tools/debugger/pdb/Intel.ml
@@ -0,0 +1,71 @@
+(** Intel.ml
+ *
+ * various sundry Intel x86 definitions
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+
+type register =
+ | EBX
+ | ECX
+ | EDX
+ | ESI
+ | EDI
+ | EBP
+ | EAX
+ | Error_code
+ | Entry_vector
+ | EIP
+ | CS
+ | EFLAGS
+ | ESP
+ | SS
+ | ES
+ | DS
+ | FS
+ | GS
+
+type registers =
+ { ebx : int32;
+ ecx : int32;
+ edx : int32;
+ esi : int32;
+ edi : int32;
+ ebp : int32;
+ eax : int32;
+ error_code : int32;
+ entry_vector : int32;
+ eip : int32;
+ cs : int32;
+ eflags : int32;
+ esp : int32;
+ ss : int32;
+ es : int32;
+ ds : int32;
+ fs : int32;
+ gs : int32
+ }
+
+let null_registers =
+ { ebx = 0l;
+ ecx = 0l;
+ edx = 0l;
+ esi = 0l;
+ edi = 0l;
+ ebp = 0l;
+ eax = 0l;
+ error_code = 0l;
+ entry_vector = 0l;
+ eip = 0l;
+ cs = 0l;
+ eflags = 0l;
+ esp = 0l;
+ ss = 0l;
+ es = 0l;
+ ds = 0l;
+ fs = 0l;
+ gs = 0l
+ }
diff --git a/tools/debugger/pdb/Makefile b/tools/debugger/pdb/Makefile
new file mode 100644
index 0000000000..579c7da12c
--- /dev/null
+++ b/tools/debugger/pdb/Makefile
@@ -0,0 +1,56 @@
+OCAMLMAKEFILE = OCamlMakefile
+
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# overwrite LDFLAGS from xen/tool/Rules.mk
+# otherwise, ocamlmktop gets confused.
+LDFLAGS =
+
+OCAML_ROOT=/usr/local
+# force ocaml 3.08
+# OCAML_ROOT = /anfs/nos1/ach61/ocaml
+
+OCAMLC = $(OCAML_ROOT)/bin/ocamlc
+OCAMLMKTOP = $(OCAML_ROOT)/bin/ocamlmktop
+OCAMLLIBPATH= $(OCAML_ROOT)/lib/ocaml
+
+INCLUDES += -I $(XEN_XC)
+INCLUDES += -I $(XEN_LIBXC)
+INCLUDES += -I ../libxendebug
+INCLUDES += -I $(OCAML_ROOT)/lib/ocaml
+
+CFLAGS += $(INCLUDES)
+CFLAGS += -Wall
+CFLAGS += -Werror
+CFLAGS += -g
+
+CLIBS += xc
+CLIBS += xendebug
+CLIBS += pdb
+
+LIBDIRS += $(XEN_LIBXC)
+LIBDIRS += $(XEN_LIBXUTIL)
+LIBDIRS += ../libxendebug
+LIBDIRS += .
+
+LIBS += unix str
+
+PRE_TARGETS = libpdb.a
+
+all : bc
+
+libpdb.a : pdb_xen.o
+ ar rc $@ $^
+ ranlib $@
+
+SOURCES += pdb_caml_xc.c pdb_xen.c
+SOURCES += Util.ml Intel.ml
+SOURCES += evtchn.ml evtchn.mli
+SOURCES += Domain.ml Process.ml
+SOURCES += Domain.mli Process.mli
+SOURCES += PDB.ml debugger.ml server.ml
+RESULT = pdb
+
+include $(OCAMLMAKEFILE)
+
diff --git a/tools/debugger/pdb/OCamlMakefile b/tools/debugger/pdb/OCamlMakefile
new file mode 100644
index 0000000000..0c6d23ab00
--- /dev/null
+++ b/tools/debugger/pdb/OCamlMakefile
@@ -0,0 +1,1149 @@
+###########################################################################
+# OCamlMakefile
+# Copyright (C) 1999-2004 Markus Mottl
+#
+# For updates see:
+# http://www.oefai.at/~markus/ocaml_sources
+#
+# $Id: OCamlMakefile,v 1.1 2005/05/19 09:30:48 root Exp $
+#
+###########################################################################
+
+# Modified by damien for .glade.ml compilation
+
+# Set these variables to the names of the sources to be processed and
+# the result variable. Order matters during linkage!
+
+ifndef SOURCES
+ SOURCES := foo.ml
+endif
+export SOURCES
+
+ifndef RES_CLIB_SUF
+ RES_CLIB_SUF := _stubs
+endif
+export RES_CLIB_SUF
+
+ifndef RESULT
+ RESULT := foo
+endif
+export RESULT
+
+export LIB_PACK_NAME
+
+ifndef DOC_FILES
+ DOC_FILES := $(filter %.mli, $(SOURCES))
+endif
+export DOC_FILES
+
+export BCSUFFIX
+export NCSUFFIX
+
+ifndef TOPSUFFIX
+ TOPSUFFIX := .top
+endif
+export TOPSUFFIX
+
+# Eventually set include- and library-paths, libraries to link,
+# additional compilation-, link- and ocamlyacc-flags
+# Path- and library information needs not be written with "-I" and such...
+# Define THREADS if you need it, otherwise leave it unset (same for
+# USE_CAMLP4)!
+
+export THREADS
+export VMTHREADS
+export ANNOTATE
+export USE_CAMLP4
+
+export INCDIRS
+export LIBDIRS
+export EXTLIBDIRS
+export RESULTDEPS
+export OCAML_DEFAULT_DIRS
+
+export LIBS
+export CLIBS
+
+export OCAMLFLAGS
+export OCAMLNCFLAGS
+export OCAMLBCFLAGS
+
+export OCAMLLDFLAGS
+export OCAMLNLDFLAGS
+export OCAMLBLDFLAGS
+
+ifndef OCAMLCPFLAGS
+ OCAMLCPFLAGS := a
+endif
+
+export OCAMLCPFLAGS
+
+export PPFLAGS
+
+export YFLAGS
+export IDLFLAGS
+
+export OCAMLDOCFLAGS
+
+export OCAMLFIND_INSTFLAGS
+
+export DVIPSFLAGS
+
+export STATIC
+
+# Add a list of optional trash files that should be deleted by "make clean"
+export TRASH
+
+#################### variables depending on your OCaml-installation
+
+ifdef MINGW
+ export MINGW
+ WIN32 := 1
+ CFLAGS_WIN32 := -mno-cygwin
+endif
+ifdef MSVC
+ export MSVC
+ WIN32 := 1
+ ifndef STATIC
+ CPPFLAGS_WIN32 := -DCAML_DLL
+ endif
+ CFLAGS_WIN32 += -nologo
+ EXT_OBJ := obj
+ EXT_LIB := lib
+ ifeq ($(CC),gcc)
+ # work around GNU Make default value
+ ifdef THREADS
+ CC := cl -MT
+ else
+ CC := cl
+ endif
+ endif
+ ifeq ($(CXX),g++)
+ # work around GNU Make default value
+ CXX := $(CC)
+ endif
+ CFLAG_O := -Fo
+endif
+ifdef WIN32
+ EXT_CXX := cpp
+ EXE := .exe
+endif
+
+ifndef EXT_OBJ
+ EXT_OBJ := o
+endif
+ifndef EXT_LIB
+ EXT_LIB := a
+endif
+ifndef EXT_CXX
+ EXT_CXX := cc
+endif
+ifndef EXE
+ EXE := # empty
+endif
+ifndef CFLAG_O
+ CFLAG_O := -o # do not delete this comment (preserves trailing whitespace)!
+endif
+
+export CC
+export CXX
+export CFLAGS
+export CXXFLAGS
+export LDFLAGS
+export CPPFLAGS
+
+ifndef RPATH_FLAG
+ RPATH_FLAG := -R
+endif
+export RPATH_FLAG
+
+ifndef MSVC
+ifndef PIC_CFLAGS
+ PIC_CFLAGS := -fPIC
+endif
+ifndef PIC_CPPFLAGS
+ PIC_CPPFLAGS := -DPIC
+endif
+endif
+
+export PIC_CFLAGS
+export PIC_CPPFLAGS
+
+BCRESULT := $(addsuffix $(BCSUFFIX), $(RESULT))
+NCRESULT := $(addsuffix $(NCSUFFIX), $(RESULT))
+TOPRESULT := $(addsuffix $(TOPSUFFIX), $(RESULT))
+
+ifndef OCAMLFIND
+ OCAMLFIND := ocamlfind
+endif
+export OCAMLFIND
+
+ifndef OCAMLC
+ OCAMLC := ocamlc
+endif
+export OCAMLC
+
+ifndef OCAMLOPT
+ OCAMLOPT := ocamlopt
+endif
+export OCAMLOPT
+
+ifndef OCAMLMKTOP
+ OCAMLMKTOP := ocamlmktop
+endif
+export OCAMLMKTOP
+
+ifndef OCAMLCP
+ OCAMLCP := ocamlcp
+endif
+export OCAMLCP
+
+ifndef OCAMLDEP
+ OCAMLDEP := ocamldep
+endif
+export OCAMLDEP
+
+ifndef OCAMLLEX
+ OCAMLLEX := ocamllex
+endif
+export OCAMLLEX
+
+ifndef OCAMLYACC
+ OCAMLYACC := ocamlyacc
+endif
+export OCAMLYACC
+
+ifndef OCAMLMKLIB
+ OCAMLMKLIB := ocamlmklib
+endif
+export OCAMLMKLIB
+
+ifndef OCAML_GLADECC
+ OCAML_GLADECC := lablgladecc2
+endif
+export OCAML_GLADECC
+
+ifndef OCAML_GLADECC_FLAGS
+ OCAML_GLADECC_FLAGS :=
+endif
+export OCAML_GLADECC_FLAGS
+
+ifndef CAMELEON_REPORT
+ CAMELEON_REPORT := report
+endif
+export CAMELEON_REPORT
+
+ifndef CAMELEON_REPORT_FLAGS
+ CAMELEON_REPORT_FLAGS :=
+endif
+export CAMELEON_REPORT_FLAGS
+
+ifndef CAMELEON_ZOGGY
+ CAMELEON_ZOGGY := camlp4o pa_zog.cma pr_o.cmo
+endif
+export CAMELEON_ZOGGY
+
+ifndef CAMELEON_ZOGGY_FLAGS
+ CAMELEON_ZOGGY_FLAGS :=
+endif
+export CAMELEON_ZOGGY_FLAGS
+
+ifndef OXRIDL
+ OXRIDL := oxridl
+endif
+export OXRIDL
+
+ifndef CAMLIDL
+ CAMLIDL := camlidl
+endif
+export CAMLIDL
+
+ifndef CAMLIDLDLL
+ CAMLIDLDLL := camlidldll
+endif
+export CAMLIDLDLL
+
+ifndef NOIDLHEADER
+ MAYBE_IDL_HEADER := -header
+endif
+export NOIDLHEADER
+
+export NO_CUSTOM
+
+ifndef CAMLP4
+ CAMLP4 := camlp4
+endif
+export CAMLP4
+
+ifndef REAL_OCAMLFIND
+ ifdef PACKS
+ ifndef CREATE_LIB
+ ifdef THREADS
+ PACKS += threads
+ endif
+ endif
+ empty :=
+ space := $(empty) $(empty)
+ comma := ,
+ ifdef PREDS
+ PRE_OCAML_FIND_PREDICATES := $(subst $(space),$(comma),$(PREDS))
+ PRE_OCAML_FIND_PACKAGES := $(subst $(space),$(comma),$(PACKS))
+ OCAML_FIND_PREDICATES := -predicates $(PRE_OCAML_FIND_PREDICATES)
+ # OCAML_DEP_PREDICATES := -syntax $(PRE_OCAML_FIND_PREDICATES)
+ OCAML_FIND_PACKAGES := $(OCAML_FIND_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES)
+ OCAML_DEP_PACKAGES := $(OCAML_DEP_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES)
+ else
+ OCAML_FIND_PACKAGES := -package $(subst $(space),$(comma),$(PACKS))
+ OCAML_DEP_PACKAGES :=
+ endif
+ OCAML_FIND_LINKPKG := -linkpkg
+ REAL_OCAMLFIND := $(OCAMLFIND)
+ endif
+endif
+
+export OCAML_FIND_PACKAGES
+export OCAML_DEP_PACKAGES
+export OCAML_FIND_LINKPKG
+export REAL_OCAMLFIND
+
+ifndef OCAMLDOC
+ OCAMLDOC := ocamldoc
+endif
+export OCAMLDOC
+
+ifndef LATEX
+ LATEX := latex
+endif
+export LATEX
+
+ifndef DVIPS
+ DVIPS := dvips
+endif
+export DVIPS
+
+ifndef PS2PDF
+ PS2PDF := ps2pdf
+endif
+export PS2PDF
+
+ifndef OCAMLMAKEFILE
+ OCAMLMAKEFILE := OCamlMakefile
+endif
+export OCAMLMAKEFILE
+
+ifndef OCAMLLIBPATH
+ OCAMLLIBPATH := \
+ $(shell $(OCAMLC) 2>/dev/null -where || echo /usr/local/lib/ocaml)
+endif
+export OCAMLLIBPATH
+
+ifndef OCAML_LIB_INSTALL
+ OCAML_LIB_INSTALL := $(OCAMLLIBPATH)/contrib
+endif
+export OCAML_LIB_INSTALL
+
+###########################################################################
+
+#################### change following sections only if
+#################### you know what you are doing!
+
+# delete target files when a build command fails
+.PHONY: .DELETE_ON_ERROR
+.DELETE_ON_ERROR:
+
+# for pedants using "--warn-undefined-variables"
+export MAYBE_IDL
+export REAL_RESULT
+export CAMLIDLFLAGS
+export THREAD_FLAG
+export RES_CLIB
+export MAKEDLL
+export ANNOT_FLAG
+export C_OXRIDL
+export SUBPROJS
+export CFLAGS_WIN32
+export CPPFLAGS_WIN32
+
+INCFLAGS :=
+
+SHELL := /bin/sh
+
+MLDEPDIR := ._d
+BCDIDIR := ._bcdi
+NCDIDIR := ._ncdi
+
+FILTER_EXTNS := %.mli %.ml %.mll %.mly %.idl %.oxridl %.c %.$(EXT_CXX) %.rep %.zog %.glade
+
+FILTERED := $(filter $(FILTER_EXTNS), $(SOURCES))
+SOURCE_DIRS := $(filter-out ./, $(sort $(dir $(FILTERED))))
+
+FILTERED_REP := $(filter %.rep, $(FILTERED))
+DEP_REP := $(FILTERED_REP:%.rep=$(MLDEPDIR)/%.d)
+AUTO_REP := $(FILTERED_REP:.rep=.ml)
+
+FILTERED_ZOG := $(filter %.zog, $(FILTERED))
+DEP_ZOG := $(FILTERED_ZOG:%.zog=$(MLDEPDIR)/%.d)
+AUTO_ZOG := $(FILTERED_ZOG:.zog=.ml)
+
+FILTERED_GLADE := $(filter %.glade, $(FILTERED))
+DEP_GLADE := $(FILTERED_GLADE:%.glade=$(MLDEPDIR)/%.d)
+AUTO_GLADE := $(FILTERED_GLADE:.glade=.ml)
+
+FILTERED_ML := $(filter %.ml, $(FILTERED))
+DEP_ML := $(FILTERED_ML:%.ml=$(MLDEPDIR)/%.d)
+
+FILTERED_MLI := $(filter %.mli, $(FILTERED))
+DEP_MLI := $(FILTERED_MLI:.mli=.di)
+
+FILTERED_MLL := $(filter %.mll, $(FILTERED))
+DEP_MLL := $(FILTERED_MLL:%.mll=$(MLDEPDIR)/%.d)
+AUTO_MLL := $(FILTERED_MLL:.mll=.ml)
+
+FILTERED_MLY := $(filter %.mly, $(FILTERED))
+DEP_MLY := $(FILTERED_MLY:%.mly=$(MLDEPDIR)/%.d) $(FILTERED_MLY:.mly=.di)
+AUTO_MLY := $(FILTERED_MLY:.mly=.mli) $(FILTERED_MLY:.mly=.ml)
+
+FILTERED_IDL := $(filter %.idl, $(FILTERED))
+DEP_IDL := $(FILTERED_IDL:%.idl=$(MLDEPDIR)/%.d) $(FILTERED_IDL:.idl=.di)
+C_IDL := $(FILTERED_IDL:%.idl=%_stubs.c)
+ifndef NOIDLHEADER
+ C_IDL += $(FILTERED_IDL:.idl=.h)
+endif
+OBJ_C_IDL := $(FILTERED_IDL:%.idl=%_stubs.$(EXT_OBJ))
+AUTO_IDL := $(FILTERED_IDL:.idl=.mli) $(FILTERED_IDL:.idl=.ml) $(C_IDL)
+
+FILTERED_OXRIDL := $(filter %.oxridl, $(FILTERED))
+DEP_OXRIDL := $(FILTERED_OXRIDL:%.oxridl=$(MLDEPDIR)/%.d) $(FILTERED_OXRIDL:.oxridl=.di)
+AUTO_OXRIDL := $(FILTERED_OXRIDL:.oxridl=.mli) $(FILTERED_OXRIDL:.oxridl=.ml) $(C_OXRIDL)
+
+FILTERED_C_CXX := $(filter %.c %.$(EXT_CXX), $(FILTERED))
+OBJ_C_CXX := $(FILTERED_C_CXX:.c=.$(EXT_OBJ))
+OBJ_C_CXX := $(OBJ_C_CXX:.$(EXT_CXX)=.$(EXT_OBJ))
+
+PRE_TARGETS += $(AUTO_MLL) $(AUTO_MLY) $(AUTO_IDL) $(AUTO_OXRIDL) $(AUTO_ZOG) $(AUTO_REP) $(AUTO_GLADE)
+
+ALL_DEPS := $(DEP_ML) $(DEP_MLI) $(DEP_MLL) $(DEP_MLY) $(DEP_IDL) $(DEP_OXRIDL) $(DEP_ZOG) $(DEP_REP) $(DEP_GLADE)
+
+MLDEPS := $(filter %.d, $(ALL_DEPS))
+MLIDEPS := $(filter %.di, $(ALL_DEPS))
+BCDEPIS := $(MLIDEPS:%.di=$(BCDIDIR)/%.di)
+NCDEPIS := $(MLIDEPS:%.di=$(NCDIDIR)/%.di)
+
+ALLML := $(filter %.mli %.ml %.mll %.mly %.idl %.oxridl %.rep %.zog %.glade, $(FILTERED))
+
+IMPLO_INTF := $(ALLML:%.mli=%.mli.__)
+IMPLO_INTF := $(foreach file, $(IMPLO_INTF), \
+ $(basename $(file)).cmi $(basename $(file)).cmo)
+IMPLO_INTF := $(filter-out %.mli.cmo, $(IMPLO_INTF))
+IMPLO_INTF := $(IMPLO_INTF:%.mli.cmi=%.cmi)
+
+IMPLX_INTF := $(IMPLO_INTF:.cmo=.cmx)
+
+INTF := $(filter %.cmi, $(IMPLO_INTF))
+IMPL_CMO := $(filter %.cmo, $(IMPLO_INTF))
+IMPL_CMX := $(IMPL_CMO:.cmo=.cmx)
+IMPL_ASM := $(IMPL_CMO:.cmo=.asm)
+IMPL_S := $(IMPL_CMO:.cmo=.s)
+
+OBJ_LINK := $(OBJ_C_IDL) $(OBJ_C_CXX)
+OBJ_FILES := $(IMPL_CMO:.cmo=.$(EXT_OBJ)) $(OBJ_LINK)
+
+EXECS := $(addsuffix $(EXE), \
+ $(sort $(TOPRESULT) $(BCRESULT) $(NCRESULT)))
+ifdef WIN32
+ EXECS += $(BCRESULT).dll $(NCRESULT).dll
+endif
+
+CLIB_BASE := $(RESULT)$(RES_CLIB_SUF)
+ifneq ($(strip $(OBJ_LINK)),)
+ RES_CLIB := lib$(CLIB_BASE).$(EXT_LIB)
+endif
+
+ifdef WIN32
+DLLSONAME := $(CLIB_BASE).dll
+else
+DLLSONAME := dll$(CLIB_BASE).so
+endif
+
+NONEXECS := $(INTF) $(IMPL_CMO) $(IMPL_CMX) $(IMPL_ASM) $(IMPL_S) \
+ $(OBJ_FILES) $(PRE_TARGETS) $(BCRESULT).cma $(NCRESULT).cmxa \
+ $(NCRESULT).$(EXT_LIB) $(BCRESULT).cmi $(BCRESULT).cmo \
+ $(NCRESULT).cmi $(NCRESULT).cmx $(NCRESULT).o \
+ $(RES_CLIB) $(IMPL_CMO:.cmo=.annot) \
+ $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(LIB_PACK_NAME).cmx $(LIB_PACK_NAME).o
+
+ifndef STATIC
+ NONEXECS += $(DLLSONAME)
+endif
+
+ifndef LIBINSTALL_FILES
+ LIBINSTALL_FILES := $(RESULT).mli $(RESULT).cmi $(RESULT).cma \
+ $(RESULT).cmxa $(RESULT).$(EXT_LIB) $(RES_CLIB)
+ ifndef STATIC
+ ifneq ($(strip $(OBJ_LINK)),)
+ LIBINSTALL_FILES += $(DLLSONAME)
+ endif
+ endif
+endif
+
+export LIBINSTALL_FILES
+
+ifdef WIN32
+ # some extra stuff is created while linking DLLs
+ NONEXECS += $(BCRESULT).$(EXT_LIB) $(BCRESULT).exp $(NCRESULT).exp $(CLIB_BASE).exp $(CLIB_BASE).lib
+endif
+
+TARGETS := $(EXECS) $(NONEXECS)
+
+# If there are IDL-files
+ifneq ($(strip $(FILTERED_IDL)),)
+ MAYBE_IDL := -cclib -lcamlidl
+endif
+
+ifdef USE_CAMLP4
+ CAMLP4PATH := \
+ $(shell $(CAMLP4) -where 2>/dev/null || echo /usr/local/lib/camlp4)
+ INCFLAGS := -I $(CAMLP4PATH)
+ CINCFLAGS := -I$(CAMLP4PATH)
+endif
+
+DINCFLAGS := $(INCFLAGS) $(SOURCE_DIRS:%=-I %) $(OCAML_DEFAULT_DIRS:%=-I %)
+INCFLAGS := $(DINCFLAGS) $(INCDIRS:%=-I %)
+CINCFLAGS += $(SOURCE_DIRS:%=-I%) $(INCDIRS:%=-I%) $(OCAML_DEFAULT_DIRS:%=-I%)
+
+ifndef MSVC
+CLIBFLAGS += $(SOURCE_DIRS:%=-L%) $(LIBDIRS:%=-L%) \
+ $(EXTLIBDIRS:%=-L%) $(EXTLIBDIRS:%=-Wl,$(RPATH_FLAG)%) \
+ $(OCAML_DEFAULT_DIRS:%=-L%)
+endif
+
+ifndef PROFILING
+ INTF_OCAMLC := $(OCAMLC)
+else
+ ifndef THREADS
+ INTF_OCAMLC := $(OCAMLCP) -p $(OCAMLCPFLAGS)
+ else
+ # OCaml does not support profiling byte code
+ # with threads (yet), therefore we force an error.
+ ifndef REAL_OCAMLC
+ $(error Profiling of multithreaded byte code not yet supported by OCaml)
+ endif
+ INTF_OCAMLC := $(OCAMLC)
+ endif
+endif
+
+ifndef MSVC
+COMMON_LDFLAGS := $(LDFLAGS:%=-ccopt %) $(SOURCE_DIRS:%=-ccopt -L%) \
+ $(LIBDIRS:%=-ccopt -L%) $(EXTLIBDIRS:%=-ccopt -L%) \
+ $(EXTLIBDIRS:%=-ccopt -Wl,$(RPATH_FLAG)%) \
+ $(OCAML_DEFAULT_DIRS:%=-ccopt -L%)
+else
+COMMON_LDFLAGS := -ccopt "/link -NODEFAULTLIB:LIBC $(LDFLAGS:%=%) $(SOURCE_DIRS:%=-LIBPATH:%) \
+ $(LIBDIRS:%=-LIBPATH:%) $(EXTLIBDIRS:%=-LIBPATH:%) \
+ $(OCAML_DEFAULT_DIRS:%=-LIBPATH:%) "
+endif
+
+CLIBS_OPTS := $(CLIBS:%=-cclib -l%)
+ifdef MSVC
+ ifndef STATIC
+ # MSVC libraries do not have 'lib' prefix
+ CLIBS_OPTS := $(CLIBS:%=-cclib %.lib)
+ endif
+endif
+
+ifneq ($(strip $(OBJ_LINK)),)
+ ifdef CREATE_LIB
+ OBJS_LIBS := -cclib -l$(CLIB_BASE) $(CLIBS_OPTS) $(MAYBE_IDL)
+ else
+ OBJS_LIBS := $(OBJ_LINK) $(CLIBS_OPTS) $(MAYBE_IDL)
+ endif
+else
+ OBJS_LIBS := $(CLIBS_OPTS) $(MAYBE_IDL)
+endif
+
+# If we have to make byte-code
+ifndef REAL_OCAMLC
+ BYTE_OCAML := y
+
+ # EXTRADEPS is added dependencies we have to insert for all
+ # executable files we generate. Ideally it should be all of the
+ # libraries we use, but it's hard to find the ones that get searched on
+ # the path since I don't know the paths built into the compiler, so
+ # just include the ones with slashes in their names.
+ EXTRADEPS := $(addsuffix .cma,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i))))
+ SPECIAL_OCAMLFLAGS := $(OCAMLBCFLAGS)
+
+ REAL_OCAMLC := $(INTF_OCAMLC)
+
+ REAL_IMPL := $(IMPL_CMO)
+ REAL_IMPL_INTF := $(IMPLO_INTF)
+ IMPL_SUF := .cmo
+
+ DEPFLAGS :=
+ MAKE_DEPS := $(MLDEPS) $(BCDEPIS)
+
+ ifdef CREATE_LIB
+ CFLAGS := $(PIC_CFLAGS) $(CFLAGS)
+ CPPFLAGS := $(PIC_CPPFLAGS) $(CPPFLAGS)
+ ifndef STATIC
+ ifneq ($(strip $(OBJ_LINK)),)
+ MAKEDLL := $(DLLSONAME)
+ ALL_LDFLAGS := -dllib $(DLLSONAME)
+ endif
+ endif
+ endif
+
+ ifndef NO_CUSTOM
+ ifneq "$(strip $(OBJ_LINK) $(THREADS) $(MAYBE_IDL) $(CLIBS))" ""
+ ALL_LDFLAGS += -custom
+ endif
+ endif
+
+ ALL_LDFLAGS += $(INCFLAGS) $(OCAMLLDFLAGS) $(OCAMLBLDFLAGS) \
+ $(COMMON_LDFLAGS) $(LIBS:%=%.cma)
+ CAMLIDLDLLFLAGS :=
+
+ ifdef THREADS
+ ifdef VMTHREADS
+ THREAD_FLAG := -vmthread
+ else
+ THREAD_FLAG := -thread
+ endif
+ ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS)
+ ifndef CREATE_LIB
+ ifndef REAL_OCAMLFIND
+ ALL_LDFLAGS := unix.cma threads.cma $(ALL_LDFLAGS)
+ endif
+ endif
+ endif
+
+# we have to make native-code
+else
+ EXTRADEPS := $(addsuffix .cmxa,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i))))
+ ifndef PROFILING
+ SPECIAL_OCAMLFLAGS := $(OCAMLNCFLAGS)
+ PLDFLAGS :=
+ else
+ SPECIAL_OCAMLFLAGS := -p $(OCAMLNCFLAGS)
+ PLDFLAGS := -p
+ endif
+
+ REAL_IMPL := $(IMPL_CMX)
+ REAL_IMPL_INTF := $(IMPLX_INTF)
+ IMPL_SUF := .cmx
+
+ CPPFLAGS := -DNATIVE_CODE $(CPPFLAGS)
+
+ DEPFLAGS := -native
+ MAKE_DEPS := $(MLDEPS) $(NCDEPIS)
+
+ ALL_LDFLAGS := $(PLDFLAGS) $(INCFLAGS) $(OCAMLLDFLAGS) \
+ $(OCAMLNLDFLAGS) $(COMMON_LDFLAGS)
+ CAMLIDLDLLFLAGS := -opt
+
+ ifndef CREATE_LIB
+ ALL_LDFLAGS += $(LIBS:%=%.cmxa)
+ else
+ CFLAGS := $(PIC_CFLAGS) $(CFLAGS)
+ CPPFLAGS := $(PIC_CPPFLAGS) $(CPPFLAGS)
+ endif
+
+ ifdef THREADS
+ THREAD_FLAG := -thread
+ ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS)
+ ifndef CREATE_LIB
+ ifndef REAL_OCAMLFIND
+ ALL_LDFLAGS := unix.cmxa threads.cmxa $(ALL_LDFLAGS)
+ endif
+ endif
+ endif
+endif
+
+export MAKE_DEPS
+
+ifdef ANNOTATE
+ ANNOT_FLAG := -dtypes
+else
+endif
+
+ALL_OCAMLCFLAGS := $(THREAD_FLAG) $(ANNOT_FLAG) $(OCAMLFLAGS) \
+ $(INCFLAGS) $(SPECIAL_OCAMLFLAGS)
+
+ifdef make_deps
+ -include $(MAKE_DEPS)
+ PRE_TARGETS :=
+endif
+
+###########################################################################
+# USER RULES
+
+# Call "OCamlMakefile QUIET=" to get rid of all of the @'s.
+QUIET=@
+
+# generates byte-code (default)
+byte-code: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \
+ REAL_RESULT="$(BCRESULT)" make_deps=yes
+bc: byte-code
+
+byte-code-nolink: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \
+ REAL_RESULT="$(BCRESULT)" make_deps=yes
+bcnl: byte-code-nolink
+
+top: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(TOPRESULT) \
+ REAL_RESULT="$(BCRESULT)" make_deps=yes
+
+# generates native-code
+
+native-code: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \
+ REAL_RESULT="$(NCRESULT)" \
+ REAL_OCAMLC="$(OCAMLOPT)" \
+ make_deps=yes
+nc: native-code
+
+native-code-nolink: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \
+ REAL_RESULT="$(NCRESULT)" \
+ REAL_OCAMLC="$(OCAMLOPT)" \
+ make_deps=yes
+ncnl: native-code-nolink
+
+# generates byte-code libraries
+byte-code-library: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
+ $(RES_CLIB) $(BCRESULT).cma \
+ REAL_RESULT="$(BCRESULT)" \
+ CREATE_LIB=yes \
+ make_deps=yes
+bcl: byte-code-library
+
+# generates native-code libraries
+native-code-library: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
+ $(RES_CLIB) $(NCRESULT).cmxa \
+ REAL_RESULT="$(NCRESULT)" \
+ REAL_OCAMLC="$(OCAMLOPT)" \
+ CREATE_LIB=yes \
+ make_deps=yes
+ncl: native-code-library
+
+ifdef WIN32
+# generates byte-code dll
+byte-code-dll: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
+ $(RES_CLIB) $(BCRESULT).dll \
+ REAL_RESULT="$(BCRESULT)" \
+ make_deps=yes
+bcd: byte-code-dll
+
+# generates native-code dll
+native-code-dll: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
+ $(RES_CLIB) $(NCRESULT).dll \
+ REAL_RESULT="$(NCRESULT)" \
+ REAL_OCAMLC="$(OCAMLOPT)" \
+ make_deps=yes
+ncd: native-code-dll
+endif
+
+# generates byte-code with debugging information
+debug-code: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \
+ REAL_RESULT="$(BCRESULT)" make_deps=yes \
+ OCAMLFLAGS="-g $(OCAMLFLAGS)" \
+ OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)"
+dc: debug-code
+
+debug-code-nolink: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \
+ REAL_RESULT="$(BCRESULT)" make_deps=yes \
+ OCAMLFLAGS="-g $(OCAMLFLAGS)" \
+ OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)"
+dcnl: debug-code-nolink
+
+# generates byte-code libraries with debugging information
+debug-code-library: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
+ $(RES_CLIB) $(BCRESULT).cma \
+ REAL_RESULT="$(BCRESULT)" make_deps=yes \
+ CREATE_LIB=yes \
+ OCAMLFLAGS="-g $(OCAMLFLAGS)" \
+ OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)"
+dcl: debug-code-library
+
+# generates byte-code for profiling
+profiling-byte-code: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \
+ REAL_RESULT="$(BCRESULT)" PROFILING="y" \
+ make_deps=yes
+pbc: profiling-byte-code
+
+# generates native-code
+
+profiling-native-code: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \
+ REAL_RESULT="$(NCRESULT)" \
+ REAL_OCAMLC="$(OCAMLOPT)" \
+ PROFILING="y" \
+ make_deps=yes
+pnc: profiling-native-code
+
+# generates byte-code libraries
+profiling-byte-code-library: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
+ $(RES_CLIB) $(BCRESULT).cma \
+ REAL_RESULT="$(BCRESULT)" PROFILING="y" \
+ CREATE_LIB=yes \
+ make_deps=yes
+pbcl: profiling-byte-code-library
+
+# generates native-code libraries
+profiling-native-code-library: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
+ $(RES_CLIB) $(NCRESULT).cmxa \
+ REAL_RESULT="$(NCRESULT)" PROFILING="y" \
+ REAL_OCAMLC="$(OCAMLOPT)" \
+ CREATE_LIB=yes \
+ make_deps=yes
+pncl: profiling-native-code-library
+
+# packs byte-code objects
+pack-byte-code: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT).cmo \
+ REAL_RESULT="$(BCRESULT)" \
+ PACK_LIB=yes make_deps=yes
+pabc: pack-byte-code
+
+# packs native-code objects
+pack-native-code: $(PRE_TARGETS)
+ $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
+ $(NCRESULT).cmx $(NCRESULT).o \
+ REAL_RESULT="$(NCRESULT)" \
+ REAL_OCAMLC="$(OCAMLOPT)" \
+ PACK_LIB=yes make_deps=yes
+panc: pack-native-code
+
+# generates HTML-documentation
+htdoc: doc/$(RESULT)/html
+
+# generates Latex-documentation
+ladoc: doc/$(RESULT)/latex
+
+# generates PostScript-documentation
+psdoc: doc/$(RESULT)/latex/doc.ps
+
+# generates PDF-documentation
+pdfdoc: doc/$(RESULT)/latex/doc.pdf
+
+# generates all supported forms of documentation
+doc: htdoc ladoc psdoc pdfdoc
+
+###########################################################################
+# LOW LEVEL RULES
+
+$(REAL_RESULT): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) $(RESULTDEPS)
+ $(REAL_OCAMLFIND) $(REAL_OCAMLC) \
+ $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \
+ $(ALL_LDFLAGS) $(OBJS_LIBS) -o $@$(EXE) \
+ $(REAL_IMPL)
+
+nolink: $(REAL_IMPL_INTF) $(OBJ_LINK)
+
+ifdef WIN32
+$(REAL_RESULT).dll: $(REAL_IMPL_INTF) $(OBJ_LINK)
+ $(CAMLIDLDLL) $(CAMLIDLDLLFLAGS) $(OBJ_LINK) $(CLIBS) \
+ -o $@ $(REAL_IMPL)
+endif
+
+%$(TOPSUFFIX): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS)
+ $(REAL_OCAMLFIND) $(OCAMLMKTOP) \
+ $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \
+ $(ALL_LDFLAGS) $(OBJS_LIBS) -o $@$(EXE) \
+ $(REAL_IMPL)
+
+.SUFFIXES: .mli .ml .cmi .cmo .cmx .cma .cmxa .$(EXT_OBJ) \
+ .mly .di .d .$(EXT_LIB) .idl %.oxridl .c .$(EXT_CXX) .h .so \
+ .rep .zog .glade
+
+ifndef STATIC
+ifdef MINGW
+$(DLLSONAME): $(OBJ_LINK)
+ $(CC) $(CFLAGS) $(CFLAGS_WIN32) $(OBJ_LINK) -shared -o $@ \
+ -Wl,--whole-archive $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/lib%.a))) \
+ $(OCAMLLIBPATH)/ocamlrun.a \
+ -Wl,--export-all-symbols \
+ -Wl,--no-whole-archive
+else
+ifdef MSVC
+$(DLLSONAME): $(OBJ_LINK)
+ link /NOLOGO /DLL /OUT:$@ $(OBJ_LINK) \
+ $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/%.lib))) \
+ $(OCAMLLIBPATH)/ocamlrun.lib
+
+else
+$(DLLSONAME): $(OBJ_LINK)
+ $(OCAMLMKLIB) $(INCFLAGS) $(CLIBFLAGS) \
+ -o $(CLIB_BASE) $(OBJ_LINK) $(CLIBS:%=-l%) \
+ $(OCAMLMKLIB_FLAGS)
+endif
+endif
+endif
+
+ifndef LIB_PACK_NAME
+$(RESULT).cma: $(REAL_IMPL_INTF) $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS)
+ $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(ALL_LDFLAGS) \
+ $(OBJS_LIBS) -o $@ $(OCAMLBLDFLAGS) $(REAL_IMPL)
+
+$(RESULT).cmxa $(RESULT).$(EXT_LIB): $(REAL_IMPL_INTF) $(EXTRADEPS) $(RESULTDEPS)
+ $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(ALL_LDFLAGS) $(OBJS_LIBS) \
+ $(OCAMLNLDFLAGS) -o $@ $(REAL_IMPL)
+else
+ifdef BYTE_OCAML
+$(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo: $(REAL_IMPL_INTF)
+ $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack -o $(LIB_PACK_NAME).cmo $(REAL_IMPL)
+else
+$(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmx: $(REAL_IMPL_INTF)
+ $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack -o $(LIB_PACK_NAME).cmx $(REAL_IMPL)
+endif
+
+$(RESULT).cma: $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS)
+ $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(ALL_LDFLAGS) \
+ $(OBJS_LIBS) -o $@ $(OCAMLBLDFLAGS) $(LIB_PACK_NAME).cmo
+
+$(RESULT).cmxa $(RESULT).$(EXT_LIB): $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmx $(EXTRADEPS) $(RESULTDEPS)
+ $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(ALL_LDFLAGS) $(OBJS_LIBS) \
+ $(OCAMLNLDFLAGS) -o $@ $(LIB_PACK_NAME).cmx
+endif
+
+$(RES_CLIB): $(OBJ_LINK)
+ifndef MSVC
+ ifneq ($(strip $(OBJ_LINK)),)
+ $(AR) rcs $@ $(OBJ_LINK)
+ endif
+else
+ ifneq ($(strip $(OBJ_LINK)),)
+ lib -nologo -debugtype:cv -out:$(RES_CLIB) $(OBJ_LINK)
+ endif
+endif
+
+.mli.cmi: $(EXTRADEPS)
+ $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
+ if [ -z "$$pp" ]; then \
+ echo $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \
+ -c $(THREAD_FLAG) $(ANNOT_FLAG) \
+ $(OCAMLFLAGS) $(INCFLAGS) $<; \
+ $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \
+ -c $(THREAD_FLAG) $(ANNOT_FLAG) \
+ $(OCAMLFLAGS) $(INCFLAGS) $<; \
+ else \
+ echo $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \
+ -c -pp \"$$pp $(PPFLAGS)\" $(THREAD_FLAG) $(ANNOT_FLAG) \
+ $(OCAMLFLAGS) $(INCFLAGS) $<; \
+ $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \
+ -c -pp "$$pp $(PPFLAGS)" $(THREAD_FLAG) $(ANNOT_FLAG) \
+ $(OCAMLFLAGS) $(INCFLAGS) $<; \
+ fi
+
+.ml.cmi .ml.$(EXT_OBJ) .ml.cmx .ml.cmo: $(EXTRADEPS)
+ $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
+ if [ -z "$$pp" ]; then \
+ echo $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \
+ -c $(ALL_OCAMLCFLAGS) $<; \
+ $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \
+ -c $(ALL_OCAMLCFLAGS) $<; \
+ else \
+ echo $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \
+ -c -pp \"$$pp $(PPFLAGS)\" $(ALL_OCAMLCFLAGS) $<; \
+ $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \
+ -c -pp "$$pp $(PPFLAGS)" $(ALL_OCAMLCFLAGS) $<; \
+ fi
+
+ifdef PACK_LIB
+$(REAL_RESULT).cmo $(REAL_RESULT).cmx $(REAL_RESULT).o: $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS)
+ $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack $(ALL_LDFLAGS) \
+ $(OBJS_LIBS) -o $@ $(REAL_IMPL)
+endif
+
+.PRECIOUS: %.ml
+%.ml: %.mll
+ $(OCAMLLEX) $<
+
+.PRECIOUS: %.ml %.mli
+%.ml %.mli: %.mly
+ $(OCAMLYACC) $(YFLAGS) $<
+ $(QUIET)pp=`sed -n -e 's/.*(\*pp \([^*]*\) \*).*/\1/p;q' $<`; \
+ if [ ! -z "$$pp" ]; then \
+ mv $*.ml $*.ml.temporary; \
+ echo "(*pp $$pp $(PPFLAGS)*)" > $*.ml; \
+ cat $*.ml.temporary >> $*.ml; \
+ rm $*.ml.temporary; \
+ mv $*.mli $*.mli.temporary; \
+ echo "(*pp $$pp $(PPFLAGS)*)" > $*.mli; \
+ cat $*.mli.temporary >> $*.mli; \
+ rm $*.mli.temporary; \
+ fi
+
+
+.PRECIOUS: %.ml
+%.ml: %.rep
+ $(CAMELEON_REPORT) $(CAMELEON_REPORT_FLAGS) -gen $<
+
+.PRECIOUS: %.ml
+%.ml: %.zog
+ $(CAMELEON_ZOGGY) $(CAMELEON_ZOGGY_FLAGS) -impl $< > $@
+
+.PRECIOUS: %.ml
+%.ml: %.glade
+ $(OCAML_GLADECC) $(OCAML_GLADECC_FLAGS) $< > $@
+
+.PRECIOUS: %.ml %.mli
+%.ml %.mli: %.oxridl
+ $(OXRIDL) $<
+
+.PRECIOUS: %.ml %.mli %_stubs.c %.h
+%.ml %.mli %_stubs.c %.h: %.idl
+ $(CAMLIDL) $(MAYBE_IDL_HEADER) $(IDLFLAGS) \
+ $(CAMLIDLFLAGS) $<
+ $(QUIET)if [ $(NOIDLHEADER) ]; then touch $*.h; fi
+
+.c.$(EXT_OBJ):
+ $(OCAMLC) -c -cc "$(CC)" -ccopt "$(CFLAGS) \
+ $(CPPFLAGS) $(CPPFLAGS_WIN32) \
+ $(CFLAGS_WIN32) $(CINCFLAGS) $(CFLAG_O)$@ " $<
+
+.$(EXT_CXX).$(EXT_OBJ):
+ $(CXX) -c $(CXXFLAGS) $(CINCFLAGS) $(CPPFLAGS) \
+ -I'$(OCAMLLIBPATH)' \
+ $< $(CFLAG_O)$@
+
+$(MLDEPDIR)/%.d: %.ml
+ $(QUIET)echo making $@ from $<
+ $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi
+ $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
+ if [ -z "$$pp" ]; then \
+ $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \
+ $(DINCFLAGS) $< > $@; \
+ else \
+ $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \
+ -pp "$$pp $(PPFLAGS)" $(DINCFLAGS) $< > $@; \
+ fi
+
+$(BCDIDIR)/%.di $(NCDIDIR)/%.di: %.mli
+ $(QUIET)echo making $@ from $<
+ $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi
+ $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
+ if [ -z "$$pp" ]; then \
+ $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) $(DINCFLAGS) $< > $@; \
+ else \
+ $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) \
+ -pp "$$pp $(PPFLAGS)" $(DINCFLAGS) $< > $@; \
+ fi
+
+doc/$(RESULT)/html: $(DOC_FILES)
+ rm -rf $@
+ mkdir -p $@
+ $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
+ if [ -z "$$pp" ]; then \
+ echo $(OCAMLDOC) -html -d $@ $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES); \
+ $(OCAMLDOC) -html -d $@ $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES); \
+ else \
+ echo $(OCAMLDOC) -pp \"$$pp $(PPFLAGS)\" -html -d $@ $(OCAMLDOCFLAGS) \
+ $(INCFLAGS) $(DOC_FILES); \
+ $(OCAMLDOC) -pp "$$pp $(PPFLAGS)" -html -d $@ $(OCAMLDOCFLAGS) \
+ $(INCFLAGS) $(DOC_FILES); \
+ fi
+
+doc/$(RESULT)/latex: $(DOC_FILES)
+ rm -rf $@
+ mkdir -p $@
+ $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
+ if [ -z "$$pp" ]; then \
+ echo $(OCAMLDOC) -latex $(OCAMLDOCFLAGS) $(INCFLAGS) \
+ $(DOC_FILES) -o $@/doc.tex; \
+ $(OCAMLDOC) -latex $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES) \
+ -o $@/doc.tex; \
+ else \
+ echo $(OCAMLDOC) -pp \"$$pp $(PPFLAGS)\" -latex $(OCAMLDOCFLAGS) \
+ $(INCFLAGS) $(DOC_FILES) -o $@/doc.tex; \
+ $(OCAMLDOC) -pp "$$pp $(PPFLAGS)" -latex $(OCAMLDOCFLAGS) \
+ $(INCFLAGS) $(DOC_FILES) -o $@/doc.tex; \
+ fi
+
+doc/$(RESULT)/latex/doc.ps: doc/$(RESULT)/latex
+ cd doc/$(RESULT)/latex && \
+ $(LATEX) doc.tex && \
+ $(LATEX) doc.tex && \
+ $(DVIPS) $(DVIPSFLAGS) doc.dvi -o $(@F)
+
+doc/$(RESULT)/latex/doc.pdf: doc/$(RESULT)/latex/doc.ps
+ cd doc/$(RESULT)/latex && $(PS2PDF) $(<F)
+
+define make_subproj
+.PHONY:
+subproj_$(1):
+ $$(eval $$(call PROJ_$(1)))
+ $(QUIET)if [ "$(SUBTARGET)" != "all" ]; then \
+ $(MAKE) -f $(OCAMLMAKEFILE) $(SUBTARGET); \
+ fi
+endef
+
+$(foreach subproj,$(SUBPROJS),$(eval $(call make_subproj,$(subproj))))
+
+.PHONY:
+subprojs: $(SUBPROJS:%=subproj_%)
+
+###########################################################################
+# (UN)INSTALL RULES FOR LIBRARIES
+
+.PHONY: libinstall
+libinstall: all
+ $(QUIET)printf "\nInstalling library with ocamlfind\n"
+ $(OCAMLFIND) install $(OCAMLFIND_INSTFLAGS) $(RESULT) META $(LIBINSTALL_FILES)
+ $(QUIET)printf "\nInstallation successful.\n"
+
+.PHONY: libuninstall
+libuninstall:
+ $(QUIET)printf "\nUninstalling library with ocamlfind\n"
+ $(OCAMLFIND) remove $(OCAMLFIND_INSTFLAGS) $(RESULT)
+ $(QUIET)printf "\nUninstallation successful.\n"
+
+.PHONY: rawinstall
+rawinstall: all
+ $(QUIET)printf "\nInstalling library to: $(OCAML_LIB_INSTALL)\n"
+ -install -d $(OCAML_LIB_INSTALL)
+ for i in $(LIBINSTALL_FILES); do \
+ if [ -f $$i ]; then \
+ install -c -m 0644 $$i $(OCAML_LIB_INSTALL); \
+ fi; \
+ done
+ $(QUIET)printf "\nInstallation successful.\n"
+
+.PHONY: rawuninstall
+rawuninstall:
+ $(QUIET)printf "\nUninstalling library from: $(OCAML_LIB_INSTALL)\n"
+ cd $(OCAML_LIB_INSTALL) && rm $(notdir $(LIBINSTALL_FILES))
+ $(QUIET)printf "\nUninstallation successful.\n"
+
+###########################################################################
+# MAINTENANCE RULES
+
+.PHONY: clean
+clean::
+ rm -f $(TARGETS) $(TRASH)
+ rm -rf $(BCDIDIR) $(NCDIDIR) $(MLDEPDIR)
+
+.PHONY: cleanup
+cleanup::
+ rm -f $(NONEXECS) $(TRASH)
+ rm -rf $(BCDIDIR) $(NCDIDIR) $(MLDEPDIR)
+
+.PHONY: clean-doc
+clean-doc::
+ rm -rf doc
+
+.PHONY: nobackup
+nobackup:
+ rm -f *.bak *~ *.dup
diff --git a/tools/debugger/pdb/PDB.ml b/tools/debugger/pdb/PDB.ml
new file mode 100644
index 0000000000..0ed121b7aa
--- /dev/null
+++ b/tools/debugger/pdb/PDB.ml
@@ -0,0 +1,180 @@
+(** PDB.ml
+ *
+ * Dispatch debugger commands to the appropriate context
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+exception Unimplemented of string
+exception Unknown_context of string
+exception Unknown_domain
+
+type context_t =
+ | Void
+ | Event_channel
+ | Domain of Domain.context_t
+ | Process of Process.context_t
+
+let string_of_context ctx =
+ match ctx with
+ | Void -> "{void}"
+ | Event_channel -> "{event channel}"
+ | Domain d -> Domain.string_of_context d
+ | Process p -> Process.string_of_context p
+
+
+
+let read_registers ctx =
+ match ctx with
+ | Domain d -> Domain.read_registers d
+ | _ -> Intel.null_registers
+
+let write_register ctx register value =
+ match ctx with
+ | Domain d -> Domain.write_register d register value
+ | _ -> raise (Unimplemented "write register")
+
+
+let read_memory ctx addr len =
+ match ctx with
+ | Domain d -> Domain.read_memory d addr len
+ | _ -> raise (Unimplemented "read memory")
+
+let write_memory ctx addr values =
+ match ctx with
+ | Domain d -> Domain.write_memory d addr values
+ | _ -> raise (Unimplemented "write memory")
+
+
+let continue ctx =
+ match ctx with
+ | Domain d -> Domain.continue d
+ | _ -> raise (Unimplemented "continue")
+
+let step ctx =
+ match ctx with
+ | Domain d -> Domain.step d
+ | _ -> raise (Unimplemented "step")
+
+
+let insert_memory_breakpoint ctx addr len =
+ match ctx with
+ | Domain d -> Domain.insert_memory_breakpoint d addr len
+ | _ -> raise (Unimplemented "insert memory breakpoint")
+
+let remove_memory_breakpoint ctx addr len =
+ match ctx with
+ | Domain d -> Domain.remove_memory_breakpoint d addr len
+ | _ -> raise (Unimplemented "remove memory breakpoint")
+
+
+let pause ctx =
+ match ctx with
+ | Domain d -> Domain.pause d
+ | _ -> raise (Unimplemented "pause target")
+
+
+let attach_debugger ctx =
+ match ctx with
+ | Domain d -> Domain.attach_debugger (Domain.get_domain d)
+ (Domain.get_execution_domain d)
+ | _ -> raise (Unimplemented "attach debugger")
+
+let detach_debugger ctx =
+ match ctx with
+ | Domain d -> Domain.detach_debugger (Domain.get_domain d)
+ (Domain.get_execution_domain d)
+ | _ -> raise (Unimplemented "detach debugger")
+
+external open_debugger : unit -> unit = "open_context"
+external close_debugger : unit -> unit = "close_context"
+
+(* this is just the domains right now... expand to other contexts later *)
+external debugger_status : unit -> unit = "debugger_status"
+
+
+(***********************************************************)
+
+
+let hash = Hashtbl.create 10
+
+let debug_contexts () =
+ print_endline "context list:";
+ let print_context key ctx =
+ match ctx with
+ | Void -> print_endline (Printf.sprintf " [%s] {void}"
+ (Util.get_connection_info key))
+ | Event_channel -> print_endline (Printf.sprintf " [%s] {event_channel}"
+ (Util.get_connection_info key))
+ | Process p -> print_endline (Printf.sprintf " [%s] %s"
+ (Util.get_connection_info key)
+ (Process.string_of_context p))
+ | Domain d -> print_endline (Printf.sprintf " [%s] %s"
+ (Util.get_connection_info key)
+ (Domain.string_of_context d))
+ in
+ Hashtbl.iter print_context hash
+
+(** add_context : add a new context to the hash table.
+ * if there is an existing context for the same key then it
+ * is first removed implicitly by the hash table replace function.
+ *)
+let add_context (key:Unix.file_descr) context params =
+ match context with
+ | "void" -> Hashtbl.replace hash key Void
+ | "event channel" -> Hashtbl.replace hash key Event_channel
+ | "domain" ->
+ begin
+ match params with
+ | dom::exec_dom::_ ->
+ let d = Domain(Domain.new_context dom exec_dom) in
+ attach_debugger d;
+ Hashtbl.replace hash key d
+ | _ -> failwith "bogus parameters to domain context"
+ end
+ | "process" ->
+ begin
+ match params with
+ | dom::pid::_ ->
+ let p = Process.new_context dom pid in
+ Hashtbl.replace hash key (Process(p))
+ | _ -> failwith "bogus parameters to process context"
+ end
+ | _ -> raise (Unknown_context context)
+
+let add_default_context sock =
+ add_context sock "void" []
+
+let find_context key =
+ try
+ Hashtbl.find hash key
+ with
+ Not_found ->
+ print_endline "error: (find_context) PDB context not found";
+ raise Not_found
+
+let delete_context key =
+ Hashtbl.remove hash key
+
+(** find_domain : Locate the context(s) matching a particular domain
+ * and execution_domain pair.
+ *)
+
+let find_domain dom exec_dom =
+ let find key ctx list =
+ match ctx with
+ | Domain d ->
+ if (((Domain.get_domain d) = dom) &&
+ ((Domain.get_execution_domain d) = exec_dom))
+ then
+ key :: list
+ else
+ list
+ | _ -> list
+ in
+ let sock_list = Hashtbl.fold find hash [] in
+ match sock_list with
+ | hd::tl -> hd
+ | [] -> raise Unknown_domain
diff --git a/tools/debugger/pdb/Process.ml b/tools/debugger/pdb/Process.ml
new file mode 100644
index 0000000000..79632b3298
--- /dev/null
+++ b/tools/debugger/pdb/Process.ml
@@ -0,0 +1,39 @@
+(** Process.ml
+ *
+ * process context implementation
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+open Int32
+open Intel
+
+type context_t =
+{
+ mutable domain : int;
+ mutable process : int;
+}
+
+let default_context = { domain = 0; process = 0 }
+
+let new_context dom proc = { domain = dom; process = proc }
+
+let string_of_context ctx =
+ Printf.sprintf "{process} domain: %d, process: %d"
+ ctx.domain ctx.process
+
+let set_domain ctx value =
+ ctx.domain <- value;
+ print_endline (Printf.sprintf "ctx.domain <- %d" ctx.domain)
+
+let set_process ctx value =
+ ctx.process <- value;
+ print_endline (Printf.sprintf "ctx.process <- %d" ctx.process)
+
+let get_domain ctx =
+ ctx.domain
+
+let get_process ctx =
+ ctx.process
diff --git a/tools/debugger/pdb/Process.mli b/tools/debugger/pdb/Process.mli
new file mode 100644
index 0000000000..39b6221892
--- /dev/null
+++ b/tools/debugger/pdb/Process.mli
@@ -0,0 +1,20 @@
+(** Process.mli
+ *
+ * process context interface
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+type context_t
+
+val default_context : context_t
+val new_context : int -> int -> context_t
+
+val set_domain : context_t -> int -> unit
+val get_domain : context_t -> int
+val set_process : context_t -> int -> unit
+val get_process : context_t -> int
+
+val string_of_context : context_t -> string
diff --git a/tools/debugger/pdb/Util.ml b/tools/debugger/pdb/Util.ml
new file mode 100644
index 0000000000..a5722242db
--- /dev/null
+++ b/tools/debugger/pdb/Util.ml
@@ -0,0 +1,153 @@
+(** Util.ml
+ *
+ * various utility functions
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+let int_of_hexchar h =
+ let i = int_of_char h in
+ match h with
+ | '0' .. '9' -> i - (int_of_char '0')
+ | 'a' .. 'f' -> i - (int_of_char 'a') + 10
+ | 'A' .. 'F' -> i - (int_of_char 'A') + 10
+ | _ -> raise (Invalid_argument "unknown hex character")
+
+let hexchar_of_int i =
+ let hexchars = [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+ '8'; '9'; 'a'; 'b'; 'c'; 'd'; 'e'; 'f' |]
+ in
+ hexchars.(i)
+
+
+(** flip the bytes of a four byte int
+ *)
+
+let flip_int num =
+ let a = num mod 256
+ and b = (num / 256) mod 256
+ and c = (num / (256 * 256)) mod 256
+ and d = (num / (256 * 256 * 256)) in
+ (a * 256 * 256 * 256) + (b * 256 * 256) + (c * 256) + d
+
+
+let flip_int32 num =
+ let a = Int32.logand num 0xffl
+ and b = Int32.logand (Int32.shift_right_logical num 8) 0xffl
+ and c = Int32.logand (Int32.shift_right_logical num 16) 0xffl
+ and d = (Int32.shift_right_logical num 24) in
+ (Int32.logor
+ (Int32.logor (Int32.shift_left a 24) (Int32.shift_left b 16))
+ (Int32.logor (Int32.shift_left c 8) d))
+
+
+let int_list_of_string_list list =
+ List.map (fun x -> int_of_string x) list
+
+let int_list_of_string str len =
+ let array_of_string s =
+ let int_array = Array.make len 0 in
+ for loop = 0 to len - 1 do
+ int_array.(loop) <- (Char.code s.[loop]);
+ done;
+ int_array
+ in
+ Array.to_list (array_of_string str)
+
+
+(* remove leading and trailing whitespace from a string *)
+
+let chomp str =
+ let head = Str.regexp "^[ \t\r\n]+" in
+ let tail = Str.regexp "[ \t\r\n]+$" in
+ let str = Str.global_replace head "" str in
+ Str.global_replace tail "" str
+
+(* Stupid little parser for "<key>=<value>[,<key>=<value>]*"
+ It first chops the entire command at each ',', so no ',' in key or value!
+ Mucked to return a list of words for "value"
+ *)
+
+let list_of_string str =
+ let delim c = Str.regexp ("[ \t]*" ^ c ^ "[ \t]*") in
+ let str_list = Str.split (delim " ") str in
+ List.map (fun x -> chomp(x)) str_list
+
+let little_parser fn str =
+ let delim c = Str.regexp ("[ \t]*" ^ c ^ "[ \t]*") in
+ let str_list = Str.split (delim ",") str in
+ let pair s =
+ match Str.split (delim "=") s with
+ | [key;value] -> fn (chomp key) (list_of_string value)
+ | [key] -> fn (chomp key) []
+ | _ -> failwith (Printf.sprintf "error: (little_parser) parse error [%s]" str)
+ in
+ List.iter pair str_list
+
+(* boolean list membership test *)
+let not_list_member the_list element =
+ try
+ List.find (fun x -> x = element) the_list;
+ false
+ with
+ Not_found -> true
+
+(* a very inefficient way to remove the elements of one list from another *)
+let list_remove the_list remove_list =
+ List.filter (not_list_member remove_list) the_list
+
+(* get a description of a file descriptor *)
+let get_connection_info fd =
+ let get_local_info fd =
+ let sockname = Unix.getsockname fd in
+ match sockname with
+ | Unix.ADDR_UNIX(s) -> s
+ | Unix.ADDR_INET(a,p) -> ((Unix.string_of_inet_addr a) ^ ":" ^
+ (string_of_int p))
+ and get_remote_info fd =
+ let sockname = Unix.getpeername fd in
+ match sockname with
+ | Unix.ADDR_UNIX(s) -> s
+ | Unix.ADDR_INET(a,p) -> ((Unix.string_of_inet_addr a) ^ ":" ^
+ (string_of_int p))
+ in
+ try
+ get_remote_info fd
+ with
+ | Unix.Unix_error (Unix.ENOTSOCK, s1, s2) ->
+ let s = Unix.fstat fd in
+ Printf.sprintf "dev: %d, inode: %d" s.Unix.st_dev s.Unix.st_ino
+ | _ -> get_local_info fd
+
+
+(* really write a string *)
+let really_write fd str =
+ let strlen = String.length str in
+ let sent = ref 0 in
+ while (!sent < strlen) do
+ sent := !sent + (Unix.write fd str !sent (strlen - !sent))
+ done
+
+let write_character fd ch =
+ let str = String.create 1 in
+ str.[0] <- ch;
+ really_write fd str
+
+
+
+let send_reply fd reply =
+ let checksum = ref 0 in
+ write_character fd '$';
+ for loop = 0 to (String.length reply) - 1 do
+ write_character fd reply.[loop];
+ checksum := !checksum + int_of_char reply.[loop]
+ done;
+ write_character fd '#';
+ write_character fd (hexchar_of_int ((!checksum mod 256) / 16));
+ write_character fd (hexchar_of_int ((!checksum mod 256) mod 16))
+ (*
+ * BUG NEED TO LISTEN FOR REPLY +/- AND POSSIBLY RE-TRANSMIT
+ *)
+
diff --git a/tools/debugger/pdb/debugger.ml b/tools/debugger/pdb/debugger.ml
new file mode 100644
index 0000000000..5a3002470b
--- /dev/null
+++ b/tools/debugger/pdb/debugger.ml
@@ -0,0 +1,315 @@
+(** debugger.ml
+ *
+ * main debug functionality
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+open Intel
+open PDB
+open Util
+open Str
+
+(** a few debugger commands such as step 's' and continue 'c' do
+ * not immediately return a response to the debugger. in these
+ * cases we raise No_reply instead.
+ *)
+exception No_reply
+
+let initialize_debugger () =
+ ()
+
+let exit_debugger () =
+ ()
+
+
+(**
+ Detach Command
+ Note: response is ignored by gdb. We leave the context in the
+ hash. It will be cleaned up when the socket is closed.
+ *)
+let gdb_detach ctx =
+ PDB.detach_debugger ctx;
+ raise No_reply
+
+(**
+ Kill Command
+ Note: response is ignored by gdb. We leave the context in the
+ hash. It will be cleaned up when the socket is closed.
+ *)
+let gdb_kill () =
+ ""
+
+
+
+(**
+ Continue Command.
+ resume the target
+ *)
+let gdb_continue ctx =
+ PDB.continue ctx;
+ raise No_reply
+
+(**
+ Step Command.
+ single step the target
+ *)
+let gdb_step ctx =
+ PDB.step ctx;
+ raise No_reply
+
+
+(**
+ Read Registers Command.
+ returns 16 4-byte registers in a particular order defined by gdb.
+ *)
+let gdb_read_registers ctx =
+ let regs = PDB.read_registers ctx in
+ let str =
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.eax)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.ecx)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.edx)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.ebx)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.esp)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.ebp)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.esi)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.edi)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.eip)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.eflags)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.cs)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.ss)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.ds)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.es)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.fs)) ^
+ (Printf.sprintf "%08lx" (Util.flip_int32 regs.gs)) in
+ str
+
+(**
+ Set Thread Command
+ *)
+let gdb_set_thread command =
+ "OK"
+
+
+(**
+ Read Memory Packets
+ *)
+let gdb_read_memory ctx command =
+ let int_list_to_string i str =
+ (Printf.sprintf "%02x" i) ^ str
+ in
+ let read_mem addr len =
+ try
+ let mem = PDB.read_memory ctx addr len in
+ List.fold_right int_list_to_string mem ""
+ with
+ Failure s -> "E02"
+ in
+ Scanf.sscanf command "m%lx,%d" read_mem
+
+
+
+(**
+ Write Memory Packets
+ *)
+let gdb_write_memory ctx command =
+ let write_mem addr len =
+ print_endline (Printf.sprintf " gdb_write_memory %lx %x\n" addr len);
+ print_endline (Printf.sprintf " [[ unimplemented ]]\n")
+ in
+ Scanf.sscanf command "M%lx,%d" write_mem;
+ "OK"
+
+
+
+(**
+ Write Register Packets
+ *)
+let gdb_write_register ctx command =
+ let write_reg reg goofy_val =
+ let new_val = Util.flip_int32 goofy_val in
+ match reg with
+ | 0 -> PDB.write_register ctx EAX new_val
+ | 1 -> PDB.write_register ctx ECX new_val
+ | 2 -> PDB.write_register ctx EDX new_val
+ | 3 -> PDB.write_register ctx EBX new_val
+ | 4 -> PDB.write_register ctx ESP new_val
+ | 5 -> PDB.write_register ctx EBP new_val
+ | 6 -> PDB.write_register ctx ESI new_val
+ | 7 -> PDB.write_register ctx EDI new_val
+ | 8 -> PDB.write_register ctx EIP new_val
+ | 9 -> PDB.write_register ctx EFLAGS new_val
+ | 10 -> PDB.write_register ctx CS new_val
+ | 11 -> PDB.write_register ctx SS new_val
+ | 12 -> PDB.write_register ctx DS new_val
+ | 13 -> PDB.write_register ctx ES new_val
+ | 14 -> PDB.write_register ctx FS new_val
+ | 15 -> PDB.write_register ctx GS new_val
+ | _ -> print_endline (Printf.sprintf "write unknown register [%d]" reg)
+ in
+ Scanf.sscanf command "P%x=%lx" write_reg;
+ "OK"
+
+
+(**
+ General Query Packets
+ *)
+let gdb_query command =
+ match command with
+ | "qC" -> ""
+ | "qOffsets" -> ""
+ | "qSymbol::" -> ""
+ | _ ->
+ print_endline (Printf.sprintf "unknown gdb query packet [%s]" command);
+ "E01"
+
+
+(**
+ Write Memory Binary Packets
+ *)
+let gdb_write_memory_binary ctx command =
+ let write_mem addr len =
+ let pos = Str.search_forward (Str.regexp ":") command 0 in
+ let txt = Str.string_after command (pos + 1) in
+ PDB.write_memory ctx addr (int_list_of_string txt len)
+ in
+ Scanf.sscanf command "X%lx,%d" write_mem;
+ "OK"
+
+
+
+(**
+ Last Signal Command
+ *)
+let gdb_last_signal =
+ "S00"
+
+
+
+
+(**
+ Process PDB extensions to the GDB serial protocol.
+ Changes the mutable context state.
+ *)
+let pdb_extensions command sock =
+ let process_extension key value =
+ (* since this command can change the context, we need to grab it each time *)
+ let ctx = PDB.find_context sock in
+ match key with
+ | "status" ->
+ print_endline (string_of_context ctx);
+ PDB.debug_contexts ();
+ debugger_status ()
+ | "context" ->
+ PDB.add_context sock (List.hd value)
+ (int_list_of_string_list (List.tl value))
+ | _ -> failwith (Printf.sprintf "unknown pdb extension command [%s:%s]"
+ key (List.hd value))
+ in
+ try
+ Util.little_parser process_extension
+ (String.sub command 1 ((String.length command) - 1));
+ "OK"
+ with
+ | Unknown_context s ->
+ print_endline (Printf.sprintf "unknown context [%s]" s);
+ "E01"
+ | Failure s -> "E01"
+
+
+(**
+ Insert Breakpoint or Watchpoint Packet
+ *)
+let gdb_insert_bwcpoint ctx command =
+ let insert cmd addr length =
+ try
+ match cmd with
+ | 0 -> PDB.insert_memory_breakpoint ctx addr length; "OK"
+ | _ -> ""
+ with
+ Failure s -> "E03"
+ in
+ Scanf.sscanf command "Z%d,%lx,%d" insert
+
+(**
+ Remove Breakpoint or Watchpoint Packet
+ *)
+let gdb_remove_bwcpoint ctx command =
+ let insert cmd addr length =
+ try
+ match cmd with
+ | 0 -> PDB.remove_memory_breakpoint ctx addr length; "OK"
+ | _ -> ""
+ with
+ Failure s -> "E04"
+ in
+ Scanf.sscanf command "z%d,%lx,%d" insert
+
+(**
+ Do Work!
+
+ @param command char list
+ *)
+
+let process_command command sock =
+ let ctx = PDB.find_context sock in
+ try
+ match command.[0] with
+ | 'c' -> gdb_continue ctx
+ | 'D' -> gdb_detach ctx
+ | 'g' -> gdb_read_registers ctx
+ | 'H' -> gdb_set_thread command
+ | 'k' -> gdb_kill ()
+ | 'm' -> gdb_read_memory ctx command
+ | 'M' -> gdb_write_memory ctx command
+ | 'P' -> gdb_write_register ctx command
+ | 'q' -> gdb_query command
+ | 's' -> gdb_step ctx
+ | 'x' -> pdb_extensions command sock
+ | 'X' -> gdb_write_memory_binary ctx command
+ | '?' -> gdb_last_signal
+ | 'z' -> gdb_remove_bwcpoint ctx command
+ | 'Z' -> gdb_insert_bwcpoint ctx command
+ | _ ->
+ print_endline (Printf.sprintf "unknown gdb command [%s]" command);
+ ""
+ with
+ Unimplemented s ->
+ print_endline (Printf.sprintf "loser. unimplemented command [%s][%s]"
+ command s);
+ ""
+
+
+(**
+ process_evtchn
+
+ This is called each time a virq_pdb is sent from xen to dom 0.
+ It is sent by Xen when a domain hits a breakpoint.
+
+ Think of this as the continuation function for a "c" or "s" command.
+*)
+
+external query_domain_stop : unit -> (int * int) list = "query_domain_stop"
+(* returns a list of paused domains : () -> (domain, vcpu) list *)
+
+let process_evtchn fd =
+ let channel = Evtchn.read fd in
+ let find_pair (dom, vcpu) =
+ print_endline (Printf.sprintf "checking %d.%d" dom vcpu);
+ try
+ let sock = PDB.find_domain dom vcpu in
+ true
+ with
+ Unknown_domain -> false
+ in
+ let dom_list = query_domain_stop () in
+ let (dom, vcpu) = List.find find_pair dom_list in
+ let vec = 3 in
+ let sock = PDB.find_domain dom vcpu in
+ print_endline (Printf.sprintf "handle bkpt d:%d ed:%d v:%d %s"
+ dom vcpu vec (Util.get_connection_info sock));
+ Util.send_reply sock "S05";
+ Evtchn.unmask fd channel (* allow next virq *)
+
diff --git a/tools/debugger/pdb/evtchn.ml b/tools/debugger/pdb/evtchn.ml
new file mode 100644
index 0000000000..5443accd9b
--- /dev/null
+++ b/tools/debugger/pdb/evtchn.ml
@@ -0,0 +1,32 @@
+(** evtchn.ml
+ *
+ * event channel interface
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+let dev_name = "/dev/xen/evtchn" (* EVTCHN_DEV_NAME *)
+let dev_major = 10 (* EVTCHN_DEV_MAJOR *)
+let dev_minor = 201 (* EVTCHN_DEV_MINOR *)
+
+let virq_pdb = 6 (* as defined VIRQ_PDB *)
+
+external bind_virq : int -> int = "evtchn_bind_virq"
+external bind : Unix.file_descr -> int -> unit = "evtchn_bind"
+external unbind : Unix.file_descr -> int -> unit = "evtchn_unbind"
+external ec_open : string -> int -> int -> Unix.file_descr = "evtchn_open"
+external read : Unix.file_descr -> int = "evtchn_read"
+external ec_close : Unix.file_descr -> unit = "evtchn_close"
+external unmask : Unix.file_descr -> int -> unit = "evtchn_unmask"
+
+let setup () =
+ let port = bind_virq virq_pdb in
+ let fd = ec_open dev_name dev_major dev_minor in
+ bind fd port;
+ fd
+
+let teardown fd =
+ unbind fd virq_pdb;
+ ec_close fd
diff --git a/tools/debugger/pdb/evtchn.mli b/tools/debugger/pdb/evtchn.mli
new file mode 100644
index 0000000000..18b3ed667b
--- /dev/null
+++ b/tools/debugger/pdb/evtchn.mli
@@ -0,0 +1,14 @@
+(** evtchn.mli
+ *
+ * event channel interface
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+
+val setup : unit -> Unix.file_descr
+val read : Unix.file_descr -> int
+val teardown : Unix.file_descr -> unit
+val unmask : Unix.file_descr -> int -> unit
diff --git a/tools/debugger/pdb/pdb_caml_xc.c b/tools/debugger/pdb/pdb_caml_xc.c
new file mode 100644
index 0000000000..6ba82a92c2
--- /dev/null
+++ b/tools/debugger/pdb/pdb_caml_xc.c
@@ -0,0 +1,732 @@
+/*
+ * pdb_caml_xc.c
+ *
+ * http://www.cl.cam.ac.uk/netos/pdb
+ *
+ * OCaml to libxc interface library for PDB
+ */
+
+#include <xc.h>
+#include <xendebug.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <caml/alloc.h>
+#include <caml/fail.h>
+#include <caml/memory.h>
+#include <caml/mlvalues.h>
+
+int pdb_evtchn_bind_virq (int xc_handle, int virq, int *port);
+int xen_evtchn_bind (int evtchn_fd, int idx);
+int xen_evtchn_unbind (int evtchn_fd, int idx);
+
+/* this order comes from xen/include/public/arch-x86_32.h */
+enum x86_registers { PDB_EBX, PDB_ECX, PDB_EDX, PDB_ESI, PDB_EDI,
+ PDB_EBP, PDB_EAX, PDB_Error_code, PDB_Entry_vector,
+ PDB_EIP, PDB_CS, PDB_EFLAGS, PDB_ESP, PDB_SS,
+ PDB_ES, PDB_DS, PDB_FS, PDB_GS };
+
+static void dump_regs (cpu_user_regs_t *ctx);
+
+static int xc_handle = -1;
+
+typedef struct
+{
+ int domain;
+ int vcpu;
+} context_t;
+
+#define decode_context(_ctx, _ocaml) \
+{ \
+ (_ctx)->domain = Int_val(Field((_ocaml),0)); \
+ (_ctx)->vcpu = Int_val(Field((_ocaml),1)); \
+}
+
+#define encode_context(_ctx, _ocaml) \
+{ \
+ (_ocaml) = caml_alloc_tuple(2); \
+ Store_field((_ocaml), 0, Val_int((_ctx)->domain)); \
+ Store_field((_ocaml), 1, Val_int((_ctx)->vcpu)); \
+}
+
+
+/****************************************************************************/
+
+/*
+ * open_context : unit -> unit
+ */
+value
+open_context (value unit)
+{
+ CAMLparam1(unit);
+
+ xc_handle = xc_interface_open();
+
+ if ( xc_handle < 0 )
+ {
+ fprintf(stderr, "(pdb) error opening xc interface: %d (%s)\n",
+ errno, strerror(errno));
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * close_context : unit -> unit
+ */
+value
+close_context (value unit)
+{
+ CAMLparam1(unit);
+ int rc;
+
+ if ( (rc = xc_interface_close(xc_handle)) < 0 )
+ {
+ fprintf(stderr, "(pdb) error closing xc interface: %d (%s)\n",
+ errno, strerror(errno));
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * read_registers : context_t -> int32
+ */
+value
+read_registers (value context)
+{
+ CAMLparam1(context);
+ CAMLlocal1(result);
+
+ cpu_user_regs_t *regs;
+ context_t ctx;
+
+ decode_context(&ctx, context);
+
+ if ( xendebug_read_registers(xc_handle, ctx.domain, ctx.vcpu, &regs) )
+ {
+ printf("(pdb) read registers error!\n"); fflush(stdout);
+ failwith("read registers error");
+ }
+
+ dump_regs(regs);
+
+ result = caml_alloc_tuple(18); /* FIXME */
+
+ Store_field(result, 0, caml_copy_int32(regs->ebx));
+ Store_field(result, 1, caml_copy_int32(regs->ecx));
+ Store_field(result, 2, caml_copy_int32(regs->edx));
+ Store_field(result, 3, caml_copy_int32(regs->esi));
+ Store_field(result, 4, caml_copy_int32(regs->edi));
+ Store_field(result, 5, caml_copy_int32(regs->ebp));
+ Store_field(result, 6, caml_copy_int32(regs->eax));
+ Store_field(result, 7, caml_copy_int32(regs->error_code)); /* 16 */
+ Store_field(result, 8, caml_copy_int32(regs->entry_vector)); /* 16 */
+ Store_field(result, 9, caml_copy_int32(regs->eip));
+ Store_field(result, 10, caml_copy_int32(regs->cs)); /* 16 */
+ Store_field(result, 11, caml_copy_int32(regs->eflags));
+ Store_field(result, 12, caml_copy_int32(regs->esp));
+ Store_field(result, 13, caml_copy_int32(regs->ss)); /* 16 */
+ Store_field(result, 14, caml_copy_int32(regs->es)); /* 16 */
+ Store_field(result, 15, caml_copy_int32(regs->ds)); /* 16 */
+ Store_field(result, 16, caml_copy_int32(regs->fs)); /* 16 */
+ Store_field(result, 17, caml_copy_int32(regs->gs)); /* 16 */
+
+ CAMLreturn(result);
+}
+
+
+/*
+ * write_register : context_t -> register -> int32 -> unit
+ */
+value
+write_register (value context, value reg, value newval)
+{
+ CAMLparam3(context, reg, newval);
+
+ int my_reg = Int_val(reg);
+ int val = Int32_val(newval);
+
+ context_t ctx;
+ cpu_user_regs_t *regs;
+
+ printf("(pdb) write register\n");
+
+ decode_context(&ctx, context);
+
+ if ( xendebug_read_registers(xc_handle, ctx.domain, ctx.vcpu, &regs) )
+ {
+ printf("(pdb) write register (get) error!\n"); fflush(stdout);
+ failwith("write register error");
+ }
+
+ switch (my_reg)
+ {
+ case PDB_EBX: regs->ebx = val; break;
+ case PDB_ECX: regs->ecx = val; break;
+ case PDB_EDX: regs->edx = val; break;
+ case PDB_ESI: regs->esi = val; break;
+ case PDB_EDI: regs->edi = val; break;
+
+ case PDB_EBP: regs->ebp = val; break;
+ case PDB_EAX: regs->eax = val; break;
+ case PDB_Error_code: regs->error_code = val; break;
+ case PDB_Entry_vector: regs->entry_vector = val; break;
+
+ case PDB_EIP: regs->eip = val; break;
+ case PDB_CS: regs->cs = val; break;
+ case PDB_EFLAGS: regs->eflags = val; break;
+ case PDB_ESP: regs->esp = val; break;
+ case PDB_SS: regs->ss = val; break;
+ case PDB_ES: regs->es = val; break;
+ case PDB_DS: regs->ds = val; break;
+ case PDB_FS: regs->fs = val; break;
+ case PDB_GS: regs->gs = val; break;
+ }
+
+ if ( xendebug_write_registers(xc_handle, ctx.domain, ctx.vcpu, regs) )
+ {
+ printf("(pdb) write register (set) error!\n"); fflush(stdout);
+ failwith("write register error");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * read_memory : context_t -> int32 -> int -> int
+ */
+value
+read_memory (value context, value address, value length)
+{
+ CAMLparam3(context, address, length);
+ CAMLlocal2(result, temp);
+
+ context_t ctx;
+ int loop;
+ char *buffer;
+ memory_t my_address = Int32_val(address);
+ u32 my_length = Int_val(length);
+
+ printf ("(pdb) read memory\n");
+
+ decode_context(&ctx, context);
+
+ buffer = malloc(my_length);
+ if (buffer == NULL)
+ {
+ printf("(pdb) read memory: malloc failed.\n"); fflush(stdout);
+ failwith("read memory error");
+ }
+
+ if ( xendebug_read_memory(xc_handle, ctx.domain, ctx.vcpu,
+ my_address, my_length, buffer) )
+ {
+ printf("(pdb) read memory error!\n"); fflush(stdout);
+ failwith("read memory error");
+ }
+
+ result = caml_alloc(2,0);
+ if ( my_length > 0 ) /* car */
+ {
+ Store_field(result, 0, Val_int(buffer[my_length - 1] & 0xff));
+ }
+ else
+
+ {
+ Store_field(result, 0, Val_int(0));
+ }
+ Store_field(result, 1, Val_int(0)); /* cdr */
+
+ for (loop = 1; loop < my_length; loop++)
+ {
+ temp = result;
+ result = caml_alloc(2,0);
+ Store_field(result, 0, Val_int(buffer[my_length - loop - 1] & 0xff));
+ Store_field(result, 1, temp);
+ }
+
+ CAMLreturn(result);
+}
+
+/*
+ * write_memory : context_t -> int32 -> int list -> unit
+ */
+value
+write_memory (value context, value address, value val_list)
+{
+ CAMLparam3(context, address, val_list);
+ CAMLlocal1(node);
+
+ context_t ctx;
+
+ char buffer[4096]; /* a big buffer */
+ memory_t my_address;
+ u32 length = 0;
+
+ printf ("(pdb) write memory\n");
+
+ decode_context(&ctx, context);
+
+ node = val_list;
+ if ( Int_val(node) == 0 ) /* gdb functionality test uses empty list */
+ {
+ CAMLreturn(Val_unit);
+ }
+
+ while ( Int_val(Field(node,1)) != 0 )
+ {
+ buffer[length++] = Int_val(Field(node, 0));
+ node = Field(node,1);
+ }
+ buffer[length++] = Int_val(Field(node, 0));
+
+ my_address = (memory_t) Int32_val(address);
+
+ if ( xendebug_write_memory(xc_handle, ctx.domain, ctx.vcpu,
+ my_address, length, buffer) )
+ {
+ printf("(pdb) write memory error!\n"); fflush(stdout);
+ failwith("write memory error");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+
+/*********************************************************************/
+
+void
+dump_regs (cpu_user_regs_t *regs)
+{
+ printf (" eax: %x\n", regs->eax);
+ printf (" ecx: %x\n", regs->ecx);
+ printf (" edx: %x\n", regs->edx);
+ printf (" ebx: %x\n", regs->ebx);
+ printf (" esp: %x\n", regs->esp);
+ printf (" ebp: %x\n", regs->ebp);
+ printf (" esi: %x\n", regs->esi);
+ printf (" edi: %x\n", regs->edi);
+ printf (" eip: %x\n", regs->eip);
+ printf (" flags: %x\n", regs->eflags);
+ printf (" cs: %x\n", regs->cs);
+ printf (" ss: %x\n", regs->ss);
+ printf (" es: %x\n", regs->es);
+ printf (" ds: %x\n", regs->ds);
+ printf (" fs: %x\n", regs->fs);
+ printf (" gs: %x\n", regs->gs);
+
+ return;
+}
+
+/*
+ * continue_target : context_t -> unit
+ */
+value
+continue_target (value context)
+{
+ CAMLparam1(context);
+
+ context_t ctx;
+
+ decode_context(&ctx, context);
+
+ if ( xendebug_continue(xc_handle, ctx.domain, ctx.vcpu) )
+ {
+ printf("(pdb) continue\n"); fflush(stdout);
+ failwith("continue");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * step_target : context_t -> unit
+ */
+value
+step_target (value context)
+{
+ CAMLparam1(context);
+
+ context_t ctx;
+
+ decode_context(&ctx, context);
+
+ if ( xendebug_step(xc_handle, ctx.domain, ctx.vcpu) )
+ {
+ printf("(pdb) step\n"); fflush(stdout);
+ failwith("step");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+
+
+/*
+ * insert_memory_breakpoint : context_t -> int32 -> int list -> unit
+ */
+value
+insert_memory_breakpoint (value context, value address, value length)
+{
+ CAMLparam3(context, address, length);
+
+ context_t ctx;
+ memory_t my_address = (memory_t) Int32_val(address);
+ int my_length = Int_val(length);
+
+ decode_context(&ctx, context);
+
+ printf ("(pdb) insert memory breakpoint 0x%lx %d\n",
+ my_address, my_length);
+
+ if ( xendebug_insert_memory_breakpoint(xc_handle, ctx.domain, ctx.vcpu,
+ my_address, my_length) )
+ {
+ printf("(pdb) error: insert memory breakpoint\n"); fflush(stdout);
+ failwith("insert memory breakpoint");
+ }
+
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * remove_memory_breakpoint : context_t -> int32 -> int list -> unit
+ */
+value
+remove_memory_breakpoint (value context, value address, value length)
+{
+ CAMLparam3(context, address, length);
+
+ context_t ctx;
+
+ memory_t my_address = (memory_t) Int32_val(address);
+ int my_length = Int_val(length);
+
+ printf ("(pdb) remove memory breakpoint 0x%lx %d\n",
+ my_address, my_length);
+
+ decode_context(&ctx, context);
+
+ if ( xendebug_remove_memory_breakpoint(xc_handle,
+ ctx.domain, ctx.vcpu,
+ my_address, my_length) )
+ {
+ printf("(pdb) error: remove memory breakpoint\n"); fflush(stdout);
+ failwith("remove memory breakpoint");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * attach_debugger : int -> int -> unit
+ */
+value
+attach_debugger (value domain, value vcpu)
+{
+ CAMLparam2(domain, vcpu);
+
+ int my_domain = Int_val(domain);
+ int my_vcpu = Int_val(vcpu);
+
+ printf ("(pdb) attach domain [%d.%d]\n", my_domain, my_vcpu);
+
+ if ( xendebug_attach(xc_handle, my_domain, my_vcpu) )
+ {
+ printf("(pdb) attach error!\n"); fflush(stdout);
+ failwith("attach error");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+
+/*
+ * detach_debugger : int -> int -> unit
+ */
+value
+detach_debugger (value domain, value vcpu)
+{
+ CAMLparam2(domain, vcpu);
+
+ int my_domain = Int_val(domain);
+ int my_vcpu = Int_val(vcpu);
+
+ printf ("(pdb) detach domain [%d.%d]\n", my_domain, my_vcpu);
+
+ if ( xendebug_detach(xc_handle, my_domain, my_vcpu) )
+ {
+ printf("(pdb) detach error!\n"); fflush(stdout);
+ failwith("detach error");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+
+/*
+ * debugger_status : unit -> unit
+ */
+value
+debugger_status (value unit)
+{
+ CAMLparam1(unit);
+
+ printf ("(pdb) debugger status\n");
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * pause_target : int -> unit
+ */
+value
+pause_target (value domid)
+{
+ CAMLparam1(domid);
+
+ int my_domid = Int_val(domid);
+
+ printf ("(pdb) pause target %d\n", my_domid);
+
+ xc_domain_pause(xc_handle, my_domid);
+
+ CAMLreturn(Val_unit);
+}
+
+/****************************************************************************/
+/****************************************************************************/
+
+/*
+ * query_domain_stop : unit -> (int * int) list
+ */
+value
+query_domain_stop (value unit)
+{
+ CAMLparam1(unit);
+ CAMLlocal3(result, temp, node);
+
+ int max_domains = 20;
+ int dom_list[max_domains];
+ int loop, count;
+
+ count = xendebug_query_domain_stop(xc_handle, dom_list, max_domains);
+ if ( count < 0 )
+ {
+ printf("(pdb) query domain stop!\n"); fflush(stdout);
+ failwith("query domain stop");
+ }
+
+ printf ("QDS: %d\n", count);
+ for (loop = 0; loop < count; loop ++)
+ printf (" %d %d\n", loop, dom_list[loop]);
+
+ result = caml_alloc(2,0);
+ if ( count > 0 ) /* car */
+ {
+ node = caml_alloc(2,0);
+ Store_field(node, 0, Val_int(dom_list[0])); /* domain id */
+ Store_field(node, 1, Val_int(0)); /* vcpu */
+ Store_field(result, 0, node);
+ }
+ else
+ {
+ Store_field(result, 0, Val_int(0));
+ }
+ Store_field(result, 1, Val_int(0)); /* cdr */
+
+ for ( loop = 1; loop < count; loop++ )
+ {
+ temp = result;
+ result = caml_alloc(2,0);
+ node = caml_alloc(2,0);
+ Store_field(node, 0, Val_int(dom_list[loop])); /* domain id */
+ Store_field(node, 1, Val_int(0)); /* vcpu */
+ Store_field(result, 0, node);
+ Store_field(result, 1, temp);
+ }
+
+ CAMLreturn(result);
+}
+
+/****************************************************************************/
+/****************************************************************************/
+
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/*
+ * evtchn_open : string -> int -> int -> Unix.file_descr
+ *
+ * OCaml's Unix library doesn't have mknod, so it makes more sense to just write
+ * this in C. This code is from Keir/Andy.
+ */
+value
+evtchn_open (value filename, value major, value minor)
+{
+ CAMLparam3(filename, major, minor);
+
+ char *myfilename = String_val(filename);
+ int mymajor = Int_val(major);
+ int myminor = Int_val(minor);
+ int evtchn_fd;
+ struct stat st;
+
+ /* Make sure any existing device file links to correct device. */
+ if ( (lstat(myfilename, &st) != 0) ||
+ !S_ISCHR(st.st_mode) ||
+ (st.st_rdev != makedev(mymajor, myminor)) )
+ {
+ (void)unlink(myfilename);
+ }
+
+ reopen:
+ evtchn_fd = open(myfilename, O_RDWR);
+ if ( evtchn_fd == -1 )
+ {
+ if ( (errno == ENOENT) &&
+ ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+ (mknod(myfilename, S_IFCHR|0600, makedev(mymajor,myminor)) == 0) )
+ {
+ goto reopen;
+ }
+ return -errno;
+ }
+
+ CAMLreturn(Val_int(evtchn_fd));
+}
+
+/*
+ * evtchn_bind_virq : int -> int
+ */
+value
+evtchn_bind_virq (value virq)
+{
+ CAMLparam1(virq);
+
+ int port;
+
+ if ( pdb_evtchn_bind_virq(xc_handle, Int_val(virq), &port) < 0 )
+ {
+ printf("(pdb) evtchn_bind_virq error!\n"); fflush(stdout);
+ failwith("evtchn_bind_virq error");
+ }
+
+ CAMLreturn(Val_int(port));
+}
+
+/*
+ * evtchn_bind : Unix.file_descr -> int -> unit
+ */
+value
+evtchn_bind (value fd, value idx)
+{
+ CAMLparam2(fd, idx);
+
+ int myfd = Int_val(fd);
+ int myidx = Int_val(idx);
+
+ if ( xen_evtchn_bind(myfd, myidx) < 0 )
+ {
+ printf("(pdb) evtchn_bind error!\n"); fflush(stdout);
+ failwith("evtchn_bind error");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * evtchn_unbind : Unix.file_descr -> int -> unit
+ */
+value
+evtchn_unbind (value fd, value idx)
+{
+ CAMLparam2(fd, idx);
+
+ int myfd = Int_val(fd);
+ int myidx = Int_val(idx);
+
+ if ( xen_evtchn_unbind(myfd, myidx) < 0 )
+ {
+ printf("(pdb) evtchn_unbind error!\n"); fflush(stdout);
+ failwith("evtchn_unbind error");
+ }
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * evtchn_read : Unix.file_descr -> int
+ */
+value
+evtchn_read (value fd)
+{
+ CAMLparam1(fd);
+
+ u16 v;
+ int bytes;
+ int rc = -1;
+ int myfd = Int_val(fd);
+
+ while ( (bytes = read(myfd, &v, sizeof(v))) == -1 )
+ {
+ if ( errno == EINTR ) continue;
+ rc = -errno;
+ goto exit;
+ }
+
+ if ( bytes == sizeof(v) )
+ rc = v;
+
+ exit:
+ CAMLreturn(Val_int(rc));
+}
+
+
+/*
+ * evtchn_close : Unix.file_descr -> unit
+ */
+value
+evtchn_close (value fd)
+{
+ CAMLparam1(fd);
+ int myfd = Int_val(fd);
+
+ (void)close(myfd);
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * evtchn_unmask : Unix.file_descr -> int -> unit
+ */
+value
+evtchn_unmask (value fd, value idx)
+{
+ CAMLparam1(fd);
+
+ int myfd = Int_val(fd);
+ u16 myidx = Int_val(idx);
+
+ (void)write(myfd, &myidx, sizeof(myidx));
+
+ CAMLreturn(Val_unit);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff --git a/tools/debugger/pdb/pdb_xen.c b/tools/debugger/pdb/pdb_xen.c
new file mode 100644
index 0000000000..36671dacc0
--- /dev/null
+++ b/tools/debugger/pdb/pdb_xen.c
@@ -0,0 +1,93 @@
+/*
+ * pdb_xen.c
+ *
+ * alex ho
+ * http://www.cl.cam.ac.uk/netos/pdb
+ *
+ * PDB interface library for accessing Xen
+ */
+
+#include <xc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+
+int
+pdb_open ()
+{
+ int xc_handle = xc_interface_open();
+
+ if ( xc_handle < 0 )
+ {
+ fprintf(stderr, "(pdb) error opening xc interface: %d (%s)\n",
+ errno, strerror(errno));
+ }
+ return xc_handle;
+}
+
+int
+pdb_close (int xc_handle)
+{
+ int rc;
+
+
+ if ( (rc = xc_interface_close(xc_handle)) < 0 )
+ {
+ fprintf(stderr, "(pdb) error closing xc interface: %d (%s)\n",
+ errno, strerror(errno));
+ }
+ return rc;
+}
+
+
+int
+pdb_evtchn_bind_virq (int xc_handle, int virq, int *port)
+{
+ int rc;
+
+ if ( (rc = xc_evtchn_bind_virq(xc_handle, virq, port) < 0 ) )
+ {
+ fprintf(stderr, "(pdb) error binding virq to event channel: %d (%s)\n",
+ errno, strerror(errno));
+ }
+ return rc;
+}
+
+
+#include <sys/ioctl.h>
+
+/* /dev/xen/evtchn ioctls */
+#define EVTCHN_RESET _IO('E', 1) /* clear & reinit buffer */
+#define EVTCHN_BIND _IO('E', 2) /* bind to event channel */
+#define EVTCHN_UNBIND _IO('E', 3) /* unbind from event channel */
+
+int
+xen_evtchn_bind (int evtchn_fd, int idx)
+{
+ if ( ioctl(evtchn_fd, EVTCHN_BIND, idx) != 0 )
+ return -errno;
+
+ return 0;
+}
+
+int
+xen_evtchn_unbind (int evtchn_fd, int idx)
+{
+ if ( ioctl(evtchn_fd, EVTCHN_UNBIND, idx) != 0 )
+ return -errno;
+
+ return 0;
+}
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/debugger/pdb/server.ml b/tools/debugger/pdb/server.ml
new file mode 100644
index 0000000000..2d3a3c7c86
--- /dev/null
+++ b/tools/debugger/pdb/server.ml
@@ -0,0 +1,219 @@
+(** server.ml
+ *
+ * PDB server main loop
+ *
+ * @author copyright (c) 2005 alex ho
+ * @see <www.cl.cam.ac.uk/netos/pdb> pervasive debugger
+ * @version 1
+ *)
+
+open Unix
+open Buffer
+
+
+(**
+ * connection_t: The state for each connection.
+ * buffer & length contains bytes that have been read from the sock
+ * but not yet parsed / processed.
+ *)
+type connection_t =
+{
+ fd : file_descr;
+ mutable buffer : string;
+ mutable length : int;
+}
+
+
+(**
+ * validate_checksum: Compute and compare the checksum of a string
+ * against the provided checksum using the gdb serial protocol algorithm.
+ *
+ *)
+let validate_checksum command checksum =
+ let c0 = ref 0 in
+ for loop = 0 to (String.length command - 1) do
+ c0 := !c0 + int_of_char(command.[loop]);
+ done;
+ if (String.length checksum) = 2
+ then
+ let c1 = Util.int_of_hexchar(checksum.[1]) +
+ Util.int_of_hexchar(checksum.[0]) * 16 in
+ (!c0 mod 256) = (c1 mod 256)
+ else
+ false
+
+
+(**
+ * process_input: Oh, joy! Someone sent us a message. Let's open the
+ * envelope and see what they have to say.
+ *
+ * This function is a paradigm of inefficiency; it performs as many
+ * string copies as possible.
+ *)
+let process_input conn sock =
+ let max_buffer_size = 1024 in
+ let in_string = String.create max_buffer_size in
+
+ let length = read sock in_string 0 max_buffer_size in
+ conn.buffer <- conn.buffer ^ (String.sub in_string 0 length);
+ conn.length <- conn.length + length;
+ let re = Str.regexp "[^\\$]*\\$\\([^#]*\\)#\\(..\\)" in
+
+ begin
+ try
+ let break = String.index conn.buffer '\003' + 1 in
+ print_endline (Printf.sprintf "{{%s}}" (String.escaped conn.buffer));
+
+ (* discard everything seen before the ctrl-c *)
+ conn.buffer <- String.sub conn.buffer break (conn.length - break);
+ conn.length <- conn.length - break;
+
+ (* pause the target *)
+ PDB.pause (PDB.find_context sock);
+
+ (* send a code back to the debugger *)
+ Util.send_reply sock "S05"
+
+ with
+ Not_found -> ()
+ end;
+
+ (* with gdb this is unlikely to loop since you ack each packet *)
+ while ( Str.string_match re conn.buffer 0 ) do
+ let command = Str.matched_group 1 conn.buffer in
+ let checksum = Str.matched_group 2 conn.buffer in
+ let match_end = Str.group_end 2 in
+
+ begin
+ match validate_checksum command checksum with
+ | true ->
+ begin
+ Util.write_character sock '+';
+ try
+ let reply = Debugger.process_command command sock in
+ print_endline (Printf.sprintf "[%s] %s -> \"%s\""
+ (Util.get_connection_info sock)
+ (String.escaped command)
+ (String.escaped reply));
+ Util.send_reply sock reply
+ with
+ Debugger.No_reply ->
+ print_endline (Printf.sprintf "[%s] %s -> null"
+ (Util.get_connection_info sock)
+ (String.escaped command))
+ end
+ | false ->
+ Util.write_character sock '-';
+ end;
+
+ conn.buffer <- String.sub conn.buffer match_end (conn.length - match_end);
+ conn.length <- conn.length - match_end;
+ done;
+ if length = 0 then raise End_of_file
+
+
+
+(** main_server_loop.
+ *
+ * connection_hash is a hash (duh!) with one connection_t for each
+ * open connection.
+ *
+ * in_list is a list of active sockets. it also contains two
+ * magic entries: server_sock for accepting new entries and
+ * event_sock for Xen event channel asynchronous notifications.
+ *)
+let main_server_loop sockaddr =
+ let connection_hash = Hashtbl.create 10
+ in
+ let process_socket svr_sock sockets sock =
+ let (new_list, closed_list) = sockets in
+ if sock == svr_sock
+ then
+ begin
+ let (new_sock, caller) = accept sock in
+ print_endline (Printf.sprintf "[%s] new connection from %s"
+ (Util.get_connection_info sock)
+ (Util.get_connection_info new_sock));
+ Hashtbl.add connection_hash new_sock
+ {fd=new_sock; buffer=""; length = 0};
+ PDB.add_default_context new_sock;
+ (new_sock :: new_list, closed_list)
+ end
+ else
+ begin
+ try
+ match PDB.find_context sock with
+ | PDB.Event_channel ->
+ print_endline (Printf.sprintf "[%s] event channel"
+ (Util.get_connection_info sock));
+ Debugger.process_evtchn sock;
+ (new_list, closed_list)
+ | _ ->
+ let conn = Hashtbl.find connection_hash sock in
+ process_input conn sock;
+ (new_list, closed_list)
+ with
+ | Not_found ->
+ print_endline "error: (main_svr_loop) context not found";
+ PDB.debug_contexts ();
+ raise Not_found
+ | End_of_file ->
+ print_endline (Printf.sprintf "[%s] close connection from %s"
+ (Util.get_connection_info sock)
+ (Util.get_connection_info sock));
+ PDB.delete_context sock;
+ Hashtbl.remove connection_hash sock;
+ close sock;
+ (new_list, sock :: closed_list)
+ end
+ in
+ let rec helper in_list server_sock =
+ (*
+ * List.iter (fun x->Printf.printf "{%s} "
+ * (Util.get_connection_info x)) in_list;
+ * Printf.printf "\n";
+ *)
+ let (rd_list, _, _) = select in_list [] [] (-1.0) in
+ let (new_list, closed_list) = List.fold_left (process_socket server_sock)
+ ([],[]) rd_list in
+ let merge_list = Util.list_remove (new_list @ in_list) closed_list in
+ helper merge_list server_sock
+ in
+ try
+ let server_sock = socket (domain_of_sockaddr sockaddr) SOCK_STREAM 0 in
+ setsockopt server_sock SO_REUSEADDR true;
+ bind server_sock sockaddr;
+ listen server_sock 2;
+
+ PDB.open_debugger ();
+ let event_sock = Evtchn.setup () in
+ PDB.add_context event_sock "event channel" [];
+ helper [server_sock; event_sock] server_sock
+ with
+ | Sys.Break ->
+ print_endline "break: cleaning up";
+ PDB.close_debugger ();
+ Hashtbl.iter (fun sock conn -> close sock) connection_hash
+ | Unix_error(e,err,param) ->
+ Printf.printf "unix error: [%s][%s][%s]\n" (error_message e) err param
+ | Sys_error s -> Printf.printf "sys error: [%s]\n" s
+ | Failure s -> Printf.printf "failure: [%s]\n" s
+ | End_of_file -> Printf.printf "end of file\n"
+
+
+let get_port () =
+ if (Array.length Sys.argv) = 2
+ then
+ int_of_string Sys.argv.(1)
+ else
+ begin
+ print_endline (Printf.sprintf "syntax error: %s <port>" Sys.argv.(0));
+ exit 1
+ end
+
+
+let main =
+ let address = inet_addr_any in
+ let port = get_port () in
+ main_server_loop (ADDR_INET(address, port))
+
diff --git a/tools/examples/Makefile b/tools/examples/Makefile
index 5957a422b9..3d7422a75d 100644
--- a/tools/examples/Makefile
+++ b/tools/examples/Makefile
@@ -12,6 +12,9 @@ XEN_CONFIG_DIR = /etc/xen
XEN_CONFIGS = xend-config.sxp
XEN_CONFIGS += xmexample1
XEN_CONFIGS += xmexample2
+XEN_CONFIGS += xmexample.vmx
+XEN_CONFIGS += mem-map.sxp
+XEN_CONFIGS += bochsrc
# Xen script dir and scripts to go there.
XEN_SCRIPT_DIR = /etc/xen/scripts
diff --git a/tools/examples/block-enbd b/tools/examples/block-enbd
index cfae6288b2..a8fa108f0d 100755
--- a/tools/examples/block-enbd
+++ b/tools/examples/block-enbd
@@ -27,7 +27,7 @@ case $1 in
;;
*)
- echo 'Unknown command: ' $1
- echo 'Valid commands are: bind, unbind'
+ echo 'Unknown command: ' $1 >&2
+ echo 'Valid commands are: bind, unbind' >&2
exit 1
esac
diff --git a/tools/examples/block-file b/tools/examples/block-file
index 362b1faee5..a1612b6d47 100755
--- a/tools/examples/block-file
+++ b/tools/examples/block-file
@@ -25,7 +25,7 @@ case $1 in
;;
*)
- echo 'Unknown command: ' $1
- echo 'Valid commands are: bind, unbind'
+ echo 'Unknown command: ' $1 >&2
+ echo 'Valid commands are: bind, unbind' >&2
exit 1
esac
diff --git a/tools/examples/bochsrc b/tools/examples/bochsrc
new file mode 100644
index 0000000000..d80884b3c8
--- /dev/null
+++ b/tools/examples/bochsrc
@@ -0,0 +1,20 @@
+#megs: 32
+#romimage: file=$BXSHARE/BIOS-bochs-latest, address=0xf0000
+#vgaromimage: $BXSHARE/VGABIOS-lgpl-latest
+floppya: 1_44=a.img, status=inserted
+floppyb: 1_44=b.img, status=inserted
+# if you don't use absolute paths below, bochs looks under the cwd of xend,
+# which is usually "/"
+#ata0-master: type=disk, path=/var/images/min-el3-i386.img, cylinders=800, heads=4, spt=32
+i440fxsupport: enabled=1
+ne2k: ioaddr=0x300, irq=9, mac=b0:c4:22:01:00:00, ethmod=linux, ethdev=eth0
+ata0-master: type=disk, path=/var/images/1g-el3-i386.img, mode=flat, cylinders=2048, heads=16, spt=63
+boot: c
+
+log: /tmp/bochsout.txt
+#debug: action=report
+info: action=report
+error: action=report
+panic: action=ask
+
+mouse: enabled=0
diff --git a/tools/examples/init.d/xend b/tools/examples/init.d/xend
index 058f6a821f..b6d1ad41f3 100755
--- a/tools/examples/init.d/xend
+++ b/tools/examples/init.d/xend
@@ -11,7 +11,7 @@ if ! [ -e /proc/xen/privcmd ]; then
exit 0
fi
-# Wait for Xend / Xfrd to be up
+# Wait for Xend and xcs to be up
function await_daemons_up
{
i=1
diff --git a/tools/examples/init.d/xendomains b/tools/examples/init.d/xendomains
index 3180f45aa2..ac2b273fd7 100755
--- a/tools/examples/init.d/xendomains
+++ b/tools/examples/init.d/xendomains
@@ -14,6 +14,18 @@
# Applications HOWTO" by Avi Alkalay
# <http://www.tldp.org/HOWTO/HighQuality-Apps-HOWTO/>
#
+### BEGIN INIT INFO
+# Provides: xendomains
+# Required-Start: $syslog $remote_fs xend
+# Should-Start:
+# Required-Stop: $syslog $remote_fs xend
+# Should-Stop:
+# Default-Start: 3 4 5
+# Default-Stop: 0 1 2 6
+# Short-Description: Start/stop secondary xen domains
+# Description: Start / stop domains automatically when domain 0
+# boots / shuts down.
+### END INIT INFO
if ! [ -e /proc/xen/privcmd ]; then
exit 0
diff --git a/tools/examples/mem-map.sxp b/tools/examples/mem-map.sxp
new file mode 100644
index 0000000000..246b49b92a
--- /dev/null
+++ b/tools/examples/mem-map.sxp
@@ -0,0 +1,10 @@
+(memmap
+ (0000000000000000 000000000009f800 "AddressRangeMemory" WB)
+ (000000000009f800 00000000000a0000 "AddressRangeReserved" UC)
+ (00000000000a0000 00000000000bffff "AddressRangeIO" UC)
+ (00000000000f0000 0000000000100000 "AddressRangeReserved" UC)
+ (0000000000100000 0000000008000000 "AddressRangeMemory" WB)
+ (0000000007fff000 0000000008000000 "AddressRangeShared" WB)
+ (0000000008000000 0000000008003000 "AddressRangeNVS" UC)
+ (0000000008003000 000000000800d000 "AddressRangeACPI" WB)
+ (00000000fec00000 0000000100000000 "AddressRangeIO" UC))
diff --git a/tools/examples/network b/tools/examples/network
index c8b414fe6d..537bbe3dab 100755
--- a/tools/examples/network
+++ b/tools/examples/network
@@ -53,7 +53,7 @@ bridge=${bridge:-xen-br0}
netdev=${netdev:-eth0}
antispoof=${antispoof:-yes}
-echo "network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof"
+echo "*network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof" >&2
# Usage: transfer_addrs src dst
# Copy all IP addresses (including aliases) from device $src to device $dst.
@@ -66,12 +66,18 @@ transfer_addrs () {
fi
# Address lines start with 'inet' and have the device in them.
# Replace 'inet' with 'ip addr add' and change the device name $src
- # to 'dev $src'. Remove netmask as we'll add routes later.
+ # to 'dev $src'.
ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
s/inet/ip addr add/
-s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@
+s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+/[0-9]\+\)@\1@
s/${src}/dev ${dst}/
" | sh -e
+    # Remove automatic routes on destination device
+ ip route list | sed -ne "
+/dev ${dst}\( \|$\)/ {
+ s/^/ip route del /
+ p
+}" | sh -e
}
# Usage: del_addrs src
@@ -95,16 +101,17 @@ transfer_routes () {
# List all routes and grep the ones with $src in.
# Stick 'ip route del' on the front to delete.
# Change $src to $dst and use 'ip route add' to add.
- ip route list | grep ${src} | sed -e "
-h
-s/^/ip route del /
-P
-g
-s/${src}/${dst}/
-s/^/ip route add /
-P
-d
-" | sh -e
+ ip route list | sed -ne "
+/dev ${src}\( \|$\)/ {
+ h
+ s/^/ip route del /
+ P
+ g
+ s/${src}/${dst}/
+ s/^/ip route add /
+ P
+ d
+}" | sh -e
}
# Usage: create_bridge bridge
@@ -233,7 +240,7 @@ case ${OP} in
;;
*)
- echo 'Unknown command: ' ${OP}
- echo 'Valid commands are: start, stop, status'
+ echo 'Unknown command: ' ${OP} >&2
+ echo 'Valid commands are: start, stop, status' >&2
exit 1
esac
diff --git a/tools/examples/network-nat b/tools/examples/network-nat
index ed32b70c58..0d6460f308 100644
--- a/tools/examples/network-nat
+++ b/tools/examples/network-nat
@@ -1,18 +1,18 @@
#!/bin/sh
#============================================================================
-# Default Xen network start/stop script.
+# Default Xen network start/stop script when using NAT.
# Xend calls a network script when it starts.
# The script name to use is defined in /etc/xen/xend-config.sxp
# in the network-script field.
#
# Usage:
#
-# network-route (start|stop|status) {VAR=VAL}*
+# network-nat (start|stop|status) {VAR=VAL}*
#
# Vars:
#
# netdev The gateway interface (default eth0).
-# antispoof Whether to use iptables to prevent spoofing (default yes).
+# antispoof Whether to use iptables to prevent spoofing (default no).
#
#============================================================================
@@ -30,9 +30,9 @@ for arg ; do export "${arg}" ; done
netdev=${netdev:-eth0}
# antispoofing not yet implemented
-antispoof=${antispoof:-yes}
+antispoof=${antispoof:-no}
-echo "network-nat $OP netdev=$netdev antispoof=$antispoof"
+echo "*network-nat $OP netdev=$netdev antispoof=$antispoof" >&2
op_start() {
@@ -71,7 +71,7 @@ case ${OP} in
;;
*)
- echo 'Unknown command: ' ${OP}
- echo 'Valid commands are: start, stop, status'
+ echo 'Unknown command: ' ${OP} >&2
+ echo 'Valid commands are: start, stop, status' >&2
exit 1
esac
diff --git a/tools/examples/vif-bridge b/tools/examples/vif-bridge
index 42bdf0e173..1c5bc161f9 100755
--- a/tools/examples/vif-bridge
+++ b/tools/examples/vif-bridge
@@ -34,7 +34,7 @@
# Exit if anything goes wrong
set -e
-echo "vif-bridge $*"
+echo "*vif-bridge $*" >&2
# Operation name.
OP=$1
@@ -63,8 +63,8 @@ case $OP in
iptcmd='-D'
;;
*)
- echo 'Invalid command: ' $OP
- echo 'Valid commands are: up, down'
+ echo 'Invalid command: ' $OP >&2
+ echo 'Valid commands are: up, down' >&2
exit 1
;;
esac
diff --git a/tools/examples/vif-nat b/tools/examples/vif-nat
index 4b6d348dfa..00977f8819 100644
--- a/tools/examples/vif-nat
+++ b/tools/examples/vif-nat
@@ -23,7 +23,7 @@
# Exit if anything goes wrong
set -e
-echo "vif-nat $*"
+echo "*vif-nat $*" >&2
# Operation name.
OP=$1
@@ -56,8 +56,8 @@ case $OP in
ipcmd='d'
;;
*)
- echo 'Invalid command: ' $OP
- echo 'Valid commands are: up, down'
+ echo 'Invalid command: ' $OP >&2
+ echo 'Valid commands are: up, down' >&2
exit 1
;;
esac
diff --git a/tools/examples/vif-route b/tools/examples/vif-route
index b15aea1e5c..e2ff8b0f19 100755
--- a/tools/examples/vif-route
+++ b/tools/examples/vif-route
@@ -24,7 +24,7 @@
# Exit if anything goes wrong
set -e
-echo "vif-route $*"
+echo "*vif-route $*" >&2
# Operation name.
OP=$1
@@ -57,8 +57,8 @@ case $OP in
ipcmd='d'
;;
*)
- echo 'Invalid command: ' $OP
- echo 'Valid commands are: up, down'
+ echo 'Invalid command: ' $OP >&2
+ echo 'Valid commands are: up, down' >&2
exit 1
;;
esac
diff --git a/tools/examples/xmexample.vmx b/tools/examples/xmexample.vmx
new file mode 100644
index 0000000000..1936d46a69
--- /dev/null
+++ b/tools/examples/xmexample.vmx
@@ -0,0 +1,148 @@
+# -*- mode: python; -*-
+#============================================================================
+# Python configuration setup for 'xm create'.
+# This script sets the parameters used when a domain is created using 'xm create'.
+# You use a separate script for each domain you want to create, or
+# you can set the parameters for the domain on the xm command line.
+#============================================================================
+
+#----------------------------------------------------------------------------
+# Kernel image file.
+kernel = "/usr/lib/xen/boot/vmxloader"
+
+# Optional ramdisk.
+#ramdisk = "/boot/initrd.gz"
+
+# The domain build function. Default is 'linux'.
+builder='vmx'
+#builder='linux'
+#builder='netbsd'
+
+# Initial memory allocation (in megabytes) for the new domain.
+memory = 128
+
+# A name for your domain. All domains must have different names.
+name = "ExampleVMXDomain"
+
+# Which CPU to start domain on?
+#cpu = -1 # leave to Xen to pick
+
+#----------------------------------------------------------------------------
+# Define network interfaces.
+
+# Number of network interfaces. Default is 1.
+#nics=1
+nics=0
+
+# Optionally define mac and/or bridge for the network interfaces.
+# Random MACs are assigned if not given.
+#vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0' ]
+
+#----------------------------------------------------------------------------
+# Define the disk devices you want the domain to have access to, and
+# what you want them accessible as.
+# Each disk entry is of the form phy:UNAME,DEV,MODE
+# where UNAME is the device, DEV is the device name the domain will see,
+# and MODE is r for read-only, w for read-write.
+
+#disk = [ 'phy:hda1,hda1,r' ]
+
+#----------------------------------------------------------------------------
+# Set the kernel command line for the new domain.
+# You only need to define the IP parameters and hostname if the domain's
+# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
+# You can use 'extra' to set the runlevel and custom environment
+# variables used by custom rc scripts (e.g. VMID=, usr= ).
+
+# Set if you want dhcp to allocate the IP address.
+#dhcp="dhcp"
+# Set netmask.
+#netmask=
+# Set default gateway.
+#gateway=
+# Set the hostname.
+#hostname= "vm%d" % vmid
+
+# Set root device.
+#root = "/dev/ram0"
+root = "/dev/hda1 ro"
+
+# Root device for nfs.
+#root = "/dev/nfs"
+# The nfs server.
+#nfs_server = '169.254.1.0'
+# Root directory on the nfs server.
+#nfs_root = '/full/path/to/root/directory'
+
+# Sets runlevel 4.
+#extra = "acpi=off console=ttyS0 console=tty0 1"
+
+#----------------------------------------------------------------------------
+# Set according to whether you want the domain restarted when it exits.
+# The default is 'onreboot', which restarts the domain when it shuts down
+# with exit code reboot.
+# Other values are 'always', and 'never'.
+
+#restart = 'onreboot'
+
+#============================================================================
+
+# New stuff
+memmap = '/etc/xen/mem-map.sxp'
+device_model = '/usr/bin/device-model'
+device_config = '/etc/xen/xmexample.vmx'
+
+#============================================================================
+#
+# config item for qemu device model
+# Note: no space between =
+#-----------------------------------------------------------------------------
+# Qemu binary path
+qemubin='/usr/bin/qemu-dm'
+
+#-----------------------------------------------------------------------------
+# Disk image for
+hda='/var/images/min-el3-i386.img'
+#hdb=
+#hdc=
+#hdd=
+#cdrom=
+
+#-----------------------------------------------------------------------------
+# boot on floppy (a), hard disk (c) or CD-ROM (d)
+#boot=[a|c|d]
+#-----------------------------------------------------------------------------
+# write to temporary files instead of disk image files
+#snapshot=1
+
+#----------------------------------------------------------------------------
+# enable SDL library for graphics, default = 0
+sdl=0
+
+#----------------------------------------------------------------------------
+# enable VNC library for graphics, default = 1
+vnc=1
+
+#----------------------------------------------------------------------------
+# no graphics, use serial port
+#nographic=0
+
+
+#-----------------------------------------------------------------------------
+# enable audio support
+#enable-audio=1
+
+
+#-----------------------------------------------------------------------------
+# set the real time clock to local time [default=utc]
+#localtime='utc'
+
+
+#-----------------------------------------------------------------------------
+# start in full screen
+#full-screen=1
+
+#-----------------------------------------------------------------------------
+# set the mac address of the first interface
+#macaddr=
+
diff --git a/tools/examples/xmexample1 b/tools/examples/xmexample1
index 0fe27fbef6..a5cf683c5b 100644
--- a/tools/examples/xmexample1
+++ b/tools/examples/xmexample1
@@ -25,6 +25,9 @@ name = "ExampleDomain"
# Which CPU to start domain on?
#cpu = -1 # leave to Xen to pick
+# Number of Virtual CPUS to use, default is 1
+#vcpus = 1
+
#----------------------------------------------------------------------------
# Define network interfaces.
diff --git a/tools/examples/xmexample2 b/tools/examples/xmexample2
index 1d4084ddc1..ea349cfa67 100644
--- a/tools/examples/xmexample2
+++ b/tools/examples/xmexample2
@@ -55,6 +55,10 @@ name = "VM%d" % vmid
#cpu = -1 # leave to Xen to pick
cpu = vmid # set based on vmid (mod number of CPUs)
+# Number of Virtual CPUS to use, default is 1
+#vcpus = 1
+vcpus = 4 # make your domain a 4-way
+
#----------------------------------------------------------------------------
# Define network interfaces.
diff --git a/tools/firmware/Makefile b/tools/firmware/Makefile
new file mode 100644
index 0000000000..2eeb70baba
--- /dev/null
+++ b/tools/firmware/Makefile
@@ -0,0 +1,34 @@
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+TARGET := vmxassist/vmxloader
+INSTALL_DIR := $(DESTDIR)/usr/lib/xen/boot
+
+SUBDIRS :=
+SUBDIRS += rombios
+SUBDIRS += vgabios
+SUBDIRS += vmxassist
+
+.PHONY: all install clean
+
+all:
+ @set -e; if ! `which bcc 1>/dev/null 2>/dev/null`; then \
+ echo "***********************************************************"; \
+ echo "WARNING: Install dev86 package to build firmware!"; \
+ echo " (http://www.cix.co.uk/~mayday)"; \
+ echo "***********************************************************"; \
+ else \
+ for subdir in $(SUBDIRS); do \
+ $(MAKE) -C $$subdir $@; \
+ done; \
+ fi
+
+
+install: all
+ [ -d $(INSTALL_DIR) ] || install -d -m0755 $(INSTALL_DIR)
+ [ ! -e $(TARGET) ] || install -m0644 $(TARGET) $(INSTALL_DIR)
+
+clean:
+ @set -e; for subdir in $(SUBDIRS); do \
+ $(MAKE) -C $$subdir $@; \
+ done
diff --git a/tools/firmware/README b/tools/firmware/README
new file mode 100644
index 0000000000..0339fa679f
--- /dev/null
+++ b/tools/firmware/README
@@ -0,0 +1,88 @@
+Domain FirmWare support
+-----------------------
+
+One of the key advantages of full virtualization hardware support (such
+as Intel's VT or AMD's Pacifica) is the ability to run unmodified guest
+operating systems. However, since most OSes rely on BIOS support during
+their early bringup, we need to provide a surrogate ROMBIOS and VGABIOS
+firmware layer.
+
+What's more, we need to support real-mode which is required by
+the firmware and bootstrap loaders. Real-mode support is especially
+challenging for Intel's VMX (VT) enabled CPUs where there is no real-mode
+support for VMX guest partitions. In this case you either have to do full
+emulation (full real-mode emulator; more complete but potentially slower)
+or partial emulation (use the VM8086 extensions, emulate only those
+instructions that are missing; faster, but potentially incomplete). The
+vmxassist code in this subdirectory uses the latter approach because it
+is smaller and faster.
+
+The approach is relatively straight forward. Vmxloader contains three
+payloads (rombios, vgabios and vmxassist) and it is bootstrapped as any
+other 32-bit OS. Vmxloader copies its payloads to the addresses below
+and transfers control to vmxassist.
+
+ vgabios VGABIOS (standard and Cirrus).
+ Resides at C000:0000.
+
+ vmxassist VMXAssist VM86 realmode emulator for VMX.
+ Resides at D000:0000.
+
+ rombios ROMBIOS code. Derived from Bochs.
+ Resides at F000:0000
+
+Vmxassist first sets up its own world (GDT, IDT, TR, etc), enables
+VM8086 and then transfers control to F000:FFF0 and executes 16-bit
+code. Unsupported instructions cause a general protection failure at
+which point vmxassist kicks in and emulates the offending instruction.
+Whenever the emulated code transitions to 32-bit protected mode, vmxassist
+will go away. Whenever 32-bit protected code transitions to real-mode,
+Xen/VMX will detect this and transfer control to vmxassist.
+
+Most of the vmxassist complexity comes from properly handling the
+real to protected mode and protected to real mode transitions and
+the proper emulation of the segment registers. Even though the Intel
+manual clearly states that you should immediately perform a jmp far
+after a mode transition, many operating systems execute additional
+instructions and some even refer to segment selectors and pop data
+from the stack. Vmxassist contains a number of workarounds for these
+OSes.
+
+
+Acknowledgements
+----------------
+
+The rombios was taken (largely unmodified) from Bochs, which was written
+by Kevin Lawton. The VGABIOS was written by Christophe Bothamy. Arun Sharma,
+Asit Mallick and Nitin Kamble (Intel) provided the E820 patches and lots
+of useful feedback.
+
+
+Contact
+-------
+
+Leendert van Doorn
+IBM T.J. Watson Research Center
+19 Skyline Drive
+Hawthorne, NY 10532
+leendert@watson.ibm.com
+
+
+Tested Operating Systems
+------------------------
+
+Since vmxassist uses partial emulation, it may always miss opcodes
+that are required by a particular OS. The table below lists the OSes
+I have tried. The Install column indicates a full CD/DVD install into
+a VMX partition. The Disk column indicates booting from prefabricated
+disk image.
+
+Operating System Install Disk
+------------------------------------------------------------
+RedHat Enterprise Linux (RHEL3_U5) Yes Yes
+Fedora Core (FC3)                       (-)     Yes
+FreeBSD 5.3 (-) Yes
+MS-DOS 5.0 (-) Yes
+
+(-) not tried yet
+
diff --git a/tools/firmware/rombios/Makefile b/tools/firmware/rombios/Makefile
new file mode 100644
index 0000000000..0624e81e96
--- /dev/null
+++ b/tools/firmware/rombios/Makefile
@@ -0,0 +1,58 @@
+BIOS_BUILDS = BIOS-bochs-latest
+#BIOS_BUILDS += BIOS-bochs-2-processors
+#BIOS_BUILDS += BIOS-bochs-4-processors
+#BIOS_BUILDS += BIOS-bochs-8-processors
+
+all: bios
+
+bios: biossums ${BIOS_BUILDS}
+
+clean:
+ rm -f *.o *.a *.s rombios.bin _rombios*_.c
+ rm -f as86-sym.txt ld86-sym.txt
+ rm -f rombios*.txt rombios*.sym usage biossums
+ rm -f BIOS-bochs-*
+
+BIOS-bochs-latest: rombios.c biossums
+ gcc -DBX_SMP_PROCESSORS=1 -E -P $< > _rombios_.c
+ bcc -o rombios.s -C-c -D__i86__ -0 -S _rombios_.c
+ sed -e 's/^\.text//' -e 's/^\.data//' rombios.s > _rombios_.s
+ as86 _rombios_.s -b tmp.bin -u- -w- -g -0 -j -O -l rombios.txt
+ -perl makesym.perl < rombios.txt > rombios.sym
+ mv tmp.bin BIOS-bochs-latest
+ ./biossums BIOS-bochs-latest
+ rm -f _rombios_.s
+
+BIOS-bochs-2-processors: rombios.c biossums
+ gcc -DBX_SMP_PROCESSORS=2 -E -P $< > _rombios2_.c
+ bcc -o rombios2.s -C-c -D__i86__ -0 -S _rombios2_.c
+ sed -e 's/^\.text//' -e 's/^\.data//' rombios2.s > _rombios2_.s
+ as86 _rombios2_.s -b tmp2.bin -u- -w- -g -0 -j -O -l rombios2.txt
+ -perl makesym.perl < rombios2.txt > rombios2.sym
+ mv tmp2.bin BIOS-bochs-2-processors
+ ./biossums BIOS-bochs-2-processors
+ rm -f _rombios2_.s
+
+BIOS-bochs-4-processors: rombios.c biossums
+ gcc -DBX_SMP_PROCESSORS=4 -E -P $< > _rombios4_.c
+ bcc -o rombios4.s -C-c -D__i86__ -0 -S _rombios4_.c
+ sed -e 's/^\.text//' -e 's/^\.data//' rombios4.s > _rombios4_.s
+ as86 _rombios4_.s -b tmp4.bin -u- -w- -g -0 -j -O -l rombios4.txt
+ -perl makesym.perl < rombios4.txt > rombios4.sym
+ mv tmp4.bin BIOS-bochs-4-processors
+ ./biossums BIOS-bochs-4-processors
+ rm -f _rombios4_.s
+
+BIOS-bochs-8-processors: rombios.c biossums
+ gcc -DBX_SMP_PROCESSORS=8 -E -P $< > _rombios8_.c
+ bcc -o rombios8.s -C-c -D__i86__ -0 -S _rombios8_.c
+ sed -e 's/^\.text//' -e 's/^\.data//' rombios8.s > _rombios8_.s
+ as86 _rombios8_.s -b tmp8.bin -u- -w- -g -0 -j -O -l rombios8.txt
+ -perl makesym.perl < rombios8.txt > rombios8.sym
+ mv tmp8.bin BIOS-bochs-8-processors
+ ./biossums BIOS-bochs-8-processors
+ rm -f _rombios8_.s
+
+biossums: biossums.c
+ gcc -o biossums biossums.c
+
diff --git a/tools/firmware/rombios/apmbios.S b/tools/firmware/rombios/apmbios.S
new file mode 100644
index 0000000000..d8ac160848
--- /dev/null
+++ b/tools/firmware/rombios/apmbios.S
@@ -0,0 +1,329 @@
+// APM BIOS support for the Bochs BIOS
+// Copyright (C) 2004 Fabrice Bellard
+//
+// Debugging extensions, 16-bit interface and extended power options
+// Copyright (C) 2005 Struan Bartlett
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+#if defined(APM_REAL)
+#define APMSYM(s) apmreal_ ## s
+#elif defined(APM_PROT16)
+#define APMSYM(s) apm16_ ## s
+#elif defined(APM_PROT32)
+#define APMSYM(s) apm32_ ## s
+#else
+#error unsupported APM mode
+#endif
+
+APMSYM(out_str):
+ push eax
+ push ebx
+ mov ebx, eax
+APMSYM(out_str1):
+ SEG CS
+ mov al, byte ptr [bx]
+ cmp al, #0
+ je APMSYM(out_str2)
+ outb dx, al
+ inc ebx
+ jmp APMSYM(out_str1)
+APMSYM(out_str2):
+ pop ebx
+ pop eax
+ ret
+
+APMSYM(07_poweroff_str):
+ .ascii "Shutdown"
+ db 0
+APMSYM(07_suspend_str):
+ .ascii "Suspend"
+ db 0
+APMSYM(07_standby_str):
+ .ascii "Standby"
+ db 0
+
+#if DEBUG_APM
+APMSYM(put_str):
+ push edx
+ mov dx, #INFO_PORT
+ call APMSYM(out_str)
+ pop edx
+ ret
+
+; print the hex number in eax
+APMSYM(put_num):
+ push eax
+ push ebx
+ push ecx
+ push edx
+ mov ecx, eax
+ mov bx, #8
+ mov dx, #INFO_PORT
+APMSYM(put_num1):
+ mov eax, ecx
+ shr eax, #28
+ add al, #0x30
+ cmp al, #0x39
+ jbe APMSYM(put_num2)
+ add al, #0x27
+APMSYM(put_num2):
+ outb dx, al
+ shl ecx, #4
+ dec bx
+ jne APMSYM(put_num1)
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ ret
+
+APMSYM(put_reg):
+ outb dx, al
+ shr eax, #8
+ outb dx, al
+ shr eax, #8
+ outb dx, al
+ shr eax, #8
+ outb dx, al
+
+ mov eax,ebx
+ call APMSYM(put_num)
+
+ mov al, #0x3b
+ outb dx,al
+ mov al, #0x20
+ outb dx,al
+ ret
+
+APMSYM(put_regs):
+ push eax
+ push edx
+ push ebx
+ mov dx, #INFO_PORT
+
+ mov ebx, eax
+ mov eax, #0x3d584145 // 'EAX='
+ call APMSYM(put_reg)
+ pop ebx
+ push ebx
+ mov eax, #0x3d584245 // 'EBX='
+ call APMSYM(put_reg)
+ mov ebx, ecx
+ mov eax, #0x3d584345 // 'ECX='
+ call APMSYM(put_reg)
+ mov ebx, edx
+ mov eax, #0x3d584445 // 'EDX='
+ call APMSYM(put_reg)
+ mov ebx, esi
+ mov eax, #0x3d495345 // 'ESI='
+ call APMSYM(put_reg)
+ mov ebx, edi
+ mov eax, #0x3d494445 // 'EDI='
+ call APMSYM(put_reg)
+
+ mov al, #0x0a
+ outb dx, al
+ pop ebx
+ pop edx
+ pop eax
+ ret
+#endif
+
+#if defined(APM_PROT32)
+_apm32_entry:
+#endif
+#if defined(APM_PROT16)
+_apm16_entry:
+#endif
+ pushf
+
+#if defined(APM_REAL)
+_apmreal_entry:
+#endif
+
+#if DEBUG_APM
+ call APMSYM(put_regs)
+#endif
+
+#if defined(APM_REAL)
+;-----------------
+; APM installation check
+APMSYM(00):
+ cmp al, #0x00
+ jne APMSYM(01)
+
+ mov ah, #1 // APM major version
+ mov al, #2 // APM minor version
+
+ mov bh, #0x50 // 'P'
+ mov bl, #0x4d // 'M'
+
+ // bit 0 : 16 bit interface supported
+ // bit 1 : 32 bit interface supported
+ mov cx, #0x3
+ jmp APMSYM(ok)
+
+;-----------------
+; APM real mode interface connect
+APMSYM(01):
+ cmp al, #0x01
+ jne APMSYM(02)
+ jmp APMSYM(ok)
+
+;-----------------
+; APM 16 bit protected mode interface connect
+APMSYM(02):
+ cmp al, #0x02
+ jne APMSYM(03)
+
+ mov bx, #_apm16_entry
+
+ mov ax, #0xf000 // 16 bit code segment base
+ mov si, #0xfff0 // 16 bit code segment size
+ mov cx, #0xf000 // data segment address
+ mov di, #0xfff0 // data segment length
+ jmp APMSYM(ok)
+
+;-----------------
+; APM 32 bit protected mode interface connect
+APMSYM(03):
+ cmp al, #0x03
+ jne APMSYM(04)
+ mov ax, #0xf000 // 32 bit code segment base
+ mov ebx, #_apm32_entry
+ mov cx, #0xf000 // 16 bit code segment base
+ // 32 bit code segment size (low 16 bits)
+ // 16 bit code segment size (high 16 bits)
+ mov esi, #0xfff0fff0
+ mov dx, #0xf000 // data segment address
+ mov di, #0xfff0 // data segment length
+ jmp APMSYM(ok)
+#endif
+
+;-----------------
+; APM interface disconnect
+APMSYM(04):
+ cmp al, #0x04
+ jne APMSYM(07)
+ jmp APMSYM(ok)
+
+;-----------------
+; APM Set Power State
+APMSYM(07):
+ cmp al, #0x07
+ jne APMSYM(0a)
+
+ cmp bx, #1
+ jne APMSYM(ok)
+
+ cmp cx, #3
+ je APMSYM(07_poweroff)
+
+ cmp cx, #2
+ je APMSYM(07_suspend)
+
+ cmp cx, #1
+ je APMSYM(07_standby)
+
+ jne APMSYM(ok)
+
+APMSYM(07_poweroff):
+ // send power off event to emulator
+ cli
+ mov dx, #0x8900
+ mov ax, #APMSYM(07_poweroff_str)
+ call APMSYM(out_str)
+
+APMSYM(07_1):
+ hlt
+ jmp APMSYM(07_1)
+
+APMSYM(07_suspend):
+ push edx
+ mov dx, #0x8900
+ mov ax, #APMSYM(07_suspend_str)
+ call APMSYM(out_str)
+ pop edx
+ jmp APMSYM(ok)
+
+APMSYM(07_standby):
+ push edx
+ mov dx, #0x8900
+ mov ax, #APMSYM(07_standby_str)
+ call APMSYM(out_str)
+ pop edx
+ jmp APMSYM(ok)
+
+;-----------------
+; Get Power Status
+APMSYM(0a):
+ cmp al, #0x0a
+ jne APMSYM(0b)
+ mov bh, #0x01 // on line
+ // mov bh, #0x02 // battery
+ mov bl, #0xff // unknown battery status
+ // mov bl, #0x03 // charging
+ mov ch, #0x80 // no system battery
+ // mov ch, #0x8 // charging
+ mov cl, #0xff // unknown remaining time
+ // mov cl, #50
+ mov dx, #0xffff // unknown remaining time
+ mov si, #0 // zero battery
+ // mov si, #1 // one battery
+ jmp APMSYM(ok)
+
+;-----------------
+; Get PM Event
+APMSYM(0b):
+ cmp al, #0x0b
+ jne APMSYM(0e)
+ mov ah, #0x80 // no event pending
+ jmp APMSYM(error)
+
+;-----------------
+; APM Driver Version
+APMSYM(0e):
+ cmp al, #0x0e
+ jne APMSYM(unimplemented)
+
+ mov ah, #1
+ mov al, #2
+
+ jmp APMSYM(ok)
+
+;-----------------
+APMSYM(ok):
+ popf
+ clc
+#if defined(APM_REAL)
+ jmp iret_modify_cf
+#else
+ retf
+#endif
+APMSYM(unimplemented):
+APMSYM(error):
+ popf
+ stc
+#if defined(APM_REAL)
+ jmp iret_modify_cf
+#else
+ retf
+#endif
+
+#undef APM_PROT32
+#undef APM_PROT16
+#undef APM_REAL
+#undef APMSYM
diff --git a/tools/firmware/rombios/biossums.c b/tools/firmware/rombios/biossums.c
new file mode 100644
index 0000000000..be12e49f35
--- /dev/null
+++ b/tools/firmware/rombios/biossums.c
@@ -0,0 +1,478 @@
+/* biossums.c --- written by Eike W. */
+/* Scans a 64 KiB BIOS image for the _32_, _MP_, PCMP and $PIR tables,
+ * recomputes each table checksum and the image-wide checksum byte, and
+ * patches the file in place. See main() for the overall flow. */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+typedef unsigned char byte;
+
+/* abort with a message when a sanity check fails (value == 0) */
+void check( int value, char* message );
+
+/* total image size and index of its last byte */
+#define LEN_BIOS_DATA 0x10000
+#define MAX_OFFSET (LEN_BIOS_DATA - 1)
+
+
+/* the image-wide checksum byte is the very last byte of the image */
+#define BIOS_OFFSET 0xFFFF
+
+long chksum_bios_get_offset( byte* data, long offset );
+byte chksum_bios_calc_value( byte* data, long offset );
+byte chksum_bios_get_value( byte* data, long offset );
+void chksum_bios_set_value( byte* data, long offset, byte value );
+
+
+/* byte offsets of the length (in 16-byte units) and checksum fields of a
+ * "_32_" (PCI BIOS32 service directory) header */
+#define _32__LEN 9
+#define _32__CHKSUM 10
+
+#define _32__MINHDR 16
+
+long chksum__32__get_offset( byte* data, long offset );
+byte chksum__32__calc_value( byte* data, long offset );
+byte chksum__32__get_value( byte* data, long offset );
+void chksum__32__set_value( byte* data, long offset, byte value );
+
+
+/* field offsets of the "_MP_" (MP floating pointer) structure */
+#define _MP__LEN 8
+#define _MP__CHKSUM 10
+
+#define _MP__MINHDR 16
+
+long chksum__mp__get_offset( byte* data, long offset );
+byte chksum__mp__calc_value( byte* data, long offset );
+byte chksum__mp__get_value( byte* data, long offset );
+void chksum__mp__set_value( byte* data, long offset, byte value );
+
+
+/* field offsets of the "PCMP" (MP configuration table) header; the
+ * extended-table fields are checked but not supported */
+#define PCMP_BASELEN 4
+#define PCMP_CHKSUM 7
+#define PCMP_EXT_LEN 40
+#define PCMP_EXT_CHKSUM 42
+
+#define PCMP_MINHDR 42
+
+long chksum_pcmp_get_offset( byte* data, long offset );
+byte chksum_pcmp_calc_value( byte* data, long offset );
+byte chksum_pcmp_get_value( byte* data, long offset );
+void chksum_pcmp_set_value( byte* data, long offset, byte value );
+
+
+/* field offsets of the "$PIR" (PCI IRQ routing) table header */
+#define _PIR_LEN 6
+#define _PIR_CHKSUM 31
+
+#define _PIR_MINHDR 32
+
+long chksum__pir_get_offset( byte *data, long offset );
+byte chksum__pir_calc_value( byte* data, long offset );
+byte chksum__pir_get_value( byte* data, long offset );
+void chksum__pir_set_value( byte* data, long offset, byte value );
+
+
+/* the whole 64 KiB image, loaded by main() */
+byte bios_data[LEN_BIOS_DATA];
+
+
+/* biossums entry point: load the 64 KiB image named by argv[1], locate and
+ * repair the checksum of each firmware table (only when exactly one header
+ * of a kind is found), fix the image-wide checksum byte, and rewrite the
+ * file in place. Exits EXIT_FAILURE on any I/O error or bad usage. */
+int main( int argc, char* argv[] ) {
+
+ FILE* stream;
+ long offset, tmp_offset;
+ byte cur_val = 0, new_val = 0;
+ int hits;
+
+
+ if( argc != 2 ) {
+ printf( "Error. Need a file-name as an argument.\n" );
+ exit( EXIT_FAILURE );
+ }
+
+ if(( stream = fopen( argv[1], "rb" )) == NULL ) {
+ printf( "Error opening %s for reading.\n", argv[1] );
+ exit( EXIT_FAILURE );
+ }
+ if( fread( bios_data, 1, LEN_BIOS_DATA, stream ) < LEN_BIOS_DATA ) {
+ printf( "Error reading 64KBytes from %s.\n", argv[1] );
+ fclose( stream );
+ exit( EXIT_FAILURE );
+ }
+ fclose( stream );
+
+ /* PCI BIOS32 "_32_" service directory */
+ hits = 0;
+ offset = 0L;
+ while( (tmp_offset = chksum__32__get_offset( bios_data, offset )) != -1L ) {
+ offset = tmp_offset;
+ cur_val = chksum__32__get_value( bios_data, offset );
+ new_val = chksum__32__calc_value( bios_data, offset );
+ printf( "\n\nPCI-Bios header at: 0x%4lX\n", offset );
+ printf( "Current checksum: 0x%02X\n", cur_val );
+ printf( "Calculated checksum: 0x%02X ", new_val );
+ hits++;
+ }
+ /* patch only an unambiguous single hit */
+ if( hits == 1 && cur_val != new_val ) {
+ printf( "Setting checksum." );
+ chksum__32__set_value( bios_data, offset, new_val );
+ }
+ if( hits >= 2 ) {
+ printf( "Multiple PCI headers! No checksum set." );
+ }
+ if( hits ) {
+ printf( "\n" );
+ }
+
+
+ /* "_MP_" floating pointer structure */
+ hits = 0;
+ offset = 0L;
+ while( (tmp_offset = chksum__mp__get_offset( bios_data, offset )) != -1L ) {
+ offset = tmp_offset;
+ cur_val = chksum__mp__get_value( bios_data, offset );
+ new_val = chksum__mp__calc_value( bios_data, offset );
+ printf( "\n\nMP header at: 0x%4lX\n", offset );
+ printf( "Current checksum: 0x%02X\n", cur_val );
+ printf( "Calculated checksum: 0x%02X ", new_val );
+ hits++;
+ }
+ if( hits == 1 && cur_val != new_val ) {
+ printf( "Setting checksum." );
+ chksum__mp__set_value( bios_data, offset, new_val );
+ }
+ if( hits >= 2 ) {
+ printf( "Warning! Multiple MP headers. No checksum set." );
+ }
+ if( hits ) {
+ printf( "\n" );
+ }
+
+
+ /* "PCMP" MP configuration table */
+ hits = 0;
+ offset = 0L;
+ while( (tmp_offset = chksum_pcmp_get_offset( bios_data, offset )) != -1L ) {
+ offset = tmp_offset;
+ cur_val = chksum_pcmp_get_value( bios_data, offset );
+ new_val = chksum_pcmp_calc_value( bios_data, offset );
+ printf( "\n\nPCMP header at: 0x%4lX\n", offset );
+ printf( "Current checksum: 0x%02X\n", cur_val );
+ printf( "Calculated checksum: 0x%02X ", new_val );
+ hits++;
+ }
+ if( hits == 1 && cur_val != new_val ) {
+ printf( "Setting checksum." );
+ chksum_pcmp_set_value( bios_data, offset, new_val );
+ }
+ if( hits >= 2 ) {
+ printf( "Warning! Multiple PCMP headers. No checksum set." );
+ }
+ if( hits ) {
+ printf( "\n" );
+ }
+
+
+ /* "$PIR" PCI IRQ routing table */
+ hits = 0;
+ offset = 0L;
+ while( (tmp_offset = chksum__pir_get_offset( bios_data, offset )) != -1L ) {
+ offset = tmp_offset;
+ cur_val = chksum__pir_get_value( bios_data, offset );
+ new_val = chksum__pir_calc_value( bios_data, offset );
+ printf( "\n\n$PIR header at: 0x%4lX\n", offset );
+ printf( "Current checksum: 0x%02X\n", cur_val );
+ /* fixed: dropped the stray "\n " so "Setting checksum." appears on
+ * the same line, consistent with the three sections above */
+ printf( "Calculated checksum: 0x%02X ", new_val );
+ hits++;
+ }
+ if( hits == 1 && cur_val != new_val ) {
+ printf( "Setting checksum." );
+ chksum__pir_set_value( bios_data, offset, new_val );
+ }
+ if( hits >= 2 ) {
+ printf( "Warning! Multiple $PIR headers. No checksum set." );
+ }
+ if( hits ) {
+ printf( "\n" );
+ }
+
+
+ /* finally recompute the whole-image checksum byte at 0xFFFF */
+ offset = 0L;
+ offset = chksum_bios_get_offset( bios_data, offset );
+ cur_val = chksum_bios_get_value( bios_data, offset );
+ new_val = chksum_bios_calc_value( bios_data, offset );
+ printf( "\n\nBios checksum at: 0x%4lX\n", offset );
+ printf( "Current checksum: 0x%02X\n", cur_val );
+ printf( "Calculated checksum: 0x%02X ", new_val );
+ if( cur_val != new_val ) {
+ printf( "Setting checksum." );
+ chksum_bios_set_value( bios_data, offset, new_val );
+ }
+ printf( "\n" );
+
+
+ if(( stream = fopen( argv[1], "wb" )) == NULL ) {
+ printf( "Error opening %s for writing.\n", argv[1] );
+ exit( EXIT_FAILURE );
+ }
+ if( fwrite( bios_data, 1, LEN_BIOS_DATA, stream ) < LEN_BIOS_DATA ) {
+ printf( "Error writing 64KBytes to %s.\n", argv[1] );
+ fclose( stream );
+ exit( EXIT_FAILURE );
+ }
+ fclose( stream );
+
+ return( EXIT_SUCCESS );
+}
+
+
+/* Print "Error. <message>." and terminate unless okay is non-zero.
+ * Used by every bounds test below. */
+void check( int okay, char* message ) {
+
+ if( !okay ) {
+ printf( "\n\nError. %s.\n", message );
+ exit( EXIT_FAILURE );
+ }
+}
+
+
+/* The image-wide checksum lives at a fixed location (the last byte);
+ * data/offset are accepted only for interface symmetry with the other
+ * chksum_* families. */
+long chksum_bios_get_offset( byte* data, long offset ) {
+
+ return( BIOS_OFFSET );
+}
+
+
+/* Sum every byte except the checksum byte itself (indices 0..MAX_OFFSET-1)
+ * and negate, so the full 64 KiB sums to zero mod 256. */
+byte chksum_bios_calc_value( byte* data, long offset ) {
+
+ int i;
+ byte sum;
+
+ sum = 0;
+ for( i = 0; i < MAX_OFFSET; i++ ) {
+ sum = sum + *( data + i );
+ }
+ sum = -sum; /* iso ensures -s + s == 0 on unsigned types */
+ return( sum );
+}
+
+
+/* Read the stored image-wide checksum byte. */
+byte chksum_bios_get_value( byte* data, long offset ) {
+
+ return( *( data + BIOS_OFFSET ) );
+}
+
+
+/* Store a new image-wide checksum byte. */
+void chksum_bios_set_value( byte* data, long offset, byte value ) {
+
+ *( data + BIOS_OFFSET ) = value;
+}
+
+
+/* Checksum of a "_32_" header: length field is in 16-byte units; sum all
+ * bytes of the header except the checksum slot, then negate. */
+byte chksum__32__calc_value( byte* data, long offset ) {
+
+ int i;
+ int len;
+ byte sum;
+
+ check( offset + _32__MINHDR <= MAX_OFFSET, "_32_ header out of bounds" );
+ len = *( data + offset + _32__LEN ) << 4;
+ check( offset + len <= MAX_OFFSET, "_32_ header-length out of bounds" );
+ sum = 0;
+ for( i = 0; i < len; i++ ) {
+ if( i != _32__CHKSUM ) {
+ sum = sum + *( data + offset + i );
+ }
+ }
+ sum = -sum;
+ return( sum );
+}
+
+
+/* Find the next "_32_" signature on a 16-byte boundary strictly after
+ * `offset` (rounded up to a paragraph); returns -1 when none remains.
+ * Note the scan always advances 16 bytes before testing, so a header at
+ * offset 0 would be missed — benign for BIOS images, where the tables
+ * never sit at the very start. */
+long chksum__32__get_offset( byte* data, long offset ) {
+
+ long result = -1L;
+
+ offset = offset + 0x0F;
+ offset = offset & ~( 0x0F );
+ while( offset + 16 < MAX_OFFSET ) {
+ offset = offset + 16;
+ if( *( data + offset + 0 ) == '_' && \
+ *( data + offset + 1 ) == '3' && \
+ *( data + offset + 2 ) == '2' && \
+ *( data + offset + 3 ) == '_' ) {
+ result = offset;
+ break;
+ }
+ }
+ return( result );
+}
+
+
+/* Read the stored "_32_" header checksum byte. */
+byte chksum__32__get_value( byte* data, long offset ) {
+
+ check( offset + _32__CHKSUM <= MAX_OFFSET, "PCI-Bios checksum out of bounds" );
+ return( *( data + offset + _32__CHKSUM ) );
+}
+
+
+/* Store a new "_32_" header checksum byte. */
+void chksum__32__set_value( byte* data, long offset, byte value ) {
+
+ check( offset + _32__CHKSUM <= MAX_OFFSET, "PCI-Bios checksum out of bounds" );
+ *( data + offset + _32__CHKSUM ) = value;
+}
+
+
+/* Checksum of an "_MP_" floating pointer structure: length field is in
+ * 16-byte units; sum all bytes except the checksum slot, then negate. */
+byte chksum__mp__calc_value( byte* data, long offset ) {
+
+ int i;
+ int len;
+ byte sum;
+
+ check( offset + _MP__MINHDR <= MAX_OFFSET, "_MP_ header out of bounds" );
+ len = *( data + offset + _MP__LEN ) << 4;
+ check( offset + len <= MAX_OFFSET, "_MP_ header-length out of bounds" );
+ sum = 0;
+ for( i = 0; i < len; i++ ) {
+ if( i != _MP__CHKSUM ) {
+ sum = sum + *( data + offset + i );
+ }
+ }
+ sum = -sum;
+ return( sum );
+}
+
+
+/* Find the next "_MP_" signature on a paragraph boundary strictly after
+ * `offset`; returns -1 when none remains (same scan shape as _32_). */
+long chksum__mp__get_offset( byte* data, long offset ) {
+
+ long result = -1L;
+
+ offset = offset + 0x0F;
+ offset = offset & ~( 0x0F );
+ while( offset + 16 < MAX_OFFSET ) {
+ offset = offset + 16;
+ if( *( data + offset + 0 ) == '_' && \
+ *( data + offset + 1 ) == 'M' && \
+ *( data + offset + 2 ) == 'P' && \
+ *( data + offset + 3 ) == '_' ) {
+ result = offset;
+ break;
+ }
+ }
+ return( result );
+}
+
+
+/* Read the stored "_MP_" checksum byte. */
+byte chksum__mp__get_value( byte* data, long offset ) {
+
+ check( offset + _MP__CHKSUM <= MAX_OFFSET, "MP checksum out of bounds" );
+ return( *( data + offset + _MP__CHKSUM ) );
+}
+
+
+/* Store a new "_MP_" checksum byte. */
+void chksum__mp__set_value( byte* data, long offset, byte value ) {
+
+ check( offset + _MP__CHKSUM <= MAX_OFFSET, "MP checksum out of bounds" );
+ *( data + offset + _MP__CHKSUM ) = value;
+}
+
+
+/* Checksum of a "PCMP" MP configuration table: the base length is a
+ * little-endian 16-bit field. Extended tables (non-zero extended length
+ * or extended checksum) are not supported and abort via check(). */
+byte chksum_pcmp_calc_value( byte* data, long offset ) {
+
+ int i;
+ int len;
+ byte sum;
+
+ check( offset + PCMP_MINHDR <= MAX_OFFSET, "PCMP header out of bounds" );
+ len = *( data + offset + PCMP_BASELEN ) + \
+ ( *( data + offset + PCMP_BASELEN + 1 ) << 8 );
+ check( offset + len <= MAX_OFFSET, "PCMP header-length out of bounds" );
+ if( *( data + offset + PCMP_EXT_LEN ) | \
+ *( data + offset + PCMP_EXT_LEN + 1 ) | \
+ *( data + offset + PCMP_EXT_CHKSUM ) ) {
+ check( 0, "PCMP header indicates extended tables (unsupported)" );
+ }
+ sum = 0;
+ for( i = 0; i < len; i++ ) {
+ if( i != PCMP_CHKSUM ) {
+ sum = sum + *( data + offset + i );
+ }
+ }
+ sum = -sum;
+ return( sum );
+}
+
+
+/* Find the next "PCMP" signature on a paragraph boundary strictly after
+ * `offset`; returns -1 when none remains. */
+long chksum_pcmp_get_offset( byte* data, long offset ) {
+
+ long result = -1L;
+
+ offset = offset + 0x0F;
+ offset = offset & ~( 0x0F );
+ while( offset + 16 < MAX_OFFSET ) {
+ offset = offset + 16;
+ if( *( data + offset + 0 ) == 'P' && \
+ *( data + offset + 1 ) == 'C' && \
+ *( data + offset + 2 ) == 'M' && \
+ *( data + offset + 3 ) == 'P' ) {
+ result = offset;
+ break;
+ }
+ }
+ return( result );
+}
+
+
+/* Read the stored "PCMP" checksum byte. */
+byte chksum_pcmp_get_value( byte* data, long offset ) {
+
+ check( offset + PCMP_CHKSUM <= MAX_OFFSET, "PCMP checksum out of bounds" );
+ return( *( data + offset + PCMP_CHKSUM ) );
+}
+
+
+/* Store a new "PCMP" checksum byte. */
+void chksum_pcmp_set_value( byte* data, long offset, byte value ) {
+
+ check( offset + PCMP_CHKSUM <= MAX_OFFSET, "PCMP checksum out of bounds" );
+ *( data + offset + PCMP_CHKSUM ) = value;
+}
+
+
+/* Checksum of a "$PIR" PCI IRQ routing table: length is a little-endian
+ * 16-bit field; sum all bytes except the checksum slot, then negate. */
+byte chksum__pir_calc_value( byte* data, long offset ) {
+
+ int i;
+ int len;
+ byte sum;
+
+ check( offset + _PIR_MINHDR <= MAX_OFFSET, "$PIR header out of bounds" );
+ len = *( data + offset + _PIR_LEN ) + \
+ ( *( data + offset + _PIR_LEN + 1 ) << 8 );
+ check( offset + len <= MAX_OFFSET, "$PIR header-length out of bounds" );
+ sum = 0;
+ for( i = 0; i < len; i++ ) {
+ if( i != _PIR_CHKSUM ) {
+ sum = sum + *( data + offset + i );
+ }
+ }
+ sum = -sum;
+ return( sum );
+}
+
+
+/* Find the next "$PIR" signature on a paragraph boundary strictly after
+ * `offset`; returns -1 when none remains. */
+long chksum__pir_get_offset( byte* data, long offset ) {
+
+ long result = -1L;
+
+ offset = offset + 0x0F;
+ offset = offset & ~( 0x0F );
+ while( offset + 16 < MAX_OFFSET ) {
+ offset = offset + 16;
+ if( *( data + offset + 0 ) == '$' && \
+ *( data + offset + 1 ) == 'P' && \
+ *( data + offset + 2 ) == 'I' && \
+ *( data + offset + 3 ) == 'R' ) {
+ result = offset;
+ break;
+ }
+ }
+ return( result );
+}
+
+
+/* Read the stored "$PIR" checksum byte. */
+byte chksum__pir_get_value( byte* data, long offset ) {
+
+ check( offset + _PIR_CHKSUM <= MAX_OFFSET, "$PIR checksum out of bounds" );
+ return( *( data + offset + _PIR_CHKSUM ) );
+}
+
+
+/* Store a new "$PIR" checksum byte. */
+void chksum__pir_set_value( byte* data, long offset, byte value ) {
+
+ check( offset + _PIR_CHKSUM <= MAX_OFFSET, "$PIR checksum out of bounds" );
+ *( data + offset + _PIR_CHKSUM ) = value;
+}
+
diff --git a/tools/firmware/rombios/makesym.perl b/tools/firmware/rombios/makesym.perl
new file mode 100755
index 0000000000..df604e2ae4
--- /dev/null
+++ b/tools/firmware/rombios/makesym.perl
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+#
+# $Id: makesym.perl,v 1.1 2002/11/24 22:45:40 bdenney Exp $
+#
+# Read output file from as86 (e.g. rombios.txt) and write out a symbol
+# table suitable for the Bochs debugger.
+#
+
+# State machine: before / inside / after the listing's "Symbols:" section.
+$WHERE_BEFORE_SYM_TABLE = 0;
+$WHERE_IN_SYM_TABLE = 1;
+$WHERE_AFTER_SYM_TABLE = 2;
+
+$where = $WHERE_BEFORE_SYM_TABLE;
+while (<STDIN>) {
+ chop;
+ # BUGFIX: was the bareword WHERE_BEFORE_SYM_TABLE (missing '$' sigil),
+ # which numerically compared as 0 and only matched by coincidence.
+ if ($where == $WHERE_BEFORE_SYM_TABLE && /^Symbols:/) {
+ $where = $WHERE_IN_SYM_TABLE;
+ } elsif ($where == $WHERE_IN_SYM_TABLE && /^$/) {
+ $where = $WHERE_AFTER_SYM_TABLE;
+ }
+ if ($where == $WHERE_IN_SYM_TABLE) {
+ # as86 prints two symbol columns per line: name junk addr name junk addr
+ @F = split (/\s+/);
+ ($name[0], $junk, $addr[0], $junk, $name[1], $junk, $addr[1]) = @F;
+ foreach $col (0,1) {
+ next if length $addr[$col] < 1;
+ $addr[$col] =~ tr/A-Z/a-z/;
+ # BIOS symbols live in segment 0xF000: prefix to a physical 000fxxxx address
+ $addr[$col] = "000f" . $addr[$col];
+ print "$addr[$col] $name[$col]\n";
+ }
+ }
+}
diff --git a/tools/firmware/rombios/rombios.c b/tools/firmware/rombios/rombios.c
new file mode 100644
index 0000000000..c3605ac71e
--- /dev/null
+++ b/tools/firmware/rombios/rombios.c
@@ -0,0 +1,10825 @@
+/////////////////////////////////////////////////////////////////////////
+// $Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $
+/////////////////////////////////////////////////////////////////////////
+//
+// Copyright (C) 2002 MandrakeSoft S.A.
+//
+// MandrakeSoft S.A.
+// 43, rue d'Aboukir
+// 75002 Paris - France
+// http://www.linux-mandrake.com/
+// http://www.mandrakesoft.com/
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+// ROM BIOS for use with Bochs/Plex x86 emulation environment
+
+#define VMXASSIST
+
+// ROM BIOS compatability entry points:
+// ===================================
+// $e05b ; POST Entry Point
+// $e2c3 ; NMI Handler Entry Point
+// $e3fe ; INT 13h Fixed Disk Services Entry Point
+// $e401 ; Fixed Disk Parameter Table
+// $e6f2 ; INT 19h Boot Load Service Entry Point
+// $e6f5 ; Configuration Data Table
+// $e729 ; Baud Rate Generator Table
+// $e739 ; INT 14h Serial Communications Service Entry Point
+// $e82e ; INT 16h Keyboard Service Entry Point
+// $e987 ; INT 09h Keyboard Service Entry Point
+// $ec59 ; INT 13h Diskette Service Entry Point
+// $ef57 ; INT 0Eh Diskette Hardware ISR Entry Point
+// $efc7 ; Diskette Controller Parameter Table
+// $efd2 ; INT 17h Printer Service Entry Point
+// $f045 ; INT 10 Functions 0-Fh Entry Point
+// $f065 ; INT 10h Video Support Service Entry Point
+// $f0a4 ; MDA/CGA Video Parameter Table (INT 1Dh)
+// $f841 ; INT 12h Memory Size Service Entry Point
+// $f84d ; INT 11h Equipment List Service Entry Point
+// $f859 ; INT 15h System Services Entry Point
+// $fa6e ; Character Font for 320x200 & 640x200 Graphics (lower 128 characters)
+// $fe6e ; INT 1Ah Time-of-day Service Entry Point
+// $fea5 ; INT 08h System Timer ISR Entry Point
+// $fef3 ; Initial Interrupt Vector Offsets Loaded by POST
+// $ff53 ; IRET Instruction for Dummy Interrupt Handler
+// $ff54 ; INT 05h Print Screen Service Entry Point
+// $fff0 ; Power-up Entry Point
+// $fff5 ; ASCII Date ROM was built - 8 characters in MM/DD/YY
+// $fffe ; System Model ID
+
+// NOTES for ATA/ATAPI driver (cbbochs@free.fr)
+// Features
+// - supports up to 4 ATA interfaces
+// - device/geometry detection
+// - 16bits/32bits device access
+// - pchs/lba access
+// - datain/dataout/packet command support
+//
+// NOTES for El-Torito Boot (cbbochs@free.fr)
+// - CD-ROM booting is only available if ATA/ATAPI Driver is available
+// - Current code is only able to boot mono-session cds
+// - Current code can not boot and emulate a hard-disk
+// the bios will panic otherwise
+// - Current code also use memory in EBDA segement.
+// - I used cmos byte 0x3D to store extended information on boot-device
+// - Code has to be modified modified to handle multiple cdrom drives
+// - Here are the cdrom boot failure codes:
+// 1 : no atapi device found
+// 2 : no atapi cdrom found
+// 3 : can not read cd - BRVD
+// 4 : cd is not eltorito (BRVD)
+// 5 : cd is not eltorito (ISO TAG)
+// 6 : cd is not eltorito (ELTORITO TAG)
+// 7 : can not read cd - boot catalog
+// 8 : boot catalog : bad header
+// 9 : boot catalog : bad platform
+// 10 : boot catalog : bad signature
+// 11 : boot catalog : bootable flag not set
+// 12 : can not read cd - boot image
+//
+// ATA driver
+// - EBDA segment.
+// I used memory starting at 0x121 in the segment
+// - the translation policy is defined in cmos regs 0x39 & 0x3a
+//
+// TODO :
+//
+// int74
+// - needs to be reworked. Uses direct [bp] offsets. (?)
+//
+// int13:
+// - f04 (verify sectors) isn't complete (?)
+// - f02/03/04 should set current cyl,etc in BDA (?)
+// - rewrite int13_relocated & clean up int13 entry code
+//
+// NOTES:
+// - NMI access (bit7 of addr written to 70h)
+//
+// ATA driver
+// - should handle the "don't detect" bit (cmos regs 0x3b & 0x3c)
+// - could send the multiple-sector read/write commands
+//
+// El-Torito
+// - Emulate a Hard-disk (currently only diskette can be emulated) see "FIXME ElTorito Harddisk"
+// - Implement remaining int13_cdemu functions (as defined by El-Torito specs)
+// - cdrom drive is hardcoded to ide 0 device 1 in several places. see "FIXME ElTorito Hardcoded"
+// - int13 Fix DL when emulating a cd. In that case DL is decremented before calling real int13.
+// This is ok. But DL should be reincremented afterwards.
+// - Fix all "FIXME ElTorito Various"
+// - should be able to boot any cdrom instead of the first one
+//
+// BCC Bug: find a generic way to handle the bug of #asm after an "if" (fixed in 0.16.7)
+
+#define DEBUG_ROMBIOS 0
+
+#define DEBUG_ATA 0
+#define DEBUG_INT13_HD 0
+#define DEBUG_INT13_CD 0
+#define DEBUG_INT13_ET 0
+#define DEBUG_INT13_FL 0
+#define DEBUG_INT15 0
+#define DEBUG_INT16 0
+#define DEBUG_INT1A 0
+#define DEBUG_INT74 0
+#define DEBUG_APM 0
+
+#define BX_CPU 3
+#define BX_USE_PS2_MOUSE 1
+#define BX_CALL_INT15_4F 1
+#define BX_USE_EBDA 1
+#define BX_SUPPORT_FLOPPY 1
+#define BX_FLOPPY_ON_CNT 37 /* 2 seconds */
+#define BX_PCIBIOS 1
+#define BX_APM 1
+
+#define BX_USE_ATADRV 1
+#define BX_ELTORITO_BOOT 1
+
+#define BX_MAX_ATA_INTERFACES 4
+#define BX_MAX_ATA_DEVICES (BX_MAX_ATA_INTERFACES*2)
+
+#define BX_VIRTUAL_PORTS 1 /* normal output to Bochs ports */
+#define BX_DEBUG_SERIAL 0 /* output to COM1 */
+
+ /* model byte 0xFC = AT */
+#define SYS_MODEL_ID 0xFC
+#define SYS_SUBMODEL_ID 0x00
+#define BIOS_REVISION 1
+#define BIOS_CONFIG_TABLE 0xe6f5
+
+#ifndef BIOS_BUILD_DATE
+# define BIOS_BUILD_DATE "06/23/99"
+#endif
+
+ // 1K of base memory used for Extended Bios Data Area (EBDA)
+ // EBDA is used for PS/2 mouse support, and IDE BIOS, etc.
+#define EBDA_SEG 0x9FC0
+#define EBDA_SIZE 1 // In KiB
+#define BASE_MEM_IN_K (640 - EBDA_SIZE)
+
+ // Define the application NAME
+#ifdef VMXASSIST
+# define BX_APPNAME "VMXAssist"
+#elif PLEX86
+# define BX_APPNAME "Plex86"
+#else
+# define BX_APPNAME "Bochs"
+#endif
+
+ // Sanity Checks
+#if BX_USE_ATADRV && BX_CPU<3
+# error The ATA/ATAPI Driver can only to be used with a 386+ cpu
+#endif
+#if BX_USE_ATADRV && !BX_USE_EBDA
+# error ATA/ATAPI Driver can only be used if EBDA is available
+#endif
+#if BX_ELTORITO_BOOT && !BX_USE_ATADRV
+# error El-Torito Boot can only be use if ATA/ATAPI Driver is available
+#endif
+#if BX_PCIBIOS && BX_CPU<3
+# error PCI BIOS can only be used with 386+ cpu
+#endif
+#if BX_APM && BX_CPU<3
+# error APM BIOS can only be used with 386+ cpu
+#endif
+
+#ifndef BX_SMP_PROCESSORS
+#define BX_SMP_PROCESSORS 1
+# warning BX_SMP_PROCESSORS not defined, defaulting to 1
+#endif
+
+#define PANIC_PORT 0x400
+#define PANIC_PORT2 0x401
+#define INFO_PORT 0x402
+#define DEBUG_PORT 0x403
+
+// #20 is dec 20
+// #$20 is hex 20 = 32
+// #0x20 is hex 20 = 32
+// LDA #$20
+// JSR $E820
+// LDD .i,S
+// JSR $C682
+// mov al, #$20
+
+// all hex literals should be prefixed with '0x'
+// grep "#[0-9a-fA-F][0-9a-fA-F]" rombios.c
+// no mov SEG-REG, #value, must mov register into seg-reg
+// grep -i "mov[ ]*.s" rombios.c
+
+// This is for compiling with gcc2 and gcc3
+#define ASM_START #asm
+#define ASM_END #endasm
+
+ASM_START
+.rom
+
+.org 0x0000
+
+#if BX_CPU >= 3
+use16 386
+#else
+use16 286
+#endif
+
+MACRO HALT
+ ;; the HALT macro is called with the line number of the HALT call.
+ ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex
+ ;; to print a BX_PANIC message. This will normally halt the simulation
+ ;; with a message such as "BIOS panic at rombios.c, line 4091".
+ ;; However, users can choose to make panics non-fatal and continue.
+#if BX_VIRTUAL_PORTS
+ mov dx,#PANIC_PORT
+ mov ax,#?1
+ out dx,ax
+#else
+ mov dx,#0x80
+ mov ax,#?1
+ out dx,al
+#endif
+MEND
+
+MACRO JMP_AP
+ ;; emit a direct far jump: opcode 0xEA, 16-bit offset (?2), segment (?1)
+ db 0xea
+ dw ?2
+ dw ?1
+MEND
+
+MACRO SET_INT_VECTOR
+ ;; write IVT entry for vector ?1: offset word ?3 at ?1*4, segment ?2 at ?1*4+2
+ mov ax, ?3
+ mov ?1*4, ax
+ mov ax, ?2
+ mov ?1*4+2, ax
+MEND
+
+ASM_END
+
+typedef unsigned char Bit8u;
+typedef unsigned short Bit16u;
+typedef unsigned short bx_bool;
+typedef unsigned long Bit32u;
+
+#if BX_USE_ATADRV
+
+ void memsetb(seg,offset,value,count);
+ void memcpyb(dseg,doffset,sseg,soffset,count);
+ void memcpyd(dseg,doffset,sseg,soffset,count);
+
+ // memset of count bytes
+ // memsetb(seg, offset, value, count): fill count bytes at seg:offset with
+ // the low byte of value using rep stosb; count == 0 is a no-op. Implemented
+ // in bcc inline asm; arguments are read off the caller's stack frame.
+ void
+ memsetb(seg,offset,value,count)
+ Bit16u seg;
+ Bit16u offset;
+ Bit16u value;
+ Bit16u count;
+ {
+ ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push cx
+ push es
+ push di
+
+ mov cx, 10[bp] ; count
+ cmp cx, #0x00
+ je memsetb_end
+ mov ax, 4[bp] ; segment
+ mov es, ax
+ mov ax, 6[bp] ; offset
+ mov di, ax
+ mov al, 8[bp] ; value
+ cld
+ rep
+ stosb
+
+ memsetb_end:
+ pop di
+ pop es
+ pop cx
+ pop ax
+
+ pop bp
+ ASM_END
+ }
+
+ // memcpy of count bytes
+ // memcpyb(dseg, doffset, sseg, soffset, count): copy count bytes from
+ // sseg:soffset to dseg:doffset with rep movsb (forward copy — regions
+ // must not overlap destructively); count == 0 is a no-op.
+ void
+ memcpyb(dseg,doffset,sseg,soffset,count)
+ Bit16u dseg;
+ Bit16u doffset;
+ Bit16u sseg;
+ Bit16u soffset;
+ Bit16u count;
+ {
+ ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push cx
+ push es
+ push di
+ push ds
+ push si
+
+ mov cx, 12[bp] ; count
+ cmp cx, #0x0000
+ je memcpyb_end
+ mov ax, 4[bp] ; dsegment
+ mov es, ax
+ mov ax, 6[bp] ; doffset
+ mov di, ax
+ mov ax, 8[bp] ; ssegment
+ mov ds, ax
+ mov ax, 10[bp] ; soffset
+ mov si, ax
+ cld
+ rep
+ movsb
+
+ memcpyb_end:
+ pop si
+ pop ds
+ pop di
+ pop es
+ pop cx
+ pop ax
+
+ pop bp
+ ASM_END
+ }
+
+#if 0
+ // memcpy of count dword
+ // NOTE(review): compiled out (#if 0) — dword variant of memcpyb using
+ // rep movsd; no caller is visible in this chunk, presumably kept for
+ // future use. Confirm before enabling.
+ void
+ memcpyd(dseg,doffset,sseg,soffset,count)
+ Bit16u dseg;
+ Bit16u doffset;
+ Bit16u sseg;
+ Bit16u soffset;
+ Bit16u count;
+ {
+ ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push cx
+ push es
+ push di
+ push ds
+ push si
+
+ mov cx, 12[bp] ; count
+ cmp cx, #0x0000
+ je memcpyd_end
+ mov ax, 4[bp] ; dsegment
+ mov es, ax
+ mov ax, 6[bp] ; doffset
+ mov di, ax
+ mov ax, 8[bp] ; ssegment
+ mov ds, ax
+ mov ax, 10[bp] ; soffset
+ mov si, ax
+ cld
+ rep
+ movsd
+
+ memcpyd_end:
+ pop si
+ pop ds
+ pop di
+ pop es
+ pop cx
+ pop ax
+
+ pop bp
+ ASM_END
+ }
+#endif
+#endif //BX_USE_ATADRV
+
+ // read_dword and write_dword functions
+ static Bit32u read_dword();
+ static void write_dword();
+
+ // read_dword(seg, offset): fetch the 32-bit value at seg:offset.
+ // The result is left in ax (low word) / dx (high word), which is how bcc
+ // returns a 32-bit value — hence no C return statement is needed.
+ Bit32u
+ read_dword(seg, offset)
+ Bit16u seg;
+ Bit16u offset;
+ {
+ ASM_START
+ push bp
+ mov bp, sp
+
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov ax, [bx]
+ inc bx
+ inc bx
+ mov dx, [bx]
+ ;; ax = return value (word)
+ ;; dx = return value (word)
+ pop ds
+ pop bx
+
+ pop bp
+ ASM_END
+ }
+
+ // write_dword(seg, offset, data): store the 32-bit value `data` at
+ // seg:offset, low word first (data arrives as two stack words).
+ void
+ write_dword(seg, offset, data)
+ Bit16u seg;
+ Bit16u offset;
+ Bit32u data;
+ {
+ ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov ax, 8[bp] ; data word
+ mov [bx], ax ; write data word
+ inc bx
+ inc bx
+ mov ax, 10[bp] ; data word
+ mov [bx], ax ; write data word
+ pop ds
+ pop bx
+ pop ax
+
+ pop bp
+ ASM_END
+ }
+
+ // Bit32u (unsigned long) and long helper functions
+ // bcc runtime support routines. A 32-bit value is held in ax (low word)
+ // and bx (high word); where a second operand is needed it is read from
+ // SS:[di] (the inc/dec helpers instead take a pointer in bx). Several
+ // routines first assemble bx:ax into eax to use a single 386 instruction.
+ ASM_START
+
+ ;; and function
+ landl:
+ landul:
+ SEG SS
+ and ax,[di]
+ SEG SS
+ and bx,2[di]
+ ret
+
+ ;; add function
+ laddl:
+ laddul:
+ SEG SS
+ add ax,[di]
+ SEG SS
+ adc bx,2[di]
+ ret
+
+ ;; cmp function
+ lcmpl:
+ lcmpul:
+ ;; pack bx:ax into eax, compare with the 32-bit operand at SS:[di]
+ and eax, #0x0000FFFF
+ shl ebx, #16
+ add eax, ebx
+ shr ebx, #16
+ SEG SS
+ cmp eax, dword ptr [di]
+ ret
+
+ ;; sub function
+ lsubl:
+ lsubul:
+ SEG SS
+ sub ax,[di]
+ SEG SS
+ sbb bx,2[di]
+ ret
+
+ ;; mul function
+ lmull:
+ lmulul:
+ and eax, #0x0000FFFF
+ shl ebx, #16
+ add eax, ebx
+ SEG SS
+ mul eax, dword ptr [di]
+ mov ebx, eax
+ shr ebx, #16
+ ret
+
+ ;; dec function
+ ldecl:
+ ldecul:
+ ;; here bx is a pointer to the 32-bit value, not the high word
+ SEG SS
+ dec dword ptr [bx]
+ ret
+
+ ;; or function
+ lorl:
+ lorul:
+ SEG SS
+ or ax,[di]
+ SEG SS
+ or bx,2[di]
+ ret
+
+ ;; inc function
+ lincl:
+ lincul:
+ SEG SS
+ inc dword ptr [bx]
+ ret
+
+ ;; tst function
+ ltstl:
+ ltstul:
+ and eax, #0x0000FFFF
+ shl ebx, #16
+ add eax, ebx
+ shr ebx, #16
+ test eax, eax
+ ret
+
+ ;; sr function
+ ;; logical shift right of bx:ax by di bits; zero count leaves it unchanged
+ lsrul:
+ mov cx,di
+ jcxz lsr_exit
+ and eax, #0x0000FFFF
+ shl ebx, #16
+ add eax, ebx
+ lsr_loop:
+ shr eax, #1
+ loop lsr_loop
+ mov ebx, eax
+ shr ebx, #16
+ lsr_exit:
+ ret
+
+ ;; sl function
+ ;; shift left of bx:ax by di bits
+ lsll:
+ lslul:
+ mov cx,di
+ jcxz lsl_exit
+ and eax, #0x0000FFFF
+ shl ebx, #16
+ add eax, ebx
+ lsl_loop:
+ shl eax, #1
+ loop lsl_loop
+ mov ebx, eax
+ shr ebx, #16
+ lsl_exit:
+ ret
+
+ ;; 16-bit divide ax / bx, signed (cwd sign-extends into dx)
+ idiv_:
+ cwd
+ idiv bx
+ ret
+
+ ;; 16-bit divide ax / bx, unsigned
+ idiv_u:
+ xor dx,dx
+ div bx
+ ret
+
+ ;; 32-bit unsigned divide of bx:ax by the 32-bit operand at SS:[di]
+ ldivul:
+ and eax, #0x0000FFFF
+ shl ebx, #16
+ add eax, ebx
+ xor edx, edx
+ SEG SS
+ mov bx, 2[di]
+ shl ebx, #16
+ SEG SS
+ mov bx, [di]
+ div ebx
+ mov ebx, eax
+ shr ebx, #16
+ ret
+
+ ASM_END
+
+// for access to RAM area which is used by interrupt vectors
+// and BIOS Data Area
+
+typedef struct {
+ unsigned char filler1[0x400]; // interrupt vector table, 0x000-0x3ff
+ unsigned char filler2[0x6c]; // BDA up to the tick counter (0x400+0x6c = 0x46c)
+ Bit16u ticks_low; // timer tick count low word (phys 0x46c)
+ Bit16u ticks_high; // timer tick count high word
+ Bit8u midnight_flag; // midnight-rollover flag (phys 0x470)
+ } bios_data_t;
+
+// overlay the struct at linear address 0 so fields map onto the real BDA
+#define BiosData ((bios_data_t *) 0)
+
+#if BX_USE_ATADRV
+ // cylinder/head/sector geometry triple
+ typedef struct {
+ Bit16u heads; // # heads
+ Bit16u cylinders; // # cylinders
+ Bit16u spt; // # sectors / track
+ } chs_t;
+
+ // DPTE definition
+ typedef struct {
+ Bit16u iobase1;
+ Bit16u iobase2;
+ Bit8u prefix;
+ Bit8u unused;
+ Bit8u irq;
+ Bit8u blkcount;
+ Bit8u dma;
+ Bit8u pio;
+ Bit16u options;
+ Bit16u reserved;
+ Bit8u revision;
+ Bit8u checksum;
+ } dpte_t;
+
+ // one ATA interface (controller channel)
+ typedef struct {
+ Bit8u iface; // ISA or PCI
+ Bit16u iobase1; // IO Base 1
+ Bit16u iobase2; // IO Base 2
+ Bit8u irq; // IRQ
+ } ata_channel_t;
+
+ // per-device state filled in by detection
+ typedef struct {
+ Bit8u type; // Detected type of ata (ata/atapi/none/unknown)
+ Bit8u device; // Detected type of attached devices (hd/cd/none)
+ Bit8u removable; // Removable device flag
+ Bit8u lock; // Locks for removable devices
+ // Bit8u lba_capable; // LBA capable flag - always yes for bochs devices
+ Bit8u mode; // transfert mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
+ Bit16u blksize; // block size
+
+ Bit8u translation; // type of translation
+ chs_t lchs; // Logical CHS
+ chs_t pchs; // Physical CHS
+
+ Bit32u sectors; // Total sectors count
+ } ata_device_t;
+
+ typedef struct {
+ // ATA channels info
+ ata_channel_t channels[BX_MAX_ATA_INTERFACES];
+
+ // ATA devices info
+ ata_device_t devices[BX_MAX_ATA_DEVICES];
+ //
+ // map between (bios hd id - 0x80) and ata channels
+ Bit8u hdcount, hdidmap[BX_MAX_ATA_DEVICES];
+
+ // map between (bios cd id - 0xE0) and ata channels
+ Bit8u cdcount, cdidmap[BX_MAX_ATA_DEVICES];
+
+ // Buffer for DPTE table
+ dpte_t dpte;
+
+ // Count of transferred sectors and bytes
+ Bit16u trsfsectors;
+ Bit32u trsfbytes;
+
+ } ata_t;
+
+#if BX_ELTORITO_BOOT
+ // ElTorito Device Emulation data
+ typedef struct {
+ Bit8u active;
+ Bit8u media;
+ Bit8u emulated_drive;
+ Bit8u controller_index;
+ Bit16u device_spec;
+ Bit32u ilba;
+ Bit16u buffer_segment;
+ Bit16u load_segment;
+ Bit16u sector_count;
+
+ // Virtual device
+ chs_t vdevice;
+ } cdemu_t;
+#endif // BX_ELTORITO_BOOT
+
+ // for access to EBDA area
+ // The EBDA structure should conform to
+ // http://www.cybertrails.com/~fys/rombios.htm document
+ // I made the ata and cdemu structs begin at 0x121 in the EBDA seg
+ typedef struct {
+ unsigned char filler1[0x3D];
+
+ // FDPT - Can be splitted in data members if needed
+ unsigned char fdpt0[0x10];
+ unsigned char fdpt1[0x10];
+
+ // padding: 0x3D + 0x10 + 0x10 + 0xC4 = 0x121, so `ata` starts at
+ // EBDA offset 0x121 as stated in the comment above
+ unsigned char filler2[0xC4];
+
+ // ATA Driver data
+ ata_t ata;
+
+#if BX_ELTORITO_BOOT
+ // El Torito Emulation data
+ cdemu_t cdemu;
+#endif // BX_ELTORITO_BOOT
+
+ } ebda_data_t;
+
+ // overlay at offset 0 of the EBDA segment
+ #define EbdaData ((ebda_data_t *) 0)
+
+ // for access to the int13ext structure
+ typedef struct {
+ Bit8u size;
+ Bit8u reserved;
+ Bit16u count;
+ Bit16u offset;
+ Bit16u segment;
+ Bit32u lba1;
+ Bit32u lba2;
+ } int13ext_t;
+
+ #define Int13Ext ((int13ext_t *) 0)
+
+ // Disk Physical Table definition
+ typedef struct {
+ Bit16u size;
+ Bit16u infos;
+ Bit32u cylinders;
+ Bit32u heads;
+ Bit32u spt;
+ Bit32u sector_count1;
+ Bit32u sector_count2;
+ Bit16u blksize;
+ Bit16u dpte_segment;
+ Bit16u dpte_offset;
+ Bit16u key;
+ Bit8u dpi_length;
+ Bit8u reserved1;
+ Bit16u reserved2;
+ Bit8u host_bus[4];
+ Bit8u iface_type[8];
+ Bit8u iface_path[8];
+ Bit8u device_path[8];
+ Bit8u reserved3;
+ Bit8u checksum;
+ } dpt_t;
+
+ #define Int13DPT ((dpt_t *) 0)
+
+#endif // BX_USE_ATADRV
+
+// Overlay of the register block pushed by PUSHA (lowest address = di);
+// the r8 view aliases the byte halves of bx, dx, cx, ax.
+typedef struct {
+ union {
+ struct {
+ Bit16u di, si, bp, sp;
+ Bit16u bx, dx, cx, ax;
+ } r16;
+ struct {
+ Bit16u filler[4];
+ Bit8u bl, bh, dl, dh, cl, ch, al, ah;
+ } r8;
+ } u;
+ } pusha_regs_t;
+
+// Same overlay for the 32-bit PUSHAD frame, with 32/16/8-bit views.
+typedef struct {
+ union {
+ struct {
+ Bit32u edi, esi, ebp, esp;
+ Bit32u ebx, edx, ecx, eax;
+ } r32;
+ struct {
+ Bit16u di, filler1, si, filler2, bp, filler3, sp, filler4;
+ Bit16u bx, filler5, dx, filler6, cx, filler7, ax, filler8;
+ } r16;
+ struct {
+ Bit32u filler[4];
+ Bit8u bl, bh;
+ Bit16u filler1;
+ Bit8u dl, dh;
+ Bit16u filler2;
+ Bit8u cl, ch;
+ Bit16u filler3;
+ Bit8u al, ah;
+ Bit16u filler4;
+ } r8;
+ } u;
+} pushad_regs_t;
+
+// FLAGS word with byte-level access to the low byte (CF, ZF live there).
+typedef struct {
+ union {
+ struct {
+ Bit16u flags;
+ } r16;
+ struct {
+ Bit8u flagsl;
+ Bit8u flagsh;
+ } r8;
+ } u;
+ } flags_t;
+
+// manipulate carry (bit 0) and zero (bit 6) in a saved flags_t
+#define SetCF(x) x.u.r8.flagsl |= 0x01
+#define SetZF(x) x.u.r8.flagsl |= 0x40
+#define ClearCF(x) x.u.r8.flagsl &= 0xfe
+#define ClearZF(x) x.u.r8.flagsl &= 0xbf
+#define GetCF(x) (x.u.r8.flagsl & 0x01)
+
+// layout of the IP/CS/FLAGS frame an interrupt pushes (as seen in memory)
+typedef struct {
+ Bit16u ip;
+ Bit16u cs;
+ flags_t flags;
+ } iret_addr_t;
+
+
+
+static Bit8u inb();
+static Bit8u inb_cmos();
+static void outb();
+static void outb_cmos();
+static Bit16u inw();
+static void outw();
+static void init_rtc();
+static bx_bool rtc_updating();
+
+static Bit8u read_byte();
+static Bit16u read_word();
+static void write_byte();
+static void write_word();
+static void bios_printf();
+static void copy_e820_table();
+
+static Bit8u inhibit_mouse_int_and_events();
+static void enable_mouse_int_and_events();
+static Bit8u send_to_mouse_ctrl();
+static Bit8u get_mouse_data();
+static void set_kbd_command_byte();
+
+static void int09_function();
+static void int13_harddisk();
+static void int13_cdrom();
+static void int13_cdemu();
+static void int13_eltorito();
+static void int13_diskette_function();
+static void int14_function();
+static void int15_function();
+static void int16_function();
+static void int17_function();
+static Bit32u int19_function();
+static void int1a_function();
+static void int70_function();
+static void int74_function();
+static Bit16u get_CS();
+//static Bit16u get_DS();
+//static void set_DS();
+static Bit16u get_SS();
+static unsigned int enqueue_key();
+static unsigned int dequeue_key();
+static void get_hd_geometry();
+static void set_diskette_ret_status();
+static void set_diskette_current_cyl();
+static void determine_floppy_media();
+static bx_bool floppy_drive_exists();
+static bx_bool floppy_drive_recal();
+static bx_bool floppy_media_known();
+static bx_bool floppy_media_sense();
+static bx_bool set_enable_a20();
+static void debugger_on();
+static void debugger_off();
+static void keyboard_init();
+static void keyboard_panic();
+static void shutdown_status_panic();
+static void nmi_handler_msg();
+
+static void print_bios_banner();
+static void print_boot_device();
+static void print_boot_failure();
+static void print_cdromboot_failure();
+
+# if BX_USE_ATADRV
+
+// ATA / ATAPI driver
+void ata_init();
+void ata_detect();
+void ata_reset();
+
+Bit16u ata_cmd_non_data();
+Bit16u ata_cmd_data_in();
+Bit16u ata_cmd_data_out();
+Bit16u ata_cmd_packet();
+
+Bit16u atapi_get_sense();
+Bit16u atapi_is_ready();
+Bit16u atapi_is_cdrom();
+
+#endif // BX_USE_ATADRV
+
+#if BX_ELTORITO_BOOT
+
+void cdemu_init();
+Bit8u cdemu_isactive();
+Bit8u cdemu_emulated_drive();
+
+Bit16u cdrom_boot();
+
+#endif // BX_ELTORITO_BOOT
+
+static char bios_cvs_version_string[] = "$Revision: 1.138 $";
+static char bios_date_string[] = "$Date: 2005/05/07 15:55:26 $";
+
+static char CVSID[] = "$Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $";
+
+/* Offset to skip the CVS $Id: prefix */
+#define bios_version_string (CVSID + 4)
+
+#define BIOS_PRINTF_HALT 1
+#define BIOS_PRINTF_SCREEN 2
+#define BIOS_PRINTF_INFO 4
+#define BIOS_PRINTF_DEBUG 8
+#define BIOS_PRINTF_ALL (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
+#define BIOS_PRINTF_DEBHALT (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
+
+#define printf(format, p...) bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
+
+// Defines the output macros.
+// BX_DEBUG goes to INFO port until we can easily choose debug info on a
+// per-device basis. Debug info are sent only in debug mode
+#if DEBUG_ROMBIOS
+# define BX_DEBUG(format, p...) bios_printf(BIOS_PRINTF_INFO, format, ##p)
+#else
+# define BX_DEBUG(format, p...)
+#endif
+#define BX_INFO(format, p...) bios_printf(BIOS_PRINTF_INFO, format, ##p)
+#define BX_PANIC(format, p...) bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
+
+#if DEBUG_ATA
+# define BX_DEBUG_ATA(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_ATA(a...)
+#endif
+#if DEBUG_INT13_HD
+# define BX_DEBUG_INT13_HD(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_INT13_HD(a...)
+#endif
+#if DEBUG_INT13_CD
+# define BX_DEBUG_INT13_CD(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_INT13_CD(a...)
+#endif
+#if DEBUG_INT13_ET
+# define BX_DEBUG_INT13_ET(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_INT13_ET(a...)
+#endif
+#if DEBUG_INT13_FL
+# define BX_DEBUG_INT13_FL(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_INT13_FL(a...)
+#endif
+#if DEBUG_INT15
+# define BX_DEBUG_INT15(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_INT15(a...)
+#endif
+#if DEBUG_INT16
+# define BX_DEBUG_INT16(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_INT16(a...)
+#endif
+#if DEBUG_INT1A
+# define BX_DEBUG_INT1A(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_INT1A(a...)
+#endif
+#if DEBUG_INT74
+# define BX_DEBUG_INT74(a...) BX_DEBUG(a)
+#else
+# define BX_DEBUG_INT74(a...)
+#endif
+
+#define SET_AL(val8) AX = ((AX & 0xff00) | (val8))
+#define SET_BL(val8) BX = ((BX & 0xff00) | (val8))
+#define SET_CL(val8) CX = ((CX & 0xff00) | (val8))
+#define SET_DL(val8) DX = ((DX & 0xff00) | (val8))
+#define SET_AH(val8) AX = ((AX & 0x00ff) | ((val8) << 8))
+#define SET_BH(val8) BX = ((BX & 0x00ff) | ((val8) << 8))
+#define SET_CH(val8) CX = ((CX & 0x00ff) | ((val8) << 8))
+#define SET_DH(val8) DX = ((DX & 0x00ff) | ((val8) << 8))
+
+#define GET_AL() ( AX & 0x00ff )
+#define GET_BL() ( BX & 0x00ff )
+#define GET_CL() ( CX & 0x00ff )
+#define GET_DL() ( DX & 0x00ff )
+#define GET_AH() ( AX >> 8 )
+#define GET_BH() ( BX >> 8 )
+#define GET_CH() ( CX >> 8 )
+#define GET_DH() ( DX >> 8 )
+
+#define GET_ELDL() ( ELDX & 0x00ff )
+#define GET_ELDH() ( ELDX >> 8 )
+
+#define SET_CF() FLAGS |= 0x0001
+#define CLEAR_CF() FLAGS &= 0xfffe
+#define GET_CF() (FLAGS & 0x0001)
+
+#define SET_ZF() FLAGS |= 0x0040
+#define CLEAR_ZF() FLAGS &= 0xffbf
+#define GET_ZF() (FLAGS & 0x0040)
+
+#define UNSUPPORTED_FUNCTION 0x86
+
+#define none 0
+#define MAX_SCAN_CODE 0x53
+
+// Keyboard translation table, indexed by make scancode (0..MAX_SCAN_CODE).
+// Each 16-bit entry packs (scancode << 8) | ASCII for the key pressed
+// plain, with Shift, with Ctrl, and with Alt; 'none' (0) means no
+// translation.  lock_flags: 0x40 appears on letter keys and 0x20 on
+// keypad keys -- presumably Caps Lock / Num Lock sensitivity; confirm
+// against the int09 handler that consumes this table.
+static struct {
+ Bit16u normal;
+ Bit16u shift;
+ Bit16u control;
+ Bit16u alt;
+ Bit8u lock_flags;
+ } scan_to_scanascii[MAX_SCAN_CODE + 1] = {
+ { none, none, none, none, none },
+ { 0x011b, 0x011b, 0x011b, 0x0100, none }, /* escape */
+ { 0x0231, 0x0221, none, 0x7800, none }, /* 1! */
+ { 0x0332, 0x0340, 0x0300, 0x7900, none }, /* 2@ */
+ { 0x0433, 0x0423, none, 0x7a00, none }, /* 3# */
+ { 0x0534, 0x0524, none, 0x7b00, none }, /* 4$ */
+ { 0x0635, 0x0625, none, 0x7c00, none }, /* 5% */
+ { 0x0736, 0x075e, 0x071e, 0x7d00, none }, /* 6^ */
+ { 0x0837, 0x0826, none, 0x7e00, none }, /* 7& */
+ { 0x0938, 0x092a, none, 0x7f00, none }, /* 8* */
+ { 0x0a39, 0x0a28, none, 0x8000, none }, /* 9( */
+ { 0x0b30, 0x0b29, none, 0x8100, none }, /* 0) */
+ { 0x0c2d, 0x0c5f, 0x0c1f, 0x8200, none }, /* -_ */
+ { 0x0d3d, 0x0d2b, none, 0x8300, none }, /* =+ */
+ { 0x0e08, 0x0e08, 0x0e7f, none, none }, /* backspace */
+ { 0x0f09, 0x0f00, none, none, none }, /* tab */
+ { 0x1071, 0x1051, 0x1011, 0x1000, 0x40 }, /* Q */
+ { 0x1177, 0x1157, 0x1117, 0x1100, 0x40 }, /* W */
+ { 0x1265, 0x1245, 0x1205, 0x1200, 0x40 }, /* E */
+ { 0x1372, 0x1352, 0x1312, 0x1300, 0x40 }, /* R */
+ { 0x1474, 0x1454, 0x1414, 0x1400, 0x40 }, /* T */
+ { 0x1579, 0x1559, 0x1519, 0x1500, 0x40 }, /* Y */
+ { 0x1675, 0x1655, 0x1615, 0x1600, 0x40 }, /* U */
+ { 0x1769, 0x1749, 0x1709, 0x1700, 0x40 }, /* I */
+ { 0x186f, 0x184f, 0x180f, 0x1800, 0x40 }, /* O */
+ { 0x1970, 0x1950, 0x1910, 0x1900, 0x40 }, /* P */
+ { 0x1a5b, 0x1a7b, 0x1a1b, none, none }, /* [{ */
+ { 0x1b5d, 0x1b7d, 0x1b1d, none, none }, /* ]} */
+ { 0x1c0d, 0x1c0d, 0x1c0a, none, none }, /* Enter */
+ { none, none, none, none, none }, /* L Ctrl */
+ { 0x1e61, 0x1e41, 0x1e01, 0x1e00, 0x40 }, /* A */
+ { 0x1f73, 0x1f53, 0x1f13, 0x1f00, 0x40 }, /* S */
+ { 0x2064, 0x2044, 0x2004, 0x2000, 0x40 }, /* D */
+ { 0x2166, 0x2146, 0x2106, 0x2100, 0x40 }, /* F */
+ { 0x2267, 0x2247, 0x2207, 0x2200, 0x40 }, /* G */
+ { 0x2368, 0x2348, 0x2308, 0x2300, 0x40 }, /* H */
+ { 0x246a, 0x244a, 0x240a, 0x2400, 0x40 }, /* J */
+ { 0x256b, 0x254b, 0x250b, 0x2500, 0x40 }, /* K */
+ { 0x266c, 0x264c, 0x260c, 0x2600, 0x40 }, /* L */
+ { 0x273b, 0x273a, none, none, none }, /* ;: */
+ { 0x2827, 0x2822, none, none, none }, /* '" */
+ { 0x2960, 0x297e, none, none, none }, /* `~ */
+ { none, none, none, none, none }, /* L shift */
+ { 0x2b5c, 0x2b7c, 0x2b1c, none, none }, /* |\ */
+ { 0x2c7a, 0x2c5a, 0x2c1a, 0x2c00, 0x40 }, /* Z */
+ { 0x2d78, 0x2d58, 0x2d18, 0x2d00, 0x40 }, /* X */
+ { 0x2e63, 0x2e43, 0x2e03, 0x2e00, 0x40 }, /* C */
+ { 0x2f76, 0x2f56, 0x2f16, 0x2f00, 0x40 }, /* V */
+ { 0x3062, 0x3042, 0x3002, 0x3000, 0x40 }, /* B */
+ { 0x316e, 0x314e, 0x310e, 0x3100, 0x40 }, /* N */
+ { 0x326d, 0x324d, 0x320d, 0x3200, 0x40 }, /* M */
+ { 0x332c, 0x333c, none, none, none }, /* ,< */
+ { 0x342e, 0x343e, none, none, none }, /* .> */
+ { 0x352f, 0x353f, none, none, none }, /* /? */
+ { none, none, none, none, none }, /* R Shift */
+ { 0x372a, 0x372a, none, none, none }, /* * */
+ { none, none, none, none, none }, /* L Alt */
+ { 0x3920, 0x3920, 0x3920, 0x3920, none }, /* space */
+ { none, none, none, none, none }, /* caps lock */
+ { 0x3b00, 0x5400, 0x5e00, 0x6800, none }, /* F1 */
+ { 0x3c00, 0x5500, 0x5f00, 0x6900, none }, /* F2 */
+ { 0x3d00, 0x5600, 0x6000, 0x6a00, none }, /* F3 */
+ { 0x3e00, 0x5700, 0x6100, 0x6b00, none }, /* F4 */
+ { 0x3f00, 0x5800, 0x6200, 0x6c00, none }, /* F5 */
+ { 0x4000, 0x5900, 0x6300, 0x6d00, none }, /* F6 */
+ { 0x4100, 0x5a00, 0x6400, 0x6e00, none }, /* F7 */
+ { 0x4200, 0x5b00, 0x6500, 0x6f00, none }, /* F8 */
+ { 0x4300, 0x5c00, 0x6600, 0x7000, none }, /* F9 */
+ { 0x4400, 0x5d00, 0x6700, 0x7100, none }, /* F10 */
+ { none, none, none, none, none }, /* Num Lock */
+ { none, none, none, none, none }, /* Scroll Lock */
+ { 0x4700, 0x4737, 0x7700, none, 0x20 }, /* 7 Home */
+ { 0x4800, 0x4838, none, none, 0x20 }, /* 8 UP */
+ { 0x4900, 0x4939, 0x8400, none, 0x20 }, /* 9 PgUp */
+ { 0x4a2d, 0x4a2d, none, none, none }, /* - */
+ { 0x4b00, 0x4b34, 0x7300, none, 0x20 }, /* 4 Left */
+ { 0x4c00, 0x4c35, none, none, 0x20 }, /* 5 */
+ { 0x4d00, 0x4d36, 0x7400, none, 0x20 }, /* 6 Right */
+ { 0x4e2b, 0x4e2b, none, none, none }, /* + */
+ { 0x4f00, 0x4f31, 0x7500, none, 0x20 }, /* 1 End */
+ { 0x5000, 0x5032, none, none, 0x20 }, /* 2 Down */
+ { 0x5100, 0x5133, 0x7600, none, 0x20 }, /* 3 PgDn */
+ { 0x5200, 0x5230, none, none, 0x20 }, /* 0 Ins */
+ { 0x5300, 0x532e, none, none, 0x20 } /* Del */
+ };
+
+ Bit8u
+inb(port)
+ Bit16u port;
+{
+// Read one byte from I/O port 'port'; the byte is left in AL, which is
+// where the 8-bit return value lives (same convention as read_byte below).
+ASM_START
+ push bp
+ mov bp, sp
+
+ push dx
+ mov dx, 4[bp] ;; port argument
+ in al, dx ;; al = return value (byte)
+ pop dx
+
+ pop bp
+ASM_END
+}
+
+#if BX_USE_ATADRV
+ Bit16u
+inw(port)
+ Bit16u port;
+{
+// Read one 16-bit word from I/O port 'port'; the word is left in AX,
+// which is where the 16-bit return value lives.
+ASM_START
+ push bp
+ mov bp, sp
+
+ push dx
+ mov dx, 4[bp] ;; port argument
+ in ax, dx ;; ax = return value (word)
+ pop dx
+
+ pop bp
+ASM_END
+}
+#endif
+
+ void
+outb(port, val)
+ Bit16u port;
+ Bit8u val;
+{
+// Write byte 'val' to I/O port 'port'.
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push dx
+ mov dx, 4[bp] ;; port argument
+ mov al, 6[bp] ;; val argument
+ out dx, al
+ pop dx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+#if BX_USE_ATADRV
+ void
+outw(port, val)
+ Bit16u port;
+ Bit16u val;
+{
+// Write 16-bit word 'val' to I/O port 'port'.
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push dx
+ mov dx, 4[bp] ;; port argument
+ mov ax, 6[bp] ;; val argument
+ out dx, ax
+ pop dx
+ pop ax
+
+ pop bp
+ASM_END
+}
+#endif
+
+ void
+outb_cmos(cmos_reg, val)
+ Bit8u cmos_reg;
+ Bit8u val;
+{
+// Write 'val' into CMOS/RTC register 'cmos_reg': select the register
+// through index port 0x70, then write the data through port 0x71.
+// NOTE(review): AX is clobbered without being saved, unlike outb/outw
+// above -- presumably acceptable for the bcc calling convention; verify.
+ASM_START
+ push bp
+ mov bp, sp
+
+ mov al, 4[bp] ;; cmos_reg
+ out 0x70, al
+ mov al, 6[bp] ;; val
+ out 0x71, al
+
+ pop bp
+ASM_END
+}
+
+ Bit8u
+inb_cmos(cmos_reg)
+ Bit8u cmos_reg;
+{
+// Read CMOS/RTC register 'cmos_reg': select via index port 0x70, read
+// via data port 0x71.  The byte is left in AL as the return value.
+ASM_START
+ push bp
+ mov bp, sp
+
+ mov al, 4[bp] ;; cmos_reg
+ out 0x70, al
+ in al, 0x71 ;; al = return value (byte)
+
+ pop bp
+ASM_END
+}
+
+ void
+init_rtc()
+{
+  // Program the CMOS real-time clock at power-up:
+  //   reg A = 0x26, reg B = 0x02 -- presumably the standard divider /
+  //   periodic-rate selection and 24-hour BCD mode with interrupts off;
+  //   TODO confirm against the MC146818 datasheet.
+  // Regs C and D are read and the values discarded -- presumably to
+  // clear latched interrupt-status flags; confirm with the datasheet.
+  outb_cmos(0x0a, 0x26);
+  outb_cmos(0x0b, 0x02);
+  inb_cmos(0x0c);
+  inb_cmos(0x0d);
+}
+
+ bx_bool
+rtc_updating()
+{
+  // Poll the update-in-progress (UIP) bit, bit 7 of CMOS Status
+  // Register A.  Returns 0 as soon as UIP is observed clear; returns 1
+  // only after the whole poll budget is spent without seeing it clear.
+  // UIP is specified to stay set at most 244 microseconds, so the
+  // iteration budget below covers that window for any reasonable IPS
+  // setting (see original Bochs commentary).
+
+  Bit16u tries;
+
+  for (tries = 25000; --tries != 0; ) {
+    if ((inb_cmos(0x0a) & 0x80) == 0)
+      return(0);
+  }
+  return(1); // timed out: update-in-progress never transitioned to 0
+}
+
+
+ Bit8u
+read_byte(seg, offset)
+ Bit16u seg;
+ Bit16u offset;
+{
+// Read the byte at real-mode address seg:offset.
+ASM_START
+ push bp
+ mov bp, sp
+
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov al, [bx]
+ ;; al = return value (byte)
+ pop ds
+ pop bx
+
+ pop bp
+ASM_END
+}
+
+ Bit16u
+read_word(seg, offset)
+ Bit16u seg;
+ Bit16u offset;
+{
+// Read the 16-bit word at real-mode address seg:offset.
+ASM_START
+ push bp
+ mov bp, sp
+
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov ax, [bx]
+ ;; ax = return value (word)
+ pop ds
+ pop bx
+
+ pop bp
+ASM_END
+}
+
+ void
+write_byte(seg, offset, data)
+ Bit16u seg;
+ Bit16u offset;
+ Bit8u data;
+{
+// Store byte 'data' at real-mode address seg:offset.
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov al, 8[bp] ; data byte
+ mov [bx], al ; write data byte
+ pop ds
+ pop bx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+ void
+write_word(seg, offset, data)
+ Bit16u seg;
+ Bit16u offset;
+ Bit16u data;
+{
+// Store 16-bit word 'data' at real-mode address seg:offset.
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov ax, 8[bp] ; data word
+ mov [bx], ax ; write data word
+ pop ds
+ pop bx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+ Bit16u
+get_CS()
+{
+// Return the current code segment selector (left in AX, the word
+// return register).
+ASM_START
+ mov ax, cs
+ASM_END
+}
+
+// Bit16u
+//get_DS()
+//{
+//ASM_START
+// mov ax, ds
+//ASM_END
+//}
+//
+// void
+//set_DS(ds_selector)
+// Bit16u ds_selector;
+//{
+//ASM_START
+// push bp
+// mov bp, sp
+//
+// push ax
+// mov ax, 4[bp] ; ds_selector
+// mov ds, ax
+// pop ax
+//
+// pop bp
+//ASM_END
+//}
+
+ Bit16u
+get_SS()
+{
+// Return the current stack segment selector (left in AX, the word
+// return register).  Used by bios_printf to address its stack-resident
+// argument list.
+ASM_START
+ mov ax, ss
+ASM_END
+}
+
+#ifdef VMXASSIST
+void
+copy_e820_table()
+{
+ // Copy the e820 memory map left by the loader at 0x9000:0x1e8 (entry
+ // count) and 0x9000:0x2d0 (entries, 0x14 bytes each) to the BIOS area
+ // at segment 0xe000, capping the count at 32 entries.
+ // NOTE(review): the source addresses are part of the VMXASSIST boot
+ // protocol -- verify against the loader side if either ever changes.
+ Bit8u nr_entries = read_byte(0x9000, 0x1e8);
+ if (nr_entries > 32)
+ nr_entries = 32;
+ write_word(0xe000, 0x8, nr_entries);
+ memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
+}
+#endif /* VMXASSIST */
+
+#if BX_DEBUG_SERIAL
+/* serial debug port*/
+#define BX_DEBUG_PORT 0x03f8
+
+/* data */
+#define UART_RBR 0x00
+#define UART_THR 0x00
+
+/* control */
+#define UART_IER 0x01
+#define UART_IIR 0x02
+#define UART_FCR 0x02
+#define UART_LCR 0x03
+#define UART_MCR 0x04
+#define UART_DLL 0x00
+#define UART_DLM 0x01
+
+/* status */
+#define UART_LSR 0x05
+#define UART_MSR 0x06
+#define UART_SCR 0x07
+
+int uart_can_tx_byte(base_port)
+ Bit16u base_port;
+{
+ // Nonzero when the UART's Line Status Register reports the transmit
+ // holding register empty (bit 5), i.e. another byte may be written.
+ Bit8u lsr;
+
+ lsr = inb(base_port + UART_LSR);
+ return lsr & 0x20;
+}
+
+void uart_wait_to_tx_byte(base_port)
+ Bit16u base_port;
+{
+ // Busy-wait until the transmit holding register can accept a byte.
+ for (;;) {
+ if (uart_can_tx_byte(base_port))
+ break;
+ }
+}
+
+void uart_wait_until_sent(base_port)
+ Bit16u base_port;
+{
+ // Busy-wait until the transmitter is completely idle (LSR bit 6).
+ for (;;) {
+ if (inb(base_port + UART_LSR) & 0x40)
+ break;
+ }
+}
+
+void uart_tx_byte(base_port, data)
+ Bit16u base_port;
+ Bit8u data;
+{
+ // Synchronously transmit one byte on the UART at 'base_port': wait for
+ // THR space, write the byte, then wait for the transmitter to drain.
+ uart_wait_to_tx_byte(base_port);
+ outb(base_port + UART_THR, data);
+ uart_wait_until_sent(base_port);
+}
+#endif
+
+ void
+wrch(c)
+ Bit8u c;
+{
+ // Write character 'c' to the screen via the video BIOS teletype
+ // service (INT 10h, AH=0x0E), page/attribute 0 (BX=0).
+ ASM_START
+ push bp
+ mov bp, sp
+
+ push bx
+ mov ah, #0x0e ;; teletype output function
+ mov al, 4[bp] ;; character argument
+ xor bx,bx
+ int #0x10
+ pop bx
+
+ pop bp
+ ASM_END
+}
+
+ void
+send(action, c)
+ Bit16u action;
+ Bit8u c;
+{
+ // Route one output character to every sink selected at build time and
+ // by the BIOS_PRINTF_* bits in 'action'.  A '\n' is expanded to CRLF
+ // for the serial and screen sinks.
+#if BX_DEBUG_SERIAL
+ if (c == '\n') uart_tx_byte(BX_DEBUG_PORT, '\r');
+ uart_tx_byte(BX_DEBUG_PORT, c);
+#endif
+#ifdef VMXASSIST
+ outb(0xE9, c); // Bochs/Qemu-style debug output port
+#endif
+#if BX_VIRTUAL_PORTS
+ if (action & BIOS_PRINTF_DEBUG) outb(DEBUG_PORT, c);
+ if (action & BIOS_PRINTF_INFO) outb(INFO_PORT, c);
+#endif
+ if (action & BIOS_PRINTF_SCREEN) {
+ if (c == '\n') wrch('\r');
+ wrch(c);
+ }
+}
+
+ void
+put_int(action, val, width, neg)
+ Bit16u action;
+ short val, width;
+ bx_bool neg;
+{
+ // Recursively emit 'val' in decimal via send(), right-aligned in
+ // 'width' columns.  'neg' asks for a '-' before the first digit; the
+ // '%d' branch of bios_printf passes the negated magnitude together
+ // with neg=1 for negative values.
+ short nval = val / 10;
+ if (nval)
+ put_int(action, nval, width - 1, neg); // higher digits first
+ else {
+ // leading digit reached: pad to width, then the optional sign
+ while (--width > 0) send(action, ' ');
+ if (neg) send(action, '-');
+ }
+ send(action, val - (nval * 10) + '0'); // current (lowest) digit
+}
+
+ void
+put_uint(action, val, width, neg)
+ Bit16u action;
+ unsigned short val;
+ short width;
+ bx_bool neg;
+{
+ // Recursively emit 'val' in unsigned decimal via send(), right-aligned
+ // in 'width' columns; 'neg' requests a '-' before the first digit
+ // (kept for signature parity with put_int).
+ unsigned short rest = val / 10;
+
+ if (rest != 0) {
+ // Emit the higher-order digits first, consuming one column.
+ put_uint(action, rest, width - 1, neg);
+ } else {
+ // At the leading digit: pad out the field, then the optional sign.
+ while (--width > 0)
+ send(action, ' ');
+ if (neg)
+ send(action, '-');
+ }
+ send(action, '0' + (val % 10));
+}
+
+//--------------------------------------------------------------------------
+// bios_printf()
+// A compact variable argument printf function which prints its output via
+// an I/O port so that it can be logged by Bochs/Plex.
+// Currently, only %x is supported (or %02x, %04x, etc).
+//
+// Supports %[format_width][format]
+// where format can be d,x,c,s
+//--------------------------------------------------------------------------
+ void
+bios_printf(action, s)
+ Bit16u action;
+ Bit8u *s;
+{
+ // Minimal printf.  'action' is a mask of BIOS_PRINTF_* bits selecting
+ // output sinks (see send()); 's' is a format string located in the
+ // code segment.  Supports %[width]x, %u, %d, %s, %c.  Variadic
+ // arguments are read directly off the caller's stack: arg_ptr starts
+ // at &s and is stepped one word per consumed argument, with words
+ // fetched through SS (bcc pushes each argument as one 16-bit word).
+ Bit8u c, format_char;
+ bx_bool in_format;
+ short i;
+ Bit16u *arg_ptr;
+ Bit16u arg_seg, arg, nibble, shift_count, format_width;
+
+ arg_ptr = &s; // last fixed parameter; varargs follow it on the stack
+ arg_seg = get_SS();
+
+ in_format = 0;
+ format_width = 0;
+
+ // A panic-level call announces itself before formatting anything.
+ if ((action & BIOS_PRINTF_DEBHALT) == BIOS_PRINTF_DEBHALT) {
+#if BX_VIRTUAL_PORTS
+ outb(PANIC_PORT2, 0x00);
+#endif
+ bios_printf (BIOS_PRINTF_SCREEN, "FATAL: ");
+ }
+
+ // The format string lives in CS, so fetch bytes via read_byte().
+ while (c = read_byte(get_CS(), s)) {
+ if ( c == '%' ) {
+ in_format = 1;
+ format_width = 0;
+ }
+ else if (in_format) {
+ if ( (c>='0') && (c<='9') ) {
+ format_width = (format_width * 10) + (c - '0'); // accumulate width
+ }
+ else {
+ arg_ptr++; // increment to next arg
+ arg = read_word(arg_seg, arg_ptr);
+ if (c == 'x') {
+ if (format_width == 0)
+ format_width = 4; // default: 4 hex digits (one word)
+ for (i=format_width-1; i>=0; i--) {
+ nibble = (arg >> (4 * i)) & 0x000f;
+ send (action, (nibble<=9)? (nibble+'0') : (nibble-10+'A'));
+ }
+ }
+ else if (c == 'u') {
+ put_uint(action, arg, format_width, 0);
+ }
+ else if (c == 'd') {
+ // put_int prints the magnitude; pass neg=1 and spend one
+ // column of the width on the '-' sign.
+ if (arg & 0x8000)
+ put_int(action, -arg, format_width - 1, 1);
+ else
+ put_int(action, arg, format_width, 0);
+ }
+ else if (c == 's') {
+ // %s recurses; strip HALT so a panic format with %s cannot
+ // halt inside the nested call.
+ bios_printf(action & (~BIOS_PRINTF_HALT), arg);
+ }
+ else if (c == 'c') {
+ send(action, arg);
+ }
+ else
+ BX_PANIC("bios_printf: unknown format\n");
+ in_format = 0;
+ }
+ }
+ else {
+ send(action, c);
+ }
+ s ++;
+ }
+
+ if (action & BIOS_PRINTF_HALT) {
+ // freeze in a busy loop.
+ASM_START
+ cli
+ halt2_loop:
+ hlt
+ jmp halt2_loop
+ASM_END
+ }
+}
+
+//--------------------------------------------------------------------------
+// keyboard_init
+//--------------------------------------------------------------------------
+// this file is based on LinuxBIOS implementation of keyboard.c
+// could convert to #asm to gain space
+ void
+keyboard_init()
+{
+ // Initialise the 8042 keyboard controller and the attached keyboard:
+ // flush stale data, run the controller and interface self-tests,
+ // reset the keyboard, program the command byte, and re-enable
+ // scanning.  Every wait loop is bounded by 'max'; on timeout
+ // keyboard_panic() is called with a distinct status code so the
+ // failing step can be identified (pairs n0/n1 = wait-empty/wait-data).
+ Bit16u max;
+
+ /* ------------------- Flush buffers ------------------------*/
+ /* Wait until buffer is empty */
+ max=0xffff;
+ while ( (inb(0x64) & 0x02) && (--max>0)) outb(0x80, 0x00);
+
+ /* flush incoming keys */
+ max=0x2000;
+ while (--max > 0) {
+ outb(0x80, 0x00);
+ if (inb(0x64) & 0x01) {
+ inb(0x60);
+ max = 0x2000;
+ }
+ }
+
+ // Due to timer issues, and if the IPS setting is > 15000000,
+ // the incoming keys might not be flushed here. That will
+ // cause a panic a few lines below. See sourceforge bug report :
+ // [ 642031 ] FATAL: Keyboard RESET error:993
+
+ /* ------------------- controller side ----------------------*/
+ /* send cmd = 0xAA, self test 8042 */
+ outb(0x64, 0xaa);
+
+ /* Wait until buffer is empty */
+ max=0xffff;
+ while ( (inb(0x64) & 0x02) && (--max>0)) outb(0x80, 0x00);
+ if (max==0x0) keyboard_panic(00);
+
+ /* Wait for data */
+ max=0xffff;
+ while ( ((inb(0x64) & 0x01) == 0) && (--max>0) ) outb(0x80, 0x01);
+ if (max==0x0) keyboard_panic(01);
+
+ /* read self-test result, 0x55 should be returned from 0x60 */
+ if ((inb(0x60) != 0x55)){
+ keyboard_panic(991);
+ }
+
+ /* send cmd = 0xAB, keyboard interface test */
+ outb(0x64,0xab);
+
+ /* Wait until buffer is empty */
+ max=0xffff;
+ while ((inb(0x64) & 0x02) && (--max>0)) outb(0x80, 0x10);
+ if (max==0x0) keyboard_panic(10);
+
+ /* Wait for data */
+ max=0xffff;
+ while ( ((inb(0x64) & 0x01) == 0) && (--max>0) ) outb(0x80, 0x11);
+ if (max==0x0) keyboard_panic(11);
+
+ /* read keyboard interface test result, */
+ /* 0x00 should be returned from 0x60 */
+ if ((inb(0x60) != 0x00)) {
+ keyboard_panic(992);
+ }
+
+ /* Enable Keyboard clock */
+ outb(0x64,0xae);
+ outb(0x64,0xa8);
+
+ /* ------------------- keyboard side ------------------------*/
+ /* reset keyboard and self test (keyboard side) */
+ outb(0x60, 0xff);
+
+ /* Wait until buffer is empty */
+ max=0xffff;
+ while ((inb(0x64) & 0x02) && (--max>0)) outb(0x80, 0x20);
+ if (max==0x0) keyboard_panic(20);
+
+ /* Wait for data */
+ max=0xffff;
+ while ( ((inb(0x64) & 0x01) == 0) && (--max>0) ) outb(0x80, 0x21);
+ if (max==0x0) keyboard_panic(21);
+
+ /* keyboard should return ACK */
+ if ((inb(0x60) != 0xfa)) {
+ keyboard_panic(993);
+ }
+
+ /* Wait for data */
+ max=0xffff;
+ while ( ((inb(0x64) & 0x01) == 0) && (--max>0) ) outb(0x80, 0x31);
+ if (max==0x0) keyboard_panic(31);
+
+ if ((inb(0x60) != 0xaa)) {
+ keyboard_panic(994);
+ }
+
+ /* Disable keyboard */
+ outb(0x60, 0xf5);
+
+ /* Wait until buffer is empty */
+ max=0xffff;
+ while ((inb(0x64) & 0x02) && (--max>0)) outb(0x80, 0x40);
+ if (max==0x0) keyboard_panic(40);
+
+ /* Wait for data */
+ max=0xffff;
+ while ( ((inb(0x64) & 0x01) == 0) && (--max>0) ) outb(0x80, 0x41);
+ if (max==0x0) keyboard_panic(41);
+
+ /* keyboard should return ACK */
+ if ((inb(0x60) != 0xfa)) {
+ keyboard_panic(995);
+ }
+
+ /* Write Keyboard Mode */
+ outb(0x64, 0x60);
+
+ /* Wait until buffer is empty */
+ max=0xffff;
+ while ((inb(0x64) & 0x02) && (--max>0)) outb(0x80, 0x50);
+ if (max==0x0) keyboard_panic(50);
+
+ /* send cmd: scan code convert, disable mouse, enable IRQ 1 */
+ outb(0x60, 0x61);
+
+ /* Wait until buffer is empty */
+ max=0xffff;
+ while ((inb(0x64) & 0x02) && (--max>0)) outb(0x80, 0x60);
+ if (max==0x0) keyboard_panic(60);
+
+ /* Enable keyboard */
+ outb(0x60, 0xf4);
+
+ /* Wait until buffer is empty */
+ max=0xffff;
+ while ((inb(0x64) & 0x02) && (--max>0)) outb(0x80, 0x70);
+ if (max==0x0) keyboard_panic(70);
+
+ /* Wait for data */
+ max=0xffff;
+ while ( ((inb(0x64) & 0x01) == 0) && (--max>0) ) outb(0x80, 0x71);
+ if (max==0x0) keyboard_panic(71); /* was 70: duplicate status made the
+ two timeout points indistinguishable */
+
+ /* keyboard should return ACK */
+ if ((inb(0x60) != 0xfa)) {
+ keyboard_panic(996);
+ }
+
+ outb(0x80, 0x77);
+}
+
+//--------------------------------------------------------------------------
+// keyboard_panic
+//--------------------------------------------------------------------------
+ void
+keyboard_panic(status)
+ Bit16u status;
+{
+ // Halt with a fatal message carrying the keyboard-init step code that
+ // failed (see keyboard_init for the meaning of each status value).
+ // If you're getting a 993 keyboard panic here,
+ // please see the comment in keyboard_init
+
+ BX_PANIC("Keyboard error:%u\n",status);
+}
+
+//--------------------------------------------------------------------------
+// shutdown_status_panic
+// called when the shutdown status is not implemented, displays the status
+//--------------------------------------------------------------------------
+ void
+shutdown_status_panic(status)
+ Bit16u status;
+{
+ // Halt with a fatal message showing the low byte of an unhandled
+ // shutdown status code.
+ BX_PANIC("Unimplemented shutdown status: %02x\n",(Bit8u)status);
+}
+
+//--------------------------------------------------------------------------
+// print_bios_banner
+// displays the BIOS version
+//--------------------------------------------------------------------------
+void
+print_bios_banner()
+{
+  // Print the BIOS banner: application name, CPU count (pluralised),
+  // and the CVS revision/date strings defined above.
+  printf(BX_APPNAME" BIOS, %d cpu%s, ", BX_SMP_PROCESSORS, BX_SMP_PROCESSORS>1?"s":"");
+  printf("%s %s\n", bios_cvs_version_string, bios_date_string);
+  printf("\n");
+}
+
+//--------------------------------------------------------------------------
+// print_boot_device
+// displays the boot device
+//--------------------------------------------------------------------------
+
+static char drivetypes[][10]={"Floppy","Hard Disk","CD-Rom"};
+
+void
+print_boot_device(cdboot, drive)
+  Bit8u cdboot; Bit16u drive;
+{
+  // Announce which device is about to be booted.
+  // cdboot: nonzero for CD boot; drive: real/emulated BIOS drive number
+  // (bit 7 set for hard disks).
+  Bit8u idx;
+
+  if (cdboot)
+    idx = 2;                            // CD-Rom
+  else if ((drive & 0x0080) == 0x00)
+    idx = 0;                            // Floppy
+  else if ((drive & 0x0080) == 0x80)
+    idx = 1;                            // Hard drive
+  else
+    return;                             // unreachable; kept from original
+
+  printf("Booting from %s...\n",drivetypes[idx]);
+}
+
+//--------------------------------------------------------------------------
+// print_boot_failure
+// displays the reason why boot failed
+//--------------------------------------------------------------------------
+ void
+print_boot_failure(cdboot, drive, reason, lastdrive)
+  Bit8u cdboot; Bit8u drive; Bit8u lastdrive;
+{
+  // Report why booting from a device failed; panics (halting) when the
+  // failed device was the last one in the boot sequence.
+  // NOTE(review): 'reason' has no declaration in the K&R parameter list,
+  // so it defaults to int -- presumably intentional for bcc, but worth
+  // confirming it matches the callers' pushes.
+  Bit16u drivenum = drive&0x7f; // strip the hard-disk bit for display
+
+  // cdboot: 1 if boot from cd, 0 otherwise
+  // drive : drive number
+  // reason: 0 signature check failed, 1 read error
+  // lastdrive: 1 boot drive is the last one in boot sequence
+
+  if (cdboot)
+    bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s failed\n",drivetypes[2]);
+  else if (drive & 0x80)
+    bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s %d failed\n", drivetypes[1],drivenum);
+  else
+    bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN, "Boot from %s %d failed\n", drivetypes[0],drivenum);
+
+  if (lastdrive==1) {
+    if (reason==0)
+      BX_PANIC("Not a bootable disk\n");
+    else
+      BX_PANIC("Could not read the boot disk\n");
+  }
+}
+
+//--------------------------------------------------------------------------
+// print_cdromboot_failure
+// displays the reason why boot failed
+//--------------------------------------------------------------------------
+ void
+print_cdromboot_failure( code )
+  Bit16u code;
+{
+  // Report an El-Torito CD boot failure code to the screen and the
+  // info log sink.
+  bios_printf(BIOS_PRINTF_INFO | BIOS_PRINTF_SCREEN,
+              "CDROM boot failure code : %04x\n", code);
+}
+
+void
+nmi_handler_msg()
+{
+  // NMI is not expected here; treat it as fatal.
+  BX_PANIC("NMI Handler called\n");
+}
+
+void
+int18_panic_msg()
+{
+  // INT 18h means every boot device failed; halt with a fatal message.
+  BX_PANIC("INT18: BOOT FAILURE\n");
+}
+
+void
+log_bios_start()
+{
+  // Announce the BIOS version to the info log; when serial debugging is
+  // compiled in, first program the debug UART's line control for 8N1.
+#if BX_DEBUG_SERIAL
+  outb(BX_DEBUG_PORT+UART_LCR, 0x03); /* setup for serial logging: 8N1 */
+#endif
+  BX_INFO("%s\n", bios_version_string);
+}
+
+ bx_bool
+set_enable_a20(val)
+  bx_bool val;
+{
+  // Enable (val != 0) or disable (val == 0) the A20 line through the
+  // PS/2 System Control Port A (I/O port 0x92, bit 1).  Returns the
+  // previous A20 state so callers can restore it afterwards.
+  Bit8u prev;
+
+  // read the current setting before modifying it
+  prev = inb(0x92);
+
+  if (val)
+    outb(0x92, prev | 0x02);   // set bit 1: A20 enabled
+  else
+    outb(0x92, prev & 0xfd);   // clear bit 1: A20 disabled
+
+  return((prev & 0x02) != 0);
+}
+
+ void
+debugger_on()
+{
+  // Write 1 to port 0xfedc -- presumably a Bochs-side debugger toggle;
+  // confirm against the emulator's port map.
+  outb(0xfedc, 0x01);
+}
+
+ void
+debugger_off()
+{
+  // Write 0 to port 0xfedc -- counterpart of debugger_on(); presumably
+  // a Bochs-side debugger toggle.
+  outb(0xfedc, 0x00);
+}
+
+#if BX_USE_ATADRV
+
+// ---------------------------------------------------------------------------
+// Start of ATA/ATAPI Driver
+// ---------------------------------------------------------------------------
+
+// Global defines -- ATA register and register bits.
+// command block & control block regs
+#define ATA_CB_DATA 0 // data reg in/out pio_base_addr1+0
+#define ATA_CB_ERR 1 // error in pio_base_addr1+1
+#define ATA_CB_FR 1 // feature reg out pio_base_addr1+1
+#define ATA_CB_SC 2 // sector count in/out pio_base_addr1+2
+#define ATA_CB_SN 3 // sector number in/out pio_base_addr1+3
+#define ATA_CB_CL 4 // cylinder low in/out pio_base_addr1+4
+#define ATA_CB_CH 5 // cylinder high in/out pio_base_addr1+5
+#define ATA_CB_DH 6 // device head in/out pio_base_addr1+6
+#define ATA_CB_STAT 7 // primary status in pio_base_addr1+7
+#define ATA_CB_CMD 7 // command out pio_base_addr1+7
+#define ATA_CB_ASTAT 6 // alternate status in pio_base_addr2+6
+#define ATA_CB_DC 6 // device control out pio_base_addr2+6
+#define ATA_CB_DA 7 // device address in pio_base_addr2+7
+
+#define ATA_CB_ER_ICRC 0x80 // ATA Ultra DMA bad CRC
+#define ATA_CB_ER_BBK 0x80 // ATA bad block
+#define ATA_CB_ER_UNC 0x40 // ATA uncorrected error
+#define ATA_CB_ER_MC 0x20 // ATA media change
+#define ATA_CB_ER_IDNF 0x10 // ATA id not found
+#define ATA_CB_ER_MCR 0x08 // ATA media change request
+#define ATA_CB_ER_ABRT 0x04 // ATA command aborted
+#define ATA_CB_ER_NTK0 0x02 // ATA track 0 not found
+#define ATA_CB_ER_NDAM 0x01 // ATA address mark not found
+
+#define ATA_CB_ER_P_SNSKEY 0xf0 // ATAPI sense key (mask)
+#define ATA_CB_ER_P_MCR 0x08 // ATAPI Media Change Request
+#define ATA_CB_ER_P_ABRT 0x04 // ATAPI command abort
+#define ATA_CB_ER_P_EOM 0x02 // ATAPI End of Media
+#define ATA_CB_ER_P_ILI 0x01 // ATAPI Illegal Length Indication
+
+// ATAPI Interrupt Reason bits in the Sector Count reg (CB_SC)
+#define ATA_CB_SC_P_TAG 0xf8 // ATAPI tag (mask)
+#define ATA_CB_SC_P_REL 0x04 // ATAPI release
+#define ATA_CB_SC_P_IO 0x02 // ATAPI I/O
+#define ATA_CB_SC_P_CD 0x01 // ATAPI C/D
+
+// bits 7-4 of the device/head (CB_DH) reg
+#define ATA_CB_DH_DEV0 0xa0 // select device 0
+#define ATA_CB_DH_DEV1 0xb0 // select device 1
+
+// status reg (CB_STAT and CB_ASTAT) bits
+#define ATA_CB_STAT_BSY 0x80 // busy
+#define ATA_CB_STAT_RDY 0x40 // ready
+#define ATA_CB_STAT_DF 0x20 // device fault
+#define ATA_CB_STAT_WFT 0x20 // write fault (old name)
+#define ATA_CB_STAT_SKC 0x10 // seek complete
+#define ATA_CB_STAT_SERV 0x10 // service
+#define ATA_CB_STAT_DRQ 0x08 // data request
+#define ATA_CB_STAT_CORR 0x04 // corrected
+#define ATA_CB_STAT_IDX 0x02 // index
+#define ATA_CB_STAT_ERR 0x01 // error (ATA)
+#define ATA_CB_STAT_CHK 0x01 // check (ATAPI)
+
+// device control reg (CB_DC) bits
+#define ATA_CB_DC_HD15 0x08 // bit should always be set to one
+#define ATA_CB_DC_SRST 0x04 // soft reset
+#define ATA_CB_DC_NIEN 0x02 // disable interrupts
+
+// Most mandtory and optional ATA commands (from ATA-3),
+#define ATA_CMD_CFA_ERASE_SECTORS 0xC0
+#define ATA_CMD_CFA_REQUEST_EXT_ERR_CODE 0x03
+#define ATA_CMD_CFA_TRANSLATE_SECTOR 0x87
+#define ATA_CMD_CFA_WRITE_MULTIPLE_WO_ERASE 0xCD
+#define ATA_CMD_CFA_WRITE_SECTORS_WO_ERASE 0x38
+#define ATA_CMD_CHECK_POWER_MODE1 0xE5
+#define ATA_CMD_CHECK_POWER_MODE2 0x98
+#define ATA_CMD_DEVICE_RESET 0x08
+#define ATA_CMD_EXECUTE_DEVICE_DIAGNOSTIC 0x90
+#define ATA_CMD_FLUSH_CACHE 0xE7
+#define ATA_CMD_FORMAT_TRACK 0x50
+#define ATA_CMD_IDENTIFY_DEVICE 0xEC
+#define ATA_CMD_IDENTIFY_DEVICE_PACKET 0xA1
+#define ATA_CMD_IDENTIFY_PACKET_DEVICE 0xA1
+#define ATA_CMD_IDLE1 0xE3
+#define ATA_CMD_IDLE2 0x97
+#define ATA_CMD_IDLE_IMMEDIATE1 0xE1
+#define ATA_CMD_IDLE_IMMEDIATE2 0x95
+#define ATA_CMD_INITIALIZE_DRIVE_PARAMETERS 0x91
+#define ATA_CMD_INITIALIZE_DEVICE_PARAMETERS 0x91
+#define ATA_CMD_NOP 0x00
+#define ATA_CMD_PACKET 0xA0
+#define ATA_CMD_READ_BUFFER 0xE4
+#define ATA_CMD_READ_DMA 0xC8
+#define ATA_CMD_READ_DMA_QUEUED 0xC7
+#define ATA_CMD_READ_MULTIPLE 0xC4
+#define ATA_CMD_READ_SECTORS 0x20
+#define ATA_CMD_READ_VERIFY_SECTORS 0x40
+#define ATA_CMD_RECALIBRATE 0x10
+#define ATA_CMD_SEEK 0x70
+#define ATA_CMD_SET_FEATURES 0xEF
+#define ATA_CMD_SET_MULTIPLE_MODE 0xC6
+#define ATA_CMD_SLEEP1 0xE6
+#define ATA_CMD_SLEEP2 0x99
+#define ATA_CMD_STANDBY1 0xE2
+#define ATA_CMD_STANDBY2 0x96
+#define ATA_CMD_STANDBY_IMMEDIATE1 0xE0
+#define ATA_CMD_STANDBY_IMMEDIATE2 0x94
+#define ATA_CMD_WRITE_BUFFER 0xE8
+#define ATA_CMD_WRITE_DMA 0xCA
+#define ATA_CMD_WRITE_DMA_QUEUED 0xCC
+#define ATA_CMD_WRITE_MULTIPLE 0xC5
+#define ATA_CMD_WRITE_SECTORS 0x30
+#define ATA_CMD_WRITE_VERIFY 0x3C
+
+#define ATA_IFACE_NONE 0x00
+#define ATA_IFACE_ISA 0x00
+#define ATA_IFACE_PCI 0x01
+
+#define ATA_TYPE_NONE 0x00
+#define ATA_TYPE_UNKNOWN 0x01
+#define ATA_TYPE_ATA 0x02
+#define ATA_TYPE_ATAPI 0x03
+
+#define ATA_DEVICE_NONE 0x00
+#define ATA_DEVICE_HD 0xFF
+#define ATA_DEVICE_CDROM 0x05
+
+#define ATA_MODE_NONE 0x00
+#define ATA_MODE_PIO16 0x00
+#define ATA_MODE_PIO32 0x01
+#define ATA_MODE_ISADMA 0x02
+#define ATA_MODE_PCIDMA 0x03
+#define ATA_MODE_USEIRQ 0x10
+
+#define ATA_TRANSLATION_NONE 0
+#define ATA_TRANSLATION_LBA 1
+#define ATA_TRANSLATION_LARGE 2
+#define ATA_TRANSLATION_RECHS 3
+
+#define ATA_DATA_NO 0x00
+#define ATA_DATA_IN 0x01
+#define ATA_DATA_OUT 0x02
+
+// ---------------------------------------------------------------------------
+// ATA/ATAPI driver : initialization
+// ---------------------------------------------------------------------------
+// Reset all ATA driver state kept in the Extended BIOS Data Area.
+// Note the idiom used throughout: '&EbdaData->...' yields the 16-bit
+// offset of the field within the EBDA segment, which is then passed as
+// the offset argument to write_byte/write_word/write_dword.
+void ata_init( )
+{
+  Bit16u ebda_seg=read_word(0x0040,0x000E); // EBDA segment from BDA 40:0E
+  Bit8u  channel, device;
+
+  // Channels info init.
+  for (channel=0; channel<BX_MAX_ATA_INTERFACES; channel++) {
+    write_byte(ebda_seg,&EbdaData->ata.channels[channel].iface,ATA_IFACE_NONE);
+    write_word(ebda_seg,&EbdaData->ata.channels[channel].iobase1,0x0);
+    write_word(ebda_seg,&EbdaData->ata.channels[channel].iobase2,0x0);
+    write_byte(ebda_seg,&EbdaData->ata.channels[channel].irq,0);
+    }
+
+  // Devices info init.
+  for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
+    write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
+    write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_NONE);
+    write_byte(ebda_seg,&EbdaData->ata.devices[device].removable,0);
+    write_byte(ebda_seg,&EbdaData->ata.devices[device].lock,0);
+    write_byte(ebda_seg,&EbdaData->ata.devices[device].mode,ATA_MODE_NONE);
+    write_word(ebda_seg,&EbdaData->ata.devices[device].blksize,0);
+    write_byte(ebda_seg,&EbdaData->ata.devices[device].translation,ATA_TRANSLATION_NONE);
+    write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.heads,0);
+    write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.cylinders,0);
+    write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.spt,0);
+    write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads,0);
+    write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders,0);
+    write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt,0);
+
+    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors,0L);
+    }
+
+  // hdidmap and cdidmap init.
+  // BX_MAX_ATA_DEVICES doubles as the "no device" sentinel here.
+  for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
+    write_byte(ebda_seg,&EbdaData->ata.hdidmap[device],BX_MAX_ATA_DEVICES);
+    write_byte(ebda_seg,&EbdaData->ata.cdidmap[device],BX_MAX_ATA_DEVICES);
+    }
+
+  write_byte(ebda_seg,&EbdaData->ata.hdcount,0);
+  write_byte(ebda_seg,&EbdaData->ata.cdcount,0);
+}
+
+// ---------------------------------------------------------------------------
+// ATA/ATAPI driver : device detection
+// ---------------------------------------------------------------------------
+
+// Probe every possible ATA device: program the legacy channel iobases/irqs,
+// run the classic 0x55/0xaa scratch-register test, classify each responder as
+// ATA or ATAPI via the post-reset signature, IDENTIFY it, record its geometry
+// and translation in the EBDA, and print a one-line summary per device.
+void ata_detect( )
+{
+ Bit16u ebda_seg=read_word(0x0040,0x000E);
+ Bit8u hdcount, cdcount, device, type;
+ Bit8u buffer[0x0200];
+
+ // Hard-wired legacy ISA resources for up to four channels.
+#if BX_MAX_ATA_INTERFACES > 0
+ write_byte(ebda_seg,&EbdaData->ata.channels[0].iface,ATA_IFACE_ISA);
+ write_word(ebda_seg,&EbdaData->ata.channels[0].iobase1,0x1f0);
+ write_word(ebda_seg,&EbdaData->ata.channels[0].iobase2,0x3f0);
+ write_byte(ebda_seg,&EbdaData->ata.channels[0].irq,14);
+#endif
+#if BX_MAX_ATA_INTERFACES > 1
+ write_byte(ebda_seg,&EbdaData->ata.channels[1].iface,ATA_IFACE_ISA);
+ write_word(ebda_seg,&EbdaData->ata.channels[1].iobase1,0x170);
+ write_word(ebda_seg,&EbdaData->ata.channels[1].iobase2,0x370);
+ write_byte(ebda_seg,&EbdaData->ata.channels[1].irq,15);
+#endif
+#if BX_MAX_ATA_INTERFACES > 2
+ write_byte(ebda_seg,&EbdaData->ata.channels[2].iface,ATA_IFACE_ISA);
+ write_word(ebda_seg,&EbdaData->ata.channels[2].iobase1,0x1e8);
+ write_word(ebda_seg,&EbdaData->ata.channels[2].iobase2,0x3e0);
+ write_byte(ebda_seg,&EbdaData->ata.channels[2].irq,12);
+#endif
+#if BX_MAX_ATA_INTERFACES > 3
+ write_byte(ebda_seg,&EbdaData->ata.channels[3].iface,ATA_IFACE_ISA);
+ write_word(ebda_seg,&EbdaData->ata.channels[3].iobase1,0x168);
+ write_word(ebda_seg,&EbdaData->ata.channels[3].iobase2,0x360);
+ write_byte(ebda_seg,&EbdaData->ata.channels[3].irq,11);
+#endif
+#if BX_MAX_ATA_INTERFACES > 4
+#error Please fill the ATA interface informations
+#endif
+
+ // Device detection
+ hdcount=cdcount=0;
+
+ for(device=0; device<BX_MAX_ATA_DEVICES; device++) {
+ Bit16u iobase1, iobase2;
+ Bit8u channel, slave, shift;
+ Bit8u sc, sn, cl, ch, st;
+
+ // Two devices per channel: even ids are master, odd ids are slave.
+ channel = device / 2;
+ slave = device % 2;
+
+ iobase1 =read_word(ebda_seg,&EbdaData->ata.channels[channel].iobase1);
+ iobase2 =read_word(ebda_seg,&EbdaData->ata.channels[channel].iobase2);
+
+ // Disable interrupts
+ outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
+
+ // Look for device: write patterns into the sector-count/sector-number
+ // scratch registers; only a present device will echo them back.
+ outb(iobase1+ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
+ outb(iobase1+ATA_CB_SC, 0x55);
+ outb(iobase1+ATA_CB_SN, 0xaa);
+ outb(iobase1+ATA_CB_SC, 0xaa);
+ outb(iobase1+ATA_CB_SN, 0x55);
+ outb(iobase1+ATA_CB_SC, 0x55);
+ outb(iobase1+ATA_CB_SN, 0xaa);
+
+ // If we found something
+ sc = inb(iobase1+ATA_CB_SC);
+ sn = inb(iobase1+ATA_CB_SN);
+
+ if ( (sc == 0x55) && (sn == 0xaa) ) {
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_UNKNOWN);
+
+ // reset the channel
+ ata_reset (device);
+
+ // check for ATA or ATAPI: after reset SC==SN==0x01, and the
+ // cylinder registers hold the device signature.
+ outb(iobase1+ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
+ sc = inb(iobase1+ATA_CB_SC);
+ sn = inb(iobase1+ATA_CB_SN);
+ if ( (sc==0x01) && (sn==0x01) ) {
+ cl = inb(iobase1+ATA_CB_CL);
+ ch = inb(iobase1+ATA_CB_CH);
+ st = inb(iobase1+ATA_CB_STAT);
+
+ // 0x14/0xeb is the ATAPI signature; 0x00/0x00 with a non-zero
+ // status is a plain ATA device.
+ if ( (cl==0x14) && (ch==0xeb) ) {
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATAPI);
+ }
+ else if ( (cl==0x00) && (ch==0x00) && (st!=0x00) ) {
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATA);
+ }
+ }
+ }
+
+ type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
+
+ // Now we send a IDENTIFY command to ATA device
+ if(type == ATA_TYPE_ATA) {
+ Bit32u sectors;
+ Bit16u cylinders, heads, spt, blksize;
+ Bit8u translation, removable, mode;
+
+ //Temporary values to do the transfer
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
+
+ if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, get_SS(),buffer) !=0 )
+ BX_PANIC("ata-detect: Failed to detect ATA device\n");
+
+ // Pull geometry/capability fields out of the IDENTIFY sector.
+ removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
+ mode = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16;
+ blksize = read_word(get_SS(),buffer+10);
+
+ cylinders = read_word(get_SS(),buffer+(1*2)); // word 1
+ heads = read_word(get_SS(),buffer+(3*2)); // word 3
+ spt = read_word(get_SS(),buffer+(6*2)); // word 6
+
+ sectors = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 61
+
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].removable, removable);
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, mode);
+ write_word(ebda_seg,&EbdaData->ata.devices[device].blksize, blksize);
+ write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads, heads);
+ write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders, cylinders);
+ write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt, spt);
+ write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors, sectors);
+ BX_INFO("ata%d-%d: PCHS=%u/%d/%d translation=", channel, slave,cylinders, heads, spt);
+
+ // NOTE(review): CMOS 0x39 (channel 0/1) and 0x3a (channel 2/3) are
+ // assumed to pack 2 translation bits per device, lowest bits first —
+ // layout inferred from the shift below; confirm against the CMOS map.
+ translation = inb_cmos(0x39 + channel/2);
+ for (shift=device%4; shift>0; shift--) translation >>= 2;
+ translation &= 0x03;
+
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].translation, translation);
+
+ switch (translation) {
+ case ATA_TRANSLATION_NONE:
+ BX_INFO("none");
+ break;
+ case ATA_TRANSLATION_LBA:
+ BX_INFO("lba");
+ break;
+ case ATA_TRANSLATION_LARGE:
+ BX_INFO("large");
+ break;
+ case ATA_TRANSLATION_RECHS:
+ BX_INFO("r-echs");
+ break;
+ }
+ // Derive the logical (BIOS-visible) CHS from the physical one.
+ switch (translation) {
+ case ATA_TRANSLATION_NONE:
+ break;
+ case ATA_TRANSLATION_LBA:
+ spt = 63;
+ sectors /= 63;
+ heads = sectors / 1024;
+ if (heads>128) heads = 255;
+ else if (heads>64) heads = 128;
+ else if (heads>32) heads = 64;
+ else if (heads>16) heads = 32;
+ else heads=16;
+ cylinders = sectors / heads;
+ break;
+ case ATA_TRANSLATION_RECHS:
+ // Take care not to overflow
+ if (heads==16) {
+ if(cylinders>61439) cylinders=61439;
+ heads=15;
+ cylinders = (Bit16u)((Bit32u)(cylinders)*16/15);
+ }
+ // then go through the large bitshift process
+ case ATA_TRANSLATION_LARGE:
+ while(cylinders > 1024) {
+ cylinders >>= 1;
+ heads <<= 1;
+
+ // If we max out the head count
+ if (heads > 127) break;
+ }
+ break;
+ }
+ // clip to 1024 cylinders in lchs
+ if (cylinders > 1024) cylinders=1024;
+ BX_INFO(" LCHS=%d/%d/%d\n", cylinders, heads, spt);
+
+ write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.heads, heads);
+ write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.cylinders, cylinders);
+ write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.spt, spt);
+
+ // fill hdidmap
+ write_byte(ebda_seg,&EbdaData->ata.hdidmap[hdcount], device);
+ hdcount++;
+ }
+
+ // Now we send a IDENTIFY command to ATAPI device
+ if(type == ATA_TYPE_ATAPI) {
+
+ // NOTE(review): this "type" shadows the outer detection-type variable;
+ // here it holds the ATAPI peripheral device type from IDENTIFY word 0.
+ Bit8u type, removable, mode;
+ Bit16u blksize;
+
+ //Temporary values to do the transfer
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_CDROM);
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
+
+ if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 0L, get_SS(),buffer) != 0)
+ BX_PANIC("ata-detect: Failed to detect ATAPI device\n");
+
+ type = read_byte(get_SS(),buffer+1) & 0x1f;
+ removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
+ mode = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16;
+ blksize = 2048;
+
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].device, type);
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].removable, removable);
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, mode);
+ write_word(ebda_seg,&EbdaData->ata.devices[device].blksize, blksize);
+
+ // fill cdidmap
+ write_byte(ebda_seg,&EbdaData->ata.cdidmap[cdcount], device);
+ cdcount++;
+ }
+
+ // Print the per-device summary line using data still in "buffer".
+ {
+ Bit32u sizeinmb;
+ Bit16u ataversion;
+ Bit8u c, i, version, model[41];
+
+ switch (type) {
+ case ATA_TYPE_ATA:
+ sizeinmb = read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors);
+ sizeinmb >>= 11;
+ // fallthrough: version/model parsing is shared with ATAPI
+ case ATA_TYPE_ATAPI:
+ // Read ATA/ATAPI version: highest bit set in words 80/81.
+ ataversion=((Bit16u)(read_byte(get_SS(),buffer+161))<<8)|read_byte(get_SS(),buffer+160);
+ for(version=15;version>0;version--) {
+ if((ataversion&(1<<version))!=0)
+ break;
+ }
+
+ // Read model name; bytes are swapped within each IDENTIFY word.
+ for(i=0;i<20;i++){
+ write_byte(get_SS(),model+(i*2),read_byte(get_SS(),buffer+(i*2)+54+1));
+ write_byte(get_SS(),model+(i*2)+1,read_byte(get_SS(),buffer+(i*2)+54));
+ }
+
+ // Reformat: NUL-terminate and strip trailing spaces.
+ write_byte(get_SS(),model+40,0x00);
+ for(i=39;i>0;i--){
+ if(read_byte(get_SS(),model+i)==0x20)
+ write_byte(get_SS(),model+i,0x00);
+ else break;
+ }
+ break;
+ }
+
+ switch (type) {
+ case ATA_TYPE_ATA:
+ printf("ata%d %s: ",channel,slave?" slave":"master");
+ i=0; while(c=read_byte(get_SS(),model+i++)) printf("%c",c);
+ printf(" ATA-%d Hard-Disk (%d MBytes)\n",version,(Bit16u)sizeinmb);
+ break;
+ case ATA_TYPE_ATAPI:
+ printf("ata%d %s: ",channel,slave?" slave":"master");
+ i=0; while(c=read_byte(get_SS(),model+i++)) printf("%c",c);
+ if(read_byte(ebda_seg,&EbdaData->ata.devices[device].device)==ATA_DEVICE_CDROM)
+ printf(" ATAPI-%d CD-Rom/DVD-Rom\n",version);
+ else
+ printf(" ATAPI-%d Device\n",version);
+ break;
+ case ATA_TYPE_UNKNOWN:
+ printf("ata%d %s: Unknown device\n",channel,slave?" slave":"master");
+ break;
+ }
+ }
+ }
+
+ // Store the devices counts; 40:75 is the BIOS-data-area disk count.
+ write_byte(ebda_seg,&EbdaData->ata.hdcount, hdcount);
+ write_byte(ebda_seg,&EbdaData->ata.cdcount, cdcount);
+ write_byte(0x40,0x75, hdcount);
+
+ printf("\n");
+
+ // FIXME : should use bios=cmos|auto|disable bits
+ // FIXME : should know about translation bits
+ // FIXME : move hard_drive_post here
+
+}
+
+// ---------------------------------------------------------------------------
+// ATA/ATAPI driver : software reset
+// ---------------------------------------------------------------------------
+// ATA-3
+// 8.2.1 Software reset - Device 0
+
+// Software-reset the channel that "device" lives on, following the ATA-3
+// 8.2.1 sequence, then re-enable channel interrupts. Resets BOTH devices of
+// the channel (SRST is a channel-wide bit).
+void ata_reset(device)
+Bit16u device;
+{
+ Bit16u ebda_seg=read_word(0x0040,0x000E);
+ Bit16u iobase1, iobase2;
+ Bit8u channel, slave, sn, sc;
+ Bit16u max;
+
+ channel = device / 2;
+ slave = device % 2;
+
+ iobase1 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase1);
+ iobase2 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase2);
+
+ // Reset
+
+// 8.2.1 (a) -- set SRST in DC
+ outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN | ATA_CB_DC_SRST);
+
+// 8.2.1 (b) -- wait for BSY
+ // NOTE(review): bounded poll, no delay and no error if it never sets.
+ max=0xff;
+ while(--max>0) {
+ Bit8u status = inb(iobase1+ATA_CB_STAT);
+ if ((status & ATA_CB_STAT_BSY) != 0) break;
+ }
+
+// 8.2.1 (f) -- clear SRST
+ outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
+
+ // Only poll the signature if detection previously found a device here.
+ if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != ATA_TYPE_NONE) {
+
+// 8.2.1 (g) -- check for sc==sn==0x01
+ // select device
+ outb(iobase1+ATA_CB_DH, slave?ATA_CB_DH_DEV1:ATA_CB_DH_DEV0);
+ sc = inb(iobase1+ATA_CB_SC);
+ sn = inb(iobase1+ATA_CB_SN);
+
+ if ( (sc==0x01) && (sn==0x01) ) {
+
+// 8.2.1 (h) -- wait for not BSY
+ max=0xff;
+ while(--max>0) {
+ Bit8u status = inb(iobase1+ATA_CB_STAT);
+ if ((status & ATA_CB_STAT_BSY) == 0) break;
+ }
+ }
+ }
+
+// 8.2.1 (i) -- wait for DRDY
+ max=0xfff;
+ while(--max>0) {
+ Bit8u status = inb(iobase1+ATA_CB_STAT);
+ if ((status & ATA_CB_STAT_RDY) != 0) break;
+ }
+
+ // Enable interrupts
+ outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15);
+}
+
+// ---------------------------------------------------------------------------
+// ATA/ATAPI driver : execute a non data command
+// ---------------------------------------------------------------------------
+
+// Placeholder for non-data ATA commands; not implemented yet, so it
+// unconditionally reports success (0).
+Bit16u ata_cmd_non_data()
+{
+  return 0;
+}
+
+// ---------------------------------------------------------------------------
+// ATA/ATAPI driver : execute a data-in command
+// ---------------------------------------------------------------------------
+ // returns
+ // 0 : no error
+ // 1 : BUSY bit set
+ // 2 : read error
+ // 3 : expected DRQ=1
+ // 4 : no sectors left to read/verify
+ // 5 : more sectors to read/verify
+ // 6 : no sectors left to write
+ // 7 : more sectors to write
+// Issue an ATA data-in command (e.g. IDENTIFY, READ SECTORS) and PIO-read
+// "count" 512-byte sectors into segment:offset, in 16- or 32-bit mode
+// depending on the device's recorded mode. Returns the codes listed above.
+Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, lba, segment, offset)
+Bit16u device, command, count, cylinder, head, sector, segment, offset;
+Bit32u lba;
+{
+ Bit16u ebda_seg=read_word(0x0040,0x000E);
+ Bit16u iobase1, iobase2, blksize;
+ Bit8u channel, slave;
+ Bit8u status, current, mode;
+
+ channel = device / 2;
+ slave = device % 2;
+
+ iobase1 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase1);
+ iobase2 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase2);
+ mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
+ blksize = 0x200; // was = read_word(ebda_seg, &EbdaData->ata.devices[device].blksize);
+ // Convert the byte count into the dword (PIO32) or word (PIO16) count
+ // that the rep insw/insd below consumes.
+ if (mode == ATA_MODE_PIO32) blksize>>=2;
+ else blksize>>=1;
+
+ // sector will be 0 only on lba access. Convert to lba-chs
+ // (0x40 in "head" sets the LBA bit of the device/head register).
+ if (sector == 0) {
+ sector = (Bit16u) (lba & 0x000000ffL);
+ lba >>= 8;
+ cylinder = (Bit16u) (lba & 0x0000ffffL);
+ lba >>= 16;
+ head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
+ }
+
+ // Reset count of transferred data
+ write_word(ebda_seg, &EbdaData->ata.trsfsectors,0);
+ write_dword(ebda_seg, &EbdaData->ata.trsfbytes,0L);
+ current = 0;
+
+ status = inb(iobase1 + ATA_CB_STAT);
+ if (status & ATA_CB_STAT_BSY) return 1;
+
+ // Program the task file and fire the command (interrupts masked).
+ outb(iobase2 + ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
+ outb(iobase1 + ATA_CB_FR, 0x00);
+ outb(iobase1 + ATA_CB_SC, count);
+ outb(iobase1 + ATA_CB_SN, sector);
+ outb(iobase1 + ATA_CB_CL, cylinder & 0x00ff);
+ outb(iobase1 + ATA_CB_CH, cylinder >> 8);
+ outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | (Bit8u) head );
+ outb(iobase1 + ATA_CB_CMD, command);
+
+ while (1) {
+ status = inb(iobase1 + ATA_CB_STAT);
+ if ( !(status & ATA_CB_STAT_BSY) ) break;
+ }
+
+ if (status & ATA_CB_STAT_ERR) {
+ BX_DEBUG_ATA("ata_cmd_data_in : read error\n");
+ return 2;
+ } else if ( !(status & ATA_CB_STAT_DRQ) ) {
+ BX_DEBUG_ATA("ata_cmd_data_in : DRQ not set (status %02x)\n", (unsigned) status);
+ return 3;
+ }
+
+ // FIXME : move seg/off translation here
+
+ASM_START
+ sti ;; enable higher priority interrupts
+ASM_END
+
+ // One loop iteration transfers one sector; di/es are updated in place
+ // on the caller's stack frame so the next iteration continues after it.
+ while (1) {
+
+ASM_START
+ push bp
+ mov bp, sp
+ mov di, _ata_cmd_data_in.offset + 2[bp]
+ mov ax, _ata_cmd_data_in.segment + 2[bp]
+ mov cx, _ata_cmd_data_in.blksize + 2[bp]
+
+ ;; adjust if there will be an overrun. 2K max sector size
+ cmp di, #0xf800 ;;
+ jbe ata_in_no_adjust
+
+ata_in_adjust:
+ sub di, #0x0800 ;; sub 2 kbytes from offset
+ add ax, #0x0080 ;; add 2 Kbytes to segment
+
+ata_in_no_adjust:
+ mov es, ax ;; segment in es
+
+ mov dx, _ata_cmd_data_in.iobase1 + 2[bp] ;; ATA data read port
+
+ mov ah, _ata_cmd_data_in.mode + 2[bp]
+ cmp ah, #ATA_MODE_PIO32
+ je ata_in_32
+
+ata_in_16:
+ rep
+ insw ;; CX words transferred from port(DX) to ES:[DI]
+ jmp ata_in_done
+
+ata_in_32:
+ rep
+ insd ;; CX dwords transferred from port(DX) to ES:[DI]
+
+ata_in_done:
+ mov _ata_cmd_data_in.offset + 2[bp], di
+ mov _ata_cmd_data_in.segment + 2[bp], es
+ pop bp
+ASM_END
+
+ current++;
+ write_word(ebda_seg, &EbdaData->ata.trsfsectors,current);
+ count--;
+ status = inb(iobase1 + ATA_CB_STAT);
+ // After the last sector the device must be RDY with BSY/DRQ/ERR clear;
+ // mid-transfer it must still assert DRQ.
+ if (count == 0) {
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ != ATA_CB_STAT_RDY ) {
+ BX_DEBUG_ATA("ata_cmd_data_in : no sectors left (status %02x)\n", (unsigned) status);
+ return 4;
+ }
+ break;
+ }
+ else {
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
+ BX_DEBUG_ATA("ata_cmd_data_in : more sectors left (status %02x)\n", (unsigned) status);
+ return 5;
+ }
+ continue;
+ }
+ }
+ // Enable interrupts
+ outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15);
+ return 0;
+}
+
+// ---------------------------------------------------------------------------
+// ATA/ATAPI driver : execute a data-out command
+// ---------------------------------------------------------------------------
+ // returns
+ // 0 : no error
+ // 1 : BUSY bit set
+ // 2 : read error
+ // 3 : expected DRQ=1
+ // 4 : no sectors left to read/verify
+ // 5 : more sectors to read/verify
+ // 6 : no sectors left to write
+ // 7 : more sectors to write
+// Issue an ATA data-out command (e.g. WRITE SECTORS) and PIO-write "count"
+// 512-byte sectors from segment:offset. Mirror image of ata_cmd_data_in;
+// returns the codes listed above (6/7 for the write-side status failures).
+Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, lba, segment, offset)
+Bit16u device, command, count, cylinder, head, sector, segment, offset;
+Bit32u lba;
+{
+ Bit16u ebda_seg=read_word(0x0040,0x000E);
+ Bit16u iobase1, iobase2, blksize;
+ Bit8u channel, slave;
+ Bit8u status, current, mode;
+
+ channel = device / 2;
+ slave = device % 2;
+
+ iobase1 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase1);
+ iobase2 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase2);
+ mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
+ blksize = 0x200; // was = read_word(ebda_seg, &EbdaData->ata.devices[device].blksize);
+ // Convert the byte count into the dword/word count used by rep outsw/outsd.
+ if (mode == ATA_MODE_PIO32) blksize>>=2;
+ else blksize>>=1;
+
+ // sector will be 0 only on lba access. Convert to lba-chs
+ // (0x40 in "head" sets the LBA bit of the device/head register).
+ if (sector == 0) {
+ sector = (Bit16u) (lba & 0x000000ffL);
+ lba >>= 8;
+ cylinder = (Bit16u) (lba & 0x0000ffffL);
+ lba >>= 16;
+ head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
+ }
+
+ // Reset count of transferred data
+ write_word(ebda_seg, &EbdaData->ata.trsfsectors,0);
+ write_dword(ebda_seg, &EbdaData->ata.trsfbytes,0L);
+ current = 0;
+
+ status = inb(iobase1 + ATA_CB_STAT);
+ if (status & ATA_CB_STAT_BSY) return 1;
+
+ // Program the task file and fire the command (interrupts masked).
+ outb(iobase2 + ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
+ outb(iobase1 + ATA_CB_FR, 0x00);
+ outb(iobase1 + ATA_CB_SC, count);
+ outb(iobase1 + ATA_CB_SN, sector);
+ outb(iobase1 + ATA_CB_CL, cylinder & 0x00ff);
+ outb(iobase1 + ATA_CB_CH, cylinder >> 8);
+ outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | (Bit8u) head );
+ outb(iobase1 + ATA_CB_CMD, command);
+
+ while (1) {
+ status = inb(iobase1 + ATA_CB_STAT);
+ if ( !(status & ATA_CB_STAT_BSY) ) break;
+ }
+
+ if (status & ATA_CB_STAT_ERR) {
+ BX_DEBUG_ATA("ata_cmd_data_out : read error\n");
+ return 2;
+ } else if ( !(status & ATA_CB_STAT_DRQ) ) {
+ BX_DEBUG_ATA("ata_cmd_data_out : DRQ not set (status %02x)\n", (unsigned) status);
+ return 3;
+ }
+
+ // FIXME : move seg/off translation here
+
+ASM_START
+ sti ;; enable higher priority interrupts
+ASM_END
+
+ // One loop iteration transfers one sector; si/es are updated in place
+ // on the caller's stack frame so the next iteration continues after it.
+ while (1) {
+
+ASM_START
+ push bp
+ mov bp, sp
+ mov si, _ata_cmd_data_out.offset + 2[bp]
+ mov ax, _ata_cmd_data_out.segment + 2[bp]
+ mov cx, _ata_cmd_data_out.blksize + 2[bp]
+
+ ;; adjust if there will be an overrun. 2K max sector size
+ cmp si, #0xf800 ;;
+ jbe ata_out_no_adjust
+
+ata_out_adjust:
+ sub si, #0x0800 ;; sub 2 kbytes from offset
+ add ax, #0x0080 ;; add 2 Kbytes to segment
+
+ata_out_no_adjust:
+ mov es, ax ;; segment in es
+
+ mov dx, _ata_cmd_data_out.iobase1 + 2[bp] ;; ATA data write port
+
+ mov ah, _ata_cmd_data_out.mode + 2[bp]
+ cmp ah, #ATA_MODE_PIO32
+ je ata_out_32
+
+ata_out_16:
+ seg ES ;; override: outsw reads from ES:[SI], not the default DS:[SI]
+ rep
+ outsw ;; CX words transferred from ES:[SI] to port(DX)
+ jmp ata_out_done
+
+ata_out_32:
+ seg ES ;; override: outsd reads from ES:[SI], not the default DS:[SI]
+ rep
+ outsd ;; CX dwords transferred from ES:[SI] to port(DX)
+
+ata_out_done:
+ mov _ata_cmd_data_out.offset + 2[bp], si
+ mov _ata_cmd_data_out.segment + 2[bp], es
+ pop bp
+ASM_END
+
+ current++;
+ write_word(ebda_seg, &EbdaData->ata.trsfsectors,current);
+ count--;
+ status = inb(iobase1 + ATA_CB_STAT);
+ // After the last sector the device must be RDY with BSY/DF/DRQ/ERR
+ // clear; mid-transfer it must still assert DRQ.
+ if (count == 0) {
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ != ATA_CB_STAT_RDY ) {
+ BX_DEBUG_ATA("ata_cmd_data_out : no sectors left (status %02x)\n", (unsigned) status);
+ return 6;
+ }
+ break;
+ }
+ else {
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
+ BX_DEBUG_ATA("ata_cmd_data_out : more sectors left (status %02x)\n", (unsigned) status);
+ return 7;
+ }
+ continue;
+ }
+ }
+ // Enable interrupts
+ outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15);
+ return 0;
+}
+
+// ---------------------------------------------------------------------------
+// ATA/ATAPI driver : execute a packet command
+// ---------------------------------------------------------------------------
+ // returns
+ // 0 : no error
+ // 1 : error in parameters
+ // 2 : BUSY bit set
+ // 3 : error
+ // 4 : not ready
+// Send an ATAPI PACKET command (cmdseg:cmdoff, cmdlen bytes) and transfer up
+// to "length" data bytes to bufseg:bufoff, skipping the first "header" bytes.
+// Only ATA_DATA_NO / ATA_DATA_IN are supported. Returns the codes above.
+Bit16u ata_cmd_packet(device, cmdlen, cmdseg, cmdoff, header, length, inout, bufseg, bufoff)
+Bit8u cmdlen,inout;
+Bit16u device,cmdseg, cmdoff, bufseg, bufoff;
+Bit16u header;
+Bit32u length;
+{
+ Bit16u ebda_seg=read_word(0x0040,0x000E);
+ Bit16u iobase1, iobase2;
+ Bit16u lcount, lbefore, lafter, count;
+ Bit8u channel, slave;
+ Bit8u status, mode, lmode;
+ Bit32u total, transfer;
+
+ channel = device / 2;
+ slave = device % 2;
+
+ // Data out is not supported yet
+ if (inout == ATA_DATA_OUT) {
+ BX_INFO("ata_cmd_packet: DATA_OUT not supported yet\n");
+ return 1;
+ }
+
+ // The header length must be even
+ if (header & 1) {
+ BX_DEBUG_ATA("ata_cmd_packet : header must be even (%04x)\n",header);
+ return 1;
+ }
+
+ iobase1 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase1);
+ iobase2 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase2);
+ mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
+ transfer= 0L;
+
+ // Packets are either 12 or 16 bytes; convert to the word count that the
+ // rep outsw below sends.
+ if (cmdlen < 12) cmdlen=12;
+ if (cmdlen > 12) cmdlen=16;
+ cmdlen>>=1;
+
+ // Reset count of transferred data
+ write_word(ebda_seg, &EbdaData->ata.trsfsectors,0);
+ write_dword(ebda_seg, &EbdaData->ata.trsfbytes,0L);
+
+ status = inb(iobase1 + ATA_CB_STAT);
+ if (status & ATA_CB_STAT_BSY) return 2;
+
+ outb(iobase2 + ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
+ // outb(iobase1 + ATA_CB_FR, 0x00);
+ // outb(iobase1 + ATA_CB_SC, 0x00);
+ // outb(iobase1 + ATA_CB_SN, 0x00);
+ // CL/CH = byte-count limit per DRQ block (0xfff0 = as much as possible).
+ outb(iobase1 + ATA_CB_CL, 0xfff0 & 0x00ff);
+ outb(iobase1 + ATA_CB_CH, 0xfff0 >> 8);
+ outb(iobase1 + ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
+ outb(iobase1 + ATA_CB_CMD, ATA_CMD_PACKET);
+
+ // Device should ok to receive command
+ while (1) {
+ status = inb(iobase1 + ATA_CB_STAT);
+ if ( !(status & ATA_CB_STAT_BSY) ) break;
+ }
+
+ if (status & ATA_CB_STAT_ERR) {
+ BX_DEBUG_ATA("ata_cmd_packet : error, status is %02x\n",status);
+ return 3;
+ } else if ( !(status & ATA_CB_STAT_DRQ) ) {
+ BX_DEBUG_ATA("ata_cmd_packet : DRQ not set (status %02x)\n", (unsigned) status);
+ return 4;
+ }
+
+ // Normalize address
+ cmdseg += (cmdoff / 16);
+ cmdoff %= 16;
+
+ // Send command to device
+ASM_START
+ sti ;; enable higher priority interrupts
+
+ push bp
+ mov bp, sp
+
+ mov si, _ata_cmd_packet.cmdoff + 2[bp]
+ mov ax, _ata_cmd_packet.cmdseg + 2[bp]
+ mov cx, _ata_cmd_packet.cmdlen + 2[bp]
+ mov es, ax ;; segment in es
+
+ mov dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data write port
+
+ seg ES
+ rep
+ outsw ;; CX words transferred from ES:[SI] to port(DX)
+
+ pop bp
+ASM_END
+
+ if (inout == ATA_DATA_NO) {
+ status = inb(iobase1 + ATA_CB_STAT);
+ }
+ else {
+ // One iteration per DRQ data block announced by the device.
+ while (1) {
+
+ status = inb(iobase1 + ATA_CB_STAT);
+
+ // Check if command completed
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_DRQ) ) ==0 ) break;
+
+ if (status & ATA_CB_STAT_ERR) {
+ BX_DEBUG_ATA("ata_cmd_packet : error (status %02x)\n",status);
+ return 3;
+ }
+
+ // Device must be ready to send data
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
+ BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", status);
+ return 4;
+ }
+
+ // Normalize address
+ bufseg += (bufoff / 16);
+ bufoff %= 16;
+
+ // Get the byte count
+ lcount = ((Bit16u)(inb(iobase1 + ATA_CB_CH))<<8)+inb(iobase1 + ATA_CB_CL);
+
+ // adjust to read what we want:
+ // lbefore = bytes of this block to discard (remaining header),
+ // lcount = bytes to store in the caller's buffer,
+ // lafter = bytes beyond the requested length to drain.
+ if(header>lcount) {
+ lbefore=lcount;
+ header-=lcount;
+ lcount=0;
+ }
+ else {
+ lbefore=header;
+ header=0;
+ lcount-=lbefore;
+ }
+
+ if(lcount>length) {
+ lafter=lcount-length;
+ lcount=length;
+ length=0;
+ }
+ else {
+ lafter=0;
+ length-=lcount;
+ }
+
+ // Save byte count
+ count = lcount;
+
+ BX_DEBUG_ATA("Trying to read %04x bytes (%04x %04x %04x) ",lbefore+lcount+lafter,lbefore,lcount,lafter);
+ BX_DEBUG_ATA("to 0x%04x:0x%04x\n",bufseg,bufoff);
+
+ // If counts not dividable by 4, use 16bits mode
+ lmode = mode;
+ if (lbefore & 0x03) lmode=ATA_MODE_PIO16;
+ if (lcount & 0x03) lmode=ATA_MODE_PIO16;
+ if (lafter & 0x03) lmode=ATA_MODE_PIO16;
+
+ // adds an extra byte if count are odd. before is always even
+ if (lcount & 0x01) {
+ lcount+=1;
+ if ((lafter > 0) && (lafter & 0x01)) {
+ lafter-=1;
+ }
+ }
+
+ // Convert byte counts to the dword/word counts the asm loops consume.
+ if (lmode == ATA_MODE_PIO32) {
+ lcount>>=2; lbefore>>=2; lafter>>=2;
+ }
+ else {
+ lcount>>=1; lbefore>>=1; lafter>>=1;
+ }
+
+ ; // FIXME bcc bug
+
+ASM_START
+ push bp
+ mov bp, sp
+
+ mov dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data read port
+
+ ;; discard the "lbefore" leading words/dwords of this block
+ mov cx, _ata_cmd_packet.lbefore + 2[bp]
+ jcxz ata_packet_no_before
+
+ mov ah, _ata_cmd_packet.lmode + 2[bp]
+ cmp ah, #ATA_MODE_PIO32
+ je ata_packet_in_before_32
+
+ata_packet_in_before_16:
+ in ax, dx
+ loop ata_packet_in_before_16
+ jmp ata_packet_no_before
+
+ata_packet_in_before_32:
+ push eax
+ata_packet_in_before_32_loop:
+ in eax, dx
+ loop ata_packet_in_before_32_loop
+ pop eax
+
+ata_packet_no_before:
+ ;; store the "lcount" payload words/dwords at ES:[DI]
+ mov cx, _ata_cmd_packet.lcount + 2[bp]
+ jcxz ata_packet_after
+
+ mov di, _ata_cmd_packet.bufoff + 2[bp]
+ mov ax, _ata_cmd_packet.bufseg + 2[bp]
+ mov es, ax
+
+ mov ah, _ata_cmd_packet.lmode + 2[bp]
+ cmp ah, #ATA_MODE_PIO32
+ je ata_packet_in_32
+
+ata_packet_in_16:
+ rep
+ insw ;; CX words transferred from port(DX) to ES:[DI]
+ jmp ata_packet_after
+
+ata_packet_in_32:
+ rep
+ insd ;; CX dwords transferred from port(DX) to ES:[DI]
+
+ata_packet_after:
+ ;; drain the "lafter" trailing words/dwords of this block
+ mov cx, _ata_cmd_packet.lafter + 2[bp]
+ jcxz ata_packet_done
+
+ mov ah, _ata_cmd_packet.lmode + 2[bp]
+ cmp ah, #ATA_MODE_PIO32
+ je ata_packet_in_after_32
+
+ata_packet_in_after_16:
+ in ax, dx
+ loop ata_packet_in_after_16
+ jmp ata_packet_done
+
+ata_packet_in_after_32:
+ push eax
+ata_packet_in_after_32_loop:
+ in eax, dx
+ loop ata_packet_in_after_32_loop
+ pop eax
+
+ata_packet_done:
+ pop bp
+ASM_END
+
+ // Compute new buffer address
+ bufoff += count;
+
+ // Save transferred bytes count
+ transfer += count;
+ write_dword(ebda_seg, &EbdaData->ata.trsfbytes,transfer);
+ }
+ }
+
+ // Final check, device must be ready
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ != ATA_CB_STAT_RDY ) {
+ BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", (unsigned) status);
+ return 4;
+ }
+
+ // Enable interrupts
+ outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15);
+ return 0;
+}
+
+// ---------------------------------------------------------------------------
+// End of ATA/ATAPI Driver
+// ---------------------------------------------------------------------------
+
+// ---------------------------------------------------------------------------
+// Start of ATA/ATAPI generic functions
+// ---------------------------------------------------------------------------
+
+// Fetch sense data from an ATAPI device after a failed packet command.
+// Returns 0 when no sense is pending, 0x0002 when the REQUEST SENSE packet
+// itself could not be sent, otherwise (sense key << 8) | ASC
+// (byte layout per SPC fixed-format sense data).
+  Bit16u
+atapi_get_sense(device)
+  Bit16u device;
+{
+  Bit8u atacmd[12];
+  Bit8u buffer[16];
+
+  memsetb(get_SS(),atacmd,0,12);
+
+  // Build a REQUEST SENSE packet: opcode 0x03, allocation length 0x20.
+  atacmd[0]=0x03;
+  atacmd[4]=0x20;
+
+  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 16L, ATA_DATA_IN, get_SS(), buffer) != 0)
+    return 0x0002;
+
+  // 0x70/0x71 response codes carry valid fixed-format sense data.
+  if ((buffer[0] & 0x7e) == 0x70)
+    return (((Bit16u)buffer[2]&0x0f)*0x100)+buffer[12];
+
+  return 0;
+}
+
+// Test whether an ATAPI device is ready (TEST UNIT READY, retried once after
+// clearing pending sense). Returns 0 if ready, 0x000f if the packet could
+// not be sent, otherwise the sense code from atapi_get_sense().
+  Bit16u
+atapi_is_ready(device)
+  Bit16u device;
+{
+  Bit8u atacmd[12];
+  // BUGFIX: was "Bit8u buffer[];" — an incomplete array declaration with no
+  // storage. No data is transferred (ATA_DATA_NO, length 0), but the
+  // argument passed to ata_cmd_packet must still be a real object.
+  Bit8u buffer[16];
+
+  memsetb(get_SS(),atacmd,0,12);
+
+  // Test Unit Ready (packet opcode 0x00, buffer already zeroed)
+  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, get_SS(), buffer) != 0)
+    return 0x000f;
+
+  if (atapi_get_sense(device) !=0 ) {
+    memsetb(get_SS(),atacmd,0,12);
+
+    // try to send Test Unit Ready again
+    if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, get_SS(), buffer) != 0)
+      return 0x000f;
+
+    return atapi_get_sense(device);
+  }
+  return 0;
+}
+
+// Return 1 when "device" is a detected ATAPI cdrom, 0 otherwise.
+  Bit16u
+atapi_is_cdrom(device)
+  Bit8u device;
+{
+  Bit16u ebda_seg=read_word(0x0040,0x000E);
+
+  // A device qualifies when its id is in range, detection classified it as
+  // ATAPI, and its recorded peripheral type is CDROM. Short-circuit order
+  // matches the original guard sequence.
+  if (device < BX_MAX_ATA_DEVICES
+   && read_byte(ebda_seg,&EbdaData->ata.devices[device].type) == ATA_TYPE_ATAPI
+   && read_byte(ebda_seg,&EbdaData->ata.devices[device].device) == ATA_DEVICE_CDROM)
+    return 1;
+
+  return 0;
+}
+
+// ---------------------------------------------------------------------------
+// End of ATA/ATAPI generic functions
+// ---------------------------------------------------------------------------
+
+#endif // BX_USE_ATADRV
+
+#if BX_ELTORITO_BOOT
+
+// ---------------------------------------------------------------------------
+// Start of El-Torito boot functions
+// ---------------------------------------------------------------------------
+
+// Mark El-Torito disk emulation inactive at boot; the other cdemu fields
+// are only meaningful once cdrom_boot() activates emulation.
+  void
+cdemu_init()
+{
+  write_byte(read_word(0x0040,0x000E),&EbdaData->cdemu.active,0x00);
+}
+
+// Non-zero while El-Torito disk emulation is switched on.
+  Bit8u
+cdemu_isactive()
+{
+  return read_byte(read_word(0x0040,0x000E),&EbdaData->cdemu.active);
+}
+
+// BIOS drive number presented by the emulated cd boot
+// (0x00 floppy, 0x80 harddisk, 0xE0 no-emulation — see cdrom_boot()).
+  Bit8u
+cdemu_emulated_drive()
+{
+  return read_byte(read_word(0x0040,0x000E),&EbdaData->cdemu.emulated_drive);
+}
+
+static char isotag[6]="CD001";
+static char eltorito[24]="EL TORITO SPECIFICATION";
+//
+// Returns ah: emulated drive, al: error code
+//
+ Bit16u
+cdrom_boot()
+{
+ Bit16u ebda_seg=read_word(0x0040,0x000E);
+ Bit8u atacmd[12], buffer[2048];
+ Bit32u lba;
+ Bit16u boot_segment, nbsectors, i, error;
+ Bit8u device;
+
+ // Find out the first cdrom
+ for (device=0; device<BX_MAX_ATA_DEVICES;device++) {
+ if (atapi_is_cdrom(device)) break;
+ }
+
+ // if not found
+ if(device >= BX_MAX_ATA_DEVICES) return 2;
+
+ // Read the Boot Record Volume Descriptor
+ memsetb(get_SS(),atacmd,0,12);
+ atacmd[0]=0x28; // READ command
+ atacmd[7]=(0x01 & 0xff00) >> 8; // Sectors
+ atacmd[8]=(0x01 & 0x00ff); // Sectors
+ atacmd[2]=(0x11 & 0xff000000) >> 24; // LBA
+ atacmd[3]=(0x11 & 0x00ff0000) >> 16;
+ atacmd[4]=(0x11 & 0x0000ff00) >> 8;
+ atacmd[5]=(0x11 & 0x000000ff);
+ if((error = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 2048L, ATA_DATA_IN, get_SS(), buffer)) != 0)
+ return 3;
+
+ // Validity checks
+ if(buffer[0]!=0)return 4;
+ for(i=0;i<5;i++){
+ if(buffer[1+i]!=read_byte(0xf000,&isotag[i]))return 5;
+ }
+ for(i=0;i<23;i++)
+ if(buffer[7+i]!=read_byte(0xf000,&eltorito[i]))return 6;
+
+ // ok, now we calculate the Boot catalog address
+ lba=buffer[0x4A]*0x1000000+buffer[0x49]*0x10000+buffer[0x48]*0x100+buffer[0x47];
+
+ // And we read the Boot Catalog
+ memsetb(get_SS(),atacmd,0,12);
+ atacmd[0]=0x28; // READ command
+ atacmd[7]=(0x01 & 0xff00) >> 8; // Sectors
+ atacmd[8]=(0x01 & 0x00ff); // Sectors
+ atacmd[2]=(lba & 0xff000000) >> 24; // LBA
+ atacmd[3]=(lba & 0x00ff0000) >> 16;
+ atacmd[4]=(lba & 0x0000ff00) >> 8;
+ atacmd[5]=(lba & 0x000000ff);
+ if((error = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 2048L, ATA_DATA_IN, get_SS(), buffer)) != 0)
+ return 7;
+
+ // Validation entry
+ if(buffer[0x00]!=0x01)return 8; // Header
+ if(buffer[0x01]!=0x00)return 9; // Platform
+ if(buffer[0x1E]!=0x55)return 10; // key 1
+ if(buffer[0x1F]!=0xAA)return 10; // key 2
+
+ // Initial/Default Entry
+ if(buffer[0x20]!=0x88)return 11; // Bootable
+
+ write_byte(ebda_seg,&EbdaData->cdemu.media,buffer[0x21]);
+ if(buffer[0x21]==0){
+ // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0.
+ // Win2000 cd boot needs to know it booted from cd
+ write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0xE0);
+ }
+ else if(buffer[0x21]<4)
+ write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0x00);
+ else
+ write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0x80);
+
+ write_byte(ebda_seg,&EbdaData->cdemu.controller_index,device/2);
+ write_byte(ebda_seg,&EbdaData->cdemu.device_spec,device%2);
+
+ boot_segment=buffer[0x23]*0x100+buffer[0x22];
+ if(boot_segment==0x0000)boot_segment=0x07C0;
+
+ write_word(ebda_seg,&EbdaData->cdemu.load_segment,boot_segment);
+ write_word(ebda_seg,&EbdaData->cdemu.buffer_segment,0x0000);
+
+ nbsectors=buffer[0x27]*0x100+buffer[0x26];
+ write_word(ebda_seg,&EbdaData->cdemu.sector_count,nbsectors);
+
+ lba=buffer[0x2B]*0x1000000+buffer[0x2A]*0x10000+buffer[0x29]*0x100+buffer[0x28];
+ write_dword(ebda_seg,&EbdaData->cdemu.ilba,lba);
+
+ // And we read the image in memory
+ memsetb(get_SS(),atacmd,0,12);
+ atacmd[0]=0x28; // READ command
+ atacmd[7]=((1+(nbsectors-1)/4) & 0xff00) >> 8; // Sectors
+ atacmd[8]=((1+(nbsectors-1)/4) & 0x00ff); // Sectors
+ atacmd[2]=(lba & 0xff000000) >> 24; // LBA
+ atacmd[3]=(lba & 0x00ff0000) >> 16;
+ atacmd[4]=(lba & 0x0000ff00) >> 8;
+ atacmd[5]=(lba & 0x000000ff);
+ if((error = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, nbsectors*512L, ATA_DATA_IN, boot_segment,0)) != 0)
+ return 12;
+
+ // Remember the media type
+ switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
+ case 0x01: // 1.2M floppy
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.spt,15);
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders,80);
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.heads,2);
+ break;
+ case 0x02: // 1.44M floppy
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.spt,18);
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders,80);
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.heads,2);
+ break;
+ case 0x03: // 2.88M floppy
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.spt,36);
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders,80);
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.heads,2);
+ break;
+ case 0x04: // Harddrive
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.spt,read_byte(boot_segment,446+6)&0x3f);
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders,
+ (read_byte(boot_segment,446+6)<<2) + read_byte(boot_segment,446+7) + 1);
+ write_word(ebda_seg,&EbdaData->cdemu.vdevice.heads,read_byte(boot_segment,446+5) + 1);
+ break;
+ }
+
+ if(read_byte(ebda_seg,&EbdaData->cdemu.media)!=0) {
+ // Increase bios installed hardware number of devices
+ if(read_byte(ebda_seg,&EbdaData->cdemu.emulated_drive)==0x00)
+ write_byte(0x40,0x10,read_byte(0x40,0x10)|0x41);
+ else
+ write_byte(ebda_seg, &EbdaData->ata.hdcount, read_byte(ebda_seg, &EbdaData->ata.hdcount) + 1);
+ }
+
+
+ // everything is ok, so from now on, the emulation is active
+ if(read_byte(ebda_seg,&EbdaData->cdemu.media)!=0)
+ write_byte(ebda_seg,&EbdaData->cdemu.active,0x01);
+
+ // return the boot drive + no error
+ return (read_byte(ebda_seg,&EbdaData->cdemu.emulated_drive)*0x100)+0;
+}
+
+// ---------------------------------------------------------------------------
+// End of El-Torito boot functions
+// ---------------------------------------------------------------------------
+#endif // BX_ELTORITO_BOOT
+
+// INT 14h serial port services.
+//   regs       caller registers pushed by PUSHA (DL = port index 0-3).
+//   ds         caller's DS (DS itself was set to 0x0000 by the asm wrapper).
+//   iret_addr  CS,IP,FLAGS image from the INT; CF in it reports status.
+// AH=0 initialize port, AH=1 write char, AH=2 read char, AH=3 read status.
+  void
+int14_function(regs, ds, iret_addr)
+  pusha_regs_t regs; // regs pushed from PUSHA instruction
+  Bit16u ds; // previous DS:, DS set to 0x0000 by asm wrapper
+  iret_addr_t iret_addr; // CS,IP,Flags pushed from original INT call
+{
+  Bit16u addr,timer,val16;
+  Bit8u timeout;
+
+  ASM_START
+  sti
+  ASM_END
+
+  addr = read_word(0x0040, (regs.u.r16.dx << 1));      // UART base from BDA 40:00..07
+  timeout = read_byte(0x0040, 0x007C + regs.u.r16.dx); // per-port timeout, BDA 40:7C..7F
+  if ((regs.u.r16.dx < 4) && (addr > 0)) {
+    switch (regs.u.r8.ah) {
+      case 0:
+        // Initialize port: AL holds baud (bits 7-5) and line parameters (bits 4-0).
+        outb(addr+3, inb(addr+3) | 0x80);  // set DLAB to expose divisor latch
+        // BUGFIX: was "regs.u.r8.al & 0xE0 == 0" — '==' binds tighter than '&',
+        // so the test was always false and 110 baud got divisor 0x600 from the
+        // else branch.  Parenthesized to test the baud-rate field as intended.
+        if ((regs.u.r8.al & 0xE0) == 0) {
+          // baud code 0 => 110 baud, divisor 0x0417
+          outb(addr, 0x17);
+          outb(addr+1, 0x04);
+        } else {
+          val16 = 0x600 >> ((regs.u.r8.al & 0xE0) >> 5);
+          outb(addr, val16 & 0xFF);
+          outb(addr+1, val16 >> 8);
+        }
+        outb(addr+3, regs.u.r8.al & 0x1F); // clear DLAB, program line control
+        regs.u.r8.ah = inb(addr+5);        // return line status in AH
+        regs.u.r8.al = inb(addr+6);        // and modem status in AL
+        ClearCF(iret_addr.flags);
+        break;
+      case 1:
+        // Write character in AL; wait for transmitter ready, counting down
+        // the timeout once per BIOS timer tick (BDA 40:6C).
+        timer = read_word(0x0040, 0x006C);
+        while (((inb(addr+5) & 0x60) != 0x60) && (timeout)) {
+          val16 = read_word(0x0040, 0x006C);
+          if (val16 != timer) {
+            timer = val16;
+            timeout--;
+          }
+        }
+        if (timeout) outb(addr, regs.u.r8.al);
+        regs.u.r8.ah = inb(addr+5);
+        if (!timeout) regs.u.r8.ah |= 0x80; // signal timeout in AH bit 7
+        ClearCF(iret_addr.flags);
+        break;
+      case 2:
+        // Read character into AL; wait for receive-data-ready with the same
+        // tick-based timeout scheme as case 1.
+        timer = read_word(0x0040, 0x006C);
+        while (((inb(addr+5) & 0x01) == 0) && (timeout)) {
+          val16 = read_word(0x0040, 0x006C);
+          if (val16 != timer) {
+            timer = val16;
+            timeout--;
+          }
+        }
+        if (timeout) {
+          regs.u.r8.ah = 0;
+          regs.u.r8.al = inb(addr);
+        } else {
+          regs.u.r8.ah = inb(addr+5);     // timed out: hand back line status
+        }
+        ClearCF(iret_addr.flags);
+        break;
+      case 3:
+        // Read status only: AH = line status, AL = modem status.
+        regs.u.r8.ah = inb(addr+5);
+        regs.u.r8.al = inb(addr+6);
+        ClearCF(iret_addr.flags);
+        break;
+      default:
+        SetCF(iret_addr.flags); // Unsupported
+    }
+  } else {
+    SetCF(iret_addr.flags); // Unsupported
+  }
+}
+
+// INT 15h System Services dispatcher (16-bit functions; the 32-bit subset
+// lives in int15_function32, the PS/2 mouse subset in int15_function_mouse).
+//   regs         caller's registers as pushed by PUSHA; results are returned
+//                by modifying this structure in place.
+//   ES, DS       caller's segment registers (writable; copied back by stub).
+//   FLAGS        caller's FLAGS image.  CF cleared on success; on failure CF
+//                is set and AH = UNSUPPORTED_FUNCTION.
+  void
+int15_function(regs, ES, DS, FLAGS)
+  pusha_regs_t regs; // REGS pushed via pusha
+  Bit16u ES, DS, FLAGS;
+{
+  Bit16u ebda_seg=read_word(0x0040,0x000E); // EBDA segment from BDA 40:0E
+  bx_bool prev_a20_enable;
+  Bit16u base15_00;
+  Bit8u base23_16;
+  Bit16u ss;
+  Bit16u CX,DX; // CX is read by frame-offset symbol from the 87h asm below
+
+  Bit16u bRegister;
+  Bit8u irqDisable;
+
+BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
+
+  switch (regs.u.r8.ah) {
+    case 0x24: /* A20 Control */
+      switch (regs.u.r8.al) {
+        case 0x00: // disable A20 gate
+          set_enable_a20(0);
+          CLEAR_CF();
+          regs.u.r8.ah = 0;
+          break;
+        case 0x01: // enable A20 gate
+          set_enable_a20(1);
+          CLEAR_CF();
+          regs.u.r8.ah = 0;
+          break;
+        case 0x02: // query A20 status from system control port A (92h bit 1)
+          regs.u.r8.al = (inb(0x92) >> 1) & 0x01;
+          CLEAR_CF();
+          regs.u.r8.ah = 0;
+          break;
+        case 0x03: // query A20 support: BX=3 => keyboard ctrl + port 92h
+          CLEAR_CF();
+          regs.u.r8.ah = 0;
+          regs.u.r16.bx = 3;
+          break;
+        default:
+          BX_INFO("int15: Func 24h, subfunc %02xh, A20 gate control not supported\n", (unsigned) regs.u.r8.al);
+          SET_CF();
+          regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      }
+      break;
+
+    case 0x41: // wait on external event — not implemented
+      SET_CF();
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      break;
+
+    case 0x4f:
+      /* keyboard intercept */
+#if BX_CPU < 2
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+#else
+      // nop
+#endif
+      SET_CF();
+      break;
+
+    case 0x52: // removable media eject
+      CLEAR_CF();
+      regs.u.r8.ah = 0;  // "ok ejection may proceed"
+      break;
+
+    case 0x83: {
+      // Set/clear event wait interval, driven by the RTC periodic interrupt
+      // (INT 70h); state lives in the BDA at 40:98..40:A0.
+      if( regs.u.r8.al == 0 ) {
+        // Set Interval requested.
+        if( ( read_byte( 0x40, 0xA0 ) & 1 ) == 0 ) {
+          // Interval not already set.
+          write_byte( 0x40, 0xA0, 1 );  // Set status byte.
+          write_word( 0x40, 0x98, ES ); // Byte location, segment
+          write_word( 0x40, 0x9A, regs.u.r16.bx ); // Byte location, offset
+          write_word( 0x40, 0x9C, regs.u.r16.dx ); // Low word, delay
+          write_word( 0x40, 0x9E, regs.u.r16.cx ); // High word, delay.
+          CLEAR_CF( );
+          irqDisable = inb( 0xA1 );
+          outb( 0xA1, irqDisable & 0xFE );
+          bRegister = inb_cmos( 0xB ); // Unmask IRQ8 so INT70 will get through.
+          outb_cmos( 0xB, bRegister | 0x40 ); // Turn on the Periodic Interrupt timer
+        } else {
+          // Interval already set.
+          BX_DEBUG_INT15("int15: Func 83h, failed, already waiting.\n" );
+          SET_CF();
+          regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+        }
+      } else if( regs.u.r8.al == 1 ) {
+        // Clear Interval requested
+        write_byte( 0x40, 0xA0, 0 ); // Clear status byte
+        CLEAR_CF( );
+        bRegister = inb_cmos( 0xB );
+        outb_cmos( 0xB, bRegister & ~0x40 ); // Turn off the Periodic Interrupt timer
+      } else {
+        BX_DEBUG_INT15("int15: Func 83h, failed.\n" );
+        SET_CF();
+        regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+        // NOTE(review): the AL decrement below looks odd but matches upstream
+        // Bochs BIOS behavior for an invalid subfunction.
+        regs.u.r8.al--;
+      }
+
+      break;
+      }
+
+    case 0x87:
+      // Block move: copy CX words between extended-memory addresses described
+      // by the caller-supplied GDT at ES:SI, via a brief switch to protected
+      // mode and back.
+#if BX_CPU < 3
+#  error "Int15 function 87h not supported on < 80386"
+#endif
+      // +++ should probably have descriptor checks
+      // +++ should have exception handlers
+
+      // turn off interrupts
+ASM_START
+      cli
+ASM_END
+
+      prev_a20_enable = set_enable_a20(1); // enable A20 line
+
+      // 128K max of transfer on 386+ ???
+      // source == destination ???
+
+      // ES:SI points to descriptor table
+      // offset   use     initially  comments
+      // ==============================================
+      // 00..07   Unused  zeros      Null descriptor
+      // 08..0f   GDT     zeros      filled in by BIOS
+      // 10..17   source  ssssssss   source of data
+      // 18..1f   dest    dddddddd   destination of data
+      // 20..27   CS      zeros      filled in by BIOS
+      // 28..2f   SS      zeros      filled in by BIOS
+
+      //es:si
+      //eeee0
+      //0ssss
+      //-----
+
+// check for access rights of source & dest here
+
+      // Initialize GDT descriptor
+      base15_00 = (ES << 4) + regs.u.r16.si;
+      base23_16 = ES >> 12;
+      if (base15_00 < (ES<<4)) // 16-bit add wrapped: carry into bits 23:16
+        base23_16++;
+      write_word(ES, regs.u.r16.si+0x08+0, 47);       // limit 15:00 = 6 * 8bytes/descriptor
+      write_word(ES, regs.u.r16.si+0x08+2, base15_00);// base 15:00
+      write_byte(ES, regs.u.r16.si+0x08+4, base23_16);// base 23:16
+      write_byte(ES, regs.u.r16.si+0x08+5, 0x93);     // access
+      write_word(ES, regs.u.r16.si+0x08+6, 0x0000);   // base 31:24/reserved/limit 19:16
+
+      // Initialize CS descriptor
+      write_word(ES, regs.u.r16.si+0x20+0, 0xffff);// limit 15:00 = normal 64K limit
+      write_word(ES, regs.u.r16.si+0x20+2, 0x0000);// base 15:00
+      write_byte(ES, regs.u.r16.si+0x20+4, 0x000f);// base 23:16
+      write_byte(ES, regs.u.r16.si+0x20+5, 0x9b);  // access
+      write_word(ES, regs.u.r16.si+0x20+6, 0x0000);// base 31:24/reserved/limit 19:16
+
+      // Initialize SS descriptor
+      ss = get_SS();
+      base15_00 = ss << 4;
+      base23_16 = ss >> 12;
+      write_word(ES, regs.u.r16.si+0x28+0, 0xffff);   // limit 15:00 = normal 64K limit
+      write_word(ES, regs.u.r16.si+0x28+2, base15_00);// base 15:00
+      write_byte(ES, regs.u.r16.si+0x28+4, base23_16);// base 23:16
+      write_byte(ES, regs.u.r16.si+0x28+5, 0x93);     // access
+      write_word(ES, regs.u.r16.si+0x28+6, 0x0000);   // base 31:24/reserved/limit 19:16
+
+      CX = regs.u.r16.cx;
+ASM_START
+      // Compile generates locals offset info relative to SP.
+      // Get CX (word count) from stack.
+      mov  bx, sp
+      SEG SS
+        mov  cx, _int15_function.CX [bx]
+
+      // since we need to set SS:SP, save them to the BDA
+      // for future restore
+      push eax
+      xor eax, eax
+      mov ds, ax
+      mov 0x0469, ss
+      mov 0x0467, sp
+
+      SEG ES
+        lgdt [si + 0x08]
+      SEG CS
+        lidt [pmode_IDT_info]
+      ;; perhaps do something with IDT here
+
+      ;; set PE bit in CR0
+      mov  eax, cr0
+      or   al, #0x01
+      mov  cr0, eax
+      ;; far jump to flush CPU queue after transition to protected mode
+      JMP_AP(0x0020, protected_mode)
+
+protected_mode:
+      ;; GDT points to valid descriptor table, now load SS, DS, ES
+      mov  ax, #0x28 ;; 101 000 = 5th descriptor in table, TI=GDT, RPL=00
+      mov  ss, ax
+      mov  ax, #0x10 ;; 010 000 = 2nd descriptor in table, TI=GDT, RPL=00
+      mov  ds, ax
+      mov  ax, #0x18 ;; 011 000 = 3rd descriptor in table, TI=GDT, RPL=00
+      mov  es, ax
+      xor  si, si
+      xor  di, di
+      cld
+      rep
+        movsw  ;; move CX words from DS:SI to ES:DI
+
+      ;; make sure DS and ES limits are 64KB
+      mov ax, #0x28
+      mov ds, ax
+      mov es, ax
+
+      ;; reset PG bit in CR0 ???
+      mov  eax, cr0
+      and  al, #0xFE
+      mov  cr0, eax
+
+      ;; far jump to flush CPU queue after transition to real mode
+      JMP_AP(0xf000, real_mode)
+
+real_mode:
+      ;; restore IDT to normal real-mode defaults
+      SEG CS
+        lidt [rmode_IDT_info]
+
+      // restore SS:SP from the BDA
+      xor ax, ax
+      mov ds, ax
+      mov ss, 0x0469
+      mov sp, 0x0467
+      pop eax
+ASM_END
+
+      set_enable_a20(prev_a20_enable);
+
+      // turn back on interrupts
+ASM_START
+      sti
+ASM_END
+
+      regs.u.r8.ah = 0;
+      CLEAR_CF();
+      break;
+
+
+    case 0x88:
+      // Get the amount of extended memory (above 1M)
+#if BX_CPU < 2
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      SET_CF();
+#else
+      regs.u.r8.al = inb_cmos(0x30); // CMOS 30/31h: extended mem size in KB
+      regs.u.r8.ah = inb_cmos(0x31);
+
+      // limit to 15M
+      if(regs.u.r16.ax > 0x3c00)
+        regs.u.r16.ax = 0x3c00;
+
+      CLEAR_CF();
+#endif
+      break;
+
+    case 0x90:
+      /* Device busy interrupt.  Called by Int 16h when no key available */
+      break;
+
+    case 0x91:
+      /* Interrupt complete.  Called by Int 16h when key becomes available */
+      break;
+
+    case 0xbf:
+      BX_INFO("*** int 15h function AH=bf not yet supported!\n");
+      SET_CF();
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      break;
+
+    case 0xC0:
+#if 0
+      SET_CF();
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      break;
+#endif
+      // Return system configuration table pointer in ES:BX.
+      CLEAR_CF();
+      regs.u.r8.ah = 0;
+      regs.u.r16.bx = BIOS_CONFIG_TABLE;
+      ES = 0xF000;
+      break;
+
+    case 0xc1:
+      // Return EBDA segment in ES.
+      ES = ebda_seg;
+      CLEAR_CF();
+      break;
+
+    case 0xd8:
+      bios_printf(BIOS_PRINTF_DEBUG, "EISA BIOS not present\n");
+      SET_CF();
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      break;
+
+    default:
+      BX_INFO("*** int 15h function AX=%04x, BX=%04x not yet supported!\n",
+        (unsigned) regs.u.r16.ax, (unsigned) regs.u.r16.bx);
+      SET_CF();
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      break;
+    }
+}
+
+#if BX_USE_PS2_MOUSE
+// INT 15h AH=C2h PS/2 mouse services.
+//   regs       caller's registers as pushed by PUSHA; edited in place.
+//   ES/DS      caller's segment registers; FLAGS: caller's FLAGS image.
+// Talks to the 8042/auxiliary device through the helper routines
+// send_to_mouse_ctrl / get_mouse_data / inhibit|enable_mouse_int_and_events;
+// mouse driver state is kept in the EBDA at offsets 0x22-0x27.
+  void
+int15_function_mouse(regs, ES, DS, FLAGS)
+  pusha_regs_t regs; // REGS pushed via pusha
+  Bit16u ES, DS, FLAGS;
+{
+  Bit16u ebda_seg=read_word(0x0040,0x000E);
+  Bit8u  mouse_flags_1, mouse_flags_2;
+  Bit16u mouse_driver_seg;
+  Bit16u mouse_driver_offset;
+  Bit8u  comm_byte, prev_command_byte;
+  Bit8u  ret, mouse_data1, mouse_data2, mouse_data3;
+
+BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
+
+  switch (regs.u.r8.ah) {
+    case 0xC2:
+      // Return Codes status in AH
+      // =========================
+      // 00: success
+      // 01: invalid subfunction (AL > 7)
+      // 02: invalid input value (out of allowable range)
+      // 03: interface error
+      // 04: resend command received from mouse controller,
+      //     device driver should attempt command again
+      // 05: cannot enable mouse, since no far call has been installed
+      // 80/86: mouse service not implemented
+
+      switch (regs.u.r8.al) {
+        case 0: // Disable/Enable Mouse
+BX_DEBUG_INT15("case 0:\n");
+          switch (regs.u.r8.bh) {
+            case 0: // Disable Mouse
+BX_DEBUG_INT15("case 0: disable mouse\n");
+              inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+              ret = send_to_mouse_ctrl(0xF5); // disable mouse command
+              if (ret == 0) {
+                ret = get_mouse_data(&mouse_data1);
+                // NOTE(review): '||' accepts a read error OR an ACK here,
+                // while the enable path below requires both ('&&');
+                // matches upstream — confirm before changing.
+                if ( (ret == 0) || (mouse_data1 == 0xFA) ) {
+                  CLEAR_CF();
+                  regs.u.r8.ah = 0;
+                  return;
+                  }
+                }
+
+              // error
+              SET_CF();
+              regs.u.r8.ah = ret;
+              return;
+              break;
+
+            case 1: // Enable Mouse
+BX_DEBUG_INT15("case 1: enable mouse\n");
+              mouse_flags_2 = read_byte(ebda_seg, 0x0027);
+              if ( (mouse_flags_2 & 0x80) == 0 ) {
+                BX_DEBUG_INT15("INT 15h C2 Enable Mouse, no far call handler\n");
+                SET_CF();  // error
+                regs.u.r8.ah = 5; // no far call installed
+                return;
+                }
+              inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+              ret = send_to_mouse_ctrl(0xF4); // enable mouse command
+              if (ret == 0) {
+                ret = get_mouse_data(&mouse_data1);
+                if ( (ret == 0) && (mouse_data1 == 0xFA) ) {
+                  enable_mouse_int_and_events(); // turn IRQ12 and packet generation on
+                  CLEAR_CF();
+                  regs.u.r8.ah = 0;
+                  return;
+                  }
+                }
+              SET_CF();
+              regs.u.r8.ah = ret;
+              return;
+
+            default: // invalid subfunction
+              BX_DEBUG_INT15("INT 15h C2 AL=0, BH=%02x\n", (unsigned) regs.u.r8.bh);
+              SET_CF();  // error
+              regs.u.r8.ah = 1; // invalid subfunction
+              return;
+            }
+          break;
+
+        case 1: // Reset Mouse
+        case 5: // Initialize Mouse
+BX_DEBUG_INT15("case 1 or 5:\n");
+          if (regs.u.r8.al == 5) {
+            // Initialize: BH must be the packet size (3 bytes for std PS/2).
+            if (regs.u.r8.bh != 3) {
+              SET_CF();
+              regs.u.r8.ah = 0x02; // invalid input
+              return;
+            }
+            mouse_flags_2 = read_byte(ebda_seg, 0x0027);
+            // NOTE(review): '& 0x00' clears all prior flag bits before OR-ing
+            // in BH; the prior read is therefore dead — matches upstream.
+            mouse_flags_2 = (mouse_flags_2 & 0x00) | regs.u.r8.bh;
+            mouse_flags_1 = 0x00;
+            write_byte(ebda_seg, 0x0026, mouse_flags_1);
+            write_byte(ebda_seg, 0x0027, mouse_flags_2);
+          }
+
+          inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+          ret = send_to_mouse_ctrl(0xFF); // reset mouse command
+          if (ret == 0) {
+            ret = get_mouse_data(&mouse_data3);
+            // if no mouse attached, it will return RESEND
+            if (mouse_data3 == 0xfe) {
+              SET_CF();
+              return;
+            }
+            if (mouse_data3 != 0xfa)
+              BX_PANIC("Mouse reset returned %02x (should be ack)\n", (unsigned)mouse_data3);
+            if ( ret == 0 ) {
+              ret = get_mouse_data(&mouse_data1); // BAT completion code
+              if ( ret == 0 ) {
+                ret = get_mouse_data(&mouse_data2); // device ID
+                if ( ret == 0 ) {
+                  // turn IRQ12 and packet generation on
+                  enable_mouse_int_and_events();
+                  CLEAR_CF();
+                  regs.u.r8.ah = 0;
+                  regs.u.r8.bl = mouse_data1;
+                  regs.u.r8.bh = mouse_data2;
+                  return;
+                  }
+                }
+              }
+            }
+
+          // error
+          SET_CF();
+          regs.u.r8.ah = ret;
+          return;
+
+        case 2: // Set Sample Rate
+BX_DEBUG_INT15("case 2:\n");
+          switch (regs.u.r8.bh) {
+            case 0: mouse_data1 = 10; break; //  10 reports/sec
+            case 1: mouse_data1 = 20; break; //  20 reports/sec
+            case 2: mouse_data1 = 40; break; //  40 reports/sec
+            case 3: mouse_data1 = 60; break; //  60 reports/sec
+            case 4: mouse_data1 = 80; break; //  80 reports/sec
+            case 5: mouse_data1 = 100; break; // 100 reports/sec (default)
+            case 6: mouse_data1 = 200; break; // 200 reports/sec
+            default: mouse_data1 = 0;
+          }
+          if (mouse_data1 > 0) {
+            ret = send_to_mouse_ctrl(0xF3); // set sample rate command
+            if (ret == 0) {
+              ret = get_mouse_data(&mouse_data2); // ACK for command byte
+              ret = send_to_mouse_ctrl(mouse_data1); // then the rate itself
+              ret = get_mouse_data(&mouse_data2);
+              CLEAR_CF();
+              regs.u.r8.ah = 0;
+            } else {
+              // error
+              SET_CF();
+              regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+            }
+          } else {
+            // error
+            SET_CF();
+            regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+          }
+          break;
+
+        case 3: // Set Resolution
+BX_DEBUG_INT15("case 3:\n");
+          // BX:
+          //      0 =  25 dpi, 1 count  per millimeter
+          //      1 =  50 dpi, 2 counts per millimeter
+          //      2 = 100 dpi, 4 counts per millimeter
+          //      3 = 200 dpi, 8 counts per millimeter
+          // NOTE(review): resolution is accepted but never forwarded to the
+          // device — confirm this no-op is intended.
+          CLEAR_CF();
+          regs.u.r8.ah = 0;
+          break;
+
+        case 4: // Get Device ID
+BX_DEBUG_INT15("case 4:\n");
+          inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+          ret = send_to_mouse_ctrl(0xF2); // get mouse ID command
+          if (ret == 0) {
+            ret = get_mouse_data(&mouse_data1); // ACK
+            ret = get_mouse_data(&mouse_data2); // device ID
+            CLEAR_CF();
+            regs.u.r8.ah = 0;
+            regs.u.r8.bh = mouse_data2;
+          } else {
+            // error
+            SET_CF();
+            regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+          }
+          break;
+
+        case 6: // Return Status & Set Scaling Factor...
+BX_DEBUG_INT15("case 6:\n");
+          switch (regs.u.r8.bh) {
+            case 0: // Return Status
+              comm_byte = inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+              ret = send_to_mouse_ctrl(0xE9); // get mouse info command
+              if (ret == 0) {
+                ret = get_mouse_data(&mouse_data1);
+                if (mouse_data1 != 0xfa)
+                  BX_PANIC("Mouse status returned %02x (should be ack)\n", (unsigned)mouse_data1);
+                if (ret == 0) {
+                  ret = get_mouse_data(&mouse_data1); // status byte
+                  if ( ret == 0 ) {
+                    ret = get_mouse_data(&mouse_data2); // resolution
+                    if ( ret == 0 ) {
+                      ret = get_mouse_data(&mouse_data3); // sample rate
+                      if ( ret == 0 ) {
+                        CLEAR_CF();
+                        regs.u.r8.ah = 0;
+                        regs.u.r8.bl = mouse_data1;
+                        regs.u.r8.cl = mouse_data2;
+                        regs.u.r8.dl = mouse_data3;
+                        set_kbd_command_byte(comm_byte); // restore IRQ12 and serial enable
+                        return;
+                        }
+                      }
+                    }
+                  }
+                }
+
+              // error
+              SET_CF();
+              regs.u.r8.ah = ret;
+              set_kbd_command_byte(comm_byte); // restore IRQ12 and serial enable
+              return;
+
+            case 1: // Set Scaling Factor to 1:1
+            case 2: // Set Scaling Factor to 2:1
+              comm_byte = inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+              if (regs.u.r8.bh == 1) {
+                ret = send_to_mouse_ctrl(0xE6);
+              } else {
+                ret = send_to_mouse_ctrl(0xE7);
+              }
+              if (ret == 0) {
+                get_mouse_data(&mouse_data1);
+                ret = (mouse_data1 != 0xFA); // nonzero if no ACK received
+              }
+              if (ret == 0) {
+                CLEAR_CF();
+                regs.u.r8.ah = 0;
+              } else {
+                // error
+                SET_CF();
+                regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+              }
+              set_kbd_command_byte(comm_byte); // restore IRQ12 and serial enable
+              break;
+
+            default:
+              BX_PANIC("INT 15h C2 AL=6, BH=%02x\n", (unsigned) regs.u.r8.bh);
+          }
+          break;
+
+        case 7: // Set Mouse Handler Address
+BX_DEBUG_INT15("case 7:\n");
+          // Record the driver far-call entry point in the EBDA; a zero
+          // seg:offset removes the handler.
+          mouse_driver_seg = ES;
+          mouse_driver_offset = regs.u.r16.bx;
+          write_word(ebda_seg, 0x0022, mouse_driver_offset);
+          write_word(ebda_seg, 0x0024, mouse_driver_seg);
+          mouse_flags_2 = read_byte(ebda_seg, 0x0027);
+          if (mouse_driver_offset == 0 && mouse_driver_seg == 0) {
+            /* remove handler */
+            if ( (mouse_flags_2 & 0x80) != 0 ) {
+              mouse_flags_2 &= ~0x80;
+              inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+              }
+            }
+          else {
+            /* install handler */
+            mouse_flags_2 |= 0x80;
+            }
+          write_byte(ebda_seg, 0x0027, mouse_flags_2);
+          CLEAR_CF();
+          regs.u.r8.ah = 0;
+          break;
+
+        default:
+BX_DEBUG_INT15("case default:\n");
+          regs.u.r8.ah = 1; // invalid function
+          SET_CF();
+        }
+      break;
+
+    default:
+      BX_INFO("*** int 15h function AX=%04x, BX=%04x not yet supported!\n",
+        (unsigned) regs.u.r16.ax, (unsigned) regs.u.r16.bx);
+      SET_CF();
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      break;
+    }
+}
+#endif
+
+ void
+int15_function32(regs, ES, DS, FLAGS)
+ pushad_regs_t regs; // REGS pushed via pushad
+ Bit16u ES, DS, FLAGS;
+{
+ Bit32u extended_memory_size=0; // 64bits long
+ Bit16u CX,DX;
+
+BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
+
+ switch (regs.u.r8.ah) {
+ case 0x86:
+ // Wait for CX:DX microseconds. currently using the
+ // refresh request port 0x61 bit4, toggling every 15usec
+
+ CX = regs.u.r16.cx;
+ DX = regs.u.r16.dx;
+
+ASM_START
+ sti
+
+ ;; Get the count in eax
+ mov bx, sp
+ SEG SS
+ mov ax, _int15_function.CX [bx]
+ shl eax, #16
+ SEG SS
+ mov ax, _int15_function.DX [bx]
+
+ ;; convert to numbers of 15usec ticks
+ mov ebx, #15
+ xor edx, edx
+ div eax, ebx
+ mov ecx, eax
+
+ ;; wait for ecx number of refresh requests
+ in al, #0x61
+ and al,#0x10
+ mov ah, al
+
+ or ecx, ecx
+ je int1586_tick_end
+int1586_tick:
+ in al, #0x61
+ and al,#0x10
+ cmp al, ah
+ je int1586_tick
+ mov ah, al
+ dec ecx
+ jnz int1586_tick
+int1586_tick_end:
+ASM_END
+
+ break;
+
+ case 0xe8:
+ switch(regs.u.r8.al)
+ {
+ case 0x20: // coded by osmaker aka K.J.
+ if(regs.u.r32.edx == 0x534D4150)
+ {
+#ifdef VMXASSIST
+ if ((regs.u.r16.bx / 0x14)* 0x14 == regs.u.r16.bx) {
+ Bit16u e820_table_size = read_word(0xe000, 0x8) * 0x14;
+
+ if (regs.u.r16.bx + 0x14 <= e820_table_size) {
+ memcpyb(ES, regs.u.r16.di,
+ 0xe000, 0x10 + regs.u.r16.bx, 0x14);
+ }
+ regs.u.r32.ebx += 0x14;
+ if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size)
+ regs.u.r32.ebx = 0;
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ } else if (regs.u.r16.bx == 1) {
+ extended_memory_size = inb_cmos(0x35);
+ extended_memory_size <<= 8;
+ extended_memory_size |= inb_cmos(0x34);
+ extended_memory_size *= 64;
+ if (extended_memory_size > 0x3bc000) // greater than EFF00000???
+ {
+ extended_memory_size = 0x3bc000; // everything after this is reserved memory until we get to 0x100000000
+ }
+ extended_memory_size *= 1024;
+ extended_memory_size += 15728640; // make up for the 16mb of memory that is chopped off
+
+ if (extended_memory_size <= 15728640)
+ {
+ extended_memory_size = inb_cmos(0x31);
+ extended_memory_size <<= 8;
+ extended_memory_size |= inb_cmos(0x30);
+ extended_memory_size *= 1024;
+ }
+
+ write_word(ES, regs.u.r16.di, 0x0000);
+ write_word(ES, regs.u.r16.di+2, 0x0010);
+ write_word(ES, regs.u.r16.di+4, 0x0000);
+ write_word(ES, regs.u.r16.di+6, 0x0000);
+
+ write_word(ES, regs.u.r16.di+8, extended_memory_size);
+ extended_memory_size >>= 16;
+ write_word(ES, regs.u.r16.di+10, extended_memory_size);
+ extended_memory_size >>= 16;
+ write_word(ES, regs.u.r16.di+12, extended_memory_size);
+ extended_memory_size >>= 16;
+ write_word(ES, regs.u.r16.di+14, extended_memory_size);
+
+ write_word(ES, regs.u.r16.di+16, 0x1);
+ write_word(ES, regs.u.r16.di+18, 0x0);
+
+ regs.u.r32.ebx = 0;
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ } else { /* AX=E820, DX=534D4150, BX unrecognized */
+ goto int15_unimplemented;
+ }
+#else
+ switch(regs.u.r16.bx)
+ {
+ case 0:
+ write_word(ES, regs.u.r16.di, 0x00);
+ write_word(ES, regs.u.r16.di+2, 0x00);
+ write_word(ES, regs.u.r16.di+4, 0x00);
+ write_word(ES, regs.u.r16.di+6, 0x00);
+
+ write_word(ES, regs.u.r16.di+8, 0xFC00);
+ write_word(ES, regs.u.r16.di+10, 0x0009);
+ write_word(ES, regs.u.r16.di+12, 0x0000);
+ write_word(ES, regs.u.r16.di+14, 0x0000);
+
+ write_word(ES, regs.u.r16.di+16, 0x1);
+ write_word(ES, regs.u.r16.di+18, 0x0);
+
+ regs.u.r32.ebx = 1;
+
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ break;
+ case 1:
+ extended_memory_size = inb_cmos(0x35);
+ extended_memory_size <<= 8;
+ extended_memory_size |= inb_cmos(0x34);
+ extended_memory_size *= 64;
+ if(extended_memory_size > 0x3bc000) // greater than EFF00000???
+ {
+ extended_memory_size = 0x3bc000; // everything after this is reserved memory until we get to 0x100000000
+ }
+ extended_memory_size *= 1024;
+ extended_memory_size += 15728640; // make up for the 16mb of memory that is chopped off
+
+ if(extended_memory_size <= 15728640)
+ {
+ extended_memory_size = inb_cmos(0x31);
+ extended_memory_size <<= 8;
+ extended_memory_size |= inb_cmos(0x30);
+ extended_memory_size *= 1024;
+ }
+
+ write_word(ES, regs.u.r16.di, 0x0000);
+ write_word(ES, regs.u.r16.di+2, 0x0010);
+ write_word(ES, regs.u.r16.di+4, 0x0000);
+ write_word(ES, regs.u.r16.di+6, 0x0000);
+
+ write_word(ES, regs.u.r16.di+8, extended_memory_size);
+ extended_memory_size >>= 16;
+ write_word(ES, regs.u.r16.di+10, extended_memory_size);
+ extended_memory_size >>= 16;
+ write_word(ES, regs.u.r16.di+12, extended_memory_size);
+ extended_memory_size >>= 16;
+ write_word(ES, regs.u.r16.di+14, extended_memory_size);
+
+ write_word(ES, regs.u.r16.di+16, 0x1);
+ write_word(ES, regs.u.r16.di+18, 0x0);
+
+ regs.u.r32.ebx = 0;
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ break;
+ default: /* AX=E820, DX=534D4150, BX unrecognized */
+ goto int15_unimplemented;
+ break;
+ }
+#endif
+ } else {
+ // if DX != 0x534D4150)
+ goto int15_unimplemented;
+ }
+ break;
+
+ case 0x01:
+ // do we have any reason to fail here ?
+ CLEAR_CF();
+
+ // my real system sets ax and bx to 0
+ // this is confirmed by Ralph Brown list
+ // but syslinux v1.48 is known to behave
+ // strangely if ax is set to 0
+ // regs.u.r16.ax = 0;
+ // regs.u.r16.bx = 0;
+
+ // Get the amount of extended memory (above 1M)
+ regs.u.r8.cl = inb_cmos(0x30);
+ regs.u.r8.ch = inb_cmos(0x31);
+
+ // limit to 15M
+ if(regs.u.r16.cx > 0x3c00)
+ {
+ regs.u.r16.cx = 0x3c00;
+ }
+
+ // Get the amount of extended memory above 16M in 64k blocs
+ regs.u.r8.dl = inb_cmos(0x34);
+ regs.u.r8.dh = inb_cmos(0x35);
+
+ // Set configured memory equal to extended memory
+ regs.u.r16.ax = regs.u.r16.cx;
+ regs.u.r16.bx = regs.u.r16.dx;
+ break;
+ default: /* AH=0xE8?? but not implemented */
+ goto int15_unimplemented;
+ }
+ break;
+ int15_unimplemented:
+ // fall into the default
+ default:
+ BX_INFO("*** int 15h function AX=%04x, BX=%04x not yet supported!\n",
+ (unsigned) regs.u.r16.ax, (unsigned) regs.u.r16.bx);
+ SET_CF();
+ regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+ break;
+ }
+}
+
+// INT 16h keyboard services handler.
+// Register arguments are the caller's registers saved by the asm stub and
+// written back on return; results go out through AX/BX and FLAGS (ZF=1
+// means "no key available" for the status functions).
+  void
+int16_function(DI, SI, BP, SP, BX, DX, CX, AX, FLAGS)
+  Bit16u DI, SI, BP, SP, BX, DX, CX, AX, FLAGS;
+{
+  Bit8u scan_code, ascii_code, shift_flags, count;
+  Bit16u kbd_code, max;
+
+  BX_DEBUG_INT16("int16: AX=%04x BX=%04x CX=%04x DX=%04x \n", AX, BX, CX, DX);
+
+  switch (GET_AH()) {
+    case 0x00: /* read keyboard input */
+
+      if ( !dequeue_key(&scan_code, &ascii_code, 1) ) {
+        BX_PANIC("KBD: int16h: out of keyboard input\n");
+        }
+      // ASCII 0xF0/0xE0 are prefix markers with no printable character.
+      if (scan_code !=0 && ascii_code == 0xF0) ascii_code = 0;
+      else if (ascii_code == 0xE0) ascii_code = 0;
+      AX = (scan_code << 8) | ascii_code;
+      break;
+
+    case 0x01: /* check keyboard status */
+      // Peek without removing the key; ZF=1 when the buffer is empty.
+      if ( !dequeue_key(&scan_code, &ascii_code, 0) ) {
+        SET_ZF();
+        return;
+        }
+      if (scan_code !=0 && ascii_code == 0xF0) ascii_code = 0;
+      else if (ascii_code == 0xE0) ascii_code = 0;
+      AX = (scan_code << 8) | ascii_code;
+      CLEAR_ZF();
+      break;
+
+    case 0x02: /* get shift flag status */
+      shift_flags = read_byte(0x0040, 0x17);
+      SET_AL(shift_flags);
+      break;
+
+    case 0x05: /* store key-stroke into buffer */
+      // AL=1 signals "buffer full", AL=0 success.
+      if ( !enqueue_key(GET_CH(), GET_CL()) ) {
+        SET_AL(1);
+        }
+      else {
+        SET_AL(0);
+        }
+      break;
+
+    case 0x09: /* GET KEYBOARD FUNCTIONALITY */
+      // bit Bochs Description
+      //  7    0   reserved
+      //  6    0   INT 16/AH=20h-22h supported (122-key keyboard support)
+      //  5    1   INT 16/AH=10h-12h supported (enhanced keyboard support)
+      //  4    1   INT 16/AH=0Ah supported
+      //  3    0   INT 16/AX=0306h supported
+      //  2    0   INT 16/AX=0305h supported
+      //  1    0   INT 16/AX=0304h supported
+      //  0    0   INT 16/AX=0300h supported
+      //
+      SET_AL(0x30);
+      break;
+
+    case 0x0A: /* GET KEYBOARD ID */
+      // Send 0xF2 to the controller, then collect the two-byte keyboard ID.
+      count = 2;
+      kbd_code = 0x0;
+      outb(0x60, 0xf2);
+      /* Wait for data */
+      max=0xffff;
+      while ( ((inb(0x64) & 0x01) == 0) && (--max>0) ) outb(0x80, 0x00); // port 0x80 write = tiny delay
+      if (max>0x0) {
+        if ((inb(0x60) == 0xfa)) { // ACK received, read the ID bytes
+          do {
+            max=0xffff;
+            while ( ((inb(0x64) & 0x01) == 0) && (--max>0) ) outb(0x80, 0x00);
+            if (max>0x0) {
+              kbd_code >>= 8;
+              kbd_code |= (inb(0x60) << 8);
+              }
+            } while (--count>0);
+          }
+        }
+      BX=kbd_code;
+      break;
+
+    case 0x10: /* read MF-II keyboard input */
+
+      if ( !dequeue_key(&scan_code, &ascii_code, 1) ) {
+        BX_PANIC("KBD: int16h: out of keyboard input\n");
+        }
+      if (scan_code !=0 && ascii_code == 0xF0) ascii_code = 0;
+      AX = (scan_code << 8) | ascii_code;
+      break;
+
+    case 0x11: /* check MF-II keyboard status */
+      if ( !dequeue_key(&scan_code, &ascii_code, 0) ) {
+        SET_ZF();
+        return;
+        }
+      if (scan_code !=0 && ascii_code == 0xF0) ascii_code = 0;
+      AX = (scan_code << 8) | ascii_code;
+      CLEAR_ZF();
+      break;
+
+    case 0x12: /* get extended keyboard status */
+      shift_flags = read_byte(0x0040, 0x17);
+      SET_AL(shift_flags);
+      shift_flags = read_byte(0x0040, 0x18);
+      SET_AH(shift_flags);
+      BX_DEBUG_INT16("int16: func 12 sending %04x\n",AX);
+      break;
+
+    case 0x92: /* keyboard capability check called by DOS 5.0+ keyb */
+      SET_AH(0x80); // function int16 ah=0x10-0x12 supported
+      break;
+
+    case 0xA2: /* 122 keys capability check called by DOS 5.0+ keyb */
+      // don't change AH : function int16 ah=0x20-0x22 NOT supported
+      break;
+
+    case 0x6F:
+      if (GET_AL() == 0x08)
+        SET_AH(0x02); // unsupported, aka normal keyboard
+      // NOTE(review): no break here — control falls into default and logs
+      // the call; this matches upstream, confirm the fallthrough is intended.
+
+    default:
+      BX_INFO("KBD: unsupported int 16h function %02x\n", GET_AH());
+    }
+}
+
+// Pull the next keystroke from the BIOS keyboard buffer in the BDA.
+// scan_code/ascii_code: out-parameters in the caller's stack segment.
+// incr: nonzero removes the key from the buffer, zero only peeks.
+// Returns 1 if a key was available, 0 if the buffer was empty.
+  unsigned int
+dequeue_key(scan_code, ascii_code, incr)
+  Bit8u *scan_code;
+  Bit8u *ascii_code;
+  unsigned int incr;
+{
+  Bit16u kbuf_start, kbuf_end, head, tail;
+  Bit16u caller_ss;
+  Bit8u ch_ascii, ch_scan;
+
+#if BX_CPU < 2
+  // XT-class: buffer location is fixed.
+  kbuf_start = 0x001E;
+  kbuf_end   = 0x003E;
+#else
+  // AT-class: buffer bounds are stored in the BDA at 40:80/40:82.
+  kbuf_start = read_word(0x0040, 0x0080);
+  kbuf_end   = read_word(0x0040, 0x0082);
+#endif
+
+  head = read_word(0x0040, 0x001a);
+  tail = read_word(0x0040, 0x001c);
+
+  // Buffer empty: head has caught up with tail.
+  if (head == tail)
+    return(0);
+
+  // Each entry is two bytes: ASCII code first, then the scan code.
+  ch_ascii = read_byte(0x0040, head);
+  ch_scan  = read_byte(0x0040, head+1);
+  caller_ss = get_SS();
+  write_byte(caller_ss, ascii_code, ch_ascii);
+  write_byte(caller_ss, scan_code, ch_scan);
+
+  if (incr) {
+    // Advance the head pointer, wrapping at the end of the ring.
+    head += 2;
+    if (head >= kbuf_end)
+      head = kbuf_start;
+    write_word(0x0040, 0x001a, head);
+  }
+  return(1);
+}
+
+// Shared BX_PANIC format string; %s names the routine that found the
+// 8042 input buffer (port 0x64 bit 1) still full.
+static char panic_msg_keyb_buffer_full[] = "%s: keyboard input buffer full\n";
+
+// Mask PS/2 mouse interrupts and packet delivery: read the 8042 command
+// byte, then rewrite it with IRQ12 generation off and the aux (mouse)
+// serial clock disabled.  Returns the command byte as it was before the
+// change so the caller can restore it via set_kbd_command_byte().
+  Bit8u
+inhibit_mouse_int_and_events()
+{
+  Bit8u saved_cmd, new_cmd;
+
+  // Input buffer must be empty before a controller command may be sent.
+  if ( inb(0x64) & 0x02 )
+    BX_PANIC(panic_msg_keyb_buffer_full,"inhibmouse");
+  outb(0x64, 0x20);                      // 8042 "read command byte"
+  while ( (inb(0x64) & 0x01) != 0x01 );  // wait for output buffer full
+  saved_cmd = inb(0x60);
+  new_cmd = saved_cmd;
+  //while ( (inb(0x64) & 0x02) );
+  if ( inb(0x64) & 0x02 )
+    BX_PANIC(panic_msg_keyb_buffer_full,"inhibmouse");
+  new_cmd &= 0xfd;    // turn off IRQ 12 generation
+  new_cmd |= 0x20;    // disable mouse serial clock line
+  outb(0x64, 0x60);   // 8042 "write command byte"
+  outb(0x60, new_cmd);
+  return(saved_cmd);
+}
+
+// Unmask PS/2 mouse interrupts and packet delivery: rewrite the 8042
+// command byte with IRQ12 generation on and the aux serial clock enabled.
+  void
+enable_mouse_int_and_events()
+{
+  Bit8u cmd;
+
+  // Controller input buffer must be free before issuing a command.
+  if ( inb(0x64) & 0x02 )
+    BX_PANIC(panic_msg_keyb_buffer_full,"enabmouse");
+  outb(0x64, 0x20);                      // request current command byte
+  while ( (inb(0x64) & 0x01) != 0x01 );  // wait until it is readable
+  cmd = inb(0x60);
+  //while ( (inb(0x64) & 0x02) );
+  if ( inb(0x64) & 0x02 )
+    BX_PANIC(panic_msg_keyb_buffer_full,"enabmouse");
+  cmd = (cmd | 0x02) & 0xdf;  // IRQ12 generation on, mouse clock enabled
+  outb(0x64, 0x60);           // write command byte back
+  outb(0x60, cmd);
+}
+
+// Forward one byte to the PS/2 mouse via 8042 command 0xD4.
+//   sendbyte: byte to deliver to the auxiliary (mouse) device.
+// Always returns 0; panics if the controller input buffer is still full.
+// (Removed the unused local 'response' that the original declared.)
+  Bit8u
+send_to_mouse_ctrl(sendbyte)
+  Bit8u sendbyte;
+{
+  // wait for chance to write to ctrl
+  if ( inb(0x64) & 0x02 )
+    BX_PANIC(panic_msg_keyb_buffer_full,"sendmouse");
+  outb(0x64, 0xD4);     // route the next data byte to the aux device
+  outb(0x60, sendbyte);
+  return(0);
+}
+
+
+// Block until the 8042 output buffer holds a byte that came from the aux
+// (mouse) device — status bit 0 (output full) and bit 5 (aux data) both
+// set — then store it at *data in the caller's stack segment.
+// Always returns 0.
+  Bit8u
+get_mouse_data(data)
+  Bit8u *data;
+{
+  Bit8u byte_in;
+  Bit16u caller_ss;
+
+  // Busy-wait for mouse data; no timeout.
+  while ( (inb(0x64) & 0x21) != 0x21 ) {
+    }
+
+  byte_in = inb(0x60);
+
+  caller_ss = get_SS();
+  write_byte(caller_ss, data, byte_in);
+  return(0);
+}
+
+  void
+set_kbd_command_byte(command_byte)
+  Bit8u command_byte;
+{
+  // Write a new 8042 controller command byte.  Panics if the controller
+  // input buffer is still busy rather than waiting.
+  // Fix: dropped a stray `outb(0x64, 0xD4)` (the write-to-aux-device
+  // prefix, copy-pasted from send_to_mouse_ctrl).  It was immediately
+  // superseded by the 0x60 command below and served no purpose here.
+  if ( inb(0x64) & 0x02 )
+    BX_PANIC(panic_msg_keyb_buffer_full,"setkbdcomm");
+
+  outb(0x64, 0x60); // write command byte
+  outb(0x60, command_byte);
+}
+
+// INT 09h keyboard hardware-interrupt handler body.  Called with the raw
+// scancode in AL and DS already set to F000.  Maintains the BIOS data
+// area keyboard state: shift flags (40:17), extended/MF2 flags (40:18),
+// E0-prefix state (40:96) and LED flags (40:97), and queues translated
+// (scancode, ascii) pairs via enqueue_key().
+  void
+int09_function(DI, SI, BP, SP, BX, DX, CX, AX)
+  Bit16u DI, SI, BP, SP, BX, DX, CX, AX;
+{
+  Bit8u scancode, asciicode, shift_flags;
+  Bit8u mf2_flags, mf2_state, led_flags;
+
+  //
+  // DS has been set to F000 before call
+  //
+
+
+  scancode = GET_AL();
+
+  if (scancode == 0) {
+    BX_INFO("KBD: int09 handler: AL=0\n");
+    return;
+  }
+
+
+  shift_flags = read_byte(0x0040, 0x17);
+  mf2_flags = read_byte(0x0040, 0x18);
+  mf2_state = read_byte(0x0040, 0x96);  // bit 0: last byte was E0 prefix
+  led_flags = read_byte(0x0040, 0x97);
+  asciicode = 0;
+
+  switch (scancode) {
+    case 0x3a: /* Caps Lock press */
+      shift_flags ^= 0x40;
+      write_byte(0x0040, 0x17, shift_flags);
+      mf2_flags |= 0x40;
+      write_byte(0x0040, 0x18, mf2_flags);
+      led_flags ^= 0x04;  // toggle Caps Lock LED
+      write_byte(0x0040, 0x97, led_flags);
+      break;
+    case 0xba: /* Caps Lock release */
+      mf2_flags &= ~0x40;
+      write_byte(0x0040, 0x18, mf2_flags);
+      break;
+
+    case 0x2a: /* L Shift press */
+      /*shift_flags &= ~0x40;*/
+      shift_flags |= 0x02;
+      write_byte(0x0040, 0x17, shift_flags);
+      led_flags &= ~0x04;
+      write_byte(0x0040, 0x97, led_flags);
+      break;
+    case 0xaa: /* L Shift release */
+      shift_flags &= ~0x02;
+      write_byte(0x0040, 0x17, shift_flags);
+      break;
+
+    case 0x36: /* R Shift press */
+      /*shift_flags &= ~0x40;*/
+      shift_flags |= 0x01;
+      write_byte(0x0040, 0x17, shift_flags);
+      led_flags &= ~0x04;
+      write_byte(0x0040, 0x97, led_flags);
+      break;
+    case 0xb6: /* R Shift release */
+      shift_flags &= ~0x01;
+      write_byte(0x0040, 0x17, shift_flags);
+      break;
+
+    case 0x1d: /* Ctrl press */
+      shift_flags |= 0x04;
+      write_byte(0x0040, 0x17, shift_flags);
+      // E0-prefixed scancode means the right-hand (MF2) key
+      if (mf2_state & 0x01) {
+        mf2_flags |= 0x04;
+      } else {
+        mf2_flags |= 0x01;
+      }
+      write_byte(0x0040, 0x18, mf2_flags);
+      break;
+    case 0x9d: /* Ctrl release */
+      shift_flags &= ~0x04;
+      write_byte(0x0040, 0x17, shift_flags);
+      if (mf2_state & 0x01) {
+        mf2_flags &= ~0x04;
+      } else {
+        mf2_flags &= ~0x01;
+      }
+      write_byte(0x0040, 0x18, mf2_flags);
+      break;
+
+    case 0x38: /* Alt press */
+      shift_flags |= 0x08;
+      write_byte(0x0040, 0x17, shift_flags);
+      if (mf2_state & 0x01) {
+        mf2_flags |= 0x08;
+      } else {
+        mf2_flags |= 0x02;
+      }
+      write_byte(0x0040, 0x18, mf2_flags);
+      break;
+    case 0xb8: /* Alt release */
+      shift_flags &= ~0x08;
+      write_byte(0x0040, 0x17, shift_flags);
+      if (mf2_state & 0x01) {
+        mf2_flags &= ~0x08;
+      } else {
+        mf2_flags &= ~0x02;
+      }
+      write_byte(0x0040, 0x18, mf2_flags);
+      break;
+
+    case 0x45: /* Num Lock press */
+      // ignored when E0-prefixed (that would be the Pause sequence)
+      if ((mf2_state & 0x01) == 0) {
+        mf2_flags |= 0x20;
+        write_byte(0x0040, 0x18, mf2_flags);
+        shift_flags ^= 0x20;
+        led_flags ^= 0x02;  // toggle Num Lock LED
+        write_byte(0x0040, 0x17, shift_flags);
+        write_byte(0x0040, 0x97, led_flags);
+      }
+      break;
+    case 0xc5: /* Num Lock release */
+      if ((mf2_state & 0x01) == 0) {
+        mf2_flags &= ~0x20;
+        write_byte(0x0040, 0x18, mf2_flags);
+      }
+      break;
+
+    case 0x46: /* Scroll Lock press */
+      mf2_flags |= 0x10;
+      write_byte(0x0040, 0x18, mf2_flags);
+      shift_flags ^= 0x10;
+      led_flags ^= 0x01;  // toggle Scroll Lock LED
+      write_byte(0x0040, 0x17, shift_flags);
+      write_byte(0x0040, 0x97, led_flags);
+      break;
+
+    case 0xc6: /* Scroll Lock release */
+      mf2_flags &= ~0x10;
+      write_byte(0x0040, 0x18, mf2_flags);
+      break;
+
+    default:
+      if (scancode & 0x80) return; /* toss key releases ... */
+      if (scancode > MAX_SCAN_CODE) {
+        BX_INFO("KBD: int09h_handler(): unknown scancode read!\n");
+        return;
+      }
+      // Pick the (ascii, scancode) pair from the translation table;
+      // the table entries pack scancode in the high byte, ascii in the low.
+      if (shift_flags & 0x08) { /* ALT */
+        asciicode = scan_to_scanascii[scancode].alt;
+        scancode = scan_to_scanascii[scancode].alt >> 8;
+      }
+      else if (shift_flags & 0x04) { /* CONTROL */
+        asciicode = scan_to_scanascii[scancode].control;
+        scancode = scan_to_scanascii[scancode].control >> 8;
+      }
+      else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
+        /* check if lock state should be ignored
+         * because a SHIFT key are pressed */
+
+        if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
+          asciicode = scan_to_scanascii[scancode].normal;
+          scancode = scan_to_scanascii[scancode].normal >> 8;
+        }
+        else {
+          asciicode = scan_to_scanascii[scancode].shift;
+          scancode = scan_to_scanascii[scancode].shift >> 8;
+        }
+      }
+      else {
+        /* check if lock is on */
+        if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
+          asciicode = scan_to_scanascii[scancode].shift;
+          scancode = scan_to_scanascii[scancode].shift >> 8;
+        }
+        else {
+          asciicode = scan_to_scanascii[scancode].normal;
+          scancode = scan_to_scanascii[scancode].normal >> 8;
+        }
+      }
+      if (scancode==0 && asciicode==0) {
+        BX_INFO("KBD: int09h_handler(): scancode & asciicode are zero?\n");
+      }
+      enqueue_key(scancode, asciicode);
+      break;
+  }
+  // NOTE(review): the E0-prefix flag is cleared only in this local copy —
+  // it is never written back to 40:96 here.  Confirm whether the asm
+  // wrapper (or the E0-handling path outside this view) stores it.
+  mf2_state &= ~0x01;
+}
+
+  unsigned int
+enqueue_key(scan_code, ascii_code)
+  Bit8u scan_code, ascii_code;
+{
+  // Append one (ascii, scancode) pair to the BIOS keyboard ring buffer
+  // in segment 0040.  Returns 1 on success, 0 when the buffer is full
+  // (the key is dropped).
+  Bit16u start, end, head, tail, slot;
+
+  //BX_INFO("KBD: enqueue_key() called scan:%02x, ascii:%02x\n",
+  //    scan_code, ascii_code);
+
+  // Resolve the ring buffer bounds: fixed on pre-AT machines, otherwise
+  // taken from the BDA (40:80 start, 40:82 end).
+#if BX_CPU < 2
+  start = 0x001E;
+  end   = 0x003E;
+#else
+  start = read_word(0x0040, 0x0080);
+  end   = read_word(0x0040, 0x0082);
+#endif
+
+  head = read_word(0x0040, 0x001A);
+  tail = read_word(0x0040, 0x001C);
+
+  // Remember the free slot, then advance the tail with wrap-around.
+  slot = tail;
+  tail += 2;
+  if (tail >= end)
+    tail = start;
+
+  // Advancing onto the head means the buffer is full: drop the key.
+  if (tail == head)
+    return(0);
+
+  write_byte(0x0040, slot, ascii_code);
+  write_byte(0x0040, slot+1, scan_code);
+  write_word(0x0040, 0x001C, tail);
+  return(1);
+}
+
+
+// INT 74h (IRQ12, PS/2 mouse) handler body.  Reads one byte from the
+// 8042, stages it in the EBDA packet buffer (ebda:0028+), and when a
+// full packet has been collected fills in status/X/Y/Z and sets
+// make_farcall.  NOTE(review): assignments to the Bit16u parameters
+// appear to be read back from the stack frame by the asm stub (which is
+// how the far-call to the user handler gets its arguments) — confirm
+// against the entry stub outside this view.
+  void
+int74_function(make_farcall, Z, Y, X, status)
+  Bit16u make_farcall, Z, Y, X, status;
+{
+  Bit16u ebda_seg=read_word(0x0040,0x000E);
+  Bit8u in_byte, index, package_count;
+  Bit8u mouse_flags_1, mouse_flags_2;
+
+BX_DEBUG_INT74("entering int74_function\n");
+  make_farcall = 0;
+
+  // Ignore spurious interrupts: require output-buffer-full + aux-data.
+  in_byte = inb(0x64);
+  if ( (in_byte & 0x21) != 0x21 ) {
+    return;
+  }
+  in_byte = inb(0x60);
+BX_DEBUG_INT74("int74: read byte %02x\n", in_byte);
+
+  mouse_flags_1 = read_byte(ebda_seg, 0x0026);  // low bits: packet index
+  mouse_flags_2 = read_byte(ebda_seg, 0x0027);  // bit 7: handler installed
+
+  if ( (mouse_flags_2 & 0x80) != 0x80 ) {
+      // BX_PANIC("int74_function:\n");
+      return;
+  }
+
+  package_count = mouse_flags_2 & 0x07;
+  index = mouse_flags_1 & 0x07;
+  write_byte(ebda_seg, 0x28 + index, in_byte);
+
+  if ( (index+1) >= package_count ) {
+BX_DEBUG_INT74("int74_function: make_farcall=1\n");
+    // Packet complete: hand status/X/Y to the user handler and reset
+    // the packet index.
+    status = read_byte(ebda_seg, 0x0028 + 0);
+    X      = read_byte(ebda_seg, 0x0028 + 1);
+    Y      = read_byte(ebda_seg, 0x0028 + 2);
+    Z      = 0;
+    mouse_flags_1 = 0;
+    // check if far call handler installed
+    if (mouse_flags_2 & 0x80)
+      make_farcall = 1;
+  }
+  else {
+    mouse_flags_1++;
+  }
+  write_byte(ebda_seg, 0x0026, mouse_flags_1);
+}
+
+// Record the last INT 13h disk status in the BDA (40:74).
+#define SET_DISK_RET_STATUS(status) write_byte(0x0040, 0x0074, status)
+
+#if BX_USE_ATADRV
+
+// INT 13h hard-disk service handler (ATA-driver build).  AH selects the
+// function; the Bit16u parameters mirror the stack frame built by the
+// assembly entry stub, so assignments to AX/BX/CX/DX etc. are returned
+// to the interrupt caller.  Errors exit via the int13_fail* labels
+// (set AH, store status at 40:74, set CF); successes via int13_success*.
+  void
+int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+  Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+{
+  Bit32u lba;
+  Bit16u ebda_seg=read_word(0x0040,0x000E);
+  Bit16u cylinder, head, sector;
+  Bit16u segment, offset;
+  Bit16u npc, nph, npspt, nlc, nlh, nlspt;
+  Bit16u size, count;
+  Bit8u device, status;
+
+  BX_DEBUG_INT13_HD("int13_harddisk: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
+
+  write_byte(0x0040, 0x008e, 0);  // clear completion flag
+
+  // basic check : device has to be defined
+  if ( (GET_ELDL() < 0x80) || (GET_ELDL() >= 0x80 + BX_MAX_ATA_DEVICES) ) {
+    BX_INFO("int13_harddisk: function %02x, ELDL out of range %02x\n", GET_AH(), GET_ELDL());
+    goto int13_fail;
+  }
+
+  // Get the ata channel
+  device=read_byte(ebda_seg,&EbdaData->ata.hdidmap[GET_ELDL()-0x80]);
+
+  // basic check : device has to be valid
+  if (device >= BX_MAX_ATA_DEVICES) {
+    BX_INFO("int13_harddisk: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
+    goto int13_fail;
+  }
+
+  switch (GET_AH()) {
+
+    case 0x00: /* disk controller reset */
+      ata_reset (device);
+      goto int13_success;
+      break;
+
+    case 0x01: /* read disk status */
+      status = read_byte(0x0040, 0x0074);
+      SET_AH(status);
+      SET_DISK_RET_STATUS(0);
+      /* set CF if error status read */
+      if (status) goto int13_fail_nostatus;
+      else        goto int13_success_noah;
+      break;
+
+    case 0x02: // read disk sectors
+    case 0x03: // write disk sectors
+    case 0x04: // verify disk sectors
+
+      // Decode the classic CHS packing: cylinder low 8 bits in CH,
+      // high 2 bits in CL bits 6-7; sector in CL bits 0-5.
+      count       = GET_AL();
+      cylinder    = GET_CH();
+      cylinder   |= ( ((Bit16u) GET_CL()) << 2) & 0x300;
+      sector      = (GET_CL() & 0x3f);
+      head        = GET_DH();
+
+      segment = ES;
+      offset  = BX;
+
+      if ( (count > 128) || (count == 0) ) {
+        BX_INFO("int13_harddisk: function %02x, count out of range!\n",GET_AH());
+        goto int13_fail;
+      }
+
+      nlc   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
+      nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
+      nlspt = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.spt);
+
+      // sanity check on cyl heads, sec
+      if( (cylinder >= nlc) || (head >= nlh) || (sector > nlspt )) {
+        BX_INFO("int13_harddisk: function %02x, parameters out of range %04x/%04x/%04x!\n", GET_AH(), cylinder, head, sector);
+        goto int13_fail;
+      }
+
+      // FIXME verify
+      if ( GET_AH() == 0x04 ) goto int13_success;
+
+      nph   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
+      npspt = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
+
+      // if needed, translate lchs to lba, and execute command
+      if ( (nph != nlh) || (npspt != nlspt)) {
+        lba = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * (Bit32u)nlspt) + (Bit32u)sector - 1;
+        sector = 0; // this forces the command to be lba
+      }
+      // NOTE(review): when no translation is needed, lba is passed below
+      // uninitialized — presumably ignored by ata_cmd_data_in/out when
+      // sector != 0 (CHS mode).  Confirm against the ata helpers.
+
+      if ( GET_AH() == 0x02 )
+        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, head, sector, lba, segment, offset);
+      else
+        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, cylinder, head, sector, lba, segment, offset);
+
+      // Set nb of sector transferred
+      SET_AL(read_word(ebda_seg, &EbdaData->ata.trsfsectors));
+
+      if (status != 0) {
+        BX_INFO("int13_harddisk: function %02x, error %02x !\n",GET_AH(),status);
+        SET_AH(0x0c);
+        goto int13_fail_noah;
+      }
+
+      goto int13_success;
+      break;
+
+    case 0x05: /* format disk track */
+      BX_INFO("format disk track called\n");
+      goto int13_success;
+      return;  // NOTE(review): unreachable after the goto above
+      break;
+
+    case 0x08: /* read disk drive parameters */
+
+      // Get logical geometry from table
+      nlc   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
+      nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
+      nlspt = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.spt);
+      count = read_byte(ebda_seg, &EbdaData->ata.hdcount);
+
+      nlc = nlc - 2; /* 0 based , last sector not used */
+      SET_AL(0);
+      SET_CH(nlc & 0xff);
+      SET_CL(((nlc >> 2) & 0xc0) | (nlspt & 0x3f));
+      SET_DH(nlh - 1);
+      SET_DL(count); /* FIXME returns 0, 1, or n hard drives */
+
+      // FIXME should set ES & DI
+
+      goto int13_success;
+      break;
+
+    case 0x10: /* check drive ready */
+      // should look at 40:8E also???
+
+      // Read the status from controller
+      status = inb(read_word(ebda_seg, &EbdaData->ata.channels[device/2].iobase1) + ATA_CB_STAT);
+      if ( (status & ( ATA_CB_STAT_BSY | ATA_CB_STAT_RDY )) == ATA_CB_STAT_RDY ) {
+        goto int13_success;
+      }
+      else {
+        SET_AH(0xAA);
+        goto int13_fail_noah;
+      }
+      break;
+
+    case 0x15: /* read disk drive size */
+
+      // Get physical geometry from table
+      npc   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.cylinders);
+      nph   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
+      npspt = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
+
+      // Compute sector count seen by int13
+      lba = (Bit32u)(npc - 1) * (Bit32u)nph * (Bit32u)npspt;
+      CX = lba >> 16;
+      DX = lba & 0xffff;
+
+      SET_AH(3);  // hard disk accessible
+      goto int13_success_noah;
+      break;
+
+    case 0x41: // IBM/MS installation check
+      BX=0xaa55;     // install check
+      SET_AH(0x30);  // EDD 3.0
+      CX=0x0007;     // ext disk access and edd, removable supported
+      goto int13_success_noah;
+      break;
+
+    case 0x42: // IBM/MS extended read
+    case 0x43: // IBM/MS extended write
+    case 0x44: // IBM/MS verify
+    case 0x47: // IBM/MS extended seek
+
+      // DS:SI points to the caller's disk address packet (Int13Ext).
+      count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
+      segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
+      offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
+
+      // Can't use 64 bits lba
+      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
+      if (lba != 0L) {
+        BX_PANIC("int13_harddisk: function %02x. Can't use 64bits lba\n",GET_AH());
+        goto int13_fail;
+      }
+
+      // Get 32 bits lba and check
+      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
+      if (lba >= read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors) ) {
+        BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
+        goto int13_fail;
+      }
+
+      // If verify or seek
+      if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
+        goto int13_success;
+
+      // Execute the command
+      if ( GET_AH() == 0x42 )
+        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, lba, segment, offset);
+      else
+        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, lba, segment, offset);
+
+      // Report back how many sectors were actually transferred.
+      count=read_word(ebda_seg, &EbdaData->ata.trsfsectors);
+      write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
+
+      if (status != 0) {
+        BX_INFO("int13_harddisk: function %02x, error %02x !\n",GET_AH(),status);
+        SET_AH(0x0c);
+        goto int13_fail_noah;
+      }
+
+      goto int13_success;
+      break;
+
+    case 0x45: // IBM/MS lock/unlock drive
+    case 0x49: // IBM/MS extended media change
+      goto int13_success;    // Always success for HD
+      break;
+
+    case 0x46: // IBM/MS eject media
+      SET_AH(0xb2);          // Volume Not Removable
+      goto int13_fail_noah;  // Always fail for HD
+      break;
+
+    case 0x48: // IBM/MS get drive parameters
+      size=read_word(DS,SI+(Bit16u)&Int13DPT->size);
+
+      // Buffer is too small
+      if(size < 0x1a)
+        goto int13_fail;
+
+      // EDD 1.x
+      if(size >= 0x1a) {
+        Bit16u blksize;
+
+        npc     = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.cylinders);
+        nph     = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
+        npspt   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
+        lba     = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors);
+        blksize = read_word(ebda_seg, &EbdaData->ata.devices[device].blksize);
+
+        write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1a);
+        write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is valid
+        write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->heads, (Bit32u)nph);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->spt, (Bit32u)npspt);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba);  // FIXME should be Bit64
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0L);
+        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
+      }
+
+      // EDD 2.x
+      if(size >= 0x1e) {
+        Bit8u  channel, dev, irq, mode, checksum, i, translation;
+        Bit16u iobase1, iobase2, options;
+
+        write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
+
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
+
+        // Fill in dpte
+        channel = device / 2;
+        iobase1 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase1);
+        iobase2 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase2);
+        irq = read_byte(ebda_seg, &EbdaData->ata.channels[channel].irq);
+        mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
+        translation = read_byte(ebda_seg, &EbdaData->ata.devices[device].translation);
+
+        options  = (translation==ATA_TRANSLATION_NONE?0:1<<3); // chs translation
+        options |= (1<<4); // lba translation
+        // NOTE(review): precedence bug — `?1:0<<7` parses as `?1:(0<<7)`,
+        // i.e. bit 0 is set instead of bit 7; same for the `<<9` lines
+        // below.  The intent was presumably `(cond?1:0)<<7` etc.
+        options |= (mode==ATA_MODE_PIO32?1:0<<7);
+        options |= (translation==ATA_TRANSLATION_LBA?1:0<<9);
+        options |= (translation==ATA_TRANSLATION_RECHS?3:0<<9);
+
+        write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
+        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
+        write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.blkcount, 1 );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.dma, 0 );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.pio, 0 );
+        write_word(ebda_seg, &EbdaData->ata.dpte.options, options);
+        write_word(ebda_seg, &EbdaData->ata.dpte.reserved, 0);
+        write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
+
+        // Two's-complement checksum over the first 15 dpte bytes.
+        checksum=0;
+        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
+        checksum = ~checksum;
+        write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
+      }
+
+      // EDD 3.x
+      if(size >= 0x42) {
+        Bit8u channel, iface, checksum, i;
+        Bit16u iobase1;
+
+        channel = device / 2;
+        iface = read_byte(ebda_seg, &EbdaData->ata.channels[channel].iface);
+        iobase1 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase1);
+
+        write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x42);
+        write_word(DS, SI+(Bit16u)&Int13DPT->key, 0xbedd);
+        write_byte(DS, SI+(Bit16u)&Int13DPT->dpi_length, 0x24);
+        write_byte(DS, SI+(Bit16u)&Int13DPT->reserved1, 0);
+        write_word(DS, SI+(Bit16u)&Int13DPT->reserved2, 0);
+
+        if (iface==ATA_IFACE_ISA) {
+          write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[0], 'I');
+          write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[1], 'S');
+          write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
+          write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
+        }
+        else {
+          // FIXME PCI
+        }
+        write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
+        write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[1], 'T');
+        write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[2], 'A');
+        write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[3], 0);
+
+        if (iface==ATA_IFACE_ISA) {
+          write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[0], iobase1);
+          write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
+          write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
+        }
+        else {
+          // FIXME PCI
+        }
+        write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
+        write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[1], 0);
+        write_word(DS, SI+(Bit16u)&Int13DPT->device_path[2], 0);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->device_path[4], 0L);
+
+        // Checksum over the Device Path Information (bytes 30..63).
+        checksum=0;
+        for (i=30; i<64; i++) checksum+=read_byte(DS, SI + i);
+        checksum = ~checksum;
+        write_byte(DS, SI+(Bit16u)&Int13DPT->checksum, checksum);
+      }
+
+      goto int13_success;
+      break;
+
+    case 0x4e: // // IBM/MS set hardware configuration
+      // DMA, prefetch, PIO maximum not supported
+      switch (GET_AL()) {
+        case 0x01:
+        case 0x03:
+        case 0x04:
+        case 0x06:
+          goto int13_success;
+          break;
+        default :
+          goto int13_fail;
+        }
+      break;
+
+    case 0x09: /* initialize drive parameters */
+    case 0x0c: /* seek to specified cylinder */
+    case 0x0d: /* alternate disk reset */
+    case 0x11: /* recalibrate */
+    case 0x14: /* controller internal diagnostic */
+      BX_INFO("int13h_harddisk function %02xh unimplemented, returns success\n", GET_AH());
+      goto int13_success;
+      break;
+
+    case 0x0a: /* read disk sectors with ECC */
+    case 0x0b: /* write disk sectors with ECC */
+    case 0x18: // set media type for format
+    case 0x50: // IBM/MS send packet command
+    default:
+      BX_INFO("int13_harddisk function %02xh unsupported, returns fail\n", GET_AH());
+      goto int13_fail;
+      break;
+    }
+
+int13_fail:
+    SET_AH(0x01); // defaults to invalid function in AH or invalid parameter
+int13_fail_noah:
+    SET_DISK_RET_STATUS(GET_AH());
+int13_fail_nostatus:
+    SET_CF();     // error occurred
+    return;
+
+int13_success:
+    SET_AH(0x00); // no error
+int13_success_noah:
+    SET_DISK_RET_STATUS(0x00);
+    CLEAR_CF();   // no error
+    return;
+}
+
+// ---------------------------------------------------------------------------
+// Start of int13 for cdrom
+// ---------------------------------------------------------------------------
+
+// INT 13h CD-ROM service handler.  Drive numbers 0xE0+ map to ATAPI
+// devices via the EBDA cdidmap.  Writes are rejected (write-protected),
+// reads are issued as ATAPI READ(10) packets in 2048-byte sectors.
+// Error/success exits mirror int13_harddisk via the int13_* labels.
+  void
+int13_cdrom(EHBX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+  Bit16u EHBX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+{
+  Bit16u ebda_seg=read_word(0x0040,0x000E);
+  Bit8u  device, status, locks;
+  Bit8u  atacmd[12];
+  Bit32u lba;
+  Bit16u count, segment, offset, i, size;
+
+  BX_DEBUG_INT13_CD("int13_cdrom: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
+  // BX_DEBUG_INT13_CD("int13_cdrom: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
+
+  SET_DISK_RET_STATUS(0x00);
+
+  /* basic check : device should be 0xE0+ */
+  if( (GET_ELDL() < 0xE0) || (GET_ELDL() >= 0xE0+BX_MAX_ATA_DEVICES) ) {
+    BX_INFO("int13_cdrom: function %02x, ELDL out of range %02x\n", GET_AH(), GET_ELDL());
+    goto int13_fail;
+  }
+
+  // Get the ata channel
+  device=read_byte(ebda_seg,&EbdaData->ata.cdidmap[GET_ELDL()-0xE0]);
+
+  /* basic check : device has to be valid  */
+  if (device >= BX_MAX_ATA_DEVICES) {
+    BX_INFO("int13_cdrom: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
+    goto int13_fail;
+  }
+
+  switch (GET_AH()) {
+
+    // all those functions return SUCCESS
+    case 0x00: /* disk controller reset */
+    case 0x09: /* initialize drive parameters */
+    case 0x0c: /* seek to specified cylinder */
+    case 0x0d: /* alternate disk reset */
+    case 0x10: /* check drive ready */
+    case 0x11: /* recalibrate */
+    case 0x14: /* controller internal diagnostic */
+    case 0x16: /* detect disk change */
+      goto int13_success;
+      break;
+
+    // all those functions return disk write-protected
+    case 0x03: /* write disk sectors */
+    case 0x05: /* format disk track */
+    case 0x43: // IBM/MS extended write
+      SET_AH(0x03);
+      goto int13_fail_noah;
+      break;
+
+    case 0x01: /* read disk status */
+      status = read_byte(0x0040, 0x0074);
+      SET_AH(status);
+      SET_DISK_RET_STATUS(0);
+
+      /* set CF if error status read */
+      if (status) goto int13_fail_nostatus;
+      else        goto int13_success_noah;
+      break;
+
+    case 0x15: /* read disk drive size */
+      SET_AH(0x02);
+      goto int13_fail_noah;
+      break;
+
+    case 0x41: // IBM/MS installation check
+      BX=0xaa55;     // install check
+      SET_AH(0x30);  // EDD 2.1
+      CX=0x0007;     // ext disk access, removable and edd
+      goto int13_success_noah;
+      break;
+
+    case 0x42: // IBM/MS extended read
+    case 0x44: // IBM/MS verify sectors
+    case 0x47: // IBM/MS extended seek
+
+      // DS:SI points to the caller's disk address packet (Int13Ext).
+      count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
+      segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
+      offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
+
+      // Can't use 64 bits lba
+      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
+      if (lba != 0L) {
+        BX_PANIC("int13_cdrom: function %02x. Can't use 64bits lba\n",GET_AH());
+        goto int13_fail;
+      }
+
+      // Get 32 bits lba
+      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
+
+      // If verify or seek
+      if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
+        goto int13_success;
+
+      // Build an ATAPI READ(10) packet: big-endian LBA and sector count.
+      memsetb(get_SS(),atacmd,0,12);
+      atacmd[0]=0x28;                      // READ command
+      atacmd[7]=(count & 0xff00) >> 8;     // Sectors
+      atacmd[8]=(count & 0x00ff);          // Sectors
+      atacmd[2]=(lba & 0xff000000) >> 24;  // LBA
+      atacmd[3]=(lba & 0x00ff0000) >> 16;
+      atacmd[4]=(lba & 0x0000ff00) >> 8;
+      atacmd[5]=(lba & 0x000000ff);
+      status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset);
+
+      // Bytes transferred >> 11 == number of 2048-byte sectors.
+      count = (Bit16u)(read_dword(ebda_seg, &EbdaData->ata.trsfbytes) >> 11);
+      write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
+
+      if (status != 0) {
+        BX_INFO("int13_cdrom: function %02x, status %02x !\n",GET_AH(),status);
+        SET_AH(0x0c);
+        goto int13_fail_noah;
+      }
+
+      goto int13_success;
+      break;
+
+    case 0x45: // IBM/MS lock/unlock drive
+      if (GET_AL() > 2) goto int13_fail;
+
+      // Nested lock count lives in the EBDA device record.
+      locks = read_byte(ebda_seg, &EbdaData->ata.devices[device].lock);
+
+      switch (GET_AL()) {
+        case 0 :  // lock
+          if (locks == 0xff) {
+            SET_AH(0xb4);
+            SET_AL(1);
+            goto int13_fail_noah;
+          }
+          write_byte(ebda_seg, &EbdaData->ata.devices[device].lock, ++locks);
+          SET_AL(1);
+          break;
+        case 1 :  // unlock
+          if (locks == 0x00) {
+            SET_AH(0xb0);
+            SET_AL(0);
+            goto int13_fail_noah;
+          }
+          write_byte(ebda_seg, &EbdaData->ata.devices[device].lock, --locks);
+          SET_AL(locks==0?0:1);
+          break;
+        case 2 :  // status
+          SET_AL(locks==0?0:1);
+          break;
+        }
+      goto int13_success;
+      break;
+
+    case 0x46: // IBM/MS eject media
+      locks = read_byte(ebda_seg, &EbdaData->ata.devices[device].lock);
+
+      if (locks != 0) {
+        SET_AH(0xb1); // media locked
+        goto int13_fail_noah;
+      }
+      // FIXME should handle 0x31 no media in device
+      // FIXME should handle 0xb5 valid request failed
+
+      // Call removable media eject
+      // The stub below writes INT 15h's result into this function's
+      // `status` parameter slot on the stack (_int13_cdrom.status).
+      ASM_START
+        push bp
+        mov  bp, sp
+
+        mov ah, #0x52
+        int 15
+        mov _int13_cdrom.status + 2[bp], ah
+        jnc int13_cdrom_rme_end
+        mov _int13_cdrom.status, #1
+int13_cdrom_rme_end:
+        pop bp
+      ASM_END
+
+      if (status != 0) {
+        SET_AH(0xb1); // media locked
+        goto int13_fail_noah;
+      }
+
+      goto int13_success;
+      break;
+
+    case 0x48: // IBM/MS get drive parameters
+      size = read_word(DS,SI+(Bit16u)&Int13Ext->size);
+
+      // Buffer is too small
+      if(size < 0x1a)
+        goto int13_fail;
+
+      // EDD 1.x
+      if(size >= 0x1a) {
+        Bit16u cylinders, heads, spt, blksize;
+
+        blksize   = read_word(ebda_seg, &EbdaData->ata.devices[device].blksize);
+
+        write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1a);
+        write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x74); // removable, media change, lockable, max values
+        write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, 0xffffffff);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->heads, 0xffffffff);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->spt, 0xffffffff);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, 0xffffffff);  // FIXME should be Bit64
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);
+        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
+      }
+
+      // EDD 2.x
+      if(size >= 0x1e) {
+        Bit8u  channel, dev, irq, mode, checksum, i;
+        Bit16u iobase1, iobase2, options;
+
+        write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
+
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
+
+        // Fill in dpte
+        channel = device / 2;
+        iobase1 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase1);
+        iobase2 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase2);
+        irq = read_byte(ebda_seg, &EbdaData->ata.channels[channel].irq);
+        mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
+
+        // FIXME atapi device
+        options  = (1<<4); // lba translation
+        options |= (1<<5); // removable device
+        options |= (1<<6); // atapi device
+        // NOTE(review): precedence bug — `?1:0<<7` parses as `?1:(0<<7)`,
+        // setting bit 0 instead of bit 7 (same issue as int13_harddisk).
+        options |= (mode==ATA_MODE_PIO32?1:0<<7);
+
+        write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
+        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
+        write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.blkcount, 1 );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.dma, 0 );
+        write_byte(ebda_seg, &EbdaData->ata.dpte.pio, 0 );
+        write_word(ebda_seg, &EbdaData->ata.dpte.options, options);
+        write_word(ebda_seg, &EbdaData->ata.dpte.reserved, 0);
+        write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
+
+        // Two's-complement checksum over the first 15 dpte bytes.
+        checksum=0;
+        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
+        checksum = ~checksum;
+        write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
+      }
+
+      // EDD 3.x
+      if(size >= 0x42) {
+        Bit8u channel, iface, checksum, i;
+        Bit16u iobase1;
+
+        channel = device / 2;
+        iface = read_byte(ebda_seg, &EbdaData->ata.channels[channel].iface);
+        iobase1 = read_word(ebda_seg, &EbdaData->ata.channels[channel].iobase1);
+
+        write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x42);
+        write_word(DS, SI+(Bit16u)&Int13DPT->key, 0xbedd);
+        write_byte(DS, SI+(Bit16u)&Int13DPT->dpi_length, 0x24);
+        write_byte(DS, SI+(Bit16u)&Int13DPT->reserved1, 0);
+        write_word(DS, SI+(Bit16u)&Int13DPT->reserved2, 0);
+
+        if (iface==ATA_IFACE_ISA) {
+          write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[0], 'I');
+          write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[1], 'S');
+          write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
+          write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
+        }
+        else {
+          // FIXME PCI
+        }
+        write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
+        write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[1], 'T');
+        write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[2], 'A');
+        write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[3], 0);
+
+        if (iface==ATA_IFACE_ISA) {
+          write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[0], iobase1);
+          write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
+          write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
+        }
+        else {
+          // FIXME PCI
+        }
+        write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
+        write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[1], 0);
+        write_word(DS, SI+(Bit16u)&Int13DPT->device_path[2], 0);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->device_path[4], 0L);
+
+        // Checksum over the Device Path Information (bytes 30..63).
+        checksum=0;
+        for (i=30; i<64; i++) checksum+=read_byte(DS, SI + i);
+        checksum = ~checksum;
+        write_byte(DS, SI+(Bit16u)&Int13DPT->checksum, checksum);
+      }
+
+      goto int13_success;
+      break;
+
+    case 0x49: // IBM/MS extended media change
+      // always send changed ??
+      SET_AH(06);
+      goto int13_fail_nostatus;
+      break;
+
+    case 0x4e: // // IBM/MS set hardware configuration
+      // DMA, prefetch, PIO maximum not supported
+      switch (GET_AL()) {
+        case 0x01:
+        case 0x03:
+        case 0x04:
+        case 0x06:
+          goto int13_success;
+          break;
+        default :
+          goto int13_fail;
+        }
+      break;
+
+    // all those functions return unimplemented
+    case 0x02: /* read sectors */
+    case 0x04: /* verify sectors */
+    case 0x08: /* read disk drive parameters */
+    case 0x0a: /* read disk sectors with ECC */
+    case 0x0b: /* write disk sectors with ECC */
+    case 0x18: /* set media type for format */
+    case 0x50: // ? - send packet command
+    default:
+      BX_INFO("int13_cdrom: unsupported AH=%02x\n", GET_AH());
+      goto int13_fail;
+      break;
+    }
+
+int13_fail:
+    SET_AH(0x01); // defaults to invalid function in AH or invalid parameter
+int13_fail_noah:
+    SET_DISK_RET_STATUS(GET_AH());
+int13_fail_nostatus:
+    SET_CF();     // error occurred
+    return;
+
+int13_success:
+    SET_AH(0x00); // no error
+int13_success_noah:
+    SET_DISK_RET_STATUS(0x00);
+    CLEAR_CF();   // no error
+    return;
+}
+
+// ---------------------------------------------------------------------------
+// End of int13 for cdrom
+// ---------------------------------------------------------------------------
+
+#if BX_ELTORITO_BOOT
+// ---------------------------------------------------------------------------
+// Start of int13 for eltorito functions
+// ---------------------------------------------------------------------------
+
+// INT 13h El-Torito extensions handler (AH=4Ah..4Dh).  Only AH=4Bh
+// (terminate disk emulation / return emulation status) is implemented:
+// it copies the EBDA cdemu state into the caller's DS:SI buffer and,
+// for AL=00, deactivates the emulation.  The other functions panic.
+  void
+int13_eltorito(DS, ES, DI, SI, BP, SP, BX, DX, CX, AX, IP, CS, FLAGS)
+  Bit16u DS, ES, DI, SI, BP, SP, BX, DX, CX, AX, IP, CS, FLAGS;
+{
+  Bit16u ebda_seg=read_word(0x0040,0x000E);
+
+  BX_DEBUG_INT13_ET("int13_eltorito: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
+  // BX_DEBUG_INT13_ET("int13_eltorito: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
+
+  switch (GET_AH()) {
+
+    // FIXME ElTorito Various. Should be implemented
+    case 0x4a: // ElTorito - Initiate disk emu
+    case 0x4c: // ElTorito - Initiate disk emu and boot
+    case 0x4d: // ElTorito - Return Boot catalog
+      BX_PANIC("Int13 eltorito call with AX=%04x. Please report\n",AX);
+      goto int13_fail;
+      break;
+
+    case 0x4b: // ElTorito - Terminate disk emu
+      // FIXME ElTorito Hardcoded
+      // Fill the caller's specification packet from the EBDA cdemu state.
+      write_byte(DS,SI+0x00,0x13);
+      write_byte(DS,SI+0x01,read_byte(ebda_seg,&EbdaData->cdemu.media));
+      write_byte(DS,SI+0x02,read_byte(ebda_seg,&EbdaData->cdemu.emulated_drive));
+      write_byte(DS,SI+0x03,read_byte(ebda_seg,&EbdaData->cdemu.controller_index));
+      write_dword(DS,SI+0x04,read_dword(ebda_seg,&EbdaData->cdemu.ilba));
+      write_word(DS,SI+0x08,read_word(ebda_seg,&EbdaData->cdemu.device_spec));
+      write_word(DS,SI+0x0a,read_word(ebda_seg,&EbdaData->cdemu.buffer_segment));
+      write_word(DS,SI+0x0c,read_word(ebda_seg,&EbdaData->cdemu.load_segment));
+      write_word(DS,SI+0x0e,read_word(ebda_seg,&EbdaData->cdemu.sector_count));
+      write_byte(DS,SI+0x10,read_byte(ebda_seg,&EbdaData->cdemu.vdevice.cylinders));
+      write_byte(DS,SI+0x11,read_byte(ebda_seg,&EbdaData->cdemu.vdevice.spt));
+      write_byte(DS,SI+0x12,read_byte(ebda_seg,&EbdaData->cdemu.vdevice.heads));
+
+      // If we have to terminate emulation
+      if(GET_AL() == 0x00) {
+        // FIXME ElTorito Various. Should be handled accordingly to spec
+        write_byte(ebda_seg,&EbdaData->cdemu.active, 0x00); // bye bye
+      }
+
+      goto int13_success;
+      break;
+
+    default:
+      BX_INFO("int13_eltorito: unsupported AH=%02x\n", GET_AH());
+      goto int13_fail;
+      break;
+    }
+
+int13_fail:
+    SET_AH(0x01); // defaults to invalid function in AH or invalid parameter
+    SET_DISK_RET_STATUS(GET_AH());
+    SET_CF();     // error occurred
+    return;
+
+int13_success:
+    SET_AH(0x00); // no error
+    SET_DISK_RET_STATUS(0x00);
+    CLEAR_CF();   // no error
+    return;
+}
+
+// ---------------------------------------------------------------------------
+// End of int13 for eltorito functions
+// ---------------------------------------------------------------------------
+
+// ---------------------------------------------------------------------------
+// Start of int13 when emulating a device from the cd
+// ---------------------------------------------------------------------------
+
+// int13_cdemu: INT 13h handler used while a floppy/harddisk is being
+// emulated from a CD image (El Torito boot).  Emulated 512-byte sectors
+// are mapped onto 2048-byte CD sectors (4 per CD sector) starting at
+// the image LBA (cdemu.ilba) recorded in the EBDA, and reads are done
+// through ata_cmd_packet.  Register frame is passed/returned on the
+// stack as with the other int13 handlers.
+ void
+int13_cdemu(DS, ES, DI, SI, BP, SP, BX, DX, CX, AX, IP, CS, FLAGS)
+ Bit16u DS, ES, DI, SI, BP, SP, BX, DX, CX, AX, IP, CS, FLAGS;
+{
+ Bit16u ebda_seg=read_word(0x0040,0x000E);
+ Bit8u device, status;
+ Bit16u vheads, vspt, vcylinders;
+ Bit16u head, sector, cylinder, nbsectors;
+ Bit32u vlba, ilba, slba, elba;
+ Bit16u before, segment, offset;
+ Bit8u atacmd[12];
+
+ BX_DEBUG_INT13_ET("int13_cdemu: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
+ //BX_DEBUG_INT13_ET("int13_cdemu: SS=%04x ES=%04x DI=%04x SI=%04x\n", get_SS(), ES, DI, SI);
+
+ /* at this point, we are emulating a floppy/harddisk */
+
+ // Recompute the device number
+ device = read_byte(ebda_seg,&EbdaData->cdemu.controller_index) * 2;
+ device += read_byte(ebda_seg,&EbdaData->cdemu.device_spec);
+
+ SET_DISK_RET_STATUS(0x00);
+
+ /* basic checks : emulation should be active, dl should equal the emulated drive */
+ if( (read_byte(ebda_seg,&EbdaData->cdemu.active) ==0 )
+ || (read_byte(ebda_seg,&EbdaData->cdemu.emulated_drive ) != GET_DL())) {
+ BX_INFO("int13_cdemu: function %02x, emulation not active for DL= %02x\n", GET_AH(), GET_DL());
+ goto int13_fail;
+ }
+
+ switch (GET_AH()) {
+
+ // all those functions return SUCCESS
+ case 0x00: /* disk controller reset */
+ case 0x09: /* initialize drive parameters */
+ case 0x0c: /* seek to specified cylinder */
+ case 0x0d: /* alternate disk reset */ // FIXME ElTorito Various. should really reset ?
+ case 0x10: /* check drive ready */ // FIXME ElTorito Various. should check if ready ?
+ case 0x11: /* recalibrate */
+ case 0x14: /* controller internal diagnostic */
+ case 0x16: /* detect disk change */
+ goto int13_success;
+ break;
+
+ // all those functions return disk write-protected
+ case 0x03: /* write disk sectors */
+ case 0x05: /* format disk track */
+ SET_AH(0x03);
+ goto int13_fail_noah;
+ break;
+
+ case 0x01: /* read disk status */
+ status=read_byte(0x0040, 0x0074);
+ SET_AH(status);
+ SET_DISK_RET_STATUS(0);
+
+ /* set CF if error status read */
+ if (status) goto int13_fail_nostatus;
+ else goto int13_success_noah;
+ break;
+
+ case 0x02: // read disk sectors
+ case 0x04: // verify disk sectors
+ // virtual geometry of the emulated device, from the EBDA
+ vspt = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
+ vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders);
+ vheads = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads);
+
+ ilba = read_dword(ebda_seg,&EbdaData->cdemu.ilba);
+
+ // decode CHS from the caller's registers (cyl high bits in CL 7:6)
+ sector = GET_CL() & 0x003f;
+ cylinder = (GET_CL() & 0x00c0) << 2 | GET_CH();
+ head = GET_DH();
+ nbsectors = GET_AL();
+ segment = ES;
+ offset = BX;
+
+ // no sector to read ?
+ if(nbsectors==0) goto int13_success;
+
+ // sanity checks sco openserver needs this!
+ if ((sector > vspt)
+ || (cylinder >= vcylinders)
+ || (head >= vheads)) {
+ goto int13_fail;
+ }
+
+ // After controls, verify do nothing
+ if (GET_AH() == 0x04) goto int13_success;
+
+ // normalize ES:BX so the offset stays small during the transfer
+ segment = ES+(BX / 16);
+ offset = BX % 16;
+
+ // calculate the virtual lba inside the image
+ vlba=((((Bit32u)cylinder*(Bit32u)vheads)+(Bit32u)head)*(Bit32u)vspt)+((Bit32u)(sector-1));
+
+ // In advance so we don't loose the count
+ SET_AL(nbsectors);
+
+ // start lba on cd
+ // (4 emulated 512-byte sectors per 2048-byte CD sector; 'before' is
+ // how many 512-byte sectors to skip inside the first CD sector)
+ slba = (Bit32u)vlba/4;
+ before= (Bit16u)vlba%4;
+
+ // end lba on cd
+ elba = (Bit32u)(vlba+nbsectors-1)/4;
+
+ // build the ATAPI READ(10) packet: CD sector count and start LBA
+ memsetb(get_SS(),atacmd,0,12);
+ atacmd[0]=0x28; // READ command
+ atacmd[7]=((Bit16u)(elba-slba+1) & 0xff00) >> 8; // Sectors
+ atacmd[8]=((Bit16u)(elba-slba+1) & 0x00ff); // Sectors
+ atacmd[2]=(ilba+slba & 0xff000000) >> 24; // LBA
+ atacmd[3]=(ilba+slba & 0x00ff0000) >> 16;
+ atacmd[4]=(ilba+slba & 0x0000ff00) >> 8;
+ atacmd[5]=(ilba+slba & 0x000000ff);
+ if((status = ata_cmd_packet(device, 12, get_SS(), atacmd, before*512, nbsectors*512L, ATA_DATA_IN, segment,offset)) != 0) {
+ BX_INFO("int13_cdemu: function %02x, error %02x !\n",GET_AH(),status);
+ SET_AH(0x02);
+ SET_AL(0);
+ goto int13_fail_noah;
+ }
+
+ goto int13_success;
+ break;
+
+ case 0x08: /* read disk drive parameters */
+ // BIOS convention: return maximum (0-based) values, hence the -1
+ vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
+ vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1;
+ vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1;
+
+ SET_AL( 0x00 );
+ SET_BL( 0x00 );
+ SET_CH( vcylinders & 0xff );
+ SET_CL((( vcylinders >> 2) & 0xc0) | ( vspt & 0x3f ));
+ SET_DH( vheads );
+ SET_DL( 0x02 ); // FIXME ElTorito Various. should send the real count of drives 1 or 2
+ // FIXME ElTorito Harddisk. should send the HD count
+
+ switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
+ case 0x01: SET_BL( 0x02 ); break;
+ case 0x02: SET_BL( 0x04 ); break;
+ case 0x03: SET_BL( 0x06 ); break;
+ }
+
+// return ES:DI -> diskette parameter table, written directly into the
+// caller's register frame on the stack
+ASM_START
+ push bp
+ mov bp, sp
+ mov ax, #diskette_param_table2
+ mov _int13_cdemu.DI+2[bp], ax
+ mov _int13_cdemu.ES+2[bp], cs
+ pop bp
+ASM_END
+ goto int13_success;
+ break;
+
+ case 0x15: /* read disk drive size */
+ // FIXME ElTorito Harddisk. What geometry to send ?
+ SET_AH(0x03);
+ goto int13_success_noah;
+ break;
+
+ // all those functions return unimplemented
+ case 0x0a: /* read disk sectors with ECC */
+ case 0x0b: /* write disk sectors with ECC */
+ case 0x18: /* set media type for format */
+ case 0x41: // IBM/MS installation check
+ // FIXME ElTorito Harddisk. Darwin would like to use EDD
+ case 0x42: // IBM/MS extended read
+ case 0x43: // IBM/MS extended write
+ case 0x44: // IBM/MS verify sectors
+ case 0x45: // IBM/MS lock/unlock drive
+ case 0x46: // IBM/MS eject media
+ case 0x47: // IBM/MS extended seek
+ case 0x48: // IBM/MS get drive parameters
+ case 0x49: // IBM/MS extended media change
+ case 0x4e: // ? - set hardware configuration
+ case 0x50: // ? - send packet command
+ default:
+ BX_INFO("int13_cdemu function AH=%02x unsupported, returns fail\n", GET_AH());
+ goto int13_fail;
+ break;
+ }
+
+// common exit paths; the _noah/_nostatus entries skip the AH or status
+// byte update for callers that set them already
+int13_fail:
+ SET_AH(0x01); // defaults to invalid function in AH or invalid parameter
+int13_fail_noah:
+ SET_DISK_RET_STATUS(GET_AH());
+int13_fail_nostatus:
+ SET_CF(); // error occurred
+ return;
+
+int13_success:
+ SET_AH(0x00); // no error
+int13_success_noah:
+ SET_DISK_RET_STATUS(0x00);
+ CLEAR_CF(); // no error
+ return;
+}
+
+// ---------------------------------------------------------------------------
+// End of int13 when emulating a device from the cd
+// ---------------------------------------------------------------------------
+
+#endif // BX_ELTORITO_BOOT
+
+#else //BX_USE_ATADRV
+
+// outLBA: convert a CHS address to a linear sector number and program
+// the ATA task-file registers (ports 0x1F3-0x1F6) with it in LBA mode.
+// lba = ((cylinder * hd_heads + head) * hd_sectors + sector) - 1
+// (sector numbers are 1-based, LBA is 0-based, hence the dec).
+// Port 0x1F6 gets 0xE0 | (dl&1)<<4 | lba[27:24]: LBA mode, drive
+// select, top LBA bits.  eax/ebx/edx are saved and restored.
+ void
+outLBA(cylinder,hd_heads,head,hd_sectors,sector,dl)
+ Bit16u cylinder;
+ Bit16u hd_heads;
+ Bit16u head;
+ Bit16u hd_sectors;
+ Bit16u sector;
+ Bit16u dl;
+{
+ASM_START
+ push bp
+ mov bp, sp
+ push eax
+ push ebx
+ push edx
+ xor eax,eax
+ mov ax,4[bp] // cylinder
+ xor ebx,ebx
+ mov bl,6[bp] // hd_heads
+ imul ebx
+
+ mov bl,8[bp] // head
+ add eax,ebx
+ mov bl,10[bp] // hd_sectors
+ imul ebx
+ mov bl,12[bp] // sector
+ add eax,ebx
+
+ dec eax
+ mov dx,#0x1f3
+ out dx,al
+ mov dx,#0x1f4
+ mov al,ah
+ out dx,al
+ shr eax,#16
+ mov dx,#0x1f5
+ out dx,al
+ and ah,#0xf
+ mov bl,14[bp] // dl
+ and bl,#1
+ shl bl,#4
+ or ah,bl
+ or ah,#0xe0
+ mov al,ah
+ mov dx,#0x01f6
+ out dx,al
+ pop edx
+ pop ebx
+ pop eax
+ pop bp
+ASM_END
+}
+
+// int13_harddisk: legacy (non-ATADRV) INT 13h hard disk handler.
+// Talks to the primary ATA controller directly via ports 0x1F0-0x1F7
+// using PIO transfers (insw/outsw).  Drive geometry comes from CMOS
+// via get_hd_geometry; up to two drives (DL=0x80/0x81) are supported.
+// The register frame pushed by the asm stub is both input and output;
+// inline asm below addresses the frame and locals via _int13_harddisk.*
+// stack offsets, so names and declaration order must not change.
+ void
+int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+ Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+{
+ Bit8u drive, num_sectors, sector, head, status, mod;
+ Bit8u drive_map;
+ Bit8u n_drives;
+ Bit16u cyl_mod, ax;
+ Bit16u max_cylinder, cylinder, total_sectors;
+ Bit16u hd_cylinders;
+ Bit8u hd_heads, hd_sectors;
+ Bit16u val16;
+ Bit8u sector_count;
+ unsigned int i;
+ Bit16u tempbx;
+ Bit16u dpsize;
+
+ Bit16u count, segment, offset;
+ Bit32u lba;
+ Bit16u error;
+
+ BX_DEBUG_INT13_HD("int13 harddisk: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
+
+ write_byte(0x0040, 0x008e, 0); // clear completion flag
+
+ /* at this point, DL is >= 0x80 to be passed from the floppy int13h
+ handler code */
+ /* check how many disks first (cmos reg 0x12), return an error if
+ drive not present */
+ drive_map = inb_cmos(0x12);
+ drive_map = (((drive_map & 0xf0)==0) ? 0 : 1) |
+ (((drive_map & 0x0f)==0) ? 0 : 2);
+ n_drives = (drive_map==0) ? 0 :
+ ((drive_map==3) ? 2 : 1);
+
+ if (!(drive_map & (1<<(GET_ELDL()&0x7f)))) { /* allow 0, 1, or 2 disks */
+ SET_AH(0x01);
+ SET_DISK_RET_STATUS(0x01);
+ SET_CF(); /* error occurred */
+ return;
+ }
+
+ switch (GET_AH()) {
+
+ case 0x00: /* disk controller reset */
+BX_DEBUG_INT13_HD("int13_f00\n");
+
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ set_diskette_ret_status(0);
+ set_diskette_current_cyl(0, 0); /* current cylinder, diskette 1 */
+ set_diskette_current_cyl(1, 0); /* current cylinder, diskette 2 */
+ CLEAR_CF(); /* successful */
+ return;
+ break;
+
+ case 0x01: /* read disk status */
+BX_DEBUG_INT13_HD("int13_f01\n");
+ status = read_byte(0x0040, 0x0074);
+ SET_AH(status);
+ SET_DISK_RET_STATUS(0);
+ /* set CF if error status read */
+ if (status) SET_CF();
+ else CLEAR_CF();
+ return;
+ break;
+
+ case 0x04: // verify disk sectors
+ case 0x02: // read disk sectors
+ drive = GET_ELDL();
+ get_hd_geometry(drive, &hd_cylinders, &hd_heads, &hd_sectors);
+
+ // decode CHS: cylinder high bits come from CL 7:6
+ num_sectors = GET_AL();
+ cylinder = (GET_CL() & 0x00c0) << 2 | GET_CH();
+ sector = (GET_CL() & 0x3f);
+ head = GET_DH();
+
+
+ // undo the "extended translation" the f08 call applied for large
+ // disks: scale the cylinder back up and fold excess heads into it
+ if (hd_cylinders > 1024) {
+ if (hd_cylinders <= 2048) {
+ cylinder <<= 1;
+ }
+ else if (hd_cylinders <= 4096) {
+ cylinder <<= 2;
+ }
+ else if (hd_cylinders <= 8192) {
+ cylinder <<= 3;
+ }
+ else { // hd_cylinders <= 16384
+ cylinder <<= 4;
+ }
+
+ ax = head / hd_heads;
+ cyl_mod = ax & 0xff;
+ head = ax >> 8;
+ cylinder |= cyl_mod;
+ }
+
+ if ( (cylinder >= hd_cylinders) ||
+ (sector > hd_sectors) ||
+ (head >= hd_heads) ) {
+ SET_AH(1);
+ SET_DISK_RET_STATUS(1);
+ SET_CF(); /* error occurred */
+ return;
+ }
+
+ if ( (num_sectors > 128) || (num_sectors == 0) )
+ BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
+
+ if (head > 15)
+ BX_PANIC("hard drive BIOS:(read/verify) head > 15\n");
+
+ // verify (AH=04) just validates the parameters; no transfer
+ if ( GET_AH() == 0x04 ) {
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF();
+ return;
+ }
+
+ // wait for the controller to be non-busy, then program the
+ // task file and issue READ SECTORS (0x20)
+ status = inb(0x1f7);
+ if (status & 0x80) {
+ BX_PANIC("hard drive BIOS:(read/verify) BUSY bit set\n");
+ }
+ outb(0x01f2, num_sectors);
+ /* activate LBA? (tomv) */
+ if (hd_heads > 16) {
+BX_DEBUG_INT13_HD("CHS: %x %x %x\n", cylinder, head, sector);
+ outLBA(cylinder,hd_heads,head,hd_sectors,sector,drive);
+ }
+ else {
+ outb(0x01f3, sector);
+ outb(0x01f4, cylinder & 0x00ff);
+ outb(0x01f5, cylinder >> 8);
+ outb(0x01f6, 0xa0 | ((drive & 0x01)<<4) | (head & 0x0f));
+ }
+ outb(0x01f7, 0x20);
+
+ while (1) {
+ status = inb(0x1f7);
+ if ( !(status & 0x80) ) break;
+ }
+
+ if (status & 0x01) {
+ BX_PANIC("hard drive BIOS:(read/verify) read error\n");
+ } else if ( !(status & 0x08) ) {
+ BX_DEBUG_INT13_HD("status was %02x\n", (unsigned) status);
+ BX_PANIC("hard drive BIOS:(read/verify) expected DRQ=1\n");
+ }
+
+ // PIO read loop: one 512-byte sector per iteration, destination
+ // ES:tempbx, with segment adjusted to avoid offset wrap
+ sector_count = 0;
+ tempbx = BX;
+
+ASM_START
+ sti ;; enable higher priority interrupts
+ASM_END
+
+ while (1) {
+ASM_START
+ ;; store temp bx in real DI register
+ push bp
+ mov bp, sp
+ mov di, _int13_harddisk.tempbx + 2 [bp]
+ pop bp
+
+ ;; adjust if there will be an overrun
+ cmp di, #0xfe00
+ jbe i13_f02_no_adjust
+i13_f02_adjust:
+ sub di, #0x0200 ; sub 512 bytes from offset
+ mov ax, es
+ add ax, #0x0020 ; add 512 to segment
+ mov es, ax
+
+i13_f02_no_adjust:
+ mov cx, #0x0100 ;; counter (256 words = 512b)
+ mov dx, #0x01f0 ;; AT data read port
+
+ rep
+ insw ;; CX words transfered from port(DX) to ES:[DI]
+
+i13_f02_done:
+ ;; store real DI register back to temp bx
+ push bp
+ mov bp, sp
+ mov _int13_harddisk.tempbx + 2 [bp], di
+ pop bp
+ASM_END
+
+ sector_count++;
+ num_sectors--;
+ if (num_sectors == 0) {
+ status = inb(0x1f7);
+ if ( (status & 0xc9) != 0x40 )
+ BX_PANIC("no sectors left to read/verify, status is %02x\n", (unsigned) status);
+ break;
+ }
+ else {
+ status = inb(0x1f7);
+ if ( (status & 0xc9) != 0x48 )
+ BX_PANIC("more sectors left to read/verify, status is %02x\n", (unsigned) status);
+ continue;
+ }
+ }
+
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ SET_AL(sector_count);
+ CLEAR_CF(); /* successful */
+ return;
+ break;
+
+
+ case 0x03: /* write disk sectors */
+BX_DEBUG_INT13_HD("int13_f03\n");
+ drive = GET_ELDL ();
+ get_hd_geometry(drive, &hd_cylinders, &hd_heads, &hd_sectors);
+
+ num_sectors = GET_AL();
+ cylinder = GET_CH();
+ cylinder |= ( ((Bit16u) GET_CL()) << 2) & 0x300;
+ sector = (GET_CL() & 0x3f);
+ head = GET_DH();
+
+ // same extended-translation undo as the read path above
+ if (hd_cylinders > 1024) {
+ if (hd_cylinders <= 2048) {
+ cylinder <<= 1;
+ }
+ else if (hd_cylinders <= 4096) {
+ cylinder <<= 2;
+ }
+ else if (hd_cylinders <= 8192) {
+ cylinder <<= 3;
+ }
+ else { // hd_cylinders <= 16384
+ cylinder <<= 4;
+ }
+
+ ax = head / hd_heads;
+ cyl_mod = ax & 0xff;
+ head = ax >> 8;
+ cylinder |= cyl_mod;
+ }
+
+ if ( (cylinder >= hd_cylinders) ||
+ (sector > hd_sectors) ||
+ (head >= hd_heads) ) {
+ SET_AH( 1);
+ SET_DISK_RET_STATUS(1);
+ SET_CF(); /* error occurred */
+ return;
+ }
+
+ if ( (num_sectors > 128) || (num_sectors == 0) )
+ BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
+
+ if (head > 15)
+ BX_PANIC("hard drive BIOS:(read) head > 15\n");
+
+ status = inb(0x1f7);
+ if (status & 0x80) {
+ BX_PANIC("hard drive BIOS:(read) BUSY bit set\n");
+ }
+// should check for Drive Ready Bit also in status reg
+ outb(0x01f2, num_sectors);
+
+ /* activate LBA? (tomv) */
+ if (hd_heads > 16) {
+BX_DEBUG_INT13_HD("CHS (write): %x %x %x\n", cylinder, head, sector);
+ outLBA(cylinder,hd_heads,head,hd_sectors,sector,GET_ELDL());
+ }
+ else {
+ outb(0x01f3, sector);
+ outb(0x01f4, cylinder & 0x00ff);
+ outb(0x01f5, cylinder >> 8);
+ outb(0x01f6, 0xa0 | ((GET_ELDL() & 0x01)<<4) | (head & 0x0f));
+ }
+ outb(0x01f7, 0x30);
+
+ // wait for busy bit to turn off after seeking
+ while (1) {
+ status = inb(0x1f7);
+ if ( !(status & 0x80) ) break;
+ }
+
+ if ( !(status & 0x08) ) {
+ BX_DEBUG_INT13_HD("status was %02x\n", (unsigned) status);
+ BX_PANIC("hard drive BIOS:(write) data-request bit not set\n");
+ }
+
+ // PIO write loop: mirror of the read loop, source ES:tempbx
+ sector_count = 0;
+ tempbx = BX;
+
+ASM_START
+ sti ;; enable higher priority interrupts
+ASM_END
+
+ while (1) {
+ASM_START
+ ;; store temp bx in real SI register
+ push bp
+ mov bp, sp
+ mov si, _int13_harddisk.tempbx + 2 [bp]
+ pop bp
+
+ ;; adjust if there will be an overrun
+ cmp si, #0xfe00
+ jbe i13_f03_no_adjust
+i13_f03_adjust:
+ sub si, #0x0200 ; sub 512 bytes from offset
+ mov ax, es
+ add ax, #0x0020 ; add 512 to segment
+ mov es, ax
+
+i13_f03_no_adjust:
+ mov cx, #0x0100 ;; counter (256 words = 512b)
+ mov dx, #0x01f0 ;; AT data read port
+
+ seg ES
+ rep
+ outsw ;; CX words tranfered from ES:[SI] to port(DX)
+
+ ;; store real SI register back to temp bx
+ push bp
+ mov bp, sp
+ mov _int13_harddisk.tempbx + 2 [bp], si
+ pop bp
+ASM_END
+
+ sector_count++;
+ num_sectors--;
+ if (num_sectors == 0) {
+ status = inb(0x1f7);
+ if ( (status & 0xe9) != 0x40 )
+ BX_PANIC("no sectors left to write, status is %02x\n", (unsigned) status);
+ break;
+ }
+ else {
+ status = inb(0x1f7);
+ if ( (status & 0xc9) != 0x48 )
+ BX_PANIC("more sectors left to write, status is %02x\n", (unsigned) status);
+ continue;
+ }
+ }
+
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ SET_AL(sector_count);
+ CLEAR_CF(); /* successful */
+ return;
+ break;
+
+ case 0x05: /* format disk track */
+BX_DEBUG_INT13_HD("int13_f05\n");
+ BX_PANIC("format disk track called\n");
+ /* nop */
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF(); /* successful */
+ return;
+ break;
+
+ case 0x08: /* read disk drive parameters */
+BX_DEBUG_INT13_HD("int13_f08\n");
+
+ drive = GET_ELDL ();
+ get_hd_geometry(drive, &hd_cylinders, &hd_heads, &hd_sectors);
+
+ // translate CHS
+ // (extended translation keeps reported cylinders <= 1024 by
+ // halving/quartering/... them and multiplying heads instead)
+ if (hd_cylinders <= 1024) {
+ // hd_cylinders >>= 0;
+ // hd_heads <<= 0;
+ }
+ else if (hd_cylinders <= 2048) {
+ hd_cylinders >>= 1;
+ hd_heads <<= 1;
+ }
+ else if (hd_cylinders <= 4096) {
+ hd_cylinders >>= 2;
+ hd_heads <<= 2;
+ }
+ else if (hd_cylinders <= 8192) {
+ hd_cylinders >>= 3;
+ hd_heads <<= 3;
+ }
+ else { // hd_cylinders <= 16384
+ hd_cylinders >>= 4;
+ hd_heads <<= 4;
+ }
+
+ max_cylinder = hd_cylinders - 2; /* 0 based */
+ SET_AL(0);
+ SET_CH(max_cylinder & 0xff);
+ SET_CL(((max_cylinder >> 2) & 0xc0) | (hd_sectors & 0x3f));
+ SET_DH(hd_heads - 1);
+ SET_DL(n_drives); /* returns 0, 1, or 2 hard drives */
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF(); /* successful */
+
+ return;
+ break;
+
+ case 0x09: /* initialize drive parameters */
+BX_DEBUG_INT13_HD("int13_f09\n");
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF(); /* successful */
+ return;
+ break;
+
+ case 0x0a: /* read disk sectors with ECC */
+BX_DEBUG_INT13_HD("int13_f0a\n");
+ case 0x0b: /* write disk sectors with ECC */
+BX_DEBUG_INT13_HD("int13_f0b\n");
+ BX_PANIC("int13h Functions 0Ah & 0Bh not implemented!\n");
+ return;
+ break;
+
+ case 0x0c: /* seek to specified cylinder */
+BX_DEBUG_INT13_HD("int13_f0c\n");
+ BX_INFO("int13h function 0ch (seek) not implemented!\n");
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF(); /* successful */
+ return;
+ break;
+
+ case 0x0d: /* alternate disk reset */
+BX_DEBUG_INT13_HD("int13_f0d\n");
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF(); /* successful */
+ return;
+ break;
+
+ case 0x10: /* check drive ready */
+BX_DEBUG_INT13_HD("int13_f10\n");
+ //SET_AH(0);
+ //SET_DISK_RET_STATUS(0);
+ //CLEAR_CF(); /* successful */
+ //return;
+ //break;
+
+ // should look at 40:8E also???
+ // BSY=0 and RDY=1 in the status register means "ready"
+ status = inb(0x01f7);
+ if ( (status & 0xc0) == 0x40 ) {
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF(); // drive ready
+ return;
+ }
+ else {
+ SET_AH(0xAA);
+ SET_DISK_RET_STATUS(0xAA);
+ SET_CF(); // not ready
+ return;
+ }
+ break;
+
+ case 0x11: /* recalibrate */
+BX_DEBUG_INT13_HD("int13_f11\n");
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF(); /* successful */
+ return;
+ break;
+
+ case 0x14: /* controller internal diagnostic */
+BX_DEBUG_INT13_HD("int13_f14\n");
+ SET_AH(0);
+ SET_DISK_RET_STATUS(0);
+ CLEAR_CF(); /* successful */
+ SET_AL(0);
+ return;
+ break;
+
+ case 0x15: /* read disk drive size */
+ drive = GET_ELDL();
+ get_hd_geometry(drive, &hd_cylinders, &hd_heads, &hd_sectors);
+// total sectors = (cylinders-1) * heads * sectors, returned in CX:DX
+// through the caller's stacked register frame
+ASM_START
+ push bp
+ mov bp, sp
+ mov al, _int13_harddisk.hd_heads + 2 [bp]
+ mov ah, _int13_harddisk.hd_sectors + 2 [bp]
+ mul al, ah ;; ax = heads * sectors
+ mov bx, _int13_harddisk.hd_cylinders + 2 [bp]
+ dec bx ;; use (cylinders - 1) ???
+ mul ax, bx ;; dx:ax = (cylinders -1) * (heads * sectors)
+ ;; now we need to move the 32bit result dx:ax to what the
+ ;; BIOS wants which is cx:dx.
+ ;; and then into CX:DX on the stack
+ mov _int13_harddisk.CX + 2 [bp], dx
+ mov _int13_harddisk.DX + 2 [bp], ax
+ pop bp
+ASM_END
+ SET_AH(3); // hard disk accessible
+ SET_DISK_RET_STATUS(0); // ??? should this be 0
+ CLEAR_CF(); // successful
+ return;
+ break;
+
+ case 0x18: // set media type for format
+ case 0x41: // IBM/MS
+ case 0x42: // IBM/MS
+ case 0x43: // IBM/MS
+ case 0x44: // IBM/MS
+ case 0x45: // IBM/MS lock/unlock drive
+ case 0x46: // IBM/MS eject media
+ case 0x47: // IBM/MS extended seek
+ case 0x49: // IBM/MS extended media change
+ case 0x50: // IBM/MS send packet command
+ default:
+ BX_INFO("int13_harddisk: unsupported AH=%02x\n", GET_AH());
+
+ SET_AH(1); // code=invalid function in AH or invalid parameter
+ SET_DISK_RET_STATUS(1);
+ SET_CF(); /* unsuccessful */
+ return;
+ break;
+ }
+}
+
+static char panic_msg_reg12h[] = "HD%d cmos reg 12h not type F\n";
+static char panic_msg_reg19h[] = "HD%d cmos reg %02xh not user definable type 47\n";
+
+// get_hd_geometry: read the legacy CHS geometry of a hard disk from
+// CMOS and store it through the caller's pointers.
+//
+//  drive        - 0x80 selects the first disk; anything else the second
+//  hd_cylinders - out: cylinder count (CMOS iobase, iobase+1)
+//  hd_heads     - out: head count     (CMOS iobase+2)
+//  hd_sectors   - out: sectors/track  (CMOS iobase+8)
+//
+// The out-parameters are written with write_word/write_byte relative to
+// the caller's stack segment, as bcc passes pointers as near offsets.
+ void
+get_hd_geometry(drive, hd_cylinders, hd_heads, hd_sectors)
+  Bit8u drive;
+  Bit16u *hd_cylinders;
+  Bit8u *hd_heads;
+  Bit8u *hd_sectors;
+{
+  Bit8u hd_type;
+  Bit16u ss;
+  Bit16u cylinders;
+  Bit8u iobase;
+
+  ss = get_SS();
+  if (drive == 0x80) {
+    hd_type = inb_cmos(0x12) & 0xf0;
+    if (hd_type != 0xf0)
+      BX_INFO(panic_msg_reg12h,0);
+    hd_type = inb_cmos(0x19); // HD0: extended type
+    if (hd_type != 47)
+      BX_INFO(panic_msg_reg19h,0,0x19);
+    iobase = 0x1b;
+  } else {
+    hd_type = inb_cmos(0x12) & 0x0f;
+    if (hd_type != 0x0f)
+      BX_INFO(panic_msg_reg12h,1);
+    hd_type = inb_cmos(0x1a); // HD1: extended type
+    if (hd_type != 47)
+      BX_INFO(panic_msg_reg19h,1,0x1a); // fix: report HD1, not HD0
+    iobase = 0x24;
+  }
+
+  // cylinders
+  cylinders = inb_cmos(iobase) | (inb_cmos(iobase+1) << 8);
+  write_word(ss, hd_cylinders, cylinders);
+
+  // heads
+  write_byte(ss, hd_heads, inb_cmos(iobase+2));
+
+  // sectors per track
+  write_byte(ss, hd_sectors, inb_cmos(iobase+8));
+}
+
+#endif //else BX_USE_ATADRV
+
+
+//////////////////////
+// FLOPPY functions //
+//////////////////////
+
+  bx_bool
+floppy_media_known(drive)
+  Bit16u drive;
+{
+  Bit8u recal_status, media_state;
+
+  // Diskette recalibration status (40:3E): bit 0 = drive 0 calibrated,
+  // bit 1 = drive 1.  Without a recalibration the media state byte
+  // cannot be trusted.
+  recal_status = read_byte(0x0040, 0x003e);
+  if (drive)
+    recal_status >>= 1;
+  if ((recal_status & 0x01) == 0)
+    return(0);
+
+  // Media state byte (40:90 for drive 0, 40:91 for drive 1):
+  // bit 4 set means the media type has been established.
+  media_state = read_byte(0x0040, drive ? 0x0091 : 0x0090);
+  if ((media_state & 0x10) == 0)
+    return(0);
+
+  // both checks pass: media type is KNOWN
+  return(1);
+}
+
+  bx_bool
+floppy_media_sense(drive)
+  Bit16u drive;
+{
+  bx_bool rc;
+  Bit16u state_addr;
+  Bit8u cmos_type, config_data, media_state;
+
+  // a drive that cannot recalibrate has no usable media
+  if (floppy_drive_recal(drive) == 0) {
+    return(0);
+  }
+
+  // For now cheat: derive the media type from the CMOS drive type
+  // (reg 0x10), assuming the inserted media matches the drive.
+  //
+  // ** config_data ** (stored at 40:8B), Table M0028:
+  //   bits 7-6  last data rate (00=500kbps 01=300kbps 10=250kbps 11=1Mbps)
+  //   bits 5-4  last step rate selected (00=0Ch 01=0Dh 10=0Eh 11=0Ah)
+  //   bits 3-2  data rate at start of operation
+  //   bits 1-0  reserved
+  //
+  // ** media_state ** (stored at 40:90/91), Table M0030:
+  //   bits 7-6  data rate (as above)
+  //   bit  5    double stepping required (e.g. 360kB in 1.2MB)
+  //   bit  4    media type established
+  //   bit  3    drive capable of supporting 4MB media
+  //   bits 2-0  state code (trying/established combinations)
+  cmos_type = inb_cmos(0x10);
+  if (drive == 0)
+    cmos_type >>= 4;
+  else
+    cmos_type &= 0x0f;
+
+  rc = 1;
+  switch (cmos_type) {
+    case 1: // 360K 5.25" drive
+    case 2: // 1.2 MB 5.25" drive; need double stepping??? (bit 5)
+      config_data = 0x00; // 0000 0000
+      media_state = 0x25; // 0010 0101
+      break;
+    case 3: // 720K 3.5" drive
+    case 4: // 1.44 MB 3.5" drive
+      config_data = 0x00; // 0000 0000
+      media_state = 0x17; // 0001 0111
+      break;
+    case 5: // 2.88 MB 3.5" drive
+      config_data = 0xCC; // 1100 1100
+      media_state = 0xD7; // 1101 0111
+      break;
+    // extended floppy sizes use special cmos settings
+    case 6: // 160k 5.25" drive
+    case 7: // 180k 5.25" drive
+    case 8: // 320k 5.25" drive
+      config_data = 0x00; // 0000 0000
+      media_state = 0x27; // 0010 0111
+      break;
+    default: // not recognized: nothing sensed
+      config_data = 0x00; // 0000 0000
+      media_state = 0x00; // 0000 0000
+      rc = 0;
+      break;
+  }
+
+  // record the sensed state in the BIOS Data Area
+  state_addr = (drive == 0) ? 0x90 : 0x91;
+  write_byte(0x0040, 0x008B, config_data);
+  write_byte(0x0040, state_addr, media_state);
+
+  return(rc);
+}
+
+// floppy_drive_recal: recalibrate drive 0 or 1 via the floppy disk
+// controller (DOR at 0x3F2, MSR at 0x3F4, FIFO at 0x3F5).  Waits for
+// the IRQ6 handler to set bit 7 of 40:3E, then records the drive as
+// calibrated on cylinder 0 in the BIOS Data Area.  Always returns 1
+// (the not-ready case panics instead of failing).
+ bx_bool
+floppy_drive_recal(drive)
+  Bit16u drive;
+{
+  Bit8u val8, dor;
+  Bit16u curr_cyl_offset;
+
+  // set 40:3e bit 7 to 0
+  // (bit 7 is the "interrupt occurred" flag the IRQ handler will set)
+  val8 = read_byte(0x0000, 0x043e);
+  val8 &= 0x7f;
+  write_byte(0x0000, 0x043e, val8);
+
+  // turn on motor of selected drive, DMA & int enabled, normal operation
+  if (drive)
+    dor = 0x20;
+  else
+    dor = 0x10;
+  dor |= 0x0c;
+  dor |= drive;
+  outb(0x03f2, dor);
+
+  // reset the disk motor timeout value of INT 08
+  write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
+
+  // check port 3f4 for drive readiness
+  // (MSR high nibble 0x80 = RQM set, DIO clear: ready for a command)
+  val8 = inb(0x3f4);
+  if ( (val8 & 0xf0) != 0x80 )
+    BX_PANIC("floppy recal:f07: ctrl not ready\n");
+
+  // send Recalibrate command (2 bytes) to controller
+  outb(0x03f5, 0x07); // 07: Recalibrate
+  outb(0x03f5, drive); // 0=drive0, 1=drive1
+
+  // turn on interrupts
+ASM_START
+  sti
+ASM_END
+
+  // wait on 40:3e bit 7 to become 1
+  val8 = (read_byte(0x0000, 0x043e) & 0x80);
+  while ( val8 == 0 ) {
+    val8 = (read_byte(0x0000, 0x043e) & 0x80);
+  }
+
+ val8 = 0; // separate asm from while() loop
+  // turn off interrupts
+ASM_START
+  cli
+ASM_END
+
+  // set 40:3e bit 7 to 0, and calibrated bit
+  val8 = read_byte(0x0000, 0x043e);
+  val8 &= 0x7f;
+  if (drive) {
+    val8 |= 0x02; // Drive 1 calibrated
+    curr_cyl_offset = 0x0095;
+  }
+  else {
+    val8 |= 0x01; // Drive 0 calibrated
+    curr_cyl_offset = 0x0094;
+  }
+  write_byte(0x0040, 0x003e, val8);
+  write_byte(0x0040, curr_cyl_offset, 0); // current cylinder is 0
+
+  return(1);
+}
+
+
+
+  bx_bool
+floppy_drive_exists(drive)
+  Bit16u drive;
+{
+  Bit8u type_nibble;
+
+  // CMOS register 0x10 packs both floppy drive types:
+  // high nibble = drive 0, low nibble = drive 1; 0 = not installed.
+  type_nibble = inb_cmos(0x10);
+  if (drive == 0)
+    type_nibble >>= 4;
+  type_nibble &= 0x0f;
+
+  if (type_nibble == 0)
+    return(0);
+  return(1);
+}
+
+#if BX_SUPPORT_FLOPPY
+ void
+int13_diskette_function(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+ Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+{
+ Bit8u drive, num_sectors, track, sector, head, status;
+ Bit16u base_address, base_count, base_es;
+ Bit8u page, mode_register, val8, dor;
+ Bit8u return_status[7];
+ Bit8u drive_type, num_floppies, ah;
+ Bit16u es, last_addr;
+
+ BX_DEBUG_INT13_FL("int13_diskette: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
+ // BX_DEBUG_INT13_FL("int13_diskette: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), get_DS(), ES, DI, SI);
+
+ ah = GET_AH();
+
+ switch ( ah ) {
+ case 0x00: // diskette controller reset
+BX_DEBUG_INT13_FL("floppy f00\n");
+ drive = GET_ELDL();
+ if (drive > 1) {
+ SET_AH(1); // invalid param
+ set_diskette_ret_status(1);
+ SET_CF();
+ return;
+ }
+ drive_type = inb_cmos(0x10);
+
+ if (drive == 0)
+ drive_type >>= 4;
+ else
+ drive_type &= 0x0f;
+ if (drive_type == 0) {
+ SET_AH(0x80); // drive not responding
+ set_diskette_ret_status(0x80);
+ SET_CF();
+ return;
+ }
+ SET_AH(0);
+ set_diskette_ret_status(0);
+ CLEAR_CF(); // successful
+ set_diskette_current_cyl(drive, 0); // current cylinder
+ return;
+
+ case 0x01: // Read Diskette Status
+ CLEAR_CF();
+ val8 = read_byte(0x0000, 0x0441);
+ SET_AH(val8);
+ if (val8) {
+ SET_CF();
+ }
+ return;
+
+ case 0x02: // Read Diskette Sectors
+ case 0x03: // Write Diskette Sectors
+ case 0x04: // Verify Diskette Sectors
+ num_sectors = GET_AL();
+ track = GET_CH();
+ sector = GET_CL();
+ head = GET_DH();
+ drive = GET_ELDL();
+
+ if ( (drive > 1) || (head > 1) ||
+ (num_sectors == 0) || (num_sectors > 72) ) {
+BX_INFO("floppy: drive>1 || head>1 ...\n");
+ SET_AH(1);
+ set_diskette_ret_status(1);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
+ }
+
+ // see if drive exists
+ if (floppy_drive_exists(drive) == 0) {
+ SET_AH(0x80); // not responding
+ set_diskette_ret_status(0x80);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
+ }
+
+ // see if media in drive, and type is known
+ if (floppy_media_known(drive) == 0) {
+ if (floppy_media_sense(drive) == 0) {
+ SET_AH(0x0C); // Media type not found
+ set_diskette_ret_status(0x0C);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
+ }
+ }
+
+ if (ah == 0x02) {
+ // Read Diskette Sectors
+
+ //-----------------------------------
+ // set up DMA controller for transfer
+ //-----------------------------------
+
+ // es:bx = pointer to where to place information from diskette
+ // port 04: DMA-1 base and current address, channel 2
+ // port 05: DMA-1 base and current count, channel 2
+ page = (ES >> 12); // upper 4 bits
+ base_es = (ES << 4); // lower 16bits contributed by ES
+ base_address = base_es + BX; // lower 16 bits of address
+ // contributed by ES:BX
+ if ( base_address < base_es ) {
+ // in case of carry, adjust page by 1
+ page++;
+ }
+ base_count = (num_sectors * 512) - 1;
+
+ // check for 64K boundary overrun
+ last_addr = base_address + base_count;
+ if (last_addr < base_address) {
+ SET_AH(0x09);
+ set_diskette_ret_status(0x09);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
+ }
+
+ BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
+ outb(0x000a, 0x06);
+
+ BX_DEBUG_INT13_FL("clear flip-flop\n");
+ outb(0x000c, 0x00); // clear flip-flop
+ outb(0x0004, base_address);
+ outb(0x0004, base_address>>8);
+ BX_DEBUG_INT13_FL("clear flip-flop\n");
+ outb(0x000c, 0x00); // clear flip-flop
+ outb(0x0005, base_count);
+ outb(0x0005, base_count>>8);
+
+ // port 0b: DMA-1 Mode Register
+ mode_register = 0x46; // single mode, increment, autoinit disable,
+ // transfer type=write, channel 2
+ BX_DEBUG_INT13_FL("setting mode register\n");
+ outb(0x000b, mode_register);
+
+ BX_DEBUG_INT13_FL("setting page register\n");
+ // port 81: DMA-1 Page Register, channel 2
+ outb(0x0081, page);
+
+ BX_DEBUG_INT13_FL("unmask chan 2\n");
+ outb(0x000a, 0x02); // unmask channel 2
+
+ BX_DEBUG_INT13_FL("unmasking DMA-1 c2\n");
+ outb(0x000a, 0x02);
+
+ //--------------------------------------
+ // set up floppy controller for transfer
+ //--------------------------------------
+
+ // set 40:3e bit 7 to 0
+ val8 = read_byte(0x0000, 0x043e);
+ val8 &= 0x7f;
+ write_byte(0x0000, 0x043e, val8);
+
+ // turn on motor of selected drive, DMA & int enabled, normal operation
+ if (drive)
+ dor = 0x20;
+ else
+ dor = 0x10;
+ dor |= 0x0c;
+ dor |= drive;
+ outb(0x03f2, dor);
+
+ // reset the disk motor timeout value of INT 08
+ write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
+
+ // check port 3f4 for drive readiness
+ val8 = inb(0x3f4);
+ if ( (val8 & 0xf0) != 0x80 )
+ BX_PANIC("int13_diskette:f02: ctrl not ready\n");
+
+ // send read-normal-data command (9 bytes) to controller
+ outb(0x03f5, 0xe6); // e6: read normal data
+ outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
+ outb(0x03f5, track);
+ outb(0x03f5, head);
+ outb(0x03f5, sector);
+ outb(0x03f5, 2); // 512 byte sector size
+ outb(0x03f5, 0); // last sector number possible on track
+ outb(0x03f5, 0); // Gap length
+ outb(0x03f5, 0xff); // Gap length
+
+ // turn on interrupts
+ ASM_START
+ sti
+ ASM_END
+
+ // wait on 40:3e bit 7 to become 1
+ val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ while ( val8 == 0 ) {
+ val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ }
+
+ val8 = 0; // separate asm from while() loop
+ // turn off interrupts
+ ASM_START
+ cli
+ ASM_END
+
+ // set 40:3e bit 7 to 0
+ val8 = read_byte(0x0000, 0x043e);
+ val8 &= 0x7f;
+ write_byte(0x0000, 0x043e, val8);
+
+ // check port 3f4 for accessibility to status bytes
+ val8 = inb(0x3f4);
+ if ( (val8 & 0xc0) != 0xc0 )
+ BX_PANIC("int13_diskette: ctrl not ready\n");
+
+ // read 7 return status bytes from controller
+ // using loop index broken, have to unroll...
+ return_status[0] = inb(0x3f5);
+ return_status[1] = inb(0x3f5);
+ return_status[2] = inb(0x3f5);
+ return_status[3] = inb(0x3f5);
+ return_status[4] = inb(0x3f5);
+ return_status[5] = inb(0x3f5);
+ return_status[6] = inb(0x3f5);
+ // record in BIOS Data Area
+ write_byte(0x0040, 0x0042, return_status[0]);
+ write_byte(0x0040, 0x0043, return_status[1]);
+ write_byte(0x0040, 0x0044, return_status[2]);
+ write_byte(0x0040, 0x0045, return_status[3]);
+ write_byte(0x0040, 0x0046, return_status[4]);
+ write_byte(0x0040, 0x0047, return_status[5]);
+ write_byte(0x0040, 0x0048, return_status[6]);
+
+ if ( (return_status[0] & 0xc0) != 0 ) {
+ SET_AH(0x20);
+ set_diskette_ret_status(0x20);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
+ }
+
+ // ??? should track be new val from return_status[3] ?
+ set_diskette_current_cyl(drive, track);
+ // AL = number of sectors read (same value as passed)
+ SET_AH(0x00); // success
+ CLEAR_CF(); // success
+ return;
+ }
+ else if (ah == 0x03) {
+ // Write Diskette Sectors
+
+ //-----------------------------------
+ // set up DMA controller for transfer
+ //-----------------------------------
+
+ // es:bx = pointer to where to place information from diskette
+ // port 04: DMA-1 base and current address, channel 2
+ // port 05: DMA-1 base and current count, channel 2
+ page = (ES >> 12); // upper 4 bits
+ base_es = (ES << 4); // lower 16bits contributed by ES
+ base_address = base_es + BX; // lower 16 bits of address
+ // contributed by ES:BX
+ if ( base_address < base_es ) {
+ // in case of carry, adjust page by 1
+ page++;
+ }
+ base_count = (num_sectors * 512) - 1;
+
+ // check for 64K boundary overrun
+ last_addr = base_address + base_count;
+ if (last_addr < base_address) {
+ SET_AH(0x09);
+ set_diskette_ret_status(0x09);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
+ }
+
+ BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
+ outb(0x000a, 0x06);
+
+ outb(0x000c, 0x00); // clear flip-flop
+ outb(0x0004, base_address);
+ outb(0x0004, base_address>>8);
+ outb(0x000c, 0x00); // clear flip-flop
+ outb(0x0005, base_count);
+ outb(0x0005, base_count>>8);
+
+ // port 0b: DMA-1 Mode Register
+ mode_register = 0x4a; // single mode, increment, autoinit disable,
+ // transfer type=read, channel 2
+ outb(0x000b, mode_register);
+
+ // port 81: DMA-1 Page Register, channel 2
+ outb(0x0081, page);
+
+ BX_DEBUG_INT13_FL("unmasking DMA-1 c2\n");
+ outb(0x000a, 0x02);
+
+ //--------------------------------------
+ // set up floppy controller for transfer
+ //--------------------------------------
+
+ // set 40:3e bit 7 to 0
+ val8 = read_byte(0x0000, 0x043e);
+ val8 &= 0x7f;
+ write_byte(0x0000, 0x043e, val8);
+
+ // turn on motor of selected drive, DMA & int enabled, normal operation
+ if (drive)
+ dor = 0x20;
+ else
+ dor = 0x10;
+ dor |= 0x0c;
+ dor |= drive;
+ outb(0x03f2, dor);
+
+ // reset the disk motor timeout value of INT 08
+ write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
+
+ // check port 3f4 for drive readiness
+ val8 = inb(0x3f4);
+ if ( (val8 & 0xf0) != 0x80 )
+ BX_PANIC("int13_diskette:f03: ctrl not ready\n");
+
+ // send read-normal-data command (9 bytes) to controller
+ outb(0x03f5, 0xc5); // c5: write normal data
+ outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
+ outb(0x03f5, track);
+ outb(0x03f5, head);
+ outb(0x03f5, sector);
+ outb(0x03f5, 2); // 512 byte sector size
+ outb(0x03f5, 0); // last sector number possible on track
+ outb(0x03f5, 0); // Gap length
+ outb(0x03f5, 0xff); // Gap length
+
+ // turn on interrupts
+ ASM_START
+ sti
+ ASM_END
+
+ // wait on 40:3e bit 7 to become 1
+ val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ while ( val8 == 0 ) {
+ val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ }
+
+ val8 = 0; // separate asm from while() loop
+ // turn off interrupts
+ ASM_START
+ cli
+ ASM_END
+
+ // set 40:3e bit 7 to 0
+ val8 = read_byte(0x0000, 0x043e);
+ val8 &= 0x7f;
+ write_byte(0x0000, 0x043e, val8);
+
+ // check port 3f4 for accessibility to status bytes
+ val8 = inb(0x3f4);
+ if ( (val8 & 0xc0) != 0xc0 )
+ BX_PANIC("int13_diskette: ctrl not ready\n");
+
+ // read 7 return status bytes from controller
+ // using loop index broken, have to unroll...
+ return_status[0] = inb(0x3f5);
+ return_status[1] = inb(0x3f5);
+ return_status[2] = inb(0x3f5);
+ return_status[3] = inb(0x3f5);
+ return_status[4] = inb(0x3f5);
+ return_status[5] = inb(0x3f5);
+ return_status[6] = inb(0x3f5);
+ // record in BIOS Data Area
+ write_byte(0x0040, 0x0042, return_status[0]);
+ write_byte(0x0040, 0x0043, return_status[1]);
+ write_byte(0x0040, 0x0044, return_status[2]);
+ write_byte(0x0040, 0x0045, return_status[3]);
+ write_byte(0x0040, 0x0046, return_status[4]);
+ write_byte(0x0040, 0x0047, return_status[5]);
+ write_byte(0x0040, 0x0048, return_status[6]);
+
+ if ( (return_status[0] & 0xc0) != 0 ) {
+ if ( (return_status[1] & 0x02) != 0 ) {
+ // diskette not writable.
+ // AH=status code=0x03 (tried to write on write-protected disk)
+ // AL=number of sectors written=0
+ AX = 0x0300;
+ SET_CF();
+ return;
+ } else {
+ BX_PANIC("int13_diskette_function: read error\n");
+ }
+ }
+
+ // ??? should track be new val from return_status[3] ?
+ set_diskette_current_cyl(drive, track);
+ // AL = number of sectors read (same value as passed)
+ SET_AH(0x00); // success
+ CLEAR_CF(); // success
+ return;
+ }
+ else { // if (ah == 0x04)
+ // Verify Diskette Sectors
+
+ // ??? should track be new val from return_status[3] ?
+ set_diskette_current_cyl(drive, track);
+ // AL = number of sectors verified (same value as passed)
+ CLEAR_CF(); // success
+ SET_AH(0x00); // success
+ return;
+ }
+
+
+ case 0x05: // format diskette track
+BX_DEBUG_INT13_FL("floppy f05\n");
+
+ num_sectors = GET_AL();
+ track = GET_CH();
+ head = GET_DH();
+ drive = GET_ELDL();
+
+ if ((drive > 1) || (head > 1) || (track > 79) ||
+ (num_sectors == 0) || (num_sectors > 18)) {
+ SET_AH(1);
+ set_diskette_ret_status(1);
+ SET_CF(); // error occurred
+ }
+
+ // see if drive exists
+ if (floppy_drive_exists(drive) == 0) {
+ SET_AH(0x80); // drive not responding
+ set_diskette_ret_status(0x80);
+ SET_CF(); // error occurred
+ return;
+ }
+
+ // see if media in drive, and type is known
+ if (floppy_media_known(drive) == 0) {
+ if (floppy_media_sense(drive) == 0) {
+ SET_AH(0x0C); // Media type not found
+ set_diskette_ret_status(0x0C);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
+ }
+ }
+
+ // set up DMA controller for transfer
+ page = (ES >> 12); // upper 4 bits
+ base_es = (ES << 4); // lower 16bits contributed by ES
+ base_address = base_es + BX; // lower 16 bits of address
+ // contributed by ES:BX
+ if ( base_address < base_es ) {
+ // in case of carry, adjust page by 1
+ page++;
+ }
+ base_count = (num_sectors * 4) - 1;
+
+ // check for 64K boundary overrun
+ last_addr = base_address + base_count;
+ if (last_addr < base_address) {
+ SET_AH(0x09);
+ set_diskette_ret_status(0x09);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
+ }
+
+ outb(0x000a, 0x06);
+ outb(0x000c, 0x00); // clear flip-flop
+ outb(0x0004, base_address);
+ outb(0x0004, base_address>>8);
+ outb(0x000c, 0x00); // clear flip-flop
+ outb(0x0005, base_count);
+ outb(0x0005, base_count>>8);
+ mode_register = 0x4a; // single mode, increment, autoinit disable,
+ // transfer type=read, channel 2
+ outb(0x000b, mode_register);
+ // port 81: DMA-1 Page Register, channel 2
+ outb(0x0081, page);
+ outb(0x000a, 0x02);
+
+ // set up floppy controller for transfer
+ val8 = read_byte(0x0000, 0x043e);
+ val8 &= 0x7f;
+ write_byte(0x0000, 0x043e, val8);
+ // turn on motor of selected drive, DMA & int enabled, normal operation
+ if (drive)
+ dor = 0x20;
+ else
+ dor = 0x10;
+ dor |= 0x0c;
+ dor |= drive;
+ outb(0x03f2, dor);
+
+ // reset the disk motor timeout value of INT 08
+ write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
+
+ // check port 3f4 for drive readiness
+ val8 = inb(0x3f4);
+ if ( (val8 & 0xf0) != 0x80 )
+ BX_PANIC("int13_diskette:f05: ctrl not ready\n");
+
+ // send read-normal-data command (6 bytes) to controller
+ outb(0x03f5, 0x4d); // 4d: format track
+ outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
+ outb(0x03f5, 2); // 512 byte sector size
+ outb(0x03f5, num_sectors); // number of sectors per track
+ outb(0x03f5, 0); // Gap length
+ outb(0x03f5, 0xf6); // Fill byte
+ // turn on interrupts
+ ASM_START
+ sti
+ ASM_END
+ // wait on 40:3e bit 7 to become 1
+ val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ while ( val8 == 0 ) {
+ val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ }
+ val8 = 0; // separate asm from while() loop
+ // turn off interrupts
+ ASM_START
+ cli
+ ASM_END
+ // set 40:3e bit 7 to 0
+ val8 = read_byte(0x0000, 0x043e);
+ val8 &= 0x7f;
+ write_byte(0x0000, 0x043e, val8);
+ // check port 3f4 for accessibility to status bytes
+ val8 = inb(0x3f4);
+ if ( (val8 & 0xc0) != 0xc0 )
+ BX_PANIC("int13_diskette: ctrl not ready\n");
+
+ // read 7 return status bytes from controller
+ // using loop index broken, have to unroll...
+ return_status[0] = inb(0x3f5);
+ return_status[1] = inb(0x3f5);
+ return_status[2] = inb(0x3f5);
+ return_status[3] = inb(0x3f5);
+ return_status[4] = inb(0x3f5);
+ return_status[5] = inb(0x3f5);
+ return_status[6] = inb(0x3f5);
+ // record in BIOS Data Area
+ write_byte(0x0040, 0x0042, return_status[0]);
+ write_byte(0x0040, 0x0043, return_status[1]);
+ write_byte(0x0040, 0x0044, return_status[2]);
+ write_byte(0x0040, 0x0045, return_status[3]);
+ write_byte(0x0040, 0x0046, return_status[4]);
+ write_byte(0x0040, 0x0047, return_status[5]);
+ write_byte(0x0040, 0x0048, return_status[6]);
+
+ if ( (return_status[0] & 0xc0) != 0 ) {
+ if ( (return_status[1] & 0x02) != 0 ) {
+ // diskette not writable.
+ // AH=status code=0x03 (tried to write on write-protected disk)
+ // AL=number of sectors written=0
+ AX = 0x0300;
+ SET_CF();
+ return;
+ } else {
+ BX_PANIC("int13_diskette_function: write error\n");
+ }
+ }
+
+ SET_AH(0);
+ set_diskette_ret_status(0);
+ set_diskette_current_cyl(drive, 0);
+ CLEAR_CF(); // successful
+ return;
+
+
+ case 0x08: // read diskette drive parameters
+BX_DEBUG_INT13_FL("floppy f08\n");
+ drive = GET_ELDL();
+
+ if (drive > 1) {
+ AX = 0;
+ BX = 0;
+ CX = 0;
+ DX = 0;
+ ES = 0;
+ DI = 0;
+ SET_DL(num_floppies);
+ SET_CF();
+ return;
+ }
+
+ drive_type = inb_cmos(0x10);
+ num_floppies = 0;
+ if (drive_type & 0xf0)
+ num_floppies++;
+ if (drive_type & 0x0f)
+ num_floppies++;
+
+ if (drive == 0)
+ drive_type >>= 4;
+ else
+ drive_type &= 0x0f;
+
+ SET_BH(0);
+ SET_BL(drive_type);
+ SET_AH(0);
+ SET_AL(0);
+ SET_DL(num_floppies);
+
+ switch (drive_type) {
+ case 0: // none
+ CX = 0;
+ SET_DH(0); // max head #
+ break;
+
+ case 1: // 360KB, 5.25"
+ CX = 0x2709; // 40 tracks, 9 sectors
+ SET_DH(1); // max head #
+ break;
+
+ case 2: // 1.2MB, 5.25"
+ CX = 0x4f0f; // 80 tracks, 15 sectors
+ SET_DH(1); // max head #
+ break;
+
+ case 3: // 720KB, 3.5"
+ CX = 0x4f09; // 80 tracks, 9 sectors
+ SET_DH(1); // max head #
+ break;
+
+ case 4: // 1.44MB, 3.5"
+ CX = 0x4f12; // 80 tracks, 18 sectors
+ SET_DH(1); // max head #
+ break;
+
+ case 5: // 2.88MB, 3.5"
+ CX = 0x4f24; // 80 tracks, 36 sectors
+ SET_DH(1); // max head #
+ break;
+
+ case 6: // 160k, 5.25"
+ CX = 0x2708; // 40 tracks, 8 sectors
+ SET_DH(0); // max head #
+ break;
+
+ case 7: // 180k, 5.25"
+ CX = 0x2709; // 40 tracks, 9 sectors
+ SET_DH(0); // max head #
+ break;
+
+ case 8: // 320k, 5.25"
+ CX = 0x2708; // 40 tracks, 8 sectors
+ SET_DH(1); // max head #
+ break;
+
+ default: // ?
+ BX_PANIC("floppy: int13: bad floppy type\n");
+ }
+
+ /* set es & di to point to 11 byte diskette param table in ROM */
+ASM_START
+ push bp
+ mov bp, sp
+ mov ax, #diskette_param_table2
+ mov _int13_diskette_function.DI+2[bp], ax
+ mov _int13_diskette_function.ES+2[bp], cs
+ pop bp
+ASM_END
+ CLEAR_CF(); // success
+ /* disk status not changed upon success */
+ return;
+
+
+ case 0x15: // read diskette drive type
+BX_DEBUG_INT13_FL("floppy f15\n");
+ drive = GET_ELDL();
+ if (drive > 1) {
+ SET_AH(0); // only 2 drives supported
+ // set_diskette_ret_status here ???
+ SET_CF();
+ return;
+ }
+ drive_type = inb_cmos(0x10);
+
+ if (drive == 0)
+ drive_type >>= 4;
+ else
+ drive_type &= 0x0f;
+ CLEAR_CF(); // successful, not present
+ if (drive_type==0) {
+ SET_AH(0); // drive not present
+ }
+ else {
+ SET_AH(1); // drive present, does not support change line
+ }
+
+ return;
+
+ case 0x16: // get diskette change line status
+BX_DEBUG_INT13_FL("floppy f16\n");
+ drive = GET_ELDL();
+ if (drive > 1) {
+ SET_AH(0x01); // invalid drive
+ set_diskette_ret_status(0x01);
+ SET_CF();
+ return;
+ }
+
+ SET_AH(0x06); // change line not supported
+ set_diskette_ret_status(0x06);
+ SET_CF();
+ return;
+
+ case 0x17: // set diskette type for format(old)
+BX_DEBUG_INT13_FL("floppy f17\n");
+ /* not used for 1.44M floppies */
+ SET_AH(0x01); // not supported
+ set_diskette_ret_status(1); /* not supported */
+ SET_CF();
+ return;
+
+ case 0x18: // set diskette type for format(new)
+BX_DEBUG_INT13_FL("floppy f18\n");
+ SET_AH(0x01); // do later
+ set_diskette_ret_status(1);
+ SET_CF();
+ return;
+
+ default:
+ BX_INFO("int13_diskette: unsupported AH=%02x\n", GET_AH());
+
+ // if ( (ah==0x20) || ((ah>=0x41) && (ah<=0x49)) || (ah==0x4e) ) {
+ SET_AH(0x01); // ???
+ set_diskette_ret_status(1);
+ SET_CF();
+ return;
+ // }
+ }
+}
+#else // #if BX_SUPPORT_FLOPPY
+ void
+int13_diskette_function(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+ Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+{
+ Bit8u val8;
+
+ switch ( GET_AH() ) {
+
+ case 0x01: // Read Diskette Status
+ CLEAR_CF();
+ val8 = read_byte(0x0000, 0x0441);
+ SET_AH(val8);
+ if (val8) {
+ SET_CF();
+ }
+ return;
+
+ default:
+ SET_CF();
+ write_byte(0x0000, 0x0441, 0x01);
+ SET_AH(0x01);
+ }
+}
+#endif // #if BX_SUPPORT_FLOPPY
+
  void
set_diskette_ret_status(value)
  Bit8u value;
{
  // Latch the diskette controller return status in the BIOS Data Area
  // (40:41), where INT 13h AH=01h reads it back.
  write_byte(0x0040, 0x0041, value);
}
+
  void
set_diskette_current_cyl(drive, cyl)
  Bit8u drive;
  Bit8u cyl;
{
  // Record the current cylinder for a floppy drive in the BIOS Data
  // Area (40:94 for drive 0, 40:95 for drive 1).
  // NOTE(review): BX_PANIC may be configured non-fatal, in which case
  // execution falls through and the write below still happens with an
  // out-of-range drive — preserved as-is.
  if (drive > 1)
    BX_PANIC("set_diskette_current_cyl(): drive > 1\n");
  write_byte(0x0040, 0x0094+drive, cyl);
}
+
  void
determine_floppy_media(drive)
  Bit16u drive;
{
  // Placeholder for floppy media sensing: the entire body is compiled
  // out with "#if 0", so at present this function is a no-op.  The
  // disabled code sketches reading the diskette controller info byte
  // from BDA 40:8F and spinning up the drive motor via the DOR.
#if 0
  Bit8u  val8, DOR, ctrl_info;

  // BDA 40:8F holds per-drive controller information; high nibble is
  // drive 1, low nibble drive 0.
  ctrl_info = read_byte(0x0040, 0x008F);
  if (drive==1)
    ctrl_info >>= 4;
  else
    ctrl_info &= 0x0f;

#if 0
  if (drive == 0) {
    DOR = 0x1c; // DOR: drive0 motor on, DMA&int enabled, normal op, drive select 0
    }
  else {
    DOR = 0x2d; // DOR: drive1 motor on, DMA&int enabled, normal op, drive select 1
    }
#endif

  if ( (ctrl_info & 0x04) != 0x04 ) {
    // Drive not determined means no drive exists, done.
    return;
    }

#if 0
  // check Main Status Register for readiness
  val8 = inb(0x03f4) & 0x80; // Main Status Register
  if (val8 != 0x80)
    BX_PANIC("d_f_m: MRQ bit not set\n");

  // change line

  // existing BDA values

  // turn on drive motor
  outb(0x03f2, DOR); // Digital Output Register
  //
#endif
  BX_PANIC("d_f_m: OK so far\n");
#endif
}
+
  void
int17_function(regs, ds, iret_addr)
  pusha_regs_t regs; // regs pushed from PUSHA instruction
  Bit16u ds; // previous DS:, DS set to 0x0000 by asm wrapper
  iret_addr_t iret_addr; // CS,IP,Flags pushed from original INT call
{
  // INT 17h parallel printer services.
  //   AH=0: print character AL;  AH=1: initialize port;  AH=2: read status.
  // DX selects the printer (0-2); the port base address comes from the
  // BDA table at 40:08 and the timeout byte from 40:78+DX.
  Bit16u addr,timeout;
  Bit8u val8;

  ASM_START
  sti
  ASM_END

  // Fetch the LPTx base I/O address from BDA 40:08 + 2*DX; zero means
  // the port does not exist.
  addr = read_word(0x0040, (regs.u.r16.dx << 1) + 8);
  if ((regs.u.r8.ah < 3) && (regs.u.r16.dx < 3) && (addr > 0)) {
    // Timeout counter scaled up by 256 loop iterations per BDA unit.
    timeout = read_byte(0x0040, 0x0078 + regs.u.r16.dx) << 8;
    if (regs.u.r8.ah == 0) {
      // Write the data byte, then pulse the STROBE line (bit 0 of the
      // control port at base+2) low-high-low to latch it.
      outb(addr, regs.u.r8.al);
      val8 = inb(addr+2);
      outb(addr+2, val8 | 0x01); // send strobe
      ASM_START
      nop
      ASM_END
      outb(addr+2, val8 & ~0x01);
      // Wait for BUSY (status bit 6) to clear or the timeout to expire.
      while (((inb(addr+1) & 0x40) == 0x40) && (timeout)) {
        timeout--;
      }
    }
    if (regs.u.r8.ah == 1) {
      // Pulse INIT (control bit 2) low then high to reset the printer.
      val8 = inb(addr+2);
      outb(addr+2, val8 & ~0x04); // send init
      ASM_START
      nop
      ASM_END
      outb(addr+2, val8 | 0x04);
    }
    // Return the status port in AH with BUSY/ERROR bits flipped to the
    // BIOS convention; bit 0 set indicates a timeout.
    val8 = inb(addr+1);
    regs.u.r8.ah = (val8 ^ 0x48);
    if (!timeout) regs.u.r8.ah |= 0x01;
    ClearCF(iret_addr.flags);
  } else {
    SetCF(iret_addr.flags); // Unsupported
  }
}
+
// returns bootsegment in ax, drive in bl
  Bit32u
int19_function(bseqnr)
Bit8u bseqnr;
{
  // Attempt to load a boot sector for the bseqnr-th boot device
  // (1-based).  Returns (bootdrv << 16) | bootseg, or 0 on failure.
  //
  // WARNING: the inline asm below addresses these locals by name and
  // stack offset (_int19_function.status etc.) — do not rename,
  // reorder, or remove any of them.
  Bit16u ebda_seg=read_word(0x0040,0x000E);
  Bit16u bootseq;
  Bit8u  bootdrv;
  Bit8u  bootcd;
  Bit8u  bootchk;
  Bit16u bootseg;
  Bit16u status;
  Bit8u  lastdrive=0;

  // if BX_ELTORITO_BOOT is not defined, old behavior
  //   check bit 5 in CMOS reg 0x2d.  load either 0x00 or 0x80 into DL
  //   in preparation for the intial INT 13h (0=floppy A:, 0x80=C:)
  //     0: system boot sequence, first drive C: then A:
  //     1: system boot sequence, first drive A: then C:
  // else BX_ELTORITO_BOOT is defined
  //   CMOS regs 0x3D and 0x38 contain the boot sequence:
  //     CMOS reg 0x3D & 0x0f : 1st boot device
  //     CMOS reg 0x3D & 0xf0 : 2nd boot device
  //     CMOS reg 0x38 & 0xf0 : 3rd boot device
  //   boot device codes:
  //     0x00 : not defined
  //     0x01 : first floppy
  //     0x02 : first harddrive
  //     0x03 : first cdrom
  //     else : boot failure

  // Get the boot sequence
#if BX_ELTORITO_BOOT
  bootseq=inb_cmos(0x3d);
  bootseq|=((inb_cmos(0x38) & 0xf0) << 4);

  // Shift the requested entry's nibble into the low 4 bits.
  if (bseqnr==2) bootseq >>= 4;
  if (bseqnr==3) bootseq >>= 8;
  if (bootseq<0x10) lastdrive = 1;
  bootdrv=0x00; bootcd=0;
  switch(bootseq & 0x0f) {
    case 0x01: bootdrv=0x00; bootcd=0; break;
    case 0x02: bootdrv=0x80; bootcd=0; break;
    case 0x03: bootdrv=0x00; bootcd=1; break;
    default:   return 0x00000000;
    }
#else
  bootseq=inb_cmos(0x2d);

  if (bseqnr==2) {
    bootseq ^= 0x20;
    lastdrive = 1;
    }
  bootdrv=0x00; bootcd=0;
  if((bootseq&0x20)==0) bootdrv=0x80;
#endif // BX_ELTORITO_BOOT

#if BX_ELTORITO_BOOT
  // We have to boot from cd
  if (bootcd != 0) {
    status = cdrom_boot();

    // If failure
    if ( (status & 0x00ff) !=0 ) {
      print_cdromboot_failure(status);
      print_boot_failure(bootcd, bootdrv, 1, lastdrive);
      return 0x00000000;
      }

    // On success the emulation layer reports the load segment and the
    // emulated boot drive number (high byte of status).
    bootseg = read_word(ebda_seg,&EbdaData->cdemu.load_segment);
    bootdrv = (Bit8u)(status>>8);
    }

#endif // BX_ELTORITO_BOOT

  // We have to boot from harddisk or floppy
  if (bootcd == 0) {
    bootseg=0x07c0;

    // Read CHS 0/0/1 (the boot sector) from the chosen drive into
    // bootseg:0000 via INT 13h AH=02h; status records any failure.
ASM_START
    push bp
    mov  bp, sp

    mov  ax, #0x0000
    mov  _int19_function.status + 2[bp], ax
    mov  dl, _int19_function.bootdrv + 2[bp]
    mov  ax, _int19_function.bootseg + 2[bp]
    mov  es, ax         ;; segment
    mov  bx, #0x0000    ;; offset
    mov  ah, #0x02      ;; function 2, read diskette sector
    mov  al, #0x01      ;; read 1 sector
    mov  ch, #0x00      ;; track 0
    mov  cl, #0x01      ;; sector 1
    mov  dh, #0x00      ;; head 0
    int  #0x13          ;; read sector
    jnc  int19_load_done
    mov  ax, #0x0001
    mov  _int19_function.status + 2[bp], ax

int19_load_done:
    pop  bp
ASM_END

    if (status != 0) {
      print_boot_failure(bootcd, bootdrv, 1, lastdrive);
      return 0x00000000;
      }
    }

  // check signature if instructed by cmos reg 0x38, only for floppy
  // bootchk = 1 : signature check disabled
  // bootchk = 0 : signature check enabled
  if (bootdrv != 0) bootchk = 0;
  else bootchk = inb_cmos(0x38) & 0x01;

#if BX_ELTORITO_BOOT
  // if boot from cd, no signature check
  if (bootcd != 0)
    bootchk = 1;
#endif // BX_ELTORITO_BOOT

  // Require the 0xAA55 boot signature at offset 0x1FE when enabled.
  if (bootchk == 0) {
    if (read_word(bootseg,0x1fe) != 0xaa55) {
      print_boot_failure(bootcd, bootdrv, 0, lastdrive);
      return 0x00000000;
      }
    }

#if BX_ELTORITO_BOOT
  // Print out the boot string
  print_boot_device(bootcd, bootdrv);
#else // BX_ELTORITO_BOOT
  print_boot_device(0, bootdrv);
#endif // BX_ELTORITO_BOOT

  // return the boot segment
  return (((Bit32u)bootdrv) << 16) + bootseg;
}
+
  void
int1a_function(regs, ds, iret_addr)
  pusha_regs_t regs; // regs pushed from PUSHA instruction
  Bit16u ds; // previous DS:, DS set to 0x0000 by asm wrapper
  iret_addr_t iret_addr; // CS,IP,Flags pushed from original INT call
{
  // INT 1Ah time-of-day services: tick counter (BDA), CMOS RTC
  // time/date, RTC alarm, and (when BX_PCIBIOS) PCI BIOS error
  // reporting.  Time/date values are passed through as raw CMOS
  // register contents (conventionally BCD — not converted here).
  Bit8u val8;

  BX_DEBUG_INT1A("int1a: AX=%04x BX=%04x CX=%04x DX=%04x DS=%04x\n", regs.u.r16.ax, regs.u.r16.bx, regs.u.r16.cx, regs.u.r16.dx, ds);

  ASM_START
  sti
  ASM_END

  switch (regs.u.r8.ah) {
    case 0: // get current clock count
      // Interrupts off so the timer ISR cannot update the count while
      // we read the two halves.
      ASM_START
      cli
      ASM_END
      regs.u.r16.cx = BiosData->ticks_high;
      regs.u.r16.dx = BiosData->ticks_low;
      regs.u.r8.al  = BiosData->midnight_flag;
      BiosData->midnight_flag = 0; // reset flag
      ASM_START
      sti
      ASM_END
      // AH already 0
      ClearCF(iret_addr.flags); // OK
      break;

    case 1: // Set Current Clock Count
      ASM_START
      cli
      ASM_END
      BiosData->ticks_high = regs.u.r16.cx;
      BiosData->ticks_low  = regs.u.r16.dx;
      BiosData->midnight_flag = 0; // reset flag
      ASM_START
      sti
      ASM_END
      regs.u.r8.ah = 0;
      ClearCF(iret_addr.flags); // OK
      break;


    case 2: // Read CMOS Time
      // Fail (CF set) if the RTC is mid-update; registers would be
      // inconsistent.
      if (rtc_updating()) {
        SetCF(iret_addr.flags);
        break;
        }

      regs.u.r8.dh = inb_cmos(0x00); // Seconds
      regs.u.r8.cl = inb_cmos(0x02); // Minutes
      regs.u.r8.ch = inb_cmos(0x04); // Hours
      regs.u.r8.dl = inb_cmos(0x0b) & 0x01; // Stat Reg B
      regs.u.r8.ah = 0;
      regs.u.r8.al = regs.u.r8.ch;
      ClearCF(iret_addr.flags); // OK
      break;

    case 3: // Set CMOS Time
      // Using a debugger, I notice the following masking/setting
      // of bits in Status Register B, by setting Reg B to
      // a few values and getting its value after INT 1A was called.
      //
      //        try#1       try#2       try#3
      // before 1111 1101   0111 1101   0000 0000
      // after  0110 0010   0110 0010   0000 0010
      //
      // Bit4 in try#1 flipped in hardware (forced low) due to bit7=1
      // My assumption: RegB = ((RegB & 01100000b) | 00000010b)
      if (rtc_updating()) {
        init_rtc();
        // fall through as if an update were not in progress
        }
      outb_cmos(0x00, regs.u.r8.dh); // Seconds
      outb_cmos(0x02, regs.u.r8.cl); // Minutes
      outb_cmos(0x04, regs.u.r8.ch); // Hours
      // Set Daylight Savings time enabled bit to requested value
      val8 = (inb_cmos(0x0b) & 0x60) | 0x02 | (regs.u.r8.dl & 0x01);
      // (reg B already selected)
      outb_cmos(0x0b, val8);
      regs.u.r8.ah = 0;
      regs.u.r8.al = val8; // val last written to Reg B
      ClearCF(iret_addr.flags); // OK
      break;

    case 4: // Read CMOS Date
      regs.u.r8.ah = 0;
      if (rtc_updating()) {
        SetCF(iret_addr.flags);
        break;
        }
      regs.u.r8.cl = inb_cmos(0x09); // Year
      regs.u.r8.dh = inb_cmos(0x08); // Month
      regs.u.r8.dl = inb_cmos(0x07); // Day of Month
      regs.u.r8.ch = inb_cmos(0x32); // Century
      regs.u.r8.al = regs.u.r8.ch;
      ClearCF(iret_addr.flags); // OK
      break;

    case 5: // Set CMOS Date
      // Using a debugger, I notice the following masking/setting
      // of bits in Status Register B, by setting Reg B to
      // a few values and getting its value after INT 1A was called.
      //
      //        try#1       try#2       try#3       try#4
      // before 1111 1101   0111 1101   0000 0010   0000 0000
      // after  0110 1101   0111 1101   0000 0010   0000 0000
      //
      // Bit4 in try#1 flipped in hardware (forced low) due to bit7=1
      // My assumption: RegB = (RegB & 01111111b)
      if (rtc_updating()) {
        init_rtc();
        SetCF(iret_addr.flags);
        break;
        }
      outb_cmos(0x09, regs.u.r8.cl); // Year
      outb_cmos(0x08, regs.u.r8.dh); // Month
      outb_cmos(0x07, regs.u.r8.dl); // Day of Month
      outb_cmos(0x32, regs.u.r8.ch); // Century
      val8 = inb_cmos(0x0b) & 0x7f; // clear halt-clock bit
      outb_cmos(0x0b, val8);
      regs.u.r8.ah = 0;
      regs.u.r8.al = val8; // AL = val last written to Reg B
      ClearCF(iret_addr.flags); // OK
      break;

    case 6: // Set Alarm Time in CMOS
      // Using a debugger, I notice the following masking/setting
      // of bits in Status Register B, by setting Reg B to
      // a few values and getting its value after INT 1A was called.
      //
      //        try#1       try#2       try#3
      // before 1101 1111   0101 1111   0000 0000
      // after  0110 1111   0111 1111   0010 0000
      //
      // Bit4 in try#1 flipped in hardware (forced low) due to bit7=1
      // My assumption: RegB = ((RegB & 01111111b) | 00100000b)
      val8 = inb_cmos(0x0b); // Get Status Reg B
      regs.u.r16.ax = 0;
      if (val8 & 0x20) {
        // Alarm interrupt enabled already
        SetCF(iret_addr.flags); // Error: alarm in use
        break;
        }
      if (rtc_updating()) {
        init_rtc();
        // fall through as if an update were not in progress
        }
      outb_cmos(0x01, regs.u.r8.dh); // Seconds alarm
      outb_cmos(0x03, regs.u.r8.cl); // Minutes alarm
      outb_cmos(0x05, regs.u.r8.ch); // Hours alarm
      outb(0xa1, inb(0xa1) & 0xfe); // enable IRQ 8
      // enable Status Reg B alarm bit, clear halt clock bit
      outb_cmos(0x0b, (val8 & 0x7f) | 0x20);
      ClearCF(iret_addr.flags); // OK
      break;

    case 7: // Turn off Alarm
      // Using a debugger, I notice the following masking/setting
      // of bits in Status Register B, by setting Reg B to
      // a few values and getting its value after INT 1A was called.
      //
      //        try#1       try#2       try#3       try#4
      // before 1111 1101   0111 1101   0010 0000   0010 0010
      // after  0100 0101   0101 0101   0000 0000   0000 0010
      //
      // Bit4 in try#1 flipped in hardware (forced low) due to bit7=1
      // My assumption: RegB = (RegB & 01010111b)
      val8 = inb_cmos(0x0b); // Get Status Reg B
      // clear clock-halt bit, disable alarm bit
      outb_cmos(0x0b, val8 & 0x57); // disable alarm bit
      regs.u.r8.ah = 0;
      regs.u.r8.al = val8; // val last written to Reg B
      ClearCF(iret_addr.flags); // OK
      break;
#if BX_PCIBIOS
    case 0xb1:
      // real mode PCI BIOS functions now handled in assembler code
      // this C code handles the error code for information only
      if (regs.u.r8.bl == 0xff) {
        BX_INFO("PCI BIOS: PCI not present\n");
      } else if (regs.u.r8.bl == 0x81) {
        BX_INFO("unsupported PCI BIOS function 0x%02x\n", regs.u.r8.al);
      } else if (regs.u.r8.bl == 0x83) {
        BX_INFO("bad PCI vendor ID %04x\n", regs.u.r16.dx);
      } else if (regs.u.r8.bl == 0x86) {
        BX_INFO("PCI device %04x:%04x not found\n", regs.u.r16.dx, regs.u.r16.cx);
      }
      regs.u.r8.ah = regs.u.r8.bl;
      SetCF(iret_addr.flags);
      break;
#endif

    default:
      SetCF(iret_addr.flags); // Unsupported
    }
}
+
  void
int70_function(regs, ds, iret_addr)
  pusha_regs_t regs; // regs pushed from PUSHA instruction
  Bit16u ds; // previous DS:, DS set to 0x0000 by asm wrapper
  iret_addr_t iret_addr; // CS,IP,Flags pushed from original INT call
{
  // INT 70h: IRQ 8 - CMOS RTC interrupt from periodic or alarm modes
  Bit8u registerB = 0, registerC = 0;

  // Check which modes are enabled and have occurred.
  // (Reading register C also acknowledges the RTC interrupt.)
  registerB = inb_cmos( 0xB );
  registerC = inb_cmos( 0xC );

  if( ( registerB & 0x60 ) != 0 ) {
    if( ( registerC & 0x20 ) != 0 ) {
      // Handle Alarm Interrupt.
      // Reflect the alarm to the user hook at INT 4Ah.
ASM_START
      sti
      int #0x4a
      cli
ASM_END
      }
    if( ( registerC & 0x40 ) != 0 ) {
      // Handle Periodic Interrupt.

      if( read_byte( 0x40, 0xA0 ) != 0 ) {
        // Wait Interval (Int 15, AH=83) active.
        Bit32u time, toggle;

        time = read_dword( 0x40, 0x9C );  // Time left in microseconds.
        if( time < 0x3D1 ) {
          // Done waiting.
          Bit16u segment, offset;

          offset = read_word( 0x40, 0x98 );
          segment = read_word( 0x40, 0x9A );
          write_byte( 0x40, 0xA0, 0 );  // Turn of status byte.
          outb_cmos( 0xB, registerB & 0x37 ); // Clear the Periodic Interrupt.
          write_byte( segment, offset, 0x80 );  // Write to specified flag byte.
        } else {
          // Continue waiting.
          // 0x3D1 (977us) is the period of the RTC periodic tick used here.
          time -= 0x3D1;
          write_dword( 0x40, 0x9C, time );
        }
      }
    }
  }

ASM_START
  call eoi_both_pics
ASM_END
}
+
+
+ASM_START
+;------------------------------------------
+;- INT74h : PS/2 mouse hardware interrupt -
+;------------------------------------------
;; IRQ 12 (PS/2 mouse) hardware interrupt.  Pushes placeholder slots
;; that _int74_function fills in (status, X, Y, Z, make_far_call), then
;; optionally far-calls the user mouse handler whose vector sits at
;; EBDA offset 0x22 (EBDA segment read from 0040:000E).
int74_handler:
  sti
  pusha
  push ds ;; save DS
  push #0x00 ;; placeholder for status
  push #0x00 ;; placeholder for X
  push #0x00 ;; placeholder for Y
  push #0x00 ;; placeholder for Z
  push #0x00 ;; placeholder for make_far_call boolean
  call _int74_function
  pop cx ;; remove make_far_call from stack
  jcxz int74_done ;; zero = no user handler registered

  ;; make far call to EBDA:0022
  push #0x00
  pop ds
  push 0x040E ;; push 0000:040E (opcodes 0xff, 0x36, 0x0E, 0x04)
  pop ds ;; DS = EBDA segment
  //CALL_EP(0x0022) ;; call far routine (call_Ep DS:0022 :opcodes 0xff, 0x1e, 0x22, 0x00)
  call far ptr[0x22]
int74_done:
  cli
  call eoi_both_pics ;; acknowledge IRQ on both PICs
  add sp, #8 ;; pop status, x, y, z

  pop ds ;; restore DS
  popa
  iret
+
+
;; This will perform an IRET, but will retain value of current CF
;; by altering flags on stack. Better than RETF #02.
;; (The FLAGS image saved by INT sits at [sp+4]; with BP pushed it is
;; reached at [bp+6].)
iret_modify_cf:
  jc carry_set
  push bp
  mov bp, sp
  and BYTE [bp + 0x06], #0xfe ;; clear CF in saved FLAGS
  pop bp
  iret
carry_set:
  push bp
  mov bp, sp
  or BYTE [bp + 0x06], #0x01 ;; set CF in saved FLAGS
  pop bp
  iret
+
+
+;----------------------
+;- INT13h (relocated) -
+;----------------------
+;
+; int13_relocated is a little bit messed up since I played with it
+; I have to rewrite it:
+; - call a function that detect which function to call
+; - make all called C function get the same parameters list
+;
;; Relocated INT 13h entry point.  Dispatches to the El Torito, CD-ROM
;; emulation, CD-ROM, diskette or hard-disk C handlers, after building
;; the argument frame DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX on the
;; stack (see comment at int13_legacy below).
int13_relocated:

#if BX_ELTORITO_BOOT
  ;; check for an eltorito function (AH in 4Ah..4Dh)
  cmp   ah,#0x4a
  jb    int13_not_eltorito
  cmp   ah,#0x4d
  ja    int13_not_eltorito

  pusha
  push  es
  push  ds
  push  ss
  pop   ds

  push  #int13_out
  jmp   _int13_eltorito ;; ELDX not used

int13_not_eltorito:
  push  ax
  push  bx
  push  cx
  push  dx

  ;; check if emulation active
  call  _cdemu_isactive
  cmp   al,#0x00
  je    int13_cdemu_inactive

  ;; check if access to the emulated drive
  call  _cdemu_emulated_drive
  pop   dx
  push  dx
  cmp   al,dl ;; int13 on emulated drive
  jne   int13_nocdemu

  pop   dx
  pop   cx
  pop   bx
  pop   ax

  pusha
  push  es
  push  ds
  push  ss
  pop   ds

  push  #int13_out
  jmp   _int13_cdemu ;; ELDX not used

int13_nocdemu:
  and   dl,#0xE0 ;; mask to get device class, including cdroms
  cmp   al,dl ;; al is 0x00 or 0x80
  jne   int13_cdemu_inactive ;; inactive for device class

  pop   dx
  pop   cx
  pop   bx
  pop   ax

  push  ax
  push  cx
  push  dx
  push  bx

  dec   dl ;; real drive is dl - 1
  jmp   int13_legacy

int13_cdemu_inactive:
  pop   dx
  pop   cx
  pop   bx
  pop   ax

#endif // BX_ELTORITO_BOOT

int13_noeltorito:

  push  ax
  push  cx
  push  dx
  push  bx

int13_legacy:

  push  dx ;; push eltorito value of dx instead of sp

  push  bp
  push  si
  push  di

  push  es
  push  ds
  push  ss
  pop   ds

  ;; now the 16-bit registers can be restored with:
  ;; pop ds; pop es; popa; iret
  ;; arguments passed to functions should be
  ;; DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS

  ;; DL bit 7 clear = floppy, set = hard disk / CD-ROM
  test  dl, #0x80
  jnz   int13_notfloppy

  push  #int13_out
  jmp   _int13_diskette_function

int13_notfloppy:

#if BX_USE_ATADRV

  cmp   dl, #0xE0
  jb    int13_notcdrom

  // ebx is modified: BSD 5.2.1 boot loader problem
  // someone should figure out which 32 bit register that actually are used

  shr   ebx, #16
  push  bx

  call  _int13_cdrom

  pop   bx
  shl   ebx, #16

  jmp   int13_out

int13_notcdrom:

#endif

int13_disk:
  call  _int13_harddisk

int13_out:
  pop   ds
  pop   es
  popa
  iret
+
+
+;----------
+;- INT18h -
+;----------
;; Boot failure: print panic message and halt (the iret is unreachable
;; after hlt unless an interrupt resumes execution).
int18_handler: ;; Boot Failure routing
  call  _int18_panic_msg
  hlt
  iret
+
+;----------
+;- INT19h -
+;----------
int19_relocated: ;; Boot function, relocated

  ;; int19 was beginning to be really complex, so now it
  ;; just calls an C function, that does the work
  ;; it returns in BL the boot drive, and in AX the boot segment
  ;; the boot segment will be 0x0000 if something has failed

  ;; BP frames the iret image so we can patch the return CS:IP below.
  push bp
  mov  bp, sp

  ;; drop ds
  xor  ax, ax
  mov  ds, ax

  ;; 1st boot device
  mov  ax, #0x0001
  push ax
  call _int19_function
  inc  sp
  inc  sp
  ;; bl contains the boot drive
  ;; ax contains the boot segment or 0 if failure

  test       ax, ax  ;; if ax is 0 try next boot device
  jnz        boot_setup

  ;; 2nd boot device
  mov  ax, #0x0002
  push ax
  call _int19_function
  inc  sp
  inc  sp
  test       ax, ax  ;; if ax is 0 try next boot device
  jnz        boot_setup

  ;; 3rd boot device
  mov  ax, #0x0003
  push ax
  call _int19_function
  inc  sp
  inc  sp
  test       ax, ax  ;; if ax is 0 call int18
  jz         int18_handler

boot_setup:
  ;; Rewrite the saved CS:IP on the stack so the iret jumps to the
  ;; loaded boot sector (segment converted to a CS:IP pair).
  mov dl,    bl      ;; set drive so guest os find it
  shl eax,   #0x04   ;; convert seg to ip
  mov 2[bp], ax      ;; set ip

  shr eax,   #0x04   ;; get cs back
  and ax,    #0xF000 ;; remove what went in ip
  mov 4[bp], ax      ;; set cs
  xor ax,    ax
  mov es,    ax      ;; set es to zero fixes [ 549815 ]
  mov [bp],  ax      ;; set bp to zero
  mov ax,    #0xaa55 ;; set ok flag

  pop bp
  iret               ;; Beam me up Scotty
+
+;----------
+;- INT1Ch -
+;----------
+int1c_handler: ;; User Timer Tick -- default stub, does nothing
+ iret
+
+
+;----------------------
+;- POST: Floppy Drive -
+;----------------------
+floppy_drive_post:
+ ;; Initialise the diskette-related BIOS Data Area fields, derive the
+ ;; controller-information byte (0x48F) from CMOS register 0x10, unmask
+ ;; DMA channel 2, and install the diskette interrupt vectors.
+ ;; Trashes AX, BX; assumes interrupts are not needed (POST context).
+ mov ax, #0x0000
+ mov ds, ax
+
+ mov al, #0x00
+ mov 0x043e, al ;; drive 0 & 1 uncalibrated, no interrupt has occurred
+
+ mov 0x043f, al ;; diskette motor status: read op, drive0, motors off
+
+ mov 0x0440, al ;; diskette motor timeout counter: not active
+ mov 0x0441, al ;; diskette controller status return code
+
+ mov 0x0442, al ;; disk & diskette controller status register 0
+ mov 0x0443, al ;; diskette controller status register 1
+ mov 0x0444, al ;; diskette controller status register 2
+ mov 0x0445, al ;; diskette controller cylinder number
+ mov 0x0446, al ;; diskette controller head number
+ mov 0x0447, al ;; diskette controller sector number
+ mov 0x0448, al ;; diskette controller bytes written
+
+ mov 0x048b, al ;; diskette configuration data
+
+ ;; -----------------------------------------------------------------
+ ;; (048F) diskette controller information
+ ;; CMOS 0x10: high nibble = drive 0 type, low nibble = drive 1 type
+ mov al, #0x10 ;; get CMOS diskette drive type
+ out 0x70, AL
+ in AL, 0x71
+ mov ah, al ;; save byte to AH
+
+look_drive0:
+ shr al, #4 ;; look at top 4 bits for drive 0
+ jz f0_missing ;; jump if no drive0
+ mov bl, #0x07 ;; drive0 determined, multi-rate, has changed line
+ jmp look_drive1
+f0_missing:
+ mov bl, #0x00 ;; no drive0
+
+look_drive1:
+ mov al, ah ;; restore from AH
+ and al, #0x0f ;; look at bottom 4 bits for drive 1
+ jz f1_missing ;; jump if no drive1
+ or bl, #0x70 ;; drive1 determined, multi-rate, has changed line
+f1_missing:
+ ;; leave high bits in BL zeroed
+ mov 0x048f, bl ;; put new val in BDA (diskette controller information)
+ ;; -----------------------------------------------------------------
+
+ mov al, #0x00
+ mov 0x0490, al ;; diskette 0 media state
+ mov 0x0491, al ;; diskette 1 media state
+
+ ;; diskette 0,1 operational starting state
+ ;; drive type has not been determined,
+ ;; has no changed detection line
+ mov 0x0492, al
+ mov 0x0493, al
+
+ mov 0x0494, al ;; diskette 0 current cylinder
+ mov 0x0495, al ;; diskette 1 current cylinder
+
+ mov al, #0x02
+ out #0x0a, al ;; clear DMA-1 channel 2 mask bit
+
+ SET_INT_VECTOR(0x1E, #0xF000, #diskette_param_table2)
+ SET_INT_VECTOR(0x40, #0xF000, #int13_diskette)
+ SET_INT_VECTOR(0x0E, #0xF000, #int0e_handler) ;; IRQ 6
+
+ ret
+
+
+;--------------------
+;- POST: HARD DRIVE -
+;--------------------
+; relocated here because the primary POST area isnt big enough.
+hard_drive_post:
+ ;; Hard-disk POST: initialise the BDA disk fields, install the INT 13h
+ ;; and IRQ14 (INT 76h) vectors, then build the Fixed Disk Parameter
+ ;; Table (FDPT) for drive 0 in the EBDA at offset 0x3D from the CMOS
+ ;; geometry (type-47 "user definable" only -- anything else HALTs).
+ ;; Falls through to check_for_hd1 for the second drive.
+ // IRQ 14 = INT 76h
+ // INT 76h calls INT 15h function ax=9100
+
+ mov al, #0x0a ; 0000 1010 = reserved, disable IRQ 14
+ mov dx, #0x03f6
+ out dx, al
+
+ mov ax, #0x0000
+ mov ds, ax
+ mov 0x0474, al /* hard disk status of last operation */
+ mov 0x0477, al /* hard disk port offset (XT only ???) */
+ mov 0x048c, al /* hard disk status register */
+ mov 0x048d, al /* hard disk error register */
+ mov 0x048e, al /* hard disk task complete flag */
+ mov al, #0x01
+ mov 0x0475, al /* hard disk number attached */
+ mov al, #0xc0
+ mov 0x0476, al /* hard disk control byte */
+ SET_INT_VECTOR(0x13, #0xF000, #int13_handler)
+ SET_INT_VECTOR(0x76, #0xF000, #int76_handler)
+ ;; INT 41h: hard disk 0 configuration pointer
+ ;; INT 46h: hard disk 1 configuration pointer
+ SET_INT_VECTOR(0x41, #EBDA_SEG, #0x003D)
+ SET_INT_VECTOR(0x46, #EBDA_SEG, #0x004D)
+
+ ;; move disk geometry data from CMOS to EBDA disk parameter table(s)
+ ;; CMOS 0x12: high nibble = drive 0 type, low nibble = drive 1 type;
+ ;; 0xF means "extended type in CMOS 0x19/0x1A"
+ mov al, #0x12
+ out #0x70, al
+ in al, #0x71
+ and al, #0xf0
+ cmp al, #0xf0
+ je post_d0_extended
+ jmp check_for_hd1 ;; no drive 0 -- skip straight to drive 1
+post_d0_extended:
+ mov al, #0x19
+ out #0x70, al
+ in al, #0x71
+ cmp al, #47 ;; decimal 47 - user definable
+ je post_d0_type47
+ HALT(__LINE__)
+post_d0_type47:
+ ;; CMOS purpose param table offset
+ ;; 1b cylinders low 0
+ ;; 1c cylinders high 1
+ ;; 1d heads 2
+ ;; 1e write pre-comp low 5
+ ;; 1f write pre-comp high 6
+ ;; 20 retries/bad map/heads>8 8
+ ;; 21 landing zone low C
+ ;; 22 landing zone high D
+ ;; 23 sectors/track E
+
+ mov ax, #EBDA_SEG
+ mov ds, ax
+
+ ;;; Filling EBDA table for hard disk 0.
+ mov al, #0x1f
+ out #0x70, al
+ in al, #0x71
+ mov ah, al
+ mov al, #0x1e
+ out #0x70, al
+ in al, #0x71
+ mov (0x003d + 0x05), ax ;; write precomp word
+
+ mov al, #0x20
+ out #0x70, al
+ in al, #0x71
+ mov (0x003d + 0x08), al ;; drive control byte
+
+ mov al, #0x22
+ out #0x70, al
+ in al, #0x71
+ mov ah, al
+ mov al, #0x21
+ out #0x70, al
+ in al, #0x71
+ mov (0x003d + 0x0C), ax ;; landing zone word
+
+ mov al, #0x1c ;; get cylinders word in AX
+ out #0x70, al
+ in al, #0x71 ;; high byte
+ mov ah, al
+ mov al, #0x1b
+ out #0x70, al
+ in al, #0x71 ;; low byte
+ mov bx, ax ;; BX = cylinders
+
+ mov al, #0x1d
+ out #0x70, al
+ in al, #0x71
+ mov cl, al ;; CL = heads
+
+ mov al, #0x23
+ out #0x70, al
+ in al, #0x71
+ mov dl, al ;; DL = sectors
+
+ cmp bx, #1024
+ jnbe hd0_post_logical_chs ;; if cylinders > 1024, use translated style CHS
+
+hd0_post_physical_chs:
+ ;; no logical CHS mapping used, just physical CHS
+ ;; use Standard Fixed Disk Parameter Table (FDPT)
+ mov (0x003d + 0x00), bx ;; number of physical cylinders
+ mov (0x003d + 0x02), cl ;; number of physical heads
+ mov (0x003d + 0x0E), dl ;; number of physical sectors
+ jmp check_for_hd1
+
+hd0_post_logical_chs:
+ ;; complies with Phoenix style Translated Fixed Disk Parameter Table (FDPT)
+ mov (0x003d + 0x09), bx ;; number of physical cylinders
+ mov (0x003d + 0x0b), cl ;; number of physical heads
+ mov (0x003d + 0x04), dl ;; number of physical sectors
+ mov (0x003d + 0x0e), dl ;; number of logical sectors (same)
+ mov al, #0xa0
+ mov (0x003d + 0x03), al ;; A0h signature, indicates translated table
+
+ ;; halve cylinders / double heads until cylinders <= 1024
+ cmp bx, #2048
+ jnbe hd0_post_above_2048
+ ;; 1024 < c <= 2048 cylinders
+ shr bx, #0x01
+ shl cl, #0x01
+ jmp hd0_post_store_logical
+
+hd0_post_above_2048:
+ cmp bx, #4096
+ jnbe hd0_post_above_4096
+ ;; 2048 < c <= 4096 cylinders
+ shr bx, #0x02
+ shl cl, #0x02
+ jmp hd0_post_store_logical
+
+hd0_post_above_4096:
+ cmp bx, #8192
+ jnbe hd0_post_above_8192
+ ;; 4096 < c <= 8192 cylinders
+ shr bx, #0x03
+ shl cl, #0x03
+ jmp hd0_post_store_logical
+
+hd0_post_above_8192:
+ ;; 8192 < c <= 16384 cylinders
+ shr bx, #0x04
+ shl cl, #0x04
+
+hd0_post_store_logical:
+ mov (0x003d + 0x00), bx ;; number of logical cylinders
+ mov (0x003d + 0x02), cl ;; number of logical heads
+ ;; checksum: byte sum of the 15 table bytes plus this byte must be 0
+ mov cl, #0x0f ;; repeat count
+ mov si, #0x003d ;; offset to disk0 FDPT
+ mov al, #0x00 ;; sum
+hd0_post_checksum_loop:
+ add al, [si]
+ inc si
+ dec cl
+ jnz hd0_post_checksum_loop
+ not al ;; now take 2s complement
+ inc al
+ mov [si], al
+;;; Done filling EBDA table for hard disk 0.
+
+
+check_for_hd1:
+ ;; Mirror of the drive-0 logic above, for hard disk 1: if CMOS 0x12
+ ;; reports a second drive (low nibble), build its FDPT in the EBDA at
+ ;; offset 0x4D from CMOS registers 0x24..0x2C (type-47 only).
+ ;; is there really a second hard disk? if not, return now
+ mov al, #0x12
+ out #0x70, al
+ in al, #0x71
+ and al, #0x0f
+ jnz post_d1_exists
+ ret
+post_d1_exists:
+ ;; check that the hd type is really 0x0f.
+ cmp al, #0x0f
+ jz post_d1_extended
+ HALT(__LINE__)
+post_d1_extended:
+ ;; check that the extended type is 47 - user definable
+ mov al, #0x1a
+ out #0x70, al
+ in al, #0x71
+ cmp al, #47 ;; decimal 47 - user definable
+ je post_d1_type47
+ HALT(__LINE__)
+post_d1_type47:
+ ;; Table for disk1.
+ ;; CMOS purpose param table offset
+ ;; 0x24 cylinders low 0
+ ;; 0x25 cylinders high 1
+ ;; 0x26 heads 2
+ ;; 0x27 write pre-comp low 5
+ ;; 0x28 write pre-comp high 6
+ ;; 0x29 heads>8 8
+ ;; 0x2a landing zone low C
+ ;; 0x2b landing zone high D
+ ;; 0x2c sectors/track E
+;;; Fill EBDA table for hard disk 1.
+ mov ax, #EBDA_SEG
+ mov ds, ax
+ mov al, #0x28
+ out #0x70, al
+ in al, #0x71
+ mov ah, al
+ mov al, #0x27
+ out #0x70, al
+ in al, #0x71
+ mov (0x004d + 0x05), ax ;; write precomp word
+
+ mov al, #0x29
+ out #0x70, al
+ in al, #0x71
+ mov (0x004d + 0x08), al ;; drive control byte
+
+ mov al, #0x2b
+ out #0x70, al
+ in al, #0x71
+ mov ah, al
+ mov al, #0x2a
+ out #0x70, al
+ in al, #0x71
+ mov (0x004d + 0x0C), ax ;; landing zone word
+
+ mov al, #0x25 ;; get cylinders word in AX
+ out #0x70, al
+ in al, #0x71 ;; high byte
+ mov ah, al
+ mov al, #0x24
+ out #0x70, al
+ in al, #0x71 ;; low byte
+ mov bx, ax ;; BX = cylinders
+
+ mov al, #0x26
+ out #0x70, al
+ in al, #0x71
+ mov cl, al ;; CL = heads
+
+ mov al, #0x2c
+ out #0x70, al
+ in al, #0x71
+ mov dl, al ;; DL = sectors
+
+ cmp bx, #1024
+ jnbe hd1_post_logical_chs ;; if cylinders > 1024, use translated style CHS
+
+hd1_post_physical_chs:
+ ;; no logical CHS mapping used, just physical CHS
+ ;; use Standard Fixed Disk Parameter Table (FDPT)
+ mov (0x004d + 0x00), bx ;; number of physical cylinders
+ mov (0x004d + 0x02), cl ;; number of physical heads
+ mov (0x004d + 0x0E), dl ;; number of physical sectors
+ ret
+
+hd1_post_logical_chs:
+ ;; complies with Phoenix style Translated Fixed Disk Parameter Table (FDPT)
+ mov (0x004d + 0x09), bx ;; number of physical cylinders
+ mov (0x004d + 0x0b), cl ;; number of physical heads
+ mov (0x004d + 0x04), dl ;; number of physical sectors
+ mov (0x004d + 0x0e), dl ;; number of logical sectors (same)
+ mov al, #0xa0
+ mov (0x004d + 0x03), al ;; A0h signature, indicates translated table
+
+ ;; halve cylinders / double heads until cylinders <= 1024
+ cmp bx, #2048
+ jnbe hd1_post_above_2048
+ ;; 1024 < c <= 2048 cylinders
+ shr bx, #0x01
+ shl cl, #0x01
+ jmp hd1_post_store_logical
+
+hd1_post_above_2048:
+ cmp bx, #4096
+ jnbe hd1_post_above_4096
+ ;; 2048 < c <= 4096 cylinders
+ shr bx, #0x02
+ shl cl, #0x02
+ jmp hd1_post_store_logical
+
+hd1_post_above_4096:
+ cmp bx, #8192
+ jnbe hd1_post_above_8192
+ ;; 4096 < c <= 8192 cylinders
+ shr bx, #0x03
+ shl cl, #0x03
+ jmp hd1_post_store_logical
+
+hd1_post_above_8192:
+ ;; 8192 < c <= 16384 cylinders
+ shr bx, #0x04
+ shl cl, #0x04
+
+hd1_post_store_logical:
+ mov (0x004d + 0x00), bx ;; number of logical cylinders
+ mov (0x004d + 0x02), cl ;; number of logical heads
+ ;; checksum: byte sum of the 15 table bytes plus this byte must be 0
+ mov cl, #0x0f ;; repeat count
+ mov si, #0x004d ;; offset to disk1 FDPT
+ mov al, #0x00 ;; sum
+hd1_post_checksum_loop:
+ add al, [si]
+ inc si
+ dec cl
+ jnz hd1_post_checksum_loop
+ not al ;; now take 2s complement
+ inc al
+ mov [si], al
+;;; Done filling EBDA table for hard disk 1.
+
+ ret
+
+;--------------------
+;- POST: EBDA segment
+;--------------------
+; relocated here because the primary POST area isnt big enough.
+ebda_post:
+ ;; Initialise the Extended BIOS Data Area: store its size (in KiB) in
+ ;; its first byte and publish its segment at BDA 40:0E.
+#if BX_USE_EBDA
+ mov ax, #EBDA_SEG
+ mov ds, ax
+ mov byte ptr [0x0], #EBDA_SIZE
+#endif
+ xor ax, ax ; mov EBDA seg into 40E
+ mov ds, ax
+ mov word ptr [0x40E], #EBDA_SEG
+ ret;;
+
+;--------------------
+;- POST: EOI + jmp via [0x40:67)
+;--------------------
+; relocated here because the primary POST area isnt big enough.
+eoi_jmp_post:
+ ;; CMOS shutdown status 0x05: acknowledge both PICs, then resume the
+ ;; OS through the far pointer it stored at 0040:0067 (BDA reset vector).
+ call eoi_both_pics
+
+ xor ax, ax
+ mov ds, ax
+
+ jmp far ptr [0x467]
+
+
+;--------------------
+eoi_both_pics:
+ ;; Send a non-specific EOI to the slave PIC then fall through to the
+ ;; master. eoi_master_pic is also a direct entry point. Trashes AL.
+ mov al, #0x20
+ out #0xA0, al ;; slave PIC EOI
+eoi_master_pic:
+ mov al, #0x20
+ out #0x20, al ;; master PIC EOI
+ ret
+
+;--------------------
+BcdToBin:
+ ;; in: AL in BCD format (00..99)
+ ;; out: AL in binary format, AH will always be 0
+ ;; (mul produces AX; for BCD input the result fits in AL)
+ ;; trashes BX
+ mov bl, al
+ and bl, #0x0f ;; bl has low digit
+ shr al, #4 ;; al has high digit
+ mov bh, #10
+ mul al, bh ;; multiply high digit by 10 (result in AX)
+ add al, bl ;; then add low digit
+ ret
+
+;--------------------
+timer_tick_post:
+ ;; Setup the Timer Ticks Count (0x46C:dword) and
+ ;; Timer Ticks Roller Flag (0x470:byte)
+ ;; The Timer Ticks Count needs to be set according to
+ ;; the current CMOS time, as if ticks have been occurring
+ ;; at 18.2hz since midnight up to this point. Calculating
+ ;; this is a little complicated. Here are the factors I gather
+ ;; regarding this. 14,318,180 hz was the original clock speed,
+ ;; chosen so it could be divided by either 3 to drive the 5Mhz CPU
+ ;; at the time, or 4 to drive the CGA video adapter. The div3
+ ;; source was divided again by 4 to feed a 1.193Mhz signal to
+ ;; the timer. With a maximum 16bit timer count, this is again
+ ;; divided down by 65536 to 18.2hz.
+ ;;
+ ;; 14,318,180 Hz clock
+ ;; /3 = 4,772,726 Hz fed to original 5Mhz CPU
+ ;; /4 = 1,193,181 Hz fed to timer
+ ;; /65536 (maximum timer count) = 18.20650736 ticks/second
+ ;; 1 second = 18.20650736 ticks
+ ;; 1 minute = 1092.390442 ticks
+ ;; 1 hour = 65543.42651 ticks
+ ;;
+ ;; Given the values in the CMOS clock, one could calculate
+ ;; the number of ticks by the following:
+ ;; ticks = (BcdToBin(seconds) * 18.206507) +
+ ;; (BcdToBin(minutes) * 1092.3904)
+ ;; (BcdToBin(hours) * 65543.427)
+ ;; To get a little more accuracy, since I'm using integer
+ ;; arithmetic, I use:
+ ;; ticks = (BcdToBin(seconds) * 18206507) / 1000000 +
+ ;; (BcdToBin(minutes) * 10923904) / 10000 +
+ ;; (BcdToBin(hours) * 65543427) / 1000
+
+ ;; assuming DS=0000
+ ;; Note: BcdToBin trashes BX; EAX is zeroed before each call so the
+ ;; 32-bit mul/div below see a clean value.
+
+ ;; get CMOS seconds
+ xor eax, eax ;; clear EAX
+ mov al, #0x00
+ out #0x70, al
+ in al, #0x71 ;; AL has CMOS seconds in BCD
+ call BcdToBin ;; EAX now has seconds in binary
+ mov edx, #18206507
+ mul eax, edx
+ mov ebx, #1000000
+ xor edx, edx
+ div eax, ebx
+ mov ecx, eax ;; ECX will accumulate total ticks
+
+ ;; get CMOS minutes
+ xor eax, eax ;; clear EAX
+ mov al, #0x02
+ out #0x70, al
+ in al, #0x71 ;; AL has CMOS minutes in BCD
+ call BcdToBin ;; EAX now has minutes in binary
+ mov edx, #10923904
+ mul eax, edx
+ mov ebx, #10000
+ xor edx, edx
+ div eax, ebx
+ add ecx, eax ;; add to total ticks
+
+ ;; get CMOS hours
+ xor eax, eax ;; clear EAX
+ mov al, #0x04
+ out #0x70, al
+ in al, #0x71 ;; AL has CMOS hours in BCD
+ call BcdToBin ;; EAX now has hours in binary
+ mov edx, #65543427
+ mul eax, edx
+ mov ebx, #1000
+ xor edx, edx
+ div eax, ebx
+ add ecx, eax ;; add to total ticks
+
+ mov 0x46C, ecx ;; Timer Ticks Count
+ xor al, al
+ mov 0x470, al ;; Timer Ticks Rollover Flag
+ ret
+
+;--------------------
+int76_handler:
+ ;; IRQ14 (hard disk) handler: set the BDA "task complete" flag at
+ ;; 0040:008E (= 0x48E) and EOI both PICs. AX/DS preserved.
+ ;; record completion in BIOS task complete flag
+ push ax
+ push ds
+ mov ax, #0x0040
+ mov ds, ax
+ mov 0x008E, #0xff
+ call eoi_both_pics
+ pop ds
+ pop ax
+ iret
+
+
+;--------------------
+#if BX_APM
+
+use32 386
+#define APM_PROT32
+#include "apmbios.S"
+
+use16 386
+#define APM_PROT16
+#include "apmbios.S"
+
+#define APM_REAL
+#include "apmbios.S"
+
+#endif
+
+;--------------------
+#if BX_PCIBIOS
+use32 386
+.align 16
+;; BIOS32 Service Directory header ("_32_"), searched for by OSes in
+;; the 0xE0000-0xFFFFF range on a 16-byte boundary.
+bios32_structure:
+ db 0x5f, 0x33, 0x32, 0x5f ;; "_32_" signature
+ dw bios32_entry_point, 0xf ;; 32 bit physical address
+ db 0 ;; revision level
+ ;; length in paragraphs and checksum stored in a word to prevent errors
+ dw (~(((bios32_entry_point >> 8) + (bios32_entry_point & 0xff) + 0x32) \
+ & 0xff) << 8) + 0x01
+ db 0,0,0,0,0 ;; reserved
+
+.align 16
+bios32_entry_point:
+ ;; BIOS32 directory entry: EAX = service ID. Only "$PCI" (0x49435024)
+ ;; is supported, and only after probing config space for host bridge
+ ;; ID 0x12378086 (device 0x1237, vendor 0x8086 -- presumably the
+ ;; i440FX host bridge; confirm against the emulated chipset).
+ ;; Returns AL=0 with EBX=base, ECX=length, EDX=entry on success,
+ ;; AL=0x80 for an unknown service.
+ pushf
+ cmp eax, #0x49435024
+ jne unknown_service
+ mov eax, #0x80000000
+ mov dx, #0x0cf8
+ out dx, eax
+ mov dx, #0x0cfc
+ in eax, dx
+ cmp eax, #0x12378086
+ jne unknown_service
+ mov ebx, #0x000f0000
+ mov ecx, #0
+ mov edx, #pcibios_protected
+ xor al, al
+ jmp bios32_end
+unknown_service:
+ mov al, #0x80
+bios32_end:
+ popf
+ retf
+
+.align 16
+pcibios_protected:
+ ;; 32-bit protected-mode PCI BIOS entry. AL selects the function:
+ ;; 01 installation check, 02 find device, 08/09/0a read config
+ ;; byte/word/dword, 0b/0c/0d write config byte/word/dword.
+ ;; (Other PCI BIOS functions, e.g. 03 find class code, are not
+ ;; implemented here and return AH=0x81.)
+ ;; Success: AH=0, CF clear. Failure: AH=error code, CF set.
+ pushf
+ cli
+ push esi
+ push edi
+ cmp al, #0x01 ;; installation check
+ jne pci_pro_f02
+ mov bx, #0x0210 ;; version 2.10
+ mov cx, #0 ;; last PCI bus number
+ mov edx, #0x20494350 ;; "PCI " signature
+ mov al, #0x01
+ jmp pci_pro_ok
+pci_pro_f02: ;; find pci device
+ cmp al, #0x02
+ jne pci_pro_f08
+ shl ecx, #16 ;; ECX = device ID : vendor ID
+ mov cx, dx
+ mov bx, #0x0000
+ mov di, #0x00 ;; config register 0 (vendor/device)
+pci_pro_devloop:
+ call pci_pro_select_reg
+ mov dx, #0x0cfc
+ in eax, dx
+ cmp eax, ecx
+ jne pci_pro_nextdev
+ cmp si, #0 ;; SI = index of matching device wanted
+ je pci_pro_ok
+ dec si
+pci_pro_nextdev:
+ inc bx ;; next bus/dev/func, 256 slots on bus 0
+ cmp bx, #0x0100
+ jne pci_pro_devloop
+ mov ah, #0x86 ;; device not found
+ jmp pci_pro_fail
+pci_pro_f08: ;; read configuration byte
+ cmp al, #0x08
+ jne pci_pro_f09
+ call pci_pro_select_reg
+ push edx
+ mov dx, di
+ and dx, #0x03 ;; byte lane within the dword window
+ add dx, #0x0cfc
+ in al, dx
+ pop edx
+ mov cl, al
+ jmp pci_pro_ok
+pci_pro_f09: ;; read configuration word
+ cmp al, #0x09
+ jne pci_pro_f0a
+ call pci_pro_select_reg
+ push edx
+ mov dx, di
+ and dx, #0x02
+ add dx, #0x0cfc
+ in ax, dx
+ pop edx
+ mov cx, ax
+ jmp pci_pro_ok
+pci_pro_f0a: ;; read configuration dword
+ cmp al, #0x0a
+ jne pci_pro_f0b
+ call pci_pro_select_reg
+ push edx
+ mov dx, #0x0cfc
+ in eax, dx
+ pop edx
+ mov ecx, eax
+ jmp pci_pro_ok
+pci_pro_f0b: ;; write configuration byte
+ cmp al, #0x0b
+ jne pci_pro_f0c
+ call pci_pro_select_reg
+ push edx
+ mov dx, di
+ and dx, #0x03
+ add dx, #0x0cfc
+ mov al, cl
+ out dx, al
+ pop edx
+ jmp pci_pro_ok
+pci_pro_f0c: ;; write configuration word
+ cmp al, #0x0c
+ jne pci_pro_f0d
+ call pci_pro_select_reg
+ push edx
+ mov dx, di
+ and dx, #0x02
+ add dx, #0x0cfc
+ mov ax, cx
+ out dx, ax
+ pop edx
+ jmp pci_pro_ok
+pci_pro_f0d: ;; write configuration dword
+ cmp al, #0x0d
+ jne pci_pro_unknown
+ call pci_pro_select_reg
+ push edx
+ mov dx, #0x0cfc
+ mov eax, ecx
+ out dx, eax
+ pop edx
+ jmp pci_pro_ok
+pci_pro_unknown:
+ mov ah, #0x81 ;; function not supported
+pci_pro_fail:
+ pop edi
+ pop esi
+ sti
+ popf
+ stc
+ retf
+pci_pro_ok:
+ xor ah, ah
+ pop edi
+ pop esi
+ sti
+ popf
+ clc
+ retf
+
+pci_pro_select_reg:
+ ;; Write the PCI config-address register (0xCF8):
+ ;; 0x80000000 | (bus:dev:func from BX) << 8 | (register from DI & 0xFC).
+ ;; Trashes EAX; preserves EDX.
+ push edx
+ mov eax, #0x800000
+ mov ax, bx
+ shl eax, #8
+ and di, #0xff
+ or ax, di
+ and al, #0xfc ;; dword-align the register number
+ mov dx, #0x0cf8
+ out dx, eax
+ pop edx
+ ret
+
+use16 386
+
+pcibios_real:
+ ;; Real-mode PCI BIOS services (INT 1Ah AH=B1h dispatch target).
+ ;; First verifies the host bridge responds with ID 0x12378086 in
+ ;; config space; otherwise returns AH=0xFF with CF set.
+ ;; AL selects the same function set as pcibios_protected above.
+ push eax
+ push dx
+ mov eax, #0x80000000
+ mov dx, #0x0cf8
+ out dx, eax
+ mov dx, #0x0cfc
+ in eax, dx
+ cmp eax, #0x12378086
+ je pci_present
+ pop dx
+ pop eax
+ mov ah, #0xff ;; no PCI present
+ stc
+ ret
+pci_present:
+ pop dx
+ pop eax
+ cmp al, #0x01 ;; installation check
+ jne pci_real_f02
+ mov ax, #0x0001
+ mov bx, #0x0210 ;; version 2.10
+ mov cx, #0 ;; last PCI bus number
+ mov edx, #0x20494350 ;; "PCI " signature
+ mov edi, #0xf0000 ;; protected-mode entry point address
+ mov di, #pcibios_protected
+ clc
+ ret
+pci_real_f02: ;; find pci device
+ ;; ESI/EDI are saved here for all remaining functions and popped
+ ;; again at pci_real_ok / pci_real_fail.
+ push esi
+ push edi
+ cmp al, #0x02
+ jne pci_real_f08
+ shl ecx, #16 ;; ECX = device ID : vendor ID
+ mov cx, dx
+ mov bx, #0x0000
+ mov di, #0x00
+pci_real_devloop:
+ call pci_real_select_reg
+ mov dx, #0x0cfc
+ in eax, dx
+ cmp eax, ecx
+ jne pci_real_nextdev
+ cmp si, #0 ;; SI = index of matching device wanted
+ je pci_real_ok
+ dec si
+pci_real_nextdev:
+ inc bx
+ cmp bx, #0x0100
+ jne pci_real_devloop
+ mov dx, cx ;; restore caller's CX/DX split
+ shr ecx, #16
+ mov ah, #0x86 ;; device not found
+ jmp pci_real_fail
+pci_real_f08: ;; read configuration byte
+ cmp al, #0x08
+ jne pci_real_f09
+ call pci_real_select_reg
+ push dx
+ mov dx, di
+ and dx, #0x03 ;; byte lane within the dword window
+ add dx, #0x0cfc
+ in al, dx
+ pop dx
+ mov cl, al
+ jmp pci_real_ok
+pci_real_f09: ;; read configuration word
+ cmp al, #0x09
+ jne pci_real_f0a
+ call pci_real_select_reg
+ push dx
+ mov dx, di
+ and dx, #0x02
+ add dx, #0x0cfc
+ in ax, dx
+ pop dx
+ mov cx, ax
+ jmp pci_real_ok
+pci_real_f0a: ;; read configuration dword
+ cmp al, #0x0a
+ jne pci_real_f0b
+ call pci_real_select_reg
+ push dx
+ mov dx, #0x0cfc
+ in eax, dx
+ pop dx
+ mov ecx, eax
+ jmp pci_real_ok
+pci_real_f0b: ;; write configuration byte
+ cmp al, #0x0b
+ jne pci_real_f0c
+ call pci_real_select_reg
+ push dx
+ mov dx, di
+ and dx, #0x03
+ add dx, #0x0cfc
+ mov al, cl
+ out dx, al
+ pop dx
+ jmp pci_real_ok
+pci_real_f0c: ;; write configuration word
+ cmp al, #0x0c
+ jne pci_real_f0d
+ call pci_real_select_reg
+ push dx
+ mov dx, di
+ and dx, #0x02
+ add dx, #0x0cfc
+ mov ax, cx
+ out dx, ax
+ pop dx
+ jmp pci_real_ok
+pci_real_f0d: ;; write configuration dword
+ cmp al, #0x0d
+ jne pci_real_unknown
+ call pci_real_select_reg
+ push dx
+ mov dx, #0x0cfc
+ mov eax, ecx
+ out dx, eax
+ pop dx
+ jmp pci_real_ok
+pci_real_unknown:
+ mov ah, #0x81 ;; function not supported
+pci_real_fail:
+ pop edi
+ pop esi
+ stc
+ ret
+pci_real_ok:
+ xor ah, ah
+ pop edi
+ pop esi
+ clc
+ ret
+
+pci_real_select_reg:
+ ;; Real-mode twin of pci_pro_select_reg: program 0xCF8 with
+ ;; 0x80000000 | BX << 8 | (DI & 0xFC). Trashes EAX; preserves DX.
+ push dx
+ mov eax, #0x800000
+ mov ax, bx
+ shl eax, #8
+ and di, #0xff
+ or ax, di
+ and al, #0xfc ;; dword-align the register number
+ mov dx, #0x0cf8
+ out dx, eax
+ pop dx
+ ret
+
+.align 16
+;; PCI IRQ Routing Table ("$PIR") consumed by pcibios_init below and
+;; discoverable by OSes. Header (32 bytes) + 6 slot entries (16 bytes
+;; each): the embedded PCI-to-ISA bridge plus 5 PCI slots, with the
+;; INTA#-INTD# link values rotated per slot.
+pci_routing_table_structure:
+ db 0x24, 0x50, 0x49, 0x52 ;; "$PIR" signature
+ db 0, 1 ;; version
+ dw 32 + (6 * 16) ;; table size
+ db 0 ;; PCI interrupt router bus
+ db 0x08 ;; PCI interrupt router DevFunc
+ dw 0x0000 ;; PCI exclusive IRQs
+ dw 0x8086 ;; compatible PCI interrupt router vendor ID
+ dw 0x7000 ;; compatible PCI interrupt router device ID
+ dw 0,0 ;; Miniport data
+ db 0,0,0,0,0,0,0,0,0,0,0 ;; reserved
+ db 0x07 ;; checksum
+ ;; first slot entry PCI-to-ISA (embedded)
+ db 0 ;; pci bus number
+ db 0x08 ;; pci device number (bit 7-3)
+ db 0x60 ;; link value INTA#: pointer into PCI2ISA config space
+ dw 0xdef8 ;; IRQ bitmap INTA#
+ db 0x61 ;; link value INTB#
+ dw 0xdef8 ;; IRQ bitmap INTB#
+ db 0x62 ;; link value INTC#
+ dw 0xdef8 ;; IRQ bitmap INTC#
+ db 0x63 ;; link value INTD#
+ dw 0xdef8 ;; IRQ bitmap INTD#
+ db 0 ;; physical slot (0 = embedded)
+ db 0 ;; reserved
+ ;; second slot entry: 1st PCI slot
+ db 0 ;; pci bus number
+ db 0x10 ;; pci device number (bit 7-3)
+ db 0x61 ;; link value INTA#
+ dw 0xdef8 ;; IRQ bitmap INTA#
+ db 0x62 ;; link value INTB#
+ dw 0xdef8 ;; IRQ bitmap INTB#
+ db 0x63 ;; link value INTC#
+ dw 0xdef8 ;; IRQ bitmap INTC#
+ db 0x60 ;; link value INTD#
+ dw 0xdef8 ;; IRQ bitmap INTD#
+ db 1 ;; physical slot
+ db 0 ;; reserved
+ ;; third slot entry: 2nd PCI slot
+ db 0 ;; pci bus number
+ db 0x18 ;; pci device number (bit 7-3)
+ db 0x62 ;; link value INTA#
+ dw 0xdef8 ;; IRQ bitmap INTA#
+ db 0x63 ;; link value INTB#
+ dw 0xdef8 ;; IRQ bitmap INTB#
+ db 0x60 ;; link value INTC#
+ dw 0xdef8 ;; IRQ bitmap INTC#
+ db 0x61 ;; link value INTD#
+ dw 0xdef8 ;; IRQ bitmap INTD#
+ db 2 ;; physical slot
+ db 0 ;; reserved
+ ;; 4th slot entry: 3rd PCI slot
+ db 0 ;; pci bus number
+ db 0x20 ;; pci device number (bit 7-3)
+ db 0x63 ;; link value INTA#
+ dw 0xdef8 ;; IRQ bitmap INTA#
+ db 0x60 ;; link value INTB#
+ dw 0xdef8 ;; IRQ bitmap INTB#
+ db 0x61 ;; link value INTC#
+ dw 0xdef8 ;; IRQ bitmap INTC#
+ db 0x62 ;; link value INTD#
+ dw 0xdef8 ;; IRQ bitmap INTD#
+ db 3 ;; physical slot
+ db 0 ;; reserved
+ ;; 5th slot entry: 4th PCI slot
+ db 0 ;; pci bus number
+ db 0x28 ;; pci device number (bit 7-3)
+ db 0x60 ;; link value INTA#
+ dw 0xdef8 ;; IRQ bitmap INTA#
+ db 0x61 ;; link value INTB#
+ dw 0xdef8 ;; IRQ bitmap INTB#
+ db 0x62 ;; link value INTC#
+ dw 0xdef8 ;; IRQ bitmap INTC#
+ db 0x63 ;; link value INTD#
+ dw 0xdef8 ;; IRQ bitmap INTD#
+ db 4 ;; physical slot
+ db 0 ;; reserved
+ ;; 6th slot entry: 5th PCI slot
+ db 0 ;; pci bus number
+ db 0x30 ;; pci device number (bit 7-3)
+ db 0x61 ;; link value INTA#
+ dw 0xdef8 ;; IRQ bitmap INTA#
+ db 0x62 ;; link value INTB#
+ dw 0xdef8 ;; IRQ bitmap INTB#
+ db 0x63 ;; link value INTC#
+ dw 0xdef8 ;; IRQ bitmap INTC#
+ db 0x60 ;; link value INTD#
+ dw 0xdef8 ;; IRQ bitmap INTD#
+ db 5 ;; physical slot
+ db 0 ;; reserved
+
+pci_irq_list:
+ ;; IRQs handed out in order by pcibios_init when a PIRQ route
+ ;; register is still unprogrammed.
+ db 11, 10, 9, 5;
+
+pcibios_init_sel_reg:
+ ;; POST-time variant of the config-address helper: program 0xCF8 with
+ ;; 0x80000000 | BX << 8 | (DL & 0xFC). Preserves EAX; trashes DX
+ ;; contents only via the caller's DL argument.
+ push eax
+ mov eax, #0x800000
+ mov ax, bx
+ shl eax, #8
+ and dl, #0xfc ;; dword-align the register number
+ or al, dl
+ mov dx, #0x0cf8
+ out dx, eax
+ pop eax
+ ret
+
+pcibios_init_set_elcr:
+ ;; Mark the IRQ in AL as level-triggered in the ELCR (ports 0x4D0 for
+ ;; IRQ0-7, 0x4D1 for IRQ8-15) by OR-ing in its bit. Trashes BL.
+ push ax
+ push cx
+ mov dx, #0x04d0
+ test al, #0x08 ;; IRQ >= 8 -> use ELCR2
+ jz is_master_pic
+ inc dx
+ and al, #0x07
+is_master_pic:
+ mov cl, al
+ mov bl, #0x01
+ shl bl, cl ;; bit mask for this IRQ
+ in al, dx
+ or al, bl
+ out dx, al
+ pop cx
+ pop ax
+ ret
+
+pcibios_init:
+ ;; POST-time PCI interrupt routing: walk the $PIR table above and, for
+ ;; every present device with an interrupt pin, program the router's
+ ;; PIRQ route-control registers and the ELCR, assigning IRQs from
+ ;; pci_irq_list to links that are not yet routed.
+ push ds
+ push bp
+ mov ax, #0xf000
+ mov ds, ax
+ mov dx, #0x04d0 ;; reset ELCR1 + ELCR2
+ mov al, #0x00
+ out dx, al
+ inc dx
+ out dx, al
+ mov si, #pci_routing_table_structure
+ mov bh, [si+8] ;; router bus
+ mov bl, [si+9] ;; router devfunc
+ mov dl, #0x00
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ in eax, dx
+ cmp eax, [si+12] ;; check irq router
+ jne pci_init_end
+ mov dl, [si+34]
+ call pcibios_init_sel_reg
+ push bx ;; save irq router bus + devfunc
+ mov dx, #0x0cfc
+ mov ax, #0x8080
+ out dx, ax ;; reset PIRQ route control
+ inc dx
+ inc dx
+ out dx, ax
+ mov ax, [si+6] ;; table size -> number of slot entries
+ sub ax, #0x20
+ shr ax, #4
+ mov cx, ax
+ add si, #0x20 ;; set pointer to 1st entry
+ ;; local frame: [bp] = router bus/devfunc, [bp-2] = pci_irq_list
+ ;; pointer, and the word below holds the current PCI function number
+ ;; (referenced as the byte at [bp-3] -- NOTE(review): the high byte
+ ;; of the pushed zero word; verify against the push order).
+ mov bp, sp
+ mov ax, #pci_irq_list
+ push ax
+ xor ax, ax
+ push ax
+pci_init_loop1: ;; per $PIR slot entry
+ mov bh, [si]
+ mov bl, [si+1]
+pci_init_loop2: ;; per function within the slot
+ mov dl, #0x00
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ in ax, dx
+ cmp ax, #0xffff ;; vendor ID 0xFFFF = no device
+ jnz pci_test_int_pin
+ test bl, #0x07
+ jz next_pir_entry
+ jmp next_pci_func
+pci_test_int_pin:
+ mov dl, #0x3c ;; config 0x3D = interrupt pin
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfd
+ in al, dx
+ and al, #0x07
+ jz next_pci_func ;; no interrupt pin
+ dec al ;; determine pirq reg
+ mov dl, #0x03
+ mul al, dl
+ add al, #0x02
+ xor ah, ah
+ mov bx, ax ;; offset of this pin's link value in the entry
+ mov al, [si+bx]
+ mov dl, al
+ mov bx, [bp] ;; router bus/devfunc
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ and al, #0x03
+ add dl, al
+ in al, dx
+ cmp al, #0x80 ;; bit 7 set = route not yet programmed
+ jb pirq_found
+ mov bx, [bp-2] ;; pci irq list pointer
+ mov al, [bx]
+ out dx, al ;; program the PIRQ route with the next free IRQ
+ inc bx
+ mov [bp-2], bx
+ call pcibios_init_set_elcr
+pirq_found:
+ mov bh, [si]
+ mov bl, [si+1]
+ add bl, [bp-3] ;; pci function number
+ mov dl, #0x3c
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ out dx, al ;; write interrupt line register
+next_pci_func:
+ inc byte ptr[bp-3]
+ inc bl
+ test bl, #0x07
+ jnz pci_init_loop2
+next_pir_entry:
+ add si, #0x10 ;; next 16-byte slot entry
+ mov byte ptr[bp-3], #0x00
+ loop pci_init_loop1
+ mov sp, bp
+ pop bx
+pci_init_end:
+ pop bp
+ pop ds
+ ret
+#endif // BX_PCIBIOS
+
+; parallel port detection: base address in DX, index in BX, timeout in CL
+; Probes by writing 0xAA to the data port and reading it back; on
+; success stores the I/O address in the BDA word table at 0x408+2*BX,
+; the timeout in the byte table at 0x478+BX, and increments BX.
+; Assumes DS points at segment 0.
+detect_parport:
+ push dx
+ add dx, #2
+ in al, dx
+ and al, #0xdf ; clear input mode
+ out dx, al
+ pop dx
+ mov al, #0xaa
+ out dx, al
+ in al, dx
+ cmp al, #0xaa
+ jne no_parport
+ push bx
+ shl bx, #1 ; word index into the address table
+ mov [bx+0x408], dx ; Parallel I/O address
+ pop bx
+ mov [bx+0x478], cl ; Parallel printer timeout
+ inc bx
+no_parport:
+ ret
+
+; serial port detection: base address in DX, index in BX, timeout in CL
+; Probes the UART by writing 0x02 to the IER (base+1) and checking it
+; reads back as 0x02 while the IIR (base+2) low bits also read 0x02;
+; on success stores the I/O address at BDA 0x400+2*BX, the timeout at
+; 0x47C+BX, and increments BX. Assumes DS points at segment 0.
+detect_serial:
+ push dx
+ inc dx
+ mov al, #0x02
+ out dx, al
+ in al, dx
+ cmp al, #0x02
+ jne no_serial
+ inc dx
+ in al, dx
+ cmp al, #0x02
+ jne no_serial
+ dec dx
+ xor al, al
+ out dx, al ; disable UART interrupts again
+ pop dx
+ push bx
+ shl bx, #1 ; word index into the address table
+ mov [bx+0x400], dx ; Serial I/O address
+ pop bx
+ mov [bx+0x47c], cl ; Serial timeout
+ inc bx
+ ret
+no_serial:
+ pop dx
+ ret
+
+rom_checksum:
+ ;; Sum the option ROM at DS:0000 over (byte [2]) * 512 bytes.
+ ;; Returns the result in the Zero Flag: ZF set = checksum OK (sum
+ ;; mod 256 is zero). The final "and al" sets ZF, and the pops that
+ ;; follow do not modify flags. All registers preserved.
+ push ax
+ push bx
+ push cx
+ xor ax, ax
+ xor bx, bx
+ xor cx, cx
+ mov ch, [2] ;; CX = length in 512-byte blocks * 256...
+ shl cx, #1 ;; ...* 2 = length in bytes
+checksum_loop:
+ add al, [bx]
+ inc bx
+ loop checksum_loop
+ and al, #0xff ;; sets ZF when the sum is 0
+ pop cx
+ pop bx
+ pop ax
+ ret
+
+rom_scan:
+ ;; Scan for existence of valid expansion ROMS.
+ ;; Video ROM: from 0xC0000..0xC7FFF in 2k increments
+ ;; General ROM: from 0xC8000..0xDFFFF in 2k increments
+ ;; System ROM: only 0xE0000
+ ;;
+ ;; Header:
+ ;; Offset Value
+ ;; 0 0x55
+ ;; 1 0xAA
+ ;; 2 ROM length in 512-byte blocks
+ ;; 3 ROM initialization entry point (FAR CALL)
+ ;; Each valid ROM's init entry point is far-called with DS=0.
+
+ mov cx, #0xc000 ;; CX holds the candidate ROM segment
+rom_scan_loop:
+ mov ds, cx
+ mov ax, #0x0004 ;; start with increment of 4 (512-byte) blocks = 2k
+ cmp [0], #0xAA55 ;; look for signature
+ jne rom_scan_increment
+ call rom_checksum
+ jnz rom_scan_increment ;; bad checksum (ZF clear) -- skip
+ mov al, [2] ;; change increment to ROM length in 512-byte blocks
+
+ ;; We want our increment in 512-byte quantities, rounded to
+ ;; the nearest 2k quantity, since we only scan at 2k intervals.
+ test al, #0x03
+ jz block_count_rounded
+ and al, #0xfc ;; needs rounding up
+ add al, #0x04
+block_count_rounded:
+
+ xor bx, bx ;; Restore DS back to 0000:
+ mov ds, bx
+ push ax ;; Save AX
+ ;; Push addr of ROM entry point
+ push cx ;; Push seg
+ push #0x0003 ;; Push offset
+ mov bp, sp ;; Call ROM init routine using seg:off on stack
+ db 0xff ;; hand-encoded "call far ss:[bp+0]" (opcode FF /3)
+ db 0x5e
+ db 0
+ cli ;; In case expansion ROM BIOS turns IF on
+ add sp, #2 ;; Pop offset value
+ pop cx ;; Pop seg value (restore CX)
+ pop ax ;; Restore AX
+rom_scan_increment:
+ shl ax, #5 ;; convert 512-bytes blocks to 16-byte increments
+ ;; because the segment selector is shifted left 4 bits.
+ add cx, ax
+ cmp cx, #0xe000
+ jbe rom_scan_loop
+
+ xor ax, ax ;; Restore DS back to 0000:
+ mov ds, ax
+ ret
+
+;; for 'C' strings and other data, insert them here with
+;; a the following hack:
+;; DATA_SEG_DEFS_HERE
+
+
+;--------
+;- POST -
+;--------
+.org 0xe05b ; POST Entry Point
+post:
+
+ xor ax, ax
+
+ ;; first reset the DMA controllers
+ out 0x0d,al
+ out 0xda,al
+
+ ;; then initialize the DMA controllers
+ mov al, #0xC0
+ out 0xD6, al ; cascade mode of channel 4 enabled
+ mov al, #0x00
+ out 0xD4, al ; unmask channel 4
+
+ ;; Examine CMOS shutdown status.
+ mov AL, #0x0f
+ out 0x70, AL
+ in AL, 0x71
+
+ ;; backup status
+ mov bl, al
+
+ ;; Reset CMOS shutdown status.
+ mov AL, #0x0f
+ out 0x70, AL ; select CMOS register Fh
+ mov AL, #0x00
+ out 0x71, AL ; set shutdown action to normal
+
+ ;; Examine CMOS shutdown status.
+ mov al, bl
+
+ ;; 0x00, 0x09, 0x0D+ = normal startup
+ cmp AL, #0x00
+ jz normal_post
+ cmp AL, #0x0d
+ jae normal_post
+ cmp AL, #0x09
+ je normal_post
+
+ ;; 0x05 = eoi + jmp via [0x40:0x67] jump
+ cmp al, #0x05
+ je eoi_jmp_post
+
+ ;; Examine CMOS shutdown status.
+ ;; 0x01,0x02,0x03,0x04,0x06,0x07,0x08, 0x0a, 0x0b, 0x0c = Unimplemented shutdown status.
+ push bx
+ call _shutdown_status_panic
+
+#if 0
+ HALT(__LINE__)
+ ;
+ ;#if 0
+ ; 0xb0, 0x20, /* mov al, #0x20 */
+ ; 0xe6, 0x20, /* out 0x20, al ;send EOI to PIC */
+ ;#endif
+ ;
+ pop es
+ pop ds
+ popa
+ iret
+#endif
+
+normal_post:
+ ; case 0: normal startup
+
+ cli
+ mov ax, #0xfffe
+ mov sp, ax
+ mov ax, #0x0000
+ mov ds, ax
+ mov ss, ax
+
+ ;; zero out BIOS data area (40:00..40:ff)
+ mov es, ax
+ mov cx, #0x0080 ;; 128 words
+ mov di, #0x0400
+ cld
+ rep
+ stosw
+
+ call _log_bios_start
+
+ ;; set all interrupts to default handler
+ mov bx, #0x0000 ;; offset index
+ mov cx, #0x0100 ;; counter (256 interrupts)
+ mov ax, #dummy_iret_handler
+ mov dx, #0xF000
+
+post_default_ints:
+ mov [bx], ax
+ inc bx
+ inc bx
+ mov [bx], dx
+ inc bx
+ inc bx
+ loop post_default_ints
+
+ ;; set vector 0x79 to zero
+ ;; this is used by 'gardian angel' protection system
+ SET_INT_VECTOR(0x79, #0, #0)
+
+ ;; base memory in K 40:13 (word)
+ mov ax, #BASE_MEM_IN_K
+ mov 0x0413, ax
+
+
+ ;; Manufacturing Test 40:12
+ ;; zerod out above
+
+ ;; Warm Boot Flag 0040:0072
+ ;; value of 1234h = skip memory checks
+ ;; zerod out above
+
+
+ ;; Printer Services vector
+ SET_INT_VECTOR(0x17, #0xF000, #int17_handler)
+
+ ;; Bootstrap failure vector
+ SET_INT_VECTOR(0x18, #0xF000, #int18_handler)
+
+ ;; Bootstrap Loader vector
+ SET_INT_VECTOR(0x19, #0xF000, #int19_handler)
+
+ ;; User Timer Tick vector
+ SET_INT_VECTOR(0x1c, #0xF000, #int1c_handler)
+
+ ;; Memory Size Check vector
+ SET_INT_VECTOR(0x12, #0xF000, #int12_handler)
+
+ ;; Equipment Configuration Check vector
+ SET_INT_VECTOR(0x11, #0xF000, #int11_handler)
+
+ ;; System Services
+ SET_INT_VECTOR(0x15, #0xF000, #int15_handler)
+
+ ;; EBDA setup
+ call ebda_post
+
+ ;; PIT setup
+ SET_INT_VECTOR(0x08, #0xF000, #int08_handler)
+ ;; int 1C already points at dummy_iret_handler (above)
+ mov al, #0x34 ; timer0: binary count, 16bit count, mode 2
+ out 0x43, al
+#ifdef VMXASSIST
+ mov al, #0x0b ; #0xe90b = 20 Hz (temporary, until we fix xen/vmx support)
+ out 0x40, al ; lsb
+ mov al, #0xe9
+ out 0x40, al ; msb
+#else
+ mov al, #0x00 ; maximum count of 0000H = 18.2Hz
+ out 0x40, al
+ out 0x40, al
+#endif
+
+ ;; Keyboard
+ SET_INT_VECTOR(0x09, #0xF000, #int09_handler)
+ SET_INT_VECTOR(0x16, #0xF000, #int16_handler)
+
+ xor ax, ax
+ mov ds, ax
+ mov 0x0417, al /* keyboard shift flags, set 1 */
+ mov 0x0418, al /* keyboard shift flags, set 2 */
+ mov 0x0419, al /* keyboard alt-numpad work area */
+ mov 0x0471, al /* keyboard ctrl-break flag */
+ mov 0x0497, al /* keyboard status flags 4 */
+ mov al, #0x10
+ mov 0x0496, al /* keyboard status flags 3 */
+
+
+ /* keyboard head of buffer pointer */
+ mov bx, #0x001E
+ mov 0x041A, bx
+
+ /* keyboard end of buffer pointer */
+ mov 0x041C, bx
+
+ /* keyboard pointer to start of buffer */
+ mov bx, #0x001E
+ mov 0x0480, bx
+
+ /* keyboard pointer to end of buffer */
+ mov bx, #0x003E
+ mov 0x0482, bx
+
+ /* init the keyboard */
+ call _keyboard_init
+
+ ;; mov CMOS Equipment Byte to BDA Equipment Word
+ mov ax, 0x0410
+ mov al, #0x14
+ out 0x70, al
+ in al, 0x71
+ mov 0x0410, ax
+
+
+ ;; Parallel setup
+ SET_INT_VECTOR(0x0F, #0xF000, #dummy_iret_handler)
+ xor ax, ax
+ mov ds, ax
+ xor bx, bx
+ mov cl, #0x14 ; timeout value
+ mov dx, #0x378 ; Parallel I/O address, port 1
+ call detect_parport
+ mov dx, #0x278 ; Parallel I/O address, port 2
+ call detect_parport
+ shl bx, #0x0e
+ mov ax, 0x410 ; Equipment word bits 14..15 determining # parallel ports
+ and ax, #0x3fff
+ or ax, bx ; set number of parallel ports
+ mov 0x410, ax
+
+ ;; Serial setup
+ SET_INT_VECTOR(0x0C, #0xF000, #dummy_iret_handler)
+ SET_INT_VECTOR(0x14, #0xF000, #int14_handler)
+ xor bx, bx
+ mov cl, #0x0a ; timeout value
+ mov dx, #0x03f8 ; Serial I/O address, port 1
+ call detect_serial
+ mov dx, #0x02f8 ; Serial I/O address, port 2
+ call detect_serial
+ mov dx, #0x03e8 ; Serial I/O address, port 3
+ call detect_serial
+ mov dx, #0x02e8 ; Serial I/O address, port 4
+ call detect_serial
+ shl bx, #0x09
+ mov ax, 0x410 ; Equipment word bits 9..11 determining # serial ports
+ and ax, #0xf1ff
+ or ax, bx ; set number of serial ports
+ mov 0x410, ax
+
+ ;; CMOS RTC
+ SET_INT_VECTOR(0x1A, #0xF000, #int1a_handler)
+ SET_INT_VECTOR(0x4A, #0xF000, #dummy_iret_handler)
+ SET_INT_VECTOR(0x70, #0xF000, #int70_handler)
+ ;; BIOS DATA AREA 0x4CE ???
+ call timer_tick_post
+
+ ;; PS/2 mouse setup
+ SET_INT_VECTOR(0x74, #0xF000, #int74_handler)
+
+ ;; IRQ13 (FPU exception) setup
+ SET_INT_VECTOR(0x75, #0xF000, #int75_handler)
+
+ ;; Video setup
+ SET_INT_VECTOR(0x10, #0xF000, #int10_handler)
+
+ ;; PIC
+ mov al, #0x11 ; send initialisation commands
+ out 0x20, al
+ out 0xa0, al
+#ifdef VMXASSIST
+ ;; The vm86 emulator expects interrupts to be mapped beyond the reserved
+ ;; vectors (0 through 31). Since rombios fully controls the hardware, we
+ ;; map it the way the emulator needs it and expect that it will do the
+ ;; proper 8086 interrupt translation (that is, master pic base is at 0x8
+ ;; and slave pic base is at 0x70).
+ mov al, #0x20
+ out 0x21, al
+ mov al, #0x28
+ out 0xa1, al
+#else
+ mov al, #0x08
+ out 0x21, al
+ mov al, #0x70
+ out 0xa1, al
+#endif
+ mov al, #0x04
+ out 0x21, al
+ mov al, #0x02
+ out 0xa1, al
+ mov al, #0x01
+ out 0x21, al
+ out 0xa1, al
+ mov al, #0xb8
+ out 0x21, AL ;master pic: unmask IRQ 0, 1, 2, 6
+#if BX_USE_PS2_MOUSE
+ mov al, #0x8f
+#else
+ mov al, #0x9f
+#endif
+ out 0xa1, AL ;slave pic: unmask IRQ 12, 13, 14
+
+#ifdef VMXASSIST
+ call _copy_e820_table
+#endif
+
+ call pcibios_init
+
+ call rom_scan
+
+ call _print_bios_banner
+
+ ;;
+ ;; Floppy setup
+ ;;
+ call floppy_drive_post
+
+#if BX_USE_ATADRV
+
+ ;;
+ ;; Hard Drive setup
+ ;;
+ call hard_drive_post
+
+ ;;
+ ;; ATA/ATAPI driver setup
+ ;;
+ call _ata_init
+ call _ata_detect
+ ;;
+#else // BX_USE_ATADRV
+
+ ;;
+ ;; Hard Drive setup
+ ;;
+ call hard_drive_post
+
+#endif // BX_USE_ATADRV
+
+#if BX_ELTORITO_BOOT
+ ;;
+ ;; eltorito floppy/harddisk emulation from cd
+ ;;
+ call _cdemu_init
+ ;;
+#endif // BX_ELTORITO_BOOT
+
+ int #0x19
+ //JMP_EP(0x0064) ; INT 19h location
+
+
+.org 0xe2c3 ; NMI Handler Entry Point
+nmi:
+ ;; FIXME the NMI handler should not panic
+ ;; but iret when called from int75 (fpu exception)
+ call _nmi_handler_msg
+ iret
+
+int75_handler:
+ out 0xf0, al // clear irq13
+ call eoi_both_pics // clear interrupt
+ int 2 // legacy nmi call
+ iret
+
+;-------------------------------------------
+;- INT 13h Fixed Disk Services Entry Point -
+;-------------------------------------------
+.org 0xe3fe ; INT 13h Fixed Disk Services Entry Point
+int13_handler:
+ //JMPL(int13_relocated)
+ jmp int13_relocated
+
+.org 0xe401 ; Fixed Disk Parameter Table
+
+;----------
+;- INT19h -
+;----------
+.org 0xe6f2 ; INT 19h Boot Load Service Entry Point
+int19_handler:
+
+ jmp int19_relocated
+;-------------------------------------------
+;- System BIOS Configuration Data Table
+;-------------------------------------------
+.org BIOS_CONFIG_TABLE
+db 0x08 ; Table size (bytes) -Lo
+db 0x00 ; Table size (bytes) -Hi
+db SYS_MODEL_ID
+db SYS_SUBMODEL_ID
+db BIOS_REVISION
+; Feature byte 1
+; b7: 1=DMA channel 3 used by hard disk
+; b6: 1=2 interrupt controllers present
+; b5: 1=RTC present
+; b4: 1=BIOS calls int 15h/4Fh every key
+; b3: 1=wait for extern event supported (Int 15h/41h)
+; b2: 1=extended BIOS data area used
+; b1: 0=AT or ESDI bus, 1=MicroChannel
+; b0: 1=Dual bus (MicroChannel + ISA)
+db (0 << 7) | \
+ (1 << 6) | \
+ (1 << 5) | \
+ (BX_CALL_INT15_4F << 4) | \
+ (0 << 3) | \
+ (BX_USE_EBDA << 2) | \
+ (0 << 1) | \
+ (0 << 0)
+; Feature byte 2
+; b7: 1=32-bit DMA supported
+; b6: 1=int16h, function 9 supported
+; b5: 1=int15h/C6h (get POS data) supported
+; b4: 1=int15h/C7h (get mem map info) supported
+; b3: 1=int15h/C8h (en/dis CPU) supported
+; b2: 1=non-8042 kb controller
+; b1: 1=data streaming supported
+; b0: reserved
+db (0 << 7) | \
+ (1 << 6) | \
+ (0 << 5) | \
+ (0 << 4) | \
+ (0 << 3) | \
+ (0 << 2) | \
+ (0 << 1) | \
+ (0 << 0)
+; Feature byte 3
+; b7: not used
+; b6: reserved
+; b5: reserved
+; b4: POST supports ROM-to-RAM enable/disable
+; b3: SCSI on system board
+; b2: info panel installed
+; b1: Initial Machine Load (IML) system - BIOS on disk
+; b0: SCSI supported in IML
+db 0x00
+; Feature byte 4
+; b7: IBM private
+; b6: EEPROM present
+; b5-3: ABIOS presence (011 = not supported)
+; b2: private
+; b1: memory split above 16Mb supported
+; b0: POSTEXT directly supported by POST
+db 0x00
+; Feature byte 5 (IBM)
+; b1: enhanced mouse
+; b0: flash EPROM
+db 0x00
+
+
+
+.org 0xe729 ; Baud Rate Generator Table
+
+;----------
+;- INT14h -
+;----------
+.org 0xe739 ; INT 14h Serial Communications Service Entry Point
+int14_handler:
+ push ds
+ pusha
+ mov ax, #0x0000
+ mov ds, ax
+ call _int14_function
+ popa
+ pop ds
+ iret
+
+
+;----------------------------------------
+;- INT 16h Keyboard Service Entry Point -
+;----------------------------------------
+.org 0xe82e
+int16_handler:
+
+ sti
+ push ds
+ pushf
+ pusha
+
+ cmp ah, #0x00
+ je int16_F00
+ cmp ah, #0x10
+ je int16_F00
+
+ mov bx, #0xf000
+ mov ds, bx
+ call _int16_function
+ popa
+ popf
+ pop ds
+ jz int16_zero_set
+
+int16_zero_clear:
+ push bp
+ mov bp, sp
+ //SEG SS
+ and BYTE [bp + 0x06], #0xbf
+ pop bp
+ iret
+
+int16_zero_set:
+ push bp
+ mov bp, sp
+ //SEG SS
+ or BYTE [bp + 0x06], #0x40
+ pop bp
+ iret
+
+int16_F00:
+ mov bx, #0x0040
+ mov ds, bx
+
+int16_wait_for_key:
+ cli
+ mov bx, 0x001a
+ cmp bx, 0x001c
+ jne int16_key_found
+ sti
+ nop
+#if 0
+ /* no key yet, call int 15h, function AX=9002 */
+ 0x50, /* push AX */
+ 0xb8, 0x02, 0x90, /* mov AX, #0x9002 */
+ 0xcd, 0x15, /* int 15h */
+ 0x58, /* pop AX */
+ 0xeb, 0xea, /* jmp WAIT_FOR_KEY */
+#endif
+ jmp int16_wait_for_key
+
+int16_key_found:
+ mov bx, #0xf000
+ mov ds, bx
+ call _int16_function
+ popa
+ popf
+ pop ds
+#if 0
+ /* notify int16 complete w/ int 15h, function AX=9102 */
+ 0x50, /* push AX */
+ 0xb8, 0x02, 0x91, /* mov AX, #0x9102 */
+ 0xcd, 0x15, /* int 15h */
+ 0x58, /* pop AX */
+#endif
+ iret
+
+
+
+;-------------------------------------------------
+;- INT09h : Keyboard Hardware Service Entry Point -
+;-------------------------------------------------
+.org 0xe987
+int09_handler:
+ cli
+ push ax
+
+ mov al, #0xAD ;;disable keyboard
+ out #0x64, al
+
+ mov al, #0x0B
+ out #0x20, al
+ in al, #0x20
+ and al, #0x02
+ jz int09_finish
+
+ in al, #0x60 ;;read key from keyboard controller
+ //test al, #0x80 ;;look for key release
+ //jnz int09_process_key ;; don't pass releases to intercept?
+
+ ;; check for extended key
+ cmp al, #0xe0
+ jne int09_call_int15_4f
+
+ push ds
+ xor ax, ax
+ mov ds, ax
+ mov al, BYTE [0x496] ;; mf2_state |= 0x01
+ or al, #0x01
+ mov BYTE [0x496], al
+ pop ds
+
+ in al, #0x60 ;;read another key from keyboard controller
+
+ sti
+
+int09_call_int15_4f:
+ push ds
+ pusha
+#ifdef BX_CALL_INT15_4F
+ mov ah, #0x4f ;; allow for keyboard intercept
+ stc
+ int #0x15
+ jnc int09_done
+#endif
+
+
+//int09_process_key:
+ mov bx, #0xf000
+ mov ds, bx
+ call _int09_function
+
+int09_done:
+ popa
+ pop ds
+ cli
+ call eoi_master_pic
+
+int09_finish:
+ mov al, #0xAE ;;enable keyboard
+ out #0x64, al
+ pop ax
+ iret
+
+
+
+
+;----------------------------------------
+;- INT 13h Diskette Service Entry Point -
+;----------------------------------------
+.org 0xec59
+int13_diskette:
+ jmp int13_noeltorito
+
+;---------------------------------------------
+;- INT 0Eh Diskette Hardware ISR Entry Point -
+;---------------------------------------------
+.org 0xef57 ; INT 0Eh Diskette Hardware ISR Entry Point
+int0e_handler:
+ push ax
+ push dx
+ mov dx, #0x03f4
+ in al, dx
+ and al, #0xc0
+ cmp al, #0xc0
+ je int0e_normal
+ mov dx, #0x03f5
+ mov al, #0x08 ; sense interrupt status
+ out dx, al
+int0e_loop1:
+ mov dx, #0x03f4
+ in al, dx
+ and al, #0xc0
+ cmp al, #0xc0
+ jne int0e_loop1
+int0e_loop2:
+ mov dx, #0x03f5
+ in al, dx
+ mov dx, #0x03f4
+ in al, dx
+ and al, #0xc0
+ cmp al, #0xc0
+ je int0e_loop2
+int0e_normal:
+ push ds
+ mov ax, #0x0000 ;; segment 0000
+ mov ds, ax
+ call eoi_master_pic
+ mov al, 0x043e
+ or al, #0x80 ;; diskette interrupt has occurred
+ mov 0x043e, al
+ pop ds
+ pop dx
+ pop ax
+ iret
+
+
+.org 0xefc7 ; Diskette Controller Parameter Table
+diskette_param_table:
+;; Since no provisions are made for multiple drive types, most
+;; values in this table are ignored. I set parameters for 1.44M
+;; floppy here
+db 0xAF
+db 0x02 ;; head load time 0000001, DMA used
+db 0x25
+db 0x02
+db 18
+db 0x1B
+db 0xFF
+db 0x6C
+db 0xF6
+db 0x0F
+db 0x08
+
+
+;----------------------------------------
+;- INT17h : Printer Service Entry Point -
+;----------------------------------------
+.org 0xefd2
+int17_handler:
+ push ds
+ pusha
+ mov ax, #0x0000
+ mov ds, ax
+ call _int17_function
+ popa
+ pop ds
+ iret
+
+diskette_param_table2:
+;; New diskette parameter table adding 3 parameters from IBM
+;; Since no provisions are made for multiple drive types, most
+;; values in this table are ignored. I set parameters for 1.44M
+;; floppy here
+db 0xAF
+db 0x02 ;; head load time 0000001, DMA used
+db 0x25
+db 0x02
+db 18
+db 0x1B
+db 0xFF
+db 0x6C
+db 0xF6
+db 0x0F
+db 0x08
+db 79 ;; maximum track
+db 0 ;; data transfer rate
+db 4 ;; drive type in cmos
+
+.org 0xf045 ; INT 10 Functions 0-Fh Entry Point
+ HALT(__LINE__)
+ iret
+
+;----------
+;- INT10h -
+;----------
+.org 0xf065 ; INT 10h Video Support Service Entry Point
+int10_handler:
+ ;; don't do anything, since the VGA BIOS handles int10h requests
+ iret
+
+.org 0xf0a4 ; MDA/CGA Video Parameter Table (INT 1Dh)
+
+;----------
+;- INT12h -
+;----------
+.org 0xf841 ; INT 12h Memory Size Service Entry Point
+; ??? different for Pentium (machine check)?
+int12_handler:
+ push ds
+ mov ax, #0x0040
+ mov ds, ax
+ mov ax, 0x0013
+ pop ds
+ iret
+
+;----------
+;- INT11h -
+;----------
+.org 0xf84d ; INT 11h Equipment List Service Entry Point
+int11_handler:
+ push ds
+ mov ax, #0x0040
+ mov ds, ax
+ mov ax, 0x0010
+ pop ds
+ iret
+
+;----------
+;- INT15h -
+;----------
+.org 0xf859 ; INT 15h System Services Entry Point
+int15_handler:
+ pushf
+#if BX_APM
+ cmp ah, #0x53
+ je apm_call
+#endif
+ push ds
+ push es
+ cmp ah, #0x86
+ je int15_handler32
+ cmp ah, #0xE8
+ je int15_handler32
+ pusha
+#if BX_USE_PS2_MOUSE
+ cmp ah, #0xC2
+ je int15_handler_mouse
+#endif
+ call _int15_function
+int15_handler_mouse_ret:
+ popa
+int15_handler32_ret:
+ pop es
+ pop ds
+ popf
+ jmp iret_modify_cf
+#if BX_APM
+apm_call:
+ jmp _apmreal_entry
+#endif
+
+#if BX_USE_PS2_MOUSE
+int15_handler_mouse:
+ call _int15_function_mouse
+ jmp int15_handler_mouse_ret
+#endif
+
+int15_handler32:
+ pushad
+ call _int15_function32
+ popad
+ jmp int15_handler32_ret
+
+;; Protected mode IDT descriptor
+;;
+;; I just make the limit 0, so the machine will shutdown
+;; if an exception occurs during protected mode memory
+;; transfers.
+;;
+;; Set base to f0000 to correspond to beginning of BIOS,
+;; in case I actually define an IDT later
+;; Set limit to 0
+
+pmode_IDT_info:
+dw 0x0000 ;; limit 15:00
+dw 0x0000 ;; base 15:00
+db 0x0f ;; base 23:16
+
+;; Real mode IDT descriptor
+;;
+;; Set to typical real-mode values.
+;; base = 000000
+;; limit = 03ff
+
+rmode_IDT_info:
+dw 0x03ff ;; limit 15:00
+dw 0x0000 ;; base 15:00
+db 0x00 ;; base 23:16
+
+
+;----------
+;- INT1Ah -
+;----------
+.org 0xfe6e ; INT 1Ah Time-of-day Service Entry Point
+int1a_handler:
+#if BX_PCIBIOS
+ cmp ah, #0xb1
+ jne int1a_normal
+ call pcibios_real
+ jc pcibios_error
+ retf 2
+pcibios_error:
+ mov bl, ah
+ mov ah, #0xb1
+ push ds
+ pusha
+ mov ax, ss ; set readable descriptor to ds, for calling pcibios
+ mov ds, ax ; on 16bit protected mode.
+ jmp int1a_callfunction
+int1a_normal:
+#endif
+ push ds
+ pusha
+ xor ax, ax
+ mov ds, ax
+int1a_callfunction:
+ call _int1a_function
+ popa
+ pop ds
+ iret
+
+;;
+;; int70h: IRQ8 - CMOS RTC
+;;
+int70_handler:
+ push ds
+ pusha
+ xor ax, ax
+ mov ds, ax
+ call _int70_function
+ popa
+ pop ds
+ iret
+
+;---------
+;- INT08 -
+;---------
+.org 0xfea5 ; INT 08h System Timer ISR Entry Point
+int08_handler:
+ sti
+ push eax
+ push ds
+ xor ax, ax
+ mov ds, ax
+
+ ;; time to turn off drive(s)?
+ mov al,0x0440
+ or al,al
+ jz int08_floppy_off
+ dec al
+ mov 0x0440,al
+ jnz int08_floppy_off
+ ;; turn motor(s) off
+ push dx
+ mov dx,#0x03f2
+ in al,dx
+ and al,#0xcf
+ out dx,al
+ pop dx
+int08_floppy_off:
+
+ mov eax, 0x046c ;; get ticks dword
+ inc eax
+
+ ;; compare eax to one day's worth of timer ticks at 18.2 hz
+ cmp eax, #0x001800B0
+ jb int08_store_ticks
+ ;; there has been a midnight rollover at this point
+ xor eax, eax ;; zero out counter
+ inc BYTE 0x0470 ;; increment rollover flag
+
+int08_store_ticks:
+ mov 0x046c, eax ;; store new ticks dword
+ ;; chain to user timer tick INT #0x1c
+ //pushf
+ //;; call_ep [ds:loc]
+ //CALL_EP( 0x1c << 2 )
+ int #0x1c
+ cli
+ call eoi_master_pic
+ pop ds
+ pop eax
+ iret
+
+.org 0xfef3 ; Initial Interrupt Vector Offsets Loaded by POST
+
+
+.org 0xff00
+.ascii "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
+
+;------------------------------------------------
+;- IRET Instruction for Dummy Interrupt Handler -
+;------------------------------------------------
+.org 0xff53 ; IRET Instruction for Dummy Interrupt Handler
+dummy_iret_handler:
+ iret
+
+.org 0xff54 ; INT 05h Print Screen Service Entry Point
+ HALT(__LINE__)
+ iret
+
+.org 0xfff0 ; Power-up Entry Point
+ jmp 0xf000:post
+
+.org 0xfff5 ; ASCII Date ROM was built - 8 characters in MM/DD/YY
+.ascii BIOS_BUILD_DATE
+
+.org 0xfffe ; System Model ID
+db SYS_MODEL_ID
+db 0x00 ; filler
+
+.org 0xfa6e ;; Character Font for 320x200 & 640x200 Graphics (lower 128 characters)
+ASM_END
+/*
+ * This font comes from the fntcol16.zip package (c) by Joseph Gil
+ * found at ftp://ftp.simtel.net/pub/simtelnet/msdos/screen/fntcol16.zip
+ * This font is public domain
+ */
+static Bit8u vgafont8[128*8]=
+{
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x7e, 0x81, 0xa5, 0x81, 0xbd, 0x99, 0x81, 0x7e,
+ 0x7e, 0xff, 0xdb, 0xff, 0xc3, 0xe7, 0xff, 0x7e,
+ 0x6c, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, 0x00,
+ 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x10, 0x00,
+ 0x38, 0x7c, 0x38, 0xfe, 0xfe, 0x7c, 0x38, 0x7c,
+ 0x10, 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x7c,
+ 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x18, 0x00, 0x00,
+ 0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff,
+ 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
+ 0xff, 0xc3, 0x99, 0xbd, 0xbd, 0x99, 0xc3, 0xff,
+ 0x0f, 0x07, 0x0f, 0x7d, 0xcc, 0xcc, 0xcc, 0x78,
+ 0x3c, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18,
+ 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x70, 0xf0, 0xe0,
+ 0x7f, 0x63, 0x7f, 0x63, 0x63, 0x67, 0xe6, 0xc0,
+ 0x99, 0x5a, 0x3c, 0xe7, 0xe7, 0x3c, 0x5a, 0x99,
+ 0x80, 0xe0, 0xf8, 0xfe, 0xf8, 0xe0, 0x80, 0x00,
+ 0x02, 0x0e, 0x3e, 0xfe, 0x3e, 0x0e, 0x02, 0x00,
+ 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x7e, 0x3c, 0x18,
+ 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x66, 0x00,
+ 0x7f, 0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x00,
+ 0x3e, 0x63, 0x38, 0x6c, 0x6c, 0x38, 0xcc, 0x78,
+ 0x00, 0x00, 0x00, 0x00, 0x7e, 0x7e, 0x7e, 0x00,
+ 0x18, 0x3c, 0x7e, 0x18, 0x7e, 0x3c, 0x18, 0xff,
+ 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
+ 0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00,
+ 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
+ 0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xfe, 0x00, 0x00,
+ 0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00,
+ 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x00, 0x00,
+ 0x00, 0xff, 0xff, 0x7e, 0x3c, 0x18, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x30, 0x78, 0x78, 0x30, 0x30, 0x00, 0x30, 0x00,
+ 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x6c, 0x6c, 0xfe, 0x6c, 0xfe, 0x6c, 0x6c, 0x00,
+ 0x30, 0x7c, 0xc0, 0x78, 0x0c, 0xf8, 0x30, 0x00,
+ 0x00, 0xc6, 0xcc, 0x18, 0x30, 0x66, 0xc6, 0x00,
+ 0x38, 0x6c, 0x38, 0x76, 0xdc, 0xcc, 0x76, 0x00,
+ 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x30, 0x60, 0x60, 0x60, 0x30, 0x18, 0x00,
+ 0x60, 0x30, 0x18, 0x18, 0x18, 0x30, 0x60, 0x00,
+ 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
+ 0x00, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x30, 0x60,
+ 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x30, 0x00,
+ 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00,
+ 0x7c, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0x7c, 0x00,
+ 0x30, 0x70, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x00,
+ 0x78, 0xcc, 0x0c, 0x38, 0x60, 0xcc, 0xfc, 0x00,
+ 0x78, 0xcc, 0x0c, 0x38, 0x0c, 0xcc, 0x78, 0x00,
+ 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, 0x0c, 0x1e, 0x00,
+ 0xfc, 0xc0, 0xf8, 0x0c, 0x0c, 0xcc, 0x78, 0x00,
+ 0x38, 0x60, 0xc0, 0xf8, 0xcc, 0xcc, 0x78, 0x00,
+ 0xfc, 0xcc, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x00,
+ 0x78, 0xcc, 0xcc, 0x78, 0xcc, 0xcc, 0x78, 0x00,
+ 0x78, 0xcc, 0xcc, 0x7c, 0x0c, 0x18, 0x70, 0x00,
+ 0x00, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30, 0x00,
+ 0x00, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30, 0x60,
+ 0x18, 0x30, 0x60, 0xc0, 0x60, 0x30, 0x18, 0x00,
+ 0x00, 0x00, 0xfc, 0x00, 0x00, 0xfc, 0x00, 0x00,
+ 0x60, 0x30, 0x18, 0x0c, 0x18, 0x30, 0x60, 0x00,
+ 0x78, 0xcc, 0x0c, 0x18, 0x30, 0x00, 0x30, 0x00,
+ 0x7c, 0xc6, 0xde, 0xde, 0xde, 0xc0, 0x78, 0x00,
+ 0x30, 0x78, 0xcc, 0xcc, 0xfc, 0xcc, 0xcc, 0x00,
+ 0xfc, 0x66, 0x66, 0x7c, 0x66, 0x66, 0xfc, 0x00,
+ 0x3c, 0x66, 0xc0, 0xc0, 0xc0, 0x66, 0x3c, 0x00,
+ 0xf8, 0x6c, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00,
+ 0xfe, 0x62, 0x68, 0x78, 0x68, 0x62, 0xfe, 0x00,
+ 0xfe, 0x62, 0x68, 0x78, 0x68, 0x60, 0xf0, 0x00,
+ 0x3c, 0x66, 0xc0, 0xc0, 0xce, 0x66, 0x3e, 0x00,
+ 0xcc, 0xcc, 0xcc, 0xfc, 0xcc, 0xcc, 0xcc, 0x00,
+ 0x78, 0x30, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0x1e, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0x78, 0x00,
+ 0xe6, 0x66, 0x6c, 0x78, 0x6c, 0x66, 0xe6, 0x00,
+ 0xf0, 0x60, 0x60, 0x60, 0x62, 0x66, 0xfe, 0x00,
+ 0xc6, 0xee, 0xfe, 0xfe, 0xd6, 0xc6, 0xc6, 0x00,
+ 0xc6, 0xe6, 0xf6, 0xde, 0xce, 0xc6, 0xc6, 0x00,
+ 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x38, 0x00,
+ 0xfc, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00,
+ 0x78, 0xcc, 0xcc, 0xcc, 0xdc, 0x78, 0x1c, 0x00,
+ 0xfc, 0x66, 0x66, 0x7c, 0x6c, 0x66, 0xe6, 0x00,
+ 0x78, 0xcc, 0xe0, 0x70, 0x1c, 0xcc, 0x78, 0x00,
+ 0xfc, 0xb4, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xfc, 0x00,
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x00,
+ 0xc6, 0xc6, 0xc6, 0xd6, 0xfe, 0xee, 0xc6, 0x00,
+ 0xc6, 0xc6, 0x6c, 0x38, 0x38, 0x6c, 0xc6, 0x00,
+ 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x30, 0x78, 0x00,
+ 0xfe, 0xc6, 0x8c, 0x18, 0x32, 0x66, 0xfe, 0x00,
+ 0x78, 0x60, 0x60, 0x60, 0x60, 0x60, 0x78, 0x00,
+ 0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x02, 0x00,
+ 0x78, 0x18, 0x18, 0x18, 0x18, 0x18, 0x78, 0x00,
+ 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
+ 0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x76, 0x00,
+ 0xe0, 0x60, 0x60, 0x7c, 0x66, 0x66, 0xdc, 0x00,
+ 0x00, 0x00, 0x78, 0xcc, 0xc0, 0xcc, 0x78, 0x00,
+ 0x1c, 0x0c, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00,
+ 0x00, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00,
+ 0x38, 0x6c, 0x60, 0xf0, 0x60, 0x60, 0xf0, 0x00,
+ 0x00, 0x00, 0x76, 0xcc, 0xcc, 0x7c, 0x0c, 0xf8,
+ 0xe0, 0x60, 0x6c, 0x76, 0x66, 0x66, 0xe6, 0x00,
+ 0x30, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0x0c, 0x00, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0x78,
+ 0xe0, 0x60, 0x66, 0x6c, 0x78, 0x6c, 0xe6, 0x00,
+ 0x70, 0x30, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0x00, 0x00, 0xcc, 0xfe, 0xfe, 0xd6, 0xc6, 0x00,
+ 0x00, 0x00, 0xf8, 0xcc, 0xcc, 0xcc, 0xcc, 0x00,
+ 0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0x78, 0x00,
+ 0x00, 0x00, 0xdc, 0x66, 0x66, 0x7c, 0x60, 0xf0,
+ 0x00, 0x00, 0x76, 0xcc, 0xcc, 0x7c, 0x0c, 0x1e,
+ 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0xf0, 0x00,
+ 0x00, 0x00, 0x7c, 0xc0, 0x78, 0x0c, 0xf8, 0x00,
+ 0x10, 0x30, 0x7c, 0x30, 0x30, 0x34, 0x18, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x00,
+ 0x00, 0x00, 0xc6, 0xd6, 0xfe, 0xfe, 0x6c, 0x00,
+ 0x00, 0x00, 0xc6, 0x6c, 0x38, 0x6c, 0xc6, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xf8,
+ 0x00, 0x00, 0xfc, 0x98, 0x30, 0x64, 0xfc, 0x00,
+ 0x1c, 0x30, 0x30, 0xe0, 0x30, 0x30, 0x1c, 0x00,
+ 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00,
+ 0xe0, 0x30, 0x30, 0x1c, 0x30, 0x30, 0xe0, 0x00,
+ 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0x00,
+};
+
+ASM_START
+.org 0xcc00
+// bcc-generated data will be placed here
+
+// For documentation of this config structure, look on developer.intel.com and
+// search for multiprocessor specification. Note that when you change anything
+// you must update the checksum (a pain!). It would be better to construct this
+// with C structures, or at least fill in the checksum automatically.
+//
+// Maybe these structs could be moved elsewhere than d000
+
+#if (BX_SMP_PROCESSORS==1)
+ // no structure necessary.
+#elif (BX_SMP_PROCESSORS==2)
+// define the Intel MP Configuration Structure for 2 processors at
+// APIC ID 0,1. I/O APIC at ID=2.
+.align 16
+mp_config_table:
+ db 0x50, 0x43, 0x4d, 0x50 ;; "PCMP" signature
+ dw (mp_config_end-mp_config_table) ;; table length
+ db 4 ;; spec rev
+ db 0x65 ;; checksum
+ .ascii "BOCHSCPU" ;; OEM id = "BOCHSCPU"
+ db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1 "
+ db 0x20, 0x20, 0x20, 0x20
+ db 0x20, 0x20, 0x20, 0x20
+ dw 0,0 ;; oem table ptr
+ dw 0 ;; oem table size
+ dw 20 ;; entry count
+ dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
+ dw 0 ;; extended table length
+ db 0 ;; extended table checksum
+ db 0 ;; reserved
+mp_config_proc0:
+ db 0 ;; entry type=processor
+ db 0 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 3 ;; cpu flags: enabled, bootstrap processor
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc1:
+ db 0 ;; entry type=processor
+ db 1 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_isa_bus:
+ db 1 ;; entry type=bus
+ db 0 ;; bus ID
+ db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20 ;; bus type="ISA "
+mp_config_ioapic:
+ db 2 ;; entry type=I/O APIC
+ db 2 ;; apic id=2. linux will set.
+ db 0x11 ;; I/O APIC version number
+ db 1 ;; flags=1=enabled
+ dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
+mp_config_irqs:
+ db 3 ;; entry type=I/O interrupt
+ db 0 ;; interrupt type=vectored interrupt
+ db 0,0 ;; flags po=0, el=0 (linux uses as default)
+ db 0 ;; source bus ID is ISA
+ db 0 ;; source bus IRQ
+ db 2 ;; destination I/O APIC ID
+ db 0 ;; destination I/O APIC interrupt in
+ ;; repeat pattern for interrupts 0-15
+ db 3,0,0,0,0,1,2,1
+ db 3,0,0,0,0,2,2,2
+ db 3,0,0,0,0,3,2,3
+ db 3,0,0,0,0,4,2,4
+ db 3,0,0,0,0,5,2,5
+ db 3,0,0,0,0,6,2,6
+ db 3,0,0,0,0,7,2,7
+ db 3,0,0,0,0,8,2,8
+ db 3,0,0,0,0,9,2,9
+ db 3,0,0,0,0,10,2,10
+ db 3,0,0,0,0,11,2,11
+ db 3,0,0,0,0,12,2,12
+ db 3,0,0,0,0,13,2,13
+ db 3,0,0,0,0,14,2,14
+ db 3,0,0,0,0,15,2,15
+#elif (BX_SMP_PROCESSORS==4)
+// define the Intel MP Configuration Structure for 4 processors at
+// APIC ID 0,1,2,3. I/O APIC at ID=4.
+.align 16
+mp_config_table:
+ db 0x50, 0x43, 0x4d, 0x50 ;; "PCMP" signature
+ dw (mp_config_end-mp_config_table) ;; table length
+ db 4 ;; spec rev
+ db 0xdd ;; checksum
+ .ascii "BOCHSCPU" ;; OEM id = "BOCHSCPU"
+ db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1 "
+ db 0x20, 0x20, 0x20, 0x20
+ db 0x20, 0x20, 0x20, 0x20
+ dw 0,0 ;; oem table ptr
+ dw 0 ;; oem table size
+ dw 22 ;; entry count
+ dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
+ dw 0 ;; extended table length
+ db 0 ;; extended table checksum
+ db 0 ;; reserved
+mp_config_proc0:
+ db 0 ;; entry type=processor
+ db 0 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 3 ;; cpu flags: enabled, bootstrap processor
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc1:
+ db 0 ;; entry type=processor
+ db 1 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc2:
+ db 0 ;; entry type=processor
+ db 2 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc3:
+ db 0 ;; entry type=processor
+ db 3 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_isa_bus:
+ db 1 ;; entry type=bus
+ db 0 ;; bus ID
+ db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20 ;; bus type="ISA "
+mp_config_ioapic:
+ db 2 ;; entry type=I/O APIC
+ db 4 ;; apic id=4. linux will set.
+ db 0x11 ;; I/O APIC version number
+ db 1 ;; flags=1=enabled
+ dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
+mp_config_irqs:
+ db 3 ;; entry type=I/O interrupt
+ db 0 ;; interrupt type=vectored interrupt
+ db 0,0 ;; flags po=0, el=0 (linux uses as default)
+ db 0 ;; source bus ID is ISA
+ db 0 ;; source bus IRQ
+ db 4 ;; destination I/O APIC ID
+ db 0 ;; destination I/O APIC interrupt in
+ ;; repeat pattern for interrupts 0-15
+ db 3,0,0,0,0,1,4,1
+ db 3,0,0,0,0,2,4,2
+ db 3,0,0,0,0,3,4,3
+ db 3,0,0,0,0,4,4,4
+ db 3,0,0,0,0,5,4,5
+ db 3,0,0,0,0,6,4,6
+ db 3,0,0,0,0,7,4,7
+ db 3,0,0,0,0,8,4,8
+ db 3,0,0,0,0,9,4,9
+ db 3,0,0,0,0,10,4,10
+ db 3,0,0,0,0,11,4,11
+ db 3,0,0,0,0,12,4,12
+ db 3,0,0,0,0,13,4,13
+ db 3,0,0,0,0,14,4,14
+ db 3,0,0,0,0,15,4,15
+#elif (BX_SMP_PROCESSORS==8)
+// define the Intel MP Configuration Structure for 8 processors at
+// APIC ID 0,1,2,3,4,5,6,7. I/O APIC at ID=8.
+.align 16
+mp_config_table:
+ db 0x50, 0x43, 0x4d, 0x50 ;; "PCMP" signature
+ dw (mp_config_end-mp_config_table) ;; table length
+ db 4 ;; spec rev
+ db 0xc3 ;; checksum
+ .ascii "BOCHSCPU" ;; OEM id = "BOCHSCPU"
+ db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1 "
+ db 0x20, 0x20, 0x20, 0x20
+ db 0x20, 0x20, 0x20, 0x20
+ dw 0,0 ;; oem table ptr
+ dw 0 ;; oem table size
+ dw 26 ;; entry count
+ dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
+ dw 0 ;; extended table length
+ db 0 ;; extended table checksum
+ db 0 ;; reserved
+mp_config_proc0:
+ db 0 ;; entry type=processor
+ db 0 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 3 ;; cpu flags: enabled, bootstrap processor
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc1:
+ db 0 ;; entry type=processor
+ db 1 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc2:
+ db 0 ;; entry type=processor
+ db 2 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc3:
+ db 0 ;; entry type=processor
+ db 3 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc4:
+ db 0 ;; entry type=processor
+ db 4 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc5:
+ db 0 ;; entry type=processor
+ db 5 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc6:
+ db 0 ;; entry type=processor
+ db 6 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_proc7:
+ db 0 ;; entry type=processor
+ db 7 ;; local APIC id
+ db 0x11 ;; local APIC version number
+ db 1 ;; cpu flags: enabled
+ db 0,6,0,0 ;; cpu signature
+ dw 0x201,0 ;; feature flags
+ dw 0,0 ;; reserved
+ dw 0,0 ;; reserved
+mp_config_isa_bus:
+ db 1 ;; entry type=bus
+ db 0 ;; bus ID
+ db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20 ;; bus type="ISA "
+mp_config_ioapic:
+ db 2 ;; entry type=I/O APIC
+ db 8 ;; apic id=8
+ db 0x11 ;; I/O APIC version number
+ db 1 ;; flags=1=enabled
+ dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
+mp_config_irqs:
+ db 3 ;; entry type=I/O interrupt
+ db 0 ;; interrupt type=vectored interrupt
+ db 0,0 ;; flags po=0, el=0 (linux uses as default)
+ db 0 ;; source bus ID is ISA
+ db 0 ;; source bus IRQ
+ db 8 ;; destination I/O APIC ID
+ db 0 ;; destination I/O APIC interrupt in
+ ;; repeat pattern for interrupts 0-15
+ db 3,0,0,0,0,1,8,1
+ db 3,0,0,0,0,2,8,2
+ db 3,0,0,0,0,3,8,3
+ db 3,0,0,0,0,4,8,4
+ db 3,0,0,0,0,5,8,5
+ db 3,0,0,0,0,6,8,6
+ db 3,0,0,0,0,7,8,7
+ db 3,0,0,0,0,8,8,8
+ db 3,0,0,0,0,9,8,9
+ db 3,0,0,0,0,10,8,10
+ db 3,0,0,0,0,11,8,11
+ db 3,0,0,0,0,12,8,12
+ db 3,0,0,0,0,13,8,13
+ db 3,0,0,0,0,14,8,14
+ db 3,0,0,0,0,15,8,15
+#else
+# error Sorry, rombios only has configurations for 1, 2, 4 or 8 processors.
+#endif // if (BX_SMP_PROCESSORS==...)
+
+mp_config_end: // this label used to find length of mp structure
+ db 0
+
+#if (BX_SMP_PROCESSORS>1)
+.align 16
+mp_floating_pointer_structure:
+db 0x5f, 0x4d, 0x50, 0x5f ; "_MP_" signature
+dw mp_config_table, 0xf ;; pointer to MP configuration table
+db 1 ;; length of this struct in 16-bit byte chunks
+db 4 ;; MP spec revision
+db 0xc1 ;; checksum
+db 0 ;; MP feature byte 1. value 0 means look at the config table
+db 0,0,0,0 ;; MP feature bytes 2-5.
+#endif
+
+ASM_END
diff --git a/tools/firmware/rombios/rombios.diffs b/tools/firmware/rombios/rombios.diffs
new file mode 100644
index 0000000000..8ec23ef9de
--- /dev/null
+++ b/tools/firmware/rombios/rombios.diffs
@@ -0,0 +1,206 @@
+--- /home/leendert/cvs/bochs/bios/rombios.c 2005-05-23 12:18:11.000000000 -0400
++++ rombios.c 2005-06-01 23:46:45.000000000 -0400
+@@ -26,6 +26,7 @@
+
+ // ROM BIOS for use with Bochs/Plex x86 emulation environment
+
++#define VMXASSIST
+
+ // ROM BIOS compatability entry points:
+ // ===================================
+@@ -170,7 +171,9 @@
+ #define BASE_MEM_IN_K (640 - EBDA_SIZE)
+
+ // Define the application NAME
+-#ifdef PLEX86
++#ifdef VMXASSIST
++# define BX_APPNAME "VMXAssist"
++#elif PLEX86
+ # define BX_APPNAME "Plex86"
+ #else
+ # define BX_APPNAME "Bochs"
+@@ -314,7 +317,6 @@
+ ASM_END
+ }
+
+-#if 0
+ // memcpy of count bytes
+ void
+ memcpyb(dseg,doffset,sseg,soffset,count)
+@@ -362,6 +364,7 @@
+ ASM_END
+ }
+
++#if 0
+ // memcpy of count dword
+ void
+ memcpyd(dseg,doffset,sseg,soffset,count)
+@@ -858,6 +861,7 @@
+ static void write_byte();
+ static void write_word();
+ static void bios_printf();
++static void copy_e820_table();
+
+ static Bit8u inhibit_mouse_int_and_events();
+ static void enable_mouse_int_and_events();
+@@ -1420,6 +1424,16 @@
+ ASM_END
+ }
+
++#ifdef VMXASSIST
++void
++copy_e820_table()
++{
++ Bit8u nr_entries = read_byte(0x9000, 0x1e8);
++ write_word(0xe000, 0x8, nr_entries);
++ memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
++}
++#endif /* VMXASSIST */
++
+ #if BX_DEBUG_SERIAL
+ /* serial debug port*/
+ #define BX_DEBUG_PORT 0x03f8
+@@ -1498,6 +1512,9 @@
+ if (c == '\n') uart_tx_byte(BX_DEBUG_PORT, '\r');
+ uart_tx_byte(BX_DEBUG_PORT, c);
+ #endif
++#ifdef VMXASSIST
++ outb(0xE9, c);
++#endif
+ #if BX_VIRTUAL_PORTS
+ if (action & BIOS_PRINTF_DEBUG) outb(DEBUG_PORT, c);
+ if (action & BIOS_PRINTF_INFO) outb(INFO_PORT, c);
+@@ -4053,6 +4070,66 @@
+ case 0x20: // coded by osmaker aka K.J.
+ if(regs.u.r32.edx == 0x534D4150)
+ {
++#ifdef VMXASSIST
++ if ((regs.u.r16.bx / 0x14)* 0x14 == regs.u.r16.bx) {
++ Bit16u e820_table_size = read_word(0xe000, 0x8) * 0x14;
++
++ if (regs.u.r16.bx + 0x14 <= e820_table_size) {
++ memcpyb(ES, regs.u.r16.di,
++ 0xe000, 0x10 + regs.u.r16.bx, 0x14);
++ }
++ regs.u.r32.ebx += 0x14;
++ if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size)
++ regs.u.r32.ebx = 0;
++ regs.u.r32.eax = 0x534D4150;
++ regs.u.r32.ecx = 0x14;
++ CLEAR_CF();
++ return;
++ } else if (regs.u.r16.bx == 1) {
++ extended_memory_size = inb_cmos(0x35);
++ extended_memory_size <<= 8;
++ extended_memory_size |= inb_cmos(0x34);
++ extended_memory_size *= 64;
++ if (extended_memory_size > 0x3bc000) // greater than EFF00000???
++ {
++ extended_memory_size = 0x3bc000; // everything after this is reserved memory until we get to 0x100000000
++ }
++ extended_memory_size *= 1024;
++ extended_memory_size += 15728640; // make up for the 16mb of memory that is chopped off
++
++ if (extended_memory_size <= 15728640)
++ {
++ extended_memory_size = inb_cmos(0x31);
++ extended_memory_size <<= 8;
++ extended_memory_size |= inb_cmos(0x30);
++ extended_memory_size *= 1024;
++ }
++
++ write_word(ES, regs.u.r16.di, 0x0000);
++ write_word(ES, regs.u.r16.di+2, 0x0010);
++ write_word(ES, regs.u.r16.di+4, 0x0000);
++ write_word(ES, regs.u.r16.di+6, 0x0000);
++
++ write_word(ES, regs.u.r16.di+8, extended_memory_size);
++ extended_memory_size >>= 16;
++ write_word(ES, regs.u.r16.di+10, extended_memory_size);
++ extended_memory_size >>= 16;
++ write_word(ES, regs.u.r16.di+12, extended_memory_size);
++ extended_memory_size >>= 16;
++ write_word(ES, regs.u.r16.di+14, extended_memory_size);
++
++ write_word(ES, regs.u.r16.di+16, 0x1);
++ write_word(ES, regs.u.r16.di+18, 0x0);
++
++ regs.u.r32.ebx = 0;
++ regs.u.r32.eax = 0x534D4150;
++ regs.u.r32.ecx = 0x14;
++ CLEAR_CF();
++ return;
++ } else { /* AX=E820, DX=534D4150, BX unrecognized */
++ goto int15_unimplemented;
++ }
++#else
+ switch(regs.u.r16.bx)
+ {
+ case 0:
+@@ -4070,6 +4147,7 @@
+ write_word(ES, regs.u.r16.di+18, 0x0);
+
+ regs.u.r32.ebx = 1;
++
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+@@ -4121,6 +4199,7 @@
+ goto int15_unimplemented;
+ break;
+ }
++#endif
+ } else {
+ // if DX != 0x534D4150)
+ goto int15_unimplemented;
+@@ -9497,9 +9576,16 @@
+ ;; int 1C already points at dummy_iret_handler (above)
+ mov al, #0x34 ; timer0: binary count, 16bit count, mode 2
+ out 0x43, al
++#ifdef VMXASSIST
++ mov al, #0x0b ; #0xe90b = 20 Hz (temporary, until we fix xen/vmx support)
++ out 0x40, al ; lsb
++ mov al, #0xe9
++ out 0x40, al ; msb
++#else
+ mov al, #0x00 ; maximum count of 0000H = 18.2Hz
+ out 0x40, al
+ out 0x40, al
++#endif
+
+ ;; Keyboard
+ SET_INT_VECTOR(0x09, #0xF000, #int09_handler)
+@@ -9597,10 +9683,22 @@
+ mov al, #0x11 ; send initialisation commands
+ out 0x20, al
+ out 0xa0, al
++#ifdef VMXASSIST
++ ;; The vm86 emulator expects interrupts to be mapped beyond the reserved
++ ;; vectors (0 through 31). Since rombios fully controls the hardware, we
++ ;; map it the way the emulator needs it and expect that it will do the
++ ;; proper 8086 interrupt translation (that is, master pic base is at 0x8
++ ;; and slave pic base is at 0x70).
++ mov al, #0x20
++ out 0x21, al
++ mov al, #0x28
++ out 0xa1, al
++#else
+ mov al, #0x08
+ out 0x21, al
+ mov al, #0x70
+ out 0xa1, al
++#endif
+ mov al, #0x04
+ out 0x21, al
+ mov al, #0x02
+@@ -9617,6 +9715,10 @@
+ #endif
+ out 0xa1, AL ;slave pic: unmask IRQ 12, 13, 14
+
++#ifdef VMXASSIST
++ call _copy_e820_table
++#endif
++
+ call pcibios_init
+
+ call rom_scan
diff --git a/tools/firmware/vgabios/BUGS b/tools/firmware/vgabios/BUGS
new file mode 100644
index 0000000000..2bf3b062e9
--- /dev/null
+++ b/tools/firmware/vgabios/BUGS
@@ -0,0 +1,3 @@
+Not all the functions have been implemented yet.
+
+Please report any bugs to <info@vruppert.de>
diff --git a/tools/firmware/vgabios/COPYING b/tools/firmware/vgabios/COPYING
new file mode 100644
index 0000000000..223ede7de3
--- /dev/null
+++ b/tools/firmware/vgabios/COPYING
@@ -0,0 +1,504 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/tools/firmware/vgabios/ChangeLog b/tools/firmware/vgabios/ChangeLog
new file mode 100644
index 0000000000..08711f0c60
--- /dev/null
+++ b/tools/firmware/vgabios/ChangeLog
@@ -0,0 +1,1060 @@
+2005-05-24 16:50 vruppert
+
+ * vbe.c (1.47), vgabios.c (1.61):
+
+ - output to the vgabios info port can be disabled now. It is still enabled by
+ default and always possible in debug mode. (based on a patch from Alex Beregszaszi)
+
+2005-05-20 16:06 vruppert
+
+ * vbe.c (1.46), vgabios.c (1.60):
+
+ - fixed return value for the default case in the VBE section (non-debug mode)
+ - removed unused macros HALT and PANIC_PORT
+
+2005-03-07 20:39 vruppert
+
+ * README (1.9):
+
+ - updates for 0.5a release
+
+2005-03-06 13:06 vruppert
+
+ * Makefile (1.17):
+
+ - vgabios files with cirrus support added to release target
+
+2005-03-06 12:24 vruppert
+
+ * Makefile (1.16):
+
+ - cross compilation support added (patch from Alex Beregszaszi)
+
+2005-03-05 13:03 vruppert
+
+ * BUGS (1.3), README (1.8), TODO (1.11):
+
+ - documentation updates
+
+2004-12-04 15:26 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.61), VGABIOS-lgpl-latest.cirrus.bin
+ (1.13), VGABIOS-lgpl-latest.cirrus.debug.bin (1.13),
+ VGABIOS-lgpl-latest.debug.bin (1.61), clext.c (1.9):
+
+ - Cirrus extension: support for 1280x1024x15 and 1280x1024x16 modes added (patch
+ from Fabrice Bellard)
+
+2004-08-08 16:53 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.60), VGABIOS-lgpl-latest.cirrus.bin (1.12),
+ VGABIOS-lgpl-latest.cirrus.debug.bin (1.12),
+ VGABIOS-lgpl-latest.debug.bin (1.60), clext.c (1.8):
+
+ - use single bank mode for VBE
+ - enable 16k granularity for VBE only
+
+2004-07-30 19:33 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.59), VGABIOS-lgpl-latest.cirrus.bin (1.11),
+ VGABIOS-lgpl-latest.cirrus.debug.bin (1.11),
+ VGABIOS-lgpl-latest.debug.bin (1.59), clext.c (1.7):
+
+ - cirrus init: set standard vga mode and reset bitblt
+
+2004-07-22 18:38 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.58), VGABIOS-lgpl-latest.cirrus.bin (1.10),
+ VGABIOS-lgpl-latest.cirrus.debug.bin (1.10),
+ VGABIOS-lgpl-latest.debug.bin (1.58), clext.c (1.6), vbe.c (1.45),
+ vbetables.h (1.24):
+
+ - cirrus extension: tables for mode 1280x1024x8 added
+ - vbe: dispi_set_xres() and dispi_set_virt_width() now modify vga compatible
+ registers
+ - vbe: mode list entry for mode 800x600x4 fixed
+
+2004-07-18 20:23 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.57), VGABIOS-lgpl-latest.cirrus.bin (1.9),
+ VGABIOS-lgpl-latest.cirrus.debug.bin (1.9),
+ VGABIOS-lgpl-latest.debug.bin (1.57), vgabios.c (1.59), vgatables.h (1.8):
+
+ - disable CRTC write protection before setting new values
+ - CRTC line for mode 0x6a fixed
+
+2004-07-07 16:08 vruppert
+
+ * Makefile (1.15), VGABIOS-lgpl-latest.bin (1.56),
+ VGABIOS-lgpl-latest.cirrus.bin (1.8), VGABIOS-lgpl-latest.cirrus.debug.bin (1.8),
+ VGABIOS-lgpl-latest.debug.bin (1.56), biossums.c (1.1), clext.c (1.5):
+
+ - biossums utility for the Bochs BIOS adapted for the LGPL'd VGABIOS
+ - VESA3 PMINFO checksum calculated in the source
+ - 24 bpp mode entries fixed (patch from Fabrice Bellard)
+
+2004-06-25 18:28 vruppert
+
+ * VGABIOS-lgpl-latest.cirrus.bin (1.7), VGABIOS-lgpl-latest.cirrus.debug.bin (1.7),
+ clext.c (1.4):
+
+ - 4MB memory probe added (patch from Fabrice Bellard)
+
+2004-06-25 17:31 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.55), VGABIOS-lgpl-latest.cirrus.bin (1.6),
+ VGABIOS-lgpl-latest.cirrus.debug.bin (1.6),
+ VGABIOS-lgpl-latest.debug.bin (1.55), clext.c (1.3):
+
+ - fixed value of sequencer reset register in cirrus mode table
+ - fixed possible overflow error if cirrus start address is >256k
+
+2004-06-23 21:11 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.54), VGABIOS-lgpl-latest.cirrus.bin (1.5),
+ VGABIOS-lgpl-latest.cirrus.debug.bin (1.5),
+ VGABIOS-lgpl-latest.debug.bin (1.54), clext.c (1.2):
+
+ - applied new patch for the cirrus extension from suzu
+ * enable VESA LFB support if a Cirrus PCI adapter is detected
+ * prepared VBE3 protected mode info block (test case required)
+ - added VBE functions 4F06h and 4F07h
+ - some bugfixes
+
+2004-06-17 18:57 vruppert
+
+ * Makefile (1.14), VGABIOS-lgpl-latest.bin (1.53),
+ VGABIOS-lgpl-latest.cirrus.bin (1.2), VGABIOS-lgpl-latest.cirrus.debug.bin (1.2),
+ VGABIOS-lgpl-latest.debug.bin (1.53):
+
+ - fixed makefile targets for the binaries with cirrus extension
+
+2004-06-16 21:11 vruppert
+
+ * Makefile (1.13), VGABIOS-lgpl-latest.bin (1.52),
+ VGABIOS-lgpl-latest.cirrus.bin (1.1), VGABIOS-lgpl-latest.cirrus.debug.bin (1.1),
+ VGABIOS-lgpl-latest.debug.bin (1.52), clext.c (1.1), vgabios.c (1.58):
+
+ - applied suzu's cirrus extension patch. Cirrus SVGA detection, most of the
+ cirrus-specific modes and some basic VBE features are present now.
+
+2004-05-31 21:15 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.51), VGABIOS-lgpl-latest.debug.bin (1.51),
+ vgabios.c (1.57):
+
+ - write character in planar graphics modes: sequencer map mask must be 0x0f and
+ bit operation must be 'replace' if bit 7 of attribute is clear
+ - read/write pixel in planar graphics modes: bit mask setup simplified
+
+2004-05-11 18:08 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.50), VGABIOS-lgpl-latest.debug.bin (1.50),
+ vgabios.c (1.56):
+
+ - biosfn_select_vert_res rewritten in assembler
+ - scroll text in planar graphics modes: attribute for blank line fixed
+ - write character in planar graphics modes: graphics controller values fixed
+
+2004-05-09 20:32 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.49), VGABIOS-lgpl-latest.debug.bin (1.49),
+ vbe.c (1.44), vbe.h (1.24), vgabios.c (1.55):
+
+ - VBE init code and some dispi ioport functions rewritten in assembler
+ - text scroll functions for CGA graphics modes added
+ - scroll text in graphics modes: attribute for blank line fixed
+
+2004-05-08 16:06 vruppert
+
+ * BUGS (1.2), README (1.7), TODO (1.10), VGABIOS-lgpl-latest.bin (1.48),
+ VGABIOS-lgpl-latest.debug.bin (1.48), vbe.c (1.43), vbe.h (1.23),
+ vbe_display_api.txt (1.11), vgabios.c (1.54):
+
+ - VBE internal functions dispi_set_enable and dispi_set_bank now called both from C
+ and asm code
+ - VBE function 0x03 rewritten in assembler
+ - VBE function 0x08 cleaned up
+ - text output and scroll functions for graphics modes rewritten using case
+ structures
+ - documentation and comments updated
+
+2004-05-06 21:18 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.47), VGABIOS-lgpl-latest.debug.bin (1.47),
+ vbe.c (1.42), vbe.h (1.22), vgabios.c (1.53):
+
+ - VBE functions 0x05, 0x06, 0x07 and some dispi ioport functions rewritten in
+ assembler
+ - VBE functions 0x06 and 0x07: get functions now supported, 15 bpp bug fixed
+
+2004-05-05 19:24 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.46), VGABIOS-lgpl-latest.debug.bin (1.46),
+ vbe.c (1.41), vbe.h (1.21), vbe_display_api.txt (1.10), vgabios.c (1.52):
+
+ - 8 bit DAC capability flag set
+ - vbe_biosfn_set_get_dac_palette_format implemented
+ - VBE api description updated
+ - C definitions from header files now used assembler code
+
+2004-05-02 17:27 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.45), VGABIOS-lgpl-latest.debug.bin (1.45),
+ vgabios.c (1.51):
+
+ - text scroll functions for PLANAR1/PLANAR4 graphics modes added
+ - function biosfn_get_ega_info rewritten in assembler
+ - read/write graphics pixel functions rewritten using a case structure
+
+2004-05-01 16:03 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.44), VGABIOS-lgpl-latest.debug.bin (1.44),
+ vgabios.c (1.50):
+
+ - biosfn_enable_cursor_emulation rewritten in assembler
+ - remap of the cursor shape depends on modeset control bit 0
+ - text output in PLANAR4 modes now supports attribute bit 7 (XOR with background)
+
+2004-04-25 20:13 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.43), VGABIOS-lgpl-latest.debug.bin (1.43),
+ vgabios.c (1.49), vgatables.h (1.7):
+
+ - table entries for vga mode 0x0f fixed (PLANAR2 exists on EGA only)
+ - function release_font_access now supports the monochrome text mode
+ - PLANAR1 modes now supported in text output functions and read/write pixel
+ - function AH=0x12/BL=0x32 rewritten in assembler
+
+2004-04-25 08:45 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.42), VGABIOS-lgpl-latest.debug.bin (1.42),
+ vgabios.c (1.48):
+
+ - block address calculation in font functions fixed
+ - functions AX=0x1103, AH=0x12/BL=0x31 and AH=0x12/BL=0x33 rewritten in assembler
+
+2004-04-24 09:59 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.41), VGABIOS-lgpl-latest.debug.bin (1.41),
+ vgabios.c (1.47):
+
+ - read/write graphics pixel for PLANAR4 modes added
+ - CGA specific functions (group AH = 0x0B) implemented
+
+2004-04-23 14:34 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.40), VGABIOS-lgpl-latest.debug.bin (1.40),
+ vgabios.c (1.46):
+
+ - remaining palette and dac read/write functions (except gray scale summing)
+ rewritten in assembler
+
+2004-04-18 13:43 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.39), VGABIOS-lgpl-latest.debug.bin (1.39),
+ vgabios.c (1.45):
+
+ - some palette and dac read/write functions rewritten in assembler
+ - main int10 debug message now works with assembler functions, too
+
+2004-04-18 09:15 japj
+
+ * vbe.c (1.40):
+
+ updated my email address + put vgabios url in the bios copyright string
+ (instead of my old email address)
+
+2004-04-17 07:18 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.38), VGABIOS-lgpl-latest.debug.bin (1.38),
+ vgabios.c (1.44):
+
+ - biosfn_set_video_mode: don't load DAC registers if default palette loading is
+ disabled. Perform gray scale summing if enabled.
+ - biosfn_perform_gray_scale_summing: switch between DAC read and write mode is
+ required to make this function work. Maximum DAC value always set to 0x3f.
+
+2004-04-08 17:50 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.37), VGABIOS-lgpl-latest.debug.bin (1.37),
+ vgabios.c (1.43):
+
+ - write character function for the LINEAR8 mode
+ - get_font_access() and release_font_access() rewritten in assembler
+ - fixed wrong variable name in the init code
+
+2004-04-06 19:31 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.36), VGABIOS-lgpl-latest.debug.bin (1.36),
+ vgabios.c (1.42):
+
+ - init functions rewitten in assembler
+ - function biosfn_set_display_code rewritten in assembler
+
+2004-04-05 19:40 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.35), VGABIOS-lgpl-latest.debug.bin (1.35),
+ vgabios.c (1.41):
+
+ - functions biosfn_get_video_mode() and biosfn_read_display_code() rewritten
+ in assembler
+
+2004-04-04 18:20 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.34), VGABIOS-lgpl-latest.debug.bin (1.34),
+ vgabios.c (1.40):
+
+ - write character function for CGA modes added
+ - read/write graphics pixel for CGA and LINEAR8 modes added
+
+2004-02-23 21:08 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.33), VGABIOS-lgpl-latest.debug.bin (1.33),
+ vbe.c (1.39):
+
+ - dispi_get_max_bpp(): restore the original value of the vbe enable register
+
+2004-02-22 14:17 vruppert
+
+ * README (1.6), vbe.c (1.38), vbe.h (1.20), vbe_display_api.txt (1.9),
+ VGABIOS-lgpl-latest.bin (1.32), VGABIOS-lgpl-latest.debug.bin (1.32):
+
+ - new function dispi_get_max_bpp() returns the bpp capabilities of the Bochs gui
+ - create the mode list depending on the supported bpp capability
+ - unused stuff removed
+ - documentation updated
+
+2004-02-21 18:20 vruppert
+
+ * vbe.c (1.37), vbe.h (1.19), vbetables.h (1.23),
+ VGABIOS-lgpl-latest.bin (1.31), VGABIOS-lgpl-latest.debug.bin (1.31):
+
+ - dynamicly genarated vbe mode_info list works now
+
+2003-11-17 21:04 vruppert
+
+ * vbe.c (1.36), vbetables.h (1.22), vgabios.c (1.39), vgatables.h (1.6),
+ VGABIOS-lgpl-latest.bin (1.30), VGABIOS-lgpl-latest.debug.bin (1.30):
+
+ - new VBE presence flag stored at unused BDA address 0xB9
+ - VBE init code rewritten
+ - added BIOS TTY flag for VBE mode 0x0102 (TODO: scrolling)
+ - vgabios_init_func: load and activate text font already done by set_video_mode
+ - function biosfn_get_all_palette_reg() fixed
+
+2003-11-06 00:26 cbothamy
+
+ * README (1.5):
+
+ - add changes for 0.4c release
+
+2003-11-06 00:22 cbothamy
+
+ * VGABIOS-lgpl-latest.bin (1.29), VGABIOS-lgpl-latest.debug.bin
+ (1.29):
+
+ - compile vgabios.c rev1.38
+
+2003-11-06 00:21 cbothamy
+
+ * vgabios.c (1.38):
+
+ - activate char table after loading it when setting a text video
+ mode
+
+2003-11-06 00:19 cbothamy
+
+ * Makefile (1.12):
+
+ - when making a release, remove unwanted files first, and exclude
+ CVS from the tarball
+
+2003-11-04 22:50 cbothamy
+
+ * ChangeLog (1.20, v0_4b):
+
+ - update ChangeLog for 0.4b release
+
+2003-11-04 22:49 cbothamy
+
+ * README (1.4, v0_4b):
+
+ - update Changes for 0.4b release
+
+2003-11-04 20:26 vruppert
+
+ * vgabios.c (1.37), VGABIOS-lgpl-latest.bin (1.28),
+ VGABIOS-lgpl-latest.debug.bin (1.28) (utags: v0_4b):
+
+ - biosfn_get_font_info(): character height must be returned in CX
+
+2003-11-03 21:57 vruppert
+
+ * vbe.c (1.35, v0_4b), vgabios.c (1.36), VGABIOS-lgpl-latest.bin
+ (1.27), VGABIOS-lgpl-latest.debug.bin (1.27):
+
+ - the 'noclearmem' flag is not stored in the 'current video mode'
+ register (0040h:0049h) - VBE also stores the 'noclear' flag in
+ the 'video control' register (0040h:0087h)
+
+2003-10-05 10:06 vruppert
+
+ * vbe.h (1.18, v0_4b), vbe_display_api.txt (1.8, v0_4b),
+ VGABIOS-lgpl-latest.bin (1.26), VGABIOS-lgpl-latest.debug.bin
+ (1.26):
+
+ - changed VBE i/o registers to 0x01CE/CF (suggestion from Daniel
+ Gimpelevich)
+
+2003-08-18 18:38 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.25), VGABIOS-lgpl-latest.debug.bin
+ (1.25), vgabios.c (1.35):
+
+ - wrong offsets to the character tables (INT 0x1F/0x43) fixed
+ (underscore added) - functions accessing the CRT controller
+ optimized using a local variable 'crtc_addr'
+
+2003-08-17 15:46 cbothamy
+
+ * ChangeLog (1.19, v0_4a):
+
+ - ChangeLog is now automatically generated by running "cvs2cl -r
+ -t -P -S" - update ChangeLog for 0.4a release
+
+2003-08-17 15:44 cbothamy
+
+ * README (1.3, v0_4a):
+
+ - added the old ChangeLog in the HOSTORY section of the README
+ file - update History for 0.4a release, with a summary of Changes
+
+2003-08-17 15:24 cbothamy
+
+ * Makefile (1.11, v0_4b, v0_4a):
+
+ - fix Makefile for "release" target
+
+2003-08-16 01:49 cbothamy
+
+ * Makefile (1.10), README (1.2), VGABIOS-lgpl-latest.bin (1.24,
+ v0_4a), VGABIOS-lgpl-latest.debug.bin (1.24, v0_4a), vgabios.c
+ (1.34, v0_4a):
+
+ - update the Makefile for releases - remove references to old
+ plex86 website - update the Makefile so it build
+ VGABIOS-lgpl-latest.bin and VGABIOS-lgpl-latest.debug.bin
+
+2003-08-07 18:17 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.23), VGABIOS-lgpl-latest.debug.bin
+ (1.23):
+
+ - current VBE mode now stored in BDA (unused address 0xBA)
+
+2003-08-07 17:54 vruppert
+
+ * vbe.c (1.34), vgatables.h (1.5, v0_4b) (utags: v0_4a):
+
+ - current VBE mode now stored in BDA (unused address 0xBA)
+
+2003-07-20 18:05 vruppert
+
+ * vgabios.c (1.33), VGABIOS-lgpl-latest.bin (1.22),
+ VGABIOS-lgpl-latest.debug.bin (1.22):
+
+ - fixed a few functions accessing the attribute controller
+
+2003-07-19 09:33 vruppert
+
+ * vgabios.c (1.32), VGABIOS-lgpl-latest.bin (1.21),
+ VGABIOS-lgpl-latest.debug.bin (1.21):
+
+ - re-enable video after programming the attribute controller -
+ biosfn_set_all_palette_reg(): number of palette registers fixed
+
+2003-07-16 22:32 vruppert
+
+ * ChangeLog (1.18), vbe.c (1.33), vbe.h (1.17, v0_4a),
+ vbe_display_api.txt (1.7, v0_4a), vgabios.c (1.31),
+ VGABIOS-lgpl-latest.bin (1.20), VGABIOS-lgpl-latest.debug.bin
+ (1.20):
+
+ - LFB flag now stored in the register VBE_DISPI_INDEX_ENABLE -
+ release date in Changelog fixed - release date of VBE BIOS 0.6
+ was the same as VGA BIOS 0.3b - year changed in copyright
+ messages
+
+2003-07-15 12:40 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.19), VGABIOS-lgpl-latest.debug.bin
+ (1.19):
+
+ - new function dispi_get_bpp() - function
+ vbe_biosfn_set_get_logical_scan_line_length() fixed for >8bpp -
+ number of image pages of all VBE modes fixed
+
+2003-07-15 12:35 vruppert
+
+ * vbe.c (1.32), vbetables.h (1.21, v0_4b, v0_4a):
+
+ - new function dispi_get_bpp() - function
+ vbe_biosfn_set_get_logical_scan_line_length() fixed for >8bpp -
+ number of image pages of all VBE modes fixed
+
+2003-07-14 19:45 vruppert
+
+ * vbe_display_api.txt (1.6):
+
+ - description of VBE_DISPI_ interface 0xb0c2 added
+
+2003-07-10 19:07 vruppert
+
+ * vbe.c (1.31), vbetables.h (1.20), VGABIOS-lgpl-latest.bin (1.18),
+ VGABIOS-lgpl-latest.debug.bin (1.18):
+
+ - 15 bpp VBE modes added - "Bochs own" mode 0x142 (640x480x32bpp)
+ added
+
+2003-07-01 19:00 vruppert
+
+ * vbe.c (1.30), vbe.h (1.16), vbetables.h (1.19),
+ VGABIOS-lgpl-latest.bin (1.17), VGABIOS-lgpl-latest.debug.bin
+ (1.17):
+
+ - VBE preserve display memory feature implemented - VBE mode
+ entries 0x117 and 0x118 added
+
+2003-06-30 21:27 vruppert
+
+ * vbe.c (1.29), vbe.h (1.15), vbetables.h (1.18),
+ VGABIOS-lgpl-latest.bin (1.16), VGABIOS-lgpl-latest.debug.bin
+ (1.16):
+
+ - VBE mode info blocks of modes with >8bpp enabled - VBE modes
+ with 24 bpp: bytes per scanline fixed - vbe_biosfn_set_mode() now
+ supports >8bpp - VBE will be enabled with new VBE_DISPI_ID2
+ (0xB0C2)
+
+2003-06-29 12:53 vruppert
+
+ * vbetables.h (1.17), VGABIOS-lgpl-latest.bin (1.15),
+ VGABIOS-lgpl-latest.debug.bin (1.15):
+
+ - duplicate lines with VBE_MODE_ATTRIBUTE_GRAPHICS_MODE removed -
+ VBE mode info items of currently unsupported modes fixed
+
+2003-06-15 21:19 vruppert
+
+ * vgabios.c (1.30), VGABIOS-lgpl-latest.bin (1.14),
+ VGABIOS-lgpl-latest.debug.bin (1.14):
+
+ - function write_gfx_char() rewritten
+
+2003-04-26 09:27 vruppert
+
+ * VGABIOS-lgpl-latest.debug.bin (1.13):
+
+ - added missing VBE function dispi_get_bank() - added missing
+ return codes for VBE function 4F05h - memory size is always
+ reported in VBE function 4F00h - fixed scan line length for VBE
+ mode 0102h - fixed function set_active_page() for graphics modes
+ - fixed the page sizes of some VGA modes
+
+2003-04-26 09:22 vruppert
+
+ * vbe.c (1.28), vbetables.h (1.16), vgabios.c (1.29), vgatables.h
+ (1.4), VGABIOS-lgpl-latest.bin (1.13):
+
+ - added missing VBE function dispi_get_bank() - added missing
+ return codes for VBE function 4F05h - memory size is always
+ reported in VBE function 4F00h - fixed scan line length for VBE
+ mode 0102h - fixed function set_active_page() for graphics modes
+ - fixed the page sizes of some VGA modes
+
+2003-04-20 09:51 vruppert
+
+ * vgabios.c (1.28), vgatables.h (1.3), VGABIOS-lgpl-latest.bin
+ (1.12), VGABIOS-lgpl-latest.debug.bin (1.12):
+
+ - function write_gfx_char() now supports different font sizes -
+ some entries of the static functionality table fixed
+
+2003-04-18 09:23 vruppert
+
+ * vbe.c (1.27), vbe.h (1.14), vbetables.h (1.15):
+
+ - applied patch #1331 * new function dispi_set_bank_farcall()
+ * VBE mode info item WinFuncPtr points to the new function if the
+ flag VBE_WINDOW_ATTRIBUTE_RELOCATABLE is set * flag
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE added
+
+2003-02-11 20:17 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.11), VGABIOS-lgpl-latest.debug.bin
+ (1.11), vbe.c (1.26), vbetables.h (1.14):
+
+ - VBE mode search rewritten * improved function
+ mode_info_find_mode() is now used by the VBE functions 0x4F01
+ and 0x4F02 * removed all mode list entries with the LFB bit
+ set. LFB detection is now present in the function
+ mode_info_find_mode()
+
+2003-02-09 20:59 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.10), VGABIOS-lgpl-latest.debug.bin
+ (1.10), vgabios.c (1.27):
+
+ - function write_gfx_char(): memory address now calculated in
+ this function; background color is always black - function
+ biosfn_write_char_attr(): the count parameter is now used in
+ graphics modes too - function biosfn_write_char_only() works
+ the same way as function biosfn_write_char_attr() in graphics
+ mode - copying charmap data optimized using memcpyb()
+
+2003-02-09 11:36 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.9), VGABIOS-lgpl-latest.debug.bin
+ (1.9):
+
+ - VESA mode 0x102 added (uses existing SVGA mode 0x6a) - all VESA
+ modes with the LFB flag set removed from the list (Linux doesn't
+ like mode numbers > 0x07ff)
+
+2003-02-09 11:02 vruppert
+
+ * vbe.c (1.25), vbe.h (1.13), vbetables.h (1.13):
+
+ - VESA mode 0x102 added (uses existing SVGA mode 0x6a) - all VESA
+ modes with the LFB flag set removed from the list (Linux doesn't
+ like mode numbers > 0x07ff)
+
+2003-02-08 13:04 vruppert
+
+ * vbe.c (1.24), vgabios.c (1.26):
+
+ - vbe_biosfn_return_current_mode() now returns the active
+ standard VGA mode TODO: return VESA mode if enabled -
+ biosfn_set_video_mode() now clears the screen in CGA mode
+ correctly - write character functions are now working in all
+ PLANAR4 graphics modes - added stubs for unimplemented features
+ in graphics modes
+
+2003-02-04 22:19 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.8), VGABIOS-lgpl-latest.debug.bin
+ (1.8):
+
+ - set video mode: clear vga memory in graphics mode - set video
+ mode: load default font in text mode - write character
+ implemented for graphics mode 0x12
+
+2003-02-04 22:06 vruppert
+
+ * vgabios.c (1.25):
+
+ - set video mode: clear vga memory in graphics mode - set video
+ mode: load default font in text mode - write character
+ implemented for graphics mode 0x12
+
+2003-01-21 19:30 vruppert
+
+ * vgabios.c (1.24):
+
+ - remap the cursor size if the char height is > 8 and the new
+ values are < 8
+
+2003-01-20 18:24 cbothamy
+
+ * Makefile (1.9):
+
+ - fix so make -j2 does not overwrite temp files
+
+2003-01-19 12:35 vruppert
+
+ * vgabios.c (1.23):
+
+ - function set_scan_lines() recalculates the number of rows and
+ the page size - new values for char height, text rows and page
+ size are stored in the BIOS data segment - asm helper function
+ idiv_u added
+
+2003-01-15 18:49 cbothamy
+
+ * VGABIOS-lgpl-latest.bin (1.7), VGABIOS-lgpl-latest.debug.bin
+ (1.7):
+
+ - compile vgabios rev 1.22
+
+2003-01-15 18:49 cbothamy
+
+ * vgabios.c (1.22):
+
+ - fix bug found by ams : a 8bits index value was compared to
+ 0x100 in some cases in biosfn_set_all_dac_reg,
+ biosfn_read_all_dac_reg, biosfn_perform_gray_scale_summing
+
+2003-01-15 17:34 cbothamy
+
+ * Makefile (1.8):
+
+ - fix symbol table file names, discovered by ams
+
+2003-01-04 21:20 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.6), VGABIOS-lgpl-latest.debug.bin
+ (1.6), vgabios.c (1.21):
+
+ - biosfn_set_video_mode(): reset attribute controller flip-flop
+ before setting up the controller's registers (bug found with
+ amidiag)
+
+2003-01-04 09:50 vruppert
+
+ * vbe.c (1.23):
+
+ - VBE function 0x00 returns VBE 1.x compatible information if no
+ VBE signature is present
+
+2003-01-01 12:44 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.5), VGABIOS-lgpl-latest.debug.bin
+ (1.5):
+
+ - SVGA mode 0x6A (800x600x4) added to the list of graphics modes
+
+2002-12-31 18:07 vruppert
+
+ * vgatables.h (1.2):
+
+ - SVGA mode 0x6A (800x600x4) added to the list of graphics modes
+
+2002-11-23 10:38 cbothamy
+
+ * ChangeLog (1.17, v0_3b):
+
+ - fix changelog for 0.3b release
+
+2002-10-20 17:12 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.4), VGABIOS-lgpl-latest.debug.bin
+ (1.4), vgabios.c (1.20) (utags: v0_3b):
+
+ - new function set_scan_lines() for the font size change (patch
+ from Hartmut Birr) - cursor shape start and end must be updated
+ in set_scan_lines() - set_scan_lines() is called by the functions
+ 0x1110, 0x1111, 0x1112 and 0x1114 after copying the font data
+
+2002-10-04 08:20 vruppert
+
+ * VGABIOS-lgpl-latest.bin (1.3), VGABIOS-lgpl-latest.debug.bin
+ (1.3), vgabios.c (1.19):
+
+ - biosfn_set_single_dac_reg(): the red value is stored in DH
+
+2002-09-19 19:05 cbothamy
+
+ * VGABIOS-lgpl-latest.bin (1.2), VGABIOS-lgpl-latest.debug.bin
+ (1.2):
+
+ - updated with latest changes
+
+2002-09-19 19:03 cbothamy
+
+ * ChangeLog (1.16), Makefile (1.7, v0_3b), vbe.c (1.22, v0_3b),
+ vgabios.c (1.18), vgabios.h (1.3, v0_4b, v0_4a, v0_3b):
+
+ - updated the Makefile - removed display of copyrights. -
+ changed the Copyright string to "LGPL VGABios developers"
+
+2002-09-08 21:14 vruppert
+
+ * vgabios.c (1.17):
+
+ - set the cursor shape depending on the current font height -
+ clear BL before calling int 0x10 function 0x1103 in
+ vgabios_init_func
+
+2002-08-23 22:58 cbothamy
+
+ * vbe.c (1.21), vbetables.h (1.12, v0_3b):
+
+ - added lfb-mode numbers (patch from mathis)
+
+2002-07-21 21:57 japj
+
+ * vbe.c (1.20), vgabios.c (1.16):
+
+ gcc2/3 preprocessing fix
+
+2002-05-18 16:55 cbothamy
+
+ * vgabios.c (1.15):
+
+ - include patch from Volker that adds some text font functions
+
+2002-05-01 23:13 japj
+
+ * VGABIOS-lgpl-latest.bin (1.1), VGABIOS-lgpl-latest.debug.bin
+ (1.1):
+
+ adding latest bin & debug bin of the vgabios
+
+2002-04-29 14:50 japj
+
+ * ChangeLog (1.15), vbe.c (1.19), vbe.h (1.12, v0_3b), vbetables.h
+ (1.11), vgabios.c (1.14):
+
+ - applying hw scrolling/multibuffering patch
+
+2002-04-25 21:59 japj
+
+ * Makefile (1.6), vbe.c (1.18), vgabios.c (1.13):
+
+ - reverting #asm/##asm & endasm patch (does not work with with
+ cygwin)
+
+2002-04-19 19:38 japj
+
+ * Makefile (1.5), vbe.c (1.17), vgabios.c (1.12):
+
+ - fixing preprocessing of vgabios with latest gcc (from Mandrake
+ 8.2)
+
+2002-04-08 23:44 japj
+
+ * ChangeLog (1.14), vbe_display_api.txt (1.5, v0_3b):
+
+ - preparing docs for new DISPI interface (for hardware scrolling)
+
+2002-04-03 19:06 japj
+
+ * ChangeLog (1.13), TODO (1.9, v0_4b, v0_4a, v0_3b), vbe.c (1.16):
+
+ - defaulting LFB on + updated changelog & todo
+
+2002-04-03 00:38 cbothamy
+
+ * vbe.c (1.15), vgabios.c (1.11):
+
+ - changed the logging ports to 0x500 -> 0x502
+
+2002-03-14 17:54 japj
+
+ * vbe.c (1.14):
+
+ - vbetables.h is dependant upon some defines (VBE_HAVE_LFB), so
+ put the include *after* the define
+
+2002-03-13 21:47 japj
+
+ * ChangeLog (1.12), TODO (1.8), vbe.c (1.13), vbetables.h (1.10),
+ vgabios.c (1.10):
+
+ - made LFB dependant upon define - not implement vbe functions
+ return failure - updated todo & docs for things after bochs 1.4
+
+2002-03-13 19:46 japj
+
+ * vbe.h (1.11), vbe_display_api.txt (1.4):
+
+ - added max video memory + documented what is in the 0xb0c0
+ interface
+
+2002-03-12 02:33 cbothamy
+
+ * ChangeLog (1.11), Makefile (1.4):
+
+ - updated for 0.3a. Merged vgabios.bin and vbebios.bin
+
+2002-03-10 21:36 japj
+
+ * ChangeLog (1.10), vbetables.h (1.9):
+
+ - added LFB modes for testing with vbe-lfb patch in Bochs
+
+2002-03-10 17:42 japj
+
+ * vbe.c (1.12, v0_3a):
+
+ - show people when they do NOT have VBE support available
+
+2002-03-10 17:36 japj
+
+ * TODO (1.7, v0_3a), vbe.c (1.11), vbe.h (1.10, v0_3a), vgabios.c
+ (1.9, v0_3a):
+
+ - cleanup of vbe internal functions (set 8bpp mode is now
+ dependant on ModeInfo content instead of hardcoded functions)
+
+2002-03-10 17:20 cbothamy
+
+ * ChangeLog (1.9, v0_3a), TODO (1.6):
+
+ - updated for 0.3a
+
+2002-03-10 17:19 cbothamy
+
+ * vbe.c (1.10), vbe.h (1.9):
+
+ - added vbe_has_vbe_display function that detects an attached vbe
+ display
+
+2002-03-10 17:12 cbothamy
+
+ * vgabios.c (1.8):
+
+ - vbe calls are done only if a vbe display is detected
+
+2002-03-10 11:25 japj
+
+ * vbe.h (1.8), vbe_display_api.txt (1.3, v0_3a):
+
+ - preparing for LFB support
+
+2002-03-09 14:25 japj
+
+ * vgabios.c (1.7):
+
+ - fixing initial cursor shape to _ instead of -
+
+2002-03-08 23:08 japj
+
+ * ChangeLog (1.8), TODO (1.5), vbe.c (1.9), vbe.h (1.7), vgabios.c
+ (1.6):
+
+ - updating vbe code to new API
+
+2002-03-08 21:48 japj
+
+ * vbe.c (1.8), vbe.h (1.6), vbetables.h (1.8, v0_3a):
+
+ - updating vbe code with #defines from API
+
+2002-03-08 21:31 japj
+
+ * vbe_display_api.txt (1.2):
+
+ - adding some text about how banks work
+
+2002-03-08 21:09 japj
+
+ * ChangeLog (1.7), vbe_display_api.txt (1.1):
+
+ - adding vbe_display_api documentation
+
+2002-03-07 21:36 japj
+
+ * ChangeLog (1.6), vbe.c (1.7), vbetables.h (1.7):
+
+ - added 1024x768xbpp support - some more cleanups/comments
+
+2002-03-06 21:55 japj
+
+ * ChangeLog (1.5), TODO (1.4), vbe.c (1.6), vbetables.h (1.6),
+ vgabios.c (1.5):
+
+ - updated changelog with new modi - added 640x480x8 (Mandrake
+ Installer can use this!) - added pre VBE2 compatible 'detection'
+ - fixed problem when normal vga set mode wouldn't disable vbe
+ mode
+
+2002-03-06 20:59 japj
+
+ * TODO (1.3), vbe.c (1.5), vbe.h (1.5), vbetables.h (1.5),
+ vgabios.c (1.4):
+
+ - adding 640x400x8 and 800x600x8 vbe support (this depends
+ HEAVILY on my bochs vga code patch - japj)
+
+2002-03-06 18:00 japj
+
+ * vbe.c (1.4), vbe.h (1.4), vbetables.h (1.4):
+
+ - implemented banked & lfb support for 320x200x8bpp (some fixes
+ for vbetest program not displaying anything)
+
+2002-03-05 20:25 japj
+
+ * Makefile (1.3, v0_3a):
+
+ for vbe debug bios: - print debugging information in assembly
+ output - print source code in assembly output
+
+2002-03-01 19:39 japj
+
+ * ChangeLog (1.4), TODO (1.2), vbe.c (1.3), vbe.h (1.3),
+ vbetables.h (1.3):
+
+ - added vbe support for 320x200x8 using the standard vgamode
+ (0x13)
+
+2002-02-19 00:29 japj
+
+ * ChangeLog (1.3):
+
+ - updating ChangeLog with lfbprof
+
+2002-02-18 23:26 japj
+
+ * tests/lfbprof/: lfbprof.c (1.2), lfbprof.h (1.2) (utags: v0_3a,
+ v0_3b, v0_4a, v0_4b):
+
+ - fixed unsigned short for mode list (-1 != 0xffff otherwise) -
+ fixed LfbMapRealPointer macro mask problem (some modes were
+ skipped) - added some extra 'debugging' printf's
+
+2002-02-18 23:07 japj
+
+ * tests/lfbprof/: Makefile (1.1, v0_4b, v0_4a, v0_3b, v0_3a),
+ lfbprof.c (1.1), lfbprof.h (1.1):
+
+ - Adding lfbprof testprogram (for vbe testing purposes) It
+ needs to be compiled with the Watcom C Compiler
+
+2002-02-18 18:48 japj
+
+ * vbe.c (1.2), vbe.h (1.2):
+
+ - cosmetic updates to vbe.c/h + added bunch of FIXMEs for work
+ that needs to be done
+
+2002-02-18 18:34 japj
+
+ * vbetables.h (1.2):
+
+ - cosmetic updates in vbetables.h
+
+2002-02-18 18:32 japj
+
+ * ChangeLog (1.2):
+
+ updated changelog with merge of vbebios 0.2
+
+2002-02-18 18:07 japj
+
+ * vgabios.c (1.3):
+
+ - small cosmetic cleanup in vgabios vbe code + added FIXMEs
+
+2002-02-18 17:55 japj
+
+ * Makefile (1.2), dataseghack (1.2, v0_4b, v0_4a, v0_3b, v0_3a),
+ vbe.c (1.1), vbe.h (1.1), vbetables.h (1.1), vgabios.c (1.2),
+ vgabios.h (1.2, v0_3a):
+
+ - merging with vbebios 0.2 release
+
+2002-02-18 11:31 cbothamy
+
+ * BUGS (1.1, v0_4b, v0_4a, v0_3b, v0_3a), COPYING (1.1, v0_4b,
+ v0_4a, v0_3b, v0_3a), ChangeLog (1.1), Makefile (1.1), Notes
+ (1.1, v0_4b, v0_4a, v0_3b, v0_3a), README (1.1, v0_3b, v0_3a),
+ TODO (1.1), dataseghack (1.1), vgabios.c (1.1), vgabios.h (1.1),
+ vgafonts.h (1.1, v0_4b, v0_4a, v0_3b, v0_3a), vgatables.h (1.1,
+ v0_3b, v0_3a), tests/testbios.c (1.1, v0_4b, v0_4a, v0_3b,
+ v0_3a):
+
+ - initial import
+
diff --git a/tools/firmware/vgabios/Makefile b/tools/firmware/vgabios/Makefile
new file mode 100644
index 0000000000..929d882e72
--- /dev/null
+++ b/tools/firmware/vgabios/Makefile
@@ -0,0 +1,77 @@
+CC = gcc
+CFLAGS = -g -O2 -Wall -Wstrict-prototypes
+LDFLAGS =
+
+GCC = gcc
+BCC = bcc
+AS86 = as86
+
+RELEASE = `pwd | sed "s-.*/--"`
+RELDATE = `date '+%d %b %Y'`
+RELVERS = `pwd | sed "s-.*/--" | sed "s/vgabios//" | sed "s/-//"`
+
+VGABIOS_DATE = "-DVGABIOS_DATE=\"$(RELDATE)\""
+
+all: bios cirrus-bios
+
+bios: biossums vgabios.bin vgabios.debug.bin
+
+cirrus-bios: vgabios-cirrus.bin vgabios-cirrus.debug.bin
+
+clean:
+ rm -f biossums *.o *.s *.ld86 \
+ temp.awk.* vgabios*.orig _vgabios_* _vgabios-debug_* core vgabios*.bin vgabios*.txt $(RELEASE).bin *.bak
+ rm -f VGABIOS-lgpl-latest*.bin
+
+release:
+ VGABIOS_VERS=\"-DVGABIOS_VERS=\\\"$(RELVERS)\\\"\" make bios cirrus-bios
+ /bin/rm -f *.o *.s *.ld86 \
+ temp.awk.* vgabios.*.orig _vgabios_.*.c core *.bak .#*
+ cp VGABIOS-lgpl-latest.bin ../$(RELEASE).bin
+ cp VGABIOS-lgpl-latest.debug.bin ../$(RELEASE).debug.bin
+ cp VGABIOS-lgpl-latest.cirrus.bin ../$(RELEASE).cirrus.bin
+ cp VGABIOS-lgpl-latest.cirrus.debug.bin ../$(RELEASE).cirrus.debug.bin
+ tar czvf ../$(RELEASE).tgz --exclude CVS -C .. $(RELEASE)/
+
+vgabios.bin: vgabios.c vgabios.h vgafonts.h vgatables.h vbe.h vbe.c vbetables.h
+ $(GCC) -E -P vgabios.c $(VGABIOS_VERS) $(VGABIOS_DATE) > _vgabios_.c
+ $(BCC) -o vgabios.s -C-c -D__i86__ -S -0 _vgabios_.c
+ sed -e 's/^\.text//' -e 's/^\.data//' vgabios.s > _vgabios_.s
+ $(AS86) _vgabios_.s -b vgabios.bin -u -w- -g -0 -j -O -l vgabios.txt
+ rm -f _vgabios_.s _vgabios_.c vgabios.s
+ cp vgabios.bin VGABIOS-lgpl-latest.bin
+ ./biossums VGABIOS-lgpl-latest.bin
+ ls -l VGABIOS-lgpl-latest.bin
+
+vgabios.debug.bin: vgabios.c vgabios.h vgafonts.h vgatables.h vbe.h vbe.c vbetables.h
+ $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DDEBUG $(VGABIOS_DATE) > _vgabios-debug_.c
+ $(BCC) -o vgabios-debug.s -C-c -D__i86__ -S -0 _vgabios-debug_.c
+ sed -e 's/^\.text//' -e 's/^\.data//' vgabios-debug.s > _vgabios-debug_.s
+ $(AS86) _vgabios-debug_.s -b vgabios.debug.bin -u -w- -g -0 -j -O -l vgabios.debug.txt
+ rm -f _vgabios-debug_.s _vgabios-debug_.c vgabios-debug.s
+ cp vgabios.debug.bin VGABIOS-lgpl-latest.debug.bin
+ ./biossums VGABIOS-lgpl-latest.debug.bin
+ ls -l VGABIOS-lgpl-latest.debug.bin
+
+vgabios-cirrus.bin: vgabios.c vgabios.h vgafonts.h vgatables.h clext.c
+ $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DCIRRUS $(VGABIOS_DATE) > _vgabios-cirrus_.c
+ $(BCC) -o vgabios-cirrus.s -C-c -D__i86__ -S -0 _vgabios-cirrus_.c
+ sed -e 's/^\.text//' -e 's/^\.data//' vgabios-cirrus.s > _vgabios-cirrus_.s
+ $(AS86) _vgabios-cirrus_.s -b vgabios-cirrus.bin -u -w- -g -0 -j -O -l vgabios-cirrus.txt
+ rm -f _vgabios-cirrus_.s _vgabios-cirrus_.c vgabios-cirrus.s
+ cp vgabios-cirrus.bin VGABIOS-lgpl-latest.cirrus.bin
+ ./biossums VGABIOS-lgpl-latest.cirrus.bin
+ ls -l VGABIOS-lgpl-latest.cirrus.bin
+
+vgabios-cirrus.debug.bin: vgabios.c vgabios.h vgafonts.h vgatables.h clext.c
+ $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DCIRRUS -DCIRRUS_DEBUG $(VGABIOS_DATE) > _vgabios-cirrus-debug_.c
+ $(BCC) -o vgabios-cirrus-debug.s -C-c -D__i86__ -S -0 _vgabios-cirrus-debug_.c
+ sed -e 's/^\.text//' -e 's/^\.data//' vgabios-cirrus-debug.s > _vgabios-cirrus-debug_.s
+ $(AS86) _vgabios-cirrus-debug_.s -b vgabios-cirrus.debug.bin -u -w- -g -0 -j -O -l vgabios-cirrus.debug.txt
+ rm -f _vgabios-cirrus-debug_.s _vgabios-cirrus-debug_.c vgabios-cirrus-debug.s
+ cp vgabios-cirrus.debug.bin VGABIOS-lgpl-latest.cirrus.debug.bin
+ ./biossums VGABIOS-lgpl-latest.cirrus.debug.bin
+ ls -l VGABIOS-lgpl-latest.cirrus.debug.bin
+
+biossums: biossums.c
+ $(CC) -o biossums biossums.c
diff --git a/tools/firmware/vgabios/Notes b/tools/firmware/vgabios/Notes
new file mode 100644
index 0000000000..d5b708dc7f
--- /dev/null
+++ b/tools/firmware/vgabios/Notes
@@ -0,0 +1,11 @@
+Development notes
+-----------------
+
+- need to split video init function
+ 1. set bios variables
+ 2. do the real init with io based on bios variables
+
+- characters format switching will set the bios
+ variables and call function #2 above
+
+- need to rework the tables as explained in Interrupt list
diff --git a/tools/firmware/vgabios/README b/tools/firmware/vgabios/README
new file mode 100644
index 0000000000..69462d93b7
--- /dev/null
+++ b/tools/firmware/vgabios/README
@@ -0,0 +1,191 @@
+Plex86/Bochs VGABios
+--------------------
+
+The goal of this project is to have a LGPL'd Video Bios in plex86,
+Bochs and qemu.
+This VGA Bios is very specific to the emulated VGA card.
+It is NOT meant to drive a physical vga card.
+
+
+Cirrus SVGA extension
+---------------------
+
+The Cirrus SVGA extension is designed for the Cirrus emulation in Bochs and
+qemu. The initial patch for the Cirrus extension has been written by Makoto
+Suzuki (suzu).
+
+
+Install
+-------
+To compile the VGA Bios you will need :
+- gcc
+- bcc
+- as86
+- ld86
+
+Untar the archive, and type make. You should get a "VGABIOS-lgpl-latest.bin"
+file. Alternatively, you can use the binary file "VGABIOS-lgpl-latest.bin",
+I have compiled for you.
+
+Edit your plex86/bochs conf file, and modify the load-rom command in the
+VGA BIOS section, to point to the new vgabios image file.
+
+
+Debugging
+---------
+You can get a very basic debugging system: messages printed by the vgabios.
+You have to register the "unmapped" device driver in plex86 or bochs, and make
+sure it grabs port 0xfff0.
+
+Comment the #undef DEBUG at the beginning of vgabios.c.
+You can then use the "printf" function in the bios.
+
+
+Testing
+-------
+Look at the "testvga.c" file in the archive. This is a minimal Turbo C 2.0
+source file that calls a few int10 functions. Feel free to modify it to suit
+your needs.
+
+
+Copyright and License
+---------------------
+This program has been written by Christophe Bothamy
+It is protected by the GNU Lesser General Public License, which you should
+have received a copy of along with this package.
+
+
+Reverse Engineering
+-------------------
+The VGA Bios has been written without reverse-engineering any existing Bios.
+
+
+Acknowledgment
+--------------
+The source code contains code ripped from rombios.c of plex86, written
+by Kevin Lawton <kevin2001@yahoo.com>
+
+The source code contains fonts from fntcol16.zip (c) by Joseph Gil available at :
+ftp://ftp.simtel.net/pub/simtelnet/msdos/screen/fntcol16.zip
+These fonts are public domain
+
+The source code is based on information taken from :
+- Kevin Lawton's vga card emulation for bochs/plex86
+- Ralf Brown's interrupts list available at
+ http://www.cs.cmu.edu/afs/cs/user/ralf/pub/WWW/files.html
+- Finn Thogersons' VGADOC4b available at http://home.worldonline.dk/~finth/
+- Michael Abrash's Graphics Programming Black Book
+- Francois Gervais' book "programmation des cartes graphiques cga-ega-vga"
+ edited by sybex
+- DOSEMU 1.0.1 source code for several tables values and formulas
+
+
+Feedback
+--------
+Please report any bugs, comments, patches for this VGA Bios to info@vruppert.de
+You can find the latest release at : http://www.nongnu.org/vgabios/
+For any information on bochs, visit the website http://bochs.sourceforge.net/
+For any information on qemu, visit the website http://fabrice.bellard.free.fr/qemu/
+
+
+History
+-------
+vgabios-0.5b : May 24 2005
+ - Volker
+ . fixed return value for the default case in the VBE section (non-debug mode)
+ . removed unused stuff
+
+vgabios-0.5a : Mar 07 2005
+ - Volker
+ . Cirrus SVGA extension (initial patches from Makoto Suzuki, improvements
+ from Fabrice Bellard)
+ . vgabios image size is now exactly 32k with a checksum
+ . a lot of vgabios and vbe functions rewritten in assembler
+  . dynamically generated VBE mode info list
+ . write character function for CGA and LINEAR8 modes
+ . read/write graphics pixel for some graphics modes
+ . text scroll feature for some graphics modes
+ . VBE 8-bit DAC support
+
+vgabios-0.4c : Nov 06 2003
+ - Christophe
+ . fix font problem on initial screen of NT4 Loader
+
+vgabios-0.4b : Nov 04 2003
+ - Volker
+ . fix offset of character tables
+ . optimizations of CRT controller accesses
+ . VBE i/o registers changed to 0x01CE/CF
+ (suggestion from Daniel Gimpelevich)
+ . "noclear" flag stored in BIOS area
+ . fix character height returned by get_font_info function
+
+vgabios-0.4a : Aug 17 2003
+ - Volker
+ . VBE mode search rewritten (VBE modes with LFB bit removed)
+ . many bugfixes and optimizations
+ . write character function implemented for graphics modes
+ . support for 15bpp, 16bpp, 24bpp and 32bpp VBE modes added
+ . SVGA mode 0x6A added
+ . VBE modes 0x102, 0x117, 0x118 and 0x142 (Bochs specific)
+
+vgabios-0.3b : Nov 23 2002
+ - Christophe
+ . added lfb-mode numbers (patch from mathis)
+ . updated the Makefile
+ . removed display of copyrights.
+ . changed the Copyright string to "LGPL VGABios developers"
+ - Volker
+ . set the cursor shape depending on the current font height
+ . clear BL before calling int 0x10 function 0x1103 in vgabios_init_func
+ . added some text font functions
+ - Jeroen
+ . Forced to new DISPI (0xb0c1) interface (requires latest bochs vbe code)
+ . Added multibuffering support
+ . Added new DISPI interface for: virt width, height, x offset, y offset
+ . Added LFB modes (to be used with the vbe-lfb patch in bochs)
+ see VBE_HAVE_LFB in vbe.c (currently default enabled)
+ . updated TODO & docs for changes after bochs 1.4
+
+vgabios-0.3a : Mar 10 2002
+ - Christophe
+ . Fixed bug in function ah=13
+ - Jeroen
+ . updated vbebios implementation to new api
+ . added vbe_display_api documentation
+ . added 640x400x8, 640x480x8, 800x600x8, 1024x768
+ (>640x480 needs a special bochs patch atm)
+ . added 320x200x8 vbe support (uses the standard 320x200x8 vga mode to
+ display, this allows for testing & having something on screen as well,
+ at least until bochs host side display is up & running)
+ . adding lfbprof (vbe) testprogram (+some small fixes to it)
+ . merging with vbebios 0.2
+
+vgabios-0.2b : Nov 19 2001
+ - Christophe
+ . Fixed bug in function ah=13
+
+vgabios-0.2a : Nov 09 2001
+ - Christophe
+ . Included bugfix from techt@pikeonline.net about grayscale summing
+ . Added the "IBM" string at org 0x1e as Bart Oldeman suggested
+  . Fixed DS and ES that were inverted in the int10 parameters list!
+ . The following have been implemented :
+ - function ax=1a00, ax=1a01, ah=1b
+ - function ax=1130
+ . Added debug messages for unimplemented/unknown functions
+ Must be compiled with DEBUG defined. The output is trapped
+ by the unknown-ioport driver of plex/bochs (port 0xfff0 is used)
+
+vgabios-0.1a : May 8 2001
+ - Christophe
+ . First release. The work has been focused only on text mode.
+ . The following have been implemented :
+ - inits
+ - int 10 handler
+ - functions ah=00, ah=01, ah=02, ah=03, ah=05, ah=06, ah=07, ah=08
+ ah=09, ah=0a, ah=0e, ah=0f, ax=1000, ax=1001, ax=1002, ax=1003
+ ax=1007, ax=1008, ax=1009, ax=1010, ax=1012, ax=1013, ax=1015
+ ax=1017, ax=1018, ax=1019, ax=101a, ax=101b, ah=12 bl=10,
+ ah=12 bl=30, ah=12 bl=31, ah=12 bl=32, ah=12 bl=33, ah=12 bl=34
+ ah=13
diff --git a/tools/firmware/vgabios/TODO b/tools/firmware/vgabios/TODO
new file mode 100644
index 0000000000..0b83ed0992
--- /dev/null
+++ b/tools/firmware/vgabios/TODO
@@ -0,0 +1,28 @@
+Short term :
+------------
+
+General
+ - Fix init mode (ah=00). Should use more BIOS variables
+ - Add new functionalities and modify static functionality table
+ - Performance : 16 bits IO
+
+v0.6
+ - Reimplement the tables so it is compatible with the video save pointer table
+ - Implement the remaining functions (don't know if all are needed):
+ - chargen ax=1120, ax=1121, ax=1122, ax=1123, ax=1124
+ - display switch interface ah=12 bl=35
+ - video refresh control ah=12 bl=36
+ - save/restore state ah=1c
+ - Graphic modes
+
+v1.0
+ - Bugfixes
+
+
+=================================================================================================
+VBE:
+----
+Long term:
+- have plex86 host side display interface
+- have text io functions in vbe mode
+
diff --git a/tools/firmware/vgabios/biossums.c b/tools/firmware/vgabios/biossums.c
new file mode 100644
index 0000000000..bb1d0ad7f7
--- /dev/null
+++ b/tools/firmware/vgabios/biossums.c
@@ -0,0 +1,200 @@
+/* biossums.c --- written by Eike W. for the Bochs BIOS */
+/* adapted for the LGPL'd VGABIOS by vruppert */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+typedef unsigned char byte;
+
+void check( int value, char* message );
+
+#define LEN_BIOS_DATA 0x8000
+#define MAX_OFFSET (LEN_BIOS_DATA - 1)
+
+
+#define BIOS_OFFSET 0x7FFF
+
+long chksum_bios_get_offset( byte* data, long offset );
+byte chksum_bios_calc_value( byte* data, long offset );
+byte chksum_bios_get_value( byte* data, long offset );
+void chksum_bios_set_value( byte* data, long offset, byte value );
+
+
+#define PMID_LEN 20
+#define PMID_CHKSUM 19
+
+long chksum_pmid_get_offset( byte* data, long offset );
+byte chksum_pmid_calc_value( byte* data, long offset );
+byte chksum_pmid_get_value( byte* data, long offset );
+void chksum_pmid_set_value( byte* data, long offset, byte value );
+
+
+byte bios_data[LEN_BIOS_DATA];
+
+
+int main( int argc, char* argv[] ) {
+
+ FILE* stream;
+ long offset, tmp_offset;
+ byte cur_val = 0, new_val = 0;
+ int hits;
+
+
+ if( argc != 2 ) {
+ printf( "Error. Need a file-name as an argument.\n" );
+ exit( EXIT_FAILURE );
+ }
+
+ if(( stream = fopen( argv[1], "rb" )) == NULL ) {
+ printf( "Error opening %s for reading.\n", argv[1] );
+ exit( EXIT_FAILURE );
+ }
+ if( fread( bios_data, 1, LEN_BIOS_DATA, stream ) >= LEN_BIOS_DATA ) {
+ printf( "Error reading max. 32767 Bytes from %s.\n", argv[1] );
+ fclose( stream );
+ exit( EXIT_FAILURE );
+ }
+ fclose( stream );
+
+ hits = 0;
+ offset = 0L;
+ while( (tmp_offset = chksum_pmid_get_offset( bios_data, offset )) != -1L ) {
+ offset = tmp_offset;
+ cur_val = chksum_pmid_get_value( bios_data, offset );
+ new_val = chksum_pmid_calc_value( bios_data, offset );
+ printf( "\nPMID entry at: 0x%4lX\n", offset );
+ printf( "Current checksum: 0x%02X\n", cur_val );
+ printf( "Calculated checksum: 0x%02X ", new_val );
+ hits++;
+ }
+ if( hits == 1 && cur_val != new_val ) {
+ printf( "Setting checksum." );
+ chksum_pmid_set_value( bios_data, offset, new_val );
+ }
+ if( hits >= 2 ) {
+ printf( "Multiple PMID entries! No checksum set." );
+ }
+ if( hits ) {
+ printf( "\n" );
+ }
+
+
+ offset = 0L;
+ offset = chksum_bios_get_offset( bios_data, offset );
+ cur_val = chksum_bios_get_value( bios_data, offset );
+ new_val = chksum_bios_calc_value( bios_data, offset );
+ printf( "\nBios checksum at: 0x%4lX\n", offset );
+ printf( "Current checksum: 0x%02X\n", cur_val );
+ printf( "Calculated checksum: 0x%02X ", new_val );
+ if( cur_val != new_val ) {
+ printf( "Setting checksum." );
+ chksum_bios_set_value( bios_data, offset, new_val );
+ }
+ printf( "\n" );
+
+
+ if(( stream = fopen( argv[1], "wb" )) == NULL ) {
+ printf( "Error opening %s for writing.\n", argv[1] );
+ exit( EXIT_FAILURE );
+ }
+ if( fwrite( bios_data, 1, LEN_BIOS_DATA, stream ) < LEN_BIOS_DATA ) {
+ printf( "Error writing 32KBytes to %s.\n", argv[1] );
+ fclose( stream );
+ exit( EXIT_FAILURE );
+ }
+ fclose( stream );
+
+ return( EXIT_SUCCESS );
+}
+
+
+void check( int okay, char* message ) {
+
+ if( !okay ) {
+ printf( "\n\nError. %s.\n", message );
+ exit( EXIT_FAILURE );
+ }
+}
+
+
+long chksum_bios_get_offset( byte* data, long offset ) {
+
+ return( BIOS_OFFSET );
+}
+
+
+byte chksum_bios_calc_value( byte* data, long offset ) {
+
+ int i;
+ byte sum;
+
+ sum = 0;
+ for( i = 0; i < MAX_OFFSET; i++ ) {
+ sum = sum + *( data + i );
+ }
+ sum = -sum; /* iso ensures -s + s == 0 on unsigned types */
+ return( sum );
+}
+
+
+byte chksum_bios_get_value( byte* data, long offset ) {
+
+ return( *( data + BIOS_OFFSET ) );
+}
+
+
+void chksum_bios_set_value( byte* data, long offset, byte value ) {
+
+ *( data + BIOS_OFFSET ) = value;
+}
+
+
+byte chksum_pmid_calc_value( byte* data, long offset ) {
+
+ int i;
+ int len;
+ byte sum;
+
+ len = PMID_LEN;
+ check( offset + len <= MAX_OFFSET, "PMID entry length out of bounds" );
+ sum = 0;
+ for( i = 0; i < len; i++ ) {
+ if( i != PMID_CHKSUM ) {
+ sum = sum + *( data + offset + i );
+ }
+ }
+ sum = -sum;
+ return( sum );
+}
+
+
+long chksum_pmid_get_offset( byte* data, long offset ) {
+
+ long result = -1L;
+
+ while( offset + PMID_LEN < MAX_OFFSET ) {
+ offset = offset + 1;
+ if( *( data + offset + 0 ) == 'P' && \
+ *( data + offset + 1 ) == 'M' && \
+ *( data + offset + 2 ) == 'I' && \
+ *( data + offset + 3 ) == 'D' ) {
+ result = offset;
+ break;
+ }
+ }
+ return( result );
+}
+
+
+byte chksum_pmid_get_value( byte* data, long offset ) {
+
+ check( offset + PMID_CHKSUM <= MAX_OFFSET, "PMID checksum out of bounds" );
+ return( *( data + offset + PMID_CHKSUM ) );
+}
+
+
+void chksum_pmid_set_value( byte* data, long offset, byte value ) {
+
+ check( offset + PMID_CHKSUM <= MAX_OFFSET, "PMID checksum out of bounds" );
+ *( data + offset + PMID_CHKSUM ) = value;
+}
diff --git a/tools/firmware/vgabios/clext.c b/tools/firmware/vgabios/clext.c
new file mode 100644
index 0000000000..31a50a2326
--- /dev/null
+++ b/tools/firmware/vgabios/clext.c
@@ -0,0 +1,1587 @@
+//
+// QEMU Cirrus CLGD 54xx VGABIOS Extension.
+//
+// Copyright (c) 2004 Makoto Suzuki (suzu)
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+//#define CIRRUS_VESA3_PMINFO
+#ifdef VBE
+#undef CIRRUS_VESA3_PMINFO
+#endif
+
+#define PM_BIOSMEM_CURRENT_MODE 0x449
+#define PM_BIOSMEM_CRTC_ADDRESS 0x463
+#define PM_BIOSMEM_VBE_MODE 0x4BA
+
+typedef struct
+{
+ /* + 0 */
+ unsigned short mode;
+ unsigned short width;
+ unsigned short height;
+ unsigned short depth;
+ /* + 8 */
+ unsigned short hidden_dac; /* 0x3c6 */
+ unsigned short *seq; /* 0x3c4 */
+ unsigned short *graph; /* 0x3ce */
+ unsigned short *crtc; /* 0x3d4 */
+ /* +16 */
+ unsigned char bitsperpixel;
+ unsigned char vesacolortype;
+ unsigned char vesaredmask;
+ unsigned char vesaredpos;
+ unsigned char vesagreenmask;
+ unsigned char vesagreenpos;
+ unsigned char vesabluemask;
+ unsigned char vesabluepos;
+ /* +24 */
+ unsigned char vesareservedmask;
+ unsigned char vesareservedpos;
+} cirrus_mode_t;
+#define CIRRUS_MODE_SIZE 26
+
+
+/* For VESA BIOS 3.0 */
+#define CIRRUS_PM16INFO_SIZE 20
+
+/* VGA */
+unsigned short cseq_vga[] = {0x0007,0xffff};
+unsigned short cgraph_vga[] = {0x0009,0x000a,0x000b,0xffff};
+unsigned short ccrtc_vga[] = {0x001a,0x001b,0x001d,0xffff};
+
+/* extensions */
+unsigned short cgraph_svgacolor[] = {
+0x0000,0x0001,0x0002,0x0003,0x0004,0x4005,0x0506,0x0f07,0xff08,
+0x0009,0x000a,0x000b,
+0xffff
+};
+/* 640x480x8 */
+unsigned short cseq_640x480x8[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1107,
+0x580b,0x580c,0x580d,0x580e,
+0x0412,0x0013,0x2017,
+0x331b,0x331c,0x331d,0x331e,
+0xffff
+};
+unsigned short ccrtc_640x480x8[] = {
+0x2c11,
+0x5f00,0x4f01,0x4f02,0x8003,0x5204,0x1e05,0x0b06,0x3e07,
+0x4009,0x000c,0x000d,
+0xea10,0xdf12,0x5013,0x4014,0xdf15,0x0b16,0xc317,0xff18,
+0x001a,0x221b,0x001d,
+0xffff
+};
+/* 640x480x16 */
+unsigned short cseq_640x480x16[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1707,
+0x580b,0x580c,0x580d,0x580e,
+0x0412,0x0013,0x2017,
+0x331b,0x331c,0x331d,0x331e,
+0xffff
+};
+unsigned short ccrtc_640x480x16[] = {
+0x2c11,
+0x5f00,0x4f01,0x4f02,0x8003,0x5204,0x1e05,0x0b06,0x3e07,
+0x4009,0x000c,0x000d,
+0xea10,0xdf12,0xa013,0x4014,0xdf15,0x0b16,0xc317,0xff18,
+0x001a,0x221b,0x001d,
+0xffff
+};
+/* 640x480x24 */
+unsigned short cseq_640x480x24[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1507,
+0x580b,0x580c,0x580d,0x580e,
+0x0412,0x0013,0x2017,
+0x331b,0x331c,0x331d,0x331e,
+0xffff
+};
+unsigned short ccrtc_640x480x24[] = {
+0x2c11,
+0x5f00,0x4f01,0x4f02,0x8003,0x5204,0x1e05,0x0b06,0x3e07,
+0x4009,0x000c,0x000d,
+0xea10,0xdf12,0x0013,0x4014,0xdf15,0x0b16,0xc317,0xff18,
+0x001a,0x321b,0x001d,
+0xffff
+};
+/* 800x600x8 */
+unsigned short cseq_800x600x8[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1107,
+0x230b,0x230c,0x230d,0x230e,
+0x0412,0x0013,0x2017,
+0x141b,0x141c,0x141d,0x141e,
+0xffff
+};
+unsigned short ccrtc_800x600x8[] = {
+0x2311,0x7d00,0x6301,0x6302,0x8003,0x6b04,0x1a05,0x9806,0xf007,
+0x6009,0x000c,0x000d,
+0x7d10,0x5712,0x6413,0x4014,0x5715,0x9816,0xc317,0xff18,
+0x001a,0x221b,0x001d,
+0xffff
+};
+/* 800x600x16 */
+unsigned short cseq_800x600x16[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1707,
+0x230b,0x230c,0x230d,0x230e,
+0x0412,0x0013,0x2017,
+0x141b,0x141c,0x141d,0x141e,
+0xffff
+};
+unsigned short ccrtc_800x600x16[] = {
+0x2311,0x7d00,0x6301,0x6302,0x8003,0x6b04,0x1a05,0x9806,0xf007,
+0x6009,0x000c,0x000d,
+0x7d10,0x5712,0xc813,0x4014,0x5715,0x9816,0xc317,0xff18,
+0x001a,0x221b,0x001d,
+0xffff
+};
+/* 800x600x24 */
+unsigned short cseq_800x600x24[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1507,
+0x230b,0x230c,0x230d,0x230e,
+0x0412,0x0013,0x2017,
+0x141b,0x141c,0x141d,0x141e,
+0xffff
+};
+unsigned short ccrtc_800x600x24[] = {
+0x2311,0x7d00,0x6301,0x6302,0x8003,0x6b04,0x1a05,0x9806,0xf007,
+0x6009,0x000c,0x000d,
+0x7d10,0x5712,0x2c13,0x4014,0x5715,0x9816,0xc317,0xff18,
+0x001a,0x321b,0x001d,
+0xffff
+};
+/* 1024x768x8 */
+unsigned short cseq_1024x768x8[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1107,
+0x760b,0x760c,0x760d,0x760e,
+0x0412,0x0013,0x2017,
+0x341b,0x341c,0x341d,0x341e,
+0xffff
+};
+unsigned short ccrtc_1024x768x8[] = {
+0x2911,0xa300,0x7f01,0x7f02,0x8603,0x8304,0x9405,0x2406,0xf507,
+0x6009,0x000c,0x000d,
+0x0310,0xff12,0x8013,0x4014,0xff15,0x2416,0xc317,0xff18,
+0x001a,0x221b,0x001d,
+0xffff
+};
+/* 1024x768x16 */
+unsigned short cseq_1024x768x16[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1707,
+0x760b,0x760c,0x760d,0x760e,
+0x0412,0x0013,0x2017,
+0x341b,0x341c,0x341d,0x341e,
+0xffff
+};
+unsigned short ccrtc_1024x768x16[] = {
+0x2911,0xa300,0x7f01,0x7f02,0x8603,0x8304,0x9405,0x2406,0xf507,
+0x6009,0x000c,0x000d,
+0x0310,0xff12,0x0013,0x4014,0xff15,0x2416,0xc317,0xff18,
+0x001a,0x321b,0x001d,
+0xffff
+};
+/* 1024x768x24 */
+unsigned short cseq_1024x768x24[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1507,
+0x760b,0x760c,0x760d,0x760e,
+0x0412,0x0013,0x2017,
+0x341b,0x341c,0x341d,0x341e,
+0xffff
+};
+unsigned short ccrtc_1024x768x24[] = {
+0x2911,0xa300,0x7f01,0x7f02,0x8603,0x8304,0x9405,0x2406,0xf507,
+0x6009,0x000c,0x000d,
+0x0310,0xff12,0x8013,0x4014,0xff15,0x2416,0xc317,0xff18,
+0x001a,0x321b,0x001d,
+0xffff
+};
+/* 1280x1024x8 */
+unsigned short cseq_1280x1024x8[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1107,
+0x760b,0x760c,0x760d,0x760e,
+0x0412,0x0013,0x2017,
+0x341b,0x341c,0x341d,0x341e,
+0xffff
+};
+unsigned short ccrtc_1280x1024x8[] = {
+0x2911,0xc300,0x9f01,0x9f02,0x8603,0x8304,0x9405,0x2406,0xf707,
+0x6009,0x000c,0x000d,
+0x0310,0xff12,0xa013,0x4014,0xff15,0x2416,0xc317,0xff18,
+0x001a,0x221b,0x001d,
+0xffff
+};
+/* 1280x1024x16 */
+unsigned short cseq_1280x1024x16[] = {
+0x0300,0x2101,0x0f02,0x0003,0x0e04,0x1707,
+0x760b,0x760c,0x760d,0x760e,
+0x0412,0x0013,0x2017,
+0x341b,0x341c,0x341d,0x341e,
+0xffff
+};
+unsigned short ccrtc_1280x1024x16[] = {
+0x2911,0xc300,0x9f01,0x9f02,0x8603,0x8304,0x9405,0x2406,0xf707,
+0x6009,0x000c,0x000d,
+0x0310,0xff12,0x4013,0x4014,0xff15,0x2416,0xc317,0xff18,
+0x001a,0x321b,0x001d,
+0xffff
+};
+
+
+cirrus_mode_t cirrus_modes[] =
+{
+ {0x5f,640,480,8,0x00,
+ cseq_640x480x8,cgraph_svgacolor,ccrtc_640x480x8,8,
+ 4,0,0,0,0,0,0,0,0},
+ {0x64,640,480,16,0xe1,
+ cseq_640x480x16,cgraph_svgacolor,ccrtc_640x480x16,16,
+ 6,5,11,6,5,5,0,0,0},
+ {0x66,640,480,15,0xf0,
+ cseq_640x480x16,cgraph_svgacolor,ccrtc_640x480x16,16,
+ 6,5,10,5,5,5,0,1,15},
+ {0x71,640,480,24,0xe5,
+ cseq_640x480x24,cgraph_svgacolor,ccrtc_640x480x24,24,
+ 6,8,16,8,8,8,0,0,0},
+
+ {0x5c,800,600,8,0x00,
+ cseq_800x600x8,cgraph_svgacolor,ccrtc_800x600x8,8,
+ 4,0,0,0,0,0,0,0,0},
+ {0x65,800,600,16,0xe1,
+ cseq_800x600x16,cgraph_svgacolor,ccrtc_800x600x16,16,
+ 6,5,11,6,5,5,0,0,0},
+ {0x67,800,600,15,0xf0,
+ cseq_800x600x16,cgraph_svgacolor,ccrtc_800x600x16,16,
+ 6,5,10,5,5,5,0,1,15},
+
+ {0x60,1024,768,8,0x00,
+ cseq_1024x768x8,cgraph_svgacolor,ccrtc_1024x768x8,8,
+ 4,0,0,0,0,0,0,0,0},
+ {0x74,1024,768,16,0xe1,
+ cseq_1024x768x16,cgraph_svgacolor,ccrtc_1024x768x16,16,
+ 6,5,11,6,5,5,0,0,0},
+ {0x68,1024,768,15,0xf0,
+ cseq_1024x768x16,cgraph_svgacolor,ccrtc_1024x768x16,16,
+ 6,5,10,5,5,5,0,1,15},
+
+ {0x78,800,600,24,0xe5,
+ cseq_800x600x24,cgraph_svgacolor,ccrtc_800x600x24,24,
+ 6,8,16,8,8,8,0,0,0},
+ {0x79,1024,768,24,0xe5,
+ cseq_1024x768x24,cgraph_svgacolor,ccrtc_1024x768x24,24,
+ 6,8,16,8,8,8,0,0,0},
+
+ {0x6d,1280,1024,8,0x00,
+ cseq_1280x1024x8,cgraph_svgacolor,ccrtc_1280x1024x8,8,
+ 4,0,0,0,0,0,0,0,0},
+ {0x69,1280,1024,15,0xf0,
+ cseq_1280x1024x16,cgraph_svgacolor,ccrtc_1280x1024x16,16,
+ 6,5,10,5,5,5,0,1,15},
+ {0x75,1280,1024,16,0xe1,
+ cseq_1280x1024x16,cgraph_svgacolor,ccrtc_1280x1024x16,16,
+ 6,5,11,6,5,5,0,0,0},
+
+ {0xfe,0,0,0,0,cseq_vga,cgraph_vga,ccrtc_vga,0,
+ 0xff,0,0,0,0,0,0,0,0},
+ {0xff,0,0,0,0,0,0,0,0,
+ 0xff,0,0,0,0,0,0,0,0},
+};
+
+unsigned char cirrus_id_table[] = {
+ // 5430
+ 0xA0, 0x32,
+ // 5446
+ 0xB8, 0x39,
+
+ 0xff, 0xff
+};
+
+
+unsigned short cirrus_vesa_modelist[] = {
+// 640x480x8
+ 0x101, 0x5f,
+// 640x480x15
+ 0x110, 0x66,
+// 640x480x16
+ 0x111, 0x64,
+// 640x480x24
+ 0x112, 0x71,
+// 800x600x8
+ 0x103, 0x5c,
+// 800x600x15
+ 0x113, 0x67,
+// 800x600x16
+ 0x114, 0x65,
+// 800x600x24
+ 0x115, 0x78,
+// 1024x768x8
+ 0x105, 0x60,
+// 1024x768x15
+ 0x116, 0x68,
+// 1024x768x16
+ 0x117, 0x74,
+// 1024x768x24
+ 0x118, 0x79,
+// 1280x1024x8
+ 0x107, 0x6d,
+// 1280x1024x15
+ 0x119, 0x69,
+// 1280x1024x16
+ 0x11a, 0x75,
+// invalid
+ 0xffff,0xffff
+};
+
+
+ASM_START
+
+cirrus_installed:
+.ascii "cirrus-compatible VGA is detected"
+.byte 0x0d,0x0a
+.byte 0x0d,0x0a,0x00
+
+cirrus_not_installed:
+.ascii "cirrus-compatible VGA is not detected"
+.byte 0x0d,0x0a
+.byte 0x0d,0x0a,0x00
+
+cirrus_vesa_vendorname:
+cirrus_vesa_productname:
+cirrus_vesa_oemname:
+.ascii "VGABIOS Cirrus extension"
+.byte 0
+cirrus_vesa_productrevision:
+.ascii "1.0"
+.byte 0
+
+cirrus_init:
+ call cirrus_check
+ jnz no_cirrus
+ SET_INT_VECTOR(0x10, #0xC000, #cirrus_int10_handler)
+ mov al, #0x0f ; memory setup
+ mov dx, #0x3C4
+ out dx, al
+ inc dx
+ in al, dx
+ and al, #0x18
+ mov ah, al
+ mov al, #0x0a
+ dec dx
+ out dx, ax
+ mov ax, #0x0007 ; set vga mode
+ out dx, ax
+ mov ax, #0x0431 ; reset bitblt
+ mov dx, #0x3CE
+ out dx, ax
+ mov ax, #0x0031
+ out dx, ax
+no_cirrus:
+ ret
+
+cirrus_display_info:
+ push ds
+ push si
+ push cs
+ pop ds
+ call cirrus_check
+ mov si, #cirrus_not_installed
+ jnz cirrus_msgnotinstalled
+ mov si, #cirrus_installed
+
+cirrus_msgnotinstalled:
+ call _display_string
+ pop si
+ pop ds
+ ret
+
+cirrus_check:
+ push ax
+ push dx
+ mov ax, #0x9206
+ mov dx, #0x3C4
+ out dx, ax
+ inc dx
+ in al, dx
+ cmp al, #0x12
+ pop dx
+ pop ax
+ ret
+
+
+cirrus_int10_handler:
+ pushf
+ push bp
+ cmp ah, #0x00 ;; set video mode
+ jz cirrus_set_video_mode
+ cmp ah, #0x12 ;; cirrus extension
+ jz cirrus_extbios
+ cmp ah, #0x4F ;; VESA extension
+ jz cirrus_vesa
+
+cirrus_unhandled:
+ pop bp
+ popf
+ jmp vgabios_int10_handler
+
+cirrus_return:
+#ifdef CIRRUS_DEBUG
+ call cirrus_debug_dump
+#endif
+ pop bp
+ popf
+ iret
+
+cirrus_set_video_mode:
+#ifdef CIRRUS_DEBUG
+ call cirrus_debug_dump
+#endif
+ push si
+ push ax
+ push bx
+ push ds
+#ifdef CIRRUS_VESA3_PMINFO
+ db 0x2e ;; cs:
+ mov si, [cirrus_vesa_sel0000_data]
+#else
+ xor si, si
+#endif
+ mov ds, si
+ xor bx, bx
+ mov [PM_BIOSMEM_VBE_MODE], bx
+ pop ds
+ pop bx
+ call cirrus_get_modeentry
+ jnc cirrus_set_video_mode_extended
+ mov al, #0xfe
+ call cirrus_get_modeentry_nomask
+ call cirrus_switch_mode
+ pop ax
+ pop si
+ jmp cirrus_unhandled
+
+cirrus_extbios:
+#ifdef CIRRUS_DEBUG
+ call cirrus_debug_dump
+#endif
+ cmp bl, #0x80
+ jb cirrus_unhandled
+ cmp bl, #0xAF
+ ja cirrus_unhandled
+ push bx
+ and bx, #0x7F
+ shl bx, 1
+ db 0x2e ;; cs:
+ mov bp, cirrus_extbios_handlers[bx]
+ pop bx
+ push #cirrus_return
+ push bp
+ ret
+
+cirrus_vesa:
+#ifdef CIRRUS_DEBUG
+ call cirrus_debug_dump
+#endif
+ cmp al, #0x0F
+ ja cirrus_vesa_not_handled
+ push bx
+ xor bx, bx
+ mov bl, al
+ shl bx, 1
+ db 0x2e ;; cs:
+ mov bp, cirrus_vesa_handlers[bx]
+ pop bx
+ push #cirrus_return
+ push bp
+ ret
+
+cirrus_vesa_not_handled:
+ mov ax, #0x014F ;; not implemented
+ jmp cirrus_return
+
+#ifdef CIRRUS_DEBUG
+cirrus_debug_dump:
+ push es
+ push ds
+ pusha
+ push cs
+ pop ds
+ call _cirrus_debugmsg
+ popa
+ pop ds
+ pop es
+ ret
+#endif
+
+cirrus_set_video_mode_extended:
+ call cirrus_switch_mode
+ pop ax ;; mode
+ and al, #0x7f
+
+ push ds
+#ifdef CIRRUS_VESA3_PMINFO
+ db 0x2e ;; cs:
+ mov si, [cirrus_vesa_sel0000_data]
+#else
+ xor si, si
+#endif
+ mov ds, si
+ mov [PM_BIOSMEM_CURRENT_MODE], al
+ pop ds
+
+ mov al, #0x20
+
+ pop si
+ jmp cirrus_return
+
+cirrus_vesa_pmbios_init:
+ retf
+cirrus_vesa_pmbios_entry:
+ pushf
+ push bp
+ cmp ah, #0x4F
+ jnz cirrus_vesa_pmbios_unimplemented
+ cmp al, #0x0F
+ ja cirrus_vesa_pmbios_unimplemented
+ push bx
+ xor bx, bx
+ mov bl, al
+ shl bx, 1
+ db 0x2e ;; cs:
+ mov bp, cirrus_vesa_handlers[bx]
+ pop bx
+ push #cirrus_vesa_pmbios_return
+ push bp
+ ret
+cirrus_vesa_pmbios_unimplemented:
+ mov ax, #0x014F
+cirrus_vesa_pmbios_return:
+ pop bp
+ popf
+ retf
+
+; in si:mode table
+cirrus_switch_mode:
+ push ds
+ push bx
+ push dx
+ push cs
+ pop ds
+
+ mov bx, [si+10] ;; seq
+ mov dx, #0x3c4
+ mov ax, #0x1206
+ out dx, ax ;; Unlock cirrus special
+ call cirrus_switch_mode_setregs
+
+ mov bx, [si+12] ;; graph
+ mov dx, #0x3ce
+ call cirrus_switch_mode_setregs
+
+ mov bx, [si+14] ;; crtc
+ call cirrus_get_crtc
+ call cirrus_switch_mode_setregs
+
+ mov dx, #0x3c6
+ mov al, #0x00
+ out dx, al
+ in al, dx
+ in al, dx
+ in al, dx
+ in al, dx
+ mov al, [si+8] ;; hidden dac
+ out dx, al
+ mov al, #0xff
+ out dx, al
+
+ mov al, #0x00
+ mov bl, [si+17] ;; memory model
+ or bl, bl
+ jz is_text_mode
+ mov al, #0x01
+ cmp bl, #0x03
+ jnz is_text_mode
+ or al, #0x40
+is_text_mode:
+ mov bl, #0x10
+ call biosfn_get_single_palette_reg
+ and bh, #0xfe
+ or bh, al
+ call biosfn_set_single_palette_reg
+
+ pop dx
+ pop bx
+ pop ds
+ ret
+
+cirrus_enable_16k_granularity:
+ push ax
+ push dx
+ mov dx, #0x3ce
+ mov al, #0x0b
+ out dx, al
+ inc dx
+ in al, dx
+ or al, #0x20 ;; enable 16k
+ out dx, al
+ pop dx
+ pop ax
+ ret
+
+cirrus_switch_mode_setregs:
+csms_1:
+ mov ax, [bx]
+ cmp ax, #0xffff
+ jz csms_2
+ out dx, ax
+ add bx, #0x2
+ jmp csms_1
+csms_2:
+ ret
+
+cirrus_extbios_80h:
+ push dx
+ call cirrus_get_crtc
+ mov al, #0x27
+ out dx, al
+ inc dx
+ in al, dx
+ mov bx, #_cirrus_id_table
+c80h_1:
+ db 0x2e ;; cs:
+ mov ah, [bx]
+ cmp ah, al
+ jz c80h_2
+ cmp ah, #0xff
+ jz c80h_2
+ inc bx
+ inc bx
+ jmp c80h_1
+c80h_2:
+ db 0x2e ;; cs:
+ mov al, 0x1[bx]
+ pop dx
+ mov ah, #0x00
+ xor bx, bx
+ ret
+
+cirrus_extbios_81h:
+ mov ax, #0x100 ;; XXX
+ ret
+cirrus_extbios_82h:
+ push dx
+ call cirrus_get_crtc
+ xor ax, ax
+ mov al, #0x27
+ out dx, al
+ inc dx
+ in al, dx
+ and al, #0x03
+ mov ah, #0xAF
+ pop dx
+ ret
+
+;; cirrus_extbios_85h - get installed video memory.
+;; Decodes the DRAM bank-size field (bits 4:3) of sequencer register
+;; 0x0F (port 0x3C4) and returns the amount of video RAM in AL as a
+;; count of 64K blocks.
+cirrus_extbios_85h:
+ push cx
+ push dx
+ mov dx, #0x3C4
+ mov al, #0x0f ;; get DRAM band width
+ out dx, al
+ inc dx
+ in al, dx
+ ;; al = 4 << bandwidth
+ mov cl, al
+ shr cl, #0x03
+ and cl, #0x03
+ cmp cl, #0x03
+ je c85h2
+ mov al, #0x04
+ shl al, cl
+ jmp c85h3
+c85h2:
+;; 4MB or 2MB
+ and al, #0x80 ;; sets ZF; the MOV below does not change flags
+ mov al, #0x20 ;; 2 MB
+ je c85h3 ;; ZF from the AND above: bit7 clear -> 2 MB
+ mov al, #0x40 ;; 4 MB
+c85h3:
+ pop dx
+ pop cx
+ ret
+
+;; cirrus_extbios_9Ah - inquire monitor/controller capability words.
+cirrus_extbios_9Ah:
+ mov ax, #0x4060
+ mov cx, #0x1132
+ ret
+
+;; cirrus_extbios_A0h - inquire video mode availability.
+;; AH is 1 when the mode lookup failed (carry set by cirrus_get_modeentry),
+;; 0 otherwise.  The remaining registers point at a stub callback.
+cirrus_extbios_A0h:
+ call cirrus_get_modeentry
+ mov ah, #0x01
+ sbb ah, #0x00
+ ;; NOTE(review): no '#' prefix, so as86 assembles this as a memory load
+ ;; from the callback's address rather than loading its offset as an
+ ;; immediate - confirm this is intended.
+ mov bx, cirrus_extbios_A0h_callback
+ mov si, #0xffff
+ mov di, bx
+ mov ds, bx
+ mov es, bx
+ ret
+
+;; Stub callback for extbios A0h - halts if ever invoked.
+cirrus_extbios_A0h_callback:
+ ;; fatal: not implemented yet
+ cli
+ hlt
+ retf
+
+;; cirrus_extbios_A1h - inquire monitor type.
+cirrus_extbios_A1h:
+ mov bx, #0x0E00 ;; IBM 8512/8513, color
+ ret
+
+;; cirrus_extbios_A2h - inquire horizontal sync range.
+cirrus_extbios_A2h:
+ mov al, #0x07 ;; HSync 31.5 - 64.0 kHz
+ ret
+
+;; cirrus_extbios_AEh - inquire refresh-rate capability.
+cirrus_extbios_AEh:
+ mov al, #0x01 ;; High Refresh 75Hz
+ ret
+
+;; Default handler for unimplemented extended BIOS functions.
+cirrus_extbios_unimplemented:
+ ret
+
+;; cirrus_vesa_00h - VBE Function 00h: Return Controller Information.
+;; ES:DI points at the caller's VbeInfoBlock.  If the buffer was
+;; preloaded with the "VBE2" signature, the extended (2.0) OEM fields
+;; are filled in as well.  The supported-mode list is copied to offset
+;; 0x40 of the caller's buffer.  Returns AX=0x004F (success).
+cirrus_vesa_00h:
+ push ds
+ push si
+ mov bp, di
+ push es
+ pop ds
+ cld
+ mov ax, [di]
+ cmp ax, #0x4256 ;; VB
+ jnz cv00_1
+ mov ax, [di+2]
+ cmp ax, #0x3245 ;; E2
+ jnz cv00_1
+ ;; VBE2 signature present: fill the v2.0 OEM fields at offset 0x14
+ lea di, 0x14[bp]
+ mov ax, #0x0100 ;; soft ver.
+ stosw
+ mov ax, # cirrus_vesa_vendorname
+ stosw
+ mov ax, cs
+ stosw
+ mov ax, # cirrus_vesa_productname
+ stosw
+ mov ax, cs
+ stosw
+ mov ax, # cirrus_vesa_productrevision
+ stosw
+ mov ax, cs
+ stosw
+cv00_1:
+ mov di, bp
+ mov ax, #0x4556 ;; VE
+ stosw
+ mov ax, #0x4153 ;; SA
+ stosw
+ mov ax, #0x0200 ;; v2.00
+ stosw
+ mov ax, # cirrus_vesa_oemname
+ stosw
+ mov ax, cs
+ stosw
+ xor ax, ax ;; caps
+ stosw
+ stosw
+ lea ax, 0x40[bp] ;; VideoModePtr -> mode list copied below
+ stosw
+ mov ax, es
+ stosw
+ call cirrus_extbios_85h ;; vram in 64k
+ mov ah, #0x00
+ stosw
+
+ ;; copy the 0xffff-terminated mode list (vesa mode words only,
+ ;; skipping the interleaved cirrus mode words) to ES:bp+0x40
+ push cs
+ pop ds
+ lea di, 0x40[bp]
+ mov si, #_cirrus_vesa_modelist
+cv00_2:
+ lodsw
+ stosw
+ add si, #2
+ cmp ax, #0xffff
+ jnz cv00_2
+
+ mov ax, #0x004F
+ mov di, bp
+ pop si
+ pop ds
+ ret
+
+;; cirrus_vesa_01h - VBE Function 01h: Return Mode Information.
+;; CX = requested VESA mode (bit 14 = LFB request), ES:DI = 256-byte
+;; ModeInfoBlock buffer.  Fills in the v1.2 and v2.0 fields from the
+;; matching _cirrus_modes entry; returns AX=0x004F on success,
+;; AX=0x014F when LFB info was requested but no LFB is available.
+cirrus_vesa_01h:
+ mov ax, cx
+ and ax, #0x3fff
+ call cirrus_vesamode_to_mode
+ cmp ax, #0xffff
+ jnz cirrus_vesa_01h_1
+ jmp cirrus_vesa_unimplemented
+cirrus_vesa_01h_1:
+ push ds
+ push si
+ push cx
+ push dx
+ push bx
+ mov bp, di
+ cld
+ push cs
+ pop ds
+ call cirrus_get_modeentry_nomask
+
+ push di
+ xor ax, ax
+ mov cx, #0x80
+ rep
+ stosw ;; clear buffer
+ pop di
+
+ mov ax, #0x003b ;; mode
+ stosw
+ mov ax, #0x0007 ;; attr
+ stosw
+ mov ax, #0x0010 ;; granularity =16K
+ stosw
+ mov ax, #0x0040 ;; size =64K
+ stosw
+ mov ax, #0xA000 ;; segment A
+ stosw
+ xor ax, ax ;; no segment B
+ stosw
+ mov ax, #cirrus_vesa_05h_farentry
+ stosw
+ mov ax, cs
+ stosw
+ call cirrus_get_line_offset_entry
+ stosw ;; bytes per scan line
+ mov ax, [si+2] ;; width
+ stosw
+ mov ax, [si+4] ;; height
+ stosw
+ mov ax, #0x08 ;; char cell width
+ stosb
+ mov ax, #0x10 ;; char cell height
+ stosb
+ mov al, #1 ;; count of planes
+ stosb
+ mov al, [si+6] ;; bpp
+ stosb
+ mov al, #0x1 ;; XXX number of banks
+ stosb
+ mov al, [si+17]
+ stosb ;; memory model
+ mov al, #0x0 ;; XXX size of bank in K
+ stosb
+ ;; number of image pages = total vram / displayed vram - 1
+ call cirrus_get_line_offset_entry
+ mov bx, [si+4]
+ mul bx ;; dx:ax=vramdisp
+ or ax, ax
+ jz cirrus_vesa_01h_3
+ inc dx
+cirrus_vesa_01h_3:
+ call cirrus_extbios_85h ;; al=vram in 64k
+ mov ah, #0x00
+ mov cx, dx
+ xor dx, dx
+ div cx
+ dec ax
+ stosb ;; number of image pages = vramtotal/vramdisp-1
+ mov al, #0x00
+ stosb
+
+ ;; v1.2+ stuffs
+ push si
+ add si, #18
+ movsw
+ movsw
+ movsw
+ movsw
+ pop si
+
+ mov ah, [si+16]
+ mov al, #0x0
+ sub ah, #9
+ rcl al, #1 ; bit 0=palette flag
+ stosb ;; direct screen mode info
+
+ ;; v2.0+ stuffs
+ ;; 32-bit LFB address
+ xor ax, ax
+ stosw
+ call cirrus_get_lfb_addr
+ stosw
+ or ax, ax
+ jz cirrus_vesa_01h_4
+ ;; LFB present: set mode-attribute bit 7 in the first buffer word
+ push di
+ mov di, bp
+ db 0x26 ;; es:
+ mov ax, [di]
+ or ax, #0x0080 ;; mode bit 7:LFB
+ stosw
+ pop di
+cirrus_vesa_01h_4:
+
+ xor ax, ax
+ stosw ; reserved
+ stosw ; reserved
+ stosw ; reserved
+
+ mov ax, #0x004F
+ mov di, bp
+ pop bx
+ pop dx
+ pop cx
+ pop si
+ pop ds
+
+ test cx, #0x4000 ;; LFB flag
+ jz cirrus_vesa_01h_5
+ push cx
+ db 0x26 ;; es:
+ mov cx, [di]
+ ;; NOTE(review): this compares the whole attribute word for equality
+ ;; with 0x0080 rather than testing bit 7 - confirm intended, since
+ ;; other attribute bits are written above.
+ cmp cx, #0x0080 ;; is LFB supported?
+ jnz cirrus_vesa_01h_6
+ mov ax, #0x014F ;; error - no LFB
+cirrus_vesa_01h_6:
+ pop cx
+cirrus_vesa_01h_5:
+ ret
+
+;; cirrus_vesa_02h - VBE Function 02h: Set VBE Mode.
+;; BX = mode number (bit 14 = LFB, bits 13-9 must be clear).  Legacy VGA
+;; modes (< 0x100) are forwarded to int 10h; VESA modes are programmed
+;; via cirrus_switch_mode.  The resulting mode numbers are recorded in
+;; the BIOS data area.  Returns AX=0x004F on success.
+cirrus_vesa_02h:
+ ;; XXX support CRTC registers
+ test bx, #0x3e00
+ jnz cirrus_vesa_02h_2 ;; unknown flags
+ mov ax, bx
+ and ax, #0x1ff ;; bit 8-0 mode
+ cmp ax, #0x100 ;; legacy VGA mode
+ jb cirrus_vesa_02h_legacy
+ call cirrus_vesamode_to_mode
+ cmp ax, #0xffff
+ jnz cirrus_vesa_02h_1
+cirrus_vesa_02h_2:
+ jmp cirrus_vesa_unimplemented
+cirrus_vesa_02h_legacy:
+#ifdef CIRRUS_VESA3_PMINFO
+ db 0x2e ;; cs:
+ cmp byte ptr [cirrus_vesa_is_protected_mode], #0
+ jnz cirrus_vesa_02h_2 ;; int 10h unavailable in protected mode
+#endif // CIRRUS_VESA3_PMINFO
+ int #0x10
+ mov ax, #0x004F
+ ret
+cirrus_vesa_02h_1:
+ push si
+ push ax
+ call cirrus_get_modeentry_nomask
+ call cirrus_switch_mode
+ test bx, #0x4000 ;; LFB
+ jnz cirrus_vesa_02h_3
+ call cirrus_enable_16k_granularity
+cirrus_vesa_02h_3:
+ pop ax
+ push ds
+#ifdef CIRRUS_VESA3_PMINFO
+ db 0x2e ;; cs:
+ mov si, [cirrus_vesa_sel0000_data]
+#else
+ xor si, si
+#endif
+ mov ds, si
+ mov [PM_BIOSMEM_CURRENT_MODE], al
+ mov [PM_BIOSMEM_VBE_MODE], bx
+ pop ds
+ pop si
+ mov ax, #0x004F
+ ret
+
+;; cirrus_vesa_03h - VBE Function 03h: Return Current VBE Mode.
+;; Reads the stored VBE mode from the BIOS data area; if zero, falls
+;; back to the legacy current-mode byte.  Returns BX=mode, AX=0x004F.
+cirrus_vesa_03h:
+ push ds
+#ifdef CIRRUS_VESA3_PMINFO
+ db 0x2e ;; cs:
+ mov ax, [cirrus_vesa_sel0000_data]
+#else
+ xor ax, ax
+#endif
+ mov ds, ax
+ mov bx, # PM_BIOSMEM_VBE_MODE
+ mov ax, [bx]
+ mov bx, ax
+ test bx, bx
+ jnz cirrus_vesa_03h_1
+ mov bx, # PM_BIOSMEM_CURRENT_MODE
+ mov al, [bx]
+ mov bl, al
+ xor bh, bh
+cirrus_vesa_03h_1:
+ mov ax, #0x004f
+ pop ds
+ ret
+
+;; Far-call wrapper for function 05h (used as the WinFuncPtr entry).
+cirrus_vesa_05h_farentry:
+ call cirrus_vesa_05h
+ retf
+
+;; cirrus_vesa_05h - VBE Function 05h: Display Window Control.
+;; BL = window (0/1), BH = 0 set window / 1 get window, DX = window
+;; address in granularity units.  The bank is programmed through
+;; graphics controller registers 0x09/0x0A (port 0x3CE).
+cirrus_vesa_05h:
+ cmp bl, #0x01
+ ja cirrus_vesa_05h_1
+ cmp bh, #0x00
+ jz cirrus_vesa_05h_setmempage
+ cmp bh, #0x01
+ jz cirrus_vesa_05h_getmempage
+cirrus_vesa_05h_1:
+ jmp cirrus_vesa_unimplemented
+cirrus_vesa_05h_setmempage:
+ or dh, dh ; address must be < 0x100
+ jnz cirrus_vesa_05h_1
+ push dx
+ mov al, bl ;; bl=bank number
+ add al, #0x09
+ mov ah, dl ;; dx=window address in granularity
+ mov dx, #0x3ce
+ out dx, ax
+ pop dx
+ mov ax, #0x004F
+ ret
+cirrus_vesa_05h_getmempage:
+ mov al, bl ;; bl=bank number
+ add al, #0x09
+ mov dx, #0x3ce
+ out dx, al
+ inc dx
+ in al, dx
+ xor dx, dx
+ mov dl, al ;; dx=window address in granularity
+ mov ax, #0x004F
+ ret
+
+;; cirrus_vesa_06h - VBE Function 06h: Set/Get Logical Scan Line Length.
+;; BL=0 set in pixels (CX), BL=1 get, BL=2 set in bytes (CX).
+;; Returns BX=bytes per line, CX=pixels per line, DX=max scan lines.
+cirrus_vesa_06h:
+ mov ax, cx
+ cmp bl, #0x01
+ je cirrus_vesa_06h_3
+ cmp bl, #0x02
+ je cirrus_vesa_06h_2
+ jb cirrus_vesa_06h_1
+ mov ax, #0x0100 ;; unsupported subfunction
+ ret
+cirrus_vesa_06h_1:
+ ;; convert pixels to bytes
+ call cirrus_get_bpp_bytes
+ mov bl, al
+ xor bh, bh
+ mov ax, cx
+ mul bx
+cirrus_vesa_06h_2:
+ call cirrus_set_line_offset
+cirrus_vesa_06h_3:
+ ;; report back the active line offset and derived limits
+ call cirrus_get_bpp_bytes
+ mov bl, al
+ xor bh, bh
+ xor dx, dx
+ call cirrus_get_line_offset
+ push ax
+ div bx
+ mov cx, ax ;; cx = pixels per scan line
+ pop bx ;; bx = bytes per scan line
+ call cirrus_extbios_85h ;; al=vram in 64k
+ xor dx, dx
+ mov dl, al
+ xor ax, ax
+ div bx
+ mov dx, ax ;; dx = maximum number of scan lines
+ mov ax, #0x004f
+ ret
+
+;; cirrus_vesa_07h - VBE Function 07h: Set/Get Display Start.
+;; BL=0x00/0x80 set display start at pixel (CX,DX), BL=0x01 get.
+;; The linear start address is (DX * line_offset + CX * bytes_per_pixel),
+;; stored in DWORD units via cirrus_set_start_addr.
+cirrus_vesa_07h:
+ cmp bl, #0x80
+ je cirrus_vesa_07h_1
+ cmp bl, #0x01
+ je cirrus_vesa_07h_2
+ jb cirrus_vesa_07h_1
+ mov ax, #0x0100 ;; unsupported subfunction
+ ret
+cirrus_vesa_07h_1:
+ push dx
+ call cirrus_get_bpp_bytes
+ mov bl, al
+ xor bh, bh
+ mov ax, cx
+ mul bx ;; ax = x offset in bytes
+ pop bx
+ push ax
+ call cirrus_get_line_offset
+ mul bx ;; dx:ax = y * bytes per line
+ pop bx
+ add ax, bx
+ jnc cirrus_vesa_07h_3
+ inc dx
+cirrus_vesa_07h_3:
+ ;; convert the 32-bit byte address to DWORD units
+ push dx
+ and dx, #0x0003
+ mov bx, #0x04
+ div bx
+ pop dx
+ shr dx, #2
+ call cirrus_set_start_addr
+ mov ax, #0x004f
+ ret
+cirrus_vesa_07h_2:
+ ;; get: convert the stored DWORD address back to (CX,DX) pixel coords
+ call cirrus_get_start_addr
+ shl dx, #2
+ push dx
+ mov bx, #0x04
+ mul bx
+ pop bx
+ or dx, bx
+ push ax
+ call cirrus_get_line_offset
+ mov bx, ax
+ pop ax
+ div bx ;; ax = y, dx = x in bytes
+ push ax
+ push dx
+ call cirrus_get_bpp_bytes
+ mov bl, al
+ xor bh, bh
+ pop ax
+ xor dx, dx
+ div bx ;; ax = x in pixels
+ mov cx, ax
+ pop dx
+ mov ax, #0x004f
+ ret
+
+;; Default handler for unimplemented VESA functions.
+cirrus_vesa_unimplemented:
+ mov ax, #0x014F ;; not implemented
+ ret
+
+
+;; in ax:vesamode, out ax:cirrusmode
+;; Scans _cirrus_vesa_modelist, which holds (vesamode, cirrusmode) word
+;; pairs terminated by 0xffff; returns 0xffff when the mode is unknown
+;; (the terminator entry matches via the CX compare).
+cirrus_vesamode_to_mode:
+ push ds
+ push cx
+ push si
+ push cs
+ pop ds
+ mov cx, #0xffff
+ mov si, #_cirrus_vesa_modelist
+cvtm_1:
+ cmp [si],ax
+ jz cvtm_2
+ cmp [si],cx ;; hit the 0xffff terminator -> not found
+ jz cvtm_2
+ add si, #4
+ jmp cvtm_1
+cvtm_2:
+ mov ax,[si+2]
+ pop si
+ pop cx
+ pop ds
+ ret
+
+ ; cirrus_get_crtc
+ ;; NOTE - may be called in protected mode
+;; Returns in DX the active CRTC index port: 0x3B4 (mono) or 0x3D4
+;; (color), chosen from bit 0 of the miscellaneous output register
+;; (read at 0x3CC).  Preserves all other registers.
+cirrus_get_crtc:
+ push ds
+ push ax
+ mov dx, #0x3cc
+ in al, dx
+ and al, #0x01
+ shl al, #5 ;; 0x3b4 + 0x20 = 0x3d4
+ mov dx, #0x3b4
+ add dl, al
+ pop ax
+ pop ds
+ ret
+
+;; in - al:mode, out - cflag:result, si:table, ax:destroyed
+;; Looks up the mode byte AL in the _cirrus_modes table (entries of
+;; CIRRUS_MODE_SIZE bytes, first byte = mode number, 0xff terminator).
+;; Carry clear and SI -> entry on success; carry set and SI=0 otherwise.
+;; The entry point below skips masking off the no-clear bit 7.
+cirrus_get_modeentry:
+ and al, #0x7f
+cirrus_get_modeentry_nomask:
+ mov si, #_cirrus_modes
+cgm_1:
+ db 0x2e ;; cs:
+ mov ah, [si]
+ cmp al, ah
+ jz cgm_2
+ cmp ah, #0xff ;; 0xff terminates the mode table
+ jz cgm_4
+ add si, # CIRRUS_MODE_SIZE
+ jmp cgm_1
+cgm_4:
+ xor si, si
+ stc ;; video mode is not supported
+ jmp cgm_3
+cgm_2:
+ clc ;; video mode is supported
+cgm_3:
+ ret
+
+ ; get LFB address
+ ; out - ax:LFB address (high 16 bit)
+ ;; NOTE - may be called in protected mode
+;; Scans PCI buses 0 and 1 (8 function slots at a time via CX) for a
+;; device with vendor ID 0x1013 (Cirrus Logic), then reads BAR0 at
+;; config offset 0x10.  Returns AX=0 when no LFB was found.
+cirrus_get_lfb_addr:
+ push cx
+ push dx
+ push eax
+ xor cx, cx
+ mov dl, #0x00
+ call cirrus_pci_read
+ cmp ax, #0xffff ;; no device at bus 0 dev 0? assume no PCI
+ jz cirrus_get_lfb_addr_5
+ cirrus_get_lfb_addr_3:
+ mov dl, #0x00
+ call cirrus_pci_read
+ cmp ax, #0x1013 ;; cirrus
+ jz cirrus_get_lfb_addr_4
+ add cx, #0x8
+ cmp cx, #0x200 ;; search bus #0 and #1
+ jb cirrus_get_lfb_addr_3
+ cirrus_get_lfb_addr_5:
+ xor dx, dx ;; no LFB
+ jmp cirrus_get_lfb_addr_6
+ cirrus_get_lfb_addr_4:
+ mov dl, #0x10 ;; I/O space #0
+ call cirrus_pci_read
+ test ax, #0xfff1 ;; must be a 16MB-aligned memory BAR
+ jnz cirrus_get_lfb_addr_5
+ shr eax, #16
+ mov dx, ax ;; LFB address
+ cirrus_get_lfb_addr_6:
+ pop eax
+ mov ax, dx
+ pop dx
+ pop cx
+ ret
+
+;; Read a PCI configuration dword.
+;; in - cx: bus/device/function bits, dl: register offset
+;; out - eax: config value (via ports 0xCF8/0xCFC).  Clobbers DX.
+cirrus_pci_read:
+ mov eax, #0x00800000 ;; config-space enable bit
+ mov ax, cx
+ shl eax, #8
+ mov al, dl
+ mov dx, #0xcf8
+ out dx, eax
+ add dl, #4
+ in eax, dx
+ ret
+
+;; out - al:bytes per pixel
+;; Derives the pixel size from sequencer register 0x07 (port 0x3C4)
+;; extended-mode bits 3:1.  Preserves DX.
+cirrus_get_bpp_bytes:
+ push dx
+ mov dx, #0x03c4
+ mov al, #0x07
+ out dx, al
+ inc dx
+ in al, dx
+ and al, #0x0e
+ cmp al, #0x06 ;; special-case the 16bpp encodings
+ jne cirrus_get_bpp_bytes_1
+ and al, #0x02
+cirrus_get_bpp_bytes_1:
+ shr al, #1
+ cmp al, #0x04
+ je cirrus_get_bpp_bytes_2
+ inc al
+cirrus_get_bpp_bytes_2:
+ pop dx
+ ret
+
+;; in - ax: new line offset
+;; Programs the scan line pitch: AX (bytes) is divided by 8 and written
+;; to CRTC register 0x13, with the 9th bit placed in bit 4 of extended
+;; CRTC register 0x1b.
+cirrus_set_line_offset:
+ shr ax, #3
+ push ax
+ call cirrus_get_crtc
+ mov al, #0x13
+ out dx, al
+ inc dx
+ pop ax
+ out dx, al
+ dec dx
+ mov al, #0x1b
+ out dx, al
+ inc dx
+ shl ah, #4
+ in al, dx
+ and al, #0xef ;; clear bit 4 before merging (was "#ef": missing 0x prefix)
+ or al, ah
+ out dx, al
+ ret
+
+;; out - ax: active line offset
+;; Reads the pitch back from CRTC 0x13 (low 8 bits) and extended CRTC
+;; 0x1b bit 4 (9th bit), then multiplies by 8 to return bytes per line.
+;; Preserves DX and BX.
+cirrus_get_line_offset:
+ push dx
+ push bx
+ call cirrus_get_crtc
+ mov al, #0x13
+ out dx, al
+ inc dx
+ in al, dx
+ mov bl, al
+ dec dx
+ mov al, #0x1b
+ out dx, al
+ inc dx
+ in al, dx
+ mov ah, al
+ shr ah, #4
+ and ah, #0x01
+ mov al, bl
+ shl ax, #3
+ pop bx
+ pop dx
+ ret
+
+;; in - si: table
+;; out - ax: line offset for mode
+;; Like cirrus_get_line_offset, but reads the pitch from the mode's
+;; CRTC register list ([si+14]) instead of the hardware: finds the
+;; values programmed for CRTC indexes 0x13 and 0x1b and combines them.
+cirrus_get_line_offset_entry:
+ push bx
+ mov bx, [si+14] ;; crtc table
+ push bx
+offset_loop1:
+ mov ax, [bx]
+ cmp al, #0x13
+ je offset_found1
+ inc bx
+ inc bx
+ ;; NOTE(review): INC updates ZF, so this only exits the loop when BX
+ ;; wraps to zero - it relies on every crtc table containing a 0x13
+ ;; (and below a 0x1b) entry.  Confirm the tables guarantee this.
+ jnz offset_loop1
+offset_found1:
+ xor al, al
+ shr ax, #5 ;; value byte * 8
+ pop bx
+ push ax
+offset_loop2:
+ mov ax, [bx]
+ cmp al, #0x1b
+ je offset_found2
+ inc bx
+ inc bx
+ jnz offset_loop2
+offset_found2:
+ pop bx
+ and ax, #0x1000 ;; bit 4 of the 0x1b value
+ shr ax, #1 ;; -> the 9th pitch bit, times 8
+ or ax, bx
+ pop bx
+ ret
+
+;; in - new address in DX:AX
+;; Programs the display start address: bits 15:0 into CRTC 0x0C/0x0D,
+;; bit 16 into extended CRTC 0x1B bit 0, bits 18:17 into 0x1B bits 3:2,
+;; and bit 19 into extended CRTC 0x1D bit 7.
+cirrus_set_start_addr:
+ push bx
+ push dx
+ push ax
+ call cirrus_get_crtc
+ mov al, #0x0d
+ out dx, al
+ inc dx
+ pop ax
+ out dx, al
+ dec dx
+ mov al, #0x0c
+ out dx, al
+ inc dx
+ mov al, ah
+ out dx, al
+ dec dx
+ mov al, #0x1d
+ out dx, al
+ inc dx
+ in al, dx
+ and al, #0x7f
+ pop bx ;; bl = high word of the address
+ mov ah, bl
+ shl bl, #4
+ and bl, #0x80 ;; address bit 19 -> 0x1d bit 7
+ or al, bl
+ out dx, al
+ dec dx
+ mov bl, ah
+ and ah, #0x01 ;; address bit 16 -> 0x1b bit 0
+ shl bl, #1
+ and bl, #0x0c ;; address bits 18:17 -> 0x1b bits 3:2
+ or ah, bl
+ mov al, #0x1b
+ out dx, al
+ inc dx
+ in al, dx
+ and al, #0xf2
+ or al, ah
+ out dx, al
+ pop bx
+ ret
+
+;; out - current address in DX:AX
+;; Inverse of cirrus_set_start_addr: reassembles the 20-bit display
+;; start address from CRTC 0x0C/0x0D and extended CRTC 0x1B/0x1D.
+cirrus_get_start_addr:
+ push bx
+ call cirrus_get_crtc
+ mov al, #0x0c
+ out dx, al
+ inc dx
+ in al, dx
+ mov ah, al
+ dec dx
+ mov al, #0x0d
+ out dx, al
+ inc dx
+ in al, dx
+ push ax ;; low 16 bits of the address
+ dec dx
+ mov al, #0x1b
+ out dx, al
+ inc dx
+ in al, dx
+ dec dx
+ mov bl, al
+ and al, #0x01 ;; 0x1b bit 0 -> address bit 16
+ and bl, #0x0c ;; 0x1b bits 3:2 -> address bits 18:17
+ shr bl, #1
+ or bl, al
+ mov al, #0x1d
+ out dx, al
+ inc dx
+ in al, dx
+ and al, #0x80 ;; 0x1d bit 7 -> address bit 19
+ shr al, #4
+ or bl, al
+ mov dl, bl
+ xor dh, dh
+ pop ax
+ pop bx
+ ret
+
+;; Dispatch table for Cirrus extended BIOS functions 80h-AFh:
+;; one handler word per function code, indexed by (AH - 0x80).
+cirrus_extbios_handlers:
+ ;; 80h
+ dw cirrus_extbios_80h
+ dw cirrus_extbios_81h
+ dw cirrus_extbios_82h
+ dw cirrus_extbios_unimplemented
+ ;; 84h
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_85h
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ ;; 88h
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ ;; 8Ch
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ ;; 90h
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ ;; 94h
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ ;; 98h
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_9Ah
+ dw cirrus_extbios_unimplemented
+ ;; 9Ch
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ ;; A0h
+ dw cirrus_extbios_A0h
+ dw cirrus_extbios_A1h
+ dw cirrus_extbios_A2h
+ dw cirrus_extbios_unimplemented
+ ;; A4h
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ ;; A8h
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ ;; ACh
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_unimplemented
+ dw cirrus_extbios_AEh
+ dw cirrus_extbios_unimplemented
+
+;; Dispatch table for VBE functions 4F00h-4F0Fh:
+;; one handler word per function code, indexed by AL.
+cirrus_vesa_handlers:
+ ;; 00h
+ dw cirrus_vesa_00h
+ dw cirrus_vesa_01h
+ dw cirrus_vesa_02h
+ dw cirrus_vesa_03h
+ ;; 04h
+ dw cirrus_vesa_unimplemented
+ dw cirrus_vesa_05h
+ dw cirrus_vesa_06h
+ dw cirrus_vesa_07h
+ ;; 08h
+ dw cirrus_vesa_unimplemented
+ dw cirrus_vesa_unimplemented
+ dw cirrus_vesa_unimplemented
+ dw cirrus_vesa_unimplemented
+ ;; 0Ch
+ dw cirrus_vesa_unimplemented
+ dw cirrus_vesa_unimplemented
+ dw cirrus_vesa_unimplemented
+ dw cirrus_vesa_unimplemented
+
+
+
+ASM_END
+
+#ifdef CIRRUS_VESA3_PMINFO
+ASM_START
+;; VBE 3.0 protected-mode information block ("PMID").  The loader
+;; patches the selector fields below and sets the protected-mode flag;
+;; the final word is laid out so the block's byte checksum is zero.
+cirrus_vesa_pminfo:
+ /* + 0 */
+ .byte 0x50,0x4d,0x49,0x44 ;; signature[4]
+ /* + 4 */
+ dw cirrus_vesa_pmbios_entry ;; entry_bios
+ dw cirrus_vesa_pmbios_init ;; entry_init
+ /* + 8 */
+cirrus_vesa_sel0000_data:
+ dw 0x0000 ;; sel_00000
+cirrus_vesa_selA000_data:
+ dw 0xA000 ;; sel_A0000
+ /* +12 */
+cirrus_vesa_selB000_data:
+ dw 0xB000 ;; sel_B0000
+cirrus_vesa_selB800_data:
+ dw 0xB800 ;; sel_B8000
+ /* +16 */
+cirrus_vesa_selC000_data:
+ dw 0xC000 ;; sel_C0000
+cirrus_vesa_is_protected_mode:
+ ;; protected mode flag and checksum
+ dw (~((0xf2 + (cirrus_vesa_pmbios_entry >> 8) + (cirrus_vesa_pmbios_entry) \
+ + (cirrus_vesa_pmbios_init >> 8) + (cirrus_vesa_pmbios_init)) & 0xff) << 8) + 0x01
+ASM_END
+#endif // CIRRUS_VESA3_PMINFO
+
+
+#ifdef CIRRUS_DEBUG
+// Debug hook: logs each int 10h call's registers, except the very
+// chatty teletype/cursor/write (AH=0E/02/09) and VBE window (4F05)
+// calls.  K&R-style register snapshot parameters pushed by the caller.
+static void cirrus_debugmsg(DI, SI, BP, SP, BX, DX, CX, AX, DS, ES, FLAGS)
+ Bit16u DI, SI, BP, SP, BX, DX, CX, AX, ES, DS, FLAGS;
+{
+ if((GET_AH()!=0x0E)&&(GET_AH()!=0x02)&&(GET_AH()!=0x09)&&(AX!=0x4F05))
+ printf("vgabios call ah%02x al%02x bx%04x cx%04x dx%04x\n",GET_AH(),GET_AL(),BX,CX,DX);
+}
+#endif
diff --git a/tools/firmware/vgabios/dataseghack b/tools/firmware/vgabios/dataseghack
new file mode 100755
index 0000000000..02a2d4c525
--- /dev/null
+++ b/tools/firmware/vgabios/dataseghack
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Post-process the bcc-generated assembly listing ($1) so that the data
+# segment contents end up inside the code segment: reorder the file as
+# (.text up to DATA_SEG_DEFS_HERE) + (strings between BLOCK_STRINGS_BEGIN
+# and .bss, minus the first two lines) + (the data definitions between the
+# two markers), then strip the .data/.bss/.text section directives.
+
+# part 1: everything from .text up to the DATA_SEG_DEFS_HERE marker
+awk \
+ 'BEGIN { }\
+ /^\.text/,/DATA_SEG_DEFS_HERE/ { print }\
+ END { }'\
+ $1 > temp.awk.1
+
+# part 2: the string block; "i > 1"/"last" drop the marker line and the
+# trailing .bss line from the matched range
+awk \
+ 'BEGIN { i = 0; last = "hello" }\
+ /BLOCK_STRINGS_BEGIN/,/^\.bss/ { if ( i > 1 ) { print last } last = $0; i = i + 1 }\
+ END { }'\
+ $1 > temp.awk.2
+
+# part 3: the data definitions between the two markers
+awk \
+ 'BEGIN { }\
+ /DATA_SEG_DEFS_HERE/,/BLOCK_STRINGS_BEGIN/ { print }\
+ END { }'\
+ $1 > temp.awk.3
+
+# reassemble in place, dropping the section directives
+cp $1 $1.orig
+cat temp.awk.1 temp.awk.2 temp.awk.3 | sed -e 's/^\.data//' -e 's/^\.bss//' -e 's/^\.text//' > $1
+/bin/rm -f temp.awk.1 temp.awk.2 temp.awk.3 $1.orig
diff --git a/tools/firmware/vgabios/vbe.c b/tools/firmware/vgabios/vbe.c
new file mode 100644
index 0000000000..e71099546e
--- /dev/null
+++ b/tools/firmware/vgabios/vbe.c
@@ -0,0 +1,1068 @@
+// ============================================================================================
+//
+// Copyright (C) 2002 Jeroen Janssen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// ============================================================================================
+//
+// This VBE is part of the VGA Bios specific to the plex86/bochs Emulated VGA card.
+// You can NOT drive any physical vga card with it.
+//
+// ============================================================================================
+//
+// This VBE Bios is based on information taken from :
+// - VESA BIOS EXTENSION (VBE) Core Functions Standard Version 3.0 located at www.vesa.org
+//
+// ============================================================================================
+
+
+// defines available
+// enable LFB support
+#define VBE_HAVE_LFB
+
+// disable VESA/VBE2 check in vbe info
+//#define VBE2_NO_VESA_CHECK
+
+// dynamicly generate a mode_info list
+#define DYN_LIST
+
+
+#include "vbe.h"
+#include "vbetables.h"
+
+
+// The current OEM Software Revision of this VBE Bios
+#define VBE_OEM_SOFTWARE_REV 0x0002;
+
+extern char vbebios_copyright;
+extern char vbebios_vendor_name;
+extern char vbebios_product_name;
+extern char vbebios_product_revision;
+
+#ifndef DYN_LIST
+extern Bit16u vbebios_mode_list;
+#endif
+
+ASM_START
+// FIXME: 'merge' these (c) etc strings with the vgabios.c strings?
+_vbebios_copyright:
+.ascii "Bochs/Plex86 VBE(C) 2003 http://savannah.nongnu.org/projects/vgabios/"
+.byte 0x00
+
+_vbebios_vendor_name:
+.ascii "Bochs/Plex86 Developers"
+.byte 0x00
+
+_vbebios_product_name:
+.ascii "Bochs/Plex86 VBE Adapter"
+.byte 0x00
+
+_vbebios_product_revision:
+.ascii "$Id: vbe.c,v 1.47 2005/05/24 16:50:50 vruppert Exp $"
+.byte 0x00
+
+_vbebios_info_string:
+.ascii "Bochs VBE Display Adapter enabled"
+.byte 0x0a,0x0d
+.byte 0x0a,0x0d
+.byte 0x00
+
+_no_vbebios_info_string:
+.ascii "NO Bochs VBE Support available!"
+.byte 0x0a,0x0d
+.byte 0x0a,0x0d
+.byte 0x00
+
+#if defined(USE_BX_INFO) || defined(DEBUG)
+msg_vbe_init:
+.ascii "VBE Bios $Id: vbe.c,v 1.47 2005/05/24 16:50:50 vruppert Exp $"
+.byte 0x0a,0x0d, 0x00
+#endif
+
+#ifndef DYN_LIST
+// FIXME: for each new mode add a statement here
+// at least until dynamic list creation is working
+_vbebios_mode_list:
+
+.word VBE_VESA_MODE_640X400X8
+.word VBE_VESA_MODE_640X480X8
+.word VBE_VESA_MODE_800X600X4
+.word VBE_VESA_MODE_800X600X8
+.word VBE_VESA_MODE_1024X768X8
+.word VBE_VESA_MODE_640X480X1555
+.word VBE_VESA_MODE_640X480X565
+.word VBE_VESA_MODE_640X480X888
+.word VBE_VESA_MODE_800X600X1555
+.word VBE_VESA_MODE_800X600X565
+.word VBE_VESA_MODE_800X600X888
+.word VBE_VESA_MODE_1024X768X1555
+.word VBE_VESA_MODE_1024X768X565
+.word VBE_VESA_MODE_1024X768X888
+.word VBE_OWN_MODE_640X480X8888
+.word VBE_OWN_MODE_800X600X8888
+.word VBE_OWN_MODE_1024X768X8888
+.word VBE_OWN_MODE_320X200X8
+.word VBE_VESA_MODE_END_OF_LIST
+#endif
+
+; DISPI ioport functions
+
+;; Read the Bochs DISPI interface ID register; returns it in AX.
+;; Used by vbe_init to probe which DISPI revision the host emulates.
+dispi_get_id:
+ push dx
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_ID
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ pop dx
+ ret
+
+;; Write AX to the DISPI ID register (ID negotiation with the host).
+dispi_set_id:
+ push dx
+ push ax
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_ID
+ out dx, ax
+ pop ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ out dx, ax
+ pop dx
+ ret
+ASM_END
+
+// Program the horizontal resolution: writes the DISPI XRES register,
+// then updates legacy VGA CRTC state (end horizontal display, via
+// index 0x01 after unlocking with index 0x11) and the virtual width
+// so the legacy registers stay consistent with the VBE mode.
+static void dispi_set_xres(xres)
+ Bit16u xres;
+{
+ASM_START
+ push bp
+ mov bp, sp
+ push ax
+ push dx
+
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_XRES
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ mov ax, 4[bp] ; xres
+ out dx, ax
+ push ax
+ ; unprotect CRTC registers 0-7 (index 0x11, value 0)
+ mov dx, #0x03d4
+ mov ax, #0x0011
+ out dx, ax
+ mov dx, #0x03d4
+ pop ax
+ push ax
+ ; CRTC 0x01 (horizontal display end) = xres/8 - 1
+ shr ax, #3
+ dec ax
+ mov ah, al
+ mov al, #0x01
+ out dx, ax
+ pop ax
+ call vga_set_virt_width
+
+ pop dx
+ pop ax
+ pop bp
+ASM_END
+}
+
+// Program the vertical resolution through the DISPI YRES register.
+static void dispi_set_yres(yres)
+ Bit16u yres;
+{
+ outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_YRES);
+ outw(VBE_DISPI_IOPORT_DATA,yres);
+}
+
+// Program the bits-per-pixel through the DISPI BPP register.
+static void dispi_set_bpp(bpp)
+ Bit16u bpp;
+{
+ outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_BPP);
+ outw(VBE_DISPI_IOPORT_DATA,bpp);
+}
+
+ASM_START
+; AL = bits per pixel / AH = bytes per pixel
+;; Reads the DISPI BPP register and also computes the rounded-up byte
+;; count (bits/8, +1 if not a whole number of bytes).
+dispi_get_bpp:
+ push dx
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_BPP
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ mov ah, al
+ shr ah, 3
+ test al, #0x07
+ jz get_bpp_noinc
+ inc ah
+get_bpp_noinc:
+ pop dx
+ ret
+
+;; Query the host's maximum supported bpp: temporarily sets the
+;; GETCAPS bit in the enable register so the BPP register reports the
+;; capability instead of the current value, then restores the enable
+;; state.  Returns the max bpp in AX.
+_dispi_get_max_bpp:
+ push dx
+ push bx
+ call dispi_get_enable
+ mov bx, ax
+ or ax, # VBE_DISPI_GETCAPS
+ call _dispi_set_enable
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_BPP
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ push ax
+ mov ax, bx
+ call _dispi_set_enable
+ pop ax
+ pop bx
+ pop dx
+ ret
+
+;; Write AX to the DISPI enable register (C-callable wrapper symbol).
+_dispi_set_enable:
+ push dx
+ push ax
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_ENABLE
+ out dx, ax
+ pop ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ out dx, ax
+ pop dx
+ ret
+
+;; Read the DISPI enable register into AX.
+dispi_get_enable:
+ push dx
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_ENABLE
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ pop dx
+ ret
+
+;; Write AX to the DISPI bank register (C-callable wrapper symbol).
+_dispi_set_bank:
+ push dx
+ push ax
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_BANK
+ out dx, ax
+ pop ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ out dx, ax
+ pop dx
+ ret
+
+;; Read the DISPI bank register into AX.
+dispi_get_bank:
+ push dx
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_BANK
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ pop dx
+ ret
+ASM_END
+
+// Relocatable far-call window function (VBE WinFuncPtr target).
+// BX = 0x0000 set window / 0x0100 get window, DX = window number.
+// Returns via retf; AX=0x014F on an unsupported BX value.
+static void dispi_set_bank_farcall()
+{
+ASM_START
+ cmp bx,#0x0100
+ je dispi_set_bank_farcall_get
+ or bx,bx
+ jnz dispi_set_bank_farcall_error
+ push dx ; bank number arrives in DX ...
+ mov ax,# VBE_DISPI_INDEX_BANK
+ mov dx,# VBE_DISPI_IOPORT_INDEX
+ out dx,ax
+ pop ax ; ... and is popped into AX for the data write
+ mov dx,# VBE_DISPI_IOPORT_DATA
+ out dx,ax
+ retf
+dispi_set_bank_farcall_get:
+ mov ax,# VBE_DISPI_INDEX_BANK
+ mov dx,# VBE_DISPI_IOPORT_INDEX
+ out dx,ax
+ mov dx,# VBE_DISPI_IOPORT_DATA
+ in ax,dx
+ mov dx,ax
+ retf
+dispi_set_bank_farcall_error:
+ mov ax,#0x014F
+ retf
+ASM_END
+}
+
+ASM_START
+;; Write AX to the DISPI X display-start offset register.
+dispi_set_x_offset:
+ push dx
+ push ax
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_X_OFFSET
+ out dx, ax
+ pop ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ out dx, ax
+ pop dx
+ ret
+
+;; Read the DISPI X display-start offset register into AX.
+dispi_get_x_offset:
+ push dx
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_X_OFFSET
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ pop dx
+ ret
+
+;; Write AX to the DISPI Y display-start offset register.
+dispi_set_y_offset:
+ push dx
+ push ax
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_Y_OFFSET
+ out dx, ax
+ pop ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ out dx, ax
+ pop dx
+ ret
+
+;; Read the DISPI Y display-start offset register into AX.
+dispi_get_y_offset:
+ push dx
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_Y_OFFSET
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ pop dx
+ ret
+
+;; Mirror the virtual width (AX, in pixels) into legacy CRTC register
+;; 0x13 (offset/pitch).  For planar (<=4bpp) modes the pitch is divided
+;; by an extra factor of 4 before the common divide-by-4 below.
+vga_set_virt_width:
+ push ax
+ push bx
+ push dx
+ mov bx, ax
+ call dispi_get_bpp
+ cmp al, #0x04
+ ja set_width_svga
+ shr bx, #2
+set_width_svga:
+ shr bx, #2
+ mov dx, #0x03d4
+ mov ah, bl
+ mov al, #0x13
+ out dx, ax
+ pop dx
+ pop bx
+ pop ax
+ ret
+
+;; Write AX to the DISPI virtual-width register, keeping the legacy
+;; CRTC pitch in sync via vga_set_virt_width.
+dispi_set_virt_width:
+ call vga_set_virt_width
+ push dx
+ push ax
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_VIRT_WIDTH
+ out dx, ax
+ pop ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ out dx, ax
+ pop dx
+ ret
+
+;; Read the DISPI virtual-width register into AX.
+dispi_get_virt_width:
+ push dx
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_VIRT_WIDTH
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ pop dx
+ ret
+
+;; Read the DISPI virtual-height register into AX.
+dispi_get_virt_height:
+ push dx
+ mov dx, # VBE_DISPI_IOPORT_INDEX
+ mov ax, # VBE_DISPI_INDEX_VIRT_HEIGHT
+ out dx, ax
+ mov dx, # VBE_DISPI_IOPORT_DATA
+ in ax, dx
+ pop dx
+ ret
+ASM_END
+
+
+// ModeInfo helper function
+static ModeInfoListItem* mode_info_find_mode(mode, using_lfb)
+ Bit16u mode; Boolean using_lfb;
+{
+ ModeInfoListItem *cur_info=&mode_info_list;
+
+ while (cur_info->mode != VBE_VESA_MODE_END_OF_LIST)
+ {
+ if (cur_info->mode == mode)
+ {
+ if (!using_lfb)
+ {
+ return cur_info;
+ }
+ else if (cur_info->info.ModeAttributes & VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE)
+ {
+ return cur_info;
+ }
+ else
+ {
+ cur_info++;
+ }
+ }
+ else
+ {
+ cur_info++;
+ }
+ }
+
+ return 0;
+}
+
+ASM_START
+
+; Has VBE display - Returns true if VBE display detected
+;; Reads bit 0 of the BIOSMEM_VBE_FLAG byte set by vbe_init; AX=1 when
+;; the DISPI interface was detected, AX=0 otherwise.
+_vbe_has_vbe_display:
+ push ds
+ push bx
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ mov bx, # BIOSMEM_VBE_FLAG
+ mov al, [bx]
+ and al, #0x01
+ xor ah, ah
+ pop bx
+ pop ds
+ ret
+
+; VBE Init - Initialise the Vesa Bios Extension Code
+; This function does a sanity check on the host side display code interface.
+;; Writes ID0 to the DISPI ID register and reads it back; on success,
+;; records the presence flag in BIOS memory and negotiates up to ID3.
+vbe_init:
+ mov ax, # VBE_DISPI_ID0
+ call dispi_set_id
+ call dispi_get_id
+ cmp ax, # VBE_DISPI_ID0
+ jne no_vbe_interface
+ push ds
+ push bx
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ mov bx, # BIOSMEM_VBE_FLAG
+ mov al, #0x01
+ mov [bx], al
+ pop bx
+ pop ds
+ mov ax, # VBE_DISPI_ID3
+ call dispi_set_id
+no_vbe_interface:
+#if defined(USE_BX_INFO) || defined(DEBUG)
+ mov bx, #msg_vbe_init
+ push bx
+ call _printf
+ inc sp
+ inc sp
+#endif
+ ret
+
+; VBE Display Info - Display information on screen about the VBE
+;; Prints either the "enabled" or the "no support" banner depending on
+;; the flag recorded by vbe_init; tail-jumps into _display_string.
+vbe_display_info:
+ call _vbe_has_vbe_display
+ test ax, ax
+ jz no_vbe_flag
+ mov ax, #0xc000
+ mov ds, ax
+ mov si, #_vbebios_info_string
+ jmp _display_string
+no_vbe_flag:
+ mov ax, #0xc000
+ mov ds, ax
+ mov si, #_no_vbebios_info_string
+ jmp _display_string
+ASM_END
+
+/** Function 00h - Return VBE Controller Information
+ *
+ * Input:
+ * AX = 4F00h
+ * ES:DI = Pointer to buffer in which to place VbeInfoBlock structure
+ * (VbeSignature should be VBE2 when VBE 2.0 information is desired and
+ * the info block is 512 bytes in size)
+ * Output:
+ * AX = VBE Return Status
+ *
+ */
+void vbe_biosfn_return_controller_information(AX, ES, DI)
+Bit16u *AX;Bit16u ES;Bit16u DI;
+{
+ Bit16u ss=get_SS();
+ VbeInfoBlock vbe_info_block; // stack copy of the caller's buffer
+ Bit16u status; // incoming AX, used only for the debug trace
+ Bit16u result;
+ Bit16u vbe2_info; // nonzero when caller preset "VBE2"/"VESA"
+ Bit16u cur_mode=0; // count of modes emitted (debug only)
+ Bit16u cur_ptr=34; // mode list is placed at ES:DI+34, just
+ // past the fixed VBE 2.0 header fields
+ ModeInfoListItem *cur_info=&mode_info_list;
+
+ status = read_word(ss, AX);
+
+#ifdef DEBUG
+ printf("VBE vbe_biosfn_return_vbe_info ES%x DI%x AX%x\n",ES,DI,status);
+#endif
+
+ vbe2_info = 0;
+#ifdef VBE2_NO_VESA_CHECK
+#else
+ // get vbe_info_block into local variable
+ memcpyb(ss, &vbe_info_block, ES, DI, sizeof(vbe_info_block));
+
+ // check for VBE2 signature
+ if (((vbe_info_block.VbeSignature[0] == 'V') &&
+ (vbe_info_block.VbeSignature[1] == 'B') &&
+ (vbe_info_block.VbeSignature[2] == 'E') &&
+ (vbe_info_block.VbeSignature[3] == '2')) ||
+
+ ((vbe_info_block.VbeSignature[0] == 'V') &&
+ (vbe_info_block.VbeSignature[1] == 'E') &&
+ (vbe_info_block.VbeSignature[2] == 'S') &&
+ (vbe_info_block.VbeSignature[3] == 'A')) )
+ {
+ vbe2_info = 1;
+#ifdef DEBUG
+ printf("VBE correct VESA/VBE2 signature found\n");
+#endif
+ }
+#endif
+
+ // VBE Signature
+ vbe_info_block.VbeSignature[0] = 'V';
+ vbe_info_block.VbeSignature[1] = 'E';
+ vbe_info_block.VbeSignature[2] = 'S';
+ vbe_info_block.VbeSignature[3] = 'A';
+
+ // VBE Version supported
+ vbe_info_block.VbeVersion = 0x0200;
+
+ // OEM String
+ vbe_info_block.OemStringPtr_Seg = 0xc000;
+ vbe_info_block.OemStringPtr_Off = &vbebios_copyright;
+
+ // Capabilities
+ vbe_info_block.Capabilities[0] = VBE_CAPABILITY_8BIT_DAC;
+ vbe_info_block.Capabilities[1] = 0;
+ vbe_info_block.Capabilities[2] = 0;
+ vbe_info_block.Capabilities[3] = 0;
+
+#ifdef DYN_LIST
+ // VBE Video Mode Pointer (dynamicly generated from the mode_info_list)
+ vbe_info_block.VideoModePtr_Seg= ES ;
+ vbe_info_block.VideoModePtr_Off= DI + 34;
+#else
+ // VBE Video Mode Pointer (staticly in rom)
+ vbe_info_block.VideoModePtr_Seg = 0xc000;
+ vbe_info_block.VideoModePtr_Off = &vbebios_mode_list;
+#endif
+
+ // VBE Total Memory (in 64b blocks)
+ vbe_info_block.TotalMemory = VBE_TOTAL_VIDEO_MEMORY_DIV_64K;
+
+ if (vbe2_info)
+ {
+ // OEM Stuff
+ vbe_info_block.OemSoftwareRev = VBE_OEM_SOFTWARE_REV;
+ vbe_info_block.OemVendorNamePtr_Seg = 0xc000;
+ vbe_info_block.OemVendorNamePtr_Off = &vbebios_vendor_name;
+ vbe_info_block.OemProductNamePtr_Seg = 0xc000;
+ vbe_info_block.OemProductNamePtr_Off = &vbebios_product_name;
+ vbe_info_block.OemProductRevPtr_Seg = 0xc000;
+ vbe_info_block.OemProductRevPtr_Off = &vbebios_product_revision;
+
+ // copy updates in vbe_info_block back
+ memcpyb(ES, DI, ss, &vbe_info_block, sizeof(vbe_info_block));
+ }
+ else
+ {
+ // copy updates in vbe_info_block back (VBE 1.x compatibility)
+ memcpyb(ES, DI, ss, &vbe_info_block, 256);
+ }
+
+#ifdef DYN_LIST
+ // emit only the modes the host's max bpp can actually display
+ do
+ {
+ if (cur_info->info.BitsPerPixel <= dispi_get_max_bpp()) {
+#ifdef DEBUG
+ printf("VBE found mode %x => %x\n", cur_info->mode,cur_mode);
+#endif
+ write_word(ES, DI + cur_ptr, cur_info->mode);
+ cur_mode++;
+ cur_ptr+=2;
+ }
+ cur_info++;
+ } while (cur_info->mode != VBE_VESA_MODE_END_OF_LIST);
+
+ // Add vesa mode list terminator
+ write_word(ES, DI + cur_ptr, cur_info->mode);
+#endif
+
+ result = 0x4f;
+
+ write_word(ss, AX, result);
+}
+
+
+/** Function 01h - Return VBE Mode Information
+ *
+ * Input:
+ * AX = 4F01h
+ * CX = Mode Number
+ * ES:DI = Pointer to buffer in which to place ModeInfoBlock structure
+ * Output:
+ * AX = VBE Return Status
+ *
+ */
+void vbe_biosfn_return_mode_information(AX, CX, ES, DI)
+Bit16u *AX;Bit16u CX; Bit16u ES;Bit16u DI;
+{
+ Bit16u result=0x0100; // default: function call failed
+ Bit16u ss=get_SS();
+ ModeInfoBlock info;
+ ModeInfoListItem *cur_info;
+ Boolean using_lfb;
+
+#ifdef DEBUG
+ printf("VBE vbe_biosfn_return_mode_information ES%x DI%x CX%x\n",ES,DI,CX);
+#endif
+
+ using_lfb=((CX & VBE_MODE_LINEAR_FRAME_BUFFER) == VBE_MODE_LINEAR_FRAME_BUFFER);
+
+ // strip the flag bits; only bits 8-0 carry the mode number
+ CX = (CX & 0x1ff);
+
+ // mode_info_find_mode takes exactly (mode, using_lfb); the original
+ // call passed a stray third argument (&cur_info)
+ cur_info = mode_info_find_mode(CX, using_lfb);
+
+ if (cur_info != 0)
+ {
+#ifdef DEBUG
+ printf("VBE found mode %x\n",CX);
+#endif
+ // zero the full block, then copy the compact ROM template over it
+ memsetb(ss, &info, 0, sizeof(ModeInfoBlock));
+ memcpyb(ss, &info, 0xc000, &(cur_info->info), sizeof(ModeInfoBlockCompact));
+ if (info.WinAAttributes & VBE_WINDOW_ATTRIBUTE_RELOCATABLE) {
+ // far pointer to the bank-switch entry: segment C000 in the
+ // high word, the function's offset patched into the low word
+ info.WinFuncPtr = 0xC0000000UL;
+ *(Bit16u *)&(info.WinFuncPtr) = (Bit16u)(dispi_set_bank_farcall);
+ }
+
+ result = 0x4f;
+ }
+ else
+ {
+#ifdef DEBUG
+ printf("VBE *NOT* found mode %x\n",CX);
+#endif
+ result = 0x100;
+ }
+
+ if (result == 0x4f)
+ {
+ // copy updates in mode_info_block back
+ memcpyb(ES, DI, ss, &info, sizeof(info));
+ }
+
+ write_word(ss, AX, result);
+}
+
+/** Function 02h - Set VBE Mode
+ *
+ * Input:
+ * AX = 4F02h
+ * BX = Desired Mode to set
+ * ES:DI = Pointer to CRTCInfoBlock structure
+ * Output:
+ * AX = VBE Return Status
+ *
+ */
+void vbe_biosfn_set_mode(AX, BX, ES, DI)
+Bit16u *AX;Bit16u BX; Bit16u ES;Bit16u DI;
+{
+ Bit16u ss = get_SS();
+ Bit16u result;
+ ModeInfoListItem *cur_info;
+ Boolean using_lfb;
+ Bit8u no_clear;
+ Bit8u lfb_flag;
+
+ using_lfb=((BX & VBE_MODE_LINEAR_FRAME_BUFFER) == VBE_MODE_LINEAR_FRAME_BUFFER);
+ lfb_flag=using_lfb?VBE_DISPI_LFB_ENABLED:0;
+ no_clear=((BX & VBE_MODE_PRESERVE_DISPLAY_MEMORY) == VBE_MODE_PRESERVE_DISPLAY_MEMORY)?VBE_DISPI_NOCLEARMEM:0;
+
+ // strip the flag bits; only bits 8-0 carry the mode number
+ BX = (BX & 0x1ff);
+
+ // check for non vesa mode
+ if (BX<VBE_MODE_VESA_DEFINED)
+ {
+ Bit8u mode;
+
+ dispi_set_enable(VBE_DISPI_DISABLED);
+ // call the vgabios in order to set the video mode
+ // this allows for going back to textmode with a VBE call (some applications expect that to work)
+
+ mode=(BX & 0xff);
+ biosfn_set_video_mode(mode);
+ // report success and return: legacy modes are not in
+ // mode_info_list, so falling through would clobber the
+ // status with 0x100 (the original code only special-cased
+ // mode 3 below)
+ write_word(ss, AX, 0x4f);
+ return;
+ }
+
+ // mode_info_find_mode takes exactly (mode, using_lfb); the original
+ // call passed a stray third argument (&cur_info)
+ cur_info = mode_info_find_mode(BX, using_lfb);
+
+ if (cur_info != 0)
+ {
+#ifdef DEBUG
+ printf("VBE found mode %x, setting:\n", BX);
+ printf("\txres%x yres%x bpp%x\n",
+ cur_info->info.XResolution,
+ cur_info->info.YResolution,
+ cur_info->info.BitsPerPixel);
+#endif
+
+ // first disable current mode (when switching between vesa modi)
+ dispi_set_enable(VBE_DISPI_DISABLED);
+
+ if (cur_info->mode == VBE_VESA_MODE_800X600X4)
+ {
+ biosfn_set_video_mode(0x6a);
+ }
+
+ dispi_set_bpp(cur_info->info.BitsPerPixel);
+ dispi_set_xres(cur_info->info.XResolution);
+ dispi_set_yres(cur_info->info.YResolution);
+ dispi_set_bank(0);
+ dispi_set_enable(VBE_DISPI_ENABLED | no_clear | lfb_flag);
+
+ // record the new mode in the BIOS data area
+ write_word(BIOSMEM_SEG,BIOSMEM_VBE_MODE,BX);
+ write_byte(BIOSMEM_SEG,BIOSMEM_VIDEO_CTL,(0x60 | no_clear));
+
+ result = 0x4f;
+ }
+ else
+ {
+#ifdef DEBUG
+ printf("VBE *NOT* found mode %x\n" , BX);
+#endif
+ result = 0x100;
+ }
+
+ write_word(ss, AX, result);
+}
+
+/** Function 03h - Return Current VBE Mode
+ *
+ * Input:
+ * AX = 4F03h
+ * Output:
+ * AX = VBE Return Status
+ * BX = Current VBE Mode
+ *
+ */
+ASM_START
+vbe_biosfn_return_current_mode:
+ push ds
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax ;; address the BIOS data area
+ call dispi_get_enable
+ and ax, # VBE_DISPI_ENABLED ;; is a VBE mode currently active?
+ jz no_vbe_mode
+ mov bx, # BIOSMEM_VBE_MODE
+ mov ax, [bx] ;; fetch the stored VBE mode word
+ mov bx, ax ;; return it in BX
+ jnz vbe_03_ok ;; NOTE(review): mov does not update flags; this relies on ZF still clear from the and above - confirm
+no_vbe_mode:
+ mov bx, # BIOSMEM_CURRENT_MODE
+ mov al, [bx] ;; legacy VGA mode number (one byte)
+ mov bl, al
+ xor bh, bh ;; zero-extend into BX
+vbe_03_ok:
+ mov ax, #0x004f ;; AX = VBE success status
+ pop ds
+ ret
+ASM_END
+
+
+/** Function 04h - Save/Restore State
+ *
+ * Input:
+ * AX = 4F04h
+ * DL = 00h Return Save/Restore State buffer size
+ * 01h Save State
+ * 02h Restore State
+ * CX = Requested states
+ * ES:BX = Pointer to buffer (if DL <> 00h)
+ * Output:
+ * AX = VBE Return Status
+ * BX = Number of 64-byte blocks to hold the state buffer (if DL=00h)
+ *
+ */
+void vbe_biosfn_save_restore_state(AX, DL, CX, ES, BX)
+{
+} /* FIXME: unimplemented stub - does not set any return status */
+
+
+/** Function 05h - Display Window Control
+ *
+ * Input:
+ * AX = 4F05h
+ * (16-bit) BH = 00h Set memory window
+ * = 01h Get memory window
+ * BL = Window number
+ * = 00h Window A
+ * = 01h Window B
+ * DX = Window number in video memory in window
+ * granularity units (Set Memory Window only)
+ * Note:
+ * If this function is called while in a linear frame buffer mode,
+ * this function must fail with completion code AH=03h
+ *
+ * Output:
+ * AX = VBE Return Status
+ * DX = Window number in window granularity units
+ * (Get Memory Window only)
+ */
+ASM_START
+vbe_biosfn_display_window_control:
+ cmp bl, #0x00
+ jne vbe_05_failed ;; only window A (BL=00h) is supported
+ cmp bh, #0x01
+ je get_display_window
+ jb set_display_window ;; BH=00h
+ mov ax, #0x0100 ;; BH > 01h: function call failed
+ ret
+set_display_window:
+ mov ax, dx ;; DX = requested window position in granularity units
+ call _dispi_set_bank
+ call dispi_get_bank
+ cmp ax, dx ;; read back to verify the bank was accepted
+ jne vbe_05_failed
+ mov ax, #0x004f
+ ret
+get_display_window:
+ call dispi_get_bank
+ mov dx, ax ;; return the current bank in DX
+ mov ax, #0x004f
+ ret
+vbe_05_failed:
+ mov ax, #0x014f ;; AH=01h: function call failed
+ ret
+ASM_END
+
+
+/** Function 06h - Set/Get Logical Scan Line Length
+ *
+ * Input:
+ * AX = 4F06h
+ * BL = 00h Set Scan Line Length in Pixels
+ * = 01h Get Scan Line Length
+ * = 02h Set Scan Line Length in Bytes
+ * = 03h Get Maximum Scan Line Length
+ * CX = If BL=00h Desired Width in Pixels
+ * If BL=02h Desired Width in Bytes
+ * (Ignored for Get Functions)
+ *
+ * Output:
+ * AX = VBE Return Status
+ * BX = Bytes Per Scan Line
+ * CX = Actual Pixels Per Scan Line
+ * (truncated to nearest complete pixel)
+ * DX = Maximum Number of Scan Lines
+ */
+ASM_START
+vbe_biosfn_set_get_logical_scan_line_length:
+ mov ax, cx ;; requested width (pixels for BL=00h, bytes for BL=02h)
+ cmp bl, #0x01
+ je get_logical_scan_line_length
+ cmp bl, #0x02
+ je set_logical_scan_line_bytes
+ jb set_logical_scan_line_pixels ;; BL=00h
+ mov ax, #0x0100 ;; NOTE(review): BL=03h (get maximum length) also lands here and fails - confirm intended
+ ret
+set_logical_scan_line_bytes:
+ push ax
+ call dispi_get_bpp
+ xor bh, bh
+ mov bl, ah ;; assumes dispi_get_bpp returns bytes-per-pixel in AH - confirm
+ xor dx, dx
+ pop ax
+ div bx ;; convert the byte count into pixels
+set_logical_scan_line_pixels:
+ call dispi_set_virt_width
+get_logical_scan_line_length:
+ call dispi_get_bpp
+ xor bh, bh
+ mov bl, ah
+ call dispi_get_virt_width
+ mov cx, ax ;; CX = actual pixels per scan line
+ mul bx
+ mov bx, ax ;; BX = bytes per scan line
+ call dispi_get_virt_height
+ mov dx, ax ;; DX = maximum number of scan lines
+ mov ax, #0x004f
+ ret
+ASM_END
+
+
+/** Function 07h - Set/Get Display Start
+ *
+ * Input(16-bit):
+ * AX = 4F07h
+ * BH = 00h Reserved and must be 00h
+ * BL = 00h Set Display Start
+ * = 01h Get Display Start
+ * = 02h Schedule Display Start (Alternate)
+ * = 03h Schedule Stereoscopic Display Start
+ * = 04h Get Scheduled Display Start Status
+ * = 05h Enable Stereoscopic Mode
+ * = 06h Disable Stereoscopic Mode
+ * = 80h Set Display Start during Vertical Retrace
+ * = 82h Set Display Start during Vertical Retrace (Alternate)
+ * = 83h Set Stereoscopic Display Start during Vertical Retrace
+ * ECX = If BL=02h/82h Display Start Address in bytes
+ * If BL=03h/83h Left Image Start Address in bytes
+ * EDX = If BL=03h/83h Right Image Start Address in bytes
+ * CX = If BL=00h/80h First Displayed Pixel In Scan Line
+ * DX = If BL=00h/80h First Displayed Scan Line
+ *
+ * Output:
+ * AX = VBE Return Status
+ * BH = If BL=01h Reserved and will be 0
+ * CX = If BL=01h First Displayed Pixel In Scan Line
+ * If BL=04h 0 if flip has not occurred, not 0 if it has
+ * DX = If BL=01h First Displayed Scan Line
+ *
+ * Input(32-bit):
+ * BH = 00h Reserved and must be 00h
+ * BL = 00h Set Display Start
+ * = 80h Set Display Start during Vertical Retrace
+ * CX = Bits 0-15 of display start address
+ * DX = Bits 16-31 of display start address
+ * ES = Selector for memory mapped registers
+ */
+ASM_START
+vbe_biosfn_set_get_display_start:
+ cmp bl, #0x80
+ je set_display_start ;; BL=80h (set during retrace) is handled the same as BL=00h
+ cmp bl, #0x01
+ je get_display_start
+ jb set_display_start ;; BL=00h
+ mov ax, #0x0100 ;; remaining subfunctions are not supported
+ ret
+set_display_start:
+ mov ax, cx ;; CX = first displayed pixel in scan line
+ call dispi_set_x_offset
+ mov ax, dx ;; DX = first displayed scan line
+ call dispi_set_y_offset
+ mov ax, #0x004f
+ ret
+get_display_start:
+ call dispi_get_x_offset
+ mov cx, ax
+ call dispi_get_y_offset
+ mov dx, ax
+ xor bh, bh ;; BH is reserved and must be returned as 0
+ mov ax, #0x004f
+ ret
+ASM_END
+
+
+/** Function 08h - Set/Get Dac Palette Format
+ *
+ * Input:
+ * AX = 4F08h
+ * BL = 00h set DAC palette width
+ * = 01h get DAC palette width
+ * BH = If BL=00h: desired number of bits per primary color
+ * Output:
+ * AX = VBE Return Status
+ * BH = current number of bits per primary color (06h = standard VGA)
+ */
+ASM_START
+vbe_biosfn_set_get_dac_palette_format:
+ cmp bl, #0x01
+ je get_dac_palette_format
+ jb set_dac_palette_format ;; BL=00h
+ mov ax, #0x0100 ;; BL > 01h not supported
+ ret
+set_dac_palette_format:
+ call dispi_get_enable
+ cmp bh, #0x06
+ je set_normal_dac ;; 6 bits per primary: standard VGA DAC
+ cmp bh, #0x08
+ jne vbe_08_unsupported ;; only 6 or 8 bit wide DAC is supported
+ or ax, # VBE_DISPI_8BIT_DAC
+ jnz set_dac_mode ;; always taken: the or just set a nonzero bit
+set_normal_dac:
+ and ax, #~ VBE_DISPI_8BIT_DAC
+set_dac_mode:
+ call _dispi_set_enable
+get_dac_palette_format:
+ mov bh, #0x06 ;; default answer: standard VGA width
+ call dispi_get_enable
+ and ax, # VBE_DISPI_8BIT_DAC
+ jz vbe_08_ok
+ mov bh, #0x08 ;; 8 bit DAC currently enabled
+vbe_08_ok:
+ mov ax, #0x004f
+ ret
+vbe_08_unsupported:
+ mov ax, #0x014f
+ ret
+ASM_END
+
+
+/** Function 09h - Set/Get Palette Data
+ *
+ * Input:
+ * AX = 4F09h
+ * Output:
+ * AX = VBE Return Status
+ *
+ * FIXME: incomplete API description, Input & Output
+ */
+void vbe_biosfn_set_get_palette_data(AX)
+{
+} /* FIXME: unimplemented stub - does not set any return status */
+
+/** Function 0Ah - Return VBE Protected Mode Interface
+ *
+ * Input:
+ * AX = 4F0Ah
+ * Output:
+ * AX = VBE Return Status
+ *
+ * FIXME: incomplete API description, Input & Output
+ */
+void vbe_biosfn_return_protected_mode_interface(AX)
+{
+} /* FIXME: unimplemented stub - does not set any return status */
diff --git a/tools/firmware/vgabios/vbe.h b/tools/firmware/vgabios/vbe.h
new file mode 100644
index 0000000000..621048a1c7
--- /dev/null
+++ b/tools/firmware/vgabios/vbe.h
@@ -0,0 +1,302 @@
+#ifndef vbe_h_included
+#define vbe_h_included
+
+#include "vgabios.h"
+
+// DISPI helper function
+void dispi_set_enable(enable);
+
+/** VBE int10 API
+ *
+ * See the function descriptions in vbe.c for more information
+ */
+Boolean vbe_has_vbe_display();
+void vbe_biosfn_return_controller_information(AX, ES, DI);
+void vbe_biosfn_return_mode_information(AX, CX, ES, DI);
+void vbe_biosfn_set_mode(AX, BX, ES, DI);
+void vbe_biosfn_save_restore_state(AX, DL, CX, ES, BX);
+void vbe_biosfn_set_get_palette_data(AX);
+void vbe_biosfn_return_protected_mode_interface(AX);
+
+// The official VBE Information Block
+typedef struct VbeInfoBlock /* VBE controller information block (int10 function 4F00h) */
+{
+ Bit8u VbeSignature[4];
+ Bit16u VbeVersion;
+ Bit16u OemStringPtr_Off; /* far pointers are stored as Off/Seg word pairs */
+ Bit16u OemStringPtr_Seg;
+ Bit8u Capabilities[4];
+ Bit16u VideoModePtr_Off;
+ Bit16u VideoModePtr_Seg;
+ Bit16u TotalMemory; /* presumably in 64KB units (see VBE_TOTAL_VIDEO_MEMORY_DIV_64K below) - confirm */
+ Bit16u OemSoftwareRev;
+ Bit16u OemVendorNamePtr_Off;
+ Bit16u OemVendorNamePtr_Seg;
+ Bit16u OemProductNamePtr_Off;
+ Bit16u OemProductNamePtr_Seg;
+ Bit16u OemProductRevPtr_Off;
+ Bit16u OemProductRevPtr_Seg;
+ Bit16u Reserved[111]; // used for dynamically generated mode list
+ Bit8u OemData[256];
+} VbeInfoBlock;
+
+
+// This one is for compactly storing a static list of mode info blocks
+// this saves us 189 bytes per block
+typedef struct ModeInfoBlockCompact /* identical to ModeInfoBlock minus the trailing Reserved[189] bytes */
+{
+// Mandatory information for all VBE revisions
+ Bit16u ModeAttributes;
+ Bit8u WinAAttributes;
+ Bit8u WinBAttributes;
+ Bit16u WinGranularity;
+ Bit16u WinSize;
+ Bit16u WinASegment;
+ Bit16u WinBSegment;
+ Bit32u WinFuncPtr;
+ Bit16u BytesPerScanLine;
+// Mandatory information for VBE 1.2 and above
+ Bit16u XResolution;
+ Bit16u YResolution;
+ Bit8u XCharSize;
+ Bit8u YCharSize;
+ Bit8u NumberOfPlanes;
+ Bit8u BitsPerPixel;
+ Bit8u NumberOfBanks;
+ Bit8u MemoryModel;
+ Bit8u BankSize;
+ Bit8u NumberOfImagePages;
+ Bit8u Reserved_page;
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ Bit8u RedMaskSize;
+ Bit8u RedFieldPosition;
+ Bit8u GreenMaskSize;
+ Bit8u GreenFieldPosition;
+ Bit8u BlueMaskSize;
+ Bit8u BlueFieldPosition;
+ Bit8u RsvdMaskSize;
+ Bit8u RsvdFieldPosition;
+ Bit8u DirectColorModeInfo;
+// Mandatory information for VBE 2.0 and above
+ Bit32u PhysBasePtr;
+ Bit32u OffScreenMemOffset;
+ Bit16u OffScreenMemSize;
+// Mandatory information for VBE 3.0 and above
+ Bit16u LinBytesPerScanLine;
+ Bit8u BnkNumberOfPages;
+ Bit8u LinNumberOfPages;
+ Bit8u LinRedMaskSize;
+ Bit8u LinRedFieldPosition;
+ Bit8u LinGreenMaskSize;
+ Bit8u LinGreenFieldPosition;
+ Bit8u LinBlueMaskSize;
+ Bit8u LinBlueFieldPosition;
+ Bit8u LinRsvdMaskSize;
+ Bit8u LinRsvdFieldPosition;
+ Bit32u MaxPixelClock;
+// Bit8u Reserved[189]; // DO NOT PUT THIS IN HERE because of Compact Mode Info storage in bios
+} ModeInfoBlockCompact;
+
+typedef struct ModeInfoBlock /* full VBE mode information block as returned by int10 function 4F01h */
+{
+// Mandatory information for all VBE revisions
+ Bit16u ModeAttributes;
+ Bit8u WinAAttributes;
+ Bit8u WinBAttributes;
+ Bit16u WinGranularity;
+ Bit16u WinSize;
+ Bit16u WinASegment;
+ Bit16u WinBSegment;
+ Bit32u WinFuncPtr;
+ Bit16u BytesPerScanLine;
+// Mandatory information for VBE 1.2 and above
+ Bit16u XResolution;
+ Bit16u YResolution;
+ Bit8u XCharSize;
+ Bit8u YCharSize;
+ Bit8u NumberOfPlanes;
+ Bit8u BitsPerPixel;
+ Bit8u NumberOfBanks;
+ Bit8u MemoryModel;
+ Bit8u BankSize;
+ Bit8u NumberOfImagePages;
+ Bit8u Reserved_page;
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ Bit8u RedMaskSize;
+ Bit8u RedFieldPosition;
+ Bit8u GreenMaskSize;
+ Bit8u GreenFieldPosition;
+ Bit8u BlueMaskSize;
+ Bit8u BlueFieldPosition;
+ Bit8u RsvdMaskSize;
+ Bit8u RsvdFieldPosition;
+ Bit8u DirectColorModeInfo;
+// Mandatory information for VBE 2.0 and above
+ Bit32u PhysBasePtr;
+ Bit32u OffScreenMemOffset;
+ Bit16u OffScreenMemSize;
+// Mandatory information for VBE 3.0 and above
+ Bit16u LinBytesPerScanLine;
+ Bit8u BnkNumberOfPages;
+ Bit8u LinNumberOfPages;
+ Bit8u LinRedMaskSize;
+ Bit8u LinRedFieldPosition;
+ Bit8u LinGreenMaskSize;
+ Bit8u LinGreenFieldPosition;
+ Bit8u LinBlueMaskSize;
+ Bit8u LinBlueFieldPosition;
+ Bit8u LinRsvdMaskSize;
+ Bit8u LinRsvdFieldPosition;
+ Bit32u MaxPixelClock;
+ Bit8u Reserved[189];
+} ModeInfoBlock;
+
+// VBE Return Status Info
+// AL
+#define VBE_RETURN_STATUS_SUPPORTED 0x4F
+#define VBE_RETURN_STATUS_UNSUPPORTED 0x00
+// AH
+#define VBE_RETURN_STATUS_SUCCESSFULL 0x00
+#define VBE_RETURN_STATUS_FAILED 0x01
+#define VBE_RETURN_STATUS_NOT_SUPPORTED 0x02
+#define VBE_RETURN_STATUS_INVALID 0x03
+
+// VBE Mode Numbers
+
+#define VBE_MODE_VESA_DEFINED 0x0100
+#define VBE_MODE_REFRESH_RATE_USE_CRTC 0x0800
+#define VBE_MODE_LINEAR_FRAME_BUFFER 0x4000
+#define VBE_MODE_PRESERVE_DISPLAY_MEMORY 0x8000
+
+// VBE GFX Mode Number
+
+#define VBE_VESA_MODE_640X400X8 0x100
+#define VBE_VESA_MODE_640X480X8 0x101
+#define VBE_VESA_MODE_800X600X4 0x102
+#define VBE_VESA_MODE_800X600X8 0x103
+#define VBE_VESA_MODE_1024X768X4 0x104
+#define VBE_VESA_MODE_1024X768X8 0x105
+#define VBE_VESA_MODE_1280X1024X4 0x106
+#define VBE_VESA_MODE_1280X1024X8 0x107
+#define VBE_VESA_MODE_320X200X1555 0x10D
+#define VBE_VESA_MODE_320X200X565 0x10E
+#define VBE_VESA_MODE_320X200X888 0x10F
+#define VBE_VESA_MODE_640X480X1555 0x110
+#define VBE_VESA_MODE_640X480X565 0x111
+#define VBE_VESA_MODE_640X480X888 0x112
+#define VBE_VESA_MODE_800X600X1555 0x113
+#define VBE_VESA_MODE_800X600X565 0x114
+#define VBE_VESA_MODE_800X600X888 0x115
+#define VBE_VESA_MODE_1024X768X1555 0x116
+#define VBE_VESA_MODE_1024X768X565 0x117
+#define VBE_VESA_MODE_1024X768X888 0x118
+#define VBE_VESA_MODE_1280X1024X1555 0x119
+#define VBE_VESA_MODE_1280X1024X565 0x11A
+#define VBE_VESA_MODE_1280X1024X888 0x11B
+
+// BOCHS/PLEX86 'own' mode numbers
+#define VBE_OWN_MODE_320X200X8888 0x140
+#define VBE_OWN_MODE_640X400X8888 0x141
+#define VBE_OWN_MODE_640X480X8888 0x142
+#define VBE_OWN_MODE_800X600X8888 0x143
+#define VBE_OWN_MODE_1024X768X8888 0x144
+#define VBE_OWN_MODE_1280X1024X8888 0x145
+#define VBE_OWN_MODE_320X200X8 0x146
+
+#define VBE_VESA_MODE_END_OF_LIST 0xFFFF
+
+// Capabilities
+
+#define VBE_CAPABILITY_8BIT_DAC 0x0001
+#define VBE_CAPABILITY_NOT_VGA_COMPATIBLE 0x0002
+#define VBE_CAPABILITY_RAMDAC_USE_BLANK_BIT 0x0004
+#define VBE_CAPABILITY_STEREOSCOPIC_SUPPORT 0x0008
+#define VBE_CAPABILITY_STEREO_VIA_VESA_EVC 0x0010
+
+// Mode Attributes
+
+#define VBE_MODE_ATTRIBUTE_SUPPORTED 0x0001
+#define VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE 0x0002
+#define VBE_MODE_ATTRIBUTE_TTY_BIOS_SUPPORT 0x0004
+#define VBE_MODE_ATTRIBUTE_COLOR_MODE 0x0008
+#define VBE_MODE_ATTRIBUTE_GRAPHICS_MODE 0x0010
+#define VBE_MODE_ATTRIBUTE_NOT_VGA_COMPATIBLE 0x0020
+#define VBE_MODE_ATTRIBUTE_NO_VGA_COMPATIBLE_WINDOW 0x0040
+#define VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE 0x0080
+#define VBE_MODE_ATTRIBUTE_DOUBLE_SCAN_MODE 0x0100
+#define VBE_MODE_ATTRIBUTE_INTERLACE_MODE 0x0200
+#define VBE_MODE_ATTRIBUTE_HARDWARE_TRIPLE_BUFFER 0x0400
+#define VBE_MODE_ATTRIBUTE_HARDWARE_STEREOSCOPIC_DISPLAY 0x0800
+#define VBE_MODE_ATTRIBUTE_DUAL_DISPLAY_START_ADDRESS 0x1000
+
+#define VBE_MODE_ATTTRIBUTE_LFB_ONLY ( VBE_MODE_ATTRIBUTE_NO_VGA_COMPATIBLE_WINDOW | VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE )
+
+// Window attributes
+
+#define VBE_WINDOW_ATTRIBUTE_RELOCATABLE 0x01
+#define VBE_WINDOW_ATTRIBUTE_READABLE 0x02
+#define VBE_WINDOW_ATTRIBUTE_WRITEABLE 0x04
+
+// Memory model
+
+#define VBE_MEMORYMODEL_TEXT_MODE 0x00
+#define VBE_MEMORYMODEL_CGA_GRAPHICS 0x01
+#define VBE_MEMORYMODEL_HERCULES_GRAPHICS 0x02
+#define VBE_MEMORYMODEL_PLANAR 0x03
+#define VBE_MEMORYMODEL_PACKED_PIXEL 0x04
+#define VBE_MEMORYMODEL_NON_CHAIN_4_256 0x05
+#define VBE_MEMORYMODEL_DIRECT_COLOR 0x06
+#define VBE_MEMORYMODEL_YUV 0x07
+
+// DirectColorModeInfo
+
+#define VBE_DIRECTCOLOR_COLOR_RAMP_PROGRAMMABLE 0x01
+#define VBE_DIRECTCOLOR_RESERVED_BITS_AVAILABLE 0x02
+
+// GUEST <-> HOST Communication API
+
+// FIXME: either dynamically ask the host for this or put it somewhere high in physical memory
+// like 0xE0000000
+
+
+ #define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 4
+
+ #define VBE_DISPI_BANK_ADDRESS 0xA0000
+ #define VBE_DISPI_BANK_SIZE_KB 64
+
+ #define VBE_DISPI_MAX_XRES 1024
+ #define VBE_DISPI_MAX_YRES 768
+
+ #define VBE_DISPI_IOPORT_INDEX 0x01CE
+ #define VBE_DISPI_IOPORT_DATA 0x01CF
+
+ #define VBE_DISPI_INDEX_ID 0x0
+ #define VBE_DISPI_INDEX_XRES 0x1
+ #define VBE_DISPI_INDEX_YRES 0x2
+ #define VBE_DISPI_INDEX_BPP 0x3
+ #define VBE_DISPI_INDEX_ENABLE 0x4
+ #define VBE_DISPI_INDEX_BANK 0x5
+ #define VBE_DISPI_INDEX_VIRT_WIDTH 0x6
+ #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
+ #define VBE_DISPI_INDEX_X_OFFSET 0x8
+ #define VBE_DISPI_INDEX_Y_OFFSET 0x9
+
+ #define VBE_DISPI_ID0 0xB0C0
+ #define VBE_DISPI_ID1 0xB0C1
+ #define VBE_DISPI_ID2 0xB0C2
+ #define VBE_DISPI_ID3 0xB0C3
+
+ #define VBE_DISPI_DISABLED 0x00
+ #define VBE_DISPI_ENABLED 0x01
+ #define VBE_DISPI_GETCAPS 0x02
+ #define VBE_DISPI_8BIT_DAC 0x20
+ #define VBE_DISPI_LFB_ENABLED 0x40
+ #define VBE_DISPI_NOCLEARMEM 0x80
+
+ #define VBE_DISPI_LFB_PHYSICAL_ADDRESS 0xE0000000
+
+
+#define VBE_TOTAL_VIDEO_MEMORY_DIV_64K (VBE_DISPI_TOTAL_VIDEO_MEMORY_MB*1024/64)
+
+
+#endif
diff --git a/tools/firmware/vgabios/vbe_display_api.txt b/tools/firmware/vgabios/vbe_display_api.txt
new file mode 100644
index 0000000000..788e17a790
--- /dev/null
+++ b/tools/firmware/vgabios/vbe_display_api.txt
@@ -0,0 +1,227 @@
+VBE Display API
+-------------------------------------------------------------------------------------------------------------
+ This document is part of the Bochs/VBEBios documentation,
+ it specifies the bochs host <-> vbebios client communication.
+
+ That means, the display code implementation and the vbebios code depend
+ very heavily on each other. As such, this document needs to be synchronised
+ between bochs CVS and the vgabios CVS.
+
+ This document does not describe how the VBEBios implements the VBE2/3 spec.
+ This document does not describe how the Bochs display code will display gfx based upon this spec.
+
+
+API History
+-----------
+0xb0c0 supports the following VBE_DISPI_ interfaces (present in Bochs 1.4):
+ VBE_DISPI_INDEX_ID
+ VBE_DISPI_INDEX_XRES
+ VBE_DISPI_INDEX_YRES
+ VBE_DISPI_INDEX_BPP
+ VBE_DISPI_INDEX_ENABLE
+ VBE_DISPI_INDEX_BANK
+
+ Bpp format supported is:
+ VBE_DISPI_BPP_8
+
+0xb0c1 supports 0xb0c0 VBE_DISPI_ interfaces, additional interfaces (present in Bochs 2.0):
+ VBE_DISPI_INDEX_VIRT_WIDTH
+ VBE_DISPI_INDEX_VIRT_HEIGHT
+ VBE_DISPI_INDEX_X_OFFSET
+ VBE_DISPI_INDEX_Y_OFFSET
+
+0xb0c2 supports 0xb0c1 VBE_DISPI_ interfaces, interfaces updated for
+ additional features (present in Bochs 2.1):
+ VBE_DISPI_INDEX_BPP supports >8bpp color depth (value = bits)
+ VBE_DISPI_INDEX_ENABLE supports new flags VBE_DISPI_NOCLEARMEM and VBE_DISPI_LFB_ENABLED
+ VBE i/o registers changed from 0xFF80/81 to 0x01CE/CF
+
+0xb0c3 supports 0xb0c2 VBE_DISPI_ interfaces, interfaces updated for
+ additional features:
+ VBE_DISPI_INDEX_ENABLE supports new flags VBE_DISPI_GETCAPS and VBE_DISPI_8BIT_DAC
+
+
+History
+-------
+ Version 0.6 2002 Nov 23 Jeroen Janssen
+ - Added LFB support
+ - Added Virt width, height and x,y offset
+
+ Version 0.5 2002 March 08 Jeroen Janssen
+ - Added documentation about panic behaviour / current limits of the data values.
+ - Changed BPP API (in order to include future (A)RGB formats)
+ - Initial version (based upon extended display text of the vbe bochs display patch)
+
+
+Todo
+----
+ Version 0.6+ [random order]
+ - Add lots of different (A)RGB formats
+
+References
+----------
+ [VBE3] VBE 3 Specification at
+ http://www.vesa.org/vbe3.pdf
+
+ [BOCHS] Bochs Open Source IA-32 Emulator at
+ http://bochs.sourceforge.net
+
+ [VBEBIOS] VBE Bios for Bochs at
+ http://savannah.gnu.org/projects/vgabios/
+
+ [Screenshots] Screenshots of programs using the VBE Bios at
+ http://japj.org/projects/bochs_plex86/screenshots.html
+
+Abbreviations
+-------------
+ VBE Vesa Bios Extension
+ DISPI (Bochs) Display Interface
+ BPP Bits Per Pixel
+ LFB Linear Frame Buffer
+
+
+#defines
+--------
+ #define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 4
+ #define VBE_DISPI_BANK_ADDRESS 0xA0000
+ #define VBE_DISPI_BANK_SIZE_KB 64
+
+ #define VBE_DISPI_MAX_XRES 1024
+ #define VBE_DISPI_MAX_YRES 768
+
+ #define VBE_DISPI_IOPORT_INDEX 0x01CE
+ #define VBE_DISPI_IOPORT_DATA 0x01CF
+
+ #define VBE_DISPI_INDEX_ID 0x0
+ #define VBE_DISPI_INDEX_XRES 0x1
+ #define VBE_DISPI_INDEX_YRES 0x2
+ #define VBE_DISPI_INDEX_BPP 0x3
+ #define VBE_DISPI_INDEX_ENABLE 0x4
+ #define VBE_DISPI_INDEX_BANK 0x5
+ #define VBE_DISPI_INDEX_VIRT_WIDTH 0x6
+ #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
+ #define VBE_DISPI_INDEX_X_OFFSET 0x8
+ #define VBE_DISPI_INDEX_Y_OFFSET 0x9
+
+ #define VBE_DISPI_ID0 0xB0C0
+ #define VBE_DISPI_ID1 0xB0C1
+ #define VBE_DISPI_ID2 0xB0C2
+
+ #define VBE_DISPI_DISABLED 0x00
+ #define VBE_DISPI_ENABLED 0x01
+ #define VBE_DISPI_VBE_ENABLED 0x40
+ #define VBE_DISPI_NOCLEARMEM 0x80
+
+ #define VBE_DISPI_LFB_PHYSICAL_ADDRESS 0xE0000000
+
+API
+---
+ The display api works by using a index (VBE_DISPI_IOPORT_INDEX) and
+ data (VBE_DISPI_IOPORT_DATA) ioport. One writes the index of the parameter to the index port.
+ Next, the parameter value can be read or written.
+
+[0xb0c0]
+ * VBE_DISPI_INDEX_ID : WORD {R,W}
+ This parameter can be used to detect the current display API (both bochs & vbebios).
+ The bios writes VBE_DISPI_ID0 to the dataport and reads it back again.
+ This way, the display code knows the vbebios 'ID' and the vbebios can check if the correct
+ display code is present.
+ As a result, a PANIC can be generated if an incompatible vbebios/display code combination is detected.
+ This panic can be generated from the bochs display code (NOT the bios, see Notes).
+
+ Example values: VBE_DISPI_ID0
+
+ * VBE_DISPI_INDEX_XRES : WORD {R,W}
+ This parameter can be used to read/write the vbe display X resolution (in pixels).
+ It's illegal to set the XRES when the VBE is enabled (display code should generate PANIC).
+
+ If the value written exceeds VBE_DISPI_MAX_XRES, the display code needs to generate a PANIC.
+
+ Example values: 320,640,800,1024
+
+ * VBE_DISPI_INDEX_YRES : WORD {R,W}
+ This parameter can be used to read/write the vbe display Y resolution (in pixels).
+ It's illegal to set the YRES when the VBE is enabled (display code should generate PANIC).
+
+ If the value written exceeds VBE_DISPI_MAX_YRES, the display code needs to generate a PANIC.
+
+ Example values: 200,400,480,600,768
+
+ * VBE_DISPI_INDEX_BPP : WORD {R,W}
+ This parameter can be used to read/write the vbe display BPP.
+ It's illegal to set the BPP when the VBE is enabled (display code should generate PANIC).
+
+ If the value written is an incompatible BPP, the display code needs to generate a PANIC.
+
+ Example values: VBE_DISPI_BPP_8
+
+ * VBE_DISPI_INDEX_ENABLE : WORD {R,W}
+ This parameter can be used to read/write the vbe ENABLED state.
+ If the bios writes VBE_DISPI_ENABLED then the display code will setup a hostside display mode
+ with the current XRES, YRES and BPP settings.
+ If the bios writes VBE_DISPI_DISABLED then the display code will switch back to normal vga mode behaviour.
+
+ Example values: VBE_DISPI_ENABLED, VBE_DISPI_DISABLED
+
+ * VBE_DISPI_INDEX_BANK : WORD {R,W}
+ This parameter can be used to read/write the current selected BANK (at 0xA0000).
+ This can be used for switching banks in banked mode.
+
+[0xb0c1]
+ * VBE_DISPI_INDEX_VIRT_WIDTH : WORD {R,W}
+ This parameter can be used to read/write the current virtual width.
+ Upon enabling a mode, this will be set to the current xres
+ Setting this field during enabled mode will result in the virtual width to be changed.
+ Value will be adjusted if current setting is not possible.
+
+ * VBE_DISPI_INDEX_VIRT_HEIGHT : WORD {R}
+ This parameter can be read in order to obtain the current virtual height.
+ This setting will be adjusted after setting a virtual width in order to stay within limit of video memory.
+
+ * VBE_DISPI_INDEX_X_OFFSET : WORD {R,W}
+ The current X offset (in pixels!) of the visible screen part.
+ Writing a new offset will also result in a complete screen refresh.
+
+ * VBE_DISPI_INDEX_Y_OFFSET : WORD {R,W}
+ The current Y offset (in pixels!) of the visible screen part.
+ Writing a new offset will also result in a complete screen refresh.
+
+
+[0xb0c2]
+ * VBE_DISPI_INDEX_BPP : WORD {R,W}
+ The value written is now the number of bits per pixel. A value of 0 is treated
+ the same as 8 for backward compatibility. These values are supported: 8, 15,
+ 16, 24 and 32. The value of 4 is not yet handled in the VBE code.
+ * VBE_DISPI_INDEX_ENABLE : WORD {R,W}
+ The new flag VBE_DISPI_NOCLEARMEM allows to preserve the VBE video memory.
+ The new flag VBE_DISPI_LFB_ENABLED indicates the usage of the LFB.
+
+[0xb0c3]
+ * VBE_DISPI_INDEX_ENABLE : WORD {R,W}
+ If the new flag VBE_DISPI_GETCAPS is enabled, the xres, yres and bpp registers
+ return the gui capabilities.
+ The new flag VBE_DISPI_8BIT_DAC switches the DAC to 8 bit mode.
+
+Displaying GFX (banked mode)
+--------------
+ What happens is that the total screen is divided into banks of 'VBE_DISPI_BANK_SIZE_KB' KiloByte in size.
+ If you want to set a pixel you can calculate its bank by doing:
+
+ offset = pixel_x + pixel_y * resolution_x;
+ bank = offset / 64 Kb (rounded 1.9999 -> 1)
+
+ bank_pixel_pos = offset - bank * 64Kb
+
+ Now you can set the current bank and put the pixel at VBE_DISPI_BANK_ADDRESS + bank_pixel_pos
+
+Displaying GFX (linear frame buffer mode)
+--------------
+ NOT WRITTEN YET
+
+Notes
+-----
+ * Since the XRES/YRES/BPP may not be written when VBE is enabled, if you want to switch from one VBE mode
+ to another, you will need to disable VBE first.
+
+ * Note when the bios doesn't find a valid DISPI_ID, it can disable the VBE functions. This allows people to
+ use the same bios for both vbe enabled and disabled bochs executables.
diff --git a/tools/firmware/vgabios/vbetables.h b/tools/firmware/vgabios/vbetables.h
new file mode 100644
index 0000000000..a742ac74ba
--- /dev/null
+++ b/tools/firmware/vgabios/vbetables.h
@@ -0,0 +1,1282 @@
+#ifndef vbetables_h_included
+#define vbetables_h_included
+
+/* vbetables.h
+
+ This file contains a static mode information list containing all
+ bochs/plex86 "supported" VBE modi and their 'settings'.
+
+*/
+
+typedef struct ModeInfoListItem
+{
+ Bit16u mode;
+ ModeInfoBlockCompact info;
+} ModeInfoListItem;
+
+// FIXME: check all member variables to be correct for the different modi
+// FIXME: add more modi
+static ModeInfoListItem mode_info_list[]=
+{
+ {
+ VBE_VESA_MODE_640X400X8,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 640,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 640,
+ /*Bit16u YResolution*/ 400,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 8,
+ /*Bit8u NumberOfBanks*/ 4, // 640x400/64kb == 4
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_PACKED_PIXEL,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 15,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 0,
+ /*Bit8u RedFieldPosition*/ 0,
+ /*Bit8u GreenMaskSize*/ 0,
+ /*Bit8u GreenFieldPosition*/ 0,
+ /*Bit8u BlueMaskSize*/ 0,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 640,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 0,
+ /*Bit8u LinRedFieldPosition*/ 0,
+ /*Bit8u LinGreenMaskSize*/ 0,
+ /*Bit8u LinGreenFieldPosition*/ 0,
+ /*Bit8u LinBlueMaskSize*/ 0,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_640X480X8,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 640,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 640,
+ /*Bit16u YResolution*/ 480,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 8,
+ /*Bit8u NumberOfBanks*/ 5, // 640x480/64kb == 5
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_PACKED_PIXEL,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 11,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 0,
+ /*Bit8u RedFieldPosition*/ 0,
+ /*Bit8u GreenMaskSize*/ 0,
+ /*Bit8u GreenFieldPosition*/ 0,
+ /*Bit8u BlueMaskSize*/ 0,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 640,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 0,
+ /*Bit8u LinRedFieldPosition*/ 0,
+ /*Bit8u LinGreenMaskSize*/ 0,
+ /*Bit8u LinGreenFieldPosition*/ 0,
+ /*Bit8u LinBlueMaskSize*/ 0,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_800X600X4,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_TTY_BIOS_SUPPORT |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 100,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 800,
+ /*Bit16u YResolution*/ 600,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 4,
+ /*Bit8u BitsPerPixel*/ 4,
+ /*Bit8u NumberOfBanks*/ 16,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_PLANAR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 15,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 0,
+ /*Bit8u RedFieldPosition*/ 0,
+ /*Bit8u GreenMaskSize*/ 0,
+ /*Bit8u GreenFieldPosition*/ 0,
+ /*Bit8u BlueMaskSize*/ 0,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+ /*Bit32u PhysBasePtr*/ 0,
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 100,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 0,
+ /*Bit8u LinRedFieldPosition*/ 0,
+ /*Bit8u LinGreenMaskSize*/ 0,
+ /*Bit8u LinGreenFieldPosition*/ 0,
+ /*Bit8u LinBlueMaskSize*/ 0,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_800X600X8,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 800,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 800,
+ /*Bit16u YResolution*/ 600,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 8,
+ /*Bit8u NumberOfBanks*/ 8, // 800x600/64kb == 8
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_PACKED_PIXEL,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 7,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 0,
+ /*Bit8u RedFieldPosition*/ 0,
+ /*Bit8u GreenMaskSize*/ 0,
+ /*Bit8u GreenFieldPosition*/ 0,
+ /*Bit8u BlueMaskSize*/ 0,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 800,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 0,
+ /*Bit8u LinRedFieldPosition*/ 0,
+ /*Bit8u LinGreenMaskSize*/ 0,
+ /*Bit8u LinGreenFieldPosition*/ 0,
+ /*Bit8u LinBlueMaskSize*/ 0,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_1024X768X8,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 1024,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 1024,
+ /*Bit16u YResolution*/ 768,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 8,
+ /*Bit8u NumberOfBanks*/ 12, // 1024x768/64kb == 12
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_PACKED_PIXEL,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 3,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 0,
+ /*Bit8u RedFieldPosition*/ 0,
+ /*Bit8u GreenMaskSize*/ 0,
+ /*Bit8u GreenFieldPosition*/ 0,
+ /*Bit8u BlueMaskSize*/ 0,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 1024,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 0,
+ /*Bit8u LinRedFieldPosition*/ 0,
+ /*Bit8u LinGreenMaskSize*/ 0,
+ /*Bit8u LinGreenFieldPosition*/ 0,
+ /*Bit8u LinBlueMaskSize*/ 0,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_640X480X1555,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 640*2,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 640,
+ /*Bit16u YResolution*/ 480,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 15,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 5,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 5,
+ /*Bit8u RedFieldPosition*/ 10,
+ /*Bit8u GreenMaskSize*/ 5,
+ /*Bit8u GreenFieldPosition*/ 5,
+ /*Bit8u BlueMaskSize*/ 5,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 1,
+ /*Bit8u RsvdFieldPosition*/ 15,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 640*2,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 5,
+ /*Bit8u LinRedFieldPosition*/ 10,
+ /*Bit8u LinGreenMaskSize*/ 0,
+ /*Bit8u LinGreenFieldPosition*/ 5,
+ /*Bit8u LinBlueMaskSize*/ 5,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 1,
+ /*Bit8u LinRsvdFieldPosition*/ 15,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_800X600X1555,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 800*2,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 800,
+ /*Bit16u YResolution*/ 600,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 15,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 3,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 5,
+ /*Bit8u RedFieldPosition*/ 10,
+ /*Bit8u GreenMaskSize*/ 5,
+ /*Bit8u GreenFieldPosition*/ 5,
+ /*Bit8u BlueMaskSize*/ 5,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 1,
+ /*Bit8u RsvdFieldPosition*/ 15,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 800*2,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 5,
+ /*Bit8u LinRedFieldPosition*/ 10,
+ /*Bit8u LinGreenMaskSize*/ 5,
+ /*Bit8u LinGreenFieldPosition*/ 5,
+ /*Bit8u LinBlueMaskSize*/ 5,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 1,
+ /*Bit8u LinRsvdFieldPosition*/ 15,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_1024X768X1555,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 1024*2,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 1024,
+ /*Bit16u YResolution*/ 768,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 15,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 1,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 5,
+ /*Bit8u RedFieldPosition*/ 10,
+ /*Bit8u GreenMaskSize*/ 5,
+ /*Bit8u GreenFieldPosition*/ 5,
+ /*Bit8u BlueMaskSize*/ 5,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 1,
+ /*Bit8u RsvdFieldPosition*/ 15,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 1024*2,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 5,
+ /*Bit8u LinRedFieldPosition*/ 10,
+ /*Bit8u LinGreenMaskSize*/ 5,
+ /*Bit8u LinGreenFieldPosition*/ 5,
+ /*Bit8u LinBlueMaskSize*/ 5,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 1,
+ /*Bit8u LinRsvdFieldPosition*/ 15,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_640X480X565,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 640*2,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 640,
+ /*Bit16u YResolution*/ 480,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 16,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 5,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 5,
+ /*Bit8u RedFieldPosition*/ 11,
+ /*Bit8u GreenMaskSize*/ 6,
+ /*Bit8u GreenFieldPosition*/ 5,
+ /*Bit8u BlueMaskSize*/ 5,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 640*2,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 5,
+ /*Bit8u LinRedFieldPosition*/ 11,
+ /*Bit8u LinGreenMaskSize*/ 6,
+ /*Bit8u LinGreenFieldPosition*/ 5,
+ /*Bit8u LinBlueMaskSize*/ 5,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_800X600X565,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 800*2,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 800,
+ /*Bit16u YResolution*/ 600,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 16,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 3,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 5,
+ /*Bit8u RedFieldPosition*/ 11,
+ /*Bit8u GreenMaskSize*/ 6,
+ /*Bit8u GreenFieldPosition*/ 5,
+ /*Bit8u BlueMaskSize*/ 5,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 800*2,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 5,
+ /*Bit8u LinRedFieldPosition*/ 11,
+ /*Bit8u LinGreenMaskSize*/ 6,
+ /*Bit8u LinGreenFieldPosition*/ 5,
+ /*Bit8u LinBlueMaskSize*/ 5,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_1024X768X565,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 1024*2,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 1024,
+ /*Bit16u YResolution*/ 768,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 16,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 1,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 5,
+ /*Bit8u RedFieldPosition*/ 11,
+ /*Bit8u GreenMaskSize*/ 6,
+ /*Bit8u GreenFieldPosition*/ 5,
+ /*Bit8u BlueMaskSize*/ 5,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 1024*2,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 5,
+ /*Bit8u LinRedFieldPosition*/ 11,
+ /*Bit8u LinGreenMaskSize*/ 6,
+ /*Bit8u LinGreenFieldPosition*/ 5,
+ /*Bit8u LinBlueMaskSize*/ 5,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_640X480X888,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 640*3,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 640,
+ /*Bit16u YResolution*/ 480,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 24,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 3,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 8,
+ /*Bit8u RedFieldPosition*/ 16,
+ /*Bit8u GreenMaskSize*/ 8,
+ /*Bit8u GreenFieldPosition*/ 8,
+ /*Bit8u BlueMaskSize*/ 8,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 640*3,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 8,
+ /*Bit8u LinRedFieldPosition*/ 16,
+ /*Bit8u LinGreenMaskSize*/ 8,
+ /*Bit8u LinGreenFieldPosition*/ 8,
+ /*Bit8u LinBlueMaskSize*/ 8,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_800X600X888,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 800*3,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 800,
+ /*Bit16u YResolution*/ 600,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 24,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 1,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 8,
+ /*Bit8u RedFieldPosition*/ 16,
+ /*Bit8u GreenMaskSize*/ 8,
+ /*Bit8u GreenFieldPosition*/ 8,
+ /*Bit8u BlueMaskSize*/ 8,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 800*3,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 8,
+ /*Bit8u LinRedFieldPosition*/ 16,
+ /*Bit8u LinGreenMaskSize*/ 8,
+ /*Bit8u LinGreenFieldPosition*/ 8,
+ /*Bit8u LinBlueMaskSize*/ 8,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_VESA_MODE_1024X768X888,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 1024*3,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 1024,
+ /*Bit16u YResolution*/ 768,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 24,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 0,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 8,
+ /*Bit8u RedFieldPosition*/ 16,
+ /*Bit8u GreenMaskSize*/ 8,
+ /*Bit8u GreenFieldPosition*/ 8,
+ /*Bit8u BlueMaskSize*/ 8,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 1024*3,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 8,
+ /*Bit8u LinRedFieldPosition*/ 16,
+ /*Bit8u LinGreenMaskSize*/ 8,
+ /*Bit8u LinGreenFieldPosition*/ 8,
+ /*Bit8u LinBlueMaskSize*/ 8,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_OWN_MODE_640X480X8888,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 640*4,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 640,
+ /*Bit16u YResolution*/ 480,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 32,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 1,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 8,
+ /*Bit8u RedFieldPosition*/ 16,
+ /*Bit8u GreenMaskSize*/ 8,
+ /*Bit8u GreenFieldPosition*/ 8,
+ /*Bit8u BlueMaskSize*/ 8,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 8,
+ /*Bit8u RsvdFieldPosition*/ 24,
+ /*Bit8u DirectColorModeInfo*/ VBE_DIRECTCOLOR_RESERVED_BITS_AVAILABLE,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 640*4,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 8,
+ /*Bit8u LinRedFieldPosition*/ 16,
+ /*Bit8u LinGreenMaskSize*/ 8,
+ /*Bit8u LinGreenFieldPosition*/ 8,
+ /*Bit8u LinBlueMaskSize*/ 8,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 8,
+ /*Bit8u LinRsvdFieldPosition*/ 24,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_OWN_MODE_800X600X8888,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 800*4,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 800,
+ /*Bit16u YResolution*/ 600,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 32,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 1,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 8,
+ /*Bit8u RedFieldPosition*/ 16,
+ /*Bit8u GreenMaskSize*/ 8,
+ /*Bit8u GreenFieldPosition*/ 8,
+ /*Bit8u BlueMaskSize*/ 8,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 8,
+ /*Bit8u RsvdFieldPosition*/ 24,
+ /*Bit8u DirectColorModeInfo*/ VBE_DIRECTCOLOR_RESERVED_BITS_AVAILABLE,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 800*4,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 8,
+ /*Bit8u LinRedFieldPosition*/ 16,
+ /*Bit8u LinGreenMaskSize*/ 8,
+ /*Bit8u LinGreenFieldPosition*/ 8,
+ /*Bit8u LinBlueMaskSize*/ 8,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 8,
+ /*Bit8u LinRsvdFieldPosition*/ 24,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_OWN_MODE_1024X768X8888,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_RELOCATABLE |
+ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 1024*4,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 1024,
+ /*Bit16u YResolution*/ 768,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 32,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_DIRECT_COLOR,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 1,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 8,
+ /*Bit8u RedFieldPosition*/ 16,
+ /*Bit8u GreenMaskSize*/ 8,
+ /*Bit8u GreenFieldPosition*/ 8,
+ /*Bit8u BlueMaskSize*/ 8,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 8,
+ /*Bit8u RsvdFieldPosition*/ 24,
+ /*Bit8u DirectColorModeInfo*/ VBE_DIRECTCOLOR_RESERVED_BITS_AVAILABLE,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 1024*4,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 8,
+ /*Bit8u LinRedFieldPosition*/ 16,
+ /*Bit8u LinGreenMaskSize*/ 8,
+ /*Bit8u LinGreenFieldPosition*/ 8,
+ /*Bit8u LinBlueMaskSize*/ 8,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 8,
+ /*Bit8u LinRsvdFieldPosition*/ 24,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+ {
+ VBE_OWN_MODE_320X200X8,
+ {
+/*typedef struct ModeInfoBlock
+{*/
+// Mandatory information for all VBE revisions
+ /*Bit16u ModeAttributes*/ VBE_MODE_ATTRIBUTE_SUPPORTED |
+ VBE_MODE_ATTRIBUTE_EXTENDED_INFORMATION_AVAILABLE |
+ VBE_MODE_ATTRIBUTE_COLOR_MODE |
+#ifdef VBE_HAVE_LFB
+ VBE_MODE_ATTRIBUTE_LINEAR_FRAME_BUFFER_MODE |
+#endif
+ VBE_MODE_ATTRIBUTE_GRAPHICS_MODE,
+ /*Bit8u WinAAttributes*/ VBE_WINDOW_ATTRIBUTE_READABLE |
+ VBE_WINDOW_ATTRIBUTE_WRITEABLE,
+ /*Bit8u WinBAttributes*/ 0,
+ /*Bit16u WinGranularity*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinSize*/ VBE_DISPI_BANK_SIZE_KB,
+ /*Bit16u WinASegment*/ VGAMEM_GRAPH,
+ /*Bit16u WinBSegment*/ 0,
+ /*Bit32u WinFuncPtr*/ 0,
+ /*Bit16u BytesPerScanLine*/ 320,
+// Mandatory information for VBE 1.2 and above
+ /*Bit16u XResolution*/ 320,
+ /*Bit16u YResolution*/ 200,
+ /*Bit8u XCharSize*/ 8,
+ /*Bit8u YCharSize*/ 16,
+ /*Bit8u NumberOfPlanes*/ 1,
+ /*Bit8u BitsPerPixel*/ 8,
+ /*Bit8u NumberOfBanks*/ 1,
+ /*Bit8u MemoryModel*/ VBE_MEMORYMODEL_PACKED_PIXEL,
+ /*Bit8u BankSize*/ 0,
+ /*Bit8u NumberOfImagePages*/ 3,
+ /*Bit8u Reserved_page*/ 0,
+// Direct Color fields (required for direct/6 and YUV/7 memory models)
+ /*Bit8u RedMaskSize*/ 0,
+ /*Bit8u RedFieldPosition*/ 0,
+ /*Bit8u GreenMaskSize*/ 0,
+ /*Bit8u GreenFieldPosition*/ 0,
+ /*Bit8u BlueMaskSize*/ 0,
+ /*Bit8u BlueFieldPosition*/ 0,
+ /*Bit8u RsvdMaskSize*/ 0,
+ /*Bit8u RsvdFieldPosition*/ 0,
+ /*Bit8u DirectColorModeInfo*/ 0,
+// Mandatory information for VBE 2.0 and above
+#ifdef VBE_HAVE_LFB
+ /*Bit32u PhysBasePtr*/ VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+#else
+ /*Bit32u PhysBasePtr*/ 0,
+#endif
+ /*Bit32u OffScreenMemOffset*/ 0,
+ /*Bit16u OffScreenMemSize*/ 0,
+// Mandatory information for VBE 3.0 and above
+ /*Bit16u LinBytesPerScanLine*/ 320,
+ /*Bit8u BnkNumberOfPages*/ 0,
+ /*Bit8u LinNumberOfPages*/ 0,
+ /*Bit8u LinRedMaskSize*/ 0,
+ /*Bit8u LinRedFieldPosition*/ 0,
+ /*Bit8u LinGreenMaskSize*/ 0,
+ /*Bit8u LinGreenFieldPosition*/ 0,
+ /*Bit8u LinBlueMaskSize*/ 0,
+ /*Bit8u LinBlueFieldPosition*/ 0,
+ /*Bit8u LinRsvdMaskSize*/ 0,
+ /*Bit8u LinRsvdFieldPosition*/ 0,
+ /*Bit32u MaxPixelClock*/ 0,
+/*} ModeInfoBlock;*/
+ }
+ },
+
+/** END OF THE LIST **/
+ {
+ VBE_VESA_MODE_END_OF_LIST,
+ {
+ 0,
+ }
+ }
+};
+
+#endif
diff --git a/tools/firmware/vgabios/vgabios.c b/tools/firmware/vgabios/vgabios.c
new file mode 100644
index 0000000000..1bca91962d
--- /dev/null
+++ b/tools/firmware/vgabios/vgabios.c
@@ -0,0 +1,3608 @@
+// ============================================================================================
+/*
+ * vgabios.c
+ */
+// ============================================================================================
+//
+// Copyright (C) 2001,2002 the LGPL VGABios developers Team
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// ============================================================================================
+//
+// This VGA Bios is specific to the plex86/bochs Emulated VGA card.
+// You can NOT drive any physical vga card with it.
+//
+// ============================================================================================
+//
+// This file contains code ripped from :
+// - rombios.c of plex86
+//
+// This VGA Bios contains fonts from :
+// - fntcol16.zip (c) by Joseph Gil available at :
+// ftp://ftp.simtel.net/pub/simtelnet/msdos/screen/fntcol16.zip
+// These fonts are public domain
+//
+// This VGA Bios is based on information taken from :
+// - Kevin Lawton's vga card emulation for bochs/plex86
+// - Ralf Brown's interrupts list available at http://www.cs.cmu.edu/afs/cs/user/ralf/pub/WWW/files.html
+// - Finn Thogersons' VGADOC4b available at http://home.worldonline.dk/~finth/
+// - Michael Abrash's Graphics Programming Black Book
+// - Francois Gervais' book "programmation des cartes graphiques cga-ega-vga" edited by sybex
+// - DOSEMU 1.0.1 source code for several tables values and formulas
+//
+// Thanks for patches, comments and ideas to :
+// - techt@pikeonline.net
+//
+// ============================================================================================
+
+#include "vgabios.h"
+
+#ifdef VBE
+#include "vbe.h"
+#endif
+
+#undef DEBUG
+#define USE_BX_INFO
+
+/* Declares */
+static Bit8u read_byte();
+static Bit16u read_word();
+static void write_byte();
+static void write_word();
+static Bit8u inb();
+static Bit16u inw();
+static void outb();
+static void outw();
+
+static Bit16u get_SS();
+
+// Output
+static void printf();
+static void unimplemented();
+static void unknown();
+
+static Bit8u find_vga_entry();
+
+static void memsetb();
+static void memsetw();
+static void memcpyb();
+static void memcpyw();
+
+static void biosfn_set_video_mode();
+static void biosfn_set_cursor_shape();
+static void biosfn_set_cursor_pos();
+static void biosfn_get_cursor_pos();
+static void biosfn_set_active_page();
+static void biosfn_scroll();
+static void biosfn_read_char_attr();
+static void biosfn_write_char_attr();
+static void biosfn_write_char_only();
+static void biosfn_write_pixel();
+static void biosfn_read_pixel();
+static void biosfn_write_teletype();
+static void biosfn_perform_gray_scale_summing();
+static void biosfn_load_text_user_pat();
+static void biosfn_load_text_8_14_pat();
+static void biosfn_load_text_8_8_pat();
+static void biosfn_load_text_8_16_pat();
+static void biosfn_load_gfx_8_8_chars();
+static void biosfn_load_gfx_user_chars();
+static void biosfn_load_gfx_8_14_chars();
+static void biosfn_load_gfx_8_8_dd_chars();
+static void biosfn_load_gfx_8_16_chars();
+static void biosfn_get_font_info();
+static void biosfn_alternate_prtsc();
+static void biosfn_switch_video_interface();
+static void biosfn_enable_video_refresh_control();
+static void biosfn_write_string();
+static void biosfn_read_state_info();
+static void biosfn_read_video_state_size();
+static void biosfn_save_video_state();
+static void biosfn_restore_video_state();
+
+// This is for compiling with gcc2 and gcc3
+#define ASM_START #asm
+#define ASM_END #endasm
+
+ASM_START
+
+MACRO SET_INT_VECTOR
+ push ds
+ xor ax, ax
+ mov ds, ax
+ mov ax, ?3
+ mov ?1*4, ax
+ mov ax, ?2
+ mov ?1*4+2, ax
+ pop ds
+MEND
+
+ASM_END
+
+ASM_START
+.text
+.rom
+.org 0
+
+use16 386
+
+vgabios_start:
+.byte 0x55, 0xaa /* BIOS signature, required for BIOS extensions */
+
+.byte 0x40 /* BIOS extension length in units of 512 bytes */
+
+
+vgabios_entry_point:
+
+ jmp vgabios_init_func
+
+vgabios_name:
+.ascii "Plex86/Bochs VGABios"
+.ascii " "
+.byte 0x00
+
+// Info from Bart Oldeman
+.org 0x1e
+.ascii "IBM"
+.byte 0x00
+
+vgabios_version:
+#ifndef VGABIOS_VERS
+.ascii "current-cvs"
+#else
+.ascii VGABIOS_VERS
+#endif
+.ascii " "
+
+vgabios_date:
+.ascii VGABIOS_DATE
+.byte 0x0a,0x0d
+.byte 0x00
+
+vgabios_copyright:
+.ascii "(C) 2003 the LGPL VGABios developers Team"
+.byte 0x0a,0x0d
+.byte 0x00
+
+vgabios_license:
+.ascii "This VGA/VBE Bios is released under the GNU LGPL"
+.byte 0x0a,0x0d
+.byte 0x0a,0x0d
+.byte 0x00
+
+vgabios_website:
+.ascii "Please visit :"
+.byte 0x0a,0x0d
+;;.ascii " . http://www.plex86.org"
+;;.byte 0x0a,0x0d
+.ascii " . http://bochs.sourceforge.net"
+.byte 0x0a,0x0d
+.ascii " . http://www.nongnu.org/vgabios"
+.byte 0x0a,0x0d
+.byte 0x0a,0x0d
+.byte 0x00
+
+
+;; ============================================================================================
+;;
+;; Init Entry point
+;;
+;; ============================================================================================
+vgabios_init_func:
+
+;; init vga card
+ call init_vga_card
+
+;; init basic bios vars
+ call init_bios_area
+
+#ifdef VBE
+;; init vbe functions
+ call vbe_init
+#endif
+
+;; set int10 vect
+ SET_INT_VECTOR(0x10, #0xC000, #vgabios_int10_handler)
+
+#ifdef CIRRUS
+ call cirrus_init
+#endif
+
+;; display splash screen
+ call _display_splash_screen
+
+;; init video mode and clear the screen
+ mov ax,#0x0003
+ int #0x10
+
+;; show info
+ call _display_info
+
+#ifdef VBE
+;; show vbe info
+ call vbe_display_info
+#endif
+
+#ifdef CIRRUS
+;; show cirrus info
+ call cirrus_display_info
+#endif
+
+ retf
+ASM_END
+
+/*
+ * int10 handled here
+ */
+ASM_START
+vgabios_int10_handler:
+ pushf
+#ifdef DEBUG
+ push es
+ push ds
+ pusha
+ mov bx, #0xc000
+ mov ds, bx
+ call _int10_debugmsg
+ popa
+ pop ds
+ pop es
+#endif
+ cmp ah, #0x0f
+ jne int10_test_1A
+ call biosfn_get_video_mode
+ jmp int10_end
+int10_test_1A:
+ cmp ah, #0x1a
+ jne int10_test_0B
+ call biosfn_group_1A
+ jmp int10_end
+int10_test_0B:
+ cmp ah, #0x0b
+ jne int10_test_1103
+ call biosfn_group_0B
+ jmp int10_end
+int10_test_1103:
+ cmp ax, #0x1103
+ jne int10_test_12
+ call biosfn_set_text_block_specifier
+ jmp int10_end
+int10_test_12:
+ cmp ah, #0x12
+ jne int10_test_101B
+ cmp bl, #0x10
+ jne int10_test_BL30
+ call biosfn_get_ega_info
+ jmp int10_end
+int10_test_BL30:
+ cmp bl, #0x30
+ jne int10_test_BL31
+ call biosfn_select_vert_res
+ jmp int10_end
+int10_test_BL31:
+ cmp bl, #0x31
+ jne int10_test_BL32
+ call biosfn_enable_default_palette_loading
+ jmp int10_end
+int10_test_BL32:
+ cmp bl, #0x32
+ jne int10_test_BL33
+ call biosfn_enable_video_addressing
+ jmp int10_end
+int10_test_BL33:
+ cmp bl, #0x33
+ jne int10_test_BL34
+ call biosfn_enable_grayscale_summing
+ jmp int10_end
+int10_test_BL34:
+ cmp bl, #0x34
+ jne int10_normal
+ call biosfn_enable_cursor_emulation
+ jmp int10_end
+int10_test_101B:
+ cmp ax, #0x101b
+ je int10_normal
+ cmp ah, #0x10
+#ifndef VBE
+ jne int10_normal
+#else
+ jne int10_test_4F
+#endif
+ call biosfn_group_10
+ jmp int10_end
+#ifdef VBE
+int10_test_4F:
+ cmp ah, #0x4f
+ jne int10_normal
+ cmp al, #0x03
+ jne int10_test_vbe_05
+ call vbe_biosfn_return_current_mode
+ jmp int10_end
+int10_test_vbe_05:
+ cmp al, #0x05
+ jne int10_test_vbe_06
+ call vbe_biosfn_display_window_control
+ jmp int10_end
+int10_test_vbe_06:
+ cmp al, #0x06
+ jne int10_test_vbe_07
+ call vbe_biosfn_set_get_logical_scan_line_length
+ jmp int10_end
+int10_test_vbe_07:
+ cmp al, #0x07
+ jne int10_test_vbe_08
+ call vbe_biosfn_set_get_display_start
+ jmp int10_end
+int10_test_vbe_08:
+ cmp al, #0x08
+ jne int10_normal
+ call vbe_biosfn_set_get_dac_palette_format
+ jmp int10_end
+#endif
+
+int10_normal:
+ push es
+ push ds
+ pusha
+
+;; We have to set ds to access the right data segment
+ mov bx, #0xc000
+ mov ds, bx
+ call _int10_func
+
+ popa
+ pop ds
+ pop es
+int10_end:
+ popf
+ iret
+ASM_END
+
+#include "vgatables.h"
+#include "vgafonts.h"
+
+/*
+ * Boot time hardware inits
+ */
+ASM_START
+init_vga_card:
+;; switch to color mode and enable CPU access 480 lines
+ mov dx, #0x3C2
+ mov al, #0xC3
+ outb dx,al
+
+;; more than 64k 3C4/04
+ mov dx, #0x3C4
+ mov al, #0x04
+ outb dx,al
+ mov dx, #0x3C5
+ mov al, #0x02
+ outb dx,al
+
+#if defined(USE_BX_INFO) || defined(DEBUG)
+ mov bx, #msg_vga_init
+ push bx
+ call _printf
+#endif
+ inc sp
+ inc sp
+ ret
+
+#if defined(USE_BX_INFO) || defined(DEBUG)
+msg_vga_init:
+.ascii "VGABios $Id: vgabios.c,v 1.61 2005/05/24 16:50:50 vruppert Exp $"
+.byte 0x0d,0x0a,0x00
+#endif
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+/*
+ * Boot time bios area inits
+ */
+ASM_START
+init_bios_area:
+ push ds
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+
+;; init detected hardware BIOS Area
+ mov bx, # BIOSMEM_INITIAL_MODE
+ mov ax, [bx]
+ and ax, #0xffcf
+ mov [bx], ax
+
+;; Just for the first int10 find its children
+
+;; the default char height
+ mov bx, # BIOSMEM_CHAR_HEIGHT
+ mov al, #0x10
+ mov [bx], al
+
+;; Clear the screen
+ mov bx, # BIOSMEM_VIDEO_CTL
+ mov al, #0x60
+ mov [bx], al
+
+;; Set the basic screen we have
+ mov bx, # BIOSMEM_SWITCHES
+ mov al, #0xf9
+ mov [bx], al
+
+;; Set the basic modeset options
+ mov bx, # BIOSMEM_MODESET_CTL
+ mov al, #0x51
+ mov [bx], al
+
+;; Set the default MSR
+ mov bx, # BIOSMEM_CURRENT_MSR
+ mov al, #0x09
+ mov [bx], al
+
+ pop ds
+ ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+/*
+ * Boot time Splash screen
+ */
+static void display_splash_screen()
+{
+}
+
+// --------------------------------------------------------------------------------------------
+/*
+ * Tell who we are
+ */
+
+static void display_info()
+{
+ASM_START
+ mov ax,#0xc000
+ mov ds,ax
+ mov si,#vgabios_name
+ call _display_string
+ mov si,#vgabios_version
+ call _display_string
+
+ ;;mov si,#vgabios_copyright
+ ;;call _display_string
+ ;;mov si,#crlf
+ ;;call _display_string
+
+ mov si,#vgabios_license
+ call _display_string
+ mov si,#vgabios_website
+ call _display_string
+ASM_END
+}
+
+static void display_string()
+{
+ // Get length of string
+ASM_START
+ mov ax,ds
+ mov es,ax
+ mov di,si
+ xor cx,cx
+ not cx
+ xor al,al
+ cld
+ repne
+ scasb
+ not cx
+ dec cx
+ push cx
+
+ mov ax,#0x0300
+ mov bx,#0x0000
+ int #0x10
+
+ pop cx
+ mov ax,#0x1301
+ mov bx,#0x000b
+ mov bp,si
+ int #0x10
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+#ifdef DEBUG
+static void int10_debugmsg(DI, SI, BP, SP, BX, DX, CX, AX, DS, ES, FLAGS)
+ Bit16u DI, SI, BP, SP, BX, DX, CX, AX, ES, DS, FLAGS;
+{
+ // 0E is write char...
+ if(GET_AH()!=0x0E)
+ printf("vgabios call ah%02x al%02x bx%04x cx%04x dx%04x\n",GET_AH(),GET_AL(),BX,CX,DX);
+}
+#endif
+
+// --------------------------------------------------------------------------------------------
+/*
+ * int10 main dispatcher
+ */
+static void int10_func(DI, SI, BP, SP, BX, DX, CX, AX, DS, ES, FLAGS)
+ Bit16u DI, SI, BP, SP, BX, DX, CX, AX, ES, DS, FLAGS;
+{
+
+ // BIOS functions
+ switch(GET_AH())
+ {
+ case 0x00:
+ biosfn_set_video_mode(GET_AL());
+ switch(GET_AL()&0x7F)
+ {case 6:
+ SET_AL(0x3F);
+ break;
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 7:
+ SET_AL(0x30);
+ break;
+ default:
+ SET_AL(0x20);
+ }
+ break;
+ case 0x01:
+ biosfn_set_cursor_shape(GET_CH(),GET_CL());
+ break;
+ case 0x02:
+ biosfn_set_cursor_pos(GET_BH(),DX);
+ break;
+ case 0x03:
+ biosfn_get_cursor_pos(GET_BH(),&CX,&DX);
+ break;
+ case 0x04:
+ // Read light pen pos (unimplemented)
+#ifdef DEBUG
+ unimplemented();
+#endif
+ AX=0x00;
+ BX=0x00;
+ CX=0x00;
+ DX=0x00;
+ break;
+ case 0x05:
+ biosfn_set_active_page(GET_AL());
+ break;
+ case 0x06:
+ biosfn_scroll(GET_AL(),GET_BH(),GET_CH(),GET_CL(),GET_DH(),GET_DL(),0xFF,SCROLL_UP);
+ break;
+ case 0x07:
+ biosfn_scroll(GET_AL(),GET_BH(),GET_CH(),GET_CL(),GET_DH(),GET_DL(),0xFF,SCROLL_DOWN);
+ break;
+ case 0x08:
+ biosfn_read_char_attr(GET_BH(),&AX);
+ break;
+ case 0x09:
+ biosfn_write_char_attr(GET_AL(),GET_BH(),GET_BL(),CX);
+ break;
+ case 0x0A:
+ biosfn_write_char_only(GET_AL(),GET_BH(),GET_BL(),CX);
+ break;
+ case 0x0C:
+ biosfn_write_pixel(GET_BH(),GET_AL(),CX,DX);
+ break;
+ case 0x0D:
+ biosfn_read_pixel(GET_BH(),CX,DX,&AX);
+ break;
+ case 0x0E:
+ // Ralf Brown Interrupt list is WRONG on bh(page)
+ // We do output only on the current page !
+ biosfn_write_teletype(GET_AL(),0xff,GET_BL(),NO_ATTR);
+ break;
+ case 0x10:
+ // All other functions of group AH=0x10 rewritten in assembler
+ biosfn_perform_gray_scale_summing(BX,CX);
+ break;
+ case 0x11:
+ switch(GET_AL())
+ {
+ case 0x00:
+ case 0x10:
+ biosfn_load_text_user_pat(GET_AL(),ES,BP,CX,DX,GET_BL(),GET_BH());
+ break;
+ case 0x01:
+ case 0x11:
+ biosfn_load_text_8_14_pat(GET_AL(),GET_BL());
+ break;
+ case 0x02:
+ case 0x12:
+ biosfn_load_text_8_8_pat(GET_AL(),GET_BL());
+ break;
+ case 0x04:
+ case 0x14:
+ biosfn_load_text_8_16_pat(GET_AL(),GET_BL());
+ break;
+ case 0x20:
+ biosfn_load_gfx_8_8_chars(ES,BP);
+ break;
+ case 0x21:
+ biosfn_load_gfx_user_chars(ES,BP,CX,GET_BL(),GET_DL());
+ break;
+ case 0x22:
+ biosfn_load_gfx_8_14_chars(GET_BL());
+ break;
+ case 0x23:
+ biosfn_load_gfx_8_8_dd_chars(GET_BL());
+ break;
+ case 0x24:
+ biosfn_load_gfx_8_16_chars(GET_BL());
+ break;
+ case 0x30:
+ biosfn_get_font_info(GET_BH(),&ES,&BP,&CX,&DX);
+ break;
+#ifdef DEBUG
+ default:
+ unknown();
+#endif
+ }
+
+ break;
+ case 0x12:
+ switch(GET_BL())
+ {
+ case 0x20:
+ biosfn_alternate_prtsc();
+ break;
+ case 0x35:
+ biosfn_switch_video_interface(GET_AL(),ES,DX);
+ SET_AL(0x12);
+ break;
+ case 0x36:
+ biosfn_enable_video_refresh_control(GET_AL());
+ SET_AL(0x12);
+ break;
+#ifdef DEBUG
+ default:
+ unknown();
+#endif
+ }
+ break;
+ case 0x13:
+ biosfn_write_string(GET_AL(),GET_BH(),GET_BL(),CX,GET_DH(),GET_DL(),ES,BP);
+ break;
+ case 0x1B:
+ biosfn_read_state_info(BX,ES,DI);
+ SET_AL(0x1B);
+ break;
+ case 0x1C:
+ switch(GET_AL())
+ {
+ case 0x00:
+ biosfn_read_video_state_size(CX,&BX);
+ break;
+ case 0x01:
+ biosfn_save_video_state(CX,ES,BX);
+ break;
+ case 0x02:
+ biosfn_restore_video_state(CX,ES,BX);
+ break;
+#ifdef DEBUG
+ default:
+ unknown();
+#endif
+ }
+ SET_AL(0x1C);
+ break;
+
+#ifdef VBE
+ case 0x4f:
+ if (vbe_has_vbe_display()) {
+ switch(GET_AL())
+ {
+ case 0x00:
+ vbe_biosfn_return_controller_information(&AX,ES,DI);
+ break;
+ case 0x01:
+ vbe_biosfn_return_mode_information(&AX,CX,ES,DI);
+ break;
+ case 0x02:
+ vbe_biosfn_set_mode(&AX,BX,ES,DI);
+ break;
+ case 0x04:
+ //FIXME
+#ifdef DEBUG
+ unimplemented();
+#endif
+ // function failed
+ AX=0x100;
+ break;
+ case 0x09:
+ //FIXME
+#ifdef DEBUG
+ unimplemented();
+#endif
+ // function failed
+ AX=0x100;
+ break;
+ case 0x0A:
+ //FIXME
+#ifdef DEBUG
+ unimplemented();
+#endif
+ // function failed
+ AX=0x100;
+ break;
+ default:
+#ifdef DEBUG
+ unknown();
+#endif
+ // function failed
+ AX=0x100;
+ }
+ }
+ else {
+ // No VBE display
+ AX=0x0100;
+ }
+ break;
+#endif
+
+#ifdef DEBUG
+ default:
+ unknown();
+#endif
+ }
+}
+
+// ============================================================================================
+//
+// BIOS functions
+//
+// ============================================================================================
+
+static void biosfn_set_video_mode(mode) Bit8u mode;
+{// mode: Bit 7 is 1 if no clear screen
+
+ // Should we clear the screen ?
+ Bit8u noclearmem=mode&0x80;
+ Bit8u line,mmask,*palette;
+ Bit16u i,twidth,theight,cheight;
+ Bit8u modeset_ctl,video_ctl,vga_switches;
+ Bit16u crtc_addr;
+
+#ifdef VBE
+ if (vbe_has_vbe_display()) {
+ dispi_set_enable(VBE_DISPI_DISABLED);
+ }
+#endif // def VBE
+
+ // The real mode
+ mode=mode&0x7f;
+
+ // find the entry in the video modes
+ line=find_vga_entry(mode);
+
+#ifdef DEBUG
+ printf("mode search %02x found line %02x\n",mode,line);
+#endif
+
+ if(line==0xFF)
+ return;
+
+ twidth=vga_modes[line].twidth;
+ theight=vga_modes[line].theight;
+ cheight=vga_modes[line].cheight;
+
+ // Read the bios vga control
+ video_ctl=read_byte(BIOSMEM_SEG,BIOSMEM_VIDEO_CTL);
+
+ // Read the bios vga switches
+ vga_switches=read_byte(BIOSMEM_SEG,BIOSMEM_SWITCHES);
+
+ // Read the bios mode set control
+ modeset_ctl=read_byte(BIOSMEM_SEG,BIOSMEM_MODESET_CTL);
+
+ // Then we know the number of lines
+// FIXME
+
+ // if palette loading (bit 3 of modeset ctl = 0)
+ if((modeset_ctl&0x08)==0)
+ {// Set the PEL mask
+ outb(VGAREG_PEL_MASK,vga_modes[line].pelmask);
+
+ // Set the whole dac always, from 0
+ outb(VGAREG_DAC_WRITE_ADDRESS,0x00);
+
+ // From which palette
+ switch(vga_modes[line].dacmodel)
+ {case 0:
+ palette=&palette0;
+ break;
+ case 1:
+ palette=&palette1;
+ break;
+ case 2:
+ palette=&palette2;
+ break;
+ case 3:
+ palette=&palette3;
+ break;
+ }
+
+ // Always 256*3 values
+ for(i=0;i<0x0100;i++)
+ {
+ if(i<=dac_regs[vga_modes[line].dacmodel])
+ {outb(VGAREG_DAC_DATA,palette[(i*3)+0]);
+ outb(VGAREG_DAC_DATA,palette[(i*3)+1]);
+ outb(VGAREG_DAC_DATA,palette[(i*3)+2]);
+ }
+ else
+ {outb(VGAREG_DAC_DATA,0);
+ outb(VGAREG_DAC_DATA,0);
+ outb(VGAREG_DAC_DATA,0);
+ }
+ }
+ if((modeset_ctl&0x02)==0x02)
+ {
+ biosfn_perform_gray_scale_summing(0x00, 0x100);
+ }
+ }
+
+ // Reset Attribute Ctl flip-flop
+ inb(VGAREG_ACTL_RESET);
+
+ // Set Attribute Ctl
+ for(i=0;i<=ACTL_MAX_REG;i++)
+ {outb(VGAREG_ACTL_ADDRESS,i);
+ outb(VGAREG_ACTL_WRITE_DATA,actl_regs[vga_modes[line].actlmodel][i]);
+ }
+
+ // Set Sequencer Ctl
+ for(i=0;i<=SEQU_MAX_REG;i++)
+ {outb(VGAREG_SEQU_ADDRESS,i);
+ outb(VGAREG_SEQU_DATA,sequ_regs[vga_modes[line].sequmodel][i]);
+ }
+
+ // Set Grafx Ctl
+ for(i=0;i<=GRDC_MAX_REG;i++)
+ {outb(VGAREG_GRDC_ADDRESS,i);
+ outb(VGAREG_GRDC_DATA,grdc_regs[vga_modes[line].grdcmodel][i]);
+ }
+
+ // Set CRTC address VGA or MDA
+ crtc_addr=vga_modes[line].memmodel==MTEXT?VGAREG_MDA_CRTC_ADDRESS:VGAREG_VGA_CRTC_ADDRESS;
+
+ // Disable CRTC write protection
+ outw(crtc_addr,0x0011);
+ // Set CRTC regs
+ for(i=0;i<=CRTC_MAX_REG;i++)
+ {outb(crtc_addr,i);
+ outb(crtc_addr+1,crtc_regs[vga_modes[line].crtcmodel][i]);
+ }
+
+ // Set the misc register
+ outb(VGAREG_WRITE_MISC_OUTPUT,vga_modes[line].miscreg);
+
+ // Enable video
+ outb(VGAREG_ACTL_ADDRESS,0x20);
+ inb(VGAREG_ACTL_RESET);
+
+ if(noclearmem==0x00)
+ {
+ if(vga_modes[line].class==TEXT)
+ {
+ memsetw(vga_modes[line].sstart,0,0x0720,0x4000); // 32k
+ }
+ else
+ {
+ if(mode<0x0d)
+ {
+ memsetw(vga_modes[line].sstart,0,0x0000,0x4000); // 32k
+ }
+ else
+ {
+ outb( VGAREG_SEQU_ADDRESS, 0x02 );
+ mmask = inb( VGAREG_SEQU_DATA );
+ outb( VGAREG_SEQU_DATA, 0x0f ); // all planes
+ memsetw(vga_modes[line].sstart,0,0x0000,0x8000); // 64k
+ outb( VGAREG_SEQU_DATA, mmask );
+ }
+ }
+ }
+
+ // Set the BIOS mem
+ write_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE,mode);
+ write_word(BIOSMEM_SEG,BIOSMEM_NB_COLS,twidth);
+ write_word(BIOSMEM_SEG,BIOSMEM_PAGE_SIZE,vga_modes[line].slength);
+ write_word(BIOSMEM_SEG,BIOSMEM_CRTC_ADDRESS,crtc_addr);
+ write_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS,theight-1);
+ write_word(BIOSMEM_SEG,BIOSMEM_CHAR_HEIGHT,cheight);
+ write_byte(BIOSMEM_SEG,BIOSMEM_VIDEO_CTL,(0x60|noclearmem));
+ write_byte(BIOSMEM_SEG,BIOSMEM_SWITCHES,0xF9);
+ write_byte(BIOSMEM_SEG,BIOSMEM_MODESET_CTL,read_byte(BIOSMEM_SEG,BIOSMEM_MODESET_CTL)&0x7f);
+
+ // FIXME We nearly have the good tables. to be reworked
+ write_byte(BIOSMEM_SEG,BIOSMEM_DCC_INDEX,0x08); // 8 is VGA should be ok for now
+ write_word(BIOSMEM_SEG,BIOSMEM_VS_POINTER,0x00);
+ write_word(BIOSMEM_SEG,BIOSMEM_VS_POINTER+2,0x00);
+
+ // FIXME
+ write_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MSR,0x00); // Unavailable on vanilla vga, but...
+ write_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_PAL,0x00); // Unavailable on vanilla vga, but...
+
+ // Set cursor shape
+ if(vga_modes[line].class==TEXT)
+ {
+ biosfn_set_cursor_shape(0x06,0x07);
+ }
+
+ // Set cursor pos for page 0..7
+ for(i=0;i<8;i++)
+ biosfn_set_cursor_pos(i,0x0000);
+
+ // Set active page 0
+ biosfn_set_active_page(0x00);
+
+ // Write the fonts in memory
+ if(vga_modes[line].class==TEXT)
+ {
+ASM_START
+ ;; copy and activate 8x16 font
+ mov ax, #0x1104
+ mov bl, #0x00
+ int #0x10
+ mov ax, #0x1103
+ mov bl, #0x00
+ int #0x10
+ASM_END
+ }
+
+ // Set the ints 0x1F and 0x43
+ASM_START
+ SET_INT_VECTOR(0x1f, #0xC000, #_vgafont8+128*8)
+ASM_END
+
+ switch(cheight)
+ {case 8:
+ASM_START
+ SET_INT_VECTOR(0x43, #0xC000, #_vgafont8)
+ASM_END
+ break;
+ case 14:
+ASM_START
+ SET_INT_VECTOR(0x43, #0xC000, #_vgafont14)
+ASM_END
+ break;
+ case 16:
+ASM_START
+ SET_INT_VECTOR(0x43, #0xC000, #_vgafont16)
+ASM_END
+ break;
+ }
+}
+
+// --------------------------------------------------------------------------------------------
+static void biosfn_set_cursor_shape (CH,CL)
+Bit8u CH;Bit8u CL;
+{Bit16u cheight,curs,crtc_addr;
+ Bit8u modeset_ctl;
+
+ CH&=0x3f;
+ CL&=0x1f;
+
+ curs=(CH<<8)+CL;
+ write_word(BIOSMEM_SEG,BIOSMEM_CURSOR_TYPE,curs);
+
+ modeset_ctl=read_byte(BIOSMEM_SEG,BIOSMEM_MODESET_CTL);
+ cheight = read_word(BIOSMEM_SEG,BIOSMEM_CHAR_HEIGHT);
+ if((modeset_ctl&0x01) && (cheight>8) && (CL<8) && (CH<0x20))
+ {
+ if(CL!=(CH+1))
+ {
+ CH = ((CH+1) * cheight / 8) -1;
+ }
+ else
+ {
+ CH = ((CL+1) * cheight / 8) - 2;
+ }
+ CL = ((CL+1) * cheight / 8) - 1;
+ }
+
+ // CRTC regs 0x0a and 0x0b
+ crtc_addr=read_word(BIOSMEM_SEG,BIOSMEM_CRTC_ADDRESS);
+ outb(crtc_addr,0x0a);
+ outb(crtc_addr+1,CH);
+ outb(crtc_addr,0x0b);
+ outb(crtc_addr+1,CL);
+}
+
+// --------------------------------------------------------------------------------------------
+static void biosfn_set_cursor_pos (page, cursor)
+Bit8u page;Bit16u cursor;
+{
+ Bit8u xcurs,ycurs,current;
+ Bit16u nbcols,nbrows,address,crtc_addr;
+
+ // Should not happen...
+ if(page>7)return;
+
+ // Bios cursor pos
+ write_word(BIOSMEM_SEG, BIOSMEM_CURSOR_POS+2*page, cursor);
+
+ // Set the hardware cursor
+ current=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_PAGE);
+ if(page==current)
+ {
+ // Get the dimensions
+ nbcols=read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+ nbrows=read_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS)+1;
+
+ xcurs=cursor&0x00ff;ycurs=(cursor&0xff00)>>8;
+
+ // Calculate the address knowing nbcols nbrows and page num
+ address=SCREEN_IO_START(nbcols,nbrows,page)+xcurs+ycurs*nbcols;
+
+ // CRTC regs 0x0e and 0x0f
+ crtc_addr=read_word(BIOSMEM_SEG,BIOSMEM_CRTC_ADDRESS);
+ outb(crtc_addr,0x0e);
+ outb(crtc_addr+1,(address&0xff00)>>8);
+ outb(crtc_addr,0x0f);
+ outb(crtc_addr+1,address&0x00ff);
+ }
+}
+
+// --------------------------------------------------------------------------------------------
+// Get cursor shape and position (INT 10h AH=03h).  Writes the BDA
+// cursor-type word to *shape and the page's cursor position word
+// ((row<<8)|col) to *pos; both default to 0 for an invalid page.
+// The out-pointers live on the caller's stack, hence the SS segment.
+static void biosfn_get_cursor_pos (page,shape, pos)
+Bit8u page;Bit16u *shape;Bit16u *pos;
+{
+ Bit16u ss=get_SS();
+
+ // Default
+ write_word(ss, shape, 0);
+ write_word(ss, pos, 0);
+
+ if(page>7)return;
+ // FIXME should handle VGA 14/16 lines
+ write_word(ss,shape,read_word(BIOSMEM_SEG,BIOSMEM_CURSOR_TYPE));
+ write_word(ss,pos,read_word(BIOSMEM_SEG,BIOSMEM_CURSOR_POS+page*2));
+}
+
+// --------------------------------------------------------------------------------------------
+// Select the active display page (INT 10h AH=05h): compute the page's
+// start address (character cells in text modes, slength bytes per page
+// in graphics modes), program CRTC start-address regs 0x0c/0x0d,
+// update the BDA, and restore that page's cursor.
+static void biosfn_set_active_page (page)
+Bit8u page;
+{
+ Bit16u cursor,dummy,crtc_addr;
+ Bit16u nbcols,nbrows,address;
+ Bit8u mode,line;
+
+ if(page>7)return;
+
+ // Get the mode
+ mode=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE);
+ line=find_vga_entry(mode);
+ if(line==0xFF)return;
+
+ // Get pos curs pos for the right page
+ biosfn_get_cursor_pos(page,&dummy,&cursor);
+
+ if(vga_modes[line].class==TEXT)
+  {
+   // Get the dimensions
+   nbcols=read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+   nbrows=read_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS)+1;
+
+   // Calculate the address knowing nbcols nbrows and page num
+   address=SCREEN_MEM_START(nbcols,nbrows,page);
+   write_word(BIOSMEM_SEG,BIOSMEM_CURRENT_START,address);
+
+   // Start address (in character cells, for the CRTC)
+   address=SCREEN_IO_START(nbcols,nbrows,page);
+  }
+ else
+  {
+   address = page*vga_modes[line].slength;
+  }
+
+ // CRTC regs 0x0c (start address high) and 0x0d (low)
+ crtc_addr=read_word(BIOSMEM_SEG,BIOSMEM_CRTC_ADDRESS);
+ outb(crtc_addr,0x0c);
+ outb(crtc_addr+1,(address&0xff00)>>8);
+ outb(crtc_addr,0x0d);
+ outb(crtc_addr+1,address&0x00ff);
+
+ // And change the BIOS page
+ write_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_PAGE,page);
+
+#ifdef DEBUG
+ printf("Set active page %02x address %04x\n",page,address);
+#endif
+
+ // Display the cursor, now the page is active
+ biosfn_set_cursor_pos(page,cursor);
+}
+
+// --------------------------------------------------------------------------------------------
+// Copy a block of character rows inside planar (4-plane) video memory:
+// 'cols' byte columns from text row 'ysrc' to row 'ydest', one scan
+// line at a time.  GRDC reg 5 mode 1 copies all four planes through
+// the VGA latches in a single byte move.
+static void vgamem_copy_pl4(xstart,ysrc,ydest,cols,nbcols,cheight)
+Bit8u xstart;Bit8u ysrc;Bit8u ydest;Bit8u cols;Bit8u nbcols;Bit8u cheight;
+{
+ Bit16u src,dest;
+ Bit8u i;
+
+ src=ysrc*cheight*nbcols+xstart;
+ dest=ydest*cheight*nbcols+xstart;
+ outw(VGAREG_GRDC_ADDRESS, 0x0105);  // write mode 1 (latch copy)
+ for(i=0;i<cheight;i++)
+  {
+   memcpyb(0xa000,dest+i*nbcols,0xa000,src+i*nbcols,cols);
+  }
+ outw(VGAREG_GRDC_ADDRESS, 0x0005);  // back to write mode 0
+}
+
+// --------------------------------------------------------------------------------------------
+// Fill a block of character rows in planar (4-plane) video memory with
+// colour 'attr', using GRDC write mode 2 so the written byte is
+// expanded to all four planes.
+static void vgamem_fill_pl4(xstart,ystart,cols,nbcols,cheight,attr)
+Bit8u xstart;Bit8u ystart;Bit8u cols;Bit8u nbcols;Bit8u cheight;Bit8u attr;
+{
+ Bit16u dest;
+ Bit8u i;
+
+ dest=ystart*cheight*nbcols+xstart;
+ outw(VGAREG_GRDC_ADDRESS, 0x0205);  // write mode 2 (colour expand)
+ for(i=0;i<cheight;i++)
+  {
+   memsetb(0xa000,dest+i*nbcols,attr,cols);
+  }
+ outw(VGAREG_GRDC_ADDRESS, 0x0005);  // back to write mode 0
+}
+
+// --------------------------------------------------------------------------------------------
+// Copy character rows in CGA interleaved memory at 0xb800: even scan
+// lines live in bank 0, odd scan lines in bank 1 at offset 0x2000.
+static void vgamem_copy_cga(xstart,ysrc,ydest,cols,nbcols,cheight)
+Bit8u xstart;Bit8u ysrc;Bit8u ydest;Bit8u cols;Bit8u nbcols;Bit8u cheight;
+{
+ Bit16u src,dest;
+ Bit8u i;
+
+ src=((ysrc*cheight*nbcols)>>1)+xstart;
+ dest=((ydest*cheight*nbcols)>>1)+xstart;
+ for(i=0;i<cheight;i++)
+  {
+   if (i & 1)
+     memcpyb(0xb800,0x2000+dest+(i>>1)*nbcols,0xb800,0x2000+src+(i>>1)*nbcols,cols);
+   else
+     memcpyb(0xb800,dest+(i>>1)*nbcols,0xb800,src+(i>>1)*nbcols,cols);
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// Fill character rows in CGA interleaved memory at 0xb800 with byte
+// 'attr'; odd scan lines are in the second bank at offset 0x2000.
+static void vgamem_fill_cga(xstart,ystart,cols,nbcols,cheight,attr)
+Bit8u xstart;Bit8u ystart;Bit8u cols;Bit8u nbcols;Bit8u cheight;Bit8u attr;
+{
+ Bit16u dest;
+ Bit8u i;
+
+ dest=((ystart*cheight*nbcols)>>1)+xstart;
+ for(i=0;i<cheight;i++)
+  {
+   if (i & 1)
+     memsetb(0xb800,0x2000+dest+(i>>1)*nbcols,attr,cols);
+   else
+     memsetb(0xb800,dest+(i>>1)*nbcols,attr,cols);
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// Scroll a window up or down (INT 10h AH=06h/07h).  nblines==0 (or
+// larger than the screen) clears the window instead; (rul,cul)-(rlr,clr)
+// is the inclusive window, attr is the fill attribute/colour and dir is
+// SCROLL_UP or SCROLL_DOWN.  page==0xFF means the current page.
+// Text modes move words in display memory; graphics modes dispatch on
+// the memory model to the vgamem_* helpers.
+// NOTE(review): in the SCROLL_DOWN loops below, i is a Bit16u, so with
+// rul==0 the condition i>=rul never fails and i-- wraps past zero —
+// presumably callers never scroll down a window starting at row 0;
+// confirm before relying on that path.
+static void biosfn_scroll (nblines,attr,rul,cul,rlr,clr,page,dir)
+Bit8u nblines;Bit8u attr;Bit8u rul;Bit8u cul;Bit8u rlr;Bit8u clr;Bit8u page;Bit8u dir;
+{
+ // page == 0xFF if current
+
+ Bit8u mode,line,cheight,bpp,cols;
+ Bit16u nbcols,nbrows,i;
+ Bit16u address;
+
+ if(rul>rlr)return;
+ if(cul>clr)return;
+
+ // Get the mode
+ mode=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE);
+ line=find_vga_entry(mode);
+ if(line==0xFF)return;
+
+ // Get the dimensions
+ nbrows=read_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS)+1;
+ nbcols=read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+
+ // Get the current page
+ if(page==0xFF)
+  page=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_PAGE);
+
+ // Clamp the window to the screen
+ if(rlr>=nbrows)rlr=nbrows-1;
+ if(clr>=nbcols)clr=nbcols-1;
+ if(nblines>nbrows)nblines=0;
+ cols=clr-cul+1;
+
+ if(vga_modes[line].class==TEXT)
+  {
+   // Compute the address
+   address=SCREEN_MEM_START(nbcols,nbrows,page);
+#ifdef DEBUG
+   printf("Scroll, address %04x (%04x %04x %02x)\n",address,nbrows,nbcols,page);
+#endif
+
+   // Full-screen clear: blank the whole page in one memsetw
+   if(nblines==0&&rul==0&&cul==0&&rlr==nbrows-1&&clr==nbcols-1)
+    {
+     memsetw(vga_modes[line].sstart,address,(Bit16u)attr*0x100+' ',nbrows*nbcols);
+    }
+   else
+    {// if Scroll up
+     if(dir==SCROLL_UP)
+      {for(i=rul;i<=rlr;i++)
+        {
+         if((i+nblines>rlr)||(nblines==0))
+          memsetw(vga_modes[line].sstart,address+(i*nbcols+cul)*2,(Bit16u)attr*0x100+' ',cols);
+         else
+          memcpyw(vga_modes[line].sstart,address+(i*nbcols+cul)*2,vga_modes[line].sstart,((i+nblines)*nbcols+cul)*2,cols);
+        }
+      }
+     else
+      {for(i=rlr;i>=rul;i--)
+        {
+         if((i<rul+nblines)||(nblines==0))
+          memsetw(vga_modes[line].sstart,address+(i*nbcols+cul)*2,(Bit16u)attr*0x100+' ',cols);
+         else
+          memcpyw(vga_modes[line].sstart,address+(i*nbcols+cul)*2,vga_modes[line].sstart,((i-nblines)*nbcols+cul)*2,cols);
+        }
+      }
+    }
+  }
+ else
+  {
+   // FIXME gfx mode not complete
+   cheight=vga_modes[line].cheight;
+   switch(vga_modes[line].memmodel)
+    {
+     case PLANAR4:
+     case PLANAR1:
+       if(nblines==0&&rul==0&&cul==0&&rlr==nbrows-1&&clr==nbcols-1)
+        {
+         outw(VGAREG_GRDC_ADDRESS, 0x0205);
+         memsetb(vga_modes[line].sstart,0,attr,nbrows*nbcols*cheight);
+         outw(VGAREG_GRDC_ADDRESS, 0x0005);
+        }
+       else
+        {// if Scroll up
+         if(dir==SCROLL_UP)
+          {for(i=rul;i<=rlr;i++)
+            {
+             if((i+nblines>rlr)||(nblines==0))
+              vgamem_fill_pl4(cul,i,cols,nbcols,cheight,attr);
+             else
+              vgamem_copy_pl4(cul,i+nblines,i,cols,nbcols,cheight);
+            }
+          }
+         else
+          {for(i=rlr;i>=rul;i--)
+            {
+             if((i<rul+nblines)||(nblines==0))
+              vgamem_fill_pl4(cul,i,cols,nbcols,cheight,attr);
+             else
+              vgamem_copy_pl4(cul,i,i-nblines,cols,nbcols,cheight);
+            }
+          }
+        }
+       break;
+     case CGA:
+       bpp=vga_modes[line].pixbits;
+       if(nblines==0&&rul==0&&cul==0&&rlr==nbrows-1&&clr==nbcols-1)
+        {
+         memsetb(vga_modes[line].sstart,0,attr,nbrows*nbcols*cheight*bpp);
+        }
+       else
+        {
+         // 2bpp modes pack 4 pixels/byte: double the byte counts
+         if(bpp==2)
+          {
+           cul<<=1;
+           cols<<=1;
+           nbcols<<=1;
+          }
+         // if Scroll up
+         if(dir==SCROLL_UP)
+          {for(i=rul;i<=rlr;i++)
+            {
+             if((i+nblines>rlr)||(nblines==0))
+              vgamem_fill_cga(cul,i,cols,nbcols,cheight,attr);
+             else
+              vgamem_copy_cga(cul,i+nblines,i,cols,nbcols,cheight);
+            }
+          }
+         else
+          {for(i=rlr;i>=rul;i--)
+            {
+             if((i<rul+nblines)||(nblines==0))
+              vgamem_fill_cga(cul,i,cols,nbcols,cheight,attr);
+             else
+              vgamem_copy_cga(cul,i,i-nblines,cols,nbcols,cheight);
+            }
+          }
+        }
+       break;
+#ifdef DEBUG
+     default:
+       printf("Scroll in graphics mode ");
+       unimplemented();
+#endif
+    }
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// Read the character/attribute word at the cursor (INT 10h AH=08h).
+// *car receives attribute in the high byte, character in the low byte.
+// Only implemented for text modes; *car lives on the caller's stack.
+static void biosfn_read_char_attr (page,car)
+Bit8u page;Bit16u *car;
+{Bit16u ss=get_SS();
+ Bit8u xcurs,ycurs,mode,line;
+ Bit16u nbcols,nbrows,address;
+ Bit16u cursor,dummy;
+
+ // Get the mode
+ mode=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE);
+ line=find_vga_entry(mode);
+ if(line==0xFF)return;
+
+ // Get the cursor pos for the page
+ biosfn_get_cursor_pos(page,&dummy,&cursor);
+ xcurs=cursor&0x00ff;ycurs=(cursor&0xff00)>>8;
+
+ // Get the dimensions
+ nbrows=read_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS)+1;
+ nbcols=read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+
+ if(vga_modes[line].class==TEXT)
+  {
+   // Compute the address
+   address=SCREEN_MEM_START(nbcols,nbrows,page)+(xcurs+ycurs*nbcols)*2;
+
+   write_word(ss,car,read_word(vga_modes[line].sstart,address));
+  }
+ else
+  {
+   // FIXME gfx mode
+#ifdef DEBUG
+   unimplemented();
+#endif
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// Draw one font glyph in a planar (4-plane) graphics mode at character
+// cell (xcurs,ycurs).  Selects the 8/14/16-line ROM font by cheight,
+// then writes each glyph row bit-by-bit through the graphics
+// controller: reg 8 = bit mask, write mode 2 expands the colour to all
+// planes; attr bit 7 selects XOR (data rotate reg 3 = 0x18) instead of
+// replace.  The trailing asm restores bit mask, mode and rotate regs.
+static void write_gfx_char_pl4(car,attr,xcurs,ycurs,nbcols,cheight)
+Bit8u car;Bit8u attr;Bit8u xcurs;Bit8u ycurs;Bit8u nbcols;Bit8u cheight;
+{
+ Bit8u i,j,mask;
+ Bit8u *fdata;
+ Bit16u addr,dest,src;
+
+ switch(cheight)
+  {case 14:
+    fdata = &vgafont14;
+    break;
+   case 16:
+    fdata = &vgafont16;
+    break;
+   default:
+    fdata = &vgafont8;
+  }
+ addr=xcurs+ycurs*cheight*nbcols;
+ src = car * cheight;
+ outw(VGAREG_SEQU_ADDRESS, 0x0f02);  // enable all four planes
+ outw(VGAREG_GRDC_ADDRESS, 0x0205);  // write mode 2
+ if(attr&0x80)
+  {
+   outw(VGAREG_GRDC_ADDRESS, 0x1803);  // XOR with existing pixels
+  }
+ else
+  {
+   outw(VGAREG_GRDC_ADDRESS, 0x0003);
+  }
+ for(i=0;i<cheight;i++)
+  {
+   dest=addr+i*nbcols;
+   for(j=0;j<8;j++)
+    {
+     mask=0x80>>j;
+     outw(VGAREG_GRDC_ADDRESS, (mask << 8) | 0x08);
+     read_byte(0xa000,dest);  // load the latches before the masked write
+     if(fdata[src+i]&mask)
+      {
+       write_byte(0xa000,dest,attr&0x0f);
+      }
+     else
+      {
+       write_byte(0xa000,dest,0x00);
+      }
+    }
+  }
+ASM_START
+  ;; restore bit mask / write mode / data rotate to defaults
+  mov dx, # VGAREG_GRDC_ADDRESS
+  mov ax, #0xff08
+  out dx, ax
+  mov ax, #0x0005
+  out dx, ax
+  mov ax, #0x0003
+  out dx, ax
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Draw one 8x8 font glyph in a CGA graphics mode at character cell
+// (xcurs,ycurs).  bpp is 1 or 2 bits per pixel; memory is interleaved
+// (odd scan lines at 0xb800:0x2000).  attr bit 7 requests XOR with the
+// existing byte instead of replacing it; the low bits give the colour.
+static void write_gfx_char_cga(car,attr,xcurs,ycurs,nbcols,bpp)
+Bit8u car;Bit8u attr;Bit8u xcurs;Bit8u ycurs;Bit8u nbcols;Bit8u bpp;
+{
+ Bit8u i,j,mask,data;
+ Bit8u *fdata;
+ Bit16u addr,dest,src;
+
+ fdata = &vgafont8;
+ addr=(xcurs*bpp)+ycurs*320;
+ src = car * 8;
+ for(i=0;i<8;i++)
+  {
+   dest=addr+(i>>1)*80;
+   if (i & 1) dest += 0x2000;  // odd scan lines: second bank
+   mask = 0x80;
+   if (bpp == 1)
+    {
+     // 1bpp: whole glyph row fits in one byte
+     if (attr & 0x80)
+      {
+       data = read_byte(0xb800,dest);
+      }
+     else
+      {
+       data = 0x00;
+      }
+     for(j=0;j<8;j++)
+      {
+       if (fdata[src+i] & mask)
+        {
+         if (attr & 0x80)
+          {
+           data ^= (attr & 0x01) << (7-j);
+          }
+         else
+          {
+           data |= (attr & 0x01) << (7-j);
+          }
+        }
+       mask >>= 1;
+      }
+     write_byte(0xb800,dest,data);
+    }
+   else
+    {
+     // 2bpp: 4 pixels per byte, so each glyph row spans two bytes
+     while (mask > 0)
+      {
+       if (attr & 0x80)
+        {
+         data = read_byte(0xb800,dest);
+        }
+       else
+        {
+         data = 0x00;
+        }
+       for(j=0;j<4;j++)
+        {
+         if (fdata[src+i] & mask)
+          {
+           if (attr & 0x80)
+            {
+             data ^= (attr & 0x03) << ((3-j)*2);
+            }
+           else
+            {
+             data |= (attr & 0x03) << ((3-j)*2);
+            }
+          }
+         mask >>= 1;
+        }
+       write_byte(0xb800,dest,data);
+       dest += 1;
+      }
+    }
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// Draw one 8x8 font glyph in a 256-colour linear mode (one byte per
+// pixel at 0xa000).  Foreground pixels get 'attr', background pixels
+// are cleared to 0.  nbcols is the width in character cells, so the
+// pitch is nbcols*8 bytes and each cell row starts at ycurs*nbcols*64.
+static void write_gfx_char_lin(car,attr,xcurs,ycurs,nbcols)
+Bit8u car;Bit8u attr;Bit8u xcurs;Bit8u ycurs;Bit8u nbcols;
+{
+ Bit8u i,j,mask,data;
+ Bit8u *fdata;
+ Bit16u addr,dest,src;
+
+ fdata = &vgafont8;
+ addr=xcurs*8+ycurs*nbcols*64;
+ src = car * 8;
+ for(i=0;i<8;i++)
+  {
+   dest=addr+i*nbcols*8;
+   mask = 0x80;
+   for(j=0;j<8;j++)
+    {
+     data = 0x00;
+     if (fdata[src+i] & mask)
+      {
+       data = attr;
+      }
+     write_byte(0xa000,dest+j,data);
+     mask >>= 1;
+    }
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// Write character + attribute at the cursor, 'count' times, without
+// moving the cursor (INT 10h AH=09h).  Text modes use one memsetw of
+// the attr:char word; graphics modes render glyphs cell by cell,
+// stopping at the right edge of the row (no wrap in graphics modes).
+static void biosfn_write_char_attr (car,page,attr,count)
+Bit8u car;Bit8u page;Bit8u attr;Bit16u count;
+{
+ Bit8u cheight,xcurs,ycurs,mode,line,bpp;
+ Bit16u nbcols,nbrows,address;
+ Bit16u cursor,dummy;
+
+ // Get the mode
+ mode=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE);
+ line=find_vga_entry(mode);
+ if(line==0xFF)return;
+
+ // Get the cursor pos for the page
+ biosfn_get_cursor_pos(page,&dummy,&cursor);
+ xcurs=cursor&0x00ff;ycurs=(cursor&0xff00)>>8;
+
+ // Get the dimensions
+ nbrows=read_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS)+1;
+ nbcols=read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+
+ if(vga_modes[line].class==TEXT)
+  {
+   // Compute the address
+   address=SCREEN_MEM_START(nbcols,nbrows,page)+(xcurs+ycurs*nbcols)*2;
+
+   dummy=((Bit16u)attr<<8)+car;
+/*
+printf("sstart=%x\n", vga_modes[line].sstart);
+printf("address=%x\n", address);
+printf("dummy=%x\n", dummy);
+printf("count=%x\n", count);
+*/
+   memsetw(vga_modes[line].sstart,address,dummy,count);
+  }
+ else
+  {
+   // FIXME gfx mode not complete
+   cheight=vga_modes[line].cheight;
+   bpp=vga_modes[line].pixbits;
+   while((count-->0) && (xcurs<nbcols))
+    {
+     switch(vga_modes[line].memmodel)
+      {
+       case PLANAR4:
+       case PLANAR1:
+         write_gfx_char_pl4(car,attr,xcurs,ycurs,nbcols,cheight);
+         break;
+       case CGA:
+         write_gfx_char_cga(car,attr,xcurs,ycurs,nbcols,bpp);
+         break;
+       case LINEAR8:
+         write_gfx_char_lin(car,attr,xcurs,ycurs,nbcols);
+         break;
+#ifdef DEBUG
+       default:
+         unimplemented();
+#endif
+      }
+     xcurs++;
+    }
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// Write a character at the cursor, 'count' times, keeping the existing
+// attribute bytes and not moving the cursor (INT 10h AH=0Ah).  In
+// graphics modes the attribute argument is still used as the colour,
+// matching biosfn_write_char_attr.
+static void biosfn_write_char_only (car,page,attr,count)
+Bit8u car;Bit8u page;Bit8u attr;Bit16u count;
+{
+ Bit8u cheight,xcurs,ycurs,mode,line,bpp;
+ Bit16u nbcols,nbrows,address;
+ Bit16u cursor,dummy;
+
+ // Get the mode
+ mode=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE);
+ line=find_vga_entry(mode);
+ if(line==0xFF)return;
+
+ // Get the cursor pos for the page
+ biosfn_get_cursor_pos(page,&dummy,&cursor);
+ xcurs=cursor&0x00ff;ycurs=(cursor&0xff00)>>8;
+
+ // Get the dimensions
+ nbrows=read_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS)+1;
+ nbcols=read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+
+ if(vga_modes[line].class==TEXT)
+  {
+   // Compute the address
+   address=SCREEN_MEM_START(nbcols,nbrows,page)+(xcurs+ycurs*nbcols)*2;
+
+   // Write only the character bytes (step 2 skips the attributes)
+   while(count-->0)
+    {write_byte(vga_modes[line].sstart,address,car);
+     address+=2;
+    }
+  }
+ else
+  {
+   // FIXME gfx mode not complete
+   cheight=vga_modes[line].cheight;
+   bpp=vga_modes[line].pixbits;
+   while((count-->0) && (xcurs<nbcols))
+    {
+     switch(vga_modes[line].memmodel)
+      {
+       case PLANAR4:
+       case PLANAR1:
+         write_gfx_char_pl4(car,attr,xcurs,ycurs,nbcols,cheight);
+         break;
+       case CGA:
+         write_gfx_char_cga(car,attr,xcurs,ycurs,nbcols,bpp);
+         break;
+       case LINEAR8:
+         write_gfx_char_lin(car,attr,xcurs,ycurs,nbcols);
+         break;
+#ifdef DEBUG
+       default:
+         unimplemented();
+#endif
+      }
+     xcurs++;
+    }
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// INT 10h AH=0Bh dispatcher: BH=0 sets the border/background colour,
+// BH=1 selects the CGA palette.  Both talk to the VGA attribute
+// controller: reading VGAREG_ACTL_RESET (3DAh) resets its flip-flop,
+// then index/data pairs go to 3C0h; index 0x20 re-enables video output.
+ASM_START
+biosfn_group_0B:
+  cmp bh, #0x00
+  je biosfn_set_border_color
+  cmp bh, #0x01
+  je biosfn_set_palette
+#ifdef DEBUG
+  call _unknown
+#endif
+  ret
+biosfn_set_border_color:
+  push ax
+  push bx
+  push cx
+  push dx
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x00
+  out dx, al
+  mov al, bl
+  and al, #0x0f
+  test al, #0x08
+  jz set_low_border
+  add al, #0x08
+set_low_border:
+  out dx, al
+  ;; propagate the intensity bit (BL bit 4) to palette regs 1..3
+  mov cl, #0x01
+  and bl, #0x10
+set_intensity_loop:
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, cl
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  and al, #0xef
+  or al, bl
+  mov dx, # VGAREG_ACTL_ADDRESS
+  out dx, al
+  inc cl
+  cmp cl, #0x04
+  jne set_intensity_loop
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop cx
+  pop bx
+  pop ax
+  ret
+biosfn_set_palette:
+  push ax
+  push bx
+  push cx
+  push dx
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  ;; copy BL bit 0 (palette select) into bit 0 of palette regs 1..3
+  mov cl, #0x01
+  and bl, #0x01
+set_cga_palette_loop:
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, cl
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  and al, #0xfe
+  or al, bl
+  mov dx, # VGAREG_ACTL_ADDRESS
+  out dx, al
+  inc cl
+  cmp cl, #0x04
+  jne set_cga_palette_loop
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop cx
+  pop bx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// Write a pixel (INT 10h AH=0Ch): AL=colour, CX=column, DX=row.
+// AL bit 7 requests XOR with the existing pixel.  Planar modes use the
+// graphics controller bit mask + write mode 2; CGA modes mask/shift
+// within the interleaved byte; linear modes store the byte directly.
+static void biosfn_write_pixel (BH,AL,CX,DX) Bit8u BH;Bit8u AL;Bit16u CX;Bit16u DX;
+{
+ Bit8u mode,line,mask,attr,data;
+ Bit16u addr;
+
+ // Get the mode
+ mode=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE);
+ line=find_vga_entry(mode);
+ if(line==0xFF)return;
+ if(vga_modes[line].class==TEXT)return;
+
+ switch(vga_modes[line].memmodel)
+  {
+   case PLANAR4:
+   case PLANAR1:
+     addr = CX/8+DX*read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+     mask = 0x80 >> (CX & 0x07);
+     outw(VGAREG_GRDC_ADDRESS, (mask << 8) | 0x08);  // bit mask
+     outw(VGAREG_GRDC_ADDRESS, 0x0205);              // write mode 2
+     data = read_byte(0xa000,addr);  // load latches
+     if (AL & 0x80)
+      {
+       outw(VGAREG_GRDC_ADDRESS, 0x1803);  // XOR function
+      }
+     write_byte(0xa000,addr,AL);
+ASM_START
+  ;; restore bit mask / write mode / data rotate to defaults
+  mov dx, # VGAREG_GRDC_ADDRESS
+  mov ax, #0xff08
+  out dx, ax
+  mov ax, #0x0005
+  out dx, ax
+  mov ax, #0x0003
+  out dx, ax
+ASM_END
+     break;
+   case CGA:
+     if(vga_modes[line].pixbits==2)
+      {
+       addr=(CX>>2)+(DX>>1)*80;
+      }
+     else
+      {
+       addr=(CX>>3)+(DX>>1)*80;
+      }
+     if (DX & 1) addr += 0x2000;  // odd scan lines: second bank
+     data = read_byte(0xb800,addr);
+     if(vga_modes[line].pixbits==2)
+      {
+       attr = (AL & 0x03) << ((3 - (CX & 0x03)) * 2);
+       mask = 0x03 << ((3 - (CX & 0x03)) * 2);
+      }
+     else
+      {
+       attr = (AL & 0x01) << (7 - (CX & 0x07));
+       mask = 0x01 << (7 - (CX & 0x07));
+      }
+     if (AL & 0x80)
+      {
+       data ^= attr;
+      }
+     else
+      {
+       data &= ~mask;
+       data |= attr;
+      }
+     write_byte(0xb800,addr,data);
+     break;
+   case LINEAR8:
+     addr=CX+DX*(read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS)*8);
+     write_byte(0xa000,addr,AL);
+     break;
+#ifdef DEBUG
+   default:
+     unimplemented();
+#endif
+  }
+}
+
+// --------------------------------------------------------------------------------------------
+// Read a pixel (INT 10h AH=0Dh): CX=column, DX=row; the pixel value is
+// returned in the low byte of *AX (the caller's saved AL, which lives
+// on the stack, hence the SS segment).  Planar modes sample the pixel
+// bit from each plane via the Read Map Select register; CGA and linear
+// modes extract it from the framebuffer byte directly.
+static void biosfn_read_pixel (BH,CX,DX,AX) Bit8u BH;Bit16u CX;Bit16u DX;Bit16u *AX;
+{
+ Bit8u mode,line,mask,attr,data,i;
+ Bit16u addr;
+ Bit16u ss=get_SS();
+
+ // Get the mode
+ mode=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE);
+ line=find_vga_entry(mode);
+ if(line==0xFF)return;
+ if(vga_modes[line].class==TEXT)return;
+
+ switch(vga_modes[line].memmodel)
+  {
+   case PLANAR4:
+   case PLANAR1:
+     addr = CX/8+DX*read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+     mask = 0x80 >> (CX & 0x07);
+     attr = 0x00;
+     // Collect the pixel's bit from each plane (GRDC reg 4 = read map)
+     for(i=0;i<4;i++)
+      {
+       outw(VGAREG_GRDC_ADDRESS, (i << 8) | 0x04);
+       data = read_byte(0xa000,addr) & mask;
+       if (data > 0) attr |= (0x01 << i);
+      }
+     break;
+   case CGA:
+     // Match biosfn_write_pixel's addressing: 2bpp packs 4 pixels per
+     // byte (CX>>2), 1bpp packs 8 pixels per byte (CX>>3).
+     if(vga_modes[line].pixbits==2)
+      {
+       addr=(CX>>2)+(DX>>1)*80;
+      }
+     else
+      {
+       addr=(CX>>3)+(DX>>1)*80;
+      }
+     if (DX & 1) addr += 0x2000;  // odd scan lines: second bank
+     data = read_byte(0xb800,addr);
+     if(vga_modes[line].pixbits==2)
+      {
+       attr = (data >> ((3 - (CX & 0x03)) * 2)) & 0x03;
+      }
+     else
+      {
+       attr = (data >> (7 - (CX & 0x07))) & 0x01;
+      }
+     break;
+   case LINEAR8:
+     addr=CX+DX*(read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS)*8);
+     attr=read_byte(0xa000,addr);
+     break;
+   default:
+#ifdef DEBUG
+     unimplemented();
+#endif
+     attr = 0;
+  }
+ write_word(ss,AX,(read_word(ss,AX) & 0xff00) | attr);
+}
+
+// --------------------------------------------------------------------------------------------
+// Teletype output (INT 10h AH=0Eh): write one character at the cursor,
+// advance the cursor, and handle BEL/BS/CR/LF/TAB, line wrap and
+// scrolling.  flag is WITH_ATTR (also store 'attr') or NO_ATTR; page
+// 0xff means the current page.
+static void biosfn_write_teletype (car, page, attr, flag)
+Bit8u car;Bit8u page;Bit8u attr;Bit8u flag;
+{// flag = WITH_ATTR / NO_ATTR
+
+ Bit8u cheight,xcurs,ycurs,mode,line,bpp;
+ Bit16u nbcols,nbrows,address;
+ Bit16u cursor,dummy;
+
+ // special case if page is 0xff, use current page
+ if(page==0xff)
+  page=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_PAGE);
+
+ // Get the mode
+ mode=read_byte(BIOSMEM_SEG,BIOSMEM_CURRENT_MODE);
+ line=find_vga_entry(mode);
+ if(line==0xFF)return;
+
+ // Get the cursor pos for the page
+ biosfn_get_cursor_pos(page,&dummy,&cursor);
+ xcurs=cursor&0x00ff;ycurs=(cursor&0xff00)>>8;
+
+ // Get the dimensions
+ nbrows=read_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS)+1;
+ nbcols=read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+
+ switch(car)
+  {
+   case 7:
+    //FIXME should beep
+    break;
+
+   case 8:
+    if(xcurs>0)xcurs--;
+    break;
+
+   case '\r':
+    xcurs=0;
+    break;
+
+   case '\n':
+    xcurs=0;
+    ycurs++;
+    break;
+
+   case '\t':
+    // Emit spaces until the cursor reaches the next 8-column tab
+    // stop.  (The loop condition was previously ==0, which stopped
+    // after a single space unless the cursor happened to land on a
+    // multiple of 8.)
+    do
+     {
+      biosfn_write_teletype(' ',page,attr,flag);
+      biosfn_get_cursor_pos(page,&dummy,&cursor);
+      xcurs=cursor&0x00ff;ycurs=(cursor&0xff00)>>8;
+     }while(xcurs%8!=0);
+    break;
+
+   default:
+
+    if(vga_modes[line].class==TEXT)
+     {
+      // Compute the address
+      address=SCREEN_MEM_START(nbcols,nbrows,page)+(xcurs+ycurs*nbcols)*2;
+
+      // Write the char
+      write_byte(vga_modes[line].sstart,address,car);
+
+      if(flag==WITH_ATTR)
+       write_byte(vga_modes[line].sstart,address+1,attr);
+     }
+    else
+     {
+      // FIXME gfx mode not complete
+      cheight=vga_modes[line].cheight;
+      bpp=vga_modes[line].pixbits;
+      switch(vga_modes[line].memmodel)
+       {
+        case PLANAR4:
+        case PLANAR1:
+          write_gfx_char_pl4(car,attr,xcurs,ycurs,nbcols,cheight);
+          break;
+        case CGA:
+          write_gfx_char_cga(car,attr,xcurs,ycurs,nbcols,bpp);
+          break;
+        case LINEAR8:
+          write_gfx_char_lin(car,attr,xcurs,ycurs,nbcols);
+          break;
+#ifdef DEBUG
+        default:
+          unimplemented();
+#endif
+       }
+     }
+    xcurs++;
+  }
+
+ // Do we need to wrap ?
+ if(xcurs==nbcols)
+  {xcurs=0;
+   ycurs++;
+  }
+
+ // Do we need to scroll ?
+ if(ycurs==nbrows)
+  {
+   if(vga_modes[line].class==TEXT)
+    {
+     biosfn_scroll(0x01,0x07,0,0,nbrows-1,nbcols-1,page,SCROLL_UP);
+    }
+   else
+    {
+     biosfn_scroll(0x01,0x00,0,0,nbrows-1,nbcols-1,page,SCROLL_UP);
+    }
+   ycurs-=1;
+  }
+
+ // Set the cursor for the page
+ cursor=ycurs; cursor<<=8; cursor+=xcurs;
+ biosfn_set_cursor_pos(page,cursor);
+}
+
+// --------------------------------------------------------------------------------------------
+// INT 10h AH=0Fh (get video mode): returns AL = current mode (with the
+// BDA video-ctl "no clear" bit 7 merged in), AH = number of columns,
+// BH = active display page — all read from the BIOS data area.
+ASM_START
+biosfn_get_video_mode:
+  push ds
+  mov ax, # BIOSMEM_SEG
+  mov ds, ax
+  push bx
+  mov bx, # BIOSMEM_CURRENT_PAGE
+  mov al, [bx]
+  pop bx
+  mov bh, al
+  push bx
+  mov bx, # BIOSMEM_VIDEO_CTL
+  mov ah, [bx]
+  and ah, #0x80
+  mov bx, # BIOSMEM_CURRENT_MODE
+  mov al, [bx]
+  or al, ah
+  mov bx, # BIOSMEM_NB_COLS
+  mov ah, [bx]
+  pop bx
+  pop ds
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// INT 10h AH=10h dispatcher (palette/DAC services), selected on AL,
+// followed by the first two subfunctions.  All attribute-controller
+// accesses follow the standard sequence: read 3DAh to reset the
+// address/data flip-flop, write index then data to 3C0h, and finish
+// with index 0x20 to re-enable the display.
+ASM_START
+biosfn_group_10:
+  cmp al, #0x00
+  jne int10_test_1001
+  jmp biosfn_set_single_palette_reg
+int10_test_1001:
+  cmp al, #0x01
+  jne int10_test_1002
+  jmp biosfn_set_overscan_border_color
+int10_test_1002:
+  cmp al, #0x02
+  jne int10_test_1003
+  jmp biosfn_set_all_palette_reg
+int10_test_1003:
+  cmp al, #0x03
+  jne int10_test_1007
+  jmp biosfn_toggle_intensity
+int10_test_1007:
+  cmp al, #0x07
+  jne int10_test_1008
+  jmp biosfn_get_single_palette_reg
+int10_test_1008:
+  cmp al, #0x08
+  jne int10_test_1009
+  jmp biosfn_read_overscan_border_color
+int10_test_1009:
+  cmp al, #0x09
+  jne int10_test_1010
+  jmp biosfn_get_all_palette_reg
+int10_test_1010:
+  cmp al, #0x10
+  jne int10_test_1012
+  jmp biosfn_set_single_dac_reg
+int10_test_1012:
+  cmp al, #0x12
+  jne int10_test_1013
+  jmp biosfn_set_all_dac_reg
+int10_test_1013:
+  cmp al, #0x13
+  jne int10_test_1015
+  jmp biosfn_select_video_dac_color_page
+int10_test_1015:
+  cmp al, #0x15
+  jne int10_test_1017
+  jmp biosfn_read_single_dac_reg
+int10_test_1017:
+  cmp al, #0x17
+  jne int10_test_1018
+  jmp biosfn_read_all_dac_reg
+int10_test_1018:
+  cmp al, #0x18
+  jne int10_test_1019
+  jmp biosfn_set_pel_mask
+int10_test_1019:
+  cmp al, #0x19
+  jne int10_test_101A
+  jmp biosfn_read_pel_mask
+int10_test_101A:
+  cmp al, #0x1a
+  jne int10_group_10_unknown
+  jmp biosfn_read_video_dac_state
+int10_group_10_unknown:
+#ifdef DEBUG
+  call _unknown
+#endif
+  ret
+
+;; AL=00h: set attribute palette register BL to value BH (BL<=0x14)
+biosfn_set_single_palette_reg:
+  cmp bl, #0x14
+  ja no_actl_reg1
+  push ax
+  push dx
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, bl
+  out dx, al
+  mov al, bh
+  out dx, al
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop ax
+no_actl_reg1:
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=01h: set overscan (border) colour = attribute register 0x11.
+ASM_START
+biosfn_set_overscan_border_color:
+  push bx
+  mov bl, #0x11
+  call biosfn_set_single_palette_reg
+  pop bx
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=02h: load all 16 palette registers plus the overscan register
+// from the 17-byte table at ES:DX.
+ASM_START
+biosfn_set_all_palette_reg:
+  push ax
+  push bx
+  push cx
+  push dx
+  mov bx, dx
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov cl, #0x00
+  mov dx, # VGAREG_ACTL_ADDRESS
+set_palette_loop:
+  mov al, cl
+  out dx, al
+  seg es
+  mov al, [bx]
+  out dx, al
+  inc bx
+  inc cl
+  cmp cl, #0x10
+  jne set_palette_loop
+  ;; 17th byte goes to the overscan register (index 0x11)
+  mov al, #0x11
+  out dx, al
+  seg es
+  mov al, [bx]
+  out dx, al
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop cx
+  pop bx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=03h: toggle intensity/blinking — copy BL bit 0 into bit 3 of
+// attribute mode-control register 0x10.
+ASM_START
+biosfn_toggle_intensity:
+  push ax
+  push bx
+  push dx
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x10
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  and al, #0xf7
+  and bl, #0x01
+  shl bl, 3
+  or al, bl
+  mov dx, # VGAREG_ACTL_ADDRESS
+  out dx, al
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop bx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=07h: read attribute palette register BL into BH (BL<=0x14).
+ASM_START
+biosfn_get_single_palette_reg:
+  cmp bl, #0x14
+  ja no_actl_reg2
+  push ax
+  push dx
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, bl
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  mov bh, al
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop ax
+no_actl_reg2:
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=08h: read the overscan (border) colour into BH.
+ASM_START
+biosfn_read_overscan_border_color:
+  push ax
+  push bx
+  mov bl, #0x11
+  call biosfn_get_single_palette_reg
+  mov al, bh
+  pop bx
+  mov bh, al
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=09h: store all 16 palette registers plus overscan into the
+// 17-byte buffer at ES:DX.
+ASM_START
+biosfn_get_all_palette_reg:
+  push ax
+  push bx
+  push cx
+  push dx
+  mov bx, dx
+  mov cl, #0x00
+get_palette_loop:
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, cl
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  seg es
+  mov [bx], al
+  inc bx
+  inc cl
+  cmp cl, #0x10
+  jne get_palette_loop
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x11
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  seg es
+  mov [bx], al
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop cx
+  pop bx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=10h: set DAC register BX to red=DH? — here: write index BL, then
+// the three 6-bit components taken from AH (red), CH (green), CL (blue)
+// to the DAC data port.
+ASM_START
+biosfn_set_single_dac_reg:
+  push ax
+  push dx
+  mov dx, # VGAREG_DAC_WRITE_ADDRESS
+  mov al, bl
+  out dx, al
+  mov dx, # VGAREG_DAC_DATA
+  ;; recover the caller's AX (pushed above) to get AH = red
+  pop ax
+  push ax
+  mov al, ah
+  out dx, al
+  mov al, ch
+  out dx, al
+  mov al, cl
+  out dx, al
+  pop dx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=12h: load CX DAC registers starting at index BL from the RGB
+// triplets at ES:DX.
+ASM_START
+biosfn_set_all_dac_reg:
+  push ax
+  push bx
+  push cx
+  push dx
+  mov dx, # VGAREG_DAC_WRITE_ADDRESS
+  mov al, bl
+  out dx, al
+  pop dx
+  push dx
+  mov bx, dx
+set_dac_loop:
+  seg es
+  mov al, [bx]
+  out dx, al
+  inc bx
+  seg es
+  mov al, [bx]
+  out dx, al
+  inc bx
+  seg es
+  mov al, [bx]
+  out dx, al
+  inc bx
+  dec cx
+  jnz set_dac_loop
+  pop dx
+  pop cx
+  pop bx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=13h: select DAC colour page.  BL=0 sets paging mode (BH -> bit 7
+// of attribute reg 0x10); BL=1 selects the page (BH -> attribute reg
+// 0x14, shifted when in 4-page mode).
+ASM_START
+biosfn_select_video_dac_color_page:
+  push ax
+  push bx
+  push dx
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x10
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  and bl, #0x01
+  jnz set_dac_page
+  and al, #0x7f
+  shl bh, 7
+  or al, bh
+  mov dx, # VGAREG_ACTL_ADDRESS
+  out dx, al
+  jmp set_actl_normal
+set_dac_page:
+  push ax
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x14
+  out dx, al
+  pop ax
+  and al, #0x80
+  jnz set_dac_16_page
+  shl bh, 2
+set_dac_16_page:
+  and bh, #0x0f
+  mov al, bh
+  out dx, al
+set_actl_normal:
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop bx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=15h: read DAC register BL -> DH=red, CH=green, CL=blue.
+ASM_START
+biosfn_read_single_dac_reg:
+  push ax
+  push dx
+  mov dx, # VGAREG_DAC_READ_ADDRESS
+  mov al, bl
+  out dx, al
+  pop ax
+  mov ah, al
+  mov dx, # VGAREG_DAC_DATA
+  in al, dx
+  xchg al, ah
+  push ax
+  in al, dx
+  mov ch, al
+  in al, dx
+  mov cl, al
+  pop dx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=17h: store CX DAC registers starting at index BL as RGB triplets
+// into the buffer at ES:DX.
+ASM_START
+biosfn_read_all_dac_reg:
+  push ax
+  push bx
+  push cx
+  push dx
+  mov dx, # VGAREG_DAC_READ_ADDRESS
+  mov al, bl
+  out dx, al
+  pop dx
+  push dx
+  mov bx, dx
+  mov dx, # VGAREG_DAC_DATA
+read_dac_loop:
+  in al, dx
+  seg es
+  mov [bx], al
+  inc bx
+  in al, dx
+  seg es
+  mov [bx], al
+  inc bx
+  in al, dx
+  seg es
+  mov [bx], al
+  inc bx
+  dec cx
+  jnz read_dac_loop
+  pop dx
+  pop cx
+  pop bx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=18h: set the DAC pel mask register from BL.
+ASM_START
+biosfn_set_pel_mask:
+  push ax
+  push dx
+  mov dx, # VGAREG_PEL_MASK
+  mov al, bl
+  out dx, al
+  pop dx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=19h: read the DAC pel mask register into BL.
+ASM_START
+biosfn_read_pel_mask:
+  push ax
+  push dx
+  mov dx, # VGAREG_PEL_MASK
+  in al, dx
+  mov bl, al
+  pop dx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// AL=1Ah: read video DAC state — BL = paging mode (bit 7 of attribute
+// reg 0x10), BH = current colour page (attribute reg 0x14, scaled when
+// in 4-page mode).
+ASM_START
+biosfn_read_video_dac_state:
+  push ax
+  push dx
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x10
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  mov bl, al
+  shr bl, 7
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x14
+  out dx, al
+  mov dx, # VGAREG_ACTL_READ_DATA
+  in al, dx
+  mov bh, al
+  and bh, #0x0f
+  test bl, #0x01
+  jnz get_dac_16_page
+  shr bh, 2
+get_dac_16_page:
+  mov dx, # VGAREG_ACTL_RESET
+  in al, dx
+  mov dx, # VGAREG_ACTL_ADDRESS
+  mov al, #0x20
+  out dx, al
+  pop dx
+  pop ax
+  ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// Gray-scale summing (INT 10h AX=101Bh): for 'count' DAC registers
+// starting at 'start', replace each RGB entry with its luminance
+// (0.30R + 0.59G + 0.11B, computed in 8.8 fixed point and rounded),
+// clamped to the 6-bit DAC maximum 0x3f.
+static void biosfn_perform_gray_scale_summing (start,count)
+Bit16u start;Bit16u count;
+{Bit8u r,g,b;
+ Bit16u i;
+ Bit16u index;
+
+ inb(VGAREG_ACTL_RESET);
+ outb(VGAREG_ACTL_ADDRESS,0x00);  // blank display while rewriting DAC
+
+ for( index = 0; index < count; index++ )
+  {
+   // set read address and switch to read mode
+   outb(VGAREG_DAC_READ_ADDRESS,start);
+   // get 6-bit wide RGB data values
+   r=inb( VGAREG_DAC_DATA );
+   g=inb( VGAREG_DAC_DATA );
+   b=inb( VGAREG_DAC_DATA );
+
+   // intensity = ( 0.3 * Red ) + ( 0.59 * Green ) + ( 0.11 * Blue )
+   i = ( ( 77*r + 151*g + 28*b ) + 0x80 ) >> 8;
+
+   if(i>0x3f)i=0x3f;
+
+   // set write address and switch to write mode
+   outb(VGAREG_DAC_WRITE_ADDRESS,start);
+   // write new intensity value
+   outb( VGAREG_DAC_DATA, i&0xff );
+   outb( VGAREG_DAC_DATA, i&0xff );
+   outb( VGAREG_DAC_DATA, i&0xff );
+   start++;
+  }
+ inb(VGAREG_ACTL_RESET);
+ outb(VGAREG_ACTL_ADDRESS,0x20);  // re-enable the display
+}
+
+// --------------------------------------------------------------------------------------------
+// Map the character-generator RAM (plane 2) at 0xa000 for font loading:
+// sequencer reset, write plane 2 only, sequential (non-odd/even)
+// addressing, and graphics controller set to read plane 2 / no
+// odd-even / A0000 64K mapping.  Pair with release_font_access().
+static void get_font_access()
+{
+ASM_START
+  mov dx, # VGAREG_SEQU_ADDRESS
+  mov ax, #0x0100
+  out dx, ax
+  mov ax, #0x0402
+  out dx, ax
+  mov ax, #0x0704
+  out dx, ax
+  mov ax, #0x0300
+  out dx, ax
+  mov dx, # VGAREG_GRDC_ADDRESS
+  mov ax, #0x0204
+  out dx, ax
+  mov ax, #0x0005
+  out dx, ax
+  mov ax, #0x0406
+  out dx, ax
+ASM_END
+}
+
+// Restore normal text-mode memory mapping after font loading: planes
+// 0/1 writable, odd/even addressing back on, and the graphics
+// controller memory map rebuilt from the misc-output register's I/O
+// address select bit (B8000 vs B0000 text mapping).
+static void release_font_access()
+{
+ASM_START
+  mov dx, # VGAREG_SEQU_ADDRESS
+  mov ax, #0x0100
+  out dx, ax
+  mov ax, #0x0302
+  out dx, ax
+  mov ax, #0x0304
+  out dx, ax
+  mov ax, #0x0300
+  out dx, ax
+  mov dx, # VGAREG_READ_MISC_OUTPUT
+  in al, dx
+  and al, #0x01
+  shl al, 2
+  or al, #0x0a
+  mov ah, al
+  mov al, #0x06
+  mov dx, # VGAREG_GRDC_ADDRESS
+  out dx, ax
+  mov ax, #0x0004
+  out dx, ax
+  mov ax, #0x1005
+  out dx, ax
+ASM_END
+}
+
+// Helper for the compiler/asm callers: unsigned 16-bit divide,
+// AX = AX / BX (DX zeroed first, remainder left in DX).
+ASM_START
+idiv_u:
+  xor dx,dx
+  div bx
+  ret
+ASM_END
+
+// Reprogram the character height: set CRTC maximum scan line (reg 9),
+// pick a matching cursor shape, then recompute the number of text rows
+// from the vertical display end (reg 0x12 plus overflow bits in reg 7)
+// and refresh the BDA char-height, row count and page size.
+static void set_scan_lines(lines) Bit8u lines;
+{
+ Bit16u crtc_addr,cols,page,vde;
+ Bit8u crtc_r9,ovl,rows;
+
+ crtc_addr = read_word(BIOSMEM_SEG,BIOSMEM_CRTC_ADDRESS);
+ outb(crtc_addr, 0x09);
+ crtc_r9 = inb(crtc_addr+1);
+ crtc_r9 = (crtc_r9 & 0xe0) | (lines - 1);
+ outb(crtc_addr+1, crtc_r9);
+ if(lines==8)
+  {
+   biosfn_set_cursor_shape(0x06,0x07);
+  }
+ else
+  {
+   biosfn_set_cursor_shape(lines-4,lines-3);
+  }
+ write_word(BIOSMEM_SEG,BIOSMEM_CHAR_HEIGHT, lines);
+ outb(crtc_addr, 0x12);
+ vde = inb(crtc_addr+1);
+ outb(crtc_addr, 0x07);
+ ovl = inb(crtc_addr+1);
+ // vertical display end bits 8/9 live in overflow reg bits 1 and 6
+ vde += (((ovl & 0x02) << 7) + ((ovl & 0x40) << 3) + 1);
+ rows = vde / lines;
+ write_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS, rows-1);
+ cols = read_word(BIOSMEM_SEG,BIOSMEM_NB_COLS);
+ write_word(BIOSMEM_SEG,BIOSMEM_PAGE_SIZE, rows * cols * 2);
+}
+
+// Load a user-supplied text font from ES:BP into font block BL of plane 2:
+// CX glyphs of BH bytes each, starting at glyph index DX.  Each glyph slot
+// in the block is 32 bytes.  If AL >= 0x10, also reprogram the CRTC so the
+// display matches the new character height (the "load and activate" variant).
+static void biosfn_load_text_user_pat (AL,ES,BP,CX,DX,BL,BH) Bit8u AL;Bit16u ES;Bit16u BP;Bit16u CX;Bit16u DX;Bit8u BL;Bit8u BH;
+{
+ Bit16u blockaddr,dest,i,src;
+
+ get_font_access();
+ // Block base in plane 2: blocks 0-3 at 16K boundaries, +8K when BL bit 2 set.
+ blockaddr = ((BL & 0x03) << 14) + ((BL & 0x04) << 11);
+ for(i=0;i<CX;i++)
+ {
+  src = BP + i * BH;
+  dest = blockaddr + (DX + i) * 32;
+  memcpyb(0xA000, dest, ES, src, BH);
+ }
+ release_font_access();
+ if(AL>=0x10)
+ {
+  set_scan_lines(BH);
+ }
+}
+
+// Load the ROM 8x14 font (all 256 glyphs) into font block BL of plane 2.
+// If AL >= 0x10, also reprogram the CRTC for 14-line characters.
+static void biosfn_load_text_8_14_pat (AL,BL) Bit8u AL;Bit8u BL;
+{
+ Bit16u blockaddr,dest,i,src;
+
+ get_font_access();
+ blockaddr = ((BL & 0x03) << 14) + ((BL & 0x04) << 11);
+ for(i=0;i<0x100;i++)
+ {
+  src = i * 14;
+  dest = blockaddr + i * 32;      // 32-byte glyph slots in the font block
+  memcpyb(0xA000, dest, 0xC000, vgafont14+src, 14);
+ }
+ release_font_access();
+ if(AL>=0x10)
+ {
+  set_scan_lines(14);
+ }
+}
+
+// Load the ROM 8x8 font (all 256 glyphs) into font block BL of plane 2.
+// If AL >= 0x10, also reprogram the CRTC for 8-line characters.
+static void biosfn_load_text_8_8_pat (AL,BL) Bit8u AL;Bit8u BL;
+{
+ Bit16u blockaddr,dest,i,src;
+
+ get_font_access();
+ blockaddr = ((BL & 0x03) << 14) + ((BL & 0x04) << 11);
+ for(i=0;i<0x100;i++)
+ {
+  src = i * 8;
+  dest = blockaddr + i * 32;      // 32-byte glyph slots in the font block
+  memcpyb(0xA000, dest, 0xC000, vgafont8+src, 8);
+ }
+ release_font_access();
+ if(AL>=0x10)
+ {
+  set_scan_lines(8);
+ }
+}
+
+// --------------------------------------------------------------------------------------------
+ASM_START
+;; Write BL to sequencer register 3 (character map select), which picks
+;; the active font block(s) for text modes.  Preserves ax/dx.
+biosfn_set_text_block_specifier:
+ push ax
+ push dx
+ mov dx, # VGAREG_SEQU_ADDRESS
+ mov ah, bl
+ mov al, #0x03
+ out dx, ax
+ pop dx
+ pop ax
+ ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// Load the ROM 8x16 font (all 256 glyphs) into font block BL of plane 2.
+// If AL >= 0x10, also reprogram the CRTC for 16-line characters.
+static void biosfn_load_text_8_16_pat (AL,BL) Bit8u AL;Bit8u BL;
+{
+ Bit16u blockaddr,dest,i,src;
+
+ get_font_access();
+ blockaddr = ((BL & 0x03) << 14) + ((BL & 0x04) << 11);
+ for(i=0;i<0x100;i++)
+ {
+  src = i * 16;
+  dest = blockaddr + i * 32;      // 32-byte glyph slots in the font block
+  memcpyb(0xA000, dest, 0xC000, vgafont16+src, 16);
+ }
+ release_font_access();
+ if(AL>=0x10)
+ {
+  set_scan_lines(16);
+ }
+}
+
+// Graphics-mode font setup stub (user 8x8 font at ES:BP): not implemented;
+// DEBUG builds log the call, release builds silently ignore it.
+static void biosfn_load_gfx_8_8_chars (ES,BP) Bit16u ES;Bit16u BP;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+// Graphics-mode font setup stub (user font, CX bytes/char, row spec in
+// BL/DL): not implemented.
+static void biosfn_load_gfx_user_chars (ES,BP,CX,BL,DL) Bit16u ES;Bit16u BP;Bit16u CX;Bit8u BL;Bit8u DL;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+// Graphics-mode font setup stub (ROM 8x14 font): not implemented.
+static void biosfn_load_gfx_8_14_chars (BL) Bit8u BL;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+// Graphics-mode font setup stub (ROM 8x8 double-dot font): not implemented.
+static void biosfn_load_gfx_8_8_dd_chars (BL) Bit8u BL;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+// Graphics-mode font setup stub (ROM 8x16 font): not implemented.
+static void biosfn_load_gfx_8_16_chars (BL) Bit8u BL;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+// --------------------------------------------------------------------------------------------
+// Get font information (INT 10h font-info style call).  BH selects which
+// font; the result segment:offset is stored through the ES/BP pointers,
+// which point into the caller's stack (hence writes via SS).  CX gets the
+// current character height, DX the highest text row index, both from the
+// BIOS data area.  Unknown BH values are discarded without touching CX/DX.
+static void biosfn_get_font_info (BH,ES,BP,CX,DX)
+Bit8u BH;Bit16u *ES;Bit16u *BP;Bit16u *CX;Bit16u *DX;
+{Bit16u ss=get_SS();
+
+ switch(BH)
+ {case 0x00:
+   // contents of the INT 1Fh vector (8x8 graphics font extension)
+   write_word(ss,ES,read_word(0x00,0x1f*4));
+   write_word(ss,BP,read_word(0x00,(0x1f*4)+2));
+   break;
+  case 0x01:
+   // contents of the INT 43h vector (current character table)
+   write_word(ss,ES,read_word(0x00,0x43*4));
+   write_word(ss,BP,read_word(0x00,(0x43*4)+2));
+   break;
+  case 0x02:
+   // ROM 8x14 font
+   write_word(ss,ES,0xC000);
+   write_word(ss,BP,vgafont14);
+   break;
+  case 0x03:
+   // ROM 8x8 font, characters 0x00-0x7f
+   write_word(ss,ES,0xC000);
+   write_word(ss,BP,vgafont8);
+   break;
+  case 0x04:
+   // ROM 8x8 font, characters 0x80-0xff
+   write_word(ss,ES,0xC000);
+   write_word(ss,BP,vgafont8+128*8);
+   break;
+  case 0x05:
+   // alternate 9x14 font (overrides for selected glyphs)
+   write_word(ss,ES,0xC000);
+   write_word(ss,BP,vgafont14alt);
+   break;
+  case 0x06:
+   // ROM 8x16 font
+   write_word(ss,ES,0xC000);
+   write_word(ss,BP,vgafont16);
+   break;
+  case 0x07:
+   // alternate 9x16 font
+   write_word(ss,ES,0xC000);
+   write_word(ss,BP,vgafont16alt);
+   break;
+  default:
+   #ifdef DEBUG
+   printf("Get font info BH(%02x) was discarded\n",BH);
+   #endif
+   return;
+ }
+ // Set byte/char of on screen font
+ write_word(ss,CX,(Bit16u)read_byte(BIOSMEM_SEG,BIOSMEM_CHAR_HEIGHT));
+
+ // Set Highest char row
+ write_word(ss,DX,(Bit16u)read_byte(BIOSMEM_SEG,BIOSMEM_NB_ROWS));
+}
+
+// --------------------------------------------------------------------------------------------
+ASM_START
+;; Get EGA information.  Returns:
+;;   CL = low nibble of the BDA switch settings
+;;   BH = 0 color (CRTC at 3D4h) / 1 mono (CRTC at 3B4h)
+;;   BL = 3 (memory size code)
+biosfn_get_ega_info:
+ push ds
+ push ax
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ xor ch, ch
+ mov bx, # BIOSMEM_SWITCHES
+ mov cl, [bx]
+ and cl, #0x0f
+ mov bx, # BIOSMEM_CRTC_ADDRESS
+ mov ax, [bx]
+ mov bx, #0x0003
+ cmp ax, # VGAREG_MDA_CRTC_ADDRESS
+ jne mode_ega_color              ;; not the MDA address -> color, BH stays 0
+ mov bh, #0x01                   ;; MDA CRTC address -> mono
+mode_ega_color:
+ pop ax
+ pop ds
+ ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// Install alternate print-screen handler: not implemented; DEBUG builds
+// log the call, release builds silently ignore it.
+static void biosfn_alternate_prtsc()
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+
+// --------------------------------------------------------------------------------------------
+ASM_START
+;; INT 10h AH=12h "alternate select" helpers.  Each routine reads AL as its
+;; argument, updates the relevant BDA byte (or the misc output register) and
+;; returns AX=0x1212, i.e. AL=0x12 meaning "function supported".
+biosfn_select_vert_res:
+
+; res : 00 200 lines, 01 350 lines, 02 400 lines
+
+ push ds
+ push bx
+ push dx
+ mov dl, al
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ mov bx, # BIOSMEM_MODESET_CTL
+ mov al, [bx]
+ mov bx, # BIOSMEM_SWITCHES
+ mov ah, [bx]
+ cmp dl, #0x01
+ je vert_res_350
+ jb vert_res_200
+ cmp dl, #0x02
+ je vert_res_400
+#ifdef DEBUG
+ mov al, dl
+ xor ah, ah
+ push ax
+ mov bx, #msg_vert_res
+ push bx
+ call _printf
+ add sp, #4
+#endif
+ jmp set_retcode
+vert_res_400:
+
+ ; reset modeset ctl bit 7 and set bit 4
+ ; set switches bit 3-0 to 0x09
+
+ and al, #0x7f
+ or al, #0x10
+ and ah, #0xf0
+ or ah, #0x09
+ jnz set_vert_res    ;; the "or" above cleared ZF, so this is always taken
+vert_res_350:
+
+ ; reset modeset ctl bit 7 and bit 4
+ ; set switches bit 3-0 to 0x09
+
+ and al, #0x6f
+ and ah, #0xf0
+ or ah, #0x09
+ jnz set_vert_res    ;; always taken, see above
+vert_res_200:
+
+ ; set modeset ctl bit 7 and reset bit 4
+ ; set switches bit 3-0 to 0x08
+
+ and al, #0xef
+ or al, #0x80
+ and ah, #0xf0
+ or ah, #0x08
+set_vert_res:
+ mov bx, # BIOSMEM_MODESET_CTL
+ mov [bx], al
+ mov bx, # BIOSMEM_SWITCHES
+ mov [bx], ah
+set_retcode:
+ mov ax, #0x1212
+ pop dx
+ pop bx
+ pop ds
+ ret
+
+#ifdef DEBUG
+msg_vert_res:
+.ascii "Select vert res (%02x) was discarded"
+.byte 0x0d,0x0a,0x00
+#endif
+
+
+;; AL bit 0 -> modeset ctl bit 3 (default palette loading on mode set).
+biosfn_enable_default_palette_loading:
+ push ds
+ push bx
+ push dx
+ mov dl, al
+ and dl, #0x01
+ shl dl, 3
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ mov bx, # BIOSMEM_MODESET_CTL
+ mov al, [bx]
+ and al, #0xf7
+ or al, dl
+ mov [bx], al
+ mov ax, #0x1212
+ pop dx
+ pop bx
+ pop ds
+ ret
+
+
+;; Inverted AL bit 0 -> misc output register bit 1 (CPU access to video RAM).
+biosfn_enable_video_addressing:
+ push bx
+ push dx
+ mov bl, al
+ and bl, #0x01
+ xor bl, #0x01
+ shl bl, 1
+ mov dx, # VGAREG_READ_MISC_OUTPUT
+ in al, dx
+ and al, #0xfd
+ or al, bl
+ mov dx, # VGAREG_WRITE_MISC_OUTPUT
+ out dx, al
+ mov ax, #0x1212
+ pop dx
+ pop bx
+ ret
+
+
+;; Inverted AL bit 0 -> modeset ctl bit 1 (gray-scale summing).
+biosfn_enable_grayscale_summing:
+ push ds
+ push bx
+ push dx
+ mov dl, al
+ and dl, #0x01
+ xor dl, #0x01
+ shl dl, 1
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ mov bx, # BIOSMEM_MODESET_CTL
+ mov al, [bx]
+ and al, #0xfd
+ or al, dl
+ mov [bx], al
+ mov ax, #0x1212
+ pop dx
+ pop bx
+ pop ds
+ ret
+
+
+;; Inverted AL bit 0 -> modeset ctl bit 0 (CGA cursor emulation).
+biosfn_enable_cursor_emulation:
+ push ds
+ push bx
+ push dx
+ mov dl, al
+ and dl, #0x01
+ xor dl, #0x01
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ mov bx, # BIOSMEM_MODESET_CTL
+ mov al, [bx]
+ and al, #0xfe
+ or al, dl
+ mov [bx], al
+ mov ax, #0x1212
+ pop dx
+ pop bx
+ pop ds
+ ret
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// Switch active video interface: not implemented; DEBUG builds log the
+// call, release builds silently ignore it.
+static void biosfn_switch_video_interface (AL,ES,DX) Bit8u AL;Bit16u ES;Bit16u DX;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+// Enable/disable video refresh control: not implemented.
+static void biosfn_enable_video_refresh_control (AL) Bit8u AL;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+
+// --------------------------------------------------------------------------------------------
+// Write a string (INT 10h AH=13h style): print `count` characters from
+// seg:offset on `page` starting at (row,col) via the teletype routine.
+// flag bit 1: the string is char/attribute pairs instead of a fixed attr.
+// flag bit 0: leave the cursor after the string; else restore it.
+// row == 0xff: start at the page's current cursor position.
+static void biosfn_write_string (flag,page,attr,count,row,col,seg,offset)
+Bit8u flag;Bit8u page;Bit8u attr;Bit16u count;Bit8u row;Bit8u col;Bit16u seg;Bit16u offset;
+{
+ Bit16u newcurs,oldcurs,dummy;
+ Bit8u car,carattr;
+
+ // Read curs info for the page
+ biosfn_get_cursor_pos(page,&dummy,&oldcurs);
+
+ // if row=0xff special case : use current cursor position
+ if(row==0xff)
+ {col=oldcurs&0x00ff;
+  row=(oldcurs&0xff00)>>8;
+ }
+
+ // cursor position words are row in the high byte, column in the low byte
+ newcurs=row; newcurs<<=8; newcurs+=col;
+ biosfn_set_cursor_pos(page,newcurs);
+
+ while(count--!=0)
+ {
+  car=read_byte(seg,offset++);
+  if((flag&0x02)!=0)
+   attr=read_byte(seg,offset++);
+
+  biosfn_write_teletype(car,page,attr,WITH_ATTR);
+ }
+
+ // Set back curs pos
+ if((flag&0x01)==0)
+  biosfn_set_cursor_pos(page,oldcurs);
+}
+
+// --------------------------------------------------------------------------------------------
+ASM_START
+;; INT 10h AH=1Ah display combination code dispatch:
+;;   AL=0 read DCC, AL=1 set DCC; anything else logs in DEBUG builds.
+biosfn_group_1A:
+ cmp al, #0x00
+ je biosfn_read_display_code
+ cmp al, #0x01
+ je biosfn_set_display_code
+#ifdef DEBUG
+ call _unknown
+#endif
+ ret
+;; Read DCC: BL = active display code from the BDA, BH = 0 (no alternate),
+;; AL = original AH (0x1A) to signal the function is supported.
+biosfn_read_display_code:
+ push ds
+ push ax
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ mov bx, # BIOSMEM_DCC_INDEX
+ mov al, [bx]
+ mov bl, al
+ xor bh, bh
+ pop ax
+ mov al, ah
+ pop ds
+ ret
+;; Set DCC: store BL into the BDA; the alternate code in BH is not kept
+;; (DEBUG builds print that it was discarded).  AL = original AH on return.
+biosfn_set_display_code:
+ push ds
+ push ax
+ push bx
+ mov ax, # BIOSMEM_SEG
+ mov ds, ax
+ mov ax, bx
+ mov bx, # BIOSMEM_DCC_INDEX
+ mov [bx], al
+#ifdef DEBUG
+ mov al, ah
+ xor ah, ah
+ push ax
+ mov bx, #msg_alt_dcc
+ push bx
+ call _printf
+ add sp, #4
+#endif
+ pop bx
+ pop ax
+ mov al, ah
+ pop ds
+ ret
+
+#ifdef DEBUG
+msg_alt_dcc:
+.ascii "Alternate Display code (%02x) was discarded"
+.byte 0x0d,0x0a,0x00
+#endif
+ASM_END
+
+// --------------------------------------------------------------------------------------------
+// Fill the caller's buffer at ES:DI with the functionality/state table
+// (INT 10h AH=1Bh style).  Layout: pointer to the static functionality
+// table, a raw copy of the BDA video fields, then fixed capability bytes.
+static void biosfn_read_state_info (BX,ES,DI)
+Bit16u BX;Bit16u ES;Bit16u DI;
+{
+ // Address of static functionality table
+ write_word(ES,DI+0x00,&static_functionality);
+ write_word(ES,DI+0x02,0xC000);
+
+ // Hard coded copy from BIOS area. Should it be cleaner ?
+ memcpyb(ES,DI+0x04,BIOSMEM_SEG,0x49,30);   // BDA 40:49..40:66 video state
+ memcpyb(ES,DI+0x22,BIOSMEM_SEG,0x84,3);    // BDA 40:84..40:86 rows/height
+
+ write_byte(ES,DI+0x25,read_byte(BIOSMEM_SEG,BIOSMEM_DCC_INDEX));
+ // Fixed capability values; offsets per the AH=1Bh state table layout
+ // (colors, pages, scan-line support, font blocks, misc flags).
+ write_byte(ES,DI+0x26,0);
+ write_byte(ES,DI+0x27,16);
+ write_byte(ES,DI+0x28,0);
+ write_byte(ES,DI+0x29,8);
+ write_byte(ES,DI+0x2a,2);
+ write_byte(ES,DI+0x2b,0);
+ write_byte(ES,DI+0x2c,0);
+ write_byte(ES,DI+0x31,3);
+ write_byte(ES,DI+0x32,0);
+
+ // remaining reserved bytes are zeroed
+ memsetb(ES,DI+0x33,0,13);
+}
+
+// --------------------------------------------------------------------------------------------
+// Video state save/restore group: all three subfunctions are unimplemented
+// stubs; DEBUG builds log the call, release builds silently ignore it.
+static void biosfn_read_video_state_size (CX,ES,BX) Bit16u CX;Bit16u ES;Bit16u BX;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+static void biosfn_save_video_state (CX,ES,BX) Bit16u CX;Bit16u ES;Bit16u BX;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+static void biosfn_restore_video_state (CX,ES,BX) Bit16u CX;Bit16u ES;Bit16u BX;
+{
+#ifdef DEBUG
+ unimplemented();
+#endif
+}
+
+// ============================================================================================
+//
+// Video Utils
+//
+// ============================================================================================
+
+// --------------------------------------------------------------------------------------------
+// Look up `mode` in the vga_modes table by its svgamode field.
+// Returns the index of the first matching entry, or 0xFF when the
+// mode number is unknown.
+static Bit8u find_vga_entry(mode)
+Bit8u mode;
+{
+ Bit8u idx;
+
+ for(idx=0;idx<=MODE_MAX;idx++)
+  {
+   if(vga_modes[idx].svgamode==mode)
+    return idx;
+  }
+ return 0xFF;
+}
+
+/* =========================================================== */
+/*
+ * Misc Utils
+*/
+/* =========================================================== */
+
+// --------------------------------------------------------------------------------------------
+// Fill `count` bytes at seg:offset with the low byte of `value` using
+// rep stosb.  count == 0 is a no-op.  Stack frame: 4[bp]=seg, 6[bp]=offset,
+// 8[bp]=value, 10[bp]=count.
+static void memsetb(seg,offset,value,count)
+ Bit16u seg;
+ Bit16u offset;
+ Bit16u value;
+ Bit16u count;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push cx
+ push es
+ push di
+
+ mov cx, 10[bp] ; count
+ cmp cx, #0x00
+ je memsetb_end
+ mov ax, 4[bp] ; segment
+ mov es, ax
+ mov ax, 6[bp] ; offset
+ mov di, ax
+ mov al, 8[bp] ; value
+ cld
+ rep
+ stosb
+
+memsetb_end:
+ pop di
+ pop es
+ pop cx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Fill `count` 16-bit words at seg:offset with `value` using rep stosw.
+// count == 0 is a no-op.  Same stack-frame layout as memsetb.
+static void memsetw(seg,offset,value,count)
+ Bit16u seg;
+ Bit16u offset;
+ Bit16u value;
+ Bit16u count;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push cx
+ push es
+ push di
+
+ mov cx, 10[bp] ; count
+ cmp cx, #0x00
+ je memsetw_end
+ mov ax, 4[bp] ; segment
+ mov es, ax
+ mov ax, 6[bp] ; offset
+ mov di, ax
+ mov ax, 8[bp] ; value
+ cld
+ rep
+ stosw
+
+memsetw_end:
+ pop di
+ pop es
+ pop cx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Copy `count` bytes from sseg:soffset to dseg:doffset with rep movsb.
+// Forward copy only: regions must not overlap with dest above source.
+// count == 0 is a no-op.
+static void memcpyb(dseg,doffset,sseg,soffset,count)
+ Bit16u dseg;
+ Bit16u doffset;
+ Bit16u sseg;
+ Bit16u soffset;
+ Bit16u count;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push cx
+ push es
+ push di
+ push ds
+ push si
+
+ mov cx, 12[bp] ; count
+ cmp cx, #0x0000
+ je memcpyb_end
+ mov ax, 4[bp] ; dsegment
+ mov es, ax
+ mov ax, 6[bp] ; doffset
+ mov di, ax
+ mov ax, 8[bp] ; ssegment
+ mov ds, ax
+ mov ax, 10[bp] ; soffset
+ mov si, ax
+ cld
+ rep
+ movsb
+
+memcpyb_end:
+ pop si
+ pop ds
+ pop di
+ pop es
+ pop cx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Copy `count` 16-bit words from sseg:soffset to dseg:doffset with
+// rep movsw.  Forward copy only; count == 0 is a no-op.
+static void memcpyw(dseg,doffset,sseg,soffset,count)
+ Bit16u dseg;
+ Bit16u doffset;
+ Bit16u sseg;
+ Bit16u soffset;
+ Bit16u count;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push cx
+ push es
+ push di
+ push ds
+ push si
+
+ mov cx, 12[bp] ; count
+ cmp cx, #0x0000
+ je memcpyw_end
+ mov ax, 4[bp] ; dsegment
+ mov es, ax
+ mov ax, 6[bp] ; doffset
+ mov di, ax
+ mov ax, 8[bp] ; ssegment
+ mov ds, ax
+ mov ax, 10[bp] ; soffset
+ mov si, ax
+ cld
+ rep
+ movsw
+
+memcpyw_end:
+ pop si
+ pop ds
+ pop di
+ pop es
+ pop cx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+/* =========================================================== */
+/*
+ * These functions where ripped from Kevin's rombios.c
+*/
+/* =========================================================== */
+
+// --------------------------------------------------------------------------------------------
+// Read one byte from seg:offset (far peek).  The result is left in al,
+// which is the bcc return register for 8-bit values.
+static Bit8u
+read_byte(seg, offset)
+ Bit16u seg;
+ Bit16u offset;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov al, [bx]
+ ;; al = return value (byte)
+ pop ds
+ pop bx
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Read one 16-bit word from seg:offset (far peek).  The result is left
+// in ax, the bcc return register for 16-bit values.
+static Bit16u
+read_word(seg, offset)
+ Bit16u seg;
+ Bit16u offset;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov ax, [bx]
+ ;; ax = return value (word)
+ pop ds
+ pop bx
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Write one byte `data` to seg:offset (far poke).
+static void
+write_byte(seg, offset, data)
+ Bit16u seg;
+ Bit16u offset;
+ Bit8u data;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov al, 8[bp] ; data byte
+ mov [bx], al ; write data byte
+ pop ds
+ pop bx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Write one 16-bit word `data` to seg:offset (far poke).
+static void
+write_word(seg, offset, data)
+ Bit16u seg;
+ Bit16u offset;
+ Bit16u data;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push bx
+ push ds
+ mov ax, 4[bp] ; segment
+ mov ds, ax
+ mov bx, 6[bp] ; offset
+ mov ax, 8[bp] ; data word
+ mov [bx], ax ; write data word
+ pop ds
+ pop bx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Read one byte from I/O port `port`; result returned in al.
+ Bit8u
+inb(port)
+ Bit16u port;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push dx
+ mov dx, 4[bp]
+ in al, dx
+ pop dx
+
+ pop bp
+ASM_END
+}
+
+// Read one 16-bit word from I/O port `port`; result returned in ax.
+ Bit16u
+inw(port)
+ Bit16u port;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push dx
+ mov dx, 4[bp]
+ in ax, dx
+ pop dx
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Write byte `val` to I/O port `port`.
+ void
+outb(port, val)
+ Bit16u port;
+ Bit8u val;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push dx
+ mov dx, 4[bp]
+ mov al, 6[bp]
+ out dx, al
+ pop dx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+// --------------------------------------------------------------------------------------------
+// Write 16-bit word `val` to I/O port `port` (one bus cycle: low byte to
+// `port`, high byte to `port`+1 -- the usual index/data register pattern).
+ void
+outw(port, val)
+ Bit16u port;
+ Bit16u val;
+{
+ASM_START
+ push bp
+ mov bp, sp
+
+ push ax
+ push dx
+ mov dx, 4[bp]
+ mov ax, 6[bp]
+ out dx, ax
+ pop dx
+ pop ax
+
+ pop bp
+ASM_END
+}
+
+// Return the current stack segment (ax is the bcc return register);
+// used to write results through pointers into the caller's stack.
+Bit16u get_SS()
+{
+ASM_START
+ mov ax, ss
+ASM_END
+}
+
+#ifdef DEBUG
+// Debug trace for stubbed-out BIOS functions.
+void unimplemented()
+{
+ printf("--> Unimplemented\n");
+}
+
+// Debug trace for unrecognized INT 10h subfunction numbers.
+void unknown()
+{
+ printf("--> Unknown int10\n");
+}
+#endif
+
+// --------------------------------------------------------------------------------------------
+// --------------------------------------------------------------------------------------------
+#if defined(USE_BX_INFO) || defined(DEBUG) || defined(CIRRUS_DEBUG)
+// Minimal printf: supports only literal text and %x (with optional width,
+// default 4 hex digits).  The format string is read from segment 0xC000
+// (this BIOS's own ROM segment), so `s` must point into the ROM.  Varargs
+// are walked manually as 16-bit words on the caller's stack.  Output goes
+// byte-by-byte to port 0xE9 -- presumably the Bochs debug console port;
+// confirm against the emulator configuration.
+void printf(s)
+ Bit8u *s;
+{
+ Bit8u c, format_char;
+ Boolean in_format;
+ unsigned format_width, i;
+ Bit16u *arg_ptr;
+ Bit16u arg_seg, arg, digit, nibble, shift_count;
+
+ arg_ptr = &s;                  // start of the argument list on the stack
+ arg_seg = get_SS();
+
+ in_format = 0;
+ format_width = 0;
+
+ while (c = read_byte(0xc000, s)) {
+  if ( c == '%' ) {
+   in_format = 1;
+   format_width = 0;
+  }
+  else if (in_format) {
+   if ( (c>='0') && (c<='9') ) {
+    format_width = (format_width * 10) + (c - '0');
+   }
+   else if (c == 'x') {
+    arg_ptr++; // increment to next arg
+    arg = read_word(arg_seg, arg_ptr);
+    if (format_width == 0)
+     format_width = 4;
+    i = 0;
+    digit = format_width - 1;
+    // emit the value most-significant nibble first
+    for (i=0; i<format_width; i++) {
+     nibble = (arg >> (4 * digit)) & 0x000f;
+     if (nibble <= 9)
+      outb(0xE9, nibble + '0');
+     else
+      outb(0xE9, (nibble - 10) + 'A');
+     digit--;
+    }
+    in_format = 0;
+   }
+   //else if (c == 'd') {
+   // in_format = 0;
+   // }
+  }
+  else {
+   outb(0xE9, c);
+  }
+  s ++;
+ }
+}
+#endif
+
+#ifdef VBE
+#include "vbe.c"
+#endif
+
+#ifdef CIRRUS
+#include "clext.c"
+#endif
+
+// --------------------------------------------------------------------------------------------
+
+ASM_START
+;; DATA_SEG_DEFS_HERE
+ASM_END
+
+ASM_START
+.ascii "vgabios ends here"
+.byte 0x00
+vgabios_end:
+.byte 0xCB
+;; BLOCK_STRINGS_BEGIN
+ASM_END
diff --git a/tools/firmware/vgabios/vgabios.h b/tools/firmware/vgabios/vgabios.h
new file mode 100644
index 0000000000..3ad4bae94d
--- /dev/null
+++ b/tools/firmware/vgabios/vgabios.h
@@ -0,0 +1,47 @@
+#ifndef vgabios_h_included
+#define vgabios_h_included
+
+/* Types -- fixed-width integers for the 16-bit bcc compiler. */
+typedef unsigned char Bit8u;
+typedef unsigned short Bit16u;
+typedef unsigned long Bit32u;
+typedef unsigned short Boolean;
+
+/* Defines */
+
+/* Accessors for the 8-bit halves of the 16-bit pseudo-registers.
+ * AX/BX/CX/DX/FLAGS are assumed to be Bit16u lvalues holding the
+ * caller's register image -- declared elsewhere; confirm against the
+ * INT 10h entry stub. */
+#define SET_AL(val8) AX = ((AX & 0xff00) | (val8))
+#define SET_BL(val8) BX = ((BX & 0xff00) | (val8))
+#define SET_CL(val8) CX = ((CX & 0xff00) | (val8))
+#define SET_DL(val8) DX = ((DX & 0xff00) | (val8))
+#define SET_AH(val8) AX = ((AX & 0x00ff) | ((val8) << 8))
+#define SET_BH(val8) BX = ((BX & 0x00ff) | ((val8) << 8))
+#define SET_CH(val8) CX = ((CX & 0x00ff) | ((val8) << 8))
+#define SET_DH(val8) DX = ((DX & 0x00ff) | ((val8) << 8))
+
+#define GET_AL() ( AX & 0x00ff )
+#define GET_BL() ( BX & 0x00ff )
+#define GET_CL() ( CX & 0x00ff )
+#define GET_DL() ( DX & 0x00ff )
+#define GET_AH() ( AX >> 8 )
+#define GET_BH() ( BX >> 8 )
+#define GET_CH() ( CX >> 8 )
+#define GET_DH() ( DX >> 8 )
+
+/* Carry and zero flag manipulation in the saved FLAGS image. */
+#define SET_CF() FLAGS |= 0x0001
+#define CLEAR_CF() FLAGS &= 0xfffe
+#define GET_CF() (FLAGS & 0x0001)
+
+#define SET_ZF() FLAGS |= 0x0040
+#define CLEAR_ZF() FLAGS &= 0xffbf
+#define GET_ZF() (FLAGS & 0x0040)
+
+/* Scroll directions / attribute handling modes for the text routines. */
+#define SCROLL_DOWN 0
+#define SCROLL_UP 1
+#define NO_ATTR 2
+#define WITH_ATTR 3
+
+/* Text page geometry: size of one page (x cols * y rows * 2 bytes,
+ * rounded up to the next 256-byte boundary) and the memory / CRTC start
+ * offsets of page p.  Parameters are parenthesized so expression
+ * arguments (e.g. cols+1) expand correctly. */
+#define SCREEN_SIZE(x,y) ((((x)*(y)*2)|0x00ff)+1)
+#define SCREEN_MEM_START(x,y,p) (((((x)*(y)*2)|0x00ff)+1)*(p))
+#define SCREEN_IO_START(x,y,p) (((((x)*(y))|0x00ff)+1)*(p))
+
+#endif
diff --git a/tools/firmware/vgabios/vgafonts.h b/tools/firmware/vgabios/vgafonts.h
new file mode 100644
index 0000000000..0c213e66bc
--- /dev/null
+++ b/tools/firmware/vgabios/vgafonts.h
@@ -0,0 +1,784 @@
+/*
+ * These fonts come from ftp://ftp.simtel.net/pub/simtelnet/msdos/screen/fntcol16.zip
+ * The package is (c) by Joseph Gil
+ * The individual fonts are public domain
+ */
+static Bit8u vgafont8[256*8]=
+{
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x7e, 0x81, 0xa5, 0x81, 0xbd, 0x99, 0x81, 0x7e,
+ 0x7e, 0xff, 0xdb, 0xff, 0xc3, 0xe7, 0xff, 0x7e,
+ 0x6c, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, 0x00,
+ 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x10, 0x00,
+ 0x38, 0x7c, 0x38, 0xfe, 0xfe, 0x7c, 0x38, 0x7c,
+ 0x10, 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x7c,
+ 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x18, 0x00, 0x00,
+ 0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff,
+ 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
+ 0xff, 0xc3, 0x99, 0xbd, 0xbd, 0x99, 0xc3, 0xff,
+ 0x0f, 0x07, 0x0f, 0x7d, 0xcc, 0xcc, 0xcc, 0x78,
+ 0x3c, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18,
+ 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x70, 0xf0, 0xe0,
+ 0x7f, 0x63, 0x7f, 0x63, 0x63, 0x67, 0xe6, 0xc0,
+ 0x99, 0x5a, 0x3c, 0xe7, 0xe7, 0x3c, 0x5a, 0x99,
+ 0x80, 0xe0, 0xf8, 0xfe, 0xf8, 0xe0, 0x80, 0x00,
+ 0x02, 0x0e, 0x3e, 0xfe, 0x3e, 0x0e, 0x02, 0x00,
+ 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x7e, 0x3c, 0x18,
+ 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x66, 0x00,
+ 0x7f, 0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x00,
+ 0x3e, 0x63, 0x38, 0x6c, 0x6c, 0x38, 0xcc, 0x78,
+ 0x00, 0x00, 0x00, 0x00, 0x7e, 0x7e, 0x7e, 0x00,
+ 0x18, 0x3c, 0x7e, 0x18, 0x7e, 0x3c, 0x18, 0xff,
+ 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
+ 0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00,
+ 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
+ 0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xfe, 0x00, 0x00,
+ 0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00,
+ 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x00, 0x00,
+ 0x00, 0xff, 0xff, 0x7e, 0x3c, 0x18, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x30, 0x78, 0x78, 0x30, 0x30, 0x00, 0x30, 0x00,
+ 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x6c, 0x6c, 0xfe, 0x6c, 0xfe, 0x6c, 0x6c, 0x00,
+ 0x30, 0x7c, 0xc0, 0x78, 0x0c, 0xf8, 0x30, 0x00,
+ 0x00, 0xc6, 0xcc, 0x18, 0x30, 0x66, 0xc6, 0x00,
+ 0x38, 0x6c, 0x38, 0x76, 0xdc, 0xcc, 0x76, 0x00,
+ 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x30, 0x60, 0x60, 0x60, 0x30, 0x18, 0x00,
+ 0x60, 0x30, 0x18, 0x18, 0x18, 0x30, 0x60, 0x00,
+ 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
+ 0x00, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x30, 0x60,
+ 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x30, 0x00,
+ 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00,
+ 0x7c, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0x7c, 0x00,
+ 0x30, 0x70, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x00,
+ 0x78, 0xcc, 0x0c, 0x38, 0x60, 0xcc, 0xfc, 0x00,
+ 0x78, 0xcc, 0x0c, 0x38, 0x0c, 0xcc, 0x78, 0x00,
+ 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, 0x0c, 0x1e, 0x00,
+ 0xfc, 0xc0, 0xf8, 0x0c, 0x0c, 0xcc, 0x78, 0x00,
+ 0x38, 0x60, 0xc0, 0xf8, 0xcc, 0xcc, 0x78, 0x00,
+ 0xfc, 0xcc, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x00,
+ 0x78, 0xcc, 0xcc, 0x78, 0xcc, 0xcc, 0x78, 0x00,
+ 0x78, 0xcc, 0xcc, 0x7c, 0x0c, 0x18, 0x70, 0x00,
+ 0x00, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30, 0x00,
+ 0x00, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30, 0x60,
+ 0x18, 0x30, 0x60, 0xc0, 0x60, 0x30, 0x18, 0x00,
+ 0x00, 0x00, 0xfc, 0x00, 0x00, 0xfc, 0x00, 0x00,
+ 0x60, 0x30, 0x18, 0x0c, 0x18, 0x30, 0x60, 0x00,
+ 0x78, 0xcc, 0x0c, 0x18, 0x30, 0x00, 0x30, 0x00,
+ 0x7c, 0xc6, 0xde, 0xde, 0xde, 0xc0, 0x78, 0x00,
+ 0x30, 0x78, 0xcc, 0xcc, 0xfc, 0xcc, 0xcc, 0x00,
+ 0xfc, 0x66, 0x66, 0x7c, 0x66, 0x66, 0xfc, 0x00,
+ 0x3c, 0x66, 0xc0, 0xc0, 0xc0, 0x66, 0x3c, 0x00,
+ 0xf8, 0x6c, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00,
+ 0xfe, 0x62, 0x68, 0x78, 0x68, 0x62, 0xfe, 0x00,
+ 0xfe, 0x62, 0x68, 0x78, 0x68, 0x60, 0xf0, 0x00,
+ 0x3c, 0x66, 0xc0, 0xc0, 0xce, 0x66, 0x3e, 0x00,
+ 0xcc, 0xcc, 0xcc, 0xfc, 0xcc, 0xcc, 0xcc, 0x00,
+ 0x78, 0x30, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0x1e, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0x78, 0x00,
+ 0xe6, 0x66, 0x6c, 0x78, 0x6c, 0x66, 0xe6, 0x00,
+ 0xf0, 0x60, 0x60, 0x60, 0x62, 0x66, 0xfe, 0x00,
+ 0xc6, 0xee, 0xfe, 0xfe, 0xd6, 0xc6, 0xc6, 0x00,
+ 0xc6, 0xe6, 0xf6, 0xde, 0xce, 0xc6, 0xc6, 0x00,
+ 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x38, 0x00,
+ 0xfc, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00,
+ 0x78, 0xcc, 0xcc, 0xcc, 0xdc, 0x78, 0x1c, 0x00,
+ 0xfc, 0x66, 0x66, 0x7c, 0x6c, 0x66, 0xe6, 0x00,
+ 0x78, 0xcc, 0xe0, 0x70, 0x1c, 0xcc, 0x78, 0x00,
+ 0xfc, 0xb4, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xfc, 0x00,
+ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x00,
+ 0xc6, 0xc6, 0xc6, 0xd6, 0xfe, 0xee, 0xc6, 0x00,
+ 0xc6, 0xc6, 0x6c, 0x38, 0x38, 0x6c, 0xc6, 0x00,
+ 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x30, 0x78, 0x00,
+ 0xfe, 0xc6, 0x8c, 0x18, 0x32, 0x66, 0xfe, 0x00,
+ 0x78, 0x60, 0x60, 0x60, 0x60, 0x60, 0x78, 0x00,
+ 0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x02, 0x00,
+ 0x78, 0x18, 0x18, 0x18, 0x18, 0x18, 0x78, 0x00,
+ 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
+ 0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x76, 0x00,
+ 0xe0, 0x60, 0x60, 0x7c, 0x66, 0x66, 0xdc, 0x00,
+ 0x00, 0x00, 0x78, 0xcc, 0xc0, 0xcc, 0x78, 0x00,
+ 0x1c, 0x0c, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00,
+ 0x00, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00,
+ 0x38, 0x6c, 0x60, 0xf0, 0x60, 0x60, 0xf0, 0x00,
+ 0x00, 0x00, 0x76, 0xcc, 0xcc, 0x7c, 0x0c, 0xf8,
+ 0xe0, 0x60, 0x6c, 0x76, 0x66, 0x66, 0xe6, 0x00,
+ 0x30, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0x0c, 0x00, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0x78,
+ 0xe0, 0x60, 0x66, 0x6c, 0x78, 0x6c, 0xe6, 0x00,
+ 0x70, 0x30, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0x00, 0x00, 0xcc, 0xfe, 0xfe, 0xd6, 0xc6, 0x00,
+ 0x00, 0x00, 0xf8, 0xcc, 0xcc, 0xcc, 0xcc, 0x00,
+ 0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0x78, 0x00,
+ 0x00, 0x00, 0xdc, 0x66, 0x66, 0x7c, 0x60, 0xf0,
+ 0x00, 0x00, 0x76, 0xcc, 0xcc, 0x7c, 0x0c, 0x1e,
+ 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0xf0, 0x00,
+ 0x00, 0x00, 0x7c, 0xc0, 0x78, 0x0c, 0xf8, 0x00,
+ 0x10, 0x30, 0x7c, 0x30, 0x30, 0x34, 0x18, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x00,
+ 0x00, 0x00, 0xc6, 0xd6, 0xfe, 0xfe, 0x6c, 0x00,
+ 0x00, 0x00, 0xc6, 0x6c, 0x38, 0x6c, 0xc6, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xf8,
+ 0x00, 0x00, 0xfc, 0x98, 0x30, 0x64, 0xfc, 0x00,
+ 0x1c, 0x30, 0x30, 0xe0, 0x30, 0x30, 0x1c, 0x00,
+ 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00,
+ 0xe0, 0x30, 0x30, 0x1c, 0x30, 0x30, 0xe0, 0x00,
+ 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0x00,
+ 0x78, 0xcc, 0xc0, 0xcc, 0x78, 0x18, 0x0c, 0x78,
+ 0x00, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0x7e, 0x00,
+ 0x1c, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00,
+ 0x7e, 0xc3, 0x3c, 0x06, 0x3e, 0x66, 0x3f, 0x00,
+ 0xcc, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x7e, 0x00,
+ 0xe0, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x7e, 0x00,
+ 0x30, 0x30, 0x78, 0x0c, 0x7c, 0xcc, 0x7e, 0x00,
+ 0x00, 0x00, 0x78, 0xc0, 0xc0, 0x78, 0x0c, 0x38,
+ 0x7e, 0xc3, 0x3c, 0x66, 0x7e, 0x60, 0x3c, 0x00,
+ 0xcc, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00,
+ 0xe0, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00,
+ 0xcc, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0x7c, 0xc6, 0x38, 0x18, 0x18, 0x18, 0x3c, 0x00,
+ 0xe0, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0xc6, 0x38, 0x6c, 0xc6, 0xfe, 0xc6, 0xc6, 0x00,
+ 0x30, 0x30, 0x00, 0x78, 0xcc, 0xfc, 0xcc, 0x00,
+ 0x1c, 0x00, 0xfc, 0x60, 0x78, 0x60, 0xfc, 0x00,
+ 0x00, 0x00, 0x7f, 0x0c, 0x7f, 0xcc, 0x7f, 0x00,
+ 0x3e, 0x6c, 0xcc, 0xfe, 0xcc, 0xcc, 0xce, 0x00,
+ 0x78, 0xcc, 0x00, 0x78, 0xcc, 0xcc, 0x78, 0x00,
+ 0x00, 0xcc, 0x00, 0x78, 0xcc, 0xcc, 0x78, 0x00,
+ 0x00, 0xe0, 0x00, 0x78, 0xcc, 0xcc, 0x78, 0x00,
+ 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0x7e, 0x00,
+ 0x00, 0xe0, 0x00, 0xcc, 0xcc, 0xcc, 0x7e, 0x00,
+ 0x00, 0xcc, 0x00, 0xcc, 0xcc, 0x7c, 0x0c, 0xf8,
+ 0xc3, 0x18, 0x3c, 0x66, 0x66, 0x3c, 0x18, 0x00,
+ 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00,
+ 0x18, 0x18, 0x7e, 0xc0, 0xc0, 0x7e, 0x18, 0x18,
+ 0x38, 0x6c, 0x64, 0xf0, 0x60, 0xe6, 0xfc, 0x00,
+ 0xcc, 0xcc, 0x78, 0xfc, 0x30, 0xfc, 0x30, 0x30,
+ 0xf8, 0xcc, 0xcc, 0xfa, 0xc6, 0xcf, 0xc6, 0xc7,
+ 0x0e, 0x1b, 0x18, 0x3c, 0x18, 0x18, 0xd8, 0x70,
+ 0x1c, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x7e, 0x00,
+ 0x38, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
+ 0x00, 0x1c, 0x00, 0x78, 0xcc, 0xcc, 0x78, 0x00,
+ 0x00, 0x1c, 0x00, 0xcc, 0xcc, 0xcc, 0x7e, 0x00,
+ 0x00, 0xf8, 0x00, 0xf8, 0xcc, 0xcc, 0xcc, 0x00,
+ 0xfc, 0x00, 0xcc, 0xec, 0xfc, 0xdc, 0xcc, 0x00,
+ 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, 0x00,
+ 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x7c, 0x00, 0x00,
+ 0x30, 0x00, 0x30, 0x60, 0xc0, 0xcc, 0x78, 0x00,
+ 0x00, 0x00, 0x00, 0xfc, 0xc0, 0xc0, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xfc, 0x0c, 0x0c, 0x00, 0x00,
+ 0xc3, 0xc6, 0xcc, 0xde, 0x33, 0x66, 0xcc, 0x0f,
+ 0xc3, 0xc6, 0xcc, 0xdb, 0x37, 0x6f, 0xcf, 0x03,
+ 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00,
+ 0x00, 0x33, 0x66, 0xcc, 0x66, 0x33, 0x00, 0x00,
+ 0x00, 0xcc, 0x66, 0x33, 0x66, 0xcc, 0x00, 0x00,
+ 0x22, 0x88, 0x22, 0x88, 0x22, 0x88, 0x22, 0x88,
+ 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
+ 0xdb, 0x77, 0xdb, 0xee, 0xdb, 0x77, 0xdb, 0xee,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0xfe, 0x06, 0xf6, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0x36, 0x36, 0x37, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0xf7, 0x00, 0xff, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36,
+ 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0x36, 0x36, 0xff, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x3f, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0x3f, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36,
+ 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0xf8, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+ 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x76, 0xdc, 0xc8, 0xdc, 0x76, 0x00,
+ 0x00, 0x78, 0xcc, 0xf8, 0xcc, 0xf8, 0xc0, 0xc0,
+ 0x00, 0xfc, 0xcc, 0xc0, 0xc0, 0xc0, 0xc0, 0x00,
+ 0x00, 0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00,
+ 0xfc, 0xcc, 0x60, 0x30, 0x60, 0xcc, 0xfc, 0x00,
+ 0x00, 0x00, 0x7e, 0xd8, 0xd8, 0xd8, 0x70, 0x00,
+ 0x00, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0xc0,
+ 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x00,
+ 0xfc, 0x30, 0x78, 0xcc, 0xcc, 0x78, 0x30, 0xfc,
+ 0x38, 0x6c, 0xc6, 0xfe, 0xc6, 0x6c, 0x38, 0x00,
+ 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x6c, 0xee, 0x00,
+ 0x1c, 0x30, 0x18, 0x7c, 0xcc, 0xcc, 0x78, 0x00,
+ 0x00, 0x00, 0x7e, 0xdb, 0xdb, 0x7e, 0x00, 0x00,
+ 0x06, 0x0c, 0x7e, 0xdb, 0xdb, 0x7e, 0x60, 0xc0,
+ 0x38, 0x60, 0xc0, 0xf8, 0xc0, 0x60, 0x38, 0x00,
+ 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x00,
+ 0x00, 0xfc, 0x00, 0xfc, 0x00, 0xfc, 0x00, 0x00,
+ 0x30, 0x30, 0xfc, 0x30, 0x30, 0x00, 0xfc, 0x00,
+ 0x60, 0x30, 0x18, 0x30, 0x60, 0x00, 0xfc, 0x00,
+ 0x18, 0x30, 0x60, 0x30, 0x18, 0x00, 0xfc, 0x00,
+ 0x0e, 0x1b, 0x1b, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0x70,
+ 0x30, 0x30, 0x00, 0xfc, 0x00, 0x30, 0x30, 0x00,
+ 0x00, 0x76, 0xdc, 0x00, 0x76, 0xdc, 0x00, 0x00,
+ 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x0f, 0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x3c, 0x1c,
+ 0x78, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00,
+ 0x70, 0x18, 0x30, 0x60, 0x78, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x3c, 0x3c, 0x3c, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+static Bit8u vgafont14[256*14]=
+{
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, 0x99, 0x81, 0x7e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7e, 0xff, 0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0x7e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, 0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x1e, 0x0e, 0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7f, 0x63, 0x7f, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf8, 0xfe, 0xf8, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x06, 0x0e, 0x3e, 0xfe, 0x3e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7f, 0xdb, 0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00,
+ 0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, 0x7c, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x28, 0x6c, 0xfe, 0x6c, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, 0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x86, 0xc6, 0x7c, 0x18, 0x18, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, 0x30, 0x66, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x30, 0x18, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0xc0, 0xc0, 0xc0, 0xfc, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x0c, 0x78, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xde, 0xde, 0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0xfc, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xf8, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1e, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xe6, 0x66, 0x6c, 0x6c, 0x78, 0x6c, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xee, 0xfe, 0xfe, 0xd6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, 0x0c, 0x0e, 0x00, 0x00,
+ 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x60, 0x38, 0x0c, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7e, 0x7e, 0x5a, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x6c, 0x38, 0x10, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xd6, 0xfe, 0x7c, 0x6c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0x6c, 0x38, 0x38, 0x38, 0x6c, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0xc6, 0x8c, 0x18, 0x30, 0x60, 0xc2, 0xc6, 0xfe, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x80, 0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, 0x00, 0x00, 0x00,
+ 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00,
+ 0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xe0, 0x60, 0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00,
+ 0x00, 0x00, 0xe0, 0x60, 0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00,
+ 0x00, 0x00, 0xe0, 0x60, 0x60, 0x66, 0x6c, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xd6, 0xd6, 0xd6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x70, 0x1c, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x10, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xd6, 0xd6, 0xfe, 0x6c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x6c, 0x38, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xcc, 0x18, 0x30, 0x66, 0xfe, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x0e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x70, 0x18, 0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x10, 0x38, 0x6c, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x38, 0x6c, 0x38, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x66, 0x3c, 0x0c, 0x06, 0x3c, 0x00, 0x00,
+ 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xcc, 0xcc, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x66, 0x66, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0x3c, 0x66, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0xc6, 0xc6, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x38, 0x6c, 0x38, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x66, 0xfe, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xcc, 0x76, 0x36, 0x7e, 0xd8, 0xd8, 0x6e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3e, 0x6c, 0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00,
+ 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
+ 0x00, 0xc6, 0xc6, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00,
+ 0x00, 0xc6, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0x18, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0xe6, 0xfc, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0xf8, 0xcc, 0xcc, 0xf8, 0xc4, 0xcc, 0xde, 0xcc, 0xcc, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0xd8, 0x70, 0x00,
+ 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x0c, 0x18, 0x30, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x76, 0xdc, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00,
+ 0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xc0, 0xc0, 0xc6, 0xcc, 0xd8, 0x30, 0x60, 0xdc, 0x86, 0x0c, 0x18, 0x3e, 0x00,
+ 0x00, 0xc0, 0xc0, 0xc6, 0xcc, 0xd8, 0x30, 0x66, 0xce, 0x9e, 0x3e, 0x06, 0x06, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0x00, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, 0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
+ 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
+ 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xfc, 0xc6, 0xc6, 0xfc, 0xc0, 0xc0, 0x40, 0x00,
+ 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0xee, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x0f, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00,
+ 0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+static Bit8u vgafont16[256*16]=
+{
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, 0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7e, 0xff, 0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, 0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x1e, 0x0e, 0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, 0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7f, 0x63, 0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x02, 0x06, 0x0e, 0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7f, 0xdb, 0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, 0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, 0x18, 0x18, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, 0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x30, 0x18, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0xc3, 0xc3, 0xdb, 0xdb, 0xc3, 0xc3, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, 0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0xc0, 0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xde, 0xde, 0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xf8, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1e, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc3, 0xe7, 0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, 0x0c, 0x0e, 0x00, 0x00,
+ 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, 0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
+ 0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xe0, 0x60, 0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00,
+ 0x00, 0x00, 0xe0, 0x60, 0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00,
+ 0x00, 0x00, 0xe0, 0x60, 0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, 0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x10, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x70, 0x18, 0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
+ 0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x10, 0x38, 0x6c, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x38, 0x6c, 0x38, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, 0x3c, 0x00, 0x00, 0x00,
+ 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0x3c, 0x66, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x38, 0x6c, 0x38, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, 0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x3e, 0x6c, 0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc6, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
+ 0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0x18, 0x7e, 0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xfc, 0x66, 0x66, 0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0x70, 0x00, 0x00,
+ 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x0c, 0x18, 0x30, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x76, 0xdc, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+ 0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, 0x0c, 0x1f, 0x00, 0x00,
+ 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00,
+ 0x00, 0x00, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, 0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
+ 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
+ 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, 0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x0f, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+static Bit8u vgafont14alt[1]={0x00};
+static Bit8u vgafont16alt[1]={0x00};
diff --git a/tools/firmware/vgabios/vgatables.h b/tools/firmware/vgabios/vgatables.h
new file mode 100644
index 0000000000..e5eca1e4e7
--- /dev/null
+++ b/tools/firmware/vgabios/vgatables.h
@@ -0,0 +1,318 @@
+/*
+ *
+ * BIOS Memory
+ *
+ */
+#define BIOSMEM_SEG 0x40
+
+#define BIOSMEM_INITIAL_MODE 0x10
+#define BIOSMEM_CURRENT_MODE 0x49
+#define BIOSMEM_NB_COLS 0x4A
+#define BIOSMEM_PAGE_SIZE 0x4C
+#define BIOSMEM_CURRENT_START 0x4E
+#define BIOSMEM_CURSOR_POS 0x50
+#define BIOSMEM_CURSOR_TYPE 0x60
+#define BIOSMEM_CURRENT_PAGE 0x62
+#define BIOSMEM_CRTC_ADDRESS 0x63
+#define BIOSMEM_CURRENT_MSR 0x65
+#define BIOSMEM_CURRENT_PAL 0x66
+#define BIOSMEM_NB_ROWS 0x84
+#define BIOSMEM_CHAR_HEIGHT 0x85
+#define BIOSMEM_VIDEO_CTL 0x87
+#define BIOSMEM_SWITCHES 0x88
+#define BIOSMEM_MODESET_CTL 0x89
+#define BIOSMEM_DCC_INDEX 0x8A
+#define BIOSMEM_VS_POINTER 0xA8
+#define BIOSMEM_VBE_FLAG 0xB9
+#define BIOSMEM_VBE_MODE 0xBA
+
+
+/*
+ *
+ * VGA registers
+ *
+ */
+#define VGAREG_ACTL_ADDRESS 0x3c0
+#define VGAREG_ACTL_WRITE_DATA 0x3c0
+#define VGAREG_ACTL_READ_DATA 0x3c1
+
+#define VGAREG_INPUT_STATUS 0x3c2
+#define VGAREG_WRITE_MISC_OUTPUT 0x3c2
+#define VGAREG_VIDEO_ENABLE 0x3c3
+#define VGAREG_SEQU_ADDRESS 0x3c4
+#define VGAREG_SEQU_DATA 0x3c5
+
+#define VGAREG_PEL_MASK 0x3c6
+#define VGAREG_DAC_STATE 0x3c7
+#define VGAREG_DAC_READ_ADDRESS 0x3c7
+#define VGAREG_DAC_WRITE_ADDRESS 0x3c8
+#define VGAREG_DAC_DATA 0x3c9
+
+#define VGAREG_READ_FEATURE_CTL 0x3ca
+#define VGAREG_READ_MISC_OUTPUT 0x3cc
+
+#define VGAREG_GRDC_ADDRESS 0x3ce
+#define VGAREG_GRDC_DATA 0x3cf
+
+#define VGAREG_MDA_CRTC_ADDRESS 0x3b4
+#define VGAREG_MDA_CRTC_DATA 0x3b5
+#define VGAREG_VGA_CRTC_ADDRESS 0x3d4
+#define VGAREG_VGA_CRTC_DATA 0x3d5
+
+#define VGAREG_MDA_WRITE_FEATURE_CTL 0x3ba
+#define VGAREG_VGA_WRITE_FEATURE_CTL 0x3da
+#define VGAREG_ACTL_RESET 0x3da
+
+#define VGAREG_MDA_MODECTL 0x3b8
+#define VGAREG_CGA_MODECTL 0x3d8
+#define VGAREG_CGA_PALETTE 0x3d9
+
+/* Video memory */
+#define VGAMEM_GRAPH 0xA000
+#define VGAMEM_CTEXT 0xB800
+#define VGAMEM_MTEXT 0xB000
+
+/*
+ *
+ * Tables of default values for each mode
+ *
+ */
+#define MODE_MAX 0x14
+#define TEXT 0x00
+#define GRAPH 0x01
+
+#define CTEXT 0x00
+#define MTEXT 0x01
+#define CGA 0x02
+#define PLANAR1 0x03
+#define PLANAR4 0x04
+#define LINEAR8 0x05
+
+// for SVGA
+#define LINEAR15 0x10
+#define LINEAR16 0x11
+#define LINEAR24 0x12
+#define LINEAR32 0x13
+
+typedef struct
+{Bit8u svgamode;
+ Bit16u vesamode;
+ Bit8u class; /* TEXT, GRAPH */
+ Bit8u memmodel; /* CTEXT,MTEXT,CGA,PL1,PL2,PL4,P8,P15,P16,P24,P32 */
+ Bit8u nbpages;
+ Bit8u pixbits;
+ Bit16u swidth, sheight;
+ Bit16u twidth, theight;
+ Bit16u cwidth, cheight;
+ Bit16u sstart;
+ Bit16u slength;
+ Bit8u miscreg;
+ Bit8u pelmask;
+ Bit8u crtcmodel;
+ Bit8u actlmodel;
+ Bit8u grdcmodel;
+ Bit8u sequmodel;
+ Bit8u dacmodel; /* 0 1 2 3 */
+} VGAMODES;
+
+static VGAMODES vga_modes[MODE_MAX+1]=
+{//mode vesa class model pg bits sw sh tw th cw ch sstart slength misc pelm crtc actl gdc sequ dac
+ {0x00, 0xFFFF, TEXT, CTEXT, 8, 4, 360, 400, 40, 25, 9, 16, 0xB800, 0x0800, 0x67, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x02},
+ {0x01, 0xFFFF, TEXT, CTEXT, 8, 4, 360, 400, 40, 25, 9, 16, 0xB800, 0x0800, 0x67, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x02},
+ {0x02, 0xFFFF, TEXT, CTEXT, 4, 4, 720, 400, 80, 25, 9, 16, 0xB800, 0x1000, 0x67, 0xFF, 0x01, 0x00, 0x00, 0x01, 0x02},
+ {0x03, 0xFFFF, TEXT, CTEXT, 4, 4, 720, 400, 80, 25, 9, 16, 0xB800, 0x1000, 0x67, 0xFF, 0x01, 0x00, 0x00, 0x01, 0x02},
+ {0x04, 0xFFFF, GRAPH, CGA, 4, 2, 320, 200, 40, 25, 8, 8, 0xB800, 0x0800, 0x63, 0xFF, 0x02, 0x01, 0x01, 0x02, 0x01},
+ {0x05, 0xFFFF, GRAPH, CGA, 1, 2, 320, 200, 40, 25, 8, 8, 0xB800, 0x0800, 0x63, 0xFF, 0x02, 0x01, 0x01, 0x02, 0x01},
+ {0x06, 0xFFFF, GRAPH, CGA, 1, 1, 640, 200, 80, 25, 8, 8, 0xB800, 0x1000, 0x63, 0xFF, 0x03, 0x02, 0x02, 0x03, 0x01},
+ {0x07, 0xFFFF, TEXT, MTEXT, 4, 4, 720, 400, 80, 25, 9, 16, 0xB000, 0x1000, 0x66, 0xFF, 0x04, 0x03, 0x03, 0x01, 0x00},
+ {0x0D, 0xFFFF, GRAPH, PLANAR4, 8, 4, 320, 200, 40, 25, 8, 8, 0xA000, 0x2000, 0x63, 0xFF, 0x05, 0x04, 0x04, 0x04, 0x01},
+ {0x0E, 0xFFFF, GRAPH, PLANAR4, 4, 4, 640, 200, 80, 25, 8, 8, 0xA000, 0x4000, 0x63, 0xFF, 0x06, 0x04, 0x04, 0x05, 0x01},
+ {0x0F, 0xFFFF, GRAPH, PLANAR1, 2, 1, 640, 350, 80, 25, 8, 14, 0xA000, 0x8000, 0xa3, 0xFF, 0x07, 0x05, 0x04, 0x05, 0x00},
+ {0x10, 0xFFFF, GRAPH, PLANAR4, 2, 4, 640, 350, 80, 25, 8, 14, 0xA000, 0x8000, 0xa3, 0xFF, 0x07, 0x06, 0x04, 0x05, 0x02},
+ {0x11, 0xFFFF, GRAPH, PLANAR1, 1, 1, 640, 480, 80, 30, 8, 16, 0xA000, 0x0000, 0xe3, 0xFF, 0x08, 0x07, 0x04, 0x05, 0x02},
+ {0x12, 0xFFFF, GRAPH, PLANAR4, 1, 4, 640, 480, 80, 30, 8, 16, 0xA000, 0x0000, 0xe3, 0xFF, 0x08, 0x06, 0x04, 0x05, 0x02},
+ {0x13, 0xFFFF, GRAPH, LINEAR8, 1, 8, 320, 200, 40, 25, 8, 8, 0xA000, 0x0000, 0x63, 0xFF, 0x09, 0x08, 0x05, 0x06, 0x03},
+ {0x6A, 0xFFFF, GRAPH, PLANAR4, 1, 4, 800, 600,100, 37, 8, 16, 0xA000, 0x0000, 0xe3, 0xFF, 0x0A, 0x06, 0x04, 0x05, 0x02}
+};
+
+/* CRTC */
+#define CRTC_MAX_REG 0x18
+#define CRTC_MAX_MODEL 0x0A
+static Bit8u crtc_access[CRTC_MAX_REG+1]=
+{ /* 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+};
+static Bit8u crtc_regs[CRTC_MAX_MODEL+1][CRTC_MAX_REG+1]=
+{/* Model 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 */
+ /* 00 */ 0x2d,0x27,0x28,0x90,0x2b,0xa0,0xbf,0x1f,0x00,0x4f,0x0d,0x0e,0x00,0x00,0x00,0x00,0x9c,0x8e,0x8f,0x14,0x1f,0x96,0xb9,0xa3,0xff,
+ /* 01 */ 0x5f,0x4f,0x50,0x82,0x55,0x81,0xbf,0x1f,0x00,0x4f,0x0d,0x0e,0x00,0x00,0x00,0x00,0x9c,0x8e,0x8f,0x28,0x1f,0x96,0xb9,0xa3,0xff,
+ /* 02 */ 0x2d,0x27,0x28,0x90,0x2b,0x80,0xbf,0x1f,0x00,0xc1,0x00,0x00,0x00,0x00,0x00,0x00,0x9c,0x8e,0x8f,0x14,0x00,0x96,0xb9,0xa2,0xff,
+ /* 03 */ 0x5f,0x4f,0x50,0x82,0x54,0x80,0xbf,0x1f,0x00,0xc1,0x00,0x00,0x00,0x00,0x00,0x00,0x9c,0x8e,0x8f,0x28,0x00,0x96,0xb9,0xc2,0xff,
+ /* 04 */ 0x5f,0x4f,0x50,0x82,0x55,0x81,0xbf,0x1f,0x00,0x4f,0x0d,0x0e,0x00,0x00,0x00,0x00,0x9c,0x8e,0x8f,0x28,0x0f,0x96,0xb9,0xa3,0xff,
+ /* 05 */ 0x2d,0x27,0x28,0x90,0x2b,0x80,0xbf,0x1f,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x9c,0x8e,0x8f,0x14,0x00,0x96,0xb9,0xe3,0xff,
+ /* 06 */ 0x5f,0x4f,0x50,0x82,0x54,0x80,0xbf,0x1f,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x9c,0x8e,0x8f,0x28,0x00,0x96,0xb9,0xe3,0xff,
+ /* 07 */ 0x5f,0x4f,0x50,0x82,0x54,0x80,0xbf,0x1f,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x83,0x85,0x5d,0x28,0x0f,0x63,0xba,0xe3,0xff,
+ /* 08 */ 0x5f,0x4f,0x50,0x82,0x54,0x80,0x0b,0x3e,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0xea,0x8c,0xdf,0x28,0x00,0xe7,0x04,0xe3,0xff,
+ /* 09 */ 0x5f,0x4f,0x50,0x82,0x54,0x80,0xbf,0x1f,0x00,0x41,0x00,0x00,0x00,0x00,0x00,0x00,0x9c,0x8e,0x8f,0x28,0x40,0x96,0xb9,0xa3,0xff,
+ /* 0A */ 0x7f,0x63,0x63,0x83,0x6b,0x1b,0x72,0xf0,0x00,0x60,0x00,0x00,0x00,0x00,0x00,0x00,0x59,0x8d,0x57,0x32,0x00,0x57,0x73,0xe3,0xff
+};
+
+/* Attribute Controler 0x3c0 */
+#define ACTL_MAX_REG 0x14
+#define ACTL_MAX_MODEL 0x08
+
+static Bit8u actl_access[ACTL_MAX_REG+1]=
+{/* 00 01 02 03 04 05 06 07 08 09 0A 0B OC OD OE OF 10 11 12 13 14 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+};
+
+static Bit8u actl_regs[ACTL_MAX_MODEL+1][ACTL_MAX_REG+1]=
+{/* Model 00 01 02 03 04 05 06 07 08 09 0A 0B OC OD OE OF 10 11 12 13 14 */
+ /* 00 */ 0x00,0x01,0x02,0x03,0x04,0x05,0x14,0x07,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x0c,0x00,0x0f,0x08,0x00,
+ /* 01 */ 0x00,0x13,0x15,0x17,0x02,0x04,0x06,0x07,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x01,0x00,0x03,0x00,0x00,
+ /* 02 */ 0x00,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x01,0x00,0x01,0x00,0x00,
+ /* 03 */ 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x10,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x0e,0x00,0x0f,0x08,0x00,
+ /* 04 */ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x01,0x00,0x0f,0x00,0x00,
+ /* 05 */ 0x00,0x08,0x00,0x00,0x18,0x18,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x18,0x00,0x00,0x01,0x00,0x01,0x00,0x00,
+ /* 06 */ 0x00,0x01,0x02,0x03,0x04,0x05,0x14,0x07,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,0x01,0x00,0x0f,0x00,0x00,
+ /* 07 */ 0x00,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x3f,0x01,0x00,0x01,0x00,0x00,
+ /* 08 */ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x41,0x00,0x0f,0x00,0x00
+};
+
+/* Sequencer 0x3c4 */
+#define SEQU_MAX_REG 0x04
+#define SEQU_MAX_MODEL 0x06
+
+static Bit8u sequ_access[SEQU_MAX_REG+1]=
+{ /* 00 01 02 03 04 */
+ 0x00,0x00,0x00,0x00,0x00
+};
+
+static Bit8u sequ_regs[SEQU_MAX_MODEL+1][SEQU_MAX_REG+1]=
+{/* Model 00 01 02 03 04 */
+ /* 00 */ 0x03,0x08,0x03,0x00,0x02,
+ /* 01 */ 0x03,0x00,0x03,0x00,0x02,
+ /* 02 */ 0x03,0x09,0x03,0x00,0x02,
+ /* 03 */ 0x03,0x01,0x01,0x00,0x06,
+ /* 04 */ 0x03,0x09,0x0f,0x00,0x06,
+ /* 05 */ 0x03,0x01,0x0f,0x00,0x06,
+ /* 06 */ 0x03,0x01,0x0f,0x00,0x0e
+};
+
+/* Graphic ctl 0x3ce */
+#define GRDC_MAX_REG 0x08
+#define GRDC_MAX_MODEL 0x05
+
+static Bit8u grdc_access[GRDC_MAX_REG+1]=
+{ /* 00 01 02 03 04 05 06 07 08 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+};
+
+static Bit8u grdc_regs[GRDC_MAX_MODEL+1][GRDC_MAX_REG+1]=
+{/* Model 00 01 02 03 04 05 06 07 08 */
+ /* 00 */ 0x00,0x00,0x00,0x00,0x00,0x10,0x0e,0x0f,0xff,
+ /* 01 */ 0x00,0x00,0x00,0x00,0x00,0x30,0x0f,0x0f,0xff,
+ /* 02 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x0d,0x0f,0xff,
+ /* 03 */ 0x00,0x00,0x00,0x00,0x00,0x10,0x0a,0x0f,0xff,
+ /* 04 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x0f,0xff,
+ /* 05 */ 0x00,0x00,0x00,0x00,0x00,0x40,0x05,0x0f,0xff
+};
+
+/* Default Palette */
+#define DAC_MAX_MODEL 3
+
+static Bit8u dac_regs[DAC_MAX_MODEL+1]=
+{0x3f,0x3f,0x3f,0xff};
+
+/* Mono */
+static Bit8u palette0[63+1][3]=
+{
+ 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00,
+ 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a,
+ 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a,
+ 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f,
+ 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00,
+ 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a,
+ 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a, 0x2a,0x2a,0x2a,
+ 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f, 0x3f,0x3f,0x3f
+};
+
+static Bit8u palette1[63+1][3]=
+{
+ 0x00,0x00,0x00, 0x00,0x00,0x2a, 0x00,0x2a,0x00, 0x00,0x2a,0x2a, 0x2a,0x00,0x00, 0x2a,0x00,0x2a, 0x2a,0x15,0x00, 0x2a,0x2a,0x2a,
+ 0x00,0x00,0x00, 0x00,0x00,0x2a, 0x00,0x2a,0x00, 0x00,0x2a,0x2a, 0x2a,0x00,0x00, 0x2a,0x00,0x2a, 0x2a,0x15,0x00, 0x2a,0x2a,0x2a,
+ 0x15,0x15,0x15, 0x15,0x15,0x3f, 0x15,0x3f,0x15, 0x15,0x3f,0x3f, 0x3f,0x15,0x15, 0x3f,0x15,0x3f, 0x3f,0x3f,0x15, 0x3f,0x3f,0x3f,
+ 0x15,0x15,0x15, 0x15,0x15,0x3f, 0x15,0x3f,0x15, 0x15,0x3f,0x3f, 0x3f,0x15,0x15, 0x3f,0x15,0x3f, 0x3f,0x3f,0x15, 0x3f,0x3f,0x3f,
+ 0x00,0x00,0x00, 0x00,0x00,0x2a, 0x00,0x2a,0x00, 0x00,0x2a,0x2a, 0x2a,0x00,0x00, 0x2a,0x00,0x2a, 0x2a,0x15,0x00, 0x2a,0x2a,0x2a,
+ 0x00,0x00,0x00, 0x00,0x00,0x2a, 0x00,0x2a,0x00, 0x00,0x2a,0x2a, 0x2a,0x00,0x00, 0x2a,0x00,0x2a, 0x2a,0x15,0x00, 0x2a,0x2a,0x2a,
+ 0x15,0x15,0x15, 0x15,0x15,0x3f, 0x15,0x3f,0x15, 0x15,0x3f,0x3f, 0x3f,0x15,0x15, 0x3f,0x15,0x3f, 0x3f,0x3f,0x15, 0x3f,0x3f,0x3f,
+ 0x15,0x15,0x15, 0x15,0x15,0x3f, 0x15,0x3f,0x15, 0x15,0x3f,0x3f, 0x3f,0x15,0x15, 0x3f,0x15,0x3f, 0x3f,0x3f,0x15, 0x3f,0x3f,0x3f
+};
+
+static Bit8u palette2[63+1][3]=
+{
+ 0x00,0x00,0x00, 0x00,0x00,0x2a, 0x00,0x2a,0x00, 0x00,0x2a,0x2a, 0x2a,0x00,0x00, 0x2a,0x00,0x2a, 0x2a,0x2a,0x00, 0x2a,0x2a,0x2a,
+ 0x00,0x00,0x15, 0x00,0x00,0x3f, 0x00,0x2a,0x15, 0x00,0x2a,0x3f, 0x2a,0x00,0x15, 0x2a,0x00,0x3f, 0x2a,0x2a,0x15, 0x2a,0x2a,0x3f,
+ 0x00,0x15,0x00, 0x00,0x15,0x2a, 0x00,0x3f,0x00, 0x00,0x3f,0x2a, 0x2a,0x15,0x00, 0x2a,0x15,0x2a, 0x2a,0x3f,0x00, 0x2a,0x3f,0x2a,
+ 0x00,0x15,0x15, 0x00,0x15,0x3f, 0x00,0x3f,0x15, 0x00,0x3f,0x3f, 0x2a,0x15,0x15, 0x2a,0x15,0x3f, 0x2a,0x3f,0x15, 0x2a,0x3f,0x3f,
+ 0x15,0x00,0x00, 0x15,0x00,0x2a, 0x15,0x2a,0x00, 0x15,0x2a,0x2a, 0x3f,0x00,0x00, 0x3f,0x00,0x2a, 0x3f,0x2a,0x00, 0x3f,0x2a,0x2a,
+ 0x15,0x00,0x15, 0x15,0x00,0x3f, 0x15,0x2a,0x15, 0x15,0x2a,0x3f, 0x3f,0x00,0x15, 0x3f,0x00,0x3f, 0x3f,0x2a,0x15, 0x3f,0x2a,0x3f,
+ 0x15,0x15,0x00, 0x15,0x15,0x2a, 0x15,0x3f,0x00, 0x15,0x3f,0x2a, 0x3f,0x15,0x00, 0x3f,0x15,0x2a, 0x3f,0x3f,0x00, 0x3f,0x3f,0x2a,
+ 0x15,0x15,0x15, 0x15,0x15,0x3f, 0x15,0x3f,0x15, 0x15,0x3f,0x3f, 0x3f,0x15,0x15, 0x3f,0x15,0x3f, 0x3f,0x3f,0x15, 0x3f,0x3f,0x3f
+};
+
+static Bit8u palette3[256][3]=
+{
+ 0x00,0x00,0x00, 0x00,0x00,0x2a, 0x00,0x2a,0x00, 0x00,0x2a,0x2a, 0x2a,0x00,0x00, 0x2a,0x00,0x2a, 0x2a,0x15,0x00, 0x2a,0x2a,0x2a,
+ 0x15,0x15,0x15, 0x15,0x15,0x3f, 0x15,0x3f,0x15, 0x15,0x3f,0x3f, 0x3f,0x15,0x15, 0x3f,0x15,0x3f, 0x3f,0x3f,0x15, 0x3f,0x3f,0x3f,
+ 0x00,0x00,0x00, 0x05,0x05,0x05, 0x08,0x08,0x08, 0x0b,0x0b,0x0b, 0x0e,0x0e,0x0e, 0x11,0x11,0x11, 0x14,0x14,0x14, 0x18,0x18,0x18,
+ 0x1c,0x1c,0x1c, 0x20,0x20,0x20, 0x24,0x24,0x24, 0x28,0x28,0x28, 0x2d,0x2d,0x2d, 0x32,0x32,0x32, 0x38,0x38,0x38, 0x3f,0x3f,0x3f,
+ 0x00,0x00,0x3f, 0x10,0x00,0x3f, 0x1f,0x00,0x3f, 0x2f,0x00,0x3f, 0x3f,0x00,0x3f, 0x3f,0x00,0x2f, 0x3f,0x00,0x1f, 0x3f,0x00,0x10,
+ 0x3f,0x00,0x00, 0x3f,0x10,0x00, 0x3f,0x1f,0x00, 0x3f,0x2f,0x00, 0x3f,0x3f,0x00, 0x2f,0x3f,0x00, 0x1f,0x3f,0x00, 0x10,0x3f,0x00,
+ 0x00,0x3f,0x00, 0x00,0x3f,0x10, 0x00,0x3f,0x1f, 0x00,0x3f,0x2f, 0x00,0x3f,0x3f, 0x00,0x2f,0x3f, 0x00,0x1f,0x3f, 0x00,0x10,0x3f,
+ 0x1f,0x1f,0x3f, 0x27,0x1f,0x3f, 0x2f,0x1f,0x3f, 0x37,0x1f,0x3f, 0x3f,0x1f,0x3f, 0x3f,0x1f,0x37, 0x3f,0x1f,0x2f, 0x3f,0x1f,0x27,
+
+ 0x3f,0x1f,0x1f, 0x3f,0x27,0x1f, 0x3f,0x2f,0x1f, 0x3f,0x37,0x1f, 0x3f,0x3f,0x1f, 0x37,0x3f,0x1f, 0x2f,0x3f,0x1f, 0x27,0x3f,0x1f,
+ 0x1f,0x3f,0x1f, 0x1f,0x3f,0x27, 0x1f,0x3f,0x2f, 0x1f,0x3f,0x37, 0x1f,0x3f,0x3f, 0x1f,0x37,0x3f, 0x1f,0x2f,0x3f, 0x1f,0x27,0x3f,
+ 0x2d,0x2d,0x3f, 0x31,0x2d,0x3f, 0x36,0x2d,0x3f, 0x3a,0x2d,0x3f, 0x3f,0x2d,0x3f, 0x3f,0x2d,0x3a, 0x3f,0x2d,0x36, 0x3f,0x2d,0x31,
+ 0x3f,0x2d,0x2d, 0x3f,0x31,0x2d, 0x3f,0x36,0x2d, 0x3f,0x3a,0x2d, 0x3f,0x3f,0x2d, 0x3a,0x3f,0x2d, 0x36,0x3f,0x2d, 0x31,0x3f,0x2d,
+ 0x2d,0x3f,0x2d, 0x2d,0x3f,0x31, 0x2d,0x3f,0x36, 0x2d,0x3f,0x3a, 0x2d,0x3f,0x3f, 0x2d,0x3a,0x3f, 0x2d,0x36,0x3f, 0x2d,0x31,0x3f,
+ 0x00,0x00,0x1c, 0x07,0x00,0x1c, 0x0e,0x00,0x1c, 0x15,0x00,0x1c, 0x1c,0x00,0x1c, 0x1c,0x00,0x15, 0x1c,0x00,0x0e, 0x1c,0x00,0x07,
+ 0x1c,0x00,0x00, 0x1c,0x07,0x00, 0x1c,0x0e,0x00, 0x1c,0x15,0x00, 0x1c,0x1c,0x00, 0x15,0x1c,0x00, 0x0e,0x1c,0x00, 0x07,0x1c,0x00,
+ 0x00,0x1c,0x00, 0x00,0x1c,0x07, 0x00,0x1c,0x0e, 0x00,0x1c,0x15, 0x00,0x1c,0x1c, 0x00,0x15,0x1c, 0x00,0x0e,0x1c, 0x00,0x07,0x1c,
+
+ 0x0e,0x0e,0x1c, 0x11,0x0e,0x1c, 0x15,0x0e,0x1c, 0x18,0x0e,0x1c, 0x1c,0x0e,0x1c, 0x1c,0x0e,0x18, 0x1c,0x0e,0x15, 0x1c,0x0e,0x11,
+ 0x1c,0x0e,0x0e, 0x1c,0x11,0x0e, 0x1c,0x15,0x0e, 0x1c,0x18,0x0e, 0x1c,0x1c,0x0e, 0x18,0x1c,0x0e, 0x15,0x1c,0x0e, 0x11,0x1c,0x0e,
+ 0x0e,0x1c,0x0e, 0x0e,0x1c,0x11, 0x0e,0x1c,0x15, 0x0e,0x1c,0x18, 0x0e,0x1c,0x1c, 0x0e,0x18,0x1c, 0x0e,0x15,0x1c, 0x0e,0x11,0x1c,
+ 0x14,0x14,0x1c, 0x16,0x14,0x1c, 0x18,0x14,0x1c, 0x1a,0x14,0x1c, 0x1c,0x14,0x1c, 0x1c,0x14,0x1a, 0x1c,0x14,0x18, 0x1c,0x14,0x16,
+ 0x1c,0x14,0x14, 0x1c,0x16,0x14, 0x1c,0x18,0x14, 0x1c,0x1a,0x14, 0x1c,0x1c,0x14, 0x1a,0x1c,0x14, 0x18,0x1c,0x14, 0x16,0x1c,0x14,
+ 0x14,0x1c,0x14, 0x14,0x1c,0x16, 0x14,0x1c,0x18, 0x14,0x1c,0x1a, 0x14,0x1c,0x1c, 0x14,0x1a,0x1c, 0x14,0x18,0x1c, 0x14,0x16,0x1c,
+ 0x00,0x00,0x10, 0x04,0x00,0x10, 0x08,0x00,0x10, 0x0c,0x00,0x10, 0x10,0x00,0x10, 0x10,0x00,0x0c, 0x10,0x00,0x08, 0x10,0x00,0x04,
+ 0x10,0x00,0x00, 0x10,0x04,0x00, 0x10,0x08,0x00, 0x10,0x0c,0x00, 0x10,0x10,0x00, 0x0c,0x10,0x00, 0x08,0x10,0x00, 0x04,0x10,0x00,
+
+ 0x00,0x10,0x00, 0x00,0x10,0x04, 0x00,0x10,0x08, 0x00,0x10,0x0c, 0x00,0x10,0x10, 0x00,0x0c,0x10, 0x00,0x08,0x10, 0x00,0x04,0x10,
+ 0x08,0x08,0x10, 0x0a,0x08,0x10, 0x0c,0x08,0x10, 0x0e,0x08,0x10, 0x10,0x08,0x10, 0x10,0x08,0x0e, 0x10,0x08,0x0c, 0x10,0x08,0x0a,
+ 0x10,0x08,0x08, 0x10,0x0a,0x08, 0x10,0x0c,0x08, 0x10,0x0e,0x08, 0x10,0x10,0x08, 0x0e,0x10,0x08, 0x0c,0x10,0x08, 0x0a,0x10,0x08,
+ 0x08,0x10,0x08, 0x08,0x10,0x0a, 0x08,0x10,0x0c, 0x08,0x10,0x0e, 0x08,0x10,0x10, 0x08,0x0e,0x10, 0x08,0x0c,0x10, 0x08,0x0a,0x10,
+ 0x0b,0x0b,0x10, 0x0c,0x0b,0x10, 0x0d,0x0b,0x10, 0x0f,0x0b,0x10, 0x10,0x0b,0x10, 0x10,0x0b,0x0f, 0x10,0x0b,0x0d, 0x10,0x0b,0x0c,
+ 0x10,0x0b,0x0b, 0x10,0x0c,0x0b, 0x10,0x0d,0x0b, 0x10,0x0f,0x0b, 0x10,0x10,0x0b, 0x0f,0x10,0x0b, 0x0d,0x10,0x0b, 0x0c,0x10,0x0b,
+ 0x0b,0x10,0x0b, 0x0b,0x10,0x0c, 0x0b,0x10,0x0d, 0x0b,0x10,0x0f, 0x0b,0x10,0x10, 0x0b,0x0f,0x10, 0x0b,0x0d,0x10, 0x0b,0x0c,0x10,
+ 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00, 0x00,0x00,0x00
+};
+
+static Bit8u static_functionality[0x10]=
+{
+ /* 0 */ 0xff, // All modes supported #1
+ /* 1 */ 0xe0, // All modes supported #2
+ /* 2 */ 0x0f, // All modes supported #3
+ /* 3 */ 0x00, 0x00, 0x00, 0x00, // reserved
+ /* 7 */ 0x07, // 200, 350, 400 scan lines
+ /* 8 */ 0x02, // mamimum number of visible charsets in text mode
+ /* 9 */ 0x08, // total number of charset blocks in text mode
+ /* a */ 0xe7, // Change to add new functions
+ /* b */ 0x0c, // Change to add new functions
+ /* c */ 0x00, // reserved
+ /* d */ 0x00, // reserved
+ /* e */ 0x00, // Change to add new functions
+ /* f */ 0x00 // reserved
+};
diff --git a/tools/firmware/vmxassist/Makefile b/tools/firmware/vmxassist/Makefile
new file mode 100644
index 0000000000..545acdf743
--- /dev/null
+++ b/tools/firmware/vmxassist/Makefile
@@ -0,0 +1,84 @@
+#
+# Makefile
+#
+# Leendert van Doorn, leendert@watson.ibm.com
+# Copyright (c) 2005, International Business Machines Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place - Suite 330, Boston, MA 02111-1307 USA.
+#
+
+# The emulator code lives in ROM space
+TEXTADDR=0x000D0000
+DEFINES=-DDEBUG -DTEXTADDR=${TEXTADDR}
+XENINC=-I../../../xen/include
+#TEXTADDR=0x000E0000
+#DEFINES=-DDEBUG -DTEST -DTEXTADDR=${TEXTADDR}
+#XENINC=-I/home/leendert/xen/xeno-unstable.bk/xen/include
+
+LD=ld
+CC=gcc
+CPP=cpp -P
+OBJCOPY=objcopy -p -O binary -R .note -R .comment -R .bss -S --gap-fill=0
+CFLAGS=${DEFINES} -I. $(XENINC) -Wall -fno-builtin -O2 -msoft-float
+
+OBJECTS = head.o trap.o vm86.o setup.o util.o
+
+all: vmxloader
+
+vmxloader: roms.h vmxloader.c
+ ${CC} ${DEFINES} -c vmxloader.c
+ $(CC) -o vmxloader.tmp -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000 vmxloader.o
+ objcopy --change-addresses=0xC0000000 vmxloader.tmp vmxloader
+ rm -f vmxloader.tmp
+
+vmxassist.bin: vmxassist.ld ${OBJECTS}
+ ${CPP} ${DEFINES} vmxassist.ld > vmxassist.tmp
+ ${LD} -o vmxassist -nostdlib --fatal-warnings -N -T vmxassist.tmp ${OBJECTS}
+ nm -n vmxassist > vmxassist.sym
+ ${OBJCOPY} vmxassist vmxassist.tmp
+ dd if=vmxassist.tmp of=vmxassist.bin ibs=512 conv=sync
+ rm -f vmxassist.tmp
+
+head.o: machine.h head.S
+ ${CC} ${CFLAGS} -D__ASSEMBLY__ ${DEFINES} -c head.S
+
+trap.o: machine.h offsets.h trap.S
+ ${CC} ${CFLAGS} -D__ASSEMBLY__ ${DEFINES} -c trap.S
+
+vm86.o: machine.h vm86.c
+ ${CC} ${CFLAGS} -c vm86.c
+
+setup.o: machine.h setup.c
+ ${CC} ${CFLAGS} -c setup.c
+
+util.o: machine.h util.c
+ ${CC} ${CFLAGS} -c util.c
+
+roms.h: ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin vmxassist.bin
+ ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h
+ ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h
+ ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h
+ ./mkhex vmxassist vmxassist.bin >> roms.h
+
+offsets.h: gen
+ ./gen > offsets.h
+
+gen: gen.c
+ ${CC} ${CFLAGS} -o gen gen.c
+
+clean:
+ rm -f vmxassist vmxassist.tmp vmxassist.bin vmxassist.run vmxassist.sym head.s roms.h
+ rm -f vmxloader vmxloader.tmp vmxloader.o ${OBJECTS}
+ rm -f gen gen.o offsets.h
+
diff --git a/tools/firmware/vmxassist/TODO b/tools/firmware/vmxassist/TODO
new file mode 100644
index 0000000000..2378ff3485
--- /dev/null
+++ b/tools/firmware/vmxassist/TODO
@@ -0,0 +1,8 @@
+
+- Use the VME extensions (interrupt handling)
+
+- Use E820 map in vmxassist instead of cmos hack
+
+- Add ACPI support (Nitin's patch)
+
+
diff --git a/tools/firmware/vmxassist/gen.c b/tools/firmware/vmxassist/gen.c
new file mode 100644
index 0000000000..f18f77a4f3
--- /dev/null
+++ b/tools/firmware/vmxassist/gen.c
@@ -0,0 +1,52 @@
+/*
+ * gen.c: Generate assembler symbols.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <public/vmx_assist.h>
+
+int
+main()
+{
+ printf("/* MACHINE GENERATED; DO NOT EDIT */\n");
+ printf("#define VMX_ASSIST_CTX_GS_SEL 0x%x\n",
+ offsetof(struct vmx_assist_context, gs_sel));
+ printf("#define VMX_ASSIST_CTX_FS_SEL 0x%x\n",
+ offsetof(struct vmx_assist_context, fs_sel));
+ printf("#define VMX_ASSIST_CTX_DS_SEL 0x%x\n",
+ offsetof(struct vmx_assist_context, ds_sel));
+ printf("#define VMX_ASSIST_CTX_ES_SEL 0x%x\n",
+ offsetof(struct vmx_assist_context, es_sel));
+ printf("#define VMX_ASSIST_CTX_SS_SEL 0x%x\n",
+ offsetof(struct vmx_assist_context, ss_sel));
+ printf("#define VMX_ASSIST_CTX_ESP 0x%x\n",
+ offsetof(struct vmx_assist_context, esp));
+ printf("#define VMX_ASSIST_CTX_EFLAGS 0x%x\n",
+ offsetof(struct vmx_assist_context, eflags));
+ printf("#define VMX_ASSIST_CTX_CS_SEL 0x%x\n",
+ offsetof(struct vmx_assist_context, cs_sel));
+ printf("#define VMX_ASSIST_CTX_EIP 0x%x\n",
+ offsetof(struct vmx_assist_context, eip));
+
+ printf("#define VMX_ASSIST_CTX_CR0 0x%x\n",
+ offsetof(struct vmx_assist_context, cr0));
+
+ return 0;
+}
diff --git a/tools/firmware/vmxassist/head.S b/tools/firmware/vmxassist/head.S
new file mode 100644
index 0000000000..a657e64740
--- /dev/null
+++ b/tools/firmware/vmxassist/head.S
@@ -0,0 +1,162 @@
+/*
+ * head.S: VMXAssist runtime start off.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "vm86.h"
+#include "machine.h"
+
+/*
+ * When a partition tries to mask off the CR0_PE bit a world
+ * switch happens to the environment below. The magic indicates
+ * that this is a valid context.
+ */
+#ifdef TEST
+ .byte 0x55, 0xaa
+ .byte 0x80
+ .code16
+ jmp _start16
+#else
+ jmp _start
+#endif
+
+ .align 8
+ .long VMXASSIST_MAGIC
+ .long newctx /* new context */
+ .long oldctx /* old context */
+
+#ifdef TEST
+/*
+ * We are running in 16-bit. Get into the protected mode as soon as
+ * possible. We use our own (minimal) GDT to get started.
+ *
+ * ROM is a misnomer as this code isn't really rommable (although it
+ * only requires a few changes) but it does live in a BIOS ROM segment.
+ * This code allows me to debug vmxassists under (a modified version of)
+ * Bochs and load it as a "optromimage1".
+ */
+ .code16
+ .globl _start16
+_start16:
+ cli
+
+ /* load our own global descriptor table */
+ data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR)
+
+ /* go to protected mode */
+ movl %cr0, %eax
+ orl $CR0_PE, %eax
+ movl %eax, %cr0
+ data32 ljmp $0x08, $1f
+
+ .align 32
+ .globl rom_gdt
+rom_gdt:
+ .word 0, 0 /* 0x00: reserved */
+ .byte 0, 0, 0, 0
+
+ .word 0xFFFF, 0 /* 0x08: CS 32-bit */
+ .byte 0, 0x9A, 0xCF, 0
+
+ .word 0xFFFF, 0 /* 0x10: CS 32-bit */
+ .byte 0, 0x92, 0xCF, 0
+rom_gdt_end:
+
+ .align 4
+ .globl rom_gdtr
+rom_gdtr:
+ .word rom_gdt_end - rom_gdt - 1
+ .long rom_gdt
+
+ .code32
+1:
+ /* welcome to the 32-bit world */
+ movw $0x10, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ movw %ax, %fs
+ movw %ax, %gs
+
+ /* enable Bochs debug facilities */
+ movw $0x8A00, %dx
+ movw $0x8A00, %ax
+ outw %ax, (%dx)
+
+ jmp _start
+#endif /* TEST */
+
+/*
+ * This is the real start. Control was transferred to this point
+ * with CR0_PE set and executing in some 32-bit segment. We call
+ * main and setup our own environment.
+ */
+ .globl _start
+_start:
+ cli
+
+ /* clear bss */
+ cld
+ xorb %al, %al
+ movl $_bbss, %edi
+ movl $_ebss, %ecx
+ subl %edi, %ecx
+ rep stosb
+
+ /* make sure we are in a sane world */
+ clts
+
+ /* setup my own stack */
+ movl $stack_top - 4*4, %esp
+ movl %esp, %ebp
+
+ /* go ... */
+ call main
+ jmp halt
+
+
+/*
+ * Something bad happened, print invoking %eip and loop forever
+ */
+ .align 4
+ .globl halt
+halt:
+ push $halt_msg
+ call printf
+#ifdef TEST
+ movw $0x8A00, %dx
+ movw $0x8AE0, %ax
+ outw %ax, (%dx)
+#endif
+ cli
+ jmp .
+
+ .data
+halt_msg:
+ .asciz "Halt called from %%eip 0x%x\n"
+
+
+/*
+ * Our stack
+ */
+ .bss
+ .align 8
+ .globl stack, stack_top
+stack:
+ .skip STACK_SIZE
+stack_top:
+
diff --git a/tools/firmware/vmxassist/machine.h b/tools/firmware/vmxassist/machine.h
new file mode 100644
index 0000000000..5d448ef13b
--- /dev/null
+++ b/tools/firmware/vmxassist/machine.h
@@ -0,0 +1,203 @@
+/*
+ * machine.h: Intel CPU specific definitions
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef __MACHINE_H__
+#define __MACHINE_H__
+
+/* the size of our stack (4KB) */
+#define STACK_SIZE 8192
+
+#define TSS_SELECTOR 0x08
+#define CODE_SELECTOR 0x10
+#define DATA_SELECTOR 0x18
+
+#define CR0_PE (1 << 0)
+#define CR0_EM (1 << 2)
+#define CR0_TS (1 << 3)
+#define CR0_NE (1 << 5)
+#define CR0_PG (1 << 31)
+
+#define CR4_VME (1 << 0)
+#define CR4_PVI (1 << 1)
+#define CR4_PSE (1 << 4)
+
+#define EFLAGS_TF (1 << 8)
+#define EFLAGS_IF (1 << 9)
+#define EFLAGS_DF (1 << 10)
+#define EFLAGS_VM (1 << 17)
+#define EFLAGS_VIF (1 << 19)
+#define EFLAGS_VIP (1 << 20)
+
+#define LOG_PGSIZE 12 /* log2(page size) */
+#define LOG_PDSIZE 22 /* log2(page directory size) */
+
+/* Derived constants */
+#define PGSIZE (1 << LOG_PGSIZE) /* page size */
+#define PGMASK (~(PGSIZE - 1)) /* page mask */
+#define LPGSIZE (1 << LOG_PDSIZE) /* large page size */
+#define LPGMASK (~(LPGSIZE - 1)) /* large page mask */
+
+#ifdef TEST
+#define PTE_P (1 << 0) /* Present */
+#define PTE_RW (1 << 1) /* Read/Write */
+#define PTE_US (1 << 2) /* User/Supervisor */
+#define PTE_PS (1 << 7) /* Page Size */
+#endif
+
+/* Programmable Interrupt Controller (PIC) defines */
+#define PIC_MASTER 0x20
+#define PIC_SLAVE 0xA0
+
+#define PIC_CMD 0 /* command */
+#define PIC_ISR 0 /* interrupt status */
+#define PIC_IMR 1 /* interrupt mask */
+
+
+#ifndef __ASSEMBLY__
+
+struct dtr {
+ unsigned short size;
+ unsigned long base __attribute__ ((packed));
+};
+
+struct tss {
+ unsigned short prev_link;
+ unsigned short _1;
+ unsigned long esp0;
+ unsigned short ss0;
+ unsigned short _2;
+ unsigned long esp1;
+ unsigned short ss1;
+ unsigned short _3;
+ unsigned long esp2;
+ unsigned short ss2;
+ unsigned short _4;
+ unsigned long cr3;
+ unsigned long eip;
+ unsigned long eflags;
+ unsigned long eax;
+ unsigned long ecx;
+ unsigned long edx;
+ unsigned long ebx;
+ unsigned long esi;
+ unsigned long edi;
+ unsigned long esp;
+ unsigned long ebp;
+ unsigned long es;
+ unsigned long cs;
+ unsigned long ss;
+ unsigned long ds;
+ unsigned long fs;
+ unsigned long gs;
+ unsigned short ldt_segment;
+ unsigned short _5;
+ unsigned short _6;
+ unsigned short iomap_base;
+ unsigned char iomap[8192];
+};
+
+static inline void
+outw(unsigned short addr, unsigned short val)
+{
+ __asm__ __volatile__ ("outw %%ax, %%dx" :: "d"(addr), "a"(val));
+}
+
+static inline void
+outb(unsigned short addr, unsigned char val)
+{
+ __asm__ __volatile__ ("outb %%al, %%dx" :: "d"(addr), "a"(val));
+}
+
+static inline unsigned char
+inb(unsigned short addr)
+{
+ unsigned char val;
+
+ __asm__ __volatile__ ("inb %w1,%0" : "=a" (val) : "Nd" (addr));
+ return val;
+}
+
+static inline unsigned
+get_cmos(int reg)
+{
+ outb(0x70, reg);
+ return inb(0x71);
+}
+
+static inline unsigned
+get_cr0(void)
+{
+ unsigned rv;
+ __asm__ __volatile__("movl %%cr0, %0" : "=r"(rv));
+ return rv;
+}
+
+static inline void
+set_cr0(unsigned value)
+{
+ __asm__ __volatile__(
+ "movl %0, %%cr0\n"
+ "jmp 1f\n"
+ "1: nop\n"
+ : /* no outputs */
+ : "r"(value)
+ );
+}
+
+static inline unsigned
+get_cr2(void)
+{
+ unsigned rv;
+
+ __asm__ __volatile__("movl %%cr2, %0" : "=r"(rv));
+ return rv;
+}
+
+static inline unsigned
+get_cr4(void)
+{
+ unsigned rv;
+ __asm__ __volatile__("movl %%cr4, %0" : "=r"(rv));
+ return rv;
+}
+
+#ifdef TEST
+static inline void
+set_cr3(unsigned addr)
+{
+ __asm__ __volatile__("movl %0, %%cr3" : /* no outputs */ : "r"(addr));
+}
+
+static inline void
+set_cr4(unsigned value)
+{
+ __asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value));
+}
+
+static inline void
+breakpoint(void)
+{
+ outw(0x8A00, 0x8AE0);
+}
+#endif /* TEST */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __MACHINE_H__ */
+
diff --git a/tools/firmware/vmxassist/mkhex b/tools/firmware/vmxassist/mkhex
new file mode 100755
index 0000000000..7389d70483
--- /dev/null
+++ b/tools/firmware/vmxassist/mkhex
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+#
+# mkhex: Generate C embeddable hexdumps
+#
+# Leendert van Doorn, leendert@watson.ibm.com
+# Copyright (c) 2005, International Business Machines Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place - Suite 330, Boston, MA 02111-1307 USA.
+#
+
+echo "unsigned $1[] = {"
+od -v -t x $2 | sed 's/^[0-9]* /0x/' | sed 's/ /, 0x/g' | sed 's/$/,/'
+echo "};"
+
diff --git a/tools/firmware/vmxassist/setup.c b/tools/firmware/vmxassist/setup.c
new file mode 100644
index 0000000000..64b9a6e06d
--- /dev/null
+++ b/tools/firmware/vmxassist/setup.c
@@ -0,0 +1,338 @@
+/*
+ * setup.c: Setup the world for vmxassist.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "vm86.h"
+#include "util.h"
+#include "machine.h"
+
+#ifndef TEST
+#if (VMXASSIST_BASE != TEXTADDR)
+#error VMXAssist base mismatch
+#endif
+#endif
+
+#define NR_PGD (PGSIZE / sizeof(unsigned))
+
+#define min(a, b) ((a) > (b) ? (b) : (a))
+
+unsigned long long gdt[] __attribute__ ((aligned(32))) = {
+ 0x0000000000000000ULL, /* 0x00: reserved */
+ 0x0000890000000000ULL, /* 0x08: 32-bit TSS */
+ 0x00CF9A000000FFFFULL, /* 0x10: CS 32-bit */
+ 0x00CF92000000FFFFULL, /* 0x18: DS 32-bit */
+};
+
+struct dtr gdtr = { sizeof(gdt)-1, (unsigned long) &gdt };
+
+struct tss tss __attribute__ ((aligned(4)));
+
+unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32)));
+
+struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
+
+#ifdef TEST
+unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
+#endif
+
+struct vmx_assist_context oldctx;
+struct vmx_assist_context newctx;
+
+unsigned long memory_size;
+int initialize_real_mode;
+
+extern char stack[], stack_top[];
+extern unsigned trap_handlers[];
+
+void
+banner(void)
+{
+ printf("VMXAssist (%s)\n", __DATE__);
+
+ /* Bochs its way to convey memory size */
+ memory_size = ((get_cmos(0x35) << 8) | get_cmos(0x34)) << 6;
+ if (memory_size > 0x3bc000)
+ memory_size = 0x3bc000;
+ memory_size = (memory_size << 10) + 0xF00000;
+ if (memory_size <= 0xF00000)
+ memory_size =
+ (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
+ memory_size += 0x400 << 10; /* + 1MB */
+
+ printf("Memory size %ld MB\n", memory_size >> 20);
+ printf("\n");
+}
+
+#ifdef TEST
+void
+setup_paging(void)
+{
+ unsigned long i;
+
+ if (((unsigned)pgd & ~PGMASK) != 0)
+ panic("PGD not page aligned");
+ set_cr4(get_cr4() | CR4_PSE);
+ for (i = 0; i < NR_PGD; i++)
+ pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
+ set_cr3((unsigned) pgd);
+ set_cr0(get_cr0() | (CR0_PE|CR0_PG));
+}
+#endif /* TEST */
+
+void
+setup_gdt(void)
+{
+	/* setup task state segment */
+	memset(&tss, 0, sizeof(tss));
+	tss.ss0 = DATA_SELECTOR;
+	tss.esp0 = (unsigned) stack_top - 4*4;
+	tss.iomap_base = offsetof(struct tss, iomap);
+
+	/* initialize gdt's tss selector */
+	unsigned long long addr = (unsigned long long) &tss;
+	gdt[TSS_SELECTOR / sizeof(gdt[0])] |=
+	    ((addr & 0xFF000000) << (56-24)) |
+	    ((addr & 0x00FF0000) << (32-16)) |
+	    ((addr & 0x0000FFFF) << (16)) |
+	    (sizeof(tss) - 1);
+
+	/* switch to our own gdt and set current tss */
+	__asm__ __volatile__ ("lgdt %0" : : "m" (gdtr));
+	__asm__ __volatile__ ("movl %%eax,%%ds;"
+			"movl %%eax,%%es;"
+			"movl %%eax,%%fs;"
+			"movl %%eax,%%gs;"
+			"movl %%eax,%%ss" : : "a" (DATA_SELECTOR));
+/* XXX 0x10 == CODE_SELECTOR (figure out gnuas) */
+	__asm__ __volatile__ ("ljmp $0x10,$1f; 1:");
+
+	__asm__ __volatile__ ("ltr %%ax" : : "a" (TSS_SELECTOR));
+}
+
+void
+set_intr_gate(int i, unsigned handler)
+{
+ unsigned long long addr = handler;
+
+ idt[i] = ((addr & 0xFFFF0000ULL) << 32) | (0x8E00ULL << 32) |
+ (addr & 0xFFFFULL) | (CODE_SELECTOR << 16);
+}
+
+void
+setup_idt(void)
+{
+ int i;
+
+ for (i = 0; i < NR_TRAPS; i++)
+ set_intr_gate(i, trap_handlers[i]);
+ __asm__ __volatile__ ("lidt %0" : : "m" (idtr));
+}
+
+void
+setup_pic(void)
+{
+ /* mask all interrupts */
+ outb(PIC_MASTER + PIC_IMR, 0xFF);
+ outb(PIC_SLAVE + PIC_IMR, 0xFF);
+
+ /* setup master PIC */
+ outb(PIC_MASTER + PIC_CMD, 0x11); /* edge triggered, cascade, ICW4 */
+ outb(PIC_MASTER + PIC_IMR, NR_EXCEPTION_HANDLER);
+ outb(PIC_MASTER + PIC_IMR, 1 << 2); /* slave on channel 2 */
+ outb(PIC_MASTER + PIC_IMR, 0x01);
+
+ /* setup slave PIC */
+ outb(PIC_SLAVE + PIC_CMD, 0x11); /* edge triggered, cascade, ICW4 */
+ outb(PIC_SLAVE + PIC_IMR, NR_EXCEPTION_HANDLER + 8);
+ outb(PIC_SLAVE + PIC_IMR, 0x02); /* slave identity is 2 */
+ outb(PIC_SLAVE + PIC_IMR, 0x01);
+
+ /* enable all interrupts */
+ outb(PIC_MASTER + PIC_IMR, 0);
+ outb(PIC_SLAVE + PIC_IMR, 0);
+}
+
+void
+enter_real_mode(struct regs *regs)
+{
+ /* mask off TSS busy bit */
+ gdt[TSS_SELECTOR / sizeof(gdt[0])] &= ~0x0000020000000000ULL;
+
+ /* start 8086 emulation of BIOS */
+ if (initialize_real_mode) {
+ initialize_real_mode = 0;
+ regs->eflags |= EFLAGS_VM | 0x02;
+ regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
+ regs->cs = 0xF000; /* ROM BIOS POST entry point */
+#ifdef TEST
+ regs->eip = 0xFFE0;
+#else
+ regs->eip = 0xFFF0;
+#endif
+ regs->uesp = 0;
+ regs->uss = 0;
+ printf("Starting emulated 16-bit real-mode: ip=%04x:%04x\n",
+ regs->cs, regs->eip);
+
+ mode = VM86_REAL; /* becomes previous mode */
+ set_mode(regs, VM86_REAL);
+
+ /* this should get us into 16-bit mode */
+ return;
+ } else {
+ /* go from protected to real mode */
+ regs->eflags |= EFLAGS_VM;
+
+ set_mode(regs, VM86_PROTECTED_TO_REAL);
+
+ emulate(regs);
+ }
+}
+
+/*
+ * Setup the environment for VMX assist.
+ * This environment consists of flat segments (code and data),
+ * its own gdt, idt, and tr.
+ */
+void
+setup_ctx(void)
+{
+ struct vmx_assist_context *c = &newctx;
+
+ memset(c, 0, sizeof(*c));
+ c->eip = (unsigned long) switch_to_real_mode;
+ c->esp = (unsigned) stack_top - 4*4;
+ c->eflags = 0x2; /* no interrupts, please */
+
+ /*
+ * Obviously, vmx assist is not running with CR0_PE disabled.
+ * The reason why the vmx assist cr0 has CR0.PE disabled is
+	 * that a transition to CR0.PE causes a world switch. It seems
+ * more natural to enable CR0.PE to cause a world switch to
+ * protected mode rather than disabling it.
+ */
+#ifdef TEST
+ c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
+ c->cr3 = (unsigned long) pgd;
+#else
+ c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
+ c->cr3 = 0;
+#endif
+ c->cr4 = get_cr4();
+
+ c->idtr_limit = sizeof(idt)-1;
+ c->idtr_base = (unsigned long) &idt;
+
+ c->gdtr_limit = sizeof(gdt)-1;
+ c->gdtr_base = (unsigned long) &gdt;
+
+ c->cs_sel = CODE_SELECTOR;
+ c->cs_limit = 0xFFFFFFFF;
+ c->cs_base = 0;
+ c->cs_arbytes.fields.seg_type = 0xb;
+ c->cs_arbytes.fields.s = 1;
+ c->cs_arbytes.fields.dpl = 0;
+ c->cs_arbytes.fields.p = 1;
+ c->cs_arbytes.fields.avl = 0;
+ c->cs_arbytes.fields.default_ops_size = 1;
+ c->cs_arbytes.fields.g = 1;
+
+ c->ds_sel = DATA_SELECTOR;
+ c->ds_limit = 0xFFFFFFFF;
+ c->ds_base = 0;
+ c->ds_arbytes = c->cs_arbytes;
+ c->ds_arbytes.fields.seg_type = 0x3;
+
+ c->es_sel = DATA_SELECTOR;
+ c->es_limit = 0xFFFFFFFF;
+ c->es_base = 0;
+ c->es_arbytes = c->ds_arbytes;
+
+ c->ss_sel = DATA_SELECTOR;
+ c->ss_limit = 0xFFFFFFFF;
+ c->ss_base = 0;
+ c->ss_arbytes = c->ds_arbytes;
+
+ c->fs_sel = DATA_SELECTOR;
+ c->fs_limit = 0xFFFFFFFF;
+ c->fs_base = 0;
+ c->fs_arbytes = c->ds_arbytes;
+
+ c->gs_sel = DATA_SELECTOR;
+ c->gs_limit = 0xFFFFFFFF;
+ c->gs_base = 0;
+ c->gs_arbytes = c->ds_arbytes;
+
+ c->tr_sel = TSS_SELECTOR;
+ c->tr_limit = sizeof(tss) - 1;
+ c->tr_base = (unsigned long) &tss;
+ c->tr_arbytes.fields.seg_type = 0xb; /* 0x9 | 0x2 (busy) */
+ c->tr_arbytes.fields.s = 0;
+ c->tr_arbytes.fields.dpl = 0;
+ c->tr_arbytes.fields.p = 1;
+ c->tr_arbytes.fields.avl = 0;
+ c->tr_arbytes.fields.default_ops_size = 0;
+ c->tr_arbytes.fields.g = 0;
+
+ c->ldtr_sel = 0;
+ c->ldtr_limit = 0;
+ c->ldtr_base = 0;
+ c->ldtr_arbytes = c->ds_arbytes;
+ c->ldtr_arbytes.fields.seg_type = 0x2;
+ c->ldtr_arbytes.fields.s = 0;
+ c->ldtr_arbytes.fields.dpl = 0;
+ c->ldtr_arbytes.fields.p = 1;
+ c->ldtr_arbytes.fields.avl = 0;
+ c->ldtr_arbytes.fields.default_ops_size = 0;
+ c->ldtr_arbytes.fields.g = 0;
+}
+
+/*
+ * Start BIOS by causing a world switch to vmxassist, which causes
+ * VM8086 to be enabled and control is transferred to F000:FFF0.
+ */
+void
+start_bios(void)
+{
+ unsigned long cr0;
+
+ printf("Start BIOS ...\n");
+ initialize_real_mode = 1;
+ cr0 = get_cr0();
+#ifndef TEST
+ set_cr0(cr0 | CR0_PE);
+#endif
+ set_cr0(cr0 & ~CR0_PE);
+ panic("vmxassist returned"); /* "cannot happen" */
+}
+
+int
+main()
+{
+ banner();
+#ifdef TEST
+ setup_paging();
+#endif
+ setup_gdt();
+ setup_idt();
+ setup_ctx();
+ setup_pic();
+ start_bios();
+ return 0;
+}
+
diff --git a/tools/firmware/vmxassist/trap.S b/tools/firmware/vmxassist/trap.S
new file mode 100644
index 0000000000..a469f68fc8
--- /dev/null
+++ b/tools/firmware/vmxassist/trap.S
@@ -0,0 +1,189 @@
+/*
+ * trap.S: Trap and world switch handlers
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "machine.h"
+#include "offsets.h"
+
+
+/*
+ * All processor exception/faults/interrupts end up here.
+ *
+ * On an exception/fault, the processor pushes CS:EIP, SS, ESP and an
+ * optional error code onto the stack. The common_trap routine
+ * below saves the processor context and transfers control to trap()
+ * whose job it is to virtualize and pass on the trap.
+ */
+ .macro TRAP_HANDLER trapno error
+ .text
+ .align 16
+1: .if \error == 0
+ pushl $0 /* dummy error code */
+ .endif
+ pushl $\trapno
+ jmp common_trap
+ .section .rodata
+ .long 1b
+ .text
+ .endm
+
+ .section .rodata
+ .align 4
+ .global trap_handlers
+trap_handlers:
+ TRAP_HANDLER 0, 0 /* divide error */
+ TRAP_HANDLER 1, 0 /* debug */
+ TRAP_HANDLER 2, 0 /* NMI interrupt */
+ TRAP_HANDLER 3, 0 /* breakpoint */
+ TRAP_HANDLER 4, 0 /* overflow */
+ TRAP_HANDLER 5, 0 /* BOUND range exceeded */
+ TRAP_HANDLER 6, 0 /* invalid opcode */
+ TRAP_HANDLER 7, 0 /* device not available */
+ TRAP_HANDLER 8, 1 /* double fault */
+ TRAP_HANDLER 9, 0 /* coprocessor segment overrun */
+ TRAP_HANDLER 10, 1 /* invalid TSS */
+ TRAP_HANDLER 11, 1 /* segment not present */
+ TRAP_HANDLER 12, 1 /* stack-segment fault */
+ TRAP_HANDLER 13, 1 /* general protection */
+ TRAP_HANDLER 14, 1 /* page fault */
+ TRAP_HANDLER 15, 0 /* reserved */
+ TRAP_HANDLER 16, 0 /* FPU floating-point error */
+ TRAP_HANDLER 17, 1 /* alignment check */
+ TRAP_HANDLER 18, 0 /* machine check */
+ TRAP_HANDLER 19, 0 /* SIMD floating-point error */
+ TRAP_HANDLER 20, 0 /* reserved */
+ TRAP_HANDLER 21, 0 /* reserved */
+ TRAP_HANDLER 22, 0 /* reserved */
+ TRAP_HANDLER 23, 0 /* reserved */
+ TRAP_HANDLER 24, 0 /* reserved */
+ TRAP_HANDLER 25, 0 /* reserved */
+ TRAP_HANDLER 26, 0 /* reserved */
+ TRAP_HANDLER 27, 0 /* reserved */
+ TRAP_HANDLER 28, 0 /* reserved */
+ TRAP_HANDLER 29, 0 /* reserved */
+ TRAP_HANDLER 30, 0 /* reserved */
+ TRAP_HANDLER 31, 0 /* reserved */
+ TRAP_HANDLER 32, 0 /* irq 0 */
+ TRAP_HANDLER 33, 0 /* irq 1 */
+ TRAP_HANDLER 34, 0 /* irq 2 */
+ TRAP_HANDLER 35, 0 /* irq 3 */
+ TRAP_HANDLER 36, 0 /* irq 4 */
+ TRAP_HANDLER 37, 0 /* irq 5 */
+ TRAP_HANDLER 38, 0 /* irq 6 */
+ TRAP_HANDLER 39, 0 /* irq 7 */
+ TRAP_HANDLER 40, 0 /* irq 8 */
+ TRAP_HANDLER 41, 0 /* irq 9 */
+ TRAP_HANDLER 42, 0 /* irq 10 */
+ TRAP_HANDLER 43, 0 /* irq 11 */
+ TRAP_HANDLER 44, 0 /* irq 12 */
+ TRAP_HANDLER 45, 0 /* irq 13 */
+ TRAP_HANDLER 46, 0 /* irq 14 */
+ TRAP_HANDLER 47, 0 /* irq 15 */
+
+ .text
+ .align 16
+common_trap: /* common trap handler */
+ pushl %gs
+ pushl %fs
+ pushl %ds
+ pushl %es
+ pushal
+
+ movl $DATA_SELECTOR, %eax /* make sure these are sane */
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %fs
+ movl %eax, %gs
+ movl %esp, %ebp
+
+ pushl %ebp
+ pushl 52(%ebp)
+ pushl 48(%ebp)
+ call trap /* trap(trapno, errno, regs) */
+ addl $12, %esp
+
+trap_return:
+ popal
+ popl %es
+ popl %ds
+ popl %fs
+ popl %gs
+ addl $8, %esp /* skip trapno, errno */
+ iret
+ /* NOT REACHED */
+
+
+/*
+ * A world switch to real mode occurred. The hypervisor saved the
+ * executing context into "oldctx" and instantiated "newctx", which
+ * gets us here. Here we push a stack frame that is compatible with
+ * a trap frame (see above) so that we can handle this event as a
+ * regular trap.
+ */
+ .text
+ .align 16
+ .globl switch_to_real_mode
+switch_to_real_mode:
+ pushl oldctx+VMX_ASSIST_CTX_GS_SEL /* 16 to 32-bit transition */
+ pushl oldctx+VMX_ASSIST_CTX_FS_SEL
+ pushl oldctx+VMX_ASSIST_CTX_DS_SEL
+ pushl oldctx+VMX_ASSIST_CTX_ES_SEL
+ pushl oldctx+VMX_ASSIST_CTX_SS_SEL
+ pushl oldctx+VMX_ASSIST_CTX_ESP
+ pushl oldctx+VMX_ASSIST_CTX_EFLAGS
+ pushl oldctx+VMX_ASSIST_CTX_CS_SEL
+ pushl oldctx+VMX_ASSIST_CTX_EIP
+ pushl $-1 /* trapno, errno */
+ pushl $-1
+ pushl %gs
+ pushl %fs
+ pushl %ds
+ pushl %es
+ pushal
+
+ movl %esp, %ebp
+ pushl %ebp
+ call enter_real_mode
+ addl $4, %esp
+
+ jmp trap_return
+ /* NOT REACHED */
+
+
+/*
+ * Switch to protected mode. At this point all the registers have
+ * been reloaded by trap_return and all we have to do is cause a
+ * world switch by turning on CR0.PE.
+ */
+ .text
+ .align 16
+ .globl switch_to_protected_mode
+switch_to_protected_mode:
+ movl oldctx+VMX_ASSIST_CTX_CR0, %esp
+ movl %esp, %cr0 /* actual world switch ! */
+
+ /* NOT REACHED */
+ pushl $switch_failed
+ call panic
+ jmp .
+
+ .data
+ .align 4
+switch_failed:
+ .asciz "World switch to protected mode failed\n"
+
diff --git a/tools/firmware/vmxassist/util.c b/tools/firmware/vmxassist/util.c
new file mode 100644
index 0000000000..53b6addd35
--- /dev/null
+++ b/tools/firmware/vmxassist/util.c
@@ -0,0 +1,364 @@
+/*
+ * util.c: Commonly used utility functions.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include <stdarg.h>
+#include <public/vmx_assist.h>
+
+#include "util.h"
+#include "machine.h"
+
+#define isdigit(c) ((c) >= '0' && (c) <= '9')
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static void putchar(int);
+static char *printnum(char *, unsigned long, int);
+static void _doprint(void (*)(int), char const *, va_list);
+
+
+void
+dump_regs(struct regs *regs)
+{
+ printf("eax %8x ecx %8x edx %8x ebx %8x\n",
+ regs->eax, regs->ecx, regs->edx, regs->ebx);
+ printf("esp %8x ebp %8x esi %8x edi %8x\n",
+ regs->esp, regs->ebp, regs->esi, regs->edi);
+ printf("eip %8x eflags %8x cs %8x ds %8x\n",
+ regs->eip, regs->eflags, regs->cs, regs->ds);
+ printf("es %8x fs %8x uss %8x uesp %8x\n",
+ regs->es, regs->fs, regs->uss, regs->uesp);
+ printf("ves %8x vds %8x vfs %8x vgs %8x\n",
+ regs->ves, regs->vds, regs->vfs, regs->vgs);
+ if (regs->trapno != -1 || regs->errno != -1)
+ printf("trapno %8x errno %8x\n", regs->trapno, regs->errno);
+
+ printf("cr0 %8lx cr2 %8x cr3 %8lx cr4 %8lx\n",
+ oldctx.cr0, get_cr2(), oldctx.cr3, oldctx.cr4);
+}
+
+#ifdef DEBUG
+void
+hexdump(unsigned char *data, int sz)
+{
+ unsigned char *d;
+ int i;
+
+ for (d = data; sz > 0; d += 16, sz -= 16) {
+ int n = sz > 16 ? 16 : sz;
+
+ printf("%08x: ", (unsigned)d);
+ for (i = 0; i < n; i++)
+ printf("%02x%c", d[i], i == 7 ? '-' : ' ');
+ for (; i < 16; i++)
+ printf(" %c", i == 7 ? '-' : ' ');
+ printf(" ");
+ for (i = 0; i < n; i++)
+ printf("%c", d[i] >= ' ' && d[i] <= '~' ? d[i] : '.');
+ printf("\n");
+ }
+}
+
+void
+dump_dtr(unsigned long base, unsigned long limit)
+{
+ unsigned long long entry;
+ int i;
+
+ for (i = 0; i < limit; i += 8) {
+ entry = ((unsigned long long *) base)[i >> 3];
+ printf("[0x%x] = 0x%08x%08x\n", i,
+ (unsigned)(entry >> 32), (unsigned)(entry));
+ }
+}
+
+void
+dump_vmx_context(struct vmx_assist_context *c)
+{
+ printf("eip 0x%lx, esp 0x%lx, eflags 0x%lx\n",
+ c->eip, c->esp, c->eflags);
+
+ printf("cr0 0x%lx, cr3 0x%lx, cr4 0x%lx\n", c->cr0, c->cr3, c->cr4);
+
+ printf("idtr: limit 0x%lx, base 0x%lx\n",
+ c->idtr_limit, c->idtr_base);
+
+ printf("gdtr: limit 0x%lx, base 0x%lx\n",
+ c->gdtr_limit, c->gdtr_base);
+
+ printf("cs: sel 0x%lx, limit 0x%lx, base 0x%lx\n",
+ c->cs_sel, c->cs_limit, c->cs_base);
+ printf("\ttype %d, s %d, dpl %d, p %d, avl %d, ops %d, g %d, nul %d\n",
+ c->cs_arbytes.fields.seg_type,
+ c->cs_arbytes.fields.s,
+ c->cs_arbytes.fields.dpl,
+ c->cs_arbytes.fields.p,
+ c->cs_arbytes.fields.avl,
+ c->cs_arbytes.fields.default_ops_size,
+ c->cs_arbytes.fields.g,
+ c->cs_arbytes.fields.null_bit);
+
+ printf("ds: sel 0x%lx, limit 0x%lx, base 0x%lx\n",
+ c->ds_sel, c->ds_limit, c->ds_base);
+ printf("\ttype %d, s %d, dpl %d, p %d, avl %d, ops %d, g %d, nul %d\n",
+ c->ds_arbytes.fields.seg_type,
+ c->ds_arbytes.fields.s,
+ c->ds_arbytes.fields.dpl,
+ c->ds_arbytes.fields.p,
+ c->ds_arbytes.fields.avl,
+ c->ds_arbytes.fields.default_ops_size,
+ c->ds_arbytes.fields.g,
+ c->ds_arbytes.fields.null_bit);
+
+ printf("es: sel 0x%lx, limit 0x%lx, base 0x%lx\n",
+ c->es_sel, c->es_limit, c->es_base);
+ printf("\ttype %d, s %d, dpl %d, p %d, avl %d, ops %d, g %d, nul %d\n",
+ c->es_arbytes.fields.seg_type,
+ c->es_arbytes.fields.s,
+ c->es_arbytes.fields.dpl,
+ c->es_arbytes.fields.p,
+ c->es_arbytes.fields.avl,
+ c->es_arbytes.fields.default_ops_size,
+ c->es_arbytes.fields.g,
+ c->es_arbytes.fields.null_bit);
+
+ printf("ss: sel 0x%lx, limit 0x%lx, base 0x%lx\n",
+ c->ss_sel, c->ss_limit, c->ss_base);
+ printf("\ttype %d, s %d, dpl %d, p %d, avl %d, ops %d, g %d, nul %d\n",
+ c->ss_arbytes.fields.seg_type,
+ c->ss_arbytes.fields.s,
+ c->ss_arbytes.fields.dpl,
+ c->ss_arbytes.fields.p,
+ c->ss_arbytes.fields.avl,
+ c->ss_arbytes.fields.default_ops_size,
+ c->ss_arbytes.fields.g,
+ c->ss_arbytes.fields.null_bit);
+
+ printf("fs: sel 0x%lx, limit 0x%lx, base 0x%lx\n",
+ c->fs_sel, c->fs_limit, c->fs_base);
+ printf("\ttype %d, s %d, dpl %d, p %d, avl %d, ops %d, g %d, nul %d\n",
+ c->fs_arbytes.fields.seg_type,
+ c->fs_arbytes.fields.s,
+ c->fs_arbytes.fields.dpl,
+ c->fs_arbytes.fields.p,
+ c->fs_arbytes.fields.avl,
+ c->fs_arbytes.fields.default_ops_size,
+ c->fs_arbytes.fields.g,
+ c->fs_arbytes.fields.null_bit);
+
+ printf("gs: sel 0x%lx, limit 0x%lx, base 0x%lx\n",
+ c->gs_sel, c->gs_limit, c->gs_base);
+ printf("\ttype %d, s %d, dpl %d, p %d, avl %d, ops %d, g %d, nul %d\n",
+ c->gs_arbytes.fields.seg_type,
+ c->gs_arbytes.fields.s,
+ c->gs_arbytes.fields.dpl,
+ c->gs_arbytes.fields.p,
+ c->gs_arbytes.fields.avl,
+ c->gs_arbytes.fields.default_ops_size,
+ c->gs_arbytes.fields.g,
+ c->gs_arbytes.fields.null_bit);
+
+ printf("tr: sel 0x%lx, limit 0x%lx, base 0x%lx\n",
+ c->tr_sel, c->tr_limit, c->tr_base);
+ printf("\ttype %d, s %d, dpl %d, p %d, avl %d, ops %d, g %d, nul %d\n",
+ c->tr_arbytes.fields.seg_type,
+ c->tr_arbytes.fields.s,
+ c->tr_arbytes.fields.dpl,
+ c->tr_arbytes.fields.p,
+ c->tr_arbytes.fields.avl,
+ c->tr_arbytes.fields.default_ops_size,
+ c->tr_arbytes.fields.g,
+ c->tr_arbytes.fields.null_bit);
+
+ printf("ldtr: sel 0x%lx, limit 0x%lx, base 0x%lx\n",
+ c->ldtr_sel, c->ldtr_limit, c->ldtr_base);
+ printf("\ttype %d, s %d, dpl %d, p %d, avl %d, ops %d, g %d, nul %d\n",
+ c->ldtr_arbytes.fields.seg_type,
+ c->ldtr_arbytes.fields.s,
+ c->ldtr_arbytes.fields.dpl,
+ c->ldtr_arbytes.fields.p,
+ c->ldtr_arbytes.fields.avl,
+ c->ldtr_arbytes.fields.default_ops_size,
+ c->ldtr_arbytes.fields.g,
+ c->ldtr_arbytes.fields.null_bit);
+
+ printf("GDTR <0x%lx,0x%lx>:\n", c->gdtr_base, c->gdtr_limit);
+ dump_dtr(c->gdtr_base, c->gdtr_limit);
+}
+#endif /* DEBUG */
+
+/*
+ * Lightweight printf that doesn't drag in everything under the sun.
+ */
+int
+printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _doprint(putchar, fmt, ap);
+ va_end(ap);
+ return 0; /* for gcc compat */
+}
+
+int
+vprintf(const char *fmt, va_list ap)
+{
+ _doprint(putchar, fmt, ap);
+ return 0; /* for gcc compat */
+}
+
+void
+panic(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _doprint(putchar, fmt, ap);
+ putchar('\n');
+ va_end(ap);
+ halt();
+}
+
+unsigned
+strlen(const char *s)
+{
+ const char *q = s;
+
+ while (*s++)
+ /* void */;
+ return s - q - 1;
+}
+
+static void
+putchar(int ch)
+{
+ outb(0xE9, ch);
+}
+
+/*
+ * A stripped down version of doprint,
+ * but still powerful enough for most tasks.
+ */
+static void
+_doprint(void (*put)(int), char const *fmt, va_list ap)
+{
+ register char *str, c;
+ int lflag, zflag, nflag;
+ char buffer[17];
+ unsigned value;
+ int i, slen, pad;
+
+ for ( ; *fmt != '\0'; fmt++) {
+ pad = zflag = nflag = lflag = 0;
+ if (*fmt == '%') {
+ c = *++fmt;
+ if (c == '-' || isdigit(c)) {
+ if (c == '-') {
+ nflag = 1;
+ c = *++fmt;
+ }
+ zflag = c == '0';
+ for (pad = 0; isdigit(c); c = *++fmt)
+ pad = (pad * 10) + c - '0';
+ }
+ if (c == 'l') { /* long extension */
+ lflag = 1;
+ c = *++fmt;
+ }
+ if (c == 'd' || c == 'u' || c == 'o' || c == 'x') {
+ if (lflag)
+ value = va_arg(ap, unsigned);
+ else
+ value = (unsigned) va_arg(ap, unsigned int);
+ str = buffer;
+ printnum(str, value,
+ c == 'o' ? 8 : (c == 'x' ? 16 : 10));
+ goto printn;
+ } else if (c == 'O' || c == 'D' || c == 'X') {
+ value = va_arg(ap, unsigned);
+ str = buffer;
+ printnum(str, value,
+ c == 'O' ? 8 : (c == 'X' ? 16 : 10));
+ printn:
+ slen = strlen(str);
+ for (i = pad - slen; i > 0; i--)
+ put(zflag ? '0' : ' ');
+ while (*str) put(*str++);
+ } else if (c == 's') {
+ str = va_arg(ap, char *);
+ slen = strlen(str);
+ if (nflag == 0)
+ for (i = pad - slen; i > 0; i--) put(' ');
+ while (*str) put(*str++);
+ if (nflag)
+ for (i = pad - slen; i > 0; i--) put(' ');
+ } else if (c == 'c')
+ put(va_arg(ap, int));
+ else
+ put(*fmt);
+ } else
+ put(*fmt);
+ }
+}
+
+static char *
+printnum(char *p, unsigned long num, int base)
+{
+ unsigned long n;
+
+ if ((n = num/base) > 0)
+ p = printnum(p, n, base);
+ *p++ = "0123456789ABCDEF"[(int)(num % base)];
+ *p = '\0';
+ return p;
+}
+
+void *
+memset(void *s, int c, unsigned n)
+{
+ int t0, t1;
+
+ __asm__ __volatile__ ("cld; rep; stosb"
+ : "=&c" (t0), "=&D" (t1)
+ : "a" (c), "1" (s), "0" (n)
+ : "memory");
+ return s;
+}
+
+void *
+memcpy(void *dest, const void *src, unsigned n)
+{
+ int t0, t1, t2;
+
+ __asm__ __volatile__(
+ "cld\n"
+ "rep; movsl\n"
+ "testb $2,%b4\n"
+ "je 1f\n"
+ "movsw\n"
+ "1: testb $1,%b4\n"
+ "je 2f\n"
+ "movsb\n"
+ "2:"
+ : "=&c" (t0), "=&D" (t1), "=&S" (t2)
+ : "0" (n/4), "q" (n), "1" ((long) dest), "2" ((long) src)
+ : "memory"
+ );
+ return dest;
+}
+
diff --git a/tools/firmware/vmxassist/util.h b/tools/firmware/vmxassist/util.h
new file mode 100644
index 0000000000..06e030d571
--- /dev/null
+++ b/tools/firmware/vmxassist/util.h
@@ -0,0 +1,41 @@
+/*
+ * util.h: Useful utility functions.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef __UTIL_H__
+#define __UTIL_H__
+
+#include <stdarg.h>
+#include <vm86.h>
+
+#define offsetof(type, member) ((unsigned) &((type *)0)->member)
+
+struct vmx_assist_context;
+
+extern void hexdump(unsigned char *, int);
+extern void dump_regs(struct regs *);
+extern void dump_vmx_context(struct vmx_assist_context *);
+extern void dump_dtr(unsigned long, unsigned long);
+extern void *memcpy(void *, const void *, unsigned);
+extern void *memset(void *, int, unsigned);
+extern int printf(const char *fmt, ...);
+extern int vprintf(const char *fmt, va_list ap);
+extern void panic(const char *format, ...);
+extern void halt(void);
+
+#endif /* __UTIL_H__ */
diff --git a/tools/firmware/vmxassist/vm86.c b/tools/firmware/vmxassist/vm86.c
new file mode 100644
index 0000000000..d63843660e
--- /dev/null
+++ b/tools/firmware/vmxassist/vm86.c
@@ -0,0 +1,956 @@
+/*
+ * vm86.c: A vm86 emulator. The main purpose of this emulator is to do as
+ * little work as possible.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "vm86.h"
+#include "util.h"
+#include "machine.h"
+
+#define HIGHMEM (1 << 20) /* 1MB */
+#define MASK16(v) ((v) & 0xFFFF)
+
+#define DATA32 0x0001
+#define ADDR32 0x0002
+#define SEG_CS 0x0004
+#define SEG_DS 0x0008
+#define SEG_ES 0x0010
+#define SEG_SS 0x0020
+#define SEG_FS 0x0040
+#define SEG_GS 0x0080
+
+unsigned prev_eip = 0;
+enum vm86_mode mode;
+
+#ifdef DEBUG
+int traceset = 0;
+#endif /* DEBUG */
+
+
+unsigned
+address(struct regs *regs, unsigned seg, unsigned off)
+{
+ unsigned long long entry;
+ unsigned addr;
+
+ /* real mode: segment is part of the address */
+ if (mode == VM86_REAL || mode == VM86_REAL_TO_PROTECTED)
+ return ((seg & 0xFFFF) << 4) + off;
+
+ /* protected mode: use seg as index into gdt */
+ if (seg > oldctx.gdtr_limit) {
+ printf("address: Invalid segment descriptor (0x%x)\n", seg);
+ return 0;
+ }
+
+ entry = ((unsigned long long *) oldctx.gdtr_base)[seg >> 3];
+ addr = (((entry >> (56-24)) & 0xFF000000) |
+ ((entry >> (32-16)) & 0x00FF0000) |
+ ((entry >> ( 16)) & 0x0000FFFF)) + off;
+ return addr;
+}
+
+#ifdef DEBUG
+void
+trace(struct regs *regs, int adjust, char *fmt, ...)
+{
+ unsigned off = regs->eip - adjust;
+ va_list ap;
+
+ if ((traceset & (1 << mode)) &&
+ (mode == VM86_REAL_TO_PROTECTED || mode == VM86_REAL)) {
+ /* 16-bit, seg:off addressing */
+ unsigned addr = address(regs, regs->cs, off);
+ printf("0x%08x: 0x%x:0x%04x ", addr, regs->cs, off);
+ printf("(%d) ", mode);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ printf("\n");
+ }
+ if ((traceset & (1 << mode)) &&
+ (mode == VM86_PROTECTED_TO_REAL || mode == VM86_PROTECTED)) {
+ /* 16-bit, gdt addressing */
+ unsigned addr = address(regs, regs->cs, off);
+ printf("0x%08x: 0x%x:0x%08x ", addr, regs->cs, off);
+ printf("(%d) ", mode);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ printf("\n");
+ }
+}
+#endif /* DEBUG */
+
+static inline unsigned
+read32(unsigned addr)
+{
+ return *(unsigned long *) addr;
+}
+
+static inline unsigned
+read16(unsigned addr)
+{
+ return *(unsigned short *) addr;
+}
+
+static inline unsigned
+read8(unsigned addr)
+{
+ return *(unsigned char *) addr;
+}
+
+static inline void
+write32(unsigned addr, unsigned value)
+{
+ *(unsigned long *) addr = value;
+}
+
+static inline void
+write16(unsigned addr, unsigned value)
+{
+ *(unsigned short *) addr = value;
+}
+
+static inline void
+write8(unsigned addr, unsigned value)
+{
+ *(unsigned char *) addr = value;
+}
+
+static inline void
+push32(struct regs *regs, unsigned value)
+{
+ regs->uesp -= 4;
+ write32(address(regs, regs->uss, MASK16(regs->uesp)), value);
+}
+
+static inline void
+push16(struct regs *regs, unsigned value)
+{
+ regs->uesp -= 2;
+ write16(address(regs, regs->uss, MASK16(regs->uesp)), value);
+}
+
+static inline unsigned
+pop32(struct regs *regs)
+{
+ unsigned value = read32(address(regs, regs->uss, MASK16(regs->uesp)));
+ regs->uesp += 4;
+ return value;
+}
+
+static inline unsigned
+pop16(struct regs *regs)
+{
+ unsigned value = read16(address(regs, regs->uss, MASK16(regs->uesp)));
+ regs->uesp += 2;
+ return value;
+}
+
+static inline unsigned
+fetch32(struct regs *regs)
+{
+ unsigned addr = address(regs, regs->cs, MASK16(regs->eip));
+
+ regs->eip += 4;
+ return read32(addr);
+}
+
+static inline unsigned
+fetch16(struct regs *regs)
+{
+ unsigned addr = address(regs, regs->cs, MASK16(regs->eip));
+
+ regs->eip += 2;
+ return read16(addr);
+}
+
+static inline unsigned
+fetch8(struct regs *regs)
+{
+ unsigned addr = address(regs, regs->cs, MASK16(regs->eip));
+
+ regs->eip++;
+ return read8(addr);
+}
+
+unsigned
+getreg(struct regs *regs, int r)
+{
+ switch (r & 7) {
+ case 0: return regs->eax;
+ case 1: return regs->ecx;
+ case 2: return regs->edx;
+ case 3: return regs->ebx;
+ case 4: return regs->esp;
+ case 5: return regs->ebp;
+ case 6: return regs->esi;
+ case 7: return regs->edi;
+ }
+ return ~0;
+}
+
+void
+setreg(struct regs *regs, int r, unsigned v)
+{
+ switch (r & 7) {
+ case 0: regs->eax = v; break;
+ case 1: regs->ecx = v; break;
+ case 2: regs->edx = v; break;
+ case 3: regs->ebx = v; break;
+ case 4: regs->esp = v; break;
+ case 5: regs->ebp = v; break;
+ case 6: regs->esi = v; break;
+ case 7: regs->edi = v; break;
+ }
+}
+
+/*
+ * Operand (modrm) decode
+ */
+unsigned
+operand(unsigned prefix, struct regs *regs, unsigned modrm)
+{
+ int mod, disp = 0, seg;
+
+ seg = regs->vds;
+ if (prefix & SEG_ES)
+ seg = regs->ves;
+ if (prefix & SEG_DS)
+ seg = regs->vds;
+ if (prefix & SEG_CS)
+ seg = regs->cs;
+ if (prefix & SEG_SS)
+ seg = regs->uss;
+ if (prefix & SEG_FS)
+ seg = regs->fs;
+ if (prefix & SEG_GS)
+ seg = regs->gs;
+
+ if (prefix & ADDR32) { /* 32-bit addressing */
+ switch ((mod = (modrm >> 6) & 3)) {
+ case 0:
+ switch (modrm & 7) {
+ case 0: return address(regs, seg, regs->eax);
+ case 1: return address(regs, seg, regs->ecx);
+ case 2: return address(regs, seg, regs->edx);
+ case 3: return address(regs, seg, regs->ebx);
+ case 4: panic("No SIB decode (yet)");
+ case 5: return address(regs, seg, fetch32(regs));
+ case 6: return address(regs, seg, regs->esi);
+ case 7: return address(regs, seg, regs->edi);
+ }
+ break;
+ case 1:
+ case 2:
+ if ((modrm & 7) != 4) {
+ if (mod == 1)
+ disp = (char) fetch8(regs);
+ else
+ disp = (int) fetch32(regs);
+ }
+ switch (modrm & 7) {
+ case 0: return address(regs, seg, regs->eax + disp);
+ case 1: return address(regs, seg, regs->ecx + disp);
+ case 2: return address(regs, seg, regs->edx + disp);
+ case 3: return address(regs, seg, regs->ebx + disp);
+ case 4: panic("No SIB decode (yet)");
+ case 5: return address(regs, seg, regs->ebp + disp);
+ case 6: return address(regs, seg, regs->esi + disp);
+ case 7: return address(regs, seg, regs->edi + disp);
+ }
+ break;
+ case 3:
+ return getreg(regs, modrm);
+ }
+ } else { /* 16-bit addressing */
+ switch ((mod = (modrm >> 6) & 3)) {
+ case 0:
+ switch (modrm & 7) {
+ case 0: return address(regs, seg, MASK16(regs->ebx) +
+ MASK16(regs->esi));
+ case 1: return address(regs, seg, MASK16(regs->ebx) +
+ MASK16(regs->edi));
+ case 2: return address(regs, seg, MASK16(regs->ebp) +
+ MASK16(regs->esi));
+ case 3: return address(regs, seg, MASK16(regs->ebp) +
+ MASK16(regs->edi));
+ case 4: return address(regs, seg, MASK16(regs->esi));
+ case 5: return address(regs, seg, MASK16(regs->edi));
+ case 6: return address(regs, seg, fetch16(regs));
+ case 7: return address(regs, seg, MASK16(regs->ebx));
+ }
+ break;
+ case 1:
+ case 2:
+ if (mod == 1)
+ disp = (char) fetch8(regs);
+ else
+ disp = (int) fetch16(regs);
+ switch (modrm & 7) {
+ case 0: return address(regs, seg, MASK16(regs->ebx) +
+ MASK16(regs->esi) + disp);
+ case 1: return address(regs, seg, MASK16(regs->ebx) +
+ MASK16(regs->edi) + disp);
+ case 2: return address(regs, seg, MASK16(regs->ebp) +
+ MASK16(regs->esi) + disp);
+ case 3: return address(regs, seg, MASK16(regs->ebp) +
+ MASK16(regs->edi) + disp);
+ case 4: return address(regs, seg,
+ MASK16(regs->esi) + disp);
+ case 5: return address(regs, seg,
+ MASK16(regs->edi) + disp);
+ case 6: return address(regs, seg,
+ MASK16(regs->ebp) + disp);
+ case 7: return address(regs, seg,
+ MASK16(regs->ebx) + disp);
+ }
+ break;
+ case 3:
+ return MASK16(getreg(regs, modrm));
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Load new IDT
+ */
+int
+lidt(struct regs *regs, unsigned prefix, unsigned modrm)
+{
+ unsigned eip = regs->eip - 3;
+ unsigned addr = operand(prefix, regs, modrm);
+
+ oldctx.idtr_limit = ((struct dtr *) addr)->size;
+ if ((prefix & DATA32) == 0)
+ oldctx.idtr_base = ((struct dtr *) addr)->base & 0xFFFFFF;
+ else
+ oldctx.idtr_base = ((struct dtr *) addr)->base;
+ TRACE((regs, regs->eip - eip, "lidt 0x%x <%d, 0x%x>",
+ addr, oldctx.idtr_limit, oldctx.idtr_base));
+
+ return 1;
+}
+
+/*
+ * Load new GDT
+ */
+int
+lgdt(struct regs *regs, unsigned prefix, unsigned modrm)
+{
+ unsigned eip = regs->eip - 3;
+ unsigned addr = operand(prefix, regs, modrm);
+
+ oldctx.gdtr_limit = ((struct dtr *) addr)->size;
+ if ((prefix & DATA32) == 0)
+ oldctx.gdtr_base = ((struct dtr *) addr)->base & 0xFFFFFF;
+ else
+ oldctx.gdtr_base = ((struct dtr *) addr)->base;
+ TRACE((regs, regs->eip - eip, "lgdt 0x%x <%d, 0x%x>",
+ addr, oldctx.gdtr_limit, oldctx.gdtr_base));
+
+ return 1;
+}
+
+/*
+ * Modify CR0 either through an lmsw instruction.
+ */
+int
+lmsw(struct regs *regs, unsigned prefix, unsigned modrm)
+{
+ unsigned eip = regs->eip - 3;
+ unsigned ax = operand(prefix, regs, modrm) & 0xF;
+ unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax;
+
+ TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax));
+#ifndef TEST
+ oldctx.cr0 = cr0 | CR0_PE | CR0_NE;
+#else
+ oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG;
+#endif
+ if (cr0 & CR0_PE)
+ set_mode(regs, VM86_REAL_TO_PROTECTED);
+
+ return 1;
+}
+
+/*
+ * Move to and from a control register.
+ */
+int
+movcr(struct regs *regs, unsigned prefix, unsigned opc)
+{
+ unsigned eip = regs->eip - 2;
+ unsigned modrm = fetch8(regs);
+ unsigned cr = (modrm >> 3) & 7;
+
+ if ((modrm & 0xC0) != 0xC0) /* only registers */
+ return 0;
+
+ switch (opc) {
+ case 0x20: /* mov Rd, Cd */
+ TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr));
+ switch (cr) {
+ case 0:
+#ifndef TEST
+ setreg(regs, modrm,
+ oldctx.cr0 & ~(CR0_PE | CR0_NE));
+#else
+ setreg(regs, modrm,
+ oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG));
+#endif
+ break;
+ case 2:
+ setreg(regs, modrm, get_cr2());
+ break;
+ case 3:
+ setreg(regs, modrm, oldctx.cr3);
+ break;
+ case 4:
+ setreg(regs, modrm, oldctx.cr4);
+ break;
+ }
+ break;
+ case 0x22: /* mov Cd, Rd */
+ TRACE((regs, regs->eip - eip, "movl %%eax, %%cr%d", cr));
+ switch (cr) {
+ case 0:
+ oldctx.cr0 = getreg(regs, modrm) | (CR0_PE | CR0_NE);
+#ifdef TEST
+ oldctx.cr0 |= CR0_PG;
+#endif
+ if (getreg(regs, modrm) & CR0_PE)
+ set_mode(regs, VM86_REAL_TO_PROTECTED);
+
+ break;
+ case 3:
+ oldctx.cr3 = getreg(regs, modrm);
+ break;
+ case 4:
+ oldctx.cr4 = getreg(regs, modrm);
+ break;
+ }
+ break;
+ }
+
+ return 1;
+}
+
+/*
+ * Emulate a segment load in protected mode
+ */
+int
+load_seg(unsigned long sel, unsigned long *base, unsigned long *limit,
+ union vmcs_arbytes *arbytes)
+{
+ unsigned long long entry;
+
+ /* protected mode: use seg as index into gdt */
+ if (sel == 0 || sel > oldctx.gdtr_limit)
+ return 0;
+
+ entry = ((unsigned long long *) oldctx.gdtr_base)[sel >> 3];
+ *base = (((entry >> (56-24)) & 0xFF000000) |
+ ((entry >> (32-16)) & 0x00FF0000) |
+ ((entry >> ( 16)) & 0x0000FFFF));
+ *limit = (((entry >> (48-16)) & 0x000F0000) |
+ ((entry ) & 0x0000FFFF));
+
+ arbytes->bytes = 0;
+ arbytes->fields.seg_type = (entry >> (8+32)) & 0xF; /* TYPE */
+ arbytes->fields.s = (entry >> (12+32)) & 0x1; /* S */
+ if (arbytes->fields.s)
+ arbytes->fields.seg_type |= 1; /* accessed */
+ arbytes->fields.dpl = (entry >> (13+32)) & 0x3; /* DPL */
+ arbytes->fields.p = (entry >> (15+32)) & 0x1; /* P */
+ arbytes->fields.avl = (entry >> (20+32)) & 0x1; /* AVL */
+ arbytes->fields.default_ops_size = (entry >> (22+32)) & 0x1; /* D */
+
+ if (entry & (1ULL << (23+32))) { /* G */
+ arbytes->fields.g = 1;
+ *limit = (*limit << 12) | 0xFFF;
+ }
+
+ return 1;
+}
+
+/*
+ * Transition to protected mode
+ */
+void
+protected_mode(struct regs *regs)
+{
+ regs->eflags &= ~(EFLAGS_TF|EFLAGS_VM);
+
+ oldctx.eip = regs->eip;
+ oldctx.esp = regs->uesp;
+ oldctx.eflags = regs->eflags;
+
+ /* reload all segment registers */
+ if (!load_seg(regs->cs, &oldctx.cs_base,
+ &oldctx.cs_limit, &oldctx.cs_arbytes))
+ panic("Invalid %%cs=0x%x for protected mode\n", regs->cs);
+ oldctx.cs_sel = regs->cs;
+
+ if (load_seg(regs->ves, &oldctx.es_base,
+ &oldctx.es_limit, &oldctx.es_arbytes))
+ oldctx.es_sel = regs->ves;
+
+ if (load_seg(regs->uss, &oldctx.ss_base,
+ &oldctx.ss_limit, &oldctx.ss_arbytes))
+ oldctx.ss_sel = regs->uss;
+
+ if (load_seg(regs->vds, &oldctx.ds_base,
+ &oldctx.ds_limit, &oldctx.ds_arbytes))
+ oldctx.ds_sel = regs->vds;
+
+ if (load_seg(regs->vfs, &oldctx.fs_base,
+ &oldctx.fs_limit, &oldctx.fs_arbytes))
+ oldctx.fs_sel = regs->vfs;
+
+ if (load_seg(regs->vgs, &oldctx.gs_base,
+ &oldctx.gs_limit, &oldctx.gs_arbytes))
+ oldctx.gs_sel = regs->vgs;
+
+ /* initialize jump environment to warp back to protected mode */
+ regs->cs = CODE_SELECTOR;
+ regs->ds = DATA_SELECTOR;
+ regs->es = DATA_SELECTOR;
+ regs->fs = DATA_SELECTOR;
+ regs->gs = DATA_SELECTOR;
+ regs->eip = (unsigned) &switch_to_protected_mode;
+
+ /* this should get us into 32-bit mode */
+}
+
+/*
+ * Start real-mode emulation
+ */
+void
+real_mode(struct regs *regs)
+{
+ regs->eflags |= EFLAGS_VM | 0x02;
+ regs->ds = DATA_SELECTOR;
+ regs->es = DATA_SELECTOR;
+ regs->fs = DATA_SELECTOR;
+ regs->gs = DATA_SELECTOR;
+
+ /*
+ * When we transition from protected to real-mode and we
+ * have not reloaded the segment descriptors yet, they are
+ * interpreted as if they were in protect mode.
+ * We emulate this behavior by assuming that these memory
+ * reference are below 1MB and set %ss, %ds, %es accordingly.
+ */
+ if (regs->uss != 0) {
+ if (regs->uss >= HIGHMEM)
+ panic("%%ss 0x%lx higher than 1MB", regs->uss);
+ regs->uss = address(regs, regs->uss, 0) >> 4;
+ }
+ if (regs->vds != 0) {
+ if (regs->vds >= HIGHMEM)
+ panic("%%ds 0x%lx higher than 1MB", regs->vds);
+ regs->vds = address(regs, regs->vds, 0) >> 4;
+ }
+ if (regs->ves != 0) {
+ if (regs->ves >= HIGHMEM)
+ panic("%%es 0x%lx higher than 1MB", regs->ves);
+ regs->ves = address(regs, regs->ves, 0) >> 4;
+ }
+
+ /* this should get us into 16-bit mode */
+}
+
+/*
+ * This is the smarts of the emulator and handles the mode transitions. The
+ * emulator handles 4 different modes. 1) VM86_REAL: emulated real-mode, Just
+ * handle those instructions that are not supported under VM8086.
+ * 2) VM86_REAL_TO_PROTECTED: going from real-mode to protected mode. In this
+ * we single step through the instructions until we reload the new %cs (some
+ * OSes do a lot of computations before reloading %cs). 2) VM86_PROTECTED_TO_REAL
+ * when we are going from protected to real mode. In this case we emulate the
+ * instructions by hand. Finally, 4) VM86_PROTECTED when we transitioned to
+ * protected mode and we should abandon the emulator. No instructions are
+ * emulated when in VM86_PROTECTED mode.
+ */
+void
+set_mode(struct regs *regs, enum vm86_mode newmode)
+{
+ switch (newmode) {
+ case VM86_REAL:
+ TRACE((regs, 0, "<VM86_REAL>"));
+ if (mode == VM86_PROTECTED_TO_REAL) {
+ real_mode(regs);
+ break;
+ } else if (mode == VM86_REAL) {
+ break;
+ } else
+ panic("unexpected real mode transition");
+ break;
+
+ case VM86_REAL_TO_PROTECTED:
+ TRACE((regs, 0, "<VM86_REAL_TO_PROTECTED>"));
+ if (mode == VM86_REAL) {
+ regs->eflags |= EFLAGS_TF;
+ break;
+ } else if (mode == VM86_REAL_TO_PROTECTED) {
+ break;
+ } else
+ panic("unexpected real-to-protected mode transition");
+ break;
+
+ case VM86_PROTECTED_TO_REAL:
+ if (mode == VM86_PROTECTED)
+ break;
+ else
+ panic("unexpected protected-to-real mode transition");
+
+ case VM86_PROTECTED:
+ TRACE((regs, 0, "<VM86_PROTECTED>"));
+ if (mode == VM86_REAL_TO_PROTECTED) {
+ protected_mode(regs);
+ break;
+ } else
+ panic("unexpected protected mode transition");
+ break;
+ }
+
+ mode = newmode;
+}
+
+void
+jmpl(struct regs *regs, int prefix)
+{
+ unsigned n = regs->eip;
+ unsigned cs, eip;
+
+ if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
+ eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
+ cs = fetch16(regs);
+
+ TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+
+ regs->cs = cs;
+ regs->eip = eip;
+ set_mode(regs, VM86_PROTECTED);
+ } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
+ eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs);
+ cs = fetch16(regs);
+
+ TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip));
+
+ regs->cs = cs;
+ regs->eip = eip;
+ set_mode(regs, VM86_REAL);
+ } else
+ panic("jmpl");
+}
+
+void
+retl(struct regs *regs, int prefix)
+{
+ unsigned cs, eip;
+
+ if (prefix & DATA32) {
+ eip = pop32(regs);
+ cs = MASK16(pop32(regs));
+ } else {
+ eip = pop16(regs);
+ cs = pop16(regs);
+ }
+
+ TRACE((regs, 1, "retl (to 0x%x:0x%x)", cs, eip));
+
+ if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */
+ regs->cs = cs;
+ regs->eip = eip;
+ set_mode(regs, VM86_PROTECTED);
+ } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */
+ regs->cs = cs;
+ regs->eip = eip;
+ set_mode(regs, VM86_REAL);
+ } else
+ panic("retl");
+}
+
+void
+interrupt(struct regs *regs, int n)
+{
+ TRACE((regs, 0, "external interrupt %d", n));
+ push16(regs, regs->eflags);
+ push16(regs, regs->cs);
+ push16(regs, regs->eip);
+ regs->eflags &= ~EFLAGS_IF;
+ regs->eip = read16(address(regs, 0, n * 4));
+ regs->cs = read16(address(regs, 0, n * 4 + 2));
+}
+
+enum { OPC_INVALID, OPC_EMULATED };
+
+/*
+ * Emulate a single instruction, including all its prefixes. We only implement
+ * a small subset of the opcodes, and not all opcodes are implemented for each
+ * of the four modes we can operate in.
+ */
+int
+opcode(struct regs *regs)
+{
+ unsigned eip = regs->eip;
+ unsigned opc, modrm, disp;
+ unsigned prefix = 0;
+
+ for (;;) {
+ switch ((opc = fetch8(regs))) {
+ case 0x0F: /* two byte opcode */
+ if (mode == VM86_PROTECTED)
+ goto invalid;
+ switch ((opc = fetch8(regs))) {
+ case 0x01:
+ switch (((modrm = fetch8(regs)) >> 3) & 7) {
+ case 0: /* sgdt */
+ case 1: /* sidt */
+ goto invalid;
+ case 2: /* lgdt */
+ if (!lgdt(regs, prefix, modrm))
+ goto invalid;
+ return OPC_EMULATED;
+ case 3: /* lidt */
+ if (!lidt(regs, prefix, modrm))
+ goto invalid;
+ return OPC_EMULATED;
+ case 4: /* smsw */
+ goto invalid;
+ case 5:
+ goto invalid;
+ case 6: /* lmsw */
+ if (!lmsw(regs, prefix, modrm))
+ goto invalid;
+ return OPC_EMULATED;
+ case 7: /* invlpg */
+ goto invalid;
+ }
+ break;
+ case 0x20: /* mov Rd, Cd (1h) */
+ case 0x22:
+ if (!movcr(regs, prefix, opc))
+ goto invalid;
+ return OPC_EMULATED;
+ default:
+ goto invalid;
+ }
+ goto invalid;
+
+ case 0x26:
+ TRACE((regs, regs->eip - eip, "%%es:"));
+ prefix |= SEG_ES;
+ continue;
+
+ case 0x2E:
+ TRACE((regs, regs->eip - eip, "%%cs:"));
+ prefix |= SEG_CS;
+ continue;
+
+ case 0x36:
+ TRACE((regs, regs->eip - eip, "%%ss:"));
+ prefix |= SEG_SS;
+ continue;
+
+ case 0x3E:
+ TRACE((regs, regs->eip - eip, "%%ds:"));
+ prefix |= SEG_DS;
+ continue;
+
+ case 0x64:
+ TRACE((regs, regs->eip - eip, "%%fs:"));
+ prefix |= SEG_FS;
+ continue;
+
+ case 0x65:
+ TRACE((regs, regs->eip - eip, "%%gs:"));
+ prefix |= SEG_GS;
+ continue;
+
+ case 0x66:
+ TRACE((regs, regs->eip - eip, "data32"));
+ prefix |= DATA32;
+ continue;
+
+ case 0x67:
+ TRACE((regs, regs->eip - eip, "addr32"));
+ prefix |= ADDR32;
+ continue;
+
+ case 0x90: /* nop */
+ TRACE((regs, regs->eip - eip, "nop"));
+ return OPC_EMULATED;
+
+ case 0x9C: /* pushf */
+ TRACE((regs, regs->eip - eip, "pushf"));
+ if (prefix & DATA32)
+ push32(regs, regs->eflags & ~EFLAGS_VM);
+ else
+ push16(regs, regs->eflags & ~EFLAGS_VM);
+ return OPC_EMULATED;
+
+ case 0x9D: /* popf */
+ TRACE((regs, regs->eip - eip, "popf"));
+ if (prefix & DATA32)
+ regs->eflags = pop32(regs);
+ else
+ regs->eflags = (regs->eflags & 0xFFFF0000L) |
+ pop16(regs);
+ regs->eflags |= EFLAGS_VM;
+ return OPC_EMULATED;
+
+ case 0xCB: /* retl */
+ if ((mode == VM86_REAL_TO_PROTECTED) ||
+ (mode == VM86_PROTECTED_TO_REAL)) {
+ retl(regs, prefix);
+ return OPC_EMULATED;
+ }
+ goto invalid;
+
+ case 0xCD: /* int $n */
+ TRACE((regs, regs->eip - eip, "int"));
+ interrupt(regs, fetch8(regs));
+ return OPC_EMULATED;
+
+ case 0xCF: /* iret */
+ if (prefix & DATA32) {
+ TRACE((regs, regs->eip - eip, "data32 iretd"));
+ regs->eip = pop32(regs);
+ regs->cs = pop32(regs);
+ regs->eflags = pop32(regs);
+ } else {
+ TRACE((regs, regs->eip - eip, "iret"));
+ regs->eip = pop16(regs);
+ regs->cs = pop16(regs);
+ regs->eflags = (regs->eflags & 0xFFFF0000L) |
+ pop16(regs);
+ }
+ return OPC_EMULATED;
+
+ case 0xEA: /* jmpl */
+ if ((mode == VM86_REAL_TO_PROTECTED) ||
+ (mode == VM86_PROTECTED_TO_REAL)) {
+ jmpl(regs, prefix);
+ return OPC_EMULATED;
+ }
+ goto invalid;
+
+ case 0xEB: /* short jump */
+ if ((mode == VM86_REAL_TO_PROTECTED) ||
+ (mode == VM86_PROTECTED_TO_REAL)) {
+ disp = (char) fetch8(regs);
+ TRACE((regs, 2, "jmp 0x%x", regs->eip + disp));
+ regs->eip += disp;
+ return OPC_EMULATED;
+ }
+ goto invalid;
+
+ case 0xF0: /* lock */
+ TRACE((regs, regs->eip - eip, "lock"));
+ continue;
+
+ case 0xFA: /* cli */
+ TRACE((regs, regs->eip - eip, "cli"));
+ regs->eflags &= ~EFLAGS_IF;
+ return OPC_EMULATED;
+
+ case 0xFB: /* sti */
+ TRACE((regs, regs->eip - eip, "sti"));
+ regs->eflags |= EFLAGS_IF;
+ return OPC_EMULATED;
+
+ default:
+ goto invalid;
+ }
+ }
+
+invalid:
+ regs->eip = eip;
+ return OPC_INVALID;
+}
+
+void
+emulate(struct regs *regs)
+{
+ unsigned flteip;
+ int nemul = 0;
+
+ /* emulate as many instructions as possible */
+ while (opcode(regs) != OPC_INVALID)
+ nemul++;
+
+ /* detect the case where we are not making progress */
+ if (nemul == 0 && prev_eip == regs->eip) {
+ flteip = address(regs, MASK16(regs->cs), regs->eip);
+ panic("Unknown opcode at %04x:%04x=0x%x",
+ MASK16(regs->cs), regs->eip, flteip);
+ } else
+ prev_eip = regs->eip;
+}
+
+void
+trap(int trapno, int errno, struct regs *regs)
+{
+ /* emulate device interrupts */
+ if (trapno >= NR_EXCEPTION_HANDLER) {
+ int irq = trapno - NR_EXCEPTION_HANDLER;
+ if (irq < 8)
+ interrupt(regs, irq + 8);
+ else
+ interrupt(regs, 0x70 + (irq - 8));
+ return;
+ }
+
+ switch (trapno) {
+ case 1: /* Debug */
+ if (regs->eflags & EFLAGS_VM) {
+ /* emulate any 8086 instructions */
+ if (mode != VM86_REAL_TO_PROTECTED)
+ panic("not in real-to-protected mode");
+ emulate(regs);
+ return;
+ }
+ goto invalid;
+
+ case 13: /* GPF */
+ if (regs->eflags & EFLAGS_VM) {
+ /* emulate any 8086 instructions */
+ if (mode == VM86_PROTECTED)
+ panic("unexpected protected mode");
+ emulate(regs);
+ return;
+ }
+ goto invalid;
+
+ default:
+ invalid:
+ printf("Trap (%d) while in %s mode\n",
+ trapno, regs->eflags & EFLAGS_VM ? "real" : "protected");
+ if (trapno == 14)
+ printf("Page fault address 0x%x\n", get_cr2());
+ dump_regs(regs);
+ halt();
+ }
+}
+
diff --git a/tools/firmware/vmxassist/vm86.h b/tools/firmware/vmxassist/vm86.h
new file mode 100644
index 0000000000..ce09bd9c8d
--- /dev/null
+++ b/tools/firmware/vmxassist/vm86.h
@@ -0,0 +1,67 @@
+/*
+ * vm86.h: vm86 emulator definitions.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef __VM86_H__
+#define __VM86_H__
+
+#include <public/vmx_assist.h>
+
+/* 32 CPU exception vectors followed by 16 hardware IRQ handlers */
+#define NR_EXCEPTION_HANDLER 32
+#define NR_INTERRUPT_HANDLERS 16
+#define NR_TRAPS (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * CPU state as saved on the stack by the trap entry code.
+ * NOTE(review): the field order must match what the assembly entry stub
+ * pushes (general registers appear in pusha memory order, then the
+ * data segments, then trapno/errno, then the CPU-pushed frame) --
+ * verify against the trap entry assembly before changing.
+ */
+struct regs {
+	unsigned edi, esi, ebp, esp, ebx, edx, ecx, eax;
+	unsigned ds, es, fs, gs;
+	unsigned trapno, errno;	/* vector number and hardware error code */
+	unsigned eip, cs, eflags, uesp, uss;	/* frame pushed by the CPU */
+	unsigned ves, vds, vfs, vgs;	/* vm86 guest segment registers */
+};
+
+/* emulator state machine: which world the guest is in (or moving to) */
+enum vm86_mode {
+	VM86_REAL = 0,			/* guest in real (vm86) mode */
+	VM86_REAL_TO_PROTECTED,		/* transitioning real -> protected */
+	VM86_PROTECTED_TO_REAL,		/* transitioning protected -> real */
+	VM86_PROTECTED			/* guest in protected mode */
+};
+
+/* instruction tracing; compiled out entirely unless DEBUG is defined */
+#ifdef DEBUG
+#define TRACE(a) trace a
+#else
+#define TRACE(a)
+#endif
+
+extern enum vm86_mode prevmode, mode;
+extern struct vmx_assist_context oldctx;
+extern struct vmx_assist_context newctx;
+
+extern void emulate(struct regs *);
+extern void interrupt(struct regs *, int);
+extern void dump_regs(struct regs *);
+extern void trace(struct regs *, int, char *, ...);
+
+extern void set_mode(struct regs *, enum vm86_mode);
+extern void switch_to_real_mode(void);
+extern void switch_to_protected_mode(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __VM86_H__ */
diff --git a/tools/firmware/vmxassist/vmxassist.ld b/tools/firmware/vmxassist/vmxassist.ld
new file mode 100644
index 0000000000..c9807c63f6
--- /dev/null
+++ b/tools/firmware/vmxassist/vmxassist.ld
@@ -0,0 +1,34 @@
+/*
+ * vmxassist.ld
+ *
+ * Linker script for the vmxassist binary.  TEXTADDR (the link/load
+ * address of .text) is supplied externally by the build.  Each output
+ * section is bracketed by begin/end marker symbols (_btext/_etext,
+ * _bdata/_edata, _bbss/_ebss) for use by the startup code.
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+/*OUTPUT_ARCH(i386)*/
+ENTRY(_start)
+
+SECTIONS
+{
+	/* code plus all read-only data, placed at TEXTADDR */
+	.text TEXTADDR :
+	{
+		_btext = .;
+		*(.text)
+		*(.rodata)
+		*(.rodata.str1.1)
+		*(.rodata.str1.4)
+		_etext = .;
+	}
+
+	/* initialized data */
+	.data :
+	{
+		_bdata = .;
+		*(.data)
+		_edata = .;
+	}
+
+	/* zero-initialized data */
+	.bss :
+	{
+		_bbss = .;
+		*(.bss)
+		_ebss = .;
+	}
+}
+
+
diff --git a/tools/firmware/vmxassist/vmxloader.c b/tools/firmware/vmxassist/vmxloader.c
new file mode 100644
index 0000000000..39f6a8323f
--- /dev/null
+++ b/tools/firmware/vmxassist/vmxloader.c
@@ -0,0 +1,110 @@
+/*
+ * vmxloader.c: ROM/VMXAssist image loader.
+ *
+ * A quicky so that we can boot rom images as if they were a Linux kernel.
+ * This code will copy the rom images (ROMBIOS/VGABIOS/VM86) into their
+ * respective spaces and transfer control to VM86 to execute the BIOSes.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "machine.h"
+#include "roms.h"
+
+/*
+ * C runtime start off
+ *
+ * _start: disable interrupts, point %esp/%ebp at the top of a 16KB
+ * BSS stack, then call main().  If main() ever returns, fall into
+ * halt, which re-enables interrupts and spins forever.
+ */
+asm("					\n\
+	.text				\n\
+	.globl	_start			\n\
+_start:					\n\
+	cli				\n\
+	movl	$stack_top, %esp	\n\
+	movl	%esp, %ebp		\n\
+	call	main			\n\
+	jmp	halt			\n\
+					\n\
+	.globl	halt			\n\
+halt:					\n\
+	sti				\n\
+	jmp	.			\n\
+					\n\
+	.bss				\n\
+	.align	8			\n\
+	.globl	stack, stack_top	\n\
+stack:					\n\
+	.skip	0x4000			\n\
+stack_top:				\n\
+");
+
+/*
+ * Minimal memcpy: copy n bytes from src to dest and return dest.
+ * Copies n/4 dwords with "rep movsl", then handles the 0-3 trailing
+ * bytes with movsw/movsb keyed off bits 1 and 0 of n.  Regions must
+ * not overlap (copy is strictly ascending after cld).
+ */
+void *
+memcpy(void *dest, const void *src, unsigned n)
+{
+	int t0, t1, t2;	/* dummy outputs: clobbered %ecx, %edi, %esi */
+
+	__asm__ __volatile__(
+		"cld\n"
+		"rep; movsl\n"		/* copy n/4 dwords */
+		"testb $2,%b4\n"	/* trailing word present? */
+		"je 1f\n"
+		"movsw\n"
+		"1: testb $1,%b4\n"	/* trailing byte present? */
+		"je 2f\n"
+		"movsb\n"
+		"2:"
+		: "=&c" (t0), "=&D" (t1), "=&S" (t2)
+		: "0" (n/4), "q" (n), "1" ((long) dest), "2" ((long) src)
+		: "memory"
+	);
+	return dest;
+}
+
+/*
+ * Write a NUL-terminated string, one byte at a time, to I/O port 0xE9
+ * (debug output port).  Always returns 0.
+ * NOTE(review): assumes outb(port, value) argument order as declared
+ * in machine.h -- verify.
+ */
+int
+puts(const char *s)
+{
+	while (*s)
+		outb(0xE9, *s++);
+	return 0;
+}
+
+/*
+ * Probe for a Cirrus VGA adapter: write 0x9206 to the VGA sequencer
+ * index port (0x3C4) and test whether the data port (0x3C5) reads
+ * back 0x12.  Returns non-zero if a Cirrus adapter is present.
+ * NOTE(review): assumes outw(port, value) argument order -- verify
+ * against machine.h.
+ */
+int
+cirrus_check(void)
+{
+	outw(0x3C4, 0x9206);
+	return inb(0x3C5) == 0x12;
+}
+
+/*
+ * Loader entry point: copy the ROMBIOS image to 0xF0000, the matching
+ * VGABIOS (Cirrus or standard, chosen by probing the hardware) to
+ * 0xC0000, and the VMXAssist image to TEXTADDR, then jump to VMXAssist.
+ * Does not return on success (control transfers to TEXTADDR).
+ */
+int
+main()
+{
+	puts("VMXAssist Loader\n");
+	puts("Loading ROMBIOS ...\n");
+	memcpy((void *)0xF0000, rombios, sizeof(rombios));
+	if (cirrus_check()) {
+		puts("Loading Cirrus VGABIOS ...\n");
+		memcpy((void *)0xC0000,
+			vgabios_cirrusvga, sizeof(vgabios_cirrusvga));
+	} else {
+		puts("Loading Standard VGABIOS ...\n");
+		memcpy((void *)0xC0000,
+			vgabios_stdvga, sizeof(vgabios_stdvga));
+	}
+	puts("Loading VMXAssist ...\n");
+	memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist));
+	puts("Go ...\n");
+	/* transfer control to the freshly copied VMXAssist image */
+	((void (*)())TEXTADDR)();
+}
+
diff --git a/tools/ioemu/COPYING b/tools/ioemu/COPYING
new file mode 100644
index 0000000000..e77696ae8d
--- /dev/null
+++ b/tools/ioemu/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/tools/ioemu/COPYING.LIB b/tools/ioemu/COPYING.LIB
new file mode 100644
index 0000000000..223ede7de3
--- /dev/null
+++ b/tools/ioemu/COPYING.LIB
@@ -0,0 +1,504 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/tools/ioemu/Changelog b/tools/ioemu/Changelog
new file mode 100644
index 0000000000..cebbbc8a36
--- /dev/null
+++ b/tools/ioemu/Changelog
@@ -0,0 +1,295 @@
+version 0.6.1:
+
+ - Mac OS X port (Pierre d'Herbemont)
+ - Virtual console support
+ - Better monitor line editing
+ - New block device layer
+ - New 'qcow' growable disk image support with AES encryption and
+ transparent decompression
+ - VMware 3 and 4 read-only disk image support (untested)
+ - Support for up to 4 serial ports
+ - TFTP server support (Magnus Damm)
+ - Port redirection support in user mode networking
+ - Support for not executable data sections
+ - Compressed loop disk image support (Johannes Schindelin)
+ - Level triggered IRQ fix (aka NE2000 PCI performance fix) (Steve
+ Wormley)
+ - Fixed Fedora Core 2 problems (now you can run qemu without any
+ LD_ASSUME_KERNEL tricks on FC2)
+ - DHCP fix for Windows (accept DHCPREQUEST alone)
+ - SPARC system emulation (Blue Swirl)
+ - Automatic Samba configuration for host file access from Windows.
+ - '-loadvm' and '-full-screen' options
+ - ne2000 savevm support (Johannes Schindelin)
+ - Ctrl-Alt is now the default grab key. Ctrl-Alt-[0-9] switches to
+ the virtual consoles.
+ - BIOS floppy fix for NT4 (Mike Nordell, Derek Fawcus, Volker Ruppert)
+ - Floppy fixes for NT4 and NT5 (Mike Nordell)
+ - NT4 IDE fixes (Ben Pfaf, Mike Nordell)
+ - SDL Audio support and SB16 fixes (malc)
+ - ENTER instruction bug fix (initial patch by Stefan Kisdaroczi)
+ - VGA font change fix
+ - VGA read-only CRTC register fix
+
+version 0.6.0:
+
+ - minimalist FPU exception support (NetBSD FPU probe fix)
+ - cr0.ET fix (Win95 boot)
+ - *BSD port (Markus Niemisto)
+ - I/O access fix (signaled by Mark Jonckheere)
+ - IDE drives serial number fix (Mike Nordell)
+ - int13 CDROM BIOS fix (aka Solaris x86 install CD fix)
+ - int15, ah=86 BIOS fix (aka Solaris x86 hardware probe hang up fix)
+ - BSR/BSF "undefined behaviour" fix
+ - vmdk2raw: convert VMware disk images to raw images
+ - PCI support
+ - NE2K PCI support
+ - dummy VGA PCI support
+ - VGA font selection fix (Daniel Serpell)
+ - PIC reset fix (Hidemi KAWAI)
+ - PIC spurious irq support (aka Solaris install bug)
+ - added '-localtime' option
+ - Cirrus CL-GD54xx VGA support (initial patch by Makoto Suzuki (suzu))
+ - APM and system shutdown support
+ - Fixed system reset
+ - Support for other PC BIOSes
+ - Initial PowerMac hardware emulation
+ - PowerMac/PREP OpenFirmware compatible BIOS (Jocelyn Mayer)
+ - initial IDE BMDMA support (needed for Darwin x86)
+ - Set the default memory size for PC emulation to 128 MB
+
+version 0.5.5:
+
+ - SDL full screen support (initial patch by malc)
+ - VGA support on PowerPC PREP
+ - VBE fixes (Matthew Mastracci)
+ - PIT fixes (aka Win98 hardware probe and "VGA slowness" bug)
+ - IDE master only fixes (aka Win98 CD-ROM probe bug)
+ - ARM load/store half word fix (Ulrich Hecht)
+ - FDC fixes for Win98
+
+version 0.5.4:
+
+ - qemu-fast fixes
+ - BIOS area protection fix (aka EMM386.EXE fix) (Mike Nordell)
+ - keyboard/mouse fix (Mike Nordell)
+ - IDE fixes (Linux did not recognize slave drives)
+ - VM86 EIP masking fix (aka NT5 install fix) (Mike Nordell)
+ - QEMU can now boot a PowerPC Linux kernel (Jocelyn Mayer)
+ - User mode network stack
+ - imul imm8 fix + 0x82 opcode support (Hidemi KAWAI)
+ - precise self modifying code (aka BeOS install bug)
+
+version 0.5.3:
+
+ - added Bochs VESA VBE support
+ - VGA memory map mode 3 access fix (OS/2 install fix)
+ - IDE fixes (Jens Axboe)
+ - CPU interrupt fixes
+ - fixed various TLB invalidation cases (NT install)
+ - fixed cr0.WP semantics (XP install)
+ - direct chaining support for SPARC and PowerPC (faster)
+ - ARM NWFPE support (initial patch by Ulrich Hecht)
+ - added specific x86 to x86 translator (close to native performance
+ in qemu-i386 and qemu-fast)
+ - shm syscalls support (Paul McKerras)
+ - added accurate CR0.MP/ME/TS emulation
+ - fixed DMA memory write access (Win95 boot floppy fix)
+ - graphical x86 linux loader
+ - command line monitor
+ - generic removable device support
+ - support of CD-ROM change
+ - multiple network interface support
+ - initial x86-64 host support (Gwenole Beauchesne)
+ - lret to outer privilege fix (OS/2 install fix)
+ - task switch fixes (SkyOS boot)
+ - VM save/restore commands
+ - new timer API
+ - more precise RTC emulation (periodic timers + time updates)
+ - Win32 port (initial patch by Kazu)
+
+version 0.5.2:
+
+ - improved soft MMU speed (assembly functions and specializing)
+ - improved multitasking speed by avoiding flushing TBs when
+ switching tasks
+ - improved qemu-fast speed
+ - improved self modifying code handling (big performance gain in
+ softmmu mode).
+ - fixed IO checking
+ - fixed CD-ROM detection (win98 install CD)
+ - fixed addseg real mode bug (GRUB boot fix)
+ - added ROM memory support (win98 boot)
+ - fixed 'call Ev' in case of paging exception
+ - updated the script 'qemu-binfmt-conf.sh' to use QEMU automagically
+ when launching executables for the supported target CPUs.
+ - PowerPC system emulation update (Jocelyn Mayer)
+ - PC floppy emulation and DMA fixes (Jocelyn Mayer)
+ - polled mode for PIC (Jocelyn Mayer)
+ - fixed PTE dirty bit handling
+ - fixed xadd same reg bug
+ - fixed cmpxchg exception safeness
+ - access to virtual memory in gdb stub
+ - task gate and NT flag fixes
+ - eflags optimisation fix for string operations
+
+version 0.5.1:
+
+ - float access fixes when using soft mmu
+ - PC emulation support on PowerPC
+ - A20 support
+ - IDE CD-ROM emulation
+ - ARM fixes (Ulrich Hecht)
+ - SB16 emulation (malc)
+ - IRET and INT fixes in VM86 mode with IOPL=3
+ - Port I/Os use TSS io map
+ - Full task switching/task gate support
+ - added verr, verw, arpl, fcmovxx
+ - PowerPC target support (Jocelyn Mayer)
+ - Major SPARC target fixes (dynamically linked programs begin to work)
+
+version 0.5.0:
+
+ - full hardware level VGA emulation
+ - graphical display with SDL
+ - added PS/2 mouse and keyboard emulation
+ - popw (%esp) fix
+ - mov to/from segment data width fix
+ - added real mode support
+ - added Bochs BIOS and LGPL'ed VGA BIOS loader in qemu
+ - m68k host port (Richard Zidlicky)
+ - partial soft MMU support for memory mapped I/Os
+ - multi-target build
+ - fixed: no error code in hardware interrupts
+ - fixed: pop ss, mov ss, x and sti disable hardware irqs for the next insn
+ - correct single stepping through string operations
+ - preliminary SPARC target support (Thomas M. Ogrisegg)
+ - tun-fd option (Rusty Russell)
+ - automatic IDE geometry detection
+ - renamed 'vl' to qemu[-fast] and user qemu to qemu-{cpu}.
+ - added man page
+ - added full soft mmu mode to launch unpatched OSes.
+
+version 0.4.3:
+
+ - x86 exception fix in case of nop instruction.
+ - gcc 3.2.2 bug workaround (RedHat 9 fix)
+ - sparc and Alpha host fixes
+ - many ARM target fixes: 'ls' and 'bash' can be launched.
+
+version 0.4.2:
+
+ - many exception handling fixes (can compile a Linux kernel inside vl)
+ - IDE emulation support
+ - initial GDB stub support
+ - deferred update support for disk images (Rusty Russell)
+ - accept User Mode Linux Copy On Write disk images
+ - SMP kernels can at least be booted
+
+version 0.4.1:
+
+ - more accurate timer support in vl.
+ - more reliable NE2000 probe in vl.
+ - added 2.5.66 kernel in vl-test.
+ - added VLTMPDIR environment variable in vl.
+
+version 0.4:
+
+ - initial support for ring 0 x86 processor emulation
+ - fixed signal handling for correct dosemu DPMI emulation
+ - fast x86 MMU emulation with mmap()
+ - fixed popl (%esp) case
+ - Linux kernel can be executed by QEMU with the 'vl' command.
+
+version 0.3:
+
+ - initial support for ARM emulation
+ - added fnsave, frstor, fnstenv, fldenv FPU instructions
+ - added FPU register save in signal emulation
+ - initial ARM port
+ - Sparc and Alpha ports work on the regression test
+ - generic ioctl number conversion
+ - fixed ioctl type conversion
+
+version 0.2:
+
+ - PowerPC disassembly and ELF symbols output (Rusty Russell)
+ - flock support (Rusty Russell)
+ - ugetrlimit support (Rusty Russell)
+ - fstat64 fix (Rusty Russell)
+ - initial Alpha port (Falk Hueffner)
+ - initial IA64 port (Matt Wilson)
+ - initial Sparc and Sparc64 port (David S. Miller)
+ - added HLT instruction
+ - LRET instruction fix.
+ - added GPF generation for I/Os.
+ - added INT3 and TF flag support.
+ - SHL instruction C flag fix.
+ - mmap emulation for host page size > 4KB
+ - self-modifying code support
+ - better VM86 support (dosemu works on non trivial programs)
+ - precise exception support (EIP is computed correctly in most cases)
+ - more precise LDT/GDT/IDT emulation
+ - faster segment load in vm86 mode
+ - direct chaining of basic blocks (faster emulation)
+
+version 0.1.6:
+
+ - automatic library search system. QEMU can now work with unpatched
+ ELF dynamic loader and libc (Rusty Russell).
+ - ISO C warning fixes (Alistair Strachan)
+ - first self-virtualizable version (works only as long as the
+ translation cache is not flushed)
+ - RH9 fixes
+
+version 0.1.5:
+
+ - ppc64 support + personality() patch (Rusty Russell)
+ - first Alpha CPU patches (Falk Hueffner)
+ - removed bfd.h dependency
+ - fixed shrd, shld, idivl and divl on PowerPC.
+ - fixed buggy glibc PowerPC rint() function (test-i386 passes now on PowerPC).
+
+version 0.1.4:
+
+ - more accurate VM86 emulation (can launch small DOS 16 bit
+ executables in wine).
+ - fixed push/pop fs/gs
+ - added iret instruction.
+ - added times() syscall and SIOCATMARK ioctl.
+
+version 0.1.3:
+
+ - S390 support (Ulrich Weigand)
+ - glibc 2.3.x compile fix (Ulrich Weigand)
+ - socketcall endian fix (Ulrich Weigand)
+ - struct sockaddr endian fix (Ulrich Weigand)
+ - sendmsg/recvmsg endian fix (Ulrich Weigand)
+ - execve endian fix (Ulrich Weigand)
+ - fdset endian fix (Ulrich Weigand)
+ - partial setsockopt syscall support (Ulrich Weigand)
+ - more accurate pushf/popf emulation
+ - first partial vm86() syscall support (can be used with runcom example).
+ - added bound, cmpxchg8b, cpuid instructions
+ - added 16 bit addressing support/override for string operations
+ - poll() fix
+
+version 0.1.2:
+
+ - compile fixes
+ - xlat instruction
+ - xchg instruction memory lock
+ - added simple vm86 example (not working with QEMU yet). The 54 byte
+ DOS executable 'pi_10.com' program was released by Bertram
+ Felgenhauer (more information at http://www.boo.net/~jasonp/pipage.html).
+
+version 0.1.1:
+
+ - glibc 2.2 compilation fixes
+ - added -s and -L options
+ - binary distribution of x86 glibc and wine
+ - big endian fixes in ELF loader and getdents.
+
+version 0.1:
+
+ - initial public release.
diff --git a/tools/ioemu/Makefile b/tools/ioemu/Makefile
new file mode 100644
index 0000000000..d99da37ef4
--- /dev/null
+++ b/tools/ioemu/Makefile
@@ -0,0 +1,116 @@
+-include config-host.mak
+
+CFLAGS=-Wall -O2 -g -fno-strict-aliasing
+ifdef CONFIG_DARWIN
+CFLAGS+= -mdynamic-no-pic
+endif
+ifdef CONFIG_WIN32
+CFLAGS+=-fpack-struct
+endif
+LDFLAGS=-g
+LIBS=
+DEFINES+=-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+TOOLS=qemu-img
+ifdef CONFIG_STATIC
+LDFLAGS+=-static
+endif
+#DOCS=qemu-doc.html qemu-tech.html qemu.1
+
+all: $(DOCS) HEADERS
+ for d in $(TARGET_DIRS); do \
+ $(MAKE) -C $$d $@ || exit 1 ; \
+ done
+
+qemu-img: qemu-img.c block.c block-cow.c block-qcow.c aes.c block-vmdk.c block-cloop.c
+ $(CC) -DQEMU_TOOL $(CFLAGS) $(LDFLAGS) $(DEFINES) -o $@ $^ -lz $(LIBS)
+
+dyngen$(EXESUF): dyngen.c
+ $(HOST_CC) $(CFLAGS) $(DEFINES) -o $@ $^
+
+clean:
+# avoid old build problems by removing potentially incorrect old files
+ rm -f config.mak config.h op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
+ rm -f *.o *.a $(TOOLS) dyngen$(EXESUF) TAGS qemu.pod *~ */*~
+ #$(MAKE) -C tests clean
+ for d in $(TARGET_DIRS); do \
+ $(MAKE) -C $$d $@ || exit 1 ; \
+ done
+ rm -f config-host.mak config-host.h
+ rm -f keysym_adapter_sdl.h keysym_adapter_vnc.h
+
+distclean: clean
+ rm -f config-host.mak config-host.h
+ rm -f keysym_adapter_sdl.h keysym_adapter_vnc.h
+ for d in $(TARGET_DIRS); do \
+ $(MAKE) -C $$d $@ || exit 1 ; \
+ done
+
+KEYMAPS=da en-gb et fr fr-ch is lt modifiers no pt-br sv \
+ar de en-us fi fr-be hr it lv nl pl ru th \
+common de-ch es fo fr-ca hu ja mk nl-be pt sl tr
+
+install: all
+ mkdir -p "$(bindir)"
+ mkdir -p "$(DESTDIR)/$(datadir)"
+ mkdir -p "$(DESTDIR)/$(datadir)/keymaps"
+ install -m 644 $(addprefix keymaps/,$(KEYMAPS)) "$(DESTDIR)/$(datadir)/keymaps"
+ for d in $(TARGET_DIRS); do \
+ $(MAKE) -C $$d $@ || exit 1 ; \
+ done
+
+# various test targets
+test speed test2: all
+ $(MAKE) -C tests $@
+
+TAGS:
+ etags *.[ch] tests/*.[ch]
+
+# documentation
+%.html: %.texi
+ texi2html -monolithic -number $<
+
+qemu.1: qemu-doc.texi
+ ./texi2pod.pl $< qemu.pod
+ pod2man --section=1 --center=" " --release=" " qemu.pod > $@
+
+FILE=qemu-$(shell cat VERSION)
+
+# tar release (use 'make -k tar' on a checked-out tree)
+tar:
+ rm -rf /tmp/$(FILE)
+ cp -r . /tmp/$(FILE)
+ ( cd /tmp ; tar zcvf ~/$(FILE).tar.gz $(FILE) --exclude CVS )
+ rm -rf /tmp/$(FILE)
+
+# generate a binary distribution
+tarbin:
+ ( cd $(DESTDIR) ; tar zcvf ~/qemu-$(VERSION)-i386.tar.gz \
+ $(DESTDIR)/$(bindir)/qemu $(DESTDIR)/$(bindir)/qemu-fast \
+ $(DESTDIR)/$(bindir)/qemu-system-ppc \
+ $(DESTDIR)/$(bindir)/qemu-i386 \
+ $(DESTDIR)/$(bindir)/qemu-arm \
+ $(DESTDIR)/$(bindir)/qemu-sparc \
+ $(DESTDIR)/$(bindir)/qemu-ppc \
+ $(DESTDIR)/$(mandir)/man1/qemu.1 $(DESTDIR)/$(mandir)/man1/qemu-mkcow.1 )
+
+ifneq ($(wildcard .depend),)
+include .depend
+endif
+
+HEADERS:
+
+ifdef CONFIG_SDL
+HEADERS: keysym_adapter_sdl.h
+endif
+
+ifdef CONFIG_VNC
+HEADERS: keysym_adapter_vnc.h
+endif
+
+keysym_adapter_sdl.h: Makefile create_keysym_header.sh
+ sh create_keysym_header.sh sdl "$(SDL_CFLAGS)"
+
+keysym_adapter_vnc.h: Makefile create_keysym_header.sh
+ sh create_keysym_header.sh vnc "$(VNC_CFLAGS)"
+
+
diff --git a/tools/ioemu/README b/tools/ioemu/README
new file mode 100644
index 0000000000..030306717b
--- /dev/null
+++ b/tools/ioemu/README
@@ -0,0 +1,61 @@
+The QEMU x86 emulator
+---------------------
+
+INSTALLATION
+------------
+
+Type
+
+ ./configure
+ make
+
+to build qemu, qemu-CPU and libqemu.a (CPU is the name of the various
+supported target CPUs).
+
+Type
+
+ make install
+
+to install QEMU in /usr/local
+
+Tested tool versions
+--------------------
+
+In order to compile QEMU successfully, it is very important that you
+have the right tools. The most important one is gcc. I cannot guarantee
+that QEMU works if you do not use a tested gcc version. Look at
+'configure' and 'Makefile' if you want to make a different gcc
+version work.
+
+host gcc binutils glibc linux distribution
+----------------------------------------------------------------------
+x86 2.95.2 2.13.2 2.1.3 2.4.18
+ 3.2 2.13.2 2.1.3 2.4.18
+ 2.96 2.11.93.0.2 2.2.5 2.4.18 Red Hat 7.3
+ 3.2.2 2.13.90.0.18 2.3.2 2.4.20 Red Hat 9
+
+PowerPC 3.3 [4] 2.13.90.0.18 2.3.1 2.4.20briq
+ 3.2
+
+Alpha 3.3 [1] 2.14.90.0.4 2.2.5 2.2.20 [2] Debian 3.0
+
+Sparc32 2.95.4 2.12.90.0.1 2.2.5 2.4.18 Debian 3.0
+
+ARM 2.95.4 2.12.90.0.1 2.2.5 2.4.9 [3] Debian 3.0
+
+[1] On Alpha, QEMU needs the gcc 'visibility' attribute only available
+ for gcc version >= 3.3.
+[2] Linux >= 2.4.20 is necessary for precise exception support
+ (untested).
+[3] 2.4.9-ac10-rmk2-np1-cerf2
+
+[4] gcc 2.95.x generates invalid code when using too many register
+variables. You must use gcc 3.x on PowerPC.
+
+Documentation
+-------------
+
+Read the documentation in qemu-doc.html.
+
+
+Fabrice Bellard. \ No newline at end of file
diff --git a/tools/ioemu/TODO b/tools/ioemu/TODO
new file mode 100644
index 0000000000..b8e973ce46
--- /dev/null
+++ b/tools/ioemu/TODO
@@ -0,0 +1,66 @@
+short term:
+----------
+- debug option in 'configure' script + disable -fomit-frame-pointer
+- Solaris display error with Cirrus VGA
+ (http://lists.gnu.org/archive/html/qemu-devel/2004-10/msg00390.html).
+- Precise VGA timings for old games/demos (malc patch)
+- merge PIC spurious interrupt patch
+- merge VNC keyboard patch
+- merge Solaris patch
+- merge ARM patches + self modifying code patch (Paul Brook)
+- warning for OS/2: must not use 128 MB memory
+- config file (at least for windows/Mac OS X)
+- commit message if execution of code in IO memory
+- update doc: PCI infos.
+- VNC patch + Synaptic patch.
+- basic VGA optimizations
+- test sysenter/sysexit and fxsr for L4 pistachio 686
+- physical memory cache (reduce qemu-fast address space size to about 32 MB)
+- better code fetch (different exception handling + CS.limit support)
+- do not resize vga if invalid size.
+- avoid looping if only exceptions
+- cycle counter for all archs
+- TLB code protection support for PPC
+- see openMosix Doc
+- disable SMC handling for ARM/SPARC/PPC (not finished)
+- see undefined flags for BTx insn
+- user/kernel PUSHL/POPL in helper.c
+- keyboard output buffer filling timing emulation
+- return UD exception if LOCK prefix incorrectly used
+- test ldt limit < 7 ?
+- tests for each target CPU
+- fix CCOP optimisation
+- fix all remaining thread lock issues (must put TBs in a specific invalid
+ state, find a solution for tb_flush()).
+- fix arm fpu rounding (at least for float->integer conversions)
+- SMP support
+
+ppc specific:
+------------
+- TLB invalidate not needed if msr_pr changes
+- endianness bugs in do_load_fpscr and do_store_fpscr
+- SPR_ENCODE() not useful
+- enable shift optimizations ?
+
+lower priority:
+--------------
+- more friendly BIOS (logo)
+- int15 ah=86: use better timing
+- HDD geometry in CMOS (not used except for very old DOS programs)
+- suppress shift_mem ops
+- fix some 16 bit sp push/pop overflow (pusha/popa, lcall lret)
+- sysenter/sysexit emulation
+- optimize FPU operations (evaluate x87 stack pointer statically)
+- add IPC syscalls
+- use -msoft-float on ARM
+- use kernel traps for unaligned accesses on ARM ?
+- handle rare page fault cases (in particular if page fault in helpers or
+ in syscall emulation code).
+- fix thread stack freeing (use kernel 2.5.x CLONE_CHILD_CLEARTID)
+- more syscalls (in particular all 64 bit ones, IPCs, fix 64 bit
+ issues, fix 16 bit uid issues)
+- use page_unprotect_range in every suitable syscall to handle all
+ cases of self modifying code.
+- use gcc as a backend to generate better code (easy to do by using
+ op-i386.c operations as local inline functions).
+- add SSE2/MMX operations
diff --git a/tools/ioemu/VERSION b/tools/ioemu/VERSION
new file mode 100644
index 0000000000..7ceb04048e
--- /dev/null
+++ b/tools/ioemu/VERSION
@@ -0,0 +1 @@
+0.6.1 \ No newline at end of file
diff --git a/tools/ioemu/block-cloop.c b/tools/ioemu/block-cloop.c
new file mode 100644
index 0000000000..f22253daaf
--- /dev/null
+++ b/tools/ioemu/block-cloop.c
@@ -0,0 +1,167 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2004 Johannes E. Schindelin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "block_int.h"
+#include <zlib.h>
+
+typedef struct BDRVCloopState {
+ int fd;
+ uint32_t block_size;
+ uint32_t n_blocks;
+ uint64_t* offsets;
+ uint32_t sectors_per_block;
+ uint32_t current_block;
+ char* compressed_block;
+ char* uncompressed_block;
+ z_stream zstream;
+} BDRVCloopState;
+
+static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+ const char* magic_version_2_0="#!/bin/sh\n"
+ "#V2.0 Format\n"
+ "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n";
+ int length=strlen(magic_version_2_0);
+ if(length>buf_size)
+ length=buf_size;
+ if(!memcmp(magic_version_2_0,buf,length))
+ return 2;
+ return 0;
+}
+
+static int cloop_open(BlockDriverState *bs, const char *filename)
+{
+ BDRVCloopState *s = bs->opaque;
+ uint32_t offsets_size,max_compressed_block_size=1,i;
+
+ s->fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (s->fd < 0)
+ return -1;
+ bs->read_only = 1;
+
+ /* read header */
+ if(lseek(s->fd,128,SEEK_SET)<0) {
+cloop_close:
+ close(s->fd);
+ return -1;
+ }
+ if(read(s->fd,&s->block_size,4)<4)
+ goto cloop_close;
+ s->block_size=be32_to_cpu(s->block_size);
+ if(read(s->fd,&s->n_blocks,4)<4)
+ goto cloop_close;
+ s->n_blocks=be32_to_cpu(s->n_blocks);
+
+ /* read offsets */
+ offsets_size=s->n_blocks*sizeof(uint64_t);
+ if(!(s->offsets=(uint64_t*)malloc(offsets_size)))
+ goto cloop_close;
+ if(read(s->fd,s->offsets,offsets_size)<offsets_size)
+ goto cloop_close;
+ for(i=0;i<s->n_blocks;i++) {
+ s->offsets[i]=be64_to_cpu(s->offsets[i]);
+ if(i>0) {
+ uint32_t size=s->offsets[i]-s->offsets[i-1];
+ if(size>max_compressed_block_size)
+ max_compressed_block_size=size;
+ }
+ }
+
+ /* initialize zlib engine */
+ if(!(s->compressed_block=(char*)malloc(max_compressed_block_size+1)))
+ goto cloop_close;
+ if(!(s->uncompressed_block=(char*)malloc(s->block_size)))
+ goto cloop_close;
+ if(inflateInit(&s->zstream) != Z_OK)
+ goto cloop_close;
+ s->current_block=s->n_blocks;
+
+ s->sectors_per_block = s->block_size/512;
+ bs->total_sectors = s->n_blocks*s->sectors_per_block;
+ return 0;
+}
+
+static inline int cloop_read_block(BDRVCloopState *s,int block_num)
+{
+ if(s->current_block != block_num) {
+ int ret;
+ uint32_t bytes = s->offsets[block_num+1]-s->offsets[block_num];
+
+ lseek(s->fd, s->offsets[block_num], SEEK_SET);
+ ret = read(s->fd, s->compressed_block, bytes);
+ if (ret != bytes)
+ return -1;
+
+ s->zstream.next_in = s->compressed_block;
+ s->zstream.avail_in = bytes;
+ s->zstream.next_out = s->uncompressed_block;
+ s->zstream.avail_out = s->block_size;
+ ret = inflateReset(&s->zstream);
+ if(ret != Z_OK)
+ return -1;
+ ret = inflate(&s->zstream, Z_FINISH);
+ if(ret != Z_STREAM_END || s->zstream.total_out != s->block_size)
+ return -1;
+
+ s->current_block = block_num;
+ }
+ return 0;
+}
+
+static int cloop_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ BDRVCloopState *s = bs->opaque;
+ int i;
+
+ for(i=0;i<nb_sectors;i++) {
+ uint32_t sector_offset_in_block=((sector_num+i)%s->sectors_per_block),
+ block_num=(sector_num+i)/s->sectors_per_block;
+ if(cloop_read_block(s, block_num) != 0)
+ return -1;
+ memcpy(buf+i*512,s->uncompressed_block+sector_offset_in_block*512,512);
+ }
+ return 0;
+}
+
+static void cloop_close(BlockDriverState *bs)
+{
+ BDRVCloopState *s = bs->opaque;
+ close(s->fd);
+ free(s->compressed_block);
+ free(s->uncompressed_block);
+ inflateEnd(&s->zstream);
+}
+
+BlockDriver bdrv_cloop = {
+ "cloop",
+ sizeof(BDRVCloopState),
+ cloop_probe,
+ cloop_open,
+ cloop_read,
+ NULL,
+ cloop_close,
+};
+
+
diff --git a/tools/ioemu/block-cow.c b/tools/ioemu/block-cow.c
new file mode 100644
index 0000000000..81bd334cc1
--- /dev/null
+++ b/tools/ioemu/block-cow.c
@@ -0,0 +1,263 @@
+/*
+ * Block driver for the COW format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef _WIN32
+#include "vl.h"
+#include "block_int.h"
+#include <sys/mman.h>
+
+/**************************************************************/
+/* COW block driver using file system holes */
+
+/* user mode linux compatible COW file */
+#define COW_MAGIC 0x4f4f4f4d /* MOOO */
+#define COW_VERSION 2
+
+struct cow_header_v2 {
+ uint32_t magic;
+ uint32_t version;
+ char backing_file[1024];
+ int32_t mtime;
+ uint64_t size;
+ uint32_t sectorsize;
+};
+
+typedef struct BDRVCowState {
+ int fd;
+ uint8_t *cow_bitmap; /* if non NULL, COW mappings are used first */
+ uint8_t *cow_bitmap_addr; /* mmap address of cow_bitmap */
+ int cow_bitmap_size;
+ int64_t cow_sectors_offset;
+} BDRVCowState;
+
+static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+ const struct cow_header_v2 *cow_header = (const void *)buf;
+
+ if (be32_to_cpu(cow_header->magic) == COW_MAGIC &&
+ be32_to_cpu(cow_header->version) == COW_VERSION)
+ return 100;
+ else
+ return 0;
+}
+
+static int cow_open(BlockDriverState *bs, const char *filename)
+{
+ BDRVCowState *s = bs->opaque;
+ int fd;
+ struct cow_header_v2 cow_header;
+ int64_t size;
+
+ fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
+ if (fd < 0) {
+ fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (fd < 0)
+ return -1;
+ }
+ s->fd = fd;
+ /* see if it is a cow image */
+ if (read(fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) {
+ goto fail;
+ }
+
+ if (be32_to_cpu(cow_header.magic) != COW_MAGIC ||
+ be32_to_cpu(cow_header.version) != COW_VERSION) {
+ goto fail;
+ }
+
+ /* cow image found */
+ size = be64_to_cpu(cow_header.size);
+ bs->total_sectors = size / 512;
+
+ pstrcpy(bs->backing_file, sizeof(bs->backing_file),
+ cow_header.backing_file);
+
+#if 0
+ if (cow_header.backing_file[0] != '\0') {
+ if (stat(cow_header.backing_file, &st) != 0) {
+ fprintf(stderr, "%s: could not find original disk image '%s'\n", filename, cow_header.backing_file);
+ goto fail;
+ }
+ if (st.st_mtime != be32_to_cpu(cow_header.mtime)) {
+ fprintf(stderr, "%s: original raw disk image '%s' does not match saved timestamp\n", filename, cow_header.backing_file);
+ goto fail;
+ }
+ fd = open(cow_header.backing_file, O_RDONLY | O_LARGEFILE);
+ if (fd < 0)
+ goto fail;
+ bs->fd = fd;
+ }
+#endif
+ /* mmap the bitmap */
+ s->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
+ s->cow_bitmap_addr = mmap(get_mmap_addr(s->cow_bitmap_size),
+ s->cow_bitmap_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED, s->fd, 0);
+ if (s->cow_bitmap_addr == MAP_FAILED)
+ goto fail;
+ s->cow_bitmap = s->cow_bitmap_addr + sizeof(cow_header);
+ s->cow_sectors_offset = (s->cow_bitmap_size + 511) & ~511;
+ return 0;
+ fail:
+ close(fd);
+ return -1;
+}
+
+static inline void set_bit(uint8_t *bitmap, int64_t bitnum)
+{
+ bitmap[bitnum / 8] |= (1 << (bitnum%8));
+}
+
+static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum)
+{
+ return !!(bitmap[bitnum / 8] & (1 << (bitnum%8)));
+}
+
+
+/* Return true if first block has been changed (ie. current version is
+ * in COW file). Set the number of continuous blocks for which that
+ * is true. */
+static inline int is_changed(uint8_t *bitmap,
+ int64_t sector_num, int nb_sectors,
+ int *num_same)
+{
+ int changed;
+
+ if (!bitmap || nb_sectors == 0) {
+ *num_same = nb_sectors;
+ return 0;
+ }
+
+ changed = is_bit_set(bitmap, sector_num);
+ for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
+ if (is_bit_set(bitmap, sector_num + *num_same) != changed)
+ break;
+ }
+
+ return changed;
+}
+
+static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BDRVCowState *s = bs->opaque;
+ return is_changed(s->cow_bitmap, sector_num, nb_sectors, pnum);
+}
+
+static int cow_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ BDRVCowState *s = bs->opaque;
+ int ret, n;
+
+ while (nb_sectors > 0) {
+ if (is_changed(s->cow_bitmap, sector_num, nb_sectors, &n)) {
+ lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET);
+ ret = read(s->fd, buf, n * 512);
+ if (ret != n * 512)
+ return -1;
+ } else {
+ memset(buf, 0, n * 512);
+ }
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+ return 0;
+}
+
+static int cow_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BDRVCowState *s = bs->opaque;
+ int ret, i;
+
+ lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET);
+ ret = write(s->fd, buf, nb_sectors * 512);
+ if (ret != nb_sectors * 512)
+ return -1;
+ for (i = 0; i < nb_sectors; i++)
+ set_bit(s->cow_bitmap, sector_num + i);
+ return 0;
+}
+
+static void cow_close(BlockDriverState *bs)
+{
+ BDRVCowState *s = bs->opaque;
+ munmap(s->cow_bitmap_addr, s->cow_bitmap_size);
+ close(s->fd);
+}
+
+static int cow_create(const char *filename, int64_t image_sectors,
+ const char *image_filename, int flags)
+{
+ int fd, cow_fd;
+ struct cow_header_v2 cow_header;
+ struct stat st;
+
+ if (flags)
+ return -ENOTSUP;
+
+ cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+ 0644);
+ if (cow_fd < 0)
+ return -1;
+ memset(&cow_header, 0, sizeof(cow_header));
+ cow_header.magic = cpu_to_be32(COW_MAGIC);
+ cow_header.version = cpu_to_be32(COW_VERSION);
+ if (image_filename) {
+ fd = open(image_filename, O_RDONLY | O_BINARY);
+ if (fd < 0) {
+ close(cow_fd);
+ return -1;
+ }
+ if (fstat(fd, &st) != 0) {
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ cow_header.mtime = cpu_to_be32(st.st_mtime);
+ realpath(image_filename, cow_header.backing_file);
+ }
+ cow_header.sectorsize = cpu_to_be32(512);
+ cow_header.size = cpu_to_be64(image_sectors * 512);
+ write(cow_fd, &cow_header, sizeof(cow_header));
+ /* resize to include at least all the bitmap */
+ ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3));
+ close(cow_fd);
+ return 0;
+}
+
+BlockDriver bdrv_cow = {
+ "cow",
+ sizeof(BDRVCowState),
+ cow_probe,
+ cow_open,
+ cow_read,
+ cow_write,
+ cow_close,
+ cow_create,
+ cow_is_allocated,
+};
+#endif
diff --git a/tools/ioemu/block-qcow.c b/tools/ioemu/block-qcow.c
new file mode 100644
index 0000000000..a473298a82
--- /dev/null
+++ b/tools/ioemu/block-qcow.c
@@ -0,0 +1,677 @@
+/*
+ * Block driver for the QCOW format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "block_int.h"
+#include <zlib.h>
+#include "aes.h"
+
+/**************************************************************/
+/* QEMU COW block driver with compression and encryption support */
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION 1
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+
+#define QCOW_OFLAG_COMPRESSED (1LL << 63)
+
+typedef struct QCowHeader {
+ uint32_t magic;
+ uint32_t version;
+ uint64_t backing_file_offset;
+ uint32_t backing_file_size;
+ uint32_t mtime;
+ uint64_t size; /* in bytes */
+ uint8_t cluster_bits;
+ uint8_t l2_bits;
+ uint32_t crypt_method;
+ uint64_t l1_table_offset;
+} QCowHeader;
+
+#define L2_CACHE_SIZE 16
+
+typedef struct BDRVQcowState {
+ int fd;
+ int cluster_bits;
+ int cluster_size;
+ int cluster_sectors;
+ int l2_bits;
+ int l2_size;
+ int l1_size;
+ uint64_t cluster_offset_mask;
+ uint64_t l1_table_offset;
+ uint64_t *l1_table;
+ uint64_t *l2_cache;
+ uint64_t l2_cache_offsets[L2_CACHE_SIZE];
+ uint32_t l2_cache_counts[L2_CACHE_SIZE];
+ uint8_t *cluster_cache;
+ uint8_t *cluster_data;
+ uint64_t cluster_cache_offset;
+ uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ uint32_t crypt_method_header;
+ AES_KEY aes_encrypt_key;
+ AES_KEY aes_decrypt_key;
+} BDRVQcowState;
+
+static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
+
+static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+ const QCowHeader *cow_header = (const void *)buf;
+
+ if (be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
+ be32_to_cpu(cow_header->version) == QCOW_VERSION)
+ return 100;
+ else
+ return 0;
+}
+
+static int qcow_open(BlockDriverState *bs, const char *filename)
+{
+ BDRVQcowState *s = bs->opaque;
+ int fd, len, i, shift;
+ QCowHeader header;
+
+ fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
+ if (fd < 0) {
+ fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (fd < 0)
+ return -1;
+ }
+ s->fd = fd;
+ if (read(fd, &header, sizeof(header)) != sizeof(header))
+ goto fail;
+ be32_to_cpus(&header.magic);
+ be32_to_cpus(&header.version);
+ be64_to_cpus(&header.backing_file_offset);
+ be32_to_cpus(&header.backing_file_size);
+ be32_to_cpus(&header.mtime);
+ be64_to_cpus(&header.size);
+ be32_to_cpus(&header.crypt_method);
+ be64_to_cpus(&header.l1_table_offset);
+
+ if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
+ goto fail;
+ if (header.size <= 1 || header.cluster_bits < 9)
+ goto fail;
+ if (header.crypt_method > QCOW_CRYPT_AES)
+ goto fail;
+ s->crypt_method_header = header.crypt_method;
+ if (s->crypt_method_header)
+ bs->encrypted = 1;
+ s->cluster_bits = header.cluster_bits;
+ s->cluster_size = 1 << s->cluster_bits;
+ s->cluster_sectors = 1 << (s->cluster_bits - 9);
+ s->l2_bits = header.l2_bits;
+ s->l2_size = 1 << s->l2_bits;
+ bs->total_sectors = header.size / 512;
+ s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
+
+ /* read the level 1 table */
+ shift = s->cluster_bits + s->l2_bits;
+ s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
+
+ s->l1_table_offset = header.l1_table_offset;
+ s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
+ if (!s->l1_table)
+ goto fail;
+ lseek(fd, s->l1_table_offset, SEEK_SET);
+ if (read(fd, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
+ s->l1_size * sizeof(uint64_t))
+ goto fail;
+ for(i = 0;i < s->l1_size; i++) {
+ be64_to_cpus(&s->l1_table[i]);
+ }
+ /* alloc L2 cache */
+ s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+ if (!s->l2_cache)
+ goto fail;
+ s->cluster_cache = qemu_malloc(s->cluster_size);
+ if (!s->cluster_cache)
+ goto fail;
+ s->cluster_data = qemu_malloc(s->cluster_size);
+ if (!s->cluster_data)
+ goto fail;
+ s->cluster_cache_offset = -1;
+
+ /* read the backing file name */
+ if (header.backing_file_offset != 0) {
+ len = header.backing_file_size;
+ if (len > 1023)
+ len = 1023;
+ lseek(fd, header.backing_file_offset, SEEK_SET);
+ if (read(fd, bs->backing_file, len) != len)
+ goto fail;
+ bs->backing_file[len] = '\0';
+ }
+ return 0;
+
+ fail:
+ qemu_free(s->l1_table);
+ qemu_free(s->l2_cache);
+ qemu_free(s->cluster_cache);
+ qemu_free(s->cluster_data);
+ close(fd);
+ return -1;
+}
+
+static int qcow_set_key(BlockDriverState *bs, const char *key)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint8_t keybuf[16];
+ int len, i;
+
+ memset(keybuf, 0, 16);
+ len = strlen(key);
+ if (len > 16)
+ len = 16;
+ /* XXX: we could compress the chars to 7 bits to increase
+ entropy */
+ for(i = 0;i < len;i++) {
+ keybuf[i] = key[i];
+ }
+ s->crypt_method = s->crypt_method_header;
+
+ if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+ return -1;
+ if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
+ return -1;
+#if 0
+ /* test */
+ {
+ uint8_t in[16];
+ uint8_t out[16];
+ uint8_t tmp[16];
+ for(i=0;i<16;i++)
+ in[i] = i;
+ AES_encrypt(in, tmp, &s->aes_encrypt_key);
+ AES_decrypt(tmp, out, &s->aes_decrypt_key);
+ for(i = 0; i < 16; i++)
+ printf(" %02x", tmp[i]);
+ printf("\n");
+ for(i = 0; i < 16; i++)
+ printf(" %02x", out[i]);
+ printf("\n");
+ }
+#endif
+ return 0;
+}
+
+/* The crypt function is compatible with the linux cryptoloop
+ algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+ supported */
+static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, int enc,
+ const AES_KEY *key)
+{
+ union {
+ uint64_t ll[2];
+ uint8_t b[16];
+ } ivec;
+ int i;
+
+ for(i = 0; i < nb_sectors; i++) {
+ ivec.ll[0] = cpu_to_le64(sector_num);
+ ivec.ll[1] = 0;
+ AES_cbc_encrypt(in_buf, out_buf, 512, key,
+ ivec.b, enc);
+ sector_num++;
+ in_buf += 512;
+ out_buf += 512;
+ }
+}
+
+/* 'allocate' is:
+ *
+ * 0 to not allocate.
+ *
+ * 1 to allocate a normal cluster (for sector indexes 'n_start' to
+ * 'n_end')
+ *
+ * 2 to allocate a compressed cluster of size
+ * 'compressed_size'. 'compressed_size' must be > 0 and <
+ * cluster_size
+ *
+ * return 0 if not allocated.
+ */
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+ uint64_t offset, int allocate,
+ int compressed_size,
+ int n_start, int n_end)
+{
+ BDRVQcowState *s = bs->opaque;
+ int min_index, i, j, l1_index, l2_index;
+ uint64_t l2_offset, *l2_table, cluster_offset, tmp;
+ uint32_t min_count;
+ int new_l2_table;
+
+ l1_index = offset >> (s->l2_bits + s->cluster_bits);
+ l2_offset = s->l1_table[l1_index];
+ new_l2_table = 0;
+ if (!l2_offset) {
+ if (!allocate)
+ return 0;
+ /* allocate a new l2 entry */
+ l2_offset = lseek(s->fd, 0, SEEK_END);
+ /* round to cluster size */
+ l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
+ /* update the L1 entry */
+ s->l1_table[l1_index] = l2_offset;
+ tmp = cpu_to_be64(l2_offset);
+ lseek(s->fd, s->l1_table_offset + l1_index * sizeof(tmp), SEEK_SET);
+ if (write(s->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+ return 0;
+ new_l2_table = 1;
+ }
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (l2_offset == s->l2_cache_offsets[i]) {
+ /* increment the hit count */
+ if (++s->l2_cache_counts[i] == 0xffffffff) {
+ for(j = 0; j < L2_CACHE_SIZE; j++) {
+ s->l2_cache_counts[j] >>= 1;
+ }
+ }
+ l2_table = s->l2_cache + (i << s->l2_bits);
+ goto found;
+ }
+ }
+ /* not found: load a new entry in the least used one */
+ min_index = 0;
+ min_count = 0xffffffff;
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (s->l2_cache_counts[i] < min_count) {
+ min_count = s->l2_cache_counts[i];
+ min_index = i;
+ }
+ }
+ l2_table = s->l2_cache + (min_index << s->l2_bits);
+ lseek(s->fd, l2_offset, SEEK_SET);
+ if (new_l2_table) {
+ memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+ if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ s->l2_size * sizeof(uint64_t))
+ return 0;
+ } else {
+ if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ s->l2_size * sizeof(uint64_t))
+ return 0;
+ }
+ s->l2_cache_offsets[min_index] = l2_offset;
+ s->l2_cache_counts[min_index] = 1;
+ found:
+ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ if (!cluster_offset ||
+ ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
+ if (!allocate)
+ return 0;
+ /* allocate a new cluster */
+ if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
+ (n_end - n_start) < s->cluster_sectors) {
+ /* if the cluster is already compressed, we must
+ decompress it in the case it is not completely
+ overwritten */
+ if (decompress_cluster(s, cluster_offset) < 0)
+ return 0;
+ cluster_offset = lseek(s->fd, 0, SEEK_END);
+ cluster_offset = (cluster_offset + s->cluster_size - 1) &
+ ~(s->cluster_size - 1);
+ /* write the cluster content */
+ lseek(s->fd, cluster_offset, SEEK_SET);
+ if (write(s->fd, s->cluster_cache, s->cluster_size) !=
+ s->cluster_size)
+ return -1;
+ } else {
+ cluster_offset = lseek(s->fd, 0, SEEK_END);
+ if (allocate == 1) {
+ /* round to cluster size */
+ cluster_offset = (cluster_offset + s->cluster_size - 1) &
+ ~(s->cluster_size - 1);
+ ftruncate(s->fd, cluster_offset + s->cluster_size);
+ /* if encrypted, we must initialize the cluster
+ content which won't be written */
+ if (s->crypt_method &&
+ (n_end - n_start) < s->cluster_sectors) {
+ uint64_t start_sect;
+ start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
+ memset(s->cluster_data + 512, 0xaa, 512);
+ for(i = 0; i < s->cluster_sectors; i++) {
+ if (i < n_start || i >= n_end) {
+ encrypt_sectors(s, start_sect + i,
+ s->cluster_data,
+ s->cluster_data + 512, 1, 1,
+ &s->aes_encrypt_key);
+ lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
+ if (write(s->fd, s->cluster_data, 512) != 512)
+ return -1;
+ }
+ }
+ }
+ } else {
+ cluster_offset |= QCOW_OFLAG_COMPRESSED |
+ (uint64_t)compressed_size << (63 - s->cluster_bits);
+ }
+ }
+ /* update L2 table */
+ tmp = cpu_to_be64(cluster_offset);
+ l2_table[l2_index] = tmp;
+ lseek(s->fd, l2_offset + l2_index * sizeof(tmp), SEEK_SET);
+ if (write(s->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+ return 0;
+ }
+ return cluster_offset;
+}
+
+static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster, n;
+ uint64_t cluster_offset;
+
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors)
+ n = nb_sectors;
+ *pnum = n;
+ return (cluster_offset != 0);
+}
+
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+ const uint8_t *buf, int buf_size)
+{
+ z_stream strm1, *strm = &strm1;
+ int ret, out_len;
+
+ memset(strm, 0, sizeof(*strm));
+
+ strm->next_in = (uint8_t *)buf;
+ strm->avail_in = buf_size;
+ strm->next_out = out_buf;
+ strm->avail_out = out_buf_size;
+
+ ret = inflateInit2(strm, -12);
+ if (ret != Z_OK)
+ return -1;
+ ret = inflate(strm, Z_FINISH);
+ out_len = strm->next_out - out_buf;
+ if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
+ out_len != out_buf_size) {
+ inflateEnd(strm);
+ return -1;
+ }
+ inflateEnd(strm);
+ return 0;
+}
+
+static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
+{
+ int ret, csize;
+ uint64_t coffset;
+
+ coffset = cluster_offset & s->cluster_offset_mask;
+ if (s->cluster_cache_offset != coffset) {
+ csize = cluster_offset >> (63 - s->cluster_bits);
+ csize &= (s->cluster_size - 1);
+ lseek(s->fd, coffset, SEEK_SET);
+ ret = read(s->fd, s->cluster_data, csize);
+ if (ret != csize)
+ return -1;
+ if (decompress_buffer(s->cluster_cache, s->cluster_size,
+ s->cluster_data, csize) < 0) {
+ return -1;
+ }
+ s->cluster_cache_offset = coffset;
+ }
+ return 0;
+}
+
+static int qcow_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, index_in_cluster, n;
+ uint64_t cluster_offset;
+
+ while (nb_sectors > 0) {
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors)
+ n = nb_sectors;
+ if (!cluster_offset) {
+ memset(buf, 0, 512 * n);
+ } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ if (decompress_cluster(s, cluster_offset) < 0)
+ return -1;
+ memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
+ } else {
+ lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
+ ret = read(s->fd, buf, n * 512);
+ if (ret != n * 512)
+ return -1;
+ if (s->crypt_method) {
+ encrypt_sectors(s, sector_num, buf, buf, n, 0,
+ &s->aes_decrypt_key);
+ }
+ }
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+ return 0;
+}
+
+static int qcow_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, index_in_cluster, n;
+ uint64_t cluster_offset;
+
+ while (nb_sectors > 0) {
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors)
+ n = nb_sectors;
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
+ index_in_cluster,
+ index_in_cluster + n);
+ if (!cluster_offset)
+ return -1;
+ lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
+ if (s->crypt_method) {
+ encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
+ &s->aes_encrypt_key);
+ ret = write(s->fd, s->cluster_data, n * 512);
+ } else {
+ ret = write(s->fd, buf, n * 512);
+ }
+ if (ret != n * 512)
+ return -1;
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+ s->cluster_cache_offset = -1; /* disable compressed cache */
+ return 0;
+}
+
+static void qcow_close(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ qemu_free(s->l1_table);
+ qemu_free(s->l2_cache);
+ qemu_free(s->cluster_cache);
+ qemu_free(s->cluster_data);
+ close(s->fd);
+}
+
+static int qcow_create(const char *filename, int64_t total_size,
+ const char *backing_file, int flags)
+{
+ int fd, header_size, backing_filename_len, l1_size, i, shift;
+ QCowHeader header;
+ char backing_filename[1024];
+ uint64_t tmp;
+ struct stat st;
+
+ fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+ 0644);
+ if (fd < 0)
+ return -1;
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be32(QCOW_MAGIC);
+ header.version = cpu_to_be32(QCOW_VERSION);
+ header.size = cpu_to_be64(total_size * 512);
+ header_size = sizeof(header);
+ backing_filename_len = 0;
+ if (backing_file) {
+ realpath(backing_file, backing_filename);
+ if (stat(backing_filename, &st) != 0) {
+ return -1;
+ }
+ header.mtime = cpu_to_be32(st.st_mtime);
+ header.backing_file_offset = cpu_to_be64(header_size);
+ backing_filename_len = strlen(backing_filename);
+ header.backing_file_size = cpu_to_be32(backing_filename_len);
+ header_size += backing_filename_len;
+ header.cluster_bits = 9; /* 512 byte cluster to avoid copying
+ unmodified sectors */
+ header.l2_bits = 12; /* 32 KB L2 tables */
+ } else {
+ header.cluster_bits = 12; /* 4 KB clusters */
+ header.l2_bits = 9; /* 4 KB L2 tables */
+ }
+ header_size = (header_size + 7) & ~7;
+ shift = header.cluster_bits + header.l2_bits;
+ l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
+
+ header.l1_table_offset = cpu_to_be64(header_size);
+ if (flags) {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+ } else {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+ }
+
+ /* write all the data */
+ write(fd, &header, sizeof(header));
+ if (backing_file) {
+ write(fd, backing_filename, backing_filename_len);
+ }
+ lseek(fd, header_size, SEEK_SET);
+ tmp = 0;
+ for(i = 0;i < l1_size; i++) {
+ write(fd, &tmp, sizeof(tmp));
+ }
+ close(fd);
+ return 0;
+}
+
+int qcow_get_cluster_size(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ if (bs->drv != &bdrv_qcow)
+ return -1;
+ return s->cluster_size;
+}
+
+/* XXX: put compressed sectors first, then all the cluster aligned
+ tables to avoid losing bytes in alignment */
+int qcow_compress_cluster(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf)
+{
+ BDRVQcowState *s = bs->opaque;
+ z_stream strm;
+ int ret, out_len;
+ uint8_t *out_buf;
+ uint64_t cluster_offset;
+
+ if (bs->drv != &bdrv_qcow)
+ return -1;
+
+ out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+ if (!out_buf)
+ return -1;
+
+ /* best compression, small window, no zlib header */
+ memset(&strm, 0, sizeof(strm));
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, -12,
+ 9, Z_DEFAULT_STRATEGY);
+ if (ret != 0) {
+ qemu_free(out_buf);
+ return -1;
+ }
+
+ strm.avail_in = s->cluster_size;
+ strm.next_in = (uint8_t *)buf;
+ strm.avail_out = s->cluster_size;
+ strm.next_out = out_buf;
+
+ ret = deflate(&strm, Z_FINISH);
+ if (ret != Z_STREAM_END && ret != Z_OK) {
+ qemu_free(out_buf);
+ deflateEnd(&strm);
+ return -1;
+ }
+ out_len = strm.next_out - out_buf;
+
+ deflateEnd(&strm);
+
+ if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+ /* could not compress: write normal cluster */
+ qcow_write(bs, sector_num, buf, s->cluster_sectors);
+ } else {
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
+ out_len, 0, 0);
+ cluster_offset &= s->cluster_offset_mask;
+ lseek(s->fd, cluster_offset, SEEK_SET);
+ if (write(s->fd, out_buf, out_len) != out_len) {
+ qemu_free(out_buf);
+ return -1;
+ }
+ }
+
+ qemu_free(out_buf);
+ return 0;
+}
+
+BlockDriver bdrv_qcow = {
+ "qcow",
+ sizeof(BDRVQcowState),
+ qcow_probe,
+ qcow_open,
+ qcow_read,
+ qcow_write,
+ qcow_close,
+ qcow_create,
+ qcow_is_allocated,
+ qcow_set_key,
+};
+
+
diff --git a/tools/ioemu/block-vmdk.c b/tools/ioemu/block-vmdk.c
new file mode 100644
index 0000000000..1cc4988534
--- /dev/null
+++ b/tools/ioemu/block-vmdk.c
@@ -0,0 +1,279 @@
+/*
+ * Block driver for the VMDK format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "block_int.h"
+
+/* XXX: this code is untested */
+/* XXX: add write support */
+
+#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
+#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
+
+typedef struct {
+ uint32_t version;
+ uint32_t flags;
+ uint32_t disk_sectors;
+ uint32_t granularity;
+ uint32_t l1dir_offset;
+ uint32_t l1dir_size;
+ uint32_t file_sectors;
+ uint32_t cylinders;
+ uint32_t heads;
+ uint32_t sectors_per_track;
+} VMDK3Header;
+
+typedef struct {
+ uint32_t version;
+ uint32_t flags;
+ int64_t capacity;
+ int64_t granularity;
+ int64_t desc_offset;
+ int64_t desc_size;
+ int32_t num_gtes_per_gte;
+ int64_t rgd_offset;
+ int64_t gd_offset;
+ int64_t grain_offset;
+ char filler[1];
+ char check_bytes[4];
+} VMDK4Header;
+
+#define L2_CACHE_SIZE 16
+
+typedef struct BDRVVmdkState {
+ int fd;
+ int64_t l1_table_offset;
+ uint32_t *l1_table;
+ unsigned int l1_size;
+ uint32_t l1_entry_sectors;
+
+ unsigned int l2_size;
+ uint32_t *l2_cache;
+ uint32_t l2_cache_offsets[L2_CACHE_SIZE];
+ uint32_t l2_cache_counts[L2_CACHE_SIZE];
+
+ unsigned int cluster_sectors;
+} BDRVVmdkState;
+
+static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+ uint32_t magic;
+
+ if (buf_size < 4)
+ return 0;
+ magic = be32_to_cpu(*(uint32_t *)buf);
+ if (magic == VMDK3_MAGIC ||
+ magic == VMDK4_MAGIC)
+ return 100;
+ else
+ return 0;
+}
+
+static int vmdk_open(BlockDriverState *bs, const char *filename)
+{
+ BDRVVmdkState *s = bs->opaque;
+ int fd, i;
+ uint32_t magic;
+ int l1_size;
+
+ fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (fd < 0)
+ return -1;
+ if (read(fd, &magic, sizeof(magic)) != sizeof(magic))
+ goto fail;
+ magic = be32_to_cpu(magic);
+ if (magic == VMDK3_MAGIC) {
+ VMDK3Header header;
+ if (read(fd, &header, sizeof(header)) !=
+ sizeof(header))
+ goto fail;
+ s->cluster_sectors = le32_to_cpu(header.granularity);
+ s->l2_size = 1 << 9;
+ s->l1_size = 1 << 6;
+ bs->total_sectors = le32_to_cpu(header.disk_sectors);
+ s->l1_table_offset = le32_to_cpu(header.l1dir_offset) * 512;
+ s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
+ } else if (magic == VMDK4_MAGIC) {
+ VMDK4Header header;
+
+ if (read(fd, &header, sizeof(header)) != sizeof(header))
+ goto fail;
+ bs->total_sectors = le32_to_cpu(header.capacity);
+ s->cluster_sectors = le32_to_cpu(header.granularity);
+ s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
+ s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
+ if (s->l1_entry_sectors <= 0)
+ goto fail;
+ s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
+ / s->l1_entry_sectors;
+ s->l1_table_offset = le64_to_cpu(header.rgd_offset) * 512;
+ } else {
+ goto fail;
+ }
+ /* read the L1 table */
+ l1_size = s->l1_size * sizeof(uint32_t);
+ s->l1_table = qemu_malloc(l1_size);
+ if (!s->l1_table)
+ goto fail;
+ if (lseek(fd, s->l1_table_offset, SEEK_SET) == -1)
+ goto fail;
+ if (read(fd, s->l1_table, l1_size) != l1_size)
+ goto fail;
+ for(i = 0; i < s->l1_size; i++) {
+ le32_to_cpus(&s->l1_table[i]);
+ }
+
+ s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
+ if (!s->l2_cache)
+ goto fail;
+ s->fd = fd;
+ /* XXX: currently only read only */
+ bs->read_only = 1;
+ return 0;
+ fail:
+ qemu_free(s->l1_table);
+ qemu_free(s->l2_cache);
+ close(fd);
+ return -1;
+}
+
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+ uint64_t offset)
+{
+ BDRVVmdkState *s = bs->opaque;
+ unsigned int l1_index, l2_offset, l2_index;
+ int min_index, i, j;
+ uint32_t min_count, *l2_table;
+ uint64_t cluster_offset;
+
+ l1_index = (offset >> 9) / s->l1_entry_sectors;
+ if (l1_index >= s->l1_size)
+ return 0;
+ l2_offset = s->l1_table[l1_index];
+ if (!l2_offset)
+ return 0;
+
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (l2_offset == s->l2_cache_offsets[i]) {
+ /* increment the hit count */
+ if (++s->l2_cache_counts[i] == 0xffffffff) {
+ for(j = 0; j < L2_CACHE_SIZE; j++) {
+ s->l2_cache_counts[j] >>= 1;
+ }
+ }
+ l2_table = s->l2_cache + (i * s->l2_size);
+ goto found;
+ }
+ }
+ /* not found: load a new entry in the least used one */
+ min_index = 0;
+ min_count = 0xffffffff;
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (s->l2_cache_counts[i] < min_count) {
+ min_count = s->l2_cache_counts[i];
+ min_index = i;
+ }
+ }
+ l2_table = s->l2_cache + (min_index * s->l2_size);
+ lseek(s->fd, (int64_t)l2_offset * 512, SEEK_SET);
+ if (read(s->fd, l2_table, s->l2_size * sizeof(uint32_t)) !=
+ s->l2_size * sizeof(uint32_t))
+ return 0;
+ s->l2_cache_offsets[min_index] = l2_offset;
+ s->l2_cache_counts[min_index] = 1;
+ found:
+ l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
+ cluster_offset = le32_to_cpu(l2_table[l2_index]);
+ cluster_offset <<= 9;
+ return cluster_offset;
+}
+
+static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BDRVVmdkState *s = bs->opaque;
+ int index_in_cluster, n;
+ uint64_t cluster_offset;
+
+ cluster_offset = get_cluster_offset(bs, sector_num << 9);
+ index_in_cluster = sector_num % s->cluster_sectors;
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors)
+ n = nb_sectors;
+ *pnum = n;
+ return (cluster_offset != 0);
+}
+
+static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ BDRVVmdkState *s = bs->opaque;
+ int ret, index_in_cluster, n;
+ uint64_t cluster_offset;
+
+ while (nb_sectors > 0) {
+ cluster_offset = get_cluster_offset(bs, sector_num << 9);
+ index_in_cluster = sector_num % s->cluster_sectors;
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors)
+ n = nb_sectors;
+ if (!cluster_offset) {
+ memset(buf, 0, 512 * n);
+ } else {
+ lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
+ ret = read(s->fd, buf, n * 512);
+ if (ret != n * 512)
+ return -1;
+ }
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+ return 0;
+}
+
+static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ return -1;
+}
+
+static void vmdk_close(BlockDriverState *bs)
+{
+ BDRVVmdkState *s = bs->opaque;
+ qemu_free(s->l1_table);
+ qemu_free(s->l2_cache);
+ close(s->fd);
+}
+
+BlockDriver bdrv_vmdk = {
+ "vmdk",
+ sizeof(BDRVVmdkState),
+ vmdk_probe,
+ vmdk_open,
+ vmdk_read,
+ vmdk_write,
+ vmdk_close,
+ NULL, /* no create yet */
+ vmdk_is_allocated,
+};
diff --git a/tools/ioemu/block.c b/tools/ioemu/block.c
new file mode 100644
index 0000000000..cff0d2a202
--- /dev/null
+++ b/tools/ioemu/block.c
@@ -0,0 +1,548 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "block_int.h"
+
+static BlockDriverState *bdrv_first;
+static BlockDriver *first_drv;
+
/* Prepend a block format driver to the global driver list used by
 * bdrv_find_format() and find_image_format(). */
void bdrv_register(BlockDriver *bdrv)
{
    bdrv->next = first_drv;
    first_drv = bdrv;
}

/* create a new block device (by default it is empty).
 * Named devices are appended to the global device list so that
 * bdrv_find()/bdrv_iterate() can see them; an empty name creates an
 * anonymous, unlisted device (used for backing files).
 * Returns NULL on allocation failure. */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState **pbs, *bs;

    bs = qemu_mallocz(sizeof(BlockDriverState));
    if(!bs)
        return NULL;
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        /* insert at the end */
        pbs = &bdrv_first;
        while (*pbs != NULL)
            pbs = &(*pbs)->next;
        *pbs = bs;
    }
    return bs;
}
+
+BlockDriver *bdrv_find_format(const char *format_name)
+{
+ BlockDriver *drv1;
+ for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
+ if (!strcmp(drv1->format_name, format_name))
+ return drv1;
+ }
+ return NULL;
+}
+
/* Create a new image 'filename' of size_in_sectors 512-byte sectors using
 * driver 'drv'.  Returns the driver's result, or -ENOTSUP when the driver
 * has no create method (e.g. vmdk above). */
int bdrv_create(BlockDriver *drv,
                const char *filename, int64_t size_in_sectors,
                const char *backing_file, int flags)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;
    return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
}
+
+static BlockDriver *find_image_format(const char *filename)
+{
+ int fd, ret, score, score_max;
+ BlockDriver *drv1, *drv;
+ uint8_t buf[1024];
+
+ fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (fd < 0)
+ return NULL;
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0) {
+ close(fd);
+ return NULL;
+ }
+ close(fd);
+
+ drv = NULL;
+ score_max = 0;
+ for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
+ score = drv1->bdrv_probe(buf, ret, filename);
+ if (score > score_max) {
+ score_max = score;
+ drv = drv1;
+ }
+ }
+ return drv;
+}
+
/* Open 'filename' on an existing BlockDriverState, auto-detecting the
 * format by probing.  Thin wrapper around bdrv_open2() with no explicit
 * driver. */
int bdrv_open(BlockDriverState *bs, const char *filename, int snapshot)
{
    return bdrv_open2(bs, filename, snapshot, NULL);
}
+
+int bdrv_open2(BlockDriverState *bs, const char *filename, int snapshot,
+ BlockDriver *drv)
+{
+ int ret;
+
+ bs->read_only = 0;
+ bs->is_temporary = 0;
+ bs->encrypted = 0;
+
+ pstrcpy(bs->filename, sizeof(bs->filename), filename);
+ if (!drv) {
+ drv = find_image_format(filename);
+ if (!drv)
+ return -1;
+ }
+ bs->drv = drv;
+ bs->opaque = qemu_mallocz(drv->instance_size);
+ if (bs->opaque == NULL && drv->instance_size > 0)
+ return -1;
+
+ ret = drv->bdrv_open(bs, filename);
+ if (ret < 0) {
+ qemu_free(bs->opaque);
+ return -1;
+ }
+#ifndef _WIN32
+ if (bs->is_temporary) {
+ unlink(filename);
+ }
+#endif
+ if (bs->backing_file[0] != '\0' && drv->bdrv_is_allocated) {
+ /* if there is a backing file, use it */
+ bs->backing_hd = bdrv_new("");
+ if (!bs->backing_hd) {
+ fail:
+ bdrv_close(bs);
+ return -1;
+ }
+ if (bdrv_open(bs->backing_hd, bs->backing_file, 0) < 0)
+ goto fail;
+ }
+
+ bs->inserted = 1;
+
+ /* call the change callback */
+ if (bs->change_cb)
+ bs->change_cb(bs->change_opaque);
+
+ return 0;
+}
+
/* Close the medium if one is inserted: delete the backing device, call
 * the driver's close method, free the driver state, and fire the change
 * callback.  Safe to call on an already-closed device (no-op). */
void bdrv_close(BlockDriverState *bs)
{
    if (bs->inserted) {
        if (bs->backing_hd)
            bdrv_delete(bs->backing_hd);
        bs->drv->bdrv_close(bs);
        qemu_free(bs->opaque);
#ifdef _WIN32
        /* on win32 the temporary file cannot be unlinked while open
           (POSIX builds unlink it right after opening, in bdrv_open2) */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->inserted = 0;

        /* call the change callback */
        if (bs->change_cb)
            bs->change_cb(bs->change_opaque);
    }
}

/* Close (if needed) and free a BlockDriverState created by bdrv_new().
 * NOTE(review): named devices are not removed from the global list
 * (see the XXX) — do not delete a named device that is still listed. */
void bdrv_delete(BlockDriverState *bs)
{
    /* XXX: remove the driver list */
    bdrv_close(bs);
    qemu_free(bs);
}
+
+/* commit COW file into the raw image */
+int bdrv_commit(BlockDriverState *bs)
+{
+ int64_t i;
+ int n, j;
+ unsigned char sector[512];
+
+ if (!bs->inserted)
+ return -ENOENT;
+
+ if (bs->read_only) {
+ return -EACCES;
+ }
+
+ if (!bs->backing_hd) {
+ return -ENOTSUP;
+ }
+
+ for (i = 0; i < bs->total_sectors;) {
+ if (bs->drv->bdrv_is_allocated(bs, i, 65536, &n)) {
+ for(j = 0; j < n; j++) {
+ if (bdrv_read(bs, i, sector, 1) != 0) {
+ return -EIO;
+ }
+
+ if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
+ return -EIO;
+ }
+ i++;
+ }
+ } else {
+ i += n;
+ }
+ }
+ return 0;
+}
+
/* return -1 if error, 0 on success.
 * Reads nb_sectors from the device, with two special cases:
 *  - sector 0 is substituted by the forced boot sector when one was set
 *    via bdrv_set_boot_sector();
 *  - with a backing file, each range is read from the overlay when
 *    allocated there, otherwise from the backing image. */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    int ret, n;
    BlockDriver *drv = bs->drv;

    if (!bs->inserted)
        return -1;

    while (nb_sectors > 0) {
        if (sector_num == 0 && bs->boot_sector_enabled) {
            /* serve the forced boot sector instead of the image data */
            memcpy(buf, bs->boot_sector_data, 512);
            n = 1;
        } else if (bs->backing_hd) {
            if (drv->bdrv_is_allocated(bs, sector_num, nb_sectors, &n)) {
                ret = drv->bdrv_read(bs, sector_num, buf, n);
                if (ret < 0)
                    return -1;
            } else {
                /* read from the base image */
                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
                if (ret < 0)
                    return -1;
            }
        } else {
            /* simple case: whole request goes to the driver at once */
            ret = drv->bdrv_read(bs, sector_num, buf, nb_sectors);
            if (ret < 0)
                return -1;
            /* no need to loop */
            break;
        }
        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    return 0;
}

/* return -1 if error, 0 on success.  Fails when no medium is inserted
 * or the medium is read-only; otherwise forwards to the driver. */
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    if (!bs->inserted)
        return -1;
    if (bs->read_only)
        return -1;
    return bs->drv->bdrv_write(bs, sector_num, buf, nb_sectors);
}
+
/* Return the device size in 512-byte sectors. */
void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr)
{
    *nb_sectors_ptr = bs->total_sectors;
}

/* force a given boot sector: install up to 512 bytes of 'data' as the
 * contents returned for sector 0 (see bdrv_read()); the remainder of the
 * sector is zero-filled. */
void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size)
{
    bs->boot_sector_enabled = 1;
    if (size > 512)
        size = 512;
    memcpy(bs->boot_sector_data, data, size);
    memset(bs->boot_sector_data + size, 0, 512 - size);
}

/* Record a CHS geometry hint for hardware emulation (not used by the
 * block drivers themselves). */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

/* Record the device type; CD-ROM and floppy are the removable types. */
void bdrv_set_type_hint(BlockDriverState *bs, int type)
{
    bs->type = type;
    bs->removable = ((type == BDRV_TYPE_CDROM ||
                      type == BDRV_TYPE_FLOPPY));
}

/* Retrieve the CHS geometry hint set by bdrv_set_geometry_hint(). */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}

/* Return the BDRV_TYPE_* device type hint. */
int bdrv_get_type_hint(BlockDriverState *bs)
{
    return bs->type;
}

/* True when the medium can be removed (CD-ROM/floppy). */
int bdrv_is_removable(BlockDriverState *bs)
{
    return bs->removable;
}

/* True when the medium was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

/* True when a medium is currently inserted. */
int bdrv_is_inserted(BlockDriverState *bs)
{
    return bs->inserted;
}

/* True when the guest has locked the medium in the drive. */
int bdrv_is_locked(BlockDriverState *bs)
{
    return bs->locked;
}

/* Set/clear the guest's medium lock. */
void bdrv_set_locked(BlockDriverState *bs, int locked)
{
    bs->locked = locked;
}

/* Install the callback fired whenever a medium is inserted or removed
 * (see bdrv_open2()/bdrv_close()). */
void bdrv_set_change_cb(BlockDriverState *bs,
                        void (*change_cb)(void *opaque), void *opaque)
{
    bs->change_cb = change_cb;
    bs->change_opaque = opaque;
}
+
/* True when this image, or its backing image, is encrypted. */
int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

/* Supply the decryption key: applied to the backing image first when it
 * is encrypted, then to this image.  Returns 0 on success, negative on
 * failure or when the image is not encrypted / the driver has no
 * set_key method. */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* key was only needed for the backing image */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
        return -1;
    return bs->drv->bdrv_set_key(bs, key);
}

/* Copy the format name of the open image into buf ("" when closed). */
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->inserted || !bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

/* Invoke 'it' once per registered driver with its format name. */
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    for (drv = first_drv; drv != NULL; drv = drv->next) {
        it(opaque, drv->format_name);
    }
}

/* Look up a named block device; returns NULL when not found. */
BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    for (bs = bdrv_first; bs != NULL; bs = bs->next) {
        if (!strcmp(name, bs->device_name))
            return bs;
    }
    return NULL;
}

/* Invoke 'it' once per named block device with its device name. */
void bdrv_iterate(void (*it)(void *opaque, const char *name), void *opaque)
{
    BlockDriverState *bs;

    for (bs = bdrv_first; bs != NULL; bs = bs->next) {
        it(opaque, bs->device_name);
    }
}

/* Return the device name given to bdrv_new() ("" for anonymous). */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
+
/* Print a one-line status summary of every named block device on the
 * monitor terminal (type, removability, lock state, file, backing file,
 * read-only flag, driver, encryption).
 * NOTE(review): the type switch has no default case, so an unknown
 * bs->type prints nothing after "type=". */
void bdrv_info(void)
{
    BlockDriverState *bs;

    for (bs = bdrv_first; bs != NULL; bs = bs->next) {
        term_printf("%s:", bs->device_name);
        term_printf(" type=");
        switch(bs->type) {
        case BDRV_TYPE_HD:
            term_printf("hd");
            break;
        case BDRV_TYPE_CDROM:
            term_printf("cdrom");
            break;
        case BDRV_TYPE_FLOPPY:
            term_printf("floppy");
            break;
        }
        term_printf(" removable=%d", bs->removable);
        if (bs->removable) {
            term_printf(" locked=%d", bs->locked);
        }
        if (bs->inserted) {
            term_printf(" file=%s", bs->filename);
            if (bs->backing_file[0] != '\0')
                term_printf(" backing_file=%s", bs->backing_file);
            term_printf(" ro=%d", bs->read_only);
            term_printf(" drv=%s", bs->drv->format_name);
            if (bs->encrypted)
                term_printf(" encrypted");
        } else {
            term_printf(" [not inserted]");
        }
        term_printf("\n");
    }
}
+
+
+/**************************************************************/
+/* RAW block driver */
+
/* Per-open state of the raw driver: just the image file descriptor. */
typedef struct BDRVRawState {
    int fd;
} BDRVRawState;

/* Any file can be opened raw, but only as a last resort: score 1 loses
 * to every format whose probe recognises its header. */
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    return 1; /* maybe */
}
+
+static int raw_open(BlockDriverState *bs, const char *filename)
+{
+ BDRVRawState *s = bs->opaque;
+ int fd;
+ int64_t size;
+
+ fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
+ if (fd < 0) {
+ fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (fd < 0)
+ return -1;
+ bs->read_only = 1;
+ }
+ size = lseek(fd, 0, SEEK_END);
+ bs->total_sectors = size / 512;
+ s->fd = fd;
+ return 0;
+}
+
+static int raw_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ BDRVRawState *s = bs->opaque;
+ int ret;
+
+ lseek(s->fd, sector_num * 512, SEEK_SET);
+ ret = read(s->fd, buf, nb_sectors * 512);
+ if (ret != nb_sectors * 512)
+ return -1;
+ return 0;
+}
+
+static int raw_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BDRVRawState *s = bs->opaque;
+ int ret;
+
+ lseek(s->fd, sector_num * 512, SEEK_SET);
+ ret = write(s->fd, buf, nb_sectors * 512);
+ if (ret != nb_sectors * 512)
+ return -1;
+ return 0;
+}
+
/* Close the underlying image file descriptor. */
static void raw_close(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    close(s->fd);
}
+
/* Create an empty raw image of total_size 512-byte sectors (a sparse
 * file where the filesystem supports it).  Backing files and creation
 * flags are not supported.  Returns 0 on success, -ENOTSUP or -EIO on
 * failure.
 *
 * Fix vs. original: the ftruncate() and close() return values were
 * ignored, so a failed truncate (e.g. quota/FS limit) reported
 * success. */
static int raw_create(const char *filename, int64_t total_size,
                      const char *backing_file, int flags)
{
    int fd;

    if (flags || backing_file)
        return -ENOTSUP;

    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
              0644);
    if (fd < 0)
        return -EIO;
    if (ftruncate(fd, total_size * 512) != 0) {
        close(fd);
        return -EIO;
    }
    if (close(fd) != 0)
        return -EIO;
    return 0;
}
+
/* Raw driver registration record; positional fields follow struct
 * BlockDriver (format_name, instance_size, probe, open, read, write,
 * close, create). */
BlockDriver bdrv_raw = {
    "raw",
    sizeof(BDRVRawState),
    raw_probe,
    raw_open,
    raw_read,
    raw_write,
    raw_close,
    raw_create,
};

/* Register the built-in block drivers.
 * NOTE(review): only raw and cloop are registered here; bdrv_vmdk exists
 * but is not registered — confirm it is registered elsewhere or
 * intentionally disabled. */
void bdrv_init(void)
{
    bdrv_register(&bdrv_raw);
    bdrv_register(&bdrv_cloop);
}
diff --git a/tools/ioemu/block_int.h b/tools/ioemu/block_int.h
new file mode 100644
index 0000000000..9d047c4ff3
--- /dev/null
+++ b/tools/ioemu/block_int.h
@@ -0,0 +1,77 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_H
+#define BLOCK_INT_H
+
/* Method table describing one image format.  Drivers register themselves
 * with bdrv_register() and are linked through 'next'. */
struct BlockDriver {
    const char *format_name;  /* name used for probing and lookup */
    int instance_size;        /* bytes allocated for bs->opaque */
    /* return a confidence score for 'buf' (first bytes of the file);
       the highest-scoring driver wins in find_image_format() */
    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
    int (*bdrv_open)(BlockDriverState *bs, const char *filename);
    int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors);
    int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
                      const uint8_t *buf, int nb_sectors);
    void (*bdrv_close)(BlockDriverState *bs);
    /* optional: NULL when the format cannot create images */
    int (*bdrv_create)(const char *filename, int64_t total_sectors,
                       const char *backing_file, int flags);
    /* optional: required for backing-file (COW) support */
    int (*bdrv_is_allocated)(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int *pnum);
    /* optional: supply the key for an encrypted image */
    int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
    struct BlockDriver *next;   /* global registration list */
};
+
/* One open (or empty) block device as seen by the emulated hardware. */
struct BlockDriverState {
    int64_t total_sectors;  /* device size in 512-byte sectors */
    int read_only; /* if true, the media is read only */
    int inserted; /* if true, the media is present */
    int removable; /* if true, the media can be removed */
    int locked;    /* if true, the media cannot temporarily be ejected */
    int encrypted; /* if true, the media is encrypted */
    /* event callback when inserting/removing */
    void (*change_cb)(void *opaque);
    void *change_opaque;

    BlockDriver *drv;   /* format driver; NULL when no medium */
    void *opaque;       /* per-driver state of size drv->instance_size */

    /* when enabled, bdrv_read() substitutes sector 0 (see
       bdrv_set_boot_sector()) */
    int boot_sector_enabled;
    uint8_t boot_sector_data[512];

    char filename[1024];
    char backing_file[1024]; /* if non zero, the image is a diff of
                                this file image */
    int is_temporary;   /* delete the underlying file once opened/closed */

    BlockDriverState *backing_hd;   /* open handle on backing_file */

    /* NOTE: the following infos are only hints for real hardware
       drivers. They are not used by the block driver */
    int cyls, heads, secs;
    int type;               /* BDRV_TYPE_HD / _CDROM / _FLOPPY */
    char device_name[32];   /* "" for anonymous devices */
    BlockDriverState *next; /* global named-device list */
};
+
+#endif /* BLOCK_INT_H */
diff --git a/tools/ioemu/bswap.h b/tools/ioemu/bswap.h
new file mode 100644
index 0000000000..37fb04ed97
--- /dev/null
+++ b/tools/ioemu/bswap.h
@@ -0,0 +1,202 @@
+#ifndef BSWAP_H
+#define BSWAP_H
+
+#include "config-host.h"
+
+#include <inttypes.h>
+
+#ifdef HAVE_BYTESWAP_H
+#include <byteswap.h>
+#else
+
/* Fallback byte-swap macros used when <byteswap.h> is unavailable.
 * Written as GCC statement expressions so 'x' is evaluated exactly
 * once. */

/* swap the two bytes of a 16-bit value */
#define bswap_16(x) \
({ \
	uint16_t __x = (x); \
	((uint16_t)( \
		(((uint16_t)(__x) & (uint16_t)0x00ffU) << 8) | \
		(((uint16_t)(__x) & (uint16_t)0xff00U) >> 8) )); \
})

/* reverse the four bytes of a 32-bit value */
#define bswap_32(x) \
({ \
	uint32_t __x = (x); \
	((uint32_t)( \
		(((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \
		(((uint32_t)(__x) & (uint32_t)0x0000ff00UL) <<  8) | \
		(((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >>  8) | \
		(((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )); \
})

/* reverse the eight bytes of a 64-bit value */
#define bswap_64(x) \
({ \
	uint64_t __x = (x); \
	((uint64_t)( \
		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \
		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \
		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \
		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) <<  8) | \
		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >>  8) | \
		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \
		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \
		(uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \
})
+
+#endif /* !HAVE_BYTESWAP_H */
+
/* Value-returning byte-swap helpers (bswapNN) and their in-place
 * counterparts (bswapNNs).  Implemented directly with shifts and masks;
 * results are identical to the bswap_NN macro forms. */

static inline uint16_t bswap16(uint16_t x)
{
    return (uint16_t)((x >> 8) | (x << 8));
}

static inline uint32_t bswap32(uint32_t x)
{
    return ((x & 0x000000ffUL) << 24) |
           ((x & 0x0000ff00UL) <<  8) |
           ((x & 0x00ff0000UL) >>  8) |
           ((x & 0xff000000UL) >> 24);
}

static inline uint64_t bswap64(uint64_t x)
{
    /* swap adjacent bytes, then 16-bit pairs, then 32-bit halves */
    x = ((x & 0x00ff00ff00ff00ffULL) << 8)  | ((x >> 8)  & 0x00ff00ff00ff00ffULL);
    x = ((x & 0x0000ffff0000ffffULL) << 16) | ((x >> 16) & 0x0000ffff0000ffffULL);
    return (x << 32) | (x >> 32);
}

static inline void bswap16s(uint16_t *s)
{
    *s = bswap16(*s);
}

static inline void bswap32s(uint32_t *s)
{
    *s = bswap32(*s);
}

static inline void bswap64s(uint64_t *s)
{
    *s = bswap64(*s);
}
+
/* Endian adapters: on a big-endian host, be_* are identity and le_* swap;
 * on a little-endian host it is the reverse.  The *_bswaps forms modify
 * in place through a pointer. */
#if defined(WORDS_BIGENDIAN)
#define be_bswap(v, size) (v)
#define le_bswap(v, size) bswap ## size(v)
#define be_bswaps(v, size)
#define le_bswaps(p, size) *p = bswap ## size(*p);
#else
#define le_bswap(v, size) (v)
#define be_bswap(v, size) bswap ## size(v)
#define le_bswaps(v, size)
#define be_bswaps(p, size) *p = bswap ## size(*p);
#endif

/* Generate, for a given endianness and width, the conversion family:
 *   <e><n>_to_cpu(v), cpu_to_<e><n>(v)   - by value
 *   <e><n>_to_cpus(p), cpu_to_<e><n>s(p) - in place
 *   <e><n>_to_cpup(p)                    - load through pointer
 *   cpu_to_<e><n>w(p, v)                 - store through pointer
 * e.g. CPU_CONVERT(le, 32, uint32_t) defines le32_to_cpu() etc. */
#define CPU_CONVERT(endian, size, type)\
static inline type endian ## size ## _to_cpu(type v)\
{\
    return endian ## _bswap(v, size);\
}\
\
static inline type cpu_to_ ## endian ## size(type v)\
{\
    return endian ## _bswap(v, size);\
}\
\
static inline void endian ## size ## _to_cpus(type *p)\
{\
    endian ## _bswaps(p, size)\
}\
\
static inline void cpu_to_ ## endian ## size ## s(type *p)\
{\
    endian ## _bswaps(p, size)\
}\
\
static inline type endian ## size ## _to_cpup(const type *p)\
{\
    return endian ## size ## _to_cpu(*p);\
}\
\
static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\
{\
    *p = cpu_to_ ## endian ## size(v);\
}

CPU_CONVERT(be, 16, uint16_t)
CPU_CONVERT(be, 32, uint32_t)
CPU_CONVERT(be, 64, uint64_t)

CPU_CONVERT(le, 16, uint16_t)
CPU_CONVERT(le, 32, uint32_t)
CPU_CONVERT(le, 64, uint64_t)
+
+/* unaligned versions (optimized for frequent unaligned accesses)*/
+
+#if defined(__i386__) || defined(__powerpc__)
+
+#define cpu_to_le16wu(p, v) cpu_to_le16w(p, v)
+#define cpu_to_le32wu(p, v) cpu_to_le32w(p, v)
+#define le16_to_cpupu(p) le16_to_cpup(p)
+#define le32_to_cpupu(p) le32_to_cpup(p)
+
+#define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
+#define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)
+
+#else
+
/* Byte-at-a-time stores: safe on any alignment and independent of host
 * endianness. */

/* store v little-endian at possibly-unaligned p */
static inline void cpu_to_le16wu(uint16_t *p, uint16_t v)
{
    uint8_t *p1 = (uint8_t *)p;

    p1[0] = v;
    p1[1] = v >> 8;
}

/* store v little-endian at possibly-unaligned p */
static inline void cpu_to_le32wu(uint32_t *p, uint32_t v)
{
    uint8_t *p1 = (uint8_t *)p;

    p1[0] = v;
    p1[1] = v >> 8;
    p1[2] = v >> 16;
    p1[3] = v >> 24;
}

/* load a little-endian 16-bit value from possibly-unaligned p */
static inline uint16_t le16_to_cpupu(const uint16_t *p)
{
    const uint8_t *p1 = (const uint8_t *)p;
    return p1[0] | (p1[1] << 8);
}

/* load a little-endian 32-bit value from possibly-unaligned p */
static inline uint32_t le32_to_cpupu(const uint32_t *p)
{
    const uint8_t *p1 = (const uint8_t *)p;
    return p1[0] | (p1[1] << 8) | (p1[2] << 16) | (p1[3] << 24);
}

/* store v big-endian at possibly-unaligned p */
static inline void cpu_to_be16wu(uint16_t *p, uint16_t v)
{
    uint8_t *p1 = (uint8_t *)p;

    p1[0] = v >> 8;
    p1[1] = v;
}

/* store v big-endian at possibly-unaligned p */
static inline void cpu_to_be32wu(uint32_t *p, uint32_t v)
{
    uint8_t *p1 = (uint8_t *)p;

    p1[0] = v >> 24;
    p1[1] = v >> 16;
    p1[2] = v >> 8;
    p1[3] = v;
}
+
+#endif
+
+#ifdef WORDS_BIGENDIAN
+#define cpu_to_32wu cpu_to_be32wu
+#else
+#define cpu_to_32wu cpu_to_le32wu
+#endif
+
+#undef le_bswap
+#undef be_bswap
+#undef le_bswaps
+#undef be_bswaps
+
+#endif /* BSWAP_H */
diff --git a/tools/ioemu/configure b/tools/ioemu/configure
new file mode 100755
index 0000000000..f49a38ca9f
--- /dev/null
+++ b/tools/ioemu/configure
@@ -0,0 +1,583 @@
+#!/bin/sh
+#
+# qemu configure script (c) 2003 Fabrice Bellard
+#
# set temporary file name
# Pick the directory for the probe files written during configuration:
# honour $TMPDIR, then $TEMPDIR, fall back to /tmp.
if test ! -z "$TMPDIR" ; then
    TMPDIR1="${TMPDIR}"
elif test ! -z "$TEMPDIR" ; then
    TMPDIR1="${TEMPDIR}"
else
    TMPDIR1="/tmp"
fi

# Scratch files for the compile tests below (removed at the end of the
# script).
# NOTE(review): these names are predictable -- $RANDOM is empty in plain
# POSIX sh, leaving only the PID -- so a shared /tmp is open to symlink
# attacks; consider mktemp(1) where available.
TMPC="${TMPDIR1}/qemu-conf-${RANDOM}-$$-${RANDOM}.c"
TMPO="${TMPDIR1}/qemu-conf-${RANDOM}-$$-${RANDOM}.o"
TMPE="${TMPDIR1}/qemu-conf-${RANDOM}-$$-${RANDOM}"
TMPS="${TMPDIR1}/qemu-conf-${RANDOM}-$$-${RANDOM}.S"
+
+# default parameters
+prefix=""
+static="no"
+libdir="lib"
+cross_prefix=""
+cc="gcc"
+host_cc="gcc"
+ar="ar"
+make="make"
+strip="strip"
+cpu=`uname -m`
+target_list="target-i386-dm"
+case "$cpu" in
+ i386|i486|i586|i686|i86pc|BePC)
+ cpu="i386"
+ ;;
+ armv4l)
+ cpu="armv4l"
+ ;;
+ alpha)
+ cpu="alpha"
+ ;;
+ "Power Macintosh"|ppc|ppc64)
+ cpu="powerpc"
+ ;;
+ mips)
+ cpu="mips"
+ ;;
+ s390)
+ cpu="s390"
+ ;;
+ sparc)
+ cpu="sparc"
+ ;;
+ sparc64)
+ cpu="sparc64"
+ ;;
+ ia64)
+ cpu="ia64"
+ ;;
+ m68k)
+ cpu="m68k"
+ ;;
+ x86_64|amd64)
+ cpu="amd64"
+ libdir="lib64"
+ ;;
+ *)
+ cpu="unknown"
+ ;;
+esac
+gprof="no"
+bigendian="no"
+mingw32="no"
+EXESUF=""
+gdbstub="no"
+slirp="no"
+adlib="no"
+oss="no"
+fmod="no"
+fmod_lib=""
+fmod_inc=""
+
+# OS specific
+targetos=`uname -s`
+case $targetos in
+MINGW32*)
+mingw32="yes"
+;;
+FreeBSD)
+bsd="yes"
+oss="yes"
+;;
+NetBSD)
+bsd="yes"
+oss="yes"
+;;
+OpenBSD)
+bsd="yes"
+oss="yes"
+;;
+Darwin)
+bsd="yes"
+darwin="yes"
+;;
+*)
+oss="yes"
+;;
+esac
+
+if [ "$bsd" = "yes" ] ; then
+ if [ ! "$darwin" = "yes" ] ; then
+ make="gmake"
+ fi
+ target_list="i386-softmmu ppc-softmmu sparc-softmmu"
+fi
+
+# find source path
+# XXX: we assume an absolute path is given when launching configure,
+# except in './configure' case.
+source_path=${0%configure}
+source_path=${source_path%/}
+source_path_used="yes"
+if test -z "$source_path" -o "$source_path" = "." ; then
+ source_path=`pwd`
+ source_path_used="no"
+fi
+
+for opt do
+ case "$opt" in
+ --prefix=*) prefix=`echo $opt | cut -d '=' -f 2`
+ ;;
+ --interp-prefix=*) interp_prefix=`echo $opt | cut -d '=' -f 2`
+ ;;
+ --source-path=*) source_path=`echo $opt | cut -d '=' -f 2`
+ ;;
+ --cross-prefix=*) cross_prefix=`echo $opt | cut -d '=' -f 2`
+ ;;
+ --cc=*) cc=`echo $opt | cut -d '=' -f 2`
+ ;;
+ --make=*) make=`echo $opt | cut -d '=' -f 2`
+ ;;
+ --extra-cflags=*) CFLAGS="${opt#--extra-cflags=}"
+ ;;
+ --extra-ldflags=*) LDFLAGS="${opt#--extra-ldflags=}"
+ ;;
+ --extra-libs=*) extralibs=${opt#--extra-libs=}
+ ;;
+ --cpu=*) cpu=`echo $opt | cut -d '=' -f 2`
+ ;;
+ --target-list=*) target_list=${opt#--target-list=}
+ ;;
+ --enable-gprof) gprof="yes"
+ ;;
+ --static) static="yes"
+ ;;
+ --disable-sdl) sdl="no"
+ ;;
+ --enable-fmod) fmod="yes"
+ ;;
+ --fmod-lib=*) fmod_lib=${opt#--fmod-lib=}
+ ;;
+ --fmod-inc=*) fmod_inc=${opt#--fmod-inc=}
+ ;;
+ --disable-vnc) vnc="no"
+ ;;
+ --enable-mingw32) mingw32="yes" ; cross_prefix="i386-mingw32-"
+ ;;
+ --disable-slirp) slirp="no"
+ ;;
+ --enable-adlib) adlib="yes"
+ ;;
+ esac
+done
+
+# Checking for CFLAGS
+if test -z "$CFLAGS"; then
+ CFLAGS="-O2"
+fi
+
+cc="${cross_prefix}${cc}"
+ar="${cross_prefix}${ar}"
+strip="${cross_prefix}${strip}"
+
+if test "$mingw32" = "yes" ; then
+ target_list="i386-softmmu ppc-softmmu sparc-softmmu"
+ EXESUF=".exe"
+ gdbstub="no"
+ oss="no"
+fi
+
+if test -z "$cross_prefix" ; then
+
+# ---
+# big/little endian test
+cat > $TMPC << EOF
+#include <inttypes.h>
+int main(int argc, char ** argv){
+ volatile uint32_t i=0x01234567;
+ return (*((uint8_t*)(&i))) == 0x67;
+}
+EOF
+
+if $cc -o $TMPE $TMPC 2>/dev/null ; then
+$TMPE && bigendian="yes"
+else
+echo big/little test failed
+fi
+
+else
+
+# if cross compiling, cannot launch a program, so make a static guess
+if test "$cpu" = "powerpc" -o "$cpu" = "mips" -o "$cpu" = "s390" -o "$cpu" = "sparc" -o "$cpu" = "sparc64" -o "$cpu" = "m68k"; then
+ bigendian="yes"
+fi
+
+fi
+
+# check gcc options support
+cat > $TMPC <<EOF
+int main(void) {
+}
+EOF
+
+have_gcc3_options="no"
+if $cc -fno-reorder-blocks -fno-optimize-sibling-calls -o $TMPO $TMPC 2> /dev/null ; then
+ have_gcc3_options="yes"
+fi
+
+##########################################
+# VNC probe
+
+if test -z "$vnc"; then
+
+if libvncserver-config --version >& /dev/null; then
+ vnc=yes
+else
+ vnc=no
+fi
+
+fi
+
+##########################################
+# SDL probe
+
+sdl_too_old=no
+
+if test -z "$sdl" ; then
+
+sdl_config="sdl-config"
+sdl=no
+sdl_static=no
+
+if test "$mingw32" = "yes" -a ! -z "$cross_prefix" ; then
+# win32 cross compilation case
+ sdl_config="i386-mingw32msvc-sdl-config"
+ sdl=yes
+else
+# normal SDL probe
+cat > $TMPC << EOF
+#include <SDL.h>
+#undef main /* We don't want SDL to override our main() */
+int main( void ) { return SDL_Init (SDL_INIT_VIDEO); }
+EOF
+
+if $cc -o $TMPE `$sdl_config --cflags 2> /dev/null` $TMPC `$sdl_config --libs 2> /dev/null` 2> /dev/null ; then
+_sdlversion=`$sdl_config --version | sed 's/[^0-9]//g'`
+if test "$_sdlversion" -lt 121 ; then
+sdl_too_old=yes
+else
+sdl=yes
+fi
+
+# static link with sdl ?
+if test "$sdl" = "yes" ; then
+aa="no"
+`$sdl_config --static-libs | grep \\\-laa > /dev/null` && aa="yes"
+sdl_static_libs=`$sdl_config --static-libs`
+if [ "$aa" = "yes" ] ; then
+ sdl_static_libs="$sdl_static_libs `aalib-config --static-libs`"
+fi
+
+if $cc -o $TMPE `$sdl_config --cflags 2> /dev/null` $TMPC $sdl_static_libs 2> /dev/null; then
+ sdl_static=yes
+fi
+
+fi # static link
+
+fi # sdl compile test
+
+fi # cross compilation
+fi # -z $sdl
+
+if test x"$1" = x"-h" -o x"$1" = x"--help" ; then
+cat << EOF
+
+Usage: configure [options]
+Options: [defaults in brackets after descriptions]
+
+EOF
+echo "Standard options:"
+echo " --help print this message"
+echo " --prefix=PREFIX install in PREFIX [$prefix]"
+echo " --interp-prefix=PREFIX where to find shared libraries, etc."
+echo " use %M for cpu name [$interp_prefix]"
+echo " --target-list=LIST set target list [$target_list]"
+echo " --disable-vnc disable vnc support (else configure checks"
+echo " for libvncserver-config in your PATH)"
+echo ""
+echo "Advanced options (experts only):"
+echo " --source-path=PATH path of source code [$source_path]"
+echo " --cross-prefix=PREFIX use PREFIX for compile tools [$cross_prefix]"
+echo " --cc=CC use C compiler CC [$cc]"
+echo " --make=MAKE use specified make [$make]"
+echo " --static enable static build [$static]"
+echo " --enable-mingw32 enable Win32 cross compilation with mingw32"
+echo " --enable-fmod enable FMOD audio output driver"
+echo " --fmod-lib path to FMOD library"
+echo " --fmod-inc path to FMOD includes"
+echo ""
+echo "NOTE: The object files are build at the place where configure is launched"
+exit 1
+fi
+
+#installroot=$source_path/../../dist/install
+installroot=
+
+if test "$mingw32" = "yes" ; then
+if test -z "$prefix" ; then
+ prefix="/c/Program Files/Qemu"
+fi
+
+mandir="$prefix"
+datadir="$prefix"
+docdir="$prefix"
+bindir="$prefix"
+configdir=""
+else
+if test -z "$prefix" ; then
+ prefix="usr/local"
+fi
+mandir="$installroot/$prefix/share/man"
+datadir="$installroot/$prefix/share/xen/qemu"
+docdir="$installroot/$prefix/share/doc/qemu"
+bindir="$installroot/$prefix/bin"
+configdir="$installroot/etc/xen"
+fi
+
+echo "Install prefix $prefix"
+echo "BIOS directory $datadir"
+echo "binary directory $bindir"
+if test "$mingw32" = "no" ; then
+echo "Manual directory $mandir"
+fi
+echo "Source path $source_path"
+echo "C compiler $cc"
+echo "make $make"
+echo "host CPU $cpu"
+echo "host big endian $bigendian"
+echo "target list $target_list"
+echo "gprof enabled $gprof"
+echo "static build $static"
+echo "VNC support $vnc"
+echo "SDL support $sdl"
+echo "SDL static link $sdl_static"
+echo "mingw32 support $mingw32"
+echo "Adlib support $adlib"
+echo -n "FMOD support $fmod"
+if test $fmod = "yes"; then
+ echo -n " (lib='$fmod_lib' include='$fmod_inc')"
+fi
+echo ""
+
+if test $sdl_too_old = "yes"; then
+echo "-> Your SDL version is too old - please upgrade to have FFplay/SDL support"
+fi
+if test "$sdl_static" = "no"; then
+ echo "WARNING: cannot compile statically with SDL - qemu-fast won't have a graphical output"
+fi
+
+config_mak="config-host.mak"
+config_h="config-host.h"
+
+#echo "Creating $config_mak and $config_h"
+
+echo "# Automatically generated by configure - do not modify" > $config_mak
+echo "/* Automatically generated by configure - do not modify */" > $config_h
+
+echo "prefix=$prefix" >> $config_mak
+echo "bindir=$bindir" >> $config_mak
+echo "mandir=$mandir" >> $config_mak
+echo "datadir=$datadir" >> $config_mak
+echo "docdir=$docdir" >> $config_mak
+echo "configdir=$configdir" >> $config_mak
+echo "LIBDIR=$libdir" >> $config_mak
+echo "#define CONFIG_QEMU_SHAREDIR \"$datadir\"" >> $config_h
+echo "MAKE=$make" >> $config_mak
+echo "CC=$cc" >> $config_mak
+if test "$have_gcc3_options" = "yes" ; then
+ echo "HAVE_GCC3_OPTIONS=yes" >> $config_mak
+fi
+echo "HOST_CC=$host_cc" >> $config_mak
+echo "AR=$ar" >> $config_mak
+echo "STRIP=$strip -s -R .comment -R .note" >> $config_mak
+echo "CFLAGS=$CFLAGS" >> $config_mak
+echo "LDFLAGS=$LDFLAGS" >> $config_mak
+echo "EXESUF=$EXESUF" >> $config_mak
+
+if test "$bigendian" = "yes" ; then
+ echo "WORDS_BIGENDIAN=yes" >> $config_mak
+ echo "#define WORDS_BIGENDIAN 1" >> $config_h
+fi
+if test "$mingw32" = "yes" ; then
+ echo "CONFIG_WIN32=yes" >> $config_mak
+ echo "#define CONFIG_WIN32 1" >> $config_h
+elif test -f "/usr/include/byteswap.h" ; then
+ echo "#define HAVE_BYTESWAP_H 1" >> $config_h
+fi
+if test "$darwin" = "yes" ; then
+ echo "CONFIG_DARWIN=yes" >> $config_mak
+ echo "#define CONFIG_DARWIN 1" >> $config_h
+fi
+if test "$gdbstub" = "yes" ; then
+ echo "CONFIG_GDBSTUB=yes" >> $config_mak
+ echo "#define CONFIG_GDBSTUB 1" >> $config_h
+fi
+if test "$gprof" = "yes" ; then
+ echo "TARGET_GPROF=yes" >> $config_mak
+ echo "#define HAVE_GPROF 1" >> $config_h
+fi
+if test "$static" = "yes" ; then
+ echo "CONFIG_STATIC=yes" >> $config_mak
+ echo "#define CONFIG_STATIC 1" >> $config_h
+fi
+if test "$slirp" = "yes" ; then
+ echo "CONFIG_SLIRP=yes" >> $config_mak
+ echo "#define CONFIG_SLIRP 1" >> $config_h
+fi
+if test "$adlib" = "yes" ; then
+ echo "CONFIG_ADLIB=yes" >> $config_mak
+ echo "#define CONFIG_ADLIB 1" >> $config_h
+fi
+if test "$oss" = "yes" ; then
+ echo "CONFIG_OSS=yes" >> $config_mak
+ echo "#define CONFIG_OSS 1" >> $config_h
+fi
+if test "$fmod" = "yes" ; then
+ echo "CONFIG_FMOD=yes" >> $config_mak
+ echo "CONFIG_FMOD_LIB=$fmod_lib" >> $config_mak
+ echo "CONFIG_FMOD_INC=$fmod_inc" >> $config_mak
+ echo "#define CONFIG_FMOD 1" >> $config_h
+fi
+echo -n "VERSION=" >>$config_mak
+head $source_path/VERSION >>$config_mak
+echo "" >>$config_mak
+echo -n "#define QEMU_VERSION \"" >> $config_h
+head $source_path/VERSION >> $config_h
+echo "\"" >> $config_h
+
+echo "SRC_PATH=$source_path" >> $config_mak
+echo "TARGET_DIRS=$target_list" >> $config_mak
+
+# XXX: suppress that
+if [ "$bsd" = "yes" ] ; then
+ echo "#define O_LARGEFILE 0" >> $config_h
+ echo "#define MAP_ANONYMOUS MAP_ANON" >> $config_h
+ echo "#define _BSD 1" >> $config_h
+fi
+
+if test "$vnc" = "yes"; then
+ echo "CONFIG_VNC=yes" >> $config_mak
+ vnc_cflags=`libvncserver-config --cflags`
+ if [ -z $vnc_cflags ]; then
+ vnc_cflags="/usr/include"
+ fi
+ echo "VNC_CFLAGS=$vnc_cflags" >> $config_mak
+fi
+
+if test "$sdl" = "yes"; then
+ echo "CONFIG_SDL=yes" >> $config_mak
+ echo "SDL_CFLAGS=`$sdl_config --cflags`" >> $config_mak
+fi
+
+for target in $target_list; do
+
+target_dir="$target"
+config_mak=$target_dir/config.mak
+config_h=$target_dir/config.h
+target_cpu=`echo $target | cut -d '-' -f 2`
+[ "$target_cpu" = "ppc" ] && target_bigendian=yes
+target_softmmu="no"
+if expr $target : '.*-softmmu' > /dev/null ; then
+ target_softmmu="yes"
+fi
+target_user_only="no"
+if expr $target : '.*-user' > /dev/null ; then
+ target_user_only="yes"
+fi
+#echo "Creating $config_mak, $config_h and $target_dir/Makefile"
+
+mkdir -p $target_dir
+if test "$target" = "arm-user" ; then
+ mkdir -p $target_dir/nwfpe
+fi
+if test "$target_user_only" = "no" ; then
+ mkdir -p $target_dir/slirp
+fi
+
+#ln -sf $source_path/Makefile.target $target_dir/Makefile
+
+echo "# Automatically generated by configure - do not modify" > $config_mak
+echo "/* Automatically generated by configure - do not modify */" > $config_h
+
+
+echo "include ../config-host.mak" >> $config_mak
+echo "#include \"../config-host.h\"" >> $config_h
+
+echo "TARGET_ARCH=i386" >> $config_mak
+echo "#define TARGET_ARCH \"i386\"" >> $config_h
+echo "#define TARGET_I386 1" >> $config_h
+
+interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"`
+echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h
+
+if test "$target_bigendian" = "yes" ; then
+ echo "TARGET_WORDS_BIGENDIAN=yes" >> $config_mak
+ echo "#define TARGET_WORDS_BIGENDIAN 1" >> $config_h
+fi
+if test "$target_softmmu" = "yes" ; then
+ echo "CONFIG_SOFTMMU=yes" >> $config_mak
+ echo "#define CONFIG_SOFTMMU 1" >> $config_h
+fi
+if test "$target_user_only" = "yes" ; then
+ echo "CONFIG_USER_ONLY=yes" >> $config_mak
+ echo "#define CONFIG_USER_ONLY 1" >> $config_h
+fi
+
+if test "$target_user_only" = "no"; then
+ if test "$vnc" = "yes"; then
+ echo "#define CONFIG_VNC 1" >> $config_h
+ echo "CONFIG_VNC=yes" >> $config_mak
+ echo "VNC_CFLAGS=`libvncserver-config --cflags`" >> $config_mak
+ echo "VNC_LIBS=`libvncserver-config --libs`" >> $config_mak
+ fi
+fi
+
+# sdl defines
+
+if test "$sdl" = "yes" -a "$target_user_only" = "no"; then
+ if test "$target_softmmu" = "no" -o "$static" = "yes"; then
+ sdl1=$sdl_static
+ else
+ sdl1=$sdl
+ fi
+ if test "$sdl1" = "yes" ; then
+ echo "#define CONFIG_SDL 1" >> $config_h
+ echo "CONFIG_SDL=yes" >> $config_mak
+ if test "$target_softmmu" = "no" -o "$static" = "yes"; then
+ echo "SDL_LIBS=$sdl_static_libs" >> $config_mak
+ else
+ echo "SDL_LIBS=`$sdl_config --libs`" >> $config_mak
+ fi
+ echo -n "SDL_CFLAGS=`$sdl_config --cflags`" >> $config_mak
+ if [ "${aa}" = "yes" ] ; then
+ echo -n " `aalib-config --cflags`" >> $config_mak ;
+ fi
+ echo "" >> $config_mak
+ fi
+fi
+
+done # for target in $targets
+
+# build tree in object directory if source path is different from current one
+if test "$source_path_used" = "yes" ; then
+ DIRS="tests"
+ FILES="Makefile tests/Makefile"
+ for dir in $DIRS ; do
+ mkdir -p $dir
+ done
+ for f in $FILES ; do
+ ln -sf $source_path/$f $f
+ done
+fi
+
+rm -f $TMPO $TMPC $TMPE $TMPS
diff --git a/tools/ioemu/console.c b/tools/ioemu/console.c
new file mode 100644
index 0000000000..d7588016db
--- /dev/null
+++ b/tools/ioemu/console.c
@@ -0,0 +1,731 @@
+/*
+ * QEMU graphical console
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* number of text lines kept in the cell buffer (visible + scrollback) */
+#define DEFAULT_BACKSCROLL 512
+#define MAX_CONSOLES 12
+
+/* pack 8-bit channels into a 0xAARRGGBB word */
+#define RGBA(r, g, b, a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+#define RGB(r, g, b) RGBA(r, g, b, 0xff)
+
+/* one character cell of the text grid */
+typedef struct TextCell {
+    uint8_t ch;        /* character code of the glyph */
+    uint8_t bgcol:4;   /* background color index into color_table */
+    uint8_t fgcol:4;   /* foreground color index into color_table */
+} TextCell;
+
+#define MAX_ESC_PARAMS 3
+
+/* states of the minimal VT100-style escape sequence parser */
+enum TTYState {
+    TTY_STATE_NORM,
+    TTY_STATE_ESC,    /* got ESC */
+    TTY_STATE_CSI,    /* got ESC '[', collecting numeric parameters */
+};
+
+struct TextConsole {
+    int text_console; /* true if text console */
+    DisplayState *ds;
+    int g_width, g_height;   /* pixel size of the underlying display */
+    int width;               /* grid width in character cells */
+    int height;              /* grid height in character cells */
+    int total_height;        /* rows in the cell buffer, incl. scrollback */
+    int backscroll_height;   /* rows of scrollback currently valid */
+    int fgcol;               /* current drawing colors */
+    int bgcol;
+    int x, y;                /* cursor position in cells */
+    int y_displayed;         /* buffer row shown at the top of the screen */
+    int y_base;              /* buffer row backing screen row 0 (ring base) */
+    TextCell *cells;         /* width * total_height cells, ring by row */
+
+    enum TTYState state;
+    int esc_params[MAX_ESC_PARAMS];
+    int nb_esc_params;
+
+    /* kbd read handler */
+    IOReadHandler *fd_read;
+    void *fd_opaque;
+};
+
+static TextConsole *active_console;          /* console currently on screen */
+static TextConsole *consoles[MAX_CONSOLES];
+static int nb_consoles = 0;
+
+/* convert a RGBA color to a color index usable in graphic primitives */
+static unsigned int vga_get_color(DisplayState *ds, unsigned int rgba)
+{
+    unsigned int r, g, b, color;
+
+    switch(ds->depth) {
+#if 0
+    case 8:
+        r = (rgba >> 16) & 0xff;
+        g = (rgba >> 8) & 0xff;
+        b = (rgba) & 0xff;
+        color = (rgb_to_index[r] * 6 * 6) +
+                (rgb_to_index[g] * 6) +
+                (rgb_to_index[b]);
+        break;
+#endif
+    case 15:
+        /* 5:5:5 packing */
+        r = (rgba >> 16) & 0xff;
+        g = (rgba >> 8) & 0xff;
+        b = (rgba) & 0xff;
+        color = ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3);
+        break;
+    case 16:
+        /* 5:6:5 packing */
+        r = (rgba >> 16) & 0xff;
+        g = (rgba >> 8) & 0xff;
+        b = (rgba) & 0xff;
+        color = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
+        break;
+    case 32:
+    default:
+        /* 32 bpp: the RGBA word is already in display format */
+        color = rgba;
+        break;
+    }
+    return color;
+}
+
+/* fill a pixel rectangle of the display with a pre-converted color
+   (as returned by vga_get_color); no clipping is done */
+static void vga_fill_rect (DisplayState *ds,
+                           int posx, int posy, int width, int height, uint32_t color)
+{
+    uint8_t *d, *d1;
+    int x, y, bpp;
+
+    bpp = (ds->depth + 7) >> 3;  /* bytes per pixel */
+    d1 = ds->data +
+        ds->linesize * posy + bpp * posx;
+    for (y = 0; y < height; y++) {
+        d = d1;
+        switch(bpp) {
+        case 1:
+            for (x = 0; x < width; x++) {
+                *((uint8_t *)d) = color;
+                d++;
+            }
+            break;
+        case 2:
+            for (x = 0; x < width; x++) {
+                *((uint16_t *)d) = color;
+                d += 2;
+            }
+            break;
+        case 4:
+            for (x = 0; x < width; x++) {
+                *((uint32_t *)d) = color;
+                d += 4;
+            }
+            break;
+        }
+        d1 += ds->linesize;
+    }
+}
+
+/* copy from (xs, ys) to (xd, yd) a rectangle of size (w, h) */
+static void vga_bitblt(DisplayState *ds, int xs, int ys, int xd, int yd, int w, int h)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int wb, y, bpp;
+
+    bpp = (ds->depth + 7) >> 3;
+    wb = w * bpp;  /* row width in bytes */
+    if (yd <= ys) {
+        /* copy top-down: destination is above (or at) the source */
+        s = ds->data +
+            ds->linesize * ys + bpp * xs;
+        d = ds->data +
+            ds->linesize * yd + bpp * xd;
+        for (y = 0; y < h; y++) {
+            /* memmove: source and destination rows may overlap */
+            memmove(d, s, wb);
+            d += ds->linesize;
+            s += ds->linesize;
+        }
+    } else {
+        /* destination below source: copy bottom-up to avoid clobbering */
+        s = ds->data +
+            ds->linesize * (ys + h - 1) + bpp * xs;
+        d = ds->data +
+            ds->linesize * (yd + h - 1) + bpp * xd;
+        for (y = 0; y < h; y++) {
+            memmove(d, s, wb);
+            d -= ds->linesize;
+            s -= ds->linesize;
+        }
+    }
+}
+
+/***********************************************************/
+/* basic char display */
+
+#define FONT_HEIGHT 16
+#define FONT_WIDTH 8
+
+#include "vgafont.h"
+
+/* compile-time 32-bit byte swap, used to build the expansion masks below */
+#define cbswap_32(__x) \
+((uint32_t)( \
+    (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \
+    (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) << 8) | \
+    (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >> 8) | \
+    (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) ))
+
+#ifdef WORDS_BIGENDIAN
+#define PAT(x) x
+#else
+#define PAT(x) cbswap_32(x)
+#endif
+
+/* dmask16[nibble]: expand 4 font bits into 4 bytes, each byte all-ones
+   when the corresponding glyph pixel is set (8 bpp rendering) */
+static const uint32_t dmask16[16] = {
+    PAT(0x00000000),
+    PAT(0x000000ff),
+    PAT(0x0000ff00),
+    PAT(0x0000ffff),
+    PAT(0x00ff0000),
+    PAT(0x00ff00ff),
+    PAT(0x00ffff00),
+    PAT(0x00ffffff),
+    PAT(0xff000000),
+    PAT(0xff0000ff),
+    PAT(0xff00ff00),
+    PAT(0xff00ffff),
+    PAT(0xffff0000),
+    PAT(0xffff00ff),
+    PAT(0xffffff00),
+    PAT(0xffffffff),
+};
+
+/* dmask4[2 bits]: expand 2 font bits into 2 16-bit pixels (15/16 bpp) */
+static const uint32_t dmask4[4] = {
+    PAT(0x00000000),
+    PAT(0x0000ffff),
+    PAT(0xffff0000),
+    PAT(0xffffffff),
+};
+
+/* color_table_rgb converted to the current display depth at init time */
+static uint32_t color_table[8];
+
+/* the 8 basic ANSI palette colors */
+static const uint32_t color_table_rgb[8] = {
+    RGB(0x00, 0x00, 0x00),
+    RGB(0xff, 0x00, 0x00),
+    RGB(0x00, 0xff, 0x00),
+    RGB(0xff, 0xff, 0x00),
+    RGB(0x00, 0x00, 0xff),
+    RGB(0xff, 0x00, 0xff),
+    RGB(0x00, 0xff, 0xff),
+    RGB(0xff, 0xff, 0xff),
+};
+
+/* replicate a narrow pixel value across a full 32-bit word so that the
+   dmask-based glyph rendering can write several pixels at once */
+static inline unsigned int col_expand(DisplayState *ds, unsigned int col)
+{
+    switch(ds->depth) {
+    case 8:
+        /* 4 identical 8-bit pixels per word */
+        col |= col << 8;
+        col |= col << 16;
+        break;
+    case 15:
+    case 16:
+        /* 2 identical 16-bit pixels per word */
+        col |= col << 16;
+        break;
+    default:
+        break;
+    }
+
+    return col;
+}
+
+/* draw one glyph at character cell (x, y) with the given (depth-converted,
+   col_expand-ed) foreground and background colors.  Pixels are produced as
+   (mask & (bg ^ fg)) ^ bg, i.e. fg where the font bit is set, bg elsewhere */
+static void vga_putcharxy(DisplayState *ds, int x, int y, int ch,
+                          unsigned int fgcol, unsigned int bgcol)
+{
+    uint8_t *d;
+    const uint8_t *font_ptr;
+    unsigned int font_data, linesize, xorcol, bpp;
+    int i;
+
+    bpp = (ds->depth + 7) >> 3;
+    d = ds->data +
+        ds->linesize * y * FONT_HEIGHT + bpp * x * FONT_WIDTH;
+    linesize = ds->linesize;
+    font_ptr = vgafont16 + FONT_HEIGHT * ch;  /* 16 bytes per glyph, 1 per row */
+    xorcol = bgcol ^ fgcol;
+    switch(ds->depth) {
+    case 8:
+        for(i = 0; i < FONT_HEIGHT; i++) {
+            font_data = *font_ptr++;
+            /* two words of four 8-bit pixels each */
+            ((uint32_t *)d)[0] = (dmask16[(font_data >> 4)] & xorcol) ^ bgcol;
+            ((uint32_t *)d)[1] = (dmask16[(font_data >> 0) & 0xf] & xorcol) ^ bgcol;
+            d += linesize;
+        }
+        break;
+    case 16:
+    case 15:
+        for(i = 0; i < FONT_HEIGHT; i++) {
+            font_data = *font_ptr++;
+            /* four words of two 16-bit pixels each */
+            ((uint32_t *)d)[0] = (dmask4[(font_data >> 6)] & xorcol) ^ bgcol;
+            ((uint32_t *)d)[1] = (dmask4[(font_data >> 4) & 3] & xorcol) ^ bgcol;
+            ((uint32_t *)d)[2] = (dmask4[(font_data >> 2) & 3] & xorcol) ^ bgcol;
+            ((uint32_t *)d)[3] = (dmask4[(font_data >> 0) & 3] & xorcol) ^ bgcol;
+            d += linesize;
+        }
+        break;
+    case 32:
+        /* one 32-bit pixel per font bit; -bit yields an all-ones mask */
+        for(i = 0; i < FONT_HEIGHT; i++) {
+            font_data = *font_ptr++;
+            ((uint32_t *)d)[0] = (-((font_data >> 7)) & xorcol) ^ bgcol;
+            ((uint32_t *)d)[1] = (-((font_data >> 6) & 1) & xorcol) ^ bgcol;
+            ((uint32_t *)d)[2] = (-((font_data >> 5) & 1) & xorcol) ^ bgcol;
+            ((uint32_t *)d)[3] = (-((font_data >> 4) & 1) & xorcol) ^ bgcol;
+            ((uint32_t *)d)[4] = (-((font_data >> 3) & 1) & xorcol) ^ bgcol;
+            ((uint32_t *)d)[5] = (-((font_data >> 2) & 1) & xorcol) ^ bgcol;
+            ((uint32_t *)d)[6] = (-((font_data >> 1) & 1) & xorcol) ^ bgcol;
+            ((uint32_t *)d)[7] = (-((font_data >> 0) & 1) & xorcol) ^ bgcol;
+            d += linesize;
+        }
+        break;
+    }
+}
+
+/* recompute the cell grid size from the pixel size and reallocate the cell
+   buffer, preserving the overlapping part of the old contents and blanking
+   any newly exposed columns.  NOTE(review): the qemu_malloc result is used
+   unchecked -- presumably the allocator aborts on OOM; confirm */
+static void text_console_resize(TextConsole *s)
+{
+    TextCell *cells, *c, *c1;
+    int w1, x, y, last_width;
+
+    last_width = s->width;
+    s->width = s->g_width / FONT_WIDTH;
+    s->height = s->g_height / FONT_HEIGHT;
+
+    /* w1 = number of columns worth copying from the old buffer */
+    w1 = last_width;
+    if (s->width < w1)
+        w1 = s->width;
+
+    cells = qemu_malloc(s->width * s->total_height * sizeof(TextCell));
+    for(y = 0; y < s->total_height; y++) {
+        c = &cells[y * s->width];
+        if (w1 > 0) {
+            c1 = &s->cells[y * last_width];
+            for(x = 0; x < w1; x++) {
+                *c++ = *c1++;
+            }
+        }
+        /* blank the remainder of the row with default colors */
+        for(x = w1; x < s->width; x++) {
+            c->ch = ' ';
+            c->fgcol = 7;
+            c->bgcol = 0;
+            c++;
+        }
+    }
+    qemu_free(s->cells);
+    s->cells = cells;
+}
+
+/* redraw the cell at grid position (x, y) if this console is active and
+   the cell is inside the currently displayed window */
+static void update_xy(TextConsole *s, int x, int y)
+{
+    TextCell *c;
+    int y1, y2;
+
+    if (s == active_console) {
+        y1 = (s->y_base + y) % s->total_height;  /* row in the ring buffer */
+        y2 = y1 - s->y_displayed;                /* row on screen */
+        if (y2 < 0)
+            y2 += s->total_height;
+        if (y2 < s->height) {
+            c = &s->cells[y1 * s->width + x];
+            vga_putcharxy(s->ds, x, y2, c->ch,
+                          color_table[c->fgcol], color_table[c->bgcol]);
+            dpy_update(s->ds, x * FONT_WIDTH, y2 * FONT_HEIGHT,
+                       FONT_WIDTH, FONT_HEIGHT);
+        }
+    }
+}
+
+/* draw (show != 0) or erase the cursor by repainting the cell under it:
+   shown as black-on-white, hidden by restoring the cell's own colors */
+static void console_show_cursor(TextConsole *s, int show)
+{
+    TextCell *c;
+    int y, y1;
+
+    if (s == active_console) {
+        y1 = (s->y_base + s->y) % s->total_height;  /* ring row of cursor */
+        y = y1 - s->y_displayed;                    /* screen row of cursor */
+        if (y < 0)
+            y += s->total_height;
+        if (y < s->height) {
+            c = &s->cells[y1 * s->width + s->x];
+            if (show) {
+                /* reverse video: fixed black fg on white bg */
+                vga_putcharxy(s->ds, s->x, y, c->ch,
+                              color_table[0], color_table[7]);
+            } else {
+                vga_putcharxy(s->ds, s->x, y, c->ch,
+                              color_table[c->fgcol], color_table[c->bgcol]);
+            }
+            dpy_update(s->ds, s->x * FONT_WIDTH, y * FONT_HEIGHT,
+                       FONT_WIDTH, FONT_HEIGHT);
+        }
+    }
+}
+
+/* repaint the whole screen from the cell buffer, starting at the
+   y_displayed scrollback position, then redraw the cursor */
+static void console_refresh(TextConsole *s)
+{
+    TextCell *c;
+    int x, y, y1;
+
+    if (s != active_console)
+        return;
+
+    /* clear first: the grid may not cover the full pixel area */
+    vga_fill_rect(s->ds, 0, 0, s->ds->width, s->ds->height,
+                  color_table[0]);
+    y1 = s->y_displayed;
+    for(y = 0; y < s->height; y++) {
+        c = s->cells + y1 * s->width;
+        for(x = 0; x < s->width; x++) {
+            vga_putcharxy(s->ds, x, y, c->ch,
+                          color_table[c->fgcol], color_table[c->bgcol]);
+            c++;
+        }
+        /* wrap around the ring buffer */
+        if (++y1 == s->total_height)
+            y1 = 0;
+    }
+    dpy_update(s->ds, 0, 0, s->ds->width, s->ds->height);
+    console_show_cursor(s, 1);
+}
+
+/* scroll the view of the active text console by ydelta lines: positive
+   toward the live end (y_base), negative back into the scrollback, clamped
+   to the amount of valid history; then repaint everything */
+static void console_scroll(int ydelta)
+{
+    TextConsole *s;
+    int i, y1;
+
+    s = active_console;
+    if (!s || !s->text_console)
+        return;
+
+    if (ydelta > 0) {
+        /* scroll down, stopping at the live (bottom) position */
+        for(i = 0; i < ydelta; i++) {
+            if (s->y_displayed == s->y_base)
+                break;
+            if (++s->y_displayed == s->total_height)
+                s->y_displayed = 0;
+        }
+    } else {
+        ydelta = -ydelta;
+        /* y1 = oldest row reachable given the valid scrollback */
+        i = s->backscroll_height;
+        if (i > s->total_height - s->height)
+            i = s->total_height - s->height;
+        y1 = s->y_base - i;
+        if (y1 < 0)
+            y1 += s->total_height;
+        /* scroll up, stopping at the oldest row */
+        for(i = 0; i < ydelta; i++) {
+            if (s->y_displayed == y1)
+                break;
+            if (--s->y_displayed < 0)
+                s->y_displayed = s->total_height - 1;
+        }
+    }
+    console_refresh(s);
+}
+
+/* line feed: move the cursor to the start of the next line; at the bottom
+   of the screen, rotate the ring buffer instead (growing the scrollback),
+   blank the new last line, and scroll the visible area up by one row */
+static void console_put_lf(TextConsole *s)
+{
+    TextCell *c;
+    int x, y1;
+
+    s->x = 0;
+    s->y++;
+    if (s->y >= s->height) {
+        s->y = s->height - 1;
+
+        /* keep the view pinned to the live end if it was there */
+        if (s->y_displayed == s->y_base) {
+            if (++s->y_displayed == s->total_height)
+                s->y_displayed = 0;
+        }
+        if (++s->y_base == s->total_height)
+            s->y_base = 0;
+        if (s->backscroll_height < s->total_height)
+            s->backscroll_height++;
+        /* clear the new bottom line with the current colors */
+        y1 = (s->y_base + s->height - 1) % s->total_height;
+        c = &s->cells[y1 * s->width];
+        for(x = 0; x < s->width; x++) {
+            c->ch = ' ';
+            c->fgcol = s->fgcol;
+            c->bgcol = s->bgcol;
+            c++;
+        }
+        /* if the console is on screen and showing the live end, scroll the
+           pixels up one text row rather than repainting every cell */
+        if (s == active_console && s->y_displayed == s->y_base) {
+            vga_bitblt(s->ds, 0, FONT_HEIGHT, 0, 0,
+                       s->width * FONT_WIDTH,
+                       (s->height - 1) * FONT_HEIGHT);
+            vga_fill_rect(s->ds, 0, (s->height - 1) * FONT_HEIGHT,
+                          s->width * FONT_WIDTH, FONT_HEIGHT,
+                          color_table[s->bgcol]);
+            dpy_update(s->ds, 0, 0,
+                       s->width * FONT_WIDTH, s->height * FONT_HEIGHT);
+        }
+    }
+}
+
+/* feed one output character through the terminal state machine.  Only a
+   small subset of VT100 is implemented: CR, LF, and the CSI sequences
+   D (cursor left), C (cursor right) and K (clear to end of line); any
+   other escape sequence is parsed and silently discarded */
+static void console_putchar(TextConsole *s, int ch)
+{
+    TextCell *c;
+    int y1, i, x;
+
+    switch(s->state) {
+    case TTY_STATE_NORM:
+        switch(ch) {
+        case '\r':
+            s->x = 0;
+            break;
+        case '\n':
+            console_put_lf(s);
+            break;
+        case 27:
+            /* ESC: start of an escape sequence */
+            s->state = TTY_STATE_ESC;
+            break;
+        default:
+            /* printable: store in the cell buffer, repaint, advance,
+               wrapping to the next line at the right margin */
+            y1 = (s->y_base + s->y) % s->total_height;
+            c = &s->cells[y1 * s->width + s->x];
+            c->ch = ch;
+            c->fgcol = s->fgcol;
+            c->bgcol = s->bgcol;
+            update_xy(s, s->x, s->y);
+            s->x++;
+            if (s->x >= s->width)
+                console_put_lf(s);
+            break;
+        }
+        break;
+    case TTY_STATE_ESC:
+        if (ch == '[') {
+            for(i=0;i<MAX_ESC_PARAMS;i++)
+                s->esc_params[i] = 0;
+            s->nb_esc_params = 0;
+            s->state = TTY_STATE_CSI;
+        } else {
+            /* non-CSI escapes are ignored */
+            s->state = TTY_STATE_NORM;
+        }
+        break;
+    case TTY_STATE_CSI:
+        if (ch >= '0' && ch <= '9') {
+            /* accumulate a decimal parameter; extra parameters beyond
+               MAX_ESC_PARAMS are parsed but dropped */
+            if (s->nb_esc_params < MAX_ESC_PARAMS) {
+                s->esc_params[s->nb_esc_params] =
+                    s->esc_params[s->nb_esc_params] * 10 + ch - '0';
+            }
+        } else {
+            s->nb_esc_params++;
+            if (ch == ';')
+                break;  /* parameter separator: keep collecting */
+            s->state = TTY_STATE_NORM;
+            switch(ch) {
+            case 'D':
+                /* cursor left (parameters ignored: always one cell) */
+                if (s->x > 0)
+                    s->x--;
+                break;
+            case 'C':
+                /* cursor right */
+                if (s->x < (s->width - 1))
+                    s->x++;
+                break;
+            case 'K':
+                /* clear to eol */
+                y1 = (s->y_base + s->y) % s->total_height;
+                for(x = s->x; x < s->width; x++) {
+                    c = &s->cells[y1 * s->width + x];
+                    c->ch = ' ';
+                    c->fgcol = s->fgcol;
+                    c->bgcol = s->bgcol;
+                    c++;
+                    update_xy(s, x, s->y);
+                }
+                break;
+            default:
+                break;
+            }
+            break;
+        }
+    }
+}
+
+/* make console 'index' the active (displayed) one; for a text console,
+   re-fit the cell grid if the display size changed and repaint */
+void console_select(unsigned int index)
+{
+    TextConsole *s;
+
+    if (index >= MAX_CONSOLES)
+        return;
+    s = consoles[index];   /* may be NULL if the slot was never allocated */
+    if (s) {
+        active_console = s;
+        if (s->text_console) {
+            if (s->g_width != s->ds->width ||
+                s->g_height != s->ds->height) {
+                s->g_width = s->ds->width;
+                s->g_height = s->ds->height;
+                text_console_resize(s);
+            }
+            console_refresh(s);
+        }
+    }
+}
+
+/* CharDriverState write hook: run each byte through the terminal state
+   machine, hiding the cursor around the update.  Always reports the full
+   length as written */
+static int console_puts(CharDriverState *chr, const uint8_t *buf, int len)
+{
+    TextConsole *s = chr->opaque;
+    int i;
+
+    console_show_cursor(s, 0);
+    for(i = 0; i < len; i++) {
+        console_putchar(s, buf[i]);
+    }
+    console_show_cursor(s, 1);
+    return len;
+}
+
+/* CharDriverState hook: register the callback that receives keyboard
+   input (see kbd_put_keysym).  fd_can_read is not used: the console
+   cannot apply backpressure to the keyboard */
+static void console_chr_add_read_handler(CharDriverState *chr,
+                                         IOCanRWHandler *fd_can_read,
+                                         IOReadHandler *fd_read, void *opaque)
+{
+    TextConsole *s = chr->opaque;
+    s->fd_read = fd_read;
+    s->fd_opaque = opaque;
+}
+
+/* CharDriverState event hook: on CHR_EVENT_FOCUS, find this console's
+   slot and make it the displayed one; other events are ignored */
+static void console_send_event(CharDriverState *chr, int event)
+{
+    TextConsole *s = chr->opaque;
+    int i;
+
+    if (event == CHR_EVENT_FOCUS) {
+        for(i = 0; i < nb_consoles; i++) {
+            if (consoles[i] == s) {
+                console_select(i);
+                break;
+            }
+        }
+    }
+}
+
+/* called when an ascii key is pressed */
+/* Ctrl+cursor keys scroll the local view; everything else is translated
+   to a VT100 byte sequence and delivered to the registered read handler */
+void kbd_put_keysym(int keysym)
+{
+    TextConsole *s;
+    uint8_t buf[16], *q;
+    int c;
+
+    s = active_console;
+    if (!s || !s->text_console)
+        return;
+
+    switch(keysym) {
+    case QEMU_KEY_CTRL_UP:
+        console_scroll(-1);
+        break;
+    case QEMU_KEY_CTRL_DOWN:
+        console_scroll(1);
+        break;
+    case QEMU_KEY_CTRL_PAGEUP:
+        console_scroll(-10);
+        break;
+    case QEMU_KEY_CTRL_PAGEDOWN:
+        console_scroll(10);
+        break;
+    default:
+        if (s->fd_read) {
+            /* convert the QEMU keysym to VT100 key string */
+            q = buf;
+            if (keysym >= 0xe100 && keysym <= 0xe11f) {
+                /* function-style keys: ESC [ <n> ~ */
+                *q++ = '\033';
+                *q++ = '[';
+                c = keysym - 0xe100;
+                if (c >= 10)
+                    *q++ = '0' + (c / 10);
+                *q++ = '0' + (c % 10);
+                *q++ = '~';
+            } else if (keysym >= 0xe120 && keysym <= 0xe17f) {
+                /* cursor-style keys: ESC [ <char> */
+                *q++ = '\033';
+                *q++ = '[';
+                *q++ = keysym & 0xff;
+            } else {
+                /* plain character: passed through unchanged */
+                *q++ = keysym;
+            }
+            s->fd_read(s->fd_opaque, buf, q - buf);
+        }
+        break;
+    }
+}
+
+/* allocate and register a new (graphic) console on the given display.
+   The first console created becomes the active one.  Returns NULL when
+   the console table is full or allocation fails */
+TextConsole *graphic_console_init(DisplayState *ds)
+{
+    TextConsole *s;
+
+    if (nb_consoles >= MAX_CONSOLES)
+        return NULL;
+    s = qemu_mallocz(sizeof(TextConsole));
+    if (!s) {
+        return NULL;
+    }
+    if (!active_console)
+        active_console = s;
+    s->ds = ds;
+    consoles[nb_consoles++] = s;
+    return s;
+}
+
+/* return nonzero iff 's' is the console currently shown on the display */
+int is_active_console(TextConsole *s)
+{
+    return s == active_console;
+}
+
+/* create a text console on the given display, wrapped in a
+   CharDriverState so it can be used as a character device backend.
+   Returns NULL on allocation failure or when the console table is full */
+CharDriverState *text_console_init(DisplayState *ds)
+{
+    CharDriverState *chr;
+    TextConsole *s;
+    int i;
+    static int color_inited;
+
+    chr = qemu_mallocz(sizeof(CharDriverState));
+    if (!chr)
+        return NULL;
+    s = graphic_console_init(ds);
+    if (!s) {
+        free(chr);
+        return NULL;
+    }
+    s->text_console = 1;
+    chr->opaque = s;
+    chr->chr_write = console_puts;
+    chr->chr_add_read_handler = console_chr_add_read_handler;
+    chr->chr_send_event = console_send_event;
+
+    /* convert the ANSI palette to the display format once.
+       NOTE(review): done for the first text console's depth only --
+       presumably all consoles share one display format; confirm */
+    if (!color_inited) {
+        color_inited = 1;
+        for(i = 0; i < 8; i++) {
+            color_table[i] = col_expand(s->ds,
+                                        vga_get_color(s->ds, color_table_rgb[i]));
+        }
+    }
+    s->y_displayed = 0;
+    s->y_base = 0;
+    s->total_height = DEFAULT_BACKSCROLL;
+    s->x = 0;
+    s->y = 0;
+    s->fgcol = 7;  /* white on black by default */
+    s->bgcol = 0;
+    s->g_width = s->ds->width;
+    s->g_height = s->ds->height;
+    text_console_resize(s);
+
+    return chr;
+}
diff --git a/tools/ioemu/cpu-all.h b/tools/ioemu/cpu-all.h
new file mode 100644
index 0000000000..6e9a8b8ac5
--- /dev/null
+++ b/tools/ioemu/cpu-all.h
@@ -0,0 +1,688 @@
+/*
+ * defines common to all virtual CPUs
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef CPU_ALL_H
+#define CPU_ALL_H
+
+#if defined(__arm__) || defined(__sparc__)
+#define WORDS_ALIGNED
+#endif
+
+/* some important defines:
+ *
+ * WORDS_ALIGNED : if defined, the host cpu can only make word aligned
+ * memory accesses.
+ *
+ * WORDS_BIGENDIAN : if defined, the host cpu is big endian and
+ * otherwise little endian.
+ *
+ * (TARGET_WORDS_ALIGNED : same for target cpu (not supported yet))
+ *
+ * TARGET_WORDS_BIGENDIAN : same for target cpu
+ */
+
+#include "bswap.h"
+
+#if defined(WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
+#define BSWAP_NEEDED
+#endif
+
+#ifdef BSWAP_NEEDED
+
+/* target <-> host byte swapping helpers: real swaps when host and target
+   endianness differ, no-ops otherwise */
+
+static inline uint16_t tswap16(uint16_t s)
+{
+    return bswap16(s);
+}
+
+static inline uint32_t tswap32(uint32_t s)
+{
+    return bswap32(s);
+}
+
+static inline uint64_t tswap64(uint64_t s)
+{
+    return bswap64(s);
+}
+
+/* in-place variants */
+static inline void tswap16s(uint16_t *s)
+{
+    *s = bswap16(*s);
+}
+
+static inline void tswap32s(uint32_t *s)
+{
+    *s = bswap32(*s);
+}
+
+static inline void tswap64s(uint64_t *s)
+{
+    *s = bswap64(*s);
+}
+
+#else
+
+/* same endianness: identity functions, kept so callers need no #ifdefs */
+
+static inline uint16_t tswap16(uint16_t s)
+{
+    return s;
+}
+
+static inline uint32_t tswap32(uint32_t s)
+{
+    return s;
+}
+
+static inline uint64_t tswap64(uint64_t s)
+{
+    return s;
+}
+
+static inline void tswap16s(uint16_t *s)
+{
+}
+
+static inline void tswap32s(uint32_t *s)
+{
+}
+
+static inline void tswap64s(uint64_t *s)
+{
+}
+
+#endif
+
+#if TARGET_LONG_SIZE == 4
+#define tswapl(s) tswap32(s)
+#define tswapls(s) tswap32s((uint32_t *)(s))
+#else
+#define tswapl(s) tswap64(s)
+#define tswapls(s) tswap64s((uint64_t *)(s))
+#endif
+
+/* NOTE: arm is horrible as double 32 bit words are stored in big endian ! */
+typedef union {
+ double d;
+#if !defined(WORDS_BIGENDIAN) && !defined(__arm__)
+ struct {
+ uint32_t lower;
+ uint32_t upper;
+ } l;
+#else
+ struct {
+ uint32_t upper;
+ uint32_t lower;
+ } l;
+#endif
+ uint64_t ll;
+} CPU_DoubleU;
+
+/* CPU memory access without any memory or io remapping */
+
+/*
+ * the generic syntax for the memory accesses is:
+ *
+ * load: ld{type}{sign}{size}{endian}_{access_type}(ptr)
+ *
+ * store: st{type}{size}{endian}_{access_type}(ptr, val)
+ *
+ * type is:
+ * (empty): integer access
+ * f : float access
+ *
+ * sign is:
+ * (empty): for floats or 32 bit size
+ * u : unsigned
+ * s : signed
+ *
+ * size is:
+ * b: 8 bits
+ * w: 16 bits
+ * l: 32 bits
+ * q: 64 bits
+ *
+ * endian is:
+ * (empty): target cpu endianness or 8 bit access
+ * r : reversed target cpu endianness (not implemented yet)
+ * be : big endian (not implemented yet)
+ * le : little endian (not implemented yet)
+ *
+ * access_type is:
+ * raw : host memory access
+ * user : user mode access using soft MMU
+ * kernel : kernel mode access using soft MMU
+ */
+/* byte accessors: endianness and alignment never matter for 8 bits, so
+   these are shared by all the configurations below */
+static inline int ldub_raw(void *ptr)
+{
+    return *(uint8_t *)ptr;
+}
+
+static inline int ldsb_raw(void *ptr)
+{
+    return *(int8_t *)ptr;
+}
+
+static inline void stb_raw(void *ptr, int v)
+{
+    *(uint8_t *)ptr = v;
+}
+
+/* NOTE: on arm, putting 2 in /proc/sys/debug/alignment so that the
+   kernel handles unaligned load/stores may give better results, but
+   it is a system wide setting : bad */
+#if !defined(TARGET_WORDS_BIGENDIAN) && (defined(WORDS_BIGENDIAN) || defined(WORDS_ALIGNED))
+
+/* conservative code for little endian unaligned accesses */
+/* little-endian target on a big-endian and/or alignment-restricted host:
+   assemble values byte by byte (PowerPC uses its byte-reversed
+   load/store instructions instead) */
+static inline int lduw_raw(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8);
+#endif
+}
+
+static inline int ldsw_raw(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return (int16_t)val;
+#else
+    uint8_t *p = ptr;
+    return (int16_t)(p[0] | (p[1] << 8));
+#endif
+}
+
+static inline int ldl_raw(void *ptr)
+{
+#ifdef __powerpc__
+    int val;
+    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
+    return val;
+#else
+    uint8_t *p = ptr;
+    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+#endif
+}
+
+static inline uint64_t ldq_raw(void *ptr)
+{
+    /* little-endian: low word first */
+    uint8_t *p = ptr;
+    uint32_t v1, v2;
+    v1 = ldl_raw(p);
+    v2 = ldl_raw(p + 4);
+    return v1 | ((uint64_t)v2 << 32);
+}
+
+static inline void stw_raw(void *ptr, int v)
+{
+#ifdef __powerpc__
+    __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*(uint16_t *)ptr) : "r" (v), "r" (ptr));
+#else
+    uint8_t *p = ptr;
+    p[0] = v;
+    p[1] = v >> 8;
+#endif
+}
+
+static inline void stl_raw(void *ptr, int v)
+{
+#ifdef __powerpc__
+    __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*(uint32_t *)ptr) : "r" (v), "r" (ptr));
+#else
+    uint8_t *p = ptr;
+    p[0] = v;
+    p[1] = v >> 8;
+    p[2] = v >> 16;
+    p[3] = v >> 24;
+#endif
+}
+
+static inline void stq_raw(void *ptr, uint64_t v)
+{
+    uint8_t *p = ptr;
+    stl_raw(p, (uint32_t)v);
+    stl_raw(p + 4, v >> 32);
+}
+
+/* float access */
+/* floats are type-punned through unions so the integer helpers above do
+   the byte handling (avoids strict-aliasing pointer casts) */
+
+static inline float ldfl_raw(void *ptr)
+{
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.i = ldl_raw(ptr);
+    return u.f;
+}
+
+static inline void stfl_raw(void *ptr, float v)
+{
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    stl_raw(ptr, u.i);
+}
+
+static inline double ldfq_raw(void *ptr)
+{
+    CPU_DoubleU u;
+    u.l.lower = ldl_raw(ptr);
+    u.l.upper = ldl_raw(ptr + 4);
+    return u.d;
+}
+
+static inline void stfq_raw(void *ptr, double v)
+{
+    CPU_DoubleU u;
+    u.d = v;
+    stl_raw(ptr, u.l.lower);
+    stl_raw(ptr + 4, u.l.upper);
+}
+
+#elif defined(TARGET_WORDS_BIGENDIAN) && (!defined(WORDS_BIGENDIAN) || defined(WORDS_ALIGNED))
+
+/* big-endian target on a little-endian and/or alignment-restricted host:
+   x86 uses inline swap instructions, other hosts go byte by byte */
+static inline int lduw_raw(void *ptr)
+{
+#if defined(__i386__)
+    int val;
+    asm volatile ("movzwl %1, %0\n"
+                  "xchgb %b0, %h0\n"
+                  : "=q" (val)
+                  : "m" (*(uint16_t *)ptr));
+    return val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return ((b[0] << 8) | b[1]);
+#endif
+}
+
+static inline int ldsw_raw(void *ptr)
+{
+#if defined(__i386__)
+    int val;
+    asm volatile ("movzwl %1, %0\n"
+                  "xchgb %b0, %h0\n"
+                  : "=q" (val)
+                  : "m" (*(uint16_t *)ptr));
+    return (int16_t)val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (int16_t)((b[0] << 8) | b[1]);
+#endif
+}
+
+static inline int ldl_raw(void *ptr)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    int val;
+    asm volatile ("movl %1, %0\n"
+                  "bswap %0\n"
+                  : "=r" (val)
+                  : "m" (*(uint32_t *)ptr));
+    return val;
+#else
+    uint8_t *b = (uint8_t *) ptr;
+    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+#endif
+}
+
+static inline uint64_t ldq_raw(void *ptr)
+{
+    /* big-endian: high word first */
+    uint32_t a,b;
+    a = ldl_raw(ptr);
+    b = ldl_raw(ptr+4);
+    return (((uint64_t)a<<32)|b);
+}
+
+static inline void stw_raw(void *ptr, int v)
+{
+#if defined(__i386__)
+    asm volatile ("xchgb %b0, %h0\n"
+                  "movw %w0, %1\n"
+                  : "=q" (v)
+                  : "m" (*(uint16_t *)ptr), "0" (v));
+#else
+    uint8_t *d = (uint8_t *) ptr;
+    d[0] = v >> 8;
+    d[1] = v;
+#endif
+}
+
+static inline void stl_raw(void *ptr, int v)
+{
+#if defined(__i386__) || defined(__x86_64__)
+    asm volatile ("bswap %0\n"
+                  "movl %0, %1\n"
+                  : "=r" (v)
+                  : "m" (*(uint32_t *)ptr), "0" (v));
+#else
+    uint8_t *d = (uint8_t *) ptr;
+    d[0] = v >> 24;
+    d[1] = v >> 16;
+    d[2] = v >> 8;
+    d[3] = v;
+#endif
+}
+
+static inline void stq_raw(void *ptr, uint64_t v)
+{
+    stl_raw(ptr, v >> 32);
+    stl_raw(ptr + 4, v);
+}
+
+/* float access */
+/* type-punned through unions, same as the little-endian variant above */
+
+static inline float ldfl_raw(void *ptr)
+{
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.i = ldl_raw(ptr);
+    return u.f;
+}
+
+static inline void stfl_raw(void *ptr, float v)
+{
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    stl_raw(ptr, u.i);
+}
+
+static inline double ldfq_raw(void *ptr)
+{
+    CPU_DoubleU u;
+    u.l.upper = ldl_raw(ptr);
+    u.l.lower = ldl_raw(ptr + 4);
+    return u.d;
+}
+
+static inline void stfq_raw(void *ptr, double v)
+{
+    CPU_DoubleU u;
+    u.d = v;
+    stl_raw(ptr, u.l.upper);
+    stl_raw(ptr + 4, u.l.lower);
+}
+
+#else
+
+/* host and target agree on endianness and the host allows unaligned
+   accesses: plain dereferences are sufficient */
+static inline int lduw_raw(void *ptr)
+{
+    return *(uint16_t *)ptr;
+}
+
+static inline int ldsw_raw(void *ptr)
+{
+    return *(int16_t *)ptr;
+}
+
+static inline int ldl_raw(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+static inline uint64_t ldq_raw(void *ptr)
+{
+    return *(uint64_t *)ptr;
+}
+
+static inline void stw_raw(void *ptr, int v)
+{
+    *(uint16_t *)ptr = v;
+}
+
+static inline void stl_raw(void *ptr, int v)
+{
+    *(uint32_t *)ptr = v;
+}
+
+static inline void stq_raw(void *ptr, uint64_t v)
+{
+    *(uint64_t *)ptr = v;
+}
+
+/* float access */
+
+static inline float ldfl_raw(void *ptr)
+{
+    return *(float *)ptr;
+}
+
+static inline double ldfq_raw(void *ptr)
+{
+    return *(double *)ptr;
+}
+
+static inline void stfl_raw(void *ptr, float v)
+{
+    *(float *)ptr = v;
+}
+
+static inline void stfq_raw(void *ptr, double v)
+{
+    *(double *)ptr = v;
+}
+#endif
+
+/* MMU memory access macros */
+
+#if defined(CONFIG_USER_ONLY)
+
+/* if user mode, no other memory access functions */
+#define ldub(p) ldub_raw(p)
+#define ldsb(p) ldsb_raw(p)
+#define lduw(p) lduw_raw(p)
+#define ldsw(p) ldsw_raw(p)
+#define ldl(p) ldl_raw(p)
+#define ldq(p) ldq_raw(p)
+#define ldfl(p) ldfl_raw(p)
+#define ldfq(p) ldfq_raw(p)
+#define stb(p, v) stb_raw(p, v)
+#define stw(p, v) stw_raw(p, v)
+#define stl(p, v) stl_raw(p, v)
+#define stq(p, v) stq_raw(p, v)
+#define stfl(p, v) stfl_raw(p, v)
+#define stfq(p, v) stfq_raw(p, v)
+
+#define ldub_code(p) ldub_raw(p)
+#define ldsb_code(p) ldsb_raw(p)
+#define lduw_code(p) lduw_raw(p)
+#define ldsw_code(p) ldsw_raw(p)
+#define ldl_code(p) ldl_raw(p)
+
+/* kernel-mode accessors: in user-only builds they access memory directly */
+#define ldub_kernel(p) ldub_raw(p)
+#define ldsb_kernel(p) ldsb_raw(p)
+#define lduw_kernel(p) lduw_raw(p)
+#define ldsw_kernel(p) ldsw_raw(p)
+#define ldl_kernel(p) ldl_raw(p)
+#define ldfl_kernel(p) ldfl_raw(p)
+#define ldfq_kernel(p) ldfq_raw(p)
+#define stb_kernel(p, v) stb_raw(p, v)
+#define stw_kernel(p, v) stw_raw(p, v)
+#define stl_kernel(p, v) stl_raw(p, v)
+#define stq_kernel(p, v) stq_raw(p, v)
+#define stfl_kernel(p, v) stfl_raw(p, v)
+#define stfq_kernel(p, v) stfq_raw(p, v)
+
+#endif /* defined(CONFIG_USER_ONLY) */
+
+/* page related stuff */
+
+#define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS)
+#define TARGET_PAGE_MASK ~(TARGET_PAGE_SIZE - 1)
+#define TARGET_PAGE_ALIGN(addr) (((addr) + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK)
+
+extern unsigned long qemu_real_host_page_size;
+extern unsigned long qemu_host_page_bits;
+extern unsigned long qemu_host_page_size;
+extern unsigned long qemu_host_page_mask;
+
+#define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask)
+
+/* same as PROT_xxx */
+#define PAGE_READ 0x0001
+#define PAGE_WRITE 0x0002
+#define PAGE_EXEC 0x0004
+#define PAGE_BITS (PAGE_READ | PAGE_WRITE | PAGE_EXEC)
+#define PAGE_VALID 0x0008
+/* original state of the write flag (used when tracking self-modifying
+   code) */
+#define PAGE_WRITE_ORG 0x0010
+
+void page_dump(FILE *f);
+int page_get_flags(unsigned long address);
+void page_set_flags(unsigned long start, unsigned long end, int flags);
+void page_unprotect_range(uint8_t *data, unsigned long data_size);
+
+#define CPUState CPUX86State
+
+void cpu_dump_state(CPUState *env, FILE *f,
+ int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+ int flags);
+
+void cpu_abort(CPUState *env, const char *fmt, ...);
+extern CPUState *cpu_single_env;
+extern int code_copy_enabled;
+
+#define CPU_INTERRUPT_EXIT 0x01 /* wants exit from main loop */
+#define CPU_INTERRUPT_HARD 0x02 /* hardware interrupt pending */
+#define CPU_INTERRUPT_EXITTB 0x04 /* exit the current TB (use for x86 a20 case) */
+#define CPU_INTERRUPT_TIMER 0x08 /* internal timer exception pending */
+void cpu_interrupt(CPUState *s, int mask);
+void cpu_reset_interrupt(CPUState *env, int mask);
+
+int cpu_breakpoint_insert(CPUState *env, target_ulong pc);
+int cpu_breakpoint_remove(CPUState *env, target_ulong pc);
+void cpu_single_step(CPUState *env, int enabled);
+void cpu_reset(CPUState *s);
+CPUState *cpu_init(void);
+int main_loop(void);
+
+/* Return the physical page corresponding to a virtual one. Use it
+ only for debugging because no protection checks are done. Return -1
+ if no page found. */
+target_ulong cpu_get_phys_page_debug(CPUState *env, target_ulong addr);
+
+#define CPU_LOG_TB_OUT_ASM (1 << 0)
+#define CPU_LOG_TB_IN_ASM (1 << 1)
+#define CPU_LOG_TB_OP (1 << 2)
+#define CPU_LOG_TB_OP_OPT (1 << 3)
+#define CPU_LOG_INT (1 << 4)
+#define CPU_LOG_EXEC (1 << 5)
+#define CPU_LOG_PCALL (1 << 6)
+#define CPU_LOG_IOPORT (1 << 7)
+#define CPU_LOG_TB_CPU (1 << 8)
+
+/* define log items */
+typedef struct CPULogItem {
+ int mask;
+ const char *name;
+ const char *help;
+} CPULogItem;
+
+extern CPULogItem cpu_log_items[];
+
+void cpu_set_log(int log_flags);
+void cpu_set_log_filename(const char *filename);
+int cpu_str_to_log_mask(const char *str);
+
+/* IO ports API */
+
+/* NOTE: as these functions may even be used when there is an ISA
+   bridge on non-x86 targets, we always define them */
+#ifndef NO_CPU_IO_DEFS
+void cpu_outb(CPUState *env, int addr, int val);
+void cpu_outw(CPUState *env, int addr, int val);
+void cpu_outl(CPUState *env, int addr, int val);
+int cpu_inb(CPUState *env, int addr);
+int cpu_inw(CPUState *env, int addr);
+int cpu_inl(CPUState *env, int addr);
+#endif
+
+/* memory API */
+
+extern int phys_ram_size;
+extern int phys_ram_fd;
+extern uint8_t *phys_ram_base;
+extern uint8_t *phys_ram_dirty;
+
+/* physical memory access */
+#define IO_MEM_NB_ENTRIES 256
+#define TLB_INVALID_MASK (1 << 3)
+#define IO_MEM_SHIFT 4
+
+#define IO_MEM_RAM (0 << IO_MEM_SHIFT) /* hardcoded offset */
+#define IO_MEM_ROM (1 << IO_MEM_SHIFT) /* hardcoded offset */
+#define IO_MEM_UNASSIGNED (2 << IO_MEM_SHIFT)
+#define IO_MEM_CODE (3 << IO_MEM_SHIFT) /* used internally, never use directly */
+#define IO_MEM_NOTDIRTY (4 << IO_MEM_SHIFT) /* used internally, never use directly */
+
+typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
+typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
+
+void cpu_register_physical_memory(target_phys_addr_t start_addr,
+ unsigned long size,
+ unsigned long phys_offset);
+int cpu_register_io_memory(int io_index,
+ CPUReadMemoryFunc **mem_read,
+ CPUWriteMemoryFunc **mem_write,
+ void *opaque);
+CPUWriteMemoryFunc **cpu_get_io_memory_write(int io_index);
+CPUReadMemoryFunc **cpu_get_io_memory_read(int io_index);
+
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+ int len, int is_write);
+static inline void cpu_physical_memory_read(target_phys_addr_t addr,
+ uint8_t *buf, int len)
+{
+ cpu_physical_memory_rw(addr, buf, len, 0);
+}
+static inline void cpu_physical_memory_write(target_phys_addr_t addr,
+ const uint8_t *buf, int len)
+{
+ cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1);
+}
+
+int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
+ uint8_t *buf, int len, int is_write);
+
+/* read dirty bit (return 0 or 1) */
+static inline int cpu_physical_memory_is_dirty(target_ulong addr)
+{
+ return phys_ram_dirty[addr >> TARGET_PAGE_BITS];
+}
+
+static inline void cpu_physical_memory_set_dirty(target_ulong addr)
+{
+ phys_ram_dirty[addr >> TARGET_PAGE_BITS] = 1;
+}
+
+void cpu_physical_memory_reset_dirty(target_ulong start, target_ulong end);
+
+#endif /* CPU_ALL_H */
diff --git a/tools/ioemu/cpu-defs.h b/tools/ioemu/cpu-defs.h
new file mode 100644
index 0000000000..388d4abdbb
--- /dev/null
+++ b/tools/ioemu/cpu-defs.h
@@ -0,0 +1,95 @@
+/*
+ * common defines for all CPUs
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef CPU_DEFS_H
+#define CPU_DEFS_H
+
+#include "config.h"
+#include <setjmp.h>
+#include <inttypes.h>
+#include "osdep.h"
+
+#ifndef TARGET_LONG_BITS
+#error TARGET_LONG_BITS must be defined before including this header
+#endif
+
+#if defined(__alpha__) || defined (__ia64__) || defined(__x86_64__)
+#define HOST_LONG_BITS 64
+#else
+#define HOST_LONG_BITS 32
+#endif
+
+#ifndef TARGET_PHYS_ADDR_BITS
+#if TARGET_LONG_BITS >= HOST_LONG_BITS
+#define TARGET_PHYS_ADDR_BITS TARGET_LONG_BITS
+#else
+#define TARGET_PHYS_ADDR_BITS HOST_LONG_BITS
+#endif
+#endif
+
+#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
+
+/* target_ulong is the type of a virtual address */
+#if TARGET_LONG_SIZE == 4
+typedef int32_t target_long;
+typedef uint32_t target_ulong;
+#elif TARGET_LONG_SIZE == 8
+typedef int64_t target_long;
+typedef uint64_t target_ulong;
+#else
+#error TARGET_LONG_SIZE undefined
+#endif
+
+/* target_phys_addr_t is the type of a physical address (its size can
+ be different from 'target_ulong'). We have sizeof(target_phys_addr)
+ = max(sizeof(unsigned long),
+ sizeof(size_of_target_physical_address)) because we must pass a
+ host pointer to memory operations in some cases */
+
+#if TARGET_PHYS_ADDR_BITS == 32
+typedef uint32_t target_phys_addr_t;
+#elif TARGET_PHYS_ADDR_BITS == 64
+typedef uint64_t target_phys_addr_t;
+#else
+#error TARGET_PHYS_ADDR_BITS undefined
+#endif
+
+#define HOST_LONG_SIZE (HOST_LONG_BITS / 8)
+
+#define EXCP_INTERRUPT 256 /* async interruption */
+#define EXCP_HLT 257 /* hlt instruction reached */
+#define EXCP_DEBUG 258 /* cpu stopped after a breakpoint or singlestep */
+
+#define MAX_BREAKPOINTS 32
+
+#define CPU_TLB_SIZE 256
+
+typedef struct CPUTLBEntry {
+ /* bit 31 to TARGET_PAGE_BITS : virtual address
+ bit TARGET_PAGE_BITS-1..IO_MEM_SHIFT : if non zero, memory io
+ zone number
+ bit 3 : indicates that the entry is invalid
+ bit 2..0 : zero
+ */
+ target_ulong address;
+ /* addend to virtual address to get physical address */
+ target_phys_addr_t addend;
+} CPUTLBEntry;
+
+#endif
diff --git a/tools/ioemu/cpu.h b/tools/ioemu/cpu.h
new file mode 100644
index 0000000000..adeb5bdd93
--- /dev/null
+++ b/tools/ioemu/cpu.h
@@ -0,0 +1,69 @@
+/*
+ * i386 virtual CPU header
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef CPU_I386_H
+#define CPU_I386_H
+
+#include "config.h"
+
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
+/* target supports implicit self modifying code */
+#define TARGET_HAS_SMC
+/* support for self modifying code even if the modified instruction is
+ close to the modifying instruction */
+#define TARGET_HAS_PRECISE_SMC
+
+#include "cpu-defs.h"
+
+#if defined(__i386__) && !defined(CONFIG_SOFTMMU)
+#define USE_CODE_COPY
+#endif
+
+/* Empty for now */
+typedef struct CPUX86State {
+ uint32_t a20_mask;
+ int interrupt_request;
+ int send_event;
+} CPUX86State;
+
+#ifndef IN_OP_I386
+void cpu_x86_outb(CPUX86State *env, int addr, int val);
+void cpu_x86_outw(CPUX86State *env, int addr, int val);
+void cpu_x86_outl(CPUX86State *env, int addr, int val);
+int cpu_x86_inb(CPUX86State *env, int addr);
+int cpu_x86_inw(CPUX86State *env, int addr);
+int cpu_x86_inl(CPUX86State *env, int addr);
+#endif
+
+CPUX86State *cpu_x86_init(void);
+int cpu_x86_exec(CPUX86State *s);
+void cpu_x86_close(CPUX86State *s);
+int cpu_get_pic_interrupt(CPUX86State *s);
+/* MSDOS compatibility mode FPU exception support */
+void cpu_set_ferr(CPUX86State *s);
+
+#define TARGET_PAGE_BITS 12
+#include "cpu-all.h"
+
+#endif /* CPU_I386_H */
diff --git a/tools/ioemu/create_keysym_header.sh b/tools/ioemu/create_keysym_header.sh
new file mode 100644
index 0000000000..87b05e00ad
--- /dev/null
+++ b/tools/ioemu/create_keysym_header.sh
@@ -0,0 +1,77 @@
+#!/bin/sh
+
+# QEMU keysym adapter: create a header file to link the name to its keysym
+#
+# Copyright (c) 2004,2005 Johannes E. Schindelin
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+type="$1"
+cflags="$2"
+if [ -z "$cflags" ]; then
+ echo "Usage: $0 vnc|sdl cflags"
+ exit 1
+fi
+
+
+case "$type" in
+vnc)
+ ignore_case=''
+ header=rfb/keysym.h
+ keysym_t=rfbKeySym
+ pattern='^#define[ ]*XK_\([^ ]*\)[ ].*$'
+ replacement='#ifdef XK_\1\n{\"\1\", XK_\1},\n#endif'
+ extra_replace='cat'
+ extra_sort='cat'
+ ;;
+sdl)
+ ignore_case='-f'
+ header=SDL_keysym.h
+ keysym_t=int
+ pattern='^[ ]*SDLK_\([^ ]*\)[ ]*=.*$'
+ replacement='{\"\1\", SDLK_\1},'
+ extra_replace='sed -e s/{"\([RL]\)\(SUPER\|META\|ALT\|CONTROL\|SHIFT\)/{"\2_\1/ -e s/{"\([RL]\)CTRL/{"CONTROL_\1/ -e s/{"\(PAGE\)\(UP\|DOWN\)/{"\1_\2/ -e s/{"\(KP\)\([0-9]\)/{"\1_\2/ -e s/{"KP_MINUS/{"KP_SUBTRACT/ -e s/{"KP_PLUS/{"KP_ADD/ -e s/{"KP_PERIOD/{"KP_DECIMAL/ -e s/{"\(LEFT\|RIGHT\)\(PAREN\|BRACKET\)/{"\2\1/ -e s/{"EXCLAIM/{"EXCLAM/ -e s/{"\(CAPS\|NUM\)\(LOCK\)/{"\1_\2/ -e s/{"SCROLLOCK/{"SCROLL_LOCK/ -e s/{"KP_EQUALS/{"KP_EQUAL/ -e s/{"SYSREQ/{"SYS_REQ/ -e s/{"QUOTE"/{"APOSTROPHE"/ -e s/{"BACKQUOTE/{"GRAVE/ -e s/{"EQUALS/{"EQUAL/ -e s/{"EURO/{"EUROSIGN/ -e s/{"COMPOSE/{"MULTI_KEY/ -e s/{"MODE/{"MODE_SWITCH/ -e s/{"HASH/{"NUMBERSIGN/ -e s/{"WORLD_68/{"ADIAERESIS/ -e s/{"WORLD_86/{"ODIAERESIS/ -e s/{"WORLD_92/{"UDIAERESIS/ -e s/{"WORLD_63/{"SSHARP/ -e s/{"WORLD_20/{"ACUTE/ -e s/{"CARET/{"ASCIICIRCUM/'
+ extra_sort='sort -f'
+ ;;
+*) echo "Unknown type: $type is neither vnc nor sdl"; exit 1;;
+esac
+
+outfile=keysym_adapter_"$type".h
+
+echo "typedef struct {" > $outfile
+echo " const char* name;" >> $outfile
+echo " $keysym_t keysym;" >> $outfile
+echo "} name2keysym_t;" >> $outfile
+echo "static name2keysym_t name2keysym[]={" >> $outfile
+
+for path in $(echo "$cflags" | sed "s/-I/ /g"); do
+ if [ -f $path/$header ]; then
+ cat $path/$header
+ fi
+done | tr "\011" " " | LC_ALL=C sort $ignore_case | uniq | \
+sed -n -e "s/$pattern/$replacement/p" | $extra_replace | \
+LC_ALL=C $extra_sort >> $outfile
+
+echo "{0,0}};" >> $outfile
+
+if [ -n "$ignore_case" ]; then
+echo "#define KEYBOARD_IGNORE_CASE" >> $outfile
+fi
+
+
diff --git a/tools/ioemu/exec-all.h b/tools/ioemu/exec-all.h
new file mode 100644
index 0000000000..ac0533982d
--- /dev/null
+++ b/tools/ioemu/exec-all.h
@@ -0,0 +1,579 @@
+/*
+ * internal execution defines for qemu
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* allow to see translation results - the slowdown should be negligible, so we leave it */
+#define DEBUG_DISAS
+
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s) tostring(s)
+#define tostring(s) #s
+#endif
+
+#if GCC_MAJOR < 3
+#define __builtin_expect(x, n) (x)
+#endif
+
+#ifdef __i386__
+#define REGPARM(n) __attribute((regparm(n)))
+#else
+#define REGPARM(n)
+#endif
+
+/* is_jmp field values */
+#define DISAS_NEXT 0 /* next instruction can be analyzed */
+#define DISAS_JUMP 1 /* only pc was modified dynamically */
+#define DISAS_UPDATE 2 /* cpu state was modified dynamically */
+#define DISAS_TB_JUMP 3 /* only pc was modified statically */
+
+struct TranslationBlock;
+
+/* XXX: make safe guess about sizes */
+#define MAX_OP_PER_INSTR 32
+#define OPC_BUF_SIZE 512
+#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
+
+#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * 3)
+
+extern uint16_t gen_opc_buf[OPC_BUF_SIZE];
+extern uint32_t gen_opparam_buf[OPPARAM_BUF_SIZE];
+extern uint32_t gen_opc_pc[OPC_BUF_SIZE];
+extern uint32_t gen_opc_npc[OPC_BUF_SIZE];
+extern uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+extern uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+
+typedef void (GenOpFunc)(void);
+typedef void (GenOpFunc1)(long);
+typedef void (GenOpFunc2)(long, long);
+typedef void (GenOpFunc3)(long, long, long);
+
+#if defined(TARGET_I386)
+
+void optimize_flags_init(void);
+
+#endif
+
+extern FILE *logfile;
+extern int loglevel;
+
+int gen_intermediate_code(CPUState *env, struct TranslationBlock *tb);
+int gen_intermediate_code_pc(CPUState *env, struct TranslationBlock *tb);
+void dump_ops(const uint16_t *opc_buf, const uint32_t *opparam_buf);
+int cpu_gen_code(CPUState *env, struct TranslationBlock *tb,
+ int max_code_size, int *gen_code_size_ptr);
+int cpu_restore_state(struct TranslationBlock *tb,
+ CPUState *env, unsigned long searched_pc,
+ void *puc);
+int cpu_gen_code_copy(CPUState *env, struct TranslationBlock *tb,
+ int max_code_size, int *gen_code_size_ptr);
+int cpu_restore_state_copy(struct TranslationBlock *tb,
+ CPUState *env, unsigned long searched_pc,
+ void *puc);
+void cpu_resume_from_signal(CPUState *env1, void *puc);
+void cpu_exec_init(void);
+int page_unprotect(unsigned long address, unsigned long pc, void *puc);
+void tb_invalidate_phys_page_range(target_ulong start, target_ulong end,
+ int is_cpu_write_access);
+void tb_invalidate_page_range(target_ulong start, target_ulong end);
+void tlb_flush_page(CPUState *env, target_ulong addr);
+void tlb_flush(CPUState *env, int flush_global);
+int tlb_set_page(CPUState *env, target_ulong vaddr,
+ target_phys_addr_t paddr, int prot,
+ int is_user, int is_softmmu);
+
+#define CODE_GEN_MAX_SIZE 65536
+#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */
+
+#define CODE_GEN_HASH_BITS 15
+#define CODE_GEN_HASH_SIZE (1 << CODE_GEN_HASH_BITS)
+
+#define CODE_GEN_PHYS_HASH_BITS 15
+#define CODE_GEN_PHYS_HASH_SIZE (1 << CODE_GEN_PHYS_HASH_BITS)
+
+/* maximum total translated code allocated */
+
+/* NOTE: the translated code area cannot be too big because on some
+ archs the range of "fast" function calls is limited. Here is a
+ summary of the ranges:
+
+ i386 : signed 32 bits
+ arm : signed 26 bits
+ ppc : signed 24 bits
+ sparc : signed 32 bits
+ alpha : signed 23 bits
+*/
+
+#if defined(__alpha__)
+#define CODE_GEN_BUFFER_SIZE (2 * 1024 * 1024)
+#elif defined(__powerpc__)
+#define CODE_GEN_BUFFER_SIZE (6 * 1024 * 1024)
+#else
+#define CODE_GEN_BUFFER_SIZE (8 * 1024 * 1024)
+#endif
+
+//#define CODE_GEN_BUFFER_SIZE (128 * 1024)
+
+/* estimated block size for TB allocation */
+/* XXX: use a per code average code fragment size and modulate it
+ according to the host CPU */
+#if defined(CONFIG_SOFTMMU)
+#define CODE_GEN_AVG_BLOCK_SIZE 128
+#else
+#define CODE_GEN_AVG_BLOCK_SIZE 64
+#endif
+
+#define CODE_GEN_MAX_BLOCKS (CODE_GEN_BUFFER_SIZE / CODE_GEN_AVG_BLOCK_SIZE)
+
+#if defined(__powerpc__)
+#define USE_DIRECT_JUMP
+#endif
+#if defined(__i386__) && !defined(_WIN32)
+#define USE_DIRECT_JUMP
+#endif
+
+typedef struct TranslationBlock {
+ target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
+ target_ulong cs_base; /* CS base for this block */
+ unsigned int flags; /* flags defining in which context the code was generated */
+ uint16_t size; /* size of target code for this block (1 <=
+ size <= TARGET_PAGE_SIZE) */
+ uint16_t cflags; /* compile flags */
+#define CF_CODE_COPY 0x0001 /* block was generated in code copy mode */
+#define CF_TB_FP_USED 0x0002 /* fp ops are used in the TB */
+#define CF_FP_USED 0x0004 /* fp ops are used in the TB or in a chained TB */
+#define CF_SINGLE_INSN 0x0008 /* compile only a single instruction */
+
+ uint8_t *tc_ptr; /* pointer to the translated code */
+ struct TranslationBlock *hash_next; /* next matching tb for virtual address */
+ /* next matching tb for physical address. */
+ struct TranslationBlock *phys_hash_next;
+ /* first and second physical page containing code. The lower bit
+ of the pointer tells the index in page_next[] */
+ struct TranslationBlock *page_next[2];
+ target_ulong page_addr[2];
+
+ /* the following data are used to directly call another TB from
+ the code of this one. */
+ uint16_t tb_next_offset[2]; /* offset of original jump target */
+#ifdef USE_DIRECT_JUMP
+ uint16_t tb_jmp_offset[4]; /* offset of jump instruction */
+#else
+ uint32_t tb_next[2]; /* address of jump generated code */
+#endif
+ /* list of TBs jumping to this one. This is a circular list using
+ the two least significant bits of the pointers to tell what is
+ the next pointer: 0 = jmp_next[0], 1 = jmp_next[1], 2 =
+ jmp_first */
+ struct TranslationBlock *jmp_next[2];
+ struct TranslationBlock *jmp_first;
+} TranslationBlock;
+
+static inline unsigned int tb_hash_func(unsigned long pc)
+{
+    return pc & (CODE_GEN_HASH_SIZE - 1); /* table size is a power of two, so mask == modulo */
+}
+
+static inline unsigned int tb_phys_hash_func(unsigned long pc)
+{
+    return pc & (CODE_GEN_PHYS_HASH_SIZE - 1); /* same power-of-two mask trick, physical-address table */
+}
+
+TranslationBlock *tb_alloc(unsigned long pc);
+void tb_flush(CPUState *env);
+void tb_link(TranslationBlock *tb);
+void tb_link_phys(TranslationBlock *tb,
+ target_ulong phys_pc, target_ulong phys_page2);
+
+extern TranslationBlock *tb_hash[CODE_GEN_HASH_SIZE];
+extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+
+extern uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE];
+extern uint8_t *code_gen_ptr;
+
+/* find a translation block in the translation cache. If not found,
+ return NULL and the pointer to the last element of the list in pptb */
+static inline TranslationBlock *tb_find(TranslationBlock ***pptb,
+                                        target_ulong pc,
+                                        target_ulong cs_base,
+                                        unsigned int flags)
+{
+    TranslationBlock **ptb, *tb;
+    unsigned int h;
+
+    h = tb_hash_func(pc);
+    ptb = &tb_hash[h]; /* walk the hash-bucket chain for this pc */
+    for(;;) {
+        tb = *ptb;
+        if (!tb)
+            break; /* end of chain: lookup miss */
+        if (tb->pc == pc && tb->cs_base == cs_base && tb->flags == flags)
+            return tb; /* hit only when pc, CS base and translation flags all agree */
+        ptb = &tb->hash_next;
+    }
+    *pptb = ptb; /* on miss, hand back the tail slot so the caller can link a new TB there */
+    return NULL;
+}
+
+
+#if defined(USE_DIRECT_JUMP)
+
+#if defined(__powerpc__)
+static inline void tb_set_jmp_target1(unsigned long jmp_addr, unsigned long addr)
+{
+ uint32_t val, *ptr;
+
+ /* patch the branch destination */
+ ptr = (uint32_t *)jmp_addr;
+ val = *ptr;
+ val = (val & ~0x03fffffc) | ((addr - jmp_addr) & 0x03fffffc);
+ *ptr = val;
+ /* flush icache */
+ asm volatile ("dcbst 0,%0" : : "r"(ptr) : "memory");
+ asm volatile ("sync" : : : "memory");
+ asm volatile ("icbi 0,%0" : : "r"(ptr) : "memory");
+ asm volatile ("sync" : : : "memory");
+ asm volatile ("isync" : : : "memory");
+}
+#elif defined(__i386__)
+static inline void tb_set_jmp_target1(unsigned long jmp_addr, unsigned long addr)
+{
+ /* patch the branch destination */
+ *(uint32_t *)jmp_addr = addr - (jmp_addr + 4);
+ /* no need to flush icache explicitely */
+}
+#endif
+
+static inline void tb_set_jmp_target(TranslationBlock *tb,
+                                     int n, unsigned long addr)
+{
+    unsigned long offset;
+
+    offset = tb->tb_jmp_offset[n]; /* first jump instruction for exit n, relative to tc_ptr */
+    tb_set_jmp_target1((unsigned long)(tb->tc_ptr + offset), addr);
+    offset = tb->tb_jmp_offset[n + 2]; /* optional second jump site; 0xffff marks "absent" */
+    if (offset != 0xffff)
+        tb_set_jmp_target1((unsigned long)(tb->tc_ptr + offset), addr);
+}
+
+#else
+
+/* set the jump target */
+static inline void tb_set_jmp_target(TranslationBlock *tb,
+ int n, unsigned long addr)
+{
+ tb->tb_next[n] = addr;
+}
+
+#endif
+
+static inline void tb_add_jump(TranslationBlock *tb, int n,
+                               TranslationBlock *tb_next)
+{
+    /* NOTE: this test is only needed for thread safety */
+    if (!tb->jmp_next[n]) {
+        /* patch the native jump address */
+        tb_set_jmp_target(tb, n, (unsigned long)tb_next->tc_ptr);
+
+        /* add in TB jmp circular list */
+        tb->jmp_next[n] = tb_next->jmp_first;
+        tb_next->jmp_first = (TranslationBlock *)((long)(tb) | (n)); /* low pointer bits encode which jmp_next slot points back (see struct comment) */
+    }
+}
+
+TranslationBlock *tb_find_pc(unsigned long pc_ptr);
+
+#ifndef offsetof
+#define offsetof(type, field) ((size_t) &((type *)0)->field)
+#endif
+
+#if defined(_WIN32)
+#define ASM_DATA_SECTION ".section \".data\"\n"
+#define ASM_PREVIOUS_SECTION ".section .text\n"
+#elif defined(__APPLE__)
+#define ASM_DATA_SECTION ".data\n"
+#define ASM_PREVIOUS_SECTION ".text\n"
+#define ASM_NAME(x) "_" #x
+#else
+#define ASM_DATA_SECTION ".section \".data\"\n"
+#define ASM_PREVIOUS_SECTION ".previous\n"
+#define ASM_NAME(x) stringify(x)
+#endif
+
+#if defined(__powerpc__)
+
+/* we patch the jump instruction directly */
+#define JUMP_TB(opname, tbparam, n, eip)\
+do {\
+ asm volatile (ASM_DATA_SECTION\
+ ASM_NAME(__op_label) #n "." ASM_NAME(opname) ":\n"\
+ ".long 1f\n"\
+ ASM_PREVIOUS_SECTION \
+ "b " ASM_NAME(__op_jmp) #n "\n"\
+ "1:\n");\
+ T0 = (long)(tbparam) + (n);\
+ EIP = eip;\
+ EXIT_TB();\
+} while (0)
+
+#define JUMP_TB2(opname, tbparam, n)\
+do {\
+ asm volatile ("b " ASM_NAME(__op_jmp) #n "\n");\
+} while (0)
+
+#elif defined(__i386__) && defined(USE_DIRECT_JUMP)
+
+/* we patch the jump instruction directly */
+#define JUMP_TB(opname, tbparam, n, eip)\
+do {\
+ asm volatile (".section .data\n"\
+ ASM_NAME(__op_label) #n "." ASM_NAME(opname) ":\n"\
+ ".long 1f\n"\
+ ASM_PREVIOUS_SECTION \
+ "jmp " ASM_NAME(__op_jmp) #n "\n"\
+ "1:\n");\
+ T0 = (long)(tbparam) + (n);\
+ EIP = eip;\
+ EXIT_TB();\
+} while (0)
+
+#define JUMP_TB2(opname, tbparam, n)\
+do {\
+ asm volatile ("jmp " ASM_NAME(__op_jmp) #n "\n");\
+} while (0)
+
+#else
+
+/* jump to next block operations (more portable code, does not need
+ cache flushing, but slower because of indirect jump) */
+#define JUMP_TB(opname, tbparam, n, eip)\
+do {\
+ static void __attribute__((unused)) *__op_label ## n = &&label ## n;\
+ static void __attribute__((unused)) *dummy ## n = &&dummy_label ## n;\
+ goto *(void *)(((TranslationBlock *)tbparam)->tb_next[n]);\
+label ## n:\
+ T0 = (long)(tbparam) + (n);\
+ EIP = eip;\
+dummy_label ## n:\
+ EXIT_TB();\
+} while (0)
+
+/* second jump to same destination 'n' */
+#define JUMP_TB2(opname, tbparam, n)\
+do {\
+ goto *(void *)(((TranslationBlock *)tbparam)->tb_next[n - 2]);\
+} while (0)
+
+#endif
+
+extern CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
+extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
+extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
+
+#ifdef __powerpc__
+static inline int testandset (int *p)
+{
+ int ret;
+ __asm__ __volatile__ (
+ "0: lwarx %0,0,%1\n"
+ " xor. %0,%3,%0\n"
+ " bne 1f\n"
+ " stwcx. %2,0,%1\n"
+ " bne- 0b\n"
+ "1: "
+ : "=&r" (ret)
+ : "r" (p), "r" (1), "r" (0)
+ : "cr0", "memory");
+ return ret;
+}
+#endif
+
+#ifdef __i386__
+static inline int testandset (int *p)
+{
+ char ret;
+ long int readval;
+
+ __asm__ __volatile__ ("lock; cmpxchgl %3, %1; sete %0"
+ : "=q" (ret), "=m" (*p), "=a" (readval)
+ : "r" (1), "m" (*p), "a" (0)
+ : "memory");
+ return ret;
+}
+#endif
+
+#ifdef __x86_64__
+static inline int testandset (int *p)
+{
+ char ret;
+ int readval;
+
+ __asm__ __volatile__ ("lock; cmpxchgl %3, %1; sete %0"
+ : "=q" (ret), "=m" (*p), "=a" (readval)
+ : "r" (1), "m" (*p), "a" (0)
+ : "memory");
+ return ret;
+}
+#endif
+
+#ifdef __s390__
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n"
+ " jl 0b"
+ : "=&d" (ret)
+ : "r" (1), "a" (p), "0" (*p)
+ : "cc", "memory" );
+ return ret;
+}
+#endif
+
+#ifdef __alpha__
+static inline int testandset (int *p)
+{
+ int ret;
+ unsigned long one;
+
+ __asm__ __volatile__ ("0: mov 1,%2\n"
+ " ldl_l %0,%1\n"
+ " stl_c %2,%1\n"
+ " beq %2,1f\n"
+ ".subsection 2\n"
+ "1: br 0b\n"
+ ".previous"
+ : "=r" (ret), "=m" (*p), "=r" (one)
+ : "m" (*p));
+ return ret;
+}
+#endif
+
+#ifdef __sparc__
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__("ldstub [%1], %0"
+ : "=r" (ret)
+ : "r" (p)
+ : "memory");
+
+ return (ret ? 1 : 0);
+}
+#endif
+
+#ifdef __arm__
+static inline int testandset (int *spinlock)
+{
+ register unsigned int ret;
+ __asm__ __volatile__("swp %0, %1, [%2]"
+ : "=r"(ret)
+ : "0"(1), "r"(spinlock));
+
+ return ret;
+}
+#endif
+
+#ifdef __mc68000
+static inline int testandset (int *p)
+{
+ char ret;
+ __asm__ __volatile__("tas %1; sne %0"
+ : "=r" (ret)
+ : "m" (p)
+ : "cc","memory");
+ return ret == 0;
+}
+#endif
+
+typedef int spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED 0
+
+#if defined(CONFIG_USER_ONLY)
+static inline void spin_lock(spinlock_t *lock)
+{
+ while (testandset(lock));
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ *lock = 0;
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return !testandset(lock);
+}
+#else
+static inline void spin_lock(spinlock_t *lock)
+{
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return 1;
+}
+#endif
+
+extern spinlock_t tb_lock;
+
+extern int tb_invalidated_flag;
+
+#if !defined(CONFIG_USER_ONLY)
+
+void tlb_fill(unsigned long addr, int is_write, int is_user,
+ void *retaddr);
+
+#define ACCESS_TYPE 3
+#define MEMSUFFIX _code
+#define env cpu_single_env
+
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+#undef env
+
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+static inline target_ulong get_phys_addr_code(CPUState *env, target_ulong addr)
+{
+ return addr;
+}
+#else
+/* NOTE: this function can trigger an exception */
+/* NOTE2: the returned address is not exactly the physical address: it
+ is the offset relative to phys_ram_base */
+/* XXX: i386 target specific */
+static inline target_ulong get_phys_addr_code(CPUState *env, target_ulong addr)
+{
+ return addr;
+}
+#endif
+
+//#define DEBUG_UNUSED_IOPORT
+//#define DEBUG_IOPORT
+#define TARGET_VMX
+
diff --git a/tools/ioemu/exec.c b/tools/ioemu/exec.c
new file mode 100644
index 0000000000..c49975c3c1
--- /dev/null
+++ b/tools/ioemu/exec.c
@@ -0,0 +1,461 @@
+/*
+ * virtual page mapping and translated block handling
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "config.h"
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include "cpu.h"
+#include "exec-all.h"
+
+//#define DEBUG_TB_INVALIDATE
+//#define DEBUG_FLUSH
+//#define DEBUG_TLB
+
+/* make various TB consistency checks */
+//#define DEBUG_TB_CHECK
+//#define DEBUG_TLB_CHECK
+
+/* threshold to flush the translated code buffer */
+#define CODE_GEN_BUFFER_MAX_SIZE (CODE_GEN_BUFFER_SIZE - CODE_GEN_MAX_SIZE)
+
+#define SMC_BITMAP_USE_THRESHOLD 10
+
+#define MMAP_AREA_START 0x00000000
+#define MMAP_AREA_END 0xa8000000
+
+TranslationBlock tbs[CODE_GEN_MAX_BLOCKS];
+TranslationBlock *tb_hash[CODE_GEN_HASH_SIZE];
+TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+int nb_tbs;
+/* any access to the tbs or the page table must use this lock */
+spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
+
+uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE];
+uint8_t *code_gen_ptr;
+
+int phys_ram_size;
+int phys_ram_fd;
+uint8_t *phys_ram_base;
+uint8_t *phys_ram_dirty;
+
+typedef struct PageDesc {
+ /* list of TBs intersecting this ram page */
+ TranslationBlock *first_tb;
+ /* in order to optimize self modifying code, we count the number
+ of lookups we do to a given page to use a bitmap */
+ unsigned int code_write_count;
+ uint8_t *code_bitmap;
+#if defined(CONFIG_USER_ONLY)
+ unsigned long flags;
+#endif
+} PageDesc;
+
+typedef struct PhysPageDesc {
+ /* offset in host memory of the page + io_index in the low 12 bits */
+ unsigned long phys_offset;
+} PhysPageDesc;
+
+typedef struct VirtPageDesc {
+ /* physical address of code page. It is valid only if 'valid_tag'
+ matches 'virt_valid_tag' */
+ target_ulong phys_addr;
+ unsigned int valid_tag;
+#if !defined(CONFIG_SOFTMMU)
+ /* original page access rights. It is valid only if 'valid_tag'
+ matches 'virt_valid_tag' */
+ unsigned int prot;
+#endif
+} VirtPageDesc;
+
+#define L2_BITS 10
+#define L1_BITS (32 - L2_BITS - TARGET_PAGE_BITS)
+
+#define L1_SIZE (1 << L1_BITS)
+#define L2_SIZE (1 << L2_BITS)
+
+unsigned long qemu_real_host_page_size;
+unsigned long qemu_host_page_bits;
+unsigned long qemu_host_page_size;
+unsigned long qemu_host_page_mask;
+
+/* io memory support */
+CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
+CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
+void *io_mem_opaque[IO_MEM_NB_ENTRIES];
+static int io_mem_nb = 1;
+
+/* log support */
+char *logfilename = "/tmp/qemu.log";
+FILE *logfile;
+int loglevel;
+
+void cpu_exec_init(void)
+{
+    /* alloc dirty bits array */
+    phys_ram_dirty = qemu_malloc(phys_ram_size >> TARGET_PAGE_BITS); /* one dirty byte per target page; assumes phys_ram_size was set by the caller first — TODO confirm */
+}
+
+/* enable or disable low levels log */
+void cpu_set_log(int log_flags)
+{
+    loglevel = log_flags;
+    if (loglevel && !logfile) { /* open the log file lazily, on first enable */
+        logfile = fopen(logfilename, "w");
+        if (!logfile) {
+            perror(logfilename);
+            _exit(1); /* _exit: skip stdio flushing/atexit in a half-initialized process */
+        }
+#if !defined(CONFIG_SOFTMMU)
+        /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
+        {
+            static uint8_t logfile_buf[4096];
+            setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
+        }
+#else
+        setvbuf(logfile, NULL, _IOLBF, 0); /* line-buffered so the log survives a crash */
+#endif
+    }
+}
+
+void cpu_set_log_filename(const char *filename)
+{
+    logfilename = strdup(filename); /* NOTE(review): a previously strdup'd name is leaked on repeat calls; default is a string literal so it must not be freed */
+}
+
+/* mask must never be zero, except for A20 change call */
+void cpu_interrupt(CPUState *env, int mask)
+{
+    env->interrupt_request |= mask; /* set pending-interrupt bits; caller guarantees mask != 0 except for A20 change calls */
+}
+
+void cpu_reset_interrupt(CPUState *env, int mask)
+{
+    env->interrupt_request &= ~mask; /* clear the given pending-interrupt bits */
+}
+
+/* table of -d/log flag names and their user-visible help text; terminated by a zero mask */
+CPULogItem cpu_log_items[] = {
+    { CPU_LOG_TB_OUT_ASM, "out_asm",
+      "show generated host assembly code for each compiled TB" },
+    { CPU_LOG_TB_IN_ASM, "in_asm",
+      "show target assembly code for each compiled TB" },
+    { CPU_LOG_TB_OP, "op",
+      "show micro ops for each compiled TB (only usable if 'in_asm' used)" },
+#ifdef TARGET_I386
+    { CPU_LOG_TB_OP_OPT, "op_opt",
+      "show micro ops after optimization for each compiled TB" },
+#endif
+    { CPU_LOG_INT, "int",
+      "show interrupts/exceptions in short format" },
+    { CPU_LOG_EXEC, "exec",
+      "show trace before each executed TB (lots of logs)" },
+    { CPU_LOG_TB_CPU, "cpu",
+      "show CPU state before block translation" },
+#ifdef TARGET_I386
+    { CPU_LOG_PCALL, "pcall",
+      "show protected mode far calls/returns/exceptions" },
+#endif
+#ifdef DEBUG_IOPORT
+    { CPU_LOG_IOPORT, "ioport",
+      "show all i/o ports accesses" },
+#endif
+    { 0, NULL, NULL },
+
+static int cmp1(const char *s1, int n, const char *s2)
+{
+    /* true iff the first n characters of s1 form exactly the string s2 */
+    size_t want = strlen(s2);
+    return want == (size_t)n && memcmp(s1, s2, want) == 0;
+}
+
+/* takes a comma separated list of log masks. Return 0 if error. */
+int cpu_str_to_log_mask(const char *str)
+{
+    CPULogItem *item;
+    int mask;
+    const char *p, *p1;
+
+    p = str;
+    mask = 0;
+    for(;;) {
+        p1 = strchr(p, ','); /* [p, p1) is the current comma-delimited token */
+        if (!p1)
+            p1 = p + strlen(p); /* last token: runs to end of string */
+        if(cmp1(p,p1-p,"all")) { /* "all" enables every known log item */
+            for(item = cpu_log_items; item->mask != 0; item++) {
+                mask |= item->mask;
+            }
+        } else {
+            for(item = cpu_log_items; item->mask != 0; item++) {
+                if (cmp1(p, p1 - p, item->name))
+                    goto found;
+            }
+            return 0; /* unknown name: report error to caller */
+        }
+    found:
+        mask |= item->mask; /* after the "all" branch item is the terminator (mask 0), so this is a harmless no-op */
+        if (*p1 != ',')
+            break;
+        p = p1 + 1;
+    }
+    return mask;
+}
+
+/* Print "qemu: fatal: <formatted message>\n" to stderr and terminate via abort(). */
+void cpu_abort(CPUState *env, const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    fputs("qemu: fatal: ", stderr);
+    vfprintf(stderr, fmt, ap);
+    fputc('\n', stderr);
+    va_end(ap);
+    abort();
+}
+
+
+/* XXX: Simple implementation. Fix later */
+#define MAX_MMIO 32
+struct mmio_space {
+ target_phys_addr_t start;
+ unsigned long size;
+ unsigned long io_index;
+} mmio[MAX_MMIO];
+unsigned long mmio_cnt;
+
+/* register physical memory. 'size' must be a multiple of the target
+ page size. If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
+ io memory page */
+void cpu_register_physical_memory(target_phys_addr_t start_addr,
+                                  unsigned long size,
+                                  unsigned long phys_offset)
+{
+    if (mmio_cnt == MAX_MMIO) {
+        fprintf(stderr, "too many mmio regions\n"); /* was logfile, which is NULL until cpu_set_log() opens it */
+        exit(1); /* exit(-1) reports status 255; use a conventional failure code */
+    }
+    mmio[mmio_cnt].io_index = phys_offset;
+    mmio[mmio_cnt].start = start_addr;
+    mmio[mmio_cnt++].size = size;
+}
+
+/* mem_read and mem_write are arrays of functions containing the
+ function to access byte (index 0), word (index 1) and dword (index
+ 2). All functions must be supplied. If io_index is non zero, the
+ corresponding io zone is modified. If it is zero, a new io zone is
+ allocated. The return value can be used with
+ cpu_register_physical_memory(). (-1) is returned if error. */
+int cpu_register_io_memory(int io_index,
+                           CPUReadMemoryFunc **mem_read,
+                           CPUWriteMemoryFunc **mem_write,
+                           void *opaque)
+{
+    int i;
+
+    if (io_index <= 0) {
+        if (io_mem_nb >= IO_MEM_NB_ENTRIES) /* was io_index: always false when io_index <= 0, so the tables could overflow */
+            return -1;
+        io_index = io_mem_nb++;
+    } else {
+        if (io_index >= IO_MEM_NB_ENTRIES)
+            return -1;
+    }
+
+    for(i = 0;i < 3; i++) { /* install byte (0), word (1) and dword (2) handlers */
+        io_mem_read[io_index][i] = mem_read[i];
+        io_mem_write[io_index][i] = mem_write[i];
+    }
+    io_mem_opaque[io_index] = opaque;
+    return io_index << IO_MEM_SHIFT;
+}
+
+CPUWriteMemoryFunc **cpu_get_io_memory_write(int io_index)
+{
+    return io_mem_write[io_index >> IO_MEM_SHIFT]; /* io_index is a cpu_register_io_memory() cookie (already left-shifted) */
+}
+
+CPUReadMemoryFunc **cpu_get_io_memory_read(int io_index)
+{
+    return io_mem_read[io_index >> IO_MEM_SHIFT]; /* io_index is a cpu_register_io_memory() cookie (already left-shifted) */
+}
+
+/* physical memory access (slow version, mainly for debug) */
+#if defined(CONFIG_USER_ONLY)
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+                            int len, int is_write)
+{
+    int l, flags;
+    target_ulong page;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        l = (page + TARGET_PAGE_SIZE) - addr; /* bytes remaining in this page */
+        if (l > len)
+            l = len;
+        flags = page_get_flags(page);
+        if (!(flags & PAGE_VALID))
+            return;
+        if (is_write) {
+            if (!(flags & PAGE_WRITE))
+                return;
+            memcpy((uint8_t *)addr, buf, l); /* was 'len': overran the per-page chunk computed above */
+        } else {
+            if (!(flags & PAGE_READ))
+                return;
+            memcpy(buf, (uint8_t *)addr, l); /* was 'len': same per-page overrun on the read side */
+        }
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+}
+#else
+
+/* Return the io_mem table index for a registered MMIO region containing
+   addr, or 0 (RAM) if none matches. */
+int iomem_index(target_phys_addr_t addr)
+{
+    int i;
+
+    for (i = 0; i < mmio_cnt; i++) {
+        unsigned long start, end;
+
+        start = mmio[i].start;
+        end = mmio[i].start + mmio[i].size; /* exclusive upper bound */
+
+        if ((addr >= start) && (addr < end)){ /* was '<=': matched one byte past the region */
+            return (mmio[i].io_index >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+        }
+    }
+    return 0;
+}
+
+/* Copy len bytes between buf and guest physical memory, splitting the
+   transfer at page boundaries and dispatching to MMIO handlers where a
+   region is registered; plain RAM is accessed through phys_ram_base. */
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+                            int len, int is_write)
+{
+    int l, io_index;
+    uint8_t *ptr;
+    uint32_t val;
+    target_phys_addr_t page;
+    unsigned long pd;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        l = (page + TARGET_PAGE_SIZE) - addr; /* bytes remaining in this page */
+        if (l > len)
+            l = len;
+
+        pd = page;
+        io_index = iomem_index(page); /* non-zero selects a registered MMIO handler */
+        if (is_write) {
+            if (io_index) {
+                if (l >= 4 && ((addr & 3) == 0)) {
+                    /* 32 bit write access */
+                    val = ldl_raw(buf);
+                    io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+                    l = 4;
+                } else if (l >= 2 && ((addr & 1) == 0)) {
+                    /* 16 bit write access */
+                    val = lduw_raw(buf);
+                    io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
+                    l = 2;
+                } else {
+                    /* 8 bit access */
+                    val = ldub_raw(buf);
+                    io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
+                    l = 1;
+                }
+            } else {
+                unsigned long addr1;
+
+                addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+                /* RAM case */
+                ptr = phys_ram_base + addr1;
+                memcpy(ptr, buf, l);
+            }
+        } else {
+            if (io_index) {
+                if (l >= 4 && ((addr & 3) == 0)) {
+                    /* 32 bit read access */
+                    val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+                    stl_raw(buf, val);
+                    l = 4;
+                } else if (l >= 2 && ((addr & 1) == 0)) {
+                    /* 16 bit read access */
+                    val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
+                    stw_raw(buf, val);
+                    l = 2;
+                } else {
+                    /* 8 bit access */
+                    val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
+                    stb_raw(buf, val);
+                    l = 1;
+                }
+            } else {
+                /* RAM case */
+                ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
+                    (addr & ~TARGET_PAGE_MASK);
+                memcpy(buf, ptr, l);
+            }
+        }
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+}
+#endif
+
+/* virtual memory access for debug */
+int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
+                        uint8_t *buf, int len, int is_write)
+{
+    int l;
+    target_ulong page, phys_addr;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        phys_addr = cpu_get_phys_page_debug(env, page); /* translate virtual page to physical */
+        /* if no physical page mapped, return an error */
+        if (phys_addr == -1)
+            return -1;
+        l = (page + TARGET_PAGE_SIZE) - addr; /* clamp the chunk to the current page */
+        if (l > len)
+            l = len;
+        cpu_physical_memory_rw(phys_addr + (addr & ~TARGET_PAGE_MASK),
+                               buf, l, is_write);
+        len -= l;
+        buf += l;
+        addr += l;
+    }
+    return 0;
+}
+
+void cpu_physical_memory_reset_dirty(target_ulong start, target_ulong end)
+{
+} /* intentionally empty stub: dirty-page clearing is not implemented in this port */
diff --git a/tools/ioemu/hw/adb.c b/tools/ioemu/hw/adb.c
new file mode 100644
index 0000000000..36c4aecd25
--- /dev/null
+++ b/tools/ioemu/hw/adb.c
@@ -0,0 +1,386 @@
+/*
+ * QEMU ADB support
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* ADB commands */
+#define ADB_BUSRESET 0x00
+#define ADB_FLUSH 0x01
+#define ADB_WRITEREG 0x08
+#define ADB_READREG 0x0c
+
+/* ADB device commands */
+#define ADB_CMD_SELF_TEST 0xff
+#define ADB_CMD_CHANGE_ID 0xfe
+#define ADB_CMD_CHANGE_ID_AND_ACT 0xfd
+#define ADB_CMD_CHANGE_ID_AND_ENABLE 0x00
+
+/* ADB default device IDs (upper 4 bits of ADB command byte) */
+#define ADB_DONGLE 1
+#define ADB_KEYBOARD 2
+#define ADB_MOUSE 3
+#define ADB_TABLET 4
+#define ADB_MODEM 5
+#define ADB_MISC 7
+
+/* error codes */
+#define ADB_RET_NOTPRESENT (-2)
+
+int adb_request(ADBBusState *s, uint8_t *obuf, const uint8_t *buf, int len)
+{
+ ADBDevice *d;
+ int devaddr, cmd, i;
+
+ cmd = buf[0] & 0xf; /* low nibble of the command byte = command code */
+ if (cmd == ADB_BUSRESET) {
+ for(i = 0; i < s->nb_devices; i++) { /* bus reset: notify every device that has a reset hook */
+ d = &s->devices[i];
+ if (d->devreset) {
+ d->devreset(d);
+ }
+ }
+ return 0;
+ }
+ devaddr = buf[0] >> 4; /* high nibble = target device address */
+ for(i = 0; i < s->nb_devices; i++) {
+ d = &s->devices[i];
+ if (d->devaddr == devaddr) {
+ return d->devreq(d, obuf, buf, len);
+ }
+ }
+ return ADB_RET_NOTPRESENT; /* no device registered at this address */
+}
+
+/* XXX: move that to cuda ? */
+int adb_poll(ADBBusState *s, uint8_t *obuf)
+{
+ ADBDevice *d;
+ int olen, i;
+ uint8_t buf[1];
+
+ olen = 0;
+ for(i = 0; i < s->nb_devices; i++) { /* round-robin over devices starting at poll_index */
+ if (s->poll_index >= s->nb_devices)
+ s->poll_index = 0;
+ d = &s->devices[s->poll_index];
+ buf[0] = ADB_READREG | (d->devaddr << 4); /* a poll is a READREG of register 0 */
+ olen = adb_request(s, obuf + 1, buf, 1);
+ /* if there is data, we poll again the same device */
+ if (olen > 0) {
+ obuf[0] = buf[0]; /* prepend the command byte that produced the data */
+ olen++;
+ break;
+ }
+ s->poll_index++;
+ }
+ return olen;
+}
+
+ADBDevice *adb_register_device(ADBBusState *s, int devaddr,
+ ADBDeviceRequest *devreq,
+ ADBDeviceReset *devreset,
+ void *opaque)
+{
+ ADBDevice *d;
+ if (s->nb_devices >= MAX_ADB_DEVICES)
+ return NULL;
+ d = &s->devices[s->nb_devices++];
+ d->bus = s;
+ d->devaddr = devaddr;
+ d->devreq = devreq;
+ d->devreset = devreset;
+ d->opaque = opaque;
+ return d;
+}
+
+/***************************************************************/
+/* Keyboard ADB device */
+
+typedef struct KBDState {
+ uint8_t data[128];
+ int rptr, wptr, count;
+} KBDState;
+
+static const uint8_t pc_to_adb_keycode[256] = {
+ 0, 53, 18, 19, 20, 21, 23, 22, 26, 28, 25, 29, 27, 24, 51, 48,
+ 12, 13, 14, 15, 17, 16, 32, 34, 31, 35, 33, 30, 36, 54, 0, 1,
+ 2, 3, 5, 4, 38, 40, 37, 41, 39, 50, 56, 42, 6, 7, 8, 9,
+ 11, 45, 46, 43, 47, 44,123, 67, 58, 49, 57,122,120, 99,118, 96,
+ 97, 98,100,101,109, 71,107, 89, 91, 92, 78, 86, 87, 88, 69, 83,
+ 84, 85, 82, 65, 0, 0, 10,103,111, 0, 0,110, 81, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 94, 0, 93, 0, 0, 0, 0, 0, 0,104,102, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76,125, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,105, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 75, 0, 0,124, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,115, 62,116, 0, 59, 0, 60, 0,119,
+ 61,121,114,117, 0, 0, 0, 0, 0, 0, 0, 55,126, 0,127, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static void adb_kbd_put_keycode(void *opaque, int keycode)
+{
+ ADBDevice *d = opaque;
+ KBDState *s = d->opaque;
+
+ if (s->count < sizeof(s->data)) {
+ s->data[s->wptr] = keycode;
+ if (++s->wptr == sizeof(s->data))
+ s->wptr = 0;
+ s->count++;
+ }
+}
+
+static int adb_kbd_poll(ADBDevice *d, uint8_t *obuf)
+{
+ static int ext_keycode;
+ KBDState *s = d->opaque;
+ int adb_keycode, keycode;
+ int olen;
+
+ olen = 0;
+ for(;;) {
+ if (s->count == 0)
+ break;
+ keycode = s->data[s->rptr];
+ if (++s->rptr == sizeof(s->data))
+ s->rptr = 0;
+ s->count--;
+
+ if (keycode == 0xe0) {
+ ext_keycode = 1;
+ } else {
+ if (ext_keycode)
+ adb_keycode = pc_to_adb_keycode[keycode | 0x80];
+ else
+ adb_keycode = pc_to_adb_keycode[keycode & 0x7f];
+ obuf[0] = adb_keycode | (keycode & 0x80);
+ /* NOTE: could put a second keycode if needed */
+ obuf[1] = 0xff;
+ olen = 2;
+ ext_keycode = 0;
+ break;
+ }
+ }
+ return olen;
+}
+
+static int adb_kbd_request(ADBDevice *d, uint8_t *obuf,
+ const uint8_t *buf, int len)
+{
+ KBDState *s = d->opaque;
+ int cmd, reg, olen;
+
+ if ((buf[0] & 0x0f) == ADB_FLUSH) {
+ /* flush keyboard fifo */
+ s->wptr = s->rptr = s->count = 0;
+ return 0;
+ }
+
+ cmd = buf[0] & 0xc;
+ reg = buf[0] & 0x3;
+ olen = 0;
+ switch(cmd) {
+ case ADB_WRITEREG:
+ switch(reg) {
+ case 2:
+ /* LED status */
+ break;
+ case 3:
+ switch(buf[2]) {
+ case ADB_CMD_SELF_TEST:
+ break;
+ case ADB_CMD_CHANGE_ID:
+ case ADB_CMD_CHANGE_ID_AND_ACT:
+ case ADB_CMD_CHANGE_ID_AND_ENABLE:
+ d->devaddr = buf[1] & 0xf;
+ break;
+ default:
+ /* XXX: check this */
+ d->devaddr = buf[1] & 0xf;
+ d->handler = buf[2];
+ break;
+ }
+ }
+ break;
+ case ADB_READREG:
+ switch(reg) {
+ case 0:
+ olen = adb_kbd_poll(d, obuf);
+ break;
+ case 1:
+ break;
+ case 2:
+ obuf[0] = 0x00; /* XXX: check this */
+ obuf[1] = 0x07; /* led status */
+ olen = 2;
+ break;
+ case 3:
+ obuf[0] = d->handler;
+ obuf[1] = d->devaddr;
+ olen = 2;
+ break;
+ }
+ break;
+ }
+ return olen;
+}
+
+void adb_kbd_init(ADBBusState *bus)
+{
+ ADBDevice *d;
+ KBDState *s;
+ s = qemu_mallocz(sizeof(KBDState));
+ d = adb_register_device(bus, ADB_KEYBOARD, adb_kbd_request, NULL, s);
+ d->handler = 1;
+ qemu_add_kbd_event_handler(adb_kbd_put_keycode, d);
+}
+
+/***************************************************************/
+/* Mouse ADB device */
+
+typedef struct MouseState {
+ int buttons_state, last_buttons_state;
+ int dx, dy, dz;
+} MouseState;
+
+static void adb_mouse_event(void *opaque,
+ int dx1, int dy1, int dz1, int buttons_state)
+{
+ ADBDevice *d = opaque;
+ MouseState *s = d->opaque;
+
+ s->dx += dx1;
+ s->dy += dy1;
+ s->dz += dz1;
+ s->buttons_state = buttons_state;
+}
+
+
+static int adb_mouse_poll(ADBDevice *d, uint8_t *obuf)
+{
+ MouseState *s = d->opaque;
+ int dx, dy;
+
+ if (s->last_buttons_state == s->buttons_state &&
+ s->dx == 0 && s->dy == 0) /* NOTE(review): s->dz is accumulated but never reported or reset here */
+ return 0;
+
+ dx = s->dx;
+ if (dx < -63) /* clamp per-packet movement to the 7-bit signed range */
+ dx = -63;
+ else if (dx > 63)
+ dx = 63;
+
+ dy = s->dy;
+ if (dy < -63)
+ dy = -63;
+ else if (dy > 63)
+ dy = 63;
+
+ s->dx -= dx; /* keep any clamped remainder for the next poll */
+ s->dy -= dy;
+ s->last_buttons_state = s->buttons_state;
+
+ dx &= 0x7f;
+ dy &= 0x7f;
+
+ if (!(s->buttons_state & MOUSE_EVENT_LBUTTON)) /* high bit set means button released */
+ dy |= 0x80;
+ if (!(s->buttons_state & MOUSE_EVENT_RBUTTON))
+ dx |= 0x80;
+
+ obuf[0] = dy;
+ obuf[1] = dx;
+ return 2;
+}
+
+static int adb_mouse_request(ADBDevice *d, uint8_t *obuf,
+ const uint8_t *buf, int len)
+{
+ MouseState *s = d->opaque;
+ int cmd, reg, olen;
+
+ if ((buf[0] & 0x0f) == ADB_FLUSH) {
+ /* flush mouse fifo */
+ s->buttons_state = s->last_buttons_state;
+ s->dx = 0;
+ s->dy = 0;
+ s->dz = 0;
+ return 0;
+ }
+
+ cmd = buf[0] & 0xc;
+ reg = buf[0] & 0x3;
+ olen = 0;
+ switch(cmd) {
+ case ADB_WRITEREG:
+ switch(reg) {
+ case 2:
+ break;
+ case 3:
+ switch(buf[2]) {
+ case ADB_CMD_SELF_TEST:
+ break;
+ case ADB_CMD_CHANGE_ID:
+ case ADB_CMD_CHANGE_ID_AND_ACT:
+ case ADB_CMD_CHANGE_ID_AND_ENABLE:
+ d->devaddr = buf[1] & 0xf;
+ break;
+ default:
+ /* XXX: check this */
+ d->devaddr = buf[1] & 0xf;
+ break;
+ }
+ }
+ break;
+ case ADB_READREG:
+ switch(reg) {
+ case 0:
+ olen = adb_mouse_poll(d, obuf);
+ break;
+ case 1:
+ break;
+ case 3:
+ obuf[0] = d->handler;
+ obuf[1] = d->devaddr;
+ olen = 2;
+ break;
+ }
+ break;
+ }
+ return olen;
+}
+
+void adb_mouse_init(ADBBusState *bus)
+{
+ ADBDevice *d;
+ MouseState *s;
+
+ s = qemu_mallocz(sizeof(MouseState));
+ d = adb_register_device(bus, ADB_MOUSE, adb_mouse_request, NULL, s);
+ d->handler = 2;
+ qemu_add_mouse_event_handler(adb_mouse_event, d);
+}
diff --git a/tools/ioemu/hw/adlib.c b/tools/ioemu/hw/adlib.c
new file mode 100644
index 0000000000..939a7ed036
--- /dev/null
+++ b/tools/ioemu/hw/adlib.c
@@ -0,0 +1,313 @@
+/*
+ * QEMU Adlib emulation
+ *
+ * Copyright (c) 2004 Vassili Karpov (malc)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+#define dolog(...) AUD_log ("adlib", __VA_ARGS__)
+#ifdef DEBUG
+#define ldebug(...) dolog (__VA_ARGS__)
+#else
+#define ldebug(...)
+#endif
+
+#ifdef USE_YMF262
+#define HAS_YMF262 1
+#include "ymf262.h"
+void YMF262UpdateOneQEMU(int which, INT16 *dst, int length);
+#define SHIFT 2
+#else
+#include "fmopl.h"
+#define SHIFT 1
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#define small_delay() Sleep (1)
+#else
+#define small_delay() usleep (1)
+#endif
+
+#define IO_READ_PROTO(name) \
+ uint32_t name (void *opaque, uint32_t nport)
+#define IO_WRITE_PROTO(name) \
+ void name (void *opaque, uint32_t nport, uint32_t val)
+
+static struct {
+ int port;
+ int freq;
+} conf = {0x220, 44100};
+
+typedef struct {
+ int enabled;
+ int active;
+ int cparam;
+ int64_t ticks;
+ int bufpos;
+ int16_t *mixbuf;
+ double interval;
+ QEMUTimer *ts, *opl_ts;
+ SWVoice *voice;
+ int left, pos, samples, bytes_per_second, old_free;
+ int refcount;
+#ifndef USE_YMF262
+ FM_OPL *opl;
+#endif
+} AdlibState;
+
+static AdlibState adlib;
+
+static IO_WRITE_PROTO(adlib_write)
+{
+ AdlibState *s = opaque;
+ int a = nport & 3; /* register select: low two bits of the port */
+ int status; /* NOTE(review): assigned below but never used */
+
+ s->ticks = qemu_get_clock (vm_clock);
+ s->active = 1; /* mark the chip active and (re)enable the audio voice */
+ AUD_enable (s->voice, 1);
+
+#ifdef USE_YMF262
+ status = YMF262Write (0, a, val);
+#else
+ status = OPLWrite (s->opl, a, val);
+#endif
+}
+
+static IO_READ_PROTO(adlib_read)
+{
+ AdlibState *s = opaque;
+ uint8_t data;
+ int a = nport & 3;
+
+#ifdef USE_YMF262
+ (void) s;
+ data = YMF262Read (0, a);
+#else
+ data = OPLRead (s->opl, a);
+#endif
+ return data;
+}
+
+static void OPL_timer (void *opaque)
+{
+ AdlibState *s = opaque;
+#ifdef USE_YMF262
+ YMF262TimerOver (s->cparam >> 1, s->cparam & 1);
+#else
+ OPLTimerOver (s->opl, s->cparam);
+#endif
+ qemu_mod_timer (s->opl_ts, qemu_get_clock (vm_clock) + s->interval);
+}
+
+static void YMF262TimerHandler (int c, double interval_Sec)
+{
+ AdlibState *s = &adlib;
+ if (interval_Sec == 0.0) {
+ qemu_del_timer (s->opl_ts);
+ return;
+ }
+ s->cparam = c;
+ s->interval = ticks_per_sec * interval_Sec;
+ qemu_mod_timer (s->opl_ts, qemu_get_clock (vm_clock) + s->interval);
+ small_delay ();
+}
+
+static int write_audio (AdlibState *s, int samples)
+{
+ int net = 0;
+ int ss = samples;
+ while (samples) {
+ int nbytes = samples << SHIFT;
+ int wbytes = AUD_write (s->voice,
+ s->mixbuf + (s->pos << (SHIFT - 1)),
+ nbytes);
+ int wsampl = wbytes >> SHIFT;
+ samples -= wsampl;
+ s->pos = (s->pos + wsampl) % s->samples;
+ net += wsampl;
+ if (!wbytes)
+ break;
+ }
+ if (net > ss) {
+ dolog ("WARNING: net > ss\n");
+ }
+ return net;
+}
+
+static void timer (void *opaque)
+{
+ AdlibState *s = opaque;
+ int elapsed, samples, net = 0;
+
+ if (s->refcount)
+ dolog ("refcount=%d\n", s->refcount);
+
+ s->refcount += 1;
+ if (!(s->active && s->enabled))
+ goto reset;
+
+ AUD_run ();
+
+ while (s->left) {
+ int written = write_audio (s, s->left);
+ net += written;
+ if (!written)
+ goto reset2;
+ s->left -= written;
+ }
+ s->pos = 0;
+
+ elapsed = AUD_calc_elapsed (s->voice);
+ if (!elapsed)
+ goto reset2;
+
+ /* elapsed = AUD_get_free (s->voice); */
+ samples = elapsed >> SHIFT;
+ if (!samples)
+ goto reset2;
+
+ samples = audio_MIN (samples, s->samples - s->pos);
+ if (s->left)
+ dolog ("left=%d samples=%d elapsed=%d free=%d\n",
+ s->left, samples, elapsed, AUD_get_free (s->voice));
+
+ if (!samples)
+ goto reset2;
+
+#ifdef USE_YMF262
+ YMF262UpdateOneQEMU (0, s->mixbuf + s->pos * 2, samples);
+#else
+ YM3812UpdateOne (s->opl, s->mixbuf + s->pos, samples);
+#endif
+
+ while (samples) {
+ int written = write_audio (s, samples);
+ net += written;
+ if (!written)
+ break;
+ samples -= written;
+ }
+ if (!samples)
+ s->pos = 0;
+ s->left = samples;
+
+reset2:
+ AUD_adjust (s->voice, net << SHIFT);
+reset:
+ qemu_mod_timer (s->ts, qemu_get_clock (vm_clock) + ticks_per_sec / 1024);
+ s->refcount -= 1;
+}
+
+static void Adlib_fini (AdlibState *s)
+{
+#ifdef USE_YMF262
+ YMF262Shutdown ();
+#else
+ if (s->opl) {
+ OPLDestroy (s->opl);
+ s->opl = NULL;
+ }
+#endif
+
+ if (s->opl_ts)
+ qemu_free_timer (s->opl_ts);
+
+ if (s->ts)
+ qemu_free_timer (s->ts);
+
+#define maybe_free(p) if (p) qemu_free (p)
+ maybe_free (s->mixbuf);
+#undef maybe_free
+
+ s->active = 0;
+ s->enabled = 0;
+}
+
+void Adlib_init (void) /* initialise the adlib (OPL) emulation and register its I/O ports */
+{
+ AdlibState *s = &adlib;
+
+ memset (s, 0, sizeof (*s));
+
+#ifdef USE_YMF262
+ if (YMF262Init (1, 14318180, conf.freq)) {
+ dolog ("YMF262Init %d failed\n", conf.freq);
+ return;
+ }
+ else {
+ YMF262SetTimerHandler (0, YMF262TimerHandler, 0);
+ s->enabled = 1;
+ }
+#else
+ s->opl = OPLCreate (OPL_TYPE_YM3812, 3579545, conf.freq);
+ if (!s->opl) {
+ dolog ("OPLCreate %d failed\n", conf.freq);
+ return;
+ }
+ else {
+ OPLSetTimerHandler (s->opl, YMF262TimerHandler, 0);
+ s->enabled = 1;
+ }
+#endif
+
+ s->opl_ts = qemu_new_timer (vm_clock, OPL_timer, s);
+ if (!s->opl_ts) {
+ dolog ("Can not get timer for adlib emulation\n");
+ Adlib_fini (s);
+ return;
+ }
+
+ s->ts = qemu_new_timer (vm_clock, timer, s);
+ if (!s->ts) { /* bug fix: was re-testing s->opl_ts (copy/paste error), so a failed s->ts went undetected */
+ dolog ("Can not get timer for adlib emulation\n");
+ Adlib_fini (s);
+ return;
+ }
+
+ s->voice = AUD_open (s->voice, "adlib", conf.freq, SHIFT, AUD_FMT_S16);
+ if (!s->voice) {
+ Adlib_fini (s);
+ return;
+ }
+
+ s->bytes_per_second = conf.freq << SHIFT;
+ s->samples = AUD_get_buffer_size (s->voice) >> SHIFT;
+ s->mixbuf = qemu_mallocz (s->samples << SHIFT);
+
+ if (!s->mixbuf) {
+ dolog ("not enough memory for adlib mixing buffer (%d)\n",
+ s->samples << SHIFT);
+ Adlib_fini (s);
+ return;
+ }
+ register_ioport_read (0x388, 4, 1, adlib_read, s); /* legacy adlib port */
+ register_ioport_write (0x388, 4, 1, adlib_write, s);
+
+ register_ioport_read (conf.port, 4, 1, adlib_read, s); /* SB-compatible FM ports */
+ register_ioport_write (conf.port, 4, 1, adlib_write, s);
+
+ register_ioport_read (conf.port + 8, 2, 1, adlib_read, s);
+ register_ioport_write (conf.port + 8, 2, 1, adlib_write, s);
+
+ qemu_mod_timer (s->ts, qemu_get_clock (vm_clock) + 1);
+}
diff --git a/tools/ioemu/hw/cirrus_vga.c b/tools/ioemu/hw/cirrus_vga.c
new file mode 100644
index 0000000000..7c34c57899
--- /dev/null
+++ b/tools/ioemu/hw/cirrus_vga.c
@@ -0,0 +1,3115 @@
+/*
+ * QEMU Cirrus CLGD 54xx VGA Emulator.
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ * Copyright (c) 2004 Makoto Suzuki (suzu)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/*
+ * Reference: Finn Thogersons' VGADOC4b
+ * available at http://home.worldonline.dk/~finth/
+ */
+#include "vl.h"
+#include "vga_int.h"
+
+/*
+ * TODO:
+ * - add support for WRITEMASK (GR2F)
+ * - optimize linear mappings
+ * - optimize bitblt functions
+ */
+
+//#define DEBUG_CIRRUS
+//#define DEBUG_BITBLT
+
+/***************************************
+ *
+ * definitions
+ *
+ ***************************************/
+
+#define qemu_MIN(a,b) ((a) < (b) ? (a) : (b))
+
+// ID
+#define CIRRUS_ID_CLGD5422 (0x23<<2)
+#define CIRRUS_ID_CLGD5426 (0x24<<2)
+#define CIRRUS_ID_CLGD5424 (0x25<<2)
+#define CIRRUS_ID_CLGD5428 (0x26<<2)
+#define CIRRUS_ID_CLGD5430 (0x28<<2)
+#define CIRRUS_ID_CLGD5434 (0x2A<<2)
+#define CIRRUS_ID_CLGD5436 (0x2B<<2)
+#define CIRRUS_ID_CLGD5446 (0x2E<<2)
+
+// sequencer 0x07
+#define CIRRUS_SR7_BPP_VGA 0x00
+#define CIRRUS_SR7_BPP_SVGA 0x01
+#define CIRRUS_SR7_BPP_MASK 0x0e
+#define CIRRUS_SR7_BPP_8 0x00
+#define CIRRUS_SR7_BPP_16_DOUBLEVCLK 0x02
+#define CIRRUS_SR7_BPP_24 0x04
+#define CIRRUS_SR7_BPP_16 0x06
+#define CIRRUS_SR7_BPP_32 0x08
+#define CIRRUS_SR7_ISAADDR_MASK 0xe0
+
+// sequencer 0x0f
+#define CIRRUS_MEMSIZE_512k 0x08
+#define CIRRUS_MEMSIZE_1M 0x10
+#define CIRRUS_MEMSIZE_2M 0x18
+#define CIRRUS_MEMFLAGS_BANKSWITCH 0x80 // bank switching is enabled.
+
+// sequencer 0x12
+#define CIRRUS_CURSOR_SHOW 0x01
+#define CIRRUS_CURSOR_HIDDENPEL 0x02
+#define CIRRUS_CURSOR_LARGE 0x04 // 64x64 if set, 32x32 if clear
+
+// sequencer 0x17
+#define CIRRUS_BUSTYPE_VLBFAST 0x10
+#define CIRRUS_BUSTYPE_PCI 0x20
+#define CIRRUS_BUSTYPE_VLBSLOW 0x30
+#define CIRRUS_BUSTYPE_ISA 0x38
+#define CIRRUS_MMIO_ENABLE 0x04
+#define CIRRUS_MMIO_USE_PCIADDR 0x40 // 0xb8000 if cleared.
+#define CIRRUS_MEMSIZEEXT_DOUBLE 0x80
+
+// control 0x0b
+#define CIRRUS_BANKING_DUAL 0x01
+#define CIRRUS_BANKING_GRANULARITY_16K 0x20 // set:16k, clear:4k
+
+// control 0x30
+#define CIRRUS_BLTMODE_BACKWARDS 0x01
+#define CIRRUS_BLTMODE_MEMSYSDEST 0x02
+#define CIRRUS_BLTMODE_MEMSYSSRC 0x04
+#define CIRRUS_BLTMODE_TRANSPARENTCOMP 0x08
+#define CIRRUS_BLTMODE_PATTERNCOPY 0x40
+#define CIRRUS_BLTMODE_COLOREXPAND 0x80
+#define CIRRUS_BLTMODE_PIXELWIDTHMASK 0x30
+#define CIRRUS_BLTMODE_PIXELWIDTH8 0x00
+#define CIRRUS_BLTMODE_PIXELWIDTH16 0x10
+#define CIRRUS_BLTMODE_PIXELWIDTH24 0x20
+#define CIRRUS_BLTMODE_PIXELWIDTH32 0x30
+
+// control 0x31
+#define CIRRUS_BLT_BUSY 0x01
+#define CIRRUS_BLT_START 0x02
+#define CIRRUS_BLT_RESET 0x04
+#define CIRRUS_BLT_FIFOUSED 0x10
+#define CIRRUS_BLT_AUTOSTART 0x80
+
+// control 0x32
+#define CIRRUS_ROP_0 0x00
+#define CIRRUS_ROP_SRC_AND_DST 0x05
+#define CIRRUS_ROP_NOP 0x06
+#define CIRRUS_ROP_SRC_AND_NOTDST 0x09
+#define CIRRUS_ROP_NOTDST 0x0b
+#define CIRRUS_ROP_SRC 0x0d
+#define CIRRUS_ROP_1 0x0e
+#define CIRRUS_ROP_NOTSRC_AND_DST 0x50
+#define CIRRUS_ROP_SRC_XOR_DST 0x59
+#define CIRRUS_ROP_SRC_OR_DST 0x6d
+#define CIRRUS_ROP_NOTSRC_OR_NOTDST 0x90
+#define CIRRUS_ROP_SRC_NOTXOR_DST 0x95
+#define CIRRUS_ROP_SRC_OR_NOTDST 0xad
+#define CIRRUS_ROP_NOTSRC 0xd0
+#define CIRRUS_ROP_NOTSRC_OR_DST 0xd6
+#define CIRRUS_ROP_NOTSRC_AND_NOTDST 0xda
+
+#define CIRRUS_ROP_NOP_INDEX 2
+#define CIRRUS_ROP_SRC_INDEX 5
+
+// control 0x33
+#define CIRRUS_BLTMODEEXT_SOLIDFILL 0x04
+#define CIRRUS_BLTMODEEXT_COLOREXPINV 0x02
+#define CIRRUS_BLTMODEEXT_DWORDGRANULARITY 0x01
+
+// memory-mapped IO
+#define CIRRUS_MMIO_BLTBGCOLOR 0x00 // dword
+#define CIRRUS_MMIO_BLTFGCOLOR 0x04 // dword
+#define CIRRUS_MMIO_BLTWIDTH 0x08 // word
+#define CIRRUS_MMIO_BLTHEIGHT 0x0a // word
+#define CIRRUS_MMIO_BLTDESTPITCH 0x0c // word
+#define CIRRUS_MMIO_BLTSRCPITCH 0x0e // word
+#define CIRRUS_MMIO_BLTDESTADDR 0x10 // dword
+#define CIRRUS_MMIO_BLTSRCADDR 0x14 // dword
+#define CIRRUS_MMIO_BLTWRITEMASK 0x17 // byte
+#define CIRRUS_MMIO_BLTMODE 0x18 // byte
+#define CIRRUS_MMIO_BLTROP 0x1a // byte
+#define CIRRUS_MMIO_BLTMODEEXT 0x1b // byte
+#define CIRRUS_MMIO_BLTTRANSPARENTCOLOR 0x1c // word?
+#define CIRRUS_MMIO_BLTTRANSPARENTCOLORMASK 0x20 // word?
+#define CIRRUS_MMIO_LINEARDRAW_START_X 0x24 // word
+#define CIRRUS_MMIO_LINEARDRAW_START_Y 0x26 // word
+#define CIRRUS_MMIO_LINEARDRAW_END_X 0x28 // word
+#define CIRRUS_MMIO_LINEARDRAW_END_Y 0x2a // word
+#define CIRRUS_MMIO_LINEARDRAW_LINESTYLE_INC 0x2c // byte
+#define CIRRUS_MMIO_LINEARDRAW_LINESTYLE_ROLLOVER 0x2d // byte
+#define CIRRUS_MMIO_LINEARDRAW_LINESTYLE_MASK 0x2e // byte
+#define CIRRUS_MMIO_LINEARDRAW_LINESTYLE_ACCUM 0x2f // byte
+#define CIRRUS_MMIO_BRESENHAM_K1 0x30 // word
+#define CIRRUS_MMIO_BRESENHAM_K3 0x32 // word
+#define CIRRUS_MMIO_BRESENHAM_ERROR 0x34 // word
+#define CIRRUS_MMIO_BRESENHAM_DELTA_MAJOR 0x36 // word
+#define CIRRUS_MMIO_BRESENHAM_DIRECTION 0x38 // byte
+#define CIRRUS_MMIO_LINEDRAW_MODE 0x39 // byte
+#define CIRRUS_MMIO_BLTSTATUS 0x40 // byte
+
+// PCI 0x00: vendor, 0x02: device
+#define PCI_VENDOR_CIRRUS 0x1013
+#define PCI_DEVICE_CLGD5462 0x00d0
+#define PCI_DEVICE_CLGD5465 0x00d6
+
+// PCI 0x04: command(word), 0x06(word): status
+#define PCI_COMMAND_IOACCESS 0x0001
+#define PCI_COMMAND_MEMACCESS 0x0002
+#define PCI_COMMAND_BUSMASTER 0x0004
+#define PCI_COMMAND_SPECIALCYCLE 0x0008
+#define PCI_COMMAND_MEMWRITEINVALID 0x0010
+#define PCI_COMMAND_PALETTESNOOPING 0x0020
+#define PCI_COMMAND_PARITYDETECTION 0x0040
+#define PCI_COMMAND_ADDRESSDATASTEPPING 0x0080
+#define PCI_COMMAND_SERR 0x0100
+#define PCI_COMMAND_BACKTOBACKTRANS 0x0200
+// PCI 0x08, 0xff000000 (0x09-0x0b:class,0x08:rev)
+#define PCI_CLASS_BASE_DISPLAY 0x03
+// PCI 0x08, 0x00ff0000
+#define PCI_CLASS_SUB_VGA 0x00
+// PCI 0x0c, 0x00ff0000 (0x0c:cacheline,0x0d:latency,0x0e:headertype,0x0f:Built-in self test)
+#define PCI_CLASS_HEADERTYPE_00h 0x00
+// 0x10-0x3f (headertype 00h)
+// PCI 0x10,0x14,0x18,0x1c,0x20,0x24: base address mapping registers
+// 0x10: MEMBASE, 0x14: IOBASE(hard-coded in XFree86 3.x)
+#define PCI_MAP_MEM 0x0
+#define PCI_MAP_IO 0x1
+#define PCI_MAP_MEM_ADDR_MASK (~0xf)
+#define PCI_MAP_IO_ADDR_MASK (~0x3)
+#define PCI_MAP_MEMFLAGS_32BIT 0x0
+#define PCI_MAP_MEMFLAGS_32BIT_1M 0x1
+#define PCI_MAP_MEMFLAGS_64BIT 0x4
+#define PCI_MAP_MEMFLAGS_CACHEABLE 0x8
+// PCI 0x28: cardbus CIS pointer
+// PCI 0x2c: subsystem vendor id, 0x2e: subsystem id
+// PCI 0x30: expansion ROM base address
+#define PCI_ROMBIOS_ENABLED 0x1
+// PCI 0x34: 0xffffff00=reserved, 0x000000ff=capabilities pointer
+// PCI 0x38: reserved
+// PCI 0x3c: 0x3c=int-line, 0x3d=int-pin, 0x3e=min-gnt, 0x3f=maax-lat
+
+#define CIRRUS_PNPMMIO_SIZE 0x1000
+
+
+/* I/O and memory hook */
+#define CIRRUS_HOOK_NOT_HANDLED 0
+#define CIRRUS_HOOK_HANDLED 1
+
+struct CirrusVGAState;
+typedef void (*cirrus_bitblt_rop_t) (struct CirrusVGAState *s,
+ uint8_t * dst, const uint8_t * src,
+ int dstpitch, int srcpitch,
+ int bltwidth, int bltheight);
+typedef void (*cirrus_fill_t)(struct CirrusVGAState *s,
+ uint8_t *dst, int dst_pitch, int width, int height);
+
+typedef struct CirrusVGAState {
+ VGA_STATE_COMMON
+
+ int cirrus_linear_io_addr;
+ int cirrus_linear_bitblt_io_addr;
+ int cirrus_mmio_io_addr;
+ uint32_t cirrus_addr_mask;
+ uint32_t linear_mmio_mask;
+ uint8_t cirrus_shadow_gr0;
+ uint8_t cirrus_shadow_gr1;
+ uint8_t cirrus_hidden_dac_lockindex;
+ uint8_t cirrus_hidden_dac_data;
+ uint32_t cirrus_bank_base[2];
+ uint32_t cirrus_bank_limit[2];
+ uint8_t cirrus_hidden_palette[48];
+ uint32_t hw_cursor_x;
+ uint32_t hw_cursor_y;
+ int cirrus_blt_pixelwidth;
+ int cirrus_blt_width;
+ int cirrus_blt_height;
+ int cirrus_blt_dstpitch;
+ int cirrus_blt_srcpitch;
+ uint32_t cirrus_blt_fgcol;
+ uint32_t cirrus_blt_bgcol;
+ uint32_t cirrus_blt_dstaddr;
+ uint32_t cirrus_blt_srcaddr;
+ uint8_t cirrus_blt_mode;
+ uint8_t cirrus_blt_modeext;
+ cirrus_bitblt_rop_t cirrus_rop;
+#define CIRRUS_BLTBUFSIZE (2048 * 4) /* one line width */
+ uint8_t cirrus_bltbuf[CIRRUS_BLTBUFSIZE];
+ uint8_t *cirrus_srcptr;
+ uint8_t *cirrus_srcptr_end;
+ uint32_t cirrus_srccounter;
+ /* hwcursor display state */
+ int last_hw_cursor_size;
+ int last_hw_cursor_x;
+ int last_hw_cursor_y;
+ int last_hw_cursor_y_start;
+ int last_hw_cursor_y_end;
+ int real_vram_size; /* XXX: suppress that */
+ CPUWriteMemoryFunc **cirrus_linear_write;
+} CirrusVGAState;
+
+typedef struct PCICirrusVGAState {
+ PCIDevice dev;
+ CirrusVGAState cirrus_vga;
+} PCICirrusVGAState;
+
+static uint8_t rop_to_index[256];
+
+/***************************************
+ *
+ * prototypes.
+ *
+ ***************************************/
+
+
+static void cirrus_bitblt_reset(CirrusVGAState *s);
+static void cirrus_update_memory_access(CirrusVGAState *s);
+
+/***************************************
+ *
+ * raster operations
+ *
+ ***************************************/
+
+static void cirrus_bitblt_rop_nop(CirrusVGAState *s,
+ uint8_t *dst,const uint8_t *src,
+ int dstpitch,int srcpitch,
+ int bltwidth,int bltheight)
+{
+}
+
+static void cirrus_bitblt_fill_nop(CirrusVGAState *s,
+ uint8_t *dst,
+ int dstpitch, int bltwidth,int bltheight)
+{
+}
+
+#define ROP_NAME 0
+#define ROP_OP(d, s) d = 0
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_and_dst
+#define ROP_OP(d, s) d = (s) & (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_and_notdst
+#define ROP_OP(d, s) d = (s) & (~(d))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notdst
+#define ROP_OP(d, s) d = ~(d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src
+#define ROP_OP(d, s) d = s
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME 1
+#define ROP_OP(d, s) d = ~0
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc_and_dst
+#define ROP_OP(d, s) d = (~(s)) & (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_xor_dst
+#define ROP_OP(d, s) d = (s) ^ (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_or_dst
+#define ROP_OP(d, s) d = (s) | (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc_or_notdst
+#define ROP_OP(d, s) d = (~(s)) | (~(d))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_notxor_dst
+#define ROP_OP(d, s) d = ~((s) ^ (d))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME src_or_notdst
+#define ROP_OP(d, s) d = (s) | (~(d))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc
+#define ROP_OP(d, s) d = (~(s))
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc_or_dst
+#define ROP_OP(d, s) d = (~(s)) | (d)
+#include "cirrus_vga_rop.h"
+
+#define ROP_NAME notsrc_and_notdst
+#define ROP_OP(d, s) d = (~(s)) & (~(d))
+#include "cirrus_vga_rop.h"
+
+static const cirrus_bitblt_rop_t cirrus_fwd_rop[16] = {
+ cirrus_bitblt_rop_fwd_0,
+ cirrus_bitblt_rop_fwd_src_and_dst,
+ cirrus_bitblt_rop_nop,
+ cirrus_bitblt_rop_fwd_src_and_notdst,
+ cirrus_bitblt_rop_fwd_notdst,
+ cirrus_bitblt_rop_fwd_src,
+ cirrus_bitblt_rop_fwd_1,
+ cirrus_bitblt_rop_fwd_notsrc_and_dst,
+ cirrus_bitblt_rop_fwd_src_xor_dst,
+ cirrus_bitblt_rop_fwd_src_or_dst,
+ cirrus_bitblt_rop_fwd_notsrc_or_notdst,
+ cirrus_bitblt_rop_fwd_src_notxor_dst,
+ cirrus_bitblt_rop_fwd_src_or_notdst,
+ cirrus_bitblt_rop_fwd_notsrc,
+ cirrus_bitblt_rop_fwd_notsrc_or_dst,
+ cirrus_bitblt_rop_fwd_notsrc_and_notdst,
+};
+
+static const cirrus_bitblt_rop_t cirrus_bkwd_rop[16] = {
+ cirrus_bitblt_rop_bkwd_0,
+ cirrus_bitblt_rop_bkwd_src_and_dst,
+ cirrus_bitblt_rop_nop,
+ cirrus_bitblt_rop_bkwd_src_and_notdst,
+ cirrus_bitblt_rop_bkwd_notdst,
+ cirrus_bitblt_rop_bkwd_src,
+ cirrus_bitblt_rop_bkwd_1,
+ cirrus_bitblt_rop_bkwd_notsrc_and_dst,
+ cirrus_bitblt_rop_bkwd_src_xor_dst,
+ cirrus_bitblt_rop_bkwd_src_or_dst,
+ cirrus_bitblt_rop_bkwd_notsrc_or_notdst,
+ cirrus_bitblt_rop_bkwd_src_notxor_dst,
+ cirrus_bitblt_rop_bkwd_src_or_notdst,
+ cirrus_bitblt_rop_bkwd_notsrc,
+ cirrus_bitblt_rop_bkwd_notsrc_or_dst,
+ cirrus_bitblt_rop_bkwd_notsrc_and_notdst,
+};
+
+#define ROP2(name) {\
+ name ## _8,\
+ name ## _16,\
+ name ## _24,\
+ name ## _32,\
+ }
+
+#define ROP_NOP2(func) {\
+ func,\
+ func,\
+ func,\
+ func,\
+ }
+
+static const cirrus_bitblt_rop_t cirrus_patternfill[16][4] = {
+ ROP2(cirrus_patternfill_0),
+ ROP2(cirrus_patternfill_src_and_dst),
+ ROP_NOP2(cirrus_bitblt_rop_nop),
+ ROP2(cirrus_patternfill_src_and_notdst),
+ ROP2(cirrus_patternfill_notdst),
+ ROP2(cirrus_patternfill_src),
+ ROP2(cirrus_patternfill_1),
+ ROP2(cirrus_patternfill_notsrc_and_dst),
+ ROP2(cirrus_patternfill_src_xor_dst),
+ ROP2(cirrus_patternfill_src_or_dst),
+ ROP2(cirrus_patternfill_notsrc_or_notdst),
+ ROP2(cirrus_patternfill_src_notxor_dst),
+ ROP2(cirrus_patternfill_src_or_notdst),
+ ROP2(cirrus_patternfill_notsrc),
+ ROP2(cirrus_patternfill_notsrc_or_dst),
+ ROP2(cirrus_patternfill_notsrc_and_notdst),
+};
+
+static const cirrus_bitblt_rop_t cirrus_colorexpand_transp[16][4] = {
+ ROP2(cirrus_colorexpand_transp_0),
+ ROP2(cirrus_colorexpand_transp_src_and_dst),
+ ROP_NOP2(cirrus_bitblt_rop_nop),
+ ROP2(cirrus_colorexpand_transp_src_and_notdst),
+ ROP2(cirrus_colorexpand_transp_notdst),
+ ROP2(cirrus_colorexpand_transp_src),
+ ROP2(cirrus_colorexpand_transp_1),
+ ROP2(cirrus_colorexpand_transp_notsrc_and_dst),
+ ROP2(cirrus_colorexpand_transp_src_xor_dst),
+ ROP2(cirrus_colorexpand_transp_src_or_dst),
+ ROP2(cirrus_colorexpand_transp_notsrc_or_notdst),
+ ROP2(cirrus_colorexpand_transp_src_notxor_dst),
+ ROP2(cirrus_colorexpand_transp_src_or_notdst),
+ ROP2(cirrus_colorexpand_transp_notsrc),
+ ROP2(cirrus_colorexpand_transp_notsrc_or_dst),
+ ROP2(cirrus_colorexpand_transp_notsrc_and_notdst),
+};
+
+static const cirrus_bitblt_rop_t cirrus_colorexpand[16][4] = {
+ ROP2(cirrus_colorexpand_0),
+ ROP2(cirrus_colorexpand_src_and_dst),
+ ROP_NOP2(cirrus_bitblt_rop_nop),
+ ROP2(cirrus_colorexpand_src_and_notdst),
+ ROP2(cirrus_colorexpand_notdst),
+ ROP2(cirrus_colorexpand_src),
+ ROP2(cirrus_colorexpand_1),
+ ROP2(cirrus_colorexpand_notsrc_and_dst),
+ ROP2(cirrus_colorexpand_src_xor_dst),
+ ROP2(cirrus_colorexpand_src_or_dst),
+ ROP2(cirrus_colorexpand_notsrc_or_notdst),
+ ROP2(cirrus_colorexpand_src_notxor_dst),
+ ROP2(cirrus_colorexpand_src_or_notdst),
+ ROP2(cirrus_colorexpand_notsrc),
+ ROP2(cirrus_colorexpand_notsrc_or_dst),
+ ROP2(cirrus_colorexpand_notsrc_and_notdst),
+};
+
+/* ROP dispatch table: color-expanded 8x8 pattern fill with transparency,
+ indexed as [rop_to_index[rop]][pixelwidth - 1]. */
+static const cirrus_bitblt_rop_t cirrus_colorexpand_pattern_transp[16][4] = {
+ ROP2(cirrus_colorexpand_pattern_transp_0),
+ ROP2(cirrus_colorexpand_pattern_transp_src_and_dst),
+ ROP_NOP2(cirrus_bitblt_rop_nop),
+ ROP2(cirrus_colorexpand_pattern_transp_src_and_notdst),
+ ROP2(cirrus_colorexpand_pattern_transp_notdst),
+ ROP2(cirrus_colorexpand_pattern_transp_src),
+ ROP2(cirrus_colorexpand_pattern_transp_1),
+ ROP2(cirrus_colorexpand_pattern_transp_notsrc_and_dst),
+ ROP2(cirrus_colorexpand_pattern_transp_src_xor_dst),
+ ROP2(cirrus_colorexpand_pattern_transp_src_or_dst),
+ ROP2(cirrus_colorexpand_pattern_transp_notsrc_or_notdst),
+ ROP2(cirrus_colorexpand_pattern_transp_src_notxor_dst),
+ ROP2(cirrus_colorexpand_pattern_transp_src_or_notdst),
+ ROP2(cirrus_colorexpand_pattern_transp_notsrc),
+ ROP2(cirrus_colorexpand_pattern_transp_notsrc_or_dst),
+ ROP2(cirrus_colorexpand_pattern_transp_notsrc_and_notdst),
+};
+
+/* ROP dispatch table: opaque color-expanded 8x8 pattern fill,
+ indexed as [rop_to_index[rop]][pixelwidth - 1]. */
+static const cirrus_bitblt_rop_t cirrus_colorexpand_pattern[16][4] = {
+ ROP2(cirrus_colorexpand_pattern_0),
+ ROP2(cirrus_colorexpand_pattern_src_and_dst),
+ ROP_NOP2(cirrus_bitblt_rop_nop),
+ ROP2(cirrus_colorexpand_pattern_src_and_notdst),
+ ROP2(cirrus_colorexpand_pattern_notdst),
+ ROP2(cirrus_colorexpand_pattern_src),
+ ROP2(cirrus_colorexpand_pattern_1),
+ ROP2(cirrus_colorexpand_pattern_notsrc_and_dst),
+ ROP2(cirrus_colorexpand_pattern_src_xor_dst),
+ ROP2(cirrus_colorexpand_pattern_src_or_dst),
+ ROP2(cirrus_colorexpand_pattern_notsrc_or_notdst),
+ ROP2(cirrus_colorexpand_pattern_src_notxor_dst),
+ ROP2(cirrus_colorexpand_pattern_src_or_notdst),
+ ROP2(cirrus_colorexpand_pattern_notsrc),
+ ROP2(cirrus_colorexpand_pattern_notsrc_or_dst),
+ ROP2(cirrus_colorexpand_pattern_notsrc_and_notdst),
+};
+
+/* Dispatch table for solid-fill (no source data), indexed as
+ [rop_to_index[rop]][pixelwidth - 1]; entries are cirrus_fill_t, which
+ takes no source pointer (cf. cirrus_bitblt_rop_t above). */
+static const cirrus_fill_t cirrus_fill[16][4] = {
+ ROP2(cirrus_fill_0),
+ ROP2(cirrus_fill_src_and_dst),
+ ROP_NOP2(cirrus_bitblt_fill_nop),
+ ROP2(cirrus_fill_src_and_notdst),
+ ROP2(cirrus_fill_notdst),
+ ROP2(cirrus_fill_src),
+ ROP2(cirrus_fill_1),
+ ROP2(cirrus_fill_notsrc_and_dst),
+ ROP2(cirrus_fill_src_xor_dst),
+ ROP2(cirrus_fill_src_or_dst),
+ ROP2(cirrus_fill_notsrc_or_notdst),
+ ROP2(cirrus_fill_src_notxor_dst),
+ ROP2(cirrus_fill_src_or_notdst),
+ ROP2(cirrus_fill_notsrc),
+ ROP2(cirrus_fill_notsrc_or_dst),
+ ROP2(cirrus_fill_notsrc_and_notdst),
+};
+
+/* Latch the blitter foreground color into s->cirrus_blt_fgcol from
+ shadowed GR1 plus the GR11/GR13/GR15 extension bytes, according to
+ the current blit pixel width in bytes (1/2/3/4). Multi-byte values
+ are assembled little-endian and converted with le16/le32_to_cpu. */
+static inline void cirrus_bitblt_fgcol(CirrusVGAState *s)
+{
+ unsigned int color;
+ switch (s->cirrus_blt_pixelwidth) {
+ case 1:
+ s->cirrus_blt_fgcol = s->cirrus_shadow_gr1;
+ break;
+ case 2:
+ color = s->cirrus_shadow_gr1 | (s->gr[0x11] << 8);
+ s->cirrus_blt_fgcol = le16_to_cpu(color);
+ break;
+ case 3:
+ s->cirrus_blt_fgcol = s->cirrus_shadow_gr1 |
+ (s->gr[0x11] << 8) | (s->gr[0x13] << 16);
+ break;
+ default: /* fall through: anything else treated as 32 bpp */
+ case 4:
+ color = s->cirrus_shadow_gr1 | (s->gr[0x11] << 8) |
+ (s->gr[0x13] << 16) | (s->gr[0x15] << 24);
+ s->cirrus_blt_fgcol = le32_to_cpu(color);
+ break;
+ }
+}
+
+/* Latch the blitter background color into s->cirrus_blt_bgcol from
+ shadowed GR0 plus the GR10/GR12/GR14 extension bytes; mirror image
+ of cirrus_bitblt_fgcol above. */
+static inline void cirrus_bitblt_bgcol(CirrusVGAState *s)
+{
+ unsigned int color;
+ switch (s->cirrus_blt_pixelwidth) {
+ case 1:
+ s->cirrus_blt_bgcol = s->cirrus_shadow_gr0;
+ break;
+ case 2:
+ color = s->cirrus_shadow_gr0 | (s->gr[0x10] << 8);
+ s->cirrus_blt_bgcol = le16_to_cpu(color);
+ break;
+ case 3:
+ s->cirrus_blt_bgcol = s->cirrus_shadow_gr0 |
+ (s->gr[0x10] << 8) | (s->gr[0x12] << 16);
+ break;
+ default: /* fall through: anything else treated as 32 bpp */
+ case 4:
+ color = s->cirrus_shadow_gr0 | (s->gr[0x10] << 8) |
+ (s->gr[0x12] << 16) | (s->gr[0x14] << 24);
+ s->cirrus_blt_bgcol = le32_to_cpu(color);
+ break;
+ }
+}
+
+/* Mark every guest page touched by a blt destination rectangle dirty,
+ so the display refresh redraws it. Walks 'lines' scanlines starting
+ at byte offset 'off_begin' into VRAM, advancing by 'off_pitch' per
+ line; each line covers 'bytesperline' bytes. The start offset is
+ rounded down to a page boundary so partial pages are included. */
+static void cirrus_invalidate_region(CirrusVGAState * s, int off_begin,
+ int off_pitch, int bytesperline,
+ int lines)
+{
+ int y;
+ int off_cur;
+ int off_cur_end;
+
+ for (y = 0; y < lines; y++) {
+ off_cur = off_begin;
+ off_cur_end = off_cur + bytesperline;
+ off_cur &= TARGET_PAGE_MASK;
+ while (off_cur < off_cur_end) {
+ cpu_physical_memory_set_dirty(s->vram_offset + off_cur);
+ off_cur += TARGET_PAGE_SIZE;
+ }
+ off_begin += off_pitch;
+ }
+}
+
+static int cirrus_bitblt_common_patterncopy(CirrusVGAState * s,
+ const uint8_t * src)
+{
+ uint8_t *dst;
+
+ dst = s->vram_ptr + s->cirrus_blt_dstaddr;
+ (*s->cirrus_rop) (s, dst, src,
+ s->cirrus_blt_dstpitch, 0,
+ s->cirrus_blt_width, s->cirrus_blt_height);
+ cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
+ s->cirrus_blt_dstpitch, s->cirrus_blt_width,
+ s->cirrus_blt_height);
+ return 1;
+}
+
+/* fill */
+
+/* Solid fill: paint the destination rectangle with the latched color
+ using the fill handler selected by (rop, pixelwidth), dirty the
+ region, then reset the blitter state. Always returns 1.
+ NOTE(review): dstaddr/width/height come straight from guest registers
+ with no bounds check against vram_size -- verify callers clamp them. */
+static int cirrus_bitblt_solidfill(CirrusVGAState *s, int blt_rop)
+{
+ cirrus_fill_t rop_func;
+
+ rop_func = cirrus_fill[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+ rop_func(s, s->vram_ptr + s->cirrus_blt_dstaddr,
+ s->cirrus_blt_dstpitch,
+ s->cirrus_blt_width, s->cirrus_blt_height);
+ cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
+ s->cirrus_blt_dstpitch, s->cirrus_blt_width,
+ s->cirrus_blt_height);
+ cirrus_bitblt_reset(s);
+ return 1;
+}
+
+/***************************************
+ *
+ * bitblt (video-to-video)
+ *
+ ***************************************/
+
+/* Pattern copy whose 8x8 pattern lives in VRAM; the source address is
+ aligned down to 8 bytes as the pattern start. */
+static int cirrus_bitblt_videotovideo_patterncopy(CirrusVGAState * s)
+{
+ return cirrus_bitblt_common_patterncopy(s,
+ s->vram_ptr +
+ (s->cirrus_blt_srcaddr & ~7));
+}
+
+/* Screen-to-screen copy: apply the selected ROP from the VRAM source
+ rectangle to the VRAM destination rectangle and dirty the destination.
+ Always returns 1 (operation complete). */
+static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s)
+{
+ (*s->cirrus_rop) (s, s->vram_ptr + s->cirrus_blt_dstaddr,
+ s->vram_ptr + s->cirrus_blt_srcaddr,
+ s->cirrus_blt_dstpitch, s->cirrus_blt_srcpitch,
+ s->cirrus_blt_width, s->cirrus_blt_height);
+ cirrus_invalidate_region(s, s->cirrus_blt_dstaddr,
+ s->cirrus_blt_dstpitch, s->cirrus_blt_width,
+ s->cirrus_blt_height);
+ return 1;
+}
+
+/***************************************
+ *
+ * bitblt (cpu-to-video)
+ *
+ ***************************************/
+
+/* Consume data the CPU has pushed into s->cirrus_bltbuf for a
+ memory-system-source blit. Pattern copies finish in one shot; the
+ scanline path blits one destination line per srcpitch bytes received,
+ decrementing cirrus_srccounter until the transfer is done (the_end
+ label resets the engine). Because CPU writes are word-aligned, the
+ buffer may hold bytes beyond the current line; those are moved to the
+ front of the buffer to seed the next line. */
+static void cirrus_bitblt_cputovideo_next(CirrusVGAState * s)
+{
+ int copy_count;
+ uint8_t *end_ptr;
+
+ if (s->cirrus_srccounter > 0) {
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_PATTERNCOPY) {
+ cirrus_bitblt_common_patterncopy(s, s->cirrus_bltbuf);
+ the_end:
+ s->cirrus_srccounter = 0;
+ cirrus_bitblt_reset(s);
+ } else {
+ /* at least one scan line */
+ do {
+ (*s->cirrus_rop)(s, s->vram_ptr + s->cirrus_blt_dstaddr,
+ s->cirrus_bltbuf, 0, 0, s->cirrus_blt_width, 1);
+ cirrus_invalidate_region(s, s->cirrus_blt_dstaddr, 0,
+ s->cirrus_blt_width, 1);
+ s->cirrus_blt_dstaddr += s->cirrus_blt_dstpitch;
+ s->cirrus_srccounter -= s->cirrus_blt_srcpitch;
+ if (s->cirrus_srccounter <= 0)
+ goto the_end;
+ /* more bytes than needed can be transfered because of
+ word alignment, so we keep them for the next line */
+ /* XXX: keep alignment to speed up transfer */
+ end_ptr = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
+ copy_count = s->cirrus_srcptr_end - end_ptr;
+ memmove(s->cirrus_bltbuf, end_ptr, copy_count);
+ s->cirrus_srcptr = s->cirrus_bltbuf + copy_count;
+ s->cirrus_srcptr_end = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
+ } while (s->cirrus_srcptr >= s->cirrus_srcptr_end);
+ }
+ }
+}
+
+/***************************************
+ *
+ * bitblt wrapper
+ *
+ ***************************************/
+
+/* Return the blitter to idle: clear the START/BUSY/FIFOUSED status bits
+ in GR31, rewind the CPU staging-buffer pointers, zero the outstanding
+ source byte count, and re-evaluate the memory access mode (the CPU
+ transfer window may have just closed). */
+static void cirrus_bitblt_reset(CirrusVGAState * s)
+{
+ s->gr[0x31] &=
+ ~(CIRRUS_BLT_START | CIRRUS_BLT_BUSY | CIRRUS_BLT_FIFOUSED);
+ s->cirrus_srcptr = &s->cirrus_bltbuf[0];
+ s->cirrus_srcptr_end = &s->cirrus_bltbuf[0];
+ s->cirrus_srccounter = 0;
+ cirrus_update_memory_access(s);
+}
+
+/* Arm a CPU-to-video transfer: compute how many source bytes one unit
+ of work needs (a full 8x8 pattern, or one scanline -- expanded bits
+ for color-expand mode, raw pixels otherwise), set cirrus_srccounter
+ to the total expected byte count, and point the staging-buffer
+ pointers at the first chunk. Data then arrives via the memory-access
+ path and is drained by cirrus_bitblt_cputovideo_next(). Returns 1. */
+static int cirrus_bitblt_cputovideo(CirrusVGAState * s)
+{
+ int w;
+
+ s->cirrus_blt_mode &= ~CIRRUS_BLTMODE_MEMSYSSRC;
+ s->cirrus_srcptr = &s->cirrus_bltbuf[0];
+ s->cirrus_srcptr_end = &s->cirrus_bltbuf[0];
+
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_PATTERNCOPY) {
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) {
+ /* 8x8 monochrome pattern = 8 bytes */
+ s->cirrus_blt_srcpitch = 8;
+ } else {
+ /* XXX: check for 24 bpp */
+ s->cirrus_blt_srcpitch = 8 * 8 * s->cirrus_blt_pixelwidth;
+ }
+ s->cirrus_srccounter = s->cirrus_blt_srcpitch;
+ } else {
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) {
+ /* one source bit per pixel; width is in bytes, so derive
+ the pixel count first */
+ w = s->cirrus_blt_width / s->cirrus_blt_pixelwidth;
+ if (s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_DWORDGRANULARITY)
+ s->cirrus_blt_srcpitch = ((w + 31) >> 5);
+ else
+ s->cirrus_blt_srcpitch = ((w + 7) >> 3);
+ } else {
+ s->cirrus_blt_srcpitch = s->cirrus_blt_width;
+ }
+ s->cirrus_srccounter = s->cirrus_blt_srcpitch * s->cirrus_blt_height;
+ }
+ s->cirrus_srcptr = s->cirrus_bltbuf;
+ s->cirrus_srcptr_end = s->cirrus_bltbuf + s->cirrus_blt_srcpitch;
+ cirrus_update_memory_access(s);
+ return 1;
+}
+
+/* Video-to-CPU transfers are not implemented; returning 0 makes the
+ caller abandon the blit (bitblt_ignore path). */
+static int cirrus_bitblt_videotocpu(CirrusVGAState * s)
+{
+ /* XXX */
+#ifdef DEBUG_BITBLT
+ printf("cirrus: bitblt (video to cpu) is not implemented yet\n");
+#endif
+ return 0;
+}
+
+/* Dispatch a video-to-video blit (pattern copy vs. plain copy) and, on
+ success, reset the engine so status reads as idle. Returns the
+ sub-operation's result. */
+static int cirrus_bitblt_videotovideo(CirrusVGAState * s)
+{
+ int ret;
+
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_PATTERNCOPY) {
+ ret = cirrus_bitblt_videotovideo_patterncopy(s);
+ } else {
+ ret = cirrus_bitblt_videotovideo_copy(s);
+ }
+ if (ret)
+ cirrus_bitblt_reset(s);
+ return ret;
+}
+
+/* Kick off a blit: latch all blt parameters from the GR registers,
+ decode pixel width, select the ROP handler table matching the mode
+ bits (solid fill / color expand / pattern / forward / backward), and
+ start the appropriate transfer engine. Any unsupported combination
+ jumps to bitblt_ignore, which resets the engine.
+ NOTE(review): width/height/addresses are guest-controlled and not
+ validated against vram_size here -- confirm the ROP helpers clamp. */
+static void cirrus_bitblt_start(CirrusVGAState * s)
+{
+ uint8_t blt_rop;
+
+ s->gr[0x31] |= CIRRUS_BLT_BUSY;
+
+ /* width/height registers hold (value - 1) */
+ s->cirrus_blt_width = (s->gr[0x20] | (s->gr[0x21] << 8)) + 1;
+ s->cirrus_blt_height = (s->gr[0x22] | (s->gr[0x23] << 8)) + 1;
+ s->cirrus_blt_dstpitch = (s->gr[0x24] | (s->gr[0x25] << 8));
+ s->cirrus_blt_srcpitch = (s->gr[0x26] | (s->gr[0x27] << 8));
+ s->cirrus_blt_dstaddr =
+ (s->gr[0x28] | (s->gr[0x29] << 8) | (s->gr[0x2a] << 16));
+ s->cirrus_blt_srcaddr =
+ (s->gr[0x2c] | (s->gr[0x2d] << 8) | (s->gr[0x2e] << 16));
+ s->cirrus_blt_mode = s->gr[0x30];
+ s->cirrus_blt_modeext = s->gr[0x33];
+ blt_rop = s->gr[0x32];
+
+#ifdef DEBUG_BITBLT
+ printf("rop=0x%02x mode=0x%02x modeext=0x%02x w=%d h=%d dpitch=%d spicth=%d daddr=0x%08x saddr=0x%08x writemask=0x%02x\n",
+ blt_rop,
+ s->cirrus_blt_mode,
+ s->cirrus_blt_modeext,
+ s->cirrus_blt_width,
+ s->cirrus_blt_height,
+ s->cirrus_blt_dstpitch,
+ s->cirrus_blt_srcpitch,
+ s->cirrus_blt_dstaddr,
+ s->cirrus_blt_srcaddr,
+ s->sr[0x2f]);
+#endif
+
+ /* decode BLT pixel width (bytes per pixel) */
+ switch (s->cirrus_blt_mode & CIRRUS_BLTMODE_PIXELWIDTHMASK) {
+ case CIRRUS_BLTMODE_PIXELWIDTH8:
+ s->cirrus_blt_pixelwidth = 1;
+ break;
+ case CIRRUS_BLTMODE_PIXELWIDTH16:
+ s->cirrus_blt_pixelwidth = 2;
+ break;
+ case CIRRUS_BLTMODE_PIXELWIDTH24:
+ s->cirrus_blt_pixelwidth = 3;
+ break;
+ case CIRRUS_BLTMODE_PIXELWIDTH32:
+ s->cirrus_blt_pixelwidth = 4;
+ break;
+ default:
+#ifdef DEBUG_BITBLT
+ printf("cirrus: bitblt - pixel width is unknown\n");
+#endif
+ goto bitblt_ignore;
+ }
+ s->cirrus_blt_mode &= ~CIRRUS_BLTMODE_PIXELWIDTHMASK;
+
+ /* memory-to-memory (both ends in system RAM) is not supported */
+ if ((s->
+ cirrus_blt_mode & (CIRRUS_BLTMODE_MEMSYSSRC |
+ CIRRUS_BLTMODE_MEMSYSDEST))
+ == (CIRRUS_BLTMODE_MEMSYSSRC | CIRRUS_BLTMODE_MEMSYSDEST)) {
+#ifdef DEBUG_BITBLT
+ printf("cirrus: bitblt - memory-to-memory copy is requested\n");
+#endif
+ goto bitblt_ignore;
+ }
+
+ /* fast path: opaque color-expanded pattern + SOLIDFILL ext bit is a
+ plain solid fill with the foreground color */
+ if ((s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_SOLIDFILL) &&
+ (s->cirrus_blt_mode & (CIRRUS_BLTMODE_MEMSYSDEST |
+ CIRRUS_BLTMODE_TRANSPARENTCOMP |
+ CIRRUS_BLTMODE_PATTERNCOPY |
+ CIRRUS_BLTMODE_COLOREXPAND)) ==
+ (CIRRUS_BLTMODE_PATTERNCOPY | CIRRUS_BLTMODE_COLOREXPAND)) {
+ cirrus_bitblt_fgcol(s);
+ cirrus_bitblt_solidfill(s, blt_rop);
+ } else {
+ /* select the ROP handler for the mode combination */
+ if ((s->cirrus_blt_mode & (CIRRUS_BLTMODE_COLOREXPAND |
+ CIRRUS_BLTMODE_PATTERNCOPY)) ==
+ CIRRUS_BLTMODE_COLOREXPAND) {
+
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_TRANSPARENTCOMP) {
+ /* COLOREXPINV swaps which latched color is drawn */
+ if (s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_COLOREXPINV)
+ cirrus_bitblt_bgcol(s);
+ else
+ cirrus_bitblt_fgcol(s);
+ s->cirrus_rop = cirrus_colorexpand_transp[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+ } else {
+ cirrus_bitblt_fgcol(s);
+ cirrus_bitblt_bgcol(s);
+ s->cirrus_rop = cirrus_colorexpand[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+ }
+ } else if (s->cirrus_blt_mode & CIRRUS_BLTMODE_PATTERNCOPY) {
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_COLOREXPAND) {
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_TRANSPARENTCOMP) {
+ if (s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_COLOREXPINV)
+ cirrus_bitblt_bgcol(s);
+ else
+ cirrus_bitblt_fgcol(s);
+ s->cirrus_rop = cirrus_colorexpand_pattern_transp[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+ } else {
+ cirrus_bitblt_fgcol(s);
+ cirrus_bitblt_bgcol(s);
+ s->cirrus_rop = cirrus_colorexpand_pattern[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+ }
+ } else {
+ s->cirrus_rop = cirrus_patternfill[rop_to_index[blt_rop]][s->cirrus_blt_pixelwidth - 1];
+ }
+ } else {
+ /* plain copy; BACKWARDS negates the pitches so overlapping
+ copies walk bottom-up */
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_BACKWARDS) {
+ s->cirrus_blt_dstpitch = -s->cirrus_blt_dstpitch;
+ s->cirrus_blt_srcpitch = -s->cirrus_blt_srcpitch;
+ s->cirrus_rop = cirrus_bkwd_rop[rop_to_index[blt_rop]];
+ } else {
+ s->cirrus_rop = cirrus_fwd_rop[rop_to_index[blt_rop]];
+ }
+ }
+
+ // setup bitblt engine.
+ if (s->cirrus_blt_mode & CIRRUS_BLTMODE_MEMSYSSRC) {
+ if (!cirrus_bitblt_cputovideo(s))
+ goto bitblt_ignore;
+ } else if (s->cirrus_blt_mode & CIRRUS_BLTMODE_MEMSYSDEST) {
+ if (!cirrus_bitblt_videotocpu(s))
+ goto bitblt_ignore;
+ } else {
+ if (!cirrus_bitblt_videotovideo(s))
+ goto bitblt_ignore;
+ }
+ }
+ return;
+ bitblt_ignore:;
+ cirrus_bitblt_reset(s);
+}
+
+/* Handle a guest write to GR31 (BLT STATUS/START): a falling edge on
+ the RESET bit aborts/clears the engine; a rising edge on the START
+ bit launches a blit. Other bit changes are just stored. */
+static void cirrus_write_bitblt(CirrusVGAState * s, unsigned reg_value)
+{
+ unsigned old_value;
+
+ old_value = s->gr[0x31];
+ s->gr[0x31] = reg_value;
+
+ if (((old_value & CIRRUS_BLT_RESET) != 0) &&
+ ((reg_value & CIRRUS_BLT_RESET) == 0)) {
+ cirrus_bitblt_reset(s);
+ } else if (((old_value & CIRRUS_BLT_START) == 0) &&
+ ((reg_value & CIRRUS_BLT_START) != 0)) {
+ cirrus_bitblt_start(s);
+ }
+}
+
+
+/***************************************
+ *
+ * basic parameters
+ *
+ ***************************************/
+
+/* VGA-core callback: report the scanline pitch and display start
+ address, including the Cirrus extension bits (CR1B/CR1D) that extend
+ the standard VGA CR13 pitch and CR0C/CR0D start address. */
+static void cirrus_get_offsets(VGAState *s1,
+ uint32_t *pline_offset,
+ uint32_t *pstart_addr)
+{
+ CirrusVGAState * s = (CirrusVGAState *)s1;
+ uint32_t start_addr;
+ uint32_t line_offset;
+
+ line_offset = s->cr[0x13]
+ | ((s->cr[0x1b] & 0x10) << 4);
+ line_offset <<= 3;
+ *pline_offset = line_offset;
+
+ start_addr = (s->cr[0x0c] << 8)
+ | s->cr[0x0d]
+ | ((s->cr[0x1b] & 0x01) << 16)
+ | ((s->cr[0x1b] & 0x0c) << 15)
+ | ((s->cr[0x1d] & 0x80) << 12);
+ *pstart_addr = start_addr;
+}
+
+/* Distinguish 15 bpp (5-5-5 Sierra HiColor) from 16 bpp (5-6-5 XGA)
+ using the low nibble of the hidden DAC register; unknown values fall
+ back to 15 bpp. */
+static uint32_t cirrus_get_bpp16_depth(CirrusVGAState * s)
+{
+ uint32_t ret = 16;
+
+ switch (s->cirrus_hidden_dac_data & 0xf) {
+ case 0:
+ ret = 15;
+ break; /* Sierra HiColor */
+ case 1:
+ ret = 16;
+ break; /* XGA HiColor */
+ default:
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: invalid DAC value %x in 16bpp\n",
+ (s->cirrus_hidden_dac_data & 0xf));
+#endif
+ ret = 15; /* XXX */
+ break;
+ }
+ return ret;
+}
+
+/* VGA-core callback: current bits-per-pixel. SR7 bit 0 selects Cirrus
+ SVGA mode (depth encoded in the SR7 BPP field, 16-bit depth refined
+ via the hidden DAC); 0 means standard VGA and lets the core decide. */
+static int cirrus_get_bpp(VGAState *s1)
+{
+ CirrusVGAState * s = (CirrusVGAState *)s1;
+ uint32_t ret = 8;
+
+ if ((s->sr[0x07] & 0x01) != 0) {
+ /* Cirrus SVGA */
+ switch (s->sr[0x07] & CIRRUS_SR7_BPP_MASK) {
+ case CIRRUS_SR7_BPP_8:
+ ret = 8;
+ break;
+ case CIRRUS_SR7_BPP_16_DOUBLEVCLK:
+ ret = cirrus_get_bpp16_depth(s);
+ break;
+ case CIRRUS_SR7_BPP_24:
+ ret = 24;
+ break;
+ case CIRRUS_SR7_BPP_16:
+ ret = cirrus_get_bpp16_depth(s);
+ break;
+ case CIRRUS_SR7_BPP_32:
+ ret = 32;
+ break;
+ default:
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: unknown bpp - sr7=%x\n", s->sr[0x7]);
+#endif
+ ret = 8;
+ break;
+ }
+ } else {
+ /* VGA */
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/* VGA-core callback: visible resolution from the standard CRTC
+ registers (CR01 horizontal, CR12 + CR07 overflow bits vertical),
+ doubling the height when CR1A bit 0 signals interlace. */
+static void cirrus_get_resolution(VGAState *s, int *pwidth, int *pheight)
+{
+ int width, height;
+
+ width = (s->cr[0x01] + 1) * 8;
+ height = s->cr[0x12] |
+ ((s->cr[0x07] & 0x02) << 7) |
+ ((s->cr[0x07] & 0x40) << 3);
+ height = (height + 1);
+ /* interlace support */
+ if (s->cr[0x1a] & 0x01)
+ height = height * 2;
+ *pwidth = width;
+ *pheight = height;
+}
+
+/***************************************
+ *
+ * bank memory
+ *
+ ***************************************/
+
+/* Recompute the base offset and remaining-byte limit of one 32K memory
+ bank (bank_index 0 or 1). GR0B bit 0 selects dual-bank (GR09/GR0A)
+ vs. single-bank (GR09 only); bit 5 selects 16K vs. 4K granularity.
+ In single-bank mode bank 1 is the window 32K above bank 0. A bank
+ whose offset falls outside VRAM gets base=limit=0 (disabled). */
+static void cirrus_update_bank_ptr(CirrusVGAState * s, unsigned bank_index)
+{
+ unsigned offset;
+ unsigned limit;
+
+ if ((s->gr[0x0b] & 0x01) != 0) /* dual bank */
+ offset = s->gr[0x09 + bank_index];
+ else /* single bank */
+ offset = s->gr[0x09];
+
+ if ((s->gr[0x0b] & 0x20) != 0)
+ offset <<= 14;
+ else
+ offset <<= 12;
+
+ if (s->vram_size <= offset)
+ limit = 0;
+ else
+ limit = s->vram_size - offset;
+
+ if (((s->gr[0x0b] & 0x01) == 0) && (bank_index != 0)) {
+ if (limit > 0x8000) {
+ offset += 0x8000;
+ limit -= 0x8000;
+ } else {
+ limit = 0;
+ }
+ }
+
+ if (limit > 0) {
+ s->cirrus_bank_base[bank_index] = offset;
+ s->cirrus_bank_limit[bank_index] = limit;
+ } else {
+ s->cirrus_bank_base[bank_index] = 0;
+ s->cirrus_bank_limit[bank_index] = 0;
+ }
+}
+
+/***************************************
+ *
+ * I/O access between 0x3c4-0x3c5
+ *
+ ***************************************/
+
+static int
+cirrus_hook_read_sr(CirrusVGAState * s, unsigned reg_index, int *reg_value)
+{
+ switch (reg_index) {
+ case 0x00: // Standard VGA
+ case 0x01: // Standard VGA
+ case 0x02: // Standard VGA
+ case 0x03: // Standard VGA
+ case 0x04: // Standard VGA
+ return CIRRUS_HOOK_NOT_HANDLED;
+ case 0x06: // Unlock Cirrus extensions
+ *reg_value = s->sr[reg_index];
+ break;
+ case 0x10:
+ case 0x30:
+ case 0x50:
+ case 0x70: // Graphics Cursor X
+ case 0x90:
+ case 0xb0:
+ case 0xd0:
+ case 0xf0: // Graphics Cursor X
+ *reg_value = s->sr[0x10];
+ break;
+ case 0x11:
+ case 0x31:
+ case 0x51:
+ case 0x71: // Graphics Cursor Y
+ case 0x91:
+ case 0xb1:
+ case 0xd1:
+ case 0xf1: // Graphics Cursor Y
+ *reg_value = s->sr[0x11];
+ break;
+ case 0x05: // ???
+ case 0x07: // Extended Sequencer Mode
+ case 0x08: // EEPROM Control
+ case 0x09: // Scratch Register 0
+ case 0x0a: // Scratch Register 1
+ case 0x0b: // VCLK 0
+ case 0x0c: // VCLK 1
+ case 0x0d: // VCLK 2
+ case 0x0e: // VCLK 3
+ case 0x0f: // DRAM Control
+ case 0x12: // Graphics Cursor Attribute
+ case 0x13: // Graphics Cursor Pattern Address
+ case 0x14: // Scratch Register 2
+ case 0x15: // Scratch Register 3
+ case 0x16: // Performance Tuning Register
+ case 0x17: // Configuration Readback and Extended Control
+ case 0x18: // Signature Generator Control
+ case 0x19: // Signal Generator Result
+ case 0x1a: // Signal Generator Result
+ case 0x1b: // VCLK 0 Denominator & Post
+ case 0x1c: // VCLK 1 Denominator & Post
+ case 0x1d: // VCLK 2 Denominator & Post
+ case 0x1e: // VCLK 3 Denominator & Post
+ case 0x1f: // BIOS Write Enable and MCLK select
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: handled inport sr_index %02x\n", reg_index);
+#endif
+ *reg_value = s->sr[reg_index];
+ break;
+ default:
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: inport sr_index %02x\n", reg_index);
+#endif
+ *reg_value = 0xff;
+ break;
+ }
+
+ return CIRRUS_HOOK_HANDLED;
+}
+
+static int
+cirrus_hook_write_sr(CirrusVGAState * s, unsigned reg_index, int reg_value)
+{
+ switch (reg_index) {
+ case 0x00: // Standard VGA
+ case 0x01: // Standard VGA
+ case 0x02: // Standard VGA
+ case 0x03: // Standard VGA
+ case 0x04: // Standard VGA
+ return CIRRUS_HOOK_NOT_HANDLED;
+ case 0x06: // Unlock Cirrus extensions
+ reg_value &= 0x17;
+ if (reg_value == 0x12) {
+ s->sr[reg_index] = 0x12;
+ } else {
+ s->sr[reg_index] = 0x0f;
+ }
+ break;
+ case 0x10:
+ case 0x30:
+ case 0x50:
+ case 0x70: // Graphics Cursor X
+ case 0x90:
+ case 0xb0:
+ case 0xd0:
+ case 0xf0: // Graphics Cursor X
+ s->sr[0x10] = reg_value;
+ s->hw_cursor_x = (reg_value << 3) | (reg_index >> 5);
+ break;
+ case 0x11:
+ case 0x31:
+ case 0x51:
+ case 0x71: // Graphics Cursor Y
+ case 0x91:
+ case 0xb1:
+ case 0xd1:
+ case 0xf1: // Graphics Cursor Y
+ s->sr[0x11] = reg_value;
+ s->hw_cursor_y = (reg_value << 3) | (reg_index >> 5);
+ break;
+ case 0x07: // Extended Sequencer Mode
+ case 0x08: // EEPROM Control
+ case 0x09: // Scratch Register 0
+ case 0x0a: // Scratch Register 1
+ case 0x0b: // VCLK 0
+ case 0x0c: // VCLK 1
+ case 0x0d: // VCLK 2
+ case 0x0e: // VCLK 3
+ case 0x0f: // DRAM Control
+ case 0x12: // Graphics Cursor Attribute
+ case 0x13: // Graphics Cursor Pattern Address
+ case 0x14: // Scratch Register 2
+ case 0x15: // Scratch Register 3
+ case 0x16: // Performance Tuning Register
+ case 0x18: // Signature Generator Control
+ case 0x19: // Signature Generator Result
+ case 0x1a: // Signature Generator Result
+ case 0x1b: // VCLK 0 Denominator & Post
+ case 0x1c: // VCLK 1 Denominator & Post
+ case 0x1d: // VCLK 2 Denominator & Post
+ case 0x1e: // VCLK 3 Denominator & Post
+ case 0x1f: // BIOS Write Enable and MCLK select
+ s->sr[reg_index] = reg_value;
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: handled outport sr_index %02x, sr_value %02x\n",
+ reg_index, reg_value);
+#endif
+ break;
+ case 0x17: // Configuration Readback and Extended Control
+ s->sr[reg_index] = reg_value;
+ cirrus_update_memory_access(s);
+ break;
+ default:
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: outport sr_index %02x, sr_value %02x\n", reg_index,
+ reg_value);
+#endif
+ break;
+ }
+
+ return CIRRUS_HOOK_HANDLED;
+}
+
+/***************************************
+ *
+ * I/O access at 0x3c6
+ *
+ ***************************************/
+
+/* Read the hidden DAC register at 0x3c6: the real value is revealed
+ only on every 5th consecutive read (lockindex counts 1..4 returning
+ 0xff, then wraps); this mimics the hardware's unlock sequence. */
+static void cirrus_read_hidden_dac(CirrusVGAState * s, int *reg_value)
+{
+ *reg_value = 0xff;
+ if (++s->cirrus_hidden_dac_lockindex == 5) {
+ *reg_value = s->cirrus_hidden_dac_data;
+ s->cirrus_hidden_dac_lockindex = 0;
+ }
+}
+
+/* Write the hidden DAC register: only accepted when exactly 4 reads
+ preceded it (lockindex == 4); any write resets the unlock counter. */
+static void cirrus_write_hidden_dac(CirrusVGAState * s, int reg_value)
+{
+ if (s->cirrus_hidden_dac_lockindex == 4) {
+ s->cirrus_hidden_dac_data = reg_value;
+#if defined(DEBUG_CIRRUS)
+ printf("cirrus: outport hidden DAC, value %02x\n", reg_value);
+#endif
+ }
+ s->cirrus_hidden_dac_lockindex = 0;
+}
+
+/***************************************
+ *
+ * I/O access at 0x3c9
+ *
+ ***************************************/
+
+/* Palette-data (0x3c9) read hook: when SR12 enables the hidden cursor
+ palette, serve reads from the 16-entry hidden palette instead of the
+ normal DAC, stepping R/G/B via dac_sub_index and advancing the read
+ index after each full triplet. */
+static int cirrus_hook_read_palette(CirrusVGAState * s, int *reg_value)
+{
+ if (!(s->sr[0x12] & CIRRUS_CURSOR_HIDDENPEL))
+ return CIRRUS_HOOK_NOT_HANDLED;
+ *reg_value =
+ s->cirrus_hidden_palette[(s->dac_read_index & 0x0f) * 3 +
+ s->dac_sub_index];
+ if (++s->dac_sub_index == 3) {
+ s->dac_sub_index = 0;
+ s->dac_read_index++;
+ }
+ return CIRRUS_HOOK_HANDLED;
+}
+
+/* Palette-data (0x3c9) write hook: when the hidden cursor palette is
+ enabled, buffer R/G/B bytes in dac_cache and commit the triplet to
+ the hidden palette once all three components have arrived. */
+static int cirrus_hook_write_palette(CirrusVGAState * s, int reg_value)
+{
+ if (!(s->sr[0x12] & CIRRUS_CURSOR_HIDDENPEL))
+ return CIRRUS_HOOK_NOT_HANDLED;
+ s->dac_cache[s->dac_sub_index] = reg_value;
+ if (++s->dac_sub_index == 3) {
+ memcpy(&s->cirrus_hidden_palette[(s->dac_write_index & 0x0f) * 3],
+ s->dac_cache, 3);
+ /* XXX update cursor */
+ s->dac_sub_index = 0;
+ s->dac_write_index++;
+ }
+ return CIRRUS_HOOK_HANDLED;
+}
+
+/***************************************
+ *
+ * I/O access between 0x3ce-0x3cf
+ *
+ ***************************************/
+
+static int
+cirrus_hook_read_gr(CirrusVGAState * s, unsigned reg_index, int *reg_value)
+{
+ switch (reg_index) {
+ case 0x00: // Standard VGA, BGCOLOR 0x000000ff
+ *reg_value = s->cirrus_shadow_gr0;
+ return CIRRUS_HOOK_HANDLED;
+ case 0x01: // Standard VGA, FGCOLOR 0x000000ff
+ *reg_value = s->cirrus_shadow_gr1;
+ return CIRRUS_HOOK_HANDLED;
+ case 0x02: // Standard VGA
+ case 0x03: // Standard VGA
+ case 0x04: // Standard VGA
+ case 0x06: // Standard VGA
+ case 0x07: // Standard VGA
+ case 0x08: // Standard VGA
+ return CIRRUS_HOOK_NOT_HANDLED;
+ case 0x05: // Standard VGA, Cirrus extended mode
+ default:
+ break;
+ }
+
+ if (reg_index < 0x3a) {
+ *reg_value = s->gr[reg_index];
+ } else {
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: inport gr_index %02x\n", reg_index);
+#endif
+ *reg_value = 0xff;
+ }
+
+ return CIRRUS_HOOK_HANDLED;
+}
+
+static int
+cirrus_hook_write_gr(CirrusVGAState * s, unsigned reg_index, int reg_value)
+{
+#if defined(DEBUG_BITBLT) && 0
+ printf("gr%02x: %02x\n", reg_index, reg_value);
+#endif
+ switch (reg_index) {
+ case 0x00: // Standard VGA, BGCOLOR 0x000000ff
+ s->cirrus_shadow_gr0 = reg_value;
+ return CIRRUS_HOOK_NOT_HANDLED;
+ case 0x01: // Standard VGA, FGCOLOR 0x000000ff
+ s->cirrus_shadow_gr1 = reg_value;
+ return CIRRUS_HOOK_NOT_HANDLED;
+ case 0x02: // Standard VGA
+ case 0x03: // Standard VGA
+ case 0x04: // Standard VGA
+ case 0x06: // Standard VGA
+ case 0x07: // Standard VGA
+ case 0x08: // Standard VGA
+ return CIRRUS_HOOK_NOT_HANDLED;
+ case 0x05: // Standard VGA, Cirrus extended mode
+ s->gr[reg_index] = reg_value & 0x7f;
+ cirrus_update_memory_access(s);
+ break;
+ case 0x09: // bank offset #0
+ case 0x0A: // bank offset #1
+ s->gr[reg_index] = reg_value;
+ cirrus_update_bank_ptr(s, 0);
+ cirrus_update_bank_ptr(s, 1);
+ break;
+ case 0x0B:
+ s->gr[reg_index] = reg_value;
+ cirrus_update_bank_ptr(s, 0);
+ cirrus_update_bank_ptr(s, 1);
+ cirrus_update_memory_access(s);
+ break;
+ case 0x10: // BGCOLOR 0x0000ff00
+ case 0x11: // FGCOLOR 0x0000ff00
+ case 0x12: // BGCOLOR 0x00ff0000
+ case 0x13: // FGCOLOR 0x00ff0000
+ case 0x14: // BGCOLOR 0xff000000
+ case 0x15: // FGCOLOR 0xff000000
+ case 0x20: // BLT WIDTH 0x0000ff
+ case 0x22: // BLT HEIGHT 0x0000ff
+ case 0x24: // BLT DEST PITCH 0x0000ff
+ case 0x26: // BLT SRC PITCH 0x0000ff
+ case 0x28: // BLT DEST ADDR 0x0000ff
+ case 0x29: // BLT DEST ADDR 0x00ff00
+ case 0x2c: // BLT SRC ADDR 0x0000ff
+ case 0x2d: // BLT SRC ADDR 0x00ff00
+ case 0x2f: // BLT WRITEMASK
+ case 0x30: // BLT MODE
+ case 0x32: // RASTER OP
+ case 0x33: // BLT MODEEXT
+ case 0x34: // BLT TRANSPARENT COLOR 0x00ff
+ case 0x35: // BLT TRANSPARENT COLOR 0xff00
+ case 0x38: // BLT TRANSPARENT COLOR MASK 0x00ff
+ case 0x39: // BLT TRANSPARENT COLOR MASK 0xff00
+ s->gr[reg_index] = reg_value;
+ break;
+ case 0x21: // BLT WIDTH 0x001f00
+ case 0x23: // BLT HEIGHT 0x001f00
+ case 0x25: // BLT DEST PITCH 0x001f00
+ case 0x27: // BLT SRC PITCH 0x001f00
+ s->gr[reg_index] = reg_value & 0x1f;
+ break;
+ case 0x2a: // BLT DEST ADDR 0x3f0000
+ s->gr[reg_index] = reg_value & 0x3f;
+ /* if auto start mode, starts bit blt now */
+ if (s->gr[0x31] & CIRRUS_BLT_AUTOSTART) {
+ cirrus_bitblt_start(s);
+ }
+ break;
+ case 0x2e: // BLT SRC ADDR 0x3f0000
+ s->gr[reg_index] = reg_value & 0x3f;
+ break;
+ case 0x31: // BLT STATUS/START
+ cirrus_write_bitblt(s, reg_value);
+ break;
+ default:
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: outport gr_index %02x, gr_value %02x\n", reg_index,
+ reg_value);
+#endif
+ break;
+ }
+
+ return CIRRUS_HOOK_HANDLED;
+}
+
+/***************************************
+ *
+ * I/O access between 0x3d4-0x3d5
+ *
+ ***************************************/
+
+static int
+cirrus_hook_read_cr(CirrusVGAState * s, unsigned reg_index, int *reg_value)
+{
+ switch (reg_index) {
+ case 0x00: // Standard VGA
+ case 0x01: // Standard VGA
+ case 0x02: // Standard VGA
+ case 0x03: // Standard VGA
+ case 0x04: // Standard VGA
+ case 0x05: // Standard VGA
+ case 0x06: // Standard VGA
+ case 0x07: // Standard VGA
+ case 0x08: // Standard VGA
+ case 0x09: // Standard VGA
+ case 0x0a: // Standard VGA
+ case 0x0b: // Standard VGA
+ case 0x0c: // Standard VGA
+ case 0x0d: // Standard VGA
+ case 0x0e: // Standard VGA
+ case 0x0f: // Standard VGA
+ case 0x10: // Standard VGA
+ case 0x11: // Standard VGA
+ case 0x12: // Standard VGA
+ case 0x13: // Standard VGA
+ case 0x14: // Standard VGA
+ case 0x15: // Standard VGA
+ case 0x16: // Standard VGA
+ case 0x17: // Standard VGA
+ case 0x18: // Standard VGA
+ return CIRRUS_HOOK_NOT_HANDLED;
+ case 0x19: // Interlace End
+ case 0x1a: // Miscellaneous Control
+ case 0x1b: // Extended Display Control
+ case 0x1c: // Sync Adjust and Genlock
+ case 0x1d: // Overlay Extended Control
+ case 0x22: // Graphics Data Latches Readback (R)
+ case 0x24: // Attribute Controller Toggle Readback (R)
+ case 0x25: // Part Status
+ case 0x27: // Part ID (R)
+ *reg_value = s->cr[reg_index];
+ break;
+ case 0x26: // Attribute Controller Index Readback (R)
+ *reg_value = s->ar_index & 0x3f;
+ break;
+ default:
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: inport cr_index %02x\n", reg_index);
+ *reg_value = 0xff;
+#endif
+ break;
+ }
+
+ return CIRRUS_HOOK_HANDLED;
+}
+
+static int
+cirrus_hook_write_cr(CirrusVGAState * s, unsigned reg_index, int reg_value)
+{
+ switch (reg_index) {
+ case 0x00: // Standard VGA
+ case 0x01: // Standard VGA
+ case 0x02: // Standard VGA
+ case 0x03: // Standard VGA
+ case 0x04: // Standard VGA
+ case 0x05: // Standard VGA
+ case 0x06: // Standard VGA
+ case 0x07: // Standard VGA
+ case 0x08: // Standard VGA
+ case 0x09: // Standard VGA
+ case 0x0a: // Standard VGA
+ case 0x0b: // Standard VGA
+ case 0x0c: // Standard VGA
+ case 0x0d: // Standard VGA
+ case 0x0e: // Standard VGA
+ case 0x0f: // Standard VGA
+ case 0x10: // Standard VGA
+ case 0x11: // Standard VGA
+ case 0x12: // Standard VGA
+ case 0x13: // Standard VGA
+ case 0x14: // Standard VGA
+ case 0x15: // Standard VGA
+ case 0x16: // Standard VGA
+ case 0x17: // Standard VGA
+ case 0x18: // Standard VGA
+ return CIRRUS_HOOK_NOT_HANDLED;
+ case 0x19: // Interlace End
+ case 0x1a: // Miscellaneous Control
+ case 0x1b: // Extended Display Control
+ case 0x1c: // Sync Adjust and Genlock
+ case 0x1d: // Overlay Extended Control
+ s->cr[reg_index] = reg_value;
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: handled outport cr_index %02x, cr_value %02x\n",
+ reg_index, reg_value);
+#endif
+ break;
+ case 0x22: // Graphics Data Latches Readback (R)
+ case 0x24: // Attribute Controller Toggle Readback (R)
+ case 0x26: // Attribute Controller Index Readback (R)
+ case 0x27: // Part ID (R)
+ break;
+ case 0x25: // Part Status
+ default:
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: outport cr_index %02x, cr_value %02x\n", reg_index,
+ reg_value);
+#endif
+ break;
+ }
+
+ return CIRRUS_HOOK_HANDLED;
+}
+
+/***************************************
+ *
+ * memory-mapped I/O (bitblt)
+ *
+ ***************************************/
+
+/* Memory-mapped blitter read: translate an MMIO byte offset to the
+ corresponding GR register index and read it through the normal GR
+ read hook, so MMIO and port I/O stay consistent. Unknown offsets
+ return 0xff. */
+static uint8_t cirrus_mmio_blt_read(CirrusVGAState * s, unsigned address)
+{
+ int value = 0xff;
+
+ switch (address) {
+ case (CIRRUS_MMIO_BLTBGCOLOR + 0):
+ cirrus_hook_read_gr(s, 0x00, &value);
+ break;
+ case (CIRRUS_MMIO_BLTBGCOLOR + 1):
+ cirrus_hook_read_gr(s, 0x10, &value);
+ break;
+ case (CIRRUS_MMIO_BLTBGCOLOR + 2):
+ cirrus_hook_read_gr(s, 0x12, &value);
+ break;
+ case (CIRRUS_MMIO_BLTBGCOLOR + 3):
+ cirrus_hook_read_gr(s, 0x14, &value);
+ break;
+ case (CIRRUS_MMIO_BLTFGCOLOR + 0):
+ cirrus_hook_read_gr(s, 0x01, &value);
+ break;
+ case (CIRRUS_MMIO_BLTFGCOLOR + 1):
+ cirrus_hook_read_gr(s, 0x11, &value);
+ break;
+ case (CIRRUS_MMIO_BLTFGCOLOR + 2):
+ cirrus_hook_read_gr(s, 0x13, &value);
+ break;
+ case (CIRRUS_MMIO_BLTFGCOLOR + 3):
+ cirrus_hook_read_gr(s, 0x15, &value);
+ break;
+ case (CIRRUS_MMIO_BLTWIDTH + 0):
+ cirrus_hook_read_gr(s, 0x20, &value);
+ break;
+ case (CIRRUS_MMIO_BLTWIDTH + 1):
+ cirrus_hook_read_gr(s, 0x21, &value);
+ break;
+ case (CIRRUS_MMIO_BLTHEIGHT + 0):
+ cirrus_hook_read_gr(s, 0x22, &value);
+ break;
+ case (CIRRUS_MMIO_BLTHEIGHT + 1):
+ cirrus_hook_read_gr(s, 0x23, &value);
+ break;
+ case (CIRRUS_MMIO_BLTDESTPITCH + 0):
+ cirrus_hook_read_gr(s, 0x24, &value);
+ break;
+ case (CIRRUS_MMIO_BLTDESTPITCH + 1):
+ cirrus_hook_read_gr(s, 0x25, &value);
+ break;
+ case (CIRRUS_MMIO_BLTSRCPITCH + 0):
+ cirrus_hook_read_gr(s, 0x26, &value);
+ break;
+ case (CIRRUS_MMIO_BLTSRCPITCH + 1):
+ cirrus_hook_read_gr(s, 0x27, &value);
+ break;
+ case (CIRRUS_MMIO_BLTDESTADDR + 0):
+ cirrus_hook_read_gr(s, 0x28, &value);
+ break;
+ case (CIRRUS_MMIO_BLTDESTADDR + 1):
+ cirrus_hook_read_gr(s, 0x29, &value);
+ break;
+ case (CIRRUS_MMIO_BLTDESTADDR + 2):
+ cirrus_hook_read_gr(s, 0x2a, &value);
+ break;
+ case (CIRRUS_MMIO_BLTSRCADDR + 0):
+ cirrus_hook_read_gr(s, 0x2c, &value);
+ break;
+ case (CIRRUS_MMIO_BLTSRCADDR + 1):
+ cirrus_hook_read_gr(s, 0x2d, &value);
+ break;
+ case (CIRRUS_MMIO_BLTSRCADDR + 2):
+ cirrus_hook_read_gr(s, 0x2e, &value);
+ break;
+ case CIRRUS_MMIO_BLTWRITEMASK:
+ cirrus_hook_read_gr(s, 0x2f, &value);
+ break;
+ case CIRRUS_MMIO_BLTMODE:
+ cirrus_hook_read_gr(s, 0x30, &value);
+ break;
+ case CIRRUS_MMIO_BLTROP:
+ cirrus_hook_read_gr(s, 0x32, &value);
+ break;
+ case CIRRUS_MMIO_BLTMODEEXT:
+ cirrus_hook_read_gr(s, 0x33, &value);
+ break;
+ case (CIRRUS_MMIO_BLTTRANSPARENTCOLOR + 0):
+ cirrus_hook_read_gr(s, 0x34, &value);
+ break;
+ case (CIRRUS_MMIO_BLTTRANSPARENTCOLOR + 1):
+ cirrus_hook_read_gr(s, 0x35, &value);
+ break;
+ case (CIRRUS_MMIO_BLTTRANSPARENTCOLORMASK + 0):
+ cirrus_hook_read_gr(s, 0x38, &value);
+ break;
+ case (CIRRUS_MMIO_BLTTRANSPARENTCOLORMASK + 1):
+ cirrus_hook_read_gr(s, 0x39, &value);
+ break;
+ case CIRRUS_MMIO_BLTSTATUS:
+ cirrus_hook_read_gr(s, 0x31, &value);
+ break;
+ default:
+#ifdef DEBUG_CIRRUS
+ printf("cirrus: mmio read - address 0x%04x\n", address);
+#endif
+ break;
+ }
+
+ return (uint8_t) value;
+}
+
/* Write one byte of a blitter register through the memory-mapped I/O
   window.  Each MMIO offset aliases a Cirrus graphics-controller (GR)
   register, so writes are forwarded to cirrus_hook_write_gr() with the
   corresponding GR index; multi-byte registers (colors, pitch,
   addresses) are split into per-byte cases.  Unknown offsets are
   ignored (optionally logged under DEBUG_CIRRUS). */
static void cirrus_mmio_blt_write(CirrusVGAState * s, unsigned address,
				  uint8_t value)
{
    switch (address) {
    case (CIRRUS_MMIO_BLTBGCOLOR + 0):
	cirrus_hook_write_gr(s, 0x00, value);
	break;
    case (CIRRUS_MMIO_BLTBGCOLOR + 1):
	cirrus_hook_write_gr(s, 0x10, value);
	break;
    case (CIRRUS_MMIO_BLTBGCOLOR + 2):
	cirrus_hook_write_gr(s, 0x12, value);
	break;
    case (CIRRUS_MMIO_BLTBGCOLOR + 3):
	cirrus_hook_write_gr(s, 0x14, value);
	break;
    case (CIRRUS_MMIO_BLTFGCOLOR + 0):
	cirrus_hook_write_gr(s, 0x01, value);
	break;
    case (CIRRUS_MMIO_BLTFGCOLOR + 1):
	cirrus_hook_write_gr(s, 0x11, value);
	break;
    case (CIRRUS_MMIO_BLTFGCOLOR + 2):
	cirrus_hook_write_gr(s, 0x13, value);
	break;
    case (CIRRUS_MMIO_BLTFGCOLOR + 3):
	cirrus_hook_write_gr(s, 0x15, value);
	break;
    case (CIRRUS_MMIO_BLTWIDTH + 0):
	cirrus_hook_write_gr(s, 0x20, value);
	break;
    case (CIRRUS_MMIO_BLTWIDTH + 1):
	cirrus_hook_write_gr(s, 0x21, value);
	break;
    case (CIRRUS_MMIO_BLTHEIGHT + 0):
	cirrus_hook_write_gr(s, 0x22, value);
	break;
    case (CIRRUS_MMIO_BLTHEIGHT + 1):
	cirrus_hook_write_gr(s, 0x23, value);
	break;
    case (CIRRUS_MMIO_BLTDESTPITCH + 0):
	cirrus_hook_write_gr(s, 0x24, value);
	break;
    case (CIRRUS_MMIO_BLTDESTPITCH + 1):
	cirrus_hook_write_gr(s, 0x25, value);
	break;
    case (CIRRUS_MMIO_BLTSRCPITCH + 0):
	cirrus_hook_write_gr(s, 0x26, value);
	break;
    case (CIRRUS_MMIO_BLTSRCPITCH + 1):
	cirrus_hook_write_gr(s, 0x27, value);
	break;
    case (CIRRUS_MMIO_BLTDESTADDR + 0):
	cirrus_hook_write_gr(s, 0x28, value);
	break;
    case (CIRRUS_MMIO_BLTDESTADDR + 1):
	cirrus_hook_write_gr(s, 0x29, value);
	break;
    case (CIRRUS_MMIO_BLTDESTADDR + 2):
	cirrus_hook_write_gr(s, 0x2a, value);
	break;
    case (CIRRUS_MMIO_BLTDESTADDR + 3):
	/* ignored */
	break;
    case (CIRRUS_MMIO_BLTSRCADDR + 0):
	cirrus_hook_write_gr(s, 0x2c, value);
	break;
    case (CIRRUS_MMIO_BLTSRCADDR + 1):
	cirrus_hook_write_gr(s, 0x2d, value);
	break;
    case (CIRRUS_MMIO_BLTSRCADDR + 2):
	cirrus_hook_write_gr(s, 0x2e, value);
	break;
    case CIRRUS_MMIO_BLTWRITEMASK:
	cirrus_hook_write_gr(s, 0x2f, value);
	break;
    case CIRRUS_MMIO_BLTMODE:
	cirrus_hook_write_gr(s, 0x30, value);
	break;
    case CIRRUS_MMIO_BLTROP:
	cirrus_hook_write_gr(s, 0x32, value);
	break;
    case CIRRUS_MMIO_BLTMODEEXT:
	cirrus_hook_write_gr(s, 0x33, value);
	break;
    case (CIRRUS_MMIO_BLTTRANSPARENTCOLOR + 0):
	cirrus_hook_write_gr(s, 0x34, value);
	break;
    case (CIRRUS_MMIO_BLTTRANSPARENTCOLOR + 1):
	cirrus_hook_write_gr(s, 0x35, value);
	break;
    case (CIRRUS_MMIO_BLTTRANSPARENTCOLORMASK + 0):
	cirrus_hook_write_gr(s, 0x38, value);
	break;
    case (CIRRUS_MMIO_BLTTRANSPARENTCOLORMASK + 1):
	cirrus_hook_write_gr(s, 0x39, value);
	break;
    case CIRRUS_MMIO_BLTSTATUS:
	/* note: writing STATUS maps to GR31 (blitter start/status) */
	cirrus_hook_write_gr(s, 0x31, value);
	break;
    default:
#ifdef DEBUG_CIRRUS
	printf("cirrus: mmio write - addr 0x%04x val 0x%02x (ignored)\n",
	       address, value);
#endif
	break;
    }
}
+
+/***************************************
+ *
+ * write mode 4/5
+ *
+ * assume TARGET_PAGE_SIZE >= 16
+ *
+ ***************************************/
+
+static void cirrus_mem_writeb_mode4and5_8bpp(CirrusVGAState * s,
+ unsigned mode,
+ unsigned offset,
+ uint32_t mem_value)
+{
+ int x;
+ unsigned val = mem_value;
+ uint8_t *dst;
+
+ dst = s->vram_ptr + offset;
+ for (x = 0; x < 8; x++) {
+ if (val & 0x80) {
+ *dst++ = s->cirrus_shadow_gr1;
+ } else if (mode == 5) {
+ *dst++ = s->cirrus_shadow_gr0;
+ }
+ val <<= 1;
+ }
+ cpu_physical_memory_set_dirty(s->vram_offset + offset);
+ cpu_physical_memory_set_dirty(s->vram_offset + offset + 7);
+}
+
+static void cirrus_mem_writeb_mode4and5_16bpp(CirrusVGAState * s,
+ unsigned mode,
+ unsigned offset,
+ uint32_t mem_value)
+{
+ int x;
+ unsigned val = mem_value;
+ uint8_t *dst;
+
+ dst = s->vram_ptr + offset;
+ for (x = 0; x < 8; x++) {
+ if (val & 0x80) {
+ *dst++ = s->cirrus_shadow_gr1;
+ *dst++ = s->gr[0x11];
+ } else if (mode == 5) {
+ *dst++ = s->cirrus_shadow_gr0;
+ *dst++ = s->gr[0x10];
+ }
+ val <<= 1;
+ }
+ cpu_physical_memory_set_dirty(s->vram_offset + offset);
+ cpu_physical_memory_set_dirty(s->vram_offset + offset + 15);
+}
+
+/***************************************
+ *
+ * memory access between 0xa0000-0xbffff
+ *
+ ***************************************/
+
/* Byte read in the legacy 0xa0000-0xbffff VGA window.
   Falls back to plain VGA behaviour unless Cirrus extensions are
   enabled (SR7 bit 0).  Otherwise the 128K window is split into two
   64K banks of video memory plus, at 0x18000-0x180ff, the relocatable
   blitter MMIO region (when enabled via SR17). */
static uint32_t cirrus_vga_mem_readb(void *opaque, target_phys_addr_t addr)
{
    CirrusVGAState *s = opaque;
    unsigned bank_index;
    unsigned bank_offset;
    uint32_t val;

    /* SR7 bit 0 clear -> chip behaves as a standard VGA */
    if ((s->sr[0x07] & 0x01) == 0) {
	return vga_mem_readb(s, addr);
    }

    addr &= 0x1ffff;

    if (addr < 0x10000) {
	/* XXX handle bitblt */
	/* video memory */
	bank_index = addr >> 15;	/* 32K granules -> bank 0 or 1 */
	bank_offset = addr & 0x7fff;
	if (bank_offset < s->cirrus_bank_limit[bank_index]) {
	    bank_offset += s->cirrus_bank_base[bank_index];
	    /* GR0B: address scaling for 16-byte / 8-byte granularity modes */
	    if ((s->gr[0x0B] & 0x14) == 0x14) {
		bank_offset <<= 4;
	    } else if (s->gr[0x0B] & 0x02) {
		bank_offset <<= 3;
	    }
	    bank_offset &= s->cirrus_addr_mask;
	    val = *(s->vram_ptr + bank_offset);
	} else
	    val = 0xff;		/* out of the mapped bank -> open bus */
    } else if (addr >= 0x18000 && addr < 0x18100) {
	/* memory-mapped I/O */
	val = 0xff;
	/* SR17: MMIO enabled (bit 2) and not relocated to LFB (bit 6) */
	if ((s->sr[0x17] & 0x44) == 0x04) {
	    val = cirrus_mmio_blt_read(s, addr & 0xff);
	}
    } else {
	val = 0xff;
#ifdef DEBUG_CIRRUS
	printf("cirrus: mem_readb %06x\n", addr);
#endif
    }
    return val;
}
+
+static uint32_t cirrus_vga_mem_readw(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = cirrus_vga_mem_readb(opaque, addr) << 8;
+ v |= cirrus_vga_mem_readb(opaque, addr + 1);
+#else
+ v = cirrus_vga_mem_readb(opaque, addr);
+ v |= cirrus_vga_mem_readb(opaque, addr + 1) << 8;
+#endif
+ return v;
+}
+
+static uint32_t cirrus_vga_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = cirrus_vga_mem_readb(opaque, addr) << 24;
+ v |= cirrus_vga_mem_readb(opaque, addr + 1) << 16;
+ v |= cirrus_vga_mem_readb(opaque, addr + 2) << 8;
+ v |= cirrus_vga_mem_readb(opaque, addr + 3);
+#else
+ v = cirrus_vga_mem_readb(opaque, addr);
+ v |= cirrus_vga_mem_readb(opaque, addr + 1) << 8;
+ v |= cirrus_vga_mem_readb(opaque, addr + 2) << 16;
+ v |= cirrus_vga_mem_readb(opaque, addr + 3) << 24;
+#endif
+ return v;
+}
+
/* Byte write in the legacy 0xa0000-0xbffff VGA window.
   Standard-VGA path when Cirrus extensions are off (SR7 bit 0).
   Otherwise: bytes are fed to an in-progress CPU-to-video blit if one
   is active, else written to banked video memory (with optional GR0B
   address scaling and write-mode 4/5 color expansion), or routed to
   the blitter MMIO region at 0x18000-0x180ff. */
static void cirrus_vga_mem_writeb(void *opaque, target_phys_addr_t addr,
                                  uint32_t mem_value)
{
    CirrusVGAState *s = opaque;
    unsigned bank_index;
    unsigned bank_offset;
    unsigned mode;

    /* SR7 bit 0 clear -> plain VGA semantics */
    if ((s->sr[0x07] & 0x01) == 0) {
	vga_mem_writeb(s, addr, mem_value);
	return;
    }

    addr &= 0x1ffff;

    if (addr < 0x10000) {
	if (s->cirrus_srcptr != s->cirrus_srcptr_end) {
	    /* bitblt: CPU is feeding source data to the blitter */
	    *s->cirrus_srcptr++ = (uint8_t) mem_value;
	    if (s->cirrus_srcptr >= s->cirrus_srcptr_end) {
		cirrus_bitblt_cputovideo_next(s);
	    }
	} else {
	    /* video memory */
	    bank_index = addr >> 15;
	    bank_offset = addr & 0x7fff;
	    if (bank_offset < s->cirrus_bank_limit[bank_index]) {
		bank_offset += s->cirrus_bank_base[bank_index];
		/* GR0B address scaling, mirrors the read path */
		if ((s->gr[0x0B] & 0x14) == 0x14) {
		    bank_offset <<= 4;
		} else if (s->gr[0x0B] & 0x02) {
		    bank_offset <<= 3;
		}
		bank_offset &= s->cirrus_addr_mask;
		mode = s->gr[0x05] & 0x7;
		/* modes 0-3,6,7 (or extended-write disabled): plain store */
		if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
		    *(s->vram_ptr + bank_offset) = mem_value;
		    cpu_physical_memory_set_dirty(s->vram_offset +
						  bank_offset);
		} else {
		    /* write modes 4/5: color expansion, 8bpp or 16bpp */
		    if ((s->gr[0x0B] & 0x14) != 0x14) {
			cirrus_mem_writeb_mode4and5_8bpp(s, mode,
							 bank_offset,
							 mem_value);
		    } else {
			cirrus_mem_writeb_mode4and5_16bpp(s, mode,
							  bank_offset,
							  mem_value);
		    }
		}
	    }
	}
    } else if (addr >= 0x18000 && addr < 0x18100) {
	/* memory-mapped I/O (SR17: enabled and not relocated to LFB) */
	if ((s->sr[0x17] & 0x44) == 0x04) {
	    cirrus_mmio_blt_write(s, addr & 0xff, mem_value);
	}
    } else {
#ifdef DEBUG_CIRRUS
	printf("cirrus: mem_writeb %06x value %02x\n", addr, mem_value);
#endif
    }
}
+
+static void cirrus_vga_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ cirrus_vga_mem_writeb(opaque, addr, (val >> 8) & 0xff);
+ cirrus_vga_mem_writeb(opaque, addr + 1, val & 0xff);
+#else
+ cirrus_vga_mem_writeb(opaque, addr, val & 0xff);
+ cirrus_vga_mem_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+#endif
+}
+
+static void cirrus_vga_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ cirrus_vga_mem_writeb(opaque, addr, (val >> 24) & 0xff);
+ cirrus_vga_mem_writeb(opaque, addr + 1, (val >> 16) & 0xff);
+ cirrus_vga_mem_writeb(opaque, addr + 2, (val >> 8) & 0xff);
+ cirrus_vga_mem_writeb(opaque, addr + 3, val & 0xff);
+#else
+ cirrus_vga_mem_writeb(opaque, addr, val & 0xff);
+ cirrus_vga_mem_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+ cirrus_vga_mem_writeb(opaque, addr + 2, (val >> 16) & 0xff);
+ cirrus_vga_mem_writeb(opaque, addr + 3, (val >> 24) & 0xff);
+#endif
+}
+
/* Dispatch tables (byte/word/long) for the 0xa0000-0xbffff window,
   registered with cpu_register_io_memory() in cirrus_init_common(). */
static CPUReadMemoryFunc *cirrus_vga_mem_read[3] = {
    cirrus_vga_mem_readb,
    cirrus_vga_mem_readw,
    cirrus_vga_mem_readl,
};

static CPUWriteMemoryFunc *cirrus_vga_mem_write[3] = {
    cirrus_vga_mem_writeb,
    cirrus_vga_mem_writew,
    cirrus_vga_mem_writel,
};
+
+/***************************************
+ *
+ * hardware cursor
+ *
+ ***************************************/
+
+static inline void invalidate_cursor1(CirrusVGAState *s)
+{
+ if (s->last_hw_cursor_size) {
+ vga_invalidate_scanlines((VGAState *)s,
+ s->last_hw_cursor_y + s->last_hw_cursor_y_start,
+ s->last_hw_cursor_y + s->last_hw_cursor_y_end);
+ }
+}
+
/* Scan the cursor bitmap (stored in the last 16K of video RAM) and
   record the first/last rows that contain any set bit, so redraws can
   be limited to the cursor's non-empty vertical span.
   NOTE(review): the (uint32_t *) casts assume the bitmap rows are
   32-bit aligned within vram — true as long as vram_ptr is aligned,
   since row strides are 16 or 4 bytes. */
static inline void cirrus_cursor_compute_yrange(CirrusVGAState *s)
{
    const uint8_t *src;
    uint32_t content;
    int y, y_min, y_max;

    src = s->vram_ptr + s->real_vram_size - 16 * 1024;
    if (s->sr[0x12] & CIRRUS_CURSOR_LARGE) {
	/* 64x64 cursor: 16 bytes per row, pattern selected by SR13 */
	src += (s->sr[0x13] & 0x3c) * 256;
	y_min = 64;
	y_max = -1;
	for(y = 0; y < 64; y++) {
	    /* OR the whole 16-byte row to test for any set bit */
	    content = ((uint32_t *)src)[0] |
		((uint32_t *)src)[1] |
		((uint32_t *)src)[2] |
		((uint32_t *)src)[3];
	    if (content) {
		if (y < y_min)
		    y_min = y;
		if (y > y_max)
		    y_max = y;
	    }
	    src += 16;
	}
    } else {
	/* 32x32 cursor: 4 bytes per row; the second plane lives 128
	   bytes after the first */
	src += (s->sr[0x13] & 0x3f) * 256;
	y_min = 32;
	y_max = -1;
	for(y = 0; y < 32; y++) {
	    content = ((uint32_t *)src)[0] |
		((uint32_t *)(src + 128))[0];
	    if (content) {
		if (y < y_min)
		    y_min = y;
		if (y > y_max)
		    y_max = y;
	    }
	    src += 4;
	}
    }
    if (y_min > y_max) {
	/* completely empty bitmap */
	s->last_hw_cursor_y_start = 0;
	s->last_hw_cursor_y_end = 0;
    } else {
	s->last_hw_cursor_y_start = y_min;
	s->last_hw_cursor_y_end = y_max + 1;	/* end is exclusive */
    }
}
+
+/* NOTE: we do not currently handle the cursor bitmap change, so we
+ update the cursor only if it moves. */
+static void cirrus_cursor_invalidate(VGAState *s1)
+{
+ CirrusVGAState *s = (CirrusVGAState *)s1;
+ int size;
+
+ if (!s->sr[0x12] & CIRRUS_CURSOR_SHOW) {
+ size = 0;
+ } else {
+ if (s->sr[0x12] & CIRRUS_CURSOR_LARGE)
+ size = 64;
+ else
+ size = 32;
+ }
+ /* invalidate last cursor and new cursor if any change */
+ if (s->last_hw_cursor_size != size ||
+ s->last_hw_cursor_x != s->hw_cursor_x ||
+ s->last_hw_cursor_y != s->hw_cursor_y) {
+
+ invalidate_cursor1(s);
+
+ s->last_hw_cursor_size = size;
+ s->last_hw_cursor_x = s->hw_cursor_x;
+ s->last_hw_cursor_y = s->hw_cursor_y;
+ /* compute the real cursor min and max y */
+ cirrus_cursor_compute_yrange(s);
+ invalidate_cursor1(s);
+ }
+}
+
/* Blend the hardware cursor into one scanline of the display buffer.
   d1 points at the start of the output scanline, scr_y is its screen
   row.  Returns early if the cursor is hidden, does not intersect the
   row, or the cursor row is entirely transparent. */
static void cirrus_cursor_draw_line(VGAState *s1, uint8_t *d1, int scr_y)
{
    CirrusVGAState *s = (CirrusVGAState *)s1;
    int w, h, bpp, x1, x2, poffset;
    unsigned int color0, color1;
    const uint8_t *palette, *src;
    uint32_t content;

    if (!(s->sr[0x12] & CIRRUS_CURSOR_SHOW))
	return;
    /* fast test to see if the cursor intersects with the scan line */
    if (s->sr[0x12] & CIRRUS_CURSOR_LARGE) {
	h = 64;
    } else {
	h = 32;
    }
    if (scr_y < s->hw_cursor_y ||
	scr_y >= (s->hw_cursor_y + h))
	return;

    /* cursor bitmap lives in the last 16K of video RAM; SR13 selects
       the pattern slot (layout mirrors cirrus_cursor_compute_yrange) */
    src = s->vram_ptr + s->real_vram_size - 16 * 1024;
    if (s->sr[0x12] & CIRRUS_CURSOR_LARGE) {
	src += (s->sr[0x13] & 0x3c) * 256;
	src += (scr_y - s->hw_cursor_y) * 16;
	poffset = 8;		/* second-plane offset within the row */
	content = ((uint32_t *)src)[0] |
	    ((uint32_t *)src)[1] |
	    ((uint32_t *)src)[2] |
	    ((uint32_t *)src)[3];
    } else {
	src += (s->sr[0x13] & 0x3f) * 256;
	src += (scr_y - s->hw_cursor_y) * 4;
	poffset = 128;		/* second plane is 128 bytes away */
	content = ((uint32_t *)src)[0] |
	    ((uint32_t *)(src + 128))[0];
    }
    /* if nothing to draw, no need to continue */
    if (!content)
	return;
    w = h;

    /* clip the cursor horizontally against the screen width */
    x1 = s->hw_cursor_x;
    if (x1 >= s->last_scr_width)
	return;
    x2 = s->hw_cursor_x + w;
    if (x2 > s->last_scr_width)
	x2 = s->last_scr_width;
    w = x2 - x1;
    /* cursor colors 0 and 15 come from the hidden (extended) DAC palette */
    palette = s->cirrus_hidden_palette;
    color0 = s->rgb_to_pixel(c6_to_8(palette[0x0 * 3]),
			     c6_to_8(palette[0x0 * 3 + 1]),
			     c6_to_8(palette[0x0 * 3 + 2]));
    color1 = s->rgb_to_pixel(c6_to_8(palette[0xf * 3]),
			     c6_to_8(palette[0xf * 3 + 1]),
			     c6_to_8(palette[0xf * 3 + 2]));
    bpp = ((s->ds->depth + 7) >> 3);
    d1 += x1 * bpp;
    switch(s->ds->depth) {
    default:
	break;
    case 8:
	vga_draw_cursor_line_8(d1, src, poffset, w, color0, color1, 0xff);
	break;
    case 15:
	vga_draw_cursor_line_16(d1, src, poffset, w, color0, color1, 0x7fff);
	break;
    case 16:
	vga_draw_cursor_line_16(d1, src, poffset, w, color0, color1, 0xffff);
	break;
    case 32:
	vga_draw_cursor_line_32(d1, src, poffset, w, color0, color1, 0xffffff);
	break;
    }
}
+
+/***************************************
+ *
+ * LFB memory access
+ *
+ ***************************************/
+
/* Byte read in the linear framebuffer aperture.  The top of the
   aperture can alias the blitter MMIO registers (SR17 bits 2 and 6
   both set); otherwise the access goes to video memory, with the
   GR0B address scaling applied as in the banked window. */
static uint32_t cirrus_linear_readb(void *opaque, target_phys_addr_t addr)
{
    CirrusVGAState *s = (CirrusVGAState *) opaque;
    uint32_t ret;

    addr &= s->cirrus_addr_mask;

    if (((s->sr[0x17] & 0x44) == 0x44) &&
	((addr & s->linear_mmio_mask) == s->linear_mmio_mask)) {
	/* memory-mapped I/O */
	ret = cirrus_mmio_blt_read(s, addr & 0xff);
    } else if (0) {
	/* XXX handle bitblt -- intentionally dead placeholder branch */
	ret = 0xff;
    } else {
	/* video memory */
	if ((s->gr[0x0B] & 0x14) == 0x14) {
	    addr <<= 4;
	} else if (s->gr[0x0B] & 0x02) {
	    addr <<= 3;
	}
	addr &= s->cirrus_addr_mask;
	ret = *(s->vram_ptr + addr);
    }

    return ret;
}
+
+static uint32_t cirrus_linear_readw(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = cirrus_linear_readb(opaque, addr) << 8;
+ v |= cirrus_linear_readb(opaque, addr + 1);
+#else
+ v = cirrus_linear_readb(opaque, addr);
+ v |= cirrus_linear_readb(opaque, addr + 1) << 8;
+#endif
+ return v;
+}
+
+static uint32_t cirrus_linear_readl(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = cirrus_linear_readb(opaque, addr) << 24;
+ v |= cirrus_linear_readb(opaque, addr + 1) << 16;
+ v |= cirrus_linear_readb(opaque, addr + 2) << 8;
+ v |= cirrus_linear_readb(opaque, addr + 3);
+#else
+ v = cirrus_linear_readb(opaque, addr);
+ v |= cirrus_linear_readb(opaque, addr + 1) << 8;
+ v |= cirrus_linear_readb(opaque, addr + 2) << 16;
+ v |= cirrus_linear_readb(opaque, addr + 3) << 24;
+#endif
+ return v;
+}
+
/* Byte write in the linear framebuffer aperture.  Routes, in order:
   to blitter MMIO when that alias is enabled and hit; to an active
   CPU-to-video blit; otherwise to video memory with GR0B address
   scaling and optional write-mode 4/5 color expansion (mirrors the
   banked path in cirrus_vga_mem_writeb). */
static void cirrus_linear_writeb(void *opaque, target_phys_addr_t addr,
				 uint32_t val)
{
    CirrusVGAState *s = (CirrusVGAState *) opaque;
    unsigned mode;

    addr &= s->cirrus_addr_mask;

    if (((s->sr[0x17] & 0x44) == 0x44) &&
	((addr & s->linear_mmio_mask) == s->linear_mmio_mask)) {
	/* memory-mapped I/O */
	cirrus_mmio_blt_write(s, addr & 0xff, val);
    } else if (s->cirrus_srcptr != s->cirrus_srcptr_end) {
	/* bitblt: CPU is feeding source data to the blitter */
	*s->cirrus_srcptr++ = (uint8_t) val;
	if (s->cirrus_srcptr >= s->cirrus_srcptr_end) {
	    cirrus_bitblt_cputovideo_next(s);
	}
    } else {
	/* video memory */
	if ((s->gr[0x0B] & 0x14) == 0x14) {
	    addr <<= 4;
	} else if (s->gr[0x0B] & 0x02) {
	    addr <<= 3;
	}
	addr &= s->cirrus_addr_mask;

	mode = s->gr[0x05] & 0x7;
	/* plain store unless write mode 4/5 with extended writes enabled */
	if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
	    *(s->vram_ptr + addr) = (uint8_t) val;
	    cpu_physical_memory_set_dirty(s->vram_offset + addr);
	} else {
	    if ((s->gr[0x0B] & 0x14) != 0x14) {
		cirrus_mem_writeb_mode4and5_8bpp(s, mode, addr, val);
	    } else {
		cirrus_mem_writeb_mode4and5_16bpp(s, mode, addr, val);
	    }
	}
    }
}
+
+static void cirrus_linear_writew(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ cirrus_linear_writeb(opaque, addr, (val >> 8) & 0xff);
+ cirrus_linear_writeb(opaque, addr + 1, val & 0xff);
+#else
+ cirrus_linear_writeb(opaque, addr, val & 0xff);
+ cirrus_linear_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+#endif
+}
+
+static void cirrus_linear_writel(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ cirrus_linear_writeb(opaque, addr, (val >> 24) & 0xff);
+ cirrus_linear_writeb(opaque, addr + 1, (val >> 16) & 0xff);
+ cirrus_linear_writeb(opaque, addr + 2, (val >> 8) & 0xff);
+ cirrus_linear_writeb(opaque, addr + 3, val & 0xff);
+#else
+ cirrus_linear_writeb(opaque, addr, val & 0xff);
+ cirrus_linear_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+ cirrus_linear_writeb(opaque, addr + 2, (val >> 16) & 0xff);
+ cirrus_linear_writeb(opaque, addr + 3, (val >> 24) & 0xff);
+#endif
+}
+
+
/* Dispatch tables for the linear framebuffer aperture.  The write
   table is the "slow" path; cirrus_update_memory_access() swaps in
   the direct cirrus_linear_mem_write* handlers when possible. */
static CPUReadMemoryFunc *cirrus_linear_read[3] = {
    cirrus_linear_readb,
    cirrus_linear_readw,
    cirrus_linear_readl,
};

static CPUWriteMemoryFunc *cirrus_linear_write[3] = {
    cirrus_linear_writeb,
    cirrus_linear_writew,
    cirrus_linear_writel,
};
+
+static void cirrus_linear_mem_writeb(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ CirrusVGAState *s = (CirrusVGAState *) opaque;
+
+ addr &= s->cirrus_addr_mask;
+ *(s->vram_ptr + addr) = val;
+ cpu_physical_memory_set_dirty(s->vram_offset + addr);
+}
+
+static void cirrus_linear_mem_writew(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ CirrusVGAState *s = (CirrusVGAState *) opaque;
+
+ addr &= s->cirrus_addr_mask;
+ cpu_to_le16w((uint16_t *)(s->vram_ptr + addr), val);
+ cpu_physical_memory_set_dirty(s->vram_offset + addr);
+}
+
+static void cirrus_linear_mem_writel(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ CirrusVGAState *s = (CirrusVGAState *) opaque;
+
+ addr &= s->cirrus_addr_mask;
+ cpu_to_le32w((uint32_t *)(s->vram_ptr + addr), val);
+ cpu_physical_memory_set_dirty(s->vram_offset + addr);
+}
+
+/***************************************
+ *
+ * system to screen memory access
+ *
+ ***************************************/
+
+
+static uint32_t cirrus_linear_bitblt_readb(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t ret;
+
+ /* XXX handle bitblt */
+ ret = 0xff;
+ return ret;
+}
+
+static uint32_t cirrus_linear_bitblt_readw(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = cirrus_linear_bitblt_readb(opaque, addr) << 8;
+ v |= cirrus_linear_bitblt_readb(opaque, addr + 1);
+#else
+ v = cirrus_linear_bitblt_readb(opaque, addr);
+ v |= cirrus_linear_bitblt_readb(opaque, addr + 1) << 8;
+#endif
+ return v;
+}
+
+static uint32_t cirrus_linear_bitblt_readl(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = cirrus_linear_bitblt_readb(opaque, addr) << 24;
+ v |= cirrus_linear_bitblt_readb(opaque, addr + 1) << 16;
+ v |= cirrus_linear_bitblt_readb(opaque, addr + 2) << 8;
+ v |= cirrus_linear_bitblt_readb(opaque, addr + 3);
+#else
+ v = cirrus_linear_bitblt_readb(opaque, addr);
+ v |= cirrus_linear_bitblt_readb(opaque, addr + 1) << 8;
+ v |= cirrus_linear_bitblt_readb(opaque, addr + 2) << 16;
+ v |= cirrus_linear_bitblt_readb(opaque, addr + 3) << 24;
+#endif
+ return v;
+}
+
+static void cirrus_linear_bitblt_writeb(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ CirrusVGAState *s = (CirrusVGAState *) opaque;
+
+ if (s->cirrus_srcptr != s->cirrus_srcptr_end) {
+ /* bitblt */
+ *s->cirrus_srcptr++ = (uint8_t) val;
+ if (s->cirrus_srcptr >= s->cirrus_srcptr_end) {
+ cirrus_bitblt_cputovideo_next(s);
+ }
+ }
+}
+
+static void cirrus_linear_bitblt_writew(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ cirrus_linear_bitblt_writeb(opaque, addr, (val >> 8) & 0xff);
+ cirrus_linear_bitblt_writeb(opaque, addr + 1, val & 0xff);
+#else
+ cirrus_linear_bitblt_writeb(opaque, addr, val & 0xff);
+ cirrus_linear_bitblt_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+#endif
+}
+
+static void cirrus_linear_bitblt_writel(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ cirrus_linear_bitblt_writeb(opaque, addr, (val >> 24) & 0xff);
+ cirrus_linear_bitblt_writeb(opaque, addr + 1, (val >> 16) & 0xff);
+ cirrus_linear_bitblt_writeb(opaque, addr + 2, (val >> 8) & 0xff);
+ cirrus_linear_bitblt_writeb(opaque, addr + 3, val & 0xff);
+#else
+ cirrus_linear_bitblt_writeb(opaque, addr, val & 0xff);
+ cirrus_linear_bitblt_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+ cirrus_linear_bitblt_writeb(opaque, addr + 2, (val >> 16) & 0xff);
+ cirrus_linear_bitblt_writeb(opaque, addr + 3, (val >> 24) & 0xff);
+#endif
+}
+
+
/* Dispatch tables for the system-to-screen (CPU-to-video) blit
   aperture. */
static CPUReadMemoryFunc *cirrus_linear_bitblt_read[3] = {
    cirrus_linear_bitblt_readb,
    cirrus_linear_bitblt_readw,
    cirrus_linear_bitblt_readl,
};

static CPUWriteMemoryFunc *cirrus_linear_bitblt_write[3] = {
    cirrus_linear_bitblt_writeb,
    cirrus_linear_bitblt_writew,
    cirrus_linear_bitblt_writel,
};
+
+/* Compute the memory access functions */
+static void cirrus_update_memory_access(CirrusVGAState *s)
+{
+ unsigned mode;
+
+ if ((s->sr[0x17] & 0x44) == 0x44) {
+ goto generic_io;
+ } else if (s->cirrus_srcptr != s->cirrus_srcptr_end) {
+ goto generic_io;
+ } else {
+ if ((s->gr[0x0B] & 0x14) == 0x14) {
+ goto generic_io;
+ } else if (s->gr[0x0B] & 0x02) {
+ goto generic_io;
+ }
+
+ mode = s->gr[0x05] & 0x7;
+ if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
+ s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
+ s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
+ s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
+ } else {
+ generic_io:
+ s->cirrus_linear_write[0] = cirrus_linear_writeb;
+ s->cirrus_linear_write[1] = cirrus_linear_writew;
+ s->cirrus_linear_write[2] = cirrus_linear_writel;
+ }
+ }
+}
+
+
+/* I/O ports */
+
/* Read a legacy VGA I/O port (0x3b0-0x3df range).  Cirrus extended
   registers are intercepted first via the cirrus_hook_read_* helpers;
   everything else follows standard VGA register semantics. */
static uint32_t vga_ioport_read(void *opaque, uint32_t addr)
{
    CirrusVGAState *s = opaque;
    int val, index;

    /* check port range access depending on color/monochrome mode */
    if ((addr >= 0x3b0 && addr <= 0x3bf && (s->msr & MSR_COLOR_EMULATION))
	|| (addr >= 0x3d0 && addr <= 0x3df
	    && !(s->msr & MSR_COLOR_EMULATION))) {
	val = 0xff;
    } else {
	switch (addr) {
	case 0x3c0:		/* attribute controller: index or data */
	    if (s->ar_flip_flop == 0) {
		val = s->ar_index;
	    } else {
		val = 0;
	    }
	    break;
	case 0x3c1:		/* attribute controller data read */
	    index = s->ar_index & 0x1f;
	    if (index < 21)
		val = s->ar[index];
	    else
		val = 0;
	    break;
	case 0x3c2:		/* input status 0 */
	    val = s->st00;
	    break;
	case 0x3c4:		/* sequencer index */
	    val = s->sr_index;
	    break;
	case 0x3c5:		/* sequencer data (Cirrus may intercept) */
	    if (cirrus_hook_read_sr(s, s->sr_index, &val))
		break;
	    val = s->sr[s->sr_index];
#ifdef DEBUG_VGA_REG
	    printf("vga: read SR%x = 0x%02x\n", s->sr_index, val);
#endif
	    break;
	case 0x3c6:		/* DAC mask / Cirrus hidden DAC */
	    cirrus_read_hidden_dac(s, &val);
	    break;
	case 0x3c7:		/* DAC state */
	    val = s->dac_state;
	    break;
	case 0x3c8:		/* DAC write index; also unlocks hidden DAC */
	    val = s->dac_write_index;
	    s->cirrus_hidden_dac_lockindex = 0;
	    break;
	case 0x3c9:		/* DAC data (Cirrus may intercept) */
	    if (cirrus_hook_read_palette(s, &val))
		break;
	    val = s->palette[s->dac_read_index * 3 + s->dac_sub_index];
	    if (++s->dac_sub_index == 3) {
		s->dac_sub_index = 0;
		s->dac_read_index++;
	    }
	    break;
	case 0x3ca:		/* feature control */
	    val = s->fcr;
	    break;
	case 0x3cc:		/* misc output register */
	    val = s->msr;
	    break;
	case 0x3ce:		/* graphics controller index */
	    val = s->gr_index;
	    break;
	case 0x3cf:		/* graphics controller data */
	    if (cirrus_hook_read_gr(s, s->gr_index, &val))
		break;
	    val = s->gr[s->gr_index];
#ifdef DEBUG_VGA_REG
	    printf("vga: read GR%x = 0x%02x\n", s->gr_index, val);
#endif
	    break;
	case 0x3b4:		/* CRTC index (mono / color alias) */
	case 0x3d4:
	    val = s->cr_index;
	    break;
	case 0x3b5:		/* CRTC data */
	case 0x3d5:
	    if (cirrus_hook_read_cr(s, s->cr_index, &val))
		break;
	    val = s->cr[s->cr_index];
#ifdef DEBUG_VGA_REG
	    printf("vga: read CR%x = 0x%02x\n", s->cr_index, val);
#endif
	    break;
	case 0x3ba:		/* input status 1; reading resets AR flip-flop */
	case 0x3da:
	    /* just toggle to fool polling */
	    s->st01 ^= ST01_V_RETRACE | ST01_DISP_ENABLE;
	    val = s->st01;
	    s->ar_flip_flop = 0;
	    break;
	default:
	    val = 0x00;
	    break;
	}
    }
#if defined(DEBUG_VGA)
    printf("VGA: read addr=0x%04x data=0x%02x\n", addr, val);
#endif
    return val;
}
+
/* Write a legacy VGA I/O port.  Cirrus extended registers are
   intercepted via the cirrus_hook_write_* helpers; masks (sr_mask /
   gr_mask) and the CRTC CR0-7 write protection follow standard VGA
   behaviour. */
static void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    CirrusVGAState *s = opaque;
    int index;

    /* check port range access depending on color/monochrome mode */
    if ((addr >= 0x3b0 && addr <= 0x3bf && (s->msr & MSR_COLOR_EMULATION))
	|| (addr >= 0x3d0 && addr <= 0x3df
	    && !(s->msr & MSR_COLOR_EMULATION)))
	return;

#ifdef DEBUG_VGA
    printf("VGA: write addr=0x%04x data=0x%02x\n", addr, val);
#endif

    switch (addr) {
    case 0x3c0:		/* attribute controller: index/data flip-flop */
	if (s->ar_flip_flop == 0) {
	    val &= 0x3f;
	    s->ar_index = val;
	} else {
	    index = s->ar_index & 0x1f;
	    /* per-register write masks */
	    switch (index) {
	    case 0x00 ... 0x0f:
		s->ar[index] = val & 0x3f;
		break;
	    case 0x10:
		s->ar[index] = val & ~0x10;
		break;
	    case 0x11:
		s->ar[index] = val;
		break;
	    case 0x12:
		s->ar[index] = val & ~0xc0;
		break;
	    case 0x13:
		s->ar[index] = val & ~0xf0;
		break;
	    case 0x14:
		s->ar[index] = val & ~0xf0;
		break;
	    default:
		break;
	    }
	}
	s->ar_flip_flop ^= 1;
	break;
    case 0x3c2:		/* misc output register */
	s->msr = val & ~0x10;
	break;
    case 0x3c4:		/* sequencer index */
	s->sr_index = val;
	break;
    case 0x3c5:		/* sequencer data (Cirrus may intercept) */
	if (cirrus_hook_write_sr(s, s->sr_index, val))
	    break;
#ifdef DEBUG_VGA_REG
	printf("vga: write SR%x = 0x%02x\n", s->sr_index, val);
#endif
	s->sr[s->sr_index] = val & sr_mask[s->sr_index];
	break;
    case 0x3c6:		/* DAC mask / Cirrus hidden DAC */
	cirrus_write_hidden_dac(s, val);
	break;
    case 0x3c7:		/* DAC read index */
	s->dac_read_index = val;
	s->dac_sub_index = 0;
	s->dac_state = 3;
	break;
    case 0x3c8:		/* DAC write index */
	s->dac_write_index = val;
	s->dac_sub_index = 0;
	s->dac_state = 0;
	break;
    case 0x3c9:		/* DAC data: commit RGB triple on 3rd byte */
	if (cirrus_hook_write_palette(s, val))
	    break;
	s->dac_cache[s->dac_sub_index] = val;
	if (++s->dac_sub_index == 3) {
	    memcpy(&s->palette[s->dac_write_index * 3], s->dac_cache, 3);
	    s->dac_sub_index = 0;
	    s->dac_write_index++;
	}
	break;
    case 0x3ce:		/* graphics controller index */
	s->gr_index = val;
	break;
    case 0x3cf:		/* graphics controller data */
	if (cirrus_hook_write_gr(s, s->gr_index, val))
	    break;
#ifdef DEBUG_VGA_REG
	printf("vga: write GR%x = 0x%02x\n", s->gr_index, val);
#endif
	s->gr[s->gr_index] = val & gr_mask[s->gr_index];
	break;
    case 0x3b4:		/* CRTC index */
    case 0x3d4:
	s->cr_index = val;
	break;
    case 0x3b5:		/* CRTC data */
    case 0x3d5:
	if (cirrus_hook_write_cr(s, s->cr_index, val))
	    break;
#ifdef DEBUG_VGA_REG
	printf("vga: write CR%x = 0x%02x\n", s->cr_index, val);
#endif
	/* handle CR0-7 protection */
	if ((s->cr[0x11] & 0x80) && s->cr_index <= 7) {
	    /* can always write bit 4 of CR7 */
	    if (s->cr_index == 7)
		s->cr[7] = (s->cr[7] & ~0x10) | (val & 0x10);
	    return;
	}
	/* NOTE: both arms store val unchanged; the split is kept as a
	   placeholder for registers that may need extra handling */
	switch (s->cr_index) {
	case 0x01:		/* horizontal display end */
	case 0x07:
	case 0x09:
	case 0x0c:
	case 0x0d:
	case 0x12:		/* vertical display end */
	    s->cr[s->cr_index] = val;
	    break;

	default:
	    s->cr[s->cr_index] = val;
	    break;
	}
	break;
    case 0x3ba:		/* feature control */
    case 0x3da:
	s->fcr = val & 0x10;
	break;
    }
}
+
+/***************************************
+ *
+ * memory-mapped I/O access
+ *
+ ***************************************/
+
+static uint32_t cirrus_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+ CirrusVGAState *s = (CirrusVGAState *) opaque;
+
+ addr &= CIRRUS_PNPMMIO_SIZE - 1;
+
+ if (addr >= 0x100) {
+ return cirrus_mmio_blt_read(s, addr - 0x100);
+ } else {
+ return vga_ioport_read(s, addr + 0x3c0);
+ }
+}
+
+static uint32_t cirrus_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = cirrus_mmio_readb(opaque, addr) << 8;
+ v |= cirrus_mmio_readb(opaque, addr + 1);
+#else
+ v = cirrus_mmio_readb(opaque, addr);
+ v |= cirrus_mmio_readb(opaque, addr + 1) << 8;
+#endif
+ return v;
+}
+
+static uint32_t cirrus_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = cirrus_mmio_readb(opaque, addr) << 24;
+ v |= cirrus_mmio_readb(opaque, addr + 1) << 16;
+ v |= cirrus_mmio_readb(opaque, addr + 2) << 8;
+ v |= cirrus_mmio_readb(opaque, addr + 3);
+#else
+ v = cirrus_mmio_readb(opaque, addr);
+ v |= cirrus_mmio_readb(opaque, addr + 1) << 8;
+ v |= cirrus_mmio_readb(opaque, addr + 2) << 16;
+ v |= cirrus_mmio_readb(opaque, addr + 3) << 24;
+#endif
+ return v;
+}
+
+static void cirrus_mmio_writeb(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ CirrusVGAState *s = (CirrusVGAState *) opaque;
+
+ addr &= CIRRUS_PNPMMIO_SIZE - 1;
+
+ if (addr >= 0x100) {
+ cirrus_mmio_blt_write(s, addr - 0x100, val);
+ } else {
+ vga_ioport_write(s, addr + 0x3c0, val);
+ }
+}
+
+static void cirrus_mmio_writew(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ cirrus_mmio_writeb(opaque, addr, (val >> 8) & 0xff);
+ cirrus_mmio_writeb(opaque, addr + 1, val & 0xff);
+#else
+ cirrus_mmio_writeb(opaque, addr, val & 0xff);
+ cirrus_mmio_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+#endif
+}
+
+static void cirrus_mmio_writel(void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ cirrus_mmio_writeb(opaque, addr, (val >> 24) & 0xff);
+ cirrus_mmio_writeb(opaque, addr + 1, (val >> 16) & 0xff);
+ cirrus_mmio_writeb(opaque, addr + 2, (val >> 8) & 0xff);
+ cirrus_mmio_writeb(opaque, addr + 3, val & 0xff);
+#else
+ cirrus_mmio_writeb(opaque, addr, val & 0xff);
+ cirrus_mmio_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+ cirrus_mmio_writeb(opaque, addr + 2, (val >> 16) & 0xff);
+ cirrus_mmio_writeb(opaque, addr + 3, (val >> 24) & 0xff);
+#endif
+}
+
+
/* Dispatch tables for the PCI PnP MMIO BAR. */
static CPUReadMemoryFunc *cirrus_mmio_read[3] = {
    cirrus_mmio_readb,
    cirrus_mmio_readw,
    cirrus_mmio_readl,
};

static CPUWriteMemoryFunc *cirrus_mmio_write[3] = {
    cirrus_mmio_writeb,
    cirrus_mmio_writew,
    cirrus_mmio_writel,
};
+
+/* load/save state */
+
/* Serialize device state for savevm/migration (version 1 format).
   Field order here must match cirrus_vga_load() exactly.  Note that
   gr[0]/gr[1] are not saved directly: they are reconstructed on load
   from the shadow GR0/GR1 values, so only gr[2..255] is written. */
static void cirrus_vga_save(QEMUFile *f, void *opaque)
{
    CirrusVGAState *s = opaque;

    qemu_put_be32s(f, &s->latch);
    qemu_put_8s(f, &s->sr_index);
    qemu_put_buffer(f, s->sr, 256);
    qemu_put_8s(f, &s->gr_index);
    qemu_put_8s(f, &s->cirrus_shadow_gr0);
    qemu_put_8s(f, &s->cirrus_shadow_gr1);
    qemu_put_buffer(f, s->gr + 2, 254);
    qemu_put_8s(f, &s->ar_index);
    qemu_put_buffer(f, s->ar, 21);
    qemu_put_be32s(f, &s->ar_flip_flop);
    qemu_put_8s(f, &s->cr_index);
    qemu_put_buffer(f, s->cr, 256);
    qemu_put_8s(f, &s->msr);
    qemu_put_8s(f, &s->fcr);
    qemu_put_8s(f, &s->st00);
    qemu_put_8s(f, &s->st01);

    qemu_put_8s(f, &s->dac_state);
    qemu_put_8s(f, &s->dac_sub_index);
    qemu_put_8s(f, &s->dac_read_index);
    qemu_put_8s(f, &s->dac_write_index);
    qemu_put_buffer(f, s->dac_cache, 3);
    qemu_put_buffer(f, s->palette, 768);

    qemu_put_be32s(f, &s->bank_offset);

    qemu_put_8s(f, &s->cirrus_hidden_dac_lockindex);
    qemu_put_8s(f, &s->cirrus_hidden_dac_data);

    qemu_put_be32s(f, &s->hw_cursor_x);
    qemu_put_be32s(f, &s->hw_cursor_y);
    /* XXX: we do not save the bitblt state - we assume we do not save
       the state when the blitter is active */
}
+
/* Restore device state saved by cirrus_vga_save().  Field order must
   match the save path; only format version 1 is accepted.
   Returns 0 on success, -EINVAL on version mismatch. */
static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
{
    CirrusVGAState *s = opaque;

    if (version_id != 1)
	return -EINVAL;

    qemu_get_be32s(f, &s->latch);
    qemu_get_8s(f, &s->sr_index);
    qemu_get_buffer(f, s->sr, 256);
    qemu_get_8s(f, &s->gr_index);
    qemu_get_8s(f, &s->cirrus_shadow_gr0);
    qemu_get_8s(f, &s->cirrus_shadow_gr1);
    /* gr[0]/gr[1] are reconstructed from the shadow registers (only
       the low nibble is architecturally visible) */
    s->gr[0x00] = s->cirrus_shadow_gr0 & 0x0f;
    s->gr[0x01] = s->cirrus_shadow_gr1 & 0x0f;
    qemu_get_buffer(f, s->gr + 2, 254);
    qemu_get_8s(f, &s->ar_index);
    qemu_get_buffer(f, s->ar, 21);
    qemu_get_be32s(f, &s->ar_flip_flop);
    qemu_get_8s(f, &s->cr_index);
    qemu_get_buffer(f, s->cr, 256);
    qemu_get_8s(f, &s->msr);
    qemu_get_8s(f, &s->fcr);
    qemu_get_8s(f, &s->st00);
    qemu_get_8s(f, &s->st01);

    qemu_get_8s(f, &s->dac_state);
    qemu_get_8s(f, &s->dac_sub_index);
    qemu_get_8s(f, &s->dac_read_index);
    qemu_get_8s(f, &s->dac_write_index);
    qemu_get_buffer(f, s->dac_cache, 3);
    qemu_get_buffer(f, s->palette, 768);

    qemu_get_be32s(f, &s->bank_offset);

    qemu_get_8s(f, &s->cirrus_hidden_dac_lockindex);
    qemu_get_8s(f, &s->cirrus_hidden_dac_data);

    qemu_get_be32s(f, &s->hw_cursor_x);
    qemu_get_be32s(f, &s->hw_cursor_y);

    /* force refresh */
    s->graphic_mode = -1;
    cirrus_update_bank_ptr(s, 0);
    cirrus_update_bank_ptr(s, 1);
    return 0;
}
+
+/***************************************
+ *
+ * initialize
+ *
+ ***************************************/
+
+static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci)
+{
+ int vga_io_memory, i;
+ static int inited;
+
+ if (!inited) {
+ inited = 1;
+ for(i = 0;i < 256; i++)
+ rop_to_index[i] = CIRRUS_ROP_NOP_INDEX; /* nop rop */
+ rop_to_index[CIRRUS_ROP_0] = 0;
+ rop_to_index[CIRRUS_ROP_SRC_AND_DST] = 1;
+ rop_to_index[CIRRUS_ROP_NOP] = 2;
+ rop_to_index[CIRRUS_ROP_SRC_AND_NOTDST] = 3;
+ rop_to_index[CIRRUS_ROP_NOTDST] = 4;
+ rop_to_index[CIRRUS_ROP_SRC] = 5;
+ rop_to_index[CIRRUS_ROP_1] = 6;
+ rop_to_index[CIRRUS_ROP_NOTSRC_AND_DST] = 7;
+ rop_to_index[CIRRUS_ROP_SRC_XOR_DST] = 8;
+ rop_to_index[CIRRUS_ROP_SRC_OR_DST] = 9;
+ rop_to_index[CIRRUS_ROP_NOTSRC_OR_NOTDST] = 10;
+ rop_to_index[CIRRUS_ROP_SRC_NOTXOR_DST] = 11;
+ rop_to_index[CIRRUS_ROP_SRC_OR_NOTDST] = 12;
+ rop_to_index[CIRRUS_ROP_NOTSRC] = 13;
+ rop_to_index[CIRRUS_ROP_NOTSRC_OR_DST] = 14;
+ rop_to_index[CIRRUS_ROP_NOTSRC_AND_NOTDST] = 15;
+ }
+
+ register_ioport_write(0x3c0, 16, 1, vga_ioport_write, s);
+
+ register_ioport_write(0x3b4, 2, 1, vga_ioport_write, s);
+ register_ioport_write(0x3d4, 2, 1, vga_ioport_write, s);
+ register_ioport_write(0x3ba, 1, 1, vga_ioport_write, s);
+ register_ioport_write(0x3da, 1, 1, vga_ioport_write, s);
+
+ register_ioport_read(0x3c0, 16, 1, vga_ioport_read, s);
+
+ register_ioport_read(0x3b4, 2, 1, vga_ioport_read, s);
+ register_ioport_read(0x3d4, 2, 1, vga_ioport_read, s);
+ register_ioport_read(0x3ba, 1, 1, vga_ioport_read, s);
+ register_ioport_read(0x3da, 1, 1, vga_ioport_read, s);
+
+ vga_io_memory = cpu_register_io_memory(0, cirrus_vga_mem_read,
+ cirrus_vga_mem_write, s);
+ cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
+ vga_io_memory);
+
+ s->sr[0x06] = 0x0f;
+ if (device_id == CIRRUS_ID_CLGD5446) {
+ /* 4MB 64 bit memory config, always PCI */
+ s->sr[0x1F] = 0x2d; // MemClock
+ s->gr[0x18] = 0x0f; // fastest memory configuration
+#if 1
+ s->sr[0x0f] = 0x98;
+ s->sr[0x17] = 0x20;
+ s->sr[0x15] = 0x04; /* memory size, 3=2MB, 4=4MB */
+ s->real_vram_size = 4096 * 1024;
+#else
+ s->sr[0x0f] = 0x18;
+ s->sr[0x17] = 0x20;
+ s->sr[0x15] = 0x03; /* memory size, 3=2MB, 4=4MB */
+ s->real_vram_size = 2048 * 1024;
+#endif
+ } else {
+ s->sr[0x1F] = 0x22; // MemClock
+ s->sr[0x0F] = CIRRUS_MEMSIZE_2M;
+ if (is_pci)
+ s->sr[0x17] = CIRRUS_BUSTYPE_PCI;
+ else
+ s->sr[0x17] = CIRRUS_BUSTYPE_ISA;
+ s->real_vram_size = 2048 * 1024;
+ s->sr[0x15] = 0x03; /* memory size, 3=2MB, 4=4MB */
+ }
+ s->cr[0x27] = device_id;
+
+ /* Win2K seems to assume that the pattern buffer is at 0xff
+ initially ! */
+ memset(s->vram_ptr, 0xff, s->real_vram_size);
+
+ s->cirrus_hidden_dac_lockindex = 5;
+ s->cirrus_hidden_dac_data = 0;
+
+ /* I/O handler for LFB */
+ s->cirrus_linear_io_addr =
+ cpu_register_io_memory(0, cirrus_linear_read, cirrus_linear_write,
+ s);
+ s->cirrus_linear_write = cpu_get_io_memory_write(s->cirrus_linear_io_addr);
+
+ /* I/O handler for LFB */
+ s->cirrus_linear_bitblt_io_addr =
+ cpu_register_io_memory(0, cirrus_linear_bitblt_read, cirrus_linear_bitblt_write,
+ s);
+
+ /* I/O handler for memory-mapped I/O */
+ s->cirrus_mmio_io_addr =
+ cpu_register_io_memory(0, cirrus_mmio_read, cirrus_mmio_write, s);
+
+ /* XXX: s->vram_size must be a power of two */
+ s->cirrus_addr_mask = s->real_vram_size - 1;
+ s->linear_mmio_mask = s->real_vram_size - 256;
+
+ s->get_bpp = cirrus_get_bpp;
+ s->get_offsets = cirrus_get_offsets;
+ s->get_resolution = cirrus_get_resolution;
+ s->cursor_invalidate = cirrus_cursor_invalidate;
+ s->cursor_draw_line = cirrus_cursor_draw_line;
+
+ register_savevm("cirrus_vga", 0, 1, cirrus_vga_save, cirrus_vga_load, s);
+}
+
+/***************************************
+ *
+ * ISA bus support
+ *
+ ***************************************/
+
+void isa_cirrus_vga_init(DisplayState *ds, uint8_t *vga_ram_base,
+ unsigned long vga_ram_offset, int vga_ram_size)
+{
+ CirrusVGAState *s;
+
+ s = qemu_mallocz(sizeof(CirrusVGAState));
+
+ vga_common_init((VGAState *)s,
+ ds, vga_ram_base, vga_ram_offset, vga_ram_size);
+ cirrus_init_common(s, CIRRUS_ID_CLGD5430, 0);
+ /* XXX ISA-LFB support */
+}
+
+/***************************************
+ *
+ * PCI bus support
+ *
+ ***************************************/
+
+static void cirrus_pci_lfb_map(PCIDevice *d, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ CirrusVGAState *s = &((PCICirrusVGAState *)d)->cirrus_vga;
+
+ /* XXX: add byte swapping apertures */
+ cpu_register_physical_memory(addr, s->vram_size,
+ s->cirrus_linear_io_addr);
+ cpu_register_physical_memory(addr + 0x1000000, 0x400000,
+ s->cirrus_linear_bitblt_io_addr);
+}
+
+static void cirrus_pci_mmio_map(PCIDevice *d, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ CirrusVGAState *s = &((PCICirrusVGAState *)d)->cirrus_vga;
+
+ cpu_register_physical_memory(addr, CIRRUS_PNPMMIO_SIZE,
+ s->cirrus_mmio_io_addr);
+}
+
+void pci_cirrus_vga_init(PCIBus *bus, DisplayState *ds, uint8_t *vga_ram_base,
+ unsigned long vga_ram_offset, int vga_ram_size)
+{
+ PCICirrusVGAState *d;
+ uint8_t *pci_conf;
+ CirrusVGAState *s;
+ int device_id;
+
+ device_id = CIRRUS_ID_CLGD5446;
+
+ /* setup PCI configuration registers */
+ d = (PCICirrusVGAState *)pci_register_device(bus, "Cirrus VGA",
+ sizeof(PCICirrusVGAState),
+ -1, NULL, NULL);
+ pci_conf = d->dev.config;
+ pci_conf[0x00] = (uint8_t) (PCI_VENDOR_CIRRUS & 0xff);
+ pci_conf[0x01] = (uint8_t) (PCI_VENDOR_CIRRUS >> 8);
+ pci_conf[0x02] = (uint8_t) (device_id & 0xff);
+ pci_conf[0x03] = (uint8_t) (device_id >> 8);
+ pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
+ pci_conf[0x0a] = PCI_CLASS_SUB_VGA;
+ pci_conf[0x0b] = PCI_CLASS_BASE_DISPLAY;
+ pci_conf[0x0e] = PCI_CLASS_HEADERTYPE_00h;
+
+ /* setup VGA */
+ s = &d->cirrus_vga;
+ vga_common_init((VGAState *)s,
+ ds, vga_ram_base, vga_ram_offset, vga_ram_size);
+ cirrus_init_common(s, device_id, 1);
+
+ /* setup memory space */
+ /* memory #0 LFB */
+ /* memory #1 memory-mapped I/O */
+ /* XXX: s->vram_size must be a power of two */
+ pci_register_io_region((PCIDevice *)d, 0, 0x2000000,
+ PCI_ADDRESS_SPACE_MEM_PREFETCH, cirrus_pci_lfb_map);
+ if (device_id == CIRRUS_ID_CLGD5446) {
+ pci_register_io_region((PCIDevice *)d, 1, CIRRUS_PNPMMIO_SIZE,
+ PCI_ADDRESS_SPACE_MEM, cirrus_pci_mmio_map);
+ }
+ /* XXX: ROM BIOS */
+}
diff --git a/tools/ioemu/hw/cirrus_vga_rop.h b/tools/ioemu/hw/cirrus_vga_rop.h
new file mode 100644
index 0000000000..c54f1258b3
--- /dev/null
+++ b/tools/ioemu/hw/cirrus_vga_rop.h
@@ -0,0 +1,78 @@
+/*
+ * QEMU Cirrus CLGD 54xx VGA Emulator.
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+static void
+glue(cirrus_bitblt_rop_fwd_, ROP_NAME)(CirrusVGAState *s,
+ uint8_t *dst,const uint8_t *src,
+ int dstpitch,int srcpitch,
+ int bltwidth,int bltheight)
+{
+ int x,y;
+ dstpitch -= bltwidth;
+ srcpitch -= bltwidth;
+ for (y = 0; y < bltheight; y++) {
+ for (x = 0; x < bltwidth; x++) {
+ ROP_OP(*dst, *src);
+ dst++;
+ src++;
+ }
+ dst += dstpitch;
+ src += srcpitch;
+ }
+}
+
+static void
+glue(cirrus_bitblt_rop_bkwd_, ROP_NAME)(CirrusVGAState *s,
+ uint8_t *dst,const uint8_t *src,
+ int dstpitch,int srcpitch,
+ int bltwidth,int bltheight)
+{
+ int x,y;
+ dstpitch += bltwidth;
+ srcpitch += bltwidth;
+ for (y = 0; y < bltheight; y++) {
+ for (x = 0; x < bltwidth; x++) {
+ ROP_OP(*dst, *src);
+ dst--;
+ src--;
+ }
+ dst += dstpitch;
+ src += srcpitch;
+ }
+}
+
+#define DEPTH 8
+#include "cirrus_vga_rop2.h"
+
+#define DEPTH 16
+#include "cirrus_vga_rop2.h"
+
+#define DEPTH 24
+#include "cirrus_vga_rop2.h"
+
+#define DEPTH 32
+#include "cirrus_vga_rop2.h"
+
+#undef ROP_NAME
+#undef ROP_OP
diff --git a/tools/ioemu/hw/cirrus_vga_rop2.h b/tools/ioemu/hw/cirrus_vga_rop2.h
new file mode 100644
index 0000000000..5521870c8b
--- /dev/null
+++ b/tools/ioemu/hw/cirrus_vga_rop2.h
@@ -0,0 +1,260 @@
+/*
+ * QEMU Cirrus CLGD 54xx VGA Emulator.
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#if DEPTH == 8
+#define PUTPIXEL() ROP_OP(d[0], col)
+#elif DEPTH == 16
+#define PUTPIXEL() ROP_OP(((uint16_t *)d)[0], col);
+#elif DEPTH == 24
+#define PUTPIXEL() ROP_OP(d[0], col); \
+ ROP_OP(d[1], (col >> 8)); \
+ ROP_OP(d[2], (col >> 16))
+#elif DEPTH == 32
+#define PUTPIXEL() ROP_OP(((uint32_t *)d)[0], col)
+#else
+#error unsupported DEPTH
+#endif
+
+static void
+glue(glue(glue(cirrus_patternfill_, ROP_NAME), _),DEPTH)
+ (CirrusVGAState * s, uint8_t * dst,
+ const uint8_t * src,
+ int dstpitch, int srcpitch,
+ int bltwidth, int bltheight)
+{
+ uint8_t *d;
+ int x, y, pattern_y, pattern_pitch, pattern_x;
+ unsigned int col;
+ const uint8_t *src1;
+
+#if DEPTH == 8
+ pattern_pitch = 8;
+#elif DEPTH == 16
+ pattern_pitch = 16;
+#else
+ pattern_pitch = 32;
+#endif
+ pattern_y = s->cirrus_blt_srcaddr & 7;
+ pattern_x = 0;
+ for(y = 0; y < bltheight; y++) {
+ d = dst;
+ src1 = src + pattern_y * pattern_pitch;
+ for (x = 0; x < bltwidth; x += (DEPTH / 8)) {
+#if DEPTH == 8
+ col = src1[pattern_x];
+ pattern_x = (pattern_x + 1) & 7;
+#elif DEPTH == 16
+ col = ((uint16_t *)(src1 + pattern_x))[0];
+ pattern_x = (pattern_x + 2) & 15;
+#elif DEPTH == 24
+ {
+ const uint8_t *src2 = src1 + pattern_x * 3;
+ col = src2[0] | (src2[1] << 8) | (src2[2] << 16);
+ pattern_x = (pattern_x + 1) & 7;
+ }
+#else
+ col = ((uint32_t *)(src1 + pattern_x))[0];
+ pattern_x = (pattern_x + 4) & 31;
+#endif
+ PUTPIXEL();
+ d += (DEPTH / 8);
+ }
+ pattern_y = (pattern_y + 1) & 7;
+ dst += dstpitch;
+ }
+}
+
+/* NOTE: srcpitch is ignored */
+static void
+glue(glue(glue(cirrus_colorexpand_transp_, ROP_NAME), _),DEPTH)
+ (CirrusVGAState * s, uint8_t * dst,
+ const uint8_t * src,
+ int dstpitch, int srcpitch,
+ int bltwidth, int bltheight)
+{
+ uint8_t *d;
+ int x, y;
+ unsigned bits, bits_xor;
+ unsigned int col;
+ unsigned bitmask;
+ unsigned index;
+ int srcskipleft = 0;
+
+ if (s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_COLOREXPINV) {
+ bits_xor = 0xff;
+ col = s->cirrus_blt_bgcol;
+ } else {
+ bits_xor = 0x00;
+ col = s->cirrus_blt_fgcol;
+ }
+
+ for(y = 0; y < bltheight; y++) {
+ bitmask = 0x80 >> srcskipleft;
+ bits = *src++ ^ bits_xor;
+ d = dst;
+ for (x = 0; x < bltwidth; x += (DEPTH / 8)) {
+ if ((bitmask & 0xff) == 0) {
+ bitmask = 0x80;
+ bits = *src++ ^ bits_xor;
+ }
+ index = (bits & bitmask);
+ if (index) {
+ PUTPIXEL();
+ }
+ d += (DEPTH / 8);
+ bitmask >>= 1;
+ }
+ dst += dstpitch;
+ }
+}
+
+static void
+glue(glue(glue(cirrus_colorexpand_, ROP_NAME), _),DEPTH)
+ (CirrusVGAState * s, uint8_t * dst,
+ const uint8_t * src,
+ int dstpitch, int srcpitch,
+ int bltwidth, int bltheight)
+{
+ uint32_t colors[2];
+ uint8_t *d;
+ int x, y;
+ unsigned bits;
+ unsigned int col;
+ unsigned bitmask;
+ int srcskipleft = 0;
+
+ colors[0] = s->cirrus_blt_bgcol;
+ colors[1] = s->cirrus_blt_fgcol;
+ for(y = 0; y < bltheight; y++) {
+ bitmask = 0x80 >> srcskipleft;
+ bits = *src++;
+ d = dst;
+ for (x = 0; x < bltwidth; x += (DEPTH / 8)) {
+ if ((bitmask & 0xff) == 0) {
+ bitmask = 0x80;
+ bits = *src++;
+ }
+ col = colors[!!(bits & bitmask)];
+ PUTPIXEL();
+ d += (DEPTH / 8);
+ bitmask >>= 1;
+ }
+ dst += dstpitch;
+ }
+}
+
+static void
+glue(glue(glue(cirrus_colorexpand_pattern_transp_, ROP_NAME), _),DEPTH)
+ (CirrusVGAState * s, uint8_t * dst,
+ const uint8_t * src,
+ int dstpitch, int srcpitch,
+ int bltwidth, int bltheight)
+{
+ uint8_t *d;
+ int x, y, bitpos, pattern_y;
+ unsigned int bits, bits_xor;
+ unsigned int col;
+
+ if (s->cirrus_blt_modeext & CIRRUS_BLTMODEEXT_COLOREXPINV) {
+ bits_xor = 0xff;
+ col = s->cirrus_blt_bgcol;
+ } else {
+ bits_xor = 0x00;
+ col = s->cirrus_blt_fgcol;
+ }
+ pattern_y = s->cirrus_blt_srcaddr & 7;
+
+ for(y = 0; y < bltheight; y++) {
+ bits = src[pattern_y] ^ bits_xor;
+ bitpos = 7;
+ d = dst;
+ for (x = 0; x < bltwidth; x += (DEPTH / 8)) {
+ if ((bits >> bitpos) & 1) {
+ PUTPIXEL();
+ }
+ d += (DEPTH / 8);
+ bitpos = (bitpos - 1) & 7;
+ }
+ pattern_y = (pattern_y + 1) & 7;
+ dst += dstpitch;
+ }
+}
+
+static void
+glue(glue(glue(cirrus_colorexpand_pattern_, ROP_NAME), _),DEPTH)
+ (CirrusVGAState * s, uint8_t * dst,
+ const uint8_t * src,
+ int dstpitch, int srcpitch,
+ int bltwidth, int bltheight)
+{
+ uint32_t colors[2];
+ uint8_t *d;
+ int x, y, bitpos, pattern_y;
+ unsigned int bits;
+ unsigned int col;
+
+ colors[0] = s->cirrus_blt_bgcol;
+ colors[1] = s->cirrus_blt_fgcol;
+ pattern_y = s->cirrus_blt_srcaddr & 7;
+
+ for(y = 0; y < bltheight; y++) {
+ bits = src[pattern_y];
+ bitpos = 7;
+ d = dst;
+ for (x = 0; x < bltwidth; x += (DEPTH / 8)) {
+ col = colors[(bits >> bitpos) & 1];
+ PUTPIXEL();
+ d += (DEPTH / 8);
+ bitpos = (bitpos - 1) & 7;
+ }
+ pattern_y = (pattern_y + 1) & 7;
+ dst += dstpitch;
+ }
+}
+
+static void
+glue(glue(glue(cirrus_fill_, ROP_NAME), _),DEPTH)
+ (CirrusVGAState *s,
+ uint8_t *dst, int dst_pitch,
+ int width, int height)
+{
+ uint8_t *d, *d1;
+ uint32_t col;
+ int x, y;
+
+ col = s->cirrus_blt_fgcol;
+
+ d1 = dst;
+ for(y = 0; y < height; y++) {
+ d = d1;
+ for(x = 0; x < width; x += (DEPTH / 8)) {
+ PUTPIXEL();
+ d += (DEPTH / 8);
+ }
+ d1 += dst_pitch;
+ }
+}
+
+#undef DEPTH
+#undef PUTPIXEL
diff --git a/tools/ioemu/hw/cuda.c b/tools/ioemu/hw/cuda.c
new file mode 100644
index 0000000000..c05cdeb5fe
--- /dev/null
+++ b/tools/ioemu/hw/cuda.c
@@ -0,0 +1,614 @@
+/*
+ * QEMU CUDA support
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+//#define DEBUG_CUDA
+//#define DEBUG_CUDA_PACKET
+
+/* Bits in B data register: all active low */
+#define TREQ 0x08 /* Transfer request (input) */
+#define TACK 0x10 /* Transfer acknowledge (output) */
+#define TIP 0x20 /* Transfer in progress (output) */
+
+/* Bits in ACR */
+#define SR_CTRL 0x1c /* Shift register control bits */
+#define SR_EXT 0x0c /* Shift on external clock */
+#define SR_OUT 0x10 /* Shift out if 1 */
+
+/* Bits in IFR and IER */
+#define IER_SET 0x80 /* set bits in IER */
+#define IER_CLR 0 /* clear bits in IER */
+#define SR_INT 0x04 /* Shift register full/empty */
+#define T1_INT 0x40 /* Timer 1 interrupt */
+
+/* Bits in ACR */
+#define T1MODE 0xc0 /* Timer 1 mode */
+#define T1MODE_CONT 0x40 /* continuous interrupts */
+
+/* commands (1st byte) */
+#define ADB_PACKET 0
+#define CUDA_PACKET 1
+#define ERROR_PACKET 2
+#define TIMER_PACKET 3
+#define POWER_PACKET 4
+#define MACIIC_PACKET 5
+#define PMU_PACKET 6
+
+
+/* CUDA commands (2nd byte) */
+#define CUDA_WARM_START 0x0
+#define CUDA_AUTOPOLL 0x1
+#define CUDA_GET_6805_ADDR 0x2
+#define CUDA_GET_TIME 0x3
+#define CUDA_GET_PRAM 0x7
+#define CUDA_SET_6805_ADDR 0x8
+#define CUDA_SET_TIME 0x9
+#define CUDA_POWERDOWN 0xa
+#define CUDA_POWERUP_TIME 0xb
+#define CUDA_SET_PRAM 0xc
+#define CUDA_MS_RESET 0xd
+#define CUDA_SEND_DFAC 0xe
+#define CUDA_BATTERY_SWAP_SENSE 0x10
+#define CUDA_RESET_SYSTEM 0x11
+#define CUDA_SET_IPL 0x12
+#define CUDA_FILE_SERVER_FLAG 0x13
+#define CUDA_SET_AUTO_RATE 0x14
+#define CUDA_GET_AUTO_RATE 0x16
+#define CUDA_SET_DEVICE_LIST 0x19
+#define CUDA_GET_DEVICE_LIST 0x1a
+#define CUDA_SET_ONE_SECOND_MODE 0x1b
+#define CUDA_SET_POWER_MESSAGES 0x21
+#define CUDA_GET_SET_IIC 0x22
+#define CUDA_WAKEUP 0x23
+#define CUDA_TIMER_TICKLE 0x24
+#define CUDA_COMBINED_FORMAT_IIC 0x25
+
+#define CUDA_TIMER_FREQ (4700000 / 6)
+#define CUDA_ADB_POLL_FREQ 50
+
+typedef struct CUDATimer {
+ unsigned int latch;
+ uint16_t counter_value; /* counter value at load time */
+ int64_t load_time;
+ int64_t next_irq_time;
+ QEMUTimer *timer;
+} CUDATimer;
+
+typedef struct CUDAState {
+ /* cuda registers */
+ uint8_t b; /* B-side data */
+ uint8_t a; /* A-side data */
+ uint8_t dirb; /* B-side direction (1=output) */
+ uint8_t dira; /* A-side direction (1=output) */
+ uint8_t sr; /* Shift register */
+ uint8_t acr; /* Auxiliary control register */
+ uint8_t pcr; /* Peripheral control register */
+ uint8_t ifr; /* Interrupt flag register */
+ uint8_t ier; /* Interrupt enable register */
+ uint8_t anh; /* A-side data, no handshake */
+
+ CUDATimer timers[2];
+
+ uint8_t last_b; /* last value of B register */
+ uint8_t last_acr; /* last value of B register */
+
+ int data_in_size;
+ int data_in_index;
+ int data_out_index;
+
+ int irq;
+ openpic_t *openpic;
+ uint8_t autopoll;
+ uint8_t data_in[128];
+ uint8_t data_out[16];
+ QEMUTimer *adb_poll_timer;
+} CUDAState;
+
+static CUDAState cuda_state;
+ADBBusState adb_bus;
+
+static void cuda_update(CUDAState *s);
+static void cuda_receive_packet_from_host(CUDAState *s,
+ const uint8_t *data, int len);
+static void cuda_timer_update(CUDAState *s, CUDATimer *ti,
+ int64_t current_time);
+
+static void cuda_update_irq(CUDAState *s)
+{
+ if (s->ifr & s->ier & (SR_INT | T1_INT)) {
+ openpic_set_irq(s->openpic, s->irq, 1);
+ } else {
+ openpic_set_irq(s->openpic, s->irq, 0);
+ }
+}
+
+static unsigned int get_counter(CUDATimer *s)
+{
+ int64_t d;
+ unsigned int counter;
+
+ d = muldiv64(qemu_get_clock(vm_clock) - s->load_time,
+ CUDA_TIMER_FREQ, ticks_per_sec);
+ if (d <= s->counter_value) {
+ counter = d;
+ } else {
+ counter = s->latch - 1 - ((d - s->counter_value) % s->latch);
+ }
+ return counter;
+}
+
+static void set_counter(CUDAState *s, CUDATimer *ti, unsigned int val)
+{
+#ifdef DEBUG_CUDA
+ printf("cuda: T%d.counter=%d\n",
+ 1 + (ti->timer == NULL), val);
+#endif
+ ti->load_time = qemu_get_clock(vm_clock);
+ ti->counter_value = val;
+ cuda_timer_update(s, ti, ti->load_time);
+}
+
+static int64_t get_next_irq_time(CUDATimer *s, int64_t current_time)
+{
+ int64_t d, next_time, base;
+ /* current counter value */
+ d = muldiv64(current_time - s->load_time,
+ CUDA_TIMER_FREQ, ticks_per_sec);
+ if (d <= s->counter_value) {
+ next_time = s->counter_value + 1;
+ } else {
+ base = ((d - s->counter_value) / s->latch);
+ base = (base * s->latch) + s->counter_value;
+ next_time = base + s->latch;
+ }
+#ifdef DEBUG_CUDA
+ printf("latch=%d counter=%lld delta_next=%lld\n",
+ s->latch, d, next_time - d);
+#endif
+ next_time = muldiv64(next_time, ticks_per_sec, CUDA_TIMER_FREQ) +
+ s->load_time;
+ if (next_time <= current_time)
+ next_time = current_time + 1;
+ return next_time;
+}
+
+static void cuda_timer_update(CUDAState *s, CUDATimer *ti,
+ int64_t current_time)
+{
+ if (!ti->timer)
+ return;
+ if ((s->acr & T1MODE) != T1MODE_CONT) {
+ qemu_del_timer(ti->timer);
+ } else {
+ ti->next_irq_time = get_next_irq_time(ti, current_time);
+ qemu_mod_timer(ti->timer, ti->next_irq_time);
+ }
+}
+
+static void cuda_timer1(void *opaque)
+{
+ CUDAState *s = opaque;
+ CUDATimer *ti = &s->timers[0];
+
+ cuda_timer_update(s, ti, ti->next_irq_time);
+ s->ifr |= T1_INT;
+ cuda_update_irq(s);
+}
+
+static uint32_t cuda_readb(void *opaque, target_phys_addr_t addr)
+{
+ CUDAState *s = opaque;
+ uint32_t val;
+
+ addr = (addr >> 9) & 0xf;
+ switch(addr) {
+ case 0:
+ val = s->b;
+ break;
+ case 1:
+ val = s->a;
+ break;
+ case 2:
+ val = s->dirb;
+ break;
+ case 3:
+ val = s->dira;
+ break;
+ case 4:
+ val = get_counter(&s->timers[0]) & 0xff;
+ s->ifr &= ~T1_INT;
+ cuda_update_irq(s);
+ break;
+ case 5:
+ val = get_counter(&s->timers[0]) >> 8;
+ s->ifr &= ~T1_INT;
+ cuda_update_irq(s);
+ break;
+ case 6:
+ val = s->timers[0].latch & 0xff;
+ break;
+ case 7:
+ val = (s->timers[0].latch >> 8) & 0xff;
+ break;
+ case 8:
+ val = get_counter(&s->timers[1]) & 0xff;
+ break;
+ case 9:
+ val = get_counter(&s->timers[1]) >> 8;
+ break;
+ case 10:
+ val = s->sr;
+ s->ifr &= ~SR_INT;
+ cuda_update_irq(s);
+ break;
+ case 11:
+ val = s->acr;
+ break;
+ case 12:
+ val = s->pcr;
+ break;
+ case 13:
+ val = s->ifr;
+ break;
+ case 14:
+ val = s->ier;
+ break;
+ default:
+ case 15:
+ val = s->anh;
+ break;
+ }
+#ifdef DEBUG_CUDA
+ if (addr != 13 || val != 0)
+ printf("cuda: read: reg=0x%x val=%02x\n", addr, val);
+#endif
+ return val;
+}
+
+static void cuda_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ CUDAState *s = opaque;
+
+ addr = (addr >> 9) & 0xf;
+#ifdef DEBUG_CUDA
+ printf("cuda: write: reg=0x%x val=%02x\n", addr, val);
+#endif
+
+ switch(addr) {
+ case 0:
+ s->b = val;
+ cuda_update(s);
+ break;
+ case 1:
+ s->a = val;
+ break;
+ case 2:
+ s->dirb = val;
+ break;
+ case 3:
+ s->dira = val;
+ break;
+ case 4:
+ val = val | (get_counter(&s->timers[0]) & 0xff00);
+ set_counter(s, &s->timers[0], val);
+ break;
+ case 5:
+ val = (val << 8) | (get_counter(&s->timers[0]) & 0xff);
+ set_counter(s, &s->timers[0], val);
+ break;
+ case 6:
+ s->timers[0].latch = (s->timers[0].latch & 0xff00) | val;
+ cuda_timer_update(s, &s->timers[0], qemu_get_clock(vm_clock));
+ break;
+ case 7:
+ s->timers[0].latch = (s->timers[0].latch & 0xff) | (val << 8);
+ cuda_timer_update(s, &s->timers[0], qemu_get_clock(vm_clock));
+ break;
+ case 8:
+ val = val | (get_counter(&s->timers[1]) & 0xff00);
+ set_counter(s, &s->timers[1], val);
+ break;
+ case 9:
+ val = (val << 8) | (get_counter(&s->timers[1]) & 0xff);
+ set_counter(s, &s->timers[1], val);
+ break;
+ case 10:
+ s->sr = val;
+ break;
+ case 11:
+ s->acr = val;
+ cuda_timer_update(s, &s->timers[0], qemu_get_clock(vm_clock));
+ cuda_update(s);
+ break;
+ case 12:
+ s->pcr = val;
+ break;
+ case 13:
+ /* reset bits */
+ s->ifr &= ~val;
+ cuda_update_irq(s);
+ break;
+ case 14:
+ if (val & IER_SET) {
+ /* set bits */
+ s->ier |= val & 0x7f;
+ } else {
+ /* reset bits */
+ s->ier &= ~val;
+ }
+ cuda_update_irq(s);
+ break;
+ default:
+ case 15:
+ s->anh = val;
+ break;
+ }
+}
+
+/* NOTE: TIP and TREQ are negated */
+static void cuda_update(CUDAState *s)
+{
+ int packet_received, len;
+
+ packet_received = 0;
+ if (!(s->b & TIP)) {
+ /* transfer requested from host */
+
+ if (s->acr & SR_OUT) {
+ /* data output */
+ if ((s->b & (TACK | TIP)) != (s->last_b & (TACK | TIP))) {
+ if (s->data_out_index < sizeof(s->data_out)) {
+#ifdef DEBUG_CUDA
+ printf("cuda: send: %02x\n", s->sr);
+#endif
+ s->data_out[s->data_out_index++] = s->sr;
+ s->ifr |= SR_INT;
+ cuda_update_irq(s);
+ }
+ }
+ } else {
+ if (s->data_in_index < s->data_in_size) {
+ /* data input */
+ if ((s->b & (TACK | TIP)) != (s->last_b & (TACK | TIP))) {
+ s->sr = s->data_in[s->data_in_index++];
+#ifdef DEBUG_CUDA
+ printf("cuda: recv: %02x\n", s->sr);
+#endif
+ /* indicate end of transfer */
+ if (s->data_in_index >= s->data_in_size) {
+ s->b = (s->b | TREQ);
+ }
+ s->ifr |= SR_INT;
+ cuda_update_irq(s);
+ }
+ }
+ }
+ } else {
+ /* no transfer requested: handle sync case */
+ if ((s->last_b & TIP) && (s->b & TACK) != (s->last_b & TACK)) {
+ /* update TREQ state each time TACK change state */
+ if (s->b & TACK)
+ s->b = (s->b | TREQ);
+ else
+ s->b = (s->b & ~TREQ);
+ s->ifr |= SR_INT;
+ cuda_update_irq(s);
+ } else {
+ if (!(s->last_b & TIP)) {
+ /* handle end of host to cuda transfert */
+ packet_received = (s->data_out_index > 0);
+ /* always an IRQ at the end of transfert */
+ s->ifr |= SR_INT;
+ cuda_update_irq(s);
+ }
+ /* signal if there is data to read */
+ if (s->data_in_index < s->data_in_size) {
+ s->b = (s->b & ~TREQ);
+ }
+ }
+ }
+
+ s->last_acr = s->acr;
+ s->last_b = s->b;
+
+ /* NOTE: cuda_receive_packet_from_host() can call cuda_update()
+ recursively */
+ if (packet_received) {
+ len = s->data_out_index;
+ s->data_out_index = 0;
+ cuda_receive_packet_from_host(s, s->data_out, len);
+ }
+}
+
+static void cuda_send_packet_to_host(CUDAState *s,
+ const uint8_t *data, int len)
+{
+#ifdef DEBUG_CUDA_PACKET
+ {
+ int i;
+ printf("cuda_send_packet_to_host:\n");
+ for(i = 0; i < len; i++)
+ printf(" %02x", data[i]);
+ printf("\n");
+ }
+#endif
+ memcpy(s->data_in, data, len);
+ s->data_in_size = len;
+ s->data_in_index = 0;
+ cuda_update(s);
+ s->ifr |= SR_INT;
+ cuda_update_irq(s);
+}
+
+static void cuda_adb_poll(void *opaque)
+{
+ CUDAState *s = opaque;
+ uint8_t obuf[ADB_MAX_OUT_LEN + 2];
+ int olen;
+
+ olen = adb_poll(&adb_bus, obuf + 2);
+ if (olen > 0) {
+ obuf[0] = ADB_PACKET;
+ obuf[1] = 0x40; /* polled data */
+ cuda_send_packet_to_host(s, obuf, olen + 2);
+ }
+ qemu_mod_timer(s->adb_poll_timer,
+ qemu_get_clock(vm_clock) +
+ (ticks_per_sec / CUDA_ADB_POLL_FREQ));
+}
+
+static void cuda_receive_packet(CUDAState *s,
+ const uint8_t *data, int len)
+{
+ uint8_t obuf[16];
+ int ti, autopoll;
+
+ switch(data[0]) {
+ case CUDA_AUTOPOLL:
+ autopoll = (data[1] != 0);
+ if (autopoll != s->autopoll) {
+ s->autopoll = autopoll;
+ if (autopoll) {
+ qemu_mod_timer(s->adb_poll_timer,
+ qemu_get_clock(vm_clock) +
+ (ticks_per_sec / CUDA_ADB_POLL_FREQ));
+ } else {
+ qemu_del_timer(s->adb_poll_timer);
+ }
+ }
+ obuf[0] = CUDA_PACKET;
+ obuf[1] = data[1];
+ cuda_send_packet_to_host(s, obuf, 2);
+ break;
+ case CUDA_GET_TIME:
+ /* XXX: add time support ? */
+ ti = time(NULL);
+ obuf[0] = CUDA_PACKET;
+ obuf[1] = 0;
+ obuf[2] = 0;
+ obuf[3] = ti >> 24;
+ obuf[4] = ti >> 16;
+ obuf[5] = ti >> 8;
+ obuf[6] = ti;
+ cuda_send_packet_to_host(s, obuf, 7);
+ break;
+ case CUDA_SET_TIME:
+ case CUDA_FILE_SERVER_FLAG:
+ case CUDA_SET_DEVICE_LIST:
+ case CUDA_SET_AUTO_RATE:
+ case CUDA_SET_POWER_MESSAGES:
+ obuf[0] = CUDA_PACKET;
+ obuf[1] = 0;
+ cuda_send_packet_to_host(s, obuf, 2);
+ break;
+ default:
+ break;
+ }
+}
+
+static void cuda_receive_packet_from_host(CUDAState *s,
+ const uint8_t *data, int len)
+{
+#ifdef DEBUG_CUDA_PACKET
+ {
+ int i;
+ printf("cuda_receive_packet_to_host:\n");
+ for(i = 0; i < len; i++)
+ printf(" %02x", data[i]);
+ printf("\n");
+ }
+#endif
+ switch(data[0]) {
+ case ADB_PACKET:
+ {
+ uint8_t obuf[ADB_MAX_OUT_LEN + 2];
+ int olen;
+ olen = adb_request(&adb_bus, obuf + 2, data + 1, len - 1);
+ if (olen > 0) {
+ obuf[0] = ADB_PACKET;
+ obuf[1] = 0x00;
+ } else {
+ /* error */
+ obuf[0] = ADB_PACKET;
+ obuf[1] = -olen;
+ olen = 0;
+ }
+ cuda_send_packet_to_host(s, obuf, olen + 2);
+ }
+ break;
+ case CUDA_PACKET:
+ cuda_receive_packet(s, data + 1, len - 1);
+ break;
+ }
+}
+
+static void cuda_writew (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+}
+
+static void cuda_writel (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+}
+
+static uint32_t cuda_readw (void *opaque, target_phys_addr_t addr)
+{
+ return 0;
+}
+
+static uint32_t cuda_readl (void *opaque, target_phys_addr_t addr)
+{
+ return 0;
+}
+
+static CPUWriteMemoryFunc *cuda_write[] = {
+ &cuda_writeb,
+ &cuda_writew,
+ &cuda_writel,
+};
+
+static CPUReadMemoryFunc *cuda_read[] = {
+ &cuda_readb,
+ &cuda_readw,
+ &cuda_readl,
+};
+
+int cuda_init(openpic_t *openpic, int irq)
+{
+ CUDAState *s = &cuda_state;
+ int cuda_mem_index;
+
+ s->openpic = openpic;
+ s->irq = irq;
+
+ s->timers[0].timer = qemu_new_timer(vm_clock, cuda_timer1, s);
+ s->timers[0].latch = 0x10000;
+ set_counter(s, &s->timers[0], 0xffff);
+ s->timers[1].latch = 0x10000;
+ s->ier = T1_INT | SR_INT;
+ set_counter(s, &s->timers[1], 0xffff);
+
+ s->adb_poll_timer = qemu_new_timer(vm_clock, cuda_adb_poll, s);
+ cuda_mem_index = cpu_register_io_memory(0, cuda_read, cuda_write, s);
+ return cuda_mem_index;
+}
diff --git a/tools/ioemu/hw/dma.c b/tools/ioemu/hw/dma.c
new file mode 100644
index 0000000000..ce828699f0
--- /dev/null
+++ b/tools/ioemu/hw/dma.c
@@ -0,0 +1,535 @@
+/*
+ * QEMU DMA emulation
+ *
+ * Copyright (c) 2003-2004 Vassili Karpov (malc)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* #define DEBUG_DMA */
+
+#define dolog(...) fprintf (stderr, "dma: " __VA_ARGS__)
+#ifdef DEBUG_DMA
+#define lwarn(...) fprintf (stderr, "dma: " __VA_ARGS__)
+#define linfo(...) fprintf (stderr, "dma: " __VA_ARGS__)
+#define ldebug(...) fprintf (stderr, "dma: " __VA_ARGS__)
+#else
+#define lwarn(...)
+#define linfo(...)
+#define ldebug(...)
+#endif
+
+#define LENOFA(a) ((int) (sizeof(a)/sizeof(a[0])))
+
/* Per-channel state of one 8237-style DMA channel. */
struct dma_regs {
    int now[2];        /* current address (ADDR) / transfer position (COUNT) */
    uint16_t base[2];  /* programmed base address / base count */
    uint8_t mode;      /* mode register (direction, autoinit, op mode bits) */
    uint8_t page;      /* page register: address bits 16-23 */
    uint8_t pageh;     /* high page register: address bits 24-30 */
    uint8_t dack;      /* DACK state (only saved/restored here) */
    uint8_t eop;       /* EOP state (only saved/restored here) */
    DMA_transfer_handler transfer_handler; /* device callback for transfers */
    void *opaque;      /* argument passed back to transfer_handler */
};
+
/* Indices into dma_regs.now[] / dma_regs.base[]. */
#define ADDR 0
#define COUNT 1

/* One 8237 DMA controller; dma_controllers[0] serves channels 0-3 (8-bit)
 * and dma_controllers[1] channels 4-7 (16-bit). */
static struct dma_cont {
    uint8_t status;    /* status register: TC bits 0-3, request bits 4-7 */
    uint8_t command;   /* command register (mostly unsupported, see enum) */
    uint8_t mask;      /* per-channel mask bits */
    uint8_t flip_flop; /* byte-pointer flip-flop for 16-bit register access */
    int dshift;        /* port-address shift: 0 for DMA1, 1 for DMA2 */
    struct dma_regs regs[4];
} dma_controllers[2];
+
/* Bits of the 8237 command register.  Everything except
 * CMD_BLOCK_CONTROLLER is rejected by write_cont(). */
enum {
    CMD_MEMORY_TO_MEMORY = 0x01,
    CMD_FIXED_ADDRESS    = 0x02,
    CMD_BLOCK_CONTROLLER = 0x04,
    CMD_COMPRESSED_TIME  = 0x08,
    CMD_CYCLIC_PRIORITY  = 0x10,
    CMD_EXTENDED_WRITE   = 0x20,
    CMD_LOW_DREQ         = 0x40,
    CMD_LOW_DACK         = 0x80,
    /* mask of all unimplemented command bits */
    CMD_NOT_SUPPORTED = CMD_MEMORY_TO_MEMORY | CMD_FIXED_ADDRESS
    | CMD_COMPRESSED_TIME | CMD_CYCLIC_PRIORITY | CMD_EXTENDED_WRITE
    | CMD_LOW_DREQ | CMD_LOW_DACK

};
+
/* Map page-register port offset (0-7) to DMA channel; -1 = unused port. */
static int channels[8] = {-1, 2, 3, 1, -1, -1, -1, 0};
+
+static void write_page (void *opaque, uint32_t nport, uint32_t data)
+{
+ struct dma_cont *d = opaque;
+ int ichan;
+
+ ichan = channels[nport & 7];
+ if (-1 == ichan) {
+ dolog ("invalid channel %#x %#x\n", nport, data);
+ return;
+ }
+ d->regs[ichan].page = data;
+}
+
+static void write_pageh (void *opaque, uint32_t nport, uint32_t data)
+{
+ struct dma_cont *d = opaque;
+ int ichan;
+
+ ichan = channels[nport & 7];
+ if (-1 == ichan) {
+ dolog ("invalid channel %#x %#x\n", nport, data);
+ return;
+ }
+ d->regs[ichan].pageh = data;
+}
+
+static uint32_t read_page (void *opaque, uint32_t nport)
+{
+ struct dma_cont *d = opaque;
+ int ichan;
+
+ ichan = channels[nport & 7];
+ if (-1 == ichan) {
+ dolog ("invalid channel read %#x\n", nport);
+ return 0;
+ }
+ return d->regs[ichan].page;
+}
+
+static uint32_t read_pageh (void *opaque, uint32_t nport)
+{
+ struct dma_cont *d = opaque;
+ int ichan;
+
+ ichan = channels[nport & 7];
+ if (-1 == ichan) {
+ dolog ("invalid channel read %#x\n", nport);
+ return 0;
+ }
+ return d->regs[ichan].pageh;
+}
+
+static inline void init_chan (struct dma_cont *d, int ichan)
+{
+ struct dma_regs *r;
+
+ r = d->regs + ichan;
+ r->now[ADDR] = r->base[ADDR] << d->dshift;
+ r->now[COUNT] = 0;
+}
+
+static inline int getff (struct dma_cont *d)
+{
+ int ff;
+
+ ff = d->flip_flop;
+ d->flip_flop = !ff;
+ return ff;
+}
+
/* I/O read from a channel's address/count register pair.  Even port
 * offsets read the current address, odd offsets the remaining count;
 * the flip-flop selects the low or high byte of the 16-bit value. */
static uint32_t read_chan (void *opaque, uint32_t nport)
{
    struct dma_cont *d = opaque;
    int ichan, nreg, iport, ff, val, dir;
    struct dma_regs *r;

    /* decode port: channel index in bits 3-1, address/count in bit 0 */
    iport = (nport >> d->dshift) & 0x0f;
    ichan = iport >> 1;
    nreg = iport & 1;
    r = d->regs + ichan;

    /* mode bit 5 selects address-decrement mode */
    dir = ((r->mode >> 5) & 1) ? -1 : 1;
    ff = getff (d);
    if (nreg)
        /* remaining transfer count */
        val = (r->base[COUNT] << d->dshift) - r->now[COUNT];
    else
        /* current address, advanced by the bytes transferred so far */
        val = r->now[ADDR] + r->now[COUNT] * dir;

    ldebug ("read_chan %#x -> %d\n", iport, val);
    /* first access returns the low byte, second the high byte */
    return (val >> (d->dshift + (ff << 3))) & 0xff;
}
+
+static void write_chan (void *opaque, uint32_t nport, uint32_t data)
+{
+ struct dma_cont *d = opaque;
+ int iport, ichan, nreg;
+ struct dma_regs *r;
+
+ iport = (nport >> d->dshift) & 0x0f;
+ ichan = iport >> 1;
+ nreg = iport & 1;
+ r = d->regs + ichan;
+ if (getff (d)) {
+ r->base[nreg] = (r->base[nreg] & 0xff) | ((data << 8) & 0xff00);
+ init_chan (d, ichan);
+ } else {
+ r->base[nreg] = (r->base[nreg] & 0xff00) | (data & 0xff);
+ }
+}
+
/* I/O write to the controller's control register range
 * (ports base+8 .. base+15, scaled by dshift). */
static void write_cont (void *opaque, uint32_t nport, uint32_t data)
{
    struct dma_cont *d = opaque;
    int iport, ichan = 0;

    iport = (nport >> d->dshift) & 0x0f;
    switch (iport) {
    case 0x08: /* command */
        /* most 8237 command features are unimplemented; reject them */
        if ((data != 0) && (data & CMD_NOT_SUPPORTED)) {
            dolog ("command %#x not supported\n", data);
            return;
        }
        d->command = data;
        break;

    case 0x09:
        /* request register: bit 2 sets/clears a software request bit in
         * status bits 4-7 (the same bits DMA_hold_DREQ touches); the
         * channel's TC bit is cleared as a side effect */
        ichan = data & 3;
        if (data & 4) {
            d->status |= 1 << (ichan + 4);
        }
        else {
            d->status &= ~(1 << (ichan + 4));
        }
        d->status &= ~(1 << ichan);
        break;

    case 0x0a: /* single mask */
        if (data & 4)
            d->mask |= 1 << (data & 3);
        else
            d->mask &= ~(1 << (data & 3));
        break;

    case 0x0b: /* mode */
        {
            ichan = data & 3;
#ifdef DEBUG_DMA
            {
                int op, ai, dir, opmode;
                op = (data >> 2) & 3;
                ai = (data >> 4) & 1;
                dir = (data >> 5) & 1;
                opmode = (data >> 6) & 3;

                linfo ("ichan %d, op %d, ai %d, dir %d, opmode %d\n",
                       ichan, op, ai, dir, opmode);
            }
#endif
            d->regs[ichan].mode = data;
            break;
        }

    case 0x0c: /* clear flip flop */
        d->flip_flop = 0;
        break;

    case 0x0d: /* reset */
        /* master clear: mask all channels, clear status and command */
        d->flip_flop = 0;
        d->mask = ~0;
        d->status = 0;
        d->command = 0;
        break;

    case 0x0e: /* clear mask for all channels */
        d->mask = 0;
        break;

    case 0x0f: /* write mask for all channels */
        d->mask = data;
        break;

    default:
        dolog ("unknown iport %#x\n", iport);
        break;
    }

#ifdef DEBUG_DMA
    if (0xc != iport) {
        linfo ("write_cont: nport %#06x, ichan % 2d, val %#06x\n",
               nport, ichan, data);
    }
#endif
}
+
+static uint32_t read_cont (void *opaque, uint32_t nport)
+{
+ struct dma_cont *d = opaque;
+ int iport, val;
+
+ iport = (nport >> d->dshift) & 0x0f;
+ switch (iport) {
+ case 0x08: /* status */
+ val = d->status;
+ d->status &= 0xf0;
+ break;
+ case 0x0f: /* mask */
+ val = d->mask;
+ break;
+ default:
+ val = 0;
+ break;
+ }
+
+ ldebug ("read_cont: nport %#06x, iport %#04x val %#x\n", nport, iport, val);
+ return val;
+}
+
+int DMA_get_channel_mode (int nchan)
+{
+ return dma_controllers[nchan > 3].regs[nchan & 3].mode;
+}
+
+void DMA_hold_DREQ (int nchan)
+{
+ int ncont, ichan;
+
+ ncont = nchan > 3;
+ ichan = nchan & 3;
+ linfo ("held cont=%d chan=%d\n", ncont, ichan);
+ dma_controllers[ncont].status |= 1 << (ichan + 4);
+}
+
+void DMA_release_DREQ (int nchan)
+{
+ int ncont, ichan;
+
+ ncont = nchan > 3;
+ ichan = nchan & 3;
+ linfo ("released cont=%d chan=%d\n", ncont, ichan);
+ dma_controllers[ncont].status &= ~(1 << (ichan + 4));
+}
+
+static void channel_run (int ncont, int ichan)
+{
+ int n;
+ struct dma_regs *r = &dma_controllers[ncont].regs[ichan];
+#ifdef DEBUG_DMA
+ int dir, opmode;
+
+ dir = (r->mode >> 5) & 1;
+ opmode = (r->mode >> 6) & 3;
+
+ if (dir) {
+ dolog ("DMA in address decrement mode\n");
+ }
+ if (opmode != 1) {
+ dolog ("DMA not in single mode select %#x\n", opmode);
+ }
+#endif
+
+ r = dma_controllers[ncont].regs + ichan;
+ n = r->transfer_handler (r->opaque, ichan + (ncont << 2),
+ r->now[COUNT], (r->base[COUNT] + 1) << ncont);
+ r->now[COUNT] = n;
+ ldebug ("dma_pos %d size %d\n", n, (r->base[COUNT] + 1) << ncont);
+}
+
+void DMA_run (void)
+{
+ struct dma_cont *d;
+ int icont, ichan;
+
+ d = dma_controllers;
+
+ for (icont = 0; icont < 2; icont++, d++) {
+ for (ichan = 0; ichan < 4; ichan++) {
+ int mask;
+
+ mask = 1 << ichan;
+
+ if ((0 == (d->mask & mask)) && (0 != (d->status & (mask << 4))))
+ channel_run (icont, ichan);
+ }
+ }
+}
+
+void DMA_register_channel (int nchan,
+ DMA_transfer_handler transfer_handler,
+ void *opaque)
+{
+ struct dma_regs *r;
+ int ichan, ncont;
+
+ ncont = nchan > 3;
+ ichan = nchan & 3;
+
+ r = dma_controllers[ncont].regs + ichan;
+ r->transfer_handler = transfer_handler;
+ r->opaque = opaque;
+}
+
+int DMA_read_memory (int nchan, void *buf, int pos, int len)
+{
+ struct dma_regs *r = &dma_controllers[nchan > 3].regs[nchan & 3];
+ target_ulong addr = ((r->pageh & 0x7f) << 24) | (r->page << 16) | r->now[ADDR];
+
+ if (r->mode & 0x20) {
+ int i;
+ uint8_t *p = buf;
+
+ cpu_physical_memory_read (addr - pos - len, buf, len);
+ /* What about 16bit transfers? */
+ for (i = 0; i < len >> 1; i++) {
+ uint8_t b = p[len - i - 1];
+ p[i] = b;
+ }
+ }
+ else
+ cpu_physical_memory_read (addr + pos, buf, len);
+
+ return len;
+}
+
+int DMA_write_memory (int nchan, void *buf, int pos, int len)
+{
+ struct dma_regs *r = &dma_controllers[nchan > 3].regs[nchan & 3];
+ target_ulong addr = ((r->pageh & 0x7f) << 24) | (r->page << 16) | r->now[ADDR];
+
+ if (r->mode & 0x20) {
+ int i;
+ uint8_t *p = buf;
+
+ cpu_physical_memory_write (addr - pos - len, buf, len);
+ /* What about 16bit transfers? */
+ for (i = 0; i < len; i++) {
+ uint8_t b = p[len - i - 1];
+ p[i] = b;
+ }
+ }
+ else
+ cpu_physical_memory_write (addr + pos, buf, len);
+
+ return len;
+}
+
/* request the emulator to transfer a new DMA memory block ASAP */
void DMA_schedule(int nchan)
{
    /* Kick the CPU out of its execution loop so pending DMA gets
     * serviced; note `nchan' is not used here. */
    cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
}
+
+static void dma_reset(void *opaque)
+{
+ struct dma_cont *d = opaque;
+ write_cont (d, (0x0d << d->dshift), 0);
+}
+
/* dshift = 0: 8 bit DMA, 1 = 16 bit DMA */
/* Register all I/O ports of one controller: channel registers at
 * base+0..7, page registers at page_base (and pageh_base if >= 0),
 * control registers at base+8..15; then hook and perform a reset. */
static void dma_init2(struct dma_cont *d, int base, int dshift,
                      int page_base, int pageh_base)
{
    /* page-register port offsets for channels 0-3 (cf. channels[]) */
    const static int page_port_list[] = { 0x1, 0x2, 0x3, 0x7 };
    int i;

    d->dshift = dshift;
    for (i = 0; i < 8; i++) {
        register_ioport_write (base + (i << dshift), 1, 1, write_chan, d);
        register_ioport_read (base + (i << dshift), 1, 1, read_chan, d);
    }
    for (i = 0; i < LENOFA (page_port_list); i++) {
        register_ioport_write (page_base + page_port_list[i], 1, 1,
                               write_page, d);
        register_ioport_read (page_base + page_port_list[i], 1, 1,
                              read_page, d);
        /* high-page registers are optional (pageh_base < 0 disables them) */
        if (pageh_base >= 0) {
            register_ioport_write (pageh_base + page_port_list[i], 1, 1,
                                   write_pageh, d);
            register_ioport_read (pageh_base + page_port_list[i], 1, 1,
                                  read_pageh, d);
        }
    }
    for (i = 0; i < 8; i++) {
        register_ioport_write (base + ((i + 8) << dshift), 1, 1,
                               write_cont, d);
        register_ioport_read (base + ((i + 8) << dshift), 1, 1,
                              read_cont, d);
    }
    qemu_register_reset(dma_reset, d);
    dma_reset(d);
}
+
/* Save one controller's state for savevm (version 1 layout).  Field order
 * must match dma_load() exactly. */
static void dma_save (QEMUFile *f, void *opaque)
{
    struct dma_cont *d = opaque;
    int i;

    /* qemu_put_8s (f, &d->status); */
    qemu_put_8s (f, &d->command);
    qemu_put_8s (f, &d->mask);
    qemu_put_8s (f, &d->flip_flop);
    /* NOTE(review): dshift and now[] are plain `int'; qemu_put_be32s
     * presumably takes a 32-bit pointer -- confirm the prototype. */
    qemu_put_be32s (f, &d->dshift);

    for (i = 0; i < 4; ++i) {
        struct dma_regs *r = &d->regs[i];
        qemu_put_be32s (f, &r->now[0]);
        qemu_put_be32s (f, &r->now[1]);
        qemu_put_be16s (f, &r->base[0]);
        qemu_put_be16s (f, &r->base[1]);
        qemu_put_8s (f, &r->mode);
        qemu_put_8s (f, &r->page);
        qemu_put_8s (f, &r->pageh);
        qemu_put_8s (f, &r->dack);
        qemu_put_8s (f, &r->eop);
    }
}
+
/* Restore one controller's state from savevm.  Only version 1 snapshots
 * are accepted; field order must match dma_save(). */
static int dma_load (QEMUFile *f, void *opaque, int version_id)
{
    struct dma_cont *d = opaque;
    int i;

    if (version_id != 1)
        return -EINVAL;

    /* qemu_get_8s (f, &d->status); */
    qemu_get_8s (f, &d->command);
    qemu_get_8s (f, &d->mask);
    qemu_get_8s (f, &d->flip_flop);
    qemu_get_be32s (f, &d->dshift);

    for (i = 0; i < 4; ++i) {
        struct dma_regs *r = &d->regs[i];
        qemu_get_be32s (f, &r->now[0]);
        qemu_get_be32s (f, &r->now[1]);
        qemu_get_be16s (f, &r->base[0]);
        qemu_get_be16s (f, &r->base[1]);
        qemu_get_8s (f, &r->mode);
        qemu_get_8s (f, &r->page);
        qemu_get_8s (f, &r->pageh);
        qemu_get_8s (f, &r->dack);
        qemu_get_8s (f, &r->eop);
    }
    return 0;
}
+
+void DMA_init (int high_page_enable)
+{
+ dma_init2(&dma_controllers[0], 0x00, 0, 0x80,
+ high_page_enable ? 0x480 : -1);
+ dma_init2(&dma_controllers[1], 0xc0, 1, 0x88,
+ high_page_enable ? 0x488 : -1);
+ register_savevm ("dma", 0, 1, dma_save, dma_load, &dma_controllers[0]);
+ register_savevm ("dma", 1, 1, dma_save, dma_load, &dma_controllers[1]);
+}
diff --git a/tools/ioemu/hw/fdc.c b/tools/ioemu/hw/fdc.c
new file mode 100644
index 0000000000..d512b1ca98
--- /dev/null
+++ b/tools/ioemu/hw/fdc.c
@@ -0,0 +1,1719 @@
+/*
+ * QEMU Floppy disk emulator (Intel 82078)
+ *
+ * Copyright (c) 2003 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/********************************************************/
+/* debug Floppy devices */
+//#define DEBUG_FLOPPY
+
+#ifdef DEBUG_FLOPPY
+#define FLOPPY_DPRINTF(fmt, args...) \
+do { printf("FLOPPY: " fmt , ##args); } while (0)
+#else
+#define FLOPPY_DPRINTF(fmt, args...)
+#endif
+
+#define FLOPPY_ERROR(fmt, args...) \
+do { printf("FLOPPY ERROR: %s: " fmt, __func__ , ##args); } while (0)
+
+/********************************************************/
+/* Floppy drive emulation */
+
+/* Will always be a fixed parameter for us */
+#define FD_SECTOR_LEN 512
+#define FD_SECTOR_SC 2 /* Sector size code */
+
+/* Floppy disk drive emulation */
/* Media types recognised by the geometry probe. */
typedef enum fdisk_type_t {
    FDRIVE_DISK_288   = 0x01, /* 2.88 MB disk           */
    FDRIVE_DISK_144   = 0x02, /* 1.44 MB disk           */
    FDRIVE_DISK_720   = 0x03, /* 720 kB disk            */
    FDRIVE_DISK_USER  = 0x04, /* User defined geometry  */
    FDRIVE_DISK_NONE  = 0x05, /* No disk                */
} fdisk_type_t;

/* Physical drive types. */
typedef enum fdrive_type_t {
    FDRIVE_DRV_144  = 0x00,   /* 1.44 MB 3"5 drive      */
    FDRIVE_DRV_288  = 0x01,   /* 2.88 MB 3"5 drive      */
    FDRIVE_DRV_120  = 0x02,   /* 1.2  MB 5"25 drive     */
    FDRIVE_DRV_NONE = 0x03,   /* No drive connected     */
} fdrive_type_t;

/* Per-drive dynamic flags. */
typedef enum fdrive_flags_t {
    FDRIVE_MOTOR_ON   = 0x01, /* motor on/off           */
    FDRIVE_REVALIDATE = 0x02, /* Revalidated            */
} fdrive_flags_t;

/* Per-medium flags. */
typedef enum fdisk_flags_t {
    FDISK_DBL_SIDES  = 0x01,  /* medium is double-sided */
} fdisk_flags_t;
+
/* Per-drive state: attached block device, mechanical position and the
 * geometry of the inserted medium. */
typedef struct fdrive_t {
    BlockDriverState *bs;
    /* Drive status */
    fdrive_type_t drive;
    fdrive_flags_t drflags;
    uint8_t perpendicular;    /* 2.88 MB access mode    */
    /* Position */
    uint8_t head;
    uint8_t track;
    uint8_t sect;
    /* Last operation status */
    uint8_t dir;              /* Direction              */
    uint8_t rw;               /* Read/write             */
    /* Media */
    fdisk_flags_t flags;
    uint8_t last_sect;        /* Nb sector per track    */
    uint8_t max_track;        /* Nb of tracks           */
    uint16_t bps;             /* Bytes per sector       */
    uint8_t ro;               /* Is read-only           */
} fdrive_t;
+
+static void fd_init (fdrive_t *drv, BlockDriverState *bs)
+{
+ /* Drive */
+ drv->bs = bs;
+ drv->drive = FDRIVE_DRV_NONE;
+ drv->drflags = 0;
+ drv->perpendicular = 0;
+ /* Disk */
+ drv->last_sect = 0;
+ drv->max_track = 0;
+}
+
/* Convert a CHS position to a 0-based linear sector number.
 * `last_sect' is the number of sectors per track; sectors count from 1. */
static int _fd_sector (uint8_t head, uint8_t track,
                       uint8_t sect, uint8_t last_sect)
{
    int track_start = (track * 2 + head) * last_sect;

    return track_start + (sect - 1);
}
+
+/* Returns current position, in sectors, for given drive */
+static int fd_sector (fdrive_t *drv)
+{
+ return _fd_sector(drv->head, drv->track, drv->sect, drv->last_sect);
+}
+
/* Position the drive at (head, track, sect).  Returns:
 *   0 - no track change needed (or only head/sector changed)
 *   1 - an actual track change (seek) happened
 *   2 - track/head out of range for this medium
 *   3 - sector beyond the end of the track
 * `enable_seek' would gate implicit seeks, but that check is compiled out. */
static int fd_seek (fdrive_t *drv, uint8_t head, uint8_t track, uint8_t sect,
                    int enable_seek)
{
    uint32_t sector;
    int ret;

    if (track > drv->max_track ||
        (head != 0 && (drv->flags & FDISK_DBL_SIDES) == 0)) {
        FLOPPY_DPRINTF("try to read %d %02x %02x (max=%d %d %02x %02x)\n",
                       head, track, sect, 1,
                       (drv->flags & FDISK_DBL_SIDES) == 0 ? 0 : 1,
                       drv->max_track, drv->last_sect);
        return 2;
    }
    if (sect > drv->last_sect) {
        FLOPPY_DPRINTF("try to read %d %02x %02x (max=%d %d %02x %02x)\n",
                       head, track, sect, 1,
                       (drv->flags & FDISK_DBL_SIDES) == 0 ? 0 : 1,
                       drv->max_track, drv->last_sect);
        return 3;
    }
    sector = _fd_sector(head, track, sect, drv->last_sect);
    ret = 0;
    if (sector != fd_sector(drv)) {
#if 0
        if (!enable_seek) {
            FLOPPY_ERROR("no implicit seek %d %02x %02x (max=%d %02x %02x)\n",
                         head, track, sect, 1, drv->max_track, drv->last_sect);
            return 4;
        }
#endif
        drv->head = head;
        /* only a change of cylinder counts as a real seek */
        if (drv->track != track)
            ret = 1;
        drv->track = track;
        drv->sect = sect;
    }

    return ret;
}
+
+/* Set drive back to track 0 */
+static void fd_recalibrate (fdrive_t *drv)
+{
+ FLOPPY_DPRINTF("recalibrate\n");
+ drv->head = 0;
+ drv->track = 0;
+ drv->sect = 1;
+ drv->dir = 1;
+ drv->rw = 0;
+}
+
+/* Recognize floppy formats */
/* One entry of the known-format table used by fd_revalidate() to guess a
 * disk's geometry from its total sector count. */
typedef struct fd_format_t {
    fdrive_type_t drive;      /* drive type this format belongs to */
    fdisk_type_t disk;        /* media type */
    uint8_t last_sect;        /* sectors per track */
    uint8_t max_track;        /* number of tracks */
    uint8_t max_head;         /* highest head number (0 = single-sided) */
    const unsigned char *str; /* human-readable description */
} fd_format_t;
+
+static fd_format_t fd_formats[] = {
+ /* First entry is default format */
+ /* 1.44 MB 3"1/2 floppy disks */
+ { FDRIVE_DRV_144, FDRIVE_DISK_144, 18, 80, 1, "1.44 MB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_144, 20, 80, 1, "1.6 MB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_144, 21, 80, 1, "1.68 MB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_144, 21, 82, 1, "1.72 MB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_144, 21, 83, 1, "1.74 MB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_144, 22, 80, 1, "1.76 MB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_144, 23, 80, 1, "1.84 MB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_144, 24, 80, 1, "1.92 MB 3\"1/2", },
+ /* 2.88 MB 3"1/2 floppy disks */
+ { FDRIVE_DRV_288, FDRIVE_DISK_288, 36, 80, 1, "2.88 MB 3\"1/2", },
+ { FDRIVE_DRV_288, FDRIVE_DISK_288, 39, 80, 1, "3.12 MB 3\"1/2", },
+ { FDRIVE_DRV_288, FDRIVE_DISK_288, 40, 80, 1, "3.2 MB 3\"1/2", },
+ { FDRIVE_DRV_288, FDRIVE_DISK_288, 44, 80, 1, "3.52 MB 3\"1/2", },
+ { FDRIVE_DRV_288, FDRIVE_DISK_288, 48, 80, 1, "3.84 MB 3\"1/2", },
+ /* 720 kB 3"1/2 floppy disks */
+ { FDRIVE_DRV_144, FDRIVE_DISK_720, 9, 80, 1, "720 kB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_720, 10, 80, 1, "800 kB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_720, 10, 82, 1, "820 kB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_720, 10, 83, 1, "830 kB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_720, 13, 80, 1, "1.04 MB 3\"1/2", },
+ { FDRIVE_DRV_144, FDRIVE_DISK_720, 14, 80, 1, "1.12 MB 3\"1/2", },
+ /* 1.2 MB 5"1/4 floppy disks */
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 15, 80, 1, "1.2 kB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 18, 80, 1, "1.44 MB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 18, 82, 1, "1.48 MB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 18, 83, 1, "1.49 MB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 20, 80, 1, "1.6 MB 5\"1/4", },
+ /* 720 kB 5"1/4 floppy disks */
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 9, 80, 1, "720 kB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 11, 80, 1, "880 kB 5\"1/4", },
+ /* 360 kB 5"1/4 floppy disks */
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 9, 40, 1, "360 kB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 9, 40, 0, "180 kB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 10, 41, 1, "410 kB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 10, 42, 1, "420 kB 5\"1/4", },
+ /* 320 kB 5"1/4 floppy disks */
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 8, 40, 1, "320 kB 5\"1/4", },
+ { FDRIVE_DRV_120, FDRIVE_DISK_288, 8, 40, 0, "160 kB 5\"1/4", },
+ /* 360 kB must match 5"1/4 better than 3"1/2... */
+ { FDRIVE_DRV_144, FDRIVE_DISK_720, 9, 80, 0, "360 kB 3\"1/2", },
+ /* end */
+ { FDRIVE_DRV_NONE, FDRIVE_DISK_NONE, -1, -1, 0, NULL, },
+};
+
+/* Revalidate a disk drive after a disk change */
/* Revalidate a disk drive after a disk change: probe the medium's
 * geometry (explicit hint, or best match from fd_formats[] by total
 * sector count) and update the drive's geometry fields.  Sets the
 * FDRIVE_REVALIDATE flag so the DIR register reports a media change. */
static void fd_revalidate (fdrive_t *drv)
{
    fd_format_t *parse;
    int64_t nb_sectors, size;
    int i, first_match, match;
    int nb_heads, max_track, last_sect, ro;

    FLOPPY_DPRINTF("revalidate\n");
    drv->drflags &= ~FDRIVE_REVALIDATE;
    if (drv->bs != NULL && bdrv_is_inserted(drv->bs)) {
        ro = bdrv_is_read_only(drv->bs);
        /* user-supplied geometry hint takes precedence over probing */
        bdrv_get_geometry_hint(drv->bs, &nb_heads, &max_track, &last_sect);
        if (nb_heads != 0 && max_track != 0 && last_sect != 0) {
            FLOPPY_DPRINTF("User defined disk (%d %d %d)",
                           nb_heads - 1, max_track, last_sect);
        } else {
            bdrv_get_geometry(drv->bs, &nb_sectors);
            match = -1;
            first_match = -1;
            /* scan the format table for an exact sector-count match;
             * remember the first entry compatible with this drive type
             * as a fallback */
            for (i = 0;; i++) {
                parse = &fd_formats[i];
                if (parse->drive == FDRIVE_DRV_NONE)
                    break;
                if (drv->drive == parse->drive ||
                    drv->drive == FDRIVE_DRV_NONE) {
                    size = (parse->max_head + 1) * parse->max_track *
                        parse->last_sect;
                    if (nb_sectors == size) {
                        match = i;
                        break;
                    }
                    if (first_match == -1)
                        first_match = i;
                }
            }
            if (match == -1) {
                /* NOTE(review): falls back to table entry 1 although the
                 * comment on fd_formats says entry 0 is the default --
                 * confirm which index is intended. */
                if (first_match == -1)
                    match = 1;
                else
                    match = first_match;
                parse = &fd_formats[match];
            }
            nb_heads = parse->max_head + 1;
            max_track = parse->max_track;
            last_sect = parse->last_sect;
            drv->drive = parse->drive;
            FLOPPY_DPRINTF("%s floppy disk (%d h %d t %d s) %s\n", parse->str,
                           nb_heads, max_track, last_sect, ro ? "ro" : "rw");
        }
        if (nb_heads == 1) {
            drv->flags &= ~FDISK_DBL_SIDES;
        } else {
            drv->flags |= FDISK_DBL_SIDES;
        }
        drv->max_track = max_track;
        drv->last_sect = last_sect;
        drv->ro = ro;
    } else {
        FLOPPY_DPRINTF("No disk in drive\n");
        drv->last_sect = 0;
        drv->max_track = 0;
        drv->flags &= ~FDISK_DBL_SIDES;
    }
    drv->drflags |= FDRIVE_REVALIDATE;
}
+
+/* Motor control */
+static void fd_start (fdrive_t *drv)
+{
+ drv->drflags |= FDRIVE_MOTOR_ON;
+}
+
+static void fd_stop (fdrive_t *drv)
+{
+ drv->drflags &= ~FDRIVE_MOTOR_ON;
+}
+
+/* Re-initialise a drives (motor off, repositioned) */
+static void fd_reset (fdrive_t *drv)
+{
+ fd_stop(drv);
+ fd_recalibrate(drv);
+}
+
+/********************************************************/
+/* Intel 82078 floppy disk controller emulation */
+
+static void fdctrl_reset (fdctrl_t *fdctrl, int do_irq);
+static void fdctrl_reset_fifo (fdctrl_t *fdctrl);
+static int fdctrl_transfer_handler (void *opaque, int nchan,
+ int dma_pos, int dma_len);
+static void fdctrl_raise_irq (fdctrl_t *fdctrl, uint8_t status);
+static void fdctrl_result_timer(void *opaque);
+
+static uint32_t fdctrl_read_statusB (fdctrl_t *fdctrl);
+static uint32_t fdctrl_read_dor (fdctrl_t *fdctrl);
+static void fdctrl_write_dor (fdctrl_t *fdctrl, uint32_t value);
+static uint32_t fdctrl_read_tape (fdctrl_t *fdctrl);
+static void fdctrl_write_tape (fdctrl_t *fdctrl, uint32_t value);
+static uint32_t fdctrl_read_main_status (fdctrl_t *fdctrl);
+static void fdctrl_write_rate (fdctrl_t *fdctrl, uint32_t value);
+static uint32_t fdctrl_read_data (fdctrl_t *fdctrl);
+static void fdctrl_write_data (fdctrl_t *fdctrl, uint32_t value);
+static uint32_t fdctrl_read_dir (fdctrl_t *fdctrl);
+
/* Controller state flags kept in fdctrl_t.state. */
enum {
    FD_CTRL_ACTIVE = 0x01, /* XXX: suppress that */
    FD_CTRL_RESET  = 0x02, /* controller held in reset */
    FD_CTRL_SLEEP  = 0x04, /* XXX: suppress that */
    FD_CTRL_BUSY   = 0x08, /* dma transfer in progress */
    FD_CTRL_INTR   = 0x10, /* interrupt pending */
};
+
/* Current data-transfer direction / operation kind (fdctrl_t.data_dir). */
enum {
    FD_DIR_WRITE   = 0,
    FD_DIR_READ    = 1,
    FD_DIR_SCANE   = 2, /* scan equal */
    FD_DIR_SCANL   = 3, /* scan low or equal */
    FD_DIR_SCANH   = 4, /* scan high or equal */
};
+
/* FIFO/command phase encoding kept in fdctrl_t.data_state: the low bits
 * (FD_STATE_STATE) hold the phase, the high bits are modifier flags. */
enum {
    FD_STATE_CMD    = 0x00, /* accepting command bytes */
    FD_STATE_STATUS = 0x01, /* returning status bytes */
    FD_STATE_DATA   = 0x02, /* transferring data */
    FD_STATE_STATE  = 0x03, /* mask for the phase bits */
    FD_STATE_MULTI  = 0x10, /* multi-track operation */
    FD_STATE_SEEK   = 0x20, /* an implicit seek happened */
    FD_STATE_FORMAT = 0x40, /* FORMAT TRACK in progress */
};

/* Accessors for the phase bits and the modifier flags above. */
#define FD_STATE(state) ((state) & FD_STATE_STATE)
#define FD_SET_STATE(state, new_state) \
do { (state) = ((state) & ~FD_STATE_STATE) | (new_state); } while (0)
#define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI)
#define FD_DID_SEEK(state) ((state) & FD_STATE_SEEK)
#define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT)
+
/* Intel 82078 floppy disk controller state. */
struct fdctrl_t {
    /* NOTE(review): self-referential field, apparently unused in the
     * code visible here -- confirm before relying on it. */
    fdctrl_t *fdctrl;
    /* Controller's identification */
    uint8_t version;
    /* HW */
    int irq_lvl;             /* PIC interrupt level */
    int dma_chann;           /* ISA DMA channel, or -1 for none */
    uint32_t io_base;        /* base I/O port */
    /* Controller state */
    QEMUTimer *result_timer; /* delays result phases */
    uint8_t state;           /* FD_CTRL_* flags */
    uint8_t dma_en;          /* DMA transfers enabled */
    uint8_t cur_drv;         /* selected drive (DOR bit 0) */
    uint8_t bootsel;         /* drive 0/1 swap (tape drive register) */
    /* Command FIFO */
    uint8_t fifo[FD_SECTOR_LEN];
    uint32_t data_pos;       /* current index into fifo */
    uint32_t data_len;       /* expected length of current phase */
    uint8_t data_state;      /* FD_STATE_* phase encoding */
    uint8_t data_dir;        /* FD_DIR_* */
    uint8_t int_status;      /* value for SENSE INTERRUPT STATUS */
    uint8_t eot; /* last wanted sector */
    /* States kept only to be returned back */
    /* Timers state */
    uint8_t timer0;
    uint8_t timer1;
    /* precompensation */
    uint8_t precomp_trk;
    uint8_t config;
    uint8_t lock;
    /* Power down config (also with status regB access mode */
    uint8_t pwrd;
    /* Floppy drives */
    fdrive_t drives[2];
};
+
+static uint32_t fdctrl_read (void *opaque, uint32_t reg)
+{
+ fdctrl_t *fdctrl = opaque;
+ uint32_t retval;
+
+ switch (reg & 0x07) {
+ case 0x01:
+ retval = fdctrl_read_statusB(fdctrl);
+ break;
+ case 0x02:
+ retval = fdctrl_read_dor(fdctrl);
+ break;
+ case 0x03:
+ retval = fdctrl_read_tape(fdctrl);
+ break;
+ case 0x04:
+ retval = fdctrl_read_main_status(fdctrl);
+ break;
+ case 0x05:
+ retval = fdctrl_read_data(fdctrl);
+ break;
+ case 0x07:
+ retval = fdctrl_read_dir(fdctrl);
+ break;
+ default:
+ retval = (uint32_t)(-1);
+ break;
+ }
+ FLOPPY_DPRINTF("read reg%d: 0x%02x\n", reg & 7, retval);
+
+ return retval;
+}
+
+static void fdctrl_write (void *opaque, uint32_t reg, uint32_t value)
+{
+ fdctrl_t *fdctrl = opaque;
+
+ FLOPPY_DPRINTF("write reg%d: 0x%02x\n", reg & 7, value);
+
+ switch (reg & 0x07) {
+ case 0x02:
+ fdctrl_write_dor(fdctrl, value);
+ break;
+ case 0x03:
+ fdctrl_write_tape(fdctrl, value);
+ break;
+ case 0x04:
+ fdctrl_write_rate(fdctrl, value);
+ break;
+ case 0x05:
+ fdctrl_write_data(fdctrl, value);
+ break;
+ default:
+ break;
+ }
+}
+
/* Block-layer callback invoked when the media in a drive changes;
 * re-probes the new medium's geometry. */
static void fd_change_cb (void *opaque)
{
    fdrive_t *drv = opaque;

    FLOPPY_DPRINTF("disk change\n");
    fd_revalidate(drv);
#if 0
    fd_recalibrate(drv);
    fdctrl_reset_fifo(drv->fdctrl);
    fdctrl_raise_irq(drv->fdctrl, 0x20);
#endif
}
+
/* Create the floppy controller: allocate state, hook the DMA channel (if
 * any), attach the drives and register the I/O ports.  Returns NULL on
 * allocation failure.  Memory-mapped mode is not implemented. */
fdctrl_t *fdctrl_init (int irq_lvl, int dma_chann, int mem_mapped,
                       uint32_t io_base,
                       BlockDriverState **fds)
{
    fdctrl_t *fdctrl;
//    int io_mem;
    int i;

    FLOPPY_DPRINTF("init controller\n");
    fdctrl = qemu_mallocz(sizeof(fdctrl_t));
    if (!fdctrl)
        return NULL;
    fdctrl->result_timer = qemu_new_timer(vm_clock,
                                          fdctrl_result_timer, fdctrl);

    fdctrl->version = 0x90; /* Intel 82078 controller */
    fdctrl->irq_lvl = irq_lvl;
    fdctrl->dma_chann = dma_chann;
    fdctrl->io_base = io_base;
    fdctrl->config = 0x60; /* Implicit seek, polling & FIFO enabled */
    if (fdctrl->dma_chann != -1) {
        fdctrl->dma_en = 1;
        DMA_register_channel(dma_chann, &fdctrl_transfer_handler, fdctrl);
    } else {
        fdctrl->dma_en = 0;
    }
    for (i = 0; i < 2; i++) {
        fd_init(&fdctrl->drives[i], fds[i]);
        /* get notified on media changes */
        if (fds[i]) {
            bdrv_set_change_cb(fds[i],
                               &fd_change_cb, &fdctrl->drives[i]);
        }
    }
    fdctrl_reset(fdctrl, 0);
    fdctrl->state = FD_CTRL_ACTIVE;
    if (mem_mapped) {
        FLOPPY_ERROR("memory mapped floppy not supported by now !\n");
#if 0
        io_mem = cpu_register_io_memory(0, fdctrl_mem_read, fdctrl_mem_write);
        cpu_register_physical_memory(base, 0x08, io_mem);
#endif
    } else {
        /* ports base+1..base+5 and base+7 (base+0/base+6 unimplemented) */
        register_ioport_read(io_base + 0x01, 5, 1, &fdctrl_read, fdctrl);
        register_ioport_read(io_base + 0x07, 1, 1, &fdctrl_read, fdctrl);
        register_ioport_write(io_base + 0x01, 5, 1, &fdctrl_write, fdctrl);
        register_ioport_write(io_base + 0x07, 1, 1, &fdctrl_write, fdctrl);
    }
    for (i = 0; i < 2; i++) {
        fd_revalidate(&fdctrl->drives[i]);
    }

    return fdctrl;
}
+
+/* XXX: may change if moved to bdrv */
+int fdctrl_get_drive_type(fdctrl_t *fdctrl, int drive_num)
+{
+ return fdctrl->drives[drive_num].drive;
+}
+
+/* Change IRQ state */
+static void fdctrl_reset_irq (fdctrl_t *fdctrl)
+{
+ FLOPPY_DPRINTF("Reset interrupt\n");
+ pic_set_irq(fdctrl->irq_lvl, 0);
+ fdctrl->state &= ~FD_CTRL_INTR;
+}
+
+static void fdctrl_raise_irq (fdctrl_t *fdctrl, uint8_t status)
+{
+ if (~(fdctrl->state & FD_CTRL_INTR)) {
+ pic_set_irq(fdctrl->irq_lvl, 1);
+ fdctrl->state |= FD_CTRL_INTR;
+ }
+ FLOPPY_DPRINTF("Set interrupt status to 0x%02x\n", status);
+ fdctrl->int_status = status;
+}
+
+/* Reset controller */
/* Reset controller: clear the IRQ, FIFO and per-drive state; optionally
 * raise the polling interrupt (status 0xc0) as real hardware does after
 * a reset when `do_irq' is set. */
static void fdctrl_reset (fdctrl_t *fdctrl, int do_irq)
{
    int i;

    FLOPPY_DPRINTF("reset controller\n");
    fdctrl_reset_irq(fdctrl);
    /* Initialise controller */
    fdctrl->cur_drv = 0;
    /* FIFO state */
    fdctrl->data_pos = 0;
    fdctrl->data_len = 0;
    fdctrl->data_state = FD_STATE_CMD;
    fdctrl->data_dir = FD_DIR_WRITE;
    for (i = 0; i < MAX_FD; i++)
        fd_reset(&fdctrl->drives[i]);
    fdctrl_reset_fifo(fdctrl);
    if (do_irq)
        fdctrl_raise_irq(fdctrl, 0xc0);
}
+
+static inline fdrive_t *drv0 (fdctrl_t *fdctrl)
+{
+ return &fdctrl->drives[fdctrl->bootsel];
+}
+
+static inline fdrive_t *drv1 (fdctrl_t *fdctrl)
+{
+ return &fdctrl->drives[1 - fdctrl->bootsel];
+}
+
+static fdrive_t *get_cur_drv (fdctrl_t *fdctrl)
+{
+ return fdctrl->cur_drv == 0 ? drv0(fdctrl) : drv1(fdctrl);
+}
+
+/* Status B register : 0x01 (read-only) */
+static uint32_t fdctrl_read_statusB (fdctrl_t *fdctrl)
+{
+ FLOPPY_DPRINTF("status register: 0x00\n");
+ return 0;
+}
+
+/* Digital output register : 0x02 */
+static uint32_t fdctrl_read_dor (fdctrl_t *fdctrl)
+{
+ uint32_t retval = 0;
+
+ /* Drive motors state indicators */
+ if (drv0(fdctrl)->drflags & FDRIVE_MOTOR_ON)
+ retval |= 1 << 5;
+ if (drv1(fdctrl)->drflags & FDRIVE_MOTOR_ON)
+ retval |= 1 << 4;
+ /* DMA enable */
+ retval |= fdctrl->dma_en << 3;
+ /* Reset indicator */
+ retval |= (fdctrl->state & FD_CTRL_RESET) == 0 ? 0x04 : 0;
+ /* Selected drive */
+ retval |= fdctrl->cur_drv;
+ FLOPPY_DPRINTF("digital output register: 0x%02x\n", retval);
+
+ return retval;
+}
+
/* Digital output register : 0x02 (write): motor control, reset entry/exit
 * and drive selection. */
static void fdctrl_write_dor (fdctrl_t *fdctrl, uint32_t value)
{
    /* Reset mode */
    /* while held in reset, only a write that clears the reset bit (bit 2
     * set -> leave reset) is honoured */
    if (fdctrl->state & FD_CTRL_RESET) {
        if (!(value & 0x04)) {
            FLOPPY_DPRINTF("Floppy controller in RESET state !\n");
            return;
        }
    }
    FLOPPY_DPRINTF("digital output register set to 0x%02x\n", value);
    /* Drive motors state indicators */
    if (value & 0x20)
        fd_start(drv1(fdctrl));
    else
        fd_stop(drv1(fdctrl));
    if (value & 0x10)
        fd_start(drv0(fdctrl));
    else
        fd_stop(drv0(fdctrl));
    /* DMA enable */
#if 0
    if (fdctrl->dma_chann != -1)
        fdctrl->dma_en = 1 - ((value >> 3) & 1);
#endif
    /* Reset */
    /* bit 2 clear -> enter reset; bit 2 set while in reset -> full reset
     * sequence with interrupt, then leave reset/sleep */
    if (!(value & 0x04)) {
        if (!(fdctrl->state & FD_CTRL_RESET)) {
            FLOPPY_DPRINTF("controller enter RESET state\n");
            fdctrl->state |= FD_CTRL_RESET;
        }
    } else {
        if (fdctrl->state & FD_CTRL_RESET) {
            FLOPPY_DPRINTF("controller out of RESET state\n");
            fdctrl_reset(fdctrl, 1);
            fdctrl->state &= ~(FD_CTRL_RESET | FD_CTRL_SLEEP);
        }
    }
    /* Selected drive */
    fdctrl->cur_drv = value & 1;
}
+
+/* Tape drive register : 0x03 */
+static uint32_t fdctrl_read_tape (fdctrl_t *fdctrl)
+{
+ uint32_t retval = 0;
+
+ /* Disk boot selection indicator */
+ retval |= fdctrl->bootsel << 2;
+ /* Tape indicators: never allowed */
+ FLOPPY_DPRINTF("tape drive register: 0x%02x\n", retval);
+
+ return retval;
+}
+
+/* Tape drive register : 0x03 (write).
+ * Latches the boot-drive selection bit; ignored while in reset. */
+static void fdctrl_write_tape (fdctrl_t *fdctrl, uint32_t value)
+{
+    /* Reset mode: register writes are ignored */
+    if (fdctrl->state & FD_CTRL_RESET) {
+        FLOPPY_DPRINTF("Floppy controller in RESET state !\n");
+        return;
+    }
+    FLOPPY_DPRINTF("tape drive register set to 0x%02x\n", value);
+    /* Disk boot selection indicator */
+    fdctrl->bootsel = (value >> 2) & 1;
+    /* Tape indicators: never allow */
+}
+
+/* Main status register : 0x04 (read).
+ * Reading this register has a side effect here: it clears the SLEEP and
+ * RESET state flags.  Bit 7 = RQM (data register ready), bit 6 = DIO
+ * (set when the controller has data for the host), bit 4 = command busy. */
+static uint32_t fdctrl_read_main_status (fdctrl_t *fdctrl)
+{
+    uint32_t retval = 0;
+
+    fdctrl->state &= ~(FD_CTRL_SLEEP | FD_CTRL_RESET);
+    if (!(fdctrl->state & FD_CTRL_BUSY)) {
+        /* Data transfer allowed */
+        retval |= 0x80;
+        /* Data transfer direction indicator */
+        if (fdctrl->data_dir == FD_DIR_READ)
+            retval |= 0x40;
+    }
+    /* Should handle 0x20 for SPECIFY command */
+    /* Command busy indicator: set during the data and result phases */
+    if (FD_STATE(fdctrl->data_state) == FD_STATE_DATA ||
+        FD_STATE(fdctrl->data_state) == FD_STATE_STATUS)
+        retval |= 0x10;
+    FLOPPY_DPRINTF("main status register: 0x%02x\n", retval);
+
+    return retval;
+}
+
+/* Data select rate register : 0x04 (write).
+ * Bit 7 is a self-clearing software reset; bit 6 puts the controller in
+ * power-down (SLEEP) mode.  The precompensation field is not emulated. */
+static void fdctrl_write_rate (fdctrl_t *fdctrl, uint32_t value)
+{
+    /* Reset mode: register writes are ignored */
+    if (fdctrl->state & FD_CTRL_RESET) {
+        FLOPPY_DPRINTF("Floppy controller in RESET state !\n");
+        return;
+    }
+    FLOPPY_DPRINTF("select rate register set to 0x%02x\n", value);
+    /* Reset: autoclear (RESET is raised only for the duration of the call) */
+    if (value & 0x80) {
+        fdctrl->state |= FD_CTRL_RESET;
+        fdctrl_reset(fdctrl, 1);
+        fdctrl->state &= ~FD_CTRL_RESET;
+    }
+    /* Power-down mode */
+    if (value & 0x40) {
+        fdctrl->state |= FD_CTRL_SLEEP;
+        fdctrl_reset(fdctrl, 1);
+    }
+//    fdctrl.precomp = (value >> 2) & 0x07;
+}
+
+/* Digital input register : 0x07 (read-only).
+ * Bit 7 is the disk-change indicator: set if either drive needs
+ * revalidation (media changed); reading clears both drives' flags. */
+static uint32_t fdctrl_read_dir (fdctrl_t *fdctrl)
+{
+    uint32_t retval = 0;
+
+    if (drv0(fdctrl)->drflags & FDRIVE_REVALIDATE ||
+        drv1(fdctrl)->drflags & FDRIVE_REVALIDATE)
+        retval |= 0x80;
+    if (retval != 0)
+        FLOPPY_DPRINTF("Floppy digital input register: 0x%02x\n", retval);
+    /* Reading the register acknowledges the media change */
+    drv0(fdctrl)->drflags &= ~FDRIVE_REVALIDATE;
+    drv1(fdctrl)->drflags &= ~FDRIVE_REVALIDATE;
+
+    return retval;
+}
+
+/* FIFO state control: return the FIFO to the command phase
+ * (host-to-controller direction, position 0, awaiting an opcode). */
+static void fdctrl_reset_fifo (fdctrl_t *fdctrl)
+{
+    fdctrl->data_dir = FD_DIR_WRITE;
+    fdctrl->data_pos = 0;
+    FD_SET_STATE(fdctrl->data_state, FD_STATE_CMD);
+}
+
+/* Set FIFO status for the host to read: enter the result phase with
+ * fifo_len bytes queued, optionally raising an IRQ (with status 0). */
+static void fdctrl_set_fifo (fdctrl_t *fdctrl, int fifo_len, int do_irq)
+{
+    fdctrl->data_dir = FD_DIR_READ;
+    fdctrl->data_len = fifo_len;
+    fdctrl->data_pos = 0;
+    FD_SET_STATE(fdctrl->data_state, FD_STATE_STATUS);
+    if (do_irq)
+        fdctrl_raise_irq(fdctrl, 0x00);
+}
+
+/* Set an error: unimplemented/unknown command.
+ * Returns a single 0x80 status byte (invalid command) without raising
+ * an IRQ; the fuller ST0-based reply is kept disabled below. */
+static void fdctrl_unimplemented (fdctrl_t *fdctrl)
+{
+#if 0
+    fdrive_t *cur_drv;
+
+    cur_drv = get_cur_drv(fdctrl);
+    fdctrl->fifo[0] = 0x60 | (cur_drv->head << 2) | fdctrl->cur_drv;
+    fdctrl->fifo[1] = 0x00;
+    fdctrl->fifo[2] = 0x00;
+    fdctrl_set_fifo(fdctrl, 3, 1);
+#else
+    //    fdctrl_reset_fifo(fdctrl);
+    fdctrl->fifo[0] = 0x80;
+    fdctrl_set_fifo(fdctrl, 1, 0);
+#endif
+}
+
+/* Callback for transfer end (stop or abort).
+ * Builds the standard 7-byte result phase (ST0/ST1/ST2, C/H/S, sector
+ * size code), releases the DMA request if one was pending, and raises
+ * the completion IRQ via fdctrl_set_fifo(). */
+static void fdctrl_stop_transfer (fdctrl_t *fdctrl, uint8_t status0,
+                                  uint8_t status1, uint8_t status2)
+{
+    fdrive_t *cur_drv;
+
+    cur_drv = get_cur_drv(fdctrl);
+    FLOPPY_DPRINTF("transfer status: %02x %02x %02x (%02x)\n",
+                   status0, status1, status2,
+                   status0 | (cur_drv->head << 2) | fdctrl->cur_drv);
+    /* ST0 carries the head and drive number of the current drive */
+    fdctrl->fifo[0] = status0 | (cur_drv->head << 2) | fdctrl->cur_drv;
+    fdctrl->fifo[1] = status1;
+    fdctrl->fifo[2] = status2;
+    fdctrl->fifo[3] = cur_drv->track;
+    fdctrl->fifo[4] = cur_drv->head;
+    fdctrl->fifo[5] = cur_drv->sect;
+    fdctrl->fifo[6] = FD_SECTOR_SC;
+    fdctrl->data_dir = FD_DIR_READ;
+    if (fdctrl->state & FD_CTRL_BUSY) {
+        DMA_release_DREQ(fdctrl->dma_chann);
+        fdctrl->state &= ~FD_CTRL_BUSY;
+    }
+    fdctrl_set_fifo(fdctrl, 7, 1);
+}
+
+/* Prepare a data transfer (either DMA or FIFO).
+ * Decodes drive/C/H/S from the command parameters already queued in the
+ * FIFO, seeks, computes the transfer length, then either arms the DMA
+ * channel (checking its programmed mode against 'direction') or falls
+ * back to a PIO transfer by raising an IRQ. */
+static void fdctrl_start_transfer (fdctrl_t *fdctrl, int direction)
+{
+    fdrive_t *cur_drv;
+    uint8_t kh, kt, ks;
+    int did_seek;
+
+    fdctrl->cur_drv = fdctrl->fifo[1] & 1;
+    cur_drv = get_cur_drv(fdctrl);
+    kt = fdctrl->fifo[2];
+    kh = fdctrl->fifo[3];
+    ks = fdctrl->fifo[4];
+    FLOPPY_DPRINTF("Start transfer at %d %d %02x %02x (%d)\n",
+                   fdctrl->cur_drv, kh, kt, ks,
+                   _fd_sector(kh, kt, ks, cur_drv->last_sect));
+    did_seek = 0;
+    /* Seek failures abort with an error result; fifo[3..5] are patched
+     * back to the requested C/H/S after fdctrl_stop_transfer filled them
+     * with the drive's current position. */
+    switch (fd_seek(cur_drv, kh, kt, ks, fdctrl->config & 0x40)) {
+    case 2:
+        /* sect too big */
+        fdctrl_stop_transfer(fdctrl, 0x40, 0x00, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 3:
+        /* track too big */
+        fdctrl_stop_transfer(fdctrl, 0x40, 0x80, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 4:
+        /* No seek enabled */
+        fdctrl_stop_transfer(fdctrl, 0x40, 0x00, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 1:
+        did_seek = 1;
+        break;
+    default:
+        break;
+    }
+    /* Set the FIFO state */
+    fdctrl->data_dir = direction;
+    fdctrl->data_pos = 0;
+    FD_SET_STATE(fdctrl->data_state, FD_STATE_DATA); /* FIFO ready for data */
+    if (fdctrl->fifo[0] & 0x80)
+        fdctrl->data_state |= FD_STATE_MULTI;
+    else
+        fdctrl->data_state &= ~FD_STATE_MULTI;
+    if (did_seek)
+        fdctrl->data_state |= FD_STATE_SEEK;
+    else
+        fdctrl->data_state &= ~FD_STATE_SEEK;
+    /* Sector-size code 0 means the explicit data length (DTL, fifo[8])
+     * is used; otherwise length = (128 << N) * number of sectors. */
+    if (fdctrl->fifo[5] == 00) {
+        fdctrl->data_len = fdctrl->fifo[8];
+    } else {
+        int tmp;
+        fdctrl->data_len = 128 << fdctrl->fifo[5];
+        tmp = (cur_drv->last_sect - ks + 1);
+        if (fdctrl->fifo[0] & 0x80)
+            tmp += cur_drv->last_sect;
+        fdctrl->data_len *= tmp;
+    }
+    fdctrl->eot = fdctrl->fifo[6];
+    if (fdctrl->dma_en) {
+        int dma_mode;
+        /* DMA transfer are enabled. Check if DMA channel is well programmed */
+        dma_mode = DMA_get_channel_mode(fdctrl->dma_chann);
+        dma_mode = (dma_mode >> 2) & 3;
+        FLOPPY_DPRINTF("dma_mode=%d direction=%d (%d - %d)\n",
+                       dma_mode, direction,
+                       (128 << fdctrl->fifo[5]) *
+                       (cur_drv->last_sect - ks + 1), fdctrl->data_len);
+        /* DMA mode must match: verify (0) for SCANs, write-to-memory (2)
+         * for controller WRITE, read-from-memory (1) for controller READ */
+        if (((direction == FD_DIR_SCANE || direction == FD_DIR_SCANL ||
+              direction == FD_DIR_SCANH) && dma_mode == 0) ||
+            (direction == FD_DIR_WRITE && dma_mode == 2) ||
+            (direction == FD_DIR_READ && dma_mode == 1)) {
+            /* No access is allowed until DMA transfer has completed */
+            fdctrl->state |= FD_CTRL_BUSY;
+            /* Now, we just have to wait for the DMA controller to
+             * recall us...
+             */
+            DMA_hold_DREQ(fdctrl->dma_chann);
+            DMA_schedule(fdctrl->dma_chann);
+            return;
+        } else {
+            FLOPPY_ERROR("dma_mode=%d direction=%d\n", dma_mode, direction);
+        }
+    }
+    FLOPPY_DPRINTF("start non-DMA transfer\n");
+    /* IO based transfer: calculate len */
+    fdctrl_raise_irq(fdctrl, 0x00);
+
+    return;
+}
+
+/* Prepare a transfer of deleted data.
+ * Deleted-data sectors are not emulated: immediately abort with an
+ * abnormal-termination result (ST0 = 0x60). */
+static void fdctrl_start_transfer_del (fdctrl_t *fdctrl, int direction)
+{
+    /* We don't handle deleted data,
+     * so we don't return *ANYTHING*
+     */
+    fdctrl_stop_transfer(fdctrl, 0x60, 0x00, 0x00);
+}
+
+/* handlers for DMA transfers.
+ * Invoked by the DMA controller for channel 'nchan'; copies up to
+ * dma_len bytes between guest memory and the disk image, one sector
+ * buffer (fdctrl->fifo) at a time, advancing C/H/S as sectors complete.
+ * Returns the number of bytes actually transferred. */
+static int fdctrl_transfer_handler (void *opaque, int nchan,
+                                    int dma_pos, int dma_len)
+{
+    fdctrl_t *fdctrl;
+    fdrive_t *cur_drv;
+    int len, start_pos, rel_pos;
+    uint8_t status0 = 0x00, status1 = 0x00, status2 = 0x00;
+
+    fdctrl = opaque;
+    if (!(fdctrl->state & FD_CTRL_BUSY)) {
+        FLOPPY_DPRINTF("Not in DMA transfer mode !\n");
+        return 0;
+    }
+    cur_drv = get_cur_drv(fdctrl);
+    if (fdctrl->data_dir == FD_DIR_SCANE || fdctrl->data_dir == FD_DIR_SCANL ||
+        fdctrl->data_dir == FD_DIR_SCANH)
+        status2 = 0x04;
+    if (dma_len > fdctrl->data_len)
+        dma_len = fdctrl->data_len;
+    if (cur_drv->bs == NULL) {
+        /* No media: fail the transfer immediately */
+        if (fdctrl->data_dir == FD_DIR_WRITE)
+            fdctrl_stop_transfer(fdctrl, 0x60, 0x00, 0x00);
+        else
+            fdctrl_stop_transfer(fdctrl, 0x40, 0x00, 0x00);
+        len = 0;
+        goto transfer_error;
+    }
+    rel_pos = fdctrl->data_pos % FD_SECTOR_LEN;
+    for (start_pos = fdctrl->data_pos; fdctrl->data_pos < dma_len;) {
+        len = dma_len - fdctrl->data_pos;
+        if (len + rel_pos > FD_SECTOR_LEN)
+            len = FD_SECTOR_LEN - rel_pos;
+        /* NOTE(review): this debug printf references 'size' and 'addr',
+         * which are not defined in this function — it can only compile
+         * when FLOPPY_DPRINTF expands to a no-op; confirm before
+         * enabling floppy debug output. */
+        FLOPPY_DPRINTF("copy %d bytes (%d %d %d) %d pos %d %02x %02x "
+                       "(%d-0x%08x 0x%08x)\n", len, size, fdctrl->data_pos,
+                       fdctrl->data_len, fdctrl->cur_drv, cur_drv->head,
+                       cur_drv->track, cur_drv->sect, fd_sector(cur_drv),
+                       fd_sector(cur_drv) * 512, addr);
+        if (fdctrl->data_dir != FD_DIR_WRITE ||
+            len < FD_SECTOR_LEN || rel_pos != 0) {
+            /* READ & SCAN commands and realign to a sector for WRITE */
+            if (bdrv_read(cur_drv->bs, fd_sector(cur_drv),
+                          fdctrl->fifo, 1) < 0) {
+                FLOPPY_DPRINTF("Floppy: error getting sector %d\n",
+                               fd_sector(cur_drv));
+                /* Sure, image size is too small... */
+                memset(fdctrl->fifo, 0, FD_SECTOR_LEN);
+            }
+        }
+        switch (fdctrl->data_dir) {
+        case FD_DIR_READ:
+            /* READ commands */
+            DMA_write_memory (nchan, fdctrl->fifo + rel_pos,
+                              fdctrl->data_pos, len);
+/*             cpu_physical_memory_write(addr + fdctrl->data_pos, */
+/*                                       fdctrl->fifo + rel_pos, len); */
+            break;
+        case FD_DIR_WRITE:
+            /* WRITE commands */
+            DMA_read_memory (nchan, fdctrl->fifo + rel_pos,
+                             fdctrl->data_pos, len);
+/*             cpu_physical_memory_read(addr + fdctrl->data_pos, */
+/*                                      fdctrl->fifo + rel_pos, len); */
+            if (bdrv_write(cur_drv->bs, fd_sector(cur_drv),
+                           fdctrl->fifo, 1) < 0) {
+                FLOPPY_ERROR("writting sector %d\n", fd_sector(cur_drv));
+                fdctrl_stop_transfer(fdctrl, 0x60, 0x00, 0x00);
+                goto transfer_error;
+            }
+            break;
+        default:
+            /* SCAN commands: compare guest memory against the sector */
+            {
+                uint8_t tmpbuf[FD_SECTOR_LEN];
+                int ret;
+                DMA_read_memory (nchan, tmpbuf, fdctrl->data_pos, len);
+/*                 cpu_physical_memory_read(addr + fdctrl->data_pos, */
+/*                                          tmpbuf, len); */
+                ret = memcmp(tmpbuf, fdctrl->fifo + rel_pos, len);
+                if (ret == 0) {
+                    status2 = 0x08;
+                    goto end_transfer;
+                }
+                if ((ret < 0 && fdctrl->data_dir == FD_DIR_SCANL) ||
+                    (ret > 0 && fdctrl->data_dir == FD_DIR_SCANH)) {
+                    status2 = 0x00;
+                    goto end_transfer;
+                }
+            }
+            break;
+        }
+        fdctrl->data_pos += len;
+        rel_pos = fdctrl->data_pos % FD_SECTOR_LEN;
+        if (rel_pos == 0) {
+            /* Seek to next sector */
+            FLOPPY_DPRINTF("seek to next sector (%d %02x %02x => %d) (%d)\n",
+                           cur_drv->head, cur_drv->track, cur_drv->sect,
+                           fd_sector(cur_drv),
+                           fdctrl->data_pos - size);
+            /* XXX: cur_drv->sect >= cur_drv->last_sect should be an
+               error in fact */
+            if (cur_drv->sect >= cur_drv->last_sect ||
+                cur_drv->sect == fdctrl->eot) {
+                cur_drv->sect = 1;
+                if (FD_MULTI_TRACK(fdctrl->data_state)) {
+                    if (cur_drv->head == 0 &&
+                        (cur_drv->flags & FDISK_DBL_SIDES) != 0) {
+                        cur_drv->head = 1;
+                    } else {
+                        cur_drv->head = 0;
+                        cur_drv->track++;
+                        if ((cur_drv->flags & FDISK_DBL_SIDES) == 0)
+                            break;
+                    }
+                } else {
+                    cur_drv->track++;
+                    break;
+                }
+                FLOPPY_DPRINTF("seek to next track (%d %02x %02x => %d)\n",
+                               cur_drv->head, cur_drv->track,
+                               cur_drv->sect, fd_sector(cur_drv));
+            } else {
+                cur_drv->sect++;
+            }
+        }
+    }
+end_transfer:
+    len = fdctrl->data_pos - start_pos;
+    FLOPPY_DPRINTF("end transfer %d %d %d\n",
+                   fdctrl->data_pos, len, fdctrl->data_len);
+    if (fdctrl->data_dir == FD_DIR_SCANE ||
+        fdctrl->data_dir == FD_DIR_SCANL ||
+        fdctrl->data_dir == FD_DIR_SCANH)
+        status2 = 0x08;
+    if (FD_DID_SEEK(fdctrl->data_state))
+        status0 |= 0x20;
+    fdctrl->data_len -= len;
+    //    if (fdctrl->data_len == 0)
+    fdctrl_stop_transfer(fdctrl, status0, status1, status2);
+transfer_error:
+
+    return len;
+}
+
+/* Data register : 0x05 (read).
+ * Returns the next FIFO byte during the data or result phase, refilling
+ * the sector buffer from the image at sector boundaries, and switches
+ * back to command phase once the last byte has been consumed. */
+static uint32_t fdctrl_read_data (fdctrl_t *fdctrl)
+{
+    fdrive_t *cur_drv;
+    uint32_t retval = 0;
+    int pos, len;
+
+    cur_drv = get_cur_drv(fdctrl);
+    fdctrl->state &= ~FD_CTRL_SLEEP;
+    if (FD_STATE(fdctrl->data_state) == FD_STATE_CMD) {
+        FLOPPY_ERROR("can't read data in CMD state\n");
+        return 0;
+    }
+    pos = fdctrl->data_pos;
+    if (FD_STATE(fdctrl->data_state) == FD_STATE_DATA) {
+        pos %= FD_SECTOR_LEN;
+        if (pos == 0) {
+            /* Refill the FIFO with the next sector (PIO read path) */
+            len = fdctrl->data_len - fdctrl->data_pos;
+            if (len > FD_SECTOR_LEN)
+                len = FD_SECTOR_LEN;
+            bdrv_read(cur_drv->bs, fd_sector(cur_drv),
+                      fdctrl->fifo, len);
+        }
+    }
+    retval = fdctrl->fifo[pos];
+    if (++fdctrl->data_pos == fdctrl->data_len) {
+        fdctrl->data_pos = 0;
+        /* Switch from transfer mode to status mode
+         * then from status mode to command mode
+         */
+        if (FD_STATE(fdctrl->data_state) == FD_STATE_DATA) {
+            fdctrl_stop_transfer(fdctrl, 0x20, 0x00, 0x00);
+        } else {
+            fdctrl_reset_fifo(fdctrl);
+            fdctrl_reset_irq(fdctrl);
+        }
+    }
+    FLOPPY_DPRINTF("data register: 0x%02x\n", retval);
+
+    return retval;
+}
+
+/* Format one sector: seek to the C/H/S given in the FORMAT_TRACK
+ * per-sector parameter bytes and write a zero-filled sector to the
+ * image.  Ends the command after the track's last sector, otherwise
+ * re-arms the FIFO for the next 4-byte sector descriptor. */
+static void fdctrl_format_sector (fdctrl_t *fdctrl)
+{
+    fdrive_t *cur_drv;
+    uint8_t kh, kt, ks;
+    int did_seek;
+
+    fdctrl->cur_drv = fdctrl->fifo[1] & 1;
+    cur_drv = get_cur_drv(fdctrl);
+    kt = fdctrl->fifo[6];
+    kh = fdctrl->fifo[7];
+    ks = fdctrl->fifo[8];
+    FLOPPY_DPRINTF("format sector at %d %d %02x %02x (%d)\n",
+                   fdctrl->cur_drv, kh, kt, ks,
+                   _fd_sector(kh, kt, ks, cur_drv->last_sect));
+    did_seek = 0;
+    /* Seek failures abort with an error result; fifo[3..5] are patched
+     * back to the requested C/H/S afterwards. */
+    switch (fd_seek(cur_drv, kh, kt, ks, fdctrl->config & 0x40)) {
+    case 2:
+        /* sect too big */
+        fdctrl_stop_transfer(fdctrl, 0x40, 0x00, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 3:
+        /* track too big */
+        fdctrl_stop_transfer(fdctrl, 0x40, 0x80, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 4:
+        /* No seek enabled */
+        fdctrl_stop_transfer(fdctrl, 0x40, 0x00, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 1:
+        did_seek = 1;
+        fdctrl->data_state |= FD_STATE_SEEK;
+        break;
+    default:
+        break;
+    }
+    /* Formatting writes an all-zero sector */
+    memset(fdctrl->fifo, 0, FD_SECTOR_LEN);
+    if (cur_drv->bs == NULL ||
+        bdrv_write(cur_drv->bs, fd_sector(cur_drv), fdctrl->fifo, 1) < 0) {
+        FLOPPY_ERROR("formating sector %d\n", fd_sector(cur_drv));
+        fdctrl_stop_transfer(fdctrl, 0x60, 0x00, 0x00);
+    } else {
+        if (cur_drv->sect == cur_drv->last_sect) {
+            fdctrl->data_state &= ~FD_STATE_FORMAT;
+            /* Last sector done */
+            if (FD_DID_SEEK(fdctrl->data_state))
+                fdctrl_stop_transfer(fdctrl, 0x20, 0x00, 0x00);
+            else
+                fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
+        } else {
+            /* More to do: await the next 4-byte sector descriptor */
+            fdctrl->data_pos = 0;
+            fdctrl->data_len = 4;
+        }
+    }
+}
+
+/* Data register : 0x05 (write).
+ * Host-to-controller FIFO path.  Three roles, by phase:
+ *  - execution phase (FD_STATE_DATA): PIO sector-data bytes;
+ *  - command phase, first byte: decode the opcode and set the number of
+ *    parameter bytes expected (data_len);
+ *  - command phase, remaining bytes: queue parameters, and dispatch the
+ *    command once the last one arrives (the 'enqueue' path). */
+static void fdctrl_write_data (fdctrl_t *fdctrl, uint32_t value)
+{
+    fdrive_t *cur_drv;
+
+    cur_drv = get_cur_drv(fdctrl);
+    /* Reset mode: writes are ignored */
+    if (fdctrl->state & FD_CTRL_RESET) {
+        FLOPPY_DPRINTF("Floppy controller in RESET state !\n");
+        return;
+    }
+    fdctrl->state &= ~FD_CTRL_SLEEP;
+    if (FD_STATE(fdctrl->data_state) == FD_STATE_STATUS) {
+        FLOPPY_ERROR("can't write data in status mode\n");
+        return;
+    }
+    /* Is it write command time ? */
+    if (FD_STATE(fdctrl->data_state) == FD_STATE_DATA) {
+        /* FIFO data write (PIO write path) */
+        fdctrl->fifo[fdctrl->data_pos++] = value;
+        /* NOTE(review): after the increment, the end of a full sector is
+         * pos % LEN == 0, not LEN - 1 — this condition looks like it
+         * flushes one byte early; confirm against the PIO write path. */
+        if (fdctrl->data_pos % FD_SECTOR_LEN == (FD_SECTOR_LEN - 1) ||
+            fdctrl->data_pos == fdctrl->data_len) {
+            bdrv_write(cur_drv->bs, fd_sector(cur_drv),
+                       fdctrl->fifo, FD_SECTOR_LEN);
+        }
+        /* Switch from transfer mode to status mode
+         * then from status mode to command mode
+         */
+        if (FD_STATE(fdctrl->data_state) == FD_STATE_DATA)
+            fdctrl_stop_transfer(fdctrl, 0x20, 0x00, 0x00);
+        return;
+    }
+    if (fdctrl->data_pos == 0) {
+        /* Command byte: decode the opcode.  Commands with MT/MF/SK
+         * option bits are matched under a mask first; exact opcodes
+         * follow. */
+        switch (value & 0x5F) {
+        case 0x46:
+            /* READ variants */
+            FLOPPY_DPRINTF("READ command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        case 0x4C:
+            /* READ_DELETED variants */
+            FLOPPY_DPRINTF("READ_DELETED command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        case 0x50:
+            /* SCAN_EQUAL variants */
+            FLOPPY_DPRINTF("SCAN_EQUAL command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        case 0x56:
+            /* VERIFY variants */
+            FLOPPY_DPRINTF("VERIFY command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        case 0x59:
+            /* SCAN_LOW_OR_EQUAL variants */
+            FLOPPY_DPRINTF("SCAN_LOW_OR_EQUAL command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        case 0x5D:
+            /* SCAN_HIGH_OR_EQUAL variants */
+            FLOPPY_DPRINTF("SCAN_HIGH_OR_EQUAL command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        default:
+            break;
+        }
+        switch (value & 0x7F) {
+        case 0x45:
+            /* WRITE variants */
+            FLOPPY_DPRINTF("WRITE command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        case 0x49:
+            /* WRITE_DELETED variants */
+            FLOPPY_DPRINTF("WRITE_DELETED command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        default:
+            break;
+        }
+        switch (value) {
+        case 0x03:
+            /* SPECIFY */
+            FLOPPY_DPRINTF("SPECIFY command\n");
+            /* 1 parameter cmd */
+            fdctrl->data_len = 3;
+            goto enqueue;
+        case 0x04:
+            /* SENSE_DRIVE_STATUS */
+            FLOPPY_DPRINTF("SENSE_DRIVE_STATUS command\n");
+            /* 1 parameter cmd */
+            fdctrl->data_len = 2;
+            goto enqueue;
+        case 0x07:
+            /* RECALIBRATE */
+            FLOPPY_DPRINTF("RECALIBRATE command\n");
+            /* 1 parameter cmd */
+            fdctrl->data_len = 2;
+            goto enqueue;
+        case 0x08:
+            /* SENSE_INTERRUPT_STATUS */
+            FLOPPY_DPRINTF("SENSE_INTERRUPT_STATUS command (%02x)\n",
+                           fdctrl->int_status);
+            /* No parameters cmd: returns status if no interrupt */
+#if 0
+            fdctrl->fifo[0] =
+                fdctrl->int_status | (cur_drv->head << 2) | fdctrl->cur_drv;
+#else
+            /* XXX: int_status handling is broken for read/write
+               commands, so we do this hack. It should be suppressed
+               ASAP */
+            fdctrl->fifo[0] =
+                0x20 | (cur_drv->head << 2) | fdctrl->cur_drv;
+#endif
+            fdctrl->fifo[1] = cur_drv->track;
+            fdctrl_set_fifo(fdctrl, 2, 0);
+            fdctrl_reset_irq(fdctrl);
+            fdctrl->int_status = 0xC0;
+            return;
+        case 0x0E:
+            /* DUMPREG */
+            FLOPPY_DPRINTF("DUMPREG command\n");
+            /* Drives position */
+            fdctrl->fifo[0] = drv0(fdctrl)->track;
+            fdctrl->fifo[1] = drv1(fdctrl)->track;
+            fdctrl->fifo[2] = 0;
+            fdctrl->fifo[3] = 0;
+            /* timers */
+            fdctrl->fifo[4] = fdctrl->timer0;
+            fdctrl->fifo[5] = (fdctrl->timer1 << 1) | fdctrl->dma_en;
+            fdctrl->fifo[6] = cur_drv->last_sect;
+            fdctrl->fifo[7] = (fdctrl->lock << 7) |
+                (cur_drv->perpendicular << 2);
+            fdctrl->fifo[8] = fdctrl->config;
+            fdctrl->fifo[9] = fdctrl->precomp_trk;
+            fdctrl_set_fifo(fdctrl, 10, 0);
+            return;
+        case 0x0F:
+            /* SEEK */
+            FLOPPY_DPRINTF("SEEK command\n");
+            /* 2 parameters cmd */
+            fdctrl->data_len = 3;
+            goto enqueue;
+        case 0x10:
+            /* VERSION */
+            FLOPPY_DPRINTF("VERSION command\n");
+            /* No parameters cmd */
+            /* Controller's version */
+            fdctrl->fifo[0] = fdctrl->version;
+            fdctrl_set_fifo(fdctrl, 1, 1);
+            return;
+        case 0x12:
+            /* PERPENDICULAR_MODE */
+            FLOPPY_DPRINTF("PERPENDICULAR_MODE command\n");
+            /* 1 parameter cmd */
+            fdctrl->data_len = 2;
+            goto enqueue;
+        case 0x13:
+            /* CONFIGURE */
+            FLOPPY_DPRINTF("CONFIGURE command\n");
+            /* 3 parameters cmd */
+            fdctrl->data_len = 4;
+            goto enqueue;
+        case 0x14:
+            /* UNLOCK */
+            FLOPPY_DPRINTF("UNLOCK command\n");
+            /* No parameters cmd */
+            fdctrl->lock = 0;
+            fdctrl->fifo[0] = 0;
+            fdctrl_set_fifo(fdctrl, 1, 0);
+            return;
+        case 0x17:
+            /* POWERDOWN_MODE */
+            FLOPPY_DPRINTF("POWERDOWN_MODE command\n");
+            /* 2 parameters cmd */
+            fdctrl->data_len = 3;
+            goto enqueue;
+        case 0x18:
+            /* PART_ID */
+            FLOPPY_DPRINTF("PART_ID command\n");
+            /* No parameters cmd */
+            fdctrl->fifo[0] = 0x41; /* Stepping 1 */
+            fdctrl_set_fifo(fdctrl, 1, 0);
+            return;
+        case 0x2C:
+            /* SAVE */
+            FLOPPY_DPRINTF("SAVE command\n");
+            /* No parameters cmd */
+            fdctrl->fifo[0] = 0;
+            fdctrl->fifo[1] = 0;
+            /* Drives position */
+            fdctrl->fifo[2] = drv0(fdctrl)->track;
+            fdctrl->fifo[3] = drv1(fdctrl)->track;
+            fdctrl->fifo[4] = 0;
+            fdctrl->fifo[5] = 0;
+            /* timers */
+            fdctrl->fifo[6] = fdctrl->timer0;
+            fdctrl->fifo[7] = fdctrl->timer1;
+            fdctrl->fifo[8] = cur_drv->last_sect;
+            fdctrl->fifo[9] = (fdctrl->lock << 7) |
+                (cur_drv->perpendicular << 2);
+            fdctrl->fifo[10] = fdctrl->config;
+            fdctrl->fifo[11] = fdctrl->precomp_trk;
+            fdctrl->fifo[12] = fdctrl->pwrd;
+            fdctrl->fifo[13] = 0;
+            fdctrl->fifo[14] = 0;
+            fdctrl_set_fifo(fdctrl, 15, 1);
+            return;
+        case 0x33:
+            /* OPTION */
+            FLOPPY_DPRINTF("OPTION command\n");
+            /* 1 parameter cmd */
+            fdctrl->data_len = 2;
+            goto enqueue;
+        case 0x42:
+            /* READ_TRACK */
+            FLOPPY_DPRINTF("READ_TRACK command\n");
+            /* 8 parameters cmd */
+            fdctrl->data_len = 9;
+            goto enqueue;
+        case 0x4A:
+            /* READ_ID */
+            FLOPPY_DPRINTF("READ_ID command\n");
+            /* 1 parameter cmd */
+            fdctrl->data_len = 2;
+            goto enqueue;
+        case 0x4C:
+            /* RESTORE */
+            FLOPPY_DPRINTF("RESTORE command\n");
+            /* 17 parameters cmd */
+            fdctrl->data_len = 18;
+            goto enqueue;
+        case 0x4D:
+            /* FORMAT_TRACK */
+            FLOPPY_DPRINTF("FORMAT_TRACK command\n");
+            /* 5 parameters cmd */
+            fdctrl->data_len = 6;
+            goto enqueue;
+        case 0x8E:
+            /* DRIVE_SPECIFICATION_COMMAND */
+            FLOPPY_DPRINTF("DRIVE_SPECIFICATION_COMMAND command\n");
+            /* 5 parameters cmd */
+            fdctrl->data_len = 6;
+            goto enqueue;
+        case 0x8F:
+            /* RELATIVE_SEEK_OUT */
+            FLOPPY_DPRINTF("RELATIVE_SEEK_OUT command\n");
+            /* 2 parameters cmd */
+            fdctrl->data_len = 3;
+            goto enqueue;
+        case 0x94:
+            /* LOCK */
+            FLOPPY_DPRINTF("LOCK command\n");
+            /* No parameters cmd */
+            fdctrl->lock = 1;
+            fdctrl->fifo[0] = 0x10;
+            fdctrl_set_fifo(fdctrl, 1, 1);
+            return;
+        case 0xCD:
+            /* FORMAT_AND_WRITE */
+            FLOPPY_DPRINTF("FORMAT_AND_WRITE command\n");
+            /* 10 parameters cmd */
+            fdctrl->data_len = 11;
+            goto enqueue;
+        case 0xCF:
+            /* RELATIVE_SEEK_IN */
+            FLOPPY_DPRINTF("RELATIVE_SEEK_IN command\n");
+            /* 2 parameters cmd */
+            fdctrl->data_len = 3;
+            goto enqueue;
+        default:
+            /* Unknown command */
+            FLOPPY_ERROR("unknown command: 0x%02x\n", value);
+            fdctrl_unimplemented(fdctrl);
+            return;
+        }
+    }
+enqueue:
+    /* Queue the opcode/parameter byte; dispatch once all are in */
+    FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
+    fdctrl->fifo[fdctrl->data_pos] = value;
+    if (++fdctrl->data_pos == fdctrl->data_len) {
+        /* We now have all parameters
+         * and will be able to treat the command
+         */
+        if (fdctrl->data_state & FD_STATE_FORMAT) {
+            /* These bytes were a FORMAT_TRACK sector descriptor */
+            fdctrl_format_sector(fdctrl);
+            return;
+        }
+        switch (fdctrl->fifo[0] & 0x1F) {
+        case 0x06:
+        {
+            /* READ variants */
+            FLOPPY_DPRINTF("treat READ command\n");
+            fdctrl_start_transfer(fdctrl, FD_DIR_READ);
+            return;
+        }
+        case 0x0C:
+            /* READ_DELETED variants */
+//            FLOPPY_DPRINTF("treat READ_DELETED command\n");
+            FLOPPY_ERROR("treat READ_DELETED command\n");
+            fdctrl_start_transfer_del(fdctrl, FD_DIR_READ);
+            return;
+        case 0x16:
+            /* VERIFY variants */
+//            FLOPPY_DPRINTF("treat VERIFY command\n");
+            FLOPPY_ERROR("treat VERIFY command\n");
+            fdctrl_stop_transfer(fdctrl, 0x20, 0x00, 0x00);
+            return;
+        case 0x10:
+            /* SCAN_EQUAL variants */
+//            FLOPPY_DPRINTF("treat SCAN_EQUAL command\n");
+            FLOPPY_ERROR("treat SCAN_EQUAL command\n");
+            fdctrl_start_transfer(fdctrl, FD_DIR_SCANE);
+            return;
+        case 0x19:
+            /* SCAN_LOW_OR_EQUAL variants */
+//            FLOPPY_DPRINTF("treat SCAN_LOW_OR_EQUAL command\n");
+            FLOPPY_ERROR("treat SCAN_LOW_OR_EQUAL command\n");
+            fdctrl_start_transfer(fdctrl, FD_DIR_SCANL);
+            return;
+        case 0x1D:
+            /* SCAN_HIGH_OR_EQUAL variants */
+//            FLOPPY_DPRINTF("treat SCAN_HIGH_OR_EQUAL command\n");
+            FLOPPY_ERROR("treat SCAN_HIGH_OR_EQUAL command\n");
+            fdctrl_start_transfer(fdctrl, FD_DIR_SCANH);
+            return;
+        default:
+            break;
+        }
+        switch (fdctrl->fifo[0] & 0x3F) {
+        case 0x05:
+            /* WRITE variants */
+            FLOPPY_DPRINTF("treat WRITE command (%02x)\n", fdctrl->fifo[0]);
+            fdctrl_start_transfer(fdctrl, FD_DIR_WRITE);
+            return;
+        case 0x09:
+            /* WRITE_DELETED variants */
+//            FLOPPY_DPRINTF("treat WRITE_DELETED command\n");
+            FLOPPY_ERROR("treat WRITE_DELETED command\n");
+            fdctrl_start_transfer_del(fdctrl, FD_DIR_WRITE);
+            return;
+        default:
+            break;
+        }
+        switch (fdctrl->fifo[0]) {
+        case 0x03:
+            /* SPECIFY: latch step-rate/load timers and DMA mode bit */
+            FLOPPY_DPRINTF("treat SPECIFY command\n");
+            fdctrl->timer0 = (fdctrl->fifo[1] >> 4) & 0xF;
+            fdctrl->timer1 = fdctrl->fifo[2] >> 1;
+            fdctrl->dma_en = 1 - (fdctrl->fifo[2] & 1) ;
+            /* No result back */
+            fdctrl_reset_fifo(fdctrl);
+            break;
+        case 0x04:
+            /* SENSE_DRIVE_STATUS */
+            FLOPPY_DPRINTF("treat SENSE_DRIVE_STATUS command\n");
+            fdctrl->cur_drv = fdctrl->fifo[1] & 1;
+            cur_drv = get_cur_drv(fdctrl);
+            cur_drv->head = (fdctrl->fifo[1] >> 2) & 1;
+            /* 1 Byte status back */
+            fdctrl->fifo[0] = (cur_drv->ro << 6) |
+                (cur_drv->track == 0 ? 0x10 : 0x00) |
+                (cur_drv->head << 2) |
+                fdctrl->cur_drv |
+                0x28;
+            fdctrl_set_fifo(fdctrl, 1, 0);
+            break;
+        case 0x07:
+            /* RECALIBRATE */
+            FLOPPY_DPRINTF("treat RECALIBRATE command\n");
+            fdctrl->cur_drv = fdctrl->fifo[1] & 1;
+            cur_drv = get_cur_drv(fdctrl);
+            fd_recalibrate(cur_drv);
+            fdctrl_reset_fifo(fdctrl);
+            /* Raise Interrupt */
+            fdctrl_raise_irq(fdctrl, 0x20);
+            break;
+        case 0x0F:
+            /* SEEK */
+            FLOPPY_DPRINTF("treat SEEK command\n");
+            fdctrl->cur_drv = fdctrl->fifo[1] & 1;
+            cur_drv = get_cur_drv(fdctrl);
+            fd_start(cur_drv);
+            if (fdctrl->fifo[2] <= cur_drv->track)
+                cur_drv->dir = 1;
+            else
+                cur_drv->dir = 0;
+            fdctrl_reset_fifo(fdctrl);
+            /* Seeking past the last track fails with abnormal termination */
+            if (fdctrl->fifo[2] > cur_drv->max_track) {
+                fdctrl_raise_irq(fdctrl, 0x60);
+            } else {
+                cur_drv->track = fdctrl->fifo[2];
+                /* Raise Interrupt */
+                fdctrl_raise_irq(fdctrl, 0x20);
+            }
+            break;
+        case 0x12:
+            /* PERPENDICULAR_MODE */
+            FLOPPY_DPRINTF("treat PERPENDICULAR_MODE command\n");
+            if (fdctrl->fifo[1] & 0x80)
+                cur_drv->perpendicular = fdctrl->fifo[1] & 0x7;
+            /* No result back */
+            fdctrl_reset_fifo(fdctrl);
+            break;
+        case 0x13:
+            /* CONFIGURE */
+            FLOPPY_DPRINTF("treat CONFIGURE command\n");
+            fdctrl->config = fdctrl->fifo[2];
+            fdctrl->precomp_trk = fdctrl->fifo[3];
+            /* No result back */
+            fdctrl_reset_fifo(fdctrl);
+            break;
+        case 0x17:
+            /* POWERDOWN_MODE */
+            FLOPPY_DPRINTF("treat POWERDOWN_MODE command\n");
+            fdctrl->pwrd = fdctrl->fifo[1];
+            fdctrl->fifo[0] = fdctrl->fifo[1];
+            fdctrl_set_fifo(fdctrl, 1, 1);
+            break;
+        case 0x33:
+            /* OPTION */
+            FLOPPY_DPRINTF("treat OPTION command\n");
+            /* No result back */
+            fdctrl_reset_fifo(fdctrl);
+            break;
+        case 0x42:
+            /* READ_TRACK: approximated as a plain READ transfer */
+//            FLOPPY_DPRINTF("treat READ_TRACK command\n");
+            FLOPPY_ERROR("treat READ_TRACK command\n");
+            fdctrl_start_transfer(fdctrl, FD_DIR_READ);
+            break;
+        case 0x4A:
+            /* READ_ID: result is delivered later via result_timer */
+            FLOPPY_DPRINTF("treat READ_ID command\n");
+            /* XXX: should set main status register to busy */
+            cur_drv->head = (fdctrl->fifo[1] >> 2) & 1;
+            qemu_mod_timer(fdctrl->result_timer,
+                           qemu_get_clock(vm_clock) + (ticks_per_sec / 50));
+            break;
+        case 0x4C:
+            /* RESTORE: reload controller/drive state from parameters
+             * (inverse of SAVE) */
+            FLOPPY_DPRINTF("treat RESTORE command\n");
+            /* Drives position */
+            drv0(fdctrl)->track = fdctrl->fifo[3];
+            drv1(fdctrl)->track = fdctrl->fifo[4];
+            /* timers */
+            fdctrl->timer0 = fdctrl->fifo[7];
+            fdctrl->timer1 = fdctrl->fifo[8];
+            cur_drv->last_sect = fdctrl->fifo[9];
+            fdctrl->lock = fdctrl->fifo[10] >> 7;
+            cur_drv->perpendicular = (fdctrl->fifo[10] >> 2) & 0xF;
+            fdctrl->config = fdctrl->fifo[11];
+            fdctrl->precomp_trk = fdctrl->fifo[12];
+            fdctrl->pwrd = fdctrl->fifo[13];
+            fdctrl_reset_fifo(fdctrl);
+            break;
+        case 0x4D:
+            /* FORMAT_TRACK */
+            FLOPPY_DPRINTF("treat FORMAT_TRACK command\n");
+            fdctrl->cur_drv = fdctrl->fifo[1] & 1;
+            cur_drv = get_cur_drv(fdctrl);
+            fdctrl->data_state |= FD_STATE_FORMAT;
+            if (fdctrl->fifo[0] & 0x80)
+                fdctrl->data_state |= FD_STATE_MULTI;
+            else
+                fdctrl->data_state &= ~FD_STATE_MULTI;
+            fdctrl->data_state &= ~FD_STATE_SEEK;
+            cur_drv->bps =
+                fdctrl->fifo[2] > 7 ? 16384 : 128 << fdctrl->fifo[2];
+#if 0
+            cur_drv->last_sect =
+                cur_drv->flags & FDISK_DBL_SIDES ? fdctrl->fifo[3] :
+                fdctrl->fifo[3] / 2;
+#else
+            cur_drv->last_sect = fdctrl->fifo[3];
+#endif
+            /* Bochs BIOS is buggy and don't send format informations
+             * for each sector. So, pretend all's done right now...
+             */
+            fdctrl->data_state &= ~FD_STATE_FORMAT;
+            fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
+            break;
+        case 0x8E:
+            /* DRIVE_SPECIFICATION_COMMAND */
+            FLOPPY_DPRINTF("treat DRIVE_SPECIFICATION_COMMAND command\n");
+            if (fdctrl->fifo[fdctrl->data_pos - 1] & 0x80) {
+                /* Command parameters done */
+                if (fdctrl->fifo[fdctrl->data_pos - 1] & 0x40) {
+                    fdctrl->fifo[0] = fdctrl->fifo[1];
+                    fdctrl->fifo[2] = 0;
+                    fdctrl->fifo[3] = 0;
+                    fdctrl_set_fifo(fdctrl, 4, 1);
+                } else {
+                    fdctrl_reset_fifo(fdctrl);
+                }
+            } else if (fdctrl->data_len > 7) {
+                /* ERROR */
+                fdctrl->fifo[0] = 0x80 |
+                    (cur_drv->head << 2) | fdctrl->cur_drv;
+                fdctrl_set_fifo(fdctrl, 1, 1);
+            }
+            break;
+        case 0x8F:
+            /* RELATIVE_SEEK_OUT: step towards higher tracks, clamped */
+            FLOPPY_DPRINTF("treat RELATIVE_SEEK_OUT command\n");
+            fdctrl->cur_drv = fdctrl->fifo[1] & 1;
+            cur_drv = get_cur_drv(fdctrl);
+            fd_start(cur_drv);
+            cur_drv->dir = 0;
+            if (fdctrl->fifo[2] + cur_drv->track >= cur_drv->max_track) {
+                cur_drv->track = cur_drv->max_track - 1;
+            } else {
+                cur_drv->track += fdctrl->fifo[2];
+            }
+            fdctrl_reset_fifo(fdctrl);
+            fdctrl_raise_irq(fdctrl, 0x20);
+            break;
+        case 0xCD:
+            /* FORMAT_AND_WRITE */
+//            FLOPPY_DPRINTF("treat FORMAT_AND_WRITE command\n");
+            FLOPPY_ERROR("treat FORMAT_AND_WRITE command\n");
+            fdctrl_unimplemented(fdctrl);
+            break;
+        case 0xCF:
+            /* RELATIVE_SEEK_IN: step towards track 0, clamped */
+            FLOPPY_DPRINTF("treat RELATIVE_SEEK_IN command\n");
+            fdctrl->cur_drv = fdctrl->fifo[1] & 1;
+            cur_drv = get_cur_drv(fdctrl);
+            fd_start(cur_drv);
+            cur_drv->dir = 1;
+            if (fdctrl->fifo[2] > cur_drv->track) {
+                cur_drv->track = 0;
+            } else {
+                cur_drv->track -= fdctrl->fifo[2];
+            }
+            fdctrl_reset_fifo(fdctrl);
+            /* Raise Interrupt */
+            fdctrl_raise_irq(fdctrl, 0x20);
+            break;
+        }
+    }
+}
+
+/* Timer callback used to deliver the delayed READ_ID result:
+ * completes the command with a normal-termination status. */
+static void fdctrl_result_timer(void *opaque)
+{
+    fdctrl_t *fdctrl = opaque;
+    fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
+}
diff --git a/tools/ioemu/hw/fmopl.c b/tools/ioemu/hw/fmopl.c
new file mode 100644
index 0000000000..2b0e82b0cc
--- /dev/null
+++ b/tools/ioemu/hw/fmopl.c
@@ -0,0 +1,1390 @@
+/*
+**
+** File: fmopl.c -- software implementation of FM sound generator
+**
+** Copyright (C) 1999,2000 Tatsuyuki Satoh , MultiArcadeMachineEmurator development
+**
+** Version 0.37a
+**
+*/
+
+/*
+ preliminary :
+ Problem :
+ note:
+*/
+
+/* This version of fmopl.c is a fork of the MAME one, relicensed under the LGPL.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define INLINE __inline
+#define HAS_YM3812 1
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <math.h>
+//#include "driver.h" /* use M.A.M.E. */
+#include "fmopl.h"
+
+#ifndef PI
+#define PI 3.14159265358979323846
+#endif
+
+/* -------------------- for debug --------------------- */
+/* #define OPL_OUTPUT_LOG */
+#ifdef OPL_OUTPUT_LOG
+static FILE *opl_dbg_fp = NULL;
+static FM_OPL *opl_dbg_opl[16];
+static int opl_dbg_maxchip,opl_dbg_chip;
+#endif
+
+/* -------------------- preliminary define section --------------------- */
+/* attack/decay rate time rate */
+#define OPL_ARRATE 141280 /* RATE 4 = 2826.24ms @ 3.6MHz */
+#define OPL_DRRATE 1956000 /* RATE 4 = 39280.64ms @ 3.6MHz */
+
+#define DELTAT_MIXING_LEVEL (1) /* DELTA-T ADPCM MIXING LEVEL */
+
+#define FREQ_BITS 24 /* frequency turn */
+
/* counter bits = 20 , octave 7 */
+#define FREQ_RATE (1<<(FREQ_BITS-20))
+#define TL_BITS (FREQ_BITS+2)
+
+/* final output shift , limit minimum and maximum */
+#define OPL_OUTSB (TL_BITS+3-16) /* OPL output final shift 16bit */
+#define OPL_MAXOUT (0x7fff<<OPL_OUTSB)
+#define OPL_MINOUT (-0x8000<<OPL_OUTSB)
+
+/* -------------------- quality selection --------------------- */
+
+/* sinwave entries */
+/* used static memory = SIN_ENT * 4 (byte) */
+#define SIN_ENT 2048
+
+/* output level entries (envelope,sinwave) */
+/* envelope counter lower bits */
+#define ENV_BITS 16
+/* envelope output entries */
+#define EG_ENT 4096
+/* used dynamic memory = EG_ENT*4*4(byte)or EG_ENT*6*4(byte) */
+/* used static memory = EG_ENT*4 (byte) */
+
+#define EG_OFF ((2*EG_ENT)<<ENV_BITS) /* OFF */
+#define EG_DED EG_OFF
+#define EG_DST (EG_ENT<<ENV_BITS) /* DECAY START */
+#define EG_AED EG_DST
+#define EG_AST 0 /* ATTACK START */
+
+#define EG_STEP (96.0/EG_ENT) /* OPL is 0.1875 dB step */
+
+/* LFO table entries */
+#define VIB_ENT 512
+#define VIB_SHIFT (32-9)
+#define AMS_ENT 512
+#define AMS_SHIFT (32-9)
+
+#define VIB_RATE 256
+
+/* -------------------- local defines , macros --------------------- */
+
+/* register number to channel number , slot offset */
+#define SLOT1 0
+#define SLOT2 1
+
+/* envelope phase */
+#define ENV_MOD_RR 0x00
+#define ENV_MOD_DR 0x01
+#define ENV_MOD_AR 0x02
+
+/* -------------------- tables --------------------- */
+static const int slot_array[32]=
+{
+ 0, 2, 4, 1, 3, 5,-1,-1,
+ 6, 8,10, 7, 9,11,-1,-1,
+ 12,14,16,13,15,17,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1
+};
+
+/* key scale level */
+/* table is 3dB/OCT , DV converts this in TL step at 6dB/OCT */
+#define DV (EG_STEP/2)
+static const UINT32 KSL_TABLE[8*16]=
+{
+ /* OCT 0 */
+ 0.000/DV, 0.000/DV, 0.000/DV, 0.000/DV,
+ 0.000/DV, 0.000/DV, 0.000/DV, 0.000/DV,
+ 0.000/DV, 0.000/DV, 0.000/DV, 0.000/DV,
+ 0.000/DV, 0.000/DV, 0.000/DV, 0.000/DV,
+ /* OCT 1 */
+ 0.000/DV, 0.000/DV, 0.000/DV, 0.000/DV,
+ 0.000/DV, 0.000/DV, 0.000/DV, 0.000/DV,
+ 0.000/DV, 0.750/DV, 1.125/DV, 1.500/DV,
+ 1.875/DV, 2.250/DV, 2.625/DV, 3.000/DV,
+ /* OCT 2 */
+ 0.000/DV, 0.000/DV, 0.000/DV, 0.000/DV,
+ 0.000/DV, 1.125/DV, 1.875/DV, 2.625/DV,
+ 3.000/DV, 3.750/DV, 4.125/DV, 4.500/DV,
+ 4.875/DV, 5.250/DV, 5.625/DV, 6.000/DV,
+ /* OCT 3 */
+ 0.000/DV, 0.000/DV, 0.000/DV, 1.875/DV,
+ 3.000/DV, 4.125/DV, 4.875/DV, 5.625/DV,
+ 6.000/DV, 6.750/DV, 7.125/DV, 7.500/DV,
+ 7.875/DV, 8.250/DV, 8.625/DV, 9.000/DV,
+ /* OCT 4 */
+ 0.000/DV, 0.000/DV, 3.000/DV, 4.875/DV,
+ 6.000/DV, 7.125/DV, 7.875/DV, 8.625/DV,
+ 9.000/DV, 9.750/DV,10.125/DV,10.500/DV,
+ 10.875/DV,11.250/DV,11.625/DV,12.000/DV,
+ /* OCT 5 */
+ 0.000/DV, 3.000/DV, 6.000/DV, 7.875/DV,
+ 9.000/DV,10.125/DV,10.875/DV,11.625/DV,
+ 12.000/DV,12.750/DV,13.125/DV,13.500/DV,
+ 13.875/DV,14.250/DV,14.625/DV,15.000/DV,
+ /* OCT 6 */
+ 0.000/DV, 6.000/DV, 9.000/DV,10.875/DV,
+ 12.000/DV,13.125/DV,13.875/DV,14.625/DV,
+ 15.000/DV,15.750/DV,16.125/DV,16.500/DV,
+ 16.875/DV,17.250/DV,17.625/DV,18.000/DV,
+ /* OCT 7 */
+ 0.000/DV, 9.000/DV,12.000/DV,13.875/DV,
+ 15.000/DV,16.125/DV,16.875/DV,17.625/DV,
+ 18.000/DV,18.750/DV,19.125/DV,19.500/DV,
+ 19.875/DV,20.250/DV,20.625/DV,21.000/DV
+};
+#undef DV
+
/* sustain level table (3 dB per step) */
+/* 0 - 15: 0, 3, 6, 9,12,15,18,21,24,27,30,33,36,39,42,93 (dB)*/
+#define SC(db) (db*((3/EG_STEP)*(1<<ENV_BITS)))+EG_DST
+static const INT32 SL_TABLE[16]={
+ SC( 0),SC( 1),SC( 2),SC(3 ),SC(4 ),SC(5 ),SC(6 ),SC( 7),
+ SC( 8),SC( 9),SC(10),SC(11),SC(12),SC(13),SC(14),SC(31)
+};
+#undef SC
+
+#define TL_MAX (EG_ENT*2) /* limit(tl + ksr + envelope) + sinwave */
+/* TotalLevel : 48 24 12 6 3 1.5 0.75 (dB) */
+/* TL_TABLE[ 0 to TL_MAX ] : plus section */
+/* TL_TABLE[ TL_MAX to TL_MAX+TL_MAX-1 ] : minus section */
+static INT32 *TL_TABLE;
+
+/* pointers to TL_TABLE with sinwave output offset */
+static INT32 **SIN_TABLE;
+
+/* LFO table */
+static INT32 *AMS_TABLE;
+static INT32 *VIB_TABLE;
+
+/* envelope output curve table */
+/* attack + decay + OFF */
+static INT32 ENV_CURVE[2*EG_ENT+1];
+
+/* multiple table */
+#define ML 2
+static const UINT32 MUL_TABLE[16]= {
+/* 1/2, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 */
+ 0.50*ML, 1.00*ML, 2.00*ML, 3.00*ML, 4.00*ML, 5.00*ML, 6.00*ML, 7.00*ML,
+ 8.00*ML, 9.00*ML,10.00*ML,10.00*ML,12.00*ML,12.00*ML,15.00*ML,15.00*ML
+};
+#undef ML
+
+/* dummy attack / decay rate ( when rate == 0 ) */
+static INT32 RATE_0[16]=
+{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+/* -------------------- static state --------------------- */
+
+/* lock level of common table */
+static int num_lock = 0;
+
+/* work table */
+static void *cur_chip = NULL; /* current chip point */
+/* currenct chip state */
+/* static OPLSAMPLE *bufL,*bufR; */
+static OPL_CH *S_CH;
+static OPL_CH *E_CH;
+OPL_SLOT *SLOT7_1,*SLOT7_2,*SLOT8_1,*SLOT8_2;
+
+static INT32 outd[1];
+static INT32 ams;
+static INT32 vib;
+INT32 *ams_table;
+INT32 *vib_table;
+static INT32 amsIncr;
+static INT32 vibIncr;
+static INT32 feedback2; /* connect for SLOT 2 */
+
+/* log output level */
+#define LOG_ERR 3 /* ERROR */
+#define LOG_WAR 2 /* WARNING */
+#define LOG_INF 1 /* INFORMATION */
+
+//#define LOG_LEVEL LOG_INF
+#define LOG_LEVEL LOG_ERR
+
+//#define LOG(n,x) if( (n)>=LOG_LEVEL ) logerror x
+#define LOG(n,x)
+
+/* --------------------- subroutines --------------------- */
+
+INLINE int Limit( int val, int max, int min ) {
+ if ( val > max )
+ val = max;
+ else if ( val < min )
+ val = min;
+
+ return val;
+}
+
+/* status set and IRQ handling */
+INLINE void OPL_STATUS_SET(FM_OPL *OPL,int flag)
+{
+ /* set status flag */
+ OPL->status |= flag;
+ if(!(OPL->status & 0x80))
+ {
+ if(OPL->status & OPL->statusmask)
+ { /* IRQ on */
+ OPL->status |= 0x80;
+ /* callback user interrupt handler (IRQ is OFF to ON) */
+ if(OPL->IRQHandler) (OPL->IRQHandler)(OPL->IRQParam,1);
+ }
+ }
+}
+
+/* status reset and IRQ handling */
+INLINE void OPL_STATUS_RESET(FM_OPL *OPL,int flag)
+{
+ /* reset status flag */
+ OPL->status &=~flag;
+ if((OPL->status & 0x80))
+ {
+ if (!(OPL->status & OPL->statusmask) )
+ {
+ OPL->status &= 0x7f;
+ /* callback user interrupt handler (IRQ is ON to OFF) */
+ if(OPL->IRQHandler) (OPL->IRQHandler)(OPL->IRQParam,0);
+ }
+ }
+}
+
/* IRQ mask set */
/* Install a new status mask, then re-evaluate the IRQ line against it.
 * SET/RESET called with flag 0 change no flags but run the IRQ
 * transition logic in both directions. */
INLINE void OPL_STATUSMASK_SET(FM_OPL *OPL,int flag)
{
	OPL->statusmask = flag;
	/* IRQ handling check */
	OPL_STATUS_SET(OPL,0);
	OPL_STATUS_RESET(OPL,0);
}
+
/* ----- key on ----- */
/* Restart the slot's phase counter and enter the envelope's attack
 * phase at the attack start point. */
INLINE void OPL_KEYON(OPL_SLOT *SLOT)
{
	/* sin wave restart */
	SLOT->Cnt = 0;
	/* set attack */
	SLOT->evm = ENV_MOD_AR;
	SLOT->evs = SLOT->evsa;
	SLOT->evc = EG_AST;
	SLOT->eve = EG_AED;
}
/* ----- key off ----- */
/* Move the slot into the release phase, unless it is already in
 * release (evm == ENV_MOD_RR) in which case nothing changes. */
INLINE void OPL_KEYOFF(OPL_SLOT *SLOT)
{
	if( SLOT->evm > ENV_MOD_RR)
	{
		/* set envelope counter from envelope output; if still inside
		 * the attack segment, jump straight to the decay start point */
		SLOT->evm = ENV_MOD_RR;
		if( !(SLOT->evc&EG_DST) )
			//SLOT->evc = (ENV_CURVE[SLOT->evc>>ENV_BITS]<<ENV_BITS) + EG_DST;
			SLOT->evc = EG_DST;
		SLOT->eve = EG_DED;
		SLOT->evs = SLOT->evsr;
	}
}
+
/* ---------- calculate Envelope Generator & Phase Generator ---------- */
/* return : envelope output */
/* Advance the envelope counter by one step, switching phases
 * (attack -> decay -> sustain/release -> off) when the current
 * segment's end value is reached. Returns the combined attenuation:
 * effective total level + envelope curve + tremolo (when ams set). */
INLINE UINT32 OPL_CALC_SLOT( OPL_SLOT *SLOT )
{
	/* calculate envelope generator */
	if( (SLOT->evc+=SLOT->evs) >= SLOT->eve )
	{
		switch( SLOT->evm ){
		case ENV_MOD_AR: /* ATTACK -> DECAY1 */
			/* next DR */
			SLOT->evm = ENV_MOD_DR;
			SLOT->evc = EG_DST;
			SLOT->eve = SLOT->SL;
			SLOT->evs = SLOT->evsd;
			break;
		case ENV_MOD_DR: /* DECAY -> SL or RR */
			SLOT->evc = SLOT->SL;
			SLOT->eve = EG_DED;
			if(SLOT->eg_typ)
			{
				/* sustaining tone: hold at the sustain level */
				SLOT->evs = 0;
			}
			else
			{
				SLOT->evm = ENV_MOD_RR;
				SLOT->evs = SLOT->evsr;
			}
			break;
		case ENV_MOD_RR: /* RR -> OFF */
			SLOT->evc = EG_OFF;
			SLOT->eve = EG_OFF+1;
			SLOT->evs = 0;
			break;
		}
	}
	/* calculate envelope */
	return SLOT->TLL+ENV_CURVE[SLOT->evc>>ENV_BITS]+(SLOT->ams ? ams : 0);
}
+
+/* set algorythm connection */
+static void set_algorythm( OPL_CH *CH)
+{
+ INT32 *carrier = &outd[0];
+ CH->connect1 = CH->CON ? carrier : &feedback2;
+ CH->connect2 = carrier;
+}
+
/* ---------- frequency counter for operator update ---------- */
/* Refresh a slot's phase increment, key-scale-rate derived envelope
 * step values, and effective total level, after a change to channel
 * frequency or slot multiplier. */
INLINE void CALC_FCSLOT(OPL_CH *CH,OPL_SLOT *SLOT)
{
	int ksr;

	/* frequency step counter */
	SLOT->Incr = CH->fc * SLOT->mul;
	ksr = CH->kcode >> SLOT->KSR;

	if( SLOT->ksr != ksr )
	{
		SLOT->ksr = ksr;
		/* attack , decay rate recalculation */
		SLOT->evsa = SLOT->AR[ksr];
		SLOT->evsd = SLOT->DR[ksr];
		SLOT->evsr = SLOT->RR[ksr];
	}
	/* effective total level = TL + key scale level attenuation */
	SLOT->TLL = SLOT->TL + (CH->ksl_base>>SLOT->ksl);
}
+
+/* set multi,am,vib,EG-TYP,KSR,mul */
+INLINE void set_mul(FM_OPL *OPL,int slot,int v)
+{
+ OPL_CH *CH = &OPL->P_CH[slot/2];
+ OPL_SLOT *SLOT = &CH->SLOT[slot&1];
+
+ SLOT->mul = MUL_TABLE[v&0x0f];
+ SLOT->KSR = (v&0x10) ? 0 : 2;
+ SLOT->eg_typ = (v&0x20)>>5;
+ SLOT->vib = (v&0x40);
+ SLOT->ams = (v&0x80);
+ CALC_FCSLOT(CH,SLOT);
+}
+
+/* set ksl & tl */
+INLINE void set_ksl_tl(FM_OPL *OPL,int slot,int v)
+{
+ OPL_CH *CH = &OPL->P_CH[slot/2];
+ OPL_SLOT *SLOT = &CH->SLOT[slot&1];
+ int ksl = v>>6; /* 0 / 1.5 / 3 / 6 db/OCT */
+
+ SLOT->ksl = ksl ? 3-ksl : 31;
+ SLOT->TL = (v&0x3f)*(0.75/EG_STEP); /* 0.75db step */
+
+ if( !(OPL->mode&0x80) )
+ { /* not CSM latch total level */
+ SLOT->TLL = SLOT->TL + (CH->ksl_base>>SLOT->ksl);
+ }
+}
+
+/* set attack rate & decay rate */
+INLINE void set_ar_dr(FM_OPL *OPL,int slot,int v)
+{
+ OPL_CH *CH = &OPL->P_CH[slot/2];
+ OPL_SLOT *SLOT = &CH->SLOT[slot&1];
+ int ar = v>>4;
+ int dr = v&0x0f;
+
+ SLOT->AR = ar ? &OPL->AR_TABLE[ar<<2] : RATE_0;
+ SLOT->evsa = SLOT->AR[SLOT->ksr];
+ if( SLOT->evm == ENV_MOD_AR ) SLOT->evs = SLOT->evsa;
+
+ SLOT->DR = dr ? &OPL->DR_TABLE[dr<<2] : RATE_0;
+ SLOT->evsd = SLOT->DR[SLOT->ksr];
+ if( SLOT->evm == ENV_MOD_DR ) SLOT->evs = SLOT->evsd;
+}
+
+/* set sustain level & release rate */
+INLINE void set_sl_rr(FM_OPL *OPL,int slot,int v)
+{
+ OPL_CH *CH = &OPL->P_CH[slot/2];
+ OPL_SLOT *SLOT = &CH->SLOT[slot&1];
+ int sl = v>>4;
+ int rr = v & 0x0f;
+
+ SLOT->SL = SL_TABLE[sl];
+ if( SLOT->evm == ENV_MOD_DR ) SLOT->eve = SLOT->SL;
+ SLOT->RR = &OPL->DR_TABLE[rr<<2];
+ SLOT->evsr = SLOT->RR[SLOT->ksr];
+ if( SLOT->evm == ENV_MOD_RR ) SLOT->evs = SLOT->evsr;
+}
+
/* operator output calculator */
#define OP_OUT(slot,env,con) slot->wavetable[((slot->Cnt+con)/(0x1000000/SIN_ENT))&(SIN_ENT-1)][env]
/* ---------- calculate one channel ---------- */
/* Generate one sample for a melodic channel. Slot 1 output goes
 * through CH->connect1 (feedback2 for serial FM, outd[0] for
 * additive); slot 2 always accumulates into outd[0]. */
INLINE void OPL_CALC_CH( OPL_CH *CH )
{
	UINT32 env_out;
	OPL_SLOT *SLOT;

	feedback2 = 0;
	/* SLOT 1 */
	SLOT = &CH->SLOT[SLOT1];
	env_out=OPL_CALC_SLOT(SLOT);
	if( env_out < EG_ENT-1 )
	{
		/* PG: advance phase, applying vibrato when enabled */
		if(SLOT->vib) SLOT->Cnt += (SLOT->Incr*vib/VIB_RATE);
		else SLOT->Cnt += SLOT->Incr;
		/* connection */
		if(CH->FB)
		{
			/* self-feedback: average of the last two outputs */
			int feedback1 = (CH->op1_out[0]+CH->op1_out[1])>>CH->FB;
			CH->op1_out[1] = CH->op1_out[0];
			*CH->connect1 += CH->op1_out[0] = OP_OUT(SLOT,env_out,feedback1);
		}
		else
		{
			*CH->connect1 += OP_OUT(SLOT,env_out,0);
		}
	}else
	{
		/* slot silent: keep the feedback history decaying */
		CH->op1_out[1] = CH->op1_out[0];
		CH->op1_out[0] = 0;
	}
	/* SLOT 2 */
	SLOT = &CH->SLOT[SLOT2];
	env_out=OPL_CALC_SLOT(SLOT);
	if( env_out < EG_ENT-1 )
	{
		/* PG */
		if(SLOT->vib) SLOT->Cnt += (SLOT->Incr*vib/VIB_RATE);
		else SLOT->Cnt += SLOT->Incr;
		/* connection: slot 2 is modulated by slot 1 via feedback2 */
		outd[0] += OP_OUT(SLOT,env_out, feedback2);
	}
}
+
/* ---------- calculate rhythm block ---------- */
#define WHITE_NOISE_db 6.0
/* Generate one sample of the five rhythm-mode instruments
 * (bass drum, snare, tom, top cymbal, hi-hat) into outd[0].
 * CH points at the chip's channel array; channels 6-8 supply the
 * rhythm slots (SLOT7_x = CH[7], SLOT8_x = CH[8]). */
INLINE void OPL_CALC_RH( OPL_CH *CH )
{
	UINT32 env_tam,env_sd,env_top,env_hh;
	/* one-bit white noise scaled to 6 dB */
	int whitenoise = (rand()&1)*(WHITE_NOISE_db/EG_STEP);
	INT32 tone8;

	OPL_SLOT *SLOT;
	int env_out;

	/* BD : same as FM serial mode and output level is large */
	feedback2 = 0;
	/* SLOT 1 */
	SLOT = &CH[6].SLOT[SLOT1];
	env_out=OPL_CALC_SLOT(SLOT);
	if( env_out < EG_ENT-1 )
	{
		/* PG */
		if(SLOT->vib) SLOT->Cnt += (SLOT->Incr*vib/VIB_RATE);
		else SLOT->Cnt += SLOT->Incr;
		/* connection */
		if(CH[6].FB)
		{
			int feedback1 = (CH[6].op1_out[0]+CH[6].op1_out[1])>>CH[6].FB;
			CH[6].op1_out[1] = CH[6].op1_out[0];
			feedback2 = CH[6].op1_out[0] = OP_OUT(SLOT,env_out,feedback1);
		}
		else
		{
			feedback2 = OP_OUT(SLOT,env_out,0);
		}
	}else
	{
		feedback2 = 0;
		CH[6].op1_out[1] = CH[6].op1_out[0];
		CH[6].op1_out[0] = 0;
	}
	/* SLOT 2 */
	SLOT = &CH[6].SLOT[SLOT2];
	env_out=OPL_CALC_SLOT(SLOT);
	if( env_out < EG_ENT-1 )
	{
		/* PG */
		if(SLOT->vib) SLOT->Cnt += (SLOT->Incr*vib/VIB_RATE);
		else SLOT->Cnt += SLOT->Incr;
		/* connection: bass drum output at double level */
		outd[0] += OP_OUT(SLOT,env_out, feedback2)*2;
	}

	// SD (17) = mul14[fnum7] + white noise
	// TAM (15) = mul15[fnum8]
	// TOP (18) = fnum6(mul18[fnum8]+whitenoise)
	// HH (14) = fnum7(mul18[fnum8]+whitenoise) + white noise
	env_sd =OPL_CALC_SLOT(SLOT7_2) + whitenoise;
	env_tam=OPL_CALC_SLOT(SLOT8_1);
	env_top=OPL_CALC_SLOT(SLOT8_2);
	env_hh =OPL_CALC_SLOT(SLOT7_1) + whitenoise;

	/* PG: note the fixed frequency ratios (x2, x8, x48) applied to
	 * the snare/cymbal phase counters */
	if(SLOT7_1->vib) SLOT7_1->Cnt += (2*SLOT7_1->Incr*vib/VIB_RATE);
	else SLOT7_1->Cnt += 2*SLOT7_1->Incr;
	if(SLOT7_2->vib) SLOT7_2->Cnt += ((CH[7].fc*8)*vib/VIB_RATE);
	else SLOT7_2->Cnt += (CH[7].fc*8);
	if(SLOT8_1->vib) SLOT8_1->Cnt += (SLOT8_1->Incr*vib/VIB_RATE);
	else SLOT8_1->Cnt += SLOT8_1->Incr;
	if(SLOT8_2->vib) SLOT8_2->Cnt += ((CH[8].fc*48)*vib/VIB_RATE);
	else SLOT8_2->Cnt += (CH[8].fc*48);

	tone8 = OP_OUT(SLOT8_2,whitenoise,0 );

	/* NOTE(review): envelopes and output wavetables are cross-wired
	 * between slots below (e.g. env_sd from SLOT7_2 output through
	 * SLOT7_1, env_hh from SLOT7_1 output through SLOT7_2) — this
	 * mirrors the upstream fmopl approximation; confirm against the
	 * reference before "fixing". */
	/* SD */
	if( env_sd < EG_ENT-1 )
		outd[0] += OP_OUT(SLOT7_1,env_sd, 0)*8;
	/* TAM */
	if( env_tam < EG_ENT-1 )
		outd[0] += OP_OUT(SLOT8_1,env_tam, 0)*2;
	/* TOP-CY */
	if( env_top < EG_ENT-1 )
		outd[0] += OP_OUT(SLOT7_2,env_top,tone8)*2;
	/* HH */
	if( env_hh < EG_ENT-1 )
		outd[0] += OP_OUT(SLOT7_2,env_hh,tone8)*2;
}
+
/* ----------- initialize time tables ----------- */
/* Build the per-chip attack/decay rate tables, scaled by the chip's
 * sample-rate-dependent freqbase. ARRATE/DRRATE are the raw clock
 * constants (OPL_ARRATE / OPL_DRRATE). Entries 0-3 are zero (no
 * motion); 60-75 are clamped to the fastest usable rate. */
static void init_timetables( FM_OPL *OPL , int ARRATE , int DRRATE )
{
	int i;
	double rate;

	/* make attack rate & decay rate tables */
	for (i = 0;i < 4;i++) OPL->AR_TABLE[i] = OPL->DR_TABLE[i] = 0;
	for (i = 4;i <= 60;i++){
		rate = OPL->freqbase; /* frequency rate */
		if( i < 60 ) rate *= 1.0+(i&3)*0.25; /* b0-1 : x1 , x1.25 , x1.5 , x1.75 */
		rate *= 1<<((i>>2)-1); /* b2-5 : shift bit */
		rate *= (double)(EG_ENT<<ENV_BITS);
		OPL->AR_TABLE[i] = rate / ARRATE;
		OPL->DR_TABLE[i] = rate / DRRATE;
	}
	/* overflow area: attack jumps instantly, decay pinned to max rate */
	for (i = 60;i < 76;i++)
	{
		OPL->AR_TABLE[i] = EG_AED-1;
		OPL->DR_TABLE[i] = OPL->DR_TABLE[60];
	}
#if 0
	for (i = 0;i < 64 ;i++){ /* make for overflow area */
		LOG(LOG_WAR,("rate %2d , ar %f ms , dr %f ms \n",i,
			((double)(EG_ENT<<ENV_BITS) / OPL->AR_TABLE[i]) * (1000.0 / OPL->rate),
			((double)(EG_ENT<<ENV_BITS) / OPL->DR_TABLE[i]) * (1000.0 / OPL->rate) ));
	}
#endif
}
+
+/* ---------- generic table initialize ---------- */
+static int OPLOpenTable( void )
+{
+ int s,t;
+ double rate;
+ int i,j;
+ double pom;
+
+ /* allocate dynamic tables */
+ if( (TL_TABLE = malloc(TL_MAX*2*sizeof(INT32))) == NULL)
+ return 0;
+ if( (SIN_TABLE = malloc(SIN_ENT*4 *sizeof(INT32 *))) == NULL)
+ {
+ free(TL_TABLE);
+ return 0;
+ }
+ if( (AMS_TABLE = malloc(AMS_ENT*2 *sizeof(INT32))) == NULL)
+ {
+ free(TL_TABLE);
+ free(SIN_TABLE);
+ return 0;
+ }
+ if( (VIB_TABLE = malloc(VIB_ENT*2 *sizeof(INT32))) == NULL)
+ {
+ free(TL_TABLE);
+ free(SIN_TABLE);
+ free(AMS_TABLE);
+ return 0;
+ }
+ /* make total level table */
+ for (t = 0;t < EG_ENT-1 ;t++){
+ rate = ((1<<TL_BITS)-1)/pow(10,EG_STEP*t/20); /* dB -> voltage */
+ TL_TABLE[ t] = (int)rate;
+ TL_TABLE[TL_MAX+t] = -TL_TABLE[t];
+/* LOG(LOG_INF,("TotalLevel(%3d) = %x\n",t,TL_TABLE[t]));*/
+ }
+ /* fill volume off area */
+ for ( t = EG_ENT-1; t < TL_MAX ;t++){
+ TL_TABLE[t] = TL_TABLE[TL_MAX+t] = 0;
+ }
+
+ /* make sinwave table (total level offet) */
+ /* degree 0 = degree 180 = off */
+ SIN_TABLE[0] = SIN_TABLE[SIN_ENT/2] = &TL_TABLE[EG_ENT-1];
+ for (s = 1;s <= SIN_ENT/4;s++){
+ pom = sin(2*PI*s/SIN_ENT); /* sin */
+ pom = 20*log10(1/pom); /* decibel */
+ j = pom / EG_STEP; /* TL_TABLE steps */
+
+ /* degree 0 - 90 , degree 180 - 90 : plus section */
+ SIN_TABLE[ s] = SIN_TABLE[SIN_ENT/2-s] = &TL_TABLE[j];
+ /* degree 180 - 270 , degree 360 - 270 : minus section */
+ SIN_TABLE[SIN_ENT/2+s] = SIN_TABLE[SIN_ENT -s] = &TL_TABLE[TL_MAX+j];
+/* LOG(LOG_INF,("sin(%3d) = %f:%f db\n",s,pom,(double)j * EG_STEP));*/
+ }
+ for (s = 0;s < SIN_ENT;s++)
+ {
+ SIN_TABLE[SIN_ENT*1+s] = s<(SIN_ENT/2) ? SIN_TABLE[s] : &TL_TABLE[EG_ENT];
+ SIN_TABLE[SIN_ENT*2+s] = SIN_TABLE[s % (SIN_ENT/2)];
+ SIN_TABLE[SIN_ENT*3+s] = (s/(SIN_ENT/4))&1 ? &TL_TABLE[EG_ENT] : SIN_TABLE[SIN_ENT*2+s];
+ }
+
+ /* envelope counter -> envelope output table */
+ for (i=0; i<EG_ENT; i++)
+ {
+ /* ATTACK curve */
+ pom = pow( ((double)(EG_ENT-1-i)/EG_ENT) , 8 ) * EG_ENT;
+ /* if( pom >= EG_ENT ) pom = EG_ENT-1; */
+ ENV_CURVE[i] = (int)pom;
+ /* DECAY ,RELEASE curve */
+ ENV_CURVE[(EG_DST>>ENV_BITS)+i]= i;
+ }
+ /* off */
+ ENV_CURVE[EG_OFF>>ENV_BITS]= EG_ENT-1;
+ /* make LFO ams table */
+ for (i=0; i<AMS_ENT; i++)
+ {
+ pom = (1.0+sin(2*PI*i/AMS_ENT))/2; /* sin */
+ AMS_TABLE[i] = (1.0/EG_STEP)*pom; /* 1dB */
+ AMS_TABLE[AMS_ENT+i] = (4.8/EG_STEP)*pom; /* 4.8dB */
+ }
+ /* make LFO vibrate table */
+ for (i=0; i<VIB_ENT; i++)
+ {
+ /* 100cent = 1seminote = 6% ?? */
+ pom = (double)VIB_RATE*0.06*sin(2*PI*i/VIB_ENT); /* +-100sect step */
+ VIB_TABLE[i] = VIB_RATE + (pom*0.07); /* +- 7cent */
+ VIB_TABLE[VIB_ENT+i] = VIB_RATE + (pom*0.14); /* +-14cent */
+ /* LOG(LOG_INF,("vib %d=%d\n",i,VIB_TABLE[VIB_ENT+i])); */
+ }
+ return 1;
+}
+
+
+static void OPLCloseTable( void )
+{
+ free(TL_TABLE);
+ free(SIN_TABLE);
+ free(AMS_TABLE);
+ free(VIB_TABLE);
+}
+
+/* CSM Key Controll */
+INLINE void CSMKeyControll(OPL_CH *CH)
+{
+ OPL_SLOT *slot1 = &CH->SLOT[SLOT1];
+ OPL_SLOT *slot2 = &CH->SLOT[SLOT2];
+ /* all key off */
+ OPL_KEYOFF(slot1);
+ OPL_KEYOFF(slot2);
+ /* total level latch */
+ slot1->TLL = slot1->TL + (CH->ksl_base>>slot1->ksl);
+ slot1->TLL = slot1->TL + (CH->ksl_base>>slot1->ksl);
+ /* key on */
+ CH->op1_out[0] = CH->op1_out[1] = 0;
+ OPL_KEYON(slot1);
+ OPL_KEYON(slot2);
+}
+
/* ---------- opl initialize ---------- */
/* Precompute the per-chip, sample-rate-dependent tables: frequency
 * base, timer tick period, AR/DR rate tables, fnum -> phase-increment
 * table, and the AMS/VIB LFO step increments.
 * (Function name keeps the historical misspelling for compatibility.) */
static void OPL_initalize(FM_OPL *OPL)
{
	int fn;

	/* frequency base: chip clock / sample rate, one sample per 72 clocks */
	OPL->freqbase = (OPL->rate) ? ((double)OPL->clock / OPL->rate) / 72 : 0;
	/* Timer base time */
	OPL->TimerBase = 1.0/((double)OPL->clock / 72.0 );
	/* make time tables */
	init_timetables( OPL , OPL_ARRATE , OPL_DRRATE );
	/* make fnumber -> increment counter table */
	for( fn=0 ; fn < 1024 ; fn++ )
	{
		OPL->FN_TABLE[fn] = OPL->freqbase * fn * FREQ_RATE * (1<<7) / 2;
	}
	/* LFO freq.table */
	OPL->amsIncr = OPL->rate ? (double)AMS_ENT*(1<<AMS_SHIFT) / OPL->rate * 3.7 * ((double)OPL->clock/3600000) : 0;
	OPL->vibIncr = OPL->rate ? (double)VIB_ENT*(1<<VIB_SHIFT) / OPL->rate * 6.4 * ((double)OPL->clock/3600000) : 0;
}
+
/* ---------- write a OPL registers ---------- */
/* Central register dispatch: decode register address r (0x00-0xff)
 * and apply value v to the matching functional unit (control/timers,
 * per-slot parameters, frequency/key-on, feedback/connection, wave
 * select). */
static void OPLWriteReg(FM_OPL *OPL, int r, int v)
{
	OPL_CH *CH;
	int slot;
	int block_fnum;

	switch(r&0xe0)
	{
	case 0x00: /* 00-1f:controll */
		switch(r&0x1f)
		{
		case 0x01:
			/* wave selector enable */
			if(OPL->type&OPL_TYPE_WAVESEL)
			{
				OPL->wavesel = v&0x20;
				if(!OPL->wavesel)
				{
					/* preset compatible mode */
					int c;
					for(c=0;c<OPL->max_ch;c++)
					{
						OPL->P_CH[c].SLOT[SLOT1].wavetable = &SIN_TABLE[0];
						OPL->P_CH[c].SLOT[SLOT2].wavetable = &SIN_TABLE[0];
					}
				}
			}
			return;
		case 0x02: /* Timer 1 */
			OPL->T[0] = (256-v)*4;
			/* break (not return): falls out of both switches */
			break;
		case 0x03: /* Timer 2 */
			OPL->T[1] = (256-v)*16;
			return;
		case 0x04: /* IRQ clear / mask and Timer enable */
			if(v&0x80)
			{ /* IRQ flag clear */
				OPL_STATUS_RESET(OPL,0x7f);
			}
			else
			{ /* set IRQ mask ,timer enable*/
				UINT8 st1 = v&1;
				UINT8 st2 = (v>>1)&1;
				/* IRQRST,T1MSK,t2MSK,EOSMSK,BRMSK,x,ST2,ST1 */
				OPL_STATUS_RESET(OPL,v&0x78);
				OPL_STATUSMASK_SET(OPL,((~v)&0x78)|0x01);
				/* timer 2 */
				if(OPL->st[1] != st2)
				{
					double interval = st2 ? (double)OPL->T[1]*OPL->TimerBase : 0.0;
					OPL->st[1] = st2;
					if (OPL->TimerHandler) (OPL->TimerHandler)(OPL->TimerParam+1,interval);
				}
				/* timer 1 */
				if(OPL->st[0] != st1)
				{
					double interval = st1 ? (double)OPL->T[0]*OPL->TimerBase : 0.0;
					OPL->st[0] = st1;
					if (OPL->TimerHandler) (OPL->TimerHandler)(OPL->TimerParam+0,interval);
				}
			}
			return;
#if BUILD_Y8950
		case 0x06: /* Key Board OUT */
			if(OPL->type&OPL_TYPE_KEYBOARD)
			{
				if(OPL->keyboardhandler_w)
					OPL->keyboardhandler_w(OPL->keyboard_param,v);
				else
					LOG(LOG_WAR,("OPL:write unmapped KEYBOARD port\n"));
			}
			return;
		case 0x07: /* DELTA-T controll : START,REC,MEMDATA,REPT,SPOFF,x,x,RST */
			if(OPL->type&OPL_TYPE_ADPCM)
				YM_DELTAT_ADPCM_Write(OPL->deltat,r-0x07,v);
			return;
		case 0x08: /* MODE,DELTA-T : CSM,NOTESEL,x,x,smpl,da/ad,64k,rom */
			OPL->mode = v;
			v&=0x1f; /* for DELTA-T unit */
			/* fall through to the DELTA-T register write below */
		case 0x09: /* START ADD */
		case 0x0a:
		case 0x0b: /* STOP ADD */
		case 0x0c:
		case 0x0d: /* PRESCALE */
		case 0x0e:
		case 0x0f: /* ADPCM data */
		case 0x10: /* DELTA-N */
		case 0x11: /* DELTA-N */
		case 0x12: /* EG-CTRL */
			if(OPL->type&OPL_TYPE_ADPCM)
				YM_DELTAT_ADPCM_Write(OPL->deltat,r-0x07,v);
			return;
#if 0
		case 0x15: /* DAC data */
		case 0x16:
		case 0x17: /* SHIFT */
			return;
		case 0x18: /* I/O CTRL (Direction) */
			if(OPL->type&OPL_TYPE_IO)
				OPL->portDirection = v&0x0f;
			return;
		case 0x19: /* I/O DATA */
			if(OPL->type&OPL_TYPE_IO)
			{
				OPL->portLatch = v;
				if(OPL->porthandler_w)
					OPL->porthandler_w(OPL->port_param,v&OPL->portDirection);
			}
			return;
		case 0x1a: /* PCM data */
			return;
#endif
#endif
		}
		break;
	case 0x20: /* am,vib,ksr,eg type,mul */
		slot = slot_array[r&0x1f];
		if(slot == -1) return;
		set_mul(OPL,slot,v);
		return;
	case 0x40:
		/* key scale level / total level */
		slot = slot_array[r&0x1f];
		if(slot == -1) return;
		set_ksl_tl(OPL,slot,v);
		return;
	case 0x60:
		/* attack / decay rate */
		slot = slot_array[r&0x1f];
		if(slot == -1) return;
		set_ar_dr(OPL,slot,v);
		return;
	case 0x80:
		/* sustain level / release rate */
		slot = slot_array[r&0x1f];
		if(slot == -1) return;
		set_sl_rr(OPL,slot,v);
		return;
	case 0xa0:
		switch(r)
		{
		case 0xbd:
			/* amsep,vibdep,r,bd,sd,tom,tc,hh */
			{
			/* rkey has a bit set for each rhythm key that changed */
			UINT8 rkey = OPL->rythm^v;
			OPL->ams_table = &AMS_TABLE[v&0x80 ? AMS_ENT : 0];
			OPL->vib_table = &VIB_TABLE[v&0x40 ? VIB_ENT : 0];
			OPL->rythm  = v&0x3f;
			if(OPL->rythm&0x20)
			{
#if 0
				usrintf_showmessage("OPL Rythm mode select");
#endif
				/* BD key on/off */
				if(rkey&0x10)
				{
					if(v&0x10)
					{
						OPL->P_CH[6].op1_out[0] = OPL->P_CH[6].op1_out[1] = 0;
						OPL_KEYON(&OPL->P_CH[6].SLOT[SLOT1]);
						OPL_KEYON(&OPL->P_CH[6].SLOT[SLOT2]);
					}
					else
					{
						OPL_KEYOFF(&OPL->P_CH[6].SLOT[SLOT1]);
						OPL_KEYOFF(&OPL->P_CH[6].SLOT[SLOT2]);
					}
				}
				/* SD key on/off */
				if(rkey&0x08)
				{
					if(v&0x08) OPL_KEYON(&OPL->P_CH[7].SLOT[SLOT2]);
					else       OPL_KEYOFF(&OPL->P_CH[7].SLOT[SLOT2]);
				}/* TAM key on/off */
				if(rkey&0x04)
				{
					if(v&0x04) OPL_KEYON(&OPL->P_CH[8].SLOT[SLOT1]);
					else       OPL_KEYOFF(&OPL->P_CH[8].SLOT[SLOT1]);
				}
				/* TOP-CY key on/off */
				if(rkey&0x02)
				{
					if(v&0x02) OPL_KEYON(&OPL->P_CH[8].SLOT[SLOT2]);
					else       OPL_KEYOFF(&OPL->P_CH[8].SLOT[SLOT2]);
				}
				/* HH key on/off */
				if(rkey&0x01)
				{
					if(v&0x01) OPL_KEYON(&OPL->P_CH[7].SLOT[SLOT1]);
					else       OPL_KEYOFF(&OPL->P_CH[7].SLOT[SLOT1]);
				}
			}
			}
			return;
		}
		/* keyon,block,fnum */
		if( (r&0x0f) > 8) return;
		CH = &OPL->P_CH[r&0x0f];
		if(!(r&0x10))
		{ /* a0-a8 */
			block_fnum  = (CH->block_fnum&0x1f00) | v;
		}
		else
		{ /* b0-b8 */
			int keyon = (v>>5)&1;
			block_fnum = ((v&0x1f)<<8) | (CH->block_fnum&0xff);
			if(CH->keyon != keyon)
			{
				if( (CH->keyon=keyon) )
				{
					CH->op1_out[0] = CH->op1_out[1] = 0;
					OPL_KEYON(&CH->SLOT[SLOT1]);
					OPL_KEYON(&CH->SLOT[SLOT2]);
				}
				else
				{
					OPL_KEYOFF(&CH->SLOT[SLOT1]);
					OPL_KEYOFF(&CH->SLOT[SLOT2]);
				}
			}
		}
		/* update: only recompute derived values when the frequency changed */
		if(CH->block_fnum != block_fnum)
		{
			int blockRv = 7-(block_fnum>>10);
			int fnum = block_fnum&0x3ff;
			CH->block_fnum = block_fnum;

			CH->ksl_base = KSL_TABLE[block_fnum>>6];
			CH->fc = OPL->FN_TABLE[fnum]>>blockRv;
			CH->kcode = CH->block_fnum>>9;
			/* NOTESEL (reg 0x08 bit 6) picks the extra kcode bit source */
			if( (OPL->mode&0x40) && CH->block_fnum&0x100) CH->kcode |=1;
			CALC_FCSLOT(CH,&CH->SLOT[SLOT1]);
			CALC_FCSLOT(CH,&CH->SLOT[SLOT2]);
		}
		return;
	case 0xc0:
		/* FB,C */
		if( (r&0x0f) > 8) return;
		CH = &OPL->P_CH[r&0x0f];
		{
		int feedback = (v>>1)&7;
		CH->FB   = feedback ? (8+1) - feedback : 0;
		CH->CON = v&1;
		set_algorythm(CH);
		}
		return;
	case 0xe0: /* wave type */
		slot = slot_array[r&0x1f];
		if(slot == -1) return;
		CH = &OPL->P_CH[slot/2];
		if(OPL->wavesel)
		{
			CH->SLOT[slot&1].wavetable = &SIN_TABLE[(v&0x03)*SIN_ENT];
		}
		return;
	}
}
+
+/* lock/unlock for common table */
+static int OPL_LockTable(void)
+{
+ num_lock++;
+ if(num_lock>1) return 0;
+ /* first time */
+ cur_chip = NULL;
+ /* allocate total level table (128kb space) */
+ if( !OPLOpenTable() )
+ {
+ num_lock--;
+ return -1;
+ }
+ return 0;
+}
+
+static void OPL_UnLockTable(void)
+{
+ if(num_lock) num_lock--;
+ if(num_lock) return;
+ /* last time */
+ cur_chip = NULL;
+ OPLCloseTable();
+}
+
+#if (BUILD_YM3812 || BUILD_YM3526)
+/*******************************************************************************/
+/* YM3812 local section */
+/*******************************************************************************/
+
/* ---------- update one of chip ----------- */
/* Render 'length' mono samples from the chip into 'buffer'.
 * Caches per-chip pointers in file-level statics while the same chip
 * is updated repeatedly (not thread-safe; single emulation thread). */
void YM3812UpdateOne(FM_OPL *OPL, INT16 *buffer, int length)
{
	int i;
	int data;
	OPLSAMPLE *buf = buffer;
	UINT32 amsCnt = OPL->amsCnt;
	UINT32 vibCnt = OPL->vibCnt;
	UINT8 rythm = OPL->rythm&0x20;
	OPL_CH *CH,*R_CH;

	/* refresh the cached channel/LFO pointers on a chip switch */
	if( (void *)OPL != cur_chip ){
		cur_chip = (void *)OPL;
		/* channel pointers */
		S_CH = OPL->P_CH;
		E_CH = &S_CH[9];
		/* rythm slot */
		SLOT7_1 = &S_CH[7].SLOT[SLOT1];
		SLOT7_2 = &S_CH[7].SLOT[SLOT2];
		SLOT8_1 = &S_CH[8].SLOT[SLOT1];
		SLOT8_2 = &S_CH[8].SLOT[SLOT2];
		/* LFO state */
		amsIncr = OPL->amsIncr;
		vibIncr = OPL->vibIncr;
		ams_table = OPL->ams_table;
		vib_table = OPL->vib_table;
	}
	/* in rhythm mode, channels 6-8 are handled by OPL_CALC_RH */
	R_CH = rythm ? &S_CH[6] : E_CH;
	for( i=0; i < length ; i++ )
	{
		/* channel A channel B channel C */
		/* LFO */
		ams = ams_table[(amsCnt+=amsIncr)>>AMS_SHIFT];
		vib = vib_table[(vibCnt+=vibIncr)>>VIB_SHIFT];
		outd[0] = 0;
		/* FM part */
		for(CH=S_CH ; CH < R_CH ; CH++)
			OPL_CALC_CH(CH);
		/* Rythn part */
		if(rythm)
			OPL_CALC_RH(S_CH);
		/* limit check */
		data = Limit( outd[0] , OPL_MAXOUT, OPL_MINOUT );
		/* store to sound buffer */
		buf[i] = data >> OPL_OUTSB;
	}

	OPL->amsCnt = amsCnt;
	OPL->vibCnt = vibCnt;
#ifdef OPL_OUTPUT_LOG
	if(opl_dbg_fp)
	{
		for(opl_dbg_chip=0;opl_dbg_chip<opl_dbg_maxchip;opl_dbg_chip++)
			if( opl_dbg_opl[opl_dbg_chip] == OPL) break;
		fprintf(opl_dbg_fp,"%c%c%c",0x20+opl_dbg_chip,length&0xff,length/256);
	}
#endif
}
+#endif /* (BUILD_YM3812 || BUILD_YM3526) */
+
+#if BUILD_Y8950
+
/* Render 'length' mono samples for a Y8950 (OPL + DELTA-T ADPCM).
 * Identical to YM3812UpdateOne plus the ADPCM sample mix and the
 * end-of-sample status update. */
void Y8950UpdateOne(FM_OPL *OPL, INT16 *buffer, int length)
{
	int i;
	int data;
	OPLSAMPLE *buf = buffer;
	UINT32 amsCnt = OPL->amsCnt;
	UINT32 vibCnt = OPL->vibCnt;
	UINT8 rythm = OPL->rythm&0x20;
	OPL_CH *CH,*R_CH;
	YM_DELTAT *DELTAT = OPL->deltat;

	/* setup DELTA-T unit */
	YM_DELTAT_DECODE_PRESET(DELTAT);

	/* refresh the cached channel/LFO pointers on a chip switch */
	if( (void *)OPL != cur_chip ){
		cur_chip = (void *)OPL;
		/* channel pointers */
		S_CH = OPL->P_CH;
		E_CH = &S_CH[9];
		/* rythm slot */
		SLOT7_1 = &S_CH[7].SLOT[SLOT1];
		SLOT7_2 = &S_CH[7].SLOT[SLOT2];
		SLOT8_1 = &S_CH[8].SLOT[SLOT1];
		SLOT8_2 = &S_CH[8].SLOT[SLOT2];
		/* LFO state */
		amsIncr = OPL->amsIncr;
		vibIncr = OPL->vibIncr;
		ams_table = OPL->ams_table;
		vib_table = OPL->vib_table;
	}
	R_CH = rythm ? &S_CH[6] : E_CH;
	for( i=0; i < length ; i++ )
	{
		/* channel A channel B channel C */
		/* LFO */
		ams = ams_table[(amsCnt+=amsIncr)>>AMS_SHIFT];
		vib = vib_table[(vibCnt+=vibIncr)>>VIB_SHIFT];
		outd[0] = 0;
		/* deltaT ADPCM */
		if( DELTAT->portstate )
			YM_DELTAT_ADPCM_CALC(DELTAT);
		/* FM part */
		for(CH=S_CH ; CH < R_CH ; CH++)
			OPL_CALC_CH(CH);
		/* Rythn part */
		if(rythm)
			OPL_CALC_RH(S_CH);
		/* limit check */
		data = Limit( outd[0] , OPL_MAXOUT, OPL_MINOUT );
		/* store to sound buffer */
		buf[i] = data >> OPL_OUTSB;
	}
	OPL->amsCnt = amsCnt;
	OPL->vibCnt = vibCnt;
	/* deltaT START flag: clear BUSY (status bit 0) when playback ended */
	if( !DELTAT->portstate )
		OPL->status &= 0xfe;
}
+#endif
+
/* ---------- reset one of chip ---------- */
/* Bring the chip to power-on state: clear mode/status, write zero to
 * every register from 0xff down to 0x20, and silence every slot. */
void OPLResetChip(FM_OPL *OPL)
{
	int c,s;
	int i;

	/* reset chip */
	OPL->mode = 0; /* normal mode */
	OPL_STATUS_RESET(OPL,0x7f);
	/* reset with register write */
	OPLWriteReg(OPL,0x01,0); /* wavesel disable */
	OPLWriteReg(OPL,0x02,0); /* Timer1 */
	OPLWriteReg(OPL,0x03,0); /* Timer2 */
	OPLWriteReg(OPL,0x04,0); /* IRQ mask clear */
	for(i = 0xff ; i >= 0x20 ; i-- ) OPLWriteReg(OPL,i,0);
	/* reset OPerator paramater */
	for( c = 0 ; c < OPL->max_ch ; c++ )
	{
		OPL_CH *CH = &OPL->P_CH[c];
		/* OPL->P_CH[c].PAN = OPN_CENTER; */
		for(s = 0 ; s < 2 ; s++ )
		{
			/* wave table */
			CH->SLOT[s].wavetable = &SIN_TABLE[0];
			/* CH->SLOT[s].evm = ENV_MOD_RR; */
			/* park the envelope in the OFF state */
			CH->SLOT[s].evc = EG_OFF;
			CH->SLOT[s].eve = EG_OFF+1;
			CH->SLOT[s].evs = 0;
		}
	}
#if BUILD_Y8950
	if(OPL->type&OPL_TYPE_ADPCM)
	{
		YM_DELTAT *DELTAT = OPL->deltat;

		DELTAT->freqbase = OPL->freqbase;
		DELTAT->output_pointer = outd;
		DELTAT->portshift = 5;
		DELTAT->output_range = DELTAT_MIXING_LEVEL<<TL_BITS;
		YM_DELTAT_ADPCM_Reset(DELTAT,0);
	}
#endif
}
+
/* ---------- Create one virtual YM3812 chip ---------- */
/* 'clock' is the master clock in Hz and 'rate' is the output sampling rate. */
+FM_OPL *OPLCreate(int type, int clock, int rate)
+{
+ char *ptr;
+ FM_OPL *OPL;
+ int state_size;
+ int max_ch = 9; /* normaly 9 channels */
+
+ if( OPL_LockTable() ==-1) return NULL;
+ /* allocate OPL state space */
+ state_size = sizeof(FM_OPL);
+ state_size += sizeof(OPL_CH)*max_ch;
+#if BUILD_Y8950
+ if(type&OPL_TYPE_ADPCM) state_size+= sizeof(YM_DELTAT);
+#endif
+ /* allocate memory block */
+ ptr = malloc(state_size);
+ if(ptr==NULL) return NULL;
+ /* clear */
+ memset(ptr,0,state_size);
+ OPL = (FM_OPL *)ptr; ptr+=sizeof(FM_OPL);
+ OPL->P_CH = (OPL_CH *)ptr; ptr+=sizeof(OPL_CH)*max_ch;
+#if BUILD_Y8950
+ if(type&OPL_TYPE_ADPCM) OPL->deltat = (YM_DELTAT *)ptr; ptr+=sizeof(YM_DELTAT);
+#endif
+ /* set channel state pointer */
+ OPL->type = type;
+ OPL->clock = clock;
+ OPL->rate = rate;
+ OPL->max_ch = max_ch;
+ /* init grobal tables */
+ OPL_initalize(OPL);
+ /* reset chip */
+ OPLResetChip(OPL);
+#ifdef OPL_OUTPUT_LOG
+ if(!opl_dbg_fp)
+ {
+ opl_dbg_fp = fopen("opllog.opl","wb");
+ opl_dbg_maxchip = 0;
+ }
+ if(opl_dbg_fp)
+ {
+ opl_dbg_opl[opl_dbg_maxchip] = OPL;
+ fprintf(opl_dbg_fp,"%c%c%c%c%c%c",0x00+opl_dbg_maxchip,
+ type,
+ clock&0xff,
+ (clock/0x100)&0xff,
+ (clock/0x10000)&0xff,
+ (clock/0x1000000)&0xff);
+ opl_dbg_maxchip++;
+ }
+#endif
+ return OPL;
+}
+
/* ---------- Destroy one virtual YM3812 chip ---------- */
/* Tear down a chip created by OPLCreate: close the debug log (if
 * open), drop the shared-table reference, and free the single state
 * allocation (FM_OPL + channels + optional DELTA-T). */
void OPLDestroy(FM_OPL *OPL)
{
#ifdef OPL_OUTPUT_LOG
	if(opl_dbg_fp)
	{
		fclose(opl_dbg_fp);
		opl_dbg_fp = NULL;
	}
#endif
	OPL_UnLockTable();
	free(OPL);
}
+
/* ---------- Option handlers ---------- */

/* Register the timer reprogram callback; channelOffset is passed back
 * to the handler (+0 for Timer A, +1 for Timer B). */
void OPLSetTimerHandler(FM_OPL *OPL,OPL_TIMERHANDLER TimerHandler,int channelOffset)
{
	OPL->TimerHandler   = TimerHandler;
	OPL->TimerParam = channelOffset;
}
/* Register the IRQ line callback, invoked with (param, level). */
void OPLSetIRQHandler(FM_OPL *OPL,OPL_IRQHANDLER IRQHandler,int param)
{
	OPL->IRQHandler     = IRQHandler;
	OPL->IRQParam = param;
}
/* Register the stream-update callback, invoked before state changes
 * so the sound core can render up to the current time first. */
void OPLSetUpdateHandler(FM_OPL *OPL,OPL_UPDATEHANDLER UpdateHandler,int param)
{
	OPL->UpdateHandler = UpdateHandler;
	OPL->UpdateParam = param;
}
#if BUILD_Y8950
/* Register the Y8950 generic I/O port callbacks (registers 0x18/0x19). */
void OPLSetPortHandler(FM_OPL *OPL,OPL_PORTHANDLER_W PortHandler_w,OPL_PORTHANDLER_R PortHandler_r,int param)
{
	OPL->porthandler_w = PortHandler_w;
	OPL->porthandler_r = PortHandler_r;
	OPL->port_param = param;
}

/* Register the Y8950 keyboard interface callbacks (registers 0x05/0x06). */
void OPLSetKeyboardHandler(FM_OPL *OPL,OPL_PORTHANDLER_W KeyboardHandler_w,OPL_PORTHANDLER_R KeyboardHandler_r,int param)
{
	OPL->keyboardhandler_w = KeyboardHandler_w;
	OPL->keyboardhandler_r = KeyboardHandler_r;
	OPL->keyboard_param = param;
}
#endif
/* ---------- YM3812 I/O interface ---------- */
/* Bus write: an even address latches the register number, an odd
 * address writes the value to the latched register (after letting the
 * sound core catch up via UpdateHandler). Returns the IRQ flag
 * (status bit 7). */
int OPLWrite(FM_OPL *OPL,int a,int v)
{
	if( !(a&1) )
	{	/* address port */
		OPL->address = v & 0xff;
	}
	else
	{	/* data port */
		if(OPL->UpdateHandler) OPL->UpdateHandler(OPL->UpdateParam,0);
#ifdef OPL_OUTPUT_LOG
		if(opl_dbg_fp)
		{
			for(opl_dbg_chip=0;opl_dbg_chip<opl_dbg_maxchip;opl_dbg_chip++)
				if( opl_dbg_opl[opl_dbg_chip] == OPL) break;
			fprintf(opl_dbg_fp,"%c%c%c",0x10+opl_dbg_chip,OPL->address,v);
		}
#endif
		OPLWriteReg(OPL,OPL->address,v);
	}
	return OPL->status>>7;
}
+
/* Bus read: an even address returns the status register (masked flags
 * plus the IRQ bit); an odd address reads the latched data register —
 * only meaningful on keyboard/I-O capable variants, 0 otherwise. */
unsigned char OPLRead(FM_OPL *OPL,int a)
{
	if( !(a&1) )
	{	/* status port */
		return OPL->status & (OPL->statusmask|0x80);
	}
	/* data port */
	switch(OPL->address)
	{
	case 0x05: /* KeyBoard IN */
		if(OPL->type&OPL_TYPE_KEYBOARD)
		{
			if(OPL->keyboardhandler_r)
				return OPL->keyboardhandler_r(OPL->keyboard_param);
			else
				LOG(LOG_WAR,("OPL:read unmapped KEYBOARD port\n"));
		}
		return 0;
#if 0
	case 0x0f: /* ADPCM-DATA */
		return 0;
#endif
	case 0x19: /* I/O DATA */
		if(OPL->type&OPL_TYPE_IO)
		{
			if(OPL->porthandler_r)
				return OPL->porthandler_r(OPL->port_param);
			else
				LOG(LOG_WAR,("OPL:read unmapped I/O port\n"));
		}
		return 0;
	case 0x1a: /* PCM-DATA */
		return 0;
	}
	return 0;
}
+
/* Timer expiry callback from the host timer system.
 * c: 0 = Timer A, 1 = Timer B. Sets the overflow status flag, runs
 * CSM auto key-on for Timer A when CSM mode is active, reloads the
 * timer via TimerHandler, and returns the IRQ flag (status bit 7). */
int OPLTimerOver(FM_OPL *OPL,int c)
{
	if( c )
	{	/* Timer B */
		OPL_STATUS_SET(OPL,0x20);
	}
	else
	{	/* Timer A */
		OPL_STATUS_SET(OPL,0x40);
		/* CSM mode key,TL controll */
		if( OPL->mode & 0x80 )
		{	/* CSM mode total level latch and auto key on */
			int ch;
			if(OPL->UpdateHandler) OPL->UpdateHandler(OPL->UpdateParam,0);
			for(ch=0;ch<9;ch++)
				CSMKeyControll( &OPL->P_CH[ch] );
		}
	}
	/* reload timer */
	if (OPL->TimerHandler) (OPL->TimerHandler)(OPL->TimerParam+c,(double)OPL->T[c]*OPL->TimerBase);
	return OPL->status>>7;
}
diff --git a/tools/ioemu/hw/fmopl.h b/tools/ioemu/hw/fmopl.h
new file mode 100644
index 0000000000..a01ff902c7
--- /dev/null
+++ b/tools/ioemu/hw/fmopl.h
@@ -0,0 +1,174 @@
+#ifndef __FMOPL_H_
+#define __FMOPL_H_
+
+/* --- select emulation chips --- */
+#define BUILD_YM3812 (HAS_YM3812)
+//#define BUILD_YM3526 (HAS_YM3526)
+//#define BUILD_Y8950 (HAS_Y8950)
+
+/* --- system optimize --- */
+/* select bit size of output : 8 or 16 */
+#define OPL_OUTPUT_BIT 16
+
+/* compiler dependence */
+#ifndef OSD_CPU_H
+#define OSD_CPU_H
+typedef unsigned char UINT8; /* unsigned 8bit */
+typedef unsigned short UINT16; /* unsigned 16bit */
+typedef unsigned int UINT32; /* unsigned 32bit */
+typedef signed char INT8; /* signed 8bit */
+typedef signed short INT16; /* signed 16bit */
+typedef signed int INT32; /* signed 32bit */
+#endif
+
+#if (OPL_OUTPUT_BIT==16)
+typedef INT16 OPLSAMPLE;
+#endif
+#if (OPL_OUTPUT_BIT==8)
+typedef unsigned char OPLSAMPLE;
+#endif
+
+
+#if BUILD_Y8950
+#include "ymdeltat.h"
+#endif
+
+typedef void (*OPL_TIMERHANDLER)(int channel,double interval_Sec);
+typedef void (*OPL_IRQHANDLER)(int param,int irq);
+typedef void (*OPL_UPDATEHANDLER)(int param,int min_interval_us);
+typedef void (*OPL_PORTHANDLER_W)(int param,unsigned char data);
+typedef unsigned char (*OPL_PORTHANDLER_R)(int param);
+
+/* !!!!! this is the private section; do not access these members directly !!!!! */
+
+#define OPL_TYPE_WAVESEL 0x01 /* waveform select */
+#define OPL_TYPE_ADPCM 0x02 /* DELTA-T ADPCM unit */
+#define OPL_TYPE_KEYBOARD 0x04 /* keyboard interface */
+#define OPL_TYPE_IO 0x08 /* I/O port */
+
+/* Saving is necessary for member of the 'R' mark for suspend/resume */
+/* ---------- OPL one of slot ---------- */
+typedef struct fm_opl_slot {
+ INT32 TL; /* total level :TL << 8 */
+ INT32 TLL; /* adjusted now TL */
+ UINT8 KSR; /* key scale rate :(shift down bit) */
+ INT32 *AR; /* attack rate :&AR_TABLE[AR<<2] */
+ INT32 *DR; /* decay rate :&DR_TABLE[DR<<2] */
+ INT32 SL; /* sustain level :SL_TABLE[SL] */
+ INT32 *RR; /* release rate :&DR_TABLE[RR<<2] */
+ UINT8 ksl; /* keyscale level :(shift down bits) */
+ UINT8 ksr; /* key scale rate :kcode>>KSR */
+ UINT32 mul; /* multiple :ML_TABLE[ML] */
+ UINT32 Cnt; /* frequency count : */
+ UINT32 Incr; /* frequency step : */
+ /* envelope generator state */
+ UINT8 eg_typ; /* envelope type flag */
+ UINT8 evm; /* envelope phase */
+ INT32 evc; /* envelope counter */
+ INT32 eve; /* envelope counter end point */
+ INT32 evs; /* envelope counter step */
+ INT32 evsa; /* envelope step for AR :AR[ksr] */
+ INT32 evsd; /* envelope step for DR :DR[ksr] */
+ INT32 evsr; /* envelope step for RR :RR[ksr] */
+ /* LFO */
+ UINT8 ams; /* ams flag */
+ UINT8 vib; /* vibrato flag */
+ /* wave selector */
+ INT32 **wavetable;
+}OPL_SLOT;
+
+/* ---------- OPL one of channel ---------- */
+typedef struct fm_opl_channel {
+ OPL_SLOT SLOT[2];
+ UINT8 CON; /* connection type */
+ UINT8 FB; /* feed back :(shift down bit) */
+ INT32 *connect1; /* slot1 output pointer */
+ INT32 *connect2; /* slot2 output pointer */
+ INT32 op1_out[2]; /* slot1 output for self-feedback */
+ /* phase generator state */
+ UINT32 block_fnum; /* block+fnum : */
+ UINT8 kcode; /* key code : KeyScaleCode */
+ UINT32 fc; /* Freq. Increment base */
+ UINT32 ksl_base; /* KeyScaleLevel Base step */
+ UINT8 keyon; /* key on/off flag */
+} OPL_CH;
+
+/* OPL state */
+typedef struct fm_opl_f {
+ UINT8 type; /* chip type */
+ int clock; /* master clock (Hz) */
+ int rate; /* sampling rate (Hz) */
+ double freqbase; /* frequency base */
+ double TimerBase; /* Timer base time (==sampling time) */
+ UINT8 address; /* address register */
+ UINT8 status; /* status flag */
+ UINT8 statusmask; /* status mask */
+ UINT32 mode; /* Reg.08 : CSM , notesel,etc. */
+ /* Timer */
+ int T[2]; /* timer counter */
+ UINT8 st[2]; /* timer enable */
+ /* FM channel slots */
+ OPL_CH *P_CH; /* pointer of CH */
+ int max_ch; /* maximum channel */
+ /* Rhythm section */
+ UINT8 rythm; /* Rhythm mode , key flag */
+#if BUILD_Y8950
+ /* Delta-T ADPCM unit (Y8950) */
+ YM_DELTAT *deltat; /* DELTA-T ADPCM */
+#endif
+ /* Keyboard / I/O interface unit (Y8950) */
+ UINT8 portDirection;
+ UINT8 portLatch;
+ OPL_PORTHANDLER_R porthandler_r;
+ OPL_PORTHANDLER_W porthandler_w;
+ int port_param;
+ OPL_PORTHANDLER_R keyboardhandler_r;
+ OPL_PORTHANDLER_W keyboardhandler_w;
+ int keyboard_param;
+ /* time tables */
+ INT32 AR_TABLE[75]; /* attack rate tables */
+ INT32 DR_TABLE[75]; /* decay rate tables */
+ UINT32 FN_TABLE[1024]; /* fnumber -> increment counter */
+ /* LFO */
+ INT32 *ams_table;
+ INT32 *vib_table;
+ INT32 amsCnt;
+ INT32 amsIncr;
+ INT32 vibCnt;
+ INT32 vibIncr;
+ /* wave selector enable flag */
+ UINT8 wavesel;
+ /* external event callback handler */
+ OPL_TIMERHANDLER TimerHandler; /* TIMER handler */
+ int TimerParam; /* TIMER parameter */
+ OPL_IRQHANDLER IRQHandler; /* IRQ handler */
+ int IRQParam; /* IRQ parameter */
+ OPL_UPDATEHANDLER UpdateHandler; /* stream update handler */
+ int UpdateParam; /* stream update parameter */
+} FM_OPL;
+
+/* ---------- Generic interface section ---------- */
+#define OPL_TYPE_YM3526 (0)
+#define OPL_TYPE_YM3812 (OPL_TYPE_WAVESEL)
+#define OPL_TYPE_Y8950 (OPL_TYPE_ADPCM|OPL_TYPE_KEYBOARD|OPL_TYPE_IO)
+
+FM_OPL *OPLCreate(int type, int clock, int rate);
+void OPLDestroy(FM_OPL *OPL);
+void OPLSetTimerHandler(FM_OPL *OPL,OPL_TIMERHANDLER TimerHandler,int channelOffset);
+void OPLSetIRQHandler(FM_OPL *OPL,OPL_IRQHANDLER IRQHandler,int param);
+void OPLSetUpdateHandler(FM_OPL *OPL,OPL_UPDATEHANDLER UpdateHandler,int param);
+/* Y8950 port handlers */
+void OPLSetPortHandler(FM_OPL *OPL,OPL_PORTHANDLER_W PortHandler_w,OPL_PORTHANDLER_R PortHandler_r,int param);
+void OPLSetKeyboardHandler(FM_OPL *OPL,OPL_PORTHANDLER_W KeyboardHandler_w,OPL_PORTHANDLER_R KeyboardHandler_r,int param);
+
+void OPLResetChip(FM_OPL *OPL);
+int OPLWrite(FM_OPL *OPL,int a,int v);
+unsigned char OPLRead(FM_OPL *OPL,int a);
+int OPLTimerOver(FM_OPL *OPL,int c);
+
+/* YM3526/YM3812 local section */
+void YM3812UpdateOne(FM_OPL *OPL, INT16 *buffer, int length);
+
+void Y8950UpdateOne(FM_OPL *OPL, INT16 *buffer, int length);
+
+#endif
diff --git a/tools/ioemu/hw/i8254.c b/tools/ioemu/hw/i8254.c
new file mode 100644
index 0000000000..29a270e399
--- /dev/null
+++ b/tools/ioemu/hw/i8254.c
@@ -0,0 +1,525 @@
+/*
+ * QEMU 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "xc.h"
+#include <io/ioreq.h>
+
+//#define DEBUG_PIT
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+typedef struct PITChannelState { /* state of one of the three 8254 counters */
+ int count; /* can be 65536 */
+ uint16_t latched_count;
+ uint8_t count_latched; /* 0 = not latched, else holds the rw_mode used for readout */
+ uint8_t status_latched;
+ uint8_t status;
+ uint8_t read_state; /* RW_STATE_* for the next data-port read */
+ uint8_t write_state; /* RW_STATE_* for the next data-port write */
+ uint8_t write_latch; /* LSB buffered while waiting for MSB in 16-bit writes */
+ uint8_t rw_mode;
+ uint8_t mode;
+ uint8_t bcd; /* not supported */
+ uint8_t gate; /* timer start */
+ int64_t count_load_time;
+ /* irq handling */
+ int64_t next_transition_time;
+ QEMUTimer *irq_timer; /* only set on channel 0, which drives the IRQ */
+ int irq;
+ int vmx_channel; /* Is this accelerated by VMX ? */
+} PITChannelState;
+
+struct PITState { /* the whole 8254: three independent channels */
+ PITChannelState channels[3];
+};
+
+static PITState pit_state;
+
+static void pit_irq_timer_update(PITChannelState *s, int64_t current_time);
+
+/* currently operate which channel for vmx use */
+int vmx_channel = -1;
+extern FILE *logfile;
+static int pit_get_count(PITChannelState *s) /* current counter value, derived from elapsed vm_clock time since load */
+{
+ uint64_t d;
+ int counter;
+
+ d = muldiv64(qemu_get_clock(vm_clock) - s->count_load_time, PIT_FREQ, ticks_per_sec); /* elapsed PIT ticks */
+ switch(s->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (s->count - d) & 0xffff; /* one-shot modes count straight down */
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = s->count - ((2 * d) % s->count); /* square wave decrements by 2 */
+ break;
+ default:
+ counter = s->count - (d % s->count); /* periodic modes wrap at the reload value */
+ break;
+ }
+ return counter;
+}
+
+/* get pit output bit */
+static int pit_get_out1(PITChannelState *s, int64_t current_time) /* OUT pin level of one channel at the given vm_clock time */
+{
+ uint64_t d;
+ int out;
+
+ d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec); /* elapsed PIT ticks */
+ switch(s->mode) {
+ default:
+ case 0: /* interrupt on terminal count: OUT goes high once expired */
+ out = (d >= s->count);
+ break;
+ case 1: /* hardware retriggerable one-shot: OUT low while counting */
+ out = (d < s->count);
+ break;
+ case 2: /* rate generator: one-tick low pulse each period */
+ if ((d % s->count) == 0 && d != 0)
+ out = 1;
+ else
+ out = 0;
+ break;
+ case 3: /* square wave: high for the first (rounded-up) half period */
+ out = (d % s->count) < ((s->count + 1) >> 1);
+ break;
+ case 4: /* software/hardware triggered strobe: one-tick pulse at expiry */
+ case 5:
+ out = (d == s->count);
+ break;
+ }
+ return out;
+}
+
+int pit_get_out(PITState *pit, int channel, int64_t current_time) /* public wrapper: OUT pin of a channel by index */
+{
+ PITChannelState *s = &pit->channels[channel];
+ return pit_get_out1(s, current_time);
+}
+
+/* return -1 if no transition will occur. */
+static int64_t pit_get_next_transition_time(PITChannelState *s, /* vm_clock time of the next OUT edge */
+ int64_t current_time)
+{
+ uint64_t d, next_time, base;
+ int period2;
+
+ d = muldiv64(current_time - s->count_load_time, PIT_FREQ, ticks_per_sec); /* elapsed PIT ticks */
+ switch(s->mode) {
+ default:
+ case 0: /* one-shot modes: single edge at terminal count, then none */
+ case 1:
+ if (d < s->count)
+ next_time = s->count;
+ else
+ return -1;
+ break;
+ case 2: /* rate generator: edge at each period boundary */
+ base = (d / s->count) * s->count;
+ if ((d - base) == 0 && d != 0)
+ next_time = base + s->count;
+ else
+ next_time = base + s->count + 1;
+ break;
+ case 3: /* square wave: edges at the half period and the full period */
+ base = (d / s->count) * s->count;
+ period2 = ((s->count + 1) >> 1);
+ if ((d - base) < period2)
+ next_time = base + period2;
+ else
+ next_time = base + s->count;
+ break;
+ case 4: /* strobe: pulse starts at count, ends one tick later */
+ case 5:
+ if (d < s->count)
+ next_time = s->count;
+ else if (d == s->count)
+ next_time = s->count + 1;
+ else
+ return -1;
+ break;
+ }
+ /* convert to timer units */
+ next_time = s->count_load_time + muldiv64(next_time, ticks_per_sec, PIT_FREQ);
+ /* fix potential rounding problems */
+ /* XXX: better solution: use a clock at PIT_FREQ Hz */
+ if (next_time <= current_time)
+ next_time = current_time + 1;
+ return next_time;
+}
+
+/* val must be 0 or 1 */
+/* val must be 0 or 1 */
+void pit_set_gate(PITState *pit, int channel, int val) /* set the GATE input of a channel; rising edge retriggers some modes */
+{
+ PITChannelState *s = &pit->channels[channel];
+
+ switch(s->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 5:
+ if (s->gate < val) {
+ /* restart counting on rising edge */
+ s->count_load_time = qemu_get_clock(vm_clock);
+ pit_irq_timer_update(s, s->count_load_time);
+ }
+ break;
+ case 2:
+ case 3:
+ if (s->gate < val) {
+ /* restart counting on rising edge */
+ s->count_load_time = qemu_get_clock(vm_clock);
+ pit_irq_timer_update(s, s->count_load_time);
+ }
+ /* XXX: disable/enable counting */
+ break;
+ }
+ s->gate = val;
+}
+
+int pit_get_gate(PITState *pit, int channel) /* read back the GATE input level of a channel */
+{
+ PITChannelState *s = &pit->channels[channel];
+ return s->gate;
+}
+
+void pit_reset_vmx_vectors() /* hand the VMX-accelerated PIT channel's parameters (count, vector, channel, rw mode) to the hypervisor */
+{
+ extern void *shared_page; /* page shared with the hypervisor; layout is vcpu_iodata_t — defined in io/ioreq.h */
+ ioreq_t *req;
+ int irq, i;
+ PITChannelState *s;
+
+ /* Assumes PIT is wired to IRQ0 and -1 is uninitialized irq base */
+ if ((irq = pic_irq2vec(0)) == -1)
+ return; /* PIC not programmed yet; retried when ICW2 is written */
+
+ for(i = 0; i < 3; i++) {
+ if (pit_state.channels[i].vmx_channel)
+ break; /* first channel the guest has touched via the data port */
+ }
+
+ if (i == 3)
+ return; /* no channel marked for VMX acceleration */
+
+ /* Assumes just one VMX accelerated channel */
+ vmx_channel = i;
+ s = &pit_state.channels[vmx_channel];
+ fprintf(logfile,
+ "VMX_PIT:guest init pit channel %d!\n", vmx_channel);
+ req = &((vcpu_iodata_t *) shared_page)->vp_ioreq;
+
+ req->state = STATE_IORESP_HOOK; /* signal the hypervisor to pick up this payload */
+ /*
+ * info passed to HV as follows:
+ * -- init count:16 bit, timer vec:8 bit,
+ * PIT channel(0~2):2 bit, rw mode:2 bit
+ */
+ req->u.data = s->count;
+ req->u.data |= (irq << 16);
+ req->u.data |= (vmx_channel << 24);
+ req->u.data |= ((s->rw_mode) << 26);
+ fprintf(logfile, "VMX_PIT:pass info 0x%llx to HV!\n", req->u.data);
+}
+
+static inline void pit_load_count(PITChannelState *s, int val) /* load a new initial count (0 means 0x10000, the hardware maximum) */
+{
+ if (val == 0)
+ val = 0x10000;
+ s->count_load_time = qemu_get_clock(vm_clock);
+ s->count = val;
+
+ /* guest init this pit channel for periodic mode. we do not update related
+ * timer so the channel never sends an intr from the device model */
+ if (vmx_channel != -1 && s->mode == 2) {
+ pit_reset_vmx_vectors(); /* periodic channel is emulated by the hypervisor instead */
+ vmx_channel = -1;
+ }
+
+/* pit_irq_timer_update(s, s->count_load_time);*/
+}
+
+/* if already latched, do not latch again */
+/* if already latched, do not latch again */
+static void pit_latch_count(PITChannelState *s) /* snapshot the counter; count_latched remembers the rw_mode for readout */
+{
+ if (!s->count_latched) {
+ s->latched_count = pit_get_count(s);
+ s->count_latched = s->rw_mode;
+ }
+}
+
+static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val) /* ports 0x40-0x42: counter data; 0x43: mode/command register */
+{
+ PITState *pit = opaque;
+ int channel, access;
+ PITChannelState *s;
+
+ addr &= 3;
+ if (addr == 3) { /* mode/command register */
+ channel = val >> 6;
+ if (channel == 3) {
+ /* read back command */
+ for(channel = 0; channel < 3; channel++) {
+ s = &pit->channels[channel];
+ if (val & (2 << channel)) { /* channel selected in the read-back mask */
+ if (!(val & 0x20)) { /* bit 5 clear = latch count */
+ pit_latch_count(s);
+ }
+ if (!(val & 0x10) && !s->status_latched) { /* bit 4 clear = latch status */
+ /* status latch */
+ /* XXX: add BCD and null count */
+ s->status = (pit_get_out1(s, qemu_get_clock(vm_clock)) << 7) |
+ (s->rw_mode << 4) |
+ (s->mode << 1) |
+ s->bcd;
+ s->status_latched = 1;
+ }
+ }
+ }
+ } else {
+ s = &pit->channels[channel];
+ access = (val >> 4) & 3;
+ if (access == 0) {
+ pit_latch_count(s); /* counter latch command */
+ } else {
+ s->rw_mode = access; /* 1=LSB only, 2=MSB only, 3=LSB then MSB */
+ s->read_state = access;
+ s->write_state = access;
+
+ s->mode = (val >> 1) & 7;
+ s->bcd = val & 1;
+ /* XXX: update irq timer ? */
+ }
+ }
+ } else { /* data port of channel 'addr' */
+ s = &pit->channels[addr];
+ s->vmx_channel = 1; /* any data write marks the channel as a VMX acceleration candidate */
+ vmx_channel = addr;
+ switch(s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(s, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(s, val << 8);
+ break;
+ case RW_STATE_WORD0: /* first byte of a 16-bit write: buffer the LSB */
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1: /* second byte: combine and load */
+ pit_load_count(s, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+}
+
+static uint32_t pit_ioport_read(void *opaque, uint32_t addr) /* data-port read: latched status, then latched count, else live count */
+{
+ PITState *pit = opaque;
+ int ret, count;
+ PITChannelState *s;
+
+ addr &= 3;
+ s = &pit->channels[addr];
+ if (s->status_latched) { /* status latch (from read-back command) has priority */
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) { /* count_latched holds the rw_mode at latch time */
+ switch(s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0: /* 16-bit latch: LSB now, MSB on the next read */
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else { /* unlatched read of the running counter */
+ switch(s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(s);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(s);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(s);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(s);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+ return ret;
+}
+
+static void pit_irq_timer_update(PITChannelState *s, int64_t current_time) /* drive the IRQ line from OUT and rearm for the next edge */
+{
+ int64_t expire_time;
+ int irq_level;
+
+ if (!s->irq_timer)
+ return; /* only channel 0 has an irq_timer */
+ expire_time = pit_get_next_transition_time(s, current_time);
+ irq_level = pit_get_out1(s, current_time);
+ pic_set_irq(s->irq, irq_level);
+#ifdef DEBUG_PIT
+ printf("irq_level=%d next_delay=%f\n",
+ irq_level,
+ (double)(expire_time - current_time) / ticks_per_sec);
+#endif
+ s->next_transition_time = expire_time;
+ if (expire_time != -1)
+ qemu_mod_timer(s->irq_timer, expire_time);
+ else
+ qemu_del_timer(s->irq_timer); /* no further transitions (one-shot expired) */
+}
+
+static void pit_irq_timer(void *opaque) /* QEMUTimer callback: fire at the precomputed transition time */
+{
+ PITChannelState *s = opaque;
+
+ pit_irq_timer_update(s, s->next_transition_time);
+}
+
+static void pit_save(QEMUFile *f, void *opaque) /* savevm callback: serialize all three channels (order must match pit_load) */
+{
+ PITState *pit = opaque;
+ PITChannelState *s;
+ int i;
+
+ for(i = 0; i < 3; i++) {
+ s = &pit->channels[i];
+ qemu_put_be32s(f, &s->count);
+ qemu_put_be16s(f, &s->latched_count);
+ qemu_put_8s(f, &s->count_latched);
+ qemu_put_8s(f, &s->status_latched);
+ qemu_put_8s(f, &s->status);
+ qemu_put_8s(f, &s->read_state);
+ qemu_put_8s(f, &s->write_state);
+ qemu_put_8s(f, &s->write_latch);
+ qemu_put_8s(f, &s->rw_mode);
+ qemu_put_8s(f, &s->mode);
+ qemu_put_8s(f, &s->bcd);
+ qemu_put_8s(f, &s->gate);
+ qemu_put_be64s(f, &s->count_load_time);
+ if (s->irq_timer) { /* only channel 0 carries timer state */
+ qemu_put_be64s(f, &s->next_transition_time);
+ qemu_put_timer(f, s->irq_timer);
+ }
+ }
+}
+
+static int pit_load(QEMUFile *f, void *opaque, int version_id) /* loadvm callback: mirror of pit_save; returns 0 or -EINVAL */
+{
+ PITState *pit = opaque;
+ PITChannelState *s;
+ int i;
+
+ if (version_id != 1)
+ return -EINVAL; /* only format version 1 is understood */
+
+ for(i = 0; i < 3; i++) {
+ s = &pit->channels[i];
+ qemu_get_be32s(f, &s->count);
+ qemu_get_be16s(f, &s->latched_count);
+ qemu_get_8s(f, &s->count_latched);
+ qemu_get_8s(f, &s->status_latched);
+ qemu_get_8s(f, &s->status);
+ qemu_get_8s(f, &s->read_state);
+ qemu_get_8s(f, &s->write_state);
+ qemu_get_8s(f, &s->write_latch);
+ qemu_get_8s(f, &s->rw_mode);
+ qemu_get_8s(f, &s->mode);
+ qemu_get_8s(f, &s->bcd);
+ qemu_get_8s(f, &s->gate);
+ qemu_get_be64s(f, &s->count_load_time);
+ if (s->irq_timer) {
+ qemu_get_be64s(f, &s->next_transition_time);
+ qemu_get_timer(f, s->irq_timer);
+ }
+ }
+ return 0;
+}
+
+static void pit_reset(void *opaque) /* machine reset: all channels to mode 3 with maximum count; gate high except channel 2 */
+{
+ PITState *pit = opaque;
+ PITChannelState *s;
+ int i;
+
+ for(i = 0;i < 3; i++) {
+ s = &pit->channels[i];
+ s->mode = 3;
+ s->gate = (i != 2); /* channel 2's gate is controlled via the speaker port */
+ pit_load_count(s, 0); /* 0 = 0x10000, the longest period */
+ }
+}
+
+PITState *pit_init(int base, int irq) /* create the PIT at I/O 'base' (usually 0x40) with channel 0 wired to 'irq' */
+{
+ PITState *pit = &pit_state;
+ PITChannelState *s;
+
+ s = &pit->channels[0];
+ /* the timer 0 is connected to an IRQ */
+ s->irq_timer = qemu_new_timer(vm_clock, pit_irq_timer, s);
+ s->irq = irq;
+
+ register_savevm("i8254", base, 1, pit_save, pit_load, pit);
+
+ qemu_register_reset(pit_reset, pit);
+ register_ioport_write(base, 4, 1, pit_ioport_write, pit);
+ register_ioport_read(base, 3, 1, pit_ioport_read, pit); /* only 3 ports: the mode register (base+3) is write-only */
+
+ pit_reset(pit);
+
+ return pit;
+}
diff --git a/tools/ioemu/hw/i8259.c b/tools/ioemu/hw/i8259.c
new file mode 100644
index 0000000000..5bdc65d700
--- /dev/null
+++ b/tools/ioemu/hw/i8259.c
@@ -0,0 +1,529 @@
+/*
+ * QEMU 8259 interrupt controller emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* debug PIC */
+//#define DEBUG_PIC
+
+//#define DEBUG_IRQ_LATENCY
+//#define DEBUG_IRQ_COUNT
+
+typedef struct PicState { /* state of one 8259A controller (master or slave) */
+ uint8_t last_irr; /* edge detection */
+ uint8_t irr; /* interrupt request register */
+ uint8_t imr; /* interrupt mask register */
+ uint8_t isr; /* interrupt service register */
+ uint8_t priority_add; /* highest irq priority */
+ uint8_t irq_base; /* vector base programmed via ICW2 */
+ uint8_t read_reg_select; /* OCW3: 0 = read IRR, 1 = read ISR */
+ uint8_t poll;
+ uint8_t special_mask;
+ uint8_t init_state; /* 0 = operational, 1..3 = awaiting ICW2..ICW4 */
+ uint8_t auto_eoi;
+ uint8_t rotate_on_auto_eoi;
+ uint8_t special_fully_nested_mode;
+ uint8_t init4; /* true if 4 byte init */
+ uint8_t elcr; /* PIIX edge/trigger selection*/
+ uint8_t elcr_mask; /* which ELCR bits are writable on this controller */
+} PicState;
+
+/* 0 is master pic, 1 is slave pic */
+static PicState pics[2];
+
+#if defined(DEBUG_PIC) || defined (DEBUG_IRQ_COUNT)
+static int irq_level[16];
+#endif
+#ifdef DEBUG_IRQ_COUNT
+static uint64_t irq_count[16];
+#endif
+
+/* set irq level. If an edge is detected, then the IRR is set to 1 */
+/* set irq level. If an edge is detected, then the IRR is set to 1 */
+static inline void pic_set_irq1(PicState *s, int irq, int level) /* irq is local to this controller (0-7) */
+{
+ int mask;
+ mask = 1 << irq;
+ if (s->elcr & mask) {
+ /* level triggered */
+ if (level) {
+ s->irr |= mask;
+ s->last_irr |= mask;
+ } else {
+ s->irr &= ~mask;
+ s->last_irr &= ~mask;
+ }
+ } else {
+ /* edge triggered */
+ if (level) {
+ if ((s->last_irr & mask) == 0)
+ s->irr |= mask; /* only a 0->1 transition latches the request */
+ s->last_irr |= mask;
+ } else {
+ s->last_irr &= ~mask;
+ }
+ }
+}
+
+/* return the highest priority found in mask (highest = smallest
+ number). Return 8 if no irq */
+/* return the highest priority found in mask (highest = smallest
+ number). Return 8 if no irq */
+static inline int get_priority(PicState *s, int mask) /* priority is relative to priority_add (rotation base) */
+{
+ int priority;
+ if (mask == 0)
+ return 8;
+ priority = 0;
+ while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0)
+ priority++;
+ return priority;
+}
+
+/* return the pic wanted interrupt. return -1 if none */
+/* return the pic wanted interrupt. return -1 if none */
+static int pic_get_irq(PicState *s) /* highest pending unmasked request that outranks the in-service ones */
+{
+ int mask, cur_priority, priority;
+
+ mask = s->irr & ~s->imr;
+ priority = get_priority(s, mask);
+ if (priority == 8)
+ return -1;
+ /* compute current priority. If special fully nested mode on the
+ master, the IRQ coming from the slave is not taken into account
+ for the priority computation. */
+ mask = s->isr;
+ if (s->special_fully_nested_mode && s == &pics[0])
+ mask &= ~(1 << 2); /* IRQ2 is the slave cascade line */
+ cur_priority = get_priority(s, mask);
+ if (priority < cur_priority) {
+ /* higher priority found: an irq should be generated */
+ return (priority + s->priority_add) & 7;
+ } else {
+ return -1;
+ }
+}
+
+/* raise irq to CPU if necessary. must be called every time the active
+ irq may change */
+/* raise irq to CPU if necessary. must be called every time the active
+ irq may change */
+static void pic_update_irq(void) /* cascade the slave through IRQ2, then signal the CPU if the master has a winner */
+{
+ int irq2, irq;
+
+ /* first look at slave pic */
+ irq2 = pic_get_irq(&pics[1]);
+ if (irq2 >= 0) {
+ /* if irq request by slave pic, signal master PIC */
+ pic_set_irq1(&pics[0], 2, 1); /* pulse the cascade line (edge) */
+ pic_set_irq1(&pics[0], 2, 0);
+ }
+ /* look at requested irq */
+ irq = pic_get_irq(&pics[0]);
+ if (irq >= 0) {
+#if defined(DEBUG_PIC)
+ {
+ int i;
+ for(i = 0; i < 2; i++) {
+ printf("pic%d: imr=%x irr=%x padd=%d\n",
+ i, pics[i].imr, pics[i].irr, pics[i].priority_add);
+
+ }
+ }
+ printf("pic: cpu_interrupt\n");
+#endif
+ cpu_interrupt(cpu_single_env, CPU_INTERRUPT_HARD);
+ }
+}
+
+#ifdef DEBUG_IRQ_LATENCY
+int64_t irq_time[16];
+#endif
+
+void pic_set_irq(int irq, int level) /* public entry: set one of the 16 global IRQ lines and reevaluate the cascade */
+{
+#if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT)
+ if (level != irq_level[irq]) {
+#if defined(DEBUG_PIC)
+ printf("pic_set_irq: irq=%d level=%d\n", irq, level);
+#endif
+ irq_level[irq] = level;
+#ifdef DEBUG_IRQ_COUNT
+ if (level == 1)
+ irq_count[irq]++;
+#endif
+ }
+#endif
+#ifdef DEBUG_IRQ_LATENCY
+ if (level) {
+ irq_time[irq] = qemu_get_clock(vm_clock);
+ }
+#endif
+ pic_set_irq1(&pics[irq >> 3], irq & 7, level); /* route to master (0-7) or slave (8-15) */
+ pic_update_irq();
+}
+
+/* acknowledge interrupt 'irq' */
+/* acknowledge interrupt 'irq' */
+static inline void pic_intack(PicState *s, int irq) /* INTA cycle: move the request into service (or auto-EOI) */
+{
+ if (s->auto_eoi) {
+ if (s->rotate_on_auto_eoi)
+ s->priority_add = (irq + 1) & 7;
+ } else {
+ s->isr |= (1 << irq);
+ }
+ /* We don't clear a level sensitive interrupt here */
+ if (!(s->elcr & (1 << irq)))
+ s->irr &= ~(1 << irq);
+}
+
+int cpu_get_pic_interrupt(CPUState *env) /* CPU interrupt acknowledge: returns the vector number to deliver */
+{
+ int irq, irq2, intno;
+
+ /* read the irq from the PIC */
+
+ irq = pic_get_irq(&pics[0]);
+ if (irq >= 0) {
+ pic_intack(&pics[0], irq);
+ if (irq == 2) { /* cascade: the real source is on the slave */
+ irq2 = pic_get_irq(&pics[1]);
+ if (irq2 >= 0) {
+ pic_intack(&pics[1], irq2);
+ } else {
+ /* spurious IRQ on slave controller */
+ irq2 = 7;
+ }
+ intno = pics[1].irq_base + irq2; /* slave vector = its base + local irq (0-7) */
+ irq = irq2 + 8;
+ } else {
+ intno = pics[0].irq_base + irq;
+ }
+ } else {
+ /* spurious IRQ on host controller */
+ irq = 7;
+ intno = pics[0].irq_base + irq;
+ }
+ pic_update_irq();
+
+#ifdef DEBUG_IRQ_LATENCY
+ printf("IRQ%d latency=%0.3fus\n",
+ irq,
+ (double)(qemu_get_clock(vm_clock) - irq_time[irq]) * 1000000.0 / ticks_per_sec);
+#endif
+#if defined(DEBUG_PIC)
+ printf("pic_interrupt: irq=%d\n", irq);
+#endif
+ return intno;
+}
+
+int pic_irq2vec(int irq) /* translate a global IRQ line (0-15) into the CPU vector; -1 if the PIC is not programmed yet (base still 0xFF) */
+{
+ int vector = -1;
+
+ if (irq >= 8 && irq <= 15) {
+ if (pics[1].irq_base != 0xFF)
+ vector = pics[1].irq_base + irq - 8; /* bug fix: slave delivers base + (irq - 8), cf. cpu_get_pic_interrupt; '+ irq' was off by 8 */
+ } else if (irq != 2 && irq <= 7) { /* IRQ2 is the cascade line and never delivers a vector */
+ if (pics[0].irq_base != 0xFF)
+ vector = pics[0].irq_base + irq;
+ }
+ return vector;
+}
+
+static void pic_reset(void *opaque) /* clear all controller state except elcr_mask, which is board wiring, not guest state */
+{
+ PicState *s = opaque;
+ int tmp;
+
+ tmp = s->elcr_mask;
+ memset(s, 0, sizeof(PicState));
+ s->elcr_mask = tmp;
+}
+
+static void pic_ioport_write(void *opaque, uint32_t addr, uint32_t val) /* port 0: ICW1/OCW2/OCW3; port 1: IMR or ICW2-ICW4 during init */
+{
+ PicState *s = opaque;
+ int priority, cmd, irq;
+
+#ifdef DEBUG_PIC
+ printf("pic_write: addr=0x%02x val=0x%02x\n", addr, val);
+#endif
+ addr &= 1;
+ if (addr == 0) {
+ if (val & 0x10) {
+ /* init (ICW1): reset and start the init sequence */
+ pic_reset(s);
+ /* deassert a pending interrupt */
+ cpu_reset_interrupt(cpu_single_env, CPU_INTERRUPT_HARD);
+
+ s->init_state = 1;
+ s->init4 = val & 1;
+ if (val & 0x02)
+ hw_error("single mode not supported");
+ if (val & 0x08)
+ hw_error("level sensitive irq not supported");
+ } else if (val & 0x08) {
+ /* OCW3: poll / register read select / special mask */
+ if (val & 0x04)
+ s->poll = 1;
+ if (val & 0x02)
+ s->read_reg_select = val & 1;
+ if (val & 0x40)
+ s->special_mask = (val >> 5) & 1;
+ } else {
+ /* OCW2: EOI and priority rotation commands */
+ cmd = val >> 5;
+ switch(cmd) {
+ case 0: /* rotate in auto-EOI: clear (0) or set (4) */
+ case 4:
+ s->rotate_on_auto_eoi = cmd >> 2;
+ break;
+ case 1: /* end of interrupt */
+ case 5: /* ... with rotation */
+ priority = get_priority(s, s->isr);
+ if (priority != 8) {
+ irq = (priority + s->priority_add) & 7;
+ s->isr &= ~(1 << irq);
+ if (cmd == 5)
+ s->priority_add = (irq + 1) & 7;
+ pic_update_irq();
+ }
+ break;
+ case 3: /* specific EOI */
+ irq = val & 7;
+ s->isr &= ~(1 << irq);
+ pic_update_irq();
+ break;
+ case 6: /* set priority command */
+ s->priority_add = (val + 1) & 7;
+ pic_update_irq();
+ break;
+ case 7: /* specific EOI with rotation */
+ irq = val & 7;
+ s->isr &= ~(1 << irq);
+ s->priority_add = (irq + 1) & 7;
+ pic_update_irq();
+ break;
+ default:
+ /* no operation */
+ break;
+ }
+ }
+ } else {
+ switch(s->init_state) {
+ case 0:
+ /* normal mode: write the interrupt mask register */
+ s->imr = val;
+ pic_update_irq();
+ break;
+ case 1: /* ICW2: vector base */
+ s->irq_base = val & 0xf8;
+ s->init_state = 2;
+ pit_reset_vmx_vectors(); /* vector base now known; re-export the VMX PIT channel info */
+ break;
+ case 2: /* ICW3 (cascade wiring) is accepted but ignored */
+ if (s->init4) {
+ s->init_state = 3;
+ } else {
+ s->init_state = 0;
+ }
+ break;
+ case 3: /* ICW4: mode bits */
+ s->special_fully_nested_mode = (val >> 4) & 1;
+ s->auto_eoi = (val >> 1) & 1;
+ s->init_state = 0;
+ break;
+ }
+ }
+}
+
+static uint32_t pic_poll_read (PicState *s, uint32_t addr1) /* poll-mode read: ack and return the pending irq; addr1 bit 7 set = slave */
+{
+ int ret;
+
+ ret = pic_get_irq(s);
+ if (ret >= 0) {
+ if (addr1 >> 7) {
+ /* polling the slave also clears the cascade request on the master */
+ pics[0].isr &= ~(1 << 2);
+ pics[0].irr &= ~(1 << 2);
+ }
+ s->irr &= ~(1 << ret);
+ s->isr &= ~(1 << ret);
+ if (addr1 >> 7 || ret != 2)
+ pic_update_irq(); /* irq 2 on the master is resolved by a follow-up slave poll */
+ } else {
+ ret = 0x07; /* spurious: report irq 7 */
+ pic_update_irq();
+ }
+
+ return ret;
+}
+
+static uint32_t pic_ioport_read(void *opaque, uint32_t addr1) /* port 0: IRR or ISR per OCW3 selection (or poll); port 1: IMR */
+{
+ PicState *s = opaque;
+ unsigned int addr;
+ int ret;
+
+ addr = addr1;
+ addr &= 1;
+ if (s->poll) {
+ ret = pic_poll_read(s, addr1); /* one-shot: poll flag is consumed by the read */
+ s->poll = 0;
+ } else {
+ if (addr == 0) {
+ if (s->read_reg_select)
+ ret = s->isr;
+ else
+ ret = s->irr;
+ } else {
+ ret = s->imr;
+ }
+ }
+#ifdef DEBUG_PIC
+ printf("pic_read: addr=0x%02x val=0x%02x\n", addr1, ret);
+#endif
+ return ret;
+}
+
+/* memory mapped interrupt status */
+/* memory mapped interrupt status */
+uint32_t pic_intack_read(CPUState *env) /* acknowledge via poll reads; returns the global irq number (0-15) */
+{
+ int ret;
+
+ ret = pic_poll_read(&pics[0], 0x00);
+ if (ret == 2)
+ ret = pic_poll_read(&pics[1], 0x80) + 8; /* cascade: resolve on the slave */
+ /* Prepare for ISR read */
+ pics[0].read_reg_select = 1;
+
+ return ret;
+}
+
+static void elcr_ioport_write(void *opaque, uint32_t addr, uint32_t val) /* ELCR write: only bits allowed by elcr_mask stick */
+{
+ PicState *s = opaque;
+ s->elcr = val & s->elcr_mask;
+}
+
+static uint32_t elcr_ioport_read(void *opaque, uint32_t addr1) /* ELCR read: current edge/level trigger selection */
+{
+ PicState *s = opaque;
+ return s->elcr;
+}
+
+static void pic_save(QEMUFile *f, void *opaque) /* savevm callback: serialize one controller (order must match pic_load) */
+{
+ PicState *s = opaque;
+
+ qemu_put_8s(f, &s->last_irr);
+ qemu_put_8s(f, &s->irr);
+ qemu_put_8s(f, &s->imr);
+ qemu_put_8s(f, &s->isr);
+ qemu_put_8s(f, &s->priority_add);
+ qemu_put_8s(f, &s->irq_base);
+ qemu_put_8s(f, &s->read_reg_select);
+ qemu_put_8s(f, &s->poll);
+ qemu_put_8s(f, &s->special_mask);
+ qemu_put_8s(f, &s->init_state);
+ qemu_put_8s(f, &s->auto_eoi);
+ qemu_put_8s(f, &s->rotate_on_auto_eoi);
+ qemu_put_8s(f, &s->special_fully_nested_mode);
+ qemu_put_8s(f, &s->init4);
+ qemu_put_8s(f, &s->elcr);
+}
+
+static int pic_load(QEMUFile *f, void *opaque, int version_id) /* loadvm callback: mirror of pic_save; returns 0 or -EINVAL */
+{
+ PicState *s = opaque;
+
+ if (version_id != 1)
+ return -EINVAL; /* only format version 1 is understood */
+
+ qemu_get_8s(f, &s->last_irr);
+ qemu_get_8s(f, &s->irr);
+ qemu_get_8s(f, &s->imr);
+ qemu_get_8s(f, &s->isr);
+ qemu_get_8s(f, &s->priority_add);
+ qemu_get_8s(f, &s->irq_base);
+ qemu_get_8s(f, &s->read_reg_select);
+ qemu_get_8s(f, &s->poll);
+ qemu_get_8s(f, &s->special_mask);
+ qemu_get_8s(f, &s->init_state);
+ qemu_get_8s(f, &s->auto_eoi);
+ qemu_get_8s(f, &s->rotate_on_auto_eoi);
+ qemu_get_8s(f, &s->special_fully_nested_mode);
+ qemu_get_8s(f, &s->init4);
+ qemu_get_8s(f, &s->elcr);
+ return 0;
+}
+
+/* XXX: add generic master/slave system */
+/* XXX: add generic master/slave system */
+static void pic_init1(int io_addr, int elcr_addr, PicState *s) /* register I/O ports, ELCR port, savevm and reset hooks for one controller */
+{
+ register_ioport_write(io_addr, 2, 1, pic_ioport_write, s);
+ register_ioport_read(io_addr, 2, 1, pic_ioport_read, s);
+ if (elcr_addr >= 0) {
+ register_ioport_write(elcr_addr, 1, 1, elcr_ioport_write, s);
+ register_ioport_read(elcr_addr, 1, 1, elcr_ioport_read, s);
+ }
+ register_savevm("i8259", io_addr, 1, pic_save, pic_load, s);
+ qemu_register_reset(pic_reset, s);
+}
+
+void pic_info(void) /* monitor command: dump both controllers' registers to the terminal */
+{
+ int i;
+ PicState *s;
+
+ for(i=0;i<2;i++) {
+ s = &pics[i];
+ term_printf("pic%d: irr=%02x imr=%02x isr=%02x hprio=%d irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n",
+ i, s->irr, s->imr, s->isr, s->priority_add,
+ s->irq_base, s->read_reg_select, s->elcr,
+ s->special_fully_nested_mode);
+ }
+}
+
+void irq_info(void) /* monitor command: dump per-IRQ delivery counts (needs DEBUG_IRQ_COUNT build) */
+{
+#ifndef DEBUG_IRQ_COUNT
+ term_printf("irq statistic code not compiled.\n");
+#else
+ int i;
+ int64_t count;
+
+ term_printf("IRQ statistics:\n");
+ for (i = 0; i < 16; i++) {
+ count = irq_count[i];
+ if (count > 0)
+ term_printf("%2d: %lld\n", i, count);
+ }
+#endif
+}
+
+void pic_init(void) /* wire up master (0x20/ELCR 0x4d0) and slave (0xa0/ELCR 0x4d1) controllers */
+{
+ pic_init1(0x20, 0x4d0, &pics[0]);
+ pic_init1(0xa0, 0x4d1, &pics[1]);
+ pics[0].elcr_mask = 0xf8;
+ pics[1].elcr_mask = 0xde;
+ pics[0].irq_base = 0xff;
+ pics[1].irq_base = 0xff; /* bug fix: was 'pics[0]' twice, leaving the slave base 0 so pic_irq2vec handed out bogus vectors pre-init */
+}
+
diff --git a/tools/ioemu/hw/ide.c b/tools/ioemu/hw/ide.c
new file mode 100644
index 0000000000..bc7ebd3205
--- /dev/null
+++ b/tools/ioemu/hw/ide.c
@@ -0,0 +1,2318 @@
+/*
+ * QEMU IDE disk and CD-ROM Emulator
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* debug IDE devices */
+//#define DEBUG_IDE
+//#define DEBUG_IDE_ATAPI
+
+/* Bits of HD_STATUS */
+#define ERR_STAT 0x01
+#define INDEX_STAT 0x02
+#define ECC_STAT 0x04 /* Corrected error */
+#define DRQ_STAT 0x08
+#define SEEK_STAT 0x10
+#define SRV_STAT 0x10
+#define WRERR_STAT 0x20
+#define READY_STAT 0x40
+#define BUSY_STAT 0x80
+
+/* Bits for HD_ERROR */
+#define MARK_ERR 0x01 /* Bad address mark */
+#define TRK0_ERR 0x02 /* couldn't find track 0 */
+#define ABRT_ERR 0x04 /* Command aborted */
+#define MCR_ERR 0x08 /* media change request */
+#define ID_ERR 0x10 /* ID field not found */
+#define MC_ERR 0x20 /* media changed */
+#define ECC_ERR 0x40 /* Uncorrectable ECC error */
+#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */
+#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */
+
+/* Bits of HD_NSECTOR */
+#define CD 0x01
+#define IO 0x02
+#define REL 0x04
+#define TAG_MASK 0xf8
+
+#define IDE_CMD_RESET 0x04
+#define IDE_CMD_DISABLE_IRQ 0x02
+
+/* ATA/ATAPI Commands pre T13 Spec */
+#define WIN_NOP 0x00
+/*
+ * 0x01->0x02 Reserved
+ */
+#define CFA_REQ_EXT_ERROR_CODE 0x03 /* CFA Request Extended Error Code */
+/*
+ * 0x04->0x07 Reserved
+ */
+#define WIN_SRST 0x08 /* ATAPI soft reset command */
+#define WIN_DEVICE_RESET 0x08
+/*
+ * 0x09->0x0F Reserved
+ */
+#define WIN_RECAL 0x10
+#define WIN_RESTORE WIN_RECAL
+/*
+ * 0x10->0x1F Reserved
+ */
+#define WIN_READ 0x20 /* 28-Bit */
+#define WIN_READ_ONCE 0x21 /* 28-Bit without retries */
+#define WIN_READ_LONG 0x22 /* 28-Bit */
+#define WIN_READ_LONG_ONCE 0x23 /* 28-Bit without retries */
+#define WIN_READ_EXT 0x24 /* 48-Bit */
+#define WIN_READDMA_EXT 0x25 /* 48-Bit */
+#define WIN_READDMA_QUEUED_EXT 0x26 /* 48-Bit */
+#define WIN_READ_NATIVE_MAX_EXT 0x27 /* 48-Bit */
+/*
+ * 0x28
+ */
+#define WIN_MULTREAD_EXT 0x29 /* 48-Bit */
+/*
+ * 0x2A->0x2F Reserved
+ */
+#define WIN_WRITE 0x30 /* 28-Bit */
+#define WIN_WRITE_ONCE 0x31 /* 28-Bit without retries */
+#define WIN_WRITE_LONG 0x32 /* 28-Bit */
+#define WIN_WRITE_LONG_ONCE 0x33 /* 28-Bit without retries */
+#define WIN_WRITE_EXT 0x34 /* 48-Bit */
+#define WIN_WRITEDMA_EXT 0x35 /* 48-Bit */
+#define WIN_WRITEDMA_QUEUED_EXT 0x36 /* 48-Bit */
+#define WIN_SET_MAX_EXT 0x37 /* 48-Bit */
+#define CFA_WRITE_SECT_WO_ERASE 0x38 /* CFA Write Sectors without erase */
+#define WIN_MULTWRITE_EXT 0x39 /* 48-Bit */
+/*
+ * 0x3A->0x3B Reserved
+ */
+#define WIN_WRITE_VERIFY 0x3C /* 28-Bit */
+/*
+ * 0x3D->0x3F Reserved
+ */
+#define WIN_VERIFY 0x40 /* 28-Bit - Read Verify Sectors */
+#define WIN_VERIFY_ONCE 0x41 /* 28-Bit - without retries */
+#define WIN_VERIFY_EXT 0x42 /* 48-Bit */
+/*
+ * 0x43->0x4F Reserved
+ */
+#define WIN_FORMAT 0x50
+/*
+ * 0x51->0x5F Reserved
+ */
+#define WIN_INIT 0x60
+/*
+ * 0x61->0x6F Reserved
+ */
+#define WIN_SEEK 0x70 /* 0x70-0x7F Reserved */
+#define CFA_TRANSLATE_SECTOR 0x87 /* CFA Translate Sector */
+#define WIN_DIAGNOSE 0x90
+#define WIN_SPECIFY 0x91 /* set drive geometry translation */
+#define WIN_DOWNLOAD_MICROCODE 0x92
+#define WIN_STANDBYNOW2 0x94
+#define WIN_STANDBY2 0x96
+#define WIN_SETIDLE2 0x97
+#define WIN_CHECKPOWERMODE2 0x98
+#define WIN_SLEEPNOW2 0x99
+/*
+ * 0x9A VENDOR
+ */
+#define WIN_PACKETCMD 0xA0 /* Send a packet command. */
+#define WIN_PIDENTIFY 0xA1 /* identify ATAPI device */
+#define WIN_QUEUED_SERVICE 0xA2
+#define WIN_SMART 0xB0 /* self-monitoring and reporting */
+#define CFA_ERASE_SECTORS 0xC0
+#define WIN_MULTREAD 0xC4 /* read sectors using multiple mode*/
+#define WIN_MULTWRITE 0xC5 /* write sectors using multiple mode */
+#define WIN_SETMULT 0xC6 /* enable/disable multiple mode */
+#define WIN_READDMA_QUEUED 0xC7 /* read sectors using Queued DMA transfers */
+#define WIN_READDMA 0xC8 /* read sectors using DMA transfers */
+#define WIN_READDMA_ONCE 0xC9 /* 28-Bit - without retries */
+#define WIN_WRITEDMA 0xCA /* write sectors using DMA transfers */
+#define WIN_WRITEDMA_ONCE 0xCB /* 28-Bit - without retries */
+#define WIN_WRITEDMA_QUEUED 0xCC /* write sectors using Queued DMA transfers */
+#define CFA_WRITE_MULTI_WO_ERASE 0xCD /* CFA Write multiple without erase */
+#define WIN_GETMEDIASTATUS 0xDA
+#define WIN_ACKMEDIACHANGE 0xDB /* ATA-1, ATA-2 vendor */
+#define WIN_POSTBOOT 0xDC
+#define WIN_PREBOOT 0xDD
+#define WIN_DOORLOCK 0xDE /* lock door on removable drives */
+#define WIN_DOORUNLOCK 0xDF /* unlock door on removable drives */
+#define WIN_STANDBYNOW1 0xE0
+#define WIN_IDLEIMMEDIATE 0xE1 /* force drive to become "ready" */
+#define WIN_STANDBY 0xE2 /* Set device in Standby Mode */
+#define WIN_SETIDLE1 0xE3
+#define WIN_READ_BUFFER 0xE4 /* force read only 1 sector */
+#define WIN_CHECKPOWERMODE1 0xE5
+#define WIN_SLEEPNOW1 0xE6
+#define WIN_FLUSH_CACHE 0xE7
+#define WIN_WRITE_BUFFER 0xE8 /* force write only 1 sector */
+#define WIN_WRITE_SAME 0xE9 /* read ata-2 to use */
+ /* SET_FEATURES 0x22 or 0xDD */
+#define WIN_FLUSH_CACHE_EXT 0xEA /* 48-Bit */
+#define WIN_IDENTIFY 0xEC /* ask drive to identify itself */
+#define WIN_MEDIAEJECT 0xED
+#define WIN_IDENTIFY_DMA 0xEE /* same as WIN_IDENTIFY, but DMA */
+#define WIN_SETFEATURES 0xEF /* set special drive features */
+#define EXABYTE_ENABLE_NEST 0xF0
+#define WIN_SECURITY_SET_PASS 0xF1
+#define WIN_SECURITY_UNLOCK 0xF2
+#define WIN_SECURITY_ERASE_PREPARE 0xF3
+#define WIN_SECURITY_ERASE_UNIT 0xF4
+#define WIN_SECURITY_FREEZE_LOCK 0xF5
+#define WIN_SECURITY_DISABLE 0xF6
+#define WIN_READ_NATIVE_MAX 0xF8 /* return the native maximum address */
+#define WIN_SET_MAX 0xF9
+#define DISABLE_SEAGATE 0xFB
+
+/* set to 1 set disable mult support */
+#define MAX_MULT_SECTORS 16
+
+/* ATAPI defines */
+
+#define ATAPI_PACKET_SIZE 12
+
+/* The generic packet command opcodes for CD/DVD Logical Units,
+ * From Table 57 of the SFF8090 Ver. 3 (Mt. Fuji) draft standard. */
+#define GPCMD_BLANK 0xa1
+#define GPCMD_CLOSE_TRACK 0x5b
+#define GPCMD_FLUSH_CACHE 0x35
+#define GPCMD_FORMAT_UNIT 0x04
+#define GPCMD_GET_CONFIGURATION 0x46
+#define GPCMD_GET_EVENT_STATUS_NOTIFICATION 0x4a
+#define GPCMD_GET_PERFORMANCE 0xac
+#define GPCMD_INQUIRY 0x12
+#define GPCMD_LOAD_UNLOAD 0xa6
+#define GPCMD_MECHANISM_STATUS 0xbd
+#define GPCMD_MODE_SELECT_10 0x55
+#define GPCMD_MODE_SENSE_10 0x5a
+#define GPCMD_PAUSE_RESUME 0x4b
+#define GPCMD_PLAY_AUDIO_10 0x45
+#define GPCMD_PLAY_AUDIO_MSF 0x47
+#define GPCMD_PLAY_AUDIO_TI 0x48
+#define GPCMD_PLAY_CD 0xbc
+#define GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL 0x1e
+#define GPCMD_READ_10 0x28
+#define GPCMD_READ_12 0xa8
+#define GPCMD_READ_CDVD_CAPACITY 0x25
+#define GPCMD_READ_CD 0xbe
+#define GPCMD_READ_CD_MSF 0xb9
+#define GPCMD_READ_DISC_INFO 0x51
+#define GPCMD_READ_DVD_STRUCTURE 0xad
+#define GPCMD_READ_FORMAT_CAPACITIES 0x23
+#define GPCMD_READ_HEADER 0x44
+#define GPCMD_READ_TRACK_RZONE_INFO 0x52
+#define GPCMD_READ_SUBCHANNEL 0x42
+#define GPCMD_READ_TOC_PMA_ATIP 0x43
+#define GPCMD_REPAIR_RZONE_TRACK 0x58
+#define GPCMD_REPORT_KEY 0xa4
+#define GPCMD_REQUEST_SENSE 0x03
+#define GPCMD_RESERVE_RZONE_TRACK 0x53
+#define GPCMD_SCAN 0xba
+#define GPCMD_SEEK 0x2b
+#define GPCMD_SEND_DVD_STRUCTURE 0xad
+#define GPCMD_SEND_EVENT 0xa2
+#define GPCMD_SEND_KEY 0xa3
+#define GPCMD_SEND_OPC 0x54
+#define GPCMD_SET_READ_AHEAD 0xa7
+#define GPCMD_SET_STREAMING 0xb6
+#define GPCMD_START_STOP_UNIT 0x1b
+#define GPCMD_STOP_PLAY_SCAN 0x4e
+#define GPCMD_TEST_UNIT_READY 0x00
+#define GPCMD_VERIFY_10 0x2f
+#define GPCMD_WRITE_10 0x2a
+#define GPCMD_WRITE_AND_VERIFY_10 0x2e
+/* This is listed as optional in ATAPI 2.6, but is (curiously)
+ * missing from Mt. Fuji, Table 57. It _is_ mentioned in Mt. Fuji
+ * Table 377 as an MMC command for SCSI devices though... Most ATAPI
+ * drives support it. */
+#define GPCMD_SET_SPEED 0xbb
+/* This seems to be a SCSI specific CD-ROM opcode
+ * to play data at track/index */
+#define GPCMD_PLAYAUDIO_TI 0x48
+/*
+ * From MS Media Status Notification Support Specification. For
+ * older drives only.
+ */
+#define GPCMD_GET_MEDIA_STATUS 0xda
+
+/* Mode page codes for mode sense/set */
+#define GPMODE_R_W_ERROR_PAGE 0x01
+#define GPMODE_WRITE_PARMS_PAGE 0x05
+#define GPMODE_AUDIO_CTL_PAGE 0x0e
+#define GPMODE_POWER_PAGE 0x1a
+#define GPMODE_FAULT_FAIL_PAGE 0x1c
+#define GPMODE_TO_PROTECT_PAGE 0x1d
+#define GPMODE_CAPABILITIES_PAGE 0x2a
+#define GPMODE_ALL_PAGES 0x3f
+/* Not in Mt. Fuji, but in ATAPI 2.6 -- deprecated now in favor
+ * of MODE_SENSE_POWER_PAGE */
+#define GPMODE_CDROM_PAGE 0x0d
+
+#define ATAPI_INT_REASON_CD 0x01 /* 0 = data transfer */
+#define ATAPI_INT_REASON_IO 0x02 /* 1 = transfer to the host */
+#define ATAPI_INT_REASON_REL 0x04
+#define ATAPI_INT_REASON_TAG 0xf8
+
+/* same constants as bochs */
+#define ASC_ILLEGAL_OPCODE 0x20
+#define ASC_LOGICAL_BLOCK_OOR 0x21
+#define ASC_INV_FIELD_IN_CMD_PACKET 0x24
+#define ASC_MEDIUM_NOT_PRESENT 0x3a
+#define ASC_SAVING_PARAMETERS_NOT_SUPPORTED 0x39
+
+#define SENSE_NONE 0
+#define SENSE_NOT_READY 2
+#define SENSE_ILLEGAL_REQUEST 5
+#define SENSE_UNIT_ATTENTION 6
+
+struct IDEState;
+
+/* Callback run when the current PIO data window has been drained. */
+typedef void EndTransferFunc(struct IDEState *);
+
+/* NOTE: IDEState represents in fact one drive */
+typedef struct IDEState {
+ /* ide config */
+ int is_cdrom;
+ int cylinders, heads, sectors;
+ int64_t nb_sectors;
+ int mult_sectors;
+ int irq;
+ openpic_t *openpic;
+ PCIDevice *pci_dev;
+ struct BMDMAState *bmdma;
+ int drive_serial;
+ /* ide regs */
+ uint8_t feature;
+ uint8_t error;
+ uint16_t nsector; /* 0 is 256 to ease computations */
+ uint8_t sector;
+ uint8_t lcyl;
+ uint8_t hcyl;
+ uint8_t select;
+ uint8_t status;
+ /* 0x3f6 command, only meaningful for drive 0 */
+ uint8_t cmd;
+ /* depends on bit 4 in select, only meaningful for drive 0 */
+ struct IDEState *cur_drive;
+ BlockDriverState *bs;
+ /* ATAPI specific */
+ uint8_t sense_key;
+ uint8_t asc;
+ int packet_transfer_size;
+ int elementary_transfer_size;
+ int io_buffer_index;
+ int lba;
+ int cd_sector_size;
+ int atapi_dma; /* true if dma is requested for the packet cmd */
+ /* ATA DMA state */
+ int io_buffer_size;
+ /* PIO transfer handling */
+ int req_nb_sectors; /* number of sectors per interrupt */
+ EndTransferFunc *end_transfer_func;
+ uint8_t *data_ptr;
+ uint8_t *data_end;
+ uint8_t io_buffer[MAX_MULT_SECTORS*512 + 4];
+} IDEState;
+
+/* Busmaster DMA status register bits */
+#define BM_STATUS_DMAING 0x01
+#define BM_STATUS_ERROR 0x02
+#define BM_STATUS_INT 0x04
+
+/* Busmaster DMA command register bits */
+#define BM_CMD_START 0x01
+#define BM_CMD_READ 0x08
+
+/* DMA step callback: moves up to transfer_size1 bytes between the
+ drive and guest memory at phys_addr; returns bytes consumed, or 0
+ when the whole request has completed. */
+typedef int IDEDMAFunc(IDEState *s,
+ target_phys_addr_t phys_addr,
+ int transfer_size1);
+
+typedef struct BMDMAState {
+ uint8_t cmd;
+ uint8_t status;
+ uint32_t addr;
+ /* current transfer state */
+ IDEState *ide_if;
+ IDEDMAFunc *dma_cb;
+} BMDMAState;
+
+typedef struct PCIIDEState {
+ PCIDevice dev;
+ IDEState ide_if[4];
+ BMDMAState bmdma[2];
+} PCIIDEState;
+
+static void ide_dma_start(IDEState *s, IDEDMAFunc *dma_cb);
+
+/* Space-pad src to len bytes into str, writing each byte at its
+ address XOR 1. IDENTIFY string fields are arrays of 16-bit words
+ with the two characters swapped within each word; the XOR performs
+ that per-word byte swap as the string is stored. */
+static void padstr(char *str, const char *src, int len)
+{
+ int i, v;
+ for(i = 0; i < len; i++) {
+ if (*src)
+ v = *src++;
+ else
+ v = ' ';
+ *(char *)((long)str ^ 1) = v;
+ str++;
+ }
+}
+
+/* Space-pad src to buf_size bytes into buf (no byte swapping). */
+static void padstr8(uint8_t *buf, int buf_size, const char *src)
+{
+ int i;
+ for(i = 0; i < buf_size; i++) {
+ if (*src)
+ buf[i] = *src++;
+ else
+ buf[i] = ' ';
+ }
+}
+
+/* Store v at p as a little-endian 16-bit word. */
+static void put_le16(uint16_t *p, unsigned int v)
+{
+ *p = cpu_to_le16(v);
+}
+
+/* Build the 512-byte ATA IDENTIFY DEVICE response for a hard disk in
+ s->io_buffer. The p + N offsets are IDENTIFY word numbers as
+ defined by the ATA specification. */
+static void ide_identify(IDEState *s)
+{
+ uint16_t *p;
+ unsigned int oldsize;
+ char buf[20];
+
+ memset(s->io_buffer, 0, 512);
+ p = (uint16_t *)s->io_buffer;
+ put_le16(p + 0, 0x0040);
+ put_le16(p + 1, s->cylinders);
+ put_le16(p + 3, s->heads);
+ put_le16(p + 4, 512 * s->sectors); /* XXX: retired, remove ? */
+ put_le16(p + 5, 512); /* XXX: retired, remove ? */
+ put_le16(p + 6, s->sectors);
+ snprintf(buf, sizeof(buf), "QM%05d", s->drive_serial);
+ padstr((uint8_t *)(p + 10), buf, 20); /* serial number */
+ put_le16(p + 20, 3); /* XXX: retired, remove ? */
+ put_le16(p + 21, 512); /* cache size in sectors */
+ put_le16(p + 22, 4); /* ecc bytes */
+ padstr((uint8_t *)(p + 23), QEMU_VERSION, 8); /* firmware version */
+ padstr((uint8_t *)(p + 27), "QEMU HARDDISK", 40); /* model */
+#if MAX_MULT_SECTORS > 1
+ put_le16(p + 47, 0x8000 | MAX_MULT_SECTORS);
+#endif
+ put_le16(p + 48, 1); /* dword I/O */
+ put_le16(p + 49, 1 << 9 | 1 << 8); /* DMA and LBA supported */
+ put_le16(p + 51, 0x200); /* PIO transfer cycle */
+ put_le16(p + 52, 0x200); /* DMA transfer cycle */
+ put_le16(p + 53, 1 | 1 << 2); /* words 54-58,88 are valid */
+ put_le16(p + 54, s->cylinders);
+ put_le16(p + 55, s->heads);
+ put_le16(p + 56, s->sectors);
+ /* current capacity in sectors (words 57-58) */
+ oldsize = s->cylinders * s->heads * s->sectors;
+ put_le16(p + 57, oldsize);
+ put_le16(p + 58, oldsize >> 16);
+ if (s->mult_sectors)
+ put_le16(p + 59, 0x100 | s->mult_sectors);
+ /* total addressable sectors in LBA mode (words 60-61) */
+ put_le16(p + 60, s->nb_sectors);
+ put_le16(p + 61, s->nb_sectors >> 16);
+ put_le16(p + 80, (1 << 1) | (1 << 2));
+ put_le16(p + 82, (1 << 14));
+ put_le16(p + 83, (1 << 14));
+ put_le16(p + 84, (1 << 14));
+ put_le16(p + 85, (1 << 14));
+ put_le16(p + 86, 0);
+ put_le16(p + 87, (1 << 14));
+ put_le16(p + 88, 0x1f | (1 << 13));
+ put_le16(p + 93, 1 | (1 << 14) | 0x2000 | 0x4000);
+}
+
+/* Build the 512-byte ATAPI IDENTIFY PACKET DEVICE response for the
+ emulated CD-ROM in s->io_buffer. */
+static void ide_atapi_identify(IDEState *s)
+{
+ uint16_t *p;
+ char buf[20];
+
+ memset(s->io_buffer, 0, 512);
+ p = (uint16_t *)s->io_buffer;
+ /* Removable CDROM, 50us response, 12 byte packets */
+ put_le16(p + 0, (2 << 14) | (5 << 8) | (1 << 7) | (2 << 5) | (0 << 0));
+ snprintf(buf, sizeof(buf), "QM%05d", s->drive_serial);
+ padstr((uint8_t *)(p + 10), buf, 20); /* serial number */
+ put_le16(p + 20, 3); /* buffer type */
+ put_le16(p + 21, 512); /* cache size in sectors */
+ put_le16(p + 22, 4); /* ecc bytes */
+ padstr((uint8_t *)(p + 23), QEMU_VERSION, 8); /* firmware version */
+ padstr((uint8_t *)(p + 27), "QEMU CD-ROM", 40); /* model */
+ put_le16(p + 48, 1); /* dword I/O (XXX: should not be set on CDROM) */
+ put_le16(p + 49, 1 << 9); /* LBA supported, no DMA */
+ put_le16(p + 53, 3); /* words 64-70, 54-58 valid */
+ put_le16(p + 63, 0x103); /* DMA modes XXX: may be incorrect */
+ put_le16(p + 64, 1); /* PIO modes */
+ put_le16(p + 65, 0xb4); /* minimum DMA multiword tx cycle time */
+ put_le16(p + 66, 0xb4); /* recommended DMA multiword tx cycle time */
+ put_le16(p + 67, 0x12c); /* minimum PIO cycle time without flow control */
+ put_le16(p + 68, 0xb4); /* minimum PIO cycle time with IORDY flow control */
+
+ put_le16(p + 71, 30); /* in ns */
+ put_le16(p + 72, 30); /* in ns */
+
+ put_le16(p + 80, 0x1e); /* support up to ATA/ATAPI-4 */
+}
+
+/* Load the post-reset/diagnostic signature into the task-file
+ registers: lcyl/hcyl = 0x14/0xeb marks an ATAPI device, 0/0 an ATA
+ disk, 0xff/0xff an absent drive. */
+static void ide_set_signature(IDEState *s)
+{
+ s->select &= 0xf0; /* clear head */
+ /* put signature */
+ s->nsector = 1;
+ s->sector = 1;
+ if (s->is_cdrom) {
+ s->lcyl = 0x14;
+ s->hcyl = 0xeb;
+ } else if (s->bs) {
+ s->lcyl = 0;
+ s->hcyl = 0;
+ } else {
+ s->lcyl = 0xff;
+ s->hcyl = 0xff;
+ }
+}
+
+/* Fail the current command: set ERR in the status register and ABRT
+ in the error register. */
+static inline void ide_abort_command(IDEState *s)
+{
+ s->status = READY_STAT | ERR_STAT;
+ s->error = ABRT_ERR;
+}
+
+/* Raise the drive interrupt unless nIEN is set in the device control
+ register. On PPC targets an openpic is used when present; irq 16 is
+ a sentinel meaning "deliver through the PCI interrupt pin" instead
+ of the ISA PIC. */
+static inline void ide_set_irq(IDEState *s)
+{
+ if (!(s->cmd & IDE_CMD_DISABLE_IRQ)) {
+#ifdef TARGET_PPC
+ if (s->openpic)
+ openpic_set_irq(s->openpic, s->irq, 1);
+ else
+#endif
+ if (s->irq == 16)
+ pci_set_irq(s->pci_dev, 0, 1);
+ else
+ pic_set_irq(s->irq, 1);
+ }
+}
+
+/* prepare data transfer and tell what to do after */
+/* Expose [buf, buf+size) as the PIO data window, set DRQ, and record
+ the callback to run once the guest has drained the window. */
+static void ide_transfer_start(IDEState *s, uint8_t *buf, int size,
+ EndTransferFunc *end_transfer_func)
+{
+ s->end_transfer_func = end_transfer_func;
+ s->data_ptr = buf;
+ s->data_end = buf + size;
+ s->status |= DRQ_STAT;
+}
+
+/* End the PIO transfer: clear DRQ and point the data window at an
+ empty range. Installing itself as the callback makes any further
+ end-of-transfer event a no-op. */
+static void ide_transfer_stop(IDEState *s)
+{
+ s->end_transfer_func = ide_transfer_stop;
+ s->data_ptr = s->io_buffer;
+ s->data_end = s->io_buffer;
+ s->status &= ~DRQ_STAT;
+}
+
+/* Current sector number from the task file: a 28-bit LBA when bit 6
+ of the select register is set, otherwise CHS translation using the
+ drive geometry. */
+static int64_t ide_get_sector(IDEState *s)
+{
+ int64_t sector_num;
+ if (s->select & 0x40) {
+ /* lba */
+ sector_num = ((s->select & 0x0f) << 24) | (s->hcyl << 16) |
+ (s->lcyl << 8) | s->sector;
+ } else {
+ sector_num = ((s->hcyl << 8) | s->lcyl) * s->heads * s->sectors +
+ (s->select & 0x0f) * s->sectors +
+ (s->sector - 1);
+ }
+ return sector_num;
+}
+
+/* Write sector_num back into the task-file registers, in LBA or CHS
+ form depending on bit 6 of the select register (inverse of
+ ide_get_sector). */
+static void ide_set_sector(IDEState *s, int64_t sector_num)
+{
+ unsigned int cyl, r;
+ if (s->select & 0x40) {
+ s->select = (s->select & 0xf0) | (sector_num >> 24);
+ s->hcyl = (sector_num >> 16);
+ s->lcyl = (sector_num >> 8);
+ s->sector = (sector_num);
+ } else {
+ cyl = sector_num / (s->heads * s->sectors);
+ r = sector_num % (s->heads * s->sectors);
+ s->hcyl = cyl >> 8;
+ s->lcyl = cyl;
+ s->select = (s->select & 0xf0) | ((r / s->sectors) & 0x0f);
+ s->sector = (r % s->sectors) + 1;
+ }
+}
+
+/* PIO read step: fetch up to req_nb_sectors from the image into
+ io_buffer and open a data window on it. Re-invoked as the
+ end-transfer callback until nsector reaches 0. */
+static void ide_sector_read(IDEState *s)
+{
+ int64_t sector_num;
+ int ret, n;
+
+ s->status = READY_STAT | SEEK_STAT;
+ s->error = 0; /* not needed by IDE spec, but needed by Windows */
+ sector_num = ide_get_sector(s);
+ n = s->nsector;
+ if (n == 0) {
+ /* no more sector to read from disk */
+ ide_transfer_stop(s);
+ } else {
+#if defined(DEBUG_IDE)
+ printf("read sector=%Ld\n", sector_num);
+#endif
+ if (n > s->req_nb_sectors)
+ n = s->req_nb_sectors;
+ /* NOTE(review): ret is ignored - read errors are not reported
+ to the guest */
+ ret = bdrv_read(s->bs, sector_num, s->io_buffer, n);
+ ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_read);
+ ide_set_irq(s);
+ ide_set_sector(s, sector_num + n);
+ s->nsector -= n;
+ }
+}
+
+/* Busmaster DMA read callback: copy up to transfer_size1 bytes of
+ disk data to guest memory at phys_addr, refilling io_buffer up to
+ MAX_MULT_SECTORS sectors at a time. Returns the number of bytes
+ consumed, or 0 when the whole request has completed. */
+static int ide_read_dma_cb(IDEState *s,
+ target_phys_addr_t phys_addr,
+ int transfer_size1)
+{
+ int len, transfer_size, n;
+ int64_t sector_num;
+
+ transfer_size = transfer_size1;
+ while (transfer_size > 0) {
+ len = s->io_buffer_size - s->io_buffer_index;
+ if (len <= 0) {
+ /* transfer next data */
+ n = s->nsector;
+ if (n == 0)
+ break;
+ if (n > MAX_MULT_SECTORS)
+ n = MAX_MULT_SECTORS;
+ sector_num = ide_get_sector(s);
+ bdrv_read(s->bs, sector_num, s->io_buffer, n);
+ s->io_buffer_index = 0;
+ s->io_buffer_size = n * 512;
+ len = s->io_buffer_size;
+ sector_num += n;
+ ide_set_sector(s, sector_num);
+ s->nsector -= n;
+ }
+ if (len > transfer_size)
+ len = transfer_size;
+ cpu_physical_memory_write(phys_addr,
+ s->io_buffer + s->io_buffer_index, len);
+ s->io_buffer_index += len;
+ transfer_size -= len;
+ phys_addr += len;
+ }
+ /* whole request done: signal completion to the guest */
+ if (s->io_buffer_index >= s->io_buffer_size && s->nsector == 0) {
+ s->status = READY_STAT | SEEK_STAT;
+ ide_set_irq(s);
+#ifdef DEBUG_IDE_ATAPI
+ printf("dma status=0x%x\n", s->status);
+#endif
+ return 0;
+ }
+ return transfer_size1 - transfer_size;
+}
+
+/* Start a busmaster DMA read of the current request. */
+static void ide_sector_read_dma(IDEState *s)
+{
+ s->status = READY_STAT | SEEK_STAT | DRQ_STAT;
+ s->io_buffer_index = 0;
+ s->io_buffer_size = 0;
+ ide_dma_start(s, ide_read_dma_cb);
+}
+
+/* PIO write step: flush the sectors the guest just filled in
+ io_buffer to the image, then open a new window for the next batch
+ (or stop when nsector reaches 0). Re-invoked as the end-transfer
+ callback. */
+static void ide_sector_write(IDEState *s)
+{
+ int64_t sector_num;
+ int ret, n, n1;
+
+ s->status = READY_STAT | SEEK_STAT;
+ sector_num = ide_get_sector(s);
+#if defined(DEBUG_IDE)
+ printf("write sector=%Ld\n", sector_num);
+#endif
+ n = s->nsector;
+ if (n > s->req_nb_sectors)
+ n = s->req_nb_sectors;
+ /* NOTE(review): ret is ignored - write errors are not reported
+ to the guest */
+ ret = bdrv_write(s->bs, sector_num, s->io_buffer, n);
+ s->nsector -= n;
+ if (s->nsector == 0) {
+ /* no more sector to write */
+ ide_transfer_stop(s);
+ } else {
+ n1 = s->nsector;
+ if (n1 > s->req_nb_sectors)
+ n1 = s->req_nb_sectors;
+ ide_transfer_start(s, s->io_buffer, 512 * n1, ide_sector_write);
+ }
+ ide_set_sector(s, sector_num + n);
+ ide_set_irq(s);
+}
+
+/* Busmaster DMA write callback: copy up to transfer_size1 bytes from
+ guest memory at phys_addr into io_buffer, flushing it to the image
+ each time it fills. Returns bytes consumed, or 0 once all sectors
+ have been written. */
+static int ide_write_dma_cb(IDEState *s,
+ target_phys_addr_t phys_addr,
+ int transfer_size1)
+{
+ int len, transfer_size, n;
+ int64_t sector_num;
+
+ transfer_size = transfer_size1;
+ for(;;) {
+ len = s->io_buffer_size - s->io_buffer_index;
+ if (len == 0) {
+ /* buffer full: commit it to disk and advance the position */
+ n = s->io_buffer_size >> 9;
+ sector_num = ide_get_sector(s);
+ bdrv_write(s->bs, sector_num, s->io_buffer,
+ s->io_buffer_size >> 9);
+ sector_num += n;
+ ide_set_sector(s, sector_num);
+ s->nsector -= n;
+ n = s->nsector;
+ if (n == 0) {
+ /* end of transfer */
+ s->status = READY_STAT | SEEK_STAT;
+ ide_set_irq(s);
+ return 0;
+ }
+ if (n > MAX_MULT_SECTORS)
+ n = MAX_MULT_SECTORS;
+ s->io_buffer_index = 0;
+ s->io_buffer_size = n * 512;
+ len = s->io_buffer_size;
+ }
+ if (transfer_size <= 0)
+ break;
+ if (len > transfer_size)
+ len = transfer_size;
+ cpu_physical_memory_read(phys_addr,
+ s->io_buffer + s->io_buffer_index, len);
+ s->io_buffer_index += len;
+ transfer_size -= len;
+ phys_addr += len;
+ }
+ return transfer_size1 - transfer_size;
+}
+
+/* Start a busmaster DMA write: size the first buffer fill and hand
+ the request to the DMA engine. */
+static void ide_sector_write_dma(IDEState *s)
+{
+ int n;
+ s->status = READY_STAT | SEEK_STAT | DRQ_STAT;
+ n = s->nsector;
+ if (n > MAX_MULT_SECTORS)
+ n = MAX_MULT_SECTORS;
+ s->io_buffer_index = 0;
+ s->io_buffer_size = n * 512;
+ ide_dma_start(s, ide_write_dma_cb);
+}
+
+/* Complete an ATAPI command successfully: interrupt reason bits set
+ to "command complete" (IO|CD) and the interrupt raised. */
+static void ide_atapi_cmd_ok(IDEState *s)
+{
+ s->error = 0;
+ s->status = READY_STAT;
+ s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
+ ide_set_irq(s);
+}
+
+/* Fail an ATAPI command: latch the sense key / additional sense code
+ for a later REQUEST SENSE and report the error to the guest. */
+static void ide_atapi_cmd_error(IDEState *s, int sense_key, int asc)
+{
+#ifdef DEBUG_IDE_ATAPI
+ printf("atapi_cmd_error: sense=0x%x asc=0x%x\n", sense_key, asc);
+#endif
+ s->error = sense_key << 4;
+ s->status = READY_STAT | ERR_STAT;
+ s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
+ s->sense_key = sense_key;
+ s->asc = asc;
+ ide_set_irq(s);
+}
+
+/* Store val big-endian on 2 bytes. */
+static inline void cpu_to_ube16(uint8_t *buf, int val)
+{
+ buf[0] = val >> 8;
+ buf[1] = val;
+}
+
+/* Store val big-endian on 4 bytes. */
+static inline void cpu_to_ube32(uint8_t *buf, unsigned int val)
+{
+ buf[0] = val >> 24;
+ buf[1] = val >> 16;
+ buf[2] = val >> 8;
+ buf[3] = val;
+}
+
+/* Read a big-endian 16-bit value from buf. */
+static inline int ube16_to_cpu(const uint8_t *buf)
+{
+ return (buf[0] << 8) | buf[1];
+}
+
+/* Read a big-endian 32-bit value from buf. */
+static inline int ube32_to_cpu(const uint8_t *buf)
+{
+ return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
+}
+
+/* Convert an LBA to CD minute/second/frame form (75 frames per
+ second), including the standard 150-frame (2 second) lead-in
+ offset. */
+static void lba_to_msf(uint8_t *buf, int lba)
+{
+ lba += 150;
+ buf[0] = (lba / 75) / 60;
+ buf[1] = (lba / 75) % 60;
+ buf[2] = lba % 75;
+}
+
+/* Read one CD sector at lba into buf. 2048 = cooked user data only;
+ 2352 = raw mode-1 sector with a synthesized sync header and MSF
+ address, 2048 data bytes, and zeroed ECC. The backing image uses
+ 512-byte blocks, hence lba << 2 and 4-block reads. */
+static void cd_read_sector(BlockDriverState *bs, int lba, uint8_t *buf,
+ int sector_size)
+{
+ switch(sector_size) {
+ case 2048:
+ bdrv_read(bs, (int64_t)lba << 2, buf, 4);
+ break;
+ case 2352:
+ /* sync bytes */
+ buf[0] = 0x00;
+ memset(buf + 1, 0xff, 11);
+ buf += 12;
+ /* MSF */
+ lba_to_msf(buf, lba);
+ buf[3] = 0x01; /* mode 1 data */
+ buf += 4;
+ /* data */
+ bdrv_read(bs, (int64_t)lba << 2, buf, 4);
+ buf += 2048;
+ /* ECC */
+ memset(buf, 0, 288);
+ break;
+ default:
+ break;
+ }
+}
+
+/* The whole ATAPI transfer logic is handled in this function */
+/* Re-invoked as the end-transfer callback until packet_transfer_size
+ is exhausted. Each elementary transfer is bounded by the byte count
+ limit the guest programmed into lcyl/hcyl, and by one CD sector
+ when a sector read is in progress (s->lba != -1). */
+static void ide_atapi_cmd_reply_end(IDEState *s)
+{
+ int byte_count_limit, size;
+#ifdef DEBUG_IDE_ATAPI
+ printf("reply: tx_size=%d elem_tx_size=%d index=%d\n",
+ s->packet_transfer_size,
+ s->elementary_transfer_size,
+ s->io_buffer_index);
+#endif
+ if (s->packet_transfer_size <= 0) {
+ /* end of transfer */
+ ide_transfer_stop(s);
+ s->status = READY_STAT;
+ s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
+ ide_set_irq(s);
+#ifdef DEBUG_IDE_ATAPI
+ printf("status=0x%x\n", s->status);
+#endif
+ } else {
+ /* see if a new sector must be read */
+ if (s->lba != -1 && s->io_buffer_index >= s->cd_sector_size) {
+ cd_read_sector(s->bs, s->lba, s->io_buffer, s->cd_sector_size);
+ s->lba++;
+ s->io_buffer_index = 0;
+ }
+ if (s->elementary_transfer_size > 0) {
+ /* there are some data left to transmit in this elementary
+ transfer */
+ size = s->cd_sector_size - s->io_buffer_index;
+ if (size > s->elementary_transfer_size)
+ size = s->elementary_transfer_size;
+ ide_transfer_start(s, s->io_buffer + s->io_buffer_index,
+ size, ide_atapi_cmd_reply_end);
+ s->packet_transfer_size -= size;
+ s->elementary_transfer_size -= size;
+ s->io_buffer_index += size;
+ } else {
+ /* a new transfer is needed */
+ s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO;
+ byte_count_limit = s->lcyl | (s->hcyl << 8);
+#ifdef DEBUG_IDE_ATAPI
+ printf("byte_count_limit=%d\n", byte_count_limit);
+#endif
+ if (byte_count_limit == 0xffff)
+ byte_count_limit--;
+ size = s->packet_transfer_size;
+ if (size > byte_count_limit) {
+ /* byte count limit must be even in this case */
+ if (byte_count_limit & 1)
+ byte_count_limit--;
+ size = byte_count_limit;
+ }
+ s->lcyl = size;
+ s->hcyl = size >> 8;
+ s->elementary_transfer_size = size;
+ /* we cannot transmit more than one sector at a time */
+ if (s->lba != -1) {
+ if (size > (s->cd_sector_size - s->io_buffer_index))
+ size = (s->cd_sector_size - s->io_buffer_index);
+ }
+ ide_transfer_start(s, s->io_buffer + s->io_buffer_index,
+ size, ide_atapi_cmd_reply_end);
+ s->packet_transfer_size -= size;
+ s->elementary_transfer_size -= size;
+ s->io_buffer_index += size;
+ ide_set_irq(s);
+#ifdef DEBUG_IDE_ATAPI
+ printf("status=0x%x\n", s->status);
+#endif
+ }
+ }
+}
+
+/* send a reply of 'size' bytes in s->io_buffer to an ATAPI command */
+/* The reply is clamped to max_size (the guest's allocation length);
+ lba = -1 marks that no CD sector refills are involved. */
+static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size)
+{
+ if (size > max_size)
+ size = max_size;
+ s->lba = -1; /* no sector read */
+ s->packet_transfer_size = size;
+ s->elementary_transfer_size = 0;
+ s->io_buffer_index = 0;
+
+ s->status = READY_STAT;
+ ide_atapi_cmd_reply_end(s);
+}
+
+/* start a CD-ROM read command (PIO path). Setting io_buffer_index to
+ sector_size makes the first callback invocation fetch a sector. */
+static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors,
+ int sector_size)
+{
+ s->lba = lba;
+ s->packet_transfer_size = nb_sectors * sector_size;
+ s->elementary_transfer_size = 0;
+ s->io_buffer_index = sector_size;
+ s->cd_sector_size = sector_size;
+
+ s->status = READY_STAT;
+ ide_atapi_cmd_reply_end(s);
+}
+
+/* ATAPI DMA support */
+/* DMA callback for packet reads: copy up to transfer_size1 bytes of
+ CD data to guest memory at phys_addr, reading one CD sector at a
+ time. Returns bytes consumed, or 0 when the packet is complete. */
+static int ide_atapi_cmd_read_dma_cb(IDEState *s,
+ target_phys_addr_t phys_addr,
+ int transfer_size1)
+{
+ int len, transfer_size;
+
+ transfer_size = transfer_size1;
+ while (transfer_size > 0) {
+ if (s->packet_transfer_size <= 0)
+ break;
+ len = s->cd_sector_size - s->io_buffer_index;
+ if (len <= 0) {
+ /* transfer next data */
+ cd_read_sector(s->bs, s->lba, s->io_buffer, s->cd_sector_size);
+ s->lba++;
+ s->io_buffer_index = 0;
+ len = s->cd_sector_size;
+ }
+ if (len > transfer_size)
+ len = transfer_size;
+ cpu_physical_memory_write(phys_addr,
+ s->io_buffer + s->io_buffer_index, len);
+ s->packet_transfer_size -= len;
+ s->io_buffer_index += len;
+ transfer_size -= len;
+ phys_addr += len;
+ }
+ if (s->packet_transfer_size <= 0) {
+ s->status = READY_STAT;
+ s->nsector = (s->nsector & ~7) | ATAPI_INT_REASON_IO | ATAPI_INT_REASON_CD;
+ ide_set_irq(s);
+#ifdef DEBUG_IDE_ATAPI
+ printf("dma status=0x%x\n", s->status);
+#endif
+ return 0;
+ }
+ return transfer_size1 - transfer_size;
+}
+
+/* start a CD-ROM read command with DMA */
+/* XXX: test if DMA is available */
+static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors,
+ int sector_size)
+{
+ s->lba = lba;
+ s->packet_transfer_size = nb_sectors * sector_size;
+ s->io_buffer_index = sector_size;
+ s->cd_sector_size = sector_size;
+
+ s->status = READY_STAT | DRQ_STAT;
+ ide_dma_start(s, ide_atapi_cmd_read_dma_cb);
+}
+
+/* Dispatch an ATAPI read to the DMA or PIO path, depending on the
+ DMA bit the guest set in the PACKET command's features register. */
+static void ide_atapi_cmd_read(IDEState *s, int lba, int nb_sectors,
+ int sector_size)
+{
+#ifdef DEBUG_IDE_ATAPI
+ printf("read: LBA=%d nb_sectors=%d\n", lba, nb_sectors);
+#endif
+ if (s->atapi_dma) {
+ ide_atapi_cmd_read_dma(s, lba, nb_sectors, sector_size);
+ } else {
+ ide_atapi_cmd_read_pio(s, lba, nb_sectors, sector_size);
+ }
+}
+
+/* same toc as bochs. Return -1 if error or the toc length */
+/* XXX: check this */
+/* Build a READ TOC response into buf: a single data track starting
+ at LBA 0 plus the 0xaa lead-out entry; addresses are MSF or LBA
+ depending on msf. */
+static int cdrom_read_toc(IDEState *s, uint8_t *buf, int msf, int start_track)
+{
+ uint8_t *q;
+ int nb_sectors, len;
+
+ if (start_track > 1 && start_track != 0xaa)
+ return -1;
+ q = buf + 2;
+ *q++ = 1; /* first session */
+ *q++ = 1; /* last session */
+ if (start_track <= 1) {
+ *q++ = 0; /* reserved */
+ *q++ = 0x14; /* ADR, control */
+ *q++ = 1; /* track number */
+ *q++ = 0; /* reserved */
+ if (msf) {
+ *q++ = 0; /* reserved */
+ *q++ = 0; /* minute */
+ *q++ = 2; /* second */
+ *q++ = 0; /* frame */
+ } else {
+ /* sector 0 */
+ cpu_to_ube32(q, 0);
+ q += 4;
+ }
+ }
+ /* lead out track */
+ *q++ = 0; /* reserved */
+ *q++ = 0x16; /* ADR, control */
+ *q++ = 0xaa; /* track number */
+ *q++ = 0; /* reserved */
+ /* image is in 512-byte blocks; >> 2 converts to 2048-byte sectors */
+ nb_sectors = s->nb_sectors >> 2;
+ if (msf) {
+ *q++ = 0; /* reserved */
+ lba_to_msf(q, nb_sectors);
+ q += 3;
+ } else {
+ cpu_to_ube32(q, nb_sectors);
+ q += 4;
+ }
+ len = q - buf;
+ cpu_to_ube16(buf, len - 2);
+ return len;
+}
+
+/* mostly same info as PearPc */
+/* Build a raw-format READ TOC response: lead-in descriptors (points
+ 0xa0/0xa1/0xa2 for first track, last track, lead-out) followed by
+ the single data track entry. Returns the TOC length. */
+static int cdrom_read_toc_raw(IDEState *s, uint8_t *buf, int msf,
+ int session_num)
+{
+ uint8_t *q;
+ int nb_sectors, len;
+
+ q = buf + 2;
+ *q++ = 1; /* first session */
+ *q++ = 1; /* last session */
+
+ *q++ = 1; /* session number */
+ *q++ = 0x14; /* data track */
+ *q++ = 0; /* track number */
+ *q++ = 0xa0; /* lead-in */
+ *q++ = 0; /* min */
+ *q++ = 0; /* sec */
+ *q++ = 0; /* frame */
+ *q++ = 0;
+ *q++ = 1; /* first track */
+ *q++ = 0x00; /* disk type */
+ *q++ = 0x00;
+
+ *q++ = 1; /* session number */
+ *q++ = 0x14; /* data track */
+ *q++ = 0; /* track number */
+ *q++ = 0xa1;
+ *q++ = 0; /* min */
+ *q++ = 0; /* sec */
+ *q++ = 0; /* frame */
+ *q++ = 0;
+ *q++ = 1; /* last track */
+ *q++ = 0x00;
+ *q++ = 0x00;
+
+ *q++ = 1; /* session number */
+ *q++ = 0x14; /* data track */
+ *q++ = 0; /* track number */
+ *q++ = 0xa2; /* lead-out */
+ *q++ = 0; /* min */
+ *q++ = 0; /* sec */
+ *q++ = 0; /* frame */
+ /* image is in 512-byte blocks; >> 2 converts to 2048-byte sectors */
+ nb_sectors = s->nb_sectors >> 2;
+ if (msf) {
+ *q++ = 0; /* reserved */
+ lba_to_msf(q, nb_sectors);
+ q += 3;
+ } else {
+ cpu_to_ube32(q, nb_sectors);
+ q += 4;
+ }
+
+ *q++ = 1; /* session number */
+ *q++ = 0x14; /* ADR, control */
+ *q++ = 0; /* track number */
+ *q++ = 1; /* point */
+ *q++ = 0; /* min */
+ *q++ = 0; /* sec */
+ *q++ = 0; /* frame */
+ *q++ = 0;
+ *q++ = 0;
+ *q++ = 0;
+ *q++ = 0;
+
+ len = q - buf;
+ cpu_to_ube16(buf, len - 2);
+ return len;
+}
+
+/* Service the ATAPI packet command held in s->io_buffer.  Decodes the
+ * command byte and either completes immediately (ide_atapi_cmd_ok),
+ * queues reply data (ide_atapi_cmd_reply), starts a sector transfer
+ * (ide_atapi_cmd_read), or raises sense data (ide_atapi_cmd_error). */
+static void ide_atapi_cmd(IDEState *s)
+{
+ const uint8_t *packet;
+ uint8_t *buf;
+ int max_len;
+
+ packet = s->io_buffer;
+ /* NOTE: buf aliases packet -- building a reply overwrites the command bytes */
+ buf = s->io_buffer;
+#ifdef DEBUG_IDE_ATAPI
+ {
+ int i;
+ printf("ATAPI limit=0x%x packet:", s->lcyl | (s->hcyl << 8));
+ for(i = 0; i < ATAPI_PACKET_SIZE; i++) {
+ printf(" %02x", packet[i]);
+ }
+ printf("\n");
+ }
+#endif
+ switch(s->io_buffer[0]) {
+ case GPCMD_TEST_UNIT_READY:
+ if (bdrv_is_inserted(s->bs)) {
+ ide_atapi_cmd_ok(s);
+ } else {
+ ide_atapi_cmd_error(s, SENSE_NOT_READY,
+ ASC_MEDIUM_NOT_PRESENT);
+ }
+ break;
+ case GPCMD_MODE_SENSE_10:
+ {
+ int action, code;
+ max_len = ube16_to_cpu(packet + 7);
+ action = packet[2] >> 6;
+ code = packet[2] & 0x3f;
+ switch(action) {
+ case 0: /* current values */
+ switch(code) {
+ case 0x01: /* error recovery */
+ cpu_to_ube16(&buf[0], 16 + 6);
+ buf[2] = 0x70;
+ buf[3] = 0;
+ buf[4] = 0;
+ buf[5] = 0;
+ buf[6] = 0;
+ buf[7] = 0;
+
+ buf[8] = 0x01;
+ buf[9] = 0x06;
+ buf[10] = 0x00;
+ buf[11] = 0x05;
+ buf[12] = 0x00;
+ buf[13] = 0x00;
+ buf[14] = 0x00;
+ buf[15] = 0x00;
+ ide_atapi_cmd_reply(s, 16, max_len);
+ break;
+ case 0x2a:
+ /* NOTE(review): presumably the CD capabilities page -- verify against MMC spec */
+ cpu_to_ube16(&buf[0], 28 + 6);
+ buf[2] = 0x70;
+ buf[3] = 0;
+ buf[4] = 0;
+ buf[5] = 0;
+ buf[6] = 0;
+ buf[7] = 0;
+
+ buf[8] = 0x2a;
+ buf[9] = 0x12;
+ buf[10] = 0x00;
+ buf[11] = 0x00;
+
+ buf[12] = 0x70;
+ buf[13] = 3 << 5;
+ buf[14] = (1 << 0) | (1 << 3) | (1 << 5);
+ if (bdrv_is_locked(s->bs))
+ buf[6] |= 1 << 1;
+ buf[15] = 0x00;
+ cpu_to_ube16(&buf[16], 706);
+ buf[18] = 0;
+ buf[19] = 2;
+ cpu_to_ube16(&buf[20], 512);
+ cpu_to_ube16(&buf[22], 706);
+ buf[24] = 0;
+ buf[25] = 0;
+ buf[26] = 0;
+ buf[27] = 0;
+ ide_atapi_cmd_reply(s, 28, max_len);
+ break;
+ default:
+ goto error_cmd;
+ }
+ break;
+ case 1: /* changeable values */
+ goto error_cmd;
+ case 2: /* default values */
+ goto error_cmd;
+ default:
+ case 3: /* saved values */
+ ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+ ASC_SAVING_PARAMETERS_NOT_SUPPORTED);
+ break;
+ }
+ }
+ break;
+ case GPCMD_REQUEST_SENSE:
+ max_len = packet[4];
+ memset(buf, 0, 18);
+ buf[0] = 0x70 | (1 << 7);
+ buf[2] = s->sense_key;
+ buf[7] = 10;
+ buf[12] = s->asc;
+ ide_atapi_cmd_reply(s, 18, max_len);
+ break;
+ case GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL:
+ if (bdrv_is_inserted(s->bs)) {
+ bdrv_set_locked(s->bs, packet[4] & 1);
+ ide_atapi_cmd_ok(s);
+ } else {
+ ide_atapi_cmd_error(s, SENSE_NOT_READY,
+ ASC_MEDIUM_NOT_PRESENT);
+ }
+ break;
+ case GPCMD_READ_10:
+ case GPCMD_READ_12:
+ {
+ int nb_sectors, lba;
+
+ if (!bdrv_is_inserted(s->bs)) {
+ ide_atapi_cmd_error(s, SENSE_NOT_READY,
+ ASC_MEDIUM_NOT_PRESENT);
+ break;
+ }
+ if (packet[0] == GPCMD_READ_10)
+ nb_sectors = ube16_to_cpu(packet + 7);
+ else
+ nb_sectors = ube32_to_cpu(packet + 6);
+ lba = ube32_to_cpu(packet + 2);
+ if (nb_sectors == 0) {
+ ide_atapi_cmd_ok(s);
+ break;
+ }
+ /* s->nb_sectors is in 512-byte units; CD LBAs are 2048-byte, hence << 2 */
+ if (((int64_t)(lba + nb_sectors) << 2) > s->nb_sectors) {
+ ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+ ASC_LOGICAL_BLOCK_OOR);
+ break;
+ }
+ ide_atapi_cmd_read(s, lba, nb_sectors, 2048);
+ }
+ break;
+ case GPCMD_READ_CD:
+ {
+ int nb_sectors, lba, transfer_request;
+
+ if (!bdrv_is_inserted(s->bs)) {
+ ide_atapi_cmd_error(s, SENSE_NOT_READY,
+ ASC_MEDIUM_NOT_PRESENT);
+ break;
+ }
+ nb_sectors = (packet[6] << 16) | (packet[7] << 8) | packet[8];
+ lba = ube32_to_cpu(packet + 2);
+ if (nb_sectors == 0) {
+ ide_atapi_cmd_ok(s);
+ break;
+ }
+ if (((int64_t)(lba + nb_sectors) << 2) > s->nb_sectors) {
+ ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+ ASC_LOGICAL_BLOCK_OOR);
+ break;
+ }
+ transfer_request = packet[9];
+ switch(transfer_request & 0xf8) {
+ case 0x00:
+ /* nothing */
+ ide_atapi_cmd_ok(s);
+ break;
+ case 0x10:
+ /* normal read */
+ ide_atapi_cmd_read(s, lba, nb_sectors, 2048);
+ break;
+ case 0xf8:
+ /* read all data */
+ ide_atapi_cmd_read(s, lba, nb_sectors, 2352);
+ break;
+ default:
+ ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+ ASC_INV_FIELD_IN_CMD_PACKET);
+ break;
+ }
+ }
+ break;
+ case GPCMD_SEEK:
+ {
+ int lba;
+ if (!bdrv_is_inserted(s->bs)) {
+ ide_atapi_cmd_error(s, SENSE_NOT_READY,
+ ASC_MEDIUM_NOT_PRESENT);
+ break;
+ }
+ lba = ube32_to_cpu(packet + 2);
+ if (((int64_t)lba << 2) > s->nb_sectors) {
+ ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+ ASC_LOGICAL_BLOCK_OOR);
+ break;
+ }
+ ide_atapi_cmd_ok(s);
+ }
+ break;
+ case GPCMD_START_STOP_UNIT:
+ {
+ int start, eject;
+ start = packet[4] & 1;
+ eject = (packet[4] >> 1) & 1;
+
+ if (eject && !start) {
+ /* eject the disk */
+ bdrv_close(s->bs);
+ }
+ ide_atapi_cmd_ok(s);
+ }
+ break;
+ case GPCMD_MECHANISM_STATUS:
+ {
+ max_len = ube16_to_cpu(packet + 8);
+ cpu_to_ube16(buf, 0);
+ /* no current LBA */
+ buf[2] = 0;
+ buf[3] = 0;
+ buf[4] = 0;
+ buf[5] = 1;
+ cpu_to_ube16(buf + 6, 0);
+ ide_atapi_cmd_reply(s, 8, max_len);
+ }
+ break;
+ case GPCMD_READ_TOC_PMA_ATIP:
+ {
+ int format, msf, start_track, len;
+
+ if (!bdrv_is_inserted(s->bs)) {
+ ide_atapi_cmd_error(s, SENSE_NOT_READY,
+ ASC_MEDIUM_NOT_PRESENT);
+ break;
+ }
+ max_len = ube16_to_cpu(packet + 7);
+ format = packet[9] >> 6;
+ msf = (packet[1] >> 1) & 1;
+ start_track = packet[6];
+ switch(format) {
+ case 0:
+ len = cdrom_read_toc(s, buf, msf, start_track);
+ if (len < 0)
+ goto error_cmd;
+ ide_atapi_cmd_reply(s, len, max_len);
+ break;
+ case 1:
+ /* multi session : only a single session defined */
+ memset(buf, 0, 12);
+ buf[1] = 0x0a;
+ buf[2] = 0x01;
+ buf[3] = 0x01;
+ ide_atapi_cmd_reply(s, 12, max_len);
+ break;
+ case 2:
+ len = cdrom_read_toc_raw(s, buf, msf, start_track);
+ if (len < 0)
+ goto error_cmd;
+ ide_atapi_cmd_reply(s, len, max_len);
+ break;
+ default:
+ error_cmd:
+ ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+ ASC_INV_FIELD_IN_CMD_PACKET);
+ break;
+ }
+ }
+ break;
+ case GPCMD_READ_CDVD_CAPACITY:
+ if (!bdrv_is_inserted(s->bs)) {
+ ide_atapi_cmd_error(s, SENSE_NOT_READY,
+ ASC_MEDIUM_NOT_PRESENT);
+ break;
+ }
+ /* NOTE: it is really the number of sectors minus 1 */
+ cpu_to_ube32(buf, (s->nb_sectors >> 2) - 1);
+ cpu_to_ube32(buf + 4, 2048);
+ ide_atapi_cmd_reply(s, 8, 8);
+ break;
+ case GPCMD_INQUIRY:
+ max_len = packet[4];
+ buf[0] = 0x05; /* CD-ROM */
+ buf[1] = 0x80; /* removable */
+ buf[2] = 0x00; /* ISO */
+ buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */
+ buf[4] = 31; /* additional length */
+ buf[5] = 0; /* reserved */
+ buf[6] = 0; /* reserved */
+ buf[7] = 0; /* reserved */
+ padstr8(buf + 8, 8, "QEMU");
+ padstr8(buf + 16, 16, "QEMU CD-ROM");
+ padstr8(buf + 32, 4, QEMU_VERSION);
+ ide_atapi_cmd_reply(s, 36, max_len);
+ break;
+ default:
+ ide_atapi_cmd_error(s, SENSE_ILLEGAL_REQUEST,
+ ASC_ILLEGAL_OPCODE);
+ break;
+ }
+}
+
+/* called when the inserted state of the media has changed */
+static void cdrom_change_cb(void *opaque)
+{
+ IDEState *s = opaque;
+ int64_t nb_sectors;
+
+ /* refresh the cached medium size from the block driver */
+ /* XXX: send interrupt too */
+ bdrv_get_geometry(s->bs, &nb_sectors);
+ s->nb_sectors = nb_sectors;
+}
+
+/* Write handler for the 8 task-file registers of an IDE channel.
+ * addr selects the register (masked to 0..7); register writes other
+ * than the command register are mirrored to both drives, while a write
+ * to register 7 executes the ATA command on the selected drive. */
+static void ide_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ IDEState *ide_if = opaque;
+ IDEState *s;
+ int unit, n;
+
+#ifdef DEBUG_IDE
+ printf("IDE: write addr=0x%x val=0x%02x\n", addr, val);
+#endif
+ addr &= 7;
+ switch(addr) {
+ case 0:
+ break;
+ case 1:
+ /* NOTE: data is written to the two drives */
+ ide_if[0].feature = val;
+ ide_if[1].feature = val;
+ break;
+ case 2:
+ /* a sector count of 0 means 256 sectors */
+ if (val == 0)
+ val = 256;
+ ide_if[0].nsector = val;
+ ide_if[1].nsector = val;
+ break;
+ case 3:
+ ide_if[0].sector = val;
+ ide_if[1].sector = val;
+ break;
+ case 4:
+ ide_if[0].lcyl = val;
+ ide_if[1].lcyl = val;
+ break;
+ case 5:
+ ide_if[0].hcyl = val;
+ ide_if[1].hcyl = val;
+ break;
+ case 6:
+ ide_if[0].select = (val & ~0x10) | 0xa0;
+ ide_if[1].select = (val | 0x10) | 0xa0;
+ /* select drive */
+ unit = (val >> 4) & 1;
+ s = ide_if + unit;
+ ide_if->cur_drive = s;
+ break;
+ default:
+ case 7:
+ /* command */
+#if defined(DEBUG_IDE)
+ printf("ide: CMD=%02x\n", val);
+#endif
+ s = ide_if->cur_drive;
+ /* ignore commands to non-existent slave */
+ if (s != ide_if && !s->bs)
+ break;
+ switch(val) {
+ case WIN_IDENTIFY:
+ if (s->bs && !s->is_cdrom) {
+ ide_identify(s);
+ s->status = READY_STAT | SEEK_STAT;
+ ide_transfer_start(s, s->io_buffer, 512, ide_transfer_stop);
+ } else {
+ if (s->is_cdrom) {
+ ide_set_signature(s);
+ }
+ ide_abort_command(s);
+ }
+ ide_set_irq(s);
+ break;
+ case WIN_SPECIFY:
+ case WIN_RECAL:
+ s->error = 0;
+ s->status = READY_STAT | SEEK_STAT;
+ ide_set_irq(s);
+ break;
+ case WIN_SETMULT:
+ /* multi-sector count must be a non-zero power of two within range */
+ if (s->nsector > MAX_MULT_SECTORS ||
+ s->nsector == 0 ||
+ (s->nsector & (s->nsector - 1)) != 0) {
+ ide_abort_command(s);
+ } else {
+ s->mult_sectors = s->nsector;
+ s->status = READY_STAT;
+ }
+ ide_set_irq(s);
+ break;
+ case WIN_VERIFY:
+ case WIN_VERIFY_ONCE:
+ /* do sector number check ? */
+ s->status = READY_STAT;
+ ide_set_irq(s);
+ break;
+ case WIN_READ:
+ case WIN_READ_ONCE:
+ if (!s->bs)
+ goto abort_cmd;
+ s->req_nb_sectors = 1;
+ ide_sector_read(s);
+ break;
+ case WIN_WRITE:
+ case WIN_WRITE_ONCE:
+ s->error = 0;
+ s->status = SEEK_STAT | READY_STAT;
+ s->req_nb_sectors = 1;
+ ide_transfer_start(s, s->io_buffer, 512, ide_sector_write);
+ break;
+ case WIN_MULTREAD:
+ if (!s->mult_sectors)
+ goto abort_cmd;
+ s->req_nb_sectors = s->mult_sectors;
+ ide_sector_read(s);
+ break;
+ case WIN_MULTWRITE:
+ if (!s->mult_sectors)
+ goto abort_cmd;
+ s->error = 0;
+ s->status = SEEK_STAT | READY_STAT;
+ s->req_nb_sectors = s->mult_sectors;
+ n = s->nsector;
+ if (n > s->req_nb_sectors)
+ n = s->req_nb_sectors;
+ ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_write);
+ break;
+ case WIN_READDMA:
+ case WIN_READDMA_ONCE:
+ if (!s->bs)
+ goto abort_cmd;
+ ide_sector_read_dma(s);
+ break;
+ case WIN_WRITEDMA:
+ case WIN_WRITEDMA_ONCE:
+ if (!s->bs)
+ goto abort_cmd;
+ ide_sector_write_dma(s);
+ break;
+ case WIN_READ_NATIVE_MAX:
+ ide_set_sector(s, s->nb_sectors - 1);
+ s->status = READY_STAT;
+ ide_set_irq(s);
+ break;
+ case WIN_CHECKPOWERMODE1:
+ s->nsector = 0xff; /* device active or idle */
+ s->status = READY_STAT;
+ ide_set_irq(s);
+ break;
+ case WIN_SETFEATURES:
+ if (!s->bs)
+ goto abort_cmd;
+ /* XXX: valid for CDROM ? */
+ switch(s->feature) {
+ case 0x02: /* write cache enable */
+ case 0x03: /* set transfer mode */
+ case 0x82: /* write cache disable */
+ case 0xaa: /* read look-ahead enable */
+ case 0x55: /* read look-ahead disable */
+ s->status = READY_STAT | SEEK_STAT;
+ ide_set_irq(s);
+ break;
+ default:
+ goto abort_cmd;
+ }
+ break;
+ case WIN_STANDBYNOW1:
+ s->status = READY_STAT;
+ ide_set_irq(s);
+ break;
+ /* ATAPI commands */
+ case WIN_PIDENTIFY:
+ if (s->is_cdrom) {
+ ide_atapi_identify(s);
+ s->status = READY_STAT;
+ ide_transfer_start(s, s->io_buffer, 512, ide_transfer_stop);
+ } else {
+ ide_abort_command(s);
+ }
+ ide_set_irq(s);
+ break;
+ case WIN_SRST:
+ if (!s->is_cdrom)
+ goto abort_cmd;
+ ide_set_signature(s);
+ s->status = 0x00; /* NOTE: READY is _not_ set */
+ s->error = 0x01;
+ break;
+ case WIN_PACKETCMD:
+ if (!s->is_cdrom)
+ goto abort_cmd;
+ /* overlapping commands not supported */
+ if (s->feature & 0x02)
+ goto abort_cmd;
+ s->atapi_dma = s->feature & 1;
+ s->nsector = 1;
+ ide_transfer_start(s, s->io_buffer, ATAPI_PACKET_SIZE,
+ ide_atapi_cmd);
+ break;
+ default:
+ abort_cmd:
+ ide_abort_command(s);
+ ide_set_irq(s);
+ break;
+ }
+ }
+}
+
+/* Read handler for the 8 task-file registers.  Registers read as 0 when
+ * no drive is attached to the channel; reading the status register
+ * (addr 7) also deasserts the channel's interrupt line. */
+static uint32_t ide_ioport_read(void *opaque, uint32_t addr1)
+{
+ IDEState *ide_if = opaque;
+ IDEState *s = ide_if->cur_drive;
+ uint32_t addr;
+ int ret;
+
+ addr = addr1 & 7;
+ switch(addr) {
+ case 0:
+ ret = 0xff;
+ break;
+ case 1:
+ if (!ide_if[0].bs && !ide_if[1].bs)
+ ret = 0;
+ else
+ ret = s->error;
+ break;
+ case 2:
+ if (!ide_if[0].bs && !ide_if[1].bs)
+ ret = 0;
+ else
+ ret = s->nsector & 0xff;
+ break;
+ case 3:
+ if (!ide_if[0].bs && !ide_if[1].bs)
+ ret = 0;
+ else
+ ret = s->sector;
+ break;
+ case 4:
+ if (!ide_if[0].bs && !ide_if[1].bs)
+ ret = 0;
+ else
+ ret = s->lcyl;
+ break;
+ case 5:
+ if (!ide_if[0].bs && !ide_if[1].bs)
+ ret = 0;
+ else
+ ret = s->hcyl;
+ break;
+ case 6:
+ if (!ide_if[0].bs && !ide_if[1].bs)
+ ret = 0;
+ else
+ ret = s->select;
+ break;
+ default:
+ case 7:
+ if ((!ide_if[0].bs && !ide_if[1].bs) ||
+ (s != ide_if && !s->bs))
+ ret = 0;
+ else
+ ret = s->status;
+ /* reading the status register clears the pending IRQ */
+#ifdef TARGET_PPC
+ if (s->openpic)
+ openpic_set_irq(s->openpic, s->irq, 0);
+ else
+#endif
+ if (s->irq == 16)
+ pci_set_irq(s->pci_dev, 0, 0);
+ else
+ pic_set_irq(s->irq, 0);
+ break;
+ }
+#ifdef DEBUG_IDE
+ printf("ide: read addr=0x%x val=%02x\n", addr1, ret);
+#endif
+ return ret;
+}
+
+/* Alternate-status register read: returns the status byte without
+ * acknowledging the interrupt (unlike reading task-file register 7). */
+static uint32_t ide_status_read(void *opaque, uint32_t addr)
+{
+ IDEState *ide_if = opaque;
+ IDEState *s = ide_if->cur_drive;
+ int ret;
+
+ if ((!ide_if[0].bs && !ide_if[1].bs) ||
+ (s != ide_if && !s->bs))
+ ret = 0;
+ else
+ ret = s->status;
+#ifdef DEBUG_IDE
+ printf("ide: read status addr=0x%x val=%02x\n", addr, ret);
+#endif
+ return ret;
+}
+
+/* Device-control register write (shared by both drives).  Detects the
+ * rising and falling edges of the software-reset bit: on assert both
+ * drives go busy, on deassert they are re-initialised with their reset
+ * signature. */
+static void ide_cmd_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ IDEState *ide_if = opaque;
+ IDEState *s;
+ int i;
+
+#ifdef DEBUG_IDE
+ printf("ide: write control addr=0x%x val=%02x\n", addr, val);
+#endif
+ /* common for both drives */
+ if (!(ide_if[0].cmd & IDE_CMD_RESET) &&
+ (val & IDE_CMD_RESET)) {
+ /* reset low to high */
+ for(i = 0;i < 2; i++) {
+ s = &ide_if[i];
+ s->status = BUSY_STAT | SEEK_STAT;
+ s->error = 0x01;
+ }
+ } else if ((ide_if[0].cmd & IDE_CMD_RESET) &&
+ !(val & IDE_CMD_RESET)) {
+ /* high to low */
+ for(i = 0;i < 2; i++) {
+ s = &ide_if[i];
+ if (s->is_cdrom)
+ s->status = 0x00; /* NOTE: READY is _not_ set */
+ else
+ s->status = READY_STAT | SEEK_STAT;
+ ide_set_signature(s);
+ }
+ }
+
+ ide_if[0].cmd = val;
+ ide_if[1].cmd = val;
+}
+
+/* 16-bit PIO data-port write: stores one little-endian word into the
+ * transfer buffer and fires the end-of-transfer callback when full. */
+static void ide_data_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+ IDEState *s = ((IDEState *)opaque)->cur_drive;
+ uint8_t *p;
+
+ p = s->data_ptr;
+ *(uint16_t *)p = le16_to_cpu(val);
+ p += 2;
+ s->data_ptr = p;
+ if (p >= s->data_end)
+ s->end_transfer_func(s);
+}
+
+/* 16-bit PIO data-port read: fetches one little-endian word from the
+ * transfer buffer and fires the end-of-transfer callback when drained. */
+static uint32_t ide_data_readw(void *opaque, uint32_t addr)
+{
+ IDEState *s = ((IDEState *)opaque)->cur_drive;
+ uint8_t *p;
+ int ret;
+ p = s->data_ptr;
+ ret = cpu_to_le16(*(uint16_t *)p);
+ p += 2;
+ s->data_ptr = p;
+ if (p >= s->data_end)
+ s->end_transfer_func(s);
+ return ret;
+}
+
+/* 32-bit PIO data-port write, same protocol as ide_data_writew. */
+static void ide_data_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+ IDEState *s = ((IDEState *)opaque)->cur_drive;
+ uint8_t *p;
+
+ p = s->data_ptr;
+ *(uint32_t *)p = le32_to_cpu(val);
+ p += 4;
+ s->data_ptr = p;
+ if (p >= s->data_end)
+ s->end_transfer_func(s);
+}
+
+/* 32-bit PIO data-port read, same protocol as ide_data_readw. */
+static uint32_t ide_data_readl(void *opaque, uint32_t addr)
+{
+ IDEState *s = ((IDEState *)opaque)->cur_drive;
+ uint8_t *p;
+ int ret;
+
+ p = s->data_ptr;
+ ret = cpu_to_le32(*(uint32_t *)p);
+ p += 4;
+ s->data_ptr = p;
+ if (p >= s->data_end)
+ s->end_transfer_func(s);
+ return ret;
+}
+
+/* Idle transfer state: empty buffer primed with 0xff so stray data-port
+ * accesses read back all-ones. */
+static void ide_dummy_transfer_stop(IDEState *s)
+{
+ s->data_ptr = s->io_buffer;
+ s->data_end = s->io_buffer;
+ s->io_buffer[0] = 0xff;
+ s->io_buffer[1] = 0xff;
+ s->io_buffer[2] = 0xff;
+ s->io_buffer[3] = 0xff;
+}
+
+/* Put one drive into its power-on state (master selected, ready,
+ * reset signature loaded, data port idling at 0xffff). */
+static void ide_reset(IDEState *s)
+{
+ s->mult_sectors = MAX_MULT_SECTORS;
+ s->cur_drive = s;
+ s->select = 0xa0;
+ s->status = READY_STAT;
+ ide_set_signature(s);
+ /* init the transfer handler so that 0xffff is returned on data
+ accesses */
+ s->end_transfer_func = ide_dummy_transfer_stop;
+ ide_dummy_transfer_stop(s);
+}
+
+/* On-disk MSDOS/MBR partition table entry (16 bytes, unaligned fields,
+ * hence the packed attribute). */
+struct partition {
+ uint8_t boot_ind; /* 0x80 - active */
+ uint8_t head; /* starting head */
+ uint8_t sector; /* starting sector */
+ uint8_t cyl; /* starting cylinder */
+ uint8_t sys_ind; /* What partition type */
+ uint8_t end_head; /* end head */
+ uint8_t end_sector; /* end sector */
+ uint8_t end_cyl; /* end cylinder */
+ uint32_t start_sect; /* starting sector counting from 0 */
+ uint32_t nr_sects; /* nr of sectors in partition */
+} __attribute__((packed));
+
+/* try to guess the IDE geometry from the MSDOS partition table */
+static void ide_guess_geometry(IDEState *s)
+{
+ uint8_t buf[512];
+ int ret, i;
+ struct partition *p;
+ uint32_t nr_sects;
+
+ /* no-op if a geometry is already known */
+ if (s->cylinders != 0)
+ return;
+ ret = bdrv_read(s->bs, 0, buf, 1);
+ if (ret < 0)
+ return;
+ /* test msdos magic */
+ if (buf[510] != 0x55 || buf[511] != 0xaa)
+ return;
+ for(i = 0; i < 4; i++) {
+ /* partition table starts at offset 0x1be of the MBR */
+ p = ((struct partition *)(buf + 0x1be)) + i;
+ nr_sects = le32_to_cpu(p->nr_sects);
+ if (nr_sects && p->end_head) {
+ /* We make the assumption that the partition terminates on
+ a cylinder boundary */
+ s->heads = p->end_head + 1;
+ s->sectors = p->end_sector & 63;
+ s->cylinders = s->nb_sectors / (s->heads * s->sectors);
+#if 0
+ printf("guessed partition: CHS=%d %d %d\n",
+ s->cylinders, s->heads, s->sectors);
+#endif
+ }
+ }
+}
+
+/* Initialise the two drives of one IDE channel: attach the block
+ * devices, determine a CHS geometry (hint, partition-table guess, or
+ * LBA-compatible default), detect CD-ROMs, and reset each drive. */
+static void ide_init2(IDEState *ide_state, int irq,
+ BlockDriverState *hd0, BlockDriverState *hd1)
+{
+ IDEState *s;
+ static int drive_serial = 1;
+ int i, cylinders, heads, secs;
+ int64_t nb_sectors;
+
+ for(i = 0; i < 2; i++) {
+ s = ide_state + i;
+ if (i == 0)
+ s->bs = hd0;
+ else
+ s->bs = hd1;
+ if (s->bs) {
+ bdrv_get_geometry(s->bs, &nb_sectors);
+ s->nb_sectors = nb_sectors;
+ /* if a geometry hint is available, use it */
+ bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);
+ if (cylinders != 0) {
+ s->cylinders = cylinders;
+ s->heads = heads;
+ s->sectors = secs;
+ } else {
+ ide_guess_geometry(s);
+ if (s->cylinders == 0) {
+ /* if no geometry, use a LBA compatible one */
+ cylinders = nb_sectors / (16 * 63);
+ if (cylinders > 16383)
+ cylinders = 16383;
+ else if (cylinders < 2)
+ cylinders = 2;
+ s->cylinders = cylinders;
+ s->heads = 16;
+ s->sectors = 63;
+ }
+ bdrv_set_geometry_hint(s->bs, s->cylinders, s->heads, s->sectors);
+ }
+ if (bdrv_get_type_hint(s->bs) == BDRV_TYPE_CDROM) {
+ s->is_cdrom = 1;
+ bdrv_set_change_cb(s->bs, cdrom_change_cb, s);
+ }
+ }
+ s->drive_serial = drive_serial++;
+ s->irq = irq;
+ ide_reset(s);
+ }
+}
+
+static void ide_init_ioport(IDEState *ide_state, int iobase, int iobase2)
+{
+ register_ioport_write(iobase, 8, 1, ide_ioport_write, ide_state);
+ register_ioport_read(iobase, 8, 1, ide_ioport_read, ide_state);
+ if (iobase2) {
+ register_ioport_read(iobase2, 1, 1, ide_status_read, ide_state);
+ register_ioport_write(iobase2, 1, 1, ide_cmd_write, ide_state);
+ }
+
+ /* data ports */
+ register_ioport_write(iobase, 2, 2, ide_data_writew, ide_state);
+ register_ioport_read(iobase, 2, 2, ide_data_readw, ide_state);
+ register_ioport_write(iobase, 4, 4, ide_data_writel, ide_state);
+ register_ioport_read(iobase, 4, 4, ide_data_readl, ide_state);
+}
+
+/***********************************************************/
+/* ISA IDE definitions */
+
+/* Create an ISA IDE channel with up to two drives at the given ports.
+ * NOTE: silently does nothing if allocation fails. */
+void isa_ide_init(int iobase, int iobase2, int irq,
+ BlockDriverState *hd0, BlockDriverState *hd1)
+{
+ IDEState *ide_state;
+
+ ide_state = qemu_mallocz(sizeof(IDEState) * 2);
+ if (!ide_state)
+ return;
+
+ ide_init2(ide_state, irq, hd0, hd1);
+ ide_init_ioport(ide_state, iobase, iobase2);
+}
+
+/***********************************************************/
+/* PCI IDE definitions */
+
+/* PCI BAR mapping callback: BARs 0/1 map the primary channel and BARs
+ * 2/3 the secondary; even BARs carry the task-file and data ports, odd
+ * BARs the control/alt-status register. */
+static void ide_map(PCIDevice *pci_dev, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ PCIIDEState *d = (PCIIDEState *)pci_dev;
+ IDEState *ide_state;
+
+ if (region_num <= 3) {
+ ide_state = &d->ide_if[(region_num >> 1) * 2];
+ if (region_num & 1) {
+ register_ioport_read(addr + 2, 1, 1, ide_status_read, ide_state);
+ register_ioport_write(addr + 2, 1, 1, ide_cmd_write, ide_state);
+ } else {
+ register_ioport_write(addr, 8, 1, ide_ioport_write, ide_state);
+ register_ioport_read(addr, 8, 1, ide_ioport_read, ide_state);
+
+ /* data ports */
+ register_ioport_write(addr, 2, 2, ide_data_writew, ide_state);
+ register_ioport_read(addr, 2, 2, ide_data_readw, ide_state);
+ register_ioport_write(addr, 4, 4, ide_data_writel, ide_state);
+ register_ioport_read(addr, 4, 4, ide_data_readl, ide_state);
+ }
+ }
+}
+
+/* XXX: full callback usage to prepare non blocking I/Os support -
+ error handling */
+/* Walk the bus-master PRD (physical region descriptor) table at
+ * bm->addr, feeding each region to the drive's dma_cb until the
+ * end-of-table flag or the callback signals completion.  On exit the
+ * DMAING status bit is cleared and the interrupt bit set. */
+static void ide_dma_loop(BMDMAState *bm)
+{
+ struct {
+ uint32_t addr;
+ uint32_t size;
+ } prd;
+ target_phys_addr_t cur_addr;
+ int len, i, len1;
+
+ cur_addr = bm->addr;
+ /* at most one page to avoid hanging if erroneous parameters */
+ for(i = 0; i < 512; i++) {
+ cpu_physical_memory_read(cur_addr, (uint8_t *)&prd, 8);
+ prd.addr = le32_to_cpu(prd.addr);
+ prd.size = le32_to_cpu(prd.size);
+#ifdef DEBUG_IDE
+ printf("ide: dma: prd: %08x: addr=0x%08x size=0x%08x\n",
+ (int)cur_addr, prd.addr, prd.size);
+#endif
+ /* a byte count of 0 in the PRD means 64 KiB */
+ len = prd.size & 0xfffe;
+ if (len == 0)
+ len = 0x10000;
+ while (len > 0) {
+ len1 = bm->dma_cb(bm->ide_if, prd.addr, len);
+ if (len1 == 0)
+ goto the_end;
+ prd.addr += len1;
+ len -= len1;
+ }
+ /* end of transfer */
+ if (prd.size & 0x80000000)
+ break;
+ cur_addr += 8;
+ }
+ /* end of transfer */
+ the_end:
+ bm->status &= ~BM_STATUS_DMAING;
+ bm->status |= BM_STATUS_INT;
+ bm->dma_cb = NULL;
+ bm->ide_if = NULL;
+}
+
+/* Arm a DMA transfer for drive s; runs it immediately if the guest has
+ * already set the bus-master START bit, otherwise it runs when the
+ * guest writes the command register (bmdma_cmd_writeb). */
+static void ide_dma_start(IDEState *s, IDEDMAFunc *dma_cb)
+{
+ BMDMAState *bm = s->bmdma;
+ if(!bm)
+ return;
+ bm->ide_if = s;
+ bm->dma_cb = dma_cb;
+ if (bm->status & BM_STATUS_DMAING) {
+ ide_dma_loop(bm);
+ }
+}
+
+/* Bus-master command register read. */
+static uint32_t bmdma_cmd_readb(void *opaque, uint32_t addr)
+{
+ BMDMAState *bm = opaque;
+ uint32_t val;
+ val = bm->cmd;
+#ifdef DEBUG_IDE
+ printf("%s: 0x%08x\n", __func__, val);
+#endif
+ return val;
+}
+
+/* Bus-master command register write: only the START and read/write
+ * direction bits (0x09) are kept; setting START kicks off any pending
+ * DMA transfer. */
+static void bmdma_cmd_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+ BMDMAState *bm = opaque;
+#ifdef DEBUG_IDE
+ printf("%s: 0x%08x\n", __func__, val);
+#endif
+ if (!(val & BM_CMD_START)) {
+ /* XXX: do it better */
+ bm->status &= ~BM_STATUS_DMAING;
+ bm->cmd = val & 0x09;
+ } else {
+ bm->status |= BM_STATUS_DMAING;
+ bm->cmd = val & 0x09;
+ /* start dma transfer if possible */
+ if (bm->dma_cb)
+ ide_dma_loop(bm);
+ }
+}
+
+/* Bus-master status register read. */
+static uint32_t bmdma_status_readb(void *opaque, uint32_t addr)
+{
+ BMDMAState *bm = opaque;
+ uint32_t val;
+ val = bm->status;
+#ifdef DEBUG_IDE
+ printf("%s: 0x%08x\n", __func__, val);
+#endif
+ return val;
+}
+
+/* Bus-master status register write: bits 5-6 are read/write, bit 0
+ * (active) is read-only, and bits 1-2 (error/interrupt) are
+ * write-one-to-clear. */
+static void bmdma_status_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+ BMDMAState *bm = opaque;
+#ifdef DEBUG_IDE
+ printf("%s: 0x%08x\n", __func__, val);
+#endif
+ bm->status = (val & 0x60) | (bm->status & 1) | (bm->status & ~val & 0x06);
+}
+
+/* Bus-master PRD table base address read. */
+static uint32_t bmdma_addr_readl(void *opaque, uint32_t addr)
+{
+ BMDMAState *bm = opaque;
+ uint32_t val;
+ val = bm->addr;
+#ifdef DEBUG_IDE
+ printf("%s: 0x%08x\n", __func__, val);
+#endif
+ return val;
+}
+
+/* Bus-master PRD table base address write (forced 4-byte aligned). */
+static void bmdma_addr_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+ BMDMAState *bm = opaque;
+#ifdef DEBUG_IDE
+ printf("%s: 0x%08x\n", __func__, val);
+#endif
+ bm->addr = val & ~3;
+}
+
+/* PCI BAR 4 mapping callback: registers the 8-byte bus-master register
+ * set of each channel (cmd at +0, status at +2, PRD address at +4) and
+ * links each drive to its channel's BMDMA state. */
+static void bmdma_map(PCIDevice *pci_dev, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ PCIIDEState *d = (PCIIDEState *)pci_dev;
+ int i;
+
+ for(i = 0;i < 2; i++) {
+ BMDMAState *bm = &d->bmdma[i];
+ d->ide_if[2 * i].bmdma = bm;
+ d->ide_if[2 * i + 1].bmdma = bm;
+
+ register_ioport_write(addr, 1, 1, bmdma_cmd_writeb, bm);
+ register_ioport_read(addr, 1, 1, bmdma_cmd_readb, bm);
+
+ register_ioport_write(addr + 2, 1, 1, bmdma_status_writeb, bm);
+ register_ioport_read(addr + 2, 1, 1, bmdma_status_readb, bm);
+
+ register_ioport_write(addr + 4, 4, 4, bmdma_addr_writel, bm);
+ register_ioport_read(addr + 4, 4, 4, bmdma_addr_readl, bm);
+ addr += 8;
+ }
+}
+
+/* hd_table must contain 4 block drivers */
+/* Register a generic PCI IDE controller (fake Intel IDs) with
+ * relocatable BARs for both channels plus the bus-master BAR; both
+ * channels share PCI interrupt pin 1 (irq value 16 = route via PCI). */
+void pci_ide_init(PCIBus *bus, BlockDriverState **hd_table)
+{
+ PCIIDEState *d;
+ uint8_t *pci_conf;
+ int i;
+
+ d = (PCIIDEState *)pci_register_device(bus, "IDE", sizeof(PCIIDEState),
+ -1,
+ NULL, NULL);
+ pci_conf = d->dev.config;
+ pci_conf[0x00] = 0x86; // Intel
+ pci_conf[0x01] = 0x80;
+ pci_conf[0x02] = 0x00; // fake
+ pci_conf[0x03] = 0x01; // fake
+ pci_conf[0x0a] = 0x01; // class_sub = PCI_IDE
+ pci_conf[0x0b] = 0x01; // class_base = PCI_mass_storage
+ pci_conf[0x0e] = 0x80; // header_type = PCI_multifunction, generic
+
+ pci_conf[0x2c] = 0x86; // subsys vendor
+ pci_conf[0x2d] = 0x80; // subsys vendor
+ pci_conf[0x2e] = 0x00; // fake
+ pci_conf[0x2f] = 0x01; // fake
+
+ pci_register_io_region((PCIDevice *)d, 0, 0x8,
+ PCI_ADDRESS_SPACE_IO, ide_map);
+ pci_register_io_region((PCIDevice *)d, 1, 0x4,
+ PCI_ADDRESS_SPACE_IO, ide_map);
+ pci_register_io_region((PCIDevice *)d, 2, 0x8,
+ PCI_ADDRESS_SPACE_IO, ide_map);
+ pci_register_io_region((PCIDevice *)d, 3, 0x4,
+ PCI_ADDRESS_SPACE_IO, ide_map);
+ pci_register_io_region((PCIDevice *)d, 4, 0x10,
+ PCI_ADDRESS_SPACE_IO, bmdma_map);
+
+ pci_conf[0x3d] = 0x01; // interrupt on pin 1
+
+ for(i = 0; i < 4; i++)
+ d->ide_if[i].pci_dev = (PCIDevice *)d;
+ ide_init2(&d->ide_if[0], 16, hd_table[0], hd_table[1]);
+ ide_init2(&d->ide_if[2], 16, hd_table[2], hd_table[3]);
+}
+
+/* hd_table must contain 4 block drivers */
+/* NOTE: for the PIIX3, the IRQs and IOports are hardcoded */
+/* Register the IDE function of a PIIX3 (device 8086:7010) as function
+ * 1 of the chipset, with legacy ports 0x1f0/0x170 and ISA IRQs 14/15. */
+void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table)
+{
+ PCIIDEState *d;
+ uint8_t *pci_conf;
+
+ /* register a function 1 of PIIX3 */
+ d = (PCIIDEState *)pci_register_device(bus, "PIIX3 IDE",
+ sizeof(PCIIDEState),
+ ((PCIDevice *)piix3_state)->devfn + 1,
+ NULL, NULL);
+ pci_conf = d->dev.config;
+ pci_conf[0x00] = 0x86; // Intel
+ pci_conf[0x01] = 0x80;
+ pci_conf[0x02] = 0x10;
+ pci_conf[0x03] = 0x70;
+ pci_conf[0x0a] = 0x01; // class_sub = PCI_IDE
+ pci_conf[0x0b] = 0x01; // class_base = PCI_mass_storage
+ pci_conf[0x0e] = 0x00; // header_type
+
+ pci_register_io_region((PCIDevice *)d, 4, 0x10,
+ PCI_ADDRESS_SPACE_IO, bmdma_map);
+
+ ide_init2(&d->ide_if[0], 14, hd_table[0], hd_table[1]);
+ ide_init2(&d->ide_if[2], 15, hd_table[2], hd_table[3]);
+ ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6);
+ ide_init_ioport(&d->ide_if[2], 0x170, 0x376);
+}
+
+/***********************************************************/
+/* MacIO based PowerPC IDE */
+
+/* PowerMac IDE memory IO */
+/* Byte write: MacIO registers are spaced 16 bytes apart, so the
+ * register index is (addr & 0xFFF) >> 4; 1..7 map to the task file,
+ * 8 and 22 to the device-control register. */
+static void pmac_ide_writeb (void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ addr = (addr & 0xFFF) >> 4;
+ switch (addr) {
+ case 1 ... 7:
+ ide_ioport_write(opaque, addr, val);
+ break;
+ case 8:
+ case 22:
+ ide_cmd_write(opaque, 0, val);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Byte read counterpart of pmac_ide_writeb; unmapped registers read
+ * as 0xFF. */
+static uint32_t pmac_ide_readb (void *opaque,target_phys_addr_t addr)
+{
+ uint8_t retval;
+
+ addr = (addr & 0xFFF) >> 4;
+ switch (addr) {
+ case 1 ... 7:
+ retval = ide_ioport_read(opaque, addr);
+ break;
+ case 8:
+ case 22:
+ retval = ide_status_read(opaque, 0);
+ break;
+ default:
+ retval = 0xFF;
+ break;
+ }
+ return retval;
+}
+
+/* 16-bit write: only register 0 (the data port) accepts word access;
+ * values are byte-swapped on big-endian targets. */
+static void pmac_ide_writew (void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ addr = (addr & 0xFFF) >> 4;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+ if (addr == 0) {
+ ide_data_writew(opaque, 0, val);
+ }
+}
+
+/* 16-bit read counterpart of pmac_ide_writew; non-data registers read
+ * as 0xFFFF. */
+static uint32_t pmac_ide_readw (void *opaque,target_phys_addr_t addr)
+{
+ uint16_t retval;
+
+ addr = (addr & 0xFFF) >> 4;
+ if (addr == 0) {
+ retval = ide_data_readw(opaque, 0);
+ } else {
+ retval = 0xFFFF;
+ }
+#ifdef TARGET_WORDS_BIGENDIAN
+ retval = bswap16(retval);
+#endif
+ return retval;
+}
+
+/* 32-bit write to the data port, byte-swapped on big-endian targets. */
+static void pmac_ide_writel (void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ addr = (addr & 0xFFF) >> 4;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ if (addr == 0) {
+ ide_data_writel(opaque, 0, val);
+ }
+}
+
+/* 32-bit read counterpart of pmac_ide_writel; non-data registers read
+ * as all-ones. */
+static uint32_t pmac_ide_readl (void *opaque,target_phys_addr_t addr)
+{
+ uint32_t retval;
+
+ addr = (addr & 0xFFF) >> 4;
+ if (addr == 0) {
+ retval = ide_data_readl(opaque, 0);
+ } else {
+ retval = 0xFFFFFFFF;
+ }
+#ifdef TARGET_WORDS_BIGENDIAN
+ retval = bswap32(retval);
+#endif
+ return retval;
+}
+
+/* Memory-mapped I/O dispatch tables, indexed by access size
+ * (byte, word, long). */
+static CPUWriteMemoryFunc *pmac_ide_write[] = {
+ pmac_ide_writeb,
+ pmac_ide_writew,
+ pmac_ide_writel,
+};
+
+static CPUReadMemoryFunc *pmac_ide_read[] = {
+ pmac_ide_readb,
+ pmac_ide_readw,
+ pmac_ide_readl,
+};
+
+/* hd_table must contain 4 block drivers */
+/* PowerMac uses memory mapped registers, not I/O. Return the memory
+ I/O index to access the ide. */
+/* NOTE: only hd_table[0] and hd_table[1] are attached (one channel);
+ allocation failure of ide_if is not checked before use. */
+int pmac_ide_init (BlockDriverState **hd_table,
+ openpic_t *openpic, int irq)
+{
+ IDEState *ide_if;
+ int pmac_ide_memory;
+
+ ide_if = qemu_mallocz(sizeof(IDEState) * 2);
+ ide_init2(&ide_if[0], irq, hd_table[0], hd_table[1]);
+ ide_if[0].openpic = openpic;
+ ide_if[1].openpic = openpic;
+
+ pmac_ide_memory = cpu_register_io_memory(0, pmac_ide_read,
+ pmac_ide_write, &ide_if[0]);
+ return pmac_ide_memory;
+}
diff --git a/tools/ioemu/hw/iommu.c b/tools/ioemu/hw/iommu.c
new file mode 100644
index 0000000000..a9249c4ba7
--- /dev/null
+++ b/tools/ioemu/hw/iommu.c
@@ -0,0 +1,218 @@
+/*
+ * QEMU SPARC iommu emulation
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* debug iommu */
+//#define DEBUG_IOMMU
+
+/* The IOMMU registers occupy three pages in IO space. */
+struct iommu_regs {
+ /* First page */
+ volatile unsigned long control; /* IOMMU control */
+ volatile unsigned long base; /* Physical base of iopte page table */
+ volatile unsigned long _unused1[3];
+ volatile unsigned long tlbflush; /* write only */
+ volatile unsigned long pageflush; /* write only */
+ volatile unsigned long _unused2[1017];
+ /* Second page */
+ volatile unsigned long afsr; /* Async-fault status register */
+ volatile unsigned long afar; /* Async-fault physical address */
+ volatile unsigned long _unused3[2];
+ volatile unsigned long sbuscfg0; /* SBUS configuration registers, per-slot */
+ volatile unsigned long sbuscfg1;
+ volatile unsigned long sbuscfg2;
+ volatile unsigned long sbuscfg3;
+ volatile unsigned long mfsr; /* Memory-fault status register */
+ volatile unsigned long mfar; /* Memory-fault physical address */
+ volatile unsigned long _unused4[1014];
+ /* Third page */
+ volatile unsigned long mid; /* IOMMU module-id */
+};
+
+#define IOMMU_CTRL_IMPL 0xf0000000 /* Implementation */
+#define IOMMU_CTRL_VERS 0x0f000000 /* Version */
+#define IOMMU_CTRL_RNGE 0x0000001c /* Mapping RANGE */
+#define IOMMU_RNGE_16MB 0x00000000 /* 0xff000000 -> 0xffffffff */
+#define IOMMU_RNGE_32MB 0x00000004 /* 0xfe000000 -> 0xffffffff */
+#define IOMMU_RNGE_64MB 0x00000008 /* 0xfc000000 -> 0xffffffff */
+#define IOMMU_RNGE_128MB 0x0000000c /* 0xf8000000 -> 0xffffffff */
+#define IOMMU_RNGE_256MB 0x00000010 /* 0xf0000000 -> 0xffffffff */
+#define IOMMU_RNGE_512MB 0x00000014 /* 0xe0000000 -> 0xffffffff */
+#define IOMMU_RNGE_1GB 0x00000018 /* 0xc0000000 -> 0xffffffff */
+#define IOMMU_RNGE_2GB 0x0000001c /* 0x80000000 -> 0xffffffff */
+#define IOMMU_CTRL_ENAB 0x00000001 /* IOMMU Enable */
+
+#define IOMMU_AFSR_ERR 0x80000000 /* LE, TO, or BE asserted */
+#define IOMMU_AFSR_LE 0x40000000 /* SBUS reports error after transaction */
+#define IOMMU_AFSR_TO 0x20000000 /* Write access took more than 12.8 us. */
+#define IOMMU_AFSR_BE 0x10000000 /* Write access received error acknowledge */
+#define IOMMU_AFSR_SIZE 0x0e000000 /* Size of transaction causing error */
+#define IOMMU_AFSR_S 0x01000000 /* Sparc was in supervisor mode */
+#define IOMMU_AFSR_RESV 0x00f00000 /* Reserver, forced to 0x8 by hardware */
+#define IOMMU_AFSR_ME 0x00080000 /* Multiple errors occurred */
+#define IOMMU_AFSR_RD 0x00040000 /* A read operation was in progress */
+#define IOMMU_AFSR_FAV 0x00020000 /* IOMMU afar has valid contents */
+
+#define IOMMU_SBCFG_SAB30 0x00010000 /* Phys-address bit 30 when bypass enabled */
+#define IOMMU_SBCFG_BA16 0x00000004 /* Slave supports 16 byte bursts */
+#define IOMMU_SBCFG_BA8 0x00000002 /* Slave supports 8 byte bursts */
+#define IOMMU_SBCFG_BYPASS 0x00000001 /* Bypass IOMMU, treat all addresses
+ produced by this device as pure
+ physical. */
+
+#define IOMMU_MFSR_ERR 0x80000000 /* One or more of PERR1 or PERR0 */
+#define IOMMU_MFSR_S 0x01000000 /* Sparc was in supervisor mode */
+#define IOMMU_MFSR_CPU 0x00800000 /* CPU transaction caused parity error */
+#define IOMMU_MFSR_ME 0x00080000 /* Multiple parity errors occurred */
+#define IOMMU_MFSR_PERR 0x00006000 /* high bit indicates parity error occurred
+ on the even word of the access, low bit
+ indicated odd word caused the parity error */
+#define IOMMU_MFSR_BM 0x00001000 /* Error occurred while in boot mode */
+#define IOMMU_MFSR_C 0x00000800 /* Address causing error was marked cacheable */
+#define IOMMU_MFSR_RTYP 0x000000f0 /* Memory request transaction type */
+
+#define IOMMU_MID_SBAE 0x001f0000 /* SBus arbitration enable */
+#define IOMMU_MID_SE 0x00100000 /* Enables SCSI/ETHERNET arbitration */
+#define IOMMU_MID_SB3 0x00080000 /* Enable SBUS device 3 arbitration */
+#define IOMMU_MID_SB2 0x00040000 /* Enable SBUS device 2 arbitration */
+#define IOMMU_MID_SB1 0x00020000 /* Enable SBUS device 1 arbitration */
+#define IOMMU_MID_SB0 0x00010000 /* Enable SBUS device 0 arbitration */
+#define IOMMU_MID_MID 0x0000000f /* Module-id, hardcoded to 0x8 */
+
+/* The format of an iopte in the page tables */
+#define IOPTE_PAGE 0x07ffff00 /* Physical page number (PA[30:12]) */
+#define IOPTE_CACHE 0x00000080 /* Cached (in vme IOCACHE or Viking/MXCC) */
+#define IOPTE_WRITE 0x00000004 /* Writeable */
+#define IOPTE_VALID 0x00000002 /* IOPTE is valid */
+#define IOPTE_WAZ 0x00000001 /* Write as zeros */
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define PAGE_MASK (PAGE_SIZE - 1)
+
+typedef struct IOMMUState {
+ uint32_t addr;
+ uint32_t regs[sizeof(struct iommu_regs)];
+ uint32_t iostart;
+} IOMMUState;
+
+static IOMMUState *ps;
+
+static uint32_t iommu_mem_readw(void *opaque, target_phys_addr_t addr)
+{
+ IOMMUState *s = opaque;
+ uint32_t saddr;
+
+ saddr = (addr - s->addr) >> 2;
+ switch (saddr) {
+ default:
+ return s->regs[saddr];
+ break;
+ }
+ return 0;
+}
+
/* MMIO write to the IOMMU register bank.  addr is an absolute physical
   address, converted to a 32-bit register index relative to the base.
   Register 0 (control) additionally updates the cached DVMA window
   start used by iommu_translate(). */
static void iommu_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    IOMMUState *s = opaque;
    uint32_t saddr;

    saddr = (addr - s->addr) >> 2;
    switch (saddr) {
    case 0:
        /* Decode the RANGE field: the DVMA window always ends at
           0xffffffff, so the range selects its start address. */
        switch (val & IOMMU_CTRL_RNGE) {
        case IOMMU_RNGE_16MB:
            s->iostart = 0xff000000;
            break;
        case IOMMU_RNGE_32MB:
            s->iostart = 0xfe000000;
            break;
        case IOMMU_RNGE_64MB:
            s->iostart = 0xfc000000;
            break;
        case IOMMU_RNGE_128MB:
            s->iostart = 0xf8000000;
            break;
        case IOMMU_RNGE_256MB:
            s->iostart = 0xf0000000;
            break;
        case IOMMU_RNGE_512MB:
            s->iostart = 0xe0000000;
            break;
        case IOMMU_RNGE_1GB:
            s->iostart = 0xc0000000;
            break;
        default:
        case IOMMU_RNGE_2GB:
            s->iostart = 0x80000000;
            break;
        }
        /* Fall through */
    default:
        /* Every register (including control) is stored verbatim. */
        s->regs[saddr] = val;
        break;
    }
}
+
/* The IOMMU register bank only supports 32-bit accesses: the same word
   handler is installed in the byte, word and long slots. */
static CPUReadMemoryFunc *iommu_mem_read[3] = {
    iommu_mem_readw,
    iommu_mem_readw,
    iommu_mem_readw,
};

static CPUWriteMemoryFunc *iommu_mem_write[3] = {
    iommu_mem_writew,
    iommu_mem_writew,
    iommu_mem_writew,
};
+
/* Translate a DVMA address into a guest physical address by reading the
   corresponding IOPTE from the page table of the most recently created
   IOMMU (module-static `ps`). */
uint32_t iommu_translate(uint32_t addr)
{
    /* regs[1] holds the IOPTE table base shifted right by 4.  The guest
       physical address of the entry is carried in a *host* pointer here
       and only ever used as an integer (cast back to uint32_t below);
       it is never dereferenced directly. */
    uint32_t *iopte = (void *)(ps->regs[1] << 4), pa;

    iopte += ((addr - ps->iostart) >> PAGE_SHIFT);
    cpu_physical_memory_rw((uint32_t)iopte, (void *) &pa, 4, 0);
    /* NOTE(review): byte swap assumes a little-endian host reading the
       big-endian guest page table -- confirm. */
    bswap32s(&pa);
    pa = (pa & IOPTE_PAGE) << 4; /* Loose higher bits of 36 */
    return pa + (addr & PAGE_MASK);
}
+
+void iommu_init(uint32_t addr)
+{
+ IOMMUState *s;
+ int iommu_io_memory;
+
+ s = qemu_mallocz(sizeof(IOMMUState));
+ if (!s)
+ return;
+
+ s->addr = addr;
+
+ iommu_io_memory = cpu_register_io_memory(0, iommu_mem_read, iommu_mem_write, s);
+ cpu_register_physical_memory(addr, sizeof(struct iommu_regs),
+ iommu_io_memory);
+
+ ps = s;
+}
+
diff --git a/tools/ioemu/hw/lance.c b/tools/ioemu/hw/lance.c
new file mode 100644
index 0000000000..25ad8c45b2
--- /dev/null
+++ b/tools/ioemu/hw/lance.c
@@ -0,0 +1,468 @@
+/*
+ * QEMU Lance emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* debug LANCE card */
+//#define DEBUG_LANCE
+
+#ifndef LANCE_LOG_TX_BUFFERS
+#define LANCE_LOG_TX_BUFFERS 4
+#define LANCE_LOG_RX_BUFFERS 4
+#endif
+
+#define CRC_POLYNOMIAL_BE 0x04c11db7UL /* Ethernet CRC, big endian */
+#define CRC_POLYNOMIAL_LE 0xedb88320UL /* Ethernet CRC, little endian */
+
+
+#define LE_CSR0 0
+#define LE_CSR1 1
+#define LE_CSR2 2
+#define LE_CSR3 3
+#define LE_MAXREG (LE_CSR3 + 1)
+
+#define LE_RDP 0
+#define LE_RAP 1
+
+#define LE_MO_PROM 0x8000 /* Enable promiscuous mode */
+
+#define LE_C0_ERR 0x8000 /* Error: set if BAB, SQE, MISS or ME is set */
+#define LE_C0_BABL 0x4000 /* BAB: Babble: tx timeout. */
+#define LE_C0_CERR 0x2000 /* SQE: Signal quality error */
+#define LE_C0_MISS 0x1000 /* MISS: Missed a packet */
+#define LE_C0_MERR 0x0800 /* ME: Memory error */
+#define LE_C0_RINT 0x0400 /* Received interrupt */
+#define LE_C0_TINT 0x0200 /* Transmitter Interrupt */
+#define LE_C0_IDON 0x0100 /* IFIN: Init finished. */
+#define LE_C0_INTR 0x0080 /* Interrupt or error */
+#define LE_C0_INEA 0x0040 /* Interrupt enable */
+#define LE_C0_RXON 0x0020 /* Receiver on */
+#define LE_C0_TXON 0x0010 /* Transmitter on */
+#define LE_C0_TDMD 0x0008 /* Transmitter demand */
+#define LE_C0_STOP 0x0004 /* Stop the card */
+#define LE_C0_STRT 0x0002 /* Start the card */
+#define LE_C0_INIT 0x0001 /* Init the card */
+
+#define LE_C3_BSWP 0x4 /* SWAP */
+#define LE_C3_ACON 0x2 /* ALE Control */
+#define LE_C3_BCON 0x1 /* Byte control */
+
+/* Receive message descriptor 1 */
+#define LE_R1_OWN 0x80 /* Who owns the entry */
+#define LE_R1_ERR 0x40 /* Error: if FRA, OFL, CRC or BUF is set */
+#define LE_R1_FRA 0x20 /* FRA: Frame error */
+#define LE_R1_OFL 0x10 /* OFL: Frame overflow */
+#define LE_R1_CRC 0x08 /* CRC error */
+#define LE_R1_BUF 0x04 /* BUF: Buffer error */
+#define LE_R1_SOP 0x02 /* Start of packet */
+#define LE_R1_EOP 0x01 /* End of packet */
+#define LE_R1_POK 0x03 /* Packet is complete: SOP + EOP */
+
+#define LE_T1_OWN 0x80 /* Lance owns the packet */
+#define LE_T1_ERR 0x40 /* Error summary */
+#define LE_T1_EMORE 0x10 /* Error: more than one retry needed */
+#define LE_T1_EONE 0x08 /* Error: one retry needed */
+#define LE_T1_EDEF 0x04 /* Error: deferred */
+#define LE_T1_SOP 0x02 /* Start of packet */
+#define LE_T1_EOP 0x01 /* End of packet */
+#define LE_T1_POK 0x03 /* Packet is complete: SOP + EOP */
+
+#define LE_T3_BUF 0x8000 /* Buffer error */
+#define LE_T3_UFL 0x4000 /* Error underflow */
+#define LE_T3_LCOL 0x1000 /* Error late collision */
+#define LE_T3_CLOS 0x0800 /* Error carrier loss */
+#define LE_T3_RTY 0x0400 /* Error retry */
+#define LE_T3_TDR 0x03ff /* Time Domain Reflectometry counter */
+
+#define TX_RING_SIZE (1 << (LANCE_LOG_TX_BUFFERS))
+#define TX_RING_MOD_MASK (TX_RING_SIZE - 1)
+#define TX_RING_LEN_BITS ((LANCE_LOG_TX_BUFFERS) << 29)
+
+#define RX_RING_SIZE (1 << (LANCE_LOG_RX_BUFFERS))
+#define RX_RING_MOD_MASK (RX_RING_SIZE - 1)
+#define RX_RING_LEN_BITS ((LANCE_LOG_RX_BUFFERS) << 29)
+
+#define PKT_BUF_SZ 1544
+#define RX_BUFF_SIZE PKT_BUF_SZ
+#define TX_BUFF_SIZE PKT_BUF_SZ
+
+struct lance_rx_desc {
+ unsigned short rmd0; /* low address of packet */
+ unsigned char rmd1_bits; /* descriptor bits */
+ unsigned char rmd1_hadr; /* high address of packet */
+ short length; /* This length is 2s complement (negative)!
+ * Buffer length
+ */
+ unsigned short mblength; /* This is the actual number of bytes received */
+};
+
+struct lance_tx_desc {
+ unsigned short tmd0; /* low address of packet */
+ unsigned char tmd1_bits; /* descriptor bits */
+ unsigned char tmd1_hadr; /* high address of packet */
+ short length; /* Length is 2s complement (negative)! */
+ unsigned short misc;
+};
+
+/* The LANCE initialization block, described in databook. */
+/* On the Sparc, this block should be on a DMA region */
+struct lance_init_block {
+ unsigned short mode; /* Pre-set mode (reg. 15) */
+ unsigned char phys_addr[6]; /* Physical ethernet address */
+ unsigned filter[2]; /* Multicast filter. */
+
+ /* Receive and transmit ring base, along with extra bits. */
+ unsigned short rx_ptr; /* receive descriptor addr */
+ unsigned short rx_len; /* receive len and high addr */
+ unsigned short tx_ptr; /* transmit descriptor addr */
+ unsigned short tx_len; /* transmit len and high addr */
+
+ /* The Tx and Rx ring entries must aligned on 8-byte boundaries. */
+ struct lance_rx_desc brx_ring[RX_RING_SIZE];
+ struct lance_tx_desc btx_ring[TX_RING_SIZE];
+
+ char tx_buf [TX_RING_SIZE][TX_BUFF_SIZE];
+ char pad[2]; /* align rx_buf for copy_and_sum(). */
+ char rx_buf [RX_RING_SIZE][RX_BUFF_SIZE];
+};
+
+#define LEDMA_REGS 4
+#if 0
+/* Structure to describe the current status of DMA registers on the Sparc */
+struct sparc_dma_registers {
+ uint32_t cond_reg; /* DMA condition register */
+ uint32_t st_addr; /* Start address of this transfer */
+ uint32_t cnt; /* How many bytes to transfer */
+ uint32_t dma_test; /* DMA test register */
+};
+#endif
+
+typedef struct LEDMAState {
+ uint32_t addr;
+ uint32_t regs[LEDMA_REGS];
+} LEDMAState;
+
+typedef struct LANCEState {
+ uint32_t paddr;
+ NetDriverState *nd;
+ uint32_t leptr;
+ uint16_t addr;
+ uint16_t regs[LE_MAXREG];
+ uint8_t phys[6]; /* mac address */
+ int irq;
+ LEDMAState *ledma;
+} LANCEState;
+
+static unsigned int rxptr, txptr;
+
+static void lance_send(void *opaque);
+
+static void lance_reset(LANCEState *s)
+{
+ memcpy(s->phys, s->nd->macaddr, 6);
+ rxptr = 0;
+ txptr = 0;
+ s->regs[LE_CSR0] = LE_C0_STOP;
+}
+
+static uint32_t lance_mem_readw(void *opaque, target_phys_addr_t addr)
+{
+ LANCEState *s = opaque;
+ uint32_t saddr;
+
+ saddr = addr - s->paddr;
+ switch (saddr >> 1) {
+ case LE_RDP:
+ return s->regs[s->addr];
+ case LE_RAP:
+ return s->addr;
+ default:
+ break;
+ }
+ return 0;
+}
+
/* Write one of the two 16-bit LANCE ports: RDP (data for the register
   selected by the RAP latch) or RAP (register address latch).  Every
   write ends with a transmit pass, since it may have made TX
   descriptors available. */
static void lance_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    LANCEState *s = opaque;
    uint32_t saddr;
    uint16_t reg;

    saddr = addr - s->paddr;
    switch (saddr >> 1) {
    case LE_RDP:
        switch(s->addr) {
        case LE_CSR0:
            /* STOP overrides everything else in the same write. */
            if (val & LE_C0_STOP) {
                s->regs[LE_CSR0] = LE_C0_STOP;
                break;
            }

            reg = s->regs[LE_CSR0];

            // 1 = clear for some bits
            reg &= ~(val & 0x7f00);

            // generated bits
            reg &= ~(LE_C0_ERR | LE_C0_INTR);
            if (reg & 0x7100)
                reg |= LE_C0_ERR;
            if (reg & 0x7f00)
                reg |= LE_C0_INTR;

            // direct bit
            reg &= ~LE_C0_INEA;
            reg |= val & LE_C0_INEA;

            // exclusive bits
            if (val & LE_C0_INIT) {
                reg |= LE_C0_IDON | LE_C0_INIT;
                reg &= ~LE_C0_STOP;
            }
            else if (val & LE_C0_STRT) {
                reg |= LE_C0_STRT | LE_C0_RXON | LE_C0_TXON;
                reg &= ~LE_C0_STOP;
            }

            s->regs[LE_CSR0] = reg;

            // trigger bits
            //if (val & LE_C0_TDMD)

            /* Raise the interrupt line if pending and enabled. */
            if ((s->regs[LE_CSR0] & LE_C0_INTR) && (s->regs[LE_CSR0] & LE_C0_INEA))
                pic_set_irq(s->irq, 1);
            break;
        case LE_CSR1:
            /* CSR1/CSR2 hold the low/high 16 bits of the init-block
               address (leptr). */
            s->leptr = (s->leptr & 0xffff0000) | (val & 0xffff);
            s->regs[s->addr] = val;
            break;
        case LE_CSR2:
            s->leptr = (s->leptr & 0xffff) | ((val & 0xffff) << 16);
            s->regs[s->addr] = val;
            break;
        case LE_CSR3:
            s->regs[s->addr] = val;
            break;
        }
        break;
    case LE_RAP:
        /* Out-of-range register selects are silently ignored. */
        if (val < LE_MAXREG)
            s->addr = val;
        break;
    default:
        break;
    }
    lance_send(s);
}
+
/* The LANCE ports are 16-bit only: the same word handler is installed
   for byte, word and long access sizes. */
static CPUReadMemoryFunc *lance_mem_read[3] = {
    lance_mem_readw,
    lance_mem_readw,
    lance_mem_readw,
};

static CPUWriteMemoryFunc *lance_mem_write[3] = {
    lance_mem_writew,
    lance_mem_writew,
    lance_mem_writew,
};
+
+
/* return the max buffer size if the LANCE can receive more data */
/* Scans the RX ring for a descriptor owned by the card (LE_R1_OWN);
   returns RX_BUFF_SIZE if one exists, 0 while stopped or when the ring
   is full. */
static int lance_can_receive(void *opaque)
{
    LANCEState *s = opaque;
    /* Init block address = leptr plus the DVMA base in ledma reg 3;
       it lives in guest DVMA space and goes through the IOMMU. */
    void *dmaptr = (void *) (s->leptr + s->ledma->regs[3]);
    struct lance_init_block *ib;
    int i;
    uint16_t temp;

    if ((s->regs[LE_CSR0] & LE_C0_STOP) == LE_C0_STOP)
        return 0;

    ib = (void *) iommu_translate(dmaptr);

    for (i = 0; i < RX_RING_SIZE; i++) {
        /* Only one byte is read into temp; mask off the rest. */
        cpu_physical_memory_read(&ib->brx_ring[i].rmd1_bits, (void *) &temp, 1);
        temp &= 0xff;
        if (temp == (LE_R1_OWN)) {
#ifdef DEBUG_LANCE
            fprintf(stderr, "lance: can receive %d\n", RX_BUFF_SIZE);
#endif
            return RX_BUFF_SIZE;
        }
    }
#ifdef DEBUG_LANCE
    fprintf(stderr, "lance: cannot receive\n");
#endif
    return 0;
}
+
+#define MIN_BUF_SIZE 60
+
+static void lance_receive(void *opaque, const uint8_t *buf, int size)
+{
+ LANCEState *s = opaque;
+ void *dmaptr = (void *) (s->leptr + s->ledma->regs[3]);
+ struct lance_init_block *ib;
+ unsigned int i, old_rxptr, j;
+ uint16_t temp;
+
+ if ((s->regs[LE_CSR0] & LE_C0_STOP) == LE_C0_STOP)
+ return;
+
+ ib = (void *) iommu_translate(dmaptr);
+
+ old_rxptr = rxptr;
+ for (i = rxptr; i != ((old_rxptr - 1) & RX_RING_MOD_MASK); i = (i + 1) & RX_RING_MOD_MASK) {
+ cpu_physical_memory_read(&ib->brx_ring[i].rmd1_bits, (void *) &temp, 1);
+ if (temp == (LE_R1_OWN)) {
+ rxptr = (rxptr + 1) & RX_RING_MOD_MASK;
+ temp = size;
+ bswap16s(&temp);
+ cpu_physical_memory_write(&ib->brx_ring[i].mblength, (void *) &temp, 2);
+#if 0
+ cpu_physical_memory_write(&ib->rx_buf[i], buf, size);
+#else
+ for (j = 0; j < size; j++) {
+ cpu_physical_memory_write(((void *)&ib->rx_buf[i]) + j, &buf[j], 1);
+ }
+#endif
+ temp = LE_R1_POK;
+ cpu_physical_memory_write(&ib->brx_ring[i].rmd1_bits, (void *) &temp, 1);
+ s->regs[LE_CSR0] |= LE_C0_RINT | LE_C0_INTR;
+ if ((s->regs[LE_CSR0] & LE_C0_INTR) && (s->regs[LE_CSR0] & LE_C0_INEA))
+ pic_set_irq(s->irq, 1);
+#ifdef DEBUG_LANCE
+ fprintf(stderr, "lance: got packet, len %d\n", size);
+#endif
+ return;
+ }
+ }
+}
+
+static void lance_send(void *opaque)
+{
+ LANCEState *s = opaque;
+ void *dmaptr = (void *) (s->leptr + s->ledma->regs[3]);
+ struct lance_init_block *ib;
+ unsigned int i, old_txptr, j;
+ uint16_t temp;
+ char pkt_buf[PKT_BUF_SZ];
+
+ if ((s->regs[LE_CSR0] & LE_C0_STOP) == LE_C0_STOP)
+ return;
+
+ ib = (void *) iommu_translate(dmaptr);
+
+ old_txptr = txptr;
+ for (i = txptr; i != ((old_txptr - 1) & TX_RING_MOD_MASK); i = (i + 1) & TX_RING_MOD_MASK) {
+ cpu_physical_memory_read(&ib->btx_ring[i].tmd1_bits, (void *) &temp, 1);
+ if (temp == (LE_T1_POK|LE_T1_OWN)) {
+ cpu_physical_memory_read(&ib->btx_ring[i].length, (void *) &temp, 2);
+ bswap16s(&temp);
+ temp = (~temp) + 1;
+#if 0
+ cpu_physical_memory_read(&ib->tx_buf[i], pkt_buf, temp);
+#else
+ for (j = 0; j < temp; j++) {
+ cpu_physical_memory_read(((void *)&ib->tx_buf[i]) + j, &pkt_buf[j], 1);
+ }
+#endif
+
+#ifdef DEBUG_LANCE
+ fprintf(stderr, "lance: sending packet, len %d\n", temp);
+#endif
+ qemu_send_packet(s->nd, pkt_buf, temp);
+ temp = LE_T1_POK;
+ cpu_physical_memory_write(&ib->btx_ring[i].tmd1_bits, (void *) &temp, 1);
+ txptr = (txptr + 1) & TX_RING_MOD_MASK;
+ s->regs[LE_CSR0] |= LE_C0_TINT | LE_C0_INTR;
+ }
+ }
+}
+
+static uint32_t ledma_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+ LEDMAState *s = opaque;
+ uint32_t saddr;
+
+ saddr = (addr - s->addr) >> 2;
+ if (saddr < LEDMA_REGS)
+ return s->regs[saddr];
+ else
+ return 0;
+}
+
+static void ledma_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ LEDMAState *s = opaque;
+ uint32_t saddr;
+
+ saddr = (addr - s->addr) >> 2;
+ if (saddr < LEDMA_REGS)
+ s->regs[saddr] = val;
+}
+
/* The DMA controller registers are 32-bit only: the same long handler is
   installed for byte, word and long access sizes. */
static CPUReadMemoryFunc *ledma_mem_read[3] = {
    ledma_mem_readl,
    ledma_mem_readl,
    ledma_mem_readl,
};

static CPUWriteMemoryFunc *ledma_mem_write[3] = {
    ledma_mem_writel,
    ledma_mem_writel,
    ledma_mem_writel,
};
+
+void lance_init(NetDriverState *nd, int irq, uint32_t leaddr, uint32_t ledaddr)
+{
+ LANCEState *s;
+ LEDMAState *led;
+ int lance_io_memory, ledma_io_memory;
+
+ s = qemu_mallocz(sizeof(LANCEState));
+ if (!s)
+ return;
+
+ s->paddr = leaddr;
+ s->nd = nd;
+ s->irq = irq;
+
+ lance_io_memory = cpu_register_io_memory(0, lance_mem_read, lance_mem_write, s);
+ cpu_register_physical_memory(leaddr, 8, lance_io_memory);
+
+ led = qemu_mallocz(sizeof(LEDMAState));
+ if (!led)
+ return;
+
+ s->ledma = led;
+ led->addr = ledaddr;
+ ledma_io_memory = cpu_register_io_memory(0, ledma_mem_read, ledma_mem_write, led);
+ cpu_register_physical_memory(ledaddr, 16, ledma_io_memory);
+
+ lance_reset(s);
+ qemu_add_read_packet(nd, lance_can_receive, lance_receive, s);
+}
+
diff --git a/tools/ioemu/hw/m48t08.c b/tools/ioemu/hw/m48t08.c
new file mode 100644
index 0000000000..46ec665570
--- /dev/null
+++ b/tools/ioemu/hw/m48t08.c
@@ -0,0 +1,391 @@
+/*
+ * QEMU M48T08 NVRAM emulation for Sparc platform
+ *
+ * Copyright (c) 2003-2004 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "m48t08.h"
+
+//#define DEBUG_NVRAM
+
+#if defined(DEBUG_NVRAM)
+#define NVRAM_PRINTF(fmt, args...) do { printf(fmt , ##args); } while (0)
+#else
+#define NVRAM_PRINTF(fmt, args...) do { } while (0)
+#endif
+
+#define NVRAM_MAX_MEM 0xfff0
+
+struct m48t08_t {
+ /* Hardware parameters */
+ int mem_index;
+ uint32_t mem_base;
+ uint16_t size;
+ /* RTC management */
+ time_t time_offset;
+ time_t stop_time;
+ /* NVRAM storage */
+ uint8_t lock;
+ uint16_t addr;
+ uint8_t *buffer;
+};
+
+/* Fake timer functions */
+/* Generic helpers for BCD */
/* Encode a binary value as two packed BCD digits (tens in the high
   nibble, units in the low nibble); hundreds are discarded. */
static inline uint8_t toBCD (uint8_t value)
{
    uint8_t tens = (uint8_t)((value / 10) % 10);
    uint8_t units = (uint8_t)(value % 10);

    return (uint8_t)((tens << 4) | units);
}
+
/* Decode two packed BCD digits back into a binary value. */
static inline uint8_t fromBCD (uint8_t BCD)
{
    uint8_t tens = (uint8_t)(BCD >> 4);

    return (uint8_t)(tens * 10 + (BCD & 0x0F));
}
+
+/* RTC management helpers */
/* Fill *tm with the emulated wall-clock time: the host clock shifted by
   the guest-controlled offset.  The _WIN32 path copies from localtime()'s
   static buffer (not thread-safe); elsewhere the reentrant variant is
   used. */
static void get_time (m48t08_t *NVRAM, struct tm *tm)
{
    time_t t;

    t = time(NULL) + NVRAM->time_offset;
#ifdef _WIN32
    memcpy(tm,localtime(&t),sizeof(*tm));
#else
    localtime_r (&t, tm) ;
#endif
}
+
+static void set_time (m48t08_t *NVRAM, struct tm *tm)
+{
+ time_t now, new_time;
+
+ new_time = mktime(tm);
+ now = time(NULL);
+ NVRAM->time_offset = new_time - now;
+}
+
+/* Direct access to NVRAM */
/* Write one byte (low 8 bits of val) at the latched address NVRAM->addr.
   Addresses 0x1FF8-0x1FFF are the control/RTC registers; everything
   else is plain storage, subject to the 0x20-0x3F lock windows. */
void m48t08_write (m48t08_t *NVRAM, uint32_t val)
{
    struct tm tm;
    int tmp;

    if (NVRAM->addr > NVRAM_MAX_MEM && NVRAM->addr < 0x2000)
        NVRAM_PRINTF("%s: 0x%08x => 0x%08x\n", __func__, NVRAM->addr, val);
    switch (NVRAM->addr) {
    case 0x1FF8:
        /* control */
        NVRAM->buffer[0x1FF8] = (val & ~0xA0) | 0x90;
        break;
    case 0x1FF9:
        /* seconds (BCD) */
        tmp = fromBCD(val & 0x7F);
        if (tmp >= 0 && tmp <= 59) {
            get_time(NVRAM, &tm);
            tm.tm_sec = tmp;
            set_time(NVRAM, &tm);
        }
        /* Bit 7 is the clock STOP bit; on the 1->0 transition the time
           spent stopped is folded into time_offset so the clock resumes
           where it left off. */
        if ((val & 0x80) ^ (NVRAM->buffer[0x1FF9] & 0x80)) {
            if (val & 0x80) {
                NVRAM->stop_time = time(NULL);
            } else {
                NVRAM->time_offset += NVRAM->stop_time - time(NULL);
                NVRAM->stop_time = 0;
            }
        }
        /* Only the STOP bit is persisted; the seconds themselves are
           synthesized from the host clock on read. */
        NVRAM->buffer[0x1FF9] = val & 0x80;
        break;
    case 0x1FFA:
        /* minutes (BCD) */
        tmp = fromBCD(val & 0x7F);
        if (tmp >= 0 && tmp <= 59) {
            get_time(NVRAM, &tm);
            tm.tm_min = tmp;
            set_time(NVRAM, &tm);
        }
        break;
    case 0x1FFB:
        /* hours (BCD) */
        tmp = fromBCD(val & 0x3F);
        if (tmp >= 0 && tmp <= 23) {
            get_time(NVRAM, &tm);
            tm.tm_hour = tmp;
            set_time(NVRAM, &tm);
        }
        break;
    case 0x1FFC:
        /* day of the week / century */
        tmp = fromBCD(val & 0x07);
        get_time(NVRAM, &tm);
        tm.tm_wday = tmp;
        set_time(NVRAM, &tm);
        /* Only the century bit (0x40) is persisted. */
        NVRAM->buffer[0x1FFC] = val & 0x40;
        break;
    case 0x1FFD:
        /* date */
        tmp = fromBCD(val & 0x1F);
        if (tmp != 0) {
            get_time(NVRAM, &tm);
            tm.tm_mday = tmp;
            set_time(NVRAM, &tm);
        }
        break;
    case 0x1FFE:
        /* month */
        tmp = fromBCD(val & 0x1F);
        if (tmp >= 1 && tmp <= 12) {
            get_time(NVRAM, &tm);
            tm.tm_mon = tmp - 1;
            set_time(NVRAM, &tm);
        }
        break;
    case 0x1FFF:
        /* year */
        tmp = fromBCD(val);
        if (tmp >= 0 && tmp <= 99) {
            get_time(NVRAM, &tm);
            /* NOTE(review): tm_year counts from 1900, so this pins the
               clock to 1900-1999 -- confirm intended epoch handling. */
            tm.tm_year = fromBCD(val);
            set_time(NVRAM, &tm);
        }
        break;
    default:
        /* Check lock registers state */
        if (NVRAM->addr >= 0x20 && NVRAM->addr <= 0x2F && (NVRAM->lock & 1))
            break;
        if (NVRAM->addr >= 0x30 && NVRAM->addr <= 0x3F && (NVRAM->lock & 2))
            break;
        if (NVRAM->addr < NVRAM_MAX_MEM ||
            (NVRAM->addr > 0x1FFF && NVRAM->addr < NVRAM->size)) {
            NVRAM->buffer[NVRAM->addr] = val & 0xFF;
        }
        break;
    }
}
+
/* Read one byte from the latched address NVRAM->addr.  RTC registers
   (0x1FF9-0x1FFF) are synthesized from the host clock; plain storage is
   returned verbatim, honoring the lock windows.  Out-of-range or locked
   addresses read 0xFF. */
uint32_t m48t08_read (m48t08_t *NVRAM)
{
    struct tm tm;
    uint32_t retval = 0xFF;

    switch (NVRAM->addr) {
    case 0x1FF8:
        /* control */
        /* Jumps into the default case's buffer access, deliberately
           bypassing the lock/bounds checks: control is always readable. */
        goto do_read;
    case 0x1FF9:
        /* seconds (BCD) -- the stored STOP bit is merged back in */
        get_time(NVRAM, &tm);
        retval = (NVRAM->buffer[0x1FF9] & 0x80) | toBCD(tm.tm_sec);
        break;
    case 0x1FFA:
        /* minutes (BCD) */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_min);
        break;
    case 0x1FFB:
        /* hours (BCD) */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_hour);
        break;
    case 0x1FFC:
        /* day of the week / century (stored century bit merged in) */
        get_time(NVRAM, &tm);
        retval = NVRAM->buffer[0x1FFC] | tm.tm_wday;
        break;
    case 0x1FFD:
        /* date */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_mday);
        break;
    case 0x1FFE:
        /* month */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_mon + 1);
        break;
    case 0x1FFF:
        /* year */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_year);
        break;
    default:
        /* Check lock registers state */
        if (NVRAM->addr >= 0x20 && NVRAM->addr <= 0x2F && (NVRAM->lock & 1))
            break;
        if (NVRAM->addr >= 0x30 && NVRAM->addr <= 0x3F && (NVRAM->lock & 2))
            break;
        if (NVRAM->addr < NVRAM_MAX_MEM ||
            (NVRAM->addr > 0x1FFF && NVRAM->addr < NVRAM->size)) {
        do_read:
            retval = NVRAM->buffer[NVRAM->addr];
        }
        break;
    }
    if (NVRAM->addr > NVRAM_MAX_MEM + 1 && NVRAM->addr < 0x2000)
        NVRAM_PRINTF("0x%08x <= 0x%08x\n", NVRAM->addr, retval);

    return retval;
}
+
/* Latch the address used by subsequent m48t08_read/m48t08_write calls. */
void m48t08_set_addr (m48t08_t *NVRAM, uint32_t addr)
{
    NVRAM->addr = addr;
}
+
/* Flip one of the NVRAM lock bits; `lock` is the bit index (0 -> the
   0x20-0x2F window, 1 -> the 0x30-0x3F window). */
void m48t08_toggle_lock (m48t08_t *NVRAM, int lock)
{
    NVRAM->lock ^= 1 << lock;
}
+
+static void nvram_writeb (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ m48t08_t *NVRAM = opaque;
+
+ addr -= NVRAM->mem_base;
+ if (addr < NVRAM_MAX_MEM)
+ NVRAM->buffer[addr] = value;
+}
+
+static void nvram_writew (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ m48t08_t *NVRAM = opaque;
+
+ addr -= NVRAM->mem_base;
+ if (addr < NVRAM_MAX_MEM) {
+ NVRAM->buffer[addr] = value >> 8;
+ NVRAM->buffer[addr + 1] = value;
+ }
+}
+
+static void nvram_writel (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ m48t08_t *NVRAM = opaque;
+
+ addr -= NVRAM->mem_base;
+ if (addr < NVRAM_MAX_MEM) {
+ NVRAM->buffer[addr] = value >> 24;
+ NVRAM->buffer[addr + 1] = value >> 16;
+ NVRAM->buffer[addr + 2] = value >> 8;
+ NVRAM->buffer[addr + 3] = value;
+ }
+}
+
+static uint32_t nvram_readb (void *opaque, target_phys_addr_t addr)
+{
+ m48t08_t *NVRAM = opaque;
+ uint32_t retval = 0;
+
+ addr -= NVRAM->mem_base;
+ if (addr < NVRAM_MAX_MEM)
+ retval = NVRAM->buffer[addr];
+
+ return retval;
+}
+
+static uint32_t nvram_readw (void *opaque, target_phys_addr_t addr)
+{
+ m48t08_t *NVRAM = opaque;
+ uint32_t retval = 0;
+
+ addr -= NVRAM->mem_base;
+ if (addr < NVRAM_MAX_MEM) {
+ retval = NVRAM->buffer[addr] << 8;
+ retval |= NVRAM->buffer[addr + 1];
+ }
+
+ return retval;
+}
+
+static uint32_t nvram_readl (void *opaque, target_phys_addr_t addr)
+{
+ m48t08_t *NVRAM = opaque;
+ uint32_t retval = 0;
+
+ addr -= NVRAM->mem_base;
+ if (addr < NVRAM_MAX_MEM) {
+ retval = NVRAM->buffer[addr] << 24;
+ retval |= NVRAM->buffer[addr + 1] << 16;
+ retval |= NVRAM->buffer[addr + 2] << 8;
+ retval |= NVRAM->buffer[addr + 3];
+ }
+
+ return retval;
+}
+
/* Size-indexed dispatch tables for the directly-mapped NVRAM window
   (byte, word, long). */
static CPUWriteMemoryFunc *nvram_write[] = {
    &nvram_writeb,
    &nvram_writew,
    &nvram_writel,
};

static CPUReadMemoryFunc *nvram_read[] = {
    &nvram_readb,
    &nvram_readw,
    &nvram_readl,
};
+
+/* Initialisation routine */
/* Create the M48T08 device with a `size`-byte backing buffer; if
   mem_base is non-zero the NVRAM is also mapped directly into guest
   physical memory.  macaddr (6 bytes) is copied into the Sun IDPROM
   area.  Returns NULL on allocation failure. */
m48t08_t *m48t08_init(uint32_t mem_base, uint16_t size, uint8_t *macaddr)
{
    m48t08_t *s;
    int i;
    unsigned char tmp = 0;

    s = qemu_mallocz(sizeof(m48t08_t));
    if (!s)
        return NULL;
    s->buffer = qemu_mallocz(size);
    if (!s->buffer) {
        qemu_free(s);
        return NULL;
    }
    s->size = size;
    s->mem_base = mem_base;
    s->addr = 0;
    if (mem_base != 0) {
        /* NOTE(review): always maps a 0x4000-byte window regardless of
           `size`; the access handlers must bounds-check the buffer. */
        s->mem_index = cpu_register_io_memory(0, nvram_read, nvram_write, s);
        cpu_register_physical_memory(mem_base, 0x4000, s->mem_index);
    }
    s->lock = 0;

    /* Sun IDPROM header at 0x1fd8: format byte 0x01, machine type 0x80,
       then the 6-byte MAC address (assumes size >= 0x2000 -- confirm). */
    i = 0x1fd8;
    s->buffer[i++] = 0x01;
    s->buffer[i++] = 0x80; /* Sun4m OBP */
    memcpy(&s->buffer[i], macaddr, 6);

    /* IDPROM checksum: XOR of bytes 0x1fd8..0x1fe6, stored at 0x1fe7. */
    for (i = 0x1fd8; i < 0x1fe7; i++) {
        tmp ^= s->buffer[i];
    }
    s->buffer[0x1fe7] = tmp;
    return s;
}
+
+#if 0
+struct idprom
+{
+ unsigned char id_format; /* Format identifier (always 0x01) */
+ unsigned char id_machtype; /* Machine type */
+ unsigned char id_ethaddr[6]; /* Hardware ethernet address */
+ long id_date; /* Date of manufacture */
+ unsigned int id_sernum:24; /* Unique serial number */
+ unsigned char id_cksum; /* Checksum - xor of the data bytes */
+ unsigned char reserved[16];
+};
+#endif
diff --git a/tools/ioemu/hw/m48t08.h b/tools/ioemu/hw/m48t08.h
new file mode 100644
index 0000000000..9b44bc0d16
--- /dev/null
+++ b/tools/ioemu/hw/m48t08.h
@@ -0,0 +1,12 @@
#if !defined (__M48T08_H__)
#define __M48T08_H__

/* Opaque handle for the M48T08 NVRAM/RTC emulation (see m48t08.c). */
typedef struct m48t08_t m48t08_t;

/* Write one byte (low bits of val) at the currently latched address. */
void m48t08_write (m48t08_t *NVRAM, uint32_t val);
/* Read one byte from the currently latched address. */
uint32_t m48t08_read (m48t08_t *NVRAM);
/* Latch the address used by subsequent read/write calls. */
void m48t08_set_addr (m48t08_t *NVRAM, uint32_t addr);
/* Flip one of the NVRAM lock bits (lock is the bit index: 0 or 1). */
void m48t08_toggle_lock (m48t08_t *NVRAM, int lock);
/* Create the device; maps it at mem_base when non-zero.  macaddr is the
   6-byte MAC copied into the IDPROM area.  Returns NULL on failure. */
m48t08_t *m48t08_init(uint32_t mem_base, uint16_t size, uint8_t *macaddr);

#endif /* !defined (__M48T08_H__) */
diff --git a/tools/ioemu/hw/m48t59.c b/tools/ioemu/hw/m48t59.c
new file mode 100644
index 0000000000..5ab58160a9
--- /dev/null
+++ b/tools/ioemu/hw/m48t59.c
@@ -0,0 +1,602 @@
+/*
+ * QEMU M48T59 NVRAM emulation for PPC PREP platform
+ *
+ * Copyright (c) 2003-2004 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "m48t59.h"
+
+//#define DEBUG_NVRAM
+
+#if defined(DEBUG_NVRAM)
+#define NVRAM_PRINTF(fmt, args...) do { printf(fmt , ##args); } while (0)
+#else
+#define NVRAM_PRINTF(fmt, args...) do { } while (0)
+#endif
+
/* State of one emulated M48T59 NVRAM/RTC chip. */
struct m48t59_t {
    /* Hardware parameters */
    int IRQ;                      /* interrupt line raised by alarm/watchdog */
    int mem_index;                /* handle from cpu_register_io_memory() */
    uint32_t mem_base;            /* physical base of memory mapping (0 = none) */
    uint32_t io_base;             /* base of the 4-byte I/O window */
    uint16_t size;                /* NVRAM size in bytes */
    /* RTC management */
    time_t time_offset;           /* guest time = host time + time_offset */
    time_t stop_time;             /* host time when the oscillator was stopped */
    /* Alarm & watchdog */
    time_t alarm;                 /* absolute alarm time */
    struct QEMUTimer *alrm_timer;
    struct QEMUTimer *wd_timer;
    /* NVRAM storage */
    uint8_t lock;                 /* bit 0 locks 0x20-0x2F, bit 1 locks 0x30-0x3F */
    uint16_t addr;                /* currently selected NVRAM address */
    uint8_t *buffer;              /* backing store, 'size' bytes */
};
+
+/* Fake timer functions */
+/* Generic helpers for BCD */
/* Encode a binary value as two packed BCD digits (tens in the high
 * nibble, units in the low one; values >= 100 wrap modulo 100). */
static inline uint8_t toBCD (uint8_t value)
{
    uint8_t tens = (value / 10) % 10;
    uint8_t units = value % 10;

    return (uint8_t)((tens << 4) | units);
}
+
/* Decode two packed BCD digits back into a binary value. */
static inline uint8_t fromBCD (uint8_t BCD)
{
    uint8_t tens = BCD >> 4;
    uint8_t units = BCD & 0x0F;

    return (uint8_t)(tens * 10 + units);
}
+
/* RTC management helpers */
/* Fill 'tm' with the emulated wall-clock time: the host clock shifted
 * by the guest-programmed offset, as local broken-down time. */
static void get_time (m48t59_t *NVRAM, struct tm *tm)
{
    time_t t;

    t = time(NULL) + NVRAM->time_offset;
#ifdef _WIN32
    /* no localtime_r on win32: copy out of localtime()'s static buffer */
    memcpy(tm,localtime(&t),sizeof(*tm));
#else
    localtime_r (&t, tm) ;
#endif
}
+
+static void set_time (m48t59_t *NVRAM, struct tm *tm)
+{
+ time_t now, new_time;
+
+ new_time = mktime(tm);
+ now = time(NULL);
+ NVRAM->time_offset = new_time - now;
+}
+
+/* Alarm management */
+static void alarm_cb (void *opaque)
+{
+ struct tm tm, tm_now;
+ uint64_t next_time;
+ m48t59_t *NVRAM = opaque;
+
+ pic_set_irq(NVRAM->IRQ, 1);
+ if ((NVRAM->buffer[0x1FF5] & 0x80) == 0 &&
+ (NVRAM->buffer[0x1FF4] & 0x80) == 0 &&
+ (NVRAM->buffer[0x1FF3] & 0x80) == 0 &&
+ (NVRAM->buffer[0x1FF2] & 0x80) == 0) {
+ /* Repeat once a month */
+ get_time(NVRAM, &tm_now);
+ memcpy(&tm, &tm_now, sizeof(struct tm));
+ tm.tm_mon++;
+ if (tm.tm_mon == 13) {
+ tm.tm_mon = 1;
+ tm.tm_year++;
+ }
+ next_time = mktime(&tm);
+ } else if ((NVRAM->buffer[0x1FF5] & 0x80) != 0 &&
+ (NVRAM->buffer[0x1FF4] & 0x80) == 0 &&
+ (NVRAM->buffer[0x1FF3] & 0x80) == 0 &&
+ (NVRAM->buffer[0x1FF2] & 0x80) == 0) {
+ /* Repeat once a day */
+ next_time = 24 * 60 * 60 + mktime(&tm_now);
+ } else if ((NVRAM->buffer[0x1FF5] & 0x80) != 0 &&
+ (NVRAM->buffer[0x1FF4] & 0x80) != 0 &&
+ (NVRAM->buffer[0x1FF3] & 0x80) == 0 &&
+ (NVRAM->buffer[0x1FF2] & 0x80) == 0) {
+ /* Repeat once an hour */
+ next_time = 60 * 60 + mktime(&tm_now);
+ } else if ((NVRAM->buffer[0x1FF5] & 0x80) != 0 &&
+ (NVRAM->buffer[0x1FF4] & 0x80) != 0 &&
+ (NVRAM->buffer[0x1FF3] & 0x80) != 0 &&
+ (NVRAM->buffer[0x1FF2] & 0x80) == 0) {
+ /* Repeat once a minute */
+ next_time = 60 + mktime(&tm_now);
+ } else {
+ /* Repeat once a second */
+ next_time = 1 + mktime(&tm_now);
+ }
+ qemu_mod_timer(NVRAM->alrm_timer, next_time * 1000);
+ pic_set_irq(NVRAM->IRQ, 0);
+}
+
+
/* Fill 'tm' with the broken-down local time of the stored alarm. */
static void get_alarm (m48t59_t *NVRAM, struct tm *tm)
{
#ifdef _WIN32
    /* no localtime_r on win32: copy out of localtime()'s static buffer */
    memcpy(tm,localtime(&NVRAM->alarm),sizeof(*tm));
#else
    localtime_r (&NVRAM->alarm, tm);
#endif
}
+
+static void set_alarm (m48t59_t *NVRAM, struct tm *tm)
+{
+ NVRAM->alarm = mktime(tm);
+ if (NVRAM->alrm_timer != NULL) {
+ qemu_del_timer(NVRAM->alrm_timer);
+ NVRAM->alrm_timer = NULL;
+ }
+ if (NVRAM->alarm - time(NULL) > 0)
+ qemu_mod_timer(NVRAM->alrm_timer, NVRAM->alarm * 1000);
+}
+
+/* Watchdog management */
/* Watchdog expiry callback: set the watchdog flag in the flags
 * register, then either request a system reset (when the top bit of
 * the watchdog register 0x1FF7 is set) or pulse the IRQ line. */
static void watchdog_cb (void *opaque)
{
    m48t59_t *NVRAM = opaque;

    NVRAM->buffer[0x1FF0] |= 0x80;
    if (NVRAM->buffer[0x1FF7] & 0x80) {
        NVRAM->buffer[0x1FF7] = 0x00;
        NVRAM->buffer[0x1FFC] &= ~0x40;
        /* May it be a hw CPU Reset instead ? */
        qemu_system_reset_request();
    } else {
        pic_set_irq(NVRAM->IRQ, 1);
        pic_set_irq(NVRAM->IRQ, 0);
    }
}
+
+static void set_up_watchdog (m48t59_t *NVRAM, uint8_t value)
+{
+ uint64_t interval; /* in 1/16 seconds */
+
+ if (NVRAM->wd_timer != NULL) {
+ qemu_del_timer(NVRAM->wd_timer);
+ NVRAM->wd_timer = NULL;
+ }
+ NVRAM->buffer[0x1FF0] &= ~0x80;
+ if (value != 0) {
+ interval = (1 << (2 * (value & 0x03))) * ((value >> 2) & 0x1F);
+ qemu_mod_timer(NVRAM->wd_timer, ((uint64_t)time(NULL) * 1000) +
+ ((interval * 1000) >> 4));
+ }
+}
+
/* Direct access to NVRAM */
/* Write 'val' to the cell currently selected by NVRAM->addr.
 * Addresses 0x1FF0-0x1FFF are the chip's control/RTC registers (time
 * fields are BCD encoded); everything else is plain battery-backed RAM
 * subject to the two software lock ranges. */
void m48t59_write (m48t59_t *NVRAM, uint32_t val)
{
    struct tm tm;
    int tmp;

    if (NVRAM->addr > 0x1FF8 && NVRAM->addr < 0x2000)
        NVRAM_PRINTF("%s: 0x%08x => 0x%08x\n", __func__, NVRAM->addr, val);
    switch (NVRAM->addr) {
    case 0x1FF0:
        /* flags register : read-only */
        break;
    case 0x1FF1:
        /* unused */
        break;
    case 0x1FF2:
        /* alarm seconds */
        tmp = fromBCD(val & 0x7F);
        if (tmp >= 0 && tmp <= 59) {
            get_alarm(NVRAM, &tm);
            tm.tm_sec = tmp;
            NVRAM->buffer[0x1FF2] = val;
            set_alarm(NVRAM, &tm);
        }
        break;
    case 0x1FF3:
        /* alarm minutes */
        tmp = fromBCD(val & 0x7F);
        if (tmp >= 0 && tmp <= 59) {
            get_alarm(NVRAM, &tm);
            tm.tm_min = tmp;
            NVRAM->buffer[0x1FF3] = val;
            set_alarm(NVRAM, &tm);
        }
        break;
    case 0x1FF4:
        /* alarm hours */
        tmp = fromBCD(val & 0x3F);
        if (tmp >= 0 && tmp <= 23) {
            get_alarm(NVRAM, &tm);
            tm.tm_hour = tmp;
            NVRAM->buffer[0x1FF4] = val;
            set_alarm(NVRAM, &tm);
        }
        break;
    case 0x1FF5:
        /* alarm date (0 means "ignore"; otherwise day of month) */
        tmp = fromBCD(val & 0x1F);
        if (tmp != 0) {
            get_alarm(NVRAM, &tm);
            tm.tm_mday = tmp;
            NVRAM->buffer[0x1FF5] = val;
            set_alarm(NVRAM, &tm);
        }
        break;
    case 0x1FF6:
        /* interrupts */
        NVRAM->buffer[0x1FF6] = val;
        break;
    case 0x1FF7:
        /* watchdog: writing re-programs the countdown */
        NVRAM->buffer[0x1FF7] = val;
        set_up_watchdog(NVRAM, val);
        break;
    case 0x1FF8:
        /* control */
        NVRAM->buffer[0x1FF8] = (val & ~0xA0) | 0x90;
        break;
    case 0x1FF9:
        /* seconds (BCD) */
        tmp = fromBCD(val & 0x7F);
        if (tmp >= 0 && tmp <= 59) {
            get_time(NVRAM, &tm);
            tm.tm_sec = tmp;
            set_time(NVRAM, &tm);
        }
        /* Bit 7 is the oscillator STOP bit: freeze the clock on the
         * 0->1 transition, credit the stopped interval back on 1->0 */
        if ((val & 0x80) ^ (NVRAM->buffer[0x1FF9] & 0x80)) {
            if (val & 0x80) {
                NVRAM->stop_time = time(NULL);
            } else {
                NVRAM->time_offset += NVRAM->stop_time - time(NULL);
                NVRAM->stop_time = 0;
            }
        }
        NVRAM->buffer[0x1FF9] = val & 0x80;
        break;
    case 0x1FFA:
        /* minutes (BCD) */
        tmp = fromBCD(val & 0x7F);
        if (tmp >= 0 && tmp <= 59) {
            get_time(NVRAM, &tm);
            tm.tm_min = tmp;
            set_time(NVRAM, &tm);
        }
        break;
    case 0x1FFB:
        /* hours (BCD) */
        tmp = fromBCD(val & 0x3F);
        if (tmp >= 0 && tmp <= 23) {
            get_time(NVRAM, &tm);
            tm.tm_hour = tmp;
            set_time(NVRAM, &tm);
        }
        break;
    case 0x1FFC:
        /* day of the week / century */
        tmp = fromBCD(val & 0x07);
        get_time(NVRAM, &tm);
        tm.tm_wday = tmp;
        set_time(NVRAM, &tm);
        NVRAM->buffer[0x1FFC] = val & 0x40;
        break;
    case 0x1FFD:
        /* date (0 is rejected) */
        tmp = fromBCD(val & 0x1F);
        if (tmp != 0) {
            get_time(NVRAM, &tm);
            tm.tm_mday = tmp;
            set_time(NVRAM, &tm);
        }
        break;
    case 0x1FFE:
        /* month (1..12 on the wire, 0..11 in struct tm) */
        tmp = fromBCD(val & 0x1F);
        if (tmp >= 1 && tmp <= 12) {
            get_time(NVRAM, &tm);
            tm.tm_mon = tmp - 1;
            set_time(NVRAM, &tm);
        }
        break;
    case 0x1FFF:
        /* year */
        tmp = fromBCD(val);
        if (tmp >= 0 && tmp <= 99) {
            get_time(NVRAM, &tm);
            /* NOTE(review): stores the raw 0-99 value into tm_year
             * (years since 1900), i.e. only 1900-1999 are expressible
             * -- confirm the intended epoch for this platform. */
            tm.tm_year = fromBCD(val);
            set_time(NVRAM, &tm);
        }
        break;
    default:
        /* Check lock registers state */
        if (NVRAM->addr >= 0x20 && NVRAM->addr <= 0x2F && (NVRAM->lock & 1))
            break;
        if (NVRAM->addr >= 0x30 && NVRAM->addr <= 0x3F && (NVRAM->lock & 2))
            break;
        /* plain RAM: below the register file, or from 0x2000 up to the
         * end of the buffer */
        if (NVRAM->addr < 0x1FF0 ||
            (NVRAM->addr > 0x1FFF && NVRAM->addr < NVRAM->size)) {
            NVRAM->buffer[NVRAM->addr] = val & 0xFF;
        }
        break;
    }
}
+
/* Read the cell currently selected by NVRAM->addr.  Time registers are
 * delivered BCD encoded from the live clock; control/alarm registers
 * and plain RAM return the stored byte; locked or out-of-range
 * locations return 0xFF. */
uint32_t m48t59_read (m48t59_t *NVRAM)
{
    struct tm tm;
    uint32_t retval = 0xFF;

    switch (NVRAM->addr) {
    case 0x1FF0:
        /* flags register */
        goto do_read;
    case 0x1FF1:
        /* unused */
        retval = 0;
        break;
    case 0x1FF2:
        /* alarm seconds */
        goto do_read;
    case 0x1FF3:
        /* alarm minutes */
        goto do_read;
    case 0x1FF4:
        /* alarm hours */
        goto do_read;
    case 0x1FF5:
        /* alarm date */
        goto do_read;
    case 0x1FF6:
        /* interrupts */
        goto do_read;
    case 0x1FF7:
        /* A read resets the watchdog */
        set_up_watchdog(NVRAM, NVRAM->buffer[0x1FF7]);
        goto do_read;
    case 0x1FF8:
        /* control */
        goto do_read;
    case 0x1FF9:
        /* seconds (BCD), with the stored STOP bit in bit 7 */
        get_time(NVRAM, &tm);
        retval = (NVRAM->buffer[0x1FF9] & 0x80) | toBCD(tm.tm_sec);
        break;
    case 0x1FFA:
        /* minutes (BCD) */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_min);
        break;
    case 0x1FFB:
        /* hours (BCD) */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_hour);
        break;
    case 0x1FFC:
        /* day of the week / century */
        get_time(NVRAM, &tm);
        retval = NVRAM->buffer[0x1FFC] | tm.tm_wday;
        break;
    case 0x1FFD:
        /* date */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_mday);
        break;
    case 0x1FFE:
        /* month */
        get_time(NVRAM, &tm);
        retval = toBCD(tm.tm_mon + 1);
        break;
    case 0x1FFF:
        /* year */
        get_time(NVRAM, &tm);
        /* NOTE(review): tm_year counts years since 1900, so values of
         * 100 or more (year 2000+) do not BCD-encode cleanly -- verify
         * the intended epoch. */
        retval = toBCD(tm.tm_year);
        break;
    default:
        /* Check lock registers state */
        if (NVRAM->addr >= 0x20 && NVRAM->addr <= 0x2F && (NVRAM->lock & 1))
            break;
        if (NVRAM->addr >= 0x30 && NVRAM->addr <= 0x3F && (NVRAM->lock & 2))
            break;
        if (NVRAM->addr < 0x1FF0 ||
            (NVRAM->addr > 0x1FFF && NVRAM->addr < NVRAM->size)) {
            /* the register cases above jump here to fetch the raw byte */
        do_read:
            retval = NVRAM->buffer[NVRAM->addr];
        }
        break;
    }
    if (NVRAM->addr > 0x1FF9 && NVRAM->addr < 0x2000)
        NVRAM_PRINTF("0x%08x <= 0x%08x\n", NVRAM->addr, retval);

    return retval;
}
+
+void m48t59_set_addr (m48t59_t *NVRAM, uint32_t addr)
+{
+ NVRAM->addr = addr;
+}
+
+void m48t59_toggle_lock (m48t59_t *NVRAM, int lock)
+{
+ NVRAM->lock ^= 1 << lock;
+}
+
+/* IO access to NVRAM */
+static void NVRAM_writeb (void *opaque, uint32_t addr, uint32_t val)
+{
+ m48t59_t *NVRAM = opaque;
+
+ addr -= NVRAM->io_base;
+ NVRAM_PRINTF("0x%08x => 0x%08x\n", addr, val);
+ switch (addr) {
+ case 0:
+ NVRAM->addr &= ~0x00FF;
+ NVRAM->addr |= val;
+ break;
+ case 1:
+ NVRAM->addr &= ~0xFF00;
+ NVRAM->addr |= val << 8;
+ break;
+ case 3:
+ m48t59_write(NVRAM, val);
+ NVRAM->addr = 0x0000;
+ break;
+ default:
+ break;
+ }
+}
+
+static uint32_t NVRAM_readb (void *opaque, uint32_t addr)
+{
+ m48t59_t *NVRAM = opaque;
+ uint32_t retval;
+
+ addr -= NVRAM->io_base;
+ switch (addr) {
+ case 3:
+ retval = m48t59_read(NVRAM);
+ break;
+ default:
+ retval = -1;
+ break;
+ }
+ NVRAM_PRINTF("0x%08x <= 0x%08x\n", addr, retval);
+
+ return retval;
+}
+
+static void nvram_writeb (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ m48t59_t *NVRAM = opaque;
+
+ addr -= NVRAM->mem_base;
+ if (addr < 0x1FF0)
+ NVRAM->buffer[addr] = value;
+}
+
+static void nvram_writew (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ m48t59_t *NVRAM = opaque;
+
+ addr -= NVRAM->mem_base;
+ if (addr < 0x1FF0) {
+ NVRAM->buffer[addr] = value >> 8;
+ NVRAM->buffer[addr + 1] = value;
+ }
+}
+
+static void nvram_writel (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ m48t59_t *NVRAM = opaque;
+
+ addr -= NVRAM->mem_base;
+ if (addr < 0x1FF0) {
+ NVRAM->buffer[addr] = value >> 24;
+ NVRAM->buffer[addr + 1] = value >> 16;
+ NVRAM->buffer[addr + 2] = value >> 8;
+ NVRAM->buffer[addr + 3] = value;
+ }
+}
+
+static uint32_t nvram_readb (void *opaque, target_phys_addr_t addr)
+{
+ m48t59_t *NVRAM = opaque;
+ uint32_t retval = 0;
+
+ addr -= NVRAM->mem_base;
+ if (addr < 0x1FF0)
+ retval = NVRAM->buffer[addr];
+
+ return retval;
+}
+
+static uint32_t nvram_readw (void *opaque, target_phys_addr_t addr)
+{
+ m48t59_t *NVRAM = opaque;
+ uint32_t retval = 0;
+
+ addr -= NVRAM->mem_base;
+ if (addr < 0x1FF0) {
+ retval = NVRAM->buffer[addr] << 8;
+ retval |= NVRAM->buffer[addr + 1];
+ }
+
+ return retval;
+}
+
+static uint32_t nvram_readl (void *opaque, target_phys_addr_t addr)
+{
+ m48t59_t *NVRAM = opaque;
+ uint32_t retval = 0;
+
+ addr -= NVRAM->mem_base;
+ if (addr < 0x1FF0) {
+ retval = NVRAM->buffer[addr] << 24;
+ retval |= NVRAM->buffer[addr + 1] << 16;
+ retval |= NVRAM->buffer[addr + 2] << 8;
+ retval |= NVRAM->buffer[addr + 3];
+ }
+
+ return retval;
+}
+
+static CPUWriteMemoryFunc *nvram_write[] = {
+ &nvram_writeb,
+ &nvram_writew,
+ &nvram_writel,
+};
+
+static CPUReadMemoryFunc *nvram_read[] = {
+ &nvram_readb,
+ &nvram_readw,
+ &nvram_readl,
+};
/* Initialisation routine */
/* Allocate an m48t59 of 'size' bytes, register its 4-byte I/O window
 * at 'io_base' and (when mem_base is non-zero) a memory mapping, and
 * create the alarm and watchdog timers.  Returns NULL on allocation
 * failure. */
m48t59_t *m48t59_init (int IRQ, uint32_t mem_base,
                       uint32_t io_base, uint16_t size)
{
    m48t59_t *s;

    s = qemu_mallocz(sizeof(m48t59_t));
    if (!s)
        return NULL;
    s->buffer = qemu_mallocz(size);
    if (!s->buffer) {
        qemu_free(s);
        return NULL;
    }
    s->IRQ = IRQ;
    s->size = size;
    s->mem_base = mem_base;
    s->io_base = io_base;
    s->addr = 0;
    register_ioport_read(io_base, 0x04, 1, NVRAM_readb, s);
    register_ioport_write(io_base, 0x04, 1, NVRAM_writeb, s);
    if (mem_base != 0) {
        s->mem_index = cpu_register_io_memory(0, nvram_read, nvram_write, s);
        /* NOTE(review): maps a fixed 0x4000 bytes regardless of 'size'
         * -- confirm call sites always pass a compatible size. */
        cpu_register_physical_memory(mem_base, 0x4000, s->mem_index);
    }
    s->alrm_timer = qemu_new_timer(vm_clock, &alarm_cb, s);
    s->wd_timer = qemu_new_timer(vm_clock, &watchdog_cb, s);
    s->lock = 0;

    return s;
}
diff --git a/tools/ioemu/hw/m48t59.h b/tools/ioemu/hw/m48t59.h
new file mode 100644
index 0000000000..03d8ea3b9b
--- /dev/null
+++ b/tools/ioemu/hw/m48t59.h
@@ -0,0 +1,13 @@
#if !defined (__M48T59_H__)
#define __M48T59_H__

typedef struct m48t59_t m48t59_t;

void m48t59_write (m48t59_t *NVRAM, uint32_t val);
uint32_t m48t59_read (m48t59_t *NVRAM);
void m48t59_set_addr (m48t59_t *NVRAM, uint32_t addr);
void m48t59_toggle_lock (m48t59_t *NVRAM, int lock);
/* Argument order is (IRQ, mem_base, io_base, size), matching the
 * definition in m48t59.c.  The previous prototype swapped the io_base
 * and mem_base parameter names, inviting callers to pass the two base
 * addresses in the wrong order. */
m48t59_t *m48t59_init (int IRQ, uint32_t mem_base,
                       uint32_t io_base, uint16_t size);

#endif /* !defined (__M48T59_H__) */
diff --git a/tools/ioemu/hw/magic-load.c b/tools/ioemu/hw/magic-load.c
new file mode 100644
index 0000000000..06a5f743af
--- /dev/null
+++ b/tools/ioemu/hw/magic-load.c
@@ -0,0 +1,326 @@
#include <stdio.h>

#include "vl.h"
#include "disas.h"
+
+#define ELF_CLASS ELFCLASS32
+#define ELF_DATA ELFDATA2MSB
+#define ELF_ARCH EM_SPARC
+
+#include "elf.h"
+
+#ifdef BSWAP_NEEDED
+static void bswap_ehdr(Elf32_Ehdr *ehdr)
+{
+ bswap16s(&ehdr->e_type); /* Object file type */
+ bswap16s(&ehdr->e_machine); /* Architecture */
+ bswap32s(&ehdr->e_version); /* Object file version */
+ bswap32s(&ehdr->e_entry); /* Entry point virtual address */
+ bswap32s(&ehdr->e_phoff); /* Program header table file offset */
+ bswap32s(&ehdr->e_shoff); /* Section header table file offset */
+ bswap32s(&ehdr->e_flags); /* Processor-specific flags */
+ bswap16s(&ehdr->e_ehsize); /* ELF header size in bytes */
+ bswap16s(&ehdr->e_phentsize); /* Program header table entry size */
+ bswap16s(&ehdr->e_phnum); /* Program header table entry count */
+ bswap16s(&ehdr->e_shentsize); /* Section header table entry size */
+ bswap16s(&ehdr->e_shnum); /* Section header table entry count */
+ bswap16s(&ehdr->e_shstrndx); /* Section header string table index */
+}
+
+static void bswap_phdr(Elf32_Phdr *phdr)
+{
+ bswap32s(&phdr->p_type); /* Segment type */
+ bswap32s(&phdr->p_offset); /* Segment file offset */
+ bswap32s(&phdr->p_vaddr); /* Segment virtual address */
+ bswap32s(&phdr->p_paddr); /* Segment physical address */
+ bswap32s(&phdr->p_filesz); /* Segment size in file */
+ bswap32s(&phdr->p_memsz); /* Segment size in memory */
+ bswap32s(&phdr->p_flags); /* Segment flags */
+ bswap32s(&phdr->p_align); /* Segment alignment */
+}
+
+static void bswap_shdr(Elf32_Shdr *shdr)
+{
+ bswap32s(&shdr->sh_name);
+ bswap32s(&shdr->sh_type);
+ bswap32s(&shdr->sh_flags);
+ bswap32s(&shdr->sh_addr);
+ bswap32s(&shdr->sh_offset);
+ bswap32s(&shdr->sh_size);
+ bswap32s(&shdr->sh_link);
+ bswap32s(&shdr->sh_info);
+ bswap32s(&shdr->sh_addralign);
+ bswap32s(&shdr->sh_entsize);
+}
+
+static void bswap_sym(Elf32_Sym *sym)
+{
+ bswap32s(&sym->st_name);
+ bswap32s(&sym->st_value);
+ bswap32s(&sym->st_size);
+ bswap16s(&sym->st_shndx);
+}
+#else
+#define bswap_ehdr(e) do { } while (0)
+#define bswap_phdr(e) do { } while (0)
+#define bswap_shdr(e) do { } while (0)
+#define bswap_sym(e) do { } while (0)
+#endif
+
+static int find_phdr(struct elfhdr *ehdr, int fd, struct elf_phdr *phdr, uint32_t type)
+{
+ int i, retval;
+
+ retval = lseek(fd, ehdr->e_phoff, SEEK_SET);
+ if (retval < 0)
+ return -1;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ retval = read(fd, phdr, sizeof(*phdr));
+ if (retval < 0)
+ return -1;
+ bswap_phdr(phdr);
+ if (phdr->p_type == type)
+ return 0;
+ }
+ return -1;
+}
+
+static void *find_shdr(struct elfhdr *ehdr, int fd, struct elf_shdr *shdr, uint32_t type)
+{
+ int i, retval;
+
+ retval = lseek(fd, ehdr->e_shoff, SEEK_SET);
+ if (retval < 0)
+ return NULL;
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ retval = read(fd, shdr, sizeof(*shdr));
+ if (retval < 0)
+ return NULL;
+ bswap_shdr(shdr);
+ if (shdr->sh_type == type)
+ return qemu_malloc(shdr->sh_size);
+ }
+ return NULL;
+}
+
+static int find_strtab(struct elfhdr *ehdr, int fd, struct elf_shdr *shdr, struct elf_shdr *symtab)
+{
+ int retval;
+
+ retval = lseek(fd, ehdr->e_shoff + sizeof(struct elf_shdr) * symtab->sh_link, SEEK_SET);
+ if (retval < 0)
+ return -1;
+
+ retval = read(fd, shdr, sizeof(*shdr));
+ if (retval < 0)
+ return -1;
+ bswap_shdr(shdr);
+ if (shdr->sh_type == SHT_STRTAB)
+ return qemu_malloc(shdr->sh_size);;
+ return 0;
+}
+
+static int read_program(int fd, struct elf_phdr *phdr, void *dst)
+{
+ int retval;
+ retval = lseek(fd, 0x4000, SEEK_SET);
+ if (retval < 0)
+ return -1;
+ return read(fd, dst, phdr->p_filesz);
+}
+
+static int read_section(int fd, struct elf_shdr *s, void *dst)
+{
+ int retval;
+
+ retval = lseek(fd, s->sh_offset, SEEK_SET);
+ if (retval < 0)
+ return -1;
+ retval = read(fd, dst, s->sh_size);
+ if (retval < 0)
+ return -1;
+ return 0;
+}
+
+static void *process_section(struct elfhdr *ehdr, int fd, struct elf_shdr *shdr, uint32_t type)
+{
+ void *dst;
+
+ dst = find_shdr(ehdr, fd, shdr, type);
+ if (!dst)
+ goto error;
+
+ if (read_section(fd, shdr, dst))
+ goto error;
+ return dst;
+ error:
+ qemu_free(dst);
+ return NULL;
+}
+
+static void *process_strtab(struct elfhdr *ehdr, int fd, struct elf_shdr *shdr, struct elf_shdr *symtab)
+{
+ void *dst;
+
+ dst = find_strtab(ehdr, fd, shdr, symtab);
+ if (!dst)
+ goto error;
+
+ if (read_section(fd, shdr, dst))
+ goto error;
+ return dst;
+ error:
+ qemu_free(dst);
+ return NULL;
+}
+
+static void load_symbols(struct elfhdr *ehdr, int fd)
+{
+ struct elf_shdr symtab, strtab;
+ struct elf_sym *syms;
+ int nsyms, i;
+ char *str;
+
+ /* Symbol table */
+ syms = process_section(ehdr, fd, &symtab, SHT_SYMTAB);
+ if (!syms)
+ return;
+
+ nsyms = symtab.sh_size / sizeof(struct elf_sym);
+ for (i = 0; i < nsyms; i++)
+ bswap_sym(&syms[i]);
+
+ /* String table */
+ str = process_strtab(ehdr, fd, &strtab, &symtab);
+ if (!str)
+ goto error_freesyms;
+
+ /* Commit */
+ if (disas_symtab)
+ qemu_free(disas_symtab); /* XXX Merge with old symbols? */
+ if (disas_strtab)
+ qemu_free(disas_strtab);
+ disas_symtab = syms;
+ disas_num_syms = nsyms;
+ disas_strtab = str;
+ return;
+ error_freesyms:
+ qemu_free(syms);
+ return;
+}
+
+int load_elf(const char * filename, uint8_t *addr)
+{
+ struct elfhdr ehdr;
+ struct elf_phdr phdr;
+ int retval, fd;
+
+ fd = open(filename, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ goto error;
+
+ retval = read(fd, &ehdr, sizeof(ehdr));
+ if (retval < 0)
+ goto error;
+
+ bswap_ehdr(&ehdr);
+
+ if (ehdr.e_ident[0] != 0x7f || ehdr.e_ident[1] != 'E'
+ || ehdr.e_ident[2] != 'L' || ehdr.e_ident[3] != 'F'
+ || ehdr.e_machine != EM_SPARC)
+ goto error;
+
+ if (find_phdr(&ehdr, fd, &phdr, PT_LOAD))
+ goto error;
+ retval = read_program(fd, &phdr, addr);
+ if (retval < 0)
+ goto error;
+
+ load_symbols(&ehdr, fd);
+
+ close(fd);
+ return retval;
+ error:
+ close(fd);
+ return -1;
+}
+
+int load_kernel(const char *filename, uint8_t *addr)
+{
+ int fd, size;
+
+ fd = open(filename, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ return -1;
+ /* load 32 bit code */
+ size = read(fd, addr, 16 * 1024 * 1024);
+ if (size < 0)
+ goto fail;
+ close(fd);
+ return size;
+ fail:
+ close(fd);
+ return -1;
+}
+
+typedef struct MAGICState {
+ uint32_t addr;
+ uint32_t saved_addr;
+ int magic_state;
+ char saved_kfn[1024];
+} MAGICState;
+
+static uint32_t magic_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+ int ret;
+ MAGICState *s = opaque;
+
+ if (s->magic_state == 0) {
+ ret = load_elf(s->saved_kfn, (uint8_t *)s->saved_addr);
+ if (ret < 0)
+ ret = load_kernel(s->saved_kfn, (uint8_t *)s->saved_addr);
+ if (ret < 0) {
+ fprintf(stderr, "qemu: could not load kernel '%s'\n",
+ s->saved_kfn);
+ }
+ s->magic_state = 1; /* No more magic */
+ tb_flush();
+ return bswap32(ret);
+ }
+ return 0;
+}
+
+static void magic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+}
+
+
+static CPUReadMemoryFunc *magic_mem_read[3] = {
+ magic_mem_readl,
+ magic_mem_readl,
+ magic_mem_readl,
+};
+
+static CPUWriteMemoryFunc *magic_mem_write[3] = {
+ magic_mem_writel,
+ magic_mem_writel,
+ magic_mem_writel,
+};
+
+void magic_init(const char *kfn, int kloadaddr, uint32_t addr)
+{
+ int magic_io_memory;
+ MAGICState *s;
+
+ s = qemu_mallocz(sizeof(MAGICState));
+ if (!s)
+ return;
+
+ strcpy(s->saved_kfn, kfn);
+ s->saved_addr = kloadaddr;
+ s->magic_state = 0;
+ s->addr = addr;
+ magic_io_memory = cpu_register_io_memory(0, magic_mem_read, magic_mem_write, s);
+ cpu_register_physical_memory(addr, 4, magic_io_memory);
+}
+
diff --git a/tools/ioemu/hw/mc146818rtc.c b/tools/ioemu/hw/mc146818rtc.c
new file mode 100644
index 0000000000..9d4cbed90b
--- /dev/null
+++ b/tools/ioemu/hw/mc146818rtc.c
@@ -0,0 +1,463 @@
+/*
+ * QEMU MC146818 RTC emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+//#define DEBUG_CMOS
+
+#define RTC_SECONDS 0
+#define RTC_SECONDS_ALARM 1
+#define RTC_MINUTES 2
+#define RTC_MINUTES_ALARM 3
+#define RTC_HOURS 4
+#define RTC_HOURS_ALARM 5
+#define RTC_ALARM_DONT_CARE 0xC0
+
+#define RTC_DAY_OF_WEEK 6
+#define RTC_DAY_OF_MONTH 7
+#define RTC_MONTH 8
+#define RTC_YEAR 9
+
+#define RTC_REG_A 10
+#define RTC_REG_B 11
+#define RTC_REG_C 12
+#define RTC_REG_D 13
+
+#define REG_A_UIP 0x80
+
+#define REG_B_SET 0x80
+#define REG_B_PIE 0x40
+#define REG_B_AIE 0x20
+#define REG_B_UIE 0x10
+
+struct RTCState {
+ uint8_t cmos_data[128];
+ uint8_t cmos_index;
+ struct tm current_tm;
+ int irq;
+ /* periodic timer */
+ QEMUTimer *periodic_timer;
+ int64_t next_periodic_time;
+ /* second update */
+ int64_t next_second_time;
+ QEMUTimer *second_timer;
+ QEMUTimer *second_timer2;
+};
+
+static void rtc_set_time(RTCState *s);
+static void rtc_copy_date(RTCState *s);
+
+static void rtc_timer_update(RTCState *s, int64_t current_time)
+{
+ int period_code, period;
+ int64_t cur_clock, next_irq_clock;
+
+ period_code = s->cmos_data[RTC_REG_A] & 0x0f;
+ if (period_code != 0 &&
+ (s->cmos_data[RTC_REG_B] & REG_B_PIE)) {
+ if (period_code <= 2)
+ period_code += 7;
+ /* period in 32 Khz cycles */
+ period = 1 << (period_code - 1);
+ /* compute 32 khz clock */
+ cur_clock = muldiv64(current_time, 32768, ticks_per_sec);
+ next_irq_clock = (cur_clock & ~(period - 1)) + period;
+ s->next_periodic_time = muldiv64(next_irq_clock, ticks_per_sec, 32768) + 1;
+ qemu_mod_timer(s->periodic_timer, s->next_periodic_time);
+ } else {
+ qemu_del_timer(s->periodic_timer);
+ }
+}
+
+static void rtc_periodic_timer(void *opaque)
+{
+ RTCState *s = opaque;
+
+ rtc_timer_update(s, s->next_periodic_time);
+ s->cmos_data[RTC_REG_C] |= 0xc0;
+ pic_set_irq(s->irq, 1);
+}
+
+static void cmos_ioport_write(void *opaque, uint32_t addr, uint32_t data)
+{
+ RTCState *s = opaque;
+
+ if ((addr & 1) == 0) {
+ s->cmos_index = data & 0x7f;
+ } else {
+#ifdef DEBUG_CMOS
+ printf("cmos: write index=0x%02x val=0x%02x\n",
+ s->cmos_index, data);
+#endif
+ switch(s->cmos_index) {
+ case RTC_SECONDS_ALARM:
+ case RTC_MINUTES_ALARM:
+ case RTC_HOURS_ALARM:
+ /* XXX: not supported */
+ s->cmos_data[s->cmos_index] = data;
+ break;
+ case RTC_SECONDS:
+ case RTC_MINUTES:
+ case RTC_HOURS:
+ case RTC_DAY_OF_WEEK:
+ case RTC_DAY_OF_MONTH:
+ case RTC_MONTH:
+ case RTC_YEAR:
+ s->cmos_data[s->cmos_index] = data;
+ /* if in set mode, do not update the time */
+ if (!(s->cmos_data[RTC_REG_B] & REG_B_SET)) {
+ rtc_set_time(s);
+ }
+ break;
+ case RTC_REG_A:
+ /* UIP bit is read only */
+ s->cmos_data[RTC_REG_A] = (data & ~REG_A_UIP) |
+ (s->cmos_data[RTC_REG_A] & REG_A_UIP);
+ rtc_timer_update(s, qemu_get_clock(vm_clock));
+ break;
+ case RTC_REG_B:
+ if (data & REG_B_SET) {
+ /* set mode: reset UIP mode */
+ s->cmos_data[RTC_REG_A] &= ~REG_A_UIP;
+ data &= ~REG_B_UIE;
+ } else {
+ /* if disabling set mode, update the time */
+ if (s->cmos_data[RTC_REG_B] & REG_B_SET) {
+ rtc_set_time(s);
+ }
+ }
+ s->cmos_data[RTC_REG_B] = data;
+ rtc_timer_update(s, qemu_get_clock(vm_clock));
+ break;
+ case RTC_REG_C:
+ case RTC_REG_D:
+ /* cannot write to them */
+ break;
+ default:
+ s->cmos_data[s->cmos_index] = data;
+ break;
+ }
+ }
+}
+
+static inline int to_bcd(RTCState *s, int a)
+{
+ if (s->cmos_data[RTC_REG_B] & 0x04) {
+ return a;
+ } else {
+ return ((a / 10) << 4) | (a % 10);
+ }
+}
+
+static inline int from_bcd(RTCState *s, int a)
+{
+ if (s->cmos_data[RTC_REG_B] & 0x04) {
+ return a;
+ } else {
+ return ((a >> 4) * 10) + (a & 0x0f);
+ }
+}
+
+static void rtc_set_time(RTCState *s)
+{
+ struct tm *tm = &s->current_tm;
+
+ tm->tm_sec = from_bcd(s, s->cmos_data[RTC_SECONDS]);
+ tm->tm_min = from_bcd(s, s->cmos_data[RTC_MINUTES]);
+ tm->tm_hour = from_bcd(s, s->cmos_data[RTC_HOURS] & 0x7f);
+ if (!(s->cmos_data[RTC_REG_B] & 0x02) &&
+ (s->cmos_data[RTC_HOURS] & 0x80)) {
+ tm->tm_hour += 12;
+ }
+ tm->tm_wday = from_bcd(s, s->cmos_data[RTC_DAY_OF_WEEK]);
+ tm->tm_mday = from_bcd(s, s->cmos_data[RTC_DAY_OF_MONTH]);
+ tm->tm_mon = from_bcd(s, s->cmos_data[RTC_MONTH]) - 1;
+ tm->tm_year = from_bcd(s, s->cmos_data[RTC_YEAR]) + 100;
+}
+
+static void rtc_copy_date(RTCState *s)
+{
+ const struct tm *tm = &s->current_tm;
+
+ s->cmos_data[RTC_SECONDS] = to_bcd(s, tm->tm_sec);
+ s->cmos_data[RTC_MINUTES] = to_bcd(s, tm->tm_min);
+ if (s->cmos_data[RTC_REG_B] & 0x02) {
+ /* 24 hour format */
+ s->cmos_data[RTC_HOURS] = to_bcd(s, tm->tm_hour);
+ } else {
+ /* 12 hour format */
+ s->cmos_data[RTC_HOURS] = to_bcd(s, tm->tm_hour % 12);
+ if (tm->tm_hour >= 12)
+ s->cmos_data[RTC_HOURS] |= 0x80;
+ }
+ s->cmos_data[RTC_DAY_OF_WEEK] = to_bcd(s, tm->tm_wday);
+ s->cmos_data[RTC_DAY_OF_MONTH] = to_bcd(s, tm->tm_mday);
+ s->cmos_data[RTC_MONTH] = to_bcd(s, tm->tm_mon + 1);
+ s->cmos_data[RTC_YEAR] = to_bcd(s, tm->tm_year % 100);
+}
+
/* Number of days in 'month' (0 = January .. 11 = December) of 'year';
 * out-of-range months fall back to 31. */
static int get_days_in_month(int month, int year)
{
    static const int days_tab[12] = {
        31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
    };
    int is_leap;

    if ((unsigned)month >= 12)
        return 31;
    if (month != 1)
        return days_tab[month];
    /* February: one extra day in Gregorian leap years */
    is_leap = (year % 4) == 0 && ((year % 100) != 0 || (year % 400) == 0);
    return days_tab[1] + is_leap;
}
+
+/* update 'tm' to the next second */
+static void rtc_next_second(struct tm *tm)
+{
+ int days_in_month;
+
+ tm->tm_sec++;
+ if ((unsigned)tm->tm_sec >= 60) {
+ tm->tm_sec = 0;
+ tm->tm_min++;
+ if ((unsigned)tm->tm_min >= 60) {
+ tm->tm_min = 0;
+ tm->tm_hour++;
+ if ((unsigned)tm->tm_hour >= 24) {
+ tm->tm_hour = 0;
+ /* next day */
+ tm->tm_wday++;
+ if ((unsigned)tm->tm_wday >= 7)
+ tm->tm_wday = 0;
+ days_in_month = get_days_in_month(tm->tm_mon,
+ tm->tm_year + 1900);
+ tm->tm_mday++;
+ if (tm->tm_mday < 1) {
+ tm->tm_mday = 1;
+ } else if (tm->tm_mday > days_in_month) {
+ tm->tm_mday = 1;
+ tm->tm_mon++;
+ if (tm->tm_mon >= 12) {
+ tm->tm_mon = 0;
+ tm->tm_year++;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+static void rtc_update_second(void *opaque)
+{
+ RTCState *s = opaque;
+ int64_t delay;
+
+ /* if the oscillator is not in normal operation, we do not update */
+ if ((s->cmos_data[RTC_REG_A] & 0x70) != 0x20) {
+ s->next_second_time += ticks_per_sec;
+ qemu_mod_timer(s->second_timer, s->next_second_time);
+ } else {
+ rtc_next_second(&s->current_tm);
+
+ if (!(s->cmos_data[RTC_REG_B] & REG_B_SET)) {
+ /* update in progress bit */
+ s->cmos_data[RTC_REG_A] |= REG_A_UIP;
+ }
+ /* should be 244 us = 8 / 32768 seconds, but currently the
+ timers do not have the necessary resolution. */
+ delay = (ticks_per_sec * 1) / 100;
+ if (delay < 1)
+ delay = 1;
+ qemu_mod_timer(s->second_timer2,
+ s->next_second_time + delay);
+ }
+}
+
+static void rtc_update_second2(void *opaque)
+{
+ RTCState *s = opaque;
+
+ if (!(s->cmos_data[RTC_REG_B] & REG_B_SET)) {
+ rtc_copy_date(s);
+ }
+
+ /* check alarm */
+ if (s->cmos_data[RTC_REG_B] & REG_B_AIE) {
+ if (((s->cmos_data[RTC_SECONDS_ALARM] & 0xc0) == 0xc0 ||
+ s->cmos_data[RTC_SECONDS_ALARM] == s->current_tm.tm_sec) &&
+ ((s->cmos_data[RTC_MINUTES_ALARM] & 0xc0) == 0xc0 ||
+ s->cmos_data[RTC_MINUTES_ALARM] == s->current_tm.tm_mon) &&
+ ((s->cmos_data[RTC_HOURS_ALARM] & 0xc0) == 0xc0 ||
+ s->cmos_data[RTC_HOURS_ALARM] == s->current_tm.tm_hour)) {
+
+ s->cmos_data[RTC_REG_C] |= 0xa0;
+ pic_set_irq(s->irq, 1);
+ }
+ }
+
+ /* update ended interrupt */
+ if (s->cmos_data[RTC_REG_B] & REG_B_UIE) {
+ s->cmos_data[RTC_REG_C] |= 0x90;
+ pic_set_irq(s->irq, 1);
+ }
+
+ /* clear update in progress bit */
+ s->cmos_data[RTC_REG_A] &= ~REG_A_UIP;
+
+ s->next_second_time += ticks_per_sec;
+ qemu_mod_timer(s->second_timer, s->next_second_time);
+}
+
+static uint32_t cmos_ioport_read(void *opaque, uint32_t addr)
+{
+ RTCState *s = opaque;
+ int ret;
+ if ((addr & 1) == 0) {
+ return 0xff;
+ } else {
+ switch(s->cmos_index) {
+ case RTC_SECONDS:
+ case RTC_MINUTES:
+ case RTC_HOURS:
+ case RTC_DAY_OF_WEEK:
+ case RTC_DAY_OF_MONTH:
+ case RTC_MONTH:
+ case RTC_YEAR:
+ ret = s->cmos_data[s->cmos_index];
+ break;
+ case RTC_REG_A:
+ ret = s->cmos_data[s->cmos_index];
+ break;
+ case RTC_REG_C:
+ ret = s->cmos_data[s->cmos_index];
+ pic_set_irq(s->irq, 0);
+ s->cmos_data[RTC_REG_C] = 0x00;
+ break;
+ default:
+ ret = s->cmos_data[s->cmos_index];
+ break;
+ }
+#ifdef DEBUG_CMOS
+ printf("cmos: read index=0x%02x val=0x%02x\n",
+ s->cmos_index, ret);
+#endif
+ return ret;
+ }
+}
+
+void rtc_set_memory(RTCState *s, int addr, int val)
+{
+ if (addr >= 0 && addr <= 127)
+ s->cmos_data[addr] = val;
+}
+
+void rtc_set_date(RTCState *s, const struct tm *tm)
+{
+ s->current_tm = *tm;
+ rtc_copy_date(s);
+}
+
+static void rtc_save(QEMUFile *f, void *opaque)
+{
+ RTCState *s = opaque;
+
+ qemu_put_buffer(f, s->cmos_data, 128);
+ qemu_put_8s(f, &s->cmos_index);
+
+ qemu_put_be32s(f, &s->current_tm.tm_sec);
+ qemu_put_be32s(f, &s->current_tm.tm_min);
+ qemu_put_be32s(f, &s->current_tm.tm_hour);
+ qemu_put_be32s(f, &s->current_tm.tm_wday);
+ qemu_put_be32s(f, &s->current_tm.tm_mday);
+ qemu_put_be32s(f, &s->current_tm.tm_mon);
+ qemu_put_be32s(f, &s->current_tm.tm_year);
+
+ qemu_put_timer(f, s->periodic_timer);
+ qemu_put_be64s(f, &s->next_periodic_time);
+
+ qemu_put_be64s(f, &s->next_second_time);
+ qemu_put_timer(f, s->second_timer);
+ qemu_put_timer(f, s->second_timer2);
+}
+
+static int rtc_load(QEMUFile *f, void *opaque, int version_id)
+{
+ RTCState *s = opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ qemu_get_buffer(f, s->cmos_data, 128);
+ qemu_get_8s(f, &s->cmos_index);
+
+ qemu_get_be32s(f, &s->current_tm.tm_sec);
+ qemu_get_be32s(f, &s->current_tm.tm_min);
+ qemu_get_be32s(f, &s->current_tm.tm_hour);
+ qemu_get_be32s(f, &s->current_tm.tm_wday);
+ qemu_get_be32s(f, &s->current_tm.tm_mday);
+ qemu_get_be32s(f, &s->current_tm.tm_mon);
+ qemu_get_be32s(f, &s->current_tm.tm_year);
+
+ qemu_get_timer(f, s->periodic_timer);
+ qemu_get_be64s(f, &s->next_periodic_time);
+
+ qemu_get_be64s(f, &s->next_second_time);
+ qemu_get_timer(f, s->second_timer);
+ qemu_get_timer(f, s->second_timer2);
+ return 0;
+}
+
+RTCState *rtc_init(int base, int irq)
+{
+ RTCState *s;
+
+ s = qemu_mallocz(sizeof(RTCState));
+ if (!s)
+ return NULL;
+
+ s->irq = irq;
+ s->cmos_data[RTC_REG_A] = 0x26;
+ s->cmos_data[RTC_REG_B] = 0x02;
+ s->cmos_data[RTC_REG_C] = 0x00;
+ s->cmos_data[RTC_REG_D] = 0x80;
+
+ s->periodic_timer = qemu_new_timer(vm_clock,
+ rtc_periodic_timer, s);
+ s->second_timer = qemu_new_timer(vm_clock,
+ rtc_update_second, s);
+ s->second_timer2 = qemu_new_timer(vm_clock,
+ rtc_update_second2, s);
+
+ s->next_second_time = qemu_get_clock(vm_clock) + (ticks_per_sec * 99) / 100;
+ qemu_mod_timer(s->second_timer2, s->next_second_time);
+
+ register_ioport_write(base, 2, 1, cmos_ioport_write, s);
+ register_ioport_read(base, 2, 1, cmos_ioport_read, s);
+
+ register_savevm("mc146818rtc", base, 1, rtc_save, rtc_load, s);
+ return s;
+}
+
diff --git a/tools/ioemu/hw/ne2000.c b/tools/ioemu/hw/ne2000.c
new file mode 100644
index 0000000000..79d3026c01
--- /dev/null
+++ b/tools/ioemu/hw/ne2000.c
@@ -0,0 +1,684 @@
+/*
+ * QEMU NE2000 emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* debug NE2000 card */
+//#define DEBUG_NE2000
+
+#define MAX_ETH_FRAME_SIZE 1514
+
+#define E8390_CMD 0x00 /* The command register (for all pages) */
+/* Page 0 register offsets. */
+#define EN0_CLDALO 0x01 /* Low byte of current local dma addr RD */
+#define EN0_STARTPG 0x01 /* Starting page of ring bfr WR */
+#define EN0_CLDAHI 0x02 /* High byte of current local dma addr RD */
+#define EN0_STOPPG 0x02 /* Ending page +1 of ring bfr WR */
+#define EN0_BOUNDARY 0x03 /* Boundary page of ring bfr RD WR */
+#define EN0_TSR 0x04 /* Transmit status reg RD */
+#define EN0_TPSR 0x04 /* Transmit starting page WR */
+#define EN0_NCR 0x05 /* Number of collision reg RD */
+#define EN0_TCNTLO 0x05 /* Low byte of tx byte count WR */
+#define EN0_FIFO 0x06 /* FIFO RD */
+#define EN0_TCNTHI 0x06 /* High byte of tx byte count WR */
+#define EN0_ISR 0x07 /* Interrupt status reg RD WR */
+#define EN0_CRDALO 0x08 /* low byte of current remote dma address RD */
+#define EN0_RSARLO 0x08 /* Remote start address reg 0 */
+#define EN0_CRDAHI 0x09 /* high byte, current remote dma address RD */
+#define EN0_RSARHI 0x09 /* Remote start address reg 1 */
+#define EN0_RCNTLO 0x0a /* Remote byte count reg WR */
+#define EN0_RCNTHI 0x0b /* Remote byte count reg WR */
+#define EN0_RSR 0x0c /* rx status reg RD */
+#define EN0_RXCR 0x0c /* RX configuration reg WR */
+#define EN0_TXCR 0x0d /* TX configuration reg WR */
+#define EN0_COUNTER0 0x0d /* Rcv alignment error counter RD */
+#define EN0_DCFG 0x0e /* Data configuration reg WR */
+#define EN0_COUNTER1 0x0e /* Rcv CRC error counter RD */
+#define EN0_IMR 0x0f /* Interrupt mask reg WR */
+#define EN0_COUNTER2 0x0f /* Rcv missed frame error counter RD */
+
+#define EN1_PHYS 0x11
+#define EN1_CURPAG 0x17
+#define EN1_MULT 0x18
+
+/* Register accessed at EN_CMD, the 8390 base addr. */
+#define E8390_STOP 0x01 /* Stop and reset the chip */
+#define E8390_START 0x02 /* Start the chip, clear reset */
+#define E8390_TRANS 0x04 /* Transmit a frame */
+#define E8390_RREAD 0x08 /* Remote read */
+#define E8390_RWRITE 0x10 /* Remote write */
+#define E8390_NODMA 0x20 /* Remote DMA */
+#define E8390_PAGE0 0x00 /* Select page chip registers */
+#define E8390_PAGE1 0x40 /* using the two high-order bits */
+#define E8390_PAGE2 0x80 /* Page 3 is invalid. */
+
+/* Bits in EN0_ISR - Interrupt status register */
+#define ENISR_RX 0x01 /* Receiver, no error */
+#define ENISR_TX 0x02 /* Transmitter, no error */
+#define ENISR_RX_ERR 0x04 /* Receiver, with error */
+#define ENISR_TX_ERR 0x08 /* Transmitter, with error */
+#define ENISR_OVER 0x10 /* Receiver overwrote the ring */
+#define ENISR_COUNTERS 0x20 /* Counters need emptying */
+#define ENISR_RDC 0x40 /* remote dma complete */
+#define ENISR_RESET 0x80 /* Reset completed */
+#define ENISR_ALL 0x3f /* Interrupts we will enable */
+
+/* Bits in received packet status byte and EN0_RSR*/
+#define ENRSR_RXOK 0x01 /* Received a good packet */
+#define ENRSR_CRC 0x02 /* CRC error */
+#define ENRSR_FAE 0x04 /* frame alignment error */
+#define ENRSR_FO 0x08 /* FIFO overrun */
+#define ENRSR_MPA 0x10 /* missed pkt */
+#define ENRSR_PHY 0x20 /* physical/multicast address */
+#define ENRSR_DIS 0x40 /* receiver disable. set in monitor mode */
+#define ENRSR_DEF 0x80 /* deferring */
+
+/* Transmitted packet status, EN0_TSR. */
+#define ENTSR_PTX 0x01 /* Packet transmitted without error */
+#define ENTSR_ND 0x02 /* The transmit wasn't deferred. */
+#define ENTSR_COL 0x04 /* The transmit collided at least once. */
+#define ENTSR_ABT 0x08 /* The transmit collided 16 times, and was deferred. */
+#define ENTSR_CRS 0x10 /* The carrier sense was lost. */
+#define ENTSR_FU 0x20 /* A "FIFO underrun" occurred during transmit. */
+#define ENTSR_CDH 0x40 /* The collision detect "heartbeat" signal was lost. */
+#define ENTSR_OWC 0x80 /* There was an out-of-window collision. */
+
+#define NE2000_PMEM_SIZE (32*1024)
+#define NE2000_PMEM_START (16*1024)
+#define NE2000_PMEM_END (NE2000_PMEM_SIZE+NE2000_PMEM_START)
+#define NE2000_MEM_SIZE NE2000_PMEM_END
+
+typedef struct NE2000State {
+ uint8_t cmd;
+ uint32_t start;
+ uint32_t stop;
+ uint8_t boundary;
+ uint8_t tsr;
+ uint8_t tpsr;
+ uint16_t tcnt;
+ uint16_t rcnt;
+ uint32_t rsar;
+ uint8_t rsr;
+ uint8_t isr;
+ uint8_t dcfg;
+ uint8_t imr;
+ uint8_t phys[6]; /* mac address */
+ uint8_t curpag;
+ uint8_t mult[8]; /* multicast mask array */
+ int irq;
+ PCIDevice *pci_dev;
+ NetDriverState *nd;
+ uint8_t mem[NE2000_MEM_SIZE];
+} NE2000State;
+
+static void ne2000_reset(NE2000State *s)
+{
+ int i;
+
+ s->isr = ENISR_RESET;
+ memcpy(s->mem, s->nd->macaddr, 6);
+ s->mem[14] = 0x57;
+ s->mem[15] = 0x57;
+
+ /* duplicate prom data */
+ for(i = 15;i >= 0; i--) {
+ s->mem[2 * i] = s->mem[i];
+ s->mem[2 * i + 1] = s->mem[i];
+ }
+}
+
+static void ne2000_update_irq(NE2000State *s)
+{
+ int isr;
+ isr = s->isr & s->imr;
+#if defined(DEBUG_NE2000)
+ printf("NE2000: Set IRQ line %d to %d (%02x %02x)\n",
+ s->irq, isr ? 1 : 0, s->isr, s->imr);
+#endif
+ if (s->irq == 16) {
+ /* PCI irq */
+ pci_set_irq(s->pci_dev, 0, (isr != 0));
+ } else {
+ /* ISA irq */
+ pic_set_irq(s->irq, (isr != 0));
+ }
+}
+
+/* return the max buffer size if the NE2000 can receive more data */
+static int ne2000_can_receive(void *opaque)
+{
+ NE2000State *s = opaque;
+ int avail, index, boundary;
+
+ if (s->cmd & E8390_STOP)
+ return 0;
+ index = s->curpag << 8;
+ boundary = s->boundary << 8;
+ if (index < boundary)
+ avail = boundary - index;
+ else
+ avail = (s->stop - s->start) - (index - boundary);
+ if (avail < (MAX_ETH_FRAME_SIZE + 4))
+ return 0;
+ return MAX_ETH_FRAME_SIZE;
+}
+
+#define MIN_BUF_SIZE 60
+
+static void ne2000_receive(void *opaque, const uint8_t *buf, int size)
+{
+ NE2000State *s = opaque;
+ uint8_t *p;
+ int total_len, next, avail, len, index;
+ uint8_t buf1[60];
+
+#if defined(DEBUG_NE2000)
+ printf("NE2000: received len=%d\n", size);
+#endif
+
+ /* if too small buffer, then expand it */
+ if (size < MIN_BUF_SIZE) {
+ memcpy(buf1, buf, size);
+ memset(buf1 + size, 0, MIN_BUF_SIZE - size);
+ buf = buf1;
+ size = MIN_BUF_SIZE;
+ }
+
+ index = s->curpag << 8;
+ /* 4 bytes for header */
+ total_len = size + 4;
+ /* address for next packet (4 bytes for CRC) */
+ next = index + ((total_len + 4 + 255) & ~0xff);
+ if (next >= s->stop)
+ next -= (s->stop - s->start);
+ /* prepare packet header */
+ p = s->mem + index;
+ s->rsr = ENRSR_RXOK; /* receive status */
+ /* XXX: check this */
+ if (buf[0] & 0x01)
+ s->rsr |= ENRSR_PHY;
+ p[0] = s->rsr;
+ p[1] = next >> 8;
+ p[2] = total_len;
+ p[3] = total_len >> 8;
+ index += 4;
+
+ /* write packet data */
+ while (size > 0) {
+ avail = s->stop - index;
+ len = size;
+ if (len > avail)
+ len = avail;
+ memcpy(s->mem + index, buf, len);
+ buf += len;
+ index += len;
+ if (index == s->stop)
+ index = s->start;
+ size -= len;
+ }
+ s->curpag = next >> 8;
+
+ /* now we can signal we have receive something */
+ s->isr |= ENISR_RX;
+ ne2000_update_irq(s);
+}
+
+static void ne2000_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ NE2000State *s = opaque;
+ int offset, page;
+
+ addr &= 0xf;
+#ifdef DEBUG_NE2000
+ printf("NE2000: write addr=0x%x val=0x%02x\n", addr, val);
+#endif
+ if (addr == E8390_CMD) {
+ /* control register */
+ s->cmd = val;
+ if (val & E8390_START) {
+ s->isr &= ~ENISR_RESET;
+ /* test specific case: zero length transfer */
+ if ((val & (E8390_RREAD | E8390_RWRITE)) &&
+ s->rcnt == 0) {
+ s->isr |= ENISR_RDC;
+ ne2000_update_irq(s);
+ }
+ if (val & E8390_TRANS) {
+ qemu_send_packet(s->nd, s->mem + (s->tpsr << 8), s->tcnt);
+ /* signal end of transfer */
+ s->tsr = ENTSR_PTX;
+ s->isr |= ENISR_TX;
+ ne2000_update_irq(s);
+ }
+ }
+ } else {
+ page = s->cmd >> 6;
+ offset = addr | (page << 4);
+ switch(offset) {
+ case EN0_STARTPG:
+ s->start = val << 8;
+ break;
+ case EN0_STOPPG:
+ s->stop = val << 8;
+ break;
+ case EN0_BOUNDARY:
+ s->boundary = val;
+ break;
+ case EN0_IMR:
+ s->imr = val;
+ ne2000_update_irq(s);
+ break;
+ case EN0_TPSR:
+ s->tpsr = val;
+ break;
+ case EN0_TCNTLO:
+ s->tcnt = (s->tcnt & 0xff00) | val;
+ break;
+ case EN0_TCNTHI:
+ s->tcnt = (s->tcnt & 0x00ff) | (val << 8);
+ break;
+ case EN0_RSARLO:
+ s->rsar = (s->rsar & 0xff00) | val;
+ break;
+ case EN0_RSARHI:
+ s->rsar = (s->rsar & 0x00ff) | (val << 8);
+ break;
+ case EN0_RCNTLO:
+ s->rcnt = (s->rcnt & 0xff00) | val;
+ break;
+ case EN0_RCNTHI:
+ s->rcnt = (s->rcnt & 0x00ff) | (val << 8);
+ break;
+ case EN0_DCFG:
+ s->dcfg = val;
+ break;
+ case EN0_ISR:
+ s->isr &= ~(val & 0x7f);
+ ne2000_update_irq(s);
+ break;
+ case EN1_PHYS ... EN1_PHYS + 5:
+ s->phys[offset - EN1_PHYS] = val;
+ break;
+ case EN1_CURPAG:
+ s->curpag = val;
+ break;
+ case EN1_MULT ... EN1_MULT + 7:
+ s->mult[offset - EN1_MULT] = val;
+ break;
+ }
+ }
+}
+
+static uint32_t ne2000_ioport_read(void *opaque, uint32_t addr)
+{
+ NE2000State *s = opaque;
+ int offset, page, ret;
+
+ addr &= 0xf;
+ if (addr == E8390_CMD) {
+ ret = s->cmd;
+ } else {
+ page = s->cmd >> 6;
+ offset = addr | (page << 4);
+ switch(offset) {
+ case EN0_TSR:
+ ret = s->tsr;
+ break;
+ case EN0_BOUNDARY:
+ ret = s->boundary;
+ break;
+ case EN0_ISR:
+ ret = s->isr;
+ break;
+ case EN0_RSARLO:
+ ret = s->rsar & 0x00ff;
+ break;
+ case EN0_RSARHI:
+ ret = s->rsar >> 8;
+ break;
+ case EN1_PHYS ... EN1_PHYS + 5:
+ ret = s->phys[offset - EN1_PHYS];
+ break;
+ case EN1_CURPAG:
+ ret = s->curpag;
+ break;
+ case EN1_MULT ... EN1_MULT + 7:
+ ret = s->mult[offset - EN1_MULT];
+ break;
+ case EN0_RSR:
+ ret = s->rsr;
+ break;
+ default:
+ ret = 0x00;
+ break;
+ }
+ }
+#ifdef DEBUG_NE2000
+ printf("NE2000: read addr=0x%x val=%02x\n", addr, ret);
+#endif
+ return ret;
+}
+
+static inline void ne2000_mem_writeb(NE2000State *s, uint32_t addr,
+ uint32_t val)
+{
+ if (addr < 32 ||
+ (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
+ s->mem[addr] = val;
+ }
+}
+
+static inline void ne2000_mem_writew(NE2000State *s, uint32_t addr,
+ uint32_t val)
+{
+ addr &= ~1; /* XXX: check exact behaviour if not even */
+ if (addr < 32 ||
+ (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
+ *(uint16_t *)(s->mem + addr) = cpu_to_le16(val);
+ }
+}
+
+static inline void ne2000_mem_writel(NE2000State *s, uint32_t addr,
+ uint32_t val)
+{
+ addr &= ~1; /* XXX: check exact behaviour if not even */
+ if (addr < 32 ||
+ (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
+ cpu_to_le32wu((uint32_t *)(s->mem + addr), val);
+ }
+}
+
+static inline uint32_t ne2000_mem_readb(NE2000State *s, uint32_t addr)
+{
+ if (addr < 32 ||
+ (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
+ return s->mem[addr];
+ } else {
+ return 0xff;
+ }
+}
+
+static inline uint32_t ne2000_mem_readw(NE2000State *s, uint32_t addr)
+{
+ addr &= ~1; /* XXX: check exact behaviour if not even */
+ if (addr < 32 ||
+ (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
+ return le16_to_cpu(*(uint16_t *)(s->mem + addr));
+ } else {
+ return 0xffff;
+ }
+}
+
+static inline uint32_t ne2000_mem_readl(NE2000State *s, uint32_t addr)
+{
+ addr &= ~1; /* XXX: check exact behaviour if not even */
+ if (addr < 32 ||
+ (addr >= NE2000_PMEM_START && addr < NE2000_MEM_SIZE)) {
+ return le32_to_cpupu((uint32_t *)(s->mem + addr));
+ } else {
+ return 0xffffffff;
+ }
+}
+
+static inline void ne2000_dma_update(NE2000State *s, int len)
+{
+ s->rsar += len;
+ /* wrap */
+ /* XXX: check what to do if rsar > stop */
+ if (s->rsar == s->stop)
+ s->rsar = s->start;
+
+ if (s->rcnt <= len) {
+ s->rcnt = 0;
+ /* signal end of transfer */
+ s->isr |= ENISR_RDC;
+ ne2000_update_irq(s);
+ } else {
+ s->rcnt -= len;
+ }
+}
+
+static void ne2000_asic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ NE2000State *s = opaque;
+
+#ifdef DEBUG_NE2000
+ printf("NE2000: asic write val=0x%04x\n", val);
+#endif
+ if (s->rcnt == 0)
+ return;
+ if (s->dcfg & 0x01) {
+ /* 16 bit access */
+ ne2000_mem_writew(s, s->rsar, val);
+ ne2000_dma_update(s, 2);
+ } else {
+ /* 8 bit access */
+ ne2000_mem_writeb(s, s->rsar, val);
+ ne2000_dma_update(s, 1);
+ }
+}
+
+static uint32_t ne2000_asic_ioport_read(void *opaque, uint32_t addr)
+{
+ NE2000State *s = opaque;
+ int ret;
+
+ if (s->dcfg & 0x01) {
+ /* 16 bit access */
+ ret = ne2000_mem_readw(s, s->rsar);
+ ne2000_dma_update(s, 2);
+ } else {
+ /* 8 bit access */
+ ret = ne2000_mem_readb(s, s->rsar);
+ ne2000_dma_update(s, 1);
+ }
+#ifdef DEBUG_NE2000
+ printf("NE2000: asic read val=0x%04x\n", ret);
+#endif
+ return ret;
+}
+
+static void ne2000_asic_ioport_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+ NE2000State *s = opaque;
+
+#ifdef DEBUG_NE2000
+ printf("NE2000: asic writel val=0x%04x\n", val);
+#endif
+ if (s->rcnt == 0)
+ return;
+ /* 32 bit access */
+ ne2000_mem_writel(s, s->rsar, val);
+ ne2000_dma_update(s, 4);
+}
+
+static uint32_t ne2000_asic_ioport_readl(void *opaque, uint32_t addr)
+{
+ NE2000State *s = opaque;
+ int ret;
+
+ /* 32 bit access */
+ ret = ne2000_mem_readl(s, s->rsar);
+ ne2000_dma_update(s, 4);
+#ifdef DEBUG_NE2000
+ printf("NE2000: asic readl val=0x%04x\n", ret);
+#endif
+ return ret;
+}
+
+static void ne2000_reset_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ /* nothing to do (end of reset pulse) */
+}
+
+static uint32_t ne2000_reset_ioport_read(void *opaque, uint32_t addr)
+{
+ NE2000State *s = opaque;
+ ne2000_reset(s);
+ return 0;
+}
+
+static void ne2000_save(QEMUFile* f,void* opaque)
+{
+ NE2000State* s=(NE2000State*)opaque;
+
+ qemu_put_8s(f, &s->cmd);
+ qemu_put_be32s(f, &s->start);
+ qemu_put_be32s(f, &s->stop);
+ qemu_put_8s(f, &s->boundary);
+ qemu_put_8s(f, &s->tsr);
+ qemu_put_8s(f, &s->tpsr);
+ qemu_put_be16s(f, &s->tcnt);
+ qemu_put_be16s(f, &s->rcnt);
+ qemu_put_be32s(f, &s->rsar);
+ qemu_put_8s(f, &s->rsr);
+ qemu_put_8s(f, &s->isr);
+ qemu_put_8s(f, &s->dcfg);
+ qemu_put_8s(f, &s->imr);
+ qemu_put_buffer(f, s->phys, 6);
+ qemu_put_8s(f, &s->curpag);
+ qemu_put_buffer(f, s->mult, 8);
+ qemu_put_be32s(f, &s->irq);
+ qemu_put_buffer(f, s->mem, NE2000_MEM_SIZE);
+}
+
+static int ne2000_load(QEMUFile* f,void* opaque,int version_id)
+{
+ NE2000State* s=(NE2000State*)opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ qemu_get_8s(f, &s->cmd);
+ qemu_get_be32s(f, &s->start);
+ qemu_get_be32s(f, &s->stop);
+ qemu_get_8s(f, &s->boundary);
+ qemu_get_8s(f, &s->tsr);
+ qemu_get_8s(f, &s->tpsr);
+ qemu_get_be16s(f, &s->tcnt);
+ qemu_get_be16s(f, &s->rcnt);
+ qemu_get_be32s(f, &s->rsar);
+ qemu_get_8s(f, &s->rsr);
+ qemu_get_8s(f, &s->isr);
+ qemu_get_8s(f, &s->dcfg);
+ qemu_get_8s(f, &s->imr);
+ qemu_get_buffer(f, s->phys, 6);
+ qemu_get_8s(f, &s->curpag);
+ qemu_get_buffer(f, s->mult, 8);
+ qemu_get_be32s(f, &s->irq);
+ qemu_get_buffer(f, s->mem, NE2000_MEM_SIZE);
+
+ return 0;
+}
+
+void isa_ne2000_init(int base, int irq, NetDriverState *nd)
+{
+ NE2000State *s;
+
+ s = qemu_mallocz(sizeof(NE2000State));
+ if (!s)
+ return;
+
+ register_ioport_write(base, 16, 1, ne2000_ioport_write, s);
+ register_ioport_read(base, 16, 1, ne2000_ioport_read, s);
+
+ register_ioport_write(base + 0x10, 1, 1, ne2000_asic_ioport_write, s);
+ register_ioport_read(base + 0x10, 1, 1, ne2000_asic_ioport_read, s);
+ register_ioport_write(base + 0x10, 2, 2, ne2000_asic_ioport_write, s);
+ register_ioport_read(base + 0x10, 2, 2, ne2000_asic_ioport_read, s);
+
+ register_ioport_write(base + 0x1f, 1, 1, ne2000_reset_ioport_write, s);
+ register_ioport_read(base + 0x1f, 1, 1, ne2000_reset_ioport_read, s);
+ s->irq = irq;
+ s->nd = nd;
+
+ ne2000_reset(s);
+
+ qemu_add_read_packet(nd, ne2000_can_receive, ne2000_receive, s);
+
+ register_savevm("ne2000", 0, 1, ne2000_save, ne2000_load, s);
+
+}
+
+/***********************************************************/
+/* PCI NE2000 definitions */
+
+typedef struct PCINE2000State {
+ PCIDevice dev;
+ NE2000State ne2000;
+} PCINE2000State;
+
+static void ne2000_map(PCIDevice *pci_dev, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ PCINE2000State *d = (PCINE2000State *)pci_dev;
+ NE2000State *s = &d->ne2000;
+
+ register_ioport_write(addr, 16, 1, ne2000_ioport_write, s);
+ register_ioport_read(addr, 16, 1, ne2000_ioport_read, s);
+
+ register_ioport_write(addr + 0x10, 1, 1, ne2000_asic_ioport_write, s);
+ register_ioport_read(addr + 0x10, 1, 1, ne2000_asic_ioport_read, s);
+ register_ioport_write(addr + 0x10, 2, 2, ne2000_asic_ioport_write, s);
+ register_ioport_read(addr + 0x10, 2, 2, ne2000_asic_ioport_read, s);
+ register_ioport_write(addr + 0x10, 4, 4, ne2000_asic_ioport_writel, s);
+ register_ioport_read(addr + 0x10, 4, 4, ne2000_asic_ioport_readl, s);
+
+ register_ioport_write(addr + 0x1f, 1, 1, ne2000_reset_ioport_write, s);
+ register_ioport_read(addr + 0x1f, 1, 1, ne2000_reset_ioport_read, s);
+}
+
+void pci_ne2000_init(PCIBus *bus, NetDriverState *nd)
+{
+ PCINE2000State *d;
+ NE2000State *s;
+ uint8_t *pci_conf;
+
+ d = (PCINE2000State *)pci_register_device(bus,
+ "NE2000", sizeof(PCINE2000State),
+ -1,
+ NULL, NULL);
+ pci_conf = d->dev.config;
+ pci_conf[0x00] = 0xec; // Realtek 8029
+ pci_conf[0x01] = 0x10;
+ pci_conf[0x02] = 0x29;
+ pci_conf[0x03] = 0x80;
+ pci_conf[0x0a] = 0x00; // ethernet network controller
+ pci_conf[0x0b] = 0x02;
+ pci_conf[0x0e] = 0x00; // header_type
+ pci_conf[0x3d] = 1; // interrupt pin 0
+
+ pci_register_io_region(&d->dev, 0, 0x100,
+ PCI_ADDRESS_SPACE_IO, ne2000_map);
+ s = &d->ne2000;
+ s->irq = 16; // PCI interrupt
+ s->pci_dev = (PCIDevice *)d;
+ s->nd = nd;
+ ne2000_reset(s);
+ qemu_add_read_packet(nd, ne2000_can_receive, ne2000_receive, s);
+
+ /* XXX: instance number ? */
+ register_savevm("ne2000", 0, 1, ne2000_save, ne2000_load, s);
+ register_savevm("ne2000_pci", 0, 1, generic_pci_save, generic_pci_load,
+ &d->dev);
+}
diff --git a/tools/ioemu/hw/openpic.c b/tools/ioemu/hw/openpic.c
new file mode 100644
index 0000000000..d193cfe6f2
--- /dev/null
+++ b/tools/ioemu/hw/openpic.c
@@ -0,0 +1,1023 @@
+/*
+ * OpenPIC emulation
+ *
+ * Copyright (c) 2004 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/*
+ *
+ * Based on OpenPic implementations:
+ * - Intel GW80314 I/O companion chip developer's manual
+ * - Motorola MPC8245 & MPC8540 user manuals.
+ * - Motorola MCP750 (aka Raven) programmer manual.
+ * - Motorola Harrier programmer manual
+ *
+ * Serial interrupts, as implemented in Raven chipset are not supported yet.
+ *
+ */
+#include "vl.h"
+
+//#define DEBUG_OPENPIC
+
+#ifdef DEBUG_OPENPIC
+#define DPRINTF(fmt, args...) do { printf(fmt , ##args); } while (0)
+#else
+#define DPRINTF(fmt, args...) do { } while (0)
+#endif
+#define ERROR(fmr, args...) do { printf("ERROR: " fmr , ##args); } while (0)
+
+#define USE_MPCxxx /* Intel model is broken, for now */
+
+#if defined (USE_INTEL_GW80314)
+/* Intel GW80314 I/O Companion chip */
+
+#define MAX_CPU 4
+#define MAX_IRQ 32
+#define MAX_DBL 4
+#define MAX_MBX 4
+#define MAX_TMR 4
+#define VECTOR_BITS 8
+#define MAX_IPI 0
+
+#define VID (0x00000000)
+
+#define OPENPIC_LITTLE_ENDIAN 1
+#define OPENPIC_BIG_ENDIAN 0
+
+#elif defined(USE_MPCxxx)
+
+#define MAX_CPU 2
+#define MAX_IRQ 64
+#define EXT_IRQ 48
+#define MAX_DBL 0
+#define MAX_MBX 0
+#define MAX_TMR 4
+#define VECTOR_BITS 8
+#define MAX_IPI 4
+#define VID 0x03 /* MPIC version ID */
+#define VENI 0x00000000 /* Vendor ID */
+
+enum {
+ IRQ_IPVP = 0,
+ IRQ_IDE,
+};
+
+#define OPENPIC_LITTLE_ENDIAN 1
+#define OPENPIC_BIG_ENDIAN 0
+
+#else
+#error "Please select which OpenPic implementation is to be emulated"
+#endif
+
+#if (OPENPIC_BIG_ENDIAN && !TARGET_WORDS_BIGENDIAN) || \
+ (OPENPIC_LITTLE_ENDIAN && TARGET_WORDS_BIGENDIAN)
+#define OPENPIC_SWAP
+#endif
+
+/* Interrupt definitions */
+#define IRQ_FE (EXT_IRQ) /* Internal functional IRQ */
+#define IRQ_ERR (EXT_IRQ + 1) /* Error IRQ */
+#define IRQ_TIM0 (EXT_IRQ + 2) /* First timer IRQ */
+#if MAX_IPI > 0
+#define IRQ_IPI0 (IRQ_TIM0 + MAX_TMR) /* First IPI IRQ */
+#define IRQ_DBL0 (IRQ_IPI0 + (MAX_CPU * MAX_IPI)) /* First doorbell IRQ */
+#else
+#define IRQ_DBL0 (IRQ_TIM0 + MAX_TMR) /* First doorbell IRQ */
+#define IRQ_MBX0 (IRQ_DBL0 + MAX_DBL) /* First mailbox IRQ */
+#endif
+
+#define BF_WIDTH(_bits_) \
+(((_bits_) + (sizeof(uint32_t) * 8) - 1) / (sizeof(uint32_t) * 8))
+
+static inline void set_bit (uint32_t *field, int bit)
+{
+ field[bit >> 5] |= 1 << (bit & 0x1F);
+}
+
+static inline void reset_bit (uint32_t *field, int bit)
+{
+ field[bit >> 5] &= ~(1 << (bit & 0x1F));
+}
+
+static inline int test_bit (uint32_t *field, int bit)
+{
+ return (field[bit >> 5] & 1 << (bit & 0x1F)) != 0;
+}
+
+enum {
+ IRQ_EXTERNAL = 0x01,
+ IRQ_INTERNAL = 0x02,
+ IRQ_TIMER = 0x04,
+ IRQ_SPECIAL = 0x08,
+} IRQ_src_type;
+
+typedef struct IRQ_queue_t {
+ uint32_t queue[BF_WIDTH(MAX_IRQ)];
+ int next;
+ int priority;
+} IRQ_queue_t;
+
+typedef struct IRQ_src_t {
+ uint32_t ipvp; /* IRQ vector/priority register */
+ uint32_t ide; /* IRQ destination register */
+ int type;
+ int last_cpu;
+ int pending; /* TRUE if IRQ is pending */
+} IRQ_src_t;
+
+enum IPVP_bits {
+ IPVP_MASK = 31,
+ IPVP_ACTIVITY = 30,
+ IPVP_MODE = 29,
+ IPVP_POLARITY = 23,
+ IPVP_SENSE = 22,
+};
+#define IPVP_PRIORITY_MASK (0x1F << 16)
+#define IPVP_PRIORITY(_ipvpr_) ((int)(((_ipvpr_) & IPVP_PRIORITY_MASK) >> 16))
+#define IPVP_VECTOR_MASK ((1 << VECTOR_BITS) - 1)
+#define IPVP_VECTOR(_ipvpr_) ((_ipvpr_) & IPVP_VECTOR_MASK)
+
+typedef struct IRQ_dst_t {
+ uint32_t pctp; /* CPU current task priority */
+ uint32_t pcsr; /* CPU sensitivity register */
+ IRQ_queue_t raised;
+ IRQ_queue_t servicing;
+ CPUState *env; /* Needed if we did SMP */
+} IRQ_dst_t;
+
+struct openpic_t {
+ PCIDevice pci_dev;
+ int mem_index;
+ /* Global registers */
+ uint32_t frep; /* Feature reporting register */
+ uint32_t glbc; /* Global configuration register */
+ uint32_t micr; /* MPIC interrupt configuration register */
+ uint32_t veni; /* Vendor identification register */
+ uint32_t spve; /* Spurious vector register */
+ uint32_t tifr; /* Timer frequency reporting register */
+ /* Source registers */
+ IRQ_src_t src[MAX_IRQ];
+ /* Local registers per output pin */
+ IRQ_dst_t dst[MAX_CPU];
+ int nb_cpus;
+ /* Timer registers */
+ struct {
+ uint32_t ticc; /* Global timer current count register */
+ uint32_t tibc; /* Global timer base count register */
+ } timers[MAX_TMR];
+#if MAX_DBL > 0
+ /* Doorbell registers */
+ uint32_t dar; /* Doorbell activate register */
+ struct {
+ uint32_t dmr; /* Doorbell messaging register */
+ } doorbells[MAX_DBL];
+#endif
+#if MAX_MBX > 0
+ /* Mailbox registers */
+ struct {
+ uint32_t mbr; /* Mailbox register */
+ } mailboxes[MAX_MBX]; /* MAX_MBX is the mailbox count; MAX_MAILBOXES was undefined */
+#endif
+};
+
+static inline void IRQ_setbit (IRQ_queue_t *q, int n_IRQ)
+{
+ set_bit(q->queue, n_IRQ);
+}
+
+static inline void IRQ_resetbit (IRQ_queue_t *q, int n_IRQ)
+{
+ reset_bit(q->queue, n_IRQ);
+}
+
+static inline int IRQ_testbit (IRQ_queue_t *q, int n_IRQ)
+{
+ return test_bit(q->queue, n_IRQ);
+}
+
+static void IRQ_check (openpic_t *opp, IRQ_queue_t *q)
+{
+ int next, i;
+ int priority;
+
+ next = -1;
+ priority = -1;
+ for (i = 0; i < MAX_IRQ; i++) {
+ if (IRQ_testbit(q, i)) {
+ DPRINTF("IRQ_check: irq %d set ipvp_pr=%d pr=%d\n",
+ i, IPVP_PRIORITY(opp->src[i].ipvp), priority);
+ if (IPVP_PRIORITY(opp->src[i].ipvp) > priority) {
+ next = i;
+ priority = IPVP_PRIORITY(opp->src[i].ipvp);
+ }
+ }
+ }
+ q->next = next;
+ q->priority = priority;
+}
+
+static int IRQ_get_next (openpic_t *opp, IRQ_queue_t *q)
+{
+ if (q->next == -1) {
+ /* XXX: optimize */
+ IRQ_check(opp, q);
+ }
+
+ return q->next;
+}
+
+static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ)
+{
+ IRQ_dst_t *dst;
+ IRQ_src_t *src;
+ int priority;
+
+ dst = &opp->dst[n_CPU];
+ src = &opp->src[n_IRQ];
+ priority = IPVP_PRIORITY(src->ipvp);
+ if (priority <= dst->pctp) {
+ /* Too low priority */
+ return;
+ }
+ if (IRQ_testbit(&dst->raised, n_IRQ)) {
+ /* Interrupt miss */
+ return;
+ }
+ set_bit(&src->ipvp, IPVP_ACTIVITY);
+ IRQ_setbit(&dst->raised, n_IRQ);
+ if (priority > dst->raised.priority) {
+ IRQ_get_next(opp, &dst->raised);
+ DPRINTF("Raise CPU IRQ\n");
+ cpu_interrupt(cpu_single_env, CPU_INTERRUPT_HARD);
+ }
+}
+
+/* update pic state because registers for n_IRQ have changed value */
+static void openpic_update_irq(openpic_t *opp, int n_IRQ)
+{
+ IRQ_src_t *src;
+ int i;
+
+ src = &opp->src[n_IRQ];
+
+ if (!src->pending) {
+ /* no irq pending */
+ return;
+ }
+ if (test_bit(&src->ipvp, IPVP_MASK)) {
+ /* Interrupt source is disabled */
+ return;
+ }
+ if (IPVP_PRIORITY(src->ipvp) == 0) {
+ /* Priority set to zero */
+ return;
+ }
+ if (test_bit(&src->ipvp, IPVP_ACTIVITY)) {
+ /* IRQ already active */
+ return;
+ }
+ if (src->ide == 0x00000000) {
+ /* No target */
+ return;
+ }
+
+ if (!test_bit(&src->ipvp, IPVP_MODE) ||
+ src->ide == (1 << src->last_cpu)) {
+ /* Directed delivery mode */
+ for (i = 0; i < opp->nb_cpus; i++) {
+ if (test_bit(&src->ide, i))
+ IRQ_local_pipe(opp, i, n_IRQ);
+ }
+ } else {
+ /* Distributed delivery mode */
+ /* XXX: incorrect code */
+ for (i = src->last_cpu; i < src->last_cpu; i++) {
+ if (i == MAX_IRQ)
+ i = 0;
+ if (test_bit(&src->ide, i)) {
+ IRQ_local_pipe(opp, i, n_IRQ);
+ src->last_cpu = i;
+ break;
+ }
+ }
+ }
+}
+
+void openpic_set_irq(openpic_t *opp, int n_IRQ, int level)
+{
+ IRQ_src_t *src;
+
+ src = &opp->src[n_IRQ];
+ DPRINTF("openpic: set irq %d = %d ipvp=%08x\n",
+ n_IRQ, level, src->ipvp);
+ if (test_bit(&src->ipvp, IPVP_SENSE)) {
+ /* level-sensitive irq */
+ src->pending = level;
+ if (!level)
+ reset_bit(&src->ipvp, IPVP_ACTIVITY);
+ } else {
+ /* edge-sensitive irq */
+ if (level)
+ src->pending = 1;
+ }
+ openpic_update_irq(opp, n_IRQ);
+}
+
+static void openpic_reset (openpic_t *opp)
+{
+ int i;
+
+ opp->glbc = 0x80000000;
+ /* Initialise controller registers */
+ opp->frep = ((EXT_IRQ - 1) << 16) | ((MAX_CPU - 1) << 8) | VID;
+ opp->veni = VENI;
+ opp->spve = 0x000000FF;
+ opp->tifr = 0x003F7A00;
+ /* ? */
+ opp->micr = 0x00000000;
+ /* Initialise IRQ sources */
+ for (i = 0; i < MAX_IRQ; i++) {
+ opp->src[i].ipvp = 0xA0000000;
+ opp->src[i].ide = 0x00000000;
+ }
+ /* Initialise IRQ destinations */
+ for (i = 0; i < opp->nb_cpus; i++) {
+ opp->dst[i].pctp = 0x0000000F;
+ opp->dst[i].pcsr = 0x00000000;
+ memset(&opp->dst[i].raised, 0, sizeof(IRQ_queue_t));
+ memset(&opp->dst[i].servicing, 0, sizeof(IRQ_queue_t));
+ }
+ /* Initialise timers */
+ for (i = 0; i < MAX_TMR; i++) {
+ opp->timers[i].ticc = 0x00000000;
+ opp->timers[i].tibc = 0x80000000;
+ }
+ /* Initialise doorbells */
+#if MAX_DBL > 0
+ opp->dar = 0x00000000;
+ for (i = 0; i < MAX_DBL; i++) {
+ opp->doorbells[i].dmr = 0x00000000;
+ }
+#endif
+ /* Initialise mailboxes */
+#if MAX_MBX > 0
+ for (i = 0; i < MAX_MBX; i++) { /* ? */
+ opp->mailboxes[i].mbr = 0x00000000;
+ }
+#endif
+ /* Go out of RESET state */
+ opp->glbc = 0x00000000;
+}
+
+static inline uint32_t read_IRQreg (openpic_t *opp, int n_IRQ, uint32_t reg)
+{
+ uint32_t retval;
+
+ switch (reg) {
+ case IRQ_IPVP:
+ retval = opp->src[n_IRQ].ipvp;
+ break;
+ case IRQ_IDE:
+ retval = opp->src[n_IRQ].ide;
+ break;
+ }
+
+ return retval;
+}
+
+static inline void write_IRQreg (openpic_t *opp, int n_IRQ,
+ uint32_t reg, uint32_t val)
+{
+ uint32_t tmp;
+
+ switch (reg) {
+ case IRQ_IPVP:
+ /* NOTE: not fully accurate for special IRQs, but simple and
+ sufficient */
+ /* ACTIVITY bit is read-only */
+ opp->src[n_IRQ].ipvp =
+ (opp->src[n_IRQ].ipvp & 0x40000000) |
+ (val & 0x800F00FF);
+ openpic_update_irq(opp, n_IRQ);
+ DPRINTF("Set IPVP %d to 0x%08x -> 0x%08x\n",
+ n_IRQ, val, opp->src[n_IRQ].ipvp);
+ break;
+ case IRQ_IDE:
+ tmp = val & 0xC0000000;
+ tmp |= val & ((1 << MAX_CPU) - 1);
+ opp->src[n_IRQ].ide = tmp;
+ DPRINTF("Set IDE %d to 0x%08x\n", n_IRQ, opp->src[n_IRQ].ide);
+ break;
+ }
+}
+
+#if 0 // Code provision for Intel model
+#if MAX_DBL > 0
+static uint32_t read_doorbell_register (openpic_t *opp,
+ int n_dbl, uint32_t offset)
+{
+ uint32_t retval;
+
+ switch (offset) {
+ case DBL_IPVP_OFFSET:
+ retval = read_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IPVP);
+ break;
+ case DBL_IDE_OFFSET:
+ retval = read_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IDE);
+ break;
+ case DBL_DMR_OFFSET:
+ retval = opp->doorbells[n_dbl].dmr;
+ break;
+ }
+
+ return retval;
+}
+
+static void write_doorbell_register (openpic_t *opp, int n_dbl,
+ uint32_t offset, uint32_t value)
+{
+ switch (offset) {
+ case DBL_IVPR_OFFSET:
+ write_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IPVP, value);
+ break;
+ case DBL_IDE_OFFSET:
+ write_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IDE, value);
+ break;
+ case DBL_DMR_OFFSET:
+ opp->doorbells[n_dbl].dmr = value;
+ break;
+ }
+}
+#endif
+
+#if MAX_MBX > 0
+static uint32_t read_mailbox_register (openpic_t *opp,
+ int n_mbx, uint32_t offset)
+{
+ uint32_t retval;
+
+ switch (offset) {
+ case MBX_MBR_OFFSET:
+ retval = opp->mailboxes[n_mbx].mbr;
+ break;
+ case MBX_IVPR_OFFSET:
+ retval = read_IRQreg(opp, IRQ_MBX0 + n_mbx, IRQ_IPVP);
+ break;
+ case MBX_DMR_OFFSET:
+ retval = read_IRQreg(opp, IRQ_MBX0 + n_mbx, IRQ_IDE);
+ break;
+ }
+
+ return retval;
+}
+
+static void write_mailbox_register (openpic_t *opp, int n_mbx,
+ uint32_t offset, uint32_t value)
+{
+ switch (offset) {
+ case MBX_MBR_OFFSET:
+ opp->mailboxes[n_mbx].mbr = value;
+ break;
+ case MBX_IVPR_OFFSET:
+ write_IRQreg(opp, IRQ_MBX0 + n_mbx, IRQ_IPVP, value);
+ break;
+ case MBX_DMR_OFFSET:
+ write_IRQreg(opp, IRQ_MBX0 + n_mbx, IRQ_IDE, value);
+ break;
+ }
+}
+#endif
+#endif /* 0 : Code provision for Intel model */
+
+/*
+ * Write handler for the OpenPIC global register block (low 8 bits of the
+ * offset select the register).  Non-16-byte-aligned accesses are silently
+ * dropped.  FREP and VENI are read-only, so writes to them are ignored.
+ */
+static void openpic_gbl_write (void *opaque, uint32_t addr, uint32_t val)
+{
+ openpic_t *opp = opaque;
+
+ DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return;
+#if defined OPENPIC_SWAP
+ val = bswap32(val);
+#endif
+ addr &= 0xFF;
+ switch (addr) {
+ case 0x00: /* FREP */
+ break;
+ case 0x20: /* GLBC */
+ /* Bit 31 requests a controller reset; it is not stored. */
+ if (val & 0x80000000)
+ openpic_reset(opp);
+ opp->glbc = val & ~0x80000000;
+ break;
+ case 0x80: /* VENI */
+ break;
+ case 0x90: /* PINT */
+ /* XXX: Should be able to reset any CPU */
+ if (val & 1) {
+ DPRINTF("Reset CPU IRQ\n");
+ // cpu_interrupt(cpu_single_env, CPU_INTERRUPT_RESET);
+ }
+ break;
+#if MAX_IPI > 0
+ case 0xA0: /* IPI_IPVP */
+ case 0xB0:
+ case 0xC0:
+ case 0xD0:
+ {
+ int idx;
+ /* IPI vector/priority registers are 0x10 apart starting at 0xA0. */
+ idx = (addr - 0xA0) >> 4;
+ write_IRQreg(opp, IRQ_IPI0 + idx, IRQ_IPVP, val);
+ }
+ break;
+#endif
+ case 0xE0: /* SPVE */
+ opp->spve = val & 0x000000FF;
+ break;
+ case 0xF0: /* TIFR */
+ opp->tifr = val;
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Read handler for the OpenPIC global register block.  Misaligned or
+ * unknown offsets read as 0xFFFFFFFF; PINT always reads as zero.
+ */
+static uint32_t openpic_gbl_read (void *opaque, uint32_t addr)
+{
+ openpic_t *opp = opaque;
+ uint32_t retval;
+
+ DPRINTF("%s: addr %08x\n", __func__, addr);
+ retval = 0xFFFFFFFF;
+ if (addr & 0xF)
+ return retval;
+ addr &= 0xFF;
+ switch (addr) {
+ case 0x00: /* FREP */
+ retval = opp->frep;
+ break;
+ case 0x20: /* GLBC */
+ retval = opp->glbc;
+ break;
+ case 0x80: /* VENI */
+ retval = opp->veni;
+ break;
+ case 0x90: /* PINT */
+ retval = 0x00000000;
+ break;
+#if MAX_IPI > 0
+ case 0xA0: /* IPI_IPVP */
+ case 0xB0:
+ case 0xC0:
+ case 0xD0:
+ {
+ int idx;
+ idx = (addr - 0xA0) >> 4;
+ retval = read_IRQreg(opp, IRQ_IPI0 + idx, IRQ_IPVP);
+ }
+ break;
+#endif
+ case 0xE0: /* SPVE */
+ retval = opp->spve;
+ break;
+ case 0xF0: /* TIFR */
+ retval = opp->tifr;
+ break;
+ default:
+ break;
+ }
+ DPRINTF("%s: => %08x\n", __func__, retval);
+#if defined OPENPIC_SWAP
+ retval = bswap32(retval);
+#endif
+
+ return retval;
+}
+
+/*
+ * Write handler for the per-timer register block (base 0x1100, one 0x40
+ * stride per timer).  Register offsets within a timer: 0x00 TICC (current
+ * count, read-only here), 0x10 TIBC (base count), 0x20 TIVP, 0x30 TIDE.
+ */
+static void openpic_timer_write (void *opaque, uint32_t addr, uint32_t val)
+{
+    openpic_t *opp = opaque;
+    int idx;
+
+    DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
+    if (addr & 0xF)
+        return;
+#if defined OPENPIC_SWAP
+    val = bswap32(val);
+#endif
+    addr -= 0x1100;
+    addr &= 0xFFFF;
+    idx = (addr & 0xFFF0) >> 6;
+    addr = addr & 0x30;
+    switch (addr) {
+    case 0x00: /* TICC */
+        break;
+    case 0x10: /* TIBC */
+        /* Clearing the count-inhibit bit (31) while the current count is
+         * halted restarts the timer.
+         * fix: the middle mask was written 0x800000000 (nine hex digits),
+         * which has no bits in common with a 32-bit val, so the test was
+         * always true; the intended mask is 0x80000000. */
+        if ((opp->timers[idx].ticc & 0x80000000) != 0 &&
+            (val & 0x80000000) == 0 &&
+            (opp->timers[idx].tibc & 0x80000000) != 0)
+            opp->timers[idx].ticc &= ~0x80000000;
+        opp->timers[idx].tibc = val;
+        break;
+    case 0x20: /* TIVP */
+        write_IRQreg(opp, IRQ_TIM0 + idx, IRQ_IPVP, val);
+        break;
+    case 0x30: /* TIDE */
+        write_IRQreg(opp, IRQ_TIM0 + idx, IRQ_IDE, val);
+        break;
+    }
+}
+
+/*
+ * Read handler for the per-timer register block; mirrors
+ * openpic_timer_write().  Misaligned accesses read as 0xFFFFFFFF.
+ */
+static uint32_t openpic_timer_read (void *opaque, uint32_t addr)
+{
+ openpic_t *opp = opaque;
+ uint32_t retval;
+ int idx;
+
+ DPRINTF("%s: addr %08x\n", __func__, addr);
+ retval = 0xFFFFFFFF;
+ if (addr & 0xF)
+ return retval;
+ addr -= 0x1100;
+ addr &= 0xFFFF;
+ idx = (addr & 0xFFF0) >> 6;
+ addr = addr & 0x30;
+ switch (addr) {
+ case 0x00: /* TICC */
+ retval = opp->timers[idx].ticc;
+ break;
+ case 0x10: /* TIBC */
+ retval = opp->timers[idx].tibc;
+ break;
+ case 0x20: /* TIPV */
+ retval = read_IRQreg(opp, IRQ_TIM0 + idx, IRQ_IPVP);
+ break;
+ case 0x30: /* TIDE */
+ retval = read_IRQreg(opp, IRQ_TIM0 + idx, IRQ_IDE);
+ break;
+ }
+ DPRINTF("%s: => %08x\n", __func__, retval);
+#if defined OPENPIC_SWAP
+ retval = bswap32(retval);
+#endif
+
+ return retval;
+}
+
+/*
+ * Write handler for interrupt-source registers: each source has a
+ * vector/priority register (offset 0x00 within its 0x20 stride) and a
+ * destination register (offset 0x10).
+ * NOTE(review): this uses tswap32 under OPENPIC_SWAP while the other
+ * handlers use bswap32 -- confirm which is intended.
+ */
+static void openpic_src_write (void *opaque, uint32_t addr, uint32_t val)
+{
+ openpic_t *opp = opaque;
+ int idx;
+
+ DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return;
+#if defined OPENPIC_SWAP
+ val = tswap32(val);
+#endif
+ addr = addr & 0xFFF0;
+ idx = addr >> 5;
+ if (addr & 0x10) {
+ /* EXDE / IFEDE / IEEDE */
+ write_IRQreg(opp, idx, IRQ_IDE, val);
+ } else {
+ /* EXVP / IFEVP / IEEVP */
+ write_IRQreg(opp, idx, IRQ_IPVP, val);
+ }
+}
+
+/*
+ * Read handler for interrupt-source registers; mirrors
+ * openpic_src_write().
+ * NOTE(review): uses tswap32 under OPENPIC_SWAP while the global/timer/cpu
+ * handlers use bswap32 -- confirm which is intended.
+ */
+static uint32_t openpic_src_read (void *opaque, uint32_t addr)
+{
+ openpic_t *opp = opaque;
+ uint32_t retval;
+ int idx;
+
+ DPRINTF("%s: addr %08x\n", __func__, addr);
+ retval = 0xFFFFFFFF;
+ if (addr & 0xF)
+ return retval;
+ addr = addr & 0xFFF0;
+ idx = addr >> 5;
+ if (addr & 0x10) {
+ /* EXDE / IFEDE / IEEDE */
+ retval = read_IRQreg(opp, idx, IRQ_IDE);
+ } else {
+ /* EXVP / IFEVP / IEEVP */
+ retval = read_IRQreg(opp, idx, IRQ_IPVP);
+ }
+ DPRINTF("%s: => %08x\n", __func__, retval);
+#if defined OPENPIC_SWAP
+ retval = tswap32(retval);
+#endif
+
+ return retval;
+}
+
+/*
+ * Write handler for the per-CPU register block (one 0x1000 page per CPU).
+ * Handles IPI dispatch (pulses the IPI source), current task priority
+ * (PCTP) and end-of-interrupt (PEOI) processing.
+ */
+static void openpic_cpu_write (void *opaque, uint32_t addr, uint32_t val)
+{
+ openpic_t *opp = opaque;
+ IRQ_src_t *src;
+ IRQ_dst_t *dst;
+ int idx, n_IRQ;
+
+ DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
+ if (addr & 0xF)
+ return;
+#if defined OPENPIC_SWAP
+ val = bswap32(val);
+#endif
+ addr &= 0x1FFF0;
+ idx = addr / 0x1000;
+ dst = &opp->dst[idx];
+ addr &= 0xFF0;
+ switch (addr) {
+#if MAX_IPI > 0
+ case 0x40: /* PIPD */
+ case 0x50:
+ case 0x60:
+ case 0x70:
+ idx = (addr - 0x40) >> 4;
+ write_IRQreg(opp, IRQ_IPI0 + idx, IRQ_IDE, val);
+ /* Pulse the IPI: raise then immediately lower the source. */
+ openpic_set_irq(opp, IRQ_IPI0 + idx, 1);
+ openpic_set_irq(opp, IRQ_IPI0 + idx, 0);
+ break;
+#endif
+ case 0x80: /* PCTP */
+ dst->pctp = val & 0x0000000F;
+ break;
+ case 0x90: /* WHOAMI */
+ /* Read-only register */
+ break;
+ case 0xA0: /* PIAC */
+ /* Read-only register */
+ break;
+ case 0xB0: /* PEOI */
+ DPRINTF("PEOI\n");
+ /* Retire the highest-priority in-service IRQ ... */
+ n_IRQ = IRQ_get_next(opp, &dst->servicing);
+ IRQ_resetbit(&dst->servicing, n_IRQ);
+ dst->servicing.next = -1;
+ /* NOTE(review): this assignment to src is dead -- src is
+ * overwritten below before any use. */
+ src = &opp->src[n_IRQ];
+ /* Set up next servicing IRQ */
+ IRQ_get_next(opp, &dst->servicing);
+ /* Check queued interrupts. */
+ n_IRQ = IRQ_get_next(opp, &dst->raised);
+ if (n_IRQ != -1) {
+ src = &opp->src[n_IRQ];
+ if (IPVP_PRIORITY(src->ipvp) > dst->servicing.priority) {
+ DPRINTF("Raise CPU IRQ\n");
+ cpu_interrupt(cpu_single_env, CPU_INTERRUPT_HARD);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Read handler for the per-CPU register block.  The interesting case is
+ * PIAC (interrupt acknowledge): it returns the vector of the pending IRQ
+ * (or the spurious vector) and transitions that IRQ into servicing state.
+ */
+static uint32_t openpic_cpu_read (void *opaque, uint32_t addr)
+{
+ openpic_t *opp = opaque;
+ IRQ_src_t *src;
+ IRQ_dst_t *dst;
+ uint32_t retval;
+ int idx, n_IRQ;
+
+ DPRINTF("%s: addr %08x\n", __func__, addr);
+ retval = 0xFFFFFFFF;
+ if (addr & 0xF)
+ return retval;
+ addr &= 0x1FFF0;
+ idx = addr / 0x1000;
+ dst = &opp->dst[idx];
+ addr &= 0xFF0;
+ switch (addr) {
+ case 0x80: /* PCTP */
+ retval = dst->pctp;
+ break;
+ case 0x90: /* WHOAMI */
+ retval = idx;
+ break;
+ case 0xA0: /* PIAC */
+ n_IRQ = IRQ_get_next(opp, &dst->raised);
+ DPRINTF("PIAC: irq=%d\n", n_IRQ);
+ if (n_IRQ == -1) {
+ /* No more interrupt pending */
+ retval = opp->spve;
+ } else {
+ src = &opp->src[n_IRQ];
+ if (!test_bit(&src->ipvp, IPVP_ACTIVITY) ||
+ !(IPVP_PRIORITY(src->ipvp) > dst->pctp)) {
+ /* - Spurious level-sensitive IRQ
+ * - Priorities have been changed
+ * and the pending IRQ isn't allowed anymore
+ */
+ reset_bit(&src->ipvp, IPVP_ACTIVITY);
+ retval = IPVP_VECTOR(opp->spve);
+ } else {
+ /* IRQ enter servicing state */
+ IRQ_setbit(&dst->servicing, n_IRQ);
+ retval = IPVP_VECTOR(src->ipvp);
+ }
+ IRQ_resetbit(&dst->raised, n_IRQ);
+ dst->raised.next = -1;
+ if (!test_bit(&src->ipvp, IPVP_SENSE)) {
+ /* edge-sensitive IRQ */
+ reset_bit(&src->ipvp, IPVP_ACTIVITY);
+ src->pending = 0;
+ }
+ }
+ break;
+ case 0xB0: /* PEOI */
+ retval = 0;
+ break;
+#if MAX_IPI > 0
+ /* NOTE(review): only IPI 0 and 1 dispatch registers are readable
+ * here, while the write side handles 0x40-0x70 -- confirm whether
+ * cases 0x60/0x70 are missing intentionally. */
+ case 0x40: /* IDE */
+ case 0x50:
+ idx = (addr - 0x40) >> 4;
+ retval = read_IRQreg(opp, IRQ_IPI0 + idx, IRQ_IDE);
+ break;
+#endif
+ default:
+ break;
+ }
+ DPRINTF("%s: => %08x\n", __func__, retval);
+#if defined OPENPIC_SWAP
+ retval= bswap32(retval);
+#endif
+
+ return retval;
+}
+
+/* Catch-all for byte/word accesses: OpenPIC registers are 32-bit only, so
+ * any narrower write is reported and dropped. */
+static void openpic_buggy_write (void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ printf("Invalid OPENPIC write access !\n");
+}
+
+/* Catch-all for byte/word reads; reports the access and returns all ones. */
+static uint32_t openpic_buggy_read (void *opaque, target_phys_addr_t addr)
+{
+ printf("Invalid OPENPIC read access !\n");
+
+ return -1;
+}
+
+/*
+ * Top-level 32-bit write dispatcher: routes a write within the 256 KiB
+ * OpenPIC window to the global (< 0x1100), timer (< 0x10000), source
+ * (< 0x20000) or per-CPU register handler.
+ */
+static void openpic_writel (void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ openpic_t *opp = opaque;
+
+ addr &= 0x3FFFF;
+ DPRINTF("%s: offset %08x val: %08x\n", __func__, (int)addr, val);
+ if (addr < 0x1100) {
+ /* Global registers */
+ openpic_gbl_write(opp, addr, val);
+ } else if (addr < 0x10000) {
+ /* Timers registers */
+ openpic_timer_write(opp, addr, val);
+ } else if (addr < 0x20000) {
+ /* Source registers */
+ openpic_src_write(opp, addr, val);
+ } else {
+ /* CPU registers */
+ openpic_cpu_write(opp, addr, val);
+ }
+}
+
+/*
+ * Top-level 32-bit read dispatcher; same region split as openpic_writel().
+ */
+static uint32_t openpic_readl (void *opaque,target_phys_addr_t addr)
+{
+ openpic_t *opp = opaque;
+ uint32_t retval;
+
+ addr &= 0x3FFFF;
+ DPRINTF("%s: offset %08x\n", __func__, (int)addr);
+ if (addr < 0x1100) {
+ /* Global registers */
+ retval = openpic_gbl_read(opp, addr);
+ } else if (addr < 0x10000) {
+ /* Timers registers */
+ retval = openpic_timer_read(opp, addr);
+ } else if (addr < 0x20000) {
+ /* Source registers */
+ retval = openpic_src_read(opp, addr);
+ } else {
+ /* CPU registers */
+ retval = openpic_cpu_read(opp, addr);
+ }
+
+ return retval;
+}
+
+/* Memory-access dispatch tables indexed by access size (byte, word,
+ * longword): only 32-bit accesses are valid, smaller ones hit the
+ * "buggy" reporters above. */
+static CPUWriteMemoryFunc *openpic_write[] = {
+ &openpic_buggy_write,
+ &openpic_buggy_write,
+ &openpic_writel,
+};
+
+static CPUReadMemoryFunc *openpic_read[] = {
+ &openpic_buggy_read,
+ &openpic_buggy_read,
+ &openpic_readl,
+};
+
+/*
+ * PCI BAR mapping callback: binds the device's 256 KiB register window at
+ * the address assigned by the PCI layer.  The DPRINTFs document the
+ * sub-region layout (global, timer, source, per-CPU).
+ * NOTE(review): relies on the openpic_t starting with its PCIDevice so the
+ * pci_dev pointer can be cast back -- confirm the struct layout.
+ */
+static void openpic_map(PCIDevice *pci_dev, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ openpic_t *opp;
+
+ DPRINTF("Map OpenPIC\n");
+ opp = (openpic_t *)pci_dev;
+ /* Global registers */
+ DPRINTF("Register OPENPIC gbl %08x => %08x\n",
+ addr + 0x1000, addr + 0x1000 + 0x100);
+ /* Timer registers */
+ DPRINTF("Register OPENPIC timer %08x => %08x\n",
+ addr + 0x1100, addr + 0x1100 + 0x40 * MAX_TMR);
+ /* Interrupt source registers */
+ DPRINTF("Register OPENPIC src %08x => %08x\n",
+ addr + 0x10000, addr + 0x10000 + 0x20 * (EXT_IRQ + 2));
+ /* Per CPU registers */
+ DPRINTF("Register OPENPIC dst %08x => %08x\n",
+ addr + 0x20000, addr + 0x20000 + 0x1000 * MAX_CPU);
+ cpu_register_physical_memory(addr, 0x40000, opp->mem_index);
+#if 0 // Don't implement ISU for now
+ opp_io_memory = cpu_register_io_memory(0, openpic_src_read,
+ openpic_src_write);
+ cpu_register_physical_memory(isu_base, 0x20 * (EXT_IRQ + 2),
+ opp_io_memory);
+#endif
+}
+
+/*
+ * Create and reset an OpenPIC instance.  When a PCI bus is given the
+ * controller is registered as a PCI device (IBM MPIC2 IDs) with a 256 KiB
+ * memory BAR; otherwise it is a plain memory-mapped device.  On success
+ * the io-memory index is stored through pmem_index (if non-NULL).
+ * Returns NULL if nb_cpus != 1 (only one CPU supported for now) or if PCI
+ * registration fails.
+ */
+openpic_t *openpic_init (PCIBus *bus, int *pmem_index, int nb_cpus)
+{
+ openpic_t *opp;
+ uint8_t *pci_conf;
+ int i, m;
+
+ /* XXX: for now, only one CPU is supported */
+ if (nb_cpus != 1)
+ return NULL;
+ if (bus) {
+ opp = (openpic_t *)pci_register_device(bus, "OpenPIC", sizeof(openpic_t),
+ -1, NULL, NULL);
+ if (opp == NULL)
+ return NULL;
+ pci_conf = opp->pci_dev.config;
+ pci_conf[0x00] = 0x14; // IBM MPIC2
+ pci_conf[0x01] = 0x10;
+ pci_conf[0x02] = 0xFF;
+ pci_conf[0x03] = 0xFF;
+ pci_conf[0x0a] = 0x80; // PIC
+ pci_conf[0x0b] = 0x08;
+ pci_conf[0x0e] = 0x00; // header_type
+ pci_conf[0x3d] = 0x00; // no interrupt pin
+
+ /* Register I/O spaces */
+ pci_register_io_region((PCIDevice *)opp, 0, 0x40000,
+ PCI_ADDRESS_SPACE_MEM, &openpic_map);
+ } else {
+ opp = qemu_mallocz(sizeof(openpic_t));
+ }
+
+ opp->mem_index = cpu_register_io_memory(0, openpic_read,
+ openpic_write, opp);
+
+ // isu_base &= 0xFFFC0000;
+ opp->nb_cpus = nb_cpus;
+ /* Set IRQ types: externals first, then specials, timers and the rest
+ * as internal sources.  The loops share the index i deliberately. */
+ for (i = 0; i < EXT_IRQ; i++) {
+ opp->src[i].type = IRQ_EXTERNAL;
+ }
+ for (; i < IRQ_TIM0; i++) {
+ opp->src[i].type = IRQ_SPECIAL;
+ }
+#if MAX_IPI > 0
+ m = IRQ_IPI0;
+#else
+ m = IRQ_DBL0;
+#endif
+ for (; i < m; i++) {
+ opp->src[i].type = IRQ_TIMER;
+ }
+ for (; i < MAX_IRQ; i++) {
+ opp->src[i].type = IRQ_INTERNAL;
+ }
+ openpic_reset(opp);
+ if (pmem_index)
+ *pmem_index = opp->mem_index;
+ return opp;
+}
diff --git a/tools/ioemu/hw/pc.c b/tools/ioemu/hw/pc.c
new file mode 100644
index 0000000000..70cafc55f1
--- /dev/null
+++ b/tools/ioemu/hw/pc.c
@@ -0,0 +1,573 @@
+/*
+ * QEMU PC System Emulator
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* output Bochs bios info messages */
+//#define DEBUG_BIOS
+
+#define BIOS_FILENAME "bios.bin"
+#define VGABIOS_FILENAME "vgabios.bin"
+#define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
+#define LINUX_BOOT_FILENAME "linux_boot.bin"
+
+#define KERNEL_LOAD_ADDR 0x00100000
+#define INITRD_LOAD_ADDR 0x00400000
+#define KERNEL_PARAMS_ADDR 0x00090000
+#define KERNEL_CMDLINE_ADDR 0x00099000
+
+int speaker_data_on;
+int dummy_refresh_clock;
+static fdctrl_t *floppy_controller;
+static RTCState *rtc_state;
+static PITState *pit;
+
+/* Port 0x80 (POST/diagnostic port): writes are accepted and discarded;
+ * the BIOS uses this port for short I/O delays. */
+static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
+{
+}
+
+/* MSDOS compatibility mode FPU exception support */
+/* XXX: add IGNNE support */
+/* Raise IRQ13 to signal an x87 FPU error (FERR# line emulation). */
+void cpu_set_ferr(CPUX86State *s)
+{
+ pic_set_irq(13, 1);
+}
+
+/* Port 0xF0 write: clear the pending FPU-error interrupt (IRQ13). */
+static void ioportF0_write(void *opaque, uint32_t addr, uint32_t data)
+{
+ pic_set_irq(13, 0);
+}
+
+/* TSC handling */
+
+/* Return a TSC value derived from the virtual machine clock rather than
+ * a real cycle counter. */
+uint64_t cpu_get_tsc(CPUX86State *env)
+{
+ return qemu_get_clock(vm_clock);
+}
+
+/* PC cmos mappings */
+
+#define REG_EQUIPMENT_BYTE 0x14
+#define REG_IBM_CENTURY_BYTE 0x32
+#define REG_IBM_PS2_CENTURY_BYTE 0x37
+
+
+/* Convert a binary value 0-99 to packed BCD (e.g. 45 -> 0x45).  The
+ * RTCState argument is unused; kept for signature symmetry with the RTC
+ * helpers. */
+static inline int to_bcd(RTCState *s, int a)
+{
+ return ((a / 10) << 4) | (a % 10);
+}
+
+/* Map a qemu floppy drive type to the CMOS drive-type nibble
+ * (4 = 1.44MB 3.5", 5 = 2.88MB 3.5", 2 = 1.2MB 5.25", 0 = none). */
+static int cmos_get_fd_drive_type(int fd0)
+{
+ int val;
+
+ switch (fd0) {
+ case 0:
+ /* 1.44 Mb 3"5 drive */
+ val = 4;
+ break;
+ case 1:
+ /* 2.88 Mb 3"5 drive */
+ val = 5;
+ break;
+ case 2:
+ /* 1.2 Mb 5"5 drive */
+ val = 2;
+ break;
+ default:
+ val = 0;
+ break;
+ }
+ return val;
+}
+
+/* Fill the CMOS entries describing one hard disk: drive type 47
+ * (user-defined) at type_ofs, then the 9-byte geometry record
+ * (cylinders, heads, landing zone 0xffff, control byte, sectors)
+ * starting at info_ofs. */
+static void cmos_init_hd(int type_ofs, int info_ofs, BlockDriverState *hd)
+{
+ RTCState *s = rtc_state;
+ int cylinders, heads, sectors;
+ bdrv_get_geometry_hint(hd, &cylinders, &heads, &sectors);
+ rtc_set_memory(s, type_ofs, 47);
+ rtc_set_memory(s, info_ofs, cylinders);
+ rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
+ rtc_set_memory(s, info_ofs + 2, heads);
+ rtc_set_memory(s, info_ofs + 3, 0xff);
+ rtc_set_memory(s, info_ofs + 4, 0xff);
+ rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
+ rtc_set_memory(s, info_ofs + 6, cylinders);
+ rtc_set_memory(s, info_ofs + 7, cylinders >> 8);
+ rtc_set_memory(s, info_ofs + 8, sectors);
+}
+
+/* hd_table must contain 4 block drivers */
+/*
+ * Populate CMOS/RTC memory with everything the Bochs BIOS reads at boot:
+ * date/century, memory sizes, boot device, floppy and hard-disk geometry,
+ * equipment byte and BIOS disk-translation flags.
+ */
+static void cmos_init(int ram_size, int boot_device, BlockDriverState **hd_table)
+{
+ RTCState *s = rtc_state;
+ int val;
+ int fd0, fd1, nb;
+ time_t ti;
+ struct tm *tm;
+ int i;
+
+ /* set the CMOS date */
+ time(&ti);
+ if (rtc_utc)
+ tm = gmtime(&ti);
+ else
+ tm = localtime(&ti);
+ rtc_set_date(s, tm);
+
+ /* tm_year counts from 1900, so century = year/100 + 19, stored as BCD. */
+ val = to_bcd(s, (tm->tm_year / 100) + 19);
+ rtc_set_memory(s, REG_IBM_CENTURY_BYTE, val);
+ rtc_set_memory(s, REG_IBM_PS2_CENTURY_BYTE, val);
+
+ /* various important CMOS locations needed by PC/Bochs bios */
+
+ /* memory size */
+ val = 640; /* base memory in K */
+ rtc_set_memory(s, 0x15, val);
+ rtc_set_memory(s, 0x16, val >> 8);
+
+ /* Extended memory (KB above 1MB), capped at the 16-bit field limit. */
+ val = (ram_size / 1024) - 1024;
+ if (val > 65535)
+ val = 65535;
+ rtc_set_memory(s, 0x17, val);
+ rtc_set_memory(s, 0x18, val >> 8);
+ rtc_set_memory(s, 0x30, val);
+ rtc_set_memory(s, 0x31, val >> 8);
+
+ /* Memory above 16MB, in 64KB units. */
+ if (ram_size > (16 * 1024 * 1024))
+ val = (ram_size / 65536) - ((16 * 1024 * 1024) / 65536);
+ else
+ val = 0;
+ if (val > 65535)
+ val = 65535;
+ rtc_set_memory(s, 0x34, val);
+ rtc_set_memory(s, 0x35, val >> 8);
+
+ switch(boot_device) {
+ case 'a':
+ case 'b':
+ rtc_set_memory(s, 0x3d, 0x01); /* floppy boot */
+ break;
+ default:
+ case 'c':
+ rtc_set_memory(s, 0x3d, 0x02); /* hard drive boot */
+ break;
+ case 'd':
+ rtc_set_memory(s, 0x3d, 0x03); /* CD-ROM boot */
+ break;
+ }
+
+ /* floppy type */
+
+ fd0 = fdctrl_get_drive_type(floppy_controller, 0);
+ fd1 = fdctrl_get_drive_type(floppy_controller, 1);
+
+ val = (cmos_get_fd_drive_type(fd0) << 4) | cmos_get_fd_drive_type(fd1);
+ rtc_set_memory(s, 0x10, val);
+
+ /* Equipment byte: drive count, FPU and PS/2 mouse presence. */
+ val = 0;
+ nb = 0;
+ if (fd0 < 3)
+ nb++;
+ if (fd1 < 3)
+ nb++;
+ switch (nb) {
+ case 0:
+ break;
+ case 1:
+ val |= 0x01; /* 1 drive, ready for boot */
+ break;
+ case 2:
+ val |= 0x41; /* 2 drives, ready for boot */
+ break;
+ }
+ val |= 0x02; /* FPU is there */
+ val |= 0x04; /* PS/2 mouse installed */
+ rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);
+
+ /* hard drives */
+
+ rtc_set_memory(s, 0x12, (hd_table[0] ? 0xf0 : 0) | (hd_table[1] ? 0x0f : 0));
+ if (hd_table[0])
+ cmos_init_hd(0x19, 0x1b, hd_table[0]);
+ if (hd_table[1])
+ cmos_init_hd(0x1a, 0x24, hd_table[1]);
+
+ /* Per-disk BIOS translation mode: 2 bits per drive in CMOS 0x39. */
+ val = 0;
+ for (i = 0; i < 4; i++) {
+ if (hd_table[i]) {
+ int cylinders, heads, sectors;
+ uint8_t translation;
+ /* NOTE: bdrv_get_geometry_hint() returns the geometry
+ that the hard disk returns. It is always such that: 1 <=
+ sects <= 63, 1 <= heads <= 16, 1 <= cylinders <=
+ 16383. The BIOS geometry can be different. */
+ bdrv_get_geometry_hint(hd_table[i], &cylinders, &heads, &sectors);
+ if (cylinders <= 1024 && heads <= 16 && sectors <= 63) {
+ /* No translation. */
+ translation = 0;
+ } else {
+ /* LBA translation. */
+ translation = 1;
+ }
+ val |= translation << (i * 2);
+ }
+ }
+ rtc_set_memory(s, 0x39, val);
+
+ /* Disable check of 0x55AA signature on the last two bytes of
+ first sector of disk. XXX: make it the default ? */
+ // rtc_set_memory(s, 0x38, 1);
+}
+
+/* Port 0x61 write: bit 0 gates PIT channel 2, bit 1 enables speaker data. */
+static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(pit, 2, val & 1);
+}
+
+/* Port 0x61 read: speaker/gate state plus PIT channel 2 output (bit 5)
+ * and a toggling fake refresh clock (bit 4) so polling loops make
+ * progress. */
+static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
+{
+ int out;
+ out = pit_get_out(pit, 2, qemu_get_clock(vm_clock));
+ dummy_refresh_clock ^= 1;
+ return (speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
+ (dummy_refresh_clock << 4);
+}
+
+/* Port 0x92 (System Control Port A) write: bit 1 controls the A20 gate. */
+static void ioport92_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ cpu_x86_set_a20(cpu_single_env, (val >> 1) & 1);
+ /* XXX: bit 0 is fast reset */
+}
+
+/* Port 0x92 read: reflect the current A20 gate state in bit 1. */
+static uint32_t ioport92_read(void *opaque, uint32_t addr)
+{
+ return ((cpu_single_env->a20_mask >> 20) & 1) << 1;
+}
+
+/***********************************************************/
+/* Bochs BIOS debug ports */
+
+/*
+ * I/O handler for the Bochs BIOS debug ports: 0x400/0x401 and 0x501/0x502
+ * are panic ports (value = source line number, terminates qemu),
+ * 0x402/0x403 and 0x500/0x503 stream debug characters, and 0x8900 powers
+ * off when the string "Shutdown" is written byte by byte.
+ */
+void bochs_bios_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ static const char shutdown_str[8] = "Shutdown";
+ static int shutdown_index = 0;
+
+ switch(addr) {
+ /* Bochs BIOS messages */
+ case 0x400:
+ case 0x401:
+ fprintf(stderr, "BIOS panic at rombios.c, line %d\n", val);
+ exit(1);
+ case 0x402:
+ case 0x403:
+#ifdef DEBUG_BIOS
+ fprintf(stderr, "%c", val);
+#endif
+ break;
+ case 0x8900:
+ /* same as Bochs power off */
+ if (val == shutdown_str[shutdown_index]) {
+ shutdown_index++;
+ if (shutdown_index == 8) {
+ shutdown_index = 0;
+ qemu_system_shutdown_request();
+ }
+ } else {
+ shutdown_index = 0;
+ }
+ break;
+
+ /* LGPL'ed VGA BIOS messages */
+ case 0x501:
+ case 0x502:
+ fprintf(stderr, "VGA BIOS panic, line %d\n", val);
+ exit(1);
+ case 0x500:
+ case 0x503:
+#ifdef DEBUG_BIOS
+ fprintf(stderr, "%c", val);
+#endif
+ break;
+ }
+}
+
+/* Register all Bochs BIOS debug/panic/shutdown ports handled by
+ * bochs_bios_write() (panic ports are 16-bit, the rest 8-bit). */
+void bochs_bios_init(void)
+{
+ register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
+ register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL);
+ register_ioport_write(0x402, 1, 1, bochs_bios_write, NULL);
+ register_ioport_write(0x403, 1, 1, bochs_bios_write, NULL);
+ register_ioport_write(0x8900, 1, 1, bochs_bios_write, NULL);
+
+ register_ioport_write(0x501, 1, 2, bochs_bios_write, NULL);
+ register_ioport_write(0x502, 1, 2, bochs_bios_write, NULL);
+ register_ioport_write(0x500, 1, 1, bochs_bios_write, NULL);
+ register_ioport_write(0x503, 1, 1, bochs_bios_write, NULL);
+}
+
+
+/*
+ * Load a Linux kernel image following the x86 boot protocol: the boot
+ * sector plus setup sectors (count from header byte 0x1F1, defaulting to
+ * 4 when zero) go to real_addr, the 32-bit payload to addr.
+ * Returns the size of the 32-bit code, or -1 on any error (the fd is
+ * closed on all paths).
+ */
+int load_kernel(const char *filename, uint8_t *addr,
+ uint8_t *real_addr)
+{
+ int fd, size;
+ int setup_sects;
+
+ fd = open(filename, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ return -1;
+
+ /* load 16 bit code */
+ if (read(fd, real_addr, 512) != 512)
+ goto fail;
+ setup_sects = real_addr[0x1F1];
+ if (!setup_sects)
+ setup_sects = 4;
+ if (read(fd, real_addr + 512, setup_sects * 512) !=
+ setup_sects * 512)
+ goto fail;
+
+ /* load 32 bit code */
+ size = read(fd, addr, 16 * 1024 * 1024);
+ if (size < 0)
+ goto fail;
+ close(fd);
+ return size;
+ fail:
+ close(fd);
+ return -1;
+}
+
+static const int ide_iobase[2] = { 0x1f0, 0x170 };
+static const int ide_iobase2[2] = { 0x3f6, 0x376 };
+static const int ide_irq[2] = { 14, 15 };
+
+#define NE2000_NB_MAX 6
+
+static int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 };
+static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
+
+static int serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
+static int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 };
+
+#define NOBIOS 1
+
+/* PC hardware initialisation */
+/*
+ * Top-level machine construction: (optionally) loads BIOS/VGA BIOS images,
+ * sets up direct-Linux boot when a kernel is given, then instantiates the
+ * platform devices (PCI host, VGA, RTC, PICs, PIT, serial, NICs, IDE,
+ * keyboard, DMA, floppy) and finally programs CMOS.  NOBIOS is defined
+ * above, so the BIOS-loading sections are compiled out in this tree.
+ */
+void pc_init(int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename)
+{
+ char buf[1024];
+ int ret, linux_boot, initrd_size, i, nb_nics1;
+ unsigned long bios_offset, vga_bios_offset;
+ int bios_size, isa_bios_size;
+ PCIBus *pci_bus;
+
+ linux_boot = (kernel_filename != NULL);
+
+ /* allocate RAM */
+// cpu_register_physical_memory(0, ram_size, 0);
+
+#ifndef NOBIOS
+ /* BIOS load */
+ bios_offset = ram_size + vga_ram_size;
+ vga_bios_offset = bios_offset + 256 * 1024;
+
+ snprintf(buf, sizeof(buf), "%s/%s", bios_dir, BIOS_FILENAME);
+ bios_size = get_image_size(buf);
+ if (bios_size <= 0 ||
+ (bios_size % 65536) != 0 ||
+ bios_size > (256 * 1024)) {
+ goto bios_error;
+ }
+ ret = load_image(buf, phys_ram_base + bios_offset);
+ if (ret != bios_size) {
+ bios_error:
+ fprintf(stderr, "qemu: could not load PC bios '%s'\n", buf);
+ exit(1);
+ }
+
+ /* VGA BIOS load */
+ if (cirrus_vga_enabled) {
+ snprintf(buf, sizeof(buf), "%s/%s", bios_dir, VGABIOS_CIRRUS_FILENAME);
+ } else {
+ snprintf(buf, sizeof(buf), "%s/%s", bios_dir, VGABIOS_FILENAME);
+ }
+ ret = load_image(buf, phys_ram_base + vga_bios_offset);
+#endif
+
+#ifndef NOBIOS
+ /* setup basic memory access */
+ cpu_register_physical_memory(0xc0000, 0x10000,
+ vga_bios_offset | IO_MEM_ROM);
+
+ /* map the last 128KB of the BIOS in ISA space */
+ isa_bios_size = bios_size;
+ if (isa_bios_size > (128 * 1024))
+ isa_bios_size = 128 * 1024;
+ cpu_register_physical_memory(0xd0000, (192 * 1024) - isa_bios_size,
+ IO_MEM_UNASSIGNED);
+ cpu_register_physical_memory(0x100000 - isa_bios_size,
+ isa_bios_size,
+ (bios_offset + bios_size - isa_bios_size) | IO_MEM_ROM);
+ /* map all the bios at the top of memory */
+ cpu_register_physical_memory((uint32_t)(-bios_size),
+ bios_size, bios_offset | IO_MEM_ROM);
+#endif
+
+ bochs_bios_init();
+
+ if (linux_boot) {
+ uint8_t bootsect[512];
+ uint8_t old_bootsect[512];
+
+ if (bs_table[0] == NULL) {
+ fprintf(stderr, "A disk image must be given for 'hda' when booting a Linux kernel\n");
+ exit(1);
+ }
+ snprintf(buf, sizeof(buf), "%s/%s", bios_dir, LINUX_BOOT_FILENAME);
+ ret = load_image(buf, bootsect);
+ if (ret != sizeof(bootsect)) {
+ fprintf(stderr, "qemu: could not load linux boot sector '%s'\n",
+ buf);
+ exit(1);
+ }
+
+ if (bdrv_read(bs_table[0], 0, old_bootsect, 1) >= 0) {
+ /* copy the MSDOS partition table */
+ memcpy(bootsect + 0x1be, old_bootsect + 0x1be, 0x40);
+ }
+
+ bdrv_set_boot_sector(bs_table[0], bootsect, sizeof(bootsect));
+
+ /* now we can load the kernel */
+ ret = load_kernel(kernel_filename,
+ phys_ram_base + KERNEL_LOAD_ADDR,
+ phys_ram_base + KERNEL_PARAMS_ADDR);
+ if (ret < 0) {
+ fprintf(stderr, "qemu: could not load kernel '%s'\n",
+ kernel_filename);
+ exit(1);
+ }
+
+ /* load initrd */
+ initrd_size = 0;
+ if (initrd_filename) {
+ initrd_size = load_image(initrd_filename, phys_ram_base + INITRD_LOAD_ADDR);
+ if (initrd_size < 0) {
+ fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
+ initrd_filename);
+ exit(1);
+ }
+ }
+ /* Fill the Linux boot-protocol header fields: initrd address/size
+ * (0x218/0x21c), command-line magic 0xA33F and offset (0x20/0x22),
+ * and loader type (0x210). */
+ if (initrd_size > 0) {
+ stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x218, INITRD_LOAD_ADDR);
+ stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x21c, initrd_size);
+ }
+ pstrcpy(phys_ram_base + KERNEL_CMDLINE_ADDR, 4096,
+ kernel_cmdline);
+ stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x20, 0xA33F);
+ stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x22,
+ KERNEL_CMDLINE_ADDR - KERNEL_PARAMS_ADDR);
+ /* loader type */
+ stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x210, 0x01);
+ }
+
+ if (pci_enabled) {
+ pci_bus = i440fx_init();
+ piix3_init(pci_bus);
+ } else {
+ pci_bus = NULL;
+ }
+
+ /* init basic PC hardware */
+ register_ioport_write(0x80, 1, 1, ioport80_write, NULL);
+
+ register_ioport_write(0xf0, 1, 1, ioportF0_write, NULL);
+
+ if (cirrus_vga_enabled) {
+ if (pci_enabled) {
+ pci_cirrus_vga_init(pci_bus,
+ ds, phys_ram_base + ram_size, ram_size,
+ vga_ram_size);
+ } else {
+ isa_cirrus_vga_init(ds, phys_ram_base + ram_size, ram_size,
+ vga_ram_size);
+ }
+ } else {
+ vga_initialize(pci_bus, ds, phys_ram_base + ram_size, ram_size,
+ vga_ram_size);
+ }
+
+ rtc_state = rtc_init(0x70, 8);
+ register_ioport_read(0x61, 1, 1, speaker_ioport_read, NULL);
+ register_ioport_write(0x61, 1, 1, speaker_ioport_write, NULL);
+
+ register_ioport_read(0x92, 1, 1, ioport92_read, NULL);
+ register_ioport_write(0x92, 1, 1, ioport92_write, NULL);
+
+ pic_init();
+ pit = pit_init(0x40, 0);
+
+ for(i = 0; i < MAX_SERIAL_PORTS; i++) {
+ if (serial_hds[i]) {
+ serial_init(serial_io[i], serial_irq[i], serial_hds[i]);
+ }
+ }
+
+ if (pci_enabled) {
+ for(i = 0; i < nb_nics; i++) {
+ pci_ne2000_init(pci_bus, &nd_table[i]);
+ }
+ pci_piix3_ide_init(pci_bus, bs_table);
+ } else {
+ nb_nics1 = nb_nics;
+ if (nb_nics1 > NE2000_NB_MAX)
+ nb_nics1 = NE2000_NB_MAX;
+ for(i = 0; i < nb_nics1; i++) {
+ isa_ne2000_init(ne2000_io[i], ne2000_irq[i], &nd_table[i]);
+ }
+
+ for(i = 0; i < 2; i++) {
+ isa_ide_init(ide_iobase[i], ide_iobase2[i], ide_irq[i],
+ bs_table[2 * i], bs_table[2 * i + 1]);
+ }
+ }
+
+ kbd_init();
+ DMA_init(0);
+
+ floppy_controller = fdctrl_init(6, 2, 0, 0x3f0, fd_table);
+
+ cmos_init(ram_size, boot_device, bs_table);
+
+ /* must be done after all PCI devices are instantiated */
+ /* XXX: should be done in the Bochs BIOS */
+ if (pci_enabled) {
+ pci_bios_init();
+ }
+ port_e9_init();
+}
diff --git a/tools/ioemu/hw/pci.c b/tools/ioemu/hw/pci.c
new file mode 100644
index 0000000000..2fed66f0a1
--- /dev/null
+++ b/tools/ioemu/hw/pci.c
@@ -0,0 +1,1524 @@
+/*
+ * QEMU PCI bus manager
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+//#define DEBUG_PCI
+
+#define PCI_VENDOR_ID 0x00 /* 16 bits */
+#define PCI_DEVICE_ID 0x02 /* 16 bits */
+#define PCI_COMMAND 0x04 /* 16 bits */
+#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */
+#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */
+#define PCI_CLASS_DEVICE 0x0a /* Device class */
+#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */
+#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */
+#define PCI_MIN_GNT 0x3e /* 8 bits */
+#define PCI_MAX_LAT 0x3f /* 8 bits */
+
+/* just used for simpler irq handling. */
+#define PCI_DEVICES_MAX 64
+#define PCI_IRQ_WORDS ((PCI_DEVICES_MAX + 31) / 32)
+
+struct PCIBus {
+ int bus_num;
+ int devfn_min;
+ void (*set_irq)(PCIDevice *pci_dev, int irq_num, int level);
+ uint32_t config_reg; /* XXX: suppress */
+ openpic_t *openpic; /* XXX: suppress */
+ PCIDevice *devices[256];
+};
+
+target_phys_addr_t pci_mem_base;
+static int pci_irq_index;
+static uint32_t pci_irq_levels[4][PCI_IRQ_WORDS];
+static PCIBus *first_bus;
+
+static PCIBus *pci_register_bus(void)
+{
+ PCIBus *bus;
+ bus = qemu_mallocz(sizeof(PCIBus));
+ first_bus = bus;
+ return bus;
+}
+
+void generic_pci_save(QEMUFile* f, void *opaque)
+{
+ PCIDevice* s=(PCIDevice*)opaque;
+
+ qemu_put_buffer(f, s->config, 256);
+}
+
+int generic_pci_load(QEMUFile* f, void *opaque, int version_id)
+{
+ PCIDevice* s=(PCIDevice*)opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ qemu_get_buffer(f, s->config, 256);
+ return 0;
+}
+
+/* -1 for devfn means auto assign */
+PCIDevice *pci_register_device(PCIBus *bus, const char *name,
+ int instance_size, int devfn,
+ PCIConfigReadFunc *config_read,
+ PCIConfigWriteFunc *config_write)
+{
+ PCIDevice *pci_dev;
+
+ if (pci_irq_index >= PCI_DEVICES_MAX)
+ return NULL;
+
+ if (devfn < 0) {
+ for(devfn = bus->devfn_min ; devfn < 256; devfn += 8) {
+ if (!bus->devices[devfn])
+ goto found;
+ }
+ return NULL;
+ found: ;
+ }
+ pci_dev = qemu_mallocz(instance_size);
+ if (!pci_dev)
+ return NULL;
+ pci_dev->bus = bus;
+ pci_dev->devfn = devfn;
+ pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
+
+ if (!config_read)
+ config_read = pci_default_read_config;
+ if (!config_write)
+ config_write = pci_default_write_config;
+ pci_dev->config_read = config_read;
+ pci_dev->config_write = config_write;
+ pci_dev->irq_index = pci_irq_index++;
+ bus->devices[devfn] = pci_dev;
+ return pci_dev;
+}
+
+void pci_register_io_region(PCIDevice *pci_dev, int region_num,
+ uint32_t size, int type,
+ PCIMapIORegionFunc *map_func)
+{
+ PCIIORegion *r;
+
+ if ((unsigned int)region_num >= PCI_NUM_REGIONS)
+ return;
+ r = &pci_dev->io_regions[region_num];
+ r->addr = -1;
+ r->size = size;
+ r->type = type;
+ r->map_func = map_func;
+}
+
+static void pci_addr_writel(void* opaque, uint32_t addr, uint32_t val)
+{
+ PCIBus *s = opaque;
+ s->config_reg = val;
+}
+
+static uint32_t pci_addr_readl(void* opaque, uint32_t addr)
+{
+ PCIBus *s = opaque;
+ return s->config_reg;
+}
+
+static void pci_update_mappings(PCIDevice *d)
+{
+ PCIIORegion *r;
+ int cmd, i;
+ uint32_t last_addr, new_addr, config_ofs;
+
+ cmd = le16_to_cpu(*(uint16_t *)(d->config + PCI_COMMAND));
+ for(i = 0; i < PCI_NUM_REGIONS; i++) {
+ r = &d->io_regions[i];
+ if (i == PCI_ROM_SLOT) {
+ config_ofs = 0x30;
+ } else {
+ config_ofs = 0x10 + i * 4;
+ }
+ if (r->size != 0) {
+ if (r->type & PCI_ADDRESS_SPACE_IO) {
+ if (cmd & PCI_COMMAND_IO) {
+ new_addr = le32_to_cpu(*(uint32_t *)(d->config +
+ config_ofs));
+ new_addr = new_addr & ~(r->size - 1);
+ last_addr = new_addr + r->size - 1;
+ /* NOTE: we have only 64K ioports on PC */
+ if (last_addr <= new_addr || new_addr == 0 ||
+ last_addr >= 0x10000) {
+ new_addr = -1;
+ }
+ } else {
+ new_addr = -1;
+ }
+ } else {
+ if (cmd & PCI_COMMAND_MEMORY) {
+ new_addr = le32_to_cpu(*(uint32_t *)(d->config +
+ config_ofs));
+ /* the ROM slot has a specific enable bit */
+ if (i == PCI_ROM_SLOT && !(new_addr & 1))
+ goto no_mem_map;
+ new_addr = new_addr & ~(r->size - 1);
+ last_addr = new_addr + r->size - 1;
+ /* NOTE: we do not support wrapping */
+ /* XXX: as we cannot support really dynamic
+ mappings, we handle specific values as invalid
+ mappings. */
+ if (last_addr <= new_addr || new_addr == 0 ||
+ last_addr == -1) {
+ new_addr = -1;
+ }
+ } else {
+ no_mem_map:
+ new_addr = -1;
+ }
+ }
+ /* now do the real mapping */
+ if (new_addr != r->addr) {
+ if (r->addr != -1) {
+ if (r->type & PCI_ADDRESS_SPACE_IO) {
+ int class;
+ /* NOTE: specific hack for IDE in PC case:
+ only one byte must be mapped. */
+ class = d->config[0x0a] | (d->config[0x0b] << 8);
+ if (class == 0x0101 && r->size == 4) {
+ isa_unassign_ioport(r->addr + 2, 1);
+ } else {
+ isa_unassign_ioport(r->addr, r->size);
+ }
+ } else {
+ cpu_register_physical_memory(r->addr + pci_mem_base,
+ r->size,
+ IO_MEM_UNASSIGNED);
+ }
+ }
+ r->addr = new_addr;
+ if (r->addr != -1) {
+ r->map_func(d, i, r->addr, r->size, r->type);
+ }
+ }
+ }
+ }
+}
+
+uint32_t pci_default_read_config(PCIDevice *d,
+ uint32_t address, int len)
+{
+ uint32_t val;
+ switch(len) {
+ case 1:
+ val = d->config[address];
+ break;
+ case 2:
+ val = le16_to_cpu(*(uint16_t *)(d->config + address));
+ break;
+ default:
+ case 4:
+ val = le32_to_cpu(*(uint32_t *)(d->config + address));
+ break;
+ }
+ return val;
+}
+
+void pci_default_write_config(PCIDevice *d,
+ uint32_t address, uint32_t val, int len)
+{
+ int can_write, i;
+ uint32_t end, addr;
+
+ if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
+ (address >= 0x30 && address < 0x34))) {
+ PCIIORegion *r;
+ int reg;
+
+ if ( address >= 0x30 ) {
+ reg = PCI_ROM_SLOT;
+ }else{
+ reg = (address - 0x10) >> 2;
+ }
+ r = &d->io_regions[reg];
+ if (r->size == 0)
+ goto default_config;
+ /* compute the stored value */
+ if (reg == PCI_ROM_SLOT) {
+ /* keep ROM enable bit */
+ val &= (~(r->size - 1)) | 1;
+ } else {
+ val &= ~(r->size - 1);
+ val |= r->type;
+ }
+ *(uint32_t *)(d->config + address) = cpu_to_le32(val);
+ pci_update_mappings(d);
+ return;
+ }
+ default_config:
+ /* not efficient, but simple */
+ addr = address;
+ for(i = 0; i < len; i++) {
+ /* default read/write accesses */
+ switch(d->config[0x0e]) {
+ case 0x00:
+ case 0x80:
+ switch(addr) {
+ case 0x00:
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x08:
+ case 0x09:
+ case 0x0a:
+ case 0x0b:
+ case 0x0e:
+ case 0x10 ... 0x27: /* base */
+ case 0x30 ... 0x33: /* rom */
+ case 0x3d:
+ can_write = 0;
+ break;
+ default:
+ can_write = 1;
+ break;
+ }
+ break;
+ default:
+ case 0x01:
+ switch(addr) {
+ case 0x00:
+ case 0x01:
+ case 0x02:
+ case 0x03:
+ case 0x08:
+ case 0x09:
+ case 0x0a:
+ case 0x0b:
+ case 0x0e:
+ case 0x38 ... 0x3b: /* rom */
+ case 0x3d:
+ can_write = 0;
+ break;
+ default:
+ can_write = 1;
+ break;
+ }
+ break;
+ }
+ if (can_write) {
+ d->config[addr] = val;
+ }
+ addr++;
+ val >>= 8;
+ }
+
+ end = address + len;
+ if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
+ /* if the command register is modified, we must modify the mappings */
+ pci_update_mappings(d);
+ }
+}
+
+static void pci_data_write(void *opaque, uint32_t addr,
+ uint32_t val, int len)
+{
+ PCIBus *s = opaque;
+ PCIDevice *pci_dev;
+ int config_addr, bus_num;
+
+#if defined(DEBUG_PCI) && 0
+ printf("pci_data_write: addr=%08x val=%08x len=%d\n",
+ s->config_reg, val, len);
+#endif
+ if (!(s->config_reg & (1 << 31))) {
+ return;
+ }
+ if ((s->config_reg & 0x3) != 0) {
+ return;
+ }
+ bus_num = (s->config_reg >> 16) & 0xff;
+ if (bus_num != 0)
+ return;
+ pci_dev = s->devices[(s->config_reg >> 8) & 0xff];
+ if (!pci_dev)
+ return;
+ config_addr = (s->config_reg & 0xfc) | (addr & 3);
+#if defined(DEBUG_PCI)
+ printf("pci_config_write: %s: addr=%02x val=%08x len=%d\n",
+ pci_dev->name, config_addr, val, len);
+#endif
+ pci_dev->config_write(pci_dev, config_addr, val, len);
+}
+
+static uint32_t pci_data_read(void *opaque, uint32_t addr,
+ int len)
+{
+ PCIBus *s = opaque;
+ PCIDevice *pci_dev;
+ int config_addr, bus_num;
+ uint32_t val;
+
+ if (!(s->config_reg & (1 << 31)))
+ goto fail;
+ if ((s->config_reg & 0x3) != 0)
+ goto fail;
+ bus_num = (s->config_reg >> 16) & 0xff;
+ if (bus_num != 0)
+ goto fail;
+ pci_dev = s->devices[(s->config_reg >> 8) & 0xff];
+ if (!pci_dev) {
+ fail:
+ switch(len) {
+ case 1:
+ val = 0xff;
+ break;
+ case 2:
+ val = 0xffff;
+ break;
+ default:
+ case 4:
+ val = 0xffffffff;
+ break;
+ }
+ goto the_end;
+ }
+ config_addr = (s->config_reg & 0xfc) | (addr & 3);
+ val = pci_dev->config_read(pci_dev, config_addr, len);
+#if defined(DEBUG_PCI)
+ printf("pci_config_read: %s: addr=%02x val=%08x len=%d\n",
+ pci_dev->name, config_addr, val, len);
+#endif
+ the_end:
+#if defined(DEBUG_PCI) && 0
+ printf("pci_data_read: addr=%08x val=%08x len=%d\n",
+ s->config_reg, val, len);
+#endif
+ return val;
+}
+
+static void pci_data_writeb(void* opaque, uint32_t addr, uint32_t val)
+{
+ pci_data_write(opaque, addr, val, 1);
+}
+
+static void pci_data_writew(void* opaque, uint32_t addr, uint32_t val)
+{
+ pci_data_write(opaque, addr, val, 2);
+}
+
+static void pci_data_writel(void* opaque, uint32_t addr, uint32_t val)
+{
+ pci_data_write(opaque, addr, val, 4);
+}
+
+static uint32_t pci_data_readb(void* opaque, uint32_t addr)
+{
+ return pci_data_read(opaque, addr, 1);
+}
+
+static uint32_t pci_data_readw(void* opaque, uint32_t addr)
+{
+ return pci_data_read(opaque, addr, 2);
+}
+
+static uint32_t pci_data_readl(void* opaque, uint32_t addr)
+{
+ return pci_data_read(opaque, addr, 4);
+}
+
+/* i440FX PCI bridge */
+
+static void piix3_set_irq(PCIDevice *pci_dev, int irq_num, int level);
+
+PCIBus *i440fx_init(void)
+{
+ PCIBus *s;
+ PCIDevice *d;
+
+ s = pci_register_bus();
+ s->set_irq = piix3_set_irq;
+
+ register_ioport_write(0xcf8, 4, 4, pci_addr_writel, s);
+ register_ioport_read(0xcf8, 4, 4, pci_addr_readl, s);
+
+ register_ioport_write(0xcfc, 4, 1, pci_data_writeb, s);
+ register_ioport_write(0xcfc, 4, 2, pci_data_writew, s);
+ register_ioport_write(0xcfc, 4, 4, pci_data_writel, s);
+ register_ioport_read(0xcfc, 4, 1, pci_data_readb, s);
+ register_ioport_read(0xcfc, 4, 2, pci_data_readw, s);
+ register_ioport_read(0xcfc, 4, 4, pci_data_readl, s);
+
+ d = pci_register_device(s, "i440FX", sizeof(PCIDevice), 0,
+ NULL, NULL);
+
+ d->config[0x00] = 0x86; // vendor_id
+ d->config[0x01] = 0x80;
+ d->config[0x02] = 0x37; // device_id
+ d->config[0x03] = 0x12;
+ d->config[0x08] = 0x02; // revision
+ d->config[0x0a] = 0x00; // class_sub = host2pci
+ d->config[0x0b] = 0x06; // class_base = PCI_bridge
+ d->config[0x0e] = 0x00; // header_type
+ return s;
+}
+
+/* PIIX3 PCI to ISA bridge */
+
+typedef struct PIIX3State {
+ PCIDevice dev;
+} PIIX3State;
+
+PIIX3State *piix3_state;
+
+/* return the global irq number corresponding to a given device irq
+ pin. We could also use the bus number to have a more precise
+ mapping. */
+static inline int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
+{
+ int slot_addend;
+ slot_addend = (pci_dev->devfn >> 3);
+ return (irq_num + slot_addend) & 3;
+}
+
+static void piix3_set_irq(PCIDevice *pci_dev, int irq_num, int level)
+{
+ int irq_index, shift, pic_irq, pic_level;
+ uint32_t *p;
+
+ irq_num = pci_slot_get_pirq(pci_dev, irq_num);
+ irq_index = pci_dev->irq_index;
+ p = &pci_irq_levels[irq_num][irq_index >> 5];
+ shift = (irq_index & 0x1f);
+ *p = (*p & ~(1 << shift)) | (level << shift);
+
+ /* now we change the pic irq level according to the piix irq mappings */
+ pic_irq = piix3_state->dev.config[0x60 + irq_num];
+ if (pic_irq < 16) {
+ /* the pic level is the logical OR of all the PCI irqs mapped
+ to it */
+ pic_level = 0;
+#if (PCI_IRQ_WORDS == 2)
+ pic_level = ((pci_irq_levels[irq_num][0] |
+ pci_irq_levels[irq_num][1]) != 0);
+#else
+ {
+ int i;
+ pic_level = 0;
+ for(i = 0; i < PCI_IRQ_WORDS; i++) {
+ if (pci_irq_levels[irq_num][i]) {
+ pic_level = 1;
+ break;
+ }
+ }
+ }
+#endif
+ pic_set_irq(pic_irq, pic_level);
+ }
+}
+
+static void piix3_reset(PIIX3State *d)
+{
+ uint8_t *pci_conf = d->dev.config;
+
+ pci_conf[0x04] = 0x07; // master, memory and I/O
+ pci_conf[0x05] = 0x00;
+ pci_conf[0x06] = 0x00;
+ pci_conf[0x07] = 0x02; // PCI_status_devsel_medium
+ pci_conf[0x4c] = 0x4d;
+ pci_conf[0x4e] = 0x03;
+ pci_conf[0x4f] = 0x00;
+ pci_conf[0x60] = 0x80;
+ pci_conf[0x69] = 0x02;
+ pci_conf[0x70] = 0x80;
+ pci_conf[0x76] = 0x0c;
+ pci_conf[0x77] = 0x0c;
+ pci_conf[0x78] = 0x02;
+ pci_conf[0x79] = 0x00;
+ pci_conf[0x80] = 0x00;
+ pci_conf[0x82] = 0x00;
+ pci_conf[0xa0] = 0x08;
+ pci_conf[0xa0] = 0x08;
+ pci_conf[0xa2] = 0x00;
+ pci_conf[0xa3] = 0x00;
+ pci_conf[0xa4] = 0x00;
+ pci_conf[0xa5] = 0x00;
+ pci_conf[0xa6] = 0x00;
+ pci_conf[0xa7] = 0x00;
+ pci_conf[0xa8] = 0x0f;
+ pci_conf[0xaa] = 0x00;
+ pci_conf[0xab] = 0x00;
+ pci_conf[0xac] = 0x00;
+ pci_conf[0xae] = 0x00;
+}
+
+void piix3_init(PCIBus *bus)
+{
+ PIIX3State *d;
+ uint8_t *pci_conf;
+
+ d = (PIIX3State *)pci_register_device(bus, "PIIX3", sizeof(PIIX3State),
+ -1, NULL, NULL);
+ register_savevm("PIIX3", 0, 1, generic_pci_save, generic_pci_load, d);
+
+ piix3_state = d;
+ pci_conf = d->dev.config;
+
+ pci_conf[0x00] = 0x86; // Intel
+ pci_conf[0x01] = 0x80;
+ pci_conf[0x02] = 0x00; // 82371SB PIIX3 PCI-to-ISA bridge (Step A1)
+ pci_conf[0x03] = 0x70;
+ pci_conf[0x0a] = 0x01; // class_sub = PCI_ISA
+ pci_conf[0x0b] = 0x06; // class_base = PCI_bridge
+ pci_conf[0x0e] = 0x80; // header_type = PCI_multifunction, generic
+
+ piix3_reset(d);
+}
+
+/* PREP pci init */
+
+static inline void set_config(PCIBus *s, target_phys_addr_t addr)
+{
+ int devfn, i;
+
+ for(i = 0; i < 11; i++) {
+ if ((addr & (1 << (11 + i))) != 0)
+ break;
+ }
+ devfn = ((addr >> 8) & 7) | (i << 3);
+ s->config_reg = 0x80000000 | (addr & 0xfc) | (devfn << 8);
+}
+
+static void PPC_PCIIO_writeb (void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ PCIBus *s = opaque;
+ set_config(s, addr);
+ pci_data_write(s, addr, val, 1);
+}
+
+static void PPC_PCIIO_writew (void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ PCIBus *s = opaque;
+ set_config(s, addr);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+ pci_data_write(s, addr, val, 2);
+}
+
+static void PPC_PCIIO_writel (void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ PCIBus *s = opaque;
+ set_config(s, addr);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ pci_data_write(s, addr, val, 4);
+}
+
+static uint32_t PPC_PCIIO_readb (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+ set_config(s, addr);
+ val = pci_data_read(s, addr, 1);
+ return val;
+}
+
+static uint32_t PPC_PCIIO_readw (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+ set_config(s, addr);
+ val = pci_data_read(s, addr, 2);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+ return val;
+}
+
+static uint32_t PPC_PCIIO_readl (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+ set_config(s, addr);
+ val = pci_data_read(s, addr, 4);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ return val;
+}
+
+static CPUWriteMemoryFunc *PPC_PCIIO_write[] = {
+ &PPC_PCIIO_writeb,
+ &PPC_PCIIO_writew,
+ &PPC_PCIIO_writel,
+};
+
+static CPUReadMemoryFunc *PPC_PCIIO_read[] = {
+ &PPC_PCIIO_readb,
+ &PPC_PCIIO_readw,
+ &PPC_PCIIO_readl,
+};
+
+static void prep_set_irq(PCIDevice *d, int irq_num, int level)
+{
+ /* XXX: we do not simulate the hardware - we rely on the BIOS to
+ set correctly for irq line field */
+ pic_set_irq(d->config[PCI_INTERRUPT_LINE], level);
+}
+
+PCIBus *pci_prep_init(void)
+{
+ PCIBus *s;
+ PCIDevice *d;
+ int PPC_io_memory;
+
+ s = pci_register_bus();
+ s->set_irq = prep_set_irq;
+
+ PPC_io_memory = cpu_register_io_memory(0, PPC_PCIIO_read,
+ PPC_PCIIO_write, s);
+ cpu_register_physical_memory(0x80800000, 0x00400000, PPC_io_memory);
+
+ d = pci_register_device(s, "PREP PCI Bridge", sizeof(PCIDevice), 0,
+ NULL, NULL);
+
+ /* XXX: put correct IDs */
+ d->config[0x00] = 0x11; // vendor_id
+ d->config[0x01] = 0x10;
+ d->config[0x02] = 0x26; // device_id
+ d->config[0x03] = 0x00;
+ d->config[0x08] = 0x02; // revision
+ d->config[0x0a] = 0x04; // class_sub = pci2pci
+ d->config[0x0b] = 0x06; // class_base = PCI_bridge
+ d->config[0x0e] = 0x01; // header_type
+ return s;
+}
+
+
+/* pmac pci init */
+
+#if 0
+/* Grackle PCI host */
+static void pci_grackle_config_writel (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ s->config_reg = val;
+}
+
+static uint32_t pci_grackle_config_readl (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = s->config_reg;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ return val;
+}
+
+static CPUWriteMemoryFunc *pci_grackle_config_write[] = {
+ &pci_grackle_config_writel,
+ &pci_grackle_config_writel,
+ &pci_grackle_config_writel,
+};
+
+static CPUReadMemoryFunc *pci_grackle_config_read[] = {
+ &pci_grackle_config_readl,
+ &pci_grackle_config_readl,
+ &pci_grackle_config_readl,
+};
+
+static void pci_grackle_writeb (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+ pci_data_write(s, addr, val, 1);
+}
+
+static void pci_grackle_writew (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+ pci_data_write(s, addr, val, 2);
+}
+
+static void pci_grackle_writel (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ pci_data_write(s, addr, val, 4);
+}
+
+static uint32_t pci_grackle_readb (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+ val = pci_data_read(s, addr, 1);
+ return val;
+}
+
+static uint32_t pci_grackle_readw (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+ val = pci_data_read(s, addr, 2);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+ return val;
+}
+
+static uint32_t pci_grackle_readl (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = pci_data_read(s, addr, 4);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ return val;
+}
+
+static CPUWriteMemoryFunc *pci_grackle_write[] = {
+ &pci_grackle_writeb,
+ &pci_grackle_writew,
+ &pci_grackle_writel,
+};
+
+static CPUReadMemoryFunc *pci_grackle_read[] = {
+ &pci_grackle_readb,
+ &pci_grackle_readw,
+ &pci_grackle_readl,
+};
+#endif
+
+/* Uninorth PCI host (for all Mac99 and newer machines) */
+static void pci_unin_main_config_writel (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+ int i;
+
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+
+ for (i = 11; i < 32; i++) {
+ if ((val & (1 << i)) != 0)
+ break;
+ }
+#if 0
+ s->config_reg = 0x80000000 | (1 << 16) | (val & 0x7FC) | (i << 11);
+#else
+ s->config_reg = 0x80000000 | (0 << 16) | (val & 0x7FC) | (i << 11);
+#endif
+}
+
+static uint32_t pci_unin_main_config_readl (void *opaque,
+ target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+ int devfn;
+
+ devfn = (s->config_reg >> 8) & 0xFF;
+ val = (1 << (devfn >> 3)) | ((devfn & 0x07) << 8) | (s->config_reg & 0xFC);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+
+ return val;
+}
+
+static CPUWriteMemoryFunc *pci_unin_main_config_write[] = {
+ &pci_unin_main_config_writel,
+ &pci_unin_main_config_writel,
+ &pci_unin_main_config_writel,
+};
+
+static CPUReadMemoryFunc *pci_unin_main_config_read[] = {
+ &pci_unin_main_config_readl,
+ &pci_unin_main_config_readl,
+ &pci_unin_main_config_readl,
+};
+
+static void pci_unin_main_writeb (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+ pci_data_write(s, addr & 7, val, 1);
+}
+
+static void pci_unin_main_writew (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+ pci_data_write(s, addr & 7, val, 2);
+}
+
+static void pci_unin_main_writel (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ pci_data_write(s, addr & 7, val, 4);
+}
+
+static uint32_t pci_unin_main_readb (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = pci_data_read(s, addr & 7, 1);
+
+ return val;
+}
+
+static uint32_t pci_unin_main_readw (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = pci_data_read(s, addr & 7, 2);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+
+ return val;
+}
+
+static uint32_t pci_unin_main_readl (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = pci_data_read(s, addr, 4);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+
+ return val;
+}
+
+static CPUWriteMemoryFunc *pci_unin_main_write[] = {
+ &pci_unin_main_writeb,
+ &pci_unin_main_writew,
+ &pci_unin_main_writel,
+};
+
+static CPUReadMemoryFunc *pci_unin_main_read[] = {
+ &pci_unin_main_readb,
+ &pci_unin_main_readw,
+ &pci_unin_main_readl,
+};
+
+#if 0
+
+static void pci_unin_config_writel (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ s->config_reg = 0x80000000 | (val & ~0x00000001);
+}
+
+static uint32_t pci_unin_config_readl (void *opaque,
+ target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = (s->config_reg | 0x00000001) & ~0x80000000;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+
+ return val;
+}
+
+static CPUWriteMemoryFunc *pci_unin_config_write[] = {
+ &pci_unin_config_writel,
+ &pci_unin_config_writel,
+ &pci_unin_config_writel,
+};
+
+static CPUReadMemoryFunc *pci_unin_config_read[] = {
+ &pci_unin_config_readl,
+ &pci_unin_config_readl,
+ &pci_unin_config_readl,
+};
+
+static void pci_unin_writeb (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+ pci_data_write(s, addr & 3, val, 1);
+}
+
+static void pci_unin_writew (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+ pci_data_write(s, addr & 3, val, 2);
+}
+
+static void pci_unin_writel (void *opaque, target_phys_addr_t addr,
+ uint32_t val)
+{
+ PCIBus *s = opaque;
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+ pci_data_write(s, addr & 3, val, 4);
+}
+
+static uint32_t pci_unin_readb (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = pci_data_read(s, addr & 3, 1);
+
+ return val;
+}
+
+static uint32_t pci_unin_readw (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = pci_data_read(s, addr & 3, 2);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap16(val);
+#endif
+
+ return val;
+}
+
+static uint32_t pci_unin_readl (void *opaque, target_phys_addr_t addr)
+{
+ PCIBus *s = opaque;
+ uint32_t val;
+
+ val = pci_data_read(s, addr & 3, 4);
+#ifdef TARGET_WORDS_BIGENDIAN
+ val = bswap32(val);
+#endif
+
+ return val;
+}
+
+static CPUWriteMemoryFunc *pci_unin_write[] = {
+ &pci_unin_writeb,
+ &pci_unin_writew,
+ &pci_unin_writel,
+};
+
+static CPUReadMemoryFunc *pci_unin_read[] = {
+ &pci_unin_readb,
+ &pci_unin_readw,
+ &pci_unin_readl,
+};
+#endif
+
+static void pmac_set_irq(PCIDevice *d, int irq_num, int level)
+{
+ openpic_t *openpic;
+ /* XXX: we do not simulate the hardware - we rely on the BIOS to
+ set correctly for irq line field */
+ openpic = d->bus->openpic;
+#ifdef TARGET_PPC
+ if (openpic)
+ openpic_set_irq(openpic, d->config[PCI_INTERRUPT_LINE], level);
+#endif
+}
+
+void pci_pmac_set_openpic(PCIBus *bus, openpic_t *openpic)
+{
+ bus->openpic = openpic;
+}
+
+PCIBus *pci_pmac_init(void)
+{
+ PCIBus *s;
+ PCIDevice *d;
+ int pci_mem_config, pci_mem_data;
+
+ /* Use values found on a real PowerMac */
+ /* Uninorth main bus */
+ s = pci_register_bus();
+ s->set_irq = pmac_set_irq;
+
+ pci_mem_config = cpu_register_io_memory(0, pci_unin_main_config_read,
+ pci_unin_main_config_write, s);
+ pci_mem_data = cpu_register_io_memory(0, pci_unin_main_read,
+ pci_unin_main_write, s);
+ cpu_register_physical_memory(0xf2800000, 0x1000, pci_mem_config);
+ cpu_register_physical_memory(0xf2c00000, 0x1000, pci_mem_data);
+ s->devfn_min = 11 << 3;
+ d = pci_register_device(s, "Uni-north main", sizeof(PCIDevice),
+ 11 << 3, NULL, NULL);
+ d->config[0x00] = 0x6b; // vendor_id : Apple
+ d->config[0x01] = 0x10;
+ d->config[0x02] = 0x1F; // device_id
+ d->config[0x03] = 0x00;
+ d->config[0x08] = 0x00; // revision
+ d->config[0x0A] = 0x00; // class_sub = pci host
+ d->config[0x0B] = 0x06; // class_base = PCI_bridge
+ d->config[0x0C] = 0x08; // cache_line_size
+ d->config[0x0D] = 0x10; // latency_timer
+ d->config[0x0E] = 0x00; // header_type
+ d->config[0x34] = 0x00; // capabilities_pointer
+
+#if 0 // XXX: not activated as PPC BIOS doesn't handle multiple buses properly
+ /* pci-to-pci bridge */
+ d = pci_register_device("Uni-north bridge", sizeof(PCIDevice), 0, 13 << 3,
+ NULL, NULL);
+ d->config[0x00] = 0x11; // vendor_id : TI
+ d->config[0x01] = 0x10;
+ d->config[0x02] = 0x26; // device_id
+ d->config[0x03] = 0x00;
+ d->config[0x08] = 0x05; // revision
+ d->config[0x0A] = 0x04; // class_sub = pci2pci
+ d->config[0x0B] = 0x06; // class_base = PCI_bridge
+ d->config[0x0C] = 0x08; // cache_line_size
+ d->config[0x0D] = 0x20; // latency_timer
+ d->config[0x0E] = 0x01; // header_type
+
+ d->config[0x18] = 0x01; // primary_bus
+ d->config[0x19] = 0x02; // secondary_bus
+ d->config[0x1A] = 0x02; // subordinate_bus
+ d->config[0x1B] = 0x20; // secondary_latency_timer
+ d->config[0x1C] = 0x11; // io_base
+ d->config[0x1D] = 0x01; // io_limit
+ d->config[0x20] = 0x00; // memory_base
+ d->config[0x21] = 0x80;
+ d->config[0x22] = 0x00; // memory_limit
+ d->config[0x23] = 0x80;
+ d->config[0x24] = 0x01; // prefetchable_memory_base
+ d->config[0x25] = 0x80;
+    d->config[0x26] = 0xF1; // prefetchable_memory_limit
+ d->config[0x27] = 0x7F;
+ // d->config[0x34] = 0xdc // capabilities_pointer
+#endif
+#if 0 // XXX: not needed for now
+ /* Uninorth AGP bus */
+ s = &pci_bridge[1];
+ pci_mem_config = cpu_register_io_memory(0, pci_unin_config_read,
+ pci_unin_config_write, s);
+ pci_mem_data = cpu_register_io_memory(0, pci_unin_read,
+ pci_unin_write, s);
+ cpu_register_physical_memory(0xf0800000, 0x1000, pci_mem_config);
+ cpu_register_physical_memory(0xf0c00000, 0x1000, pci_mem_data);
+
+ d = pci_register_device("Uni-north AGP", sizeof(PCIDevice), 0, 11 << 3,
+ NULL, NULL);
+ d->config[0x00] = 0x6b; // vendor_id : Apple
+ d->config[0x01] = 0x10;
+ d->config[0x02] = 0x20; // device_id
+ d->config[0x03] = 0x00;
+ d->config[0x08] = 0x00; // revision
+ d->config[0x0A] = 0x00; // class_sub = pci host
+ d->config[0x0B] = 0x06; // class_base = PCI_bridge
+ d->config[0x0C] = 0x08; // cache_line_size
+ d->config[0x0D] = 0x10; // latency_timer
+ d->config[0x0E] = 0x00; // header_type
+ // d->config[0x34] = 0x80; // capabilities_pointer
+#endif
+
+#if 0 // XXX: not needed for now
+ /* Uninorth internal bus */
+ s = &pci_bridge[2];
+ pci_mem_config = cpu_register_io_memory(0, pci_unin_config_read,
+ pci_unin_config_write, s);
+ pci_mem_data = cpu_register_io_memory(0, pci_unin_read,
+ pci_unin_write, s);
+ cpu_register_physical_memory(0xf4800000, 0x1000, pci_mem_config);
+ cpu_register_physical_memory(0xf4c00000, 0x1000, pci_mem_data);
+
+ d = pci_register_device("Uni-north internal", sizeof(PCIDevice),
+ 3, 11 << 3, NULL, NULL);
+ d->config[0x00] = 0x6b; // vendor_id : Apple
+ d->config[0x01] = 0x10;
+ d->config[0x02] = 0x1E; // device_id
+ d->config[0x03] = 0x00;
+ d->config[0x08] = 0x00; // revision
+ d->config[0x0A] = 0x00; // class_sub = pci host
+ d->config[0x0B] = 0x06; // class_base = PCI_bridge
+ d->config[0x0C] = 0x08; // cache_line_size
+ d->config[0x0D] = 0x10; // latency_timer
+ d->config[0x0E] = 0x00; // header_type
+ d->config[0x34] = 0x00; // capabilities_pointer
+#endif
+
+#if 0 // Grackle ?
+ /* same values as PearPC - check this */
+ d->config[0x00] = 0x11; // vendor_id
+ d->config[0x01] = 0x10;
+ d->config[0x02] = 0x26; // device_id
+ d->config[0x03] = 0x00;
+ d->config[0x08] = 0x02; // revision
+ d->config[0x0a] = 0x04; // class_sub = pci2pci
+ d->config[0x0b] = 0x06; // class_base = PCI_bridge
+ d->config[0x0e] = 0x01; // header_type
+
+ d->config[0x18] = 0x0; // primary_bus
+ d->config[0x19] = 0x1; // secondary_bus
+ d->config[0x1a] = 0x1; // subordinate_bus
+ d->config[0x1c] = 0x10; // io_base
+ d->config[0x1d] = 0x20; // io_limit
+
+ d->config[0x20] = 0x80; // memory_base
+ d->config[0x21] = 0x80;
+ d->config[0x22] = 0x90; // memory_limit
+ d->config[0x23] = 0x80;
+
+ d->config[0x24] = 0x00; // prefetchable_memory_base
+ d->config[0x25] = 0x84;
+ d->config[0x26] = 0x00; // prefetchable_memory_limit
+ d->config[0x27] = 0x85;
+#endif
+ return s;
+}
+
+/***********************************************************/
+/* generic PCI irq support */
+
+/* 0 <= irq_num <= 3. level must be 0 or 1 */
+void pci_set_irq(PCIDevice *pci_dev, int irq_num, int level)
+{
+ PCIBus *bus = pci_dev->bus;
+ bus->set_irq(pci_dev, irq_num, level);
+}
+
+/***********************************************************/
+/* monitor info on PCI */
+
+static void pci_info_device(PCIDevice *d)
+{
+ int i, class;
+ PCIIORegion *r;
+
+ term_printf(" Bus %2d, device %3d, function %d:\n",
+ d->bus->bus_num, d->devfn >> 3, d->devfn & 7);
+ class = le16_to_cpu(*((uint16_t *)(d->config + PCI_CLASS_DEVICE)));
+ term_printf(" ");
+ switch(class) {
+ case 0x0101:
+ term_printf("IDE controller");
+ break;
+ case 0x0200:
+ term_printf("Ethernet controller");
+ break;
+ case 0x0300:
+ term_printf("VGA controller");
+ break;
+ default:
+ term_printf("Class %04x", class);
+ break;
+ }
+ term_printf(": PCI device %04x:%04x\n",
+ le16_to_cpu(*((uint16_t *)(d->config + PCI_VENDOR_ID))),
+ le16_to_cpu(*((uint16_t *)(d->config + PCI_DEVICE_ID))));
+
+ if (d->config[PCI_INTERRUPT_PIN] != 0) {
+ term_printf(" IRQ %d.\n", d->config[PCI_INTERRUPT_LINE]);
+ }
+ for(i = 0;i < PCI_NUM_REGIONS; i++) {
+ r = &d->io_regions[i];
+ if (r->size != 0) {
+ term_printf(" BAR%d: ", i);
+ if (r->type & PCI_ADDRESS_SPACE_IO) {
+ term_printf("I/O at 0x%04x [0x%04x].\n",
+ r->addr, r->addr + r->size - 1);
+ } else {
+ term_printf("32 bit memory at 0x%08x [0x%08x].\n",
+ r->addr, r->addr + r->size - 1);
+ }
+ }
+ }
+}
+
+void pci_info(void)
+{
+ PCIBus *bus = first_bus;
+ PCIDevice *d;
+ int devfn;
+
+ if (bus) {
+ for(devfn = 0; devfn < 256; devfn++) {
+ d = bus->devices[devfn];
+ if (d)
+ pci_info_device(d);
+ }
+ }
+}
+
+/***********************************************************/
+/* XXX: the following should be moved to the PC BIOS */
+
+/* Thin ISA port-I/O wrappers over the CPU in/out helpers, used by the
+ built-in PCI BIOS below. Several are currently unreferenced and are
+ marked unused to silence compiler warnings. */
+static __attribute__((unused)) uint32_t isa_inb(uint32_t addr)
+{
+ return cpu_inb(cpu_single_env, addr);
+}
+
+static void isa_outb(uint32_t val, uint32_t addr)
+{
+ cpu_outb(cpu_single_env, addr, val);
+}
+
+static __attribute__((unused)) uint32_t isa_inw(uint32_t addr)
+{
+ return cpu_inw(cpu_single_env, addr);
+}
+
+static __attribute__((unused)) void isa_outw(uint32_t val, uint32_t addr)
+{
+ cpu_outw(cpu_single_env, addr, val);
+}
+
+static __attribute__((unused)) uint32_t isa_inl(uint32_t addr)
+{
+ return cpu_inl(cpu_single_env, addr);
+}
+
+static __attribute__((unused)) void isa_outl(uint32_t val, uint32_t addr)
+{
+ cpu_outl(cpu_single_env, addr, val);
+}
+
+/* Config-space accessors for the built-in PCI BIOS. Each one programs the
+ bus's CONFIG_ADDRESS-style register (enable bit 31 | bus << 16 |
+ devfn << 8 | register) and then performs the data cycle. The word/byte
+ variants align the register address down to a dword boundary and pass
+ the byte offset within the dword to pci_data_read/pci_data_write. */
+static void pci_config_writel(PCIDevice *d, uint32_t addr, uint32_t val)
+{
+ PCIBus *s = d->bus;
+ s->config_reg = 0x80000000 | (s->bus_num << 16) |
+ (d->devfn << 8) | addr;
+ pci_data_write(s, 0, val, 4);
+}
+
+static void pci_config_writew(PCIDevice *d, uint32_t addr, uint32_t val)
+{
+ PCIBus *s = d->bus;
+ s->config_reg = 0x80000000 | (s->bus_num << 16) |
+ (d->devfn << 8) | (addr & ~3);
+ pci_data_write(s, addr & 3, val, 2);
+}
+
+static void pci_config_writeb(PCIDevice *d, uint32_t addr, uint32_t val)
+{
+ PCIBus *s = d->bus;
+ s->config_reg = 0x80000000 | (s->bus_num << 16) |
+ (d->devfn << 8) | (addr & ~3);
+ pci_data_write(s, addr & 3, val, 1);
+}
+
+static __attribute__((unused)) uint32_t pci_config_readl(PCIDevice *d, uint32_t addr)
+{
+ PCIBus *s = d->bus;
+ s->config_reg = 0x80000000 | (s->bus_num << 16) |
+ (d->devfn << 8) | addr;
+ return pci_data_read(s, 0, 4);
+}
+
+static uint32_t pci_config_readw(PCIDevice *d, uint32_t addr)
+{
+ PCIBus *s = d->bus;
+ s->config_reg = 0x80000000 | (s->bus_num << 16) |
+ (d->devfn << 8) | (addr & ~3);
+ return pci_data_read(s, addr & 3, 2);
+}
+
+static uint32_t pci_config_readb(PCIDevice *d, uint32_t addr)
+{
+ PCIBus *s = d->bus;
+ s->config_reg = 0x80000000 | (s->bus_num << 16) |
+ (d->devfn << 8) | (addr & ~3);
+ return pci_data_read(s, addr & 3, 1);
+}
+
+/* Allocation cursors for the built-in PCI BIOS: next free I/O port and
+ next free MMIO address (initialized in pci_bios_init). */
+static uint32_t pci_bios_io_addr;
+static uint32_t pci_bios_mem_addr;
+/* host irqs corresponding to PCI irqs A-D */
+static uint8_t pci_irqs[4] = { 11, 9, 11, 9 };
+
+/* Program BAR region_num of device d to addr (BAR registers start at
+ config offset 0x10; the expansion ROM BAR lives at 0x30), then turn on
+ the matching decode bit (I/O or memory) in the command register. */
+static void pci_set_io_region_addr(PCIDevice *d, int region_num, uint32_t addr)
+{
+ PCIIORegion *r;
+ uint16_t cmd;
+ uint32_t ofs;
+
+ if ( region_num == PCI_ROM_SLOT ) {
+ ofs = 0x30;
+ }else{
+ ofs = 0x10 + region_num * 4;
+ }
+
+ pci_config_writel(d, ofs, addr);
+ r = &d->io_regions[region_num];
+
+ /* enable memory mappings */
+ cmd = pci_config_readw(d, PCI_COMMAND);
+ if ( region_num == PCI_ROM_SLOT )
+ cmd |= 2;
+ else if (r->type & PCI_ADDRESS_SPACE_IO)
+ cmd |= 1;
+ else
+ cmd |= 2;
+ pci_config_writew(d, PCI_COMMAND, cmd);
+}
+
+/* Assign resources to one device, as a real PCI BIOS would: special-case
+ known hardware (PIIX3 IDE, Bochs VGA, IBM MPIC, macio), otherwise carve
+ BAR addresses out of the pci_bios_io_addr/pci_bios_mem_addr pools, then
+ route the interrupt pin to a PIC IRQ via pci_slot_get_pirq. */
+static void pci_bios_init_device(PCIDevice *d)
+{
+ int class;
+ PCIIORegion *r;
+ uint32_t *paddr;
+ int i, pin, pic_irq, vendor_id, device_id;
+
+ class = pci_config_readw(d, PCI_CLASS_DEVICE);
+ vendor_id = pci_config_readw(d, PCI_VENDOR_ID);
+ device_id = pci_config_readw(d, PCI_DEVICE_ID);
+ switch(class) {
+ case 0x0101:
+ if (vendor_id == 0x8086 && device_id == 0x7010) {
+ /* PIIX3 IDE */
+ pci_config_writew(d, 0x40, 0x8000); // enable IDE0
+ pci_config_writew(d, 0x42, 0x8000); // enable IDE1
+ goto default_map;
+ } else {
+ /* IDE: we map it as in ISA mode */
+ pci_set_io_region_addr(d, 0, 0x1f0);
+ pci_set_io_region_addr(d, 1, 0x3f4);
+ pci_set_io_region_addr(d, 2, 0x170);
+ pci_set_io_region_addr(d, 3, 0x374);
+ }
+ break;
+ case 0x0300:
+ if (vendor_id != 0x1234)
+ goto default_map;
+ /* VGA: map frame buffer to default Bochs VBE address */
+ pci_set_io_region_addr(d, 0, 0xE0000000);
+ break;
+ case 0x0800:
+ /* PIC */
+ vendor_id = pci_config_readw(d, PCI_VENDOR_ID);
+ device_id = pci_config_readw(d, PCI_DEVICE_ID);
+ if (vendor_id == 0x1014) {
+ /* IBM */
+ if (device_id == 0x0046 || device_id == 0xFFFF) {
+ /* MPIC & MPIC2 */
+ pci_set_io_region_addr(d, 0, 0x80800000 + 0x00040000);
+ }
+ }
+ break;
+ case 0xff00:
+ if (vendor_id == 0x0106b &&
+ (device_id == 0x0017 || device_id == 0x0022)) {
+ /* macio bridge */
+ pci_set_io_region_addr(d, 0, 0x80800000);
+ }
+ break;
+ default:
+ default_map:
+ /* default memory mappings: round the cursor up to the region's
+ (power-of-two) size, assign, then advance past it */
+ for(i = 0; i < PCI_NUM_REGIONS; i++) {
+ r = &d->io_regions[i];
+ if (r->size) {
+ if (r->type & PCI_ADDRESS_SPACE_IO)
+ paddr = &pci_bios_io_addr;
+ else
+ paddr = &pci_bios_mem_addr;
+ *paddr = (*paddr + r->size - 1) & ~(r->size - 1);
+ pci_set_io_region_addr(d, i, *paddr);
+ *paddr += r->size;
+ }
+ }
+ break;
+ }
+
+ /* map the interrupt */
+ pin = pci_config_readb(d, PCI_INTERRUPT_PIN);
+ if (pin != 0) {
+ pin = pci_slot_get_pirq(d, pin - 1);
+ pic_irq = pci_irqs[pin];
+ pci_config_writeb(d, PCI_INTERRUPT_LINE, pic_irq);
+ }
+}
+
+/*
+ * This function initializes the PCI devices as a normal PCI BIOS
+ * would do. It is provided just in case the BIOS has no support for
+ * PCI. It marks the PCI IRQs level-triggered in the ELCR, programs
+ * the PIIX3 PIRQA-D routing registers, then assigns resources to
+ * every device on the root bus.
+ */
+void pci_bios_init(void)
+{
+ PCIBus *bus;
+ PCIDevice *d;
+ int devfn, i, irq;
+ uint8_t elcr[2];
+
+ pci_bios_io_addr = 0xc000;
+ pci_bios_mem_addr = 0xf0000000;
+
+ /* activate IRQ mappings */
+ elcr[0] = 0x00;
+ elcr[1] = 0x00;
+ for(i = 0; i < 4; i++) {
+ irq = pci_irqs[i];
+ /* set to trigger level */
+ elcr[irq >> 3] |= (1 << (irq & 7));
+ /* activate irq remapping in PIIX */
+ pci_config_writeb((PCIDevice *)piix3_state, 0x60 + i, irq);
+ }
+ /* ELCR (edge/level control) registers live at ports 0x4d0/0x4d1 */
+ isa_outb(elcr[0], 0x4d0);
+ isa_outb(elcr[1], 0x4d1);
+
+ bus = first_bus;
+ if (bus) {
+ for(devfn = 0; devfn < 256; devfn++) {
+ d = bus->devices[devfn];
+ if (d)
+ pci_bios_init_device(d);
+ }
+ }
+}
diff --git a/tools/ioemu/hw/pckbd.c b/tools/ioemu/hw/pckbd.c
new file mode 100644
index 0000000000..85863b433f
--- /dev/null
+++ b/tools/ioemu/hw/pckbd.c
@@ -0,0 +1,919 @@
+/*
+ * QEMU PC keyboard emulation
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* debug PC keyboard */
+//#define DEBUG_KBD
+
+/* debug PC keyboard : only mouse */
+//#define DEBUG_MOUSE
+
+/* Keyboard Controller Commands */
+#define KBD_CCMD_READ_MODE 0x20 /* Read mode bits */
+#define KBD_CCMD_WRITE_MODE 0x60 /* Write mode bits */
+#define KBD_CCMD_GET_VERSION 0xA1 /* Get controller version */
+#define KBD_CCMD_MOUSE_DISABLE 0xA7 /* Disable mouse interface */
+#define KBD_CCMD_MOUSE_ENABLE 0xA8 /* Enable mouse interface */
+#define KBD_CCMD_TEST_MOUSE 0xA9 /* Mouse interface test */
+#define KBD_CCMD_SELF_TEST 0xAA /* Controller self test */
+#define KBD_CCMD_KBD_TEST 0xAB /* Keyboard interface test */
+#define KBD_CCMD_KBD_DISABLE 0xAD /* Keyboard interface disable */
+#define KBD_CCMD_KBD_ENABLE 0xAE /* Keyboard interface enable */
+#define KBD_CCMD_READ_INPORT 0xC0 /* read input port */
+#define KBD_CCMD_READ_OUTPORT 0xD0 /* read output port */
+#define KBD_CCMD_WRITE_OUTPORT 0xD1 /* write output port */
+#define KBD_CCMD_WRITE_OBUF 0xD2
+#define KBD_CCMD_WRITE_AUX_OBUF 0xD3 /* Write to output buffer as if
+ initiated by the auxiliary device */
+#define KBD_CCMD_WRITE_MOUSE 0xD4 /* Write the following byte to the mouse */
+#define KBD_CCMD_DISABLE_A20 0xDD /* HP vectra only ? */
+#define KBD_CCMD_ENABLE_A20 0xDF /* HP vectra only ? */
+#define KBD_CCMD_RESET 0xFE
+
+/* Keyboard Commands */
+#define KBD_CMD_SET_LEDS 0xED /* Set keyboard leds */
+#define KBD_CMD_ECHO 0xEE
+#define KBD_CMD_GET_ID 0xF2 /* get keyboard ID */
+#define KBD_CMD_SET_RATE 0xF3 /* Set typematic rate */
+#define KBD_CMD_ENABLE 0xF4 /* Enable scanning */
+#define KBD_CMD_RESET_DISABLE 0xF5 /* reset and disable scanning */
+#define KBD_CMD_RESET_ENABLE 0xF6 /* reset and enable scanning */
+#define KBD_CMD_RESET 0xFF /* Reset */
+
+/* Keyboard Replies */
+#define KBD_REPLY_POR 0xAA /* Power on reset */
+#define KBD_REPLY_ACK 0xFA /* Command ACK */
+#define KBD_REPLY_RESEND 0xFE /* Command NACK, send the cmd again */
+
+/* Status Register Bits */
+#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */
+#define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */
+#define KBD_STAT_SELFTEST 0x04 /* Self test successful */
+#define KBD_STAT_CMD 0x08 /* Last write was a command write (0=data) */
+#define KBD_STAT_UNLOCKED 0x10 /* Zero if keyboard locked */
+#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */
+#define KBD_STAT_GTO 0x40 /* General receive/xmit timeout */
+#define KBD_STAT_PERR 0x80 /* Parity error */
+
+/* Controller Mode Register Bits */
+#define KBD_MODE_KBD_INT 0x01 /* Keyboard data generate IRQ1 */
+#define KBD_MODE_MOUSE_INT 0x02 /* Mouse data generate IRQ12 */
+#define KBD_MODE_SYS 0x04 /* The system flag (?) */
+#define KBD_MODE_NO_KEYLOCK 0x08 /* The keylock doesn't affect the keyboard if set */
+#define KBD_MODE_DISABLE_KBD 0x10 /* Disable keyboard interface */
+#define KBD_MODE_DISABLE_MOUSE 0x20 /* Disable mouse interface */
+#define KBD_MODE_KCC 0x40 /* Scan code conversion to PC format */
+#define KBD_MODE_RFU 0x80
+
+/* Mouse Commands */
+#define AUX_SET_SCALE11 0xE6 /* Set 1:1 scaling */
+#define AUX_SET_SCALE21 0xE7 /* Set 2:1 scaling */
+#define AUX_SET_RES 0xE8 /* Set resolution */
+#define AUX_GET_SCALE 0xE9 /* Get scaling factor */
+/* according to Synaptic docs this $E9 is really 3-byte status */
+#define AUX_SET_STREAM 0xEA /* Set stream mode */
+#define AUX_POLL 0xEB /* Poll */
+#define AUX_RESET_WRAP 0xEC /* Reset wrap mode */
+#define AUX_SET_WRAP 0xEE /* Set wrap mode */
+#define AUX_SET_REMOTE 0xF0 /* Set remote mode */
+#define AUX_GET_TYPE 0xF2 /* Get type */
+#define AUX_SET_SAMPLE 0xF3 /* Set sample rate */
+#define AUX_ENABLE_DEV 0xF4 /* Enable aux device */
+#define AUX_DISABLE_DEV 0xF5 /* Disable aux device */
+#define AUX_SET_DEFAULT 0xF6
+#define AUX_RESET 0xFF /* Reset aux device */
+#define AUX_ACK 0xFA /* Command byte ACK. */
+
+#define MOUSE_STATUS_REMOTE 0x40
+#define MOUSE_STATUS_ENABLED 0x20
+#define MOUSE_STATUS_SCALE21 0x10
+
+#define KBD_QUEUE_SIZE 256
+
+/* Shared output FIFO for keyboard and mouse bytes. aux[] records, per
+ queued byte, whether it came from the aux (mouse) stream. */
+typedef struct {
+ uint8_t aux[KBD_QUEUE_SIZE];
+ uint8_t data[KBD_QUEUE_SIZE];
+ int rptr, wptr, count;
+} KBDQueue;
+
+/* Synaptics touchpad mode flags (see kbd_write_mouse). */
+typedef struct {
+ int absolute;
+ int high;
+} TouchPad;
+
+/* Full i8042 keyboard-controller state, including the emulated keyboard
+ and PS/2 mouse/touchpad behind it. */
+typedef struct KBDState {
+ KBDQueue queue;
+ uint8_t write_cmd; /* if non zero, write data to port 60 is expected */
+ uint8_t status;
+ uint8_t mode;
+ /* keyboard state */
+ int kbd_write_cmd;
+ int scan_enabled;
+ /* mouse state */
+ int mouse_write_cmd;
+ uint8_t mouse_status;
+ uint8_t mouse_resolution;
+ uint8_t mouse_sample_rate;
+ uint8_t mouse_wrap;
+ uint8_t mouse_type; /* 0 = PS2, 3 = IMPS/2, 4 = IMEX */
+ uint8_t mouse_detect_state;
+ int mouse_dx; /* current values, needed for 'poll' mode */
+ int mouse_dy;
+ int mouse_dz;
+ uint8_t mouse_buttons;
+ TouchPad touchpad;
+} KBDState;
+
+/* single controller instance for the PC machine */
+KBDState kbd_state;
+
+/* update irq and KBD_STAT_[MOUSE_]OBF */
+/* XXX: not generating the irqs if KBD_MODE_DISABLE_KBD is set may be
+ incorrect, but it avoids having to simulate exact delays */
+/* Recompute the OBF status bits from the queue head and drive IRQ1
+ (keyboard) / IRQ12 (mouse) accordingly. */
+static void kbd_update_irq(KBDState *s)
+{
+ KBDQueue *q = &s->queue;
+ int irq12_level, irq1_level;
+
+ irq1_level = 0;
+ irq12_level = 0;
+ s->status &= ~(KBD_STAT_OBF | KBD_STAT_MOUSE_OBF);
+ if (q->count != 0) {
+ s->status |= KBD_STAT_OBF;
+ /* the byte at the read pointer decides which stream (and IRQ)
+ is signalled */
+ if (q->aux[q->rptr]) {
+ s->status |= KBD_STAT_MOUSE_OBF;
+ if (s->mode & KBD_MODE_MOUSE_INT)
+ irq12_level = 1;
+ } else {
+ if ((s->mode & KBD_MODE_KBD_INT) &&
+ !(s->mode & KBD_MODE_DISABLE_KBD))
+ irq1_level = 1;
+ }
+ }
+ pic_set_irq(1, irq1_level);
+ pic_set_irq(12, irq12_level);
+}
+
+/* Append byte b to the output FIFO (aux != 0 marks it as mouse data) and
+ refresh the status/IRQ state. Bytes are silently dropped when the queue
+ is full. */
+static void kbd_queue(KBDState *s, int b, int aux)
+{
+ KBDQueue *q = &s->queue;
+
+#if defined(DEBUG_MOUSE) || defined(DEBUG_KBD)
+ if (aux)
+ printf("mouse event: 0x%02x\n", b);
+#ifdef DEBUG_KBD
+ else
+ printf("kbd event: 0x%02x\n", b);
+#endif
+#endif
+ if (q->count >= KBD_QUEUE_SIZE)
+ return;
+ q->aux[q->wptr] = aux;
+ q->data[q->wptr] = b;
+ if (++q->wptr == KBD_QUEUE_SIZE)
+ q->wptr = 0;
+ q->count++;
+ kbd_update_irq(s);
+}
+
+/* Keyboard event callback: push one scancode byte into the controller's
+ output queue on the keyboard (non-aux) stream. */
+static void pc_kbd_put_keycode(void *opaque, int keycode)
+{
+ kbd_queue((KBDState *)opaque, keycode, 0);
+}
+
+/* I/O read handler for port 0x64: return the controller status byte. */
+static uint32_t kbd_read_status(void *opaque, uint32_t addr)
+{
+ KBDState *s = opaque;
+#if defined(DEBUG_KBD)
+ printf("kbd: read status=0x%02x\n", s->status);
+#endif
+ return s->status;
+}
+
+/* I/O write handler for port 0x64: execute an i8042 controller command.
+ Commands that need a following data byte latch themselves into
+ s->write_cmd and complete in kbd_write_data(). Replies are queued on
+ the keyboard stream. */
+static void kbd_write_command(void *opaque, uint32_t addr, uint32_t val)
+{
+ KBDState *s = opaque;
+
+#ifdef DEBUG_KBD
+ printf("kbd: write cmd=0x%02x\n", val);
+#endif
+ switch(val) {
+ case KBD_CCMD_READ_MODE:
+ kbd_queue(s, s->mode, 0);
+ break;
+ case KBD_CCMD_WRITE_MODE:
+ case KBD_CCMD_WRITE_OBUF:
+ case KBD_CCMD_WRITE_AUX_OBUF:
+ case KBD_CCMD_WRITE_MOUSE:
+ case KBD_CCMD_WRITE_OUTPORT:
+ /* these expect a data byte on port 0x60 next */
+ s->write_cmd = val;
+ break;
+ case KBD_CCMD_MOUSE_DISABLE:
+ s->mode |= KBD_MODE_DISABLE_MOUSE;
+ break;
+ case KBD_CCMD_MOUSE_ENABLE:
+ s->mode &= ~KBD_MODE_DISABLE_MOUSE;
+ break;
+ case KBD_CCMD_TEST_MOUSE:
+ kbd_queue(s, 0x00, 0);
+ break;
+ case KBD_CCMD_SELF_TEST:
+ /* 0x55 = self test passed */
+ s->status |= KBD_STAT_SELFTEST;
+ kbd_queue(s, 0x55, 0);
+ break;
+ case KBD_CCMD_KBD_TEST:
+ kbd_queue(s, 0x00, 0);
+ break;
+ case KBD_CCMD_KBD_DISABLE:
+ s->mode |= KBD_MODE_DISABLE_KBD;
+ kbd_update_irq(s);
+ break;
+ case KBD_CCMD_KBD_ENABLE:
+ s->mode &= ~KBD_MODE_DISABLE_KBD;
+ kbd_update_irq(s);
+ break;
+ case KBD_CCMD_READ_INPORT:
+ kbd_queue(s, 0x00, 0);
+ break;
+ case KBD_CCMD_READ_OUTPORT:
+ /* XXX: check that */
+#ifdef TARGET_I386
+ val = 0x01 | (((cpu_single_env->a20_mask >> 20) & 1) << 1);
+#else
+ val = 0x01;
+#endif
+ if (s->status & KBD_STAT_OBF)
+ val |= 0x10;
+ if (s->status & KBD_STAT_MOUSE_OBF)
+ val |= 0x20;
+ kbd_queue(s, val, 0);
+ break;
+#ifdef TARGET_I386
+ case KBD_CCMD_ENABLE_A20:
+ cpu_x86_set_a20(cpu_single_env, 1);
+ break;
+ case KBD_CCMD_DISABLE_A20:
+ cpu_x86_set_a20(cpu_single_env, 0);
+ break;
+#endif
+ case KBD_CCMD_RESET:
+ qemu_system_reset_request();
+ break;
+ case 0xff:
+ /* ignore that - I don't know what is its use */
+ break;
+ default:
+ fprintf(stderr, "qemu: unsupported keyboard cmd=0x%02x\n", val);
+ break;
+ }
+}
+
+/* I/O read handler for port 0x60: pop one byte from the output FIFO,
+ deassert the matching IRQ, then reassert via kbd_update_irq() if more
+ data is pending. An empty queue re-returns the last byte (EMM386). */
+static uint32_t kbd_read_data(void *opaque, uint32_t addr)
+{
+ KBDState *s = opaque;
+ KBDQueue *q;
+ int val, index, aux;
+
+ q = &s->queue;
+ if (q->count == 0) {
+ /* NOTE: if no data left, we return the last keyboard one
+ (needed for EMM386) */
+ /* XXX: need a timer to do things correctly */
+ index = q->rptr - 1;
+ if (index < 0)
+ index = KBD_QUEUE_SIZE - 1;
+ val = q->data[index];
+ } else {
+ aux = q->aux[q->rptr];
+ val = q->data[q->rptr];
+ if (++q->rptr == KBD_QUEUE_SIZE)
+ q->rptr = 0;
+ q->count--;
+ /* reading deasserts IRQ */
+ if (aux)
+ pic_set_irq(12, 0);
+ else
+ pic_set_irq(1, 0);
+ }
+ /* reassert IRQs if data left */
+ kbd_update_irq(s);
+#ifdef DEBUG_KBD
+ printf("kbd: read data=0x%02x\n", val);
+#endif
+ return val;
+}
+
+/* Device-level keyboard reset: re-enable scanning (the only keyboard
+ state the reset commands touch here). */
+static void kbd_reset_keyboard(KBDState *s)
+{
+ s->scan_enabled = 1;
+}
+
+/* Handle a data byte addressed to the keyboard device itself. If a
+ previous command (SET_LEDS/SET_RATE) latched s->kbd_write_cmd, this
+ byte is its argument; otherwise it is a new command. Unknown commands
+ are simply ACKed. */
+static void kbd_write_keyboard(KBDState *s, int val)
+{
+ switch(s->kbd_write_cmd) {
+ default:
+ case -1:
+ switch(val) {
+ case 0x00:
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ break;
+ case 0x05:
+ kbd_queue(s, KBD_REPLY_RESEND, 0);
+ break;
+ case KBD_CMD_GET_ID:
+ /* 0xab 0x83 = MF2 keyboard with translation */
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ kbd_queue(s, 0xab, 0);
+ kbd_queue(s, 0x83, 0);
+ break;
+ case KBD_CMD_ECHO:
+ kbd_queue(s, KBD_CMD_ECHO, 0);
+ break;
+ case KBD_CMD_ENABLE:
+ s->scan_enabled = 1;
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ break;
+ case KBD_CMD_SET_LEDS:
+ case KBD_CMD_SET_RATE:
+ /* argument byte follows */
+ s->kbd_write_cmd = val;
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ break;
+ case KBD_CMD_RESET_DISABLE:
+ kbd_reset_keyboard(s);
+ s->scan_enabled = 0;
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ break;
+ case KBD_CMD_RESET_ENABLE:
+ kbd_reset_keyboard(s);
+ s->scan_enabled = 1;
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ break;
+ case KBD_CMD_RESET:
+ kbd_reset_keyboard(s);
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ kbd_queue(s, KBD_REPLY_POR, 0);
+ break;
+ default:
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ break;
+ }
+ break;
+ case KBD_CMD_SET_LEDS:
+ /* LED state itself is not modelled; just ACK the argument */
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ s->kbd_write_cmd = -1;
+ break;
+ case KBD_CMD_SET_RATE:
+ /* typematic rate is not modelled; just ACK the argument */
+ kbd_queue(s, KBD_REPLY_ACK, 0);
+ s->kbd_write_cmd = -1;
+ break;
+ }
+}
+
+/* Emit one movement packet into the aux output queue. In touchpad
+ absolute mode a 6-byte Synaptics absolute packet is sent from the raw
+ deltas; otherwise a standard 3-byte PS/2 packet (plus a 4th byte for
+ IMPS/2 / IMEX wheel mice) is sent and the pending deltas reduced by the
+ amount reported, so oversized motion is split across packets. */
+static void kbd_mouse_send_packet(KBDState *s)
+{
+ unsigned int b;
+ int dx1, dy1, dz1;
+
+ dx1 = s->mouse_dx;
+ dy1 = s->mouse_dy;
+ dz1 = s->mouse_dz;
+ if (s->touchpad.absolute)
+ {
+ int dz2, dleftnright, dg, df;
+ /* clamp to the 13-bit absolute coordinate range (0..6143) */
+ if (dx1 > 6143)
+ dx1 = 6143;
+ else if (dx1 < 0)
+ dx1 = 0;
+ if (dy1 > 6143)
+ dy1 = 6143;
+ else if (dy1 < 0)
+ dy1 = 0;
+ dz2 = 80; /* normal finger pressure */
+ dg = 0; /* guesture not supported */
+ df = 0; /* finger not supported */
+ dleftnright = (s->mouse_buttons & 0x07);
+ /*
+ X: 13 bits --return absolute x ord
+ Y: 13 bits --return absolute y ord
+ Z: 8 bits --return constant 80 since we don't know how hard the user
+ is pressing on the mouse button ;) 80 is the default for pen
+ pressure, as touchpads cant sense what pressure a pen makes.
+ W: 4 bits --return 0, we don't support finger width (should we?)
+ left: 1 bit --is left button pressed
+ right: 1 bit --is right button pressed
+ guesture: 1 bit --we dont support, return 0
+ finger: 1 bit --ditto
+ total: 42 bits in 6 bytes
+ note that Synaptics drivers ignore the finger and guesture bits and
+ consider them redundant
+ */
+ /*
+ note: the packet setup is different when Wmode = 1, but
+ this doesn't apply since we don't support Wmode capability
+ format of packet is as follows:
+ */
+ // 1 0 finger reserved 0 gesture right left
+ kbd_queue(s, (0x80 | (df ? 0x20 : 0) | (dg ? 0x04 : 0) | dleftnright), 1);
+ /* NOTE(review): this byte looks like it should carry the y[11:8] /
+ x[11:8] nibbles, but as written it packs the LOW nibbles (and the
+ *256 overflows a byte) -- confirm against the Synaptics spec. */
+ kbd_queue(s, ((dy1 & 0xF) * 256) + (dx1 & 0xF), 1);
+ kbd_queue(s, 80, 1); //byte 3
+ // 1 1 y-12 x-12 0 gesture right left
+ /* bug fix: bit 12 must be tested with hex 0x1000, not decimal 1000 */
+ kbd_queue(s, (0xC0 | ((dy1 & 0x1000) ? 0x20 : 0) | ((dx1 & 0x1000) ? 0x10 : 0) | (dg ? 0x04 : 0) | dleftnright), 1);
+ kbd_queue(s, dx1 & 0xFF, 1);
+ kbd_queue(s, dy1 & 0xFF, 1);
+ return;
+ }
+ /* XXX: increase range to 8 bits ? */
+ if (dx1 > 127)
+ dx1 = 127;
+ else if (dx1 < -127)
+ dx1 = -127;
+ if (dy1 > 127)
+ dy1 = 127;
+ else if (dy1 < -127)
+ dy1 = -127;
+ /* byte 1: always-set bit 3, sign bits, button bits */
+ b = 0x08 | ((dx1 < 0) << 4) | ((dy1 < 0) << 5) | (s->mouse_buttons & 0x07);
+ kbd_queue(s, b, 1);
+ kbd_queue(s, dx1 & 0xff, 1);
+ kbd_queue(s, dy1 & 0xff, 1);
+ /* extra byte for IMPS/2 or IMEX */
+ switch(s->mouse_type) {
+ default:
+ break;
+ case 3:
+ if (dz1 > 127)
+ dz1 = 127;
+ else if (dz1 < -127)
+ dz1 = -127;
+ kbd_queue(s, dz1 & 0xff, 1);
+ break;
+ case 4:
+ if (dz1 > 7)
+ dz1 = 7;
+ else if (dz1 < -7)
+ dz1 = -7;
+ b = (dz1 & 0x0f) | ((s->mouse_buttons & 0x18) << 1);
+ kbd_queue(s, b, 1);
+ break;
+ }
+
+ /* update deltas */
+ s->mouse_dx -= dx1;
+ s->mouse_dy -= dy1;
+ s->mouse_dz -= dz1;
+}
+
+/* Host mouse event callback: accumulate deltas (y inverted: host y grows
+ downward, PS/2 y grows upward) and, in stream mode, flush them as one
+ or more packets while leaving headroom in the queue. */
+static void pc_kbd_mouse_event(void *opaque,
+ int dx, int dy, int dz, int buttons_state)
+{
+ KBDState *s = opaque;
+
+ /* check if deltas are recorded when disabled */
+ if (!(s->mouse_status & MOUSE_STATUS_ENABLED))
+ return;
+
+ s->mouse_dx += dx;
+ s->mouse_dy -= dy;
+ s->mouse_dz += dz;
+ /* XXX: SDL sometimes generates nul events: we delete them */
+ if (s->mouse_dx == 0 && s->mouse_dy == 0 && s->mouse_dz == 0 &&
+ s->mouse_buttons == buttons_state)
+ return;
+ s->mouse_buttons = buttons_state;
+
+ if (!(s->mouse_status & MOUSE_STATUS_REMOTE) &&
+ (s->queue.count < (KBD_QUEUE_SIZE - 16))) {
+ for(;;) {
+ /* if not remote, send event. Multiple events are sent if
+ too big deltas */
+ kbd_mouse_send_packet(s);
+ if (s->mouse_dx == 0 && s->mouse_dy == 0 && s->mouse_dz == 0)
+ break;
+ }
+ }
+}
+
+/* Handle a data byte addressed to the mouse/touchpad (via controller
+ command 0xD4). Plain PS/2 commands are emulated directly; the Synaptics
+ extension encodes a hidden command in four consecutive AUX_SET_RES
+ arguments (two bits each, rr..uu), which a following AUX_GET_SCALE or
+ AUX_SET_SAMPLE(0x14) then executes. */
+static void kbd_write_mouse(KBDState *s, int val)
+{
+/* variables needed to store synaptics command info */
+static int rr = 0, ss = 0, tt = 0, uu = 0, res_count = 0, last_com = 0;
+int spare;
+#ifdef DEBUG_MOUSE
+ printf("kbd: write mouse 0x%02x\n", val);
+#endif
+ switch(s->mouse_write_cmd) {
+ default:
+ case -1:
+ /* mouse command */
+ if (s->mouse_wrap) {
+ if (val == AUX_RESET_WRAP) {
+ s->mouse_wrap = 0;
+ kbd_queue(s, AUX_ACK, 1);
+ return;
+ } else if (val != AUX_RESET) {
+ /* wrap mode echoes every byte except RESET */
+ kbd_queue(s, val, 1);
+ return;
+ }
+ }
+ last_com = val;
+ switch(val) {
+ case AUX_SET_SCALE11:
+ s->mouse_status &= ~MOUSE_STATUS_SCALE21;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_SET_SCALE21:
+ s->mouse_status |= MOUSE_STATUS_SCALE21;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_SET_STREAM:
+ s->mouse_status &= ~MOUSE_STATUS_REMOTE;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_SET_WRAP:
+ s->mouse_wrap = 1;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_SET_REMOTE:
+ s->mouse_status |= MOUSE_STATUS_REMOTE;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_GET_TYPE:
+ kbd_queue(s, AUX_ACK, 1);
+ kbd_queue(s, s->mouse_type, 1);
+ break;
+ case AUX_SET_RES:
+ case AUX_SET_SAMPLE:
+ s->mouse_write_cmd = val;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_GET_SCALE:
+ if (res_count == 4)
+ {
+ /* time for the special stuff */
+ kbd_queue(s, AUX_ACK, 1);
+ /* below is how we get the real synaptic command */
+ val = (rr*64) + (ss*16) + (tt*4) + uu;
+ switch(val)
+ {
+ /* id touchpad */
+ case 0x00:
+ /* info Minor */
+ kbd_queue(s, 0x00, 1);
+ /* special verification byte */
+ kbd_queue(s, 0x47, 1);
+ /* info Major * 0x10 + Info ModelCode*/
+ kbd_queue(s, 4 * 0x10 + 0, 1);
+ break;
+ /* read touchpad modes */
+ case 0x01:
+ /* special verification byte */
+ kbd_queue(s, 0x3B, 1);
+ /* mode */
+ /*
+ bit 7 - absolute or relative position
+ bit 6 - 0 for 40 packets/sec, 1 for 80 pack/sec
+ bit 3 - 1 for sleep mode, 0 for normal
+ bit 2 - 1 to detect tap/drag, 0 to disable
+ bit 1 - packet size, only valid for serial protocol
+ bit 0 - 0 for normal packets, 1 for enhanced packets
+ (absolute mode packets which have finger width)
+ */
+ if (s->touchpad.absolute && s->touchpad.high)
+ {
+ spare = 0xC0;
+ }
+ else if (s->touchpad.absolute)
+ {
+ spare = 0x80;
+ }
+ else if (s->touchpad.high)
+ {
+ spare = 0x40;
+ }
+ else
+ {
+ spare = 0x00;
+ }
+ kbd_queue(s, spare, 1);
+ /* special verification byte */
+ kbd_queue(s, 0x47, 1);
+ break;
+ /* read touchpad capabilites */
+ case 0x02:
+ /* extended capability first 8 bits */
+ kbd_queue(s, 0x00, 1);
+ /* special verification byte */
+ kbd_queue(s, 0x47, 1);
+ /* extended capability last 8 bits */
+ kbd_queue(s, 0x00, 1);
+ /* basicly, we don't have any capabilites ;0 */
+ break;
+ /* read model id */
+ case 0x03:
+ /*
+ bit 23 = 0 (1 for upsidedownpad)
+ bit 22 = 0 (1 for 90 degree rotated pad)
+ bits 21-16 = 1 (standard model)
+ bits 15-9 = ??? (reserved for synaptics use)
+ bit 7 = 1
+ bit 6 = 0 (1 for sensing pens)
+ bit 5 = 1
+ bits 3-0 = 1 (rectangular geometery)
+ */
+ kbd_queue(s, 0xFC, 1);
+ kbd_queue(s, 0x00, 1);
+ kbd_queue(s, 0xF5, 1); //F7 for sensing pens
+ break;
+ /* read serial number prefix */
+ case 0x06:
+ /* strange how they have this query even though
+ no touchpad actually has serial numbers */
+ /* return serial prefix of 0 if we dont have one */
+ kbd_queue(s, 0x00, 1);
+ kbd_queue(s, 0x00, 1);
+ kbd_queue(s, 0x00, 1);
+ break;
+ /* read serial number suffix */
+ case 0x07:
+ /* undefined if we dont have a valid serial prefix */
+ kbd_queue(s, 0x00, 1);
+ kbd_queue(s, 0x00, 1);
+ kbd_queue(s, 0x00, 1);
+ break;
+ /* read resolutions */
+ case 0x08:
+ /* going to go with infoSensor = 1 (Standard model) here */
+ /* absolute X in abolute units per mm */
+ kbd_queue(s, 85, 1);
+ /* undefined but first bit 7 will be set to 1...
+ hell I'm going to set them all to 1 */
+ kbd_queue(s, 0xFF, 1);
+ /* absolute Y in abolute units per mm */
+ kbd_queue(s, 94, 1);
+ break;
+ default:
+ /* invalid commands return undefined data */
+ kbd_queue(s, 0x00, 1);
+ kbd_queue(s, 0x00, 1);
+ kbd_queue(s, 0x00, 1);
+ break;
+ }
+ }
+ else
+ {
+ /* not a special command, just do the regular stuff */
+ kbd_queue(s, AUX_ACK, 1);
+ kbd_queue(s, s->mouse_status, 1);
+ kbd_queue(s, s->mouse_resolution, 1);
+ kbd_queue(s, s->mouse_sample_rate, 1);
+ }
+ break;
+ case AUX_POLL:
+ kbd_queue(s, AUX_ACK, 1);
+ kbd_mouse_send_packet(s);
+ break;
+ case AUX_ENABLE_DEV:
+ s->mouse_status |= MOUSE_STATUS_ENABLED;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_DISABLE_DEV:
+ s->mouse_status &= ~MOUSE_STATUS_ENABLED;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_SET_DEFAULT:
+ s->mouse_sample_rate = 100;
+ s->mouse_resolution = 2;
+ s->mouse_status = 0;
+ s->touchpad.absolute = 0;
+ kbd_queue(s, AUX_ACK, 1);
+ break;
+ case AUX_RESET:
+ s->mouse_sample_rate = 100;
+ s->mouse_resolution = 2;
+ s->mouse_status = 0;
+ s->touchpad.absolute = 0;
+ kbd_queue(s, AUX_ACK, 1);
+ kbd_queue(s, 0xaa, 1);
+ kbd_queue(s, s->mouse_type, 1);
+ break;
+ default:
+ break;
+ }
+ break;
+ case AUX_SET_SAMPLE:
+ if (res_count == 4 && val == 0x14)
+ {
+ /* time for the special stuff */
+ /* below is how we get the real synaptic command */
+ val = (rr*64) + (ss*16) + (tt*4) + uu;
+ /* TODO: set the mode byte */
+ } else
+ s->mouse_sample_rate = val;
+#if 0
+ /* detect IMPS/2 or IMEX */
+ switch(s->mouse_detect_state) {
+ default:
+ case 0:
+ if (val == 200)
+ s->mouse_detect_state = 1;
+ break;
+ case 1:
+ if (val == 100)
+ s->mouse_detect_state = 2;
+ else if (val == 200)
+ s->mouse_detect_state = 3;
+ else
+ s->mouse_detect_state = 0;
+ break;
+ case 2:
+ if (val == 80)
+ s->mouse_type = 3; /* IMPS/2 */
+ s->mouse_detect_state = 0;
+ break;
+ case 3:
+ if (val == 80)
+ s->mouse_type = 4; /* IMEX */
+ s->mouse_detect_state = 0;
+ break;
+ }
+#endif
+ kbd_queue(s, AUX_ACK, 1);
+ s->mouse_write_cmd = -1;
+ break;
+ case AUX_SET_RES:
+ if (last_com != AUX_SET_RES)
+ {
+ /* if its not 4 in a row, its not a command */
+ /* FIXME: if we are set 8 of these in a row, or 12, or 16,
+ or etc ... or 4^n commands, then the nth'd mode byte sent might
+ still work. not sure if this is how things are suppose to be
+ or not. */
+ res_count = 0;
+ }
+ res_count++;
+ if (res_count > 4) res_count = 4;
+ switch(res_count)
+ /* we need to save the val in the right spots to get the
+ real command later */
+ {
+ case 1:
+ /* bug fix: the original executed break BEFORE rr = val, so the
+ first two command bits were never captured */
+ rr = val;
+ break;
+ case 2:
+ ss = val;
+ break;
+ case 3:
+ tt = val;
+ break;
+ case 4:
+ uu = val;
+ break;
+ }
+ s->mouse_resolution = val;
+ kbd_queue(s, AUX_ACK, 1);
+ s->mouse_write_cmd = -1;
+ break;
+ }
+}
+
+/* I/O write handler for port 0x60: either a keyboard data byte, or the
+ argument of a controller command previously latched into s->write_cmd
+ by kbd_write_command(). Always clears the latch afterwards. */
+void kbd_write_data(void *opaque, uint32_t addr, uint32_t val)
+{
+ KBDState *s = opaque;
+
+#ifdef DEBUG_KBD
+ printf("kbd: write data=0x%02x\n", val);
+#endif
+
+ switch(s->write_cmd) {
+ case 0:
+ kbd_write_keyboard(s, val);
+ break;
+ case KBD_CCMD_WRITE_MODE:
+ s->mode = val;
+ kbd_update_irq(s);
+ break;
+ case KBD_CCMD_WRITE_OBUF:
+ kbd_queue(s, val, 0);
+ break;
+ case KBD_CCMD_WRITE_AUX_OBUF:
+ kbd_queue(s, val, 1);
+ break;
+ case KBD_CCMD_WRITE_OUTPORT:
+ /* bit 1 = A20 gate, bit 0 low = CPU reset */
+#ifdef TARGET_I386
+ cpu_x86_set_a20(cpu_single_env, (val >> 1) & 1);
+#endif
+ if (!(val & 1)) {
+ qemu_system_reset_request();
+ }
+ break;
+ case KBD_CCMD_WRITE_MOUSE:
+ kbd_write_mouse(s, val);
+ break;
+ default:
+ break;
+ }
+ s->write_cmd = 0;
+}
+
+/* System reset handler: restore power-on controller state (no pending
+ device command, default mode flags, empty output queue). */
+static void kbd_reset(void *opaque)
+{
+ KBDState *s = opaque;
+ KBDQueue *q;
+
+ s->kbd_write_cmd = -1;
+ s->mouse_write_cmd = -1;
+ s->mode = KBD_MODE_KBD_INT | KBD_MODE_MOUSE_INT | KBD_MODE_KCC;
+ s->status = KBD_STAT_CMD | KBD_STAT_UNLOCKED;
+ q = &s->queue;
+ q->rptr = 0;
+ q->wptr = 0;
+ q->count = 0;
+}
+
+/* Savevm callback: serialize the controller state. Field order and
+ widths must stay in lockstep with kbd_load() (savevm version 2). The
+ output queue itself is deliberately not saved. */
+static void kbd_save(QEMUFile* f, void* opaque)
+{
+ KBDState *s = (KBDState*)opaque;
+
+ qemu_put_8s(f, &s->write_cmd);
+ qemu_put_8s(f, &s->status);
+ qemu_put_8s(f, &s->mode);
+ qemu_put_be32s(f, &s->kbd_write_cmd);
+ qemu_put_be32s(f, &s->scan_enabled);
+ qemu_put_be32s(f, &s->mouse_write_cmd);
+ qemu_put_8s(f, &s->mouse_status);
+ qemu_put_8s(f, &s->mouse_resolution);
+ qemu_put_8s(f, &s->mouse_sample_rate);
+ qemu_put_8s(f, &s->mouse_wrap);
+ qemu_put_8s(f, &s->mouse_type);
+ qemu_put_8s(f, &s->mouse_detect_state);
+ qemu_put_be32s(f, &s->mouse_dx);
+ qemu_put_be32s(f, &s->mouse_dy);
+ qemu_put_be32s(f, &s->mouse_dz);
+ qemu_put_8s(f, &s->mouse_buttons);
+ qemu_put_be32s(f, &s->touchpad.absolute);
+ qemu_put_be32s(f, &s->touchpad.high);
+}
+
+/* Loadvm callback: restore the state written by kbd_save(); rejects any
+ savevm version other than 2. Returns 0 on success, -EINVAL otherwise. */
+static int kbd_load(QEMUFile* f, void* opaque, int version_id)
+{
+ KBDState *s = (KBDState*)opaque;
+
+ if (version_id != 2)
+ return -EINVAL;
+ qemu_get_8s(f, &s->write_cmd);
+ qemu_get_8s(f, &s->status);
+ qemu_get_8s(f, &s->mode);
+ qemu_get_be32s(f, &s->kbd_write_cmd);
+ qemu_get_be32s(f, &s->scan_enabled);
+ qemu_get_be32s(f, &s->mouse_write_cmd);
+ qemu_get_8s(f, &s->mouse_status);
+ qemu_get_8s(f, &s->mouse_resolution);
+ qemu_get_8s(f, &s->mouse_sample_rate);
+ qemu_get_8s(f, &s->mouse_wrap);
+ qemu_get_8s(f, &s->mouse_type);
+ qemu_get_8s(f, &s->mouse_detect_state);
+ qemu_get_be32s(f, &s->mouse_dx);
+ qemu_get_be32s(f, &s->mouse_dy);
+ qemu_get_be32s(f, &s->mouse_dz);
+ qemu_get_8s(f, &s->mouse_buttons);
+ qemu_get_be32s(f, &s->touchpad.absolute);
+ qemu_get_be32s(f, &s->touchpad.high);
+ return 0;
+}
+
+/* One-time setup: reset the controller, register savevm handlers, claim
+ the i8042 I/O ports (0x60 data, 0x64 status/command) and hook keyboard,
+ mouse and system-reset callbacks. */
+void kbd_init(void)
+{
+ KBDState *s = &kbd_state;
+
+ kbd_reset(s);
+ register_savevm("pckbd", 0, 2, kbd_save, kbd_load, s);
+ register_ioport_read(0x60, 1, 1, kbd_read_data, s);
+ register_ioport_write(0x60, 1, 1, kbd_write_data, s);
+ register_ioport_read(0x64, 1, 1, kbd_read_status, s);
+ register_ioport_write(0x64, 1, 1, kbd_write_command, s);
+
+ qemu_add_kbd_event_handler(pc_kbd_put_keycode, s);
+ qemu_add_mouse_event_handler(pc_kbd_mouse_event, s);
+ qemu_register_reset(kbd_reset, s);
+}
diff --git a/tools/ioemu/hw/port-e9.c b/tools/ioemu/hw/port-e9.c
new file mode 100644
index 0000000000..374ec108d5
--- /dev/null
+++ b/tools/ioemu/hw/port-e9.c
@@ -0,0 +1,47 @@
+/*
+ * QEMU Port 0xe9 hack
+ *
+ * Copyright (c) 2000-2004 E. Marty, the bochs team, D. Decotigny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include "vl.h"
+#include "exec-all.h"
+
+/* Bochs debug-port write: every byte written to port 0xe9 is appended to
+ the qemu log file. */
+static void bochs_e9_write(void *opaque, uint32_t address, uint32_t data)
+{
+ fputc(data, logfile);
+}
+
+/* Reading port 0xe9 returns 0xE9 so guests can probe for the hack. */
+static uint32_t bochs_e9_read(void *opaque, uint32_t address)
+{
+ return 0xE9;
+}
+
+/* Register the single-byte handlers on port 0xe9. */
+void port_e9_init ()
+{
+ register_ioport_write(0xe9, 1, 1, bochs_e9_write, NULL);
+ register_ioport_read (0xe9, 1, 1, bochs_e9_read, NULL);
+}
+
+
diff --git a/tools/ioemu/hw/ppc.c b/tools/ioemu/hw/ppc.c
new file mode 100644
index 0000000000..5f992290e4
--- /dev/null
+++ b/tools/ioemu/hw/ppc.c
@@ -0,0 +1,462 @@
+/*
+ * QEMU generic PPC hardware System Emulator
+ *
+ * Copyright (c) 2003-2004 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "m48t59.h"
+
+/*****************************************************************************/
+/* PPC time base and decrementer emulation */
+//#define DEBUG_TB
+
+struct ppc_tb_t {
+ /* Time base management */
+ int64_t tb_offset; /* Compensation */
+ uint32_t tb_freq; /* TB frequency */
+ /* Decrementer management */
+ uint64_t decr_next; /* Tick for next decr interrupt */
+ struct QEMUTimer *decr_timer;
+};
+
+static inline uint64_t cpu_ppc_get_tb (ppc_tb_t *tb_env)
+{
+ /* TB time in tb periods */
+ return muldiv64(qemu_get_clock(vm_clock) + tb_env->tb_offset,
+ tb_env->tb_freq, ticks_per_sec);
+}
+
+uint32_t cpu_ppc_load_tbl (CPUState *env)
+{
+ ppc_tb_t *tb_env = env->tb_env;
+ uint64_t tb;
+
+ tb = cpu_ppc_get_tb(tb_env);
+#ifdef DEBUG_TB
+ {
+ static int last_time;
+ int now;
+ now = time(NULL);
+ if (last_time != now) {
+ last_time = now;
+ printf("%s: tb=0x%016lx %d %08lx\n",
+ __func__, tb, now, tb_env->tb_offset);
+ }
+ }
+#endif
+
+ return tb & 0xFFFFFFFF;
+}
+
+uint32_t cpu_ppc_load_tbu (CPUState *env)
+{
+ ppc_tb_t *tb_env = env->tb_env;
+ uint64_t tb;
+
+ tb = cpu_ppc_get_tb(tb_env);
+#ifdef DEBUG_TB
+ printf("%s: tb=0x%016lx\n", __func__, tb);
+#endif
+ return tb >> 32;
+}
+
+static void cpu_ppc_store_tb (ppc_tb_t *tb_env, uint64_t value)
+{
+ tb_env->tb_offset = muldiv64(value, ticks_per_sec, tb_env->tb_freq)
+ - qemu_get_clock(vm_clock);
+#ifdef DEBUG_TB
+ printf("%s: tb=0x%016lx offset=%08lx\n", __func__, value, tb_env->tb_offset);
+#endif
+}
+
+void cpu_ppc_store_tbu (CPUState *env, uint32_t value)
+{
+ ppc_tb_t *tb_env = env->tb_env;
+
+ cpu_ppc_store_tb(tb_env,
+ ((uint64_t)value << 32) | cpu_ppc_load_tbl(env));
+}
+
+void cpu_ppc_store_tbl (CPUState *env, uint32_t value)
+{
+ ppc_tb_t *tb_env = env->tb_env;
+
+ cpu_ppc_store_tb(tb_env,
+ ((uint64_t)cpu_ppc_load_tbu(env) << 32) | value);
+}
+
+uint32_t cpu_ppc_load_decr (CPUState *env)
+{
+ ppc_tb_t *tb_env = env->tb_env;
+ uint32_t decr;
+
+ decr = muldiv64(tb_env->decr_next - qemu_get_clock(vm_clock),
+ tb_env->tb_freq, ticks_per_sec);
+#if defined(DEBUG_TB)
+ printf("%s: 0x%08x\n", __func__, decr);
+#endif
+
+ return decr;
+}
+
+/* When decrementer expires,
+ * all we need to do is generate or queue a CPU exception
+ */
+static inline void cpu_ppc_decr_excp (CPUState *env)
+{
+ /* Raise it */
+#ifdef DEBUG_TB
+ printf("raise decrementer exception\n");
+#endif
+ cpu_interrupt(env, CPU_INTERRUPT_TIMER);
+}
+
+static void _cpu_ppc_store_decr (CPUState *env, uint32_t decr,
+ uint32_t value, int is_excp)
+{
+ ppc_tb_t *tb_env = env->tb_env;
+ uint64_t now, next;
+
+#ifdef DEBUG_TB
+ printf("%s: 0x%08x => 0x%08x\n", __func__, decr, value);
+#endif
+ now = qemu_get_clock(vm_clock);
+ next = now + muldiv64(value, ticks_per_sec, tb_env->tb_freq);
+ if (is_excp)
+ next += tb_env->decr_next - now;
+ if (next == now)
+ next++;
+ tb_env->decr_next = next;
+ /* Adjust timer */
+ qemu_mod_timer(tb_env->decr_timer, next);
+ /* If we set a negative value and the decrementer was positive,
+ * raise an exception.
+ */
+ if ((value & 0x80000000) && !(decr & 0x80000000))
+ cpu_ppc_decr_excp(env);
+}
+
+void cpu_ppc_store_decr (CPUState *env, uint32_t value)
+{
+ _cpu_ppc_store_decr(env, cpu_ppc_load_decr(env), value, 0);
+}
+
+static void cpu_ppc_decr_cb (void *opaque)
+{
+ _cpu_ppc_store_decr(opaque, 0x00000000, 0xFFFFFFFF, 1);
+}
+
+/* Set up (once) timebase frequency (in Hz) */
+ppc_tb_t *cpu_ppc_tb_init (CPUState *env, uint32_t freq)
+{
+ ppc_tb_t *tb_env;
+
+ tb_env = qemu_mallocz(sizeof(ppc_tb_t));
+ if (tb_env == NULL)
+ return NULL;
+ env->tb_env = tb_env;
+ if (tb_env->tb_freq == 0 || 1) {
+ tb_env->tb_freq = freq;
+ /* Create new timer */
+ tb_env->decr_timer =
+ qemu_new_timer(vm_clock, &cpu_ppc_decr_cb, env);
+ /* There is a bug in 2.4 kernels:
+ * if a decrementer exception is pending when it enables msr_ee,
+ * it's not ready to handle it...
+ */
+ _cpu_ppc_store_decr(env, 0xFFFFFFFF, 0xFFFFFFFF, 0);
+ }
+
+ return tb_env;
+}
+
+#if 0
+/*****************************************************************************/
+/* Handle system reset (for now, just stop emulation) */
+void cpu_ppc_reset (CPUState *env)
+{
+ printf("Reset asked... Stop emulation\n");
+ abort();
+}
+#endif
+
+static void PPC_io_writeb (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ cpu_outb(NULL, addr & 0xffff, value);
+}
+
+static uint32_t PPC_io_readb (void *opaque, target_phys_addr_t addr)
+{
+ uint32_t ret = cpu_inb(NULL, addr & 0xffff);
+ return ret;
+}
+
+static void PPC_io_writew (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ value = bswap16(value);
+#endif
+ cpu_outw(NULL, addr & 0xffff, value);
+}
+
+static uint32_t PPC_io_readw (void *opaque, target_phys_addr_t addr)
+{
+ uint32_t ret = cpu_inw(NULL, addr & 0xffff);
+#ifdef TARGET_WORDS_BIGENDIAN
+ ret = bswap16(ret);
+#endif
+ return ret;
+}
+
+static void PPC_io_writel (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ value = bswap32(value);
+#endif
+ cpu_outl(NULL, addr & 0xffff, value);
+}
+
+static uint32_t PPC_io_readl (void *opaque, target_phys_addr_t addr)
+{
+ uint32_t ret = cpu_inl(NULL, addr & 0xffff);
+
+#ifdef TARGET_WORDS_BIGENDIAN
+ ret = bswap32(ret);
+#endif
+ return ret;
+}
+
+CPUWriteMemoryFunc *PPC_io_write[] = {
+ &PPC_io_writeb,
+ &PPC_io_writew,
+ &PPC_io_writel,
+};
+
+CPUReadMemoryFunc *PPC_io_read[] = {
+ &PPC_io_readb,
+ &PPC_io_readw,
+ &PPC_io_readl,
+};
+
+/*****************************************************************************/
+/* Debug port */
+void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val)
+{
+ addr &= 0xF;
+ switch (addr) {
+ case 0:
+ printf("%c", val);
+ break;
+ case 1:
+ printf("\n");
+ fflush(stdout);
+ break;
+ case 2:
+ printf("Set loglevel to %04x\n", val);
+ cpu_set_log(val | 0x100);
+ break;
+ }
+}
+
+/*****************************************************************************/
+/* NVRAM helpers */
+void NVRAM_set_byte (m48t59_t *nvram, uint32_t addr, uint8_t value)
+{
+ m48t59_set_addr(nvram, addr);
+ m48t59_write(nvram, value);
+}
+
+uint8_t NVRAM_get_byte (m48t59_t *nvram, uint32_t addr)
+{
+ m48t59_set_addr(nvram, addr);
+ return m48t59_read(nvram);
+}
+
+void NVRAM_set_word (m48t59_t *nvram, uint32_t addr, uint16_t value)
+{
+ m48t59_set_addr(nvram, addr);
+ m48t59_write(nvram, value >> 8);
+ m48t59_set_addr(nvram, addr + 1);
+ m48t59_write(nvram, value & 0xFF);
+}
+
+uint16_t NVRAM_get_word (m48t59_t *nvram, uint32_t addr)
+{
+ uint16_t tmp;
+
+ m48t59_set_addr(nvram, addr);
+ tmp = m48t59_read(nvram) << 8;
+ m48t59_set_addr(nvram, addr + 1);
+ tmp |= m48t59_read(nvram);
+
+ return tmp;
+}
+
+void NVRAM_set_lword (m48t59_t *nvram, uint32_t addr, uint32_t value)
+{
+ m48t59_set_addr(nvram, addr);
+ m48t59_write(nvram, value >> 24);
+ m48t59_set_addr(nvram, addr + 1);
+ m48t59_write(nvram, (value >> 16) & 0xFF);
+ m48t59_set_addr(nvram, addr + 2);
+ m48t59_write(nvram, (value >> 8) & 0xFF);
+ m48t59_set_addr(nvram, addr + 3);
+ m48t59_write(nvram, value & 0xFF);
+}
+
+uint32_t NVRAM_get_lword (m48t59_t *nvram, uint32_t addr)
+{
+ uint32_t tmp;
+
+ m48t59_set_addr(nvram, addr);
+ tmp = m48t59_read(nvram) << 24;
+ m48t59_set_addr(nvram, addr + 1);
+ tmp |= m48t59_read(nvram) << 16;
+ m48t59_set_addr(nvram, addr + 2);
+ tmp |= m48t59_read(nvram) << 8;
+ m48t59_set_addr(nvram, addr + 3);
+ tmp |= m48t59_read(nvram);
+
+ return tmp;
+}
+
+void NVRAM_set_string (m48t59_t *nvram, uint32_t addr,
+ const unsigned char *str, uint32_t max)
+{
+ int i;
+
+ for (i = 0; i < max && str[i] != '\0'; i++) {
+ m48t59_set_addr(nvram, addr + i);
+ m48t59_write(nvram, str[i]);
+ }
+ m48t59_set_addr(nvram, addr + max - 1);
+ m48t59_write(nvram, '\0');
+}
+
+int NVRAM_get_string (m48t59_t *nvram, uint8_t *dst, uint16_t addr, int max)
+{
+ int i;
+
+ memset(dst, 0, max);
+ for (i = 0; i < max; i++) {
+ dst[i] = NVRAM_get_byte(nvram, addr + i);
+ if (dst[i] == '\0')
+ break;
+ }
+
+ return i;
+}
+
+static uint16_t NVRAM_crc_update (uint16_t prev, uint16_t value)
+{
+ uint16_t tmp;
+ uint16_t pd, pd1, pd2;
+
+ tmp = prev >> 8;
+ pd = prev ^ value;
+ pd1 = pd & 0x000F;
+ pd2 = ((pd >> 4) & 0x000F) ^ pd1;
+ tmp ^= (pd1 << 3) | (pd1 << 8);
+ tmp ^= pd2 | (pd2 << 7) | (pd2 << 12);
+
+ return tmp;
+}
+
+uint16_t NVRAM_compute_crc (m48t59_t *nvram, uint32_t start, uint32_t count)
+{
+ uint32_t i;
+ uint16_t crc = 0xFFFF;
+ int odd;
+
+ odd = count & 1;
+ count &= ~1;
+ for (i = 0; i != count; i++) {
+ crc = NVRAM_crc_update(crc, NVRAM_get_word(nvram, start + i));
+ }
+ if (odd) {
+ crc = NVRAM_crc_update(crc, NVRAM_get_byte(nvram, start + i) << 8);
+ }
+
+ return crc;
+}
+
+#define CMDLINE_ADDR 0x017ff000
+
+int PPC_NVRAM_set_params (m48t59_t *nvram, uint16_t NVRAM_size,
+ const unsigned char *arch,
+ uint32_t RAM_size, int boot_device,
+ uint32_t kernel_image, uint32_t kernel_size,
+ const char *cmdline,
+ uint32_t initrd_image, uint32_t initrd_size,
+ uint32_t NVRAM_image,
+ int width, int height, int depth)
+{
+ uint16_t crc;
+
+ /* Set parameters for Open Hack'Ware BIOS */
+ NVRAM_set_string(nvram, 0x00, "QEMU_BIOS", 16);
+ NVRAM_set_lword(nvram, 0x10, 0x00000002); /* structure v2 */
+ NVRAM_set_word(nvram, 0x14, NVRAM_size);
+ NVRAM_set_string(nvram, 0x20, arch, 16);
+ NVRAM_set_lword(nvram, 0x30, RAM_size);
+ NVRAM_set_byte(nvram, 0x34, boot_device);
+ NVRAM_set_lword(nvram, 0x38, kernel_image);
+ NVRAM_set_lword(nvram, 0x3C, kernel_size);
+ if (cmdline) {
+ /* XXX: put the cmdline in NVRAM too ? */
+ strcpy(phys_ram_base + CMDLINE_ADDR, cmdline);
+ NVRAM_set_lword(nvram, 0x40, CMDLINE_ADDR);
+ NVRAM_set_lword(nvram, 0x44, strlen(cmdline));
+ } else {
+ NVRAM_set_lword(nvram, 0x40, 0);
+ NVRAM_set_lword(nvram, 0x44, 0);
+ }
+ NVRAM_set_lword(nvram, 0x48, initrd_image);
+ NVRAM_set_lword(nvram, 0x4C, initrd_size);
+ NVRAM_set_lword(nvram, 0x50, NVRAM_image);
+
+ NVRAM_set_word(nvram, 0x54, width);
+ NVRAM_set_word(nvram, 0x56, height);
+ NVRAM_set_word(nvram, 0x58, depth);
+ crc = NVRAM_compute_crc(nvram, 0x00, 0xF8);
+ NVRAM_set_word(nvram, 0xFC, crc);
+
+ return 0;
+ }
+
+/*****************************************************************************/
+void ppc_init (int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename)
+{
+ if (prep_enabled) {
+ ppc_prep_init(ram_size, vga_ram_size, boot_device, ds, fd_filename,
+ snapshot, kernel_filename, kernel_cmdline,
+ initrd_filename);
+ } else {
+ ppc_chrp_init(ram_size, vga_ram_size, boot_device, ds, fd_filename,
+ snapshot, kernel_filename, kernel_cmdline,
+ initrd_filename);
+ }
+ /* Special port to get debug messages from Open-Firmware */
+ register_ioport_write(0x0F00, 4, 1, &PPC_debug_write, NULL);
+}
diff --git a/tools/ioemu/hw/ppc_chrp.c b/tools/ioemu/hw/ppc_chrp.c
new file mode 100644
index 0000000000..cf3a5f32fa
--- /dev/null
+++ b/tools/ioemu/hw/ppc_chrp.c
@@ -0,0 +1,233 @@
+/*
+ * QEMU PPC CHRP/PMAC hardware System Emulator
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+#define BIOS_FILENAME "ppc_rom.bin"
+#define NVRAM_SIZE 0x2000
+
+#define KERNEL_LOAD_ADDR 0x01000000
+#define INITRD_LOAD_ADDR 0x01800000
+
+/* MacIO devices (mapped inside the MacIO address space): CUDA, DBDMA,
+ NVRAM (not implemented). */
+
+static int dbdma_mem_index;
+static int cuda_mem_index;
+static int ide0_mem_index;
+static int ide1_mem_index;
+static int openpic_mem_index;
+
+/* DBDMA: currently no op - should suffice right now */
+
+static void dbdma_writeb (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ printf("%s: 0x%08x <= 0x%08x\n", __func__, addr, value);
+}
+
+static void dbdma_writew (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+}
+
+static void dbdma_writel (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+}
+
+static uint32_t dbdma_readb (void *opaque, target_phys_addr_t addr)
+{
+ printf("%s: 0x%08x => 0x00000000\n", __func__, addr);
+ return 0;
+}
+
+static uint32_t dbdma_readw (void *opaque, target_phys_addr_t addr)
+{
+ return 0;
+}
+
+static uint32_t dbdma_readl (void *opaque, target_phys_addr_t addr)
+{
+ return 0;
+}
+
+static CPUWriteMemoryFunc *dbdma_write[] = {
+ &dbdma_writeb,
+ &dbdma_writew,
+ &dbdma_writel,
+};
+
+static CPUReadMemoryFunc *dbdma_read[] = {
+ &dbdma_readb,
+ &dbdma_readw,
+ &dbdma_readl,
+};
+
+static void macio_map(PCIDevice *pci_dev, int region_num,
+ uint32_t addr, uint32_t size, int type)
+{
+ cpu_register_physical_memory(addr + 0x08000, 0x1000, dbdma_mem_index);
+ cpu_register_physical_memory(addr + 0x16000, 0x2000, cuda_mem_index);
+ cpu_register_physical_memory(addr + 0x1f000, 0x1000, ide0_mem_index);
+ cpu_register_physical_memory(addr + 0x20000, 0x1000, ide1_mem_index);
+ cpu_register_physical_memory(addr + 0x40000, 0x40000, openpic_mem_index);
+}
+
+static void macio_init(PCIBus *bus)
+{
+ PCIDevice *d;
+
+ d = pci_register_device(bus, "macio", sizeof(PCIDevice),
+ -1, NULL, NULL);
+ /* Note: this code is strongly inspired by the corresponding code
+ in PearPC */
+ d->config[0x00] = 0x6b; // vendor_id
+ d->config[0x01] = 0x10;
+ d->config[0x02] = 0x22;
+ d->config[0x03] = 0x00;
+
+ d->config[0x0a] = 0x00; // class_sub = pci2pci
+ d->config[0x0b] = 0xff; // class_base = bridge
+ d->config[0x0e] = 0x00; // header_type
+
+ d->config[0x3d] = 0x01; // interrupt on pin 1
+
+ dbdma_mem_index = cpu_register_io_memory(0, dbdma_read, dbdma_write, NULL);
+
+ pci_register_io_region(d, 0, 0x80000,
+ PCI_ADDRESS_SPACE_MEM, macio_map);
+}
+
+/* PowerPC PREP hardware initialisation */
+void ppc_chrp_init(int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename)
+{
+ char buf[1024];
+ openpic_t *openpic;
+ m48t59_t *nvram;
+ int PPC_io_memory;
+ int ret, linux_boot, i;
+ unsigned long bios_offset;
+ uint32_t kernel_base, kernel_size, initrd_base, initrd_size;
+ PCIBus *pci_bus;
+
+ linux_boot = (kernel_filename != NULL);
+
+ /* allocate RAM */
+ cpu_register_physical_memory(0, ram_size, IO_MEM_RAM);
+
+ /* allocate and load BIOS */
+ bios_offset = ram_size + vga_ram_size;
+ snprintf(buf, sizeof(buf), "%s/%s", bios_dir, BIOS_FILENAME);
+ ret = load_image(buf, phys_ram_base + bios_offset);
+ if (ret != BIOS_SIZE) {
+ fprintf(stderr, "qemu: could not load PPC PREP bios '%s'\n", buf);
+ exit(1);
+ }
+ cpu_register_physical_memory((uint32_t)(-BIOS_SIZE),
+ BIOS_SIZE, bios_offset | IO_MEM_ROM);
+ cpu_single_env->nip = 0xfffffffc;
+
+ if (linux_boot) {
+ kernel_base = KERNEL_LOAD_ADDR;
+ /* now we can load the kernel */
+ kernel_size = load_image(kernel_filename, phys_ram_base + kernel_base);
+ if ((int32_t)kernel_size < 0) {
+ fprintf(stderr, "qemu: could not load kernel '%s'\n",
+ kernel_filename);
+ exit(1);
+ }
+ /* load initrd */
+ if (initrd_filename) {
+ initrd_base = INITRD_LOAD_ADDR;
+ initrd_size = load_image(initrd_filename,
+ phys_ram_base + initrd_base);
+ if ((int32_t)initrd_size < 0) {
+ fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
+ initrd_filename);
+ exit(1);
+ }
+ } else {
+ initrd_base = 0;
+ initrd_size = 0;
+ }
+ boot_device = 'm';
+ } else {
+ kernel_base = 0;
+ kernel_size = 0;
+ initrd_base = 0;
+ initrd_size = 0;
+ }
+ /* Register CPU as a 74x/75x */
+ cpu_ppc_register(cpu_single_env, 0x00080000);
+ /* Set time-base frequency to 100 Mhz */
+ cpu_ppc_tb_init(cpu_single_env, 100UL * 1000UL * 1000UL);
+
+ isa_mem_base = 0x80000000;
+ pci_bus = pci_pmac_init();
+
+ /* Register 8 MB of ISA IO space */
+ PPC_io_memory = cpu_register_io_memory(0, PPC_io_read, PPC_io_write, NULL);
+ cpu_register_physical_memory(0xF2000000, 0x00800000, PPC_io_memory);
+
+ /* init basic PC hardware */
+ vga_initialize(pci_bus, ds, phys_ram_base + ram_size, ram_size,
+ vga_ram_size);
+ openpic = openpic_init(NULL, &openpic_mem_index, 1);
+ pci_pmac_set_openpic(pci_bus, openpic);
+
+ /* XXX: suppress that */
+ pic_init();
+
+ /* XXX: use Mac Serial port */
+ serial_init(0x3f8, 4, serial_hds[0]);
+
+ for(i = 0; i < nb_nics; i++) {
+ pci_ne2000_init(pci_bus, &nd_table[i]);
+ }
+
+ ide0_mem_index = pmac_ide_init(&bs_table[0], openpic, 0x13);
+ ide1_mem_index = pmac_ide_init(&bs_table[2], openpic, 0x13);
+
+ /* cuda also initialize ADB */
+ cuda_mem_index = cuda_init(openpic, 0x19);
+
+ adb_kbd_init(&adb_bus);
+ adb_mouse_init(&adb_bus);
+
+ macio_init(pci_bus);
+
+ nvram = m48t59_init(8, 0xFFF04000, 0x0074, NVRAM_SIZE);
+
+ if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8)
+ graphic_depth = 15;
+
+ PPC_NVRAM_set_params(nvram, NVRAM_SIZE, "CHRP", ram_size, boot_device,
+ kernel_base, kernel_size,
+ kernel_cmdline,
+ initrd_base, initrd_size,
+ /* XXX: need an option to load a NVRAM image */
+ 0,
+ graphic_width, graphic_height, graphic_depth);
+ /* No PCI init: the BIOS will do it */
+}
diff --git a/tools/ioemu/hw/ppc_prep.c b/tools/ioemu/hw/ppc_prep.c
new file mode 100644
index 0000000000..c93b72faeb
--- /dev/null
+++ b/tools/ioemu/hw/ppc_prep.c
@@ -0,0 +1,548 @@
+/*
+ * QEMU PPC PREP hardware System Emulator
+ *
+ * Copyright (c) 2003-2004 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+//#define HARD_DEBUG_PPC_IO
+//#define DEBUG_PPC_IO
+
+#define BIOS_FILENAME "ppc_rom.bin"
+#define KERNEL_LOAD_ADDR 0x01000000
+#define INITRD_LOAD_ADDR 0x01800000
+
+extern int loglevel;
+extern FILE *logfile;
+
+#if defined (HARD_DEBUG_PPC_IO) && !defined (DEBUG_PPC_IO)
+#define DEBUG_PPC_IO
+#endif
+
+#if defined (HARD_DEBUG_PPC_IO)
+#define PPC_IO_DPRINTF(fmt, args...) \
+do { \
+ if (loglevel & CPU_LOG_IOPORT) { \
+ fprintf(logfile, "%s: " fmt, __func__ , ##args); \
+ } else { \
+ printf("%s : " fmt, __func__ , ##args); \
+ } \
+} while (0)
+#elif defined (DEBUG_PPC_IO)
+#define PPC_IO_DPRINTF(fmt, args...) \
+do { \
+ if (loglevel & CPU_LOG_IOPORT) { \
+ fprintf(logfile, "%s: " fmt, __func__ , ##args); \
+ } \
+} while (0)
+#else
+#define PPC_IO_DPRINTF(fmt, args...) do { } while (0)
+#endif
+
+/* Constants for devices init */
+static const int ide_iobase[2] = { 0x1f0, 0x170 };
+static const int ide_iobase2[2] = { 0x3f6, 0x376 };
+static const int ide_irq[2] = { 13, 13 };
+
+#define NE2000_NB_MAX 6
+
+static uint32_t ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 };
+static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
+
+//static PITState *pit;
+
+/* ISA IO ports bridge */
+#define PPC_IO_BASE 0x80000000
+
+/* Speaker port 0x61 */
+int speaker_data_on;
+int dummy_refresh_clock;
+
+static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+#if 0
+ speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(pit, 2, val & 1);
+#endif
+}
+
+static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
+{
+#if 0
+ int out;
+ out = pit_get_out(pit, 2, qemu_get_clock(vm_clock));
+ dummy_refresh_clock ^= 1;
+ return (speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
+ (dummy_refresh_clock << 4);
+#endif
+ return 0;
+}
+
+/* PCI intack register */
+/* Read-only register (?) */
+static void _PPC_intack_write (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ // printf("%s: 0x%08x => 0x%08x\n", __func__, addr, value);
+}
+
+static inline uint32_t _PPC_intack_read (target_phys_addr_t addr)
+{
+ uint32_t retval = 0;
+
+ if (addr == 0xBFFFFFF0)
+ retval = pic_intack_read(NULL);
+ // printf("%s: 0x%08x <= %d\n", __func__, addr, retval);
+
+ return retval;
+}
+
+static uint32_t PPC_intack_readb (void *opaque, target_phys_addr_t addr)
+{
+ return _PPC_intack_read(addr);
+}
+
+static uint32_t PPC_intack_readw (void *opaque, target_phys_addr_t addr)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ return bswap16(_PPC_intack_read(addr));
+#else
+ return _PPC_intack_read(addr);
+#endif
+}
+
+static uint32_t PPC_intack_readl (void *opaque, target_phys_addr_t addr)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ return bswap32(_PPC_intack_read(addr));
+#else
+ return _PPC_intack_read(addr);
+#endif
+}
+
+static CPUWriteMemoryFunc *PPC_intack_write[] = {
+ &_PPC_intack_write,
+ &_PPC_intack_write,
+ &_PPC_intack_write,
+};
+
+static CPUReadMemoryFunc *PPC_intack_read[] = {
+ &PPC_intack_readb,
+ &PPC_intack_readw,
+ &PPC_intack_readl,
+};
+
+/* PowerPC control and status registers */
+#if 0 // Not used
+static struct {
+ /* IDs */
+ uint32_t veni_devi;
+ uint32_t revi;
+ /* Control and status */
+ uint32_t gcsr;
+ uint32_t xcfr;
+ uint32_t ct32;
+ uint32_t mcsr;
+ /* General purpose registers */
+ uint32_t gprg[6];
+ /* Exceptions */
+ uint32_t feen;
+ uint32_t fest;
+ uint32_t fema;
+ uint32_t fecl;
+ uint32_t eeen;
+ uint32_t eest;
+ uint32_t eecl;
+ uint32_t eeint;
+ uint32_t eemck0;
+ uint32_t eemck1;
+ /* Error diagnostic */
+} XCSR;
+
+static void PPC_XCSR_writeb (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+ printf("%s: 0x%08lx => 0x%08x\n", __func__, (long)addr, value);
+}
+
+static void PPC_XCSR_writew (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ value = bswap16(value);
+#endif
+ printf("%s: 0x%08lx => 0x%08x\n", __func__, (long)addr, value);
+}
+
+static void PPC_XCSR_writel (void *opaque, target_phys_addr_t addr, uint32_t value)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ value = bswap32(value);
+#endif
+ printf("%s: 0x%08lx => 0x%08x\n", __func__, (long)addr, value);
+}
+
+static uint32_t PPC_XCSR_readb (void *opaque, target_phys_addr_t addr)
+{
+ uint32_t retval = 0;
+
+ printf("%s: 0x%08lx <= %d\n", __func__, (long)addr, retval);
+
+ return retval;
+}
+
+static uint32_t PPC_XCSR_readw (void *opaque, target_phys_addr_t addr)
+{
+ uint32_t retval = 0;
+
+ printf("%s: 0x%08lx <= %d\n", __func__, (long)addr, retval);
+#ifdef TARGET_WORDS_BIGENDIAN
+ retval = bswap16(retval);
+#endif
+
+ return retval;
+}
+
+static uint32_t PPC_XCSR_readl (void *opaque, target_phys_addr_t addr)
+{
+ uint32_t retval = 0;
+
+ printf("%s: 0x%08lx <= %d\n", __func__, (long)addr, retval);
+#ifdef TARGET_WORDS_BIGENDIAN
+ retval = bswap32(retval);
+#endif
+
+ return retval;
+}
+
+static CPUWriteMemoryFunc *PPC_XCSR_write[] = {
+ &PPC_XCSR_writeb,
+ &PPC_XCSR_writew,
+ &PPC_XCSR_writel,
+};
+
+static CPUReadMemoryFunc *PPC_XCSR_read[] = {
+ &PPC_XCSR_readb,
+ &PPC_XCSR_readw,
+ &PPC_XCSR_readl,
+};
+#endif
+
+/* Fake super-io ports for PREP platform (Intel 82378ZB) */
+typedef struct sysctrl_t {
+ m48t59_t *nvram;
+ uint8_t state;
+ uint8_t syscontrol;
+ uint8_t fake_io[2];
+} sysctrl_t;
+
+enum {
+ STATE_HARDFILE = 0x01,
+};
+
+static sysctrl_t *sysctrl;
+
+static void PREP_io_write (void *opaque, uint32_t addr, uint32_t val)
+{
+ sysctrl_t *sysctrl = opaque;
+
+ PPC_IO_DPRINTF("0x%08lx => 0x%08x\n", (long)addr - PPC_IO_BASE, val);
+ sysctrl->fake_io[addr - 0x0398] = val;
+}
+
+static uint32_t PREP_io_read (void *opaque, uint32_t addr)
+{
+ sysctrl_t *sysctrl = opaque;
+
+ PPC_IO_DPRINTF("0x%08lx <= 0x%08x\n", (long)addr - PPC_IO_BASE,
+ sysctrl->fake_io[addr - 0x0398]);
+ return sysctrl->fake_io[addr - 0x0398];
+}
+
+static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val)
+{
+ sysctrl_t *sysctrl = opaque;
+
+ PPC_IO_DPRINTF("0x%08lx => 0x%08x\n", (long)addr - PPC_IO_BASE, val);
+ switch (addr) {
+ case 0x0092:
+ /* Special port 92 */
+ /* Check soft reset asked */
+ if (val & 0x01) {
+ // cpu_interrupt(cpu_single_env, CPU_INTERRUPT_RESET);
+ }
+ /* Check LE mode */
+ if (val & 0x02) {
+ printf("Little Endian mode isn't supported (yet ?)\n");
+ abort();
+ }
+ break;
+ case 0x0800:
+ /* Motorola CPU configuration register : read-only */
+ break;
+ case 0x0802:
+ /* Motorola base module feature register : read-only */
+ break;
+ case 0x0803:
+ /* Motorola base module status register : read-only */
+ break;
+ case 0x0808:
+ /* Hardfile light register */
+ if (val & 1)
+ sysctrl->state |= STATE_HARDFILE;
+ else
+ sysctrl->state &= ~STATE_HARDFILE;
+ break;
+ case 0x0810:
+ /* Password protect 1 register */
+ if (sysctrl->nvram != NULL)
+ m48t59_toggle_lock(sysctrl->nvram, 1);
+ break;
+ case 0x0812:
+ /* Password protect 2 register */
+ if (sysctrl->nvram != NULL)
+ m48t59_toggle_lock(sysctrl->nvram, 2);
+ break;
+ case 0x0814:
+ /* L2 invalidate register */
+ // tlb_flush(cpu_single_env, 1);
+ break;
+ case 0x081C:
+ /* system control register */
+ sysctrl->syscontrol = val & 0x0F;
+ break;
+ case 0x0850:
+ /* I/O map type register */
+ if (!(val & 0x01)) {
+ printf("No support for non-continuous I/O map mode\n");
+ abort();
+ }
+ break;
+ default:
+ printf("ERROR: unaffected IO port write: %04lx => %02x\n",
+ (long)addr, val);
+ break;
+ }
+}
+
+static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr)
+{
+ sysctrl_t *sysctrl = opaque;
+ uint32_t retval = 0xFF;
+
+ switch (addr) {
+ case 0x0092:
+ /* Special port 92 */
+ retval = 0x00;
+ break;
+ case 0x0800:
+ /* Motorola CPU configuration register */
+ retval = 0xEF; /* MPC750 */
+ break;
+ case 0x0802:
+ /* Motorola Base module feature register */
+ retval = 0xAD; /* No ESCC, PMC slot neither ethernet */
+ break;
+ case 0x0803:
+ /* Motorola base module status register */
+ retval = 0xE0; /* Standard MPC750 */
+ break;
+ case 0x080C:
+ /* Equipment present register:
+ * no L2 cache
+ * no upgrade processor
+ * no cards in PCI slots
+ * SCSI fuse is bad
+ */
+ retval = 0x3C;
+ break;
+ case 0x0810:
+ /* Motorola base module extended feature register */
+ retval = 0x39; /* No USB, CF and PCI bridge. NVRAM present */
+ break;
+ case 0x0818:
+ /* Keylock */
+ retval = 0x00;
+ break;
+ case 0x081C:
+ /* system control register
+ * 7 - 6 / 1 - 0: L2 cache enable
+ */
+ retval = sysctrl->syscontrol;
+ break;
+ case 0x0823:
+ /* */
+ retval = 0x03; /* no L2 cache */
+ break;
+ case 0x0850:
+ /* I/O map type register */
+ retval = 0x01;
+ break;
+ default:
+ printf("ERROR: unaffected IO port: %04lx read\n", (long)addr);
+ break;
+ }
+ PPC_IO_DPRINTF("0x%08lx <= 0x%08x\n", (long)addr - PPC_IO_BASE, retval);
+
+ return retval;
+}
+
+extern CPUPPCState *global_env;
+
+#define NVRAM_SIZE 0x2000
+
+/* PowerPC PREP hardware initialisation */
+void ppc_prep_init(int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename)
+{
+ char buf[1024];
+ m48t59_t *nvram;
+ int PPC_io_memory;
+ int ret, linux_boot, i, nb_nics1;
+ unsigned long bios_offset;
+ uint32_t kernel_base, kernel_size, initrd_base, initrd_size;
+ PCIBus *pci_bus;
+
+ sysctrl = qemu_mallocz(sizeof(sysctrl_t));
+ if (sysctrl == NULL)
+ return;
+
+ linux_boot = (kernel_filename != NULL);
+
+ /* allocate RAM */
+ cpu_register_physical_memory(0, ram_size, IO_MEM_RAM);
+
+ /* allocate and load BIOS */
+ bios_offset = ram_size + vga_ram_size;
+ snprintf(buf, sizeof(buf), "%s/%s", bios_dir, BIOS_FILENAME);
+ ret = load_image(buf, phys_ram_base + bios_offset);
+ if (ret != BIOS_SIZE) {
+ fprintf(stderr, "qemu: could not load PPC PREP bios '%s'\n", buf);
+ exit(1);
+ }
+ cpu_register_physical_memory((uint32_t)(-BIOS_SIZE),
+ BIOS_SIZE, bios_offset | IO_MEM_ROM);
+ cpu_single_env->nip = 0xfffffffc;
+
+ if (linux_boot) {
+ kernel_base = KERNEL_LOAD_ADDR;
+ /* now we can load the kernel */
+ kernel_size = load_image(kernel_filename, phys_ram_base + kernel_base);
+ if ((int32_t)kernel_size < 0) {
+ fprintf(stderr, "qemu: could not load kernel '%s'\n",
+ kernel_filename);
+ exit(1);
+ }
+ /* load initrd */
+ if (initrd_filename) {
+ initrd_base = INITRD_LOAD_ADDR;
+ initrd_size = load_image(initrd_filename,
+ phys_ram_base + initrd_base);
+ if ((int32_t)initrd_size < 0) {
+ fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
+ initrd_filename);
+ exit(1);
+ }
+ } else {
+ initrd_base = 0;
+ initrd_size = 0;
+ }
+ boot_device = 'm';
+ } else {
+ kernel_base = 0;
+ kernel_size = 0;
+ initrd_base = 0;
+ initrd_size = 0;
+ }
+
+ /* Register CPU as a 74x/75x */
+ cpu_ppc_register(cpu_single_env, 0x00080000);
+ /* Set time-base frequency to 100 Mhz */
+ cpu_ppc_tb_init(cpu_single_env, 100UL * 1000UL * 1000UL);
+
+ isa_mem_base = 0xc0000000;
+ pci_bus = pci_prep_init();
+ /* Register 64 KB of ISA IO space */
+ PPC_io_memory = cpu_register_io_memory(0, PPC_io_read, PPC_io_write, NULL);
+ cpu_register_physical_memory(0x80000000, 0x00010000, PPC_io_memory);
+
+ /* init basic PC hardware */
+ vga_initialize(pci_bus, ds, phys_ram_base + ram_size, ram_size,
+ vga_ram_size);
+ rtc_init(0x70, 8);
+ // openpic = openpic_init(0x00000000, 0xF0000000, 1);
+ // pic_init(openpic);
+ pic_init();
+ // pit = pit_init(0x40, 0);
+
+ serial_init(0x3f8, 4, serial_hds[0]);
+ nb_nics1 = nb_nics;
+ if (nb_nics1 > NE2000_NB_MAX)
+ nb_nics1 = NE2000_NB_MAX;
+ for(i = 0; i < nb_nics1; i++) {
+ isa_ne2000_init(ne2000_io[i], ne2000_irq[i], &nd_table[i]);
+ }
+
+ for(i = 0; i < 2; i++) {
+ isa_ide_init(ide_iobase[i], ide_iobase2[i], ide_irq[i],
+ bs_table[2 * i], bs_table[2 * i + 1]);
+ }
+ kbd_init();
+ DMA_init(1);
+ // AUD_init();
+ // SB16_init();
+
+ fdctrl_init(6, 2, 0, 0x3f0, fd_table);
+
+ /* Register speaker port */
+ register_ioport_read(0x61, 1, 1, speaker_ioport_read, NULL);
+ register_ioport_write(0x61, 1, 1, speaker_ioport_write, NULL);
+ /* Register fake IO ports for PREP */
+ register_ioport_read(0x398, 2, 1, &PREP_io_read, sysctrl);
+ register_ioport_write(0x398, 2, 1, &PREP_io_write, sysctrl);
+ /* System control ports */
+ register_ioport_read(0x0092, 0x01, 1, &PREP_io_800_readb, sysctrl);
+ register_ioport_write(0x0092, 0x01, 1, &PREP_io_800_writeb, sysctrl);
+ register_ioport_read(0x0800, 0x52, 1, &PREP_io_800_readb, sysctrl);
+ register_ioport_write(0x0800, 0x52, 1, &PREP_io_800_writeb, sysctrl);
+ /* PCI intack location */
+ PPC_io_memory = cpu_register_io_memory(0, PPC_intack_read,
+ PPC_intack_write, NULL);
+ cpu_register_physical_memory(0xBFFFFFF0, 0x4, PPC_io_memory);
+ /* PowerPC control and status register group */
+#if 0
+ PPC_io_memory = cpu_register_io_memory(0, PPC_XCSR_read, PPC_XCSR_write, NULL);
+ cpu_register_physical_memory(0xFEFF0000, 0x1000, PPC_io_memory);
+#endif
+
+ nvram = m48t59_init(8, 0, 0x0074, NVRAM_SIZE);
+ if (nvram == NULL)
+ return;
+ sysctrl->nvram = nvram;
+
+ /* Initialise NVRAM */
+ PPC_NVRAM_set_params(nvram, NVRAM_SIZE, "PREP", ram_size, boot_device,
+ kernel_base, kernel_size,
+ kernel_cmdline,
+ initrd_base, initrd_size,
+ /* XXX: need an option to load a NVRAM image */
+ 0,
+ graphic_width, graphic_height, graphic_depth);
+}
diff --git a/tools/ioemu/hw/sb16.c b/tools/ioemu/hw/sb16.c
new file mode 100644
index 0000000000..33026febb4
--- /dev/null
+++ b/tools/ioemu/hw/sb16.c
@@ -0,0 +1,1268 @@
+/*
+ * QEMU Soundblaster 16 emulation
+ *
+ * Copyright (c) 2003-2004 Vassili Karpov (malc)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+#define LENOFA(a) ((int) (sizeof(a)/sizeof(a[0])))
+
+#define dolog(...) AUD_log ("sb16", __VA_ARGS__)
+
+/* #define DEBUG */
+/* #define DEBUG_SB16_MOST */
+
+#ifdef DEBUG
+#define ldebug(...) dolog (__VA_ARGS__)
+#else
+#define ldebug(...)
+#endif
+
+#define IO_READ_PROTO(name) \
+ uint32_t name (void *opaque, uint32_t nport)
+#define IO_WRITE_PROTO(name) \
+ void name (void *opaque, uint32_t nport, uint32_t val)
+
+static const char e3[] = "COPYRIGHT (C) CREATIVE TECHNOLOGY LTD, 1992.";
+
+static struct {
+ int ver_lo;
+ int ver_hi;
+ int irq;
+ int dma;
+ int hdma;
+ int port;
+} conf = {5, 4, 5, 1, 5, 0x220};
+
+typedef struct SB16State {
+ int irq;
+ int dma;
+ int hdma;
+ int port;
+ int ver;
+
+ int in_index;
+ int out_data_len;
+ int fmt_stereo;
+ int fmt_signed;
+ int fmt_bits;
+ audfmt_e fmt;
+ int dma_auto;
+ int block_size;
+ int fifo;
+ int freq;
+ int time_const;
+ int speaker;
+ int needed_bytes;
+ int cmd;
+ int use_hdma;
+ int highspeed;
+ int can_write;
+
+ int v2x6;
+
+ uint8_t csp_param;
+ uint8_t csp_value;
+ uint8_t csp_mode;
+ uint8_t csp_regs[256];
+ uint8_t csp_index;
+ uint8_t csp_reg83[4];
+ int csp_reg83r;
+ int csp_reg83w;
+
+ uint8_t in2_data[10];
+ uint8_t out_data[50];
+ uint8_t test_reg;
+ uint8_t last_read_byte;
+ int nzero;
+
+ int left_till_irq;
+
+ int dma_running;
+ int bytes_per_second;
+ int align;
+ SWVoice *voice;
+
+ QEMUTimer *ts, *aux_ts;
+ /* mixer state */
+ int mixer_nreg;
+ uint8_t mixer_regs[256];
+} SB16State;
+
+/* XXX: suppress that and use a context */
+static struct SB16State dsp;
+
+static int magic_of_irq (int irq) /* ISA IRQ line -> mixer register 0x80 bit encoding */
+{
+    switch (irq) {
+    case 5:
+        return 2;
+    case 7:
+        return 4;
+    case 9:
+        return 1;
+    case 10:
+        return 8;
+    default:
+        dolog ("bad irq %d\n", irq); /* fall back to IRQ 5's encoding */
+        return 2;
+    }
+}
+
+static int irq_of_magic (int magic) /* mixer register 0x80 bit encoding -> ISA IRQ line */
+{
+    switch (magic) {
+    case 1:
+        return 9;
+    case 2:
+        return 5;
+    case 4:
+        return 7;
+    case 8:
+        return 10;
+    default:
+        dolog ("bad irq magic %d\n", magic);
+        return -1; /* caller must check (mixer_write_datab tests irq > 0) */
+    }
+}
+
+#if 0
+static void log_dsp (SB16State *dsp)
+{
+ ldebug ("%s:%s:%d:%s:dmasize=%d:freq=%d:const=%d:speaker=%d\n",
+ dsp->fmt_stereo ? "Stereo" : "Mono",
+ dsp->fmt_signed ? "Signed" : "Unsigned",
+ dsp->fmt_bits,
+ dsp->dma_auto ? "Auto" : "Single",
+ dsp->block_size,
+ dsp->freq,
+ dsp->time_const,
+ dsp->speaker);
+}
+#endif
+
+static void speaker (SB16State *s, int on) /* latch DSP speaker on/off state */
+{
+    s->speaker = on;
+    /* AUD_enable (s->voice, on); */ /* NOTE(review): audio enable left commented out here; control() drives AUD_enable instead */
+}
+
+static void control (SB16State *s, int hold) /* start (hold=1) or stop (hold=0) the active DMA transfer */
+{
+    int dma = s->use_hdma ? s->hdma : s->dma; /* 16-bit transfers run on the high DMA channel */
+    s->dma_running = hold;
+
+    ldebug ("hold %d high %d dma %d\n", hold, s->use_hdma, dma);
+
+    if (hold) {
+        DMA_hold_DREQ (dma);
+        AUD_enable (s->voice, 1);
+    }
+    else {
+        DMA_release_DREQ (dma);
+        AUD_enable (s->voice, 0);
+    }
+}
+
+static void aux_timer (void *opaque) /* deferred IRQ used by command 0x80 (silence block), armed in complete() */
+{
+    SB16State *s = opaque;
+    s->can_write = 1;
+    pic_set_irq (s->irq, 1);
+}
+
+#define DMA8_AUTO 1
+#define DMA8_HIGH 2
+
+static void dma_cmd8 (SB16State *s, int mask, int dma_len) /* set up an 8-bit unsigned DMA transfer (legacy SB commands) */
+{
+    s->fmt = AUD_FMT_U8;
+    s->use_hdma = 0;
+    s->fmt_bits = 8;
+    s->fmt_signed = 0;
+    s->fmt_stereo = (s->mixer_regs[0x0e] & 2) != 0; /* stereo switch lives in mixer reg 0x0e bit 1 */
+    if (-1 == s->time_const) {
+        s->freq = 11025; /* no time constant programmed yet: use default rate */
+    }
+    else {
+        int tmp = (256 - s->time_const);
+        s->freq = (1000000 + (tmp / 2)) / tmp; /* rounded 1MHz / (256 - tc) */
+    }
+
+    if (dma_len != -1)
+        s->block_size = dma_len << s->fmt_stereo;
+    else {
+        /* This is apparently the only way to make both Act1/PL
+           and SecondReality/FC work
+
+           Act1 sets block size via command 0x48 and it's an odd number
+           SR does the same with even number
+           Both use stereo, and Creatives own documentation states that
+           0x48 sets block size in bytes less one.. go figure */
+        s->block_size &= ~s->fmt_stereo;
+    }
+
+    s->freq >>= s->fmt_stereo;
+    s->left_till_irq = s->block_size;
+    s->bytes_per_second = (s->freq << s->fmt_stereo);
+    /* s->highspeed = (mask & DMA8_HIGH) != 0; */
+    s->dma_auto = (mask & DMA8_AUTO) != 0;
+    s->align = (1 << s->fmt_stereo) - 1; /* sample frame size minus one */
+
+    if (s->block_size & s->align)
+        dolog ("warning: unaligned buffer\n");
+
+    ldebug ("freq %d, stereo %d, sign %d, bits %d, "
+            "dma %d, auto %d, fifo %d, high %d\n",
+            s->freq, s->fmt_stereo, s->fmt_signed, s->fmt_bits,
+            s->block_size, s->dma_auto, s->fifo, s->highspeed);
+
+    if (s->freq)
+        s->voice = AUD_open (s->voice, "sb16", s->freq,
+                             1 << s->fmt_stereo, s->fmt);
+
+    control (s, 1);
+    speaker (s, 1);
+}
+
+static void dma_cmd (SB16State *s, uint8_t cmd, uint8_t d0, int dma_len) /* program a DMA transfer from a 0xb0-0xcf command and its mode byte d0 */
+{
+    s->use_hdma = cmd < 0xc0; /* 0xbX (16-bit) commands run on the high DMA channel */
+    s->fifo = (cmd >> 1) & 1;
+    s->dma_auto = (cmd >> 2) & 1;
+    s->fmt_signed = (d0 >> 4) & 1;
+    s->fmt_stereo = (d0 >> 5) & 1;
+
+    switch (cmd >> 4) {
+    case 11:
+        s->fmt_bits = 16;
+        break;
+
+    case 12:
+        s->fmt_bits = 8;
+        break;
+    }
+
+    if (-1 != s->time_const) {
+#if 1
+        int tmp = 256 - s->time_const;
+        s->freq = (1000000 + (tmp / 2)) / tmp; /* rounded 1MHz / (256 - tc) */
+#else
+        /* s->freq = 1000000 / ((255 - s->time_const) << s->fmt_stereo); */
+        s->freq = 1000000 / ((255 - s->time_const));
+#endif
+        s->time_const = -1; /* consumed: next command must reprogram it */
+    }
+
+    s->block_size = dma_len + 1;
+    s->block_size <<= (s->fmt_bits == 16); /* 16-bit samples are two bytes each */
+    if (!s->dma_auto) {
+        /* It is clear that for DOOM and auto-init this value
+           shouldn't take stereo into account, while Miles Sound Systems
+           setsound.exe with single transfer mode wouldn't work without it
+           wonders of SB16 yet again */
+        s->block_size <<= s->fmt_stereo;
+    }
+
+    ldebug ("freq %d, stereo %d, sign %d, bits %d, "
+            "dma %d, auto %d, fifo %d, high %d\n",
+            s->freq, s->fmt_stereo, s->fmt_signed, s->fmt_bits,
+            s->block_size, s->dma_auto, s->fifo, s->highspeed);
+
+    if (16 == s->fmt_bits) {
+        if (s->fmt_signed) {
+            s->fmt = AUD_FMT_S16;
+        }
+        else {
+            s->fmt = AUD_FMT_U16;
+        }
+    }
+    else {
+        if (s->fmt_signed) {
+            s->fmt = AUD_FMT_S8;
+        }
+        else {
+            s->fmt = AUD_FMT_U8;
+        }
+    }
+
+    s->left_till_irq = s->block_size;
+
+    s->bytes_per_second = (s->freq << s->fmt_stereo) << (s->fmt_bits == 16);
+    s->highspeed = 0;
+    s->align = (1 << (s->fmt_stereo + (s->fmt_bits == 16))) - 1; /* frame size minus one */
+    if (s->block_size & s->align)
+        dolog ("warning: unaligned buffer\n");
+
+    if (s->freq)
+        s->voice = AUD_open (s->voice, "sb16", s->freq,
+                             1 << s->fmt_stereo, s->fmt);
+
+    control (s, 1);
+    speaker (s, 1);
+}
+
+static inline void dsp_out_data (SB16State *s, uint8_t val) /* queue a byte for the guest to read from the DSP data port */
+{
+    ldebug ("outdata %#x\n", val);
+    if (s->out_data_len < sizeof (s->out_data)) /* silently drop when the buffer is full */
+        s->out_data[s->out_data_len++] = val;
+}
+
+static inline uint8_t dsp_get_data (SB16State *s) /* pop the most recent parameter byte the guest wrote */
+{
+    if (s->in_index)
+        return s->in2_data[--s->in_index]; /* LIFO: bytes come back in reverse write order */
+    else {
+        dolog ("buffer underflow\n");
+        return 0;
+    }
+}
+
+static void command (SB16State *s, uint8_t cmd) /* decode a DSP command byte; sets needed_bytes when parameters follow */
+{
+    ldebug ("command %#x\n", cmd);
+
+    if (cmd > 0xaf && cmd < 0xd0) { /* 0xb0-0xcf: generic 16/8-bit DMA commands, 3 parameter bytes */
+        if (cmd & 8) {
+            dolog ("ADC not yet supported (command %#x)\n", cmd);
+        }
+
+        switch (cmd >> 4) {
+        case 11:
+        case 12:
+            break;
+        default:
+            dolog ("%#x wrong bits\n", cmd);
+        }
+        s->needed_bytes = 3;
+    }
+    else {
+        switch (cmd) {
+        case 0x03:
+            dsp_out_data (s, 0x10); /* s->csp_param); */
+            goto warn;
+
+        case 0x04:
+            s->needed_bytes = 1;
+            goto warn;
+
+        case 0x05:
+            s->needed_bytes = 2;
+            goto warn;
+
+        case 0x08:
+            /* __asm__ ("int3"); */
+            goto warn;
+
+        case 0x0e:
+            s->needed_bytes = 2;
+            goto warn;
+
+        case 0x09:
+            dsp_out_data (s, 0xf8);
+            goto warn;
+
+        case 0x0f:
+            s->needed_bytes = 1;
+            goto warn;
+
+        case 0x10:
+            s->needed_bytes = 1;
+            goto warn;
+
+        case 0x14: /* single-cycle DMA DAC, 8-bit; length follows */
+            s->needed_bytes = 2;
+            s->block_size = 0;
+            break;
+
+        case 0x1c: /* Auto-Initialize DMA DAC, 8-bit */
+            control (s, 1);
+            break;
+
+        case 0x20: /* Direct ADC, Juice/PL */
+            dsp_out_data (s, 0xff);
+            goto warn;
+
+        case 0x35:
+            dolog ("MIDI command(0x35) not implemented\n");
+            break;
+
+        case 0x40: /* set time constant */
+            s->freq = -1;
+            s->time_const = -1;
+            s->needed_bytes = 1;
+            break;
+
+        case 0x41: /* set output sample rate */
+            s->freq = -1;
+            s->time_const = -1;
+            s->needed_bytes = 2;
+            break;
+
+        case 0x42:
+            s->freq = -1;
+            s->time_const = -1;
+            s->needed_bytes = 2;
+            goto warn;
+
+        case 0x45:
+            dsp_out_data (s, 0xaa);
+            goto warn;
+
+        case 0x47: /* Continue Auto-Initialize DMA 16bit */
+            break;
+
+        case 0x48: /* set DMA block size */
+            s->needed_bytes = 2;
+            break;
+
+        case 0x80: /* silence DAC for N samples */
+            s->needed_bytes = 2;
+            break;
+
+        case 0x90:
+        case 0x91:
+            dma_cmd8 (s, ((cmd & 1) == 0) | DMA8_HIGH, -1);
+            break;
+
+        case 0xd0: /* halt DMA operation. 8bit */
+            control (s, 0);
+            break;
+
+        case 0xd1: /* speaker on */
+            speaker (s, 1);
+            break;
+
+        case 0xd3: /* speaker off */
+            speaker (s, 0);
+            break;
+
+        case 0xd4: /* continue DMA operation. 8bit */
+            control (s, 1);
+            break;
+
+        case 0xd5: /* halt DMA operation. 16bit */
+            control (s, 0);
+            break;
+
+        case 0xd6: /* continue DMA operation. 16bit */
+            control (s, 1);
+            break;
+
+        case 0xd9: /* exit auto-init DMA after this block. 16bit */
+            s->dma_auto = 0;
+            break;
+
+        case 0xda: /* exit auto-init DMA after this block. 8bit */
+            s->dma_auto = 0;
+            break;
+
+        case 0xe0: /* identification: reply with bitwise complement (see complete) */
+            s->needed_bytes = 1;
+            goto warn;
+
+        case 0xe1: /* DSP version */
+            dsp_out_data (s, s->ver & 0xff);
+            dsp_out_data (s, s->ver >> 8);
+            break;
+
+        case 0xe2:
+            s->needed_bytes = 1;
+            goto warn;
+
+        case 0xe3: /* copyright string, returned in reverse */
+            {
+                int i;
+                for (i = sizeof (e3) - 1; i >= 0; --i)
+                    dsp_out_data (s, e3[i]);
+            }
+            break;
+
+        case 0xe4: /* write test reg */
+            s->needed_bytes = 1;
+            break;
+
+        case 0xe7:
+            dolog ("Attempt to probe for ESS (0xe7)?\n");
+            return;
+
+        case 0xe8: /* read test reg */
+            dsp_out_data (s, s->test_reg);
+            break;
+
+        case 0xf2: /* force 8-bit (0xf2) / 16-bit (0xf3) interrupt */
+        case 0xf3:
+            dsp_out_data (s, 0xaa);
+            s->mixer_regs[0x82] |= (cmd == 0xf2) ? 1 : 2;
+            pic_set_irq (s->irq, 1);
+            break;
+
+        case 0xf9:
+            s->needed_bytes = 1;
+            goto warn;
+
+        case 0xfa:
+            dsp_out_data (s, 0);
+            goto warn;
+
+        case 0xfc: /* FIXME */
+            dsp_out_data (s, 0);
+            goto warn;
+
+        default:
+            dolog ("unrecognized command %#x\n", cmd);
+            return;
+        }
+    }
+
+    s->cmd = cmd; /* remembered so complete() can dispatch once parameters arrive */
+    if (!s->needed_bytes)
+        ldebug ("\n");
+    return;
+
+ warn: /* partially-implemented commands: log, but still record state (typo "trully" fixed) */
+    dolog ("warning: command %#x,%d is not truly understood yet\n",
+           cmd, s->needed_bytes);
+    s->cmd = cmd;
+    return;
+}
+
+static uint16_t dsp_get_lohi (SB16State *s) /* 16-bit param written as lo,hi: LIFO pops hi first, then lo */
+{
+    uint8_t hi = dsp_get_data (s);
+    uint8_t lo = dsp_get_data (s);
+    return (hi << 8) | lo;
+}
+
+static uint16_t dsp_get_hilo (SB16State *s) /* 16-bit param written as hi,lo (e.g. cmd 0x41): LIFO pops lo first */
+{
+    uint8_t lo = dsp_get_data (s);
+    uint8_t hi = dsp_get_data (s);
+    return (hi << 8) | lo;
+}
+
+static void complete (SB16State *s) /* all parameter bytes received: execute the pending command s->cmd */
+{
+    int d0, d1, d2;
+    ldebug ("complete command %#x, in_index %d, needed_bytes %d\n",
+            s->cmd, s->in_index, s->needed_bytes);
+
+    if (s->cmd > 0xaf && s->cmd < 0xd0) { /* generic DMA command: 3 params, popped in reverse write order */
+        d2 = dsp_get_data (s);
+        d1 = dsp_get_data (s);
+        d0 = dsp_get_data (s);
+
+        if (s->cmd & 8) {
+            dolog ("ADC params cmd = %#x d0 = %d, d1 = %d, d2 = %d\n",
+                   s->cmd, d0, d1, d2);
+        }
+        else {
+            ldebug ("cmd = %#x d0 = %d, d1 = %d, d2 = %d\n",
+                    s->cmd, d0, d1, d2);
+            dma_cmd (s, s->cmd, d0, d1 + (d2 << 8));
+        }
+    }
+    else {
+        switch (s->cmd) {
+        case 0x04:
+            s->csp_mode = dsp_get_data (s);
+            s->csp_reg83r = 0;
+            s->csp_reg83w = 0;
+            ldebug ("CSP command 0x04: mode=%#x\n", s->csp_mode);
+            break;
+
+        case 0x05:
+            s->csp_param = dsp_get_data (s);
+            s->csp_value = dsp_get_data (s);
+            ldebug ("CSP command 0x05: param=%#x value=%#x\n",
+                    s->csp_param,
+                    s->csp_value);
+            break;
+
+        case 0x0e: /* register 0x83 is a 4-entry ring with separate r/w cursors */
+            d0 = dsp_get_data (s);
+            d1 = dsp_get_data (s);
+            ldebug ("write CSP register %d <- %#x\n", d1, d0);
+            if (d1 == 0x83) {
+                ldebug ("0x83[%d] <- %#x\n", s->csp_reg83r, d0);
+                s->csp_reg83[s->csp_reg83r % 4] = d0;
+                s->csp_reg83r += 1;
+            }
+            else
+                s->csp_regs[d1] = d0;
+            break;
+
+        case 0x0f:
+            d0 = dsp_get_data (s);
+            ldebug ("read CSP register %#x -> %#x, mode=%#x\n",
+                    d0, s->csp_regs[d0], s->csp_mode);
+            if (d0 == 0x83) {
+                ldebug ("0x83[%d] -> %#x\n",
+                        s->csp_reg83w,
+                        s->csp_reg83[s->csp_reg83w % 4]);
+                dsp_out_data (s, s->csp_reg83[s->csp_reg83w % 4]);
+                s->csp_reg83w += 1;
+            }
+            else
+                dsp_out_data (s, s->csp_regs[d0]);
+            break;
+
+        case 0x10:
+            d0 = dsp_get_data (s);
+            dolog ("cmd 0x10 d0=%#x\n", d0);
+            break;
+
+        case 0x14:
+            dma_cmd8 (s, 0, dsp_get_lohi (s) + 1);
+            break;
+
+        case 0x40:
+            s->time_const = dsp_get_data (s);
+            ldebug ("set time const %d\n", s->time_const);
+            break;
+
+        case 0x42: /* FT2 sets output freq with this, go figure */
+            dolog ("cmd 0x42 might not do what it think it should\n");
+            /* fall through -- 0x42 treated like 0x41 */
+        case 0x41:
+            s->freq = dsp_get_hilo (s);
+            ldebug ("set freq %d\n", s->freq);
+            break;
+
+        case 0x48:
+            s->block_size = dsp_get_lohi (s) + 1;
+            ldebug ("set dma block len %d\n", s->block_size);
+            break;
+
+        case 0x80: /* output silence: raise IRQ now if short, else defer via aux timer */
+            {
+                int freq, samples, bytes;
+                int64_t ticks;
+
+                freq = s->freq > 0 ? s->freq : 11025;
+                samples = dsp_get_lohi (s) + 1;
+                bytes = samples << s->fmt_stereo << (s->fmt_bits == 16);
+                ticks = (bytes * ticks_per_sec) / freq;
+                if (ticks < ticks_per_sec / 1024)
+                    pic_set_irq (s->irq, 1);
+                else
+                    qemu_mod_timer (s->aux_ts, qemu_get_clock (vm_clock) + ticks);
+                ldebug ("mix silence %d %d %lld\n", samples, bytes, ticks);
+            }
+            break;
+
+        case 0xe0: /* identification: reply with bitwise complement of the byte */
+            d0 = dsp_get_data (s);
+            s->out_data_len = 0;
+            ldebug ("E0 data = %#x\n", d0);
+            dsp_out_data(s, ~d0);
+            break;
+
+        case 0xe2:
+            d0 = dsp_get_data (s);
+            ldebug ("E2 = %#x\n", d0);
+            break;
+
+        case 0xe4:
+            s->test_reg = dsp_get_data (s);
+            break;
+
+        case 0xf9:
+            d0 = dsp_get_data (s);
+            ldebug ("command 0xf9 with %#x\n", d0);
+            switch (d0) {
+            case 0x0e:
+                dsp_out_data (s, 0xff);
+                break;
+
+            case 0x0f:
+                dsp_out_data (s, 0x07);
+                break;
+
+            case 0x37:
+                dsp_out_data (s, 0x38);
+                break;
+
+            default:
+                dsp_out_data (s, 0x00);
+                break;
+            }
+            break;
+
+        default:
+            dolog ("complete: unrecognized command %#x\n", s->cmd);
+            return;
+        }
+    }
+
+    ldebug ("\n");
+    s->cmd = -1; /* -1 marks "no command pending" */
+    return;
+}
+
+static void reset (SB16State *s) /* DSP reset: drop IRQ, clear command state, queue the ready byte */
+{
+    pic_set_irq (s->irq, 0);
+    if (s->dma_auto) { /* pulse the IRQ so a guest stuck in auto-init sees the block end */
+        pic_set_irq (s->irq, 1);
+        pic_set_irq (s->irq, 0);
+    }
+
+    s->mixer_regs[0x82] = 0;
+    s->dma_auto = 0;
+    s->in_index = 0;
+    s->out_data_len = 0;
+    s->left_till_irq = 0;
+    s->needed_bytes = 0;
+    s->block_size = -1;
+    s->nzero = 0;
+    s->highspeed = 0;
+    s->v2x6 = 0;
+
+    dsp_out_data(s, 0xaa); /* guests poll the data port for 0xaa after reset */
+    speaker (s, 0);
+    control (s, 0);
+}
+
+static IO_WRITE_PROTO (dsp_write) /* guest writes: reset port (base+0x6) and data/command port (base+0xc) */
+{
+    SB16State *s = opaque;
+    int iport;
+
+    iport = nport - s->port;
+
+    ldebug ("write %#x <- %#x\n", nport, val);
+    switch (iport) {
+    case 0x06: /* reset port: 1-then-0 pulse resets the DSP; v2x6 tracks the last value */
+        switch (val) {
+        case 0x00:
+            if (s->v2x6 == 1) {
+                if (0 && s->highspeed) { /* highspeed exit path deliberately disabled */
+                    s->highspeed = 0;
+                    pic_set_irq (s->irq, 0);
+                    control (s, 0);
+                }
+                else
+                    reset (s);
+            }
+            s->v2x6 = 0;
+            break;
+
+        case 0x01:
+        case 0x03:              /* FreeBSD kludge */
+            s->v2x6 = 1;
+            break;
+
+        case 0xc6:
+            s->v2x6 = 0;        /* Prince of Persia, csp.sys, diagnose.exe */
+            break;
+
+        case 0xb8:              /* Panic */
+            reset (s);
+            break;
+
+        case 0x39:
+            dsp_out_data (s, 0x38);
+            reset (s);
+            s->v2x6 = 0x39;
+            break;
+
+        default:
+            s->v2x6 = val;
+            break;
+        }
+        break;
+
+    case 0x0c:                  /* write data or command | write status */
+/*      if (s->highspeed) */
+/*          break; */
+
+        if (0 == s->needed_bytes) {
+            command (s, val); /* first byte starts a new command */
+#if 0
+            if (0 == s->needed_bytes) {
+                log_dsp (s);
+            }
+#endif
+        }
+        else {
+            if (s->in_index == sizeof (s->in2_data)) {
+                dolog ("in data overrun\n");
+            }
+            else {
+                s->in2_data[s->in_index++] = val; /* collect parameter bytes */
+                if (s->in_index == s->needed_bytes) {
+                    s->needed_bytes = 0;
+                    complete (s); /* all parameters in: execute */
+#if 0
+                    log_dsp (s);
+#endif
+                }
+            }
+        }
+        break;
+
+    default:
+        ldebug ("(nport=%#x, val=%#x)\n", nport, val);
+        break;
+    }
+}
+
+static IO_READ_PROTO (dsp_read) /* guest reads from DSP ports; 0x0e/0x0f reads also ack the 8/16-bit IRQ */
+{
+    SB16State *s = opaque;
+    int iport, retval, ack = 0;
+
+    iport = nport - s->port;
+
+    switch (iport) {
+    case 0x06:                  /* reset */
+        retval = 0xff;
+        break;
+
+    case 0x0a:                  /* read data */
+        if (s->out_data_len) {
+            retval = s->out_data[--s->out_data_len];
+            s->last_read_byte = retval; /* remembered for reads from an empty buffer */
+        }
+        else {
+            dolog ("empty output buffer\n");
+            retval = s->last_read_byte;
+            /* goto error; */
+        }
+        break;
+
+    case 0x0c:                  /* 0 can write */
+        retval = s->can_write ? 0 : 0x80;
+        break;
+
+    case 0x0d:                  /* timer interrupt clear */
+        /* dolog ("timer interrupt clear\n"); */
+        retval = 0;
+        break;
+
+    case 0x0e:                  /* data available status | irq 8 ack */
+        retval = (!s->out_data_len || s->highspeed) ? 0 : 0x80;
+        if (s->mixer_regs[0x82] & 1) {
+            ack = 1;
+            /* bug fix: was "&= 1", which kept the pending bit set and
+               cleared every other status bit; ack must clear bit 0 only */
+            s->mixer_regs[0x82] &= ~1;
+            pic_set_irq (s->irq, 0);
+        }
+        break;
+
+    case 0x0f:                  /* irq 16 ack */
+        retval = 0xff;
+        if (s->mixer_regs[0x82] & 2) {
+            ack = 1;
+            /* bug fix: was "&= 2" -- same inversion as above for bit 1 */
+            s->mixer_regs[0x82] &= ~2;
+            pic_set_irq (s->irq, 0);
+        }
+        break;
+
+    default:
+        goto error;
+    }
+
+    if (!ack)
+        ldebug ("read %#x -> %#x\n", nport, retval);
+
+    return retval;
+
+ error:
+    dolog ("WARNING dsp_read %#x error\n", nport);
+    return 0xff;
+}
+
+static void reset_mixer (SB16State *s) /* restore mixer registers to power-on defaults */
+{
+    int i;
+
+    memset (s->mixer_regs, 0xff, 0x7f);
+    memset (s->mixer_regs + 0x83, 0xff, sizeof (s->mixer_regs) - 0x83); /* 0x7f-0x82 (IRQ/DMA config, IRQ status) are preserved */
+
+    s->mixer_regs[0x02] = 4;    /* master volume 3bits */
+    s->mixer_regs[0x06] = 4;    /* MIDI volume 3bits */
+    s->mixer_regs[0x08] = 0;    /* CD volume 3bits */
+    s->mixer_regs[0x0a] = 0;    /* voice volume 2bits */
+
+    /* d5=input filt, d3=lowpass filt, d1,d2=input source */
+    s->mixer_regs[0x0c] = 0;
+
+    /* d5=output filt, d1=stereo switch */
+    s->mixer_regs[0x0e] = 0;
+
+    /* voice volume L d5,d7, R d1,d3 */
+    s->mixer_regs[0x04] = (4 << 5) | (4 << 1);
+    /* master ... */
+    s->mixer_regs[0x22] = (4 << 5) | (4 << 1);
+    /* MIDI ... */
+    s->mixer_regs[0x26] = (4 << 5) | (4 << 1);
+
+    for (i = 0x30; i < 0x48; i++) { /* SB16 extended volume registers */
+        s->mixer_regs[i] = 0x20;
+    }
+}
+
+static IO_WRITE_PROTO(mixer_write_indexb) /* select the mixer register for the next data access */
+{
+    SB16State *s = opaque;
+    s->mixer_nreg = val;
+}
+
+static IO_WRITE_PROTO(mixer_write_datab) /* write the selected mixer register; 0x80/0x81/0x82 get special handling */
+{
+    SB16State *s = opaque;
+
+    ldebug ("mixer_write [%#x] <- %#x\n", s->mixer_nreg, val);
+    /* bug fix: was ">", which let nreg == sizeof through and allowed an
+       out-of-bounds write at mixer_regs[256] below */
+    if (s->mixer_nreg >= sizeof (s->mixer_regs))
+        return;
+
+    switch (s->mixer_nreg) {
+    case 0x00:
+        reset_mixer (s);
+        break;
+
+    case 0x80: /* IRQ select, encoded as a magic bit */
+    {
+        int irq = irq_of_magic (val);
+        ldebug ("setting irq to %d (val=%#x)\n", irq, val);
+        if (irq > 0) /* -1 means unrecognized magic: keep current IRQ */
+            s->irq = irq;
+    }
+    break;
+
+    case 0x81: /* DMA select: logged but deliberately not applied */
+    {
+        int dma, hdma;
+
+        dma = lsbindex (val & 0xf);
+        hdma = lsbindex (val & 0xf0);
+        dolog ("attempt to set DMA register 8bit %d, 16bit %d (val=%#x)\n",
+               dma, hdma, val);
+#if 0
+        s->dma = dma;
+        s->hdma = hdma;
+#endif
+    }
+    break;
+
+    case 0x82: /* IRQ status is read-only */
+        dolog ("attempt to write into IRQ status register (val=%#x)\n",
+               val);
+        return;
+
+    default:
+        if (s->mixer_nreg >= 0x80)
+            dolog ("attempt to write mixer[%#x] <- %#x\n", s->mixer_nreg, val);
+        break;
+    }
+
+    s->mixer_regs[s->mixer_nreg] = val;
+}
+
+static IO_WRITE_PROTO(mixer_write_indexw) /* 16-bit access: low byte selects the register, high byte is the data */
+{
+    mixer_write_indexb (opaque, nport, val & 0xff);
+    mixer_write_datab (opaque, nport, (val >> 8) & 0xff);
+}
+
+static IO_READ_PROTO(mixer_read) /* read the currently selected mixer register */
+{
+    SB16State *s = opaque;
+#ifndef DEBUG_SB16_MOST
+    if (s->mixer_nreg != 0x82) /* 0x82 (IRQ status) is polled constantly; keep logs quiet */
+#endif
+        ldebug ("mixer_read[%#x] -> %#x\n",
+                s->mixer_nreg, s->mixer_regs[s->mixer_nreg]);
+    return s->mixer_regs[s->mixer_nreg];
+}
+
+static int write_audio (SB16State *s, int nchan, int dma_pos, /* copy up to len bytes from guest DMA to the audio layer */
+                        int dma_len, int len)                 /* returns the number of bytes actually written */
+{
+    int temp, net;
+    uint8_t tmpbuf[4096];
+
+    temp = len;
+    net = 0;
+
+    while (temp) {
+        int left = dma_len - dma_pos;
+        int to_copy, copied;
+
+        to_copy = audio_MIN (temp, left);
+        if (to_copy > sizeof(tmpbuf)) /* bounce through a fixed staging buffer */
+            to_copy = sizeof(tmpbuf);
+
+        copied = DMA_read_memory (nchan, tmpbuf, dma_pos, to_copy);
+        copied = AUD_write (s->voice, tmpbuf, copied); /* audio layer may accept fewer bytes */
+
+        temp -= copied;
+        dma_pos = (dma_pos + copied) % dma_len; /* DMA buffer is circular */
+        net += copied;
+
+        if (!copied)
+            break;
+    }
+
+    return net;
+}
+
+static int SB_read_DMA (void *opaque, int nchan, int dma_pos, int dma_len) /* DMA engine callback: drain guest samples, raise IRQ at block end */
+{
+    SB16State *s = opaque;
+    int free, rfree, till, copy, written, elapsed;
+
+    if (s->left_till_irq < 0) {
+        s->left_till_irq = s->block_size;
+    }
+
+    elapsed = AUD_calc_elapsed (s->voice);
+    free = elapsed; /* AUD_get_free (s->voice); */
+    rfree = free;
+    free = audio_MIN (free, elapsed) & ~s->align; /* round down to a whole sample frame */
+
+    if ((free <= 0) || !dma_len) {
+        return dma_pos;
+    }
+
+    copy = free;
+    till = s->left_till_irq;
+
+#ifdef DEBUG_SB16_MOST
+    dolog ("pos:%06d free:%d,%d till:%d len:%d\n",
+           dma_pos, free, AUD_get_free (s->voice), till, dma_len);
+#endif
+
+    if (till <= copy) {
+        if (0 == s->dma_auto) { /* single-cycle: stop exactly at the block boundary */
+            copy = till;
+        }
+    }
+
+    written = write_audio (s, nchan, dma_pos, dma_len, copy);
+    dma_pos = (dma_pos + written) % dma_len;
+    s->left_till_irq -= written;
+
+    if (s->left_till_irq <= 0) { /* block complete: flag status and interrupt the guest */
+        s->mixer_regs[0x82] |= (nchan & 4) ? 2 : 1;
+        pic_set_irq (s->irq, 1);
+        if (0 == s->dma_auto) {
+            control (s, 0);
+            speaker (s, 0);
+        }
+    }
+
+#ifdef DEBUG_SB16_MOST
+    ldebug ("pos %5d free %5d size %5d till % 5d copy %5d written %5d size %5d\n",
+            dma_pos, free, dma_len, s->left_till_irq, copy, written,
+            s->block_size);
+#endif
+
+    while (s->left_till_irq <= 0) { /* auto-init: carry the overshoot into the next block */
+        s->left_till_irq = s->block_size + s->left_till_irq;
+    }
+
+    AUD_adjust (s->voice, written);
+    return dma_pos;
+}
+
+void SB_timer (void *opaque) /* periodic 1-tick pump driving the audio layer */
+{
+    SB16State *s = opaque;
+    AUD_run ();
+    qemu_mod_timer (s->ts, qemu_get_clock (vm_clock) + 1);
+}
+
+static void SB_save (QEMUFile *f, void *opaque) /* savevm: serialize SB16 state (format version 1, mirrored by SB_load) */
+{
+    SB16State *s = opaque;
+
+    qemu_put_be32s (f, &s->irq);
+    qemu_put_be32s (f, &s->dma);
+    qemu_put_be32s (f, &s->hdma);
+    qemu_put_be32s (f, &s->port);
+    qemu_put_be32s (f, &s->ver);
+    qemu_put_be32s (f, &s->in_index);
+    qemu_put_be32s (f, &s->out_data_len);
+    qemu_put_be32s (f, &s->fmt_stereo);
+    qemu_put_be32s (f, &s->fmt_signed);
+    qemu_put_be32s (f, &s->fmt_bits);
+    qemu_put_be32s (f, &s->fmt);
+    qemu_put_be32s (f, &s->dma_auto);
+    qemu_put_be32s (f, &s->block_size);
+    qemu_put_be32s (f, &s->fifo);
+    qemu_put_be32s (f, &s->freq);
+    qemu_put_be32s (f, &s->time_const);
+    qemu_put_be32s (f, &s->speaker);
+    qemu_put_be32s (f, &s->needed_bytes);
+    qemu_put_be32s (f, &s->cmd);
+    qemu_put_be32s (f, &s->use_hdma);
+    qemu_put_be32s (f, &s->highspeed);
+    qemu_put_be32s (f, &s->can_write);
+    qemu_put_be32s (f, &s->v2x6);
+
+    qemu_put_8s (f, &s->csp_param);
+    qemu_put_8s (f, &s->csp_value);
+    qemu_put_8s (f, &s->csp_mode);
+    qemu_put_8s (f, &s->csp_param); /* NOTE(review): csp_param saved twice; likely meant another field -- SB_load mirrors it, so fixing requires changing both sides and bumping the version */
+    qemu_put_buffer (f, s->csp_regs, 256);
+    qemu_put_8s (f, &s->csp_index);
+    qemu_put_buffer (f, s->csp_reg83, 4);
+    qemu_put_be32s (f, &s->csp_reg83r);
+    qemu_put_be32s (f, &s->csp_reg83w);
+
+    qemu_put_buffer (f, s->in2_data, sizeof (s->in2_data));
+    qemu_put_buffer (f, s->out_data, sizeof (s->out_data));
+    qemu_put_8s (f, &s->test_reg);
+    qemu_put_8s (f, &s->last_read_byte);
+
+    qemu_put_be32s (f, &s->nzero);
+    qemu_put_be32s (f, &s->left_till_irq);
+    qemu_put_be32s (f, &s->dma_running);
+    qemu_put_be32s (f, &s->bytes_per_second);
+    qemu_put_be32s (f, &s->align);
+
+    qemu_put_be32s (f, &s->mixer_nreg);
+    qemu_put_buffer (f, s->mixer_regs, 256);
+}
+
+static int SB_load (QEMUFile *f, void *opaque, int version_id) /* loadvm: deserialize state written by SB_save and restart audio if needed */
+{
+    SB16State *s = opaque;
+
+    if (version_id != 1)
+        return -EINVAL;
+
+    qemu_get_be32s (f, &s->irq);
+    qemu_get_be32s (f, &s->dma);
+    qemu_get_be32s (f, &s->hdma);
+    qemu_get_be32s (f, &s->port);
+    qemu_get_be32s (f, &s->ver);
+    qemu_get_be32s (f, &s->in_index);
+    qemu_get_be32s (f, &s->out_data_len);
+    qemu_get_be32s (f, &s->fmt_stereo);
+    qemu_get_be32s (f, &s->fmt_signed);
+    qemu_get_be32s (f, &s->fmt_bits);
+    qemu_get_be32s (f, &s->fmt);
+    qemu_get_be32s (f, &s->dma_auto);
+    qemu_get_be32s (f, &s->block_size);
+    qemu_get_be32s (f, &s->fifo);
+    qemu_get_be32s (f, &s->freq);
+    qemu_get_be32s (f, &s->time_const);
+    qemu_get_be32s (f, &s->speaker);
+    qemu_get_be32s (f, &s->needed_bytes);
+    qemu_get_be32s (f, &s->cmd);
+    qemu_get_be32s (f, &s->use_hdma);
+    qemu_get_be32s (f, &s->highspeed);
+    qemu_get_be32s (f, &s->can_write);
+    qemu_get_be32s (f, &s->v2x6);
+
+    qemu_get_8s (f, &s->csp_param);
+    qemu_get_8s (f, &s->csp_value);
+    qemu_get_8s (f, &s->csp_mode);
+    qemu_get_8s (f, &s->csp_param); /* NOTE(review): duplicate read, kept to match the duplicate write in SB_save */
+    qemu_get_buffer (f, s->csp_regs, 256);
+    qemu_get_8s (f, &s->csp_index);
+    qemu_get_buffer (f, s->csp_reg83, 4);
+    qemu_get_be32s (f, &s->csp_reg83r);
+    qemu_get_be32s (f, &s->csp_reg83w);
+
+    qemu_get_buffer (f, s->in2_data, sizeof (s->in2_data));
+    qemu_get_buffer (f, s->out_data, sizeof (s->out_data));
+    qemu_get_8s (f, &s->test_reg);
+    qemu_get_8s (f, &s->last_read_byte);
+
+    qemu_get_be32s (f, &s->nzero);
+    qemu_get_be32s (f, &s->left_till_irq);
+    qemu_get_be32s (f, &s->dma_running);
+    qemu_get_be32s (f, &s->bytes_per_second);
+    qemu_get_be32s (f, &s->align);
+
+    qemu_get_be32s (f, &s->mixer_nreg);
+    qemu_get_buffer (f, s->mixer_regs, 256);
+
+    if (s->voice) { /* drop any voice from before the load; reopen below if a transfer was live */
+        AUD_close (s->voice);
+        s->voice = NULL;
+    }
+
+    if (s->dma_running) {
+        if (s->freq)
+            s->voice = AUD_open (s->voice, "sb16", s->freq,
+                                 1 << s->fmt_stereo, s->fmt);
+
+        control (s, 1);
+        speaker (s, s->speaker);
+    }
+    return 0;
+}
+
+void SB16_init (void) /* create the SB16 device: timers, I/O ports, DMA channels, savevm hook */
+{
+    SB16State *s = &dsp; /* single global instance (see XXX above) */
+    int i;
+    static const uint8_t dsp_write_ports[] = {0x6, 0xc};
+    static const uint8_t dsp_read_ports[] = {0x6, 0xa, 0xc, 0xd, 0xe, 0xf};
+
+    s->ts = qemu_new_timer (vm_clock, SB_timer, s);
+    if (!s->ts)
+        return;
+
+    s->irq = conf.irq;
+    s->dma = conf.dma;
+    s->hdma = conf.hdma;
+    s->port = conf.port;
+    s->ver = conf.ver_lo | (conf.ver_hi << 8);
+
+    s->mixer_regs[0x80] = magic_of_irq (s->irq);
+    s->mixer_regs[0x81] = (1 << s->dma) | (1 << s->hdma); /* DMA channels as a bitmask */
+    s->mixer_regs[0x82] = 2 << 5;
+
+    s->csp_regs[5] = 1;
+    s->csp_regs[9] = 0xf8;
+
+    reset_mixer (s);
+    s->aux_ts = qemu_new_timer (vm_clock, aux_timer, s);
+    if (!s->aux_ts)
+        return;
+
+    for (i = 0; i < LENOFA (dsp_write_ports); i++) {
+        register_ioport_write (s->port + dsp_write_ports[i], 1, 1, dsp_write, s);
+    }
+
+    for (i = 0; i < LENOFA (dsp_read_ports); i++) {
+        register_ioport_read (s->port + dsp_read_ports[i], 1, 1, dsp_read, s);
+    }
+
+    register_ioport_write (s->port + 0x4, 1, 1, mixer_write_indexb, s);
+    register_ioport_write (s->port + 0x4, 1, 2, mixer_write_indexw, s); /* 16-bit index+data combo write */
+    register_ioport_read (s->port + 0x5, 1, 1, mixer_read, s);
+    register_ioport_write (s->port + 0x5, 1, 1, mixer_write_datab, s);
+
+    DMA_register_channel (s->hdma, SB_read_DMA, s);
+    DMA_register_channel (s->dma, SB_read_DMA, s);
+    s->can_write = 1;
+
+    qemu_mod_timer (s->ts, qemu_get_clock (vm_clock) + 1);
+    register_savevm ("sb16", 0, 1, SB_save, SB_load, s); /* savevm format version 1 */
+}
diff --git a/tools/ioemu/hw/sched.c b/tools/ioemu/hw/sched.c
new file mode 100644
index 0000000000..2ab966de4c
--- /dev/null
+++ b/tools/ioemu/hw/sched.c
@@ -0,0 +1,268 @@
+/*
+ * QEMU interrupt controller emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+//#define DEBUG_IRQ_COUNT
+
+/* These registers are used for sending/receiving irqs from/to
+ * different cpu's.
+ */
+struct sun4m_intreg_percpu {
+ unsigned int tbt; /* Intrs pending for this cpu, by PIL. */
+ /* These next two registers are WRITE-ONLY and are only
+ * "on bit" sensitive, "off bits" written have NO affect.
+ */
+ unsigned int clear; /* Clear this cpus irqs here. */
+ unsigned int set; /* Set this cpus irqs here. */
+};
+/*
+ * djhr
+ * Actually the clear and set fields in this struct are misleading..
+ * according to the SLAVIO manual (and the same applies for the SEC)
+ * the clear field clears bits in the mask which will ENABLE that IRQ
+ * the set field sets bits in the mask to DISABLE the IRQ.
+ *
+ * Also the undirected_xx address in the SLAVIO is defined as
+ * RESERVED and write only..
+ *
+ * DAVEM_NOTE: The SLAVIO only specifies behavior on uniprocessor
+ * sun4m machines, for MP the layout makes more sense.
+ */
+struct sun4m_intreg_master {
+ unsigned int tbt; /* IRQ's that are pending, see sun4m masks. */
+ unsigned int irqs; /* Master IRQ bits. */
+
+ /* Again, like the above, two these registers are WRITE-ONLY. */
+ unsigned int clear; /* Clear master IRQ's by setting bits here. */
+ unsigned int set; /* Set master IRQ's by setting bits here. */
+
+ /* This register is both READ and WRITE. */
+ unsigned int undirected_target; /* Which cpu gets undirected irqs. */
+};
+
+#define SUN4M_INT_ENABLE 0x80000000
+#define SUN4M_INT_E14 0x00000080
+#define SUN4M_INT_E10 0x00080000
+
+#define SUN4M_HARD_INT(x) (0x000000001 << (x))
+#define SUN4M_SOFT_INT(x) (0x000010000 << (x))
+
+#define SUN4M_INT_MASKALL 0x80000000 /* mask all interrupts */
+#define SUN4M_INT_MODULE_ERR 0x40000000 /* module error */
+#define SUN4M_INT_M2S_WRITE 0x20000000 /* write buffer error */
+#define SUN4M_INT_ECC 0x10000000 /* ecc memory error */
+#define SUN4M_INT_FLOPPY 0x00400000 /* floppy disk */
+#define SUN4M_INT_MODULE 0x00200000 /* module interrupt */
+#define SUN4M_INT_VIDEO 0x00100000 /* onboard video */
+#define SUN4M_INT_REALTIME 0x00080000 /* system timer */
+#define SUN4M_INT_SCSI 0x00040000 /* onboard scsi */
+#define SUN4M_INT_AUDIO 0x00020000 /* audio/isdn */
+#define SUN4M_INT_ETHERNET 0x00010000 /* onboard ethernet */
+#define SUN4M_INT_SERIAL 0x00008000 /* serial ports */
+#define SUN4M_INT_SBUSBITS 0x00003F80 /* sbus int bits */
+
+#define SUN4M_INT_SBUS(x) (1 << (x+7))
+#define SUN4M_INT_VME(x) (1 << (x))
+
+typedef struct SCHEDState {
+ uint32_t addr, addrg;
+ uint32_t intreg_pending;
+ uint32_t intreg_enabled;
+ uint32_t intregm_pending;
+ uint32_t intregm_enabled;
+} SCHEDState;
+
+static SCHEDState *ps;
+
+#ifdef DEBUG_IRQ_COUNT
+static uint64_t irq_count[32];
+#endif
+
+static uint32_t intreg_mem_readl(void *opaque, target_phys_addr_t addr) /* per-cpu intreg read: only word 0 (pending) is readable */
+{
+    SCHEDState *s = opaque;
+    uint32_t saddr;
+
+    saddr = (addr - s->addr) >> 2; /* word index within the register block */
+    switch (saddr) {
+    case 0:
+        return s->intreg_pending;
+        break;
+    default:
+        break;
+    }
+    return 0; /* unimplemented words read as zero */
+}
+
+static void intreg_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) /* per-cpu intreg write: word 1 clears, word 2 sets enable bits */
+{
+    SCHEDState *s = opaque;
+    uint32_t saddr;
+
+    saddr = (addr - s->addr) >> 2; /* word index within the register block */
+    switch (saddr) {
+    case 0:
+        s->intreg_pending = val;
+        break;
+    case 1: // clear
+        s->intreg_enabled &= ~val;
+        break;
+    case 2: // set
+        s->intreg_enabled |= val;
+        break;
+    default:
+        break; /* writes to other words are ignored */
+    }
+}
+
+static CPUReadMemoryFunc *intreg_mem_read[3] = { /* byte/word/long accesses all share the 32-bit handler */
+    intreg_mem_readl,
+    intreg_mem_readl,
+    intreg_mem_readl,
+};
+
+static CPUWriteMemoryFunc *intreg_mem_write[3] = { /* byte/word/long accesses all share the 32-bit handler */
+    intreg_mem_writel,
+    intreg_mem_writel,
+    intreg_mem_writel,
+};
+
+static uint32_t intregm_mem_readl(void *opaque, target_phys_addr_t addr) /* master intreg read: word 0 = pending, word 1 = enabled */
+{
+    SCHEDState *s = opaque;
+    uint32_t saddr;
+
+    saddr = (addr - s->addrg) >> 2; /* word index within the register block */
+    switch (saddr) {
+    case 0:
+        return s->intregm_pending;
+        break;
+    case 1:
+        return s->intregm_enabled;
+        break;
+    default:
+        break;
+    }
+    return 0; /* unimplemented words read as zero */
+}
+
+static void intregm_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) /* master intreg write: words 0/1 set directly, 2 clears, 3 sets enable bits */
+{
+    SCHEDState *s = opaque;
+    uint32_t saddr;
+
+    saddr = (addr - s->addrg) >> 2; /* word index within the register block */
+    switch (saddr) {
+    case 0:
+        s->intregm_pending = val;
+        break;
+    case 1:
+        s->intregm_enabled = val;
+        break;
+    case 2: // clear
+        s->intregm_enabled &= ~val;
+        break;
+    case 3: // set
+        s->intregm_enabled |= val;
+        break;
+    default:
+        break; /* writes to other words are ignored */
+    }
+}
+
+static CPUReadMemoryFunc *intregm_mem_read[3] = { /* byte/word/long accesses all share the 32-bit handler */
+    intregm_mem_readl,
+    intregm_mem_readl,
+    intregm_mem_readl,
+};
+
+static CPUWriteMemoryFunc *intregm_mem_write[3] = { /* byte/word/long accesses all share the 32-bit handler */
+    intregm_mem_writel,
+    intregm_mem_writel,
+    intregm_mem_writel,
+};
+
+void pic_info(void) /* monitor command: dump interrupt controller registers */
+{
+    term_printf("per-cpu: pending 0x%08x, enabled 0x%08x\n", ps->intreg_pending, ps->intreg_enabled);
+    term_printf("master: pending 0x%08x, enabled 0x%08x\n", ps->intregm_pending, ps->intregm_enabled);
+}
+
+void irq_info(void) /* monitor command: dump per-IRQ raise counts (needs DEBUG_IRQ_COUNT) */
+{
+#ifndef DEBUG_IRQ_COUNT
+    term_printf("irq statistic code not compiled.\n");
+#else
+    int i;
+    int64_t count;
+
+    term_printf("IRQ statistics:\n");
+    for (i = 0; i < 32; i++) {
+        count = irq_count[i];
+        if (count > 0) /* only print IRQs that have fired */
+            term_printf("%2d: %lld\n", i, count);
+    }
+#endif
+}
+
+static const unsigned int intr_to_mask[16] = { /* per-IRQ master-enable mask; only irq 6 (ethernet) mapped so far */
+    0, 0, 0, 0, 0, 0, SUN4M_INT_ETHERNET, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+void pic_set_irq(int irq, int level) /* raise an interrupt towards the cpu if its master mask is enabled */
+{
+    if (irq < 16) {
+        unsigned int mask = intr_to_mask[irq];
+        ps->intreg_pending |= 1 << irq; /* NOTE(review): pending is set even for level == 0 and is never cleared here -- confirm intended */
+        if (ps->intregm_enabled & mask) {
+            cpu_single_env->interrupt_index = irq;
+            cpu_interrupt(cpu_single_env, CPU_INTERRUPT_HARD);
+        }
+    }
+#ifdef DEBUG_IRQ_COUNT
+    if (level == 1)
+        irq_count[irq]++;
+#endif
+}
+
+void sched_init(uint32_t addr, uint32_t addrg) /* map the per-cpu (addr) and master (addrg) interrupt register blocks */
+{
+    int intreg_io_memory, intregm_io_memory;
+    SCHEDState *s;
+
+    s = qemu_mallocz(sizeof(SCHEDState));
+    if (!s)
+        return;
+    s->addr = addr;
+    s->addrg = addrg;
+
+    intreg_io_memory = cpu_register_io_memory(0, intreg_mem_read, intreg_mem_write, s);
+    cpu_register_physical_memory(addr, 3, intreg_io_memory); /* NOTE(review): sizes 3/5 look like word counts, not byte lengths -- confirm against cpu_register_physical_memory's contract */
+
+    intregm_io_memory = cpu_register_io_memory(0, intregm_mem_read, intregm_mem_write, s);
+    cpu_register_physical_memory(addrg, 5, intregm_io_memory);
+
+    ps = s; /* stash the instance for pic_set_irq/pic_info, which have no opaque */
+}
+
diff --git a/tools/ioemu/hw/serial.c b/tools/ioemu/hw/serial.c
new file mode 100644
index 0000000000..3fe482c391
--- /dev/null
+++ b/tools/ioemu/hw/serial.c
@@ -0,0 +1,279 @@
+/*
+ * QEMU 16450 UART emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+//#define DEBUG_SERIAL
+
+#define UART_LCR_DLAB 0x80 /* Divisor latch access bit */
+
+#define UART_IER_MSI 0x08 /* Enable Modem status interrupt */
+#define UART_IER_RLSI 0x04 /* Enable receiver line status interrupt */
+#define UART_IER_THRI 0x02 /* Enable Transmitter holding register int. */
+#define UART_IER_RDI 0x01 /* Enable receiver data interrupt */
+
+#define UART_IIR_NO_INT 0x01 /* No interrupts pending */
+#define UART_IIR_ID 0x06 /* Mask for the interrupt ID */
+
+#define UART_IIR_MSI 0x00 /* Modem status interrupt */
+#define UART_IIR_THRI 0x02 /* Transmitter holding register empty */
+#define UART_IIR_RDI 0x04 /* Receiver data interrupt */
+#define UART_IIR_RLSI 0x06 /* Receiver line status interrupt */
+
+/*
+ * These are the definitions for the Modem Control Register
+ */
+#define UART_MCR_LOOP 0x10 /* Enable loopback test mode */
+#define UART_MCR_OUT2 0x08 /* Out2 complement */
+#define UART_MCR_OUT1 0x04 /* Out1 complement */
+#define UART_MCR_RTS 0x02 /* RTS complement */
+#define UART_MCR_DTR 0x01 /* DTR complement */
+
+/*
+ * These are the definitions for the Modem Status Register
+ */
+#define UART_MSR_DCD 0x80 /* Data Carrier Detect */
+#define UART_MSR_RI 0x40 /* Ring Indicator */
+#define UART_MSR_DSR 0x20 /* Data Set Ready */
+#define UART_MSR_CTS 0x10 /* Clear to Send */
+#define UART_MSR_DDCD 0x08 /* Delta DCD */
+#define UART_MSR_TERI 0x04 /* Trailing edge ring indicator */
+#define UART_MSR_DDSR 0x02 /* Delta DSR */
+#define UART_MSR_DCTS 0x01 /* Delta CTS */
+#define UART_MSR_ANY_DELTA 0x0F /* Any of the delta bits! */
+
+#define UART_LSR_TEMT 0x40 /* Transmitter empty */
+#define UART_LSR_THRE 0x20 /* Transmit-hold-register empty */
+#define UART_LSR_BI 0x10 /* Break interrupt indicator */
+#define UART_LSR_FE 0x08 /* Frame error indicator */
+#define UART_LSR_PE 0x04 /* Parity error indicator */
+#define UART_LSR_OE 0x02 /* Overrun error indicator */
+#define UART_LSR_DR 0x01 /* Receiver data ready */
+
+struct SerialState {
+ uint8_t divider;
+ uint8_t rbr; /* receive register */
+ uint8_t ier;
+ uint8_t iir; /* read only */
+ uint8_t lcr;
+ uint8_t mcr;
+ uint8_t lsr; /* read only */
+ uint8_t msr;
+ uint8_t scr;
+ /* NOTE: this hidden state is necessary for tx irq generation as
+ it can be reset while reading iir */
+ int thr_ipending;
+ int irq;
+ CharDriverState *chr;
+};
+
+static void serial_update_irq(SerialState *s)
+{
+ if ((s->lsr & UART_LSR_DR) && (s->ier & UART_IER_RDI)) {
+ s->iir = UART_IIR_RDI;
+ } else if (s->thr_ipending && (s->ier & UART_IER_THRI)) {
+ s->iir = UART_IIR_THRI;
+ } else {
+ s->iir = UART_IIR_NO_INT;
+ }
+ if (s->iir != UART_IIR_NO_INT) {
+ pic_set_irq(s->irq, 1);
+ } else {
+ pic_set_irq(s->irq, 0);
+ }
+}
+
+static void serial_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ SerialState *s = opaque;
+ unsigned char ch;
+
+ addr &= 7;
+#ifdef DEBUG_SERIAL
+ printf("serial: write addr=0x%02x val=0x%02x\n", addr, val);
+#endif
+ switch(addr) {
+ default:
+ case 0:
+ if (s->lcr & UART_LCR_DLAB) {
+ s->divider = (s->divider & 0xff00) | val;
+ } else {
+ s->thr_ipending = 0;
+ s->lsr &= ~UART_LSR_THRE;
+ serial_update_irq(s);
+ ch = val;
+ qemu_chr_write(s->chr, &ch, 1);
+ s->thr_ipending = 1;
+ s->lsr |= UART_LSR_THRE;
+ s->lsr |= UART_LSR_TEMT;
+ serial_update_irq(s);
+ }
+ break;
+ case 1:
+ if (s->lcr & UART_LCR_DLAB) {
+ s->divider = (s->divider & 0x00ff) | (val << 8);
+ } else {
+ s->ier = val & 0x0f;
+ if (s->lsr & UART_LSR_THRE) {
+ s->thr_ipending = 1;
+ }
+ serial_update_irq(s);
+ }
+ break;
+ case 2:
+ break;
+ case 3:
+ s->lcr = val;
+ break;
+ case 4:
+ s->mcr = val & 0x1f;
+ break;
+ case 5:
+ break;
+ case 6:
+ s->msr = val;
+ break;
+ case 7:
+ s->scr = val;
+ break;
+ }
+}
+
+static uint32_t serial_ioport_read(void *opaque, uint32_t addr)
+{
+ SerialState *s = opaque;
+ uint32_t ret;
+
+ addr &= 7;
+ switch(addr) {
+ default:
+ case 0:
+ if (s->lcr & UART_LCR_DLAB) {
+ ret = s->divider & 0xff;
+ } else {
+ ret = s->rbr;
+ s->lsr &= ~(UART_LSR_DR | UART_LSR_BI);
+ serial_update_irq(s);
+ }
+ break;
+ case 1:
+ if (s->lcr & UART_LCR_DLAB) {
+ ret = (s->divider >> 8) & 0xff;
+ } else {
+ ret = s->ier;
+ }
+ break;
+ case 2:
+ ret = s->iir;
+ /* reset THR pending bit */
+ if ((ret & 0x7) == UART_IIR_THRI)
+ s->thr_ipending = 0;
+ serial_update_irq(s);
+ break;
+ case 3:
+ ret = s->lcr;
+ break;
+ case 4:
+ ret = s->mcr;
+ break;
+ case 5:
+ ret = s->lsr;
+ break;
+ case 6:
+ if (s->mcr & UART_MCR_LOOP) {
+ /* in loopback, the modem output pins are connected to the
+ inputs */
+ ret = (s->mcr & 0x0c) << 4;
+ ret |= (s->mcr & 0x02) << 3;
+ ret |= (s->mcr & 0x01) << 5;
+ } else {
+ ret = s->msr;
+ }
+ break;
+ case 7:
+ ret = s->scr;
+ break;
+ }
+#ifdef DEBUG_SERIAL
+ printf("serial: read addr=0x%02x val=0x%02x\n", addr, ret);
+#endif
+ return ret;
+}
+
+static int serial_can_receive(SerialState *s)
+{
+ return !(s->lsr & UART_LSR_DR);
+}
+
+static void serial_receive_byte(SerialState *s, int ch)
+{
+ s->rbr = ch;
+ s->lsr |= UART_LSR_DR;
+ serial_update_irq(s);
+}
+
+static void serial_receive_break(SerialState *s)
+{
+ s->rbr = 0;
+ s->lsr |= UART_LSR_BI | UART_LSR_DR;
+ serial_update_irq(s);
+}
+
+static int serial_can_receive1(void *opaque)
+{
+ SerialState *s = opaque;
+ return serial_can_receive(s);
+}
+
+static void serial_receive1(void *opaque, const uint8_t *buf, int size)
+{
+ SerialState *s = opaque;
+ serial_receive_byte(s, buf[0]);
+}
+
+static void serial_event(void *opaque, int event)
+{
+ SerialState *s = opaque;
+ if (event == CHR_EVENT_BREAK)
+ serial_receive_break(s);
+}
+
+/* If fd is zero, it means that the serial device uses the console */
+SerialState *serial_init(int base, int irq, CharDriverState *chr)
+{
+ SerialState *s;
+
+ s = qemu_mallocz(sizeof(SerialState));
+ if (!s)
+ return NULL;
+ s->irq = irq;
+ s->lsr = UART_LSR_TEMT | UART_LSR_THRE;
+ s->iir = UART_IIR_NO_INT;
+
+ register_ioport_write(base, 8, 1, serial_ioport_write, s);
+ register_ioport_read(base, 8, 1, serial_ioport_read, s);
+ s->chr = chr;
+ qemu_chr_add_read_handler(chr, serial_can_receive1, serial_receive1, s);
+ qemu_chr_add_event_handler(chr, serial_event);
+ return s;
+}
diff --git a/tools/ioemu/hw/sun4m.c b/tools/ioemu/hw/sun4m.c
new file mode 100644
index 0000000000..80305e09c3
--- /dev/null
+++ b/tools/ioemu/hw/sun4m.c
@@ -0,0 +1,113 @@
+/*
+ * QEMU Sun4m System Emulator
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "m48t08.h"
+
+#define KERNEL_LOAD_ADDR 0x00004000
+#define MMU_CONTEXT_TBL 0x00003000
+#define MMU_L1PTP (MMU_CONTEXT_TBL + 0x0400)
+#define MMU_L2PTP (MMU_CONTEXT_TBL + 0x0800)
+#define PROM_ADDR 0xffd04000
+#define PROM_FILENAMEB "proll.bin"
+#define PROM_FILENAMEE "proll.elf"
+#define PROLL_MAGIC_ADDR 0x20000000
+#define PHYS_JJ_EEPROM 0x71200000 /* [2000] MK48T08 */
+#define PHYS_JJ_IDPROM_OFF 0x1FD8
+#define PHYS_JJ_EEPROM_SIZE 0x2000
+#define PHYS_JJ_IOMMU 0x10000000 /* First page of sun4m IOMMU */
+#define PHYS_JJ_TCX_FB 0x50800000 /* Start address, frame buffer body */
+#define PHYS_JJ_TCX_0E 0x5E000000 /* Top address, one byte used. */
+#define PHYS_JJ_IOMMU 0x10000000 /* First page of sun4m IOMMU */
+#define PHYS_JJ_LEDMA 0x78400010 /* ledma, off by 10 from unused SCSI */
+#define PHYS_JJ_LE 0x78C00000 /* LANCE, typical sun4m */
+#define PHYS_JJ_LE_IRQ 6
+#define PHYS_JJ_CLOCK 0x71D00000
+#define PHYS_JJ_CLOCK_IRQ 10
+#define PHYS_JJ_CLOCK1 0x71D10000
+#define PHYS_JJ_CLOCK1_IRQ 14
+#define PHYS_JJ_INTR0 0x71E00000 /* CPU0 interrupt control registers */
+#define PHYS_JJ_INTR_G 0x71E10000 /* Master interrupt control registers */
+
+/* TSC handling */
+
+uint64_t cpu_get_tsc()
+{
+ return qemu_get_clock(vm_clock);
+}
+
+void DMA_run() {}
+void SB16_run() {}
+int serial_can_receive(SerialState *s) { return 0; }
+void serial_receive_byte(SerialState *s, int ch) {}
+void serial_receive_break(SerialState *s) {}
+
+static m48t08_t *nvram;
+
+/* Sun4m hardware initialisation */
+void sun4m_init(int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename)
+{
+ char buf[1024];
+ int ret, linux_boot;
+ unsigned long bios_offset;
+
+ linux_boot = (kernel_filename != NULL);
+
+ /* allocate RAM */
+ cpu_register_physical_memory(0, ram_size, 0);
+ bios_offset = ram_size;
+
+ iommu_init(PHYS_JJ_IOMMU);
+ sched_init(PHYS_JJ_INTR0, PHYS_JJ_INTR_G);
+ tcx_init(ds, PHYS_JJ_TCX_FB);
+ lance_init(&nd_table[0], PHYS_JJ_LE_IRQ, PHYS_JJ_LE, PHYS_JJ_LEDMA);
+ nvram = m48t08_init(PHYS_JJ_EEPROM, PHYS_JJ_EEPROM_SIZE, &nd_table[0].macaddr);
+ timer_init(PHYS_JJ_CLOCK, PHYS_JJ_CLOCK_IRQ);
+ timer_init(PHYS_JJ_CLOCK1, PHYS_JJ_CLOCK1_IRQ);
+ magic_init(kernel_filename, phys_ram_base + KERNEL_LOAD_ADDR, PROLL_MAGIC_ADDR);
+
+ /* We load Proll as the kernel and start it. It will issue a magic
+ IO to load the real kernel */
+ if (linux_boot) {
+ snprintf(buf, sizeof(buf), "%s/%s", bios_dir, PROM_FILENAMEB);
+ ret = load_kernel(buf,
+ phys_ram_base + KERNEL_LOAD_ADDR);
+ if (ret < 0) {
+ fprintf(stderr, "qemu: could not load kernel '%s'\n",
+ buf);
+ exit(1);
+ }
+ }
+ /* Setup a MMU entry for entire address space */
+ stl_raw(phys_ram_base + MMU_CONTEXT_TBL, (MMU_L1PTP >> 4) | 1);
+ stl_raw(phys_ram_base + MMU_L1PTP, (MMU_L2PTP >> 4) | 1);
+ stl_raw(phys_ram_base + MMU_L1PTP + (0x01 << 2), (MMU_L2PTP >> 4) | 1); // 01.. == 00..
+ stl_raw(phys_ram_base + MMU_L1PTP + (0xff << 2), (MMU_L2PTP >> 4) | 1); // ff.. == 00..
+ stl_raw(phys_ram_base + MMU_L1PTP + (0xf0 << 2), (MMU_L2PTP >> 4) | 1); // f0.. == 00..
+ /* 3 = U:RWX S:RWX */
+ stl_raw(phys_ram_base + MMU_L2PTP, (3 << PTE_ACCESS_SHIFT) | 2);
+ stl_raw(phys_ram_base + MMU_L2PTP, ((0x01 << PTE_PPN_SHIFT) >> 4 ) | (3 << PTE_ACCESS_SHIFT) | 2);
+}
diff --git a/tools/ioemu/hw/tcx.c b/tools/ioemu/hw/tcx.c
new file mode 100644
index 0000000000..7f979946fc
--- /dev/null
+++ b/tools/ioemu/hw/tcx.c
@@ -0,0 +1,207 @@
+/*
+ * QEMU Sun4m System Emulator
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+#define MAXX 1024
+#define MAXY 768
+#define XSZ (8*80)
+#define YSZ (24*11)
+#define XOFF (MAXX-XSZ)
+#define YOFF (MAXY-YSZ)
+
+typedef struct TCXState {
+ uint32_t addr;
+ DisplayState *ds;
+ uint8_t *vram;
+} TCXState;
+
+static TCXState *ts;
+
+void vga_update_display()
+{
+ dpy_update(ts->ds, 0, 0, XSZ, YSZ);
+}
+
+void vga_invalidate_display() {}
+
+static uint32_t tcx_mem_readb(void *opaque, target_phys_addr_t addr)
+{
+ TCXState *s = opaque;
+ uint32_t saddr;
+ unsigned int x, y;
+
+ saddr = addr - s->addr - YOFF*MAXX - XOFF;
+ y = saddr / MAXX;
+ x = saddr - y * MAXX;
+ if (x < XSZ && y < YSZ) {
+ return s->vram[y * XSZ + x];
+ }
+ return 0;
+}
+
+static uint32_t tcx_mem_readw(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = tcx_mem_readb(opaque, addr) << 8;
+ v |= tcx_mem_readb(opaque, addr + 1);
+#else
+ v = tcx_mem_readb(opaque, addr);
+ v |= tcx_mem_readb(opaque, addr + 1) << 8;
+#endif
+ return v;
+}
+
+static uint32_t tcx_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = tcx_mem_readb(opaque, addr) << 24;
+ v |= tcx_mem_readb(opaque, addr + 1) << 16;
+ v |= tcx_mem_readb(opaque, addr + 2) << 8;
+ v |= tcx_mem_readb(opaque, addr + 3);
+#else
+ v = tcx_mem_readb(opaque, addr);
+ v |= tcx_mem_readb(opaque, addr + 1) << 8;
+ v |= tcx_mem_readb(opaque, addr + 2) << 16;
+ v |= tcx_mem_readb(opaque, addr + 3) << 24;
+#endif
+ return v;
+}
+
+static void tcx_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ TCXState *s = opaque;
+ uint32_t saddr;
+ unsigned int x, y;
+ char *sptr;
+
+ saddr = addr - s->addr - YOFF*MAXX - XOFF;
+ y = saddr / MAXX;
+ x = saddr - y * MAXX;
+ if (x < XSZ && y < YSZ) {
+ sptr = s->ds->data;
+ if (sptr) {
+ if (s->ds->depth == 24 || s->ds->depth == 32) {
+ /* XXX need to do CLUT translation */
+ sptr[y * s->ds->linesize + x*4] = val & 0xff;
+ sptr[y * s->ds->linesize + x*4+1] = val & 0xff;
+ sptr[y * s->ds->linesize + x*4+2] = val & 0xff;
+ }
+ else if (s->ds->depth == 8) {
+ sptr[y * s->ds->linesize + x] = val & 0xff;
+ }
+ }
+ cpu_physical_memory_set_dirty(addr);
+ s->vram[y * XSZ + x] = val & 0xff;
+ }
+}
+
+static void tcx_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ tcx_mem_writeb(opaque, addr, (val >> 8) & 0xff);
+ tcx_mem_writeb(opaque, addr + 1, val & 0xff);
+#else
+ tcx_mem_writeb(opaque, addr, val & 0xff);
+ tcx_mem_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+#endif
+}
+
+static void tcx_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ tcx_mem_writeb(opaque, addr, (val >> 24) & 0xff);
+ tcx_mem_writeb(opaque, addr + 1, (val >> 16) & 0xff);
+ tcx_mem_writeb(opaque, addr + 2, (val >> 8) & 0xff);
+ tcx_mem_writeb(opaque, addr + 3, val & 0xff);
+#else
+ tcx_mem_writeb(opaque, addr, val & 0xff);
+ tcx_mem_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+ tcx_mem_writeb(opaque, addr + 2, (val >> 16) & 0xff);
+ tcx_mem_writeb(opaque, addr + 3, (val >> 24) & 0xff);
+#endif
+}
+
+static CPUReadMemoryFunc *tcx_mem_read[3] = {
+ tcx_mem_readb,
+ tcx_mem_readw,
+ tcx_mem_readl,
+};
+
+static CPUWriteMemoryFunc *tcx_mem_write[3] = {
+ tcx_mem_writeb,
+ tcx_mem_writew,
+ tcx_mem_writel,
+};
+
+void tcx_init(DisplayState *ds, uint32_t addr)
+{
+ TCXState *s;
+ int tcx_io_memory;
+
+ s = qemu_mallocz(sizeof(TCXState));
+ if (!s)
+ return;
+ s->ds = ds;
+ s->addr = addr;
+ ts = s;
+ tcx_io_memory = cpu_register_io_memory(0, tcx_mem_read, tcx_mem_write, s);
+ cpu_register_physical_memory(addr, 0x100000,
+ tcx_io_memory);
+ s->vram = qemu_mallocz(XSZ*YSZ);
+ dpy_resize(s->ds, XSZ, YSZ);
+}
+
+void vga_screen_dump(const char *filename)
+{
+ TCXState *s = ts;
+ FILE *f;
+ uint8_t *d, *d1;
+ unsigned int v;
+ int y, x;
+
+ f = fopen(filename, "wb");
+ if (!f)
+        return;
+ fprintf(f, "P6\n%d %d\n%d\n",
+ XSZ, YSZ, 255);
+ d1 = s->vram;
+ for(y = 0; y < YSZ; y++) {
+ d = d1;
+ for(x = 0; x < XSZ; x++) {
+ v = *d;
+ fputc((v) & 0xff, f);
+ fputc((v) & 0xff, f);
+ fputc((v) & 0xff, f);
+ d++;
+ }
+ d1 += XSZ;
+ }
+ fclose(f);
+ return;
+}
+
+
+
diff --git a/tools/ioemu/hw/timer.c b/tools/ioemu/hw/timer.c
new file mode 100644
index 0000000000..e393fa36fd
--- /dev/null
+++ b/tools/ioemu/hw/timer.c
@@ -0,0 +1,97 @@
+/*
+ * QEMU Sparc timer controller emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/*
+ * Registers of hardware timer in sun4m.
+ */
+struct sun4m_timer_percpu {
+ volatile unsigned int l14_timer_limit; /* Initial value is 0x009c4000 */
+ volatile unsigned int l14_cur_count;
+};
+
+struct sun4m_timer_global {
+ volatile unsigned int l10_timer_limit;
+ volatile unsigned int l10_cur_count;
+};
+
+typedef struct TIMERState {
+ uint32_t addr;
+ uint32_t timer_regs[2];
+ int irq;
+} TIMERState;
+
+static uint32_t timer_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+ TIMERState *s = opaque;
+ uint32_t saddr;
+
+ saddr = (addr - s->addr) >> 2;
+ switch (saddr) {
+ default:
+ return s->timer_regs[saddr];
+ break;
+ }
+ return 0;
+}
+
+static void timer_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ TIMERState *s = opaque;
+ uint32_t saddr;
+
+ saddr = (addr - s->addr) >> 2;
+ switch (saddr) {
+ default:
+ s->timer_regs[saddr] = val;
+ break;
+ }
+}
+
+static CPUReadMemoryFunc *timer_mem_read[3] = {
+ timer_mem_readl,
+ timer_mem_readl,
+ timer_mem_readl,
+};
+
+static CPUWriteMemoryFunc *timer_mem_write[3] = {
+ timer_mem_writel,
+ timer_mem_writel,
+ timer_mem_writel,
+};
+
+void timer_init(uint32_t addr, int irq)
+{
+ int timer_io_memory;
+ TIMERState *s;
+
+ s = qemu_mallocz(sizeof(TIMERState));
+ if (!s)
+ return;
+ s->addr = addr;
+ s->irq = irq;
+
+ timer_io_memory = cpu_register_io_memory(0, timer_mem_read, timer_mem_write, s);
+ cpu_register_physical_memory(addr, 2, timer_io_memory);
+}
diff --git a/tools/ioemu/hw/vga.c b/tools/ioemu/hw/vga.c
new file mode 100644
index 0000000000..2867bd55d4
--- /dev/null
+++ b/tools/ioemu/hw/vga.c
@@ -0,0 +1,2059 @@
+/*
+ * QEMU VGA Emulator.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "vga_int.h"
+
+//#define DEBUG_VGA
+//#define DEBUG_VGA_MEM
+//#define DEBUG_VGA_REG
+
+//#define DEBUG_S3
+//#define DEBUG_BOCHS_VBE
+
+/* S3 VGA is deprecated - another graphic card will be emulated */
+//#define CONFIG_S3VGA
+
+/* force some bits to zero */
+const uint8_t sr_mask[8] = {
+ (uint8_t)~0xfc,
+ (uint8_t)~0xc2,
+ (uint8_t)~0xf0,
+ (uint8_t)~0xc0,
+ (uint8_t)~0xf1,
+ (uint8_t)~0xff,
+ (uint8_t)~0xff,
+ (uint8_t)~0x00,
+};
+
+const uint8_t gr_mask[16] = {
+ (uint8_t)~0xf0, /* 0x00 */
+ (uint8_t)~0xf0, /* 0x01 */
+ (uint8_t)~0xf0, /* 0x02 */
+ (uint8_t)~0xe0, /* 0x03 */
+ (uint8_t)~0xfc, /* 0x04 */
+ (uint8_t)~0x84, /* 0x05 */
+ (uint8_t)~0xf0, /* 0x06 */
+ (uint8_t)~0xf0, /* 0x07 */
+ (uint8_t)~0x00, /* 0x08 */
+ (uint8_t)~0xff, /* 0x09 */
+ (uint8_t)~0xff, /* 0x0a */
+ (uint8_t)~0xff, /* 0x0b */
+ (uint8_t)~0xff, /* 0x0c */
+ (uint8_t)~0xff, /* 0x0d */
+ (uint8_t)~0xff, /* 0x0e */
+ (uint8_t)~0xff, /* 0x0f */
+};
+
+#define cbswap_32(__x) \
+((uint32_t)( \
+ (((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \
+ (((uint32_t)(__x) & (uint32_t)0x0000ff00UL) << 8) | \
+ (((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >> 8) | \
+ (((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) ))
+
+#ifdef WORDS_BIGENDIAN
+#define PAT(x) cbswap_32(x)
+#else
+#define PAT(x) (x)
+#endif
+
+#ifdef WORDS_BIGENDIAN
+#define BIG 1
+#else
+#define BIG 0
+#endif
+
+#ifdef WORDS_BIGENDIAN
+#define GET_PLANE(data, p) (((data) >> (24 - (p) * 8)) & 0xff)
+#else
+#define GET_PLANE(data, p) (((data) >> ((p) * 8)) & 0xff)
+#endif
+
+static const uint32_t mask16[16] = {
+ PAT(0x00000000),
+ PAT(0x000000ff),
+ PAT(0x0000ff00),
+ PAT(0x0000ffff),
+ PAT(0x00ff0000),
+ PAT(0x00ff00ff),
+ PAT(0x00ffff00),
+ PAT(0x00ffffff),
+ PAT(0xff000000),
+ PAT(0xff0000ff),
+ PAT(0xff00ff00),
+ PAT(0xff00ffff),
+ PAT(0xffff0000),
+ PAT(0xffff00ff),
+ PAT(0xffffff00),
+ PAT(0xffffffff),
+};
+
+#undef PAT
+
+#ifdef WORDS_BIGENDIAN
+#define PAT(x) (x)
+#else
+#define PAT(x) cbswap_32(x)
+#endif
+
+static const uint32_t dmask16[16] = {
+ PAT(0x00000000),
+ PAT(0x000000ff),
+ PAT(0x0000ff00),
+ PAT(0x0000ffff),
+ PAT(0x00ff0000),
+ PAT(0x00ff00ff),
+ PAT(0x00ffff00),
+ PAT(0x00ffffff),
+ PAT(0xff000000),
+ PAT(0xff0000ff),
+ PAT(0xff00ff00),
+ PAT(0xff00ffff),
+ PAT(0xffff0000),
+ PAT(0xffff00ff),
+ PAT(0xffffff00),
+ PAT(0xffffffff),
+};
+
+static const uint32_t dmask4[4] = {
+ PAT(0x00000000),
+ PAT(0x0000ffff),
+ PAT(0xffff0000),
+ PAT(0xffffffff),
+};
+
+static uint32_t expand4[256];
+static uint16_t expand2[256];
+static uint8_t expand4to8[16];
+
+VGAState *vga_state;
+int vga_io_memory;
+
+static uint32_t vga_ioport_read(void *opaque, uint32_t addr)
+{
+ VGAState *s = opaque;
+ int val, index;
+
+ /* check port range access depending on color/monochrome mode */
+ if ((addr >= 0x3b0 && addr <= 0x3bf && (s->msr & MSR_COLOR_EMULATION)) ||
+ (addr >= 0x3d0 && addr <= 0x3df && !(s->msr & MSR_COLOR_EMULATION))) {
+ val = 0xff;
+ } else {
+ switch(addr) {
+ case 0x3c0:
+ if (s->ar_flip_flop == 0) {
+ val = s->ar_index;
+ } else {
+ val = 0;
+ }
+ break;
+ case 0x3c1:
+ index = s->ar_index & 0x1f;
+ if (index < 21)
+ val = s->ar[index];
+ else
+ val = 0;
+ break;
+ case 0x3c2:
+ val = s->st00;
+ break;
+ case 0x3c4:
+ val = s->sr_index;
+ break;
+ case 0x3c5:
+ val = s->sr[s->sr_index];
+#ifdef DEBUG_VGA_REG
+ printf("vga: read SR%x = 0x%02x\n", s->sr_index, val);
+#endif
+ break;
+ case 0x3c7:
+ val = s->dac_state;
+ break;
+ case 0x3c8:
+ val = s->dac_write_index;
+ break;
+ case 0x3c9:
+ val = s->palette[s->dac_read_index * 3 + s->dac_sub_index];
+ if (++s->dac_sub_index == 3) {
+ s->dac_sub_index = 0;
+ s->dac_read_index++;
+ }
+ break;
+ case 0x3ca:
+ val = s->fcr;
+ break;
+ case 0x3cc:
+ val = s->msr;
+ break;
+ case 0x3ce:
+ val = s->gr_index;
+ break;
+ case 0x3cf:
+ val = s->gr[s->gr_index];
+#ifdef DEBUG_VGA_REG
+ printf("vga: read GR%x = 0x%02x\n", s->gr_index, val);
+#endif
+ break;
+ case 0x3b4:
+ case 0x3d4:
+ val = s->cr_index;
+ break;
+ case 0x3b5:
+ case 0x3d5:
+ val = s->cr[s->cr_index];
+#ifdef DEBUG_VGA_REG
+ printf("vga: read CR%x = 0x%02x\n", s->cr_index, val);
+#endif
+#ifdef DEBUG_S3
+ if (s->cr_index >= 0x20)
+ printf("S3: CR read index=0x%x val=0x%x\n",
+ s->cr_index, val);
+#endif
+ break;
+ case 0x3ba:
+ case 0x3da:
+ /* just toggle to fool polling */
+ s->st01 ^= ST01_V_RETRACE | ST01_DISP_ENABLE;
+ val = s->st01;
+ s->ar_flip_flop = 0;
+ break;
+ default:
+ val = 0x00;
+ break;
+ }
+ }
+#if defined(DEBUG_VGA)
+ printf("VGA: read addr=0x%04x data=0x%02x\n", addr, val);
+#endif
+ return val;
+}
+
+static void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ VGAState *s = opaque;
+ int index;
+
+ /* check port range access depending on color/monochrome mode */
+ if ((addr >= 0x3b0 && addr <= 0x3bf && (s->msr & MSR_COLOR_EMULATION)) ||
+ (addr >= 0x3d0 && addr <= 0x3df && !(s->msr & MSR_COLOR_EMULATION)))
+ return;
+
+#ifdef DEBUG_VGA
+ printf("VGA: write addr=0x%04x data=0x%02x\n", addr, val);
+#endif
+
+ switch(addr) {
+ case 0x3c0:
+ if (s->ar_flip_flop == 0) {
+ val &= 0x3f;
+ s->ar_index = val;
+ } else {
+ index = s->ar_index & 0x1f;
+ switch(index) {
+ case 0x00 ... 0x0f:
+ s->ar[index] = val & 0x3f;
+ break;
+ case 0x10:
+ s->ar[index] = val & ~0x10;
+ break;
+ case 0x11:
+ s->ar[index] = val;
+ break;
+ case 0x12:
+ s->ar[index] = val & ~0xc0;
+ break;
+ case 0x13:
+ s->ar[index] = val & ~0xf0;
+ break;
+ case 0x14:
+ s->ar[index] = val & ~0xf0;
+ break;
+ default:
+ break;
+ }
+ }
+ s->ar_flip_flop ^= 1;
+ break;
+ case 0x3c2:
+ s->msr = val & ~0x10;
+ break;
+ case 0x3c4:
+ s->sr_index = val & 7;
+ break;
+ case 0x3c5:
+#ifdef DEBUG_VGA_REG
+ printf("vga: write SR%x = 0x%02x\n", s->sr_index, val);
+#endif
+ s->sr[s->sr_index] = val & sr_mask[s->sr_index];
+ break;
+ case 0x3c7:
+ s->dac_read_index = val;
+ s->dac_sub_index = 0;
+ s->dac_state = 3;
+ break;
+ case 0x3c8:
+ s->dac_write_index = val;
+ s->dac_sub_index = 0;
+ s->dac_state = 0;
+ break;
+ case 0x3c9:
+ s->dac_cache[s->dac_sub_index] = val;
+ if (++s->dac_sub_index == 3) {
+ memcpy(&s->palette[s->dac_write_index * 3], s->dac_cache, 3);
+ s->dac_sub_index = 0;
+ s->dac_write_index++;
+ }
+ break;
+ case 0x3ce:
+ s->gr_index = val & 0x0f;
+ break;
+ case 0x3cf:
+#ifdef DEBUG_VGA_REG
+ printf("vga: write GR%x = 0x%02x\n", s->gr_index, val);
+#endif
+ s->gr[s->gr_index] = val & gr_mask[s->gr_index];
+ break;
+ case 0x3b4:
+ case 0x3d4:
+ s->cr_index = val;
+ break;
+ case 0x3b5:
+ case 0x3d5:
+#ifdef DEBUG_VGA_REG
+ printf("vga: write CR%x = 0x%02x\n", s->cr_index, val);
+#endif
+ /* handle CR0-7 protection */
+ if ((s->cr[0x11] & 0x80) && s->cr_index <= 7) {
+ /* can always write bit 4 of CR7 */
+ if (s->cr_index == 7)
+ s->cr[7] = (s->cr[7] & ~0x10) | (val & 0x10);
+ return;
+ }
+ switch(s->cr_index) {
+ case 0x01: /* horizontal display end */
+ case 0x07:
+ case 0x09:
+ case 0x0c:
+ case 0x0d:
+        case 0x12: /* vertical display end */
+ s->cr[s->cr_index] = val;
+ break;
+
+#ifdef CONFIG_S3VGA
+ /* S3 registers */
+ case 0x2d:
+ case 0x2e:
+ case 0x2f:
+ case 0x30:
+ /* chip ID, cannot write */
+ break;
+ case 0x31:
+ /* update start address */
+ {
+ int v;
+ s->cr[s->cr_index] = val;
+ v = (val >> 4) & 3;
+                s->cr[0x69] = (s->cr[0x69] & ~0x03) | v;
+ }
+ break;
+ case 0x51:
+ /* update start address */
+ {
+ int v;
+ s->cr[s->cr_index] = val;
+ v = val & 3;
+                s->cr[0x69] = (s->cr[0x69] & ~0x0c) | (v << 2);
+ }
+ break;
+#endif
+ default:
+ s->cr[s->cr_index] = val;
+ break;
+ }
+#ifdef DEBUG_S3
+ if (s->cr_index >= 0x20)
+ printf("S3: CR write index=0x%x val=0x%x\n",
+ s->cr_index, val);
+#endif
+ break;
+ case 0x3ba:
+ case 0x3da:
+ s->fcr = val & 0x10;
+ break;
+ }
+}
+
+#ifdef CONFIG_BOCHS_VBE
+static uint32_t vbe_ioport_read_index(void *opaque, uint32_t addr)
+{
+ VGAState *s = opaque;
+ uint32_t val;
+ val = s->vbe_index;
+ return val;
+}
+
+static uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
+{
+ VGAState *s = opaque;
+ uint32_t val;
+
+ if (s->vbe_index <= VBE_DISPI_INDEX_NB)
+ val = s->vbe_regs[s->vbe_index];
+ else
+ val = 0;
+#ifdef DEBUG_BOCHS_VBE
+ printf("VBE: read index=0x%x val=0x%x\n", s->vbe_index, val);
+#endif
+ return val;
+}
+
+static void vbe_ioport_write_index(void *opaque, uint32_t addr, uint32_t val)
+{
+ VGAState *s = opaque;
+ s->vbe_index = val;
+}
+
/* VBE data port write: dispatch 'val' to the Bochs VBE (DISPI) register
 * selected by the previously latched index.  Each register validates its
 * input; writes to unknown or out-of-range indices are silently ignored. */
static void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
{
    VGAState *s = opaque;

    /* NOTE(review): '<=' also lets index == VBE_DISPI_INDEX_NB through;
       harmless because the switch default ignores it, but '<' would match
       the actual register count. */
    if (s->vbe_index <= VBE_DISPI_INDEX_NB) {
#ifdef DEBUG_BOCHS_VBE
        printf("VBE: write index=0x%x val=0x%x\n", s->vbe_index, val);
#endif
        switch(s->vbe_index) {
        case VBE_DISPI_INDEX_ID:
            /* only ID values this implementation supports may be latched */
            if (val == VBE_DISPI_ID0 ||
                val == VBE_DISPI_ID1 ||
                val == VBE_DISPI_ID2) {
                s->vbe_regs[s->vbe_index] = val;
            }
            break;
        case VBE_DISPI_INDEX_XRES:
            /* X resolution must be a multiple of 8 and within the maximum */
            if ((val <= VBE_DISPI_MAX_XRES) && ((val & 7) == 0)) {
                s->vbe_regs[s->vbe_index] = val;
            }
            break;
        case VBE_DISPI_INDEX_YRES:
            if (val <= VBE_DISPI_MAX_YRES) {
                s->vbe_regs[s->vbe_index] = val;
            }
            break;
        case VBE_DISPI_INDEX_BPP:
            /* 0 is a legacy alias for 8 bpp */
            if (val == 0)
                val = 8;
            if (val == 4 || val == 8 || val == 15 ||
                val == 16 || val == 24 || val == 32) {
                s->vbe_regs[s->vbe_index] = val;
            }
            break;
        case VBE_DISPI_INDEX_BANK:
            /* bank granularity is 64 KB; mask limits it to the vram size */
            val &= s->vbe_bank_mask;
            s->vbe_regs[s->vbe_index] = val;
            s->bank_offset = (val << 16);
            break;
        case VBE_DISPI_INDEX_ENABLE:
            if (val & VBE_DISPI_ENABLED) {
                int h, shift_control;

                /* entering VBE mode: virtual size tracks the visible size,
                   panning offsets reset to the origin */
                s->vbe_regs[VBE_DISPI_INDEX_VIRT_WIDTH] =
                    s->vbe_regs[VBE_DISPI_INDEX_XRES];
                s->vbe_regs[VBE_DISPI_INDEX_VIRT_HEIGHT] =
                    s->vbe_regs[VBE_DISPI_INDEX_YRES];
                s->vbe_regs[VBE_DISPI_INDEX_X_OFFSET] = 0;
                s->vbe_regs[VBE_DISPI_INDEX_Y_OFFSET] = 0;

                /* bytes per scanline: 4 bpp packs two pixels per byte */
                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
                    s->vbe_line_offset = s->vbe_regs[VBE_DISPI_INDEX_XRES] >> 1;
                else
                    s->vbe_line_offset = s->vbe_regs[VBE_DISPI_INDEX_XRES] *
                        ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
                s->vbe_start_addr = 0;

                /* clear the screen (should be done in BIOS) */
                if (!(val & VBE_DISPI_NOCLEARMEM)) {
                    memset(s->vram_ptr, 0,
                           s->vbe_regs[VBE_DISPI_INDEX_YRES] * s->vbe_line_offset);
                }

                /* we initialize the VGA graphic mode (should be done
                   in BIOS) */
                s->gr[0x06] = (s->gr[0x06] & ~0x0c) | 0x05; /* graphic mode + memory map 1 */
                s->cr[0x17] |= 3; /* no CGA modes */
                s->cr[0x13] = s->vbe_line_offset >> 3;
                /* width */
                s->cr[0x01] = (s->vbe_regs[VBE_DISPI_INDEX_XRES] >> 3) - 1;
                /* height */
                h = s->vbe_regs[VBE_DISPI_INDEX_YRES] - 1;
                s->cr[0x12] = h;
                /* height bits 8 and 9 live in CRTC overflow register 0x07 */
                s->cr[0x07] = (s->cr[0x07] & ~0x42) |
                    ((h >> 7) & 0x02) | ((h >> 3) & 0x40);
                /* line compare to 1023 */
                s->cr[0x18] = 0xff;
                s->cr[0x07] |= 0x10;
                s->cr[0x09] |= 0x40;

                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4) {
                    shift_control = 0;
                    s->sr[0x01] &= ~8; /* no double line */
                } else {
                    shift_control = 2;
                    s->sr[4] |= 0x08; /* set chain 4 mode */
                    s->sr[2] |= 0x0f; /* activate all planes */
                }
                s->gr[0x05] = (s->gr[0x05] & ~0x60) | (shift_control << 5);
                s->cr[0x09] &= ~0x9f; /* no double scan */
            } else {
                /* XXX: the bios should do that */
                s->bank_offset = 0;
            }
            s->vbe_regs[s->vbe_index] = val;
            break;
        case VBE_DISPI_INDEX_VIRT_WIDTH:
            {
                int w, h, line_offset;

                /* virtual width can only grow beyond the visible width */
                if (val < s->vbe_regs[VBE_DISPI_INDEX_XRES])
                    return;
                w = val;
                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
                    line_offset = w >> 1;
                else
                    line_offset = w * ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
                /* virtual height is whatever fits in vram at that pitch */
                h = s->vram_size / line_offset;
                /* XXX: support weird bochs semantics ? */
                if (h < s->vbe_regs[VBE_DISPI_INDEX_YRES])
                    return;
                s->vbe_regs[VBE_DISPI_INDEX_VIRT_WIDTH] = w;
                s->vbe_regs[VBE_DISPI_INDEX_VIRT_HEIGHT] = h;
                s->vbe_line_offset = line_offset;
            }
            break;
        case VBE_DISPI_INDEX_X_OFFSET:
        case VBE_DISPI_INDEX_Y_OFFSET:
            {
                int x;
                /* panning: recompute the framebuffer start address from the
                   (x, y) offsets; stored in 32-bit units, hence the >> 2 */
                s->vbe_regs[s->vbe_index] = val;
                s->vbe_start_addr = s->vbe_line_offset * s->vbe_regs[VBE_DISPI_INDEX_Y_OFFSET];
                x = s->vbe_regs[VBE_DISPI_INDEX_X_OFFSET];
                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
                    s->vbe_start_addr += x >> 1;
                else
                    s->vbe_start_addr += x * ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
                s->vbe_start_addr >>= 2;
            }
            break;
        default:
            break;
        }
    }
}
+#endif
+
+extern FILE *logfile;
+/* called for accesses between 0xa0000 and 0xc0000 */
/* Byte read from the legacy VGA window (0xa0000-0xc0000).
 * Decodes the memory-map mode from GR6, then performs a chain-4,
 * odd/even, or planar latched read depending on SR4/GR5.
 * Side effect: a planar read refreshes s->latch. */
uint32_t vga_mem_readb(void *opaque, target_phys_addr_t addr)
{
    VGAState *s = opaque;
    int memory_map_mode, plane;
    uint32_t ret;

    /* convert to VGA memory offset */
    memory_map_mode = (s->gr[6] >> 2) & 3;
    addr &= 0x1ffff;
    switch(memory_map_mode) {
    case 0:
        /* 128 KB window at 0xa0000: use offset as-is */
        break;
    case 1:
        /* 64 KB window at 0xa0000, banked via bank_offset */
        if (addr >= 0x10000)
            return 0xff;
        addr += s->bank_offset;
        break;
    case 2:
        /* 32 KB window at 0xb0000 */
        addr -= 0x10000;
        if (addr >= 0x8000)
            return 0xff;
        break;
    default:
    case 3:
        /* 32 KB window at 0xb8000 */
        addr -= 0x18000;
        if (addr >= 0x8000)
            return 0xff;
        break;
    }

    if (s->sr[4] & 0x08) {
        /* chain 4 mode : simplest access */
        ret = s->vram_ptr[addr];
    } else if (s->gr[5] & 0x10) {
        /* odd/even mode (aka text mode mapping) */
        plane = (s->gr[4] & 2) | (addr & 1);
        ret = s->vram_ptr[((addr & ~1) << 1) | plane];
    } else {
        /* standard VGA latched access: load all four planes into the latch */
        s->latch = ((uint32_t *)s->vram_ptr)[addr];

        if (!(s->gr[5] & 0x08)) {
            /* read mode 0: return the plane selected by GR4 */
            plane = s->gr[4];
            ret = GET_PLANE(s->latch, plane);
        } else {
            /* read mode 1: color-compare against GR2 under GR7 mask */
            ret = (s->latch ^ mask16[s->gr[2]]) & mask16[s->gr[7]];
            ret |= ret >> 16;
            ret |= ret >> 8;
            ret = (~ret) & 0xff;
        }
    }
    return ret;
}
+
+static uint32_t vga_mem_readw(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = vga_mem_readb(opaque, addr) << 8;
+ v |= vga_mem_readb(opaque, addr + 1);
+#else
+ v = vga_mem_readb(opaque, addr);
+ v |= vga_mem_readb(opaque, addr + 1) << 8;
+#endif
+ return v;
+}
+
+static uint32_t vga_mem_readl(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t v;
+#ifdef TARGET_WORDS_BIGENDIAN
+ v = vga_mem_readb(opaque, addr) << 24;
+ v |= vga_mem_readb(opaque, addr + 1) << 16;
+ v |= vga_mem_readb(opaque, addr + 2) << 8;
+ v |= vga_mem_readb(opaque, addr + 3);
+#else
+ v = vga_mem_readb(opaque, addr);
+ v |= vga_mem_readb(opaque, addr + 1) << 8;
+ v |= vga_mem_readb(opaque, addr + 2) << 16;
+ v |= vga_mem_readb(opaque, addr + 3) << 24;
+#endif
+ return v;
+}
+
+/* called for accesses between 0xa0000 and 0xc0000 */
/* Byte write to the legacy VGA window (0xa0000-0xc0000).
 * Decodes the memory-map mode from GR6, then performs a chain-4,
 * odd/even, or planar latched write.  Planar writes implement the four
 * VGA write modes (GR5), the rotate/set-reset/ALU pipeline (GR0/1/3/8)
 * and the plane write mask (SR2), and mark the touched page dirty. */
void vga_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    VGAState *s = opaque;
    int memory_map_mode, plane, write_mode, b, func_select, mask;
    uint32_t write_mask, bit_mask, set_mask;

#ifdef DEBUG_VGA_MEM
    printf("vga: [0x%x] = 0x%02x\n", addr, val);
#endif
    /* convert to VGA memory offset (same decode as vga_mem_readb) */
    memory_map_mode = (s->gr[6] >> 2) & 3;
    addr &= 0x1ffff;
    switch(memory_map_mode) {
    case 0:
        break;
    case 1:
        if (addr >= 0x10000)
            return;
        addr += s->bank_offset;
        break;
    case 2:
        addr -= 0x10000;
        if (addr >= 0x8000)
            return;
        break;
    default:
    case 3:
        addr -= 0x18000;
        if (addr >= 0x8000)
            return;
        break;
    }

    if (s->sr[4] & 0x08) {
        /* chain 4 mode : simplest access; plane = low address bits */
        plane = addr & 3;
        mask = (1 << plane);
        if (s->sr[2] & mask) {
            s->vram_ptr[addr] = val;
#ifdef DEBUG_VGA_MEM
            printf("vga: chain4: [0x%x]\n", addr);
#endif
            s->plane_updated |= mask; /* only used to detect font change */
            cpu_physical_memory_set_dirty(s->vram_offset + addr);
        }
    } else if (s->gr[5] & 0x10) {
        /* odd/even mode (aka text mode mapping) */
        plane = (s->gr[4] & 2) | (addr & 1);
        mask = (1 << plane);
        if (s->sr[2] & mask) {
            addr = ((addr & ~1) << 1) | plane;
            s->vram_ptr[addr] = val;
#ifdef DEBUG_VGA_MEM
            printf("vga: odd/even: [0x%x]\n", addr);
#endif
            s->plane_updated |= mask; /* only used to detect font change */
            cpu_physical_memory_set_dirty(s->vram_offset + addr);
        }
    } else {
        /* standard VGA latched access */
        write_mode = s->gr[5] & 3;
        switch(write_mode) {
        default:
        case 0:
            /* rotate host data right by GR3, replicate to all planes */
            b = s->gr[3] & 7;
            val = ((val >> b) | (val << (8 - b))) & 0xff;
            val |= val << 8;
            val |= val << 16;

            /* apply set/reset mask */
            set_mask = mask16[s->gr[1]];
            val = (val & ~set_mask) | (mask16[s->gr[0]] & set_mask);
            bit_mask = s->gr[8];
            break;
        case 1:
            /* write the latch back unmodified (plane-to-plane copy) */
            val = s->latch;
            goto do_write;
        case 2:
            /* low nibble of host data expanded to one bit per plane */
            val = mask16[val & 0x0f];
            bit_mask = s->gr[8];
            break;
        case 3:
            /* rotate; rotated data ANDs the bit mask, set/reset is the data */
            b = s->gr[3] & 7;
            val = (val >> b) | (val << (8 - b));

            bit_mask = s->gr[8] & val;
            val = mask16[s->gr[0]];
            break;
        }

        /* apply logical operation (GR3 bits 3-4) between data and latch */
        func_select = s->gr[3] >> 3;
        switch(func_select) {
        case 0:
        default:
            /* nothing to do */
            break;
        case 1:
            /* and */
            val &= s->latch;
            break;
        case 2:
            /* or */
            val |= s->latch;
            break;
        case 3:
            /* xor */
            val ^= s->latch;
            break;
        }

        /* apply bit mask: masked-off bits come from the latch */
        bit_mask |= bit_mask << 8;
        bit_mask |= bit_mask << 16;
        val = (val & bit_mask) | (s->latch & ~bit_mask);

    do_write:
        /* mask data according to sr[2] (plane write enable) */
        mask = s->sr[2];
        s->plane_updated |= mask; /* only used to detect font change */
        write_mask = mask16[mask];
        ((uint32_t *)s->vram_ptr)[addr] =
            (((uint32_t *)s->vram_ptr)[addr] & ~write_mask) |
            (val & write_mask);
#ifdef DEBUG_VGA_MEM
        printf("vga: latch: [0x%x] mask=0x%08x val=0x%08x\n",
               addr * 4, write_mask, val);
#endif
        cpu_physical_memory_set_dirty(s->vram_offset + (addr << 2));
    }
}
+
+static void vga_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ vga_mem_writeb(opaque, addr, (val >> 8) & 0xff);
+ vga_mem_writeb(opaque, addr + 1, val & 0xff);
+#else
+ vga_mem_writeb(opaque, addr, val & 0xff);
+ vga_mem_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+#endif
+}
+
+static void vga_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ vga_mem_writeb(opaque, addr, (val >> 24) & 0xff);
+ vga_mem_writeb(opaque, addr + 1, (val >> 16) & 0xff);
+ vga_mem_writeb(opaque, addr + 2, (val >> 8) & 0xff);
+ vga_mem_writeb(opaque, addr + 3, val & 0xff);
+#else
+ vga_mem_writeb(opaque, addr, val & 0xff);
+ vga_mem_writeb(opaque, addr + 1, (val >> 8) & 0xff);
+ vga_mem_writeb(opaque, addr + 2, (val >> 16) & 0xff);
+ vga_mem_writeb(opaque, addr + 3, (val >> 24) & 0xff);
+#endif
+}
+
/* Renderer callback signatures, instantiated per output depth by
 * vga_template.h below: glyph blitters for text mode and scanline
 * converters for graphics mode. */
typedef void vga_draw_glyph8_func(uint8_t *d, int linesize,
                             const uint8_t *font_ptr, int h,
                             uint32_t fgcol, uint32_t bgcol);
typedef void vga_draw_glyph9_func(uint8_t *d, int linesize,
                                  const uint8_t *font_ptr, int h,
                                  uint32_t fgcol, uint32_t bgcol, int dup9);
typedef void vga_draw_line_func(VGAState *s1, uint8_t *d,
                                const uint8_t *s, int width);
+
/* Pack 8-bit R/G/B components into a 3:3:2 (RRRGGGBB) pixel. */
static inline unsigned int rgb_to_pixel8(unsigned int r, unsigned int g, unsigned b)
{
    unsigned int rb = r >> 5;
    unsigned int gb = g >> 5;
    unsigned int bb = b >> 6;

    return (rb << 5) | (gb << 2) | bb;
}
+
/* Pack 8-bit R/G/B components into a 5:5:5 (xRRRRRGGGGGBBBBB) pixel. */
static inline unsigned int rgb_to_pixel15(unsigned int r, unsigned int g, unsigned b)
{
    unsigned int rb = r >> 3;
    unsigned int gb = g >> 3;
    unsigned int bb = b >> 3;

    return (rb << 10) | (gb << 5) | bb;
}
+
/* Pack 8-bit R/G/B components into a 5:6:5 (RRRRRGGGGGGBBBBB) pixel. */
static inline unsigned int rgb_to_pixel16(unsigned int r, unsigned int g, unsigned b)
{
    unsigned int rb = r >> 3;
    unsigned int gb = g >> 2;
    unsigned int bb = b >> 3;

    return (rb << 11) | (gb << 5) | bb;
}
+
/* Pack 8-bit R/G/B components into a 0x00RRGGBB pixel. */
static inline unsigned int rgb_to_pixel32(unsigned int r, unsigned int g, unsigned b)
{
    unsigned int px = r;

    px = (px << 8) | g;
    px = (px << 8) | b;
    return px;
}
+
/* Instantiate the depth-specific renderers (vga_draw_glyph*, vga_draw_line*)
 * once per supported output depth.  vga_template.h undefines DEPTH after
 * each inclusion, so it must be redefined before each one. */
#define DEPTH 8
#include "vga_template.h"

#define DEPTH 15
#include "vga_template.h"

#define DEPTH 16
#include "vga_template.h"

#define DEPTH 32
#include "vga_template.h"
+
/* 8 bpp conversion with the pixel replicated into all four bytes of the
 * returned word (used by memset-style fills). */
static unsigned int rgb_to_pixel8_dup(unsigned int r, unsigned int g, unsigned b)
{
    unsigned int px = rgb_to_pixel8(r, g, b);

    px |= px << 8;
    px |= px << 16;
    return px;
}
+
/* 15 bpp conversion with the pixel replicated into both halves of the word. */
static unsigned int rgb_to_pixel15_dup(unsigned int r, unsigned int g, unsigned b)
{
    unsigned int px = rgb_to_pixel15(r, g, b);

    px |= px << 16;
    return px;
}
+
/* 16 bpp conversion with the pixel replicated into both halves of the word. */
static unsigned int rgb_to_pixel16_dup(unsigned int r, unsigned int g, unsigned b)
{
    unsigned int px = rgb_to_pixel16(r, g, b);

    px |= px << 16;
    return px;
}
+
/* 32 bpp conversion: one pixel already fills the word, no replication. */
static unsigned int rgb_to_pixel32_dup(unsigned int r, unsigned int g, unsigned b)
{
    return rgb_to_pixel32(r, g, b);
}
+
/* Recompute the 16-entry EGA palette cache from the attribute controller
 * registers and the DAC palette.  Return true if any entry changed. */
static int update_palette16(VGAState *s)
{
    int full_update, i;
    uint32_t v, col, *palette;

    full_update = 0;
    palette = s->last_palette;
    for(i = 0; i < 16; i++) {
        v = s->ar[i];
        /* AR10 bit 7 selects how AR14 color-select bits combine with the
           attribute index to form the DAC index */
        if (s->ar[0x10] & 0x80)
            v = ((s->ar[0x14] & 0xf) << 4) | (v & 0xf);
        else
            v = ((s->ar[0x14] & 0xc) << 4) | (v & 0x3f);
        v = v * 3; /* 3 bytes (r,g,b) per DAC entry */
        col = s->rgb_to_pixel(c6_to_8(s->palette[v]),
                              c6_to_8(s->palette[v + 1]),
                              c6_to_8(s->palette[v + 2]));
        if (col != palette[i]) {
            full_update = 1;
            palette[i] = col;
        }
    }
    return full_update;
}
+
+/* return true if the palette was modified */
+static int update_palette256(VGAState *s)
+{
+ int full_update, i;
+ uint32_t v, col, *palette;
+
+ full_update = 0;
+ palette = s->last_palette;
+ v = 0;
+ for(i = 0; i < 256; i++) {
+ col = s->rgb_to_pixel(c6_to_8(s->palette[v]),
+ c6_to_8(s->palette[v + 1]),
+ c6_to_8(s->palette[v + 2]));
+ if (col != palette[i]) {
+ full_update = 1;
+ palette[i] = col;
+ }
+ v += 3;
+ }
+ return full_update;
+}
+
+static void vga_get_offsets(VGAState *s,
+ uint32_t *pline_offset,
+ uint32_t *pstart_addr)
+{
+ uint32_t start_addr, line_offset;
+#ifdef CONFIG_BOCHS_VBE
+ if (s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) {
+ line_offset = s->vbe_line_offset;
+ start_addr = s->vbe_start_addr;
+ } else
+#endif
+ {
+ /* compute line_offset in bytes */
+ line_offset = s->cr[0x13];
+#ifdef CONFIG_S3VGA
+ {
+ uinr32_t v;
+ v = (s->cr[0x51] >> 4) & 3; /* S3 extension */
+ if (v == 0)
+ v = (s->cr[0x43] >> 2) & 1; /* S3 extension */
+ line_offset |= (v << 8);
+ }
+#endif
+ line_offset <<= 3;
+
+ /* starting address */
+ start_addr = s->cr[0x0d] | (s->cr[0x0c] << 8);
+#ifdef CONFIG_S3VGA
+ start_addr |= (s->cr[0x69] & 0x1f) << 16; /* S3 extension */
+#endif
+ }
+ *pline_offset = line_offset;
+ *pstart_addr = start_addr;
+}
+
/* update start_addr and line_offset. Return TRUE if modified */
static int update_basic_params(VGAState *s)
{
    int full_update;
    uint32_t start_addr, line_offset, line_compare;

    full_update = 0;

    /* fetch via the hook so VBE-aware implementations can override */
    s->get_offsets(s, &line_offset, &start_addr);
    /* line compare: 10-bit value split across CR18, CR07 bit 4, CR09 bit 6 */
    line_compare = s->cr[0x18] |
        ((s->cr[0x07] & 0x10) << 4) |
        ((s->cr[0x09] & 0x40) << 3);

    if (line_offset != s->line_offset ||
        start_addr != s->start_addr ||
        line_compare != s->line_compare) {
        s->line_offset = line_offset;
        s->start_addr = start_addr;
        s->line_compare = line_compare;
        full_update = 1;
    }
    return full_update;
}
+
/* Map a display depth (bits per pixel) to its slot in the per-depth
 * renderer tables.  Unknown depths fall back to the 8 bpp slot. */
static inline int get_depth_index(int depth)
{
    if (depth == 15)
        return 1;
    if (depth == 16)
        return 2;
    if (depth == 32)
        return 3;
    return 0;
}
+
/* Glyph renderer dispatch tables, indexed by get_depth_index().
 * 15 and 16 bpp share the 16-bit renderer (same pixel size). */
static vga_draw_glyph8_func *vga_draw_glyph8_table[4] = {
    vga_draw_glyph8_8,
    vga_draw_glyph8_16,
    vga_draw_glyph8_16,
    vga_draw_glyph8_32,
};

/* 16-pixel-wide glyphs (double-width text modes) */
static vga_draw_glyph8_func *vga_draw_glyph16_table[4] = {
    vga_draw_glyph16_8,
    vga_draw_glyph16_16,
    vga_draw_glyph16_16,
    vga_draw_glyph16_32,
};

/* 9-pixel-wide glyphs (with optional 9th-column duplication) */
static vga_draw_glyph9_func *vga_draw_glyph9_table[4] = {
    vga_draw_glyph9_8,
    vga_draw_glyph9_16,
    vga_draw_glyph9_16,
    vga_draw_glyph9_32,
};
+
/* Solid all-ones glyph used to paint the text-mode cursor block
 * (sized like a font glyph: 32 rows x 4 bytes). */
static const uint8_t cursor_glyph[32 * 4] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
+
/*
 * Text mode update
 * Missing:
 * - double scan
 * - double width
 * - underline
 * - flashing
 *
 * Redraws only the character cells whose char/attribute word changed since
 * the last refresh (cached in s->last_ch_attr), plus the cursor cell.
 */
static void vga_draw_text(VGAState *s, int full_update)
{
    int cx, cy, cheight, cw, ch, cattr, height, width, ch_attr;
    int cx_min, cx_max, linesize, x_incr;
    uint32_t offset, fgcol, bgcol, v, cursor_offset;
    uint8_t *d1, *d, *src, *s1, *dest, *cursor_ptr;
    const uint8_t *font_ptr, *font_base[2];
    int dup9, line_offset, depth_index;
    uint32_t *palette;
    uint32_t *ch_attr_ptr;
    vga_draw_glyph8_func *vga_draw_glyph8;
    vga_draw_glyph9_func *vga_draw_glyph9;

    full_update |= update_palette16(s);
    palette = s->last_palette;

    /* compute font data address (in plane 2) */
    v = s->sr[3];
    offset = (((v >> 4) & 1) | ((v << 1) & 6)) * 8192 * 4 + 2;
    if (offset != s->font_offsets[0]) {
        s->font_offsets[0] = offset;
        full_update = 1;
    }
    font_base[0] = s->vram_ptr + offset;

    /* secondary font (selected per-character by attribute bit 3) */
    offset = (((v >> 5) & 1) | ((v >> 1) & 6)) * 8192 * 4 + 2;
    font_base[1] = s->vram_ptr + offset;
    if (offset != s->font_offsets[1]) {
        s->font_offsets[1] = offset;
        full_update = 1;
    }
    if (s->plane_updated & (1 << 2)) {
        /* if the plane 2 was modified since the last display, it
           indicates the font may have been modified */
        s->plane_updated = 0;
        full_update = 1;
    }
    full_update |= update_basic_params(s);

    line_offset = s->line_offset;
    s1 = s->vram_ptr + (s->start_addr * 4);

    /* total width & height */
    cheight = (s->cr[9] & 0x1f) + 1;
    cw = 8;
    if (!(s->sr[1] & 0x01))
        cw = 9;
    if (s->sr[1] & 0x08)
        cw = 16; /* NOTE: no 18 pixel wide */
    x_incr = cw * ((s->ds->depth + 7) >> 3);
    width = (s->cr[0x01] + 1);
    if (s->cr[0x06] == 100) {
        /* ugly hack for CGA 160x100x16 - explain me the logic */
        height = 100;
    } else {
        height = s->cr[0x12] |
            ((s->cr[0x07] & 0x02) << 7) |
            ((s->cr[0x07] & 0x40) << 3);
        height = (height + 1) / cheight;
    }
    if ((height * width) > CH_ATTR_SIZE) {
        /* better than nothing: exit if transient size is too big */
        return;
    }

    if (width != s->last_width || height != s->last_height ||
        cw != s->last_cw || cheight != s->last_ch) {
        s->last_scr_width = width * cw;
        s->last_scr_height = height * cheight;
        dpy_resize(s->ds, s->last_scr_width, s->last_scr_height);
        s->last_width = width;
        s->last_height = height;
        s->last_ch = cheight;
        s->last_cw = cw;
        full_update = 1;
    }
    cursor_offset = ((s->cr[0x0e] << 8) | s->cr[0x0f]) - s->start_addr;
    if (cursor_offset != s->cursor_offset ||
        s->cr[0xa] != s->cursor_start ||
        s->cr[0xb] != s->cursor_end) {
        /* if the cursor position changed, we update the old and new
           chars */
        if (s->cursor_offset < CH_ATTR_SIZE)
            s->last_ch_attr[s->cursor_offset] = -1;
        if (cursor_offset < CH_ATTR_SIZE)
            s->last_ch_attr[cursor_offset] = -1;
        s->cursor_offset = cursor_offset;
        s->cursor_start = s->cr[0xa];
        s->cursor_end = s->cr[0xb];
    }
    cursor_ptr = s->vram_ptr + (s->start_addr + cursor_offset) * 4;

    depth_index = get_depth_index(s->ds->depth);
    if (cw == 16)
        vga_draw_glyph8 = vga_draw_glyph16_table[depth_index];
    else
        vga_draw_glyph8 = vga_draw_glyph8_table[depth_index];
    vga_draw_glyph9 = vga_draw_glyph9_table[depth_index];

    dest = s->ds->data;
    linesize = s->ds->linesize;
    ch_attr_ptr = s->last_ch_attr;
    for(cy = 0; cy < height; cy++) {
        d1 = dest;
        src = s1;
        cx_min = width;
        cx_max = -1;
        for(cx = 0; cx < width; cx++) {
            ch_attr = *(uint16_t *)src;
            /* redraw only changed cells; track the dirty column span */
            if (full_update || ch_attr != *ch_attr_ptr) {
                if (cx < cx_min)
                    cx_min = cx;
                if (cx > cx_max)
                    cx_max = cx;
                *ch_attr_ptr = ch_attr;
#ifdef WORDS_BIGENDIAN
                ch = ch_attr >> 8;
                cattr = ch_attr & 0xff;
#else
                ch = ch_attr & 0xff;
                cattr = ch_attr >> 8;
#endif
                font_ptr = font_base[(cattr >> 3) & 1];
                font_ptr += 32 * 4 * ch;
                bgcol = palette[cattr >> 4];
                fgcol = palette[cattr & 0x0f];
                if (cw != 9) {
                    vga_draw_glyph8(d1, linesize,
                                    font_ptr, cheight, fgcol, bgcol);
                } else {
                    dup9 = 0;
                    /* line-graphics range duplicates the 8th column */
                    if (ch >= 0xb0 && ch <= 0xdf && (s->ar[0x10] & 0x04))
                        dup9 = 1;
                    vga_draw_glyph9(d1, linesize,
                                    font_ptr, cheight, fgcol, bgcol, dup9);
                }
                if (src == cursor_ptr &&
                    !(s->cr[0x0a] & 0x20)) {
                    int line_start, line_last, h;
                    /* draw the cursor */
                    line_start = s->cr[0x0a] & 0x1f;
                    line_last = s->cr[0x0b] & 0x1f;
                    /* XXX: check that */
                    if (line_last > cheight - 1)
                        line_last = cheight - 1;
                    if (line_last >= line_start && line_start < cheight) {
                        h = line_last - line_start + 1;
                        d = d1 + linesize * line_start;
                        if (cw != 9) {
                            vga_draw_glyph8(d, linesize,
                                            cursor_glyph, h, fgcol, bgcol);
                        } else {
                            vga_draw_glyph9(d, linesize,
                                            cursor_glyph, h, fgcol, bgcol, 1);
                        }
                    }
                }
            }
            d1 += x_incr;
            src += 4;
            ch_attr_ptr++;
        }
        /* flush the dirty span of this row to the display */
        if (cx_max != -1) {
            dpy_update(s->ds, cx_min * cw, cy * cheight,
                       (cx_max - cx_min + 1) * cw, cheight);
        }
        dest += linesize * cheight;
        s1 += line_offset;
    }
}
+
/* Graphics-mode scanline converter variants; *D2 variants double each
 * pixel horizontally.  Used as the row index into vga_draw_line_table. */
enum {
    VGA_DRAW_LINE2,
    VGA_DRAW_LINE2D2,
    VGA_DRAW_LINE4,
    VGA_DRAW_LINE4D2,
    VGA_DRAW_LINE8D2,
    VGA_DRAW_LINE8,
    VGA_DRAW_LINE15,
    VGA_DRAW_LINE16,
    VGA_DRAW_LINE24,
    VGA_DRAW_LINE32,
    VGA_DRAW_LINE_NB,
};
+
/* Scanline converter dispatch table: 4 output depths per source format,
 * indexed as [variant * 4 + get_depth_index(display depth)]. */
static vga_draw_line_func *vga_draw_line_table[4 * VGA_DRAW_LINE_NB] = {
    vga_draw_line2_8,
    vga_draw_line2_16,
    vga_draw_line2_16,
    vga_draw_line2_32,

    vga_draw_line2d2_8,
    vga_draw_line2d2_16,
    vga_draw_line2d2_16,
    vga_draw_line2d2_32,

    vga_draw_line4_8,
    vga_draw_line4_16,
    vga_draw_line4_16,
    vga_draw_line4_32,

    vga_draw_line4d2_8,
    vga_draw_line4d2_16,
    vga_draw_line4d2_16,
    vga_draw_line4d2_32,

    vga_draw_line8d2_8,
    vga_draw_line8d2_16,
    vga_draw_line8d2_16,
    vga_draw_line8d2_32,

    vga_draw_line8_8,
    vga_draw_line8_16,
    vga_draw_line8_16,
    vga_draw_line8_32,

    vga_draw_line15_8,
    vga_draw_line15_15,
    vga_draw_line15_16,
    vga_draw_line15_32,

    vga_draw_line16_8,
    vga_draw_line16_15,
    vga_draw_line16_16,
    vga_draw_line16_32,

    vga_draw_line24_8,
    vga_draw_line24_15,
    vga_draw_line24_16,
    vga_draw_line24_32,

    vga_draw_line32_8,
    vga_draw_line32_15,
    vga_draw_line32_16,
    vga_draw_line32_32,
};
+
+static int vga_get_bpp(VGAState *s)
+{
+ int ret;
+#ifdef CONFIG_BOCHS_VBE
+ if (s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) {
+ ret = s->vbe_regs[VBE_DISPI_INDEX_BPP];
+ } else
+#endif
+ {
+ ret = 0;
+ }
+ return ret;
+}
+
+static void vga_get_resolution(VGAState *s, int *pwidth, int *pheight)
+{
+ int width, height;
+
+ width = (s->cr[0x01] + 1) * 8;
+ height = s->cr[0x12] |
+ ((s->cr[0x07] & 0x02) << 7) |
+ ((s->cr[0x07] & 0x40) << 3);
+ height = (height + 1);
+ *pwidth = width;
+ *pheight = height;
+}
+
+void vga_invalidate_scanlines(VGAState *s, int y1, int y2)
+{
+ int y;
+ if (y1 >= VGA_MAX_HEIGHT)
+ return;
+ if (y2 >= VGA_MAX_HEIGHT)
+ y2 = VGA_MAX_HEIGHT;
+ for(y = y1; y < y2; y++) {
+ s->invalidated_y_table[y >> 5] |= 1 << (y & 0x1f);
+ }
+}
+
/*
 * graphic modes
 *
 * Selects the scanline converter from the shift control / bpp, then walks
 * every displayed line, redrawing only lines whose backing pages are dirty
 * (or explicitly invalidated), and flushes contiguous dirty spans.
 */
static void vga_draw_graphic(VGAState *s, int full_update)
{
    int y1, y, update, page_min, page_max, linesize, y_start, double_scan, mask;
    int width, height, shift_control, line_offset, page0, page1, bwidth;
    int disp_width, multi_scan, multi_run;
    uint8_t *d;
    uint32_t v, addr1, addr;
    vga_draw_line_func *vga_draw_line;

    full_update |= update_basic_params(s);

    s->get_resolution(s, &width, &height);
    disp_width = width;

    shift_control = (s->gr[0x05] >> 5) & 3;
    double_scan = (s->cr[0x09] >> 7);
    if (shift_control != 1) {
        /* each logical line is repeated (maximum scan line + 1) times */
        multi_scan = (((s->cr[0x09] & 0x1f) + 1) << double_scan) - 1;
    } else {
        /* in CGA modes, multi_scan is ignored */
        /* XXX: is it correct ? */
        multi_scan = double_scan;
    }
    multi_run = multi_scan;
    if (shift_control != s->shift_control ||
        double_scan != s->double_scan) {
        full_update = 1;
        s->shift_control = shift_control;
        s->double_scan = double_scan;
    }

    /* pick the scanline converter variant */
    if (shift_control == 0) {
        /* 4-plane 16-color mode */
        full_update |= update_palette16(s);
        if (s->sr[0x01] & 8) {
            v = VGA_DRAW_LINE4D2;
            disp_width <<= 1;
        } else {
            v = VGA_DRAW_LINE4;
        }
    } else if (shift_control == 1) {
        /* CGA-style 2-bit interleaved mode */
        full_update |= update_palette16(s);
        if (s->sr[0x01] & 8) {
            v = VGA_DRAW_LINE2D2;
            disp_width <<= 1;
        } else {
            v = VGA_DRAW_LINE2;
        }
    } else {
        /* 256-color or direct-color (VBE) modes */
        switch(s->get_bpp(s)) {
        default:
        case 0:
            full_update |= update_palette256(s);
            v = VGA_DRAW_LINE8D2;
            break;
        case 8:
            full_update |= update_palette256(s);
            v = VGA_DRAW_LINE8;
            break;
        case 15:
            v = VGA_DRAW_LINE15;
            break;
        case 16:
            v = VGA_DRAW_LINE16;
            break;
        case 24:
            v = VGA_DRAW_LINE24;
            break;
        case 32:
            v = VGA_DRAW_LINE32;
            break;
        }
    }
    vga_draw_line = vga_draw_line_table[v * 4 + get_depth_index(s->ds->depth)];

    if (disp_width != s->last_width ||
        height != s->last_height) {
        dpy_resize(s->ds, disp_width, height);
        s->last_scr_width = disp_width;
        s->last_scr_height = height;
        s->last_width = disp_width;
        s->last_height = height;
        full_update = 1;
    }
    if (s->cursor_invalidate)
        s->cursor_invalidate(s);

    line_offset = s->line_offset;
#if 0
    printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
           width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]);
#endif
    addr1 = (s->start_addr * 4);
    bwidth = width * 4;
    y_start = -1;
    page_min = 0x7fffffff;
    page_max = -1;
    d = s->ds->data;
    linesize = s->ds->linesize;
    y1 = 0;
    for(y = 0; y < height; y++) {
        addr = addr1;
        if (!(s->cr[0x17] & 1)) {
            int shift;
            /* CGA compatibility handling */
            shift = 14 + ((s->cr[0x17] >> 6) & 1);
            addr = (addr & ~(1 << shift)) | ((y1 & 1) << shift);
        }
        if (!(s->cr[0x17] & 2)) {
            addr = (addr & ~0x8000) | ((y1 & 2) << 14);
        }
        /* dirty-page tracking: a line spans at most two (or three) pages */
        page0 = s->vram_offset + (addr & TARGET_PAGE_MASK);
        page1 = s->vram_offset + ((addr + bwidth - 1) & TARGET_PAGE_MASK);
        update = full_update | cpu_physical_memory_is_dirty(page0) |
            cpu_physical_memory_is_dirty(page1);
        if ((page1 - page0) > TARGET_PAGE_SIZE) {
            /* if wide line, can use another page */
            update |= cpu_physical_memory_is_dirty(page0 + TARGET_PAGE_SIZE);
        }
        /* explicit invalidation for the hardware cursor */
        update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
        if (update) {
            if (y_start < 0)
                y_start = y;
            if (page0 < page_min)
                page_min = page0;
            if (page1 > page_max)
                page_max = page1;
            vga_draw_line(s, d, s->vram_ptr + addr, width);
            if (s->cursor_draw_line)
                s->cursor_draw_line(s, d, y);
        } else {
            if (y_start >= 0) {
                /* flush to display */
                dpy_update(s->ds, 0, y_start,
                           disp_width, y - y_start);
                y_start = -1;
            }
        }
        /* advance the source address only after all repeats of this line */
        if (!multi_run) {
            mask = (s->cr[0x17] & 3) ^ 3;
            if ((y1 & mask) == mask)
                addr1 += line_offset;
            y1++;
            multi_run = multi_scan;
        } else {
            multi_run--;
        }
        /* line compare acts on the displayed lines */
        if (y == s->line_compare)
            addr1 = 0;
        d += linesize;
    }
    if (y_start >= 0) {
        /* flush to display */
        dpy_update(s->ds, 0, y_start,
                   disp_width, y - y_start);
    }
    /* reset modified pages */
    if (page_max != -1) {
        cpu_physical_memory_reset_dirty(page_min, page_max + TARGET_PAGE_SIZE);
    }
    memset(s->invalidated_y_table, 0, ((height + 31) >> 5) * 4);
}
+
/* Fill the whole last-known screen area with black (display disabled).
 * Only repaints when a full update was requested. */
static void vga_draw_blank(VGAState *s, int full_update)
{
    int i, w, val;
    uint8_t *d;

    if (!full_update)
        return;
    if (s->last_scr_width <= 0 || s->last_scr_height <= 0)
        return;
    /* at 8 bpp black must go through the palette conversion;
       deeper formats encode black as all-zero bytes, so memset(0) works */
    if (s->ds->depth == 8)
        val = s->rgb_to_pixel(0, 0, 0);
    else
        val = 0;
    w = s->last_scr_width * ((s->ds->depth + 7) >> 3);
    d = s->ds->data;
    for(i = 0; i < s->last_scr_height; i++) {
        memset(d, val, w);
        d += s->ds->linesize;
    }
    dpy_update(s->ds, 0, 0,
               s->last_scr_width, s->last_scr_height);
}
+
/* Coarse display mode tracked in s->graphic_mode (-1 forces full redraw). */
#define GMODE_TEXT 0
#define GMODE_GRAPH 1
#define GMODE_BLANK 2
+
/* Periodic display refresh entry point: select the pixel-conversion
 * helper for the host surface depth, detect text/graphic/blank mode
 * transitions (forcing a full redraw on change), and dispatch to the
 * matching draw routine. */
void vga_update_display(void)
{
    VGAState *s = vga_state;
    int full_update, graphic_mode;

    if (s->ds->depth == 0) {
        /* nothing to do */
    } else {
        switch(s->ds->depth) {
        case 8:
            s->rgb_to_pixel = rgb_to_pixel8_dup;
            break;
        case 15:
            s->rgb_to_pixel = rgb_to_pixel15_dup;
            break;
        default:
        case 16:
            s->rgb_to_pixel = rgb_to_pixel16_dup;
            break;
        case 32:
            s->rgb_to_pixel = rgb_to_pixel32_dup;
            break;
        }

        full_update = 0;
        /* AR index bit 5 clear means the display is blanked */
        if (!(s->ar_index & 0x20)) {
            graphic_mode = GMODE_BLANK;
        } else {
            graphic_mode = s->gr[6] & 1;
        }
        if (graphic_mode != s->graphic_mode) {
            s->graphic_mode = graphic_mode;
            full_update = 1;
        }
        switch(graphic_mode) {
        case GMODE_TEXT:
            vga_draw_text(s, full_update);
            break;
        case GMODE_GRAPH:
            vga_draw_graphic(s, full_update);
            break;
        case GMODE_BLANK:
        default:
            vga_draw_blank(s, full_update);
            break;
        }
    }
}
+
+/* force a full display refresh */
+void vga_invalidate_display(void)
+{
+ VGAState *s = vga_state;
+
+ s->last_width = -1;
+ s->last_height = -1;
+}
+
/* Reset the device model.  NOTE: this zeroes the *entire* state structure,
 * including vram_ptr, ds and the function-pointer hooks — callers must
 * re-establish those afterwards (vga_common_init does). */
static void vga_reset(VGAState *s)
{
    memset(s, 0, sizeof(VGAState));
#ifdef CONFIG_S3VGA
    /* chip ID for 8c968 */
    s->cr[0x2d] = 0x88;
    s->cr[0x2e] = 0xb0;
    s->cr[0x2f] = 0x01; /* XXX: check revision code */
    s->cr[0x30] = 0xe1;
#endif
    s->graphic_mode = -1; /* force full update */
}
+
/* MMIO handler tables for the legacy VGA window, ordered byte/word/long. */
static CPUReadMemoryFunc *vga_mem_read[3] = {
    vga_mem_readb,
    vga_mem_readw,
    vga_mem_readl,
};

static CPUWriteMemoryFunc *vga_mem_write[3] = {
    vga_mem_writeb,
    vga_mem_writew,
    vga_mem_writel,
};
+
/* Serialize the device state for savevm (version 1 format; must stay in
 * lock-step with vga_load).  A leading flag byte records whether the VBE
 * register block follows. */
static void vga_save(QEMUFile *f, void *opaque)
{
    VGAState *s = opaque;
    int i;

    qemu_put_be32s(f, &s->latch);
    qemu_put_8s(f, &s->sr_index);
    qemu_put_buffer(f, s->sr, 8);
    qemu_put_8s(f, &s->gr_index);
    qemu_put_buffer(f, s->gr, 16);
    qemu_put_8s(f, &s->ar_index);
    qemu_put_buffer(f, s->ar, 21);
    qemu_put_be32s(f, &s->ar_flip_flop);
    qemu_put_8s(f, &s->cr_index);
    qemu_put_buffer(f, s->cr, 256);
    qemu_put_8s(f, &s->msr);
    qemu_put_8s(f, &s->fcr);
    qemu_put_8s(f, &s->st00);
    qemu_put_8s(f, &s->st01);

    qemu_put_8s(f, &s->dac_state);
    qemu_put_8s(f, &s->dac_sub_index);
    qemu_put_8s(f, &s->dac_read_index);
    qemu_put_8s(f, &s->dac_write_index);
    qemu_put_buffer(f, s->dac_cache, 3);
    qemu_put_buffer(f, s->palette, 768);

    qemu_put_be32s(f, &s->bank_offset);
#ifdef CONFIG_BOCHS_VBE
    qemu_put_byte(f, 1); /* VBE state present */
    qemu_put_be16s(f, &s->vbe_index);
    for(i = 0; i < VBE_DISPI_INDEX_NB; i++)
        qemu_put_be16s(f, &s->vbe_regs[i]);
    qemu_put_be32s(f, &s->vbe_start_addr);
    qemu_put_be32s(f, &s->vbe_line_offset);
    qemu_put_be32s(f, &s->vbe_bank_mask);
#else
    qemu_put_byte(f, 0); /* no VBE state */
#endif
}
+
/* Restore device state saved by vga_save.  Returns 0 on success, -EINVAL
 * on a version mismatch or when the snapshot's VBE flag disagrees with
 * this build's CONFIG_BOCHS_VBE setting. */
static int vga_load(QEMUFile *f, void *opaque, int version_id)
{
    VGAState *s = opaque;
    int is_vbe, i;

    if (version_id != 1)
        return -EINVAL;

    qemu_get_be32s(f, &s->latch);
    qemu_get_8s(f, &s->sr_index);
    qemu_get_buffer(f, s->sr, 8);
    qemu_get_8s(f, &s->gr_index);
    qemu_get_buffer(f, s->gr, 16);
    qemu_get_8s(f, &s->ar_index);
    qemu_get_buffer(f, s->ar, 21);
    qemu_get_be32s(f, &s->ar_flip_flop);
    qemu_get_8s(f, &s->cr_index);
    qemu_get_buffer(f, s->cr, 256);
    qemu_get_8s(f, &s->msr);
    qemu_get_8s(f, &s->fcr);
    qemu_get_8s(f, &s->st00);
    qemu_get_8s(f, &s->st01);

    qemu_get_8s(f, &s->dac_state);
    qemu_get_8s(f, &s->dac_sub_index);
    qemu_get_8s(f, &s->dac_read_index);
    qemu_get_8s(f, &s->dac_write_index);
    qemu_get_buffer(f, s->dac_cache, 3);
    qemu_get_buffer(f, s->palette, 768);

    qemu_get_be32s(f, &s->bank_offset);
    is_vbe = qemu_get_byte(f);
#ifdef CONFIG_BOCHS_VBE
    if (!is_vbe)
        return -EINVAL;
    qemu_get_be16s(f, &s->vbe_index);
    for(i = 0; i < VBE_DISPI_INDEX_NB; i++)
        qemu_get_be16s(f, &s->vbe_regs[i]);
    qemu_get_be32s(f, &s->vbe_start_addr);
    qemu_get_be32s(f, &s->vbe_line_offset);
    qemu_get_be32s(f, &s->vbe_bank_mask);
#else
    if (is_vbe)
        return -EINVAL;
#endif

    /* force refresh */
    s->graphic_mode = -1;
    return 0;
}
+
/* PCI BAR map callback for the linear framebuffer: maps the whole vram
 * at the address assigned by the guest.
 * NOTE(review): 'size' and 'region_num' are ignored and s->vram_size is
 * used instead — presumably intentional since the BAR is sized to the
 * vram, but verify against the registration site. */
static void vga_map(PCIDevice *pci_dev, int region_num,
                    uint32_t addr, uint32_t size, int type)
{
    VGAState *s = vga_state;

    cpu_register_physical_memory(addr, s->vram_size, s->vram_offset);
}
+
+/* do the same job as vgabios before vgabios get ready */
+void vga_bios_init(VGAState *s)
+{
+ uint8_t palette_model[192] = {
+ 0, 0, 0, 0, 0, 170, 0, 170, 0, 0, 170, 170, 170, 0, 0, 170,
+ 0, 170, 170, 85, 0, 170, 170, 170, 85, 85, 85, 85, 85, 255, 85, 255,
+ 85, 85, 255, 255, 255, 85, 85, 255, 85, 255, 255, 255, 85, 255, 255, 255,
+ 0, 21, 0, 0, 21, 42, 0, 63, 0, 0, 63, 42, 42, 21, 0, 42,
+ 21, 42, 42, 63, 0, 42, 63, 42, 0, 21, 21, 0, 21, 63, 0, 63,
+ 21, 0, 63, 63, 42, 21, 21, 42, 21, 63, 42, 63, 21, 42, 63, 63,
+ 21, 0, 0, 21, 0, 42, 21, 42, 0, 21, 42, 42, 63, 0, 0, 63,
+ 0, 42, 63, 42, 0, 63, 42, 42, 21, 0, 21, 21, 0, 63, 21, 42,
+ 21, 21, 42, 63, 63, 0, 21, 63, 0, 63, 63, 42, 21, 63, 42, 63,
+ 21, 21, 0, 21, 21, 42, 21, 63, 0, 21, 63, 42, 63, 21, 0, 63,
+ 21, 42, 63, 63, 0, 63, 63, 42, 21, 21, 21, 21, 21, 63, 21, 63,
+ 21, 21, 63, 63, 63, 21, 21, 63, 21, 63, 63, 63, 21, 63, 63, 63
+ };
+
+ s->latch = 0;
+
+ s->sr_index = 3;
+ s->sr[0] = 3;
+ s->sr[1] = 0;
+ s->sr[2] = 3;
+ s->sr[3] = 0;
+ s->sr[4] = 2;
+ s->sr[5] = 0;
+ s->sr[6] = 0;
+ s->sr[7] = 0;
+
+ s->gr_index = 5;
+ s->gr[0] = 0;
+ s->gr[1] = 0;
+ s->gr[2] = 0;
+ s->gr[3] = 0;
+ s->gr[4] = 0;
+ s->gr[5] = 16;
+ s->gr[6] = 14;
+ s->gr[7] = 15;
+ s->gr[8] = 255;
+
+ /*changed by out 0x03c0*/
+ s->ar_index = 32;
+ s->ar[0] = 0;
+ s->ar[1] = 1;
+ s->ar[2] = 2;
+ s->ar[3] = 3;
+ s->ar[4] = 4;
+ s->ar[5] = 5;
+ s->ar[6] = 6;
+ s->ar[7] = 7;
+ s->ar[8] = 8;
+ s->ar[9] = 9;
+ s->ar[10] = 10;
+ s->ar[11] = 11;
+ s->ar[12] = 12;
+ s->ar[13] = 13;
+ s->ar[14] = 14;
+ s->ar[15] = 15;
+ s->ar[16] = 12;
+ s->ar[17] = 0;
+ s->ar[18] = 15;
+ s->ar[19] = 8;
+ s->ar[20] = 0;
+
+ s->ar_flip_flop = 1;
+
+ s->cr_index = 15;
+ s->cr[0] = 95;
+ s->cr[1] = 79;
+ s->cr[2] = 80;
+ s->cr[3] = 130;
+ s->cr[4] = 85;
+ s->cr[5] = 129;
+ s->cr[6] = 191;
+ s->cr[7] = 31;
+ s->cr[8] = 0;
+ s->cr[9] = 79;
+ s->cr[10] = 14;
+ s->cr[11] = 15;
+ s->cr[12] = 0;
+ s->cr[13] = 0;
+ s->cr[14] = 5;
+ s->cr[15] = 160;
+ s->cr[16] = 156;
+ s->cr[17] = 142;
+ s->cr[18] = 143;
+ s->cr[19] = 40;
+ s->cr[20] = 31;
+ s->cr[21] = 150;
+ s->cr[22] = 185;
+ s->cr[23] = 163;
+ s->cr[24] = 255;
+
+ s->msr = 103;
+ s->fcr = 0;
+ s->st00 = 0;
+ s->st01 = 0;
+
+ /*dac_* & platte will be initialized by os through out 0x03c8 & out 0c03c9(1:3) */
+ s->dac_state = 0;
+ s->dac_sub_index = 0;
+ s->dac_read_index = 0;
+ s->dac_write_index = 16;
+ s->dac_cache[0] = 255;
+ s->dac_cache[1] = 255;
+ s->dac_cache[2] = 255;
+
+ /*palette*/
+ memcpy(s->palette, palette_model, 192);
+
+ s->bank_offset= 0;
+ s->graphic_mode = -1;
+
+ /* TODO:add vbe support if enable it */
+
+ FILE *qemuf = fopen("/etc/xen/qemu-vgaram-bin", "rb");
+ if (!qemuf) {
+ fprintf(logfile, "open qemu vgaram binary failed!\n");
+ } else {
+ /*load vram contents, else vga console can't boot */
+ qemu_get_buffer(qemuf, s->vram_ptr, 256*1024);
+
+ fclose(qemuf);
+ }
+
+}
+
+void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base,
+ unsigned long vga_ram_offset, int vga_ram_size)
+{
+ int i, j, v, b;
+
+ for(i = 0;i < 256; i++) {
+ v = 0;
+ for(j = 0; j < 8; j++) {
+ v |= ((i >> j) & 1) << (j * 4);
+ }
+ expand4[i] = v;
+
+ v = 0;
+ for(j = 0; j < 4; j++) {
+ v |= ((i >> (2 * j)) & 3) << (j * 4);
+ }
+ expand2[i] = v;
+ }
+ for(i = 0; i < 16; i++) {
+ v = 0;
+ for(j = 0; j < 4; j++) {
+ b = ((i >> j) & 1);
+ v |= b << (2 * j);
+ v |= b << (2 * j + 1);
+ }
+ expand4to8[i] = v;
+ }
+
+ vga_reset(s);
+
+ /* qemu's vga mem is not detached from phys_ram_base and can cause DM abort
+ * when the guest writes vga mem, so allocate a new one */
+ s->vram_ptr = qemu_mallocz(vga_ram_size);
+
+ s->vram_offset = vga_ram_offset;
+ s->vram_size = vga_ram_size;
+ s->ds = ds;
+ s->get_bpp = vga_get_bpp;
+ s->get_offsets = vga_get_offsets;
+ s->get_resolution = vga_get_resolution;
+ /* XXX: currently needed for display */
+ vga_state = s;
+}
+
+
+int vga_initialize(PCIBus *bus, DisplayState *ds, uint8_t *vga_ram_base,
+ unsigned long vga_ram_offset, int vga_ram_size)
+{
+ VGAState *s;
+
+ s = qemu_mallocz(sizeof(VGAState));
+ if (!s)
+ return -1;
+
+ vga_common_init(s, ds, vga_ram_base, vga_ram_offset, vga_ram_size);
+
+ register_savevm("vga", 0, 1, vga_save, vga_load, s);
+
+ register_ioport_write(0x3c0, 16, 1, vga_ioport_write, s);
+
+ register_ioport_write(0x3b4, 2, 1, vga_ioport_write, s);
+ register_ioport_write(0x3d4, 2, 1, vga_ioport_write, s);
+ register_ioport_write(0x3ba, 1, 1, vga_ioport_write, s);
+ register_ioport_write(0x3da, 1, 1, vga_ioport_write, s);
+
+ register_ioport_read(0x3c0, 16, 1, vga_ioport_read, s);
+
+ register_ioport_read(0x3b4, 2, 1, vga_ioport_read, s);
+ register_ioport_read(0x3d4, 2, 1, vga_ioport_read, s);
+ register_ioport_read(0x3ba, 1, 1, vga_ioport_read, s);
+ register_ioport_read(0x3da, 1, 1, vga_ioport_read, s);
+ s->bank_offset = 0;
+
+#ifdef CONFIG_BOCHS_VBE
+ s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID0;
+ s->vbe_bank_mask = ((s->vram_size >> 16) - 1);
+#if defined (TARGET_I386)
+ register_ioport_read(0x1ce, 1, 2, vbe_ioport_read_index, s);
+ register_ioport_read(0x1cf, 1, 2, vbe_ioport_read_data, s);
+
+ register_ioport_write(0x1ce, 1, 2, vbe_ioport_write_index, s);
+ register_ioport_write(0x1cf, 1, 2, vbe_ioport_write_data, s);
+
+ /* old Bochs IO ports */
+ register_ioport_read(0xff80, 1, 2, vbe_ioport_read_index, s);
+ register_ioport_read(0xff81, 1, 2, vbe_ioport_read_data, s);
+
+ register_ioport_write(0xff80, 1, 2, vbe_ioport_write_index, s);
+ register_ioport_write(0xff81, 1, 2, vbe_ioport_write_data, s);
+#else
+ register_ioport_read(0x1ce, 1, 2, vbe_ioport_read_index, s);
+ register_ioport_read(0x1d0, 1, 2, vbe_ioport_read_data, s);
+
+ register_ioport_write(0x1ce, 1, 2, vbe_ioport_write_index, s);
+ register_ioport_write(0x1d0, 1, 2, vbe_ioport_write_data, s);
+#endif
+#endif /* CONFIG_BOCHS_VBE */
+
+ vga_io_memory = cpu_register_io_memory(0, vga_mem_read, vga_mem_write, s);
+ cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
+ vga_io_memory);
+
+ if (bus) {
+ PCIDevice *d;
+ uint8_t *pci_conf;
+
+ d = pci_register_device(bus, "VGA",
+ sizeof(PCIDevice),
+ -1, NULL, NULL);
+ pci_conf = d->config;
+ pci_conf[0x00] = 0x34; // dummy VGA (same as Bochs ID)
+ pci_conf[0x01] = 0x12;
+ pci_conf[0x02] = 0x11;
+ pci_conf[0x03] = 0x11;
+ pci_conf[0x0a] = 0x00; // VGA controller
+ pci_conf[0x0b] = 0x03;
+ pci_conf[0x0e] = 0x00; // header_type
+
+ /* XXX: vga_ram_size must be a power of two */
+ pci_register_io_region(d, 0, vga_ram_size,
+ PCI_ADDRESS_SPACE_MEM_PREFETCH, vga_map);
+ } else {
+#ifdef CONFIG_BOCHS_VBE
+ /* XXX: use optimized standard vga accesses */
+ cpu_register_physical_memory(VBE_DISPI_LFB_PHYSICAL_ADDRESS,
+ vga_ram_size, vga_ram_offset);
+#endif
+ }
+
+ vga_bios_init(s);
+ return 0;
+}
+
+/********************************************************/
+/* vga screen dump */
+
+static int vga_save_w, vga_save_h;
+
+static void vga_save_dpy_update(DisplayState *s,
+ int x, int y, int w, int h)
+{
+}
+
+static void vga_save_dpy_resize(DisplayState *s, int w, int h)
+{
+ s->linesize = w * 4;
+ s->data = qemu_malloc(h * s->linesize);
+ vga_save_w = w;
+ vga_save_h = h;
+}
+
+static void vga_save_dpy_refresh(DisplayState *s)
+{
+}
+
+static int ppm_save(const char *filename, uint8_t *data,
+ int w, int h, int linesize)
+{
+ FILE *f;
+ uint8_t *d, *d1;
+ unsigned int v;
+ int y, x;
+
+ f = fopen(filename, "wb");
+ if (!f)
+ return -1;
+ fprintf(f, "P6\n%d %d\n%d\n",
+ w, h, 255);
+ d1 = data;
+ for(y = 0; y < h; y++) {
+ d = d1;
+ for(x = 0; x < w; x++) {
+ v = *(uint32_t *)d;
+ fputc((v >> 16) & 0xff, f);
+ fputc((v >> 8) & 0xff, f);
+ fputc((v) & 0xff, f);
+ d += 4;
+ }
+ d1 += linesize;
+ }
+ fclose(f);
+ return 0;
+}
+
+/* save the vga display in a PPM image even if no display is
+ available */
+void vga_screen_dump(const char *filename)
+{
+ VGAState *s = vga_state;
+ DisplayState *saved_ds, ds1, *ds = &ds1;
+
+ /* XXX: this is a little hackish */
+ vga_invalidate_display();
+ saved_ds = s->ds;
+
+ memset(ds, 0, sizeof(DisplayState));
+ ds->dpy_update = vga_save_dpy_update;
+ ds->dpy_resize = vga_save_dpy_resize;
+ ds->dpy_refresh = vga_save_dpy_refresh;
+ ds->depth = 32;
+
+ s->ds = ds;
+ s->graphic_mode = -1;
+ vga_update_display();
+
+ if (ds->data) {
+ ppm_save(filename, ds->data, vga_save_w, vga_save_h,
+ s->ds->linesize);
+ qemu_free(ds->data);
+ }
+ s->ds = saved_ds;
+}
diff --git a/tools/ioemu/hw/vga_int.h b/tools/ioemu/hw/vga_int.h
new file mode 100644
index 0000000000..316688265b
--- /dev/null
+++ b/tools/ioemu/hw/vga_int.h
@@ -0,0 +1,168 @@
+/*
+ * QEMU internal VGA defines.
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define MSR_COLOR_EMULATION 0x01
+#define MSR_PAGE_SELECT 0x20
+
+#define ST01_V_RETRACE 0x08
+#define ST01_DISP_ENABLE 0x01
+
+/* bochs VBE support */
+//#define CONFIG_BOCHS_VBE
+
+#define VBE_DISPI_MAX_XRES 1024
+#define VBE_DISPI_MAX_YRES 768
+
+#define VBE_DISPI_INDEX_ID 0x0
+#define VBE_DISPI_INDEX_XRES 0x1
+#define VBE_DISPI_INDEX_YRES 0x2
+#define VBE_DISPI_INDEX_BPP 0x3
+#define VBE_DISPI_INDEX_ENABLE 0x4
+#define VBE_DISPI_INDEX_BANK 0x5
+#define VBE_DISPI_INDEX_VIRT_WIDTH 0x6
+#define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
+#define VBE_DISPI_INDEX_X_OFFSET 0x8
+#define VBE_DISPI_INDEX_Y_OFFSET 0x9
+#define VBE_DISPI_INDEX_NB 0xa
+
+#define VBE_DISPI_ID0 0xB0C0
+#define VBE_DISPI_ID1 0xB0C1
+#define VBE_DISPI_ID2 0xB0C2
+
+#define VBE_DISPI_DISABLED 0x00
+#define VBE_DISPI_ENABLED 0x01
+#define VBE_DISPI_LFB_ENABLED 0x40
+#define VBE_DISPI_NOCLEARMEM 0x80
+
+#define VBE_DISPI_LFB_PHYSICAL_ADDRESS 0xE0000000
+
+#ifdef CONFIG_BOCHS_VBE
+
+#define VGA_STATE_COMMON_BOCHS_VBE \
+ uint16_t vbe_index; \
+ uint16_t vbe_regs[VBE_DISPI_INDEX_NB]; \
+ uint32_t vbe_start_addr; \
+ uint32_t vbe_line_offset; \
+ uint32_t vbe_bank_mask;
+
+#else
+
+#define VGA_STATE_COMMON_BOCHS_VBE
+
+#endif /* !CONFIG_BOCHS_VBE */
+
+#define CH_ATTR_SIZE (160 * 100)
+#define VGA_MAX_HEIGHT 1024
+
+#define VGA_STATE_COMMON \
+ uint8_t *vram_ptr; \
+ unsigned long vram_offset; \
+ unsigned int vram_size; \
+ uint32_t latch; \
+ uint8_t sr_index; \
+ uint8_t sr[256]; \
+ uint8_t gr_index; \
+ uint8_t gr[256]; \
+ uint8_t ar_index; \
+ uint8_t ar[21]; \
+ int ar_flip_flop; \
+ uint8_t cr_index; \
+ uint8_t cr[256]; /* CRT registers */ \
+ uint8_t msr; /* Misc Output Register */ \
+ uint8_t fcr; /* Feature Control Register */ \
+ uint8_t st00; /* status 0 */ \
+ uint8_t st01; /* status 1 */ \
+ uint8_t dac_state; \
+ uint8_t dac_sub_index; \
+ uint8_t dac_read_index; \
+ uint8_t dac_write_index; \
+ uint8_t dac_cache[3]; /* used when writing */ \
+ uint8_t palette[768]; \
+ int32_t bank_offset; \
+ int (*get_bpp)(struct VGAState *s); \
+ void (*get_offsets)(struct VGAState *s, \
+ uint32_t *pline_offset, \
+ uint32_t *pstart_addr); \
+ void (*get_resolution)(struct VGAState *s, \
+ int *pwidth, \
+ int *pheight); \
+ VGA_STATE_COMMON_BOCHS_VBE \
+ /* display refresh support */ \
+ DisplayState *ds; \
+ uint32_t font_offsets[2]; \
+ int graphic_mode; \
+ uint8_t shift_control; \
+ uint8_t double_scan; \
+ uint32_t line_offset; \
+ uint32_t line_compare; \
+ uint32_t start_addr; \
+ uint32_t plane_updated; \
+ uint8_t last_cw, last_ch; \
+ uint32_t last_width, last_height; /* in chars or pixels */ \
+ uint32_t last_scr_width, last_scr_height; /* in pixels */ \
+ uint8_t cursor_start, cursor_end; \
+ uint32_t cursor_offset; \
+ unsigned int (*rgb_to_pixel)(unsigned int r, \
+ unsigned int g, unsigned b); \
+ /* hardware mouse cursor support */ \
+ uint32_t invalidated_y_table[VGA_MAX_HEIGHT / 32]; \
+ void (*cursor_invalidate)(struct VGAState *s); \
+ void (*cursor_draw_line)(struct VGAState *s, uint8_t *d, int y); \
+ /* tell for each page if it has been updated since the last time */ \
+ uint32_t last_palette[256]; \
+ uint32_t last_ch_attr[CH_ATTR_SIZE]; /* XXX: make it dynamic */
+
+
+typedef struct VGAState {
+ VGA_STATE_COMMON
+} VGAState;
+
+static inline int c6_to_8(int v)
+{
+ int b;
+ v &= 0x3f;
+ b = v & 1;
+ return (v << 2) | (b << 1) | b;
+}
+
+void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base,
+ unsigned long vga_ram_offset, int vga_ram_size);
+uint32_t vga_mem_readb(void *opaque, target_phys_addr_t addr);
+void vga_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val);
+void vga_invalidate_scanlines(VGAState *s, int y1, int y2);
+
+void vga_draw_cursor_line_8(uint8_t *d1, const uint8_t *src1,
+ int poffset, int w,
+ unsigned int color0, unsigned int color1,
+ unsigned int color_xor);
+void vga_draw_cursor_line_16(uint8_t *d1, const uint8_t *src1,
+ int poffset, int w,
+ unsigned int color0, unsigned int color1,
+ unsigned int color_xor);
+void vga_draw_cursor_line_32(uint8_t *d1, const uint8_t *src1,
+ int poffset, int w,
+ unsigned int color0, unsigned int color1,
+ unsigned int color_xor);
+
+extern const uint8_t sr_mask[8];
+extern const uint8_t gr_mask[16];
diff --git a/tools/ioemu/hw/vga_template.h b/tools/ioemu/hw/vga_template.h
new file mode 100644
index 0000000000..909571ebb3
--- /dev/null
+++ b/tools/ioemu/hw/vga_template.h
@@ -0,0 +1,519 @@
+/*
+ * QEMU VGA Emulator templates
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#if DEPTH == 8
+#define BPP 1
+#define PIXEL_TYPE uint8_t
+#elif DEPTH == 15 || DEPTH == 16
+#define BPP 2
+#define PIXEL_TYPE uint16_t
+#elif DEPTH == 32
+#define BPP 4
+#define PIXEL_TYPE uint32_t
+#else
+#error unsupport depth
+#endif
+
+#if DEPTH != 15
+
+static inline void glue(vga_draw_glyph_line_, DEPTH)(uint8_t *d,
+ uint32_t font_data,
+ uint32_t xorcol,
+ uint32_t bgcol)
+{
+#if BPP == 1
+ ((uint32_t *)d)[0] = (dmask16[(font_data >> 4)] & xorcol) ^ bgcol;
+ ((uint32_t *)d)[1] = (dmask16[(font_data >> 0) & 0xf] & xorcol) ^ bgcol;
+#elif BPP == 2
+ ((uint32_t *)d)[0] = (dmask4[(font_data >> 6)] & xorcol) ^ bgcol;
+ ((uint32_t *)d)[1] = (dmask4[(font_data >> 4) & 3] & xorcol) ^ bgcol;
+ ((uint32_t *)d)[2] = (dmask4[(font_data >> 2) & 3] & xorcol) ^ bgcol;
+ ((uint32_t *)d)[3] = (dmask4[(font_data >> 0) & 3] & xorcol) ^ bgcol;
+#else
+ ((uint32_t *)d)[0] = (-((font_data >> 7)) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[1] = (-((font_data >> 6) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[2] = (-((font_data >> 5) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[3] = (-((font_data >> 4) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[4] = (-((font_data >> 3) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[5] = (-((font_data >> 2) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[6] = (-((font_data >> 1) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[7] = (-((font_data >> 0) & 1) & xorcol) ^ bgcol;
+#endif
+}
+
+static void glue(vga_draw_glyph8_, DEPTH)(uint8_t *d, int linesize,
+ const uint8_t *font_ptr, int h,
+ uint32_t fgcol, uint32_t bgcol)
+{
+ uint32_t font_data, xorcol;
+
+ xorcol = bgcol ^ fgcol;
+ do {
+ font_data = font_ptr[0];
+ glue(vga_draw_glyph_line_, DEPTH)(d, font_data, xorcol, bgcol);
+ font_ptr += 4;
+ d += linesize;
+ } while (--h);
+}
+
+static void glue(vga_draw_glyph16_, DEPTH)(uint8_t *d, int linesize,
+ const uint8_t *font_ptr, int h,
+ uint32_t fgcol, uint32_t bgcol)
+{
+ uint32_t font_data, xorcol;
+
+ xorcol = bgcol ^ fgcol;
+ do {
+ font_data = font_ptr[0];
+ glue(vga_draw_glyph_line_, DEPTH)(d,
+ expand4to8[font_data >> 4],
+ xorcol, bgcol);
+ glue(vga_draw_glyph_line_, DEPTH)(d + 8 * BPP,
+ expand4to8[font_data & 0x0f],
+ xorcol, bgcol);
+ font_ptr += 4;
+ d += linesize;
+ } while (--h);
+}
+
+static void glue(vga_draw_glyph9_, DEPTH)(uint8_t *d, int linesize,
+ const uint8_t *font_ptr, int h,
+ uint32_t fgcol, uint32_t bgcol, int dup9)
+{
+ uint32_t font_data, xorcol, v;
+
+ xorcol = bgcol ^ fgcol;
+ do {
+ font_data = font_ptr[0];
+#if BPP == 1
+ cpu_to_32wu((uint32_t *)d, (dmask16[(font_data >> 4)] & xorcol) ^ bgcol);
+ v = (dmask16[(font_data >> 0) & 0xf] & xorcol) ^ bgcol;
+ cpu_to_32wu(((uint32_t *)d)+1, v);
+ if (dup9)
+ ((uint8_t *)d)[8] = v >> (24 * (1 - BIG));
+ else
+ ((uint8_t *)d)[8] = bgcol;
+
+#elif BPP == 2
+ cpu_to_32wu(((uint32_t *)d)+0, (dmask4[(font_data >> 6)] & xorcol) ^ bgcol);
+ cpu_to_32wu(((uint32_t *)d)+1, (dmask4[(font_data >> 4) & 3] & xorcol) ^ bgcol);
+ cpu_to_32wu(((uint32_t *)d)+2, (dmask4[(font_data >> 2) & 3] & xorcol) ^ bgcol);
+ v = (dmask4[(font_data >> 0) & 3] & xorcol) ^ bgcol;
+ cpu_to_32wu(((uint32_t *)d)+3, v);
+ if (dup9)
+ ((uint16_t *)d)[8] = v >> (16 * (1 - BIG));
+ else
+ ((uint16_t *)d)[8] = bgcol;
+#else
+ ((uint32_t *)d)[0] = (-((font_data >> 7)) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[1] = (-((font_data >> 6) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[2] = (-((font_data >> 5) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[3] = (-((font_data >> 4) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[4] = (-((font_data >> 3) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[5] = (-((font_data >> 2) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[6] = (-((font_data >> 1) & 1) & xorcol) ^ bgcol;
+ v = (-((font_data >> 0) & 1) & xorcol) ^ bgcol;
+ ((uint32_t *)d)[7] = v;
+ if (dup9)
+ ((uint32_t *)d)[8] = v;
+ else
+ ((uint32_t *)d)[8] = bgcol;
+#endif
+ font_ptr += 4;
+ d += linesize;
+ } while (--h);
+}
+
+/*
+ * 4 color mode
+ */
+static void glue(vga_draw_line2_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+ uint32_t plane_mask, *palette, data, v;
+ int x;
+
+ palette = s1->last_palette;
+ plane_mask = mask16[s1->ar[0x12] & 0xf];
+ width >>= 3;
+ for(x = 0; x < width; x++) {
+ data = ((uint32_t *)s)[0];
+ data &= plane_mask;
+ v = expand2[GET_PLANE(data, 0)];
+ v |= expand2[GET_PLANE(data, 2)] << 2;
+ ((PIXEL_TYPE *)d)[0] = palette[v >> 12];
+ ((PIXEL_TYPE *)d)[1] = palette[(v >> 8) & 0xf];
+ ((PIXEL_TYPE *)d)[2] = palette[(v >> 4) & 0xf];
+ ((PIXEL_TYPE *)d)[3] = palette[(v >> 0) & 0xf];
+
+ v = expand2[GET_PLANE(data, 1)];
+ v |= expand2[GET_PLANE(data, 3)] << 2;
+ ((PIXEL_TYPE *)d)[4] = palette[v >> 12];
+ ((PIXEL_TYPE *)d)[5] = palette[(v >> 8) & 0xf];
+ ((PIXEL_TYPE *)d)[6] = palette[(v >> 4) & 0xf];
+ ((PIXEL_TYPE *)d)[7] = palette[(v >> 0) & 0xf];
+ d += BPP * 8;
+ s += 4;
+ }
+}
+
+#if BPP == 1
+#define PUT_PIXEL2(d, n, v) ((uint16_t *)d)[(n)] = (v)
+#elif BPP == 2
+#define PUT_PIXEL2(d, n, v) ((uint32_t *)d)[(n)] = (v)
+#else
+#define PUT_PIXEL2(d, n, v) \
+((uint32_t *)d)[2*(n)] = ((uint32_t *)d)[2*(n)+1] = (v)
+#endif
+
+/*
+ * 4 color mode, dup2 horizontal
+ */
+static void glue(vga_draw_line2d2_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+ uint32_t plane_mask, *palette, data, v;
+ int x;
+
+ palette = s1->last_palette;
+ plane_mask = mask16[s1->ar[0x12] & 0xf];
+ width >>= 3;
+ for(x = 0; x < width; x++) {
+ data = ((uint32_t *)s)[0];
+ data &= plane_mask;
+ v = expand2[GET_PLANE(data, 0)];
+ v |= expand2[GET_PLANE(data, 2)] << 2;
+ PUT_PIXEL2(d, 0, palette[v >> 12]);
+ PUT_PIXEL2(d, 1, palette[(v >> 8) & 0xf]);
+ PUT_PIXEL2(d, 2, palette[(v >> 4) & 0xf]);
+ PUT_PIXEL2(d, 3, palette[(v >> 0) & 0xf]);
+
+ v = expand2[GET_PLANE(data, 1)];
+ v |= expand2[GET_PLANE(data, 3)] << 2;
+ PUT_PIXEL2(d, 4, palette[v >> 12]);
+ PUT_PIXEL2(d, 5, palette[(v >> 8) & 0xf]);
+ PUT_PIXEL2(d, 6, palette[(v >> 4) & 0xf]);
+ PUT_PIXEL2(d, 7, palette[(v >> 0) & 0xf]);
+ d += BPP * 16;
+ s += 4;
+ }
+}
+
+/*
+ * 16 color mode
+ */
+static void glue(vga_draw_line4_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+ uint32_t plane_mask, data, v, *palette;
+ int x;
+
+ palette = s1->last_palette;
+ plane_mask = mask16[s1->ar[0x12] & 0xf];
+ width >>= 3;
+ for(x = 0; x < width; x++) {
+ data = ((uint32_t *)s)[0];
+ data &= plane_mask;
+ v = expand4[GET_PLANE(data, 0)];
+ v |= expand4[GET_PLANE(data, 1)] << 1;
+ v |= expand4[GET_PLANE(data, 2)] << 2;
+ v |= expand4[GET_PLANE(data, 3)] << 3;
+ ((PIXEL_TYPE *)d)[0] = palette[v >> 28];
+ ((PIXEL_TYPE *)d)[1] = palette[(v >> 24) & 0xf];
+ ((PIXEL_TYPE *)d)[2] = palette[(v >> 20) & 0xf];
+ ((PIXEL_TYPE *)d)[3] = palette[(v >> 16) & 0xf];
+ ((PIXEL_TYPE *)d)[4] = palette[(v >> 12) & 0xf];
+ ((PIXEL_TYPE *)d)[5] = palette[(v >> 8) & 0xf];
+ ((PIXEL_TYPE *)d)[6] = palette[(v >> 4) & 0xf];
+ ((PIXEL_TYPE *)d)[7] = palette[(v >> 0) & 0xf];
+ d += BPP * 8;
+ s += 4;
+ }
+}
+
+/*
+ * 16 color mode, dup2 horizontal
+ */
+static void glue(vga_draw_line4d2_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+ uint32_t plane_mask, data, v, *palette;
+ int x;
+
+ palette = s1->last_palette;
+ plane_mask = mask16[s1->ar[0x12] & 0xf];
+ width >>= 3;
+ for(x = 0; x < width; x++) {
+ data = ((uint32_t *)s)[0];
+ data &= plane_mask;
+ v = expand4[GET_PLANE(data, 0)];
+ v |= expand4[GET_PLANE(data, 1)] << 1;
+ v |= expand4[GET_PLANE(data, 2)] << 2;
+ v |= expand4[GET_PLANE(data, 3)] << 3;
+ PUT_PIXEL2(d, 0, palette[v >> 28]);
+ PUT_PIXEL2(d, 1, palette[(v >> 24) & 0xf]);
+ PUT_PIXEL2(d, 2, palette[(v >> 20) & 0xf]);
+ PUT_PIXEL2(d, 3, palette[(v >> 16) & 0xf]);
+ PUT_PIXEL2(d, 4, palette[(v >> 12) & 0xf]);
+ PUT_PIXEL2(d, 5, palette[(v >> 8) & 0xf]);
+ PUT_PIXEL2(d, 6, palette[(v >> 4) & 0xf]);
+ PUT_PIXEL2(d, 7, palette[(v >> 0) & 0xf]);
+ d += BPP * 16;
+ s += 4;
+ }
+}
+
+/*
+ * 256 color mode, double pixels
+ *
+ * XXX: add plane_mask support (never used in standard VGA modes)
+ */
+static void glue(vga_draw_line8d2_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+ uint32_t *palette;
+ int x;
+
+ palette = s1->last_palette;
+ width >>= 3;
+ for(x = 0; x < width; x++) {
+ PUT_PIXEL2(d, 0, palette[s[0]]);
+ PUT_PIXEL2(d, 1, palette[s[1]]);
+ PUT_PIXEL2(d, 2, palette[s[2]]);
+ PUT_PIXEL2(d, 3, palette[s[3]]);
+ d += BPP * 8;
+ s += 4;
+ }
+}
+
+/*
+ * standard 256 color mode
+ *
+ * XXX: add plane_mask support (never used in standard VGA modes)
+ */
+static void glue(vga_draw_line8_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+ uint32_t *palette;
+ int x;
+
+ palette = s1->last_palette;
+ width >>= 3;
+ for(x = 0; x < width; x++) {
+ ((PIXEL_TYPE *)d)[0] = palette[s[0]];
+ ((PIXEL_TYPE *)d)[1] = palette[s[1]];
+ ((PIXEL_TYPE *)d)[2] = palette[s[2]];
+ ((PIXEL_TYPE *)d)[3] = palette[s[3]];
+ ((PIXEL_TYPE *)d)[4] = palette[s[4]];
+ ((PIXEL_TYPE *)d)[5] = palette[s[5]];
+ ((PIXEL_TYPE *)d)[6] = palette[s[6]];
+ ((PIXEL_TYPE *)d)[7] = palette[s[7]];
+ d += BPP * 8;
+ s += 8;
+ }
+}
+
+#endif /* DEPTH != 15 */
+
+
+/* XXX: optimize */
+
+/*
+ * 15 bit color
+ */
+static void glue(vga_draw_line15_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+#if DEPTH == 15 && defined(WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
+ memcpy(d, s, width * 2);
+#else
+ int w;
+ uint32_t v, r, g, b;
+
+ w = width;
+ do {
+ v = lduw_raw((void *)s);
+ r = (v >> 7) & 0xf8;
+ g = (v >> 2) & 0xf8;
+ b = (v << 3) & 0xf8;
+ ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, DEPTH)(r, g, b);
+ s += 2;
+ d += BPP;
+ } while (--w != 0);
+#endif
+}
+
+/*
+ * 16 bit color
+ */
+static void glue(vga_draw_line16_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+#if DEPTH == 16 && defined(WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
+ memcpy(d, s, width * 2);
+#else
+ int w;
+ uint32_t v, r, g, b;
+
+ w = width;
+ do {
+ v = lduw_raw((void *)s);
+ r = (v >> 8) & 0xf8;
+ g = (v >> 3) & 0xfc;
+ b = (v << 3) & 0xf8;
+ ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, DEPTH)(r, g, b);
+ s += 2;
+ d += BPP;
+ } while (--w != 0);
+#endif
+}
+
+/*
+ * 24 bit color
+ */
+static void glue(vga_draw_line24_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+ int w;
+ uint32_t r, g, b;
+
+ w = width;
+ do {
+#if defined(TARGET_WORDS_BIGENDIAN)
+ r = s[0];
+ g = s[1];
+ b = s[2];
+#else
+ b = s[0];
+ g = s[1];
+ r = s[2];
+#endif
+ ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, DEPTH)(r, g, b);
+ s += 3;
+ d += BPP;
+ } while (--w != 0);
+}
+
+/*
+ * 32 bit color
+ */
+static void glue(vga_draw_line32_, DEPTH)(VGAState *s1, uint8_t *d,
+ const uint8_t *s, int width)
+{
+#if DEPTH == 32 && defined(WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
+ memcpy(d, s, width * 4);
+#else
+ int w;
+ uint32_t r, g, b;
+
+ w = width;
+ do {
+#if defined(TARGET_WORDS_BIGENDIAN)
+ r = s[1];
+ g = s[2];
+ b = s[3];
+#else
+ b = s[0];
+ g = s[1];
+ r = s[2];
+#endif
+ ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, DEPTH)(r, g, b);
+ s += 4;
+ d += BPP;
+ } while (--w != 0);
+#endif
+}
+
+#if DEPTH != 15
+void glue(vga_draw_cursor_line_, DEPTH)(uint8_t *d1,
+ const uint8_t *src1,
+ int poffset, int w,
+ unsigned int color0,
+ unsigned int color1,
+ unsigned int color_xor)
+{
+ const uint8_t *plane0, *plane1;
+ int x, b0, b1;
+ uint8_t *d;
+
+ d = d1;
+ plane0 = src1;
+ plane1 = src1 + poffset;
+ for(x = 0; x < w; x++) {
+ b0 = (plane0[x >> 3] >> (7 - (x & 7))) & 1;
+ b1 = (plane1[x >> 3] >> (7 - (x & 7))) & 1;
+#if DEPTH == 8
+ switch(b0 | (b1 << 1)) {
+ case 0:
+ break;
+ case 1:
+ d[0] ^= color_xor;
+ break;
+ case 2:
+ d[0] = color0;
+ break;
+ case 3:
+ d[0] = color1;
+ break;
+ }
+#elif DEPTH == 16
+ switch(b0 | (b1 << 1)) {
+ case 0:
+ break;
+ case 1:
+ ((uint16_t *)d)[0] ^= color_xor;
+ break;
+ case 2:
+ ((uint16_t *)d)[0] = color0;
+ break;
+ case 3:
+ ((uint16_t *)d)[0] = color1;
+ break;
+ }
+#elif DEPTH == 32
+ switch(b0 | (b1 << 1)) {
+ case 0:
+ break;
+ case 1:
+ ((uint32_t *)d)[0] ^= color_xor;
+ break;
+ case 2:
+ ((uint32_t *)d)[0] = color0;
+ break;
+ case 3:
+ ((uint32_t *)d)[0] = color1;
+ break;
+ }
+#else
+#error unsupported depth
+#endif
+ d += BPP;
+ }
+}
+#endif
+
+#undef PUT_PIXEL2
+#undef DEPTH
+#undef BPP
+#undef PIXEL_TYPE
diff --git a/tools/ioemu/keyboard_rdesktop.c b/tools/ioemu/keyboard_rdesktop.c
new file mode 100644
index 0000000000..a98df10149
--- /dev/null
+++ b/tools/ioemu/keyboard_rdesktop.c
@@ -0,0 +1,165 @@
+/*
+ * QEMU keylayout reader: read rdesktop style keylayouts
+ *
+ * Copyright (c) 2004,2005 Johannes E. Schindelin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <ctype.h>
+
+#ifdef KEYBOARD_IGNORE_CASE
+#define STRCMP strcasecmp
+#else
+#define STRCMP strcmp
+#endif
+
+/* binary search through nameToKeysym */
+static int get_keysym(const char* name)
+{
+ int i1=-1,i2=sizeof(name2keysym)/sizeof(name2keysym_t),i3=i2/2,r;
+ while((r=STRCMP(name,name2keysym[i3].name))!=0) {
+ if(r<0)
+ i2=i3;
+ else
+ i1=i3;
+ i3=(i1+i2)/2;
+ if(i2-i1<2)
+ return 0;
+ }
+ return name2keysym[i3].keysym;
+}
+
+typedef unsigned short WORD;
+#define MAX_NORMAL_KEYCODE 512
+#define MAX_EXTRA_COUNT 256
+typedef struct {
+ WORD keysym2keycode[MAX_NORMAL_KEYCODE];
+ struct {
+ int keysym;
+ WORD keycode;
+ } keysym2keycode_extra[MAX_EXTRA_COUNT];
+ int extra_count;
+} kbd_layout_t;
+
+static int parse_int(const char* text)
+{
+ if(!strncmp(text,"0x",2)) {
+ int result=0;
+ sscanf(text+2,"%x",&result);
+ return result;
+ }
+ return atoi(text);
+}
+
+static kbd_layout_t* parse_keyboard_layout(const char* language,kbd_layout_t* k)
+{
+ FILE* f;
+ const char* prefix="/keymaps/";
+ char* file_name=malloc(strlen(prefix)+strlen(language)+strlen(bios_dir)+1);
+
+ if(!k)
+ k=calloc(sizeof(kbd_layout_t),1);
+ strcpy(file_name,bios_dir);
+ strcat(file_name,prefix);
+ strcat(file_name,language);
+ if(file_name[strlen(file_name)-1]=='\n')
+ file_name[strlen(file_name)-1]=0;
+ if(!(f=fopen(file_name,"r"))) {
+ term_printf("Warning: could not read keymap - falling back native keycodes!\n");
+ free(file_name);
+ return 0;
+ }
+ free(file_name);
+ while(!feof(f)) {
+ char line[1024];
+ fgets(line,1024,f);
+ if(line[0]=='#')
+ continue;
+ if(!strncmp(line,"map ",4))
+ continue;
+ if(!strncmp(line,"include ",8))
+ parse_keyboard_layout(line+8,k);
+ else {
+ char* end_of_keysym=line;
+ while(*end_of_keysym!=0 && *end_of_keysym!=' ')
+ end_of_keysym++;
+ if(*end_of_keysym) {
+ int keysym;
+ *end_of_keysym=0;
+ keysym=get_keysym(line);
+ if(keysym==0) {
+ term_printf("Warning: 1unknown keysym %s\n",line);
+ } else {
+ const char* rest=end_of_keysym+1;
+ int keycode=parse_int(rest);
+ /* if(keycode&0x80)
+ keycode=(keycode<<8)^0x80e0; */
+ if(keysym<MAX_NORMAL_KEYCODE) {
+ //term_printf("Setting keysym %s (%d) to %d\n",line,keysym,keycode);
+ k->keysym2keycode[keysym]=keycode;
+#ifndef KEYBOARD_IGNORE_CASE
+ line[0]=toupper(line[0]);
+ keysym=get_keysym(line);
+ if(keysym)
+ k->keysym2keycode[keysym]=keycode;
+#endif
+ } else {
+ if(k->extra_count>=MAX_EXTRA_COUNT) {
+ term_printf("Warning: Could not assign keysym %s (0x%x) because of memory constraints.\n",line,keysym);
+ } else {
+ //term_printf("Setting %d: %d,%d\n",k->extra_count,keysym,keycode);
+ k->keysym2keycode_extra[k->extra_count].keysym=keysym;
+ k->keysym2keycode_extra[k->extra_count].keycode=keycode;
+ k->extra_count++;
+ }
+ }
+ }
+ }
+ }
+ }
+ fclose(f);
+ return k;
+}
+
+static void* init_keyboard_layout(const char* language)
+{
+ return parse_keyboard_layout(language,0);
+}
+
+static WORD keysym2scancode(void* kbd_layout, int keysym)
+{
+ kbd_layout_t* k=kbd_layout;
+ if(keysym<MAX_NORMAL_KEYCODE) {
+ if(k->keysym2keycode[keysym]==0)
+ term_printf("Warning: no scancode found for keysym %d\n",keysym);
+ return k->keysym2keycode[keysym];
+ } else {
+ int i;
+#ifdef XK_ISO_Left_Tab
+ if(keysym==XK_ISO_Left_Tab)
+ keysym=XK_Tab;
+#endif
+ for(i=0;i<k->extra_count;i++)
+ if(k->keysym2keycode_extra[i].keysym==keysym)
+ return k->keysym2keycode_extra[i].keycode;
+ }
+ return 0;
+}
+
diff --git a/tools/ioemu/keymaps/ar b/tools/ioemu/keymaps/ar
new file mode 100644
index 0000000000..c430c03bb3
--- /dev/null
+++ b/tools/ioemu/keymaps/ar
@@ -0,0 +1,98 @@
+# generated from XKB map ar
+include common
+map 0x401
+exclam 0x02 shift
+at 0x03 shift
+numbersign 0x04 shift
+dollar 0x05 shift
+percent 0x06 shift
+asciicircum 0x07 shift
+ampersand 0x08 shift
+asterisk 0x09 shift
+parenleft 0x0a shift
+parenright 0x0b shift
+minus 0x0c
+underscore 0x0c shift
+equal 0x0d
+plus 0x0d shift
+Arabic_dad 0x10 altgr
+Arabic_fatha 0x10 shift altgr
+Arabic_sad 0x11 altgr
+Arabic_fathatan 0x11 shift altgr
+Arabic_theh 0x12 altgr
+Arabic_damma 0x12 shift altgr
+Arabic_qaf 0x13 altgr
+Arabic_dammatan 0x13 shift altgr
+Arabic_feh 0x14 altgr
+UFEF9 0x14 shift altgr
+Arabic_ghain 0x15 altgr
+Arabic_hamzaunderalef 0x15 shift altgr
+Arabic_ain 0x16 altgr
+grave 0x16 shift altgr
+Arabic_ha 0x17 altgr
+division 0x17 shift altgr
+Arabic_khah 0x18 altgr
+multiply 0x18 shift altgr
+Arabic_hah 0x19 altgr
+Arabic_semicolon 0x19 shift altgr
+bracketleft 0x1a
+braceleft 0x1a shift
+Arabic_jeem 0x1a altgr
+bracketright 0x1b
+braceright 0x1b shift
+Arabic_dal 0x1b altgr
+Arabic_sheen 0x1e altgr
+backslash 0x1e shift altgr
+Arabic_seen 0x1f altgr
+Arabic_yeh 0x20 altgr
+bracketleft 0x20 shift altgr
+Arabic_beh 0x21 altgr
+bracketright 0x21 shift altgr
+Arabic_lam 0x22 altgr
+UFEF7 0x22 shift altgr
+Arabic_alef 0x23 altgr
+Arabic_hamzaonalef 0x23 shift altgr
+Arabic_teh 0x24 altgr
+Arabic_tatweel 0x24 shift altgr
+Arabic_noon 0x25 altgr
+Arabic_comma 0x25 shift altgr
+Arabic_meem 0x26 altgr
+slash 0x26 shift altgr
+semicolon 0x27
+colon 0x27 shift
+Arabic_kaf 0x27 altgr
+apostrophe 0x28
+quotedbl 0x28 shift
+Arabic_tah 0x28 altgr
+grave 0x29
+asciitilde 0x29 shift
+Arabic_thal 0x29 altgr
+Arabic_shadda 0x29 shift altgr
+backslash 0x2b
+bar 0x2b shift
+less 0x2b altgr
+greater 0x2b shift altgr
+Arabic_hamzaonyeh 0x2c altgr
+asciitilde 0x2c shift altgr
+Arabic_hamza 0x2d altgr
+Arabic_sukun 0x2d shift altgr
+Arabic_hamzaonwaw 0x2e altgr
+Arabic_kasra 0x2e shift altgr
+Arabic_ra 0x2f altgr
+Arabic_kasratan 0x2f shift altgr
+UFEFB 0x30 altgr
+UFEF5 0x30 shift altgr
+Arabic_alefmaksura 0x31 altgr
+Arabic_maddaonalef 0x31 shift altgr
+Arabic_tehmarbuta 0x32 altgr
+apostrophe 0x32 shift altgr
+comma 0x33
+less 0x33 shift
+Arabic_waw 0x33 altgr
+period 0x34
+greater 0x34 shift
+Arabic_zain 0x34 altgr
+slash 0x35
+question 0x35 shift
+Arabic_zah 0x35 altgr
+Arabic_question_mark 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/common b/tools/ioemu/keymaps/common
new file mode 100644
index 0000000000..0b53f1c254
--- /dev/null
+++ b/tools/ioemu/keymaps/common
@@ -0,0 +1,157 @@
+include modifiers
+
+#
+# Top row
+#
+1 0x2
+2 0x3
+3 0x4
+4 0x5
+5 0x6
+6 0x7
+7 0x8
+8 0x9
+9 0xa
+0 0xb
+BackSpace 0xe
+
+#
+# QWERTY first row
+#
+Tab 0xf localstate
+ISO_Left_Tab 0xf shift
+q 0x10 addupper
+w 0x11 addupper
+e 0x12 addupper
+r 0x13 addupper
+t 0x14 addupper
+y 0x15 addupper
+u 0x16 addupper
+i 0x17 addupper
+o 0x18 addupper
+p 0x19 addupper
+
+#
+# QWERTY second row
+#
+a 0x1e addupper
+s 0x1f addupper
+d 0x20 addupper
+f 0x21 addupper
+g 0x22 addupper
+h 0x23 addupper
+j 0x24 addupper
+k 0x25 addupper
+l 0x26 addupper
+Return 0x1c localstate
+
+#
+# QWERTY third row
+#
+z 0x2c addupper
+x 0x2d addupper
+c 0x2e addupper
+v 0x2f addupper
+b 0x30 addupper
+n 0x31 addupper
+m 0x32 addupper
+
+space 0x39 localstate
+
+less 0x56
+greater 0x56 shift
+bar 0x56 altgr
+brokenbar 0x56 shift altgr
+
+#
+# Esc and Function keys
+#
+Escape 0x1 localstate
+F1 0x3b localstate
+F2 0x3c localstate
+F3 0x3d localstate
+F4 0x3e localstate
+F5 0x3f localstate
+F6 0x40 localstate
+F7 0x41 localstate
+F8 0x42 localstate
+F9 0x43 localstate
+F10 0x44 localstate
+F11 0x57 localstate
+F12 0x58 localstate
+
+# Printscreen, Scrollock and Pause
+# Printscreen really requires four scancodes (0xe0, 0x2a, 0xe0, 0x37),
+# but (0xe0, 0x37) seems to work.
+Print 0xb7 localstate
+Sys_Req 0xb7 localstate
+Execute 0xb7 localstate
+Scroll_Lock 0x46
+
+#
+# Insert - PgDown
+#
+Insert 0xd2 localstate
+Delete 0xd3 localstate
+Home 0xc7 localstate
+End 0xcf localstate
+Page_Up 0xc9 localstate
+Page_Down 0xd1 localstate
+
+#
+# Arrow keys
+#
+Left 0xcb localstate
+Up 0xc8 localstate
+Down 0xd0 localstate
+Right 0xcd localstate
+
+#
+# Numpad
+#
+Num_Lock 0x45
+KP_Divide 0xb5
+KP_Multiply 0x37
+KP_Subtract 0x4a
+KP_Add 0x4e
+KP_Enter 0x9c
+
+KP_Decimal 0x53 numlock
+KP_Separator 0x53 numlock
+KP_Delete 0x53
+
+KP_0 0x52 numlock
+KP_Insert 0x52
+
+KP_1 0x4f numlock
+KP_End 0x4f
+
+KP_2 0x50 numlock
+KP_Down 0x50
+
+KP_3 0x51 numlock
+KP_Next 0x51
+
+KP_4 0x4b numlock
+KP_Left 0x4b
+
+KP_5 0x4c numlock
+KP_Begin 0x4c
+
+KP_6 0x4d numlock
+KP_Right 0x4d
+
+KP_7 0x47 numlock
+KP_Home 0x47
+
+KP_8 0x48 numlock
+KP_Up 0x48
+
+KP_9 0x49 numlock
+KP_Prior 0x49
+
+Caps_Lock 0x3a
+#
+# Inhibited keys
+#
+Multi_key 0x0 inhibit
diff --git a/tools/ioemu/keymaps/convert-map b/tools/ioemu/keymaps/convert-map
new file mode 100644
index 0000000000..889b70338d
--- /dev/null
+++ b/tools/ioemu/keymaps/convert-map
@@ -0,0 +1,63 @@
+#!/usr/bin/env python2
+# -*-Python-*-
+#
+#
+# Copyright (C) 2001 Peter Åstrand <peter@cendio.se>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+import sys
+
+def main():
+ f = open(sys.argv[1])
+ while 1:
+ line = f.readline()
+ if not line: break
+
+ if line.startswith("#") or line.startswith("include"):
+ print line,
+ continue
+
+ fields = line.split()
+
+ if line.startswith("map"):
+ print "map 0x%s" % fields[1]
+ continue
+
+ scancode = fields[0]
+ for pos in range(1, len(fields)):
+ keysym = fields[pos]
+
+ if pos == 1:
+ modifiers = ""
+ elif pos == 2:
+ modifiers = "shift"
+ elif pos == 3:
+ modifiers = "altgr"
+ elif pos == 4:
+ modifiers = "shift altgr"
+ else:
+ raise("Invalid line: %s" % line)
+
+ print "%s 0x%s %s" % (keysym, scancode, modifiers)
+
+
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ print "Convert old-style keymaps to new style"
+ print "Usage: %s <old-style-keymap>" % sys.argv[0]
+ sys.exit(1)
+ else:
+ main()
diff --git a/tools/ioemu/keymaps/da b/tools/ioemu/keymaps/da
new file mode 100644
index 0000000000..3884dcf145
--- /dev/null
+++ b/tools/ioemu/keymaps/da
@@ -0,0 +1,120 @@
+# generated from XKB map dk
+include common
+map 0x406
+exclam 0x02 shift
+exclamdown 0x02 altgr
+onesuperior 0x02 shift altgr
+quotedbl 0x03 shift
+at 0x03 altgr
+twosuperior 0x03 shift altgr
+numbersign 0x04 shift
+sterling 0x04 altgr
+threesuperior 0x04 shift altgr
+currency 0x05 shift
+dollar 0x05 altgr
+onequarter 0x05 shift altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+cent 0x06 shift altgr
+ampersand 0x07 shift
+yen 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+division 0x08 shift altgr
+parenleft 0x09 shift
+bracketleft 0x09 altgr
+guillemotleft 0x09 shift altgr
+parenright 0x0a shift
+bracketright 0x0a altgr
+guillemotright 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+plus 0x0c
+question 0x0c shift
+plusminus 0x0c altgr
+questiondown 0x0c shift altgr
+dead_acute 0x0d
+dead_grave 0x0d shift
+bar 0x0d altgr
+brokenbar 0x0d shift altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+cent 0x12 shift altgr
+registered 0x13 altgr
+thorn 0x14 altgr
+THORN 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oe 0x18 altgr
+OE 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+aring 0x1a
+Aring 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+dead_diaeresis 0x1b
+dead_circumflex 0x1b shift
+dead_tilde 0x1b altgr
+dead_caron 0x1b shift altgr
+ordfeminine 0x1e altgr
+masculine 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+ae 0x27
+AE 0x27 shift
+oslash 0x28
+Ooblique 0x28 shift
+dead_caron 0x28 shift altgr
+onehalf 0x29
+section 0x29 shift
+threequarters 0x29 altgr
+paragraph 0x29 shift altgr
+apostrophe 0x2b
+asterisk 0x2b shift
+dead_doubleacute 0x2b altgr
+multiply 0x2b shift altgr
+guillemotleft 0x2c altgr
+guillemotright 0x2d altgr
+copyright 0x2e altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+dead_cedilla 0x33 altgr
+dead_ogonek 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+dead_abovedot 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+hyphen 0x35 altgr
+macron 0x35 shift altgr
+nobreakspace 0x39 altgr
+less 0x56
+greater 0x56 shift
+backslash 0x56 altgr
+notsign 0x56 shift altgr
diff --git a/tools/ioemu/keymaps/de b/tools/ioemu/keymaps/de
new file mode 100644
index 0000000000..ed929c743b
--- /dev/null
+++ b/tools/ioemu/keymaps/de
@@ -0,0 +1,114 @@
+# generated from XKB map de
+include common
+map 0x407
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+quotedbl 0x03 shift
+twosuperior 0x03 altgr
+oneeighth 0x03 shift altgr
+section 0x04 shift
+threesuperior 0x04 altgr
+sterling 0x04 shift altgr
+dollar 0x05 shift
+onequarter 0x05 altgr
+currency 0x05 shift altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+ampersand 0x07 shift
+threequarters 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+parenleft 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+parenright 0x0a shift
+bracketright 0x0a altgr
+plusminus 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+ssharp 0x0c
+question 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+acute 0x0d
+dead_acute 0x0d
+grave 0x0d shift
+dead_grave 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+EuroSign 0x12 altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+z 0x15 addupper
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+udiaeresis 0x1a
+Udiaeresis 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+plus 0x1b
+asterisk 0x1b shift
+asciitilde 0x1b altgr
+dead_tilde 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+odiaeresis 0x27
+Odiaeresis 0x27 shift
+dead_doubleacute 0x27 altgr
+adiaeresis 0x28
+Adiaeresis 0x28 shift
+dead_caron 0x28 shift altgr
+asciicircum 0x29
+dead_circumflex 0x29
+degree 0x29 shift
+notsign 0x29 altgr
+numbersign 0x2b
+apostrophe 0x2b shift
+dead_breve 0x2b shift altgr
+y 0x2c addupper
+guillemotleft 0x2c altgr
+guillemotright 0x2d altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/de-ch b/tools/ioemu/keymaps/de-ch
new file mode 100644
index 0000000000..f83837b444
--- /dev/null
+++ b/tools/ioemu/keymaps/de-ch
@@ -0,0 +1,169 @@
+# rdesktop Swiss-German (de-ch) keymap file
+# 2003-06-03 by noldi@tristar.ch
+#
+include common
+map 0x00000807
+#
+# Scan Code 1
+section 0x29
+degree 0x29 shift
+notsign 0x29 altgr inhibit
+#
+# Scan Code 2
+plus 0x2 shift
+brokenbar 0x02 altgr
+#
+# Scan Code 3
+quotedbl 0x03 shift
+at 0x03 altgr
+#
+# Scan Code 4
+asterisk 0x04 shift
+numbersign 0x04 altgr
+#
+# Scan Code 5
+ccedilla 0x05 shift
+onequarter 0x05 altgr inhibit
+#
+# Scan Code 6
+percent 0x06 shift
+onehalf 0x06 altgr inhibit
+#
+# Scan Code 7
+ampersand 0x07 shift
+notsign 0x07 altgr
+#
+# Scan Code 8
+slash 0x08 shift
+bar 0x08 altgr
+#
+# Scan Code 9
+parenleft 0x09 shift
+cent 0x09 altgr
+#
+# Scan Code 10
+parenright 0x0a shift
+#
+# Scan Code 11
+equal 0x0b shift
+braceright 0x0b altgr inhibit
+#
+# Scan Code 12
+apostrophe 0x0c
+question 0x0c shift
+dead_acute 0x0c altgr
+#
+# Scan Code 13
+dead_circumflex 0x0d
+dead_grave 0x0d shift
+dead_tilde 0x0d altgr
+#
+# Scan Code 19
+EuroSign 0x12 altgr
+#
+# Scan Code 22
+z 0x15 addupper
+#
+# Scan Code 27
+udiaeresis 0x1a
+egrave 0x1a shift
+bracketleft 0x1a altgr
+#
+# Scan Code 28
+dead_diaeresis 0x1b
+exclam 0x1b shift
+bracketright 0x1b altgr
+#
+# Scan Code 40
+odiaeresis 0x27
+eacute 0x27 shift
+#
+# Scan Code 41
+adiaeresis 0x28
+agrave 0x28 shift
+braceleft 0x28 altgr
+#
+# Scan Code 42 (only on international keyboards)
+dollar 0x2b
+sterling 0x2b shift
+braceright 0x2b altgr
+#
+# Scan Code 45 (only on international keyboards)
+backslash 0x56 altgr
+#
+# Scan Code 46
+y 0x2c addupper
+#
+# Scan Code 53
+comma 0x33
+semicolon 0x33 shift
+#
+# Scan Code 54
+period 0x34
+colon 0x34 shift
+#
+# Scan Code 55
+minus 0x35
+underscore 0x35 shift
+#
+# Suppress Windows unsupported AltGr keys
+#
+# Scan Code 17
+paragraph 0x10 altgr inhibit
+#
+# Scan Code 21
+tslash 0x14 altgr inhibit
+#
+# Scan Code 22
+leftarrow 0x15 altgr inhibit
+#
+# Scan Code 23
+downarrow 0x16 altgr inhibit
+#
+# Scan Code 24
+rightarrow 0x17 altgr inhibit
+#
+# Scan Code 25
+oslash 0x18 altgr inhibit
+#
+# Scan Code 26
+thorn 0x19 altgr inhibit
+#
+# Scan Code 31
+ae 0x1e altgr inhibit
+#
+# Scan Code 32
+ssharp 0x1f altgr inhibit
+#
+# Scan Code 33
+eth 0x20 altgr inhibit
+#
+# Scan Code 34
+dstroke 0x21 altgr inhibit
+#
+# Scan Code 35
+eng 0x22 altgr inhibit
+#
+# Scan Code 36
+hstroke 0x23 altgr inhibit
+#
+# Scan Code 38
+kra 0x25 altgr inhibit
+#
+# Scan Code 39
+lstroke 0x26 altgr inhibit
+#
+# Scan Code 46
+guillemotleft 0x2c altgr inhibit
+#
+# Scan Code 47
+guillemotright 0x2d altgr inhibit
+#
+# Scan Code 49
+leftdoublequotemark 0x2f altgr inhibit
+#
+# Scan Code 50
+rightdoublequotemark 0x30 altgr inhibit
+#
+# Scan Code 52
+mu 0x32 altgr inhibit
diff --git a/tools/ioemu/keymaps/en-gb b/tools/ioemu/keymaps/en-gb
new file mode 100644
index 0000000000..b45f06c7ce
--- /dev/null
+++ b/tools/ioemu/keymaps/en-gb
@@ -0,0 +1,119 @@
+# generated from XKB map gb
+include common
+map 0x809
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+quotedbl 0x03 shift
+twosuperior 0x03 altgr
+oneeighth 0x03 shift altgr
+sterling 0x04 shift
+threesuperior 0x04 altgr
+dollar 0x05 shift
+EuroSign 0x05 altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+asciicircum 0x07 shift
+threequarters 0x07 altgr
+fiveeighths 0x07 shift altgr
+ampersand 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+asterisk 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+parenleft 0x0a shift
+bracketright 0x0a altgr
+plusminus 0x0a shift altgr
+parenright 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+minus 0x0c
+underscore 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+equal 0x0d
+plus 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+bracketleft 0x1a
+braceleft 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+bracketright 0x1b
+braceright 0x1b shift
+dead_tilde 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+semicolon 0x27
+colon 0x27 shift
+dead_acute 0x27 altgr
+dead_doubleacute 0x27 shift altgr
+apostrophe 0x28
+at 0x28 shift
+dead_circumflex 0x28 altgr
+dead_caron 0x28 shift altgr
+grave 0x29
+notsign 0x29 shift
+bar 0x29 altgr
+numbersign 0x2b
+asciitilde 0x2b shift
+dead_grave 0x2b altgr
+dead_breve 0x2b shift altgr
+guillemotleft 0x2c altgr
+less 0x2c shift altgr
+guillemotright 0x2d altgr
+greater 0x2d shift altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+less 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+greater 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+slash 0x35
+question 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
+backslash 0x56
+bar 0x56 shift
diff --git a/tools/ioemu/keymaps/en-us b/tools/ioemu/keymaps/en-us
new file mode 100644
index 0000000000..f5784bbb39
--- /dev/null
+++ b/tools/ioemu/keymaps/en-us
@@ -0,0 +1,35 @@
+# generated from XKB map us
+include common
+map 0x409
+exclam 0x02 shift
+at 0x03 shift
+numbersign 0x04 shift
+dollar 0x05 shift
+percent 0x06 shift
+asciicircum 0x07 shift
+ampersand 0x08 shift
+asterisk 0x09 shift
+parenleft 0x0a shift
+parenright 0x0b shift
+minus 0x0c
+underscore 0x0c shift
+equal 0x0d
+plus 0x0d shift
+bracketleft 0x1a
+braceleft 0x1a shift
+bracketright 0x1b
+braceright 0x1b shift
+semicolon 0x27
+colon 0x27 shift
+apostrophe 0x28
+quotedbl 0x28 shift
+grave 0x29
+asciitilde 0x29 shift
+backslash 0x2b
+bar 0x2b shift
+comma 0x33
+less 0x33 shift
+period 0x34
+greater 0x34 shift
+slash 0x35
+question 0x35 shift
diff --git a/tools/ioemu/keymaps/es b/tools/ioemu/keymaps/es
new file mode 100644
index 0000000000..0c29eec5ad
--- /dev/null
+++ b/tools/ioemu/keymaps/es
@@ -0,0 +1,105 @@
+# generated from XKB map es
+include common
+map 0x40a
+exclam 0x02 shift
+bar 0x02 altgr
+quotedbl 0x03 shift
+at 0x03 altgr
+oneeighth 0x03 shift altgr
+periodcentered 0x04 shift
+numbersign 0x04 altgr
+sterling 0x04 shift altgr
+dollar 0x05 shift
+asciitilde 0x05 altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+ampersand 0x07 shift
+notsign 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+seveneighths 0x08 shift altgr
+parenleft 0x09 shift
+trademark 0x09 shift altgr
+parenright 0x0a shift
+plusminus 0x0a shift altgr
+equal 0x0b shift
+degree 0x0b shift altgr
+apostrophe 0x0c
+question 0x0c shift
+exclamdown 0x0d
+questiondown 0x0d shift
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+dead_grave 0x1a
+dead_circumflex 0x1a shift
+bracketleft 0x1a altgr
+dead_abovering 0x1a shift altgr
+plus 0x1b
+asterisk 0x1b shift
+bracketright 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+ntilde 0x27
+Ntilde 0x27 shift
+dead_doubleacute 0x27 shift altgr
+dead_acute 0x28
+dead_diaeresis 0x28 shift
+braceleft 0x28 altgr
+masculine 0x29
+ordfeminine 0x29 shift
+backslash 0x29 altgr
+ccedilla 0x2b
+Ccedilla 0x2b shift
+braceright 0x2b altgr
+dead_breve 0x2b shift altgr
+guillemotleft 0x2c altgr
+less 0x56
+greater 0x56 shift
+guillemotright 0x2d altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+comma 0x33
+semicolon 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+division 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/et b/tools/ioemu/keymaps/et
new file mode 100644
index 0000000000..b5a73fef70
--- /dev/null
+++ b/tools/ioemu/keymaps/et
@@ -0,0 +1,86 @@
+map 0x00000425
+include common
+
+#
+# Top row
+#
+dead_caron 0x29
+dead_tilde 0x29 shift
+
+# 1
+exclam 0x2 shift
+
+# 2
+quotedbl 0x3 shift
+at 0x3 altgr
+
+# 3
+numbersign 0x4 shift
+sterling 0x4 altgr
+# 4
+currency 0x5 shift
+dollar 0x5 altgr
+# 5
+percent 0x6 shift
+# 6
+ampersand 0x7 shift
+# 7
+slash 0x8 shift
+braceleft 0x8 altgr
+# 8
+parenleft 0x9 shift
+bracketleft 0x9 altgr
+# 9
+parenright 0xa shift
+bracketright 0xa altgr
+# 0
+equal 0xb shift
+braceright 0xb altgr
+
+plus 0xc
+question 0xc shift
+backslash 0xc altgr
+
+acute 0xd
+dead_acute 0xd
+grave 0xd shift
+dead_grave 0xd shift
+
+#
+# QWERTY first row
+#
+EuroSign 0x12 altgr
+udiaeresis 0x1a
+Udiaeresis 0x1a shift
+otilde 0x1b
+Otilde 0x1b shift
+section 0x1b altgr
+
+#
+# QWERTY second row
+#
+scaron 0x1f altgr
+Scaron 0x1f altgr shift
+odiaeresis 0x27
+Odiaeresis 0x27 shift
+adiaeresis 0x28
+Adiaeresis 0x28 shift
+asciicircum 0x28 altgr
+apostrophe 0x2b
+asterisk 0x2b shift
+onehalf 0x2b altgr
+#
+# QWERTY third row
+#
+less 0x56
+greater 0x56 shift
+bar 0x56 altgr
+zcaron 0x2c altgr
+Zcaron 0x2c altgr shift
+comma 0x33
+semicolon 0x33 shift
+period 0x34
+colon 0x34 shift
+minus 0x35
+underscore 0x35 shift
+
diff --git a/tools/ioemu/keymaps/fi b/tools/ioemu/keymaps/fi
new file mode 100644
index 0000000000..2a4e0f0454
--- /dev/null
+++ b/tools/ioemu/keymaps/fi
@@ -0,0 +1,124 @@
+# generated from XKB map se_FI
+include common
+map 0x40b
+exclam 0x02 shift
+exclamdown 0x02 altgr
+onesuperior 0x02 shift altgr
+quotedbl 0x03 shift
+at 0x03 altgr
+twosuperior 0x03 shift altgr
+numbersign 0x04 shift
+sterling 0x04 altgr
+threesuperior 0x04 shift altgr
+currency 0x05 shift
+dollar 0x05 altgr
+onequarter 0x05 shift altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+cent 0x06 shift altgr
+ampersand 0x07 shift
+yen 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+division 0x08 shift altgr
+parenleft 0x09 shift
+bracketleft 0x09 altgr
+guillemotleft 0x09 shift altgr
+parenright 0x0a shift
+bracketright 0x0a altgr
+guillemotright 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+plus 0x0c
+question 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+dead_acute 0x0d
+dead_grave 0x0d shift
+plusminus 0x0d altgr
+notsign 0x0d shift altgr
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+cent 0x12 shift altgr
+registered 0x13 altgr
+thorn 0x14 altgr
+THORN 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oe 0x18 altgr
+OE 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+aring 0x1a
+Aring 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+dead_diaeresis 0x1b
+dead_circumflex 0x1b shift
+dead_tilde 0x1b altgr
+dead_caron 0x1b shift altgr
+ordfeminine 0x1e altgr
+masculine 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+ampersand 0x25 shift altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+odiaeresis 0x27
+Odiaeresis 0x27 shift
+oslash 0x27 altgr
+Ooblique 0x27 shift altgr
+adiaeresis 0x28
+Adiaeresis 0x28 shift
+ae 0x28 altgr
+AE 0x28 shift altgr
+section 0x29
+onehalf 0x29 shift
+paragraph 0x29 altgr
+threequarters 0x29 shift altgr
+apostrophe 0x2b
+asterisk 0x2b shift
+acute 0x2b altgr
+multiply 0x2b shift altgr
+guillemotleft 0x2c altgr
+less 0x2c shift altgr
+guillemotright 0x2d altgr
+greater 0x2d shift altgr
+copyright 0x2e altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+apostrophe 0x30 shift altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+dead_cedilla 0x33 altgr
+dead_ogonek 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+dead_abovedot 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+hyphen 0x35 altgr
+macron 0x35 shift altgr
+nobreakspace 0x39 altgr
diff --git a/tools/ioemu/keymaps/fo b/tools/ioemu/keymaps/fo
new file mode 100644
index 0000000000..83add423c6
--- /dev/null
+++ b/tools/ioemu/keymaps/fo
@@ -0,0 +1,77 @@
+map 0x438
+include common
+
+#
+# Top row
+#
+onehalf 0x29
+section 0x29 shift
+
+# 1
+exclam 0x2 shift
+
+# 2
+quotedbl 0x3 shift
+at 0x3 altgr
+
+# 3
+numbersign 0x4 shift
+sterling 0x4 altgr
+# 4
+currency 0x5 shift
+dollar 0x5 altgr
+# 5
+percent 0x6 shift
+# 6
+ampersand 0x7 shift
+# 7
+slash 0x8 shift
+braceleft 0x8 altgr
+# 8
+parenleft 0x9 shift
+bracketleft 0x9 altgr
+# 9
+parenright 0xa shift
+bracketright 0xa altgr
+# 0
+equal 0xb shift
+braceright 0xb altgr
+
+plus 0xc
+question 0xc shift
+plusminus 0xc altgr
+
+bar 0xd altgr
+dead_acute 0xd
+
+#
+# QWERTY first row
+#
+EuroSign 0x12 altgr
+aring 0x1a
+Aring 0x1a shift
+eth 0x1b addupper
+asciitilde 0x1b altgr
+
+#
+# QWERTY second row
+#
+ae 0x27 addupper
+oslash 0x28
+Ooblique 0x28 shift
+apostrophe 0x2b
+asterisk 0x2b shift
+
+#
+# QWERTY third row
+#
+less 0x56
+greater 0x56 shift
+backslash 0x56 altgr
+comma 0x33
+semicolon 0x33 shift
+period 0x34
+colon 0x34 shift
+minus 0x35
+underscore 0x35 shift
+
diff --git a/tools/ioemu/keymaps/fr b/tools/ioemu/keymaps/fr
new file mode 100644
index 0000000000..cbb45910f4
--- /dev/null
+++ b/tools/ioemu/keymaps/fr
@@ -0,0 +1,181 @@
+include common
+map 0x40c
+#
+# Top row
+#
+twosuperior 0x29
+notsign 0x29 altgr
+
+ampersand 0x02
+1 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+
+eacute 0x03
+2 0x03 shift
+asciitilde 0x03 altgr
+oneeighth 0x03 shift altgr
+
+quotedbl 0x04
+3 0x04 shift
+numbersign 0x04 altgr
+
+apostrophe 0x05
+4 0x05 shift
+braceleft 0x05 altgr
+
+parenleft 0x06
+5 0x06 shift
+bracketleft 0x06 altgr
+threeeighths 0x06 shift altgr
+
+minus 0x07
+6 0x07 shift
+bar 0x07 altgr
+fiveeighths 0x07 shift altgr
+
+egrave 0x08
+7 0x08 shift
+grave 0x08 altgr
+seveneighths 0x08 shift altgr
+
+underscore 0x09
+8 0x09 shift
+backslash 0x09 altgr
+trademark 0x09 shift altgr
+
+ccedilla 0x0a
+9 0x0a shift
+asciicircum 0x0a altgr
+plusminus 0x0a shift altgr
+
+agrave 0x0b
+0 0x0b shift
+at 0x0b altgr
+
+parenright 0x0c
+degree 0x0c shift
+bracketright 0x0c altgr
+questiondown 0x0c shift altgr
+
+equal 0x0d
+plus 0x0d shift
+braceright 0x0d altgr
+dead_ogonek 0x0d shift altgr
+
+#
+# AZERTY first row
+#
+
+a 0x10 addupper
+ae 0x10 altgr
+AE 0x10 shift altgr
+
+z 0x11 addupper
+guillemotleft 0x11 altgr
+
+EuroSign 0x12 altgr
+
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+
+dead_circumflex 0x1a
+dead_diaeresis 0x1a shift
+dead_abovering 0x1a shift altgr
+
+dollar 0x1b
+sterling 0x1b shift
+currency 0x1b altgr
+dead_macron 0x1b shift altgr
+
+#
+# AZERTY second row
+#
+q 0x1e addupper
+Greek_OMEGA 0x1e shift altgr
+
+ssharp 0x1f altgr
+
+eth 0x20 altgr
+ETH 0x20 shift altgr
+
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+
+eng 0x22 altgr
+ENG 0x22 shift altgr
+
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+
+kra 0x25 altgr
+
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+
+m 0x27 addupper
+masculine 0x27 shift altgr
+
+ugrave 0x28
+percent 0x28 shift
+dead_caron 0x28 shift altgr
+
+asterisk 0x2b
+mu 0x2b shift
+dead_grave 0x2b altgr
+dead_breve 0x2b shift altgr
+
+#
+# AZERTY third row
+#
+less 0x56
+greater 0x56 shift
+
+w 0x2c addupper
+
+guillemotright 0x2d altgr
+
+cent 0x2e altgr
+copyright 0x2e shift altgr
+
+leftdoublequotemark 0x2f altgr
+
+rightdoublequotemark 0x30 altgr
+
+comma 0x32
+question 0x32 shift
+dead_acute 0x32 altgr
+dead_doubleacute 0x32 shift altgr
+
+semicolon 0x33
+period 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+
+colon 0x34
+slash 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+
+exclam 0x35
+section 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/fr-be b/tools/ioemu/keymaps/fr-be
new file mode 100644
index 0000000000..92d668eb61
--- /dev/null
+++ b/tools/ioemu/keymaps/fr-be
@@ -0,0 +1,140 @@
+# generated from XKB map be
+include common
+map 0x80c
+ampersand 0x02
+1 0x02 shift
+bar 0x02 altgr
+exclamdown 0x02 shift altgr
+eacute 0x03
+2 0x03 shift
+at 0x03 altgr
+oneeighth 0x03 shift altgr
+quotedbl 0x04
+3 0x04 shift
+numbersign 0x04 altgr
+sterling 0x04 shift altgr
+apostrophe 0x05
+4 0x05 shift
+onequarter 0x05 altgr
+dollar 0x05 shift altgr
+parenleft 0x06
+5 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+section 0x07
+6 0x07 shift
+asciicircum 0x07 altgr
+fiveeighths 0x07 shift altgr
+egrave 0x08
+7 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+exclam 0x09
+8 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+ccedilla 0x0a
+9 0x0a shift
+braceleft 0x0a altgr
+plusminus 0x0a shift altgr
+agrave 0x0b
+0 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+parenright 0x0c
+degree 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+minus 0x0d
+underscore 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+a 0x10 addupper
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+z 0x11 addupper
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+cent 0x12 shift altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+dead_circumflex 0x1a
+dead_diaeresis 0x1a shift
+bracketleft 0x1a altgr
+dead_abovering 0x1a shift altgr
+dollar 0x1b
+asterisk 0x1b shift
+bracketright 0x1b altgr
+dead_macron 0x1b shift altgr
+q 0x1e addupper
+ae 0x1e altgr
+AE 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+ampersand 0x25 shift altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+m 0x27 addupper
+dead_acute 0x27 altgr
+dead_doubleacute 0x27 shift altgr
+ugrave 0x28
+percent 0x28 shift
+dead_acute 0x28 altgr
+dead_caron 0x28 shift altgr
+twosuperior 0x29
+threesuperior 0x29 shift
+notsign 0x29 altgr
+mu 0x2b
+sterling 0x2b shift
+dead_grave 0x2b altgr
+dead_breve 0x2b shift altgr
+w 0x2c addupper
+guillemotleft 0x2c altgr
+less 0x2c shift altgr
+guillemotright 0x2d altgr
+greater 0x2d shift altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+apostrophe 0x30 shift altgr
+comma 0x32
+question 0x32 shift
+dead_cedilla 0x32 altgr
+masculine 0x32 shift altgr
+semicolon 0x33
+period 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+colon 0x34
+slash 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+equal 0x35
+plus 0x35 shift
+dead_tilde 0x35 altgr
+dead_abovedot 0x35 shift altgr
+backslash 0x56 altgr
diff --git a/tools/ioemu/keymaps/fr-ca b/tools/ioemu/keymaps/fr-ca
new file mode 100644
index 0000000000..b645208e42
--- /dev/null
+++ b/tools/ioemu/keymaps/fr-ca
@@ -0,0 +1,50 @@
+# Canadian French
+# By Simon Germain
+include common
+map 0xc0c
+
+backslash 0x29 altgr
+plusminus 0x2 altgr
+at 0x3 altgr
+sterling 0x4 altgr
+cent 0x5 altgr
+currency 0x6 altgr
+notsign 0x7 altgr
+bar 0x29 shift
+twosuperior 0x9 altgr
+threesuperior 0xa altgr
+onequarter 0xb altgr
+onehalf 0xc altgr
+threequarters 0xd altgr
+section 0x18 altgr
+paragraph 0x19 altgr
+bracketleft 0x1a altgr
+bracketright 0x1b altgr
+asciitilde 0x27 altgr
+braceleft 0x28 altgr
+braceright 0x2b altgr
+less 0x2b
+greater 0x2b shift
+guillemotleft 0x56
+guillemotright 0x56 shift
+degree 0x56 altgr
+mu 0x32 altgr
+eacute 0x35
+dead_acute 0x35 altgr
+dead_grave 0x28
+dead_circumflex 0x1a
+dead_circumflex 0x1a shift
+dead_cedilla 0x1b
+dead_diaeresis 0x1b shift
+exclam 0x2 shift
+quotedbl 0x3 shift
+slash 0x4 shift
+dollar 0x5 shift
+percent 0x6 shift
+question 0x7 shift
+ampersand 0x8 shift
+asterisk 0x9 shift
+parenleft 0xa shift
+parenright 0xb shift
+underscore 0xc shift
+plus 0xd shift
diff --git a/tools/ioemu/keymaps/fr-ch b/tools/ioemu/keymaps/fr-ch
new file mode 100644
index 0000000000..4620d2033b
--- /dev/null
+++ b/tools/ioemu/keymaps/fr-ch
@@ -0,0 +1,114 @@
+# generated from XKB map fr_CH
+include common
+map 0x100c
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+quotedbl 0x03 shift
+twosuperior 0x03 altgr
+oneeighth 0x03 shift altgr
+section 0x04 shift
+threesuperior 0x04 altgr
+sterling 0x04 shift altgr
+dollar 0x05 shift
+onequarter 0x05 altgr
+currency 0x05 shift altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+ampersand 0x07 shift
+threequarters 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+parenleft 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+parenright 0x0a shift
+bracketright 0x0a altgr
+plusminus 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+ssharp 0x0c
+question 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+acute 0x0d
+dead_acute 0x0d
+grave 0x0d shift
+dead_grave 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+EuroSign 0x12 altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+z 0x15 addupper
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+udiaeresis 0x1a
+Udiaeresis 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+plus 0x1b
+asterisk 0x1b shift
+asciitilde 0x1b altgr
+dead_tilde 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+odiaeresis 0x27
+Odiaeresis 0x27 shift
+dead_doubleacute 0x27 altgr
+adiaeresis 0x28
+Adiaeresis 0x28 shift
+dead_caron 0x28 shift altgr
+asciicircum 0x29
+dead_circumflex 0x29
+degree 0x29 shift
+notsign 0x29 altgr
+numbersign 0x2b
+apostrophe 0x2b shift
+dead_breve 0x2b shift altgr
+y 0x2c addupper
+guillemotleft 0x2c altgr
+guillemotright 0x2d altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/hr b/tools/ioemu/keymaps/hr
new file mode 100644
index 0000000000..613aa6925d
--- /dev/null
+++ b/tools/ioemu/keymaps/hr
@@ -0,0 +1,125 @@
+# generated from XKB map hr
+include common
+map 0x41a
+exclam 0x02 shift
+asciitilde 0x02 altgr
+dead_tilde 0x02 shift altgr
+quotedbl 0x03 shift
+dead_caron 0x03 altgr
+caron 0x03 shift altgr
+numbersign 0x04 shift
+asciicircum 0x04 altgr
+dead_circumflex 0x04 shift altgr
+dollar 0x05 shift
+dead_breve 0x05 altgr
+breve 0x05 shift altgr
+percent 0x06 shift
+degree 0x06 altgr
+dead_abovering 0x06 shift altgr
+ampersand 0x07 shift
+dead_ogonek 0x07 altgr
+ogonek 0x07 shift altgr
+slash 0x08 shift
+grave 0x08 altgr
+dead_grave 0x08 shift altgr
+parenleft 0x09 shift
+dead_abovedot 0x09 altgr
+abovedot 0x09 shift altgr
+parenright 0x0a shift
+dead_acute 0x0a altgr
+apostrophe 0x0a shift altgr
+equal 0x0b shift
+dead_doubleacute 0x0b altgr
+doubleacute 0x0b shift altgr
+apostrophe 0x0c
+question 0x0c shift
+dead_diaeresis 0x0c altgr
+diaeresis 0x0c shift altgr
+plus 0x0d
+asterisk 0x0d shift
+dead_cedilla 0x0d altgr
+cedilla 0x0d shift altgr
+backslash 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+bar 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+z 0x15 addupper
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+scaron 0x1a
+Scaron 0x1a shift
+division 0x1a altgr
+dead_abovering 0x1a shift altgr
+dstroke 0x1b
+Dstroke 0x1b shift
+multiply 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+bracketleft 0x21 altgr
+ordfeminine 0x21 shift altgr
+bracketright 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+lstroke 0x25 altgr
+ampersand 0x25 shift altgr
+Lstroke 0x26 altgr
+ccaron 0x27
+Ccaron 0x27 shift
+dead_acute 0x27 altgr
+dead_doubleacute 0x27 shift altgr
+cacute 0x28
+Cacute 0x28 shift
+ssharp 0x28 altgr
+dead_caron 0x28 shift altgr
+dead_cedilla 0x29
+dead_diaeresis 0x29 shift
+notsign 0x29 altgr
+zcaron 0x2b
+Zcaron 0x2b shift
+currency 0x2b altgr
+dead_breve 0x2b shift altgr
+y 0x2c addupper
+guillemotleft 0x2c altgr
+less 0x2c shift altgr
+guillemotright 0x2d altgr
+greater 0x2d shift altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+at 0x2f altgr
+grave 0x2f shift altgr
+braceleft 0x30 altgr
+apostrophe 0x30 shift altgr
+braceright 0x31 altgr
+section 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/hu b/tools/ioemu/keymaps/hu
new file mode 100644
index 0000000000..8aba444417
--- /dev/null
+++ b/tools/ioemu/keymaps/hu
@@ -0,0 +1,115 @@
+# Hungarian keyboard layout (QWERTZ)
+# Created by: The NeverGone <never@delfin.klte.hu>
+
+include common
+map 0x40e
+
+
+# AltGr keys:
+notsign 0x29 altgr
+asciitilde 0x02 altgr
+caron 0x03 altgr
+asciicircum 0x04 altgr
+breve 0x05 altgr
+degree 0x06 altgr
+ogonek 0x07 altgr
+grave 0x08 altgr
+abovedot 0x09 altgr
+acute 0x0a altgr
+doubleacute 0x0b altgr
+diaeresis 0x0c altgr
+cedilla 0x0d altgr
+backslash 0x10 altgr
+bar 0x11 altgr
+EuroSign 0x12 altgr
+Iacute 0x17 altgr
+division 0x1a altgr
+multiply 0x1b altgr
+dstroke 0x1f altgr
+Dstroke 0x20 altgr
+bracketleft 0x21 altgr
+bracketright 0x22 altgr
+iacute 0x24 altgr
+lstroke 0x25 altgr
+Lstroke 0x26 altgr
+dollar 0x27 altgr
+ssharp 0x28 altgr
+currency 0x2b altgr
+less 0x56 altgr
+greater 0x2c altgr
+numbersign 0x2d altgr
+ampersand 0x2e altgr
+at 0x2f altgr
+braceleft 0x30 altgr
+braceright 0x31 altgr
+semicolon 0x33 altgr
+asterisk 0x35 altgr
+
+
+# Shift keys:
+section 0x29 shift
+apostrophe 0x02 shift
+quotedbl 0x03 shift
+plus 0x04 shift
+exclam 0x05 shift
+percent 0x06 shift
+slash 0x07 shift
+equal 0x08 shift
+parenleft 0x09 shift
+parenright 0x0a shift
+Odiaeresis 0x0b shift
+Udiaeresis 0x0c shift
+Oacute 0x0d shift
+Z 0x15 shift
+Odoubleacute 0x1a shift
+Uacute 0x1b shift
+Eacute 0x27 shift
+Aacute 0x28 shift
+Udoubleacute 0x2b shift
+Y 0x2c shift
+question 0x33 shift
+colon 0x34 shift
+underscore 0x35 shift
+F13 0x3b shift
+F14 0x3c shift
+F15 0x3d shift
+F16 0x3e shift
+F17 0x3f shift
+F18 0x40 shift
+F19 0x41 shift
+F20 0x42 shift
+F21 0x43 shift
+F22 0x44 shift
+F23 0x57 shift
+F24 0x58 shift
+
+
+# Ctrl keys:
+F25 0x3b ctrl
+F26 0x3c ctrl
+F27 0x3d ctrl
+F28 0x3e ctrl
+F29 0x3f ctrl
+F30 0x40 ctrl
+F31 0x41 ctrl
+F32 0x42 ctrl
+F33 0x43 ctrl
+F34 0x44 ctrl
+F35 0x57 ctrl
+#NoSymbol 0x58 ctrl
+
+
+0 0x29
+odiaeresis 0x0b
+udiaeresis 0x0c
+oacute 0x0d
+z 0x15
+odoubleacute 0x1a
+uacute 0x1b
+eacute 0x27
+aacute 0x28
+udoubleacute 0x2b
+y 0x2c
+comma 0x33
+period 0x34
+minus 0x35
diff --git a/tools/ioemu/keymaps/is b/tools/ioemu/keymaps/is
new file mode 100644
index 0000000000..8fde40f19a
--- /dev/null
+++ b/tools/ioemu/keymaps/is
@@ -0,0 +1,140 @@
+# 2004-03-16 Halldór Guðmundsson and Morten Lange
+# Keyboard definition file for the Icelandic keyboard
+# to be used in rdesktop 1.3.x ( See rdesktop.org)
+# generated from XKB map de, and changed manually
+# Location for example /usr/local/share/rdesktop/keymaps/is
+include common
+map 0x40f
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+quotedbl 0x03 shift
+twosuperior 0x03 altgr
+oneeighth 0x03 shift altgr
+#section 0x04 shift
+numbersign 0x04 shift
+threesuperior 0x04 altgr
+sterling 0x04 shift altgr
+dollar 0x05 shift
+onequarter 0x05 altgr
+currency 0x05 shift altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+ampersand 0x07 shift
+threequarters 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+parenleft 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+parenright 0x0a shift
+bracketright 0x0a altgr
+plusminus 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+#ssharp 0x0c
+odiaeresis 0x0c
+#question 0x0c shift
+Odiaeresis 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+#acute 0x0d
+minus 0x0d
+#dead_acute 0x0d
+#grave 0x0d shift
+#dead_grave 0x0d shift
+underscore 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+EuroSign 0x12 altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+#z 0x15 addupper
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+#thorn 0x19 altgr
+#THORN 0x19 shift altgr
+#udiaeresis 0x1a
+#Udiaeresis 0x1a shift
+#dead_diaeresis 0x1a altgr
+#dead_abovering 0x1a shift altgr
+eth 0x1a
+ETH 0x1a shift
+apostrophe 0x1b
+question 0x1b shift
+#plus 0x1b
+#asterisk 0x1b shift
+asciitilde 0x1b altgr
+#grave 0x1b altgr
+#dead_tilde 0x1b altgr
+#dead_macron 0x1b shift altgr
+#ae 0x1e altgr
+#AE 0x1e shift altgr
+#eth 0x20 altgr
+#eth 0x20
+#ETH 0x20 shift altgr
+#ETH 0x20 shift
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+#adiaeresis 0x27
+#Adiaeresis 0x27 shift
+ae 0x27
+AE 0x27 shift
+dead_doubleacute 0x27 altgr
+#adiaeresis 0x28
+#Adiaeresis 0x28 shift
+#dead_caron 0x28 shift altgr
+#asciicircum 0x29
+acute 0x28
+dead_acute 0x28
+#dead_circumflex 0x29
+#degree 0x29 shift
+#notsign 0x29 altgr
+plus 0x2b
+asterisk 0x2b shift
+grave 0x2b altgr
+#numbersign 0x2b
+#apostrophe 0x2b shift
+#dead_breve 0x2b shift altgr
+#y 0x2c addupper
+guillemotleft 0x2c altgr
+guillemotright 0x2d altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+#minus 0x35
+#underscore 0x35 shift
+thorn 0x35
+THORN 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
+
diff --git a/tools/ioemu/keymaps/it b/tools/ioemu/keymaps/it
new file mode 100644
index 0000000000..00ca73a3e2
--- /dev/null
+++ b/tools/ioemu/keymaps/it
@@ -0,0 +1,115 @@
+# generated from XKB map it
+include common
+map 0x410
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+quotedbl 0x03 shift
+twosuperior 0x03 altgr
+oneeighth 0x03 shift altgr
+sterling 0x04 shift
+threesuperior 0x04 altgr
+dollar 0x05 shift
+onequarter 0x05 altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+ampersand 0x07 shift
+threequarters 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+parenleft 0x09 shift
+trademark 0x09 shift altgr
+parenright 0x0a shift
+plusminus 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+apostrophe 0x0c
+question 0x0c shift
+grave 0x0c altgr
+questiondown 0x0c shift altgr
+igrave 0x0d
+asciicircum 0x0d shift
+asciitilde 0x0d altgr
+dead_ogonek 0x0d shift altgr
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+cent 0x12 shift altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+egrave 0x1a
+eacute 0x1a shift
+bracketleft 0x1a altgr
+dead_abovering 0x1a shift altgr
+plus 0x1b
+asterisk 0x1b shift
+bracketright 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+ograve 0x27
+ccedilla 0x27 shift
+at 0x27 altgr
+dead_doubleacute 0x27 shift altgr
+agrave 0x28
+degree 0x28 shift
+numbersign 0x28 altgr
+backslash 0x29
+bar 0x29 shift
+notsign 0x29 altgr
+ugrave 0x2b
+section 0x2b shift
+dead_grave 0x2b altgr
+dead_breve 0x2b shift altgr
+guillemotleft 0x2c altgr
+guillemotright 0x2d altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/ja b/tools/ioemu/keymaps/ja
new file mode 100644
index 0000000000..8fd0b9ef1b
--- /dev/null
+++ b/tools/ioemu/keymaps/ja
@@ -0,0 +1,104 @@
+# generated from XKB map jp106
+include common
+map 0x411
+exclam 0x02 shift
+kana_NU 0x02 altgr
+quotedbl 0x03 shift
+kana_FU 0x03 altgr
+numbersign 0x04 shift
+kana_A 0x04 altgr
+kana_a 0x04 shift altgr
+dollar 0x05 shift
+kana_U 0x05 altgr
+kana_u 0x05 shift altgr
+percent 0x06 shift
+kana_E 0x06 altgr
+kana_e 0x06 shift altgr
+ampersand 0x07 shift
+kana_O 0x07 altgr
+kana_o 0x07 shift altgr
+apostrophe 0x08 shift
+kana_YA 0x08 altgr
+kana_ya 0x08 shift altgr
+parenleft 0x09 shift
+kana_YU 0x09 altgr
+kana_yu 0x09 shift altgr
+parenright 0x0a shift
+kana_YO 0x0a altgr
+kana_yo 0x0a shift altgr
+asciitilde 0x0b shift
+kana_WA 0x0b altgr
+kana_WO 0x0b shift altgr
+minus 0x0c
+equal 0x0c shift
+kana_HO 0x0c altgr
+asciicircum 0x0d
+asciitilde 0x0d shift
+kana_HE 0x0d altgr
+kana_TA 0x10 altgr
+kana_TE 0x11 altgr
+kana_I 0x12 altgr
+kana_i 0x12 shift altgr
+kana_SU 0x13 altgr
+kana_KA 0x14 altgr
+kana_N 0x15 altgr
+kana_NA 0x16 altgr
+kana_NI 0x17 altgr
+kana_RA 0x18 altgr
+kana_SE 0x19 altgr
+at 0x1a
+grave 0x1a shift
+voicedsound 0x1a altgr
+bracketleft 0x1b
+braceleft 0x1b shift
+semivoicedsound 0x1b altgr
+kana_openingbracket 0x1b shift altgr
+kana_CHI 0x1e altgr
+kana_TO 0x1f altgr
+kana_SHI 0x20 altgr
+kana_HA 0x21 altgr
+kana_KI 0x22 altgr
+kana_KU 0x23 altgr
+kana_MA 0x24 altgr
+kana_NO 0x25 altgr
+kana_RI 0x26 altgr
+semicolon 0x27
+plus 0x27 shift
+kana_RE 0x27 altgr
+colon 0x28
+asterisk 0x28 shift
+kana_KE 0x28 altgr
+Zenkaku_Hankaku 0x29
+bracketright 0x2b
+braceright 0x2b shift
+kana_MU 0x2b altgr
+kana_closingbracket 0x2b shift altgr
+kana_TSU 0x2c altgr
+kana_tsu 0x2c shift altgr
+kana_SA 0x2d altgr
+kana_SO 0x2e altgr
+kana_HI 0x2f altgr
+kana_KO 0x30 altgr
+kana_MI 0x31 altgr
+kana_MO 0x32 altgr
+comma 0x33
+less 0x33 shift
+kana_NE 0x33 altgr
+kana_comma 0x33 shift altgr
+period 0x34
+greater 0x34 shift
+kana_RU 0x34 altgr
+kana_fullstop 0x34 shift altgr
+slash 0x35
+question 0x35 shift
+kana_ME 0x35 altgr
+kana_conjunctive 0x35 shift altgr
+Eisu_toggle 0x3a shift
+Execute 0x54 shift
+Kanji 0x70
+backslash 0x73
+bar 0x7d shift
+underscore 0x73 shift
+Henkan_Mode 0x79
+Katakana 0x70
+Muhenkan 0x7b
diff --git a/tools/ioemu/keymaps/lt b/tools/ioemu/keymaps/lt
new file mode 100644
index 0000000000..3d9d619ea5
--- /dev/null
+++ b/tools/ioemu/keymaps/lt
@@ -0,0 +1,57 @@
+# generated from XKB map lt
+include common
+map 0x427
+exclam 0x02 shift
+aogonek 0x02 altgr
+Aogonek 0x02 shift altgr
+at 0x03 shift
+ccaron 0x03 altgr
+Ccaron 0x03 shift altgr
+numbersign 0x04 shift
+eogonek 0x04 altgr
+Eogonek 0x04 shift altgr
+dollar 0x05 shift
+eabovedot 0x05 altgr
+Eabovedot 0x05 shift altgr
+percent 0x06 shift
+iogonek 0x06 altgr
+Iogonek 0x06 shift altgr
+asciicircum 0x07 shift
+scaron 0x07 altgr
+Scaron 0x07 shift altgr
+ampersand 0x08 shift
+uogonek 0x08 altgr
+Uogonek 0x08 shift altgr
+asterisk 0x09 shift
+umacron 0x09 altgr
+Umacron 0x09 shift altgr
+parenleft 0x0a shift
+doublelowquotemark 0x0a altgr
+parenright 0x0b shift
+leftdoublequotemark 0x0b altgr
+minus 0x0c
+underscore 0x0c shift
+equal 0x0d
+plus 0x0d shift
+zcaron 0x0d altgr
+Zcaron 0x0d shift altgr
+bracketleft 0x1a
+braceleft 0x1a shift
+bracketright 0x1b
+braceright 0x1b shift
+semicolon 0x27
+colon 0x27 shift
+apostrophe 0x28
+quotedbl 0x28 shift
+grave 0x29
+asciitilde 0x29 shift
+backslash 0x2b
+bar 0x2b shift
+comma 0x33
+less 0x33 shift
+period 0x34
+greater 0x34 shift
+slash 0x35
+question 0x35 shift
+endash 0x56
+EuroSign 0x56 shift
diff --git a/tools/ioemu/keymaps/lv b/tools/ioemu/keymaps/lv
new file mode 100644
index 0000000000..1d91727912
--- /dev/null
+++ b/tools/ioemu/keymaps/lv
@@ -0,0 +1,128 @@
+# generated from XKB map lv
+include common
+map 0x426
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+at 0x03 shift
+twosuperior 0x03 altgr
+oneeighth 0x03 shift altgr
+numbersign 0x04 shift
+threesuperior 0x04 altgr
+sterling 0x04 shift altgr
+dollar 0x05 shift
+EuroSign 0x05 altgr
+cent 0x05 shift altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+asciicircum 0x07 shift
+threequarters 0x07 altgr
+fiveeighths 0x07 shift altgr
+ampersand 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+asterisk 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+parenleft 0x0a shift
+bracketright 0x0a altgr
+plusminus 0x0a shift altgr
+parenright 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+minus 0x0c
+underscore 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+equal 0x0d
+plus 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+emacron 0x12 altgr
+Emacron 0x12 shift altgr
+rcedilla 0x13 altgr
+Rcedilla 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+umacron 0x16 altgr
+Umacron 0x16 shift altgr
+imacron 0x17 altgr
+Imacron 0x17 shift altgr
+omacron 0x18 altgr
+Omacron 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+bracketleft 0x1a
+braceleft 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+bracketright 0x1b
+braceright 0x1b shift
+dead_tilde 0x1b altgr
+dead_macron 0x1b shift altgr
+ISO_Next_Group 0x1c shift
+amacron 0x1e altgr
+Amacron 0x1e shift altgr
+scaron 0x1f altgr
+Scaron 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+gcedilla 0x22 altgr
+Gcedilla 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kcedilla 0x25 altgr
+Kcedilla 0x25 shift altgr
+lcedilla 0x26 altgr
+Lcedilla 0x26 shift altgr
+semicolon 0x27
+colon 0x27 shift
+dead_acute 0x27 altgr
+dead_doubleacute 0x27 shift altgr
+apostrophe 0x28
+quotedbl 0x28 shift
+leftdoublequotemark 0x28 altgr
+doublelowquotemark 0x28 shift altgr
+grave 0x29
+asciitilde 0x29 shift
+notsign 0x29 altgr
+backslash 0x2b
+bar 0x2b shift
+dead_grave 0x2b altgr
+dead_breve 0x2b shift altgr
+zcaron 0x2c altgr
+Zcaron 0x2c shift altgr
+guillemotright 0x2d altgr
+greater 0x2d shift altgr
+ccaron 0x2e altgr
+Ccaron 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+apostrophe 0x30 shift altgr
+ncedilla 0x31 altgr
+Ncedilla 0x31 shift altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+less 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+greater 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+slash 0x35
+question 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
+nobreakspace 0x39 altgr
diff --git a/tools/ioemu/keymaps/mk b/tools/ioemu/keymaps/mk
new file mode 100644
index 0000000000..18c1504842
--- /dev/null
+++ b/tools/ioemu/keymaps/mk
@@ -0,0 +1,101 @@
+# generated from XKB map mk
+include common
+map 0x42f
+exclam 0x02 shift
+at 0x03 shift
+doublelowquotemark 0x03 shift altgr
+numbersign 0x04 shift
+leftdoublequotemark 0x04 shift altgr
+dollar 0x05 shift
+percent 0x06 shift
+asciicircum 0x07 shift
+ampersand 0x08 shift
+asterisk 0x09 shift
+parenleft 0x0a shift
+parenright 0x0b shift
+minus 0x0c
+underscore 0x0c shift
+equal 0x0d
+plus 0x0d shift
+Cyrillic_lje 0x10 altgr
+Cyrillic_LJE 0x10 shift altgr
+Cyrillic_nje 0x11 altgr
+Cyrillic_NJE 0x11 shift altgr
+Cyrillic_ie 0x12 altgr
+Cyrillic_IE 0x12 shift altgr
+Cyrillic_er 0x13 altgr
+Cyrillic_ER 0x13 shift altgr
+Cyrillic_te 0x14 altgr
+Cyrillic_TE 0x14 shift altgr
+Macedonia_dse 0x15 altgr
+Macedonia_DSE 0x15 shift altgr
+Cyrillic_u 0x16 altgr
+Cyrillic_U 0x16 shift altgr
+Cyrillic_i 0x17 altgr
+Cyrillic_I 0x17 shift altgr
+Cyrillic_o 0x18 altgr
+Cyrillic_O 0x18 shift altgr
+Cyrillic_pe 0x19 altgr
+Cyrillic_PE 0x19 shift altgr
+bracketleft 0x1a
+braceleft 0x1a shift
+Cyrillic_sha 0x1a altgr
+Cyrillic_SHA 0x1a shift altgr
+bracketright 0x1b
+braceright 0x1b shift
+Macedonia_gje 0x1b altgr
+Macedonia_GJE 0x1b shift altgr
+Cyrillic_a 0x1e altgr
+Cyrillic_A 0x1e shift altgr
+Cyrillic_es 0x1f altgr
+Cyrillic_ES 0x1f shift altgr
+Cyrillic_de 0x20 altgr
+Cyrillic_DE 0x20 shift altgr
+Cyrillic_ef 0x21 altgr
+Cyrillic_EF 0x21 shift altgr
+Cyrillic_ghe 0x22 altgr
+Cyrillic_GHE 0x22 shift altgr
+Cyrillic_ha 0x23 altgr
+Cyrillic_HA 0x23 shift altgr
+Cyrillic_je 0x24 altgr
+Cyrillic_JE 0x24 shift altgr
+Cyrillic_ka 0x25 altgr
+Cyrillic_KA 0x25 shift altgr
+Cyrillic_el 0x26 altgr
+Cyrillic_EL 0x26 shift altgr
+semicolon 0x27
+colon 0x27 shift
+Cyrillic_che 0x27 altgr
+Cyrillic_CHE 0x27 shift altgr
+apostrophe 0x28
+quotedbl 0x28 shift
+Macedonia_kje 0x28 altgr
+Macedonia_KJE 0x28 shift altgr
+grave 0x29
+asciitilde 0x29 shift
+backslash 0x2b
+bar 0x2b shift
+Cyrillic_zhe 0x2b altgr
+Cyrillic_ZHE 0x2b shift altgr
+Cyrillic_ze 0x2c altgr
+Cyrillic_ZE 0x2c shift altgr
+Cyrillic_dzhe 0x2d altgr
+Cyrillic_DZHE 0x2d shift altgr
+Cyrillic_tse 0x2e altgr
+Cyrillic_TSE 0x2e shift altgr
+Cyrillic_ve 0x2f altgr
+Cyrillic_VE 0x2f shift altgr
+Cyrillic_be 0x30 altgr
+Cyrillic_BE 0x30 shift altgr
+Cyrillic_en 0x31 altgr
+Cyrillic_EN 0x31 shift altgr
+Cyrillic_em 0x32 altgr
+Cyrillic_EM 0x32 shift altgr
+comma 0x33
+less 0x33 shift
+semicolon 0x33 shift altgr
+period 0x34
+greater 0x34 shift
+colon 0x34 shift altgr
+slash 0x35
+question 0x35 shift
diff --git a/tools/ioemu/keymaps/modifiers b/tools/ioemu/keymaps/modifiers
new file mode 100644
index 0000000000..d8b019f040
--- /dev/null
+++ b/tools/ioemu/keymaps/modifiers
@@ -0,0 +1,17 @@
+Shift_R 0x36
+Shift_L 0x2a
+
+Alt_R 0xb8
+Mode_switch 0xb8
+Alt_L 0x38
+
+Control_R 0x9d
+Control_L 0x1d
+
+# Translate Super to Windows keys.
+# This is hardcoded. See documentation for details.
+Super_R 0xdb
+Super_L 0xdc
+
+# Translate Menu to the Windows Application key.
+Menu 0xdd
diff --git a/tools/ioemu/keymaps/nl b/tools/ioemu/keymaps/nl
new file mode 100644
index 0000000000..bc823bd2f7
--- /dev/null
+++ b/tools/ioemu/keymaps/nl
@@ -0,0 +1,60 @@
+# Dutch (Netherlands)
+include common
+map 0x413
+
+exclam 0x02 shift
+onesuperior 0x02 altgr
+quotedbl 0x03 shift
+twosuperior 0x03 altgr
+numbersign 0x04 shift
+threesuperior 0x04 altgr
+dollar 0x05 shift
+onequarter 0x05 altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+ampersand 0x07 shift
+threequarters 0x07 altgr
+underscore 0x08 shift
+sterling 0x08 altgr
+parenleft 0x09 shift
+braceleft 0x09 altgr
+parenright 0x0a shift
+braceright 0x0a altgr
+apostrophe 0x0b shift
+slash 0x0c
+question 0x0c shift
+backslash 0x0c altgr
+degree 0x0d
+dead_tilde 0x0d shift
+dead_cedilla 0x0d altgr
+EuroSign 0x12 altgr
+paragraph 0x13 altgr
+dead_diaeresis 0x1a
+dead_circumflex 0x1a shift
+asterisk 0x1b
+bar 0x1b shift
+ssharp 0x1f altgr
+plus 0x27
+plusminus 0x27 shift
+dead_acute 0x28
+dead_grave 0x28 shift
+at 0x29
+section 0x29 shift
+notsign 0x29 altgr
+less 0x2b
+greater 0x2b shift
+guillemotleft 0x2c altgr
+guillemotright 0x2d altgr
+copyright 0x2e altgr
+mu 0x32 altgr
+comma 0x33
+semicolon 0x33 shift
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+hyphen 0x35
+equal 0x35 shift
+bracketright 0x56
+bracketleft 0x56 shift
+brokenbar 0x56 altgr
+
diff --git a/tools/ioemu/keymaps/nl-be b/tools/ioemu/keymaps/nl-be
new file mode 100644
index 0000000000..34fc881ad0
--- /dev/null
+++ b/tools/ioemu/keymaps/nl-be
@@ -0,0 +1,3 @@
+# Dutch (Belgium)
+map 0x813
+include common
diff --git a/tools/ioemu/keymaps/no b/tools/ioemu/keymaps/no
new file mode 100644
index 0000000000..40a64790d1
--- /dev/null
+++ b/tools/ioemu/keymaps/no
@@ -0,0 +1,119 @@
+# generated from XKB map no
+include common
+map 0x414
+exclam 0x02 shift
+exclamdown 0x02 altgr
+onesuperior 0x02 shift altgr
+quotedbl 0x03 shift
+at 0x03 altgr
+twosuperior 0x03 shift altgr
+numbersign 0x04 shift
+sterling 0x04 altgr
+threesuperior 0x04 shift altgr
+currency 0x05 shift
+dollar 0x05 altgr
+onequarter 0x05 shift altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+cent 0x06 shift altgr
+ampersand 0x07 shift
+yen 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+division 0x08 shift altgr
+parenleft 0x09 shift
+bracketleft 0x09 altgr
+guillemotleft 0x09 shift altgr
+parenright 0x0a shift
+bracketright 0x0a altgr
+guillemotright 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+plus 0x0c
+question 0x0c shift
+plusminus 0x0c altgr
+questiondown 0x0c shift altgr
+backslash 0x0d
+dead_grave 0x0d shift
+dead_acute 0x0d altgr
+notsign 0x0d shift altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+cent 0x12 shift altgr
+registered 0x13 altgr
+thorn 0x14 altgr
+THORN 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oe 0x18 altgr
+OE 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+aring 0x1a
+Aring 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+dead_diaeresis 0x1b
+dead_circumflex 0x1b shift
+asciicircum 0x1b shift
+dead_tilde 0x1b altgr
+asciitilde 0x1b altgr
+dead_caron 0x1b shift altgr
+ordfeminine 0x1e altgr
+masculine 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+oslash 0x27
+Ooblique 0x27 shift
+dead_doubleacute 0x27 shift altgr
+ae 0x28
+AE 0x28 shift
+dead_caron 0x28 shift altgr
+bar 0x29
+section 0x29 shift
+brokenbar 0x29 altgr
+paragraph 0x29 shift altgr
+apostrophe 0x2b
+asterisk 0x2b shift
+multiply 0x2b shift altgr
+guillemotleft 0x2c altgr
+guillemotright 0x2d altgr
+copyright 0x2e altgr
+leftdoublequotemark 0x2f altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+dead_cedilla 0x33 altgr
+dead_ogonek 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+dead_abovedot 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+hyphen 0x35 altgr
+macron 0x35 shift altgr
+nobreakspace 0x39 altgr
+onehalf 0x56 altgr
+threequarters 0x56 shift altgr
diff --git a/tools/ioemu/keymaps/pl b/tools/ioemu/keymaps/pl
new file mode 100644
index 0000000000..09c600d355
--- /dev/null
+++ b/tools/ioemu/keymaps/pl
@@ -0,0 +1,122 @@
+# generated from XKB map pl
+include common
+map 0x415
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+at 0x03 shift
+twosuperior 0x03 altgr
+oneeighth 0x03 shift altgr
+numbersign 0x04 shift
+threesuperior 0x04 altgr
+sterling 0x04 shift altgr
+dollar 0x05 shift
+onequarter 0x05 altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+asciicircum 0x07 shift
+threequarters 0x07 altgr
+fiveeighths 0x07 shift altgr
+ampersand 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+asterisk 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+parenleft 0x0a shift
+bracketright 0x0a altgr
+plusminus 0x0a shift altgr
+parenright 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+minus 0x0c
+underscore 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+equal 0x0d
+plus 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+eogonek 0x12 altgr
+Eogonek 0x12 shift altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+EuroSign 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oacute 0x18 altgr
+Oacute 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+bracketleft 0x1a
+braceleft 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+bracketright 0x1b
+braceright 0x1b shift
+dead_tilde 0x1b altgr
+dead_macron 0x1b shift altgr
+aogonek 0x1e altgr
+Aogonek 0x1e shift altgr
+sacute 0x1f altgr
+Sacute 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+semicolon 0x27
+colon 0x27 shift
+dead_acute 0x27 altgr
+dead_doubleacute 0x27 shift altgr
+apostrophe 0x28
+quotedbl 0x28 shift
+dead_circumflex 0x28 altgr
+dead_caron 0x28 shift altgr
+grave 0x29
+asciitilde 0x29 shift
+notsign 0x29 altgr
+backslash 0x2b
+bar 0x2b shift
+dead_grave 0x2b altgr
+dead_breve 0x2b shift altgr
+zabovedot 0x2c altgr
+Zabovedot 0x2c shift altgr
+zacute 0x2d altgr
+Zacute 0x2d shift altgr
+cacute 0x2e altgr
+Cacute 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+nacute 0x31 altgr
+Nacute 0x31 shift altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+less 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+greater 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+slash 0x35
+question 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/pt b/tools/ioemu/keymaps/pt
new file mode 100644
index 0000000000..c6941f651c
--- /dev/null
+++ b/tools/ioemu/keymaps/pt
@@ -0,0 +1,113 @@
+# generated from XKB map pt
+include common
+map 0x816
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+quotedbl 0x03 shift
+at 0x03 altgr
+oneeighth 0x03 shift altgr
+numbersign 0x04 shift
+sterling 0x04 altgr
+dollar 0x05 shift
+section 0x05 altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+ampersand 0x07 shift
+threequarters 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+parenleft 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+parenright 0x0a shift
+bracketright 0x0a altgr
+plusminus 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+apostrophe 0x0c
+question 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+guillemotleft 0x0d
+guillemotright 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+cent 0x12 shift altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+plus 0x1a
+asterisk 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+dead_acute 0x1b
+dead_grave 0x1b shift
+dead_tilde 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+ssharp 0x1f altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+ccedilla 0x27
+Ccedilla 0x27 shift
+dead_doubleacute 0x27 shift altgr
+masculine 0x28
+ordfeminine 0x28 shift
+dead_circumflex 0x28 altgr
+dead_caron 0x28 shift altgr
+backslash 0x29
+bar 0x29 shift
+notsign 0x29 altgr
+dead_tilde 0x2b
+dead_circumflex 0x2b shift
+dead_breve 0x2b shift altgr
+less 0x56
+greater 0x56 shift
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+mu 0x32 altgr
+comma 0x33
+semicolon 0x33 shift
+horizconnector 0x33 altgr
+multiply 0x33 shift altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+division 0x34 shift altgr
+minus 0x35
+underscore 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/keymaps/pt-br b/tools/ioemu/keymaps/pt-br
new file mode 100644
index 0000000000..54bafc5dc3
--- /dev/null
+++ b/tools/ioemu/keymaps/pt-br
@@ -0,0 +1,69 @@
+# generated from XKB map br
+include common
+map 0x416
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+at 0x03 shift
+twosuperior 0x03 altgr
+onehalf 0x03 shift altgr
+numbersign 0x04 shift
+threesuperior 0x04 altgr
+threequarters 0x04 shift altgr
+dollar 0x05 shift
+sterling 0x05 altgr
+onequarter 0x05 shift altgr
+percent 0x06 shift
+cent 0x06 altgr
+dead_diaeresis 0x07 shift
+notsign 0x07 altgr
+diaeresis 0x07 shift altgr
+ampersand 0x08 shift
+braceleft 0x08 altgr
+asterisk 0x09 shift
+bracketleft 0x09 altgr
+parenleft 0x0a shift
+bracketright 0x0a altgr
+parenright 0x0b shift
+braceright 0x0b altgr
+minus 0x0c
+underscore 0x0c shift
+backslash 0x0c altgr
+equal 0x0d
+plus 0x0d shift
+section 0x0d altgr
+EuroSign 0x12 altgr
+registered 0x13 altgr
+dead_acute 0x1a
+dead_grave 0x1a shift
+acute 0x1a altgr
+grave 0x1a shift altgr
+bracketleft 0x1b
+braceleft 0x1b shift
+ordfeminine 0x1b altgr
+ccedilla 0x27
+Ccedilla 0x27 shift
+dead_tilde 0x28
+dead_circumflex 0x28 shift
+asciitilde 0x28 altgr
+asciicircum 0x28 shift altgr
+apostrophe 0x29
+quotedbl 0x29 shift
+bracketright 0x2b
+braceright 0x2b shift
+masculine 0x2b altgr
+copyright 0x2e altgr
+mu 0x32 altgr
+comma 0x33
+less 0x33 shift
+period 0x34
+greater 0x34 shift
+semicolon 0x35
+colon 0x35 shift
+comma 0x53 numlock
+backslash 0x56
+bar 0x56 shift
+slash 0x73
+question 0x73 shift
+degree 0x73 altgr
+KP_Decimal 0x34
diff --git a/tools/ioemu/keymaps/ru b/tools/ioemu/keymaps/ru
new file mode 100644
index 0000000000..b3e7d24de5
--- /dev/null
+++ b/tools/ioemu/keymaps/ru
@@ -0,0 +1,109 @@
+# generated from XKB map ru
+include common
+map 0x419
+exclam 0x02 shift
+at 0x03 shift
+quotedbl 0x03 shift altgr
+numbersign 0x04 shift
+dollar 0x05 shift
+asterisk 0x05 shift altgr
+percent 0x06 shift
+colon 0x06 shift altgr
+asciicircum 0x07 shift
+comma 0x07 shift altgr
+ampersand 0x08 shift
+period 0x08 shift altgr
+asterisk 0x09 shift
+semicolon 0x09 shift altgr
+parenleft 0x0a shift
+parenright 0x0b shift
+minus 0x0c
+underscore 0x0c shift
+equal 0x0d
+plus 0x0d shift
+Cyrillic_shorti 0x10 altgr
+Cyrillic_SHORTI 0x10 shift altgr
+Cyrillic_tse 0x11 altgr
+Cyrillic_TSE 0x11 shift altgr
+Cyrillic_u 0x12 altgr
+Cyrillic_U 0x12 shift altgr
+Cyrillic_ka 0x13 altgr
+Cyrillic_KA 0x13 shift altgr
+Cyrillic_ie 0x14 altgr
+Cyrillic_IE 0x14 shift altgr
+Cyrillic_en 0x15 altgr
+Cyrillic_EN 0x15 shift altgr
+Cyrillic_ghe 0x16 altgr
+Cyrillic_GHE 0x16 shift altgr
+Cyrillic_sha 0x17 altgr
+Cyrillic_SHA 0x17 shift altgr
+Cyrillic_shcha 0x18 altgr
+Cyrillic_SHCHA 0x18 shift altgr
+Cyrillic_ze 0x19 altgr
+Cyrillic_ZE 0x19 shift altgr
+bracketleft 0x1a
+braceleft 0x1a shift
+Cyrillic_ha 0x1a altgr
+Cyrillic_HA 0x1a shift altgr
+bracketright 0x1b
+braceright 0x1b shift
+Cyrillic_hardsign 0x1b altgr
+Cyrillic_HARDSIGN 0x1b shift altgr
+Cyrillic_ef 0x1e altgr
+Cyrillic_EF 0x1e shift altgr
+Cyrillic_yeru 0x1f altgr
+Cyrillic_YERU 0x1f shift altgr
+Cyrillic_ve 0x20 altgr
+Cyrillic_VE 0x20 shift altgr
+Cyrillic_a 0x21 altgr
+Cyrillic_A 0x21 shift altgr
+Cyrillic_pe 0x22 altgr
+Cyrillic_PE 0x22 shift altgr
+Cyrillic_er 0x23 altgr
+Cyrillic_ER 0x23 shift altgr
+Cyrillic_o 0x24 altgr
+Cyrillic_O 0x24 shift altgr
+Cyrillic_el 0x25 altgr
+Cyrillic_EL 0x25 shift altgr
+Cyrillic_de 0x26 altgr
+Cyrillic_DE 0x26 shift altgr
+semicolon 0x27
+colon 0x27 shift
+Cyrillic_zhe 0x27 altgr
+Cyrillic_ZHE 0x27 shift altgr
+apostrophe 0x28
+quotedbl 0x28 shift
+Cyrillic_e 0x28 altgr
+Cyrillic_E 0x28 shift altgr
+grave 0x29
+asciitilde 0x29 shift
+Cyrillic_io 0x29 altgr
+Cyrillic_IO 0x29 shift altgr
+backslash 0x2b
+bar 0x2b shift
+Cyrillic_ya 0x2c altgr
+Cyrillic_YA 0x2c shift altgr
+Cyrillic_che 0x2d altgr
+Cyrillic_CHE 0x2d shift altgr
+Cyrillic_es 0x2e altgr
+Cyrillic_ES 0x2e shift altgr
+Cyrillic_em 0x2f altgr
+Cyrillic_EM 0x2f shift altgr
+Cyrillic_i 0x30 altgr
+Cyrillic_I 0x30 shift altgr
+Cyrillic_te 0x31 altgr
+Cyrillic_TE 0x31 shift altgr
+Cyrillic_softsign 0x32 altgr
+Cyrillic_SOFTSIGN 0x32 shift altgr
+comma 0x33
+less 0x33 shift
+Cyrillic_be 0x33 altgr
+Cyrillic_BE 0x33 shift altgr
+period 0x34
+greater 0x34 shift
+Cyrillic_yu 0x34 altgr
+Cyrillic_YU 0x34 shift altgr
+slash 0x35
+question 0x35 shift
+slash 0x56 altgr
+bar 0x56 shift altgr
diff --git a/tools/ioemu/keymaps/sl b/tools/ioemu/keymaps/sl
new file mode 100644
index 0000000000..56835a92c3
--- /dev/null
+++ b/tools/ioemu/keymaps/sl
@@ -0,0 +1,110 @@
+# generated from XKB map sl
+include common
+map 0x424
+exclam 0x02 shift
+asciitilde 0x02 altgr
+dead_tilde 0x02 shift altgr
+quotedbl 0x03 shift
+dead_caron 0x03 altgr
+caron 0x03 shift altgr
+numbersign 0x04 shift
+asciicircum 0x04 altgr
+dead_circumflex 0x04 shift altgr
+dollar 0x05 shift
+dead_breve 0x05 altgr
+breve 0x05 shift altgr
+percent 0x06 shift
+degree 0x06 altgr
+dead_abovering 0x06 shift altgr
+ampersand 0x07 shift
+dead_ogonek 0x07 altgr
+ogonek 0x07 shift altgr
+slash 0x08 shift
+grave 0x08 altgr
+dead_grave 0x08 shift altgr
+parenleft 0x09 shift
+dead_abovedot 0x09 altgr
+abovedot 0x09 shift altgr
+parenright 0x0a shift
+dead_acute 0x0a altgr
+equal 0x0b shift
+dead_doubleacute 0x0b altgr
+doubleacute 0x0b shift altgr
+apostrophe 0x0c
+question 0x0c shift
+dead_diaeresis 0x0c altgr
+diaeresis 0x0c shift altgr
+plus 0x0d
+asterisk 0x0d shift
+dead_cedilla 0x0d altgr
+cedilla 0x0d shift altgr
+backslash 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+bar 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+z 0x15 addupper
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+rightarrow 0x17 altgr
+idotless 0x17 shift altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+scaron 0x1a
+Scaron 0x1a shift
+division 0x1a altgr
+dstroke 0x1b
+Dstroke 0x1b shift
+multiply 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+bracketleft 0x21 altgr
+ordfeminine 0x21 shift altgr
+bracketright 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+lstroke 0x25 altgr
+Lstroke 0x26 altgr
+ccaron 0x27
+Ccaron 0x27 shift
+cacute 0x28
+Cacute 0x28 shift
+ssharp 0x28 altgr
+dead_cedilla 0x29
+notsign 0x29 altgr
+zcaron 0x2b
+Zcaron 0x2b shift
+currency 0x2b altgr
+y 0x2c addupper
+guillemotleft 0x2c altgr
+guillemotright 0x2d altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+at 0x2f altgr
+braceleft 0x30 altgr
+braceright 0x31 altgr
+section 0x32 altgr
+masculine 0x32 shift altgr
+comma 0x33
+semicolon 0x33 shift
+horizconnector 0x33 altgr
+period 0x34
+colon 0x34 shift
+periodcentered 0x34 altgr
+minus 0x35
+underscore 0x35 shift
+dead_belowdot 0x35 altgr
diff --git a/tools/ioemu/keymaps/sv b/tools/ioemu/keymaps/sv
new file mode 100644
index 0000000000..736d637b3f
--- /dev/null
+++ b/tools/ioemu/keymaps/sv
@@ -0,0 +1,82 @@
+map 0x0000041d
+include common
+
+#
+# Top row
+#
+section 0x29
+onehalf 0x29 shift
+
+# 1
+exclam 0x2 shift
+
+# 2
+quotedbl 0x3 shift
+at 0x3 altgr
+
+# 3
+numbersign 0x4 shift
+sterling 0x4 altgr
+# 4
+currency 0x5 shift
+dollar 0x5 altgr
+# 5
+percent 0x6 shift
+# 6
+ampersand 0x7 shift
+# 7
+slash 0x8 shift
+braceleft 0x8 altgr
+# 8
+parenleft 0x9 shift
+bracketleft 0x9 altgr
+# 9
+parenright 0xa shift
+bracketright 0xa altgr
+# 0
+equal 0xb shift
+braceright 0xb altgr
+
+plus 0xc
+question 0xc shift
+backslash 0xc altgr
+
+acute 0xd
+dead_acute 0xd
+grave 0xd shift
+dead_grave 0xd shift
+
+#
+# QWERTY first row
+#
+EuroSign 0x12 altgr
+aring 0x1a
+Aring 0x1a shift
+dead_diaeresis 0x1b
+dead_circumflex 0x1b shift
+dead_tilde 0x1b altgr
+
+#
+# QWERTY second row
+#
+odiaeresis 0x27
+Odiaeresis 0x27 shift
+adiaeresis 0x28
+Adiaeresis 0x28 shift
+apostrophe 0x2b
+asterisk 0x2b shift
+
+#
+# QWERTY third row
+#
+less 0x56
+greater 0x56 shift
+bar 0x56 altgr
+mu 0x32 altgr
+comma 0x33
+semicolon 0x33 shift
+period 0x34
+colon 0x34 shift
+minus 0x35
+underscore 0x35 shift
+
diff --git a/tools/ioemu/keymaps/th b/tools/ioemu/keymaps/th
new file mode 100644
index 0000000000..b65b6da5d9
--- /dev/null
+++ b/tools/ioemu/keymaps/th
@@ -0,0 +1,131 @@
+# generated from XKB map th
+include common
+map 0x41e
+exclam 0x02 shift
+Thai_lakkhangyao 0x02 altgr
+plus 0x02 shift altgr
+at 0x03 shift
+slash 0x03 altgr
+Thai_leknung 0x03 shift altgr
+numbersign 0x04 shift
+minus 0x04 altgr
+Thai_leksong 0x04 shift altgr
+dollar 0x05 shift
+Thai_phosamphao 0x05 altgr
+Thai_leksam 0x05 shift altgr
+percent 0x06 shift
+Thai_thothung 0x06 altgr
+Thai_leksi 0x06 shift altgr
+asciicircum 0x07 shift
+Thai_sarau 0x07 altgr
+Thai_sarauu 0x07 shift altgr
+ampersand 0x08 shift
+Thai_saraue 0x08 altgr
+Thai_baht 0x08 shift altgr
+asterisk 0x09 shift
+Thai_khokhwai 0x09 altgr
+Thai_lekha 0x09 shift altgr
+parenleft 0x0a shift
+Thai_totao 0x0a altgr
+Thai_lekhok 0x0a shift altgr
+parenright 0x0b shift
+Thai_chochan 0x0b altgr
+Thai_lekchet 0x0b shift altgr
+minus 0x0c
+underscore 0x0c shift
+Thai_khokhai 0x0c altgr
+Thai_lekpaet 0x0c shift altgr
+equal 0x0d
+plus 0x0d shift
+Thai_chochang 0x0d altgr
+Thai_lekkao 0x0d shift altgr
+Thai_maiyamok 0x10 altgr
+Thai_leksun 0x10 shift altgr
+Thai_saraaimaimalai 0x11 altgr
+quotedbl 0x11 shift altgr
+Thai_saraam 0x12 altgr
+Thai_dochada 0x12 shift altgr
+Thai_phophan 0x13 altgr
+Thai_thonangmontho 0x13 shift altgr
+Thai_saraa 0x14 altgr
+Thai_thothong 0x14 shift altgr
+Thai_maihanakat 0x15 altgr
+Thai_nikhahit 0x15 shift altgr
+Thai_saraii 0x16 altgr
+Thai_maitri 0x16 shift altgr
+Thai_rorua 0x17 altgr
+Thai_nonen 0x17 shift altgr
+Thai_nonu 0x18 altgr
+Thai_paiyannoi 0x18 shift altgr
+Thai_yoyak 0x19 altgr
+Thai_yoying 0x19 shift altgr
+bracketleft 0x1a
+braceleft 0x1a shift
+Thai_bobaimai 0x1a altgr
+Thai_thothan 0x1a shift altgr
+bracketright 0x1b
+braceright 0x1b shift
+Thai_loling 0x1b altgr
+comma 0x1b shift altgr
+Thai_fofan 0x1e altgr
+Thai_ru 0x1e shift altgr
+Thai_hohip 0x1f altgr
+Thai_khorakhang 0x1f shift altgr
+Thai_kokai 0x20 altgr
+Thai_topatak 0x20 shift altgr
+Thai_dodek 0x21 altgr
+Thai_sarao 0x21 shift altgr
+Thai_sarae 0x22 altgr
+Thai_chochoe 0x22 shift altgr
+Thai_maitho 0x23 altgr
+Thai_maitaikhu 0x23 shift altgr
+Thai_maiek 0x24 altgr
+Thai_maichattawa 0x24 shift altgr
+Thai_saraaa 0x25 altgr
+Thai_sorusi 0x25 shift altgr
+Thai_sosua 0x26 altgr
+Thai_sosala 0x26 shift altgr
+semicolon 0x27
+colon 0x27 shift
+Thai_wowaen 0x27 altgr
+Thai_soso 0x27 shift altgr
+apostrophe 0x28
+quotedbl 0x28 shift
+Thai_ngongu 0x28 altgr
+period 0x28 shift altgr
+grave 0x29
+asciitilde 0x29 shift
+underscore 0x29 altgr
+percent 0x29 shift altgr
+ISO_First_Group 0x2a shift
+backslash 0x2b
+bar 0x2b shift
+Thai_khokhuat 0x2b altgr
+Thai_khokhon 0x2b shift altgr
+Thai_phophung 0x2c altgr
+parenleft 0x2c shift altgr
+Thai_popla 0x2d altgr
+parenright 0x2d shift altgr
+Thai_saraae 0x2e altgr
+Thai_choching 0x2e shift altgr
+Thai_oang 0x2f altgr
+Thai_honokhuk 0x2f shift altgr
+Thai_sarai 0x30 altgr
+Thai_phinthu 0x30 shift altgr
+Thai_sarauee 0x31 altgr
+Thai_thanthakhat 0x31 shift altgr
+Thai_thothahan 0x32 altgr
+question 0x32 shift altgr
+comma 0x33
+less 0x33 shift
+Thai_moma 0x33 altgr
+Thai_thophuthao 0x33 shift altgr
+period 0x34
+greater 0x34 shift
+Thai_saraaimaimuan 0x34 altgr
+Thai_lochula 0x34 shift altgr
+slash 0x35
+question 0x35 shift
+Thai_fofa 0x35 altgr
+Thai_lu 0x35 shift altgr
+ISO_Last_Group 0x36 shift
diff --git a/tools/ioemu/keymaps/tr b/tools/ioemu/keymaps/tr
new file mode 100644
index 0000000000..5650e1e93f
--- /dev/null
+++ b/tools/ioemu/keymaps/tr
@@ -0,0 +1,123 @@
+# generated from XKB map tr
+include common
+map 0x41f
+exclam 0x02 shift
+onesuperior 0x02 altgr
+exclamdown 0x02 shift altgr
+apostrophe 0x03 shift
+at 0x03 altgr
+oneeighth 0x03 shift altgr
+dead_circumflex 0x04 shift
+numbersign 0x04 altgr
+sterling 0x04 shift altgr
+plus 0x05 shift
+dollar 0x05 altgr
+percent 0x06 shift
+onehalf 0x06 altgr
+threeeighths 0x06 shift altgr
+ampersand 0x07 shift
+asciicircum 0x07 altgr
+fiveeighths 0x07 shift altgr
+slash 0x08 shift
+braceleft 0x08 altgr
+seveneighths 0x08 shift altgr
+parenleft 0x09 shift
+bracketleft 0x09 altgr
+trademark 0x09 shift altgr
+parenright 0x0a shift
+bracketright 0x0a altgr
+plusminus 0x0a shift altgr
+equal 0x0b shift
+braceright 0x0b altgr
+degree 0x0b shift altgr
+asterisk 0x0c
+question 0x0c shift
+backslash 0x0c altgr
+questiondown 0x0c shift altgr
+minus 0x0d
+underscore 0x0d shift
+dead_cedilla 0x0d altgr
+dead_ogonek 0x0d shift altgr
+at 0x10 altgr
+Greek_OMEGA 0x10 shift altgr
+lstroke 0x11 altgr
+Lstroke 0x11 shift altgr
+EuroSign 0x12 altgr
+paragraph 0x13 altgr
+registered 0x13 shift altgr
+tslash 0x14 altgr
+Tslash 0x14 shift altgr
+leftarrow 0x15 altgr
+yen 0x15 shift altgr
+downarrow 0x16 altgr
+uparrow 0x16 shift altgr
+idotless 0x17
+I 0x17 shift
+rightarrow 0x17 altgr
+oslash 0x18 altgr
+Ooblique 0x18 shift altgr
+thorn 0x19 altgr
+THORN 0x19 shift altgr
+gbreve 0x1a
+Gbreve 0x1a shift
+dead_diaeresis 0x1a altgr
+dead_abovering 0x1a shift altgr
+udiaeresis 0x1b
+Udiaeresis 0x1b shift
+asciitilde 0x1b altgr
+dead_macron 0x1b shift altgr
+ae 0x1e altgr
+AE 0x1e shift altgr
+ssharp 0x1f altgr
+section 0x1f shift altgr
+eth 0x20 altgr
+ETH 0x20 shift altgr
+dstroke 0x21 altgr
+ordfeminine 0x21 shift altgr
+eng 0x22 altgr
+ENG 0x22 shift altgr
+hstroke 0x23 altgr
+Hstroke 0x23 shift altgr
+kra 0x25 altgr
+ampersand 0x25 shift altgr
+lstroke 0x26 altgr
+Lstroke 0x26 shift altgr
+scedilla 0x27
+Scedilla 0x27 shift
+dead_acute 0x27 altgr
+dead_doubleacute 0x27 shift altgr
+i 0x28
+Iabovedot 0x28 shift
+dead_circumflex 0x28 altgr
+dead_caron 0x28 shift altgr
+backslash 0x29
+quotedbl 0x29 shift
+asciitilde 0x29 altgr
+comma 0x2b
+semicolon 0x2b shift
+bar 0x2b altgr
+dead_breve 0x2b shift altgr
+guillemotleft 0x2c altgr
+less 0x2c shift altgr
+guillemotright 0x2d altgr
+greater 0x2d shift altgr
+cent 0x2e altgr
+copyright 0x2e shift altgr
+leftdoublequotemark 0x2f altgr
+grave 0x2f shift altgr
+rightdoublequotemark 0x30 altgr
+apostrophe 0x30 shift altgr
+mu 0x32 altgr
+masculine 0x32 shift altgr
+odiaeresis 0x33
+Odiaeresis 0x33 shift
+less 0x33 altgr
+multiply 0x33 shift altgr
+ccedilla 0x34
+Ccedilla 0x34 shift
+greater 0x34 altgr
+division 0x34 shift altgr
+period 0x35
+colon 0x35 shift
+dead_belowdot 0x35 altgr
+dead_abovedot 0x35 shift altgr
diff --git a/tools/ioemu/main.c b/tools/ioemu/main.c
new file mode 100644
index 0000000000..d745aed128
--- /dev/null
+++ b/tools/ioemu/main.c
@@ -0,0 +1,250 @@
+/*
+ * qemu user main
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "qemu.h"
+
+#define DEBUG_LOGFILE "/tmp/qemu.log"
+
+#ifdef __APPLE__
+#include <crt_externs.h>
+# define environ (*_NSGetEnviron())
+#endif
+
+static const char *interp_prefix = CONFIG_QEMU_PREFIX;
+
+#if defined(__i386__) && !defined(CONFIG_STATIC)
+/* Force usage of an ELF interpreter even if it is an ELF shared
+ object ! */
+const char interp[] __attribute__((section(".interp"))) = "/lib/ld-linux.so.2";
+#endif
+
+/* for recent libc, we add these dummy symbols which are not declared
+ when generating a linked object (bug in ld ?) */
+#if (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)) && !defined(CONFIG_STATIC)
+long __preinit_array_start[0];
+long __preinit_array_end[0];
+long __init_array_start[0];
+long __init_array_end[0];
+long __fini_array_start[0];
+long __fini_array_end[0];
+#endif
+
+/* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
+ we allocate a bigger stack. Need a better solution, for example
+ by remapping the process stack directly at the right place */
+unsigned long x86_stack_size = 512 * 1024;
+
+void gemu_log(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+/* timers for rdtsc */
+
+#if defined(__i386__)
+
+int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile ("rdtsc" : "=A" (val));
+ return val;
+}
+
+#elif defined(__x86_64__)
+
+int64_t cpu_get_real_ticks(void)
+{
+ uint32_t low,high;
+ int64_t val;
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+ val = high;
+ val <<= 32;
+ val |= low;
+ return val;
+}
+
+#else
+
+static uint64_t emu_time;
+
+int64_t cpu_get_real_ticks(void)
+{
+ return emu_time++;
+}
+
+#endif
+
+#ifdef TARGET_I386
+/***********************************************************/
+/* CPUX86 core interface */
+
+uint64_t cpu_get_tsc(CPUX86State *env)
+{
+ return cpu_get_real_ticks();
+}
+
+void cpu_loop()
+{
+}
+#endif
+
+void usage(void)
+{
+ printf("qemu-" TARGET_ARCH " version " QEMU_VERSION ", Copyright (c) 2003-2004 Fabrice Bellard\n"
+ "usage: qemu-" TARGET_ARCH " [-h] [-d opts] [-L path] [-s size] program [arguments...]\n"
+ "Linux CPU emulator (compiled for %s emulation)\n"
+ "\n"
+ "-h print this help\n"
+ "-L path set the elf interpreter prefix (default=%s)\n"
+ "-s size set the stack size in bytes (default=%ld)\n"
+ "\n"
+ "debug options:\n"
+#ifdef USE_CODE_COPY
+ "-no-code-copy disable code copy acceleration\n"
+#endif
+ "-l options activate log (logfile=%s)\n"
+ "-p xen port number\n",
+ "-d xen domain id\n",
+ TARGET_ARCH,
+ interp_prefix,
+ x86_stack_size,
+ DEBUG_LOGFILE);
+ _exit(1);
+}
+
+/* XXX: currently only used for async signals (see signal.c) */
+CPUState *global_env;
+/* used only if single thread */
+CPUState *cpu_single_env = NULL;
+
+/* used to free thread contexts */
+TaskState *first_task_state;
+
+int main(int argc, char **argv)
+{
+ const char *filename;
+ struct target_pt_regs regs1, *regs = &regs1;
+ struct image_info info1, *info = &info1;
+ TaskState ts1, *ts = &ts1;
+ CPUState *env;
+ int optind;
+ const char *r;
+
+ if (argc <= 1)
+ usage();
+
+ /* init debug */
+ cpu_set_log_filename(DEBUG_LOGFILE);
+ cpu_set_log(0);
+
+ optind = 1;
+ for(;;) {
+ if (optind >= argc)
+ break;
+ r = argv[optind];
+ if (r[0] != '-')
+ break;
+ optind++;
+ r++;
+ if (!strcmp(r, "-")) {
+ break;
+ } else if (!strcmp(r, "l")) {
+ int mask;
+ CPULogItem *item;
+
+ if (optind >= argc)
+ break;
+
+ r = argv[optind++];
+ mask = cpu_str_to_log_mask(r);
+ if (!mask) {
+ printf("Log items (comma separated):\n");
+ for(item = cpu_log_items; item->mask != 0; item++) {
+ printf("%-10s %s\n", item->name, item->help);
+ }
+ exit(1);
+ }
+ cpu_set_log(mask);
+ } else if (!strcmp(r, "s")) {
+ r = argv[optind++];
+ x86_stack_size = strtol(r, (char **)&r, 0);
+ if (x86_stack_size <= 0)
+ usage();
+ if (*r == 'M')
+ x86_stack_size *= 1024 * 1024;
+ else if (*r == 'k' || *r == 'K')
+ x86_stack_size *= 1024;
+ } else if (!strcmp(r, "L")) {
+ interp_prefix = argv[optind++];
+ } else if (!strcmp(r, "p")) {
+ qemu_host_page_size = atoi(argv[optind++]);
+ if (qemu_host_page_size == 0 ||
+ (qemu_host_page_size & (qemu_host_page_size - 1)) != 0) {
+ fprintf(stderr, "page size must be a power of two\n");
+ exit(1);
+ }
+ } else
+#ifdef USE_CODE_COPY
+ if (!strcmp(r, "no-code-copy")) {
+ code_copy_enabled = 0;
+ } else
+#endif
+ {
+ usage();
+ }
+ }
+ if (optind >= argc)
+ usage();
+ filename = argv[optind];
+
+ /* Zero out regs */
+ memset(regs, 0, sizeof(struct target_pt_regs));
+
+ /* Zero out image_info */
+ memset(info, 0, sizeof(struct image_info));
+
+ /* Scan interp_prefix dir for replacement files. */
+ init_paths(interp_prefix);
+
+ /* NOTE: we need to init the CPU at this stage to get
+ qemu_host_page_size */
+ env = cpu_init();
+
+ global_env = env;
+
+ /* build Task State */
+ memset(ts, 0, sizeof(TaskState));
+ env->opaque = ts;
+ ts->used = 1;
+ env->user_mode_only = 1;
+
+ cpu_loop(env);
+ /* never exits */
+ return 0;
+}
diff --git a/tools/ioemu/monitor.c b/tools/ioemu/monitor.c
new file mode 100644
index 0000000000..dec25febaa
--- /dev/null
+++ b/tools/ioemu/monitor.c
@@ -0,0 +1,282 @@
+/*
+ * QEMU monitor
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include <dirent.h>
+
+//#define DEBUG
+//#define DEBUG_COMPLETION
+
+#ifndef offsetof
+#define offsetof(type, field) ((size_t) &((type *)0)->field)
+#endif
+
+/*
+ * Supported types:
+ *
+ * 'F' filename
+ * 'B' block device name
+ * 's' string (accept optional quote)
+ * 'i' integer
+ * '/' optional gdb-like print format (like "/10x")
+ *
+ * '?' optional type (for 'F', 's' and 'i')
+ *
+ */
+
+typedef struct term_cmd_t {
+ const char *name;
+ const char *args_type;
+ void (*handler)();
+ const char *params;
+ const char *help;
+} term_cmd_t;
+
+static CharDriverState *monitor_hd;
+
+static term_cmd_t term_cmds[];
+static term_cmd_t info_cmds[];
+
+static char term_outbuf[1024];
+static int term_outbuf_index;
+
+static void monitor_start_input(void);
+
+void term_flush(void)
+{
+ if (term_outbuf_index > 0) {
+ if(monitor_hd)
+ qemu_chr_write(monitor_hd, term_outbuf, term_outbuf_index);
+ else
+ fwrite(term_outbuf, term_outbuf_index, 1, stderr);
+ term_outbuf_index = 0;
+ }
+}
+
+/* flush at every end of line or if the buffer is full */
+void term_puts(const char *str)
+{
+ int c;
+ for(;;) {
+ c = *str++;
+ if (c == '\0')
+ break;
+ term_outbuf[term_outbuf_index++] = c;
+ if (term_outbuf_index >= sizeof(term_outbuf) ||
+ c == '\n')
+ term_flush();
+ }
+}
+
+void term_vprintf(const char *fmt, va_list ap)
+{
+ char buf[4096];
+ vsnprintf(buf, sizeof(buf), fmt, ap);
+ term_puts(buf);
+}
+
+void term_printf(const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ term_vprintf(fmt, ap);
+ va_end(ap);
+}
+
+static int compare_cmd(const char *name, const char *list)
+{
+ const char *p, *pstart;
+ int len;
+ len = strlen(name);
+ p = list;
+ for(;;) {
+ pstart = p;
+ p = strchr(p, '|');
+ if (!p)
+ p = pstart + strlen(pstart);
+ if ((p - pstart) == len && !memcmp(pstart, name, len))
+ return 1;
+ if (*p == '\0')
+ break;
+ p++;
+ }
+ return 0;
+}
+
+static void do_quit(void)
+{
+ extern int domid;
+ extern FILE* logfile;
+ char destroy_cmd[20];
+ sprintf(destroy_cmd, "xm destroy %d", domid);
+ if (system(destroy_cmd) == -1)
+ fprintf(logfile, "%s failed.!\n", destroy_cmd);
+ exit(0);
+}
+
+static term_cmd_t term_cmds[] = {
+ { "q|quit", "", do_quit,
+ "", "quit the emulator" },
+ { NULL, NULL, },
+};
+
+#define MAX_ARGS 16
+
+static void monitor_handle_command(const char *cmdline)
+{
+ const char *p, *pstart, *typestr;
+ char *q;
+ int c, nb_args, len, i;
+ term_cmd_t *cmd;
+ char cmdname[256];
+ void *str_allocated[MAX_ARGS];
+ void *args[MAX_ARGS];
+
+#ifdef DEBUG
+ term_printf("command='%s'\n", cmdline);
+#endif
+
+ /* extract the command name */
+ p = cmdline;
+ q = cmdname;
+ while (isspace(*p))
+ p++;
+ if (*p == '\0')
+ return;
+ pstart = p;
+ while (*p != '\0' && *p != '/' && !isspace(*p))
+ p++;
+ len = p - pstart;
+ if (len > sizeof(cmdname) - 1)
+ len = sizeof(cmdname) - 1;
+ memcpy(cmdname, pstart, len);
+ cmdname[len] = '\0';
+
+ /* find the command */
+ for(cmd = term_cmds; cmd->name != NULL; cmd++) {
+ if (compare_cmd(cmdname, cmd->name))
+ goto found;
+ }
+ term_printf("unknown command: '%s'\n", cmdname);
+ return;
+ found:
+
+ for(i = 0; i < MAX_ARGS; i++)
+ str_allocated[i] = NULL;
+
+ /* parse the parameters */
+ typestr = cmd->args_type;
+ nb_args = 0;
+ for(;;) {
+ c = *typestr;
+ if (c == '\0')
+ break;
+ typestr++;
+ switch(c) {
+ /* TODO: add more commands we need here to support vmx device model */
+ case 'F':
+ case 'B':
+ case 's':
+ case '/':
+ case 'i':
+ case '-':
+ default:
+ term_printf("%s: unknown type '%c', we only support quit command now.\n", cmdname, c);
+ goto fail;
+ }
+ }
+ /* check that all arguments were parsed */
+ while (isspace(*p))
+ p++;
+ if (*p != '\0') {
+ term_printf("%s: extraneous characters at the end of line\n",
+ cmdname);
+ goto fail;
+ }
+
+ switch(nb_args) {
+ case 0:
+ cmd->handler();
+ break;
+ case 1:
+ cmd->handler(args[0]);
+ break;
+ case 2:
+ cmd->handler(args[0], args[1]);
+ break;
+ case 3:
+ cmd->handler(args[0], args[1], args[2]);
+ break;
+ case 4:
+ cmd->handler(args[0], args[1], args[2], args[3]);
+ break;
+ case 5:
+ cmd->handler(args[0], args[1], args[2], args[3], args[4]);
+ break;
+ case 6:
+ cmd->handler(args[0], args[1], args[2], args[3], args[4], args[5]);
+ break;
+ default:
+ term_printf("unsupported number of arguments: %d\n", nb_args);
+ goto fail;
+ }
+ fail:
+ for(i = 0; i < MAX_ARGS; i++)
+ qemu_free(str_allocated[i]);
+ return;
+}
+
+static int term_can_read(void *opaque)
+{
+ return 128;
+}
+
+static void term_read(void *opaque, const uint8_t *buf, int size)
+{
+ int i;
+ for(i = 0; i < size; i++)
+ readline_handle_byte(buf[i]);
+}
+
+static void monitor_start_input(void);
+
+static void monitor_handle_command1(void *opaque, const char *cmdline)
+{
+ monitor_handle_command(cmdline);
+ monitor_start_input();
+}
+
+static void monitor_start_input(void)
+{
+ readline_start("(VTXen) ", 0, monitor_handle_command1, NULL);
+}
+
+void monitor_init(CharDriverState *hd, int show_banner)
+{
+ monitor_hd = hd;
+ if (show_banner) {
+ term_printf("VMX device model. type 'q' to exit\n");
+ }
+ qemu_chr_add_read_handler(hd, term_can_read, term_read, NULL);
+ monitor_start_input();
+}
diff --git a/tools/ioemu/osdep.c b/tools/ioemu/osdep.c
new file mode 100644
index 0000000000..087a5c2185
--- /dev/null
+++ b/tools/ioemu/osdep.c
@@ -0,0 +1,499 @@
+/*
+ * QEMU low level functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "cpu.h"
+
+#if defined(__i386__) && !defined(CONFIG_SOFTMMU) && !defined(CONFIG_USER_ONLY)
+
+#include <sys/mman.h>
+#include <sys/ipc.h>
+
+/* When not using soft mmu, libc independant functions are needed for
+ the CPU core because it needs to use alternates stacks and
+ libc/thread incompatibles settings */
+
+#include <linux/unistd.h>
+
+/* Raw "int $0x80" Linux syscall wrappers (i386 ABI: eax = number,
+ * arguments in ebx, ecx, edx, esi, edi, ebp).  Used so the CPU core
+ * never enters libc, whose threading/stack assumptions clash with the
+ * alternate signal stacks used by the emulator. */
+#define QEMU_SYSCALL0(name) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+ : "=a" (__res) \
+ : "0" (__NR_##name)); \
+return __res; \
+}
+
+#define QEMU_SYSCALL1(name,arg1) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"b" ((long)(arg1))); \
+return __res; \
+}
+
+#define QEMU_SYSCALL2(name,arg1,arg2) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2))); \
+return __res; \
+}
+
+#define QEMU_SYSCALL3(name,arg1,arg2,arg3) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3))); \
+return __res; \
+}
+
+#define QEMU_SYSCALL4(name,arg1,arg2,arg3,arg4) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3)),"S" ((long)(arg4))); \
+return __res; \
+}
+
+#define QEMU_SYSCALL5(name,arg1,arg2,arg3,arg4,arg5) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+ : "=a" (__res) \
+ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5))); \
+return __res; \
+}
+
+/* 6-argument form: the 6th argument travels in ebp, which is also the
+ * frame pointer, so it is saved and restored by hand around the trap. */
+#define QEMU_SYSCALL6(name,arg1,arg2,arg3,arg4,arg5,arg6) \
+{ \
+long __res; \
+__asm__ volatile ("push %%ebp ; movl %%eax,%%ebp ; movl %1,%%eax ; int $0x80 ; pop %%ebp" \
+ : "=a" (__res) \
+ : "i" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
+ "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)), \
+ "0" ((long)(arg6))); \
+return __res; \
+}
+
+/* write(2) via a raw syscall; returns the kernel result directly
+ * (negative errno value on failure — the errno variable is untouched). */
+int qemu_write(int fd, const void *buf, size_t n)
+{
+ QEMU_SYSCALL3(write, fd, buf, n);
+}
+
+
+
+/****************************************************************/
+/* shmat replacement */
+
+/* Raw sys_ipc(2) multiplexer call — the single SysV IPC entry point on
+ * i386; 'call' selects the operation (e.g. SHMAT below). */
+int qemu_ipc(int call, unsigned long first,
+ unsigned long second, unsigned long third,
+ void *ptr, unsigned long fifth)
+{
+ QEMU_SYSCALL6(ipc, call, first, second, third, ptr, fifth);
+}
+
+#define SHMAT 21 /* sys_ipc sub-call number for shmat */
+
+/* we must define shmat so that a specific address will be used when
+ mapping the X11 ximage */
+void *shmat(int shmid, const void *shmaddr, int shmflg)
+{
+ void *ptr;
+ int ret;
+ /* we give an address in the right memory area */
+ if (!shmaddr)
+ shmaddr = get_mmap_addr(8192 * 1024);
+ /* for SHMAT the kernel stores the mapped address through the third
+ argument (&ptr); shmaddr is the requested attach address */
+ ret = qemu_ipc(SHMAT, shmid, shmflg, (unsigned long)&ptr, (void *)shmaddr, 0);
+ if (ret < 0)
+ return NULL;
+ return ptr;
+}
+
+/****************************************************************/
+/* sigaction bypassing the threads */
+
+/* Invoke sys_rt_sigaction directly, bypassing the pthread wrapper so
+ * handlers can run on an alternate signal stack. */
+static int kernel_sigaction(int signum, const struct qemu_sigaction *act,
+ struct qemu_sigaction *oldact,
+ int sigsetsize)
+{
+ QEMU_SYSCALL4(rt_sigaction, signum, act, oldact, sigsetsize);
+}
+
+/* Public sigaction replacement; 8 is the kernel sigset_t size in bytes
+ * on i386 (64 signals). */
+int qemu_sigaction(int signum, const struct qemu_sigaction *act,
+ struct qemu_sigaction *oldact)
+{
+ return kernel_sigaction(signum, act, oldact, 8);
+}
+
+/****************************************************************/
+/* memory allocation */
+
+//#define DEBUG_MALLOC
+
+#define MALLOC_BASE 0xab000000
+#define PHYS_RAM_BASE 0xac000000
+
+#define MALLOC_ALIGN 16
+#define BLOCK_HEADER_SIZE 16
+
+/* Free-list node of the private allocator; blocks live in memory
+ * mmap'ed starting at MALLOC_BASE, away from guest physical RAM. */
+typedef struct MemoryBlock {
+ struct MemoryBlock *next;
+ unsigned long size; /* size of block, including header */
+} MemoryBlock;
+
+/* singly-linked list of recycled blocks (no coalescing) */
+static MemoryBlock *first_free_block;
+/* next virtual address handed out by malloc_get_space() */
+static unsigned long malloc_addr = MALLOC_BASE;
+
+/* Map a fresh page-aligned chunk at the allocator cursor.  MAP_FIXED is
+ * used because the MALLOC_BASE region is reserved for this allocator. */
+static void *malloc_get_space(size_t size)
+{
+ void *ptr;
+ size = TARGET_PAGE_ALIGN(size);
+ ptr = mmap((void *)malloc_addr, size,
+ PROT_WRITE | PROT_READ,
+ MAP_PRIVATE | MAP_FIXED | MAP_ANON, -1, 0);
+ if (ptr == MAP_FAILED)
+ return NULL;
+ malloc_addr += size;
+ return ptr;
+}
+
+/* First-fit allocator: scan the free list for a block large enough,
+ * otherwise mmap new space.  The request is rounded up to MALLOC_ALIGN
+ * and includes a BLOCK_HEADER_SIZE header; any unused tail of the chosen
+ * area is pushed back onto the free list.  Returns NULL on size == 0 or
+ * when no memory can be mapped. */
+void *qemu_malloc(size_t size)
+{
+ MemoryBlock *mb, *mb1, **pmb;
+ void *ptr;
+ size_t size1, area_size;
+
+ if (size == 0)
+ return NULL;
+
+ size = (size + BLOCK_HEADER_SIZE + MALLOC_ALIGN - 1) & ~(MALLOC_ALIGN - 1);
+ pmb = &first_free_block;
+ for(;;) {
+ mb = *pmb;
+ if (mb == NULL)
+ break;
+ if (size <= mb->size)
+ goto found;
+ pmb = &mb->next;
+ }
+ /* no big enough blocks found: get new space */
+ area_size = TARGET_PAGE_ALIGN(size);
+ mb = malloc_get_space(area_size);
+ if (!mb)
+ return NULL;
+ size1 = area_size - size;
+ if (size1 > 0) {
+ /* create a new free block */
+ mb1 = (MemoryBlock *)((uint8_t *)mb + size);
+ mb1->next = NULL;
+ mb1->size = size1;
+ *pmb = mb1;
+ }
+ goto the_end;
+ found:
+ /* a free block was found: use it */
+ size1 = mb->size - size;
+ if (size1 > 0) {
+ /* create a new free block */
+ mb1 = (MemoryBlock *)((uint8_t *)mb + size);
+ mb1->next = mb->next;
+ mb1->size = size1;
+ *pmb = mb1;
+ } else {
+ /* suppress the first block */
+ *pmb = mb->next;
+ }
+ the_end:
+ mb->size = size;
+ mb->next = NULL;
+ ptr = ((uint8_t *)mb + BLOCK_HEADER_SIZE);
+#ifdef DEBUG_MALLOC
+ qemu_printf("malloc: size=0x%x ptr=0x%lx\n", size, (unsigned long)ptr);
+#endif
+ return ptr;
+}
+
+/* Return a block to the head of the free list.  Adjacent free blocks
+ * are never coalesced; NULL is accepted as a no-op. */
+void qemu_free(void *ptr)
+{
+ MemoryBlock *mb;
+
+ if (!ptr)
+ return;
+ mb = (MemoryBlock *)((uint8_t *)ptr - BLOCK_HEADER_SIZE);
+ mb->next = first_free_block;
+ first_free_block = mb;
+}
+
+/****************************************************************/
+/* virtual memory allocation */
+
+/* cursor for virtual address ranges above guest RAM */
+unsigned long mmap_addr = PHYS_RAM_BASE;
+
+/* Hand out a page-aligned virtual address range, leaving one guard page
+ * between consecutive allocations.  Nothing is actually mapped here. */
+void *get_mmap_addr(unsigned long size)
+{
+ unsigned long addr;
+ addr = mmap_addr;
+ mmap_addr += ((size + 4095) & ~4095) + 4096;
+ return (void *)addr;
+}
+
+#else
+
+/* Portable variant: plain write(2) with failures mapped to -errno so
+ * callers see the same convention as the raw-syscall build. */
+int qemu_write(int fd, const void *buf, size_t n)
+{
+ int ret;
+ ret = write(fd, buf, n);
+ if (ret < 0)
+ return -errno;
+ else
+ return ret;
+}
+
+/* Softmmu build: no fixed-address trick is needed. */
+void *get_mmap_addr(unsigned long size)
+{
+ return NULL;
+}
+
+/* Softmmu build: plain libc allocator. */
+void qemu_free(void *ptr)
+{
+ free(ptr);
+}
+
+void *qemu_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+#endif
+
+/* Zero-filled allocation; returns NULL when qemu_malloc fails. */
+void *qemu_mallocz(size_t size)
+{
+ void *ptr;
+ ptr = qemu_malloc(size);
+ if (!ptr)
+ return NULL;
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+/* strdup() built on qemu_malloc; NULL on allocation failure.
+ * NOTE(review): str must be non-NULL — strlen(NULL) is undefined. */
+char *qemu_strdup(const char *str)
+{
+ char *ptr;
+ ptr = qemu_malloc(strlen(str) + 1);
+ if (!ptr)
+ return NULL;
+ strcpy(ptr, str);
+ return ptr;
+}
+
+/****************************************************************/
+/* printf support */
+
+/* ASCII-only digit test — avoids the locale-dependent <ctype.h> macros. */
+static inline int qemu_isdigit(int c)
+{
+ return c >= '0' && c <= '9';
+}
+
+#define OUTCHAR(c) (buflen > 0? (--buflen, *buf++ = (c)): 0)
+
+/* from BSD ppp sources */
+int qemu_vsnprintf(char *buf, int buflen, const char *fmt, va_list args)
+{
+ int c, i, n;
+ int width, prec, fillch;
+ int base, len, neg;
+ unsigned long val = 0;
+ const char *f;
+ char *str, *buf0;
+ char num[32];
+ static const char hexchars[] = "0123456789abcdef";
+
+ buf0 = buf;
+ --buflen;
+ while (buflen > 0) {
+ for (f = fmt; *f != '%' && *f != 0; ++f)
+ ;
+ if (f > fmt) {
+ len = f - fmt;
+ if (len > buflen)
+ len = buflen;
+ memcpy(buf, fmt, len);
+ buf += len;
+ buflen -= len;
+ fmt = f;
+ }
+ if (*fmt == 0)
+ break;
+ c = *++fmt;
+ width = prec = 0;
+ fillch = ' ';
+ if (c == '0') {
+ fillch = '0';
+ c = *++fmt;
+ }
+ if (c == '*') {
+ width = va_arg(args, int);
+ c = *++fmt;
+ } else {
+ while (qemu_isdigit(c)) {
+ width = width * 10 + c - '0';
+ c = *++fmt;
+ }
+ }
+ if (c == '.') {
+ c = *++fmt;
+ if (c == '*') {
+ prec = va_arg(args, int);
+ c = *++fmt;
+ } else {
+ while (qemu_isdigit(c)) {
+ prec = prec * 10 + c - '0';
+ c = *++fmt;
+ }
+ }
+ }
+ /* modifiers */
+ switch(c) {
+ case 'l':
+ c = *++fmt;
+ break;
+ default:
+ break;
+ }
+ str = 0;
+ base = 0;
+ neg = 0;
+ ++fmt;
+ switch (c) {
+ case 'd':
+ i = va_arg(args, int);
+ if (i < 0) {
+ neg = 1;
+ val = -i;
+ } else
+ val = i;
+ base = 10;
+ break;
+ case 'o':
+ val = va_arg(args, unsigned int);
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ val = va_arg(args, unsigned int);
+ base = 16;
+ break;
+ case 'p':
+ val = (unsigned long) va_arg(args, void *);
+ base = 16;
+ neg = 2;
+ break;
+ case 's':
+ str = va_arg(args, char *);
+ break;
+ case 'c':
+ num[0] = va_arg(args, int);
+ num[1] = 0;
+ str = num;
+ break;
+ default:
+ *buf++ = '%';
+ if (c != '%')
+ --fmt; /* so %z outputs %z etc. */
+ --buflen;
+ continue;
+ }
+ if (base != 0) {
+ str = num + sizeof(num);
+ *--str = 0;
+ while (str > num + neg) {
+ *--str = hexchars[val % base];
+ val = val / base;
+ if (--prec <= 0 && val == 0)
+ break;
+ }
+ switch (neg) {
+ case 1:
+ *--str = '-';
+ break;
+ case 2:
+ *--str = 'x';
+ *--str = '0';
+ break;
+ }
+ len = num + sizeof(num) - 1 - str;
+ } else {
+ len = strlen(str);
+ if (prec > 0 && len > prec)
+ len = prec;
+ }
+ if (width > 0) {
+ if (width > buflen)
+ width = buflen;
+ if ((n = width - len) > 0) {
+ buflen -= n;
+ for (; n > 0; --n)
+ *buf++ = fillch;
+ }
+ }
+ if (len > buflen)
+ len = buflen;
+ memcpy(buf, str, len);
+ buf += len;
+ buflen -= len;
+ }
+ *buf = 0;
+ return buf - buf0;
+}
+
+/* Format into a 1 KB stack buffer and write it to stdout (fd 1);
+ * output longer than the buffer is silently truncated. */
+void qemu_vprintf(const char *fmt, va_list ap)
+{
+ char buf[1024];
+ int len;
+
+ len = qemu_vsnprintf(buf, sizeof(buf), fmt, ap);
+ qemu_write(1, buf, len);
+}
+
+/* Variadic front end for qemu_vprintf. */
+void qemu_printf(const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ qemu_vprintf(fmt, ap);
+ va_end(ap);
+}
+
diff --git a/tools/ioemu/osdep.h b/tools/ioemu/osdep.h
new file mode 100644
index 0000000000..f1d18202b0
--- /dev/null
+++ b/tools/ioemu/osdep.h
@@ -0,0 +1,50 @@
+#ifndef QEMU_OSDEP_H
+#define QEMU_OSDEP_H
+
+#include <stdarg.h>
+
+/* libc-independent printf family (implemented in osdep.c) */
+int qemu_vsnprintf(char *buf, int buflen, const char *fmt, va_list args);
+void qemu_vprintf(const char *fmt, va_list ap);
+void qemu_printf(const char *fmt, ...);
+
+/* allocator wrappers; callers own the returned memory (qemu_free) */
+void *qemu_malloc(size_t size);
+void *qemu_mallocz(size_t size);
+void qemu_free(void *ptr);
+char *qemu_strdup(const char *str);
+
+void *get_mmap_addr(unsigned long size);
+
+/* specific kludges for OS compatibility (should be moved elsewhere) */
+#if defined(__i386__) && !defined(CONFIG_SOFTMMU) && !defined(CONFIG_USER_ONLY)
+
+/* disabled pthread version of longjmp which prevent us from using an
+ alternative signal stack */
+extern void __longjmp(jmp_buf env, int val);
+#define longjmp __longjmp
+
+#include <signal.h>
+
+/* NOTE: it works only because the glibc sigset_t is >= kernel sigset_t */
+struct qemu_sigaction {
+ union {
+ void (*_sa_handler)(int);
+ void (*_sa_sigaction)(int, struct siginfo *, void *);
+ } _u;
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ sigset_t sa_mask; /* mask last for extensibility */
+};
+
+int qemu_sigaction(int signum, const struct qemu_sigaction *act,
+ struct qemu_sigaction *oldact);
+
+/* route all sigaction uses through the raw-syscall implementation */
+#undef sigaction
+#undef sa_handler
+#undef sa_sigaction
+#define sigaction qemu_sigaction
+#define sa_handler _u._sa_handler
+#define sa_sigaction _u._sa_sigaction
+
+#endif
+
+#endif
diff --git a/tools/ioemu/path.c b/tools/ioemu/path.c
new file mode 100644
index 0000000000..76809705a4
--- /dev/null
+++ b/tools/ioemu/path.c
@@ -0,0 +1,147 @@
+/* Code to mangle pathnames into those matching a given prefix.
+ eg. open("/lib/foo.so") => open("/usr/gnemul/i386-linux/lib/foo.so");
+
+ The assumption is that this area does not change.
+*/
+#include <sys/types.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include "qemu.h"
+
+struct pathelem
+{
+ /* Name of this, eg. lib */
+ char *name;
+ /* Full path name, eg. /usr/gnemul/x86-linux/lib. */
+ char *pathname;
+ struct pathelem *parent;
+ /* Children */
+ unsigned int num_entries;
+ struct pathelem *entries[0];
+};
+
+static struct pathelem *base;
+
+/* First N chars of S1 match S2, and S2 is N chars long. */
+/* First N chars of S1 match S2, and S2 is N chars long. */
+/* i.e. returns non-zero iff s2 equals the n-byte prefix of s1 exactly. */
+static int strneq(const char *s1, unsigned int n, const char *s2)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ if (s1[i] != s2[i])
+ return 0;
+ return s2[i] == 0;
+}
+
+static struct pathelem *add_entry(struct pathelem *root, const char *name);
+
+/* Allocate a pathelem for 'name' under directory 'root'; children are
+ * appended later by add_entry().
+ * NOTE(review): malloc/strdup/asprintf results are unchecked — an OOM
+ * here dereferences NULL. */
+static struct pathelem *new_entry(const char *root,
+ struct pathelem *parent,
+ const char *name)
+{
+ struct pathelem *new = malloc(sizeof(*new));
+ new->name = strdup(name);
+ asprintf(&new->pathname, "%s/%s", root, name);
+ new->num_entries = 0;
+ return new;
+}
+
+#define streq(a,b) (strcmp((a), (b)) == 0)
+
+/* If 'path' names a readable directory, recursively add each entry
+ * (except "." and "..") as a child.  Returns the possibly-moved node,
+ * since add_entry() may realloc it. */
+static struct pathelem *add_dir_maybe(struct pathelem *path)
+{
+ DIR *dir;
+
+ if ((dir = opendir(path->pathname)) != NULL) {
+ struct dirent *dirent;
+
+ while ((dirent = readdir(dir)) != NULL) {
+ if (!streq(dirent->d_name,".") && !streq(dirent->d_name,"..")){
+ path = add_entry(path, dirent->d_name);
+ }
+ }
+ closedir(dir);
+ }
+ return path;
+}
+
+/* Grow root's flexible entries[] array by one and populate the new slot
+ * (recursing into subdirectories).  Returns the possibly-moved root.
+ * NOTE(review): 'root = realloc(root, ...)' loses the old pointer on
+ * failure; acceptable for init-time code that would abort anyway. */
+static struct pathelem *add_entry(struct pathelem *root, const char *name)
+{
+ root->num_entries++;
+
+ root = realloc(root, sizeof(*root)
+ + sizeof(root->entries[0])*root->num_entries);
+
+ root->entries[root->num_entries-1] = new_entry(root->pathname, root, name);
+ root->entries[root->num_entries-1]
+ = add_dir_maybe(root->entries[root->num_entries-1]);
+ return root;
+}
+
+/* This needs to be done after tree is stabalized (ie. no more reallocs!). */
+/* This needs to be done after tree is stabalized (ie. no more reallocs!). */
+/* Walk the tree depth-first, wiring each node's parent pointer. */
+static void set_parents(struct pathelem *child, struct pathelem *parent)
+{
+ unsigned int i;
+
+ child->parent = parent;
+ for (i = 0; i < child->num_entries; i++)
+ set_parents(child->entries[i], child);
+}
+
+/* Build the path-mangling tree rooted at 'prefix'.  Requires an
+ * absolute prefix other than "/"; otherwise path() stays a no-op.
+ * NOTE(review): the prefix[0] == '\0' test is dead code — an empty
+ * string already fails the prefix[0] != '/' check before it. */
+void init_paths(const char *prefix)
+{
+ if (prefix[0] != '/' ||
+ prefix[0] == '\0' ||
+ !strcmp(prefix, "/"))
+ return;
+
+ base = new_entry("", NULL, prefix+1);
+ base = add_dir_maybe(base);
+ if (base->num_entries == 0) {
+ free (base);
+ base = NULL;
+ } else {
+ set_parents(base, base);
+ }
+}
+
+/* FIXME: Doesn't handle DIR/.. where DIR is not in emulated dir. */
+/* FIXME: Doesn't handle DIR/.. where DIR is not in emulated dir. */
+/* Resolve one path component at a time against the prebuilt tree,
+ * handling "." and ".." via the node/parent pointers.  Returns the
+ * mangled pathname, or NULL when a component is not in the tree. */
+static const char *
+follow_path(const struct pathelem *cursor, const char *name)
+{
+ unsigned int i, namelen;
+
+ name += strspn(name, "/");
+ namelen = strcspn(name, "/");
+
+ if (namelen == 0)
+ return cursor->pathname;
+
+ if (strneq(name, namelen, ".."))
+ return follow_path(cursor->parent, name + namelen);
+
+ if (strneq(name, namelen, "."))
+ return follow_path(cursor, name + namelen);
+
+ for (i = 0; i < cursor->num_entries; i++)
+ if (strneq(name, namelen, cursor->entries[i]->name))
+ return follow_path(cursor->entries[i], name + namelen);
+
+ /* Not found */
+ return NULL;
+}
+
+/* Look for path in emulation dir, otherwise return name. */
+/* Look for path in emulation dir, otherwise return name. */
+/* Note: the `?:` form is a GNU C extension (a ?: b == a ? a : b). */
+const char *path(const char *name)
+{
+ /* Only do absolute paths: quick and dirty, but should mostly be OK.
+ Could do relative by tracking cwd. */
+ if (!base || name[0] != '/')
+ return name;
+
+ return follow_path(base, name) ?: name;
+}
diff --git a/tools/ioemu/qemu-binfmt-conf.sh b/tools/ioemu/qemu-binfmt-conf.sh
new file mode 100644
index 0000000000..e5acc474aa
--- /dev/null
+++ b/tools/ioemu/qemu-binfmt-conf.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# enable automatic i386/ARM/SPARC/PPC program execution by the kernel
+
+# load the binfmt_misc module
+/sbin/modprobe binfmt_misc
+
+# probe cpu type
+cpu=`uname -m`
+case "$cpu" in
+ i386|i486|i586|i686|i86pc|BePC)
+ cpu="i386"
+ ;;
+ "Power Macintosh"|ppc|ppc64)
+ cpu="ppc"
+ ;;
+ armv4l)
+ cpu="arm"
+ ;;
+esac
+
+# register the interpreter for each cpu except for the native one
+# (each line writes an ELF magic/mask pair to binfmt_misc; the e_machine
+# field in the magic selects the target architecture)
+if [ $cpu != "i386" ] ; then
+ echo ':i386:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfb\xff\xff\xff:/usr/local/bin/qemu-i386:' > /proc/sys/fs/binfmt_misc/register
+ echo ':i486:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfb\xff\xff\xff:/usr/local/bin/qemu-i386:' > /proc/sys/fs/binfmt_misc/register
+fi
+if [ $cpu != "arm" ] ; then
+ echo ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfb\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register
+fi
+if [ $cpu != "sparc" ] ; then
+ echo ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfb\xff\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register
+fi
+if [ $cpu != "ppc" ] ; then
+ echo ':ppc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfb\xff\xff\xff:/usr/local/bin/qemu-ppc:' > /proc/sys/fs/binfmt_misc/register
+fi
diff --git a/tools/ioemu/qemu-img.c b/tools/ioemu/qemu-img.c
new file mode 100644
index 0000000000..132428cf6d
--- /dev/null
+++ b/tools/ioemu/qemu-img.c
@@ -0,0 +1,698 @@
+/*
+ * create a COW disk image
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+/* qemu-img links the block layer standalone, so the osdep allocator
+ * hooks are satisfied with plain libc here. */
+void *get_mmap_addr(unsigned long size)
+{
+ return NULL;
+}
+
+void qemu_free(void *ptr)
+{
+ free(ptr);
+}
+
+void *qemu_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+/* zero-filled allocation; NULL on failure */
+void *qemu_mallocz(size_t size)
+{
+ void *ptr;
+ ptr = qemu_malloc(size);
+ if (!ptr)
+ return NULL;
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+/* strdup() on top of qemu_malloc; str must be non-NULL */
+char *qemu_strdup(const char *str)
+{
+ char *ptr;
+ ptr = qemu_malloc(strlen(str) + 1);
+ if (!ptr)
+ return NULL;
+ strcpy(ptr, str);
+ return ptr;
+}
+
+/* Bounded string copy: copies at most buf_size-1 characters and always
+ * NUL-terminates (unlike strncpy). */
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+ int c;
+ char *q = buf;
+
+ if (buf_size <= 0)
+ return;
+
+ for(;;) {
+ c = *str++;
+ if (c == 0 || q >= buf + buf_size - 1)
+ break;
+ *q++ = c;
+ }
+ *q = '\0';
+}
+
+/* strcat and truncate. */
+/* strcat and truncate. */
+/* Appends s to buf without exceeding buf_size total; result is always
+ * NUL-terminated. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+ int len;
+ len = strlen(buf);
+ if (len < buf_size)
+ pstrcpy(buf + len, buf_size - len, s);
+ return buf;
+}
+
+/* Returns 1 if str begins with val; on success *ptr (if non-NULL) is
+ * set to the first character after the prefix. */
+int strstart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (*p != *q)
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+/* qemu-img has no monitor; term_printf simply forwards to stdout so the
+ * shared block-layer code can link. */
+void term_printf(const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+}
+
+/* Print "qemu-img: <message>" to stderr and terminate with status 1.
+ * Never returns. */
+void __attribute__((noreturn)) error(const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ fprintf(stderr, "qemu-img: ");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap); /* was after exit(1), i.e. unreachable dead code */
+ exit(1);
+}
+
+/* bdrv_iterate_format callback: print one format name for help(). */
+static void format_print(void *opaque, const char *name)
+{
+ printf(" %s", name);
+}
+
+/* Print usage, list the supported disk-image formats and exit(1). */
+void help(void)
+{
+ printf("qemu-img version " QEMU_VERSION ", Copyright (c) 2004 Fabrice Bellard\n"
+ "usage: qemu-img command [command options]\n"
+ "QEMU disk image utility\n"
+ "\n"
+ "Command syntax:\n"
+ " create [-e] [-b base_image] [-f fmt] filename [size]\n"
+ " commit [-f fmt] filename\n"
+ " convert [-c] [-e] [-f fmt] filename [-O output_fmt] output_filename\n"
+ " info [-f fmt] filename\n"
+ "\n"
+ "Command parameters:\n"
+ " 'filename' is a disk image filename\n"
+ " 'base_image' is the read-only disk image which is used as base for a copy on\n"
+ " write image; the copy on write image only stores the modified data\n"
+ " 'fmt' is the disk image format. It is guessed automatically in most cases\n"
+ " 'size' is the disk image size in kilobytes. Optional suffixes 'M' (megabyte)\n"
+ " and 'G' (gigabyte) are supported\n"
+ " 'output_filename' is the destination disk image filename\n"
+ " 'output_fmt' is the destination format\n"
+ " '-c' indicates that target image must be compressed (qcow format only)\n"
+ " '-e' indicates that the target image must be encrypted (qcow format only)\n"
+ );
+ printf("\nSupported format:");
+ bdrv_iterate_format(format_print, NULL);
+ printf("\n");
+ exit(1);
+}
+
+
+#define NB_SUFFIXES 4
+
+/* Render a byte count as a short human-readable string ("999", "1.2M",
+ * "123G"...) using 1024-based suffixes.  The suffixes array is used
+ * char-by-char, so its lack of a NUL terminator is intentional. */
+static void get_human_readable_size(char *buf, int buf_size, int64_t size)
+{
+ char suffixes[NB_SUFFIXES] = "KMGT";
+ int64_t base;
+ int i;
+
+ if (size <= 999) {
+ snprintf(buf, buf_size, "%lld", size);
+ } else {
+ base = 1024;
+ for(i = 0; i < NB_SUFFIXES; i++) {
+ if (size < (10 * base)) {
+ snprintf(buf, buf_size, "%0.1f%c",
+ (double)size / base,
+ suffixes[i]);
+ break;
+ } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
+ snprintf(buf, buf_size, "%lld%c",
+ (size + (base >> 1)) / base,
+ suffixes[i]);
+ break;
+ }
+ base = base * 1024;
+ }
+ }
+}
+
+#if defined(WIN32)
+/* XXX: put correct support for win32 */
+/* XXX: put correct support for win32 */
+/* Win32 fallback: read a line from stdin with echo still enabled. */
+static int read_password(char *buf, int buf_size)
+{
+ int c, i;
+ printf("Password: ");
+ fflush(stdout);
+ i = 0;
+ for(;;) {
+ c = getchar();
+ if (c == '\n')
+ break;
+ if (i < (buf_size - 1))
+ buf[i++] = c;
+ }
+ buf[i] = '\0';
+ return 0;
+}
+
+#else
+
+#include <termios.h>
+
+static struct termios oldtty;
+
+/* atexit hook: restore the terminal settings saved by term_init(). */
+static void term_exit(void)
+{
+ tcsetattr (0, TCSANOW, &oldtty);
+}
+
+/* Put stdin into raw-ish, no-echo mode for password entry, saving the
+ * previous settings and registering term_exit() to restore them. */
+static void term_init(void)
+{
+ struct termios tty;
+
+ tcgetattr (0, &tty);
+ oldtty = tty;
+
+ tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
+ |INLCR|IGNCR|ICRNL|IXON);
+ tty.c_oflag |= OPOST;
+ tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
+ tty.c_cflag &= ~(CSIZE|PARENB);
+ tty.c_cflag |= CS8;
+ tty.c_cc[VMIN] = 1;
+ tty.c_cc[VTIME] = 0;
+
+ tcsetattr (0, TCSANOW, &tty);
+
+ atexit(term_exit);
+}
+
+/* Read a password from stdin with echo disabled; '\r' terminates input
+ * (the tty is in raw mode, so Enter arrives as CR).  Returns 0 on
+ * success, -1 on read error or EOF.  buf is always NUL-terminated. */
+int read_password(char *buf, int buf_size)
+{
+ uint8_t ch;
+ int i, ret;
+
+ printf("password: ");
+ fflush(stdout);
+ term_init();
+ i = 0;
+ for(;;) {
+ ret = read(0, &ch, 1);
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR) {
+ continue;
+ } else {
+ ret = -1;
+ break;
+ }
+ } else if (ret == 0) {
+ ret = -1;
+ break;
+ } else {
+ if (ch == '\r') {
+ ret = 0;
+ break;
+ }
+ if (i < (buf_size - 1))
+ buf[i++] = ch;
+ }
+ }
+ term_exit();
+ buf[i] = '\0';
+ printf("\n");
+ return ret;
+}
+#endif
+
+/* Open 'filename' as a BlockDriverState, probing the format unless
+ * 'fmt' is given, and prompting for a password if the image is
+ * encrypted.  Any failure calls error() and exits. */
+static BlockDriverState *bdrv_new_open(const char *filename,
+ const char *fmt)
+{
+ BlockDriverState *bs;
+ BlockDriver *drv;
+ char password[256];
+
+ bs = bdrv_new("");
+ if (!bs)
+ error("Not enough memory");
+ if (fmt) {
+ drv = bdrv_find_format(fmt);
+ if (!drv)
+ error("Unknown file format '%s'", fmt);
+ } else {
+ drv = NULL;
+ }
+ if (bdrv_open2(bs, filename, 0, drv) < 0) {
+ error("Could not open '%s'", filename);
+ }
+ if (bdrv_is_encrypted(bs)) {
+ printf("Disk image '%s' is encrypted.\n", filename);
+ if (read_password(password, sizeof(password)) < 0)
+ error("No password given");
+ if (bdrv_set_key(bs, password) < 0)
+ error("invalid password");
+ }
+ return bs;
+}
+
+/* 'create' subcommand: make a new image of the given format/size.
+ * With -b the size is copied from the base image; otherwise the size
+ * argument accepts optional 'k'/'K' (default), 'M' or 'G' suffixes.
+ * Exits via error()/help() on any failure. */
+static int img_create(int argc, char **argv)
+{
+ int c, ret, encrypted;
+ const char *fmt = "raw";
+ const char *filename;
+ const char *base_filename = NULL;
+ int64_t size;
+ const char *p;
+ BlockDriver *drv;
+
+ encrypted = 0;
+ for(;;) {
+ c = getopt(argc, argv, "b:f:he");
+ if (c == -1)
+ break;
+ switch(c) {
+ case 'h':
+ help();
+ break;
+ case 'b':
+ base_filename = optarg;
+ break;
+ case 'f':
+ fmt = optarg;
+ break;
+ case 'e':
+ encrypted = 1;
+ break;
+ }
+ }
+ if (optind >= argc)
+ help();
+ filename = argv[optind++];
+ size = 0;
+ if (base_filename) {
+ BlockDriverState *bs;
+ bs = bdrv_new_open(base_filename, NULL);
+ bdrv_get_geometry(bs, &size);
+ size *= 512;
+ bdrv_delete(bs);
+ } else {
+ if (optind >= argc)
+ help();
+ p = argv[optind];
+ size = strtoul(p, (char **)&p, 0);
+ if (*p == 'M') {
+ size *= 1024 * 1024;
+ } else if (*p == 'G') {
+ size *= 1024 * 1024 * 1024;
+ } else if (*p == 'k' || *p == 'K' || *p == '\0') {
+ size *= 1024;
+ } else {
+ help();
+ }
+ }
+ drv = bdrv_find_format(fmt);
+ if (!drv)
+ error("Unknown file format '%s'", fmt);
+ /* fixed typo in the user-visible message: "Formating" */
+ printf("Formatting '%s', fmt=%s",
+ filename, fmt);
+ if (encrypted)
+ printf(", encrypted");
+ if (base_filename) {
+ printf(", backing_file=%s",
+ base_filename);
+ }
+ printf(", size=%lld kB\n", size / 1024);
+ ret = bdrv_create(drv, filename, size / 512, base_filename, encrypted);
+ if (ret < 0) {
+ if (ret == -ENOTSUP) {
+ error("Formatting or formatting option not supported for file format '%s'", fmt);
+ } else {
+ error("Error while formatting");
+ }
+ }
+ return 0;
+}
+
+/* 'commit' subcommand: write the COW image's modified sectors back to
+ * its base image via bdrv_commit(), reporting the outcome. */
+static int img_commit(int argc, char **argv)
+{
+ int c, ret;
+ const char *filename, *fmt;
+ BlockDriver *drv;
+ BlockDriverState *bs;
+
+ fmt = NULL;
+ for(;;) {
+ c = getopt(argc, argv, "f:h");
+ if (c == -1)
+ break;
+ switch(c) {
+ case 'h':
+ help();
+ break;
+ case 'f':
+ fmt = optarg;
+ break;
+ }
+ }
+ if (optind >= argc)
+ help();
+ filename = argv[optind++];
+
+ bs = bdrv_new("");
+ if (!bs)
+ error("Not enough memory");
+ if (fmt) {
+ drv = bdrv_find_format(fmt);
+ if (!drv)
+ error("Unknown file format '%s'", fmt);
+ } else {
+ drv = NULL;
+ }
+ if (bdrv_open2(bs, filename, 0, drv) < 0) {
+ error("Could not open '%s'", filename);
+ }
+ ret = bdrv_commit(bs);
+ switch(ret) {
+ case 0:
+ printf("Image committed.\n");
+ break;
+ case -ENOENT:
+ error("No disk inserted");
+ break;
+ case -EACCES:
+ error("Image is read-only");
+ break;
+ case -ENOTSUP:
+ error("Image is already committed");
+ break;
+ default:
+ error("Error while committing image");
+ break;
+ }
+
+ bdrv_delete(bs);
+ return 0;
+}
+
+/* Returns 1 iff any byte in sector[0..len) is non-zero.  Scans 32 bits
+ * at a time; assumes len is a multiple of 4 and the buffer is 4-byte
+ * aligned (true for the sector buffers used here). */
+static int is_not_zero(const uint8_t *sector, int len)
+{
+ int i;
+ len >>= 2;
+ for(i = 0;i < len; i++) {
+ if (((uint32_t *)sector)[i] != 0)
+ return 1;
+ }
+ return 0;
+}
+
+/* Classify a run of up to n 512-byte sectors starting at buf: returns
+ * whether the first sector is non-zero and stores in *pnum the length
+ * of the maximal leading run with that same zero/non-zero property. */
+static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
+{
+ int v, i;
+
+ if (n <= 0) {
+ *pnum = 0;
+ return 0;
+ }
+ v = is_not_zero(buf, 512);
+ for(i = 1; i < n; i++) {
+ buf += 512;
+ if (v != is_not_zero(buf, 512))
+ break;
+ }
+ *pnum = i;
+ return v;
+}
+
+#define IO_BUF_SIZE 65536
+
+/* 'convert' subcommand: copy an image into a new file of format -O,
+ * optionally compressing (-c) or encrypting (-e) when the target is
+ * qcow.  Zero sector runs are skipped on the plain path so sparse or
+ * compressible output stays small.  Exits via error() on failure. */
+static int img_convert(int argc, char **argv)
+{
+ int c, ret, n, n1, compress, cluster_size, cluster_sectors, encrypt;
+ const char *filename, *fmt, *out_fmt, *out_filename;
+ BlockDriver *drv;
+ BlockDriverState *bs, *out_bs;
+ int64_t total_sectors, nb_sectors, sector_num;
+ uint8_t buf[IO_BUF_SIZE];
+ const uint8_t *buf1;
+
+ fmt = NULL;
+ out_fmt = "raw";
+ compress = 0;
+ encrypt = 0;
+ for(;;) {
+ c = getopt(argc, argv, "f:O:hce");
+ if (c == -1)
+ break;
+ switch(c) {
+ case 'h':
+ help();
+ break;
+ case 'f':
+ fmt = optarg;
+ break;
+ case 'O':
+ out_fmt = optarg;
+ break;
+ case 'c':
+ compress = 1;
+ break;
+ case 'e':
+ encrypt = 1;
+ break;
+ }
+ }
+ if (optind >= argc)
+ help();
+ filename = argv[optind++];
+ if (optind >= argc)
+ help();
+ out_filename = argv[optind++];
+
+ bs = bdrv_new_open(filename, fmt);
+
+ drv = bdrv_find_format(out_fmt);
+ if (!drv)
+ error("Unknown file format '%s'", out_fmt); /* was: fmt (wrong variable) */
+ if (compress && drv != &bdrv_qcow)
+ error("Compression not supported for this file format");
+ if (encrypt && drv != &bdrv_qcow)
+ error("Encryption not supported for this file format");
+ if (compress && encrypt)
+ error("Compression and encryption not supported at the same time");
+ bdrv_get_geometry(bs, &total_sectors);
+ ret = bdrv_create(drv, out_filename, total_sectors, NULL, encrypt);
+ if (ret < 0) {
+ if (ret == -ENOTSUP) {
+ error("Formatting not supported for file format '%s'", out_fmt); /* was: fmt */
+ } else {
+ error("Error while formatting '%s'", out_filename);
+ }
+ }
+
+ out_bs = bdrv_new_open(out_filename, out_fmt);
+
+ if (compress) {
+ /* compressed qcow output must be written whole clusters at a time */
+ cluster_size = qcow_get_cluster_size(out_bs);
+ if (cluster_size <= 0 || cluster_size > IO_BUF_SIZE)
+ error("invalid cluster size");
+ cluster_sectors = cluster_size >> 9;
+ sector_num = 0;
+ for(;;) {
+ nb_sectors = total_sectors - sector_num;
+ if (nb_sectors <= 0)
+ break;
+ if (nb_sectors >= cluster_sectors)
+ n = cluster_sectors;
+ else
+ n = nb_sectors;
+ if (bdrv_read(bs, sector_num, buf, n) < 0)
+ error("error while reading");
+ if (n < cluster_sectors)
+ memset(buf + n * 512, 0, cluster_size - n * 512);
+ if (is_not_zero(buf, cluster_size)) {
+ if (qcow_compress_cluster(out_bs, sector_num, buf) != 0)
+ error("error while compressing sector %lld", sector_num);
+ }
+ sector_num += n;
+ }
+ } else {
+ sector_num = 0;
+ for(;;) {
+ nb_sectors = total_sectors - sector_num;
+ if (nb_sectors <= 0)
+ break;
+ if (nb_sectors >= (IO_BUF_SIZE / 512))
+ n = (IO_BUF_SIZE / 512);
+ else
+ n = nb_sectors;
+ if (bdrv_read(bs, sector_num, buf, n) < 0)
+ error("error while reading");
+ /* NOTE: at the same time we convert, we do not write zero
+ sectors to have a chance to compress the image. Ideally, we
+ should add a specific call to have the info to go faster */
+ buf1 = buf;
+ while (n > 0) {
+ if (is_allocated_sectors(buf1, n, &n1)) {
+ if (bdrv_write(out_bs, sector_num, buf1, n1) < 0)
+ error("error while writing");
+ }
+ sector_num += n1;
+ n -= n1;
+ buf1 += n1 * 512;
+ }
+ }
+ }
+ bdrv_delete(out_bs);
+ bdrv_delete(bs);
+ return 0;
+}
+
+#ifdef _WIN32
+/* Win32: report the file's logical size (_stati64 has no block count). */
+static int64_t get_allocated_file_size(const char *filename)
+{
+ struct _stati64 st;
+ if (_stati64(filename, &st) < 0)
+ return -1;
+ return st.st_size;
+}
+#else
+/* POSIX: report actual on-disk usage — st_blocks is in 512-byte units,
+ * so sparse files show less than their logical size. */
+static int64_t get_allocated_file_size(const char *filename)
+{
+ struct stat st;
+ if (stat(filename, &st) < 0)
+ return -1;
+ return (int64_t)st.st_blocks * 512;
+}
+#endif
+
+/* 'info' subcommand: print format, virtual size, on-disk size and
+ * encryption flag for an image. */
+static int img_info(int argc, char **argv)
+{
+ int c;
+ const char *filename, *fmt;
+ BlockDriver *drv;
+ BlockDriverState *bs;
+ char fmt_name[128], size_buf[128], dsize_buf[128];
+ int64_t total_sectors, allocated_size;
+
+ fmt = NULL;
+ for(;;) {
+ c = getopt(argc, argv, "f:h");
+ if (c == -1)
+ break;
+ switch(c) {
+ case 'h':
+ help();
+ break;
+ case 'f':
+ fmt = optarg;
+ break;
+ }
+ }
+ if (optind >= argc)
+ help();
+ filename = argv[optind++];
+
+ bs = bdrv_new("");
+ if (!bs)
+ error("Not enough memory");
+ if (fmt) {
+ drv = bdrv_find_format(fmt);
+ if (!drv)
+ error("Unknown file format '%s'", fmt);
+ } else {
+ drv = NULL;
+ }
+ if (bdrv_open2(bs, filename, 0, drv) < 0) {
+ error("Could not open '%s'", filename);
+ }
+ bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
+ bdrv_get_geometry(bs, &total_sectors);
+ get_human_readable_size(size_buf, sizeof(size_buf), total_sectors * 512);
+ allocated_size = get_allocated_file_size(filename);
+ if (allocated_size < 0)
+ error("Could not get file size '%s'", filename);
+ get_human_readable_size(dsize_buf, sizeof(dsize_buf),
+ allocated_size);
+ printf("image: %s\n"
+ "file format: %s\n"
+ "virtual size: %s (%lld bytes)\n"
+ "disk size: %s\n",
+ filename, fmt_name, size_buf,
+ total_sectors * 512,
+ dsize_buf);
+ if (bdrv_is_encrypted(bs))
+ printf("encrypted: yes\n");
+ bdrv_delete(bs);
+ return 0;
+}
+
+/* Entry point: dispatch on the first argument (create/commit/convert/
+ * info); optind is advanced past the command name so each handler can
+ * run getopt on its own options. */
+int main(int argc, char **argv)
+{
+ const char *cmd;
+
+ bdrv_init();
+ if (argc < 2)
+ help();
+ cmd = argv[1];
+ optind++;
+ if (!strcmp(cmd, "create")) {
+ img_create(argc, argv);
+ } else if (!strcmp(cmd, "commit")) {
+ img_commit(argc, argv);
+ } else if (!strcmp(cmd, "convert")) {
+ img_convert(argc, argv);
+ } else if (!strcmp(cmd, "info")) {
+ img_info(argc, argv);
+ } else {
+ help();
+ }
+ return 0;
+}
diff --git a/tools/ioemu/readline.c b/tools/ioemu/readline.c
new file mode 100644
index 0000000000..5ed0971177
--- /dev/null
+++ b/tools/ioemu/readline.c
@@ -0,0 +1,424 @@
+/*
+ * QEMU readline utility
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+#define TERM_CMD_BUF_SIZE 4095
+#define TERM_MAX_CMDS 64
+#define NB_COMPLETIONS_MAX 256
+
+#define IS_NORM 0
+#define IS_ESC 1
+#define IS_CSI 2
+
+#define printf do_not_use_printf
+
+static char term_cmd_buf[TERM_CMD_BUF_SIZE + 1];
+static int term_cmd_buf_index;
+static int term_cmd_buf_size;
+
+static char term_last_cmd_buf[TERM_CMD_BUF_SIZE + 1];
+static int term_last_cmd_buf_index;
+static int term_last_cmd_buf_size;
+
+static int term_esc_state;
+static int term_esc_param;
+
+static char *term_history[TERM_MAX_CMDS];
+static int term_hist_entry = -1;
+
+static int nb_completions;
+int completion_index;
+static char *completions[NB_COMPLETIONS_MAX];
+
+static ReadLineFunc *term_readline_func;
+static int term_is_password;
+static char term_prompt[256];
+static void *term_readline_opaque;
+
+static void term_show_prompt2(void) /* print the prompt and reset the display-tracking state (keeps the edit buffer) */
+{
+ term_printf("%s", term_prompt);
+ term_flush();
+ term_last_cmd_buf_index = 0; /* nothing of the new line is on screen yet */
+ term_last_cmd_buf_size = 0;
+ term_esc_state = IS_NORM;
+}
+
+static void term_show_prompt(void) /* print the prompt and start a fresh, empty edit buffer */
+{
+ term_show_prompt2();
+ term_cmd_buf_index = 0;
+ term_cmd_buf_size = 0;
+}
+
+/* redraw the command line: sync the on-screen text and cursor with term_cmd_buf */
+static void term_update(void)
+{
+ int i, delta, len;
+
+ if (term_cmd_buf_size != term_last_cmd_buf_size ||
+ memcmp(term_cmd_buf, term_last_cmd_buf, term_cmd_buf_size) != 0) {
+ for(i = 0; i < term_last_cmd_buf_index; i++) {
+ term_printf("\033[D"); /* cursor left: rewind to the start of the line */
+ }
+ term_cmd_buf[term_cmd_buf_size] = '\0'; /* in bounds only while size <= TERM_CMD_BUF_SIZE (the buffer has one spare byte) */
+ if (term_is_password) {
+ len = strlen(term_cmd_buf);
+ for(i = 0; i < len; i++)
+ term_printf("*"); /* mask password input */
+ } else {
+ term_printf("%s", term_cmd_buf);
+ }
+ term_printf("\033[K"); /* erase to end of line (clears the old tail) */
+ memcpy(term_last_cmd_buf, term_cmd_buf, term_cmd_buf_size);
+ term_last_cmd_buf_size = term_cmd_buf_size;
+ term_last_cmd_buf_index = term_cmd_buf_size; /* cursor ended up after the last char */
+ }
+ if (term_cmd_buf_index != term_last_cmd_buf_index) {
+ delta = term_cmd_buf_index - term_last_cmd_buf_index;
+ if (delta > 0) {
+ for(i = 0;i < delta; i++) {
+ term_printf("\033[C"); /* cursor right */
+ }
+ } else {
+ delta = -delta;
+ for(i = 0;i < delta; i++) {
+ term_printf("\033[D"); /* cursor left */
+ }
+ }
+ term_last_cmd_buf_index = term_cmd_buf_index;
+ }
+ term_flush();
+}
+
+static void term_insert_char(int ch) /* insert ch at the cursor, shifting the tail right */
+{
+ if (term_cmd_buf_index < TERM_CMD_BUF_SIZE) { /* NOTE(review): guards the index, not the size — with a full buffer and the cursor mid-line, size can grow to TERM_CMD_BUF_SIZE+1 and term_update's NUL store then writes one byte past the array; should test term_cmd_buf_size < TERM_CMD_BUF_SIZE */
+ memmove(term_cmd_buf + term_cmd_buf_index + 1,
+ term_cmd_buf + term_cmd_buf_index,
+ term_cmd_buf_size - term_cmd_buf_index);
+ term_cmd_buf[term_cmd_buf_index] = ch;
+ term_cmd_buf_size++;
+ term_cmd_buf_index++;
+ }
+}
+
+static void term_backward_char(void) /* move the cursor one position left (no-op at start of line) */
+{
+ if (term_cmd_buf_index > 0) {
+ term_cmd_buf_index--;
+ }
+}
+
+static void term_forward_char(void) /* move the cursor one position right (no-op at end of line) */
+{
+ if (term_cmd_buf_index < term_cmd_buf_size) {
+ term_cmd_buf_index++;
+ }
+}
+
+static void term_delete_char(void) /* delete the character under the cursor, shifting the tail left */
+{
+ if (term_cmd_buf_index < term_cmd_buf_size) {
+ memmove(term_cmd_buf + term_cmd_buf_index,
+ term_cmd_buf + term_cmd_buf_index + 1,
+ term_cmd_buf_size - term_cmd_buf_index - 1);
+ term_cmd_buf_size--;
+ }
+}
+
+static void term_backspace(void) /* delete the character to the left of the cursor */
+{
+ if (term_cmd_buf_index > 0) {
+ term_backward_char();
+ term_delete_char();
+ }
+}
+
+static void term_bol(void) /* move the cursor to the beginning of the line */
+{
+ term_cmd_buf_index = 0;
+}
+
+static void term_eol(void) /* move the cursor to the end of the line */
+{
+ term_cmd_buf_index = term_cmd_buf_size;
+}
+
+static void term_up_char(void) /* history: recall the previous (older) command into the edit buffer */
+{
+ int idx;
+
+ if (term_hist_entry == 0)
+ return; /* already at the oldest entry */
+ if (term_hist_entry == -1) { /* -1 = not currently browsing history */
+ /* Find latest entry */
+ for (idx = 0; idx < TERM_MAX_CMDS; idx++) {
+ if (term_history[idx] == NULL)
+ break;
+ }
+ term_hist_entry = idx;
+ }
+ term_hist_entry--;
+ if (term_hist_entry >= 0) { /* false only when the history is empty */
+ pstrcpy(term_cmd_buf, sizeof(term_cmd_buf),
+ term_history[term_hist_entry]);
+ term_cmd_buf_index = term_cmd_buf_size = strlen(term_cmd_buf);
+ }
+}
+
+static void term_down_char(void) /* history: move to the next (newer) command */
+{
+ if (term_hist_entry == TERM_MAX_CMDS - 1 || term_hist_entry == -1)
+ return; /* not browsing, or already at the last slot */
+ if (term_history[++term_hist_entry] != NULL) {
+ pstrcpy(term_cmd_buf, sizeof(term_cmd_buf),
+ term_history[term_hist_entry]);
+ } else {
+ term_hist_entry = -1; /* ran past the newest entry: stop browsing (buffer keeps the last recalled text) */
+ }
+ term_cmd_buf_index = term_cmd_buf_size = strlen(term_cmd_buf);
+}
+
+static void term_hist_add(const char *cmdline) /* append cmdline to the history ring, de-duplicating and evicting the oldest entry when full */
+{
+ char *hist_entry, *new_entry;
+ int idx;
+
+ if (cmdline[0] == '\0')
+ return; /* never record empty lines */
+ new_entry = NULL;
+ if (term_hist_entry != -1) {
+ /* We were editing an existing history entry: replace it */
+ hist_entry = term_history[term_hist_entry];
+ idx = term_hist_entry;
+ if (strcmp(hist_entry, cmdline) == 0) {
+ goto same_entry; /* jumps into the loop below with idx/hist_entry already set */
+ }
+ }
+ /* Search cmdline in history buffers */
+ for (idx = 0; idx < TERM_MAX_CMDS; idx++) {
+ hist_entry = term_history[idx];
+ if (hist_entry == NULL)
+ break;
+ if (strcmp(hist_entry, cmdline) == 0) {
+ same_entry:
+ new_entry = hist_entry; /* reuse the existing allocation */
+ /* Put this entry at the end of history */
+ memmove(&term_history[idx], &term_history[idx + 1],
+ &term_history[TERM_MAX_CMDS] - &term_history[idx + 1]); /* NOTE(review): the pointer difference is an ELEMENT count but memmove takes bytes — missing "* sizeof(char *)" */
+ term_history[TERM_MAX_CMDS - 1] = NULL;
+ for (; idx < TERM_MAX_CMDS; idx++) {
+ if (term_history[idx] == NULL)
+ break;
+ }
+ break;
+ }
+ }
+ if (idx == TERM_MAX_CMDS) {
+ /* Need to get one free slot */
+ free(term_history[0]); /* evict the oldest entry */
+ memcpy(term_history, &term_history[1],
+ &term_history[TERM_MAX_CMDS] - &term_history[1]); /* NOTE(review): regions overlap (memmove required) and the size again lacks "* sizeof(char *)" */
+ term_history[TERM_MAX_CMDS - 1] = NULL;
+ idx = TERM_MAX_CMDS - 1;
+ }
+ if (new_entry == NULL)
+ new_entry = strdup(cmdline); /* freed when evicted above */
+ term_history[idx] = new_entry;
+ term_hist_entry = -1; /* leave history-browsing mode */
+}
+
+/* completion support */
+
+void add_completion(const char *str) /* register one completion candidate; silently dropped when the table is full */
+{
+ if (nb_completions < NB_COMPLETIONS_MAX) {
+ completions[nb_completions++] = qemu_strdup(str);
+ }
+}
+
+static void term_completion(void) /* TAB handler: complete the word at the cursor or list all candidates */
+{
+ int len, i, j, max_width, nb_cols;
+ char *cmdline;
+
+ nb_completions = 0;
+
+ cmdline = qemu_malloc(term_cmd_buf_index + 1);
+ if (!cmdline)
+ return;
+ memcpy(cmdline, term_cmd_buf, term_cmd_buf_index);
+ cmdline[term_cmd_buf_index] = '\0';
+ qemu_free(cmdline); /* NOTE(review): cmdline is built and freed without being passed to any completion generator, so nb_completions stays 0 and this function always returns just below — a call into the command completer appears to be missing; confirm */
+
+ /* no completion found */
+ if (nb_completions <= 0)
+ return;
+ if (nb_completions == 1) {
+ len = strlen(completions[0]);
+ for(i = completion_index; i < len; i++) { /* completion_index is set elsewhere — presumably the length of the already-typed prefix; confirm */
+ term_insert_char(completions[0][i]);
+ }
+ /* extra space for next argument. XXX: make it more generic */
+ if (len > 0 && completions[0][len - 1] != '/')
+ term_insert_char(' ');
+ } else {
+ term_printf("\n");
+ max_width = 0;
+ for(i = 0; i < nb_completions; i++) { /* widest candidate determines the column width */
+ len = strlen(completions[i]);
+ if (len > max_width)
+ max_width = len;
+ }
+ max_width += 2; /* inter-column padding */
+ if (max_width < 10)
+ max_width = 10;
+ else if (max_width > 80)
+ max_width = 80;
+ nb_cols = 80 / max_width;
+ j = 0;
+ for(i = 0; i < nb_completions; i++) {
+ term_printf("%-*s", max_width, completions[i]);
+ if (++j == nb_cols || i == (nb_completions - 1)) {
+ term_printf("\n");
+ j = 0;
+ }
+ }
+ term_show_prompt2(); /* reprint the prompt; the edited line is redrawn by term_update() */
+ }
+}
+
+/* feed one input byte to the line-editor state machine (plain keys, ESC and CSI sequences); returns nothing — the old "return true" comment was stale */
+void readline_handle_byte(int ch)
+{
+ switch(term_esc_state) {
+ case IS_NORM:
+ switch(ch) {
+ case 1: /* Ctrl-A: beginning of line */
+ term_bol();
+ break;
+ case 4: /* Ctrl-D: delete char under cursor */
+ term_delete_char();
+ break;
+ case 5: /* Ctrl-E: end of line */
+ term_eol();
+ break;
+ case 9: /* TAB: completion */
+ term_completion();
+ break;
+ case 10: /* LF / CR: line finished */
+ case 13:
+ term_cmd_buf[term_cmd_buf_size] = '\0';
+ if (!term_is_password)
+ term_hist_add(term_cmd_buf); /* never record passwords in history */
+ term_printf("\n");
+ /* NOTE: readline_start can be called here */
+ term_readline_func(term_readline_opaque, term_cmd_buf);
+ break;
+ case 27: /* ESC: start of escape sequence */
+ term_esc_state = IS_ESC;
+ break;
+ case 127: /* DEL / Ctrl-H: backspace */
+ case 8:
+ term_backspace();
+ break;
+ case 155: /* 8-bit CSI; NOTE(review): unlike the ESC-[ path below, term_esc_param is not reset here */
+ term_esc_state = IS_CSI;
+ break;
+ default:
+ if (ch >= 32) { /* printable characters only */
+ term_insert_char(ch);
+ }
+ break;
+ }
+ break;
+ case IS_ESC:
+ if (ch == '[') {
+ term_esc_state = IS_CSI;
+ term_esc_param = 0;
+ } else {
+ term_esc_state = IS_NORM; /* unrecognised escape: drop it */
+ }
+ break;
+ case IS_CSI:
+ switch(ch) {
+ case 'A':
+ case 'F':
+ term_up_char(); /* history previous */
+ break;
+ case 'B':
+ case 'E':
+ term_down_char(); /* history next */
+ break;
+ case 'D':
+ term_backward_char();
+ break;
+ case 'C':
+ term_forward_char();
+ break;
+ case '0' ... '9': /* numeric parameter digit (gcc case-range extension) */
+ term_esc_param = term_esc_param * 10 + (ch - '0');
+ goto the_end; /* stay in IS_CSI while digits accumulate */
+ case '~':
+ switch(term_esc_param) {
+ case 1: /* Home */
+ term_bol();
+ break;
+ case 3: /* Delete */
+ term_delete_char();
+ break;
+ case 4: /* End */
+ term_eol();
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ term_esc_state = IS_NORM;
+ the_end:
+ break;
+ }
+ term_update(); /* refresh the displayed line after every byte */
+}
+
+void readline_start(const char *prompt, int is_password,
+ ReadLineFunc *readline_func, void *opaque) /* begin a new line edit; readline_func(opaque, line) is invoked when the user hits Enter */
+{
+ pstrcpy(term_prompt, sizeof(term_prompt), prompt);
+ term_readline_func = readline_func;
+ term_readline_opaque = opaque;
+ term_is_password = is_password; /* non-zero: echo '*' and keep the line out of history */
+ term_show_prompt();
+}
+
+const char *readline_get_history(unsigned int index) /* return the history entry at index, or NULL when out of range or unset */
+{
+ if (index >= TERM_MAX_CMDS)
+ return NULL;
+ return term_history[index];
+}
+
+
diff --git a/tools/ioemu/sdl.c b/tools/ioemu/sdl.c
new file mode 100644
index 0000000000..91c0e23d56
--- /dev/null
+++ b/tools/ioemu/sdl.c
@@ -0,0 +1,605 @@
+/*
+ * QEMU SDL display driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+#include <SDL.h>
+
+/* keyboard stuff */
+#include <SDL_keysym.h>
+#include "keysym_adapter_sdl.h"
+#include "keyboard_rdesktop.c"
+
+#ifndef _WIN32
+#include <signal.h>
+#endif
+
+#if defined(__APPLE__)
+#define CONFIG_SDL_GENERIC_KBD
+#endif
+
+static SDL_Surface *screen;
+static int gui_grab; /* if true, all keyboard/mouse events are grabbed */
+static int last_vm_running;
+static int gui_saved_grab;
+static int gui_fullscreen;
+static int gui_key_modifier_pressed;
+static int gui_keysym;
+static void* kbd_layout=0;
+static int gui_fullscreen_initial_grab;
+static int gui_grab_code = KMOD_LALT | KMOD_LCTRL;
+static uint8_t modifiers_state[256];
+
+SDL_PixelFormat* sdl_get_format() { /* expose the current SDL screen surface's pixel format to other modules */
+ return screen->format;
+}
+
+static void sdl_update(DisplayState *ds, int x, int y, int w, int h) /* push one dirty rectangle to the screen */
+{
+ SDL_UpdateRect(screen, x, y, w, h);
+}
+
+static void sdl_resize(DisplayState *ds, int w, int h) /* (re)create the SDL video surface at w x h and point ds at its pixels */
+{
+ int flags;
+
+ // printf("resizing to %d %d\n", w, h);
+
+ flags = SDL_HWSURFACE|SDL_ASYNCBLIT|SDL_HWACCEL;
+ flags |= SDL_RESIZABLE;
+ if (gui_fullscreen)
+ flags |= SDL_FULLSCREEN;
+ screen = SDL_SetVideoMode(w, h, 0, flags); /* bpp 0 = keep current depth; SDL manages the previous surface */
+ if (!screen) {
+ fprintf(stderr, "Could not open SDL display\n");
+ exit(1);
+ }
+ ds->data = screen->pixels; /* the emulated display renders straight into the SDL surface */
+ ds->linesize = screen->pitch;
+ ds->depth = screen->format->BitsPerPixel;
+ ds->width = w;
+ ds->height = h;
+}
+
+#ifdef CONFIG_SDL_GENERIC_KBD
+
+/* XXX: use keymap tables defined in the VNC patch because the
+ following code suppose you have a US keyboard. */
+
+static const uint8_t scancodes[SDLK_LAST] = {
+ [SDLK_ESCAPE] = 0x01,
+ [SDLK_1] = 0x02,
+ [SDLK_2] = 0x03,
+ [SDLK_3] = 0x04,
+ [SDLK_4] = 0x05,
+ [SDLK_5] = 0x06,
+ [SDLK_6] = 0x07,
+ [SDLK_7] = 0x08,
+ [SDLK_8] = 0x09,
+ [SDLK_9] = 0x0a,
+ [SDLK_0] = 0x0b,
+ [SDLK_MINUS] = 0x0c,
+ [SDLK_EQUALS] = 0x0d,
+ [SDLK_BACKSPACE] = 0x0e,
+ [SDLK_TAB] = 0x0f,
+ [SDLK_q] = 0x10,
+ [SDLK_w] = 0x11,
+ [SDLK_e] = 0x12,
+ [SDLK_r] = 0x13,
+ [SDLK_t] = 0x14,
+ [SDLK_y] = 0x15,
+ [SDLK_u] = 0x16,
+ [SDLK_i] = 0x17,
+ [SDLK_o] = 0x18,
+ [SDLK_p] = 0x19,
+ [SDLK_LEFTBRACKET] = 0x1a,
+ [SDLK_RIGHTBRACKET] = 0x1b,
+ [SDLK_RETURN] = 0x1c,
+ [SDLK_LCTRL] = 0x1d,
+ [SDLK_a] = 0x1e,
+ [SDLK_s] = 0x1f,
+ [SDLK_d] = 0x20,
+ [SDLK_f] = 0x21,
+ [SDLK_g] = 0x22,
+ [SDLK_h] = 0x23,
+ [SDLK_j] = 0x24,
+ [SDLK_k] = 0x25,
+ [SDLK_l] = 0x26,
+ [SDLK_SEMICOLON] = 0x27,
+ [SDLK_QUOTE] = 0x28,
+ [SDLK_BACKQUOTE] = 0x29,
+ [SDLK_LSHIFT] = 0x2a,
+ [SDLK_BACKSLASH] = 0x2b,
+ [SDLK_z] = 0x2c,
+ [SDLK_x] = 0x2d,
+ [SDLK_c] = 0x2e,
+ [SDLK_v] = 0x2f,
+ [SDLK_b] = 0x30,
+ [SDLK_n] = 0x31,
+ [SDLK_m] = 0x32,
+ [SDLK_COMMA] = 0x33,
+ [SDLK_PERIOD] = 0x34,
+ [SDLK_SLASH] = 0x35,
+ [SDLK_KP_MULTIPLY] = 0x37,
+ [SDLK_LALT] = 0x38,
+ [SDLK_SPACE] = 0x39,
+ [SDLK_CAPSLOCK] = 0x3a,
+ [SDLK_F1] = 0x3b,
+ [SDLK_F2] = 0x3c,
+ [SDLK_F3] = 0x3d,
+ [SDLK_F4] = 0x3e,
+ [SDLK_F5] = 0x3f,
+ [SDLK_F6] = 0x40,
+ [SDLK_F7] = 0x41,
+ [SDLK_F8] = 0x42,
+ [SDLK_F9] = 0x43,
+ [SDLK_F10] = 0x44,
+ [SDLK_NUMLOCK] = 0x45,
+ [SDLK_SCROLLOCK] = 0x46,
+ [SDLK_KP7] = 0x47,
+ [SDLK_KP8] = 0x48,
+ [SDLK_KP9] = 0x49,
+ [SDLK_KP_MINUS] = 0x4a,
+ [SDLK_KP4] = 0x4b,
+ [SDLK_KP5] = 0x4c,
+ [SDLK_KP6] = 0x4d,
+ [SDLK_KP_PLUS] = 0x4e,
+ [SDLK_KP1] = 0x4f,
+ [SDLK_KP2] = 0x50,
+ [SDLK_KP3] = 0x51,
+ [SDLK_KP0] = 0x52,
+ [SDLK_KP_PERIOD] = 0x53,
+ [SDLK_PRINT] = 0x54,
+ [SDLK_LMETA] = 0x56,
+
+ [SDLK_KP_ENTER] = 0x9c,
+ [SDLK_KP_DIVIDE] = 0xb5,
+
+ [SDLK_UP] = 0xc8,
+ [SDLK_DOWN] = 0xd0,
+ [SDLK_RIGHT] = 0xcd,
+ [SDLK_LEFT] = 0xcb,
+ [SDLK_INSERT] = 0xd2,
+ [SDLK_HOME] = 0xc7,
+ [SDLK_END] = 0xcf,
+ [SDLK_PAGEUP] = 0xc9,
+ [SDLK_PAGEDOWN] = 0xd1,
+ [SDLK_DELETE] = 0xd3,
+};
+
+static uint8_t sdl_keyevent_to_keycode(const SDL_KeyboardEvent *ev) /* generic (US-layout) path: look the SDL keysym up in the scancodes table */
+{
+ return scancodes[ev->keysym.sym];
+}
+
+#elif defined(_WIN32)
+
+static uint8_t sdl_keyevent_to_keycode(const SDL_KeyboardEvent *ev) /* Win32: pass through the scancode SDL already reports */
+{
+ return ev->keysym.scancode;
+}
+
+#else
+
+static const uint8_t x_keycode_to_pc_keycode[61] = {
+ 0xc7, /* 97 Home */
+ 0xc8, /* 98 Up */
+ 0xc9, /* 99 PgUp */
+ 0xcb, /* 100 Left */
+ 0x4c, /* 101 KP-5 */
+ 0xcd, /* 102 Right */
+ 0xcf, /* 103 End */
+ 0xd0, /* 104 Down */
+ 0xd1, /* 105 PgDn */
+ 0xd2, /* 106 Ins */
+ 0xd3, /* 107 Del */
+ 0x9c, /* 108 Enter */
+ 0x9d, /* 109 Ctrl-R */
+ 0x0, /* 110 Pause */
+ 0xb7, /* 111 Print */
+ 0xb5, /* 112 Divide */
+ 0xb8, /* 113 Alt-R */
+ 0xc6, /* 114 Break */
+ 0x0, /* 115 */
+ 0x0, /* 116 */
+ 0x0, /* 117 */
+ 0x0, /* 118 */
+ 0x0, /* 119 */
+ 0x70, /* 120 Hiragana_Katakana */
+ 0x0, /* 121 */
+ 0x0, /* 122 */
+ 0x73, /* 123 backslash */
+ 0x0, /* 124 */
+ 0x0, /* 125 */
+ 0x0, /* 126 */
+ 0x0, /* 127 */
+ 0x0, /* 128 */
+ 0x79, /* 129 Henkan */
+ 0x0, /* 130 */
+ 0x7b, /* 131 Muhenkan */
+ 0x0, /* 132 */
+ 0x7d, /* 133 Yen */
+ 0x0, /* 134 */
+ 0x0, /* 135 */
+ 0x47, /* 136 KP_7 */
+ 0x48, /* 137 KP_8 */
+ 0x49, /* 138 KP_9 */
+ 0x4b, /* 139 KP_4 */
+ 0x4c, /* 140 KP_5 */
+ 0x4d, /* 141 KP_6 */
+ 0x4f, /* 142 KP_1 */
+ 0x50, /* 143 KP_2 */
+ 0x51, /* 144 KP_3 */
+ 0x52, /* 145 KP_0 */
+ 0x53, /* 146 KP_. */
+ 0x47, /* 147 KP_HOME */
+ 0x48, /* 148 KP_UP */
+ 0x49, /* 149 KP_PgUp */
+ 0x4b, /* 150 KP_Left */
+ 0x4c, /* 151 KP_ */
+ 0x4d, /* 152 KP_Right */
+ 0x4f, /* 153 KP_End */
+ 0x50, /* 154 KP_Down */
+ 0x51, /* 155 KP_PgDn */
+ 0x52, /* 156 KP_Ins */
+ 0x53, /* 157 KP_Del */
+};
+
+static uint8_t sdl_keyevent_to_keycode(const SDL_KeyboardEvent *ev) /* X11: convert an X keycode to a PC keycode (0 = unknown/invalid) */
+{
+ int keycode;
+
+ keycode = ev->keysym.scancode;
+
+ if (keycode < 9) {
+ keycode = 0; /* below the valid X keycode range */
+ } else if (keycode < 97) {
+ keycode -= 8; /* just an offset */
+ } else if (keycode < 158) {
+ /* use conversion table */
+ keycode = x_keycode_to_pc_keycode[keycode - 97];
+ } else {
+ keycode = 0; /* beyond the table */
+ }
+ return keycode;
+}
+
+#endif
+
+static void reset_keys(void) /* synthesise key-up events for every modifier still tracked as pressed (used on focus/grab changes) */
+{
+ int i;
+ for(i = 0; i < 256; i++) {
+ if (modifiers_state[i]) {
+ if (i & 0x80)
+ kbd_put_keycode(0xe0); /* extended-key prefix */
+ kbd_put_keycode(i | 0x80); /* bit 7 set = key release */
+ modifiers_state[i] = 0;
+ }
+ }
+}
+
+static void sdl_process_key(SDL_KeyboardEvent *ev) /* translate one SDL key event into PC keycodes for the emulated keyboard */
+{
+ int keycode, v;
+
+ if(kbd_layout)
+ keycode=keysym2scancode(kbd_layout, ev->keysym.sym); /* NOTE(review): this path skips the Pause/modifier/num-lock handling below — confirm intended */
+ else {
+
+ if (ev->keysym.sym == SDLK_PAUSE) {
+ /* specific case */
+ v = 0;
+ if (ev->type == SDL_KEYUP)
+ v |= 0x80; /* release flag */
+ kbd_put_keycode(0xe1); /* Pause uses the special 0xe1 prefix sequence */
+ kbd_put_keycode(0x1d | v);
+ kbd_put_keycode(0x45 | v);
+ return;
+ }
+
+ /* XXX: not portable, but avoids complicated mappings */
+ keycode = sdl_keyevent_to_keycode(ev);
+
+ switch(keycode) {
+ case 0x00:
+ /* sent when leaving window: reset the modifiers state */
+ reset_keys();
+ return;
+ case 0x2a: /* Left Shift */
+ case 0x36: /* Right Shift */
+ case 0x1d: /* Left CTRL */
+ case 0x9d: /* Right CTRL */
+ case 0x38: /* Left ALT */
+ case 0xb8: /* Right ALT */
+ if (ev->type == SDL_KEYUP)
+ modifiers_state[keycode] = 0;
+ else
+ modifiers_state[keycode] = 1;
+ break;
+ case 0x45: /* num lock */
+ case 0x3a: /* caps lock */
+ /* SDL does not send the key up event, so we generate it */
+ kbd_put_keycode(keycode);
+ kbd_put_keycode(keycode | 0x80);
+ return;
+ }
+ }
+
+ /* now send the key code */
+ if (keycode & 0x80)
+ kbd_put_keycode(0xe0); /* extended-key prefix */
+ if (ev->type == SDL_KEYUP)
+ kbd_put_keycode(keycode | 0x80);
+ else
+ kbd_put_keycode(keycode & 0x7f);
+}
+
+static void sdl_update_caption(void) /* rebuild the window title from the VM run state and the grab state */
+{
+ char buf[1024]; /* the fixed strings below fit comfortably */
+ strcpy(buf, "VTXen");
+ if (!vm_running) {
+ strcat(buf, " [Stopped]");
+ }
+ if (gui_grab) {
+ strcat(buf, " - Press Ctrl-Alt to exit grab");
+ }
+ SDL_WM_SetCaption(buf, "VTXen");
+}
+
+static void sdl_grab_start(void) /* take exclusive keyboard/mouse input and hide the host cursor */
+{
+ SDL_ShowCursor(0);
+ SDL_WM_GrabInput(SDL_GRAB_ON);
+ /* dummy read to avoid moving the mouse */
+ SDL_GetRelativeMouseState(NULL, NULL);
+ gui_grab = 1;
+ sdl_update_caption();
+}
+
+static void sdl_grab_end(void) /* release the input grab and restore the host cursor */
+{
+ SDL_WM_GrabInput(SDL_GRAB_OFF);
+ SDL_ShowCursor(1);
+ gui_grab = 0;
+ sdl_update_caption();
+}
+
+static void sdl_send_mouse_event(void) /* forward accumulated relative mouse motion and current button state to the guest */
+{
+ int dx, dy, dz, state, buttons;
+ state = SDL_GetRelativeMouseState(&dx, &dy);
+ buttons = 0;
+ if (state & SDL_BUTTON(SDL_BUTTON_LEFT))
+ buttons |= MOUSE_EVENT_LBUTTON;
+ if (state & SDL_BUTTON(SDL_BUTTON_RIGHT))
+ buttons |= MOUSE_EVENT_RBUTTON;
+ if (state & SDL_BUTTON(SDL_BUTTON_MIDDLE))
+ buttons |= MOUSE_EVENT_MBUTTON;
+ /* XXX: test wheel */
+ dz = 0;
+#ifdef SDL_BUTTON_WHEELUP
+ if (state & SDL_BUTTON(SDL_BUTTON_WHEELUP)) /* wheel is sampled from the polled button mask, so brief wheel "clicks" may be missed — TODO confirm */
+ dz--;
+ if (state & SDL_BUTTON(SDL_BUTTON_WHEELDOWN))
+ dz++;
+#endif
+ kbd_mouse_event(dx, dy, dz, buttons);
+}
+
+static void toggle_full_screen(DisplayState *ds) /* flip fullscreen; entering it forces a grab, leaving it restores the previous grab state */
+{
+ gui_fullscreen = !gui_fullscreen;
+ sdl_resize(ds, screen->w, screen->h);
+ if (gui_fullscreen) {
+ gui_saved_grab = gui_grab; /* remember so we can restore on exit */
+ sdl_grab_start();
+ } else {
+ if (!gui_saved_grab)
+ sdl_grab_end();
+ }
+ vga_invalidate_display(); /* the surface was recreated: force a full redraw */
+ vga_update_display();
+}
+
+static void sdl_refresh(DisplayState *ds) /* periodic GUI callback: redraw the active console and drain the SDL event queue */
+{
+ SDL_Event ev1, *ev = &ev1;
+ int mod_state;
+
+ if (last_vm_running != vm_running) {
+ last_vm_running = vm_running; /* run state changed: refresh the caption */
+ sdl_update_caption();
+ }
+
+ if (is_active_console(vga_console))
+ vga_update_display();
+
+ while (SDL_PollEvent(ev)) {
+ switch (ev->type) {
+ case SDL_VIDEOEXPOSE:
+ sdl_update(ds, 0, 0, screen->w, screen->h); /* window uncovered: repaint everything */
+ break;
+ case SDL_KEYDOWN:
+ case SDL_KEYUP:
+ if (ev->type == SDL_KEYDOWN) {
+ mod_state = (SDL_GetModState() & gui_grab_code) ==
+ gui_grab_code; /* true only while the full Ctrl-Alt chord is held */
+ gui_key_modifier_pressed = mod_state;
+ if (gui_key_modifier_pressed) {
+ int keycode;
+ keycode = sdl_keyevent_to_keycode(&ev->key);
+ switch(keycode) {
+ case 0x21: /* 'f' key on US keyboard */
+ toggle_full_screen(ds); /* Ctrl-Alt-f */
+ gui_keysym = 1; /* chord consumed: suppress the grab toggle on release */
+ break;
+ case 0x02 ... 0x0a: /* '1' to '9' keys */
+ console_select(keycode - 0x02); /* Ctrl-Alt-n switches console */
+ if (is_active_console(vga_console)) {
+ /* tell the vga console to redisplay itself */
+ vga_invalidate_display();
+ } else {
+ /* display grab if going to a text console */
+ if (gui_grab)
+ sdl_grab_end();
+ }
+ gui_keysym = 1;
+ break;
+ default:
+ break;
+ }
+ } else if (!is_active_console(vga_console)) {
+ int keysym; /* text console: deliver cooked keysyms, not scancodes */
+ keysym = 0;
+ if (ev->key.keysym.mod & (KMOD_LCTRL | KMOD_RCTRL)) {
+ switch(ev->key.keysym.sym) {
+ case SDLK_UP: keysym = QEMU_KEY_CTRL_UP; break;
+ case SDLK_DOWN: keysym = QEMU_KEY_CTRL_DOWN; break;
+ case SDLK_LEFT: keysym = QEMU_KEY_CTRL_LEFT; break;
+ case SDLK_RIGHT: keysym = QEMU_KEY_CTRL_RIGHT; break;
+ case SDLK_HOME: keysym = QEMU_KEY_CTRL_HOME; break;
+ case SDLK_END: keysym = QEMU_KEY_CTRL_END; break;
+ case SDLK_PAGEUP: keysym = QEMU_KEY_CTRL_PAGEUP; break;
+ case SDLK_PAGEDOWN: keysym = QEMU_KEY_CTRL_PAGEDOWN; break;
+ default: break;
+ }
+ } else {
+ switch(ev->key.keysym.sym) {
+ case SDLK_UP: keysym = QEMU_KEY_UP; break;
+ case SDLK_DOWN: keysym = QEMU_KEY_DOWN; break;
+ case SDLK_LEFT: keysym = QEMU_KEY_LEFT; break;
+ case SDLK_RIGHT: keysym = QEMU_KEY_RIGHT; break;
+ case SDLK_HOME: keysym = QEMU_KEY_HOME; break;
+ case SDLK_END: keysym = QEMU_KEY_END; break;
+ case SDLK_PAGEUP: keysym = QEMU_KEY_PAGEUP; break;
+ case SDLK_PAGEDOWN: keysym = QEMU_KEY_PAGEDOWN; break;
+ case SDLK_BACKSPACE: keysym = QEMU_KEY_BACKSPACE; break; case SDLK_DELETE: keysym = QEMU_KEY_DELETE; break;
+ default: break;
+ }
+ }
+ if (keysym) {
+ kbd_put_keysym(keysym);
+ } else if (ev->key.keysym.unicode != 0) {
+ kbd_put_keysym(ev->key.keysym.unicode); /* fall back to the translated unicode character */
+ }
+ }
+ } else if (ev->type == SDL_KEYUP) {
+ mod_state = (ev->key.keysym.mod & gui_grab_code);
+ if (!mod_state) { /* chord fully released */
+ if (gui_key_modifier_pressed) {
+ gui_key_modifier_pressed = 0;
+ if (gui_keysym == 0) {
+ /* exit/enter grab if pressing Ctrl-Alt */
+ if (!gui_grab)
+ sdl_grab_start();
+ else
+ sdl_grab_end();
+ /* SDL does not send back all the
+ modifiers key, so we must correct it */
+ reset_keys();
+ break;
+ }
+ gui_keysym = 0;
+ }
+ }
+ }
+ if (is_active_console(vga_console))
+ sdl_process_key(&ev->key); /* guest console receives the raw key event */
+ break;
+ case SDL_QUIT:
+ qemu_system_shutdown_request(); /* window closed: request a clean guest shutdown */
+ break;
+ case SDL_MOUSEMOTION:
+ if (gui_grab) {
+ sdl_send_mouse_event();
+ }
+ break;
+ case SDL_MOUSEBUTTONDOWN:
+ case SDL_MOUSEBUTTONUP:
+ {
+ SDL_MouseButtonEvent *bev = &ev->button;
+ if (!gui_grab) {
+ if (ev->type == SDL_MOUSEBUTTONDOWN &&
+ (bev->state & SDL_BUTTON_LMASK)) {
+ /* start grabbing all events */
+ sdl_grab_start(); /* left click inside the window starts the grab */
+ }
+ } else {
+ sdl_send_mouse_event();
+ }
+ }
+ break;
+ case SDL_ACTIVEEVENT:
+ if (gui_grab && (ev->active.gain & SDL_ACTIVEEVENTMASK) == 0 && /* NOTE(review): gain is 0/1 and SDL_ACTIVEEVENTMASK is an event-mask constant, so this test looks wrong — probably meant "!ev->active.gain" (optionally qualified by ev->active.state); confirm */
+ !gui_fullscreen_initial_grab) {
+ sdl_grab_end();
+ }
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void sdl_cleanup(void) /* atexit handler: shut SDL down so the host display is restored */
+{
+ SDL_Quit();
+}
+
+void sdl_display_init(DisplayState *ds, int full_screen) /* initialise the SDL UI, hook its callbacks into ds, optionally starting fullscreen + grabbed */
+{
+ int flags;
+
+ if(keyboard_layout)
+ kbd_layout=init_keyboard_layout(keyboard_layout); /* keyboard_layout is set elsewhere — presumably the -k option; confirm */
+
+ flags = SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE; /* NOPARACHUTE: do not let SDL intercept fatal signals */
+ if (SDL_Init (flags)) {
+ fprintf(stderr, "Could not initialize SDL - exiting\n");
+ exit(1);
+ }
+#ifndef _WIN32
+ /* NOTE: we still want Ctrl-C to work, so we undo the SDL redirections */
+ signal(SIGINT, SIG_DFL);
+ signal(SIGQUIT, SIG_DFL);
+#endif
+
+ ds->dpy_update = sdl_update;
+ ds->dpy_resize = sdl_resize;
+ ds->dpy_refresh = sdl_refresh;
+
+ sdl_resize(ds, 640, 400); /* initial window size before the guest sets a video mode */
+ sdl_update_caption();
+ SDL_EnableKeyRepeat(250, 50);
+ SDL_EnableUNICODE(1); /* required for the unicode fallback in sdl_refresh */
+ gui_grab = 0;
+
+ atexit(sdl_cleanup);
+ if (full_screen) {
+ gui_fullscreen = 1;
+ gui_fullscreen_initial_grab = 1; /* fullscreen mode: never auto-release the grab on activation events */
+ sdl_grab_start();
+ }
+}
diff --git a/tools/ioemu/target-i386-dm/Makefile b/tools/ioemu/target-i386-dm/Makefile
new file mode 100644
index 0000000000..bfe8befc25
--- /dev/null
+++ b/tools/ioemu/target-i386-dm/Makefile
@@ -0,0 +1,399 @@
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+include config.mak
+override TARGET_ARCH=i386
+
+#assume we directly put qemu code in tools/, same level as bochs dm(ioemu)
+XEN_PATH=../../..
+TARGET_PATH=$(SRC_PATH)/target-$(TARGET_ARCH)
+VPATH=$(SRC_PATH):$(TARGET_PATH):$(SRC_PATH)/hw:$(SRC_PATH)/audio
+DEFINES=-I. -I$(TARGET_PATH) -I$(SRC_PATH) -I$(XEN_PATH)/xen/include/public
+DEFINES+= -I$(XEN_PATH)/tools/libxc
+ifdef CONFIG_USER_ONLY
+VPATH+=:$(SRC_PATH)/linux-user
+DEFINES+=-I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ARCH)
+endif
+CFLAGS=-Wall -O2 -g -fno-strict-aliasing
+LDFLAGS=-g
+LIBS=
+HELPER_CFLAGS=$(CFLAGS)
+DYNGEN=../dyngen$(EXESUF)
+# user emulator name
+QEMU_USER=qemu-$(TARGET_ARCH)
+# system emulator name
+ifdef CONFIG_SOFTMMU
+ifeq ($(TARGET_ARCH), i386)
+QEMU_SYSTEM=qemu$(EXESUF)
+else
+QEMU_SYSTEM=qemu-system-$(TARGET_ARCH)$(EXESUF)
+endif
+else
+QEMU_SYSTEM=qemu-fast
+endif
+
+QEMU_SYSTEM=qemu-dm
+PROGS=$(QEMU_SYSTEM)
+
+ifdef CONFIG_USER_ONLY
+PROGS=$(QEMU_USER)
+else
+ifeq ($(TARGET_ARCH), i386)
+
+ifeq ($(ARCH), i386)
+PROGS+=$(QEMU_SYSTEM)
+ifndef CONFIG_SOFTMMU
+CONFIG_STATIC=y
+endif
+else
+# the system emulator using soft mmu is portable
+ifdef CONFIG_SOFTMMU
+PROGS+=$(QEMU_SYSTEM)
+endif
+endif # ARCH != i386
+
+endif # TARGET_ARCH = i386
+
+ifeq ($(TARGET_ARCH), ppc)
+
+ifeq ($(ARCH), ppc)
+PROGS+=$(QEMU_SYSTEM)
+endif
+
+ifeq ($(ARCH), i386)
+ifdef CONFIG_SOFTMMU
+PROGS+=$(QEMU_SYSTEM)
+endif
+endif # ARCH = i386
+
+ifeq ($(ARCH), amd64)
+ifdef CONFIG_SOFTMMU
+PROGS+=$(QEMU_SYSTEM)
+endif
+endif # ARCH = amd64
+
+endif # TARGET_ARCH = ppc
+
+ifeq ($(TARGET_ARCH), sparc)
+
+ifeq ($(ARCH), ppc)
+PROGS+=$(QEMU_SYSTEM)
+endif
+
+ifeq ($(ARCH), i386)
+ifdef CONFIG_SOFTMMU
+PROGS+=$(QEMU_SYSTEM)
+endif
+endif # ARCH = i386
+
+ifeq ($(ARCH), amd64)
+ifdef CONFIG_SOFTMMU
+PROGS+=$(QEMU_SYSTEM)
+endif
+endif # ARCH = amd64
+
+endif # TARGET_ARCH = sparc
+endif # !CONFIG_USER_ONLY
+
+ifdef CONFIG_STATIC
+LDFLAGS+=-static
+endif
+
+ifeq ($(ARCH),i386)
+CFLAGS+=-fomit-frame-pointer
+OP_CFLAGS=$(CFLAGS) -mpreferred-stack-boundary=2
+ifeq ($(HAVE_GCC3_OPTIONS),yes)
+OP_CFLAGS+= -falign-functions=0 -fno-gcse
+else
+OP_CFLAGS+= -malign-functions=0
+endif
+
+ifdef TARGET_GPROF
+USE_I386_LD=y
+endif
+ifdef CONFIG_STATIC
+USE_I386_LD=y
+endif
+ifdef USE_I386_LD
+LDFLAGS+=-Wl,-T,$(SRC_PATH)/i386.ld
+else
+# WARNING: this LDFLAGS is _very_ tricky : qemu is an ELF shared object
+# that the kernel ELF loader considers as an executable. I think this
+# is the simplest way to make it self virtualizable!
+LDFLAGS+=-Wl,-shared
+endif
+endif
+
+ifeq ($(ARCH),amd64)
+OP_CFLAGS=$(CFLAGS) -falign-functions=0
+LDFLAGS+=-Wl,-T,$(SRC_PATH)/amd64.ld
+endif
+
+ifeq ($(ARCH),ppc)
+CFLAGS+= -D__powerpc__
+OP_CFLAGS=$(CFLAGS)
+LDFLAGS+=-Wl,-T,$(SRC_PATH)/ppc.ld
+endif
+
+ifeq ($(ARCH),s390)
+OP_CFLAGS=$(CFLAGS)
+LDFLAGS+=-Wl,-T,$(SRC_PATH)/s390.ld
+endif
+
+ifeq ($(ARCH),sparc)
+CFLAGS+=-m32 -ffixed-g1 -ffixed-g2 -ffixed-g3 -ffixed-g6
+LDFLAGS+=-m32
+OP_CFLAGS=$(CFLAGS) -fno-delayed-branch -ffixed-i0
+HELPER_CFLAGS=$(CFLAGS) -ffixed-i0 -mflat
+# -static is used to avoid g1/g3 usage by the dynamic linker
+LDFLAGS+=-Wl,-T,$(SRC_PATH)/sparc.ld -static
+endif
+
+ifeq ($(ARCH),sparc64)
+CFLAGS+=-m64 -ffixed-g1 -ffixed-g2 -ffixed-g3 -ffixed-g6
+LDFLAGS+=-m64
+OP_CFLAGS=$(CFLAGS) -fno-delayed-branch -ffixed-i0
+endif
+
+ifeq ($(ARCH),alpha)
+# -msmall-data is not used because we want two-instruction relocations
+# for the constant constructions
+OP_CFLAGS=-Wall -O2 -g
+# Ensure there's only a single GP
+CFLAGS += -msmall-data
+LDFLAGS+=-Wl,-T,$(SRC_PATH)/alpha.ld
+endif
+
+ifeq ($(ARCH),ia64)
+OP_CFLAGS=$(CFLAGS)
+endif
+
+ifeq ($(ARCH),arm)
+OP_CFLAGS=$(CFLAGS) -mno-sched-prolog
+LDFLAGS+=-Wl,-T,$(SRC_PATH)/arm.ld
+endif
+
+ifeq ($(ARCH),m68k)
+OP_CFLAGS=$(CFLAGS) -fomit-frame-pointer
+LDFLAGS+=-Wl,-T,m68k.ld
+endif
+
+ifeq ($(HAVE_GCC3_OPTIONS),yes)
+# very important to generate a return at the end of every operation
+OP_CFLAGS+=-fno-reorder-blocks -fno-optimize-sibling-calls
+endif
+
+ifeq ($(CONFIG_DARWIN),yes)
+OP_CFLAGS+= -mdynamic-no-pic
+endif
+
+#########################################################
+
+DEFINES+=-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
+LIBS+=-lm -L../../libxc -lxc
+ifndef CONFIG_USER_ONLY
+LIBS+=-lz
+endif
+ifdef CONFIG_WIN32
+LIBS+=-lwinmm -lws2_32 -liphlpapi
+endif
+
+# profiling code
+ifdef TARGET_GPROF
+LDFLAGS+=-p
+main.o: CFLAGS+=-p
+endif
+
+OBJS= elfload.o main.o syscall.o mmap.o signal.o path.o osdep.o thunk.o
+ifeq ($(TARGET_ARCH), i386)
+OBJS+= vm86.o
+endif
+ifeq ($(TARGET_ARCH), arm)
+OBJS+=nwfpe/softfloat.o nwfpe/fpa11.o nwfpe/fpa11_cpdo.o \
+nwfpe/fpa11_cpdt.o nwfpe/fpa11_cprt.o nwfpe/fpopcode.o nwfpe/single_cpdo.o \
+ nwfpe/double_cpdo.o nwfpe/extended_cpdo.o
+endif
+SRCS:= $(OBJS:.o=.c)
+OBJS+= libqemu.a
+
+# cpu emulator library
+LIBOBJS=
+
+ifeq ($(TARGET_ARCH), i386)
+LIBOBJS+= helper2.o
+ifeq ($(ARCH), i386)
+LIBOBJS+=translate-copy.o
+endif
+endif
+
+ifeq ($(TARGET_ARCH), ppc)
+LIBOBJS+= op_helper.o helper.o
+endif
+
+ifeq ($(TARGET_ARCH), sparc)
+LIBOBJS+= op_helper.o helper.o
+endif
+
+all: $(PROGS)
+
+$(QEMU_USER): $(OBJS)
+ $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+ifeq ($(ARCH),alpha)
+# Mark as 32 bit binary, i. e. it will be mapped into the low 31 bit of
+# the address space (31 bit so sign extending doesn't matter)
+ echo -ne '\001\000\000\000' | dd of=qemu bs=1 seek=48 count=4 conv=notrunc
+endif
+
+# must use static linking to avoid leaving stuff in virtual address space
+VL_OBJS=vl.o exec.o monitor.o osdep.o block.o readline.o pci.o console.o
+#VL_OBJS+=block-cow.o block-qcow.o block-vmdk.o block-cloop.o
+VL_OBJS+= block-cloop.o
+
+SOUND_HW = sb16.o
+AUDIODRV = audio.o noaudio.o wavaudio.o
+ifdef CONFIG_SDL
+AUDIODRV += sdlaudio.o
+endif
+ifdef CONFIG_OSS
+AUDIODRV += ossaudio.o
+endif
+
+pc.o: DEFINES := -DUSE_SB16 $(DEFINES)
+
+ifdef CONFIG_ADLIB
+SOUND_HW += fmopl.o adlib.o
+endif
+
+ifdef CONFIG_FMOD
+AUDIODRV += fmodaudio.o
+audio.o fmodaudio.o: DEFINES := -I$(CONFIG_FMOD_INC) $(DEFINES)
+LIBS += $(CONFIG_FMOD_LIB)
+endif
+
+# Hardware support
+VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o
+VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pc.o port-e9.o
+VL_OBJS+= cirrus_vga.o
+
+ifeq ($(TARGET_ARCH), ppc)
+VL_OBJS+= ppc.o ide.o ne2000.o pckbd.o vga.o $(SOUND_HW) dma.o $(AUDIODRV)
+VL_OBJS+= mc146818rtc.o serial.o i8259.o i8254.o fdc.o m48t59.o
+VL_OBJS+= ppc_prep.o ppc_chrp.o cuda.o adb.o openpic.o mixeng.o
+endif
+ifeq ($(TARGET_ARCH), sparc)
+VL_OBJS+= sun4m.o tcx.o lance.o iommu.o sched.o m48t08.o magic-load.o timer.o
+endif
+ifdef CONFIG_GDBSTUB
+VL_OBJS+=gdbstub.o
+endif
+ifdef CONFIG_VNC
+VL_OBJS+=vnc.o
+endif
+ifdef CONFIG_SDL
+VL_OBJS+=sdl.o
+endif
+ifdef CONFIG_SLIRP
+DEFINES+=-I$(SRC_PATH)/slirp
+SLIRP_OBJS=cksum.o if.o ip_icmp.o ip_input.o ip_output.o \
+slirp.o mbuf.o misc.o sbuf.o socket.o tcp_input.o tcp_output.o \
+tcp_subr.o tcp_timer.o udp.o bootp.o debug.o tftp.o
+VL_OBJS+=$(addprefix slirp/, $(SLIRP_OBJS))
+endif
+
+VL_LDFLAGS=
+# specific flags are needed for non soft mmu emulator
+ifdef CONFIG_STATIC
+VL_LDFLAGS+=-static
+endif
+ifeq ($(XEN_TARGET_ARCH),x86_32)
+VL_LDFLAGS+=-Wl,-T,$(SRC_PATH)/$(XEN_TARGET_ARCH).ld
+endif
+ifndef CONFIG_DARWIN
+ifndef CONFIG_WIN32
+VL_LIBS=-lutil
+endif
+endif
+
+$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a
+ $(CC) $(VL_LDFLAGS) -o $@ $^ $(LIBS) $(SDL_LIBS) $(VNC_LIBS) $(VL_LIBS)
+
+vnc.o: vnc.c keyboard_rdesktop.c
+ $(CC) $(CFLAGS) $(DEFINES) $(VNC_CFLAGS) -c -o $@ $<
+
+sdl.o: sdl.c keyboard_rdesktop.c
+ $(CC) $(CFLAGS) $(DEFINES) $(SDL_CFLAGS) -c -o $@ $<
+
+sdlaudio.o: sdlaudio.c
+ $(CC) $(CFLAGS) $(DEFINES) $(SDL_CFLAGS) -c -o $@ $<
+
+depend: $(SRCS)
+ $(CC) -MM $(CFLAGS) $(DEFINES) $^ 1>.depend
+
+# libqemu
+
+libqemu.a: $(LIBOBJS)
+ rm -f $@
+ $(AR) rcs $@ $(LIBOBJS)
+
+translate.o: translate.c gen-op.h opc.h cpu.h
+
+translate-all.o: translate-all.c op.h opc.h cpu.h
+
+op.h: op.o $(DYNGEN)
+ $(DYNGEN) -o $@ $<
+
+opc.h: op.o $(DYNGEN)
+ $(DYNGEN) -c -o $@ $<
+
+gen-op.h: op.o $(DYNGEN)
+ $(DYNGEN) -g -o $@ $<
+
+op.o: op.c
+ $(CC) $(OP_CFLAGS) $(DEFINES) -c -o $@ $<
+
+helper.o: helper.c
+ $(CC) $(HELPER_CFLAGS) $(DEFINES) -c -o $@ $<
+
+ifeq ($(TARGET_ARCH), i386)
+op.o: op.c opreg_template.h ops_template.h ops_template_mem.h ops_mem.h
+endif
+
+ifeq ($(TARGET_ARCH), arm)
+op.o: op.c op_template.h
+endif
+
+ifeq ($(TARGET_ARCH), sparc)
+op.o: op.c op_template.h op_mem.h
+endif
+
+ifeq ($(TARGET_ARCH), ppc)
+op.o: op.c op_template.h op_mem.h
+op_helper.o: op_helper_mem.h
+endif
+
+mixeng.o: mixeng.c mixeng.h mixeng_template.h
+
+%.o: %.c
+ $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $<
+
+%.o: %.S
+ $(CC) $(DEFINES) -c -o $@ $<
+
+clean:
+ rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp qemu-vgaram-bin
+
+distclean:
+ rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp qemu-vgaram-bin
+
+install: all
+ if [ ! -d $(DESTDIR)$(bindir) ];then mkdir -p $(DESTDIR)$(bindir);fi
+ if [ ! -d $(DESTDIR)$(configdir) ];then mkdir -p $(DESTDIR)$(configdir);fi
+ifneq ($(PROGS),)
+ install -m 755 -s $(PROGS) "$(DESTDIR)$(bindir)"
+endif
+ install -m 755 device-model "$(DESTDIR)$(bindir)"
+ install -m 755 qemu-ifup "$(DESTDIR)$(configdir)"
+ gunzip -c qemu-vgaram-bin.gz >qemu-vgaram-bin
+ install -m 755 qemu-vgaram-bin "$(DESTDIR)$(configdir)"
+ifneq ($(wildcard .depend),)
+include .depend
+endif
diff --git a/tools/ioemu/target-i386-dm/device-model b/tools/ioemu/target-i386-dm/device-model
new file mode 100755
index 0000000000..a7f5e3838f
--- /dev/null
+++ b/tools/ioemu/target-i386-dm/device-model
@@ -0,0 +1,91 @@
#!/bin/sh
#
# Build a qemu-dm command line from a sourced config file and launch the
# device model in the background.  Fixes over the original: variable
# expansions inside [ ] tests are quoted (unquoted tests break on values
# containing whitespace), and the internal variable is spelled PARAMETER.

. /etc/rc.d/init.d/functions

qemubin=/usr/bin/qemu-dm

ulimit -c unlimited

# Use this for debugging:
#gdb --args /usr/sbin/qemu-dm -hda /var/images/qemu-linux.img -nographic \
#        -serial pty -l 'ioport,int' $*

# XXX this is a bit skanky. we assume an order of arguments here.
# namely to have configfile and vncconnect argument as the first
# two arguments.

while getopts ":f:v:d:" opt;
do
    case $opt in
        f) QEMUCONFIGFILE=$OPTARG;;
        v) VNCCONNECT=$OPTARG;;
        d) DOMAIN=$OPTARG;;
        \?) echo;;
    esac
done
if [ "x$QEMUCONFIGFILE" != "x" ]; then shift; shift; fi
if [ "x$VNCCONNECT" != "x" ]; then shift; shift; fi


echo "$QEMUCONFIGFILE"
if [ -n "$QEMUCONFIGFILE" ]; then
    . "$QEMUCONFIGFILE"
else
    echo "no config file specified!" > /dev/tty
    echo "no config file specified!" >> /tmp/qemustart.log
    exit
fi

PARAMETER=""

if [ -n "$hda" ]; then
    PARAMETER="$PARAMETER -hda $hda"
fi

if [ -n "$hdb" ]; then
    PARAMETER="$PARAMETER -hdb $hdb"
fi

if [ -n "$hdc" ]; then
    PARAMETER="$PARAMETER -hdc $hdc"
fi

if [ -n "$hdd" ]; then
    PARAMETER="$PARAMETER -hdd $hdd"
fi

if [ -n "$cdrom" ]; then
    PARAMETER="$PARAMETER -cdrom $cdrom"
fi

if [ -n "$boot" ]; then
    PARAMETER="$PARAMETER -boot $boot"
fi

if [ -n "$nographic" ] && [ "$nographic" -eq 1 ]; then
    PARAMETER="$PARAMETER -nographic"
fi

vnc=${vnc:=1}
sdl=${sdl:=0}
# Only add VNC options when the installed qemu-dm mentions vnc in its usage.
if qemu-dm 2>&1 | grep vnc > /dev/null; then
    if [ "$vnc" -eq 1 ] && [ "$sdl" -eq 1 ]; then
        PARAMETER="$PARAMETER -vnc-and-sdl -k en-us"
    elif [ "$vnc" -eq 1 ]; then
        PARAMETER="$PARAMETER -vnc -k en-us"
    fi
    # One VNC display per domain, starting at the standard base port 5900.
    VNCPORT=`expr 5900 + $DOMAIN`
    PARAMETER="$PARAMETER -vncport $VNCPORT"
    if [ "x$VNCCONNECT" != "x" ]; then
        PARAMETER="$PARAMETER -vncconnect $VNCCONNECT"
    fi
fi

#optional cmdline for qemu
#        -nographic \
#        -serial pty \


PARAMETER="$PARAMETER -l int $*";
echo "$qemubin $PARAMETER" >>/tmp/qemustart.log
$qemubin $PARAMETER &
diff --git a/tools/ioemu/target-i386-dm/helper2.c b/tools/ioemu/target-i386-dm/helper2.c
new file mode 100644
index 0000000000..6ac4349fc5
--- /dev/null
+++ b/tools/ioemu/target-i386-dm/helper2.c
@@ -0,0 +1,415 @@
+/*
+ * i386 helpers (without register variable usage)
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * Main cpu loop for handling I/O requests coming from a virtual machine
+ * Copyright © 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <assert.h>
+
+#include <limits.h>
+#include <fcntl.h>
+
+#include "xc.h"
+#include <io/ioreq.h>
+
+#include "cpu.h"
+#include "exec-all.h"
+
+void *shared_page;
+
+CPUX86State *cpu_86_init(void)
+{
+ CPUX86State *env;
+ static int inited;
+
+ cpu_exec_init();
+
+ env = malloc(sizeof(CPUX86State));
+ if (!env)
+ return NULL;
+ memset(env, 0, sizeof(CPUX86State));
+ /* init various static tables */
+ if (!inited) {
+ inited = 1;
+ }
+ cpu_single_env = env;
+ cpu_reset(env);
+ return env;
+}
+
/* NOTE: must be called outside the CPU execute loop */
void cpu_reset(CPUX86State *env)
{
    /* Intentionally empty: this device-model stub keeps no register
       state of its own to reset. */
}
+
/* Release a CPU state object allocated by cpu_86_init()/cpu_init(). */
void cpu_x86_close(CPUX86State *env)
{
    free(env);
}
+
+
/* Stub: register-state dumping is not implemented in the device model. */
void cpu_dump_state(CPUState *env, FILE *f,
                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
                    int flags)
{
}
+
+/***********************************************************/
+/* x86 mmu */
+/* XXX: add PGE support */
+
/* Update the emulated A20 gate: bit 20 of env->a20_mask mirrors the
 * requested state; all other address bits stay enabled. */
void cpu_x86_set_a20(CPUX86State *env, int a20_state)
{
    /* Normalise to 0/1 before comparing with the stored bit. */
    a20_state = (a20_state != 0);
    if (a20_state != ((env->a20_mask >> 20) & 1)) {
#if defined(DEBUG_MMU)
        printf("A20 update: a20=%d\n", a20_state);
#endif
        env->a20_mask = 0xffefffff | (a20_state << 20);
    }
}
+
/* Debugger physical-page lookup: addresses are identity-mapped here,
 * so the input address is returned unchanged. */
target_ulong cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
{
    return addr;
}
+
/* Event-channel file descriptor used to poll for I/O request
   notifications; -1 until cpu_init() opens /dev/xen/evtchn. */
int evtchn_fd = -1;
/* Event-channel port to poll; supplied as a command-line parameter. */
u16 ioreq_port = 0;

/* NOTE(review): shared_page is already declared near the top of this
   file; this second file-scope definition is redundant — consider
   keeping only one. */
void *shared_page = NULL;
+
+//some functions to handle the io req packet
+
+//get the ioreq packets from share mem
+ioreq_t* __cpu_get_ioreq(void)
+{
+ ioreq_t *req;
+ req = &((vcpu_iodata_t *) shared_page)->vp_ioreq;
+ if (req->state == STATE_IOREQ_READY) {
+ req->state = STATE_IOREQ_INPROCESS;
+ } else {
+ fprintf(logfile, "False I/O requrest ... in-service already: %x, pvalid: %x,port: %llx, data: %llx, count: %llx, size: %llx\n", req->state, req->pdata_valid, req->addr, req->u.data, req->count, req->size);
+ req = NULL;
+ }
+
+ return req;
+}
+
/* Poll the event channel for a notification on ioreq_port.
 * A successful read returns one 16-bit port index; on a match the port
 * is re-armed (unmasked) by writing it back, and the pending request is
 * fetched from shared memory.  Returns NULL when nothing (or a
 * different port) was signalled. */
ioreq_t* cpu_get_ioreq(void)
{
    int rc;
    u16 buf[2];
    rc = read(evtchn_fd, buf, 2);
    if (rc == 2 && buf[0] == ioreq_port){ /* got only one matched 16bit port index */
        /* unmask the wanted port again */
        write(evtchn_fd, &ioreq_port, 2);

        /* get the io packet from shared memory */
        return __cpu_get_ioreq();
    }

    /* read error or read nothing */
    return NULL;
}
+
+unsigned long
+do_inp(CPUState *env, unsigned long addr, unsigned long size)
+{
+ switch(size) {
+ case 1:
+ return cpu_inb(env, addr);
+ case 2:
+ return cpu_inw(env, addr);
+ case 4:
+ return cpu_inl(env, addr);
+ default:
+ fprintf(logfile, "inp: bad size: %lx %lx\n", addr, size);
+ exit(-1);
+ }
+}
+
+void
+do_outp(CPUState *env, unsigned long addr, unsigned long size,
+ unsigned long val)
+{
+ switch(size) {
+ case 1:
+ return cpu_outb(env, addr, val);
+ case 2:
+ return cpu_outw(env, addr, val);
+ case 4:
+ return cpu_outl(env, addr, val);
+ default:
+ fprintf(logfile, "outp: bad size: %lx %lx\n", addr, size);
+ exit(-1);
+ }
+}
+
+extern void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+ int len, int is_write);
+
+static inline void
+read_physical(target_phys_addr_t addr, unsigned long size, void *val)
+{
+ return cpu_physical_memory_rw(addr, val, size, 0);
+}
+
+static inline void
+write_physical(target_phys_addr_t addr, unsigned long size, void *val)
+{
+ return cpu_physical_memory_rw(addr, val, size, 1);
+}
+
/* Dispatch one I/O request from the guest: perform the port or
 * memory-mapped access it describes (single value or string/repeated
 * form), store any result back into the request packet, mark the
 * request completed, and flag that a completion event must be sent. */
void cpu_dispatch_ioreq(CPUState *env, ioreq_t *req)
{
    int i;
    int sign;

    /* Direction flag set => string operations walk downwards. */
    sign = (req->df) ? -1 : 1;

    if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) {
        if (req->size != 4) {
            // Bochs expects higher bits to be 0
            req->u.data &= (1UL << (8 * req->size))-1;
        }
    }

    if (req->port_mm == 0){ /* port I/O */
        if(req->dir == IOREQ_READ){ /* read */
            if (!req->pdata_valid) {
                /* Single-value IN: result travels in the packet itself. */
                req->u.data = do_inp(env, req->addr, req->size);
            } else {
                /* String IN: store each item to guest memory at pdata. */
                unsigned long tmp;

                for (i = 0; i < req->count; i++) {
                    tmp = do_inp(env, req->addr, req->size);
                    write_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size),
                                   req->size, &tmp);
                }
            }
        } else if(req->dir == IOREQ_WRITE) {
            if (!req->pdata_valid) {
                /* Single-value OUT: data travels in the packet. */
                do_outp(env, req->addr, req->size, req->u.data);
            } else {
                /* String OUT: fetch each item from guest memory. */
                for (i = 0; i < req->count; i++) {
                    unsigned long tmp;

                    read_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size,
                                  &tmp);
                    do_outp(env, req->addr, req->size, tmp);
                }
            }

        }
    } else if (req->port_mm == 1){ /* memory-mapped I/O */
        if (!req->pdata_valid) {
            /* Repeated access with immediate data (stos-style). */
            if(req->dir == IOREQ_READ) { /* read */
                for (i = 0; i < req->count; i++) {
                    read_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &req->u.data);
                }
            } else if(req->dir == IOREQ_WRITE) { /* write */
                for (i = 0; i < req->count; i++) {
                    write_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &req->u.data);
                }
            }
        } else {
            /* Memory-to-memory copy (movs-style): addr <-> pdata. */
            unsigned long tmp;
            if (req->dir == IOREQ_READ) {
                for (i = 0; i < req->count; i++) {
                    read_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &tmp);
                    write_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size, &tmp);
                }
            } else if (req->dir == IOREQ_WRITE) {
                for (i = 0; i < req->count; i++) {
                    read_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size, &tmp);
                    write_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &tmp);
                }
            }
        }
    }
    /* No state change if state = STATE_IORESP_HOOK */
    if (req->state == STATE_IOREQ_INPROCESS)
        req->state = STATE_IORESP_READY;
    env->send_event = 1;
}
+
+void
+cpu_handle_ioreq(CPUState *env)
+{
+ ioreq_t *req = cpu_get_ioreq();
+ if (req)
+ cpu_dispatch_ioreq(env, req);
+}
+
/* Periodic tick handler: just check for and service pending I/O
 * requests. */
void
cpu_timer_handler(CPUState *env)
{
    cpu_handle_ioreq(env);
}
+
/* Handle for Xen control-interface calls (e.g. xc_evtchn_send). */
int xc_handle;

/* Atomically set bit <nr> in the bitmap at <addr> using the x86
 * 'lock bts' instruction. */
static __inline__ void atomic_set_bit(long nr, volatile void *addr)
{
    __asm__ __volatile__(
        "lock ; bts %1,%0"
        :"=m" (*(volatile long *)addr)
        :"dIr" (nr));
}
+
/* Inject interrupt <vector> into the guest: set the vector's bit in the
 * shared-page interrupt bitmap and flag that an event-channel
 * notification must be sent. */
void
do_interrupt(CPUState *env, int vector)
{
    unsigned long *intr;

    /* Record the vector in the shared memory page; the actual event is
       sent later by the main loop when send_event is set. */
    intr = &(((vcpu_iodata_t *) shared_page)->vp_intr[0]);
    atomic_set_bit(vector, intr);
    if (loglevel & CPU_LOG_INT)
        fprintf(logfile, "injecting vector: %x\n", vector);
    env->send_event = 1;
}
+
+//static unsigned long tsc_per_tick = 1; /* XXX: calibrate */
+
/* Device-model main loop: wait (with a 100 ms timeout) for event-channel
 * notifications, service I/O requests and timers, inject pending PIC
 * interrupts, and send a completion event back to the guest when any
 * handler requested one.  Returns 0 on shutdown or on select() error. */
int main_loop(void)
{
    int vector;
    fd_set rfds;
    struct timeval tv;
    extern CPUState *global_env;
    extern int vm_running;
    extern int shutdown_requested;
    CPUState *env = global_env;
    int retval;
    extern void main_loop_wait(int);

    /* Only evtchn_fd is ever watched; the set is re-armed each pass. */
    FD_ZERO(&rfds);

    while (1) {
        if (vm_running) {
            if (shutdown_requested) {
                break;
            }
        }

        /* Wait up to 100 ms for an event-channel notification. */
        tv.tv_sec = 0;
        tv.tv_usec = 100000;
        FD_SET(evtchn_fd, &rfds);

        env->send_event = 0;
        retval = select(evtchn_fd+1, &rfds, NULL, NULL, &tv);
        if (retval == -1) {
            perror("select");
            return 0;
        }

/* NOTE(review): ULONGLONG_MAX is defined here but never used in this
   function — looks like leftover code; confirm before removing. */
#if __WORDSIZE == 32
#define ULONGLONG_MAX 0xffffffffffffffffULL
#else
#define ULONGLONG_MAX ULONG_MAX
#endif

        main_loop_wait(0);

        cpu_timer_handler(env);
        if (env->interrupt_request & CPU_INTERRUPT_HARD) {
            env->interrupt_request &= ~CPU_INTERRUPT_HARD;
            vector = cpu_get_pic_interrupt(env);
            do_interrupt(env, vector);
        }

        /* Notify the guest once per iteration if anything completed. */
        if (env->send_event) {
            int ret;
            ret = xc_evtchn_send(xc_handle, ioreq_port);
            if (ret == -1) {
                fprintf(logfile, "evtchn_send failed on port: %d\n", ioreq_port);
            }
        }
    }
    return 0;
}
+
+CPUState *
+cpu_init()
+{
+ CPUX86State *env;
+
+ cpu_exec_init();
+
+ env = malloc(sizeof(CPUX86State));
+ if (!env)
+ return NULL;
+ memset(env, 0, sizeof(CPUX86State));
+
+ cpu_single_env = env;
+
+ if (evtchn_fd != -1)//the evtchn has been opened by another cpu object
+ return NULL;
+
+ //use nonblock reading not polling, may change in future.
+ evtchn_fd = open("/dev/xen/evtchn", O_RDWR|O_NONBLOCK);
+ if (evtchn_fd == -1) {
+ perror("open");
+ return NULL;
+ }
+
+ fprintf(logfile, "listening to port: %d\n", ioreq_port);
+ /*unmask the wanted port -- bind*/
+ if (ioctl(evtchn_fd, ('E'<<8)|2, ioreq_port) == -1) {
+ perror("ioctl");
+ return NULL;
+ }
+
+ return env;
+}
diff --git a/tools/ioemu/target-i386-dm/qemu-ifup b/tools/ioemu/target-i386-dm/qemu-ifup
new file mode 100755
index 0000000000..87bff3410c
--- /dev/null
+++ b/tools/ioemu/target-i386-dm/qemu-ifup
@@ -0,0 +1,10 @@
#!/bin/sh
#
# Network hook run by qemu: bring the tap interface ($1) up with no
# address and attach it to the xen bridge.  $1 is quoted so an unusual
# interface name cannot be word-split.

#. /etc/rc.d/init.d/functions
#ulimit -c unlimited

echo 'config qemu network with xen bridge for '
echo $*

ifconfig "$1" 0.0.0.0 up
brctl addif xen-br0 "$1"
diff --git a/tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz b/tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz
new file mode 100644
index 0000000000..86e44ab486
--- /dev/null
+++ b/tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz
Binary files differ
diff --git a/tools/ioemu/thunk.c b/tools/ioemu/thunk.c
new file mode 100644
index 0000000000..2dbc378cd9
--- /dev/null
+++ b/tools/ioemu/thunk.c
@@ -0,0 +1,243 @@
+/*
+ * Generic thunking code to convert data between host and target CPU
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "qemu.h"
+#include "thunk.h"
+
+//#define DEBUG
+
+#define MAX_STRUCTS 128
+
+/* XXX: make it dynamic */
+StructEntry struct_entries[MAX_STRUCTS];
+
+static inline const argtype *thunk_type_next(const argtype *type_ptr)
+{
+ int type;
+
+ type = *type_ptr++;
+ switch(type) {
+ case TYPE_CHAR:
+ case TYPE_SHORT:
+ case TYPE_INT:
+ case TYPE_LONGLONG:
+ case TYPE_ULONGLONG:
+ case TYPE_LONG:
+ case TYPE_ULONG:
+ case TYPE_PTRVOID:
+ return type_ptr;
+ case TYPE_PTR:
+ return thunk_type_next(type_ptr);
+ case TYPE_ARRAY:
+ return thunk_type_next(type_ptr + 1);
+ case TYPE_STRUCT:
+ return type_ptr + 1;
+ default:
+ return NULL;
+ }
+}
+
/* Register struct layout <id>: count the fields in <types>, then compute
 * per-field offsets, total size and alignment for both representations
 * (index 0 = target, 1 = host), caching the results in struct_entries. */
void thunk_register_struct(int id, const char *name, const argtype *types)
{
    const argtype *type_ptr;
    StructEntry *se;
    int nb_fields, offset, max_align, align, size, i, j;

    se = struct_entries + id;

    /* first we count the number of fields */
    type_ptr = types;
    nb_fields = 0;
    while (*type_ptr != TYPE_NULL) {
        type_ptr = thunk_type_next(type_ptr);
        nb_fields++;
    }
    se->field_types = types;
    se->nb_fields = nb_fields;
    se->name = name;
#ifdef DEBUG
    printf("struct %s: id=%d nb_fields=%d\n",
           se->name, id, se->nb_fields);
#endif
    /* now we can alloc the data */

    for(i = 0;i < 2; i++) {
        offset = 0;
        max_align = 1;
        /* NOTE(review): malloc result is not checked; a failed
           allocation would fault in the loop below. */
        se->field_offsets[i] = malloc(nb_fields * sizeof(int));
        type_ptr = se->field_types;
        for(j = 0;j < nb_fields; j++) {
            size = thunk_type_size(type_ptr, i);
            align = thunk_type_align(type_ptr, i);
            /* Round the running offset up to this field's alignment. */
            offset = (offset + align - 1) & ~(align - 1);
            se->field_offsets[i][j] = offset;
            offset += size;
            if (align > max_align)
                max_align = align;
            type_ptr = thunk_type_next(type_ptr);
        }
        /* Pad the struct tail to its own alignment. */
        offset = (offset + max_align - 1) & ~(max_align - 1);
        se->size[i] = offset;
        se->align[i] = max_align;
#ifdef DEBUG
        printf("%s: size=%d align=%d\n",
               i == THUNK_HOST ? "host" : "target", offset, max_align);
#endif
    }
}
+
+void thunk_register_struct_direct(int id, const char *name, StructEntry *se1)
+{
+ StructEntry *se;
+ se = struct_entries + id;
+ *se = *se1;
+ se->name = name;
+}
+
+
+/* now we can define the main conversion functions */
/* Convert one value described at type_ptr from src to dst, byte-swapping
 * and resizing as needed between host and target representations
 * (to_host selects the direction).  Recurses for arrays and structs.
 * Returns a pointer to the next type description. */
const argtype *thunk_convert(void *dst, const void *src,
                             const argtype *type_ptr, int to_host)
{
    int type;

    type = *type_ptr++;
    switch(type) {
    case TYPE_CHAR:
        *(uint8_t *)dst = *(uint8_t *)src;
        break;
    case TYPE_SHORT:
        *(uint16_t *)dst = tswap16(*(uint16_t *)src);
        break;
    case TYPE_INT:
        *(uint32_t *)dst = tswap32(*(uint32_t *)src);
        break;
    case TYPE_LONGLONG:
    case TYPE_ULONGLONG:
        *(uint64_t *)dst = tswap64(*(uint64_t *)src);
        break;
#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
    /* Long and pointer are 32-bit on both sides: plain swap. */
    case TYPE_LONG:
    case TYPE_ULONG:
    case TYPE_PTRVOID:
        *(uint32_t *)dst = tswap32(*(uint32_t *)src);
        break;
#elif HOST_LONG_BITS == 64 && TARGET_LONG_BITS == 32
    /* 64-bit host, 32-bit target: widen on the way in, truncate out. */
    case TYPE_LONG:
    case TYPE_ULONG:
    case TYPE_PTRVOID:
        if (to_host) {
            *(uint64_t *)dst = tswap32(*(uint32_t *)src);
        } else {
            *(uint32_t *)dst = tswap32(*(uint64_t *)src & 0xffffffff);
        }
        break;
#else
#error unsupported conversion
#endif
    case TYPE_ARRAY:
        {
            int array_length, i, dst_size, src_size;
            const uint8_t *s;
            uint8_t *d;

            /* Element sizes may differ between host and target. */
            array_length = *type_ptr++;
            dst_size = thunk_type_size(type_ptr, to_host);
            src_size = thunk_type_size(type_ptr, 1 - to_host);
            d = dst;
            s = src;
            for(i = 0;i < array_length; i++) {
                thunk_convert(d, s, type_ptr, to_host);
                d += dst_size;
                s += src_size;
            }
            type_ptr = thunk_type_next(type_ptr);
        }
        break;
    case TYPE_STRUCT:
        {
            int i;
            const StructEntry *se;
            const uint8_t *s;
            uint8_t *d;
            const argtype *field_types;
            const int *dst_offsets, *src_offsets;

            se = struct_entries + *type_ptr++;
            /* NOTE(review): the presence test checks convert[0] but the
               call uses convert[to_host]; confirm both slots are always
               set together. */
            if (se->convert[0] != NULL) {
                /* specific conversion is needed */
                (*se->convert[to_host])(dst, src);
            } else {
                /* standard struct conversion */
                field_types = se->field_types;
                dst_offsets = se->field_offsets[to_host];
                src_offsets = se->field_offsets[1 - to_host];
                d = dst;
                s = src;
                for(i = 0;i < se->nb_fields; i++) {
                    field_types = thunk_convert(d + dst_offsets[i],
                                                s + src_offsets[i],
                                                field_types, to_host);
                }
            }
        }
        break;
    default:
        fprintf(stderr, "Invalid type 0x%x\n", type);
        break;
    }
    return type_ptr;
}
+
+/* from em86 */
+
+/* Utility function: Table-driven functions to translate bitmasks
+ * between X86 and Alpha formats...
+ */
+unsigned int target_to_host_bitmask(unsigned int x86_mask,
+ bitmask_transtbl * trans_tbl)
+{
+ bitmask_transtbl * btp;
+ unsigned int alpha_mask = 0;
+
+ for(btp = trans_tbl; btp->x86_mask && btp->alpha_mask; btp++) {
+ if((x86_mask & btp->x86_mask) == btp->x86_bits) {
+ alpha_mask |= btp->alpha_bits;
+ }
+ }
+ return(alpha_mask);
+}
+
+unsigned int host_to_target_bitmask(unsigned int alpha_mask,
+ bitmask_transtbl * trans_tbl)
+{
+ bitmask_transtbl * btp;
+ unsigned int x86_mask = 0;
+
+ for(btp = trans_tbl; btp->x86_mask && btp->alpha_mask; btp++) {
+ if((alpha_mask & btp->alpha_mask) == btp->alpha_bits) {
+ x86_mask |= btp->x86_bits;
+ }
+ }
+ return(x86_mask);
+}
diff --git a/tools/ioemu/thunk.h b/tools/ioemu/thunk.h
new file mode 100644
index 0000000000..42fd96f3a3
--- /dev/null
+++ b/tools/ioemu/thunk.h
@@ -0,0 +1,158 @@
+/*
+ * Generic thunking code to convert data between host and target CPU
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef THUNK_H
+#define THUNK_H
+
+#include <inttypes.h>
+#include "cpu.h"
+
+/* types enums definitions */
+
+typedef enum argtype {
+ TYPE_NULL,
+ TYPE_CHAR,
+ TYPE_SHORT,
+ TYPE_INT,
+ TYPE_LONG,
+ TYPE_ULONG,
+ TYPE_PTRVOID, /* pointer on unknown data */
+ TYPE_LONGLONG,
+ TYPE_ULONGLONG,
+ TYPE_PTR,
+ TYPE_ARRAY,
+ TYPE_STRUCT,
+} argtype;
+
+#define MK_PTR(type) TYPE_PTR, type
+#define MK_ARRAY(type, size) TYPE_ARRAY, size, type
+#define MK_STRUCT(id) TYPE_STRUCT, id
+
+#define THUNK_TARGET 0
+#define THUNK_HOST 1
+
+typedef struct {
+ /* standard struct handling */
+ const argtype *field_types;
+ int nb_fields;
+ int *field_offsets[2];
+ /* special handling */
+ void (*convert[2])(void *dst, const void *src);
+ int size[2];
+ int align[2];
+ const char *name;
+} StructEntry;
+
+/* Translation table for bitmasks... */
+typedef struct bitmask_transtbl {
+ unsigned int x86_mask;
+ unsigned int x86_bits;
+ unsigned int alpha_mask;
+ unsigned int alpha_bits;
+} bitmask_transtbl;
+
+void thunk_register_struct(int id, const char *name, const argtype *types);
+void thunk_register_struct_direct(int id, const char *name, StructEntry *se1);
+const argtype *thunk_convert(void *dst, const void *src,
+ const argtype *type_ptr, int to_host);
+#ifndef NO_THUNK_TYPE_SIZE
+
+extern StructEntry struct_entries[];
+
/* Return the size in bytes of the type described at type_ptr, for the
 * host (is_host != 0) or target (is_host == 0) representation.
 * Returns -1 for an unknown type code. */
static inline int thunk_type_size(const argtype *type_ptr, int is_host)
{
    int type, size;
    const StructEntry *se;

    type = *type_ptr;
    switch(type) {
    case TYPE_CHAR:
        return 1;
    case TYPE_SHORT:
        return 2;
    case TYPE_INT:
        return 4;
    case TYPE_LONGLONG:
    case TYPE_ULONGLONG:
        return 8;
    case TYPE_LONG:
    case TYPE_ULONG:
    case TYPE_PTRVOID:
    case TYPE_PTR:
        /* Word-sized: depends on which side is being described. */
        if (is_host) {
            return HOST_LONG_SIZE;
        } else {
            return TARGET_LONG_SIZE;
        }
        break;
    case TYPE_ARRAY:
        /* Encoding: TYPE_ARRAY, length, element-type... */
        size = type_ptr[1];
        return size * thunk_type_size(type_ptr + 2, is_host);
    case TYPE_STRUCT:
        /* Size was precomputed by thunk_register_struct(). */
        se = struct_entries + type_ptr[1];
        return se->size[is_host];
    default:
        return -1;
    }
}
+
/* Return the alignment in bytes of the type described at type_ptr, for
 * the host (is_host != 0) or target (is_host == 0) representation.
 * Returns -1 for an unknown type code. */
static inline int thunk_type_align(const argtype *type_ptr, int is_host)
{
    int type;
    const StructEntry *se;

    type = *type_ptr;
    switch(type) {
    case TYPE_CHAR:
        return 1;
    case TYPE_SHORT:
        return 2;
    case TYPE_INT:
        return 4;
    case TYPE_LONGLONG:
    case TYPE_ULONGLONG:
        return 8;
    case TYPE_LONG:
    case TYPE_ULONG:
    case TYPE_PTRVOID:
    case TYPE_PTR:
        /* Word-sized: depends on which side is being described. */
        if (is_host) {
            return HOST_LONG_SIZE;
        } else {
            return TARGET_LONG_SIZE;
        }
        break;
    case TYPE_ARRAY:
        /* An array aligns like its element type (skip the length word). */
        return thunk_type_align(type_ptr + 2, is_host);
    case TYPE_STRUCT:
        /* Alignment was precomputed by thunk_register_struct(). */
        se = struct_entries + type_ptr[1];
        return se->align[is_host];
    default:
        return -1;
    }
}
+
+#endif /* NO_THUNK_TYPE_SIZE */
+
+unsigned int target_to_host_bitmask(unsigned int x86_mask,
+ bitmask_transtbl * trans_tbl);
+unsigned int host_to_target_bitmask(unsigned int alpha_mask,
+ bitmask_transtbl * trans_tbl);
+
+#endif
diff --git a/tools/ioemu/vgafont.h b/tools/ioemu/vgafont.h
new file mode 100644
index 0000000000..bb75796be5
--- /dev/null
+++ b/tools/ioemu/vgafont.h
@@ -0,0 +1,4611 @@
+static uint8_t vgafont16[256 * 16] = {
+
+ /* 0 0x00 '^@' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 1 0x01 '^A' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x81, /* 10000001 */
+ 0xa5, /* 10100101 */
+ 0x81, /* 10000001 */
+ 0x81, /* 10000001 */
+ 0xbd, /* 10111101 */
+ 0x99, /* 10011001 */
+ 0x81, /* 10000001 */
+ 0x81, /* 10000001 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 2 0x02 '^B' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0xff, /* 11111111 */
+ 0xdb, /* 11011011 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xc3, /* 11000011 */
+ 0xe7, /* 11100111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 3 0x03 '^C' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x6c, /* 01101100 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0x7c, /* 01111100 */
+ 0x38, /* 00111000 */
+ 0x10, /* 00010000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 4 0x04 '^D' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x7c, /* 01111100 */
+ 0xfe, /* 11111110 */
+ 0x7c, /* 01111100 */
+ 0x38, /* 00111000 */
+ 0x10, /* 00010000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 5 0x05 '^E' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x3c, /* 00111100 */
+ 0xe7, /* 11100111 */
+ 0xe7, /* 11100111 */
+ 0xe7, /* 11100111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 6 0x06 '^F' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x7e, /* 01111110 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 7 0x07 '^G' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 8 0x08 '^H' */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xe7, /* 11100111 */
+ 0xc3, /* 11000011 */
+ 0xc3, /* 11000011 */
+ 0xe7, /* 11100111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+
+ /* 9 0x09 '^I' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0x42, /* 01000010 */
+ 0x42, /* 01000010 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 10 0x0a '^J' */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xc3, /* 11000011 */
+ 0x99, /* 10011001 */
+ 0xbd, /* 10111101 */
+ 0xbd, /* 10111101 */
+ 0x99, /* 10011001 */
+ 0xc3, /* 11000011 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+
+ /* 11 0x0b '^K' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x1e, /* 00011110 */
+ 0x0e, /* 00001110 */
+ 0x1a, /* 00011010 */
+ 0x32, /* 00110010 */
+ 0x78, /* 01111000 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x78, /* 01111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 12 0x0c '^L' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 13 0x0d '^M' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3f, /* 00111111 */
+ 0x33, /* 00110011 */
+ 0x3f, /* 00111111 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x70, /* 01110000 */
+ 0xf0, /* 11110000 */
+ 0xe0, /* 11100000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 14 0x0e '^N' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7f, /* 01111111 */
+ 0x63, /* 01100011 */
+ 0x7f, /* 01111111 */
+ 0x63, /* 01100011 */
+ 0x63, /* 01100011 */
+ 0x63, /* 01100011 */
+ 0x63, /* 01100011 */
+ 0x67, /* 01100111 */
+ 0xe7, /* 11100111 */
+ 0xe6, /* 11100110 */
+ 0xc0, /* 11000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 15 0x0f '^O' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xdb, /* 11011011 */
+ 0x3c, /* 00111100 */
+ 0xe7, /* 11100111 */
+ 0x3c, /* 00111100 */
+ 0xdb, /* 11011011 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 16 0x10 '^P' */
+ 0x00, /* 00000000 */
+ 0x80, /* 10000000 */
+ 0xc0, /* 11000000 */
+ 0xe0, /* 11100000 */
+ 0xf0, /* 11110000 */
+ 0xf8, /* 11111000 */
+ 0xfe, /* 11111110 */
+ 0xf8, /* 11111000 */
+ 0xf0, /* 11110000 */
+ 0xe0, /* 11100000 */
+ 0xc0, /* 11000000 */
+ 0x80, /* 10000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 17 0x11 '^Q' */
+ 0x00, /* 00000000 */
+ 0x02, /* 00000010 */
+ 0x06, /* 00000110 */
+ 0x0e, /* 00001110 */
+ 0x1e, /* 00011110 */
+ 0x3e, /* 00111110 */
+ 0xfe, /* 11111110 */
+ 0x3e, /* 00111110 */
+ 0x1e, /* 00011110 */
+ 0x0e, /* 00001110 */
+ 0x06, /* 00000110 */
+ 0x02, /* 00000010 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 18 0x12 '^R' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 19 0x13 '^S' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x00, /* 00000000 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 20 0x14 '^T' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7f, /* 01111111 */
+ 0xdb, /* 11011011 */
+ 0xdb, /* 11011011 */
+ 0xdb, /* 11011011 */
+ 0x7b, /* 01111011 */
+ 0x1b, /* 00011011 */
+ 0x1b, /* 00011011 */
+ 0x1b, /* 00011011 */
+ 0x1b, /* 00011011 */
+ 0x1b, /* 00011011 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 21 0x15 '^U' */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0x60, /* 01100000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x0c, /* 00001100 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 22 0x16 '^V' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 23 0x17 '^W' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 24 0x18 '^X' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 25 0x19 '^Y' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 26 0x1a '^Z' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0xfe, /* 11111110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 27 0x1b '^[' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xfe, /* 11111110 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 28 0x1c '^\' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 29 0x1d '^]' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x28, /* 00101000 */
+ 0x6c, /* 01101100 */
+ 0xfe, /* 11111110 */
+ 0x6c, /* 01101100 */
+ 0x28, /* 00101000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 30 0x1e '^^' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x38, /* 00111000 */
+ 0x7c, /* 01111100 */
+ 0x7c, /* 01111100 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 31 0x1f '^_' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0x7c, /* 01111100 */
+ 0x7c, /* 01111100 */
+ 0x38, /* 00111000 */
+ 0x38, /* 00111000 */
+ 0x10, /* 00010000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 32 0x20 ' ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 33 0x21 '!' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x3c, /* 00111100 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 34 0x22 '"' */
+ 0x00, /* 00000000 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x24, /* 00100100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 35 0x23 '#' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0xfe, /* 11111110 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0xfe, /* 11111110 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 36 0x24 '$' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc2, /* 11000010 */
+ 0xc0, /* 11000000 */
+ 0x7c, /* 01111100 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x86, /* 10000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 37 0x25 '%' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc2, /* 11000010 */
+ 0xc6, /* 11000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xc6, /* 11000110 */
+ 0x86, /* 10000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 38 0x26 '&' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x76, /* 01110110 */
+ 0xdc, /* 11011100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 39 0x27 ''' */
+ 0x00, /* 00000000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 40 0x28 '(' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 41 0x29 ')' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 42 0x2a '*' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0xff, /* 11111111 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 43 0x2b '+' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 44 0x2c ',' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 45 0x2d '-' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 46 0x2e '.' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 47 0x2f '/' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x02, /* 00000010 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xc0, /* 11000000 */
+ 0x80, /* 10000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 48 0x30 '0' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xd6, /* 11010110 */
+ 0xd6, /* 11010110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 49 0x31 '1' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x38, /* 00111000 */
+ 0x78, /* 01111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 50 0x32 '2' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 51 0x33 '3' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x3c, /* 00111100 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 52 0x34 '4' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x0c, /* 00001100 */
+ 0x1c, /* 00011100 */
+ 0x3c, /* 00111100 */
+ 0x6c, /* 01101100 */
+ 0xcc, /* 11001100 */
+ 0xfe, /* 11111110 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x1e, /* 00011110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 53 0x35 '5' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xfc, /* 11111100 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 54 0x36 '6' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x60, /* 01100000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xfc, /* 11111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 55 0x37 '7' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 56 0x38 '8' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 57 0x39 '9' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7e, /* 01111110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0x78, /* 01111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 58 0x3a ':' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 59 0x3b ';' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 60 0x3c '<' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0x06, /* 00000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 61 0x3d '=' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 62 0x3e '>' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 63 0x3f '?' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 64 0x40 '@' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xde, /* 11011110 */
+ 0xde, /* 11011110 */
+ 0xde, /* 11011110 */
+ 0xdc, /* 11011100 */
+ 0xc0, /* 11000000 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 65 0x41 'A' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 66 0x42 'B' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfc, /* 11111100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x7c, /* 01111100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0xfc, /* 11111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 67 0x43 'C' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0xc2, /* 11000010 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc2, /* 11000010 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 68 0x44 'D' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xf8, /* 11111000 */
+ 0x6c, /* 01101100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x6c, /* 01101100 */
+ 0xf8, /* 11111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 69 0x45 'E' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x66, /* 01100110 */
+ 0x62, /* 01100010 */
+ 0x68, /* 01101000 */
+ 0x78, /* 01111000 */
+ 0x68, /* 01101000 */
+ 0x60, /* 01100000 */
+ 0x62, /* 01100010 */
+ 0x66, /* 01100110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 70 0x46 'F' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x66, /* 01100110 */
+ 0x62, /* 01100010 */
+ 0x68, /* 01101000 */
+ 0x78, /* 01111000 */
+ 0x68, /* 01101000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0xf0, /* 11110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 71 0x47 'G' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0xc2, /* 11000010 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xde, /* 11011110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x66, /* 01100110 */
+ 0x3a, /* 00111010 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 72 0x48 'H' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 73 0x49 'I' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 74 0x4a 'J' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x1e, /* 00011110 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x78, /* 01111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 75 0x4b 'K' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xe6, /* 11100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x6c, /* 01101100 */
+ 0x78, /* 01111000 */
+ 0x78, /* 01111000 */
+ 0x6c, /* 01101100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0xe6, /* 11100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 76 0x4c 'L' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xf0, /* 11110000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x62, /* 01100010 */
+ 0x66, /* 01100110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 77 0x4d 'M' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xee, /* 11101110 */
+ 0xfe, /* 11111110 */
+ 0xfe, /* 11111110 */
+ 0xd6, /* 11010110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 78 0x4e 'N' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xe6, /* 11100110 */
+ 0xf6, /* 11110110 */
+ 0xfe, /* 11111110 */
+ 0xde, /* 11011110 */
+ 0xce, /* 11001110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 79 0x4f 'O' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 80 0x50 'P' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfc, /* 11111100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x7c, /* 01111100 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0xf0, /* 11110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 81 0x51 'Q' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xd6, /* 11010110 */
+ 0xde, /* 11011110 */
+ 0x7c, /* 01111100 */
+ 0x0c, /* 00001100 */
+ 0x0e, /* 00001110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 82 0x52 'R' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfc, /* 11111100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x7c, /* 01111100 */
+ 0x6c, /* 01101100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0xe6, /* 11100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 83 0x53 'S' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x60, /* 01100000 */
+ 0x38, /* 00111000 */
+ 0x0c, /* 00001100 */
+ 0x06, /* 00000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 84 0x54 'T' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x7e, /* 01111110 */
+ 0x5a, /* 01011010 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 85 0x55 'U' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 86 0x56 'V' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x10, /* 00010000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 87 0x57 'W' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xd6, /* 11010110 */
+ 0xd6, /* 11010110 */
+ 0xd6, /* 11010110 */
+ 0xfe, /* 11111110 */
+ 0xee, /* 11101110 */
+ 0x6c, /* 01101100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 88 0x58 'X' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x6c, /* 01101100 */
+ 0x7c, /* 01111100 */
+ 0x38, /* 00111000 */
+ 0x38, /* 00111000 */
+ 0x7c, /* 01111100 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 89 0x59 'Y' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 90 0x5a 'Z' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0x86, /* 10000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xc2, /* 11000010 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 91 0x5b '[' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 92 0x5c '\' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x80, /* 10000000 */
+ 0xc0, /* 11000000 */
+ 0xe0, /* 11100000 */
+ 0x70, /* 01110000 */
+ 0x38, /* 00111000 */
+ 0x1c, /* 00011100 */
+ 0x0e, /* 00001110 */
+ 0x06, /* 00000110 */
+ 0x02, /* 00000010 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 93 0x5d ']' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 94 0x5e '^' */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 95 0x5f '_' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 96 0x60 '`' */
+ 0x00, /* 00000000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 97 0x61 'a' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x78, /* 01111000 */
+ 0x0c, /* 00001100 */
+ 0x7c, /* 01111100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 98 0x62 'b' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xe0, /* 11100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x78, /* 01111000 */
+ 0x6c, /* 01101100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 99 0x63 'c' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 100 0x64 'd' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x1c, /* 00011100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x3c, /* 00111100 */
+ 0x6c, /* 01101100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 101 0x65 'e' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 102 0x66 'f' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x1c, /* 00011100 */
+ 0x36, /* 00110110 */
+ 0x32, /* 00110010 */
+ 0x30, /* 00110000 */
+ 0x78, /* 01111000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x78, /* 01111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 103 0x67 'g' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x76, /* 01110110 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x7c, /* 01111100 */
+ 0x0c, /* 00001100 */
+ 0xcc, /* 11001100 */
+ 0x78, /* 01111000 */
+ 0x00, /* 00000000 */
+
+ /* 104 0x68 'h' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xe0, /* 11100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x6c, /* 01101100 */
+ 0x76, /* 01110110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0xe6, /* 11100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 105 0x69 'i' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 106 0x6a 'j' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x00, /* 00000000 */
+ 0x0e, /* 00001110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+
+ /* 107 0x6b 'k' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xe0, /* 11100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x66, /* 01100110 */
+ 0x6c, /* 01101100 */
+ 0x78, /* 01111000 */
+ 0x78, /* 01111000 */
+ 0x6c, /* 01101100 */
+ 0x66, /* 01100110 */
+ 0xe6, /* 11100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 108 0x6c 'l' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 109 0x6d 'm' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xec, /* 11101100 */
+ 0xfe, /* 11111110 */
+ 0xd6, /* 11010110 */
+ 0xd6, /* 11010110 */
+ 0xd6, /* 11010110 */
+ 0xd6, /* 11010110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 110 0x6e 'n' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xdc, /* 11011100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 111 0x6f 'o' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 112 0x70 'p' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xdc, /* 11011100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x7c, /* 01111100 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0xf0, /* 11110000 */
+ 0x00, /* 00000000 */
+
+ /* 113 0x71 'q' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x76, /* 01110110 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x7c, /* 01111100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x1e, /* 00011110 */
+ 0x00, /* 00000000 */
+
+ /* 114 0x72 'r' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xdc, /* 11011100 */
+ 0x76, /* 01110110 */
+ 0x66, /* 01100110 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0xf0, /* 11110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 115 0x73 's' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0x60, /* 01100000 */
+ 0x38, /* 00111000 */
+ 0x0c, /* 00001100 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 116 0x74 't' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0xfc, /* 11111100 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x36, /* 00110110 */
+ 0x1c, /* 00011100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 117 0x75 'u' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 118 0x76 'v' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 119 0x77 'w' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xd6, /* 11010110 */
+ 0xd6, /* 11010110 */
+ 0xd6, /* 11010110 */
+ 0xfe, /* 11111110 */
+ 0x6c, /* 01101100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 120 0x78 'x' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x38, /* 00111000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 121 0x79 'y' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7e, /* 01111110 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0xf8, /* 11111000 */
+ 0x00, /* 00000000 */
+
+ /* 122 0x7a 'z' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xcc, /* 11001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 123 0x7b '{' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x0e, /* 00001110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x70, /* 01110000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x0e, /* 00001110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 124 0x7c '|' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 125 0x7d '}' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x70, /* 01110000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x0e, /* 00001110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x70, /* 01110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 126 0x7e '~' */
+ 0x00, /* 00000000 */
+ 0x76, /* 01110110 */
+ 0xdc, /* 11011100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 127 0x7f '⌂' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 128 0x80 'Ç' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0xc2, /* 11000010 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc2, /* 11000010 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x70, /* 01110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 129 0x81 'ü' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xcc, /* 11001100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 130 0x82 'é' */
+ 0x00, /* 00000000 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 131 0x83 'â' */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x00, /* 00000000 */
+ 0x78, /* 01111000 */
+ 0x0c, /* 00001100 */
+ 0x7c, /* 01111100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 132 0x84 'ä' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xcc, /* 11001100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x78, /* 01111000 */
+ 0x0c, /* 00001100 */
+ 0x7c, /* 01111100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 133 0x85 'à' */
+ 0x00, /* 00000000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x78, /* 01111000 */
+ 0x0c, /* 00001100 */
+ 0x7c, /* 01111100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 134 0x86 'å' */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x00, /* 00000000 */
+ 0x78, /* 01111000 */
+ 0x0c, /* 00001100 */
+ 0x7c, /* 01111100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 135 0x87 'ç' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x18, /* 00011000 */
+ 0x70, /* 01110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 136 0x88 'ê' */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 137 0x89 'ë' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 138 0x8a 'è' */
+ 0x00, /* 00000000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 139 0x8b 'ï' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x66, /* 01100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 140 0x8c 'î' */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 141 0x8d 'ì' */
+ 0x00, /* 00000000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 142 0x8e 'Ä' */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 143 0x8f 'Å' */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 144 0x90 'É' */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x66, /* 01100110 */
+ 0x62, /* 01100010 */
+ 0x68, /* 01101000 */
+ 0x78, /* 01111000 */
+ 0x68, /* 01101000 */
+ 0x62, /* 01100010 */
+ 0x66, /* 01100110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 145 0x91 'æ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xec, /* 11101100 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x7e, /* 01111110 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0x6e, /* 01101110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 146 0x92 'Æ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3e, /* 00111110 */
+ 0x6c, /* 01101100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xfe, /* 11111110 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xce, /* 11001110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 147 0x93 'ô' */
+ 0x00, /* 00000000 */
+ 0x10, /* 00010000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 148 0x94 'ö' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 149 0x95 'ò' */
+ 0x00, /* 00000000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 150 0x96 'û' */
+ 0x00, /* 00000000 */
+ 0x30, /* 00110000 */
+ 0x78, /* 01111000 */
+ 0xcc, /* 11001100 */
+ 0x00, /* 00000000 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 151 0x97 'ù' */
+ 0x00, /* 00000000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 152 0x98 'ÿ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7e, /* 01111110 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0x78, /* 01111000 */
+ 0x00, /* 00000000 */
+
+ /* 153 0x99 'Ö' */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 154 0x9a 'Ü' */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 155 0x9b '¢' */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 156 0x9c '£' */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x64, /* 01100100 */
+ 0x60, /* 01100000 */
+ 0xf0, /* 11110000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0xe6, /* 11100110 */
+ 0xfc, /* 11111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 157 0x9d '¥' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 158 0x9e '₧' */
+ 0x00, /* 00000000 */
+ 0xf8, /* 11111000 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xf8, /* 11111000 */
+ 0xc4, /* 11000100 */
+ 0xcc, /* 11001100 */
+ 0xde, /* 11011110 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 159 0x9f 'ƒ' */
+ 0x00, /* 00000000 */
+ 0x0e, /* 00001110 */
+ 0x1b, /* 00011011 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xd8, /* 11011000 */
+ 0x70, /* 01110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 160 0xa0 'á' */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0x00, /* 00000000 */
+ 0x78, /* 01111000 */
+ 0x0c, /* 00001100 */
+ 0x7c, /* 01111100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 161 0xa1 'í' */
+ 0x00, /* 00000000 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 162 0xa2 'ó' */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 163 0xa3 'ú' */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0x00, /* 00000000 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 164 0xa4 'ñ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x76, /* 01110110 */
+ 0xdc, /* 11011100 */
+ 0x00, /* 00000000 */
+ 0xdc, /* 11011100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 165 0xa5 'Ñ' */
+ 0x76, /* 01110110 */
+ 0xdc, /* 11011100 */
+ 0x00, /* 00000000 */
+ 0xc6, /* 11000110 */
+ 0xe6, /* 11100110 */
+ 0xf6, /* 11110110 */
+ 0xfe, /* 11111110 */
+ 0xde, /* 11011110 */
+ 0xce, /* 11001110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 166 0xa6 'ª' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x3e, /* 00111110 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 167 0xa7 'º' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 168 0xa8 '¿' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x30, /* 00110000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xc0, /* 11000000 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x7c, /* 01111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 169 0xa9 '⌐' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 170 0xaa '¬' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 171 0xab '½' */
+ 0x00, /* 00000000 */
+ 0x60, /* 01100000 */
+ 0xe0, /* 11100000 */
+ 0x62, /* 01100010 */
+ 0x66, /* 01100110 */
+ 0x6c, /* 01101100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xdc, /* 11011100 */
+ 0x86, /* 10000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x3e, /* 00111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 172 0xac '¼' */
+ 0x00, /* 00000000 */
+ 0x60, /* 01100000 */
+ 0xe0, /* 11100000 */
+ 0x62, /* 01100010 */
+ 0x66, /* 01100110 */
+ 0x6c, /* 01101100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x66, /* 01100110 */
+ 0xce, /* 11001110 */
+ 0x9a, /* 10011010 */
+ 0x3f, /* 00111111 */
+ 0x06, /* 00000110 */
+ 0x06, /* 00000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 173 0xad '¡' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x3c, /* 00111100 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 174 0xae '«' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x36, /* 00110110 */
+ 0x6c, /* 01101100 */
+ 0xd8, /* 11011000 */
+ 0x6c, /* 01101100 */
+ 0x36, /* 00110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 175 0xaf '»' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xd8, /* 11011000 */
+ 0x6c, /* 01101100 */
+ 0x36, /* 00110110 */
+ 0x6c, /* 01101100 */
+ 0xd8, /* 11011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 176 0xb0 '░' */
+ 0x11, /* 00010001 */
+ 0x44, /* 01000100 */
+ 0x11, /* 00010001 */
+ 0x44, /* 01000100 */
+ 0x11, /* 00010001 */
+ 0x44, /* 01000100 */
+ 0x11, /* 00010001 */
+ 0x44, /* 01000100 */
+ 0x11, /* 00010001 */
+ 0x44, /* 01000100 */
+ 0x11, /* 00010001 */
+ 0x44, /* 01000100 */
+ 0x11, /* 00010001 */
+ 0x44, /* 01000100 */
+ 0x11, /* 00010001 */
+ 0x44, /* 01000100 */
+
+ /* 177 0xb1 '▒' */
+ 0x55, /* 01010101 */
+ 0xaa, /* 10101010 */
+ 0x55, /* 01010101 */
+ 0xaa, /* 10101010 */
+ 0x55, /* 01010101 */
+ 0xaa, /* 10101010 */
+ 0x55, /* 01010101 */
+ 0xaa, /* 10101010 */
+ 0x55, /* 01010101 */
+ 0xaa, /* 10101010 */
+ 0x55, /* 01010101 */
+ 0xaa, /* 10101010 */
+ 0x55, /* 01010101 */
+ 0xaa, /* 10101010 */
+ 0x55, /* 01010101 */
+ 0xaa, /* 10101010 */
+
+ /* 178 0xb2 '▓' */
+ 0xdd, /* 11011101 */
+ 0x77, /* 01110111 */
+ 0xdd, /* 11011101 */
+ 0x77, /* 01110111 */
+ 0xdd, /* 11011101 */
+ 0x77, /* 01110111 */
+ 0xdd, /* 11011101 */
+ 0x77, /* 01110111 */
+ 0xdd, /* 11011101 */
+ 0x77, /* 01110111 */
+ 0xdd, /* 11011101 */
+ 0x77, /* 01110111 */
+ 0xdd, /* 11011101 */
+ 0x77, /* 01110111 */
+ 0xdd, /* 11011101 */
+ 0x77, /* 01110111 */
+
+ /* 179 0xb3 '│' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 180 0xb4 '┤' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xf8, /* 11111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 181 0xb5 '╡' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xf8, /* 11111000 */
+ 0x18, /* 00011000 */
+ 0xf8, /* 11111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 182 0xb6 '╢' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0xf6, /* 11110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 183 0xb7 '╖' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 184 0xb8 '╕' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xf8, /* 11111000 */
+ 0x18, /* 00011000 */
+ 0xf8, /* 11111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 185 0xb9 '╣' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0xf6, /* 11110110 */
+ 0x06, /* 00000110 */
+ 0xf6, /* 11110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 186 0xba '║' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 187 0xbb '»' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x06, /* 00000110 */
+ 0xf6, /* 11110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 188 0xbc '¼' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0xf6, /* 11110110 */
+ 0x06, /* 00000110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 189 0xbd '½' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 190 0xbe '¾' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xf8, /* 11111000 */
+ 0x18, /* 00011000 */
+ 0xf8, /* 11111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 191 0xbf '¿' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xf8, /* 11111000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 192 0xc0 'À' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x1f, /* 00011111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 193 0xc1 'Á' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 194 0xc2 'Â' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 195 0xc3 'Ã' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x1f, /* 00011111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 196 0xc4 'Ä' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 197 0xc5 'Å' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xff, /* 11111111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 198 0xc6 'Æ' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x1f, /* 00011111 */
+ 0x18, /* 00011000 */
+ 0x1f, /* 00011111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 199 0xc7 'Ç' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x37, /* 00110111 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 200 0xc8 'È' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x37, /* 00110111 */
+ 0x30, /* 00110000 */
+ 0x3f, /* 00111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 201 0xc9 'É' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3f, /* 00111111 */
+ 0x30, /* 00110000 */
+ 0x37, /* 00110111 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 202 0xca 'Ê' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0xf7, /* 11110111 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 203 0xcb 'Ë' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0xf7, /* 11110111 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 204 0xcc 'Ì' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x37, /* 00110111 */
+ 0x30, /* 00110000 */
+ 0x37, /* 00110111 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 205 0xcd 'Í' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 206 0xce 'Î' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0xf7, /* 11110111 */
+ 0x00, /* 00000000 */
+ 0xf7, /* 11110111 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 207 0xcf 'Ï' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 208 0xd0 'Ð' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 209 0xd1 'Ñ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 210 0xd2 'Ò' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 211 0xd3 'Ó' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x3f, /* 00111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 212 0xd4 'Ô' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x1f, /* 00011111 */
+ 0x18, /* 00011000 */
+ 0x1f, /* 00011111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 213 0xd5 'Õ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x1f, /* 00011111 */
+ 0x18, /* 00011000 */
+ 0x1f, /* 00011111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 214 0xd6 'Ö' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x3f, /* 00111111 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 215 0xd7 '×' */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0xff, /* 11111111 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+
+ /* 216 0xd8 'Ø' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xff, /* 11111111 */
+ 0x18, /* 00011000 */
+ 0xff, /* 11111111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 217 0xd9 'Ù' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xf8, /* 11111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 218 0xda 'Ú' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x1f, /* 00011111 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 219 0xdb 'Û' */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+
+ /* 220 0xdc 'Ü' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+
+ /* 221 0xdd 'Ý' */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+ 0xf0, /* 11110000 */
+
+ /* 222 0xde 'Þ' */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+ 0x0f, /* 00001111 */
+
+ /* 223 0xdf 'ß' */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0xff, /* 11111111 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 224 0xe0 'à' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x76, /* 01110110 */
+ 0xdc, /* 11011100 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0xdc, /* 11011100 */
+ 0x76, /* 01110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 225 0xe1 'á' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x78, /* 01111000 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xcc, /* 11001100 */
+ 0xd8, /* 11011000 */
+ 0xcc, /* 11001100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xcc, /* 11001100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 226 0xe2 'â' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0xc0, /* 11000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 227 0xe3 'ã' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 228 0xe4 'ä' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 229 0xe5 'å' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0x70, /* 01110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 230 0xe6 'æ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x7c, /* 01111100 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0xc0, /* 11000000 */
+ 0x00, /* 00000000 */
+
+ /* 231 0xe7 'ç' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x76, /* 01110110 */
+ 0xdc, /* 11011100 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 232 0xe8 'è' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 233 0xe9 'é' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xfe, /* 11111110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 234 0xea 'ê' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0xee, /* 11101110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 235 0xeb 'ë' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x1e, /* 00011110 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0x3e, /* 00111110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x66, /* 01100110 */
+ 0x3c, /* 00111100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 236 0xec 'ì' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0xdb, /* 11011011 */
+ 0xdb, /* 11011011 */
+ 0xdb, /* 11011011 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 237 0xed 'í' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x03, /* 00000011 */
+ 0x06, /* 00000110 */
+ 0x7e, /* 01111110 */
+ 0xdb, /* 11011011 */
+ 0xdb, /* 11011011 */
+ 0xf3, /* 11110011 */
+ 0x7e, /* 01111110 */
+ 0x60, /* 01100000 */
+ 0xc0, /* 11000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 238 0xee 'î' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x1c, /* 00011100 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x7c, /* 01111100 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x1c, /* 00011100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 239 0xef 'ï' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7c, /* 01111100 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0xc6, /* 11000110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 240 0xf0 'ð' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0xfe, /* 11111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 241 0xf1 'ñ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x7e, /* 01111110 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 242 0xf2 'ò' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0x06, /* 00000110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 243 0xf3 'ó' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x30, /* 00110000 */
+ 0x60, /* 01100000 */
+ 0x30, /* 00110000 */
+ 0x18, /* 00011000 */
+ 0x0c, /* 00001100 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 244 0xf4 'ô' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x0e, /* 00001110 */
+ 0x1b, /* 00011011 */
+ 0x1b, /* 00011011 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+
+ /* 245 0xf5 'õ' */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0xd8, /* 11011000 */
+ 0x70, /* 01110000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 246 0xf6 'ö' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 247 0xf7 '÷' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x76, /* 01110110 */
+ 0xdc, /* 11011100 */
+ 0x00, /* 00000000 */
+ 0x76, /* 01110110 */
+ 0xdc, /* 11011100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 248 0xf8 'ø' */
+ 0x00, /* 00000000 */
+ 0x38, /* 00111000 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x38, /* 00111000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 249 0xf9 'ù' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 250 0xfa 'ú' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x18, /* 00011000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 251 0xfb 'û' */
+ 0x00, /* 00000000 */
+ 0x0f, /* 00001111 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0x0c, /* 00001100 */
+ 0xec, /* 11101100 */
+ 0x6c, /* 01101100 */
+ 0x6c, /* 01101100 */
+ 0x3c, /* 00111100 */
+ 0x1c, /* 00011100 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 252 0xfc 'ü' */
+ 0x00, /* 00000000 */
+ 0x6c, /* 01101100 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x36, /* 00110110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 253 0xfd 'ý' */
+ 0x00, /* 00000000 */
+ 0x3c, /* 00111100 */
+ 0x66, /* 01100110 */
+ 0x0c, /* 00001100 */
+ 0x18, /* 00011000 */
+ 0x32, /* 00110010 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 254 0xfe 'þ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x7e, /* 01111110 */
+ 0x7e, /* 01111110 */
+ 0x7e, /* 01111110 */
+ 0x7e, /* 01111110 */
+ 0x7e, /* 01111110 */
+ 0x7e, /* 01111110 */
+ 0x7e, /* 01111110 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+ /* 255 0xff 'ÿ' */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+ 0x00, /* 00000000 */
+
+};
diff --git a/tools/ioemu/vl.c b/tools/ioemu/vl.c
new file mode 100644
index 0000000000..350b76d451
--- /dev/null
+++ b/tools/ioemu/vl.c
@@ -0,0 +1,3000 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <time.h>
+#include <errno.h>
+#include <sys/time.h>
+
+#ifndef _WIN32
+#include <sys/times.h>
+#include <sys/wait.h>
+#include <termios.h>
+#include <sys/poll.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <dirent.h>
+#ifdef _BSD
+#include <sys/stat.h>
+#ifndef __APPLE__
+#include <libutil.h>
+#endif
+#else
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <pty.h>
+#include <malloc.h>
+#include <linux/rtc.h>
+#endif
+#endif
+
+#if defined(CONFIG_SLIRP)
+#include "libslirp.h"
+#endif
+
+#ifdef _WIN32
+#include <malloc.h>
+#include <sys/timeb.h>
+#include <windows.h>
+#define getopt_long_only getopt_long
+#define memalign(align, size) malloc(size)
+#endif
+
+#ifdef CONFIG_SDL
+#ifdef __APPLE__
+#include <SDL/SDL.h>
+#endif
+#endif /* CONFIG_SDL */
+
+#include "xc.h"
+#include "exec-all.h"
+
+//#define DO_TB_FLUSH
+
+#define DEFAULT_NETWORK_SCRIPT "/etc/xen/qemu-ifup"
+
+#if !defined(CONFIG_SOFTMMU)
+#define PHYS_RAM_MAX_SIZE (256 * 1024 * 1024)
+#else
+#define PHYS_RAM_MAX_SIZE (2047 * 1024 * 1024)
+#endif
+
+#ifdef TARGET_PPC
+#define DEFAULT_RAM_SIZE 144
+#else
+#define DEFAULT_RAM_SIZE 128
+#endif
+/* in ms */
+#define GUI_REFRESH_INTERVAL 30
+#define POLLING_INTERVAL 5
+
+/* XXX: use a two level table to limit memory usage */
+#define MAX_IOPORTS 65536
+
+const char *bios_dir = CONFIG_QEMU_SHAREDIR;
+char phys_ram_file[1024];
+CPUState *global_env;
+CPUState *cpu_single_env;
+void *ioport_opaque[MAX_IOPORTS];
+IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
+IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
+BlockDriverState *bs_table[MAX_DISKS], *fd_table[MAX_FD];
+int vga_ram_size;
+int bios_size;
+static DisplayState display_state;
+int nographic;
+int usevnc; /* 1=vnc only, 2=vnc and sdl */
+long vncport; /* server port */
+const char* vncconnect; /* do a reverse connect to this host*/
+const char* keyboard_layout = 0;
+int64_t ticks_per_sec;
+int boot_device = 'c';
+int ram_size;
+int domid;
+static char network_script[1024];
+int pit_min_timer_count = 0;
+int nb_nics;
+NetDriverState nd_table[MAX_NICS];
+QEMUTimer *gui_timer;
+QEMUTimer *polling_timer;
+int vm_running;
+int audio_enabled = 0;
+int sb16_enabled = 1;
+int adlib_enabled = 1;
+int gus_enabled = 1;
+int pci_enabled = 1;
+int prep_enabled = 0;
+int rtc_utc = 1;
+int cirrus_vga_enabled = 1;
+int graphic_width = 800;
+int graphic_height = 600;
+int graphic_depth = 15;
+int full_screen = 0;
+TextConsole *vga_console;
+CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+int xc_handle;
+
+/***********************************************************/
+/* x86 ISA bus support */
+
+target_phys_addr_t isa_mem_base = 0;
+
+uint32_t default_ioport_readb(void *opaque, uint32_t address)
+{
+#ifdef DEBUG_UNUSED_IOPORT
+ fprintf(stderr, "inb: port=0x%04x\n", address);
+#endif
+ return 0xff;
+}
+
+void default_ioport_writeb(void *opaque, uint32_t address, uint32_t data)
+{
+#ifdef DEBUG_UNUSED_IOPORT
+ fprintf(stderr, "outb: port=0x%04x data=0x%02x\n", address, data);
+#endif
+}
+
+/* default is to make two byte accesses */
+uint32_t default_ioport_readw(void *opaque, uint32_t address)
+{
+ uint32_t data;
+ data = ioport_read_table[0][address](ioport_opaque[address], address);
+ address = (address + 1) & (MAX_IOPORTS - 1);
+ data |= ioport_read_table[0][address](ioport_opaque[address], address) << 8;
+ return data;
+}
+
+void default_ioport_writew(void *opaque, uint32_t address, uint32_t data)
+{
+ ioport_write_table[0][address](ioport_opaque[address], address, data & 0xff);
+ address = (address + 1) & (MAX_IOPORTS - 1);
+ ioport_write_table[0][address](ioport_opaque[address], address, (data >> 8) & 0xff);
+}
+
+uint32_t default_ioport_readl(void *opaque, uint32_t address)
+{
+#ifdef DEBUG_UNUSED_IOPORT
+ fprintf(stderr, "inl: port=0x%04x\n", address);
+#endif
+ return 0xffffffff;
+}
+
+void default_ioport_writel(void *opaque, uint32_t address, uint32_t data)
+{
+#ifdef DEBUG_UNUSED_IOPORT
+ fprintf(stderr, "outl: port=0x%04x data=0x%02x\n", address, data);
+#endif
+}
+
+void init_ioports(void)
+{
+ int i;
+
+ for(i = 0; i < MAX_IOPORTS; i++) {
+ ioport_read_table[0][i] = default_ioport_readb;
+ ioport_write_table[0][i] = default_ioport_writeb;
+ ioport_read_table[1][i] = default_ioport_readw;
+ ioport_write_table[1][i] = default_ioport_writew;
+ ioport_read_table[2][i] = default_ioport_readl;
+ ioport_write_table[2][i] = default_ioport_writel;
+ }
+}
+
+/* size is the word size in byte */
+int register_ioport_read(int start, int length, int size,
+ IOPortReadFunc *func, void *opaque)
+{
+ int i, bsize;
+
+ if (size == 1) {
+ bsize = 0;
+ } else if (size == 2) {
+ bsize = 1;
+ } else if (size == 4) {
+ bsize = 2;
+ } else {
+ hw_error("register_ioport_read: invalid size");
+ return -1;
+ }
+ for(i = start; i < start + length; i += size) {
+ ioport_read_table[bsize][i] = func;
+ if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)
+ hw_error("register_ioport_read: invalid opaque");
+ ioport_opaque[i] = opaque;
+ }
+ return 0;
+}
+
+/* size is the word size in byte */
+int register_ioport_write(int start, int length, int size,
+                          IOPortWriteFunc *func, void *opaque)
+{
+    int i, bsize;
+
+    if (size == 1) {
+        bsize = 0;
+    } else if (size == 2) {
+        bsize = 1;
+    } else if (size == 4) {
+        bsize = 2;
+    } else {
+        hw_error("register_ioport_write: invalid size");
+        return -1;
+    }
+    for(i = start; i < start + length; i += size) {
+        ioport_write_table[bsize][i] = func;
+        if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)
+            hw_error("register_ioport_write: invalid opaque");
+        ioport_opaque[i] = opaque;
+    }
+    return 0;
+}
+
+void isa_unassign_ioport(int start, int length)
+{
+ int i;
+
+ for(i = start; i < start + length; i++) {
+ ioport_read_table[0][i] = default_ioport_readb;
+ ioport_read_table[1][i] = default_ioport_readw;
+ ioport_read_table[2][i] = default_ioport_readl;
+
+ ioport_write_table[0][i] = default_ioport_writeb;
+ ioport_write_table[1][i] = default_ioport_writew;
+ ioport_write_table[2][i] = default_ioport_writel;
+ }
+}
+
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+ int c;
+ char *q = buf;
+
+ if (buf_size <= 0)
+ return;
+
+ for(;;) {
+ c = *str++;
+ if (c == 0 || q >= buf + buf_size - 1)
+ break;
+ *q++ = c;
+ }
+ *q = '\0';
+}
+
+/* strcat and truncate. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+ int len;
+ len = strlen(buf);
+ if (len < buf_size)
+ pstrcpy(buf + len, buf_size - len, s);
+ return buf;
+}
+
+int strstart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (*p != *q)
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+/* return the size or -1 if error */
+int get_image_size(const char *filename)
+{
+ int fd, size;
+ fd = open(filename, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ return -1;
+ size = lseek(fd, 0, SEEK_END);
+ close(fd);
+ return size;
+}
+
+/* return the size or -1 if error */
+int load_image(const char *filename, uint8_t *addr)
+{
+ int fd, size;
+ fd = open(filename, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ return -1;
+ size = lseek(fd, 0, SEEK_END);
+ lseek(fd, 0, SEEK_SET);
+ if (read(fd, addr, size) != size) {
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ return size;
+}
+
+void cpu_outb(CPUState *env, int addr, int val)
+{
+#ifdef DEBUG_IOPORT
+ if (loglevel & CPU_LOG_IOPORT)
+ fprintf(logfile, "outb: %04x %02x\n", addr, val);
+#endif
+ ioport_write_table[0][addr](ioport_opaque[addr], addr, val);
+}
+
+void cpu_outw(CPUState *env, int addr, int val)
+{
+#ifdef DEBUG_IOPORT
+ if (loglevel & CPU_LOG_IOPORT)
+ fprintf(logfile, "outw: %04x %04x\n", addr, val);
+#endif
+ ioport_write_table[1][addr](ioport_opaque[addr], addr, val);
+}
+
+void cpu_outl(CPUState *env, int addr, int val)
+{
+#ifdef DEBUG_IOPORT
+ if (loglevel & CPU_LOG_IOPORT)
+ fprintf(logfile, "outl: %04x %08x\n", addr, val);
+#endif
+ ioport_write_table[2][addr](ioport_opaque[addr], addr, val);
+}
+
+int cpu_inb(CPUState *env, int addr)
+{
+ int val;
+ val = ioport_read_table[0][addr](ioport_opaque[addr], addr);
+#ifdef DEBUG_IOPORT
+ if (loglevel & CPU_LOG_IOPORT)
+ fprintf(logfile, "inb : %04x %02x\n", addr, val);
+#endif
+ return val;
+}
+
+int cpu_inw(CPUState *env, int addr)
+{
+ int val;
+ val = ioport_read_table[1][addr](ioport_opaque[addr], addr);
+#ifdef DEBUG_IOPORT
+ if (loglevel & CPU_LOG_IOPORT)
+ fprintf(logfile, "inw : %04x %04x\n", addr, val);
+#endif
+ return val;
+}
+
+int cpu_inl(CPUState *env, int addr)
+{
+ int val;
+ val = ioport_read_table[2][addr](ioport_opaque[addr], addr);
+#ifdef DEBUG_IOPORT
+ if (loglevel & CPU_LOG_IOPORT)
+ fprintf(logfile, "inl : %04x %08x\n", addr, val);
+#endif
+ return val;
+}
+
+/***********************************************************/
+void hw_error(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "qemu: hardware error: ");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ abort();
+}
+
+/***********************************************************/
+/* keyboard/mouse */
+
+static QEMUPutKBDEvent *qemu_put_kbd_event;
+static void *qemu_put_kbd_event_opaque;
+static QEMUPutMouseEvent *qemu_put_mouse_event;
+static void *qemu_put_mouse_event_opaque;
+
+void qemu_add_kbd_event_handler(QEMUPutKBDEvent *func, void *opaque)
+{
+ qemu_put_kbd_event_opaque = opaque;
+ qemu_put_kbd_event = func;
+}
+
+void qemu_add_mouse_event_handler(QEMUPutMouseEvent *func, void *opaque)
+{
+ qemu_put_mouse_event_opaque = opaque;
+ qemu_put_mouse_event = func;
+}
+
+void kbd_put_keycode(int keycode)
+{
+ if (qemu_put_kbd_event) {
+ qemu_put_kbd_event(qemu_put_kbd_event_opaque, keycode);
+ }
+}
+
+void kbd_mouse_event(int dx, int dy, int dz, int buttons_state)
+{
+ if (qemu_put_mouse_event) {
+ qemu_put_mouse_event(qemu_put_mouse_event_opaque,
+ dx, dy, dz, buttons_state);
+ }
+}
+
+/***********************************************************/
+/* timers */
+
+#if defined(__powerpc__)
+
+static inline uint32_t get_tbl(void)
+{
+ uint32_t tbl;
+ asm volatile("mftb %0" : "=r" (tbl));
+ return tbl;
+}
+
+static inline uint32_t get_tbu(void)
+{
+ uint32_t tbl;
+ asm volatile("mftbu %0" : "=r" (tbl));
+ return tbl;
+}
+
+int64_t cpu_get_real_ticks(void)
+{
+ uint32_t l, h, h1;
+ /* NOTE: we test if wrapping has occurred */
+ do {
+ h = get_tbu();
+ l = get_tbl();
+ h1 = get_tbu();
+ } while (h != h1);
+ return ((int64_t)h << 32) | l;
+}
+
+#elif defined(__i386__)
+
+int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile ("rdtsc" : "=A" (val));
+ return val;
+}
+
+#elif defined(__x86_64__)
+
+int64_t cpu_get_real_ticks(void)
+{
+ uint32_t low,high;
+ int64_t val;
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+ val = high;
+ val <<= 32;
+ val |= low;
+ return val;
+}
+
+#else
+#error unsupported CPU
+#endif
+
+static int64_t cpu_ticks_offset;
+static int cpu_ticks_enabled;
+int64_t cpu_virt_tsc;
+
+static inline int64_t cpu_get_ticks(void)
+{
+ if (!cpu_ticks_enabled) {
+ return cpu_ticks_offset;
+ } else {
+ return cpu_get_real_ticks() + cpu_ticks_offset;
+ }
+
+}
+
+/* enable cpu_get_ticks() */
+void cpu_enable_ticks(void)
+{
+ if (!cpu_ticks_enabled) {
+ cpu_ticks_offset -= cpu_get_real_ticks();
+ cpu_ticks_enabled = 1;
+ }
+}
+
+/* disable cpu_get_ticks() : the clock is stopped. You must not call
+ cpu_get_ticks() after that. */
+void cpu_disable_ticks(void)
+{
+ if (cpu_ticks_enabled) {
+ cpu_ticks_offset = cpu_get_ticks();
+ cpu_ticks_enabled = 0;
+ }
+}
+
+static int64_t get_clock(void)
+{
+#ifdef _WIN32
+ struct _timeb tb;
+ _ftime(&tb);
+ return ((int64_t)tb.time * 1000 + (int64_t)tb.millitm) * 1000;
+#else
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return tv.tv_sec * 1000000LL + tv.tv_usec;
+#endif
+}
+
+void cpu_calibrate_ticks(void)
+{
+ int64_t usec, ticks;
+
+ usec = get_clock();
+ ticks = cpu_get_real_ticks();
+#ifdef _WIN32
+ Sleep(50);
+#else
+ usleep(50 * 1000);
+#endif
+ usec = get_clock() - usec;
+ ticks = cpu_get_real_ticks() - ticks;
+ ticks_per_sec = (ticks * 1000000LL + (usec >> 1)) / usec;
+}
+
+/* compute with 96 bit intermediate result: (a*b)/c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+ union {
+ uint64_t ll;
+ struct {
+#ifdef WORDS_BIGENDIAN
+ uint32_t high, low;
+#else
+ uint32_t low, high;
+#endif
+ } l;
+ } u, res;
+ uint64_t rl, rh;
+
+ u.ll = a;
+ rl = (uint64_t)u.l.low * (uint64_t)b;
+ rh = (uint64_t)u.l.high * (uint64_t)b;
+ rh += (rl >> 32);
+ res.l.high = rh / c;
+ res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+ return res.ll;
+}
+
+#define QEMU_TIMER_REALTIME 0
+#define QEMU_TIMER_VIRTUAL 1
+
+struct QEMUClock {
+ int type;
+ /* XXX: add frequency */
+};
+
+struct QEMUTimer {
+ QEMUClock *clock;
+ int64_t expire_time;
+ QEMUTimerCB *cb;
+ void *opaque;
+ struct QEMUTimer *next;
+};
+
+QEMUClock *rt_clock;
+QEMUClock *vm_clock;
+
+static QEMUTimer *active_timers[2];
+#ifdef _WIN32
+static MMRESULT timerID;
+#else
+/* frequency of the times() clock tick */
+static int timer_freq;
+#endif
+
+QEMUClock *qemu_new_clock(int type)
+{
+ QEMUClock *clock;
+ clock = qemu_mallocz(sizeof(QEMUClock));
+ if (!clock)
+ return NULL;
+ clock->type = type;
+ return clock;
+}
+
+QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque)
+{
+ QEMUTimer *ts;
+
+ ts = qemu_mallocz(sizeof(QEMUTimer));
+ ts->clock = clock;
+ ts->cb = cb;
+ ts->opaque = opaque;
+ return ts;
+}
+
+void qemu_free_timer(QEMUTimer *ts)
+{
+ qemu_free(ts);
+}
+
+/* stop a timer, but do not dealloc it */
+void qemu_del_timer(QEMUTimer *ts)
+{
+ QEMUTimer **pt, *t;
+
+ /* NOTE: this code must be signal safe because
+ qemu_timer_expired() can be called from a signal. */
+ pt = &active_timers[ts->clock->type];
+ for(;;) {
+ t = *pt;
+ if (!t)
+ break;
+ if (t == ts) {
+ *pt = t->next;
+ break;
+ }
+ pt = &t->next;
+ }
+}
+
+/* modify the current timer so that it will be fired when current_time
+ >= expire_time. The corresponding callback will be called. */
+void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time)
+{
+ QEMUTimer **pt, *t;
+
+ qemu_del_timer(ts);
+
+ /* add the timer in the sorted list */
+ /* NOTE: this code must be signal safe because
+ qemu_timer_expired() can be called from a signal. */
+ pt = &active_timers[ts->clock->type];
+ for(;;) {
+ t = *pt;
+ if (!t)
+ break;
+ if (t->expire_time > expire_time)
+ break;
+ pt = &t->next;
+ }
+ ts->expire_time = expire_time;
+ ts->next = *pt;
+ *pt = ts;
+}
+
+int qemu_timer_pending(QEMUTimer *ts)
+{
+ QEMUTimer *t;
+ for(t = active_timers[ts->clock->type]; t != NULL; t = t->next) {
+ if (t == ts)
+ return 1;
+ }
+ return 0;
+}
+
+static inline int qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time)
+{
+ if (!timer_head)
+ return 0;
+ return (timer_head->expire_time <= current_time);
+}
+
+static void qemu_run_timers(QEMUTimer **ptimer_head, int64_t current_time)
+{
+ QEMUTimer *ts;
+
+ for(;;) {
+ ts = *ptimer_head;
+ if (!ts || ts->expire_time > current_time)
+ break;
+ /* remove timer from the list before calling the callback */
+ *ptimer_head = ts->next;
+ ts->next = NULL;
+
+ /* run the callback (the timer list can be modified) */
+ ts->cb(ts->opaque);
+ }
+}
+
+int64_t qemu_get_clock(QEMUClock *clock)
+{
+ switch(clock->type) {
+ case QEMU_TIMER_REALTIME:
+#ifdef _WIN32
+ return GetTickCount();
+#else
+ {
+ struct tms tp;
+
+ /* Note that using gettimeofday() is not a good solution
+ for timers because its value change when the date is
+ modified. */
+ if (timer_freq == 100) {
+ return times(&tp) * 10;
+ } else {
+ return ((int64_t)times(&tp) * 1000) / timer_freq;
+ }
+ }
+#endif
+ default:
+ case QEMU_TIMER_VIRTUAL:
+ return cpu_get_ticks();
+ }
+}
+
+/* save a timer */
+void qemu_put_timer(QEMUFile *f, QEMUTimer *ts)
+{
+ uint64_t expire_time;
+
+ if (qemu_timer_pending(ts)) {
+ expire_time = ts->expire_time;
+ } else {
+ expire_time = -1;
+ }
+ qemu_put_be64(f, expire_time);
+}
+
+void qemu_get_timer(QEMUFile *f, QEMUTimer *ts)
+{
+ uint64_t expire_time;
+
+ expire_time = qemu_get_be64(f);
+ if (expire_time != -1) {
+ qemu_mod_timer(ts, expire_time);
+ } else {
+ qemu_del_timer(ts);
+ }
+}
+
+static void init_timers(void)
+{
+ rt_clock = qemu_new_clock(QEMU_TIMER_REALTIME);
+ vm_clock = qemu_new_clock(QEMU_TIMER_VIRTUAL);
+
+#ifdef _WIN32
+ {
+ int count=0;
+ timerID = timeSetEvent(10, // interval (ms)
+ 0, // resolution
+ host_alarm_handler, // function
+ (DWORD)&count, // user parameter
+ TIME_PERIODIC | TIME_CALLBACK_FUNCTION);
+ if( !timerID ) {
+ perror("failed timer alarm");
+ exit(1);
+ }
+ }
+ pit_min_timer_count = ((uint64_t)10000 * PIT_FREQ) / 1000000;
+#else
+ {
+ /* get times() syscall frequency */
+ timer_freq = sysconf(_SC_CLK_TCK);
+
+#ifndef TARGET_VMX
+ /* timer signal */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+#if defined (TARGET_I386) && defined(USE_CODE_COPY)
+ act.sa_flags |= SA_ONSTACK;
+#endif
+ act.sa_handler = host_alarm_handler;
+ sigaction(SIGALRM, &act, NULL);
+
+ itv.it_interval.tv_sec = 0;
+ itv.it_interval.tv_usec = 1000;
+ itv.it_value.tv_sec = 0;
+ itv.it_value.tv_usec = 10 * 1000;
+ setitimer(ITIMER_REAL, &itv, NULL);
+ /* we probe the tick duration of the kernel to inform the user if
+ the emulated kernel requested a too high timer frequency */
+ getitimer(ITIMER_REAL, &itv);
+
+#if defined(__linux__)
+ if (itv.it_interval.tv_usec > 1000) {
+ /* try to use /dev/rtc to have a faster timer */
+ if (start_rtc_timer() < 0)
+ goto use_itimer;
+ /* disable itimer */
+ itv.it_interval.tv_sec = 0;
+ itv.it_interval.tv_usec = 0;
+ itv.it_value.tv_sec = 0;
+ itv.it_value.tv_usec = 0;
+ setitimer(ITIMER_REAL, &itv, NULL);
+
+ /* use the RTC */
+ sigaction(SIGIO, &act, NULL);
+ fcntl(rtc_fd, F_SETFL, O_ASYNC);
+ fcntl(rtc_fd, F_SETOWN, getpid());
+ } else
+#endif /* defined(__linux__) */
+ {
+ use_itimer:
+ pit_min_timer_count = ((uint64_t)itv.it_interval.tv_usec *
+ PIT_FREQ) / 1000000;
+ }
+#endif /* TARGET_VMX */
+ }
+#endif
+}
+
+void quit_timers(void)
+{
+#ifdef _WIN32
+ timeKillEvent(timerID);
+#endif
+}
+
+/***********************************************************/
+/* character device */
+
+int qemu_chr_write(CharDriverState *s, const uint8_t *buf, int len)
+{
+ return s->chr_write(s, buf, len);
+}
+
+void qemu_chr_printf(CharDriverState *s, const char *fmt, ...)
+{
+ char buf[4096];
+ va_list ap;
+ va_start(ap, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, ap);
+ qemu_chr_write(s, buf, strlen(buf));
+ va_end(ap);
+}
+
+void qemu_chr_send_event(CharDriverState *s, int event)
+{
+ if (s->chr_send_event)
+ s->chr_send_event(s, event);
+}
+
+void qemu_chr_add_read_handler(CharDriverState *s,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque)
+{
+ s->chr_add_read_handler(s, fd_can_read, fd_read, opaque);
+}
+
+void qemu_chr_add_event_handler(CharDriverState *s, IOEventHandler *chr_event)
+{
+ s->chr_event = chr_event;
+}
+
+static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
+{
+ return len;
+}
+
+static void null_chr_add_read_handler(CharDriverState *chr,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque)
+{
+}
+
+CharDriverState *qemu_chr_open_null(void)
+{
+ CharDriverState *chr;
+
+ chr = qemu_mallocz(sizeof(CharDriverState));
+ if (!chr)
+ return NULL;
+ chr->chr_write = null_chr_write;
+ chr->chr_add_read_handler = null_chr_add_read_handler;
+ return chr;
+}
+
+#ifndef _WIN32
+
+typedef struct {
+ int fd_in, fd_out;
+ /* for nographic stdio only */
+ IOCanRWHandler *fd_can_read;
+ IOReadHandler *fd_read;
+ void *fd_opaque;
+} FDCharDriver;
+
+#define STDIO_MAX_CLIENTS 2
+
+static int stdio_nb_clients;
+static CharDriverState *stdio_clients[STDIO_MAX_CLIENTS];
+
+static int unix_write(int fd, const uint8_t *buf, int len1)
+{
+ int ret, len;
+
+ len = len1;
+ while (len > 0) {
+ ret = write(fd, buf, len);
+ if (ret < 0) {
+ if (errno != EINTR && errno != EAGAIN)
+ return -1;
+ } else if (ret == 0) {
+ break;
+ } else {
+ buf += ret;
+ len -= ret;
+ }
+ }
+ return len1 - len;
+}
+
+static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
+{
+ FDCharDriver *s = chr->opaque;
+ return unix_write(s->fd_out, buf, len);
+}
+
+static void fd_chr_add_read_handler(CharDriverState *chr,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque)
+{
+ FDCharDriver *s = chr->opaque;
+
+ if (nographic && s->fd_in == 0) {
+ s->fd_can_read = fd_can_read;
+ s->fd_read = fd_read;
+ s->fd_opaque = opaque;
+ } else {
+ qemu_add_fd_read_handler(s->fd_in, fd_can_read, fd_read, opaque);
+ }
+}
+
+/* open a character device to a unix fd */
+CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out)
+{
+ CharDriverState *chr;
+ FDCharDriver *s;
+
+ chr = qemu_mallocz(sizeof(CharDriverState));
+ if (!chr)
+ return NULL;
+ s = qemu_mallocz(sizeof(FDCharDriver));
+ if (!s) {
+ free(chr);
+ return NULL;
+ }
+ s->fd_in = fd_in;
+ s->fd_out = fd_out;
+ chr->opaque = s;
+ chr->chr_write = fd_chr_write;
+ chr->chr_add_read_handler = fd_chr_add_read_handler;
+ return chr;
+}
+
+/* for STDIO, we handle the case where several clients use it
+ (nographic mode) */
+
+#define TERM_ESCAPE 0x01 /* ctrl-a is used for escape */
+
+static int term_got_escape, client_index;
+
+void term_print_help(void)
+{
+ printf("\n"
+ "C-a h print this help\n"
+ "C-a x exit emulator\n"
+ "C-a s save disk data back to file (if -snapshot)\n"
+ "C-a b send break (magic sysrq)\n"
+ "C-a c switch between console and monitor\n"
+ "C-a C-a send C-a\n"
+ );
+}
+
+/* called when a char is received */
+static void stdio_received_byte(int ch)
+{
+ if (term_got_escape) {
+ term_got_escape = 0;
+ switch(ch) {
+ case 'h':
+ term_print_help();
+ break;
+ case 'x':
+ exit(0);
+ break;
+ case 's':
+ {
+ int i;
+ for (i = 0; i < MAX_DISKS; i++) {
+ if (bs_table[i])
+ bdrv_commit(bs_table[i]);
+ }
+ }
+ break;
+ case 'b':
+ if (client_index < stdio_nb_clients) {
+ CharDriverState *chr;
+ FDCharDriver *s;
+
+ chr = stdio_clients[client_index];
+ s = chr->opaque;
+ chr->chr_event(s->fd_opaque, CHR_EVENT_BREAK);
+ }
+ break;
+ case 'c':
+ client_index++;
+ if (client_index >= stdio_nb_clients)
+ client_index = 0;
+ if (client_index == 0) {
+ /* send a new line in the monitor to get the prompt */
+ ch = '\r';
+ goto send_char;
+ }
+ break;
+ case TERM_ESCAPE:
+ goto send_char;
+ }
+ } else if (ch == TERM_ESCAPE) {
+ term_got_escape = 1;
+ } else {
+ send_char:
+ if (client_index < stdio_nb_clients) {
+ uint8_t buf[1];
+ CharDriverState *chr;
+ FDCharDriver *s;
+
+ chr = stdio_clients[client_index];
+ s = chr->opaque;
+ buf[0] = ch;
+ /* XXX: should queue the char if the device is not
+ ready */
+ if (s->fd_can_read(s->fd_opaque) > 0)
+ s->fd_read(s->fd_opaque, buf, 1);
+ }
+ }
+}
+
+static int stdio_can_read(void *opaque)
+{
+ /* XXX: not strictly correct */
+ return 1;
+}
+
+static void stdio_read(void *opaque, const uint8_t *buf, int size)
+{
+ int i;
+ for(i = 0; i < size; i++)
+ stdio_received_byte(buf[i]);
+}
+
+/* init terminal so that we can grab keys */
+static struct termios oldtty;
+static int old_fd0_flags;
+
+static void term_exit(void)
+{
+ tcsetattr (0, TCSANOW, &oldtty);
+ fcntl(0, F_SETFL, old_fd0_flags);
+}
+
+static void term_init(void)
+{
+ struct termios tty;
+
+ tcgetattr (0, &tty);
+ oldtty = tty;
+ old_fd0_flags = fcntl(0, F_GETFL);
+
+ tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
+ |INLCR|IGNCR|ICRNL|IXON);
+ tty.c_oflag |= OPOST;
+ tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
+ /* if graphical mode, we allow Ctrl-C handling */
+ if (nographic)
+ tty.c_lflag &= ~ISIG;
+ tty.c_cflag &= ~(CSIZE|PARENB);
+ tty.c_cflag |= CS8;
+ tty.c_cc[VMIN] = 1;
+ tty.c_cc[VTIME] = 0;
+
+ tcsetattr (0, TCSANOW, &tty);
+
+ atexit(term_exit);
+
+ fcntl(0, F_SETFL, O_NONBLOCK);
+}
+
+CharDriverState *qemu_chr_open_stdio(void)
+{
+ CharDriverState *chr;
+
+ if (nographic) {
+ if (stdio_nb_clients >= STDIO_MAX_CLIENTS)
+ return NULL;
+ chr = qemu_chr_open_fd(0, 1);
+ if (stdio_nb_clients == 0)
+ qemu_add_fd_read_handler(0, stdio_can_read, stdio_read, NULL);
+ client_index = stdio_nb_clients;
+ } else {
+ if (stdio_nb_clients != 0)
+ return NULL;
+ chr = qemu_chr_open_fd(0, 1);
+ }
+ stdio_clients[stdio_nb_clients++] = chr;
+ if (stdio_nb_clients == 1) {
+ /* set the terminal in raw mode */
+ term_init();
+ }
+ return chr;
+}
+
+#if defined(__linux__)
+CharDriverState *qemu_chr_open_pty(void)
+{
+ char slave_name[1024];
+ int master_fd, slave_fd;
+
+ /* Not satisfying */
+ if (openpty(&master_fd, &slave_fd, slave_name, NULL, NULL) < 0) {
+ return NULL;
+ }
+ fprintf(stderr, "char device redirected to %s\n", slave_name);
+ return qemu_chr_open_fd(master_fd, master_fd);
+}
+#else
+CharDriverState *qemu_chr_open_pty(void)
+{
+ return NULL;
+}
+#endif
+
+#endif /* !defined(_WIN32) */
+
+CharDriverState *qemu_chr_open(const char *filename)
+{
+ if (!strcmp(filename, "vc")) {
+ return text_console_init(&display_state);
+ } else if (!strcmp(filename, "null")) {
+ return qemu_chr_open_null();
+ } else
+#ifndef _WIN32
+ if (!strcmp(filename, "pty")) {
+ return qemu_chr_open_pty();
+ } else if (!strcmp(filename, "stdio")) {
+ return qemu_chr_open_stdio();
+ } else
+#endif
+ {
+ return NULL;
+ }
+}
+
+/***********************************************************/
+/* Linux network device redirectors */
+
+void hex_dump(FILE *f, const uint8_t *buf, int size)
+{
+ int len, i, j, c;
+
+ for(i=0;i<size;i+=16) {
+ len = size - i;
+ if (len > 16)
+ len = 16;
+ fprintf(f, "%08x ", i);
+ for(j=0;j<16;j++) {
+ if (j < len)
+ fprintf(f, " %02x", buf[i+j]);
+ else
+ fprintf(f, " ");
+ }
+ fprintf(f, " ");
+ for(j=0;j<len;j++) {
+ c = buf[i+j];
+ if (c < ' ' || c > '~')
+ c = '.';
+ fprintf(f, "%c", c);
+ }
+ fprintf(f, "\n");
+ }
+}
+
+void qemu_send_packet(NetDriverState *nd, const uint8_t *buf, int size)
+{
+ nd->send_packet(nd, buf, size);
+}
+
+void qemu_add_read_packet(NetDriverState *nd, IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque)
+{
+ nd->add_read_packet(nd, fd_can_read, fd_read, opaque);
+}
+
+/* dummy network adapter */
+
+static void dummy_send_packet(NetDriverState *nd, const uint8_t *buf, int size)
+{
+}
+
+static void dummy_add_read_packet(NetDriverState *nd,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque)
+{
+}
+
+static int net_dummy_init(NetDriverState *nd)
+{
+ nd->send_packet = dummy_send_packet;
+ nd->add_read_packet = dummy_add_read_packet;
+ pstrcpy(nd->ifname, sizeof(nd->ifname), "dummy");
+ return 0;
+}
+
+#if defined(CONFIG_SLIRP)
+
+/* slirp network adapter */
+
+static void *slirp_fd_opaque;
+static IOCanRWHandler *slirp_fd_can_read;
+static IOReadHandler *slirp_fd_read;
+static int slirp_inited;
+
+int slirp_can_output(void)
+{
+ return slirp_fd_can_read(slirp_fd_opaque);
+}
+
+void slirp_output(const uint8_t *pkt, int pkt_len)
+{
+#if 0
+ printf("output:\n");
+ hex_dump(stdout, pkt, pkt_len);
+#endif
+ slirp_fd_read(slirp_fd_opaque, pkt, pkt_len);
+}
+
+static void slirp_send_packet(NetDriverState *nd, const uint8_t *buf, int size)
+{
+#if 0
+ printf("input:\n");
+ hex_dump(stdout, buf, size);
+#endif
+ slirp_input(buf, size);
+}
+
+static void slirp_add_read_packet(NetDriverState *nd,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque)
+{
+ slirp_fd_opaque = opaque;
+ slirp_fd_can_read = fd_can_read;
+ slirp_fd_read = fd_read;
+}
+
+static int net_slirp_init(NetDriverState *nd)
+{
+ if (!slirp_inited) {
+ slirp_inited = 1;
+ slirp_init();
+ }
+ nd->send_packet = slirp_send_packet;
+ nd->add_read_packet = slirp_add_read_packet;
+ pstrcpy(nd->ifname, sizeof(nd->ifname), "slirp");
+ return 0;
+}
+
+static int get_str_sep(char *buf, int buf_size, const char **pp, int sep)
+{
+ const char *p, *p1;
+ int len;
+ p = *pp;
+ p1 = strchr(p, sep);
+ if (!p1)
+ return -1;
+ len = p1 - p;
+ p1++;
+ if (buf_size > 0) {
+ if (len > buf_size - 1)
+ len = buf_size - 1;
+ memcpy(buf, p, len);
+ buf[len] = '\0';
+ }
+ *pp = p1;
+ return 0;
+}
+
+static void net_slirp_redir(const char *redir_str)
+{
+ int is_udp;
+ char buf[256], *r;
+ const char *p;
+ struct in_addr guest_addr;
+ int host_port, guest_port;
+
+ if (!slirp_inited) {
+ slirp_inited = 1;
+ slirp_init();
+ }
+
+ p = redir_str;
+ if (get_str_sep(buf, sizeof(buf), &p, ':') < 0)
+ goto fail;
+ if (!strcmp(buf, "tcp")) {
+ is_udp = 0;
+ } else if (!strcmp(buf, "udp")) {
+ is_udp = 1;
+ } else {
+ goto fail;
+ }
+
+ if (get_str_sep(buf, sizeof(buf), &p, ':') < 0)
+ goto fail;
+ host_port = strtol(buf, &r, 0);
+ if (r == buf)
+ goto fail;
+
+ if (get_str_sep(buf, sizeof(buf), &p, ':') < 0)
+ goto fail;
+ if (buf[0] == '\0') {
+ pstrcpy(buf, sizeof(buf), "10.0.2.15");
+ }
+ if (!inet_aton(buf, &guest_addr))
+ goto fail;
+
+ guest_port = strtol(p, &r, 0);
+ if (r == p)
+ goto fail;
+
+ if (slirp_redir(is_udp, host_port, guest_addr, guest_port) < 0) {
+ fprintf(stderr, "qemu: could not set up redirection\n");
+ exit(1);
+ }
+ return;
+ fail:
+ fprintf(stderr, "qemu: syntax: -redir [tcp|udp]:host-port:[guest-host]:guest-port\n");
+ exit(1);
+}
+
+#ifndef _WIN32
+
+char smb_dir[1024];
+
+static void smb_exit(void)
+{
+ DIR *d;
+ struct dirent *de;
+ char filename[1024];
+
+ /* erase all the files in the directory */
+ d = opendir(smb_dir);
+ for(;;) {
+ de = readdir(d);
+ if (!de)
+ break;
+ if (strcmp(de->d_name, ".") != 0 &&
+ strcmp(de->d_name, "..") != 0) {
+ snprintf(filename, sizeof(filename), "%s/%s",
+ smb_dir, de->d_name);
+ unlink(filename);
+ }
+ }
+ closedir(d);
+ rmdir(smb_dir);
+}
+
+/* automatic user mode samba server configuration */
+void net_slirp_smb(const char *exported_dir)
+{
+ char smb_conf[1024];
+ char smb_cmdline[1024];
+ FILE *f;
+
+ if (!slirp_inited) {
+ slirp_inited = 1;
+ slirp_init();
+ }
+
+ /* XXX: better tmp dir construction */
+ snprintf(smb_dir, sizeof(smb_dir), "/tmp/qemu-smb.%d", getpid());
+ if (mkdir(smb_dir, 0700) < 0) {
+ fprintf(stderr, "qemu: could not create samba server dir '%s'\n", smb_dir);
+ exit(1);
+ }
+ snprintf(smb_conf, sizeof(smb_conf), "%s/%s", smb_dir, "smb.conf");
+
+ f = fopen(smb_conf, "w");
+ if (!f) {
+ fprintf(stderr, "qemu: could not create samba server configuration file '%s'\n", smb_conf);
+ exit(1);
+ }
+ fprintf(f,
+ "[global]\n"
+ "pid directory=%s\n"
+ "lock directory=%s\n"
+ "log file=%s/log.smbd\n"
+ "smb passwd file=%s/smbpasswd\n"
+ "security = share\n"
+ "[qemu]\n"
+ "path=%s\n"
+ "read only=no\n"
+ "guest ok=yes\n",
+ smb_dir,
+ smb_dir,
+ smb_dir,
+ smb_dir,
+ exported_dir
+ );
+ fclose(f);
+ atexit(smb_exit);
+
+ snprintf(smb_cmdline, sizeof(smb_cmdline), "/usr/sbin/smbd -s %s",
+ smb_conf);
+
+ slirp_add_exec(0, smb_cmdline, 4, 139);
+}
+
+#endif /* !defined(_WIN32) */
+
+#endif /* CONFIG_SLIRP */
+
+#if !defined(_WIN32)
+#ifdef _BSD
+static int tun_open(char *ifname, int ifname_size)
+{
+ int fd;
+ char *dev;
+ struct stat s;
+
+ fd = open("/dev/tap", O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "warning: could not open /dev/tap: no virtual network emulation\n");
+ return -1;
+ }
+
+ fstat(fd, &s);
+ dev = devname(s.st_rdev, S_IFCHR);
+ pstrcpy(ifname, ifname_size, dev);
+
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ return fd;
+}
+#else
+static int tun_open(char *ifname, int ifname_size)
+{
+ struct ifreq ifr;
+ int fd, ret;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
+ return -1;
+ }
+ memset(&ifr, 0, sizeof(ifr));
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+ pstrcpy(ifr.ifr_name, IFNAMSIZ, "tun%d");
+ ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ if (ret != 0) {
+ fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n");
+ close(fd);
+ return -1;
+ }
+ printf("Connected to host network interface: %s\n", ifr.ifr_name);
+ pstrcpy(ifname, ifname_size, ifr.ifr_name);
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ return fd;
+}
+#endif
+
+static void tun_send_packet(NetDriverState *nd, const uint8_t *buf, int size)
+{
+ write(nd->fd, buf, size);
+}
+
+static void tun_add_read_packet(NetDriverState *nd,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque)
+{
+ qemu_add_fd_read_handler(nd->fd, fd_can_read, fd_read, opaque);
+}
+
+static int net_tun_init(NetDriverState *nd)
+{
+ int pid, status;
+ char *args[3];
+ char **parg;
+
+ nd->fd = tun_open(nd->ifname, sizeof(nd->ifname));
+ if (nd->fd < 0)
+ return -1;
+
+ /* try to launch network init script */
+ pid = fork();
+ if (pid >= 0) {
+ if (pid == 0) {
+ parg = args;
+ *parg++ = network_script;
+ *parg++ = nd->ifname;
+ *parg++ = NULL;
+ execv(network_script, args);
+ exit(1);
+ }
+ while (waitpid(pid, &status, 0) != pid);
+ if (!WIFEXITED(status) ||
+ WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "%s: could not launch network script\n",
+ network_script);
+ }
+ }
+ nd->send_packet = tun_send_packet;
+ nd->add_read_packet = tun_add_read_packet;
+ return 0;
+}
+
+static int net_fd_init(NetDriverState *nd, int fd)
+{
+ nd->fd = fd;
+ nd->send_packet = tun_send_packet;
+ nd->add_read_packet = tun_add_read_packet;
+ pstrcpy(nd->ifname, sizeof(nd->ifname), "tunfd");
+ return 0;
+}
+
+#endif /* !_WIN32 */
+
+/***********************************************************/
+/* dumb display */
+
+static void dumb_update(DisplayState *ds, int x, int y, int w, int h)
+{
+}
+
+static void dumb_resize(DisplayState *ds, int w, int h)
+{
+}
+
+static void dumb_refresh(DisplayState *ds)
+{
+ vga_update_display();
+}
+
+void dumb_display_init(DisplayState *ds)
+{
+ ds->data = NULL;
+ ds->linesize = 0;
+ ds->depth = 0;
+ ds->dpy_update = dumb_update;
+ ds->dpy_resize = dumb_resize;
+ ds->dpy_refresh = dumb_refresh;
+}
+
+#if !defined(CONFIG_SOFTMMU)
+/***********************************************************/
+/* cpu signal handler */
+static void host_segv_handler(int host_signum, siginfo_t *info,
+ void *puc)
+{
+ abort();
+}
+#endif
+
+/***********************************************************/
+/* I/O handling */
+
+#define MAX_IO_HANDLERS 64
+
+typedef struct IOHandlerRecord {
+ int fd;
+ IOCanRWHandler *fd_can_read;
+ IOReadHandler *fd_read;
+ void *opaque;
+ /* temporary data */
+ struct pollfd *ufd;
+ int max_size;
+ struct IOHandlerRecord *next;
+} IOHandlerRecord;
+
+static IOHandlerRecord *first_io_handler;
+
+int qemu_add_fd_read_handler(int fd, IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque)
+{
+ IOHandlerRecord *ioh;
+
+ ioh = qemu_mallocz(sizeof(IOHandlerRecord));
+ if (!ioh)
+ return -1;
+ ioh->fd = fd;
+ ioh->fd_can_read = fd_can_read;
+ ioh->fd_read = fd_read;
+ ioh->opaque = opaque;
+ ioh->next = first_io_handler;
+ first_io_handler = ioh;
+ return 0;
+}
+
+void qemu_del_fd_read_handler(int fd)
+{
+ IOHandlerRecord **pioh, *ioh;
+
+ pioh = &first_io_handler;
+ for(;;) {
+ ioh = *pioh;
+ if (ioh == NULL)
+ break;
+ if (ioh->fd == fd) {
+ *pioh = ioh->next;
+ break;
+ }
+ pioh = &ioh->next;
+ }
+}
+
+/***********************************************************/
+/* savevm/loadvm support */
+
+void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
+{
+ fwrite(buf, 1, size, f);
+}
+
+void qemu_put_byte(QEMUFile *f, int v)
+{
+ fputc(v, f);
+}
+
+void qemu_put_be16(QEMUFile *f, unsigned int v)
+{
+ qemu_put_byte(f, v >> 8);
+ qemu_put_byte(f, v);
+}
+
+void qemu_put_be32(QEMUFile *f, unsigned int v)
+{
+ qemu_put_byte(f, v >> 24);
+ qemu_put_byte(f, v >> 16);
+ qemu_put_byte(f, v >> 8);
+ qemu_put_byte(f, v);
+}
+
+void qemu_put_be64(QEMUFile *f, uint64_t v)
+{
+ qemu_put_be32(f, v >> 32);
+ qemu_put_be32(f, v);
+}
+
+int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size)
+{
+ return fread(buf, 1, size, f);
+}
+
+int qemu_get_byte(QEMUFile *f)
+{
+ int v;
+ v = fgetc(f);
+ if (v == EOF)
+ return 0;
+ else
+ return v;
+}
+
+unsigned int qemu_get_be16(QEMUFile *f)
+{
+ unsigned int v;
+ v = qemu_get_byte(f) << 8;
+ v |= qemu_get_byte(f);
+ return v;
+}
+
+unsigned int qemu_get_be32(QEMUFile *f)
+{
+ unsigned int v;
+ v = qemu_get_byte(f) << 24;
+ v |= qemu_get_byte(f) << 16;
+ v |= qemu_get_byte(f) << 8;
+ v |= qemu_get_byte(f);
+ return v;
+}
+
+uint64_t qemu_get_be64(QEMUFile *f)
+{
+ uint64_t v;
+ v = (uint64_t)qemu_get_be32(f) << 32;
+ v |= qemu_get_be32(f);
+ return v;
+}
+
+int64_t qemu_ftell(QEMUFile *f)
+{
+ return ftell(f);
+}
+
+int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence)
+{
+ if (fseek(f, pos, whence) < 0)
+ return -1;
+ return ftell(f);
+}
+
+typedef struct SaveStateEntry {
+ char idstr[256];
+ int instance_id;
+ int version_id;
+ SaveStateHandler *save_state;
+ LoadStateHandler *load_state;
+ void *opaque;
+ struct SaveStateEntry *next;
+} SaveStateEntry;
+
+static SaveStateEntry *first_se;
+
+int register_savevm(const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveStateHandler *save_state,
+ LoadStateHandler *load_state,
+ void *opaque)
+{
+ SaveStateEntry *se, **pse;
+
+ se = qemu_malloc(sizeof(SaveStateEntry));
+ if (!se)
+ return -1;
+ pstrcpy(se->idstr, sizeof(se->idstr), idstr);
+ se->instance_id = instance_id;
+ se->version_id = version_id;
+ se->save_state = save_state;
+ se->load_state = load_state;
+ se->opaque = opaque;
+ se->next = NULL;
+
+ /* add at the end of list */
+ pse = &first_se;
+ while (*pse != NULL)
+ pse = &(*pse)->next;
+ *pse = se;
+ return 0;
+}
+
+#define QEMU_VM_FILE_MAGIC 0x5145564d
+#define QEMU_VM_FILE_VERSION 0x00000001
+
+int qemu_savevm(const char *filename)
+{
+ SaveStateEntry *se;
+ QEMUFile *f;
+ int len, len_pos, cur_pos, saved_vm_running, ret;
+
+ saved_vm_running = vm_running;
+ vm_stop(0);
+
+ f = fopen(filename, "wb");
+ if (!f) {
+ ret = -1;
+ goto the_end;
+ }
+
+ qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
+ qemu_put_be32(f, QEMU_VM_FILE_VERSION);
+
+ for(se = first_se; se != NULL; se = se->next) {
+ /* ID string */
+ len = strlen(se->idstr);
+ qemu_put_byte(f, len);
+ qemu_put_buffer(f, se->idstr, len);
+
+ qemu_put_be32(f, se->instance_id);
+ qemu_put_be32(f, se->version_id);
+
+ /* record size: filled later */
+ len_pos = ftell(f);
+ qemu_put_be32(f, 0);
+
+ se->save_state(f, se->opaque);
+
+ /* fill record size */
+ cur_pos = ftell(f);
+ len = ftell(f) - len_pos - 4;
+ fseek(f, len_pos, SEEK_SET);
+ qemu_put_be32(f, len);
+ fseek(f, cur_pos, SEEK_SET);
+ }
+
+ fclose(f);
+ ret = 0;
+ the_end:
+ if (saved_vm_running)
+ vm_start();
+ return ret;
+}
+
+static SaveStateEntry *find_se(const char *idstr, int instance_id)
+{
+ SaveStateEntry *se;
+
+ for(se = first_se; se != NULL; se = se->next) {
+ if (!strcmp(se->idstr, idstr) &&
+ instance_id == se->instance_id)
+ return se;
+ }
+ return NULL;
+}
+
+int qemu_loadvm(const char *filename)
+{
+ SaveStateEntry *se;
+ QEMUFile *f;
+ int len, cur_pos, ret, instance_id, record_len, version_id;
+ int saved_vm_running;
+ unsigned int v;
+ char idstr[256];
+
+ saved_vm_running = vm_running;
+ vm_stop(0);
+
+ f = fopen(filename, "rb");
+ if (!f) {
+ ret = -1;
+ goto the_end;
+ }
+
+ v = qemu_get_be32(f);
+ if (v != QEMU_VM_FILE_MAGIC)
+ goto fail;
+ v = qemu_get_be32(f);
+ if (v != QEMU_VM_FILE_VERSION) {
+ fail:
+ fclose(f);
+ ret = -1;
+ goto the_end;
+ }
+ for(;;) {
+#if defined (DO_TB_FLUSH)
+ tb_flush(global_env);
+#endif
+ len = qemu_get_byte(f);
+ if (feof(f))
+ break;
+ qemu_get_buffer(f, idstr, len);
+ idstr[len] = '\0';
+ instance_id = qemu_get_be32(f);
+ version_id = qemu_get_be32(f);
+ record_len = qemu_get_be32(f);
+#if 0
+ printf("idstr=%s instance=0x%x version=%d len=%d\n",
+ idstr, instance_id, version_id, record_len);
+#endif
+ cur_pos = ftell(f);
+ se = find_se(idstr, instance_id);
+ if (!se) {
+ fprintf(stderr, "qemu: warning: instance 0x%x of device '%s' not present in current VM\n",
+ instance_id, idstr);
+ } else {
+ ret = se->load_state(f, se->opaque, version_id);
+ if (ret < 0) {
+ fprintf(stderr, "qemu: warning: error while loading state for instance 0x%x of device '%s'\n",
+ instance_id, idstr);
+ }
+ }
+ /* always seek to exact end of record */
+ qemu_fseek(f, cur_pos + record_len, SEEK_SET);
+ }
+ fclose(f);
+ ret = 0;
+ the_end:
+ if (saved_vm_running)
+ vm_start();
+ return ret;
+}
+
+/***********************************************************/
+/* main execution loop */
+
+void gui_update(void *opaque)
+{
+ display_state.dpy_refresh(&display_state);
+ qemu_mod_timer(gui_timer, GUI_REFRESH_INTERVAL + qemu_get_clock(rt_clock));
+}
+void polling_handler(void *opaque)
+{
+#ifndef _WIN32
+ struct pollfd ufds[MAX_IO_HANDLERS + 1], *pf;
+ IOHandlerRecord *ioh, *ioh_next;
+ uint8_t buf[4096];
+ int n, max_size;
+#endif
+ int timeout = 0;
+ int ret;
+
+#ifdef _WIN32
+ if (timeout > 0)
+ Sleep(timeout);
+#else
+ /* poll any events */
+ /* XXX: separate device handlers from system ones */
+ pf = ufds;
+ for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next) {
+ if (!ioh->fd_can_read) {
+ max_size = 0;
+ pf->fd = ioh->fd;
+ pf->events = POLLIN;
+ ioh->ufd = pf;
+ pf++;
+ } else {
+ max_size = ioh->fd_can_read(ioh->opaque);
+ if (max_size > 0) {
+ if (max_size > sizeof(buf))
+ max_size = sizeof(buf);
+ pf->fd = ioh->fd;
+ pf->events = POLLIN;
+ ioh->ufd = pf;
+ pf++;
+ } else {
+ ioh->ufd = NULL;
+ }
+ }
+ ioh->max_size = max_size;
+ }
+
+ ret = poll(ufds, pf - ufds, timeout);
+ if (ret > 0) {
+ /* XXX: better handling of removal */
+ for(ioh = first_io_handler; ioh != NULL; ioh = ioh_next) {
+ ioh_next = ioh->next;
+ pf = ioh->ufd;
+ if (pf) {
+ if (pf->revents & POLLIN) {
+ if (ioh->max_size == 0) {
+ /* just a read event */
+ ioh->fd_read(ioh->opaque, NULL, 0);
+ } else {
+ n = read(ioh->fd, buf, ioh->max_size);
+ if (n >= 0) {
+ ioh->fd_read(ioh->opaque, buf, n);
+ } else if (errno != EAGAIN) {
+ ioh->fd_read(ioh->opaque, NULL, -errno);
+ }
+ }
+ }
+ }
+ }
+ }
+#endif /* !defined(_WIN32) */
+
+ qemu_mod_timer(polling_timer, POLLING_INTERVAL + qemu_get_clock(rt_clock));
+}
+
+
+/* XXX: support several handlers */
+VMStopHandler *vm_stop_cb;
+VMStopHandler *vm_stop_opaque;
+
+int qemu_add_vm_stop_handler(VMStopHandler *cb, void *opaque)
+{
+ vm_stop_cb = cb;
+ vm_stop_opaque = opaque;
+ return 0;
+}
+
+void qemu_del_vm_stop_handler(VMStopHandler *cb, void *opaque)
+{
+ vm_stop_cb = NULL;
+}
+
+void vm_start(void)
+{
+ if (!vm_running) {
+ cpu_enable_ticks();
+ vm_running = 1;
+ }
+}
+
+void vm_stop(int reason)
+{
+ if (vm_running) {
+ cpu_disable_ticks();
+ vm_running = 0;
+ if (reason != 0) {
+ if (vm_stop_cb) {
+ vm_stop_cb(vm_stop_opaque, reason);
+ }
+ }
+ }
+}
+
+/* reset/shutdown handler */
+
+typedef struct QEMUResetEntry {
+ QEMUResetHandler *func;
+ void *opaque;
+ struct QEMUResetEntry *next;
+} QEMUResetEntry;
+
+static QEMUResetEntry *first_reset_entry;
+static int reset_requested;
+int shutdown_requested;
+
+void qemu_register_reset(QEMUResetHandler *func, void *opaque)
+{
+ QEMUResetEntry **pre, *re;
+
+ pre = &first_reset_entry;
+ while (*pre != NULL)
+ pre = &(*pre)->next;
+ re = qemu_mallocz(sizeof(QEMUResetEntry));
+ re->func = func;
+ re->opaque = opaque;
+ re->next = NULL;
+ *pre = re;
+}
+
+void qemu_system_reset(void)
+{
+ QEMUResetEntry *re;
+
+ /* reset all devices */
+ for(re = first_reset_entry; re != NULL; re = re->next) {
+ re->func(re->opaque);
+ }
+}
+
+void qemu_system_reset_request(void)
+{
+ reset_requested = 1;
+ cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+}
+
+void qemu_system_shutdown_request(void)
+{
+ shutdown_requested = 1;
+ cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+}
+
+void main_loop_wait(int timeout)
+{
+ if (vm_running) {
+ qemu_run_timers(&active_timers[QEMU_TIMER_VIRTUAL],
+ qemu_get_clock(vm_clock));
+ /* run dma transfers, if any */
+ DMA_run();
+ }
+
+ /* real time timers */
+ qemu_run_timers(&active_timers[QEMU_TIMER_REALTIME],
+ qemu_get_clock(rt_clock));
+}
+
+void help(void)
+{
+ printf("QEMU PC emulator version " QEMU_VERSION ", Copyright (c) 2003-2004 Fabrice Bellard\n"
+ "usage: %s [options] [disk_image]\n"
+ "\n"
+ "'disk_image' is a raw hard image image for IDE hard disk 0\n"
+ "\n"
+ "Standard options:\n"
+ "-fda/-fdb file use 'file' as floppy disk 0/1 image\n"
+ "-hda/-hdb file use 'file' as IDE hard disk 0/1 image\n"
+ "-hdc/-hdd file use 'file' as IDE hard disk 2/3 image\n"
+ "-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
+ "-boot [a|c|d] boot on floppy (a), hard disk (c) or CD-ROM (d)\n"
+ "-snapshot write to temporary files instead of disk image files\n"
+ "-m megs set virtual RAM size to megs MB [default=%d]\n"
+ "-nographic disable graphical output and redirect serial I/Os to console\n"
+#ifdef CONFIG_VNC
+ "-vnc port use vnc instead of sdl\n"
+ "-vncport port use a different port\n"
+ "-vncconnect host:port do a reverse connect\n"
+#ifdef CONFIG_SDL
+ "-vnc-and-sdl use vnc and sdl simultaneously\n"
+#endif
+#endif
+ "-k <language> use keyboard layout (for example \"fr\" for french)\n"
+ "-enable-audio enable audio support\n"
+ "-localtime set the real time clock to local time [default=utc]\n"
+ "-full-screen start in full screen\n"
+#ifdef TARGET_PPC
+ "-prep Simulate a PREP system (default is PowerMAC)\n"
+ "-g WxH[xDEPTH] Set the initial VGA graphic mode\n"
+#endif
+ "\n"
+ "Network options:\n"
+ "-nics n simulate 'n' network cards [default=1]\n"
+ "-macaddr addr set the mac address of the first interface\n"
+ "-n script set tap/tun network init script [default=%s]\n"
+ "-tun-fd fd use this fd as already opened tap/tun interface\n"
+#ifdef CONFIG_SLIRP
+ "-user-net use user mode network stack [default if no tap/tun script]\n"
+ "-tftp prefix allow tftp access to files starting with prefix [-user-net]\n"
+#ifndef _WIN32
+ "-smb dir allow SMB access to files in 'dir' [-user-net]\n"
+#endif
+ "-redir [tcp|udp]:host-port:[guest-host]:guest-port\n"
+ " redirect TCP or UDP connections from host to guest [-user-net]\n"
+#endif
+ "-dummy-net use dummy network stack\n"
+ "\n"
+ "Linux boot specific:\n"
+ "-kernel bzImage use 'bzImage' as kernel image\n"
+ "-append cmdline use 'cmdline' as kernel command line\n"
+ "-initrd file use 'file' as initial ram disk\n"
+ "\n"
+ "Debug/Expert options:\n"
+ "-monitor dev redirect the monitor to char device 'dev'\n"
+ "-serial dev redirect the serial port to char device 'dev'\n"
+ "-S freeze CPU at startup (use 'c' to start execution)\n"
+ "-s wait gdb connection to port %d\n"
+ "-p port ioreq port for xen\n"
+ "-d domain domain that we're serving\n"
+ "-hdachs c,h,s force hard disk 0 geometry (usually qemu can guess it)\n"
+ "-L path set the directory for the BIOS and VGA BIOS\n"
+#ifdef USE_CODE_COPY
+ "-no-code-copy disable code copy acceleration\n"
+#endif
+#ifdef TARGET_I386
+ "-isa simulate an ISA-only system (default is PCI system)\n"
+ "-std-vga simulate a standard VGA card with VESA Bochs Extensions\n"
+ " (default is CL-GD5446 PCI VGA)\n"
+#endif
+ "-loadvm file start right away with a saved state (loadvm in monitor)\n"
+ "\n"
+ "During emulation, the following keys are useful:\n"
+ "ctrl-alt-f toggle full screen\n"
+ "ctrl-alt-n switch to virtual console 'n'\n"
+ "ctrl-alt toggle mouse and keyboard grab\n"
+ "\n"
+ "When using -nographic, press 'ctrl-a h' to get some help.\n"
+ ,
+#ifdef CONFIG_SOFTMMU
+ "qemu",
+#else
+ "qemu-fast",
+#endif
+ DEFAULT_RAM_SIZE,
+ DEFAULT_NETWORK_SCRIPT,
+ DEFAULT_GDBSTUB_PORT);
+#ifndef CONFIG_SOFTMMU
+ printf("\n"
+ "NOTE: this version of QEMU is faster but it needs slightly patched OSes to\n"
+ "work. Please use the 'qemu' executable to have a more accurate (but slower)\n"
+ "PC emulation.\n");
+#endif
+ exit(1);
+}
+
+#define HAS_ARG 0x0001
+
+enum {
+ QEMU_OPTION_h,
+
+ QEMU_OPTION_fda,
+ QEMU_OPTION_fdb,
+ QEMU_OPTION_hda,
+ QEMU_OPTION_hdb,
+ QEMU_OPTION_hdc,
+ QEMU_OPTION_hdd,
+ QEMU_OPTION_cdrom,
+ QEMU_OPTION_boot,
+ QEMU_OPTION_snapshot,
+ QEMU_OPTION_m,
+ QEMU_OPTION_nographic,
+#ifdef CONFIG_VNC
+ QEMU_OPTION_vnc,
+ QEMU_OPTION_vncport,
+ QEMU_OPTION_vncconnect,
+#ifdef CONFIG_SDL
+ QEMU_OPTION_vnc_and_sdl,
+#endif
+#endif
+ QEMU_OPTION_enable_audio,
+
+ QEMU_OPTION_nics,
+ QEMU_OPTION_macaddr,
+ QEMU_OPTION_n,
+ QEMU_OPTION_tun_fd,
+ QEMU_OPTION_user_net,
+ QEMU_OPTION_tftp,
+ QEMU_OPTION_smb,
+ QEMU_OPTION_redir,
+ QEMU_OPTION_dummy_net,
+
+ QEMU_OPTION_kernel,
+ QEMU_OPTION_append,
+ QEMU_OPTION_initrd,
+
+ QEMU_OPTION_S,
+ QEMU_OPTION_s,
+ QEMU_OPTION_p,
+ QEMU_OPTION_d,
+ QEMU_OPTION_l,
+ QEMU_OPTION_hdachs,
+ QEMU_OPTION_L,
+ QEMU_OPTION_no_code_copy,
+ QEMU_OPTION_pci,
+ QEMU_OPTION_isa,
+ QEMU_OPTION_prep,
+ QEMU_OPTION_k,
+ QEMU_OPTION_localtime,
+ QEMU_OPTION_cirrusvga,
+ QEMU_OPTION_g,
+ QEMU_OPTION_std_vga,
+ QEMU_OPTION_monitor,
+ QEMU_OPTION_serial,
+ QEMU_OPTION_loadvm,
+ QEMU_OPTION_full_screen,
+};
+
+typedef struct QEMUOption {
+ const char *name;
+ int flags;
+ int index;
+} QEMUOption;
+
+const QEMUOption qemu_options[] = {
+ { "h", 0, QEMU_OPTION_h },
+
+ { "fda", HAS_ARG, QEMU_OPTION_fda },
+ { "fdb", HAS_ARG, QEMU_OPTION_fdb },
+ { "hda", HAS_ARG, QEMU_OPTION_hda },
+ { "hdb", HAS_ARG, QEMU_OPTION_hdb },
+ { "hdc", HAS_ARG, QEMU_OPTION_hdc },
+ { "hdd", HAS_ARG, QEMU_OPTION_hdd },
+ { "cdrom", HAS_ARG, QEMU_OPTION_cdrom },
+ { "boot", HAS_ARG, QEMU_OPTION_boot },
+ { "snapshot", 0, QEMU_OPTION_snapshot },
+ { "m", HAS_ARG, QEMU_OPTION_m },
+ { "nographic", 0, QEMU_OPTION_nographic },
+#ifdef CONFIG_VNC
+ { "vnc", 0, QEMU_OPTION_vnc },
+ { "vncport", HAS_ARG, QEMU_OPTION_vncport },
+ { "vncconnect", HAS_ARG, QEMU_OPTION_vncconnect },
+#ifdef CONFIG_SDL
+ { "vnc-and-sdl", 0, QEMU_OPTION_vnc_and_sdl },
+#endif
+#endif
+ { "k", HAS_ARG, QEMU_OPTION_k },
+ { "enable-audio", 0, QEMU_OPTION_enable_audio },
+
+ { "nics", HAS_ARG, QEMU_OPTION_nics},
+ { "macaddr", HAS_ARG, QEMU_OPTION_macaddr},
+ { "n", HAS_ARG, QEMU_OPTION_n },
+ { "tun-fd", HAS_ARG, QEMU_OPTION_tun_fd },
+#ifdef CONFIG_SLIRP
+ { "user-net", 0, QEMU_OPTION_user_net },
+ { "tftp", HAS_ARG, QEMU_OPTION_tftp },
+#ifndef _WIN32
+ { "smb", HAS_ARG, QEMU_OPTION_smb },
+#endif
+ { "redir", HAS_ARG, QEMU_OPTION_redir },
+#endif
+ { "dummy-net", 0, QEMU_OPTION_dummy_net },
+
+ { "kernel", HAS_ARG, QEMU_OPTION_kernel },
+ { "append", HAS_ARG, QEMU_OPTION_append },
+ { "initrd", HAS_ARG, QEMU_OPTION_initrd },
+
+ { "S", 0, QEMU_OPTION_S },
+ { "s", 0, QEMU_OPTION_s },
+ { "p", HAS_ARG, QEMU_OPTION_p },
+ { "d", HAS_ARG, QEMU_OPTION_d },
+ { "l", HAS_ARG, QEMU_OPTION_l },
+ { "hdachs", HAS_ARG, QEMU_OPTION_hdachs },
+ { "L", HAS_ARG, QEMU_OPTION_L },
+ { "no-code-copy", 0, QEMU_OPTION_no_code_copy },
+#ifdef TARGET_PPC
+ { "prep", 0, QEMU_OPTION_prep },
+ { "g", 1, QEMU_OPTION_g },
+#endif
+ { "localtime", 0, QEMU_OPTION_localtime },
+ { "isa", 0, QEMU_OPTION_isa },
+ { "std-vga", 0, QEMU_OPTION_std_vga },
+ { "monitor", 1, QEMU_OPTION_monitor },
+ { "serial", 1, QEMU_OPTION_serial },
+ { "loadvm", HAS_ARG, QEMU_OPTION_loadvm },
+ { "full-screen", 0, QEMU_OPTION_full_screen },
+
+ /* temporary options */
+ { "pci", 0, QEMU_OPTION_pci },
+ { "cirrusvga", 0, QEMU_OPTION_cirrusvga },
+ { NULL },
+};
+
+#if defined (TARGET_I386) && defined(USE_CODE_COPY)
+
+/* this stack is only used during signal handling */
+#define SIGNAL_STACK_SIZE 32768
+
+static uint8_t *signal_stack;
+
+#endif
+
+#define NET_IF_TUN 0
+#define NET_IF_USER 1
+#define NET_IF_DUMMY 2
+
+int main(int argc, char **argv)
+{
+#ifdef CONFIG_GDBSTUB
+ int use_gdbstub, gdbstub_port;
+#endif
+ int i, has_cdrom;
+ int snapshot, linux_boot;
+ CPUState *env;
+ const char *initrd_filename;
+ const char *hd_filename[MAX_DISKS], *fd_filename[MAX_FD];
+ const char *kernel_filename, *kernel_cmdline;
+ DisplayState *ds = &display_state;
+ int cyls, heads, secs;
+ int start_emulation = 1;
+ uint8_t macaddr[6];
+ int net_if_type, nb_tun_fds, tun_fds[MAX_NICS];
+ int optind;
+ const char *r, *optarg;
+ CharDriverState *monitor_hd;
+ char monitor_device[128];
+ char serial_devices[MAX_SERIAL_PORTS][128];
+ int serial_device_index;
+ const char *loadvm = NULL;
+ unsigned long nr_pages, *page_array;
+ extern void *shared_page;
+ /* change the qemu-dm to daemon, just like bochs dm */
+// daemon(0, 0);
+
+#if !defined(CONFIG_SOFTMMU)
+ /* we never want that malloc() uses mmap() */
+ mallopt(M_MMAP_THRESHOLD, 4096 * 1024);
+#endif
+ initrd_filename = NULL;
+ for(i = 0; i < MAX_FD; i++)
+ fd_filename[i] = NULL;
+ for(i = 0; i < MAX_DISKS; i++)
+ hd_filename[i] = NULL;
+ ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
+ vga_ram_size = VGA_RAM_SIZE;
+ bios_size = BIOS_SIZE;
+ pstrcpy(network_script, sizeof(network_script), DEFAULT_NETWORK_SCRIPT);
+#ifdef CONFIG_GDBSTUB
+ use_gdbstub = 0;
+ gdbstub_port = DEFAULT_GDBSTUB_PORT;
+#endif
+ snapshot = 0;
+ nographic = 0;
+ usevnc = 0;
+ vncport=0;
+ vncconnect=NULL;
+ kernel_filename = NULL;
+ kernel_cmdline = "";
+ has_cdrom = 1;
+ cyls = heads = secs = 0;
+ pstrcpy(monitor_device, sizeof(monitor_device), "vc");
+
+ pstrcpy(serial_devices[0], sizeof(serial_devices[0]), "vc");
+ for(i = 1; i < MAX_SERIAL_PORTS; i++)
+ serial_devices[i][0] = '\0';
+ serial_device_index = 0;
+
+ nb_tun_fds = 0;
+ net_if_type = -1;
+ nb_nics = 1;
+ /* default mac address of the first network interface */
+ macaddr[0] = 0x52;
+ macaddr[1] = 0x54;
+ macaddr[2] = 0x00;
+ macaddr[3] = 0x12;
+ macaddr[4] = 0x34;
+ macaddr[5] = 0x56;
+
+ optind = 1;
+ for(;;) {
+ if (optind >= argc)
+ break;
+ r = argv[optind];
+ if (r[0] != '-') {
+ hd_filename[0] = argv[optind++];
+ } else {
+ const QEMUOption *popt;
+
+ optind++;
+ popt = qemu_options;
+ for(;;) {
+ if (!popt->name) {
+ fprintf(stderr, "%s: invalid option -- '%s'\n",
+ argv[0], r);
+ exit(1);
+ }
+ if (!strcmp(popt->name, r + 1))
+ break;
+ popt++;
+ }
+ if (popt->flags & HAS_ARG) {
+ if (optind >= argc) {
+ fprintf(stderr, "%s: option '%s' requires an argument\n",
+ argv[0], r);
+ exit(1);
+ }
+ optarg = argv[optind++];
+ } else {
+ optarg = NULL;
+ }
+
+ switch(popt->index) {
+ case QEMU_OPTION_initrd:
+ initrd_filename = optarg;
+ break;
+ case QEMU_OPTION_hda:
+ hd_filename[0] = optarg;
+ break;
+ case QEMU_OPTION_hdb:
+ hd_filename[1] = optarg;
+ break;
+ case QEMU_OPTION_snapshot:
+ snapshot = 1;
+ break;
+ case QEMU_OPTION_hdachs:
+ {
+ const char *p;
+ p = optarg;
+ cyls = strtol(p, (char **)&p, 0);
+ if (*p != ',')
+ goto chs_fail;
+ p++;
+ heads = strtol(p, (char **)&p, 0);
+ if (*p != ',')
+ goto chs_fail;
+ p++;
+ secs = strtol(p, (char **)&p, 0);
+ if (*p != '\0') {
+ chs_fail:
+ cyls = 0;
+ }
+ }
+ break;
+ case QEMU_OPTION_nographic:
+ pstrcpy(monitor_device, sizeof(monitor_device), "stdio");
+ pstrcpy(serial_devices[0], sizeof(serial_devices[0]), "stdio");
+ nographic = 1;
+ break;
+#ifdef CONFIG_VNC
+ case QEMU_OPTION_vnc:
+ usevnc = 1;
+ break;
+ case QEMU_OPTION_vncport:
+ {
+ const char *p;
+ p = optarg;
+ vncport= strtol(optarg, (char **)&p, 0);
+ }
+ break;
+ case QEMU_OPTION_vncconnect:
+ {
+ vncconnect=optarg;
+ }
+ break;
+#ifdef CONFIG_SDL
+ case QEMU_OPTION_vnc_and_sdl:
+ usevnc = 2;
+ break;
+#endif
+#endif
+ case QEMU_OPTION_kernel:
+ kernel_filename = optarg;
+ break;
+ case QEMU_OPTION_append:
+ kernel_cmdline = optarg;
+ break;
+ case QEMU_OPTION_tun_fd:
+ {
+ const char *p;
+ int fd;
+ net_if_type = NET_IF_TUN;
+ if (nb_tun_fds < MAX_NICS) {
+ fd = strtol(optarg, (char **)&p, 0);
+ if (*p != '\0') {
+ fprintf(stderr, "qemu: invalid fd for network interface %d\n", nb_tun_fds);
+ exit(1);
+ }
+ tun_fds[nb_tun_fds++] = fd;
+ }
+ }
+ break;
+ case QEMU_OPTION_hdc:
+ hd_filename[2] = optarg;
+ has_cdrom = 0;
+ break;
+ case QEMU_OPTION_hdd:
+ hd_filename[3] = optarg;
+ break;
+ case QEMU_OPTION_cdrom:
+ hd_filename[2] = optarg;
+ has_cdrom = 1;
+ break;
+ case QEMU_OPTION_boot:
+ boot_device = optarg[0];
+ if (boot_device != 'a' &&
+ boot_device != 'c' && boot_device != 'd') {
+ fprintf(stderr, "qemu: invalid boot device '%c'\n", boot_device);
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_fda:
+ fd_filename[0] = optarg;
+ break;
+ case QEMU_OPTION_fdb:
+ fd_filename[1] = optarg;
+ break;
+ case QEMU_OPTION_nics:
+ nb_nics = atoi(optarg);
+ if (nb_nics < 0 || nb_nics > MAX_NICS) {
+ fprintf(stderr, "qemu: invalid number of network interfaces\n");
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_macaddr:
+ {
+ const char *p;
+ int i;
+ p = optarg;
+ for(i = 0; i < 6; i++) {
+ macaddr[i] = strtol(p, (char **)&p, 16);
+ if (i == 5) {
+ if (*p != '\0')
+ goto macaddr_error;
+ } else {
+ if (*p != ':') {
+ macaddr_error:
+ fprintf(stderr, "qemu: invalid syntax for ethernet address\n");
+ exit(1);
+ }
+ p++;
+ }
+ }
+ }
+ break;
+#ifdef CONFIG_SLIRP
+ case QEMU_OPTION_tftp:
+ tftp_prefix = optarg;
+ break;
+#ifndef _WIN32
+ case QEMU_OPTION_smb:
+ net_slirp_smb(optarg);
+ break;
+#endif
+ case QEMU_OPTION_user_net:
+ net_if_type = NET_IF_USER;
+ break;
+ case QEMU_OPTION_redir:
+ net_slirp_redir(optarg);
+ break;
+#endif
+ case QEMU_OPTION_dummy_net:
+ net_if_type = NET_IF_DUMMY;
+ break;
+ case QEMU_OPTION_enable_audio:
+ audio_enabled = 1;
+ break;
+ case QEMU_OPTION_h:
+ help();
+ break;
+ case QEMU_OPTION_m:
+ ram_size = atoi(optarg) * 1024 * 1024;
+ if (ram_size <= 0)
+ help();
+ break;
+ case QEMU_OPTION_d:
+ {
+ domid = atoi(optarg);
+ printf("domid: %d\n", domid);
+ }
+ break;
+
+ case QEMU_OPTION_p:
+ {
+ extern short ioreq_port;
+ ioreq_port = atoi(optarg);
+ printf("port: %d\n", ioreq_port);
+ }
+ break;
+ case QEMU_OPTION_l:
+ {
+ int mask;
+ mask = cpu_str_to_log_mask(optarg);
+ printf("mask: %x\n", mask);
+ cpu_set_log(mask);
+ }
+ break;
+ case QEMU_OPTION_n:
+ pstrcpy(network_script, sizeof(network_script), optarg);
+ break;
+#ifdef CONFIG_GDBSTUB
+ case QEMU_OPTION_s:
+ use_gdbstub = 1;
+ break;
+#endif
+ case QEMU_OPTION_L:
+ bios_dir = optarg;
+ break;
+ case QEMU_OPTION_S:
+ start_emulation = 0;
+ break;
+ case QEMU_OPTION_pci:
+ pci_enabled = 1;
+ break;
+ case QEMU_OPTION_isa:
+ pci_enabled = 0;
+ break;
+ case QEMU_OPTION_prep:
+ prep_enabled = 1;
+ break;
+ case QEMU_OPTION_k:
+ keyboard_layout = optarg;
+ break;
+ case QEMU_OPTION_localtime:
+ rtc_utc = 0;
+ break;
+ case QEMU_OPTION_cirrusvga:
+ cirrus_vga_enabled = 1;
+ break;
+ case QEMU_OPTION_std_vga:
+ cirrus_vga_enabled = 0;
+ break;
+ case QEMU_OPTION_g:
+ {
+ const char *p;
+ int w, h, depth;
+ p = optarg;
+ w = strtol(p, (char **)&p, 10);
+ if (w <= 0) {
+ graphic_error:
+ fprintf(stderr, "qemu: invalid resolution or depth\n");
+ exit(1);
+ }
+ if (*p != 'x')
+ goto graphic_error;
+ p++;
+ h = strtol(p, (char **)&p, 10);
+ if (h <= 0)
+ goto graphic_error;
+ if (*p == 'x') {
+ p++;
+ depth = strtol(p, (char **)&p, 10);
+ if (depth != 8 && depth != 15 && depth != 16 &&
+ depth != 24 && depth != 32)
+ goto graphic_error;
+ } else if (*p == '\0') {
+ depth = graphic_depth;
+ } else {
+ goto graphic_error;
+ }
+
+ graphic_width = w;
+ graphic_height = h;
+ graphic_depth = depth;
+ }
+ break;
+ case QEMU_OPTION_monitor:
+ pstrcpy(monitor_device, sizeof(monitor_device), optarg);
+ break;
+ case QEMU_OPTION_serial:
+ if (serial_device_index >= MAX_SERIAL_PORTS) {
+ fprintf(stderr, "qemu: too many serial ports\n");
+ exit(1);
+ }
+ pstrcpy(serial_devices[serial_device_index],
+ sizeof(serial_devices[0]), optarg);
+ serial_device_index++;
+ break;
+ case QEMU_OPTION_loadvm:
+ loadvm = optarg;
+ break;
+ case QEMU_OPTION_full_screen:
+ full_screen = 1;
+ break;
+ }
+ }
+ }
+
+ linux_boot = (kernel_filename != NULL);
+
+ if (!linux_boot && hd_filename[0] == '\0' && hd_filename[2] == '\0' &&
+ fd_filename[0] == '\0')
+ help();
+
+ /* boot to cd by default if no hard disk */
+ if (hd_filename[0] == '\0' && boot_device == 'c') {
+ if (fd_filename[0] != '\0')
+ boot_device = 'a';
+ else
+ boot_device = 'd';
+ }
+
+#if !defined(CONFIG_SOFTMMU)
+ /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
+ {
+ static uint8_t stdout_buf[4096];
+ setvbuf(stdout, stdout_buf, _IOLBF, sizeof(stdout_buf));
+ }
+#else
+ setvbuf(stdout, NULL, _IOLBF, 0);
+#endif
+
+ /* init host network redirectors */
+ if (net_if_type == -1) {
+ net_if_type = NET_IF_TUN;
+#if defined(CONFIG_SLIRP)
+ if (access(network_script, R_OK) < 0) {
+ net_if_type = NET_IF_USER;
+ }
+#endif
+ }
+
+ for(i = 0; i < nb_nics; i++) {
+ NetDriverState *nd = &nd_table[i];
+ nd->index = i;
+ /* init virtual mac address */
+ nd->macaddr[0] = macaddr[0];
+ nd->macaddr[1] = macaddr[1];
+ nd->macaddr[2] = macaddr[2];
+ nd->macaddr[3] = macaddr[3];
+ nd->macaddr[4] = macaddr[4];
+ nd->macaddr[5] = macaddr[5] + i;
+ switch(net_if_type) {
+#if defined(CONFIG_SLIRP)
+ case NET_IF_USER:
+ net_slirp_init(nd);
+ break;
+#endif
+#if !defined(_WIN32)
+ case NET_IF_TUN:
+ if (i < nb_tun_fds) {
+ net_fd_init(nd, tun_fds[i]);
+ } else {
+ if (net_tun_init(nd) < 0)
+ net_dummy_init(nd);
+ }
+ break;
+#endif
+ case NET_IF_DUMMY:
+ default:
+ net_dummy_init(nd);
+ break;
+ }
+ }
+
+ /* init the memory */
+ phys_ram_size = ram_size + vga_ram_size + bios_size;
+
+ #define PAGE_SHIFT 12
+ #define PAGE_SIZE (1 << PAGE_SHIFT)
+
+ nr_pages = ram_size/PAGE_SIZE;
+ xc_handle = xc_interface_open();
+
+ if ( (page_array = (unsigned long *)
+ malloc(nr_pages * sizeof(unsigned long))) == NULL)
+ {
+ perror("malloc");
+ exit(-1);
+ }
+
+ if ( xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages )
+ {
+ perror("xc_get_pfn_list");
+ exit(-1);
+ }
+
+ if ((phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
+ PROT_READ|PROT_WRITE,
+ page_array,
+ nr_pages - 1)) == 0) {
+ perror("xc_map_foreign_batch");
+ exit(-1);
+ }
+
+
+ shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ page_array[nr_pages - 1]);
+
+ /* we always create the cdrom drive, even if no disk is there */
+ bdrv_init();
+ if (has_cdrom) {
+ bs_table[2] = bdrv_new("cdrom");
+ bdrv_set_type_hint(bs_table[2], BDRV_TYPE_CDROM);
+ }
+
+ /* open the virtual block devices */
+ for(i = 0; i < MAX_DISKS; i++) {
+ if (hd_filename[i]) {
+ if (!bs_table[i]) {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "hd%c", i + 'a');
+ bs_table[i] = bdrv_new(buf);
+ }
+ if (bdrv_open(bs_table[i], hd_filename[i], snapshot) < 0) {
+ fprintf(stderr, "qemu: could not open hard disk image '%s'\n",
+ hd_filename[i]);
+ exit(1);
+ }
+ if (i == 0 && cyls != 0)
+ bdrv_set_geometry_hint(bs_table[i], cyls, heads, secs);
+ }
+ }
+
+ /* we always create at least one floppy disk */
+ fd_table[0] = bdrv_new("fda");
+ bdrv_set_type_hint(fd_table[0], BDRV_TYPE_FLOPPY);
+
+ for(i = 0; i < MAX_FD; i++) {
+ if (fd_filename[i]) {
+ if (!fd_table[i]) {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "fd%c", i + 'a');
+ fd_table[i] = bdrv_new(buf);
+ bdrv_set_type_hint(fd_table[i], BDRV_TYPE_FLOPPY);
+ }
+ if (fd_filename[i] != '\0') {
+ if (bdrv_open(fd_table[i], fd_filename[i], snapshot) < 0) {
+ fprintf(stderr, "qemu: could not open floppy disk image '%s'\n",
+ fd_filename[i]);
+ exit(1);
+ }
+ }
+ }
+ }
+
+ /* init CPU state */
+ env = cpu_init();
+ global_env = env;
+ cpu_single_env = env;
+
+ init_ioports();
+ cpu_calibrate_ticks();
+
+ /* terminal init */
+ if (nographic) {
+ dumb_display_init(ds);
+ } else {
+ if (usevnc) {
+#ifdef CONFIG_VNC
+ vnc_display_init(ds, (usevnc==2), vncport, vncconnect);
+#else
+ perror("qemu not configured with vnc support");
+#endif
+ } else {
+#ifdef CONFIG_SDL
+ sdl_display_init(ds, full_screen);
+#else
+ dumb_display_init(ds);
+#endif
+ }
+ }
+
+ vga_console = graphic_console_init(ds);
+
+ monitor_hd = qemu_chr_open(monitor_device);
+ if (!monitor_hd) {
+ fprintf(stderr, "qemu: could not open monitor device '%s'\n", monitor_device);
+ exit(1);
+ }
+ monitor_init(monitor_hd, !nographic);
+
+ for(i = 0; i < MAX_SERIAL_PORTS; i++) {
+ if (serial_devices[i][0] != '\0') {
+ serial_hds[i] = qemu_chr_open(serial_devices[i]);
+ if (!serial_hds[i]) {
+ fprintf(stderr, "qemu: could not open serial device '%s'\n",
+ serial_devices[i]);
+ exit(1);
+ }
+ if (!strcmp(serial_devices[i], "vc"))
+ qemu_chr_printf(serial_hds[i], "serial%d console\n", i);
+ }
+ }
+
+ /* setup cpu signal handlers for MMU / self modifying code handling */
+#if !defined(CONFIG_SOFTMMU)
+
+#if defined (TARGET_I386) && defined(USE_CODE_COPY)
+ {
+ stack_t stk;
+ signal_stack = memalign(16, SIGNAL_STACK_SIZE);
+ stk.ss_sp = signal_stack;
+ stk.ss_size = SIGNAL_STACK_SIZE;
+ stk.ss_flags = 0;
+
+ if (sigaltstack(&stk, NULL) < 0) {
+ perror("sigaltstack");
+ exit(1);
+ }
+ }
+#endif
+ {
+ struct sigaction act;
+
+ sigfillset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+#if defined (TARGET_I386) && defined(USE_CODE_COPY)
+ act.sa_flags |= SA_ONSTACK;
+#endif
+ act.sa_sigaction = host_segv_handler;
+ sigaction(SIGSEGV, &act, NULL);
+ sigaction(SIGBUS, &act, NULL);
+#if defined (TARGET_I386) && defined(USE_CODE_COPY)
+ sigaction(SIGFPE, &act, NULL);
+#endif
+ }
+#endif
+
+#ifndef _WIN32
+ {
+ struct sigaction act;
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = SIG_IGN;
+ sigaction(SIGPIPE, &act, NULL);
+ }
+#endif
+ init_timers();
+
+#if defined(TARGET_I386)
+ pc_init(ram_size, vga_ram_size, boot_device,
+ ds, fd_filename, snapshot,
+ kernel_filename, kernel_cmdline, initrd_filename);
+#elif defined(TARGET_PPC)
+ ppc_init(ram_size, vga_ram_size, boot_device,
+ ds, fd_filename, snapshot,
+ kernel_filename, kernel_cmdline, initrd_filename);
+#elif defined(TARGET_SPARC)
+ sun4m_init(ram_size, vga_ram_size, boot_device,
+ ds, fd_filename, snapshot,
+ kernel_filename, kernel_cmdline, initrd_filename);
+#endif
+
+ gui_timer = qemu_new_timer(rt_clock, gui_update, NULL);
+ qemu_mod_timer(gui_timer, qemu_get_clock(rt_clock));
+
+ polling_timer = qemu_new_timer(rt_clock, polling_handler, NULL);
+ qemu_mod_timer(polling_timer, qemu_get_clock(rt_clock));
+
+#ifdef CONFIG_GDBSTUB
+ if (use_gdbstub) {
+ if (gdbserver_start(gdbstub_port) < 0) {
+ fprintf(stderr, "Could not open gdbserver socket on port %d\n",
+ gdbstub_port);
+ exit(1);
+ } else {
+ printf("Waiting gdb connection on port %d\n", gdbstub_port);
+ }
+ } else
+#endif
+ if (loadvm)
+ qemu_loadvm(loadvm);
+
+ {
+ /* XXX: simplify init */
+ if (start_emulation) {
+ vm_start();
+ }
+ }
+ main_loop();
+ quit_timers();
+ return 0;
+}
diff --git a/tools/ioemu/vl.h b/tools/ioemu/vl.h
new file mode 100644
index 0000000000..fcee3da4a7
--- /dev/null
+++ b/tools/ioemu/vl.h
@@ -0,0 +1,787 @@
+/*
+ * QEMU System Emulator header
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef VL_H
+#define VL_H
+
+/* we put basic includes here to avoid repeating them in device drivers */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <time.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#ifndef O_LARGEFILE
+#define O_LARGEFILE 0
+#endif
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#ifdef _WIN32
+#define lseek _lseeki64
+#define ENOTSUP 4096
+/* XXX: find 64 bit version */
+#define ftruncate chsize
+
+static inline char *realpath(const char *path, char *resolved_path)
+{
+ _fullpath(resolved_path, path, _MAX_PATH);
+ return resolved_path;
+}
+#endif
+
+#ifdef QEMU_TOOL
+
+/* we use QEMU_TOOL in the command line tools which do not depend on
+ the target CPU type */
+#include "config-host.h"
+#include <setjmp.h>
+#include "osdep.h"
+#include "bswap.h"
+
+#else
+
+#include "cpu.h"
+
+#endif /* !defined(QEMU_TOOL) */
+
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s) tostring(s)
+#define tostring(s) #s
+#endif
+
+/* vl.c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c);
+
+void hw_error(const char *fmt, ...);
+
+int get_image_size(const char *filename);
+int load_image(const char *filename, uint8_t *addr);
+extern const char *bios_dir;
+
+void pstrcpy(char *buf, int buf_size, const char *str);
+char *pstrcat(char *buf, int buf_size, const char *s);
+int strstart(const char *str, const char *val, const char **ptr);
+
+extern int vm_running;
+
+typedef void VMStopHandler(void *opaque, int reason);
+
+int qemu_add_vm_stop_handler(VMStopHandler *cb, void *opaque);
+void qemu_del_vm_stop_handler(VMStopHandler *cb, void *opaque);
+
+void vm_start(void);
+void vm_stop(int reason);
+
+typedef void QEMUResetHandler(void *opaque);
+
+void qemu_register_reset(QEMUResetHandler *func, void *opaque);
+void qemu_system_reset_request(void);
+void qemu_system_shutdown_request(void);
+
+void main_loop_wait(int timeout);
+
+extern int audio_enabled;
+extern int sb16_enabled;
+extern int adlib_enabled;
+extern int gus_enabled;
+extern int ram_size;
+extern int bios_size;
+extern int rtc_utc;
+extern int cirrus_vga_enabled;
+extern int graphic_width;
+extern int graphic_height;
+extern int graphic_depth;
+
+/* XXX: make it dynamic */
+#if defined (TARGET_PPC)
+#define BIOS_SIZE (512 * 1024)
+#else
+#define BIOS_SIZE ((256 + 64) * 1024)
+#endif
+
+/* keyboard/mouse support */
+
+#define MOUSE_EVENT_LBUTTON 0x01
+#define MOUSE_EVENT_RBUTTON 0x02
+#define MOUSE_EVENT_MBUTTON 0x04
+
+typedef void QEMUPutKBDEvent(void *opaque, int keycode);
+typedef void QEMUPutMouseEvent(void *opaque, int dx, int dy, int dz, int buttons_state);
+
+void qemu_add_kbd_event_handler(QEMUPutKBDEvent *func, void *opaque);
+void qemu_add_mouse_event_handler(QEMUPutMouseEvent *func, void *opaque);
+
+void kbd_put_keycode(int keycode);
+void kbd_mouse_event(int dx, int dy, int dz, int buttons_state);
+
+/* keysym is a unicode code except for special keys (see QEMU_KEY_xxx
+ constants) */
+#define QEMU_KEY_ESC1(c) ((c) | 0xe100)
+#define QEMU_KEY_BACKSPACE 0x007f
+#define QEMU_KEY_UP QEMU_KEY_ESC1('A')
+#define QEMU_KEY_DOWN QEMU_KEY_ESC1('B')
+#define QEMU_KEY_RIGHT QEMU_KEY_ESC1('C')
+#define QEMU_KEY_LEFT QEMU_KEY_ESC1('D')
+#define QEMU_KEY_HOME QEMU_KEY_ESC1(1)
+#define QEMU_KEY_END QEMU_KEY_ESC1(4)
+#define QEMU_KEY_PAGEUP QEMU_KEY_ESC1(5)
+#define QEMU_KEY_PAGEDOWN QEMU_KEY_ESC1(6)
+#define QEMU_KEY_DELETE QEMU_KEY_ESC1(3)
+
+#define QEMU_KEY_CTRL_UP 0xe400
+#define QEMU_KEY_CTRL_DOWN 0xe401
+#define QEMU_KEY_CTRL_LEFT 0xe402
+#define QEMU_KEY_CTRL_RIGHT 0xe403
+#define QEMU_KEY_CTRL_HOME 0xe404
+#define QEMU_KEY_CTRL_END 0xe405
+#define QEMU_KEY_CTRL_PAGEUP 0xe406
+#define QEMU_KEY_CTRL_PAGEDOWN 0xe407
+
+void kbd_put_keysym(int keysym);
+
+/* async I/O support */
+
+typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
+typedef int IOCanRWHandler(void *opaque);
+
+int qemu_add_fd_read_handler(int fd, IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque);
+void qemu_del_fd_read_handler(int fd);
+
+/* character device */
+
+#define CHR_EVENT_BREAK 0 /* serial break char */
+#define CHR_EVENT_FOCUS 1 /* focus to this terminal (modal input needed) */
+
+typedef void IOEventHandler(void *opaque, int event);
+
+typedef struct CharDriverState {
+ int (*chr_write)(struct CharDriverState *s, const uint8_t *buf, int len);
+ void (*chr_add_read_handler)(struct CharDriverState *s,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque);
+ IOEventHandler *chr_event;
+ void (*chr_send_event)(struct CharDriverState *chr, int event);
+ void *opaque;
+} CharDriverState;
+
+void qemu_chr_printf(CharDriverState *s, const char *fmt, ...);
+int qemu_chr_write(CharDriverState *s, const uint8_t *buf, int len);
+void qemu_chr_send_event(CharDriverState *s, int event);
+void qemu_chr_add_read_handler(CharDriverState *s,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque);
+void qemu_chr_add_event_handler(CharDriverState *s, IOEventHandler *chr_event);
+
+/* consoles */
+
+typedef struct DisplayState DisplayState;
+typedef struct TextConsole TextConsole;
+
+extern TextConsole *vga_console;
+
+TextConsole *graphic_console_init(DisplayState *ds);
+int is_active_console(TextConsole *s);
+CharDriverState *text_console_init(DisplayState *ds);
+void console_select(unsigned int index);
+
+/* serial ports */
+
+#define MAX_SERIAL_PORTS 4
+
+extern CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+
+/* network redirectors support */
+
+#define MAX_NICS 8
+
+typedef struct NetDriverState {
+ int index; /* index number in QEMU */
+ uint8_t macaddr[6];
+ char ifname[16];
+ void (*send_packet)(struct NetDriverState *nd,
+ const uint8_t *buf, int size);
+ void (*add_read_packet)(struct NetDriverState *nd,
+ IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque);
+ /* tun specific data */
+ int fd;
+ /* slirp specific data */
+} NetDriverState;
+
+extern int nb_nics;
+extern NetDriverState nd_table[MAX_NICS];
+
+void qemu_send_packet(NetDriverState *nd, const uint8_t *buf, int size);
+void qemu_add_read_packet(NetDriverState *nd, IOCanRWHandler *fd_can_read,
+ IOReadHandler *fd_read, void *opaque);
+
+/* timers */
+
+typedef struct QEMUClock QEMUClock;
+typedef struct QEMUTimer QEMUTimer;
+typedef void QEMUTimerCB(void *opaque);
+
+/* The real time clock should be used only for stuff which does not
+ change the virtual machine state, as it is run even if the virtual
+ machine is stopped. The real time clock has a frequency of 1000
+ Hz. */
+extern QEMUClock *rt_clock;
+
+/* The virtual clock is only run during the emulation. It is stopped
+ when the virtual machine is stopped. Virtual timers use a high
+ precision clock, usually cpu cycles (use ticks_per_sec). */
+extern QEMUClock *vm_clock;
+
+int64_t qemu_get_clock(QEMUClock *clock);
+
+QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque);
+void qemu_free_timer(QEMUTimer *ts);
+void qemu_del_timer(QEMUTimer *ts);
+void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time);
+int qemu_timer_pending(QEMUTimer *ts);
+
+extern int64_t ticks_per_sec;
+extern int pit_min_timer_count;
+
+void cpu_enable_ticks(void);
+void cpu_disable_ticks(void);
+
+/* VM Load/Save */
+
+typedef FILE QEMUFile;
+
+void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
+void qemu_put_byte(QEMUFile *f, int v);
+void qemu_put_be16(QEMUFile *f, unsigned int v);
+void qemu_put_be32(QEMUFile *f, unsigned int v);
+void qemu_put_be64(QEMUFile *f, uint64_t v);
+int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
+int qemu_get_byte(QEMUFile *f);
+unsigned int qemu_get_be16(QEMUFile *f);
+unsigned int qemu_get_be32(QEMUFile *f);
+uint64_t qemu_get_be64(QEMUFile *f);
+
+static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
+{
+ qemu_put_be64(f, *pv);
+}
+
+static inline void qemu_put_be32s(QEMUFile *f, const uint32_t *pv)
+{
+ qemu_put_be32(f, *pv);
+}
+
+static inline void qemu_put_be16s(QEMUFile *f, const uint16_t *pv)
+{
+ qemu_put_be16(f, *pv);
+}
+
+static inline void qemu_put_8s(QEMUFile *f, const uint8_t *pv)
+{
+ qemu_put_byte(f, *pv);
+}
+
+static inline void qemu_get_be64s(QEMUFile *f, uint64_t *pv)
+{
+ *pv = qemu_get_be64(f);
+}
+
+static inline void qemu_get_be32s(QEMUFile *f, uint32_t *pv)
+{
+ *pv = qemu_get_be32(f);
+}
+
+static inline void qemu_get_be16s(QEMUFile *f, uint16_t *pv)
+{
+ *pv = qemu_get_be16(f);
+}
+
+static inline void qemu_get_8s(QEMUFile *f, uint8_t *pv)
+{
+ *pv = qemu_get_byte(f);
+}
+
+int64_t qemu_ftell(QEMUFile *f);
+int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
+
+typedef void SaveStateHandler(QEMUFile *f, void *opaque);
+typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
+
+int qemu_loadvm(const char *filename);
+int qemu_savevm(const char *filename);
+int register_savevm(const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveStateHandler *save_state,
+ LoadStateHandler *load_state,
+ void *opaque);
+void qemu_get_timer(QEMUFile *f, QEMUTimer *ts);
+void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
+
+/* block.c */
+typedef struct BlockDriverState BlockDriverState;
+typedef struct BlockDriver BlockDriver;
+
+extern BlockDriver bdrv_raw;
+extern BlockDriver bdrv_cow;
+extern BlockDriver bdrv_qcow;
+extern BlockDriver bdrv_vmdk;
+extern BlockDriver bdrv_cloop;
+
+void bdrv_init(void);
+BlockDriver *bdrv_find_format(const char *format_name);
+int bdrv_create(BlockDriver *drv,
+ const char *filename, int64_t size_in_sectors,
+ const char *backing_file, int flags);
+BlockDriverState *bdrv_new(const char *device_name);
+void bdrv_delete(BlockDriverState *bs);
+int bdrv_open(BlockDriverState *bs, const char *filename, int snapshot);
+int bdrv_open2(BlockDriverState *bs, const char *filename, int snapshot,
+ BlockDriver *drv);
+void bdrv_close(BlockDriverState *bs);
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr);
+int bdrv_commit(BlockDriverState *bs);
+void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size);
+
+#define BDRV_TYPE_HD 0
+#define BDRV_TYPE_CDROM 1
+#define BDRV_TYPE_FLOPPY 2
+
+void bdrv_set_geometry_hint(BlockDriverState *bs,
+ int cyls, int heads, int secs);
+void bdrv_set_type_hint(BlockDriverState *bs, int type);
+void bdrv_get_geometry_hint(BlockDriverState *bs,
+ int *pcyls, int *pheads, int *psecs);
+int bdrv_get_type_hint(BlockDriverState *bs);
+int bdrv_is_removable(BlockDriverState *bs);
+int bdrv_is_read_only(BlockDriverState *bs);
+int bdrv_is_inserted(BlockDriverState *bs);
+int bdrv_is_locked(BlockDriverState *bs);
+void bdrv_set_locked(BlockDriverState *bs, int locked);
+void bdrv_set_change_cb(BlockDriverState *bs,
+ void (*change_cb)(void *opaque), void *opaque);
+void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size);
+void bdrv_info(void);
+BlockDriverState *bdrv_find(const char *name);
+void bdrv_iterate(void (*it)(void *opaque, const char *name), void *opaque);
+int bdrv_is_encrypted(BlockDriverState *bs);
+int bdrv_set_key(BlockDriverState *bs, const char *key);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque);
+const char *bdrv_get_device_name(BlockDriverState *bs);
+
+int qcow_get_cluster_size(BlockDriverState *bs);
+int qcow_compress_cluster(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf);
+
+#ifndef QEMU_TOOL
+/* ISA bus */
+
+extern target_phys_addr_t isa_mem_base;
+
+typedef void (IOPortWriteFunc)(void *opaque, uint32_t address, uint32_t data);
+typedef uint32_t (IOPortReadFunc)(void *opaque, uint32_t address);
+
+int register_ioport_read(int start, int length, int size,
+ IOPortReadFunc *func, void *opaque);
+int register_ioport_write(int start, int length, int size,
+ IOPortWriteFunc *func, void *opaque);
+void isa_unassign_ioport(int start, int length);
+
+/* PCI bus */
+
+extern int pci_enabled;
+
+extern target_phys_addr_t pci_mem_base;
+
+typedef struct PCIBus PCIBus;
+typedef struct PCIDevice PCIDevice;
+
+typedef void PCIConfigWriteFunc(PCIDevice *pci_dev,
+ uint32_t address, uint32_t data, int len);
+typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev,
+ uint32_t address, int len);
+typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
+ uint32_t addr, uint32_t size, int type);
+
+#define PCI_ADDRESS_SPACE_MEM 0x00
+#define PCI_ADDRESS_SPACE_IO 0x01
+#define PCI_ADDRESS_SPACE_MEM_PREFETCH 0x08
+
+typedef struct PCIIORegion {
+ uint32_t addr; /* current PCI mapping address. -1 means not mapped */
+ uint32_t size;
+ uint8_t type;
+ PCIMapIORegionFunc *map_func;
+} PCIIORegion;
+
+#define PCI_ROM_SLOT 6
+#define PCI_NUM_REGIONS 7
+struct PCIDevice {
+ /* PCI config space */
+ uint8_t config[256];
+
+ /* the following fields are read only */
+ PCIBus *bus;
+ int devfn;
+ char name[64];
+ PCIIORegion io_regions[PCI_NUM_REGIONS];
+
+ /* do not access the following fields */
+ PCIConfigReadFunc *config_read;
+ PCIConfigWriteFunc *config_write;
+ int irq_index;
+};
+
+PCIDevice *pci_register_device(PCIBus *bus, const char *name,
+ int instance_size, int devfn,
+ PCIConfigReadFunc *config_read,
+ PCIConfigWriteFunc *config_write);
+
+void pci_register_io_region(PCIDevice *pci_dev, int region_num,
+ uint32_t size, int type,
+ PCIMapIORegionFunc *map_func);
+
+void pci_set_irq(PCIDevice *pci_dev, int irq_num, int level);
+
+uint32_t pci_default_read_config(PCIDevice *d,
+ uint32_t address, int len);
+void pci_default_write_config(PCIDevice *d,
+ uint32_t address, uint32_t val, int len);
+void generic_pci_save(QEMUFile* f, void *opaque);
+int generic_pci_load(QEMUFile* f, void *opaque, int version_id);
+
+extern struct PIIX3State *piix3_state;
+
+PCIBus *i440fx_init(void);
+void piix3_init(PCIBus *bus);
+void pci_bios_init(void);
+void pci_info(void);
+
+/* temporary: will be moved in platform specific file */
+PCIBus *pci_prep_init(void);
+struct openpic_t;
+void pci_pmac_set_openpic(PCIBus *bus, struct openpic_t *openpic);
+PCIBus *pci_pmac_init(void);
+
+/* openpic.c */
+typedef struct openpic_t openpic_t;
+void openpic_set_irq (openpic_t *opp, int n_IRQ, int level);
+openpic_t *openpic_init (PCIBus *bus, int *pmem_index, int nb_cpus);
+
+/* vga.c */
+
+#define VGA_RAM_SIZE (4096 * 1024)
+
+struct DisplayState {
+ uint8_t *data;
+ int linesize;
+ int depth;
+ int width;
+ int height;
+ void (*dpy_update)(struct DisplayState *s, int x, int y, int w, int h);
+ void (*dpy_resize)(struct DisplayState *s, int w, int h);
+ void (*dpy_refresh)(struct DisplayState *s);
+};
+
+static inline void dpy_update(DisplayState *s, int x, int y, int w, int h)
+{
+ s->dpy_update(s, x, y, w, h);
+}
+
+static inline void dpy_resize(DisplayState *s, int w, int h)
+{
+ s->dpy_resize(s, w, h);
+}
+
+int vga_initialize(PCIBus *bus, DisplayState *ds, uint8_t *vga_ram_base,
+ unsigned long vga_ram_offset, int vga_ram_size);
+void vga_update_display(void);
+void vga_invalidate_display(void);
+void vga_screen_dump(const char *filename);
+
+/* vnc.c */
+void vnc_display_init(DisplayState *ds, int useAlsoSDL,
+ long port, const char* connect);
+
+/* cirrus_vga.c */
+void pci_cirrus_vga_init(PCIBus *bus, DisplayState *ds, uint8_t *vga_ram_base,
+ unsigned long vga_ram_offset, int vga_ram_size);
+void isa_cirrus_vga_init(DisplayState *ds, uint8_t *vga_ram_base,
+ unsigned long vga_ram_offset, int vga_ram_size);
+
+/* sdl.c */
+void sdl_display_init(DisplayState *ds, int full_screen);
+
+/* ide.c */
+#define MAX_DISKS 4
+
+extern BlockDriverState *bs_table[MAX_DISKS];
+
+void isa_ide_init(int iobase, int iobase2, int irq,
+ BlockDriverState *hd0, BlockDriverState *hd1);
+void pci_ide_init(PCIBus *bus, BlockDriverState **hd_table);
+void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table);
+int pmac_ide_init (BlockDriverState **hd_table,
+ openpic_t *openpic, int irq);
+
+/* sb16.c */
+void SB16_init (void);
+
+/* adlib.c */
+void Adlib_init (void);
+
+/* gus.c */
+void GUS_init (void);
+
+/* dma.c */
+typedef int (*DMA_transfer_handler) (void *opaque, int nchan, int pos, int size);
+int DMA_get_channel_mode (int nchan);
+int DMA_read_memory (int nchan, void *buf, int pos, int size);
+int DMA_write_memory (int nchan, void *buf, int pos, int size);
+void DMA_hold_DREQ (int nchan);
+void DMA_release_DREQ (int nchan);
+void DMA_schedule(int nchan);
+void DMA_run (void);
+void DMA_init (int high_page_enable);
+void DMA_register_channel (int nchan,
+ DMA_transfer_handler transfer_handler,
+ void *opaque);
+/* fdc.c */
+#define MAX_FD 2
+extern BlockDriverState *fd_table[MAX_FD];
+
+typedef struct fdctrl_t fdctrl_t;
+
+fdctrl_t *fdctrl_init (int irq_lvl, int dma_chann, int mem_mapped,
+ uint32_t io_base,
+ BlockDriverState **fds);
+int fdctrl_get_drive_type(fdctrl_t *fdctrl, int drive_num);
+
+/* ne2000.c */
+
+void isa_ne2000_init(int base, int irq, NetDriverState *nd);
+void pci_ne2000_init(PCIBus *bus, NetDriverState *nd);
+
+/* pckbd.c */
+
+void kbd_init(void);
+extern const char* keyboard_layout;
+
+/* mc146818rtc.c */
+
+typedef struct RTCState RTCState;
+
+RTCState *rtc_init(int base, int irq);
+void rtc_set_memory(RTCState *s, int addr, int val);
+void rtc_set_date(RTCState *s, const struct tm *tm);
+
+/* serial.c */
+
+typedef struct SerialState SerialState;
+SerialState *serial_init(int base, int irq, CharDriverState *chr);
+
+/* i8259.c */
+
+void pic_set_irq(int irq, int level);
+void pic_init(void);
+uint32_t pic_intack_read(CPUState *env);
+void pic_info(void);
+void irq_info(void);
+int pic_irq2vec(int irq);
+
+/* i8254.c */
+
+#define PIT_FREQ 1193182
+
+typedef struct PITState PITState;
+
+PITState *pit_init(int base, int irq);
+void pit_set_gate(PITState *pit, int channel, int val);
+int pit_get_gate(PITState *pit, int channel);
+int pit_get_out(PITState *pit, int channel, int64_t current_time);
+
+/* pc.c */
+void pc_init(int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename);
+
+/* ppc.c */
+void ppc_init (int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename);
+void ppc_prep_init (int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename);
+void ppc_chrp_init(int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename);
+#ifdef TARGET_PPC
+ppc_tb_t *cpu_ppc_tb_init (CPUState *env, uint32_t freq);
+#endif
+void PREP_debug_write (void *opaque, uint32_t addr, uint32_t val);
+
+extern CPUWriteMemoryFunc *PPC_io_write[];
+extern CPUReadMemoryFunc *PPC_io_read[];
+extern int prep_enabled;
+
+/* sun4m.c */
+void sun4m_init(int ram_size, int vga_ram_size, int boot_device,
+ DisplayState *ds, const char **fd_filename, int snapshot,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename);
+
+/* iommu.c */
+void iommu_init(uint32_t addr);
+uint32_t iommu_translate(uint32_t addr);
+
+/* lance.c */
+void lance_init(NetDriverState *nd, int irq, uint32_t leaddr, uint32_t ledaddr);
+
+/* tcx.c */
+void tcx_init(DisplayState *ds, uint32_t addr);
+
+/* sched.c */
+void sched_init();
+
+/* magic-load.c */
+void magic_init(const char *kfn, int kloadaddr, uint32_t addr);
+
+/* timer.c */
+void timer_init(uint32_t addr, int irq);
+
+/* NVRAM helpers */
+#include "hw/m48t59.h"
+
+void NVRAM_set_byte (m48t59_t *nvram, uint32_t addr, uint8_t value);
+uint8_t NVRAM_get_byte (m48t59_t *nvram, uint32_t addr);
+void NVRAM_set_word (m48t59_t *nvram, uint32_t addr, uint16_t value);
+uint16_t NVRAM_get_word (m48t59_t *nvram, uint32_t addr);
+void NVRAM_set_lword (m48t59_t *nvram, uint32_t addr, uint32_t value);
+uint32_t NVRAM_get_lword (m48t59_t *nvram, uint32_t addr);
+void NVRAM_set_string (m48t59_t *nvram, uint32_t addr,
+ const unsigned char *str, uint32_t max);
+int NVRAM_get_string (m48t59_t *nvram, uint8_t *dst, uint16_t addr, int max);
+void NVRAM_set_crc (m48t59_t *nvram, uint32_t addr,
+ uint32_t start, uint32_t count);
+int PPC_NVRAM_set_params (m48t59_t *nvram, uint16_t NVRAM_size,
+ const unsigned char *arch,
+ uint32_t RAM_size, int boot_device,
+ uint32_t kernel_image, uint32_t kernel_size,
+ const char *cmdline,
+ uint32_t initrd_image, uint32_t initrd_size,
+ uint32_t NVRAM_image,
+ int width, int height, int depth);
+
+/* adb.c */
+
+#define MAX_ADB_DEVICES 16
+
+#define ADB_MAX_OUT_LEN 16
+
+typedef struct ADBDevice ADBDevice;
+
+/* buf = NULL means polling */
+typedef int ADBDeviceRequest(ADBDevice *d, uint8_t *buf_out,
+ const uint8_t *buf, int len);
+typedef int ADBDeviceReset(ADBDevice *d);
+
+struct ADBDevice {
+ struct ADBBusState *bus;
+ int devaddr;
+ int handler;
+ ADBDeviceRequest *devreq;
+ ADBDeviceReset *devreset;
+ void *opaque;
+};
+
+typedef struct ADBBusState {
+ ADBDevice devices[MAX_ADB_DEVICES];
+ int nb_devices;
+ int poll_index;
+} ADBBusState;
+
+int adb_request(ADBBusState *s, uint8_t *buf_out,
+ const uint8_t *buf, int len);
+int adb_poll(ADBBusState *s, uint8_t *buf_out);
+
+ADBDevice *adb_register_device(ADBBusState *s, int devaddr,
+ ADBDeviceRequest *devreq,
+ ADBDeviceReset *devreset,
+ void *opaque);
+void adb_kbd_init(ADBBusState *bus);
+void adb_mouse_init(ADBBusState *bus);
+
+/* cuda.c */
+
+extern ADBBusState adb_bus;
+int cuda_init(openpic_t *openpic, int irq);
+
+#endif /* defined(QEMU_TOOL) */
+
+/* monitor.c */
+void monitor_init(CharDriverState *hd, int show_banner);
+void term_puts(const char *str);
+void term_vprintf(const char *fmt, va_list ap);
+void term_printf(const char *fmt, ...) __attribute__ ((__format__ (__printf__, 1, 2)));
+void term_flush(void);
+void term_print_help(void);
+
+/* readline.c */
+typedef void ReadLineFunc(void *opaque, const char *str);
+
+extern int completion_index;
+void add_completion(const char *str);
+void readline_handle_byte(int ch);
+void readline_find_completion(const char *cmdline);
+const char *readline_get_history(unsigned int index);
+void readline_start(const char *prompt, int is_password,
+ ReadLineFunc *readline_func, void *opaque);
+
+/* gdbstub.c */
+
+#define DEFAULT_GDBSTUB_PORT 1234
+
+int gdbserver_start(int port);
+
+#endif /* VL_H */
diff --git a/tools/ioemu/vnc.c b/tools/ioemu/vnc.c
new file mode 100644
index 0000000000..24c397dcc5
--- /dev/null
+++ b/tools/ioemu/vnc.c
@@ -0,0 +1,549 @@
+/*
+ * QEMU VNC display driver (uses LibVNCServer, based on QEMU SDL driver)
+ *
+ * Copyright (c) 2003,2004 Fabrice Bellard, Matthew Mastracci,
+ * Johannes E. Schindelin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *
+ * reverse connection setup copied from x11vnc.c
+ * Copyright (c) 2002-2005 Karl J. Runge <runge@karlrunge.com>
+ * All rights reserved.
+ * based on:
+ * the original x11vnc.c in libvncserver (Johannes E. Schindelin)
+ * x0rfbserver, the original native X vnc server (Jens Wagner)
+ * krfb, the KDE desktopsharing project (Tim Jansen)
+ */
+#include "vl.h"
+
+#include <rfb/rfb.h>
+
+/* keyboard stuff */
+#include <rfb/keysym.h>
+#include "keysym_adapter_vnc.h"
+#include "keyboard_rdesktop.c"
+
+
+#ifndef _WIN32
+#include <signal.h>
+#endif
+
+static rfbScreenInfoPtr screen;
+static DisplayState* ds_sdl=0;
+static void* kbd_layout=0; // TODO: move into rfbClient
+
+/* mouse stuff */
+
+typedef struct mouse_magic_t {
+ /* When calibrating, mouse_calibration contains a copy of the
+ * current frame buffer. After a simulated mouse movement, the
+ * update function only gets (0,y1,width,y2) as bounding box
+ * of the changed region, so we refine that with the help of
+ * this copy, and then update the copy. */
+ char* calibration;
+ /* Mouse handling using VNC used to be wrong, because if moving the
+ * mouse very fast, the pointer got even faster. The reason for this:
+ * when the mouse sends a delta of at least 4 (Windows: 3) pixels,
+ * it is treated as if it were double the amount. I call this the
+ * sonic wall. */
+ int sonic_wall_x;
+ int sonic_wall_y;
+ /* Unfortunately, Windows and X behave differently, when the sonic
+ * wall was reached in one axis, but not the other: Windows treats
+ * them independently. I call this orthogonal. */
+ char sonic_wall_is_orthogonal;
+ /* last_dy contains the last delta sent on the y axis. We don't
+ * use the x axis (see mouse_calibration). */
+ //static int last_dy=0;
+} mouse_magic_t;
+
+mouse_magic_t* init_mouse_magic() {
+ mouse_magic_t* ret=(mouse_magic_t*)malloc(sizeof(mouse_magic_t));
+
+ ret->calibration=0;
+#ifdef EXPECT_WINDOWS_GUEST
+ ret->sonic_wall_x=3;
+ ret->sonic_wall_y=3;
+ ret->sonic_wall_is_orthogonal=1;
+#else
+ ret->sonic_wall_x=4;
+ ret->sonic_wall_y=4;
+ ret->sonic_wall_is_orthogonal=0;
+#endif
+ return ret;
+}
+
+static void vnc_save(QEMUFile* f,void* opaque)
+{
+ mouse_magic_t* s=(mouse_magic_t*)opaque;
+
+ qemu_put_be32s(f, &s->sonic_wall_x);
+ qemu_put_be32s(f, &s->sonic_wall_y);
+ qemu_put_8s(f, &s->sonic_wall_is_orthogonal);
+}
+
+static int vnc_load(QEMUFile* f,void* opaque,int version_id)
+{
+ mouse_magic_t* s=(mouse_magic_t*)opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ qemu_get_be32s(f, &s->sonic_wall_x);
+ qemu_get_be32s(f, &s->sonic_wall_y);
+ qemu_get_8s(f, &s->sonic_wall_is_orthogonal);
+
+ return 0;
+}
+
+static mouse_magic_t* mouse_magic;
+
+typedef struct {
+ int x,y,w,h;
+} rectangle_t;
+/* In order to calibrate the mouse, we have to know about the bounding boxes
+ * of the last changes. */
+static rectangle_t last_update, before_update;
+static int updates_since_mouse=0;
+
+static int mouse_x,mouse_y;
+static int new_mouse_x,new_mouse_y,new_mouse_z,new_mouse_buttons;
+
+static void init_mouse(int initial_x,int initial_y) {
+ mouse_x=new_mouse_x=initial_x;
+ mouse_y=new_mouse_y=initial_y;
+ new_mouse_z=new_mouse_buttons=0;
+ mouse_magic->calibration = 0;
+}
+
+static void mouse_refresh() {
+ int dx=0,dy=0,dz=new_mouse_z;
+ static int counter=1;
+
+ counter++;
+ if(!mouse_magic->calibration && counter>=2) { counter=0; return; }
+
+ dx=new_mouse_x-mouse_x;
+ dy=new_mouse_y-mouse_y;
+
+ if(mouse_magic->sonic_wall_is_orthogonal) {
+ if(abs(dx)>=mouse_magic->sonic_wall_x) { dx/=2; mouse_x+=dx; }
+ if(abs(dy)>=mouse_magic->sonic_wall_y) { dy/=2; mouse_y+=dy; }
+ } else {
+ if(abs(dx)>=mouse_magic->sonic_wall_x || abs(dy)>=mouse_magic->sonic_wall_y) {
+ dx/=2; mouse_x+=dx;
+ dy/=2; mouse_y+=dy;
+ }
+ }
+ //fprintf(stderr,"sending mouse event %d,%d\n",dx,dy);
+ kbd_mouse_event(dx,dy,dz,new_mouse_buttons);
+ mouse_x+=dx;
+ mouse_y+=dy;
+
+ updates_since_mouse=0;
+}
+
+static int calibration_step=0;
+//static int calibration_count=0;
+
+static void mouse_find_bounding_box_of_difference(int* x,int* y,int* w,int* h) {
+ int i,j,X=*x,Y=*y,W=*w,H=*h;
+ int bpp=screen->depth/8;
+
+ *x=screen->width; *w=-*x;
+ *y=screen->height; *h=-*y;
+ for(i=X;i<X+W;i++)
+ for(j=Y;j<Y+H;j++) {
+ int offset=i*bpp+j*screen->paddedWidthInBytes;
+ if(memcmp(mouse_magic->calibration+offset,screen->frameBuffer+offset,bpp)) {
+ if(i<((*x))) { (*w)+=(*x)-i; (*x)=i; }
+ if(i>(*x)+(*w)) (*w)=i-(*x);
+ if(j<(*y)) { (*h)+=(*y)-j; (*y)=j; }
+ if(j>(*y)+(*h)) (*h)=j-(*y);
+ }
+ }
+ if(h>0)
+ memcpy(mouse_magic->calibration+Y*screen->paddedWidthInBytes,
+ screen->frameBuffer+Y*screen->paddedWidthInBytes,
+ H*screen->paddedWidthInBytes);
+}
+
+static void start_mouse_calibration() {
+ int size = screen->height*screen->paddedWidthInBytes;
+ if(mouse_magic->calibration)
+ free(mouse_magic->calibration);
+ mouse_magic->calibration = malloc(size);
+ memcpy(mouse_magic->calibration, screen->frameBuffer, size);
+ calibration_step=0;
+ // calibration_count=-1;
+ //calibration_count=1000; updates_since_mouse=1;
+ fprintf(stderr,"Starting mouse calibration:\n");
+}
+
+static void stop_mouse_calibration() {
+ if(mouse_magic->calibration)
+ free(mouse_magic->calibration);
+ mouse_magic->calibration = 0;
+}
+
+static void mouse_calibration_update(int x,int y,int w,int h) {
+ mouse_find_bounding_box_of_difference(&x,&y,&w,&h);
+ if(w<=0 || h<=0)
+ return;
+ last_update.x=x;
+ last_update.y=y;
+ last_update.w=w;
+ last_update.h=h;
+ updates_since_mouse++;
+}
+
+static void mouse_calibration_refresh() {
+ static rectangle_t cursor;
+ static int x,y;
+ static int idle_counter;
+
+ if(calibration_step==0)
+ idle_counter=0;
+ else {
+ if(updates_since_mouse==0) {
+ idle_counter++;
+ if(idle_counter>5) {
+ fprintf(stderr, "Calibration failed: no update for 5 cycles\n");
+ stop_mouse_calibration();
+ }
+ return;
+ }
+ if(updates_since_mouse!=1) {
+ fprintf(stderr,"Calibration failed: updates=%d\n",updates_since_mouse);
+ stop_mouse_calibration();
+ return;
+ }
+ }
+
+ if(calibration_step==0) {
+ x=0; y=1;
+ kbd_mouse_event(0,-1,0,0);
+ calibration_step++;
+ } else if(calibration_step==1) {
+ // find out the initial position of the cursor
+ cursor=last_update;
+ cursor.h--;
+ calibration_step++;
+ mouse_magic->sonic_wall_y=-1;
+ last_update=cursor;
+ x=0; y=2;
+ goto move_calibrate;
+ } else if(calibration_step==2) {
+ // find out the sonic_wall
+ if(last_update.y==before_update.y-2*y) {
+ mouse_magic->sonic_wall_y=y;
+ // test orthogonality
+ calibration_step++;
+ x=mouse_magic->sonic_wall_y+1; y=1;
+ goto move_calibrate;
+ } else if(last_update.y<=2) {
+ if(y<6)
+ fprintf(stderr,"Calibration failed: not enough head room!\n");
+ else
+ fprintf(stderr,"Calibration finished.\n");
+ mouse_magic->sonic_wall_x=mouse_magic->sonic_wall_y=32768;
+ goto stop_calibration;
+ } else if(last_update.y!=before_update.y-y) {
+ fprintf(stderr,"Calibration failed: delta=%d (expected: %d)\n",last_update.y-before_update.y,-y);
+ goto stop_calibration;
+ } else {
+ y++;
+move_calibrate:
+ kbd_mouse_event(-x,-y,0,0);
+ before_update=last_update;
+ }
+ } else if(calibration_step==3) {
+ if(last_update.y==before_update.y-2)
+ mouse_magic->sonic_wall_is_orthogonal=0;
+ else if(last_update.y==before_update.y-1)
+ mouse_magic->sonic_wall_is_orthogonal=-1;
+ else
+ fprintf(stderr,"Calibration failed: no clue of orthogonal.\n");
+ mouse_magic->sonic_wall_x=mouse_magic->sonic_wall_y;
+ if(last_update.x==before_update.x-mouse_magic->sonic_wall_x)
+ mouse_magic->sonic_wall_x++;
+ else if(last_update.x!=before_update.x-x*2)
+ fprintf(stderr,"Calibration failed: could not determine horizontal sonic wall x\n");
+ fprintf(stderr,"Calibration finished\n");
+stop_calibration:
+ mouse_x=last_update.x;
+ mouse_y=last_update.y;
+ stop_mouse_calibration();
+ }
+ updates_since_mouse=0;
+}
+
+/* end of mouse stuff */
+
+static void vnc_update(DisplayState *ds, int x, int y, int w, int h)
+{
+ if(ds_sdl)
+ ds_sdl->dpy_update(ds_sdl,x,y,w,h);
+ if(0) fprintf(stderr,"updating x=%d y=%d w=%d h=%d\n", x, y, w, h);
+ rfbMarkRectAsModified(screen,x,y,x+w,y+h);
+ if(mouse_magic->calibration) {
+ mouse_calibration_update(x,y,w,h);
+ }
+}
+
+#include <SDL/SDL_video.h>
+extern SDL_PixelFormat* sdl_get_format();
+
+static void vnc_resize(DisplayState *ds, int w, int h)
+{
+ int depth = screen->bitsPerPixel;
+ rfbClientIteratorPtr iter;
+ rfbClientPtr cl;
+
+ if(w==screen->width && h==screen->height)
+ return;
+
+ if(ds_sdl) {
+ SDL_PixelFormat* sdl_format;
+ ds_sdl->dpy_resize(ds_sdl,w,h);
+ ds->data = ds_sdl->data;
+ ds->linesize = screen->paddedWidthInBytes = ds_sdl->linesize;
+ screen->serverFormat.bitsPerPixel = screen->serverFormat.depth
+ = screen->bitsPerPixel = depth = ds->depth = ds_sdl->depth;
+ w = ds->width = ds_sdl->width;
+ h = ds->height = ds_sdl->height;
+ sdl_format=sdl_get_format();
+ if(sdl_format->palette==0) {
+ screen->serverFormat.trueColour=TRUE;
+ screen->serverFormat.redShift=sdl_format->Rshift;
+ screen->serverFormat.greenShift=sdl_format->Gshift;
+ screen->serverFormat.blueShift=sdl_format->Bshift;
+ screen->serverFormat.redMax=sdl_format->Rmask>>screen->serverFormat.redShift;
+ screen->serverFormat.greenMax=sdl_format->Gmask>>screen->serverFormat.greenShift;
+ screen->serverFormat.blueMax=sdl_format->Bmask>>screen->serverFormat.blueShift;
+ } else {
+ rfbColourMap* cmap=&(screen->colourMap);
+ int i;
+ screen->serverFormat.trueColour=FALSE;
+ cmap->is16=FALSE;
+ cmap->count=sdl_format->palette->ncolors;
+ if(cmap->data.bytes==0)
+ cmap->data.bytes=malloc(256*3);
+ for(i=0;i<cmap->count;i++) {
+ cmap->data.bytes[3*i+0]=sdl_format->palette->colors[i].r;
+ cmap->data.bytes[3*i+1]=sdl_format->palette->colors[i].g;
+ cmap->data.bytes[3*i+2]=sdl_format->palette->colors[i].b;
+ }
+ }
+ } else {
+ ds->data = (unsigned char*)realloc(ds->data, w*h*depth/8);
+ ds->linesize = screen->paddedWidthInBytes = w*2;
+ ds->width = w;
+ ds->height = h;
+ ds->depth = depth;
+ screen->paddedWidthInBytes = w*depth/8;
+ }
+ screen->frameBuffer = ds->data;
+
+ screen->width = w;
+ screen->height = h;
+
+ iter=rfbGetClientIterator(screen);
+ while((cl=rfbClientIteratorNext(iter)))
+ if(cl->useNewFBSize)
+ cl->newFBSizePending = TRUE;
+ else
+ rfbLog("Warning: Client %s does not support NewFBSize!\n",cl->host);
+ rfbReleaseClientIterator(iter);
+
+ if(mouse_magic->calibration) {
+ fprintf(stderr,"Warning: mouse calibration interrupted by video mode change\n");
+ stop_mouse_calibration();
+ }
+ init_mouse(w/2,h/2);
+}
+
+static void vnc_process_key(rfbBool down, rfbKeySym keySym, rfbClientPtr cl)
+{
+ static int magic=0; // Ctrl+Alt starts calibration
+
+ if(is_active_console(vga_console)) {
+ WORD keycode=keysym2scancode(kbd_layout, keySym);
+ if(keycode>=0x80)
+ keycode=(keycode<<8)^0x80e0;
+ while(keycode!=0) {
+ kbd_put_keycode((keycode&0xff)|(down?0:0x80));
+ keycode>>=8;
+ }
+ } else if(down) {
+ kbd_put_keysym(keySym);
+ }
+ if(down) {
+ if(keySym==XK_Control_L)
+ magic|=1;
+ else if(keySym==XK_Alt_L)
+ magic|=2;
+ } else {
+ if((magic&3)==3) {
+ switch(keySym) {
+ case XK_Control_L:
+ magic&=~1;
+ break;
+ case XK_Alt_L:
+ magic&=~2;
+ break;
+ case XK_m:
+ magic=0;
+ start_mouse_calibration();
+ break;
+ case XK_1 ... XK_9:
+ magic=0;
+ fprintf(stderr,"switch to %d\n",keySym-XK_1);
+ console_select(keySym - XK_1);
+ if (is_active_console(vga_console)) {
+ /* tell the vga console to redisplay itself */
+ vga_invalidate_display();
+ vnc_update(0,0,0,screen->width,screen->height);
+ }
+ break;
+ }
+ }
+ }
+}
+
+static void vnc_process_mouse(int buttonMask, int x, int y, rfbClientPtr cl)
+{
+ new_mouse_x=x; new_mouse_y=y; new_mouse_buttons=0;
+ if(buttonMask&1) new_mouse_buttons|=MOUSE_EVENT_LBUTTON;
+ if(buttonMask&2) new_mouse_buttons|=MOUSE_EVENT_MBUTTON;
+ if(buttonMask&4) new_mouse_buttons|=MOUSE_EVENT_RBUTTON;
+ if(buttonMask&8) new_mouse_z--;
+ if(buttonMask&16) new_mouse_z++;
+}
+
+ static void vnc_refresh(DisplayState *ds) {
+ if(ds_sdl)
+ ds_sdl->dpy_refresh(ds_sdl);
+ else
+ vga_update_display();
+ rfbProcessEvents(screen,0);
+ if(mouse_magic->calibration) {
+ mouse_calibration_refresh();
+ } else {
+ mouse_refresh();
+ }
+ }
+
+static void vnc_cleanup(void)
+{
+ rfbScreenCleanup(screen);
+}
+
+
+void vnc_display_init(DisplayState *ds, int useAlsoSDL,
+ long port, const char* connect)
+{
+ int len, rport = 5500;
+ char host[1024];
+ char *p;
+ rfbClientPtr cl;
+
+ if(!keyboard_layout) {
+ fprintf(stderr, "No keyboard language specified\n");
+ exit(1);
+ }
+
+ kbd_layout=init_keyboard_layout(keyboard_layout);
+ if(!kbd_layout) {
+ fprintf(stderr, "Could not initialize keyboard\n");
+ exit(1);
+ }
+
+
+ mouse_magic=init_mouse_magic();
+ register_savevm("vnc", 0, 1, vnc_save, vnc_load, mouse_magic);
+
+ rfbLog=rfbErr=term_printf;
+ screen=rfbGetScreen(0,0,0,0,5,3,2);
+ if(screen==0) {
+ fprintf(stderr, "Could not initialize VNC - exiting\n");
+ exit(1);
+ }
+
+
+ screen->serverFormat.redShift = 11;
+ screen->serverFormat.greenShift = 5;
+ screen->serverFormat.blueShift = 0;
+ screen->serverFormat.redMax = 31;
+ screen->serverFormat.greenMax = 63;
+ screen->serverFormat.blueMax = 31;
+
+ if (port != 0)
+ screen->port = port;
+ else
+ screen->autoPort = TRUE;
+
+ if(useAlsoSDL) {
+ ds_sdl=(DisplayState*)malloc(sizeof(DisplayState));
+ sdl_display_init(ds_sdl,0);
+ screen->frameBuffer = ds_sdl->data;
+ } else
+ screen->frameBuffer = malloc(640*400*2);
+
+ screen->desktopName = "QEMU/VNC";
+ screen->cursor = 0;
+ screen->kbdAddEvent = vnc_process_key;
+ screen->ptrAddEvent = vnc_process_mouse;
+ rfbInitServer(screen);
+
+ vnc_resize(ds,640,400);
+
+ ds->dpy_update = vnc_update;
+ ds->dpy_resize = vnc_resize;
+ ds->dpy_refresh = vnc_refresh;
+
+ /* deal with reverse connections */
+ if ( connect == NULL || (len = strlen(connect)) < 1) {
+ return;
+ }
+ if ( len > 1024 ) {
+ fprintf(stderr, "vnc reverse connect name too long\n");
+ exit(1);
+ }
+ strncpy(host, connect, len);
+ host[len] = '\0';
+ /* extract port, if any */
+ if ((p = strchr(host, ':')) != NULL) {
+ rport = atoi(p+1);
+ *p = '\0';
+ }
+ cl = rfbReverseConnection(screen, host, rport);
+ if (cl == NULL) {
+ fprintf(stderr, "reverse_connect: %s failed\n", connect);
+ } else {
+ fprintf(stderr, "reverse_connect: %s/%s OK\n", connect, cl->host);
+ }
+
+ atexit(vnc_cleanup);
+
+
+
+}
+
diff --git a/tools/ioemu/x86_32.ld b/tools/ioemu/x86_32.ld
new file mode 100644
index 0000000000..d41c62695e
--- /dev/null
+++ b/tools/ioemu/x86_32.ld
@@ -0,0 +1,140 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+SEARCH_DIR(/lib); SEARCH_DIR(/usr/lib); SEARCH_DIR(/usr/local/lib); SEARCH_DIR(/usr/alpha-unknown-linux-gnu/lib);
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0x60000000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .hash : { *(.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.text :
+ { *(.rel.text) *(.rel.gnu.linkonce.t*) }
+ .rela.text :
+ { *(.rela.text) *(.rela.gnu.linkonce.t*) }
+ .rel.data :
+ { *(.rel.data) *(.rel.gnu.linkonce.d*) }
+ .rela.data :
+ { *(.rela.data) *(.rela.gnu.linkonce.d*) }
+ .rel.rodata :
+ { *(.rel.rodata) *(.rel.gnu.linkonce.r*) }
+ .rela.rodata :
+ { *(.rela.rodata) *(.rela.gnu.linkonce.r*) }
+ .rel.got : { *(.rel.got) }
+ .rela.got : { *(.rela.got) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rel.init : { *(.rel.init) }
+ .rela.init : { *(.rela.init) }
+ .rel.fini : { *(.rel.fini) }
+ .rela.fini : { *(.rela.fini) }
+ .rel.bss : { *(.rel.bss) }
+ .rela.bss : { *(.rela.bss) }
+ .rel.plt : { *(.rel.plt) }
+ .rela.plt : { *(.rela.plt) }
+ .init : { *(.init) } =0x47ff041f
+ .text :
+ {
+ *(.text)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ *(.gnu.linkonce.t*)
+ } =0x47ff041f
+ _etext = .;
+ PROVIDE (etext = .);
+ .fini : { *(.fini) } =0x47ff041f
+ . = ALIGN(32 / 8);
+ PROVIDE (__preinit_array_start = .);
+ .preinit_array : { *(.preinit_array) }
+ PROVIDE (__preinit_array_end = .);
+ PROVIDE (__init_array_start = .);
+ .init_array : { *(.init_array) }
+ PROVIDE (__init_array_end = .);
+ PROVIDE (__fini_array_start = .);
+ .fini_array : { *(.fini_array) }
+ PROVIDE (__fini_array_end = .);
+ .rodata : { *(.rodata) *(.gnu.linkonce.r*) }
+ .rodata1 : { *(.rodata1) }
+ .reginfo : { *(.reginfo) }
+ /* Adjust the address for the data segment. We want to adjust up to
+ the same address within the page on the next page up. */
+ . = ALIGN(0x100000) + (. & (0x100000 - 1));
+ .data :
+ {
+ *(.data)
+ *(.gnu.linkonce.d*)
+ CONSTRUCTORS
+ }
+ .data1 : { *(.data1) }
+ .ctors :
+ {
+ *(.ctors)
+ }
+ .dtors :
+ {
+ *(.dtors)
+ }
+ .plt : { *(.plt) }
+ .got : { *(.got.plt) *(.got) }
+ .dynamic : { *(.dynamic) }
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata : { *(.sdata) }
+ _edata = .;
+ PROVIDE (edata = .);
+ __bss_start = .;
+ .sbss : { *(.sbss) *(.scommon) }
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss)
+ *(COMMON)
+ }
+ _end = . ;
+ PROVIDE (end = .);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* These must appear regardless of . */
+}
diff --git a/tools/ioemu/x86_64.ld b/tools/ioemu/x86_64.ld
new file mode 100644
index 0000000000..878dafbe79
--- /dev/null
+++ b/tools/ioemu/x86_64.ld
@@ -0,0 +1,171 @@
+/* Default linker script, for normal executables */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(_start)
+SEARCH_DIR("/lib64"); SEARCH_DIR("/usr/lib64"); SEARCH_DIR("/usr/local/lib64");
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0x60000000 + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+ .hash : { *(.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) }
+ .rela.init : { *(.rela.init) }
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rela.fini : { *(.rela.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rela.got : { *(.rela.got) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .rel.plt : { *(.rel.plt) }
+ .rela.plt : { *(.rela.plt) }
+ .init :
+ {
+ KEEP (*(.init))
+ } =0x90909090
+ .plt : { *(.plt) }
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ } =0x90909090
+ .fini :
+ {
+ KEEP (*(.fini))
+ } =0x90909090
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table) }
+ /* Adjust the address for the data segment. We want to adjust up to
+ the same address within the page on the next page up. */
+ . = ALIGN (0x100000) - ((0x100000 - .) & (0x100000 - 1)); . = DATA_SEGMENT_ALIGN (0x100000, 0x1000);
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. */
+ . = ALIGN(64 / 8);
+ PROVIDE (__preinit_array_start = .);
+ .preinit_array : { *(.preinit_array) }
+ PROVIDE (__preinit_array_end = .);
+ PROVIDE (__init_array_start = .);
+ .init_array : { *(.init_array) }
+ PROVIDE (__init_array_end = .);
+ PROVIDE (__fini_array_start = .);
+ .fini_array : { *(.fini_array) }
+ PROVIDE (__fini_array_end = .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data1 : { *(.data1) }
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
+ .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table) }
+ .dynamic : { *(.dynamic) }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. */
+ KEEP (*crtbegin.o(.ctors))
+ /* We don't want to include the .ctor section from
+ from the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last */
+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .got : { *(.got.plt) *(.got) }
+ _edata = .;
+ PROVIDE (edata = .);
+ __bss_start = .;
+ .bss :
+ {
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(64 / 8);
+ }
+ . = ALIGN(64 / 8);
+ _end = .;
+ PROVIDE (end = .);
+ . = DATA_SEGMENT_END (.);
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+}
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
index a254f3f396..cbe7983a44 100644
--- a/tools/libxc/Makefile
+++ b/tools/libxc/Makefile
@@ -4,7 +4,7 @@ INSTALL_PROG = $(INSTALL) -m0755
INSTALL_DATA = $(INSTALL) -m0644
INSTALL_DIR = $(INSTALL) -d -m0755
-MAJOR = 2.0
+MAJOR = 3.0
MINOR = 0
CC = gcc
@@ -12,15 +12,15 @@ CC = gcc
XEN_ROOT = ../..
include $(XEN_ROOT)/tools/Rules.mk
-vpath %c $(XEN_LIBXUTIL)
-INCLUDES += -I $(XEN_LIBXUTIL)
-
SRCS :=
-SRCS += xc_atropos.c
+SRCS += xc_sedf.c
SRCS += xc_bvtsched.c
+SRCS += xc_core.c
SRCS += xc_domain.c
SRCS += xc_evtchn.c
-SRCS += xc_io.c
+SRCS += xc_gnttab.c
+SRCS += xc_load_bin.c
+SRCS += xc_load_elf.c
SRCS += xc_linux_build.c
SRCS += xc_plan9_build.c
SRCS += xc_linux_restore.c
@@ -28,7 +28,9 @@ SRCS += xc_linux_save.c
SRCS += xc_misc.c
SRCS += xc_physdev.c
SRCS += xc_private.c
-SRCS += xc_rrobin.c
+SRCS += xc_ptrace.c
+SRCS += xc_ptrace_core.c
+SRCS += xc_vmx_build.c
CFLAGS += -Wall
CFLAGS += -Werror
@@ -68,14 +70,19 @@ mk-symlinks:
ln -sf ../../$(LINUX_ROOT)/include/asm-xen/linux-public/*.h . )
install: build
- [ -d $(DESTDIR)/usr/lib ] || $(INSTALL_DIR) $(DESTDIR)/usr/lib
+ [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
[ -d $(DESTDIR)/usr/include ] || $(INSTALL_DIR) $(DESTDIR)/usr/include
- $(INSTALL_PROG) libxc.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/lib
- $(INSTALL_DATA) libxc.a $(DESTDIR)/usr/lib
- ln -sf libxc.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/lib/libxc.so.$(MAJOR)
- ln -sf libxc.so.$(MAJOR) $(DESTDIR)/usr/lib/libxc.so
+ $(INSTALL_PROG) libxc.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxc.a $(DESTDIR)/usr/$(LIBDIR)
+ ln -sf libxc.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxc.so.$(MAJOR)
+ ln -sf libxc.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxc.so
$(INSTALL_DATA) xc.h $(DESTDIR)/usr/include
+.PHONY: TAGS clean rpm install all
+
+TAGS:
+ etags -t $(SRCS) *.h
+
clean:
rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen
@@ -88,7 +95,7 @@ rpm: build
mv staging/i386/*.rpm .
rm -rf staging
-libxc.a: $(OBJS)
+libxc.a: $(LIB_OBJS)
$(AR) rc $@ $^
libxc.so: libxc.so.$(MAJOR)
@@ -97,6 +104,6 @@ libxc.so.$(MAJOR): libxc.so.$(MAJOR).$(MINOR)
ln -sf $< $@
libxc.so.$(MAJOR).$(MINOR): $(PIC_OBJS)
- $(CC) -Wl,-soname -Wl,libxc.so.$(MAJOR) -shared -o $@ $^ -L../libxutil -lxutil -lz
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxc.so.$(MAJOR) -shared -o $@ $^ -lz
-include $(DEPS)
diff --git a/tools/libxc/linux_boot_params.h b/tools/libxc/linux_boot_params.h
new file mode 100644
index 0000000000..9b0b25cef9
--- /dev/null
+++ b/tools/libxc/linux_boot_params.h
@@ -0,0 +1,165 @@
+#ifndef __LINUX_BOOT_PARAMS_H__
+#define __LINUX_BOOT_PARAMS_H__
+
+#include <asm/types.h>
+
+#define E820MAX 32
+
+struct mem_map {
+ int nr_map;
+ struct entry {
+ unsigned long long addr; /* start of memory segment */
+ unsigned long long size; /* size of memory segment */
+ unsigned long type; /* type of memory segment */
+#define E820_RAM 1
+#define E820_RESERVED 2
+#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS 4
+#define E820_IO 16
+#define E820_SHARED 17
+
+ unsigned long caching_attr; /* used by hypervisor */
+#define MEMMAP_UC 0
+#define MEMMAP_WC 1
+#define MEMMAP_WT 4
+#define MEMMAP_WP 5
+#define MEMMAP_WB 6
+
+ }map[E820MAX];
+};
+
+struct e820entry {
+ unsigned long long addr; /* start of memory segment */
+ unsigned long long size; /* size of memory segment */
+ unsigned long type; /* type of memory segment */
+};
+
+struct e820map {
+ int nr_map;
+ struct e820entry map[E820MAX];
+};
+
+struct drive_info_struct { __u8 dummy[32]; };
+
+struct sys_desc_table {
+ __u16 length;
+ __u8 table[318];
+};
+
+struct screen_info {
+ unsigned char orig_x; /* 0x00 */
+ unsigned char orig_y; /* 0x01 */
+ unsigned short dontuse1; /* 0x02 -- EXT_MEM_K sits here */
+ unsigned short orig_video_page; /* 0x04 */
+ unsigned char orig_video_mode; /* 0x06 */
+ unsigned char orig_video_cols; /* 0x07 */
+ unsigned short unused2; /* 0x08 */
+ unsigned short orig_video_ega_bx; /* 0x0a */
+ unsigned short unused3; /* 0x0c */
+ unsigned char orig_video_lines; /* 0x0e */
+ unsigned char orig_video_isVGA; /* 0x0f */
+ unsigned short orig_video_points; /* 0x10 */
+
+ /* VESA graphic mode -- linear frame buffer */
+ unsigned short lfb_width; /* 0x12 */
+ unsigned short lfb_height; /* 0x14 */
+ unsigned short lfb_depth; /* 0x16 */
+ unsigned long lfb_base; /* 0x18 */
+ unsigned long lfb_size; /* 0x1c */
+ unsigned short dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */
+ unsigned short lfb_linelength; /* 0x24 */
+ unsigned char red_size; /* 0x26 */
+ unsigned char red_pos; /* 0x27 */
+ unsigned char green_size; /* 0x28 */
+ unsigned char green_pos; /* 0x29 */
+ unsigned char blue_size; /* 0x2a */
+ unsigned char blue_pos; /* 0x2b */
+ unsigned char rsvd_size; /* 0x2c */
+ unsigned char rsvd_pos; /* 0x2d */
+ unsigned short vesapm_seg; /* 0x2e */
+ unsigned short vesapm_off; /* 0x30 */
+ unsigned short pages; /* 0x32 */
+ /* 0x34 -- 0x3f reserved for future expansion */
+};
+
+struct screen_info_overlap {
+ __u8 reserved1[2]; /* 0x00 */
+ __u16 ext_mem_k; /* 0x02 */
+ __u8 reserved2[0x20 - 0x04]; /* 0x04 */
+ __u16 cl_magic; /* 0x20 */
+#define CL_MAGIC_VALUE 0xA33F
+ __u16 cl_offset; /* 0x22 */
+ __u8 reserved3[0x40 - 0x24]; /* 0x24 */
+};
+
+
+struct apm_bios_info {
+ __u16 version;
+ __u16 cseg;
+ __u32 offset;
+ __u16 cseg_16;
+ __u16 dseg;
+ __u16 flags;
+ __u16 cseg_len;
+ __u16 cseg_16_len;
+ __u16 dseg_len;
+};
+
+struct linux_boot_params {
+ union { /* 0x00 */
+ struct screen_info info;
+ struct screen_info_overlap overlap;
+ } screen;
+
+ struct apm_bios_info apm_bios_info; /* 0x40 */
+ __u8 reserved4[0x80 - 0x54]; /* 0x54 */
+ struct drive_info_struct drive_info; /* 0x80 */
+ struct sys_desc_table sys_desc_table; /* 0xa0 */
+ __u32 alt_mem_k; /* 0x1e0 */
+ __u8 reserved5[4]; /* 0x1e4 */
+ __u8 e820_map_nr; /* 0x1e8 */
+ __u8 reserved6[8]; /* 0x1e9 */
+ __u8 setup_sects; /* 0x1f1 */
+ __u16 mount_root_rdonly; /* 0x1f2 */
+ __u16 syssize; /* 0x1f4 */
+ __u16 swapdev; /* 0x1f6 */
+ __u16 ramdisk_flags; /* 0x1f8 */
+#define RAMDISK_IMAGE_START_MASK 0x07FF
+#define RAMDISK_PROMPT_FLAG 0x8000
+#define RAMDISK_LOAD_FLAG 0x4000
+ __u16 vid_mode; /* 0x1fa */
+ __u16 root_dev; /* 0x1fc */
+ __u8 reserved9[1]; /* 0x1fe */
+ __u8 aux_device_info; /* 0x1ff */
+ /* 2.00+ */
+ __u8 reserved10[2]; /* 0x200 */
+ __u8 header_magic[4]; /* 0x202 */
+ __u16 protocol_version; /* 0x206 */
+ __u8 reserved11[8]; /* 0x208 */
+ __u8 loader_type; /* 0x210 */
+#define LOADER_TYPE_LOADLIN 1
+#define LOADER_TYPE_BOOTSECT_LOADER 2
+#define LOADER_TYPE_SYSLINUX 3
+#define LOADER_TYPE_ETHERBOOT 4
+#define LOADER_TYPE_UNKNOWN 0xFF
+ __u8 loader_flags; /* 0x211 */
+ __u8 reserved12[2]; /* 0x212 */
+ __u32 code32_start; /* 0x214 */
+ __u32 initrd_start; /* 0x218 */
+ __u32 initrd_size; /* 0x21c */
+ __u8 reserved13[4]; /* 0x220 */
+ /* 2.01+ */
+ __u16 heap_end_ptr; /* 0x224 */
+ __u8 reserved14[2]; /* 0x226 */
+ /* 2.02+ */
+ __u32 cmd_line_ptr; /* 0x228 */
+ /* 2.03+ */
+ __u32 ramdisk_max; /* 0x22c */
+ __u8 reserved15[0x2d0 - 0x230]; /* 0x230 */
+ struct e820entry e820_map[E820MAX]; /* 0x2d0 */
+ __u64 shared_info; /* 0x550 */
+ __u8 padding[0x800 - 0x558]; /* 0x558 */
+ __u8 cmd_line[0x800]; /* 0x800 */
+} __attribute__((packed));
+
+#endif /* __LINUX_BOOT_PARAMS_H__ */
diff --git a/tools/libxc/plan9a.out.h b/tools/libxc/plan9a.out.h
index d53f636517..d53f636517 100755..100644
--- a/tools/libxc/plan9a.out.h
+++ b/tools/libxc/plan9a.out.h
diff --git a/tools/libxc/xc.h b/tools/libxc/xc.h
index c4440d9838..27e7845798 100644
--- a/tools/libxc/xc.h
+++ b/tools/libxc/xc.h
@@ -10,6 +10,7 @@
#define __XC_H__
#include <stdint.h>
+
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
@@ -19,15 +20,31 @@ typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;
+#include <sys/ptrace.h>
#include <xen/xen.h>
#include <xen/dom0_ops.h>
#include <xen/event_channel.h>
#include <xen/sched_ctl.h>
-#include <xen/io/domain_controller.h>
-/*\
+/*
+ * DEFINITIONS FOR CPU BARRIERS
+ */
+
+#if defined(__i386__)
+#define mb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
+#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#elif defined(__x86_64__)
+#define mb() __asm__ __volatile__ ( "mfence" : : : "memory")
+#define rmb() __asm__ __volatile__ ( "lfence" : : : "memory")
+#define wmb() __asm__ __volatile__ ( "" : : : "memory")
+#else
+#error "Define barriers"
+#endif
+
+/*
* INITIALIZATION FUNCTIONS
-\*/
+ */
/**
* This function opens a handle to the hypervisor interface. This function can
@@ -55,13 +72,44 @@ int xc_interface_open(void);
*/
int xc_interface_close(int xc_handle);
-/*\
+/*
+ * DOMAIN DEBUGGING FUNCTIONS
+ */
+
+typedef struct xc_core_header {
+ unsigned int xch_magic;
+ unsigned int xch_nr_vcpus;
+ unsigned int xch_nr_pages;
+ unsigned int xch_ctxt_offset;
+ unsigned int xch_index_offset;
+ unsigned int xch_pages_offset;
+} xc_core_header_t;
+
+
+long xc_ptrace(enum __ptrace_request request,
+ u32 domid,
+ long addr,
+ long data);
+
+long xc_ptrace_core(enum __ptrace_request request,
+ u32 domid,
+ long addr,
+ long data);
+
+int xc_waitdomain(int domain,
+ int *status,
+ int options);
+
+int xc_waitdomain_core(int domain,
+ int *status,
+ int options);
+
+/*
* DOMAIN MANAGEMENT FUNCTIONS
-\*/
+ */
typedef struct {
u32 domid;
- unsigned int cpu;
unsigned int dying:1, crashed:1, shutdown:1,
paused:1, blocked:1, running:1;
unsigned int shutdown_reason; /* only meaningful if shutdown==1 */
@@ -69,15 +117,21 @@ typedef struct {
unsigned long shared_info_frame;
u64 cpu_time;
unsigned long max_memkb;
+ unsigned int vcpus;
+ s32 vcpu_to_cpu[MAX_VIRT_CPUS];
+ cpumap_t cpumap[MAX_VIRT_CPUS];
} xc_dominfo_t;
typedef dom0_getdomaininfo_t xc_domaininfo_t;
int xc_domain_create(int xc_handle,
- unsigned int mem_kb,
- int cpu,
- float cpu_weight,
u32 *pdomid);
+
+int xc_domain_dumpcore(int xc_handle,
+ u32 domid,
+ const char *corename);
+
+
/**
* This function pauses a domain. A paused domain still exists in memory
* however it does not receive any timeslices from the hypervisor.
@@ -112,7 +166,8 @@ int xc_domain_destroy(int xc_handle,
u32 domid);
int xc_domain_pincpu(int xc_handle,
u32 domid,
- int cpu);
+ int vcpu,
+ cpumap_t *cpumap);
/**
* This function will return information about one or more domains.
*
@@ -140,15 +195,17 @@ int xc_domain_getinfo(int xc_handle,
* domain
* @return 0 on success, -1 on failure
*/
-int xc_domain_getfullinfo(int xc_handle,
- u32 domid,
- xc_domaininfo_t *info,
- full_execution_context_t *ctxt);
+int xc_domain_get_vcpu_context(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ vcpu_guest_context_t *ctxt);
+
int xc_domain_setcpuweight(int xc_handle,
u32 domid,
float weight);
long long xc_domain_get_cpu_usage(int xc_handle,
- domid_t domid);
+ domid_t domid,
+ int vcpu);
typedef dom0_shadow_control_stats_t xc_shadow_control_stats_t;
@@ -168,26 +225,25 @@ int xc_shadow_control(int xc_handle,
struct XcIOContext;
/**
- * This function will save a domain running Linux to an IO context. This
- * IO context is currently a private interface making this function difficult
- * to call. It's interface will likely change in the future.
+ * This function will save a domain running Linux.
*
* @parm xc_handle a handle to an open hypervisor interface
- * @parm ioctxt the IO context to save a domain to
+ * @parm fd the file descriptor to save a domain to
+ * @parm dom the id of the domain
* @return 0 on success, -1 on failure
*/
-int xc_linux_save(int xc_handle, struct XcIOContext *ioctxt);
+int xc_linux_save(int xc_handle, int fd, u32 dom);
/**
- * This function will restore a saved domain running Linux to an IO context.
- * Like xc_linux_save(), this function uses a parameter who's structure is
- * privately defined. It's interface will also likely change.
+ * This function will restore a saved domain running Linux.
*
* @parm xc_handle a handle to an open hypervisor interface
- * @parm ioctxt the IO context to restore a domain from
+ * @parm fd the file descriptor to restore a domain from
+ * @parm dom the id of the domain
+ * @parm nr_pfns the number of pages
* @return 0 on success, -1 on failure
*/
-int xc_linux_restore(int xc_handle, struct XcIOContext *ioctxt);
+int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns);
int xc_linux_build(int xc_handle,
u32 domid,
@@ -195,15 +251,29 @@ int xc_linux_build(int xc_handle,
const char *ramdisk_name,
const char *cmdline,
unsigned int control_evtchn,
- unsigned long flags);
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn);
int
xc_plan9_build (int xc_handle,
u32 domid,
const char *image_name,
const char *cmdline,
- unsigned int control_evtchn,
- unsigned long flags);
+ unsigned int control_evtchn,
+ unsigned long flags);
+
+struct mem_map;
+int xc_vmx_build(int xc_handle,
+ u32 domid,
+ int memsize,
+ const char *image_name,
+ struct mem_map *memmap,
+ const char *ramdisk_name,
+ const char *cmdline,
+ unsigned int control_evtchn,
+ unsigned long flags);
int xc_bvtsched_global_set(int xc_handle,
unsigned long ctx_allow);
@@ -227,25 +297,19 @@ int xc_bvtsched_domain_get(int xc_handle,
long long *warpl,
long long *warpu);
-int xc_atropos_domain_set(int xc_handle,
+int xc_sedf_domain_set(int xc_handle,
u32 domid,
- u64 period, u64 slice, u64 latency,
- int xtratime);
+ u64 period, u64 slice, u64 latency, u16 extratime, u16 weight);
-int xc_atropos_domain_get(int xc_handle,
+int xc_sedf_domain_get(int xc_handle,
u32 domid,
- u64* period, u64 *slice, u64 *latency,
- int *xtratime);
-
-int xc_rrobin_global_set(int xc_handle, u64 slice);
-
-int xc_rrobin_global_get(int xc_handle, u64 *slice);
+ u64* period, u64 *slice, u64 *latency, u16 *extratime, u16* weight);
typedef evtchn_status_t xc_evtchn_status_t;
-/*\
+/*
* EVENT CHANNEL FUNCTIONS
-\*/
+ */
/**
* This function allocates an unbound port. Ports are named endpoints used for
@@ -326,8 +390,8 @@ int xc_physdev_pci_access_modify(int xc_handle,
int enable);
int xc_readconsolering(int xc_handle,
- char *str,
- unsigned int max_chars,
+ char **pbuffer,
+ unsigned int *pnr_chars,
int clear);
typedef dom0_physinfo_t xc_physinfo_t;
@@ -337,18 +401,13 @@ int xc_physinfo(int xc_handle,
int xc_sched_id(int xc_handle,
int *sched_id);
-int xc_domain_setinitialmem(int xc_handle,
- u32 domid,
- unsigned int initial_memkb);
-
int xc_domain_setmaxmem(int xc_handle,
u32 domid,
unsigned int max_memkb);
-int xc_domain_setvmassist(int xc_handle,
- u32 domid,
- unsigned int cmd,
- unsigned int type);
+int xc_domain_memory_increase_reservation(int xc_handle,
+ u32 domid,
+ unsigned int mem_kb);
typedef dom0_perfc_desc_t xc_perfc_desc_t;
/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
@@ -356,10 +415,15 @@ int xc_perfc_control(int xc_handle,
u32 op,
xc_perfc_desc_t *desc);
+/* read/write msr */
+long long xc_msr_read(int xc_handle, int cpu_mask, int msr);
+int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
+ unsigned int high);
+
/**
* Memory maps a range within one domain to a local address range. Mappings
* should be unmapped with munmap and should follow the same rules as mmap
- * regarding page alignment.
+ * regarding page alignment. Returns NULL on failure.
*
* In Linux, the ring queue for the control channel is accessible by mapping
* the shared_info_frame (from xc_domain_getinfo()) + 2048. The structure
@@ -378,4 +442,69 @@ void *xc_map_foreign_range(int xc_handle, u32 dom,
void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot,
unsigned long *arr, int num );
+int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf,
+ unsigned long max_pfns);
+
+/*\
+ * GRANT TABLE FUNCTIONS
+\*/
+
+/**
+ * This function opens a handle to the more restricted grant table hypervisor
+ * interface. This may be used where the standard interface is not
+ * available because the domain is not privileged.
+ * This function can be called multiple times within a single process.
+ * Multiple processes can have an open hypervisor interface at the same time.
+ *
+ * Each call to this function should have a corresponding call to
+ * xc_grant_interface_close().
+ *
+ * This function can fail if a Xen-enabled kernel is not currently running.
+ *
+ * @return a handle to the hypervisor grant table interface or -1 on failure
+ */
+int xc_grant_interface_open(void);
+
+/**
+ * This function closes an open grant table hypervisor interface.
+ *
+ * This function can fail if the handle does not represent an open interface or
+ * if there were problems closing the interface.
+ *
+ * @parm xc_handle a handle to an open grant table hypervisor interface
+ * @return 0 on success, -1 otherwise.
+ */
+int xc_grant_interface_close(int xc_handle);
+
+int xc_gnttab_map_grant_ref(int xc_handle,
+ memory_t host_virt_addr,
+ u32 dom,
+ u16 ref,
+ u16 flags,
+ s16 *handle,
+ memory_t *dev_bus_addr);
+
+int xc_gnttab_unmap_grant_ref(int xc_handle,
+ memory_t host_virt_addr,
+ memory_t dev_bus_addr,
+ u16 handle,
+ s16 *status);
+
+int xc_gnttab_setup_table(int xc_handle,
+ u32 dom,
+ u16 nr_frames,
+ s16 *status,
+ memory_t **frame_list);
+
+/* Grant debug builds only: */
+int xc_gnttab_dump_table(int xc_handle,
+ u32 dom,
+ s16 *status);
+
+/* Get current total pages allocated to a domain. */
+long xc_get_tot_pages(int xc_handle, u32 domid);
+
+/* Execute a privileged dom0 operation. */
+int xc_dom0_op(int xc_handle, dom0_op_t *op);
+
#endif /* __XC_H__ */
diff --git a/tools/libxc/xc_atropos.c b/tools/libxc/xc_atropos.c
deleted file mode 100644
index 13d07ca440..0000000000
--- a/tools/libxc/xc_atropos.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/******************************************************************************
- * xc_atropos.c
- *
- * API for manipulating parameters of the Atropos scheduler.
- *
- * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge.
- */
-
-#include "xc_private.h"
-
-int xc_atropos_domain_set(int xc_handle,
- u32 domid, u64 period, u64 slice, u64 latency,
- int xtratime)
-{
- dom0_op_t op;
- struct atropos_adjdom *p = &op.u.adjustdom.u.atropos;
-
- op.cmd = DOM0_ADJUSTDOM;
- op.u.adjustdom.domain = (domid_t)domid;
- op.u.adjustdom.sched_id = SCHED_ATROPOS;
- op.u.adjustdom.direction = SCHED_INFO_PUT;
-
- p->nat_period = period;
- p->nat_slice = slice;
- p->latency = latency;
- p->xtratime = xtratime;
-
- return do_dom0_op(xc_handle, &op);
-}
-
-int xc_atropos_domain_get(int xc_handle, u32 domid, u64 *period,
- u64 *slice, u64 *latency, int *xtratime)
-{
- dom0_op_t op;
- int ret;
- struct atropos_adjdom *p = &op.u.adjustdom.u.atropos;
-
- op.cmd = DOM0_ADJUSTDOM;
- op.u.adjustdom.domain = (domid_t)domid;
- op.u.adjustdom.sched_id = SCHED_ATROPOS;
- op.u.adjustdom.direction = SCHED_INFO_GET;
-
- ret = do_dom0_op(xc_handle, &op);
-
- *period = p->nat_period;
- *slice = p->nat_slice;
- *latency = p->latency;
- *xtratime = p->xtratime;
-
- return ret;
-}
diff --git a/tools/libxc/xc_core.c b/tools/libxc/xc_core.c
new file mode 100644
index 0000000000..653512ab92
--- /dev/null
+++ b/tools/libxc/xc_core.c
@@ -0,0 +1,116 @@
+#include "xc_private.h"
+#define ELFSIZE 32
+#include "xc_elf.h"
+#include <stdlib.h>
+#include <zlib.h>
+
+/* number of pages to write at a time */
+#define DUMP_INCREMENT 4 * 1024
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+static int
+copy_from_domain_page(int xc_handle,
+ u32 domid,
+ unsigned long *page_array,
+ unsigned long src_pfn,
+ void *dst_page)
+{
+ void *vaddr = xc_map_foreign_range(
+ xc_handle, domid, PAGE_SIZE, PROT_READ, page_array[src_pfn]);
+ if ( vaddr == NULL )
+ return -1;
+ memcpy(dst_page, vaddr, PAGE_SIZE);
+ munmap(vaddr, PAGE_SIZE);
+ return 0;
+}
+
+int
+xc_domain_dumpcore(int xc_handle,
+ u32 domid,
+ const char *corename)
+{
+ unsigned long nr_pages;
+ unsigned long *page_array;
+ xc_dominfo_t info;
+ int i, j, vcpu_map_size, dump_fd;
+ char *dump_mem, *dump_mem_start = NULL;
+ struct xc_core_header header;
+ vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
+
+
+ if ((dump_fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0) {
+ PERROR("Could not open corefile %s: %s", corename, strerror(errno));
+ goto error_out;
+ }
+
+ if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == 0) {
+ PERROR("Could not allocate dump_mem");
+ goto error_out;
+ }
+
+ if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1) {
+ PERROR("Could not get info for domain");
+ goto error_out;
+ }
+
+ vcpu_map_size = sizeof(info.vcpu_to_cpu) / sizeof(info.vcpu_to_cpu[0]);
+
+ for (i = 0, j = 0; i < vcpu_map_size; i++) {
+ if (info.vcpu_to_cpu[i] == -1) {
+ continue;
+ }
+ if (xc_domain_get_vcpu_context(xc_handle, domid, i, &ctxt[j])) {
+ PERROR("Could not get all vcpu contexts for domain");
+ goto error_out;
+ }
+ j++;
+ }
+
+ nr_pages = info.nr_pages;
+
+ header.xch_magic = 0xF00FEBED;
+ header.xch_nr_vcpus = info.vcpus;
+ header.xch_nr_pages = nr_pages;
+ header.xch_ctxt_offset = sizeof(struct xc_core_header);
+ header.xch_index_offset = sizeof(struct xc_core_header) +
+ sizeof(vcpu_guest_context_t)*info.vcpus;
+ header.xch_pages_offset = round_pgup(sizeof(struct xc_core_header) +
+ (sizeof(vcpu_guest_context_t) * info.vcpus) +
+ (nr_pages * sizeof(unsigned long)));
+
+ write(dump_fd, &header, sizeof(struct xc_core_header));
+ write(dump_fd, &ctxt, sizeof(ctxt[0]) * info.vcpus);
+
+ if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
+ printf("Could not allocate memory\n");
+ goto error_out;
+ }
+ if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
+ printf("Could not get the page frame list\n");
+ goto error_out;
+ }
+ write(dump_fd, page_array, nr_pages * sizeof(unsigned long));
+ lseek(dump_fd, header.xch_pages_offset, SEEK_SET);
+ for (dump_mem = dump_mem_start, i = 0; i < nr_pages; i++) {
+ copy_from_domain_page(xc_handle, domid, page_array, i, dump_mem);
+ dump_mem += PAGE_SIZE;
+ if (((i + 1) % DUMP_INCREMENT == 0) || (i + 1) == nr_pages) {
+ if (write(dump_fd, dump_mem_start, dump_mem - dump_mem_start) <
+ dump_mem - dump_mem_start) {
+ PERROR("Partial write, file system full?");
+ goto error_out;
+ }
+ dump_mem = dump_mem_start;
+ }
+ }
+
+ close(dump_fd);
+ free(dump_mem_start);
+ return 0;
+ error_out:
+ if (dump_fd)
+ close(dump_fd);
+ if (dump_mem_start)
+ free(dump_mem_start);
+ return -1;
+}
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index b2c468bc90..8f0bba3216 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -9,9 +9,6 @@
#include "xc_private.h"
int xc_domain_create(int xc_handle,
- unsigned int mem_kb,
- int cpu,
- float cpu_weight,
u32 *pdomid)
{
int err;
@@ -19,17 +16,11 @@ int xc_domain_create(int xc_handle,
op.cmd = DOM0_CREATEDOMAIN;
op.u.createdomain.domain = (domid_t)*pdomid;
- op.u.createdomain.memory_kb = mem_kb;
- op.u.createdomain.cpu = cpu;
+ if ( (err = do_dom0_op(xc_handle, &op)) != 0 )
+ return err;
- if ( (err = do_dom0_op(xc_handle, &op)) == 0 )
- {
- *pdomid = (u16)op.u.createdomain.domain;
-
- err = xc_domain_setcpuweight(xc_handle, *pdomid, cpu_weight);
- }
-
- return err;
+ *pdomid = (u16)op.u.createdomain.domain;
+ return 0;
}
@@ -64,12 +55,14 @@ int xc_domain_destroy(int xc_handle,
int xc_domain_pincpu(int xc_handle,
u32 domid,
- int cpu)
+ int vcpu,
+ cpumap_t *cpumap)
{
dom0_op_t op;
op.cmd = DOM0_PINCPUDOMAIN;
- op.u.pincpudomain.domain = (domid_t)domid;
- op.u.pincpudomain.cpu = cpu;
+ op.u.pincpudomain.domain = (domid_t)domid;
+ op.u.pincpudomain.vcpu = vcpu;
+ op.u.pincpudomain.cpumap = cpumap;
return do_dom0_op(xc_handle, &op);
}
@@ -82,21 +75,17 @@ int xc_domain_getinfo(int xc_handle,
unsigned int nr_doms;
u32 next_domid = first_domid;
dom0_op_t op;
+ int rc = 0;
for ( nr_doms = 0; nr_doms < max_doms; nr_doms++ )
{
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)next_domid;
- op.u.getdomaininfo.ctxt = NULL; /* no exec context info, thanks. */
- if ( do_dom0_op(xc_handle, &op) < 0 )
+ if ( (rc = do_dom0_op(xc_handle, &op)) < 0 )
break;
- info->domid = (u16)op.u.getdomaininfo.domain;
-
- info->cpu =
- (op.u.getdomaininfo.flags>>DOMFLAGS_CPUSHIFT) & DOMFLAGS_CPUMASK;
+ info->domid = (u16)op.u.getdomaininfo.domain;
info->dying = !!(op.u.getdomaininfo.flags & DOMFLAGS_DYING);
- info->crashed = !!(op.u.getdomaininfo.flags & DOMFLAGS_CRASHED);
info->shutdown = !!(op.u.getdomaininfo.flags & DOMFLAGS_SHUTDOWN);
info->paused = !!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED);
info->blocked = !!(op.u.getdomaininfo.flags & DOMFLAGS_BLOCKED);
@@ -106,36 +95,58 @@ int xc_domain_getinfo(int xc_handle,
(op.u.getdomaininfo.flags>>DOMFLAGS_SHUTDOWNSHIFT) &
DOMFLAGS_SHUTDOWNMASK;
+ if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_crash) )
+ {
+ info->shutdown = 0;
+ info->crashed = 1;
+ }
+
info->nr_pages = op.u.getdomaininfo.tot_pages;
info->max_memkb = op.u.getdomaininfo.max_pages<<(PAGE_SHIFT);
info->shared_info_frame = op.u.getdomaininfo.shared_info_frame;
info->cpu_time = op.u.getdomaininfo.cpu_time;
+ info->vcpus = op.u.getdomaininfo.n_vcpu;
+ memcpy(&info->vcpu_to_cpu, &op.u.getdomaininfo.vcpu_to_cpu,
+ sizeof(info->vcpu_to_cpu));
+ memcpy(&info->cpumap, &op.u.getdomaininfo.cpumap,
+ sizeof(info->cpumap));
next_domid = (u16)op.u.getdomaininfo.domain + 1;
info++;
}
+ if( !nr_doms ) return rc;
+
return nr_doms;
}
-int xc_domain_getfullinfo(int xc_handle,
- u32 domid,
- xc_domaininfo_t *info,
- full_execution_context_t *ctxt)
+int xc_domain_get_vcpu_context(int xc_handle,
+ u32 domid,
+ u32 vcpu,
+ vcpu_guest_context_t *ctxt)
{
- int rc;
+ int rc, errno_saved;
dom0_op_t op;
- op.cmd = DOM0_GETDOMAININFO;
- op.u.getdomaininfo.domain = (domid_t)domid;
- op.u.getdomaininfo.ctxt = ctxt;
+ op.cmd = DOM0_GETVCPUCONTEXT;
+ op.u.getvcpucontext.domain = (domid_t)domid;
+ op.u.getvcpucontext.vcpu = (u16)vcpu;
+ op.u.getvcpucontext.ctxt = ctxt;
+
+ if ( (ctxt != NULL) &&
+ ((rc = mlock(ctxt, sizeof(*ctxt))) != 0) )
+ return rc;
rc = do_dom0_op(xc_handle, &op);
- if ( info )
- memcpy(info, &op.u.getdomaininfo, sizeof(*info));
+ if ( ctxt != NULL )
+ {
+ errno_saved = errno;
+ (void)munlock(ctxt, sizeof(*ctxt));
+ errno = errno_saved;
+ }
- if ( ((u16)op.u.getdomaininfo.domain != domid) && rc > 0 )
+ if ( rc > 0 )
return -ESRCH;
else
return rc;
@@ -174,10 +185,10 @@ int xc_domain_setcpuweight(int xc_handle,
int ret;
/* Figure out which scheduler is currently used: */
- if((ret = xc_sched_id(xc_handle, &sched_id)))
+ if ( (ret = xc_sched_id(xc_handle, &sched_id)) != 0 )
return ret;
- switch(sched_id)
+ switch ( sched_id )
{
case SCHED_BVT:
{
@@ -189,47 +200,26 @@ int xc_domain_setcpuweight(int xc_handle,
/* Preserve all the scheduling parameters apart
of MCU advance. */
- if((ret = xc_bvtsched_domain_get(xc_handle, domid, &mcuadv,
- &warpback, &warpvalue, &warpl, &warpu)))
+ if ( (ret = xc_bvtsched_domain_get(
+ xc_handle, domid, &mcuadv,
+ &warpback, &warpvalue, &warpl, &warpu)) != 0 )
return ret;
/* The MCU advance is inverse of the weight.
Default value of the weight is 1, default mcuadv 10.
The scaling factor is therefore 10. */
- if(weight > 0) mcuadv = 10 / weight;
+ if ( weight > 0 )
+ mcuadv = 10 / weight;
ret = xc_bvtsched_domain_set(xc_handle, domid, mcuadv,
warpback, warpvalue, warpl, warpu);
break;
}
-
- case SCHED_RROBIN:
- {
- /* The weight cannot be set for RRobin */
- break;
- }
- case SCHED_ATROPOS:
- {
- /* TODO - can we set weights in Atropos? */
- break;
- }
}
return ret;
}
-
-int xc_domain_setinitialmem(int xc_handle,
- u32 domid,
- unsigned int initial_memkb)
-{
- dom0_op_t op;
- op.cmd = DOM0_SETDOMAININITIALMEM;
- op.u.setdomaininitialmem.domain = (domid_t)domid;
- op.u.setdomaininitialmem.initial_memkb = initial_memkb;
- return do_dom0_op(xc_handle, &op);
-}
-
int xc_domain_setmaxmem(int xc_handle,
u32 domid,
unsigned int max_memkb)
@@ -241,15 +231,20 @@ int xc_domain_setmaxmem(int xc_handle,
return do_dom0_op(xc_handle, &op);
}
-int xc_domain_setvmassist(int xc_handle,
- u32 domid,
- unsigned int cmd,
- unsigned int type)
+int xc_domain_memory_increase_reservation(int xc_handle,
+ u32 domid,
+ unsigned int mem_kb)
{
- dom0_op_t op;
- op.cmd = DOM0_SETDOMAINVMASSIST;
- op.u.setdomainvmassist.domain = (domid_t)domid;
- op.u.setdomainvmassist.cmd = cmd;
- op.u.setdomainvmassist.type = type;
- return do_dom0_op(xc_handle, &op);
+ int err;
+
+ err = do_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL,
+ mem_kb / 4, 0, domid);
+ if (err == mem_kb / 4)
+ return 0;
+
+ if (err > 0) {
+ errno = ENOMEM;
+ err = -1;
+ }
+ return err;
}
diff --git a/tools/libxc/xc_evtchn.c b/tools/libxc/xc_evtchn.c
index 9371e61261..1c0294d83b 100644
--- a/tools/libxc/xc_evtchn.c
+++ b/tools/libxc/xc_evtchn.c
@@ -19,15 +19,16 @@ static int do_evtchn_op(int xc_handle, evtchn_op_t *op)
if ( mlock(op, sizeof(*op)) != 0 )
{
- PERROR("Could not lock memory for Xen hypercall");
- goto out1;
+ PERROR("do_evtchn_op: op mlock failed");
+ goto out;
}
- if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
- goto out2;
+ if ((ret = do_xen_hypercall(xc_handle, &hypercall)) < 0)
+ ERROR("do_evtchn_op: HYPERVISOR_event_channel_op failed: %d", ret);
- out2: (void)munlock(op, sizeof(*op));
- out1: return ret;
+ (void)munlock(op, sizeof(*op));
+ out:
+ return ret;
}
@@ -39,8 +40,9 @@ int xc_evtchn_alloc_unbound(int xc_handle,
int rc;
op.cmd = EVTCHNOP_alloc_unbound;
- op.u.alloc_unbound.dom = (domid_t)dom;
-
+ op.u.alloc_unbound.dom = (domid_t)dom;
+ op.u.alloc_unbound.port = (port != NULL) ? *port : 0;
+
if ( (rc = do_evtchn_op(xc_handle, &op)) == 0 )
{
if ( port != NULL )
diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
new file mode 100644
index 0000000000..ad23e68013
--- /dev/null
+++ b/tools/libxc/xc_gnttab.c
@@ -0,0 +1,141 @@
+/******************************************************************************
+ * xc_gnttab.c
+ *
+ * API for manipulating and accessing grant tables
+ *
+ * Copyright (c) 2005 Christopher Clark
+ * based on xc_evtchn.c Copyright (c) 2004, K A Fraser.
+ */
+
+#include "xc_private.h"
+#include "xen/grant_table.h"
+
+static int
+do_gnttab_op( int xc_handle,
+ unsigned long cmd,
+ gnttab_op_t *op,
+ unsigned long count )
+{
+ int ret = -1;
+ privcmd_hypercall_t hypercall;
+
+ hypercall.op = __HYPERVISOR_grant_table_op;
+ hypercall.arg[0] = cmd;
+ hypercall.arg[1] = (unsigned long)(op);
+ hypercall.arg[2] = count;
+
+ if ( mlock(op, sizeof(*op)) != 0 )
+ {
+ PERROR("do_gnttab_op: op mlock failed");
+ goto out;
+ }
+
+ if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
+ ERROR("do_gnttab_op: HYPERVISOR_grant_table_op failed: %d", ret);
+
+ (void)munlock(op, sizeof(*op));
+ out:
+ return ret;
+}
+
+
+int xc_gnttab_map_grant_ref(int xc_handle,
+ memory_t host_virt_addr,
+ u32 dom,
+ u16 ref,
+ u16 flags,
+ s16 *handle,
+ memory_t *dev_bus_addr)
+{
+ gnttab_op_t op;
+ int rc;
+
+ op.u.map_grant_ref.host_virt_addr = host_virt_addr;
+ op.u.map_grant_ref.dom = (domid_t)dom;
+ op.u.map_grant_ref.ref = ref;
+ op.u.map_grant_ref.flags = flags;
+
+ if ( (rc = do_gnttab_op(xc_handle, GNTTABOP_map_grant_ref, &op, 1)) == 0 )
+ {
+ *handle = op.u.map_grant_ref.handle;
+ *dev_bus_addr = op.u.map_grant_ref.dev_bus_addr;
+ }
+
+ return rc;
+}
+
+
+int xc_gnttab_unmap_grant_ref(int xc_handle,
+ memory_t host_virt_addr,
+ memory_t dev_bus_addr,
+ u16 handle,
+ s16 *status)
+{
+ gnttab_op_t op;
+ int rc;
+
+ op.u.unmap_grant_ref.host_virt_addr = host_virt_addr;
+ op.u.unmap_grant_ref.dev_bus_addr = dev_bus_addr;
+ op.u.unmap_grant_ref.handle = handle;
+
+ if ( (rc = do_gnttab_op(xc_handle, GNTTABOP_unmap_grant_ref, &op, 1)) == 0 )
+ *status = op.u.unmap_grant_ref.status;
+
+ return rc;
+}
+
+int xc_gnttab_setup_table(int xc_handle,
+ u32 dom,
+ u16 nr_frames,
+ s16 *status,
+ memory_t **frame_list)
+{
+ gnttab_op_t op;
+ int rc;
+ int i;
+
+ op.u.setup_table.dom = (domid_t)dom;
+ op.u.setup_table.nr_frames = nr_frames;
+
+ if ( (rc = do_gnttab_op(xc_handle, GNTTABOP_setup_table, &op, 1)) == 0 )
+ {
+ *status = op.u.setup_table.status;
+ for ( i = 0; i < nr_frames; i++ )
+ {
+ (*frame_list)[i] = op.u.setup_table.frame_list[i];
+ }
+ }
+
+ return rc;
+}
+
+int xc_gnttab_dump_table(int xc_handle,
+ u32 dom,
+ s16 *status)
+{
+ gnttab_op_t op;
+ int rc;
+
+ op.u.dump_table.dom = (domid_t)dom;
+
+ printf("xc_gnttab_dump_table: domain %d\n", dom);
+
+ if ( (rc = do_gnttab_op(xc_handle, GNTTABOP_dump_table, &op, 1)) == 0 )
+ *status = op.u.dump_table.status;
+
+ return rc;
+}
+
+int xc_grant_interface_open(void)
+{
+ int fd = open("/proc/xen/grant", O_RDWR);
+ if ( fd == -1 )
+ PERROR("Could not obtain handle on grant command interface");
+ return fd;
+
+}
+
+int xc_grant_interface_close(int xc_grant_handle)
+{
+ return close(xc_grant_handle);
+}
diff --git a/tools/libxc/xc_io.c b/tools/libxc/xc_io.c
deleted file mode 100644
index 5589483f10..0000000000
--- a/tools/libxc/xc_io.c
+++ /dev/null
@@ -1,43 +0,0 @@
-#include "xc_io.h"
-#include <sys/time.h>
-
-void xcio_timestamp(XcIOContext *ctxt, const char *msg){
- struct timeval tv;
-
- gettimeofday(&tv, NULL);
- if (msg[0] != '\b' && msg[0] != '\r')
- fprintf(stdout, "[%08ld.%06ld] ", tv.tv_sec, tv.tv_usec);
-}
-
-void xcio_error(XcIOContext *ctxt, const char *msg, ...){
- va_list args;
-
- va_start(args, msg);
- vfprintf(stdout, msg, args); fprintf(stdout, "\n"); fflush(stdout);
- IOStream_vprint(ctxt->info, msg, args);
- IOStream_print(ctxt->info, "\n");
- va_end(args);
-}
-
-void xcio_info(XcIOContext *ctxt, const char *msg, ...){
- va_list args;
-
- if(0 && !(ctxt->flags & XCFLAGS_VERBOSE)) return;
- va_start(args, msg);
- xcio_timestamp(ctxt, msg);
- vfprintf(stdout, msg, args); fprintf(stdout, "\n");
- IOStream_vprint(ctxt->info, msg, args);
- fflush(stdout);
- va_end(args);
-}
-
-void xcio_debug(XcIOContext *ctxt, const char *msg, ...){
- va_list args;
-
- if(0 && !(ctxt->flags & XCFLAGS_DEBUG)) return;
- va_start(args, msg);
- xcio_timestamp(ctxt, msg);
- vfprintf(stdout, msg, args); fprintf(stdout, "\n");
- IOStream_vprint(ctxt->info, msg, args);
- va_end(args);
-}
diff --git a/tools/libxc/xc_io.h b/tools/libxc/xc_io.h
deleted file mode 100644
index 4325473518..0000000000
--- a/tools/libxc/xc_io.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef __XC_XC_IO_H__
-#define __XC_XC_IO_H__
-
-#include <errno.h>
-#include "xc_private.h"
-#include "iostream.h"
-
-typedef struct XcIOContext {
- u32 domain;
- unsigned flags;
- int resource;
- IOStream *io;
- IOStream *info;
- IOStream *err;
- char *vmconfig;
- int vmconfig_n;
- int (*suspend)(void *data, u32 domain);
- int (*configure)(void *data, u32 domain, char *vmconfig, int vmconfig_n);
- void *data;
-} XcIOContext;
-
-static inline int xcio_suspend_domain(XcIOContext *ctxt){
- int err = 0;
-
- if(ctxt->suspend){
- err = ctxt->suspend(ctxt->data, ctxt->domain);
- } else {
- err = -EINVAL;
- }
- return err;
-}
-
-static inline int xcio_configure_domain(XcIOContext *ctxt){
- int err = 0;
-
- if(ctxt->configure){
- err = ctxt->configure(ctxt->data, ctxt->domain, ctxt->vmconfig, ctxt->vmconfig_n);
- } else {
- err = -EINVAL;
- }
- return err;
-}
-
-static inline int xcio_read(XcIOContext *ctxt, void *buf, int n){
- int rc;
-
- rc = IOStream_read(ctxt->io, buf, n);
- return (rc == n ? 0 : -1);
-}
-
-static inline int xcio_write(XcIOContext *ctxt, void *buf, int n){
- int rc;
-
- rc = IOStream_write(ctxt->io, buf, n);
- return (rc == n ? 0 : -1);
-}
-
-static inline int xcio_flush(XcIOContext *ctxt){
- return IOStream_flush(ctxt->io);
-}
-
-extern void xcio_error(XcIOContext *ctxt, const char *msg, ...);
-extern void xcio_info(XcIOContext *ctxt, const char *msg, ...);
-
-#define xcio_perror(_ctxt, _msg...) \
-xcio_error(_ctxt, "(errno %d %s)" _msg, errno, strerror(errno), ## _msg)
-
-#endif /* ! __XC_XC_IO_H__ */
-
-
-
diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c
index cc4c0f4561..660c30a3f1 100644
--- a/tools/libxc/xc_linux_build.c
+++ b/tools/libxc/xc_linux_build.c
@@ -3,107 +3,77 @@
*/
#include "xc_private.h"
+
+#if defined(__i386__)
#define ELFSIZE 32
+#endif
+
+#if defined(__x86_64__)
+#define ELFSIZE 64
+#endif
+
+
#include "xc_elf.h"
#include <stdlib.h>
#include <zlib.h>
+#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#endif
+
+#if defined(__x86_64__)
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#endif
+
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
-struct domain_setup_info
-{
- unsigned long v_start;
- unsigned long v_end;
- unsigned long v_kernstart;
- unsigned long v_kernend;
- unsigned long v_kernentry;
-
- unsigned int use_writable_pagetables;
- unsigned int load_bsd_symtab;
-
- unsigned long symtab_addr;
- unsigned long symtab_len;
-};
-
-static int
-parseelfimage(
- char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
-static int
-loadelfimage(
- char *elfbase, int xch, u32 dom, unsigned long *parray,
- unsigned long vstart);
-static int
-loadelfsymtab(
- char *elfbase, int xch, u32 dom, unsigned long *parray,
- struct domain_setup_info *dsi);
-
-static long get_tot_pages(int xc_handle, u32 domid)
+static int probeimageformat(char *image,
+ unsigned long image_size,
+ struct load_funcs *load_funcs)
{
- dom0_op_t op;
- op.cmd = DOM0_GETDOMAININFO;
- op.u.getdomaininfo.domain = (domid_t)domid;
- op.u.getdomaininfo.ctxt = NULL;
- return (do_dom0_op(xc_handle, &op) < 0) ?
- -1 : op.u.getdomaininfo.tot_pages;
-}
-
-static int get_pfn_list(int xc_handle,
- u32 domid,
- unsigned long *pfn_buf,
- unsigned long max_pfns)
-{
- dom0_op_t op;
- int ret;
- op.cmd = DOM0_GETMEMLIST;
- op.u.getmemlist.domain = (domid_t)domid;
- op.u.getmemlist.max_pfns = max_pfns;
- op.u.getmemlist.buffer = pfn_buf;
-
- if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
- return -1;
-
- ret = do_dom0_op(xc_handle, &op);
-
- (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long));
-
- return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
-}
+ if ( probe_elf(image, image_size, load_funcs) &&
+ probe_bin(image, image_size, load_funcs) )
+ {
+ ERROR( "Unrecognized image format" );
+ return -EINVAL;
+ }
-static int copy_to_domain_page(int xc_handle,
- u32 domid,
- unsigned long dst_pfn,
- void *src_page)
-{
- void *vaddr = xc_map_foreign_range(
- xc_handle, domid, PAGE_SIZE, PROT_WRITE, dst_pfn);
- if ( vaddr == NULL )
- return -1;
- memcpy(vaddr, src_page, PAGE_SIZE);
- munmap(vaddr, PAGE_SIZE);
return 0;
}
-static int setup_guestos(int xc_handle,
- u32 dom,
- char *image, unsigned long image_size,
- gzFile initrd_gfd, unsigned long initrd_len,
- unsigned long nr_pages,
- unsigned long *pvsi, unsigned long *pvke,
- full_execution_context_t *ctxt,
- const char *cmdline,
- unsigned long shared_info_frame,
- unsigned int control_evtchn,
- unsigned long flags)
+static int setup_guest(int xc_handle,
+ u32 dom,
+ char *image, unsigned long image_size,
+ gzFile initrd_gfd, unsigned long initrd_len,
+ unsigned long nr_pages,
+ unsigned long *pvsi, unsigned long *pvke,
+ unsigned long *pvss, vcpu_guest_context_t *ctxt,
+ const char *cmdline,
+ unsigned long shared_info_frame,
+ unsigned int control_evtchn,
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn, unsigned long *store_mfn)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
+#if defined(__x86_64__)
+ l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
+ l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
+#endif
unsigned long *page_array = NULL;
- unsigned long l2tab;
- unsigned long l1tab;
+ unsigned long l2tab = 0;
+ unsigned long l1tab = 0;
+#if defined(__x86_64__)
+ unsigned long l3tab = 0;
+ unsigned long l4tab = 0;
+#endif
unsigned long count, i;
start_info_t *start_info;
shared_info_t *shared_info;
@@ -114,6 +84,7 @@ static int setup_guestos(int xc_handle,
unsigned long ppt_alloc;
unsigned long *physmap, *physmap_e, physmap_pfn;
+ struct load_funcs load_funcs;
struct domain_setup_info dsi;
unsigned long vinitrd_start;
unsigned long vinitrd_end;
@@ -121,24 +92,23 @@ static int setup_guestos(int xc_handle,
unsigned long vphysmap_end;
unsigned long vstartinfo_start;
unsigned long vstartinfo_end;
+ unsigned long vstoreinfo_start;
+ unsigned long vstoreinfo_end;
unsigned long vstack_start;
unsigned long vstack_end;
unsigned long vpt_start;
unsigned long vpt_end;
unsigned long v_end;
- memset(&dsi, 0, sizeof(struct domain_setup_info));
-
- rc = parseelfimage(image, image_size, &dsi);
+ rc = probeimageformat(image, image_size, &load_funcs);
if ( rc != 0 )
goto error_out;
- if (dsi.use_writable_pagetables)
- xc_domain_setvmassist(xc_handle, dom, VMASST_CMD_enable,
- VMASST_TYPE_writable_pagetables);
+ memset(&dsi, 0, sizeof(struct domain_setup_info));
- if (dsi.load_bsd_symtab)
- loadelfsymtab(image, xc_handle, dom, NULL, &dsi);
+ rc = (load_funcs.parseimage)(image, image_size, &dsi);
+ if ( rc != 0 )
+ goto error_out;
if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
{
@@ -163,32 +133,52 @@ static int setup_guestos(int xc_handle,
vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
vstartinfo_start = vpt_end;
vstartinfo_end = vstartinfo_start + PAGE_SIZE;
- vstack_start = vstartinfo_end;
+ /* Place store shared page after startinfo. */
+ vstoreinfo_start = vstartinfo_end;
+ vstoreinfo_end = vstartinfo_end + PAGE_SIZE;
+ vstack_start = vstoreinfo_end;
vstack_end = vstack_start + PAGE_SIZE;
- v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
- if ( (v_end - vstack_end) < (512 << 10) )
- v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
+ v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
+ if ( (v_end - vstack_end) < (512UL << 10) )
+ v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
+#if defined(__i386__)
if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
break;
+#endif
+#if defined(__x86_64__)
+#define NR(_l,_h,_s) \
+ (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
+ ((_l) & ~((1UL<<(_s))-1))) >> (_s))
+ if ( (1 + /* # L4 */
+ NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
+ NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
+ NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
+ <= nr_pt_pages )
+ break;
+#endif
}
+#define _p(a) ((void *) (a))
+
printf("VIRTUAL MEMORY ARRANGEMENT:\n"
- " Loaded kernel: %08lx->%08lx\n"
- " Init. ramdisk: %08lx->%08lx\n"
- " Phys-Mach map: %08lx->%08lx\n"
- " Page tables: %08lx->%08lx\n"
- " Start info: %08lx->%08lx\n"
- " Boot stack: %08lx->%08lx\n"
- " TOTAL: %08lx->%08lx\n",
- dsi.v_kernstart, dsi.v_kernend,
- vinitrd_start, vinitrd_end,
- vphysmap_start, vphysmap_end,
- vpt_start, vpt_end,
- vstartinfo_start, vstartinfo_end,
- vstack_start, vstack_end,
- dsi.v_start, v_end);
- printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
+ " Loaded kernel: %p->%p\n"
+ " Init. ramdisk: %p->%p\n"
+ " Phys-Mach map: %p->%p\n"
+ " Page tables: %p->%p\n"
+ " Start info: %p->%p\n"
+ " Store page: %p->%p\n"
+ " Boot stack: %p->%p\n"
+ " TOTAL: %p->%p\n",
+ _p(dsi.v_kernstart), _p(dsi.v_kernend),
+ _p(vinitrd_start), _p(vinitrd_end),
+ _p(vphysmap_start), _p(vphysmap_end),
+ _p(vpt_start), _p(vpt_end),
+ _p(vstartinfo_start), _p(vstartinfo_end),
+ _p(vstoreinfo_start), _p(vstoreinfo_end),
+ _p(vstack_start), _p(vstack_end),
+ _p(dsi.v_start), _p(v_end));
+ printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
{
@@ -204,16 +194,14 @@ static int setup_guestos(int xc_handle,
goto error_out;
}
- if ( get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
+ if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
{
PERROR("Could not get the page frame list");
goto error_out;
}
- loadelfimage(image, xc_handle, dom, page_array, dsi.v_start);
-
- if (dsi.load_bsd_symtab)
- loadelfsymtab(image, xc_handle, dom, page_array, &dsi);
+ (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
+ &dsi);
/* Load the initial ramdisk image. */
if ( initrd_len != 0 )
@@ -227,7 +215,7 @@ static int setup_guestos(int xc_handle,
PERROR("Error reading initrd image, could not");
goto error_out;
}
- copy_to_domain_page(xc_handle, dom,
+ xc_copy_to_domain_page(xc_handle, dom,
page_array[i>>PAGE_SHIFT], page);
}
}
@@ -235,6 +223,7 @@ static int setup_guestos(int xc_handle,
if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
goto error_out;
+#if defined(__i386__)
/* First allocate page for page dir. */
ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
@@ -274,6 +263,74 @@ static int setup_guestos(int xc_handle,
}
munmap(vl1tab, PAGE_SIZE);
munmap(vl2tab, PAGE_SIZE);
+#endif
+#if defined(__x86_64__)
+
+#define alloc_pt(ltab, vltab) \
+ ltab = page_array[ppt_alloc++] << PAGE_SHIFT; \
+ if (vltab != NULL) { \
+ munmap(vltab, PAGE_SIZE); \
+ } \
+ if ((vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
+ PROT_READ|PROT_WRITE, \
+ ltab >> PAGE_SHIFT)) == NULL) { \
+ munmap(vltab, PAGE_SIZE); \
+ goto error_out; \
+ } \
+ memset(vltab, 0, PAGE_SIZE);
+
+ /* First allocate page for page dir. */
+ ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+ l4tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+ ctxt->pt_base = l4tab;
+
+    /* Initialize page table */
+ if ( (vl4tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ l4tab >> PAGE_SHIFT)) == NULL )
+ goto error_out;
+ memset(vl4tab, 0, PAGE_SIZE);
+ vl4e = &vl4tab[l4_table_offset(dsi.v_start)];
+
+ for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++)
+ {
+ if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
+ {
+ alloc_pt(l1tab, vl1tab);
+
+ if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
+ {
+ alloc_pt(l2tab, vl2tab);
+ if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
+ {
+ alloc_pt(l3tab, vl3tab);
+ vl3e = &vl3tab[l3_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+ *vl4e = l3tab | L4_PROT;
+ vl4e++;
+ }
+ vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+ *vl3e = l2tab | L3_PROT;
+ vl3e++;
+ }
+ vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+ *vl2e = l1tab | L2_PROT;
+ vl2e++;
+ }
+
+ *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+ if ( (count >= ((vpt_start-dsi.v_start)>>PAGE_SHIFT)) &&
+ (count < ((vpt_end -dsi.v_start)>>PAGE_SHIFT)) )
+ {
+ *vl1e &= ~_PAGE_RW;
+ }
+ vl1e++;
+ }
+
+ munmap(vl1tab, PAGE_SIZE);
+ munmap(vl2tab, PAGE_SIZE);
+ munmap(vl3tab, PAGE_SIZE);
+ munmap(vl4tab, PAGE_SIZE);
+#endif
/* Write the phys->machine and machine->phys table entries. */
physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
@@ -300,14 +357,23 @@ static int setup_guestos(int xc_handle,
}
munmap(physmap, PAGE_SIZE);
+#if defined(__i386__)
/*
* Pin down l2tab addr as page dir page - causes hypervisor to provide
* correct protection for the page
*/
- if ( add_mmu_update(xc_handle, mmu,
- l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
+ if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
goto error_out;
+#endif
+#if defined(__x86_64__)
+ /*
+ * Pin down l4tab addr as page dir page - causes hypervisor to provide
+ * correct protection for the page
+ */
+ if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE, l4tab>>PAGE_SHIFT, dom) )
+ goto error_out;
+#endif
start_info = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
@@ -319,15 +385,20 @@ static int setup_guestos(int xc_handle,
start_info->nr_pt_frames = nr_pt_pages;
start_info->mfn_list = vphysmap_start;
start_info->domain_controller_evtchn = control_evtchn;
+ start_info->store_page = vstoreinfo_start;
+ start_info->store_evtchn = store_evtchn;
if ( initrd_len != 0 )
{
start_info->mod_start = vinitrd_start;
start_info->mod_len = initrd_len;
}
- strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
- start_info->cmd_line[MAX_CMDLINE-1] = '\0';
+ strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
+ start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
munmap(start_info, PAGE_SIZE);
+    /* Tell our caller the MFN we gave the domain as its store page. */
+ *store_mfn = page_array[((vstoreinfo_start-dsi.v_start)>>PAGE_SHIFT)];
+
/* shared_info page starts its life empty. */
shared_info = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
@@ -335,6 +406,10 @@ static int setup_guestos(int xc_handle,
/* Mask all upcalls... */
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+ shared_info->n_vcpu = vcpus;
+ printf(" VCPUS: %d\n", shared_info->n_vcpu);
+
munmap(shared_info, PAGE_SIZE);
/* Send the page update requests down to the hypervisor. */
@@ -345,6 +420,7 @@ static int setup_guestos(int xc_handle,
free(page_array);
*pvsi = vstartinfo_start;
+ *pvss = vstack_start;
*pvke = dsi.v_kernentry;
return 0;
@@ -357,94 +433,34 @@ static int setup_guestos(int xc_handle,
return -1;
}
-static unsigned long get_filesz(int fd)
-{
- u16 sig;
- u32 _sz = 0;
- unsigned long sz;
-
- lseek(fd, 0, SEEK_SET);
- read(fd, &sig, sizeof(sig));
- sz = lseek(fd, 0, SEEK_END);
- if ( sig == 0x8b1f ) /* GZIP signature? */
- {
- lseek(fd, -4, SEEK_END);
- read(fd, &_sz, 4);
- sz = _sz;
- }
- lseek(fd, 0, SEEK_SET);
-
- return sz;
-}
-
-static char *read_kernel_image(const char *filename, unsigned long *size)
-{
- int kernel_fd = -1;
- gzFile kernel_gfd = NULL;
- char *image = NULL;
- unsigned int bytes;
-
- if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
- {
- PERROR("Could not open kernel image");
- goto out;
- }
-
- *size = get_filesz(kernel_fd);
-
- if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
- {
- PERROR("Could not allocate decompression state for state file");
- goto out;
- }
-
- if ( (image = malloc(*size)) == NULL )
- {
- PERROR("Could not allocate memory for kernel image");
- goto out;
- }
-
- if ( (bytes = gzread(kernel_gfd, image, *size)) != *size )
- {
- PERROR("Error reading kernel image, could not"
- " read the whole image (%d != %ld).", bytes, *size);
- free(image);
- image = NULL;
- }
-
- out:
- if ( kernel_gfd != NULL )
- gzclose(kernel_gfd);
- else if ( kernel_fd >= 0 )
- close(kernel_fd);
- return image;
-}
-
int xc_linux_build(int xc_handle,
u32 domid,
const char *image_name,
const char *ramdisk_name,
const char *cmdline,
unsigned int control_evtchn,
- unsigned long flags)
+ unsigned long flags,
+ unsigned int vcpus,
+ unsigned int store_evtchn,
+ unsigned long *store_mfn)
{
dom0_op_t launch_op, op;
int initrd_fd = -1;
gzFile initrd_gfd = NULL;
int rc, i;
- full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
+ vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
unsigned long nr_pages;
char *image = NULL;
unsigned long image_size, initrd_size=0;
- unsigned long vstartinfo_start, vkern_entry;
+ unsigned long vstartinfo_start, vkern_entry, vstack_start;
- if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
+ if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
{
PERROR("Could not find total pages for domain");
goto error_out;
}
- if ( (image = read_kernel_image(image_name, &image_size)) == NULL )
+ if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
goto error_out;
if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
@@ -455,7 +471,7 @@ int xc_linux_build(int xc_handle,
goto error_out;
}
- initrd_size = get_filesz(initrd_fd);
+ initrd_size = xc_get_filesz(initrd_fd);
if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
{
@@ -466,19 +482,25 @@ int xc_linux_build(int xc_handle,
if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
{
- PERROR("Unable to mlock ctxt");
+ PERROR("xc_linux_build: ctxt mlock failed");
return 1;
}
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)domid;
- op.u.getdomaininfo.ctxt = ctxt;
if ( (do_dom0_op(xc_handle, &op) < 0) ||
((u16)op.u.getdomaininfo.domain != domid) )
{
PERROR("Could not get info on domain");
goto error_out;
}
+
+ if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+ {
+ PERROR("Could not get vcpu context");
+ goto error_out;
+ }
+
if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
(ctxt->pt_base != 0) )
{
@@ -486,12 +508,13 @@ int xc_linux_build(int xc_handle,
goto error_out;
}
- if ( setup_guestos(xc_handle, domid, image, image_size,
- initrd_gfd, initrd_size, nr_pages,
- &vstartinfo_start, &vkern_entry,
- ctxt, cmdline,
- op.u.getdomaininfo.shared_info_frame,
- control_evtchn, flags) < 0 )
+ if ( setup_guest(xc_handle, domid, image, image_size,
+ initrd_gfd, initrd_size, nr_pages,
+ &vstartinfo_start, &vkern_entry,
+ &vstack_start, ctxt, cmdline,
+ op.u.getdomaininfo.shared_info_frame,
+ control_evtchn, flags, vcpus,
+ store_evtchn, store_mfn) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
@@ -508,34 +531,33 @@ int xc_linux_build(int xc_handle,
/*
* Initial register values:
- * DS,ES,FS,GS = FLAT_GUESTOS_DS
- * CS:EIP = FLAT_GUESTOS_CS:start_pc
- * SS:ESP = FLAT_GUESTOS_DS:start_stack
+ * DS,ES,FS,GS = FLAT_KERNEL_DS
+ * CS:EIP = FLAT_KERNEL_CS:start_pc
+ * SS:ESP = FLAT_KERNEL_DS:start_stack
* ESI = start_info
* [EAX,EBX,ECX,EDX,EDI,EBP are zero]
* EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
*/
- ctxt->cpu_ctxt.ds = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.es = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.fs = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.gs = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.ss = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.cs = FLAT_GUESTOS_CS;
- ctxt->cpu_ctxt.eip = vkern_entry;
- ctxt->cpu_ctxt.esp = vstartinfo_start + 2*PAGE_SIZE;
- ctxt->cpu_ctxt.esi = vstartinfo_start;
- ctxt->cpu_ctxt.eflags = (1<<9) | (1<<2);
+ ctxt->user_regs.ds = FLAT_KERNEL_DS;
+ ctxt->user_regs.es = FLAT_KERNEL_DS;
+ ctxt->user_regs.fs = FLAT_KERNEL_DS;
+ ctxt->user_regs.gs = FLAT_KERNEL_DS;
+ ctxt->user_regs.ss = FLAT_KERNEL_SS;
+ ctxt->user_regs.cs = FLAT_KERNEL_CS;
+ ctxt->user_regs.eip = vkern_entry;
+ ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
+ ctxt->user_regs.esi = vstartinfo_start;
+ ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
/* FPU is set up to default initial state. */
- memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
/* Virtual IDT is empty at start-of-day. */
for ( i = 0; i < 256; i++ )
{
ctxt->trap_ctxt[i].vector = i;
- ctxt->trap_ctxt[i].cs = FLAT_GUESTOS_CS;
+ ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
}
- ctxt->fast_trap_idx = 0;
/* No LDT. */
ctxt->ldt_ents = 0;
@@ -544,24 +566,31 @@ int xc_linux_build(int xc_handle,
ctxt->gdt_ents = 0;
/* Ring 1 stack is the initial stack. */
- ctxt->guestos_ss = FLAT_GUESTOS_DS;
- ctxt->guestos_esp = vstartinfo_start + 2*PAGE_SIZE;
+ ctxt->kernel_ss = FLAT_KERNEL_SS;
+ ctxt->kernel_sp = vstack_start + PAGE_SIZE;
/* No debugging. */
memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
/* No callback handlers. */
- ctxt->event_callback_cs = FLAT_GUESTOS_CS;
+#if defined(__i386__)
+ ctxt->event_callback_cs = FLAT_KERNEL_CS;
+ ctxt->event_callback_eip = 0;
+ ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
+ ctxt->failsafe_callback_eip = 0;
+#elif defined(__x86_64__)
ctxt->event_callback_eip = 0;
- ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS;
ctxt->failsafe_callback_eip = 0;
+ ctxt->syscall_callback_eip = 0;
+#endif
memset( &launch_op, 0, sizeof(launch_op) );
- launch_op.u.builddomain.domain = (domid_t)domid;
- launch_op.u.builddomain.ctxt = ctxt;
+ launch_op.u.setdomaininfo.domain = (domid_t)domid;
+ launch_op.u.setdomaininfo.vcpu = 0;
+ launch_op.u.setdomaininfo.ctxt = ctxt;
- launch_op.cmd = DOM0_BUILDDOMAIN;
+ launch_op.cmd = DOM0_SETDOMAININFO;
rc = do_dom0_op(xc_handle, &launch_op);
return rc;
@@ -576,284 +605,3 @@ int xc_linux_build(int xc_handle,
return -1;
}
-
-static inline int is_loadable_phdr(Elf_Phdr *phdr)
-{
- return ((phdr->p_type == PT_LOAD) &&
- ((phdr->p_flags & (PF_W|PF_X)) != 0));
-}
-
-static int parseelfimage(char *elfbase,
- unsigned long elfsize,
- struct domain_setup_info *dsi)
-{
- Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
- Elf_Phdr *phdr;
- Elf_Shdr *shdr;
- unsigned long kernstart = ~0UL, kernend=0UL;
- char *shstrtab, *guestinfo=NULL, *p;
- int h;
-
- if ( !IS_ELF(*ehdr) )
- {
- ERROR("Kernel image does not have an ELF header.");
- return -EINVAL;
- }
-
- if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
- {
- ERROR("ELF program headers extend beyond end of image.");
- return -EINVAL;
- }
-
- if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
- {
- ERROR("ELF section headers extend beyond end of image.");
- return -EINVAL;
- }
-
- /* Find the section-header strings table. */
- if ( ehdr->e_shstrndx == SHN_UNDEF )
- {
- ERROR("ELF image has no section-header strings table (shstrtab).");
- return -EINVAL;
- }
- shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff +
- (ehdr->e_shstrndx*ehdr->e_shentsize));
- shstrtab = elfbase + shdr->sh_offset;
-
- /* Find the special '__xen_guest' section and check its contents. */
- for ( h = 0; h < ehdr->e_shnum; h++ )
- {
- shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff + (h*ehdr->e_shentsize));
- if ( strcmp(&shstrtab[shdr->sh_name], "__xen_guest") != 0 )
- continue;
-
- guestinfo = elfbase + shdr->sh_offset;
-
- if ( (strstr(guestinfo, "LOADER=generic") == NULL) &&
- (strstr(guestinfo, "GUEST_OS=linux") == NULL) )
- {
- ERROR("Will only load images built for the generic loader "
- "or Linux images");
- ERROR("Actually saw: '%s'", guestinfo);
- return -EINVAL;
- }
-
- if ( (strstr(guestinfo, "XEN_VER=2.0") == NULL) )
- {
- ERROR("Will only load images built for Xen v2.0");
- ERROR("Actually saw: '%s'", guestinfo);
- return -EINVAL;
- }
-
- break;
- }
- if ( guestinfo == NULL )
- {
- ERROR("Not a Xen-ELF image: '__xen_guest' section not found.");
- return -EINVAL;
- }
-
- for ( h = 0; h < ehdr->e_phnum; h++ )
- {
- phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
- if ( !is_loadable_phdr(phdr) )
- continue;
- if ( phdr->p_vaddr < kernstart )
- kernstart = phdr->p_vaddr;
- if ( (phdr->p_vaddr + phdr->p_memsz) > kernend )
- kernend = phdr->p_vaddr + phdr->p_memsz;
- }
-
- if ( (kernstart > kernend) ||
- (ehdr->e_entry < kernstart) ||
- (ehdr->e_entry > kernend) )
- {
- ERROR("Malformed ELF image.");
- return -EINVAL;
- }
-
- dsi->v_start = kernstart;
- if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
- dsi->v_start = strtoul(p+10, &p, 0);
-
- if ( (p = strstr(guestinfo, "PT_MODE_WRITABLE")) != NULL )
- dsi->use_writable_pagetables = 1;
-
- if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
- dsi->load_bsd_symtab = 1;
-
- dsi->v_kernstart = kernstart;
- dsi->v_kernend = kernend;
- dsi->v_kernentry = ehdr->e_entry;
-
- dsi->v_end = dsi->v_kernend;
-
- return 0;
-}
-
-static int
-loadelfimage(
- char *elfbase, int xch, u32 dom, unsigned long *parray,
- unsigned long vstart)
-{
- Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
- Elf_Phdr *phdr;
- int h;
-
- char *va;
- unsigned long pa, done, chunksz;
-
- for ( h = 0; h < ehdr->e_phnum; h++ )
- {
- phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
- if ( !is_loadable_phdr(phdr) )
- continue;
-
- for ( done = 0; done < phdr->p_filesz; done += chunksz )
- {
- pa = (phdr->p_vaddr + done) - vstart;
- va = xc_map_foreign_range(
- xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
- chunksz = phdr->p_filesz - done;
- if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
- chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
- memcpy(va + (pa & (PAGE_SIZE-1)),
- elfbase + phdr->p_offset + done, chunksz);
- munmap(va, PAGE_SIZE);
- }
-
- for ( ; done < phdr->p_memsz; done += chunksz )
- {
- pa = (phdr->p_vaddr + done) - vstart;
- va = xc_map_foreign_range(
- xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
- chunksz = phdr->p_memsz - done;
- if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
- chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
- memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
- munmap(va, PAGE_SIZE);
- }
- }
-
- return 0;
-}
-
-static void
-map_memcpy(
- unsigned long dst, char *src, unsigned long size,
- int xch, u32 dom, unsigned long *parray, unsigned long vstart)
-{
- char *va;
- unsigned long chunksz, done, pa;
-
- for ( done = 0; done < size; done += chunksz )
- {
- pa = dst + done - vstart;
- va = xc_map_foreign_range(
- xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
- chunksz = size - done;
- if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
- chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
- memcpy(va + (pa & (PAGE_SIZE-1)), src + done, chunksz);
- munmap(va, PAGE_SIZE);
- }
-}
-
-#define ELFROUND (ELFSIZE / 8)
-
-static int
-loadelfsymtab(
- char *elfbase, int xch, u32 dom, unsigned long *parray,
- struct domain_setup_info *dsi)
-{
- Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase, *sym_ehdr;
- Elf_Shdr *shdr;
- unsigned long maxva, symva;
- char *p;
- int h, i;
-
- p = malloc(sizeof(int) + sizeof(Elf_Ehdr) +
- ehdr->e_shnum * sizeof(Elf_Shdr));
- if (p == NULL)
- return 0;
-
- maxva = (dsi->v_kernend + ELFROUND - 1) & ~(ELFROUND - 1);
- symva = maxva;
- maxva += sizeof(int);
- dsi->symtab_addr = maxva;
- dsi->symtab_len = 0;
- maxva += sizeof(Elf_Ehdr) + ehdr->e_shnum * sizeof(Elf_Shdr);
- maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
-
- shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr));
- memcpy(shdr, elfbase + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr));
-
- for ( h = 0; h < ehdr->e_shnum; h++ )
- {
- if ( shdr[h].sh_type == SHT_STRTAB )
- {
- /* Look for a strtab @i linked to symtab @h. */
- for ( i = 0; i < ehdr->e_shnum; i++ )
- if ( (shdr[i].sh_type == SHT_SYMTAB) &&
- (shdr[i].sh_link == h) )
- break;
- /* Skip symtab @h if we found no corresponding strtab @i. */
- if ( i == ehdr->e_shnum )
- {
- shdr[h].sh_offset = 0;
- continue;
- }
- }
-
- if ( (shdr[h].sh_type == SHT_STRTAB) ||
- (shdr[h].sh_type == SHT_SYMTAB) )
- {
- if ( parray != NULL )
- map_memcpy(maxva, elfbase + shdr[h].sh_offset, shdr[h].sh_size,
- xch, dom, parray, dsi->v_start);
-
- /* Mangled to be based on ELF header location. */
- shdr[h].sh_offset = maxva - dsi->symtab_addr;
-
- dsi->symtab_len += shdr[h].sh_size;
- maxva += shdr[h].sh_size;
- maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
- }
-
- shdr[h].sh_name = 0; /* Name is NULL. */
- }
-
- if ( dsi->symtab_len == 0 )
- {
- dsi->symtab_addr = 0;
- goto out;
- }
-
- if ( parray != NULL )
- {
- *(int *)p = maxva - dsi->symtab_addr;
- sym_ehdr = (Elf_Ehdr *)(p + sizeof(int));
- memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr));
- sym_ehdr->e_phoff = 0;
- sym_ehdr->e_shoff = sizeof(Elf_Ehdr);
- sym_ehdr->e_phentsize = 0;
- sym_ehdr->e_phnum = 0;
- sym_ehdr->e_shstrndx = SHN_UNDEF;
-
- /* Copy total length, crafted ELF header and section header table */
- map_memcpy(symva, p, sizeof(int) + sizeof(Elf_Ehdr) +
- ehdr->e_shnum * sizeof(Elf_Shdr), xch, dom, parray,
- dsi->v_start);
- }
-
- dsi->symtab_len = maxva - dsi->symtab_addr;
- dsi->v_end = round_pgup(maxva);
-
- out:
- if ( p != NULL )
- free(p);
-
- return 0;
-}
diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c
index 46373a2806..57814ca0a8 100644
--- a/tools/libxc/xc_linux_restore.c
+++ b/tools/libxc/xc_linux_restore.c
@@ -13,95 +13,58 @@
#define DEBUG 0
+#if 1
+#define ERR(_f, _a...) fprintf ( stderr, _f , ## _a ); fflush(stderr)
+#else
+#define ERR(_f, _a...) ((void)0)
+#endif
+
#if DEBUG
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#define DPRINTF(_f, _a...) fprintf ( stdout, _f , ## _a ); fflush(stdout)
#else
#define DPRINTF(_f, _a...) ((void)0)
#endif
-static int get_pfn_list(int xc_handle,
- u32 domain_id,
- unsigned long *pfn_buf,
- unsigned long max_pfns)
-{
- dom0_op_t op;
- int ret;
- op.cmd = DOM0_GETMEMLIST;
- op.u.getmemlist.domain = (domid_t)domain_id;
- op.u.getmemlist.max_pfns = max_pfns;
- op.u.getmemlist.buffer = pfn_buf;
-
- if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
- {
- PERROR("Could not lock pfn list buffer");
- return -1;
- }
-
- ret = do_dom0_op(xc_handle, &op);
-
- (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long));
-
- return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
-}
+#define PROGRESS 0
+#if PROGRESS
+#define PPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a ); fflush(stderr)
+#else
+#define PPRINTF(_f, _a...)
+#endif
-/** Read the vmconfig string from the state input.
- * It is stored as a 4-byte count 'n' followed by n bytes.
- * The config data is stored in a new string in 'ioctxt->vmconfig',
- * and is null-terminated. The count is stored in 'ioctxt->vmconfig_n'.
- *
- * @param ioctxt i/o context
- * @return 0 on success, non-zero on error.
- */
-static int read_vmconfig(XcIOContext *ioctxt)
+ssize_t
+read_exact(int fd, void *buf, size_t count)
{
- int err = -1;
-
- if ( xcio_read(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n)) )
- goto exit;
-
- ioctxt->vmconfig = malloc(ioctxt->vmconfig_n + 1);
- if ( ioctxt->vmconfig == NULL )
- goto exit;
-
- if ( xcio_read(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n) )
- goto exit;
-
- ioctxt->vmconfig[ioctxt->vmconfig_n] = '\0';
- err = 0;
-
- exit:
- if ( err )
- {
- if ( ioctxt->vmconfig != NULL )
- free(ioctxt->vmconfig);
- ioctxt->vmconfig = NULL;
- ioctxt->vmconfig_n = 0;
+ int r = 0, s;
+ unsigned char *b = buf;
+
+ while (r < count) {
+ s = read(fd, &b[r], count - r);
+ if (s <= 0)
+ break;
+ r += s;
}
- return err;
+
+ return r;
}
-int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
+int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns)
{
dom0_op_t op;
int rc = 1, i, n, k;
unsigned long mfn, pfn, xpfn;
unsigned int prev_pc, this_pc;
- u32 dom = 0;
- int verify = 0;
-
- /* Number of page frames in use by this Linux session. */
- unsigned long nr_pfns;
+ int verify = 0;
+ int err;
/* The new domain's shared-info frame number. */
unsigned long shared_info_frame;
- unsigned char shared_info[PAGE_SIZE]; /* saved contents from file */
+ unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
+ shared_info_t *shared_info = (shared_info_t *)shared_info_page;
/* A copy of the CPU context of the guest. */
- full_execution_context_t ctxt;
+ vcpu_guest_context_t ctxt;
- /* First 16 bytes of the state file must contain 'LinuxGuestRecord'. */
- char signature[16];
-
/* A table containg the type of each PFN (/not/ MFN!). */
unsigned long *pfn_type = NULL;
@@ -123,116 +86,84 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
/* A temporary mapping of the guest's suspend record. */
suspend_record_t *p_srec;
- char *region_base, *p_gdt;
+ char *region_base;
mmu_t *mmu = NULL;
/* used by debug verify code */
unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
- xcio_info(ioctxt, "xc_linux_restore start\n");
+#define MAX_PIN_BATCH 1024
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ unsigned int nr_pins = 0;
- if ( mlock(&ctxt, sizeof(ctxt) ) )
- {
+ DPRINTF("xc_linux_restore start\n");
+
+ if (mlock(&ctxt, sizeof(ctxt))) {
/* needed for when we do the build dom0 op,
but might as well do early */
- PERROR("Unable to mlock ctxt");
+ ERR("Unable to mlock ctxt");
return 1;
}
- /* Start reading the saved-domain record. */
- if ( xcio_read(ioctxt, signature, 16) ||
- (memcmp(signature, "LinuxGuestRecord", 16) != 0) )
- {
- xcio_error(ioctxt, "Unrecognised state format -- no signature found");
- goto out;
- }
-
- if ( xcio_read(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
- xcio_read(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) )
- {
- xcio_error(ioctxt, "Error reading header");
- goto out;
- }
-
- if ( read_vmconfig(ioctxt) )
- {
- xcio_error(ioctxt, "Error writing vmconfig");
- goto out;
- }
-
- if ( nr_pfns > 1024*1024 )
- {
- xcio_error(ioctxt, "Invalid state file -- pfn count out of range");
- goto out;
+ if (read_exact(io_fd, pfn_to_mfn_frame_list, PAGE_SIZE) != PAGE_SIZE) {
+ ERR("read pfn_to_mfn_frame_list failed");
+ goto out;
}
/* We want zeroed memory so use calloc rather than malloc. */
- pfn_to_mfn_table = calloc(1, 4 * nr_pfns);
- pfn_type = calloc(1, 4 * nr_pfns);
- region_mfn = calloc(1, 4 * MAX_BATCH_SIZE);
+ pfn_to_mfn_table = calloc(4, nr_pfns);
+ pfn_type = calloc(4, nr_pfns);
+ region_mfn = calloc(4, MAX_BATCH_SIZE);
- if ( (pfn_to_mfn_table == NULL) ||
- (pfn_type == NULL) ||
- (region_mfn == NULL) )
- {
+ if ((pfn_to_mfn_table == NULL) || (pfn_type == NULL) ||
+ (region_mfn == NULL)) {
+ ERR("memory alloc failed");
errno = ENOMEM;
goto out;
}
- if ( mlock(region_mfn, 4 * MAX_BATCH_SIZE ) )
- {
- xcio_error(ioctxt, "Could not mlock region_mfn");
+ if (mlock(region_mfn, 4 * MAX_BATCH_SIZE)) {
+ ERR("Could not mlock region_mfn");
goto out;
}
- /* Create domain on CPU -1 so that it may auto load-balance in future. */
- if ( xc_domain_create(xc_handle, nr_pfns * (PAGE_SIZE / 1024),
- -1, 1, &dom) )
- {
- xcio_error(ioctxt, "Could not create domain. pfns=%d, %dKB",
- nr_pfns,nr_pfns * (PAGE_SIZE / 1024));
- goto out;
- }
-
- ioctxt->domain = dom;
- xcio_info(ioctxt, "Created domain %ld\n",dom);
-
/* Get the domain's shared-info frame. */
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t)dom;
- op.u.getdomaininfo.ctxt = NULL;
- if ( do_dom0_op(xc_handle, &op) < 0 )
- {
- xcio_error(ioctxt, "Could not get information on new domain");
+ if (do_dom0_op(xc_handle, &op) < 0) {
+ ERR("Could not get information on new domain");
goto out;
}
shared_info_frame = op.u.getdomaininfo.shared_info_frame;
- if(ioctxt->flags & XCFLAGS_CONFIGURE)
- {
- if(xcio_configure_domain(ioctxt))
- {
- xcio_error(ioctxt, "Configuring domain failed");
- goto out;
- }
+ err = xc_domain_setmaxmem(xc_handle, dom, nr_pfns * PAGE_SIZE / 1024);
+ if (err != 0) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ err = xc_domain_memory_increase_reservation(xc_handle, dom,
+ nr_pfns * PAGE_SIZE / 1024);
+ if (err != 0) {
+ errno = ENOMEM;
+ goto out;
}
/* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
- if ( get_pfn_list(xc_handle, dom, pfn_to_mfn_table, nr_pfns) != nr_pfns )
- {
- xcio_error(ioctxt, "Did not read correct number of frame "
- "numbers for new dom");
+ if (xc_get_pfn_list(xc_handle, dom, pfn_to_mfn_table, nr_pfns) !=
+ nr_pfns) {
+ ERR("Did not read correct number of frame numbers for new dom");
goto out;
}
- if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
- {
- xcio_error(ioctxt, "Could not initialise for MMU updates");
+ mmu = init_mmu_updates(xc_handle, dom);
+ if (mmu == NULL) {
+ ERR("Could not initialise for MMU updates");
goto out;
}
- xcio_info(ioctxt, "Reloading memory pages: 0%%");
+ DPRINTF("Reloading memory pages: 0%%");
/*
* Now simply read each saved frame into its new machine frame.
@@ -249,17 +180,17 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
this_pc = (n * 100) / nr_pfns;
if ( (this_pc - prev_pc) >= 5 )
{
- xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
+ PPRINTF("\b\b\b\b%3d%%", this_pc);
prev_pc = this_pc;
}
- if ( xcio_read(ioctxt, &j, sizeof(int)) )
+ if ( read_exact(io_fd, &j, sizeof(int)) != sizeof(int) )
{
- xcio_error(ioctxt, "Error when reading from state file");
+ ERR("Error when reading batch size");
goto out;
}
- DPRINTF("batch %d\n",j);
+ PPRINTF("batch %d\n",j);
if ( j == -1 )
{
@@ -273,12 +204,13 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
if ( j > MAX_BATCH_SIZE )
{
- xcio_error(ioctxt, "Max batch size exceeded. Giving up.");
+ ERR("Max batch size exceeded. Giving up.");
goto out;
}
- if ( xcio_read(ioctxt, region_pfn_type, j*sizeof(unsigned long)) ) {
- xcio_error(ioctxt, "Error when reading from state file");
+ if ( read_exact(io_fd, region_pfn_type, j*sizeof(unsigned long)) !=
+ j*sizeof(unsigned long) ) {
+ ERR("Error when reading region pfn types");
goto out;
}
@@ -300,7 +232,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
region_mfn,
j )) == 0 )
{
- xcio_error(ioctxt, "map batch failed");
+ ERR("map batch failed");
goto out;
}
@@ -314,7 +246,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
if (pfn>nr_pfns)
{
- xcio_error(ioctxt, "pfn out of range");
+ ERR("pfn out of range");
goto out;
}
@@ -329,9 +261,9 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
else
ppage = (unsigned long*) (region_base + i*PAGE_SIZE);
- if ( xcio_read(ioctxt, ppage, PAGE_SIZE) )
+ if ( read_exact(io_fd, ppage, PAGE_SIZE) != PAGE_SIZE )
{
- xcio_error(ioctxt, "Error when reading from state file");
+ ERR("Error when reading pagetable page");
goto out;
}
@@ -349,7 +281,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
xpfn = ppage[k] >> PAGE_SHIFT;
if ( xpfn >= nr_pfns )
{
- xcio_error(ioctxt, "Frame number in type %lu page "
+ ERR("Frame number in type %lu page "
"table is out of range. i=%d k=%d "
"pfn=0x%lx nr_pfns=%lu",
region_pfn_type[i]>>28, i,
@@ -377,7 +309,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
if ( xpfn >= nr_pfns )
{
- xcio_error(ioctxt, "Frame number in type %lu page"
+ ERR("Frame number in type %lu page"
" table is out of range. i=%d k=%d "
"pfn=%lu nr_pfns=%lu",
region_pfn_type[i]>>28, i, k,
@@ -394,7 +326,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
break;
default:
- xcio_error(ioctxt, "Bogus page type %lx page table is "
+ ERR("Bogus page type %lx page table is "
"out of range. i=%d nr_pfns=%lu",
region_pfn_type[i], i, nr_pfns);
goto out;
@@ -435,7 +367,10 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
n+=j; /* crude stats */
}
- xcio_info(ioctxt, "Received all pages\n");
+ DPRINTF("Received all pages\n");
+
+ if ( finish_mmu_updates(xc_handle, mmu) )
+ goto out;
/*
* Pin page tables. Do this after writing to them as otherwise Xen
@@ -443,62 +378,50 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
*/
for ( i = 0; i < nr_pfns; i++ )
{
+ if ( (pfn_type[i] & LPINTAB) == 0 )
+ continue;
if ( pfn_type[i] == (L1TAB|LPINTAB) )
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ else /* pfn_type[i] == (L2TAB|LPINTAB) */
+ pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+ pin[nr_pins].mfn = pfn_to_mfn_table[i];
+ if ( ++nr_pins == MAX_PIN_BATCH )
{
- if ( add_mmu_update(xc_handle, mmu,
- (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
- MMU_EXTENDED_COMMAND,
- MMUEXT_PIN_L1_TABLE) ) {
- printf("ERR pin L1 pfn=%lx mfn=%lx\n",
- (unsigned long)i, pfn_to_mfn_table[i]);
+ if ( do_mmuext_op(xc_handle, pin, nr_pins, dom) < 0 )
goto out;
- }
+ nr_pins = 0;
}
}
- /* must pin all L1's before L2's (need consistent va back ptr) */
- for ( i = 0; i < nr_pfns; i++ )
- {
- if ( pfn_type[i] == (L2TAB|LPINTAB) )
- {
- if ( add_mmu_update(xc_handle, mmu,
- (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
- MMU_EXTENDED_COMMAND,
- MMUEXT_PIN_L2_TABLE) )
- {
- printf("ERR pin L2 pfn=%lx mfn=%lx\n",
- (unsigned long)i, pfn_to_mfn_table[i]);
- goto out;
- }
- }
- }
-
- if ( finish_mmu_updates(xc_handle, mmu) ) goto out;
+ if ( (nr_pins != 0) &&
+ (do_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) )
+ goto out;
- xcio_info(ioctxt, "\b\b\b\b100%%\n");
- xcio_info(ioctxt, "Memory reloaded.\n");
+ DPRINTF("\b\b\b\b100%%\n");
+ DPRINTF("Memory reloaded.\n");
/* Get the list of PFNs that are not in the psuedo-phys map */
{
unsigned int count, *pfntab;
int rc;
- if ( xcio_read(ioctxt, &count, sizeof(count)) )
+ if ( read_exact(io_fd, &count, sizeof(count)) != sizeof(count) )
{
- xcio_error(ioctxt, "Error when reading from state file");
+ ERR("Error when reading pfn count");
goto out;
}
pfntab = malloc( sizeof(unsigned int) * count );
if ( pfntab == NULL )
{
- xcio_error(ioctxt, "Out of memory");
+ ERR("Out of memory");
goto out;
}
- if ( xcio_read(ioctxt, pfntab, sizeof(unsigned int)*count) )
+ if ( read_exact(io_fd, pfntab, sizeof(unsigned int)*count) !=
+ sizeof(unsigned int)*count )
{
- xcio_error(ioctxt, "Error when reading pfntab from state file");
+ ERR("Error when reading pfntab");
goto out;
}
@@ -515,7 +438,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
MEMOP_decrease_reservation,
pfntab, count, 0, dom )) <0 )
{
- xcio_error(ioctxt, "Could not decrease reservation : %d",rc);
+ ERR("Could not decrease reservation : %d",rc);
goto out;
}
else
@@ -525,21 +448,21 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
}
}
- if ( xcio_read(ioctxt, &ctxt, sizeof(ctxt)) ||
- xcio_read(ioctxt, shared_info, PAGE_SIZE) )
+ if ( read_exact(io_fd, &ctxt, sizeof(ctxt)) != sizeof(ctxt) ||
+ read_exact(io_fd, shared_info_page, PAGE_SIZE) != PAGE_SIZE )
{
- xcio_error(ioctxt, "Error when reading from state file");
+ ERR("Error when reading ctxt or shared info page");
goto out;
}
/* Uncanonicalise the suspend-record frame number and poke resume rec. */
- pfn = ctxt.cpu_ctxt.esi;
+ pfn = ctxt.user_regs.esi;
if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
{
- xcio_error(ioctxt, "Suspend record frame number is bad");
+ ERR("Suspend record frame number is bad");
goto out;
}
- ctxt.cpu_ctxt.esi = mfn = pfn_to_mfn_table[pfn];
+ ctxt.user_regs.esi = mfn = pfn_to_mfn_table[pfn];
p_srec = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_WRITE, mfn);
p_srec->resume_info.nr_pages = nr_pfns;
@@ -550,7 +473,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
/* Uncanonicalise each GDT frame number. */
if ( ctxt.gdt_ents > 8192 )
{
- xcio_error(ioctxt, "GDT entry count out of range");
+ ERR("GDT entry count out of range");
goto out;
}
@@ -559,34 +482,28 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
pfn = ctxt.gdt_frames[i];
if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
{
- xcio_error(ioctxt, "GDT frame number is bad");
+ ERR("GDT frame number is bad");
goto out;
}
ctxt.gdt_frames[i] = pfn_to_mfn_table[pfn];
}
- /* Zero hypervisor GDT entries (supresses ugly warning) */
- p_gdt = xc_map_foreign_range(
- xc_handle, dom, PAGE_SIZE, PROT_WRITE, ctxt.gdt_frames[0]);
- memset( p_gdt + FIRST_RESERVED_GDT_ENTRY*8, 0,
- NR_RESERVED_GDT_ENTRIES*8 );
- munmap( p_gdt, PAGE_SIZE );
-
/* Uncanonicalise the page table base pointer. */
pfn = ctxt.pt_base >> PAGE_SHIFT;
if ( (pfn >= nr_pfns) || ((pfn_type[pfn]&LTABTYPE_MASK) != L2TAB) )
{
printf("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
pfn, nr_pfns, pfn_type[pfn], (unsigned long)L2TAB);
- xcio_error(ioctxt, "PT base is bad.");
+ ERR("PT base is bad.");
goto out;
}
ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
/* clear any pending events and the selector */
- memset(&(((shared_info_t *)shared_info)->evtchn_pending[0]),
- 0, sizeof (((shared_info_t *)shared_info)->evtchn_pending)+
- sizeof(((shared_info_t *)shared_info)->evtchn_pending_sel));
+ memset(&(shared_info->evtchn_pending[0]), 0,
+ sizeof (shared_info->evtchn_pending));
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ shared_info->vcpu_data[i].evtchn_pending_sel = 0;
/* Copy saved contents of shared-info page. No checking needed. */
ppage = xc_map_foreign_range(
@@ -602,7 +519,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
pfn = pfn_to_mfn_frame_list[i];
if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
{
- xcio_error(ioctxt, "PFN-to-MFN frame number is bad");
+ ERR("PFN-to-MFN frame number is bad");
goto out;
}
mfn = pfn_to_mfn_table[pfn];
@@ -615,7 +532,7 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
pfn_to_mfn_frame_list,
(nr_pfns+1023)/1024 )) == 0 )
{
- xcio_error(ioctxt, "Couldn't map pfn_to_mfn table");
+ ERR("Couldn't map pfn_to_mfn table");
goto out;
}
@@ -626,67 +543,63 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
/*
* Safety checking of saved context:
- * 1. cpu_ctxt is fine, as Xen checks that on context switch.
+ * 1. user_regs is fine, as Xen checks that on context switch.
* 2. fpu_ctxt is fine, as it can't hurt Xen.
* 3. trap_ctxt needs the code selectors checked.
- * 4. fast_trap_idx is checked by Xen.
- * 5. ldt base must be page-aligned, no more than 8192 ents, ...
- * 6. gdt already done, and further checking is done by Xen.
- * 7. check that guestos_ss is safe.
- * 8. pt_base is already done.
- * 9. debugregs are checked by Xen.
- * 10. callback code selectors need checking.
+ * 4. ldt base must be page-aligned, no more than 8192 ents, ...
+ * 5. gdt already done, and further checking is done by Xen.
+ * 6. check that kernel_ss is safe.
+ * 7. pt_base is already done.
+ * 8. debugregs are checked by Xen.
+ * 9. callback code selectors need checking.
*/
for ( i = 0; i < 256; i++ )
{
ctxt.trap_ctxt[i].vector = i;
if ( (ctxt.trap_ctxt[i].cs & 3) == 0 )
- ctxt.trap_ctxt[i].cs = FLAT_GUESTOS_CS;
+ ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
}
- if ( (ctxt.guestos_ss & 3) == 0 )
- ctxt.guestos_ss = FLAT_GUESTOS_DS;
+ if ( (ctxt.kernel_ss & 3) == 0 )
+ ctxt.kernel_ss = FLAT_KERNEL_DS;
+#if defined(__i386__)
if ( (ctxt.event_callback_cs & 3) == 0 )
- ctxt.event_callback_cs = FLAT_GUESTOS_CS;
+ ctxt.event_callback_cs = FLAT_KERNEL_CS;
if ( (ctxt.failsafe_callback_cs & 3) == 0 )
- ctxt.failsafe_callback_cs = FLAT_GUESTOS_CS;
+ ctxt.failsafe_callback_cs = FLAT_KERNEL_CS;
+#endif
if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
(ctxt.ldt_ents > 8192) ||
(ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
((ctxt.ldt_base + ctxt.ldt_ents*8) > HYPERVISOR_VIRT_START) )
{
- xcio_error(ioctxt, "Bad LDT base or size");
+ ERR("Bad LDT base or size");
goto out;
}
- xcio_info(ioctxt, "Domain ready to be built.\n");
+ DPRINTF("Domain ready to be built.\n");
- op.cmd = DOM0_BUILDDOMAIN;
- op.u.builddomain.domain = (domid_t)dom;
- op.u.builddomain.ctxt = &ctxt;
+ op.cmd = DOM0_SETDOMAININFO;
+ op.u.setdomaininfo.domain = (domid_t)dom;
+ op.u.setdomaininfo.vcpu = 0;
+ op.u.setdomaininfo.ctxt = &ctxt;
rc = do_dom0_op(xc_handle, &op);
if ( rc != 0 )
{
- xcio_error(ioctxt, "Couldn't build the domain");
+ ERR("Couldn't build the domain");
goto out;
}
- if ( ioctxt->flags & XCFLAGS_CONFIGURE )
- {
- xcio_info(ioctxt, "Domain ready to be unpaused\n");
- op.cmd = DOM0_UNPAUSEDOMAIN;
- op.u.unpausedomain.domain = (domid_t)dom;
- rc = do_dom0_op(xc_handle, &op);
- }
-
- if ( rc == 0 )
- {
+ DPRINTF("Domain ready to be unpaused\n");
+ op.cmd = DOM0_UNPAUSEDOMAIN;
+ op.u.unpausedomain.domain = (domid_t)dom;
+ rc = do_dom0_op(xc_handle, &op);
+ if (rc == 0) {
/* Success: print the domain id. */
- xcio_info(ioctxt, "DOM=%lu\n", dom);
+ DPRINTF("DOM=%u\n", dom);
return 0;
}
-
out:
if ( (rc != 0) && (dom != 0) )
xc_domain_destroy(xc_handle, dom);
@@ -697,9 +610,6 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt)
if ( pfn_type != NULL )
free(pfn_type);
- if ( rc == 0 )
- ioctxt->domain = dom;
-
- DPRINTF("Restore exit with rc=%d\n",rc);
+ DPRINTF("Restore exit with rc=%d\n", rc);
return rc;
}
diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c
index ab952230dd..feb051884d 100644
--- a/tools/libxc/xc_linux_save.c
+++ b/tools/libxc/xc_linux_save.c
@@ -10,25 +10,32 @@
#include <sys/time.h>
#include "xc_private.h"
#include <xen/linux/suspend.h>
+#include <xen/io/domain_controller.h>
#include <time.h>
#define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */
#define MAX_MBIT_RATE 500
-#define DEBUG 0
-#define DDEBUG 0
+#define DEBUG 0
+
+#if 1
+#define ERR(_f, _a...) fprintf ( stderr, _f , ## _a )
+#else
+#define ERR(_f, _a...) ((void)0)
+#endif
#if DEBUG
-#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#define DPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a )
#else
#define DPRINTF(_f, _a...) ((void)0)
#endif
-#if DDEBUG
-#define DDPRINTF(_f, _a...) printf ( _f , ## _a )
+#define PROGRESS 0
+#if PROGRESS
+#define PPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a )
#else
-#define DDPRINTF(_f, _a...) ((void)0)
+#define PPRINTF(_f, _a...)
#endif
/*
@@ -143,7 +150,7 @@ static long long tv_delta( struct timeval *new, struct timeval *old )
}
-#define START_MBIT_RATE ioctxt->resource
+#define START_MBIT_RATE 0 //ioctxt->resource
static int mbit_rate, ombit_rate = 0;
static int burst_time_us = -1;
@@ -166,7 +173,9 @@ static int burst_time_us = -1;
#define RATE_TO_BTU 781250
#define BURST_TIME_US burst_time_us
-static int xcio_ratewrite(XcIOContext *ioctxt, void *buf, int n){
+static int
+ratewrite(int io_fd, void *buf, int n)
+{
static int budget = 0;
static struct timeval last_put = { 0 };
struct timeval now;
@@ -174,16 +183,15 @@ static int xcio_ratewrite(XcIOContext *ioctxt, void *buf, int n){
long long delta;
if (START_MBIT_RATE == 0)
- return xcio_write(ioctxt, buf, n);
+ return write(io_fd, buf, n);
budget -= n;
if (budget < 0) {
if (MBIT_RATE != ombit_rate) {
BURST_TIME_US = RATE_TO_BTU / MBIT_RATE;
ombit_rate = MBIT_RATE;
- xcio_info(ioctxt,
- "rate limit: %d mbit/s burst budget %d slot time %d\n",
- MBIT_RATE, BURST_BUDGET, BURST_TIME_US);
+ DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
+ MBIT_RATE, BURST_BUDGET, BURST_TIME_US);
}
if (last_put.tv_sec == 0) {
budget += BURST_BUDGET;
@@ -211,7 +219,7 @@ static int xcio_ratewrite(XcIOContext *ioctxt, void *buf, int n){
}
}
}
- return xcio_write(ioctxt, buf, n);
+ return write(io_fd, buf, n);
}
static int print_stats( int xc_handle, u32 domid,
@@ -229,11 +237,11 @@ static int print_stats( int xc_handle, u32 domid,
gettimeofday(&wall_now, NULL);
- d0_cpu_now = xc_domain_get_cpu_usage( xc_handle, 0 )/1000;
- d1_cpu_now = xc_domain_get_cpu_usage( xc_handle, domid )/1000;
+ d0_cpu_now = xc_domain_get_cpu_usage(xc_handle, 0, /* FIXME */ 0)/1000;
+ d1_cpu_now = xc_domain_get_cpu_usage(xc_handle, domid, /* FIXME */ 0)/1000;
if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
- printf("ARRHHH!!\n");
+ fprintf(stderr, "ARRHHH!!\n");
wall_delta = tv_delta(&wall_now,&wall_last)/1000;
@@ -243,14 +251,15 @@ static int print_stats( int xc_handle, u32 domid,
d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
if ( print )
- printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
- "dirtied %dMb/s %" PRId32 " pages\n",
- wall_delta,
- (int)((d0_cpu_delta*100)/wall_delta),
- (int)((d1_cpu_delta*100)/wall_delta),
- (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
- (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
- stats->dirty_count);
+ fprintf(stderr,
+ "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
+ "dirtied %dMb/s %" PRId32 " pages\n",
+ wall_delta,
+ (int)((d0_cpu_delta*100)/wall_delta),
+ (int)((d1_cpu_delta*100)/wall_delta),
+ (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
+ (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
+ stats->dirty_count);
if (((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate) {
mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
@@ -266,21 +275,6 @@ static int print_stats( int xc_handle, u32 domid,
return 0;
}
-/** Write the vmconfig string.
- * It is stored as a 4-byte count 'n' followed by n bytes.
- *
- * @param ioctxt i/o context
- * @return 0 on success, non-zero on error.
- */
-static int write_vmconfig(XcIOContext *ioctxt){
- int err = -1;
- if(xcio_write(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n))) goto exit;
- if(xcio_write(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n)) goto exit;
- err = 0;
- exit:
- return err;
-}
-
static int analysis_phase( int xc_handle, u32 domid,
int nr_pfns, unsigned long *arr, int runs )
{
@@ -297,7 +291,7 @@ static int analysis_phase( int xc_handle, u32 domid,
xc_shadow_control( xc_handle, domid,
DOM0_SHADOW_CONTROL_OP_CLEAN,
arr, nr_pfns, NULL);
- printf("#Flush\n");
+ fprintf(stderr, "#Flush\n");
for ( i = 0; i < 40; i++ )
{
usleep(50000);
@@ -306,11 +300,11 @@ static int analysis_phase( int xc_handle, u32 domid,
DOM0_SHADOW_CONTROL_OP_PEEK,
NULL, 0, &stats);
- printf("now= %lld faults= %" PRId32 " dirty= %" PRId32
- " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n",
- ((now-start)+500)/1000,
- stats.fault_count, stats.dirty_count,
- stats.dirty_net_count, stats.dirty_block_count);
+ fprintf(stderr, "now= %lld faults= %" PRId32 " dirty= %" PRId32
+ " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n",
+ ((now-start)+500)/1000,
+ stats.fault_count, stats.dirty_count,
+ stats.dirty_net_count, stats.dirty_block_count);
}
}
@@ -318,36 +312,49 @@ static int analysis_phase( int xc_handle, u32 domid,
}
-int suspend_and_state(int xc_handle, XcIOContext *ioctxt,
- xc_domaininfo_t *info,
- full_execution_context_t *ctxt)
+int suspend_and_state(int xc_handle, int io_fd, int dom,
+ xc_dominfo_t *info,
+ vcpu_guest_context_t *ctxt)
{
int i=0;
-
- xcio_suspend_domain(ioctxt);
+ char ans[30];
+
+ printf("suspend\n");
+ fflush(stdout);
+ if (fgets(ans, sizeof(ans), stdin) == NULL) {
+ ERR("failed reading suspend reply");
+ return -1;
+ }
+ if (strncmp(ans, "done\n", 5)) {
+ ERR("suspend reply incorrect: %s", ans);
+ return -1;
+ }
retry:
- if ( xc_domain_getfullinfo(xc_handle, ioctxt->domain, info, ctxt) )
+ if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1)
{
- xcio_error(ioctxt, "Could not get full domain info");
+ ERR("Could not get domain info");
return -1;
}
- if ( (info->flags &
- (DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT))) ==
- (DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT)) )
+ if ( xc_domain_get_vcpu_context(xc_handle, dom, 0 /* XXX */,
+ ctxt) )
+ {
+ ERR("Could not get vcpu context");
+ }
+
+ if ( info->shutdown && info->shutdown_reason == SHUTDOWN_suspend )
{
return 0; // success
}
- if ( info->flags & DOMFLAGS_PAUSED )
+ if ( info->paused )
{
// try unpausing domain, wait, and retest
- xc_domain_unpause( xc_handle, ioctxt->domain );
+ xc_domain_unpause( xc_handle, dom );
- xcio_error(ioctxt, "Domain was paused. Wait and re-test. (%lx)",
- info->flags);
+ ERR("Domain was paused. Wait and re-test.");
usleep(10000); // 10ms
goto retry;
@@ -356,27 +363,24 @@ retry:
if( ++i < 100 )
{
- xcio_error(ioctxt, "Retry suspend domain (%lx)",
- info->flags);
+ ERR("Retry suspend domain.");
usleep(10000); // 10ms
goto retry;
}
- xcio_error(ioctxt, "Unable to suspend domain. (%lx)",
- info->flags);
+ ERR("Unable to suspend domain.");
return -1;
}
-int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
+int xc_linux_save(int xc_handle, int io_fd, u32 dom)
{
- xc_domaininfo_t info;
+ xc_dominfo_t info;
int rc = 1, i, j, k, last_iter, iter = 0;
unsigned long mfn;
- u32 domid = ioctxt->domain;
- int live = (ioctxt->flags & XCFLAGS_LIVE);
- int debug = (ioctxt->flags & XCFLAGS_DEBUG);
+ int live = 0; // (ioctxt->flags & XCFLAGS_LIVE);
+ int debug = 0; // (ioctxt->flags & XCFLAGS_DEBUG);
int sent_last_iter, skip_this_iter;
/* Important tuning parameters */
@@ -387,7 +391,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
unsigned long shared_info_frame;
/* A copy of the CPU context of the guest. */
- full_execution_context_t ctxt;
+ vcpu_guest_context_t ctxt;
/* A table containg the type of each PFN (/not/ MFN!). */
unsigned long *pfn_type = NULL;
@@ -434,53 +438,59 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
MBIT_RATE = START_MBIT_RATE;
- xcio_info(ioctxt, "xc_linux_save start %d\n", domid);
+ DPRINTF("xc_linux_save start %d\n", dom);
if (mlock(&ctxt, sizeof(ctxt))) {
- xcio_perror(ioctxt, "Unable to mlock ctxt");
+ ERR("Unable to mlock ctxt");
return 1;
}
-
- if ( xc_domain_getfullinfo( xc_handle, domid, &info, &ctxt) )
+
+ if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1)
+ {
+ ERR("Could not get domain info");
+ goto out;
+ }
+ if ( xc_domain_get_vcpu_context( xc_handle, dom, /* FIXME */ 0,
+ &ctxt) )
{
- xcio_error(ioctxt, "Could not get full domain info");
+ ERR("Could not get vcpu context");
goto out;
}
shared_info_frame = info.shared_info_frame;
/* A cheesy test to see whether the domain contains valid state. */
if ( ctxt.pt_base == 0 ){
- xcio_error(ioctxt, "Domain is not in a valid Linux guest OS state");
+ ERR("Domain is not in a valid Linux guest OS state");
goto out;
}
- nr_pfns = info.max_pages;
+ nr_pfns = info.max_memkb >> PAGE_SHIFT;
/* cheesy sanity check */
if ( nr_pfns > 1024*1024 ){
- xcio_error(ioctxt, "Invalid state record -- pfn count out of range: %lu", nr_pfns);
+ ERR("Invalid state record -- pfn count out of range: %lu", nr_pfns);
goto out;
}
/* Map the shared info frame */
- live_shinfo = xc_map_foreign_range(xc_handle, domid,
+ live_shinfo = xc_map_foreign_range(xc_handle, dom,
PAGE_SIZE, PROT_READ,
shared_info_frame);
if (!live_shinfo){
- xcio_error(ioctxt, "Couldn't map live_shinfo");
+ ERR("Couldn't map live_shinfo");
goto out;
}
/* the pfn_to_mfn_frame_list fits in a single page */
live_pfn_to_mfn_frame_list =
- xc_map_foreign_range(xc_handle, domid,
+ xc_map_foreign_range(xc_handle, dom,
PAGE_SIZE, PROT_READ,
live_shinfo->arch.pfn_to_mfn_frame_list );
if (!live_pfn_to_mfn_frame_list){
- xcio_error(ioctxt, "Couldn't map pfn_to_mfn_frame_list");
+ ERR("Couldn't map pfn_to_mfn_frame_list");
goto out;
}
@@ -490,12 +500,12 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
(its not clear why it would want to change them, and we'll be OK
from a safety POV anyhow. */
- live_pfn_to_mfn_table = xc_map_foreign_batch(xc_handle, domid,
+ live_pfn_to_mfn_table = xc_map_foreign_batch(xc_handle, dom,
PROT_READ,
live_pfn_to_mfn_frame_list,
(nr_pfns+1023)/1024 );
if( !live_pfn_to_mfn_table ){
- xcio_perror(ioctxt, "Couldn't map pfn_to_mfn table");
+ ERR("Couldn't map pfn_to_mfn table");
goto out;
}
@@ -512,7 +522,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
for ( i = 0; i < nr_pfns; i += 1024 ){
if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){
- xcio_error(ioctxt, "Frame # in pfn-to-mfn frame list is not in pseudophys");
+ ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
goto out;
}
}
@@ -522,10 +532,10 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
if( live )
{
- if ( xc_shadow_control( xc_handle, domid,
+ if ( xc_shadow_control( xc_handle, dom,
DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
NULL, 0, NULL ) < 0 ) {
- xcio_error(ioctxt, "Couldn't enable shadow mode");
+ ERR("Couldn't enable shadow mode");
goto out;
}
@@ -536,10 +546,9 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
last_iter = 1;
- if ( suspend_and_state( xc_handle, ioctxt, &info, &ctxt) )
+ if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
{
- xcio_error(ioctxt, "Domain appears not to have suspended: %lx",
- info.flags);
+ ERR("Domain appears not to have suspended");
goto out;
}
@@ -563,27 +572,27 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
to_skip = malloc( sz );
if (!to_send || !to_fix || !to_skip){
- xcio_error(ioctxt, "Couldn't allocate to_send array");
+ ERR("Couldn't allocate to_send array");
goto out;
}
memset( to_send, 0xff, sz );
if ( mlock( to_send, sz ) ){
- xcio_perror(ioctxt, "Unable to mlock to_send");
+ ERR("Unable to mlock to_send");
return 1;
}
/* (to fix is local only) */
if ( mlock( to_skip, sz ) ){
- xcio_perror(ioctxt, "Unable to mlock to_skip");
+ ERR("Unable to mlock to_skip");
return 1;
}
}
- analysis_phase( xc_handle, domid, nr_pfns, to_skip, 0 );
+ analysis_phase( xc_handle, dom, nr_pfns, to_skip, 0 );
/* We want zeroed memory so use calloc rather than malloc. */
pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
@@ -595,7 +604,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
}
if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){
- xcio_error(ioctxt, "Unable to mlock");
+ ERR("Unable to mlock");
goto out;
}
@@ -612,30 +621,29 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0xffffffffUL) )
{
- printf("i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
- i,mfn,live_mfn_to_pfn_table[mfn]);
+ fprintf(stderr, "i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
+ i,mfn,live_mfn_to_pfn_table[mfn]);
err++;
}
}
- printf("Had %d unexplained entries in p2m table\n",err);
+ fprintf(stderr, "Had %d unexplained entries in p2m table\n",err);
}
#endif
/* Start writing out the saved-domain record. */
- if ( xcio_write(ioctxt, "LinuxGuestRecord", 16) ||
- xcio_write(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
- xcio_write(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) ){
- xcio_error(ioctxt, "Error writing header");
- goto out;
+ if (write(io_fd, &nr_pfns, sizeof(unsigned long)) !=
+ sizeof(unsigned long)) {
+ ERR("write: nr_pfns");
+ goto out;
}
- if(write_vmconfig(ioctxt)){
- xcio_error(ioctxt, "Error writing vmconfig");
+ if (write(io_fd, pfn_to_mfn_frame_list, PAGE_SIZE) != PAGE_SIZE) {
+ ERR("write: pfn_to_mfn_frame_list");
goto out;
}
- print_stats( xc_handle, domid, 0, &stats, 0 );
+ print_stats( xc_handle, dom, 0, &stats, 0 );
/* Now write out each data page, canonicalising page tables as we go... */
@@ -648,13 +656,13 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
prev_pc = 0;
N=0;
- xcio_info(ioctxt, "Saving memory pages: iter %d 0%%", iter);
+ DPRINTF("Saving memory pages: iter %d 0%%", iter);
while( N < nr_pfns ){
unsigned int this_pc = (N * 100) / nr_pfns;
if ( (this_pc - prev_pc) >= 5 ){
- xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
+ DPRINTF("\b\b\b\b%3d%%", this_pc);
prev_pc = this_pc;
}
@@ -662,11 +670,11 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
but this is fast enough for the moment. */
if ( !last_iter &&
- xc_shadow_control(xc_handle, domid,
+ xc_shadow_control(xc_handle, dom,
DOM0_SHADOW_CONTROL_OP_PEEK,
to_skip, nr_pfns, NULL) != nr_pfns )
{
- xcio_error(ioctxt, "Error peeking shadow bitmap");
+ ERR("Error peeking shadow bitmap");
goto out;
}
@@ -716,8 +724,8 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
set_bit( n, to_fix );
if( iter>1 )
- DDPRINTF("netbuf race: iter %d, pfn %x. mfn %lx\n",
- iter,n,pfn_type[batch]);
+ DPRINTF("netbuf race: iter %d, pfn %x. mfn %lx\n",
+ iter,n,pfn_type[batch]);
continue;
}
@@ -735,27 +743,27 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
batch++;
}
-// DDPRINTF("batch %d:%d (n=%d)\n", iter, batch, n);
+// DPRINTF("batch %d:%d (n=%d)\n", iter, batch, n);
if ( batch == 0 )
goto skip; /* vanishingly unlikely... */
- if ( (region_base = xc_map_foreign_batch(xc_handle, domid,
+ if ( (region_base = xc_map_foreign_batch(xc_handle, dom,
PROT_READ,
pfn_type,
batch)) == 0 ){
- xcio_perror(ioctxt, "map batch failed");
+ ERR("map batch failed");
goto out;
}
- if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) ){
- xcio_error(ioctxt, "get_pfn_type_batch failed");
+ if ( get_pfn_type_batch(xc_handle, dom, batch, pfn_type) ){
+ ERR("get_pfn_type_batch failed");
goto out;
}
for ( j = 0; j < batch; j++ ){
if ( (pfn_type[j] & LTAB_MASK) == XTAB ){
- DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
+ DPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
continue;
}
@@ -772,13 +780,14 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j];
}
- if ( xcio_write(ioctxt, &batch, sizeof(int) ) ){
- xcio_error(ioctxt, "Error when writing to state file (2)");
+ if (write(io_fd, &batch, sizeof(int)) != sizeof(int)) {
+ ERR("Error when writing to state file (2)");
goto out;
}
- if ( xcio_write(ioctxt, pfn_type, sizeof(unsigned long)*j ) ){
- xcio_error(ioctxt, "Error when writing to state file (3)");
+ if (write(io_fd, pfn_type, sizeof(unsigned long)*j) !=
+ sizeof(unsigned long)*j) {
+ ERR("Error when writing to state file (3)");
goto out;
}
@@ -786,7 +795,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
for( j = 0; j < batch; j++ ){
/* write out pages in batch */
if( (pfn_type[j] & LTAB_MASK) == XTAB){
- DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
+ DPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
continue;
}
@@ -810,14 +819,14 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
{
/* I don't think this should ever happen */
- printf("FNI %d : [%08lx,%d] pte=%08lx, "
- "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
- j, pfn_type[j], k,
- page[k], mfn, live_mfn_to_pfn_table[mfn],
- (live_mfn_to_pfn_table[mfn]<nr_pfns)?
- live_pfn_to_mfn_table[
- live_mfn_to_pfn_table[mfn]] :
- 0xdeadbeef);
+ fprintf(stderr, "FNI %d : [%08lx,%d] pte=%08lx, "
+ "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
+ j, pfn_type[j], k,
+ page[k], mfn, live_mfn_to_pfn_table[mfn],
+ (live_mfn_to_pfn_table[mfn]<nr_pfns)?
+ live_pfn_to_mfn_table[
+ live_mfn_to_pfn_table[mfn]] :
+ 0xdeadbeef);
pfn = 0; /* be suspicious */
}
@@ -826,24 +835,25 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
page[k] |= pfn << PAGE_SHIFT;
#if 0
- printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
- "xpfn=%d\n",
- pfn_type[j]>>28,
- j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
+ fprintf(stderr,
+ "L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
+ "xpfn=%d\n",
+ pfn_type[j]>>28,
+ j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
#endif
-
+
} /* end of page table rewrite for loop */
- if ( xcio_ratewrite(ioctxt, page, PAGE_SIZE) ){
- xcio_error(ioctxt, "Error when writing to state file (4)");
+ if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) {
+ ERR("Error when writing to state file (4)");
goto out;
}
} /* end of it's a PT page */ else { /* normal page */
- if ( xcio_ratewrite(ioctxt, region_base + (PAGE_SIZE*j),
- PAGE_SIZE) ){
- xcio_error(ioctxt, "Error when writing to state file (5)");
+ if (ratewrite(io_fd, region_base + (PAGE_SIZE*j),
+ PAGE_SIZE) != PAGE_SIZE) {
+ ERR("Error when writing to state file (5)");
goto out;
}
}
@@ -859,27 +869,26 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
total_sent += sent_this_iter;
- xcio_info(ioctxt, "\r %d: sent %d, skipped %d, ",
+ DPRINTF("\r %d: sent %d, skipped %d, ",
iter, sent_this_iter, skip_this_iter );
if ( last_iter ) {
- print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
+ print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
- xcio_info(ioctxt, "Total pages sent= %d (%.2fx)\n",
+ DPRINTF("Total pages sent= %d (%.2fx)\n",
total_sent, ((float)total_sent)/nr_pfns );
- xcio_info(ioctxt, "(of which %d were fixups)\n", needed_to_fix );
+ DPRINTF("(of which %d were fixups)\n", needed_to_fix );
}
if (last_iter && debug){
int minusone = -1;
memset( to_send, 0xff, (nr_pfns+8)/8 );
debug = 0;
- printf("Entering debug resend-all mode\n");
+ fprintf(stderr, "Entering debug resend-all mode\n");
/* send "-1" to put receiver into debug mode */
- if ( xcio_write(ioctxt, &minusone, sizeof(int)) )
- {
- xcio_error(ioctxt, "Error when writing to state file (6)");
+ if (write(io_fd, &minusone, sizeof(int)) != sizeof(int)) {
+ ERR("Error when writing to state file (6)");
goto out;
}
@@ -900,31 +909,28 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
DPRINTF("Start last iteration\n");
last_iter = 1;
- if ( suspend_and_state( xc_handle, ioctxt, &info, &ctxt) )
+ if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
{
- xcio_error(ioctxt, "Domain appears not to have suspended: %lx",
- info.flags);
+ ERR("Domain appears not to have suspended");
goto out;
}
- xcio_info(ioctxt,
- "SUSPEND flags %08lx shinfo %08lx eip %08lx "
- "esi %08lx\n",info.flags,
- info.shared_info_frame,
- ctxt.cpu_ctxt.eip, ctxt.cpu_ctxt.esi );
+ DPRINTF("SUSPEND shinfo %08lx eip %08u esi %08u\n",
+ info.shared_info_frame,
+ ctxt.user_regs.eip, ctxt.user_regs.esi);
}
- if ( xc_shadow_control( xc_handle, domid,
+ if ( xc_shadow_control( xc_handle, dom,
DOM0_SHADOW_CONTROL_OP_CLEAN,
to_send, nr_pfns, &stats ) != nr_pfns )
{
- xcio_error(ioctxt, "Error flushing shadow PT");
+ ERR("Error flushing shadow PT");
goto out;
}
sent_last_iter = sent_this_iter;
- print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
+ print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
}
@@ -937,9 +943,8 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
rc = 0;
/* Zero terminate */
- if ( xcio_write(ioctxt, &rc, sizeof(int)) )
- {
- xcio_error(ioctxt, "Error when writing to state file (6)");
+ if (write(io_fd, &rc, sizeof(int)) != sizeof(int)) {
+ ERR("Error when writing to state file (6)");
goto out;
}
@@ -954,9 +959,8 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
j++;
}
- if ( xcio_write(ioctxt, &j, sizeof(unsigned int)) )
- {
- xcio_error(ioctxt, "Error when writing to state file (6a)");
+ if (write(io_fd, &j, sizeof(unsigned int)) != sizeof(unsigned int)) {
+ ERR("Error when writing to state file (6a)");
goto out;
}
@@ -969,9 +973,9 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
i++;
if ( j == 1024 || i == nr_pfns )
{
- if ( xcio_write(ioctxt, &pfntab, sizeof(unsigned long)*j) )
- {
- xcio_error(ioctxt, "Error when writing to state file (6b)");
+ if (write(io_fd, &pfntab, sizeof(unsigned long)*j) !=
+ sizeof(unsigned long)*j) {
+ ERR("Error when writing to state file (6b)");
goto out;
}
j = 0;
@@ -980,60 +984,70 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
}
/* Map the suspend-record MFN to pin it. The page must be owned by
- domid for this to succeed. */
- p_srec = xc_map_foreign_range(xc_handle, domid,
+ dom for this to succeed. */
+ p_srec = xc_map_foreign_range(xc_handle, dom,
sizeof(*p_srec), PROT_READ,
- ctxt.cpu_ctxt.esi);
+ ctxt.user_regs.esi);
if (!p_srec){
- xcio_error(ioctxt, "Couldn't map suspend record");
+ ERR("Couldn't map suspend record");
goto out;
}
if (nr_pfns != p_srec->nr_pfns )
{
- xcio_error(ioctxt, "Suspend record nr_pfns unexpected (%ld != %ld)",
+ ERR("Suspend record nr_pfns unexpected (%ld != %ld)",
p_srec->nr_pfns, nr_pfns);
goto out;
}
/* Canonicalise the suspend-record frame number. */
- if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ){
- xcio_error(ioctxt, "Suspend record is not in range of pseudophys map");
+ if ( !translate_mfn_to_pfn(&ctxt.user_regs.esi) ){
+ ERR("Suspend record is not in range of pseudophys map");
goto out;
}
/* Canonicalise each GDT frame number. */
for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) {
- xcio_error(ioctxt, "GDT frame is not in range of pseudophys map");
+ ERR("GDT frame is not in range of pseudophys map");
goto out;
}
}
/* Canonicalise the page table base pointer. */
if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) {
- xcio_error(ioctxt, "PT base is not in range of pseudophys map");
+ ERR("PT base is not in range of pseudophys map");
goto out;
}
ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] <<
PAGE_SHIFT;
- if ( xcio_write(ioctxt, &ctxt, sizeof(ctxt)) ||
- xcio_write(ioctxt, live_shinfo, PAGE_SIZE) ) {
- xcio_error(ioctxt, "Error when writing to state file (1)");
+ if (write(io_fd, &ctxt, sizeof(ctxt)) != sizeof(ctxt) ||
+ write(io_fd, live_shinfo, PAGE_SIZE) != PAGE_SIZE) {
+ ERR("Error when writing to state file (1)");
goto out;
}
out:
- if ( live_shinfo ) munmap(live_shinfo, PAGE_SIZE);
- if ( p_srec ) munmap(p_srec, sizeof(*p_srec));
- if ( live_pfn_to_mfn_frame_list ) munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE);
- if ( live_pfn_to_mfn_table ) munmap(live_pfn_to_mfn_table, nr_pfns*4 );
- if ( live_mfn_to_pfn_table ) munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024 );
+ if(live_shinfo)
+ munmap(live_shinfo, PAGE_SIZE);
+
+ if(p_srec)
+ munmap(p_srec, sizeof(*p_srec));
+
+ if(live_pfn_to_mfn_frame_list)
+ munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE);
+
+ if(live_pfn_to_mfn_table)
+ munmap(live_pfn_to_mfn_table, nr_pfns*4);
+
+ if(live_mfn_to_pfn_table)
+ munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024);
+
+ if (pfn_type != NULL)
+ free(pfn_type);
- if ( pfn_type != NULL ) free(pfn_type);
DPRINTF("Save exit rc=%d\n",rc);
return !!rc;
-
}
diff --git a/tools/libxc/xc_load_bin.c b/tools/libxc/xc_load_bin.c
new file mode 100644
index 0000000000..965ccd08da
--- /dev/null
+++ b/tools/libxc/xc_load_bin.c
@@ -0,0 +1,299 @@
+/******************************************************************************
+ * xc_load_bin.c
+ *
+ * Based on xc_elf_load.c
+ *
+ * Loads simple binary images. It's like a .COM file in MS-DOS. No headers are
+ * present. The only requirement is that it must have a xen_bin_image table
+ * somewhere in the first 8192 bytes, starting on a 32-bit aligned address.
+ * Those familiar with the multiboot specification should recognize this, it's
+ * (almost) the same as the multiboot header.
+ * The layout of the xen_bin_image table is:
+ *
+ * Offset Type Name Note
+ * 0 u32 magic required
+ * 4 u32 flags required
+ * 8 u32 checksum required
+ * 12 u32 header_addr required
+ * 16 u32 load_addr required
+ * 20 u32 load_end_addr required
+ * 24 u32 bss_end_addr required
+ * 28 u32 entry_addr required
+ *
+ * - magic
+ * Magic number identifying the table. For images to be loaded by Xen 3, the
+ * magic value is 0x336ec578 ("xEn3" with the 0x80 bit of the "E" set).
+ * - flags
+ * bit 0: indicates whether the image needs to be loaded on a page boundary
+ * bit 1: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ * that memory info should be passed to the image)
+ * bit 2: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ * that the bootloader should pass video mode info to the image)
+ * bit 16: reserved, must be 1 (the multiboot spec uses this bit to indicate
+ * that the values in the fields header_addr - entry_addr are
+ * valid)
+ * All other bits should be set to 0.
+ * - checksum
+ * When added to "magic" and "flags", the resulting value should be 0.
+ * - header_addr
+ * Contains the virtual address corresponding to the beginning of the
+ * table - the memory location at which the magic value is supposed to be
+ * loaded. This field serves to synchronize the mapping between OS image
+ * offsets and virtual memory addresses.
+ * - load_addr
+ * Contains the virtual address of the beginning of the text segment. The
+ * offset in the OS image file at which to start loading is defined by the
+ * offset at which the table was found, minus (header addr - load addr).
+ * load addr must be less than or equal to header addr.
+ * - load_end_addr
+ * Contains the virtual address of the end of the data segment.
+ * (load_end_addr - load_addr) specifies how much data to load. This implies
+ * that the text and data segments must be consecutive in the OS image. If
+ * this field is zero, the domain builder assumes that the text and data
+ * segments occupy the whole OS image file.
+ * - bss_end_addr
+ * Contains the virtual address of the end of the bss segment. The domain
+ * builder initializes this area to zero, and reserves the memory it occupies
+ * to avoid placing boot modules and other data relevant to the loaded image
+ * in that area. If this field is zero, the domain builder assumes that no bss
+ * segment is present.
+ * - entry_addr
+ * The virtual address at which to start execution of the loaded image.
+ *
+ * Some of the field descriptions were copied from "The Multiboot
+ * Specification", Copyright 1995, 96 Bryan Ford <baford@cs.utah.edu>,
+ * Erich Stefan Boleyn <erich@uruk.org> Copyright 1999, 2000, 2001, 2002
+ * Free Software Foundation, Inc.
+ */
+
+#include "xc_private.h"
+#include <stdlib.h>
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+
+struct xen_bin_image_table
+{
+ unsigned long magic;
+ unsigned long flags;
+ unsigned long checksum;
+ unsigned long header_addr;
+ unsigned long load_addr;
+ unsigned long load_end_addr;
+ unsigned long bss_end_addr;
+ unsigned long entry_addr;
+};
+
+#define XEN_REACTOS_MAGIC3 0x336ec578
+
+#define XEN_REACTOS_FLAG_ALIGN4K 0x00000001
+#define XEN_REACTOS_FLAG_NEEDMEMINFO 0x00000002
+#define XEN_REACTOS_FLAG_NEEDVIDINFO 0x00000004
+#define XEN_REACTOS_FLAG_ADDRSVALID 0x00010000
+
+/* Flags we test for */
+#define FLAGS_MASK ((~ 0) & (~ XEN_REACTOS_FLAG_ALIGN4K))
+#define FLAGS_REQUIRED XEN_REACTOS_FLAG_ADDRSVALID
+
+static struct xen_bin_image_table *
+findtable(char *image, unsigned long image_size);
+static int
+parsebinimage(
+ char *image, unsigned long image_size, struct domain_setup_info *dsi);
+static int
+loadbinimage(
+ char *image, unsigned long image_size, int xch, u32 dom,
+ unsigned long *parray, struct domain_setup_info *dsi);
+
+int probe_bin(char *image,
+ unsigned long image_size,
+ struct load_funcs *load_funcs)
+{
+ if ( NULL == findtable(image, image_size) )
+ {
+ return -EINVAL;
+ }
+
+ load_funcs->parseimage = parsebinimage;
+ load_funcs->loadimage = loadbinimage;
+
+ return 0;
+}
+
+static struct xen_bin_image_table *
+findtable(char *image, unsigned long image_size)
+{
+ struct xen_bin_image_table *table;
+ unsigned long *probe_ptr;
+ unsigned probe_index;
+ unsigned probe_count;
+
+ /* Don't go outside the image */
+ if ( image_size < sizeof(struct xen_bin_image_table) )
+ {
+ return NULL;
+ }
+ probe_count = image_size;
+ /* Restrict to first 8k */
+ if ( 8192 < probe_count )
+ {
+ probe_count = 8192;
+ }
+ probe_count = (probe_count - sizeof(struct xen_bin_image_table)) /
+ sizeof(unsigned long);
+
+ /* Search for the magic header */
+ probe_ptr = (unsigned long *) image;
+ table = NULL;
+ for ( probe_index = 0; probe_index < probe_count; probe_index++ )
+ {
+ if ( XEN_REACTOS_MAGIC3 == *probe_ptr )
+ {
+ table = (struct xen_bin_image_table *) probe_ptr;
+ /* Checksum correct? */
+ if ( 0 == table->magic + table->flags + table->checksum )
+ {
+ return table;
+ }
+ }
+ probe_ptr++;
+ }
+
+ return NULL;
+}
+
+static int parsebinimage(char *image,
+ unsigned long image_size,
+ struct domain_setup_info *dsi)
+{
+ struct xen_bin_image_table *image_info;
+ unsigned long start_addr;
+ unsigned long end_addr;
+
+ image_info = findtable(image, image_size);
+ if ( NULL == image_info )
+ {
+ ERROR("Image does not have a valid xen_bin_image_table table.");
+ return -EINVAL;
+ }
+
+ /* Check the flags */
+ if ( FLAGS_REQUIRED != (image_info->flags & FLAGS_MASK) )
+ {
+ ERROR("xen_bin_image_table flags required 0x%08x found 0x%08lx",
+ FLAGS_REQUIRED, image_info->flags & FLAGS_MASK);
+ return -EINVAL;
+ }
+
+ /* Sanity check on the addresses */
+ if ( image_info->header_addr < image_info->load_addr ||
+ ((char *) image_info - image) <
+ (image_info->header_addr - image_info->load_addr) )
+ {
+ ERROR("Invalid header_addr.");
+ return -EINVAL;
+ }
+ start_addr = image_info->header_addr - ((char *) image_info - image);
+ if ( 0 != image_info->load_end_addr &&
+ ( image_info->load_end_addr < image_info->load_addr ||
+ start_addr + image_size < image_info->load_end_addr ) )
+ {
+ ERROR("Invalid load_end_addr");
+ return -EINVAL;
+ }
+ end_addr = (0 == image_info->load_end_addr ? start_addr + image_size :
+ image_info->load_end_addr);
+ if ( 0 != image_info->bss_end_addr &&
+ image_info->bss_end_addr < end_addr )
+ {
+ ERROR("Invalid bss_end_addr");
+ return -EINVAL;
+ }
+
+ dsi->v_start = image_info->load_addr;
+ if ( 0 != image_info->bss_end_addr )
+ {
+ dsi->v_end = image_info->bss_end_addr;
+ }
+ else if ( 0 != image_info->load_end_addr )
+ {
+ dsi->v_end = image_info->load_end_addr;
+ }
+ else
+ {
+ dsi->v_end = image_info->load_addr + image_size -
+ (((char *) image_info - image) -
+ (image_info->header_addr - image_info->load_addr));
+ }
+ dsi->v_kernstart = dsi->v_start;
+ dsi->v_kernend = dsi->v_end;
+ dsi->v_kernentry = image_info->entry_addr;
+
+ return 0;
+}
+
+static int
+loadbinimage(
+ char *image, unsigned long image_size, int xch, u32 dom,
+ unsigned long *parray, struct domain_setup_info *dsi)
+{
+ unsigned long size;
+ char *va;
+ unsigned long done, chunksz;
+ struct xen_bin_image_table *image_info;
+
+ image_info = findtable(image, image_size);
+ if ( NULL == image_info )
+ {
+ ERROR("Image does not have a valid xen_bin_image_table table.");
+ return -EINVAL;
+ }
+
+ /* Determine image size */
+ if ( 0 == image_info->load_end_addr )
+ {
+ size = image_size - (((char *) image_info - image) -
+ (image_info->header_addr -
+ image_info->load_addr));
+ }
+ else
+ {
+ size = image_info->load_end_addr - image_info->load_addr;
+ }
+
+ /* It's possible that we need to skip the first part of the image */
+ image += ((char *)image_info - image) -
+ (image_info->header_addr - image_info->load_addr);
+
+ for ( done = 0; done < size; done += chunksz )
+ {
+ va = xc_map_foreign_range(
+ xch, dom, PAGE_SIZE, PROT_WRITE, parray[done>>PAGE_SHIFT]);
+ chunksz = size - done;
+ if ( chunksz > PAGE_SIZE )
+ chunksz = PAGE_SIZE;
+ memcpy(va, image + done, chunksz);
+ munmap(va, PAGE_SIZE);
+ }
+
+ if ( 0 != image_info->bss_end_addr &&
+ image_info->load_addr + size < image_info->bss_end_addr )
+ {
+ size = image_info->bss_end_addr - image_info->load_addr;
+ }
+ for ( ; done < size; done += chunksz )
+ {
+ va = xc_map_foreign_range(
+ xch, dom, PAGE_SIZE, PROT_WRITE, parray[done>>PAGE_SHIFT]);
+ chunksz = size - done;
+ if ( chunksz > (PAGE_SIZE - (done & (PAGE_SIZE-1))) )
+ chunksz = PAGE_SIZE - (done & (PAGE_SIZE-1));
+ memset(va + (done & (PAGE_SIZE-1)), 0, chunksz);
+ munmap(va, PAGE_SIZE);
+ }
+
+ return 0;
+}
diff --git a/tools/libxc/xc_load_elf.c b/tools/libxc/xc_load_elf.c
new file mode 100644
index 0000000000..bc46636d75
--- /dev/null
+++ b/tools/libxc/xc_load_elf.c
@@ -0,0 +1,310 @@
+/******************************************************************************
+ * xc_load_elf.c
+ */
+
+#include "xc_private.h"
+
+#if defined(__i386__)
+#define ELFSIZE 32
+#endif
+#if defined(__x86_64__)
+#define ELFSIZE 64
+#endif
+
+#include "xc_elf.h"
+#include <stdlib.h>
+
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+
+static int
+parseelfimage(
+ char *image, unsigned long image_size, struct domain_setup_info *dsi);
+static int
+loadelfimage(
+ char *image, unsigned long image_size, int xch, u32 dom,
+ unsigned long *parray, struct domain_setup_info *dsi);
+static int
+loadelfsymtab(
+ char *image, int xch, u32 dom, unsigned long *parray,
+ struct domain_setup_info *dsi);
+
+int probe_elf(char *image,
+ unsigned long image_size,
+ struct load_funcs *load_funcs)
+{
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
+
+ if ( !IS_ELF(*ehdr) )
+ {
+ return -EINVAL;
+ }
+
+ load_funcs->parseimage = parseelfimage;
+ load_funcs->loadimage = loadelfimage;
+
+ return 0;
+}
+
+static inline int is_loadable_phdr(Elf_Phdr *phdr)
+{
+ return ((phdr->p_type == PT_LOAD) &&
+ ((phdr->p_flags & (PF_W|PF_X)) != 0));
+}
+
+static int parseelfimage(char *image,
+ unsigned long elfsize,
+ struct domain_setup_info *dsi)
+{
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
+ Elf_Phdr *phdr;
+ Elf_Shdr *shdr;
+ unsigned long kernstart = ~0UL, kernend=0UL;
+ char *shstrtab, *guestinfo=NULL, *p;
+ int h;
+
+ if ( !IS_ELF(*ehdr) )
+ {
+ ERROR("Kernel image does not have an ELF header.");
+ return -EINVAL;
+ }
+
+ if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
+ {
+ ERROR("ELF program headers extend beyond end of image.");
+ return -EINVAL;
+ }
+
+ if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
+ {
+ ERROR("ELF section headers extend beyond end of image.");
+ return -EINVAL;
+ }
+
+ /* Find the section-header strings table. */
+ if ( ehdr->e_shstrndx == SHN_UNDEF )
+ {
+ ERROR("ELF image has no section-header strings table (shstrtab).");
+ return -EINVAL;
+ }
+ shdr = (Elf_Shdr *)(image + ehdr->e_shoff +
+ (ehdr->e_shstrndx*ehdr->e_shentsize));
+ shstrtab = image + shdr->sh_offset;
+
+ /* Find the special '__xen_guest' section and check its contents. */
+ for ( h = 0; h < ehdr->e_shnum; h++ )
+ {
+ shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize));
+ if ( strcmp(&shstrtab[shdr->sh_name], "__xen_guest") != 0 )
+ continue;
+
+ guestinfo = image + shdr->sh_offset;
+
+ if ( (strstr(guestinfo, "LOADER=generic") == NULL) &&
+ (strstr(guestinfo, "GUEST_OS=linux") == NULL) )
+ {
+ ERROR("Will only load images built for the generic loader "
+ "or Linux images");
+ ERROR("Actually saw: '%s'", guestinfo);
+ return -EINVAL;
+ }
+
+ if ( (strstr(guestinfo, "XEN_VER=3.0") == NULL) )
+ {
+ ERROR("Will only load images built for Xen v3.0");
+ ERROR("Actually saw: '%s'", guestinfo);
+ return -EINVAL;
+ }
+
+ break;
+ }
+ if ( guestinfo == NULL )
+ {
+ ERROR("Not a Xen-ELF image: '__xen_guest' section not found.");
+ return -EINVAL;
+ }
+
+ for ( h = 0; h < ehdr->e_phnum; h++ )
+ {
+ phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize));
+ if ( !is_loadable_phdr(phdr) )
+ continue;
+ if ( phdr->p_paddr < kernstart )
+ kernstart = phdr->p_paddr;
+ if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
+ kernend = phdr->p_paddr + phdr->p_memsz;
+ }
+
+ if ( (kernstart > kernend) ||
+ (ehdr->e_entry < kernstart) ||
+ (ehdr->e_entry > kernend) )
+ {
+ ERROR("Malformed ELF image.");
+ return -EINVAL;
+ }
+
+ dsi->v_start = kernstart;
+ if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
+ dsi->v_start = strtoul(p+10, &p, 0);
+
+ if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
+ dsi->load_symtab = 1;
+
+ dsi->v_kernstart = kernstart;
+ dsi->v_kernend = kernend;
+ dsi->v_kernentry = ehdr->e_entry;
+ dsi->v_end = dsi->v_kernend;
+
+ loadelfsymtab(image, 0, 0, NULL, dsi);
+
+ return 0;
+}
+
+static int
+loadelfimage(
+ char *image, unsigned long elfsize, int xch, u32 dom,
+ unsigned long *parray, struct domain_setup_info *dsi)
+{
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
+ Elf_Phdr *phdr;
+ int h;
+
+ char *va;
+ unsigned long pa, done, chunksz;
+
+ for ( h = 0; h < ehdr->e_phnum; h++ )
+ {
+ phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize));
+ if ( !is_loadable_phdr(phdr) )
+ continue;
+
+ for ( done = 0; done < phdr->p_filesz; done += chunksz )
+ {
+ pa = (phdr->p_paddr + done) - dsi->v_start;
+ va = xc_map_foreign_range(
+ xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
+ chunksz = phdr->p_filesz - done;
+ if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+ chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+ memcpy(va + (pa & (PAGE_SIZE-1)),
+ image + phdr->p_offset + done, chunksz);
+ munmap(va, PAGE_SIZE);
+ }
+
+ for ( ; done < phdr->p_memsz; done += chunksz )
+ {
+ pa = (phdr->p_paddr + done) - dsi->v_start;
+ va = xc_map_foreign_range(
+ xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
+ chunksz = phdr->p_memsz - done;
+ if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+ chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+ memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
+ munmap(va, PAGE_SIZE);
+ }
+ }
+
+ loadelfsymtab(image, xch, dom, parray, dsi);
+
+ return 0;
+}
+
+#define ELFROUND (ELFSIZE / 8)
+
+static int
+loadelfsymtab(
+ char *image, int xch, u32 dom, unsigned long *parray,
+ struct domain_setup_info *dsi)
+{
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr;
+ Elf_Shdr *shdr;
+ unsigned long maxva, symva;
+ char *p;
+ int h, i;
+
+ if ( !dsi->load_symtab )
+ return 0;
+
+ p = malloc(sizeof(int) + sizeof(Elf_Ehdr) +
+ ehdr->e_shnum * sizeof(Elf_Shdr));
+ if (p == NULL)
+ return 0;
+
+ maxva = (dsi->v_kernend + ELFROUND - 1) & ~(ELFROUND - 1);
+ symva = maxva;
+ maxva += sizeof(int);
+ dsi->symtab_addr = maxva;
+ dsi->symtab_len = 0;
+ maxva += sizeof(Elf_Ehdr) + ehdr->e_shnum * sizeof(Elf_Shdr);
+ maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
+
+ shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr));
+ memcpy(shdr, image + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr));
+
+ for ( h = 0; h < ehdr->e_shnum; h++ )
+ {
+ if ( shdr[h].sh_type == SHT_STRTAB )
+ {
+ /* Look for a strtab @i linked to symtab @h. */
+ for ( i = 0; i < ehdr->e_shnum; i++ )
+ if ( (shdr[i].sh_type == SHT_SYMTAB) &&
+ (shdr[i].sh_link == h) )
+ break;
+ /* Skip symtab @h if we found no corresponding strtab @i. */
+ if ( i == ehdr->e_shnum )
+ {
+ shdr[h].sh_offset = 0;
+ continue;
+ }
+ }
+
+ if ( (shdr[h].sh_type == SHT_STRTAB) ||
+ (shdr[h].sh_type == SHT_SYMTAB) )
+ {
+ if ( parray != NULL )
+ xc_map_memcpy(maxva, image + shdr[h].sh_offset, shdr[h].sh_size,
+ xch, dom, parray, dsi->v_start);
+
+ /* Mangled to be based on ELF header location. */
+ shdr[h].sh_offset = maxva - dsi->symtab_addr;
+
+ dsi->symtab_len += shdr[h].sh_size;
+ maxva += shdr[h].sh_size;
+ maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
+ }
+
+ shdr[h].sh_name = 0; /* Name is NULL. */
+ }
+
+ if ( dsi->symtab_len == 0 )
+ {
+ dsi->symtab_addr = 0;
+ goto out;
+ }
+
+ if ( parray != NULL )
+ {
+ *(int *)p = maxva - dsi->symtab_addr;
+ sym_ehdr = (Elf_Ehdr *)(p + sizeof(int));
+ memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr));
+ sym_ehdr->e_phoff = 0;
+ sym_ehdr->e_shoff = sizeof(Elf_Ehdr);
+ sym_ehdr->e_phentsize = 0;
+ sym_ehdr->e_phnum = 0;
+ sym_ehdr->e_shstrndx = SHN_UNDEF;
+
+ /* Copy total length, crafted ELF header and section header table */
+ xc_map_memcpy(symva, p, sizeof(int) + sizeof(Elf_Ehdr) +
+ ehdr->e_shnum * sizeof(Elf_Shdr), xch, dom, parray,
+ dsi->v_start);
+ }
+
+ dsi->symtab_len = maxva - dsi->symtab_addr;
+ dsi->v_end = round_pgup(maxva);
+
+ out:
+ if ( p != NULL )
+ free(p);
+
+ return 0;
+}
diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index 0efd1f1972..40291bc3ef 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -19,34 +19,35 @@ int xc_interface_close(int xc_handle)
return close(xc_handle);
}
-
-#define CONSOLE_RING_CLEAR 1
-
int xc_readconsolering(int xc_handle,
- char *str,
- unsigned int max_chars,
+ char **pbuffer,
+ unsigned int *pnr_chars,
int clear)
{
int ret;
dom0_op_t op;
+ char *buffer = *pbuffer;
+ unsigned int nr_chars = *pnr_chars;
op.cmd = DOM0_READCONSOLE;
- op.u.readconsole.str = (unsigned long)str;
- op.u.readconsole.count = max_chars;
- op.u.readconsole.cmd = clear ? CONSOLE_RING_CLEAR : 0;
+ op.u.readconsole.buffer = buffer;
+ op.u.readconsole.count = nr_chars;
+ op.u.readconsole.clear = clear;
- if ( (ret = mlock(str, max_chars)) != 0 )
+ if ( (ret = mlock(buffer, nr_chars)) != 0 )
return ret;
- if ( (ret = do_dom0_op(xc_handle, &op)) >= 0 )
- str[ret] = '\0';
+ if ( (ret = do_dom0_op(xc_handle, &op)) == 0 )
+ {
+ *pbuffer = op.u.readconsole.buffer;
+ *pnr_chars = op.u.readconsole.count;
+ }
- (void)munlock(str, max_chars);
+ (void)munlock(buffer, nr_chars);
return ret;
}
-
int xc_physinfo(int xc_handle,
xc_physinfo_t *put_info)
{
@@ -64,7 +65,6 @@ int xc_physinfo(int xc_handle,
return 0;
}
-
int xc_sched_id(int xc_handle,
int *sched_id)
{
@@ -97,3 +97,36 @@ int xc_perfc_control(int xc_handle,
return (rc == 0) ? dop.u.perfccontrol.nr_counters : rc;
}
+
+long long xc_msr_read(int xc_handle, int cpu_mask, int msr)
+{
+ int rc;
+ dom0_op_t op;
+
+ op.cmd = DOM0_MSR;
+ op.u.msr.write = 0;
+ op.u.msr.msr = msr;
+ op.u.msr.cpu_mask = cpu_mask;
+
+ rc = do_dom0_op(xc_handle, &op);
+
+ return (((unsigned long long)op.u.msr.out2)<<32) | op.u.msr.out1 ;
+}
+
+int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
+ unsigned int high)
+{
+ int rc;
+ dom0_op_t op;
+
+ op.cmd = DOM0_MSR;
+ op.u.msr.write = 1;
+ op.u.msr.msr = msr;
+ op.u.msr.cpu_mask = cpu_mask;
+ op.u.msr.in1 = low;
+ op.u.msr.in2 = high;
+
+ rc = do_dom0_op(xc_handle, &op);
+
+ return rc;
+}
diff --git a/tools/libxc/xc_physdev.c b/tools/libxc/xc_physdev.c
index ba5dd9ccdc..94fe34dc52 100644
--- a/tools/libxc/xc_physdev.c
+++ b/tools/libxc/xc_physdev.c
@@ -16,14 +16,6 @@ int xc_physdev_pci_access_modify(int xc_handle,
int func,
int enable)
{
- dom0_op_t op;
-
- op.cmd = DOM0_PCIDEV_ACCESS;
- op.u.pcidev_access.domain = (domid_t)domid;
- op.u.pcidev_access.bus = bus;
- op.u.pcidev_access.dev = dev;
- op.u.pcidev_access.func = func;
- op.u.pcidev_access.enable = enable;
-
- return do_dom0_op(xc_handle, &op);
+ errno = ENOSYS;
+ return -1;
}
diff --git a/tools/libxc/xc_plan9_build.c b/tools/libxc/xc_plan9_build.c
index 3655a26f3e..7f697d2115 100755..100644
--- a/tools/libxc/xc_plan9_build.c
+++ b/tools/libxc/xc_plan9_build.c
@@ -58,33 +58,6 @@ memcpy_toguest(int xc_handle, u32 dom, void *v, int size,
return ret;
}
-/* this is a function which can go away. It dumps a hunk of
- * guest pages to a file (/tmp/dumpit); handy for debugging
- * your image builder.
- * Xen guys, nuke this if you wish.
- */
-void
-dumpit(int xc_handle, u32 dom,
- int start_page, int tot, unsigned long *page_array)
-{
- int i, ofd;
- unsigned char *vaddr;
-
- ofd = open("/tmp/dumpit", O_RDWR);
- for (i = start_page; i < tot; i++) {
- vaddr = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- page_array[i]);
- if (!vaddr) {
- fprintf(stderr, "Page %d\n", i);
- perror("shit");
- read(0, &i, 1);
- return;
- }
- write(ofd, vaddr, 4096);
- munmap(vaddr, PAGE_SIZE);
- }
-}
int
blah(char *b)
{
@@ -108,14 +81,14 @@ void
plan9header(Exec * header)
{
/* header is big-endian */
- swabby(&header->magic, "magic");
- swabby(&header->text, "text");
- swabby(&header->data, "data");
- swabby(&header->bss, "bss");
- swabby(&header->syms, "syms");
- swabby(&header->entry, "entry");
- swabby(&header->spsz, "spsz");
- swabby(&header->pcsz, "pcsz");
+ swabby((unsigned long *)&header->magic, "magic");
+ swabby((unsigned long *)&header->text, "text");
+ swabby((unsigned long *)&header->data, "data");
+ swabby((unsigned long *)&header->bss, "bss");
+ swabby((unsigned long *)&header->syms, "syms");
+ swabby((unsigned long *)&header->entry, "entry");
+ swabby((unsigned long *)&header->spsz, "spsz");
+ swabby((unsigned long *)&header->pcsz, "pcsz");
}
@@ -133,60 +106,14 @@ static int
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-static long
-get_tot_pages(int xc_handle, u32 domid)
-{
- dom0_op_t op;
- op.cmd = DOM0_GETDOMAININFO;
- op.u.getdomaininfo.domain = (domid_t) domid;
- op.u.getdomaininfo.ctxt = NULL;
- return (do_dom0_op(xc_handle, &op) < 0) ?
- -1 : op.u.getdomaininfo.tot_pages;
-}
-
static int
-get_pfn_list(int xc_handle,
- u32 domid, unsigned long *pfn_buf, unsigned long max_pfns)
-{
- dom0_op_t op;
- int ret;
- op.cmd = DOM0_GETMEMLIST;
- op.u.getmemlist.domain = (domid_t) domid;
- op.u.getmemlist.max_pfns = max_pfns;
- op.u.getmemlist.buffer = pfn_buf;
-
- if (mlock(pfn_buf, max_pfns * sizeof (unsigned long)) != 0)
- return -1;
-
- ret = do_dom0_op(xc_handle, &op);
-
- (void) munlock(pfn_buf, max_pfns * sizeof (unsigned long));
-
-#if 0
-#ifdef DEBUG
- DPRINTF(("Ret for get_pfn_list is %d\n", ret));
- if (ret >= 0) {
- int i, j;
- for (i = 0; i < op.u.getmemlist.num_pfns; i += 16) {
- fprintf(stderr, "0x%x: ", i);
- for (j = 0; j < 16; j++)
- fprintf(stderr, "0x%lx ", pfn_buf[i + j]);
- fprintf(stderr, "\n");
- }
- }
-#endif
-#endif
- return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
-}
-
-static int
-setup_guestos(int xc_handle,
+setup_guest(int xc_handle,
u32 dom,
gzFile kernel_gfd,
unsigned long tot_pages,
unsigned long *virt_startinfo_addr,
unsigned long *virt_load_addr,
- full_execution_context_t * ctxt,
+ vcpu_guest_context_t * ctxt,
const char *cmdline,
unsigned long shared_info_frame,
unsigned int control_evtchn,
@@ -205,8 +132,8 @@ setup_guestos(int xc_handle,
unsigned long ksize;
mmu_t *mmu = NULL;
int i;
- unsigned long first_page_after_kernel,
- first_data_page,
+ unsigned long first_page_after_kernel = 0,
+ first_data_page = 0,
page_array_page;
unsigned long cpu0pdb, cpu0pte, cpu0ptelast;
unsigned long /*last_pfn, */ tot_pte_pages;
@@ -217,7 +144,7 @@ setup_guestos(int xc_handle,
goto error_out;
}
- if (get_pfn_list(xc_handle, dom, cpage_array, tot_pages) != tot_pages) {
+ if (xc_get_pfn_list(xc_handle, dom, cpage_array, tot_pages) != tot_pages) {
PERROR("Could not get the page frame list");
goto error_out;
}
@@ -387,8 +314,7 @@ setup_guestos(int xc_handle,
* Pin down l2tab addr as page dir page - causes hypervisor to provide
* correct protection for the page
*/
- if (add_mmu_update(xc_handle, mmu,
- l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE))
+ if (pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom))
goto error_out;
for (count = 0; count < tot_pages; count++) {
@@ -431,8 +357,8 @@ setup_guestos(int xc_handle,
start_info->flags = 0;
DPRINTF((" control event channel is %d\n", control_evtchn));
start_info->domain_controller_evtchn = control_evtchn;
- strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
- start_info->cmd_line[MAX_CMDLINE - 1] = '\0';
+ strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
+ start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
munmap(start_info, PAGE_SIZE);
DPRINTF(("done setting up start_info\n"));
@@ -480,19 +406,19 @@ xc_plan9_build(int xc_handle,
unsigned int control_evtchn, unsigned long flags)
{
dom0_op_t launch_op, op;
- unsigned long load_addr;
+ unsigned long load_addr = 0;
long tot_pages;
int kernel_fd = -1;
gzFile kernel_gfd = NULL;
int rc, i;
- full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
+ vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
unsigned long virt_startinfo_addr;
- if ((tot_pages = get_tot_pages(xc_handle, domid)) < 0) {
+ if ((tot_pages = xc_get_tot_pages(xc_handle, domid)) < 0) {
PERROR("Could not find total pages for domain");
return 1;
}
- DPRINTF(("get_tot_pages returns %ld pages\n", tot_pages));
+ DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages));
kernel_fd = open(image_name, O_RDONLY);
if (kernel_fd < 0) {
@@ -506,30 +432,35 @@ xc_plan9_build(int xc_handle,
return 1;
}
- DPRINTF(("get_tot_pages returns %ld pages\n", tot_pages));
+ DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages));
if (mlock(&st_ctxt, sizeof (st_ctxt))) {
- PERROR("Unable to mlock ctxt");
+ PERROR("xc_plan9_build: ctxt mlock failed");
return 1;
}
op.cmd = DOM0_GETDOMAININFO;
op.u.getdomaininfo.domain = (domid_t) domid;
- op.u.getdomaininfo.ctxt = ctxt;
if ((do_dom0_op(xc_handle, &op) < 0) ||
((u32) op.u.getdomaininfo.domain != domid)) {
PERROR("Could not get info on domain");
goto error_out;
}
- DPRINTF(("get_tot_pages returns %ld pages\n", tot_pages));
+ DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages));
+
+ if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+ {
+ PERROR("Could not get vcpu context");
+ goto error_out;
+ }
if (!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED)
- || (op.u.getdomaininfo.ctxt->pt_base != 0)) {
+ || (ctxt->pt_base != 0)) {
ERROR("Domain is already constructed");
goto error_out;
}
- DPRINTF(("get_tot_pages returns %ld pages\n", tot_pages));
- if (setup_guestos(xc_handle, domid, kernel_gfd, tot_pages,
+ DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages));
+ if (setup_guest(xc_handle, domid, kernel_gfd, tot_pages,
&virt_startinfo_addr,
&load_addr, &st_ctxt, cmdline,
op.u.getdomaininfo.shared_info_frame,
@@ -548,37 +479,36 @@ xc_plan9_build(int xc_handle,
/*
* Initial register values:
- * DS,ES,FS,GS = FLAT_GUESTOS_DS
- * CS:EIP = FLAT_GUESTOS_CS:start_pc
- * SS:ESP = FLAT_GUESTOS_DS:start_stack
+ * DS,ES,FS,GS = FLAT_KERNEL_DS
+ * CS:EIP = FLAT_KERNEL_CS:start_pc
+ * SS:ESP = FLAT_KERNEL_DS:start_stack
* ESI = start_info
* [EAX,EBX,ECX,EDX,EDI,EBP are zero]
* EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
*/
- ctxt->cpu_ctxt.ds = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.es = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.fs = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.gs = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.ss = FLAT_GUESTOS_DS;
- ctxt->cpu_ctxt.cs = FLAT_GUESTOS_CS;
- ctxt->cpu_ctxt.eip = load_addr;
- ctxt->cpu_ctxt.eip = 0x80100020;
+ ctxt->user_regs.ds = FLAT_KERNEL_DS;
+ ctxt->user_regs.es = FLAT_KERNEL_DS;
+ ctxt->user_regs.fs = FLAT_KERNEL_DS;
+ ctxt->user_regs.gs = FLAT_KERNEL_DS;
+ ctxt->user_regs.ss = FLAT_KERNEL_DS;
+ ctxt->user_regs.cs = FLAT_KERNEL_CS;
+ ctxt->user_regs.eip = load_addr;
+ ctxt->user_regs.eip = 0x80100020;
/* put stack at top of second page */
- ctxt->cpu_ctxt.esp = 0x80000000 + (STACKPAGE << PAGE_SHIFT);
+ ctxt->user_regs.esp = 0x80000000 + (STACKPAGE << PAGE_SHIFT);
/* why is this set? */
- ctxt->cpu_ctxt.esi = ctxt->cpu_ctxt.esp;
- ctxt->cpu_ctxt.eflags = (1 << 9) | (1 << 2);
+ ctxt->user_regs.esi = ctxt->user_regs.esp;
+ ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
/* FPU is set up to default initial state. */
- memset(ctxt->fpu_ctxt, 0, sizeof (ctxt->fpu_ctxt));
+ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
/* Virtual IDT is empty at start-of-day. */
for (i = 0; i < 256; i++) {
ctxt->trap_ctxt[i].vector = i;
- ctxt->trap_ctxt[i].cs = FLAT_GUESTOS_CS;
+ ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
}
- ctxt->fast_trap_idx = 0;
/* No LDT. */
ctxt->ldt_ents = 0;
@@ -588,24 +518,31 @@ xc_plan9_build(int xc_handle,
/* Ring 1 stack is the initial stack. */
/* put stack at top of second page */
- ctxt->guestos_ss = FLAT_GUESTOS_DS;
- ctxt->guestos_esp = ctxt->cpu_ctxt.esp;
+ ctxt->kernel_ss = FLAT_KERNEL_DS;
+ ctxt->kernel_sp = ctxt->user_regs.esp;
/* No debugging. */
memset(ctxt->debugreg, 0, sizeof (ctxt->debugreg));
/* No callback handlers. */
- ctxt->event_callback_cs = FLAT_GUESTOS_CS;
- ctxt->event_callback_eip = 0;
- ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS;
+#if defined(__i386__)
+ ctxt->event_callback_cs = FLAT_KERNEL_CS;
+ ctxt->event_callback_eip = 0;
+ ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
ctxt->failsafe_callback_eip = 0;
+#elif defined(__x86_64__)
+ ctxt->event_callback_eip = 0;
+ ctxt->failsafe_callback_eip = 0;
+ ctxt->syscall_callback_eip = 0;
+#endif
memset(&launch_op, 0, sizeof (launch_op));
- launch_op.u.builddomain.domain = (domid_t) domid;
- // launch_op.u.builddomain.num_vifs = 1;
- launch_op.u.builddomain.ctxt = ctxt;
- launch_op.cmd = DOM0_BUILDDOMAIN;
+ launch_op.u.setdomaininfo.domain = (domid_t) domid;
+ launch_op.u.setdomaininfo.vcpu = 0;
+ // launch_op.u.setdomaininfo.num_vifs = 1;
+ launch_op.u.setdomaininfo.ctxt = ctxt;
+ launch_op.cmd = DOM0_SETDOMAININFO;
rc = do_dom0_op(xc_handle, &launch_op);
fprintf(stderr, "RC is %d\n", rc);
diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index e4316e6994..87e5ecd1f3 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -4,6 +4,7 @@
* Helper functions for the rest of the library.
*/
+#include <zlib.h>
#include "xc_private.h"
void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot,
@@ -12,18 +13,18 @@ void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot,
privcmd_mmapbatch_t ioctlx;
void *addr;
addr = mmap(NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0);
- if ( addr != NULL )
+ if ( addr == MAP_FAILED )
+ return NULL;
+
+ ioctlx.num=num;
+ ioctlx.dom=dom;
+ ioctlx.addr=(unsigned long)addr;
+ ioctlx.arr=arr;
+ if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) < 0 )
{
- ioctlx.num=num;
- ioctlx.dom=dom;
- ioctlx.addr=(unsigned long)addr;
- ioctlx.arr=arr;
- if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) < 0 )
- {
- perror("XXXXXXXX");
- munmap(addr, num*PAGE_SIZE);
- return 0;
- }
+ perror("XXXXXXXX");
+ munmap(addr, num*PAGE_SIZE);
+ return NULL;
}
return addr;
@@ -39,19 +40,19 @@ void *xc_map_foreign_range(int xc_handle, u32 dom,
privcmd_mmap_entry_t entry;
void *addr;
addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
- if ( addr != NULL )
+ if ( addr == MAP_FAILED )
+ return NULL;
+
+ ioctlx.num=1;
+ ioctlx.dom=dom;
+ ioctlx.entry=&entry;
+ entry.va=(unsigned long) addr;
+ entry.mfn=mfn;
+ entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT;
+ if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) < 0 )
{
- ioctlx.num=1;
- ioctlx.dom=dom;
- ioctlx.entry=&entry;
- entry.va=(unsigned long) addr;
- entry.mfn=mfn;
- entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT;
- if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) < 0 )
- {
- munmap(addr, size);
- return 0;
- }
+ munmap(addr, size);
+ return NULL;
}
return addr;
}
@@ -91,28 +92,37 @@ unsigned int get_pfn_type(int xc_handle,
/*******************/
-#define FIRST_MMU_UPDATE 1
+int pin_table(
+ int xc_handle, unsigned int type, unsigned long mfn, domid_t dom)
+{
+ struct mmuext_op op;
+
+ op.cmd = type;
+ op.mfn = mfn;
+
+ if ( do_mmuext_op(xc_handle, &op, 1, dom) < 0 )
+ return 1;
+
+ return 0;
+}
static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
{
int err = 0;
privcmd_hypercall_t hypercall;
- if ( mmu->idx == FIRST_MMU_UPDATE )
+ if ( mmu->idx == 0 )
return 0;
- mmu->updates[0].ptr = MMU_EXTENDED_COMMAND;
- mmu->updates[0].val = MMUEXT_SET_FOREIGNDOM;
- mmu->updates[0].val |= (unsigned long)mmu->subject << 16;
-
hypercall.op = __HYPERVISOR_mmu_update;
hypercall.arg[0] = (unsigned long)mmu->updates;
hypercall.arg[1] = (unsigned long)mmu->idx;
hypercall.arg[2] = 0;
+ hypercall.arg[3] = mmu->subject;
if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 )
{
- PERROR("Could not lock pagetable update array");
+ PERROR("flush_mmu_updates: mmu updates mlock failed");
err = 1;
goto out;
}
@@ -123,7 +133,7 @@ static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
err = 1;
}
- mmu->idx = FIRST_MMU_UPDATE;
+ mmu->idx = 0;
(void)munlock(mmu->updates, sizeof(mmu->updates));
@@ -136,7 +146,7 @@ mmu_t *init_mmu_updates(int xc_handle, domid_t dom)
mmu_t *mmu = malloc(sizeof(mmu_t));
if ( mmu == NULL )
return mmu;
- mmu->idx = FIRST_MMU_UPDATE;
+ mmu->idx = 0;
mmu->subject = dom;
return mmu;
}
@@ -159,20 +169,20 @@ int finish_mmu_updates(int xc_handle, mmu_t *mmu)
}
-long long xc_domain_get_cpu_usage( int xc_handle, domid_t domid )
+long long xc_domain_get_cpu_usage( int xc_handle, domid_t domid, int vcpu )
{
dom0_op_t op;
- op.cmd = DOM0_GETDOMAININFO;
- op.u.getdomaininfo.domain = (domid_t)domid;
- op.u.getdomaininfo.ctxt = NULL;
- if ( (do_dom0_op(xc_handle, &op) < 0) ||
- ((u16)op.u.getdomaininfo.domain != domid) )
+ op.cmd = DOM0_GETVCPUCONTEXT;
+ op.u.getvcpucontext.domain = (domid_t)domid;
+ op.u.getvcpucontext.vcpu = (u16)vcpu;
+ op.u.getvcpucontext.ctxt = NULL;
+ if ( (do_dom0_op(xc_handle, &op) < 0) )
{
PERROR("Could not get info on domain");
return -1;
}
- return op.u.getdomaininfo.cpu_time;
+ return op.u.getvcpucontext.cpu_time;
}
@@ -201,5 +211,160 @@ unsigned long xc_get_m2p_start_mfn ( int xc_handle )
return mfn;
}
+int xc_get_pfn_list(int xc_handle,
+ u32 domid,
+ unsigned long *pfn_buf,
+ unsigned long max_pfns)
+{
+ dom0_op_t op;
+ int ret;
+ op.cmd = DOM0_GETMEMLIST;
+ op.u.getmemlist.domain = (domid_t)domid;
+ op.u.getmemlist.max_pfns = max_pfns;
+ op.u.getmemlist.buffer = pfn_buf;
+
+
+ if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 )
+ {
+ PERROR("xc_get_pfn_list: pfn_buf mlock failed");
+ return -1;
+ }
+
+ ret = do_dom0_op(xc_handle, &op);
+
+ (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long));
+
+#if 0
+#ifdef DEBUG
+ DPRINTF(("Ret for xc_get_pfn_list is %d\n", ret));
+ if (ret >= 0) {
+ int i, j;
+ for (i = 0; i < op.u.getmemlist.num_pfns; i += 16) {
+ fprintf(stderr, "0x%x: ", i);
+ for (j = 0; j < 16; j++)
+ fprintf(stderr, "0x%lx ", pfn_buf[i + j]);
+ fprintf(stderr, "\n");
+ }
+ }
+#endif
+#endif
+
+ return (ret < 0) ? -1 : op.u.getmemlist.num_pfns;
+}
+long xc_get_tot_pages(int xc_handle, u32 domid)
+{
+ dom0_op_t op;
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = (domid_t)domid;
+ return (do_dom0_op(xc_handle, &op) < 0) ?
+ -1 : op.u.getdomaininfo.tot_pages;
+}
+
+int xc_copy_to_domain_page(int xc_handle,
+ u32 domid,
+ unsigned long dst_pfn,
+ void *src_page)
+{
+ void *vaddr = xc_map_foreign_range(
+ xc_handle, domid, PAGE_SIZE, PROT_WRITE, dst_pfn);
+ if ( vaddr == NULL )
+ return -1;
+ memcpy(vaddr, src_page, PAGE_SIZE);
+ munmap(vaddr, PAGE_SIZE);
+ return 0;
+}
+
+unsigned long xc_get_filesz(int fd)
+{
+ u16 sig;
+ u32 _sz = 0;
+ unsigned long sz;
+
+ lseek(fd, 0, SEEK_SET);
+ if ( read(fd, &sig, sizeof(sig)) != sizeof(sig) )
+ return 0;
+ sz = lseek(fd, 0, SEEK_END);
+ if ( sig == 0x8b1f ) /* GZIP signature? */
+ {
+ lseek(fd, -4, SEEK_END);
+ if ( read(fd, &_sz, 4) != 4 )
+ return 0;
+ sz = _sz;
+ }
+ lseek(fd, 0, SEEK_SET);
+ return sz;
+}
+
+char *xc_read_kernel_image(const char *filename, unsigned long *size)
+{
+ int kernel_fd = -1;
+ gzFile kernel_gfd = NULL;
+ char *image = NULL;
+ unsigned int bytes;
+
+ if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
+ {
+ PERROR("Could not open kernel image");
+ goto out;
+ }
+
+ if ( (*size = xc_get_filesz(kernel_fd)) == 0 )
+ {
+ PERROR("Could not read kernel image");
+ goto out;
+ }
+
+ if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
+ {
+ PERROR("Could not allocate decompression state for state file");
+ goto out;
+ }
+
+ if ( (image = malloc(*size)) == NULL )
+ {
+ PERROR("Could not allocate memory for kernel image");
+ goto out;
+ }
+
+ if ( (bytes = gzread(kernel_gfd, image, *size)) != *size )
+ {
+ PERROR("Error reading kernel image, could not"
+ " read the whole image (%d != %ld).", bytes, *size);
+ free(image);
+ image = NULL;
+ }
+
+ out:
+ if ( kernel_gfd != NULL )
+ gzclose(kernel_gfd);
+ else if ( kernel_fd >= 0 )
+ close(kernel_fd);
+ return image;
+}
+
+void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
+ int xch, u32 dom, unsigned long *parray,
+ unsigned long vstart)
+{
+ char *va;
+ unsigned long chunksz, done, pa;
+
+ for ( done = 0; done < size; done += chunksz )
+ {
+ pa = dst + done - vstart;
+ va = xc_map_foreign_range(
+ xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
+ chunksz = size - done;
+ if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+ chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+ memcpy(va + (pa & (PAGE_SIZE-1)), src + done, chunksz);
+ munmap(va, PAGE_SIZE);
+ }
+}
+
+int xc_dom0_op(int xc_handle, dom0_op_t *op)
+{
+ return do_dom0_op(xc_handle, op);
+}
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index a48156000a..baf1e5f26d 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -29,12 +29,25 @@
#define _PAGE_PSE 0x080
#define _PAGE_GLOBAL 0x100
-
+#if defined(__i386__)
#define L1_PAGETABLE_SHIFT 12
#define L2_PAGETABLE_SHIFT 22
-
+#elif defined(__x86_64__)
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define L4_PAGETABLE_SHIFT 39
+#endif
+
+#if defined(__i386__)
#define ENTRIES_PER_L1_PAGETABLE 1024
#define ENTRIES_PER_L2_PAGETABLE 1024
+#elif defined(__x86_64__)
+#define L1_PAGETABLE_ENTRIES 512
+#define L2_PAGETABLE_ENTRIES 512
+#define L3_PAGETABLE_ENTRIES 512
+#define L4_PAGETABLE_ENTRIES 512
+#endif
#define PAGE_SHIFT L1_PAGETABLE_SHIFT
#define PAGE_SIZE (1UL << PAGE_SHIFT)
@@ -42,11 +55,51 @@
typedef unsigned long l1_pgentry_t;
typedef unsigned long l2_pgentry_t;
+#if defined(__x86_64__)
+typedef unsigned long l3_pgentry_t;
+typedef unsigned long l4_pgentry_t;
+#endif
+#if defined(__i386__)
#define l1_table_offset(_a) \
(((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
#define l2_table_offset(_a) \
((_a) >> L2_PAGETABLE_SHIFT)
+#elif defined(__x86_64__)
+#define l1_table_offset(_a) \
+ (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
+#define l2_table_offset(_a) \
+ (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#define l3_table_offset(_a) \
+ (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#define l4_table_offset(_a) \
+ (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
+#endif
+
+struct domain_setup_info
+{
+ unsigned long v_start;
+ unsigned long v_end;
+ unsigned long v_kernstart;
+ unsigned long v_kernend;
+ unsigned long v_kernentry;
+
+ unsigned int load_symtab;
+ unsigned long symtab_addr;
+ unsigned long symtab_len;
+};
+
+typedef int (*parseimagefunc)(char *image, unsigned long image_size,
+ struct domain_setup_info *dsi);
+typedef int (*loadimagefunc)(char *image, unsigned long image_size, int xch,
+ u32 dom, unsigned long *parray,
+ struct domain_setup_info *dsi);
+
+struct load_funcs
+{
+ parseimagefunc parseimage;
+ loadimagefunc loadimage;
+};
#define ERROR(_m, _a...) \
fprintf(stderr, "ERROR: " _m "\n" , ## _a )
@@ -72,7 +125,7 @@ static inline int do_xen_hypercall(int xc_handle,
static inline int do_dom0_op(int xc_handle, dom0_op_t *op)
{
- int ret = -1, retries = 0;
+ int ret = -1, errno_saved;
privcmd_hypercall_t hypercall;
op->interface_version = DOM0_INTERFACE_VERSION;
@@ -86,26 +139,19 @@ static inline int do_dom0_op(int xc_handle, dom0_op_t *op)
goto out1;
}
- again:
if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
{
- if ( (errno == EAGAIN) && (retries++ < 10) )
- {
- /*
- * This was added for memory allocation, where we can get EAGAIN
- * if memory is unavailable because it is on the scrub list.
- */
- sleep(1);
- goto again;
- }
if ( errno == EACCES )
fprintf(stderr, "Dom0 operation failed -- need to"
" rebuild the user-space tool set?\n");
- goto out2;
}
- out2: (void)munlock(op, sizeof(*op));
- out1: return ret;
+ errno_saved = errno;
+ (void)munlock(op, sizeof(*op));
+ errno = errno_saved;
+
+ out1:
+ return ret;
}
static inline int do_dom_mem_op(int xc_handle,
@@ -117,7 +163,8 @@ static inline int do_dom_mem_op(int xc_handle,
{
privcmd_hypercall_t hypercall;
long ret = -EINVAL;
-
+ int errno_saved;
+
hypercall.op = __HYPERVISOR_dom_mem_op;
hypercall.arg[0] = (unsigned long)memop;
hypercall.arg[1] = (unsigned long)extent_list;
@@ -125,7 +172,47 @@ static inline int do_dom_mem_op(int xc_handle,
hypercall.arg[3] = (unsigned long)extent_order;
hypercall.arg[4] = (unsigned long)domid;
- if ( mlock(extent_list, nr_extents*sizeof(unsigned long)) != 0 )
+ if ( (extent_list != NULL) &&
+ (mlock(extent_list, nr_extents*sizeof(unsigned long)) != 0) )
+ {
+ PERROR("Could not lock memory for Xen hypercall");
+ goto out1;
+ }
+
+ if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
+ {
+ fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
+ " rebuild the user-space tool set?\n",ret,errno);
+ }
+
+ if ( extent_list != NULL )
+ {
+ errno_saved = errno;
+ (void)munlock(extent_list, nr_extents*sizeof(unsigned long));
+ errno = errno_saved;
+ }
+
+ out1:
+ return ret;
+}
+
+static inline int do_mmuext_op(
+ int xc_handle,
+ struct mmuext_op *op,
+ unsigned int nr_ops,
+ domid_t dom)
+{
+ privcmd_hypercall_t hypercall;
+ long ret = -EINVAL;
+ int errno_saved;
+
+ hypercall.op = __HYPERVISOR_mmuext_op;
+ hypercall.arg[0] = (unsigned long)op;
+ hypercall.arg[1] = (unsigned long)nr_ops;
+ hypercall.arg[2] = (unsigned long)0;
+ hypercall.arg[3] = (unsigned long)dom;
+
+ if ( mlock(op, nr_ops*sizeof(*op)) != 0 )
{
PERROR("Could not lock memory for Xen hypercall");
goto out1;
@@ -135,11 +222,14 @@ static inline int do_dom_mem_op(int xc_handle,
{
fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to"
" rebuild the user-space tool set?\n",ret,errno);
- goto out2;
}
- out2: (void)munlock(extent_list, nr_extents*sizeof(unsigned long));
- out1: return ret;
+ errno_saved = errno;
+ (void)munlock(op, nr_ops*sizeof(*op));
+ errno = errno_saved;
+
+ out1:
+ return ret;
}
@@ -195,8 +285,24 @@ typedef struct mfn_mapper {
} mfn_mapper_t;
-#include "xc_io.h"
+unsigned long xc_get_m2p_start_mfn (int xc_handle);
+
+int xc_copy_to_domain_page(int xc_handle, u32 domid,
+ unsigned long dst_pfn, void *src_page);
+
+unsigned long xc_get_filesz(int fd);
+
+char *xc_read_kernel_image(const char *filename, unsigned long *size);
+
+void xc_map_memcpy(unsigned long dst, char *src, unsigned long size,
+ int xch, u32 dom, unsigned long *parray,
+ unsigned long vstart);
+
+int pin_table(int xc_handle, unsigned int type, unsigned long mfn,
+ domid_t dom);
-unsigned long xc_get_m2p_start_mfn ( int xc_handle );
+/* image loading */
+int probe_elf(char *image, unsigned long image_size, struct load_funcs *funcs);
+int probe_bin(char *image, unsigned long image_size, struct load_funcs *funcs);
#endif /* __XC_PRIVATE_H__ */
diff --git a/tools/libxc/xc_ptrace.c b/tools/libxc/xc_ptrace.c
new file mode 100644
index 0000000000..1db45a7bbb
--- /dev/null
+++ b/tools/libxc/xc_ptrace.c
@@ -0,0 +1,396 @@
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include "xc_private.h"
+#include <time.h>
+
+
+#define BSD_PAGE_MASK (PAGE_SIZE-1)
+#define PG_FRAME (~((unsigned long)BSD_PAGE_MASK)
+#define PDRSHIFT 22
+#define PSL_T 0x00000100 /* trace enable bit */
+
+#define VCPU 0 /* XXX */
+
+/*
+ * long
+ * ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
+ */
+
+
+int waitdomain(int domain, int *status, int options);
+
+char * ptrace_names[] = {
+ "PTRACE_TRACEME",
+ "PTRACE_PEEKTEXT",
+ "PTRACE_PEEKDATA",
+ "PTRACE_PEEKUSER",
+ "PTRACE_POKETEXT",
+ "PTRACE_POKEDATA",
+ "PTRACE_POKEUSER",
+ "PTRACE_CONT",
+ "PTRACE_KILL",
+ "PTRACE_SINGLESTEP",
+ "PTRACE_INVALID",
+ "PTRACE_INVALID",
+ "PTRACE_GETREGS",
+ "PTRACE_SETREGS",
+ "PTRACE_GETFPREGS",
+ "PTRACE_SETFPREGS",
+ "PTRACE_ATTACH",
+ "PTRACE_DETACH",
+ "PTRACE_GETFPXREGS",
+ "PTRACE_SETFPXREGS",
+ "PTRACE_INVALID",
+ "PTRACE_INVALID",
+ "PTRACE_INVALID",
+ "PTRACE_INVALID",
+ "PTRACE_SYSCALL",
+};
+
+struct gdb_regs {
+ long ebx; /* 0 */
+ long ecx; /* 4 */
+ long edx; /* 8 */
+ long esi; /* 12 */
+ long edi; /* 16 */
+ long ebp; /* 20 */
+ long eax; /* 24 */
+ int xds; /* 28 */
+ int xes; /* 32 */
+ int xfs; /* 36 */
+ int xgs; /* 40 */
+ long orig_eax; /* 44 */
+ long eip; /* 48 */
+ int xcs; /* 52 */
+ long eflags; /* 56 */
+ long esp; /* 60 */
+ int xss; /* 64 */
+};
+
+#define FETCH_REGS(cpu) \
+ if (!regs_valid[cpu]) \
+ { \
+ int retval = xc_domain_get_vcpu_context(xc_handle, domid, cpu, &ctxt[cpu]); \
+ if (retval) \
+ goto error_out; \
+ cr3[cpu] = ctxt[cpu].pt_base; /* physical address */ \
+ regs_valid[cpu] = 1; \
+ } \
+
+#define printval(x) printf("%s = %lx\n", #x, (long)x);
+#define SET_PT_REGS(pt, xc) \
+{ \
+ pt.ebx = xc.ebx; \
+ pt.ecx = xc.ecx; \
+ pt.edx = xc.edx; \
+ pt.esi = xc.esi; \
+ pt.edi = xc.edi; \
+ pt.ebp = xc.ebp; \
+ pt.eax = xc.eax; \
+ pt.eip = xc.eip; \
+ pt.xcs = xc.cs; \
+ pt.eflags = xc.eflags; \
+ pt.esp = xc.esp; \
+ pt.xss = xc.ss; \
+ pt.xes = xc.es; \
+ pt.xds = xc.ds; \
+ pt.xfs = xc.fs; \
+ pt.xgs = xc.gs; \
+}
+
+#define SET_XC_REGS(pt, xc) \
+{ \
+ xc.ebx = pt->ebx; \
+ xc.ecx = pt->ecx; \
+ xc.edx = pt->edx; \
+ xc.esi = pt->esi; \
+ xc.edi = pt->edi; \
+ xc.ebp = pt->ebp; \
+ xc.eax = pt->eax; \
+ xc.eip = pt->eip; \
+ xc.cs = pt->xcs; \
+ xc.eflags = pt->eflags; \
+ xc.esp = pt->esp; \
+ xc.ss = pt->xss; \
+ xc.es = pt->xes; \
+ xc.ds = pt->xds; \
+ xc.fs = pt->xfs; \
+ xc.gs = pt->xgs; \
+}
+
+
+#define vtopdi(va) ((va) >> PDRSHIFT)
+#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
+
+/* XXX application state */
+
+
+static int xc_handle;
+static long nr_pages = 0;
+unsigned long *page_array = NULL;
+static int regs_valid[MAX_VIRT_CPUS];
+static unsigned long cr3[MAX_VIRT_CPUS];
+static vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
+
+/* --------------------- */
+
+static void *
+map_domain_va(unsigned long domid, int cpu, void * guest_va, int perm)
+{
+ unsigned long pde, page;
+ unsigned long va = (unsigned long)guest_va;
+ long npgs = xc_get_tot_pages(xc_handle, domid);
+
+ static unsigned long cr3_phys[MAX_VIRT_CPUS];
+ static unsigned long *cr3_virt[MAX_VIRT_CPUS];
+ static unsigned long pde_phys[MAX_VIRT_CPUS];
+ static unsigned long *pde_virt[MAX_VIRT_CPUS];
+ static unsigned long page_phys[MAX_VIRT_CPUS];
+ static unsigned long *page_virt[MAX_VIRT_CPUS];
+
+ static int prev_perm[MAX_VIRT_CPUS];
+
+ if (nr_pages != npgs) {
+ if (nr_pages > 0)
+ free(page_array);
+ nr_pages = npgs;
+ if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
+ printf("Could not allocate memory\n");
+ goto error_out;
+ }
+
+ if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
+ printf("Could not get the page frame list\n");
+ goto error_out;
+ }
+ }
+
+ FETCH_REGS(cpu);
+
+ if (cr3[cpu] != cr3_phys[cpu])
+ {
+ cr3_phys[cpu] = cr3[cpu];
+ if (cr3_virt[cpu])
+ munmap(cr3_virt[cpu], PAGE_SIZE);
+ if ((cr3_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+ PROT_READ,
+ cr3_phys[cpu] >> PAGE_SHIFT)) == NULL)
+ goto error_out;
+ }
+ if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+ goto error_out;
+ if (ctxt[cpu].flags & VGCF_VMX_GUEST)
+ pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
+ if (pde != pde_phys[cpu])
+ {
+ pde_phys[cpu] = pde;
+ if (pde_virt[cpu])
+ munmap(pde_virt[cpu], PAGE_SIZE);
+ if ((pde_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+ PROT_READ,
+ pde_phys[cpu] >> PAGE_SHIFT)) == NULL)
+ goto error_out;
+ }
+ if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+ goto error_out;
+ if (ctxt[cpu].flags & VGCF_VMX_GUEST)
+ page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
+ if (page != page_phys[cpu] || perm != prev_perm[cpu])
+ {
+ page_phys[cpu] = page;
+ if (page_virt[cpu])
+ munmap(page_virt[cpu], PAGE_SIZE);
+ if ((page_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
+ perm,
+ page_phys[cpu] >> PAGE_SHIFT)) == NULL) {
+ printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va));
+ page_phys[cpu] = 0;
+ goto error_out;
+ }
+ prev_perm[cpu] = perm;
+ }
+ return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
+
+ error_out:
+ return 0;
+}
+
+int
+xc_waitdomain(int domain, int *status, int options)
+{
+ dom0_op_t op;
+ int retval;
+ struct timespec ts;
+ ts.tv_sec = 0;
+ ts.tv_nsec = 10*1000*1000;
+
+ if (!xc_handle)
+ if ((xc_handle = xc_interface_open()) < 0)
+ {
+ printf("xc_interface_open failed\n");
+ return -1;
+ }
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = domain;
+ retry:
+
+ retval = do_dom0_op(xc_handle, &op);
+ if (retval || op.u.getdomaininfo.domain != domain) {
+ printf("getdomaininfo failed\n");
+ goto done;
+ }
+ *status = op.u.getdomaininfo.flags;
+
+ if (options & WNOHANG)
+ goto done;
+
+
+ if (!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED)) {
+ nanosleep(&ts,NULL);
+ goto retry;
+ }
+ done:
+ return retval;
+
+}
+
+long
+xc_ptrace(enum __ptrace_request request, u32 domid, long eaddr, long edata)
+{
+ dom0_op_t op;
+ int status = 0;
+ struct gdb_regs pt;
+ long retval = 0;
+ unsigned long *guest_va;
+ int cpu = VCPU;
+ void *addr = (char *)eaddr;
+ void *data = (char *)edata;
+
+ op.interface_version = DOM0_INTERFACE_VERSION;
+
+ if (!xc_handle)
+ if ((xc_handle = xc_interface_open()) < 0)
+ return -1;
+#if 0
+ printf("%20s %d, %p, %p \n", ptrace_names[request], domid, addr, data);
+#endif
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ if ((guest_va = (unsigned long *)map_domain_va(domid, cpu, addr, PROT_READ)) == NULL) {
+ status = EFAULT;
+ goto error_out;
+ }
+
+ retval = *guest_va;
+ break;
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ if ((guest_va = (unsigned long *)map_domain_va(domid, cpu, addr, PROT_READ|PROT_WRITE)) == NULL) {
+ status = EFAULT;
+ goto error_out;
+ }
+
+ *guest_va = (unsigned long)data;
+ break;
+ case PTRACE_GETREGS:
+ case PTRACE_GETFPREGS:
+ case PTRACE_GETFPXREGS:
+ FETCH_REGS(cpu);
+
+ if (request == PTRACE_GETREGS) {
+ SET_PT_REGS(pt, ctxt[cpu].user_regs);
+ memcpy(data, &pt, sizeof(struct gdb_regs));
+ } else if (request == PTRACE_GETFPREGS)
+ memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+ else /*if (request == PTRACE_GETFPXREGS)*/
+ memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+ break;
+ case PTRACE_SETREGS:
+ op.cmd = DOM0_SETDOMAININFO;
+ SET_XC_REGS(((struct gdb_regs *)data), ctxt[VCPU].user_regs);
+ op.u.setdomaininfo.domain = domid;
+ /* XXX need to understand multiple vcpus */
+ op.u.setdomaininfo.vcpu = cpu;
+ op.u.setdomaininfo.ctxt = &ctxt[cpu];
+ retval = do_dom0_op(xc_handle, &op);
+ if (retval)
+ goto error_out;
+
+ break;
+ case PTRACE_ATTACH:
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = domid;
+ retval = do_dom0_op(xc_handle, &op);
+ if (retval || op.u.getdomaininfo.domain != domid) {
+ perror("dom0 op failed");
+ goto error_out;
+ }
+ if (op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) {
+ printf("domain currently paused\n");
+ goto error_out;
+ }
+ printf("domain not currently paused\n");
+ op.cmd = DOM0_PAUSEDOMAIN;
+ op.u.pausedomain.domain = domid;
+ retval = do_dom0_op(xc_handle, &op);
+ break;
+ case PTRACE_SINGLESTEP:
+ ctxt[VCPU].user_regs.eflags |= PSL_T;
+ op.cmd = DOM0_SETDOMAININFO;
+ op.u.setdomaininfo.domain = domid;
+ op.u.setdomaininfo.vcpu = 0;
+ op.u.setdomaininfo.ctxt = &ctxt[cpu];
+ retval = do_dom0_op(xc_handle, &op);
+ if (retval) {
+ perror("dom0 op failed");
+ goto error_out;
+ }
+ /* FALLTHROUGH */
+ case PTRACE_CONT:
+ case PTRACE_DETACH:
+ if (request != PTRACE_SINGLESTEP) {
+ FETCH_REGS(cpu);
+ /* Clear trace flag */
+ if (ctxt[cpu].user_regs.eflags & PSL_T) {
+ ctxt[cpu].user_regs.eflags &= ~PSL_T;
+ op.cmd = DOM0_SETDOMAININFO;
+ op.u.setdomaininfo.domain = domid;
+ op.u.setdomaininfo.vcpu = cpu;
+ op.u.setdomaininfo.ctxt = &ctxt[cpu];
+ retval = do_dom0_op(xc_handle, &op);
+ if (retval) {
+ perror("dom0 op failed");
+ goto error_out;
+ }
+ }
+ }
+ regs_valid[cpu] = 0;
+ op.cmd = DOM0_UNPAUSEDOMAIN;
+ op.u.unpausedomain.domain = domid > 0 ? domid : -domid;
+ retval = do_dom0_op(xc_handle, &op);
+ break;
+ case PTRACE_SETFPREGS:
+ case PTRACE_SETFPXREGS:
+ case PTRACE_PEEKUSER:
+ case PTRACE_POKEUSER:
+ case PTRACE_SYSCALL:
+ case PTRACE_KILL:
+#ifdef DEBUG
+ printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
+#endif
+ /* XXX not yet supported */
+ status = ENOSYS;
+ break;
+ case PTRACE_TRACEME:
+ printf("PTRACE_TRACEME is an invalid request under Xen\n");
+ status = EINVAL;
+ }
+
+ if (status) {
+ errno = status;
+ retval = -1;
+ }
+ error_out:
+ return retval;
+}
diff --git a/tools/libxc/xc_ptrace_core.c b/tools/libxc/xc_ptrace_core.c
new file mode 100644
index 0000000000..ec7a6980c3
--- /dev/null
+++ b/tools/libxc/xc_ptrace_core.c
@@ -0,0 +1,295 @@
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include "xc_private.h"
+#include <time.h>
+
+
+#define BSD_PAGE_MASK (PAGE_SIZE-1)
+#define PG_FRAME (~((unsigned long)BSD_PAGE_MASK)
+#define PDRSHIFT 22
+#define PSL_T 0x00000100 /* trace enable bit */
+
+#define VCPU 0 /* XXX */
+
+/*
+ * long
+ * ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
+ */
+
+
+struct gdb_regs {
+ long ebx; /* 0 */
+ long ecx; /* 4 */
+ long edx; /* 8 */
+ long esi; /* 12 */
+ long edi; /* 16 */
+ long ebp; /* 20 */
+ long eax; /* 24 */
+ int xds; /* 28 */
+ int xes; /* 32 */
+ int xfs; /* 36 */
+ int xgs; /* 40 */
+ long orig_eax; /* 44 */
+ long eip; /* 48 */
+ int xcs; /* 52 */
+ long eflags; /* 56 */
+ long esp; /* 60 */
+ int xss; /* 64 */
+};
+
+#define printval(x) printf("%s = %lx\n", #x, (long)x);
+#define SET_PT_REGS(pt, xc) \
+{ \
+ pt.ebx = xc.ebx; \
+ pt.ecx = xc.ecx; \
+ pt.edx = xc.edx; \
+ pt.esi = xc.esi; \
+ pt.edi = xc.edi; \
+ pt.ebp = xc.ebp; \
+ pt.eax = xc.eax; \
+ pt.eip = xc.eip; \
+ pt.xcs = xc.cs; \
+ pt.eflags = xc.eflags; \
+ pt.esp = xc.esp; \
+ pt.xss = xc.ss; \
+ pt.xes = xc.es; \
+ pt.xds = xc.ds; \
+ pt.xfs = xc.fs; \
+ pt.xgs = xc.gs; \
+}
+
+#define SET_XC_REGS(pt, xc) \
+{ \
+ xc.ebx = pt->ebx; \
+ xc.ecx = pt->ecx; \
+ xc.edx = pt->edx; \
+ xc.esi = pt->esi; \
+ xc.edi = pt->edi; \
+ xc.ebp = pt->ebp; \
+ xc.eax = pt->eax; \
+ xc.eip = pt->eip; \
+ xc.cs = pt->xcs; \
+ xc.eflags = pt->eflags; \
+ xc.esp = pt->esp; \
+ xc.ss = pt->xss; \
+ xc.es = pt->xes; \
+ xc.ds = pt->xds; \
+ xc.fs = pt->xfs; \
+ xc.gs = pt->xgs; \
+}
+
+
+#define vtopdi(va) ((va) >> PDRSHIFT)
+#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
+
+/* XXX application state */
+
+
+static long nr_pages = 0;
+static unsigned long *p2m_array = NULL;
+static unsigned long *m2p_array = NULL;
+static unsigned long pages_offset;
+static unsigned long cr3[MAX_VIRT_CPUS];
+static vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
+
+/* --------------------- */
+
+static unsigned long
+map_mtop_offset(unsigned long ma)
+{
+ return pages_offset + (m2p_array[ma >> PAGE_SHIFT] << PAGE_SHIFT);
+}
+
+
+static void *
+map_domain_va(unsigned long domfd, int cpu, void * guest_va)
+{
+ unsigned long pde, page;
+ unsigned long va = (unsigned long)guest_va;
+ void *v;
+
+ static unsigned long cr3_phys[MAX_VIRT_CPUS];
+ static unsigned long *cr3_virt[MAX_VIRT_CPUS];
+ static unsigned long pde_phys[MAX_VIRT_CPUS];
+ static unsigned long *pde_virt[MAX_VIRT_CPUS];
+ static unsigned long page_phys[MAX_VIRT_CPUS];
+ static unsigned long *page_virt[MAX_VIRT_CPUS];
+
+ if (cr3[cpu] != cr3_phys[cpu])
+ {
+ cr3_phys[cpu] = cr3[cpu];
+ if (cr3_virt[cpu])
+ munmap(cr3_virt[cpu], PAGE_SIZE);
+ v = mmap(
+ NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
+ map_mtop_offset(cr3_phys[cpu]));
+ if (v == MAP_FAILED)
+ {
+ perror("mmap failed");
+ goto error_out;
+ }
+ cr3_virt[cpu] = v;
+ }
+ if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+ goto error_out;
+ if (ctxt[cpu].flags & VGCF_VMX_GUEST)
+ pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
+ if (pde != pde_phys[cpu])
+ {
+ pde_phys[cpu] = pde;
+ if (pde_virt[cpu])
+ munmap(pde_virt[cpu], PAGE_SIZE);
+ v = mmap(
+ NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
+ map_mtop_offset(pde_phys[cpu]));
+ if (v == MAP_FAILED)
+ goto error_out;
+ pde_virt[cpu] = v;
+ }
+ if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+ goto error_out;
+ if (ctxt[cpu].flags & VGCF_VMX_GUEST)
+ page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
+ if (page != page_phys[cpu])
+ {
+ page_phys[cpu] = page;
+ if (page_virt[cpu])
+ munmap(page_virt[cpu], PAGE_SIZE);
+ v = mmap(
+ NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
+ map_mtop_offset(page_phys[cpu]));
+ if (v == MAP_FAILED) {
+ printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va));
+ page_phys[cpu] = 0;
+ goto error_out;
+ }
+ page_virt[cpu] = v;
+ }
+ return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
+
+ error_out:
+ return 0;
+}
+
+int
+xc_waitdomain_core(int domfd, int *status, int options)
+{
+ int retval = -1;
+ int nr_vcpus;
+ int i;
+ xc_core_header_t header;
+
+ if (nr_pages == 0) {
+
+ if (read(domfd, &header, sizeof(header)) != sizeof(header))
+ return -1;
+
+ nr_pages = header.xch_nr_pages;
+ nr_vcpus = header.xch_nr_vcpus;
+ pages_offset = header.xch_pages_offset;
+
+ if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) !=
+ sizeof(vcpu_guest_context_t)*nr_vcpus)
+ return -1;
+
+ for (i = 0; i < nr_vcpus; i++) {
+ cr3[i] = ctxt[i].pt_base;
+ }
+ if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
+ printf("Could not allocate p2m_array\n");
+ goto error_out;
+ }
+ if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) !=
+ sizeof(unsigned long)*nr_pages)
+ return -1;
+
+ if ((m2p_array = malloc((1<<20) * sizeof(unsigned long))) == NULL) {
+ printf("Could not allocate m2p array\n");
+ goto error_out;
+ }
+ bzero(m2p_array, sizeof(unsigned long)* 1 << 20);
+
+ for (i = 0; i < nr_pages; i++) {
+ m2p_array[p2m_array[i]] = i;
+ }
+
+ }
+ retval = 0;
+ error_out:
+ return retval;
+
+}
+
+long
+xc_ptrace_core(enum __ptrace_request request, u32 domfd, long eaddr, long edata)
+{
+ int status = 0;
+ struct gdb_regs pt;
+ long retval = 0;
+ unsigned long *guest_va;
+ int cpu = VCPU;
+ void *addr = (char *)eaddr;
+ void *data = (char *)edata;
+
+#if 0
+ printf("%20s %d, %p, %p \n", ptrace_names[request], domid, addr, data);
+#endif
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) == NULL) {
+ status = EFAULT;
+ goto error_out;
+ }
+
+ retval = *guest_va;
+ break;
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) == NULL) {
+ status = EFAULT;
+ goto error_out;
+ }
+ *guest_va = (unsigned long)data;
+ break;
+ case PTRACE_GETREGS:
+ case PTRACE_GETFPREGS:
+ case PTRACE_GETFPXREGS:
+ if (request == PTRACE_GETREGS) {
+ SET_PT_REGS(pt, ctxt[cpu].user_regs);
+ memcpy(data, &pt, sizeof(struct gdb_regs));
+ } else if (request == PTRACE_GETFPREGS)
+ memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+ else /*if (request == PTRACE_GETFPXREGS)*/
+ memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+ break;
+ case PTRACE_ATTACH:
+ retval = 0;
+ break;
+ case PTRACE_SETREGS:
+ case PTRACE_SINGLESTEP:
+ case PTRACE_CONT:
+ case PTRACE_DETACH:
+ case PTRACE_SETFPREGS:
+ case PTRACE_SETFPXREGS:
+ case PTRACE_PEEKUSER:
+ case PTRACE_POKEUSER:
+ case PTRACE_SYSCALL:
+ case PTRACE_KILL:
+#ifdef DEBUG
+ printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
+#endif
+ status = ENOSYS;
+ break;
+ case PTRACE_TRACEME:
+ printf("PTRACE_TRACEME is an invalid request under Xen\n");
+ status = EINVAL;
+ }
+
+ if (status) {
+ errno = status;
+ retval = -1;
+ }
+ error_out:
+ return retval;
+}
diff --git a/tools/libxc/xc_rrobin.c b/tools/libxc/xc_rrobin.c
deleted file mode 100644
index ad37962f3b..0000000000
--- a/tools/libxc/xc_rrobin.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/******************************************************************************
- * xc_rrobin.c
- *
- * API for manipulating parameters of the Round Robin scheduler
- *
- * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge.
- */
-
-#include "xc_private.h"
-
-int xc_rrobin_global_set(int xc_handle, u64 slice)
-{
- dom0_op_t op;
- op.cmd = DOM0_SCHEDCTL;
- op.u.schedctl.sched_id = SCHED_RROBIN;
- op.u.schedctl.direction = SCHED_INFO_PUT;
-
- op.u.schedctl.u.rrobin.slice = slice;
- return do_dom0_op(xc_handle, &op);
-}
-
-
-int xc_rrobin_global_get(int xc_handle, u64 *slice)
-{
- dom0_op_t op;
- int ret;
-
- op.cmd = DOM0_SCHEDCTL;
- op.u.schedctl.sched_id = SCHED_RROBIN;
- op.u.schedctl.direction = SCHED_INFO_GET;
-
- ret = do_dom0_op(xc_handle, &op);
-
- *slice = op.u.schedctl.u.rrobin.slice;
-
- return ret;
-}
diff --git a/tools/libxc/xc_sedf.c b/tools/libxc/xc_sedf.c
new file mode 100644
index 0000000000..a3a0598a7d
--- /dev/null
+++ b/tools/libxc/xc_sedf.c
@@ -0,0 +1,51 @@
+/******************************************************************************
+ * xc_sedf.c
+ *
+ * API for manipulating parameters of the Simple EDF scheduler.
+ *
+ * changes by Stephan Diestelhorst
+ * based on code
+ * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge.
+ */
+
+#include "xc_private.h"
+
+int xc_sedf_domain_set(int xc_handle,
+ u32 domid, u64 period, u64 slice,u64 latency, u16 extratime,u16 weight)
+{
+ dom0_op_t op;
+ struct sedf_adjdom *p = &op.u.adjustdom.u.sedf;
+
+ op.cmd = DOM0_ADJUSTDOM;
+ op.u.adjustdom.domain = (domid_t)domid;
+ op.u.adjustdom.sched_id = SCHED_SEDF;
+ op.u.adjustdom.direction = SCHED_INFO_PUT;
+
+ p->period = period;
+ p->slice = slice;
+ p->latency = latency;
+ p->extratime = extratime;
+ p->weight = weight;
+ return do_dom0_op(xc_handle, &op);
+}
+
+int xc_sedf_domain_get(int xc_handle, u32 domid, u64 *period, u64 *slice, u64* latency, u16* extratime, u16* weight)
+{
+ dom0_op_t op;
+ int ret;
+ struct sedf_adjdom *p = &op.u.adjustdom.u.sedf;
+
+ op.cmd = DOM0_ADJUSTDOM;
+ op.u.adjustdom.domain = (domid_t)domid;
+ op.u.adjustdom.sched_id = SCHED_SEDF;
+ op.u.adjustdom.direction = SCHED_INFO_GET;
+
+ ret = do_dom0_op(xc_handle, &op);
+
+ *period = p->period;
+ *slice = p->slice;
+ *latency = p->latency;
+ *extratime = p->extratime;
+ *weight = p->weight;
+ return ret;
+}
diff --git a/tools/libxc/xc_vmx_build.c b/tools/libxc/xc_vmx_build.c
new file mode 100644
index 0000000000..c30fd4c49b
--- /dev/null
+++ b/tools/libxc/xc_vmx_build.c
@@ -0,0 +1,758 @@
+/******************************************************************************
+ * xc_vmx_build.c
+ */
+
+#include <stddef.h>
+#include "xc_private.h"
+#define ELFSIZE 32
+#include "xc_elf.h"
+#include <stdlib.h>
+#include <zlib.h>
+#include "linux_boot_params.h"
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+
+#define LINUX_BOOT_PARAMS_ADDR 0x00090000
+#define LINUX_KERNEL_ENTR_ADDR 0x00100000
+#define LINUX_PAGE_OFFSET 0xC0000000
+
+static int
+parseelfimage(
+ char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
+static int
+loadelfimage(
+ char *elfbase, int xch, u32 dom, unsigned long *parray,
+ struct domain_setup_info *dsi);
+
+static void build_e820map(struct mem_map *mem_mapp, unsigned long mem_size)
+{
+ int nr_map = 0;
+
+ /* XXX: Doesn't work for > 4GB yet */
+ mem_mapp->map[0].addr = 0x0;
+ mem_mapp->map[0].size = 0x9F800;
+ mem_mapp->map[0].type = E820_RAM;
+ mem_mapp->map[0].caching_attr = MEMMAP_WB;
+ nr_map++;
+
+ mem_mapp->map[1].addr = 0x9F800;
+ mem_mapp->map[1].size = 0x800;
+ mem_mapp->map[1].type = E820_RESERVED;
+ mem_mapp->map[1].caching_attr = MEMMAP_UC;
+ nr_map++;
+
+ mem_mapp->map[2].addr = 0xA0000;
+ mem_mapp->map[2].size = 0x20000;
+ mem_mapp->map[2].type = E820_IO;
+ mem_mapp->map[2].caching_attr = MEMMAP_UC;
+ nr_map++;
+
+ mem_mapp->map[3].addr = 0xF0000;
+ mem_mapp->map[3].size = 0x10000;
+ mem_mapp->map[3].type = E820_RESERVED;
+ mem_mapp->map[3].caching_attr = MEMMAP_UC;
+ nr_map++;
+
+ mem_mapp->map[4].addr = 0x100000;
+ mem_mapp->map[4].size = mem_size - 0x100000 - PAGE_SIZE;
+ mem_mapp->map[4].type = E820_RAM;
+ mem_mapp->map[4].caching_attr = MEMMAP_WB;
+ nr_map++;
+
+ mem_mapp->map[5].addr = mem_size - PAGE_SIZE;
+ mem_mapp->map[5].size = PAGE_SIZE;
+ mem_mapp->map[5].type = E820_SHARED;
+ mem_mapp->map[5].caching_attr = MEMMAP_WB;
+ nr_map++;
+
+ mem_mapp->map[6].addr = mem_size;
+ mem_mapp->map[6].size = 0x3 * PAGE_SIZE;
+ mem_mapp->map[6].type = E820_NVS;
+ mem_mapp->map[6].caching_attr = MEMMAP_UC;
+ nr_map++;
+
+ mem_mapp->map[7].addr = mem_size + 0x3 * PAGE_SIZE;
+ mem_mapp->map[7].size = 0xA * PAGE_SIZE;
+ mem_mapp->map[7].type = E820_ACPI;
+ mem_mapp->map[7].caching_attr = MEMMAP_WB;
+ nr_map++;
+
+ mem_mapp->map[8].addr = 0xFEC00000;
+ mem_mapp->map[8].size = 0x1400000;
+ mem_mapp->map[8].type = E820_IO;
+ mem_mapp->map[8].caching_attr = MEMMAP_UC;
+ nr_map++;
+
+ mem_mapp->nr_map = nr_map;
+}
+
+static int zap_mmio_range(int xc_handle, u32 dom,
+ l2_pgentry_t *vl2tab,
+ unsigned long mmio_range_start,
+ unsigned long mmio_range_size)
+{
+ unsigned long mmio_addr;
+ unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+ unsigned long vl2e;
+ l1_pgentry_t *vl1tab;
+
+ mmio_addr = mmio_range_start & PAGE_MASK;
+ for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
+ vl2e = vl2tab[l2_table_offset(mmio_addr)];
+ vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
+ if (vl1tab == 0) {
+ PERROR("Failed zap MMIO range");
+ return -1;
+ }
+ vl1tab[l1_table_offset(mmio_addr)] = 0;
+ munmap(vl1tab, PAGE_SIZE);
+ }
+ return 0;
+}
+
+static int zap_mmio_ranges(int xc_handle, u32 dom,
+ unsigned long l2tab,
+ struct mem_map *mem_mapp)
+{
+ int i;
+ l2_pgentry_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ l2tab >> PAGE_SHIFT);
+ if (vl2tab == 0)
+ return -1;
+ for (i = 0; i < mem_mapp->nr_map; i++) {
+ if ((mem_mapp->map[i].type == E820_IO)
+ && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
+ if (zap_mmio_range(xc_handle, dom, vl2tab,
+ mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
+ return -1;
+ }
+ munmap(vl2tab, PAGE_SIZE);
+ return 0;
+}
+
+static int setup_guest(int xc_handle,
+ u32 dom, int memsize,
+ char *image, unsigned long image_size,
+ gzFile initrd_gfd, unsigned long initrd_len,
+ unsigned long nr_pages,
+ vcpu_guest_context_t *ctxt,
+ const char *cmdline,
+ unsigned long shared_info_frame,
+ unsigned int control_evtchn,
+ unsigned long flags,
+ struct mem_map * mem_mapp)
+{
+ l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+ l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
+ unsigned long *page_array = NULL;
+ unsigned long l2tab;
+ unsigned long l1tab;
+ unsigned long count, i;
+ shared_info_t *shared_info;
+ struct linux_boot_params * boot_paramsp;
+ __u16 * boot_gdtp;
+ mmu_t *mmu = NULL;
+ int rc;
+
+ unsigned long nr_pt_pages;
+ unsigned long ppt_alloc;
+
+ struct domain_setup_info dsi;
+ unsigned long vinitrd_start;
+ unsigned long vinitrd_end;
+ unsigned long vboot_params_start;
+ unsigned long vboot_params_end;
+ unsigned long vboot_gdt_start;
+ unsigned long vboot_gdt_end;
+ unsigned long vpt_start;
+ unsigned long vpt_end;
+ unsigned long v_end;
+
+ memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+ if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
+ goto error_out;
+
+ if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
+ {
+ PERROR("Guest OS must load to a page boundary.\n");
+ goto error_out;
+ }
+
+ /*
+ * Why do we need this? The number of page-table frames depends on the
+ * size of the bootstrap address space. But the size of the address space
+ * depends on the number of page-table frames (since each one is mapped
+ * read-only). We have a pair of simultaneous equations in two unknowns,
+ * which we solve by exhaustive search.
+ */
+ vboot_params_start = LINUX_BOOT_PARAMS_ADDR;
+ vboot_params_end = vboot_params_start + PAGE_SIZE;
+ vboot_gdt_start = vboot_params_end;
+ vboot_gdt_end = vboot_gdt_start + PAGE_SIZE;
+
+ /* memsize is in megabytes */
+ v_end = memsize << 20;
+ vinitrd_end = v_end - PAGE_SIZE; /* leaving the top 4k untouched for IO requests page use */
+ vinitrd_start = vinitrd_end - initrd_len;
+ vinitrd_start = vinitrd_start & (~(PAGE_SIZE - 1));
+
+ if(initrd_len == 0)
+ vinitrd_start = vinitrd_end = 0;
+
+ nr_pt_pages = 1 + ((memsize + 3) >> 2);
+ vpt_start = v_end;
+ vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
+
+ printf("VIRTUAL MEMORY ARRANGEMENT:\n"
+ " Boot_params: %08lx->%08lx\n"
+ " boot_gdt: %08lx->%08lx\n"
+ " Loaded kernel: %08lx->%08lx\n"
+ " Init. ramdisk: %08lx->%08lx\n"
+ " Page tables: %08lx->%08lx\n"
+ " TOTAL: %08lx->%08lx\n",
+ vboot_params_start, vboot_params_end,
+ vboot_gdt_start, vboot_gdt_end,
+ dsi.v_kernstart, dsi.v_kernend,
+ vinitrd_start, vinitrd_end,
+ vpt_start, vpt_end,
+ dsi.v_start, v_end);
+ printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
+ printf(" INITRD LENGTH: %08lx\n", initrd_len);
+
+ if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+ {
+ printf("Initial guest OS requires too much space\n"
+ "(%luMB is greater than %luMB limit)\n",
+ (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+ goto error_out;
+ }
+
+ if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+ {
+ PERROR("Could not allocate memory");
+ goto error_out;
+ }
+
+ if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
+ {
+ PERROR("Could not get the page frame list");
+ goto error_out;
+ }
+
+ loadelfimage(image, xc_handle, dom, page_array, &dsi);
+
+ /* Load the initial ramdisk image. */
+ if ( initrd_len != 0 )
+ {
+ for ( i = (vinitrd_start - dsi.v_start);
+ i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
+ {
+ char page[PAGE_SIZE];
+ if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
+ {
+ PERROR("Error reading initrd image, could not");
+ goto error_out;
+ }
+ xc_copy_to_domain_page(xc_handle, dom,
+ page_array[i>>PAGE_SHIFT], page);
+ }
+ }
+
+ if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
+ goto error_out;
+
+ /* First allocate page for page dir. */
+ ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+ l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+ ctxt->pt_base = l2tab;
+
+ /* Initialise the page tables. */
+ if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ l2tab >> PAGE_SHIFT)) == NULL )
+ goto error_out;
+ memset(vl2tab, 0, PAGE_SIZE);
+ vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
+ for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+ {
+ if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+ {
+ l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+ if ( vl1tab != NULL )
+ munmap(vl1tab, PAGE_SIZE);
+ if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ l1tab >> PAGE_SHIFT)) == NULL )
+ {
+ munmap(vl2tab, PAGE_SIZE);
+ goto error_out;
+ }
+ memset(vl1tab, 0, PAGE_SIZE);
+ vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+ *vl2e++ = l1tab | L2_PROT;
+ }
+
+ *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+ vl1e++;
+ }
+ munmap(vl1tab, PAGE_SIZE);
+ munmap(vl2tab, PAGE_SIZE);
+
+ /* Write the machine->phys table entries. */
+ for ( count = 0; count < nr_pages; count++ )
+ {
+ if ( add_mmu_update(xc_handle, mmu,
+ (page_array[count] << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, count) )
+ goto error_out;
+ }
+
+
+ if ((boot_paramsp = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
+ goto error_out;
+ memset(boot_paramsp, 0, sizeof(*boot_paramsp));
+
+ strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
+ boot_paramsp->cmd_line[0x800-1] = '\0';
+ boot_paramsp->cmd_line_ptr = ((unsigned long) vboot_params_start) + offsetof(struct linux_boot_params, cmd_line);
+
+ boot_paramsp->setup_sects = 0;
+ boot_paramsp->mount_root_rdonly = 1;
+ boot_paramsp->swapdev = 0x0;
+ boot_paramsp->ramdisk_flags = 0x0;
+ boot_paramsp->root_dev = 0x0; /* We must tell kernel root dev by kernel command line. */
+
+ /* we don't have a ps/2 mouse now.
+ * 0xAA means an aux mouse is there.
+ * See detect_auxiliary_port() in pc_keyb.c.
+ */
+ boot_paramsp->aux_device_info = 0x0;
+
+ boot_paramsp->header_magic[0] = 0x48; /* "H" */
+ boot_paramsp->header_magic[1] = 0x64; /* "d" */
+ boot_paramsp->header_magic[2] = 0x72; /* "r" */
+ boot_paramsp->header_magic[3] = 0x53; /* "S" */
+
+ boot_paramsp->protocol_version = 0x0203; /* 2.03 */
+ boot_paramsp->loader_type = 0x71; /* GRUB */
+ boot_paramsp->loader_flags = 0x1; /* loaded high */
+ boot_paramsp->code32_start = LINUX_KERNEL_ENTR_ADDR; /* 1MB */
+ boot_paramsp->initrd_start = vinitrd_start;
+ boot_paramsp->initrd_size = initrd_len;
+
+ i = ((memsize - 1) << 10) - 4;
+ boot_paramsp->alt_mem_k = i; /* alt_mem_k */
+ boot_paramsp->screen.overlap.ext_mem_k = i & 0xFFFF; /* ext_mem_k */
+
+ /*
+ * Stuff SCREEN_INFO
+ */
+ boot_paramsp->screen.info.orig_x = 0;
+ boot_paramsp->screen.info.orig_y = 0;
+ boot_paramsp->screen.info.orig_video_page = 8;
+ boot_paramsp->screen.info.orig_video_mode = 3;
+ boot_paramsp->screen.info.orig_video_cols = 80;
+ boot_paramsp->screen.info.orig_video_ega_bx = 0;
+ boot_paramsp->screen.info.orig_video_lines = 25;
+ boot_paramsp->screen.info.orig_video_isVGA = 1;
+ boot_paramsp->screen.info.orig_video_points = 0x0010;
+
+ /* seems we may NOT stuff boot_paramsp->apm_bios_info */
+ /* seems we may NOT stuff boot_paramsp->drive_info */
+ /* seems we may NOT stuff boot_paramsp->sys_desc_table */
+ *((unsigned short *) &boot_paramsp->drive_info.dummy[0]) = 800;
+ boot_paramsp->drive_info.dummy[2] = 4;
+ boot_paramsp->drive_info.dummy[14] = 32;
+
+ /* memsize is in megabytes */
+ build_e820map(mem_mapp, memsize << 20);
+ if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
+ goto error_out;
+ boot_paramsp->e820_map_nr = mem_mapp->nr_map;
+ for (i=0; i<mem_mapp->nr_map; i++) {
+ boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr;
+ boot_paramsp->e820_map[i].size = mem_mapp->map[i].size;
+ boot_paramsp->e820_map[i].type = mem_mapp->map[i].type;
+ }
+ munmap(boot_paramsp, PAGE_SIZE);
+
+ if ((boot_gdtp = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
+ goto error_out;
+ memset(boot_gdtp, 0, PAGE_SIZE);
+ boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
+ boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
+ boot_gdtp[12*4 + 2] = 0x9a00; boot_gdtp[13*4 + 2] = 0x9200; /* perms */
+ boot_gdtp[12*4 + 3] = boot_gdtp[13*4 + 3] = 0x00cf; /* granu + top of limit */
+ munmap(boot_gdtp, PAGE_SIZE);
+
+ /* shared_info page starts its life empty. */
+ if ((shared_info = xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ shared_info_frame)) == 0)
+ goto error_out;
+ memset(shared_info, 0, sizeof(shared_info_t));
+ /* Mask all upcalls... */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+ munmap(shared_info, PAGE_SIZE);
+
+ /*
+ * Pin down l2tab addr as page dir page - causes hypervisor to provide
+ * correct protection for the page
+ */
+ if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, l2tab>>PAGE_SHIFT, dom) )
+ goto error_out;
+
+ /* Send the page update requests down to the hypervisor. */
+ if ( finish_mmu_updates(xc_handle, mmu) )
+ goto error_out;
+
+ free(mmu);
+ free(page_array);
+
+ /*
+ * Initial register values:
+ */
+ ctxt->user_regs.ds = 0x68;
+ ctxt->user_regs.es = 0x0;
+ ctxt->user_regs.fs = 0x0;
+ ctxt->user_regs.gs = 0x0;
+ ctxt->user_regs.ss = 0x68;
+ ctxt->user_regs.cs = 0x60;
+ ctxt->user_regs.eip = dsi.v_kernentry;
+ ctxt->user_regs.edx = vboot_gdt_start;
+ ctxt->user_regs.eax = 0x800;
+ ctxt->user_regs.esp = vboot_gdt_end;
+ ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
+ ctxt->user_regs.ecx = mem_mapp->nr_map;
+ ctxt->user_regs.esi = vboot_params_start;
+ ctxt->user_regs.edi = vboot_params_start + 0x2d0;
+
+ ctxt->user_regs.eflags = 0;
+
+ return 0;
+
+ error_out:
+ if ( mmu != NULL )
+ free(mmu);
+ if ( page_array != NULL )
+ free(page_array);
+ return -1;
+}
+
+
+#define VMX_FEATURE_FLAG 0x20
+
+int vmx_identify(void)
+{
+ int eax, ecx;
+
+#ifdef __i386__
+ __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx"
+ : "=a" (eax), "=c" (ecx)
+ : "0" (1)
+ : "dx");
+#elif defined __x86_64__
+ __asm__ __volatile__ ("pushq %%rbx; cpuid; popq %%rbx"
+ : "=a" (eax), "=c" (ecx)
+ : "0" (1)
+ : "dx");
+#endif
+
+ if (!(ecx & VMX_FEATURE_FLAG)) {
+ return -1;
+ }
+ return 0;
+}
+
+int xc_vmx_build(int xc_handle,
+ u32 domid,
+ int memsize,
+ const char *image_name,
+ struct mem_map *mem_mapp,
+ const char *ramdisk_name,
+ const char *cmdline,
+ unsigned int control_evtchn,
+ unsigned long flags)
+{
+ dom0_op_t launch_op, op;
+ int initrd_fd = -1;
+ gzFile initrd_gfd = NULL;
+ int rc, i;
+ vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
+ unsigned long nr_pages;
+ char *image = NULL;
+ unsigned long image_size, initrd_size=0;
+
+ if ( vmx_identify() < 0 )
+ {
+ PERROR("CPU doesn't support VMX Extensions");
+ goto error_out;
+ }
+
+ if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
+ {
+ PERROR("Could not find total pages for domain");
+ goto error_out;
+ }
+
+ if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
+ goto error_out;
+
+ if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
+ {
+ if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
+ {
+ PERROR("Could not open the initial ramdisk image");
+ goto error_out;
+ }
+
+ initrd_size = xc_get_filesz(initrd_fd);
+
+ if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
+ {
+ PERROR("Could not allocate decompression state for initrd");
+ goto error_out;
+ }
+ }
+
+ if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
+ {
+ PERROR("xc_vmx_build: ctxt mlock failed");
+ return 1;
+ }
+
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = (domid_t)domid;
+ if ( (do_dom0_op(xc_handle, &op) < 0) ||
+ ((u16)op.u.getdomaininfo.domain != domid) )
+ {
+ PERROR("Could not get info on domain");
+ goto error_out;
+ }
+
+ if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
+ {
+ PERROR("Could not get vcpu context");
+ goto error_out;
+ }
+
+ if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
+ (ctxt->pt_base != 0) )
+ {
+ ERROR("Domain is already constructed");
+ goto error_out;
+ }
+
+ if ( setup_guest(xc_handle, domid, memsize, image, image_size,
+ initrd_gfd, initrd_size, nr_pages,
+ ctxt, cmdline,
+ op.u.getdomaininfo.shared_info_frame,
+ control_evtchn, flags, mem_mapp) < 0 )
+ {
+ ERROR("Error constructing guest OS");
+ goto error_out;
+ }
+
+ if ( initrd_fd >= 0 )
+ close(initrd_fd);
+ if ( initrd_gfd )
+ gzclose(initrd_gfd);
+ if ( image != NULL )
+ free(image);
+
+ ctxt->flags = VGCF_VMX_GUEST;
+ /* FPU is set up to default initial state. */
+ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+
+ /* Virtual IDT is empty at start-of-day. */
+ for ( i = 0; i < 256; i++ )
+ {
+ ctxt->trap_ctxt[i].vector = i;
+ ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
+ }
+
+ /* No LDT. */
+ ctxt->ldt_ents = 0;
+
+ /* Use the default Xen-provided GDT. */
+ ctxt->gdt_ents = 0;
+
+ /* Ring 1 stack is the initial stack. */
+/*
+ ctxt->kernel_ss = FLAT_KERNEL_DS;
+ ctxt->kernel_sp = vstartinfo_start;
+*/
+ /* No debugging. */
+ memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
+
+ /* No callback handlers. */
+#if defined(__i386__)
+ ctxt->event_callback_cs = FLAT_KERNEL_CS;
+ ctxt->event_callback_eip = 0;
+ ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
+ ctxt->failsafe_callback_eip = 0;
+#elif defined(__x86_64__)
+ ctxt->event_callback_eip = 0;
+ ctxt->failsafe_callback_eip = 0;
+ ctxt->syscall_callback_eip = 0;
+#endif
+
+ memset( &launch_op, 0, sizeof(launch_op) );
+
+ launch_op.u.setdomaininfo.domain = (domid_t)domid;
+ launch_op.u.setdomaininfo.vcpu = 0;
+ launch_op.u.setdomaininfo.ctxt = ctxt;
+
+ launch_op.cmd = DOM0_SETDOMAININFO;
+ rc = do_dom0_op(xc_handle, &launch_op);
+
+ return rc;
+
+ error_out:
+ if ( initrd_gfd != NULL )
+ gzclose(initrd_gfd);
+ else if ( initrd_fd >= 0 )
+ close(initrd_fd);
+ if ( image != NULL )
+ free(image);
+
+ return -1;
+}
+
+static inline int is_loadable_phdr(Elf_Phdr *phdr)
+{
+ return ((phdr->p_type == PT_LOAD) &&
+ ((phdr->p_flags & (PF_W|PF_X)) != 0));
+}
+
+static int parseelfimage(char *elfbase,
+ unsigned long elfsize,
+ struct domain_setup_info *dsi)
+{
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
+ Elf_Phdr *phdr;
+ Elf_Shdr *shdr;
+ unsigned long kernstart = ~0UL, kernend=0UL;
+ char *shstrtab;
+ int h;
+
+ if ( !IS_ELF(*ehdr) )
+ {
+ ERROR("Kernel image does not have an ELF header.");
+ return -EINVAL;
+ }
+
+ if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
+ {
+ ERROR("ELF program headers extend beyond end of image.");
+ return -EINVAL;
+ }
+
+ if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
+ {
+ ERROR("ELF section headers extend beyond end of image.");
+ return -EINVAL;
+ }
+
+ /* Find the section-header strings table. */
+ if ( ehdr->e_shstrndx == SHN_UNDEF )
+ {
+ ERROR("ELF image has no section-header strings table (shstrtab).");
+ return -EINVAL;
+ }
+ shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff +
+ (ehdr->e_shstrndx*ehdr->e_shentsize));
+ shstrtab = elfbase + shdr->sh_offset;
+
+ for ( h = 0; h < ehdr->e_phnum; h++ )
+ {
+ phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
+ if ( !is_loadable_phdr(phdr) )
+ continue;
+ if ( phdr->p_paddr < kernstart )
+ kernstart = phdr->p_paddr;
+ if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
+ kernend = phdr->p_paddr + phdr->p_memsz;
+ }
+
+ if ( (kernstart > kernend) ||
+ (ehdr->e_entry < kernstart) ||
+ (ehdr->e_entry > kernend) )
+ {
+ ERROR("Malformed ELF image.");
+ return -EINVAL;
+ }
+
+ dsi->v_start = 0x00000000;
+
+ dsi->v_kernstart = kernstart - LINUX_PAGE_OFFSET;
+ dsi->v_kernend = kernend - LINUX_PAGE_OFFSET;
+ dsi->v_kernentry = LINUX_KERNEL_ENTR_ADDR;
+
+ dsi->v_end = dsi->v_kernend;
+
+ return 0;
+}
+
+static int
+loadelfimage(
+ char *elfbase, int xch, u32 dom, unsigned long *parray,
+ struct domain_setup_info *dsi)
+{
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
+ Elf_Phdr *phdr;
+ int h;
+
+ char *va;
+ unsigned long pa, done, chunksz;
+
+ for ( h = 0; h < ehdr->e_phnum; h++ )
+ {
+ phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
+ if ( !is_loadable_phdr(phdr) )
+ continue;
+
+ for ( done = 0; done < phdr->p_filesz; done += chunksz )
+ {
+ pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
+ if ((va = xc_map_foreign_range(
+ xch, dom, PAGE_SIZE, PROT_WRITE,
+ parray[pa>>PAGE_SHIFT])) == 0)
+ return -1;
+ chunksz = phdr->p_filesz - done;
+ if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+ chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+ memcpy(va + (pa & (PAGE_SIZE-1)),
+ elfbase + phdr->p_offset + done, chunksz);
+ munmap(va, PAGE_SIZE);
+ }
+
+ for ( ; done < phdr->p_memsz; done += chunksz )
+ {
+ pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
+ if ((va = xc_map_foreign_range(
+ xch, dom, PAGE_SIZE, PROT_WRITE,
+ parray[pa>>PAGE_SHIFT])) == 0)
+ return -1;
+ chunksz = phdr->p_memsz - done;
+ if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+ chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+ memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
+ munmap(va, PAGE_SIZE);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/misc/Makefile b/tools/misc/Makefile
index 7bfe0fad6b..5eebbfda12 100644
--- a/tools/misc/Makefile
+++ b/tools/misc/Makefile
@@ -5,7 +5,6 @@ INSTALL_DIR = $(INSTALL) -d -m0755
XEN_ROOT=../..
include $(XEN_ROOT)/tools/Rules.mk
-CC = gcc
CFLAGS += -Wall -Werror -O3
INCLUDES += -I $(XEN_XC)
@@ -14,29 +13,36 @@ CFLAGS += $(INCLUDES)
HDRS = $(wildcard *.h)
-TARGETS = xenperf
+TARGETS = xenperf xc_shadow
INSTALL_BIN = $(TARGETS) xencons
-INSTALL_SBIN = netfix xm xend xensv xenperf
+INSTALL_SBIN = netfix xm xend xenperf
all: build
build: $(TARGETS)
$(MAKE) -C miniterm
+ $(MAKE) -C cpuperf
+ $(MAKE) -C mbootpack
install: build
[ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
[ -d $(DESTDIR)/usr/sbin ] || $(INSTALL_DIR) $(DESTDIR)/usr/sbin
$(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin
$(INSTALL_PROG) $(INSTALL_SBIN) $(DESTDIR)/usr/sbin
+ $(MAKE) -C cpuperf install
# No sense in installing miniterm on the Xen box.
# $(MAKE) -C miniterm install
+# Likewise mbootpack
+# $(MAKE) -C mbootpack install
clean:
$(RM) *.o $(TARGETS) *~
$(MAKE) -C miniterm clean
+ $(MAKE) -C cpuperf clean
+ $(MAKE) -C mbootpack clean
%.o: %.c $(HDRS) Makefile
$(CC) -c $(CFLAGS) -o $@ $<
$(TARGETS): %: %.o Makefile
- $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc -L$(XEN_LIBXUTIL) -lxutil
+ $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc
diff --git a/tools/misc/cpuperf/Makefile b/tools/misc/cpuperf/Makefile
new file mode 100644
index 0000000000..c33e7d9250
--- /dev/null
+++ b/tools/misc/cpuperf/Makefile
@@ -0,0 +1,51 @@
+#
+# Make Performance counter tool
+#
+# $Id: Makefile,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
+#
+# $Log: Makefile,v $
+# Revision 1.1 2003/10/13 16:49:44 jrb44
+# Initial revision
+#
+#
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+# these are for Xen
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+CC = gcc
+CFLAGS = -Wall -O3
+
+HDRS = $(wildcard *.h)
+SRCS = $(wildcard *.c)
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+
+TARGETS = cpuperf-xen cpuperf-perfcntr
+
+INSTALL_BIN = $(TARGETS)
+
+
+all: $(TARGETS)
+
+clean:
+ $(RM) *.o $(TARGETS)
+
+%: %.c $(HDRS) Makefile
+ $(CC) $(CFLAGS) -o $@ $<
+
+cpuperf-xen: cpuperf.c $(HDRS) Makefile
+ $(CC) $(CFLAGS) -I $(XEN_LIBXC) -L$(XEN_LIBXC) -lxc -DXENO -o $@ $<
+
+cpuperf-perfcntr: cpuperf.c $(HDRS) Makefile
+ $(CC) $(CFLAGS) -DPERFCNTR -o $@ $<
+
+install: all
+ $(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin
+
+
+# End of $RCSfile: Makefile,v $
+
diff --git a/tools/misc/cpuperf/README.txt b/tools/misc/cpuperf/README.txt
new file mode 100644
index 0000000000..96fdec1e28
--- /dev/null
+++ b/tools/misc/cpuperf/README.txt
@@ -0,0 +1,371 @@
+Usage
+=====
+
+Use either cpuperf-xen or cpuperf-perfcntr as appropriate to the system
+in use.
+
+To write:
+
+ cpuperf -E <escr> -C <cccr>
+
+ optional: all numbers in base 10 unless specified
+
+ -d Debug mode
+ -c <cpu> CPU number
+ -t <thread> ESCR thread bits - default is 12 (Thread 0 all rings)
+ bit 0: Thread 1 in rings 1,2,3
+ bit 1: Thread 1 in ring 0
+ bit 2: Thread 0 in rings 1,2,3
+ bit 3: Thread 0 in ring 0
+ -e <eventsel> Event selection number
+ -m <eventmask> Event mask bits
+ -T <value> ESCR tag value
+ -k Sets CCCR 'compare' bit
+ -n Sets CCCR 'complement' bit
+ -g Sets CCCR 'edge' bit
+ -P <bit> Set the specified bit in MSR_P4_PEBS_ENABLE
+ -V <bit> Set the specified bit in MSR_P4_PEBS_MATRIX_VERT
+ (-V and -P may be used multiple times to set multiple bits.)
+
+To read:
+
+ cpuperf -r
+
+ optional: all numbers in base 10 unless specified
+
+ -c <cpu> CPU number
+
+<cccr> values:
+
+ BPU_CCCR0
+ BPU_CCCR1
+ BPU_CCCR2
+ BPU_CCCR3
+ MS_CCCR0
+ MS_CCCR1
+ MS_CCCR2
+ MS_CCCR3
+ FLAME_CCCR0
+ FLAME_CCCR1
+ FLAME_CCCR2
+ FLAME_CCCR3
+ IQ_CCCR0
+ IQ_CCCR1
+ IQ_CCCR2
+ IQ_CCCR3
+ IQ_CCCR4
+ IQ_CCCR5
+ NONE - do not program any CCCR, used when setting up an ESCR for tagging
+
+<escr> values:
+
+ BSU_ESCR0
+ BSU_ESCR1
+ FSB_ESCR0
+ FSB_ESCR1
+ MOB_ESCR0
+ MOB_ESCR1
+ PMH_ESCR0
+ PMH_ESCR1
+ BPU_ESCR0
+ BPU_ESCR1
+ IS_ESCR0
+ IS_ESCR1
+ ITLB_ESCR0
+ ITLB_ESCR1
+ IX_ESCR0
+ IX_ESCR1
+ MS_ESCR0
+ MS_ESCR1
+ TBPU_ESCR0
+ TBPU_ESCR1
+ TC_ESCR0
+ TC_ESCR1
+ FIRM_ESCR0
+ FIRM_ESCR1
+ FLAME_ESCR0
+ FLAME_ESCR1
+ DAC_ESCR0
+ DAC_ESCR1
+ SAAT_ESCR0
+ SAAT_ESCR1
+ U2L_ESCR0
+ U2L_ESCR1
+ CRU_ESCR0
+ CRU_ESCR1
+ CRU_ESCR2
+ CRU_ESCR3
+ CRU_ESCR4
+ CRU_ESCR5
+ IQ_ESCR0
+ IQ_ESCR1
+ RAT_ESCR0
+ RAT_ESCR1
+ SSU_ESCR0
+ SSU_ESCR1
+ ALF_ESCR0
+ ALF_ESCR1
+
+
+Example configurations
+======================
+
+Note that in most cases there is a choice of ESCRs and CCCRs for
+each metric although not all combinations are allowed. Each ESCR and
+counter/CCCR can be used only once.
+
+Mispredicted branches retired
+=============================
+
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 3 -m 1
+cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 3 -m 1
+cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 3 -m 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 3 -m 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 3 -m 1
+
+Tracecache misses
+=================
+
+cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1
+cpuperf -E BPU_ESCR0 -C BPU_CCCR1 -e 3 -m 1
+cpuperf -E BPU_ESCR1 -C BPU_CCCR2 -e 3 -m 1
+cpuperf -E BPU_ESCR1 -C BPU_CCCR3 -e 3 -m 1
+
+I-TLB
+=====
+
+cpuperf -E ITLB_ESCR0 -C BPU_CCCR0 -e 24
+cpuperf -E ITLB_ESCR0 -C BPU_CCCR1 -e 24
+cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24
+cpuperf -E ITLB_ESCR1 -C BPU_CCCR3 -e 24
+
+ -m <n> : bit 0 counts HITS, bit 1 MISSES, bit 2 uncacheable hit
+
+ e.g. all ITLB misses -m 2
+
+Load replays
+============
+
+cpuperf -E MOB_ESCR0 -C BPU_CCCR0 -e 3
+cpuperf -E MOB_ESCR0 -C BPU_CCCR1 -e 3
+cpuperf -E MOB_ESCR1 -C BPU_CCCR2 -e 3
+cpuperf -E MOB_ESCR1 -C BPU_CCCR3 -e 3
+
+ -m <n> : bit mask, replay due to...
+ 1: unknown store address
+ 3: unknown store data
+ 4: partially overlapped data access between LD/ST
+ 5: unaligned address between LD/ST
+
+Page walks
+==========
+
+cpuperf -E PMH_ESCR0 -C BPU_CCCR0 -e 1
+cpuperf -E PMH_ESCR0 -C BPU_CCCR1 -e 1
+cpuperf -E PMH_ESCR1 -C BPU_CCCR2 -e 1
+cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1
+
+ -m <n> : bit 0 counts walks for a D-TLB miss, bit 1 for I-TLB miss
+
+L2/L3 cache accesses
+====================
+
+cpuperf -E BSU_ESCR0 -C BPU_CCCR0 -e 12
+cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12
+cpuperf -E BSU_ESCR1 -C BPU_CCCR2 -e 12
+cpuperf -E BSU_ESCR1 -C BPU_CCCR3 -e 12
+
+ -m <n> : where the bit mask is:
+ 0: Read L2 HITS Shared
+ 1: Read L2 HITS Exclusive
+ 2: Read L2 HITS Modified
+ 3: Read L3 HITS Shared
+ 4: Read L3 HITS Exclusive
+ 5: Read L3 HITS Modified
+ 8: Read L2 MISS
+ 9: Read L3 MISS
+ 10: Write L2 MISS
+
+Front side bus activity
+=======================
+
+cpuperf -E FSB_ESCR0 -C BPU_CCCR0 -e 23 -k -g
+cpuperf -E FSB_ESCR0 -C BPU_CCCR1 -e 23 -k -g
+cpuperf -E FSB_ESCR1 -C BPU_CCCR2 -e 23 -k -g
+cpuperf -E FSB_ESCR1 -C BPU_CCCR3 -e 23 -k -g
+
+ -m <n> : where the bit mask is for bus events:
+ 0: DRDY_DRV Processor drives bus
+ 1: DRDY_OWN Processor reads bus
+ 2: DRDY_OTHER Data on bus not being sampled by processor
+ 3: DBSY_DRV Processor reserves bus for driving
+ 4: DBSY_OWN Other entity reserves bus for sending to processor
+ 5: DBSY_OTHER Other entity reserves bus for sending elsewhere
+
+ e.g. -m 3 to get cycles bus actually in use.
+
+Pipeline clear (entire)
+=======================
+
+cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 2
+cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 2
+cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 2
+cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 2
+cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 2
+cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 2
+
+ -m <n> : bit mask:
+ 0: counts a portion of cycles while clear (use -g for edge trigger)
+ 1: counts each time machine clears for memory ordering issues
+ 2: counts each time machine clears for self modifying code
+
+Instructions retired
+====================
+
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2
+cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 2
+cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 2
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2
+cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 2
+cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 2
+
+ -m <n> : bit mask:
+ 0: counts non-bogus, not tagged instructions
+ 1: counts non-bogus, tagged instructions
+ 2: counts bogus, not tagged instructions
+ 3: counts bogus, tagged instructions
+
+ e.g. -m 3 to count legit retirements
+
+Uops retired
+============
+
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 1
+cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 1
+cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 1
+cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 1
+
+ -m <n> : bit mask:
+ 0: Non-bogus
+ 1: Bogus
+
+x87 FP uops
+===========
+
+cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768
+cpuperf -E FIRM_ESCR0 -C FLAME_CCCR1 -e 4 -m 32768
+cpuperf -E FIRM_ESCR1 -C FLAME_CCCR2 -e 4 -m 32768
+cpuperf -E FIRM_ESCR1 -C FLAME_CCCR3 -e 4 -m 32768
+
+Replay tagging mechanism
+========================
+
+Counts retirement of uops tagged with the replay tagging mechanism
+
+cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9
+cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9
+cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 9
+cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 9
+cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 9
+cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 9
+
+ -m <n> : bit mask:
+ 0: Non-bogus (set this bit for all events listed below)
+ 1: Bogus
+
+Set replay tagging mechanism bits with -P and -V:
+
+ L1 cache load miss retired: -P 0 -P 24 -P 25 -V 0
+ L2 cache load miss retired: -P 1 -P 24 -P 25 -V 0 (read manual)
+ DTLB load miss retired: -P 2 -P 24 -P 25 -V 0
+ DTLB store miss retired: -P 2 -P 24 -P 25 -V 1
+ DTLB all miss retired: -P 2 -P 24 -P 25 -V 0 -V 1
+
+e.g. to count all DTLB misses
+
+ cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9 -m 1 -P 2 -P 24 -P 25 -V 0 -V 1
+
+Front end event
+===============
+
+To count tagged uops:
+
+cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 8
+cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 8
+cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 8
+cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 8
+cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8
+cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 8
+
+ -m <n> : bit 0 for non-bogus uops, bit 1 for bogus uops
+
+Must have another ESCR programmed to tag uops as required
+
+cpuperf -E RAT_ESCR0 -C NONE -e 2
+cpuperf -E RAT_ESCR1 -C NONE -e 2
+
+ -m <n> : bit 1 for LOADs, bit 2 for STOREs
+
+An example set of counters
+===========================
+
+# instructions retired
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3
+
+# trace cache misses
+cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1
+
+# L1 D cache misses (load misses retired)
+cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9 -m 1 -P 0 -P 24 -P 25 -V 0
+
+# L2 misses (load and store)
+cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12 -m 1280
+
+# I-TLB misses
+cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24 -m 2
+
+# D-TLB misses (as PT walks)
+cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1 -m 1
+
+# Other 'bonus' counters would be:
+# number of loads executed - need both command lines
+cpuperf -E RAT_ESCR0 -C NONE -e 2 -m 2
+cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8 -m 3
+
+# number of mispredicted branches
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1
+
+# x87 FP uOps
+cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768
+
+The above has counter assignments
+
+0 Trace cache misses
+1 L2 Misses
+2 I-TLB misses
+3 D-TLB misses
+4
+5
+6
+7
+8 x87 FP uOps
+9
+10
+11
+12 Instructions retired
+13 L1 D cache misses
+14 Mispredicted branches
+15 Loads executed
+16
+17
+
+Counting instructions retired on each logical CPU
+=================================================
+
+cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3 -t 12
+cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2 -m 3 -t 3
+
+Cannot count mispred branches as well due to CRU_ESCR1 use.
diff --git a/tools/misc/cpuperf/cpuperf.c b/tools/misc/cpuperf/cpuperf.c
new file mode 100644
index 0000000000..093cde556d
--- /dev/null
+++ b/tools/misc/cpuperf/cpuperf.c
@@ -0,0 +1,301 @@
+/*
+ * User mode program to program performance counters.
+ *
+ * JRB/IAP October 2003.
+ *
+ * $Id: cpuperf.c,v 1.2 2003/10/14 11:00:59 jrb44 Exp $
+ *
+ * $Log: cpuperf.c,v $
+ * Revision 1.2 2003/10/14 11:00:59 jrb44
+ * Added default CPU. Added NONE CCCR.
+ *
+ * Revision 1.1 2003/10/13 16:49:44 jrb44
+ * Initial revision
+ *
+ */
+
+#include <sys/types.h>
+#include <sched.h>
+#include <error.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "p4perf.h"
+
+static inline void cpus_wrmsr(int cpu_mask,
+ int msr,
+ unsigned int low,
+ unsigned int high )
+{
+ fprintf(stderr, "No backend to write MSR 0x%x <= 0x%08x%08x on %08x\n",
+ msr, high, low, cpu_mask);
+}
+
+static inline unsigned long long cpus_rdmsr( int cpu_mask, int msr )
+{
+ fprintf(stderr, "No backend to read MSR 0x%x on %08x\n", msr, cpu_mask);
+ return 0;
+}
+
+#ifdef PERFCNTR
+#include "cpuperf_perfcntr.h"
+#define cpus_wrmsr perfcntr_wrmsr
+#define cpus_rdmsr perfcntr_rdmsr
+#endif
+
+#ifdef XENO
+#include "cpuperf_xeno.h"
+#define cpus_wrmsr dom0_wrmsr
+#define cpus_rdmsr dom0_rdmsr
+#endif
+
+struct macros {
+ char *name;
+ unsigned long msr_addr;
+ int number;
+};
+
+#define NO_CCCR 0xfffffffe
+
+struct macros msr[] = {
+ {"BPU_COUNTER0", 0x300, 0},
+ {"BPU_COUNTER1", 0x301, 1},
+ {"BPU_COUNTER2", 0x302, 2},
+ {"BPU_COUNTER3", 0x303, 3},
+ {"MS_COUNTER0", 0x304, 4},
+ {"MS_COUNTER1", 0x305, 5},
+ {"MS_COUNTER2", 0x306, 6},
+ {"MS_COUNTER3", 0x307, 7},
+ {"FLAME_COUNTER0", 0x308, 8},
+ {"FLAME_COUNTER1", 0x309, 9},
+ {"FLAME_COUNTER2", 0x30a, 10},
+ {"FLAME_COUNTER3", 0x30b, 11},
+ {"IQ_COUNTER0", 0x30c, 12},
+ {"IQ_COUNTER1", 0x30d, 13},
+ {"IQ_COUNTER2", 0x30e, 14},
+ {"IQ_COUNTER3", 0x30f, 15},
+ {"IQ_COUNTER4", 0x310, 16},
+ {"IQ_COUNTER5", 0x311, 17},
+ {"BPU_CCCR0", 0x360, 0},
+ {"BPU_CCCR1", 0x361, 1},
+ {"BPU_CCCR2", 0x362, 2},
+ {"BPU_CCCR3", 0x363, 3},
+ {"MS_CCCR0", 0x364, 4},
+ {"MS_CCCR1", 0x365, 5},
+ {"MS_CCCR2", 0x366, 6},
+ {"MS_CCCR3", 0x367, 7},
+ {"FLAME_CCCR0", 0x368, 8},
+ {"FLAME_CCCR1", 0x369, 9},
+ {"FLAME_CCCR2", 0x36a, 10},
+ {"FLAME_CCCR3", 0x36b, 11},
+ {"IQ_CCCR0", 0x36c, 12},
+ {"IQ_CCCR1", 0x36d, 13},
+ {"IQ_CCCR2", 0x36e, 14},
+ {"IQ_CCCR3", 0x36f, 15},
+ {"IQ_CCCR4", 0x370, 16},
+ {"IQ_CCCR5", 0x371, 17},
+ {"BSU_ESCR0", 0x3a0, 7},
+ {"BSU_ESCR1", 0x3a1, 7},
+ {"FSB_ESCR0", 0x3a2, 6},
+ {"FSB_ESCR1", 0x3a3, 6},
+ {"MOB_ESCR0", 0x3aa, 2},
+ {"MOB_ESCR1", 0x3ab, 2},
+ {"PMH_ESCR0", 0x3ac, 4},
+ {"PMH_ESCR1", 0x3ad, 4},
+ {"BPU_ESCR0", 0x3b2, 0},
+ {"BPU_ESCR1", 0x3b3, 0},
+ {"IS_ESCR0", 0x3b4, 1},
+ {"IS_ESCR1", 0x3b5, 1},
+ {"ITLB_ESCR0", 0x3b6, 3},
+ {"ITLB_ESCR1", 0x3b7, 3},
+ {"IX_ESCR0", 0x3c8, 5},
+ {"IX_ESCR1", 0x3c9, 5},
+ {"MS_ESCR0", 0x3c0, 0},
+ {"MS_ESCR1", 0x3c1, 0},
+ {"TBPU_ESCR0", 0x3c2, 2},
+ {"TBPU_ESCR1", 0x3c3, 2},
+ {"TC_ESCR0", 0x3c4, 1},
+ {"TC_ESCR1", 0x3c5, 1},
+ {"FIRM_ESCR0", 0x3a4, 1},
+ {"FIRM_ESCR1", 0x3a5, 1},
+ {"FLAME_ESCR0", 0x3a6, 0},
+ {"FLAME_ESCR1", 0x3a7, 0},
+ {"DAC_ESCR0", 0x3a8, 5},
+ {"DAC_ESCR1", 0x3a9, 5},
+ {"SAAT_ESCR0", 0x3ae, 2},
+ {"SAAT_ESCR1", 0x3af, 2},
+ {"U2L_ESCR0", 0x3b0, 3},
+ {"U2L_ESCR1", 0x3b1, 3},
+ {"CRU_ESCR0", 0x3b8, 4},
+ {"CRU_ESCR1", 0x3b9, 4},
+ {"CRU_ESCR2", 0x3cc, 5},
+ {"CRU_ESCR3", 0x3cd, 5},
+ {"CRU_ESCR4", 0x3e0, 6},
+ {"CRU_ESCR5", 0x3e1, 6},
+ {"IQ_ESCR0", 0x3ba, 0},
+ {"IQ_ESCR1", 0x3bb, 0},
+ {"RAT_ESCR0", 0x3bc, 2},
+ {"RAT_ESCR1", 0x3bd, 2},
+ {"SSU_ESCR0", 0x3be, 3},
+ {"SSU_ESCR1", 0x3bf, 3},
+ {"ALF_ESCR0", 0x3ca, 1},
+ {"ALF_ESCR1", 0x3cb, 1},
+ {"PEBS_ENABLE", 0x3f1, 0},
+ {"PEBS_MATRIX_VERT", 0x3f2, 0},
+ {"NONE", NO_CCCR, 0},
+ {NULL, 0, 0}
+};
+
+struct macros *lookup_macro(char *str)
+{
+ struct macros *m;
+
+ m = msr;
+ while (m->name) {
+ if (strcmp(m->name, str) == 0)
+ return m;
+ m++;
+ }
+ return NULL;
+}
+
+int main(int argc, char **argv)
+{
+ int c, t = 0xc, es = 0, em = 0, tv = 0, te = 0;
+ unsigned int cpu_mask = 1;
+ struct macros *escr = NULL, *cccr = NULL;
+ unsigned long escr_val, cccr_val;
+ int debug = 0;
+ unsigned long pebs = 0, pebs_vert = 0;
+ int pebs_x = 0, pebs_vert_x = 0;
+ int read = 0;
+ int compare = 0;
+ int complement = 0;
+ int edge = 0;
+
+#ifdef XENO
+ xen_init();
+#endif
+
+
+ while ((c = getopt(argc, argv, "dc:t:e:m:T:E:C:P:V:rkng")) != -1) {
+ switch((char)c) {
+ case 'P':
+ pebs |= 1 << atoi(optarg);
+ pebs_x = 1;
+ break;
+ case 'V':
+ pebs_vert |= 1 << atoi(optarg);
+ pebs_vert_x = 1;
+ break;
+ case 'd':
+ debug = 1;
+ break;
+ case 'c':
+ {
+ int cpu = atoi(optarg);
+ cpu_mask = (cpu == -1)?(~0):(1<<cpu);
+ }
+ break;
+ case 't': // ESCR thread bits
+ t = atoi(optarg);
+ break;
+ case 'e': // eventsel
+ es = atoi(optarg);
+ break;
+ case 'm': // eventmask
+ em = atoi(optarg);
+ break;
+ case 'T': // tag value
+ tv = atoi(optarg);
+ te = 1;
+ break;
+ case 'E':
+ escr = lookup_macro(optarg);
+ if (!escr) {
+ fprintf(stderr, "Macro '%s' not found.\n", optarg);
+ exit(1);
+ }
+ break;
+ case 'C':
+ cccr = lookup_macro(optarg);
+ if (!cccr) {
+ fprintf(stderr, "Macro '%s' not found.\n", optarg);
+ exit(1);
+ }
+ break;
+ case 'r':
+ read = 1;
+ break;
+ case 'k':
+ compare = 1;
+ break;
+ case 'n':
+ complement = 1;
+ break;
+ case 'g':
+ edge = 1;
+ break;
+ }
+ }
+
+ if (read) {
+ while((cpu_mask&1)) {
+ int i;
+ for (i=0x300;i<0x312;i++) {
+ printf("%010llu ",cpus_rdmsr( cpu_mask, i ) );
+ }
+ printf("\n");
+ cpu_mask>>=1;
+ }
+ exit(1);
+ }
+
+ if (!escr) {
+ fprintf(stderr, "Need an ESCR.\n");
+ exit(1);
+ }
+ if (!cccr) {
+ fprintf(stderr, "Need a counter number.\n");
+ exit(1);
+ }
+
+ escr_val = P4_ESCR_THREADS(t) | P4_ESCR_EVNTSEL(es) |
+ P4_ESCR_EVNTMASK(em) | P4_ESCR_TV(tv) | ((te)?P4_ESCR_TE:0);
+ cccr_val = P4_CCCR_ENABLE | P4_CCCR_ESCR(escr->number) |
+ ((compare)?P4_CCCR_COMPARE:0) |
+ ((complement)?P4_CCCR_COMPLEMENT:0) |
+ ((edge)?P4_CCCR_EDGE:0) |
+ P4_CCCR_ACTIVE_THREAD(3)/*reserved*/;
+
+ if (debug) {
+ fprintf(stderr, "ESCR 0x%lx <= 0x%08lx\n", escr->msr_addr, escr_val);
+ if (cccr->msr_addr != NO_CCCR)
+ fprintf(stderr, "CCCR 0x%lx <= 0x%08lx (%u)\n",
+ cccr->msr_addr, cccr_val, cccr->number);
+ if (pebs_x)
+ fprintf(stderr, "PEBS 0x%x <= 0x%08lx\n",
+ MSR_P4_PEBS_ENABLE, pebs);
+ if (pebs_vert_x)
+ fprintf(stderr, "PMV 0x%x <= 0x%08lx\n",
+ MSR_P4_PEBS_MATRIX_VERT, pebs_vert);
+ }
+
+ cpus_wrmsr( cpu_mask, escr->msr_addr, escr_val, 0 );
+ if (cccr->msr_addr != NO_CCCR)
+ cpus_wrmsr( cpu_mask, cccr->msr_addr, cccr_val, 0 );
+
+ if (pebs_x)
+ cpus_wrmsr( cpu_mask, MSR_P4_PEBS_ENABLE, pebs, 0 );
+
+ if (pebs_vert_x)
+ cpus_wrmsr( cpu_mask, MSR_P4_PEBS_MATRIX_VERT, pebs_vert, 0 );
+
+ return 0;
+}
+
+// End of $RCSfile: cpuperf.c,v $
+
diff --git a/tools/misc/cpuperf/cpuperf_perfcntr.h b/tools/misc/cpuperf/cpuperf_perfcntr.h
new file mode 100644
index 0000000000..a75a4fc0f7
--- /dev/null
+++ b/tools/misc/cpuperf/cpuperf_perfcntr.h
@@ -0,0 +1,41 @@
+/*
+ * Interface to JRB44's /proc/perfcntr interface.
+ *
+ * $Id: cpuperf_perfcntr.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
+ *
+ * $Log: cpuperf_perfcntr.h,v $
+ * Revision 1.1 2003/10/13 16:49:44 jrb44
+ * Initial revision
+ *
+ */
+
+#define PROC_PERFCNTR "/proc/perfcntr"
+
+static inline void perfcntr_wrmsr(int cpu_mask,
+ int msr,
+ unsigned int low,
+ unsigned int high )
+{
+ FILE *fd;
+ unsigned long long value = low | (((unsigned long long)high) << 32);
+
+ fd = fopen(PROC_PERFCNTR, "w");
+ if (fd == NULL)
+ {
+ perror("open " PROC_PERFCNTR);
+ exit(1);
+ }
+
+ fprintf(fd, "%x %x %llx \n", cpu_mask, msr, value);
+ fprintf(stderr, "%x %x %llx \n", cpu_mask, msr, value);
+ fclose(fd);
+}
+
+static inline unsigned long long perfcntr_rdmsr( int cpu_mask, int msr )
+{
+ fprintf(stderr, "WARNING: rdmsr not yet implemented for perfcntr.\n");
+ return 0;
+}
+
+// End of $RCSfile: cpuperf_perfcntr.h,v $
+
diff --git a/tools/misc/cpuperf/cpuperf_xeno.h b/tools/misc/cpuperf/cpuperf_xeno.h
new file mode 100644
index 0000000000..4f7da770e1
--- /dev/null
+++ b/tools/misc/cpuperf/cpuperf_xeno.h
@@ -0,0 +1,38 @@
+/*
+ * Interface to Xen MSR hypercalls.
+ *
+ * $Id: cpuperf_xeno.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
+ *
+ * $Log: cpuperf_xeno.h,v $
+ * Revision 1.1 2003/10/13 16:49:44 jrb44
+ * Initial revision
+ *
+ */
+
+#include <xc.h>
+
+static int xc_handle;
+
+void xen_init()
+{
+ if ( (xc_handle = xc_interface_open()) == -1 )
+ {
+ fprintf(stderr, "Error opening xc interface: %d (%s)\n",
+ errno, strerror(errno));
+ exit(-1);
+ }
+
+}
+
+void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high )
+{
+ xc_msr_write (xc_handle, cpu_mask, msr, low, high);
+}
+
+unsigned long long dom0_rdmsr( int cpu_mask, int msr )
+{
+ return xc_msr_read(xc_handle, cpu_mask, msr);
+}
+
+// End of $RCSfile: cpuperf_xeno.h,v $
+
diff --git a/tools/misc/cpuperf/module/Makefile b/tools/misc/cpuperf/module/Makefile
new file mode 100644
index 0000000000..0a1c976ef4
--- /dev/null
+++ b/tools/misc/cpuperf/module/Makefile
@@ -0,0 +1,16 @@
+#############################################################################
+# (C) 2005 - Rolf Neugebauer - Intel Research Cambridge
+#############################################################################
+#
+# File: Makefile
+# Author: Rolf Neugebauer (rolf.neugebauer@intel.com)
+# Date: Mar 2005
+#
+# Environment:
+#
+
+# invoke:
+# make -C /lib/modules/`uname -r`/build SUBDIRS=`pwd` modules_install
+
+obj-m := perfcntr.o
+
diff --git a/tools/misc/cpuperf/module/perfcntr.c b/tools/misc/cpuperf/module/perfcntr.c
new file mode 100644
index 0000000000..6a8f48c938
--- /dev/null
+++ b/tools/misc/cpuperf/module/perfcntr.c
@@ -0,0 +1,730 @@
+/*
+ * Linux loadable kernel module to use P4 performance counters.
+ *
+ * James Bulpin, Feb 2003.
+ *
+ * $Id$
+ *
+ * $Log$
+ */
+
+#define DRV_NAME "perfcntr"
+#define DRV_VERSION "0.2"
+#define DRV_RELDATE "02 Jun 2004"
+
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#define NOHT
+
+#include "../p4perf.h"
+
+#ifdef NOHT
+# define CPUMASK 0x00000003
+#else
+# define CPUMASK 0x00000005
+#endif
+
+/*****************************************************************************
+ * Module admin *
+ *****************************************************************************/
+
+MODULE_AUTHOR("James Bulpin <James.Bulpin@cl.cam.ac.uk>");
+MODULE_DESCRIPTION("P4 Performance Counters access "
+ DRV_VERSION " " DRV_RELDATE);
+MODULE_LICENSE("GPL");
+
+static char version[] __devinitdata =
+DRV_NAME ": James Bulpin.\n";
+
+static unsigned char foobar[4];
+
+/* rpcc: get full 64-bit Pentium TSC value
+ */
+static __inline__ unsigned long long int rpcc(void)
+{
+ unsigned int __h, __l;
+ __asm__ __volatile__ ("rdtsc" :"=a" (__l), "=d" (__h));
+ return (((unsigned long long)__h) << 32) + __l;
+}
+
+/*****************************************************************************
+ * Display the counters *
+ *****************************************************************************/
+
+//#define processor cpu // post 2.4.16
+
+typedef union {
+ struct {
+ unsigned long lo;
+ unsigned long hi;
+ };
+ unsigned long long cnt;
+} cpu_perfcntr_t;
+
+typedef struct counters_t_struct {
+ int processor;
+ unsigned long long tsc;
+ cpu_perfcntr_t counters[18];
+} counters_t;
+
+typedef struct perfcntr_t_struct {
+ unsigned long cpu_mask;
+ counters_t cpus[4]; // Actually for each cpu in system
+} perfcntr_t;
+
+#ifdef HUMAN_READABLE
+# define SHOW_COUNTER(c) rdmsr (c, l, h);\
+ seq_printf(m, "0x%03x: 0x%08x%08x\n", c, h, l)
+#else
+# define SHOW_COUNTER(c) rdmsr (c, l, h);\
+ seq_printf(m, " %llu", \
+ (unsigned long long)h << 32 | (unsigned long long)l)
+#endif
+
+#if 0
+static unsigned long last_l = 0, last_h = 0, last_msr = 0;
+static int last_cpu = 0;
+#endif
+
+#define READ_COUNTER(_i, _msr) rdmsr((_msr), l, h); c->counters[_i].lo = l; \
+ c->counters[_i].hi = h;
+
+static perfcntr_t perfcntrs;
+
+static void show_perfcntr_for(void *v)
+{
+ unsigned int l, h;
+
+ perfcntr_t *p = &perfcntrs;
+ counters_t *c;
+
+ if (!((1 << smp_processor_id()) & p->cpu_mask))
+ return;
+
+ c = &p->cpus[smp_processor_id()];
+
+ c->processor = smp_processor_id();
+ c->tsc = rpcc();
+
+ READ_COUNTER(0, MSR_P4_BPU_COUNTER0);
+ READ_COUNTER(1, MSR_P4_BPU_COUNTER1);
+ READ_COUNTER(2, MSR_P4_BPU_COUNTER2);
+ READ_COUNTER(3, MSR_P4_BPU_COUNTER3);
+
+ READ_COUNTER(4, MSR_P4_MS_COUNTER0);
+ READ_COUNTER(5, MSR_P4_MS_COUNTER1);
+ READ_COUNTER(6, MSR_P4_MS_COUNTER2);
+ READ_COUNTER(7, MSR_P4_MS_COUNTER3);
+
+ READ_COUNTER(8, MSR_P4_FLAME_COUNTER0);
+ READ_COUNTER(9, MSR_P4_FLAME_COUNTER1);
+ READ_COUNTER(10, MSR_P4_FLAME_COUNTER2);
+ READ_COUNTER(11, MSR_P4_FLAME_COUNTER3);
+
+ READ_COUNTER(12, MSR_P4_IQ_COUNTER0);
+ READ_COUNTER(13, MSR_P4_IQ_COUNTER1);
+ READ_COUNTER(14, MSR_P4_IQ_COUNTER2);
+ READ_COUNTER(15, MSR_P4_IQ_COUNTER3);
+ READ_COUNTER(16, MSR_P4_IQ_COUNTER4);
+ READ_COUNTER(17, MSR_P4_IQ_COUNTER5);
+
+ return;
+}
+
+static int show_perfcntr(struct seq_file *m, void *v)
+{
+ int i, j;
+
+ // Get each physical cpu to read counters
+ perfcntrs.cpu_mask = CPUMASK;
+
+ smp_call_function(show_perfcntr_for, NULL, 1, 1);
+ show_perfcntr_for(NULL);
+
+ for (i = 0; i < 32; i++) {
+ if (((1 << i) & (perfcntrs.cpu_mask = CPUMASK))) {
+ counters_t *c = &perfcntrs.cpus[i];
+ seq_printf(m, "%u %llu", c->processor, c->tsc);
+ for (j = 0; j < 18; j++) {
+ seq_printf(m, " %llu", c->counters[j].cnt);
+ }
+ seq_printf(m, "\n");
+ }
+ }
+
+#if 0
+ unsigned long long t;
+ unsigned int l, h;
+
+ t = rpcc();
+
+
+
+#ifdef HUMAN_READABLE
+ seq_printf(m,
+ "show_perfcntr\nprocessor: %u\ntime: %llu\n"
+ "last write: 0x%08lx%08lx -> 0x%lx (CPU%u)\n",
+ smp_processor_id(),
+ t,
+ last_h,
+ last_l,
+ last_msr,
+ last_cpu);
+#else
+ seq_printf(m, "%u %llu", smp_processor_id(), t);
+#endif
+
+ SHOW_COUNTER(MSR_P4_BPU_COUNTER0);
+ SHOW_COUNTER(MSR_P4_BPU_COUNTER1);
+ SHOW_COUNTER(MSR_P4_BPU_COUNTER2);
+ SHOW_COUNTER(MSR_P4_BPU_COUNTER3);
+
+ SHOW_COUNTER(MSR_P4_MS_COUNTER0);
+ SHOW_COUNTER(MSR_P4_MS_COUNTER1);
+ SHOW_COUNTER(MSR_P4_MS_COUNTER2);
+ SHOW_COUNTER(MSR_P4_MS_COUNTER3);
+
+ SHOW_COUNTER(MSR_P4_FLAME_COUNTER0);
+ SHOW_COUNTER(MSR_P4_FLAME_COUNTER1);
+ SHOW_COUNTER(MSR_P4_FLAME_COUNTER2);
+ SHOW_COUNTER(MSR_P4_FLAME_COUNTER3);
+
+ SHOW_COUNTER(MSR_P4_IQ_COUNTER0);
+ SHOW_COUNTER(MSR_P4_IQ_COUNTER1);
+ SHOW_COUNTER(MSR_P4_IQ_COUNTER2);
+ SHOW_COUNTER(MSR_P4_IQ_COUNTER3);
+ SHOW_COUNTER(MSR_P4_IQ_COUNTER4);
+ SHOW_COUNTER(MSR_P4_IQ_COUNTER5);
+
+#ifndef HUMAN_READABLE
+ seq_printf(m, "\n");
+#endif
+
+#endif
+
+ return 0;
+}
+
+/*****************************************************************************
+ * Show counter configuration *
+ *****************************************************************************/
+
+typedef union {
+ struct {
+ unsigned long lo;
+ unsigned long hi;
+ };
+ unsigned long long cnt;
+} cpu_perfcfg_t;
+
+typedef struct configs_t_struct {
+ int processor;
+ unsigned long long tsc;
+ cpu_perfcfg_t cccr[18];
+ cpu_perfcfg_t escr[0x42];
+} configs_t;
+
+typedef struct perfcfg_t_struct {
+ unsigned long cpu_mask;
+ configs_t cpus[4]; // Actually for each cpu in system
+} perfcfg_t;
+
+static perfcfg_t perfcfgs;
+
+#define READ_CCCR(_i, _msr) rdmsr((_msr), l, h); c->cccr[_i].lo = l; \
+ c->cccr[_i].hi = h;
+#define READ_ESCR(_i, _msr) rdmsr((_msr), l, h); c->escr[_i].lo = l; \
+ c->escr[_i].hi = h;
+
+static void show_perfcfg_for(void *v)
+{
+ unsigned int l, h;
+
+ perfcfg_t *p = &perfcfgs;
+ configs_t *c;
+
+ if (!((1 << smp_processor_id()) & p->cpu_mask))
+ return;
+
+ c = &p->cpus[smp_processor_id()];
+
+ c->processor = smp_processor_id();
+ c->tsc = rpcc();
+
+ READ_CCCR(0, MSR_P4_BPU_CCCR0);
+ READ_CCCR(1, MSR_P4_BPU_CCCR1);
+ READ_CCCR(2, MSR_P4_BPU_CCCR2);
+ READ_CCCR(3, MSR_P4_BPU_CCCR3);
+
+ READ_CCCR(4, MSR_P4_MS_CCCR0);
+ READ_CCCR(5, MSR_P4_MS_CCCR1);
+ READ_CCCR(6, MSR_P4_MS_CCCR2);
+ READ_CCCR(7, MSR_P4_MS_CCCR3);
+
+ READ_CCCR(8, MSR_P4_FLAME_CCCR0);
+ READ_CCCR(9, MSR_P4_FLAME_CCCR1);
+ READ_CCCR(10, MSR_P4_FLAME_CCCR2);
+ READ_CCCR(11, MSR_P4_FLAME_CCCR3);
+
+ READ_CCCR(12, MSR_P4_IQ_CCCR0);
+ READ_CCCR(13, MSR_P4_IQ_CCCR1);
+ READ_CCCR(14, MSR_P4_IQ_CCCR2);
+ READ_CCCR(15, MSR_P4_IQ_CCCR3);
+ READ_CCCR(16, MSR_P4_IQ_CCCR4);
+ READ_CCCR(17, MSR_P4_IQ_CCCR5);
+
+ READ_ESCR(0x00, MSR_P4_BSU_ESCR0);
+ READ_ESCR(0x02, MSR_P4_FSB_ESCR0);
+ READ_ESCR(0x0a, MSR_P4_MOB_ESCR0);
+ READ_ESCR(0x0c, MSR_P4_PMH_ESCR0);
+ READ_ESCR(0x12, MSR_P4_BPU_ESCR0);
+ READ_ESCR(0x14, MSR_P4_IS_ESCR0);
+ READ_ESCR(0x16, MSR_P4_ITLB_ESCR0);
+ READ_ESCR(0x28, MSR_P4_IX_ESCR0);
+ READ_ESCR(0x01, MSR_P4_BSU_ESCR1);
+ READ_ESCR(0x03, MSR_P4_FSB_ESCR1);
+ READ_ESCR(0x0b, MSR_P4_MOB_ESCR1);
+ READ_ESCR(0x0d, MSR_P4_PMH_ESCR1);
+ READ_ESCR(0x13, MSR_P4_BPU_ESCR1);
+ READ_ESCR(0x15, MSR_P4_IS_ESCR1);
+ READ_ESCR(0x17, MSR_P4_ITLB_ESCR1);
+ READ_ESCR(0x29, MSR_P4_IX_ESCR1);
+ READ_ESCR(0x20, MSR_P4_MS_ESCR0);
+ READ_ESCR(0x22, MSR_P4_TBPU_ESCR0);
+ READ_ESCR(0x24, MSR_P4_TC_ESCR0);
+ READ_ESCR(0x21, MSR_P4_MS_ESCR1);
+ READ_ESCR(0x23, MSR_P4_TBPU_ESCR1);
+ READ_ESCR(0x25, MSR_P4_TC_ESCR1);
+ READ_ESCR(0x04, MSR_P4_FIRM_ESCR0);
+ READ_ESCR(0x06, MSR_P4_FLAME_ESCR0);
+ READ_ESCR(0x08, MSR_P4_DAC_ESCR0);
+ READ_ESCR(0x0e, MSR_P4_SAAT_ESCR0);
+ READ_ESCR(0x10, MSR_P4_U2L_ESCR0);
+ READ_ESCR(0x05, MSR_P4_FIRM_ESCR1);
+ READ_ESCR(0x07, MSR_P4_FLAME_ESCR1);
+ READ_ESCR(0x09, MSR_P4_DAC_ESCR1);
+ READ_ESCR(0x0f, MSR_P4_SAAT_ESCR1);
+ READ_ESCR(0x11, MSR_P4_U2L_ESCR1);
+ READ_ESCR(0x18, MSR_P4_CRU_ESCR0);
+ READ_ESCR(0x2c, MSR_P4_CRU_ESCR2);
+ READ_ESCR(0x40, MSR_P4_CRU_ESCR4);
+ READ_ESCR(0x1a, MSR_P4_IQ_ESCR0);
+ READ_ESCR(0x1c, MSR_P4_RAT_ESCR0);
+ READ_ESCR(0x1e, MSR_P4_SSU_ESCR0);
+ READ_ESCR(0x2a, MSR_P4_ALF_ESCR0);
+ READ_ESCR(0x19, MSR_P4_CRU_ESCR1);
+ READ_ESCR(0x2d, MSR_P4_CRU_ESCR3);
+ READ_ESCR(0x41, MSR_P4_CRU_ESCR5);
+ READ_ESCR(0x1b, MSR_P4_IQ_ESCR1);
+ READ_ESCR(0x1d, MSR_P4_RAT_ESCR1);
+ READ_ESCR(0x2b, MSR_P4_ALF_ESCR1);
+
+ return;
+}
+
+static char *escr_names[] = {
+ "BSU_ESCR0",
+ "BSU_ESCR1",
+ "FSB_ESCR0",
+ "FSB_ESCR1",
+ "FIRM_ESCR0",
+ "FIRM_ESCR1",
+ "FLAME_ESCR0",
+ "FLAME_ESCR1",
+ "DAC_ESCR0",
+ "DAC_ESCR1",
+ "MOB_ESCR0",
+ "MOB_ESCR1",
+ "PMH_ESCR0",
+ "PMH_ESCR1",
+ "SAAT_ESCR0",
+ "SAAT_ESCR1",
+ "U2L_ESCR0",
+ "U2L_ESCR1",
+ "BPU_ESCR0",
+ "BPU_ESCR1",
+ "IS_ESCR0",
+ "IS_ESCR1",
+ "ITLB_ESCR0",
+ "ITLB_ESCR1",
+ "CRU_ESCR0",
+ "CRU_ESCR1",
+ "IQ_ESCR0",
+ "IQ_ESCR1",
+ "RAT_ESCR0",
+ "RAT_ESCR1",
+ "SSU_ESCR0",
+ "SSU_ESCR1",
+ "MS_ESCR0",
+ "MS_ESCR1",
+ "TBPU_ESCR0",
+ "TBPU_ESCR1",
+ "TC_ESCR0",
+ "TC_ESCR1",
+ "0x3c6",
+ "0x3c7",
+ "IX_ESCR0",
+ "IX_ESCR1",
+ "ALF_ESCR0",
+ "ALF_ESCR1",
+ "CRU_ESCR2",
+ "CRU_ESCR3",
+ "0x3ce",
+ "0x3cf",
+ "0x3d0",
+ "0x3d1",
+ "0x3d2",
+ "0x3d3",
+ "0x3d4",
+ "0x3d5",
+ "0x3d6",
+ "0x3d7",
+ "0x3d8",
+ "0x3d9",
+ "0x3da",
+ "0x3db",
+ "0x3dc",
+ "0x3dd",
+ "0x3de",
+ "0x3df",
+ "CRU_ESCR4",
+ "CRU_ESCR5"
+};
+
+static unsigned long escr_map_0[] =
+{MSR_P4_BPU_ESCR0, MSR_P4_IS_ESCR0,
+ MSR_P4_MOB_ESCR0, MSR_P4_ITLB_ESCR0,
+ MSR_P4_PMH_ESCR0, MSR_P4_IX_ESCR0,
+ MSR_P4_FSB_ESCR0, MSR_P4_BSU_ESCR0}; //BPU even
+static unsigned long escr_map_1[] =
+ {MSR_P4_BPU_ESCR1, MSR_P4_IS_ESCR1,
+ MSR_P4_MOB_ESCR1, MSR_P4_ITLB_ESCR1,
+ MSR_P4_PMH_ESCR1, MSR_P4_IX_ESCR1,
+ MSR_P4_FSB_ESCR1, MSR_P4_BSU_ESCR1}; //BPU odd
+static unsigned long escr_map_2[] =
+ {MSR_P4_MS_ESCR0, MSR_P4_TC_ESCR0, MSR_P4_TBPU_ESCR0,
+ 0, 0, 0, 0, 0}; //MS even
+static unsigned long escr_map_3[] =
+ {MSR_P4_MS_ESCR1, MSR_P4_TC_ESCR1, MSR_P4_TBPU_ESCR1,
+ 0, 0, 0, 0, 0}; //MS odd
+static unsigned long escr_map_4[] =
+ {MSR_P4_FLAME_ESCR0, MSR_P4_FIRM_ESCR0, MSR_P4_SAAT_ESCR0,
+ MSR_P4_U2L_ESCR0, 0, MSR_P4_DAC_ESCR0, 0, 0}; //FLAME even
+static unsigned long escr_map_5[] =
+ {MSR_P4_FLAME_ESCR1, MSR_P4_FIRM_ESCR1, MSR_P4_SAAT_ESCR1,
+ MSR_P4_U2L_ESCR1, 0, MSR_P4_DAC_ESCR1, 0, 0}; //FLAME odd
+static unsigned long escr_map_6[] =
+ {MSR_P4_IQ_ESCR0, MSR_P4_ALF_ESCR0,
+ MSR_P4_RAT_ESCR0, MSR_P4_SSU_ESCR0,
+ MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR4, 0}; //IQ even
+static unsigned long escr_map_7[] =
+ {MSR_P4_IQ_ESCR1, MSR_P4_ALF_ESCR1,
+ MSR_P4_RAT_ESCR1, 0,
+ MSR_P4_CRU_ESCR1, MSR_P4_CRU_ESCR3, MSR_P4_CRU_ESCR5, 0}; //IQ odd
+
+static unsigned long *escr_map[] = {
+ escr_map_0,
+ escr_map_1,
+ escr_map_2,
+ escr_map_3,
+ escr_map_4,
+ escr_map_5,
+ escr_map_6,
+ escr_map_7,
+};
+
+unsigned long get_escr_msr(int c, int e)
+{
+ int index = -1;
+
+ // Get the ESCR MSR address from the counter number and the ESCR number.
+ switch (c) {
+ case P4_BPU_COUNTER0_NUMBER:
+ case P4_BPU_COUNTER1_NUMBER:
+ index = 0;
+ break;
+ case P4_BPU_COUNTER2_NUMBER:
+ case P4_BPU_COUNTER3_NUMBER:
+ index = 1;
+ break;
+ case P4_MS_COUNTER0_NUMBER:
+ case P4_MS_COUNTER1_NUMBER:
+ index = 2; // probably !
+ break;
+ case P4_MS_COUNTER2_NUMBER:
+ case P4_MS_COUNTER3_NUMBER:
+ index = 3; // probably !
+ break;
+ case P4_FLAME_COUNTER0_NUMBER:
+ case P4_FLAME_COUNTER1_NUMBER:
+ index = 4; // probably !
+ break;
+ case P4_FLAME_COUNTER2_NUMBER:
+ case P4_FLAME_COUNTER3_NUMBER:
+ index = 5; // probably !
+ break;
+ case P4_IQ_COUNTER0_NUMBER:
+ case P4_IQ_COUNTER1_NUMBER:
+ case P4_IQ_COUNTER4_NUMBER:
+ index = 6;
+ break;
+ case P4_IQ_COUNTER2_NUMBER:
+ case P4_IQ_COUNTER3_NUMBER:
+ case P4_IQ_COUNTER5_NUMBER:
+ index = 7;
+ break;
+ }
+
+ if (index != -1) {
+ return escr_map[index][e];
+ }
+
+ return 0;
+}
+
+static char null_string[] = "";
+static char *get_escr(int c, int e)
+{
+ unsigned long msr = get_escr_msr(c, e);
+
+ if ((msr >= 0x3a0) && (msr <= 0x3e1))
+ return escr_names[(int)(msr - 0x3a0)];
+ return null_string;
+}
+
+static int show_perfcfg(struct seq_file *m, void *v)
+{
+ int i, j;
+
+ // Get each physical cpu to read configs
+ perfcfgs.cpu_mask = CPUMASK;
+
+ smp_call_function(show_perfcfg_for, NULL, 1, 1);
+ show_perfcfg_for(NULL);
+
+ for (i = 0; i < 32; i++) {
+ if (((1 << i) & (perfcfgs.cpu_mask = CPUMASK))) {
+ configs_t *c = &perfcfgs.cpus[i];
+ seq_printf(m, "----------------------------------------\n");
+ seq_printf(m, "%u %llu\n", c->processor, c->tsc);
+ for (j = 0; j < 18; j++) {
+ seq_printf(m, "%08lx", c->cccr[j].lo);
+
+ if (!(c->cccr[j].lo & P4_CCCR_ENABLE))
+ seq_printf(m, " DISABLED");
+ else {
+ unsigned long escr_msr =
+ get_escr_msr(i, (int)((c->cccr[j].lo >> 13)&7));
+ seq_printf(m, " ESCR=%s",
+ get_escr(i, (int)((c->cccr[j].lo >> 13)&7)));
+ if ((escr_msr >= 0x3a0) && (escr_msr <= 0x3e1)) {
+ unsigned long e = c->escr[(int)(escr_msr - 0x3a0)].lo;
+ seq_printf(m, "(%08lx es=%lx mask=%lx", e,
+ (e >> 25) & 0x7f,
+ (e >> 9) & 0xffff);
+ if ((e & P4_ESCR_T0_USR))
+ seq_printf(m, " T(0)USR");
+ if ((e & P4_ESCR_T0_OS))
+ seq_printf(m, " T(0)OS");
+ if ((e & P4_ESCR_T1_USR))
+ seq_printf(m, " T1USR");
+ if ((e & P4_ESCR_T1_OS))
+ seq_printf(m, " T1OS");
+ seq_printf(m, ")");
+ }
+ seq_printf(m, " AT=%u", (int)((c->cccr[j].lo >> 16)&3));
+
+ if ((c->cccr[j].lo & P4_CCCR_OVF))
+ seq_printf(m, " OVF");
+ if ((c->cccr[j].lo & P4_CCCR_CASCADE))
+ seq_printf(m, " CASC");
+ if ((c->cccr[j].lo & P4_CCCR_FORCE_OVF))
+ seq_printf(m, " F-OVF");
+ if ((c->cccr[j].lo & P4_CCCR_EDGE))
+ seq_printf(m, " EDGE");
+ if ((c->cccr[j].lo & P4_CCCR_COMPLEMENT))
+ seq_printf(m, " COMPL");
+ if ((c->cccr[j].lo & P4_CCCR_COMPARE))
+ seq_printf(m, " CMP");
+ if ((c->cccr[j].lo & P4_CCCR_OVF_PMI_T0))
+ seq_printf(m, " OVF_PMI(_T0)");
+ if ((c->cccr[j].lo & P4_CCCR_OVF_PMI_T1))
+ seq_printf(m, " OVF_PMI_T1");
+ }
+ seq_printf(m, "\n");
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*****************************************************************************
+ * Handle writes *
+ *****************************************************************************/
+
+static int set_msr_cpu_mask;
+static unsigned long set_msr_addr;
+static unsigned long set_msr_lo;
+static unsigned long set_msr_hi;
+
+static void perfcntr_write_for(void *unused)
+{
+#ifdef NOHT
+ if (((1 << smp_processor_id()) & set_msr_cpu_mask)) {
+#endif
+ //printk("perfcntr: wrmsr(%08lx, %08lx, %08lx)\n",
+ // set_msr_addr, set_msr_lo, set_msr_hi);
+ wrmsr(set_msr_addr, set_msr_lo, set_msr_hi);
+#ifdef NOHT
+ }
+#endif
+}
+
+ssize_t perfcntr_write(struct file *f,
+ const char *data,
+ size_t size,
+ loff_t *pos)
+{
+ char *endp;
+ ssize_t ret = 0;
+ //unsigned long l, h, msr;
+ unsigned long long v;
+
+ set_msr_cpu_mask = (int)simple_strtoul(data, &endp, 16);
+ endp++; // skip past space
+ if ((endp - data) >= size) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ set_msr_addr = simple_strtoul(endp, &endp, 16);
+ endp++; // skip past space
+ if ((endp - data) >= size) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ v = simple_strtoul(endp, &endp, 16);
+ set_msr_lo = (unsigned long)(v & 0xffffffffULL);
+ set_msr_hi = (unsigned long)(v >> 32);
+
+ smp_call_function(perfcntr_write_for, NULL, 1, 1);
+ perfcntr_write_for(NULL);
+
+#if 0
+ wrmsr(msr, l, h);
+ last_l = l;
+ last_h = h;
+ last_msr = msr;
+ last_cpu = smp_processor_id();
+#endif
+ ret = size;
+
+ out:
+ return ret;
+}
+
+/*****************************************************************************
+ * /proc stuff *
+ *****************************************************************************/
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ //return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+ return *pos == 0 ? foobar : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations perfcntr_op = {
+ start: c_start,
+ next: c_next,
+ stop: c_stop,
+ show: show_perfcntr,
+};
+
+struct seq_operations perfcfg_op = {
+ start: c_start,
+ next: c_next,
+ stop: c_stop,
+ show: show_perfcfg,
+};
+
+static int perfcntr_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &perfcntr_op);
+}
+
+static int perfcfg_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &perfcfg_op);
+}
+
+static struct file_operations proc_perfcntr_operations = {
+ open: perfcntr_open,
+ read: seq_read,
+ write: perfcntr_write,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+
+static struct file_operations proc_perfcfg_operations = {
+ open: perfcfg_open,
+ read: seq_read,
+ write: perfcntr_write,
+ llseek: seq_lseek,
+ release: seq_release,
+};
+
+static void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
+{
+ struct proc_dir_entry *entry;
+ entry = create_proc_entry(name, mode, NULL);
+ if (entry)
+ entry->proc_fops = f;
+}
+
+/*****************************************************************************
+ * Module init and cleanup *
+ *****************************************************************************/
+
+static int __init perfcntr_init(void)
+{
+ printk(version);
+
+ create_seq_entry("perfcntr", 0777, &proc_perfcntr_operations);
+ create_seq_entry("perfcntr_config", 0777, &proc_perfcfg_operations);
+
+ return 0;
+}
+
+static void __exit perfcntr_exit(void)
+{
+ remove_proc_entry("perfcntr", NULL);
+ remove_proc_entry("perfcntr_config", NULL);
+}
+
+module_init(perfcntr_init);
+module_exit(perfcntr_exit);
+
+/* End of $RCSfile$ */
diff --git a/tools/misc/cpuperf/p4perf.h b/tools/misc/cpuperf/p4perf.h
new file mode 100644
index 0000000000..04eef39b3c
--- /dev/null
+++ b/tools/misc/cpuperf/p4perf.h
@@ -0,0 +1,382 @@
+/*
+ * P4 Performance counter stuff.
+ *
+ * P4 Xeon with Hyperthreading has counters per physical package which can
+ * count events from either logical CPU. However, in many cases more than
+ * one ESCR and CCCR/counter pair can be used to count the same event. For
+ * instr or uops retired, use either ESCR0/IQ_CCCR0 or ESCR1/IQ_CCCR2.
+ *
+ * $Id: p4perf.h,v 1.2 2003/10/13 16:51:41 jrb44 Exp $
+ *
+ * $Log: p4perf.h,v $
+ * Revision 1.2 2003/10/13 16:51:41 jrb44
+ * *** empty log message ***
+ *
+ */
+
+#ifndef P4PERF_H
+#define P4PERF_H
+
+#ifdef __KERNEL__
+#include <asm/msr.h>
+#endif
+
+/*****************************************************************************
+ * Performance counter configuration. *
+ *****************************************************************************/
+
+#ifndef P6_EVNTSEL_OS
+# define P6_EVNTSEL_OS (1 << 17)
+# define P6_EVNTSEL_USR (1 << 16)
+# define P6_EVNTSEL_E (1 << 18)
+# define P6_EVNTSEL_EN (1 << 22)
+#endif
+#define P6_PERF_INST_RETIRED 0xc0
+#define P6_PERF_UOPS_RETIRED 0xc2
+
+#define P4_ESCR_USR (1 << 2)
+#define P4_ESCR_OS (1 << 3)
+#define P4_ESCR_T0_USR (1 << 2) /* First logical CPU */
+#define P4_ESCR_T0_OS (1 << 3)
+#define P4_ESCR_T1_USR (1 << 0) /* Second logical CPU */
+#define P4_ESCR_T1_OS (1 << 1)
+#define P4_ESCR_TE (1 << 4)
+#define P4_ESCR_THREADS(t) (t)
+#define P4_ESCR_TV(tag) (tag << 5)
+#define P4_ESCR_EVNTSEL(e) (e << 25)
+#define P4_ESCR_EVNTMASK(e) (e << 9)
+
+#define P4_ESCR_EVNTSEL_FRONT_END 0x08
+#define P4_ESCR_EVNTSEL_EXECUTION 0x0c
+#define P4_ESCR_EVNTSEL_REPLAY 0x09
+#define P4_ESCR_EVNTSEL_INSTR_RETIRED 0x02
+#define P4_ESCR_EVNTSEL_UOPS_RETIRED 0x01
+#define P4_ESCR_EVNTSEL_UOP_TYPE 0x02
+#define P4_ESCR_EVNTSEL_RET_MBR_TYPE 0x05
+//#define P4_ESCR_EVNTSEL_RET_MBR_TYPE 0x04
+
+#define P4_ESCR_EVNTMASK_FE_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_FE_BOGUS 0x02
+
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS0 0x01
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS1 0x02
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS2 0x04
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS3 0x08
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS0 0x10
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS1 0x20
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS2 0x40
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS3 0x80
+
+#define P4_ESCR_EVNTMASK_REPLAY_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_REPLAY_BOGUS 0x02
+
+#define P4_ESCR_EVNTMASK_IRET_NB_NTAG 0x01
+#define P4_ESCR_EVNTMASK_IRET_NB_TAG 0x02
+#define P4_ESCR_EVNTMASK_IRET_B_NTAG 0x04
+#define P4_ESCR_EVNTMASK_IRET_B_TAG 0x08
+
+#define P4_ESCR_EVNTMASK_URET_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_URET_BOGUS 0x02
+
+#define P4_ESCR_EVNTMASK_UOP_LOADS 0x02
+#define P4_ESCR_EVNTMASK_UOP_STORES 0x04
+
+#define P4_ESCR_EVNTMASK_RMBRT_COND 0x02
+#define P4_ESCR_EVNTMASK_RMBRT_CALL 0x04
+#define P4_ESCR_EVNTMASK_RMBRT_RETURN 0x08
+#define P4_ESCR_EVNTMASK_RMBRT_INDIR 0x10
+
+#define P4_ESCR_EVNTMASK_RBRT_COND 0x02
+#define P4_ESCR_EVNTMASK_RBRT_CALL 0x04
+#define P4_ESCR_EVNTMASK_RBRT_RETURN 0x08
+#define P4_ESCR_EVNTMASK_RBRT_INDIR 0x10
+
+//#define P4_ESCR_EVNTMASK_INSTR_RETIRED 0x01 /* Non bogus, not tagged */
+//#define P4_ESCR_EVNTMASK_UOPS_RETIRED 0x01 /* Non bogus */
+
+#define P4_CCCR_OVF (1 << 31)
+#define P4_CCCR_CASCADE (1 << 30)
+#define P4_CCCR_FORCE_OVF (1 << 25)
+#define P4_CCCR_EDGE (1 << 24)
+#define P4_CCCR_COMPLEMENT (1 << 19)
+#define P4_CCCR_COMPARE (1 << 18)
+#define P4_CCCR_THRESHOLD(t) (t << 20)
+#define P4_CCCR_ENABLE (1 << 12)
+#define P4_CCCR_ESCR(escr) (escr << 13)
+#define P4_CCCR_ACTIVE_THREAD(t) (t << 16) /* Set to 11 */
+#define P4_CCCR_OVF_PMI_T0 (1 << 26)
+#define P4_CCCR_OVF_PMI_T1 (1 << 27)
+#define P4_CCCR_RESERVED (3 << 16)
+#define P4_CCCR_OVF_PMI (1 << 26)
+
+// BPU
+#define MSR_P4_BPU_COUNTER0 0x300
+#define MSR_P4_BPU_COUNTER1 0x301
+#define MSR_P4_BPU_CCCR0 0x360
+#define MSR_P4_BPU_CCCR1 0x361
+
+#define MSR_P4_BPU_COUNTER2 0x302
+#define MSR_P4_BPU_COUNTER3 0x303
+#define MSR_P4_BPU_CCCR2 0x362
+#define MSR_P4_BPU_CCCR3 0x363
+
+#define MSR_P4_BSU_ESCR0 0x3a0
+#define MSR_P4_FSB_ESCR0 0x3a2
+#define MSR_P4_MOB_ESCR0 0x3aa
+#define MSR_P4_PMH_ESCR0 0x3ac
+#define MSR_P4_BPU_ESCR0 0x3b2
+#define MSR_P4_IS_ESCR0 0x3b4
+#define MSR_P4_ITLB_ESCR0 0x3b6
+#define MSR_P4_IX_ESCR0 0x3c8
+
+#define P4_BSU_ESCR0_NUMBER 7
+#define P4_FSB_ESCR0_NUMBER 6
+#define P4_MOB_ESCR0_NUMBER 2
+#define P4_PMH_ESCR0_NUMBER 4
+#define P4_BPU_ESCR0_NUMBER 0
+#define P4_IS_ESCR0_NUMBER 1
+#define P4_ITLB_ESCR0_NUMBER 3
+#define P4_IX_ESCR0_NUMBER 5
+
+#define MSR_P4_BSU_ESCR1 0x3a1
+#define MSR_P4_FSB_ESCR1 0x3a3
+#define MSR_P4_MOB_ESCR1 0x3ab
+#define MSR_P4_PMH_ESCR1 0x3ad
+#define MSR_P4_BPU_ESCR1 0x3b3
+#define MSR_P4_IS_ESCR1 0x3b5
+#define MSR_P4_ITLB_ESCR1 0x3b7
+#define MSR_P4_IX_ESCR1 0x3c9
+
+#define P4_BSU_ESCR1_NUMBER 7
+#define P4_FSB_ESCR1_NUMBER 6
+#define P4_MOB_ESCR1_NUMBER 2
+#define P4_PMH_ESCR1_NUMBER 4
+#define P4_BPU_ESCR1_NUMBER 0
+#define P4_IS_ESCR1_NUMBER 1
+#define P4_ITLB_ESCR1_NUMBER 3
+#define P4_IX_ESCR1_NUMBER 5
+
+// MS
+#define MSR_P4_MS_COUNTER0 0x304
+#define MSR_P4_MS_COUNTER1 0x305
+#define MSR_P4_MS_CCCR0 0x364
+#define MSR_P4_MS_CCCR1 0x365
+
+#define MSR_P4_MS_COUNTER2 0x306
+#define MSR_P4_MS_COUNTER3 0x307
+#define MSR_P4_MS_CCCR2 0x366
+#define MSR_P4_MS_CCCR3 0x367
+
+#define MSR_P4_MS_ESCR0 0x3c0
+#define MSR_P4_TBPU_ESCR0 0x3c2
+#define MSR_P4_TC_ESCR0 0x3c4
+
+#define P4_MS_ESCR0_NUMBER 0
+#define P4_TBPU_ESCR0_NUMBER 2
+#define P4_TC_ESCR0_NUMBER 1
+
+#define MSR_P4_MS_ESCR1 0x3c1
+#define MSR_P4_TBPU_ESCR1 0x3c3
+#define MSR_P4_TC_ESCR1 0x3c5
+
+#define P4_MS_ESCR1_NUMBER 0
+#define P4_TBPU_ESCR1_NUMBER 2
+#define P4_TC_ESCR1_NUMBER 1
+
+// FLAME
+#define MSR_P4_FLAME_COUNTER0 0x308
+#define MSR_P4_FLAME_COUNTER1 0x309
+#define MSR_P4_FLAME_CCCR0 0x368
+#define MSR_P4_FLAME_CCCR1 0x369
+
+#define MSR_P4_FLAME_COUNTER2 0x30a
+#define MSR_P4_FLAME_COUNTER3 0x30b
+#define MSR_P4_FLAME_CCCR2 0x36a
+#define MSR_P4_FLAME_CCCR3 0x36b
+
+#define MSR_P4_FIRM_ESCR0 0x3a4
+#define MSR_P4_FLAME_ESCR0 0x3a6
+#define MSR_P4_DAC_ESCR0 0x3a8
+#define MSR_P4_SAAT_ESCR0 0x3ae
+#define MSR_P4_U2L_ESCR0 0x3b0
+
+#define P4_FIRM_ESCR0_NUMBER 1
+#define P4_FLAME_ESCR0_NUMBER 0
+#define P4_DAC_ESCR0_NUMBER 5
+#define P4_SAAT_ESCR0_NUMBER 2
+#define P4_U2L_ESCR0_NUMBER 3
+
+#define MSR_P4_FIRM_ESCR1 0x3a5
+#define MSR_P4_FLAME_ESCR1 0x3a7
+#define MSR_P4_DAC_ESCR1 0x3a9
+#define MSR_P4_SAAT_ESCR1 0x3af
+#define MSR_P4_U2L_ESCR1 0x3b1
+
+#define P4_FIRM_ESCR1_NUMBER 1
+#define P4_FLAME_ESCR1_NUMBER 0
+#define P4_DAC_ESCR1_NUMBER 5
+#define P4_SAAT_ESCR1_NUMBER 2
+#define P4_U2L_ESCR1_NUMBER 3
+
+// IQ
+#define MSR_P4_IQ_COUNTER0 0x30c
+#define MSR_P4_IQ_COUNTER1 0x30d
+#define MSR_P4_IQ_CCCR0 0x36c
+#define MSR_P4_IQ_CCCR1 0x36d
+
+#define MSR_P4_IQ_COUNTER2 0x30e
+#define MSR_P4_IQ_COUNTER3 0x30f
+#define MSR_P4_IQ_CCCR2 0x36e
+#define MSR_P4_IQ_CCCR3 0x36f
+
+#define MSR_P4_IQ_COUNTER4 0x310
+#define MSR_P4_IQ_COUNTER5 0x311
+#define MSR_P4_IQ_CCCR4 0x370
+#define MSR_P4_IQ_CCCR5 0x371
+
+#define MSR_P4_CRU_ESCR0 0x3b8
+#define MSR_P4_CRU_ESCR2 0x3cc
+#define MSR_P4_CRU_ESCR4 0x3e0
+#define MSR_P4_IQ_ESCR0 0x3ba
+#define MSR_P4_RAT_ESCR0 0x3bc
+#define MSR_P4_SSU_ESCR0 0x3be
+#define MSR_P4_ALF_ESCR0 0x3ca
+
+#define P4_CRU_ESCR0_NUMBER 4
+#define P4_CRU_ESCR2_NUMBER 5
+#define P4_CRU_ESCR4_NUMBER 6
+#define P4_IQ_ESCR0_NUMBER 0
+#define P4_RAT_ESCR0_NUMBER 2
+#define P4_SSU_ESCR0_NUMBER 3
+#define P4_ALF_ESCR0_NUMBER 1
+
+#define MSR_P4_CRU_ESCR1 0x3b9
+#define MSR_P4_CRU_ESCR3 0x3cd
+#define MSR_P4_CRU_ESCR5 0x3e1
+#define MSR_P4_IQ_ESCR1 0x3bb
+#define MSR_P4_RAT_ESCR1 0x3bd
+#define MSR_P4_ALF_ESCR1 0x3cb
+
+#define P4_CRU_ESCR1_NUMBER 4
+#define P4_CRU_ESCR3_NUMBER 5
+#define P4_CRU_ESCR5_NUMBER 6
+#define P4_IQ_ESCR1_NUMBER 0
+#define P4_RAT_ESCR1_NUMBER 2
+#define P4_ALF_ESCR1_NUMBER 1
+
+#define P4_BPU_COUNTER0_NUMBER 0
+#define P4_BPU_COUNTER1_NUMBER 1
+#define P4_BPU_COUNTER2_NUMBER 2
+#define P4_BPU_COUNTER3_NUMBER 3
+
+#define P4_MS_COUNTER0_NUMBER 4
+#define P4_MS_COUNTER1_NUMBER 5
+#define P4_MS_COUNTER2_NUMBER 6
+#define P4_MS_COUNTER3_NUMBER 7
+
+#define P4_FLAME_COUNTER0_NUMBER 8
+#define P4_FLAME_COUNTER1_NUMBER 9
+#define P4_FLAME_COUNTER2_NUMBER 10
+#define P4_FLAME_COUNTER3_NUMBER 11
+
+#define P4_IQ_COUNTER0_NUMBER 12
+#define P4_IQ_COUNTER1_NUMBER 13
+#define P4_IQ_COUNTER2_NUMBER 14
+#define P4_IQ_COUNTER3_NUMBER 15
+#define P4_IQ_COUNTER4_NUMBER 16
+#define P4_IQ_COUNTER5_NUMBER 17
+
+/* PEBS
+ */
+#define MSR_P4_PEBS_ENABLE 0x3F1
+#define MSR_P4_PEBS_MATRIX_VERT 0x3F2
+
+#define P4_PEBS_ENABLE_MY_THR (1 << 25)
+#define P4_PEBS_ENABLE_OTH_THR (1 << 26)
+#define P4_PEBS_ENABLE (1 << 24)
+#define P4_PEBS_BIT0 (1 << 0)
+#define P4_PEBS_BIT1 (1 << 1)
+#define P4_PEBS_BIT2 (1 << 2)
+
+#define P4_PEBS_MATRIX_VERT_BIT0 (1 << 0)
+#define P4_PEBS_MATRIX_VERT_BIT1 (1 << 1)
+#define P4_PEBS_MATRIX_VERT_BIT2 (1 << 2)
+
+/* Replay tagging.
+ */
+#define P4_REPLAY_TAGGING_PEBS_L1LMR P4_PEBS_BIT0
+#define P4_REPLAY_TAGGING_PEBS_L2LMR P4_PEBS_BIT1
+#define P4_REPLAY_TAGGING_PEBS_DTLMR P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTSMR P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTAMR P4_PEBS_BIT2
+
+#define P4_REPLAY_TAGGING_VERT_L1LMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_L2LMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTLMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTSMR P4_PEBS_MATRIX_VERT_BIT1
+#define P4_REPLAY_TAGGING_VERT_DTAMR (P4_PEBS_MATRIX_VERT_BIT0 | P4_PEBS_MATRIX_VERT_BIT1)
+
+
+
+
+/*****************************************************************************
+ * *
+ *****************************************************************************/
+
+// x87_FP_uop
+#define EVENT_SEL_x87_FP_uop 0x04
+#define EVENT_MASK_x87_FP_uop_ALL (1 << 15)
+
+// execution event (at retirement)
+#define EVENT_SEL_execution_event 0x0C
+
+// scalar_SP_uop
+#define EVENT_SEL_scalar_SP_uop 0x0a
+#define EVENT_MASK_scalar_SP_uop_ALL (1 << 15)
+
+// scalar_DP_uop
+#define EVENT_SEL_scalar_DP_uop 0x0e
+#define EVENT_MASK_scalar_DP_uop_ALL (1 << 15)
+
+// Instruction retired
+#define EVENT_SEL_instr_retired 0x02
+#define EVENT_MASK_instr_retired_ALL 0x0f
+
+// uOps retired
+#define EVENT_SEL_uops_retired 0x01
+#define EVENT_MASK_uops_retired_ALL 0x03
+
+// L1 misses retired
+#define EVENT_SEL_replay_event 0x09
+#define EVENT_MASK_replay_event_ALL 0x03
+
+// Trace cache
+#define EVENT_SEL_BPU_fetch_request 0x03
+#define EVENT_MASK_BPU_fetch_request_TCMISS 0x01
+
+// Bus activity
+#define EVENT_SEL_FSB_data_activity 0x17
+#define EVENT_MASK_FSB_data_activity_DRDY_DRV 0x01
+#define EVENT_MASK_FSB_data_activity_DRDY_OWN 0x02
+#define EVENT_MASK_FSB_data_activity_DRDY_OOTHER 0x04
+#define EVENT_MASK_FSB_data_activity_DBSY_DRV 0x08
+#define EVENT_MASK_FSB_data_activity_DBSY_OWN 0x10
+#define EVENT_MASK_FSB_data_activity_DBSY_OOTHER 0x20
+
+// Cache L2
+#define EVENT_SEL_BSQ_cache_reference 0x0c
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITS 0x001
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITE 0x002
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITM 0x004
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITS 0x008
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITE 0x010
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITM 0x020
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_MISS 0x100
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_MISS 0x200
+#define EVENT_MASK_BSQ_cache_reference_WR_L2_MISS 0x400
+
+#endif
+
+/* End of $RCSfile: p4perf.h,v $ */
diff --git a/tools/misc/mbootpack/GPL b/tools/misc/mbootpack/GPL
new file mode 100644
index 0000000000..5b6e7c66c2
--- /dev/null
+++ b/tools/misc/mbootpack/GPL
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/tools/misc/mbootpack/Makefile b/tools/misc/mbootpack/Makefile
new file mode 100644
index 0000000000..69d26dc014
--- /dev/null
+++ b/tools/misc/mbootpack/Makefile
@@ -0,0 +1,74 @@
+#
+# Makefile for mbootpack
+#
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+XEN_ROOT=../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+all: build
+build: mbootpack
+
+install: build
+ $(INSTALL_PROG) mbootpack $(DESTDIR)/usr/bin
+
+# Tools etc.
+RM := rm -f
+GDB := gdb
+INCS := -I. -I-
+DEFS :=
+LDFLAGS :=
+CC := gcc
+CFLAGS := -Wall -Wpointer-arith -Wcast-qual -Wno-unused -Wno-format
+CFLAGS += -Wmissing-prototypes
+#CFLAGS += -pipe -g -O0 -Wcast-align
+CFLAGS += -pipe -O3
+
+# What object files need building for the program
+OBJS := mbootpack.o buildimage.o
+
+# Get gcc to generate the dependencies for us.
+DEPFLAGS = -Wp,-MD,.$(@F).d
+DEPS = .*.d
+
+mbootpack: $(OBJS)
+ $(CC) -o $@ $(filter-out %.a, $^) $(LDFLAGS)
+
+clean:
+ $(RM) mbootpack *.o $(DEPS) bootsect setup bzimage_header.c bin2c
+
+bootsect: bootsect.S
+ $(CC) $(CFLAGS) $(INCS) $(DEFS) -D__MB_ASM -c bootsect.S -o bootsect.o
+ $(LD) -m elf_i386 -Ttext 0x0 -s --oformat binary bootsect.o -o $@
+
+setup: setup.S
+ $(CC) $(CFLAGS) $(INCS) $(DEFS) -D__MB_ASM -c setup.S -o setup.o
+ $(LD) -m elf_i386 -Ttext 0x0 -s --oformat binary setup.o -o $@
+
+bin2c: bin2c.o
+ $(CC) -o $@ $^
+
+bzimage_header.c: bootsect setup bin2c
+ ./bin2c -n 8 -b1 -a bzimage_bootsect bootsect > bzimage_header.c
+ ./bin2c -n 8 -b1 -a bzimage_setup setup >> bzimage_header.c
+
+buildimage.c: bzimage_header.c
+ @
+
+%.o: %.S
+ $(CC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@
+
+%.o: %.c
+ $(CC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@
+
+.PHONY: all clean gdb
+.PRECIOUS: $(OBJS) $(OBJS:.o=.c) $(DEPS)
+.SUFFIXES:
+
+-include $(DEPS)
+
+#
+# EOF
+#
diff --git a/tools/misc/mbootpack/README b/tools/misc/mbootpack/README
new file mode 100644
index 0000000000..07516529b4
--- /dev/null
+++ b/tools/misc/mbootpack/README
@@ -0,0 +1,77 @@
+
+mbootpack
+---------
+
+This is a utility to take a multiboot kernel and modules and repackage
+them in a form that a standard linux bootloader will be able to load them.
+It statically allocates memory addresses based on a 'standard' PC memory
+layout, and then saves the image of the loaded system, along with an
+almost-standard linux bzImage header which takes care of the start-of-day
+requirements of a multiboot kernel (setting up 32-bit protected mode, etc.)
+
+Example invocation, to package a xen VMM and xenlinux guest and initrd:
+
+ mbootpack -o bzImage -m ./xenlinux -m ./initrd.img ./xen-image
+
+You can now boot the 'bzImage' file using your favourite linux bootloader.
+
+The kernel command line will be provided at boot time by the bootloader
+(you can specify a kernel command-line using the '-c' flag, but it will
+be overridden at boot time unless the bootloader provides an entirely
+empty command line). If you want to override the command line for the
+first module (i.e. domain 0 kernel in Xen) at boot time, append ' -- '
+and the module command line to the bootloader command line, e.g.:
+
+ boot: bzImage com1=9600,8n1 console=com1 dom0_mem=49152 -- root=/dev/sda3 ro console=ttyS0,9600n8
+
+Everything before the '--' is passed to the kernel (xen) as its command
+line; everything after is passed to the first module (xenlinux).
+
+This is ALPHA code: there are execution paths which have *not* been
+tested, though it works for loading the Xen hypervisor using GrUB, LILO
+or SYSLINUX. Bug reports and patches are very welcome.
+
+Possible features for future versions (all look possible, if there's any
+demand for them):
+
+ - support for kernels that load below 1MB
+ - zImage-style compressed images
+ - sane error messages for insane load addresses
+ - support for the MULTIBOOT_VIDEO_MODE bit
+ - proper support for passing E820h memory-maps from bzImage
+
+
+Tim Deegan <tjd21@cl.cam.ac.uk>, March 2005
+
+
+
+License and attributions
+------------------------
+
+The bzImage header block was originally taken from the Linux kernel.
+http://www.kernel.org/
+
+Some parts of the Multiboot loader code are based on GNU GRUB.
+mb_info.h and mb_header.h are taken from GNU GRUB.
+http://www.gnu.org/software/grub/
+
+Bin2C was written by Nicolas Doualot; I tidied it a bit for a clean compile.
+http://slubman.celeonet.fr/program.php?style=Default&project=bin2c
+
+All other code is copyright (C) 2003-2005 Tim Deegan (tjd21@cl.cam.ac.uk)
+
+mbootpack is distributed under the GNU General Public License: see "GPL"
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
diff --git a/tools/misc/mbootpack/bin2c.c b/tools/misc/mbootpack/bin2c.c
new file mode 100644
index 0000000000..609335da7d
--- /dev/null
+++ b/tools/misc/mbootpack/bin2c.c
@@ -0,0 +1,356 @@
+/***************************************************************************************
+ Project informations:
+ Project: bin2c
+ Version: 1.00
+ Plateforme: PC
+ Copyright: DNDD.INC
+ Date: 28/03/2004
+
+ File informations:
+ Name: bin2c.c
+ Description:Convert any file to a C array
+
+ Author informations:
+ Author: DOUALOT Nicolas
+ E-Mail: slubman@laposte.net
+ site: http://membres.lycos.fr/slubman/gp32
+***************************************************************************************/
+
+
+#include <stdio.h> /*perror */
+#include <sys/mman.h> /*PROT_READ,MAP_xxx */
+#include <fcntl.h> /*O_RDONLY */
+#include <sys/stat.h> /*stat */
+#include <stdlib.h> /*atoi */
+#include <string.h> /*strcmp */
+#include <ctype.h> /*toupper */
+
+#define VERSION "1.10"
+
+
+static void help(void)
+{
+    /* Print the usage summary for bin2c on stdout. */
+    static const char *const usage[] = {
+        "\nbin2c v" VERSION "\n",
+        "Slubman DevSoft (c)2003-2004 slubman.dndd@laposte.net \n\n",
+        "Usage: bin2c [flags] <infile>\n\n",
+        "\t-n <count> :\tnumber of items per line\n",
+        "\t-b1 :\tgenerate unsigned char array\n",
+        "\t-b2 :\tgenerate unsigned short array\n",
+        "\t-b4 :\tgenerate unsigned long array\n",
+        "\t-a <name> :\tgenerate an array with given name\n",
+        "\t-ss <nr> :\tskip number of bytes at begin of inputfile\n",
+        "\t-se <nr> :\tskip number of bytes at end of inputfile\n",
+        "\t-lb <nr> :\tinsert an additionally linebreak every nr line\n",
+        "\t-h :\tproduce an header\n",
+        "\tinfile :\tname of infile\n",
+        "\toutfile :\tname of outfile (use \"-\" for stdout)\n\n",
+        " \tconverts binary file to C array data\n",
+    };
+    size_t line;
+
+    for (line = 0; line < sizeof usage / sizeof usage[0]; line++)
+        fputs(usage[line], stdout);
+}
+
+static void UnknownFlag(char *flag)
+{
+    /* Report an unrecognised command-line flag, show the usage text,
+     * and terminate with a failure status. */
+    fputs("Error: unknown flag ", stderr);
+    fputs(flag, stderr);
+    fputc('\n', stderr);
+    help();
+    exit(EXIT_FAILURE);
+}
+
+static void WriteHeader(FILE * outFile, char *oFileName, char *iFileName)
+{
+    // Emit the banner comment at the top of a generated file.
+    // Either name may be NULL (reading stdin / writing stdout); passing
+    // NULL to a %s conversion is undefined behaviour, so substitute a
+    // placeholder string first.
+    const char *oName = oFileName ? oFileName : "(stdout)";
+    const char *iName = iFileName ? iFileName : "(stdin)";
+
+    // File Header
+    fprintf(outFile, "/***************************************************************************************\n");
+    fprintf(outFile, "* File Name:\n");
+    fprintf(outFile, "* Name: %s\n", oName);
+    fprintf(outFile, "* From: %s\n", iName);
+    fprintf(outFile, "* Created by :bin2c v"VERSION"\n*\n");
+    fprintf(outFile, "* bin2c v"VERSION":\n");
+    fprintf(outFile, "* Author: DOUALOT Nicolas\n");
+    fprintf(outFile, "* E-Mail: slubman.dndd@laposte.net\n");
+    fprintf(outFile, "* site: http://www.slubman.linux-fan.com/\n");
+    fprintf(outFile, "***************************************************************************************/\n\n");
+}
+
+int main(int argc, char *argv[])
+{
+    // Convert a binary file into a C array definition (optionally with a
+    // matching header).  Reads <infile>, writes <outfile> or stdout.
+    FILE *inFile = stdin, *outFile = stdout;
+    int a, i, nbLine = 0;
+    long items;                    // Number of array elements to emit
+    unsigned char *memory;
+    struct stat st;
+
+    // Options
+    char arrayName[255] = "array"; // Array name
+    char *iFileName = NULL;        // File to convert
+    char *oFileName = NULL;        // File to write
+    int bpd = 1;                   // Bytes per array item (1, 2 or 4)
+    int lb = 0;                    // Extra blank line every lb lines
+    int nbCol = 15;                // Number of items per line
+    int SkeepStart = 0;            // Bytes to skip at beginning of input
+    int SkeepEnd = 0;              // Bytes to skip at end of input
+    int header = 0;                // Also produce a header file
+
+    // Need at least an input file name
+    if (argc < 2)
+    {
+        help();
+        return 0;
+    }
+
+    // Parse the arguments
+    for (a = 1; a < argc; a++)
+    {
+        // An option
+        if (argv[a][0] == '-')
+        {
+            // Which flag is it?
+            switch (argv[a][1])
+            {
+                // A bare "-": write to stdout
+                case 0:
+                    printf("%s\n", argv[a]);
+                    outFile = stdout;
+                    break;
+
+                // Array name
+                case 'a':
+                    // snprintf instead of strcpy: a long argv value must
+                    // not overflow the fixed-size buffer
+                    snprintf(arrayName, sizeof arrayName, "%s", argv[++a]);
+                    break;
+
+                // Data item width
+                case 'b':
+                    switch (argv[a][2])
+                    {
+                        case '1':
+                            bpd = 1;
+                            break;
+
+                        case '2':
+                            bpd = 2;
+                            break;
+
+                        case '4':
+                            bpd = 4;
+                            break;
+
+                        default:
+                            UnknownFlag(argv[a]);
+                    }
+                    break;
+
+                // Produce a header
+                case 'h':
+                    header = 1;
+                    break;
+
+                // Extra blank line every n lines
+                case 'l':
+                    switch (argv[a][2])
+                    {
+                        case 'b':
+                            lb = atoi(argv[++a]);
+                            break;
+
+                        default:
+                            UnknownFlag(argv[a]);
+                    }
+                    // BUG FIX: missing break here used to fall through
+                    // into the 'n' case and swallow an extra argument
+                    break;
+
+                // Number of items per line
+                case 'n':
+                    nbCol = atoi(argv[++a]);
+                    break;
+
+                // Skip bytes
+                case 's':
+                    switch (argv[a][2])
+                    {
+                        // Beginning of file
+                        case 's':
+                            SkeepStart = atoi(argv[++a]);
+                            break;
+
+                        // End of file
+                        case 'e':
+                            SkeepEnd = atoi(argv[++a]);
+                            break;
+
+                        default:
+                            UnknownFlag(argv[a]);
+                    }
+                    // BUG FIX: missing break here used to fall through
+                    // into the default case and reject valid -ss/-se flags
+                    break;
+
+                // Unknown flag
+                default:
+                    UnknownFlag(argv[a]);
+            }
+        }
+        // A filename: first is the input, second the output
+        else
+        {
+            if (iFileName == NULL)
+            {
+                iFileName = argv[a];
+                if ((inFile = fopen(iFileName, "rb")) == NULL)
+                {
+                    fprintf(stderr, "Error: can't open %s\n", iFileName);
+                    exit(EXIT_FAILURE);
+                }
+            }
+            else
+            {
+                if (oFileName == NULL)
+                {
+                    oFileName = argv[a];
+                    if ((outFile = fopen(oFileName, "wb")) == NULL)
+                    {
+                        fprintf(stderr, "Error: can't open %s\n", oFileName);
+                        exit(EXIT_FAILURE);
+                    }
+                }
+                else
+                {
+                    fprintf(stderr, "Error: Too many filesnames given!\n");
+                    help();
+                    exit(EXIT_FAILURE);
+                }
+            }
+        }
+    }
+
+    if (!iFileName)
+        exit(EXIT_FAILURE);
+
+    // Get the input file size
+    // (messages report iFileName, not argv[1], which may be a flag)
+    if (stat(iFileName, &st) != 0)
+    {
+        fprintf(stderr, "Error: when scanning file %s\n", iFileName);
+        exit(EXIT_FAILURE);
+    }
+
+    // Allocate a zeroed buffer; the 3 padding bytes guarantee that 2- and
+    // 4-byte reads of a ragged tail never touch uninitialised memory
+    if (!(memory = malloc(st.st_size + 3)))
+    {
+        // BUG FIX: the original called memset() on the NULL pointer here
+        fprintf(stderr, "Error: not enough memory\n");
+        exit(EXIT_FAILURE);
+    }
+    memset(memory, 0, st.st_size + 3);
+
+    // Read the whole file
+    if (fread(memory, 1, st.st_size, inFile) != (size_t)st.st_size)
+    {
+        fprintf(stderr, "Error: when reading file %s\n", iFileName);
+        fclose(inFile);
+        exit(EXIT_FAILURE);
+    }
+    fclose(inFile);
+
+    items = (st.st_size - SkeepStart - SkeepEnd) / bpd;
+
+    // Produce the companion header file if requested
+    if (header)
+    {
+        unsigned int c;
+        // BUG FIX: hFileName was uninitialised when writing to stdout
+        char hFileName[256] = "stdout", *def = NULL;
+        FILE *hFile = stdout;
+
+        if (oFileName)
+        {
+            strcpy(hFileName, oFileName);
+            hFileName[strlen(hFileName) - 1] = 'h';
+            if ((hFile = fopen(hFileName, "wt")) == NULL)
+            {
+                fprintf(stderr, "Error: can't open %s\n", hFileName);
+                exit(EXIT_FAILURE);
+            }
+        }
+
+        WriteHeader(hFile, hFileName, iFileName);
+
+        // Build the include-guard name: '.' -> '_', the rest upper-cased
+        for (c = 0; c < strlen(hFileName); c++)
+            if (hFileName[c] == '.')
+                hFileName[c] = '_';
+            else
+                hFileName[c] = toupper((unsigned char)hFileName[c]);
+
+        // the #ifdef at the beginning (guard from basename only)
+        def = strrchr(hFileName, '/');
+        def = def ? def + 1 : hFileName;
+        fprintf(hFile, "#ifndef __%s__\n#define __%s__\n\n", def, def);
+
+        // Define array size
+        fprintf(hFile, "#define _%s_size_ %u\n\n", arrayName,
+                (unsigned int) items);
+
+        // Declare the array
+        fprintf(hFile, "extern unsigned ");
+        fprintf(hFile, "%s ", bpd == 1 ? "char" : bpd == 2 ? "short" : "long");
+        fprintf(hFile, "%s[", arrayName);
+        fprintf(hFile, "%u];\n\n", (unsigned int) items);
+
+        // the #endif at the end
+        fprintf(hFile, "#endif\n\n");
+
+        if (oFileName)
+            fclose(hFile);
+    }
+
+    WriteHeader(outFile, oFileName, iFileName);
+
+    // Define array size here when no separate header is produced
+    if (!header)
+        fprintf(outFile, "#define _%s_size_ %u\n\n", arrayName,
+                (unsigned int) items);
+
+    // Open the array definition
+    fprintf(outFile, "unsigned ");
+    fprintf(outFile, "%s ", bpd == 1 ? "char" : bpd == 2 ? "short" : "long");
+    fprintf(outFile, "%s[", arrayName);
+    fprintf(outFile, "%u] = {\n\t", (unsigned int) items);
+
+    // Write the elements, one item of bpd bytes at a time.
+    // BUG FIX: the byte index must advance bpd bytes per item (the
+    // original re-read overlapping bytes for -b2/-b4), and items are
+    // copied out with memcpy to avoid misaligned/oversized loads.
+    for (i = 0; i < items; i++)
+    {
+        switch (bpd)
+        {
+            case 1:
+                fprintf(outFile, "0x%02x",
+                        (unsigned int) memory[SkeepStart + i]);
+                break;
+
+            case 2:
+            {
+                unsigned short v;
+                memcpy(&v, &memory[SkeepStart + i * 2], 2);
+                fprintf(outFile, "0x%04x", (unsigned int) v);
+                break;
+            }
+
+            case 4:
+            {
+                // read exactly 4 bytes (unsigned long is 8 bytes on LP64)
+                unsigned int v;
+                memcpy(&v, &memory[SkeepStart + i * 4], 4);
+                fprintf(outFile, "0x%08x", v);
+                break;
+            }
+        }
+
+        // Comma after every element except the last
+        // BUG FIX: the original compared i against st.st_size - 1
+        if (i != items - 1)
+            fprintf(outFile, ",");
+
+        // End of a line ?
+        if (i && !((i + 1) % nbCol))
+        {
+            // -lb option
+            if (lb && !((++nbLine) % lb))
+                fprintf(outFile, "\n");
+            fprintf(outFile, "\n\t");
+        }
+        // Add a space
+        else
+            fprintf(outFile, " ");
+    }
+
+    // Terminate the last line if it was not full
+    if (items % nbCol)
+        fprintf(outFile, "\n");
+
+    // Close the array
+    fprintf(outFile, "};\n");
+
+    // Close the output file
+    if (outFile != stdout)
+        fclose(outFile);
+
+    // Free allocated memory
+    free(memory);
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/tools/misc/mbootpack/bootsect.S b/tools/misc/mbootpack/bootsect.S
new file mode 100644
index 0000000000..2cc9ee106c
--- /dev/null
+++ b/tools/misc/mbootpack/bootsect.S
@@ -0,0 +1,136 @@
+/*
+ * bootsect.S
+ *
+ * This is bootsect.S from the linux 2.6.9 sources,
+ * with minor changes for mbootpack.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * $Id: bootsect.S,v 1.2 2005/03/23 10:39:11 tjd21 Exp $
+ *
+ */
+
+#include "mbootpack.h"
+
+/*
+ * bootsect.S Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * modified by Drew Eckhardt
+ * modified by Bruce Evans (bde)
+ * modified by Chris Noe (May 1999) (as86 -> gas)
+ * gutted by H. Peter Anvin (Jan 2003)
+ *
+ * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment
+ * addresses must be multiplied by 16 to obtain their respective linear
+ * addresses. To avoid confusion, linear addresses are written using leading
+ * hex while segment addresses are written as segment:offset.
+ *
+ */
+
+/* #include <asm/boot.h> */
+/* Definitions we should have got from there */
+#define DEF_INITSEG 0x9000
+#define DEF_SYSSEG 0x1000
+#define DEF_SETUPSEG 0x9020
+#define DEF_SYSSIZE 0x7F00
+#define NORMAL_VGA 0xffff
+#define EXTENDED_VGA 0xfffe
+#define ASK_VGA 0xfffd
+
+
+/* SETUPSECTS = 4 */ /* default nr of setup-sectors */
+BOOTSEG = 0x07C0 /* original address of boot-sector */
+INITSEG = DEF_INITSEG /* we move boot here - out of the way */
+SETUPSEG = DEF_SETUPSEG /* setup starts here */
+SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
+SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
+ /* to be loaded */
+ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
+SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */
+
+#ifndef SVGA_MODE
+/* #define SVGA_MODE ASK_VGA */
+#define SVGA_MODE NORMAL_VGA
+#endif
+
+#ifndef RAMDISK
+#define RAMDISK 0
+#endif
+
+#ifndef ROOT_RDONLY
+#define ROOT_RDONLY 1
+#endif
+
+.code16
+.text
+
+.global _start
+_start:
+
+ # Normalize the start address
+ jmpl $BOOTSEG, $start2
+
+start2:
+ movw %cs, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ movw $0x7c00, %sp
+ sti
+ cld
+
+ movw $bugger_off_msg, %si
+
+msg_loop:
+ lodsb
+ andb %al, %al
+ jz die
+ movb $0xe, %ah
+ movw $7, %bx
+ int $0x10
+ jmp msg_loop
+
+die:
+ # Allow the user to press a key, then reboot
+ xorw %ax, %ax
+ int $0x16
+ int $0x19
+
+ # int 0x19 should never return. In case it does anyway,
+ # invoke the BIOS reset code...
+ ljmp $0xf000,$0xfff0
+
+
+bugger_off_msg:
+ .ascii "Direct booting from floppy is no longer supported.\r\n"
+ .ascii "Please use a boot loader program instead.\r\n"
+ .ascii "\n"
+ .ascii "Remove disk and press any key to reboot . . .\r\n"
+ .byte 0
+
+
+ # Kernel attributes; used by setupbegtext
+
+ .org 497
+setup_sects: .byte SETUPSECTS
+root_flags: .word ROOT_RDONLY
+syssize: .word SYSSIZE
+swap_dev: .word SWAP_DEV
+ram_size: .word RAMDISK
+vid_mode: .word SVGA_MODE
+root_dev: .word ROOT_DEV
+boot_flag: .word 0xAA55
diff --git a/tools/misc/mbootpack/buildimage.c b/tools/misc/mbootpack/buildimage.c
new file mode 100644
index 0000000000..223172803d
--- /dev/null
+++ b/tools/misc/mbootpack/buildimage.c
@@ -0,0 +1,174 @@
+/*
+ * buildimage.c
+ *
+ * Takes the memory image of a loaded kernel and modules and repackages
+ * it as a linux bzImage
+ *
+ * Copyright (C) 2003-2004 Tim Deegan (tjd21@cl.cam.ac.uk)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * $Id: buildimage.c,v 1.2 2005/03/23 10:39:19 tjd21 Exp $
+ *
+ */
+
+
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+#include <elf.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <asm/page.h>
+
+#include "mbootpack.h"
+#include "mb_header.h"
+
+/* We will build an image that a bzImage-capable bootloader will load like
+ * this:
+ *
+ * ============== (0)
+ * (BIOS memory)
+ * --------------
+ * (Bootloader)
+ * --------------
+ * bzImage startup code
+ * MBI, command-lines, module info
+ * ============== (0xa0000)
+ * (memory hole)
+ * ============== (0x100000)
+ * Kernel and modules
+ * ==============
+ *
+ * The bzImage startup code is mostly taken straight from the linux kernel
+ * (see bootsect.S, startup.S). It does the usual unpleasant start-of-day
+ * tasks to get to 32-bit protected mode, then sets registers appropriately
+ * and jumps to the kernel's entry address.
+ *
+ * It also does some relocation to make sure the MBI is where we expect it,
+ * and parses the linux command line.
+ */
+
+#define BZ_SETUP_OFFSET (512 * (1 + SETUPSECTS))
+#define BZ_ENTRY_OFFSET 0x30
+#define BZ_MBI_OFFSET 0x34
+/* These *MUST* fit the offsets of entry_address and mbi_address in setup.S */
+
+/* Bring in the bzImage boot sector and setup code */
+#include "bzimage_header.c"
+
+address_t place_mbi(long int size)
+/* Find space at the top of *low* memory for the MBI and associated red tape */
+{
+    address_t start;
+
+    /* Squeeze the MBI, command lines and module records in between the
+     * end of the bzImage setup code (loaded at 0x9000) and 0xa000. */
+    start = 0xa000 - size;
+    if (start < 0x9000 + sizeof(bzimage_bootsect) + sizeof(bzimage_setup)) {
+        /* Explicit casts: 'size' is long and the sizeof expression is
+         * size_t, so the original "%i" conversions were undefined. */
+        printf("Fatal: command-lines too long: need %ld, have %lu bytes\n",
+               size,
+               (unsigned long)
+               (0x1000 - (sizeof(bzimage_bootsect) + sizeof(bzimage_setup))));
+        exit(1);
+    }
+    if (!quiet) {
+        /* address_t is used as an integer here, so printing it with "%p"
+         * (which requires a void *) was undefined behaviour. */
+        printf("Placed MBI and strings (%#lx+%#lx)\n",
+               (unsigned long) start, (unsigned long) size);
+    }
+    return start;
+}
+
+void make_bzImage(section_t *sections,
+                  address_t entry,
+                  address_t mbi,
+                  FILE *fp)
+/* Rework this list of sections into a bzImage and write it out to fp */
+{
+    int i;
+    size_t offset;
+    section_t *s;
+
+    /* Patch the kernel and mbi addresses into the setup code; the two
+     * offsets MUST match entry_address and mbi_address in setup.S. */
+    *(address_t *)(bzimage_setup + BZ_ENTRY_OFFSET) = entry;
+    *(address_t *)(bzimage_setup + BZ_MBI_OFFSET) = mbi;
+    /* address_t is used as an integer, so "%p" (void * expected) was UB */
+    if (!quiet) printf("Kernel entry is %#lx, MBI is %#lx.\n",
+                       (unsigned long) entry, (unsigned long) mbi);
+
+    /* Write out header and trampoline */
+    if (fseek(fp, 0, SEEK_SET) < 0) {
+        printf("Fatal: error seeking in output file: %s\n",
+               strerror(errno));
+        exit(1);
+    }
+    if (fwrite(bzimage_bootsect, sizeof(bzimage_bootsect), 1, fp) != 1) {
+        printf("Fatal: error writing to output file: %s\n",
+               strerror(errno));
+        exit(1);
+    }
+    if (fwrite(bzimage_setup, sizeof(bzimage_setup), 1, fp) != 1) {
+        printf("Fatal: error writing to output file: %s\n",
+               strerror(errno));
+        exit(1);
+    }
+
+    /* sizeof yields size_t: "%i" was undefined behaviour here */
+    if (!quiet) printf("Wrote bzImage header: %lu + %lu bytes.\n",
+                       (unsigned long) sizeof(bzimage_bootsect),
+                       (unsigned long) sizeof(bzimage_setup));
+
+    /* Sorted list of sections below 1MB: write them out, placed relative
+     * to the load address of the boot sector (0x9000) */
+    for (s = sections, i = 0; s; s = s->next) {
+        if (s->start >= HIGHMEM_START) continue;
+        offset = (s->start - 0x9000);
+        if (fseek(fp, offset, SEEK_SET) < 0) {
+            printf("Fatal: error seeking in output file: %s\n",
+                   strerror(errno));
+            exit(1);
+        }
+        if (fwrite(s->buffer, s->size, 1, fp) != 1) {
+            printf("Fatal: error writing to output file: %s\n",
+                   strerror(errno));
+            exit(1);
+        }
+        i++;
+    }
+
+    if (!quiet) printf("Wrote %i low-memory sections.\n", i);
+
+    /* Sorted list of sections higher than 1MB: write them out, placed
+     * after the setup code in the file */
+    for (s = sections, i = 0; s; s = s->next) {
+        if (s->start < HIGHMEM_START) continue;
+        offset = (s->start - HIGHMEM_START) + BZ_SETUP_OFFSET;
+        if (fseek(fp, offset, SEEK_SET) < 0) {
+            printf("Fatal: error seeking in output file: %s\n",
+                   strerror(errno));
+            exit(1);
+        }
+        if (fwrite(s->buffer, s->size, 1, fp) != 1) {
+            printf("Fatal: error writing to output file: %s\n",
+                   strerror(errno));
+            exit(1);
+        }
+        i++;
+    }
+
+    if (!quiet) printf("Wrote %i high-memory sections.\n", i);
+}
+
+
+/*
+ * EOF(buildimage.c)
+ */
diff --git a/tools/misc/mbootpack/mb_header.h b/tools/misc/mbootpack/mb_header.h
new file mode 100644
index 0000000000..21934574f3
--- /dev/null
+++ b/tools/misc/mbootpack/mb_header.h
@@ -0,0 +1,90 @@
+/*
+ * GRUB -- GRand Unified Bootloader
+ * Copyright (C) 2000 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * MultiBoot Header description
+ */
+
+struct multiboot_header
+{
+ /* Must be MULTIBOOT_MAGIC - see below. */
+ unsigned magic;
+
+ /* Feature flags - see below. */
+ unsigned flags;
+
+ /*
+ * Checksum
+ *
+ * The above fields plus this one must equal 0 mod 2^32.
+ */
+ unsigned checksum;
+
+ /* These are only valid if MULTIBOOT_AOUT_KLUDGE is set. */
+ unsigned header_addr;
+ unsigned load_addr;
+ unsigned load_end_addr;
+ unsigned bss_end_addr;
+ unsigned entry_addr;
+
+ /* These are only valid if MULTIBOOT_VIDEO_MODE is set. */
+ unsigned mode_type;
+ unsigned width;
+ unsigned height;
+ unsigned depth;
+};
+
+/*
+ * The entire multiboot_header must be contained
+ * within the first MULTIBOOT_SEARCH bytes of the kernel image.
+ */
+#define MULTIBOOT_SEARCH 8192
+#define MULTIBOOT_FOUND(addr, len) \
+ (! ((addr) & 0x3) \
+ && (len) >= 12 \
+ && *((int *) (addr)) == MULTIBOOT_MAGIC \
+ && ! (*((unsigned *) (addr)) + *((unsigned *) (addr + 4)) \
+ + *((unsigned *) (addr + 8))) \
+ && (! (MULTIBOOT_AOUT_KLUDGE & *((int *) (addr + 4))) || (len) >= 32) \
+ && (! (MULTIBOOT_VIDEO_MODE & *((int *) (addr + 4))) || (len) >= 48))
+
+/* Magic value identifying the multiboot_header. */
+#define MULTIBOOT_MAGIC 0x1BADB002
+
+/*
+ * Features flags for 'flags'.
+ * If a boot loader sees a flag in MULTIBOOT_MUSTKNOW set
+ * and it doesn't understand it, it must fail.
+ */
+#define MULTIBOOT_MUSTKNOW 0x0000FFFF
+
+/* currently unsupported flags... this is a kind of version number. */
+#define MULTIBOOT_UNSUPPORTED 0x0000FFF8
+
+/* Align all boot modules on i386 page (4KB) boundaries. */
+#define MULTIBOOT_PAGE_ALIGN 0x00000001
+
+/* Must pass memory information to OS. */
+#define MULTIBOOT_MEMORY_INFO 0x00000002
+
+/* Must pass video information to OS. */
+#define MULTIBOOT_VIDEO_MODE 0x00000004
+
+/* This flag indicates the use of the address fields in the header. */
+#define MULTIBOOT_AOUT_KLUDGE 0x00010000
diff --git a/tools/misc/mbootpack/mb_info.h b/tools/misc/mbootpack/mb_info.h
new file mode 100644
index 0000000000..fb37f10ff0
--- /dev/null
+++ b/tools/misc/mbootpack/mb_info.h
@@ -0,0 +1,217 @@
+/*
+ * GRUB -- GRand Unified Bootloader
+ * Copyright (C) 2000 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * The structure type "mod_list" is used by the "multiboot_info" structure.
+ */
+
+struct mod_list
+{
+ /* the memory used goes from bytes 'mod_start' to 'mod_end-1' inclusive */
+ unsigned long mod_start;
+ unsigned long mod_end;
+
+ /* Module command line */
+ unsigned long cmdline;
+
+ /* padding to take it to 16 bytes (must be zero) */
+ unsigned long pad;
+};
+
+
+/*
+ * INT-15, AX=E820 style "AddressRangeDescriptor"
+ * ...with a "size" parameter on the front which is the structure size - 4,
+ * pointing to the next one, up until the full buffer length of the memory
+ * map has been reached.
+ */
+
+struct AddrRangeDesc
+{
+ unsigned long size;
+ unsigned long long BaseAddr;
+ unsigned long long Length;
+ unsigned long Type;
+
+ /* unspecified optional padding... */
+};
+
+/* usable memory "Type", all others are reserved. */
+#define MB_ARD_MEMORY 1
+
+
+/* Drive Info structure. */
+struct drive_info
+{
+ /* The size of this structure. */
+ unsigned long size;
+
+ /* The BIOS drive number. */
+ unsigned char drive_number;
+
+ /* The access mode (see below). */
+ unsigned char drive_mode;
+
+ /* The BIOS geometry. */
+ unsigned short drive_cylinders;
+ unsigned char drive_heads;
+ unsigned char drive_sectors;
+
+ /* The array of I/O ports used for the drive. */
+ unsigned short drive_ports[0];
+};
+
+/* Drive Mode. */
+#define MB_DI_CHS_MODE 0
+#define MB_DI_LBA_MODE 1
+
+
+/* APM BIOS info. */
+struct apm_info
+{
+ unsigned short version;
+ unsigned short cseg;
+ unsigned long offset;
+ unsigned short cseg_16;
+ unsigned short dseg_16;
+ unsigned short cseg_len;
+ unsigned short cseg_16_len;
+ unsigned short dseg_16_len;
+};
+
+
+/*
+ * MultiBoot Info description
+ *
+ * This is the struct passed to the boot image. This is done by placing
+ * its address in the EAX register.
+ */
+
+struct multiboot_info
+{
+ /* MultiBoot info version number */
+ unsigned long flags;
+
+ /* Available memory from BIOS */
+ unsigned long mem_lower;
+ unsigned long mem_upper;
+
+ /* "root" partition */
+ unsigned long boot_device;
+
+ /* Kernel command line */
+ unsigned long cmdline;
+
+ /* Boot-Module list */
+ unsigned long mods_count;
+ unsigned long mods_addr;
+
+ union
+ {
+ struct
+ {
+ /* (a.out) Kernel symbol table info */
+ unsigned long tabsize;
+ unsigned long strsize;
+ unsigned long addr;
+ unsigned long pad;
+ }
+ a;
+
+ struct
+ {
+ /* (ELF) Kernel section header table */
+ unsigned long num;
+ unsigned long size;
+ unsigned long addr;
+ unsigned long shndx;
+ }
+ e;
+ }
+ syms;
+
+ /* Memory Mapping buffer */
+ unsigned long mmap_length;
+ unsigned long mmap_addr;
+
+ /* Drive Info buffer */
+ unsigned long drives_length;
+ unsigned long drives_addr;
+
+ /* ROM configuration table */
+ unsigned long config_table;
+
+ /* Boot Loader Name */
+ unsigned long boot_loader_name;
+
+ /* APM table */
+ unsigned long apm_table;
+
+ /* Video */
+ unsigned long vbe_control_info;
+ unsigned long vbe_mode_info;
+ unsigned short vbe_mode;
+ unsigned short vbe_interface_seg;
+ unsigned short vbe_interface_off;
+ unsigned short vbe_interface_len;
+};
+
+/*
+ * Flags to be set in the 'flags' parameter above
+ */
+
+/* is there basic lower/upper memory information? */
+#define MB_INFO_MEMORY 0x00000001
+/* is there a boot device set? */
+#define MB_INFO_BOOTDEV 0x00000002
+/* is the command-line defined? */
+#define MB_INFO_CMDLINE 0x00000004
+/* are there modules to do something with? */
+#define MB_INFO_MODS 0x00000008
+
+/* These next two are mutually exclusive */
+
+/* is there a symbol table loaded? */
+#define MB_INFO_AOUT_SYMS 0x00000010
+/* is there an ELF section header table? */
+#define MB_INFO_ELF_SHDR 0x00000020
+
+/* is there a full memory map? */
+#define MB_INFO_MEM_MAP 0x00000040
+
+/* Is there drive info? */
+#define MB_INFO_DRIVE_INFO 0x00000080
+
+/* Is there a config table? */
+#define MB_INFO_CONFIG_TABLE 0x00000100
+
+/* Is there a boot loader name? */
+#define MB_INFO_BOOT_LOADER_NAME 0x00000200
+
+/* Is there a APM table? */
+#define MB_INFO_APM_TABLE 0x00000400
+
+/* Is there video information? */
+#define MB_INFO_VIDEO_INFO 0x00000800
+
+/*
+ * The following value must be present in the EAX register.
+ */
+
+#define MULTIBOOT_VALID 0x2BADB002
diff --git a/tools/misc/mbootpack/mbootpack.c b/tools/misc/mbootpack/mbootpack.c
new file mode 100644
index 0000000000..42a3151cd9
--- /dev/null
+++ b/tools/misc/mbootpack/mbootpack.c
@@ -0,0 +1,703 @@
+/*
+ * mbootpack.c
+ *
+ * Takes a multiboot image, command-line and modules, and repackages
+ * them as if they were a linux kernel. Only supports a subset of
+ * the multiboot info page options (enough to boot the Xen hypervisor).
+ *
+ * Copyright (C) 2003-2004 Tim Deegan (tjd21@cl.cam.ac.uk)
+ *
+ * Parts based on GNU GRUB, Copyright (C) 2000 Free Software Foundation, Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * $Id: mbootpack.c,v 1.3 2005/03/23 10:38:36 tjd21 Exp tjd21 $
+ *
+ */
+
+#define _GNU_SOURCE
+#include "mbootpack.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+#include <elf.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <asm/page.h>
+
+/* From GNU GRUB */
+#include "mb_header.h"
+#include "mb_info.h"
+
+
+/*
+ * The plan: Marshal up the multiboot modules and strings as if we
+ * were loading them into memory on a fresh ix86 PC. Attach
+ * a linux bzImage header to the front, which sets up the machine
+ * appropriately and then jumps to the kernel entry address.
+ *
+ * The memory map will be made up roughly like so:
+ *
+ * =============
+ * multiboot information (mbi) struct
+ * -------
+ * kernel command line
+ * -------
+ * bootloader name
+ * -------
+ * module command lines
+ * -------
+ * module information structs
+ * =============
+ * (memory hole)
+ * =============
+ * kernel
+ * -------------
+ * module 1
+ * -------------
+ * module 2
+ * -------------
+ * .
+ * .
+ * .
+ *
+ * ==============
+ *
+ *
+ * For allocation of memory we assume that the target machine has 'low'
+ * memory from 0 to 640K and 'high' memory starting at 1M. We allocate
+ * the kernel first, wherever it wants to be. After that, sections
+ * are added at the next available aligned address, always in the order
+ * given above, and skipping the memory hole at 640K. Allocated sections
+ * are stored in a linked list of buffers.
+ *
+ * Re-packaging as a bzImage file happens in buildimage.c
+ *
+ */
+
+/* Version */
+static const char version_string[] = "mbootpack " MBOOTPACK_VERSION_STRING;
+
+/* Flags */
+int quiet = 0;
+
+/* How much of the start of a kernel we read looking for headers.
+ * Must be >= MULTIBOOT_SEARCH */
+#define HEADERBUF_SIZE MULTIBOOT_SEARCH
+
+
+/* Linked list of loaded sections, and a pointer to the next
+ * available space (i.e. just above the highest allocation so far). */
+static section_t *sections = NULL;
+static section_t *last_section = NULL;
+static address_t next_free_space = 0;
+
+static void usage(void)
+/* If we don't understand the command-line options: print the help
+ * text and terminate with a non-zero exit status.  Never returns. */
+{
+    /* NB: the option strings here must be kept in sync with
+     * short_options/options in main(). */
+    printf(
+"Usage: mbpack [OPTIONS] kernel-image\n\n"
+" -h --help Print this text.\n"
+" -q --quiet Only output errors and warnings.\n"
+" -o --output=filename Output to filename (default \"bzImage\").\n"
+" -M --multiboot-output Produce a multiboot kernel, not a bzImage\n"
+" (sets default output file to \"mbImage\").\n"
+" -c --command-line=STRING Set the kernel command line (DEPRECATED!).\n"
+" -m --module=\"MOD arg1 arg2...\" Load module MOD with arguments \"arg1...\"\n"
+" (can be used multiple times).\n"
+"\n");
+    exit(1);
+}
+
+
+static void place_kernel_section(address_t start, long int size)
+/* Place a kernel section at its requested load address, checking that
+ * it does not collide with the legacy memory hole (640K-1M, i.e.
+ * MEM_HOLE_START-MEM_HOLE_END).  Exits the program on collision.
+ * On success, advances next_free_space past the end of the section. */
+{
+    /* Fix: the original passed unsigned long / long int arguments for
+     * "%p" conversions, which is undefined behavior ("%p" requires
+     * void *).  Print the values as hex integers instead. */
+    if (start >= MEM_HOLE_END) {
+        /* Above the memory hole: easy */
+        next_free_space = MAX(next_free_space, start + size);
+        if (!quiet) {
+            printf("Placed kernel section (%#lx+%#lx)\n",
+                   (unsigned long)start, (unsigned long)size);
+        }
+        return;
+    }
+
+    if (start >= MEM_HOLE_START) {
+        /* In the memory hole.  Not so good */
+        printf("Fatal: kernel load address (%#lx) is in the memory hole.\n",
+               (unsigned long)start);
+        exit(1);
+    }
+
+    if (start + size > MEM_HOLE_START) {
+        /* Too big for low memory */
+        printf("Fatal: kernel (%#lx+%#lx) runs into the memory hole.\n",
+               (unsigned long)start, (unsigned long)size);
+        exit(1);
+    }
+
+    /* Kernel loads below the memory hole */
+    next_free_space = MAX(next_free_space, start + size);
+
+    if (!quiet) {
+        printf("Placed kernel section (%#lx+%#lx)\n",
+               (unsigned long)start, (unsigned long)size);
+    }
+}
+
+
+static address_t place_section(long int size, int align)
+/* Find the next available target address for a section of the given
+ * size.  "align" must be a power of 2.  Skips the 640K-1M memory hole
+ * if the section would otherwise straddle it, and advances
+ * next_free_space.  Returns the chosen start address. */
+{
+    address_t start;
+    assert(next_free_space != 0);
+    /* (-align & align) == align  <=>  align is a power of two */
+    assert(((~align + 1) & align) == align);
+
+    start = ROUNDUP_P2(next_free_space, align);
+
+    /* Check that we don't hit the memory hole */
+    if (start < MEM_HOLE_END && (start + size) > MEM_HOLE_START)
+        start = ROUNDUP_P2(MEM_HOLE_END, align);
+
+    next_free_space = start + size;
+
+    if (!quiet) {
+        /* Fix: the original passed integer types for "%p" conversions,
+         * which is undefined behavior; print them as hex integers. */
+        printf("Placed section (%#lx+%#lx), align=%#lx\n",
+               (unsigned long)start, (unsigned long)size,
+               (unsigned long)align);
+    }
+    return start;
+}
+
+
+
+
+static address_t load_kernel(const char *filename)
+/* Load an elf32/multiboot kernel from this file.
+ * Returns the entry address for the kernel.
+ *
+ * The first HEADERBUF_SIZE bytes of the file are scanned for a
+ * multiboot header.  If one is found, the kernel is loaded either via
+ * the header's a.out-kludge load fields, or (more commonly) via its
+ * ELF32 program headers.  Loaded sections are placed with
+ * place_kernel_section() and appended to the global "sections" list,
+ * kept ordered by start address.  Any fatal problem prints a message
+ * and exits the process. */
+{
+    unsigned int i;
+    address_t start;
+    size_t len;
+    long int size, loadsize;
+    FILE *fp;
+    char *buffer;
+    section_t *sec, *s;
+    Elf32_Ehdr *ehdr;
+    Elf32_Phdr *phdr;
+    struct multiboot_header *mbh;
+    struct stat sb;
+
+    /* Scratch space for the header scan; static to keep it off the
+     * stack.  Must be >= MULTIBOOT_SEARCH bytes. */
+    static char headerbuf[HEADERBUF_SIZE];
+
+    /* Stat and open the file */
+    if (stat(filename, &sb) != 0) {
+        printf("Fatal: cannot stat %s: %s\n", filename, strerror(errno));
+        exit(1);
+    }
+    if ((fp = fopen(filename, "r")) == NULL) {
+        printf("Fatal: cannot open %s: %s\n", filename, strerror(errno));
+        exit(1);
+    }
+
+    /* Load the first 8k of the file */
+    if (fseek(fp, 0, SEEK_SET) < 0) {
+        printf("Fatal: seek error in %s: %s\n", filename, strerror(errno));
+        exit(1);
+    }
+    if ((len = fread(headerbuf, 1, HEADERBUF_SIZE, fp))
+        < HEADERBUF_SIZE)
+    {
+        if (feof(fp)) /* Short file */
+        {
+            /* 12 bytes = minimum size of a multiboot header */
+            if (len < 12) {
+                printf("Fatal: %s is too short to be a multiboot file.",
+                       filename);
+                exit(1);
+            }
+        } else {
+            printf("Fatal: read error in %s: %s\n", filename, strerror(errno));
+            exit(1);
+        }
+    }
+
+    /* Sanity-check: is this file compressed?  (Recognizes compress,
+     * gzip and bzip2 magic numbers; warn but continue.) */
+    if ((headerbuf[0] == '\037' &&
+         (headerbuf[1] == '\235' /* .Z */ ||
+          headerbuf[1] == '\213' /* .gz */)) ||
+        (headerbuf[0] == 'B' && headerbuf[1] == 'Z') /* .bz[2] */) {
+        printf("Warning: %s looks like a compressed file.\n"
+               " You should uncompress it first!\n", filename);
+    }
+
+    /* Now look for a multiboot header: a 32-bit-aligned magic word
+     * whose magic+flags+checksum sums to zero mod 2^32. */
+    for (i = 0; i <= MIN(len - 12, MULTIBOOT_SEARCH - 12); i += 4)
+    {
+        mbh = (struct multiboot_header *)(headerbuf + i);
+        if (mbh->magic != MULTIBOOT_MAGIC
+            || ((mbh->magic+mbh->flags+mbh->checksum) & 0xffffffff))
+        {
+            /* Not a multiboot header */
+            continue;
+        }
+        if (mbh->flags & MULTIBOOT_UNSUPPORTED) {
+            /* Requires options we don't support */
+            printf("Fatal: found a multiboot header, but it "
+                   "requires multiboot options that I\n"
+                   "don't understand. Sorry.\n");
+            exit(1);
+        }
+        if (mbh->flags & MULTIBOOT_VIDEO_MODE) {
+            /* Asked for screen mode information */
+            /* XXX carry on regardless */
+            printf("Warning: found a multiboot header which asks "
+                   "for screen mode information.\n"
+                   " This kernel will NOT be given valid"
+                   "screen mode information at boot time.\n");
+        }
+        /* This kernel will do: place and load it */
+
+        if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) {
+
+            /* Load using the offsets in the multiboot header */
+            if(!quiet)
+                printf("Loading %s using multiboot header.\n", filename);
+
+            /* How much is there? */
+            start = mbh->load_addr;
+            if (mbh->load_end_addr != 0)
+                loadsize = mbh->load_end_addr - mbh->load_addr;
+            else
+                loadsize = sb.st_size;
+
+            /* How much memory will it take up? (bss_end_addr, if set,
+             * covers the zero-filled BSS beyond the loaded image) */
+            if (mbh->bss_end_addr != 0)
+                size = mbh->bss_end_addr - mbh->load_addr;
+            else
+                size = loadsize;
+
+            if (loadsize > size) {
+                printf("Fatal: can't load %i bytes of kernel into %i bytes "
+                       "of memory.\n", loadsize, size);
+                exit(1);
+            }
+
+            /* Does it fit where it wants to be? */
+            place_kernel_section(start, size);
+
+            /* Load the kernel */
+            if ((buffer = malloc(size)) == NULL) {
+                printf("Fatal: malloc() for kernel load failed: %s\n",
+                       strerror(errno));
+                exit(1);
+            }
+            /* NOTE(review): no fseek before this read, so it loads
+             * from the current file position (just past the 8K header
+             * scan), not from the offset implied by the multiboot
+             * header's header_addr/load_addr fields — verify against
+             * the Multiboot spec before relying on this path. */
+            if ((fread(buffer, loadsize, 1, fp)) != 1) {
+                printf("Fatal: cannot read %s: %s\n",
+                       filename, strerror(errno));
+                exit(1);
+            }
+            fclose(fp);
+
+            /* Clear the kernel BSS */
+            memset(buffer + loadsize, 0, size - loadsize);
+
+            /* Start off the linked list of sections */
+            if ((sec = (section_t *)malloc(sizeof (section_t))) == NULL) {
+                printf("Fatal: malloc() for section_t failed: %s\n",
+                       strerror(errno));
+                exit(1);
+            }
+            sec->buffer = buffer;
+            sec->start = start;
+            sec->size = size;
+            sec->next = NULL;
+            sec->prev = NULL;
+            sections = sec;
+            last_section = sec;
+
+            /* Done. */
+            if (!quiet) printf("Loaded kernel from %s\n", filename);
+            return mbh->entry_addr;
+
+        } else {
+
+            /* Now look for an ELF32 header */
+            /* NOTE(review): on LP64 hosts "unsigned long" is 8 bytes,
+             * so this compares 8 bytes of the file against the 4-byte
+             * constant 0x464c457f ("\x7fELF") — confirm this tool is
+             * only built 32-bit, or that the following bytes make the
+             * comparison behave. */
+            ehdr = (Elf32_Ehdr *)headerbuf;
+            if (*(unsigned long *)ehdr != 0x464c457f
+                || ehdr->e_ident[EI_DATA] != ELFDATA2LSB
+                || ehdr->e_ident[EI_CLASS] != ELFCLASS32
+                || ehdr->e_machine != EM_386)
+            {
+                printf("Fatal: kernel has neither ELF32/x86 nor multiboot load"
+                       " headers.\n");
+                exit(1);
+            }
+            if (ehdr->e_phoff + ehdr->e_phnum*sizeof(*phdr) > HEADERBUF_SIZE) {
+                /* Don't expect this will happen with sane kernels */
+                printf("Fatal: too much ELF for me.  Try increasing "
+                       "HEADERBUF_SIZE in mbootpack.\n");
+                exit(1);
+            }
+            if (ehdr->e_phoff + ehdr->e_phnum*sizeof (*phdr) > len) {
+                printf("Fatal: malformed ELF header overruns EOF.\n");
+                exit(1);
+            }
+            if (ehdr->e_phnum <= 0) {
+                printf("Fatal: ELF kernel has no program headers.\n");
+                exit(1);
+            }
+
+            if(!quiet)
+                printf("Loading %s using ELF header.\n", filename);
+
+            if (ehdr->e_type != ET_EXEC
+                || ehdr->e_version != EV_CURRENT
+                || ehdr->e_phentsize != sizeof (Elf32_Phdr)) {
+                printf("Warning: funny-looking ELF header.\n");
+            }
+            phdr = (Elf32_Phdr *)(headerbuf + ehdr->e_phoff);
+
+            /* Obey the program headers to load the kernel */
+            for(i = 0; i < ehdr->e_phnum; i++) {
+
+                start = phdr[i].p_paddr;
+                size = phdr[i].p_memsz;
+                /* Non-PT_LOAD segments get memory but no file data */
+                if (phdr[i].p_type != PT_LOAD)
+                    loadsize = 0;
+                else
+                    loadsize = MIN((long int)phdr[i].p_filesz, size);
+
+                if ((buffer = malloc(size)) == NULL) {
+                    printf("Fatal: malloc() for kernel load failed: %s\n",
+                           strerror(errno));
+                    exit(1);
+                }
+
+                /* Place the section where it wants to be */
+                place_kernel_section(start, size);
+
+                /* Load section from file */
+                if (loadsize > 0) {
+                    if (fseek(fp, phdr[i].p_offset, SEEK_SET) != 0) {
+                        printf("Fatal: seek failed in %s\n",
+                               strerror(errno));
+                        exit(1);
+                    }
+                    if ((fread(buffer, loadsize, 1, fp)) != 1) {
+                        printf("Fatal: cannot read %s: %s\n",
+                               filename, strerror(errno));
+                        exit(1);
+                    }
+                }
+
+                /* Clear the rest of the buffer (BSS / zero padding) */
+                memset(buffer + loadsize, 0, size - loadsize);
+
+                /* Add this section to the list (keeping it ordered) */
+                if ((sec = (section_t *)malloc(sizeof (section_t))) == NULL) {
+                    printf("Fatal: malloc() for section_t failed: %s\n",
+                           strerror(errno));
+                    exit(1);
+                }
+                sec->buffer = buffer;
+                sec->start = start;
+                sec->size = size;
+
+                /* Insertion into the doubly-linked list, ordered by
+                 * target start address */
+                for(s = sections; s; s = s->next) {
+                    if (s->start > start) {
+                        sec->next = s;
+                        if (s->prev == NULL) {
+                            /* sec becomes the new first item */
+                            s->prev = sec;
+                            sections = sec;
+                        } else {
+                            /* sec goes between s->prev and s */
+                            sec->prev = s->prev;
+                            sec->prev->next = sec;
+                            s->prev = sec;
+                        }
+                        break;
+                    }
+                }
+                if (s == NULL) {
+                    /* sec becomes the new last item */
+                    sec->next = NULL;
+                    sec->prev = last_section;
+                    if (last_section) {
+                        last_section->next = sec;
+                    } else {
+                        sections = sec;
+                    }
+                    last_section = sec;
+                }
+            }
+
+            /* Done! */
+            if (!quiet) printf("Loaded kernel from %s\n", filename);
+            return ehdr->e_entry;
+        }
+
+    }
+
+    /* This is not a multiboot kernel */
+    printf("Fatal: %s is not a multiboot kernel.\n", filename);
+    exit(1);
+}
+
+
+
+
+int main(int argc, char **argv)
+/* Entry point: parse options, load the kernel, build the multiboot
+ * info (mbi) metadata section, load any modules, and write everything
+ * out as a bzImage via make_bzImage().  Returns 0 on success; all
+ * error paths exit(1) with a message. */
+{
+    char *buffer, *imagename, *command_line, *p;
+    char *mod_filename, *mod_command_line, *mod_clp;
+    char *out_filename;
+    section_t *sec;
+    FILE *fp;
+    struct stat sb;
+    struct multiboot_info *mbi;
+    struct mod_list *modp;
+    address_t start, kernel_entry;
+    long int size, mod_command_line_space, command_line_len;
+    int modules, opt, mbi_reloc_offset, make_multiboot;
+
+    static const char short_options[] = "hc:m:o:qM";
+    static const struct option options[] = {
+        { "help", 0, 0, 'h' },
+        { "command-line", 1, 0, 'c' },
+        { "append", 1, 0, 'c' },
+        { "module", 1, 0, 'm' },
+        { "output", 1, 0, 'o' },
+        { "quiet", 0, 0, 'q' },
+        { 0, 0, 0, 0 },
+    };
+
+    /* Parse the command line.  This first pass only counts modules and
+     * their command-line space; module arguments are re-parsed in a
+     * second getopt pass below, once buffers have been allocated. */
+    out_filename = NULL;
+    command_line = "";
+    command_line_len = 0;
+    modules = 0;
+    mod_command_line_space = 0;
+    while((opt = getopt_long(argc, argv, short_options, options, 0)) != -1)
+    {
+        switch(opt) {
+        case 'c':
+            command_line = optarg;
+            break;
+        case 'm':
+            modules++;
+            mod_command_line_space += strlen(optarg) + 1;
+            break;
+        case 'o':
+            out_filename = optarg;
+            break;
+        case 'q':
+            quiet = 1;
+            break;
+        case 'h':
+        case '?':
+        default:
+            usage();
+        }
+    }
+    imagename = argv[optind];
+    if (!imagename || strlen(imagename) == 0) usage();
+    /* +2: the space between image name and command line, plus NUL */
+    command_line_len = strlen(command_line) + strlen(imagename) + 2;
+    /* Leave space to overwrite the command-line at boot time */
+    command_line_len = MAX(command_line_len, CMD_LINE_SPACE);
+    if (!out_filename) out_filename = "bzImage";
+
+    /* Place and load the kernel */
+    kernel_entry = load_kernel(imagename);
+    assert(sections != NULL);
+    assert(last_section != NULL);
+    assert(next_free_space != 0);
+
+    /* Next section is all the metadata between kernel and modules:
+     * mbi struct, kernel command line, bootloader name, module command
+     * lines, then (4-byte aligned) the module info structs. */
+    size = ((((sizeof (struct multiboot_info)
+               + command_line_len
+               + strlen(version_string) + 1
+               + mod_command_line_space)
+              + 3 ) & ~3)
+            + modules * sizeof (struct mod_list));
+    /* Locate this section after the setup sectors, in *low* memory */
+    start = place_mbi(size);
+
+    if ((buffer = malloc(size)) == NULL) {
+        printf("Fatal: malloc() for boot metadata failed: %s\n",
+               strerror(errno));
+        exit(1);
+    }
+
+    if ((sec = (section_t *)malloc(sizeof (section_t))) == NULL) {
+        printf("Fatal: malloc() for section_t failed: %s\n",
+               strerror(errno));
+        exit(1);
+    }
+    sec->buffer = buffer;
+    sec->start = start;
+    sec->size = size;
+    sec->next = NULL;
+    sec->prev = last_section;
+    last_section->next = sec;
+    last_section = sec;
+
+    /* Multiboot info struct lives at the start of the buffer */
+    mbi = (struct multiboot_info *)buffer;
+    memset(buffer, 0, sizeof (struct multiboot_info));
+    /* Offset to convert a host buffer pointer into a target address.
+     * NOTE(review): mixes a host pointer with a target address_t in an
+     * int — assumes the difference fits; verify on 64-bit builds. */
+    mbi_reloc_offset = start - (address_t)buffer;
+
+    /* Command line: "<imagename> <command_line>" */
+    p = (char *)(mbi + 1);
+    sprintf(p, "%s %s", imagename, command_line);
+    mbi->cmdline = ((address_t)p) + mbi_reloc_offset;
+    p += command_line_len;
+
+    /* Bootloader ID.  (version_string is a constant with no '%'
+     * conversions, so using it as the format string is safe here.) */
+    sprintf(p, version_string);
+    mbi->boot_loader_name = ((address_t)p) + mbi_reloc_offset;
+    p += strlen(version_string) + 1;
+
+    /* Next is space for the module command lines */
+    mod_clp = p;
+
+    /* Last come the module info structs, 4-byte aligned */
+    modp = (struct mod_list *)
+        ((((address_t)p + mod_command_line_space) + 3) & ~3);
+    mbi->mods_count = modules;
+    mbi->mods_addr = ((address_t)modp) + mbi_reloc_offset;
+
+    /* Memory information will be added at boot time, by setup.S
+     * or trampoline.S. */
+    mbi->flags = MB_INFO_CMDLINE | MB_INFO_BOOT_LOADER_NAME;
+
+
+    /* Load the modules */
+    if (modules) {
+        mbi->flags |= MB_INFO_MODS;
+
+        /* Go back and parse the module command lines.
+         * NOTE(review): resetting optind to 1 to restart getopt is a
+         * glibc convention; other libcs may need optreset — confirm
+         * the intended build platforms. */
+        optind = opterr = 1;
+        while((opt = getopt_long(argc, argv,
+                                 short_options, options, 0)) != -1)
+        {
+            if (opt != 'm') continue;
+
+            /* Split module filename from command line */
+            mod_command_line = mod_filename = optarg;
+            if ((p = strchr(mod_filename, ' ')) != NULL) {
+                /* See as I discard the 'const' modifier */
+                *p = '\0';
+            }
+
+            /* Find space for it */
+            if (stat(mod_filename, &sb) != 0) {
+                printf("Fatal: cannot stat %s: %s\n",
+                       mod_filename, strerror(errno));
+                exit(1);
+            }
+            size = sb.st_size;
+            start = place_section(size, X86_PAGE_SIZE);
+            /* XXX should be place_section(size, 4) if the MBH hasn't got
+             * XXX MULTIBOOT_PAGE_ALIGN set, but that breaks Xen */
+
+            /* Load it */
+            if ((buffer = malloc(sb.st_size)) == NULL) {
+                printf("Fatal: malloc failed for module load: %s\n",
+                       strerror(errno));
+                exit(1);
+            }
+            if ((fp = fopen(mod_filename, "r")) == NULL) {
+                printf("Fatal: cannot open %s: %s\n",
+                       mod_filename, strerror(errno));
+                exit(1);
+            }
+            if ((fread(buffer, sb.st_size, 1, fp)) != 1) {
+                printf("Fatal: cannot read %s: %s\n",
+                       mod_filename, strerror(errno));
+                exit(1);
+            }
+            fclose(fp);
+
+            /* Sanity-check: is this file compressed? */
+            if ((buffer[0] == '\037' &&
+                 (buffer[1] == '\235' /* .Z */ ||
+                  buffer[1] == '\213' /* .gz */)) ||
+                (buffer[0] == 'B' && buffer[1] == 'Z') /* .bz[2] */) {
+                printf("Warning: %s looks like a compressed file.\n",
+                       mod_filename);
+            }
+
+            if (!quiet) printf("Loaded module from %s\n", mod_filename);
+
+            /* Restore the command line to its former glory */
+            if (p != NULL) *p = ' ';
+
+            /* Fill in the module info struct */
+            modp->mod_start = start;
+            modp->mod_end = start + size;
+            modp->cmdline = (address_t)mod_clp + mbi_reloc_offset;
+            modp->pad = 0;
+            modp++;
+
+            /* Store the module command line */
+            sprintf(mod_clp, "%s", mod_command_line);
+            mod_clp += strlen(mod_clp) + 1;
+
+            /* Add the section to the list */
+            if ((sec = (section_t *)malloc(sizeof (section_t))) == NULL) {
+                printf("Fatal: malloc() for section_t failed: %s\n",
+                       strerror(errno));
+                exit(1);
+            }
+            sec->buffer = buffer;
+            sec->start = start;
+            sec->size = size;
+            sec->next = NULL;
+            sec->prev = last_section;
+            last_section->next = sec;
+            last_section = sec;
+
+        }
+
+    }
+
+    /* Everything is placed and loaded.  Now we package it all up
+     * as a bzImage */
+    if ((fp = fopen(out_filename, "w")) == NULL) {
+        printf("Fatal: cannot open %s: %s\n", out_filename, strerror(errno));
+        exit(1);
+    }
+    make_bzImage(sections,
+                 kernel_entry,
+                 ((address_t)mbi) + mbi_reloc_offset,
+                 fp);
+    fclose(fp);
+
+    /* Success! */
+    if(!quiet) printf("Finished.\n");
+    return 0;
+}
+
+/*
+ * EOF (mbootpack.c)
+ */
+
diff --git a/tools/misc/mbootpack/mbootpack.h b/tools/misc/mbootpack/mbootpack.h
new file mode 100644
index 0000000000..b28718b88c
--- /dev/null
+++ b/tools/misc/mbootpack/mbootpack.h
@@ -0,0 +1,91 @@
+/*
+ * mbootpack.h
+ *
+ * Common definitions for mbootpack
+ *
+ * Copyright (C) 2003-2004 Tim Deegan (tjd21@cl.cam.ac.uk)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * $Id: mbootpack.h,v 1.2 2005/03/23 10:38:37 tjd21 Exp $
+ *
+ */
+
+#ifndef __MBOOTPACK__H__
+#define __MBOOTPACK__H__
+
+#ifndef __MB_ASM
+
+#undef NDEBUG
+#include <stdio.h>
+
+/* Flags */
+extern int quiet;
+
+/* Types */
+typedef unsigned long address_t;
+
+/* One loaded section: a host-side copy of data destined for a target
+ * physical address.  Kept in a doubly-linked list ordered by start. */
+typedef struct section_t {
+    char *buffer;              /* host buffer holding the section data */
+    address_t start;           /* target (physical) load address */
+    long int size;             /* size in bytes, in buffer and on target */
+    struct section_t *prev;    /* previous section (lower start), or NULL */
+    struct section_t *next;    /* next section (higher start), or NULL */
+} section_t;
+
+/* buildimage.c */
+extern void make_bzImage(section_t *sections,
+ address_t entry,
+ address_t mbi,
+ FILE *fp);
+
+address_t place_mbi(long int size);
+
+
+/* trampoline.S */
+extern unsigned char mb_trampoline[];
+extern unsigned char mb_trampoline_end[];
+extern volatile address_t mb_mbi_address, mb_entry_address;
+
+/* Macros */
+#define MIN(_x,_y) (((_x)<=(_y))?(_x):(_y))
+#define MAX(_x,_y) (((_x)<=(_y))?(_y):(_x))
+#define ROUNDUP_P2(_x, _a) (((_x)+((_a)-1))&(~((_a)-1)))
+
+#endif
+
+/* x86 memory: such fun */
+#define MEM_HOLE_START 0xa0000
+#define MEM_HOLE_END 0x100000
+#define HIGHMEM_START MEM_HOLE_END
+#define X86_PAGE_SIZE 0x1000
+
+/* How much command line we'll take from the bootloader. */
+#define CMD_LINE_SPACE 0x300
+
+/* Number of 512-byte sectors to load in low memory (max 7) */
+#define SETUPSECTS 7
+
+
+/* Who are we? */
+#define MBOOTPACK_VERSION_STRING "v0.2 (alpha)"
+
+#endif /* __MBOOTPACK__H__ */
+
+/*
+ * EOF (mbootpack.h)
+ */
+
diff --git a/tools/misc/mbootpack/setup.S b/tools/misc/mbootpack/setup.S
new file mode 100644
index 0000000000..f429312df6
--- /dev/null
+++ b/tools/misc/mbootpack/setup.S
@@ -0,0 +1,1064 @@
+/*
+ * bootsect.S
+ *
+ * This is setup.S from the linux 2.6.9 source code,
+ * with heavy cuts and changes for mbootpack
+ * November 2004 Tim Deegan <tjd21@cl.cam.ac.uk>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * $Id: setup.S,v 1.4 2005/03/23 10:39:03 tjd21 Exp $
+ *
+ */
+
+#include "mbootpack.h"
+
+/*
+ * setup.S Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * setup.s is responsible for getting the system data from the BIOS,
+ * and putting them into the appropriate places in system memory.
+ * both setup.s and system has been loaded by the bootblock.
+ *
+ * This code asks the bios for memory/disk/other parameters, and
+ * puts them in a "safe" place: 0x90000-0x901FF, ie where the
+ * boot-block used to be. It is then up to the protected mode
+ * system to read them from there before the area is overwritten
+ * for buffer-blocks.
+ *
+ * Move PS/2 aux init code to psaux.c
+ * (troyer@saifr00.cfsat.Honeywell.COM) 03Oct92
+ *
+ * some changes and additional features by Christoph Niemann,
+ * March 1993/June 1994 (Christoph.Niemann@linux.org)
+ *
+ * add APM BIOS checking by Stephen Rothwell, May 1994
+ * (sfr@canb.auug.org.au)
+ *
+ * High load stuff, initrd support and position independency
+ * by Hans Lermen & Werner Almesberger, February 1996
+ * <lermen@elserv.ffm.fgan.de>, <almesber@lrc.epfl.ch>
+ *
+ * Video handling moved to video.S by Martin Mares, March 1996
+ * <mj@k332.feld.cvut.cz>
+ *
+ * Extended memory detection scheme retwiddled by orc@pell.chi.il.us (david
+ * parsons) to avoid loadlin confusion, July 1997
+ *
+ * Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999.
+ * <stiker@northlink.com>
+ *
+ * Fix to work around buggy BIOSes which dont use carry bit correctly
+ * and/or report extended memory in CX/DX for e801h memory size detection
+ * call. As a result the kernel got wrong figures. The int15/e801h docs
+ * from Ralf Brown interrupt list seem to indicate AX/BX should be used
+ * anyway. So to avoid breaking many machines (presumably there was a reason
+ * to orginally use CX/DX instead of AX/BX), we do a kludge to see
+ * if CX/DX have been changed in the e801 call and if so use AX/BX .
+ * Michael Miller, April 2001 <michaelm@mjmm.org>
+ *
+ * New A20 code ported from SYSLINUX by H. Peter Anvin. AMD Elan bugfixes
+ * by Robert Schwebel, December 2001 <robert@schwebel.de>
+ */
+
+/*
+#include <linux/config.h>
+#include <asm/segment.h>
+#include <linux/version.h>
+#include <linux/compile.h>
+#include <asm/boot.h>
+#include <asm/e820.h>
+#include <asm/page.h>
+*/
+
+/* Definitions that should have come from these includes */
+#define DEF_INITSEG 0x9000
+#define DEF_SYSSEG 0x1000
+#define DEF_SETUPSEG 0x9020
+#define DEF_SYSSIZE 0x7F00
+#define NORMAL_VGA 0xffff
+#define EXTENDED_VGA 0xfffe
+#define ASK_VGA 0xfffd
+#define GDT_ENTRY_BOOT_CS 2
+#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
+#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
+#define __PAGE_OFFSET (0xC0000000)
+#define E820MAP 0x2d0 /* our map */
+#define E820MAX 32 /* number of entries in E820MAP */
+#define E820NR 0x1e8 /* # entries in E820MAP */
+#define E820_RAM 1
+#define E820_RESERVED 2
+#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS 4
+#define __BIG_KERNEL__
+
+
+/* Signature words to ensure LILO loaded us right */
+#define SIG1 0xAA55
+#define SIG2 0x5A5A
+
+INITSEG = DEF_INITSEG # 0x9000, we move boot here, out of the way
+SYSSEG = DEF_SYSSEG # 0x1000, system loaded at 0x10000 (65536).
+SETUPSEG = DEF_SETUPSEG # 0x9020, this is the current segment
+ # ... and the former contents of CS
+
+DELTA_INITSEG = SETUPSEG - INITSEG # 0x0020
+
+.code16
+.globl _start, begtext, begdata, begbss, endtext, enddata, endbss
+
+.text
+begtext:
+.data
+begdata:
+.bss
+begbss:
+.text
+
+_start:
+start:
+ jmp trampoline
+
+# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
+
+ .ascii "HdrS" # header signature
+ .word 0x0203 # header version number (>= 0x0105)
+ # or else old loadlin-1.5 will fail)
+realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
+start_sys_seg: .word SYSSEG
+ .word kernel_version # pointing to kernel version string
+ # above section of header is compatible
+ # with loadlin-1.5 (header v1.5). Don't
+ # change it.
+
+type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin,
+ # Bootlin, SYSLX, bootsect...)
+ # See Documentation/i386/boot.txt for
+ # assigned ids
+
+# flags, unused bits must be zero (RFU) bit within loadflags
+loadflags:
+LOADED_HIGH = 1 # If set, the kernel is loaded high
+CAN_USE_HEAP = 0x80 # If set, the loader also has set
+ # heap_end_ptr to tell how much
+ # space behind setup.S can be used for
+ # heap purposes.
+ # Only the loader knows what is free
+#ifndef __BIG_KERNEL__
+ .byte 0
+#else
+ .byte LOADED_HIGH
+#endif
+
+setup_move_size: .word 0x8000 # size to move, when setup is not
+ # loaded at 0x90000. We will move setup
+ # to 0x90000 then just before jumping
+ # into the kernel. However, only the
+ # loader knows how much data behind
+ # us also needs to be loaded.
+
+/* N.B. these next addresses are entirely ignored by this code -- it
+ * assumes it was loaded with the 32bit code at 0x100000, and doesn't
+ * touch the ramdisk. */
+code32_start: # here loaders can put a different
+ # start address for 32-bit code.
+#ifndef __BIG_KERNEL__
+ .long 0x1000 # 0x1000 = default for zImage
+#else
+ .long 0x100000 # 0x100000 = default for big kernel
+#endif
+
+ramdisk_image: .long 0 # address of loaded ramdisk image
+ # Here the loader puts the 32-bit
+ # address where it loaded the image.
+ # This only will be read by the kernel.
+
+ramdisk_size: .long 0 # its size in bytes
+
+bootsect_kludge:
+ .long 0 # obsolete
+
+heap_end_ptr: .word modelist+1024 # (Header version 0x0201 or later)
+ # space from here (exclusive) down to
+ # end of setup code can be used by setup
+ # for local heap purposes.
+
+pad1: .word 0
+cmd_line_ptr: .long 0 # (Header version 0x0202 or later)
+ # If nonzero, a 32-bit pointer
+ # to the kernel command line.
+ # The command line should be
+ # located between the start of
+ # setup and the end of low
+ # memory (0xa0000), or it may
+ # get overwritten before it
+ # gets read. If this field is
+ # used, there is no longer
+ # anything magical about the
+ # 0x90000 segment; the setup
+ # can be located anywhere in
+ # low memory 0x10000 or higher.
+
+ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff
+ # (Header version 0x0203 or later)
+ # The highest safe address for
+ # the contents of an initrd
+
+/* Add more known locations: the image builder will overwrite
+ * these with the entry point and MBI location for the multiboot kernel.
+ * These offsets *must* match the definitions in buildimage.c */
+
+entry_address: .long 0 # This will be offset 0x30 (0x230 from b'sect)
+mbi_address: .long 0 # This will be offset 0x34
+
+/* Storage space for the size of memory */
+highmem_size: .long 0
+
+trampoline: call start_of_setup
+ .space 1024
+# End of setup header #####################################################
+
+start_of_setup:
+# Bootlin depends on this being done early
+ movw $0x01500, %ax
+ movb $0x81, %dl
+ int $0x13
+
+#ifdef SAFE_RESET_DISK_CONTROLLER
+# Reset the disk controller.
+ movw $0x0000, %ax
+ movb $0x80, %dl
+ int $0x13
+#endif
+
+# Set %ds = %cs, we know that SETUPSEG = %cs at this point
+ movw %cs, %ax # aka SETUPSEG
+ movw %ax, %ds
+
+# Check signature at end of setup
+ cmpw $SIG1, setup_sig1
+ jne bad_sig
+
+ cmpw $SIG2, setup_sig2
+ jne bad_sig
+
+ jmp good_sig1
+
+# Routine to print asciiz string at ds:si
+prtstr:
+    lodsb                      # al = next byte, si++
+    andb %al, %al              # NUL terminator?
+    jz fin
+
+    call prtchr
+    jmp prtstr
+
+fin: ret
+
+# Space printing
+prtsp2: call prtspc            # Print double space
+prtspc: movb $0x20, %al        # Print single space (note: fall-thru)
+
+# Part of above routine, this one just prints ascii al
+# (BIOS int 10h, ah=0x0e teletype output; bx = page/attribute)
+prtchr: pushw %ax
+    pushw %cx
+    movw $7,%bx
+    movw $0x01, %cx
+    movb $0x0e, %ah
+    int $0x10
+    popw %cx
+    popw %ax
+    ret
+
+# Emit BEL (0x07) through the teletype routine: beeps on most BIOSes
+beep: movb $0x07, %al
+    jmp prtchr
+no_sig_mess: .string "No setup signature found ..."
+
+good_sig1:
+ jmp good_sig
+
+# We now have to find the rest of the setup code/data
+bad_sig:
+ movw %cs, %ax # SETUPSEG
+ subw $DELTA_INITSEG, %ax # INITSEG
+ movw %ax, %ds
+ xorb %bh, %bh
+ movb (497), %bl # get setup sect from bootsect
+ subw $4, %bx # LILO loads 4 sectors of setup
+ shlw $8, %bx # convert to words (1sect=2^8 words)
+ movw %bx, %cx
+ shrw $3, %bx # convert to segment
+ addw $SYSSEG, %bx
+ movw %bx, %cs:start_sys_seg
+# Move rest of setup code/data to here
+ movw $2048, %di # four sectors loaded by LILO
+ subw %si, %si
+ pushw %cs
+ popw %es
+ movw $SYSSEG, %ax
+ movw %ax, %ds
+ rep
+ movsw
+ movw %cs, %ax # aka SETUPSEG
+ movw %ax, %ds
+ cmpw $SIG1, setup_sig1
+ jne no_sig
+
+ cmpw $SIG2, setup_sig2
+ jne no_sig
+
+ jmp good_sig
+
+no_sig:
+ lea no_sig_mess, %si
+ call prtstr
+
+no_sig_loop:
+ hlt
+ jmp no_sig_loop
+
+mb_hello_mess1:
+ .string "mboot"
+
+good_sig:
+ lea mb_hello_mess1, %si
+ call prtstr
+
+ movw %cs, %ax # aka SETUPSEG
+ subw $DELTA_INITSEG, %ax # aka INITSEG
+ movw %ax, %ds
+# Check if an old loader tries to load a big-kernel
+ testb $LOADED_HIGH, %cs:loadflags # Do we have a big kernel?
+ jz loader_ok # No, no danger for old loaders.
+
+ cmpb $0, %cs:type_of_loader # Do we have a loader that
+ # can deal with us?
+ jnz loader_ok # Yes, continue.
+
+ pushw %cs # No, we have an old loader,
+ popw %ds # die.
+ lea loader_panic_mess, %si
+ call prtstr
+
+ jmp no_sig_loop
+
+loader_panic_mess: .string "Wrong loader, giving up..."
+
+loader_ok:
+
+# Get memory size (extended mem, kB)
+
+/* We'll be storing this in highmem_size, to be copied to the mbi */
+
+# Try three different memory detection schemes. First, try
+# e820h, which lets us assemble a memory map, then try e801h,
+# which returns a 32-bit memory size, and finally 88h, which
+# returns 0-64m
+
+ xorl %edx, %edx
+ xorl %eax, %eax
+ movl %eax, (0x1e0)
+ movl %eax, highmem_size
+ movb %al, (E820NR)
+
+# method E820H:
+# the memory map from hell. e820h returns memory classified into
+# a whole bunch of different types, and allows memory holes and
+# everything. We scan through this memory map and build a list
+# of the first 32 memory areas, which we return at [E820MAP].
+# This is documented at http://www.acpi.info/, in the ACPI 2.0 specification.
+
+#define SMAP 0x534d4150
+
+meme820:
+ xorl %ebx, %ebx # continuation counter
+ movw $E820MAP, %di # point into the whitelist
+ # so we can have the bios
+ # directly write into it.
+
+jmpe820:
+ movl $0x0000e820, %eax # e820, upper word zeroed
+ movl $SMAP, %edx # ascii 'SMAP'
+ movl $20, %ecx # size of the e820rec
+ pushw %ds # data record.
+ popw %es
+ int $0x15 # make the call
+ jc bail820 # fall to e801 if it fails
+
+ cmpl $SMAP, %eax # check the return is `SMAP'
+ jne bail820 # fall to e801 if it fails
+
+# cmpl $1, 16(%di) # is this usable memory?
+# jne again820
+
+ # If this is usable memory, we save it by simply advancing %di by
+ # sizeof(e820rec).
+ #
+good820:
+ movb (E820NR), %al # up to 32 entries
+ cmpb $E820MAX, %al
+ jnl bail820
+
+ incb (E820NR)
+ movw %di, %ax
+ addw $20, %ax
+ movw %ax, %di
+again820:
+ cmpl $0, %ebx # check to see if
+ jne jmpe820 # %ebx is set to EOF
+
+/* Multiboot spec says high mem should be the address of the first
+ * upper memory hole, minus 1 MB */
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ movw $E820MAP, %di # Start at the beginning
+calc_highmem_loop:
+ cmpl $1, 16(%di) # is it usable memory?
+ jnz calc_highmem_next
+ cmpl $0, 4(%di) # is base < 4GB?
+ jnz calc_highmem_next
+ cmpl $0x100000, 0(%di) # is base <= 1MB?
+ jg calc_highmem_next
+ movl 8(%di), %ecx # Calculate base+length
+ shrl $10, %ecx # in kilobytes
+ movl 12(%di), %edx
+ shll $22, %edx
+ orl %edx, %ecx
+ movl 0(%di), %edx
+ shrl $10, %edx
+ addl %edx, %ecx
+ subl $1024, %ecx # - 1 MB
+ cmpl %cs:highmem_size, %ecx
+ jl calc_highmem_next
+ movl %ecx, %cs:highmem_size
+calc_highmem_next:
+ add $1, %bl
+ add $20, %di
+ cmp %bl, (E820NR)
+ je calc_highmem_done
+ jmp calc_highmem_loop
+calc_highmem_done:
+
+bail820:
+
+# method E801H:
+# memory size is in 1k chunksizes, to avoid confusing loadlin.
+
+meme801:
+ stc # fix to work around buggy
+ xorw %cx,%cx # BIOSes which dont clear/set
+ xorw %dx,%dx # carry on pass/error of
+ # e801h memory size call
+ # or merely pass cx,dx though
+ # without changing them.
+ movw $0xe801, %ax
+ int $0x15
+ jc mem88
+
+ cmpw $0x0, %cx # Kludge to handle BIOSes
+ jne e801usecxdx # which report their extended
+ cmpw $0x0, %dx # memory in AX/BX rather than
+ jne e801usecxdx # CX/DX. The spec I have read
+ movw %ax, %cx # seems to indicate AX/BX
+ movw %bx, %dx # are more reasonable anyway...
+
+e801usecxdx:
+ andl $0xffff, %edx # clear sign extend
+ shll $6, %edx # and go from 64k to 1k chunks
+ andl $0xffff, %ecx # clear sign extend
+ addl %ecx, %edx
+
+ cmpl %cs:highmem_size, %edx # store extended mem size
+ jl mem88 # if it's bigger than
+ movl %edx, %cs:highmem_size # what we already have
+
+# Ye Olde Traditional Methode. Returns the memory size (up to 16mb or
+# 64mb, depending on the bios) in ax.
+mem88:
+ movb $0x88, %ah
+ int $0x15
+
+ andl $0xffff, %eax # clear sign extend
+ cmpl %cs:highmem_size, %eax # store extended mem size
+ jl have_memsize # if it's bigger than
+ movl %eax, %cs:highmem_size # what we already have
+
+have_memsize:
+
+/* Culled: HDD probes, APM, speedstep */
+
+# Now we want to move to protected mode ...
+ cmpw $0, %cs:realmode_swtch
+ jz rmodeswtch_normal
+
+ lcall *%cs:realmode_swtch
+
+ jmp rmodeswtch_end
+
+rmodeswtch_normal:
+ pushw %cs
+ call default_switch
+
+rmodeswtch_end:
+
+/* Culled: code to take the 32bit entry address from the loader */
+/* Culled: code to relocate non-bzImage kernels */
+
+ # then we load the segment descriptors
+ movw %cs, %ax # aka SETUPSEG
+ movw %ax, %ds
+
+# Check whether we need to be downward compatible with version <=201
+ cmpl $0, cmd_line_ptr
+ jne end_move_self # loader uses version >=202 features
+ cmpb $0x20, type_of_loader
+ je end_move_self # bootsect loader, we know of it
+
+# Boot loader doesnt support boot protocol version 2.02.
+# If we have our code not at 0x90000, we need to move it there now.
+# We also then need to move the params behind it (commandline)
+# Because we would overwrite the code on the current IP, we move
+# it in two steps, jumping high after the first one.
+ movw %cs, %ax
+ cmpw $SETUPSEG, %ax
+ je end_move_self
+
+ cli # make sure we really have
+ # interrupts disabled !
+ # because after this the stack
+ # should not be used
+ subw $DELTA_INITSEG, %ax # aka INITSEG
+ movw %ss, %dx
+ cmpw %ax, %dx
+ jb move_self_1
+
+ addw $INITSEG, %dx
+ subw %ax, %dx # this will go into %ss after
+ # the move
+move_self_1:
+ movw %ax, %ds
+ movw $INITSEG, %ax # real INITSEG
+ movw %ax, %es
+ movw %cs:setup_move_size, %cx
+ std # we have to move up, so we use
+ # direction down because the
+ # areas may overlap
+ movw %cx, %di
+ decw %di
+ movw %di, %si
+ subw $move_self_here+0x200, %cx
+ rep
+ movsb
+ ljmp $SETUPSEG, $move_self_here
+
+move_self_here:
+ movw $move_self_here+0x200, %cx
+ rep
+ movsb
+ movw $SETUPSEG, %ax
+ movw %ax, %ds
+ movw %dx, %ss
+end_move_self: # now we are at the right place
+
+#
+# Enable A20. This is at the very best an annoying procedure.
+# A20 code ported from SYSLINUX 1.52-1.63 by H. Peter Anvin.
+# AMD Elan bug fix by Robert Schwebel.
+#
+
+#if defined(CONFIG_X86_ELAN)
+ movb $0x02, %al # alternate A20 gate
+ outb %al, $0x92 # this works on SC410/SC520
+a20_elan_wait:
+ call a20_test
+ jz a20_elan_wait
+ jmp a20_done
+#endif
+
+
+A20_TEST_LOOPS = 32 # Iterations per wait
+A20_ENABLE_LOOPS = 255 # Total loops to try
+
+
+#ifndef CONFIG_X86_VOYAGER
+a20_try_loop:
+
+ # First, see if we are on a system with no A20 gate.
+a20_none:
+ call a20_test
+ jnz a20_done
+
+ # Next, try the BIOS (INT 0x15, AX=0x2401)
+a20_bios:
+ movw $0x2401, %ax
+ pushfl # Be paranoid about flags
+ int $0x15
+ popfl
+
+ call a20_test
+ jnz a20_done
+
+ # Try enabling A20 through the keyboard controller
+#endif /* CONFIG_X86_VOYAGER */
+a20_kbc:
+ call empty_8042
+
+#ifndef CONFIG_X86_VOYAGER
+ call a20_test # Just in case the BIOS worked
+ jnz a20_done # but had a delayed reaction.
+#endif
+
+ movb $0xD1, %al # command write
+ outb %al, $0x64
+ call empty_8042
+
+ movb $0xDF, %al # A20 on
+ outb %al, $0x60
+ call empty_8042
+
+#ifndef CONFIG_X86_VOYAGER
+ # Wait until a20 really *is* enabled; it can take a fair amount of
+ # time on certain systems; Toshiba Tecras are known to have this
+ # problem.
+a20_kbc_wait:
+ xorw %cx, %cx
+a20_kbc_wait_loop:
+ call a20_test
+ jnz a20_done
+ loop a20_kbc_wait_loop
+
+ # Final attempt: use "configuration port A"
+a20_fast:
+ inb $0x92, %al # Configuration Port A
+ orb $0x02, %al # "fast A20" version
+ andb $0xFE, %al # don't accidentally reset
+ outb %al, $0x92
+
+ # Wait for configuration port A to take effect
+a20_fast_wait:
+ xorw %cx, %cx
+a20_fast_wait_loop:
+ call a20_test
+ jnz a20_done
+ loop a20_fast_wait_loop
+
+ # A20 is still not responding. Try frobbing it again.
+ #
+ decb (a20_tries)
+ jnz a20_try_loop
+
+ movw $a20_err_msg, %si
+ call prtstr
+
+a20_die:
+ hlt
+ jmp a20_die
+
+a20_tries:
+ .byte A20_ENABLE_LOOPS
+
+a20_err_msg:
+ .ascii "linux: fatal error: A20 gate not responding!"
+ .byte 13, 10, 0
+
+ # If we get here, all is good
+a20_done:
+
+
+#endif /* CONFIG_X86_VOYAGER */
+
+/* Another print, to show protected mode and A20 are OK */
+
+ jmp mb_hello_mess2_end
+mb_hello_mess2:
+ .string "pack "
+mb_hello_mess2_end:
+ lea mb_hello_mess2, %si
+ call prtstr
+
+# set up gdt and idt
+/* lidt idt_48 # load idt with 0,0 */
+/* Multiboot kernels must set up their own IDT: leave this for now,
+ * so we can print diagnostics */
+
+ xorl %eax, %eax # Compute gdt_base
+ movw %ds, %ax # (Convert %ds:gdt to a linear ptr)
+ shll $4, %eax
+ addl $gdt, %eax
+ movl %eax, (gdt_48+2)
+ lgdt gdt_48 # load gdt with whatever is
+ # appropriate
+
+# make sure any possible coprocessor is properly reset..
+ xorw %ax, %ax
+ outb %al, $0xf0
+ call delay
+
+ outb %al, $0xf1
+ call delay
+
+
+# well, that went ok, I hope. Now we mask all interrupts - the rest
+# is done in init_IRQ().
+ movb $0xFF, %al # mask all interrupts for now
+ outb %al, $0xA1
+ call delay
+
+ movb $0xFB, %al # mask all irq's but irq2 which
+ outb %al, $0x21 # is cascaded
+
+# Well, that certainly wasn't fun :-(. Hopefully it works, and we don't
+# need no steenking BIOS anyway (except for the initial loading :-).
+# The BIOS-routine wants lots of unnecessary data, and it's less
+# "interesting" anyway. This is how REAL programmers do it.
+
+/* Tailor the jump below so the target is the 32bit trampoline code */
+
+ xorl %eax, %eax # Calculate
+ movw %cs, %ax # the linear
+ shll $4, %eax # address of
+ addl $trampoline32, %eax # %cs:trampoline32
+ movl %eax, %cs:code32 # Stick it into the jmpi
+
+ /* Load a 32-bit pointer to the entry address into %ecx */
+ xorl %ecx, %ecx # Calculate
+ movw %cs, %cx # the linear
+ shll $4, %ecx # address of
+ addl $entry_address, %ecx # %cs:entry_address
+
+# Well, now's the time to actually move into protected mode.
+
+ lea mb_ready_mess, %si
+ call prtstr
+
+/* May as well load this IDT now */
+ lidt idt_48
+
+ xorl %eax, %eax
+ movw $1, %ax # protected mode (PE) bit
+ lmsw %ax # This is it!
+ jmp flush_instr
+flush_instr:
+
+ /* Set up segment registers */
+ movw $__BOOT_DS, %dx
+ movw %dx, %ds
+ movw %dx, %es
+ movw %dx, %fs
+ movw %dx, %gs
+ movw %dx, %ss
+
+ /* Trampoline expects this in %eax */
+ movl %ecx, %eax
+
+ /* Jump to the 32-bit trampoline */
+
+# NOTE: For high loaded big kernels we need a
+# jmpi 0x100000,__BOOT_CS
+#
+# but we yet haven't reloaded the CS register, so the default size
+# of the target offset still is 16 bit.
+# However, using an operand prefix (0x66), the CPU will properly
+# take our 48 bit far pointer. (INTeL 80386 Programmer's Reference
+# Manual, Mixing 16-bit and 32-bit code, page 16-6)
+
+ .byte 0x66, 0xea # prefix + jmpi-opcode
+code32: .long 0x1000 # will be set to trampoline32
+ # by code above.
+ .word __BOOT_CS
+
+# Here's a bunch of information about your current kernel..
+
+kernel_version: .string "mbootpack changeling bzImage"
+mb_ready_mess:
+ .ascii MBOOTPACK_VERSION_STRING
+ .ascii "\r\n"
+ .byte 0
+
+# This is the default real mode switch routine.
+# to be called just before protected mode transition
+default_switch:
+ cli # no interrupts allowed !
+ movb $0x80, %al # disable NMI for bootup
+ # sequence
+ outb %al, $0x70
+ lret
+
+
+#ifndef CONFIG_X86_VOYAGER
+# This routine tests whether or not A20 is enabled. If so, it
+# exits with zf = 0.
+#
+# The memory address used, 0x200, is the int $0x80 vector, which
+# should be safe.
+
+A20_TEST_ADDR = 4*0x80
+
+a20_test:
+ pushw %cx
+ pushw %ax
+ xorw %cx, %cx
+ movw %cx, %fs # Low memory
+ decw %cx
+ movw %cx, %gs # High memory area
+ movw $A20_TEST_LOOPS, %cx
+ movw %fs:(A20_TEST_ADDR), %ax
+ pushw %ax
+a20_test_wait:
+ incw %ax
+ movw %ax, %fs:(A20_TEST_ADDR)
+ call delay # Serialize and make delay constant
+ cmpw %gs:(A20_TEST_ADDR+0x10), %ax
+ loope a20_test_wait
+
+ popw %fs:(A20_TEST_ADDR)
+ popw %ax
+ popw %cx
+ ret
+
+#endif /* CONFIG_X86_VOYAGER */
+
+# This routine checks that the keyboard command queue is empty
+# (after emptying the output buffers)
+#
+# Some machines have delusions that the keyboard buffer is always full
+# with no keyboard attached...
+#
+# If there is no keyboard controller, we will usually get 0xff
+# to all the reads. With each IO taking a microsecond and
+# a timeout of 100,000 iterations, this can take about half a
+# second ("delay" == outb to port 0x80). That should be ok,
+# and should also be plenty of time for a real keyboard controller
+# to empty.
+#
+
+empty_8042:
+ pushl %ecx
+ movl $100000, %ecx
+
+empty_8042_loop:
+ decl %ecx
+ jz empty_8042_end_loop
+
+ call delay
+
+ inb $0x64, %al # 8042 status port
+ testb $1, %al # output buffer?
+ jz no_output
+
+ call delay
+ inb $0x60, %al # read it
+ jmp empty_8042_loop
+
+no_output:
+ testb $2, %al # is input buffer full?
+ jnz empty_8042_loop # yes - loop
+empty_8042_end_loop:
+ popl %ecx
+ ret
+
+# Read the cmos clock. Return the seconds in al
+gettime:
+ pushw %cx
+ movb $0x02, %ah
+ int $0x1a
+ movb %dh, %al # %dh contains the seconds
+ andb $0x0f, %al
+ movb %dh, %ah
+ movb $0x04, %cl
+ shrb %cl, %ah
+ aad
+ popw %cx
+ ret
+
+# Delay is needed after doing I/O
+delay:
+ outb %al,$0x80
+ ret
+
+# Descriptor tables
+#
+# NOTE: The intel manual says gdt should be sixteen bytes aligned for
+# efficiency reasons. However, there are machines which are known not
+# to boot with misaligned GDTs, so alter this at your peril! If you alter
+# GDT_ENTRY_BOOT_CS (in asm/segment.h) remember to leave at least two
+# empty GDT entries (one for NULL and one reserved).
+#
+# NOTE: On some CPUs, the GDT must be 8 byte aligned. This is
+# true for the Voyager Quad CPU card which will not boot without
+# This directive. 16 byte aligment is recommended by intel.
+#
+
+
+/* The boot-time code segment is set at the jmpi above */
+/* Dont change this without checking everything still matches */
+
+ .align 16
+gdt:
+ .fill GDT_ENTRY_BOOT_CS,8,0
+
+ .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
+ .word 0 # base address = 0
+ .word 0x9A00 # code read/exec
+ .word 0x00CF # granularity = 4096, 386
+ # (+5th nibble of limit)
+
+ .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
+ .word 0 # base address = 0
+ .word 0x9200 # data read/write
+ .word 0x00CF # granularity = 4096, 386
+ # (+5th nibble of limit)
+gdt_end:
+ .align 4
+
+ .word 0 # alignment byte
+idt_48:
+ .word 0 # idt limit = 0
+ .word 0, 0 # idt base = 0L
+
+ .word 0 # alignment byte
+gdt_48:
+ .word gdt_end - gdt - 1 # gdt limit
+ .word 0, 0 # gdt base (filled in later)
+
+# Include video setup & detection code
+
+/* #include "video.S" */
+
+.code32
+trampoline32:
+ /* Here, %eax = 32-bit pointer to entry_address */
+
+ /* Check if the bootloader gave us a (non-empty) command line */
+ movl -8(%eax), %ebx # cmd_line_ptr
+ cmpl $0, %ebx
+ je no_cmd_line
+ cmpb $0, 0(%ebx)
+ je no_cmd_line
+
+ /* Find the MBI command line */
+ movl %eax, %ecx # &entry_address
+ addl $(begtext-entry_address), %ecx # --> start of setup
+ subl $0x9200, %ecx # --> reloc offset
+ movl %ecx, %esi # (copy offset)
+ movl %ecx, %ebx # (copy offset)
+ addl 4(%eax), %ecx # --> current addr of MBI
+ addl 16(%ecx), %ebx # --> cur. addr of MB cmdline
+
+ /* Overwrite the built-in MBI kernel command line */
+ movl -8(%eax), %ecx
+ movl $0, %edi
+
+ /* Give the kernel a 'self' word, that linux doesn't get */
+ movw $0x202E, 0(%ebx) # '. '
+ addl $0x2, %ebx
+
+cmd_line_copy:
+ movb (%ecx, %edi), %dl
+ movb %dl, (%ebx, %edi)
+ inc %edi
+ cmp $CMD_LINE_SPACE-3, %edi
+ je cmd_line_copy_end
+
+ cmpb $0x0, %dl
+ jne cmd_line_copy
+cmd_line_copy_end:
+ movb $0x0, (%ebx, %edi)
+ subl $0x2, %ebx
+
+ /* Look for '--' in the kernel command line */
+cmd_line_scan:
+ inc %ebx
+ cmpb $0x0, 0(%ebx)
+ je no_cmd_line
+ cmpl $0x202D2D20, 0(%ebx) # ' -- '
+ jne cmd_line_scan
+
+ /* Found it: terminate kernel's command line */
+ movb $0x0, 0(%ebx)
+ inc %ebx
+ /* Relocate address to where it will be moved to */
+ subl %esi, %ebx
+
+ /* Is there a module 0? */
+ movl %esi, %ecx # Reloc offset
+ addl 4(%eax), %ecx # --> current addr of MBI
+ cmpl $0x0, 20(%ecx) # (check module count)
+ je no_cmd_line
+ /* Overwrite module 0's command line */
+ movl %esi, %edx # Reloc offset
+ addl 24(%ecx), %edx # --> cur. add. of Module 0
+ movl %ebx, 8(%edx) # --> blat mod. 0's cmdline
+no_cmd_line:
+
+
+ /* Relocate the MBI from after the setup code to its proper home
+ * between the MBI pointer and 0xa000 */
+ movl %eax, %ecx # &entry_address
+ addl $(begtext-entry_address), %ecx # --> start of setup
+ subl $0x9200, %ecx # --> reloc offset
+ addl 4(%eax), %ecx # --> current addr of MBI
+
+ movl $0xa000, %ebx # End of MBI
+ subl 4(%eax), %ebx # --> size of MBI
+ movl %ebx, %edi
+
+ movl 4(%eax), %ebx # Destination of MBI
+
+mbi_copy:
+ dec %edi
+ movb (%ecx, %edi), %dl
+ movb %dl, (%ebx, %edi)
+ cmp $0x0, %edi
+ jne mbi_copy
+
+ /* Copy memory size into MBI structure */
+ movl 4(%eax), %ebx # MBI pointer
+ movl 8(%eax), %ecx # highmem_size
+ movl %ecx, 8(%ebx) # --> mbi.mem_upper
+ movl $0x280, %ecx
+ movl %ecx, 4(%ebx) # --> mbi.mem_lower
+ /* Set the MB_INFO_MEMORY bit */
+ orl $1, 0(%ebx)
+
+ /* Recover the MBI pointer into %ebx */
+ movl 4(%eax), %ebx # MBI pointer
+ /* Extract the load address into %ecx */
+ movl 0(%eax), %ecx
+ /* Let the kernel know we're a multiboot loader */
+ movl $0x2BADB002, %eax
+ /* Jump to the kernel address supplied */
+ jmp *%ecx
+
+# Setup signature -- must be last
+setup_sig1: .word SIG1
+setup_sig2: .word SIG2
+
+# After this point, there is some free space which is used by the video mode
+# handling code to store the temporary mode table (not used by the kernel).
+
+modelist:
+
+.text
+endtext:
+.data
+enddata:
+.bss
+endbss:
diff --git a/tools/misc/netfix b/tools/misc/netfix
index 32f1021732..e18923f132 100644
--- a/tools/misc/netfix
+++ b/tools/misc/netfix
@@ -11,6 +11,7 @@ from getopt import getopt
# add fallback path for non-native python path installs if needed
sys.path.append('/usr/lib/python')
+sys.path.append('/usr/lib64/python')
from xen.util.Brctl import *
short_options = 'hvqni:b:c'
diff --git a/tools/misc/xc_shadow.c b/tools/misc/xc_shadow.c
new file mode 100644
index 0000000000..ba21a34126
--- /dev/null
+++ b/tools/misc/xc_shadow.c
@@ -0,0 +1,70 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2005 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: xc_shadow.c
+ * Author: Rolf Neugebauer (rolf.neugebauer@intel.com)
+ * Date: Mar 2005
+ *
+ * Description:
+ */
+
+
+#include <xc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <string.h>
+
+void usage()
+{
+ printf("xc_shadow: -[0|1|2]\n");
+ printf(" set shadow mode\n");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ int xc_handle;
+ int mode;
+
+ if ( argc > 1 )
+ {
+ char *p = argv[1];
+ if (*p++ == '-') {
+ if (*p == '1')
+ mode = 1;
+ else if (*p == '2')
+ mode = 2;
+ else if (*p == '0')
+ mode = 0;
+ else
+ usage();
+ } else
+ usage();
+ }
+ else
+ usage();
+
+ if ( (xc_handle = xc_interface_open()) == -1 )
+ {
+ fprintf(stderr, "Error opening xc interface: %d (%s)\n",
+ errno, strerror(errno));
+ return 1;
+ }
+
+ if ( xc_shadow_control(xc_handle,
+ 0,
+ mode,
+ NULL,
+ 0,
+ NULL) < 0 )
+ {
+ fprintf(stderr, "Error reseting performance counters: %d (%s)\n",
+ errno, strerror(errno));
+ return 1;
+ }
+ return 0;
+}
diff --git a/tools/misc/xend b/tools/misc/xend
index a73ca579a8..768cd20f94 100644
--- a/tools/misc/xend
+++ b/tools/misc/xend
@@ -21,9 +21,18 @@
"""
import os
import sys
+import socket
+import signal
+import time
+
+XCS_PATH = "/var/lib/xen/xcs_socket"
+XCS_EXEC = "/usr/sbin/xcs"
+XCS_PIDFILE = "/var/run/xcs.pid"
+XCS_ARGS = (XCS_EXEC, "-p", XCS_PIDFILE)
# add fallback path for non-native python path installs if needed
sys.path.append('/usr/lib/python')
+sys.path.append('/usr/lib64/python')
from xen.xend.server import SrvDaemon
class CheckError(ValueError):
@@ -51,36 +60,6 @@ def check_logging():
hline()
raise CheckError("logging is not installed")
-def check_twisted_version():
- """Check twisted is installed with a supported version and print a warning if not.
- Raises an error if twisted is not installed.
- """
- # Supported twisted release and major version.
- RELEASE = 1
- MAJOR = 3
- try:
- from twisted.copyright import version
- except ImportError:
- hline()
- msg("The Twisted framework is not installed.")
- msg("Use 'make install-twisted' at the xen root to install.")
- msg("")
- msg("Alternatively download and install version %d.%d or higher" % (RELEASE, MAJOR))
- msg("from http://www.twistedmatrix.com/products")
- hline()
- raise CheckError("twisted is not installed")
-
-
- (release, major, minor) = version.split('.')
- release = int(release)
- major = int(major)
- if release > RELEASE: return
- if release == RELEASE and major >= MAJOR: return
- hline()
- msg("Warning: Twisted version not supported: %s" % version)
- msg("Use Twisted version %d.%d.0 or higher" % (RELEASE, MAJOR))
- hline()
-
def check_user():
"""Check that the effective user id is 0 (root).
"""
@@ -89,14 +68,58 @@ def check_user():
msg("Xend must be run as root.")
hline()
raise CheckError("invalid user")
+
+def xcs_running():
+ """ See if the control switch is running.
+ """
+ s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ try:
+ s.connect( (XCS_PATH) )
+ s.close()
+ except:
+ try:
+ os.remove(XCS_PIDFILE)
+ except:
+ pass
+ return 0
+ return 1
+def start_xcs():
+ if (not xcs_running()):
+ if os.fork():
+ time.sleep(0.1) # let xcs start
+ else:
+ if not os.path.isdir(os.path.dirname(XCS_PATH)):
+ os.makedirs(os.path.dirname(XCS_PATH))
+ try:
+ os.execvp(XCS_EXEC, XCS_ARGS)
+ except:
+ hline()
+ msg("Tried to start xcs, but failed. Is it installed?")
+ hline()
+ raise CheckError("couldn't start xcs")
+ if (not xcs_running()):
+ hline()
+ msg("Failed to start the control interface switch.")
+ hline()
+ raise CheckError("xcs not running")
+
+def stop_xcs():
+ try:
+ xcs_pidfile = open(XCS_PIDFILE)
+ xcs_pid = int(xcs_pidfile.read().strip())
+ os.kill(xcs_pid, signal.SIGTERM)
+ xcs_pidfile.close()
+ except:
+ return
+
def main():
try:
check_logging()
- check_twisted_version()
check_user()
except CheckError:
sys.exit(1)
+
daemon = SrvDaemon.instance()
if not sys.argv[1:]:
print 'usage: %s {start|stop|restart}' % sys.argv[0]
@@ -104,12 +127,17 @@ def main():
pid, status = os.wait()
return status >> 8
elif sys.argv[1] == 'start':
+ start_xcs()
return daemon.start()
elif sys.argv[1] == 'trace_start':
+ start_xcs()
return daemon.start(trace=1)
elif sys.argv[1] == 'stop':
+ stop_xcs()
return daemon.stop()
elif sys.argv[1] == 'restart':
+ stop_xcs()
+ start_xcs()
return daemon.stop() or daemon.start()
elif sys.argv[1] == 'status':
return daemon.status()
diff --git a/tools/misc/xenperf.c b/tools/misc/xenperf.c
index d2846224b1..16ddfcb293 100644
--- a/tools/misc/xenperf.c
+++ b/tools/misc/xenperf.c
@@ -22,18 +22,32 @@ int main(int argc, char *argv[])
{
int i, j, xc_handle;
xc_perfc_desc_t *pcd;
- unsigned int num, sum, reset = 0;
+ unsigned int num, sum, reset = 0, full = 0;
if ( argc > 1 )
{
char *p = argv[1];
- if ( (*p++ == '-') && (*p == 'r') )
- reset = 1;
+ if ( p[0] == '-' )
+ {
+ switch ( p[1] )
+ {
+ case 'f':
+ full = 1;
+ break;
+ case 'r':
+ reset = 1;
+ break;
+ default:
+ goto error;
+ }
+ }
else
{
+ error:
printf("%s: [-r]\n", argv[0]);
- printf("no args: print xen performance counters\n");
- printf(" -r : reset xen performance counters\n");
+ printf("no args: print digested counters\n");
+ printf(" -f : print full arrays/histograms\n");
+ printf(" -r : reset counters\n");
return 0;
}
}
@@ -94,8 +108,9 @@ int main(int argc, char *argv[])
sum += pcd[i].vals[j];
printf ("T=%10u ", (unsigned int)sum);
- for ( j = 0; j < pcd[i].nr_vals; j++ )
- printf(" %10u", (unsigned int)pcd[i].vals[j]);
+ if ( full || (pcd[i].nr_vals <= 4) )
+ for ( j = 0; j < pcd[i].nr_vals; j++ )
+ printf(" %10u", (unsigned int)pcd[i].vals[j]);
printf("\n");
}
diff --git a/tools/misc/xensv b/tools/misc/xensv
deleted file mode 100755
index 8596457cd7..0000000000
--- a/tools/misc/xensv
+++ /dev/null
@@ -1,136 +0,0 @@
-#!/usr/bin/env python
-# -*- mode: python; -*-
-#============================================================================
-# Copyright (C) 2004 Tom Wilkie <tw275@cl.cam.ac.uk>
-# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-#============================================================================
-
-"""SV web interface Lives in /usr/sbin.
- Provides pretty HTML management interface.
-
- Run:
-
- sv start
-
- The daemon is stopped with:
-
- sv stop
-
- The daemon will be accessible from http://localhost:8080/
-"""
-import os
-import sys
-import re
-
-# add fallback path for non-native python path installs if needed
-sys.path.append('/usr/lib/python')
-from xen.xend.server.params import XEND_PID_FILE
-
-class CheckError(ValueError):
- pass
-
-def hline():
- print >>sys.stderr, "*" * 70
-
-def msg(message):
- print >>sys.stderr, "*" * 3, message
-
-def check_logging():
- """Check python logging is installed and raise an error if not.
- Logging is standard from Python 2.3 on.
- """
- try:
- import logging
- except ImportError:
- hline()
- msg("Python logging is not installed.")
- msg("Use 'make install-logging' at the xen root to install.")
- msg("")
- msg("Alternatively download and install from")
- msg("http://www.red-dove.com/python_logging.html")
- hline()
- raise CheckError("logging is not installed")
-
-def check_twisted_version():
- """Check twisted is installed with a supported version and print a warning if not.
- Raises an error if twisted is not installed.
- """
- # Supported twisted release and major version.
- RELEASE = 1
- MAJOR = 3
- try:
- from twisted.copyright import version
- except ImportError:
- hline()
- msg("The Twisted framework is not installed.")
- msg("Use 'make install-twisted' at the xen root to install.")
- msg("")
- msg("Alternatively download and install version %d.%d or higher" % (RELEASE, MAJOR))
- msg("from http://www.twistedmatrix.com/products")
- hline()
- raise CheckError("twisted is not installed")
-
-
- (release, major, minor) = version.split('.')
- release = int(release)
- major = int(major)
- if release > RELEASE: return
- if release == RELEASE and major >= MAJOR: return
- hline()
- msg("Warning: Twisted version not supported: %s" % version)
- msg("Use Twisted version %d.%d.0 or higher" % (RELEASE, MAJOR))
- hline()
-
-def check_xend():
- """Check xend is running
- """
-
- if not os.path.isfile(XEND_PID_FILE) or not os.path.getsize(XEND_PID_FILE):
- hline()
- msg( "Warning: Xend has not been detected as running." )
- msg( "Please start it immediately with: xend start " )
- hline()
- return 0
-
- # Read the pid of the previous invocation and search active process list.
- pid = open(XEND_PID_FILE, 'r').read()
- lines = os.popen('ps ' + pid + ' 2>/dev/null').readlines()
- for line in lines:
- if re.search('^ *' + pid + '.+xend', line):
- return 1
-
- hline()
- msg( "Warning: Xend has not been detected as running." )
- msg( "Please start it immediately with: xend start " )
- hline()
- return 0
-
-def main():
- try:
- check_logging()
- check_twisted_version()
- check_xend()
- except CheckError:
- sys.exit(1)
-
- from xen.sv import Daemon
-
- daemon = Daemon.instance()
-
- if not sys.argv[1:]:
- print 'usage: %s {start|stop|restart}' % sys.argv[0]
- elif os.fork():
- pid, status = os.wait()
- return status >> 8
- elif sys.argv[1] == 'start':
- return daemon.start()
- elif sys.argv[1] == 'stop':
- return daemon.stop()
- elif sys.argv[1] == 'restart':
- return daemon.stop() or daemon.start()
- else:
- print 'not an option:', sys.argv[1]
- return 1
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/tools/misc/xm b/tools/misc/xm
index 6bf8931323..80972ccef9 100755
--- a/tools/misc/xm
+++ b/tools/misc/xm
@@ -4,6 +4,7 @@ import sys
# add fallback path for non-native python path installs if needed
sys.path.append('/usr/lib/python')
+sys.path.append('/usr/lib64/python')
from xen.xm import main
main.main(sys.argv)
diff --git a/tools/pygrub/Makefile b/tools/pygrub/Makefile
new file mode 100644
index 0000000000..a676cdf0e9
--- /dev/null
+++ b/tools/pygrub/Makefile
@@ -0,0 +1,18 @@
+
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+all: build
+build:
+ CFLAGS="$(CFLAGS)" python setup.py build
+
+ifndef XEN_PYTHON_NATIVE_INSTALL
+install: all
+ CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr"
+else
+install: all
+ CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)"
+endif
+
+clean:
+ rm -rf build *.pyc *.pyo *.o *.a *~
diff --git a/tools/pygrub/README b/tools/pygrub/README
new file mode 100644
index 0000000000..b58c1b96a8
--- /dev/null
+++ b/tools/pygrub/README
@@ -0,0 +1,15 @@
+pygrub is a grub-like bootloader for xen. This tool is to use to boot domU images.
+
+To compile pygrub, you will need the following packages installed:
+
+1) Libraries of ext2fs, which is the following package (depend on your Linux distribution):
+ - e2fslibs-dev on Debian based distributions (Debian, Ubuntu, Linspire, Libranet, Xandros, etc...)
+ - e2fsprogs-devel on RedHat, Fedora Core
+ - libext2fs2-devel on Mandriva/Mandrake
+ - e2fsprogs on Gentoo
+
+2) Libraries of reiserfs, which is the following package (depend on your Linux distribution):
+ - libreiserfs-dev on Debian based distributions (Debian, Ubuntu, Xandros, Libranet, Xandros, etc...)
+ - progsreiserfs-devel on RedHat
+ - progreiserfs on Gentoo
+
diff --git a/tools/pygrub/setup.py b/tools/pygrub/setup.py
new file mode 100644
index 0000000000..b72ea38857
--- /dev/null
+++ b/tools/pygrub/setup.py
@@ -0,0 +1,37 @@
+from distutils.core import setup, Extension
+import os
+
+extra_compile_args = [ "-fno-strict-aliasing", "-Wall", "-Werror" ]
+
+fsys_mods = []
+fsys_pkgs = []
+
+if os.path.exists("/usr/include/ext2fs/ext2_fs.h"):
+ ext2 = Extension("grub.fsys.ext2._pyext2",
+ extra_compile_args = extra_compile_args,
+ libraries = ["ext2fs"],
+ sources = ["src/fsys/ext2/ext2module.c"])
+ fsys_mods.append(ext2)
+ fsys_pkgs.append("grub.fsys.ext2")
+
+if os.path.exists("/usr/include/reiserfs/reiserfs.h"):
+ reiser = Extension("grub.fsys.reiser._pyreiser",
+ extra_compile_args = extra_compile_args,
+ libraries = ["reiserfs"],
+ sources = ["src/fsys/reiser/reisermodule.c"])
+ fsys_mods.append(reiser)
+ fsys_pkgs.append("grub.fsys.reiser")
+
+setup(name='pygrub',
+ version='0.2',
+ description='Boot loader that looks a lot like grub for Xen',
+ author='Jeremy Katz',
+ author_email='katzj@redhat.com',
+ license='GPL',
+ package_dir={'grub': 'src'},
+ scripts = ["src/pygrub"],
+ packages=['grub',
+ 'grub.fsys'].extend(fsys_pkgs),
+ ext_modules = fsys_mods
+ )
+
diff --git a/tools/pygrub/src/GrubConf.py b/tools/pygrub/src/GrubConf.py
new file mode 100644
index 0000000000..3603b72243
--- /dev/null
+++ b/tools/pygrub/src/GrubConf.py
@@ -0,0 +1,229 @@
+#
+# GrubConf.py - Simple grub.conf parsing
+#
+# Copyright 2005 Red Hat, Inc.
+# Jeremy Katz <katzj@redhat.com>
+#
+# This software may be freely redistributed under the terms of the GNU
+# general public license.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+import os, sys
+import logging
+
+def grub_split(s, maxsplit = -1):
+ """Split a grub option string separated with either '=' or whitespace."""
+ eq = s.find('=')
+ if eq == -1:
+ return s.split(None, maxsplit)
+
+ # see which of a space or tab is first
+ sp = s.find(' ')
+ tab = s.find('\t')
+ if (tab != -1 and tab < sp) or (tab != -1 and sp == -1):
+ sp = tab
+
+ if eq != -1 and eq < sp or (eq != -1 and sp == -1):
+ return s.split('=', maxsplit)
+ else:
+ return s.split(None, maxsplit)
+
+def get_path(s):
+ """Returns a tuple of (GrubDiskPart, path) corresponding to string."""
+ if not s.startswith('('):
+ return (None, s)
+ idx = s.find(')')
+ if idx == -1:
+ raise ValueError, "Unable to find matching ')'"
+ d = s[:idx]
+ return (GrubDiskPart(d), s[idx + 1:])
+
+class GrubDiskPart(object):
+ def __init__(self, str):
+ if str.find(',') != -1:
+ (self.disk, self.part) = str.split(",", 2)
+ else:
+ self.disk = str
+ self.part = None
+
+ def __repr__(self):
+ if self.part is not None:
+ return "d%dp%d" %(self.disk, self.part)
+ else:
+ return "d%d" %(self.disk,)
+
+ def get_disk(self):
+ return self._disk
+ def set_disk(self, val):
+ val = val.replace("(", "").replace(")", "")
+ self._disk = int(val[2:])
+ disk = property(get_disk, set_disk)
+
+ def get_part(self):
+ return self._part
+ def set_part(self, val):
+ if val is None:
+ self._part = val
+ return
+ val = val.replace("(", "").replace(")", "")
+ self._part = int(val)
+ part = property(get_part, set_part)
+
+class GrubImage(object):
+ def __init__(self, lines):
+ self._root = self._initrd = self._kernel = self._args = None
+ for l in lines:
+ (com, arg) = grub_split(l, 1)
+
+ if self.commands.has_key(com):
+ if self.commands[com] is not None:
+ exec("%s = r\"%s\"" %(self.commands[com], arg.strip()))
+ else:
+ logging.info("Ignored image directive %s" %(com,))
+ else:
+ logging.warning("Unknown image directive %s" %(com,))
+
+ def __repr__(self):
+ return ("title: %s\n"
+ " root: %s\n"
+ " kernel: %s\n"
+ " args: %s\n"
+ " initrd: %s" %(self.title, self.root, self.kernel,
+ self.args, self.initrd))
+
+ def set_root(self, val):
+ self._root = GrubDiskPart(val)
+ def get_root(self):
+ return self._root
+ root = property(get_root, set_root)
+
+ def set_kernel(self, val):
+ if val.find(" ") == -1:
+ self._kernel = get_path(val)
+ self._args = None
+ return
+ (kernel, args) = val.split(None, 1)
+ self._kernel = get_path(kernel)
+ self._args = args
+ def get_kernel(self):
+ return self._kernel
+ def get_args(self):
+ return self._args
+ kernel = property(get_kernel, set_kernel)
+ args = property(get_args)
+
+ def set_initrd(self, val):
+ self._initrd = get_path(val)
+ def get_initrd(self):
+ return self._initrd
+ initrd = property(get_initrd, set_initrd)
+
+ # set up command handlers
+ commands = { "title": "self.title",
+ "root": "self.root",
+ "rootnoverify": "self.root",
+ "kernel": "self.kernel",
+ "initrd": "self.initrd",
+ "chainloader": None,
+ "module": None}
+
+
+class GrubConfigFile(object):
+ def __init__(self, fn = None):
+ self.filename = fn
+ self.images = []
+ self.timeout = -1
+
+ if fn is not None:
+ self.parse()
+
+ def parse(self, buf = None):
+ if buf is None:
+ if self.filename is None:
+ raise ValueError, "No config file defined to parse!"
+
+ f = open(self.filename, 'r')
+ lines = f.readlines()
+ f.close()
+ else:
+ lines = buf.split("\n")
+
+ img = []
+ for l in lines:
+ l = l.strip()
+ # skip blank lines
+ if len(l) == 0:
+ continue
+ # skip comments
+ if l.startswith('#'):
+ continue
+ # new image
+ if l.startswith("title"):
+ if len(img) > 0:
+ self.images.append(GrubImage(img))
+ img = [l]
+ continue
+
+ if len(img) > 0:
+ img.append(l)
+ continue
+
+ try:
+ (com, arg) = grub_split(l, 1)
+ except ValueError:
+ com = l
+ arg = ""
+
+ if self.commands.has_key(com):
+ if self.commands[com] is not None:
+ exec("%s = r\"%s\"" %(self.commands[com], arg.strip()))
+ else:
+ logging.info("Ignored directive %s" %(com,))
+ else:
+ logging.warning("Unknown directive %s" %(com,))
+
+ if len(img) > 0:
+ self.images.append(GrubImage(img))
+
+ def _get_default(self):
+ return self._default
+ def _set_default(self, val):
+ if val == "saved":
+ self._default = -1
+ else:
+ self._default = int(val)
+
+ if self._default < 0:
+ raise ValueError, "default must be a non-negative number"
+ default = property(_get_default, _set_default)
+
+ def set_splash(self, val):
+ self._splash = get_path(val)
+ def get_splash(self):
+ return self._splash
+ splash = property(get_splash, set_splash)
+
+ # set up command handlers
+ commands = { "default": "self.default",
+ "timeout": "self.timeout",
+ "fallback": "self.fallback",
+ "hiddenmenu": "self.hiddenmenu",
+ "splashimage": "self.splash",
+ "password": "self.password" }
+ for c in ("bootp", "color", "device", "dhcp", "hide", "ifconfig",
+ "pager", "partnew", "parttype", "rarp", "serial",
+ "setkey", "terminal", "terminfo", "tftpserver", "unhide"):
+ commands[c] = None
+ del c
+
+
+if __name__ == "__main__":
+ if len(sys.argv) < 2:
+ raise RuntimeError, "Need a grub.conf to read"
+ g = GrubConfigFile(sys.argv[1])
+ for i in g.images:
+ print i #, i.title, i.root, i.kernel, i.args, i.initrd
diff --git a/tools/python/xen/xend/util.py b/tools/pygrub/src/__init__.py
index e69de29bb2..e69de29bb2 100644
--- a/tools/python/xen/xend/util.py
+++ b/tools/pygrub/src/__init__.py
diff --git a/tools/pygrub/src/fsys/__init__.py b/tools/pygrub/src/fsys/__init__.py
new file mode 100644
index 0000000000..07e12c95b6
--- /dev/null
+++ b/tools/pygrub/src/fsys/__init__.py
@@ -0,0 +1,64 @@
+#
+# Copyright 2005 Red Hat, Inc.
+# Jeremy Katz <katzj@redhat.com>
+#
+# This software may be freely redistributed under the terms of the GNU
+# general public license.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+import os
+import sys
+
+fstypes = {}
+
+def register_fstype(x):
+ if x.name in fstypes.keys():
+ return
+ fstypes[x.name] = x
+
+class FileSystemType(object):
+ """A simple representation for a file system that gives a fs name
+ and a method for sniffing a file to see if it's of the given fstype."""
+ def __init__(self):
+ self.name = ""
+
+ def sniff_magic(self, fn, offset = 0):
+ """Look at the filesystem at fn for the appropriate magic starting at
+ offset offset."""
+ raise RuntimeError, "sniff_magic not implemented"
+
+ def open_fs(self, fn, offset = 0):
+ """Open the given filesystem and return a filesystem object."""
+ raise RuntimeError, "open_fs not implemented"
+
+class FileSystem(object):
+ def open(self, name, flags = 0, block_size = 0):
+ """Open the fsys on name with given flags and block_size."""
+ raise RuntimeError, "open not implemented"
+
+ def close(self):
+ """Close the fsys."""
+ raise RuntimeError, "close not implemented"
+
+ def open_file(self, file, flags = None):
+ """Open the file 'name' with the given flags. The returned object
+ should look similar to a native file object."""
+ raise RuntimeError, "open_file not implemented"
+
+ def file_exist(self, file):
+ """Check whether the given file exists.
+ Return True if it exists, False otherwise."""
+ raise RuntimeError, "file_exist not implemented"
+
+mydir = sys.modules['grub.fsys'].__path__[0]
+for f in os.listdir(mydir):
+ if not os.path.isdir("%s/%s" %(mydir, f)):
+ continue
+ try:
+ exec "import grub.fsys.%s" %(f,)
+ except ImportError, e:
+ pass
diff --git a/tools/pygrub/src/fsys/ext2/__init__.py b/tools/pygrub/src/fsys/ext2/__init__.py
new file mode 100644
index 0000000000..ff8f7af48f
--- /dev/null
+++ b/tools/pygrub/src/fsys/ext2/__init__.py
@@ -0,0 +1,38 @@
+# Copyright 2005 Red Hat, Inc.
+# Jeremy Katz <katzj@redhat.com>
+#
+# This software may be freely redistributed under the terms of the GNU
+# general public license.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+from grub.fsys import register_fstype, FileSystemType
+from _pyext2 import *
+
+import os, struct
+
+class Ext2FileSystemType(FileSystemType):
+ def __init__(self):
+ FileSystemType.__init__(self)
+ self.name = "ext2"
+
+ def sniff_magic(self, fn, offset = 0):
+ fd = os.open(fn, os.O_RDONLY)
+ os.lseek(fd, offset, 0)
+ buf = os.read(fd, 2048)
+
+ if len(buf) > 1082 and \
+ struct.unpack("<H", buf[1080:1082]) == (0xef53,):
+ return True
+ return False
+
+ def open_fs(self, fn, offset = 0):
+ if not self.sniff_magic(fn, offset):
+ raise ValueError, "Not an ext2 filesystem"
+ return Ext2Fs(fn)
+
+register_fstype(Ext2FileSystemType())
+
diff --git a/tools/pygrub/src/fsys/ext2/ext2module.c b/tools/pygrub/src/fsys/ext2/ext2module.c
new file mode 100644
index 0000000000..bef4bb6f9f
--- /dev/null
+++ b/tools/pygrub/src/fsys/ext2/ext2module.c
@@ -0,0 +1,365 @@
+/*
+ * ext2module.c - simple python binding for libext2fs
+ *
+ * Copyright 2005 Red Hat, Inc.
+ * Jeremy Katz <katzj@redhat.com>
+ *
+ * This software may be freely redistributed under the terms of the GNU
+ * general public license.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <Python.h>
+
+#include <ext2fs/ext2fs.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#if (PYTHON_API_VERSION >= 1011)
+#define PY_PAD 0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L
+#else
+#define PY_PAD 0L,0L,0L,0L
+#endif
+
+
+/* global error object */
+PyObject *Ext2Error;
+
+typedef struct _Ext2Fs Ext2Fs;
+struct _Ext2Fs {
+ PyObject_HEAD;
+ ext2_filsys fs;
+};
+
+typedef struct _Ext2File Ext2File;
+struct _Ext2File {
+ PyObject_HEAD;
+ ext2_file_t file;
+};
+
+/* ext2 file object */
+
+static PyObject *
+ext2_file_close (Ext2File *file, PyObject *args)
+{
+ if (file->file != NULL)
+ ext2fs_file_close(file->file);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *
+ext2_file_read (Ext2File *file, PyObject *args)
+{
+ int err, size = 0;
+ unsigned int n, total = 0;
+ PyObject * buffer = NULL;
+
+ if (file->file == NULL) {
+ PyErr_SetString(PyExc_ValueError, "Cannot read from closed file");
+ return NULL;
+ }
+
+ if (!PyArg_ParseTuple(args, "|i", &size))
+ return NULL;
+
+ buffer = PyString_FromStringAndSize((char *) NULL, (size) ? size : 4096);
+ if (buffer == NULL)
+ return buffer;
+
+ while (1) {
+ err = ext2fs_file_read(file->file, PyString_AS_STRING(buffer) + total,
+ (size) ? size : 4096, &n);
+ if (err) {
+ /* drop the partially-filled buffer; decref exactly once */
+ Py_DECREF(buffer);
+ PyErr_SetString(PyExc_ValueError, "read error");
+ return NULL;
+ }
+
+ total += n;
+ if (n == 0)
+ break;
+
+ if (size && size == total)
+ break;
+
+ if (!size) {
+ _PyString_Resize(&buffer, total + 4096);
+ }
+ }
+
+ _PyString_Resize(&buffer, total);
+ return buffer;
+}
+
+static void
+ext2_file_dealloc (Ext2File * file)
+{
+ if (file->file != NULL)
+ ext2fs_file_close(file->file);
+ PyMem_DEL(file);
+}
+
+static struct PyMethodDef Ext2FileMethods[] = {
+ { "close",
+ (PyCFunction) ext2_file_close,
+ METH_VARARGS, NULL },
+ { "read",
+ (PyCFunction) ext2_file_read,
+ METH_VARARGS, NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+static PyObject *
+ext2_file_getattr (Ext2File * file, char * name)
+{
+ return Py_FindMethod (Ext2FileMethods, (PyObject *) file, name);
+}
+
+static char Ext2FileType__doc__[] = "This is the ext2 filesystem object";
+PyTypeObject Ext2FileType = {
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0, /* ob_size */
+ "Ext2File", /* tp_name */
+ sizeof(Ext2File), /* tp_size */
+ 0, /* tp_itemsize */
+ (destructor) ext2_file_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ (getattrfunc) ext2_file_getattr, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ 0L, /* tp_flags */
+ Ext2FileType__doc__,
+ PY_PAD
+};
+
+static PyObject *
+ext2_file_open (Ext2Fs *fs, char * name, int flags)
+{
+ int err;
+ ext2_file_t f;
+ ext2_ino_t ino;
+ Ext2File * file;
+
+ file = (Ext2File *) PyObject_NEW(Ext2File, &Ext2FileType);
+ file->file = NULL;
+
+ err = ext2fs_namei_follow(fs->fs, EXT2_ROOT_INO, EXT2_ROOT_INO, name, &ino);
+ if (err) {
+ PyErr_SetString(PyExc_ValueError, "unable to open file");
+ return NULL;
+ }
+
+ err = ext2fs_file_open(fs->fs, ino, flags, &f);
+ if (err) {
+ PyErr_SetString(PyExc_ValueError, "unable to open file");
+ return NULL;
+ }
+
+ file->file = f;
+ return (PyObject *) file;
+}
+
+static PyObject *
+ext2_file_exist (Ext2Fs *fs, char * name)
+{
+ int err;
+ ext2_ino_t ino;
+ /* Only the inode lookup is needed to test for existence; */
+ /* allocating an Ext2File object here (as the open path does) */
+ /* would leak one object per call, so none is created. */
+
+
+ err = ext2fs_namei_follow(fs->fs, EXT2_ROOT_INO, EXT2_ROOT_INO, name, &ino);
+ if (err) {
+ Py_INCREF(Py_False);
+ return Py_False;
+ }
+ Py_INCREF(Py_True);
+ return Py_True;
+}
+
+/* ext2fs object */
+
+static PyObject *
+ext2_fs_close (Ext2Fs *fs, PyObject *args)
+{
+ if (fs->fs != NULL)
+ ext2fs_close(fs->fs);
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *
+ext2_fs_open (Ext2Fs *fs, PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "name", "flags", "superblock",
+ "block_size", NULL };
+ char * name;
+ int flags = 0, superblock = 0, err;
+ unsigned int block_size = 0;
+ ext2_filsys efs;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|iii", kwlist,
+ &name, &flags, &superblock, &block_size))
+ return NULL;
+
+ if (fs->fs != NULL) {
+ PyErr_SetString(PyExc_ValueError, "already have an fs object");
+ return NULL;
+ }
+
+ err = ext2fs_open(name, flags, superblock, block_size,
+ unix_io_manager, &efs);
+ if (err) {
+ PyErr_SetString(PyExc_ValueError, "unable to open file");
+ return NULL;
+ }
+
+ fs->fs = efs;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *
+ext2_fs_open_file (Ext2Fs *fs, PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "name", "flags", NULL };
+ char * name;
+ int flags = 0;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|i", kwlist,
+ &name, &flags))
+ return NULL;
+
+ return ext2_file_open(fs, name, flags);
+}
+
+static PyObject *
+ext2_fs_file_exist (Ext2Fs *fs, PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "name", NULL };
+ char * name;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &name))
+ return NULL;
+
+ return ext2_file_exist(fs, name);
+}
+
+static void
+ext2_fs_dealloc (Ext2Fs * fs)
+{
+ if (fs->fs != NULL)
+ ext2fs_close(fs->fs);
+ PyMem_DEL(fs);
+}
+
+static struct PyMethodDef Ext2FsMethods[] = {
+ { "close",
+ (PyCFunction) ext2_fs_close,
+ METH_VARARGS, NULL },
+ { "open",
+ (PyCFunction) ext2_fs_open,
+ METH_VARARGS|METH_KEYWORDS, NULL },
+ { "open_file",
+ (PyCFunction) ext2_fs_open_file,
+ METH_VARARGS|METH_KEYWORDS, NULL },
+ { "file_exist",
+ (PyCFunction) ext2_fs_file_exist,
+ METH_VARARGS|METH_KEYWORDS, NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+static PyObject *
+ext2_fs_getattr (Ext2Fs * fs, char * name)
+{
+ return Py_FindMethod (Ext2FsMethods, (PyObject *) fs, name);
+}
+
+static char Ext2FsType__doc__[] = "This is the ext2 filesystem object";
+PyTypeObject Ext2FsType = {
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0, /* ob_size */
+ "Ext2Fs", /* tp_name */
+ sizeof(Ext2Fs), /* tp_size */
+ 0, /* tp_itemsize */
+ (destructor) ext2_fs_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ (getattrfunc) ext2_fs_getattr, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ 0L, /* tp_flags */
+ Ext2FsType__doc__,
+ PY_PAD
+};
+
+static PyObject *
+ext2_fs_new(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "name", "flags", "superblock",
+ "block_size", NULL };
+ char * name;
+ int flags = 0, superblock = 0;
+ unsigned int block_size = 0;
+ Ext2Fs *pfs;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|iii", kwlist,
+ &name, &flags, &superblock, &block_size))
+ return NULL;
+
+ pfs = (Ext2Fs *) PyObject_NEW(Ext2Fs, &Ext2FsType);
+ if (pfs == NULL)
+ return NULL;
+ pfs->fs = NULL;
+
+ if (!ext2_fs_open(pfs,
+ Py_BuildValue("siii", name, flags, superblock, block_size),
+ NULL))
+ return NULL;
+
+ return (PyObject *)pfs;
+}
+
+static struct PyMethodDef Ext2ModuleMethods[] = {
+ { "Ext2Fs", (PyCFunction) ext2_fs_new, METH_VARARGS|METH_KEYWORDS, NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+void init_pyext2(void) {
+ PyObject *m;
+
+ m = Py_InitModule("_pyext2", Ext2ModuleMethods);
+ /*
+ * PyObject *d;
+ * d = PyModule_GetDict(m);
+ * o = PyObject_NEW(PyObject, yExt2FsConstructorType);
+ * PyDict_SetItemString(d, "PyExt2Fs", o);
+ * Py_DECREF(o);
+ */
+}
diff --git a/tools/pygrub/src/fsys/ext2/test.py b/tools/pygrub/src/fsys/ext2/test.py
new file mode 100644
index 0000000000..eeb79506ee
--- /dev/null
+++ b/tools/pygrub/src/fsys/ext2/test.py
@@ -0,0 +1,15 @@
+#!/usr/bin/python
+
+
+import _pyext2
+import struct, os, sys
+
+fs = _pyext2.Ext2Fs("test.img")
+
+f = fs.open_file("/boot/vmlinuz-2.6.11-1.1177_FC4")
+buf = f.read()
+o = open("vmlinuz", "wb+")
+o.write(buf)
+o.close()
+
+f.close()
diff --git a/tools/pygrub/src/fsys/reiser/__init__.py b/tools/pygrub/src/fsys/reiser/__init__.py
new file mode 100644
index 0000000000..e49e7c3e0a
--- /dev/null
+++ b/tools/pygrub/src/fsys/reiser/__init__.py
@@ -0,0 +1,39 @@
+#
+# Copyright (C) 2005 Nguyen Anh Quynh <aquynh@gmail.com>
+#
+# This software may be freely redistributed under the terms of the GNU
+# general public license.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+from grub.fsys import register_fstype, FileSystemType
+from _pyreiser import *
+
+import os
+
+FSMAGIC2 = 'ReIsEr2'
+FSMAGIC3 = 'ReIsEr3'
+
+class ReiserFileSystemType(FileSystemType):
+ def __init__(self):
+ FileSystemType.__init__(self)
+ self.name = "reiser"
+
+ def sniff_magic(self, fn, offset = 0):
+ fd = os.open(fn, os.O_RDONLY)
+ os.lseek(fd, 0x10000, 0)
+ buf = os.read(fd, 0x40)
+ if len(buf) == 0x40 and (buf[0x34:0x3B] in [FSMAGIC2, FSMAGIC3]) :
+ return True
+ return False
+
+ def open_fs(self, fn, offset = 0):
+ if not self.sniff_magic(fn, offset):
+ raise ValueError, "Not a reiserfs filesystem"
+ return ReiserFs(fn)
+
+register_fstype(ReiserFileSystemType())
+
diff --git a/tools/pygrub/src/fsys/reiser/reisermodule.c b/tools/pygrub/src/fsys/reiser/reisermodule.c
new file mode 100644
index 0000000000..dea7152593
--- /dev/null
+++ b/tools/pygrub/src/fsys/reiser/reisermodule.c
@@ -0,0 +1,345 @@
+/*
+ * reisermodule.c - simple python binding for libreiserfs{2,3}
+ *
+ * Copyright (C) 2005 Nguyen Anh Quynh <aquynh@gmail.com>
+ *
+ * This software may be freely redistributed under the terms of the GNU
+ * general public license.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <Python.h>
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <dal/file_dal.h>
+#include <reiserfs/reiserfs.h>
+
+#if (PYTHON_API_VERSION >= 1011)
+#define PY_PAD 0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L
+#else
+#define PY_PAD 0L,0L,0L,0L
+#endif
+
+
+/* global error object */
+PyObject *ReiserError;
+
+typedef struct {
+ PyObject_HEAD
+ reiserfs_fs_t *fs;
+ dal_t *dal;
+} ReiserFs;
+
+typedef struct _ReiserFile ReiserFile;
+struct _ReiserFile {
+ PyObject_HEAD
+ reiserfs_file_t *file;
+};
+
+void file_dal_close(dal_t *dal) {
+
+ if (!dal) return;
+
+ close((int)dal->dev);
+ dal_free(dal);
+}
+
+/* reiser file object */
+
+static PyObject *
+reiser_file_close (ReiserFile *file, PyObject *args)
+{
+ if (file->file != NULL)
+ {
+ reiserfs_file_close(file->file);
+ file->file = NULL;
+ }
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *
+reiser_file_read (ReiserFile *file, PyObject *args)
+{
+ int size = 0;
+ size_t n, total = 0;
+ PyObject * buffer = NULL;
+
+ if (file->file == NULL) {
+ PyErr_SetString(PyExc_ValueError, "Cannot read from closed file");
+ return NULL;
+ }
+
+ if (!PyArg_ParseTuple(args, "|i", &size))
+ return NULL;
+
+ buffer = PyString_FromStringAndSize((char *) NULL, (size) ? size : 4096);
+ if (buffer == NULL)
+ return buffer;
+
+ while (1) {
+ n = reiserfs_file_read(file->file, PyString_AS_STRING(buffer) + total,
+ (size) ? size : 4096);
+ if (n == 0)
+ break;
+
+ total += n;
+
+ if (size && size == total)
+ break;
+
+ if (!size) {
+ _PyString_Resize(&buffer, total + 4096);
+ }
+ }
+
+ _PyString_Resize(&buffer, total);
+ return buffer;
+}
+
+static void
+reiser_file_dealloc (ReiserFile * file)
+{
+ if (file->file != NULL) {
+ reiserfs_file_close(file->file);
+ file->file = NULL;
+ }
+ PyObject_DEL(file);
+}
+
+static struct PyMethodDef ReiserFileMethods[] = {
+ { "close", (PyCFunction) reiser_file_close, METH_VARARGS, NULL },
+ { "read", (PyCFunction) reiser_file_read, METH_VARARGS, NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+static PyObject *
+reiser_file_getattr (ReiserFile * file, char * name)
+{
+ return Py_FindMethod (ReiserFileMethods, (PyObject *) file, name);
+}
+
+static char ReiserFileType__doc__[] = "This is the reiser filesystem object";
+PyTypeObject ReiserFileType = {
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0, /* ob_size */
+ "ReiserFile", /* tp_name */
+ sizeof(ReiserFile), /* tp_size */
+ 0, /* tp_itemsize */
+ (destructor) reiser_file_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ (getattrfunc) reiser_file_getattr, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ ReiserFileType__doc__,
+ PY_PAD
+};
+
+static PyObject *
+reiser_file_open (ReiserFs *fs, char *name, int flags)
+{
+ ReiserFile *file;
+ reiserfs_file_t *f;
+
+ file = (ReiserFile *) PyObject_NEW(ReiserFile, &ReiserFileType);
+
+ f = reiserfs_file_open(fs->fs, name, flags);
+ file->file = f;
+
+ if (!f) {
+ PyErr_SetString(PyExc_ValueError, "unable to open file");
+ return NULL;
+ }
+
+ return (PyObject *) file;
+}
+
+static PyObject *
+reiser_file_exist (ReiserFs *fs, char *name)
+{
+ reiserfs_file_t *f;
+
+ f = reiserfs_file_open(fs->fs, name, O_RDONLY);
+
+ if (!f) {
+ Py_INCREF(Py_False);
+ return Py_False;
+ }
+ reiserfs_file_close(f);
+ Py_INCREF(Py_True);
+ return Py_True;
+}
+
+/* reiserfs object */
+
+static PyObject *
+reiser_fs_close (ReiserFs *fs, PyObject *args)
+{
+ if (fs->fs != NULL)
+ {
+ reiserfs_fs_close(fs->fs);
+ file_dal_close(fs->dal);
+ fs->fs = NULL;
+ }
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *
+reiser_fs_open (ReiserFs *fs, PyObject *args)
+{
+ char *name;
+ size_t block_size = DEFAULT_BLOCK_SIZE;
+ dal_t *dal;
+ reiserfs_fs_t *rfs;
+
+ if (!PyArg_ParseTuple(args, "s|i", &name, &block_size))
+ return NULL;
+
+ if (fs->fs != NULL) {
+ PyErr_SetString(PyExc_ValueError, "already have an fs object");
+ return NULL;
+ }
+
+ if (!(dal = file_dal_open(name, block_size, O_RDONLY))) {
+ PyErr_SetString(PyExc_ValueError, "Couldn't create device abstraction");
+ return NULL;
+ }
+
+ if (!(rfs = reiserfs_fs_open_fast(dal, dal))) {
+ file_dal_close(dal);
+ PyErr_SetString(PyExc_ValueError, "unable to open file");
+ return NULL;
+ }
+
+ fs->fs = rfs;
+ fs->dal = dal;
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *
+reiser_fs_open_file (ReiserFs *fs, PyObject *args)
+{
+ char *name;
+ int flags = 0;
+
+ if (!PyArg_ParseTuple(args, "s|i", &name, &flags))
+ return NULL;
+
+ return reiser_file_open(fs, name, flags);
+}
+
+static PyObject *
+reiser_fs_file_exist (ReiserFs *fs, PyObject *args)
+{
+ char * name;
+
+ if (!PyArg_ParseTuple(args, "s", &name))
+ return NULL;
+
+ return reiser_file_exist(fs, name);
+}
+
+static void
+reiser_fs_dealloc (ReiserFs * fs)
+{
+ if (fs->fs != NULL)
+ {
+ reiserfs_fs_close(fs->fs);
+ file_dal_close(fs->dal);
+ fs->fs = NULL;
+ }
+ PyObject_DEL(fs);
+}
+
+static struct PyMethodDef ReiserFsMethods[] = {
+ { "close", (PyCFunction) reiser_fs_close, METH_VARARGS, NULL },
+ { "open", (PyCFunction) reiser_fs_open, METH_VARARGS, NULL },
+ { "open_file", (PyCFunction) reiser_fs_open_file, METH_VARARGS, NULL },
+ { "file_exist", (PyCFunction) reiser_fs_file_exist, METH_VARARGS, NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+static PyObject *
+reiser_fs_getattr (ReiserFs * fs, char * name)
+{
+ return Py_FindMethod (ReiserFsMethods, (PyObject *) fs, name);
+}
+
+static char ReiserFsType__doc__[] = "This is the reiser filesystem object";
+
+PyTypeObject ReiserFsType = {
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0, /* ob_size */
+ "ReiserFs", /* tp_name */
+ sizeof(ReiserFs), /* tp_size */
+ 0, /* tp_itemsize */
+ (destructor) reiser_fs_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ (getattrfunc) reiser_fs_getattr, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ ReiserFsType__doc__,
+ PY_PAD
+};
+
+static PyObject *
+reiser_fs_new(PyObject *o, PyObject *args)
+{
+ char *name;
+ size_t block_size = DEFAULT_BLOCK_SIZE;
+ ReiserFs *pfs;
+
+ if (!PyArg_ParseTuple(args, "s|i", &name, &block_size))
+ return NULL;
+
+ pfs = (ReiserFs *) PyObject_NEW(ReiserFs, &ReiserFsType);
+ if (pfs == NULL)
+ return NULL;
+
+ pfs->fs = NULL;
+
+ if (!reiser_fs_open(pfs, Py_BuildValue("si", name, block_size)))
+ return NULL;
+
+ return (PyObject *)pfs;
+}
+
+static struct PyMethodDef ReiserModuleMethods[] = {
+ { "ReiserFs", (PyCFunction) reiser_fs_new, METH_VARARGS},
+ { NULL, NULL, 0}
+};
+
+void init_pyreiser(void) {
+ Py_InitModule("_pyreiser", ReiserModuleMethods);
+}
diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
new file mode 100644
index 0000000000..545feea54f
--- /dev/null
+++ b/tools/pygrub/src/pygrub
@@ -0,0 +1,278 @@
+#!/usr/bin/python
+#
+# pygrub - simple python-based bootloader for Xen
+#
+# Copyright 2005 Red Hat, Inc.
+# Jeremy Katz <katzj@redhat.com>
+#
+# This software may be freely redistributed under the terms of the GNU
+# general public license.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+import os, sys, string, struct, tempfile
+import logging
+
+import curses, _curses, curses.wrapper
+import getopt
+
+sys.path = [ '/usr/lib/python' ] + sys.path
+
+import grub.GrubConf
+import grub.fsys
+
+PYGRUB_VER = 0.02
+
+
+def draw_window():
+ stdscr = curses.initscr()
+ curses.use_default_colors()
+ try:
+ curses.curs_set(0)
+ except _curses.error:
+ pass
+
+ stdscr.addstr(1, 4, "pyGRUB version %s" %(PYGRUB_VER,))
+
+ win = curses.newwin(10, 74, 2, 1)
+ win.box()
+ win.refresh()
+
+ stdscr.addstr(12, 5, "Use the U and D keys to select which entry is highlighted.")
+ stdscr.addstr(13, 5, "Press enter to boot the selected OS. 'e' to edit the")
+ stdscr.addstr(14, 5, "commands before booting, 'a' to modify the kernel arguments ")
+ stdscr.addstr(15, 5, "before booting, or 'c' for a command line.")
+ stdscr.addch(12, 13, curses.ACS_UARROW)
+ stdscr.addch(12, 19, curses.ACS_DARROW)
+ (y, x) = stdscr.getmaxyx()
+ stdscr.move(y - 1, x - 1)
+
+ stdscr.refresh()
+ return (stdscr, win)
+
+def fill_entries(win, cfg, selected):
+ y = 0
+
+ for i in cfg.images:
+ if (0, y) > win.getmaxyx():
+ break
+ if y == selected:
+ attr = curses.A_REVERSE
+ else:
+ attr = 0
+ win.addstr(y + 1, 2, i.title.ljust(70), attr)
+ y += 1
+ win.refresh()
+
+def select(win, line):
+ win.attron(curses.A_REVERSE)
+ win.redrawln(line + 1, 1)
+ win.refresh()
+
+def is_disk_image(file):
+ fd = os.open(file, os.O_RDONLY)
+ buf = os.read(fd, 512)
+ os.close(fd)
+
+ if len(buf) >= 512 and struct.unpack("H", buf[0x1fe: 0x200]) == (0xaa55,):
+ return True
+ return False
+
+def get_config(fn):
+ if not os.access(fn, os.R_OK):
+ raise RuntimeError, "Unable to access %s" %(fn,)
+
+ cf = grub.GrubConf.GrubConfigFile()
+
+ if is_disk_image(fn):
+ raise RuntimeError, "appears to be a full disk image... unable to handle this yet"
+
+ # open the image and read the grub config
+ fs = None
+ for fstype in grub.fsys.fstypes.values():
+ if fstype.sniff_magic(fn):
+ fs = fstype.open_fs(fn)
+ break
+
+ if fs is not None:
+ if fs.file_exist("/boot/grub/menu.lst"):
+ grubfile = "/boot/grub/menu.lst"
+ elif fs.file_exist("/boot/grub/grub.conf"):
+ grubfile = "/boot/grub/grub.conf"
+ else:
+ raise RuntimeError, "we couldn't find /boot/grub{menu.lst,grub.conf} " + \
+ "in the image provided. halt!"
+ f = fs.open_file(grubfile)
+ buf = f.read()
+ f.close()
+ fs.close()
+ # then parse the grub config
+ cf.parse(buf)
+ else:
+ # set the config file and parse it
+ cf.filename = fn
+ cf.parse()
+
+ return cf
+
+def get_entry_idx(cf, entry):
+ # first, see if the given entry is numeric
+ try:
+ idx = string.atoi(entry)
+ return idx
+ except ValueError:
+ pass
+
+ # it's not, now check the labels for a match
+ for i in range(len(cf.images)):
+ if entry == cf.images[i].title:
+ return i
+
+ return None
+
+def main(cf = None):
+ mytime = 0
+
+ (stdscr, win) = draw_window()
+ stdscr.timeout(1000)
+ selected = cf.default
+
+ while (mytime < int(cf.timeout)):
+ if cf.timeout != -1 and mytime != -1:
+ stdscr.addstr(20, 5, "Will boot selected entry in %2d seconds"
+ %(int(cf.timeout) - mytime))
+ else:
+ stdscr.addstr(20, 5, " " * 80)
+
+ fill_entries(win, cf, selected)
+ c = stdscr.getch()
+ if mytime != -1:
+ mytime += 1
+# if c == ord('q'):
+# selected = -1
+# break
+ elif c == ord('c'):
+ # FIXME: needs to go to command line mode
+ continue
+ elif c == ord('a'):
+ # FIXME: needs to go to append mode
+ continue
+ elif c == ord('e'):
+ # FIXME: needs to go to edit mode
+ continue
+ elif c in (curses.KEY_ENTER, ord('\n'), ord('\r')):
+ break
+ elif c == curses.KEY_UP:
+ mytime = -1
+ selected -= 1
+ elif c == curses.KEY_DOWN:
+ mytime = -1
+ selected += 1
+ else:
+ pass
+
+ # bound at the top and bottom
+ if selected < 0:
+ selected = 0
+ elif selected >= len(cf.images):
+ selected = len(cf.images) - 1
+
+ if selected >= 0:
+ return selected
+
+if __name__ == "__main__":
+ sel = None
+
+ def run_main(scr, *args):
+ global sel
+ sel = main(cf)
+
+ def usage():
+ print >> sys.stderr, "Usage: %s [-q|--quiet] [--output=] [--entry=] <image>" %(sys.argv[0],)
+
+ try:
+ opts, args = getopt.gnu_getopt(sys.argv[1:], 'qh::',
+ ["quiet", "help", "output=", "entry="])
+ except getopt.GetoptError:
+ usage()
+ sys.exit(1)
+
+ if len(args) < 1:
+ usage()
+ sys.exit(1)
+ file = args[0]
+
+ output = None
+ entry = None
+ interactive = True
+ for o, a in opts:
+ if o in ("-q", "--quiet"):
+ interactive = False
+ elif o in ("-h", "--help"):
+ usage()
+ sys.exit()
+ elif o in ("--output",):
+ output = a
+ elif o in ("--entry",):
+ entry = a
+ # specifying the entry to boot implies non-interactive
+ interactive = False
+
+ if output is None or output == "-":
+ fd = sys.stdout.fileno()
+ else:
+ fd = os.open(output, os.O_WRONLY)
+
+ cf = get_config(file)
+ if interactive:
+ curses.wrapper(run_main)
+ else:
+ sel = cf.default
+
+ # set the entry to boot as requested
+ if entry is not None:
+ idx = get_entry_idx(cf, entry)
+ if idx is not None and idx >= 0 and idx < len(cf.images):
+ sel = idx
+
+ img = cf.images[sel]
+ print "Going to boot %s" %(img.title)
+ print " kernel: %s" %(img.kernel[1],)
+ if img.initrd:
+ print " initrd: %s" %(img.initrd[1],)
+
+ if is_disk_image(file):
+ raise RuntimeError, "unable to handle full disk images yet"
+
+ # read the kernel and initrd onto the hostfs
+ fs = None
+ for fstype in grub.fsys.fstypes.values():
+ if fstype.sniff_magic(file):
+ fs = fstype.open_fs(file)
+ break
+
+ if fs is None:
+ raise RuntimeError, "Unable to open filesystem"
+
+ kernel = fs.open_file(img.kernel[1],).read()
+ (tfd, fn) = tempfile.mkstemp(prefix="vmlinuz.")
+ os.write(tfd, kernel)
+ os.close(tfd)
+ sxp = "linux (kernel %s)" %(fn,)
+
+ if img.initrd:
+ initrd = fs.open_file(img.initrd[1],).read()
+ (tfd, fn) = tempfile.mkstemp(prefix="initrd.")
+ os.write(tfd, initrd)
+ os.close(tfd)
+ sxp += "(ramdisk %s)" %(fn,)
+ else:
+ initrd = None
+ sxp += "(args '%s')" %(img.args,)
+
+ sys.stdout.flush()
+ os.write(fd, sxp)
+
diff --git a/tools/python/setup.py b/tools/python/setup.py
index 99069d0be4..fabe80bd8b 100644
--- a/tools/python/setup.py
+++ b/tools/python/setup.py
@@ -9,14 +9,15 @@ extra_compile_args = [ "-fno-strict-aliasing", "-Wall", "-Werror" ]
include_dirs = [ XEN_ROOT + "/tools/python/xen/lowlevel/xu",
XEN_ROOT + "/tools/libxc",
- XEN_ROOT + "/tools/libxutil",
+ XEN_ROOT + "/tools/xenstore",
+ XEN_ROOT + "/tools/xcs",
]
library_dirs = [ XEN_ROOT + "/tools/libxc",
- XEN_ROOT + "/tools/libxutil",
+ XEN_ROOT + "/tools/xenstore",
]
-libraries = [ "xc", "xutil" ]
+libraries = [ "xc", "xenstore" ]
xc = Extension("xc",
extra_compile_args = extra_compile_args,
@@ -31,7 +32,14 @@ xu = Extension("xu",
library_dirs = library_dirs,
libraries = libraries,
sources = [ "xen/lowlevel/xu/xu.c" ])
-
+
+xs = Extension("xs",
+ extra_compile_args = extra_compile_args,
+ include_dirs = include_dirs + [ "xen/lowlevel/xs" ],
+ library_dirs = library_dirs,
+ libraries = libraries,
+ sources = [ "xen/lowlevel/xs/xs.c" ])
+
setup(name = 'xen',
version = '2.0',
description = 'Xen',
@@ -40,11 +48,12 @@ setup(name = 'xen',
'xen.util',
'xen.xend',
'xen.xend.server',
- 'xen.sv',
+ 'xen.xend.xenstore',
'xen.xm',
+ 'xen.web',
],
ext_package = "xen.lowlevel",
- ext_modules = [ xc, xu ]
+ ext_modules = [ xc, xu, xs ]
)
os.chdir('logging')
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index d2b7da0eba..13d60be08e 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -14,8 +14,9 @@
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>
+
#include "xc_private.h"
-#include "gzip_stream.h"
+#include "linux_boot_params.h"
/* Needed for Python versions earlier than 2.3. */
#ifndef PyMODINIT_FUNC
@@ -35,26 +36,55 @@ typedef struct {
* Definitions for the 'xc' object type.
*/
+static PyObject *pyxc_domain_dumpcore(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ XcObject *xc = (XcObject *)self;
+
+ u32 dom;
+ char *corefile;
+
+ static char *kwd_list[] = { "dom", "corefile", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list, &dom, &corefile) )
+ goto exit;
+
+ if ( (corefile == NULL) || (corefile[0] == '\0') )
+ goto exit;
+
+ if ( xc_domain_dumpcore(xc->xc_handle, dom, corefile) != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
+
+ exit:
+ return NULL;
+}
+
+static PyObject *pyxc_handle(PyObject *self)
+{
+ XcObject *xc = (XcObject *)self;
+
+ return PyInt_FromLong(xc->xc_handle);
+}
+
static PyObject *pyxc_domain_create(PyObject *self,
PyObject *args,
PyObject *kwds)
{
XcObject *xc = (XcObject *)self;
- unsigned int mem_kb = 0;
- int cpu = -1;
- float cpu_weight = 1;
u32 dom = 0;
int ret;
- static char *kwd_list[] = { "dom", "mem_kb", "cpu", "cpu_weight", NULL };
+ static char *kwd_list[] = { "dom", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "|iiif", kwd_list,
- &dom, &mem_kb, &cpu, &cpu_weight))
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "|i", kwd_list, &dom))
return NULL;
- if ( (ret = xc_domain_create(
- xc->xc_handle, mem_kb, cpu, cpu_weight, &dom)) < 0 )
+ if ( (ret = xc_domain_create(xc->xc_handle, &dom)) < 0 )
return PyErr_SetFromErrno(xc_error);
return PyInt_FromLong(dom);
@@ -127,15 +157,38 @@ static PyObject *pyxc_domain_pincpu(PyObject *self,
XcObject *xc = (XcObject *)self;
u32 dom;
- int cpu = -1;
+ int vcpu = 0;
+ cpumap_t cpumap = 0xFFFFFFFF;
- static char *kwd_list[] = { "dom", "cpu", NULL };
+ static char *kwd_list[] = { "dom", "vcpu", "cpumap", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list,
- &dom, &cpu) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|ii", kwd_list,
+ &dom, &vcpu, &cpumap) )
return NULL;
- if ( xc_domain_pincpu(xc->xc_handle, dom, cpu) != 0 )
+ if ( xc_domain_pincpu(xc->xc_handle, dom, vcpu, &cpumap) != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
+}
+
+static PyObject *pyxc_domain_setcpuweight(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
+{
+ XcObject *xc = (XcObject *)self;
+
+ u32 dom;
+ float cpuweight = 1;
+
+ static char *kwd_list[] = { "dom", "cpuweight", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|f", kwd_list,
+ &dom, &cpuweight) )
+ return NULL;
+
+ if ( xc_domain_setcpuweight(xc->xc_handle, dom, cpuweight) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
@@ -147,10 +200,10 @@ static PyObject *pyxc_domain_getinfo(PyObject *self,
PyObject *kwds)
{
XcObject *xc = (XcObject *)self;
- PyObject *list;
+ PyObject *list, *vcpu_list, *cpumap_list, *info_dict;
u32 first_dom = 0;
- int max_doms = 1024, nr_doms, i;
+ int max_doms = 1024, nr_doms, i, j;
xc_dominfo_t *info;
static char *kwd_list[] = { "first_dom", "max_doms", NULL };
@@ -167,23 +220,33 @@ static PyObject *pyxc_domain_getinfo(PyObject *self,
list = PyList_New(nr_doms);
for ( i = 0 ; i < nr_doms; i++ )
{
- PyList_SetItem(
- list, i,
- Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:i,s:i"
- ",s:l,s:L,s:l,s:i}",
- "dom", info[i].domid,
- "cpu", info[i].cpu,
- "dying", info[i].dying,
- "crashed", info[i].crashed,
- "shutdown", info[i].shutdown,
- "paused", info[i].paused,
- "blocked", info[i].blocked,
- "running", info[i].running,
- "mem_kb", info[i].nr_pages*4,
- "cpu_time", info[i].cpu_time,
- "maxmem_kb", info[i].max_memkb,
- "shutdown_reason", info[i].shutdown_reason
- ));
+ vcpu_list = PyList_New(MAX_VIRT_CPUS);
+ cpumap_list = PyList_New(MAX_VIRT_CPUS);
+ for ( j = 0; j < MAX_VIRT_CPUS; j++ ) {
+ PyList_SetItem( vcpu_list, j,
+ Py_BuildValue("i", info[i].vcpu_to_cpu[j]));
+ PyList_SetItem( cpumap_list, j,
+ Py_BuildValue("i", info[i].cpumap[j]));
+ }
+
+ info_dict = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:i,s:i"
+ ",s:l,s:L,s:l,s:i}",
+ "dom", info[i].domid,
+ "vcpus", info[i].vcpus,
+ "dying", info[i].dying,
+ "crashed", info[i].crashed,
+ "shutdown", info[i].shutdown,
+ "paused", info[i].paused,
+ "blocked", info[i].blocked,
+ "running", info[i].running,
+ "mem_kb", info[i].nr_pages*4,
+ "cpu_time", info[i].cpu_time,
+ "maxmem_kb", info[i].max_memkb,
+ "shutdown_reason", info[i].shutdown_reason);
+ PyDict_SetItemString( info_dict, "vcpu_to_cpu", vcpu_list );
+ PyDict_SetItemString( info_dict, "cpumap", cpumap_list );
+ PyList_SetItem( list, i, info_dict);
+
}
free(info);
@@ -191,155 +254,37 @@ static PyObject *pyxc_domain_getinfo(PyObject *self,
return list;
}
-static int file_save(XcObject *xc, XcIOContext *ctxt, char *state_file)
-{
- int rc = -1;
- int fd = -1;
- int open_flags = (O_CREAT | O_EXCL | O_WRONLY);
- int open_mode = 0644;
-
- printf("%s>\n", __FUNCTION__);
-
- if ( (fd = open(state_file, open_flags, open_mode)) < 0 )
- {
- xcio_perror(ctxt, "Could not open file for writing");
- goto exit;
- }
-
- printf("%s>gzip_stream_fdopen... \n", __FUNCTION__);
-
- /* Compression rate 1: we want speed over compression.
- * We're mainly going for those zero pages, after all.
- */
- ctxt->io = gzip_stream_fdopen(fd, "wb1");
- if ( ctxt->io == NULL )
- {
- xcio_perror(ctxt, "Could not allocate compression state");
- goto exit;
- }
-
- printf("%s> xc_linux_save...\n", __FUNCTION__);
-
- rc = xc_linux_save(xc->xc_handle, ctxt);
-
- exit:
- if ( ctxt->io != NULL )
- IOStream_close(ctxt->io);
- if ( fd >= 0 )
- close(fd);
- unlink(state_file);
- printf("%s> rc=%d\n", __FUNCTION__, rc);
- return rc;
-}
-
-static PyObject *pyxc_linux_save(PyObject *self,
- PyObject *args,
- PyObject *kwds)
+static PyObject *pyxc_linux_build(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
{
XcObject *xc = (XcObject *)self;
- char *state_file;
- int progress = 1, debug = 0;
- PyObject *val = NULL;
- int rc = -1;
- XcIOContext ioctxt = { .info = iostdout, .err = iostderr };
-
- static char *kwd_list[] = { "dom", "state_file", "vmconfig", "progress", "debug", NULL };
-
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is|sii", kwd_list,
- &ioctxt.domain,
- &state_file,
- &ioctxt.vmconfig,
- &progress,
- &debug) )
- goto exit;
-
- ioctxt.vmconfig_n = (ioctxt.vmconfig ? strlen(ioctxt.vmconfig) : 0);
-
- if ( progress )
- ioctxt.flags |= XCFLAGS_VERBOSE;
- if ( debug )
- ioctxt.flags |= XCFLAGS_DEBUG;
-
- if ( (state_file == NULL) || (state_file[0] == '\0') )
- goto exit;
-
- rc = file_save(xc, &ioctxt, state_file);
- if ( rc != 0 )
- {
- PyErr_SetFromErrno(xc_error);
- goto exit;
- }
-
- Py_INCREF(zero);
- val = zero;
-
- exit:
- return val;
-}
-
-
-static int file_restore(XcObject *xc, XcIOContext *ioctxt, char *state_file)
-{
- int rc = -1;
+ u32 dom;
+ char *image, *ramdisk = NULL, *cmdline = "";
+ int flags = 0, vcpus = 1;
+ int control_evtchn, store_evtchn;
+ unsigned long store_mfn = 0;
- ioctxt->io = gzip_stream_fopen(state_file, "rb");
- if ( ioctxt->io == NULL )
- {
- xcio_perror(ioctxt, "Could not open file for reading");
- return rc;
- }
+ static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
+ "image", "ramdisk", "cmdline", "flags",
+ "vcpus", NULL };
- rc = xc_linux_restore(xc->xc_handle, ioctxt);
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssii", kwd_list,
+ &dom, &control_evtchn, &store_evtchn,
+ &image, &ramdisk, &cmdline, &flags,
+ &vcpus) )
+ return NULL;
- IOStream_close(ioctxt->io);
+ if ( xc_linux_build(xc->xc_handle, dom, image,
+ ramdisk, cmdline, control_evtchn, flags, vcpus,
+ store_evtchn, &store_mfn) != 0 )
+ return PyErr_SetFromErrno(xc_error);
- return rc;
-}
-
-static PyObject *pyxc_linux_restore(PyObject *self,
- PyObject *args,
- PyObject *kwds)
-{
- XcObject *xc = (XcObject *)self;
- char *state_file;
- int progress = 1, debug = 0;
- PyObject *val = NULL;
- XcIOContext ioctxt = { .info = iostdout, .err = iostderr };
- int rc =-1;
-
- static char *kwd_list[] = { "state_file", "progress", "debug", NULL };
-
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "s|ii", kwd_list,
- &state_file,
- &progress,
- &debug) )
- goto exit;
-
- if ( progress )
- ioctxt.flags |= XCFLAGS_VERBOSE;
- if ( debug )
- ioctxt.flags |= XCFLAGS_DEBUG;
-
- if ( (state_file == NULL) || (state_file[0] == '\0') )
- goto exit;
-
- rc = file_restore(xc, &ioctxt, state_file);
- if ( rc != 0 )
- {
- PyErr_SetFromErrno(xc_error);
- goto exit;
- }
-
- val = Py_BuildValue("{s:i,s:s}",
- "dom", ioctxt.domain,
- "vmconfig", ioctxt.vmconfig);
-
- exit:
- return val;
+ return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
}
-static PyObject *pyxc_linux_build(PyObject *self,
+static PyObject *pyxc_plan9_build(PyObject *self,
PyObject *args,
PyObject *kwds)
{
@@ -349,24 +294,24 @@ static PyObject *pyxc_linux_build(PyObject *self,
char *image, *ramdisk = NULL, *cmdline = "";
int control_evtchn, flags = 0;
- static char *kwd_list[] = { "dom", "control_evtchn",
+ static char *kwd_list[] = { "dom", "control_evtchn",
"image", "ramdisk", "cmdline", "flags",
NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|ssi", kwd_list,
- &dom, &control_evtchn,
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|ssi", kwd_list,
+ &dom, &control_evtchn,
&image, &ramdisk, &cmdline, &flags) )
return NULL;
- if ( xc_linux_build(xc->xc_handle, dom, image,
- ramdisk, cmdline, control_evtchn, flags) != 0 )
+ if ( xc_plan9_build(xc->xc_handle, dom, image,
+ cmdline, control_evtchn, flags) != 0 )
return PyErr_SetFromErrno(xc_error);
-
+
Py_INCREF(zero);
return zero;
}
-static PyObject *pyxc_plan9_build(PyObject *self,
+static PyObject *pyxc_vmx_build(PyObject *self,
PyObject *args,
PyObject *kwds)
{
@@ -374,21 +319,78 @@ static PyObject *pyxc_plan9_build(PyObject *self,
u32 dom;
char *image, *ramdisk = NULL, *cmdline = "";
+ PyObject *memmap;
int control_evtchn, flags = 0;
+ int numItems, i;
+ int memsize;
+ struct mem_map mem_map;
static char *kwd_list[] = { "dom", "control_evtchn",
- "image", "ramdisk", "cmdline", "flags",
+ "memsize",
+ "image", "memmap",
+ "ramdisk", "cmdline", "flags",
NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|ssi", kwd_list,
- &dom, &control_evtchn,
- &image, &ramdisk, &cmdline, &flags) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiisO!|ssi", kwd_list,
+ &dom, &control_evtchn,
+ &memsize,
+ &image, &PyList_Type, &memmap,
+ &ramdisk, &cmdline, &flags) )
return NULL;
- if ( xc_plan9_build(xc->xc_handle, dom, image,
- cmdline, control_evtchn, flags) != 0 )
- return PyErr_SetFromErrno(xc_error);
+ memset(&mem_map, 0, sizeof(mem_map));
+ /* Parse memmap */
+ /* get the number of lines passed to us */
+ numItems = PyList_Size(memmap) - 1; /* removing the line
+ containing "memmap" */
+ printf ("numItems: %d\n", numItems);
+ mem_map.nr_map = numItems;
+
+ /* should raise an error here. */
+ if (numItems < 0) return NULL; /* Not a list */
+
+ /* iterate over items of the list, grabbing ranges and parsing them */
+ for (i = 1; i <= numItems; i++) { // skip over "memmap"
+ PyObject *item, *f1, *f2, *f3, *f4;
+ int numFields;
+ unsigned long lf1, lf2, lf3, lf4;
+ char *sf1, *sf2;
+
+ /* grab the string object from the next element of the list */
+ item = PyList_GetItem(memmap, i); /* Can't fail */
+
+ /* get the number of lines passed to us */
+ numFields = PyList_Size(item);
+
+ if (numFields != 4)
+ return NULL;
+
+ f1 = PyList_GetItem(item, 0);
+ f2 = PyList_GetItem(item, 1);
+ f3 = PyList_GetItem(item, 2);
+ f4 = PyList_GetItem(item, 3);
+
+ /* Convert objects to strings/longs */
+ sf1 = PyString_AsString(f1);
+ sf2 = PyString_AsString(f2);
+ lf3 = PyLong_AsLong(f3);
+ lf4 = PyLong_AsLong(f4);
+ if ( sscanf(sf1, "%lx", &lf1) != 1 )
+ return NULL;
+ if ( sscanf(sf2, "%lx", &lf2) != 1 )
+ return NULL;
+
+ mem_map.map[i-1].addr = lf1;
+ mem_map.map[i-1].size = lf2 - lf1;
+ mem_map.map[i-1].type = lf3;
+ mem_map.map[i-1].caching_attr = lf4;
+ }
+
+ if ( xc_vmx_build(xc->xc_handle, dom, memsize, image, &mem_map,
+ ramdisk, cmdline, control_evtchn, flags) != 0 )
+ return PyErr_SetFromErrno(xc_error);
+
Py_INCREF(zero);
return zero;
}
@@ -496,11 +498,12 @@ static PyObject *pyxc_evtchn_alloc_unbound(PyObject *self,
XcObject *xc = (XcObject *)self;
u32 dom;
- int port;
+ int port = 0;
- static char *kwd_list[] = { "dom", NULL };
+ static char *kwd_list[] = { "dom", "port", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &dom) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list,
+ &dom, &port) )
return NULL;
if ( xc_evtchn_alloc_unbound(xc->xc_handle, dom, &port) != 0 )
@@ -679,7 +682,8 @@ static PyObject *pyxc_readconsolering(PyObject *self,
XcObject *xc = (XcObject *)self;
unsigned int clear = 0;
- char str[32768];
+ char _str[32768], *str = _str;
+ unsigned int count = 32768;
int ret;
static char *kwd_list[] = { "clear", NULL };
@@ -687,11 +691,11 @@ static PyObject *pyxc_readconsolering(PyObject *self,
if ( !PyArg_ParseTupleAndKeywords(args, kwds, "|i", kwd_list, &clear) )
return NULL;
- ret = xc_readconsolering(xc->xc_handle, str, sizeof(str), clear);
+ ret = xc_readconsolering(xc->xc_handle, &str, &count, clear);
if ( ret < 0 )
return PyErr_SetFromErrno(xc_error);
- return PyString_FromStringAndSize(str, ret);
+ return PyString_FromStringAndSize(str, count);
}
static PyObject *pyxc_physinfo(PyObject *self,
@@ -715,74 +719,50 @@ static PyObject *pyxc_physinfo(PyObject *self,
"cpu_khz", info.cpu_khz);
}
-static PyObject *pyxc_atropos_domain_set(PyObject *self,
+static PyObject *pyxc_sedf_domain_set(PyObject *self,
PyObject *args,
PyObject *kwds)
{
XcObject *xc = (XcObject *)self;
u32 domid;
u64 period, slice, latency;
- int xtratime;
-
- static char *kwd_list[] = { "dom", "period", "slice", "latency",
- "xtratime", NULL };
+ u16 extratime, weight;
+ static char *kwd_list[] = { "dom", "period", "slice", "latency", "extratime", "weight",NULL };
- if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLLi", kwd_list, &domid,
- &period, &slice, &latency, &xtratime) )
+ if( !PyArg_ParseTupleAndKeywords(args, kwds, "iLLLhh", kwd_list, &domid,
+ &period, &slice, &latency, &extratime, &weight) )
return NULL;
-
- if ( xc_atropos_domain_set(xc->xc_handle, domid, period, slice,
- latency, xtratime) != 0 )
+ if ( xc_sedf_domain_set(xc->xc_handle, domid, period, slice, latency, extratime,weight) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
return zero;
}
-static PyObject *pyxc_atropos_domain_get(PyObject *self,
+static PyObject *pyxc_sedf_domain_get(PyObject *self,
PyObject *args,
PyObject *kwds)
{
XcObject *xc = (XcObject *)self;
u32 domid;
- u64 period, slice, latency;
- int xtratime;
+ u64 period, slice,latency;
+ u16 weight, extratime;
static char *kwd_list[] = { "dom", NULL };
if( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &domid) )
return NULL;
- if ( xc_atropos_domain_get( xc->xc_handle, domid, &period,
- &slice, &latency, &xtratime ) )
+ if ( xc_sedf_domain_get( xc->xc_handle, domid, &period,
+ &slice,&latency,&extratime,&weight) )
return PyErr_SetFromErrno(xc_error);
return Py_BuildValue("{s:i,s:L,s:L,s:L,s:i}",
- "domain", domid,
- "period", period,
- "slice", slice,
- "latency", latency,
- "xtratime", xtratime);
-}
-
-
-static PyObject *pyxc_rrobin_global_set(PyObject *self,
- PyObject *args,
- PyObject *kwds)
-{
- XcObject *xc = (XcObject *)self;
- u64 slice;
-
- static char *kwd_list[] = { "slice", NULL };
-
- if( !PyArg_ParseTupleAndKeywords(args, kwds, "L", kwd_list, &slice) )
- return NULL;
-
- if ( xc_rrobin_global_set(xc->xc_handle, slice) != 0 )
- return PyErr_SetFromErrno(xc_error);
-
- Py_INCREF(zero);
- return zero;
+ "domain", domid,
+ "period", period,
+ "slice", slice,
+ "latency", latency,
+ "extratime", extratime);
}
static PyObject *pyxc_shadow_control(PyObject *self,
@@ -807,38 +787,44 @@ static PyObject *pyxc_shadow_control(PyObject *self,
return zero;
}
-static PyObject *pyxc_rrobin_global_get(PyObject *self,
- PyObject *args,
- PyObject *kwds)
+static PyObject *pyxc_domain_setmaxmem(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
{
XcObject *xc = (XcObject *)self;
- u64 slice;
- if ( !PyArg_ParseTuple(args, "") )
+ u32 dom;
+ unsigned long maxmem_kb;
+
+ static char *kwd_list[] = { "dom", "maxmem_kb", NULL };
+
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", kwd_list,
+ &dom, &maxmem_kb) )
return NULL;
- if ( xc_rrobin_global_get(xc->xc_handle, &slice) != 0 )
+ if ( xc_domain_setmaxmem(xc->xc_handle, dom, maxmem_kb) != 0 )
return PyErr_SetFromErrno(xc_error);
- return Py_BuildValue("{s:L}", "slice", slice);
+ Py_INCREF(zero);
+ return zero;
}
-static PyObject *pyxc_domain_setmaxmem(PyObject *self,
- PyObject *args,
- PyObject *kwds)
+static PyObject *pyxc_domain_memory_increase_reservation(PyObject *self,
+ PyObject *args,
+ PyObject *kwds)
{
XcObject *xc = (XcObject *)self;
u32 dom;
- unsigned long maxmem_kb;
+ unsigned long mem_kb;
- static char *kwd_list[] = { "dom", "maxmem_kb", NULL };
+ static char *kwd_list[] = { "dom", "mem_kb", NULL };
if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", kwd_list,
- &dom, &maxmem_kb) )
+ &dom, &mem_kb) )
return NULL;
- if ( xc_domain_setmaxmem(xc->xc_handle, dom, maxmem_kb) != 0 )
+ if ( xc_domain_memory_increase_reservation(xc->xc_handle, dom, mem_kb) )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
@@ -847,14 +833,27 @@ static PyObject *pyxc_domain_setmaxmem(PyObject *self,
static PyMethodDef pyxc_methods[] = {
+ { "handle",
+ (PyCFunction)pyxc_handle,
+ 0, "\n"
+ "Query the xc control interface file descriptor.\n\n"
+ "Returns: [int] file descriptor\n" },
+
{ "domain_create",
(PyCFunction)pyxc_domain_create,
METH_VARARGS | METH_KEYWORDS, "\n"
"Create a new domain.\n"
" dom [int, 0]: Domain identifier to use (allocated if zero).\n"
- " mem_kb [int, 0]: Memory allocation, in kilobytes.\n"
"Returns: [int] new domain identifier; -1 on error.\n" },
+ { "domain_dumpcore",
+ (PyCFunction)pyxc_domain_dumpcore,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "Dump core of a domain.\n"
+ " dom [int]: Identifier of domain to dump core of.\n"
+ " corefile [string]: Name of corefile to be created.\n\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
{ "domain_pause",
(PyCFunction)pyxc_domain_pause,
METH_VARARGS | METH_KEYWORDS, "\n"
@@ -879,9 +878,18 @@ static PyMethodDef pyxc_methods[] = {
{ "domain_pincpu",
(PyCFunction)pyxc_domain_pincpu,
METH_VARARGS | METH_KEYWORDS, "\n"
- "Pin a domain to a specified CPU.\n"
- " dom [int]: Identifier of domain to be pinned.\n"
- " cpu [int, -1]: CPU to pin to, or -1 to unpin\n\n"
+ "Pin a VCPU to a specified set CPUs.\n"
+ " dom [int]: Identifier of domain to which VCPU belongs.\n"
+ " vcpu [int, 0]: VCPU being pinned.\n"
+ " cpumap [int, -1]: Bitmap of usable CPUs.\n\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
+ { "domain_setcpuweight",
+ (PyCFunction)pyxc_domain_setcpuweight,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "Set cpuweight scheduler parameter for domain.\n"
+ " dom [int]: Identifier of domain to be changed.\n"
+ " cpuweight [float, 1]: VCPU being pinned.\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "domain_getinfo",
@@ -896,6 +904,7 @@ static PyMethodDef pyxc_methods[] = {
" domain-id space was reached.\n"
" dom [int]: Identifier of domain to which this info pertains\n"
" cpu [int]: CPU to which this domain is bound\n"
+ " vcpus [int]: Number of Virtual CPUS in this domain\n"
" dying [int]: Bool - is the domain dying?\n"
" crashed [int]: Bool - has the domain crashed?\n"
" shutdown [int]: Bool - has the domain shut itself down?\n"
@@ -906,16 +915,20 @@ static PyMethodDef pyxc_methods[] = {
" maxmem_kb [int]: Maximum memory limit, in kilobytes\n"
" cpu_time [long]: CPU time consumed, in nanoseconds\n"
" shutdown_reason [int]: Numeric code from guest OS, explaining "
- "reason why it shut itself down.\n" },
+ "reason why it shut itself down.\n"
+ " vcpu_to_cpu [[int]]: List that maps VCPUS to CPUS\n" },
- { "linux_save",
- (PyCFunction)pyxc_linux_save,
+ { "linux_build",
+ (PyCFunction)pyxc_linux_build,
METH_VARARGS | METH_KEYWORDS, "\n"
- "Save the CPU and memory state of a Linux guest OS.\n"
- " dom [int]: Identifier of domain to be saved.\n"
- " state_file [str]: Name of state file. Must not currently exist.\n"
- " progress [int, 1]: Bool - display a running progress indication?\n\n"
+ "Build a new Linux guest OS.\n"
+ " dom [int]: Identifier of domain to build into.\n"
+ " image [str]: Name of kernel image file. May be gzipped.\n"
+ " ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
+ " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+ " vcpus [int, 1]: Number of Virtual CPUS in domain.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
+
{ "plan9_build",
(PyCFunction)pyxc_plan9_build,
METH_VARARGS | METH_KEYWORDS, "\n"
@@ -925,20 +938,13 @@ static PyMethodDef pyxc_methods[] = {
" cmdline [str, n/a]: Kernel parameters, if any.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
- { "linux_restore",
- (PyCFunction)pyxc_linux_restore,
- METH_VARARGS | METH_KEYWORDS, "\n"
- "Restore the CPU and memory state of a Linux guest OS.\n"
- " state_file [str]: Name of state file. Must not currently exist.\n"
- " progress [int, 1]: Bool - display a running progress indication?\n\n"
- "Returns: [int] new domain identifier on success; -1 on error.\n" },
-
- { "linux_build",
- (PyCFunction)pyxc_linux_build,
+ { "vmx_build",
+ (PyCFunction)pyxc_vmx_build,
METH_VARARGS | METH_KEYWORDS, "\n"
"Build a new Linux guest OS.\n"
" dom [int]: Identifier of domain to build into.\n"
" image [str]: Name of kernel image file. May be gzipped.\n"
+ " memmap [str]: Memory map.\n\n"
" ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
" cmdline [str, n/a]: Kernel parameters, if any.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
@@ -981,44 +987,30 @@ static PyMethodDef pyxc_methods[] = {
" warpu [long]: Unwarp requirement.\n"
" warpl [long]: Warp limit,\n"
},
-
- { "atropos_domain_set",
- (PyCFunction)pyxc_atropos_domain_set,
+
+ { "sedf_domain_set",
+ (PyCFunction)pyxc_sedf_domain_set,
METH_KEYWORDS, "\n"
"Set the scheduling parameters for a domain when running with Atropos.\n"
- " dom [int]: domain to set\n"
- " period [long]: domain's scheduling period\n"
- " slice [long]: domain's slice per period\n"
- " latency [long]: wakeup latency hint\n"
- " xtratime [int]: boolean\n"
+ " dom [int]: domain to set\n"
+ " period [long]: domain's scheduling period\n"
+ " slice [long]: domain's slice per period\n"
+ " latency [long]: domain's wakeup latency hint\n"
+ " extratime [int]: domain aware of extratime?\n"
"Returns: [int] 0 on success; -1 on error.\n" },
- { "atropos_domain_get",
- (PyCFunction)pyxc_atropos_domain_get,
+ { "sedf_domain_get",
+ (PyCFunction)pyxc_sedf_domain_get,
METH_KEYWORDS, "\n"
"Get the current scheduling parameters for a domain when running with\n"
"the Atropos scheduler."
- " dom [int]: domain to query\n"
- "Returns: [dict]\n"
- " domain [int]: domain ID\n"
- " period [long]: scheduler period\n"
- " slice [long]: CPU reservation per period\n"
- " latency [long]: unblocking latency hint\n"
- " xtratime [int] : 0 if not using slack time, nonzero otherwise\n" },
-
- { "rrobin_global_set",
- (PyCFunction)pyxc_rrobin_global_set,
- METH_KEYWORDS, "\n"
- "Set Round Robin scheduler slice.\n"
- " slice [long]: Round Robin scheduler slice\n"
- "Returns: [int] 0 on success, throws an exception on failure\n" },
-
- { "rrobin_global_get",
- (PyCFunction)pyxc_rrobin_global_get,
- METH_KEYWORDS, "\n"
- "Get Round Robin scheduler settings\n"
- "Returns [dict]:\n"
- " slice [long]: Scheduler time slice.\n" },
+ " dom [int]: domain to query\n"
+ "Returns: [dict]\n"
+ " domain [int]: domain ID\n"
+ " period [long]: scheduler period\n"
+ " slice [long]: CPU reservation per period\n"
+ " latency [long]: domain's wakeup latency hint\n"
+ " extratime [int]: domain aware of extratime?\n"},
{ "evtchn_alloc_unbound",
(PyCFunction)pyxc_evtchn_alloc_unbound,
@@ -1115,6 +1107,14 @@ static PyMethodDef pyxc_methods[] = {
" maxmem_kb [long]: .\n"
"Returns: [int] 0 on success; -1 on error.\n" },
+ { "domain_memory_increase_reservation",
+ (PyCFunction)pyxc_domain_memory_increase_reservation,
+ METH_VARARGS | METH_KEYWORDS, "\n"
+ "Increase a domain's memory reservation\n"
+ " dom [int]: Identifier of domain.\n"
+ " mem_kb [long]: .\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
{ NULL, NULL, 0, NULL }
};
@@ -1187,6 +1187,7 @@ PyMODINIT_FUNC initxc(void)
d = PyModule_GetDict(m);
xc_error = PyErr_NewException(XENPKG ".error", NULL, NULL);
PyDict_SetItemString(d, "error", xc_error);
+ PyDict_SetItemString(d, "VIRQ_DOM_EXC", PyInt_FromLong(VIRQ_DOM_EXC));
zero = PyInt_FromLong(0);
diff --git a/tools/python/xen/lowlevel/xs/xs.c b/tools/python/xen/lowlevel/xs/xs.c
new file mode 100644
index 0000000000..6ecddc6fd4
--- /dev/null
+++ b/tools/python/xen/lowlevel/xs/xs.c
@@ -0,0 +1,617 @@
+#include <Python.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "xs.h"
+
+/** @file
+ * Python interface to the Xen Store Daemon (xs).
+ */
+
+/* Needed for Python versions earlier than 2.3. */
+//#ifndef PyMODINIT_FUNC
+//#define PyMODINIT_FUNC DL_EXPORT(void)
+//#endif
+
+#define PYPKG "xen.lowlevel.xs"
+
+/** Python wrapper round an xs handle.
+ */
+typedef struct XsHandle {
+ PyObject_HEAD;
+ struct xs_handle *xh;
+} XsHandle;
+
+static inline struct xs_handle *xshandle(PyObject *self)
+{
+ struct xs_handle *xh = ((XsHandle*)self)->xh;
+ if (!xh)
+ PyErr_SetString(PyExc_RuntimeError, "invalid xenstore daemon handle");
+ return xh;
+}
+
+static inline PyObject *pyvalue_int(int val) {
+ return (val
+ ? PyInt_FromLong(val)
+ : PyErr_SetFromErrno(PyExc_RuntimeError));
+}
+
+static inline PyObject *pyvalue_str(char *val) {
+ return (val
+ ? PyString_FromString(val)
+ : PyErr_SetFromErrno(PyExc_RuntimeError));
+}
+
+static PyObject *xspy_write(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", "data", "create", "excl", NULL };
+ static char *arg_spec = "ss#|ii";
+ char *path = NULL;
+ char *data = NULL;
+ int data_n = 0;
+ int create = 0;
+ int excl = 0;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int flags = 0;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+ &path, &data, &data_n, &create, &excl))
+ goto exit;
+ if (create)
+ flags |= O_CREAT;
+ if (excl)
+ flags |= O_EXCL;
+ xsval = xs_write(xh, path, data, data_n, flags);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_read(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", NULL };
+ static char *arg_spec = "s|";
+ char *path = NULL;
+
+ struct xs_handle *xh = xshandle(self);
+ char *xsval = NULL;
+ unsigned int xsval_n = 0;
+ PyObject *val = NULL;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+ &path))
+ goto exit;
+ xsval = xs_read(xh, path, &xsval_n);
+ if (!xsval) {
+ val = pyvalue_int(0);
+ goto exit;
+ }
+ val = PyString_FromStringAndSize(xsval, xsval_n);
+ exit:
+ if (xsval)
+ free(xsval);
+ return val;
+}
+
+static PyObject *xspy_mkdir(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", NULL };
+ static char *arg_spec = "s|";
+ char *path = NULL;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+ goto exit;
+ xsval = xs_mkdir(xh, path);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_ls(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", NULL };
+ static char *arg_spec = "s|";
+ char *path = NULL;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ char **xsval = NULL;
+ unsigned int xsval_n = 0;
+ int i;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+ goto exit;
+ xsval = xs_directory(xh, path, &xsval_n);
+ if (!xsval) {
+ val = pyvalue_int(0);
+ goto exit;
+ }
+ val = PyList_New(xsval_n);
+ for (i = 0; i < xsval_n; i++)
+ PyList_SetItem(val, i, PyString_FromString(xsval[i]));
+ exit:
+ return val;
+}
+
+static PyObject *xspy_rm(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", NULL };
+ static char *arg_spec = "s|";
+ char *path = NULL;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+ goto exit;
+ xsval = xs_rm(xh, path);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_get_permissions(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", NULL };
+ static char *arg_spec = "s|";
+ char *path = NULL;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ struct xs_permissions *perms;
+ unsigned int perms_n = 0;
+ int i;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+ goto exit;
+ perms = xs_get_permissions(xh, path, &perms_n);
+ if (!perms) {
+ PyErr_SetFromErrno(PyExc_RuntimeError);
+ goto exit;
+ }
+ val = PyList_New(perms_n);
+ for (i = 0; i < perms_n; i++, perms++) {
+ PyObject *p = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i}",
+ "dom", perms->id,
+ "read", (perms->perms & XS_PERM_READ),
+ "write", (perms->perms & XS_PERM_WRITE),
+ "create", (perms->perms & XS_PERM_CREATE),
+ "owner", (perms->perms & XS_PERM_OWNER));
+ PyList_SetItem(val, i, p);
+ }
+ exit:
+ return val;
+}
+
+static PyObject *xspy_set_permissions(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", "perms", NULL };
+ static char *arg_spec = "sO";
+ char *path = NULL;
+ PyObject *perms = NULL;
+ static char *perm_names[] = { "dom", "read", "write", "create", "owner",
+ NULL };
+ static char *perm_spec = "i|iiii";
+
+ struct xs_handle *xh = xshandle(self);
+ int i, xsval;
+ struct xs_permissions *xsperms = NULL;
+ int xsperms_n = 0;
+ PyObject *tuple0 = NULL;
+ PyObject *val = NULL;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+ &path, &perms))
+ goto exit;
+ if (!PyList_Check(perms)) {
+ PyErr_SetString(PyExc_RuntimeError, "perms must be a list");
+ goto exit;
+ }
+ xsperms_n = PyList_Size(perms);
+ xsperms = calloc(xsperms_n, sizeof(struct xs_permissions));
+ if (!xsperms) {
+ PyErr_SetString(PyExc_RuntimeError, "out of memory");
+ goto exit;
+ }
+ tuple0 = PyTuple_New(0);
+ if (!tuple0)
+ goto exit;
+ for (i = 0; i < xsperms_n; i++) {
+ /* Domain the permissions apply to. */
+ int dom = 0;
+ /* Read/write perms. Set these. */
+ int p_read = 0, p_write = 0;
+ /* Create/owner perms. Ignore them.
+ * This is so the output from get_permissions() can be used
+ * as input to set_permissions().
+ */
+ int p_create = 0, p_owner = 0;
+ PyObject *p = PyList_GetItem(perms, i);
+ if (!PyArg_ParseTupleAndKeywords(tuple0, p, perm_spec, perm_names,
+ &dom, &p_read, &p_write, &p_create,
+ &p_owner))
+ goto exit;
+ xsperms[i].id = dom;
+ if (p_read)
+ xsperms[i].perms |= XS_PERM_READ;
+ if (p_write)
+ xsperms[i].perms |= XS_PERM_WRITE;
+ }
+ xsval = xs_set_permissions(xh, path, xsperms, xsperms_n);
+ val = pyvalue_int(xsval);
+ exit:
+ Py_XDECREF(tuple0);
+ if (xsperms)
+ free(xsperms);
+ return val;
+}
+
+static PyObject *xspy_watch(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", "priority", NULL };
+ static char *arg_spec = "s|i";
+ char *path = NULL;
+ int priority = 0;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+ &path, &priority))
+ goto exit;
+ xsval = xs_watch(xh, path, priority);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_read_watch(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { NULL };
+ static char *arg_spec = "";
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ char *xsval = NULL;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
+ goto exit;
+ xsval = xs_read_watch(xh);
+ val = pyvalue_str(xsval);
+ exit:
+ if (xsval)
+ free(xsval);
+ return val;
+}
+
+static PyObject *xspy_acknowledge_watch(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { NULL };
+ static char *arg_spec = "";
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
+ goto exit;
+ xsval = xs_acknowledge_watch(xh);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_unwatch(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", NULL };
+ static char *arg_spec = "s|";
+ char *path = NULL;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+ goto exit;
+ xsval = xs_unwatch(xh, path);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_transaction_start(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { "path", NULL };
+ static char *arg_spec = "s|";
+ char *path = NULL;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
+ goto exit;
+ xsval = xs_transaction_start(xh, path);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_transaction_end(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { "abort", NULL };
+ static char *arg_spec = "|i";
+ int abort = 0;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &abort))
+ goto exit;
+ xsval = xs_transaction_end(xh, abort);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_introduce_domain(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { "dom", "page", "port", "path", NULL };
+ static char *arg_spec = "iiis|";
+ domid_t dom = 0;
+ unsigned long page = 0;
+ unsigned int port = 0;
+ char *path = NULL;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+ &dom, &page, &port, &path))
+ goto exit;
+ printf("%s> dom=%u page=0x%08lx port=%u path=%s\n", __FUNCTION__, dom,
+ page, port, path);
+ xsval = xs_introduce_domain(xh, dom, page, port, path);
+ printf("%s> xsval=%d\n", __FUNCTION__, xsval);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_release_domain(PyObject *self, PyObject *args,
+ PyObject *kwds)
+{
+ static char *kwd_spec[] = { "dom", NULL };
+ static char *arg_spec = "i|";
+ domid_t dom;
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+ &dom))
+ goto exit;
+ printf("%s> dom=%u\n", __FUNCTION__, dom);
+ xsval = xs_release_domain(xh, dom);
+ printf("%s> xsval=%d\n", __FUNCTION__, xsval);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_close(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { NULL };
+ static char *arg_spec = "";
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 1;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
+ goto exit;
+ xs_daemon_close(xh);
+ ((XsHandle*)self)->xh = NULL;
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+static PyObject *xspy_shutdown(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { NULL };
+ static char *arg_spec = "";
+
+ struct xs_handle *xh = xshandle(self);
+ PyObject *val = NULL;
+ int xsval = 0;
+
+ if (!xh)
+ goto exit;
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec))
+ goto exit;
+ xsval = xs_shutdown(xh);
+ val = pyvalue_int(xsval);
+ exit:
+ return val;
+}
+
+#define XSPY_METH(_name) \
+ #_name, \
+ (PyCFunction) xspy_ ## _name, \
+ (METH_VARARGS | METH_KEYWORDS)
+// mtime
+// ctime
+
+static PyMethodDef xshandle_methods[] = {
+ { XSPY_METH(read),
+ "read(path) : read data\n" },
+ { XSPY_METH(write),
+ "write(path, data, [creat], [excl]): write data\n" },
+ { XSPY_METH(ls),
+ "ls(path): list directory.\n" },
+ { XSPY_METH(mkdir),
+ "mkdir(path): make a directory.\n" },
+ { XSPY_METH(rm),
+ "rm(path): remove a path (dir must be empty).\n" },
+ { XSPY_METH(get_permissions),
+ "get_permissions(path)\n" },
+ { XSPY_METH(set_permissions),
+ "set_permissions(path)\n" },
+ { XSPY_METH(watch),
+ "watch(path)\n" },
+ { XSPY_METH(read_watch),
+ "read_watch()\n" },
+ { XSPY_METH(acknowledge_watch),
+ "acknowledge_watch()\n" },
+ { XSPY_METH(unwatch),
+ "unwatch()\n" },
+ { XSPY_METH(transaction_start),
+ "transaction_start()\n" },
+ { XSPY_METH(transaction_end),
+ "transaction_end([abort])\n" },
+ { XSPY_METH(introduce_domain),
+ "introduce_domain(dom, page, port)\n" },
+ { XSPY_METH(release_domain),
+ "release_domain(dom)\n" },
+ { XSPY_METH(close),
+ "close()\n" },
+ { XSPY_METH(shutdown),
+ "shutdown()\n" },
+ { NULL, NULL, 0, NULL }
+};
+
+static PyObject *xshandle_getattr(PyObject *self, char *name)
+{
+ PyObject *val = NULL;
+ if (strcmp(name, "fileno") == 0) {
+ struct xs_handle *xh = xshandle(self);
+ val = PyInt_FromLong((xh ? xs_fileno(xh) : -1));
+ } else
+ val = Py_FindMethod(xshandle_methods, self, name);
+ return val;
+}
+
+static void xshandle_dealloc(PyObject *self)
+{
+ XsHandle *xh = (XsHandle*)self;
+ if (xh->xh) {
+ xs_daemon_close(xh->xh);
+ xh->xh = NULL;
+ }
+ PyObject_Del(self);
+}
+
+static PyTypeObject xshandle_type = {
+ PyObject_HEAD_INIT(&PyType_Type)
+ 0,
+ "xshandle",
+ sizeof(XsHandle),
+ 0,
+ xshandle_dealloc, /* tp_dealloc */
+ NULL, /* tp_print */
+ xshandle_getattr, /* tp_getattr */
+ NULL, /* tp_setattr */
+ NULL, /* tp_compare */
+ NULL, /* tp_repr */
+ NULL, /* tp_as_number */
+ NULL, /* tp_as_sequence */
+ NULL, /* tp_as_mapping */
+ NULL /* tp_hash */
+};
+
+static PyObject *xshandle_open(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ static char *kwd_spec[] = { "readonly", NULL };
+ static char *arg_spec = "|i";
+ int readonly = 0;
+
+ XsHandle *xsh = NULL;
+ PyObject *val = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+ &readonly))
+ goto exit;
+
+ xsh = PyObject_New(XsHandle, &xshandle_type);
+ if (!xsh)
+ goto exit;
+ xsh->xh = (readonly ? xs_daemon_open_readonly() : xs_daemon_open());
+ if (!xsh->xh) {
+ PyObject_Del(xsh);
+ val = pyvalue_int(0);
+ goto exit;
+ }
+ val = (PyObject *)xsh;
+ exit:
+ return val;
+}
+
+static PyMethodDef xs_methods[] = {
+ { "open", (PyCFunction)xshandle_open, (METH_VARARGS | METH_KEYWORDS),
+ "Open a connection to the xenstore daemon.\n" },
+ { NULL, NULL, 0, NULL }
+};
+
+PyMODINIT_FUNC initxs (void)
+{
+ PyObject *module;
+
+ module = Py_InitModule(PYPKG, xs_methods);
+}
diff --git a/tools/python/xen/lowlevel/xu/xu.c b/tools/python/xen/lowlevel/xu/xu.c
index 79c18795c9..65660ba6dc 100644
--- a/tools/python/xen/lowlevel/xu/xu.c
+++ b/tools/python/xen/lowlevel/xu/xu.c
@@ -13,10 +13,10 @@
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/socket.h>
+#include <sys/un.h>
#include <sys/mman.h>
#include <sys/poll.h>
#include <sys/sysmacros.h>
-#include <netinet/in.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
@@ -49,16 +49,9 @@
/* Size of a machine page frame. */
#define PAGE_SIZE 4096
-#if defined(__i386__)
-#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
-#define wmb() __asm__ __volatile__ ( "" : : : "memory" )
-#else
-#error "Define barriers"
-#endif
-
-
/* Set the close-on-exec flag on a file descriptor. Doesn't currently bother
* to check for errors. */
+/*
static void set_cloexec(int fd)
{
int flags = fcntl(fd, F_GETFD, 0);
@@ -69,93 +62,299 @@ static void set_cloexec(int fd)
flags |= FD_CLOEXEC;
fcntl(fd, F_SETFD, flags);
}
-
+*/
/*
- * *********************** NOTIFIER ***********************
+ * *********************** XCS INTERFACE ***********************
*/
-typedef struct {
- PyObject_HEAD;
- int evtchn_fd;
-} xu_notifier_object;
+#include <arpa/inet.h>
+#include <xcs_proto.h>
-static PyObject *xu_notifier_read(PyObject *self, PyObject *args)
+static int xcs_ctrl_fd = -1; /* control connection to the xcs server. */
+static int xcs_data_fd = -1; /* data connection to the xcs server. */
+static u32 xcs_session_id = 0;
+
+static int xcs_ctrl_send(xcs_msg_t *msg);
+static int xcs_ctrl_read(xcs_msg_t *msg);
+static int xcs_data_send(xcs_msg_t *msg);
+static int xcs_data_read(xcs_msg_t *msg);
+
+static int xcs_connect(char *path)
{
- xu_notifier_object *xun = (xu_notifier_object *)self;
- u16 v;
- int bytes;
+ struct sockaddr_un addr;
+ int ret, len, flags;
+ xcs_msg_t msg;
- if ( !PyArg_ParseTuple(args, "") )
- return NULL;
+ if (xcs_data_fd != -1) /* already connected */
+ return 0;
- while ( (bytes = read(xun->evtchn_fd, &v, sizeof(v))) == -1 )
+ xcs_ctrl_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (xcs_ctrl_fd < 0)
{
- if ( errno == EINTR )
- continue;
- if ( errno == EAGAIN )
- goto none;
- return PyErr_SetFromErrno(PyExc_IOError);
+ printf("error creating xcs socket!\n");
+ goto fail;
}
- if ( bytes == sizeof(v) )
- return PyInt_FromLong(v);
+ addr.sun_family = AF_UNIX;
+ strcpy(addr.sun_path, path);
+ len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1;
- none:
- Py_INCREF(Py_None);
- return Py_None;
+ ret = connect(xcs_ctrl_fd, (struct sockaddr *)&addr, len);
+ if (ret < 0)
+ {
+ printf("error connecting to xcs(ctrl)! (%d)\n", errno);
+ goto ctrl_fd_fail;
+ }
+
+ /*set_cloexec(xcs_ctrl_fd);*/
+
+ msg.type = XCS_CONNECT_CTRL;
+ msg.u.connect.session_id = xcs_session_id;
+ xcs_ctrl_send(&msg);
+ xcs_ctrl_read(&msg); /* TODO: timeout + error! */
+
+ if (msg.result != XCS_RSLT_OK)
+ {
+ printf("error connecting xcs control channel!\n");
+ goto ctrl_fd_fail;
+ }
+ xcs_session_id = msg.u.connect.session_id;
+
+ /* now the data connection. */
+ xcs_data_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (xcs_data_fd < 0)
+ {
+ printf("error creating xcs data socket!\n");
+ goto ctrl_fd_fail;
+ }
+
+ addr.sun_family = AF_UNIX;
+ strcpy(addr.sun_path, path);
+ len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1;
+
+ ret = connect(xcs_data_fd, (struct sockaddr *)&addr, len);
+ if (ret < 0)
+ {
+ printf("error connecting to xcs(data)! (%d)\n", errno);
+ goto data_fd_fail;
+ }
+
+ //set_cloexec(xcs_data_fd);
+ msg.type = XCS_CONNECT_DATA;
+ msg.u.connect.session_id = xcs_session_id;
+ xcs_data_send(&msg);
+ xcs_data_read(&msg); /* TODO: timeout + error! */
+
+ if (msg.result != XCS_RSLT_OK)
+ {
+ printf("error connecting xcs control channel!\n");
+ goto ctrl_fd_fail;
+ }
+
+ if ( ((flags = fcntl(xcs_data_fd, F_GETFL, 0)) < 0) ||
+ (fcntl(xcs_data_fd, F_SETFL, flags | O_NONBLOCK) < 0) )
+ {
+ printf("Unable to set non-blocking status on data socket.");
+ goto data_fd_fail;
+ }
+
+ return 0;
+
+data_fd_fail:
+ close(xcs_data_fd);
+ xcs_data_fd = -1;
+
+ctrl_fd_fail:
+ close(xcs_ctrl_fd);
+ xcs_ctrl_fd = -1;
+
+fail:
+ return -1;
+
}
-static PyObject *xu_notifier_unmask(PyObject *self, PyObject *args)
+static void xcs_disconnect(void)
{
- xu_notifier_object *xun = (xu_notifier_object *)self;
- u16 v;
- int idx;
+ close(xcs_data_fd);
+ xcs_data_fd = -1;
+ close(xcs_ctrl_fd);
+ xcs_ctrl_fd = -1;
+}
- if ( !PyArg_ParseTuple(args, "i", &idx) )
- return NULL;
+static int xcs_ctrl_read(xcs_msg_t *msg)
+{
+ int ret;
+
+ ret = read(xcs_ctrl_fd, msg, sizeof(xcs_msg_t));
+ return ret;
+}
- v = (u16)idx;
+static int xcs_ctrl_send(xcs_msg_t *msg)
+{
+ int ret;
- (void)write(xun->evtchn_fd, &v, sizeof(v));
+ ret = send(xcs_ctrl_fd, msg, sizeof(xcs_msg_t), 0);
+ return ret;
+}
- Py_INCREF(Py_None);
- return Py_None;
+static int xcs_data_read(xcs_msg_t *msg)
+{
+ int ret;
+
+ ret = read(xcs_data_fd, msg, sizeof(xcs_msg_t));
+ return ret;
}
-static PyObject *xu_notifier_bind(PyObject *self, PyObject *args)
+static int xcs_data_send(xcs_msg_t *msg)
{
- xu_notifier_object *xun = (xu_notifier_object *)self;
- int idx;
+ int ret;
+
+ ret = send(xcs_data_fd, msg, sizeof(xcs_msg_t), 0);
+ return ret;
+}
+
+
+typedef struct kme_st {
+ xcs_msg_t msg;
+ struct kme_st *next;
+} xcs_msg_ent_t;
+
- if ( !PyArg_ParseTuple(args, "i", &idx) )
+#define XCS_RING_SIZE 64
+static xcs_msg_ent_t *req_ring[64];
+static unsigned req_prod = 0;
+static unsigned req_cons = 0;
+
+static xcs_msg_ent_t *rsp_ring[64];
+static unsigned rsp_prod = 0;
+static unsigned rsp_cons = 0;
+
+#define REQ_RING_ENT(_idx) (req_ring[(_idx) % XCS_RING_SIZE])
+#define RSP_RING_ENT(_idx) (rsp_ring[(_idx) % XCS_RING_SIZE])
+#define REQ_RING_FULL ( req_prod - req_cons == XCS_RING_SIZE )
+#define RSP_RING_FULL ( rsp_prod - rsp_cons == XCS_RING_SIZE )
+#define REQ_RING_EMPTY ( req_prod == req_cons )
+#define RSP_RING_EMPTY ( rsp_prod == rsp_cons )
+/*
+ * *********************** NOTIFIER ***********************
+ */
+
+typedef struct {
+ PyObject_HEAD;
+ int evtchn_fd;
+} xu_notifier_object;
+
+static PyObject *xu_notifier_read(PyObject *self, PyObject *args)
+{
+ xcs_msg_ent_t *ent;
+ int ret;
+
+ if ( !PyArg_ParseTuple(args, "") )
return NULL;
+
+ while ((!REQ_RING_FULL) && (!RSP_RING_FULL))
+ {
+ ent = (xcs_msg_ent_t *)malloc(sizeof(xcs_msg_ent_t));
+ ret = xcs_data_read(&ent->msg);
- if ( ioctl(xun->evtchn_fd, EVTCHN_BIND, idx) != 0 )
- return PyErr_SetFromErrno(PyExc_IOError);
+ if (ret == -1)
+ {
+ free(ent);
+ if ( errno == EINTR )
+ continue;
+ if ( errno == EAGAIN )
+ break;
+ return PyErr_SetFromErrno(PyExc_IOError);
+ }
+
+ switch (ent->msg.type)
+ {
+ case XCS_REQUEST:
+ REQ_RING_ENT(req_prod) = ent;
+ req_prod++;
+ continue;
+ case XCS_RESPONSE:
+ RSP_RING_ENT(rsp_prod) = ent;
+ rsp_prod++;
+ continue;
+
+ case XCS_VIRQ:
+ ret = ent->msg.u.control.local_port;
+ free(ent);
+ return PyInt_FromLong(ret);
+
+ default:
+ /*printf("Throwing away xcs msg type: %u\n", ent->msg.type);*/
+ free(ent);
+ }
+ }
+
+ if (!REQ_RING_EMPTY)
+ {
+ return PyInt_FromLong(REQ_RING_ENT(req_cons)->msg.u.control.local_port);
+ }
+
+ if (!RSP_RING_EMPTY)
+ {
+ return PyInt_FromLong(RSP_RING_ENT(rsp_cons)->msg.u.control.local_port);
+ }
+
Py_INCREF(Py_None);
return Py_None;
}
-static PyObject *xu_notifier_unbind(PyObject *self, PyObject *args)
+static PyObject *xu_notifier_bind_virq(PyObject *self,
+ PyObject *args, PyObject *kwds)
{
- xu_notifier_object *xun = (xu_notifier_object *)self;
- int idx;
+ int virq;
+ xcs_msg_t kmsg;
- if ( !PyArg_ParseTuple(args, "i", &idx) )
+ static char *kwd_list[] = { "virq", NULL };
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &virq) )
return NULL;
+
+ kmsg.type = XCS_VIRQ_BIND;
+ kmsg.u.virq.virq = virq;
+ xcs_ctrl_send(&kmsg);
+ xcs_ctrl_read(&kmsg);
+
+ if ( kmsg.result != XCS_RSLT_OK )
+ {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+
+ return PyInt_FromLong(kmsg.u.virq.port);
+}
- if ( ioctl(xun->evtchn_fd, EVTCHN_UNBIND, idx) != 0 )
- return PyErr_SetFromErrno(PyExc_IOError);
+static PyObject *xu_notifier_virq_send(PyObject *self,
+ PyObject *args, PyObject *kwds)
+{
+ int port;
+ xcs_msg_t kmsg;
- Py_INCREF(Py_None);
- return Py_None;
+ static char *kwd_list[] = { "port", NULL };
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &port) )
+ return NULL;
+
+ kmsg.type = XCS_VIRQ;
+ kmsg.u.control.local_port = port;
+ xcs_ctrl_send(&kmsg);
+ xcs_ctrl_read(&kmsg);
+
+ if ( kmsg.result != XCS_RSLT_OK )
+ {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+
+ return PyInt_FromLong(kmsg.u.virq.port);
}
static PyObject *xu_notifier_fileno(PyObject *self, PyObject *args)
{
- xu_notifier_object *xun = (xu_notifier_object *)self;
- return PyInt_FromLong(xun->evtchn_fd);
+ return PyInt_FromLong(xcs_data_fd);
}
static PyMethodDef xu_notifier_methods[] = {
@@ -164,20 +363,17 @@ static PyMethodDef xu_notifier_methods[] = {
METH_VARARGS,
"Read a @port with pending notifications.\n" },
- { "unmask",
- (PyCFunction)xu_notifier_unmask,
- METH_VARARGS,
- "Unmask notifications for a @port.\n" },
-
- { "bind",
- (PyCFunction)xu_notifier_bind,
- METH_VARARGS,
- "Get notifications for a @port.\n" },
-
- { "unbind",
- (PyCFunction)xu_notifier_unbind,
- METH_VARARGS,
- "No longer get notifications for a @port.\n" },
+ { "bind_virq",
+ (PyCFunction)xu_notifier_bind_virq,
+ METH_VARARGS | METH_KEYWORDS,
+ "Get notifications for a virq.\n"
+ " virq [int]: VIRQ to bind.\n\n" },
+
+ { "virq_send",
+ (PyCFunction)xu_notifier_virq_send,
+ METH_VARARGS | METH_KEYWORDS,
+ "Fire a virq notification.\n"
+ " port [int]: port that VIRQ is bound to.\n\n" },
{ "fileno",
(PyCFunction)xu_notifier_fileno,
@@ -189,35 +385,22 @@ static PyMethodDef xu_notifier_methods[] = {
staticforward PyTypeObject xu_notifier_type;
+/* connect to xcs if we aren't already, and return a dummy object. */
static PyObject *xu_notifier_new(PyObject *self, PyObject *args)
{
xu_notifier_object *xun;
- struct stat st;
+ int i;
if ( !PyArg_ParseTuple(args, "") )
return NULL;
xun = PyObject_New(xu_notifier_object, &xu_notifier_type);
- /* Make sure any existing device file links to correct device. */
- if ( (lstat(EVTCHN_DEV_NAME, &st) != 0) ||
- !S_ISCHR(st.st_mode) ||
- (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) )
- (void)unlink(EVTCHN_DEV_NAME);
-
- reopen:
- xun->evtchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
- if ( xun->evtchn_fd == -1 )
- {
- if ( (errno == ENOENT) &&
- ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
- (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
- makedev(EVTCHN_DEV_MAJOR,EVTCHN_DEV_MINOR)) == 0) )
- goto reopen;
- PyObject_Del((PyObject *)xun);
- return PyErr_SetFromErrno(PyExc_IOError);
- }
- set_cloexec(xun->evtchn_fd);
+ for (i = 0; i < XCS_RING_SIZE; i++)
+ REQ_RING_ENT(i) = RSP_RING_ENT(i) = NULL;
+
+ (void)xcs_connect(XCS_SUN_PATH);
+
return (PyObject *)xun;
}
@@ -229,8 +412,7 @@ static PyObject *xu_notifier_getattr(PyObject *obj, char *name)
static void xu_notifier_dealloc(PyObject *self)
{
- xu_notifier_object *xun = (xu_notifier_object *)self;
- (void)close(xun->evtchn_fd);
+ xcs_disconnect();
PyObject_Del(self);
}
@@ -311,6 +493,24 @@ static PyTypeObject xu_notifier_type = {
PyDict_SetItemString(dict, #_field, obj); \
} while ( 0 )
+#define PSTR2CHAR(_struct, _field) \
+ do { \
+ PyObject *obj; \
+ if ( (obj = PyDict_GetItemString(payload, #_field)) != NULL ) \
+ { \
+ if ( PyString_Check(obj) ) \
+ { \
+ char *buffer = PyString_AsString(obj); \
+ \
+ strcpy(((_struct *)&xum->msg.msg[0])->_field, \
+ buffer); \
+ /* Should complain about length - think later */ \
+ dict_items_parsed++; \
+ } \
+ } \
+ xum->msg.length = sizeof(_struct); \
+ } while ( 0 )
+
typedef struct {
PyObject_HEAD;
control_msg_t msg;
@@ -423,6 +623,8 @@ static PyObject *xu_message_get_payload(PyObject *self, PyObject *args)
case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE):
C2P(blkif_be_vbd_create_t, domid, Int, Long);
C2P(blkif_be_vbd_create_t, blkif_handle, Int, Long);
+ C2P(blkif_be_vbd_create_t, pdevice, Int, Long);
+ C2P(blkif_be_vbd_create_t, dev_handle, Int, Long);
C2P(blkif_be_vbd_create_t, vdevice, Int, Long);
C2P(blkif_be_vbd_create_t, readonly, Int, Long);
C2P(blkif_be_vbd_create_t, status, Int, Long);
@@ -433,23 +635,6 @@ static PyObject *xu_message_get_payload(PyObject *self, PyObject *args)
C2P(blkif_be_vbd_destroy_t, vdevice, Int, Long);
C2P(blkif_be_vbd_destroy_t, status, Int, Long);
return dict;
- case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW):
- C2P(blkif_be_vbd_grow_t, domid, Int, Long);
- C2P(blkif_be_vbd_grow_t, blkif_handle, Int, Long);
- C2P(blkif_be_vbd_grow_t, vdevice, Int, Long);
- C2P(blkif_be_vbd_grow_t, extent.sector_start,
- Long, UnsignedLongLong);
- C2P(blkif_be_vbd_grow_t, extent.sector_length,
- Long, UnsignedLongLong);
- C2P(blkif_be_vbd_grow_t, extent.device, Int, Long);
- C2P(blkif_be_vbd_grow_t, status, Int, Long);
- return dict;
- case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_SHRINK):
- C2P(blkif_be_vbd_shrink_t, domid, Int, Long);
- C2P(blkif_be_vbd_shrink_t, blkif_handle, Int, Long);
- C2P(blkif_be_vbd_shrink_t, vdevice, Int, Long);
- C2P(blkif_be_vbd_shrink_t, status, Int, Long);
- return dict;
case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DRIVER_STATUS):
C2P(blkif_be_driver_status_t, status, Int, Long);
return dict;
@@ -486,6 +671,13 @@ static PyObject *xu_message_get_payload(PyObject *self, PyObject *args)
C2P(netif_be_destroy_t, netif_handle, Int, Long);
C2P(netif_be_destroy_t, status, Int, Long);
return dict;
+ case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CREDITLIMIT):
+ C2P(netif_be_creditlimit_t, domid, Int, Long);
+ C2P(netif_be_creditlimit_t, netif_handle, Int, Long);
+ C2P(netif_be_creditlimit_t, credit_bytes, Int, Long);
+ C2P(netif_be_creditlimit_t, period_usec, Int, Long);
+ C2P(netif_be_creditlimit_t, status, Int, Long);
+ return dict;
case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CONNECT):
C2P(netif_be_connect_t, domid, Int, Long);
C2P(netif_be_connect_t, netif_handle, Int, Long);
@@ -502,13 +694,59 @@ static PyObject *xu_message_get_payload(PyObject *self, PyObject *args)
case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DRIVER_STATUS):
C2P(netif_be_driver_status_t, status, Int, Long);
return dict;
+ case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED):
+ C2P(usbif_fe_interface_status_changed_t, status, Int, Long);
+ C2P(usbif_fe_interface_status_changed_t, evtchn, Int, Long);
+ C2P(usbif_fe_interface_status_changed_t, domid, Int, Long);
+ C2P(usbif_fe_interface_status_changed_t, bandwidth, Int, Long);
+ C2P(usbif_fe_interface_status_changed_t, num_ports, Int, Long);
+ return dict;
+ case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_DRIVER_STATUS_CHANGED):
+ C2P(usbif_fe_driver_status_changed_t, status, Int, Long);
+ return dict;
+ case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_CONNECT):
+ C2P(usbif_fe_interface_connect_t, shmem_frame, Int, Long);
+ return dict;
+ case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_DISCONNECT):
+ return dict;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CREATE):
+ C2P(usbif_be_create_t, domid, Int, Long);
+ C2P(usbif_be_create_t, status, Int, Long);
+ return dict;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DESTROY):
+ C2P(usbif_be_destroy_t, domid, Int, Long);
+ C2P(usbif_be_destroy_t, status, Int, Long);
+ return dict;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CONNECT):
+ C2P(usbif_be_connect_t, domid, Int, Long);
+ C2P(usbif_be_connect_t, shmem_frame, Int, Long);
+ C2P(usbif_be_connect_t, evtchn, Int, Long);
+ C2P(usbif_be_connect_t, bandwidth, Int, Long);
+ C2P(usbif_be_connect_t, status, Int, Long);
+ return dict;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DISCONNECT):
+ C2P(usbif_be_disconnect_t, domid, Int, Long);
+ C2P(usbif_be_disconnect_t, status, Int, Long);
+ return dict;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DRIVER_STATUS_CHANGED):
+ C2P(usbif_be_driver_status_changed_t, status, Int, Long);
+ return dict;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CLAIM_PORT):
+ C2P(usbif_be_claim_port_t, domid, Int, Long);
+ C2P(usbif_be_claim_port_t, usbif_port, Int, Long);
+ C2P(usbif_be_claim_port_t, status, Int, Long);
+ C2P(usbif_be_claim_port_t, path, String, String);
+ return dict;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_RELEASE_PORT):
+ C2P(usbif_be_release_port_t, path, String, String);
+ return dict;
case TYPE(CMSG_MEM_REQUEST, CMSG_MEM_REQUEST_SET):
C2P(mem_request_t, target, Int, Long);
C2P(mem_request_t, status, Int, Long);
return dict;
}
- return PyString_FromStringAndSize(xum->msg.msg, xum->msg.length);
+ return PyString_FromStringAndSize((char *)xum->msg.msg, xum->msg.length);
}
static PyObject *xu_message_get_header(PyObject *self, PyObject *args)
@@ -605,6 +843,8 @@ static PyObject *xu_message_new(PyObject *self, PyObject *args)
case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE):
P2C(blkif_be_vbd_create_t, domid, u32);
P2C(blkif_be_vbd_create_t, blkif_handle, u32);
+ P2C(blkif_be_vbd_create_t, pdevice, blkif_pdev_t);
+ P2C(blkif_be_vbd_create_t, dev_handle, u32);
P2C(blkif_be_vbd_create_t, vdevice, blkif_vdev_t);
P2C(blkif_be_vbd_create_t, readonly, u16);
break;
@@ -613,19 +853,6 @@ static PyObject *xu_message_new(PyObject *self, PyObject *args)
P2C(blkif_be_vbd_destroy_t, blkif_handle, u32);
P2C(blkif_be_vbd_destroy_t, vdevice, blkif_vdev_t);
break;
- case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW):
- P2C(blkif_be_vbd_grow_t, domid, u32);
- P2C(blkif_be_vbd_grow_t, blkif_handle, u32);
- P2C(blkif_be_vbd_grow_t, vdevice, blkif_vdev_t);
- P2C(blkif_be_vbd_grow_t, extent.sector_start, blkif_sector_t);
- P2C(blkif_be_vbd_grow_t, extent.sector_length, blkif_sector_t);
- P2C(blkif_be_vbd_grow_t, extent.device, blkif_pdev_t);
- break;
- case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_SHRINK):
- P2C(blkif_be_vbd_shrink_t, domid, u32);
- P2C(blkif_be_vbd_shrink_t, blkif_handle, u32);
- P2C(blkif_be_vbd_shrink_t, vdevice, blkif_vdev_t);
- break;
case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_STATUS):
P2C(netif_fe_interface_status_t, handle, u32);
P2C(netif_fe_interface_status_t, status, u32);
@@ -647,11 +874,23 @@ static PyObject *xu_message_new(PyObject *self, PyObject *args)
P2C(netif_be_create_t, mac[3], u8);
P2C(netif_be_create_t, mac[4], u8);
P2C(netif_be_create_t, mac[5], u8);
+ P2C(netif_be_create_t, be_mac[0], u8);
+ P2C(netif_be_create_t, be_mac[1], u8);
+ P2C(netif_be_create_t, be_mac[2], u8);
+ P2C(netif_be_create_t, be_mac[3], u8);
+ P2C(netif_be_create_t, be_mac[4], u8);
+ P2C(netif_be_create_t, be_mac[5], u8);
break;
case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DESTROY):
P2C(netif_be_destroy_t, domid, u32);
P2C(netif_be_destroy_t, netif_handle, u32);
break;
+ case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CREDITLIMIT):
+ P2C(netif_be_creditlimit_t, domid, u32);
+ P2C(netif_be_creditlimit_t, netif_handle, u32);
+ P2C(netif_be_creditlimit_t, credit_bytes, u32);
+ P2C(netif_be_creditlimit_t, period_usec, u32);
+ break;
case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CONNECT):
P2C(netif_be_connect_t, domid, u32);
P2C(netif_be_connect_t, netif_handle, u32);
@@ -669,7 +908,53 @@ static PyObject *xu_message_new(PyObject *self, PyObject *args)
break;
case TYPE(CMSG_MEM_REQUEST, CMSG_MEM_REQUEST_SET):
P2C(mem_request_t, target, u32);
- P2C(mem_request_t, status, u32);
+ break;
+ case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED):
+ P2C(usbif_fe_interface_status_changed_t, status, u32);
+ P2C(usbif_fe_interface_status_changed_t, evtchn, u16);
+ P2C(usbif_fe_interface_status_changed_t, domid, domid_t);
+ P2C(usbif_fe_interface_status_changed_t, bandwidth, u32);
+ P2C(usbif_fe_interface_status_changed_t, num_ports, u32);
+ break;
+ case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_DRIVER_STATUS_CHANGED):
+ P2C(usbif_fe_driver_status_changed_t, status, u32);
+ break;
+ case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_CONNECT):
+ P2C(usbif_fe_interface_connect_t, shmem_frame, memory_t);
+ break;
+ case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_DISCONNECT):
+ break;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CREATE):
+ P2C(usbif_be_create_t, domid, domid_t);
+ P2C(usbif_be_create_t, status, u32);
+ break;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DESTROY):
+ P2C(usbif_be_destroy_t, domid, domid_t);
+ P2C(usbif_be_destroy_t, status, u32);
+ break;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CONNECT):
+ P2C(usbif_be_connect_t, domid, domid_t);
+ P2C(usbif_be_connect_t, shmem_frame, memory_t);
+ P2C(usbif_be_connect_t, evtchn, u32);
+ P2C(usbif_be_connect_t, bandwidth, u32);
+ P2C(usbif_be_connect_t, status, u32);
+ break;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DISCONNECT):
+ P2C(usbif_be_disconnect_t, domid, domid_t);
+ P2C(usbif_be_disconnect_t, status, u32);
+ break;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DRIVER_STATUS_CHANGED):
+ P2C(usbif_be_driver_status_changed_t, status, u32);
+ break;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CLAIM_PORT):
+ P2C(usbif_be_claim_port_t, domid, domid_t);
+ P2C(usbif_be_claim_port_t, usbif_port, u32);
+ P2C(usbif_be_claim_port_t, status, u32);
+ PSTR2CHAR(usbif_be_claim_port_t, path);
+ printf("dict items parsed = %d", dict_items_parsed);
+ break;
+ case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_RELEASE_PORT):
+ PSTR2CHAR(usbif_be_release_port_t, path);
break;
case TYPE(CMSG_SHUTDOWN, CMSG_SHUTDOWN_SYSRQ):
P2C(shutdown_sysrq_t, key, char);
@@ -723,84 +1008,62 @@ static PyTypeObject xu_message_type = {
* *********************** PORT ***********************
*/
-static control_if_t *map_control_interface(int fd, unsigned long pfn,
- u32 dom)
-{
- char *vaddr = xc_map_foreign_range( fd, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE, pfn );
- if ( vaddr == NULL )
- return NULL;
- return (control_if_t *)(vaddr + 2048);
-}
-static void unmap_control_interface(int fd, control_if_t *c)
-{
- char *vaddr = (char *)c - 2048;
- (void)munmap(vaddr, PAGE_SIZE);
-}
-
typedef struct xu_port_object {
PyObject_HEAD;
int xc_handle;
int connected;
u32 remote_dom;
int local_port, remote_port;
- control_if_t *interface;
- CONTROL_RING_IDX tx_req_cons, tx_resp_prod;
- CONTROL_RING_IDX rx_req_prod, rx_resp_cons;
+ struct xu_port_object *fix_next;
} xu_port_object;
static PyObject *port_error;
-static PyObject *xu_port_notify(PyObject *self, PyObject *args)
-{
- xu_port_object *xup = (xu_port_object *)self;
-
- if ( !PyArg_ParseTuple(args, "") )
- return NULL;
-
- (void)xc_evtchn_send(xup->xc_handle, xup->local_port);
-
- Py_INCREF(Py_None);
- return Py_None;
-}
-
static PyObject *xu_port_read_request(PyObject *self, PyObject *args)
{
xu_port_object *xup = (xu_port_object *)self;
xu_message_object *xum;
- CONTROL_RING_IDX c = xup->tx_req_cons;
- control_if_t *cif = xup->interface;
control_msg_t *cmsg;
-
- if ( !PyArg_ParseTuple(args, "") )
- return NULL;
-
- if ( (c == cif->tx_req_prod) ||
- ((c - xup->tx_resp_prod) == CONTROL_RING_SIZE) )
- {
- PyErr_SetString(port_error, "no request to read");
- return NULL;
+ unsigned i;
+ xcs_msg_ent_t *ent = NULL;
+
+ for ( i = req_cons; (i != req_prod); i++ ) {
+ ent = REQ_RING_ENT(i);
+ if (ent == NULL)
+ continue;
+ if (ent->msg.u.control.remote_dom == xup->remote_dom)
+ break;
}
+
+ if ((ent == NULL) ||
+ (ent->msg.u.control.remote_dom != xup->remote_dom))
+ goto none;
- /* Need to ensure we see the request, despite seeing the index update.*/
- rmb();
-
- cmsg = &cif->tx_ring[MASK_CONTROL_IDX(c)];
+ cmsg = &ent->msg.u.control.msg;
xum = PyObject_New(xu_message_object, &xu_message_type);
memcpy(&xum->msg, cmsg, sizeof(*cmsg));
if ( xum->msg.length > sizeof(xum->msg.msg) )
xum->msg.length = sizeof(xum->msg.msg);
- xup->tx_req_cons++;
+ free(ent);
+
+ /* remove the entry from the ring and advance the consumer if possible */
+ REQ_RING_ENT(i) = NULL;
+ while ( (REQ_RING_ENT(req_cons) == NULL) && (!REQ_RING_EMPTY) )
+ req_cons++;
+
return (PyObject *)xum;
+
+none:
+ Py_INCREF(Py_None);
+ return Py_None;
+
}
static PyObject *xu_port_write_request(PyObject *self, PyObject *args)
{
xu_port_object *xup = (xu_port_object *)self;
xu_message_object *xum;
- CONTROL_RING_IDX p = xup->rx_req_prod;
- control_if_t *cif = xup->interface;
- control_msg_t *cmsg;
+ xcs_msg_t kmsg;
if ( !PyArg_ParseTuple(args, "O", (PyObject **)&xum) )
return NULL;
@@ -811,18 +1074,11 @@ static PyObject *xu_port_write_request(PyObject *self, PyObject *args)
return NULL;
}
- if ( ((p - xup->rx_resp_cons) == CONTROL_RING_SIZE) )
- {
- PyErr_SetString(port_error, "no space to write request");
- return NULL;
- }
-
- cmsg = &cif->rx_ring[MASK_CONTROL_IDX(p)];
- memcpy(cmsg, &xum->msg, sizeof(*cmsg));
-
- wmb();
- xup->rx_req_prod = cif->rx_req_prod = p + 1;
-
+ kmsg.type = XCS_REQUEST;
+ kmsg.u.control.remote_dom = xup->remote_dom;
+ memcpy(&kmsg.u.control.msg, &xum->msg, sizeof(control_msg_t));
+ xcs_data_send(&kmsg);
+
Py_INCREF(Py_None);
return Py_None;
}
@@ -831,38 +1087,47 @@ static PyObject *xu_port_read_response(PyObject *self, PyObject *args)
{
xu_port_object *xup = (xu_port_object *)self;
xu_message_object *xum;
- CONTROL_RING_IDX c = xup->rx_resp_cons;
- control_if_t *cif = xup->interface;
control_msg_t *cmsg;
-
- if ( !PyArg_ParseTuple(args, "") )
- return NULL;
-
- if ( (c == cif->rx_resp_prod) || (c == xup->rx_req_prod) )
- {
- PyErr_SetString(port_error, "no response to read");
- return NULL;
+ unsigned i;
+ xcs_msg_ent_t *ent = NULL;
+
+ for ( i = rsp_cons; (i != rsp_prod); i++ ) {
+ ent = RSP_RING_ENT(i);
+ if (ent == NULL)
+ continue;
+ if (ent->msg.u.control.remote_dom == xup->remote_dom)
+ break;
}
+
+ if ((ent == NULL) ||
+ (ent->msg.u.control.remote_dom != xup->remote_dom))
+ goto none;
- /* Need to ensure we see the response, despite seeing the index update.*/
- rmb();
-
- cmsg = &cif->rx_ring[MASK_CONTROL_IDX(c)];
+ cmsg = &ent->msg.u.control.msg;
xum = PyObject_New(xu_message_object, &xu_message_type);
memcpy(&xum->msg, cmsg, sizeof(*cmsg));
if ( xum->msg.length > sizeof(xum->msg.msg) )
xum->msg.length = sizeof(xum->msg.msg);
- xup->rx_resp_cons++;
+ free(ent);
+
+ /* remove the entry from the ring and advance the consumer if possible */
+ RSP_RING_ENT(i) = NULL;
+ while ( (RSP_RING_ENT(rsp_cons) == NULL) && (!RSP_RING_EMPTY) )
+ rsp_cons++;
+
return (PyObject *)xum;
+
+none:
+ Py_INCREF(Py_None);
+ return Py_None;
+
}
static PyObject *xu_port_write_response(PyObject *self, PyObject *args)
{
xu_port_object *xup = (xu_port_object *)self;
xu_message_object *xum;
- CONTROL_RING_IDX p = xup->tx_resp_prod;
- control_if_t *cif = xup->interface;
- control_msg_t *cmsg;
+ xcs_msg_t kmsg;
if ( !PyArg_ParseTuple(args, "O", (PyObject **)&xum) )
return NULL;
@@ -873,17 +1138,10 @@ static PyObject *xu_port_write_response(PyObject *self, PyObject *args)
return NULL;
}
- if ( p == xup->tx_req_cons )
- {
- PyErr_SetString(port_error, "no space to write response");
- return NULL;
- }
-
- cmsg = &cif->tx_ring[MASK_CONTROL_IDX(p)];
- memcpy(cmsg, &xum->msg, sizeof(*cmsg));
-
- wmb();
- xup->tx_resp_prod = cif->tx_resp_prod = p + 1;
+ kmsg.type = XCS_RESPONSE;
+ kmsg.u.control.remote_dom = xup->remote_dom;
+ memcpy(&kmsg.u.control.msg, &xum->msg, sizeof(control_msg_t));
+ xcs_data_send(&kmsg);
Py_INCREF(Py_None);
return Py_None;
@@ -891,140 +1149,138 @@ static PyObject *xu_port_write_response(PyObject *self, PyObject *args)
static PyObject *xu_port_request_to_read(PyObject *self, PyObject *args)
{
- xu_port_object *xup = (xu_port_object *)self;
- CONTROL_RING_IDX c = xup->tx_req_cons;
- control_if_t *cif = xup->interface;
-
+ xu_port_object *xup = (xu_port_object *)self;
+ xcs_msg_ent_t *ent;
+ int found = 0;
+ unsigned i;
+
if ( !PyArg_ParseTuple(args, "") )
return NULL;
- if ( (c == cif->tx_req_prod) ||
- ((c - xup->tx_resp_prod) == CONTROL_RING_SIZE) )
- return PyInt_FromLong(0);
-
- return PyInt_FromLong(1);
+ for ( i = req_cons; (i != req_prod); i++ ) {
+ ent = REQ_RING_ENT(i);
+ if (ent == NULL)
+ continue;
+ if (ent->msg.u.control.remote_dom == xup->remote_dom) {
+ found = 1;
+ break;
+ }
+ }
+
+ return PyInt_FromLong(found);
}
-static PyObject *xu_port_space_to_write_request(PyObject *self, PyObject *args)
+static PyObject *xu_port_response_to_read(PyObject *self, PyObject *args)
{
- xu_port_object *xup = (xu_port_object *)self;
- CONTROL_RING_IDX p = xup->rx_req_prod;
-
+ xu_port_object *xup = (xu_port_object *)self;
+ xcs_msg_ent_t *ent;
+ int found = 0;
+ unsigned i;
+
if ( !PyArg_ParseTuple(args, "") )
return NULL;
- if ( ((p - xup->rx_resp_cons) == CONTROL_RING_SIZE) )
- return PyInt_FromLong(0);
-
- return PyInt_FromLong(1);
+ for ( i = rsp_cons; (i != rsp_prod); i++ ) {
+ ent = RSP_RING_ENT(i);
+ if (ent == NULL)
+ continue;
+ if (ent->msg.u.control.remote_dom == xup->remote_dom) {
+ found = 1;
+ break;
+ }
+ }
+
+ return PyInt_FromLong(found);
}
-static PyObject *xu_port_response_to_read(PyObject *self, PyObject *args)
+static void _xu_port_close(xu_port_object *xup )
{
- xu_port_object *xup = (xu_port_object *)self;
- CONTROL_RING_IDX c = xup->rx_resp_cons;
- control_if_t *cif = xup->interface;
-
- if ( !PyArg_ParseTuple(args, "") )
- return NULL;
-
- if ( (c == cif->rx_resp_prod) || (c == xup->rx_req_prod) )
- return PyInt_FromLong(0);
-
- return PyInt_FromLong(1);
+ if ( xup->connected && xup->remote_dom != 0 )
+ {
+ xcs_msg_t kmsg;
+ kmsg.type = XCS_CIF_FREE_CC;
+ kmsg.u.interface.dom = xup->remote_dom;
+ kmsg.u.interface.local_port = xup->local_port;
+ kmsg.u.interface.remote_port = xup->remote_port;
+ xcs_ctrl_send(&kmsg);
+ xcs_ctrl_read(&kmsg);
+ xup->connected = 0;
+ }
}
-static PyObject *xu_port_space_to_write_response(
- PyObject *self, PyObject *args)
+static PyObject *xu_port_close(PyObject *self, PyObject *args)
{
- xu_port_object *xup = (xu_port_object *)self;
- CONTROL_RING_IDX p = xup->tx_resp_prod;
-
- if ( !PyArg_ParseTuple(args, "") )
- return NULL;
+ xu_port_object *xup = (xu_port_object *)self;
- if ( p == xup->tx_req_cons )
- return PyInt_FromLong(0);
+ _xu_port_close(xup);
- return PyInt_FromLong(1);
+ Py_INCREF(Py_None);
+ return Py_None;
}
-static int __xu_port_connect(xu_port_object *xup)
+static PyObject *xu_port_register(PyObject *self, PyObject *args,
+ PyObject *kwds)
{
- xc_dominfo_t info;
+ int type;
+ xcs_msg_t msg;
+ xu_port_object *xup = (xu_port_object *)self;
+ static char *kwd_list[] = { "type", NULL };
- if ( xup->connected )
- {
- return 0;
- }
-
- if ( (xc_domain_getinfo(xup->xc_handle, xup->remote_dom, 1, &info) != 1) ||
- (info.domid != xup->remote_dom) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list,
+ &type) )
+ return NULL;
+
+ if (!xup->connected)
{
- PyErr_SetString(port_error, "Failed to obtain domain status");
- return -1;
+ return PyInt_FromLong(0);
}
-
- xup->interface =
- map_control_interface(xup->xc_handle, info.shared_info_frame,
- xup->remote_dom);
-
- if ( xup->interface == NULL )
+
+ msg.type = XCS_MSG_BIND;
+ msg.u.bind.port = xup->local_port;
+ msg.u.bind.type = type;
+ xcs_ctrl_send(&msg);
+ xcs_ctrl_read(&msg);
+
+ if (msg.result != XCS_RSLT_OK)
{
- PyErr_SetString(port_error, "Failed to map domain control interface");
- return -1;
+ return PyInt_FromLong(0);
}
-
- /* Synchronise ring indexes. */
- xup->tx_resp_prod = xup->interface->tx_resp_prod;
- xup->tx_req_cons = xup->interface->tx_resp_prod;
- xup->rx_req_prod = xup->interface->rx_req_prod;
- xup->rx_resp_cons = xup->interface->rx_resp_prod;
-
- xup->connected = 1;
-
- return 0;
-}
-
-static void __xu_port_disconnect(xu_port_object *xup)
-{
- if ( xup->connected )
- unmap_control_interface(xup->xc_handle, xup->interface);
- xup->connected = 0;
-}
-
-static PyObject *xu_port_connect(PyObject *self, PyObject *args)
-{
- xu_port_object *xup = (xu_port_object *)self;
-
- if ( !PyArg_ParseTuple(args, "") )
- return NULL;
-
- if ( __xu_port_connect(xup) != 0 )
- return NULL;
-
- Py_INCREF(Py_None);
- return Py_None;
+
+ return PyInt_FromLong(1);
}
-static PyObject *xu_port_disconnect(PyObject *self, PyObject *args)
+static PyObject *xu_port_deregister(PyObject *self, PyObject *args,
+ PyObject *kwds)
{
- xu_port_object *xup = (xu_port_object *)self;
+ int type;
+ xcs_msg_t msg;
+ xu_port_object *xup = (xu_port_object *)self;
+ static char *kwd_list[] = { "type", NULL };
- if ( !PyArg_ParseTuple(args, "") )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list,
+ &type) )
return NULL;
- __xu_port_disconnect(xup);
-
- Py_INCREF(Py_None);
- return Py_None;
+ if (!xup->connected)
+ {
+ return PyInt_FromLong(0);
+ }
+
+ msg.type = XCS_MSG_UNBIND;
+ msg.u.bind.port = xup->local_port;
+ msg.u.bind.type = type;
+ xcs_ctrl_send(&msg);
+ xcs_ctrl_read(&msg);
+
+ if (msg.result != XCS_RSLT_OK)
+ {
+ return PyInt_FromLong(0);
+ }
+
+ return PyInt_FromLong(1);
}
static PyMethodDef xu_port_methods[] = {
- { "notify",
- (PyCFunction)xu_port_notify,
- METH_VARARGS,
- "Send a notification to the remote end.\n" },
{ "read_request",
(PyCFunction)xu_port_read_request,
@@ -1051,30 +1307,26 @@ static PyMethodDef xu_port_methods[] = {
METH_VARARGS,
"Returns TRUE if there is a request message to read.\n" },
- { "space_to_write_request",
- (PyCFunction)xu_port_space_to_write_request,
- METH_VARARGS,
- "Returns TRUE if there is space to write a request message.\n" },
{ "response_to_read",
(PyCFunction)xu_port_response_to_read,
METH_VARARGS,
"Returns TRUE if there is a response message to read.\n" },
- { "space_to_write_response",
- (PyCFunction)xu_port_space_to_write_response,
+ { "register",
+ (PyCFunction)xu_port_register,
+ METH_VARARGS | METH_KEYWORDS,
+ "Register to receive a type of message on this channel.\n" },
+
+ { "deregister",
+ (PyCFunction)xu_port_deregister,
+ METH_VARARGS | METH_KEYWORDS,
+ "Stop receiving a type of message on this port.\n" },
+
+ { "close",
+ (PyCFunction)xu_port_close,
METH_VARARGS,
- "Returns TRUE if there is space to write a response message.\n" },
-
- { "connect",
- (PyCFunction)xu_port_connect,
- METH_VARARGS,
- "Synchronously connect to remote domain.\n" },
-
- { "disconnect",
- (PyCFunction)xu_port_disconnect,
- METH_VARARGS,
- "Synchronously disconnect from remote domain.\n" },
+ "Close the port.\n" },
{ NULL, NULL, 0, NULL }
};
@@ -1086,6 +1338,7 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args, PyObject *kwds)
xu_port_object *xup;
u32 dom;
int port1 = 0, port2 = 0;
+ xcs_msg_t kmsg;
static char *kwd_list[] = { "dom", "local_port", "remote_port", NULL };
@@ -1097,72 +1350,58 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args, PyObject *kwds)
xup->connected = 0;
xup->remote_dom = dom;
-
- if ( (xup->xc_handle = xc_interface_open()) == -1 )
- {
- PyErr_SetString(port_error, "Could not open Xen control interface");
+
+ kmsg.type = XCS_CIF_NEW_CC;
+ kmsg.u.interface.dom = xup->remote_dom;
+ kmsg.u.interface.local_port = port1;
+ kmsg.u.interface.remote_port = port2;
+ xcs_ctrl_send(&kmsg);
+ xcs_ctrl_read(&kmsg);
+
+ if ( kmsg.result != XCS_RSLT_OK )
goto fail1;
- }
-
- if ( dom == 0 )
- {
- /*
- * The control-interface event channel for DOM0 is already set up.
- * We use an ioctl to discover the port at our end of the channel.
- */
- port1 = ioctl(xup->xc_handle, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, NULL);
- port2 = -1; /* We don't need the remote end of the DOM0 link. */
- if ( port1 < 0 )
- {
- PyErr_SetString(port_error, "Could not open channel to DOM0");
- goto fail2;
- }
- }
- else if ( xc_evtchn_bind_interdomain(xup->xc_handle,
- DOMID_SELF, dom,
- &port1, &port2) != 0 )
- {
- PyErr_SetString(port_error, "Could not open channel to domain");
- goto fail2;
- }
-
- xup->local_port = port1;
- xup->remote_port = port2;
-
- if ( __xu_port_connect(xup) != 0 )
- goto fail3;
-
+
+ xup->local_port = kmsg.u.interface.local_port;
+ xup->remote_port = kmsg.u.interface.remote_port;
+ xup->connected = 1;
+
return (PyObject *)xup;
-
- fail3:
- if ( dom != 0 )
- (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, port1);
- fail2:
- (void)xc_interface_close(xup->xc_handle);
+
fail1:
PyObject_Del((PyObject *)xup);
- return NULL;
+ PyErr_SetString(PyExc_ValueError, "cannot create port");
+ return NULL;
}
static PyObject *xu_port_getattr(PyObject *obj, char *name)
{
xu_port_object *xup = (xu_port_object *)obj;
+
if ( strcmp(name, "local_port") == 0 )
- return PyInt_FromLong(xup->local_port);
+ {
+ return PyInt_FromLong(xup->connected ? xup->local_port : -1);
+ }
if ( strcmp(name, "remote_port") == 0 )
- return PyInt_FromLong(xup->remote_port);
+ {
+ return PyInt_FromLong(xup->connected ? xup->remote_port : -1);
+ }
if ( strcmp(name, "remote_dom") == 0 )
+ {
return PyInt_FromLong(xup->remote_dom);
+ }
+ if ( strcmp(name, "connected") == 0 )
+ {
+ return PyInt_FromLong(xup->connected);
+ }
return Py_FindMethod(xu_port_methods, obj, name);
}
static void xu_port_dealloc(PyObject *self)
{
xu_port_object *xup = (xu_port_object *)self;
- __xu_port_disconnect(xup);
- if ( xup->remote_dom != 0 )
- (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
- (void)xc_interface_close(xup->xc_handle);
+
+ _xu_port_close(xup);
+
PyObject_Del(self);
}
@@ -1345,6 +1584,26 @@ static PyObject *xu_buffer_full(PyObject *self, PyObject *args)
return PyInt_FromLong(0);
}
+static PyObject *xu_buffer_size(PyObject *self, PyObject *args)
+{
+ xu_buffer_object *xub = (xu_buffer_object *)self;
+
+ if ( !PyArg_ParseTuple(args, "") )
+ return NULL;
+
+ return PyInt_FromLong(xub->prod - xub->cons);
+}
+
+static PyObject *xu_buffer_space(PyObject *self, PyObject *args)
+{
+ xu_buffer_object *xub = (xu_buffer_object *)self;
+
+ if ( !PyArg_ParseTuple(args, "") )
+ return NULL;
+
+ return PyInt_FromLong(BUFSZ - (xub->prod - xub->cons));
+}
+
static PyMethodDef xu_buffer_methods[] = {
{ "peek",
(PyCFunction)xu_buffer_peek,
@@ -1376,6 +1635,16 @@ static PyMethodDef xu_buffer_methods[] = {
METH_VARARGS,
"Return TRUE if the buffer is full.\n" },
+ { "size",
+ (PyCFunction)xu_buffer_size,
+ METH_VARARGS,
+ "Return number of bytes in the buffer.\n" },
+
+ { "space",
+ (PyCFunction)xu_buffer_space,
+ METH_VARARGS,
+ "Return space left in the buffer.\n" },
+
{ NULL, NULL, 0, NULL }
};
diff --git a/tools/python/xen/sv/CreateDomain.py b/tools/python/xen/sv/CreateDomain.py
deleted file mode 100644
index 4378897e5c..0000000000
--- a/tools/python/xen/sv/CreateDomain.py
+++ /dev/null
@@ -1,163 +0,0 @@
-from xen.sv.Wizard import *
-from xen.sv.util import *
-from xen.sv.GenTabbed import PreTab
-
-from xen.xm.create import make_config, OptVals
-
-from xen.xend.XendClient import server
-
-class CreateDomain( Wizard ):
- def __init__( self, urlWriter ):
-
- sheets = [ CreatePage0,
- CreatePage1,
- CreatePage2,
- CreatePage3,
- CreatePage4,
- CreateFinish ]
-
- Wizard.__init__( self, urlWriter, "Create Domain", sheets )
-
-class CreatePage0( Sheet ):
-
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "General", 0 )
- self.addControl( InputControl( 'name', 'VM Name', 'VM Name:', "[\\w|\\S]+", "You must enter a name in this field" ) )
- self.addControl( InputControl( 'memory', '64', 'Memory (Mb):', "[\\d]+", "You must enter a number in this field" ) )
- self.addControl( InputControl( 'cpu', '0', 'CPU:', "[\\d]+", "You must enter a number in this feild" ) )
- self.addControl( InputControl( 'cpu_weight', '1', 'CPU Weight:', "[\\d]+", "You must enter a number in this feild" ) )
-
-class CreatePage1( Sheet ):
-
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "Setup Kernel Image", 1 )
-# For now we don't need to select a builder...
-# self.addControl( ListControl( 'builder', [('linux', 'Linux'), ('netbsd', 'NetBSD')], 'Kernel Type:' ) )
- self.addControl( FileControl( 'kernel', '/boot/vmlinuz-2.6.9-xenU', 'Kernel Image:' ) )
- self.addControl( InputControl( 'extra', '', 'Kernel Command Line Parameters:' ) )
-
-class CreatePage2( Sheet ):
-
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 2 )
- self.addControl( InputControl( 'num_vbds', '1', 'Number of VBDs:', '[\\d]+', "You must enter a number in this field" ) )
-
-class CreatePage3( Sheet ):
-
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "Setup Virtual Block Device", 3 )
-
- def write_BODY( self, request, err ):
- if not self.passback: self.parseForm( request )
-
- previous_values = sxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference
-
- num_vbds = previous_values.get( 'num_vbds' )
-
- for i in range( int( num_vbds ) ):
- self.addControl( InputControl( 'vbd%s_dom0' % i, 'phy:sda%s' % str(i + 1), 'Device %s name:' % i ) )
- self.addControl( InputControl( 'vbd%s_domU' % i, 'sda%s' % str(i + 1), 'Virtualized device %s:' % i ) )
- self.addControl( ListControl( 'vbd%s_mode' % i, [('w', 'Read + Write'), ('r', 'Read Only')], 'Device %s mode:' % i ) )
-
- self.addControl( InputControl( 'root', '/dev/sda1', 'Root device (in VM):' ) )
-
- Sheet.write_BODY( self, request, err )
-
-class CreatePage4( Sheet ):
-
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "Network settings", 4 )
- self.addControl( ListControl( 'dhcp', [('off', 'No'), ('dhcp', 'Yes')], 'Use DHCP:' ) )
- self.addControl( InputControl( 'hostname', 'hostname', 'VM Hostname:' ) )
- self.addControl( InputControl( 'ip_addr', '1.2.3.4', 'VM IP Address:' ) )
- self.addControl( InputControl( 'ip_subnet', '255.255.255.0', 'VM Subnet Mask:' ) )
- self.addControl( InputControl( 'ip_gateway', '1.2.3.4', 'VM Gateway:' ) )
- self.addControl( InputControl( 'ip_nfs', '1.2.3.4', 'NFS Server:' ) )
-
-class CreateFinish( Sheet ):
-
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "All Done", 5 )
-
- def write_BODY( self, request, err ):
-
- if not self.passback: self.parseForm( request )
-
- xend_sxp = self.translate_sxp( string2sxp( self.passback ) )
-
- try:
- dom_sxp = server.xend_domain_create( xend_sxp )
- success = "Your domain was successfully created.\n"
- except:
- success = "There was an error creating your domain.\nThe configuration used is as follows:\n"
- dom_sxp = xend_sxp
-
-
-
- pt = PreTab( success + sxp2prettystring( dom_sxp ) )
- pt.write_BODY( request )
-
- request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
- request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
-
- def translate_sxp( self, fin_sxp ):
- fin_hash = ssxp2hash( fin_sxp )
-
- def get( key ):
- ret = fin_hash.get( key )
- if ret:
- return ret
- else:
- return ""
-
- vals = OptVals()
-
- vals.name = get( 'name' )
- vals.memory = get( 'memory' )
- vals.maxmem = get( 'maxmem' )
- vals.cpu = get( 'cpu' )
- vals.cpu_weight = get( 'cpu_weight' )
-
- vals.builder = get( 'builder' )
- vals.kernel = get( 'kernel' )
- vals.root = get( 'root' )
- vals.extra = get( 'extra' )
-
- #setup vbds
-
- vbds = []
-
- for i in range( int( get( 'num_vbds' ) ) ):
- vbds.append( ( get( 'vbd%s_dom0' % i ), get('vbd%s_domU' % i ), get( 'vbd%s_mode' % i ) ) )
-
- vals.disk = vbds
-
- #misc
-
- vals.pci = []
-
- vals.blkif = None
- vals.netif = None
- vals.restart = None
- vals.console = None
- vals.ramdisk = None
-
- #setup vifs
-
- vals.vif = []
- vals.nics = 1
-
- ip = get( 'ip_addr' )
- nfs = get( 'ip_nfs' )
- gate = get( 'ip_gateway' )
- mask = get( 'ip_subnet' )
- host = get( 'hostname' )
- dhcp = get( 'dhcp' )
-
- vals.cmdline_ip = "%s:%s:%s:%s:%s:eth0:%s" % (ip, nfs, gate, mask, host, dhcp)
-
- try:
- return make_config( vals )
- except:
- return [["Error creating domain config."]]
-
diff --git a/tools/python/xen/sv/Daemon.py b/tools/python/xen/sv/Daemon.py
deleted file mode 100644
index 5a8d18e5e4..0000000000
--- a/tools/python/xen/sv/Daemon.py
+++ /dev/null
@@ -1,110 +0,0 @@
-###########################################################
-## XenSV Web Control Interface Daemon
-## Copyright (C) 2004, K A Fraser (University of Cambridge)
-## Copyright (C) 2004, Mike Wray <mike.wray@hp.com>
-## Copyright (C) 2004, Tom Wilkie <tw275@cam.ac.uk>
-###########################################################
-
-import os
-import os.path
-import sys
-import re
-
-from xen.sv.params import *
-
-from twisted.internet import reactor
-from twisted.web import static, server, script
-
-from xen.util.ip import _readline, _readlines
-
-class Daemon:
- """The xend daemon.
- """
- def __init__(self):
- self.shutdown = 0
- self.traceon = 0
-
- def daemon_pids(self):
- pids = []
- pidex = '(?P<pid>\d+)'
- pythonex = '(?P<python>\S*python\S*)'
- cmdex = '(?P<cmd>.*)'
- procre = re.compile('^\s*' + pidex + '\s*' + pythonex + '\s*' + cmdex + '$')
- xendre = re.compile('^/usr/sbin/xend\s*(start|restart)\s*.*$')
- procs = os.popen('ps -e -o pid,args 2>/dev/null')
- for proc in procs:
- pm = procre.match(proc)
- if not pm: continue
- xm = xendre.match(pm.group('cmd'))
- if not xm: continue
- #print 'pid=', pm.group('pid'), 'cmd=', pm.group('cmd')
- pids.append(int(pm.group('pid')))
- return pids
-
- def new_cleanup(self, kill=0):
- err = 0
- pids = self.daemon_pids()
- if kill:
- for pid in pids:
- print "Killing daemon pid=%d" % pid
- os.kill(pid, signal.SIGHUP)
- elif pids:
- err = 1
- print "Daemon already running: ", pids
- return err
-
- def cleanup(self, kill=False):
- # No cleanup to do if PID_FILE is empty.
- if not os.path.isfile(PID_FILE) or not os.path.getsize(PID_FILE):
- return 0
- # Read the pid of the previous invocation and search active process list.
- pid = open(PID_FILE, 'r').read()
- lines = _readlines(os.popen('ps ' + pid + ' 2>/dev/null'))
- for line in lines:
- if re.search('^ *' + pid + '.+xensv', line):
- if not kill:
- print "Daemon is already running (pid %d)" % int(pid)
- return 1
- # Old daemon is still active: terminate it.
- os.kill(int(pid), 1)
- # Delete the stale PID_FILE.
- os.remove(PID_FILE)
- return 0
-
- def start(self, trace=0):
- if self.cleanup(kill=False):
- return 1
-
- # Fork -- parent writes PID_FILE and exits.
- pid = os.fork()
- if pid:
- # Parent
- pidfile = open(PID_FILE, 'w')
- pidfile.write(str(pid))
- pidfile.close()
- return 0
- # Child
- self.run()
- return 0
-
- def stop(self):
- return self.cleanup(kill=True)
-
- def run(self):
- root = static.File( SV_ROOT )
- root.indexNames = [ 'Main.rpy' ]
- root.processors = { '.rpy': script.ResourceScript }
- reactor.listenTCP( SV_PORT, server.Site( root ) )
- reactor.run()
-
- def exit(self):
- reactor.disconnectAll()
- sys.exit(0)
-
-def instance():
- global inst
- try:
- inst
- except:
- inst = Daemon()
- return inst
diff --git a/tools/python/xen/sv/DomInfo.py b/tools/python/xen/sv/DomInfo.py
deleted file mode 100755
index 8aabb19057..0000000000
--- a/tools/python/xen/sv/DomInfo.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from xen.xend.XendClient import getAsynchServer
-server = getAsynchServer()
-from xen.xend import PrettyPrint
-
-from xen.sv.HTMLBase import HTMLBase
-from xen.sv.util import *
-from xen.sv.GenTabbed import *
-
-DEBUG=1
-
-class DomInfo( GenTabbed ):
-
- def __init__( self, urlWriter ):
-
- self.dom = 0;
-
- def tabUrlWriter( tab ):
- return urlWriter( "&dom=%s%s" % ( self.dom, tab ) )
-
- GenTabbed.__init__( self, "Domain Info", tabUrlWriter, [ 'General', 'SXP', 'Devices' ], [ DomGeneralTab, DomSXPTab, NullTab ] )
-
- def write_BODY( self, request ):
- dom = request.args.get('dom')
-
- if dom is None or len(dom) != 1:
- request.write( "<p>Please Select a Domain</p>" )
- return None
- else:
- self.dom = dom[0]
-
- GenTabbed.write_BODY( self, request )
-
- def write_MENU( self, request ):
- pass
-
-class DomGeneralTab( CompositeTab ):
- def __init__( self ):
- CompositeTab.__init__( self, [ DomGenTab, DomActionTab ] )
-
-class DomGenTab( GeneralTab ):
-
- def __init__( self ):
-
- titles = {}
-
- titles[ 'ID' ] = 'dom'
- titles[ 'Name' ] = 'name'
- titles[ 'CPU' ] = 'cpu'
- titles[ 'Memory' ] = ( 'mem', memoryFormatter )
- titles[ 'State' ] = ( 'state', stateFormatter )
- titles[ 'Total CPU' ] = ( 'cpu_time', smallTimeFormatter )
- titles[ 'Up Time' ] = ( 'up_time', bigTimeFormatter )
-
- GeneralTab.__init__( self, {}, titles )
-
- def write_BODY( self, request ):
-
- self.dom = getVar('dom', request)
-
- if self.dom is None:
- request.write( "<p>Please Select a Domain</p>" )
- return None
-
- self.dict = getDomInfoHash( self.dom )
-
- GeneralTab.write_BODY( self, request )
-
-class DomSXPTab( PreTab ):
-
- def __init__( self ):
- self.dom = 0
- PreTab.__init__( self, "" )
-
-
- def write_BODY( self, request ):
- self.dom = getVar('dom', request)
-
- if self.dom is None:
- request.write( "<p>Please Select a Domain</p>" )
- return None
-
- try:
- domInfo = server.xend_domain( self.dom )
- except:
- domInfo = [["Error getting domain details."]]
-
- self.source = sxp2prettystring( domInfo )
-
- PreTab.write_BODY( self, request )
-
-class DomActionTab( ActionTab ):
-
- def __init__( self ):
- actions = { "shutdown" : ( "Shutdown the Domain", "shutdown.png" ),
- "reboot" : ( "Reboot the Domain", "reboot.png" ),
- "pause" : ( "Pause the Domain", "pause.png" ),
- "unpause" : ( "Unpause the Domain", "unpause.png" ),
- "destroy" : ( "Destroy the Domain", "destroy.png" ) }
- ActionTab.__init__( self, actions )
-
- def op_shutdown( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != '0':
- if DEBUG: print ">DomShutDown %s" % dom
- try:
- server.xend_domain_shutdown( int( dom ), "halt" )
- except:
- pass
-
- def op_reboot( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != '0':
- if DEBUG: print ">DomReboot %s" % dom
- try:
- server.xend_domain_shutdown( int( dom ), "reboot" )
- except:
- pass
-
- def op_pause( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != '0':
- if DEBUG: print ">DomPause %s" % dom
- try:
- server.xend_domain_pause( int( dom ) )
- except:
- pass
-
- def op_unpause( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != '0':
- if DEBUG: print ">DomUnpause %s" % dom
- try:
- server.xend_domain_unpause( int( dom ) )
- except:
- pass
-
- def op_destroy( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != '0':
- if DEBUG: print ">DomDestroy %s" % dom
- try:
- server.xend_domain_destroy( int( dom ), "halt" )
- except:
- pass
-
-
-
-
-
diff --git a/tools/python/xen/sv/DomList.py b/tools/python/xen/sv/DomList.py
deleted file mode 100755
index 86976b7af3..0000000000
--- a/tools/python/xen/sv/DomList.py
+++ /dev/null
@@ -1,81 +0,0 @@
-from xen.xend.XendClient import server
-from xen.xend import sxp
-
-from xen.sv.HTMLBase import HTMLBase
-from xen.sv.util import *
-
-class DomList( HTMLBase ):
-
- isLeaf = True
-
- def __init__( self, urlWriter ):
- HTMLBase.__init__(self)
- self.urlWriter = urlWriter
-
- def write_MENU( self, request ):
- return self.write_BODY( request, head=True, long=False )
-
- def write_BODY( self, request, head=True, long=True ):
-
- domains = None
-
- try:
- domains = server.xend_domains()
- domains.sort()
- except:
- pass
-
- request.write( "\n<table style='border:0px solid white' cellspacing='0' cellpadding='0' border='0' width='100%'>\n" )
-
- if head:
- request.write( "<tr class='domainInfoHead'>" )
- self.write_DOMAIN_HEAD( request, long )
- request.write( "</tr>" )
-
- odd = True
-
- if not domains is None:
- for domain in domains:
- if odd:
- request.write( "<tr class='domainInfoOdd'>\n" )
- odd = False
- else:
- request.write( "<tr class='domainInfoEven'>\n" )
- odd = True
- self.write_DOMAIN( request, getDomInfoHash( domain ), long )
- request.write( "</tr>\n" )
- else:
- request.write( "<tr colspan='10'><p class='small'>Error getting domain list<br/>Perhaps XenD not running?</p></tr>")
-
- request.write( "</table>\n" )
-
- def write_DOMAIN( self, request, domInfoHash, long=True ):
- request.write( "<td class='domainInfo' align='center'>%(id)s</td>\n" % domInfoHash )
-
- url = self.urlWriter( "&mod=info&dom=%(id)s" % domInfoHash )
-
- request.write( "<td class='domainInfo' align='center'><a href='%s'>%s</a></td>\n" % ( url, domInfoHash['name'] ) )
- if long:
- request.write( "<td class='domainInfo' align='center'>%(memory)5s</td>\n" % domInfoHash )
- request.write( "<td class='domainInfo' align='center'>%(cpu)2s</td>\n" % domInfoHash )
- request.write( "<td class='domainInfo' align='center'>%(state)5s</td>\n" % domInfoHash )
- if domInfoHash[ 'id' ] != "0":
- request.write( "<td class='domainInfo' align='center'>" )
-
- if domInfoHash[ 'state' ][ 2 ] == "-":
- request.write( "<img src='images/small-pause.png' onclick='doOp2( \"pause\", \"%(dom)-4s\" )'>" % domInfoHash )
- else:
- request.write( "<img src='images/small-unpause.png' onclick='doOp2( \"unpause\", \"%(dom)-4s\" )'>" % domInfoHash )
-
- request.write( "<img src='images/small-destroy.png' onclick='doOp2( \"destroy\", \"%(dom)-4s\" )'></td>" % domInfoHash)
- else:
- request.write( "<td>&nbsp;</td>" )
-
- def write_DOMAIN_HEAD( self, request, long=True ):
- request.write( "<td class='domainInfoHead' align='center'>Domain</td>\n" )
- request.write( "<td class='domainInfoHead' align='center'>Name</td>\n" )
- if long:
- request.write( "<td class='domainInfoHead' align='center'>Memory / Mb</td>\n" )
- request.write( "<td class='domainInfoHead' align='center'>CPU</td>\n" )
- request.write( "<td class='domainInfoHead' align='center'>State</td>\n" )
- request.write( "<td class='domainInfoHead' align='center'></td>\n" )
diff --git a/tools/python/xen/sv/GenTabbed.py b/tools/python/xen/sv/GenTabbed.py
deleted file mode 100755
index aef1848e7a..0000000000
--- a/tools/python/xen/sv/GenTabbed.py
+++ /dev/null
@@ -1,131 +0,0 @@
-import types
-
-from xen.sv.HTMLBase import HTMLBase
-from xen.sv.TabView import TabView
-from xen.sv.util import getVar
-
-class GenTabbed( HTMLBase ):
-
- def __init__( self, title, urlWriter, tabStrings, tabObjects ):
- HTMLBase.__init__(self)
- self.tabStrings = tabStrings
- self.tabObjects = tabObjects
- self.urlWriter = urlWriter
- self.title = title
-
- def write_BODY( self, request, urlWriter = None ):
-
- tab = int( getVar( 'tab', request, 0 ) )
-
- request.write( "<table style='' width='100%' border='0' cellspacing='0' cellpadding='0'>" )
- request.write( "<tr><td>" )
-
- request.write( "<p align='center'><u>%s</u></p>" % self.title )
-
- TabView( tab, self.tabStrings, self.urlWriter ).write_BODY( request )
-
- request.write( "</td></tr><tr><td>" )
-
- render_tab = self.tabObjects[ tab ]
-
- if render_tab is None:
- request.write( "<p>Bad Tab</p>" )
- self.finish_BODY( request )
- else:
- render_tab().write_BODY( request )
-
- request.write( "</td></tr></table>" )
-
- def perform( self, request ):
- tab = int( getVar( 'tab', request, 0 ) )
-
- op_tab = self.tabObjects[ tab ]
-
- if op_tab:
- op_tab().perform( request )
-
-class PreTab( HTMLBase ):
-
- def __init__( self, source ):
- HTMLBase.__init__( self )
- self.source = source
-
- def write_BODY( self, request ):
-
- request.write( "<div style='display: block; overflow: auto; border: 0px solid black; width: 540px; padding: 5px; z-index:0; align: center'><pre>" )
-
- request.write( self.source )
-
- request.write( "</pre></div>" )
-
-class GeneralTab( HTMLBase ):
-
- def __init__( self, dict, titles ):
- HTMLBase.__init__( self )
- self.dict = dict
- self.titles = titles
-
- def write_BODY( self, request ):
-
- request.write( "<table width='100%' cellspacing='0' cellpadding='0' border='0'>" )
-
- def writeAttr( niceName, attr, formatter=None ):
- if type( attr ) is types.TupleType:
- ( attr, formatter ) = attr
-
- if attr in self.dict:
- if formatter:
- temp = formatter( self.dict[ attr ] )
- else:
- temp = str( self.dict[ attr ] )
- request.write( "<tr><td width='50%%'><p>%s:</p></td><td width='50%%'><p>%s</p></td></tr>" % ( niceName, temp ) )
-
- for niceName, attr in self.titles.items():
- writeAttr( niceName, attr )
-
- request.write( "</table>" )
-
-class NullTab( HTMLBase ):
-
- def __init__( self ):
- HTMLBase.__init__( self )
- self.title = "Null Tab"
-
- def write_BODY( self, request ):
- request.write( "<p>%s</p>" % self.title )
-
-class ActionTab( HTMLBase ):
-
- def __init__( self, actions ):
- self.actions = actions
- HTMLBase.__init__( self )
-
- def write_BODY( self, request ):
- request.write("<p align='center'>")
-
- for ( command, ( text, image ) ) in self.actions.items():
- request.write("<img src='images/%s' width='54' height='54' onclick='doOp( \"%s\" )' onmouseover='update( \"button_desc\", \"%s\" )' " % ( image, command, text ) )
- request.write("onmouseout='update( \"button_desc\", \"&nbsp;\" )' style='button'>")
- request.write("&nbsp;&nbsp;")
-
- request.write("<p align='center'><span id='button_desc'>&nbsp;</span></p>")
- request.write("</p>")
-
-class CompositeTab( HTMLBase ):
-
- def __init__( self, tabs ):
- HTMLBase.__init__( self )
- self.tabs = tabs
-
- def write_BODY( self, request ):
- for tab in self.tabs:
- request.write( "<br/>" )
- tab().write_BODY( request )
-
- def perform( self, request ):
- for tab in self.tabs:
- tab().perform( request )
-
-
-
-
diff --git a/tools/python/xen/sv/HTMLBase.py b/tools/python/xen/sv/HTMLBase.py
deleted file mode 100755
index e67784d558..0000000000
--- a/tools/python/xen/sv/HTMLBase.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from twisted.web.resource import Resource
-from xen.sv.util import *
-
-class HTMLBase( Resource ):
-
- isLeaf = True
-
- def __init__( self ):
- Resource.__init__(self)
-
- def render_POST( self, request ):
- self.perform( request )
- return self.render_GET( request )
-
- def render_GET( self, request ):
- self.write_TOP( request )
- self.write_BODY( request )
- self.write_BOTTOM( request )
- return ''
-
- def write_BODY( self, request ):
- request.write( "BODY" )
-
- def write_TOP( self, request ):
- request.write( '<html><head><title>Xen</title><link rel="stylesheet" type="text/css" href="inc/style.css" />' )
- request.write( '<script src="inc/script.js"></script>' )
- request.write( '</head><body>' )
- request.write('<form method="post" action="%s">' % request.uri)
-
- def write_BOTTOM( self, request ):
- request.write('<input type="hidden" name="op" value="">')
- request.write('<input type="hidden" name="args" value="">')
- request.write('</form>')
- request.write( "</body></html>" )
-
- def get_op_method(self, op):
- """Get the method for an operation.
- For operation 'foo' looks for 'op_foo'.
-
- op operation name
- returns method or None
- """
- op_method_name = 'op_' + op
- return getattr(self, op_method_name, None)
-
- def perform(self, req):
- """General operation handler for posted operations.
- For operation 'foo' looks for a method op_foo and calls
- it with op_foo(req). Replies with code 500 if op_foo
- is not found.
-
- The method must return a list when req.use_sxp is true
- and an HTML string otherwise (or list).
- Methods may also return a Deferred (for incomplete processing).
-
- req request
- """
- op = req.args.get('op')
- if not op is None and len(op) == 1:
- op = op[0]
- op_method = self.get_op_method(op)
- if op_method:
- op_method( req )
diff --git a/tools/python/xen/sv/Main.py b/tools/python/xen/sv/Main.py
deleted file mode 100755
index 196e1c1450..0000000000
--- a/tools/python/xen/sv/Main.py
+++ /dev/null
@@ -1,113 +0,0 @@
-from xen.sv.HTMLBase import HTMLBase
-from xen.sv.DomList import DomList
-from xen.sv.NodeInfo import NodeInfo
-from xen.sv.DomInfo import DomInfo
-from xen.sv.CreateDomain import CreateDomain
-from xen.sv.MigrateDomain import MigrateDomain
-from xen.sv.SaveDomain import SaveDomain
-from xen.sv.RestoreDomain import RestoreDomain
-
-from xen.xend.XendClient import server
-
-from xen.sv.util import getVar
-
-class Main( HTMLBase ):
-
- isLeaf = True
-
- def __init__( self, urlWriter = None ):
- self.modules = { "node": NodeInfo,
- "list": DomList,
- "info": DomInfo,
- "create": CreateDomain,
- "migrate" : MigrateDomain,
- "save" : SaveDomain,
- "restore" : RestoreDomain }
-
- # ordered list of module menus to display
- self.module_menus = [ "node", "create", "migrate", "save",
- "restore", "list" ]
- HTMLBase.__init__(self)
-
- def render_POST( self, request ):
-
- #decide what module post'd the action
-
- args = getVar( 'args', request )
-
- mod = getVar( 'mod', request )
-
- if not mod is None and args is None:
- module = self.modules[ mod ]
- #check module exists
- if module:
- module( self.mainUrlWriter ).perform( request )
- else:
- self.perform( request )
-
- return self.render_GET( request )
-
- def mainUrlWriter( self, module ):
- def fun( f ):
- return "Main.rpy?mod=%s%s" % ( module, f )
- return fun
-
- def write_BODY( self, request ):
-
- request.write( "\n<table style='border:0px solid black; background: url(images/orb_01.jpg) no-repeat' cellspacing='0' cellpadding='0' border='0' width='780px' height='536px'>\n" )
- request.write( "<tr>\n" )
- request.write( " <td width='15px'>&nbsp;</td>" )
- request.write( " <td width='175px' align='center' valign'center'>" )
- request.write( " <table cellspacing='0' cellpadding='0' border='0' width='100%' height='100%'>" )
- request.write( " <tr><td height='140px' align='center' valign='bottom'><a href='http://www.cl.cam.ac.uk/Research/SRG/netos/xen/'>" )
- request.write( " <img src='images/xen.png' width='150' height='75' border='0'/></a><br/></td></tr>" )
- request.write( " <tr><td height='60px' align='center'><p class='small'>SV Web Interface<br/>(C) <a href='mailto:tw275@cam.ac.uk'>Tom Wilkie</a> 2004</p></td></tr>")
- request.write( " <tr><td align='center' valign='top'>" )
-
- for modName in self.module_menus:
- self.modules[modName]( self.mainUrlWriter( modName ) ).write_MENU( request )
-
- request.write( " </td></tr>" )
- request.write( " </table>" )
- request.write( " &nbsp;" )
- request.write( " </td>\n" )
- request.write( " <td width='15px'>&nbsp;</td>" )
- request.write( " <td width='558px' align='left' valign='top'>" )
- request.write( " <table cellspacing='0' cellpadding='0' border='0' width='100%' height='100%'>" )
- request.write( " <tr><td height='20px'></td></tr>" )
- request.write( " <tr><td align='center' valign='top'>" )
-
- modName = getVar('mod', request)
-
- if modName is None:
- request.write( '<p>Please select a module</p>' )
- else:
- module = self.modules[ modName ]
- if module:
- module( self.mainUrlWriter( modName ) ).write_BODY( request )
- else:
- request.write( '<p>Invalid module. Please select another</p>' )
-
- request.write( " </td></tr>" )
- request.write( " </table>" )
- request.write( " </td>\n" )
- request.write( " <td width='17px'>&nbsp;</td>" )
- request.write( "</tr>\n" )
-
- request.write( "</table>\n" )
-
-
- def op_destroy( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_destroy( int( dom ), "halt" )
-
- def op_pause( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_pause( int( dom ) )
-
- def op_unpause( self, request ):
- dom = getVar( 'dom', request )
- if not dom is None and dom != "0":
- server.xend_domain_unpause( int( dom ) )
diff --git a/tools/python/xen/sv/MigrateDomain.py b/tools/python/xen/sv/MigrateDomain.py
deleted file mode 100644
index 928acf3f42..0000000000
--- a/tools/python/xen/sv/MigrateDomain.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from xen.sv.Wizard import *
-from xen.sv.util import *
-from xen.sv.GenTabbed import PreTab
-
-from xen.xm.create import make_config, OptVals
-
-from xen.xend.XendClient import server
-
-class MigrateDomain( Wizard ):
- def __init__( self, urlWriter ):
-
- sheets = [ ChooseMigrateDomain,
- DoMigrate ]
-
- Wizard.__init__( self, urlWriter, "Migrate Domain", sheets )
-
-
-class ChooseMigrateDomain( Sheet ):
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "Configure Migration", 0)
- try:
- domains = server.xend_domains()
- domains.sort()
- except:
- pass
-
- domnames = []
- for i in domains:
- if i != 'Domain-0': domnames.append((i,i))
-
- self.addControl( ListControl('domid',
- domnames,
- 'Domain ID:') )
- self.addControl( TickControl('live',
- 'True',
- 'Live migrate:') )
- self.addControl( InputControl('rate',
- '0',
- 'Rate limit:') )
- self.addControl( InputControl( 'dest', 'myhost.mydomain',
- 'Name or IP address:',
- ".*") )
-
-class DoMigrate( Sheet ):
- def __init__(self, urlWriter ):
- Sheet.__init__(self, urlWriter, "Migration Done", 1)
-
- def write_BODY( self, request, err ):
-
- if not self.passback: self.parseForm( request )
-
-# print string2sxp(self.passback)
-
- config = ssxp2hash ( string2sxp( self.passback ) )
-
- try:
- print config
- print config['domid'], config['dest']
- dom_sxp = server.xend_domain_migrate( config['domid'],
- config['dest'],
- config.get('live') == 'True',
- config['rate'] )
- success = "Your domain was successfully Migrated.\n"
- except Exception, e:
- success = "There was an error migrating your domain\n"
- dom_sxp = str(e)
-
- pt = PreTab( success + dom_sxp ) # sxp2prettystring( dom_sxp ) )
- pt.write_BODY( request )
-
- request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
- request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
diff --git a/tools/python/xen/sv/NodeInfo.py b/tools/python/xen/sv/NodeInfo.py
deleted file mode 100755
index 5db1a34c50..0000000000
--- a/tools/python/xen/sv/NodeInfo.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from xen.xend.XendClient import server
-
-from xen.sv.util import *
-from xen.sv.GenTabbed import *
-
-class NodeInfo( GenTabbed ):
-
- def __init__( self, urlWriter ):
-
- GenTabbed.__init__( self, "Node Details", urlWriter, [ 'General', 'Dmesg', ], [ NodeGeneralTab, NodeDmesgTab ] )
-
- def write_MENU( self, request ):
- request.write( "<p class='small'><a href='%s'>Node details</a></p>" % self.urlWriter( '' ) )
-
-class NodeGeneralTab( CompositeTab ):
- def __init__( self ):
- CompositeTab.__init__( self, [ NodeInfoTab, NodeActionTab ] )
-
-class NodeInfoTab( GeneralTab ):
-
- def __init__( self ):
-
- nodeInfo = {}
- try:
- nodeInfo = sxp2hash( server.xend_node() )
- except:
- nodeInfo[ 'system' ] = 'Error getting node info'
-
- dictTitles = {}
- dictTitles[ 'System' ] = 'system'
- dictTitles[ 'Hostname' ] = 'host'
- dictTitles[ 'Release' ] = 'release'
- dictTitles[ 'Version' ] ='version'
- dictTitles[ 'Machine' ] = 'machine'
- dictTitles[ 'Cores' ] = 'cores'
- dictTitles[ 'Hyperthreading' ] = ( 'hyperthreads_per_core', hyperthreadFormatter )
- dictTitles[ 'CPU Speed' ] = ( 'cpu_mhz', cpuFormatter )
- dictTitles[ 'Memory' ] = ( 'memory', memoryFormatter )
- dictTitles[ 'Free Memory' ] = ( 'free_memory', memoryFormatter )
-
- GeneralTab.__init__( self, dict=nodeInfo, titles=dictTitles )
-
-class NodeDmesgTab( PreTab ):
-
- def __init__( self ):
- try:
- dmesg = server.xend_node_get_dmesg()
- except:
- dmesg = "Error getting node information: XenD not running?"
- PreTab.__init__( self, dmesg )
-
-class NodeActionTab( ActionTab ):
-
- def __init__( self ):
- ActionTab.__init__( self, { "shutdown" : ( "Shutdown the Node", "shutdown.png" ),
- "reboot" : ( "Reboot the Node", "reboot.png" ) } )
-
- def op_shutdown( self, request ):
- print ">NodeShutDown"
- server.xend_node_shutdown()
-
- def op_reboot( self, request ):
- print ">NodeReboot"
- server.xend_node_reboot()
diff --git a/tools/python/xen/sv/RestoreDomain.py b/tools/python/xen/sv/RestoreDomain.py
deleted file mode 100644
index be8b4f558a..0000000000
--- a/tools/python/xen/sv/RestoreDomain.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from xen.sv.Wizard import *
-from xen.sv.util import *
-from xen.sv.GenTabbed import PreTab
-
-from xen.xm.create import make_config, OptVals
-
-from xen.xend.XendClient import server
-
-class RestoreDomain( Wizard ):
- def __init__( self, urlWriter ):
-
- sheets = [ ChooseRestoreDomain,
- DoRestore ]
-
- Wizard.__init__( self, urlWriter, "Restore Domain", sheets )
-
-
-class ChooseRestoreDomain( Sheet ):
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "Configure Restore", 0)
-
- self.addControl( InputControl( 'file', '',
- 'Suspend file name:',
- ".*") )
-
-class DoRestore( Sheet ):
- def __init__(self, urlWriter ):
- Sheet.__init__(self, urlWriter, "Restore Done", 1)
-
- def write_BODY( self, request, err ):
-
- if not self.passback: self.parseForm( request )
- config = ssxp2hash ( string2sxp( self.passback ) )
-
- try:
- dom_sxp = server.xend_domain_restore( config['file'] )
- success = "Your domain was successfully restored.\n"
- except Exception, e:
- success = "There was an error restoring your domain\n"
- dom_sxp = str(e)
-
- pt = PreTab( success + sxp2prettystring( dom_sxp ) )
- pt.write_BODY( request )
-
- request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
- request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
diff --git a/tools/python/xen/sv/SaveDomain.py b/tools/python/xen/sv/SaveDomain.py
deleted file mode 100644
index 4c4e315272..0000000000
--- a/tools/python/xen/sv/SaveDomain.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from xen.sv.Wizard import *
-from xen.sv.util import *
-from xen.sv.GenTabbed import PreTab
-
-from xen.xm.create import make_config, OptVals
-
-from xen.xend.XendClient import server
-
-class SaveDomain( Wizard ):
- def __init__( self, urlWriter ):
-
- sheets = [ ChooseSaveDomain,
- DoSave ]
-
- Wizard.__init__( self, urlWriter, "Save Domain", sheets )
-
-
-class ChooseSaveDomain( Sheet ):
- def __init__( self, urlWriter ):
- Sheet.__init__( self, urlWriter, "Configure Save", 0)
- try:
- domains = server.xend_domains()
- domains.sort()
- except:
- pass
-
- domnames = []
- for i in domains:
- if i != 'Domain-0': domnames.append((i,i))
-
- self.addControl( ListControl('domid',
- domnames,
- 'Domain ID:') )
- self.addControl( InputControl( 'file', '',
- 'Suspend file name:',
- ".*") )
-
-class DoSave( Sheet ):
- def __init__(self, urlWriter ):
- Sheet.__init__(self, urlWriter, "Save Done", 1)
-
- def write_BODY( self, request, err ):
-
- if not self.passback: self.parseForm( request )
- config = ssxp2hash ( string2sxp( self.passback ) )
-
- try:
- dom_sxp = server.xend_domain_save( config['domid'],
- config['file'] )
- success = "Your domain was successfully saved.\n"
- except Exception, e:
- success = "There was an error saving your domain\n"
- dom_sxp = str(e)
-
- pt = PreTab( success + dom_sxp ) # sxp2prettystring( dom_sxp ) )
- pt.write_BODY( request )
-
- request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
- request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
diff --git a/tools/python/xen/sv/TabView.py b/tools/python/xen/sv/TabView.py
deleted file mode 100755
index cada51c4e8..0000000000
--- a/tools/python/xen/sv/TabView.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from xen.sv.HTMLBase import HTMLBase
-
-class TabView( HTMLBase ):
-
- def __init__( self, tab, tabs, urlWriter ):
- HTMLBase.__init__(self)
- self.tab = tab # interger - tab id
- self.tabs = tabs
- self.urlWriter = urlWriter
-
- def write_BODY( self, request ):
- request.write( "<table style='' border='0' cellspacing='0' cellpadding='0' align='center'>" )
- request.write( "<tr height='22'>" )
-
- if self.tab == 0:
- image = "left-end-highlight.jpg"
- else:
- image = "left-end-no-highlight.jpg"
-
- request.write( "<td height='22' width='14'><image src='images/%s' width='14' height='22'></td>" % image )
-
- count = len( self.tabs )
-
- for i in range( count ):
-
- if i == self.tab:
- image = "middle-highlight.jpg"
- else:
- image = "middle-no-highlight.jpg"
-
- request.write( "<td style='background: url(images/%s)'><p align='center'><a href='%s'>%s</a></p></td>" % ( image, self.urlWriter( "&tab=%s" % i ), self.tabs[ i ] ) )
-
- if i < count-1:
- if i == self.tab:
- image = "seperator-left-highlight.jpg"
- elif self.tab == i+1:
- image = "seperator-right-highlight.jpg"
- else:
- image = "seperator.jpg"
-
- request.write( "<td height='22' width='23'><image src='images/%s' width='23' height='22'></td>" % image )
-
- if self.tab == count - 1:
- image = "right-end-highlight.jpg"
- else:
- image = "right-end-no-highlight.jpg"
-
- request.write( "<td height='22' width='14'><image src='images/%s' width='14' height='22'></td>" % image )
- request.write( "</tr></table>" )
diff --git a/tools/python/xen/sv/Wizard.py b/tools/python/xen/sv/Wizard.py
deleted file mode 100755
index 089d3f2e67..0000000000
--- a/tools/python/xen/sv/Wizard.py
+++ /dev/null
@@ -1,269 +0,0 @@
-from xen.sv.util import *
-from xen.sv.HTMLBase import HTMLBase
-from xen.xend import sxp
-
-import re
-
-DEBUG = 0
-
-class Wizard( HTMLBase ):
-
- def __init__( self, urlWriter, title, sheets ):
- HTMLBase.__init__( self )
- self.title = title
- self.sheets = sheets
- self.urlWriter = urlWriter
-
- def write_MENU( self, request ):
- request.write( "<p class='small'><a href='%s'>%s</a></p>" % (self.urlWriter( '' ), self.title) )
-
- def write_BODY( self, request ):
-
- request.write( "<table width='100%' border='0' cellspacing='0' cellpadding='0'><tr><td>" )
- request.write( "<p align='center'><u>%s</u></p></td></tr><tr><td>" % self.title )
-
- currSheet = getVar( 'sheet', request )
-
- if not currSheet is None:
- currSheet = int( currSheet )
- else:
- currSheet = 0
-
- sheet = self.sheets[ currSheet ]( self.urlWriter )
-
- err = not sheet.validate( request )
-
- if not err:
- op = getVar( 'op', request )
-
- if op == 'next':
- currSheet += 1
- elif op == 'prev':
- currSheet -= 1
-
- sheet = self.sheets[ currSheet ]( self.urlWriter )
-
- if getVar( 'visited-sheet%s' % currSheet, request ):
- sheet.write_BODY( request, err )
- else:
- sheet.write_BODY( request, False )
-
-
- request.write( "</td></tr><tr><td><table width='100%' border='0' cellspacing='0' cellpadding='0'><tr>" )
- request.write( "<td width='80%'></td><td width='20%' align='center'><p align='center'>" )
- if currSheet > 0:
- request.write( "<img src='images/previous.png' onclick='doOp( \"prev\" )' onmouseover='update( \"wizText\", \"Previous\" )' onmouseout='update( \"wizText\", \"&nbsp;\" )'>&nbsp;" )
- if currSheet < ( len( self.sheets ) - 2 ):
- request.write( "<img src='images/next.png' onclick='doOp( \"next\" )' onmouseover='update( \"wizText\", \"Next\" )' onmouseout='update( \"wizText\", \"&nbsp;\" )'>" )
- elif currSheet == ( len( self.sheets ) - 2 ):
- request.write( "<img src='images/finish.png' onclick='doOp( \"next\" )' onmouseover='update( \"wizText\", \"Finish\" )' onmouseout='update( \"wizText\", \"&nbsp;\" )'>" )
- request.write( "</p><p align='center'><span id='wizText'></span></p></td></tr></table>" )
- request.write( "</td></tr></table>" )
-
- def op_next( self, request ):
- pass
-
- def op_prev( self, request ):
- pass
-
- def op_finish( self, request ):
- pass
-
-class Sheet( HTMLBase ):
-
- def __init__( self, urlWriter, title, location ):
- HTMLBase.__init__( self )
- self.urlWriter = urlWriter
- self.feilds = []
- self.title = title
- self.location = location
- self.passback = None
-
- def parseForm( self, request ):
- do_not_parse = [ 'mod', 'op', 'sheet', 'passback' ]
-
- passed_back = request.args
-
- temp_passback = passed_back.get( "passback" )
-
- if temp_passback is not None and len( temp_passback ) > 0:
- temp_passback = temp_passback[ len( temp_passback )-1 ]
- else:
- temp_passback = "( )"
-
- last_passback = ssxp2hash( string2sxp( temp_passback ) ) #use special function - will work with no head on sxp
-
- if DEBUG: print last_passback
-
- for (key, value) in passed_back.items():
- if key not in do_not_parse:
- last_passback[ key ] = value[ len( value ) - 1 ]
-
- self.passback = sxp2string( hash2sxp( last_passback ) ) #store the sxp
-
- if DEBUG: print self.passback
-
- def write_BODY( self, request, err ):
-
- if not self.passback: self.parseForm( request )
-
- request.write( "<p>%s</p>" % self.title )
-
- previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference
-
- request.write( "<table width='100%' cellpadding='0' cellspacing='1' border='0'>" )
-
- for (feild, control) in self.feilds:
- control.write_Control( request, previous_values.get( feild ) )
- if err and not control.validate( previous_values.get( feild ) ):
- control.write_Help( request )
-
- request.write( "</table>" )
-
- request.write( "<input type='hidden' name='passback' value=\"%s\"></p>" % self.passback )
- request.write( "<input type='hidden' name='sheet' value='%s'></p>" % self.location )
- request.write( "<input type='hidden' name='visited-sheet%s' value='True'></p>" % self.location )
-
- def addControl( self, control ):
- self.feilds.append( [ control.getName(), control ] )
-
- def validate( self, request ):
-
- if not self.passback: self.parseForm( request )
-
- check = True
-
- previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the hash for quick reference
- if DEBUG: print previous_values
-
- for (feild, control) in self.feilds:
- if not control.validate( previous_values.get( feild ) ):
- check = False
- if DEBUG: print "> %s = %s" % (feild, previous_values.get( feild ))
-
- return check
-
-class SheetControl( HTMLBase ):
-
- def __init__( self, reg_exp = ".*" ):
- HTMLBase.__init__( self )
- self.name = ""
- self.reg_exp = reg_exp
-
- def write_Control( self, request, persistedValue ):
- request.write( "<tr colspan='2'><td>%s</td></tr>" % persistedValue )
-
- def write_Help( self, request ):
- request.write( "<tr><td align='right' colspan='2'><p class='small'>Text must match pattern:" )
- request.write( " %s</p></td></tr>" % self.reg_exp )
-
- def validate( self, persistedValue ):
- if persistedValue is None:
- persistedValue = ""
-
- return not re.compile( self.reg_exp ).match( persistedValue ) is None
-
- def getName( self ):
- return self.name
-
- def setName( self, name ):
- self.name = name
-
-class InputControl( SheetControl ):
-
- def __init__( self, name, defaultValue, humanText, reg_exp = ".*", help_text = "You must enter the appropriate details in this feild." ):
- SheetControl.__init__( self, reg_exp )
- self.setName( name )
-
- self.defaultValue = defaultValue
- self.humanText = humanText
- self.help_text = help_text
-
- def write_Control( self, request, persistedValue ):
- if persistedValue is None:
- persistedValue = self.defaultValue
-
- request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'><input size='40'type='text' name='%s' value=\"%s\"></td></tr>" % (self.humanText, self.getName(), persistedValue) )
-
- def write_Help( self, request ):
- request.write( "<tr><td align='right' colspan='2'><p class='small'>" )
- request.write( " %s</p></td></tr>" % self.help_text )
-
-class TextControl( SheetControl ):
-
- def __init__( self, text ):
- SheetControl.__init__( self )
- self.text = text
-
- def write_Control( self, request, persistedValue ):
- request.write( "<tr><td colspan='2'><p>%s</p></td></tr>" % self.text )
-
-class SmallTextControl( SheetControl ):
-
- def __init__( self, text ):
- SheetControl.__init__( self )
- self.text = text
-
- def write_Control( self, request, persistedValue ):
- request.write( "<tr><td colspan='2'><p class='small'>%s</p></tr></td>" % self.text )
-
-class ListControl( SheetControl ):
-
- def __init__( self, name, options, humanText ):
- SheetControl.__init__( self )
- self.setName( name )
- self.options = options
- self.humanText = humanText
-
- def write_Control( self, request, persistedValue ):
- request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'>" % self.humanText )
- request.write( "<select name='%s'>" % self.getName() )
- for (value, text) in self.options:
- if value == persistedValue:
- request.write( "<option value='%s' selected>%s\n" % (value, text) )
- else:
- request.write( "<option value='%s'>%s\n" % (value, text) )
- request.write( "</select></td></tr>" )
-
- def validate( self, persistedValue ):
- for (value, text) in self.options:
- if value == persistedValue:
- return True
-
- return False
-
-class FileControl( InputControl ):
-
- def __init__( self, name, defaultValue, humanText, reg_exp = ".*", help_text = "You must enter the appropriate details in this feild." ):
- InputControl.__init__( self, name, defaultValue, humanText )
-
- def validate( self, persistedValue ):
- if persistedValue is None: return False
- try:
- open( persistedValue )
- return True
- except IOError, TypeError:
- return False
-
- def write_Help( self, request ):
- request.write( "<tr><td colspan='2' align='right'><p class='small'>File does not exist: you must enter a valid, absolute file path.</p></td></tr>" )
-
-class TickControl( SheetControl ):
-
- def __init__( self, name, defaultValue, humanText ):
- SheetControl.__init__( self )
- self.setName( name )
- self.defaultValue = defaultValue
- self.humanText = humanText
-
- def write_Control( self, request, persistedValue ):
- request.write( "<tr><td width='50%%'><p>%s</p></td><td width='50%%'>" % self.humanText )
-
- if persistedValue == 'True':
- request.write( "<input type='checkbox' name='%s' value='True' checked>" % self.getName() )
- else:
- request.write( "<input type='checkbox' name='%s' value='True'>" % self.getName() )
-
- request.write( "</select></td></tr>" )
-
-
diff --git a/tools/python/xen/sv/__init__.py b/tools/python/xen/sv/__init__.py
deleted file mode 100755
index 8d1c8b69c3..0000000000
--- a/tools/python/xen/sv/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tools/python/xen/sv/params.py b/tools/python/xen/sv/params.py
deleted file mode 100644
index beed647a4f..0000000000
--- a/tools/python/xen/sv/params.py
+++ /dev/null
@@ -1,3 +0,0 @@
-SV_PORT = 8080
-SV_ROOT = "/var/lib/xen/sv/"
-PID_FILE = "/var/run/xen-sv.pid"
diff --git a/tools/python/xen/sv/util.py b/tools/python/xen/sv/util.py
deleted file mode 100755
index 3207b8f0cc..0000000000
--- a/tools/python/xen/sv/util.py
+++ /dev/null
@@ -1,123 +0,0 @@
-from xen.xend.XendClient import server
-from xen.xend import sxp
-from xen.xend import PrettyPrint
-
-import types
-
-def getDomInfoHash( domain ):
- domInfoHash = {}
- try:
- domInfoHash = sxp2hash( server.xend_domain( domain ) )
- domInfoHash['dom'] = domain
- except:
- domInfoHash['name'] = "Error getting domain details"
- return domInfoHash
-
-def sxp2hash( s ):
- sxphash = {}
-
- for child in sxp.children( s ):
- if isinstance( child, types.ListType ) and len( child ) > 1:
- if isinstance( child[1], types.ListType ) and len( child ) > 1:
- sxphash[ child[0] ] = sxp2hash( child[1] )
- else:
- sxphash[ child[0] ] = child[1]
-
- return sxphash
-
-def ssxp2hash( s ):
- sxphash = {}
-
- for i in s:
- if isinstance( i, types.ListType ) and len( i ) > 1:
- sxphash[ i[0] ] = i[1]
-
- return sxphash
-
-def hash2sxp( h ):
- hashsxp = []
-
- for (key, item) in h.items():
- hashsxp.append( [key, item] )
-
- return hashsxp
-
-def string2sxp( string ):
- pin = sxp.Parser()
- pin.input( string )
- return pin.get_val()
-
-def sxp2string( sexp ):
- return sxp.to_string( sexp )
-
-def sxp2prettystring( sxp ):
- class tmp:
- def __init__( self ):
- self.str = ""
- def write( self, str ):
- self.str = self.str + str
- temp = tmp()
- PrettyPrint.prettyprint( sxp, out=temp )
- return temp.str
-
-def getVar( var, request, default=None ):
-
- arg = request.args.get( var )
-
- if arg is None:
- return default
- else:
- return arg[ len( arg )-1 ]
-
-def bigTimeFormatter( time ):
- time = float( time )
- weeks = time // 604800
- remainder = time % 604800
- days = remainder // 86400
-
- remainder = remainder % 86400
-
- hms = smallTimeFormatter( remainder )
-
- return "%d weeks, %d days, %s" % ( weeks, days, hms )
-
-def smallTimeFormatter( time ):
- time = float( time )
- hours = time // 3600
- remainder = time % 3600
- mins = remainder // 60
- secs = time % 60
- return "%02d:%02d:%04.1f (hh:mm:ss.s)" % ( hours, mins, secs )
-
-def stateFormatter( state ):
- states = [ 'Running', 'Blocked', 'Paused', 'Shutdown', 'Crashed' ]
-
- stateStr = ""
-
- for i in range( len( state ) ):
- if state[i] != "-":
- stateStr += "%s, " % states[ i ]
-
- return stateStr + " (%s)" % state
-
-def memoryFormatter( mem ):
- mem = int( mem )
- if mem >= 1024:
- mem = float( mem ) / 1024
- return "%3.2fGb" % mem
- else:
- return "%7dMb" % mem
-
-def cpuFormatter( mhz ):
- mhz = int( mhz )
- if mhz > 1000:
- ghz = float( mhz ) / 1000.0
- return "%4.2fGHz" % ghz
- else:
- return "%4dMHz" % mhz
-
-def hyperthreadFormatter( threads ):
- if int( threads ) > 1:
- return "Yes (%d)" % threads
- else:
- return "No"
diff --git a/tools/python/xen/util/blkif.py b/tools/python/xen/util/blkif.py
new file mode 100644
index 0000000000..0caf03b5cf
--- /dev/null
+++ b/tools/python/xen/util/blkif.py
@@ -0,0 +1,84 @@
+import os
+import re
+import string
+
+from xen.xend.XendLogging import log
+
+def expand_dev_name(name):
+ if not name:
+ return name
+ if re.match( '^/dev/', name ):
+ return name
+ else:
+ return '/dev/' + name
+
+def blkdev_name_to_number(name):
+ """Take the given textual block-device name (e.g., '/dev/sda1',
+ 'hda') and return the device number used by the OS. """
+
+ n = expand_dev_name(name)
+
+ try:
+ return os.stat(n).st_rdev
+ except Exception, ex:
+ log.debug("exception looking up device number for %s: %s", name, ex)
+ pass
+
+ if re.match( '/dev/sd[a-p]([0-9]|1[0-5])', n):
+ return 8 * 256 + 16 * (ord(n[7:8]) - ord('a')) + int(n[8:])
+
+ if re.match( '/dev/hd[a-t]([1-9]|[1-5][0-9]|6[0-3])?', n):
+ ide_majors = [ 3, 22, 33, 34, 56, 57, 88, 89, 90, 91 ]
+ major = ide_majors[(ord(n[7:8]) - ord('a')) / 2]
+ minor = ((ord(n[7:8]) - ord('a')) % 2) * 64 + int(n[8:] or 0)
+ return major * 256 + minor
+
+ # see if this is a hex device number
+ if re.match( '^(0x)?[0-9a-fA-F]+$', name ):
+ return string.atoi(name,16)
+
+ return None
+
+def blkdev_segment(name):
+ """Take the given block-device name (e.g. '/dev/sda1', 'hda')
+ and return a dictionary { device, start_sector,
+ nr_sectors, type }
+ device: Device number of the given partition
+ start_sector: Index of first sector of the partition
+ nr_sectors: Number of sectors comprising this partition
+ type: 'Disk' or identifying name for partition type
+ """
+ val = None
+ n = blkdev_name_to_number(name)
+ if n:
+ val = { 'device' : n,
+ 'start_sector' : long(0),
+ 'nr_sectors' : long(1L<<63),
+ 'type' : 'Disk' }
+ return val
+
+def blkdev_uname_to_file(uname):
+ """Take a blkdev uname and return the corresponding filename."""
+ fn = None
+ if uname.find(":") != -1:
+ (typ, fn) = uname.split(":")
+ if typ == "phy" and not fn.startswith("/dev/"):
+ fn = "/dev/%s" %(fn,)
+ return fn
+
+def mount_mode(name):
+ mode = None
+ name = expand_dev_name(name)
+ lines = os.popen('mount 2>/dev/null').readlines()
+ exp = re.compile('^' + name + ' .*[\(,]r(?P<mode>[ow])[,\)]')
+ for line in lines:
+ pm = exp.match(line)
+ if not pm: continue
+ mode = pm.group('mode')
+ break
+ if mode == 'w':
+ return mode
+ if mode == 'o':
+ mode = 'r'
+ return mode
+
diff --git a/tools/python/xen/util/console_client.py b/tools/python/xen/util/console_client.py
index 8bd3178eab..c0acacfad3 100644
--- a/tools/python/xen/util/console_client.py
+++ b/tools/python/xen/util/console_client.py
@@ -57,9 +57,18 @@ def __send_to_sock(sock):
raise
sys.exit(0)
-def connect(host,port):
- sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
- sock.connect((host,port))
+def connect(host, port, path=None):
+ # Try inet first. If 'path' is given and the error
+ # was connection refused, try unix-domain on 'path'.
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((host, port))
+ except socket.error, err:
+ if (path is None) or (err[0] != errno.ECONNREFUSED):
+ raise
+ # Try unix-domain.
+ sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ sock.connect(path)
oattrs = tcgetattr(0)
nattrs = tcgetattr(0)
@@ -86,7 +95,14 @@ def connect(host,port):
__send_to_sock(sock)
if __name__ == '__main__':
- if len(sys.argv) != 3:
- print sys.argv[0] + " <host> <port>"
+ argc = len(sys.argv)
+ if argc < 3 or argc > 4:
+ print >>sys.stderr, sys.argv[0], "<host> <port> [<path>]"
sys.exit(1)
- connect(str(sys.argv[1]),int(sys.argv[2]))
+ host = sys.argv[1]
+ port = int(sys.argv[2])
+ if argc > 3:
+ path = sys.argv[3]
+ else:
+ path = None
+ connect(host, port, path=path)
diff --git a/tools/python/xen/util/ip.py b/tools/python/xen/util/ip.py
index 9dd558a178..9133e886f2 100644
--- a/tools/python/xen/util/ip.py
+++ b/tools/python/xen/util/ip.py
@@ -4,96 +4,72 @@ import socket
import struct
import errno
-def _readlines(fd):
- """Version of readlines safe against EINTR.
- """
- import errno
-
- lines = []
- while 1:
- try:
- line = fd.readline()
- except IOError, ex:
- if ex.errno == errno.EINTR:
- continue
- else:
- raise
- if line == '': break
- lines.append(line)
- return lines
-
-def _readline(fd):
- """Version of readline safe against EINTR.
- """
- while 1:
- try:
- return fd.readline()
- except IOError, ex:
- if ex.errno == errno.EINTR:
- continue
- else:
- raise
-
##### Networking-related functions
-"""Bridge for network backend.
-When bridging is used, eth0 may not have an IP address,
-as it may have been moved onto the bridge.
-"""
-NBE_BRIDGE = 'xen-br0'
+def get_defaultroute():
+ fd = os.popen('/sbin/ip route list 2>/dev/null')
+ for line in fd.readlines():
+ m = re.search('^default via ([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) dev ([^ ]*)',
+ line)
+ if m:
+ return [m.group(1), m.group(2)]
+ return [None, None]
-def get_current_ipaddr(dev='eth0'):
+def get_current_ipaddr(dev='defaultroute'):
"""Get the primary IP address for the given network interface.
- dev network interface (default eth0)
+ dev network interface (default: default route device)
returns interface address as a string
"""
+ if dev == 'defaultroute':
+ dev = get_defaultroute()[1]
+ if not dev:
+ return
fd = os.popen( '/sbin/ifconfig ' + dev + ' 2>/dev/null' )
- lines = _readlines(fd)
- for line in lines:
+ for line in fd.readlines():
m = re.search( '^\s+inet addr:([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+).*',
line )
if m:
return m.group(1)
- if dev == 'eth0':
- return get_current_ipaddr(NBE_BRIDGE)
return None
-def get_current_ipmask(dev='eth0'):
+def get_current_ipmask(dev='defaultroute'):
"""Get the primary IP netmask for a network interface.
- dev network interface (default eth0)
+ dev network interface (default: default route device)
returns interface netmask as a string
"""
+ if dev == 'defaultroute':
+ dev = get_defaultroute()[1]
+ if not dev:
+ return
fd = os.popen( '/sbin/ifconfig ' + dev + ' 2>/dev/null' )
- lines = _readlines(fd)
- for line in lines:
+ for line in fd.readlines():
m = re.search( '^.+Mask:([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+).*',
line )
if m:
return m.group(1)
- if dev == 'eth0':
- return get_current_ipmask(NBE_BRIDGE)
return None
-def get_current_ipgw(dev='eth0'):
+def get_current_ipgw(dev='defaultroute'):
"""Get the IP gateway for a network interface.
- dev network interface (default eth0)
+ dev network interface (default: default route device)
returns gateway address as a string
"""
+ if dev == 'defaultroute':
+ return get_defaultroute()[0]
+ if not dev:
+ return
fd = os.popen( '/sbin/route -n' )
- lines = _readlines(fd)
- for line in lines:
+ for line in fd.readlines():
m = re.search( '^\S+\s+([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)' +
'\s+\S+\s+\S*G.*' + dev + '.*', line )
if m:
return m.group(1)
- if dev == 'eth0':
- return get_current_ipgw(NBE_BRIDGE)
return None
def inet_aton(addr):
diff --git a/tools/python/xen/util/mac.py b/tools/python/xen/util/mac.py
new file mode 100644
index 0000000000..47dffd80d5
--- /dev/null
+++ b/tools/python/xen/util/mac.py
@@ -0,0 +1,11 @@
+
+from string import join, split
+
+def macToString(mac):
+ return ':'.join(map(lambda x: "%02x" % x, mac))
+
+def macFromString(str):
+ mac = [ int(x, 16) for x in str.split(':') ]
+ if len(mac) != 6:
+ raise ValueError("invalid mac: %s" % str)
+ return mac
diff --git a/tools/python/xen/util/memmap.py b/tools/python/xen/util/memmap.py
new file mode 100644
index 0000000000..2899a87535
--- /dev/null
+++ b/tools/python/xen/util/memmap.py
@@ -0,0 +1,41 @@
+mem_caching_attr = {
+ 'UC' : 0,
+ 'WC' : 1,
+ 'WT' : 4,
+ 'WP' : 5,
+ 'WB' : 6,
+ };
+
+e820_mem_type = {
+ 'AddressRangeMemory' : 1,
+ 'AddressRangeReserved' : 2,
+ 'AddressRangeACPI' : 3,
+ 'AddressRangeNVS' : 4,
+ 'AddressRangeIO' : 16,
+ 'AddressRangeShared' : 17,
+};
+
+MT_COL = 2
+MA_COL = 3
+
+def strmap(row):
+ if (type(row) != type([])):
+ return row
+ row[MT_COL] = e820_mem_type[row[MT_COL]]
+ row[MA_COL] = mem_caching_attr[row[MA_COL]]
+ return row
+
+def memmap_parse(memmap):
+ return map(strmap, memmap)
+
+if __name__ == '__main__':
+ memmap = [ 'memmap',
+ [ '1', '2', 'AddressRangeMemory', 'UC'],
+ [ '1', '2', 'AddressRangeReserved', 'UC'],
+ [ '1', '2', 'AddressRangeACPI', 'WB'],
+ [ '1', '2', 'AddressRangeNVS', 'WB'],
+ [ '1', '2', 'AddressRangeIO', 'WB'],
+ [ '1', '2', 'AddressRangeShared', 'WB']]
+ print memmap_parse(memmap);
+
+
diff --git a/tools/python/xen/util/process.py b/tools/python/xen/util/process.py
new file mode 100644
index 0000000000..07bc73b505
--- /dev/null
+++ b/tools/python/xen/util/process.py
@@ -0,0 +1,37 @@
+# Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
+
+# os.system() replacement which outputs through the logger
+
+import popen2
+import select
+import string
+
+from xen.xend.XendLogging import log
+
+def runscript(cmd):
+ # split after first space, then grab last component of path
+ cmdname = "[%s] " % cmd.split()[0].split('/')[-1]
+ # run command and grab stdin, stdout and stderr
+ cout, cin, cerr = popen2.popen3(cmd)
+ # close stdin to get command to terminate if it waits for input
+ cin.close()
+ # wait for output and process
+ p = select.poll()
+ p.register(cout)
+ p.register(cerr)
+ stdout = ""
+ while True:
+ r = p.poll()
+ for (fd, event) in r:
+ if event == select.POLLHUP:
+ return stdout
+ if fd == cout.fileno():
+ stdout = stdout + cout.readline()
+ if fd == cerr.fileno():
+ l = cerr.readline()
+ if l[0] == '-':
+ log.debug(cmdname + l[1:].rstrip())
+ elif l[0] == '*':
+ log.info(cmdname + l[1:].rstrip())
+ else:
+ log.error(cmdname + l.rstrip())
diff --git a/tools/python/xen/util/xpopen.py b/tools/python/xen/util/xpopen.py
new file mode 100644
index 0000000000..b0c880fafe
--- /dev/null
+++ b/tools/python/xen/util/xpopen.py
@@ -0,0 +1,169 @@
+#
+# Copyright (c) 2001, 2002, 2003, 2004 Python Software Foundation; All Rights Reserved
+#
+# PSF LICENSE AGREEMENT FOR PYTHON 2.3
+# ------------------------------------
+#
+# 1. This LICENSE AGREEMENT is between the Python Software Foundation
+# ("PSF"), and the Individual or Organization ("Licensee") accessing and
+# otherwise using Python 2.3 software in source or binary form and its
+# associated documentation.
+#
+# 2. Subject to the terms and conditions of this License Agreement, PSF
+# hereby grants Licensee a nonexclusive, royalty-free, world-wide
+# license to reproduce, analyze, test, perform and/or display publicly,
+# prepare derivative works, distribute, and otherwise use Python 2.3
+# alone or in any derivative version, provided, however, that PSF's
+# License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
+# 2001, 2002, 2003, 2004 Python Software Foundation; All Rights Reserved" are
+# retained in Python 2.3 alone or in any derivative version prepared by
+# Licensee.
+#
+# 3. In the event Licensee prepares a derivative work that is based on
+# or incorporates Python 2.3 or any part thereof, and wants to make
+# the derivative work available to others as provided herein, then
+# Licensee hereby agrees to include in any such work a brief summary of
+# the changes made to Python 2.3.
+#
+# 4. PSF is making Python 2.3 available to Licensee on an "AS IS"
+# basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+# 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
+# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 6. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 7. Nothing in this License Agreement shall be deemed to create any
+# relationship of agency, partnership, or joint venture between PSF and
+# Licensee. This License Agreement does not grant permission to use PSF
+# trademarks or trade name in a trademark sense to endorse or promote
+# products or services of Licensee, or any third party.
+#
+# 8. By copying, installing or otherwise using Python 2.3, Licensee
+# agrees to be bound by the terms and conditions of this License
+# Agreement.
+#
+# Modifications: Copyright (c) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
+# - add support for excluding a list of file descriptors from being
+# closed, allowing access to those file descriptors from the command.
+#
+
+"""Spawn a command with pipes to its stdin, stdout, and optionally stderr.
+
+The normal os.popen(cmd, mode) call spawns a shell command and provides a
+file interface to just the input or output of the process depending on
+whether mode is 'r' or 'w'. This module provides the functions xpopen2(cmd)
+and xpopen3(cmd) which return two or three pipes to the spawned command.
+Optionally exclude a list of file descriptors from being closed, allowing
+access to those file descriptors from the command.
+"""
+
+import os
+import sys
+
+try:
+ MAXFD = os.sysconf('SC_OPEN_MAX')
+except (AttributeError, ValueError):
+ MAXFD = 256
+
+_active = []
+
+def _cleanup():
+ for inst in _active[:]:
+ inst.poll()
+
+class xPopen3:
+ """Class representing a child process. Normally instances are created
+ by the factory functions popen2() and popen3()."""
+
+ sts = -1 # Child not completed yet
+
+ def __init__(self, cmd, capturestderr=False, bufsize=-1, passfd=()):
+ """The parameter 'cmd' is the shell command to execute in a
+ sub-process. The 'capturestderr' flag, if true, specifies that
+ the object should capture standard error output of the child process.
+ The default is false. If the 'bufsize' parameter is specified, it
+ specifies the size of the I/O buffers to/from the child process."""
+ _cleanup()
+ self.passfd = passfd
+ p2cread, p2cwrite = os.pipe()
+ c2pread, c2pwrite = os.pipe()
+ if capturestderr:
+ errout, errin = os.pipe()
+ self.pid = os.fork()
+ if self.pid == 0:
+ # Child
+ os.dup2(p2cread, 0)
+ os.dup2(c2pwrite, 1)
+ if capturestderr:
+ os.dup2(errin, 2)
+ self._run_child(cmd)
+ os.close(p2cread)
+ self.tochild = os.fdopen(p2cwrite, 'w', bufsize)
+ os.close(c2pwrite)
+ self.fromchild = os.fdopen(c2pread, 'r', bufsize)
+ if capturestderr:
+ os.close(errin)
+ self.childerr = os.fdopen(errout, 'r', bufsize)
+ else:
+ self.childerr = None
+ _active.append(self)
+
+ def _run_child(self, cmd):
+ if isinstance(cmd, basestring):
+ cmd = ['/bin/sh', '-c', cmd]
+ for i in range(3, MAXFD):
+ if i in self.passfd:
+ continue
+ try:
+ os.close(i)
+ except OSError:
+ pass
+ try:
+ os.execvp(cmd[0], cmd)
+ finally:
+ os._exit(127)
+
+ def poll(self):
+ """Return the exit status of the child process if it has finished,
+ or -1 if it hasn't finished yet."""
+ if self.sts < 0:
+ try:
+ pid, sts = os.waitpid(self.pid, os.WNOHANG)
+ if pid == self.pid:
+ self.sts = sts
+ _active.remove(self)
+ except os.error:
+ pass
+ return self.sts
+
+ def wait(self):
+ """Wait for and return the exit status of the child process."""
+ if self.sts < 0:
+ pid, sts = os.waitpid(self.pid, 0)
+ if pid == self.pid:
+ self.sts = sts
+ _active.remove(self)
+ return self.sts
+
+
+def xpopen2(cmd, bufsize=-1, mode='t', passfd=[]):
+ """Execute the shell command 'cmd' in a sub-process. If 'bufsize' is
+ specified, it sets the buffer size for the I/O pipes. The file objects
+ (child_stdout, child_stdin) are returned."""
+ inst = xPopen3(cmd, False, bufsize, passfd)
+ return inst.fromchild, inst.tochild
+
+def xpopen3(cmd, bufsize=-1, mode='t', passfd=[]):
+ """Execute the shell command 'cmd' in a sub-process. If 'bufsize' is
+ specified, it sets the buffer size for the I/O pipes. The file objects
+ (child_stdout, child_stdin, child_stderr) are returned."""
+ inst = xPopen3(cmd, True, bufsize, passfd)
+ return inst.fromchild, inst.tochild, inst.childerr
diff --git a/tools/python/xen/web/SrvBase.py b/tools/python/xen/web/SrvBase.py
new file mode 100644
index 0000000000..099eebc449
--- /dev/null
+++ b/tools/python/xen/web/SrvBase.py
@@ -0,0 +1,75 @@
+# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
+
+import types
+
+
+from xen.xend import sxp
+from xen.xend import PrettyPrint
+from xen.xend.Args import ArgError
+from xen.xend.XendError import XendError
+from xen.xend.XendLogging import log
+
+import resource
+import http
+import httpserver
+
+def uri_pathlist(p):
+ """Split a path into a list.
+ p path
+ return list of path elements
+ """
+ l = []
+ for x in p.split('/'):
+ if x == '': continue
+ l.append(x)
+ return l
+
+class SrvBase(resource.Resource):
+ """Base class for services.
+ """
+
+
+ def use_sxp(self, req):
+ return req.useSxp()
+
+ def get_op_method(self, op):
+ """Get the method for an operation.
+ For operation 'foo' looks for 'op_foo'.
+
+ op operation name
+ returns method or None
+ """
+ op_method_name = 'op_' + op
+ return getattr(self, op_method_name, None)
+
+ def perform(self, req):
+ """General operation handler for posted operations.
+ For operation 'foo' looks for a method op_foo and calls
+ it with op_foo(op, req). Replies with code 500 if op_foo
+ is not found.
+
+ The method must return a list when req.use_sxp is true
+ and an HTML string otherwise (or list).
+ Methods may also return a ThreadRequest (for incomplete processing).
+
+ req request
+ """
+ op = req.args.get('op')
+ if op is None or len(op) != 1:
+ req.setResponseCode(http.NOT_ACCEPTABLE, "Invalid request")
+ return ''
+ op = op[0]
+ op_method = self.get_op_method(op)
+ if op_method is None:
+ req.setResponseCode(http.NOT_IMPLEMENTED, "Operation not implemented: " + op)
+ req.setHeader("Content-Type", "text/plain")
+ req.write("Operation not implemented: " + op)
+ return ''
+ else:
+ return op_method(op, req)
+
+ def print_path(self, req):
+ """Print the path with hyperlinks.
+ """
+ req.printPath()
+
diff --git a/tools/python/xen/xend/server/SrvDir.py b/tools/python/xen/web/SrvDir.py
index 712521c7b3..b168a8ef48 100644
--- a/tools/python/xen/xend/server/SrvDir.py
+++ b/tools/python/xen/web/SrvDir.py
@@ -1,28 +1,26 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-from twisted.protocols import http
-from twisted.web import error
+import types
from xen.xend import sxp
+from xen.xend import PrettyPrint
+from xen.xend.Args import ArgError
from xen.xend.XendError import XendError
+#from xen.xend.XendLogging import log
-from SrvBase import SrvBase
+import resource
+import http
-class SrvError(error.ErrorPage):
-
- def render(self, request):
- val = error.ErrorPage.render(self, request)
- request.setResponseCode(self.code, self.brief)
- return val
+from xen.web.SrvBase import SrvBase
class SrvConstructor:
"""Delayed constructor for sub-servers.
Does not import the sub-server class or create the object until needed.
"""
-
+
def __init__(self, klass):
"""Create a constructor. It is assumed that the class
- should be imported as 'import klass from klass'.
+ should be imported as 'from xen.xend.server.klass import klass'.
klass name of its class
"""
@@ -34,7 +32,7 @@ class SrvConstructor:
necessary.
"""
if not self.obj:
- exec 'from %s import %s' % (self.klass, self.klass)
+ exec 'from xen.xend.server.%s import %s' % (self.klass, self.klass)
klassobj = eval(self.klass)
self.obj = klassobj()
return self.obj
@@ -50,7 +48,7 @@ class SrvDir(SrvBase):
self.order = []
def noChild(self, msg):
- return SrvError(http.NOT_FOUND, msg, msg)
+ return resource.ErrorPage(http.NOT_FOUND, msg=msg)
def getChild(self, x, req):
if x == '': return self
@@ -59,36 +57,36 @@ class SrvDir(SrvBase):
except XendError, ex:
return self.noChild(str(ex))
if val is None:
- return self.noChild('Not found ' + str(x))
+ return self.noChild('Not found: ' + str(x))
else:
return val
def get(self, x):
val = self.table.get(x)
- if val is not None:
+ if isinstance(val, SrvConstructor):
val = val.getobj()
return val
- def add(self, x, xclass = None):
- if xclass is None:
- xclass = 'SrvDir'
- self.table[x] = SrvConstructor(xclass)
+ def add(self, x, v=None):
+ if v is None:
+ v = 'SrvDir'
+ if isinstance(v, types.StringType):
+ v = SrvConstructor(v)
+ self.table[x] = v
self.order.append(x)
+ return v
def render_GET(self, req):
- try:
- if self.use_sxp(req):
- req.setHeader("Content-type", sxp.mime_type)
- self.ls(req, 1)
- else:
- req.write('<html><head></head><body>')
- self.print_path(req)
- self.ls(req)
- self.form(req)
- req.write('</body></html>')
- return ''
- except Exception, ex:
- self._perform_err(ex, "GET", req)
+ if self.use_sxp(req):
+ req.setHeader("Content-type", sxp.mime_type)
+ self.ls(req, 1)
+ else:
+ req.write('<html><head></head><body>')
+ self.print_path(req)
+ self.ls(req)
+ self.form(req)
+ req.write('</body></html>')
+ return ''
def ls(self, req, use_sxp=0):
url = req.prePathURL()
diff --git a/tools/python/xen/web/__init__.py b/tools/python/xen/web/__init__.py
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/tools/python/xen/web/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tools/python/xen/web/connection.py b/tools/python/xen/web/connection.py
new file mode 100644
index 0000000000..9e0f891ce1
--- /dev/null
+++ b/tools/python/xen/web/connection.py
@@ -0,0 +1,398 @@
+import sys
+import threading
+import select
+import socket
+
+from errno import EAGAIN, EINTR, EWOULDBLOCK
+
+"""General classes to support server and client sockets, without
+specifying what kind of socket they are. There are subclasses
+for TCP and unix-domain sockets (see tcp.py and unix.py).
+"""
+
+"""We make sockets non-blocking so that operations like accept()
+don't block. We also select on a timeout. Otherwise we have no way
+of getting the threads to shutdown.
+"""
+SELECT_TIMEOUT = 2.0
+
+class SocketServerConnection:
+ """An accepted connection to a server.
+ """
+
+ def __init__(self, sock, protocol, addr, server):
+ self.sock = sock
+ self.protocol = protocol
+ self.addr = addr
+ self.server = server
+ self.buffer_n = 1024
+ self.thread = None
+ self.connected = True
+ protocol.setTransport(self)
+ protocol.connectionMade(addr)
+
+ def run(self):
+ self.thread = threading.Thread(target=self.main)
+ #self.thread.setDaemon(True)
+ self.thread.start()
+
+ def main(self):
+ while True:
+ if not self.thread: break
+ if self.select(): break
+ if not self.thread: break
+ data = self.read()
+ if data is None: continue
+ if data is True: break
+ if self.dataReceived(data): break
+
+ def select(self):
+ try:
+ select.select([self.sock], [], [], SELECT_TIMEOUT)
+ return False
+ except socket.error, ex:
+ if ex.args[0] in (EWOULDBLOCK, EAGAIN, EINTR):
+ return False
+ else:
+ self.loseConnection(ex)
+ return True
+
+ def read(self):
+ try:
+ data = self.sock.recv(self.buffer_n)
+ if data == '':
+ self.loseConnection()
+ return True
+ return data
+ except socket.error, ex:
+ if ex.args[0] in (EWOULDBLOCK, EAGAIN, EINTR):
+ return None
+ else:
+ self.loseConnection(ex)
+ return True
+
+ def dataReceived(self, data):
+ if not self.connected:
+ return True
+ if not self.protocol:
+ return True
+ try:
+ self.protocol.dataReceived(data)
+ except SystemExit:
+ raise
+ except Exception, ex:
+ self.loseConnection(ex)
+ return True
+ return False
+
+ def write(self, data):
+ self.sock.send(data)
+
+ def loseConnection(self, reason=None):
+ self.thread = None
+ self.closeSocket(reason)
+ self.closeProtocol(reason)
+
+ def closeSocket(self, reason):
+ try:
+ self.sock.close()
+ except SystemExit:
+ raise
+ except:
+ pass
+
+ def closeProtocol(self, reason):
+ try:
+ if self.connected:
+ self.connected = False
+ if self.protocol:
+ self.protocol.connectionLost(reason)
+ except SystemExit:
+ raise
+ except:
+ pass
+
+ def getHost(self):
+ return self.sock.getsockname()
+
+ def getPeer(self):
+ return self.addr
+
+class SocketListener:
+ """A server socket, running listen in a thread.
+ Accepts connections and runs a thread for each one.
+ """
+
+ def __init__(self, factory, backlog=None):
+ if backlog is None:
+ backlog = 5
+ self.factory = factory
+ self.sock = None
+ self.backlog = backlog
+ self.thread = None
+
+ def createSocket(self):
+ raise NotImplementedError()
+
+ def acceptConnection(self, sock, protocol, addr):
+ return SocketServerConnection(sock, protocol, addr, self)
+
+ def startListening(self):
+ if self.sock or self.thread:
+ raise IOError("already listening")
+ self.sock = self.createSocket()
+ self.sock.setblocking(0)
+ self.sock.listen(self.backlog)
+ self.run()
+
+ def stopListening(self, reason=None):
+ self.loseConnection(reason)
+
+ def run(self):
+ self.factory.doStart()
+ self.thread = threading.Thread(target=self.main)
+ #self.thread.setDaemon(True)
+ self.thread.start()
+
+ def main(self):
+ while True:
+ if not self.thread: break
+ if self.select(): break
+ if self.accept(): break
+
+ def select(self):
+ try:
+ select.select([self.sock], [], [], SELECT_TIMEOUT)
+ return False
+ except socket.error, ex:
+ if ex.args[0] in (EWOULDBLOCK, EAGAIN, EINTR):
+ return False
+ else:
+ self.loseConnection(ex)
+ return True
+
+ def accept(self):
+ try:
+ (sock, addr) = self.sock.accept()
+ sock.setblocking(0)
+ return self.accepted(sock, addr)
+ except socket.error, ex:
+ if ex.args[0] in (EWOULDBLOCK, EAGAIN, EINTR):
+ return False
+ else:
+ self.loseConnection(ex)
+ return True
+
+ def accepted(self, sock, addr):
+ protocol = self.factory.buildProtocol(addr)
+ if protocol is None:
+ self.loseConnection()
+ return True
+ connection = self.acceptConnection(sock, protocol, addr)
+ connection.run()
+ return False
+
+ def loseConnection(self, reason=None):
+ self.thread = None
+ self.closeSocket(reason)
+ self.closeFactory(reason)
+
+ def closeSocket(self, reason):
+ try:
+ self.sock.close()
+ except SystemExit:
+ raise
+ except Exception, ex:
+ pass
+
+ def closeFactory(self, reason):
+ try:
+ self.factory.doStop()
+ except SystemExit:
+ raise
+ except:
+ pass
+
+class SocketClientConnection:
+ """A connection to a server from a client.
+
+ Call connectionMade() on the protocol in a thread when connected.
+ It is completely up to the protocol what to do.
+ """
+
+ def __init__(self, connector):
+ self.addr = None
+ self.connector = connector
+ self.buffer_n = 1024
+ self.connected = False
+
+ def createSocket (self):
+ raise NotImplementedError()
+
+ def write(self, data):
+ if self.sock:
+ return self.sock.send(data)
+ else:
+ return 0
+
+ def connect(self, timeout):
+ #todo: run a timer to cancel on timeout?
+ try:
+ sock = self.createSocket()
+ sock.connect(self.addr)
+ self.sock = sock
+ self.connected = True
+ self.protocol = self.connector.buildProtocol(self.addr)
+ self.protocol.setTransport(self)
+ except SystemExit:
+ raise
+ except Exception, ex:
+ self.connector.connectionFailed(ex)
+ return False
+
+ self.thread = threading.Thread(target=self.main)
+ #self.thread.setDaemon(True)
+ self.thread.start()
+ return True
+
+ def main(self):
+ try:
+ # Call the protocol in a thread.
+ # Up to it what to do.
+ self.protocol.connectionMade(self.addr)
+ except SystemExit:
+ raise
+ except Exception, ex:
+ self.loseConnection(ex)
+
+ def mainLoop(self):
+ # Something a protocol could call.
+ while True:
+ if not self.thread: break
+ if self.select(): break
+ if not self.thread: break
+ data = self.read()
+ if data is None: continue
+ if data is True: break
+ if self.dataReceived(data): break
+
+ def select(self):
+ try:
+ select.select([self.sock], [], [], SELECT_TIMEOUT)
+ return False
+ except socket.error, ex:
+ if ex.args[0] in (EWOULDBLOCK, EAGAIN, EINTR):
+ return False
+ else:
+ self.loseConnection(ex)
+ return True
+
+ def read(self):
+ try:
+ data = self.sock.recv(self.buffer_n)
+ return data
+ except socket.error, ex:
+ if ex.args[0] in (EWOULDBLOCK, EAGAIN, EINTR):
+ return None
+ else:
+ self.loseConnection(ex)
+ return True
+
+ def dataReceived(self, data):
+ if not self.protocol:
+ return True
+ try:
+ self.protocol.dataReceived(data)
+ except SystemExit:
+ raise
+ except Exception, ex:
+ self.loseConnection(ex)
+ return True
+ return False
+
+ def loseConnection(self, reason=None):
+ self.thread = None
+ self.closeSocket(reason)
+ self.closeProtocol(reason)
+ self.closeConnector(reason)
+
+ def closeSocket(self, reason):
+ try:
+ if self.sock:
+ self.sock.close()
+ except SystemExit:
+ raise
+ except:
+ pass
+
+ def closeProtocol(self, reason):
+ try:
+ if self.connected:
+ self.connected = False
+ if self.protocol:
+ self.protocol.connectionLost(reason)
+ except SystemExit:
+ raise
+ except:
+ pass
+ self.protocol = None
+
+ def closeConnector(self, reason):
+ try:
+ self.connector.connectionLost(reason)
+ except SystemExit:
+ raise
+ except:
+ pass
+
+class SocketConnector:
+ """A client socket. Connects to a server and runs the client protocol
+ in a thread.
+ """
+
+ def __init__(self, factory):
+ self.factoryStarted = False
+ self.clientLost = False
+ self.clientFailed = False
+ self.factory = factory
+ self.state = "disconnected"
+ self.transport = None
+
+ def getDestination(self):
+ raise NotImplementedError()
+
+ def connectTransport(self):
+ raise NotImplementedError()
+
+ def connect(self):
+ if self.state != "disconnected":
+ raise socket.error(EINVAL, "cannot connect in state " + self.state)
+ self.state = "connecting"
+ self.clientLost = False
+ self.clientFailed = False
+ if not self.factoryStarted:
+ self.factoryStarted = True
+ self.factory.doStart()
+ self.factory.startedConnecting(self)
+ self.connectTransport()
+ self.state = "connected"
+
+ def stopConnecting(self):
+ if self.state != "connecting":
+ return
+ self.state = "disconnected"
+ self.transport.disconnect()
+
+ def buildProtocol(self, addr):
+ return self.factory.buildProtocol(addr)
+
+ def connectionLost(self, reason=None):
+ if not self.clientLost:
+ self.clientLost = True
+ self.factory.clientConnectionLost(self, reason)
+
+ def connectionFailed(self, reason=None):
+ if not self.clientFailed:
+ self.clientFailed = True
+ self.factory.clientConnectionFailed(self, reason)
+
diff --git a/tools/python/xen/web/http.py b/tools/python/xen/web/http.py
new file mode 100644
index 0000000000..36c7ecfec1
--- /dev/null
+++ b/tools/python/xen/web/http.py
@@ -0,0 +1,514 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+#============================================================================
+# Parts of this library are derived from Twisted:
+# Copyright (C) 2001 Matthew W. Lefkowitz
+#
+# Copyright (C) 2005 Mike Wray <mike.wray@hp.com>
+#============================================================================
+
+from mimetools import Message
+from cStringIO import StringIO
+import math
+import time
+import cgi
+
+CONTINUE = 100
+SWITCHING_PROTOCOLS = 101
+
+OK = 200
+CREATED = 201
+ACCEPTED = 202
+NON_AUTHORITATIVE_INFORMATION = 203
+NO_CONTENT = 204
+RESET_CONTENT = 205
+PARTIAL_CONTENT = 206
+MULTI_STATUS = 207
+
+MULTIPLE_CHOICE = 300
+MOVED_PERMANENTLY = 301
+FOUND = 302
+SEE_OTHER = 303
+NOT_MODIFIED = 304
+USE_PROXY = 305
+TEMPORARY_REDIRECT = 307
+
+BAD_REQUEST = 400
+UNAUTHORIZED = 401
+PAYMENT_REQUIRED = 402
+FORBIDDEN = 403
+NOT_FOUND = 404
+NOT_ALLOWED = 405
+NOT_ACCEPTABLE = 406
+PROXY_AUTH_REQUIRED = 407
+REQUEST_TIMEOUT = 408
+CONFLICT = 409
+GONE = 410
+LENGTH_REQUIRED = 411
+PRECONDITION_FAILED = 412
+REQUEST_ENTITY_TOO_LARGE = 413
+REQUEST_URI_TOO_LONG = 414
+UNSUPPORTED_MEDIA_TYPE = 415
+REQUESTED_RANGE_NOT_SATISFIABLE = 416
+EXPECTATION_FAILED = 417
+
+INTERNAL_SERVER_ERROR = 500
+NOT_IMPLEMENTED = 501
+BAD_GATEWAY = 502
+SERVICE_UNAVAILABLE = 503
+GATEWAY_TIMEOUT = 504
+VERSION_NOT_SUPPORTED = 505
+INSUFFICIENT_STORAGE_SPACE = 507
+NOT_EXTENDED = 510
+
+NO_BODY_CODES = [ NO_CONTENT, NOT_MODIFIED ]
+
+
+STATUS = {
+ CONTINUE : "Continue",
+ SWITCHING_PROTOCOLS : "Switching protocols",
+
+ OK : "OK",
+ CREATED : "Created",
+ ACCEPTED : "Accepted",
+ NON_AUTHORITATIVE_INFORMATION : "Non-authoritative information",
+ NO_CONTENT : "No content",
+ RESET_CONTENT : "Reset content",
+ PARTIAL_CONTENT : "Partial content",
+ MULTI_STATUS : "Multi-status",
+
+ MULTIPLE_CHOICE : "Multiple choice",
+ MOVED_PERMANENTLY : "Moved permanently",
+ FOUND : "Found",
+ SEE_OTHER : "See other",
+ NOT_MODIFIED : "Not modified",
+ USE_PROXY : "Use proxy",
+ TEMPORARY_REDIRECT : "Temporary redirect",
+
+ BAD_REQUEST : "Bad request",
+ UNAUTHORIZED : "Unauthorized",
+ PAYMENT_REQUIRED : "Payment required",
+ FORBIDDEN : "Forbidden",
+ NOT_FOUND : "Not found",
+ NOT_ALLOWED : "Not allowed",
+ NOT_ACCEPTABLE : "Not acceptable",
+ PROXY_AUTH_REQUIRED : "Proxy authentication required",
+ REQUEST_TIMEOUT : "Request timeout",
+ CONFLICT : "Conflict",
+ GONE : "Gone",
+ LENGTH_REQUIRED : "Length required",
+ PRECONDITION_FAILED : "Precondition failed",
+ REQUEST_ENTITY_TOO_LARGE : "Request entity too large",
+ REQUEST_URI_TOO_LONG : "Request URI too long",
+ UNSUPPORTED_MEDIA_TYPE : "Unsupported media type",
+ REQUESTED_RANGE_NOT_SATISFIABLE : "Requested range not satisfiable",
+ EXPECTATION_FAILED : "Expectation failed",
+
+ INTERNAL_SERVER_ERROR : "Internal server error",
+ NOT_IMPLEMENTED : "Not implemented",
+ BAD_GATEWAY : "Bad gateway",
+ SERVICE_UNAVAILABLE : "Service unavailable",
+ GATEWAY_TIMEOUT : "Gateway timeout",
+ VERSION_NOT_SUPPORTED : "HTTP version not supported",
+ INSUFFICIENT_STORAGE_SPACE : "Insufficient storage space",
+ NOT_EXTENDED : "Not extended",
+ }
+
def getStatus(code):
    """Map an HTTP status code to its reason phrase ('unknown' if absent)."""
    try:
        return STATUS[code]
    except KeyError:
        return "unknown"
+
+MULTIPART_FORM_DATA = 'multipart/form-data'
+URLENCODED = 'application/x-www-form-urlencoded'
+
+parseQueryArgs = cgi.parse_qs
+
def timegm(year, month, day, hour, minute, second):
    """Convert time tuple in GMT to seconds since epoch, GMT.

    @param year:   four-digit year, >= 1970
    @param month:  1..12
    @param day:    day of month, 1-based
    @return: seconds since 1970-01-01 00:00:00 GMT
    """
    # Fix: this module never imported 'calendar', so every call raised
    # NameError; import it locally.
    import calendar
    EPOCH = 1970
    assert year >= EPOCH
    assert 1 <= month <= 12
    # Whole days up to the start of 'year', including leap days.
    days = 365*(year-EPOCH) + calendar.leapdays(EPOCH, year)
    for i in range(1, month):
        days = days + calendar.mdays[i]
    # calendar.mdays has 28 for February; add the leap day if applicable.
    if month > 2 and calendar.isleap(year):
        days = days + 1
    days = days + day - 1
    hours = days*24 + hour
    minutes = hours*60 + minute
    seconds = minutes*60 + second
    return seconds
+
def stringToDatetime(dateString):
    """Convert an HTTP date string to seconds since epoch.

    Expects the RFC 1123 format 'Wdy, DD Mon YYYY HH:MM:SS GMT'.
    """
    import calendar
    # Fix: the original referenced an undefined name 'monthname';
    # use an explicit month table instead.
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    parts = dateString.split(' ')
    day = int(parts[1])
    month = months.index(parts[2]) + 1
    year = int(parts[3])
    hour, min, sec = map(int, parts[4].split(':'))
    # calendar.timegm is the stdlib inverse of time.gmtime (GMT in, epoch out).
    return int(calendar.timegm((year, month, day, hour, min, sec, 0, 0, 0)))
+
class HttpRequest:
    """A single HTTP request parsed from an input stream.

    The constructor eagerly reads the request line, headers, cookies and
    body from 'rin'; the response is accumulated in an in-memory buffer
    until sendResponse() writes it to 'out'.
    """

    # HTTP version we speak in responses.
    http_version = (1, 1)

    http_version_string = ("HTTP/%d.%d" % http_version)

    # Bodies larger than this are rejected with 413.
    max_content_length = 10000
    # More header lines than this is a 400.
    max_headers = 500

    request_line = None
    request_method = None
    request_uri = None
    request_path = None
    request_query = None
    request_version = None
    content_length = 0
    content = None
    etag = None
    # Default is to close after each request unless keep-alive negotiated.
    close_connection = True
    response_code = 200
    response_status = "OK"
    response_sent = False
    # True when a conditional request matched (304/412); suppresses the body.
    cached = False
    last_modified = None

    # Set via setHost() when running behind an SSL-terminating proxy.
    forceSSL = False

    def __init__(self, host, rin, out):
        """Parse a request.

        @param host: (address, port) pair we are serving on
        @param rin:  readable file-like object holding the raw request
        @param out:  writable file-like object for the response
        """
        self.host = host
        self.rin = rin
        self.out = out
        self.request_args = {}
        self.args = self.request_args
        self.request_headers = {}
        self.request_cookies = {}
        self.response_headers = {}
        self.response_cookies = {}
        self.output = StringIO()
        # Parse errors raise ValueError via requestError().
        self.parseRequest()
+
+ def isSecure(self):
+ return self.forceSSL
+
+ def getRequestMethod(self):
+ return self.request_method
+
+ def trim(self, str, ends):
+ for end in ends:
+ if str.endswith(end):
+ str = str[ : -len(end) ]
+ break
+ return str
+
    def requestError(self, code, msg=None):
        """Send an error response, then abort processing via ValueError."""
        self.sendError(code, msg)
        raise ValueError(self.response_status)
+
    def sendError(self, code, msg=None):
        """Set an error status and send the response immediately.

        NOTE(review): setResponseCode() accepts but ignores 'msg', so the
        message text is never transmitted - confirm intended behaviour.
        """
        self.setResponseCode(code, msg=msg)
        self.sendResponse()
+
+ def parseRequestVersion(self, version):
+ try:
+ if not version.startswith('HTTP/'):
+ raise ValueError
+ version_string = version.split('/', 1)[1]
+ version_codes = version_string.split('.')
+ if len(version_codes) != 2:
+ raise ValueError
+ request_version = (int(version_codes[0]), int(version_codes[1]))
+ except (ValueError, IndexError):
+ self.requestError(400, "Bad request version (%s)" % `version`)
+
    def parseRequestLine(self):
        """Parse the request line into method, URI, HTTP version and any
        query arguments; rejects malformed or unsupported requests."""
        line = self.trim(self.request_line, ['\r\n', '\n'])
        line_fields = line.split()
        n = len(line_fields)
        if n == 3:
            [method, uri, version] = line_fields
        elif n == 2:
            # Old-style request with no version token.
            [method, uri] = line_fields
            version = 'HTTP/0.9'
        else:
            self.requestError(BAD_REQUEST,
                              "Bad request (%s)" % `line`)

        # NOTE(review): parseRequestVersion() does not return a value, so
        # request_version is always None here and the > (2, 0) check below
        # can never trigger - confirm and fix parseRequestVersion().
        request_version = self.parseRequestVersion(version)

        if request_version > (2, 0):
            self.requestError(VERSION_NOT_SUPPORTED,
                              "HTTP version not supported (%s)" % `version`)
        #if request_version >= (1, 1) and self.http_version >= (1, 1):
        #    self.close_connection = False
        #else:
        #    self.close_connection = True

        self.request_method = method
        self.method = method
        self.request_uri = uri
        self.request_version = version

        # Split off the query string, if any, and parse its arguments.
        uri_query = uri.split('?')
        if len(uri_query) == 1:
            self.request_path = uri
        else:
            self.request_path = uri_query[0]
            self.request_query = uri_query[1]
            self.request_args = parseQueryArgs(self.request_query)
            self.args = self.request_args
+
+
    def parseRequestHeaders(self):
        """Read the header block into a mimetools.Message.

        Stops at the blank line ending the headers or at EOF; caps the
        number of header lines at max_headers (400 if exceeded).
        """
        header_bytes = ""
        header_count = 0
        while True:
            if header_count >= self.max_headers:
                self.requestError(BAD_REQUEST,
                                  "Bad request (too many headers)")
            line = self.rin.readline()
            header_bytes += line
            header_count += 1
            if line == '\r\n' or line == '\n' or line == '':
                break
        header_input = StringIO(header_bytes)
        self.request_headers = Message(header_input)
+
    def parseRequestCookies(self):
        """Split the Cookie header into the request_cookies dict."""
        cookie_hdr = self.getHeader("cookie")
        if not cookie_hdr: return
        for cookie in cookie_hdr.split(';'):
            try:
                cookie = cookie.lstrip()
                (k, v) = cookie.split('=', 1)
                self.request_cookies[k] = v
            except ValueError:
                # Malformed fragment (no '='): ignore it.
                pass
+
    def parseRequestArgs(self):
        """Merge POST body parameters (urlencoded or multipart form data)
        into request_args; other encodings contribute nothing."""
        if ((self.content is None) or
            (self.request_method != "POST")):
            return
        content_type = self.getHeader('content-type')
        if not content_type:
            return
        (encoding, params) = cgi.parse_header(content_type)
        if encoding == URLENCODED:
            xargs = cgi.parse_qs(self.content.getvalue(),
                                 keep_blank_values=True)
        elif encoding == MULTIPART_FORM_DATA:
            xargs = cgi.parse_multipart(self.content, params)
        else:
            xargs = {}
        self.request_args.update(xargs)
+
+ def getCookie(self, k):
+ return self.request_cookies[k]
+
    def readContent(self):
        """Read the request body if a parseable Content-Length was given.

        Leaves self.content as a StringIO positioned at the start, or None
        when there is no (valid) Content-Length; oversized bodies get 413.
        """
        try:
            self.content_length = int(self.getHeader("Content-Length"))
        except:
            # Missing or malformed Content-Length: treat as "no body".
            return
        if self.content_length > self.max_content_length:
            self.requestError(REQUEST_ENTITY_TOO_LARGE)
        self.content = self.rin.read(self.content_length)
        self.content = StringIO(self.content)
        self.content.seek(0,0)
+
    def parseRequest(self):
        """Parse the complete request: request line, headers, cookies,
        connection mode and body, in that order."""
        self.request_line = self.rin.readline()
        self.parseRequestLine()
        self.parseRequestHeaders()
        self.parseRequestCookies()
        connection_mode = self.getHeader('Connection')
        self.setCloseConnection(connection_mode)
        self.readContent()
        self.parseRequestArgs()
+
+ def setCloseConnection(self, mode):
+ if not mode: return
+ mode = mode.lower()
+ if mode == 'close':
+ self.close_connection = True
+ elif (mode == 'keep-alive') and (self.http_version >= (1, 1)):
+ self.close_connection = False
+
+ def getCloseConnection(self):
+ return self.close_connection
+
+ def getHeader(self, k, v=None):
+ return self.request_headers.get(k, v)
+
+ def getRequestMethod(self):
+ return self.request_method
+
+ def getRequestPath(self):
+ return self.request_path
+
+ def setResponseCode(self, code, status=None, msg=None):
+ self.response_code = code
+ if not status:
+ status = getStatus(code)
+ self.response_status = status
+
+ def setResponseHeader(self, k, v):
+ k = k.lower()
+ self.response_headers[k] = v
+ if k == 'connection':
+ self.setCloseConnection(v)
+
+ setHeader = setResponseHeader
+
+ def setLastModified(self, when):
+ # time.time() may be a float, but the HTTP-date strings are
+ # only good for whole seconds.
+ when = long(math.ceil(when))
+ if (not self.last_modified) or (self.last_modified < when):
+ self.lastModified = when
+
+ modified_since = self.getHeader('if-modified-since')
+ if modified_since:
+ modified_since = stringToDatetime(modified_since)
+ if modified_since >= when:
+ self.setResponseCode(NOT_MODIFIED)
+ self.cached = True
+
+ def setContentType(self, ty):
+ self.setResponseHeader("Content-Type", ty)
+
+ def setEtag(self, etag):
+ if etag:
+ self.etag = etag
+
+ tags = self.getHeader("if-none-match")
+ if tags:
+ tags = tags.split()
+ if (etag in tags) or ('*' in tags):
+ if self.request_method in ("HEAD", "GET"):
+ code = NOT_MODIFIED
+ else:
+ code = PRECONDITION_FAILED
+ self.setResponseCode(code)
+ self.cached = True
+
+ def addCookie(self, k, v, expires=None, domain=None, path=None,
+ max_age=None, comment=None, secure=None):
+ cookie = v
+ if expires != None:
+ cookie += "; Expires=%s" % expires
+ if domain != None:
+ cookie += "; Domain=%s" % domain
+ if path != None:
+ cookie += "; Path=%s" % path
+ if max_age != None:
+ cookie += "; Max-Age=%s" % max_age
+ if comment != None:
+ cookie += "; Comment=%s" % comment
+ if secure:
+ cookie += "; Secure"
+ self.response_cookies[k] = cookie
+
    def sendResponseHeaders(self):
        """Write response headers and cookies, ending with the blank line
        that separates headers from the body."""
        if self.etag:
            self.setResponseHeader("ETag", self.etag)
        for (k, v) in self.response_headers.items():
            self.send("%s: %s\r\n" % (k.capitalize(), v))
        for (k, v) in self.response_cookies.items():
            self.send("Set-Cookie: %s=%s\r\n" % (k, v))
        self.send("\r\n")
+
    def sendResponse(self):
        """Send status line, headers and (when appropriate) the buffered
        body to the output stream. Idempotent: only the first call sends.
        """
        if self.response_sent:
            return
        self.response_sent = True
        send_body = self.hasBody()
        if not self.close_connection:
            self.setResponseHeader("Connection", "keep-alive")
        if send_body:
            self.output.seek(0, 0)
            body = self.output.getvalue()
            body_length = len(body)
            self.setResponseHeader("Content-Length", body_length)
        # HTTP/0.9 responses have no status line or headers.
        if self.http_version > (0, 9):
            self.send("%s %d %s\r\n" % (self.http_version_string,
                                        self.response_code,
                                        self.response_status))
            self.sendResponseHeaders()
        if send_body:
            self.send(body)
        self.flush()
+
+ def write(self, data):
+ self.output.write(data)
+
+ def send(self, data):
+ #print 'send>', data
+ self.out.write(data)
+
+ def flush(self):
+ self.out.flush()
+
+ def hasNoBody(self):
+ return ((self.request_method == "HEAD") or
+ (self.response_code in NO_BODY_CODES) or
+ self.cached)
+
+ def hasBody(self):
+ return not self.hasNoBody()
+
+ def process(self):
+ pass
+ return self.close_connection
+
+ def getRequestHostname(self):
+ """Get the hostname that the user passed in to the request.
+
+ Uses the 'Host:' header if it is available, and the
+ host we are listening on otherwise.
+ """
+ return (self.getHeader('host') or
+ socket.gethostbyaddr(self.getHostAddr())[0]
+ ).split(':')[0]
+
+ def getHost(self):
+ return self.host
+
+ def getHostAddr(self):
+ return self.host[0]
+
+ def getPort(self):
+ return self.host[1]
+
+ def setHost(self, host, port, ssl=0):
+ """Change the host and port the request thinks it's using.
+
+ This method is useful for working with reverse HTTP proxies (e.g.
+ both Squid and Apache's mod_proxy can do this), when the address
+ the HTTP client is using is different than the one we're listening on.
+
+ For example, Apache may be listening on https://www.example.com, and then
+ forwarding requests to http://localhost:8080, but we don't want HTML produced
+ to say 'http://localhost:8080', they should say 'https://www.example.com',
+ so we do::
+
+ request.setHost('www.example.com', 443, ssl=1)
+
+ """
+ self.forceSSL = ssl
+ self.received_headers["host"] = host
+ self.host = (host, port)
+
+
+
diff --git a/tools/python/xen/web/httpserver.py b/tools/python/xen/web/httpserver.py
new file mode 100644
index 0000000000..265a75c32d
--- /dev/null
+++ b/tools/python/xen/web/httpserver.py
@@ -0,0 +1,342 @@
+import threading
+
+import string
+import socket
+import types
+from urllib import quote, unquote
+import os
+import os.path
+
+from xen.xend import sxp
+from xen.xend.Args import ArgError
+from xen.xend.XendError import XendError
+
+import http
+from resource import Resource, ErrorPage
+from SrvDir import SrvDir
+
class ThreadRequest:
    """A request to complete processing using a thread.
    """

    def __init__(self, processor, req, fn, args, kwds):
        # processor: RequestProcessor owning the connection.
        # req:       the HttpServerRequest to finish.
        # fn/args/kwds: the deferred callable and its arguments.
        self.processor = processor
        self.req = req
        self.fn = fn
        self.args = args
        self.kwds = kwds

    def run(self):
        """Mark the processor threaded and run main() in a daemon thread."""
        self.processor.setInThread()
        thread = threading.Thread(target=self.main)
        thread.setDaemon(True)
        thread.start()

    def call(self):
        """Invoke the deferred callable, reporting failures through the
        request, then finish the request."""
        try:
            self.fn(*self.args, **self.kwds)
        except SystemExit:
            raise
        except Exception, ex:
            self.req.resultErr(ex)
        self.req.finish()

    def main(self):
        """Thread entry point: do the work, then resume the request loop."""
        self.call()
        self.processor.process()
+
+
class RequestProcessor:
    """Processor for requests on a connection to an http server.
    Requests are executed synchonously unless they ask for a thread by returning
    a ThreadRequest.
    """

    done = False

    # True once processing has been handed off to a worker thread.
    inThread = False

    def __init__(self, server, sock, addr):
        # NOTE(review): 'addr' (the peer address) is accepted but never
        # stored - confirm whether it should be kept.
        self.server = server
        self.sock = sock
        self.srd = sock.makefile('rb')
        self.srw = sock.makefile('wb')
        self.srvaddr = server.getServerAddr()

    def isInThread(self):
        return self.inThread

    def setInThread(self):
        self.inThread = True

    def getServer(self):
        return self.server

    def getRequest(self):
        """Parse the next request from the connection."""
        return HttpServerRequest(self, self.srvaddr, self.srd, self.srw)

    def close(self):
        # Best-effort close of the underlying socket.
        try:
            self.sock.close()
        except:
            pass

    def finish(self):
        """Mark the connection done and close it."""
        self.done = True
        self.close()

    def process(self):
        """Serve requests until done. A ThreadRequest result is executed
        inline when already in a thread, otherwise handed off and this
        loop stops."""
        while not self.done:
            req = self.getRequest()
            res = req.process()
            if isinstance(res, ThreadRequest):
                if self.isInThread():
                    res.call()
                else:
                    res.run()
                break
            else:
                req.finish()
+
+class HttpServerRequest(http.HttpRequest):
+ """A single request to an http server.
+ """
+
+ def __init__(self, processor, addr, srd, srw):
+ self.processor = processor
+ self.prepath = ''
+ http.HttpRequest.__init__(self, addr, srd, srw)
+
+ def getServer(self):
+ return self.processor.getServer()
+
    def process(self):
        """Process the request. If the return value is a ThreadRequest
        it is evaluated in a thread.
        """
        try:
            self.prepath = []
            # Path elements after the leading '/', URL-unquoted.
            self.postpath = map(unquote, string.split(self.request_path[1:], '/'))
            resource = self.getResource()
            return self.render(resource)
        except SystemExit:
            raise
        except Exception, ex:
            self.processError(ex)
+
+ def processError(self, ex):
+ import traceback; traceback.print_exc()
+ self.sendError(http.INTERNAL_SERVER_ERROR, msg=str(ex))
+ self.setCloseConnection('close')
+
+ def finish(self):
+ self.sendResponse()
+ if self.close_connection:
+ self.processor.finish()
+
    def prePathURL(self):
        """Reconstruct the absolute URL for the path consumed so far."""
        url_host = self.getRequestHostname()
        port = self.getPort()
        if self.isSecure():
            url_proto = "https"
            default_port = 443
        else:
            url_proto = "http"
            default_port = 80
        # Only include the port when it differs from the scheme default.
        if port != default_port:
            url_host += (':%d' % port)
        url_path = quote(string.join(self.prepath, '/'))
        return ('%s://%s/%s' % (url_proto, url_host, url_path))
+
+ def getResource(self):
+ return self.getServer().getResource(self)
+
    def render(self, resource):
        """Render 'resource' for this request and return the result
        (possibly a ThreadRequest). A missing resource yields 404;
        errors are reported via resultErr()."""
        val = None
        if resource is None:
            self.sendError(http.NOT_FOUND)
        else:
            try:
                # Keep rendering while the result is itself a Resource.
                while True:
                    val = resource.render(self)
                    if not isinstance(val, Resource):
                        break
                val = self.result(val)
            except SystemExit:
                raise
            except Exception, ex:
                self.resultErr(ex)
        return val
+
+ def threadRequest(self, _fn, *_args, **_kwds):
+ """Create a request to finish request processing in a thread.
+ Use this to create a ThreadRequest to return from rendering a
+ resource if you need a thread to complete processing.
+ """
+ return ThreadRequest(self.processor, self, _fn, _args, _kwds)
+
+ def result(self, val):
+ if isinstance(val, Exception):
+ return self.resultErr(val)
+ else:
+ return self.resultVal(val)
+
    def resultVal(self, val):
        """Callback to complete the request.

        @param val: the value
        """
        if val is None:
            return val
        elif isinstance(val, ThreadRequest):
            # Thread handoffs pass through untouched.
            return val
        elif self.useSxp():
            self.setHeader("Content-Type", sxp.mime_type)
            sxp.show(val, out=self)
        else:
            self.write('<html><head></head><body>')
            self.printPath()
            if isinstance(val, types.ListType):
                self.write('<code><pre>')
                # NOTE(review): PrettyPrint is not imported by this module,
                # so this branch raises NameError - confirm the intended
                # module (xen.xend.sxp?).
                PrettyPrint.prettyprint(val, out=self)
                self.write('</pre></code>')
            else:
                self.write(str(val))
            self.write('</body></html>')
        return None
+
    def resultErr(self, err):
        """Error callback to complete a request.

        @param err: the error

        NOTE(review): the bare 'raise' re-raises the active exception, so
        this method is only valid while an exception is being handled.
        """
        if not isinstance(err, (ArgError, sxp.ParseError, XendError)):
            raise
        #log.exception("op=%s: %s", op, str(err))
        if self.useSxp():
            self.setHeader("Content-Type", sxp.mime_type)
            sxp.show(['xend.err', str(err)], out=self)
        else:
            self.setHeader("Content-Type", "text/plain")
            self.write('Error ')
            self.write(': ')
            self.write(str(err))
        return None
+
    def useSxp(self):
        """Determine whether to send an SXP response to a request.
        Uses SXP if there is no User-Agent, no Accept, or application/sxp is in Accept.

        returns 1 for SXP, 0 otherwise
        """
        ok = 0
        user_agent = self.getHeader('User-Agent')
        accept = self.getHeader('Accept')
        # No browser-style headers, or an explicit request for SXP.
        if (not user_agent) or (not accept) or (accept.find(sxp.mime_type) >= 0):
            ok = 1
        return ok
+
+ def printPath(self):
+ pathlist = [x for x in self.prepath if x != '' ]
+ s = "/"
+ self.write('<h1><a href="/">/</a>')
+ for x in pathlist:
+ s += x + "/"
+ self.write(' <a href="%s">%s</a>/' % (s, x))
+ self.write("</h1>")
+
class HttpServer:
    """Simple synchronous HTTP server: accepts connections in a loop and
    hands each socket to a RequestProcessor."""

    # Listen backlog for the server socket.
    backlog = 5

    closed = False

    def __init__(self, interface='', port=8080, root=None):
        # The resource tree defaults to an empty SrvDir.
        if root is None:
            root = SrvDir()
        self.interface = interface
        self.port = port
        self.root = root

    def getRoot(self):
        return self.root

    def getPort(self):
        return self.port

    def run(self):
        """Bind, listen and serve until close() is called."""
        self.bind()
        self.listen()
        self.requestLoop()

    def stop(self):
        self.close()

    def bind(self):
        """Create the TCP listening socket (with SO_REUSEADDR)."""
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.socket.bind((self.interface, self.port))

    def listen(self):
        self.socket.listen(self.backlog)

    def accept(self):
        return self.socket.accept()

    def requestLoop(self):
        """Accept and process connections until closed."""
        while not self.closed:
            self.acceptRequest()

    def close(self):
        """Stop the loop and close the listening socket (best-effort)."""
        self.closed = True
        try:
            self.socket.close()
        except:
            pass

    def acceptRequest(self):
        """Accept one connection; socket errors are ignored (e.g. when the
        listening socket is closed during shutdown)."""
        try:
            (sock, addr) = self.accept()
            self.processRequest(sock, addr)
        except socket.error:
            return

    def processRequest(self, sock, addr):
        """Serve all requests on one connection, then close the socket."""
        try:
            rp = RequestProcessor(self, sock, addr)
            rp.process()
        except SystemExit:
            raise
        except Exception, ex:
            print 'HttpServer>processRequest> exception: ', ex
        try:
            sock.close()
        except:
            pass

    def getServerAddr(self):
        return (socket.gethostname(), self.port)

    def getResource(self, req):
        """Find the resource handling 'req', starting at the root."""
        return self.root.getRequestResource(req)
+
class UnixHttpServer(HttpServer):
    """HttpServer variant listening on a unix-domain socket at 'path'."""

    def __init__(self, path=None, root=None):
        HttpServer.__init__(self, interface='localhost', root=root)
        self.path = path

    def bind(self):
        """Create the unix-domain listening socket, first removing any
        stale socket file left at self.path."""
        pathdir = os.path.dirname(self.path)
        if not os.path.exists(pathdir):
            os.makedirs(pathdir)
        else:
            try:
                os.unlink(self.path)
            except SystemExit:
                raise
            except Exception, ex:
                # No stale socket file to remove.
                pass
        self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        #self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.socket.bind(self.path)
diff --git a/tools/python/xen/web/protocol.py b/tools/python/xen/web/protocol.py
new file mode 100644
index 0000000000..5f2d26cabb
--- /dev/null
+++ b/tools/python/xen/web/protocol.py
@@ -0,0 +1,126 @@
class Factory:
    """Generic protocol factory.

    Tracks start/stop nesting with a counter so startFactory() and
    stopFactory() run only on the first start and the last stop.
    """

    # Nesting depth of doStart() calls.
    starts = 0

    def __init__(self):
        pass

    def doStart(self):
        """Start the factory; startFactory() fires on the first start."""
        if not self.starts:
            self.startFactory()
        self.starts = self.starts + 1

    def doStop(self):
        """Undo one doStart(); stopFactory() fires on the last stop."""
        if self.starts <= 0:
            return
        self.starts = self.starts - 1
        if not self.starts:
            self.stopFactory()

    def buildProtocol(self, addr):
        """Create a protocol instance for a new connection."""
        return Protocol(self)

    def startFactory(self):
        """Hook: called when the factory first starts."""
        pass

    def stopFactory(self):
        """Hook: called when the factory finally stops."""
        pass
+
class ServerFactory(Factory):
    """Factory for server protocols.
    """
    pass
+
class ClientFactory(Factory):
    """Factory for client protocols.

    Adds connector lifecycle hooks; all default to no-ops.
    """

    def startedConnecting(self, connector):
        # Hook: a connection attempt has begun.
        pass

    def clientConnectionLost(self, connector, reason):
        # Hook: an established connection was lost.
        pass

    def clientConnectionFailed(self, connector, reason):
        # Hook: a connection attempt failed.
        pass
+
+
class Protocol:
    """Base protocol: paired with a transport, receives connection
    lifecycle callbacks and proxies reads/writes to the transport."""

    factory = None
    transport = None
    connected = False

    def __init__(self, factory):
        self.factory = factory

    def setTransport(self, transport):
        # A protocol counts as connected exactly when it has a transport.
        self.transport = transport
        self.connected = bool(transport)

    def getTransport(self):
        return self.transport

    def connectionMade(self, addr):
        # Callback: transport connected to 'addr'.
        print 'Protocol>connectionMade>', addr
        pass

    def connectionLost(self, reason=None):
        # Callback: transport disconnected.
        print 'Protocol>connectionLost>', reason
        pass

    def dataReceived(self, data):
        # Callback: data arrived from the transport.
        print 'Protocol>dataReceived>'
        pass

    def write(self, data):
        # Write through the transport; 0 when not connected.
        if self.transport:
            return self.transport.write(data)
        else:
            return 0

    def read(self):
        # Read from the transport; None when not connected.
        if self.transport:
            return self.transport.read()
        else:
            return None
+
class TestClientFactory(ClientFactory):
    """Manual-test client factory: logs every callback to stdout."""

    def buildProtocol(self, addr):
        print 'TestClientFactory>buildProtocol>', addr
        return TestClientProtocol(self)

    def startedConnecting(self, connector):
        print 'TestClientFactory>startedConnecting>', connector

    def clientConnectionLost(self, connector, reason):
        print 'TestClientFactory>clientConnectionLost>', connector, reason

    def clientConnectionFailed(self, connector, reason):
        print 'TestClientFactory>clientConnectionFailed>', connector, reason
+
class TestClientProtocol(Protocol):
    """Manual-test client protocol: writes two messages on connect."""

    def connectionMade(self, addr):
        print 'TestClientProtocol>connectionMade>', addr
        self.write("hello")
        self.write("there")
+ self.write("there")
+
class TestServerFactory(Factory):
    """Manual-test server factory: builds TestServerProtocol instances."""

    def buildProtocol(self, addr):
        print 'TestServerFactory>buildProtocol>', addr
        return TestServerProtocol(self)
+
class TestServerProtocol(Protocol):
    """Manual-test server protocol: logs the first payload then exits
    the whole process (hard exit, skipping cleanup)."""

    def dataReceived(self, data):
        print 'TestServerProtocol>dataReceived>', len(data), data
        #sys.exit(0)
        # os._exit skips atexit handlers and buffered-output flushing.
        import os
        os._exit(0)
+
diff --git a/tools/python/xen/web/reactor.py b/tools/python/xen/web/reactor.py
new file mode 100644
index 0000000000..1ebb5c84a0
--- /dev/null
+++ b/tools/python/xen/web/reactor.py
@@ -0,0 +1,2 @@
+from unix import listenUNIX, connectUNIX
+from tcp import listenTCP, connectTCP
diff --git a/tools/python/xen/web/resource.py b/tools/python/xen/web/resource.py
new file mode 100644
index 0000000000..3b5e745671
--- /dev/null
+++ b/tools/python/xen/web/resource.py
@@ -0,0 +1,91 @@
+import http
+
def findResource(resource, request):
    """Traverse the resource tree to find who will handle the request,
    moving path elements from request.postpath to request.prepath."""
    while request.postpath and not resource.isLeaf:
        segment = request.postpath.pop(0)
        request.prepath.append(segment)
        child = resource.getPathResource(segment, request)
        if not child:
            break
        resource = child
    return resource
+
class Resource:
    """Base class for request-handling resources arranged in a tree.

    Children are looked up by path element; rendering dispatches to a
    render_METHOD method chosen by the HTTP request method.
    """

    # Leaf resources terminate path traversal in findResource().
    isLeaf = False

    def __init__(self):
        self.children = {}

    def getRequestResource(self, req):
        """Find the resource in this subtree that handles 'req'."""
        return findResource(self, req)

    def getChild(self, path, request):
        """Hook for dynamic children; default is 'not found'."""
        return None

    def getPathResource(self, path, request):
        """Return the static child for 'path', else ask getChild()."""
        # 'in' instead of the Python2-only dict.has_key().
        if path in self.children:
            val = self.children[path]
        else:
            val = self.getChild(path, request)
        return val

    def putChild(self, path, child):
        """Register a static child resource under 'path'."""
        self.children[path] = child

    def render(self, req):
        """Dispatch to render_GET/render_POST/... for the request method;
        unknown methods get a 501 via unsupported()."""
        meth = getattr(self, 'render_' + req.getRequestMethod(), self.unsupported)
        return meth(req)

    def supportedMethods(self):
        """List the HTTP methods this resource implements."""
        l = []
        s = 'render_'
        for x in dir(self):
            if x.startswith(s):
                l.append(x[len(s):])
        return l

    def render_HEAD(self, req):
        # HEAD is GET; the body is suppressed at response time.
        return self.render_GET(req)

    def render_GET(self, req):
        req.setContentType("text/plain")
        req.write("GET")

    def render_POST(self, req):
        req.setContentType("text/plain")
        req.write("POST")

    def unsupported(self, req):
        """Send a 501 reply listing the methods we do support."""
        req.setHeader("Accept", ",".join(self.supportedMethods()))
        req.setResponseCode(http.NOT_IMPLEMENTED)
        req.setContentType("text/plain")
        req.write("Request method not supported (%s)" % req.getRequestMethod())
+
class ErrorPage(Resource):
    """Leaf resource that renders a fixed HTTP error response."""

    isLeaf = True

    def __init__(self, code, status=None, msg=None):
        """@param code: HTTP status code
        @param status: reason phrase (defaults to the standard one)
        @param msg: body text (defaults to the status)
        """
        Resource.__init__(self)
        if status is None:
            status = http.getStatus(code)
        if msg is None:
            msg = status
        self.code = code
        self.status = status
        self.msg = msg

    def render(self, req):
        """Write the error code and message as a plain-text response."""
        req.setResponseCode(self.code, self.status)
        req.setContentType("text/plain")
        req.write(self.msg)
+
+
+
+
+
diff --git a/tools/python/xen/web/static.py b/tools/python/xen/web/static.py
new file mode 100644
index 0000000000..430be6cf0d
--- /dev/null
+++ b/tools/python/xen/web/static.py
@@ -0,0 +1,45 @@
+import os
+
+from resource import Resource
+
class File(Resource):
    """Static file resource: serves the contents of a single file."""

    isLeaf = True

    def __init__(self, filename, defaultType=None):
        """@param filename: path of the file to serve
        @param defaultType: Content-Type to report (default 'text/plain')
        """
        # Fix: initialise the Resource base class so self.children exists
        # (putChild/getPathResource failed with AttributeError before).
        Resource.__init__(self)
        if defaultType is None:
            defaultType = "text/plain"
        self.filename = filename
        self.type = defaultType
        self.encoding = None

    def getFileSize(self):
        """Return the file's size in bytes, or 0 if it cannot be stat'd."""
        try:
            info = os.stat(self.filename)
            return info.st_size
        except:
            return 0

    def render(self, req):
        """Stream the file to the response in 1KB chunks; an unreadable
        file yields an empty or truncated body."""
        if self.type:
            req.setHeader('Content-Type', self.type)
        if self.encoding:
            req.setHeader('Content-Encoding', self.encoding)
        req.setHeader('Content-Length', self.getFileSize())
        # Fix: 'io' was referenced in the cleanup even when open() failed
        # before assigning it; initialise it first. Also use the portable
        # open() builtin rather than Python2-only file().
        io = None
        try:
            io = open(self.filename, "r")
            while True:
                buf = io.read(1024)
                if not buf:
                    break
                req.write(buf)
        except IOError:
            pass
        try:
            if io:
                io.close()
        except:
            pass
+
+
+
diff --git a/tools/python/xen/web/tcp.py b/tools/python/xen/web/tcp.py
new file mode 100644
index 0000000000..01a8e73865
--- /dev/null
+++ b/tools/python/xen/web/tcp.py
@@ -0,0 +1,90 @@
+import sys
+import socket
+import types
+
+from connection import *
+from protocol import *
+
class TCPServerConnection(SocketServerConnection):
    """Server side of an accepted TCP connection; no TCP-specific state."""
    pass
+
class TCPListener(SocketListener):
    """Listener accepting TCP connections on (interface, port)."""

    def __init__(self, port, factory, backlog=None, interface=''):
        SocketListener.__init__(self, factory, backlog=backlog)
        self.port = port
        # '' means all interfaces.
        self.interface = interface

    def createSocket(self):
        """Create and bind the listening socket (SO_REUSEADDR set)."""
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        addr = (self.interface, self.port)
        sock.bind(addr)
        return sock

    def acceptConnection(self, sock, protocol, addr):
        """Wrap an accepted socket in a TCPServerConnection."""
        return TCPServerConnection(sock, protocol, addr, self)
+
class TCPClientConnection(SocketClientConnection):
    """Client side of a TCP connection to (host, port)."""

    def __init__(self, host, port, bindAddress, connector):
        SocketClientConnection.__init__(self, connector)
        self.addr = (host, port)
        # Optional local address to bind before connecting.
        self.bindAddress = bindAddress

    def createSocket(self):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.bindAddress is not None:
            sock.bind(self.bindAddress)
        return sock
+
+class TCPConnector(SocketConnector):
+
+ def __init__(self, host, port, factory, timeout=None, bindAddress=None):
+ SocketConnector.__init__(self, factory)
+ self.host = host
+ self.port = self.servicePort(port)
+ self.bindAddress = bindAddress
+ self.timeout = timeout
+
+ def servicePort(self, port):
+ if isinstance(port, types.StringTypes):
+ try:
+ port = socket.getservbyname(port, 'tcp')
+ except socket.error, ex:
+ raise IOError("unknown service: " + ex)
+ return port
+
+ def getDestination(self):
+ return (self.host, self.port)
+
+ def connectTransport(self):
+ self.transport = TCPClientConnection(
+ self.host, self.port, self.bindAddress, self)
+ self.transport.connect(self.timeout)
+
def listenTCP(port, factory, interface='', backlog=None):
    """Create a TCPListener on 'port' and start it listening."""
    l = TCPListener(port, factory, interface=interface, backlog=backlog)
    l.startListening()
    return l
+
def connectTCP(host, port, factory, timeout=None, bindAddress=None):
    """Create a TCPConnector to (host, port) and start connecting."""
    c = TCPConnector(host, port, factory, timeout=timeout, bindAddress=bindAddress)
    c.connect()
    return c
+
def main(argv):
    """Manual test driver: 'client' connects, anything else serves."""
    host = 'localhost'
    port = 8005
    if argv[1] == "client":
        c = connectTCP(host, port, TestClientFactory())
        print 'client:', c
    else:
        s = listenTCP(port, TestServerFactory())
        print 'server:', s
+
+if __name__ == "__main__":
+ main(sys.argv)
+
+
+
diff --git a/tools/python/xen/web/unix.py b/tools/python/xen/web/unix.py
new file mode 100644
index 0000000000..7381816031
--- /dev/null
+++ b/tools/python/xen/web/unix.py
@@ -0,0 +1,81 @@
+import sys
+import socket
+import os
+import os.path
+
+from connection import *
+from protocol import *
+
class UnixServerConnection(SocketServerConnection):
    """Server side of an accepted unix-domain connection; no extra state."""
    pass
+
class UnixListener(SocketListener):
    """Listener accepting connections on a unix-domain socket at 'path'."""

    def __init__(self, path, factory, backlog=None):
        SocketListener.__init__(self, factory, backlog=backlog)
        self.path = path

    def createSocket(self):
        """Create and bind the listening socket, removing any stale
        socket file left at self.path."""
        pathdir = os.path.dirname(self.path)
        if not os.path.exists(pathdir):
            os.makedirs(pathdir)
        else:
            try:
                os.unlink(self.path)
            except SystemExit:
                raise
            except Exception, ex:
                # No stale socket file to remove.
                pass
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(self.path)
        return sock

    def acceptConnection(self, sock, protocol, addr):
        # Unix sockets have no meaningful peer address; pass our path.
        return UnixServerConnection(sock, protocol, self.path, self)
+
class UnixClientConnection(SocketClientConnection):
    """Client side of a unix-domain connection to socket path 'addr'."""

    def __init__(self, addr, connector):
        SocketClientConnection.__init__(self, connector)
        self.addr = addr

    def createSocket(self):
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        return sock
+
class UnixConnector(SocketConnector):
    """Connector that establishes unix-domain client connections."""

    def __init__(self, path, factory, timeout=None):
        SocketConnector.__init__(self, factory)
        self.addr = path
        self.timeout = timeout

    def getDestination(self):
        """Return the socket path we connect to."""
        return self.addr

    def connectTransport(self):
        """Create the client transport and start the connection."""
        self.transport = UnixClientConnection(self.addr, self)
        self.transport.connect(self.timeout)
+
def listenUNIX(path, factory, backlog=None):
    """Create a UnixListener on 'path' and start it listening."""
    l = UnixListener(path, factory, backlog=backlog)
    l.startListening()
    return l
+
def connectUNIX(path, factory, timeout=None):
    """Create a UnixConnector to 'path' and start connecting."""
    c = UnixConnector(path, factory, timeout=timeout)
    c.connect()
    return c
+
+def main(argv):
+ path = "/tmp/test-foo"
+ if argv[1] == "client":
+ c = connectUNIX(path, TestClientFactory())
+ print "client:", c
+ else:
+ s = listenUNIX(path, TestServeractory())
+ print "server:", s
+
+if __name__ == "__main__":
+ main(sys.argv)
+
diff --git a/tools/python/xen/xend/Blkctl.py b/tools/python/xen/xend/Blkctl.py
index d90c7ce51d..a161f4d42b 100644
--- a/tools/python/xen/xend/Blkctl.py
+++ b/tools/python/xen/xend/Blkctl.py
@@ -4,11 +4,10 @@ import os
import os.path
import sys
import string
+import xen.util.process
from xen.xend import XendRoot
-from xen.util.ip import _readline, _readlines
-
xroot = XendRoot.instance()
"""Where network control scripts live."""
@@ -30,7 +29,8 @@ def block(op, type, dets, script=None):
raise ValueError('Invalid operation:' + op)
# Special case phy devices - they don't require any (un)binding
- if type == 'phy':
+ # Parallax also doesn't need script-based binding.
+ if (type == 'phy') or (type == 'parallax'):
return dets
if script is None:
@@ -38,8 +38,6 @@ def block(op, type, dets, script=None):
script = os.path.join(SCRIPT_DIR, script)
args = [op] + string.split(dets, ':')
args = ' '.join(args)
- out = os.popen(script + ' ' + args)
-
- output = _readline(out)
- out.close()
- return string.rstrip(output)
+ ret = xen.util.process.runscript(script + ' ' + args)
+ if len(ret):
+ return ret.splitlines()[0]
diff --git a/tools/python/xen/xend/EventServer.py b/tools/python/xen/xend/EventServer.py
index 20c567ada7..ad0128aa06 100644
--- a/tools/python/xen/xend/EventServer.py
+++ b/tools/python/xen/xend/EventServer.py
@@ -3,8 +3,9 @@
"""
import string
+from threading import Lock
-from twisted.internet import reactor
+import scheduler
# subscribe a.b.c h: map a.b.c -> h
# subscribe a.b.* h: map a.b.* -> h
@@ -38,20 +39,30 @@ class EventServer:
self.handlers = {}
self.run = run
self.queue = []
+ self.lock = Lock()
def start(self):
"""Enable event handling. Sends any queued events.
"""
- self.run = 1
- for (e,v) in self.queue:
+ try:
+ self.lock.acquire()
+ self.run = 1
+ queue = self.queue
+ self.queue = []
+ finally:
+ self.lock.release()
+ for (e,v) in queue:
self.inject(e, v)
- self.queue = []
def stop(self):
"""Suspend event handling. Events injected while suspended
are queued until we are started again.
"""
- self.run = 0
+ try:
+ self.lock.acquire()
+ self.run = 0
+ finally:
+ self.lock.release()
def subscribe(self, event, handler):
"""Subscribe to an event. For example 'a.b.c.d'.
@@ -62,21 +73,29 @@ class EventServer:
event event name
handler event handler fn(event, val)
"""
- hl = self.handlers.get(event)
- if hl is None:
- self.handlers[event] = [handler]
- else:
- hl.append(handler)
+ try:
+ self.lock.acquire()
+ hl = self.handlers.get(event)
+ if hl is None:
+ self.handlers[event] = [handler]
+ else:
+ hl.append(handler)
+ finally:
+ self.lock.release()
def unsubscribe_all(self, event=None):
"""Unsubscribe all handlers for a given event, or all handlers.
event event (optional)
"""
- if event == None:
- self.handlers.clear()
- elif event in self.handlers:
- del self.handlers[event]
+ try:
+ self.lock.acquire()
+ if event == None:
+ self.handlers.clear()
+ elif event in self.handlers:
+ del self.handlers[event]
+ finally:
+ self.lock.release()
def unsubscribe(self, event, handler):
"""Unsubscribe a given event and handler.
@@ -84,11 +103,15 @@ class EventServer:
event event
handler handler
"""
- hl = self.handlers.get(event)
- if hl is None:
- return
- if handler in hl:
- hl.remove(handler)
+ try:
+ self.lock.acquire()
+ hl = self.handlers.get(event)
+ if hl is None:
+ return
+ if handler in hl:
+ hl.remove(handler)
+ finally:
+ self.lock.release()
def inject(self, event, val, async=1):
"""Inject an event. Handlers for it are called if running, otherwise
@@ -97,13 +120,18 @@ class EventServer:
event event type
val event value
"""
- if self.run:
- if async:
- reactor.callLater(0, self.call_handlers, event, val)
- else:
- self.notify_handlers(event, val)
+ try:
+ self.lock.acquire()
+ if not self.run:
+ self.queue.append( (event, val) )
+ return
+ finally:
+ self.lock.release()
+
+ if async:
+ scheduler.now(self.call_handlers, [event, val])
else:
- self.queue.append( (event, val) )
+ self.call_handlers(event, val)
def call_handlers(self, event, val):
"""Internal method to call event handlers.
@@ -121,13 +149,19 @@ class EventServer:
event event type
val event value
"""
- hl = self.handlers.get(key)
- if hl is None:
- return
- # Copy the handler list so that handlers can call
- # subscribe/unsubscribe safely - python list iteration
- # is not safe against list modification.
- for h in hl[:]:
+ try:
+ self.lock.acquire()
+ hl = self.handlers.get(key)
+ if hl is None:
+ return
+ # Copy the handler list so that handlers can call
+ # subscribe/unsubscribe safely - python list iteration
+ # is not safe against list modification.
+ hl = hl[:]
+ finally:
+ self.lock.release()
+ # Must not hold the lock while calling the handlers.
+ for h in hl:
try:
h(event, val)
except:
diff --git a/tools/python/xen/xend/EventTypes.py b/tools/python/xen/xend/EventTypes.py
deleted file mode 100644
index 6350baa5dd..0000000000
--- a/tools/python/xen/xend/EventTypes.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-
-## XEND_DOMAIN_CREATE = "xend.domain.create": dom
-## create:
-## xend.domain.destroy: dom, reason:died/crashed
-## xend.domain.up ?
-
-## xend.domain.unpause: dom
-## xend.domain.pause: dom
-## xend.domain.shutdown: dom
-## xend.domain.destroy: dom
-
-## xend.domain.migrate.begin: dom, to
-## Begin tells: src host, src domain uri, dst host. Dst id known?
-## err: src host, src domain uri, dst host, dst id if known, status (of domain: ok, dead,...), reason
-## end: src host, src domain uri, dst host, dst uri
-
-## Events for both ends of migrate: for exporter and importer?
-## Include migrate id so can tie together.
-## Have uri /xend/migrate/<id> for migrate info (migrations in progress).
-
-## (xend.domain.migrate.begin (src <host>) (src.domain <id>)
-## (dst <host>) (id <migrate id>))
-
-## xend.domain.migrate.end:
-## (xend.domain.migrate.end (domain <id>) (to <host>)
-
-## xend.node.up: xend uri
-## xend.node.down: xend uri
-
-## xend.error ?
-
-## format:
-
diff --git a/tools/python/xen/xend/PrettyPrint.py b/tools/python/xen/xend/PrettyPrint.py
index 9e91b11448..a57a3c6b52 100644
--- a/tools/python/xen/xend/PrettyPrint.py
+++ b/tools/python/xen/xend/PrettyPrint.py
@@ -285,6 +285,19 @@ def prettyprint(sxpr, out=sys.stdout, width=80):
sxp.show(sxpr, out=out)
print >> out
+def prettyprintstring(sxpr, width=80):
+ """Prettyprint an SXP form to a string.
+
+ sxpr s-expression
+ width maximum output width
+ """
+ io = StringIO.StringIO()
+ prettyprint(sxpr, out=io, width=width)
+ io.seek(0)
+ val = io.getvalue()
+ io.close()
+ return val
+
def main():
pin = sxp.Parser()
while 1:
diff --git a/tools/python/xen/xend/Vifctl.py b/tools/python/xen/xend/Vifctl.py
index fe33ecbc71..0bc58f4480 100644
--- a/tools/python/xen/xend/Vifctl.py
+++ b/tools/python/xen/xend/Vifctl.py
@@ -3,6 +3,7 @@
import os
import os.path
import sys
+import xen.util.process
from xen.xend import XendRoot
xroot = XendRoot.instance()
@@ -35,7 +36,9 @@ def network(op, script=None, bridge=None, antispoof=None):
else:
args.append("antispoof=no")
args = ' '.join(args)
- os.system(script + ' ' + args)
+ ret = xen.util.process.runscript(script + ' ' + args)
+ if len(ret):
+ return ret.splitlines()[0]
def set_vif_name(vif_old, vif_new):
if vif_old == vif_new:
@@ -80,5 +83,6 @@ def vifctl(op, vif=None, script=None, domain=None, mac=None, bridge=None, ipaddr
ips = ' '.join(ipaddr)
args.append("ip='%s'" % ips)
args = ' '.join(args)
- os.system(script + ' ' + args)
-
+ ret = xen.util.process.runscript(script + ' ' + args)
+ if len(ret):
+ return ret.splitlines()[0]
diff --git a/tools/python/xen/xend/XendAsynchProtocol.py b/tools/python/xen/xend/XendAsynchProtocol.py
deleted file mode 100644
index 6afaf14285..0000000000
--- a/tools/python/xen/xend/XendAsynchProtocol.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-
-from twisted.protocols import http
-from twisted.internet.protocol import ClientCreator
-from twisted.internet.defer import Deferred
-from twisted.internet import reactor
-
-from XendProtocol import XendClientProtocol, XendRequest
-
-class AsynchXendClient(http.HTTPClient):
- """A subclass of twisted's HTTPClient to deal with a connection to xend.
- Makes the request when connected, and delegates handling responses etc.
- to its protocol (usually an AsynchXendClientProtocol instance).
- """
- def __init__(self, protocol, request):
- self.protocol = protocol
- self.request = request
-
- def connectionMade(self):
- request = self.request
- url = self.request.url
- self.sendCommand(request.method, url.fullpath())
- self.sendHeader('Host', url.location())
- for (k, v) in request.headers.items():
- self.sendHeader(k, v)
- if request.data:
- self.sendHeader('Content-Length', len(request.data))
- self.endHeaders()
- if request.data:
- self.transport.write(request.data)
-
- def handleStatus(self, version, status, message):
- return self.protocol.handleStatus(version, status, message)
-
- def handleHeader(self, key, val):
- return self.protocol.handleHeader(key, val)
-
- def handleResponse(self, data):
- return self.protocol.handleResponse(data)
-
-class AsynchXendClientProtocol(XendClientProtocol):
- """An asynchronous xend client. Uses twisted to connect to xend
- and make the request. It does not block waiting for the result,
- but sets up a deferred that is called when the result becomes available.
-
- Uses AsynchXendClient to manage the connection.
- """
- def __init__(self):
- self.err = None
- self.headers = {}
-
- def xendRequest(self, url, method, args=None):
- """Make a request to xend. The returned deferred is called when
- the result is available.
-
- @param url: xend request url
- @param method: http method: POST or GET
- @param args: request arguments (dict)
- @return: deferred
- """
- request = XendRequest(url, method, args)
- self.deferred = Deferred()
- clientCreator = ClientCreator(reactor, AsynchXendClient, self, request)
- clientCreator.connectTCP(url.host, url.port)
- return self.deferred
-
- def callErrback(self, err):
- if not self.deferred.called:
- self.err = err
- self.deferred.errback(err)
- return err
-
- def callCallback(self, val):
- if not self.deferred.called:
- self.deferred.callback(val)
- return val
-
- def handleException(self, err):
- return self.callErrback(err)
-
- def handleHeader(self, key, val):
- self.headers[key.lower()] = val
-
- def getHeader(self, key):
- return self.headers.get(key.lower())
-
- def handleResponse(self, data):
- if self.err: return self.err
- val = XendClientProtocol.handleResponse(self, data)
- if isinstance(val, Exception):
- self.callErrback(val)
- else:
- self.callCallback(val)
- return val
diff --git a/tools/python/xen/xend/XendBootloader.py b/tools/python/xen/xend/XendBootloader.py
new file mode 100644
index 0000000000..8fee40b50f
--- /dev/null
+++ b/tools/python/xen/xend/XendBootloader.py
@@ -0,0 +1,94 @@
+#
+# XendBootloader.py - Framework to run a boot loader for picking the kernel
+#
+# Copyright 2005 Red Hat, Inc.
+# Jeremy Katz <katzj@redhat.com>
+#
+# This software may be freely redistributed under the terms of the GNU
+# general public license.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+import os, sys, select, errno
+import sxp
+
+from XendLogging import log
+from XendError import VmError
+
+BL_FIFO = "/var/lib/xen/xenbl"
+
+def bootloader(blexec, disk, quiet = 0, vcpus = None, entry = None):
+ """Run the boot loader executable on the given disk and return a
+ config image.
+ @param blexec Binary to use as the boot loader
+ @param disk Disk to run the boot loader on.
+ @param quiet Run in non-interactive mode, just booting the default.
+ @param vcpus Number of vcpus for the domain.
+ @param entry Default entry to boot."""
+
+ if not os.access(blexec, os.X_OK):
+ msg = "Bootloader isn't executable"
+ log.error(msg)
+ raise VmError(msg)
+ if not os.access(disk, os.R_OK):
+ msg = "Disk isn't accessible"
+ log.error(msg)
+ raise VmError(msg)
+
+ os.mkfifo(BL_FIFO, 0600)
+
+ child = os.fork()
+ if (not child):
+ args = [ blexec ]
+ if quiet:
+ args.append("-q")
+ args.append("--output=%s" %(BL_FIFO,))
+ if entry is not None:
+ args.append("--entry=%s" %(entry,))
+ args.append(disk)
+
+ try:
+ os.execvp(args[0], args)
+ except OSError, e:
+ print e
+ pass
+ os._exit(1)
+
+ while 1:
+ try:
+ r = os.open(BL_FIFO, os.O_RDONLY)
+ except OSError, e:
+ if e.errno == errno.EINTR:
+ continue
+ break
+ ret = ""
+ while 1:
+ select.select([r], [], [])
+ s = os.read(r, 1024)
+ ret = ret + s
+ if len(s) == 0:
+ break
+
+ (pid, status) = os.waitpid(child, 0)
+ os.close(r)
+ os.unlink(BL_FIFO)
+
+ if len(ret) == 0:
+ msg = "Boot loader didn't return any data!"
+ log.error(msg)
+ raise VmError, msg
+
+ pin = sxp.Parser()
+ pin.input(ret)
+ pin.input_eof()
+
+ config_image = pin.val
+ if vcpus and sxp.child_value(config_image, "vcpus") is None:
+ config_image.append(['vcpus', vcpus])
+
+ config = ['image', config_image]
+ return config
+
diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py
new file mode 100644
index 0000000000..654fb022c5
--- /dev/null
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -0,0 +1,141 @@
+# Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
+
+# This file is subject to the terms and conditions of the GNU General
+# Public License. See the file "COPYING" in the main directory of
+# this archive for more details.
+
+import errno
+import os
+import select
+import sxp
+from string import join
+from struct import pack, unpack, calcsize
+from xen.util.xpopen import xPopen3
+import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+
+from XendError import XendError
+from XendLogging import log
+
+SIGNATURE = "LinuxGuestRecord"
+PAGE_SIZE = 4096
+PATH_XC_SAVE = "/usr/libexec/xen/xc_save"
+PATH_XC_RESTORE = "/usr/libexec/xen/xc_restore"
+
+sizeof_int = calcsize("i")
+sizeof_unsigned_long = calcsize("L")
+
+def write_exact(fd, buf, errmsg):
+ if os.write(fd, buf) != len(buf):
+ raise XendError(errmsg)
+
+def read_exact(fd, size, errmsg):
+ buf = os.read(fd, size)
+ if len(buf) != size:
+ raise XendError(errmsg)
+ return buf
+
+def save(xd, fd, dominfo):
+ write_exact(fd, SIGNATURE, "could not write guest state file: signature")
+
+ config = sxp.to_string(dominfo.sxpr())
+ write_exact(fd, pack("!i", len(config)),
+ "could not write guest state file: config len")
+ write_exact(fd, config, "could not write guest state file: config")
+
+ cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
+ str(dominfo.id)]
+ log.info("[xc_save] " + join(cmd))
+ child = xPopen3(cmd, True, -1, [fd, xc.handle()])
+
+ lasterr = ""
+ p = select.poll()
+ p.register(child.fromchild.fileno())
+ p.register(child.childerr.fileno())
+ while True:
+ r = p.poll()
+ for (fd, event) in r:
+ if not event & select.POLLIN:
+ continue
+ if fd == child.childerr.fileno():
+ l = child.childerr.readline()
+ log.error(l.rstrip())
+ lasterr = l.rstrip()
+ if fd == child.fromchild.fileno():
+ l = child.fromchild.readline()
+ if l.rstrip() == "suspend":
+ log.info("suspending %d" % dominfo.id)
+ xd.domain_shutdown(dominfo.id, reason='suspend')
+ dominfo.state_wait("suspended")
+ log.info("suspend %d done" % dominfo.id)
+ child.tochild.write("done\n")
+ child.tochild.flush()
+ if filter(lambda (fd, event): event & select.POLLHUP, r):
+ break
+
+ if child.wait() >> 8 == 127:
+ lasterr = "popen %s failed" % PATH_XC_SAVE
+ if child.wait() != 0:
+ raise XendError("xc_save failed: %s" % lasterr)
+
+ xd.domain_destroy(dominfo.id)
+ return None
+
+def restore(xd, fd):
+ signature = read_exact(fd, len(SIGNATURE),
+ "not a valid guest state file: signature read")
+ if signature != SIGNATURE:
+ raise XendError("not a valid guest state file: found '%s'" %
+ signature)
+
+ l = read_exact(fd, sizeof_int,
+ "not a valid guest state file: config size read")
+ vmconfig_size = unpack("!i", l)[0]
+ vmconfig_buf = read_exact(fd, vmconfig_size,
+ "not a valid guest state file: config read")
+
+ p = sxp.Parser()
+ p.input(vmconfig_buf)
+ if not p.ready:
+ raise XendError("not a valid guest state file: config parse")
+
+ vmconfig = p.get_val()
+ dominfo = xd.domain_configure(vmconfig)
+
+ l = read_exact(fd, sizeof_unsigned_long,
+ "not a valid guest state file: pfn count read")
+ nr_pfns = unpack("=L", l)[0] # XXX endianness
+ if nr_pfns > 1024*1024: # XXX
+ raise XendError(
+ "not a valid guest state file: pfn count out of range")
+
+ cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
+ str(dominfo.id), str(nr_pfns)]
+ log.info("[xc_restore] " + join(cmd))
+ child = xPopen3(cmd, True, -1, [fd, xc.handle()])
+ child.tochild.close()
+
+ lasterr = ""
+ p = select.poll()
+ p.register(child.fromchild.fileno())
+ p.register(child.childerr.fileno())
+ while True:
+ r = p.poll()
+ for (fd, event) in r:
+ if not event & select.POLLIN:
+ continue
+ if fd == child.childerr.fileno():
+ l = child.childerr.readline()
+ log.error(l.rstrip())
+ lasterr = l.rstrip()
+ if fd == child.fromchild.fileno():
+ l = child.fromchild.readline()
+ log.info(l.rstrip())
+ if filter(lambda (fd, event): event & select.POLLHUP, r):
+ break
+
+ if child.wait() >> 8 == 127:
+ lasterr = "popen %s failed" % PATH_XC_RESTORE
+ if child.wait() != 0:
+ raise XendError("xc_restore failed: %s" % lasterr)
+
+ return dominfo
diff --git a/tools/python/xen/xend/XendClient.py b/tools/python/xen/xend/XendClient.py
index 4e733b7fde..012a8b8dcc 100644
--- a/tools/python/xen/xend/XendClient.py
+++ b/tools/python/xen/xend/XendClient.py
@@ -2,7 +2,7 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
"""Client API for the HTTP interface on xend.
Callable as a script - see main().
-Supports synchronous or asynchronous connection to xend.
+Supports inet or unix connection to xend.
This API is the 'control-plane' for xend.
The 'data-plane' is done separately. For example, consoles
@@ -15,7 +15,9 @@ import types
import sxp
import PrettyPrint
-from XendProtocol import XendClientProtocol, SynchXendClientProtocol, XendError
+from XendProtocol import HttpXendClientProtocol, \
+ UnixXendClientProtocol, \
+ XendError
DEBUG = 0
@@ -32,15 +34,6 @@ def fileof(val):
return val
raise XendError('cannot convert value')
-# todo: need to sort of what urls/paths are using for objects.
-# e.g. for domains at the moment return '0'.
-# should probably return abs path w.r.t. server, e.g. /xend/domain/0.
-# As an arg, assume abs path is obj uri, otherwise just id.
-
-# Function to convert to full url: Xend.uri(path), e.g.
-# maps /xend/domain/0 to http://wray-m-3.hpl.hp.com:8000/xend/domain/0
-# And should accept urls for ids?
-
class URL:
"""A URL.
"""
@@ -115,7 +108,7 @@ class Xend:
@param root: xend root path on the server
"""
if client is None:
- client = SynchXendClientProtocol()
+ client = HttpXendClientProtocol()
self.client = client
self.bind(srv, root)
@@ -162,9 +155,6 @@ class Xend:
def vneturl(self, id=''):
return self.url.relative('vnet/' + str(id))
- def eventurl(self, id=''):
- return self.url.relative('event/' + str(id))
-
def xend(self):
return self.xendGet(self.url)
@@ -189,11 +179,6 @@ class Xend:
def xend_node_log(self):
return self.xendGet(self.nodeurl('log'))
- def xend_node_cpu_rrobin_slice_set(self, slice):
- return self.xendPost(self.nodeurl(),
- {'op' : 'cpu_rrobin_slice_set',
- 'slice' : slice })
-
def xend_node_cpu_bvt_slice_set(self, ctx_allow):
return self.xendPost(self.nodeurl(),
{'op' : 'cpu_bvt_slice_set',
@@ -251,10 +236,11 @@ class Xend:
'live' : live,
'resource' : resource })
- def xend_domain_pincpu(self, id, cpu):
+ def xend_domain_pincpu(self, id, vcpu, cpumap):
return self.xendPost(self.domainurl(id),
{'op' : 'pincpu',
- 'cpu' : cpu })
+ 'vcpu' : vcpu,
+ 'cpumap' : cpumap })
def xend_domain_cpu_bvt_set(self, id, mcuadv, warpback, warpvalue, warpl, warpu):
return self.xendPost(self.domainurl(id),
@@ -265,42 +251,55 @@ class Xend:
'warpl' : warpl,
'warpu' : warpu })
- def xend_domain_cpu_atropos_set(self, id, period, slice, latency, xtratime):
+ def xend_domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
return self.xendPost(self.domainurl(id),
- {'op' : 'cpu_atropos_set',
- 'period' : period,
- 'slice' : slice,
- 'latency' : latency,
- 'xtratime': xtratime })
+ {'op' : 'cpu_sedf_set',
+ 'period' : period,
+ 'slice' : slice,
+ 'latency' : latency,
+ 'extratime' : extratime,
+ 'weight' : weight })
def xend_domain_maxmem_set(self, id, memory):
return self.xendPost(self.domainurl(id),
- { 'op' : 'maxmem_set',
- 'memory' : memory })
-
- def xend_domain_vifs(self, id):
- return self.xendGet(self.domainurl(id),
- { 'op' : 'vifs' })
+ { 'op' : 'maxmem_set',
+ 'memory' : memory })
- def xend_domain_vif(self, id, vif):
- return self.xendGet(self.domainurl(id),
- { 'op' : 'vif',
- 'vif' : vif })
+ def xend_domain_mem_target_set(self, id, mem_target):
+ val = self.xendPost(self.domainurl(id),
+ {'op' : 'mem_target_set',
+ 'target' : mem_target })
+ return val
- def xend_domain_vbds(self, id):
- return self.xendGet(self.domainurl(id),
- {'op' : 'vbds'})
+ def xend_domain_vif_limit(self, id, vif, credit, period):
+ return self.xendPost(self.domainurl(id),
+ { 'op' : 'vif_limit_set',
+ 'vif' : vif,
+ 'credit' : credit,
+ 'period' : period })
- def xend_domain_vbd(self, id, vbd):
- return self.xendGet(self.domainurl(id),
- {'op' : 'vbd',
- 'vbd' : vbd })
+ def xend_domain_devices(self, id, type):
+ return self.xendPost(self.domainurl(id),
+ {'op' : 'devices',
+ 'type' : type })
+ def xend_domain_device(self, id, type, idx):
+ return self.xendPost(self.domainurl(id),
+ {'op' : 'device',
+ 'type' : type,
+ 'idx' : idx })
+
def xend_domain_device_create(self, id, config):
return self.xendPost(self.domainurl(id),
{'op' : 'device_create',
'config' : fileof(config) })
+ def xend_domain_device_refresh(self, id, type, idx):
+ return self.xendPost(self.domainurl(id),
+ {'op' : 'device_refresh',
+ 'type' : type,
+ 'idx' : idx })
+
def xend_domain_device_destroy(self, id, type, idx):
return self.xendPost(self.domainurl(id),
{'op' : 'device_destroy',
@@ -338,63 +337,29 @@ class Xend:
return self.xendPost(self.vneturl(id),
{'op' : 'delete' })
- def xend_event_inject(self, sxpr):
- val = self.xendPost(self.eventurl(),
- {'op' : 'inject',
- 'event' : fileof(sxpr) })
-
- def xend_domain_mem_target_set(self, id, mem_target):
- val = self.xendPost(self.domainurl(id),
- {'op' : 'mem_target_set',
- 'target' : mem_target })
- return val
-
-def getAsynchXendClientProtocol():
- """Load AsynchXendClientProtocol on demand to avoid the cost.
+def getHttpServer(srv=None):
+ """Create and return a xend client.
"""
- global AsynchXendClientProtocol
- try:
- AsynchXendClientProtocol
- except:
- from XendAsynchProtocol import AsynchXendClientProtocol
- return AsynchXendClientProtocol
-
-def getAsynchServer():
- """Load AsynchXendClientProtocol and create an asynch xend client.
+ return Xend(srv=srv, client=XendClientProtocol())
- @return asynch Xend
+def getUnixServer(srv=None):
+ """Create and return a unix-domain xend client.
"""
- getAsynchXendClientProtocol()
- return Xend(AsynchXendClientProtocol())
+ return Xend(client=UnixXendClientProtocol(srv))
-def xendmain(srv, asynch, fn, args):
- if asynch:
- getAsynchXendClientProtocol()
- client = AsynchXendClientProtocol()
+def xendmain(srv, fn, args, unix=False):
+ if unix:
+ xend = getUnixServer(srv)
else:
- client = None
- xend = Xend(srv=srv, client=client)
+ xend = getHttpServer(srv)
xend.rc = 0
try:
v = getattr(xend, fn)(*args)
+ PrettyPrint.prettyprint(v)
+ return 0
except XendError, err:
print 'ERROR:', err
return 1
- if asynch:
- def cbok(val):
- PrettyPrint.prettyprint(val)
- reactor.stop()
- def cberr(err):
- print 'ERROR:', err
- xend.rc = 1
- reactor.stop()
- v.addCallback(cbok)
- v.addErrback(cberr)
- reactor.run()
- return xend.rc
- else:
- PrettyPrint.prettyprint(v)
- return 0
def main(argv):
"""Call an API function:
@@ -411,16 +376,16 @@ python XendClient.py domain 0
"""
global DEBUG
from getopt import getopt
- short_options = 'x:ad'
- long_options = ['xend=', 'asynch', 'debug']
+ short_options = 'x:au:d'
+ long_options = ['xend=', 'unix=', 'debug']
(options, args) = getopt(argv[1:], short_options, long_options)
srv = None
- asynch = 0
+ unix = 1
for k, v in options:
if k in ['-x', '--xend']:
srv = v
- elif k in ['-a', '--asynch']:
- asynch = 1
+ elif k in ['-u', '--unix']:
+ unix = int(v)
elif k in ['-d', '--debug']:
DEBUG = 1
if len(args):
@@ -431,9 +396,9 @@ python XendClient.py domain 0
args = []
if not fn.startswith('xend'):
fn = 'xend_' + fn
- sys.exit(xendmain(srv, asynch, fn, args))
+ sys.exit(xendmain(srv, fn, args, unix=unix))
if __name__ == "__main__":
main(sys.argv)
else:
- server = Xend()
+ server = getUnixServer()
diff --git a/tools/python/xen/xend/XendConsole.py b/tools/python/xen/xend/XendConsole.py
index fd7b603cd6..7a04f5ddde 100644
--- a/tools/python/xen/xend/XendConsole.py
+++ b/tools/python/xen/xend/XendConsole.py
@@ -1,32 +1,26 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
+import XendRoot; xroot = XendRoot.instance()
from XendError import XendError
-import EventServer
-eserver = EventServer.instance()
-
-from xen.xend.server import SrvDaemon
-daemon = SrvDaemon.instance()
-
class XendConsole:
def __init__(self):
pass
- eserver.subscribe('xend.domain.died', self.onDomainDied)
- eserver.subscribe('xend.domain.destroy', self.onDomainDied)
-
- def onDomainDied(self, event, val):
- pass
def console_ls(self):
return [ c.console_port for c in self.consoles() ]
def consoles(self):
- return daemon.get_consoles()
-
- def console_create(self, dom, console_port=None):
- consinfo = daemon.console_create(dom, console_port=console_port)
- return consinfo
+ l = []
+ xd = XendRoot.get_component('xen.xend.XendDomain')
+ for vm in xd.list():
+ ctrl = vm.getDeviceController("console", error=False)
+ if (not ctrl): continue
+ console = ctrl.getDevice(0)
+ if (not console): continue
+ l.append(console)
+ return l
def console_get(self, id):
id = int(id)
diff --git a/tools/python/xen/xend/XendDB.py b/tools/python/xen/xend/XendDB.py
index 6a27e65b58..1701b5183b 100644
--- a/tools/python/xen/xend/XendDB.py
+++ b/tools/python/xen/xend/XendDB.py
@@ -20,6 +20,12 @@ class XendDB:
self.dbpath = os.path.join(self.dbpath, path)
pass
+ def listdir(self, dpath):
+ try:
+ return dircache.listdir(dpath)
+ except:
+ return []
+
def filepath(self, path):
return os.path.join(self.dbpath, path)
@@ -52,21 +58,37 @@ class XendDB:
return self.savefile(fpath, sxpr)
def savefile(self, fpath, sxpr):
+ backup = False
fdir = os.path.dirname(fpath)
if not os.path.isdir(fdir):
os.makedirs(fdir)
+ if os.path.exists(fpath):
+ backup = True
+ real_fpath = fpath
+ fpath += ".new."
+
fout = file(fpath, "wb+")
try:
- t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
- fout.write("# %s %s\n" % (fpath, t))
- sxp.show(sxpr, out=fout)
- finally:
- fout.close()
+ try:
+ t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+ fout.write("# %s %s\n" % (fpath, t))
+ sxp.show(sxpr, out=fout)
+ finally:
+ fout.close()
+ except:
+ if backup:
+ try:
+ os.unlink(fpath)
+ except:
+ pass
+ raise
+ if backup:
+ os.rename(fpath, real_fpath)
def fetchall(self, path):
dpath = self.filepath(path)
d = {}
- for k in dircache.listdir(dpath):
+ for k in self.listdir(dpath):
try:
v = self.fetchfile(os.path.join(dpath, k))
d[k] = v
@@ -84,8 +106,7 @@ class XendDB:
def ls(self, path):
dpath = self.filepath(path)
- return dircache.listdir(dpath)
-
+ return self.listdir(dpath)
diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py
index 1b01b43cfa..ff688f6df1 100644
--- a/tools/python/xen/xend/XendDomain.py
+++ b/tools/python/xen/xend/XendDomain.py
@@ -1,53 +1,48 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
+# Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
"""Handler for domain operations.
Nothing here is persistent (across reboots).
Needs to be persistent for one uptime.
"""
+import errno
+import os
import sys
+import time
import traceback
-from twisted.internet import defer
-#defer.Deferred.debug = 1
-from twisted.internet import reactor
-
import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
-import sxp
-import XendRoot
-xroot = XendRoot.instance()
-import XendDB
-import XendDomainInfo
-import XendMigrate
-import EventServer
-from XendError import XendError
-from XendLogging import log
+from xen.xend import sxp
+from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import XendCheckpoint
+from xen.xend.XendDomainInfo import XendDomainInfo, shutdown_reason
+from xen.xend import EventServer; eserver = EventServer.instance()
+from xen.xend.XendError import XendError
+from xen.xend.XendLogging import log
+from xen.xend import scheduler
+from xen.xend.server import channel
+from xen.xend.server import relocate
+from xen.xend.uuid import getUuid
+from xen.xend.xenstore import XenNode, DBMap
+__all__ = [ "XendDomain" ]
-from xen.xend.server import SrvDaemon
-xend = SrvDaemon.instance()
+SHUTDOWN_TIMEOUT = 30
-eserver = EventServer.instance()
+class XendDomainDict(dict):
+ def get_by_name(self, name):
+ try:
+ return filter(lambda d: d.name == name, self.values())[0]
+ except IndexError, err:
+ return None
-__all__ = [ "XendDomain" ]
-
class XendDomain:
"""Index of all domains. Singleton.
"""
- """Path to domain database."""
- dbpath = "domain"
-
- """Table of domain info indexed by domain id."""
- domain_by_id = {}
- domain_by_name = {}
-
- """Table of domains to restart, indexed by domain id."""
- restarts_by_id = {}
- restarts_by_name = {}
-
- """Table of delayed calls."""
- schedule = {}
+ """Dict of domain info indexed by domain id."""
+ domains = None
def __init__(self):
# Hack alert. Python does not support mutual imports, but XendDomainInfo
@@ -55,268 +50,204 @@ class XendDomain:
# to import XendDomain from XendDomainInfo causes unbounded recursion.
# So we stuff the XendDomain instance (self) into xroot's components.
xroot.add_component("xen.xend.XendDomain", self)
- # Table of domain info indexed by domain id.
- self.db = XendDB.XendDB(self.dbpath)
- self.domain_db = self.db.fetchall("")
- # XXXcl maybe check if there's only dom0 if we _really_ need
- # to remove the db
- # self.rm_all()
+ self.domains = XendDomainDict()
+ self.dbmap = DBMap(db=XenNode("/domain"))
eserver.subscribe('xend.virq', self.onVirq)
self.initial_refresh()
- def onVirq(self, event, val):
- """Event handler for virq.
- """
- self.reap()
-
- def schedule_later(self, _delay, _name, _fn, *args):
- """Schedule a function to be called later (if not already scheduled).
-
- @param _delay: delay in seconds
- @param _name: schedule name
- @param _fn: function
- @param args: arguments
- """
- if self.schedule.get(_name): return
- self.schedule[_name] = reactor.callLater(_delay, _fn, *args)
-
- def schedule_cancel(self, name):
- """Cancel a scheduled function call.
-
- @param name: schedule name to cancel
- """
- callid = self.schedule.get(name)
- if not callid:
- return
- if callid.active():
- callid.cancel()
- del self.schedule[name]
-
- def reap_schedule(self, delay=0):
- """Schedule reap to be called later.
+ def list(self):
+ """Get list of domain objects.
- @param delay: delay in seconds
+ @return: domain objects
"""
- self.schedule_later(delay, 'reap', self.reap)
-
- def reap_cancel(self):
- """Cancel any scheduled reap.
+ return self.domains.values()
+
+ def onVirq(self, event, val):
+ """Event handler for virq.
"""
- self.schedule_cancel('reap')
+ self.refresh(cleanup=True)
- def refresh_schedule(self, delay=0):
- """Schedule refresh to be called later.
-
- @param delay: delay in seconds
+ def xen_domains(self):
+ """Get table of domains indexed by id from xc.
"""
- self.schedule_later(delay, 'refresh', self.refresh)
+ domlist = xc.domain_getinfo()
+ doms = {}
+ for d in domlist:
+ domid = d['dom']
+ doms[domid] = d
+ return doms
- def refresh_cancel(self):
- """Cancel any scheduled refresh.
- """
- self.schedule_cancel('refresh')
+ def xen_domain(self, dom):
+ """Get info about a single domain from xc.
+ Returns None if not found.
- def domain_restarts_schedule(self, delay=0):
- """Schedule domain_restarts to be called later.
-
- @param delay: delay in seconds
+ @param dom domain id (int)
"""
- self.schedule_later(delay, 'domain_restarts', self.domain_restarts)
-
- def domain_restarts_cancel(self):
- """Cancel any scheduled call of domain_restarts.
- """
- self.schedule_cancel('domain_restarts')
-
- def rm_all(self):
- """Remove all domain info. Used after reboot.
- """
- for (k, v) in self.domain_db.items():
- self._delete_domain(k, notify=0)
+ dominfo = xc.domain_getinfo(dom, 1)
+ if dominfo == [] or dominfo[0]['dom'] != dom:
+ dominfo = None
+ else:
+ dominfo = dominfo[0]
+ return dominfo
def initial_refresh(self):
- """Refresh initial domain info from domain_db.
+ """Refresh initial domain info from db.
"""
-
- def cb_all_ok(val):
- self.refresh()
-
- domlist = xc.domain_getinfo()
- doms = {}
- for d in domlist:
- domid = str(d['dom'])
- doms[domid] = d
- dlist = []
- for config in self.domain_db.values():
- domid = str(sxp.child_value(config, 'id'))
- if domid in doms:
- d_dom = self._new_domain(config, doms[domid])
- dlist.append(d_dom)
+ doms = self.xen_domains()
+ self.dbmap.readDB()
+ for domdb in self.dbmap.values():
+ try:
+ domid = int(domdb.id)
+ except:
+ domid = None
+ # XXX if domid in self.domains, then something went wrong
+ if (domid is None) or (domid in self.domains):
+ domdb.delete()
+ elif domid in doms:
+ try:
+ self._new_domain(domdb, doms[domid])
+ except Exception, ex:
+ log.exception("Error recreating domain info: id=%d", domid)
+ self._delete_domain(domid)
else:
self._delete_domain(domid)
- d_all = defer.DeferredList(dlist, fireOnOneErrback=1)
- d_all.addCallback(cb_all_ok)
-
- def sync(self):
- """Sync domain db to disk.
- """
- self.db.saveall("", self.domain_db)
-
- def sync_domain(self, dom):
- """Sync info for a domain to disk.
-
- dom domain id (string)
- """
- self.db.save(dom, self.domain_db[dom])
+ self.refresh(cleanup=True)
def close(self):
pass
- def _new_domain(self, savedinfo, info):
+ def _new_domain(self, db, info):
"""Create a domain entry from saved info.
- @param savedinfo: saved info from the db
- @param info: domain info from xen
- @return: deferred
+ @param db: saved info from the db
+ @param info: domain info from xen
+ @return: domain
"""
- def cbok(dominfo):
- self.domain_by_id[dominfo.id] = dominfo
- self.domain_by_name[dominfo.name] = dominfo
- if dominfo.restart_pending():
- self.domain_restart_add(dominfo)
-
- deferred = XendDomainInfo.vm_recreate(savedinfo, info)
- deferred.addCallback(cbok)
- return deferred
+ dominfo = XendDomainInfo.recreate(db, info)
+ self.domains[dominfo.id] = dominfo
+ return dominfo
- def _add_domain(self, info, notify=1):
+ def _add_domain(self, info, notify=True):
"""Add a domain entry to the tables.
@param info: domain info object
@param notify: send a domain created event if true
"""
- self.domain_by_id[info.id] = info
- self.domain_db[info.id] = info.sxpr()
- for k, d in self.domain_by_name.items():
- if k != d.name:
- del self.domain_by_name[k]
- if info.name:
- self.domain_by_name[info.name] = info
- self.sync_domain(info.id)
- if notify: eserver.inject('xend.domain.create', [info.name, info.id])
-
- def _delete_domain(self, id, notify=1):
+ # Remove entries under the wrong id.
+ for i, d in self.domains.items():
+ if i != d.id:
+ del self.domains[i]
+ self.dbmap.delete(d.uuid)
+ if info.id in self.domains:
+ notify = False
+ self.domains[info.id] = info
+ info.exportToDB(save=True)
+ if notify:
+ eserver.inject('xend.domain.create', [info.name, info.id])
+
+ def _delete_domain(self, id, notify=True):
"""Remove a domain from the tables.
@param id: domain id
@param notify: send a domain died event if true
"""
- for (k, info) in self.domain_by_name.items():
- if info.id == id:
- del self.domain_by_name[k]
- if id in self.domain_by_id:
- info = self.domain_by_id[id]
- del self.domain_by_id[id]
- if notify: eserver.inject('xend.domain.died', [info.name, info.id])
- if id in self.domain_db:
- del self.domain_db[id]
- self.db.delete(id)
+ try:
+ if self.xen_domain(id):
+ return
+ except:
+ pass
+ info = self.domains.get(id)
+ if info:
+ del self.domains[id]
+ info.cleanup()
+ info.delete()
+ if notify:
+ eserver.inject('xend.domain.died', [info.name, info.id])
+ # XXX this should not be needed
+ for domdb in self.dbmap.values():
+ try:
+ domid = int(domdb.id)
+ except:
+ domid = None
+ if (domid is None) or (domid == id):
+ domdb.delete()
def reap(self):
"""Look for domains that have crashed or stopped.
Tidy them up.
"""
- self.reap_cancel()
- domlist = xc.domain_getinfo()
casualties = []
- for d in domlist:
+ doms = self.xen_domains()
+ for d in doms.values():
dead = 0
dead = dead or (d['crashed'] or d['shutdown'])
dead = dead or (d['dying'] and
not(d['running'] or d['paused'] or d['blocked']))
if dead:
casualties.append(d)
- destroyed = 0
for d in casualties:
- id = str(d['dom'])
- dominfo = self.domain_by_id.get(id)
+ id = d['dom']
+ dominfo = self.domains.get(id)
name = (dominfo and dominfo.name) or '??'
- log.debug('XendDomain>reap> domain died name=%s id=%s', name, id)
+ if dominfo and dominfo.is_terminated():
+ continue
+ log.debug('XendDomain>reap> domain died name=%s id=%d', name, id)
if d['shutdown']:
- reason = XendDomainInfo.shutdown_reason(d['shutdown_reason'])
- log.debug('XendDomain>reap> shutdown id=%s reason=%s', id, reason)
+ reason = shutdown_reason(d['shutdown_reason'])
+ log.debug('XendDomain>reap> shutdown name=%s id=%d reason=%s', name, id, reason)
if reason in ['suspend']:
if dominfo and dominfo.is_terminated():
- log.debug('XendDomain>reap> Suspended domain died id=%s', id)
+ log.debug('XendDomain>reap> Suspended domain died id=%d', id)
else:
eserver.inject('xend.domain.suspended', [name, id])
+ if dominfo:
+ dominfo.state_set("suspended")
continue
if reason in ['poweroff', 'reboot']:
eserver.inject('xend.domain.exit', [name, id, reason])
self.domain_restart_schedule(id, reason)
else:
+ if xroot.get_enable_dump():
+ self.domain_dumpcore(id)
eserver.inject('xend.domain.exit', [name, id, 'crash'])
- destroyed += 1
self.final_domain_destroy(id)
- if self.domain_restarts_exist():
- self.domain_restarts_schedule()
- if destroyed:
- self.refresh_schedule(delay=1)
- def refresh(self):
+ def refresh(self, cleanup=False):
"""Refresh domain list from Xen.
"""
- self.refresh_cancel()
- domlist = xc.domain_getinfo()
- # Index the domlist by id.
+ if cleanup:
+ self.reap()
+ doms = self.xen_domains()
# Add entries for any domains we don't know about.
- doms = {}
- for d in domlist:
- id = str(d['dom'])
- doms[id] = d
- if id not in self.domain_by_id:
- savedinfo = None
- deferred = XendDomainInfo.vm_recreate(savedinfo, d)
- def cbok(dominfo):
- self._add_domain(dominfo)
- deferred.addCallback(cbok)
+ for id in doms.keys():
+ if id not in self.domains:
+ self.domain_lookup(id)
# Remove entries for domains that no longer exist.
- for d in self.domain_by_id.values():
+ # Update entries for existing domains.
+ do_domain_restarts = False
+ for d in self.domains.values():
info = doms.get(d.id)
if info:
d.update(info)
+ elif d.restart_pending():
+ do_domain_restarts = True
else:
self._delete_domain(d.id)
- self.reap_schedule(delay=1)
+ if cleanup and do_domain_restarts:
+ scheduler.now(self.domain_restarts)
def update_domain(self, id):
- """Update the saved info for a domain.
+ """Update information for a single domain.
@param id: domain id
"""
- dominfo = self.domain_by_id.get(id)
+ dominfo = self.xen_domain(id)
if dominfo:
- self.domain_db[id] = dominfo.sxpr()
- self.sync_domain(id)
-
- def refresh_domain(self, id):
- """Refresh information for a single domain.
-
- @param id: domain id
- """
- dom = int(id)
- dominfo = xc.domain_getinfo(dom, 1)
- if dominfo == [] or dominfo[0]['dom'] != dom:
- try:
- self._delete_domain(id)
- except:
- log.exception('refresh_domain> error')
- raise
- pass
- else:
- d = self.domain_by_id.get(id)
+ d = self.domains.get(id)
if d:
- d.update(dominfo[0])
+ d.update(dominfo)
+ else:
+ self._delete_domain(id)
def domain_ls(self):
"""Get list of domain names.
@@ -324,7 +255,9 @@ class XendDomain:
@return: domain names
"""
self.refresh()
- return self.domain_by_name.keys()
+ doms = self.domains.values()
+ doms.sort(lambda x, y: cmp(x.name, y.name))
+ return map(lambda x: x.name, doms)
def domain_ls_ids(self):
"""Get list of domain ids.
@@ -332,104 +265,98 @@ class XendDomain:
@return: domain names
"""
self.refresh()
- return self.domain_by_id.keys()
-
- def domains(self):
- """Get list of domain objects.
+ return self.domains.keys()
- @return: domain objects
- """
- self.refresh()
- return self.domain_by_id.values()
-
def domain_create(self, config):
"""Create a domain from a configuration.
@param config: configuration
- @return: deferred
+ @return: domain
"""
- def cbok(dominfo):
- self._add_domain(dominfo)
- return dominfo
- deferred = XendDomainInfo.vm_create(config)
- deferred.addCallback(cbok)
- return deferred
+ dominfo = XendDomainInfo.create(self.dbmap, config)
+ return dominfo
def domain_restart(self, dominfo):
"""Restart a domain.
@param dominfo: domain object
- @return: deferred
"""
- def cbok(dominfo):
- self._add_domain(dominfo)
- return dominfo
- log.info("Restarting domain: id=%s name=%s", dominfo.id, dominfo.name)
+ log.info("Restarting domain: name=%s id=%s", dominfo.name, dominfo.id)
eserver.inject("xend.domain.restart",
[dominfo.name, dominfo.id, "begin"])
- deferred = dominfo.restart()
- deferred.addCallback(cbok)
- return deferred
+ try:
+ dominfo.restart()
+ log.info('Restarted domain name=%s id=%s', dominfo.name, dominfo.id)
+ eserver.inject("xend.domain.restart",
+ [dominfo.name, dominfo.id, "success"])
+ self.domain_unpause(dominfo.id)
+ except Exception, ex:
+ log.exception("Exception restarting domain: name=%s id=%s",
+ dominfo.name, dominfo.id)
+ eserver.inject("xend.domain.restart",
+ [dominfo.name, dominfo.id, "fail"])
+ return dominfo
- def domain_configure(self, id, vmconfig):
+ def domain_configure(self, vmconfig):
"""Configure an existing domain. This is intended for internal
use by domain restore and migrate.
- @param id: domain id
@param vmconfig: vm configuration
- @return: deferred
"""
config = sxp.child_value(vmconfig, 'config')
- dominfo = self.domain_lookup(id)
- log.debug('domain_configure> id=%s config=%s', str(id), str(config))
- if dominfo.config:
- raise XendError("Domain already configured: " + dominfo.id)
- def cbok(dominfo):
- self._add_domain(dominfo)
- return dominfo
- deferred = dominfo.dom_construct(dominfo.dom, config)
- deferred.addCallback(cbok)
- return deferred
-
- def domain_restore(self, src, progress=0):
+ uuid = sxp.child_value(vmconfig, 'uuid')
+ dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid)
+ return dominfo
+
+ def domain_restore(self, src, progress=False):
"""Restore a domain from file.
@param src: source file
@param progress: output progress if true
- @return: deferred
"""
-
- if 0:
- def cbok(dominfo):
- self._add_domain(dominfo)
- return dominfo
- deferred = XendDomainInfo.vm_restore(src, progress=progress)
- deferred.addCallback(cbok)
- else:
- xmigrate = XendMigrate.instance()
- deferred = xmigrate.restore_begin(src)
- return deferred
-
+
+ try:
+ fd = os.open(src, os.O_RDONLY)
+ return XendCheckpoint.restore(self, fd)
+ except OSError, ex:
+ raise XendError("can't read guest state file %s: %s" %
+ (src, ex[1]))
+
def domain_get(self, id):
"""Get up-to-date info about a domain.
@param id: domain id
@return: domain object (or None)
"""
- id = str(id)
- self.refresh_domain(id)
- return self.domain_by_id.get(id)
+ self.update_domain(id)
+ return self.domains.get(id)
- def domain_lookup(self, name):
- name = str(name)
- dominfo = self.domain_by_name.get(name) or self.domain_by_id.get(name)
- if dominfo:
- return dominfo
- raise XendError('invalid domain:' + name)
+ def domain_lookup(self, id):
+ dominfo = self.domains.get(id)
+ if not dominfo:
+ try:
+ info = self.xen_domain(id)
+ if info:
+ uuid = getUuid()
+ log.info(
+ "Creating entry for unknown domain: id=%d uuid=%s",
+ id, uuid)
+ db = self.dbmap.addChild(uuid)
+ dominfo = XendDomainInfo.recreate(db, info)
+ self._add_domain(dominfo)
+ except Exception, ex:
+ log.exception("Error creating domain info: id=%d", id)
+ return dominfo
- def domain_exists(self, name):
- name = str(name)
- return self.domain_by_name.get(name) or self.domain_by_id.get(name)
+ def domain_lookup_by_name(self, name):
+ dominfo = self.domains.get_by_name(name)
+ if not dominfo:
+ try:
+ id = int(name)
+ dominfo = self.domain_lookup(id)
+ except ValueError:
+ pass
+ return dominfo
def domain_unpause(self, id):
"""Unpause domain execution.
@@ -439,7 +366,7 @@ class XendDomain:
dominfo = self.domain_lookup(id)
eserver.inject('xend.domain.unpause', [dominfo.name, dominfo.id])
try:
- return xc.domain_unpause(dom=dominfo.dom)
+ return xc.domain_unpause(dom=dominfo.id)
except Exception, ex:
raise XendError(str(ex))
@@ -451,7 +378,7 @@ class XendDomain:
dominfo = self.domain_lookup(id)
eserver.inject('xend.domain.pause', [dominfo.name, dominfo.id])
try:
- return xc.domain_pause(dom=dominfo.dom)
+ return xc.domain_pause(dom=dominfo.id)
except Exception, ex:
raise XendError(str(ex))
@@ -467,107 +394,99 @@ class XendDomain:
@param reason: shutdown type: poweroff, reboot, suspend, halt
"""
dominfo = self.domain_lookup(id)
- if reason == 'halt':
- self.domain_restart_cancel(dominfo.id)
- else:
- self.domain_restart_schedule(dominfo.id, reason, force=1)
+ self.domain_restart_schedule(dominfo.id, reason, force=True)
eserver.inject('xend.domain.shutdown', [dominfo.name, dominfo.id, reason])
if reason == 'halt':
reason = 'poweroff'
- val = xend.domain_shutdown(dominfo.id, reason, key)
- self.refresh_schedule()
+ val = dominfo.shutdown(reason, key=key)
+ if not reason in ['suspend', 'sysrq']:
+ self.domain_shutdowns()
return val
- def domain_restart_schedule(self, id, reason, force=0):
+ def domain_shutdowns(self):
+ """Process pending domain shutdowns.
+ Destroys domains whose shutdowns have timed out.
+ """
+ timeout = SHUTDOWN_TIMEOUT + 1
+ for dominfo in self.domains.values():
+ if not dominfo.shutdown_pending:
+ # domain doesn't need shutdown
+ continue
+ id = dominfo.id
+ left = dominfo.shutdown_time_left(SHUTDOWN_TIMEOUT)
+ if left <= 0:
+ # Shutdown expired - destroy domain.
+ try:
+ log.info("Domain shutdown timeout expired: name=%s id=%s",
+ dominfo.name, id)
+ self.domain_destroy(id, reason=
+ dominfo.shutdown_pending['reason'])
+ except Exception:
+ pass
+ else:
+ # Shutdown still pending.
+ timeout = min(timeout, left)
+ if timeout <= SHUTDOWN_TIMEOUT:
+ # Pending shutdowns remain - reschedule.
+ scheduler.later(timeout, self.domain_shutdowns)
+
+ def domain_restart_schedule(self, id, reason, force=False):
"""Schedule a restart for a domain if it needs one.
@param id: domain id
@param reason: shutdown reason
"""
- log.debug('domain_restart_schedule> %s %s %d', id, reason, force)
+ log.debug('domain_restart_schedule> %d %s %d', id, reason, force)
dominfo = self.domain_lookup(id)
if not dominfo:
return
- if dominfo.id in self.restarts_by_id:
- return
restart = (force and reason == 'reboot') or dominfo.restart_needed(reason)
if restart:
+ log.info('Scheduling restart for domain: name=%s id=%s',
+ dominfo.name, dominfo.id)
+ eserver.inject("xend.domain.restart",
+ [dominfo.name, dominfo.id, "schedule"])
dominfo.restarting()
- self.domain_restart_add(dominfo)
-
- def domain_restart_add(self, dominfo):
- self.restarts_by_name[dominfo.name] = dominfo
- self.restarts_by_id[dominfo.id] = dominfo
- log.info('Scheduling restart for domain: name=%s id=%s', dominfo.name, dominfo.id)
- eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.id, "schedule"])
- self.domain_restarts_schedule()
-
- def domain_restart_cancel(self, id):
- """Cancel any restart scheduled for a domain.
-
- @param id: domain id
- """
- dominfo = self.restarts_by_id.get(id) or self.restarts_by_name.get(id)
- if dominfo:
- log.info('Cancelling restart for domain: name=%s id=%s', dominfo.name, dominfo.id)
+ else:
+ log.info('Cancelling restart for domain: name=%s id=%s',
+ dominfo.name, dominfo.id)
eserver.inject("xend.domain.restart",
[dominfo.name, dominfo.id, "cancel"])
dominfo.restart_cancel()
- del self.restarts_by_id[dominfo.id]
- del self.restarts_by_name[dominfo.name]
def domain_restarts(self):
"""Execute any scheduled domain restarts for domains that have gone.
"""
- self.domain_restarts_cancel()
- for dominfo in self.restarts_by_id.values():
- if dominfo.id in self.domain_by_id:
+ doms = self.xen_domains()
+ for dominfo in self.domains.values():
+ if not dominfo.restart_pending():
+ continue
+ print 'domain_restarts>', dominfo.name, dominfo.id
+ info = doms.get(dominfo.id)
+ if info:
# Don't execute restart for domains still running.
+ print 'domain_restarts> still runnning: ', dominfo.name
continue
# Remove it from the restarts.
- del self.restarts_by_id[dominfo.id]
- del self.restarts_by_name[dominfo.name]
- try:
- def cbok(dominfo):
- log.info('Restarted domain name=%s id=%s', dominfo.name, dominfo.id)
- eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.id, "success"])
- self.domain_unpause(dominfo.id)
- def cberr(err):
- log.exception("Delayed exception restarting domain: name=%s id=%s",
- dominfo.name, dominfo.id)
- eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.id, "fail"])
-
- deferred = self.domain_restart(dominfo)
- deferred.addCallback(cbok)
- deferred.addErrback(cberr)
- except:
- log.exception("Exception restarting domain: name=%s id=%s",
- dominfo.name, dominfo.id)
- eserver.inject("xend.domain.restart",
- [dominfo.name, dominfo.id, "fail"])
- if self.domain_restarts_exist():
- # Run again later if any restarts remain.
- self.refresh_schedule(delay=5)
-
- def domain_restarts_exist(self):
- return len(self.restarts_by_id)
-
+ print 'domain_restarts> restarting: ', dominfo.name
+ self.domain_restart(dominfo)
+
def final_domain_destroy(self, id):
"""Final destruction of a domain..
@param id: domain id
"""
- dominfo = self.domain_lookup(id)
- log.info('Destroying domain: name=%s', dominfo.name)
- eserver.inject('xend.domain.destroy', [dominfo.name, dominfo.id])
- if dominfo:
+ try:
+ dominfo = self.domain_lookup(id)
+ log.info('Destroying domain: name=%s', dominfo.name)
+ eserver.inject('xend.domain.destroy', [dominfo.name, dominfo.id])
val = dominfo.destroy()
- else:
+ except:
#todo
- val = xc.domain_destroy(dom=dominfo.dom)
+ try:
+ val = xc.domain_destroy(dom=id)
+ except Exception, ex:
+ raise XendError(str(ex))
return val
def domain_destroy(self, id, reason='halt'):
@@ -577,48 +496,64 @@ class XendDomain:
@param id: domain id
"""
- if reason == 'halt':
- self.domain_restart_cancel(id)
- elif reason == 'reboot':
- self.domain_restart_schedule(id, reason, force=1)
+ self.domain_restart_schedule(id, reason, force=True)
val = self.final_domain_destroy(id)
- self.refresh_schedule()
return val
- def domain_migrate(self, id, dst, live=0, resource=0):
+ def domain_migrate(self, id, dst, live=False, resource=0):
"""Start domain migration.
@param id: domain id
- @return: deferred
"""
# Need a cancel too?
# Don't forget to cancel restart for it.
dominfo = self.domain_lookup(id)
- xmigrate = XendMigrate.instance()
- val = xmigrate.migrate_begin(dominfo, dst, live=live, resource=resource)
- return val
- def domain_save(self, id, dst, progress=0):
+ port = xroot.get_xend_relocation_port()
+ sock = relocate.setupRelocation(dst, port)
+
+ # temporarily rename domain for localhost migration
+ if dst == "localhost":
+ dominfo.name = "tmp-" + dominfo.name
+
+ try:
+ XendCheckpoint.save(self, sock.fileno(), dominfo)
+ except:
+ if dst == "localhost":
+ dominfo.name = string.replace(dominfo.name, "tmp-", "", 1)
+ raise
+
+ return None
+
+ def domain_save(self, id, dst, progress=False):
"""Start saving a domain to file.
@param id: domain id
@param dst: destination file
@param progress: output progress if true
- @return: deferred
"""
- dominfo = self.domain_lookup(id)
- xmigrate = XendMigrate.instance()
- return xmigrate.save_begin(dominfo, dst)
-
- def domain_pincpu(self, id, cpu):
- """Pin a domain to a cpu.
- @param id: domain
- @param cpu: cpu number
+ try:
+ dominfo = self.domain_lookup(id)
+
+ fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
+
+ return XendCheckpoint.save(self, fd, dominfo)
+
+ except OSError, ex:
+ raise XendError("can't write guest state file %s: %s" %
+ (dst, ex[1]))
+
+ def domain_pincpu(self, id, vcpu, cpumap):
+ """Set which cpus vcpu can use
+
+ @param id: domain
+ @param vcpu: vcpu number
+ @param cpumap: bitmap of usbale cpus
"""
dominfo = self.domain_lookup(id)
try:
- return xc.domain_pincpu(int(dominfo.id), cpu)
+ return xc.domain_pincpu(dominfo.id, vcpu, cpumap)
except Exception, ex:
raise XendError(str(ex))
@@ -627,7 +562,7 @@ class XendDomain:
"""
dominfo = self.domain_lookup(id)
try:
- return xc.bvtsched_domain_set(dom=dominfo.dom, mcuadv=mcuadv,
+ return xc.bvtsched_domain_set(dom=dominfo.id, mcuadv=mcuadv,
warpback=warpback, warpvalue=warpvalue,
warpl=warpl, warpu=warpu)
except Exception, ex:
@@ -638,25 +573,26 @@ class XendDomain:
"""
dominfo = self.domain_lookup(id)
try:
- return xc.bvtsched_domain_get(dominfo.dom)
+ return xc.bvtsched_domain_get(dominfo.id)
except Exception, ex:
raise XendError(str(ex))
- def domain_cpu_atropos_set(self, id, period, slice, latency, xtratime):
- """Set Atropos scheduler parameters for a domain.
+
+ def domain_cpu_sedf_set(self, id, period, slice, latency, extratime, weight):
+ """Set Simple EDF scheduler parameters for a domain.
"""
dominfo = self.domain_lookup(id)
try:
- return xc.atropos_domain_set(dominfo.dom, period, slice, latency, xtratime)
+ return xc.sedf_domain_set(dominfo.id, period, slice, latency, extratime, weight)
except Exception, ex:
raise XendError(str(ex))
- def domain_cpu_atropos_get(self, id):
- """Get Atropos scheduler parameters for a domain.
+ def domain_cpu_sedf_get(self, id):
+ """Get Simple EDF scheduler parameters for a domain.
"""
dominfo = self.domain_lookup(id)
try:
- return xc.atropos_domain_get(dominfo.dom)
+ return xc.sedf_domain_get(dominfo.id)
except Exception, ex:
raise XendError(str(ex))
@@ -665,98 +601,79 @@ class XendDomain:
@param id: domain id
@param devconfig: device configuration
- @return: deferred
"""
dominfo = self.domain_lookup(id)
- self.refresh_schedule()
val = dominfo.device_create(devconfig)
- self.update_domain(dominfo.id)
+ dominfo.exportToDB()
return val
- def domain_device_configure(self, id, devconfig, idx):
+ def domain_device_configure(self, id, devconfig, devid):
"""Configure an existing device for a domain.
@param id: domain id
@param devconfig: device configuration
- @param idx: device index
+ @param devid: device id
@return: updated device configuration
"""
dominfo = self.domain_lookup(id)
- self.refresh_schedule()
- val = dominfo.device_configure(devconfig, idx)
- self.update_domain(dominfo.id)
+ val = dominfo.device_configure(devconfig, devid)
+ dominfo.exportToDB()
return val
+ def domain_device_refresh(self, id, type, devid):
+ """Refresh a device.
- def domain_device_destroy(self, id, type, idx):
+ @param id: domain id
+ @param devid: device id
+ @param type: device type
+ """
+ dominfo = self.domain_lookup(id)
+ val = dominfo.device_refresh(type, devid)
+ dominfo.exportToDB()
+ return val
+
+ def domain_device_destroy(self, id, type, devid):
"""Destroy a device.
@param id: domain id
- @param idx: device index
+ @param devid: device id
@param type: device type
"""
dominfo = self.domain_lookup(id)
- self.refresh_schedule()
- val = dominfo.device_destroy(type, idx)
- self.update_domain(dominfo.id)
+ val = dominfo.device_destroy(type, devid)
+ dominfo.exportToDB()
return val
def domain_devtype_ls(self, id, type):
- """Get list of device indexes for a domain.
+ """Get list of device sxprs for a domain.
@param id: domain
@param type: device type
- @return: device indexes
+ @return: device sxprs
"""
dominfo = self.domain_lookup(id)
- devs = dominfo.get_devices(type)
- return devs
+ return dominfo.getDeviceSxprs(type)
- def domain_devtype_get(self, id, type, idx):
+ def domain_devtype_get(self, id, type, devid):
"""Get a device from a domain.
-
+
@param id: domain
@param type: device type
- @param idx: device index
+ @param devid: device id
@return: device object (or None)
"""
dominfo = self.domain_lookup(id)
- return dominfo.get_device_by_index(type, idx)
+ return dominfo.getDevice(type, devid)
- def domain_vif_ls(self, id):
- """Get list of virtual network interface (vif) indexes for a domain.
-
- @param id: domain
- @return: vif indexes
+ def domain_vif_limit_set(self, id, vif, credit, period):
+ """Limit the vif's transmission rate
"""
- return self.domain_devtype_ls(id, 'vif')
-
- def domain_vif_get(self, id, vif):
- """Get a virtual network interface (vif) from a domain.
-
- @param id: domain
- @param vif: vif index
- @return: vif device object (or None)
- """
- return self.domain_devtype_get(id, 'vif', vif)
-
- def domain_vbd_ls(self, id):
- """Get list of virtual block device (vbd) indexes for a domain.
-
- @param id: domain
- @return: vbd indexes
- """
- return self.domain_devtype_ls(id, 'vbd')
-
- def domain_vbd_get(self, id, vbd):
- """Get a virtual block device (vbd) from a domain.
-
- @param id: domain
- @param vbd: vbd index
- @return: vbd device (or None)
- """
- return self.domain_devtype_get(id, 'vbd', vbd)
-
+ dominfo = self.domain_lookup(id)
+ dev = dominfo.getDevice('vif', vif)
+ if not dev:
+ raise XendError("invalid vif")
+ return dev.setCreditLimit(credit, period)
+
def domain_shadow_control(self, id, op):
"""Shadow page control.
@@ -765,29 +682,47 @@ class XendDomain:
"""
dominfo = self.domain_lookup(id)
try:
- return xc.shadow_control(dominfo.dom, op)
+ return xc.shadow_control(dominfo.id, op)
except Exception, ex:
raise XendError(str(ex))
def domain_maxmem_set(self, id, mem):
"""Set the memory limit for a domain.
- @param dom: domain
+ @param id: domain
@param mem: memory limit (in MB)
@return: 0 on success, -1 on error
"""
dominfo = self.domain_lookup(id)
maxmem = int(mem) * 1024
try:
- return xc.domain_setmaxmem(dominfo.dom, maxmem_kb = maxmem)
+ return xc.domain_setmaxmem(dominfo.id, maxmem_kb = maxmem)
except Exception, ex:
raise XendError(str(ex))
- def domain_mem_target_set(self, id, target):
- return xend.domain_mem_target_set(id, target)
-
+ def domain_mem_target_set(self, id, mem):
+ """Set the memory target for a domain.
+ @param id: domain
+ @param mem: memory target (in MB)
+ @return: 0 on success, -1 on error
+ """
+ dominfo = self.domain_lookup(id)
+ return dominfo.mem_target_set(mem)
+
+ def domain_dumpcore(self, id):
+ """Save a core dump for a crashed domain.
+ @param id: domain
+ """
+ dominfo = self.domain_lookup(id)
+ corefile = "/var/xen/dump/%s.%s.core"% (dominfo.name, dominfo.id)
+ try:
+ xc.domain_dumpcore(dom=dominfo.id, corefile=corefile)
+ except Exception, ex:
+ log.warning("Dumpcore failed, id=%s name=%s: %s",
+ dominfo.id, dominfo.name, ex)
+
def instance():
"""Singleton constructor. Use this instead of the class constructor.
"""
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index c946031309..16415d78a7 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -4,40 +4,33 @@
Includes support for domain construction, using
open-ended configurations.
-Author: Mike Wray <mike.wray@hpl.hp.com>
+Author: Mike Wray <mike.wray@hp.com>
"""
import string
-import types
-import re
-import sys
import os
import time
-
-from twisted.internet import defer
+import threading
import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
-import xen.util.ip
-from xen.util.ip import _readline, _readlines
-
-import sxp
+from xen.util.ip import check_subnet, get_current_ipgw
+from xen.util.blkif import blkdev_uname_to_file
-import XendConsole
-xendConsole = XendConsole.instance()
-from XendLogging import log
-from XendRoot import get_component
+from xen.xend.server import controller
+from xen.xend.server import SrvDaemon; xend = SrvDaemon.instance()
+from xen.xend.server import messages
+from xen.xend.server.channel import EventChannel, channelFactory
-import server.SrvDaemon
-xend = server.SrvDaemon.instance()
+from xen.xend import sxp
+from xen.xend.PrettyPrint import prettyprintstring
+from xen.xend.XendBootloader import bootloader
+from xen.xend.XendLogging import log
+from XendError import XendError, VmError
+from xen.xend.XendRoot import get_component
-from XendError import VmError
-
-"""The length of domain names that Xen can handle.
-The names stored in Xen itself are not used for much, and
-xend can handle domain names of any length.
-"""
-MAX_DOMAIN_NAME = 15
+from xen.xend.uuid import getUuid
+from xen.xend.xenstore import DBVar
"""Flag for a block device backend domain."""
SIF_BLK_BE_DOMAIN = (1<<4)
@@ -47,16 +40,32 @@ SIF_NET_BE_DOMAIN = (1<<5)
"""Shutdown code for poweroff."""
DOMAIN_POWEROFF = 0
+
"""Shutdown code for reboot."""
DOMAIN_REBOOT = 1
+
"""Shutdown code for suspend."""
DOMAIN_SUSPEND = 2
+"""Shutdown code for crash."""
+DOMAIN_CRASH = 3
+
"""Map shutdown codes to strings."""
shutdown_reasons = {
DOMAIN_POWEROFF: "poweroff",
DOMAIN_REBOOT : "reboot",
- DOMAIN_SUSPEND : "suspend" }
+ DOMAIN_SUSPEND : "suspend",
+ DOMAIN_CRASH : "crash",
+ }
+
+"""Map shutdown reasons to the message type to use.
+"""
+shutdown_messages = {
+ 'poweroff' : 'shutdown_poweroff_t',
+ 'reboot' : 'shutdown_reboot_t',
+ 'suspend' : 'shutdown_suspend_t',
+ 'sysrq' : 'shutdown_sysrq_t',
+ }
RESTART_ALWAYS = 'always'
RESTART_ONREBOOT = 'onreboot'
@@ -73,12 +82,13 @@ STATE_RESTART_BOOTING = 'booting'
STATE_VM_OK = "ok"
STATE_VM_TERMINATED = "terminated"
+STATE_VM_SUSPENDED = "suspended"
def domain_exists(name):
# See comment in XendDomain constructor.
xd = get_component('xen.xend.XendDomain')
- return xd.domain_exists(name)
+ return xd.domain_lookup_by_name(name)
def shutdown_reason(code):
"""Get a shutdown reason from a code.
@@ -90,38 +100,6 @@ def shutdown_reason(code):
"""
return shutdown_reasons.get(code, "?")
-def vif_up(iplist):
- """send an unsolicited ARP reply for all non link-local IP addresses.
-
- @param iplist: IP addresses
- """
-
- IP_NONLOCAL_BIND = '/proc/sys/net/ipv4/ip_nonlocal_bind'
-
- def get_ip_nonlocal_bind():
- return int(open(IP_NONLOCAL_BIND, 'r').read()[0])
-
- def set_ip_nonlocal_bind(v):
- print >> open(IP_NONLOCAL_BIND, 'w'), str(v)
-
- def link_local(ip):
- return xen.util.ip.check_subnet(ip, '169.254.0.0', '255.255.0.0')
-
- def arping(ip, gw):
- cmd = '/usr/sbin/arping -A -b -I eth0 -c 1 -s %s %s' % (ip, gw)
- log.debug(cmd)
- os.system(cmd)
-
- gateway = xen.util.ip.get_current_ipgw() or '255.255.255.255'
- nlb = get_ip_nonlocal_bind()
- if not nlb: set_ip_nonlocal_bind(1)
- try:
- for ip in iplist:
- if not link_local(ip):
- arping(ip, gateway)
- finally:
- if not nlb: set_ip_nonlocal_bind(0)
-
config_handlers = {}
def add_config_handler(name, h):
@@ -139,117 +117,17 @@ def get_config_handler(name):
"""
return config_handlers.get(name)
-"""Table of handlers for virtual machine images.
-Indexed by image type.
-"""
-image_handlers = {}
-
-def add_image_handler(name, h):
- """Add a handler for an image type
- @param name: image type
- @param h: handler: fn(config, name, memory, image)
- """
- image_handlers[name] = h
-
-def get_image_handler(name):
- """Get the handler for an image type.
- @param name: image type
- @return: handler or None
- """
- return image_handlers.get(name)
-
"""Table of handlers for devices.
Indexed by device type.
"""
device_handlers = {}
-def add_device_handler(name, h):
- """Add a handler for a device type.
-
- @param name: device type
- @param h: handler: fn(vm, dev)
- """
- device_handlers[name] = h
+def add_device_handler(name, type):
+ device_handlers[name] = type
def get_device_handler(name):
- """Get the handler for a device type.
-
- @param name : device type
- @return; handler or None
- """
- return device_handlers.get(name)
-
-def vm_create(config):
- """Create a VM from a configuration.
- If a vm has been partially created and there is an error it
- is destroyed.
-
- @param config configuration
- @return: Deferred
- @raise: VmError for invalid configuration
- """
- vm = XendDomainInfo()
- return vm.construct(config)
+ return device_handlers[name]
-def vm_recreate(savedinfo, info):
- """Create the VM object for an existing domain.
-
- @param savedinfo: saved info from the domain DB
- @type savedinfo: sxpr
- @param info: domain info from xc
- @type info: xc domain dict
- @return: deferred
- """
- vm = XendDomainInfo()
- vm.recreate = 1
- vm.savedinfo = savedinfo
- vm.setdom(info['dom'])
- #vm.name = info['name']
- vm.memory = info['mem_kb']/1024
- start_time = sxp.child_value(savedinfo, 'start_time')
- if start_time is not None:
- vm.start_time = float(start_time)
- vm.restart_state = sxp.child_value(savedinfo, 'restart_state')
- restart_time = sxp.child_value(savedinfo, 'restart_time')
- if restart_time is not None:
- vm.restart_time = float(restart_time)
- config = sxp.child_value(savedinfo, 'config')
- if config:
- d = vm.construct(config)
- else:
- vm.name = sxp.child_value(savedinfo, 'name', "Domain-%d" % info['dom'])
- d = defer.succeed(vm)
- vm.recreate = 0
- vm.savedinfo = None
- return d
-
-def vm_restore(src, progress=0):
- """Restore a VM from a disk image.
-
- src saved state to restore
- progress progress reporting flag
- returns deferred
- raises VmError for invalid configuration
- """
- vm = XendDomainInfo()
- ostype = "linux" #todo Set from somewhere (store in the src?).
- restorefn = getattr(xc, "%s_restore" % ostype)
- d = restorefn(state_file=src, progress=progress)
- dom = int(d['dom'])
- if dom < 0:
- raise VmError('restore failed')
- try:
- vmconfig = sxp.from_string(d['vmconfig'])
- config = sxp.child_value(vmconfig, 'config')
- except Exception, ex:
- raise VmError('config error: ' + str(ex))
- deferred = vm.dom_construct(dom, config)
- def vifs_cb(val, vm):
- vif_up(vm.ipaddrs)
- return vm
- deferred.addCallback(vifs_cb, vm)
- return deferred
-
def dom_get(dom):
"""Get info from xen for an existing domain.
@@ -261,27 +139,6 @@ def dom_get(dom):
return domlist[0]
return None
-def append_deferred(dlist, v):
- """Append a value to a deferred list if it is a deferred.
-
- @param dlist: list of deferreds
- @param v: value to add
- """
- if isinstance(v, defer.Deferred):
- dlist.append(v)
-
-def dlist_err(val):
- """Error callback suitable for a deferred list.
- In a deferred list the error callback is called with with Failure((error, index)).
- This callback extracts the error and returns it.
-
- @param val: Failure containing (error, index)
- @type val: twisted.internet.failure.Failure
- """
-
- (error, index) = val.value
- return error
-
class XendDomainInfo:
"""Virtual machine object."""
@@ -289,44 +146,163 @@ class XendDomainInfo:
"""
MINIMUM_RESTART_TIME = 20
- def __init__(self):
+ def create(cls, parentdb, config):
+ """Create a VM from a configuration.
+
+ @param parentdb: parent db
+ @param config configuration
+ @raise: VmError for invalid configuration
+ """
+ uuid = getUuid()
+ db = parentdb.addChild(uuid)
+ vm = cls(db)
+ vm.construct(config)
+ vm.saveDB(sync=True)
+ return vm
+
+ create = classmethod(create)
+
+ def recreate(cls, db, info):
+ """Create the VM object for an existing domain.
+
+ @param db: domain db
+ @param info: domain info from xc
+ """
+ dom = info['dom']
+ vm = cls(db)
+ db.readDB()
+ vm.importFromDB()
+ config = vm.config
+ log.debug('info=' + str(info))
+ log.debug('config=' + prettyprintstring(config))
+
+ vm.setdom(dom)
+ vm.memory = info['mem_kb']/1024
+
+ if config:
+ try:
+ vm.recreate = True
+ vm.construct(config)
+ finally:
+ vm.recreate = False
+ else:
+ vm.setName("Domain-%d" % dom)
+
+ vm.exportToDB(save=True)
+ return vm
+
+ recreate = classmethod(recreate)
+
+ def restore(cls, parentdb, config, uuid=None):
+ """Create a domain and a VM object to do a restore.
+
+ @param parentdb: parent db
+ @param config: domain configuration
+ @param uuid: uuid to use
+ """
+ db = parentdb.addChild(uuid)
+ vm = cls(db)
+ dom = xc.domain_create()
+ vm.setdom(dom)
+ vm.dom_construct(vm.id, config)
+ vm.saveDB(sync=True)
+ return vm
+
+ restore = classmethod(restore)
+
+ __exports__ = [
+ DBVar('id', ty='str'),
+ DBVar('name', ty='str'),
+ DBVar('uuid', ty='str'),
+ DBVar('config', ty='sxpr'),
+ DBVar('start_time', ty='float'),
+ DBVar('state', ty='str'),
+ DBVar('store_mfn', ty='long'),
+ DBVar('restart_mode', ty='str'),
+ DBVar('restart_state', ty='str'),
+ DBVar('restart_time', ty='float'),
+ DBVar('restart_count', ty='int'),
+ ]
+
+ def __init__(self, db):
+ self.db = db
+ self.uuid = db.getName()
+
self.recreate = 0
self.restore = 0
+
self.config = None
self.id = None
- self.dom = None
self.cpu_weight = 1
self.start_time = None
self.name = None
self.memory = None
self.image = None
- self.ramdisk = None
- self.cmdline = None
- self.console = None
- self.devices = {}
- self.device_index = {}
- self.configs = []
+
+ self.channel = None
+ self.store_channel = None
+ self.store_mfn = None
+ self.controllers = {}
+
self.info = None
- self.ipaddrs = []
- self.blkif_backend = 0
- self.netif_backend = 0
+ self.blkif_backend = False
+ self.netif_backend = False
#todo: state: running, suspended
self.state = STATE_VM_OK
+ self.state_updated = threading.Condition()
+ self.shutdown_pending = None
+
#todo: set to migrate info if migrating
self.migrate = None
+
self.restart_mode = RESTART_ONREBOOT
self.restart_state = None
self.restart_time = None
+ self.restart_count = 0
+
self.console_port = None
- self.savedinfo = None
+ self.vcpus = 1
+ self.bootloader = None
+
+ def setDB(self, db):
+ self.db = db
+
+ def saveDB(self, save=False, sync=False):
+ self.db.saveDB(save=save, sync=sync)
+
+ def exportToDB(self, save=False, sync=False):
+ if self.channel:
+ self.channel.saveToDB(self.db.addChild("channel"))
+ if self.store_channel:
+ self.store_channel.saveToDB(self.db.addChild("store_channel"))
+ self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync)
+
+ def importFromDB(self):
+ self.db.importFromDB(self, fields=self.__exports__)
def setdom(self, dom):
"""Set the domain id.
@param dom: domain id
"""
- self.dom = int(dom)
- self.id = str(dom)
+ self.id = int(dom)
+ #self.db.id = self.id
+
+ def getDomain(self):
+ return self.id
+
+ def setName(self, name):
+ self.name = name
+ self.db.name = self.name
+
+ def getName(self):
+ return self.name
+
+ def getChannel(self):
+ return self.channel
+
+ def getStoreChannel(self):
+ return self.store_channel
def update(self, info):
"""Update with info from xc.domain_getinfo().
@@ -334,26 +310,90 @@ class XendDomainInfo:
self.info = info
self.memory = self.info['mem_kb'] / 1024
+ def state_set(self, state):
+ self.state_updated.acquire()
+ if self.state != state:
+ self.state = state
+ self.state_updated.notifyAll()
+ self.state_updated.release()
+ self.saveDB()
+
+ def state_wait(self, state):
+ self.state_updated.acquire()
+ while self.state != state:
+ self.state_updated.wait()
+ self.state_updated.release()
+
def __str__(self):
s = "domain"
- s += " id=" + self.id
+ s += " id=" + str(self.id)
s += " name=" + self.name
s += " memory=" + str(self.memory)
- if self.console:
- s += " console=" + str(self.console.console_port)
- if self.image:
- s += " image=" + self.image
+ console = self.getConsole()
+ if console:
+ s += " console=" + str(console.console_port)
s += ""
return s
__repr__ = __str__
+ def getDeviceTypes(self):
+ return self.controllers.keys()
+
+ def getDeviceControllers(self):
+ return self.controllers.values()
+
+ def getDeviceController(self, type, error=True):
+ ctrl = self.controllers.get(type)
+ if not ctrl and error:
+ raise XendError("invalid device type:" + type)
+ return ctrl
+
+ def findDeviceController(self, type):
+ return (self.getDeviceController(type, error=False)
+ or self.createDeviceController(type))
+
+ def createDeviceController(self, type):
+ ctrl = controller.createDevController(type, self, recreate=self.recreate)
+ self.controllers[type] = ctrl
+ return ctrl
+
+ def createDevice(self, type, devconfig, change=False):
+ ctrl = self.findDeviceController(type)
+ return ctrl.createDevice(devconfig, recreate=self.recreate,
+ change=change)
+
+ def configureDevice(self, type, id, devconfig):
+ ctrl = self.getDeviceController(type)
+ return ctrl.configureDevice(id, devconfig)
+
+ def destroyDevice(self, type, id, change=False, reboot=False):
+ ctrl = self.getDeviceController(type)
+ return ctrl.destroyDevice(id, change=change, reboot=reboot)
+
+ def deleteDevice(self, type, id):
+ ctrl = self.getDeviceController(type)
+ return ctrl.deleteDevice(id)
+
+ def getDevice(self, type, id, error=True):
+ ctrl = self.getDeviceController(type)
+ return ctrl.getDevice(id, error=error)
+
+ def getDeviceIds(self, type):
+ ctrl = self.getDeviceController(type)
+ return ctrl.getDeviceIds()
+
+ def getDeviceSxprs(self, type):
+ ctrl = self.getDeviceController(type)
+ return ctrl.getDeviceSxprs()
+
def sxpr(self):
sxpr = ['domain',
['id', self.id],
['name', self.name],
['memory', self.memory] ]
-
+ if self.uuid:
+ sxpr.append(['uuid', self.uuid])
if self.info:
sxpr.append(['maxmem', self.info['maxmem_kb']/1024 ])
run = (self.info['running'] and 'r') or '-'
@@ -366,16 +406,27 @@ class XendDomainInfo:
if self.info['shutdown']:
reason = shutdown_reason(self.info['shutdown_reason'])
sxpr.append(['shutdown_reason', reason])
- sxpr.append(['cpu', self.info['cpu']])
+ sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]])
sxpr.append(['cpu_time', self.info['cpu_time']/1e9])
+ sxpr.append(['vcpus', self.info['vcpus']])
+ sxpr.append(['cpumap', self.info['cpumap']])
+ sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x),
+ self.info['vcpu_to_cpu'][0:self.info['vcpus']]))])
if self.start_time:
up_time = time.time() - self.start_time
sxpr.append(['up_time', str(up_time) ])
sxpr.append(['start_time', str(self.start_time) ])
- if self.console:
- sxpr.append(self.console.sxpr())
+ if self.channel:
+ sxpr.append(self.channel.sxpr())
+ if self.store_channel:
+ sxpr.append(self.store_channel.sxpr())
+ console = self.getConsole()
+ if console:
+ sxpr.append(console.sxpr())
+ if self.restart_count:
+ sxpr.append(['restart_count', self.restart_count])
if self.restart_state:
sxpr.append(['restart_state', self.restart_state])
if self.restart_time:
@@ -388,16 +439,19 @@ class XendDomainInfo:
return sxpr
def sxpr_devices(self):
- sxpr = ['devices']
- for devs in self.devices.values():
- for dev in devs:
- if hasattr(dev, 'sxpr'):
- sxpr.append(dev.sxpr())
+ sxpr = []
+ for ty in self.getDeviceTypes():
+ devs = self.getDeviceSxprs(ty)
+ sxpr += devs
+ if sxpr:
+ sxpr.insert(0, 'devices')
+ else:
+ sxpr = None
return sxpr
def check_name(self, name):
- """Check if a vm name is valid. Valid names start with a non-digit
- and contain alphabetic characters, digits, or characters in '_-.:/+'.
+ """Check if a vm name is valid. Valid names contain alphabetic characters,
+ digits, or characters in '_-.:/+'.
The same name cannot be used for more than one vm at the same time.
@param name: name
@@ -406,8 +460,6 @@ class XendDomainInfo:
if self.recreate: return
if name is None or name == '':
raise VmError('missing vm name')
- if name[0] in string.digits:
- raise VmError('invalid vm name')
for c in name:
if c in string.digits: continue
if c in '_-.:/+': continue
@@ -421,224 +473,149 @@ class XendDomainInfo:
return
if dominfo.is_terminated():
return
- if not self.dom or (dominfo.dom != self.dom):
+ if not self.id or (dominfo.id != self.id):
raise VmError('vm name clash: ' + name)
def construct(self, config):
"""Construct the vm instance from its configuration.
@param config: configuration
- @return: deferred
@raise: VmError on error
"""
# todo - add support for scheduling params?
self.config = config
try:
- self.name = sxp.child_value(config, 'name')
+ # Initial domain create.
+ self.setName(sxp.child_value(config, 'name'))
self.check_name(self.name)
- try:
- self.cpu_weight = float(sxp.child_value(config, 'cpu_weight', '1'))
- except:
- raise VmError('invalid cpu weight')
- self.memory = int(sxp.child_value(config, 'memory'))
- if self.memory is None:
- raise VmError('missing memory size')
- cpu = sxp.child_value(config, 'cpu')
- if self.recreate and self.dom and cpu is not None:
- xc.domain_pincpu(self.dom, int(cpu))
-
+ self.init_image()
+ self.configure_cpus(config)
self.init_domain()
- self.configure_console()
+ self.register_domain()
+ self.configure_bootloader()
+
+ # Create domain devices.
self.configure_backends()
- self.construct_image()
+ self.configure_console()
self.configure_restart()
- deferred = self.configure()
- def cberr(err):
- self.destroy()
- return err
- deferred.addErrback(cberr)
- except StandardError, ex:
+ self.construct_image()
+ self.configure()
+ self.exportToDB()
+ except Exception, ex:
# Catch errors, cleanup and re-raise.
+ print 'Domain construction error:', ex
+ import traceback
+ traceback.print_exc()
self.destroy()
raise
- return deferred
- def construct_image(self):
- """Construct the boot image for the domain.
+ def register_domain(self):
+ xd = get_component('xen.xend.XendDomain')
+ xd._add_domain(self)
+ self.exportToDB()
- @return vm
+ def configure_cpus(self, config):
+ try:
+ self.cpu_weight = float(sxp.child_value(config, 'cpu_weight', '1'))
+ except:
+ raise VmError('invalid cpu weight')
+ self.memory = int(sxp.child_value(config, 'memory'))
+ if self.memory is None:
+ raise VmError('missing memory size')
+ cpu = sxp.child_value(config, 'cpu')
+ if self.recreate and self.id and cpu is not None and int(cpu) >= 0:
+ xc.domain_pincpu(self.id, 0, 1<<int(cpu))
+ try:
+ image = sxp.child_value(self.config, 'image')
+ vcpus = sxp.child_value(image, 'vcpus')
+ if vcpus:
+ self.vcpus = int(vcpus)
+ except:
+ raise VmError('invalid vcpus value')
+
+ def init_image(self):
+ """Create boot image handler for the domain.
"""
image = sxp.child_value(self.config, 'image')
if image is None:
raise VmError('missing image')
- image_name = sxp.name(image)
- if image_name is None:
- raise VmError('missing image name')
- image_handler = get_image_handler(image_name)
- if image_handler is None:
- raise VmError('unknown image type: ' + image_name)
- image_handler(self, image)
- return self
-
- def config_devices(self, name):
- """Get a list of the 'device' nodes of a given type from the config.
-
- @param name: device type
- @type name: string
- @return: device configs
- @rtype: list
- """
- devices = []
- for d in sxp.children(self.config, 'device'):
- dev = sxp.child0(d)
- if dev is None: continue
- if name == sxp.name(dev):
- devices.append(dev)
- return devices
-
- def config_device(self, type, idx):
- """Get a device config from the device nodes of a given type
- from the config.
-
- @param type: device type
- @type type: string
- @param idx: index
- @type idx: int
- @return config or None
- """
- devs = self.config_devices(type)
- if 0 <= idx < len(devs):
- return devs[idx]
- else:
- return None
+ self.image = ImageHandler.create(self, image)
- def next_device_index(self, type):
- """Get the next index for a given device type.
-
- @param type: device type
- @type type: string
- @return device index
- @rtype: int
- """
- idx = self.device_index.get(type, 0)
- self.device_index[type] = idx + 1
- return idx
-
- def add_device(self, type, dev):
- """Add a device to a virtual machine.
-
- @param type: device type
- @param dev: device to add
- """
- dl = self.devices.get(type, [])
- dl.append(dev)
- self.devices[type] = dl
-
- def remove_device(self, type, dev):
- """Remove a device from a virtual machine.
-
- @param type: device type
- @param dev: device
- """
- dl = self.devices.get(type, [])
- if dev in dl:
- dl.remove(dev)
-
- def get_devices(self, type):
- """Get a list of the devices of a given type.
-
- @param type: device type
- @return: devices
- """
- val = self.devices.get(type, [])
- return val
-
- def get_device_by_id(self, type, id):
- """Get the device with the given id.
-
- @param id: device id
- @return: device or None
- """
- dl = self.get_devices(type)
- for d in dl:
- if d.getprop('id') == id:
- return d
- return None
-
- def get_device_by_index(self, type, idx):
- """Get the device with the given index.
-
- @param idx: device index
- @return: device or None
- """
- idx = str(idx)
- dl = self.get_devices(type)
- for d in dl:
- if d.getidx() == idx:
- return d
- return None
-
- def get_device_savedinfo(self, type, index):
- val = None
- if self.savedinfo is None:
- return val
- index = str(index)
- devinfo = sxp.child(self.savedinfo, 'devices')
- if devinfo is None:
- return val
- for d in sxp.children(devinfo, type):
- dindex = sxp.child_value(d, 'index')
- if dindex is None: continue
- if str(dindex) == index:
- val = d
- break
- return val
-
- def get_device_recreate(self, type, index):
- return self.get_device_savedinfo(type, index) or self.recreate
-
- def add_config(self, val):
- """Add configuration data to a virtual machine.
-
- @param val: data to add
+ def construct_image(self):
+ """Construct the boot image for the domain.
"""
- self.configs.append(val)
-
- def destroy(self):
- """Completely destroy the vm.
+ self.create_channel()
+ self.image.createImage()
+ self.image.exportToDB()
+ #if self.store_channel:
+ # self.db.introduceDomain(self.id,
+ # self.store_mfn,
+ # self.store_channel)
+
+ def delete(self):
+ """Delete the vm's db.
"""
- self.cleanup()
- return self.destroy_domain()
+ if self.dom_get(self.id):
+ return
+ self.id = None
+ self.saveDB(sync=True)
+ try:
+ # Todo: eventually will have to wait for devices to signal
+ # destruction before can delete the db.
+ if self.db:
+ self.db.delete()
+ except Exception, ex:
+ log.warning("error in domain db delete: %s", ex)
+ pass
def destroy_domain(self):
"""Destroy the vm's domain.
The domain will not finally go away unless all vm
devices have been released.
"""
- if self.dom is None: return 0
- self.destroy_console()
- chan = xend.getDomChannel(self.dom)
- if chan:
- log.debug("Closing channel to domain %d", self.dom)
- chan.close()
+ if self.id is None:
+ return
try:
- return xc.domain_destroy(dom=self.dom)
+ xc.domain_destroy(dom=self.id)
except Exception, err:
log.exception("Domain destroy failed: %s", self.name)
- def destroy_console(self):
- if self.console:
- if self.restart_pending():
- self.console.deregisterChannel()
- else:
- log.debug('Closing console, domain %s', self.id)
- self.console.close()
-
def cleanup(self):
"""Cleanup vm resources: release devices.
"""
self.state = STATE_VM_TERMINATED
self.release_devices()
+ if self.channel:
+ try:
+ self.channel.close()
+ self.channel = None
+ except:
+ pass
+ if self.store_channel:
+ try:
+ self.store_channel.close()
+ self.store_channel = None
+ except:
+ pass
+ #try:
+ # self.db.releaseDomain(self.id)
+ #except Exception, ex:
+ # log.warning("error in domain release on xenstore: %s", ex)
+ # pass
+ if self.image:
+ try:
+ self.image.destroy()
+ self.image = None
+ except:
+ pass
+
+ def destroy(self):
+ """Cleanup vm and destroy domain.
+ """
+ self.cleanup()
+ self.destroy_domain()
+ self.saveDB()
+ return 0
def is_terminated(self):
"""Check if a domain has been terminated.
@@ -648,48 +625,17 @@ class XendDomainInfo:
def release_devices(self):
"""Release all vm devices.
"""
- self.release_vifs()
- self.release_vbds()
-
- self.devices = {}
- self.device_index = {}
- self.configs = []
- self.ipaddrs = []
-
- def release_vifs(self):
- """Release vm virtual network devices (vifs).
- """
- if self.dom is None: return
- ctrl = xend.netif_get(self.dom)
- if ctrl:
- log.debug("Destroying vifs for domain %d", self.dom)
- ctrl.destroy()
-
- def release_vbds(self):
- """Release vm virtual block devices (vbds).
- """
- if self.dom is None: return
- ctrl = xend.blkif_get(self.dom)
- if ctrl:
- log.debug("Destroying vbds for domain %d", self.dom)
- ctrl.destroy()
+ reboot = self.restart_pending()
+ for ctrl in self.getDeviceControllers():
+ if ctrl.isDestroyed(): continue
+ ctrl.destroyController(reboot=reboot)
def show(self):
"""Print virtual machine info.
"""
- print "[VM dom=%d name=%s memory=%d" % (self.dom, self.name, self.memory)
+ print "[VM dom=%d name=%s memory=%d" % (self.id, self.name, self.memory)
print "image:"
sxp.show(self.image)
- print
- for dl in self.devices:
- for dev in dl:
- print "device:"
- sxp.show(dev)
- print
- for val in self.configs:
- print "config:"
- sxp.show(val)
- print
print "]"
def init_domain(self):
@@ -699,180 +645,115 @@ class XendDomainInfo:
return
if self.start_time is None:
self.start_time = time.time()
- if self.restore:
- return
- dom = self.dom or 0
- memory = self.memory
- name = self.name
- # If the name is over the xen limit, use the end of it.
- if len(name) > MAX_DOMAIN_NAME:
- name = name[-MAX_DOMAIN_NAME:]
try:
cpu = int(sxp.child_value(self.config, 'cpu', '-1'))
except:
raise VmError('invalid cpu')
- cpu_weight = self.cpu_weight
- dom = xc.domain_create(dom= dom, mem_kb= memory * 1024,
- cpu= cpu, cpu_weight= cpu_weight)
- if dom <= 0:
- raise VmError('Creating domain failed: name=%s memory=%d'
- % (name, memory))
- log.debug('init_domain> Created domain=%d name=%s memory=%d', dom, name, memory)
- self.setdom(dom)
-
- def build_domain(self, ostype, kernel, ramdisk, cmdline):
- """Build the domain boot image.
- """
- if self.recreate or self.restore: return
- if not os.path.isfile(kernel):
- raise VmError('Kernel image does not exist: %s' % kernel)
- if ramdisk and not os.path.isfile(ramdisk):
- raise VmError('Kernel ramdisk does not exist: %s' % ramdisk)
- if len(cmdline) >= 256:
- log.warning('kernel cmdline too long, domain %d', self.dom)
- dom = self.dom
- buildfn = getattr(xc, '%s_build' % ostype)
- flags = 0
- if self.netif_backend: flags |= SIF_NET_BE_DOMAIN
- if self.blkif_backend: flags |= SIF_BLK_BE_DOMAIN
- err = buildfn(dom = dom,
- image = kernel,
- control_evtchn = self.console.getRemotePort(),
- cmdline = cmdline,
- ramdisk = ramdisk,
- flags = flags)
- if err != 0:
- raise VmError('Building domain failed: type=%s dom=%d err=%d'
- % (ostype, dom, err))
-
- def create_domain(self, ostype, kernel, ramdisk, cmdline):
- """Create a domain. Builds the image but does not configure it.
-
- @param ostype: OS type
- @param kernel: kernel image
- @param ramdisk: kernel ramdisk
- @param cmdline: kernel commandline
- """
-
- self.create_channel()
- if self.console:
- self.console.registerChannel()
- else:
- self.console = xendConsole.console_create(
- self.dom, console_port=self.console_port)
- self.build_domain(ostype, kernel, ramdisk, cmdline)
- self.image = kernel
- self.ramdisk = ramdisk
- self.cmdline = cmdline
+ dom = self.image.initDomain(self.id, self.memory, cpu, self.cpu_weight)
+ log.debug('init_domain> Created domain=%d name=%s memory=%d',
+ dom, self.name, self.memory)
+ if not self.restore:
+ self.setdom(dom)
+ def openChannel(self, key, local, remote):
+ """Create a channel to the domain.
+ If saved info is available recreate the channel.
+
+ @param key db key for the saved data (if any)
+ @param local default local port
+ @param remote default remote port
+ """
+ db = self.db.addChild(key)
+ chan = channelFactory().restoreFromDB(db, self.id, local, remote)
+ #todo: save here?
+ #chan.saveToDB(db)
+ return chan
+
+ def eventChannel(self, key):
+ db = self.db.addChild(key)
+ return EventChannel.restoreFromDB(db, 0, self.id)
+
def create_channel(self):
- """Create the channel to the domain.
- If saved info is available recreate the channel using the saved ports.
-
- @return: channel
+ """Create the channels to the domain.
"""
- local = 0
- remote = 1
- if self.savedinfo:
- consinfo = sxp.child(self.savedinfo, "console")
- if consinfo:
- local = int(sxp.child_value(consinfo, "local_port", 0))
- remote = int(sxp.child_value(consinfo, "remote_port", 1))
- return xend.createDomChannel(self.dom, local_port=local,
- remote_port=remote)
+ self.channel = self.openChannel("channel", 0, 1)
+ self.store_channel = self.eventChannel("store_channel")
+ def create_configured_devices(self):
+ devices = sxp.children(self.config, 'device')
+ for d in devices:
+ dev_config = sxp.child0(d)
+ if dev_config is None:
+ raise VmError('invalid device')
+ dev_type = sxp.name(dev_config)
+ ctrl_type = get_device_handler(dev_type)
+ if ctrl_type is None:
+ raise VmError('unknown device type: ' + dev_type)
+ self.createDevice(ctrl_type, dev_config)
+
def create_devices(self):
"""Create the devices for a vm.
- @return: Deferred
@raise: VmError for invalid devices
"""
- dlist = []
- devices = sxp.children(self.config, 'device')
- index = {}
- for d in devices:
- dev = sxp.child0(d)
- if dev is None:
- raise VmError('invalid device')
- dev_name = sxp.name(dev)
- dev_index = index.get(dev_name, 0)
- dev_handler = get_device_handler(dev_name)
- if dev_handler is None:
- raise VmError('unknown device type: ' + dev_name)
- v = dev_handler(self, dev, dev_index)
- append_deferred(dlist, v)
- index[dev_name] = dev_index + 1
- deferred = defer.DeferredList(dlist, fireOnOneErrback=1)
- deferred.addErrback(dlist_err)
- return deferred
+ if self.rebooting():
+ for ctrl in self.getDeviceControllers():
+ ctrl.initController(reboot=True)
+ else:
+ self.create_configured_devices()
def device_create(self, dev_config):
"""Create a new device.
@param dev_config: device configuration
- @return: deferred
"""
- dev_name = sxp.name(dev_config)
- dev_handler = get_device_handler(dev_name)
- if dev_handler is None:
- raise VmError('unknown device type: ' + dev_name)
- devs = self.get_devices(dev_name)
- dev_index = len(devs)
- self.config.append(['device', dev_config])
- d = dev_handler(self, dev_config, dev_index, change=1)
- def cbok(dev):
- return dev.sxpr()
- d.addCallback(cbok)
- return d
-
- def device_configure(self, dev_config, idx):
+ dev_type = sxp.name(dev_config)
+ dev = self.createDevice(self, dev_config, change=True)
+ self.config.append(['device', dev.getConfig()])
+ return dev.sxpr()
+
+ def device_configure(self, dev_config, id):
"""Configure an existing device.
@param dev_config: device configuration
- @param idx: device index
+ @param id: device id
"""
type = sxp.name(dev_config)
- dev = self.get_device_by_index(type, idx)
- if not dev:
- raise VmError('invalid device: %s %s' % (type, idx))
- new_config = dev.configure(dev_config, change=1)
- devs = self.devices.get(type)
- index = devs.index(dev)
- # Patch new config into device configs.
- dev_configs = self.config_devices(type)
- old_config = dev_configs[index]
- dev_configs[index] = new_config
+ dev = self.getDevice(type, id)
+ old_config = dev.getConfig()
+ new_config = dev.configure(dev_config, change=True)
# Patch new config into vm config.
new_full_config = ['device', new_config]
old_full_config = ['device', old_config]
old_index = self.config.index(old_full_config)
self.config[old_index] = new_full_config
return new_config
+
+ def device_refresh(self, type, id):
+ """Refresh a device.
+
+ @param type: device type
+ @param id: device id
+ """
+ dev = self.getDevice(type, id)
+ dev.refresh()
- def device_destroy(self, type, idx):
- """Destroy a device.
+ def device_delete(self, type, id):
+ """Destroy and remove a device.
@param type: device type
- @param idx: device index
+ @param id: device id
"""
- dev = self.get_device_by_index(type, idx)
- if not dev:
- raise VmError('invalid device: %s %s' % (type, idx))
- devs = self.devices.get(type)
- index = devs.index(dev)
- dev_config = self.config_device(type, index)
+ dev = self.getDevice(type, id)
+ dev_config = dev.getConfig()
if dev_config:
self.config.remove(['device', dev_config])
- dev.destroy(change=1)
- self.remove_device(type, dev)
+ self.deleteDevice(type, dev.getId())
- def configure_memory(self):
- """Configure vm memory limit.
+ def configure_bootloader(self):
+ """Configure boot loader.
"""
- maxmem = sxp.child_value(self.config, "maxmem")
- if maxmem is None:
- maxmem = self.memory
- xc.domain_setmaxmem(self.dom, maxmem_kb = maxmem * 1024)
+ self.bootloader = sxp.child_value(self.config, "bootloader")
def configure_console(self):
"""Configure the vm console port.
@@ -898,15 +779,15 @@ class XendDomainInfo:
for the given reason.
@param reason: shutdown reason
- @return 1 if needs restaert, 0 otherwise
+ @return True if needs restart, False otherwise
"""
if self.restart_mode == RESTART_NEVER:
- return 0
+ return False
if self.restart_mode == RESTART_ALWAYS:
- return 1
+ return True
if self.restart_mode == RESTART_ONREBOOT:
return reason == 'reboot'
- return 0
+ return False
def restart_cancel(self):
"""Cancel a vm restart.
@@ -923,6 +804,9 @@ class XendDomainInfo:
"""
return self.restart_state == STATE_RESTART_PENDING
+ def rebooting(self):
+ return self.restart_state == STATE_RESTART_BOOTING
+
def restart_check(self):
"""Check if domain restart is OK.
To prevent restart loops, raise an error if it is
@@ -937,20 +821,42 @@ class XendDomainInfo:
log.error(msg)
raise VmError(msg)
self.restart_time = tnow
+ self.restart_count += 1
def restart(self):
"""Restart the domain after it has exited.
Reuses the domain id and console port.
- @return: deferred
"""
try:
+ self.state = STATE_VM_OK
+ self.shutdown_pending = None
self.restart_check()
self.restart_state = STATE_RESTART_BOOTING
- d = self.construct(self.config)
+ if self.bootloader:
+ self.config = self.bootloader_config()
+ self.construct(self.config)
+ self.saveDB()
finally:
self.restart_state = None
- return d
+
+ def bootloader_config(self):
+ # if we're restarting with a bootloader, we need to run it
+ # FIXME: this assumes the disk is the first device and
+ # that we're booting from the first disk
+ blcfg = None
+ # FIXME: this assumes that we want to use the first disk
+ dev = sxp.child_value(self.config, "device")
+ if dev:
+ disk = sxp.child_value(dev, "uname")
+ fn = blkdev_uname_to_file(disk)
+ blcfg = bootloader(self.bootloader, fn, 1, self.vcpus)
+ if blcfg is None:
+ msg = "Had a bootloader specified, but can't find disk"
+ log.error(msg)
+ raise VmError(msg)
+ config = sxp.merge(['vm', blcfg ], self.config)
+ return config
def configure_backends(self):
"""Set configuration flags if the vm is a backend for netif or blkif.
@@ -960,71 +866,70 @@ class XendDomainInfo:
v = sxp.child0(c)
name = sxp.name(v)
if name == 'blkif':
- self.blkif_backend = 1
+ self.blkif_backend = True
elif name == 'netif':
- self.netif_backend = 1
+ self.netif_backend = True
+ elif name == 'usbif':
+ self.usbif_backend = True
else:
raise VmError('invalid backend type:' + str(name))
def configure(self):
"""Configure a vm.
- @return: deferred - calls callback with vm
"""
- d = self.create_devices()
- d.addCallback(lambda x: self.create_blkif())
- d.addCallback(self._configure)
- return d
-
- def _configure(self, val):
- d = self.configure_fields()
- def cbok(results):
- return self
- def cberr(err):
- self.destroy()
- return err
- d.addCallback(cbok)
- d.addErrback(cberr)
- return d
+ self.configure_fields()
+ self.create_console()
+ self.create_devices()
+ self.create_blkif()
+
+ def create_console(self):
+ console = self.getConsole()
+ if not console:
+ config = ['console']
+ if self.console_port:
+ config.append(['console_port', self.console_port])
+ console = self.createDevice('console', config)
+ return console
+
+ def getConsole(self):
+ console_ctrl = self.getDeviceController("console", error=False)
+ if console_ctrl:
+ return console_ctrl.getDevice(0)
+ return None
def create_blkif(self):
"""Create the block device interface (blkif) for the vm.
The vm needs a blkif even if it doesn't have any disks
at creation time, for example when it uses NFS root.
- @return: deferred
"""
- if self.get_devices("vbd") == []:
- ctrl = xend.blkif_create(self.dom, recreate=self.recreate)
- back = ctrl.getBackendInterface(0)
- return back.connect(recreate=self.recreate)
- else:
- return None
+ blkif = self.getDeviceController("vbd", error=False)
+ if not blkif:
+ blkif = self.createDeviceController("vbd")
+ backend = blkif.getBackend(0)
+ backend.connect(recreate=self.recreate)
def dom_construct(self, dom, config):
"""Construct a vm for an existing domain.
@param dom: domain id
@param config: domain configuration
- @return: deferred
"""
d = dom_get(dom)
if not d:
raise VmError("Domain not found: %d" % dom)
try:
- self.restore = 1
+ self.restore = True
self.setdom(dom)
- #self.name = d['name']
self.memory = d['mem_kb']/1024
- deferred = self.construct(config)
+ self.construct(config)
finally:
- self.restore = 0
- return deferred
+ self.restore = False
def configure_fields(self):
"""Process the vm configuration fields using the registered handlers.
"""
- dlist = []
index = {}
for field in sxp.children(self.config):
field_name = sxp.name(field)
@@ -1033,157 +938,35 @@ class XendDomainInfo:
# Ignore unknown fields. Warn?
if field_handler:
v = field_handler(self, self.config, field, field_index)
- append_deferred(dlist, v)
else:
log.warning("Unknown config field %s", field_name)
index[field_name] = field_index + 1
- d = defer.DeferredList(dlist, fireOnOneErrback=1)
- d.addErrback(dlist_err)
- return d
-
-def vm_image_linux(vm, image):
- """Create a VM for a linux image.
-
- @param name: vm name
- @param memory: vm memory
- @param image: image config
- @return: vm
- """
- kernel = sxp.child_value(image, "kernel")
- cmdline = ""
- ip = sxp.child_value(image, "ip", None)
- if ip:
- cmdline += " ip=" + ip
- root = sxp.child_value(image, "root")
- if root:
- cmdline += " root=" + root
- args = sxp.child_value(image, "args")
- if args:
- cmdline += " " + args
- ramdisk = sxp.child_value(image, "ramdisk", '')
- vm.create_domain("linux", kernel, ramdisk, cmdline)
- return vm
-
-def vm_image_plan9(vm, image):
- """Create a VM for a Plan 9 image.
-
- name vm name
- memory vm memory
- image image config
-
- returns vm
- """
- #todo: Same as for linux. Is that right? If so can unify them.
- kernel = sxp.child_value(image, "kernel")
- cmdline = ""
- ip = sxp.child_value(image, "ip", "dhcp")
- if ip:
- cmdline += "ip=" + ip
- root = sxp.child_value(image, "root")
- if root:
- cmdline += "root=" + root
- args = sxp.child_value(image, "args")
- if args:
- cmdline += " " + args
- ramdisk = sxp.child_value(image, "ramdisk", '')
- vifs = vm.config_devices("vif")
- vm.create_domain("plan9", kernel, ramdisk, cmdline)
- return vm
-
-
-
-def vm_dev_vif(vm, val, index, change=0):
- """Create a virtual network interface (vif).
-
- @param vm: virtual machine
- @param val: vif config
- @param index: vif index
- @return: deferred
- """
- vif = vm.next_device_index('vif')
- vmac = sxp.child_value(val, "mac")
- ctrl = xend.netif_create(vm.dom, recreate=vm.recreate)
- log.debug("Creating vif dom=%d vif=%d mac=%s", vm.dom, vif, str(vmac))
- recreate = vm.get_device_recreate('vif', index)
- defer = ctrl.attachDevice(vif, val, recreate=recreate)
- def cbok(dev):
- dev.vifctl('up', vmname=vm.name)
- dev.setIndex(index)
- vm.add_device('vif', dev)
- if change:
- dev.interfaceChanged()
- return dev
- defer.addCallback(cbok)
- return defer
-
-def vm_dev_vbd(vm, val, index, change=0):
- """Create a virtual block device (vbd).
-
- @param vm: virtual machine
- @param val: vbd config
- @param index: vbd index
- @return: deferred
- """
- idx = vm.next_device_index('vbd')
- uname = sxp.child_value(val, 'uname')
- log.debug("Creating vbd dom=%d uname=%s", vm.dom, uname)
- ctrl = xend.blkif_create(vm.dom, recreate=vm.recreate)
- recreate = vm.get_device_recreate('vbd', index)
- defer = ctrl.attachDevice(idx, val, recreate=recreate)
- def cbok(dev):
- dev.setIndex(index)
- vm.add_device('vbd', dev)
- if change:
- dev.interfaceChanged()
- return dev
- defer.addCallback(cbok)
- return defer
-
-def parse_pci(val):
- """Parse a pci field.
- """
- if isinstance(val, types.StringType):
- radix = 10
- if val.startswith('0x') or val.startswith('0X'):
- radix = 16
- v = int(val, radix)
- else:
- v = val
- return v
-
-def vm_dev_pci(vm, val, index, change=0):
- """Add a pci device.
-
- @param vm: virtual machine
- @param val: device configuration
- @param index: device index
- @return: 0 on success
- """
- bus = sxp.child_value(val, 'bus')
- if not bus:
- raise VmError('pci: Missing bus')
- dev = sxp.child_value(val, 'dev')
- if not dev:
- raise VmError('pci: Missing dev')
- func = sxp.child_value(val, 'func')
- if not func:
- raise VmError('pci: Missing func')
- try:
- bus = parse_pci(bus)
- dev = parse_pci(dev)
- func = parse_pci(func)
- except:
- raise VmError('pci: invalid parameter')
- log.debug("Creating pci device dom=%d bus=%x dev=%x func=%x", vm.dom, bus, dev, func)
- rc = xc.physdev_pci_access_modify(dom=vm.dom, bus=bus, dev=dev,
- func=func, enable=1)
- if rc < 0:
- #todo non-fatal
- raise VmError('pci: Failed to configure device: bus=%s dev=%s func=%s' %
- (bus, dev, func))
- return rc
-
+ def mem_target_set(self, target):
+ """Set domain memory target in pages.
+ """
+ if self.channel:
+ msg = messages.packMsg('mem_request_t', { 'target' : target * (1 << 8)} )
+ self.channel.writeRequest(msg)
+
+ def shutdown(self, reason, key=0):
+ msgtype = shutdown_messages.get(reason)
+ if not msgtype:
+ raise XendError('invalid reason:' + reason)
+ extra = {}
+ if reason == 'sysrq':
+ extra['key'] = key
+ if self.channel:
+ msg = messages.packMsg(msgtype, extra)
+ self.channel.writeRequest(msg)
+ if not reason in ['suspend', 'sysrq']:
+ self.shutdown_pending = {'start':time.time(), 'reason':reason,
+ 'key':key}
+
+ def shutdown_time_left(self, timeout):
+ if not self.shutdown_pending:
+ return 0
+ return timeout - (time.time() - self.shutdown_pending['start'])
def vm_field_ignore(vm, config, val, index):
"""Dummy config field handler used for fields with built-in handling.
@@ -1210,16 +993,20 @@ def vm_field_maxmem(vm, config, val, index):
maxmem = int(maxmem)
except:
raise VmError("invalid maxmem: " + str(maxmem))
- xc.domain_setmaxmem(vm.dom, maxmem_kb = maxmem * 1024)
+ xc.domain_setmaxmem(vm.id, maxmem_kb = maxmem * 1024)
+#============================================================================
# Register image handlers.
-add_image_handler('linux', vm_image_linux)
-add_image_handler('plan9', vm_image_plan9)
+from image import \
+ addImageHandlerClass, \
+ ImageHandler, \
+ LinuxImageHandler, \
+ Plan9ImageHandler, \
+ VmxImageHandler
-# Register device handlers.
-add_device_handler('vif', vm_dev_vif)
-add_device_handler('vbd', vm_dev_vbd)
-add_device_handler('pci', vm_dev_pci)
+addImageHandlerClass(LinuxImageHandler)
+addImageHandlerClass(Plan9ImageHandler)
+addImageHandlerClass(VmxImageHandler)
# Ignore the fields we already handle.
add_config_handler('name', vm_field_ignore)
@@ -1231,6 +1018,32 @@ add_config_handler('restart', vm_field_ignore)
add_config_handler('image', vm_field_ignore)
add_config_handler('device', vm_field_ignore)
add_config_handler('backend', vm_field_ignore)
+add_config_handler('vcpus', vm_field_ignore)
+add_config_handler('bootloader', vm_field_ignore)
# Register other config handlers.
add_config_handler('maxmem', vm_field_maxmem)
+
+#============================================================================
+# Register device controllers and their device config types.
+
+from server import console
+controller.addDevControllerClass("console", console.ConsoleController)
+
+from server import blkif
+controller.addDevControllerClass("vbd", blkif.BlkifController)
+add_device_handler("vbd", "vbd")
+
+from server import netif
+controller.addDevControllerClass("vif", netif.NetifController)
+add_device_handler("vif", "vif")
+
+from server import pciif
+controller.addDevControllerClass("pci", pciif.PciController)
+add_device_handler("pci", "pci")
+
+from xen.xend.server import usbif
+controller.addDevControllerClass("usb", usbif.UsbifController)
+add_device_handler("usb", "usb")
+
+#============================================================================
diff --git a/tools/python/xen/xend/XendMigrate.py b/tools/python/xen/xend/XendMigrate.py
deleted file mode 100644
index f2570ae02b..0000000000
--- a/tools/python/xen/xend/XendMigrate.py
+++ /dev/null
@@ -1,555 +0,0 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-
-import traceback
-
-import errno
-import sys
-import socket
-import time
-import types
-
-from twisted.internet import reactor
-from twisted.internet import defer
-#defer.Deferred.debug = 1
-from twisted.internet.protocol import Protocol
-from twisted.internet.protocol import ClientFactory
-from twisted.python.failure import Failure
-
-import sxp
-import XendDB
-import EventServer; eserver = EventServer.instance()
-from XendError import XendError
-from XendLogging import log
-
-"""The port for the migrate/save daemon xfrd."""
-XFRD_PORT = 8002
-
-"""The transfer protocol major version number."""
-XFR_PROTO_MAJOR = 1
-"""The transfer protocol minor version number."""
-XFR_PROTO_MINOR = 0
-
-class Xfrd(Protocol):
- """Protocol handler for a connection to the migration/save daemon xfrd.
- """
-
- def __init__(self, xinfo):
- self.parser = sxp.Parser()
- self.xinfo = xinfo
-
- def connectionMade(self):
- # Send hello.
- self.request(['xfr.hello', XFR_PROTO_MAJOR, XFR_PROTO_MINOR])
- # Send request.
- self.xinfo.request(self)
-
- def request(self, req):
- sxp.show(req, out=self.transport)
-
- def loseConnection(self):
- self.transport.loseConnection()
-
- def connectionLost(self, reason):
- self.xinfo.connectionLost(reason)
-
- def dataReceived(self, data):
- self.parser.input(data)
- if self.parser.ready():
- val = self.parser.get_val()
- self.xinfo.dispatch(self, val)
- if self.parser.at_eof():
- self.loseConnection()
-
-
-class XfrdClientFactory(ClientFactory):
- """Factory for clients of the migration/save daemon xfrd.
- """
-
- def __init__(self, xinfo):
- #ClientFactory.__init__(self)
- self.xinfo = xinfo
-
- def startedConnecting(self, connector):
- pass
-
- def buildProtocol(self, addr):
- return Xfrd(self.xinfo)
-
- def clientConnectionLost(self, connector, reason):
- pass
-
- def clientConnectionFailed(self, connector, reason):
- self.xinfo.error(reason)
-
-class XfrdInfo:
- """Abstract class for info about a session with xfrd.
- Has subclasses for save and migrate.
- """
-
- """Suspend timeout (seconds).
- We set a timeout because suspending a domain can hang."""
- timeout = 10
-
- def __init__(self):
- from xen.xend import XendDomain
- self.xd = XendDomain.instance()
- self.deferred = defer.Deferred()
- self.suspended = {}
- self.paused = {}
- self.state = 'init'
- # List of errors encountered.
- self.errors = []
-
- def vmconfig(self):
- dominfo = self.xd.domain_get(self.src_dom)
- if dominfo:
- val = sxp.to_string(dominfo.sxpr())
- else:
- val = None
- return val
-
- def add_error(self, err):
- """Add an error to the error list.
- Returns the error added (which may have been unwrapped if it
- was a Twisted Failure).
- """
- while isinstance(err, Failure):
- err = err.value
- if err not in self.errors:
- self.errors.append(err)
- return err
-
- def error_summary(self, msg=None):
- """Get a XendError summarising the errors (if any).
- """
- if msg is None:
- msg = "errors"
- if self.errors:
- errmsg = msg + ': ' + ', '.join(map(str, self.errors))
- else:
- errmsg = msg
- return XendError(errmsg)
-
- def get_errors(self):
- """Get the list of errors.
- """
- return self.errors
-
- def error(self, err):
- self.state = 'error'
- self.add_error(err)
- if not self.deferred.called:
- self.deferred.errback(self.error_summary())
-
- def dispatch(self, xfrd, val):
-
- def cbok(v):
- if v is None: return
- sxp.show(v, out=xfrd.transport)
-
- def cberr(err):
- v = ['xfr.err', errno.EINVAL]
- sxp.show(v, out=xfrd.transport)
- self.error(err)
-
- op = sxp.name(val)
- op = op.replace('.', '_')
- if op.startswith('xfr_'):
- fn = getattr(self, op, self.unknown)
- else:
- fn = self.unknown
- val = fn(xfrd, val)
- if isinstance(val, defer.Deferred):
- val.addCallback(cbok)
- val.addErrback(cberr)
- else:
- cbok(val)
-
- def unknown(self, xfrd, val):
- xfrd.loseConnection()
- return None
-
- def xfr_err(self, xfrd, val):
- # If we get an error with non-zero code the operation failed.
- # An error with code zero indicates hello success.
- v = sxp.child0(val)
- err = int(sxp.child0(val))
- if not err: return
- self.error("transfer daemon (xfrd) error: " + str(err))
- xfrd.loseConnection()
- return None
-
- def xfr_progress(self, xfrd, val):
- return None
-
- def xfr_vm_destroy(self, xfrd, val):
- try:
- vmid = sxp.child0(val)
- val = self.xd.domain_destroy(vmid)
- if vmid in self.paused:
- del self.paused[vmid]
- if vmid in self.suspended:
- del self.suspended[vmid]
- except StandardError, err:
- self.add_error("vm_destroy failed")
- self.add_error(err)
- val = errno.EINVAL
- return ['xfr.err', val]
-
- def xfr_vm_pause(self, xfrd, val):
- try:
- vmid = sxp.child0(val)
- val = self.xd.domain_pause(vmid)
- self.paused[vmid] = 1
- except StandardError, err:
- self.add_error("vm_pause failed")
- self.add_error(err)
- val = errno.EINVAL
- return ['xfr.err', val]
-
- def xfr_vm_unpause(self, xfrd, val):
- try:
- vmid = sxp.child0(val)
- val = self.xd.domain_unpause(vmid)
- if vmid in self.paused:
- del self.paused[vmid]
- except StandardError, err:
- self.add_error("vm_unpause failed")
- self.add_error(err)
- val = errno.EINVAL
- return ['xfr.err', val]
-
- def xfr_vm_suspend(self, xfrd, val):
- """Suspend a domain. Suspending takes time, so we return
- a Deferred that is called when the suspend completes.
- Suspending can hang, so we set a timeout and fail if it
- takes too long.
- """
- try:
- vmid = sxp.child0(val)
- d = defer.Deferred()
- # Subscribe to 'suspended' events so we can tell when the
- # suspend completes. Subscribe to 'died' events so we can tell if
- # the domain died. Set a timeout and error handler so the subscriptions
- # will be cleaned up if suspending hangs or there is an error.
- def onSuspended(e, v):
- if v[1] != vmid: return
- subscribe(on=0)
- if not d.called:
- d.callback(v)
-
- def onDied(e, v):
- if v[1] != vmid: return
- if not d.called:
- d.errback(XendError('Domain %s died while suspending' % vmid))
-
- def subscribe(on=1):
- if on:
- action = eserver.subscribe
- else:
- action = eserver.unsubscribe
- action('xend.domain.suspended', onSuspended)
- action('xend.domain.died', onDied)
-
- def cberr(err):
- subscribe(on=0)
- self.add_error("suspend failed")
- self.add_error(err)
- return err
-
- d.addErrback(cberr)
- d.setTimeout(self.timeout)
- subscribe()
- val = self.xd.domain_shutdown(vmid, reason='suspend')
- self.suspended[vmid] = 1
- return d
- except Exception, err:
- self.add_error("suspend failed")
- self.add_error(err)
- traceback.print_exc()
- val = errno.EINVAL
- return ['xfr.err', val]
-
- def connectionLost(self, reason=None):
- for vmid in self.suspended:
- try:
- self.xd.domain_destroy(vmid)
- except:
- pass
- for vmid in self.paused:
- try:
- self.xd.domain_unpause(vmid)
- except:
- pass
-
-class XendMigrateInfo(XfrdInfo):
- """Representation of a migrate in-progress and its interaction with xfrd.
- """
-
- def __init__(self, xid, dominfo, host, port, live=0, resource=0):
- XfrdInfo.__init__(self)
- self.xid = xid
- self.dominfo = dominfo
- self.state = 'begin'
- self.src_host = socket.gethostname()
- self.src_dom = dominfo.id
- self.dst_host = host
- self.dst_port = port
- self.dst_dom = None
- self.live = live
- self.resource = resource
- self.start = 0
-
- def sxpr(self):
- sxpr = ['migrate',
- ['id', self.xid ],
- ['state', self.state ],
- ['live', self.live ],
- ['resource', self.resource ] ]
- sxpr_src = ['src', ['host', self.src_host], ['domain', self.src_dom] ]
- sxpr.append(sxpr_src)
- sxpr_dst = ['dst', ['host', self.dst_host] ]
- if self.dst_dom:
- sxpr_dst.append(['domain', self.dst_dom])
- sxpr.append(sxpr_dst)
- return sxpr
-
- def request(self, xfrd):
- vmconfig = self.vmconfig()
- if not vmconfig:
- self.error(XendError("vm config not found"))
- xfrd.loseConnection()
- return
- log.info('Migrate BEGIN: %s' % str(self.sxpr()))
- eserver.inject('xend.domain.migrate',
- [ self.dominfo.name, self.dominfo.id, "begin", self.sxpr() ])
- xfrd.request(['xfr.migrate',
- self.src_dom,
- vmconfig,
- self.dst_host,
- self.dst_port,
- self.live,
- self.resource ])
-
- def xfr_migrate_ok(self, xfrd, val):
- dom = int(sxp.child0(val))
- self.state = 'ok'
- self.dst_dom = dom
- self.xd.domain_destroy(self.src_dom)
- if not self.deferred.called:
- self.deferred.callback(self)
-
- def connectionLost(self, reason=None):
- XfrdInfo.connectionLost(self, reason)
- if self.state =='ok':
- log.info('Migrate OK: ' + str(self.sxpr()))
- else:
- self.state = 'error'
- self.error("migrate failed")
- log.info('Migrate ERROR: ' + str(self.sxpr()))
- eserver.inject('xend.domain.migrate',
- [ self.dominfo.name, self.dominfo.id, self.state, self.sxpr() ])
-
-class XendSaveInfo(XfrdInfo):
- """Representation of a save in-progress and its interaction with xfrd.
- """
-
- def __init__(self, xid, dominfo, file):
- XfrdInfo.__init__(self)
- self.xid = xid
- self.dominfo = dominfo
- self.state = 'begin'
- self.src_dom = dominfo.id
- self.file = file
- self.start = 0
-
- def sxpr(self):
- sxpr = ['save',
- ['id', self.xid],
- ['state', self.state],
- ['domain', self.src_dom],
- ['file', self.file] ]
- return sxpr
-
- def request(self, xfrd):
- vmconfig = self.vmconfig()
- if not vmconfig:
- self.error(XendError("vm config not found"))
- xfrd.loseConnection()
- return
- log.info('Save BEGIN: ' + str(self.sxpr()))
- eserver.inject('xend.domain.save',
- [ self.dominfo.name, self.dominfo.id,
- "begin", self.sxpr() ])
- xfrd.request(['xfr.save', self.src_dom, vmconfig, self.file ])
-
- def xfr_save_ok(self, xfrd, val):
- self.state = 'ok'
- self.xd.domain_destroy(self.src_dom)
- if not self.deferred.called:
- self.deferred.callback(self)
-
- def connectionLost(self, reason=None):
- XfrdInfo.connectionLost(self, reason)
- if self.state =='ok':
- log.info('Save OK: ' + str(self.sxpr()))
- else:
- self.state = 'error'
- self.error("save failed")
- log.info('Save ERROR: ' + str(self.sxpr()))
- eserver.inject('xend.domain.save',
- [ self.dominfo.name, self.dominfo.id,
- self.state, self.sxpr() ])
-
-class XendRestoreInfo(XfrdInfo):
- """Representation of a restore in-progress and its interaction with xfrd.
- """
-
- def __init__(self, xid, file):
- XfrdInfo.__init__(self)
- self.xid = xid
- self.state = 'begin'
- self.file = file
-
- def sxpr(self):
- sxpr = ['restore',
- ['id', self.xid],
- ['file', self.file] ]
- return sxpr
-
- def request(self, xfrd):
- log.info('restore BEGIN: ' + str(self.sxpr()))
- eserver.inject('xend.restore', [ 'begin', self.sxpr()])
-
- xfrd.request(['xfr.restore', self.file ])
-
- def xfr_restore_ok(self, xfrd, val):
- dom = int(sxp.child0(val))
- dominfo = self.xd.domain_get(dom)
- self.state = 'ok'
- if not self.deferred.called:
- self.deferred.callback(dominfo)
-
- def connectionLost(self, reason=None):
- XfrdInfo.connectionLost(self, reason)
- if self.state =='ok':
- log.info('Restore OK: ' + self.file)
- else:
- self.state = 'error'
- self.error("restore failed")
- log.info('Restore ERROR: ' + str(self.sxpr()))
- eserver.inject('xend.restore', [ self.state, self.sxpr()])
-
-class XendMigrate:
- """External api for interaction with xfrd for migrate and save.
- Singleton.
- """
- # Use log for indications of begin/end/errors?
- # Need logging of: domain create/halt, migrate begin/end/fail
- # Log via event server?
-
- dbpath = "migrate"
-
- def __init__(self):
- self.db = XendDB.XendDB(self.dbpath)
- self.session = {}
- self.session_db = self.db.fetchall("")
- self.xid = 0
-
- def nextid(self):
- self.xid += 1
- return "%d" % self.xid
-
- def sync(self):
- self.db.saveall("", self.session_db)
-
- def sync_session(self, xid):
- self.db.save(xid, self.session_db[xid])
-
- def close(self):
- pass
-
- def _add_session(self, info):
- xid = info.xid
- self.session[xid] = info
- self.session_db[xid] = info.sxpr()
- self.sync_session(xid)
-
- def _delete_session(self, xid):
- if xid in self.session:
- del self.session[xid]
- if xid in self.session_db:
- del self.session_db[xid]
- self.db.delete(xid)
-
- def session_ls(self):
- return self.session.keys()
-
- def sessions(self):
- return self.session.values()
-
- def session_get(self, xid):
- return self.session.get(xid)
-
- def session_begin(self, info):
- """Add the session to the table and start it.
- Set up callbacks to remove the session from the table
- when it finishes.
-
- @param info: session
- @return: deferred
- """
- dfr = defer.Deferred()
- def cbok(val):
- self._delete_session(info.xid)
- if not dfr.called:
- dfr.callback(val)
- return val
- def cberr(err):
- self._delete_session(info.xid)
- if not dfr.called:
- dfr.errback(err)
- return err
- self._add_session(info)
- info.deferred.addCallback(cbok)
- info.deferred.addErrback(cberr)
- xcf = XfrdClientFactory(info)
- reactor.connectTCP('localhost', XFRD_PORT, xcf)
- return dfr
-
- def migrate_begin(self, dominfo, host, port=XFRD_PORT, live=0, resource=0):
- """Begin to migrate a domain to another host.
-
- @param dominfo: domain info
- @param host: destination host
- @param port: destination port
- @return: deferred
- """
- xid = self.nextid()
- info = XendMigrateInfo(xid, dominfo, host, port, live, resource)
- return self.session_begin(info)
-
- def save_begin(self, dominfo, file):
- """Begin saving a domain to file.
-
- @param dominfo: domain info
- @param file: destination file
- @return: deferred
- """
- xid = self.nextid()
- info = XendSaveInfo(xid, dominfo, file)
- return self.session_begin(info)
-
- def restore_begin(self, file):
- xid = self.nextid()
- info = XendRestoreInfo(xid, file)
- return self.session_begin(info)
-
-
-def instance():
- global inst
- try:
- inst
- except:
- inst = XendMigrate()
- return inst
diff --git a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py
index 5fff0f62dd..d915f0ba0f 100644
--- a/tools/python/xen/xend/XendNode.py
+++ b/tools/python/xen/xend/XendNode.py
@@ -30,12 +30,6 @@ class XendNode:
def cpu_bvt_slice_get(self):
return self.xc.bvtsched_global_get()
- def cpu_rrobin_slice_set(self, slice):
- return self.xc.rrobin_global_set(slice=slice)
-
- def cpu_rrobin_slice_get(self):
- return self.xc.rrobin_global_get()
-
def info(self):
return self.nodeinfo() + self.physinfo()
diff --git a/tools/python/xen/xend/XendProtocol.py b/tools/python/xen/xend/XendProtocol.py
index db4de7940f..bd936741ff 100644
--- a/tools/python/xen/xend/XendProtocol.py
+++ b/tools/python/xen/xend/XendProtocol.py
@@ -1,11 +1,14 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
+import socket
import httplib
import types
from encode import *
import sxp
+from xen.xend import XendRoot; xroot = XendRoot.instance()
+
DEBUG = 0
HTTP_OK = 200
@@ -122,12 +125,19 @@ class XendClientProtocol:
"""
raise NotImplementedError()
-class SynchXendClientProtocol(XendClientProtocol):
+class HttpXendClientProtocol(XendClientProtocol):
"""A synchronous xend client. This will make a request, wait for
the reply and return the result.
"""
resp = None
+ request = None
+
+ def makeConnection(self, url):
+ return httplib.HTTPConnection(url.location())
+
+ def makeRequest(self, url, method, args):
+ return XendRequest(url, method, args)
def xendRequest(self, url, method, args=None):
"""Make a request to xend.
@@ -136,8 +146,8 @@ class SynchXendClientProtocol(XendClientProtocol):
@param method: http method: POST or GET
@param args: request arguments (dict)
"""
- self.request = XendRequest(url, method, args)
- conn = httplib.HTTPConnection(url.location())
+ self.request = self.makeRequest(url, method, args)
+ conn = self.makeConnection(url)
if DEBUG: conn.set_debuglevel(1)
conn.request(method, url.fullpath(), self.request.data, self.request.headers)
resp = conn.getresponse()
@@ -154,3 +164,27 @@ class SynchXendClientProtocol(XendClientProtocol):
def getHeader(self, key):
return self.resp.getheader(key)
+class UnixConnection(httplib.HTTPConnection):
+ """Subclass of Python library HTTPConnection that uses a unix-domain socket.
+ """
+
+ def __init__(self, path):
+ httplib.HTTPConnection.__init__(self, 'localhost')
+ self.path = path
+
+ def connect(self):
+ sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ sock.connect(self.path)
+ self.sock = sock
+
+class UnixXendClientProtocol(HttpXendClientProtocol):
+ """A synchronous xend client using a unix-domain socket.
+ """
+
+ def __init__(self, path=None):
+ if path is None:
+ path = xroot.get_xend_unix_path()
+ self.path = path
+
+ def makeConnection(self, url):
+ return UnixConnection(self.path)
diff --git a/tools/python/xen/xend/XendRoot.py b/tools/python/xen/xend/XendRoot.py
index 5623381f1d..045a5a5fa4 100644
--- a/tools/python/xen/xend/XendRoot.py
+++ b/tools/python/xen/xend/XendRoot.py
@@ -15,6 +15,7 @@ import sys
import EventServer
from XendLogging import XendLogging
+from XendError import XendError
# Initial create of the event server.
eserver = EventServer.instance()
@@ -24,9 +25,6 @@ import sxp
class XendRoot:
"""Root of the management classes."""
- """Default path to the root of the database."""
- dbroot_default = "/var/lib/xen/xend-db"
-
"""Default path to the config file."""
config_default = "/etc/xen/xend-config.sxp"
@@ -45,16 +43,34 @@ class XendRoot:
"""Default level of information to be logged."""
loglevel_default = 'DEBUG'
+ """Default for the flag indicating whether xend should run an http server."""
+ xend_http_server_default = 'no'
+
"""Default interface address xend listens at. """
xend_address_default = ''
+ """Default for the flag indicating whether xend should run a relocation server."""
+ xend_relocation_server_default = 'yes'
+
+ """Default interface address the xend relocation server listens at. """
+ xend_relocation_address_default = ''
+
"""Default port xend serves HTTP at. """
xend_port_default = '8000'
"""Default port xend serves events at. """
xend_event_port_default = '8001'
- """Default inteface address xend listens at for consoles."""
+ """Default port xend serves relocation at. """
+ xend_relocation_port_default = '8002'
+
+ """Default for the flag indicating whether xend should run a unix-domain server."""
+ xend_unix_server_default = 'yes'
+
+ """Default path the unix-domain server listens at."""
+ xend_unix_path_default = '/var/lib/xend/xend-socket'
+
+ """Default interface address xend listens at for consoles."""
console_address_default = 'localhost'
"""Default port xend serves consoles at. """
@@ -63,7 +79,6 @@ class XendRoot:
components = {}
def __init__(self):
- self.dbroot = None
self.config_path = None
self.config = None
self.logging = None
@@ -152,13 +167,16 @@ class XendRoot:
def configure(self):
self.set_config()
self.configure_logger()
- self.dbroot = self.get_config_value("dbroot", self.dbroot_default)
def configure_logger(self):
logfile = self.get_config_value("logfile", self.logfile_default)
loglevel = self.get_config_value("loglevel", self.loglevel_default)
self.logging = XendLogging(logfile, level=loglevel)
+ from xen.xend.server import params
+ if params.XEND_DEBUG:
+ self.logging.addLogStderr()
+
def get_logging(self):
"""Get the XendLogging instance.
"""
@@ -169,11 +187,6 @@ class XendRoot:
"""
return self.logging and self.logging.getLogger()
- def get_dbroot(self):
- """Get the path to the database root.
- """
- return self.dbroot
-
def set_config(self):
"""If the config file exists, read it. If not, ignore it.
@@ -219,15 +232,45 @@ class XendRoot:
"""
return sxp.child_value(self.config, name, val=val)
+ def get_config_bool(self, name, val=None):
+ v = self.get_config_value(name, val)
+ if v in ['yes', '1', 'on', 'true', 1, True]:
+ return True
+ if v in ['no', '0', 'off', 'false', 0, False]:
+ return False
+ raise XendError("invalid xend config %s: expected bool: %s" % (name, v))
+
+ def get_config_int(self, name, val=None):
+ v = self.get_config_value(name, val)
+ try:
+ return int(v)
+ except Exception, ex:
+ raise XendError("invalid xend config %s: expected int: %s" % (name, v))
+
+ def get_xend_http_server(self):
+ """Get the flag indicating whether xend should run an http server.
+ """
+ return self.get_config_bool("xend-http-server", self.xend_http_server_default)
+
+ def get_xend_relocation_server(self):
+ """Get the flag indicating whether xend should run a relocation server.
+ """
+ return self.get_config_bool("xend-relocation-server", self.xend_relocation_server_default)
+
def get_xend_port(self):
"""Get the port xend listens at for its HTTP interface.
"""
- return int(self.get_config_value('xend-port', self.xend_port_default))
+ return self.get_config_int('xend-port', self.xend_port_default)
def get_xend_event_port(self):
"""Get the port xend listens at for connection to its event server.
"""
- return int(self.get_config_value('xend-event-port', self.xend_event_port_default))
+ return self.get_config_int('xend-event-port', self.xend_event_port_default)
+
+ def get_xend_relocation_port(self):
+ """Get the port xend listens at for connection to its relocation server.
+ """
+ return self.get_config_int('xend-relocation-port', self.xend_relocation_port_default)
def get_xend_address(self):
"""Get the address xend listens at for its HTTP and event ports.
@@ -237,6 +280,24 @@ class XendRoot:
"""
return self.get_config_value('xend-address', self.xend_address_default)
+ def get_xend_relocation_address(self):
+ """Get the address xend listens at for its relocation server port.
+ This defaults to the empty string which allows all hosts to connect.
+ If this is set to 'localhost' only the localhost will be able to connect
+ to the HTTP and event ports.
+ """
+ return self.get_config_value('xend-relocation-address', self.xend_relocation_address_default)
+
+ def get_xend_unix_server(self):
+ """Get the flag indicating whether xend should run a unix-domain server.
+ """
+ return self.get_config_bool("xend-unix-server", self.xend_unix_server_default)
+
+ def get_xend_unix_path(self):
+ """Get the path the xend unix-domain server listens at.
+ """
+ return self.get_config_value("xend-unix-path", self.xend_unix_path_default)
+
def get_console_address(self):
"""Get the address xend listens at for its console ports.
This defaults to 'localhost', allowing only the localhost to connect
@@ -248,7 +309,7 @@ class XendRoot:
def get_console_port_base(self):
"""Get the base port number used to generate console ports for domains.
"""
- return int(self.get_config_value('console-port-base', self.console_port_base_default))
+ return self.get_config_int('console-port-base', self.console_port_base_default)
def get_block_script(self, type):
return self.get_config_value('block-%s' % type, '')
@@ -256,6 +317,9 @@ class XendRoot:
def get_network_script(self):
return self.get_config_value('network-script', 'network')
+ def get_enable_dump(self):
+ return self.get_config_bool('enable-dump', 'no')
+
def get_vif_bridge(self):
return self.get_config_value('vif-bridge', 'xen-br0')
@@ -263,8 +327,7 @@ class XendRoot:
return self.get_config_value('vif-script', 'vif-bridge')
def get_vif_antispoof(self):
- v = self.get_config_value('vif-antispoof', 'yes')
- return v in ['yes', '1', 'on']
+ return self.get_config_bool('vif-antispoof', 'yes')
def instance():
"""Get an instance of XendRoot.
diff --git a/tools/python/xen/xend/XendVnet.py b/tools/python/xen/xend/XendVnet.py
index d95fd204aa..3614127c49 100644
--- a/tools/python/xen/xend/XendVnet.py
+++ b/tools/python/xen/xend/XendVnet.py
@@ -4,11 +4,10 @@
"""
from xen.util import Brctl
-
-import sxp
-import XendDB
-from XendError import XendError
-from XendLogging import log
+from xen.xend import sxp
+from xen.xend.XendError import XendError
+from xen.xend.XendLogging import log
+from xen.xend.xenstore import XenNode, DBMap
def vnet_cmd(cmd):
out = None
@@ -63,14 +62,15 @@ class XendVnet:
"""Index of all vnets. Singleton.
"""
- dbpath = "vnet"
+ dbpath = "/vnet"
def __init__(self):
# Table of vnet info indexed by vnet id.
self.vnet = {}
- self.db = XendDB.XendDB(self.dbpath)
- vnets = self.db.fetchall("")
- for config in vnets.values():
+ self.dbmap = DBMap(db=XenNode(self.dbpath))
+ self.dbmap.readDB()
+ for vnetdb in self.dbmap.values():
+ config = vnetdb.config
info = XendVnetInfo(config)
self.vnet[info.id] = info
try:
@@ -115,7 +115,7 @@ class XendVnet:
"""
info = XendVnetInfo(config)
self.vnet[info.id] = info
- self.db.save(info.id, info.sxpr())
+ self.dbmap["%s/config" % info.id] = info.sxpr()
info.configure()
def vnet_delete(self, id):
@@ -126,7 +126,7 @@ class XendVnet:
info = self.vnet_get(id)
if info:
del self.vnet[id]
- self.db.delete(id)
+ self.dbmap.delete(id)
info.delete()
def instance():
diff --git a/tools/python/xen/xend/encode.py b/tools/python/xen/xend/encode.py
index 48815defa9..38c9351db7 100644
--- a/tools/python/xen/xend/encode.py
+++ b/tools/python/xen/xend/encode.py
@@ -14,8 +14,6 @@ import httplib
import random
import md5
-from xen.util.ip import _readline, _readlines
-
# Extract from HTML4 spec.
## The following example illustrates "multipart/form-data"
## encoding. Suppose we have the following form:
@@ -124,7 +122,7 @@ def encode_multipart(d):
out.write('"\r\n')
out.write('Content-Type: application/octet-stream\r\n')
out.write('\r\n')
- for l in _readlines(v):
+ for l in v.readlines():
out.write(l)
else:
out.write('Content-Disposition: form-data; name="')
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
new file mode 100644
index 0000000000..e0d70581bf
--- /dev/null
+++ b/tools/python/xen/xend/image.py
@@ -0,0 +1,339 @@
+import os
+
+import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+from xen.xend import sxp
+from xen.xend.XendError import VmError
+from xen.xend.XendLogging import log
+from xen.xend.xenstore import DBVar
+
+class ImageHandler:
+ """Abstract base class for image handlers.
+
+ initDomain() is called to initialise the domain memory.
+
+ createImage() is called to configure and build the domain from its
+ kernel image and ramdisk etc.
+
+ The method buildDomain() is used to build the domain, and must be
+ defined in a subclass. Usually this is the only method that needs
+ defining in a subclass.
+
+ The method createDeviceModel() is called to create the domain device
+ model if it needs one. The default is to do nothing.
+
+ The method destroy() is called when the domain is destroyed.
+ The default is to do nothing.
+
+ """
+
+ #======================================================================
+ # Class vars and methods.
+
+ """Table of image handler classes for virtual machine images.
+ Indexed by image type.
+ """
+ imageHandlerClasses = {}
+
+ def addImageHandlerClass(cls, h):
+ """Add a handler class for an image type
+ @param h: handler (ImageHandler subclass)
+ """
+ cls.imageHandlerClasses[h.ostype] = h
+
+ addImageHandlerClass = classmethod(addImageHandlerClass)
+
+ def findImageHandlerClass(cls, image):
+ """Find the image handler class for an image config.
+
+ @param image: image config
+ @return ImageHandler subclass or None
+ """
+ ty = sxp.name(image)
+ if ty is None:
+ raise VmError('missing image type')
+ imageClass = cls.imageHandlerClasses.get(ty)
+ if imageClass is None:
+ raise VmError('unknown image type: ' + ty)
+ return imageClass
+
+ findImageHandlerClass = classmethod(findImageHandlerClass)
+
+ def create(cls, vm, image):
+ """Create an image handler for a vm.
+
+ @param vm vm
+ @param image image config
+ @return ImageHandler instance
+ """
+ imageClass = cls.findImageHandlerClass(image)
+ return imageClass(vm, image)
+
+ create = classmethod(create)
+
+ #======================================================================
+ # Instance vars and methods.
+
+ db = None
+ ostype = None
+
+ config = None
+ kernel = None
+ ramdisk = None
+ cmdline = None
+ flags = 0
+
+ __exports__ = [
+ DBVar('ostype', ty='str'),
+ DBVar('config', ty='sxpr'),
+ DBVar('kernel', ty='str'),
+ DBVar('ramdisk', ty='str'),
+ DBVar('cmdline', ty='str'),
+ DBVar('flags', ty='int'),
+ ]
+
+ def __init__(self, vm, config):
+ self.vm = vm
+ self.db = vm.db.addChild('/image')
+ self.config = config
+
+ def exportToDB(self, save=False):
+ self.db.exportToDB(self, fields=self.__exports__, save=save)
+
+ def importFromDB(self):
+ self.db.importFromDB(self, fields=self.__exports__)
+
+ def unlink(self, f):
+ if not f: return
+ try:
+ os.unlink(f)
+ except OSError, ex:
+ log.warning("error removing bootloader file '%s': %s", f, ex)
+
+ def initDomain(self, dom, memory, cpu, cpu_weight):
+ """Initial domain create.
+
+ @return domain id
+ """
+
+ mem_kb = self.getDomainMemory(memory)
+ if not self.vm.restore:
+ dom = xc.domain_create(dom = dom or 0)
+ # if bootloader, unlink here. But should go after buildDomain() ?
+ if self.vm.bootloader:
+ self.unlink(self.kernel)
+ self.unlink(self.ramdisk)
+ if dom <= 0:
+ raise VmError('Creating domain failed: name=%s' % self.vm.name)
+ log.debug("initDomain: cpu=%d mem_kb=%d dom=%d", cpu, mem_kb, dom)
+ # xc.domain_setuuid(dom, uuid)
+ xc.domain_setcpuweight(dom, cpu_weight)
+ xc.domain_setmaxmem(dom, mem_kb)
+ xc.domain_memory_increase_reservation(dom, mem_kb)
+ if cpu != -1:
+ xc.domain_pincpu(dom, 0, 1<<int(cpu))
+ return dom
+
+ def createImage(self):
+ """Entry point to create domain memory image.
+ Override in subclass if needed.
+ """
+ self.configure()
+ self.createDomain()
+
+ def configure(self):
+ """Config actions common to all unix-like domains."""
+ self.kernel = sxp.child_value(self.config, "kernel")
+ self.cmdline = ""
+ ip = sxp.child_value(self.config, "ip", None)
+ if ip:
+ self.cmdline += " ip=" + ip
+ root = sxp.child_value(self.config, "root")
+ if root:
+ self.cmdline += " root=" + root
+ args = sxp.child_value(self.config, "args")
+ if args:
+ self.cmdline += " " + args
+ self.ramdisk = sxp.child_value(self.config, "ramdisk", '')
+
+ def createDomain(self):
+ """Build the domain boot image.
+ """
+ # Set params and call buildDomain().
+ self.flags = 0
+ if self.vm.netif_backend: self.flags |= SIF_NET_BE_DOMAIN
+ if self.vm.blkif_backend: self.flags |= SIF_BLK_BE_DOMAIN
+
+ if self.vm.recreate or self.vm.restore:
+ return
+ if not os.path.isfile(self.kernel):
+ raise VmError('Kernel image does not exist: %s' % self.kernel)
+ if self.ramdisk and not os.path.isfile(self.ramdisk):
+ raise VmError('Kernel ramdisk does not exist: %s' % self.ramdisk)
+ if len(self.cmdline) >= 256:
+ log.warning('kernel cmdline too long, domain %d', self.vm.getDomain())
+
+ log.info("buildDomain os=%s dom=%d vcpus=%d", self.ostype,
+ self.vm.getDomain(), self.vm.vcpus)
+ err = self.buildDomain()
+ if err != 0:
+ raise VmError('Building domain failed: ostype=%s dom=%d err=%d'
+ % (self.ostype, self.vm.getDomain(), err))
+
+ def getDomainMemory(self, mem_mb):
+ """Memory (in KB) the domain will need for mem_mb (in MB)."""
+ return mem_mb * 1024
+
+ def buildDomain(self):
+ """Build the domain. Define in subclass."""
+ raise NotImplementedError()
+
+ def createDeviceModel(self):
+ """Create device model for the domain (define in subclass if needed)."""
+ pass
+
+ def destroy(self):
+ """Extra cleanup on domain destroy (define in subclass if needed)."""
+ pass
+
+addImageHandlerClass = ImageHandler.addImageHandlerClass
+
+class LinuxImageHandler(ImageHandler):
+
+ ostype = "linux"
+
+ def buildDomain(self):
+ if self.vm.store_channel:
+ store_evtchn = self.vm.store_channel.port2
+ else:
+ store_evtchn = 0
+ ret = xc.linux_build(dom = self.vm.getDomain(),
+ image = self.kernel,
+ control_evtchn = self.vm.channel.getRemotePort(),
+ store_evtchn = store_evtchn,
+ cmdline = self.cmdline,
+ ramdisk = self.ramdisk,
+ flags = self.flags,
+ vcpus = self.vm.vcpus)
+ if isinstance(ret, dict):
+ self.vm.store_mfn = ret.get('store_mfn')
+ return 0
+ return ret
+
+class Plan9ImageHandler(ImageHandler):
+
+ ostype = "plan9"
+
+ def buildDomain(self):
+ return xc.plan9_build(dom = self.vm.getDomain(),
+ image = self.kernel,
+ control_evtchn = self.vm.channel.getRemotePort(),
+ cmdline = self.cmdline,
+ ramdisk = self.ramdisk,
+ flags = self.flags,
+ vcpus = self.vm.vcpus)
+
+class VmxImageHandler(ImageHandler):
+
+ __exports__ = ImageHandler.__exports__ + [
+ DBVar('memmap', ty='str'),
+ DBVar('memmap_value', ty='sxpr'),
+ # device channel?
+ ]
+
+ ostype = "vmx"
+ memmap = None
+ memmap_value = None
+ device_channel = None
+
+ def createImage(self):
+ """Create a VM for the VMX environment.
+ """
+ self.configure()
+ self.parseMemmap()
+ self.createDomain()
+
+ def buildDomain(self):
+ return xc.vmx_build(dom = self.vm.getDomain(),
+ image = self.kernel,
+ control_evtchn = 0,
+ memsize = self.vm.memory,
+ memmap = self.memmap_value,
+ cmdline = self.cmdline,
+ ramdisk = self.ramdisk,
+ flags = self.flags)
+
+ def parseMemmap(self):
+ self.memmap = sxp.child_value(self.vm.config, "memmap")
+ if self.memmap is None:
+ raise VmError("missing memmap")
+ memmap = sxp.parse(open(self.memmap))[0]
+ from xen.util.memmap import memmap_parse
+ self.memmap_value = memmap_parse(memmap)
+
+ def createDeviceModel_old(self):
+ device_model = sxp.child_value(self.vm.config, 'device_model')
+ if not device_model:
+ raise VmError("vmx: missing device model")
+ device_config = sxp.child_value(self.vm.config, 'device_config')
+ if not device_config:
+ raise VmError("vmx: missing device config")
+ # Create an event channel.
+ self.device_channel = channel.eventChannel(0, self.vm.getDomain())
+ # Execute device model.
+ #todo: Error handling
+ os.system(device_model
+ + " -f %s" % device_config
+ + " -d %d" % self.vm.getDomain()
+ + " -p %d" % self.device_channel['port1']
+ + " -m %s" % self.vm.memory)
+
+ def createDeviceModel(self):
+ device_model = sxp.child_value(self.vm.config, 'device_model')
+ if not device_model:
+ raise VmError("vmx: missing device model")
+ device_config = sxp.child_value(self.vm.config, 'device_config')
+ if not device_config:
+ raise VmError("vmx: missing device config")
+ # Create an event channel
+ self.device_channel = channel.eventChannel(0, self.vm.getDomain())
+ # Execute device model.
+ #todo: Error handling
+ # XXX RN: note that the order of args matter!
+ os.system(device_model
+ + " -f %s" % device_config
+ + self.vncParams()
+ + " -d %d" % self.vm.getDomain()
+ + " -p %d" % self.device_channel['port1']
+ + " -m %s" % self.vm.memory)
+
+ def vncParams(self):
+ # see if a vncviewer was specified
+ # XXX RN: bit of a hack. should unify this, maybe stick in config space
+ vncconnect=""
+ image = self.config
+ args = sxp.child_value(image, "args")
+ if args:
+ arg_list = string.split(args)
+ for arg in arg_list:
+ al = string.split(arg, '=')
+ if al[0] == "VNC_VIEWER":
+ vncconnect=" -v %s" % al[1]
+ break
+ return vncconnect
+
+ def destroy(self):
+ channel.eventChannelClose(self.device_channel)
+
+ def getDomainMemory(self, mem_mb):
+ return (mem_mb * 1024) + self.getPageTableSize(mem_mb)
+
+ def getPageTableSize(self, mem_mb):
+ """Return the size of memory needed for 1:1 page tables for physical
+ mode.
+
+ @param mem_mb: size in MB
+ @return size in KB
+ """
+ # Logic x86-32 specific.
+ # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
+ return (1 + ((mem_mb + 3) >> 2)) * 4
diff --git a/tools/python/xen/xend/scheduler.py b/tools/python/xen/xend/scheduler.py
new file mode 100644
index 0000000000..6f6f611c0e
--- /dev/null
+++ b/tools/python/xen/xend/scheduler.py
@@ -0,0 +1,24 @@
+import threading
+
+def later(delay, fn, args=(), kwargs={}):
+ """Schedule a function to be called later.
+
+ @param delay: delay in seconds
+ @param fn: function
+ @param args: arguments (list)
+ @param kwargs: keyword arguments (map)
+ """
+ timer = threading.Timer(delay, fn, args=args, kwargs=kwargs)
+ timer.start()
+ return timer
+
+def now(fn, args=(), kwargs={}):
+ """Schedule a function to be called now.
+
+ @param fn: function
+ @param args: arguments (list)
+ @param kwargs: keyword arguments (map)
+ """
+ thread = threading.Thread(target=fn, args=args, kwargs=kwargs)
+ thread.start()
+ return thread
diff --git a/tools/python/xen/xend/server/SrvBase.py b/tools/python/xen/xend/server/SrvBase.py
deleted file mode 100644
index 5990733d75..0000000000
--- a/tools/python/xen/xend/server/SrvBase.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-
-import cgi
-
-import os
-import sys
-import types
-import StringIO
-
-from twisted.internet import defer
-from twisted.internet import reactor
-from twisted.protocols import http
-from twisted.web import error
-from twisted.web import resource
-from twisted.web import server
-from twisted.python.failure import Failure
-
-from xen.xend import sxp
-from xen.xend import PrettyPrint
-from xen.xend.Args import ArgError
-from xen.xend.XendError import XendError
-from xen.xend.XendLogging import log
-
-def uri_pathlist(p):
- """Split a path into a list.
- p path
- return list of path elements
- """
- l = []
- for x in p.split('/'):
- if x == '': continue
- l.append(x)
- return l
-
-class SrvBase(resource.Resource):
- """Base class for services.
- """
-
- def parse_form(self, req, method):
- """Parse the data for a request, GET using the URL, POST using encoded data.
- Posts should use enctype='multipart/form-data' in the <form> tag,
- rather than 'application/x-www-form-urlencoded'. Only 'multipart/form-data'
- handles file upload.
-
- req request
- returns a cgi.FieldStorage instance
- """
- env = {}
- env['REQUEST_METHOD'] = method
- if self.query:
- env['QUERY_STRING'] = self.query
- val = cgi.FieldStorage(fp=req.rfile, headers=req.headers, environ=env)
- return val
-
- def use_sxp(self, req):
- """Determine whether to send an SXP response to a request.
- Uses SXP if there is no User-Agent, no Accept, or application/sxp is in Accept.
-
- req request
- returns 1 for SXP, 0 otherwise
- """
- ok = 0
- user_agent = req.getHeader('User-Agent')
- accept = req.getHeader('Accept')
- if (not user_agent) or (not accept) or (accept.find(sxp.mime_type) >= 0):
- ok = 1
- return ok
-
- def get_op_method(self, op):
- """Get the method for an operation.
- For operation 'foo' looks for 'op_foo'.
-
- op operation name
- returns method or None
- """
- op_method_name = 'op_' + op
- return getattr(self, op_method_name, None)
-
- def perform(self, req):
- """General operation handler for posted operations.
- For operation 'foo' looks for a method op_foo and calls
- it with op_foo(op, req). Replies with code 500 if op_foo
- is not found.
-
- The method must return a list when req.use_sxp is true
- and an HTML string otherwise (or list).
- Methods may also return a Deferred (for incomplete processing).
-
- req request
- """
- op = req.args.get('op')
- if op is None or len(op) != 1:
- req.setResponseCode(http.NOT_ACCEPTABLE, "Invalid request")
- return ''
- op = op[0]
- op_method = self.get_op_method(op)
- if op_method is None:
- req.setResponseCode(http.NOT_IMPLEMENTED, "Operation not implemented: " + op)
- req.setHeader("Content-Type", "text/plain")
- req.write("Operation not implemented: " + op)
- return ''
- else:
- return self._perform(op, op_method, req)
-
- def _perform(self, op, op_method, req):
- try:
- val = op_method(op, req)
- except Exception, err:
- self._perform_err(err, op, req)
- return ''
-
- if isinstance(val, defer.Deferred):
- val.addCallback(self._perform_cb, op, req, dfr=1)
- val.addErrback(self._perform_err, op, req, dfr=1)
- return server.NOT_DONE_YET
- else:
- self._perform_cb(val, op, req, dfr=0)
- return ''
-
- def _perform_cb(self, val, op, req, dfr=0):
- """Callback to complete the request.
- May be called from a Deferred.
-
- @param err: the error
- @param req: request causing the error
- @param dfr: deferred flag
- """
- if isinstance(val, error.ErrorPage):
- req.write(val.render(req))
- elif self.use_sxp(req):
- req.setHeader("Content-Type", sxp.mime_type)
- sxp.show(val, out=req)
- else:
- req.write('<html><head></head><body>')
- self.print_path(req)
- if isinstance(val, types.ListType):
- req.write('<code><pre>')
- PrettyPrint.prettyprint(val, out=req)
- req.write('</pre></code>')
- else:
- req.write(str(val))
- req.write('</body></html>')
- if dfr:
- req.finish()
-
- def _perform_err(self, err, op, req, dfr=0):
- """Error callback to complete a request.
- May be called from a Deferred.
-
- @param err: the error
- @param req: request causing the error
- @param dfr: deferred flag
- """
- if isinstance(err, Failure):
- err = err.getErrorMessage()
- elif not (isinstance(err, ArgError) or
- isinstance(err, sxp.ParseError) or
- isinstance(err, XendError)):
- if dfr:
- return err
- else:
- raise
- log.exception("op=%s: %s", op, str(err))
- if self.use_sxp(req):
- req.setHeader("Content-Type", sxp.mime_type)
- sxp.show(['xend.err', str(err)], out=req)
- else:
- req.setHeader("Content-Type", "text/plain")
- req.write('Error ')
- req.write(': ')
- req.write(str(err))
- if dfr:
- req.finish()
-
-
- def print_path(self, req):
- """Print the path with hyperlinks.
- """
- pathlist = [x for x in req.prepath if x != '' ]
- s = "/"
- req.write('<h1><a href="/">/</a>')
- for x in pathlist:
- s += x + "/"
- req.write(' <a href="%s">%s</a>/' % (s, x))
- req.write("</h1>")
diff --git a/tools/python/xen/xend/server/SrvConsole.py b/tools/python/xen/xend/server/SrvConsole.py
index 4105fa6e16..f147f2810b 100644
--- a/tools/python/xen/xend/server/SrvConsole.py
+++ b/tools/python/xen/xend/server/SrvConsole.py
@@ -2,7 +2,7 @@
from xen.xend import sxp
from xen.xend import XendConsole
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
class SrvConsole(SrvDir):
"""An individual console.
@@ -21,22 +21,18 @@ class SrvConsole(SrvDir):
return self.perform(req)
def render_GET(self, req):
- try:
- if self.use_sxp(req):
- req.setHeader("Content-Type", sxp.mime_type)
- sxp.show(self.info.sxpr(), out=req)
- else:
- req.write('<html><head></head><body>')
- self.print_path(req)
- #self.ls()
- req.write('<p>%s</p>' % self.info)
- req.write('<p><a href="%s">Connect to domain %d</a></p>'
- % (self.info.uri(), self.info.dom))
- self.form(req)
- req.write('</body></html>')
- return ''
- except Exception, ex:
- self._perform_err(ex, req)
+ if self.use_sxp(req):
+ req.setHeader("Content-Type", sxp.mime_type)
+ sxp.show(self.info.sxpr(), out=req)
+ else:
+ req.write('<html><head></head><body>')
+ self.print_path(req)
+ #self.ls()
+ req.write('<p>%s</p>' % self.info)
+ req.write('<p><a href="%s">Connect to domain %d</a></p>'
+ % (self.info.uri(), self.info.id))
+ self.form(req)
+ req.write('</body></html>')
def form(self, req):
req.write('<form method="post" action="%s">' % req.prePathURL())
diff --git a/tools/python/xen/xend/server/SrvConsoleDir.py b/tools/python/xen/xend/server/SrvConsoleDir.py
index 139f302536..5c15eebb0f 100644
--- a/tools/python/xen/xend/server/SrvConsoleDir.py
+++ b/tools/python/xen/xend/server/SrvConsoleDir.py
@@ -1,6 +1,6 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
from SrvConsole import SrvConsole
from xen.xend import XendConsole
from xen.xend import sxp
@@ -31,20 +31,16 @@ class SrvConsoleDir(SrvDir):
return v
def render_GET(self, req):
- try:
- if self.use_sxp(req):
- req.setHeader("Content-Type", sxp.mime_type)
- self.ls_console(req, 1)
- else:
- req.write("<html><head></head><body>")
- self.print_path(req)
- self.ls(req)
- self.ls_console(req)
- #self.form(req.wfile)
- req.write("</body></html>")
- return ''
- except Exception, ex:
- self._perform_err(ex, req)
+ if self.use_sxp(req):
+ req.setHeader("Content-Type", sxp.mime_type)
+ self.ls_console(req, 1)
+ else:
+ req.write("<html><head></head><body>")
+ self.print_path(req)
+ self.ls(req)
+ self.ls_console(req)
+ #self.form(req.wfile)
+ req.write("</body></html>")
def ls_console(self, req, use_sxp=0):
url = req.prePathURL()
diff --git a/tools/python/xen/xend/server/SrvDaemon.py b/tools/python/xen/xend/server/SrvDaemon.py
index 9bf5fd9bb0..133df206b9 100644
--- a/tools/python/xen/xend/server/SrvDaemon.py
+++ b/tools/python/xen/xend/server/SrvDaemon.py
@@ -17,320 +17,30 @@ import StringIO
import traceback
import time
-from twisted.internet import pollreactor
-pollreactor.install()
-
-from twisted.internet import reactor
-from twisted.internet import protocol
-from twisted.internet import abstract
-from twisted.internet import defer
-
from xen.lowlevel import xu
from xen.xend import sxp
from xen.xend import PrettyPrint
-from xen.xend import EventServer
-eserver = EventServer.instance()
+from xen.xend import EventServer; eserver = EventServer.instance()
from xen.xend.XendError import XendError
from xen.xend.server import SrvServer
-from xen.xend import XendRoot
from xen.xend.XendLogging import log
-from xen.util.ip import _readline, _readlines
-
import channel
-import blkif
-import netif
-import console
-import domain
+import controller
+import event
+import relocate
from params import *
-DAEMONIZE = 1
-DEBUG = 1
-
-class NotifierProtocol(protocol.Protocol):
- """Asynchronous handler for i/o on the notifier (event channel).
- """
-
- def __init__(self, channelFactory):
- self.channelFactory = channelFactory
-
- def notificationReceived(self, idx):
- channel = self.channelFactory.getChannel(idx)
- if channel:
- channel.notificationReceived()
-
- def connectionLost(self, reason=None):
- pass
-
- def doStart(self):
- pass
-
- def doStop(self):
- pass
-
- def startProtocol(self):
- pass
-
- def stopProtocol(self):
- pass
-
-class NotifierPort(abstract.FileDescriptor):
- """Transport class for the event channel.
- """
-
- def __init__(self, daemon, notifier, proto, reactor=None):
- assert isinstance(proto, NotifierProtocol)
- abstract.FileDescriptor.__init__(self, reactor)
- self.daemon = daemon
- self.notifier = notifier
- self.protocol = proto
-
- def startListening(self):
- self._bindNotifier()
- self._connectToProtocol()
-
- def stopListening(self):
- if self.connected:
- result = self.d = defer.Deferred()
- else:
- result = None
- self.loseConnection()
- return result
-
- def fileno(self):
- return self.notifier.fileno()
-
- def _bindNotifier(self):
- self.connected = 1
-
- def _connectToProtocol(self):
- self.protocol.makeConnection(self)
- self.startReading()
-
- def loseConnection(self):
- if self.connected:
- self.stopReading()
- self.disconnecting = 1
- reactor.callLater(0, self.connectionLost)
-
- def connectionLost(self, reason=None):
- abstract.FileDescriptor.connectionLost(self, reason)
- if hasattr(self, 'protocol'):
- self.protocol.doStop()
- self.connected = 0
- #self.notifier.close() # Not implemented.
- os.close(self.fileno())
- del self.notifier
- if hasattr(self, 'd'):
- self.d.callback(None)
- del self.d
-
- def doRead(self):
- count = 0
- while 1:
- notification = self.notifier.read()
- if not notification:
- break
- self.protocol.notificationReceived(notification)
- self.notifier.unmask(notification)
- count += 1
-
-class EventProtocol(protocol.Protocol):
- """Asynchronous handler for a connected event socket.
- """
-
- def __init__(self, daemon):
- #protocol.Protocol.__init__(self)
- self.daemon = daemon
- # Event queue.
- self.queue = []
- # Subscribed events.
- self.events = []
- self.parser = sxp.Parser()
- self.pretty = 0
-
- # For debugging subscribe to everything and make output pretty.
- self.subscribe(['*'])
- self.pretty = 1
-
- def dataReceived(self, data):
- try:
- self.parser.input(data)
- if self.parser.ready():
- val = self.parser.get_val()
- res = self.dispatch(val)
- self.send_result(res)
- if self.parser.at_eof():
- self.loseConnection()
- except SystemExit:
- raise
- except:
- if DEBUG:
- raise
- else:
- self.send_error()
-
- def loseConnection(self):
- if self.transport:
- self.transport.loseConnection()
- if self.connected:
- reactor.callLater(0, self.connectionLost)
-
- def connectionLost(self, reason=None):
- self.unsubscribe()
-
- def send_reply(self, sxpr):
- io = StringIO.StringIO()
- if self.pretty:
- PrettyPrint.prettyprint(sxpr, out=io)
- else:
- sxp.show(sxpr, out=io)
- print >> io
- io.seek(0)
- return self.transport.write(io.getvalue())
-
- def send_result(self, res):
- return self.send_reply(['ok', res])
-
- def send_error(self):
- (extype, exval) = sys.exc_info()[:2]
- return self.send_reply(['err',
- ['type', str(extype)],
- ['value', str(exval)]])
-
- def send_event(self, val):
- return self.send_reply(['event', val[0], val[1]])
-
- def unsubscribe(self):
- for event in self.events:
- eserver.unsubscribe(event, self.queue_event)
-
- def subscribe(self, events):
- self.unsubscribe()
- for event in events:
- eserver.subscribe(event, self.queue_event)
- self.events = events
-
- def queue_event(self, name, v):
- # Despite the name we don't queue the event here.
- # We send it because the transport will queue it.
- self.send_event([name, v])
-
- def opname(self, name):
- return 'op_' + name.replace('.', '_')
-
- def operror(self, name, req):
- raise XendError('Invalid operation: ' +name)
-
- def dispatch(self, req):
- op_name = sxp.name(req)
- op_method_name = self.opname(op_name)
- op_method = getattr(self, op_method_name, self.operror)
- return op_method(op_name, req)
-
- def op_help(self, name, req):
- def nameop(x):
- if x.startswith('op_'):
- return x[3:].replace('_', '.')
- else:
- return x
-
- l = [ nameop(k) for k in dir(self) if k.startswith('op_') ]
- return l
-
- def op_quit(self, name, req):
- self.loseConnection()
-
- def op_exit(self, name, req):
- sys.exit(0)
-
- def op_pretty(self, name, req):
- self.pretty = 1
- return ['ok']
-
- def op_console_disconnect(self, name, req):
- id = sxp.child_value(req, 'id')
- if not id:
- raise XendError('Missing console id')
- id = int(id)
- self.daemon.console_disconnect(id)
- return ['ok']
-
- def op_info(self, name, req):
- val = ['info']
- val += self.daemon.consoles()
- val += self.daemon.blkifs()
- val += self.daemon.netifs()
- return val
-
- def op_sys_subscribe(self, name, v):
- # (sys.subscribe event*)
- # Subscribe to the events:
- self.subscribe(v[1:])
- return ['ok']
-
- def op_sys_inject(self, name, v):
- # (sys.inject event)
- event = v[1]
- eserver.inject(sxp.name(event), event)
- return ['ok']
-
- def op_trace(self, name, v):
- mode = (v[1] == 'on')
- self.daemon.tracing(mode)
-
- def op_log_stderr(self, name, v):
- mode = v[1]
- logging = XendRoot.instance().get_logging()
- if mode == 'on':
- logging.addLogStderr()
- else:
- logging.removeLogStderr()
-
- def op_debug_msg(self, name, v):
- mode = v[1]
- import messages
- messages.DEBUG = (mode == 'on')
-
- def op_debug_controller(self, name, v):
- mode = v[1]
- import controller
- controller.DEBUG = (mode == 'on')
-
-
-class EventFactory(protocol.Factory):
- """Asynchronous handler for the event server socket.
- """
- protocol = EventProtocol
- service = None
-
- def __init__(self, daemon):
- #protocol.Factory.__init__(self)
- self.daemon = daemon
-
- def buildProtocol(self, addr):
- proto = self.protocol(self.daemon)
- proto.factory = self
- return proto
-
-class VirqClient:
- def __init__(self, daemon):
- self.daemon = daemon
-
- def virqReceived(self, virq):
- print 'VirqClient.virqReceived>', virq
- eserver.inject('xend.virq', virq)
-
- def lostChannel(self, channel):
- print 'VirqClient.lostChannel>', channel
-
class Daemon:
"""The xend daemon.
"""
def __init__(self):
+ self.channelF = None
self.shutdown = 0
self.traceon = 0
+ self.tracefile = None
+ self.traceindent = 0
def daemon_pids(self):
pids = []
@@ -345,7 +55,6 @@ class Daemon:
if not pm: continue
xm = xendre.match(pm.group('cmd'))
if not xm: continue
- #print 'pid=', pm.group('pid'), 'cmd=', pm.group('cmd')
pids.append(int(pm.group('pid')))
return pids
@@ -385,7 +94,7 @@ class Daemon:
"""
running = 0
if pid:
- lines = _readlines(os.popen('ps %d 2>/dev/null' % pid))
+ lines = os.popen('ps %d 2>/dev/null' % pid).readlines()
exp = '^ *%d.+%s' % (pid, name)
for line in lines:
if re.search(exp, line):
@@ -416,21 +125,20 @@ class Daemon:
def cleanup_xend(self, kill=False):
return self.cleanup_process(XEND_PID_FILE, "xend", kill)
- def cleanup_xfrd(self, kill=False):
- return self.cleanup_process(XFRD_PID_FILE, "xfrd", kill)
+ def cleanup_xenstored(self, kill=False):
+ return self.cleanup_process(XENSTORED_PID_FILE, "xenstored", kill)
def cleanup(self, kill=False):
self.cleanup_xend(kill=kill)
- self.cleanup_xfrd(kill=kill)
-
+ #self.cleanup_xenstored(kill=kill)
+
def status(self):
- """Returns the status of the xend and xfrd daemons.
+ """Returns the status of the xend daemon.
The return value is defined by the LSB:
0 Running
3 Not running
"""
- if (self.cleanup_process(XEND_PID_FILE, "xend", False) == 0 or
- self.cleanup_process(XFRD_PID_FILE, "xfrd", False) == 0):
+ if self.cleanup_process(XEND_PID_FILE, "xend", False) == 0:
return 3
else:
return 0
@@ -459,18 +167,29 @@ class Daemon:
pidfile.close()
return pid
- def start_xfrd(self):
- """Fork and exec xfrd, writing its pid to XFRD_PID_FILE.
+ def start_xenstored(self):
+ """Fork and exec xenstored, writing its pid to XENSTORED_PID_FILE.
"""
- if self.fork_pid(XFRD_PID_FILE):
+ def mkdirs(p):
+ try:
+ os.makedirs(p)
+ except:
+ pass
+ mkdirs(XENSTORED_RUN_DIR)
+ mkdirs(XENSTORED_LIB_DIR)
+
+ pid = self.fork_pid(XENSTORED_PID_FILE)
+ if pid:
# Parent
- pass
+ log.info("Started xenstored, pid=%d", pid)
else:
# Child
- os.execl("/usr/sbin/xfrd", "xfrd")
+ if XEND_DAEMONIZE and (not XENSTORED_DEBUG):
+ self.daemonize()
+ os.execl("/usr/sbin/xenstored", "xenstored", "--no-fork")
def daemonize(self):
- if not DAEMONIZE: return
+ if not XEND_DAEMONIZE: return
# Detach from TTY.
os.setsid()
@@ -480,16 +199,16 @@ class Daemon:
os.close(0)
os.close(1)
os.close(2)
- if DEBUG:
+ if XEND_DEBUG:
os.open('/dev/null', os.O_RDONLY)
# XXX KAF: Why doesn't this capture output from C extensions that
# fprintf(stdout) or fprintf(stderr) ??
- os.open('/var/log/xend-debug.log', os.O_WRONLY|os.O_CREAT)
+ os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
os.dup(1)
else:
os.open('/dev/null', os.O_RDWR)
os.dup(0)
- os.open('/var/log/xend-debug.log', os.O_WRONLY|os.O_CREAT)
+ os.open(XEND_DEBUG_LOG, os.O_WRONLY|os.O_CREAT)
def start(self, trace=0):
@@ -499,17 +218,15 @@ class Daemon:
4 Insufficient privileges
"""
xend_pid = self.cleanup_xend()
- xfrd_pid = self.cleanup_xfrd()
-
+ xenstored_pid = self.cleanup_xenstored()
- self.daemonize()
-
if self.set_user():
return 4
os.chdir("/")
- if xfrd_pid == 0:
- self.start_xfrd()
+ if xenstored_pid == 0:
+ self.start_xenstored()
+
if xend_pid > 0:
# Trying to run an already-running service is a success.
return 0
@@ -599,143 +316,49 @@ class Daemon:
os.setuid(pwd.getpwnam(XEND_USER)[2])
return 0
except KeyError, error:
- print "Error: no such user '%s'" % XEND_USER
+ print >>sys.stderr, "Error: no such user '%s'" % XEND_USER
return 1
def stop(self):
return self.cleanup(kill=True)
def run(self):
- xroot = XendRoot.instance()
- log.info("Xend Daemon started")
- self.createFactories()
- self.listenEvent(xroot)
- self.listenNotifier()
- self.listenVirq()
- SrvServer.create(bridge=1)
- reactor.run()
-
+ try:
+ log.info("Xend Daemon started")
+ self.createFactories()
+ event.listenEvent(self)
+ relocate.listenRelocation()
+ self.listenChannels()
+ servers = SrvServer.create()
+ self.daemonize()
+ servers.start()
+ except Exception, ex:
+ print >>sys.stderr, 'Exception starting xend:', ex
+ if XEND_DEBUG:
+ traceback.print_exc()
+ log.exception("Exception starting xend")
+ self.exit(1)
+
def createFactories(self):
self.channelF = channel.channelFactory()
- self.domainCF = domain.DomainControllerFactory()
- self.blkifCF = blkif.BlkifControllerFactory()
- self.netifCF = netif.NetifControllerFactory()
- self.consoleCF = console.ConsoleControllerFactory()
-
- def listenEvent(self, xroot):
- protocol = EventFactory(self)
- port = xroot.get_xend_event_port()
- interface = xroot.get_xend_address()
- return reactor.listenTCP(port, protocol, interface=interface)
-
- def listenNotifier(self):
- protocol = NotifierProtocol(self.channelF)
- p = NotifierPort(self, self.channelF.notifier, protocol, reactor)
- p.startListening()
- return p
-
- def listenVirq(self):
- virqChan = self.channelF.virqChannel(channel.VIRQ_DOM_EXC)
- virqChan.registerClient(VirqClient(self))
-
- def exit(self):
- reactor.disconnectAll()
- sys.exit(0)
-
- def getDomChannel(self, dom):
- """Get the channel to a domain.
-
- @param dom: domain
- @return: channel (or None)
- """
- return self.channelF.getDomChannel(dom)
-
- def createDomChannel(self, dom, local_port=0, remote_port=0):
- """Get the channel to a domain, creating if necessary.
-
- @param dom: domain
- @param local_port: optional local port to re-use
- @param remote_port: optional remote port to re-use
- @return: channel
- """
- return self.channelF.domChannel(dom, local_port=local_port,
- remote_port=remote_port)
-
- def blkif_create(self, dom, recreate=0):
- """Create or get a block device interface controller.
-
- Returns controller
- """
- blkif = self.blkifCF.getController(dom)
- blkif.daemon = self
- return blkif
-
- def blkifs(self):
- return [ x.sxpr() for x in self.blkifCF.getControllers() ]
-
- def blkif_get(self, dom):
- return self.blkifCF.getControllerByDom(dom)
-
- def netif_create(self, dom, recreate=0):
- """Create or get a network interface controller.
-
- """
- return self.netifCF.getController(dom)
-
- def netifs(self):
- return [ x.sxpr() for x in self.netifCF.getControllers() ]
-
- def netif_get(self, dom):
- return self.netifCF.getControllerByDom(dom)
-
- def console_create(self, dom, console_port=None):
- """Create a console for a domain.
- """
- console = self.consoleCF.getControllerByDom(dom)
- if console is None:
- console = self.consoleCF.createController(dom, console_port)
- return console
-
- def consoles(self):
- return [ c.sxpr() for c in self.consoleCF.getControllers() ]
-
- def get_consoles(self):
- return self.consoleCF.getControllers()
- def get_console(self, id):
- return self.consoleCF.getControllerByIndex(id)
+ def listenChannels(self):
+ def virqReceived(virq):
+ eserver.inject('xend.virq', virq)
- def get_domain_console(self, dom):
- return self.consoleCF.getControllerByDom(dom)
-
- def console_disconnect(self, id):
- """Disconnect any connected console client.
- """
- console = self.get_console(id)
- if not console:
- raise XendError('Invalid console id')
- console.disconnect()
+ self.channelF.setVirqHandler(virqReceived)
+ self.channelF.start()
- def domain_shutdown(self, dom, reason, key=0):
- """Shutdown a domain.
- """
- dom = int(dom)
- ctrl = self.domainCF.getController(dom)
- if not ctrl:
- raise XendError('No domain controller: %s' % dom)
- ctrl.shutdown(reason, key)
- return 0
+ def exit(self, rc=0):
+ if self.channelF:
+ self.channelF.stop()
+ # Calling sys.exit() raises a SystemExit exception, which only
+ # kills the current thread. Calling os._exit() makes the whole
+ # Python process exit immediately. There doesn't seem to be another
+ # way to exit a Python with running threads.
+ #sys.exit(rc)
+ os._exit(rc)
- def domain_mem_target_set(self, dom, target):
- """Set memory target for a domain.
- """
- dom = int(dom)
- ctrl = self.domainCF.getController(dom)
- if not ctrl:
- raise XendError('No domain controller: %s' % dom)
- ctrl.mem_target_set(target)
- return 0
-
def instance():
global inst
try:
diff --git a/tools/python/xen/xend/server/SrvDeviceDir.py b/tools/python/xen/xend/server/SrvDeviceDir.py
deleted file mode 100644
index 52f428540d..0000000000
--- a/tools/python/xen/xend/server/SrvDeviceDir.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-
-from SrvDir import SrvDir
-
-class SrvDeviceDir(SrvDir):
- """Device directory.
- """
-
- pass
diff --git a/tools/python/xen/xend/server/SrvDmesg.py b/tools/python/xen/xend/server/SrvDmesg.py
index 11fa4547c2..fbf337712a 100644
--- a/tools/python/xen/xend/server/SrvDmesg.py
+++ b/tools/python/xen/xend/server/SrvDmesg.py
@@ -5,7 +5,7 @@ import os
from xen.xend import sxp
from xen.xend import XendDmesg
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
class SrvDmesg(SrvDir):
"""Xen Dmesg output.
@@ -19,19 +19,15 @@ class SrvDmesg(SrvDir):
self.perform(req)
def render_GET(self, req):
- try:
- if self.use_sxp(req):
- req.setHeader("Content-Type", "text/plain")
- req.write(self.info())
- else:
- req.write('<html><head></head><body>')
- self.print_path(req)
- req.write('<pre>')
- req.write(self.info())
- req.write('</pre></body></html>')
- return ''
- except Exception, ex:
- self._perform_err(ex, req)
+ if self.use_sxp(req):
+ req.setHeader("Content-Type", "text/plain")
+ req.write(self.info())
+ else:
+ req.write('<html><head></head><body>')
+ self.print_path(req)
+ req.write('<pre>')
+ req.write(self.info())
+ req.write('</pre></body></html>')
def info(self):
return self.xd.info()
diff --git a/tools/python/xen/xend/server/SrvDomain.py b/tools/python/xen/xend/server/SrvDomain.py
index a4c06f6c8d..255e6157bf 100644
--- a/tools/python/xen/xend/server/SrvDomain.py
+++ b/tools/python/xen/xend/server/SrvDomain.py
@@ -1,6 +1,6 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-from twisted.protocols import http
+from xen.web import http
from xen.xend import sxp
from xen.xend import XendDomain
@@ -8,7 +8,7 @@ from xen.xend import XendConsole
from xen.xend import PrettyPrint
from xen.xend.Args import FormFn
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
class SrvDomain(SrvDir):
"""Service managing a single domain.
@@ -26,24 +26,23 @@ class SrvDomain(SrvDir):
not a domain name.
"""
fn = FormFn(self.xd.domain_configure,
- [['dom', 'int'],
+ [['dom', 'int'],
['config', 'sxpr']])
- deferred = fn(req.args, {'dom': self.dom.dom})
- return deferred
+ return fn(req.args, {'dom': self.dom.id})
def op_unpause(self, op, req):
- val = self.xd.domain_unpause(self.dom.name)
+ val = self.xd.domain_unpause(self.dom.id)
return val
def op_pause(self, op, req):
- val = self.xd.domain_pause(self.dom.name)
+ val = self.xd.domain_pause(self.dom.id)
return val
def op_shutdown(self, op, req):
fn = FormFn(self.xd.domain_shutdown,
- [['dom', 'str'],
+ [['dom', 'int'],
['reason', 'str'],
- ['key', 'int']])
+ ['key', 'int']])
val = fn(req.args, {'dom': self.dom.id})
req.setResponseCode(http.ACCEPTED)
req.setHeader("Location", "%s/.." % req.prePathURL())
@@ -51,128 +50,133 @@ class SrvDomain(SrvDir):
def op_destroy(self, op, req):
fn = FormFn(self.xd.domain_destroy,
- [['dom', 'str'],
+ [['dom', 'int'],
['reason', 'str']])
val = fn(req.args, {'dom': self.dom.id})
req.setHeader("Location", "%s/.." % req.prePathURL())
return val
def op_save(self, op, req):
+ return req.threadRequest(self.do_save, op, req)
+
+ def do_save(self, op, req):
fn = FormFn(self.xd.domain_save,
- [['dom', 'str'],
+ [['dom', 'int'],
['file', 'str']])
- deferred = fn(req.args, {'dom': self.dom.id})
- deferred.addCallback(self._op_save_cb, req)
- return deferred
-
- def _op_save_cb(self, val, req):
+ val = fn(req.args, {'dom': self.dom.id})
return 0
def op_migrate(self, op, req):
+ return req.threadRequest(self.do_migrate, op, req)
+
+ def do_migrate(self, op, req):
fn = FormFn(self.xd.domain_migrate,
- [['dom', 'str'],
+ [['dom', 'int'],
['destination', 'str'],
- ['live', 'int'],
- ['resource', 'int']])
- deferred = fn(req.args, {'dom': self.dom.id})
- deferred.addCallback(self._op_migrate_cb, req)
- return deferred
-
- def _op_migrate_cb(self, info, req):
- print '_op_migrate_cb>', info, req
- #req.setResponseCode(http.ACCEPTED)
- host = info.dst_host
- port = info.dst_port
- dom = info.dst_dom
- url = "http://%s:%d/xend/domain/%d" % (host, port, dom)
- req.setHeader("Location", url)
- print '_op_migrate_cb> url=', url
- return url
+ ['live', 'int'],
+ ['resource', 'int']])
+ return fn(req.args, {'dom': self.dom.id})
def op_pincpu(self, op, req):
fn = FormFn(self.xd.domain_pincpu,
- [['dom', 'str'],
- ['cpu', 'int']])
+ [['dom', 'int'],
+ ['vcpu', 'int'],
+ ['cpumap', 'int']])
val = fn(req.args, {'dom': self.dom.id})
return val
def op_cpu_bvt_set(self, op, req):
fn = FormFn(self.xd.domain_cpu_bvt_set,
- [['dom', 'str'],
- ['mcuadv', 'int'],
- ['warpback', 'int'],
+ [['dom', 'int'],
+ ['mcuadv', 'int'],
+ ['warpback', 'int'],
['warpvalue', 'int'],
- ['warpl', 'long'],
- ['warpu', 'long']])
+ ['warpl', 'long'],
+ ['warpu', 'long']])
val = fn(req.args, {'dom': self.dom.id})
return val
- def op_cpu_atropos_set(self, op, req):
- fn = FormFn(self.xd.domain_cpu_atropos_set,
- [['dom', 'str'],
+
+ def op_cpu_sedf_set(self, op, req):
+ fn = FormFn(self.xd.domain_cpu_sedf_set,
+ [['dom', 'int'],
['period', 'int'],
['slice', 'int'],
- ['latency', 'int'],
- ['xtratime', 'int']])
+ ['latency', 'int'],
+ ['extratime', 'int'],
+ ['weight', 'int']])
val = fn(req.args, {'dom': self.dom.id})
return val
def op_maxmem_set(self, op, req):
fn = FormFn(self.xd.domain_maxmem_set,
- [['dom', 'str'],
+ [['dom', 'int'],
['memory', 'int']])
val = fn(req.args, {'dom': self.dom.id})
return val
+
+ def op_mem_target_set(self, op, req):
+ fn = FormFn(self.xd.domain_mem_target_set,
+ [['dom', 'int'],
+ ['target', 'int']])
+ val = fn(req.args, {'dom': self.dom.id})
+ return val
+
+ def op_devices(self, op, req):
+ fn = FormFn(self.xd.domain_devtype_ls,
+ [['dom', 'int'],
+ ['type', 'str']])
+ val = fn(req.args, {'dom': self.dom.id})
+ return val
+
+ def op_device(self, op, req):
+ fn = FormFn(self.xd.domain_devtype_get,
+ [['dom', 'int'],
+ ['type', 'str'],
+ ['idx', 'int']])
+ val = fn(req.args, {'dom': self.dom.id})
+ if val:
+ return val.sxpr()
+ else:
+ raise XendError("invalid device")
def op_device_create(self, op, req):
fn = FormFn(self.xd.domain_device_create,
- [['dom', 'str'],
+ [['dom', 'int'],
['config', 'sxpr']])
- d = fn(req.args, {'dom': self.dom.id})
- return d
+ val = fn(req.args, {'dom': self.dom.id})
+ return val
+
+ def op_device_refresh(self, op, req):
+ fn = FormFn(self.xd.domain_device_refresh,
+ [['dom', 'int'],
+ ['type', 'str'],
+ ['idx', 'str']])
+ val = fn(req.args, {'dom': self.dom.id})
+ return val
def op_device_destroy(self, op, req):
fn = FormFn(self.xd.domain_device_destroy,
- [['dom', 'str'],
+ [['dom', 'int'],
['type', 'str'],
- ['idx', 'str']])
+ ['idx', 'str']])
val = fn(req.args, {'dom': self.dom.id})
return val
def op_device_configure(self, op, req):
fn = FormFn(self.xd.domain_device_configure,
- [['dom', 'str'],
+ [['dom', 'int'],
['config', 'sxpr'],
- ['idx', 'str']])
- d = fn(req.args, {'dom': self.dom.id})
- return d
-
- def op_vifs(self, op, req):
- devs = self.xd.domain_vif_ls(self.dom.id)
- return [ dev.sxpr() for dev in devs ]
-
- def op_vif(self, op, req):
- fn = FormFn(self.xd.domain_vif_get,
- [['dom', 'str'],
- ['vif', 'str']])
- val = fn(req.args, {'dom': self.dom.id})
- return val
-
- def op_vbds(self, op, req):
- devs = self.xd.domain_vbd_ls(self.dom.id)
- return [ dev.sxpr() for dev in devs ]
-
- def op_vbd(self, op, req):
- fn = FormFn(self.xd.domain_vbd_get,
- [['dom', 'str'],
- ['vbd', 'str']])
+ ['idx', 'str']])
val = fn(req.args, {'dom': self.dom.id})
return val
- def op_mem_target_set(self, op, req):
- fn = FormFn(self.xd.domain_mem_target_set,
- [['dom', 'str'],
- ['target', 'int']])
+ def op_vif_limit_set(self, op, req):
+ fn = FormFn(self.xd.domain_vif_limit_set,
+ [['dom', 'int'],
+ ['vif', 'int'],
+ ['credit', 'int'],
+ ['period', 'int']])
val = fn(req.args, {'dom': self.dom.id})
return val
@@ -181,8 +185,13 @@ class SrvDomain(SrvDir):
def render_GET(self, req):
op = req.args.get('op')
- if op and op[0] in ['vifs', 'vif', 'vbds', 'vbd', 'mem_target_set']:
- return self.perform(req)
+ #
+ # XXX SMH: below may be useful once again if we ever try to get
+ # the raw 'web' interface to xend working once more. But for now
+ # is useless and out of date (i.e. no ops called 'v???' anymore).
+ #
+ # if op and op[0] in ['vifs', 'vif', 'vbds', 'vbd', 'mem_target_set']:
+ # return self.perform(req)
if self.use_sxp(req):
req.setHeader("Content-Type", sxp.mime_type)
sxp.show(self.dom.sxpr(), out=req)
diff --git a/tools/python/xen/xend/server/SrvDomainDir.py b/tools/python/xen/xend/server/SrvDomainDir.py
index 2fc8ee4877..d6f6291716 100644
--- a/tools/python/xen/xend/server/SrvDomainDir.py
+++ b/tools/python/xen/xend/server/SrvDomainDir.py
@@ -3,9 +3,7 @@
import traceback
from StringIO import StringIO
-from twisted.protocols import http
-from twisted.web import error
-from twisted.python.failure import Failure
+from xen.web import http
from xen.xend import sxp
from xen.xend import XendDomain
@@ -13,7 +11,7 @@ from xen.xend.Args import FormFn
from xen.xend.XendError import XendError
from xen.xend.XendLogging import log
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
from SrvDomain import SrvDomain
class SrvDomainDir(SrvDir):
@@ -26,7 +24,7 @@ class SrvDomainDir(SrvDir):
def domain(self, x):
val = None
- dom = self.xd.domain_lookup(x)
+ dom = self.xd.domain_lookup_by_name(x)
if not dom:
raise XendError('No such domain ' + str(x))
val = SrvDomain(dom)
@@ -62,16 +60,15 @@ class SrvDomainDir(SrvDir):
if not ok:
raise XendError(errmsg)
try:
- deferred = self.xd.domain_create(config)
- deferred.addCallback(self._op_create_cb, configstring, req)
- return deferred
+ dominfo = self.xd.domain_create(config)
+ return self._op_create_cb(dominfo, configstring, req)
except Exception, ex:
print 'op_create> Exception creating domain:'
traceback.print_exc()
raise XendError("Error creating domain: " + str(ex))
def _op_create_cb(self, dominfo, configstring, req):
- """Callback to handle deferred domain creation.
+ """Callback to handle domain creation.
"""
dom = dominfo.name
domurl = "%s/%s" % (req.prePathURL(), dom)
@@ -93,15 +90,13 @@ class SrvDomainDir(SrvDir):
def op_restore(self, op, req):
"""Restore a domain from file.
- @return: deferred
"""
+ return req.threadRequest(self.do_restore, op, req)
+
+ def do_restore(self, op, req):
fn = FormFn(self.xd.domain_restore,
[['file', 'str']])
- deferred = fn(req.args)
- deferred.addCallback(self._op_restore_cb, req)
- return deferred
-
- def _op_restore_cb(self, dominfo, req):
+ dominfo = fn(req.args)
dom = dominfo.name
domurl = "%s/%s" % (req.prePathURL(), dom)
req.setResponseCode(http.CREATED)
@@ -120,20 +115,16 @@ class SrvDomainDir(SrvDir):
return self.perform(req)
def render_GET(self, req):
- try:
- if self.use_sxp(req):
- req.setHeader("Content-Type", sxp.mime_type)
- self.ls_domain(req, 1)
- else:
- req.write("<html><head></head><body>")
- self.print_path(req)
- self.ls(req)
- self.ls_domain(req)
- self.form(req)
- req.write("</body></html>")
- return ''
- except Exception, ex:
- self._perform_err(ex, req)
+ if self.use_sxp(req):
+ req.setHeader("Content-Type", sxp.mime_type)
+ self.ls_domain(req, 1)
+ else:
+ req.write("<html><head></head><body>")
+ self.print_path(req)
+ self.ls(req)
+ self.ls_domain(req)
+ self.form(req)
+ req.write("</body></html>")
def ls_domain(self, req, use_sxp=0):
url = req.prePathURL()
@@ -143,7 +134,7 @@ class SrvDomainDir(SrvDir):
domains = self.xd.domain_ls()
sxp.show(domains, out=req)
else:
- domains = self.xd.domains()
+ domains = self.xd.list()
domains.sort(lambda x, y: cmp(x.name, y.name))
req.write('<ul>')
for d in domains:
diff --git a/tools/python/xen/xend/server/SrvEventDir.py b/tools/python/xen/xend/server/SrvEventDir.py
deleted file mode 100644
index 02871a426a..0000000000
--- a/tools/python/xen/xend/server/SrvEventDir.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-
-from xen.xend import sxp
-from xen.xend import EventServer
-from SrvDir import SrvDir
-
-class SrvEventDir(SrvDir):
- """Event directory.
- """
-
- def __init__(self):
- SrvDir.__init__(self)
- self.eserver = EventServer.instance()
-
- def op_inject(self, op, req):
- eventstring = req.args.get('event')
- pin = sxp.Parser()
- pin.input(eventstring)
- pin.input_eof()
- sxpr = pin.get_val()
- self.eserver.inject(sxp.name(sxpr), sxpr)
- if req.use_sxp:
- sxp.name(sxpr)
- else:
- return '<code>' + eventstring + '</code>'
-
- def render_POST(self, req):
- return self.perform(req)
-
- def form(self, req):
- action = req.prePathURL()
- req.write('<form method="post" action="%s" enctype="multipart/form-data">'
- % action)
- req.write('<button type="submit" name="op" value="inject">Inject</button>')
- req.write('Event <input type="text" name="event" size="40"><br>')
- req.write('</form>')
- req.write('<form method="post" action="%s" enctype="multipart/form-data">'
- % action)
- req.write('<button type="submit" name="op" value="inject">Inject</button>')
- req.write('Event file<input type="file" name="event"><br>')
- req.write('</form>')
diff --git a/tools/python/xen/xend/server/SrvNode.py b/tools/python/xen/xend/server/SrvNode.py
index c1b3ab560e..6768e9b5fe 100644
--- a/tools/python/xen/xend/server/SrvNode.py
+++ b/tools/python/xen/xend/server/SrvNode.py
@@ -2,7 +2,7 @@
import os
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
from xen.xend import sxp
from xen.xend import XendNode
from xen.xend.Args import FormFn
@@ -25,12 +25,6 @@ class SrvNode(SrvDir):
val = self.xn.reboot()
return val
- def op_cpu_rrobin_slice_set(self, op, req):
- fn = FormFn(self.xn.cpu_rrobin_slice_set,
- [['slice', 'int']])
- val = fn(req.args, {})
- return val
-
def op_cpu_bvt_slice_set(self, op, req):
fn = FormFn(self.xn.cpu_bvt_slice_set,
[['ctx_allow', 'int']])
@@ -41,26 +35,22 @@ class SrvNode(SrvDir):
return self.perform(req)
def render_GET(self, req):
- try:
- if self.use_sxp(req):
- req.setHeader("Content-Type", sxp.mime_type)
- sxp.show(['node'] + self.info(), out=req)
- else:
- url = req.prePathURL()
- if not url.endswith('/'):
- url += '/'
- req.write('<html><head></head><body>')
- self.print_path(req)
- req.write('<ul>')
- for d in self.info():
- req.write('<li> %10s: %s' % (d[0], str(d[1])))
- req.write('<li><a href="%sdmesg">Xen dmesg output</a>' % url)
- req.write('<li><a href="%slog>Xend log</a>' % url)
- req.write('</ul>')
- req.write('</body></html>')
- return ''
- except Exception, ex:
- self._perform_err(ex, req)
+ if self.use_sxp(req):
+ req.setHeader("Content-Type", sxp.mime_type)
+ sxp.show(['node'] + self.info(), out=req)
+ else:
+ url = req.prePathURL()
+ if not url.endswith('/'):
+ url += '/'
+ req.write('<html><head></head><body>')
+ self.print_path(req)
+ req.write('<ul>')
+ for d in self.info():
+ req.write('<li> %10s: %s' % (d[0], str(d[1])))
+ req.write('<li><a href="%sdmesg">Xen dmesg output</a>' % url)
+ req.write('<li><a href="%slog>Xend log</a>' % url)
+ req.write('</ul>')
+ req.write('</body></html>')
def info(self):
return self.xn.info()
diff --git a/tools/python/xen/xend/server/SrvRoot.py b/tools/python/xen/xend/server/SrvRoot.py
index 8d38937b72..84ef008118 100644
--- a/tools/python/xen/xend/server/SrvRoot.py
+++ b/tools/python/xen/xend/server/SrvRoot.py
@@ -2,7 +2,7 @@
from xen.xend import XendRoot
xroot = XendRoot.instance()
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
class SrvRoot(SrvDir):
"""The root of the xend server.
@@ -16,8 +16,6 @@ class SrvRoot(SrvDir):
('node', 'SrvNode' ),
('domain', 'SrvDomainDir' ),
('console', 'SrvConsoleDir' ),
- ('event', 'SrvEventDir' ),
- ('device', 'SrvDeviceDir' ),
('vnet', 'SrvVnetDir' ),
]
@@ -28,3 +26,7 @@ class SrvRoot(SrvDir):
for (name, klass) in self.subdirs:
self.get(name)
xroot.start()
+
+ def __repr__(self):
+ return "<SrvRoot %x %s>" %(id(self), self.table.keys())
+
diff --git a/tools/python/xen/xend/server/SrvServer.py b/tools/python/xen/xend/server/SrvServer.py
index 353d6eed24..1816c1c56d 100644
--- a/tools/python/xen/xend/server/SrvServer.py
+++ b/tools/python/xen/xend/server/SrvServer.py
@@ -25,34 +25,41 @@
# todo Support security settings etc. in the config file.
# todo Support command-line args.
-from twisted.web import server, static
-from twisted.web import resource, script
-from twisted.internet import reactor
+from threading import Thread
-from xen.xend import XendRoot
-xroot = XendRoot.instance()
+from xen.web.httpserver import HttpServer, UnixHttpServer
+from xen.xend import XendRoot; xroot = XendRoot.instance()
from xen.xend import Vifctl
+from xen.xend.XendLogging import log
+from xen.web.SrvDir import SrvDir
from SrvRoot import SrvRoot
-def create(port=None, interface=None, bridge=0):
- if port is None:
- port = xroot.get_xend_port()
- if interface is None:
- interface = xroot.get_xend_address()
- if bridge:
- Vifctl.network('start')
- root = resource.Resource()
- xend = SrvRoot()
- root.putChild('xend', xend)
- site = server.Site(root)
- reactor.listenTCP(port, site, interface=interface)
+class XendServers:
-def main(port=None, interface=None):
- create(port, interface)
- reactor.run()
+ def __init__(self):
+ self.servers = []
+ def add(self, server):
+ self.servers.append(server)
-if __name__ == '__main__':
- main()
+ def start(self):
+ Vifctl.network('start')
+ for server in self.servers:
+ thread = Thread(target=server.run)
+ thread.start()
+
+def create():
+ root = SrvDir()
+ root.putChild('xend', SrvRoot())
+ servers = XendServers()
+ if xroot.get_xend_http_server():
+ port = xroot.get_xend_port()
+ interface = xroot.get_xend_address()
+ servers.add(HttpServer(root=root, interface=interface, port=port))
+ if xroot.get_xend_unix_server():
+ path = xroot.get_xend_unix_path()
+ log.info('unix path=' + path)
+ servers.add(UnixHttpServer(path=path, root=root))
+ return servers
diff --git a/tools/python/xen/xend/server/SrvVnetDir.py b/tools/python/xen/xend/server/SrvVnetDir.py
index 3577cb631f..bc5b583b42 100644
--- a/tools/python/xen/xend/server/SrvVnetDir.py
+++ b/tools/python/xen/xend/server/SrvVnetDir.py
@@ -5,7 +5,7 @@ from xen.xend.Args import FormFn
from xen.xend import PrettyPrint
from xen.xend import XendVnet
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
class SrvVnet(SrvDir):
@@ -75,20 +75,16 @@ class SrvVnetDir(SrvDir):
return self.perform(req)
def render_GET(self, req):
- try:
- if self.use_sxp(req):
- req.setHeader("Content-Type", sxp.mime_type)
- self.ls_vnet(req, 1)
- else:
- req.write("<html><head></head><body>")
- self.print_path(req)
- self.ls(req)
- self.ls_vnet(req)
- self.form(req)
- req.write("</body></html>")
- return ''
- except Exception, ex:
- self._perform_err(ex, req)
+ if self.use_sxp(req):
+ req.setHeader("Content-Type", sxp.mime_type)
+ self.ls_vnet(req, 1)
+ else:
+ req.write("<html><head></head><body>")
+ self.print_path(req)
+ self.ls(req)
+ self.ls_vnet(req)
+ self.form(req)
+ req.write("</body></html>")
def ls_vnet(self, req, use_sxp=0):
url = req.prePathURL()
diff --git a/tools/python/xen/xend/server/SrvXendLog.py b/tools/python/xen/xend/server/SrvXendLog.py
index 0edb110572..9b8a7dc0e8 100644
--- a/tools/python/xen/xend/server/SrvXendLog.py
+++ b/tools/python/xen/xend/server/SrvXendLog.py
@@ -1,10 +1,10 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-from twisted.web import static
+from xen.web import static
from xen.xend import XendRoot
-from SrvDir import SrvDir
+from xen.web.SrvDir import SrvDir
class SrvXendLog(SrvDir):
"""Xend log.
@@ -18,7 +18,4 @@ class SrvXendLog(SrvDir):
self.logfile.encoding = None
def render_GET(self, req):
- try:
- return self.logfile.render(req)
- except Exception, ex:
- self._perform_err(ex, req)
+ return self.logfile.render(req)
diff --git a/tools/python/xen/xend/server/blkif.py b/tools/python/xen/xend/server/blkif.py
index e90b45f74c..75a76e8bda 100755
--- a/tools/python/xen/xend/server/blkif.py
+++ b/tools/python/xen/xend/server/blkif.py
@@ -1,138 +1,62 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
"""Support for virtual block devices.
"""
+import string
-from twisted.internet import defer
-
+from xen.util import blkif
+from xen.xend.XendError import XendError, VmError
+from xen.xend.XendRoot import get_component
+from xen.xend.XendLogging import log
from xen.xend import sxp
from xen.xend import Blkctl
-from xen.xend.XendLogging import log
-from xen.xend.XendError import XendError, VmError
-
-import os
-import re
-import string
-import channel
-import controller
-from messages import *
-
-from xen.util.ip import _readline, _readlines
-
-def expand_dev_name(name):
- if re.match( '^/dev/', name ):
- return name
- else:
- return '/dev/' + name
-
-def check_mounted(self, name):
- mode = None
- name = expand_dev_name(name)
- lines = _readlines(os.popen('mount 2>/dev/null'))
- exp = re.compile('^' + name + ' .*[\(,]r(?P<mode>[ow])[,\)]')
- for line in lines:
- pm = exp.match(line)
- if not pm: continue
- mode = pm.group('mode')
- break
- if mode is 'w':
- return mode
- if mode is 'o':
- mode = 'r'
- blkifs = self.ctrl.daemon.blkifs()
- for blkif in blkifs:
- if blkif[1][1] is self.ctrl.dom:
- continue
- for dev in self.ctrl.daemon.blkif_get(blkif[1][1]).getDevices():
- if dev.type == 'phy' and name == expand_dev_name(dev.params):
- mode = dev.mode
- if 'w' in mode:
- return 'w'
- if mode and 'r' in mode:
- return 'r'
- return None
-
-def blkdev_name_to_number(name):
- """Take the given textual block-device name (e.g., '/dev/sda1',
- 'hda') and return the device number used by the OS. """
-
- n = expand_dev_name(name)
-
- try:
- return os.stat(n).st_rdev
- except Exception, ex:
- log.debug("exception looking up device number for %s: %s", name, ex)
- pass
-
- if re.match( '/dev/sd[a-p]([0-9]|1[0-5])', n):
- return 8 * 256 + 16 * (ord(n[7:8]) - ord('a')) + int(n[8:])
-
- if re.match( '/dev/hd[a-t]([1-9]|[1-5][0-9]|6[0-3])?', n):
- ide_majors = [ 3, 22, 33, 34, 56, 57, 88, 89, 90, 91 ]
- major = ide_majors[(ord(n[7:8]) - ord('a')) / 2]
- minor = ((ord(n[7:8]) - ord('a')) % 2) * 64 + int(n[8:] or 0)
- return major * 256 + minor
-
- # see if this is a hex device number
- if re.match( '^(0x)?[0-9a-fA-F]+$', name ):
- return string.atoi(name,16)
-
- return None
-
-def blkdev_segment(name):
- """Take the given block-device name (e.g. '/dev/sda1', 'hda')
- and return a dictionary { device, start_sector,
- nr_sectors, type }
- device: Device number of the given partition
- start_sector: Index of first sector of the partition
- nr_sectors: Number of sectors comprising this partition
- type: 'Disk' or identifying name for partition type
- """
- val = None
- n = blkdev_name_to_number(name)
- if n:
- val = { 'device' : n,
- 'start_sector' : long(0),
- 'nr_sectors' : long(1L<<63),
- 'type' : 'Disk' }
- return val
-
-class BlkifBackendController(controller.BackendController):
- """ Handler for the 'back-end' channel to a block device driver domain.
- """
-
- def __init__(self, factory, dom):
- controller.BackendController.__init__(self, factory, dom)
- self.addMethod(CMSG_BLKIF_BE,
- CMSG_BLKIF_BE_DRIVER_STATUS,
- self.recv_be_driver_status)
- self.registerChannel()
+from xen.xend.xenstore import DBVar
- def recv_be_driver_status(self, msg, req):
- """Request handler for be_driver_status messages.
-
- @param msg: message
- @type msg: xu message
- @param req: request flag (true if the msg is a request)
- @type req: bool
- """
- val = unpackMsg('blkif_be_driver_status_t', msg)
- status = val['status']
+from xen.xend.server import channel
+from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController
+from xen.xend.server.messages import *
-class BlkifBackendInterface(controller.BackendInterface):
+class BlkifBackend:
""" Handler for the 'back-end' channel to a block device driver domain
on behalf of a front-end domain.
Must be connected using connect() before it can be used.
- Do not create directly - use getBackendInterface() on the BlkifController.
"""
- def __init__(self, ctrl, dom, handle):
- controller.BackendInterface.__init__(self, ctrl, dom, handle)
- self.connected = 0
+ def __init__(self, controller, id, dom, recreate=False):
+ self.controller = controller
+ self.id = id
+ self.frontendDomain = self.controller.getDomain()
+ self.frontendChannel = None
+ self.backendDomain = dom
+ self.backendChannel = None
+ self.destroyed = False
+ self.connected = False
self.evtchn = None
+ self.status = None
+
+ def init(self, recreate=False, reboot=False):
+ self.destroyed = False
self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED
+ self.frontendDomain = self.controller.getDomain()
+ self.frontendChannel = self.controller.getChannel()
+ cf = channel.channelFactory()
+ self.backendChannel = cf.openChannel(self.backendDomain)
def __str__(self):
- return '<BlkifBackendInterface %d %d>' % (self.controller.dom, self.dom)
+ return ('<BlkifBackend frontend=%d backend=%d id=%d>'
+ % (self.frontendDomain,
+ self.backendDomain,
+ self.id))
+
+ def getId(self):
+ return self.id
+
+ def closeEvtchn(self):
+ if self.evtchn:
+ channel.eventChannelClose(self.evtchn)
+ self.evtchn = None
+
+ def openEvtchn(self):
+ self.evtchn = channel.eventChannel(self.backendDomain, self.frontendDomain)
def getEventChannelBackend(self):
val = 0
@@ -146,91 +70,78 @@ class BlkifBackendInterface(controller.BackendInterface):
val = self.evtchn['port2']
return val
- def connect(self, recreate=0):
+ def connect(self, recreate=False):
"""Connect to the blkif control interface.
@param recreate: true if after xend restart
- @return: deferred
"""
log.debug("Connecting blkif %s", str(self))
if recreate or self.connected:
- d = defer.succeed(self)
+ self.connected = True
+ pass
else:
- d = self.send_be_create()
- d.addCallback(self.respond_be_create)
- return d
+ self.send_be_create()
def send_be_create(self):
- d = defer.Deferred()
+ log.debug("send_be_create %s", str(self))
msg = packMsg('blkif_be_create_t',
- { 'domid' : self.controller.dom,
- 'blkif_handle' : self.handle })
- self.writeRequest(msg, response=d)
- return d
-
- def respond_be_create(self, msg):
- val = unpackMsg('blkif_be_create_t', msg)
- self.connected = 1
- return self
-
- def destroy(self):
+ { 'domid' : self.frontendDomain,
+ 'blkif_handle' : self.id })
+ msg = self.backendChannel.requestResponse(msg)
+ #todo: check return status
+ self.connected = True
+
+ def destroy(self, change=False, reboot=False):
"""Disconnect from the blkif control interface and destroy it.
"""
- def cb_destroy(val):
- self.send_be_destroy()
- self.close()
- d = defer.Deferred()
- d.addCallback(cb_destroy)
- if self.evtchn:
- channel.eventChannelClose(self.evtchn)
- self.send_be_disconnect(response=d)
-
- def send_be_disconnect(self, response=None):
+ self.send_be_disconnect()
+ self.send_be_destroy()
+ self.closeEvtchn()
+ self.destroyed = True
+ # For change true need to notify front-end, or back-end will do it?
+
+ def send_be_disconnect(self):
msg = packMsg('blkif_be_disconnect_t',
- { 'domid' : self.controller.dom,
- 'blkif_handle' : self.handle })
- self.writeRequest(msg, response=response)
+ { 'domid' : self.frontendDomain,
+ 'blkif_handle' : self.id })
+ self.backendChannel.requestResponse(msg)
+ #todo: check return status
+ self.connected = False
- def send_be_destroy(self, response=None):
+ def send_be_destroy(self):
msg = packMsg('blkif_be_destroy_t',
- { 'domid' : self.controller.dom,
- 'blkif_handle' : self.handle })
- self.writeRequest(msg, response=response)
+ { 'domid' : self.frontendDomain,
+ 'blkif_handle' : self.id })
+ self.backendChannel.requestResponse(msg)
+ #todo: check return status
def connectInterface(self, val):
- self.evtchn = channel.eventChannel(self.dom, self.controller.dom)
+ self.openEvtchn()
log.debug("Connecting blkif to event channel %s ports=%d:%d",
str(self), self.evtchn['port1'], self.evtchn['port2'])
msg = packMsg('blkif_be_connect_t',
- { 'domid' : self.controller.dom,
- 'blkif_handle' : self.handle,
+ { 'domid' : self.frontendDomain,
+ 'blkif_handle' : self.id,
'evtchn' : self.getEventChannelBackend(),
'shmem_frame' : val['shmem_frame'] })
- d = defer.Deferred()
- d.addCallback(self.respond_be_connect)
- self.writeRequest(msg, response=d)
-
- def respond_be_connect(self, msg):
- """Response handler for a be_connect message.
-
- @param msg: message
- @type msg: xu message
- """
+ msg = self.backendChannel.requestResponse(msg)
+ #todo: check return status
val = unpackMsg('blkif_be_connect_t', msg)
self.status = BLKIF_INTERFACE_STATUS_CONNECTED
self.send_fe_interface_status()
- def send_fe_interface_status(self, response=None):
+ def send_fe_interface_status(self):
msg = packMsg('blkif_fe_interface_status_t',
- { 'handle' : self.handle,
+ { 'handle' : self.id,
'status' : self.status,
- 'domid' : self.dom,
+ 'domid' : self.backendDomain,
'evtchn' : self.getEventChannelFrontend() })
- self.controller.writeRequest(msg, response=response)
+ self.frontendChannel.writeRequest(msg)
def interfaceDisconnected(self):
self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED
- #todo?: Do this: self.evtchn = None
+ #todo?: Close evtchn:
+ #self.closeEvtchn()
self.send_fe_interface_status()
def interfaceChanged(self):
@@ -238,83 +149,30 @@ class BlkifBackendInterface(controller.BackendInterface):
The front-end should then probe for devices.
"""
msg = packMsg('blkif_fe_interface_status_t',
- { 'handle' : self.handle,
+ { 'handle' : self.id,
'status' : BLKIF_INTERFACE_STATUS_CHANGED,
- 'domid' : self.dom,
+ 'domid' : self.backendDomain,
'evtchn' : 0 })
- self.controller.writeRequest(msg)
-
-class BlkifControllerFactory(controller.SplitControllerFactory):
- """Factory for creating block device interface controllers.
- """
-
- def __init__(self):
- controller.SplitControllerFactory.__init__(self)
-
- def createController(self, dom, recreate=0):
- """Create a block device controller for a domain.
-
- @param dom: domain
- @type dom: int
- @param recreate: if true it's a recreate (after xend restart)
- @type recreate: bool
- @return: block device controller
- @rtype: BlkifController
- """
- blkif = self.getControllerByDom(dom)
- if blkif is None:
- blkif = BlkifController(self, dom)
- self.addController(blkif)
- return blkif
-
- def createBackendController(self, dom):
- """Create a block device backend controller.
-
- @param dom: backend domain
- @return: backend controller
- """
- return BlkifBackendController(self, dom)
-
- def createBackendInterface(self, ctrl, dom, handle):
- """Create a block device backend interface.
-
- @param ctrl: controller
- @param dom: backend domain
- @param handle: interface handle
- @return: backend interface
- """
- return BlkifBackendInterface(ctrl, dom, handle)
-
- def getDomainDevices(self, dom):
- """Get the block devices for a domain.
+ self.frontendChannel.writeRequest(msg)
- @param dom: domain
- @type dom: int
- @return: devices
- @rtype: [device]
- """
- blkif = self.getControllerByDom(dom)
- return (blkif and blkif.getDevices()) or []
-
- def getDomainDevice(self, dom, idx):
- """Get a block device from a domain.
-
- @param dom: domain
- @type dom: int
- @param idx: device index
- @type idx: int
- @return: device
- @rtype: device
- """
- blkif = self.getControllerByDom(dom)
- return (blkif and blkif.getDevice(idx)) or None
-
-class BlkDev(controller.SplitDev):
+class BlkDev(Dev):
"""Info record for a block device.
"""
- def __init__(self, idx, ctrl, config):
- controller.SplitDev.__init__(self, idx, ctrl)
+ __exports__ = Dev.__exports__ + [
+ DBVar('dev', ty='str'),
+ DBVar('vdev', ty='int'),
+ DBVar('mode', ty='str'),
+ DBVar('viftype', ty='str'),
+ DBVar('params', ty='str'),
+ DBVar('node', ty='str'),
+ DBVar('device', ty='long'),
+ DBVar('start_sector', ty='long'),
+ DBVar('nr_sectors', ty='long'),
+ ]
+
+ def __init__(self, controller, id, config, recreate=False):
+ Dev.__init__(self, controller, id, config, recreate=recreate)
self.dev = None
self.uname = None
self.vdev = None
@@ -323,12 +181,27 @@ class BlkDev(controller.SplitDev):
self.params = None
self.node = None
self.device = None
+ self.dev_handle = 0
self.start_sector = None
self.nr_sectors = None
- self.ctrl = ctrl
- self.configure(config)
-
- def configure(self, config):
+
+ self.frontendDomain = self.getDomain()
+ self.frontendChannel = None
+ self.backendDomain = None
+ self.backendChannel = None
+ self.backendId = 0
+ self.configure(self.config, recreate=recreate)
+
+ def init(self, recreate=False, reboot=False):
+ self.frontendDomain = self.getDomain()
+ self.frontendChannel = self.getChannel()
+ backend = self.getBackend()
+ self.backendChannel = backend.backendChannel
+ self.backendId = backend.id
+
+ def configure(self, config, change=False, recreate=False):
+ if change:
+ raise XendError("cannot reconfigure vbd")
self.config = config
self.uname = sxp.child_value(config, 'uname')
if not self.uname:
@@ -340,23 +213,28 @@ class BlkDev(controller.SplitDev):
if not self.dev:
raise VmError('vbd: Missing dev')
self.mode = sxp.child_value(config, 'mode', 'r')
- # todo: The 'dev' should be looked up in the context of the domain.
- self.vdev = blkdev_name_to_number(self.dev)
+
+ self.vdev = blkif.blkdev_name_to_number(self.dev)
if not self.vdev:
raise VmError('vbd: Device not found: %s' % self.dev)
+
try:
- self.backendDomain = int(sxp.child_value(config, 'backend', '0'))
+ xd = get_component('xen.xend.XendDomain')
+ self.backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).id
except:
raise XendError('invalid backend domain')
- def recreate(self, savedinfo):
- node = sxp.child_value(savedinfo, 'node')
- self.setNode(node)
+ return self.config
- def attach(self):
- node = Blkctl.block('bind', self.type, self.params)
- self.setNode(node)
- return self.attachBackend()
+ def attach(self, recreate=False, change=False):
+ if recreate:
+ pass
+ else:
+ node = Blkctl.block('bind', self.type, self.params)
+ self.setNode(node)
+ self.attachBackend()
+ if change:
+ self.interfaceChanged()
def unbind(self):
if self.node is None: return
@@ -365,15 +243,30 @@ class BlkDev(controller.SplitDev):
Blkctl.block('unbind', self.type, self.node)
def setNode(self, node):
- mounted_mode = check_mounted(self, node)
+
+ # NOTE:
+ # This clause is testing code for storage system experiments.
+ # Add a new disk type that will just pass an opaque id in the
+ # dev_handle and use an experimental device type.
+ # Please contact andrew.warfield@cl.cam.ac.uk with any concerns.
+ if self.type == 'parallax':
+ self.node = node
+ self.device = 61440 # (240,0)
+ self.dev_handle = long(self.params)
+ self.nr_sectors = long(0)
+ return
+ # done.
+
+ mounted_mode = self.check_mounted(node)
if not '!' in self.mode and mounted_mode:
- if mounted_mode is "w":
+ if mounted_mode == "w":
raise VmError("vbd: Segment %s is in writable use" %
self.uname)
elif 'w' in self.mode:
raise VmError("vbd: Segment %s is in read-only use" %
self.uname)
- segment = blkdev_segment(node)
+
+ segment = blkif.blkdev_segment(node)
if not segment:
raise VmError("vbd: Segment not found: uname=%s" % self.uname)
self.node = node
@@ -381,12 +274,28 @@ class BlkDev(controller.SplitDev):
self.start_sector = segment['start_sector']
self.nr_sectors = segment['nr_sectors']
+ def check_mounted(self, name):
+ mode = blkif.mount_mode(name)
+ xd = get_component('xen.xend.XendDomain')
+ for vm in xd.list():
+ ctrl = vm.getDeviceController(self.getType(), error=False)
+ if (not ctrl): continue
+ for dev in ctrl.getDevices():
+ if dev is self: continue
+ if dev.type == 'phy' and name == blkif.expand_dev_name(dev.params):
+ mode = dev.mode
+ if 'w' in mode:
+ return 'w'
+ if mode and 'r' in mode:
+ return 'r'
+ return None
+
def readonly(self):
return 'w' not in self.mode
def sxpr(self):
val = ['vbd',
- ['idx', self.idx],
+ ['id', self.id],
['vdev', self.vdev],
['device', self.device],
['mode', self.mode]]
@@ -396,188 +305,158 @@ class BlkDev(controller.SplitDev):
val.append(['uname', self.uname])
if self.node:
val.append(['node', self.node])
- if self.index is not None:
- val.append(['index', self.index])
return val
- def destroy(self, change=0):
+ def getBackend(self):
+ return self.controller.getBackend(self.backendDomain)
+
+ def refresh(self):
+ log.debug("Refreshing vbd domain=%d id=%s", self.frontendDomain,
+ self.id)
+ self.interfaceChanged()
+
+ def destroy(self, change=False, reboot=False):
"""Destroy the device. If 'change' is true notify the front-end interface.
@param change: change flag
"""
- log.debug("Destroying vbd domain=%d idx=%s", self.controller.dom, self.idx)
- d = self.send_be_vbd_destroy()
+ self.destroyed = True
+ log.debug("Destroying vbd domain=%d id=%s", self.frontendDomain,
+ self.id)
+ self.send_be_vbd_destroy()
if change:
- d.addCallback(lambda val: self.interfaceChanged())
- d.addCallback(lambda val: self.unbind())
+ self.interfaceChanged()
+ self.unbind()
def interfaceChanged(self):
"""Tell the back-end to notify the front-end that a device has been
added or removed.
"""
- self.getBackendInterface().interfaceChanged()
+ self.getBackend().interfaceChanged()
def attachBackend(self):
"""Attach the device to its controller.
"""
- backend = self.getBackendInterface()
- d1 = backend.connect()
- d2 = defer.Deferred()
- d2.addCallback(self.send_be_vbd_create)
- d1.chainDeferred(d2)
- return d2
+ self.getBackend().connect()
+ self.send_be_vbd_create()
- def send_be_vbd_create(self, val):
- d = defer.Deferred()
- d.addCallback(self.respond_be_vbd_create)
- backend = self.getBackendInterface()
+ def send_be_vbd_create(self):
msg = packMsg('blkif_be_vbd_create_t',
- { 'domid' : self.controller.dom,
- 'blkif_handle' : backend.handle,
+ { 'domid' : self.frontendDomain,
+ 'blkif_handle' : self.backendId,
+ 'pdevice' : self.device,
+ 'dev_handle' : self.dev_handle,
'vdevice' : self.vdev,
'readonly' : self.readonly() })
- backend.writeRequest(msg, response=d)
- return d
+ msg = self.backendChannel.requestResponse(msg)
- def respond_be_vbd_create(self, msg):
- """Response handler for a be_vbd_create message.
- Tries to grow the vbd.
-
- @param msg: message
- @type msg: xu message
- """
val = unpackMsg('blkif_be_vbd_create_t', msg)
- d = self.send_be_vbd_grow()
- d.addCallback(self.respond_be_vbd_grow)
- return d
-
- def send_be_vbd_grow(self):
- d = defer.Deferred()
- backend = self.getBackendInterface()
- msg = packMsg('blkif_be_vbd_grow_t',
- { 'domid' : self.controller.dom,
- 'blkif_handle' : backend.handle,
- 'vdevice' : self.vdev,
- 'extent.device' : self.device,
- 'extent.sector_start' : self.start_sector,
- 'extent.sector_length' : self.nr_sectors })
- backend.writeRequest(msg, response=d)
- return d
-
- def respond_be_vbd_grow(self, msg):
- """Response handler for a be_vbd_grow message.
-
- @param msg: message
- @type msg: xu message
- """
- val = unpackMsg('blkif_be_vbd_grow_t', msg)
- status = val['status']
- if status != BLKIF_BE_STATUS_OKAY:
- raise XendError("Adding extent to vbd failed: device %s, error %d"
+ status = val['status']
+ if status != BLKIF_BE_STATUS_OKAY:
+ raise XendError("Creating vbd failed: device %s, error %d"
% (sxp.to_string(self.config), status))
- return self
def send_be_vbd_destroy(self):
- d = defer.Deferred()
- backend = self.getBackendInterface()
msg = packMsg('blkif_be_vbd_destroy_t',
- { 'domid' : self.controller.dom,
- 'blkif_handle' : backend.handle,
+ { 'domid' : self.frontendDomain,
+ 'blkif_handle' : self.backendId,
'vdevice' : self.vdev })
- self.controller.delDevice(self.vdev)
- backend.writeRequest(msg, response=d)
- return d
+ return self.backendChannel.writeRequest(msg)
-
-class BlkifController(controller.SplitController):
+class BlkifController(DevController):
"""Block device interface controller. Handles all block devices
for a domain.
"""
- def __init__(self, factory, dom):
+ def __init__(self, vm, recreate=False):
"""Create a block device controller.
- Do not call directly - use createController() on the factory instead.
"""
- controller.SplitController.__init__(self, factory, dom)
- self.addMethod(CMSG_BLKIF_FE,
- CMSG_BLKIF_FE_DRIVER_STATUS,
- self.recv_fe_driver_status)
- self.addMethod(CMSG_BLKIF_FE,
- CMSG_BLKIF_FE_INTERFACE_CONNECT,
- self.recv_fe_interface_connect)
- self.registerChannel()
+ DevController.__init__(self, vm, recreate=recreate)
+ self.backends = {}
+ self.backendId = 0
+ self.rcvr = None
+
+ def initController(self, recreate=False, reboot=False):
+ self.destroyed = False
+ # Add our handlers for incoming requests.
+ self.rcvr = CtrlMsgRcvr(self.getChannel())
+ self.rcvr.addHandler(CMSG_BLKIF_FE,
+ CMSG_BLKIF_FE_DRIVER_STATUS,
+ self.recv_fe_driver_status)
+ self.rcvr.addHandler(CMSG_BLKIF_FE,
+ CMSG_BLKIF_FE_INTERFACE_CONNECT,
+ self.recv_fe_interface_connect)
+ self.rcvr.registerChannel()
+ if reboot:
+ self.rebootBackends()
+ self.rebootDevices()
def sxpr(self):
- val = ['blkif', ['dom', self.dom]]
+ val = ['blkif', ['dom', self.getDomain()]]
return val
- def addDevice(self, idx, config):
- """Add a device to the device table.
+ def rebootBackends(self):
+ for backend in self.backends.values():
+ backend.init(reboot=True)
- @param vdev: device index
- @type vdev: int
- @param config: device configuration
- @return: device
- @rtype: BlkDev
- """
- if idx in self.devices:
- raise XendError('device exists: ' + str(idx))
- dev = BlkDev(idx, self, config )
- self.devices[idx] = dev
- return dev
+ def getBackendById(self, id):
+ return self.backends.get(id)
+
+ def getBackendByDomain(self, dom):
+ for backend in self.backends.values():
+ if backend.backendDomain == dom:
+ return backend
+ return None
- def attachDevice(self, idx, config, recreate=0):
- """Attach a device to the specified interface.
- On success the returned deferred will be called with the device.
+ def getBackend(self, dom):
+ backend = self.getBackendByDomain(dom)
+ if backend: return backend
+ backend = BlkifBackend(self, self.backendId, dom)
+ self.backendId += 1
+ self.backends[backend.getId()] = backend
+ backend.init()
+ return backend
- @param idx: device id
+ def newDevice(self, id, config, recreate=False):
+ """Create a device..
+
+ @param id: device id
@param config: device configuration
@param recreate: if true it's being recreated (after xend restart)
@type recreate: bool
- @return: deferred
- @rtype: Deferred
+ @return: device
+ @rtype: BlkDev
"""
- dev = self.addDevice(idx, config)
- if recreate:
- dev.recreate(recreate)
- d = defer.succeed(dev)
- else:
- d = dev.attach()
- return d
-
- def destroy(self):
+ return BlkDev(self, id, config, recreate=recreate)
+
+ def destroyController(self, reboot=False):
"""Destroy the controller and all devices.
"""
- log.debug("Destroying blkif domain=%d", self.dom)
- self.destroyDevices()
- self.destroyBackends()
+ self.destroyed = True
+ log.debug("Destroying blkif domain=%d", self.getDomain())
+ self.destroyDevices(reboot=reboot)
+ self.destroyBackends(reboot=reboot)
+ self.rcvr.deregisterChannel()
- def destroyDevices(self):
- """Destroy all devices.
- """
- for dev in self.getDevices():
- dev.destroy()
-
- def destroyBackends(self):
- for backend in self.getBackendInterfaces():
- backend.destroy()
+ def destroyBackends(self, reboot=False):
+ for backend in self.backends.values():
+ backend.destroy(reboot=reboot)
- def recv_fe_driver_status(self, msg, req):
+ def recv_fe_driver_status(self, msg):
val = unpackMsg('blkif_fe_driver_status_t', msg)
- print 'recv_fe_driver_status>', val
- for backend in self.getBackendInterfaces():
+ for backend in self.backends.values():
backend.interfaceDisconnected()
- def recv_fe_interface_connect(self, msg, req):
+ def recv_fe_interface_connect(self, msg):
val = unpackMsg('blkif_fe_interface_connect_t', msg)
- handle = val['handle']
- backend = self.getBackendInterfaceByHandle(handle)
+ id = val['handle']
+ backend = self.getBackendById(id)
if backend:
- backend.connectInterface(val)
+ try:
+ backend.connectInterface(val)
+ except IOError, ex:
+ log.error("Exception connecting backend: %s", ex)
else:
- log.error('interface connect on unknown interface: handle=%d', handle)
-
-
-
+ log.error('interface connect on unknown interface: id=%d', id)
diff --git a/tools/python/xen/xend/server/channel.py b/tools/python/xen/xend/server/channel.py
index bdf7efd588..00f451a7b8 100755
--- a/tools/python/xen/xend/server/channel.py
+++ b/tools/python/xen/xend/server/channel.py
@@ -1,120 +1,267 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
+import threading
+import select
+
import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
from xen.lowlevel import xu
-from messages import msgTypeName, printMsg
-VIRQ_MISDIRECT = 0 # Catch-all interrupt for unbound VIRQs.
-VIRQ_TIMER = 1 # Timebase update, and/or requested timeout.
-VIRQ_DEBUG = 2 # Request guest to dump debug info.
-VIRQ_CONSOLE = 3 # (DOM0) bytes received on emergency console.
-VIRQ_DOM_EXC = 4 # (DOM0) Exceptional event for some domain.
+from xen.xend.XendLogging import log
-def eventChannel(dom1, dom2):
- """Create an event channel between domains.
- The returned dict contains dom1, dom2, port1 and port2 on success.
+from messages import *
- @return dict (empty on error)
- """
- evtchn = xc.evtchn_bind_interdomain(dom1=dom1, dom2=dom2)
- if evtchn:
- evtchn['dom1'] = dom1
- evtchn['dom2'] = dom2
- return evtchn
+DEBUG = 0
-def eventChannelClose(evtchn):
- """Close an event channel that was opened by eventChannel().
+RESPONSE_TIMEOUT = 20.0
+
+class EventChannel(dict):
+ """An event channel between domains.
"""
- def evtchn_close(dom, port):
- if (dom is None) or (port is None): return
+
+ def interdomain(cls, dom1, dom2, port1=0, port2=0):
+ """Create an event channel between domains.
+
+ @return EventChannel (None on error)
+ """
+ v = xc.evtchn_bind_interdomain(dom1=dom1, dom2=dom2,
+ port1=port1, port2=port2)
+ if v:
+ v = cls(dom1, dom2, v)
+ return v
+
+ interdomain = classmethod(interdomain)
+
+ def restoreFromDB(cls, db, dom1, dom2, port1=0, port2=0):
+ """Create an event channel using db info if available.
+ Inverse to saveToDB().
+
+ @param db db
+ @param dom1
+ @param dom2
+ @param port1
+ @param port2
+ """
try:
- xc.evtchn_close(dom=dom, port=port)
- except Exception, ex:
- pass
+ dom1 = int(db['dom1'])
+ except: pass
+ try:
+ dom2 = int(db['dom2'])
+ except: pass
+ try:
+ port1 = int(db['port1'])
+ except: pass
+ try:
+ port2 = int(db['port2'])
+ except: pass
+ evtchn = cls.interdomain(dom1, dom2, port1=port1, port2=port2)
+ return evtchn
+
+ restoreFromDB = classmethod(restoreFromDB)
+
+ def __init__(self, dom1, dom2, d):
+ d['dom1'] = dom1
+ d['dom2'] = dom2
+ self.update(d)
+ self.dom1 = dom1
+ self.dom2 = dom2
+ self.port1 = d.get('port1')
+ self.port2 = d.get('port2')
+
+ def close(self):
+ """Close the event channel.
+ """
+ def evtchn_close(dom, port):
+ try:
+ xc.evtchn_close(dom=dom, port=port)
+ except Exception, ex:
+ pass
+
+ if DEBUG:
+ print 'EventChannel>close>', self
+ evtchn_close(self.dom1, self.port1)
+ evtchn_close(self.dom2, self.port2)
+
+ def saveToDB(self, db):
+ """Save the event channel to the db so it can be restored later,
+ using restoreFromDB() on the class.
+
+ @param db db
+ """
+ db['dom1'] = str(self.dom1)
+ db['dom2'] = str(self.dom2)
+ db['port1'] = str(self.port1)
+ db['port2'] = str(self.port2)
+ db.saveDB()
+
+ def sxpr(self):
+ return ['event-channel',
+ ['dom1', self.dom1 ],
+ ['port1', self.port1 ],
+ ['dom2', self.dom2 ],
+ ['port2', self.port2 ]
+ ]
+
+ def __repr__(self):
+ return ("<EventChannel dom1:%d:%d dom2:%d:%d>"
+ % (self.dom1, self.port1, self.dom2, self.port2))
+
+def eventChannel(dom1, dom2, port1=0, port2=0):
+ """Create an event channel between domains.
+ @return EventChannel (None on error)
+ """
+ return EventChannel.interdomain(dom1, dom2, port1=port1, port2=port2)
+
+def eventChannelClose(evtchn):
+ """Close an event channel.
+ """
if not evtchn: return
- evtchn_close(evtchn.get('dom1'), evtchn.get('port1'))
- evtchn_close(evtchn.get('dom2'), evtchn.get('port2'))
-
+ evtchn.close()
class ChannelFactory:
- """Factory for creating channels.
+ """Factory for creating control channels.
Maintains a table of channels.
"""
""" Channels indexed by index. """
- channels = {}
+ channels = None
+
+ thread = None
+
+ notifier = None
+
+ """Map of ports to the virq they signal."""
+ virqPorts = None
def __init__(self):
"""Constructor - do not use. Use the channelFactory function."""
+ self.channels = {}
+ self.virqPorts = {}
self.notifier = xu.notifier()
+ # Register interest in virqs.
+ self.bind_virq(xen.lowlevel.xc.VIRQ_DOM_EXC)
+ self.virqHandler = None
+
+ def bind_virq(self, virq):
+ port = self.notifier.bind_virq(virq)
+ self.virqPorts[port] = virq
+ log.info("Virq %s on port %s", virq, port)
+
+ def start(self):
+ """Fork a thread to read messages.
+ """
+ if self.thread: return
+ self.thread = threading.Thread(name="ChannelFactory",
+ target=self.main)
+ self.thread.setDaemon(True)
+ self.thread.start()
+
+ def stop(self):
+ """Signal the thread to stop.
+ """
+ self.thread = None
+
+ def main(self):
+ """Main routine for the thread.
+ Reads the notifier and dispatches to channels.
+ """
+ while True:
+ if self.thread == None: return
+ port = self.notifier.read()
+ if port:
+ virq = self.virqPorts.get(port)
+ if virq is not None:
+ self.virqReceived(virq)
+ else:
+ self.msgReceived(port)
+ else:
+ select.select([self.notifier], [], [], 1.0)
+
+ def msgReceived(self, port):
+ # We run the message handlers in their own threads.
+ # Note we use keyword args to lambda to save the values -
+ # otherwise lambda will use the variables, which will get
+ # assigned by the loop and the lambda will get the changed values.
+ received = 0
+ for chan in self.channels.values():
+ if self.thread == None: return
+ msg = chan.readResponse()
+ if msg:
+ received += 1
+ chan.responseReceived(msg)
+ for chan in self.channels.values():
+ if self.thread == None: return
+ msg = chan.readRequest()
+ if msg:
+ received += 1
+ self.runInThread(lambda chan=chan, msg=msg: chan.requestReceived(msg))
+ if port and received == 0:
+ log.warning("Port %s notified, but no messages found", port)
+
+ def runInThread(self, thunk):
+ thread = threading.Thread(target = thunk)
+ thread.setDaemon(True)
+ thread.start()
+
+ def setVirqHandler(self, virqHandler):
+ self.virqHandler = virqHandler
+
+ def virqReceived(self, virq):
+ if DEBUG:
+ print 'virqReceived>', virq
+ if not self.virqHandler: return
+ self.runInThread(lambda virq=virq: self.virqHandler(virq))
+
+ def newChannel(self, dom, local_port, remote_port):
+ """Create a new channel.
+ """
+ return self.addChannel(Channel(self, dom, local_port, remote_port))
def addChannel(self, channel):
- """Add a channel. Registers with the notifier.
+ """Add a channel.
"""
- idx = channel.idx
- self.channels[idx] = channel
- self.notifier.bind(idx)
+ self.channels[channel.getKey()] = channel
+ return channel
- def getChannel(self, idx):
- """Get the channel with the given index (if any).
+ def delChannel(self, channel):
+ """Remove the channel.
"""
- return self.channels.get(idx)
+ key = channel.getKey()
+ if key in self.channels:
+ del self.channels[key]
- def delChannel(self, idx):
- """Remove the channel with the given index (if any).
- Deregisters with the notifier.
+ def getChannel(self, dom, local_port, remote_port):
+ """Get the channel with the given domain and ports (if any).
"""
- if idx in self.channels:
- del self.channels[idx]
- self.notifier.unbind(idx)
+ key = (dom, local_port, remote_port)
+ return self.channels.get(key)
- def domChannel(self, dom, local_port=0, remote_port=0):
- """Get the channel for the given domain.
- Construct if necessary.
+ def findChannel(self, dom, local_port=0, remote_port=0):
+ """Find a channel. Ports given as zero are wildcards.
dom domain
returns channel
"""
- chan = self.getDomChannel(dom)
- if not chan:
- chan = Channel(self, dom, local_port=local_port,
- remote_port=remote_port)
- self.addChannel(chan)
- return chan
-
- def getDomChannel(self, dom):
- """Get the channel for the given domain.
-
- dom domain
-
- returns channel (or None)
- """
- dom = int(dom)
- for chan in self.channels.values():
- if not isinstance(chan, Channel): continue
- if chan.dom == dom:
- return chan
+ chan = self.getChannel(dom, local_port, remote_port)
+ if chan: return chan
+ if local_port and remote_port:
+ return None
+ for c in self.channels.values():
+ if c.dom != dom: continue
+ if local_port and local_port != c.getLocalPort(): continue
+ if remote_port and remote_port != c.getRemotePort(): continue
+ return c
return None
-
- def virqChannel(self, virq):
- """Get the channel for the given virq.
- Construct if necessary.
- """
- for chan in self.channels.values():
- if not isinstance(chan, VirqChannel): continue
- if chan.virq == virq:
- return chan
- chan = VirqChannel(self, virq)
- self.addChannel(chan)
+ def openChannel(self, dom, local_port=0, remote_port=0):
+ chan = self.findChannel(dom, local_port=local_port,
+ remote_port=remote_port)
+ if chan:
+ return chan
+ chan = self.newChannel(dom, local_port, remote_port)
return chan
-
- def channelClosed(self, channel):
- """The given channel has been closed - remove it.
- """
- self.delChannel(channel.idx)
+
def createPort(self, dom, local_port=0, remote_port=0):
"""Create a port for a channel to the given domain.
@@ -133,8 +280,31 @@ class ChannelFactory:
@type remote: int
@return: port object
"""
- return xu.port(dom, local_port=int(local_port),
- remote_port=int(remote_port))
+ return xu.port(dom, local_port=local_port, remote_port=remote_port)
+
+ def restoreFromDB(self, db, dom, local, remote):
+ """Create a channel using ports restored from the db (if available).
+ Otherwise use the given ports. This is the inverse operation to
+ saveToDB() on a channel.
+
+ @param db db
+ @param dom domain the channel connects to
+ @param local default local port
+ @param remote default remote port
+ """
+ try:
+ local_port = int(db['local_port'])
+ except:
+ local_port = local
+ try:
+ remote_port = int(db['remote_port'])
+ except:
+ remote_port = remote
+ try:
+ chan = self.openChannel(dom, local_port, remote_port)
+ except:
+ return None
+ return chan
def channelFactory():
"""Singleton constructor for the channel factory.
@@ -147,119 +317,70 @@ def channelFactory():
inst = ChannelFactory()
return inst
-class BaseChannel:
- """Abstract superclass for channels.
-
- The subclass constructor must set idx to the port to use.
+class Channel:
+ """Control channel to a domain.
+ Maintains a list of device handlers to dispatch requests to, based
+ on the request type.
"""
- def __init__(self, factory):
+ def __init__(self, factory, dom, local_port, remote_port):
self.factory = factory
- self.idx = -1
- self.closed = 0
+ self.dom = int(dom)
+ # Registered device handlers.
+ self.devs = []
+ # Handlers indexed by the message types they handle.
+ self.devs_by_type = {}
+ self.port = self.factory.createPort(self.dom,
+ local_port=local_port,
+ remote_port=remote_port)
+ self.closed = False
+ # Queue of waiters for responses to requests.
+ self.queue = ResponseQueue(self)
+ # Make sure the port will deliver all the messages.
+ self.port.register(TYPE_WILDCARD)
- def getIndex(self):
- """Get the channel index.
- """
- return self.idx
+ def saveToDB(self, db):
+ """Save the channel ports to the db so the channel can be restored later,
+ using restoreFromDB() on the factory.
- def notificationReceived(self):
- """Called when a notification is received.
- Calls handleNotification(), which should be defined
- in a subclass.
+ @param db db
"""
if self.closed: return
- self.handleNotification()
+ db['local_port'] = str(self.getLocalPort())
+ db['remote_port'] = str(self.getRemotePort())
+ db.saveDB()
- def close(self):
- """Close the channel. Calls channelClosed() on the factory.
- Override in subclass.
+ def getKey(self):
+ """Get the channel key.
"""
- self.factory.channelClosed(self)
-
- def handleNotification(self):
- """Handle notification.
- Define in subclass.
- """
- pass
-
-
-class VirqChannel(BaseChannel):
- """A channel for handling a virq.
- """
-
- def __init__(self, factory, virq):
- """Create a channel for the given virq using the given factory.
-
- Do not call directly, use virqChannel on the factory.
- """
- BaseChannel.__init__(self, factory)
- self.virq = virq
- # Notification port (int).
- self.port = xc.evtchn_bind_virq(virq)
- self.idx = self.port
- # Clients to call when a virq arrives.
- self.clients = []
-
- def __repr__(self):
- return ('<VirqChannel virq=%d port=%d>'
- % (self.virq, self.port))
-
- def getVirq(self):
- """Get the channel's virq.
- """
- return self.virq
+ return (self.dom, self.getLocalPort(), self.getRemotePort())
+
+ def sxpr(self):
+ val = ['channel']
+ val.append(['domain', self.dom])
+ if self.port:
+ val.append(['local_port', self.port.local_port])
+ val.append(['remote_port', self.port.remote_port])
+ return val
def close(self):
- """Close the channel. Calls lostChannel(self) on all its clients and
- channelClosed() on the factory.
- """
- for c in self.clients[:]:
- c.lostChannel(self)
- self.clients = []
- BaseChannel.close(self)
-
- def registerClient(self, client):
- """Register a client. The client will be called with
- client.virqReceived(virq) when a virq is received.
- The client will be called with client.lostChannel(self) if the
- channel is closed.
- """
- self.clients.append(client)
-
- def handleNotification(self):
- for c in self.clients:
- c.virqReceived(self.virq)
-
- def notify(self):
- xc.evtchn_send(self.port)
-
-
-class Channel(BaseChannel):
- """A control channel to a domain. Messages for the domain device controllers
- are multiplexed over the channel (console, block devs, net devs).
- """
-
- def __init__(self, factory, dom, local_port=0, remote_port=0):
- """Create a channel to the given domain using the given factory.
-
- Do not call directly, use domChannel on the factory.
+ """Close the channel.
"""
- BaseChannel.__init__(self, factory)
- # Domain.
- self.dom = int(dom)
- # Domain port (object).
- self.port = self.factory.createPort(dom, local_port=local_port,
- remote_port=remote_port)
- # Channel port (int).
- self.idx = self.port.local_port
- # Registered devices.
+ if DEBUG:
+ print 'Channel>close>', self
+ if self.closed: return
+ self.closed = True
+ self.factory.delChannel(self)
+ for d in self.devs[:]:
+ d.lostChannel(self)
self.devs = []
- # Devices indexed by the message types they handle.
self.devs_by_type = {}
- # Output queue.
- self.queue = []
- self.closed = 0
+ if self.port:
+ self.port.close()
+ #self.port = None
+
+ def getDomain(self):
+ return self.dom
def getLocalPort(self):
"""Get the local port.
@@ -279,25 +400,19 @@ class Channel(BaseChannel):
if self.closed: return -1
return self.port.remote_port
- def close(self):
- """Close the channel. Calls lostChannel() on all its devices and
- channelClosed() on the factory.
- """
- if self.closed: return
- self.closed = 1
- for d in self.devs[:]:
- d.lostChannel()
- self.factory.channelClosed(self)
- self.devs = []
- self.devs_by_type = {}
- self.port.disconnect()
+ def __repr__(self):
+ return ('<Channel dom=%d ports=%d:%d>'
+ % (self.dom,
+ self.getLocalPort(),
+ self.getRemotePort()))
+
def registerDevice(self, types, dev):
- """Register a device controller.
+ """Register a device message handler.
- @param types: message types the controller handles
+ @param types: message types handled
@type types: array of ints
- @param dev: device controller
+ @param dev: device handler
"""
if self.closed: return
self.devs.append(dev)
@@ -305,9 +420,9 @@ class Channel(BaseChannel):
self.devs_by_type[ty] = dev
def deregisterDevice(self, dev):
- """Remove the registration for a device controller.
+ """Remove the registration for a device handler.
- @param dev: device controller
+ @param dev: device handler
"""
if dev in self.devs:
self.devs.remove(dev)
@@ -316,139 +431,187 @@ class Channel(BaseChannel):
del self.devs_by_type[ty]
def getDevice(self, type):
- """Get the device controller handling a message type.
+ """Get the handler for a message type.
@param type: message type
@type type: int
@return: controller or None
- @rtype: device controller
+ @rtype: device handler
"""
return self.devs_by_type.get(type)
- def getMessageType(self, msg):
- """Get a 2-tuple of the message type and subtype.
-
- @param msg: message
- @type msg: xu message
- @return: type info
- @rtype: (int, int)
- """
- hdr = msg.get_header()
- return (hdr['type'], hdr.get('subtype'))
-
- def __repr__(self):
- return ('<Channel dom=%d ports=%d:%d>'
- % (self.dom,
- self.getLocalPort(),
- self.getRemotePort()))
-
- def handleNotification(self):
- """Process outstanding messages in repsonse to notification on the port.
- """
- if self.closed:
- print 'handleNotification> Notification on closed channel', self
- return
- work = 0
- work += self.handleRequests()
- work += self.handleResponses()
- work += self.handleWrites()
- if work:
- self.notify()
-
- def notify(self):
- """Notify the other end of the port that messages have been processed.
- """
- if self.closed: return
- self.port.notify()
-
- def handleRequests(self):
- work = 0
- while 1:
- msg = self.readRequest()
- if not msg: break
- self.requestReceived(msg)
- work += 1
- return work
-
def requestReceived(self, msg):
- (ty, subty) = self.getMessageType(msg)
- #todo: Must respond before writing any more messages.
- #todo: Should automate this (respond on write)
- responded = 0
+ """A request has been received on the channel.
+        Dispatch it to the device handlers.
+ Called from the channel factory thread.
+ """
+ if DEBUG:
+ print 'Channel>requestReceived>', self,
+ printMsg(msg)
+ (ty, subty) = getMessageType(msg)
+ responded = False
dev = self.getDevice(ty)
if dev:
responded = dev.requestReceived(msg, ty, subty)
+ elif DEBUG:
+ print "Channel>requestReceived> No device handler", self,
+ printMsg(msg)
else:
- print ("requestReceived> No device: Message type %s %d:%d"
- % (msgTypeName(ty, subty), ty, subty)), self
+ pass
if not responded:
- self.port.write_response(msg)
-
- def handleResponses(self):
- work = 0
- while 1:
- msg = self.readResponse()
- if not msg: break
- self.responseReceived(msg)
- work += 1
- return work
+ self.writeResponse(msg)
- def responseReceived(self, msg):
- (ty, subty) = self.getMessageType(msg)
- dev = self.getDevice(ty)
- if dev:
- dev.responseReceived(msg, ty, subty)
- else:
- print ("responseReceived> No device: Message type %d:%d"
- % (msgTypeName(ty, subty), ty, subty)), self
-
- def handleWrites(self):
- work = 0
- # Pull data from producers.
- for dev in self.devs:
- work += dev.produceRequests()
- # Flush the queue.
- while self.queue and self.port.space_to_write_request():
- msg = self.queue.pop(0)
- self.port.write_request(msg)
- work += 1
- return work
-
- def writeRequest(self, msg, notify=1):
- if self.closed:
- val = -1
- elif self.writeReady():
- self.port.write_request(msg)
- if notify: self.notify()
- val = 1
- else:
- self.queue.append(msg)
- val = 0
- return val
-
- def writeResponse(self, msg):
+ def writeRequest(self, msg):
+ """Write a request to the channel.
+ """
+ if DEBUG:
+ print 'Channel>writeRequest>', self,
+ printMsg(msg, all=True)
if self.closed: return -1
- self.port.write_response(msg)
+ self.port.write_request(msg)
return 1
- def writeReady(self):
- if self.closed or self.queue: return 0
- return self.port.space_to_write_request()
+ def writeResponse(self, msg):
+ """Write a response to the channel.
+ """
+ if DEBUG:
+ print 'Channel>writeResponse>', self,
+ printMsg(msg, all=True)
+ if self.port:
+ self.port.write_response(msg)
+ return 1
def readRequest(self):
+ """Read a request from the channel.
+ Called internally.
+ """
if self.closed:
- return None
- if self.port.request_to_read():
- val = self.port.read_request()
+ val = None
else:
- val = None
+ val = self.port.read_request()
return val
def readResponse(self):
+ """Read a response from the channel.
+ Called internally.
+ """
if self.closed:
- return None
- if self.port.response_to_read():
- val = self.port.read_response()
- else:
val = None
+ else:
+ val = self.port.read_response()
+ if DEBUG and val:
+ print 'Channel>readResponse>', self,
+ printMsg(val, all=True)
return val
+
+ def requestResponse(self, msg, timeout=None):
+ """Write a request and wait for a response.
+ Raises IOError on timeout.
+
+ @param msg request message
+ @param timeout timeout (0 is forever)
+ @return response message
+ """
+ if self.closed:
+ raise IOError("closed")
+ if self.closed:
+ return None
+ if timeout is None:
+ timeout = RESPONSE_TIMEOUT
+ elif timeout <= 0:
+ timeout = None
+ return self.queue.call(msg, timeout)
+
+ def responseReceived(self, msg):
+ """A response has been received, look for a waiter to
+ give it to.
+ Called internally.
+ """
+ if DEBUG:
+ print 'Channel>responseReceived>', self,
+ printMsg(msg)
+ self.queue.response(getMessageId(msg), msg)
+
+ def virq(self):
+ self.factory.virq()
+
+class Response:
+ """Entry in the response queue.
+ Used to signal a response to a message.
+ """
+
+ def __init__(self, mid):
+ self.mid = mid
+ self.msg = None
+ self.ready = threading.Event()
+
+ def response(self, msg):
+ """Signal arrival of a response to a waiting thread.
+ Passing msg None cancels the wait with an IOError.
+ """
+ if msg:
+ self.msg = msg
+ else:
+ self.mid = -1
+ self.ready.set()
+
+ def wait(self, timeout):
+ """Wait up to 'timeout' seconds for a response.
+ Returns the response or raises an IOError.
+ """
+ self.ready.wait(timeout)
+ if self.mid < 0:
+ raise IOError("wait canceled")
+ if self.msg is None:
+ raise IOError("response timeout")
+ return self.msg
+
+class ResponseQueue:
+ """Response queue. Manages waiters for responses to messages.
+ """
+
+ def __init__(self, channel):
+ self.channel = channel
+ self.lock = threading.Lock()
+ self.responses = {}
+
+ def add(self, mid):
+ r = Response(mid)
+ self.responses[mid] = r
+ return r
+
+ def get(self, mid):
+ return self.responses.get(mid)
+
+ def remove(self, mid):
+ r = self.responses.get(mid)
+ if r:
+ del self.responses[mid]
+ return r
+
+ def response(self, mid, msg):
+ """Process a response - signals any waiter that a response
+ has arrived.
+ """
+ try:
+ self.lock.acquire()
+ r = self.remove(mid)
+ finally:
+ self.lock.release()
+ if r:
+ r.response(msg)
+
+ def call(self, msg, timeout):
+ """Send the message and wait for 'timeout' seconds for a response.
+ Returns the response.
+ Raises IOError on timeout.
+ """
+ mid = getMessageId(msg)
+ try:
+ self.lock.acquire()
+ r = self.add(mid)
+ finally:
+ self.lock.release()
+ self.channel.writeRequest(msg)
+ return r.wait(timeout)
+
diff --git a/tools/python/xen/xend/server/console.py b/tools/python/xen/xend/server/console.py
index efe85acccc..743ace4aec 100755
--- a/tools/python/xen/xend/server/console.py
+++ b/tools/python/xen/xend/server/console.py
@@ -1,52 +1,55 @@
# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
import socket
-
-from twisted.internet import reactor
-from twisted.internet import protocol
+import threading
+from errno import EAGAIN, EINTR, EWOULDBLOCK
+
+from xen.web import reactor, protocol
from xen.lowlevel import xu
from xen.xend.XendError import XendError
-from xen.xend import EventServer
-eserver = EventServer.instance()
+from xen.xend import EventServer; eserver = EventServer.instance()
from xen.xend.XendLogging import log
-from xen.xend import XendRoot
-xroot = XendRoot.instance()
+from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import sxp
+from xen.xend.xenstore import DBVar
-import controller
-from messages import *
-from params import *
+from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController
+from xen.xend.server.messages import *
+from xen.xend.server.params import *
class ConsoleProtocol(protocol.Protocol):
- """Asynchronous handler for a console TCP socket.
+ """Asynchronous handler for a console socket.
"""
- def __init__(self, controller, idx):
- self.controller = controller
- self.idx = idx
+ def __init__(self, console, id):
+ self.console = console
+ self.id = id
self.addr = None
- self.binary = 0
- def connectionMade(self):
+ def connectionMade(self, addr=None):
peer = self.transport.getPeer()
- self.addr = (peer.host, peer.port)
- if self.controller.connect(self.addr, self):
+ self.addr = addr
+ if self.console.connect(self.addr, self):
self.transport.write("Cannot connect to console %d on domain %d\n"
- % (self.idx, self.controller.dom))
+ % (self.id, self.console.getDomain()))
self.loseConnection()
return
else:
- # KAF: A nice quiet successful connect.
- #self.transport.write("Connected to console %d on domain %d\n"
- # % (self.idx, self.controller.dom))
+ if len(self.addr) == 2:
+ host = str(self.addr[0])
+ port = str(self.addr[1])
+ else:
+ host = 'localhost'
+ port = str(addr)
log.info("Console connected %s %s %s",
- self.idx, str(self.addr[0]), str(self.addr[1]))
+ self.id, host, port)
eserver.inject('xend.console.connect',
- [self.idx, self.addr[0], self.addr[1]])
+ [self.id, host, port])
def dataReceived(self, data):
- if self.controller.handleInput(self, data):
+ if self.console.receiveInput(self, data):
self.loseConnection()
def write(self, data):
@@ -55,55 +58,17 @@ class ConsoleProtocol(protocol.Protocol):
def connectionLost(self, reason=None):
log.info("Console disconnected %s %s %s",
- self.idx, str(self.addr[0]), str(self.addr[1]))
+ str(self.id), str(self.addr[0]), str(self.addr[1]))
eserver.inject('xend.console.disconnect',
- [self.idx, self.addr[0], self.addr[1]])
- self.controller.disconnect(conn=self)
+ [self.id, self.addr[0], self.addr[1]])
+ self.console.disconnect(conn=self)
def loseConnection(self):
self.transport.loseConnection()
-class ConsoleFactory(protocol.ServerFactory):
- """Asynchronous handler for a console server socket.
- """
- protocol = ConsoleProtocol
-
- def __init__(self, controller, idx):
- #protocol.ServerFactory.__init__(self)
- self.controller = controller
- self.idx = idx
-
- def buildProtocol(self, addr):
- proto = self.protocol(self.controller, self.idx)
- proto.factory = self
- return proto
-
-class ConsoleControllerFactory(controller.ControllerFactory):
- """Factory for creating console controllers.
- """
-
- def createController(self, dom, console_port=None):
- if console_port is None:
- console_port = xroot.get_console_port_base() + dom
- for c in self.getControllers():
- if c.console_port == console_port:
- raise XendError('console port in use: ' + str(console_port))
- console = ConsoleController(self, dom, console_port)
- self.addController(console)
- log.info("Created console id=%s domain=%d port=%d",
- console.idx, console.dom, console.console_port)
- eserver.inject('xend.console.create',
- [console.idx, console.dom, console.console_port])
- return console
-
- def consoleClosed(self, console):
- log.info("Closed console id=%s", console.idx)
- eserver.inject('xend.console.close', console.idx)
- self.delController(console)
-
-class ConsoleController(controller.Controller):
- """Console controller for a domain.
- Does not poll for i/o itself, but relies on the notifier to post console
+class ConsoleDev(Dev, protocol.ServerFactory):
+ """Console device for a domain.
+ Does not poll for i/o itself, but relies on the domain to post console
output and the connected TCP sockets to post console input.
"""
@@ -112,43 +77,90 @@ class ConsoleController(controller.Controller):
STATUS_CONNECTED = 'connected'
STATUS_LISTENING = 'listening'
- def __init__(self, factory, dom, console_port):
- controller.Controller.__init__(self, factory, dom)
- self.addMethod(CMSG_CONSOLE, 0, None)
+ __exports__ = Dev.__exports__ + [
+ DBVar('status', ty='str'),
+ #DBVar('listening', ty='str'),
+ DBVar('console_port', ty='int'),
+ ]
+
+ def __init__(self, controller, id, config, recreate=False):
+ Dev.__init__(self, controller, id, config)
+ self.lock = threading.RLock()
self.status = self.STATUS_NEW
self.addr = None
self.conn = None
- self.rbuf = xu.buffer()
- self.wbuf = xu.buffer()
+ self.console_port = None
+ self.obuf = xu.buffer()
+ self.ibuf = xu.buffer()
+ self.channel = None
+ self.listening = False
+ self.unix_listener = None
+ self.tcp_listener = None
+
+ console_port = sxp.child_value(self.config, "console_port")
+ if console_port is None:
+ console_port = xroot.get_console_port_base() + self.getDomain()
+ self.checkConsolePort(console_port)
self.console_port = console_port
-
- self.registerChannel()
- self.listener = None
- self.listen()
-
+
+ log.info("Created console id=%d domain=%d port=%d",
+ self.id, self.getDomain(), self.console_port)
+ eserver.inject('xend.console.create',
+ [self.id, self.getDomain(), self.console_port])
+
+ def init(self, recreate=False, reboot=False):
+ try:
+ self.lock.acquire()
+ self.destroyed = False
+ self.channel = self.getChannel()
+ self.listen()
+ finally:
+ self.lock.release()
+
+ def checkConsolePort(self, console_port):
+ """Check that a console port is not in use by another console.
+ """
+ xd = XendRoot.get_component('xen.xend.XendDomain')
+ for vm in xd.list():
+ ctrl = vm.getDeviceController(self.getType(), error=False)
+ if (not ctrl): continue
+ ctrl.checkConsolePort(console_port)
+
def sxpr(self):
- val = ['console',
- ['status', self.status ],
- ['id', self.idx ],
- ['domain', self.dom ] ]
- val.append(['local_port', self.getLocalPort() ])
- val.append(['remote_port', self.getRemotePort() ])
- val.append(['console_port', self.console_port ])
- if self.addr:
- val.append(['connected', self.addr[0], self.addr[1]])
+ try:
+ self.lock.acquire()
+ val = ['console',
+ ['status', self.status ],
+ ['id', self.id ],
+ ['domain', self.getDomain() ] ]
+ val.append(['local_port', self.getLocalPort() ])
+ val.append(['remote_port', self.getRemotePort() ])
+ val.append(['console_port', self.console_port ])
+ if self.addr:
+ val.append(['connected', self.addr[0], self.addr[1]])
+ finally:
+ self.lock.release()
return val
def getLocalPort(self):
- if self.channel:
- return self.channel.getLocalPort()
- else:
- return 0
+ try:
+ self.lock.acquire()
+ if self.channel:
+ return self.channel.getLocalPort()
+ else:
+ return 0
+ finally:
+ self.lock.release()
def getRemotePort(self):
- if self.channel:
- return self.channel.getRemotePort()
- else:
- return 0
+ try:
+ self.lock.acquire()
+ if self.channel:
+ return self.channel.getRemotePort()
+ else:
+ return 0
+ finally:
+ self.lock.release()
def uri(self):
"""Get the uri to use to connect to the console.
@@ -159,42 +171,60 @@ class ConsoleController(controller.Controller):
host = socket.gethostname()
return "telnet://%s:%d" % (host, self.console_port)
- def ready(self):
- return not (self.closed() or self.rbuf.empty())
-
def closed(self):
return self.status == self.STATUS_CLOSED
def connected(self):
return self.status == self.STATUS_CONNECTED
- def close(self):
- """Close the console controller.
- """
- self.lostChannel()
-
- def lostChannel(self):
- """The channel to the domain has been lost.
- Cleanup: disconnect TCP connections and listeners, notify the controller.
+ def destroy(self, change=False, reboot=False):
+ """Close the console.
"""
- self.status = self.STATUS_CLOSED
- if self.conn:
- self.conn.loseConnection()
- self.listener.stopListening()
- controller.Controller.lostChannel(self)
+ if reboot:
+ return
+ try:
+ self.lock.acquire()
+ self.status = self.STATUS_CLOSED
+ self.listening = False
+ if self.conn:
+ self.conn.loseConnection()
+ if self.tcp_listener:
+ self.tcp_listener.stopListening()
+ self.tcp_listener = None
+ if self.unix_listener:
+ self.unix_listener.stopListening()
+ self.unix_listener = None
+ finally:
+ self.lock.release()
def listen(self):
"""Listen for TCP connections to the console port..
"""
- if self.closed(): return
- self.status = self.STATUS_LISTENING
- if self.listener:
- #self.listener.startListening()
- pass
- else:
- f = ConsoleFactory(self, self.idx)
- interface = xroot.get_console_address()
- self.listener = reactor.listenTCP(self.console_port, f, interface=interface)
+ try:
+ self.lock.acquire()
+ if self.closed():
+ return
+ if self.listening:
+ pass
+ else:
+ self.listening = True
+ self.status = self.STATUS_LISTENING
+ if xroot.get_xend_unix_server():
+ path = '/var/lib/xend/console-%s' % self.console_port
+ self.unix_listener = reactor.listenUNIX(path, self)
+ if xroot.get_xend_http_server():
+ interface = xroot.get_console_address()
+ self.tcp_listener = reactor.listenTCP(self.console_port, self, interface=interface)
+ finally:
+ self.lock.release()
+
+ def buildProtocol(self, addr):
+ """Factory function called to create the protocol when a connection is accepted
+ by listenTCP.
+ """
+ proto = ConsoleProtocol(self, self.id)
+ proto.factory = self
+ return proto
def connect(self, addr, conn):
"""Connect a TCP connection to the console.
@@ -205,84 +235,156 @@ class ConsoleController(controller.Controller):
returns 0 if ok, negative otherwise
"""
- if self.closed(): return -1
- if self.connected(): return -1
- self.addr = addr
- self.conn = conn
- self.status = self.STATUS_CONNECTED
- self.handleOutput()
+ try:
+ self.lock.acquire()
+ if self.closed():
+ return -1
+ if self.connected():
+ return -1
+ self.addr = addr
+ self.conn = conn
+ self.status = self.STATUS_CONNECTED
+ self.writeOutput()
+ finally:
+ self.lock.release()
return 0
def disconnect(self, conn=None):
"""Disconnect the TCP connection to the console.
"""
- if conn and conn != self.conn: return
- if self.conn:
- self.conn.loseConnection()
- self.addr = None
- self.conn = None
- self.listen()
-
- def requestReceived(self, msg, type, subtype):
- """Receive console data from the console channel.
+ try:
+ self.lock.acquire()
+ if conn and conn != self.conn: return
+ if self.conn:
+ self.conn.loseConnection()
+ self.addr = None
+ self.conn = None
+ self.status = self.STATUS_LISTENING
+ self.listen()
+ finally:
+ self.lock.release()
+
+ def receiveOutput(self, msg):
+ """Receive output console data from the console channel.
msg console message
type major message type
subtype minor message typ
"""
- self.rbuf.write(msg.get_payload())
- self.handleOutput()
+ # Treat the obuf as a ring buffer.
+ try:
+ self.lock.acquire()
+ data = msg.get_payload()
+ data_n = len(data)
+ if self.obuf.space() < data_n:
+ self.obuf.discard(data_n)
+ if self.obuf.space() < data_n:
+ data = data[-self.obuf.space():]
+ self.obuf.write(data)
+ self.writeOutput()
+ finally:
+ self.lock.release()
- def responseReceived(self, msg, type, subtype):
- """Handle a response to a request written to the console channel.
- Just ignore it because the return values are not interesting.
+ def writeOutput(self):
+ """Handle buffered output from the console device.
+ Sends it to the connected TCP connection (if any).
+ """
+ try:
+ self.lock.acquire()
+ if self.closed():
+ return -1
+ writes = 0
+ while self.conn and (writes < 100) and (not self.obuf.empty()):
+ try:
+ writes += 1
+ bytes = self.conn.write(self.obuf.peek())
+ if bytes > 0:
+ self.obuf.discard(bytes)
+ except socket.error, err:
+ if err.args[0] in (EWOULDBLOCK, EAGAIN, EINTR):
+ pass
+ else:
+ self.disconnect()
+ break
+
+ finally:
+ self.lock.release()
+ return 0
+
+ def receiveInput(self, conn, data):
+ """Receive console input from a TCP connection. Ignores the
+ input if the calling connection (conn) is not the one
+ connected to the console (self.conn).
- msg console message
- type major message type
- subtype minor message typ
+ conn connection
+ data input data
"""
- pass
+ try:
+ self.lock.acquire()
+ if self.closed(): return -1
+ if conn != self.conn: return 0
+ self.ibuf.write(data)
+ self.writeInput()
+ finally:
+ self.lock.release()
+ return 0
- def produceRequests(self):
- """Write pending console data to the console channel.
+ def writeInput(self):
+ """Write pending console input to the console channel.
Writes as much to the channel as it can.
"""
- work = 0
- while self.channel and not self.wbuf.empty() and self.channel.writeReady():
- msg = xu.message(CMSG_CONSOLE, 0, 0)
- msg.append_payload(self.wbuf.read(msg.MAX_PAYLOAD))
- work += self.channel.writeRequest(msg, notify=0)
- return work
-
- def handleInput(self, conn, data):
- """Handle some external input aimed at the console.
- Called from a TCP connection (conn). Ignores the input
- if the calling connection (conn) is not the one connected
- to the console (self.conn).
+ try:
+ self.lock.acquire()
+ while self.channel and not self.ibuf.empty():
+ msg = xu.message(CMSG_CONSOLE, 0, 0)
+ msg.append_payload(self.ibuf.read(msg.MAX_PAYLOAD))
+ self.channel.writeRequest(msg)
+ finally:
+ self.lock.release()
+
+class ConsoleController(DevController):
+ """Device controller for all the consoles for a domain.
+ """
- conn connection
- data input data
+ def __init__(self, vm, recreate=False):
+ DevController.__init__(self, vm, recreate=recreate)
+ self.rcvr = None
+
+ def initController(self, recreate=False, reboot=False):
+ self.destroyed = False
+ self.rcvr = CtrlMsgRcvr(self.getChannel())
+ self.rcvr.addHandler(CMSG_CONSOLE,
+ 0,
+ self.receiveOutput)
+ self.rcvr.registerChannel()
+ if reboot:
+ self.rebootDevices()
+
+ def destroyController(self, reboot=False):
+ self.destroyed = True
+ self.destroyDevices(reboot=reboot)
+ self.rcvr.deregisterChannel()
+
+ def newDevice(self, id, config, recreate=False):
+ return ConsoleDev(self, id, config, recreate=recreate)
+
+ def checkConsolePort(self, console_port):
+ """Check that a console port is not in use by a console.
"""
- if self.closed(): return -1
- if conn != self.conn: return 0
- self.wbuf.write(data)
- if self.channel and self.produceRequests():
- self.channel.notify()
- return 0
+ for c in self.getDevices():
+ if c.console_port == console_port:
+ raise XendError('console port in use: ' + str(console_port))
- def handleOutput(self):
- """Handle buffered output from the console.
- Sends it to the connected console (if any).
+ def receiveOutput(self, msg):
+ """Handle a control request.
+ The CMSG_CONSOLE messages just contain data, and no console id,
+ so just send to console 0 (if there is one).
+
+ todo: extend CMSG_CONSOLE to support more than one console?
"""
- if self.closed():
- return -1
- if not self.conn:
- return 0
- while not self.rbuf.empty():
- try:
- bytes = self.conn.write(self.rbuf.peek())
- if bytes > 0:
- self.rbuf.discard(bytes)
- except socket.error, error:
- pass
- return 0
+ console = self.getDevice(0)
+ if console:
+ console.receiveOutput(msg)
+ else:
+ log.warning('no console: domain %d', self.getDomain())
+
diff --git a/tools/python/xen/xend/server/controller.py b/tools/python/xen/xend/server/controller.py
index c8962d4675..d1e19efee1 100755
--- a/tools/python/xen/xend/server/controller.py
+++ b/tools/python/xen/xend/server/controller.py
@@ -3,84 +3,30 @@
for a domain.
"""
-from twisted.internet import defer
-#defer.Deferred.debug = 1
-
-import channel
-from messages import msgTypeName, printMsg
+from xen.xend.XendError import XendError
+from xen.xend.xenstore import DBVar
+from xen.xend.server.messages import msgTypeName, printMsg, getMessageType
DEBUG = 0
-class Responder:
- """Handler for a response to a message with a specified id.
- """
-
- def __init__(self, mid, deferred):
- """Create a responder.
-
- @param mid: message id of response to handle
- @type mid: int
- @param deferred: deferred object holding the callbacks
- @type deferred: Deferred
- """
- self.mid = mid
- self.deferred = deferred
-
- def responseReceived(self, msg):
- """Entry point called when a response message with the right id arrives.
- Calls callback on I{self.deferred} with the message.
-
- @param msg: response message
- @type msg: xu message
- """
- if self.deferred.called: return
- self.deferred.callback(msg)
-
- def error(self, err):
- """Entry point called when there has been an error.
- Calls errback on I{self.deferred} with the error.
-
- @param err: error
- @type err: Exception
- """
- if self.deferred.called: return
- self.deferred.errback(err)
-
class CtrlMsgRcvr:
- """Abstract class for things that deal with a control interface to a domain.
+ """Utility class to dispatch messages on a control channel.
Once I{registerChannel} has been called, our message types are registered
- with the channel to the domain. The channel will call I{requestReceived}
- when a request arrives, or I{responseReceived} when a response arrives,
- if they have one of our message types.
+ with the channel. The channel will call I{requestReceived}
+ when a request arrives if it has one of our message types.
- @ivar dom: the domain we are a control interface for
- @type dom: int
+ @ivar channel: channel to a domain
+ @type channel: Channel
@ivar majorTypes: major message types we are interested in
@type majorTypes: {int:{int:method}}
- @ivar timeout: timeout (in seconds) for message handlers
- @type timeout: int
- @ivar channel: channel to the domain
- @type channel: Channel
- @ivar idx: channel index
- @ivar idx: string
- @ivar responders: table of message response handlers
- @type responders: {int:Responder}
"""
- def __init__(self):
- self.channelFactory = channel.channelFactory()
+ def __init__(self, channel):
self.majorTypes = {}
- self.dom = None
- self.channel = None
- self.idx = None
- self.responders = {}
- self.timeout = 10
-
- def setTimeout(self, timeout):
- self.timeout = timeout
+ self.channel = channel
- def getMethod(self, type, subtype):
+ def getHandler(self, type, subtype):
"""Get the method for a type and subtype.
@param type: major message type
@@ -93,7 +39,7 @@ class CtrlMsgRcvr:
method = subtypes.get(subtype)
return method
- def addMethod(self, type, subtype, method):
+ def addHandler(self, type, subtype, method):
"""Add a method to handle a message type and subtype.
@param type: major message type
@@ -124,102 +70,32 @@ class CtrlMsgRcvr:
"""
if DEBUG:
print 'requestReceived>',
- printMsg(msg, all=1)
+ printMsg(msg, all=True)
responded = 0
- method = self.getMethod(type, subtype)
+ method = self.getHandler(type, subtype)
if method:
- responded = method(msg, 1)
+ responded = method(msg)
elif DEBUG:
print ('requestReceived> No handler: Message type %s %d:%d'
% (msgTypeName(type, subtype), type, subtype)), self
return responded
- def responseReceived(self, msg, type, subtype):
- """Dispatch a response to handlers.
- Called by the channel for responses with one of our types.
-
- First looks for a message responder for the message's id.
- See L{callResponders}, L{addResponder}.
- If there is no responder, looks for a message handler for
- the message type/subtype.
-
- @param msg: message
- @type msg: xu message
- @param type: major message type
- @type type: int
- @param subtype: minor message type
- @type subtype: int
- """
- if DEBUG:
- print 'responseReceived>',
- printMsg(msg, all=1)
- if self.callResponders(msg):
- return
- method = self.getMethod(type, subtype)
- if method:
- method(msg, 0)
- elif DEBUG:
- print ('responseReceived> No handler: Message type %s %d:%d'
- % (msgTypeName(type, subtype), type, subtype)), self
-
- def addResponder(self, mid, deferred):
- """Add a responder for a message id.
- The I{deferred} is called with callback(msg) when a response
- with message id I{mid} arrives.
-
- Responders have a timeout set and I{deferred} will error
- on expiry.
-
- @param mid: message id of response expected
- @type mid: int
- @param deferred: handler for the response
- @type deferred: Deferred
- @return: responder
- @rtype: Responder
- """
- resp = Responder(mid, deferred)
- self.responders[resp.mid] = resp
- if self.timeout > 0:
- deferred.setTimeout(self.timeout)
- return resp
-
- def callResponders(self, msg):
- """Call any waiting responders for a response message.
- Looks for a responder registered for the message's id.
- See L{addResponder}.
-
- @param msg: response message
- @type msg: xu message
- @return: 1 if there was a responder for the message, 0 otherwise
- @rtype : bool
- """
- hdr = msg.get_header()
- mid = hdr['id']
- handled = 0
- resp = self.responders.get(mid)
- if resp:
- handled = 1
- resp.responseReceived(msg)
- del self.responders[mid]
- # Clean up called responders.
- for resp in self.responders.values():
- if resp.deferred.called:
- del self.responders[resp.mid]
- return handled
def lostChannel(self):
"""Called when the channel to the domain is lost.
"""
- pass
+ if DEBUG:
+ print 'CtrlMsgRcvr>lostChannel>',
+ self.channel = None
def registerChannel(self):
"""Register interest in our major message types with the
channel to our domain. Once we have registered, the channel
- will call requestReceived or responseReceived for our messages.
+ will call requestReceived for our messages.
"""
- self.channel = self.channelFactory.domChannel(self.dom)
- self.idx = self.channel.getIndex()
- if self.majorTypes:
+ if DEBUG:
+ print 'CtrlMsgRcvr>registerChannel>', self.channel, self.getMajorTypes()
+ if self.channel:
self.channel.registerDevice(self.getMajorTypes(), self)
def deregisterChannel(self):
@@ -229,470 +105,399 @@ class CtrlMsgRcvr:
"""
if self.channel:
self.channel.deregisterDevice(self)
- self.channel = None
- def produceRequests(self):
- """Produce any queued requests.
-
- @return: number produced
- @rtype: int
- """
- return 0
-
- def writeRequest(self, msg, response=None):
- """Write a request to the channel.
-
- @param msg: request message
- @type msg: xu message
- @param response: response handler
- @type response: Deferred
- """
- if self.channel:
- if DEBUG:
- print 'CtrlMsgRcvr>writeRequest>',
- printMsg(msg, all=1)
- if response:
- self.addResponder(msg.get_header()['id'], response)
- self.channel.writeRequest(msg)
- else:
- print 'CtrlMsgRcvr>writeRequest>', 'no channel!', self
-
- def writeResponse(self, msg):
- """Write a response to the channel. This acknowledges
- a request message.
-
- @param msg: message
- @type msg: xu message
- """
- if self.channel:
- if DEBUG:
- print 'CtrlMsgRcvr>writeResponse>',
- printMsg(msg, all=0)
- self.channel.writeResponse(msg)
- else:
- print 'CtrlMsgRcvr>writeResponse>', 'no channel!', self
-
-class ControllerFactory:
- """Abstract class for factories creating controllers for a domain.
- Maintains a table of controllers.
-
- @ivar controllers: mapping of index to controller instance
- @type controllers: {String: Controller}
- @ivar dom: domain
- @type dom: int
+class DevControllerTable:
+ """Table of device controller classes, indexed by type name.
"""
def __init__(self):
- self.controllers = {}
-
- def addController(self, controller):
- """Add a controller instance (under its index).
- """
- self.controllers[controller.idx] = controller
-
- def getControllers(self):
- """Get a list of all controllers.
- """
- return self.controllers.values()
-
- def getControllerByIndex(self, idx):
- """Get a controller from its index.
- """
- return self.controllers.get(idx)
-
- def getControllerByDom(self, dom):
- """Get the controller for the given domain.
+ self.controllerClasses = {}
- @param dom: domain id
- @type dom: int
- @return: controller or None
- """
- for inst in self.controllers.values():
- if inst.dom == dom:
- return inst
- return None
+ def getDevControllerClass(self, type):
+ return self.controllerClasses.get(type)
- def getController(self, dom):
- """Create or find the controller for a domain.
+ def addDevControllerClass(self, cls):
+ self.controllerClasses[cls.getType()] = cls
- @param dom: domain
- @return: controller
- """
- ctrl = self.getControllerByDom(dom)
- if ctrl is None:
- ctrl = self.createController(dom)
- self.addController(ctrl)
- return ctrl
-
- def createController(self, dom):
- """Create a controller. Define in a subclass.
+ def delDevControllerClass(self, type):
+ if type in self.controllerClasses:
+ del self.controllerClasses[type]
- @param dom: domain
- @type dom: int
- @return: controller instance
- @rtype: Controller (or subclass)
- """
- raise NotImplementedError()
-
- def delController(self, controller):
- """Delete a controller instance from the table.
+ def createDevController(self, type, vm, recreate=False):
+ cls = self.getDevControllerClass(type)
+ if not cls:
+ raise XendError("unknown device type: " + type)
+ return cls.createDevController(vm, recreate=recreate)
- @param controller: controller instance
- """
- if controller.idx in self.controllers:
- del self.controllers[controller.idx]
+def getDevControllerTable():
+ """Singleton constructor for the controller table.
+ """
+ global devControllerTable
+ try:
+ devControllerTable
+ except:
+ devControllerTable = DevControllerTable()
+ return devControllerTable
+
+def addDevControllerClass(name, cls):
+ """Add a device controller class to the controller table.
+ """
+ cls.type = name
+ getDevControllerTable().addDevControllerClass(cls)
- def controllerClosed(self, controller):
- """Callback called when a controller is closed (usually by the controller).
-
- @param controller: controller instance
- """
- self.delController(controller)
+def createDevController(name, vm, recreate=False):
+ return getDevControllerTable().createDevController(name, vm, recreate=recreate)
-class Controller(CtrlMsgRcvr):
+class DevController:
"""Abstract class for a device controller attached to a domain.
+ A device controller manages all the devices of a given type for a domain.
+ There is exactly one device controller for each device type for
+ a domain.
- @ivar factory: controller factory
- @type factory: ControllerFactory
- @ivar dom: domain
- @type dom: int
- @ivar channel: channel to the domain
- @type channel: Channel
- @ivar idx: channel index
- @type idx: String
"""
- def __init__(self, factory, dom):
- CtrlMsgRcvr.__init__(self)
- self.factory = factory
- self.dom = int(dom)
- self.channel = None
- self.idx = None
+ # State:
+ # controller/<type> : for controller
+ # device/<type>/<id> : for each device
- def close(self):
- """Close the controller.
+ def createDevController(cls, vm, recreate=False):
+ """Class method to create a dev controller.
"""
- self.lostChannel()
-
- def lostChannel(self):
- """The controller channel has been lost.
- """
- self.deregisterChannel()
- self.factory.controllerClosed(self)
+ ctrl = cls(vm, recreate=recreate)
+ ctrl.initController(recreate=recreate)
+ ctrl.exportToDB()
+ return ctrl
-class SplitControllerFactory(ControllerFactory):
- """Abstract class for factories creating split controllers for a domain.
- Maintains a table of backend controllers.
- """
+ createDevController = classmethod(createDevController)
- def __init__(self):
- ControllerFactory.__init__(self)
- self.backendControllers = {}
+ def getType(cls):
+ return cls.type
- def getBackendControllers(self):
- return self.backendControllers.values()
+ getType = classmethod(getType)
- def getBackendControllerByDomain(self, dom):
- """Get the backend controller for a domain if there is one.
+ __exports__ = [
+ DBVar('type', 'str'),
+ DBVar('destroyed', 'bool'),
+ ]
- @param dom: backend domain
- @return: backend controller
- """
- return self.backendControllers.get(dom)
+ # Set when registered.
+ type = None
- def getBackendController(self, dom):
- """Get the backend controller for a domain, creating
- if necessary.
+ def __init__(self, vm, recreate=False):
+ self.destroyed = False
+ self.vm = vm
+ self.db = self.getDB()
+ self.deviceId = 0
+ self.devices = {}
+ self.device_order = []
- @param dom: backend domain
- @return: backend controller
+ def getDB(self):
+ """Get the db node to use for a controller.
"""
- b = self.getBackendControllerByDomain(dom)
- if b is None:
- b = self.createBackendController(dom)
- self.backendControllers[b.dom] = b
- return b
-
- def createBackendController(self, dom):
- """Create a backend controller. Define in a subclass.
+ return self.vm.db.addChild("/controller/%s" % self.getType())
- @param dom: backend domain
- @return: backend controller
+ def getDevDB(self, id):
+ """Get the db node to use for a device.
"""
- raise NotImplementedError()
+ return self.vm.db.addChild("/device/%s/%s" % (self.getType(), id))
- def delBackendController(self, ctrlr):
- """Remove a backend controller.
+ def exportToDB(self, save=False):
+ self.db.exportToDB(self, fields=self.__exports__, save=save)
- @param ctrlr: backend controller
- """
- if ctrlr.dom in self.backendControllers:
- del self.backendControllers[ctrlr.dom]
+ def importFromDB(self):
+ self.db.importFromDB(self, fields=self.__exports__)
- def backendControllerClosed(self, ctrlr):
- """Callback called when a backend is closed.
- """
- self.delBackendController(ctrlr)
-
- def createBackendInterface(self, ctrl, dom, handle):
- """Create a backend interface. Define in a subclass.
+ def getDevControllerType(self):
+ return self.dctype
- @param ctrl: frontend controller
- @param dom: backend domain
- @return: backend interface
- """
- raise NotImplementedError()
+ def getDomain(self):
+ return self.vm.getDomain()
-class BackendController(Controller):
- """Abstract class for a backend device controller attached to a domain.
-
- @ivar factory: backend controller factory
- @type factory: BackendControllerFactory
- @ivar dom: backend domain
- @type dom: int
- @ivar channel: channel to the domain
- @type channel: Channel
- """
+ def getDomainName(self):
+ return self.vm.getName()
+ def getChannel(self):
+ chan = self.vm.getChannel()
+ return chan
- def __init__(self, factory, dom):
- CtrlMsgRcvr.__init__(self)
- self.factory = factory
- self.dom = int(dom)
- self.channel = None
- self.backendInterfaces = {}
-
- def close(self):
- self.lostChannel()
+ def getDomainInfo(self):
+ return self.vm
- def lostChannel(self):
- self.deregisterChannel()
- self.backend.backendClosed(self)
+ #----------------------------------------------------------------------------
+ # Subclass interface.
+ # Subclasses should define the unimplemented methods..
+ # Redefinitions must have the same arguments.
- def registerInterface(self, intf):
- key = intf.getInterfaceKey()
- self.backendInterfaces[key] = intf
+ def initController(self, recreate=False, reboot=False):
+ """Initialise the controller. Called when the controller is
+ first created, and again after the domain is rebooted (with reboot True).
+ If called with recreate True (and reboot False) the controller is being
+ recreated after a xend restart.
- def deregisterInterface(self, intf):
- key = intf.getInterfaceKey()
- if key in self.backendInterfaces:
- del self.backendInterfaces[key]
+ As this can be a re-init (after reboot) any controller state should
+ be reset. For example the destroyed flag.
+ """
+ self.destroyed = False
+ if reboot:
+ self.rebootDevices()
- def getInterface(self, dom, handle):
- key = (dom, handle)
- return self.backendInterfaces.get(key)
+ def newDevice(self, id, config, recreate=False):
+ """Create a device with the given config.
+ Must be defined in subclass.
+ Called with recreate True when the device is being recreated after a
+ xend restart.
-
- def createBackendInterface(self, ctrl, dom, handle):
- """Create a backend interface. Define in a subclass.
-
- @param ctrl: controller
- @param dom: backend domain
- @param handle: backend handle
+ @return device
"""
raise NotImplementedError()
+ def createDevice(self, config, recreate=False, change=False):
+ """Create a device and attach to its front- and back-ends.
+ If recreate is true the device is being recreated after a xend restart.
+ If change is true the device is a change to an existing domain,
+ i.e. it is being added at runtime rather than when the domain is created.
+ """
+ dev = self.newDevice(self.nextDeviceId(), config, recreate=recreate)
+ if self.vm.recreate:
+ dev.importFromDB()
+ dev.init(recreate=recreate)
+ self.addDevice(dev)
+ if not recreate:
+ dev.exportToDB()
+ dev.attach(recreate=recreate, change=change)
+ dev.exportToDB()
+
+ def configureDevice(self, id, config, change=False):
+ """Reconfigure an existing device.
+ May be defined in subclass."""
+ dev = self.getDevice(id, error=True)
+ dev.configure(config, change=change)
+
+ def destroyDevice(self, id, change=False, reboot=False):
+ """Destroy a device.
+ May be defined in subclass.
+
+ If reboot is true the device is being destroyed for a domain reboot.
+
+ The device is not deleted, since it may be recreated later.
+ """
+ dev = self.getDevice(id, error=True)
+ dev.destroy(change=change, reboot=reboot)
+ return dev
+
+ def deleteDevice(self, id, change=True):
+ """Destroy a device and delete it.
+ Normally called to remove a device from a domain at runtime.
+ """
+ dev = self.destroyDevice(id, change=change)
+ self.removeDevice(dev)
+
+ def destroyController(self, reboot=False):
+ """Destroy all devices and clean up.
+ May be defined in subclass.
+ If reboot is true the controller is being destroyed for a domain reboot.
+ Called at domain shutdown.
+ """
+ self.destroyed = True
+ self.destroyDevices(reboot=reboot)
+
+ #----------------------------------------------------------------------------
-class BackendInterface:
- """Abstract class for a domain's interface onto a backend controller.
- """
-
- def __init__(self, controller, dom, handle):
- """
-
- @param controller: front-end controller
- @param dom: back-end domain
- @param handle: back-end interface handle
- """
- self.factory = controller.factory
- self.controller = controller
- self.dom = int(dom)
- self.handle = handle
- self.backend = self.getBackendController()
-
- def registerInterface(self):
- self.backend.registerInterface(self)
-
- def getInterfaceKey(self):
- return (self.controller.dom, self.handle)
+ def isDestroyed(self):
+ return self.destroyed
- def getBackendController(self):
- return self.factory.getBackendController(self.dom)
+ def getDevice(self, id, error=False):
+ dev = self.devices.get(id)
+ if error and not dev:
+ raise XendError("invalid device id: " + id)
+ return dev
- def writeRequest(self, msg, response=None):
- return self.backend.writeRequest(msg, response=response)
-
- def writeResponse(self, msg):
- return self.backend.writeResponse(msg)
-
- def close(self):
- self.backend.deregisterInterface(self)
- self.controller.backendInterfaceClosed(self)
-
-class SplitController(Controller):
- """Abstract class for a device controller attached to a domain.
- A SplitController manages a BackendInterface for each backend domain
- it has at least one device for.
- """
-
- def __init__(self, factory, dom):
- Controller.__init__(self, factory, dom)
- self.backendInterfaces = {}
- self.backendHandle = 0
- self.devices = {}
+ def getDeviceIds(self):
+ return [ dev.getId() for dev in self.device_order ]
def getDevices(self):
- """Get a list of the devices..
- """
- return self.devices.values()
+ return self.device_order
- def delDevice(self, idx):
- """Remove the device with the given index from the device table.
+ def getDeviceConfig(self, id):
+ return self.getDevice(id).getConfig()
- @param idx device index
- """
- if idx in self.devices:
- del self.devices[idx]
+ def getDeviceConfigs(self):
+ return [ dev.getConfig() for dev in self.device_order ]
- def getDevice(self, idx):
- """Get the device with a given index.
+ def getDeviceSxprs(self):
+ return [ dev.sxpr() for dev in self.device_order ]
- @param idx device index
- @return device (or None)
- """
- return self.devices.get(idx)
+ def addDevice(self, dev):
+ self.devices[dev.getId()] = dev
+ self.device_order.append(dev)
+ return dev
- def findDevice(self, idx):
- """Find a device. If idx is non-negative,
- get the device with the given index. If idx is negative,
- look for the device with least index greater than -idx - 2.
- For example, if idx is -2, look for devices with index
- greater than 0, i.e. 1 or above.
+ def removeDevice(self, dev):
+ if dev.getId() in self.devices:
+ del self.devices[dev.getId()]
+ if dev in self.device_order:
+ self.device_order.remove(dev)
- @param idx device index
- @return device (or None)
- """
- if idx < 0:
- idx = -idx - 2
- val = None
- for dev in self.devices.values():
- if dev.idx <= idx: continue
- if (val is None) or (dev.idx < val.idx):
- val = dev
- else:
- val = getDevice(idx)
- return val
-
- def getMaxDeviceIdx(self):
- """Get the maximum id used by devices.
-
- @return maximum idx
+ def rebootDevices(self):
+ for dev in self.getDevices():
+ dev.reboot()
+
+ def destroyDevices(self, reboot=False):
+ """Destroy all devices.
"""
- maxIdx = 0
- for dev in self.devices:
- if dev.idx > maxIdx:
- maxIdx = dev.idx
- return maxIdx
-
- def getBackendInterfaces(self):
- return self.backendInterfaces.values()
+ for dev in self.getDevices():
+ dev.destroy(reboot=reboot)
- def getBackendInterfaceByHandle(self, handle):
- for b in self.getBackendInterfaces():
- if b.handle == handle:
- return b
- return None
+ def getMaxDeviceId(self):
+ maxid = 0
+ for id in self.devices:
+ if id > maxid:
+ maxid = id
+ return maxid
- def getBackendInterfaceByDomain(self, dom):
- return self.backendInterfaces.get(dom)
+ def nextDeviceId(self):
+ id = self.deviceId
+ self.deviceId += 1
+ return id
- def getBackendInterface(self, dom):
- """Get the backend interface for a domain.
+ def getDeviceCount(self):
+ return len(self.devices)
- @param dom: domain
- @return: backend controller
- """
- b = self.getBackendInterfaceByDomain(dom)
- if b is None:
- handle = self.backendHandle
- self.backendHandle += 1
- b = self.factory.createBackendInterface(self, dom, handle)
- b.registerInterface()
- self.backendInterfaces[b.dom] = b
- return b
-
- def delBackendInterface(self, ctrlr):
- """Remove a backend controller.
-
- @param ctrlr: backend controller
- """
- if ctrlr.dom in self.backendInterfaces:
- del self.backendInterfaces[ctrlr.dom]
-
- def backendInterfaceClosed(self, ctrlr):
- """Callback called when a backend is closed.
- """
- self.delBackendInterface(ctrlr)
-
class Dev:
"""Abstract class for a device attached to a device controller.
- @ivar idx: identifier
- @type idx: String
+ @ivar id: identifier
+ @type id: int
@ivar controller: device controller
- @type controller: DeviceController
- @ivar props: property table
- @type props: { String: value }
+ @type controller: DevController
"""
- def __init__(self, idx, controller):
- self.idx = str(idx)
+ # ./status : need 2: actual and requested?
+ # down-down: initial.
+ # up-up: fully up.
+ # down-up: down requested, still up. Watch front and back, when both
+ # down go to down-down. But what if one (or both) is not connected?
+ # Still have front/back trees with status? Watch front/status, back/status?
+ # up-down: up requested, still down.
+ # Back-end watches ./status, front/status
+ # Front-end watches ./status, back/status
+ # i.e. each watches the other 2.
+ # Each is status/request status/actual?
+ #
+ # backend?
+ # frontend?
+
+ __exports__ = [
+ DBVar('id', ty='int'),
+ DBVar('type', ty='str'),
+ DBVar('config', ty='sxpr'),
+ DBVar('destroyed', ty='bool'),
+ ]
+
+ def __init__(self, controller, id, config, recreate=False):
self.controller = controller
- self.props = {}
+ self.id = id
+ self.config = config
+ self.destroyed = False
+ self.type = self.getType()
- def getidx(self):
- return self.idx
+ self.db = controller.getDevDB(id)
- def setprop(self, k, v):
- self.props[k] = v
+ def exportToDB(self, save=False):
+ self.db.exportToDB(self, fields=self.__exports__, save=save)
- def getprop(self, k, v=None):
- return self.props.get(k, v)
+ def importFromDB(self):
+ self.db.importFromDB(self, fields=self.__exports__)
- def hasprop(self, k):
- return k in self.props
+ def getDomain(self):
+ return self.controller.getDomain()
- def delprop(self, k):
- if k in self.props:
- del self.props[k]
+ def getDomainName(self):
+ return self.controller.getDomainName()
- def sxpr(self):
- """Get the s-expression for the deivice.
- Implement in a subclass.
+ def getChannel(self):
+ return self.controller.getChannel()
+
+ def getDomainInfo(self):
+ return self.controller.getDomainInfo()
+
+ def getController(self):
+ return self.controller
- @return: sxpr
+ def getType(self):
+ return self.controller.getType()
+
+ def getId(self):
+ return self.id
+
+ def getConfig(self):
+ return self.config
+
+ def isDestroyed(self):
+ return self.destroyed
+
+ #----------------------------------------------------------------------------
+ # Subclass interface.
+ # Define methods in subclass as needed.
+ # Redefinitions must have the same arguments.
+
+ def init(self, recreate=False, reboot=False):
+ """Initialization. Called on initial create (when reboot is False)
+ and on reboot (when reboot is True). When xend is restarting is
+ called with recreate True. Define in subclass if needed.
+
+ Device instance variables must be defined in the class constructor,
+ but given null or default values. The real values should be initialised
+ in this method. This allows devices to be re-initialised.
+
+ Since this can be called to re-initialise a device any state flags
+ should be reset.
"""
- raise NotImplementedError()
+ self.destroyed = False
- def configure(self, config, change=0):
- raise NotImplementedError()
+ def attach(self, recreate=False, change=False):
+ """Attach the device to its front and back ends.
+ Define in subclass if needed.
+ """
+ pass
-class SplitDev(Dev):
+ def reboot(self):
+ """Reconnect the device when the domain is rebooted.
+ """
+ self.init(reboot=True)
+ self.attach()
- def __init__(self, idx, controller):
- Dev.__init__(self, idx, controller)
- self.backendDomain = 0
- self.index = None
+ def sxpr(self):
+ """Get the s-expression for the deivice.
+ Implement in a subclass if needed.
- def getBackendInterface(self):
- return self.controller.getBackendInterface(self.backendDomain)
+ @return: sxpr
+ """
+ return self.getConfig()
- def getIndex(self):
- return self.index
+ def configure(self, config, change=False):
+ """Reconfigure the device.
- def setIndex(self, index):
- self.index = index
+ Implement in subclass.
+ """
+ raise NotImplementedError()
+ def refresh(self):
+ """Refresh the device..
+ Default no-op. Define in subclass if needed.
+ """
+ pass
+ def destroy(self, change=False, reboot=False):
+ """Destroy the device.
+ If change is True notify destruction (runtime change).
+ If reboot is True the device is being destroyed for a reboot.
+ Redefine in subclass if needed.
+ Called at domain shutdown and when a device is deleted from
+ a running domain (with change True).
+ """
+ self.destroyed = True
+ pass
+ #----------------------------------------------------------------------------
diff --git a/tools/python/xen/xend/server/domain.py b/tools/python/xen/xend/server/domain.py
deleted file mode 100644
index eb0dbcf48b..0000000000
--- a/tools/python/xen/xend/server/domain.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
-
-from xen.xend.XendError import XendError
-
-import channel
-import controller
-from messages import *
-
-class DomainControllerFactory(controller.ControllerFactory):
- """Factory for creating domain controllers.
- """
-
- def createController(self, dom):
- """Create a domain controller.
-
- dom domain
-
- returns domain controller
- """
- return DomainController(self, dom)
-
-class DomainController(controller.Controller):
- """Generic controller for a domain.
- Used for domain shutdown.
- """
-
- """Map shutdown reasons to the message type to use.
- """
- reasons = {'poweroff' : 'shutdown_poweroff_t',
- 'reboot' : 'shutdown_reboot_t',
- 'suspend' : 'shutdown_suspend_t',
- 'sysrq' : 'shutdown_sysrq_t' }
-
- def __init__(self, factory, dom):
- controller.Controller.__init__(self, factory, dom)
- self.addMethod(CMSG_SHUTDOWN, 0, None)
- self.addMethod(CMSG_MEM_REQUEST, 0, None)
- self.registerChannel()
-
- def shutdown(self, reason, key=0):
- """Shutdown a domain.
-
- reason shutdown reason
- key sysrq key (only if reason is 'sysrq')
- """
- msgtype = self.reasons.get(reason)
- if not msgtype:
- raise XendError('invalid reason:' + reason)
- extra = {}
- if reason == 'sysrq': extra['key'] = key
- print extra
- self.writeRequest(packMsg(msgtype, extra))
-
- def mem_target_set(self, target):
- """Set domain memory target in pages.
- """
- msg = packMsg('mem_request_t', { 'target' : target * (1 << 8)} )
- self.writeRequest(msg)
diff --git a/tools/python/xen/xend/server/event.py b/tools/python/xen/xend/server/event.py
new file mode 100644
index 0000000000..f16c786c3e
--- /dev/null
+++ b/tools/python/xen/xend/server/event.py
@@ -0,0 +1,218 @@
+import sys
+import StringIO
+
+from xen.web import reactor, protocol
+
+from xen.xend import scheduler
+from xen.xend import sxp
+from xen.xend import PrettyPrint
+from xen.xend import EventServer; eserver = EventServer.instance()
+from xen.xend.XendError import XendError
+from xen.xend import XendRoot; xroot = XendRoot.instance()
+
+DEBUG = 0
+
+class EventProtocol(protocol.Protocol):
+ """Asynchronous handler for a connected event socket.
+ """
+
+ def __init__(self, daemon):
+ #protocol.Protocol.__init__(self)
+ self.daemon = daemon
+ # Event queue.
+ self.queue = []
+ # Subscribed events.
+ self.events = []
+ self.parser = sxp.Parser()
+ self.pretty = 0
+
+ # For debugging subscribe to everything and make output pretty.
+ #self.subscribe(['*'])
+ self.pretty = 1
+
+ def dataReceived(self, data):
+ try:
+ self.parser.input(data)
+ if self.parser.ready():
+ val = self.parser.get_val()
+ res = self.dispatch(val)
+ self.send_result(res)
+ if self.parser.at_eof():
+ self.loseConnection()
+ except SystemExit:
+ raise
+ except:
+ self.send_error()
+
+ def loseConnection(self):
+ if self.transport:
+ self.transport.loseConnection()
+ if self.connected:
+ scheduler.now(self.connectionLost)
+
+ def connectionLost(self, reason=None):
+ self.unsubscribe()
+
+ def send_reply(self, sxpr):
+ io = StringIO.StringIO()
+ if self.pretty:
+ PrettyPrint.prettyprint(sxpr, out=io)
+ else:
+ sxp.show(sxpr, out=io)
+ print >> io
+ io.seek(0)
+ if self.transport:
+ return self.transport.write(io.getvalue())
+ else:
+ return 0
+
+ def send_result(self, res):
+ if res is None:
+ resp = ['ok']
+ else:
+ resp = ['ok', res]
+ return self.send_reply(resp)
+
+ def send_error(self):
+ (extype, exval) = sys.exc_info()[:2]
+ return self.send_reply(['err',
+ ['type', str(extype)],
+ ['value', str(exval)]])
+
+ def send_event(self, val):
+ return self.send_reply(['event', val[0], val[1]])
+
+ def unsubscribe(self):
+ for event in self.events:
+ eserver.unsubscribe(event, self.queue_event)
+
+ def subscribe(self, events):
+ self.unsubscribe()
+ for event in events:
+ eserver.subscribe(event, self.queue_event)
+ self.events = events
+
+ def queue_event(self, name, v):
+ # Despite the name we don't queue the event here.
+ # We send it because the transport will queue it.
+ self.send_event([name, v])
+
+ def opname(self, name):
+ return 'op_' + name.replace('.', '_')
+
+ def operror(self, name, req):
+ raise XendError('Invalid operation: ' +name)
+
+ def dispatch(self, req):
+ op_name = sxp.name(req)
+ op_method_name = self.opname(op_name)
+ op_method = getattr(self, op_method_name, self.operror)
+ return op_method(op_name, req)
+
+ def op_help(self, name, req):
+ def nameop(x):
+ if x.startswith('op_'):
+ return x[3:].replace('_', '.')
+ else:
+ return x
+
+ l = [ nameop(k) for k in dir(self) if k.startswith('op_') ]
+ return l
+
+ def op_quit(self, name, req):
+ self.loseConnection()
+
+ def op_exit(self, name, req):
+ sys.exit(0)
+
+ def op_pretty(self, name, req):
+ self.pretty = 1
+
+ def op_console_disconnect(self, name, req):
+ id = sxp.child_value(req, 'id')
+ if not id:
+ raise XendError('Missing console id')
+ id = int(id)
+ self.daemon.console_disconnect(id)
+
+ def op_info(self, name, req):
+ val = ['info']
+ #val += self.daemon.consoles()
+ #val += self.daemon.blkifs()
+ #val += self.daemon.netifs()
+ #val += self.daemon.usbifs()
+ return val
+
+ def op_sys_subscribe(self, name, v):
+ # (sys.subscribe event*)
+ # Subscribe to the events:
+ self.subscribe(v[1:])
+
+ def op_sys_inject(self, name, v):
+ # (sys.inject event)
+ event = v[1]
+ eserver.inject(sxp.name(event), event)
+
+ def op_trace(self, name, v):
+ mode = (v[1] == 'on')
+ self.daemon.tracing(mode)
+
+ def op_log_stderr(self, name, v):
+ mode = v[1]
+ logging = xroot.get_logging()
+ if mode == 'on':
+ logging.addLogStderr()
+ else:
+ logging.removeLogStderr()
+
+ def op_debug_msg(self, name, v):
+ mode = v[1]
+ import messages
+ messages.DEBUG = (mode == 'on')
+
+ def op_debug_controller(self, name, v):
+ mode = v[1]
+ import controller
+ controller.DEBUG = (mode == 'on')
+
+ def op_domain_ls(self, name, v):
+ xd = xroot.get_component("xen.xend.XendDomain")
+ return xd.domain_ls()
+
+ def op_domain_configure(self, name, v):
+ domid = sxp.child_value(v, "dom")
+ config = sxp.child_value(v, "config")
+ if domid is None:
+ raise XendError("missing domain id")
+ if config is None:
+ raise XendError("missing domain config")
+ xd = xroot.get_component("xen.xend.XendDomain")
+ xd.domain_configure(domid, config)
+
+ def op_domain_unpause(self, name, v):
+ domid = sxp.child_value(v, "dom")
+ if domid is None:
+ raise XendError("missing domain id")
+ xd = xroot.get_component("xen.xend.XendDomain")
+ xd.domain_unpause(domid)
+
+class EventFactory(protocol.ServerFactory):
+ """Asynchronous handler for the event server socket.
+ """
+
+ def __init__(self, daemon):
+ #protocol.ServerFactory.__init__(self)
+ self.daemon = daemon
+
+ def buildProtocol(self, addr):
+ return EventProtocol(self.daemon)
+
+def listenEvent(daemon):
+ factory = EventFactory(daemon)
+ if xroot.get_xend_unix_server():
+ path = '/var/lib/xend/event-socket'
+ reactor.listenUNIX(path, factory)
+ if xroot.get_xend_http_server():
+ port = xroot.get_xend_event_port()
+ interface = xroot.get_xend_address()
+ reactor.listenTCP(port, factory, interface=interface)
diff --git a/tools/python/xen/xend/server/messages.py b/tools/python/xen/xend/server/messages.py
index 35f9db986e..0cea725e3c 100644
--- a/tools/python/xen/xend/server/messages.py
+++ b/tools/python/xen/xend/server/messages.py
@@ -4,7 +4,12 @@ import types
from xen.lowlevel import xu
-DEBUG = 0
+DEBUG = False
+
+#PORT_WILDCARD = 0xefffffff
+
+"""Wildcard for the control message types."""
+TYPE_WILDCARD = 0xffff
""" All message formats.
Added to incrementally for the various message types.
@@ -41,8 +46,6 @@ CMSG_BLKIF_BE_CONNECT = 2
CMSG_BLKIF_BE_DISCONNECT = 3
CMSG_BLKIF_BE_VBD_CREATE = 4
CMSG_BLKIF_BE_VBD_DESTROY = 5
-CMSG_BLKIF_BE_VBD_GROW = 6
-CMSG_BLKIF_BE_VBD_SHRINK = 7
CMSG_BLKIF_BE_DRIVER_STATUS = 32
BLKIF_DRIVER_STATUS_DOWN = 0
@@ -61,7 +64,7 @@ BLKIF_BE_STATUS_INTERFACE_CONNECTED = 4
BLKIF_BE_STATUS_VBD_EXISTS = 5
BLKIF_BE_STATUS_VBD_NOT_FOUND = 6
BLKIF_BE_STATUS_OUT_OF_MEMORY = 7
-BLKIF_BE_STATUS_EXTENT_NOT_FOUND = 8
+BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND = 8
BLKIF_BE_STATUS_MAPPING_ERROR = 9
blkif_formats = {
@@ -86,11 +89,6 @@ blkif_formats = {
(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE),
# Create a vbd device.
- 'blkif_be_vbd_grow_t':
- (CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW),
- # Change the size of a vbd device. Remove?
- # Do in one go in blkif_be_vbd_create_t.
-
'blkif_be_vbd_destroy_t':
(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_DESTROY),
# Destroy a vbd.
@@ -101,7 +99,6 @@ blkif_formats = {
(CMSG_BLKIF_FE, CMSG_BLKIF_FE_INTERFACE_STATUS),
# Notify device status to fe.
# Also used to notify 'any' device change with status BLKIF_INTERFACE_STATUS_CHANGED.
- # Rename to blkif_fe_interface_status.
'blkif_fe_driver_status_t':
(CMSG_BLKIF_FE, CMSG_BLKIF_FE_DRIVER_STATUS),
@@ -109,7 +106,6 @@ blkif_formats = {
# Xend sets be(s) to BLKIF_INTERFACE_STATUS_DISCONNECTED,
# sends blkif_fe_interface_status_t to fe (from each be).
#
- # Rename to blkif_fe_driver_status.
# Reply with i/f count.
# The i/f sends probes (using -ve trick), we reply with the info.
@@ -150,6 +146,7 @@ CMSG_NETIF_BE_CREATE = 0
CMSG_NETIF_BE_DESTROY = 1
CMSG_NETIF_BE_CONNECT = 2
CMSG_NETIF_BE_DISCONNECT = 3
+CMSG_NETIF_BE_CREDITLIMIT = 4
CMSG_NETIF_BE_DRIVER_STATUS = 32
NETIF_INTERFACE_STATUS_CLOSED = 0 #/* Interface doesn't exist. */
@@ -173,6 +170,9 @@ netif_formats = {
'netif_be_destroy_t':
(CMSG_NETIF_BE, CMSG_NETIF_BE_DESTROY),
+ 'netif_be_creditlimit_t':
+ (CMSG_NETIF_BE, CMSG_NETIF_BE_CREDITLIMIT),
+
'netif_be_driver_status_t':
(CMSG_NETIF_BE, CMSG_NETIF_BE_DRIVER_STATUS),
@@ -189,6 +189,80 @@ netif_formats = {
msg_formats.update(netif_formats)
#============================================================================
+# USB interface message types.
+#============================================================================
+
+CMSG_USBIF_BE = 8
+CMSG_USBIF_FE = 9
+
+CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED = 0
+
+CMSG_USBIF_FE_DRIVER_STATUS_CHANGED = 32
+CMSG_USBIF_FE_INTERFACE_CONNECT = 33
+CMSG_USBIF_FE_INTERFACE_DISCONNECT = 34
+
+USBIF_DRIVER_STATUS_DOWN = 0
+USBIF_DRIVER_STATUS_UP = 1
+
+USBIF_INTERFACE_STATUS_DESTROYED = 0 #/* Interface doesn't exist. */
+USBIF_INTERFACE_STATUS_DISCONNECTED = 1 #/* Exists but is disconnected. */
+USBIF_INTERFACE_STATUS_CONNECTED = 2 #/* Exists and is connected. */
+
+CMSG_USBIF_BE_CREATE = 0
+CMSG_USBIF_BE_DESTROY = 1
+CMSG_USBIF_BE_CONNECT = 2
+
+CMSG_USBIF_BE_DISCONNECT = 3
+CMSG_USBIF_BE_CLAIM_PORT = 4
+CMSG_USBIF_BE_RELEASE_PORT = 5
+
+CMSG_USBIF_BE_DRIVER_STATUS_CHANGED = 32
+
+USBIF_BE_STATUS_OKAY = 0
+USBIF_BE_STATUS_ERROR = 1
+
+USBIF_BE_STATUS_INTERFACE_EXISTS = 2
+USBIF_BE_STATUS_INTERFACE_NOT_FOUND = 3
+USBIF_BE_STATUS_INTERFACE_CONNECTED = 4
+USBIF_BE_STATUS_OUT_OF_MEMORY = 7
+USBIF_BE_STATUS_MAPPING_ERROR = 9
+
+usbif_formats = {
+ 'usbif_be_create_t':
+ (CMSG_USBIF_BE, CMSG_USBIF_BE_CREATE),
+
+ 'usbif_be_destroy_t':
+ (CMSG_USBIF_BE, CMSG_USBIF_BE_DESTROY),
+
+ 'usbif_be_connect_t':
+ (CMSG_USBIF_BE, CMSG_USBIF_BE_CONNECT),
+
+ 'usbif_be_disconnect_t':
+ (CMSG_USBIF_BE, CMSG_USBIF_BE_DISCONNECT),
+
+ 'usbif_be_claim_port_t':
+ (CMSG_USBIF_BE, CMSG_USBIF_BE_CLAIM_PORT),
+
+ 'usbif_be_release_port_t':
+ (CMSG_USBIF_BE, CMSG_USBIF_BE_RELEASE_PORT),
+
+ 'usbif_fe_interface_status_changed_t':
+ (CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED),
+
+ 'usbif_fe_driver_status_changed_t':
+ (CMSG_USBIF_FE, CMSG_USBIF_FE_DRIVER_STATUS_CHANGED),
+
+ 'usbif_fe_interface_connect_t':
+ (CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_CONNECT),
+
+ 'usbif_fe_interface_disconnect_t':
+ (CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_DISCONNECT),
+
+ }
+
+msg_formats.update(usbif_formats)
+
+#============================================================================
# Domain shutdown message types.
#============================================================================
@@ -266,9 +340,9 @@ def packMsg(ty, params):
(major, minor) = msg_formats[ty]
args = {}
for (k, v) in params.items():
- if k == 'mac':
+ if k in ['mac', 'be_mac']:
for i in range(0, 6):
- args['mac[%d]' % i] = v[i]
+ args['%s[%d]' % (k, i)] = v[i]
else:
args[k] = v
msg = xu.message(major, minor, msgid, args)
@@ -303,8 +377,8 @@ def unpackMsg(ty, msg):
pass
if macs:
args['mac'] = mac
- print 'macs=', macs
- print 'args=', args
+ #print 'macs=', macs
+ #print 'args=', args
for k in macs:
del args[k]
if DEBUG:
@@ -327,7 +401,7 @@ def msgTypeName(ty, subty):
return name
return None
-def printMsg(msg, out=sys.stdout, all=0):
+def printMsg(msg, out=sys.stdout, all=False):
"""Print a message.
@param msg: message
@@ -346,3 +420,18 @@ def printMsg(msg, out=sys.stdout, all=0):
if all:
print >>out, 'payload=', msg.get_payload()
+
+def getMessageType(msg):
+ """Get a 2-tuple of the message type and subtype.
+
+ @param msg: message
+ @type msg: xu message
+ @return: type info
+ @rtype: (int, int)
+ """
+ hdr = msg.get_header()
+ return (hdr['type'], hdr.get('subtype'))
+
+def getMessageId(msg):
+ hdr = msg.get_header()
+ return hdr['id']
diff --git a/tools/python/xen/xend/server/netif.py b/tools/python/xen/xend/server/netif.py
index 5d3ef5a18d..0a49842522 100755
--- a/tools/python/xen/xend/server/netif.py
+++ b/tools/python/xen/xend/server/netif.py
@@ -4,112 +4,118 @@
import random
-from twisted.internet import defer
+from xen.util.mac import macFromString, macToString
from xen.xend import sxp
from xen.xend import Vifctl
-from xen.xend.XendError import XendError
+from xen.xend.XendError import XendError, VmError
from xen.xend.XendLogging import log
from xen.xend import XendVnet
from xen.xend.XendRoot import get_component
+from xen.xend.xenstore import DBVar
-import channel
-import controller
-from messages import *
+from xen.xend.server import channel
+from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController
+from xen.xend.server.messages import *
-class NetifBackendController(controller.BackendController):
- """Handler for the 'back-end' channel to a network device driver domain.
+class NetDev(Dev):
+ """A network device.
"""
-
- def __init__(self, ctrl, dom):
- controller.BackendController.__init__(self, ctrl, dom)
- self.addMethod(CMSG_NETIF_BE,
- CMSG_NETIF_BE_DRIVER_STATUS,
- self.recv_be_driver_status)
- self.registerChannel()
-
- def recv_be_driver_status(self, msg, req):
- val = unpackMsg('netif_be_driver_status_t', msg)
- status = val['status']
-
-class NetifBackendInterface(controller.BackendInterface):
- """Handler for the 'back-end' channel to a network device driver domain
- on behalf of a front-end domain.
-
- Each network device is handled separately, so we add no functionality
- here.
- """
-
- pass
-
-class NetifControllerFactory(controller.SplitControllerFactory):
- """Factory for creating network interface controllers.
- """
-
- def __init__(self):
- controller.SplitControllerFactory.__init__(self)
- def createController(self, dom):
- """Create a network interface controller for a domain.
-
- @param dom: domain
- @return: netif controller
- """
- return NetifController(self, dom)
-
- def createBackendController(self, dom):
- """Create a network device backend controller.
-
- @param dom: backend domain
- @return: backend controller
- """
- return NetifBackendController(self, dom)
-
- def createBackendInterface(self, ctrl, dom, handle):
- """Create a network device backend interface.
-
- @param ctrl: controller
- @param dom: backend domain
- @param handle: interface handle
- @return: backend interface
- """
- return NetifBackendInterface(ctrl, dom, handle)
-
- def getDomainDevices(self, dom):
- """Get the network devices for a domain.
-
- @param dom: domain
- @return: netif controller list
- """
- netif = self.getControllerByDom(dom)
- return (netif and netif.getDevices()) or []
-
- def getDomainDevice(self, dom, vif):
- """Get a virtual network interface device for a domain.
-
- @param dom: domain
- @param vif: virtual interface index
- @return: NetDev
- """
- netif = self.getControllerByDom(dom)
- return (netif and netif.getDevice(vif)) or None
-
-class NetDev(controller.SplitDev):
- """Info record for a network device.
- """
-
- def __init__(self, vif, ctrl, config):
- controller.SplitDev.__init__(self, vif, ctrl)
- self.vif = vif
+ # State:
+ # inherited +
+ # ./config
+ # ./mac
+ # ./be_mac
+ # ./bridge
+ # ./script
+ # ./ipaddr ?
+ #
+ # ./credit
+ # ./period
+ #
+ # ./vifctl: up/down?
+ # ./vifname
+ #
+ #
+ # Poss should have no backend state here - except for ref to backend's own tree
+ # for the device? And a status - the one we want.
+ # ./back/dom
+ # ./back/devid - id for back-end (netif_handle) - same as front/devid
+ # ./back/id - backend id (if more than one b/e per domain)
+ # ./back/status
+ # ./back/tx_shmem_frame - actually these belong in back-end state
+ # ./back/rx_shmem_frame
+ #
+ # ./front/dom
+ # ./front/devid
+ # ./front/status - need 2: one for requested, one for actual? Or drive from dev status
+ # and this is front status only.
+ # ./front/tx_shmem_frame
+ # ./front/rx_shmem_frame
+ #
+ # ./evtchn/front - here or in front/back?
+ # ./evtchn/back
+ # ./evtchn/status ?
+ # At present created by dev: but should be created unbound by front/back
+ # separately and then bound (by back)?
+
+ __exports__ = Dev.__exports__ + [
+ DBVar('config', ty='sxpr'),
+ DBVar('mac', ty='mac'),
+ DBVar('be_mac', ty='mac'),
+ DBVar('bridge', ty='str'),
+ DBVar('script', ty='str'),
+ #DBVar('ipaddr'),
+ DBVar('credit', ty='int'),
+ DBVar('period', ty='int'),
+ DBVar('vifname', ty='str'),
+ DBVar('evtchn'), #todo: export fields (renamed)
+ ]
+
+ def __init__(self, controller, id, config, recreate=False):
+ Dev.__init__(self, controller, id, config, recreate=recreate)
+ self.vif = int(self.id)
self.evtchn = None
- self.configure(config)
+ self.status = None
+ self.frontendDomain = self.getDomain()
+ self.frontendChannel = None
+ self.backendDomain = None
+ self.backendChannel = None
+ self.credit = None
+ self.period = None
+ self.mac = None
+ self.be_mac = None
+ self.bridge = None
+ self.script = None
+ self.ipaddr = None
+ self.vifname = None
+ self.configure(self.config, recreate=recreate)
+
+ def init(self, recreate=False, reboot=False):
+ self.destroyed = False
self.status = NETIF_INTERFACE_STATUS_DISCONNECTED
+ self.frontendDomain = self.getDomain()
+ self.frontendChannel = self.getChannel()
+ cf = channel.channelFactory()
+ self.backendChannel = cf.openChannel(self.backendDomain)
def _get_config_mac(self, config):
vmac = sxp.child_value(config, 'mac')
if not vmac: return None
- mac = [ int(x, 16) for x in vmac.split(':') ]
- if len(mac) != 6: raise XendError("invalid mac")
+ try:
+ mac = macFromString(vmac)
+ except:
+ raise XendError("invalid mac: %s" % vmac)
+ return mac
+
+ def _get_config_be_mac(self, config):
+ vmac = sxp.child_value(config, 'be_mac')
+ if not vmac: return None
+ try:
+ mac = macFromString(vmac)
+ except:
+ raise XendError("invalid backend mac: %s" % vmac)
return mac
def _get_config_ipaddr(self, config):
@@ -122,11 +128,12 @@ class NetDev(controller.SplitDev):
val = None
return val
- def configure(self, config, change=0):
+ def configure(self, config, change=False, recreate=False):
if change:
return self.reconfigure(config)
self.config = config
self.mac = None
+ self.be_mac = None
self.bridge = None
self.script = None
self.ipaddr = []
@@ -141,15 +148,22 @@ class NetDev(controller.SplitDev):
if mac is None:
raise XendError("invalid mac")
self.mac = mac
+ self.be_mac = self._get_config_be_mac(config)
self.bridge = sxp.child_value(config, 'bridge')
self.script = sxp.child_value(config, 'script')
self.ipaddr = self._get_config_ipaddr(config) or []
+ self._config_credit_limit(config)
try:
- xd = get_component('xen.xend.XendDomain')
- self.backendDomain = int(xd.domain_lookup(sxp.child_value(config, 'backend', '0')).id)
+ if recreate:
+ self.backendDomain = int(sxp.child_value(config, 'backend', '0'))
+ else:
+ #todo: Code below will fail on xend restart when backend is not domain 0.
+ xd = get_component('xen.xend.XendDomain')
+ self.backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).id
except:
raise XendError('invalid backend domain')
+ return self.config
def reconfigure(self, config):
"""Reconfigure the interface with new values.
@@ -165,14 +179,19 @@ class NetDev(controller.SplitDev):
"""
changes = {}
mac = self._get_config_mac(config)
+ be_mac = self._get_config_be_mac(config)
bridge = sxp.child_value(config, 'bridge')
script = sxp.child_value(config, 'script')
ipaddr = self._get_config_ipaddr(config)
+
xd = get_component('xen.xend.XendDomain')
- backendDomain = str(xd.domain_lookup(sxp.child_value(config, 'backend', '0')).id)
+ backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).id
+
if (mac is not None) and (mac != self.mac):
raise XendError("cannot change mac")
- if (backendDomain is not None) and (backendDomain != str(self.backendDomain)):
+ if (be_mac is not None) and (be_mac != self.be_mac):
+ raise XendError("cannot change backend mac")
+ if (backendDomain is not None) and (backendDomain != self.backendDomain):
raise XendError("cannot change backend")
if (bridge is not None) and (bridge != self.bridge):
changes['bridge'] = bridge
@@ -187,29 +206,57 @@ class NetDev(controller.SplitDev):
setattr(self, k, v)
self.config = sxp.merge(config, self.config)
self.vifctl("up")
+
+ self._config_credit_limit(config, change=True)
return self.config
+ def _config_credit_limit(self, config, change=False):
+ period = sxp.child_value(config, 'period')
+ credit = sxp.child_value(config, 'credit')
+ if period and credit:
+ try:
+ period = int(period)
+ credit = int(credit)
+ except ex:
+ raise XendError('vif: invalid credit limit')
+ if change:
+ self.setCreditLimit(credit, period)
+ self.config = sxp.merge([sxp.name(self.config),
+ ['credit', credit],
+ ['period', period]],
+ self.config)
+ else:
+ self.period = period
+ self.credit = credit
+ elif period or credit:
+ raise XendError('vif: invalid credit limit')
+
def sxpr(self):
vif = str(self.vif)
mac = self.get_mac()
val = ['vif',
- ['idx', self.idx],
+ ['id', self.id],
['vif', vif],
['mac', mac],
['vifname', self.vifname],
]
+
+ if self.be_mac:
+ val.append(['be_mac', self.get_be_mac()])
if self.bridge:
val.append(['bridge', self.bridge])
if self.script:
val.append(['script', self.script])
for ip in self.ipaddr:
val.append(['ip', ip])
+ if self.credit:
+ val.append(['credit', self.credit])
+ if self.period:
+ val.append(['period', self.period])
if self.evtchn:
val.append(['evtchn',
self.evtchn['port1'],
self.evtchn['port2']])
- if self.index is not None:
- val.append(['index', self.index])
return val
def get_vifname(self):
@@ -218,24 +265,29 @@ class NetDev(controller.SplitDev):
return self.vifname
def default_vifname(self):
- return "vif%d.%d" % (self.controller.dom, self.vif)
+ return "vif%d.%d" % (self.frontendDomain, self.vif)
def get_mac(self):
"""Get the MAC address as a string.
"""
- return ':'.join(map(lambda x: "%02x" % x, self.mac))
+ return macToString(self.mac)
+
+ def get_be_mac(self):
+ """Get the backend MAC address as a string.
+ """
+ return macToString(self.be_mac)
def vifctl_params(self, vmname=None):
"""Get the parameters to pass to vifctl.
"""
- dom = self.controller.dom
+ dom = self.frontendDomain
if vmname is None:
xd = get_component('xen.xend.XendDomain')
try:
vm = xd.domain_lookup(dom)
vmname = vm.name
except:
- vmname = 'DOM%d' % dom
+ vmname = 'Domain-%d' % dom
return { 'domain': vmname,
'vif' : self.get_vifname(),
'mac' : self.get_mac(),
@@ -258,11 +310,23 @@ class NetDev(controller.SplitDev):
if vnet:
vnet.vifctl(op, self.get_vifname(), self.get_mac())
- def attach(self):
- d = self.send_be_create()
- d.addCallback(self.respond_be_create)
- return d
+ def attach(self, recreate=False, change=False):
+ if recreate:
+ pass
+ else:
+ self.send_be_create()
+ if self.credit and self.period:
+ self.send_be_creditlimit(self.credit, self.period)
+ self.vifctl('up', vmname=self.getDomainName())
+
+ def closeEvtchn(self):
+ if self.evtchn:
+ channel.eventChannelClose(self.evtchn)
+ self.evtchn = None
+ def openEvtchn(self):
+ self.evtchn = channel.eventChannel(self.backendDomain, self.frontendDomain)
+
def getEventChannelBackend(self):
val = 0
if self.evtchn:
@@ -276,75 +340,81 @@ class NetDev(controller.SplitDev):
return val
def send_be_create(self):
- d = defer.Deferred()
msg = packMsg('netif_be_create_t',
- { 'domid' : self.controller.dom,
+ { 'domid' : self.frontendDomain,
'netif_handle' : self.vif,
+ 'be_mac' : self.be_mac or [0, 0, 0, 0, 0, 0],
'mac' : self.mac,
#'vifname' : self.vifname
})
- self.getBackendInterface().writeRequest(msg, response=d)
- return d
-
- def respond_be_create(self, msg):
- val = unpackMsg('netif_be_create_t', msg)
- return self
+ msg = self.backendChannel.requestResponse(msg)
+ # todo: check return status
- def destroy(self, change=0):
+ def destroy(self, change=False, reboot=False):
"""Destroy the device's resources and disconnect from the back-end
device controller. If 'change' is true notify the front-end interface.
@param change: change flag
"""
+ self.destroyed = True
self.status = NETIF_INTERFACE_STATUS_CLOSED
- def cb_destroy(val):
- self.send_be_destroy()
- self.getBackendInterface().close()
- if change:
- self.reportStatus()
- log.debug("Destroying vif domain=%d vif=%d", self.controller.dom, self.vif)
- if self.evtchn:
- channel.eventChannelClose(self.evtchn)
+ log.debug("Destroying vif domain=%d vif=%d", self.frontendDomain, self.vif)
+ self.closeEvtchn()
self.vifctl('down')
- d = self.send_be_disconnect()
- d.addCallback(cb_destroy)
+ self.send_be_disconnect()
+ self.send_be_destroy()
+ if change:
+ self.reportStatus()
def send_be_disconnect(self):
- d = defer.Deferred()
msg = packMsg('netif_be_disconnect_t',
- { 'domid' : self.controller.dom,
+ { 'domid' : self.frontendDomain,
'netif_handle' : self.vif })
- self.getBackendInterface().writeRequest(msg, response=d)
- return d
+ self.backendChannel.requestResponse(msg)
+ #todo: check return status
def send_be_destroy(self, response=None):
msg = packMsg('netif_be_destroy_t',
- { 'domid' : self.controller.dom,
+ { 'domid' : self.frontendDomain,
'netif_handle' : self.vif })
- self.controller.delDevice(self.vif)
- self.getBackendInterface().writeRequest(msg, response=response)
+ self.backendChannel.requestResponse(msg)
+ #todo: check return status
- def recv_fe_interface_connect(self, val, req):
- if not req: return
- self.evtchn = channel.eventChannel(self.backendDomain, self.controller.dom)
+ def recv_fe_interface_connect(self, val):
+ self.openEvtchn()
msg = packMsg('netif_be_connect_t',
- { 'domid' : self.controller.dom,
+ { 'domid' : self.frontendDomain,
'netif_handle' : self.vif,
'evtchn' : self.getEventChannelBackend(),
'tx_shmem_frame' : val['tx_shmem_frame'],
'rx_shmem_frame' : val['rx_shmem_frame'] })
- d = defer.Deferred()
- d.addCallback(self.respond_be_connect)
- self.getBackendInterface().writeRequest(msg, response=d)
-
- def respond_be_connect(self, msg):
- val = unpackMsg('netif_be_connect_t', msg)
- dom = val['domid']
- vif = val['netif_handle']
+ msg = self.backendChannel.requestResponse(msg)
+ #todo: check return status
self.status = NETIF_INTERFACE_STATUS_CONNECTED
self.reportStatus()
- def reportStatus(self, resp=0):
+ def setCreditLimit(self, credit, period):
+ #todo: these params should be in sxpr and vif config.
+ self.credit = credit
+ self.period = period
+ self.send_be_creditlimit(credit, period)
+
+ def getCredit(self):
+ return self.credit
+
+ def getPeriod(self):
+ return self.period
+
+ def send_be_creditlimit(self, credit, period):
+ msg = packMsg('netif_be_creditlimit_t',
+ { 'domid' : self.frontendDomain,
+ 'netif_handle' : self.vif,
+ 'credit_bytes' : credit,
+ 'period_usec' : period })
+ msg = self.backendChannel.requestResponse(msg)
+ # todo: check return status
+
+ def reportStatus(self, resp=False):
msg = packMsg('netif_fe_interface_status_t',
{ 'handle' : self.vif,
'status' : self.status,
@@ -352,90 +422,79 @@ class NetDev(controller.SplitDev):
'domid' : self.backendDomain,
'mac' : self.mac })
if resp:
- self.controller.writeResponse(msg)
+ self.frontendChannel.writeResponse(msg)
else:
- self.controller.writeRequest(msg)
+ self.frontendChannel.writeRequest(msg)
def interfaceChanged(self):
- """Notify the font-end that a device has been added or removed.
+ """Notify the front-end that a device has been added or removed.
"""
self.reportStatus()
-class NetifController(controller.SplitController):
+class NetifController(DevController):
"""Network interface controller. Handles all network devices for a domain.
"""
- def __init__(self, factory, dom):
- controller.SplitController.__init__(self, factory, dom)
- self.devices = {}
- self.addMethod(CMSG_NETIF_FE,
- CMSG_NETIF_FE_DRIVER_STATUS,
- self.recv_fe_driver_status)
- self.addMethod(CMSG_NETIF_FE,
- CMSG_NETIF_FE_INTERFACE_STATUS,
- self.recv_fe_interface_status)
- self.addMethod(CMSG_NETIF_FE,
- CMSG_NETIF_FE_INTERFACE_CONNECT,
- self.recv_fe_interface_connect)
- self.registerChannel()
+ def __init__(self, vm, recreate=False):
+ DevController.__init__(self, vm, recreate=recreate)
+ self.channel = None
+ self.rcvr = None
+ self.channel = None
+
+ def initController(self, recreate=False, reboot=False):
+ self.destroyed = False
+ self.channel = self.getChannel()
+ # Register our handlers for incoming requests.
+ self.rcvr = CtrlMsgRcvr(self.channel)
+ self.rcvr.addHandler(CMSG_NETIF_FE,
+ CMSG_NETIF_FE_DRIVER_STATUS,
+ self.recv_fe_driver_status)
+ self.rcvr.addHandler(CMSG_NETIF_FE,
+ CMSG_NETIF_FE_INTERFACE_STATUS,
+ self.recv_fe_interface_status)
+ self.rcvr.addHandler(CMSG_NETIF_FE,
+ CMSG_NETIF_FE_INTERFACE_CONNECT,
+ self.recv_fe_interface_connect)
+ self.rcvr.registerChannel()
+ if reboot:
+ self.rebootDevices()
+
+ def destroyController(self, reboot=False):
+ """Destroy the controller and all devices.
+ """
+ self.destroyed = True
+ log.debug("Destroying netif domain=%d", self.getDomain())
+ self.destroyDevices(reboot=reboot)
+ if self.rcvr:
+ self.rcvr.deregisterChannel()
def sxpr(self):
- val = ['netif', ['dom', self.dom]]
+ val = ['netif', ['dom', self.getDomain()]]
return val
- def lostChannel(self):
- """Method called when the channel has been lost.
- """
- controller.Controller.lostChannel(self)
-
- def addDevice(self, vif, config):
- """Add a network interface.
-
- @param vif: device index
- @param config: device configuration
- @return: device
- """
- if vif in self.devices:
- raise XendError('device exists:' + str(vif))
- dev = NetDev(vif, self, config)
- self.devices[vif] = dev
- return dev
-
- def destroy(self):
- """Destroy the controller and all devices.
- """
- self.destroyDevices()
-
- def destroyDevices(self):
- """Destroy all devices.
- """
- for dev in self.getDevices():
- dev.destroy()
+ def newDevice(self, id, config, recreate=False):
+ """Create a network device.
- def attachDevice(self, vif, config, recreate=0):
- """Attach a network device.
-
- @param vif: interface index
+ @param id: interface id
@param config: device configuration
@param recreate: recreate flag (true after xend restart)
- @return: deferred
"""
- dev = self.addDevice(vif, config)
- if recreate:
- d = defer.succeed(dev)
- else:
- d = dev.attach()
- return d
+ return NetDev(self, id, config, recreate=recreate)
- def recv_fe_driver_status(self, msg, req):
- if not req: return
- print
- print 'recv_fe_driver_status>'
+ def limitDevice(self, vif, credit, period):
+ if vif not in self.devices:
+ raise XendError('device does not exist for credit limit: vif'
+ + str(self.getDomain()) + '.' + str(vif))
+
+ dev = self.devices[vif]
+ return dev.setCreditLimit(credit, period)
+
+ def recv_fe_driver_status(self, msg):
msg = packMsg('netif_fe_driver_status_t',
{ 'status' : NETIF_DRIVER_STATUS_UP,
## FIXME: max_handle should be max active interface id
- 'max_handle' : len(self.devices)
- #'max_handle' : self.getMaxDeviceIdx()
+ 'max_handle' : self.getDeviceCount()
+ #'max_handle' : self.getMaxDeviceId()
})
# Two ways of doing it:
# 1) front-end requests driver status, we reply with the interface count,
@@ -448,43 +507,38 @@ class NetifController(controller.SplitController):
#
# We really want to use 1), but at the moment the xenU kernel panics
# in that mode, so we're sticking to 2) for now.
- resp = 0
+ resp = False
if resp:
- self.writeResponse(msg)
+ self.channel.writeResponse(msg)
else:
for dev in self.devices.values():
dev.reportStatus()
- self.writeRequest(msg)
+ self.channel.writeRequest(msg)
return resp
- def recv_fe_interface_status(self, msg, req):
- if not req: return
- print
+ def recv_fe_interface_status(self, msg):
val = unpackMsg('netif_fe_interface_status_t', msg)
- print "recv_fe_interface_status>", val
vif = val['handle']
dev = self.findDevice(vif)
if dev:
- print 'recv_fe_interface_status>', 'dev=', dev
- dev.reportStatus(resp=1)
+ dev.reportStatus(resp=True)
else:
+ log.error('Received netif_fe_interface_status for unknown vif: dom=%d vif=%d',
+ self.getDomain(), vif)
msg = packMsg('netif_fe_interface_status_t',
{ 'handle' : -1,
'status' : NETIF_INTERFACE_STATUS_CLOSED,
});
- print 'recv_fe_interface_status>', 'no dev, returning -1'
- self.writeResponse(msg)
- return 1
+ self.channel.writeResponse(msg)
+ return True
-
- def recv_fe_interface_connect(self, msg, req):
+ def recv_fe_interface_connect(self, msg):
val = unpackMsg('netif_fe_interface_connect_t', msg)
vif = val['handle']
- print
- print "recv_fe_interface_connect", val
dev = self.getDevice(vif)
if dev:
- dev.recv_fe_interface_connect(val, req)
+ dev.recv_fe_interface_connect(val)
else:
log.error('Received netif_fe_interface_connect for unknown vif: dom=%d vif=%d',
- self.dom, vif)
+ self.getDomain(), vif)
+
diff --git a/tools/python/xen/xend/server/params.py b/tools/python/xen/xend/server/params.py
index 0f8632a4f2..2565c2dfcd 100644
--- a/tools/python/xen/xend/server/params.py
+++ b/tools/python/xen/xend/server/params.py
@@ -1,7 +1,34 @@
-# The following parameters could be placed in a configuration file.
-XEND_PID_FILE = '/var/run/xend.pid'
-XFRD_PID_FILE = '/var/run/xfrd.pid'
-XEND_TRACE_FILE = '/var/log/xend.trace'
+import os
+
+def getenv(var, val, conv=None):
+ """Get a value from the environment, with optional conversion.
-XEND_USER = 'root'
+ @param var name of environment variable
+ @param val default value
+ @param conv conversion function to apply to env value
+ @return converted value or default
+ """
+ try:
+ v = os.getenv(var)
+ if v is None:
+ v = val
+ else:
+ print var, '=', v
+ if conv:
+ v = conv(v)
+ except:
+ v = val
+ return v
+
+# The following parameters could be placed in a configuration file.
+XEND_PID_FILE = '/var/run/xend.pid'
+XEND_TRACE_FILE = '/var/log/xend.trace'
+XEND_DEBUG_LOG = '/var/log/xend-debug.log'
+XEND_USER = 'root'
+XEND_DEBUG = getenv("XEND_DEBUG", 0, conv=int)
+XEND_DAEMONIZE = getenv("XEND_DAEMONIZE", not XEND_DEBUG, conv=int)
+XENSTORED_PID_FILE = '/var/run/xenstored.pid'
+XENSTORED_RUN_DIR = '/var/run/xenstored'
+XENSTORED_LIB_DIR = '/var/lib/xenstored'
+XENSTORED_DEBUG = getenv("XENSTORED_DEBUG", 0, conv=int)
diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py
new file mode 100644
index 0000000000..83f061a0b2
--- /dev/null
+++ b/tools/python/xen/xend/server/pciif.py
@@ -0,0 +1,59 @@
+import types
+
+import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+
+from xen.xend import sxp
+from xen.xend.XendError import VmError
+
+from controller import Dev, DevController
+
+def parse_pci(val):
+ """Parse a pci field.
+ """
+ if isinstance(val, types.StringType):
+ radix = 10
+ if val.startswith('0x') or val.startswith('0X'):
+ radix = 16
+ v = int(val, radix)
+ else:
+ v = val
+ return v
+
+class PciDev(Dev):
+
+ def __init__(self, controller, id, config, recreate=False):
+ Dev.__init__(self, controller, id, config, recreate=recreate)
+ bus = sxp.child_value(self.config, 'bus')
+ if not bus:
+ raise VmError('pci: Missing bus')
+ dev = sxp.child_value(self.config, 'dev')
+ if not dev:
+ raise VmError('pci: Missing dev')
+ func = sxp.child_value(self.config, 'func')
+ if not func:
+ raise VmError('pci: Missing func')
+ try:
+ bus = parse_pci(bus)
+ dev = parse_pci(dev)
+ func = parse_pci(func)
+ except:
+ raise VmError('pci: invalid parameter')
+
+ def attach(self, recreate=False, change=False):
+ rc = xc.physdev_pci_access_modify(dom = self.getDomain(),
+ bus = bus,
+ dev = dev,
+ func = func,
+ enable = True)
+ if rc < 0:
+ #todo non-fatal
+ raise VmError('pci: Failed to configure device: bus=%s dev=%s func=%s' %
+ (bus, dev, func))
+
+ def destroy(self, change=False, reboot=False):
+ pass
+
+class PciController(DevController):
+
+ def newDevice(self, id, config, recreate=False):
+ return PciDev(self, id, config, recreate=recreate)
diff --git a/tools/python/xen/xend/server/relocate.py b/tools/python/xen/xend/server/relocate.py
new file mode 100644
index 0000000000..07cd5e7fbb
--- /dev/null
+++ b/tools/python/xen/xend/server/relocate.py
@@ -0,0 +1,139 @@
+
+import socket
+import sys
+import StringIO
+
+from xen.web import reactor, protocol
+
+from xen.xend import scheduler
+from xen.xend import sxp
+from xen.xend import EventServer; eserver = EventServer.instance()
+from xen.xend.XendError import XendError
+from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend.XendLogging import log
+from xen.xend import XendCheckpoint
+
+DEBUG = 0
+
+class RelocationProtocol(protocol.Protocol):
+ """Asynchronous handler for a connected relocation socket.
+ """
+
+ def __init__(self):
+ #protocol.Protocol.__init__(self)
+ self.parser = sxp.Parser()
+
+ def dataReceived(self, data):
+ try:
+ self.parser.input(data)
+ if self.parser.ready():
+ val = self.parser.get_val()
+ res = self.dispatch(val)
+ self.send_result(res)
+ if self.parser.at_eof():
+ self.loseConnection()
+ except SystemExit:
+ raise
+ except:
+ self.send_error()
+
+ def loseConnection(self):
+ if self.transport:
+ self.transport.loseConnection()
+ if self.connected:
+ scheduler.now(self.connectionLost)
+
+ def connectionLost(self, reason=None):
+ pass
+
+ def send_reply(self, sxpr):
+ io = StringIO.StringIO()
+ sxp.show(sxpr, out=io)
+ print >> io
+ io.seek(0)
+ if self.transport:
+ return self.transport.write(io.getvalue())
+ else:
+ return 0
+
+ def send_result(self, res):
+ if res is None:
+ resp = ['ok']
+ else:
+ resp = ['ok', res]
+ return self.send_reply(resp)
+
+ def send_error(self):
+ (extype, exval) = sys.exc_info()[:2]
+ return self.send_reply(['err',
+ ['type', str(extype)],
+ ['value', str(exval)]])
+
+ def opname(self, name):
+ return 'op_' + name.replace('.', '_')
+
+ def operror(self, name, req):
+ raise XendError('Invalid operation: ' +name)
+
+ def dispatch(self, req):
+ op_name = sxp.name(req)
+ op_method_name = self.opname(op_name)
+ op_method = getattr(self, op_method_name, self.operror)
+ return op_method(op_name, req)
+
+ def op_help(self, name, req):
+ def nameop(x):
+ if x.startswith('op_'):
+ return x[3:].replace('_', '.')
+ else:
+ return x
+
+ l = [ nameop(k) for k in dir(self) if k.startswith('op_') ]
+ return l
+
+ def op_quit(self, name, req):
+ self.loseConnection()
+
+ def op_receive(self, name, req):
+ if self.transport:
+ self.send_reply(["ready", name])
+ self.transport.sock.setblocking(1)
+ xd = xroot.get_component("xen.xend.XendDomain")
+ XendCheckpoint.restore(xd, self.transport.sock.fileno())
+ self.transport.sock.setblocking(0)
+ else:
+ log.error(name + ": no transport")
+ raise XendError(name + ": no transport")
+
+class RelocationFactory(protocol.ServerFactory):
+ """Asynchronous handler for the relocation server socket.
+ """
+
+ def __init__(self):
+ #protocol.ServerFactory.__init__(self)
+ pass
+
+ def buildProtocol(self, addr):
+ return RelocationProtocol()
+
+def listenRelocation():
+ factory = RelocationFactory()
+ if xroot.get_xend_unix_server():
+ path = '/var/lib/xend/relocation-socket'
+ reactor.listenUNIX(path, factory)
+ if xroot.get_xend_relocation_server():
+ port = xroot.get_xend_relocation_port()
+ interface = xroot.get_xend_relocation_address()
+ reactor.listenTCP(port, factory, interface=interface)
+
+def setupRelocation(dst, port):
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((dst, port))
+ except socket.error, err:
+ raise XendError("can't connect: %s" % err[1])
+
+ sock.send("receive\n")
+ print sock.recv(80)
+
+ return sock
diff --git a/tools/python/xen/xend/server/usbif.py b/tools/python/xen/xend/server/usbif.py
new file mode 100644
index 0000000000..d366985740
--- /dev/null
+++ b/tools/python/xen/xend/server/usbif.py
@@ -0,0 +1,350 @@
+# Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
+# Copyright (C) 2004 Intel Research Cambridge
+# Copyright (C) 2004 Mark Williamson <mark.williamson@cl.cam.ac.uk>
+"""Support for virtual USB hubs.
+"""
+
+from xen.xend import sxp
+from xen.xend.XendLogging import log
+from xen.xend.XendError import XendError
+from xen.xend.xenstore import DBVar
+
+from xen.xend.server import channel
+from xen.xend.server.controller import Dev, DevController
+from xen.xend.server.messages import *
+
+class UsbBackend:
+ """Handler for the 'back-end' channel to a USB device driver domain
+ on behalf of a front-end domain.
+ """
+ def __init__(self, controller, id, dom):
+ self.controller = controller
+ self.id = id
+ self.destroyed = False
+ self.connected = False
+ self.connecting = False
+ self.frontendDomain = self.controller.getDomain()
+ self.backendDomain = dom
+ self.frontendChannel = None
+ self.backendChannel = None
+
+ def init(self, recreate=False, reboot=False):
+ self.frontendChannel = self.controller.getChannel()
+ cf = channel.channelFactory()
+ self.backendChannel = cf.openChannel(self.backendDomain)
+
+ def __str__(self):
+ return ('<UsbifBackend frontend=%d backend=%d id=%d>'
+ % (self.frontendDomain,
+ self.backendDomain,
+ self.id))
+
+ def closeEvtchn(self):
+ if self.evtchn:
+ channel.eventChannelClose(self.evtchn)
+ self.evtchn = None
+
+ def openEvtchn(self):
+ self.evtchn = channel.eventChannel(self.backendDomain, self.frontendDomain)
+
+ def getEventChannelBackend(self):
+ val = 0
+ if self.evtchn:
+ val = self.evtchn['port1']
+ return val
+
+ def getEventChannelFrontend(self):
+ val = 0
+ if self.evtchn:
+ val = self.evtchn['port2']
+ return val
+
+ def connect(self, recreate=False):
+ """Connect the controller to the usbif control interface.
+
+ @param recreate: true if after xend restart
+ """
+ log.debug("Connecting usbif %s", str(self))
+ if recreate or self.connected or self.connecting:
+ pass
+ else:
+ self.send_be_create()
+
+ def send_be_create(self):
+ msg = packMsg('usbif_be_create_t',
+ { 'domid' : self.frontendDomain })
+ msg = self.backendChannel.requestResponse(msg)
+ val = unpackMsg('usbif_be_create_t', msg)
+ log.debug('>UsbifBackendController>respond_be_create> %s', str(val))
+ self.connected = True
+
+ def destroy(self, reboot=False):
+ """Disconnect from the usbif control interface and destroy it.
+ """
+ self.destroyed = True
+ self.send_be_disconnect()
+ self.send_be_destroy()
+ self.closeEvtchn()
+
+ def send_be_disconnect(self):
+ log.debug('>UsbifBackendController>send_be_disconnect> %s', str(self))
+ msg = packMsg('usbif_be_disconnect_t',
+ { 'domid' : self.frontendDomain })
+ self.backendChannel.requestResponse(msg)
+
+ def send_be_destroy(self, response=None):
+ log.debug('>UsbifBackendController>send_be_destroy> %s', str(self))
+ msg = packMsg('usbif_be_destroy_t',
+ { 'domid' : self.frontendDomain })
+ self.backendChannel.requestResponse(msg)
+ #todo: check return status
+
+
+ def connectInterface(self, val):
+ self.openEvtchn()
+ log.debug(">UsbifBackendController>connectInterface> connecting usbif to event channel %s ports=%d:%d",
+ str(self),
+ self.getEventChannelBackend(),
+ self.getEventChannelFrontend())
+ msg = packMsg('usbif_be_connect_t',
+ { 'domid' : self.frontendDomain,
+ 'evtchn' : self.getEventChannelBackend(),
+ 'shmem_frame' : val['shmem_frame'],
+ 'bandwidth' : 500 # XXX fix bandwidth!
+ })
+ msg = self.backendChannel.requestResponse(msg)
+ self.respond_be_connect(msg)
+
+ def respond_be_connect(self, msg):
+ """Response handler for a be_connect message.
+
+ @param msg: message
+ @type msg: xu message
+ """
+ val = unpackMsg('usbif_be_connect_t', msg)
+ log.debug('>UsbifBackendController>respond_be_connect> %s, %s', str(self), str(val))
+ self.send_fe_interface_status_changed()
+ log.debug(">UsbifBackendController> Successfully connected USB interface for domain %d" % self.frontendDomain)
+ self.controller.claim_ports()
+
+ def send_fe_interface_status_changed(self):
+ msg = packMsg('usbif_fe_interface_status_changed_t',
+ { 'status' : USBIF_INTERFACE_STATUS_CONNECTED,
+ 'domid' : self.backendDomain,
+ 'evtchn' : self.getEventChannelFrontend(),
+ 'bandwidth' : 500,
+ 'num_ports' : len(self.controller.devices)
+ })
+ self.frontendChannel.writeRequest(msg)
+
+ def interfaceChanged(self):
+ self.send_fe_interface_status_changed()
+
+
+class UsbDev(Dev):
+
+ __exports__ = Dev.__exports__ + [
+ DBVar('port', ty='int'),
+ DBVar('path', ty='str'),
+ ]
+
+ def __init__(self, controller, id, config, recreate=False):
+ Dev.__init__(self, controller, id, config, recreate=recreate)
+ self.port = id
+ self.path = None
+ self.frontendDomain = self.getDomain()
+ self.frontendChannel = None
+ self.backendDomain = 0
+ self.backendChannel = None
+ self.configure(self.config, recreate=recreate)
+
+ def init(self, recreate=False, reboot=False):
+ self.destroyed = False
+ self.frontendDomain = self.getDomain()
+ self.frontendChannel = self.getChannel()
+ backend = self.getBackend()
+ self.backendChannel = backend.backendChannel
+
+ def configure(self, config, change=False, recreate=False):
+ if change:
+ raise XendError("cannot reconfigure usb")
+ #todo: FIXME: Use sxp access methods to get this value.
+ # Must not use direct indexing.
+ self.path = config[1][1]
+
+ #todo: FIXME: Support configuring the backend domain.
+## try:
+## self.backendDomain = int(sxp.child_value(config, 'backend', '0'))
+## except:
+## raise XendError('invalid backend domain')
+
+ def attach(self, recreate=False, change=False):
+ if recreate:
+ pass
+ else:
+ self.attachBackend()
+ if change:
+ self.interfaceChanged()
+
+ def sxpr(self):
+ val = ['usb',
+ ['id', self.id],
+ ['port', self.port],
+ ['path', self.path],
+ ]
+ return val
+
+ def getBackend(self):
+ return self.controller.getBackend(self.backendDomain)
+
+ def destroy(self, change=False, reboot=False):
+ """Destroy the device. If 'change' is true notify the front-end interface.
+
+ @param change: change flag
+ """
+ self.destroyed = True
+ log.debug("Destroying usb domain=%d id=%s", self.frontendDomain, self.id)
+ self.send_be_release_port()
+ if change:
+ self.interfaceChanged()
+
+ def interfaceChanged(self):
+ """Tell the back-end to notify the front-end that a device has been
+ added or removed.
+ """
+ self.getBackend().interfaceChanged()
+
+ def attachBackend(self):
+ """Attach the device to its controller.
+
+ """
+ self.getBackend().connect()
+
+ def send_be_claim_port(self):
+ log.debug(">UsbifBackendController>send_be_claim_port> about to claim port %s" % self.path)
+ msg = packMsg('usbif_be_claim_port_t',
+ { 'domid' : self.frontendDomain,
+ 'path' : self.path,
+ 'usbif_port' : self.port,
+ 'status' : 0})
+ self.backendChannel.writeRequest(msg)
+ log.debug(">UsbifBackendController> Claim port completed")
+ # No need to add any callbacks, since the guest polls its virtual ports
+ # anyhow, somewhat like a UHCI controller ;-)
+
+ def send_be_release_port(self):
+ msg = packMsg('usbif_be_release_port_t',
+ { 'domid' : self.frontendDomain,
+ 'path' : self.path })
+ self.backendChannel.writeRequest(msg)
+ log.debug(">UsbifBackendController> Release port completed")
+ # No need to add any callbacks, since the guest polls its virtual ports
+ # anyhow, somewhat like a UHCI controller ;-)
+
+class UsbifController(DevController):
+ """USB device interface controller. Handles all USB devices
+ for a domain.
+ """
+
+ def __init__(self, vm, recreate=False):
+ """Create a USB device controller.
+ """
+ DevController.__init__(self, vm, recreate=recreate)
+ self.backends = {}
+ self.backendId = 0
+ self.rcvr = None
+
+ def init(self, recreate=False, reboot=False):
+ self.destroyed = False
+ self.rcvr = CtrlMsgRcvr(self.getChannel())
+ self.rcvr.addHandler(CMSG_USBIF_FE,
+ CMSG_USBIF_FE_DRIVER_STATUS_CHANGED,
+ self.recv_fe_driver_status_changed)
+ self.rcvr.addHandler(CMSG_USBIF_FE,
+ CMSG_USBIF_FE_INTERFACE_CONNECT,
+ self.recv_fe_interface_connect)
+ self.rcvr.registerChannel()
+ if reboot:
+ self.rebootBackends()
+ self.rebootDevices()
+
+ def sxpr(self):
+ val = ['usbif',
+ ['dom', self.getDomain()]]
+ return val
+
+ def newDevice(self, id, config, recreate=False):
+ return UsbDev(self, id, config, recreate=recreate)
+
+ def destroyController(self, reboot=False):
+ """Destroy the controller and all devices.
+ """
+ self.destroyed = True
+ log.debug("Destroying blkif domain=%d", self.getDomain())
+ self.destroyDevices(reboot=reboot)
+ self.destroyBackends(reboot=reboot)
+ if self.rcvr:
+ self.rcvr.deregisterChannel()
+
+ def rebootBackends(self):
+ for backend in self.backends.values():
+ backend.init(reboot=True)
+
+ def getBackendById(self, id):
+ return self.backends.get(id)
+
+ def getBackendByDomain(self, dom):
+ for backend in self.backends.values():
+ if backend.backendDomain == dom:
+ return backend
+ return None
+
+ def getBackend(self, dom):
+ backend = self.getBackendByDomain(dom)
+ if backend: return backend
+ backend = UsbBackend(self, self.backendId, dom)
+ self.backendId += 1
+ self.backends[backend.getId()] = backend
+ backend.init()
+ return backend
+
+ def destroyBackends(self, reboot=False):
+ for backend in self.backends.values():
+ backend.destroy(reboot=reboot)
+
+ def recv_fe_driver_status_changed(self, msg):
+ val = unpackMsg('usbif_fe_driver_status_changed_t', msg)
+ log.debug('>UsbifController>recv_fe_driver_status_changed> %s', str(val))
+ #todo: FIXME: For each backend?
+ msg = packMsg('usbif_fe_interface_status_changed_t',
+ { 'status' : USBIF_INTERFACE_STATUS_DISCONNECTED,
+ 'domid' : 0, #todo: FIXME: should be domid of backend
+ 'evtchn' : 0 })
+ msg = self.getChannel().requestResponse(msg)
+ self.disconnected_resp(msg)
+
+ def disconnected_resp(self, msg):
+ val = unpackMsg('usbif_fe_interface_status_changed_t', msg)
+ if val['status'] != USBIF_INTERFACE_STATUS_DISCONNECTED:
+ log.error(">UsbifController>disconnected_resp> unexpected status change")
+ else:
+ log.debug(">UsbifController>disconnected_resp> interface disconnected OK")
+
+ def recv_fe_interface_connect(self, msg):
+ val = unpackMsg('usbif_fe_interface_status_changed_t', msg)
+ log.debug(">UsbifController>recv_fe_interface_connect> notifying backend")
+ #todo: FIXME: generalise to more than one backend.
+ id = 0
+ backend = self.getBackendById(id)
+ if backend:
+ try:
+ backend.connectInterface(val)
+ except IOError, ex:
+ log.error("Exception connecting backend: %s", ex)
+ else:
+ log.error('interface connect on unknown interface: id=%d', id)
+
+ def claim_ports(self):
+ for dev in self.devices.values():
+ dev.send_be_claim_port()
+
diff --git a/tools/python/xen/xend/sxp.py b/tools/python/xen/xend/sxp.py
index e2c0de5c5b..f1de3619d5 100644
--- a/tools/python/xen/xend/sxp.py
+++ b/tools/python/xen/xend/sxp.py
@@ -17,7 +17,6 @@ import types
import errno
import string
from StringIO import StringIO
-from xen.util.ip import _readline, _readlines
__all__ = [
"mime_type",
@@ -714,7 +713,7 @@ def parse(io):
"""
pin = Parser()
while 1:
- buf = _readline(io)
+ buf = io.readline()
pin.input(buf)
if len(buf) == 0:
break
diff --git a/tools/python/xen/xend/uuid.py b/tools/python/xen/xend/uuid.py
new file mode 100644
index 0000000000..096fef7f9f
--- /dev/null
+++ b/tools/python/xen/xend/uuid.py
@@ -0,0 +1,65 @@
+"""Universal(ly) Unique Identifiers (UUIDs).
+"""
+import commands
+import random
+
+def uuidgen(random=True):
+ """Generate a UUID using the command uuidgen.
+
+ If random is true (default) generates a random uuid.
+ If random is false generates a time-based uuid.
+ """
+ cmd = "uuidgen"
+ if random:
+ cmd += " -r"
+ else:
+ cmd += " -t"
+ return commands.getoutput(cmd)
+
+class UuidFactoryUuidgen:
+
+    """A uuid factory that shells out to the uuidgen command."""
+
+    def __init__(self):
+        pass
+
+    def getUuid(self):
+        # Defaults to a random (v4) uuid.
+        return uuidgen()
+
+class UuidFactoryRandom:
+
+    """A random uuid factory, using a PRNG seeded from /dev/urandom."""
+
+    def __init__(self):
+        # Seed the PRNG from the kernel's entropy pool.
+        f = file("/dev/urandom", "r")
+        seed = f.read(16)
+        f.close()
+        self.rand = random.Random(seed)
+
+    def randBytes(self, n):
+        # Return a list of n random ints in [0, 255].
+        return [ self.rand.randint(0, 255) for i in range(0, n) ]
+
+    def getUuid(self):
+        bytes = self.randBytes(16)
+        # Encode the version (high nibble of byte 6 = 4, i.e. random)
+        # and the variant (top bits of byte 8 = 10), per RFC 4122.
+        bytes[6] = (bytes[6] & 0x0f) | 0x40
+        bytes[8] = (bytes[8] & 0x3f) | 0x80
+        f = "%02x"
+        return ( "-".join([f*4, f*2, f*2, f*2, f*6]) % tuple(bytes) )
+
+def getFactory():
+ """Get the factory to use for creating uuids.
+ This is so it's easy to change the uuid factory.
+ For example, for testing we might want repeatable uuids
+ rather than the random ones we normally use.
+ """
+ global uuidFactory
+ try:
+ uuidFactory
+ except:
+ #uuidFactory = UuidFactoryUuidgen()
+ uuidFactory = UuidFactoryRandom()
+ return uuidFactory
+
+def getUuid():
+ return getFactory().getUuid()
diff --git a/tools/python/xen/xend/xenstore/__init__.py b/tools/python/xen/xend/xenstore/__init__.py
new file mode 100644
index 0000000000..6772d2ceca
--- /dev/null
+++ b/tools/python/xen/xend/xenstore/__init__.py
@@ -0,0 +1,2 @@
+from xsnode import *
+from xsobj import *
diff --git a/tools/python/xen/xend/xenstore/xsnode.py b/tools/python/xen/xend/xenstore/xsnode.py
new file mode 100644
index 0000000000..ae770219ab
--- /dev/null
+++ b/tools/python/xen/xend/xenstore/xsnode.py
@@ -0,0 +1,382 @@
+import errno
+import os
+import os.path
+import select
+import sys
+import time
+
+from xen.lowlevel import xs
+from xen.xend import sxp
+from xen.xend.PrettyPrint import prettyprint
+
+SELECT_TIMEOUT = 2.0
+
+def getEventPath(event):
+    """Return the xenstore directory used to publish the given event."""
+    return os.path.join("/_event", event)
+
+def getEventIdPath(event):
+ return os.path.join(eventPath(event), "@eid")
+
+class Subscription:
+
+ def __init__(self, event, fn, id):
+ self.event = event
+ self.watcher = None
+ self.fn = fn
+ self.id = id
+
+ def watch(self, watcher):
+ self.watcher = watcher
+ watcher.addSubs(self)
+
+ def unwatch(self):
+ watcher = self.watcher
+ if watcher:
+ self.watcher = None
+ watcher.delSubs(self)
+
+ def notify(self, event):
+ try:
+ self.fn(event, id)
+ except SystemExitException:
+ raise
+ except:
+ pass
+
+class Watcher:
+
+ def __init__(self, store, event):
+ self.path = getEventPath(event)
+ self.eidPath = getEventIdPath(event)
+ store.mkdirs(self.path)
+ if not store.exists(self.eidPath):
+ store.writeInt(self.eidPath, 0)
+ self.xs = None
+ self.subs = []
+
+ def __getattr__(self, k, v):
+ if k == "fileno":
+ if self.xs:
+ return self.xs.fileno
+ else:
+ return -1
+ else:
+ return self.__dict__.get(k, v)
+
+ def addSubs(self, subs):
+ self.subs.append(subs)
+ self.watch()
+
+ def delSubs(self, subs):
+ self.subs.remove(subs)
+ if len(self.subs) == 0:
+ self.unwatch()
+
+ def getEvent(self):
+ return self.event
+
+ def watch(self):
+ if self.xs: return
+ self.xs = xs.open()
+ self.xs.watch(path)
+
+ def unwatch(self):
+ if self.xs:
+ self.xs.unwatch(self.path)
+ self.xs.close()
+ self.xs = None
+
+ def watching(self):
+ return self.xs is not None
+
+ def getNotification(self):
+ p = self.xs.read_watch()
+ self.xs.acknowledge_watch()
+ eid = self.xs.readInt(self.eidPath)
+ return p
+
+ def notify(self, subs):
+ p = self.getNotification()
+ for s in subs:
+ s.notify(p)
+
+class XenStore:
+
+ def __init__(self):
+ self.xs = None
+ #self.xs = xs.open()
+ self.subscription = {}
+ self.subscription_id = 0
+ self.events = {}
+ self.write("/", "")
+
+ def getxs(self):
+ if self.xs is None:
+ ex = None
+ for i in range(0,20):
+ try:
+ self.xs = xs.open()
+ ex = None
+ break
+ except Exception, ex:
+ print >>stderr, "Exception connecting to xsdaemon:", ex
+ print >>stderr, "Trying again..."
+ time.sleep(1)
+ else:
+ raise ex
+
+ #todo would like to reconnect if xs conn closes (e.g. daemon restart).
+ return self.xs
+
+ def dump(self, path="/", out=sys.stdout):
+ print 'dump>', path
+ val = ['node']
+ val.append(['path', path])
+## perms = ['perms']
+## for p in self.getPerms(path):
+## l = ['perm']
+## l.append('dom', p.get['dom'])
+## for k in ['read', 'write', 'create', 'owner']:
+## v = p.get(k)
+## l.append([k, v])
+## perms.append(l)
+## val.append(perms)
+ data = self.read(path)
+ if data:
+ val.append(['data', data])
+ children = ['children']
+ for x in self.lsPaths(path):
+ print 'dump>', 'child=', x
+ children.append(self.dump(x))
+ if len(children) > 1:
+ val.append(children)
+ prettyprint(val, out=out)
+ return val
+
+ def getPerms(self, path):
+ return self.getxs().get_permissions(path)
+
+ def ls(self, path="/"):
+ return self.getxs().ls(path)
+
+ def lsPaths(self, path="/"):
+ return [ os.path.join(path, x) for x in self.ls(path) ]
+
+ def lsr(self, path="/", list=None):
+ if list is None:
+ list = []
+ list.append(path)
+ for x in self.lsPaths(path):
+ list.append(x)
+ self.lsr(x, list=list)
+ return list
+
+ def rm(self, path):
+ try:
+ #for x in self.lsPaths():
+ # self.getxs().rm(x)
+ self.getxs().rm(path)
+ except:
+ pass
+
+ def exists(self, path):
+ try:
+ self.getxs().ls(path)
+ return True
+ except RuntimeError, ex:
+ if ex.args[0] == errno.ENOENT:
+ return False
+ else:
+ raise
+
+ def mkdirs(self, path):
+ if self.exists(path):
+ return
+ elts = path.split("/")
+ p = "/"
+ for x in elts:
+ if x == "": continue
+ p = os.path.join(p, x)
+ if not self.exists(p):
+ self.getxs().write(p, "", create=True)
+
+ def read(self, path):
+ try:
+ return self.getxs().read(path)
+ except RuntimeError, ex:
+ if ex.args[0] == errno.EISDIR:
+ return None
+ else:
+ raise
+
+ def create(self, path, excl=False):
+ self.write(path, "", create=True, excl=excl)
+
+ def write(self, path, data, create=True, excl=False):
+ self.mkdirs(path)
+ self.getxs().write(path, data, create=create, excl=excl)
+
+ def begin(self, path):
+ self.getxs().begin_transaction(path)
+
+ def commit(self, abandon=False):
+ self.getxs().end_transaction(abort=abandon)
+
+ def subscribe(self, event, fn):
+ watcher = self.watchEvent(event)
+ self.subscription_id += 1
+ subs = Subscription(event, fn, self.subscription_id)
+ self.subscription[subs.id] = subs
+ subs.watch(watcher)
+ return subs.id
+
+ def unsubscribe(self, sid):
+ s = self.subscription.get(sid)
+ if not s: return
+ del self.subscription[s.id]
+ s.unwatch()
+ unwatchEvent(s.event)
+
+ def sendEvent(self, event, data):
+ eventPath = getEventPath(event)
+ eidPath = getEventIdPath(event)
+ try:
+ self.begin(eventPath)
+ self.mkdirs(eventPath)
+ if self.exists(eidPath):
+ eid = self.readInt(eidPath)
+ eid += 1
+ else:
+ eid = 1
+ self.writeInt(eidPath, eid)
+ self.write(os.path.join(eventPath, str(eid)), data)
+ finally:
+ self.commit()
+
+ def watchEvent(self, event):
+ if event in self.events:
+ return
+ watcher = Watcher(event)
+ self.watchers[watcher.getEvent()] = watcher
+ self.watchStart()
+ return watcher
+
+ def unwatchEvent(self, event):
+ watcher = self.watchers.get(event)
+ if not watcher:
+ return
+ if not watcher.watching():
+ del self.watchers[event]
+
+ def watchStart(self):
+ if self.watchThread: return
+
+ def watchMain(self):
+ try:
+ while True:
+ if self.watchThread is None: return
+ if not self.events:
+ return
+ rd = self.watchers.values()
+ try:
+ (rd, wr, er) = select.select(rd, [], [], SELECT_TIMEOUT)
+ for watcher in rd:
+ watcher.notify()
+ except socket.error, ex:
+ if ex.args[0] in (EAGAIN, EINTR):
+ pass
+ else:
+ raise
+ finally:
+ self.watchThread = None
+
+ def introduceDomain(self, dom, page, evtchn, path):
+ self.getxs().introduce_domain(dom, page, evtchn.port1, path)
+
+ def releaseDomain(self, dom):
+ self.getxs().release_domain(dom)
+
+def getXenStore():
+ global xenstore
+ try:
+ return xenstore
+ except:
+ xenstore = XenStore()
+ return xenstore
+
+class XenNode:
+
+ def __init__(self, path="/", create=True):
+ self.store = getXenStore()
+ self.path = path
+ if not self.store.exists(path):
+ if create:
+ self.store.create(path)
+ else:
+ raise ValueError("path does not exist: '%s'" % path)
+
+ def relPath(self, path=""):
+ if not path:
+ return self.path
+ if path and path.startswith("/"):
+ path = path[1:]
+ return os.path.join(self.path, path)
+
+ def delete(self, path=""):
+ self.store.rm(self.relPath(path))
+
+ def exists(self, path=""):
+ return self.store.exists(self.relPath(path))
+
+ def getNode(self, path="", create=True):
+ if path == "":
+ return self
+ else:
+ return XenNode(self.relPath(path=path), create=create)
+
+ getChild = getNode
+
+ def getData(self, path=""):
+ path = self.relPath(path)
+ try:
+ return self.store.read(path)
+ except:
+ return None
+
+ def setData(self, data, path=""):
+ path = self.relPath(path)
+ #print 'XenNode>setData>', 'path=', path, 'data=', data
+ return self.store.write(path, data)
+
+ def getLock(self):
+ return None
+
+ def lock(self, lockid):
+ return None
+
+ def unlock(self, lockid):
+ return None
+
+ def deleteChild(self, name):
+ self.delete(name)
+
+ def deleteChildren(self):
+ for name in self.ls():
+ self.deleteChild(name)
+
+ def getChildren(self):
+ return [ self.getNode(name) for name in self.ls() ]
+
+ def ls(self):
+ return self.store.ls(self.path)
+
+ def introduceDomain(self, dom, page, evtchn, path):
+ self.store.introduceDomain(dom, page, evtchn, path)
+
+ def releaseDomain(self, dom):
+ self.store.releaseDomain(dom)
+
+ def __repr__(self):
+ return "<XenNode %s>" % self.path
+
+
diff --git a/tools/python/xen/xend/xenstore/xsobj.py b/tools/python/xen/xend/xenstore/xsobj.py
new file mode 100644
index 0000000000..b1c9a4f1d1
--- /dev/null
+++ b/tools/python/xen/xend/xenstore/xsobj.py
@@ -0,0 +1,522 @@
+import string
+import types
+
+from xen.xend import sxp
+from xsnode import XenNode
+from xen.util.mac import macToString, macFromString
+
+VALID_KEY_CHARS = string.ascii_letters + string.digits + "_-@"
+
+def hasAttr(obj, attr):
+ if isinstance(obj, dict):
+ return obj.contains(attr)
+ else:
+ return hasattr(obj, attr)
+
+def getAttr(obj, attr):
+ if isinstance(obj, dict):
+ return dict.get(attr)
+ else:
+ return getattr(obj, attr, None)
+
+def setAttr(obj, attr, val):
+ if isinstance(obj, dict):
+ dict[attr] = val
+ else:
+ setattr(obj, attr, val)
+
+class DBConverter:
+ """Conversion of values to and from strings in xenstore.
+ """
+
+ converters = {}
+
+ def checkType(cls, ty):
+ if ty is None or ty in cls.converters:
+ return
+ raise ValueError("invalid converter type: '%s'" % ty)
+
+ checkType = classmethod(checkType)
+
+ def getConverter(cls, ty=None):
+ if ty is None:
+ ty = "str"
+ conv = cls.converters.get(ty)
+ if not conv:
+ raise ValueError("no converter for type: '%s'" % ty)
+ return conv
+
+ getConverter = classmethod(getConverter)
+
+ def convertToDB(cls, val, ty=None):
+ return cls.getConverter(ty).toDB(val)
+
+ convertToDB = classmethod(convertToDB)
+
+ def convertFromDB(cls, val, ty=None):
+ return cls.getConverter(ty).fromDB(val)
+
+ convertFromDB = classmethod(convertFromDB)
+
+ # Must define in subclass.
+ name = None
+
+ def __init__(self):
+ self.register()
+
+ def register(self):
+ if not self.name:
+ raise ValueError("invalid converter name: '%s'" % self.name)
+ self.converters[self.name] = self
+
+ def toDB(self, val):
+ raise NotImplementedError()
+
+ def fromDB(self, val):
+ raise NotImplementedError()
+
+class StrConverter(DBConverter):
+
+ name = "str"
+
+ def toDB(self, val):
+ # Convert True/False to 1/0, otherwise they convert to
+ # 'True' and 'False' rather than '1' and '0', even though
+ # isinstance(True/False, int) is true.
+ if isinstance(val, bool):
+ val = int(val)
+ return str(val)
+
+ def fromDB(self, data):
+ return data
+
+StrConverter()
+
+class BoolConverter(DBConverter):
+
+ name = "bool"
+
+ def toDB(self, val):
+ return str(int(bool(val)))
+
+ def fromDB(self, data):
+ return bool(int(data))
+
+BoolConverter()
+
+class SxprConverter(DBConverter):
+
+ name = "sxpr"
+
+ def toDB(self, val):
+ return sxp.to_string(val)
+
+ def fromDB(self, data):
+ return sxp.from_string(data)
+
+SxprConverter()
+
+class IntConverter(DBConverter):
+
+ name = "int"
+
+ def toDB(self, val):
+ return str(int(val))
+
+ def fromDB(self, data):
+ return int(data)
+
+IntConverter()
+
+class FloatConverter(DBConverter):
+
+ name = "float"
+
+ def toDB(self, val):
+ return str(float(val))
+
+ def fromDB(self, data):
+ return float(data)
+
+FloatConverter()
+
+class LongConverter(DBConverter):
+
+ name = "long"
+
+ def toDB(self, val):
+ return str(long(val))
+
+ def fromDB(self, data):
+ return long(data)
+
+LongConverter()
+
+class MacConverter(DBConverter):
+
+ name = "mac"
+
+ def toDB(self, val):
+ return macToString(val)
+
+ def fromDB(self, data):
+ return macFromString(data)
+
+MacConverter()
+
+class DBVar:
+
+ def __init__(self, var, ty=None, path=None):
+ DBConverter.checkType(ty)
+ if path is None:
+ path = var
+ self.var = var
+ self.ty = ty
+ self.path = path
+ varpath = filter(bool, self.var.split())
+ self.attrpath = varpath[:-1]
+ self.attr = varpath[-1]
+
+ def exportToDB(self, db, obj):
+ self.setDB(db, self.getObj(obj))
+
+ def importFromDB(self, db, obj):
+ self.setObj(obj, self.getDB(db))
+
+ def getObj(self, obj):
+ o = obj
+ for x in self.attrpath:
+ o = getAttr(o, x)
+ if o is None:
+ return None
+ return getAttr(o, self.attr)
+
+ def setObj(self, obj, val):
+ o = obj
+ for x in self.attrpath:
+ o = getAttr(o, x)
+ # Don't set obj attr if val is None.
+ if val is None and hasAttr(o, self.attr):
+ return
+ setAttr(o, self.attr, val)
+
+ def getDB(self, db):
+ try:
+ data = getattr(db, self.path)
+ except AttributeError:
+ return None
+ return DBConverter.convertFromDB(data, ty=self.ty)
+
+ def setDB(self, db, val):
+ # Don't set in db if val is None.
+ #print 'DBVar>setDB>', self.path, 'val=', val
+ if val is None:
+ return
+ data = DBConverter.convertToDB(val, ty=self.ty)
+ #print 'DBVar>setDB>', self.path, 'data=', data
+ setattr(db, self.path, data)
+
+
+class DBMap(dict):
+ """A persistent map. Extends dict with persistence.
+ Set and get values using the usual map syntax:
+
+ m[k], m.get(k)
+ m[k] = v
+
+ Also supports being treated as an object with attributes.
+ When 'k' is a legal identifier you may also use
+
+ m.k, getattr(m, k)
+ m.k = v, setattr(m, k)
+ k in m, hasattr(m, k)
+
+ When setting you can pass in a normal value, for example
+
+ m.x = 3
+
+ Getting works too:
+
+ m.x ==> 3
+
+ while m['x'] will return the map for x.
+
+ m['x'].getData() ==> 3
+
+ To get values from subdirs use get() to get the subdir first:
+
+ get(m, 'foo').x
+ m['foo'].x
+
+ instead of m.foo.x, because m.foo will return the data for field foo,
+ not the directory.
+
+ You can assign values into a subdir by passing a map:
+
+ m.foo = {'x': 1, 'y':2 }
+
+ You can also use paths as keys:
+
+ m['foo/x'] = 1
+
+ sets field x in subdir foo.
+
+ """
+
+ __db__ = None
+ __data__ = None
+ __perms__ = None
+ __parent__ = None
+ __name__ = ""
+
+ __transaction__ = False
+
+ # True if value set since saved (or never saved).
+ __dirty__ = True
+
+ def __init__(self, parent=None, name="", db=None):
+ if parent is None:
+ self.__name__ = name
+ else:
+ if not isinstance(parent, DBMap):
+ raise ValueError("invalid parent")
+ self.__parent__ = parent
+ self.__name__ = name
+ db = self.__parent__.getChildDB(name)
+ self.setDB(db)
+
+ def getName(self):
+ return self.__name__
+
+ def getPath(self):
+ return self.__db__ and self.__db__.relPath()
+
+ def introduceDomain(self, dom, page, evtchn, path=None):
+ db = self.__db__
+ if path is None:
+ path = db.relPath()
+ print 'DBMap>introduceDomain>', dom, page, evtchn, path
+ try:
+ db.introduceDomain(dom, page, evtchn, path)
+ except Exception, ex:
+ import traceback
+ traceback.print_exc()
+ print 'DBMap>introduceDomain>', ex
+ pass # todo: don't ignore
+
+ def releaseDomain(self, dom):
+ db = self.__db__
+ print 'DBMap>releaseDomain>', dom
+ try:
+ db.releaseDomain(dom)
+ except Exception, ex:
+ import traceback
+ traceback.print_exc()
+ print 'DBMap>releaseDomain>', ex
+ pass # todo: don't ignore
+
+ def transactionBegin(self):
+ # Begin a transaction.
+ pass
+
+ def transactionCommit(self):
+ # Commit writes to db.
+ pass
+
+ def transactionFail(self):
+ # Fail a transaction.
+ # We have changed values, what do we do?
+ pass
+
+ def watch(self, fn):
+ pass
+
+ def unwatch(self, watch):
+ pass
+
+ def checkName(self, k):
+ if k == "":
+ raise ValueError("invalid key, empty string")
+ for c in k:
+ if c in VALID_KEY_CHARS: continue
+ raise ValueError("invalid key char '%s'" % c)
+
+ def _setData(self, v):
+ #print 'DBMap>_setData>', self.getPath(), 'data=', v
+ if v != self.__data__:
+ self.__dirty__ = True
+ self.__data__ = v
+
+ def setData(self, v):
+ if isinstance(v, dict):
+ for (key, val) in v.items():
+ self[key] = val
+ else:
+ self._setData(v)
+
+ def getData(self):
+ return self.__data__
+
+ def _set(self, k, v):
+ dict.__setitem__(self, k, v)
+
+ def _get(self, k):
+ try:
+ return dict.__getitem__(self, k)
+ except:
+ return None
+
+ def _del(self, k, v):
+ try:
+ dict.__delitem__(self, k)
+ except:
+ pass
+
+ def _contains(self, k):
+ return dict.__contains__(self, k)
+
+ def __setitem__(self, k, v, save=False):
+ node = self.addChild(k)
+ node.setData(v)
+ if save:
+ node.saveDB()
+
+ def __getitem__(self, k):
+ if self._contains(k):
+ v = self._get(k)
+ else:
+ v = self.readChildDB(k)
+ self._set(k, v)
+ return v
+
+ def __delitem__(self, k):
+ self._del(k)
+ self.deleteChildDB(k)
+
+ def __repr__(self):
+ if len(self):
+ return dict.__repr__(self)
+ else:
+ return repr(self.__data__)
+
+ def __setattr__(self, k, v):
+ if k.startswith("__"):
+ object.__setattr__(self, k, v)
+ else:
+ self.__setitem__(k, v, save=True)
+ return v
+
+ def __getattr__(self, k):
+ if k.startswith("__"):
+ v = object.__getattr__(self, k)
+ else:
+ try:
+ v = self.__getitem__(k).getData()
+ except LookupError, ex:
+ raise AttributeError(ex.args)
+ return v
+
+ def __delattr__(self, k):
+ return self.__delitem__(k)
+
+ def delete(self):
+ dict.clear(self)
+ self.__data__ = None
+ if self.__db__:
+ self.__db__.delete()
+
+ def clear(self):
+ dict.clear(self)
+ if self.__db__:
+ self.__db__.deleteChildren()
+
+ def getChild(self, k):
+ return self._get(k)
+
+ def getChildDB(self, k):
+ self.checkName(k)
+ return self.__db__ and self.__db__.getChild(k)
+
+ def deleteChildDB(self, k):
+ if self.__db__:
+ self.__db__.deleteChild(k)
+
+ def _addChild(self, k):
+ kid = self._get(k)
+ if kid is None:
+ kid = DBMap(parent=self, name=k, db=self.getChildDB(k))
+ self._set(k, kid)
+ return kid
+
+ def addChild(self, path):
+ l = path.split("/")
+ n = self
+ for x in l:
+ if x == "": continue
+ n = n._addChild(x)
+ return n
+
+ def setDB(self, db):
+ if (db is not None) and not isinstance(db, XenNode):
+ raise ValueError("invalid db")
+ self.__db__ = db
+ for (k, v) in self.items():
+ if v is None: continue
+ if isinstance(v, DBMap):
+ v._setDB(self.addChild(k), restore)
+
+ def readDB(self):
+ if self.__db__ is None:
+ return
+ self.__data__ = self.__db__.getData()
+ for k in self.__db__.ls():
+ n = self.addChild(k)
+ n.readDB()
+ self.__dirty__ = False
+
+ def readChildDB(self, k):
+ if self.__db__ and (k in self.__db__.ls()):
+ n = self.addChild(k)
+ n.readDB()
+ raise LookupError("invalid key '%s'" % k)
+
+ def saveDB(self, sync=False, save=False):
+ """Save unsaved data to db.
+ If save or sync is true, saves whether dirty or not.
+ If sync is true, removes db entries not in the map.
+ """
+
+ if self.__db__ is None:
+ #print 'DBMap>saveDB>',self.getPath(), 'no db'
+ return
+ # Write data.
+ #print 'DBMap>saveDB>', self.getPath(), 'dirty=', self.__dirty__, 'data=', self.__data__
+ if ((self.__data__ is not None)
+ and (sync or save or self.__dirty__)):
+ self.__db__.setData(self.__data__)
+ self.__dirty__ = False
+ else:
+ #print 'DBMap>saveDB>', self.getPath(), 'not written'
+ pass
+ # Write children.
+ for (name, node) in self.items():
+ if not isinstance(node, DBMap): continue
+ node.saveDB(sync=sync, save=save)
+ # Remove db nodes not in children.
+ if sync:
+ for name in self.__db__.ls():
+ if name not in self:
+ self.__db__.delete(name)
+
+ def importFromDB(self, obj, fields):
+ """Set fields in obj from db fields.
+ """
+ for f in fields:
+ f.importFromDB(self, obj)
+
+ def exportToDB(self, obj, fields, save=False, sync=False):
+ """Set fields in db from obj fields.
+ """
+ for f in fields:
+ f.exportToDB(self, obj)
+ self.saveDB(save=save, sync=sync)
diff --git a/tools/python/xen/xend/xenstore/xsresource.py b/tools/python/xen/xend/xenstore/xsresource.py
new file mode 100644
index 0000000000..37011bdea3
--- /dev/null
+++ b/tools/python/xen/xend/xenstore/xsresource.py
@@ -0,0 +1,136 @@
+#============================================================================
+# Copyright (C) 2005 Mike Wray <mike.wray@hp.com>
+#============================================================================
+# HTTP interface onto xenstore (read-only).
+# Mainly intended for testing.
+
+import os
+import os.path
+
+from xen.web.httpserver import HttpServer, UnixHttpServer
+from xen.web.SrvBase import SrvBase
+from xen.web.SrvDir import SrvDir
+from xen.xend.Args import FormFn
+from xen.xend.xenstore import XenNode
+
+def pathurl(req):
+ url = req.prePathURL()
+ if not url.endswith('/'):
+ url += '/'
+ return url
+
+def writelist(req, l):
+ req.write('(')
+ for k in l:
+ req.write(' ' + k)
+ req.write(')')
+
+def lsData(dbnode, req, url):
+ v = dbnode.getData()
+ if v is None:
+ req.write('<p>No data')
+ else:
+ req.write('<p>Data: <pre>')
+ req.write(str(v))
+ req.write('</pre>')
+ v = dbnode.getLock()
+ if v is None:
+ req.write("<p>Unlocked")
+ else:
+ req.write("<p>Lock = %s" % v)
+
+def lsChildren(dbnode, req, url):
+ l = dbnode.ls()
+ if l:
+ req.write('<p>Children: <ul>')
+ for key in l:
+ child = dbnode.getChild(key)
+ data = child.getData()
+ if data is None: data = ""
+ req.write('<li><a href="%(url)s%(key)s">%(key)s</a> %(data)s</li>'
+ % { "url": url, "key": key, "data": data })
+ req.write('</ul>')
+ else:
+ req.write('<p>No children')
+
+
+class DBDataResource(SrvBase):
+ """Resource for the node data.
+ """
+
+ def __init__(self, dbnode):
+ SrvBase.__init__(self)
+ self.dbnode = dbnode
+
+ def render_GET(self, req):
+ req.write('<html><head></head><body>')
+ self.print_path(req)
+ req.write("<pre>")
+ req.write(self.getData() or self.getNoData())
+ req.write("</pre>")
+ req.write('</body></html>')
+
+ def getContentType(self):
+ # Use content-type from metadata.
+ return "text/plain"
+
+ def getData(self):
+ v = self.dbnode.getData()
+ if v is None: return v
+ return str(v)
+
+ def getNoData(self):
+ return ""
+
+class DBNodeResource(SrvDir):
+ """Resource for a DB node.
+ """
+
+ def __init__(self, dbnode):
+ SrvDir.__init__(self)
+ self.dbnode = dbnode
+
+ def get(self, x):
+ val = None
+ if x == "__data__":
+ val = DBDataResource(self.dbnode)
+ else:
+ if self.dbnode.exists(x):
+ child = self.dbnode.getChild(x, create=False)
+ else:
+ child = None
+ if child is not None:
+ val = DBNodeResource(child)
+ return val
+
+ def render_POST(self, req):
+ return self.perform(req)
+
+ def ls(self, req, use_sxp=0):
+ if use_sxp:
+ writelist(req, self.dbnode.getChildren())
+ else:
+ url = pathurl(req)
+ req.write("<fieldset>")
+ lsData(self.dbnode, req, url)
+ lsChildren(self.dbnode, req, url)
+ req.write("</fieldset>")
+
+ def form(self, req):
+ url = req.prePathURL()
+ pass
+
+class DBRootResource(DBNodeResource):
+ """Resource for the root of a DB.
+ """
+
+ def __init__(self):
+ DBNodeResource.__init__(self, XenNode())
+
+def main(argv):
+ root = SrvDir()
+ root.putChild('xenstore', DBRootResource())
+ interface = ''
+ port = 8003
+ server = HttpServer(root=root, interface=interface, port=port)
+ server.run()
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index d36c89af7b..23001cc458 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -10,6 +10,8 @@ import socket
from xen.xend import sxp
from xen.xend import PrettyPrint
from xen.xend.XendClient import server, XendError
+from xen.xend.XendBootloader import bootloader
+from xen.util import blkif
from xen.util import console_client
@@ -94,6 +96,14 @@ gopts.var('name', val='NAME',
fn=set_value, default=None,
use="Domain name. Must be unique.")
+gopts.var('bootloader', val='FILE',
+ fn=set_value, default=None,
+ use="Path to bootloader.")
+
+gopts.var('bootentry', val='NAME',
+ fn=set_value, default=None,
+ use="Entry to boot via boot loader")
+
gopts.var('kernel', val='FILE',
fn=set_value, default=None,
use="Path to kernel image.")
@@ -118,6 +128,10 @@ gopts.var('cpu', val='CPU',
fn=set_int, default=None,
use="CPU to run the domain on.")
+gopts.var('vcpus', val='VCPUS',
+ fn=set_int, default=1,
+ use="# of Virtual CPUS in domain.")
+
gopts.var('cpu_weight', val='WEIGHT',
fn=set_float, default=None,
use="""Set the new domain's cpu weight.
@@ -156,15 +170,22 @@ gopts.var('pci', val='BUS,DEV,FUNC',
For example '-pci c0,02,1a'.
The option may be repeated to add more than one pci device.""")
+gopts.var('usb', val='PATH',
+ fn=append_value, default=[],
+ use="""Add a physical USB port to a domain, as specified by the path
+ to that port. This option may be repeated to add more than one port.""")
+
gopts.var('ipaddr', val="IPADDR",
fn=append_value, default=[],
use="Add an IP address to the domain.")
-gopts.var('vif', val="mac=MAC,bridge=BRIDGE,script=SCRIPT,backend=DOM,vifname=NAME",
+gopts.var('vif', val="mac=MAC,be_mac=MAC,bridge=BRIDGE,script=SCRIPT,backend=DOM,vifname=NAME",
fn=append_value, default=[],
use="""Add a network interface with the given MAC address and bridge.
The vif is configured by calling the given configuration script.
If mac is not specified a random MAC address is used.
+ The MAC address of the backend interface can be selected with be_mac.
+    If not specified then the network backend chooses its own MAC address.
If bridge is not specified the default bridge is used.
If script is not specified the default script is used.
If backend is not specified the default backend driver domain is used.
@@ -221,6 +242,18 @@ gopts.var('nfs_root', val="PATH",
fn=set_value, default=None,
use="Set the path of the root NFS directory.")
+gopts.var('memmap', val='FILE',
+ fn=set_value, default='',
+ use="Path to memap SXP file.")
+
+gopts.var('device_model', val='FILE',
+ fn=set_value, default='',
+ use="Path to device model program.")
+
+gopts.var('device_config', val='FILE',
+ fn=set_value, default='',
+ use="Path to device model configuration.")
+
def strip(pre, s):
"""Strip prefix 'pre' if present.
"""
@@ -229,7 +262,7 @@ def strip(pre, s):
else:
return s
-def configure_image(config, vals):
+def configure_image(opts, config, vals):
"""Create the image config.
"""
config_image = [ vals.builder ]
@@ -244,9 +277,12 @@ def configure_image(config, vals):
config_image.append(['root', cmdline_root])
if vals.extra:
config_image.append(['args', vals.extra])
+ if vals.vcpus:
+ config_image.append(['vcpus', vals.vcpus])
config.append(['image', config_image ])
+
-def configure_disks(config_devs, vals):
+def configure_disks(opts, config_devs, vals):
"""Create the config for disks (virtual block devices).
"""
for (uname, dev, mode, backend) in vals.disk:
@@ -258,13 +294,18 @@ def configure_disks(config_devs, vals):
config_vbd.append(['backend', backend])
config_devs.append(['device', config_vbd])
-def configure_pci(config_devs, vals):
+def configure_pci(opts, config_devs, vals):
"""Create the config for pci devices.
"""
for (bus, dev, func) in vals.pci:
config_pci = ['pci', ['bus', bus], ['dev', dev], ['func', func]]
config_devs.append(['device', config_pci])
+def configure_usb(opts, config_devs, vals):
+ for path in vals.usb:
+ config_usb = ['usb', ['path', path]]
+ config_devs.append(['device', config_usb])
+
def randomMAC():
"""Generate a random MAC address.
@@ -284,7 +325,7 @@ def randomMAC():
random.randint(0x00, 0xff) ]
return ':'.join(map(lambda x: "%02x" % x, mac))
-def configure_vifs(config_devs, vals):
+def configure_vifs(opts, config_devs, vals):
"""Create the config for virtual network interfaces.
"""
vifs = vals.vif
@@ -296,6 +337,7 @@ def configure_vifs(config_devs, vals):
mac = d.get('mac')
if not mac:
mac = randomMAC()
+ be_mac = d.get('be_mac')
bridge = d.get('bridge')
script = d.get('script')
backend = d.get('backend')
@@ -303,6 +345,7 @@ def configure_vifs(config_devs, vals):
vifname = d.get('vifname')
else:
mac = randomMAC()
+ be_mac = None
bridge = None
script = None
backend = None
@@ -312,6 +355,8 @@ def configure_vifs(config_devs, vals):
config_vif.append(['mac', mac])
if vifname:
config_vif.append(['vifname', vifname])
+ if be_mac:
+ config_vif.append(['be_mac', be_mac])
if bridge:
config_vif.append(['bridge', bridge])
if script:
@@ -322,7 +367,7 @@ def configure_vifs(config_devs, vals):
config_vif.append(['ip', ip])
config_devs.append(['device', config_vif])
-def configure_vfr(config, vals):
+def configure_vfr(opts, config, vals):
if not vals.ipaddr: return
config_vfr = ['vfr']
idx = 0 # No way of saying which IP is for which vif?
@@ -330,8 +375,31 @@ def configure_vfr(config, vals):
config_vfr.append(['vif', ['id', idx], ['ip', ip]])
config.append(config_vfr)
-
-def make_config(vals):
+def configure_vmx(opts, config_devs, vals):
+ """Create the config for VMX devices.
+ """
+ memmap = vals.memmap
+ device_model = vals.device_model
+ device_config = vals.device_config
+ config_devs.append(['memmap', memmap])
+ config_devs.append(['device_model', device_model])
+ config_devs.append(['device_config', device_config])
+
+def run_bootloader(opts, config, vals):
+ if not os.access(vals.bootloader, os.X_OK):
+ opts.err("Bootloader isn't executable")
+ if len(vals.disk) < 1:
+ opts.err("No disks configured and boot loader requested")
+ (uname, dev, mode, backend) = vals.disk[0]
+ file = blkif.blkdev_uname_to_file(uname)
+
+ blcfg = bootloader(vals.bootloader, file, not vals.console_autoconnect,
+                       vals.vcpus, vals.bootentry)
+
+ config.append(['bootloader', vals.bootloader])
+ config.append(blcfg)
+
+def make_config(opts, vals):
"""Create the domain configuration.
"""
@@ -352,13 +420,19 @@ def make_config(vals):
config.append(['restart', vals.restart])
if vals.console:
config.append(['console', vals.console])
-
- configure_image(config, vals)
+
+ if vals.bootloader:
+ run_bootloader(opts, config, vals)
+ else:
+ configure_image(opts, config, vals)
config_devs = []
- configure_disks(config_devs, vals)
- configure_pci(config_devs, vals)
- configure_vifs(config_devs, vals)
+ configure_disks(opts, config_devs, vals)
+ configure_pci(opts, config_devs, vals)
+ configure_vifs(opts, config_devs, vals)
+ configure_usb(opts, config_devs, vals)
+ configure_vmx(opts, config_devs, vals)
config += config_devs
+
return config
def preprocess_disk(opts, vals):
@@ -398,7 +472,7 @@ def preprocess_vifs(opts, vals):
(k, v) = b.strip().split('=', 1)
k = k.strip()
v = v.strip()
- if k not in ['mac', 'bridge', 'script', 'backend', 'ip', 'vifname']:
+ if k not in ['mac', 'be_mac', 'bridge', 'script', 'backend', 'ip', 'vifname']:
opts.err('Invalid vif specifier: ' + vif)
d[k] = v
vifs.append(d)
@@ -458,7 +532,8 @@ def choose_vnc_display():
return None
def spawn_vnc(display):
- os.system("vncviewer -listen %d &" % display)
+ os.system("vncviewer -log *:stdout:0 -listen %d &" %
+ (VNC_BASE_PORT + display))
return VNC_BASE_PORT + display
def preprocess_vnc(opts, vals):
@@ -541,13 +616,15 @@ def main(argv):
preprocess(opts, opts.vals)
if not opts.getopt('name') and opts.getopt('defconfig'):
opts.setopt('name', os.path.basename(opts.getopt('defconfig')))
- config = make_config(opts.vals)
+ config = make_config(opts, opts.vals)
+
if opts.vals.dryrun:
PrettyPrint.prettyprint(config)
else:
(dom, console) = make_domain(opts, config)
if opts.vals.console_autoconnect:
- console_client.connect('localhost', console)
+ path = "/var/lib/xend/console-%s" % console
+ console_client.connect('localhost', console, path=path)
if __name__ == '__main__':
main(sys.argv)
diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
index 411da44106..d02a190ac4 100644
--- a/tools/python/xen/xm/main.py
+++ b/tools/python/xen/xm/main.py
@@ -6,6 +6,8 @@ import os.path
import sys
from getopt import getopt
import socket
+import warnings
+warnings.filterwarnings('ignore', category=FutureWarning)
from xen.xend import PrettyPrint
from xen.xend import sxp
@@ -14,6 +16,31 @@ from xen.xend.XendClient import main as xend_client_main
from xen.xm import create, destroy, migrate, shutdown, sysrq
from xen.xm.opts import *
+def unit(c):
+ if not c.isalpha():
+ return 0
+ base = 1
+ if c == 'G' or c == 'g': base = 1024 * 1024 * 1024
+ elif c == 'M' or c == 'm': base = 1024 * 1024
+ elif c == 'K' or c == 'k': base = 1024
+ else:
+ print 'ignoring unknown unit'
+ return base
+
+def int_unit(str, dest):
+ base = unit(str[-1])
+ if not base:
+ return int(str)
+
+ value = int(str[:-1])
+ dst_base = unit(dest)
+ if dst_base == 0:
+ dst_base = 1
+ if dst_base > base:
+ return value / (dst_base / base)
+ else:
+ return value * (base / dst_base)
+
class Group:
name = ""
@@ -315,8 +342,8 @@ class ProgList(Prog):
name = "list"
info = """List information about domains."""
- short_options = 'l'
- long_options = ['long']
+ short_options = 'lv'
+ long_options = ['long','vcpus']
def help(self, args):
if help:
@@ -325,11 +352,13 @@ class ProgList(Prog):
Either all domains or the domains given.
-l, --long Get more detailed information.
+ -v, --vcpus Show VCPU to CPU mapping.
"""
return
def main(self, args):
use_long = 0
+ show_vcpus = 0
(options, params) = getopt(args[1:],
self.short_options,
self.long_options)
@@ -337,6 +366,8 @@ class ProgList(Prog):
for (k, v) in options:
if k in ['-l', '--long']:
use_long = 1
+ if k in ['-v', '--vcpus']:
+ show_vcpus = 1
if n == 0:
doms = server.xend_domains()
@@ -346,11 +377,13 @@ class ProgList(Prog):
if use_long:
self.long_list(doms)
+ elif show_vcpus:
+ self.show_vcpus(doms)
else:
self.brief_list(doms)
def brief_list(self, doms):
- print 'Name Id Mem(MB) CPU State Time(s) Console'
+ print 'Name Id Mem(MB) CPU VCPU(s) State Time(s) Console'
for dom in doms:
info = server.xend_domain(dom)
d = {}
@@ -358,6 +391,7 @@ class ProgList(Prog):
d['name'] = sxp.child_value(info, 'name', '??')
d['mem'] = int(sxp.child_value(info, 'memory', '0'))
d['cpu'] = int(sxp.child_value(info, 'cpu', '0'))
+ d['vcpus'] = int(sxp.child_value(info, 'vcpus', '0'))
d['state'] = sxp.child_value(info, 'state', '??')
d['cpu_time'] = float(sxp.child_value(info, 'cpu_time', '0'))
console = sxp.child(info, 'console')
@@ -365,9 +399,27 @@ class ProgList(Prog):
d['port'] = sxp.child_value(console, 'console_port')
else:
d['port'] = ''
- print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3d %(state)5s %(cpu_time)7.1f %(port)4s"
+ print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3d %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s"
% d)
+ def show_vcpus(self, doms):
+ print 'Name Id VCPU CPU CPUMAP'
+ for dom in doms:
+ info = server.xend_domain(dom)
+ vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', '?').replace('-','')
+ cpumap = sxp.child_value(info, 'cpumap', [])
+            mask = (1 << int(sxp.child_value(info, 'vcpus', '0'))) - 1
+ count = 0
+ for cpu in vcpu_to_cpu:
+ d = {}
+ d['name'] = sxp.child_value(info, 'name', '??')
+ d['dom'] = int(sxp.child_value(info, 'id', '-1'))
+ d['vcpu'] = int(count)
+ d['cpu'] = int(cpu)
+ d['cpumap'] = int(cpumap[count])&mask
+ count = count + 1
+ print ("%(name)-16s %(dom)3d %(vcpu)4d %(cpu)3d 0x%(cpumap)x" % d)
+
def long_list(self, doms):
for dom in doms:
info = server.xend_domain(dom)
@@ -449,17 +501,35 @@ xm.prog(ProgUnpause)
class ProgPincpu(Prog):
group = 'domain'
name = "pincpu"
- info = """Pin a domain to a cpu. """
+ info = """Set which cpus a VCPU can use. """
def help(self, args):
- print args[0],'DOM CPU'
- print '\nPin domain DOM to cpu CPU.'
+ print args[0],'DOM VCPU CPUS'
+ print '\nSet which cpus VCPU in domain DOM can use.'
+
+ # convert list of cpus to bitmap integer value
+ def make_map(self, cpulist):
+ cpus = []
+ cpumap = 0
+ for c in cpulist.split(','):
+ if c.find('-') != -1:
+ (x,y) = c.split('-')
+ for i in range(int(x),int(y)+1):
+ cpus.append(int(i))
+ else:
+ cpus.append(int(c))
+ cpus.sort()
+ for c in cpus:
+ cpumap = cpumap | 1<<c
+
+ return cpumap
def main(self, args):
- if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- cpu = int(args[2])
- server.xend_domain_pincpu(dom, cpu)
+ if len(args) != 4: self.err("%s: Invalid argument(s)" % args[0])
+ dom = args[1]
+ vcpu = int(args[2])
+ cpumap = self.make_map(args[3]);
+ server.xend_domain_pincpu(dom, vcpu, cpumap)
xm.prog(ProgPincpu)
@@ -475,7 +545,7 @@ class ProgMaxmem(Prog):
def main(self, args):
if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
dom = args[1]
- mem = int(args[2])
+ mem = int_unit(args[2], 'm')
server.xend_domain_maxmem_set(dom, mem)
xm.prog(ProgMaxmem)
@@ -493,7 +563,7 @@ MEMORY_TARGET megabytes"""
def main(self, args):
if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
dom = args[1]
- mem_target = int(args[2])
+ mem_target = int_unit(args[2], 'm')
server.xend_domain_mem_target_set(dom, mem_target)
xm.prog(ProgBalloon)
@@ -566,39 +636,22 @@ class ProgBvtslice(Prog):
xm.prog(ProgBvtslice)
-
-class ProgAtropos(Prog):
+class ProgSedf(Prog):
group = 'scheduler'
- name= "atropos"
- info = """Set atropos parameters."""
+ name= "sedf"
+ info = """Set simple EDF parameters."""
def help(self, args):
- print args[0], "DOM PERIOD SLICE LATENCY XTRATIME"
- print "\nSet atropos parameters."
+ print args[0], "DOM PERIOD SLICE LATENCY EXTRATIME WEIGHT"
+ print "\nSet simple EDF parameters."
def main(self, args):
- if len(args) != 6: self.err("%s: Invalid argument(s)" % args[0])
- dom = args[1]
- v = map(int, args[2:6])
- server.xend_domain_cpu_atropos_set(dom, *v)
+ if len(args) != 7: self.err("%s: Invalid argument(s)" % args[0])
+ dom = args[1]
+ v = map(int, args[2:7])
+ server.xend_domain_cpu_sedf_set(dom, *v)
-xm.prog(ProgAtropos)
-
-class ProgRrobin(Prog):
- group = 'scheduler'
- name = "rrobin"
- info = """Set round robin slice."""
-
- def help(self, args):
- print args[0], "SLICE"
- print "\nSet round robin scheduler slice."
-
- def main(self, args):
- if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
- rrslice = int(args[1])
- server.xend_node_rrobin_set(rrslice)
-
-xm.prog(ProgRrobin)
+xm.prog(ProgSedf)
class ProgInfo(Prog):
group = 'host'
@@ -653,7 +706,8 @@ class ProgConsole(Prog):
self.err("No console information")
port = sxp.child_value(console, "console_port")
from xen.util import console_client
- console_client.connect("localhost", int(port))
+ path = "/var/lib/xend/console-%s" % port
+ console_client.connect("localhost", int(port), path=path)
xm.prog(ProgConsole)
@@ -717,6 +771,23 @@ class ProgLog(Prog):
xm.prog(ProgLog)
+class ProgVifCreditLimit(Prog):
+ group = 'vif'
+ name= "vif-limit"
+ info = """Limit the transmission rate of a virtual network interface."""
+
+ def help(self, args):
+ print args[0], "DOMAIN VIF CREDIT_IN_BYTES PERIOD_IN_USECS"
+ print "\nSet the credit limit of a virtual network interface."
+
+ def main(self, args):
+ if len(args) != 5: self.err("%s: Invalid argument(s)" % args[0])
+ dom = args[1]
+ v = map(int, args[2:5])
+ server.xend_domain_vif_limit(dom, *v)
+
+xm.prog(ProgVifCreditLimit)
+
class ProgVifList(Prog):
group = 'vif'
name = 'vif-list'
@@ -729,7 +800,7 @@ class ProgVifList(Prog):
def main(self, args):
if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
dom = args[1]
- for x in server.xend_domain_vifs(dom):
+ for x in server.xend_domain_devices(dom, 'vif'):
sxp.show(x)
print
@@ -747,7 +818,7 @@ class ProgVbdList(Prog):
def main(self, args):
if len(args) != 2: self.err("%s: Invalid argument(s)" % args[0])
dom = args[1]
- for x in server.xend_domain_vbds(dom):
+ for x in server.xend_domain_devices(dom, 'vbd'):
sxp.show(x)
print
@@ -783,6 +854,28 @@ Create a virtual block device for a domain.
xm.prog(ProgVbdCreate)
+class ProgVbdRefresh(Prog):
+ group = 'vbd'
+ name = 'vbd-refresh'
+ info = """Refresh a virtual block device for a domain"""
+
+ def help(self, args):
+ print args[0], "DOM DEV"
+ print """
+Refresh a virtual block device for a domain.
+
+ DEV - idx field in the device information
+"""
+
+ def main(self, args):
+ if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
+ dom = args[1]
+ dev = args[2]
+ server.xend_domain_device_refresh(dom, 'vbd', dev)
+
+xm.prog(ProgVbdRefresh)
+
+
class ProgVbdDestroy(Prog):
group = 'vbd'
name = 'vbd-destroy'
@@ -800,7 +893,7 @@ information. This is visible in 'xm vbd-list'."""
if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0])
dom = args[1]
dev = args[2]
- server.xend_domain_device_destroy(dom, "vbd", dev)
+ server.xend_domain_device_destroy(dom, 'vbd', dev)
xm.prog(ProgVbdDestroy)
diff --git a/tools/python/xen/xm/migrate.py b/tools/python/xen/xm/migrate.py
index 147c0c4d08..374b80b257 100644
--- a/tools/python/xen/xm/migrate.py
+++ b/tools/python/xen/xm/migrate.py
@@ -14,8 +14,7 @@ DOM0_ID = '0'
gopts = Opts(use="""[options] DOM HOST
Migrate domain DOM to host HOST.
-The transfer daemon xfrd must be running on the
-local host and on HOST.
+Xend must be running on the local host and on HOST.
""")
gopts.opt('help', short='h',
diff --git a/tools/sv/Main.rpy b/tools/sv/Main.rpy
deleted file mode 100755
index 6b75ea9431..0000000000
--- a/tools/sv/Main.rpy
+++ /dev/null
@@ -1,3 +0,0 @@
-from xen.sv.Main import Main
-
-resource = Main()
diff --git a/tools/sv/Makefile b/tools/sv/Makefile
deleted file mode 100755
index 4da91e0674..0000000000
--- a/tools/sv/Makefile
+++ /dev/null
@@ -1,34 +0,0 @@
-sv_insdir := /var/lib/xen/sv
-INSTALL = install
-INSTALL_DIR = $(INSTALL) -d -m0755
-INSTALL_DATA = $(INSTALL) -m0644
-
-all:
-
-IMAGES = xen.png orb_01.jpg orb_02.jpg
-IMAGES += left-end-highlight.jpg left-end-no-highlight.jpg
-IMAGES += right-end-highlight.jpg right-end-no-highlight.jpg
-IMAGES += middle-highlight.jpg middle-no-highlight.jpg
-IMAGES += seperator.jpg
-IMAGES += seperator-left-highlight.jpg seperator-right-highlight.jpg
-IMAGES += shutdown.png reboot.png pause.png unpause.png destroy.png
-IMAGES += small-destroy.png small-pause.png small-unpause.png
-IMAGES += next.png previous.png finish.png
-
-install:
- # copy XenSV Main.rpy file
- @[ -d $(DESTDIR)$(sv_insdir) ] || $(INSTALL_DIR) $(DESTDIR)$(sv_insdir)
- @$(INSTALL_DATA) Main.rpy $(DESTDIR)$(sv_insdir)
-
- # copy XenSV images
- @[ -d $(DESTDIR)$(sv_insdir)/images ] || \
- $(INSTALL_DIR) $(DESTDIR)$(sv_insdir)/images
- @(cd images && $(INSTALL_DATA) $(IMAGES) $(DESTDIR)$(sv_insdir)/images)
-
- # copy XenSV stylesheet
- @[ -d $(DESTDIR)$(sv_insdir)/inc ] || \
- $(INSTALL_DIR) $(DESTDIR)$(sv_insdir)/inc
- @$(INSTALL_DATA) inc/style.css inc/script.js $(DESTDIR)$(sv_insdir)/inc
-
-clean:
-
diff --git a/tools/sv/images/destroy.png b/tools/sv/images/destroy.png
deleted file mode 100644
index 9545fc4837..0000000000
--- a/tools/sv/images/destroy.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/finish.png b/tools/sv/images/finish.png
deleted file mode 100644
index 6c5d18a9b7..0000000000
--- a/tools/sv/images/finish.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/left-end-highlight.jpg b/tools/sv/images/left-end-highlight.jpg
deleted file mode 100644
index 3922eb593a..0000000000
--- a/tools/sv/images/left-end-highlight.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/left-end-no-highlight.jpg b/tools/sv/images/left-end-no-highlight.jpg
deleted file mode 100644
index ded4e3933b..0000000000
--- a/tools/sv/images/left-end-no-highlight.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/middle-highlight.jpg b/tools/sv/images/middle-highlight.jpg
deleted file mode 100644
index db9829d9bf..0000000000
--- a/tools/sv/images/middle-highlight.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/middle-no-highlight.jpg b/tools/sv/images/middle-no-highlight.jpg
deleted file mode 100644
index da3b4f68a0..0000000000
--- a/tools/sv/images/middle-no-highlight.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/next.png b/tools/sv/images/next.png
deleted file mode 100644
index da10bbfb9b..0000000000
--- a/tools/sv/images/next.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/orb_01.jpg b/tools/sv/images/orb_01.jpg
deleted file mode 100755
index e30efc4985..0000000000
--- a/tools/sv/images/orb_01.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/orb_02.jpg b/tools/sv/images/orb_02.jpg
deleted file mode 100755
index e5ebc8e4b1..0000000000
--- a/tools/sv/images/orb_02.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/pause.png b/tools/sv/images/pause.png
deleted file mode 100644
index 6e16daa177..0000000000
--- a/tools/sv/images/pause.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/previous.png b/tools/sv/images/previous.png
deleted file mode 100644
index 22292d6e9c..0000000000
--- a/tools/sv/images/previous.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/reboot.png b/tools/sv/images/reboot.png
deleted file mode 100755
index 358e6deb8f..0000000000
--- a/tools/sv/images/reboot.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/right-end-highlight.jpg b/tools/sv/images/right-end-highlight.jpg
deleted file mode 100644
index 1ffee341ab..0000000000
--- a/tools/sv/images/right-end-highlight.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/right-end-no-highlight.jpg b/tools/sv/images/right-end-no-highlight.jpg
deleted file mode 100644
index 0ddd058d67..0000000000
--- a/tools/sv/images/right-end-no-highlight.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/seperator-left-highlight.jpg b/tools/sv/images/seperator-left-highlight.jpg
deleted file mode 100644
index c90f4ffd12..0000000000
--- a/tools/sv/images/seperator-left-highlight.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/seperator-right-highlight.jpg b/tools/sv/images/seperator-right-highlight.jpg
deleted file mode 100644
index f37e6cd546..0000000000
--- a/tools/sv/images/seperator-right-highlight.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/seperator.jpg b/tools/sv/images/seperator.jpg
deleted file mode 100644
index c77e2c5500..0000000000
--- a/tools/sv/images/seperator.jpg
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/shutdown.png b/tools/sv/images/shutdown.png
deleted file mode 100755
index 48a52dce21..0000000000
--- a/tools/sv/images/shutdown.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/small-destroy.png b/tools/sv/images/small-destroy.png
deleted file mode 100644
index f800bd7685..0000000000
--- a/tools/sv/images/small-destroy.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/small-pause.png b/tools/sv/images/small-pause.png
deleted file mode 100644
index 7bbdbfaafe..0000000000
--- a/tools/sv/images/small-pause.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/small-unpause.png b/tools/sv/images/small-unpause.png
deleted file mode 100644
index 6ae5687a0c..0000000000
--- a/tools/sv/images/small-unpause.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/unpause.png b/tools/sv/images/unpause.png
deleted file mode 100644
index c971308814..0000000000
--- a/tools/sv/images/unpause.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/images/xen.png b/tools/sv/images/xen.png
deleted file mode 100644
index 344c361b3c..0000000000
--- a/tools/sv/images/xen.png
+++ /dev/null
Binary files differ
diff --git a/tools/sv/inc/script.js b/tools/sv/inc/script.js
deleted file mode 100755
index 47cd10399c..0000000000
--- a/tools/sv/inc/script.js
+++ /dev/null
@@ -1,22 +0,0 @@
-function update( objRef, text )
-{
- if ( document.all || document.getElementById )
- {
- obj = ( document.getElementById )? document.getElementById( objRef ) : document.all( objRef );
-
- obj.innerHTML= text
- }
-}
-
-function doOp( op )
-{
- document.forms[0].op.value = op
- document.forms[0].submit()
-}
-
-function doOp2( op, args )
-{
- document.forms[0].op.value = op
- document.forms[0].args.value = args
- document.forms[0].submit()
-}
diff --git a/tools/sv/inc/style.css b/tools/sv/inc/style.css
deleted file mode 100644
index 263ab59b23..0000000000
--- a/tools/sv/inc/style.css
+++ /dev/null
@@ -1,32 +0,0 @@
-
-P {font-family: verdana, arial; font-size: 12px; color: black}
-.small {font-size: 10px}
-
-TD.domainInfo {font-family: verdana, arial; font-size: 10px; color: black}
-TD.domainInfoHead {font-family: verdana, arial; font-size: 10px; color: white; font-face: bold}
-
-TD.domainInfoHead {background-color: black}
-TR.domainInfoOdd {background-color: white}
-TR.domainInfoEven {background-color: lightgrey}
-
-body {
- width: 670px;
- margin: 0px;
- padding: 0px;
- background-color: #fff;
- background-image: url(../images/orb_02.jpg);
- background-repeat: repeat-y;
- background-position: left top;
- font-family: Arial, Helvetica, sans-serif;
- font-weight: bold;
- color: #333333;
- letter-spacing: 0px;
- scrollbar-base-color: #333333;
- scrollbar-track-color: #666666;
- scrollbar-face-color: #fff;
-
-
- }
-
-.button (cursor:hand)
-
diff --git a/tools/tests/Makefile b/tools/tests/Makefile
new file mode 100644
index 0000000000..3e8962a714
--- /dev/null
+++ b/tools/tests/Makefile
@@ -0,0 +1,22 @@
+
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+TARGET := test_x86_emulator
+
+CC := gcc
+CFLAGS := -O2 -Wall -Werror -D__TEST_HARNESS__
+
+$(TARGET): x86_emulate.o test_x86_emulator.o
+ $(CC) -o $@ $^
+
+clean:
+ rm -rf $(TARGET) *.o *~ core
+
+install:
+
+x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/x86_emulate.c
+ $(CC) $(CFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $<
+
+%.o: %.c
+ $(CC) $(CFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $<
diff --git a/tools/tests/test_x86_emulator.c b/tools/tests/test_x86_emulator.c
new file mode 100644
index 0000000000..e6df8092bc
--- /dev/null
+++ b/tools/tests/test_x86_emulator.c
@@ -0,0 +1,262 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+#include <public/xen.h>
+#include <asm-x86/x86_emulate.h>
+
+static int read_any(
+ unsigned long addr,
+ unsigned long *val,
+ unsigned int bytes)
+{
+ switch ( bytes )
+ {
+ case 1: *val = *(u8 *)addr; break;
+ case 2: *val = *(u16 *)addr; break;
+ case 4: *val = *(u32 *)addr; break;
+ case 8: *val = *(unsigned long *)addr; break;
+ }
+ return X86EMUL_CONTINUE;
+}
+
+static int write_any(
+ unsigned long addr,
+ unsigned long val,
+ unsigned int bytes)
+{
+ switch ( bytes )
+ {
+ case 1: *(u8 *)addr = (u8)val; break;
+ case 2: *(u16 *)addr = (u16)val; break;
+ case 4: *(u32 *)addr = (u32)val; break;
+ case 8: *(unsigned long *)addr = val; break;
+ }
+ return X86EMUL_CONTINUE;
+}
+
+static int cmpxchg_any(
+ unsigned long addr,
+ unsigned long old,
+ unsigned long new,
+ unsigned int bytes)
+{
+ switch ( bytes )
+ {
+ case 1: *(u8 *)addr = (u8)new; break;
+ case 2: *(u16 *)addr = (u16)new; break;
+ case 4: *(u32 *)addr = (u32)new; break;
+ case 8: *(unsigned long *)addr = new; break;
+ }
+ return X86EMUL_CONTINUE;
+}
+
+static int cmpxchg8b_any(
+ unsigned long addr,
+ unsigned long old_lo,
+ unsigned long old_hi,
+ unsigned long new_lo,
+ unsigned long new_hi)
+{
+ ((unsigned long *)addr)[0] = new_lo;
+ ((unsigned long *)addr)[1] = new_hi;
+ return X86EMUL_CONTINUE;
+}
+
+static struct x86_mem_emulator emulops = {
+ read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
+};
+
+int main(int argc, char **argv)
+{
+ struct xen_regs regs;
+ char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
+ unsigned int res = 0x7FFFFFFF;
+ u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
+ unsigned long cr2;
+ int rc;
+
+ printf("%-40s", "Testing addl %%ecx,(%%eax)...");
+ instr[0] = 0x01; instr[1] = 0x08;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = 0x12345678;
+ cr2 = (unsigned long)&res;
+ res = 0x7FFFFFFF;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (res != 0x92345677) ||
+ (regs.eflags != 0xa94) ||
+ (regs.eip != (unsigned long)&instr[2]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing xorl (%%eax),%%ecx...");
+ instr[0] = 0x33; instr[1] = 0x08;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+#ifdef __x86_64__
+ regs.ecx = 0xFFFFFFFF12345678UL;
+#else
+ regs.ecx = 0x12345678UL;
+#endif
+ cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (res != 0x92345677) ||
+ (regs.ecx != 0x8000000FUL) ||
+ (regs.eip != (unsigned long)&instr[2]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing lock cmpxchgb %%cl,(%%eax)...");
+ instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb0; instr[3] = 0x08;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.eax = 0x92345677UL;
+ regs.ecx = 0xAA;
+ cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (res != 0x923456AA) ||
+ (regs.eflags != 0x244) ||
+ (regs.eax != 0x92345677UL) ||
+ (regs.eip != (unsigned long)&instr[4]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing lock cmpxchgb %%cl,(%%eax)...");
+ instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb0; instr[3] = 0x08;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.eax = 0xAABBCC77UL;
+ regs.ecx = 0xFF;
+ cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (res != 0x923456AA) ||
+ ((regs.eflags&0x240) != 0x200) ||
+ (regs.eax != 0xAABBCCAA) ||
+ (regs.ecx != 0xFF) ||
+ (regs.eip != (unsigned long)&instr[4]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing xchgl %%ecx,(%%eax)...");
+ instr[0] = 0x87; instr[1] = 0x08;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.ecx = 0x12345678;
+ cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (res != 0x12345678) ||
+ (regs.eflags != 0x200) ||
+ (regs.ecx != 0x923456AA) ||
+ (regs.eip != (unsigned long)&instr[2]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)...");
+ instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08;
+ regs.eflags = 0x200;
+ res = 0x923456AA;
+ regs.eip = (unsigned long)&instr[0];
+ regs.eax = 0x923456AAUL;
+ regs.ecx = 0xDDEEFF00L;
+ cr2 = (unsigned long)&res;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (res != 0xDDEEFF00) ||
+ (regs.eflags != 0x244) ||
+ (regs.eax != 0x923456AAUL) ||
+ (regs.eip != (unsigned long)&instr[4]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing rep movsw...");
+ instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5;
+ res = 0x22334455;
+ regs.eflags = 0x200;
+ regs.ecx = 23;
+ regs.eip = (unsigned long)&instr[0];
+ regs.esi = (unsigned long)&res + 0;
+ regs.edi = (unsigned long)&res + 2;
+ regs.error_code = 0; /* read fault */
+ cr2 = regs.esi;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (res != 0x44554455) ||
+ (regs.eflags != 0x200) ||
+ (regs.ecx != 22) ||
+ (regs.esi != ((unsigned long)&res + 2)) ||
+ (regs.edi != ((unsigned long)&res + 4)) ||
+ (regs.eip != (unsigned long)&instr[0]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing btrl $0x1,(%edi)...");
+ instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01;
+ res = 0x2233445F;
+ regs.eflags = 0x200;
+ regs.eip = (unsigned long)&instr[0];
+ regs.edi = (unsigned long)&res;
+ cr2 = regs.edi;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (res != 0x2233445D) ||
+ ((regs.eflags&0x201) != 0x201) ||
+ (regs.eip != (unsigned long)&instr[4]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing cmpxchg8b (%edi) [succeeding]...");
+ instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
+ regs.eflags = 0x200;
+ regs.eax = cmpxchg8b_res[0];
+ regs.edx = cmpxchg8b_res[1];
+ regs.ebx = 0x9999AAAA;
+ regs.ecx = 0xCCCCFFFF;
+ regs.eip = (unsigned long)&instr[0];
+ regs.edi = (unsigned long)cmpxchg8b_res;
+ cr2 = regs.edi;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (cmpxchg8b_res[0] != 0x9999AAAA) ||
+ (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
+ ((regs.eflags&0x240) != 0x240) ||
+ (regs.eip != (unsigned long)&instr[3]) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing cmpxchg8b (%edi) [failing]...");
+ instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
+ regs.eip = (unsigned long)&instr[0];
+ regs.edi = (unsigned long)cmpxchg8b_res;
+ cr2 = regs.edi;
+ rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
+ if ( (rc != 0) ||
+ (cmpxchg8b_res[0] != 0x9999AAAA) ||
+ (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
+ (regs.eax != 0x9999AAAA) ||
+ (regs.edx != 0xCCCCFFFF) ||
+ ((regs.eflags&0x240) != 0x200) ||
+ (regs.eip != (unsigned long)&instr[3]) )
+ goto fail;
+ printf("okay\n");
+
+ return 0;
+
+ fail:
+ printf("failed!\n");
+ return 1;
+}
diff --git a/tools/vnet/INSTALL b/tools/vnet/INSTALL
new file mode 100644
index 0000000000..8ece553116
--- /dev/null
+++ b/tools/vnet/INSTALL
@@ -0,0 +1,31 @@
+To compile and install run "make install"; if it fails or you need to reinstall
+run "make clean" first or the build will fail, at least that is what I have
+found under 2.6.10.
+
+Other important items:
+1) You will need to have your xen0 kernel compiled with HMAC_SUPPORT
+ 2.6.x = (MAIN MENU: Cryptographic Options -> HMAC Support)
+ BEFORE running "make install".
+
+2) You will want at least some of the other algorithms listed under
+ "Cryptographic Options" for the kernel compiled as modules.
+
+3) You will want the networking IPsec/VLAN options compiled in as modules
+ 2.6.x = (MAIN MENU: Device Drivers -> Networking Support ->
+ Networking Options ->
+ IP: AH transformation
+ IP: ESP transformation
+ IP: IPComp transformation
+ IP: tunnel transformation
+
+ IPsec user configuration interface
+
+ 802.1Q VLAN Support
+
+4) The module (vnet_module) will not properly load from the command line
+ with a "modprobe vnet_module". Use network-vnet to properly configure
+ your system and load the module for you.
+
+Please refer to the additional documentation found in tools/vnet/doc for
+proper syntax and config file parameters.
+
diff --git a/tools/vnet/Makefile b/tools/vnet/Makefile
index 76d2abae05..a5156c4687 100644
--- a/tools/vnet/Makefile
+++ b/tools/vnet/Makefile
@@ -7,11 +7,12 @@ export prefix?=$(shell cd ../../dist/install && pwd)
.PHONY: all compile
.PHONY: gc-install gc-clean gc-prstine
-.PHONY: vnetd vnet-module install dist clean pristine
+.PHONY: libxutil vnetd vnet-module install dist clean pristine
all: compile
-compile: vnetd vnet-module
+compile: libxutil vnetd vnet-module
+#compile: vnet-module
gc.tar.gz:
wget http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/$@
@@ -20,10 +21,8 @@ gc: gc.tar.gz
tar xfz gc.tar.gz
ln -sf gc?.? gc
-gc/.configure-makefile:
- (cd gc && ./configure --prefix=`pwd`/install && touch .configure-makefile)
-
-gc-install: gc gc/.configure-makefile
+gc-install: gc
+ (cd gc && make test && ./configure --prefix=`pwd`/install)
make -C gc
make -C gc install
@@ -33,6 +32,9 @@ gc-clean:
gc-pristine:
-rm -rf gc?.? gc
+libxutil:
+ $(MAKE) -C libxutil
+
vnetd: gc-install
$(MAKE) -C vnetd
@@ -40,12 +42,15 @@ vnet-module:
$(MAKE) -C vnet-module
install: compile
+ $(MAKE) -C libxutil install
$(MAKE) -C vnetd install
$(MAKE) -C vnet-module install
$(MAKE) -C examples install
clean:
+ -$(MAKE) -C libxutil clean
-$(MAKE) -C vnetd clean
-$(MAKE) -C vnet-module clean
+ -rm -rf gc?.? gc
pristine: clean gc-pristine
diff --git a/tools/libxutil/Makefile b/tools/vnet/libxutil/Makefile
index 95e8597b3e..5e9adaac8b 100644
--- a/tools/libxutil/Makefile
+++ b/tools/vnet/libxutil/Makefile
@@ -1,5 +1,5 @@
-XEN_ROOT = ../..
+XEN_ROOT = ../../..
INSTALL = install
INSTALL_DATA = $(INSTALL) -m0644
INSTALL_PROG = $(INSTALL) -m0755
@@ -7,8 +7,6 @@ INSTALL_DIR = $(INSTALL) -d -m0755
include $(XEN_ROOT)/tools/Rules.mk
-CC = gcc
-
LIB_SRCS :=
LIB_SRCS += allocate.c
LIB_SRCS += enum.c
@@ -33,7 +31,7 @@ CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing
CFLAGS += -Wp,-MD,.$(@F).d
DEPS = .*.d
-MAJOR := 2.0
+MAJOR := 3.0
MINOR := 0
LIB := libxutil.so
LIB += libxutil.so.$(MAJOR)
@@ -51,7 +49,7 @@ libxutil.so.$(MAJOR): libxutil.so.$(MAJOR).$(MINOR)
ln -sf $^ $@
libxutil.so.$(MAJOR).$(MINOR): $(PIC_OBJS)
- $(CC) -Wl,-soname -Wl,libxutil.so.$(MAJOR) -shared -o $@ $^
+ $(CC) $(CFLAGS) -Wl,-soname -Wl,libxutil.so.$(MAJOR) -shared -o $@ $^
libxutil.a: $(LIB_OBJS)
$(AR) rc $@ $^
@@ -65,11 +63,11 @@ check-for-zlib:
fi
install: build
- [ -d $(DESTDIR)/usr/lib ] || $(INSTALL_DIR) -p $(DESTDIR)/usr/lib
- $(INSTALL_PROG) libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/lib
- $(INSTALL_DATA) libxutil.a $(DESTDIR)/usr/lib
- ln -sf libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/lib/libxutil.so.$(MAJOR)
- ln -sf libxutil.so.$(MAJOR) $(DESTDIR)/usr/lib/libxutil.so
+ [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_PROG) libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxutil.a $(DESTDIR)/usr/$(LIBDIR)
+ ln -sf libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so.$(MAJOR)
+ ln -sf libxutil.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so
clean:
$(RM) *.a *.so* *.o *.opic *.rpm
diff --git a/tools/libxutil/allocate.c b/tools/vnet/libxutil/allocate.c
index 600ebabda6..600ebabda6 100644
--- a/tools/libxutil/allocate.c
+++ b/tools/vnet/libxutil/allocate.c
diff --git a/tools/libxutil/allocate.h b/tools/vnet/libxutil/allocate.h
index 391b7be2bd..391b7be2bd 100644
--- a/tools/libxutil/allocate.h
+++ b/tools/vnet/libxutil/allocate.h
diff --git a/tools/libxutil/debug.h b/tools/vnet/libxutil/debug.h
index 1f5a19d54d..1f5a19d54d 100644
--- a/tools/libxutil/debug.h
+++ b/tools/vnet/libxutil/debug.h
diff --git a/tools/libxutil/enum.c b/tools/vnet/libxutil/enum.c
index 95f6e31a87..95f6e31a87 100644
--- a/tools/libxutil/enum.c
+++ b/tools/vnet/libxutil/enum.c
diff --git a/tools/libxutil/enum.h b/tools/vnet/libxutil/enum.h
index cdc0f6f1b1..cdc0f6f1b1 100644
--- a/tools/libxutil/enum.h
+++ b/tools/vnet/libxutil/enum.h
diff --git a/tools/vnet/libxutil/fd_stream.c b/tools/vnet/libxutil/fd_stream.c
new file mode 100644
index 0000000000..428f0a5ff0
--- /dev/null
+++ b/tools/vnet/libxutil/fd_stream.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/** @file
+ * An IOStream implementation using fds.
+ */
+#ifndef __KERNEL__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include "allocate.h"
+#include "fd_stream.h"
+
+#define MODULE_NAME "fd_stream"
+#define DEBUG 1
+//#undef DEBUG
+#include "debug.h"
+
+static int fd_read(IOStream *s, void *buf, size_t n);
+static int fd_write(IOStream *s, const void *buf, size_t n);
+static int fd_error(IOStream *s);
+static int fd_close(IOStream *s);
+static void fd_free(IOStream *s);
+static int fd_flush(IOStream *s);
+
+/** Methods used by a fd IOStream. */
+static const IOMethods fd_methods = {
+ read: fd_read,
+ write: fd_write,
+ error: fd_error,
+ close: fd_close,
+ free: fd_free,
+ flush: fd_flush,
+};
+
+/** Get the fd data.
+ *
+ * @param io fd stream
+ * @return data
+ */
+static inline FDData * fd_data(IOStream *io){
+ return (FDData *)io->data;
+}
+
+/** Test if a stream is a fd stream.
+ *
+ * @param io stream
+ * @return 0 if a fd stream, -EINVAL if not
+ */
+int fd_stream_check(IOStream *io){
+ return (io && io->methods == &fd_methods ? 0 : -EINVAL);
+}
+
+/** Get the data for a fd stream.
+ *
+ * @param io stream
+ * @param data return value for the data
+ * @return 0 if a fd stream, -EINVAL if not
+ */
+int fd_stream_data(IOStream *io, FDData **data){
+ int err = fd_stream_check(io);
+ if(err){
+ *data = NULL;
+ } else {
+ *data = fd_data(io);
+ }
+ return err;
+}
+
+
+/** Write to the underlying fd.
+ *
+ * @param stream output stream
+ * @param buf data to write
+ * @param n number of bytes to write
+ * @return number of bytes written
+ */
+static int fd_write(IOStream *s, const void *buf, size_t n){
+ FDData *data = fd_data(s);
+ int k;
+ k = write(data->fd, buf, n);
+ return k;
+}
+
+/** Read from the underlying stream;
+ *
+ * @param stream input
+ * @param buf where to put input
+ * @param n number of bytes to read
+ * @return number of bytes read
+ */
+static int fd_read(IOStream *s, void *buf, size_t n){
+ FDData *data = fd_data(s);
+ int k;
+ k = read(data->fd, buf, n);
+ //printf("> fd_read> buf=%p n=%d --> k=%d\n", buf, n, k);
+ return k;
+}
+
+/** Flush the fd (no-op).
+ *
+ * @param s fd stream
+ * @return 0 on success, error code otherwise
+ */
+static int fd_flush(IOStream *s){
+ return 0;
+}
+
+/** Check if a fd stream has an error (no-op).
+ *
+ * @param s fd stream
+ * @return 1 if has an error, 0 otherwise
+ */
+static int fd_error(IOStream *s){
+ return 0;
+}
+
+/** Close a fd stream.
+ *
+ * @param s fd stream to close
+ * @return result of the close
+ */
+static int fd_close(IOStream *s){
+ FDData *data = fd_data(s);
+ return close(data->fd);
+}
+
+/** Free a fd stream.
+ *
+ * @param s fd stream
+ */
+static void fd_free(IOStream *s){
+ FDData *data = fd_data(s);
+ deallocate(data);
+}
+
+/** Create an IOStream for a fd.
+ *
+ * @param fd fd to wrap
+ * @return new IOStream using fd for i/o
+ */
+IOStream *fd_stream_new(int fd){
+ int err = -ENOMEM;
+ IOStream *io = NULL;
+ FDData *data = NULL;
+
+ io = ALLOCATE(IOStream);
+ if(!io) goto exit;
+ io->methods = &fd_methods;
+ data = ALLOCATE(FDData);
+ if(!data) goto exit;
+ io->data = data;
+ data->fd = fd;
+ err = 0;
+ exit:
+ if(err){
+ if(io){
+ if(data) deallocate(data);
+ deallocate(io);
+ io = NULL;
+ }
+ }
+ return io;
+}
+
+#endif
diff --git a/tools/xfrd/lzi_stream.h b/tools/vnet/libxutil/fd_stream.h
index 959059eccf..b37a6863e2 100644
--- a/tools/xfrd/lzi_stream.h
+++ b/tools/vnet/libxutil/fd_stream.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2003 Hewlett-Packard Company.
+ * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
@@ -16,20 +16,21 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#ifndef _XUTIL_LZI_STREAM_H_
-#define _XUTIL_LZI_STREAM_H_
+#ifndef _XMC_FD_STREAM_H_
+#define _XMC_FD_STREAM_H_
#ifndef __KERNEL__
#include "iostream.h"
-extern IOStream *lzi_stream_new(IOStream *io, const char *mode);
-extern IOStream *lzi_stream_fopen(const char *file, const char *mode);
-extern IOStream *lzi_stream_fdopen(int fd, const char *mode);
-extern IOStream *lzi_stream_io(IOStream *zio);
+/** Data associated with a fd stream. */
+typedef struct FDData {
+ /** The socket file descriptor. */
+ int fd;
+} FDData;
-extern int lzi_stream_plain_bytes(IOStream *io);
-extern int lzi_stream_comp_bytes(IOStream *io);
-extern float lzi_stream_ratio(IOStream *io);
+extern IOStream *fd_stream_new(int fd);
+extern int fd_stream_data(IOStream *io, FDData **data);
+extern int fd_stream_check(IOStream *io);
#endif
-#endif /* !_XUTIL_LZI_STREAM_H_ */
+#endif /* !_XMC_FD_STREAM_H_ */
diff --git a/tools/libxutil/file_stream.c b/tools/vnet/libxutil/file_stream.c
index ca7b9d1680..7adbac0fa7 100644
--- a/tools/libxutil/file_stream.c
+++ b/tools/vnet/libxutil/file_stream.c
@@ -46,18 +46,21 @@ static const IOMethods file_methods = {
static IOStream _iostdin = {
methods: &file_methods,
data: (void*)1,
+ nofree: 1,
};
/** IOStream for stdout. */
static IOStream _iostdout = {
methods: &file_methods,
data: (void*)2,
+ nofree: 1,
};
/** IOStream for stderr. */
static IOStream _iostderr = {
methods: &file_methods,
data: (void*)3,
+ nofree: 1,
};
/** IOStream for stdin. */
@@ -152,10 +155,7 @@ static int file_error(IOStream *s){
*/
static int file_close(IOStream *s){
int result = 0;
- if (s->data){
- result = fclose(get_file(s));
- s->data = (void*)0;
- }
+ result = fclose(get_file(s));
return result;
}
@@ -164,7 +164,7 @@ static int file_close(IOStream *s){
* @param s file stream
*/
static void file_free(IOStream *s){
- file_close(s);
+ // Nothing extra to do - close did it all.
}
/** Create an IOStream for a stream.
@@ -175,8 +175,8 @@ static void file_free(IOStream *s){
IOStream *file_stream_new(FILE *f){
IOStream *io = ALLOCATE(IOStream);
if(io){
- io->methods = &file_methods;
- io->data = (void*)f;
+ io->methods = &file_methods;
+ io->data = (void*)f;
}
return io;
}
@@ -191,10 +191,10 @@ IOStream *file_stream_fopen(const char *file, const char *flags){
IOStream *io = 0;
FILE *fin = fopen(file, flags);
if(fin){
- io = file_stream_new(fin);
- if(!io){
- fclose(fin);
- }
+ io = file_stream_new(fin);
+ if(!io){
+ fclose(fin);
+ }
}
return io;
}
@@ -211,8 +211,9 @@ IOStream *file_stream_fdopen(int fd, const char *flags){
FILE *fin = fdopen(fd, flags);
if(fin){
io = file_stream_new(fin);
- if(!io)
+ if(!io){
fclose(fin);
+ }
}
return io;
}
diff --git a/tools/libxutil/file_stream.h b/tools/vnet/libxutil/file_stream.h
index f717656c24..f717656c24 100644
--- a/tools/libxutil/file_stream.h
+++ b/tools/vnet/libxutil/file_stream.h
diff --git a/tools/libxutil/gzip_stream.c b/tools/vnet/libxutil/gzip_stream.c
index d3bedbf217..ea14b82580 100644
--- a/tools/libxutil/gzip_stream.c
+++ b/tools/vnet/libxutil/gzip_stream.c
@@ -39,7 +39,7 @@ static int gzip_flush(IOStream *s);
/** Methods used by a gzFile* IOStream. */
static const IOMethods gzip_methods = {
- read: gzip_read,
+ read: gzip_read,
write: gzip_write,
error: gzip_error,
close: gzip_close,
@@ -108,10 +108,7 @@ static int gzip_error(IOStream *s){
*/
static int gzip_close(IOStream *s){
int result = 0;
- if (s->data){
- result = gzclose(get_gzfile(s));
- s->data = (void*)0;
- }
+ result = gzclose(get_gzfile(s));
return result;
}
@@ -120,7 +117,7 @@ static int gzip_close(IOStream *s){
* @param s gzip stream
*/
static void gzip_free(IOStream *s){
- gzip_close(s);
+ // Nothing to do - close did it all.
}
/** Create an IOStream for a gzip stream.
@@ -131,8 +128,8 @@ static void gzip_free(IOStream *s){
IOStream *gzip_stream_new(gzFile *f){
IOStream *io = ALLOCATE(IOStream);
if(io){
- io->methods = &gzip_methods;
- io->data = (void*)f;
+ io->methods = &gzip_methods;
+ io->data = (void*)f;
}
return io;
}
diff --git a/tools/libxutil/gzip_stream.h b/tools/vnet/libxutil/gzip_stream.h
index fd28e39575..fd28e39575 100644
--- a/tools/libxutil/gzip_stream.h
+++ b/tools/vnet/libxutil/gzip_stream.h
diff --git a/tools/libxutil/hash_table.c b/tools/vnet/libxutil/hash_table.c
index 13da946e77..13da946e77 100644
--- a/tools/libxutil/hash_table.c
+++ b/tools/vnet/libxutil/hash_table.c
diff --git a/tools/libxutil/hash_table.h b/tools/vnet/libxutil/hash_table.h
index 6608b49cda..6608b49cda 100644
--- a/tools/libxutil/hash_table.h
+++ b/tools/vnet/libxutil/hash_table.h
diff --git a/tools/libxutil/iostream.c b/tools/vnet/libxutil/iostream.c
index 39a62173ad..39a62173ad 100644
--- a/tools/libxutil/iostream.c
+++ b/tools/vnet/libxutil/iostream.c
diff --git a/tools/libxutil/iostream.h b/tools/vnet/libxutil/iostream.h
index 1efd8f9985..f41ca56629 100644
--- a/tools/libxutil/iostream.h
+++ b/tools/vnet/libxutil/iostream.h
@@ -33,7 +33,7 @@
#include "allocate.h"
-/** End of input return value. */
+/** End of input return value (for getc). */
#define IOSTREAM_EOF -1
/** An input/output abstraction.
@@ -82,6 +82,8 @@ struct IOStream {
int written;
/** Number of bytes read. */
int read;
+ /** Flag indicating whether not to free when closed. */
+ int nofree;
};
@@ -107,14 +109,14 @@ extern int IOStream_vprint(IOStream *io, const char *format, va_list args);
static inline int IOStream_read(IOStream *stream, void *buf, size_t n){
int result;
if(stream->closed){
- result = IOSTREAM_EOF;
+ result = -EIO;
goto exit;
}
if(!stream->methods || !stream->methods->read){
result = -EINVAL;
goto exit;
}
- result = stream->methods->read(stream, buf, n);
+ result = (stream->methods->read)(stream, buf, n);
if(result > 0){
stream->read += result;
}
@@ -132,14 +134,14 @@ static inline int IOStream_read(IOStream *stream, void *buf, size_t n){
static inline int IOStream_write(IOStream *stream, const void *buf, size_t n){
int result;
if(stream->closed){
- result = IOSTREAM_EOF;
+ result = -EIO;
goto exit;
}
if(!stream->methods || !stream->methods->write){
result = -EINVAL;
goto exit;
}
- result = stream->methods->write(stream, buf, n);
+ result = (stream->methods->write)(stream, buf, n);
if(result > 0){
stream->written += result;
}
@@ -150,15 +152,14 @@ static inline int IOStream_write(IOStream *stream, const void *buf, size_t n){
/** Flush the stream.
*
* @param stream stream
- * @return 0 on success, IOSTREAM_EOF otherwise
+ * @return 0 on success, negative error code otherwise
*/
static inline int IOStream_flush(IOStream *stream){
int result = 0;
if(stream->closed){
- result = IOSTREAM_EOF;
+ result = -EIO;
} else if(stream->methods->flush){
- result = stream->methods->flush(stream);
- if(result < 0) result = IOSTREAM_EOF;
+ result = (stream->methods->flush)(stream);
}
return result;
}
@@ -171,7 +172,7 @@ static inline int IOStream_flush(IOStream *stream){
static inline int IOStream_error(IOStream *stream){
int err = 0;
if(stream->methods && stream->methods->error){
- err = stream->methods->error(stream);
+ err = (stream->methods->error)(stream);
}
return err;
}
@@ -179,14 +180,25 @@ static inline int IOStream_error(IOStream *stream){
/** Close the stream.
*
* @param stream to close
- * @return 1 for error, 0 otherwise
+ * @return 0 on success, negative error code otherwise
*/
static inline int IOStream_close(IOStream *stream){
- int err = 1;
+ int err = 0;
+ if(!stream || stream->closed){
+ err = -EIO;
+ goto exit;
+ }
if(stream->methods && stream->methods->close){
- err = stream->methods->close(stream);
+ err = (stream->methods->close)(stream);
stream->closed = 1;
}
+ if(stream->nofree) goto exit;
+ if(stream->methods && stream->methods->free){
+ (stream->methods->free)(stream);
+ }
+ *stream = (IOStream){};
+ deallocate(stream);
+ exit:
return err;
}
@@ -199,22 +211,6 @@ static inline int IOStream_is_closed(IOStream *stream){
return stream->closed;
}
-/** Free the memory used by the stream.
- *
- * @param stream to free
- */
-static inline void IOStream_free(IOStream *stream){
- if(!stream->closed && stream->methods && stream->methods->close){
- stream->methods->close(stream);
- }
- if(stream->methods && stream->methods->free){
- stream->methods->free(stream);
- }
- *stream = (IOStream){};
- deallocate(stream);
-}
-
-
/** Print a character to a stream, like fputc().
*
* @param stream to print to
diff --git a/tools/libxutil/kernel_stream.c b/tools/vnet/libxutil/kernel_stream.c
index a13051eec8..3c6b55223d 100644
--- a/tools/libxutil/kernel_stream.c
+++ b/tools/vnet/libxutil/kernel_stream.c
@@ -57,10 +57,10 @@ static void kernel_stream_unlock(IOStream *s);
/** Methods for a kernel stream. Output only. */
static const IOMethods kernel_methods = {
- write: kernel_write,
- free: kernel_free,
- lock: kernel_stream_lock,
- unlock: kernel_stream_unlock,
+ write: kernel_write,
+ free: kernel_free,
+ lock: kernel_stream_lock,
+ unlock: kernel_stream_unlock,
};
/** Shared state for kernel streams.
@@ -68,15 +68,16 @@ static const IOMethods kernel_methods = {
* shared state and avoid allocating it.
*/
static const KernelData kernel_data = {
- lock: SPIN_LOCK_UNLOCKED,
- flags: 0,
- buf_n: BUF_N,
+ lock: SPIN_LOCK_UNLOCKED,
+ flags: 0,
+ buf_n: BUF_N,
};
/** Stream for kernel printk. */
static IOStream iokernel = {
methods: &kernel_methods,
data: &kernel_data,
+ nofree: 1,
};
/** Stream for kernel printk. */
@@ -94,7 +95,7 @@ IOStream *iostderr = &iokernel;
* @return kernel stream
*/
IOStream get_stream_kernel(void){
- return iokernel;
+ return iokernel;
}
/** Obtain the lock on the stream state.
@@ -102,7 +103,7 @@ IOStream get_stream_kernel(void){
* @param kdata stream state
*/
static inline void KernelData_lock(KernelData *kdata){
- spin_lock_irqsave(&kdata->lock, kdata->flags);
+ spin_lock_irqsave(&kdata->lock, kdata->flags);
}
/** Release the lock on the stream state.
@@ -110,7 +111,7 @@ static inline void KernelData_lock(KernelData *kdata){
* @param kdata stream state
*/
static inline void KernelData_unlock(KernelData *kdata){
- spin_unlock_irqrestore(&kdata->lock, kdata->flags);
+ spin_unlock_irqrestore(&kdata->lock, kdata->flags);
}
/** Get the stream state.
@@ -119,7 +120,7 @@ static inline void KernelData_unlock(KernelData *kdata){
* @return stream state
*/
static inline KernelData *get_kernel_data(IOStream *s){
- return (KernelData*)s->data;
+ return (KernelData*)s->data;
}
/** Obtain the lock on the stream state.
@@ -146,14 +147,14 @@ void kernel_stream_unlock(IOStream *s){
* @return result of the print
*/
static int kernel_write(IOStream *stream, const void *buf, size_t n){
- KernelData *kdata = get_kernel_data(stream);
- int k;
- k = kdata->buf_n - 1;
- if(n < k) k = n;
- memcpy(kdata->buf, buf, k);
- kdata->buf[k] = '\0';
- printk(kdata->buf);
- return k;
+ KernelData *kdata = get_kernel_data(stream);
+ int k;
+ k = kdata->buf_n - 1;
+ if(n < k) k = n;
+ memcpy(kdata->buf, buf, k);
+ kdata->buf[k] = '\0';
+ printk(kdata->buf);
+ return k;
}
/** Free a kernel stream.
@@ -164,11 +165,11 @@ static int kernel_write(IOStream *stream, const void *buf, size_t n){
* @param io stream to free
*/
static void kernel_free(IOStream *io){
- KernelData *kdata;
- if(io == &iokernel) return;
- kdata = get_kernel_data(io);
- memset(kdata, 0, sizeof(*kdata));
- deallocate(kdata);
+ KernelData *kdata;
+ if(io == &iokernel) return;
+ kdata = get_kernel_data(io);
+ memset(kdata, 0, sizeof(*kdata));
+ deallocate(kdata);
}
#endif /* __KERNEL__ */
diff --git a/tools/libxutil/kernel_stream.h b/tools/vnet/libxutil/kernel_stream.h
index 0f18058d59..0f18058d59 100644
--- a/tools/libxutil/kernel_stream.h
+++ b/tools/vnet/libxutil/kernel_stream.h
diff --git a/tools/libxutil/lexis.c b/tools/vnet/libxutil/lexis.c
index d3441f0c38..d3441f0c38 100644
--- a/tools/libxutil/lexis.c
+++ b/tools/vnet/libxutil/lexis.c
diff --git a/tools/libxutil/lexis.h b/tools/vnet/libxutil/lexis.h
index be8fb653d3..d49a3547d6 100644
--- a/tools/libxutil/lexis.h
+++ b/tools/vnet/libxutil/lexis.h
@@ -34,7 +34,7 @@
#define space_class ((char []){ '\n', '\r', '\t', ' ', '\f' , 0 })
/** Class of separator characters. */
-#define sep_class "{}()<>[]@!;"
+#define sep_class "{}()<>[]!;\"'"
#define comment_class "#"
diff --git a/tools/libxutil/socket_stream.c b/tools/vnet/libxutil/socket_stream.c
index 9e90b4e06e..9e90b4e06e 100644
--- a/tools/libxutil/socket_stream.c
+++ b/tools/vnet/libxutil/socket_stream.c
diff --git a/tools/libxutil/socket_stream.h b/tools/vnet/libxutil/socket_stream.h
index 9da23e9f01..9da23e9f01 100644
--- a/tools/libxutil/socket_stream.h
+++ b/tools/vnet/libxutil/socket_stream.h
diff --git a/tools/libxutil/string_stream.c b/tools/vnet/libxutil/string_stream.c
index a535c922ff..907e7d70ee 100644
--- a/tools/libxutil/string_stream.c
+++ b/tools/vnet/libxutil/string_stream.c
@@ -28,11 +28,13 @@
static int string_error(IOStream *io);
static int string_close(IOStream *io);
static void string_free(IOStream *io);
+static int string_write(IOStream *io, const void *msg, size_t n);
+static int string_read(IOStream *io, void *buf, size_t n);
/** Methods for a string stream. */
static IOMethods string_methods = {
- //print: string_print,
- //getc: string_getc,
+ read: string_read,
+ write: string_write,
error: string_error,
close: string_close,
free: string_free,
@@ -47,6 +49,28 @@ static inline StringData *get_string_data(IOStream *io){
return (StringData*)io->data;
}
+static int string_write(IOStream *io, const void *msg, size_t n){
+ StringData *data = get_string_data(io);
+ int k;
+
+ k = data->end - data->out;
+ if(n > k) n = k;
+ memcpy(data->out, msg, n);
+ data->out += n;
+ return n;
+}
+
+static int string_read(IOStream *io, void *buf, size_t n){
+ StringData *data = get_string_data(io);
+ int k;
+
+ k = data->end - data->in;
+ if(n > k) n = k;
+ memcpy(buf, data->in, k);
+ data->in += n;
+ return n;
+}
+
/** Test if a string stream has an error.
*
* @param io string stream
@@ -70,7 +94,6 @@ static int string_close(IOStream *io){
}
/** Free a string stream.
- * The stream must have been allocated, not statically created.
* The stream state is freed, but the underlying string is not.
*
* @param io string stream
@@ -90,6 +113,9 @@ IOMethods *string_stream_get_methods(void){
}
/** Initialise a string stream, usually from static data.
+ * If the stream and StringData should be freed when
+ * the stream is closed, unset io->nofree.
+ * The string is not freed on close.
*
* @param io address of IOStream to fill in
* @param data address of StringData to fill in
@@ -107,10 +133,12 @@ void string_stream_init(IOStream *io, StringData *data, char *s, int n){
memzero(io, sizeof(*io));
io->methods = &string_methods;
io->data = data;
+ io->nofree = 1;
}
}
/** Allocate and initialise a string stream.
+ * The stream is freed on close, but the string is not.
*
* @param s string to use
* @param n length of the string
@@ -123,6 +151,7 @@ IOStream *string_stream_new(char *s, int n){
if(data && io){
ok = 1;
string_stream_init(io, data, s, n);
+ io->nofree = 0;
}
if(!ok){
deallocate(data);
diff --git a/tools/libxutil/string_stream.h b/tools/vnet/libxutil/string_stream.h
index 246e63d2fe..246e63d2fe 100644
--- a/tools/libxutil/string_stream.h
+++ b/tools/vnet/libxutil/string_stream.h
diff --git a/tools/libxutil/sxpr.c b/tools/vnet/libxutil/sxpr.c
index 02a5040b52..d264527f25 100644
--- a/tools/libxutil/sxpr.c
+++ b/tools/vnet/libxutil/sxpr.c
@@ -28,6 +28,19 @@
#include <errno.h>
#endif
+#ifdef __KERNEL__
+#include <linux/random.h>
+
+int rand(void){
+ int v;
+ get_random_bytes(&v, sizeof(v));
+ return v;
+}
+
+#else
+#include <stdlib.h>
+#endif
+
#undef free
/** @file
@@ -42,41 +55,51 @@
static int atom_print(IOStream *io, Sxpr obj, unsigned flags);
static int atom_equal(Sxpr x, Sxpr y);
static void atom_free(Sxpr obj);
+static Sxpr atom_copy(Sxpr obj);
static int string_print(IOStream *io, Sxpr obj, unsigned flags);
static int string_equal(Sxpr x, Sxpr y);
static void string_free(Sxpr obj);
+static Sxpr string_copy(Sxpr obj);
static int cons_print(IOStream *io, Sxpr obj, unsigned flags);
static int cons_equal(Sxpr x, Sxpr y);
static void cons_free(Sxpr obj);
+static Sxpr cons_copy(Sxpr obj);
static int null_print(IOStream *io, Sxpr obj, unsigned flags);
static int none_print(IOStream *io, Sxpr obj, unsigned flags);
static int int_print(IOStream *io, Sxpr obj, unsigned flags);
static int bool_print(IOStream *io, Sxpr obj, unsigned flags);
+static int err_print(IOStream *io, Sxpr obj, unsigned flags);
+static int nomem_print(IOStream *io, Sxpr obj, unsigned flags);
/** Type definitions. */
static SxprType types[1024] = {
- [T_NONE] { type: T_NONE, name: "none", print: none_print },
- [T_NULL] { type: T_NULL, name: "null", print: null_print },
- [T_UINT] { type: T_UINT, name: "int", print: int_print, },
- [T_BOOL] { type: T_BOOL, name: "bool", print: bool_print, },
- [T_ATOM] { type: T_ATOM, name: "atom", print: atom_print,
- pointer: TRUE,
- free: atom_free,
- equal: atom_equal,
- },
- [T_STRING] { type: T_STRING, name: "string", print: string_print,
- pointer: TRUE,
- free: string_free,
- equal: string_equal,
- },
- [T_CONS] { type: T_CONS, name: "cons", print: cons_print,
- pointer: TRUE,
- free: cons_free,
- equal: cons_equal,
- },
+ [T_NONE] { .type= T_NONE, .name= "none", .print= none_print },
+ [T_NULL] { .type= T_NULL, .name= "null", .print= null_print },
+ [T_UINT] { .type= T_UINT, .name= "int", .print= int_print, },
+ [T_BOOL] { .type= T_BOOL, .name= "bool", .print= bool_print, },
+ [T_ERR] { .type= T_ERR, .name= "err", .print= err_print, },
+    [T_NOMEM]  { .type= T_NOMEM,  .name= "nomem",  .print= nomem_print, },
+ [T_ATOM] { .type= T_ATOM, .name= "atom", .print= atom_print,
+ .pointer= TRUE,
+ .free= atom_free,
+ .equal= atom_equal,
+ .copy= atom_copy,
+ },
+ [T_STRING] { .type= T_STRING, .name= "string", .print= string_print,
+ .pointer= TRUE,
+ .free= string_free,
+ .equal= string_equal,
+ .copy= string_copy,
+ },
+ [T_CONS] { .type= T_CONS, .name= "cons", .print= cons_print,
+ .pointer= TRUE,
+ .free= cons_free,
+ .equal= cons_equal,
+ .copy= cons_copy,
+ },
};
/** Number of entries in the types array. */
@@ -157,12 +180,29 @@ int objprint(IOStream *io, Sxpr x, unsigned flags){
int k = 0;
if(!io) return k;
if(flags & PRINT_TYPE){
- k += IOStream_print(io, "%s:", def->name);
+ k += IOStream_print(io, "%s:", def->name);
+ }
+ if(def->pointer && (flags & PRINT_ADDR)){
+ k += IOStream_print(io, "<%p>", get_ptr(x));
}
k += print_fn(io, x, flags);
return k;
}
+Sxpr objcopy(Sxpr x){
+ SxprType *def = get_sxpr_type(get_type(x));
+ ObjCopyFn *copy_fn = (def ? def->copy : NULL);
+ Sxpr v;
+ if(copy_fn){
+ v = copy_fn(x);
+    } else if(def && def->pointer){
+ v = ONOMEM;
+ } else {
+ v = x;
+ }
+ return v;
+}
+
/** General sxpr free function.
* Frees an sxpr using the free function for its type.
* Free functions must recursively free any subsxprs.
@@ -176,11 +216,11 @@ void objfree(Sxpr x){
SxprType *def = get_sxpr_type(get_type(x));
if(def){
- if(def->free){
- def->free(x);
- } else if (def->pointer){
- hfree(x);
- }
+ if(def->free){
+ def->free(x);
+ } else if (def->pointer){
+ hfree(x);
+ }
}
}
@@ -284,9 +324,9 @@ Sxpr cons_member_if(Sxpr l, ObjEqualFn *test_fn, Sxpr v){
*/
int cons_subset(Sxpr s, Sxpr t){
for( ; CONSP(t); t = CDR(t)){
- if(!CONSP(cons_member(s, CAR(t)))){
- return 0;
- }
+ if(!CONSP(cons_member(s, CAR(t)))){
+ return 0;
+ }
}
return 1;
}
@@ -365,18 +405,27 @@ Sxpr setf(Sxpr k, Sxpr v, Sxpr l){
#endif /* USE_GC */
/** Create a new atom with the given name.
+ * Makes an integer sxpr if the name can be parsed as an int.
*
* @param name the name
* @return new atom
*/
Sxpr atom_new(char *name){
Sxpr n, obj = ONOMEM;
+ long v;
- n = string_new(name);
- if(NOMEMP(n)) goto exit;
- obj = HALLOC(ObjAtom, T_ATOM);
- if(NOMEMP(obj)) goto exit;
- OBJ_ATOM(obj)->name = n;
+ if(convert_atol(name, &v) == 0){
+ obj = OINT(v);
+ } else {
+ n = string_new(name);
+ if(NOMEMP(n)) goto exit;
+ obj = HALLOC(ObjAtom, T_ATOM);
+ if(NOMEMP(obj)){
+ string_free(n);
+ goto exit;
+ }
+ OBJ_ATOM(obj)->name = n;
+ }
exit:
return obj;
}
@@ -392,6 +441,20 @@ void atom_free(Sxpr obj){
hfree(obj);
}
+/** Copy an atom.
+ *
+ * @param obj to copy
+ */
+Sxpr atom_copy(Sxpr obj){
+ Sxpr v;
+ if(OBJ_ATOM(obj)->interned){
+ v = obj;
+ } else {
+ v = atom_new(atom_name(obj));
+ }
+ return v;
+}
+
/** Print an atom. Prints the atom name.
*
* @param io stream to print to
@@ -400,8 +463,7 @@ void atom_free(Sxpr obj){
* @return number of bytes printed
*/
int atom_print(IOStream *io, Sxpr obj, unsigned flags){
- //return string_print(io, OBJ_ATOM(obj)->name, (flags | PRINT_RAW));
- return string_print(io, OBJ_ATOM(obj)->name, flags);
+ return objprint(io, OBJ_ATOM(obj)->name, flags);
}
/** Atom equality.
@@ -430,13 +492,17 @@ char * atom_name(Sxpr obj){
return string_string(OBJ_ATOM(obj)->name);
}
+int atom_length(Sxpr obj){
+ return string_length(OBJ_ATOM(obj)->name);
+}
+
/** Get the C string from a string sxpr.
*
* @param obj string sxpr
* @return string
*/
char * string_string(Sxpr obj){
- return OBJ_STRING(obj);
+ return OBJ_STRING(obj)->data;
}
/** Get the length of a string.
@@ -445,7 +511,7 @@ char * string_string(Sxpr obj){
* @return length
*/
int string_length(Sxpr obj){
- return strlen(OBJ_STRING(obj));
+ return OBJ_STRING(obj)->len;
}
/** Create a new string. The input string is copied,
@@ -456,12 +522,28 @@ int string_length(Sxpr obj){
*/
Sxpr string_new(char *s){
int n = (s ? strlen(s) : 0);
+ return string_new_n(s, n);
+}
+
+/** Create a new string. The input string is copied,
+ * and need not be null-terminated.
+ *
+ * @param s characters to put in the string (may be null)
+ * @param n string length
+ * @return new sxpr
+ */
+Sxpr string_new_n(char *s, int n){
Sxpr obj;
- obj = halloc(n+1, T_STRING);
+ obj = halloc(sizeof(ObjString) + n + 1, T_STRING);
if(!NOMEMP(obj)){
- char *str = OBJ_STRING(obj);
- strncpy(str, s, n);
- str[n] = '\0';
+ char *str = OBJ_STRING(obj)->data;
+ OBJ_STRING(obj)->len = n;
+ if(s){
+ memcpy(str, s, n);
+ str[n] = '\0';
+ } else {
+ memset(str, 0, n + 1);
+ }
}
return obj;
}
@@ -474,70 +556,180 @@ void string_free(Sxpr obj){
hfree(obj);
}
+/** Copy a string.
+ *
+ * @param obj to copy
+ */
+Sxpr string_copy(Sxpr obj){
+ return string_new_n(string_string(obj), string_length(obj));
+}
+
/** Determine if a string needs escapes when printed
* using the given flags.
*
* @param str string to check
+ * @param n string length
* @param flags print flags
* @return 1 if needs escapes, 0 otherwise
*/
-int needs_escapes(char *str, unsigned flags){
+int needs_escapes(char *str, int n, unsigned flags){
char *c;
+ int i;
int val = 0;
if(str){
- for(c=str; *c; c++){
- if(in_alpha_class(*c)) continue;
- if(in_decimal_digit_class(*c)) continue;
- if(in_class(*c, "/._+:@~-")) continue;
- val = 1;
- break;
- }
- }
- //printf("\n> val=%d str=|%s|\n", val, str);
+ for(i=0, c=str; i<n; i++, c++){
+ if(in_alpha_class(*c)) continue;
+ if(in_decimal_digit_class(*c)) continue;
+ if(in_class(*c, "/._+:@~-")) continue;
+ val = 1;
+ break;
+ }
+ }
return val;
}
+char randchar(void){
+ int r;
+ char c;
+ for( ; ; ){
+ r = rand();
+ c = (r >> 16) & 0xff;
+ if('a' <= c && c <= 'z') break;
+ }
+ return c;
+}
+
+int string_contains(char *s, int s_n, char *k, int k_n){
+ int i, n = s_n - k_n;
+ for(i=0; i < n; i++){
+ if(!memcmp(s+i, k, k_n)) return 1;
+ }
+ return 0;
+}
+
+int string_delim(char *s, int s_n, char *d, int d_n){
+ int i;
+ if(d_n < 4) return -1;
+ memset(d, 0, d_n+1);
+ for(i=0; i<3; i++){
+ d[i] = randchar();
+ }
+ for( ; i < d_n; i++){
+ if(!string_contains(s, s_n, d, i)){
+ return i;
+ }
+ d[i] = randchar();
+ }
+ return -1;
+}
+
+/** Print the bytes in a string as-is.
+ *
+ * @param io stream
+ * @param str string
+ * @param n length
+ * @return bytes written or error code
+ */
+int _string_print_raw(IOStream *io, char *str, int n){
+ int k = 0;
+ k = IOStream_write(io, str, n);
+ return k;
+}
+
+/** Print a string in counted data format.
+ *
+ * @param io stream
+ * @param str string
+ * @param n length
+ * @return bytes written or error code
+ */
+int _string_print_counted(IOStream *io, char *str, int n){
+ int k = 0;
+ k += IOStream_print(io, "%c%c%d%c",
+ c_data_open, c_data_count, n, c_data_count);
+ k += IOStream_write(io, str, n);
+ return k;
+}
+
+/** Print a string in quoted data format.
+ *
+ * @param io stream
+ * @param str string
+ * @param n length
+ * @return bytes written or error code
+ */
+int _string_print_quoted(IOStream *io, char *str, int n){
+ int k = 0;
+ char d[10];
+ int d_n;
+ d_n = string_delim(str, n, d, sizeof(d) - 1);
+ k += IOStream_print(io, "%c%c%s%c",
+ c_data_open, c_data_quote, d, c_data_quote);
+ k += IOStream_write(io, str, n);
+ k += IOStream_print(io, "%c%s%c", c_data_quote, d, c_data_quote);
+ return k;
+}
+
+/** Print a string as a quoted string.
+ *
+ * @param io stream
+ * @param str string
+ * @param n length
+ * @return bytes written or error code
+ */
+int _string_print_string(IOStream *io, char *str, int n){
+ int k = 0;
+
+ k += IOStream_print(io, "\"");
+ if(str){
+ char *s, *t;
+ for(s = str, t = str + n; s < t; s++){
+ if(*s < ' ' || *s >= 127 ){
+ switch(*s){
+ case '\a': k += IOStream_print(io, "\\a"); break;
+ case '\b': k += IOStream_print(io, "\\b"); break;
+ case '\f': k += IOStream_print(io, "\\f"); break;
+ case '\n': k += IOStream_print(io, "\\n"); break;
+ case '\r': k += IOStream_print(io, "\\r"); break;
+ case '\t': k += IOStream_print(io, "\\t"); break;
+ case '\v': k += IOStream_print(io, "\\v"); break;
+ default:
+ // Octal escape;
+ k += IOStream_print(io, "\\%o", *s);
+ break;
+ }
+ } else if(*s == c_double_quote ||
+ *s == c_single_quote ||
+ *s == c_escape){
+ k += IOStream_print(io, "\\%c", *s);
+ } else {
+ k+= IOStream_print(io, "%c", *s);
+ }
+ }
+ }
+ k += IOStream_print(io, "\"");
+ return k;
+}
+
/** Print a string to a stream, with escapes if necessary.
*
* @param io stream to print to
* @param str string
+ * @param n string length
* @param flags print flags
* @return number of bytes written
*/
-int _string_print(IOStream *io, char *str, unsigned flags){
+int _string_print(IOStream *io, char *str, int n, unsigned flags){
int k = 0;
- if((flags & PRINT_RAW) || !needs_escapes(str, flags)){
- k += IOStream_print(io, str);
+ if((flags & PRINT_COUNTED)){
+ k = _string_print_counted(io, str, n);
+ } else if((flags & PRINT_RAW) || !needs_escapes(str, n, flags)){
+ k = _string_print_raw(io, str, n);
+ } else if(n > 50){
+ k = _string_print_quoted(io, str, n);
} else {
- k += IOStream_print(io, "\"");
- if(str){
- char *s;
- for(s = str; *s; s++){
- if(*s < ' ' || *s >= 127 ){
- switch(*s){
- case '\a': k += IOStream_print(io, "\\a"); break;
- case '\b': k += IOStream_print(io, "\\b"); break;
- case '\f': k += IOStream_print(io, "\\f"); break;
- case '\n': k += IOStream_print(io, "\\n"); break;
- case '\r': k += IOStream_print(io, "\\r"); break;
- case '\t': k += IOStream_print(io, "\\t"); break;
- case '\v': k += IOStream_print(io, "\\v"); break;
- default:
- // Octal escape;
- k += IOStream_print(io, "\\%o", *s);
- break;
- }
- } else if(*s == c_double_quote ||
- *s == c_single_quote ||
- *s == c_escape){
- k += IOStream_print(io, "\\%c", *s);
- } else {
- k+= IOStream_print(io, "%c", *s);
- }
- }
- }
- k += IOStream_print(io, "\"");
+ k = _string_print_string(io, str, n);
}
return k;
}
@@ -550,7 +742,14 @@ int _string_print(IOStream *io, char *str, unsigned flags){
* @return number of bytes written
*/
int string_print(IOStream *io, Sxpr obj, unsigned flags){
- return _string_print(io, OBJ_STRING(obj), flags);
+ return _string_print(io,
+ OBJ_STRING(obj)->data,
+ OBJ_STRING(obj)->len,
+ flags);
+}
+
+int string_eq(char *s, int s_n, char *t, int t_n){
+ return (s_n == t_n) && (memcmp(s, t, s_n) == 0);
}
/** Compare an sxpr with a string for equality.
@@ -563,9 +762,13 @@ int string_equal(Sxpr x, Sxpr y){
int ok = 0;
ok = eq(x,y);
if(ok) goto exit;
- ok = has_type(y, T_STRING) && !strcmp(OBJ_STRING(x), OBJ_STRING(y));
+ ok = has_type(y, T_STRING) &&
+ string_eq(OBJ_STRING(x)->data, OBJ_STRING(x)->len,
+ OBJ_STRING(y)->data, OBJ_STRING(y)->len);
if(ok) goto exit;
- ok = has_type(y, T_ATOM) && !strcmp(OBJ_STRING(x), atom_name(y));
+ ok = has_type(y, T_ATOM) &&
+ string_eq(OBJ_STRING(x)->data, OBJ_STRING(x)->len,
+ atom_name(y), atom_length(y));
exit:
return ok;
}
@@ -613,15 +816,38 @@ int cons_push(Sxpr *list, Sxpr elt){
void cons_free(Sxpr obj){
Sxpr next;
for(; CONSP(obj); obj = next){
- next = CDR(obj);
- objfree(CAR(obj));
- hfree(obj);
+ next = CDR(obj);
+ objfree(CAR(obj));
+ hfree(obj);
}
if(!NULLP(obj)){
- objfree(obj);
+ objfree(obj);
}
}
+/** Copy a cons. Recursively copies the car and cdr.
+ *
+ * @param obj to copy
+ */
+Sxpr cons_copy(Sxpr obj){
+ Sxpr v = ONULL;
+ Sxpr l = ONULL, x = ONONE;
+ for(l = obj; CONSP(l); l = CDR(l)){
+ x = objcopy(CAR(l));
+ if(NOMEMP(x)) goto exit;
+ x = cons_new(x, v);
+ if(NOMEMP(x)) goto exit;
+ v = x;
+ }
+ v = nrev(v);
+ exit:
+ if(NOMEMP(x)){
+ objfree(v);
+ v = ONOMEM;
+ }
+ return v;
+}
+
/** Free a cons and its cdr cells, but not the car sxprs.
* Does nothing if called on something that is not a cons.
*
@@ -630,8 +856,8 @@ void cons_free(Sxpr obj){
void cons_free_cells(Sxpr obj){
Sxpr next;
for(; CONSP(obj); obj = next){
- next = CDR(obj);
- hfree(obj);
+ next = CDR(obj);
+ hfree(obj);
}
}
@@ -698,26 +924,26 @@ int cons_length(Sxpr obj){
*/
Sxpr nrev(Sxpr l){
if(CONSP(l)){
- // Iterate down the cells in the list making the cdr of
- // each cell point to the previous cell. The last cell
- // is the head of the reversed list.
- Sxpr prev = ONULL;
- Sxpr cell = l;
- Sxpr next;
-
- while(1){
- next = CDR(cell);
- CDR(cell) = prev;
- if(!CONSP(next)) break;
- prev = cell;
- cell = next;
- }
- l = cell;
+ // Iterate down the cells in the list making the cdr of
+ // each cell point to the previous cell. The last cell
+ // is the head of the reversed list.
+ Sxpr prev = ONULL;
+ Sxpr cell = l;
+ Sxpr next;
+
+ while(1){
+ next = CDR(cell);
+ CDR(cell) = prev;
+ if(!CONSP(next)) break;
+ prev = cell;
+ cell = next;
+ }
+ l = cell;
}
return l;
}
-/** Print the null sxpr.
+/** Print the null sxpr.
*
* @param io stream to print to
* @param obj to print
@@ -761,6 +987,30 @@ static int bool_print(IOStream *io, Sxpr obj, unsigned flags){
return IOStream_print(io, (OBJ_UINT(obj) ? k_true : k_false));
}
+/** Print an error.
+ *
+ * @param io stream to print to
+ * @param obj to print
+ * @param flags print flags
+ * @return number of bytes written
+ */
+static int err_print(IOStream *io, Sxpr obj, unsigned flags){
+ int err = OBJ_INT(obj);
+ if(err < 0) err = -err;
+ return IOStream_print(io, "[error:%d:%s]", err, strerror(err));
+}
+
+/** Print the 'nomem' sxpr.
+ *
+ * @param io stream to print to
+ * @param obj to print
+ * @param flags print flags
+ * @return number of bytes written
+ */
+static int nomem_print(IOStream *io, Sxpr obj, unsigned flags){
+ return IOStream_print(io, "[ENOMEM]");
+}
+
int sxprp(Sxpr obj, Sxpr name){
return CONSP(obj) && objequal(CAR(obj), name);
}
@@ -781,8 +1031,8 @@ Sxpr sxpr_name(Sxpr obj){
}
int sxpr_is(Sxpr obj, char *s){
- if(ATOMP(obj)) return !strcmp(atom_name(obj), s);
- if(STRINGP(obj)) return !strcmp(string_string(obj), s);
+ if(ATOMP(obj)) return string_eq(atom_name(obj), atom_length(obj), s, strlen(s));
+ if(STRINGP(obj)) return string_eq(string_string(obj), string_length(obj), s, strlen(s));
return 0;
}
@@ -915,11 +1165,11 @@ static int sym_equal_fn(void *x, void *y){
*/
static void sym_free_fn(HashTable *table, HTEntry *entry){
if(entry){
- objfree(((ObjAtom*)entry->value)->name);
- HTEntry_free(entry);
+ objfree(((ObjAtom*)entry->value)->name);
+ HTEntry_free(entry);
}
}
-
+
/** Initialize the symbol table.
*
* @return 0 on sucess, error code otherwise
@@ -929,7 +1179,7 @@ static int init_symbols(void){
if(symbols){
symbols->key_hash_fn = sym_hash_fn;
symbols->key_equal_fn = sym_equal_fn;
- symbols->entry_free_fn = sym_free_fn;
+ symbols->entry_free_fn = sym_free_fn;
return 0;
}
return -1;
@@ -950,8 +1200,8 @@ void cleanup_symbols(void){
Sxpr get_symbol(char *sym){
HTEntry *entry;
if(!symbols){
- if(init_symbols()) return ONOMEM;
- return ONULL;
+ if(init_symbols()) return ONOMEM;
+ return ONULL;
}
entry = HashTable_get_entry(symbols, sym);
if(entry){
@@ -969,10 +1219,10 @@ Sxpr get_symbol(char *sym){
Sxpr intern(char *sym){
Sxpr symbol = get_symbol(sym);
if(NULLP(symbol)){
- if(!symbols) return ONOMEM;
+ if(!symbols) return ONOMEM;
symbol = atom_new(sym);
if(!NOMEMP(symbol)){
- OBJ_ATOM(symbol)->interned = TRUE;
+ OBJ_ATOM(symbol)->interned = TRUE;
HashTable_add(symbols, atom_name(symbol), get_ptr(symbol));
}
}
diff --git a/tools/libxutil/sxpr.h b/tools/vnet/libxutil/sxpr.h
index 761b8b2da7..c9acd7b25f 100644
--- a/tools/libxutil/sxpr.h
+++ b/tools/vnet/libxutil/sxpr.h
@@ -52,15 +52,80 @@ typedef struct Sxpr {
/** Sxpr type. */
TypeCode type;
union {
- /** Sxpr value. */
+ /** Sxpr value. */
unsigned long ul;
- /** Pointer. */
+ /** Pointer. */
void *ptr;
} v;
} Sxpr;
-/** Sxpr type to indicate out of memory. */
-#define T_NOMEM ((TypeCode)-1)
+/** Get the integer value from an sxpr.
+ *
+ * @param obj sxpr
+ * @return value
+ */
+static inline unsigned long get_ul(Sxpr obj){
+ return obj.v.ul;
+}
+
+/** Get the pointer value from an sxpr.
+ *
+ * @param obj sxpr
+ * @return value
+ */
+static inline void * get_ptr(Sxpr obj){
+ return obj.v.ptr;
+}
+
+/** Create an sxpr containing a pointer.
+ *
+ * @param ty typecode
+ * @param val pointer
+ * @return sxpr
+ */
+static inline Sxpr obj_ptr(TypeCode ty, void *val){
+ return (Sxpr){ .type= ty, .v= { .ptr= val } };
+}
+
+/** Create an sxpr containing an integer.
+ *
+ * @param ty typecode
+ * @param val integer
+ * @return sxpr
+ */
+static inline Sxpr obj_ul(TypeCode ty, unsigned long val){
+ return (Sxpr){ .type= ty, .v= { .ul= val } };
+}
+
+/** Get the type of an sxpr.
+ *
+ * @param obj sxpr
+ * @return type
+ */
+static inline TypeCode get_type(Sxpr obj){
+ return obj.type;
+}
+
+/** Check the type of an sxpr.
+ *
+ * @param obj sxpr
+ * @param type to check
+ * @return 1 if has the type, 0 otherwise
+ */
+static inline int has_type(Sxpr obj, TypeCode type){
+ return get_type(obj) == type;
+}
+
+/** Compare sxprs for literal equality of type and value.
+ *
+ * @param x sxpr to compare
+ * @param y sxpr to compare
+ * @return 1 if equal, 0 otherwise
+ */
+static inline int eq(Sxpr x, Sxpr y){
+ return ((get_type(x) == get_type(y)) && (get_ul(x) == get_ul(y)));
+}
+
/** The 'unspecified' sxpr. */
#define T_NONE ((TypeCode)0)
/** The empty list. */
@@ -79,6 +144,13 @@ typedef struct Sxpr {
/** An error. */
#define T_ERR ((TypeCode)40)
+/** Sxpr type to indicate out of memory. */
+#define T_NOMEM ((TypeCode)41)
+
+typedef struct ObjString {
+ int len;
+ char data[];
+} ObjString;
/** An atom. */
typedef struct ObjAtom {
@@ -93,41 +165,27 @@ typedef struct ObjCons {
Sxpr cdr;
} ObjCons;
-/** A vector. */
-typedef struct ObjVector {
- int n;
- Sxpr data[0];
-} ObjVector;
-
/** Flags for sxpr printing. */
enum PrintFlags {
PRINT_RAW = 0x001,
PRINT_TYPE = 0x002,
PRINT_PRETTY = 0x004,
- PRINT_NUM = 0x008,
+ PRINT_COUNTED = 0x008,
+ PRINT_ADDR = 0x010,
};
+extern int _string_print(IOStream *io, char *str, int n, unsigned flags);
+extern int _string_print_raw(IOStream *io, char *str, int n);
+extern int _string_print_counted(IOStream *io, char *str, int n);
+extern int _string_print_quoted(IOStream *io, char *str, int n);
+extern int _string_print_string(IOStream *io, char *str, int n);
+
/** An integer sxpr.
*
* @param ty type
* @param val integer value
*/
-#define OBJI(ty, val) (Sxpr){ type: (ty), v: { ul: (val) }}
-
-/** A pointer sxpr.
- * If the pointer is non-null, returns an sxpr containing it.
- * If the pointer is null, returns ONOMEM.
- *
- * @param ty type
- * @param val pointer
- */
-#define OBJP(ty, val) ((val) ? (Sxpr){ type: (ty), v: { ptr: (val) }} : ONOMEM)
-
-/** Make an integer sxpr containing a pointer.
- *
- * @param val pointer
- */
-#define PTR(val) OBJP(T_UINT, (void*)(val))
+#define OBJI(ty, val) obj_ul(ty, val)
/** Make an integer sxpr.
* @param x value
@@ -155,6 +213,40 @@ enum PrintFlags {
/** True constant. */
#define OTRUE OBJI(T_BOOL, 1)
+/** A pointer sxpr.
+ * If the pointer is non-null, returns an sxpr containing it.
+ * If the pointer is null, returns ONOMEM.
+ *
+ * @param ty type
+ * @param val pointer
+ */
+static inline Sxpr OBJP(int ty, void *val){
+ return (val ? obj_ptr(ty, val) : ONOMEM);
+}
+
+/** Make an integer sxpr containing a pointer.
+ *
+ * @param val pointer
+ */
+#define PTR(val) OBJP(T_UINT, (void*)(val))
+
+/** Allocate some memory and return an sxpr containing it.
+ * Returns ONOMEM if allocation failed.
+ *
+ * @param n number of bytes to allocate
+ * @param ty typecode
+ * @return sxpr
+ */
+#define halloc(_n, _ty) OBJP(_ty, allocate(_n))
+
+/** Allocate an sxpr containing a pointer to the given type.
+ *
+ * @param _ctype type (uses sizeof to determine how many bytes to allocate)
+ * @param _tycode typecode
+ * @return sxpr, ONOMEM if allocation failed
+ */
+#define HALLOC(_ctype, _tycode) halloc(sizeof(_ctype), _tycode)
+
/* Recognizers for the various sxpr types. */
#define ATOMP(obj) has_type(obj, T_ATOM)
#define BOOLP(obj) has_type(obj, T_BOOL)
@@ -176,7 +268,7 @@ enum PrintFlags {
/* Conversions of sxprs to their values.
* No checking is done.
*/
-#define OBJ_STRING(x) ((char*)get_ptr(x))
+#define OBJ_STRING(x) ((ObjString*)get_ptr(x))
#define OBJ_CONS(x) ((ObjCons*)get_ptr(x))
#define OBJ_ATOM(x) ((ObjAtom*)get_ptr(x))
#define OBJ_SET(x) ((ObjSet*)get_ptr(x))
@@ -188,73 +280,6 @@ enum PrintFlags {
#define CDAR(x) (CDR(CAR(x)))
#define CDDR(x) (CDR(CDR(x)))
-/** Get the integer value from an sxpr.
- *
- * @param obj sxpr
- * @return value
- */
-static inline unsigned long get_ul(Sxpr obj){
- return obj.v.ul;
-}
-
-/** Get the pointer value from an sxpr.
- *
- * @param obj sxpr
- * @return value
- */
-static inline void * get_ptr(Sxpr obj){
- return obj.v.ptr;
-}
-
-/** Create an sxpr containing a pointer.
- *
- * @param type typecode
- * @param val pointer
- * @return sxpr
- */
-static inline Sxpr obj_ptr(TypeCode type, void *val){
- return (Sxpr){ type: type, v: { ptr: val } };
-}
-
-/** Create an sxpr containing an integer.
- *
- * @param type typecode
- * @param val integer
- * @return sxpr
- */
-static inline Sxpr obj_ul(TypeCode type, unsigned long val){
- return (Sxpr){ type: type, v: { ul: val } };
-}
-
-/** Get the type of an sxpr.
- *
- * @param obj sxpr
- * @return type
- */
-static inline TypeCode get_type(Sxpr obj){
- return obj.type;
-}
-
-/** Check the type of an sxpr.
- *
- * @param obj sxpr
- * @param type to check
- * @return 1 if has the type, 0 otherwise
- */
-static inline int has_type(Sxpr obj, TypeCode type){
- return get_type(obj) == type;
-}
-
-/** Compare sxprs for literal equality of type and value.
- *
- * @param x sxpr to compare
- * @param y sxpr to compare
- * @return 1 if equal, 0 otherwise
- */
-static inline int eq(Sxpr x, Sxpr y){
- return ((get_type(x) == get_type(y)) && (get_ul(x) == get_ul(y)));
-}
-
/** Checked version of CAR
*
* @param x sxpr
@@ -273,28 +298,10 @@ static inline Sxpr cdr(Sxpr x){
return (CONSP(x) ? CDR(x) : ONULL);
}
-/** Allocate some memory and return an sxpr containing it.
- * Returns ONOMEM if allocation failed.
- *
- * @param n number of bytes to allocate
- * @param ty typecode
- * @return sxpr
- */
-static inline Sxpr halloc(size_t n, TypeCode ty){
- return OBJP(ty, allocate(n));
-}
-
-/** Allocate an sxpr containing a pointer to the given type.
- *
- * @param ty type (uses sizeof to determine how many bytes to allocate)
- * @param code typecode
- * @return sxpr, ONOMEM if allocation failed
- */
-#define HALLOC(ty, code) halloc(sizeof(ty), code)
-
typedef int ObjPrintFn(IOStream *io, Sxpr obj, unsigned flags);
typedef int ObjEqualFn(Sxpr obj, Sxpr other);
typedef void ObjFreeFn(Sxpr obj);
+typedef Sxpr ObjCopyFn(Sxpr obj);
/** An sxpr type definition. */
typedef struct SxprType {
@@ -304,6 +311,7 @@ typedef struct SxprType {
ObjPrintFn *print;
ObjEqualFn *equal;
ObjFreeFn *free;
+ ObjCopyFn *copy;
} SxprType;
@@ -321,6 +329,7 @@ static inline void hfree(Sxpr x){
extern int objprint(IOStream *io, Sxpr x, unsigned flags);
extern int objequal(Sxpr x, Sxpr y);
extern void objfree(Sxpr x);
+extern Sxpr objcopy(Sxpr x);
extern void cons_free_cells(Sxpr obj);
extern Sxpr intern(char *s);
@@ -341,8 +350,10 @@ extern Sxpr cons_remove_if(Sxpr l, ObjEqualFn *test_fn, Sxpr v);
extern Sxpr atom_new(char *name);
extern char * atom_name(Sxpr obj);
+extern int atom_length(Sxpr obj);
extern Sxpr string_new(char *s);
+extern Sxpr string_new_n(char *s, int n);
extern char * string_string(Sxpr obj);
extern int string_length(Sxpr obj);
@@ -405,15 +416,20 @@ static inline Sxpr mkbool(int b){
#define k_true "true"
#define k_false "false"
-#define c_var '$'
#define c_escape '\\'
#define c_single_quote '\''
#define c_double_quote '"'
#define c_string_open c_double_quote
#define c_string_close c_double_quote
-#define c_data_open '['
-#define c_data_close ']'
-#define c_binary '*'
+
+#define c_data_open '<'
+#define c_data_quote '<'
+#define c_data_count '*'
+//#define c_data_open '['
+//#define c_data_close ']'
+//#define c_binary '*'
+
+#define c_var '$'
#define c_eval '!'
#define c_concat_open '{'
#define c_concat_close '}'
diff --git a/tools/libxutil/sxpr_parser.c b/tools/vnet/libxutil/sxpr_parser.c
index ba29c49f9c..f93d3505b2 100644
--- a/tools/libxutil/sxpr_parser.c
+++ b/tools/vnet/libxutil/sxpr_parser.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2001 - 2004 Mike Wray <mike.wray@hp.com>
+ * Copyright (C) 2001 - 2005 Mike Wray <mike.wray@hp.com>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
@@ -26,6 +26,8 @@
# include <errno.h>
#endif
+#include "sys_net.h"
+
#include "iostream.h"
#include "lexis.h"
#include "sxpr_parser.h"
@@ -42,45 +44,25 @@
* @author Mike Wray <mike.wray@hpl.hp.com>
*/
+#ifdef DEBUG
#define dprintf(fmt, args...) IOStream_print(iostdout, "[DEBUG] %s" fmt, __FUNCTION__, ##args)
+#else
+#define dprintf(fmt, args...) do{ }while(0)
+#endif
+
#undef printf
#define printf(fmt, args...) IOStream_print(iostdout, fmt, ##args)
-static void reset(Parser *z);
-static int inputchar(Parser *p, char c);
-static int savechar(Parser *p, char c);
-extern void parse_error(Parser *in);
-extern void parse_error_id(Parser *in, ParseErrorId id);
-
-static int begin_start(Parser *p, char c);
static int state_start(Parser *p, char c);
-static int end_start(Parser *p);
-
-static int begin_comment(Parser *p, char c);
-static int state_comment(Parser *p, char c);
-static int end_comment(Parser *p);
-
-static int begin_string(Parser *p, char c);
-static int state_string(Parser *p, char c);
-static int end_string(Parser *p);
-static int state_escape(Parser *p, char c);
-static int state_octal(Parser *p, char c);
-static int state_hex(Parser *p, char c);
-
-static int begin_atom(Parser *p, char c);
-static int state_atom(Parser *p, char c);
-static int end_atom(Parser *p);
-
-static int state_list(Parser *p, char c);
-static int begin_list(Parser *p, char c);
-static int end_list(Parser *p);
+static int begin_start(Parser *p, char c);
+#if 0
/** Print a parse error.
*
* @param in parser
* @param msg format followed by printf arguments
*/
-void eprintf(Parser *in, char *msg, ...){
+static void eprintf(Parser *in, char *msg, ...){
va_list args;
if(in->error_out){
va_start(args, msg);
@@ -94,7 +76,7 @@ void eprintf(Parser *in, char *msg, ...){
* @param in parser
* @param msg format followed by printf arguments
*/
-void wprintf(Parser *in, char *msg, ...){
+static void wprintf(Parser *in, char *msg, ...){
va_list args;
if(in->error_out){
va_start(args, msg);
@@ -102,13 +84,15 @@ void wprintf(Parser *in, char *msg, ...){
va_end(args);
}
}
+#endif
+
/*============================================================================*/
/** Record defining the message for a parse error. */
typedef struct {
- ParseErrorId id;
- char *message;
+ ParseErrorId id;
+ char *message;
} ParseError;
/** Format for printing parse error messages. */
@@ -116,18 +100,191 @@ typedef struct {
/** Message catalog for the parse error codes. */
static ParseError catalog[] = {
- { PARSE_ERR_UNSPECIFIED, "unspecified error" },
- { PARSE_ERR_NOMEM, "out of memory" },
- { PARSE_ERR_UNEXPECTED_EOF, "unexpected end of input" },
- { PARSE_ERR_TOKEN_TOO_LONG, "token too long" },
- { PARSE_ERR_INVALID_SYNTAX, "syntax error" },
- { PARSE_ERR_INVALID_ESCAPE, "invalid escape" },
- { 0, NULL }
+ { PARSE_ERR_UNSPECIFIED, "unspecified error" },
+ { PARSE_ERR_NOMEM, "out of memory" },
+ { PARSE_ERR_UNEXPECTED_EOF, "unexpected end of input" },
+ { PARSE_ERR_TOKEN_TOO_LONG, "token too long" },
+ { PARSE_ERR_INVALID_SYNTAX, "syntax error" },
+ { PARSE_ERR_INVALID_ESCAPE, "invalid escape" },
+ { 0, NULL }
};
/** Number of entries in the message catalog. */
const static int catalog_n = sizeof(catalog)/sizeof(ParseError);
+/** Set the parser error stream.
+ * Parse errors are reported on the the error stream if it is non-null.
+ *
+ * @param z parser
+ * @param error_out error stream
+ */
+void Parser_set_error_stream(Parser *z, IOStream *error_out){
+ z->error_out = error_out;
+}
+
+/** Get the parser error message for an error code.
+ *
+ * @param id error code
+ * @return error message (empty string if the code is unknown)
+ */
+static char *get_message(ParseErrorId id){
+ int i;
+ for(i = 0; i < catalog_n; i++){
+ if(id == catalog[i].id){
+ return catalog[i].message;
+ }
+ }
+ return "";
+}
+
+#if 0
+/** Get the line number.
+ *
+ * @param in parser
+ */
+static int get_line(Parser *in){
+ return in->line_no;
+}
+
+/** Get the column number.
+ *
+ * @param in parser
+ */
+static int get_column(Parser *in){
+ return in->char_no;
+}
+#endif
+
+/** Get the line number the current token started on.
+ *
+ * @param in parser
+ */
+static int get_tok_line(Parser *in){
+ return in->tok_begin_line;
+}
+
+/** Get the column number the current token started on.
+ *
+ * @param in parser
+ */
+static int get_tok_column(Parser *in){
+ return in->tok_begin_char;
+}
+
+/** Return the current token.
+ * The return value points at the internal buffer, so
+ * it must not be modified (or freed). Use copy_token() if you need a copy.
+ *
+ * @param p parser
+ * @return token
+ */
+char *peek_token(Parser *p){
+ return p->tok;
+}
+
+int token_len(Parser *p){
+ return p->tok_end - p->tok;
+}
+
+/** Return a copy of the current token.
+ * The returned value should be freed when finished with.
+ *
+ * @param p parser
+ * @return copy of token
+ */
+char *copy_token(Parser *p){
+ int n = token_len(p);
+ char *buf = allocate(n + 1);
+ if(buf){
+ memcpy(buf, peek_token(p), n);
+ buf[n] = '\0';
+ }
+ return buf;
+}
+
+void new_token(Parser *p){
+ memset(p->buf, 0, p->buf_end - p->buf);
+ p->tok = p->buf;
+ p->tok_end = p->tok;
+ p->tok_begin_line = p->line_no;
+ p->tok_begin_char = p->char_no;
+}
+
+/** Report a parse error.
+ * Does nothing if the error stream is null or there is no error.
+ *
+ * @param in parser
+ */
+static void report_error(Parser *in){
+ if(in->error_out && in->err){
+ char *msg = get_message(in->err);
+ char *tok = peek_token(in);
+ IOStream_print(in->error_out, PARSE_ERR_FMT,
+ get_tok_line(in), get_tok_column(in), msg);
+ if(tok && tok[0]){
+ IOStream_print(in->error_out, " '%s'", tok);
+ }
+ IOStream_print(in->error_out, "\n");
+ }
+}
+
+/** Get the error message for the current parse error code.
+ * Does nothing if there is no error.
+ *
+ * @param in parser
+ * @param buf where to place the message
+ * @param n maximum number of characters to place in buf
+ * @return current error code (zero for no error)
+ */
+int Parser_error_message(Parser *in, char *buf, int n){
+ if(in->err){
+ char *msg = get_message(in->err);
+ snprintf(buf, n, PARSE_ERR_FMT, get_tok_line(in),
+ get_tok_column(in), msg);
+ }
+ return in->err;
+}
+
+/** Flag a parse error. All subsequent reads will fail.
+ * Does not change the parser error code if it is already set.
+ *
+ * @param in parser
+ * @param id error code
+ */
+int Parser_error_id(Parser *in, ParseErrorId id){
+ if(!in->err){
+ in->err = id;
+ report_error(in);
+ }
+ return -EINVAL;
+}
+
+/** Flag an unspecified parse error.
+ *
+ * @param in parser
+ */
+int Parser_error(Parser *in){
+ return Parser_error_id(in, PARSE_ERR_INVALID_SYNTAX);
+}
+
+/** Test if the parser's error flag is set.
+ *
+ * @param in parser
+ * @return 1 if set, 0 otherwise
+ */
+int Parser_has_error(Parser *in){
+ return (in->err > 0);
+}
+
+/** Test if the parser is at end of input.
+ *
+ * @param in parser
+ * @return 1 if at EOF, 0 otherwise
+ */
+int Parser_at_eof(Parser *p){
+ return p->eof;
+}
+
void ParserState_free(ParserState *z){
if(!z) return;
objfree(z->val);
@@ -136,21 +293,30 @@ void ParserState_free(ParserState *z){
int ParserState_new(ParserStateFn *fn, char *name,
ParserState *parent, ParserState **val){
- int err = 0;
+ int err = -ENOMEM;
ParserState *z;
z = ALLOCATE(ParserState);
- if(z){
- z->name = name;
- z->fn = fn;
- z->parent = parent;
- z->val = ONULL;
- } else {
- err = -ENOMEM;
- }
- if(!err) *val = z;
+ if(!z) goto exit;
+ z->name = name;
+ z->fn = fn;
+ z->parent = parent;
+ z->val = ONULL;
+ err = 0;
+ exit:
+ *val = (err ? NULL : z);
return err;
}
+void Parser_pop(Parser *p){
+ ParserState *s = p->state;
+ if(!s) return;
+ p->state = s->parent;
+ if (p->start_state == s) {
+ p->start_state = NULL;
+ }
+ ParserState_free(s);
+}
+
/** Free a parser.
* No-op if the parser is null.
*
@@ -158,22 +324,80 @@ int ParserState_new(ParserStateFn *fn, char *name,
*/
void Parser_free(Parser *z){
if(!z) return;
+ // Free any remaining states, releasing each state's value first so the
+
+ while(z->state){
+ objfree(z->state->val);
+ Parser_pop(z);
+ }
+ if(z->buf) deallocate(z->buf);
objfree(z->val);
z->val = ONONE;
- if (z->buf)
- deallocate(z->buf);
deallocate(z);
}
+int Parser_push(Parser *p, ParserStateFn *fn, char *name){
+ return ParserState_new(fn, name, p->state, &p->state);
+}
+
+int Parser_return(Parser *p){
+ int err = 0;
+ Sxpr val = ONONE;
+ if(!p->state){
+ err = -EINVAL;
+ goto exit;
+ }
+ val = p->state->val;
+ p->state->val = ONONE;
+ Parser_pop(p);
+ if(p->state){
+ err = cons_push(&p->state->val, val);
+ } else {
+ val = nrev(val);
+ p->val = val;
+ }
+ exit:
+ if(err){
+ objfree(val);
+ }
+ return err;
+}
+
+/** Reset the fields of a parser to initial values.
+ *
+ * @param z parser
+ */
+static void reset(Parser *z){
+ // leave flags
+ // leave error_out
+ while(z->state){
+ Parser_pop(z);
+ }
+ z->val = ONONE;
+ z->eof = 0;
+ z->err = 0;
+ z->line_no = 1;
+ z->char_no = 0;
+ memset(z->buf, 0, z->buf_end - z->buf);
+ z->tok = z->buf;
+ z->tok_end = z->tok;
+ z->tok_begin_line = 0;
+ z->tok_begin_char = 0;
+ z->start_state = NULL;
+}
+
/** Create a new parser. The error stream defaults to null.
*/
Parser * Parser_new(void){
Parser *z = ALLOCATE(Parser);
+ int n = PARSER_BUF_SIZE;
int err = -ENOMEM;
if(!z) goto exit;
+ z->buf = allocate(n);
+ if(!z->buf) goto exit;
err = 0;
- z->buf = NULL;
+ z->buf_end = z->buf + n;
+ z->begin = begin_start;
reset(z);
exit:
if(err){
@@ -190,7 +414,7 @@ Parser * Parser_new(void){
* @param p parser
* @return error flag: 0 on success, non-zero on error
*/
-static int inputchar(Parser *p, char c){
+static int input_char(Parser *p, char c){
int err = 0;
if(c=='\n'){
p->line_no++;
@@ -201,103 +425,24 @@ static int inputchar(Parser *p, char c){
return err;
}
-static int savechar(Parser *p, char c){
+int save_char(Parser *p, char c){
int err = 0;
- if(p->buf_i >= p->buf_n){
- char *nbuf;
- nbuf = allocate(2 * (p->buf_n + 1));
- if (nbuf == NULL) {
+ if(p->tok_end >= p->buf_end){
+ int buf_n = (p->buf_end - p->buf) + PARSER_BUF_INCREMENT;
+ char *buf = allocate(buf_n);
+ if(!buf){
err = -ENOMEM;
goto exit;
}
- memcpy(nbuf, p->buf, p->buf_i);
+ memcpy(buf, p->buf, p->tok_end - p->buf);
+ p->buf_end = buf + buf_n;
+ p->tok = buf + (p->tok - p->buf);
+ p->tok_end = buf + (p->tok_end - p->buf);
deallocate(p->buf);
- p->buf = nbuf;
- p->buf_n = 2 * (p->buf_n + 1) - 1;
- }
- p->buf[p->buf_i] = c;
- p->buf_i++;
- exit:
- return err;
-}
-
-int Parser_input_char(Parser *p, char c){
- int err = 0;
- if(at_eof(p)){
- //skip;
- } else {
- inputchar(p, c);
- }
- if(!p->state){
- err = begin_start(p, c);
- if(err) goto exit;
- }
- err = p->state->fn(p, c);
- exit:
- return err;
-}
-
-int Parser_input_eof(Parser *p){
- int err = 0;
- p->eof = 1;
- err = Parser_input_char(p, IOSTREAM_EOF);
- return err;
-}
-
-int Parser_input(Parser *p, char *buf, int buf_n){
- int err = 0;
- int i = 0;
- if(buf_n <= 0){
- err = Parser_input_eof(p);
- goto exit;
- }
- for(i = 0; i<buf_n; i++){
- err = Parser_input_char(p, buf[i]);
- if(err) goto exit;
+ p->buf = buf;
}
+ *p->tok_end++ = c;
exit:
- err = (err < 0 ? err : buf_n);
- return err;
-}
-
-int Parser_push(Parser *p, ParserStateFn *fn, char *name){
- int err = 0;
- err = ParserState_new(fn, name, p->state, &p->state);
- return err;
-}
-
-int Parser_pop(Parser *p){
- int err = 0;
- ParserState *s = p->state;
- p->state = s->parent;
- if (p->start_state == s) {
- p->start_state = NULL;
- }
- ParserState_free(s);
- return err;
-}
-
-int Parser_return(Parser *p){
- int err = 0;
- Sxpr val = ONONE;
- if(!p->state){
- err = -EINVAL;
- goto exit;
- }
- val = p->state->val;
- p->state->val = ONONE;
- err = Parser_pop(p);
- if(err) goto exit;
- if(p->state){
- err = cons_push(&p->state->val, val);
- } else {
- val = nrev(val);
- p->val = val;
- }
- exit:
- if(err){
- objfree(val);
- }
return err;
}
@@ -311,30 +456,8 @@ static int is_separator(Parser *p, char c){
return in_sep_class(c);
}
-/** Return the current token.
- * The return value points at the internal buffer, so
- * it must not be modified (or freed). Use copy_token() if you need a copy.
- *
- * @param p parser
- * @return token
- */
-char *peek_token(Parser *p){
- return p->buf;
-}
-
-/** Return a copy of the current token.
- * The returned value should be freed when finished with.
- *
- * @param p parser
- * @return copy of token
- */
-char *copy_token(Parser *p){
- return strdup(peek_token(p));
-}
-
-static int do_intern(Parser *p){
+int Parser_set_value(Parser *p, Sxpr obj){
int err = 0;
- Sxpr obj = intern(peek_token(p));
if(NOMEMP(obj)){
err = -ENOMEM;
} else {
@@ -342,27 +465,33 @@ static int do_intern(Parser *p){
}
return err;
}
+
+int Parser_intern(Parser *p){
+ Sxpr obj = intern(peek_token(p));
+ return Parser_set_value(p, obj);
+}
-static int do_string(Parser *p){
- int err = 0;
- Sxpr obj;
- obj = string_new(peek_token(p));
- if(NOMEMP(obj)){
- err = -ENOMEM;
- } else {
- p->state->val = obj;
- }
- return err;
+int Parser_atom(Parser *p){
+ Sxpr obj = atom_new(peek_token(p));
+ return Parser_set_value(p, obj);
}
-void newtoken(Parser *p){
- memset(p->buf, 0, p->buf_n);
- p->buf_i = 0;
- p->tok_begin_line = p->line_no;
- p->tok_begin_char = p->char_no;
+int Parser_string(Parser *p){
+ Sxpr obj = string_new_n(peek_token(p), token_len(p));
+ return Parser_set_value(p, obj);
+}
+
+int Parser_data(Parser *p){
+ Sxpr obj = string_new_n(peek_token(p), token_len(p));
+ return Parser_set_value(p, obj);
}
-int get_escape(char c, char *d){
+int Parser_uint(Parser *p){
+ unsigned int x = htonl(*(unsigned int *)peek_token(p));
+ return Parser_set_value(p, OINT(x));
+}
+
+static int get_escape(char c, char *d){
int err = 0;
switch(c){
case 'a': *d = '\a'; break;
@@ -388,15 +517,18 @@ int Parser_ready(Parser *p){
Sxpr Parser_get_val(Parser *p){
Sxpr v = ONONE;
if(CONSP(p->val)){
- v = CAR(p->val);
- p->val = CDR(p->val);
- } else if (CONSP(p->start_state->val)){
+ } else if (p->start_state && CONSP(p->start_state->val)){
p->val = p->start_state->val;
p->val = nrev(p->val);
p->start_state->val = ONULL;
- v = CAR(p->val);
- p->val = CDR(p->val);
- }
+ } else {
+ goto exit;
+ }
+ Sxpr w = p->val;
+ v = CAR(w);
+ p->val = CDR(w);
+ hfree(w);
+ exit:
return v;
}
@@ -412,151 +544,51 @@ Sxpr Parser_get_all(Parser *p){
}
return v;
}
-
-int begin_start(Parser *p, char c){
- int err = 0;
- err = Parser_push(p, state_start, "start");
- if(err) goto exit;
- p->start_state = p->state;
- exit:
- return err;
-}
-int state_start(Parser *p, char c){
+static int state_comment(Parser *p, char c){
int err = 0;
- if(at_eof(p)){
- err = end_start(p);
- } else if(in_space_class(c)){
- //skip
- } else if(in_comment_class(c)){
- begin_comment(p, c);
- } else if(c == c_list_open){
- begin_list(p, c);
- } else if(c == c_list_close){
- parse_error(p);
- err = -EINVAL;
- } else if(in_string_quote_class(c)){
- begin_string(p, c);
- } else if(in_printable_class(c)){
- begin_atom(p, c);
- } else if(c == 0x04){
- //ctrl-D, EOT: end-of-text.
- Parser_input_eof(p);
+ if(c == '\n' || Parser_at_eof(p)){
+ Parser_pop(p);
} else {
- parse_error(p);
- err = -EINVAL;
+ err = input_char(p, c);
}
return err;
}
-int end_start(Parser *p){
- int err = 0;
- err = Parser_return(p);
- return err;
-}
-
-int begin_comment(Parser *p, char c){
+static int begin_comment(Parser *p, char c){
int err = 0;
err = Parser_push(p, state_comment, "comment");
if(err) goto exit;
- err = inputchar(p, c);
- exit:
- return err;
-}
-
-int state_comment(Parser *p, char c){
- int err = 0;
- if(c == '\n' || at_eof(p)){
- err = end_comment(p);
- } else {
- err = inputchar(p, c);
- }
- return err;
-}
-
-int end_comment(Parser *p){
- return Parser_pop(p);
-}
-
-int begin_string(Parser *p, char c){
- int err = 0;
- err = Parser_push(p, state_string, "string");
- if(err) goto exit;
- newtoken(p);
- p->state->delim = c;
+ err = input_char(p, c);
exit:
return err;
}
-int state_string(Parser *p, char c){
- int err = 0;
- if(at_eof(p)){
- parse_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
- err = -EINVAL;
- } else if(c == p->state->delim){
- err = end_string(p);
- } else if(c == '\\'){
- err = Parser_push(p, state_escape, "escape");
- } else {
- err = savechar(p, c);
- }
- return err;
-}
-
-int end_string(Parser *p){
+static int end_string(Parser *p){
int err = 0;
- err = do_string(p);
+ err = Parser_string(p);
if(err) goto exit;
err = Parser_return(p);
exit:
return err;
}
-int state_escape(Parser *p, char c){
- int err = 0;
- char d;
- if(at_eof(p)){
- parse_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
- err = -EINVAL;
- goto exit;
- }
- if(get_escape(c, &d) == 0){
- err = savechar(p, d);
- if(err) goto exit;
- err = Parser_pop(p);
- } else if(c == 'x'){
- p->state->fn = state_hex;
- p->state->ival = 0;
- p->state->count = 0;
- } else {
- p->state->fn = state_octal;
- p->state->ival = 0;
- p->state->count = 0;
- err = Parser_input_char(p, c);
- }
- exit:
- return err;
-}
-
-int octaldone(Parser *p){
+static int octaldone(Parser *p){
int err = 0;
char d = (char)(p->state->ival & 0xff);
- err = Parser_pop(p);
- if(err) goto exit;
+ Parser_pop(p);
err = Parser_input_char(p, d);
- exit:
return err;
}
-int octaldigit(Parser *p, char c){
+static int octaldigit(Parser *p, int d){
int err = 0;
p->state->ival *= 8;
- p->state->ival += c - '0';
+ p->state->ival += d;
p->state->count++;
if(err) goto exit;
if(p->state->ival < 0 || p->state->ival > 0xff){
- parse_error(p);
- err = -EINVAL;
+ err = Parser_error(p);
goto exit;
}
if(p->state->count == 3){
@@ -566,14 +598,13 @@ int octaldigit(Parser *p, char c){
return err;
}
-int state_octal(Parser *p, char c){
+static int state_octal(Parser *p, char c){
int err = 0;
- if(at_eof(p)){
- parse_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
- err = -EINVAL;
+ if(Parser_at_eof(p)){
+ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
goto exit;
} else if('0' <= c && c <= '7'){
- err = octaldigit(p, c);
+ err = octaldigit(p, c - '0');
} else {
err = octaldone(p);
if(err) goto exit;
@@ -583,25 +614,22 @@ int state_octal(Parser *p, char c){
return err;
}
-int hexdone(Parser *p){
+static int hexdone(Parser *p){
int err = 0;
char d = (char)(p->state->ival & 0xff);
- err = Parser_pop(p);
- if(err) goto exit;
+ Parser_pop(p);
err = Parser_input_char(p, d);
- exit:
return err;
}
-int hexdigit(Parser *p, char c, char d){
+static int hexdigit(Parser *p, int d){
int err = 0;
p->state->ival *= 16;
- p->state->ival += c - d;
+ p->state->ival += d;
p->state->count++;
if(err) goto exit;
if(p->state->ival < 0 || p->state->ival > 0xff){
- parse_error(p);
- err = -EINVAL;
+ err = Parser_error(p);
goto exit;
}
if(p->state->count == 2){
@@ -611,20 +639,19 @@ int hexdigit(Parser *p, char c, char d){
return err;
}
-int state_hex(Parser *p, char c){
+static int state_hex(Parser *p, char c){
int err = 0;
- if(at_eof(p)){
- parse_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
- err = -EINVAL;
+ if(Parser_at_eof(p)){
+ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
goto exit;
} else if('0' <= c && c <= '9'){
- err = hexdigit(p, c, '0');
+ err = hexdigit(p, c - '0');
} else if('A' <= c && c <= 'F'){
- err = hexdigit(p, c, 'A');
+ err = hexdigit(p, c - 'A' + 10);
} else if('a' <= c && c <= 'f'){
- err = hexdigit(p, c, 'a');
+ err = hexdigit(p, c - 'a' + 10);
} else if(p->state->count){
- err =hexdone(p);
+ err = hexdone(p);
if(err) goto exit;
Parser_input_char(p, c);
}
@@ -632,19 +659,67 @@ int state_hex(Parser *p, char c){
return err;
}
-int begin_atom(Parser *p, char c){
+static int state_escape(Parser *p, char c){
int err = 0;
- err = Parser_push(p, state_atom, "atom");
+ char d;
+ if(Parser_at_eof(p)){
+ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
+ goto exit;
+ }
+ if(get_escape(c, &d) == 0){
+ err = save_char(p, d);
+ if(err) goto exit;
+ Parser_pop(p);
+ } else if(c == 'x'){
+ p->state->fn = state_hex;
+ p->state->ival = 0;
+ p->state->count = 0;
+ } else {
+ p->state->fn = state_octal;
+ p->state->ival = 0;
+ p->state->count = 0;
+ err = Parser_input_char(p, c);
+ }
+ exit:
+ return err;
+}
+
+static int state_string(Parser *p, char c){
+ int err = 0;
+ if(Parser_at_eof(p)){
+ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
+ } else if(c == p->state->delim){
+ err = end_string(p);
+ } else if(c == '\\'){
+ err = Parser_push(p, state_escape, "escape");
+ } else {
+ err = save_char(p, c);
+ }
+ return err;
+}
+
+static int begin_string(Parser *p, char c){
+ int err = 0;
+ err = Parser_push(p, state_string, "string");
+ if(err) goto exit;
+ new_token(p);
+ p->state->delim = c;
+ exit:
+ return err;
+}
+
+static int end_atom(Parser *p){
+ int err = 0;
+ err = Parser_atom(p);
if(err) goto exit;
- newtoken(p);
- err = savechar(p, c);
+ err = Parser_return(p);
exit:
return err;
}
-int state_atom(Parser *p, char c){
+static int state_atom(Parser *p, char c){
int err = 0;
- if(at_eof(p)){
+ if(Parser_at_eof(p)){
err = end_atom(p);
} else if(is_separator(p, c) ||
in_space_class(c) ||
@@ -653,266 +728,232 @@ int state_atom(Parser *p, char c){
if(err) goto exit;
err = Parser_input_char(p, c);
} else {
- err = savechar(p, c);
+ err = save_char(p, c);
}
exit:
return err;
}
-int end_atom(Parser *p){
+static int begin_atom(Parser *p, char c){
int err = 0;
- err = do_intern(p);
+ err = Parser_push(p, state_atom, "atom");
if(err) goto exit;
- err = Parser_return(p);
+ new_token(p);
+ err = save_char(p, c);
exit:
return err;
}
-int state_list(Parser *p, char c){
+static int end_data(Parser *p){
int err = 0;
- if(at_eof(p)){
- parse_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
- err = -EINVAL;
- } else if(c == c_list_close){
- p->state->val = nrev(p->state->val);
- err = end_list(p);
- } else {
- err = state_start(p, c);
- }
+ err = Parser_data(p);
+ if(err) goto exit;
+ err = Parser_return(p);
+ exit:
return err;
-
-}
-
-int begin_list(Parser *p, char c){
- return Parser_push(p, state_list, "list");
-}
-
-int end_list(Parser *p){
- return Parser_return(p);
}
-/** Reset the fields of a parser to initial values.
- *
- * @param z parser
- */
-static void reset(Parser *z){
- IOStream *error_out = z->error_out;
- int flags = z->flags;
- int buf_n = z->buf_n;
- char *buf = z->buf;
- memzero(z, sizeof(Parser));
- if (buf) {
- z->buf = buf;
- z->buf_n = buf_n;
- } else {
- z->buf = (char *)allocate(PARSER_BUF_SIZE);
- z->buf_n = PARSER_BUF_SIZE - 1;
- }
- z->buf_i = 0;
- z->line_no = 1;
- z->char_no = 0;
- z->error_out = error_out;
- z->flags = flags;
-}
-
-/** Set the parser error stream.
- * Parse errors are reported on the the error stream if it is non-null.
- *
- * @param z parser
- * @param error_out error stream
- */
-void set_error_stream(Parser *z, IOStream *error_out){
- if(z){
- z->error_out = error_out;
- }
-}
-
-/** Get the parser error message for an error code.
- *
- * @param id error code
- * @return error message (empty string if the code is unknown)
- */
-static char *get_message(ParseErrorId id){
- int i;
- for(i=0; i<catalog_n; i++){
- if(id == catalog[i].id){
- return catalog[i].message;
+static int counted_data(Parser *p, char c){
+ int err = 0;
+ err = save_char(p, c);
+ if(err) goto exit;
+ if(token_len(p) == p->state->count){
+ err = end_data(p);
}
- }
- return "";
-}
-
-/** Get the line number.
- *
- * @param in parser
- */
-int get_line(Parser *in){
- return in->line_no;
-}
-
-/** Get the column number.
- *
- * @param in parser
- */
-int get_column(Parser *in){
- return in->char_no;
+ exit:
+ return err;
}
-/** Get the line number the current token started on.
- *
- * @param in parser
- */
-int get_tok_line(Parser *in){
- return in->tok_begin_line;
+static int counted_data_count(Parser *p, char c){
+ int err = 0;
+ if(c == p->state->delim){
+ new_token(p);
+ p->state->count = p->state->ival;
+ p->state->fn = counted_data;
+ } else if('0' <= c && c <= '9'){
+ p->state->ival *= 10;
+ p->state->ival += c - '0';
+ } else {
+ err = -EINVAL;
+ }
+ return err;
}
-/** Get the column number the current token started on.
- *
- * @param in parser
- */
-int get_tok_column(Parser *in){
- return in->tok_begin_char;
+static int quoted_data(Parser *p, char c){
+ int err = 0;
+ int count = p->state->count;
+ err = save_char(p, c);
+ if(err) goto exit;
+ // Check that buf is longer than delim and
+ // ends with delim. If so, trim delim off and return.
+ if((token_len(p) >= count) &&
+ !memcmp(p->tok_end - count, p->buf, count)){
+ p->tok_end -= count;
+ end_data(p);
+ }
+ exit:
+ return err;
}
-/** Report a parse error.
- * Does nothing if the error stream is null or there is no error.
- *
- * @param in parser
- */
-static void report_error(Parser *in){
- if(in->error_out && in->err){
- char *msg = get_message(in->err);
- char *tok = peek_token(in);
- IOStream_print(in->error_out, PARSE_ERR_FMT,
- get_tok_line(in), get_tok_column(in), msg);
- if(tok && tok[0]){
- IOStream_print(in->error_out, " '%s'", tok);
+static int quoted_data_delim(Parser *p, char c){
+ // Saves the delim in the token buffer.
+ int err = 0;
+ err = save_char(p, c);
+ if(err) goto exit;
+ if(c == p->state->delim){
+ p->state->fn = quoted_data;
+ p->state->count = token_len(p);
+ // Advance the token pointer past the delim.
+ p->tok = p->tok_end;
}
- IOStream_print(in->error_out, "\n");
- }
+ exit:
+ return err;
}
-/** Get the error message for the current parse error code.
- * Does nothing if there is no error.
- *
- * @param in parser
- * @param buf where to place the message
- * @param n maximum number of characters to place in buf
- * @return current error code (zero for no error)
- */
-int parse_error_message(Parser *in, char *buf, int n){
- if(in->err){
- char *msg = get_message(in->err);
- snprintf(buf, n, PARSE_ERR_FMT, get_tok_line(in), get_tok_column(in), msg);
+static int state_data(Parser *p, char c){
+ // Quoted data:
+ // <<delim< anything not containing delimiter<delim<
+ // Where 'delim' is anything not containing '<'.
+ // Counted data:
+ // <*nnn..* N bytes
+ // Where nnn... is N in decimal (the byte count for the data that follows).
+ int err = 0;
+ switch(c){
+ case c_data_count:
+ p->state->delim = c;
+ p->state->fn = counted_data_count;
+ p->state->ival = 0;
+ new_token(p);
+ break;
+ case c_data_quote:
+ p->state->delim = c;
+ p->state->fn = quoted_data_delim;
+ new_token(p);
+ err = save_char(p, c);
+ break;
+ default:
+ err = Parser_error(p);
+ break;
}
- return in->err;
+ return err;
}
-/** Flag an unspecified parse error. All subsequent reads will fail.
- *
- * @param in parser
- */
-void parse_error(Parser *in){
- parse_error_id(in, PARSE_ERR_INVALID_SYNTAX);
+static int begin_data(Parser *p, char c){
+ int err = 0;
+ err = Parser_push(p, state_data, "data");
+ if(err) goto exit;
+ new_token(p);
+ exit:
+ return err;
}
-/** Flag a parse error. All subsequent reads will fail.
- * Does not change the parser error code if it is already set.
- *
- * @param in parser
- * @param id error code
- */
-void parse_error_id(Parser *in, ParseErrorId id){
- if(!in->err){
- in->err = id;
- report_error(in);
+static int state_list(Parser *p, char c){
+ int err = 0;
+ dprintf(">\n");
+ if(Parser_at_eof(p)){
+ err = Parser_error_id(p, PARSE_ERR_UNEXPECTED_EOF);
+ } else if(c == c_list_close){
+ p->state->val = nrev(p->state->val);
+ err = Parser_return(p);
+ } else {
+ err = state_start(p, c);
}
+ dprintf("< err=%d\n", err);
+ return err;
+
}
-/** Test if the parser's error flag is set.
- *
- * @param in parser
- * @return 1 if set, 0 otherwise
- */
-int has_error(Parser *in){
- return (in->err > 0);
-}
-
-/** Test if the parser is at end of input.
- *
- * @param in parser
- * @return 1 if at EOF, 0 otherwise
- */
-int at_eof(Parser *p){
- return p->eof;
+static int begin_list(Parser *p, char c){
+ return Parser_push(p, state_list, "list");
}
-#ifdef SXPR_PARSER_MAIN
-/* Stuff for standalone testing. */
-
-#include "file_stream.h"
-#include "string_stream.h"
-
-extern int stringof(Sxpr exp, char **s);
-int child_string(Sxpr exp, Sxpr key, char **s){
+static int state_start(Parser *p, char c){
int err = 0;
- Sxpr val = sxpr_child_value(exp, key, ONONE);
- err = stringof(val, s);
+ dprintf(">\n");
+ if(Parser_at_eof(p)){
+ err = Parser_return(p);
+ } else if(in_space_class(c)){
+ //skip
+ } else if(in_comment_class(c)){
+ begin_comment(p, c);
+ } else if(c == c_list_open){
+ begin_list(p, c);
+ } else if(c == c_list_close){
+ err = Parser_error(p);
+ } else if(in_string_quote_class(c)){
+ begin_string(p, c);
+ } else if(c == c_data_open){
+ begin_data(p, c);
+ } else if(in_printable_class(c)){
+ begin_atom(p, c);
+ } else if(c == 0x04){
+ //ctrl-D, EOT: end-of-text.
+ Parser_input_eof(p);
+ } else {
+ err = Parser_error(p);
+ }
+ dprintf("< err=%d\n", err);
return err;
}
-extern int intof(Sxpr exp, int *v);
-int child_int(Sxpr exp, Sxpr key, int *v){
+int begin_start(Parser *p, char c){
int err = 0;
- Sxpr val = sxpr_child_value(exp, key, ONONE);
- err = intof(val, v);
+ dprintf(">\n");
+ err = Parser_push(p, state_start, "start");
+ if(err) goto exit;
+ p->start_state = p->state;
+ exit:
+ dprintf("< err=%d\n", err);
return err;
}
-int eval_vnet(Sxpr exp){
+int Parser_input_char(Parser *p, char c){
int err = 0;
- Sxpr oid = intern("id");
- int id;
- err = child_int(exp, oid, &id);
- if(err) goto exit;
- dprintf("> vnet id=%d\n", id);
- exit:
- dprintf("< err=%d\n", err);
+ if(Parser_at_eof(p)){
+ //skip;
+ } else {
+ input_char(p, c);
+ }
+ if(!p->state){
+ err = p->begin(p, c);
+ if(err) goto exit;
+ }
+ err = p->state->fn(p, c);
+ exit:
return err;
}
-int eval_connect(Sxpr exp){
+int Parser_input_eof(Parser *p){
int err = 0;
- Sxpr ovif = intern("vif");
- Sxpr ovnet = intern("vnet");
- char *vif;
- int vnet;
-
- err = child_string(exp, ovif, &vif);
- if(err) goto exit;
- err = child_int(exp, ovnet, &vnet);
- if(err) goto exit;
- dprintf("> connect vif=%s vnet=%d\n", vif, vnet);
- exit:
- dprintf("< err=%d\n", err);
+ p->eof = 1;
+ err = Parser_input_char(p, IOSTREAM_EOF);
return err;
}
-int eval(Sxpr exp){
+int Parser_input(Parser *p, char *buf, int buf_n){
int err = 0;
- Sxpr oconnect = intern("connect");
- Sxpr ovnet = intern("vnet");
-
- if(sxpr_elementp(exp, ovnet)){
- err = eval_vnet(exp);
- } else if(sxpr_elementp(exp, oconnect)){
- err = eval_connect(exp);
- } else {
- err = -EINVAL;
+ int i = 0;
+ dprintf("> |%s|\n", buf);
+ if(buf_n <= 0){
+ err = Parser_input_eof(p);
+ goto exit;
+ }
+ for(i = 0; i < buf_n; i++){
+ err = Parser_input_char(p, buf[i]);
+ if(err) goto exit;
}
+ exit:
+ err = (err < 0 ? err : buf_n);
+ dprintf("< err=%d\n", err);
return err;
}
+#ifdef SXPR_PARSER_MAIN
+/* Stuff for standalone testing. */
+
+#include "file_stream.h"
+//#include "string_stream.h"
+
/** Main program for testing.
* Parses input and prints it.
*
@@ -926,14 +967,16 @@ int main(int argc, char *argv[]){
char buf[1024];
int k;
Sxpr obj;
- //Sxpr l, x;
int i = 0;
pin = Parser_new();
- set_error_stream(pin, iostdout);
+ Parser_set_error_stream(pin, iostdout);
dprintf("> parse...\n");
while(1){
- k = fread(buf, 1, 1, stdin);
+ k = fread(buf, 1, 100, stdin);
+ if(k>=0){
+ buf[k+1] = '\0';
+ }
err = Parser_input(pin, buf, k);
while(Parser_ready(pin)){
obj = Parser_get_val(pin);
@@ -942,12 +985,6 @@ int main(int argc, char *argv[]){
}
if(k <= 0) break;
}
-/* obj = Parser_get_all(pin); */
-/* for(l = obj ; CONSP(l); l = CDR(l)){ */
-/* x = CAR(l); */
-/* objprint(iostdout, x, 0); printf("\n"); */
-/* eval(x); */
-/* } */
dprintf("> err=%d\n", err);
return 0;
}
diff --git a/tools/libxutil/sxpr_parser.h b/tools/vnet/libxutil/sxpr_parser.h
index a47554633d..591ed95572 100644
--- a/tools/libxutil/sxpr_parser.h
+++ b/tools/vnet/libxutil/sxpr_parser.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2001 - 2004 Mike Wray <mike.wray@hp.com>
+ * Copyright (C) 2001 - 2005 Mike Wray <mike.wray@hp.com>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
@@ -25,10 +25,13 @@
* Sxpr parsing definitions.
*/
-/** Size of a parser input buffer.
- * Tokens read must fit into this size (including trailing null).
+/** Initial size of a parser input buffer.
*/
-#define PARSER_BUF_SIZE 1024
+#define PARSER_BUF_SIZE 512
+
+/** Input buffer size increment (when it's full).
+ */
+#define PARSER_BUF_INCREMENT 512
struct Parser;
typedef int ParserStateFn(struct Parser *, char c);
@@ -43,13 +46,14 @@ typedef struct ParserState {
char *name;
} ParserState;
-/** Structure representing an input source for the parser.
- * Can read from any IOStream implementation.
- */
typedef struct Parser {
+ /** Initial state function. */
+ ParserStateFn *begin;
+ /** Parse value. */
Sxpr val;
/** Error reporting stream (null for no reports). */
IOStream *error_out;
+ /** End-of-file flag. */
int eof;
/** Error flag. Non-zero if there has been a read error. */
int err;
@@ -57,13 +61,11 @@ typedef struct Parser {
int line_no;
/** Column number of input (reset on new line). */
int char_no;
- /** Lookahead character. */
- char c;
/** Buffer for reading tokens. */
char *buf;
- /** Size of token buffer. */
- int buf_n;
- int buf_i;
+ char *buf_end;
+ char *tok;
+ char *tok_end;
/** Line the last token started on. */
int tok_begin_line;
/** Character number the last token started on. */
@@ -95,7 +97,7 @@ typedef enum {
* @param in parser
* @param flags flags mask
*/
-inline static void parser_flags_raise(Parser *in, int flags){
+inline static void Parser_flags_raise(Parser *in, int flags){
in->flags |= flags;
}
@@ -104,7 +106,7 @@ inline static void parser_flags_raise(Parser *in, int flags){
* @param in parser
* @param flags flags mask
*/
-inline static void parser_flags_lower(Parser *in, int flags){
+inline static void Parser_flags_lower(Parser *in, int flags){
in->flags &= ~flags;
}
@@ -112,7 +114,7 @@ inline static void parser_flags_lower(Parser *in, int flags){
*
* @param in parser
*/
-inline static void parser_flags_clear(Parser *in){
+inline static void Parser_flags_clear(Parser *in){
in->flags = 0;
}
@@ -121,14 +123,32 @@ extern Parser * Parser_new(void);
extern int Parser_input(Parser *p, char *buf, int buf_n);
extern int Parser_input_eof(Parser *p);
extern int Parser_input_char(Parser *p, char c);
-extern void set_error_stream(Parser *z, IOStream *error_out);
-
-extern int parse_error_message(Parser *in, char *buf, int n);
-extern int has_error(Parser *in);
-extern int at_eof(Parser *in);
-
-int Parser_ready(Parser *p);
-Sxpr Parser_get_val(Parser *p);
-Sxpr Parser_get_all(Parser *p);
+extern void Parser_set_error_stream(Parser *z, IOStream *error_out);
+
+extern int Parser_error_message(Parser *in, char *buf, int n);
+extern int Parser_has_error(Parser *in);
+extern int Parser_at_eof(Parser *in);
+
+extern int Parser_ready(Parser *p);
+extern Sxpr Parser_get_val(Parser *p);
+extern Sxpr Parser_get_all(Parser *p);
+
+/* Internal parser api. */
+void Parser_pop(Parser *p);
+int Parser_push(Parser *p, ParserStateFn *fn, char *name);
+int Parser_return(Parser *p);
+int Parser_at_eof(Parser *p);
+int Parser_error(Parser *in);
+int Parser_set_value(Parser *p, Sxpr val);
+int Parser_intern(Parser *p);
+int Parser_string(Parser *p);
+int Parser_data(Parser *p);
+int Parser_uint(Parser *p);
+
+char *peek_token(Parser *p);
+char *copy_token(Parser *p);
+void new_token(Parser *p);
+int save_char(Parser *p, char c);
+int token_len(Parser *p);
#endif /* ! _XUTIL_SXPR_PARSER_H_ */
diff --git a/tools/libxutil/sys_net.c b/tools/vnet/libxutil/sys_net.c
index 41436019b5..41436019b5 100644
--- a/tools/libxutil/sys_net.c
+++ b/tools/vnet/libxutil/sys_net.c
diff --git a/tools/libxutil/sys_net.h b/tools/vnet/libxutil/sys_net.h
index 61754940f2..61754940f2 100644
--- a/tools/libxutil/sys_net.h
+++ b/tools/vnet/libxutil/sys_net.h
diff --git a/tools/libxutil/sys_string.c b/tools/vnet/libxutil/sys_string.c
index 13a90dfd7d..22a8ae3688 100644
--- a/tools/libxutil/sys_string.c
+++ b/tools/vnet/libxutil/sys_string.c
@@ -49,6 +49,27 @@ inline static const char * convert_set_base(const char *s, int *base){
return s;
}
+/** Set the sign to use for converting a string to a number.
+ * Value is 1 for positive, -1 for negative.
+ *
+ * @param s input string
+ * @param sign where to put the sign
+ * @return rest of s to parse as a number
+ */
+inline static const char * convert_set_sign(const char *s, int *sign){
+ *sign = 1;
+ if(s){
+ if(*s == '+'){
+ *sign = 1;
+ s++;
+ } else if (*s == '-'){
+ *sign = -1;
+ s++;
+ }
+ }
+ return s;
+}
+
/** Get the numerical value of a digit in the given base.
*
* @param c digit character
@@ -103,6 +124,40 @@ int convert_atoul(const char *str, unsigned long *val){
return err;
}
+/** Convert a string to a long by parsing it as a number.
+ * Will accept hex or decimal in usual C syntax.
+ *
+ * @param str input string
+ * @param val where to put the result
+ * @return 0 if converted OK, negative otherwise
+ */
+int convert_atol(const char *str, long *val){
+ int err = 0;
+ unsigned long v = 0;
+ int base, sign = 1;
+ const char *s = str;
+
+ if(!s) {
+ err = -EINVAL;
+ goto exit;
+ }
+ s = convert_set_sign(s, &sign);
+ s = convert_set_base(s, &base);
+ for( ; !err && *s; s++){
+ int digit = convert_get_digit(*s, base);
+ if(digit<0){
+ err = -EINVAL;
+ goto exit;
+ }
+ v *= base;
+ v += digit;
+ }
+ if(sign < 0) v = -v;
+ exit:
+ *val = (err ? 0 : v);
+ return err;
+}
+
/** Combine a directory path with a relative path to produce
* a new path.
*
diff --git a/tools/libxutil/sys_string.h b/tools/vnet/libxutil/sys_string.h
index ea60401168..88d9d8db61 100644
--- a/tools/libxutil/sys_string.h
+++ b/tools/vnet/libxutil/sys_string.h
@@ -86,6 +86,7 @@ static inline size_t strnlen(const char *s, size_t n){
/*============================================================================*/
extern int convert_atoul(const char *s, unsigned long *v);
+extern int convert_atol(const char *s, long *v);
extern int path_concat(char *s, char *t, char **val);
#endif /* !_XUTIL_SYS_STRING_H_ */
diff --git a/tools/libxutil/util.c b/tools/vnet/libxutil/util.c
index 0ac388b3b8..0ac388b3b8 100644
--- a/tools/libxutil/util.c
+++ b/tools/vnet/libxutil/util.c
diff --git a/tools/libxutil/util.h b/tools/vnet/libxutil/util.h
index b4a170512f..b4a170512f 100644
--- a/tools/libxutil/util.h
+++ b/tools/vnet/libxutil/util.h
diff --git a/tools/vnet/vnet-module/Makefile-2.6 b/tools/vnet/vnet-module/Makefile-2.6
index 64e57ea5ff..053391e572 100644
--- a/tools/vnet/vnet-module/Makefile-2.6
+++ b/tools/vnet/vnet-module/Makefile-2.6
@@ -38,8 +38,8 @@ module modules:
.PHONY: install install-module modules_install
install install-module modules_install: module
- install -m 0755 -d $(DESTDIR)$(KERNEL_MODULE_DIR)
- install -m 0554 $(KERNEL_MODULE) $(DESTDIR)$(KERNEL_MODULE_DIR)
+ install -m 0755 -d $(DESTDIR)$(KERNEL_MODULE_DIR)/xen
+ install -m 0554 $(KERNEL_MODULE) $(DESTDIR)$(KERNEL_MODULE_DIR)/xen
.PHONY: clean
clean:
diff --git a/tools/vnet/vnet-module/Makefile.vnet b/tools/vnet/vnet-module/Makefile.vnet
index 366c2fc9b9..2bc07b8f92 100644
--- a/tools/vnet/vnet-module/Makefile.vnet
+++ b/tools/vnet/vnet-module/Makefile.vnet
@@ -24,7 +24,7 @@ else
SRC_DIR=$(src)/
endif
-LIB_DIR := $(SRC_DIR)../../libxutil
+LIB_DIR := $(SRC_DIR)../libxutil
VNET_SRC :=
VNET_SRC += esp.c
diff --git a/tools/vnet/vnet-module/if_varp.h b/tools/vnet/vnet-module/if_varp.h
index a9a7438a7e..c4b752ac49 100644
--- a/tools/vnet/vnet-module/if_varp.h
+++ b/tools/vnet/vnet-module/if_varp.h
@@ -36,7 +36,7 @@ typedef struct VnetMsgHdr {
} __attribute__((packed)) VnetMsgHdr;
typedef struct VarpHdr {
- VnetMsgHdr hdr;
+ VnetMsgHdr vnetmsghdr;
uint32_t vnet;
Vmac vmac;
uint32_t addr;
@@ -50,4 +50,4 @@ typedef struct VarpHdr {
-#endif /* ! _VNET_IF_VARP_H */
+#endif /* ! _VNET_IF_VARP_H */
diff --git a/tools/vnet/vnet-module/varp.c b/tools/vnet/vnet-module/varp.c
index 1e44a915b7..f7bdf81d97 100644
--- a/tools/vnet/vnet-module/varp.c
+++ b/tools/vnet/vnet-module/varp.c
@@ -368,8 +368,8 @@ int varp_send(u16 opcode, struct net_device *dev, struct sk_buff *skbin,
// Varp header.
varph = (void*)skb_put(skbout, varp_n);
*varph = (VarpHdr){};
- varph->hdr.id = htons(VARP_ID);
- varph->hdr.opcode = htons(opcode);
+ varph->vnetmsghdr.id = htons(VARP_ID);
+ varph->vnetmsghdr.opcode = htons(opcode);
varph->vnet = htonl(vnet);
varph->vmac = *vmac;
varph->addr = saddr;
@@ -1076,9 +1076,9 @@ int varp_handle_message(struct sk_buff *skb){
goto exit;
}
mine = 1;
- if(varph->hdr.id != htons(VARP_ID)){
+ if(varph->vnetmsghdr.id != htons(VARP_ID)){
// It's not varp at all - ignore it.
- wprintf("> Unknown id: %d \n", ntohs(varph->hdr.id));
+ wprintf("> Unknown id: %d \n", ntohs(varph->vnetmsghdr.id));
goto exit;
}
if(1){
@@ -1086,13 +1086,13 @@ int varp_handle_message(struct sk_buff *skb){
NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr));
dprintf("> sport=%u dport=%u\n", ntohs(skb->h.uh->source), ntohs(skb->h.uh->dest));
dprintf("> opcode=%d vnet=%u vmac=" MACFMT " addr=" IPFMT "\n",
- ntohs(varph->hdr.opcode),
+ ntohs(varph->vnetmsghdr.opcode),
ntohl(varph->vnet),
MAC6TUPLE(varph->vmac.mac),
NIPQUAD(varph->addr));
varp_dprint();
}
- switch(ntohs(varph->hdr.opcode)){
+ switch(ntohs(varph->vnetmsghdr.opcode)){
case VARP_OP_REQUEST:
err = varp_handle_request(skb, varph);
break;
@@ -1100,7 +1100,7 @@ int varp_handle_message(struct sk_buff *skb){
err = varp_handle_announce(skb, varph);
break;
default:
- wprintf("> Unknown opcode: %d \n", ntohs(varph->hdr.opcode));
+ wprintf("> Unknown opcode: %d \n", ntohs(varph->vnetmsghdr.opcode));
break;
}
exit:
diff --git a/tools/vnet/vnetd/Makefile b/tools/vnet/vnetd/Makefile
index 3783fa3833..a7afa94384 100644
--- a/tools/vnet/vnetd/Makefile
+++ b/tools/vnet/vnetd/Makefile
@@ -25,7 +25,7 @@ include $(XEN_ROOT)/tools/Rules.mk
VNETD_INSTALL_DIR = /usr/sbin
-LIB_DIR = $(XEN_LIBXUTIL)
+LIB_DIR = ../libxutil
VNET_DIR = ../vnet-module
INCLUDES += -I$(LIB_DIR)
diff --git a/tools/vnet/vnetd/vcache.c b/tools/vnet/vnetd/vcache.c
index cd06988236..1ea81ba292 100644
--- a/tools/vnet/vnetd/vcache.c
+++ b/tools/vnet/vnetd/vcache.c
@@ -102,11 +102,11 @@ int varp_send(Conn *conn, uint16_t opcode, uint32_t vnet, Vmac *vmac, uint32_t a
int varp_n = sizeof(VarpHdr);
VarpHdr varph = {};
- varph.id = htons(VARP_ID);
- varph.opcode = htons(opcode);
- varph.vnet = vnet;
- varph.vmac = *vmac;
- varph.addr = addr;
+ varph.vnetmsghdr.id = htons(VARP_ID);
+ varph.vnetmsghdr.opcode = htons(opcode);
+ varph.vnet = vnet;
+ varph.vmac = *vmac;
+ varph.addr = addr;
if(0){
struct sockaddr_in self;
@@ -503,7 +503,7 @@ void VarpCache_sweep(VarpCache *z, int all){
* @param local whether it's local or not
*/
void vcache_forward_varp(VarpHdr *varph, int local){
- uint16_t opcode = ntohs(varph->opcode);
+ uint16_t opcode = ntohs(varph->vnetmsghdr.opcode);
if(local){
ConnList *l;
for(l = vnetd->connections; l; l = l->next){
@@ -611,7 +611,7 @@ int vcache_handle_message(IPMessage *msg, int local){
dprintf("> opcode=%d vnet=%u vmac=" MACFMT "\n",
ntohs(varph->opcode), ntohl(varph->vnet), MAC6TUPLE(varph->vmac.mac));
}
- switch(ntohs(varph->opcode)){
+ switch(ntohs(varph->vnetmsghdr.opcode)){
case VARP_OP_REQUEST:
err = vcache_handle_request(msg, varph, local);
break;
diff --git a/tools/x2d2/Makefile b/tools/x2d2/Makefile
deleted file mode 100644
index 43f6964cae..0000000000
--- a/tools/x2d2/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-XEN_ROOT=../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-CC = gcc
-CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing
-
-CFLAGS += -I $(XEN_XC)
-CFLAGS += -I $(XEN_LIBXC)
-CFLAGS += -I $(XEN_LIBXUTIL)
-
-HDRS = $(wildcard *.h)
-OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
-
-BIN = minixend
-
-all: $(BIN)
-
-clean:
- $(RM) *.a *.so *.o *.rpm $(BIN)
-
-$(BIN): $(OBJS)
- $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -lxc -lxutil -lpthread
diff --git a/tools/x2d2/cntrl_con.c b/tools/x2d2/cntrl_con.c
deleted file mode 100644
index 46084dbdee..0000000000
--- a/tools/x2d2/cntrl_con.c
+++ /dev/null
@@ -1,457 +0,0 @@
-#define _GNU_SOURCE
-
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <assert.h>
-#include <ctype.h>
-#include <err.h>
-#include <errno.h>
-#include <pthread.h>
-#include <netinet/in.h>
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "minixend.h"
-
-struct command {
- const char *name;
- void (*func)(struct open_connection *oc, const struct command *c,
- const char *, const char *);
-};
-
-static void
-domain_created(const char *name, int mem_kb, int domid)
-{
- struct domain *d;
- d = xmalloc(sizeof(*d));
- d->domid = domid;
- d->name = xstrdup(name);
- d->mem_kb = mem_kb;
- d->state = DOM_STATE_CREATED;
- d->control_evtchn = -1; /* Not connected yet. */
-
- memcpy(d->netif_mac, "\xaa\x00\x00\x02\x00\x00", 6);
- d->netif_mac[5] = d->domid;
-
- pthread_mutex_init(&d->mux, NULL);
- pthread_cond_init(&d->cond, NULL);
- pthread_create(&d->thread, NULL, domain_thread_func, d);
-
- list_insert_after(&d->domain_list, &head_domain);
-}
-
-static struct domain *
-find_domain(int domain_id)
-{
- struct domain *d;
-
- foreach_domain(d) {
- if (d->domid == domain_id)
- return d;
- }
- return NULL;
-}
-
-static int
-free_event_port(struct domain *d, int port)
-{
- if (d == NULL)
- return xc_evtchn_close(xc_handle, DOMID_SELF, port);
- else
- return xc_evtchn_close(xc_handle, d->domid, port);
-}
-
-static char *
-readline(struct open_connection *oc)
-{
- char *end;
- char *res;
- int line_length;
-
- if (oc->state == OC_STATE_ERROR)
- return NULL;
-
- end = memchr(oc->buf, '\r', oc->buf_used);
- assert(end != NULL);
- line_length = end - oc->buf;
-
- res = xmalloc(line_length + 1);
- memcpy(res, oc->buf, line_length);
- res[line_length] = 0;
- memmove(oc->buf, oc->buf + line_length + 2,
- oc->buf_used - line_length - 2);
-
- oc->buf_used -= line_length + 2;
-
- if (memchr(oc->buf, '\n', oc->buf_used))
- oc->state = OC_STATE_COMMAND_PENDING;
- else
- oc->state = OC_STATE_CONNECTED;
-
- return res;
-}
-
-static unsigned long
-find_domain_shared_info_mfn(struct domain *d)
-{
- xc_dominfo_t info;
-
- xc_domain_getinfo(xc_handle, d->domid, 1, &info);
- return info.shared_info_frame;
-}
-
-static void
-send_message(struct open_connection *oc, const char *fmt, ...)
-{
- char *buf;
- va_list ap;
- int size;
- int off;
- ssize_t r;
-
- if (oc->state == OC_STATE_ERROR)
- return;
-
- va_start(ap, fmt);
- size = vasprintf(&buf, fmt, ap);
- va_end(ap);
- if (size < 0)
- err(1, "preparing response to a query");
- assert(buf[0] == 'E' || buf[0] == 'N');
- assert(isdigit(buf[1]));
- assert(isdigit(buf[2]));
- assert(buf[3] == ' ' || buf[3] == '\n');
-
- off = 0;
- while (off < size) {
- r = write(oc->fd, buf + off, size - off);
- if (r < 0) {
- warn("sending response to remote");
- oc->state = OC_STATE_ERROR;
- free(buf);
- return;
- }
- off += r;
- }
- free(buf);
-}
-
-static void
-default_command_handler(struct open_connection *oc, const struct command *ign,
- const char *buf, const char *args)
-{
- warnx("bad command %s", buf);
- send_message(oc, "E00 unknown command %s\n", buf);
-}
-
-static void
-create_command_handler(struct open_connection *oc, const struct command *ign,
- const char *buf, const char *args)
-{
- char *name;
- unsigned mem_kb;
- int r;
- u32 domid = -1;
-
- r = sscanf(args, "%d %a[^\n]", &mem_kb, &name);
- if (r != 2) {
- send_message(oc, "E01 failed to parse %s\n", args);
- return;
- }
- r = xc_domain_create(xc_handle, mem_kb, -1, 0, &domid);
- if (r < 0) {
- send_message(oc, "E02 creating domain (%s)\n",
- strerror(errno));
- free(name);
- return;
- }
-
- domain_created(name, mem_kb, domid);
-
- send_message(oc, "N00 %d\n", domid);
- free(name);
-}
-
-static void
-build_command_handler(struct open_connection *oc, const struct command *ign,
- const char *buf, const char *args)
-{
- struct domain *d;
- int domain_id;
- char *image, *cmdline;
- int event_ports[2];
- int r;
-
- r = sscanf(args, "%d %a[^\t] %a[^\n]", &domain_id,
- &image, &cmdline);
- if (r != 3) {
- send_message(oc, "E03 failed to parse %s\n", args);
- return;
- }
- d = find_domain(domain_id);
- if (d == NULL) {
- send_message(oc, "E04 unknown domain %d\n", domain_id);
- goto out;
- }
- if (d->state != DOM_STATE_CREATED) {
- send_message(oc, "E05 domain %d in bad state\n", domain_id);
- goto out;
- }
-
- r = allocate_event_channel(d, event_ports);
- if (r < 0) {
- send_message(oc, "E06 allocating control event channel: %s\n",
- strerror(errno));
- goto out;
- }
-
- r = xc_linux_build(xc_handle, domain_id, image, NULL, cmdline,
- event_ports[1], 0);
- if (r < 0) {
- send_message(oc, "E07 building domain: %s\n",
- strerror(errno));
- free_event_port(NULL, event_ports[0]);
- free_event_port(d, event_ports[1]);
- goto out;
- }
-
- if (ioctl(evtchn_fd, EVTCHN_BIND, event_ports[0]) < 0)
- err(1, "binding to event control event channel");
-
- d->shared_info_mfn = find_domain_shared_info_mfn(d);
- d->shared_info = map_domain_mem(d, d->shared_info_mfn);
- if (d->shared_info == NULL)
- err(1, "maping domain shared info page at %lx.\n",
- d->shared_info_mfn);
- d->ctrl_if = (control_if_t *)((unsigned)d->shared_info + 2048);
-
- d->control_evtchn = event_ports[0];
- d->state = DOM_STATE_PAUSED;
-
- send_message(oc, "N00\n");
-
- out:
- free(image);
- free(cmdline);
- return;
-}
-
-static void
-unpause_command_handler(struct open_connection *oc,
- const struct command *ign,
- const char *buf,
- const char *args)
-{
- int domain_id;
- int r;
- struct domain *d;
-
- r = sscanf(args, "%d", &domain_id);
- if (r != 1) {
- send_message(oc, "E08 cannot parse %s\n", args);
- return;
- }
- d = find_domain(domain_id);
- if (d == NULL) {
- send_message(oc, "E09 cannot find domain %d\n", domain_id);
- return;
- }
- if (d->state != DOM_STATE_PAUSED) {
- send_message(oc, "E10 domain not paused\n");
- return;
- }
-
- r = xc_domain_unpause(xc_handle, d->domid);
- if (r < 0) {
- send_message(oc, "E11 unpausing domain: %s\n",
- strerror(errno));
- return;
- }
-
- d->state = DOM_STATE_RUNNING;
- send_message(oc, "N00\n");
-}
-
-static void
-console_command_handler(struct open_connection *oc,
- const struct command *ign,
- const char *buf,
- const char *args)
-{
- int domain_id;
- struct domain *d;
- int r;
- struct sockaddr_in name;
- socklen_t namelen;
-
- r = sscanf(args, "%d", &domain_id);
- if (r != 1) {
- send_message(oc, "E12 cannot parse %s\n", args);
- return;
- }
- d = find_domain(domain_id);
- if (d == NULL) {
- send_message(oc, "E13 cannot find domain %d\n", domain_id);
- return;
- }
- if (d->cc != NULL) {
- send_message(oc, "E14 console already exists\n");
- return;
- }
-
- d->cc = xmalloc(sizeof(*d->cc));
- d->cc->fd = socket(PF_INET, SOCK_STREAM, 0);
- if (d->cc->fd < 0)
- err(1, "creating console socket");
- d->cc->dom = d;
- d->cc->state = CC_STATE_PENDING;
- d->cc->buf_used = 0;
- d->cc->buf_allocated = 0;
- d->cc->buf = NULL;
-
- r = listen(d->cc->fd, 1);
- if (r < 0)
- err(1, "listening on console socket");
- namelen = sizeof(name);
- r = getsockname(d->cc->fd, (struct sockaddr *)&name, &namelen);
- if (r < 0)
- err(1, "getting name of console socket");
- assert(name.sin_family == AF_INET);
- assert(namelen == sizeof(name));
- list_insert_after(&d->cc->list, &head_console);
- send_message(oc, "N00 %d\n", ntohs(name.sin_port));
-}
-
-static void
-plug_command_handler(struct open_connection *oc,
- const struct command *ign,
- const char *buf,
- const char *args)
-{
- unsigned domid;
- int r;
- struct domain *d;
-
- r = sscanf(args, "%d", &domid);
- if (r != 1) {
- send_message(oc, "E15 cannot parse %s\n", args);
- return;
- }
- d = find_domain(domid);
- if (d == NULL) {
- send_message(oc, "E16 cannot find domain %d\n", domid);
- return;
- }
-
- d->plugged = 1;
- send_message(oc, "N00\n");
- PRINTF(1, "set domain %d plug state to %d\n", d->domid, d->plugged);
-}
-
-static void
-destroy_command_handler(struct open_connection *oc,
- const struct command *ign,
- const char *buf,
- const char *args)
-{
- unsigned domid;
- int r;
- struct domain *d;
-
- r = sscanf(args, "%d", &domid);
- if (r != 1) {
- send_message(oc, "E17 cannot parse %s\n", args);
- return;
- }
- d = find_domain(domid);
- if (d == NULL) {
- send_message(oc, "E18 cannot find domain %d\n", domid);
- return;
- }
-
- r = xc_domain_destroy(xc_handle, domid);
- if (r < 0) {
- send_message( oc, "E19 error destroying domain %d: %s\n",
- domid, strerror(errno) );
- return;
- }
- d->state = DOM_STATE_DEAD;
-
- send_message(oc, "N00\n");
-}
-
-static void
-list_command_handler(struct open_connection *oc,
- const struct command *ign,
- const char *buf,
- const char *args)
-{
- struct domain *d;
- static const char *const state_strings[] = {
- [DOM_STATE_CREATED] = "created",
- [DOM_STATE_PAUSED] = "paused",
- [DOM_STATE_RUNNING] = "running",
- [DOM_STATE_DEAD] = "dead"
- };
-
- foreach_domain(d) {
- send_message(oc, "N01 %d %s %d %s\n",
- d->domid,
- d->name,
- d->mem_kb,
- state_strings[d->state]);
- }
- send_message(oc, "N00\n");
-}
-
-static struct command
-default_command = { NULL, default_command_handler };
-
-static struct command
-commands[] = {
- { "build", build_command_handler },
- { "console", console_command_handler },
- { "create", create_command_handler },
- { "destroy", destroy_command_handler },
- { "plug", plug_command_handler },
- { "list", list_command_handler },
- { "unpause", unpause_command_handler }
-};
-
-void
-process_command(struct open_connection *oc)
-{
- char *buf, *b;
- int command_len;
- int x;
- struct command *cmd;
-
- buf = readline(oc);
- if (buf == NULL)
- return;
- b = strchr(buf, ' ');
- if (b == NULL)
- command_len = strlen(buf);
- else
- command_len = b - buf;
- b = buf + command_len;
- while (b[0] && b[0] == ' ')
- b++;
-
- cmd = &default_command;
- for (x = 0; x < sizeof(commands) / sizeof(commands[0]); x++) {
- if (strlen(commands[x].name) == command_len &&
- memcmp(commands[x].name, buf, command_len) == 0) {
- cmd = &commands[x];
- break;
- }
- }
- cmd->func(oc, cmd, buf, b);
- free(buf);
- return;
-}
diff --git a/tools/x2d2/minixend.c b/tools/x2d2/minixend.c
deleted file mode 100644
index 64fe27195a..0000000000
--- a/tools/x2d2/minixend.c
+++ /dev/null
@@ -1,939 +0,0 @@
-#define _GNU_SOURCE
-
-#include <sys/types.h>
-#include <sys/fcntl.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/select.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <asm/page.h>
-#include <assert.h>
-#include <ctype.h>
-#include <err.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include <printf.h>
-#include <pthread.h>
-#include <sched.h>
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "minixend.h"
-
-#define NETWORK_SCRIPT "/etc/xen/scripts/network"
-#define VIFBRIDGE_SCRIPT "/etc/xen/scripts/vif-bridge"
-
-#define MINIXEND_PORT 5123
-
-#define mb() asm volatile ("" ::: "memory")
-
-static void send_control_message(int type, int subtype, int id,
- int size, void *payload,
- struct domain *target);
-
-struct list_head
-head_domain = LIST_HEAD(&head_domain);
-
-static struct list_head
-head_connection = LIST_HEAD(&head_connection);
-
-struct list_head
-head_console = LIST_HEAD(&head_console);
-
-#define foreach_open_connection(d) \
-foreach_item(d, &head_connection, struct open_connection, connection_list)
-
-/* Not modified after initial start up */
-static struct domain *dom0;
-unsigned xc_handle;
-static int listen_fd;
-int evtchn_fd;
-
-static struct list_head
-head_event_receiver = LIST_HEAD(&head_event_receiver);
-
-struct event_receiver {
- struct list_head list;
- int id;
- pthread_cond_t cond;
-};
-
-/* We're protected by the dom0 mutex in here */
-static struct event_receiver *
-allocate_event_receiver(struct domain *d)
-{
- static int next_message_id;
- struct event_receiver *work;
-
- assert(d == dom0);
- work = xmalloc(sizeof(*work));
- work->id = next_message_id++;
- pthread_cond_init(&work->cond, NULL);
-
- list_insert_after(&work->list, &head_event_receiver);
-
- return work;
-}
-
-static struct event_receiver *
-find_event_receiver(int id)
-{
- struct event_receiver *work;
- foreach_item(work, &head_event_receiver, struct event_receiver, list)
- if (work->id == id)
- return work;
- return NULL;
-}
-
-static void
-release_event_receiver(struct event_receiver *w)
-{
- list_remove(&w->list);
- pthread_cond_destroy(&w->cond);
- free(w);
-}
-
-/* Send a message to dom0, and then block awaiting a reply. */
-/* Make sure we don't hold any domain mutexs */
-static void
-send_dom0_message_block(control_msg_t *msg)
-{
- CONTROL_RING_IDX c;
- struct event_receiver *er;
- control_msg_t buf;
-
- PRINTF(0, "sending message to dom0 and blocking for reply.\n");
- pthread_mutex_lock(&dom0->mux);
- PRINTF(0, "got dom0 lock.\n");
- er = allocate_event_receiver(dom0);
- PRINTF(0, "allocated evetn receiver.\n");
- msg->id = er->id;
- PRINTF(1, "sending message with id %d\n", msg->id);
- send_control_message(msg->type, msg->subtype,
- msg->id, msg->length, msg->msg, dom0);
- xc_evtchn_send(xc_handle, dom0->control_evtchn);
-
- PRINTF(0, "waiting for reply\n");
- pthread_cond_wait(&er->cond, &dom0->mux);
- PRINTF(0, "got reply\n");
-
- c = dom0->rx_resp_cons % CONTROL_RING_SIZE;
- memcpy(&buf, &dom0->ctrl_if->rx_ring[c], sizeof(buf));
- assert(msg->id == buf.id);
- assert(msg->type == buf.type);
- assert(msg->subtype == buf.subtype);
- memcpy(msg, &buf, sizeof(*msg));
- dom0->rx_resp_cons++;
-
- release_event_receiver(er);
-
- pthread_mutex_unlock(&dom0->mux);
-
- PRINTF(1, "got reply to message with id %d\n", msg->id);
-}
-
-/* Allocate an interdomain event channel. event_ports[0] is the
- local event port number, event_ports[1] the remote */
-int
-allocate_event_channel(struct domain *d, int event_ports[2])
-{
- return xc_evtchn_bind_interdomain(xc_handle, DOMID_SELF,
- d->domid, event_ports,
- event_ports+1);
-}
-
-static void
-accept_new_connection(void)
-{
- int fd;
- struct open_connection *oc;
-
- fd = accept(listen_fd, NULL, NULL);
- if (fd < 0)
- return;
- oc = xmalloc(sizeof(*oc));
- oc->fd = fd;
- oc->state = OC_STATE_CONNECTED;
- oc->buf_used = 0;
- oc->buf_allocated = 16;
- oc->buf = xmalloc(oc->buf_allocated);
- list_insert_after(&oc->connection_list, &head_connection);
-}
-
-static void
-closedown_connection(struct open_connection *oc)
-{
- close(oc->fd);
- assert(oc->buf);
- free(oc->buf);
- free(oc);
-}
-
-#if 0
-/* Hackl for the benefit of domain replay */
-static unsigned
-report_work(u32 *ptr, u32 val, unsigned dom, int do_direct)
-{
- if (!do_direct) {
- int rc;
- asm("int $0x80" : "=a" (rc)
- : "0" (264), "b" (ptr), "c" (val), "d" (dom));
- if (rc < 0) {
- errno = -rc;
- rc = -1;
- }
- return rc;
- } else {
- *ptr = val;
- return 0;
- }
-}
-#else
-static unsigned
-report_work(u32 *ptr, u32 val, unsigned dom, int do_direct)
-{
- *ptr = val;
- return 0;
-}
-#endif
-
-static void
-send_control_reply(const control_msg_t *msg, struct domain *d)
-{
- CONTROL_RING_IDX c;
-
- PRINTF(3,"Control reply, type %d:%d, length %d.\n",
- msg->type, msg->subtype, msg->length);
- c = d->ctrl_if->tx_resp_prod % CONTROL_RING_SIZE;
- memcpy(&d->ctrl_if->tx_ring[c], msg, sizeof(*msg));
- report_work(&d->ctrl_if->tx_resp_prod,
- d->ctrl_if->tx_resp_prod + 1,
- d->domid,
- 0);
- PRINTF(4,"tx_resp_prod %ld.\n", d->ctrl_if->tx_resp_prod);
- assert(!d->plugged);
-}
-
-static void
-send_trivial_control_reply(const control_msg_t *msg, struct domain *d)
-{
- control_msg_t rep;
-
- memset(&rep, 0, sizeof(rep));
- rep.type = msg->type;
- rep.subtype = msg->subtype;
- rep.id = msg->id;
- send_control_reply(&rep, d);
-}
-
-static void
-process_console_control_message(control_msg_t *m, struct domain *d)
-{
- int off;
- int r;
-
- if (m->subtype != CMSG_CONSOLE_DATA) {
- warnx("unknown console message subtype %d",
- m->subtype);
- return;
- }
-
- if (m->length > 60) {
- warnx("truncating message from domain %d (was length %d)",
- d->domid, m->length);
- m->length = 60;
- }
- PRINTF(1, "DOM%d: %.*s\n", d->domid, m->length, m->msg);
- send_trivial_control_reply(m, d);
-
- if (d->cc) {
- PRINTF(5, "Have a console connection.\n");
- if (d->cc->state == CC_STATE_CONNECTED) {
- PRINTF(5, "Console is connected, sending directly.\n");
- for (off = 0; off < m->length; off += r) {
- r = write(d->cc->fd, m->msg + off,
- m->length - off);
- if (r <= 0) {
- d->cc->state = CC_STATE_ERROR;
- break;
- }
- }
- } else {
- PRINTF(5, "Console not connected, buffering.\n");
- if (d->cc->buf_allocated == 0) {
- d->cc->buf_allocated = 60;
- d->cc->buf = xmalloc(d->cc->buf_allocated);
- d->cc->buf_used = 0;
- } else if (d->cc->buf_allocated <
- d->cc->buf_used + m->length) {
- d->cc->buf_allocated += 60;
- d->cc->buf = xrealloc(d->cc->buf,
- d->cc->buf_allocated);
- }
- assert(d->cc->buf_allocated >=
- d->cc->buf_used + m->length);
- memcpy(d->cc->buf + d->cc->buf_used,
- m->msg,
- m->length);
- d->cc->buf_used += m->length;
- }
- }
-}
-
-static void
-process_blkif_fe_message(control_msg_t *m, struct domain *d)
-{
- switch (m->subtype) {
- default:
- warnx("unknown blkif front end message subtype %d",
- m->subtype);
- }
-}
-
-static void
-send_control_message(int type, int subtype, int id,
- int size, void *payload, struct domain *target)
-{
- control_msg_t msg;
- CONTROL_RING_IDX c;
-
- msg.type = type;
- msg.subtype = subtype;
- msg.id = id;
- msg.length = size;
- memcpy(msg.msg, payload, size);
-
- c = target->ctrl_if->rx_req_prod % CONTROL_RING_SIZE;
- memcpy(&target->ctrl_if->rx_ring[c], &msg, sizeof(msg));
- report_work(&target->ctrl_if->rx_req_prod,
- target->ctrl_if->rx_req_prod + 1,
- target->domid,
- 0);
- assert(!target->plugged);
-}
-
-/* Procedure for bringing a new netif front end up:
-
- -- Front end sends us NETIF_FE_DRIVER_STATUS_CHANGED
- -- We send back end NETIF_BE_CREATE, wait for a reply
- -- Back end creates a new netif for us, replies
- -- We send front end a NETIF_FE_DRIVER_STATUS_CHANGED message saying
- how many interfaces we've created for it
- -- We send front end a NETIF_FE_INTERFACE_STATUS_CHANGED for each
- netif created
- -- Front end sends us a NETIF_FE_INTERFACE_CONNECT for each netif
-*/
-static void
-handle_netif_fe_driver_status(control_msg_t *m,
- netif_fe_driver_status_t *sh,
- struct domain *d)
-{
- netif_fe_interface_status_t if_s;
- control_msg_t be_msg;
- netif_be_create_t *be = (void *)be_msg.msg;
- int r;
-
- switch (sh->status) {
- case NETIF_DRIVER_STATUS_UP:
- /* Tell the back end about the new interface coming
- * up. */
- if (d->created_netif_backend) {
- send_control_reply(m, d);
- send_control_message(CMSG_NETIF_FE,
- CMSG_NETIF_FE_DRIVER_STATUS,
- 1,
- sizeof(*sh),
- sh,
- d);
- return;
- }
- be_msg.type = CMSG_NETIF_BE;
- be_msg.subtype = CMSG_NETIF_BE_CREATE;
- be_msg.id = d->domid;
- be_msg.length = sizeof(*be);
- be->domid = d->domid;
- be->netif_handle = 0;
- memcpy(be->mac, d->netif_mac, 6);
-
- PRINTF(2,"Telling back end about new front end.\n");
- pthread_mutex_unlock(&d->mux);
- send_dom0_message_block(&be_msg);
- pthread_mutex_lock(&d->mux);
- PRINTF(3,"Done.\n");
-
- if (be->status != NETIF_BE_STATUS_OKAY) {
- /* Uh oh... can't bring back end
- * up. */
- send_control_reply(m, d);
- send_control_message(CMSG_NETIF_FE,
- CMSG_NETIF_FE_DRIVER_STATUS,
- 1,
- sizeof(*sh),
- sh,
- d);
- return;
- }
- d->created_netif_backend = 1;
-
- r = our_system(VIFBRIDGE_SCRIPT " up domain=%s mac=%.02x:%.02x:%.02x:%.02x:%.02x:%.02x vif=vif%d.0 bridge=xen-br0",
- d->name,
- d->netif_mac[0],
- d->netif_mac[1],
- d->netif_mac[2],
- d->netif_mac[3],
- d->netif_mac[4],
- d->netif_mac[5],
- d->domid);
- if (r != 0)
- warn("error %d running " VIFBRIDGE_SCRIPT, r);
-
- /* Tell domain how many interfaces it has to deal
- * with. */
- send_control_reply(m, d);
- send_control_message(CMSG_NETIF_FE,
- CMSG_NETIF_FE_DRIVER_STATUS,
- 1,
- sizeof(*sh),
- sh,
- d);
-
- PRINTF(2,"Telling front end about its interfaces.\n");
- if_s.handle = 0;
- if_s.status = NETIF_INTERFACE_STATUS_DISCONNECTED;
- send_control_message(CMSG_NETIF_FE,
- CMSG_NETIF_FE_INTERFACE_STATUS,
- 1,
- sizeof(if_s),
- &if_s,
- d);
- PRINTF(3,"Done.\n");
-
- break;
- default:
- warnx("unknown netif status %ld", sh->status);
- break;
- }
-}
-
-static void
-handle_netif_fe_interface_connect(control_msg_t *m,
- netif_fe_interface_connect_t *ic,
- struct domain *d)
-{
- control_msg_t be_msg;
- netif_be_connect_t *bmsg = (void *)be_msg.msg;
- netif_fe_interface_status_t fmsg = {0};
- int evtchn_ports[2];
- int r;
-
- PRINTF(4, "front end sent us an interface connect message.\n");
- send_trivial_control_reply(m, d);
-
- r = xc_evtchn_bind_interdomain(xc_handle,
- dom0->domid,
- d->domid,
- &evtchn_ports[0],
- &evtchn_ports[1]);
- if (r < 0)
- err(1, "allocating network event channel");
-
- be_msg.type = CMSG_NETIF_BE;
- be_msg.subtype = CMSG_NETIF_BE_CONNECT;
- be_msg.id = 0;
- be_msg.length = sizeof(*bmsg);
- bmsg->domid = d->domid;
- bmsg->netif_handle = ic->handle;
- bmsg->tx_shmem_frame = ic->tx_shmem_frame;
- bmsg->rx_shmem_frame = ic->rx_shmem_frame;
- bmsg->evtchn = evtchn_ports[0];
-
- pthread_mutex_unlock(&d->mux);
- send_dom0_message_block(&be_msg);
- pthread_mutex_lock(&d->mux);
-
- if (bmsg->status != NETIF_BE_STATUS_OKAY) {
- PRINTF(2, "error connected backend netif: %ld\n",
- bmsg->status);
- abort(); /* Need to handle this */
- } else {
- PRINTF(3, "connect backend netif\n");
-
- /* Tell the domain that we've connected it up. */
- fmsg.handle = ic->handle;
- fmsg.status = NETIF_INTERFACE_STATUS_CONNECTED;
- fmsg.evtchn = evtchn_ports[1];
- memcpy(fmsg.mac, d->netif_mac, 6);
-
- send_control_message(CMSG_NETIF_FE,
- CMSG_NETIF_FE_INTERFACE_STATUS,
- 0,
- sizeof(fmsg),
- &fmsg,
- d);
- }
-}
-
-static void
-process_netif_fe_message(control_msg_t *m, struct domain *d)
-{
- switch (m->subtype) {
- case CMSG_NETIF_FE_DRIVER_STATUS:
- {
- netif_fe_driver_status_t *sh =
- (netif_fe_driver_status_t *)m->msg;
- handle_netif_fe_driver_status(m, sh, d);
- break;
- }
- case CMSG_NETIF_FE_INTERFACE_CONNECT:
- {
- netif_fe_interface_connect_t *ic =
- (netif_fe_interface_connect_t *)m->msg;
- handle_netif_fe_interface_connect(m, ic, d);
- break;
- }
- default:
- warnx("unknown netif front end message subtype %d",
- m->subtype);
- }
-}
-
-static void
-process_control_message(control_msg_t *msg, struct domain *d)
-{
- control_msg_t m;
-
- /* Don't want a malicous domain messing us about, so copy the
- control mesasge into a local buffer. */
- memcpy(&m, msg, sizeof(m));
- switch (m.type) {
- case CMSG_CONSOLE:
- process_console_control_message(&m, d);
- break;
- case CMSG_BLKIF_FE:
- process_blkif_fe_message(&m, d);
- break;
- case CMSG_NETIF_FE:
- process_netif_fe_message(&m, d);
- break;
- default:
- warnx("unknown control message type %d", m.type);
- }
-}
-
-static void
-domain_did_control_event(struct domain *d)
-{
- CONTROL_RING_IDX c;
-
- /* Pick up and process control ring messages. */
- while (d->tx_req_cons != d->ctrl_if->tx_req_prod) {
- c = d->tx_req_cons % CONTROL_RING_SIZE;
- process_control_message(&d->ctrl_if->tx_ring[c], d);
- d->tx_req_cons++;
- assert(d->tx_req_cons <= d->ctrl_if->tx_req_prod);
- PRINTF(5, "req_cons %ld, req_prod %ld.\n",
- d->tx_req_cons, d->ctrl_if->tx_req_prod);
- }
-
- /* Take any replies off, and discard them. */
- if (d->rx_resp_cons != d->ctrl_if->rx_resp_prod)
- PRINTF(1, "discard %ld events\n",
- d->ctrl_if->rx_resp_prod -
- d->rx_resp_cons);
- d->rx_resp_cons = d->ctrl_if->rx_resp_prod;
-}
-
-/* This is the main function for domain control threads */
-void *
-domain_thread_func(void *D)
-{
- struct domain *d = D;
- int r;
- CONTROL_RING_IDX old_resp_prod, old_req_prod;
-
- pthread_mutex_lock(&d->mux);
- for (;;) {
- pthread_cond_wait(&d->cond, &d->mux);
-
- old_resp_prod = d->ctrl_if->tx_resp_prod;
- old_req_prod = d->ctrl_if->rx_req_prod;
-
- domain_did_control_event(d);
- if (d->cc && d->cc->in_buf_used != 0 && d->plugged == 0) {
- r = d->cc->in_buf_used;
- if (r > 60)
- r = 60;
- PRINTF(1, "Sending to domain: %.*s\n",
- r, d->cc->in_buf);
- send_control_message(CMSG_CONSOLE,
- CMSG_CONSOLE_DATA,
- 0,
- r,
- d->cc->in_buf,
- d);
- memmove(d->cc->in_buf, d->cc->in_buf + r,
- d->cc->in_buf_used - r);
- d->cc->in_buf_used -= r;
- }
-
- if (d->ctrl_if->tx_resp_prod != old_resp_prod ||
- d->ctrl_if->rx_req_prod != old_req_prod)
- xc_evtchn_send(xc_handle, d->control_evtchn);
- }
-}
-
-/* This is the only thing you can do with a domain structure if you're
- not in the thread which controls that domain. Domain 0 is
- special. */
-void
-signal_domain(struct domain *d)
-{
- CONTROL_RING_IDX c;
- int id;
- struct event_receiver *evt;
-
- pthread_mutex_lock(&d->mux);
- if (d == dom0) {
- /* Take events off of dom0's control ring, and send
- them to the event receivers. */
- while (d->tx_req_cons != d->ctrl_if->tx_req_prod) {
- c = d->tx_req_cons % CONTROL_RING_SIZE;
- id = d->ctrl_if->tx_ring[c].id;
- evt = find_event_receiver(id);
- if (evt != NULL) {
- PRINTF(1, "delivering event id %d\n", evt->id);
- pthread_cond_broadcast(&evt->cond);
- pthread_mutex_unlock(&d->mux);
- sched_yield();
- pthread_mutex_lock(&d->mux);
- } else {
- warnx("unexpected message id %d discarded",
- id);
- d->tx_req_cons++;
- }
- }
- while (d->rx_resp_cons != d->ctrl_if->rx_resp_prod) {
- c = d->rx_resp_cons % CONTROL_RING_SIZE;
- id = d->ctrl_if->rx_ring[c].id;
- evt = find_event_receiver(id);
- if (evt != NULL) {
- PRINTF(1, "delivering event rep id %d\n", evt->id);
- pthread_cond_broadcast(&evt->cond);
- pthread_mutex_unlock(&d->mux);
- sched_yield();
- pthread_mutex_lock(&d->mux);
- } else {
- warnx("unexpected message reply id %d discarded",
- id);
- d->rx_resp_cons++;
- }
- }
- } else {
- if (d->plugged) {
- d->event_pending = 1;
- } else {
- pthread_cond_broadcast(&d->cond);
- }
- }
- pthread_mutex_unlock(&d->mux);
-}
-
-static void
-handle_evtchn_event(void)
-{
- short port;
- struct domain *d;
-
- read(evtchn_fd, &port, sizeof(short));
- write(evtchn_fd, &port, sizeof(short));
- foreach_domain (d) {
- if (d->control_evtchn == port) {
- signal_domain(d);
- return;
- }
- }
- warnx("got an event on an unknown port %d", port);
-}
-
-void *
-map_domain_mem(struct domain *d, unsigned long mfn)
-{
- return xc_map_foreign_range(xc_handle, d->domid,
- PAGE_SIZE, PROT_READ | PROT_WRITE,
- mfn);
-}
-
-static void
-handle_console_event(struct console_connection *cc)
-{
- int r;
- int fd;
-
- switch (cc->state) {
- case CC_STATE_ERROR:
- /* Errors shouldn't get here. */
- abort();
- case CC_STATE_PENDING:
- fd = accept(cc->fd, NULL, NULL);
- if (fd >= 0) {
- PRINTF(3, "Accepted console connection for domain %d",
- cc->dom->domid);
- close(cc->fd);
- cc->fd = fd;
- cc->state = CC_STATE_CONNECTED;
- while (cc->buf_used != 0) {
- r = write(cc->fd,
- cc->buf,
- cc->buf_used);
- if (r <= 0) {
- cc->state = CC_STATE_ERROR;
- break;
- }
- memmove(cc->buf,
- cc->buf + r,
- cc->buf_used - r);
- cc->buf_used -= r;
- }
- free(cc->buf);
- cc->buf = NULL;
- cc->buf_allocated = 0;
- } else {
- PRINTF(1, "error %s accepting console", strerror(errno));
- }
- pthread_mutex_unlock(&cc->dom->mux);
- break;
- case CC_STATE_CONNECTED:
- if (cc->in_buf_allocated == 0) {
- assert(cc->in_buf_used == 0);
- cc->in_buf_allocated = 128;
- cc->in_buf = xmalloc(cc->in_buf_allocated);
- }
- if (cc->in_buf_used == cc->in_buf_allocated) {
- cc->in_buf_allocated *= 2;
- cc->in_buf = xrealloc(cc->in_buf, cc->in_buf_allocated);
- }
- r = read(cc->fd, cc->in_buf + cc->in_buf_used,
- cc->in_buf_allocated - cc->in_buf_used);
- if (r <= 0) {
- cc->state = CC_STATE_ERROR;
- } else {
- cc->in_buf_used += r;
- }
- pthread_mutex_unlock(&cc->dom->mux);
- signal_domain(cc->dom);
- break;
- }
-}
-
-static void
-handle_connection_event(struct open_connection *oc)
-{
- int r;
-
- /* We know that some amount of data is ready and waiting for
- us. Slurp it in. */
- if (oc->buf_used == oc->buf_allocated) {
- oc->buf_allocated *= 2;
- oc->buf = xrealloc(oc->buf, oc->buf_allocated);
- }
- r = read(oc->fd, oc->buf + oc->buf_used,
- oc->buf_allocated - oc->buf_used);
- if (r < 0) {
- warn("reading command from remote");
- oc->state = OC_STATE_ERROR;
- } else if (r == 0) {
- warnx("reading command from remote");
- oc->state = OC_STATE_ERROR;
- } else {
- oc->buf_used += r;
- if (strchr(oc->buf, '\n'))
- oc->state = OC_STATE_COMMAND_PENDING;
- }
-}
-
-static void
-get_and_process_event(void)
-{
- fd_set read_fds, except_fds;
- struct open_connection *oc;
- struct console_connection *cc;
- int max_fd = listen_fd;
- int r;
- struct list_head *li, *temp_li;
-
- FD_ZERO(&read_fds);
- FD_ZERO(&except_fds);
- FD_SET(listen_fd, &read_fds);
- FD_SET(evtchn_fd, &read_fds);
- if (evtchn_fd > max_fd)
- max_fd = evtchn_fd;
- foreach_open_connection(oc) {
- FD_SET(oc->fd, &read_fds);
- FD_SET(oc->fd, &except_fds);
- if (oc->fd > max_fd)
- max_fd = oc->fd;
- }
- foreach_console_connection(cc) {
- FD_SET(cc->fd, &read_fds);
- FD_SET(cc->fd, &except_fds);
- if (cc->fd > max_fd)
- max_fd = cc->fd;
- }
-
- r = select(max_fd + 1, &read_fds, NULL, &except_fds, NULL);
- if (r < 0)
- err(1, "select");
- if (FD_ISSET(listen_fd, &read_fds)) {
- accept_new_connection();
- } else if (FD_ISSET(evtchn_fd, &read_fds))
- handle_evtchn_event();
-
-
- foreach_open_connection(oc) {
- if (FD_ISSET(oc->fd, &read_fds))
- handle_connection_event(oc);
- if (FD_ISSET(oc->fd, &except_fds))
- oc->state = OC_STATE_ERROR;
- }
- list_foreach_safe(&head_console, li, temp_li) {
- cc = list_item(li, struct console_connection, list);
- if (FD_ISSET(cc->fd, &read_fds))
- handle_console_event(cc);
- if (FD_ISSET(cc->fd, &except_fds) ||
- cc->state == CC_STATE_ERROR) {
- PRINTF(1, "Cleaning up console connection");
- cc->dom->cc = NULL;
- list_remove(&cc->list);
- close(cc->fd);
- if (cc->buf_allocated != 0)
- free(cc->buf);
- if (cc->in_buf_allocated != 0)
- free(cc->in_buf);
- free(cc);
- }
- }
-
- /* Run pending stuff on the open connections. */
- list_foreach_safe(&head_connection, li, temp_li) {
- oc = list_item(li, struct open_connection, connection_list);
- switch (oc->state) {
- case OC_STATE_ERROR:
- list_remove(&oc->connection_list);
- closedown_connection(oc);
- break;
- case OC_STATE_COMMAND_PENDING:
- process_command(oc);
- break;
- case OC_STATE_CONNECTED:
- /* Don't need to do anything */
- break;
- }
- }
-}
-
-static int
-start_listening(void)
-{
- int sock;
- struct sockaddr_in inaddr;
-
- sock = socket(PF_INET, SOCK_STREAM, 0);
- if (sock < 0)
- err(1, "creating socket");
- memset(&inaddr, 0, sizeof(inaddr));
- inaddr.sin_family = AF_INET;
- inaddr.sin_port = htons(MINIXEND_PORT);
-
- if (bind(sock, (struct sockaddr *)&inaddr, sizeof(inaddr)) < 0)
- err(1, "binding to port %d", MINIXEND_PORT);
- if (listen(sock, 5) < 0)
- err(1, "listening for connections");
-
- return sock;
-}
-
-static struct domain *
-find_dom0(void)
-{
- int r;
- xc_dominfo_t info;
- struct domain *work;
-
- r = xc_domain_getinfo(xc_handle, 0, 1, &info);
- if (r < 0)
- err(1, "getting domain 0 information");
- work = xmalloc(sizeof(*work));
- work->control_evtchn = 2;
- if (ioctl(evtchn_fd, EVTCHN_BIND, 2) < 0)
- err(1, "binding to domain 0 control event channel");
-
- work->domid = 0;
- work->name = strdup("dom0");
- work->mem_kb = info.max_memkb;
- work->state = DOM_STATE_RUNNING;
- work->shared_info_mfn = info.shared_info_frame;
-
- work->shared_info = map_domain_mem(work, info.shared_info_frame);
- work->ctrl_if = (control_if_t *)((unsigned)work->shared_info + 2048);
- work->tx_req_cons = work->ctrl_if->tx_req_prod;
- work->rx_resp_cons = work->ctrl_if->rx_resp_prod;
-
- pthread_mutex_init(&work->mux, NULL);
- pthread_cond_init(&work->cond, NULL);
-
- list_insert_after(&work->domain_list, &head_domain);
-
- return work;
-}
-
-int
-main(int argc, char *argv[])
-{
- int r;
-
- r = our_system(NETWORK_SCRIPT " start antispoof=no");
- if (r < 0)
- err(1, "running " NETWORK_SCRIPT);
- if (!WIFEXITED(r)) {
- if (WIFSIGNALED(r)) {
- errx(1, NETWORK_SCRIPT " killed by signal %d",
- WTERMSIG(r));
- }
- errx(1, NETWORK_SCRIPT " terminated abnormally");
- }
- if (WEXITSTATUS(r) != 0)
- errx(1, NETWORK_SCRIPT " returned error status %d",
- WEXITSTATUS(r));
-
- xc_handle = xc_interface_open();
-
- listen_fd = start_listening();
-
- evtchn_fd = open("/dev/xen/evtchn", O_RDWR);
- if (evtchn_fd < 0)
- err(1, "openning /dev/xen/evtchn");
-
- dom0 = find_dom0();
-
- while (1) {
- get_and_process_event();
-
- PRINTF(5, "Dom0 ring state:\n");
- PRINTF(5, "RX: req_prod %ld, resp_prod %ld, resp_cons %ld\n",
- dom0->ctrl_if->rx_req_prod,
- dom0->ctrl_if->rx_resp_prod,
- dom0->rx_resp_cons);
- PRINTF(5, "TX: req_prod %ld, resp_prod %ld, req_cons %ld\n",
- dom0->ctrl_if->tx_req_prod,
- dom0->ctrl_if->tx_resp_prod,
- dom0->tx_req_cons);
- }
-
- return 0;
-}
-
diff --git a/tools/x2d2/minixend.h b/tools/x2d2/minixend.h
deleted file mode 100644
index db28d48529..0000000000
--- a/tools/x2d2/minixend.h
+++ /dev/null
@@ -1,154 +0,0 @@
-#ifndef MINIXEND_H__
-#define MINIXEND_H__
-
-#include <sys/types.h>
-#include <xc.h>
-
-struct list_head {
- struct list_head *next, **pprev;
-};
-
-struct open_connection {
- struct list_head connection_list;
- int fd;
- enum {
- OC_STATE_CONNECTED,
- OC_STATE_ERROR,
- OC_STATE_COMMAND_PENDING
- } state;
-
- /* Buffer of stuff coming from the remote until we get a whole
- command */
- int buf_used;
- int buf_allocated;
- char *buf;
-};
-
-struct console_connection;
-
-/* Only ever accessed from the domain's controlling thread, unless
- it's dom0, in which case we perform a moderately complex dance to
- avoid needing any sort of locking at all. */
-struct domain {
- struct list_head domain_list;
- int control_evtchn; /* the local port for the doain control
- interface event channel. */
- int domid;
- char *name;
- int mem_kb;
- enum {
- DOM_STATE_CREATED, /* created but not built */
- DOM_STATE_PAUSED, /* built but not started or paused */
- DOM_STATE_RUNNING, /* running normally */
- DOM_STATE_DEAD /* dead; either destroyed, crashed,
- or exitted. */
- } state;
-
- unsigned long shared_info_mfn;
- shared_info_t *shared_info;
- control_if_t *ctrl_if;
- CONTROL_RING_IDX tx_req_cons;
- CONTROL_RING_IDX rx_resp_cons;
-
- unsigned created_netif_backend:1;
- unsigned plugged:1;
- unsigned event_pending:1; /* True if an event arrived while
- the domain was plugged. */
-
- struct console_connection *cc;
-
- char netif_mac[6];
-
- /* Used for two purposes: waking up domain threads when
- necessary, and synchronising access to dom0, which doesn't
- have a domain thread. */
- pthread_mutex_t mux;
- pthread_cond_t cond;
-
- pthread_t thread;
-};
-
-struct console_connection {
- struct list_head list;
- int fd;
- struct domain *dom;
-
- enum {
- CC_STATE_PENDING,
- CC_STATE_CONNECTED,
- CC_STATE_ERROR
- } state;
-
- unsigned buf_allocated;
- unsigned buf_used;
- char *buf;
-
- unsigned in_buf_allocated;
- unsigned in_buf_used;
- char *in_buf;
-};
-
-
-void *domain_thread_func(void *d);
-void process_command(struct open_connection *oc);
-
-void *xmalloc(size_t s);
-void *xrealloc(void *x, size_t s);
-char *xstrdup(const char *s);
-
-int allocate_event_channel(struct domain *d, int event_ports[2]);
-void *map_domain_mem(struct domain *d, unsigned long mfn);
-void signal_domain(struct domain *d);
-int our_system(const char *fmt, ...);
-
-extern unsigned xc_handle;
-#define EVTCHN_BIND _IO('E', 2)
-extern int evtchn_fd;
-
-#define list_item(head, type, field) \
-((type *)((unsigned)(head) - offsetof(type, field)))
-
-#define foreach_item(iter, head, type, field) \
-for ((iter) = list_item((head)->next, type, field); \
- (iter) != list_item((head), type, field); \
- (iter) = list_item((iter)->field.next, type, field))
-
-#define list_insert_after(what, head) \
-do { \
- (what)->next = (head)->next; \
- (what)->pprev = &(head)->next; \
- (head)->next->pprev = &(what)->next; \
- (head)->next = what; \
-} while (0)
-
-#define list_remove(head) \
-(head)->next->pprev = (head)->pprev; \
-*(head)->pprev = (head)->next;
-
-#define list_foreach_safe(head, li, temp) \
-for ((li) = (head)->next, (temp) = (li)->next; \
- (li) != (head); \
- (li) = (temp), (temp) = (li)->next)
-
-#define LIST_HEAD(x) { (x), &(x)->next }
-
-
-extern struct list_head head_domain;
-extern struct list_head head_console;
-
-#define foreach_domain(d) \
-foreach_item(d, &head_domain, struct domain, domain_list)
-#define foreach_console_connection(cc) \
-foreach_item(cc, &head_console, struct console_connection, list)
-
-
-#define CURRENT_LOG_LEVEL 0
-
-#define PRINTF(level, ...) \
-do { \
- if ((level) >= CURRENT_LOG_LEVEL) \
- printf(__VA_ARGS__); \
-} while (0)
-
-
-#endif /* MINIXEND_H__ */
diff --git a/tools/x2d2/util.c b/tools/x2d2/util.c
deleted file mode 100644
index 9994c7dfb8..0000000000
--- a/tools/x2d2/util.c
+++ /dev/null
@@ -1,132 +0,0 @@
-#define _GNU_SOURCE
-
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <assert.h>
-#include <err.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-void *
-xmalloc(size_t s)
-{
- void *x;
-
- x = malloc(s);
- if (x == NULL)
- err(1, "allocating memory");
- memset(x, 0, s);
- return x;
-}
-
-void *
-xrealloc(void *x, size_t s)
-{
- void *y;
- y = realloc(x, s);
- if (y == NULL)
- err(1, "allocating more memory");
- return y;
-}
-
-char *
-xstrdup(const char *s)
-{
- char *x = strdup(s);
- if (x == NULL)
- err(1, "duplicating %s", s);
- return x;
-}
-
-/* Slightly less stupid implementation of system(). We return
- negative iff there is an error executing the shell; otherwise, we
- return the wait status as reported by waitpid(). Also, we support
- printf-style escapes. We don't handle setting the SIGCHLD handler
- to SIGIGN, though: in that case, we have a race. */
-int
-our_system(const char *fmt, ...)
-{
- char *cmd = NULL;
- int r;
- va_list ap;
- pid_t child = -1;
- int pip[2] = {-1, -1};
- int e;
- fd_set fds;
- struct timeval to;
- int res;
- pid_t c;
- unsigned status;
-
- va_start(ap, fmt);
- r = vasprintf(&cmd, fmt, ap);
- va_end(ap);
- if (r < 0)
- return r;
- r = pipe(pip);
- if (r < 0) {
- res = r;
- goto out;
- }
- child = fork();
- if (child < 0) {
- res = child;
- goto out;
- }
- if (child == 0) {
- close(pip[0]);
- fcntl(pip[1], F_SETFD, 1);
- r = execl("/bin/sh", "/bin/sh", "-c", cmd, NULL);
- /* Uh oh, exec failed */
- write(pip[1], &r, sizeof(r));
- _exit(1);
- }
-
- close(pip[1]);
- pip[1] = -1;
-
- c = waitpid(child, &status, 0);
- if (c < 0) {
- res = c;
- goto out;
- }
- assert(c == child);
- child = -1;
-
- /* Check execl result */
- FD_ZERO(&fds);
- FD_SET(pip[0], &fds);
- memset(&to, 0, sizeof(to));
- r = select(pip[0]+1, &fds, NULL, NULL, &to);
- if (r == 0) {
- res = status;
- } else {
- assert(FD_ISSET(pip[0], &fds));
- r = read(pip[0], &res, sizeof(res));
- if (r != sizeof(res))
- res = status;
- }
- close(pip[0]);
- pip[0] = -1;
-
- out:
- e = errno;
- if (child >= 0) {
- /* Not obvious what the correct thing to do here is. */
- /* Don't want to kill the child; that will create a
- zombie. */
-// kill(child, 9);
- }
- if (pip[0] >= 0)
- close(pip[0]);
- if (pip[1] >= 0)
- close(pip[1]);
- free(cmd);
- errno = e;
- return res;
-}
diff --git a/tools/xcs/Makefile b/tools/xcs/Makefile
new file mode 100644
index 0000000000..c3c4bdfa82
--- /dev/null
+++ b/tools/xcs/Makefile
@@ -0,0 +1,49 @@
+# Makefile for XCS
+# Andrew Warfield, 2004
+
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+XCS_INSTALL_DIR = /usr/sbin
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+CC = gcc
+CFLAGS = -Wall -Werror -g3 -D _XOPEN_SOURCE=600
+
+CFLAGS += -I $(XEN_XC)
+CFLAGS += -I $(XEN_LIBXC)
+
+SRCS :=
+SRCS += ctrl_interface.c
+SRCS += bindings.c
+SRCS += connection.c
+SRCS += evtchn.c
+SRCS += xcs.c
+
+HDRS = $(wildcard *.h)
+OBJS = $(patsubst %.c,%.o,$(SRCS))
+BIN = xcs
+
+all: $(BIN) xcsdump
+
+clean:
+ $(RM) *.a *.so *.o *.rpm $(BIN) xcsdump
+
+xcsdump: xcsdump.c dump.c
+ $(CC) $(CFLAGS) -o xcsdump xcsdump.c -L$(XEN_LIBXC) \
+ ctrl_interface.c evtchn.c dump.c -lxc
+
+$(BIN): $(OBJS)
+ $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -lxc
+
+$(OBJS): $(HDRS)
+
+install: xcs xcsdump
+ $(INSTALL_DIR) -p $(DESTDIR)/$(XCS_INSTALL_DIR)
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/include
+ $(INSTALL_PROG) xcs $(DESTDIR)/$(XCS_INSTALL_DIR)
+ $(INSTALL_PROG) xcsdump $(DESTDIR)/$(XCS_INSTALL_DIR)
+ $(INSTALL_PROG) xcs_proto.h $(DESTDIR)/usr/include
diff --git a/tools/xcs/bindings.c b/tools/xcs/bindings.c
new file mode 100644
index 0000000000..9b09f51568
--- /dev/null
+++ b/tools/xcs/bindings.c
@@ -0,0 +1,179 @@
+/* bindings.c
+ *
+ * Manage subscriptions for the control interface switch.
+ *
+ * (c) 2004, Andrew Warfield
+ *
+ */
+
+/* Interfaces:
+ *
+ * xcs_bind (port, type, connection)
+ * - Register connection to receive messages of this type.
+ * xcs_unbind (port, type, connection)
+ * - Remove an existing registration. (Must be an exact match)
+ * xcs_lookup (port, type)
+ * - Return a list of connections matching a registration.
+ *
+ * - All connections have a connection.bindings list of current bindings.
+ * - (port, type) pairs may be wildcarded with -1.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include "xcs.h"
+
+
+typedef struct binding_ent_st {
+ connection_t *con;
+ struct binding_ent_st *next;
+} binding_ent_t;
+
+#define BINDING_TABLE_SIZE 1024
+
+static binding_ent_t *binding_table[BINDING_TABLE_SIZE];
+
+#define PORT_WILD(_ent) ((_ent)->port == PORT_WILDCARD)
+#define TYPE_WILD(_ent) ((_ent)->type == TYPE_WILDCARD)
+#define FULLY_WILD(_ent) (PORT_WILD(_ent) && TYPE_WILD(_ent))
+
+#define BINDING_HASH(_key) \
+ ((((_key)->port * 11) ^ (_key)->type) % BINDING_TABLE_SIZE)
+
+
+void init_bindings(void)
+{
+ memset(binding_table, 0, sizeof(binding_table));
+}
+
+static int table_add(binding_ent_t *table[],
+ connection_t *con,
+ binding_key_t *key)
+{
+ binding_ent_t **curs, *ent;
+
+ curs = &table[BINDING_HASH(key)];
+
+ while (*curs != NULL) {
+ if ((*curs)->con == con) {
+ DPRINTF("Tried to add an ent that already existed.\n");
+ goto done;
+ }
+ curs = &(*curs)->next;
+ }
+
+ if (connection_add_binding(con, key) != 0)
+ {
+ DPRINTF("couldn't add binding on connection (%lu)\n", con->id);
+ goto fail;
+ }
+ ent = (binding_ent_t *)malloc(sizeof(binding_ent_t));
+ if (ent == 0) {
+ DPRINTF("couldn't alloc binding ent!\n");
+ goto fail;
+ }
+ ent->con = con;
+ ent->next = NULL;
+ *curs = ent;
+
+done:
+ return 0;
+
+fail:
+ return -1;
+}
+
+
+static inline int binding_has_colliding_hashes(connection_t *con,
+ binding_key_t *key)
+{
+ int hash, count = 0;
+ binding_key_ent_t *ent;
+
+ ent = con->bindings;
+ hash = BINDING_HASH(key);
+
+ while (ent != NULL) {
+ if (BINDING_HASH(&ent->key) == hash) count ++;
+ ent = ent->next;
+ }
+
+ return (count > 1);
+}
+static int table_remove(binding_ent_t *table[],
+ connection_t *con,
+ binding_key_t *key)
+{
+ binding_ent_t **curs, *ent;
+
+ if (!binding_has_colliding_hashes(con, key))
+ {
+
+ curs = &table[BINDING_HASH(key)];
+
+ while ((*curs != NULL) && ((*curs)->con != con))
+ curs = &(*curs)->next;
+
+ if (*curs != NULL) {
+ ent = *curs;
+ *curs = (*curs)->next;
+ free(ent);
+ }
+ }
+
+ connection_remove_binding(con, key);
+
+ return 0;
+}
+
+int xcs_bind(connection_t *con, int port, u16 type)
+{
+ binding_key_t key;
+
+ key.port = port;
+ key.type = type;
+
+ return table_add(binding_table, con, &key);
+}
+
+int xcs_unbind(connection_t *con, int port, u16 type)
+{
+ binding_key_t key;
+
+ key.port = port;
+ key.type = type;
+
+ return table_remove(binding_table, con, &key);
+}
+
+
+static void for_each_binding(binding_ent_t *list, binding_key_t *key,
+ void (*f)(connection_t *, void *), void *arg)
+{
+ while (list != NULL)
+ {
+ if (connection_has_binding(list->con, key))
+ f(list->con, arg);
+ list = list->next;
+ }
+}
+
+void xcs_lookup(int port, u16 type, void (*f)(connection_t *, void *),
+ void *arg)
+{
+ binding_key_t key;
+
+ key.port = port; key.type = type;
+ for_each_binding(binding_table[BINDING_HASH(&key)], &key, f, arg);
+
+ key.port = port; key.type = TYPE_WILDCARD;
+ for_each_binding(binding_table[BINDING_HASH(&key)], &key, f, arg);
+
+ key.port = PORT_WILDCARD; key.type = type;
+ for_each_binding(binding_table[BINDING_HASH(&key)], &key, f, arg);
+
+ key.port = PORT_WILDCARD; key.type = TYPE_WILDCARD;
+ for_each_binding(binding_table[BINDING_HASH(&key)], &key, f, arg);
+}
diff --git a/tools/xcs/connection.c b/tools/xcs/connection.c
new file mode 100644
index 0000000000..3b5747de68
--- /dev/null
+++ b/tools/xcs/connection.c
@@ -0,0 +1,157 @@
+/*
+ * connection.c
+ *
+ * State associated with a client connection to xcs.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "xcs.h"
+
+connection_t *connection_list = NULL;
+
+#define CONNECTED(_c) (((_c)->ctrl_fd != -1) || ((_c)->data_fd != -1))
+
+connection_t *get_con_by_session(unsigned long session_id)
+{
+ connection_t **c, *ent = NULL;
+
+ c = &connection_list;
+
+ DPRINTF("looking for id: %lu : %lu\n", session_id, (*c)->id);
+
+ while (*c != NULL)
+ {
+ if ((*c)->id == session_id)
+ return (*c);
+ c = &(*c)->next;
+ }
+
+ return ent;
+}
+
+connection_t *connection_new()
+{
+ connection_t *con;
+
+ con = (connection_t *)malloc(sizeof(connection_t));
+ if (con == NULL)
+ {
+ DPRINTF("couldn't allocate a new connection\n");
+ return NULL;
+ }
+
+ con->bindings = NULL;
+ con->data_fd = -1;
+ con->ctrl_fd = -1;
+
+ /* connections need a unique session id.
+ * - this approach probably gets fixed later, but for the moment
+ * is unique, and clearly identifies a connection.
+ */
+ con->id = (unsigned long)con;
+
+ /* add it to the connection list */
+ con->next = connection_list;
+ connection_list = con;
+
+ return (con);
+}
+
+void connection_free(connection_t *con)
+{
+ /* first free all subscribed bindings: */
+
+ while (con->bindings != NULL)
+ xcs_unbind(con, con->bindings->key.port, con->bindings->key.type);
+
+ /* now free the connection. */
+ free(con);
+}
+
+int connection_add_binding(connection_t *con, binding_key_t *key)
+{
+ binding_key_ent_t *key_ent;
+
+ key_ent = (binding_key_ent_t *)malloc(sizeof(binding_key_ent_t));
+ if (key_ent == NULL)
+ {
+ DPRINTF("couldn't alloc key in connection_add_binding\n");
+ return -1;
+ }
+
+ key_ent->key = *key;
+ key_ent->next = con->bindings;
+ con->bindings = key_ent;
+
+ return 0;
+}
+
+int connection_remove_binding(connection_t *con, binding_key_t *key)
+{
+ binding_key_ent_t *key_ent;
+ binding_key_ent_t **curs = &con->bindings;
+
+ while ((*curs != NULL) && (!BINDING_KEYS_EQUAL(&(*curs)->key, key)))
+ curs = &(*curs)->next;
+
+ if (*curs != NULL) {
+ key_ent = *curs;
+ *curs = (*curs)->next;
+ free(key_ent);
+ }
+
+ return 0;
+}
+
+
+int connection_has_binding(connection_t *con, binding_key_t *key)
+{
+ binding_key_ent_t *ent;
+ int ret = 0;
+
+ ent = con->bindings;
+
+ while (ent != NULL)
+ {
+ if (BINDING_KEYS_EQUAL(key, &ent->key))
+ {
+ ret = 1;
+ break;
+ }
+ ent = ent->next;
+ }
+
+ return ret;
+}
+
+
+void gc_connection_list(void)
+{
+ connection_t **c, *ent = NULL;
+ struct timeval now, delta;
+
+ c = &connection_list;
+ gettimeofday(&now, NULL);
+
+ while ( *c != NULL )
+ {
+ if ( !CONNECTED(*c) )
+ {
+ timersub(&now, &(*c)->disconnect_time, &delta);
+ if ( delta.tv_sec >= XCS_SESSION_TIMEOUT )
+ {
+ DPRINTF(" : Freeing connection %lu after %lds\n",
+ (*c)->id, delta.tv_sec);
+ ent = *c;
+ *c = (*c)->next;
+ connection_free(ent);
+ continue;
+ }
+ }
+ c = &(*c)->next;
+ }
+}
diff --git a/tools/xcs/ctrl_interface.c b/tools/xcs/ctrl_interface.c
new file mode 100644
index 0000000000..76271526cd
--- /dev/null
+++ b/tools/xcs/ctrl_interface.c
@@ -0,0 +1,269 @@
+/* control_interface.c
+ *
+ * Interfaces to control message rings to VMs.
+ *
+ * Most of this is directly based on the original xu interface to python
+ * written by Keir Fraser.
+ *
+ * (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include "xcs.h"
+
+static int xc_handle = -1;
+
+/* Called at start-of-day when using the control channel interface. */
+int ctrl_chan_init(void)
+{
+ if ( (xc_handle = xc_interface_open()) == -1 )
+ {
+ DPRINTF("Could not open Xen control interface");
+ return -1;
+ }
+
+ return 0;
+}
+
+static control_if_t *map_control_interface(int fd, unsigned long pfn,
+ u32 dom)
+{
+ char *vaddr = xc_map_foreign_range( fd, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE, pfn );
+ if ( vaddr == NULL )
+ return NULL;
+ return (control_if_t *)(vaddr + 2048);
+}
+
+static void unmap_control_interface(int fd, control_if_t *c)
+{
+ char *vaddr = (char *)c - 2048;
+ (void)munmap(vaddr, PAGE_SIZE);
+}
+
+int ctrl_chan_notify(control_channel_t *cc)
+{
+ return xc_evtchn_send(xc_handle, cc->local_port);
+}
+
+int ctrl_chan_read_request(control_channel_t *cc, xcs_control_msg_t *dmsg)
+{
+ control_msg_t *smsg;
+ RING_IDX c = cc->tx_ring.req_cons;
+
+ if ( !RING_HAS_UNCONSUMED_REQUESTS(&cc->tx_ring) )
+ {
+ DPRINTF("no request to read\n");
+ return -1;
+ }
+
+ rmb(); /* make sure we see the data associated with the request */
+ smsg = RING_GET_REQUEST(&cc->tx_ring, c);
+ memcpy(&dmsg->msg, smsg, sizeof(*smsg));
+ if ( dmsg->msg.length > sizeof(dmsg->msg.msg) )
+ dmsg->msg.length = sizeof(dmsg->msg.msg);
+ cc->tx_ring.req_cons++;
+ return 0;
+}
+
+int ctrl_chan_write_request(control_channel_t *cc,
+ xcs_control_msg_t *smsg)
+{
+ control_msg_t *dmsg;
+ RING_IDX p = cc->rx_ring.req_prod_pvt;
+
+ if ( RING_FULL(&cc->rx_ring) )
+ {
+ DPRINTF("no space to write request");
+ return -ENOSPC;
+ }
+
+ dmsg = RING_GET_REQUEST(&cc->rx_ring, p);
+ memcpy(dmsg, &smsg->msg, sizeof(*dmsg));
+
+ wmb();
+ cc->rx_ring.req_prod_pvt++;
+ RING_PUSH_REQUESTS(&cc->rx_ring);
+
+ return 0;
+}
+
+int ctrl_chan_read_response(control_channel_t *cc, xcs_control_msg_t *dmsg)
+{
+ control_msg_t *smsg;
+ RING_IDX c = cc->rx_ring.rsp_cons;
+
+ if ( !RING_HAS_UNCONSUMED_RESPONSES(&cc->rx_ring) )
+ {
+ DPRINTF("no response to read");
+ return -1;
+ }
+
+ rmb(); /* make sure we see the data associated with the request */
+ smsg = RING_GET_RESPONSE(&cc->rx_ring, c);
+ memcpy(&dmsg->msg, smsg, sizeof(*smsg));
+ if ( dmsg->msg.length > sizeof(dmsg->msg.msg) )
+ dmsg->msg.length = sizeof(dmsg->msg.msg);
+ cc->rx_ring.rsp_cons++;
+ return 0;
+}
+
+int ctrl_chan_write_response(control_channel_t *cc,
+ xcs_control_msg_t *smsg)
+{
+ control_msg_t *dmsg;
+ RING_IDX p = cc->tx_ring.rsp_prod_pvt;
+
+ /* akw: if the ring is synchronous, you should never need this test! */
+ /* (but it was in the original code... ) */
+ if ( cc->tx_ring.req_cons == cc->tx_ring.rsp_prod_pvt )
+ {
+ DPRINTF("no space to write response");
+ return -ENOSPC;
+ }
+
+ dmsg = RING_GET_RESPONSE(&cc->tx_ring, p);
+ memcpy(dmsg, &smsg->msg, sizeof(*dmsg));
+
+ wmb();
+ cc->tx_ring.rsp_prod_pvt++;
+ RING_PUSH_RESPONSES(&cc->tx_ring);
+
+ return 0;
+}
+
+int ctrl_chan_request_to_read(control_channel_t *cc)
+{
+ return (RING_HAS_UNCONSUMED_REQUESTS(&cc->tx_ring));
+}
+
+int ctrl_chan_space_to_write_request(control_channel_t *cc)
+{
+ return (!(RING_FULL(&cc->rx_ring)));
+}
+
+int ctrl_chan_response_to_read(control_channel_t *cc)
+{
+ return (RING_HAS_UNCONSUMED_RESPONSES(&cc->rx_ring));
+}
+
+int ctrl_chan_space_to_write_response(control_channel_t *cc)
+{
+ /* again, there is something fishy here. */
+ return ( cc->tx_ring.req_cons != cc->tx_ring.rsp_prod_pvt );
+}
+
+int ctrl_chan_connect(control_channel_t *cc)
+{
+ xc_dominfo_t info;
+
+ if ( cc->connected )
+ {
+ return 0;
+ }
+
+ if ( (xc_domain_getinfo(xc_handle, cc->remote_dom, 1, &info) != 1) ||
+ (info.domid != cc->remote_dom) )
+ {
+ DPRINTF("Failed to obtain domain status");
+ return -1;
+ }
+
+ cc->interface =
+ map_control_interface(xc_handle, info.shared_info_frame,
+ cc->remote_dom);
+
+ if ( cc->interface == NULL )
+ {
+ DPRINTF("Failed to map domain control interface");
+ return -1;
+ }
+
+ /* Synchronise ring indexes. */
+ BACK_RING_ATTACH(&cc->tx_ring, &cc->interface->tx_ring, CONTROL_RING_MEM);
+ FRONT_RING_ATTACH(&cc->rx_ring, &cc->interface->rx_ring, CONTROL_RING_MEM);
+
+ cc->connected = 1;
+
+ return 0;
+}
+
+void ctrl_chan_disconnect(control_channel_t *cc)
+{
+ if ( cc->connected )
+ unmap_control_interface(xc_handle, cc->interface);
+ cc->connected = 0;
+}
+
+
+control_channel_t *ctrl_chan_new(u32 dom, int local_port, int remote_port)
+{
+ control_channel_t *cc;
+
+ cc = (control_channel_t *)malloc(sizeof(control_channel_t));
+ if ( cc == NULL ) return NULL;
+
+ cc->connected = 0;
+ cc->remote_dom = dom;
+
+ if ( dom == 0 )
+ {
+ /*
+ * The control-interface event channel for DOM0 is already set up.
+ * We use an ioctl to discover the port at our end of the channel.
+ */
+ local_port = ioctl(xc_handle, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN,
+ NULL);
+ remote_port = -1; /* We don't need the remote end of the DOM0 link. */
+ if ( local_port < 0 )
+ {
+ DPRINTF("Could not open channel to DOM0");
+ goto fail;
+ }
+ }
+ else if ( xc_evtchn_bind_interdomain(xc_handle,
+ DOMID_SELF, dom,
+ &local_port, &remote_port) != 0 )
+ {
+ DPRINTF("Could not open channel to domain");
+ goto fail;
+ }
+
+ cc->local_port = local_port;
+ cc->remote_port = remote_port;
+
+ if ( ctrl_chan_connect(cc) != 0 )
+ goto fail;
+
+ return cc;
+
+ fail:
+ if ( dom != 0 )
+ (void)xc_evtchn_close(xc_handle, DOMID_SELF, local_port);
+
+ free(cc);
+
+ return NULL;
+}
+
+void ctrl_chan_free(control_channel_t *cc)
+{
+ ctrl_chan_disconnect(cc);
+ if ( cc->remote_dom != 0 )
+ (void)xc_evtchn_close(xc_handle, DOMID_SELF, cc->local_port);
+ free(cc);
+}
+
+
+/* other libxc commands: */
+
+int ctrl_chan_bind_virq(int virq, int *port)
+{
+ return xc_evtchn_bind_virq(xc_handle, virq, port);
+}
diff --git a/tools/xcs/dump.c b/tools/xcs/dump.c
new file mode 100644
index 0000000000..4b9d215753
--- /dev/null
+++ b/tools/xcs/dump.c
@@ -0,0 +1,506 @@
+/*\
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+\*/
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "dump.h"
+
+#define str(a) # a
+#define error(a, ...) do { \
+ _error("%s:%s():L%d: " a, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__);\
+ exit(1); \
+} while (0)
+#define warn(a, ...) do { \
+ _error("%s:%s():L%d: " a, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__);\
+} while (0)
+#define debug(a, ...) do { \
+ _error(a, ## __VA_ARGS__);\
+} while (0)
+
+void _error(const char *fmt, ...);
+
+#define debug_begin(a, b) debug("CMSG_" a "_" b " {")
+#define debug_end(a, b) debug("}")
+#define debug_field(a, b, c) debug("\t." str(b) " = " c, a->b)
+#define debug_field_mac(a, b) \
+ debug("\t." str(b) " = %.2x:%.2x:%.2x:%.2x:%.2x:%.2x", \
+ a->b[0], a->b[1], a->b[2], a->b[3], a->b[4], a->b[5])
+
+#define debug_dump(a, b, c) debug_hex("\t." str(b) " = ", a->b, a->c)
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+static int strcount(const char *str, char ch)
+{
+ int i;
+ int count = 0;
+
+ for (i = 0; str[i]; i++) {
+ if (str[i] == ch) {
+ count++;
+ }
+ }
+
+ return count;
+}
+
+void debug_hex(const char *info, const uint8_t *data, size_t length)
+{
+ int indent = strlen(info) + (strcount(info, '\t') * 8 - 1);
+ int words_per_row = (2 * (80 - indent - 2) / 7) & ~1;
+ size_t i;
+
+ for (i = 0; i < length; i += words_per_row) {
+ size_t ind;
+
+ if (i == 0) {
+ fprintf(stderr, "%s", info);
+ } else {
+ int j;
+ for (j = 0; j < indent; j++) {
+ fprintf(stderr, " ");
+ }
+ }
+
+ for (ind = 0; ind < words_per_row; ind++) {
+ if (ind % 2 == 0) {
+ fprintf(stderr, " ");
+ }
+
+ if (i + ind < length) {
+ fprintf(stderr, "%.2X", data[i + ind]);
+ } else {
+ fprintf(stderr, " ");
+ }
+ }
+
+ fprintf(stderr, " ");
+
+ for (ind = 0; ind < words_per_row; ind++) {
+ if (i + ind < length) {
+ if (isprint(data[i + ind])) {
+ fprintf(stderr, "%c", data[i + ind]);
+ } else {
+ fprintf(stderr, ".");
+ }
+ } else {
+ fprintf(stderr, " ");
+ }
+ }
+ fprintf(stderr, "\n");
+ }
+}
+
+void dump_msg(const control_msg_t *msg, uint64_t flags)
+{
+ if ((flags & (1 << msg->type)) == 0) {
+ return;
+ }
+
+ switch (msg->type) {
+ case CMSG_CONSOLE:
+ if (msg->subtype == CMSG_CONSOLE_DATA) {
+ debug_begin("CONSOLE", "DATA");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("CONSOLE", "DATA");
+ } else {
+ debug_begin("CONSOLE", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("CONSOLE", "UNKNOWN");
+ }
+ break;
+ case CMSG_BLKIF_BE:
+ if (msg->subtype == CMSG_BLKIF_BE_CREATE) {
+ blkif_be_create_t *load;
+ load = (blkif_be_create_t *)msg->msg;
+ debug_begin("BLKIF_BE", "CREATE");
+ debug_field(load, domid, "%u");
+ debug_field(load, blkif_handle, "%u");
+ debug_field(load, status, "%u");
+ debug_end("BLKIF_BE", "CREATE");
+ } else if (msg->subtype == CMSG_BLKIF_BE_DESTROY) {
+ blkif_be_destroy_t *load;
+ load = (blkif_be_destroy_t *)msg->msg;
+ debug_begin("BLKIF_BE", "DESTROY");
+ debug_field(load, domid, "%u");
+ debug_field(load, blkif_handle, "%u");
+ debug_field(load, status, "%u");
+ debug_end("BLKIF_BE", "DESTROY");
+ } else if (msg->subtype == CMSG_BLKIF_BE_CONNECT) {
+ blkif_be_connect_t *load;
+ load = (blkif_be_connect_t *)msg->msg;
+ debug_begin("BLKIF_BE", "CONNECT");
+ debug_field(load, domid, "%u");
+ debug_field(load, blkif_handle, "%u");
+ debug_field(load, shmem_frame, "%lu");
+ debug_field(load, evtchn, "%u");
+ debug_field(load, status, "%u");
+ debug_end("BLKIF_BE", "CONNECT");
+ } else if (msg->subtype == CMSG_BLKIF_BE_DISCONNECT) {
+ blkif_be_disconnect_t *load;
+ load = (blkif_be_disconnect_t *)msg->msg;
+ debug_begin("BLKIF_BE", "DISCONNECT");
+ debug_field(load, domid, "%u");
+ debug_field(load, blkif_handle, "%u");
+ debug_field(load, status, "%u");
+ debug_end("BLKIF_BE", "DISCONNECT");
+ } else if (msg->subtype == CMSG_BLKIF_BE_VBD_CREATE) {
+ blkif_be_vbd_create_t *load;
+ load = (blkif_be_vbd_create_t *)msg->msg;
+ debug_begin("BLKIF_BE", "VBD_CREATE");
+ debug_field(load, domid, "%u");
+ debug_field(load, blkif_handle, "%u");
+ debug_field(load, pdevice, "%u");
+ debug_field(load, vdevice, "%u");
+ debug_field(load, readonly, "%u");
+ debug_field(load, status, "%u");
+ debug_end("BLKIF_BE", "VBD_CREATE");
+ } else if (msg->subtype == CMSG_BLKIF_BE_VBD_DESTROY) {
+ blkif_be_vbd_destroy_t *load;
+ load = (blkif_be_vbd_destroy_t *)msg->msg;
+ debug_begin("BLKIF_BE", "VBD_DESTROY");
+ debug_field(load, domid, "%u");
+ debug_field(load, blkif_handle, "%u");
+ debug_field(load, vdevice, "%u");
+ debug_field(load, status, "%u");
+ debug_end("BLKIF_BE", "VBD_DESTROY");
+ } else if (msg->subtype == CMSG_BLKIF_BE_DRIVER_STATUS) {
+ blkif_be_driver_status_t *load;
+ load = (blkif_be_driver_status_t *)msg->msg;
+ debug_begin("BLKIF_BE", "DRIVER_STATUS");
+ debug_field(load, status, "%u");
+ debug_end("BLKIF_BE", "DRIVER_STATUS");
+ } else {
+ debug_begin("BLKIF_BE", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("BLKIF_BE", "UNKNOWN");
+ }
+ break;
+ case CMSG_BLKIF_FE:
+ if (msg->subtype == CMSG_BLKIF_FE_INTERFACE_STATUS) {
+ blkif_fe_interface_status_t *load;
+ load = (blkif_fe_interface_status_t *)msg->msg;
+ debug_begin("BLKIF_FE", "INTERFACE_STATUS");
+ debug_field(load, handle, "%u");
+ debug_field(load, status, "%u");
+ debug_field(load, evtchn, "%u");
+ debug_field(load, domid, "%u");
+ debug_end("BLKIF_FE", "INTERFACE_STATUS");
+ } else if (msg->subtype == CMSG_BLKIF_FE_DRIVER_STATUS) {
+ blkif_fe_driver_status_t *load;
+ load = (blkif_fe_driver_status_t *)msg->msg;
+ debug_begin("BLKIF_FE", "DRIVER_STATUS");
+ debug_field(load, status, "%u");
+ debug_field(load, max_handle, "%u");
+ debug_end("BLKIF_FE", "DRIVER_STATUS");
+ } else if (msg->subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT) {
+ blkif_fe_interface_connect_t *load;
+ load = (blkif_fe_interface_connect_t *)msg->msg;
+ debug_begin("BLKIF_FE", "INTERFACE_CONNECT");
+ debug_field(load, handle, "%u");
+ debug_field(load, shmem_frame, "%lu");
+ debug_end("BLKIF_FE", "INTERFACE_CONNECT");
+ } else if (msg->subtype == CMSG_BLKIF_FE_INTERFACE_DISCONNECT) {
+ blkif_fe_interface_disconnect_t *load;
+ load = (blkif_fe_interface_disconnect_t *)msg->msg;
+ debug_begin("BLKIF_FE", "INTERFACE_DISCONNECT");
+ debug_field(load, handle, "%u");
+ debug_end("BLKIF_FE", "INTERFACE_DISCONNECT");
+ } else if (msg->subtype == CMSG_BLKIF_FE_INTERFACE_QUERY) {
+ blkif_fe_interface_query_t *load;
+ load = (blkif_fe_interface_query_t *)msg->msg;
+ debug_begin("BLKIF_FE", "INTERFACE_QUERY");
+ debug_field(load, handle, "%u");
+ debug_field(load, status, "%u");
+ debug_field(load, evtchn, "%u");
+ debug_field(load, domid, "%u");
+ debug_end("BLKIF_FE", "INTERFACE_QUERY");
+ } else {
+ debug_begin("BLKIF_FE", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("BLKIF_FE", "UNKNOWN");
+ }
+ break;
+ case CMSG_NETIF_BE:
+ if (msg->subtype == CMSG_NETIF_BE_CREATE) {
+ netif_be_create_t *load;
+ load = (netif_be_create_t *)msg->msg;
+ debug_begin("NETIF_BE", "CREATE");
+ debug_field(load, domid, "%u");
+ debug_field(load, netif_handle, "%u");
+ debug_field_mac(load, mac);
+ debug_field_mac(load, be_mac);
+ debug_field(load, status, "%u");
+ debug_end("NETIF_BE", "CREATE");
+ } else if (msg->subtype == CMSG_NETIF_BE_DESTROY) {
+ netif_be_destroy_t *load;
+ load = (netif_be_destroy_t *)msg->msg;
+ debug_begin("NETIF_BE", "DESTROY");
+ debug_field(load, domid, "%u");
+ debug_field(load, netif_handle, "%u");
+ debug_field(load, status, "%u");
+ debug_end("NETIF_BE", "DESTROY");
+ } else if (msg->subtype == CMSG_NETIF_BE_CONNECT) {
+ netif_be_connect_t *load;
+ load = (netif_be_connect_t *)msg->msg;
+ debug_begin("NETIF_BE", "CONNECT");
+ debug_field(load, domid, "%u");
+ debug_field(load, netif_handle, "%u");
+ debug_field(load, tx_shmem_frame, "%lu");
+ debug_field(load, rx_shmem_frame, "%lu");
+ debug_field(load, evtchn, "%u");
+ debug_field(load, status, "%u");
+ debug_end("NETIF_BE", "CONNECT");
+ } else if (msg->subtype == CMSG_NETIF_BE_DISCONNECT) {
+ netif_be_disconnect_t *load;
+ load = (netif_be_disconnect_t *)msg->msg;
+ debug_begin("NETIF_BE", "DISCONNECT");
+ debug_field(load, domid, "%u");
+ debug_field(load, netif_handle, "%u");
+ debug_field(load, status, "%u");
+ debug_end("NETIF_BE", "DISCONNECT");
+ } else if (msg->subtype == CMSG_NETIF_BE_DRIVER_STATUS) {
+ netif_be_driver_status_t *load;
+ load = (netif_be_driver_status_t *)msg->msg;
+ debug_begin("NETIF_BE", "DRIVER_STATUS");
+ debug_field(load, status, "%u");
+ debug_end("NETIF_BE", "DRIVER_STATUS");
+ } else {
+ debug_begin("NETIF_BE", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("NETIF_BE", "UNKNOWN");
+ }
+ break;
+ case CMSG_NETIF_FE:
+ if (msg->subtype == CMSG_NETIF_FE_INTERFACE_STATUS) {
+ netif_fe_interface_status_t *load;
+ load = (netif_fe_interface_status_t *)msg->msg;
+ debug_begin("NETIF_FE", "INTERFACE_STATUS");
+ debug_field(load, handle, "%u");
+ debug_field(load, status, "%u");
+ debug_field(load, evtchn, "%u");
+ debug_field_mac(load, mac);
+ debug_field(load, domid, "%u");
+ debug_end("NETIF_FE", "INTERFACE_STATUS");
+ } else if (msg->subtype == CMSG_NETIF_FE_DRIVER_STATUS) {
+ netif_fe_driver_status_t *load;
+ load = (netif_fe_driver_status_t *)msg->msg;
+ debug_begin("NETIF_FE", "DRIVER_STATUS");
+ debug_field(load, status, "%u");
+ debug_field(load, max_handle, "%u");
+ debug_end("NETIF_FE", "DRIVER_STATUS");
+ } else if (msg->subtype == CMSG_NETIF_FE_INTERFACE_CONNECT) {
+ netif_fe_interface_connect_t *load;
+ load = (netif_fe_interface_connect_t *)msg->msg;
+ debug_begin("NETIF_FE", "INTERFACE_CONNECT");
+ debug_field(load, handle, "%u");
+ debug_field(load, tx_shmem_frame, "%lu");
+ debug_field(load, rx_shmem_frame, "%lu");
+ debug_end("NETIF_FE", "INTERFACE_CONNECT");
+ } else if (msg->subtype == CMSG_NETIF_FE_INTERFACE_DISCONNECT) {
+ netif_fe_interface_disconnect_t *load;
+ load = (netif_fe_interface_disconnect_t *)msg->msg;
+ debug_begin("NETIF_FE", "INTERFACE_DISCONNECT");
+ debug_field(load, handle, "%u");
+ debug_end("NETIF_FE", "INTERFACE_DISCONNECT");
+ } else if (msg->subtype == CMSG_NETIF_FE_INTERFACE_QUERY) {
+ netif_fe_interface_query_t *load;
+ load = (netif_fe_interface_query_t *)msg->msg;
+ debug_begin("NETIF_FE", "INTERFACE_QUERY");
+ debug_field(load, handle, "%u");
+ debug_field(load, status, "%u");
+ debug_field(load, evtchn, "%u");
+ debug_field_mac(load, mac);
+ debug_field(load, domid, "%u");
+ debug_end("NETIF_FE", "INTERFACE_QUERY");
+ } else {
+ debug_begin("NETIF_FE", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("NETIF_FE", "UNKNOWN");
+ }
+ break;
+ case CMSG_SHUTDOWN:
+ if (msg->subtype == CMSG_SHUTDOWN_POWEROFF) {
+ debug_begin("SHUTDOWN", "POWEROFF");
+ debug_end("SHUTDOWN", "POWEROFF");
+ } else if (msg->subtype == CMSG_SHUTDOWN_REBOOT) {
+ debug_begin("SHUTDOWN", "REBOOT");
+ debug_end("SHUTDOWN", "REBOOT");
+ } else if (msg->subtype == CMSG_SHUTDOWN_SUSPEND) {
+ debug_begin("SHUTDOWN", "SUSPEND");
+ debug_end("SHUTDOWN", "SUSPEND");
+ } else if (msg->subtype == CMSG_SHUTDOWN_SYSRQ) {
+ debug_begin("SHUTDOWN", "SYSRQ");
+ debug_end("SHUTDOWN", "SYSRQ");
+ } else {
+ debug_begin("SHUTDOWN", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("SHUTDOWN", "UNKNOWN");
+ }
+ break;
+ case CMSG_MEM_REQUEST:
+ if (msg->subtype == CMSG_MEM_REQUEST_SET) {
+ mem_request_t *load;
+ load = (mem_request_t *)msg->msg;
+ debug_begin("MEM_REQUEST", "SET");
+ debug_field(load, target, "%u");
+ debug_field(load, status, "%u");
+ debug_end("MEM_REQUEST", "SET");
+ } else {
+ debug_begin("MEM_REQUEST", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("MEM_REQUEST", "UNKNOWN");
+ }
+ break;
+ case CMSG_USBIF_BE:
+ if (msg->subtype == CMSG_USBIF_BE_CREATE) {
+ usbif_be_create_t *load;
+ load = (usbif_be_create_t *)msg->msg;
+ debug_begin("USBIF_BE", "CREATE");
+ debug_field(load, domid, "%u");
+ debug_field(load, status, "%u");
+ debug_end("USBIF_BE", "CREATE");
+ } else if (msg->subtype == CMSG_USBIF_BE_DESTROY) {
+ usbif_be_destroy_t *load;
+ load = (usbif_be_destroy_t *)msg->msg;
+ debug_begin("USBIF_BE", "DESTROY");
+ debug_field(load, domid, "%u");
+ debug_field(load, status, "%u");
+ debug_end("USBIF_BE", "DESTROY");
+ } else if (msg->subtype == CMSG_USBIF_BE_CONNECT) {
+ usbif_be_connect_t *load;
+ load = (usbif_be_connect_t *)msg->msg;
+ debug_begin("USBIF_BE", "CONNECT");
+ debug_field(load, domid, "%u");
+ debug_field(load, shmem_frame, "%lu");
+ debug_field(load, evtchn, "%u");
+ debug_field(load, bandwidth, "%u");
+ debug_field(load, status, "%u");
+ debug_end("USBIF_BE", "CONNECT");
+ } else if (msg->subtype == CMSG_USBIF_BE_DISCONNECT) {
+ usbif_be_disconnect_t *load;
+ load = (usbif_be_disconnect_t *)msg->msg;
+ debug_begin("USBIF_BE", "DISCONNECT");
+ debug_field(load, domid, "%u");
+ debug_field(load, status, "%u");
+ debug_end("USBIF_BE", "DISCONNECT");
+ } else if (msg->subtype == CMSG_USBIF_BE_CLAIM_PORT) {
+ usbif_be_claim_port_t *load;
+ load = (usbif_be_claim_port_t *)msg->msg;
+ debug_begin("USBIF_BE", "CLAIM_PORT");
+ debug_field(load, domid, "%u");
+ debug_field(load, usbif_port, "%u");
+ debug_field(load, status, "%u");
+ debug_field(load, path, "%s");
+ debug_end("USBIF_BE", "CLAIM_PORT");
+ } else if (msg->subtype == CMSG_USBIF_BE_RELEASE_PORT) {
+ usbif_be_release_port_t *load;
+ load = (usbif_be_release_port_t *)msg->msg;
+ debug_begin("USBIF_BE", "RELEASE_PORT");
+ debug_field(load, path, "%s");
+ debug_end("USBIF_BE", "RELEASE_PORT");
+ } else if (msg->subtype == CMSG_USBIF_BE_DRIVER_STATUS_CHANGED) {
+ usbif_be_driver_status_changed_t *load;
+ load = (usbif_be_driver_status_changed_t *)msg->msg;
+ debug_begin("USBIF_BE", "DRIVER_STATUS_CHANGED");
+ debug_field(load, status, "%u");
+ debug_end("USBIF_BE", "DRIVER_STATUS_CHANGED");
+ } else {
+ debug_begin("USBIF_BE", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("USBIF_BE", "UNKNOWN");
+ }
+ break;
+ case CMSG_USBIF_FE:
+ if (msg->subtype == CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED) {
+ usbif_fe_interface_status_changed_t *load;
+ load = (usbif_fe_interface_status_changed_t *)msg->msg;
+ debug_begin("USBIF_FE", "INTERFACE_STATUS_CHANGED");
+ debug_field(load, status, "%u");
+ debug_field(load, evtchn, "%u");
+ debug_field(load, domid, "%u");
+ debug_field(load, bandwidth, "%u");
+ debug_field(load, num_ports, "%u");
+ debug_end("USBIF_FE", "INTERFACE_STATUS_CHANGED");
+ } else if (msg->subtype == CMSG_USBIF_FE_DRIVER_STATUS_CHANGED) {
+ usbif_fe_driver_status_changed_t *load;
+ load = (usbif_fe_driver_status_changed_t *)msg->msg;
+ debug_begin("USBIF_FE", "DRIVER_STATUS_CHANGED");
+ debug_field(load, status, "%u");
+ debug_end("USBIF_FE", "DRIVER_STATUS_CHANGED");
+ } else if (msg->subtype == CMSG_USBIF_FE_INTERFACE_CONNECT) {
+ usbif_fe_interface_connect_t *load;
+ load = (usbif_fe_interface_connect_t *)msg->msg;
+ debug_begin("USBIF_FE", "INTERFACE_CONNECT");
+ debug_field(load, shmem_frame, "%lu");
+ debug_end("USBIF_FE", "INTERFACE_CONNECT");
+ } else if (msg->subtype == CMSG_USBIF_FE_INTERFACE_DISCONNECT) {
+ debug_begin("USBIF_FE", "INTERFACE_DISCONNECT");
+ debug_end("USBIF_FE", "INTERFACE_DISCONNECT");
+ } else {
+ debug_begin("USBIF_FE", "UNKNOWN");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("USBIF_FE", "UNKNOWN");
+ }
+ break;
+ default:
+ debug_begin("UNKNOWN", "UNKNOWN");
+ debug_field(msg, type, "%u");
+ debug_field(msg, subtype, "%u");
+ debug_field(msg, length, "%u");
+ debug_dump(msg, msg, length);
+ debug_end("UNKNOWN", "UNKNOWN");
+ break;
+ }
+}
+
+void _error(const char *fmt, ...)
+{
+ va_list ap;
+ char buffer[4096];
+
+ va_start(ap, fmt);
+ vsnprintf(buffer, sizeof(buffer), fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s\n", buffer);
+}
+
diff --git a/tools/xcs/dump.h b/tools/xcs/dump.h
new file mode 100644
index 0000000000..721ea86b32
--- /dev/null
+++ b/tools/xcs/dump.h
@@ -0,0 +1,28 @@
+/*\
+ * Copyright (C) International Business Machines Corp., 2005
+ * Author(s): Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+\*/
+
+#ifndef XENCTLD_ERROR_H
+#define XENCTLD_ERROR_H
+
+#include <stdint.h>
+#include <xc.h>
+#include <xen/io/domain_controller.h>
+
+void dump_msg(const control_msg_t *msg, uint64_t flags);
+
+#endif
diff --git a/tools/xcs/evtchn.c b/tools/xcs/evtchn.c
new file mode 100644
index 0000000000..71a297629d
--- /dev/null
+++ b/tools/xcs/evtchn.c
@@ -0,0 +1,106 @@
+/* evtchn.c
+ *
+ * Interfaces to event channel driver.
+ *
+ * Most of this is directly based on the original xu interface to python
+ * written by Keir Fraser.
+ *
+ * (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h> /* XOPEN drops makedev, this gets it back. */
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include "xcs.h"
+
+static int evtchn_fd = -1;
+
+/* NB. The following should be kept in sync with the kernel's evtchn driver. */
+#define EVTCHN_DEV_NAME "/dev/xen/evtchn"
+#define EVTCHN_DEV_MAJOR 10
+#define EVTCHN_DEV_MINOR 201
+/* /dev/xen/evtchn ioctls: */
+/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */
+#define EVTCHN_RESET _IO('E', 1)
+/* EVTCHN_BIND: Bind to the specified event-channel port. */
+#define EVTCHN_BIND _IO('E', 2)
+/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */
+#define EVTCHN_UNBIND _IO('E', 3)
+
+int evtchn_read()
+{
+ u16 v;
+ int bytes;
+
+ while ( (bytes = read(evtchn_fd, &v, sizeof(v))) == -1 )
+ {
+ if ( errno == EINTR )
+ continue;
+        /* EAGAIN was caused to return 'None' in the python version... */
+ return -errno;
+ }
+
+ if ( bytes == sizeof(v) )
+ return v;
+
+ /* bad return */
+ return -1;
+}
+
+void evtchn_unmask(u16 idx)
+{
+ (void)write(evtchn_fd, &idx, sizeof(idx));
+}
+
+int evtchn_bind(int idx)
+{
+ if ( ioctl(evtchn_fd, EVTCHN_BIND, idx) != 0 )
+ return -errno;
+
+ return 0;
+}
+
+int evtchn_unbind(int idx)
+{
+ if ( ioctl(evtchn_fd, EVTCHN_UNBIND, idx) != 0 )
+ return -errno;
+
+ return 0;
+}
+
+int evtchn_open(void)
+{
+ struct stat st;
+
+ /* Make sure any existing device file links to correct device. */
+ if ( (lstat(EVTCHN_DEV_NAME, &st) != 0) ||
+ !S_ISCHR(st.st_mode) ||
+ (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) )
+ (void)unlink(EVTCHN_DEV_NAME);
+
+ reopen:
+ evtchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
+ if ( evtchn_fd == -1 )
+ {
+ if ( (errno == ENOENT) &&
+ ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+ (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
+ makedev(EVTCHN_DEV_MAJOR,EVTCHN_DEV_MINOR)) == 0) )
+ goto reopen;
+ return -errno;
+ }
+ return evtchn_fd;
+}
+
+void evtchn_close()
+{
+ (void)close(evtchn_fd);
+ evtchn_fd = -1;
+}
+
diff --git a/tools/xcs/xcs.c b/tools/xcs/xcs.c
new file mode 100644
index 0000000000..f865d5d977
--- /dev/null
+++ b/tools/xcs/xcs.c
@@ -0,0 +1,973 @@
+/* xcs.c
+ *
+ * xcs - Xen Control Switch
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ */
+
+/*
+
+ Things we need to select on in xcs:
+
+ 1. Events arriving on /dev/evtchn
+
+ These will kick a function to read everything off the fd, and scan the
+ associated control message rings, resulting in notifications sent on
+ data channels to connected clients.
+
+ 2. New TCP connections on XCS_PORT.
+
   These will either be control (initially) or associated data connections.
+
   Control connections will instantiate or rebind to an existing connection
+ struct. The control channel is used to configure what events will be
+ received on an associated data channel. These two channels are split
+ out because the control channel is synchronous, all messages will return
+ a result from XCS. The data channel is effectively asynchronous, events
+ may arrive in the middle of a control message exchange. Additionally,
   having two TCP connections allows the client side to have a blocking
+ listen loop for data messages, while independently interacting on the
+ control channel at other places in the code.
+
+ Data connections attach to an existing control struct, using a session
+ id that is passed during the control connect. There is currently a
+ one-to-one relationship between data and control channels, but there
+ could just as easily be many data channels, if there were a set of
+ clients with identical interests, or if you wanted to trace an existing
+ client's data traffic.
+
+ 3. Messages arriving on open TCP connections.
+ There are three types of open connections:
+
+ 3a. Messages arriving on open control channel file descriptors.
+
+ [description of the control protocol here]
+
+ 3b. Messages arriving on open data channel file descriptors.
+
+ [description of the data protocol here]
+
+ 3c. Messages arriving on (new) unbound connections.
+
+ A connection must issue a XCS_CONNECT message to specify what
+ it is, after which the connection is moved into one of the above
+ two groups.
+
+ Additionally, we need a periodic timer to do housekeeping.
+
+ 4. Every XCS_GC_INTERVAL seconds, we need to clean up outstanding state.
+ Specifically, we garbage collect any sessions (connection_t structs)
+ that have been unconnected for a period of time (XCS_SESSION_TIMEOUT),
      and close any connections that have been opened, but not connected
+ as a control or data connection (XCS_UFD_TIMEOUT).
+
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+#include <malloc.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include "xcs.h"
+
+#undef fd_max
+#define fd_max(x,y) ((x) > (y) ? (x) : (y))
+
+/* ------[ Control channel interfaces ]------------------------------------*/
+
+static control_channel_t *cc_list[NR_EVENT_CHANNELS];
+static int *dom_port_map = 0;
+static int dom_port_map_size = 0;
+
+static void map_dom_to_port(u32 dom, int port)
+{
+ if (dom >= dom_port_map_size) {
+ dom_port_map = (int *)realloc(dom_port_map,
+ (dom + 256) * sizeof(dom_port_map[0]));
+
+ if (dom_port_map == NULL) {
+ perror("realloc(dom_port_map)");
+ exit(1);
+ }
+
+ for (; dom_port_map_size < dom + 256; dom_port_map_size++) {
+ dom_port_map[dom_port_map_size] = -1;
+ }
+ }
+
+ dom_port_map[dom] = port;
+}
+
+static int dom_to_port(u32 dom)
+{
+ if (dom >= dom_port_map_size) return -1;
+
+ return dom_port_map[dom];
+}
+
+static void init_interfaces(void)
+{
+ memset(cc_list, 0, sizeof cc_list);
+}
+
+static control_channel_t *add_interface(u32 dom, int local_port,
+ int remote_port)
+{
+ control_channel_t *cc=NULL, *oldcc;
+ int ret;
+
+ if ((dom_to_port(dom) >= 0) && (cc_list[dom_to_port(dom)] != NULL))
+ {
+ return(cc_list[dom_to_port(dom)]);
+ }
+
+ if (cc_list[local_port] == NULL)
+ {
+ cc = ctrl_chan_new(dom, local_port, remote_port);
+ }
+
+ if (cc == NULL)
+ return NULL;
+
+ DPRINTF("added a new interface: dom: %u (l:%d,r:%d): %p\n",
+ dom, local_port, remote_port, cc);
+ DPRINTF("added a new interface: dom: %u (l:%d,r:%d): %p\n",
+ dom, cc->local_port, cc->remote_port, cc);
+
+ if ((ret = evtchn_bind(cc->local_port)) != 0)
+ {
+ DPRINTF("Got control interface, but couldn't bind evtchan!(%d)\n", ret);
+ ctrl_chan_free(cc);
+ return NULL;
+ }
+
+ if ( cc_list[cc->local_port] != NULL )
+ {
+ oldcc = cc_list[cc->local_port];
+
+ if ((oldcc->remote_dom != cc->remote_dom) ||
+ (oldcc->remote_port != cc->remote_port))
+ {
+ DPRINTF("CC conflict! (port: %d, old dom: %u, new dom: %u, "
+ "old ref_count: %d)\n",
+ cc->local_port, oldcc->remote_dom, cc->remote_dom,
+ oldcc->ref_count);
+ map_dom_to_port(oldcc->remote_dom, -1);
+ ctrl_chan_free(cc_list[cc->local_port]);
+ cc_list[cc->local_port] = NULL;
+ }
+ }
+
+ cc_list[cc->local_port] = cc;
+ map_dom_to_port(cc->remote_dom, cc->local_port);
+ cc->type = CC_TYPE_INTERDOMAIN;
+ cc->ref_count = 0;
+ return cc;
+}
+
+control_channel_t *add_virq(int virq)
+{
+ control_channel_t *cc;
+ int virq_port;
+
+ if (ctrl_chan_bind_virq(virq, &virq_port) == -1)
+ return NULL;
+
+ if ((cc_list[virq_port] != NULL) &&
+ (cc_list[virq_port]->type != CC_TYPE_VIRQ))
+ return NULL;
+
+ if ((cc_list[virq_port] != NULL) &&
+ (cc_list[virq_port]->type == CC_TYPE_VIRQ))
+ return cc_list[virq_port];
+
+ cc = (control_channel_t *)malloc(sizeof(control_channel_t));
+ if ( cc == NULL ) return NULL;
+
+ memset(cc, 0, sizeof(control_channel_t));
+ cc->type = CC_TYPE_VIRQ;
+ cc->local_port = virq_port;
+ cc->virq = virq;
+ cc->ref_count = 1;
+
+ if (evtchn_bind(cc->local_port) != 0)
+ {
+ DPRINTF("Got control interface, but couldn't bind evtchan!\n");
+ free(cc);
+ return NULL;
+ }
+
+ cc_list[cc->local_port] = cc;
+
+ return cc;
+}
+
+void get_interface(control_channel_t *cc)
+{
+ if (cc != NULL)
+ cc->ref_count++;
+}
+
+void put_interface(control_channel_t *cc)
+{
+ if (cc != NULL)
+ {
+ cc->ref_count--;
+ if (cc->ref_count <= 0)
+ {
+ DPRINTF("Freeing cc on port %d.\n", cc->local_port);
+ (void)evtchn_unbind(cc->local_port);
+ cc_list[cc->local_port] = NULL;
+ map_dom_to_port(cc->remote_dom, -1);
+ ctrl_chan_free(cc);
+ }
+ }
+}
+
+/* ------[ Simple helpers ]------------------------------------------------*/
+
+/* listen_socket() is straight from paul sheer's useful select_tut manpage. */
+static int listen_socket (char *listen_path)
+{
+ struct sockaddr_un a;
+ int s;
+ int yes;
+
+ if ((s = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
+ {
+ perror ("socket");
+ return -1;
+ }
+
+ yes = 1;
+
+ memset (&a, 0, sizeof (a));
+ a.sun_family = AF_UNIX;
+ strcpy(a.sun_path, listen_path);
+
+ /* remove an old socket if it exists. */
+ unlink(listen_path);
+
+ if (bind(s, (struct sockaddr *) &a, sizeof (a)) < 0)
+ {
+ fprintf (stderr, "bind('%s'): %s\n", listen_path, strerror(errno));
+ close (s);
+ return -1;
+ }
+ DPRINTF ("accepting connections on path %s\n", listen_path);
+ listen (s, 10);
+ return s;
+}
+
+/* ------[ Message handlers ]----------------------------------------------*/
+
+#define NO_CHANGE 0
+#define CONNECTED 1
+#define DISCONNECTED 2
+int handle_connect_msg( xcs_msg_t *msg, int fd )
+{
+ xcs_connect_msg_t *cmsg = &msg->u.connect;
+ connection_t *con;
+ int ret = NO_CHANGE;
+
+ switch (msg->type)
+ {
+ case XCS_CONNECT_CTRL:
+ {
+ if ( cmsg->session_id == 0 )
+ {
+ con = connection_new();
+ if ( con == NULL)
+ {
+ msg->result = XCS_RSLT_FAILED;
+ break;
+ }
+ msg->result = XCS_RSLT_OK;
+ cmsg->session_id = con->id;
+ con->ctrl_fd = fd;
+ ret = CONNECTED;
+ DPRINTF("New control connection\n");
+ break;
+ }
+
+ con = get_con_by_session(cmsg->session_id);
+ if ( con == NULL )
+ {
+ msg->result = XCS_RSLT_BADSESSION;
+ break;
+ }
+ if ( con->ctrl_fd != -1 )
+ {
+ msg->result = XCS_RSLT_CONINUSE;
+ break;
+ }
+ con->ctrl_fd = fd;
+ msg->result = XCS_RSLT_OK;
+ ret = CONNECTED;
+ DPRINTF("Rebound to control connection\n");
+ break;
+ }
+ case XCS_CONNECT_DATA:
+ {
+ con = get_con_by_session(cmsg->session_id);
+ if ( con == NULL )
+ {
+ msg->result = XCS_RSLT_BADSESSION;
+ break;
+ }
+ if ( con->data_fd != -1 )
+ {
+ msg->result = XCS_RSLT_CONINUSE;
+ break;
+ }
+ con->data_fd = fd;
+ msg->result = XCS_RSLT_OK;
+ ret = CONNECTED;
+ DPRINTF("Attached data connection\n");
+ break;
+
+ }
+ case XCS_CONNECT_BYE:
+ {
+ close ( fd );
+ ret = DISCONNECTED;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int handle_control_message( connection_t *con, xcs_msg_t *msg )
+{
+ int ret;
+ int reply_needed = 1;
+
+ DPRINTF("Got message, type %u.\n", msg->type);
+
+ switch (msg->type)
+ {
+ case XCS_MSG_BIND:
+ {
+ xcs_bind_msg_t *bmsg = &msg->u.bind;
+
+ if ( ! BIND_MSG_VALID(bmsg) )
+ {
+ msg->result = XCS_RSLT_BADREQUEST;
+ break;
+ }
+
+ ret = xcs_bind(con, bmsg->port, bmsg->type);
+ if (ret == 0) {
+ msg->result = XCS_RSLT_OK;
+ } else {
+ msg->result = XCS_RSLT_FAILED;
+ }
+ break;
+ }
+ case XCS_MSG_UNBIND:
+ {
+ xcs_bind_msg_t *bmsg = &msg->u.bind;
+
+ if ( ! BIND_MSG_VALID(bmsg) )
+ {
+ msg->result = XCS_RSLT_BADREQUEST;
+ break;
+ }
+
+ ret = xcs_unbind(con, bmsg->port, bmsg->type);
+ if (ret == 0) {
+ msg->result = XCS_RSLT_OK;
+ } else {
+ msg->result = XCS_RSLT_FAILED;
+ }
+ break;
+ }
+ case XCS_VIRQ_BIND:
+ {
+ control_channel_t *cc;
+ xcs_virq_msg_t *vmsg = &msg->u.virq;
+ if ( ! VIRQ_MSG_VALID(vmsg) )
+ {
+ msg->result = XCS_RSLT_BADREQUEST;
+ break;
+ }
+
+ cc = add_virq(vmsg->virq);
+ if (cc == NULL)
+ {
+ msg->result = XCS_RSLT_FAILED;
+ break;
+ }
+ ret = xcs_bind(con, cc->local_port, TYPE_VIRQ);
+ if (ret == 0) {
+ vmsg->port = cc->local_port;
+ msg->result = XCS_RSLT_OK;
+ } else {
+ msg->result = XCS_RSLT_FAILED;
+ }
+ break;
+ }
+
+ case XCS_CIF_NEW_CC:
+ {
+ control_channel_t *cc;
+ xcs_interface_msg_t *imsg = &msg->u.interface;
+
+ if ( ! INTERFACE_MSG_VALID(imsg) )
+ {
+ msg->result = XCS_RSLT_BADREQUEST;
+ break;
+ }
+
+ cc = add_interface(imsg->dom, imsg->local_port, imsg->remote_port);
+ if (cc != NULL) {
+ get_interface(cc);
+ msg->result = XCS_RSLT_OK;
+ imsg->local_port = cc->local_port;
+ imsg->remote_port = cc->remote_port;
+ } else {
+ msg->result = XCS_RSLT_FAILED;
+ }
+ break;
+ }
+
+ case XCS_CIF_FREE_CC:
+ {
+ control_channel_t *cc;
+ xcs_interface_msg_t *imsg = &msg->u.interface;
+
+ if ( ! INTERFACE_MSG_VALID(imsg) )
+ {
+ msg->result = XCS_RSLT_BADREQUEST;
+ break;
+ }
+
+ cc = add_interface(imsg->dom, imsg->local_port, imsg->remote_port);
+ if (cc != NULL) {
+ put_interface(cc);
+ }
+ msg->result = XCS_RSLT_OK;
+ break;
+ }
+ }
+ return reply_needed;
+}
+
+void handle_data_message( connection_t *con, xcs_msg_t *msg )
+{
+ control_channel_t *cc;
+ xcs_control_msg_t *cmsg = &msg->u.control;
+ int port;
+
+ switch (msg->type)
+ {
+ case XCS_REQUEST:
+ if ( cmsg->remote_dom > MAX_DOMS )
+ break;
+
+ port = dom_to_port(cmsg->remote_dom);
+ if (port == -1) break;
+ cc = cc_list[port];
+ if ((cc != NULL) && ( cc->type == CC_TYPE_INTERDOMAIN ))
+ {
+ DPRINTF("DN:REQ: dom:%d port: %d type: %d\n",
+ cc->remote_dom, cc->local_port,
+ cmsg->msg.type);
+ ctrl_chan_write_request(cc, cmsg);
+ ctrl_chan_notify(cc);
+ } else {
+ DPRINTF("tried to send a REQ to a null cc\n.");
+ }
+ break;
+
+ case XCS_RESPONSE:
+ if ( cmsg->remote_dom > MAX_DOMS )
+ break;
+
+ port = dom_to_port(cmsg->remote_dom);
+ if (port == -1) break;
+ cc = cc_list[port];
+ if ((cc != NULL) && ( cc->type == CC_TYPE_INTERDOMAIN ))
+ {
+ DPRINTF("DN:RSP: dom:%d port: %d type: %d\n",
+ cc->remote_dom, cc->local_port,
+ cmsg->msg.type);
+ ctrl_chan_write_response(cc, cmsg);
+ ctrl_chan_notify(cc);
+ }
+ break;
+
+ case XCS_VIRQ:
+ if ( !(PORT_VALID(cmsg->local_port)) )
+ break;
+
+ cc = cc_list[cmsg->local_port];
+
+ if ((cc != NULL) && ( cc->type == CC_TYPE_VIRQ ))
+ {
+ DPRINTF("DN:VIRQ: virq: %d port: %d\n",
+ cc->virq, cc->local_port);
+ ctrl_chan_notify(cc);
+ }
+ break;
+ }
+}
+
+/* ------[ Control interface handler ]-------------------------------------*/
+
+/* passed as a function pointer to the lookup. */
+void send_kmsg(connection_t *c, void *arg)
+{
+ xcs_msg_t *msg = (xcs_msg_t *)arg;
+
+ DPRINTF(" -> CONNECTION %d\n", c->data_fd);
+ if (c->data_fd > 0)
+ {
+ send(c->data_fd, msg, sizeof(xcs_msg_t), 0);
+ }
+}
+
+int handle_ctrl_if(void)
+{
+ control_channel_t *cc;
+ control_msg_t *msg;
+ xcs_msg_t kmsg;
+ int chan, ret;
+
+ DPRINTF("Event thread kicked!\n");
+again:
+ while ((chan = evtchn_read()) > 0)
+ {
+ evtchn_unmask(chan);
+ cc = cc_list[chan];
+ if (cc_list[chan] == NULL) {
+ DPRINTF("event from unknown channel (%d)\n", chan);
+ continue;
+ }
+
+ if ( cc_list[chan]->type == CC_TYPE_VIRQ )
+ {
+ DPRINTF("UP:VIRQ: virq:%d port: %d\n",
+ cc->virq, cc->local_port);
+ kmsg.type = XCS_VIRQ;
+ kmsg.u.control.local_port = cc->local_port;
+ xcs_lookup(cc->local_port, TYPE_VIRQ, send_kmsg, &kmsg);
+ continue;
+ }
+
+ while (ctrl_chan_request_to_read(cc))
+ {
+ msg = &kmsg.u.control.msg;
+ kmsg.type = XCS_REQUEST;
+ kmsg.u.control.remote_dom = cc->remote_dom;
+ kmsg.u.control.local_port = cc->local_port;
+ ret = ctrl_chan_read_request(cc, &kmsg.u.control);
+ DPRINTF("UP:REQ: dom:%d port: %d type: %d len: %d\n",
+ cc->remote_dom, cc->local_port,
+ msg->type, msg->length);
+ if (ret == 0)
+ xcs_lookup(cc->local_port, msg->type, send_kmsg, &kmsg);
+ }
+
+ while (ctrl_chan_response_to_read(cc))
+ {
+ msg = &kmsg.u.control.msg;
+ kmsg.type = XCS_RESPONSE;
+ kmsg.u.control.remote_dom = cc->remote_dom;
+ kmsg.u.control.local_port = cc->local_port;
+ ret = ctrl_chan_read_response(cc, &kmsg.u.control);
+ DPRINTF("UP:RSP: dom:%d port: %d type: %d len: %d\n",
+ cc->remote_dom, cc->local_port,
+ msg->type, msg->length);
+ if (ret == 0)
+ xcs_lookup(cc->local_port, msg->type, send_kmsg, &kmsg);
+ }
+ }
+
+ if (chan == -EINTR)
+ goto again;
+
+ return chan;
+}
+
+
+/* ------[ Main xcs code / big select loop ]-------------------------------*/
+
+
+typedef struct unbound_fd_st {
+ int fd;
+ struct timeval born;
+ struct unbound_fd_st *next;
+} unbound_fd_t;
+
+/* This makes ufd point to the next entry in the list, so need to *
+ * break/continue if called while iterating. */
+void delete_ufd(unbound_fd_t **ufd)
+{
+ unbound_fd_t *del_ufd;
+
+ del_ufd = *ufd;
+ *ufd = (*ufd)->next;
+ free( del_ufd );
+}
+
+void gc_ufd_list( unbound_fd_t **ufd )
+{
+ struct timeval now, delta;
+
+ gettimeofday(&now, NULL);
+
+ while ( *ufd != NULL )
+ {
+ timersub(&now, &(*ufd)->born, &delta);
+ if (delta.tv_sec > XCS_UFD_TIMEOUT)
+ {
+ DPRINTF("GC-UFD: closing fd: %d\n", (*ufd)->fd);
+ close((*ufd)->fd);
+ delete_ufd(ufd);
+ continue;
+ }
+ ufd = &(*ufd)->next;
+ }
+}
+
+void daemonize_xcs(void)
+{
+
+    /* detach from our controlling tty so that a shell does not hang waiting
+       for stopped jobs. */
+
+ pid_t pid = fork();
+ int fd;
+
+ if (pid == -1) {
+ perror("fork()");
+ } else if (pid) {
+ exit(0);
+ }
+
+    fd = open("/var/log/xcs.log", O_WRONLY | O_APPEND | O_CREAT, 0644);
+ if ( fd == -1 ) {
+ fprintf(stderr, "xcs couldn't open logfile. Directing all output to "
+ "/dev/null instead.\n");
+ fd = open("/dev/null", O_WRONLY);
+ }
+
+ setsid();
+ close(2);
+ close(1);
+ close(0);
+ dup(fd);
+ dup(fd);
+}
+
+
+static char *pidfilename = NULL;
+void cleanup(int sig)
+{
+ /* throw away our pidfile if we created one. */
+ if ( pidfilename != NULL )
+ unlink(pidfilename);
+ exit(0);
+}
+
+int main (int argc, char *argv[])
+{
+ int listen_fd, evtchn_fd;
+ unbound_fd_t *unbound_fd_list = NULL, **ufd;
+ struct timeval timeout = { XCS_GC_INTERVAL, 0 };
+ connection_t **con;
+ int c, daemonize;
+ FILE *pidfile;
+ struct stat s;
+
+ daemonize = 1;
+ pidfile = NULL;
+
+ signal(SIGHUP, cleanup);
+ signal(SIGTERM, cleanup);
+ signal(SIGINT, cleanup);
+
+ /* Do a bunch of stuff before potentially daemonizing so we can
+ * print error messages sanely before redirecting output. */
+
+ /* Initialize xc and event connections. */
+ if (ctrl_chan_init() != 0)
+ {
+        printf("Couldn't open connection to libxc.\n");
+ exit(-1);
+ }
+
+ if ((evtchn_fd = evtchn_open()) < 0)
+ {
+ printf("Couldn't open event channel driver interface.\n");
+ exit(-1);
+ }
+
+ /* Bind listen_fd to the client socket. */
+ listen_fd = listen_socket(XCS_SUN_PATH);
+
+ while ((c = getopt (argc, argv, "ip:")) != -1)
+ {
+ switch (c)
+ {
+ case 'i': /* interactive */
+ daemonize = 0;
+ break;
+ case 'p': /* pid file */
+ pidfilename = optarg;
+ break;
+ case '?':
+ if (isprint (optopt))
+ fprintf (stderr, "Unknown option `-%c'.\n", optopt);
+ else
+ fprintf (stderr,
+ "Bad option character `\\x%x'.\n", optopt);
+ break;
+ }
+ }
+
+ if ( pidfilename != NULL )
+ {
+ if ( stat(pidfilename, &s) == 0 )
+ {
+            fprintf(stderr, "The specified pid file (%s) already exists.\n"
+ "Is another instance of xcs running?\n", pidfilename);
+ exit(-1);
+ }
+
+ pidfile = fopen(pidfilename, "w");
+ if (pidfile == NULL)
+ {
+            fprintf(stderr, "Error opening pidfile (%s).\n", pidfilename);
+ exit(-1);
+ }
+ }
+
+ if (daemonize == 1)
+ daemonize_xcs();
+
+ if (pidfile != NULL)
+ {
+ fprintf(pidfile, "%d", getpid());
+ fclose(pidfile);
+ }
+
+
+ /* Initialize control interfaces, bindings. */
+ init_interfaces();
+ init_bindings();
+
+
+ for (;;)
+ {
+ int n = 0, ret;
+ fd_set rd, wr, er;
+ FD_ZERO ( &rd );
+ FD_ZERO ( &wr );
+ FD_ZERO ( &er );
+
+ /* TCP listen fd: */
+ FD_SET ( listen_fd, &rd );
+ n = fd_max ( n, listen_fd );
+
+ /* Evtchn fd: */
+ FD_SET ( evtchn_fd, &rd );
+ n = fd_max ( n, evtchn_fd );
+
+ /* unbound connection fds: */
+ ufd = &unbound_fd_list;
+ while ((*ufd) != NULL)
+ {
+ FD_SET ( (*ufd)->fd, &rd );
+ n = fd_max ( n, (*ufd)->fd );
+ ufd = &(*ufd)->next;
+ }
+
+ /* control and data fds: */
+ con = &connection_list;
+ while ((*con) != NULL)
+ {
+ if ((*con)->ctrl_fd > 0)
+ {
+ FD_SET ( (*con)->ctrl_fd, &rd );
+ n = fd_max ( n, (*con)->ctrl_fd );
+ }
+ if ((*con)->data_fd > 0)
+ {
+ FD_SET ( (*con)->data_fd, &rd );
+ n = fd_max ( n, (*con)->data_fd );
+ }
+ con = &(*con)->next;
+ }
+
+ ret = select ( n + 1, &rd, &wr, &er, &timeout );
+
+ if ( (timeout.tv_sec == 0) && (timeout.tv_usec == 0) )
+ {
+ gc_ufd_list(&unbound_fd_list);
+ gc_connection_list();
+ timeout.tv_sec = XCS_GC_INTERVAL;
+ }
+
+ if ( (ret == -1) && (errno == EINTR) )
+ continue;
+ if ( ret < 0 )
+ {
+ perror ("select()");
+ exit(-1);
+ }
+
+ /* CASE 1: Events arriving on /dev/evtchn. */
+
+ if ( FD_ISSET (evtchn_fd, &rd ))
+ handle_ctrl_if();
+
+ /* CASE 2: New connection on the listen port. */
+ if ( FD_ISSET ( listen_fd, &rd ))
+ {
+ struct sockaddr_un remote_addr;
+ int size;
+ memset (&remote_addr, 0, sizeof (remote_addr));
+ size = sizeof remote_addr;
+ ret = accept(listen_fd, (struct sockaddr *)&remote_addr, (socklen_t *)&size);
+ if ( ret < 0 )
+ {
+ perror("accept()");
+ } else {
+ unbound_fd_t *new_ufd;
+
+ new_ufd = (unbound_fd_t *)malloc(sizeof(*new_ufd));
+
+ if (new_ufd != NULL)
+ {
+ gettimeofday(&new_ufd->born, NULL);
+ new_ufd->fd = ret;
+ new_ufd->next = unbound_fd_list;
+ unbound_fd_list = new_ufd;
+ } else {
+ perror("malloc unbound connection");
+ close(ret);
+ }
+ }
+ }
+
+ /* CASE 3a: Handle messages on control connections. */
+
+ con = &connection_list;
+ while ( *con != NULL )
+ {
+ if ( ((*con)->ctrl_fd > 0) && (FD_ISSET((*con)->ctrl_fd, &rd)) )
+ {
+ xcs_msg_t msg;
+ memset (&msg, 0, sizeof(msg));
+ ret = read( (*con)->ctrl_fd, &msg, sizeof(msg) );
+
+ if ( ret < 0 )
+ {
+ perror("reading ctrl fd.");
+ } else if ( ret == 0 )
+ {
+ DPRINTF("Control connection dropped.\n");
+ close ( (*con)->ctrl_fd );
+ (*con)->ctrl_fd = -1;
+ gettimeofday(&(*con)->disconnect_time, NULL);
+ } else
+ {
+ if ( ret != sizeof(msg) )
+ {
+ DPRINTF("Unexpected frame size!\n");
+ continue;
+ }
+
+ ret = handle_control_message( *con, &msg );
+
+ if ( ret == 1 )
+ send( (*con)->ctrl_fd, &msg, sizeof(msg), 0 );
+ }
+ }
+ con = &(*con)->next;
+ }
+
+ /* CASE 3b: Handle messages on data connections. */
+
+ con = &connection_list;
+ while ( *con != NULL )
+ {
+ if ( ((*con)->data_fd > 0) && (FD_ISSET((*con)->data_fd, &rd)) )
+ {
+ xcs_msg_t msg;
+ memset (&msg, 0, sizeof(msg));
+ ret = read( (*con)->data_fd, &msg, sizeof(msg) );
+
+ if ( ret < 0 )
+ {
+ perror("reading data fd.");
+ } else if ( ret == 0 )
+ {
+ DPRINTF("Data connection dropped.\n");
+ close ( (*con)->data_fd );
+ (*con)->data_fd = -1;
+ gettimeofday(&(*con)->disconnect_time, NULL);
+ } else
+ {
+ if ( ret != sizeof(msg) )
+ {
+ DPRINTF("Unexpected frame size!\n");
+ continue;
+ }
+
+ handle_data_message( *con, &msg );
+ }
+ }
+ con = &(*con)->next;
+ }
+
+ /* CASE 3c: Handle messages arriving on unbound connections. */
+ ufd = &unbound_fd_list;
+ while ((*ufd) != NULL)
+ {
+ if ( FD_ISSET( (*ufd)->fd, &rd ) )
+ {
+ xcs_msg_t msg;
+ memset (&msg, 0, sizeof(msg));
+ ret = read( (*ufd)->fd, &msg, sizeof(msg) );
+
+ if ( ret == 0 )
+ {
+ close ( (*ufd)->fd );
+ delete_ufd(ufd);
+ continue; /* we just advanced ufd */
+ } else {
+ if ( ret != sizeof(msg) )
+ {
+ DPRINTF("Unexpected frame size!\n");
+ continue;
+ }
+
+ ret = handle_connect_msg( &msg, (*ufd)->fd );
+
+ if ( (ret == CONNECTED) || (ret == NO_CHANGE) )
+ send( (*ufd)->fd, &msg, sizeof(msg), 0 );
+
+                if ( (ret == CONNECTED) || (ret == DISCONNECTED) )
+ {
+ delete_ufd( ufd );
+ continue;
+ }
+ }
+ }
+ ufd = &(*ufd)->next;
+ }
+ }
+}
+
diff --git a/tools/xcs/xcs.h b/tools/xcs/xcs.h
new file mode 100644
index 0000000000..5a26724ce1
--- /dev/null
+++ b/tools/xcs/xcs.h
@@ -0,0 +1,148 @@
+/* xcs.h
+ *
+ * public interfaces for the control interface switch (xcs).
+ *
+ * (c) 2004, Andrew Warfield
+ *
+ */
+
+
+#ifndef __XCS_H__
+#define __XCS_H__
+
+#include <pthread.h>
+#include <xc.h>
+#include <xen/xen.h>
+#include <xen/io/domain_controller.h>
+#include <xen/linux/privcmd.h>
+#include <sys/time.h>
+#include "xcs_proto.h"
+
+/* ------[ Debug macros ]--------------------------------------------------*/
+
+#if 0
+#define DPRINTF(_f, _a...) printf ( _f , ## _a )
+#else
+#define DPRINTF(_f, _a...) ((void)0)
+#endif
+
+/* ------[ XCS-specific defines and types ]--------------------------------*/
+
+#define MAX_DOMS 1024
+#define XCS_SESSION_TIMEOUT 10 /* (secs) disconnected session gc timeout */
+#define XCS_UFD_TIMEOUT 5 /* how long can connections be unbound? */
+#define XCS_GC_INTERVAL 5 /* How often to run gc handlers. */
+
+
+/* ------[ Other required defines ]----------------------------------------*/
+
+/* Size of a machine page frame. */
+#define PAGE_SIZE 4096
+
+#ifndef timersub /* XOPEN and __BSD don't cooperate well... */
+#define timersub(a, b, result) \
+ do { \
+ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+ if ((result)->tv_usec < 0) { \
+ --(result)->tv_sec; \
+ (result)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif /*timersub*/
+
+/* ------[ Bindings Interface ]--------------------------------------------*/
+
+/*forward declare connection_t */
+typedef struct connection_st connection_t;
+
+typedef struct {
+ int port;
+ u16 type;
+} binding_key_t;
+
+typedef struct binding_key_ent_st {
+ binding_key_t key;
+ struct binding_key_ent_st *next;
+} binding_key_ent_t;
+
+#define BINDING_KEYS_EQUAL(_k1, _k2) \
+ (((_k1)->port == (_k2)->port) && ((_k1)->type == (_k2)->type))
+
+int xcs_bind(connection_t *con, int port, u16 type);
+int xcs_unbind(connection_t *con, int port, u16 type);
+void xcs_lookup(int port, u16 type, void (*f)(connection_t *, void *),
+ void *arg);
+void init_bindings(void);
+
+/* ------[ Connection Interface ]------------------------------------------*/
+
+struct connection_st {
+ unsigned long id; /* Unique session id */
+ int ctrl_fd; /* TCP descriptors */
+ int data_fd; /* */
+ binding_key_ent_t *bindings; /* List of bindings */
+ connection_t *next; /* Linked list of connections */
+ struct timeval disconnect_time; /* " " */
+}; /* previously typedefed as connection_t */
+
+
+extern connection_t *connection_list;
+
+connection_t *get_con_by_session(unsigned long session_id);
+connection_t *connection_new();
+void connection_free(connection_t *con);
+int connection_add_binding(connection_t *con, binding_key_t *key);
+int connection_remove_binding(connection_t *con, binding_key_t *key);
+int connection_has_binding(connection_t *con, binding_key_t *key);
+void gc_connection_list(void);
+
+/* ------[ Control Channel Interfaces ]------------------------------------*/
+
+typedef struct {
+ int connected;
+ int ref_count;
+ int type;
+ u32 remote_dom;
+ int local_port;
+ int remote_port;
+ control_if_t *interface;
+ ctrl_back_ring_t tx_ring;
+ ctrl_front_ring_t rx_ring;
+ int virq;
+} control_channel_t;
+
+/* cc types that we care about */
+#define CC_TYPE_INTERDOMAIN 0
+#define CC_TYPE_VIRQ 1
+
+control_channel_t
+ *ctrl_chan_new(u32 dom, int local_port, int remote_port);
+void ctrl_chan_free(control_channel_t *cc);
+int ctrl_chan_init(void);
+int ctrl_chan_notify(control_channel_t *cc);
+int ctrl_chan_read_request(control_channel_t *cc, xcs_control_msg_t *);
+int ctrl_chan_write_request(control_channel_t *cc,
+ xcs_control_msg_t *smsg);
+int ctrl_chan_read_response(control_channel_t *cc, xcs_control_msg_t *);
+int ctrl_chan_write_response(control_channel_t *cc,
+ xcs_control_msg_t *smsg);
+int ctrl_chan_request_to_read(control_channel_t *cc);
+int ctrl_chan_space_to_write_request(control_channel_t *cc);
+int ctrl_chan_response_to_read(control_channel_t *cc);
+int ctrl_chan_space_to_write_response(control_channel_t *cc);
+int ctrl_chan_connect(control_channel_t *cc);
+void ctrl_chan_disconnect(control_channel_t *cc);
+int ctrl_chan_bind_virq(int virq, int *port);
+
+/* ------[ Event notification interfaces ]---------------------------------*/
+
+
+int evtchn_open(void);
+void evtchn_close();
+int evtchn_bind(int idx);
+int evtchn_unbind(int idx);
+void evtchn_unmask(u16 idx);
+int evtchn_read();
+
+#endif /* __XCS_H__ */
diff --git a/tools/xcs/xcs_proto.h b/tools/xcs/xcs_proto.h
new file mode 100644
index 0000000000..fa04621f40
--- /dev/null
+++ b/tools/xcs/xcs_proto.h
@@ -0,0 +1,101 @@
+/* xcs_proto.h
+ *
+ * protocol interfaces for the control interface switch (xcs).
+ *
+ * (c) 2004, Andrew Warfield
+ *
+ */
+
+#ifndef __XCS_PROTO_H__
+#define __XCS_PROTO_H__
+
+#define XCS_SUN_PATH "/var/lib/xen/xcs_socket"
+
+/* xcs message types: */
+#define XCS_CONNECT_CTRL 0 /* This is a control connection. */
+#define XCS_CONNECT_DATA 1 /* This is a data connection. */
+#define XCS_CONNECT_BYE 2 /* Terminate a session. */
+#define XCS_MSG_BIND 3 /* Register for a message type. */
+#define XCS_MSG_UNBIND 4 /* Unregister for a message type. */
+#define XCS_VIRQ_BIND 5 /* Register for a virq. */
+#define XCS_MSG_WRITELOCK 6 /* Writelock a (dom,type) pair. */
+#define XCS_CIF_NEW_CC 7 /* Create a new control channel. */
+#define XCS_CIF_FREE_CC         8  /* Free an existing control channel. */
+#define XCS_REQUEST 9 /* This is a request message. */
+#define XCS_RESPONSE 10 /* this is a response Message. */
+#define XCS_VIRQ 11 /* this is a virq notification. */
+
+/* xcs result values: */
+#define XCS_RSLT_OK 0
+#define XCS_RSLT_FAILED 1 /* something bad happened. */
+#define XCS_RSLT_ARECONNECTED 2 /* attempt to over connect. */
+#define XCS_RSLT_BADSESSION 3 /* request for unknown session id. */
+#define XCS_RSLT_NOSESSION 4 /* tried to do something before NEW. */
+#define XCS_RSLT_CONINUSE 5 /* Requested connection is taken. */
+#define XCS_RSLT_BADREQUEST 6 /* Request message didn't validate. */
+
+/* Binding wildcards */
+#define PORT_WILDCARD 0xefffffff
+#define TYPE_WILDCARD 0xffff
+#define TYPE_VIRQ 0xfffe
+
+typedef struct {
+ u32 session_id;
+} xcs_connect_msg_t;
+
+typedef struct {
+ int port;
+ u16 type;
+} xcs_bind_msg_t;
+
+typedef struct {
+ int port;
+ u16 virq;
+} xcs_virq_msg_t;
+
+typedef struct {
+ u32 dom;
+ int local_port;
+ int remote_port;
+} xcs_interface_msg_t;
+
+typedef struct {
+ u32 remote_dom;
+ int local_port;
+ control_msg_t msg;
+} xcs_control_msg_t;
+
+typedef struct {
+ u32 type;
+ u32 result;
+ union {
+ xcs_connect_msg_t connect; /* These are xcs ctrl message types */
+ xcs_bind_msg_t bind;
+ xcs_virq_msg_t virq;
+ xcs_interface_msg_t interface;
+
+ xcs_control_msg_t control; /* These are xcs data message types */
+ } PACKED u;
+} xcs_msg_t;
+
+/* message validation macros. */
+#define PORT_VALID(_p) \
+ ( (((_p) >= 0) && ((_p) < NR_EVENT_CHANNELS)) \
+ || ((_p) == PORT_WILDCARD) )
+
+#define TYPE_VALID(_t) \
+ ( ((_t) < 256) \
+ || ((_t) == TYPE_VIRQ) \
+ || ((_t) == TYPE_WILDCARD) )
+
+#define BIND_MSG_VALID(_b) \
+ ( PORT_VALID((_b)->port) && TYPE_VALID((_b)->type) )
+
+/* Port is overwritten, and we don't currently validate the requested virq. */
+#define VIRQ_MSG_VALID(_v) ( 1 )
+
+/* Interfaces may return with ports of -1, but may not be requested as such */
+#define INTERFACE_MSG_VALID(_i) \
+ ( PORT_VALID((_i)->local_port) && PORT_VALID((_i)->remote_port) )
+
+#endif /* __XCS_PROTO_H__ */
diff --git a/tools/xcs/xcsdump.c b/tools/xcs/xcsdump.c
new file mode 100644
index 0000000000..dfc166b154
--- /dev/null
+++ b/tools/xcs/xcsdump.c
@@ -0,0 +1,206 @@
+/* xcsdump.c
+ *
+ * little tool to sniff control messages.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ * Modifications by Anthony Liguori <aliguori@us.ibm.com> are:
+ * Copyright (C) 2005, International Business Machines, Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <ctype.h>
+#include <xc.h>
+#include <xen/xen.h>
+#include <xen/io/domain_controller.h>
+#include <getopt.h>
+#include "xcs_proto.h"
+#include "xcs.h"
+
+#include "dump.h"
+
+static int xcs_ctrl_fd = -1; /* connection to the xcs server. */
+static int xcs_data_fd = -1; /* connection to the xcs server. */
+
+int sock_connect(char *path)
+{
+ struct sockaddr_un addr;
+ int ret, len, fd;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0)
+ {
+ printf("error creating xcs socket!\n");
+ return -1;
+ }
+
+ addr.sun_family = AF_UNIX;
+ strcpy(addr.sun_path, path);
+ len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1;
+
+ ret = connect(fd, (struct sockaddr *)&addr, len);
+ if (ret < 0)
+ {
+ printf("error connecting to xcs!\n");
+ return -1;
+ }
+
+ return fd;
+}
+
+void sock_disconnect(int *fd)
+{
+ close(*fd);
+ *fd = -1;
+}
+
+void xcs_read(int fd, xcs_msg_t *msg)
+{
+ int ret;
+
+ ret = read(fd, msg, sizeof(xcs_msg_t));
+ if (ret != sizeof(xcs_msg_t)) {
+ printf("read error\n");
+ exit(-1);
+ }
+}
+
+void xcs_send(int fd, xcs_msg_t *msg)
+{
+ int ret;
+
+ ret = send(fd, msg, sizeof(xcs_msg_t), 0);
+ if (ret != sizeof(xcs_msg_t) )
+ {
+ printf("send error\n");
+ exit(-1);
+ }
+}
+
+
+int main(int argc, char* argv[])
+{
+ int ret;
+ xcs_msg_t msg;
+ control_msg_t *cmsg;
+ int verbose = 0;
+ int ch;
+
+ while ((ch = getopt(argc, argv, "hv:")) != -1)
+ {
+ switch (ch)
+ {
+ case 'v':
+ verbose = atoi(optarg);
+ break;
+ case 'h':
+ printf("Usage: %s [-v FLAGS]\n"
+"Displays XCS control message traffic.\n"
+"\n"
+"FLAGS is a bitmask where each bit (numbering starts from LSB) represents\n"
+"whether to display a particular message type.\n"
+"\n"
+"For example, -v 1022 will display all messages except for console messages.\n"
+ , argv[0]);
+ exit(0);
+ break;
+ }
+ }
+
+ ret = sock_connect(XCS_SUN_PATH);
+ if (ret < 0)
+ {
+ printf("connect failed!\n");
+ exit(-1);
+ }
+ xcs_ctrl_fd = ret;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = XCS_CONNECT_CTRL;
+ xcs_send(xcs_ctrl_fd, &msg);
+ xcs_read(xcs_ctrl_fd, &msg);
+ if (msg.result != XCS_RSLT_OK)
+ {
+ printf("Error connecting control channel\n");
+ exit(-1);
+ }
+
+ ret = sock_connect(XCS_SUN_PATH);
+ if (ret < 0)
+ {
+ printf("connect failed!\n");
+ exit(-1);
+ }
+ xcs_data_fd = ret;
+
+ msg.type = XCS_CONNECT_DATA;
+ /* session id is set from before... */
+ xcs_send(xcs_data_fd, &msg);
+ xcs_read(xcs_data_fd, &msg);
+ if (msg.result != XCS_RSLT_OK)
+ {
+ printf("Error connecting data channel\n");
+ exit(-1);
+ }
+
+ msg.type = XCS_MSG_BIND;
+ msg.u.bind.port = PORT_WILDCARD;
+ msg.u.bind.type = TYPE_WILDCARD;
+ xcs_send(xcs_ctrl_fd, &msg);
+ xcs_read(xcs_ctrl_fd, &msg);
+ if (msg.result != XCS_RSLT_OK)
+ {
+ printf("Error binding.\n");
+ exit(-1);
+ }
+
+
+ while (1)
+ {
+ xcs_read(xcs_data_fd, &msg);
+ cmsg = &msg.u.control.msg;
+
+ switch (msg.type)
+ {
+ case XCS_REQUEST:
+ if (!verbose || verbose & (1 << msg.u.control.msg.type))
+ {
+ printf("[REQUEST ] : (dom:%u port:%d) (type:(%d,%d) len %d)\n",
+ msg.u.control.remote_dom,
+ msg.u.control.local_port,
+ msg.u.control.msg.type,
+ msg.u.control.msg.subtype,
+ msg.u.control.msg.length);
+
+ dump_msg(cmsg, verbose);
+ }
+ break;
+ case XCS_RESPONSE:
+ if (!verbose || verbose & (1 << msg.u.control.msg.type))
+ {
+ printf("[RESPONSE] : (dom:%u port:%d) (type:(%d,%d) len %d)\n",
+ msg.u.control.remote_dom,
+ msg.u.control.local_port,
+ msg.u.control.msg.type,
+ msg.u.control.msg.subtype,
+ msg.u.control.msg.length);
+
+ dump_msg(cmsg, verbose);
+ }
+ break;
+ case XCS_VIRQ:
+ printf("[VIRQ ] : %d\n", msg.u.control.local_port);
+ break;
+ default:
+ printf("[UNKNOWN ] : %d\n", msg.type);
+ }
+ }
+
+ return(0);
+}
diff --git a/tools/xcutils/Makefile b/tools/xcutils/Makefile
new file mode 100644
index 0000000000..d4e0ebad0f
--- /dev/null
+++ b/tools/xcutils/Makefile
@@ -0,0 +1,53 @@
+#
+# tools/xcutils/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General
+# Public License. See the file "COPYING" in the main directory of
+# this archive for more details.
+#
+# Copyright (C) 2005 by Christian Limpach
+#
+
+INSTALL = install
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+PROGRAMS_INSTALL_DIR = /usr/libexec/xen
+
+INCLUDES += -I $(XEN_LIBXC)
+
+CC := gcc
+
+CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing
+CFLAGS += $(INCLUDES)
+
+# Make gcc generate dependencies.
+CFLAGS += -Wp,-MD,.$(@F).d
+PROG_DEP = .*.d
+
+PROGRAMS = xc_restore xc_save
+
+LDLIBS = -L$(XEN_LIBXC) -lxc
+
+.PHONY: all
+all: build
+build: $(PROGRAMS)
+
+$(PROGRAMS): %: %.o
+ $(LINK.o) $^ $(LDLIBS) -o $@
+
+.PHONY: install
+install: build
+ [ -d $(DESTDIR)$(PROGRAMS_INSTALL_DIR) ] || \
+ $(INSTALL_DIR) $(DESTDIR)$(PROGRAMS_INSTALL_DIR)
+ $(INSTALL_PROG) $(PROGRAMS) $(DESTDIR)$(PROGRAMS_INSTALL_DIR)
+
+
+clean:
+ $(RM) *.o $(PROGRAMS)
+ $(RM) $(PROG_DEP)
+
+-include $(PROG_DEP)
diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c
new file mode 100644
index 0000000000..ebba6d698f
--- /dev/null
+++ b/tools/xcutils/xc_restore.c
@@ -0,0 +1,30 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of
+ * this archive for more details.
+ *
+ * Copyright (C) 2005 by Christian Limpach
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <err.h>
+
+#include <xc.h>
+
+int
+main(int argc, char **argv)
+{
+ unsigned int xc_fd, io_fd, domid, nr_pfns;
+
+ if (argc != 5)
+ errx(1, "usage: %s xcfd iofd domid nr_pfns", argv[0]);
+
+ xc_fd = atoi(argv[1]);
+ io_fd = atoi(argv[2]);
+ domid = atoi(argv[3]);
+ nr_pfns = atoi(argv[4]);
+
+ return xc_linux_restore(xc_fd, io_fd, domid, nr_pfns);
+}
diff --git a/tools/xcutils/xc_save.c b/tools/xcutils/xc_save.c
new file mode 100644
index 0000000000..6ca1d5cc6c
--- /dev/null
+++ b/tools/xcutils/xc_save.c
@@ -0,0 +1,29 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of
+ * this archive for more details.
+ *
+ * Copyright (C) 2005 by Christian Limpach
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <err.h>
+
+#include <xc.h>
+
+int
+main(int argc, char **argv)
+{
+ unsigned int xc_fd, io_fd, domid;
+
+ if (argc != 4)
+ errx(1, "usage: %s xcfd iofd domid", argv[0]);
+
+ xc_fd = atoi(argv[1]);
+ io_fd = atoi(argv[2]);
+ domid = atoi(argv[3]);
+
+ return xc_linux_save(xc_fd, io_fd, domid);
+}
diff --git a/tools/xenstore/.gdbinit b/tools/xenstore/.gdbinit
new file mode 100644
index 0000000000..9a71b20ac4
--- /dev/null
+++ b/tools/xenstore/.gdbinit
@@ -0,0 +1,4 @@
+set environment XENSTORED_RUNDIR=testsuite/tmp
+set environment XENSTORED_ROOTDIR=testsuite/tmp
+handle SIGUSR1 noprint nostop
+handle SIGPIPE noprint nostop
diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile
new file mode 100644
index 0000000000..408078efba
--- /dev/null
+++ b/tools/xenstore/Makefile
@@ -0,0 +1,102 @@
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+LIBDIR = lib
+XEN_LIBXC = $(XEN_ROOT)/tools/libxc
+
+INSTALL = install
+INSTALL_DATA = $(INSTALL) -m0644
+INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755
+
+PROFILE=#-pg
+BASECFLAGS=-Wall -W -g
+# Make gcc generate dependencies.
+BASECFLAGS += -Wp,-MD,.$(@F).d
+PROG_DEP = .*.d
+#BASECFLAGS+= -O3 $(PROFILE)
+#BASECFLAGS+= -I$(XEN_ROOT)/tools
+BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc
+BASECFLAGS+= -I$(XEN_ROOT)/xen/include/public
+BASECFLAGS+= -I.
+
+CFLAGS += $(BASECFLAGS)
+ifeq ($(XEN_TARGET_ARCH),x86_64)
+CFLAGS += -fPIC
+endif
+LDFLAGS=$(PROFILE) -L$(XEN_LIBXC)
+TESTDIR=`pwd`/testsuite/tmp
+TESTFLAGS=-DTESTING
+TESTENV=XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR)
+
+all: xen xenstored libxenstore.a
+
+testcode: xen xs_test xenstored_test xs_random
+
+xen:
+ ln -sf $(XEN_ROOT)/xen/include/public $@
+
+xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o
+ $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxc -o $@
+
+xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o
+ $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
+
+xs_test: xs_test.o xs_lib.o utils.o
+xs_random: xs_random.o xs_test_lib.o xs_lib.o talloc.o utils.o
+xs_stress: xs_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o
+
+xs_test.o xs_stress.o xenstored_core_test.o xenstored_watch_test.o xenstored_transaction_test.o xenstored_domain_test.o xs_random.o xs_test_lib.o talloc_test.o fake_libxc.o: CFLAGS=$(BASECFLAGS) $(TESTFLAGS)
+
+xenstored_%_test.o: xenstored_%.c
+ $(COMPILE.c) -o $@ $<
+
+xs_test_lib.o: xs.c
+ $(COMPILE.c) -o $@ $<
+
+talloc_test.o: talloc.c
+ $(COMPILE.c) -o $@ $<
+
+libxenstore.a: libxenstore.a(xs.o) libxenstore.a(xs_lib.o)
+
+clean: testsuite-clean
+ rm -f *.o *.a xs_test xenstored xenstored_test xs_random xs_stress xen
+ -$(RM) $(PROG_DEP)
+
+check: testsuite-run randomcheck stresstest
+
+testsuite-run: xen xenstored_test xs_test
+ $(TESTENV) testsuite/test.sh
+
+testsuite-clean:
+ rm -rf $(TESTDIR)
+
+# Make this visible so they can see repeat tests without --fast if they
+# fail.
+RANDSEED=$(shell date +%s)
+randomcheck: xs_random xenstored_test
+ $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED)
+ $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED)
+ $(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED)
+
+stresstest: xs_stress xenstored_test
+ rm -rf $(TESTDIR)/store
+ export $(TESTENV); PID=`./xenstored_test --output-pid`; ./xs_stress 10000; ret=$$?; kill $$PID; exit $$ret
+
+TAGS:
+ etags `find . -name '*.[ch]'`
+
+tarball: clean
+ cd .. && tar -c -j -v -h -f xenstore.tar.bz2 xenstore/
+
+install: xenstored libxenstore.a
+ $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
+ $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/include
+ $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
+ $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) libxenstore.a $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DATA) xs.h $(DESTDIR)/usr/include
+ $(INSTALL_DATA) xs_lib.h $(DESTDIR)/usr/include
+
+-include $(PROG_DEP)
diff --git a/tools/xenstore/TODO b/tools/xenstore/TODO
new file mode 100644
index 0000000000..9e22afe536
--- /dev/null
+++ b/tools/xenstore/TODO
@@ -0,0 +1,7 @@
+TODO in no particular order. Some of these will never be done. There
+are omissions of important but necessary things. It is up to the
+reader to fill in the blanks.
+
+- Remove calls to system() from daemon
+- Timeout failed watch responses
+- Timeout blocking transactions
diff --git a/tools/xenstore/fake_libxc.c b/tools/xenstore/fake_libxc.c
new file mode 100644
index 0000000000..50e1db717c
--- /dev/null
+++ b/tools/xenstore/fake_libxc.c
@@ -0,0 +1,119 @@
+/*
+ Fake libxc which doesn't require hypervisor but talks to xs_test.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <assert.h>
+#include <signal.h>
+#include "utils.h"
+#include "xenstored_core.h"
+#include "xenstored_domain.h"
+#include "xenstored_test.h"
+
+static int sigfd;
+static int xs_test_pid;
+static u16 port;
+
+/* The event channel maps to a signal, shared page to an mmapped file. */
+int xc_evtchn_send(int xc_handle __attribute__((unused)), int local_port)
+{
+ assert(local_port == port);
+ if (kill(xs_test_pid, SIGUSR2) != 0)
+ barf_perror("fake event channel failed");
+ return 0;
+}
+
+void *xc_map_foreign_range(int xc_handle, u32 dom __attribute__((unused)),
+ int size, int prot,
+ unsigned long mfn __attribute__((unused)))
+{
+ void *ret;
+
+ ret = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
+ if (ret == MAP_FAILED)
+ return NULL;
+
+ /* xs_test tells us pid and port by putting it in buffer, we reply. */
+ xs_test_pid = *(int *)(ret + 32);
+ port = *(int *)(ret + 36);
+ *(int *)(ret + 32) = getpid();
+ return ret;
+}
+
+int xc_interface_open(void)
+{
+ int fd;
+ char page[getpagesize()];
+
+ fd = open("/tmp/xcmap", O_RDWR|O_CREAT|O_TRUNC, 0600);
+ if (fd < 0)
+ return fd;
+
+ memset(page, 0, sizeof(page));
+ if (!xs_write_all(fd, page, sizeof(page)))
+ barf_perror("Failed to write /tmp/xcmap page");
+
+ return fd;
+}
+
+int xc_interface_close(int xc_handle)
+{
+ close(xc_handle);
+ return 0;
+}
+
+static void send_to_fd(int signo __attribute__((unused)))
+{
+ int saved_errno = errno;
+ write(sigfd, &port, sizeof(port));
+ errno = saved_errno;
+}
+
+void fake_block_events(void)
+{
+ signal(SIGUSR2, SIG_IGN);
+}
+
+void fake_ack_event(void)
+{
+ signal(SIGUSR2, send_to_fd);
+}
+
+int fake_open_eventchn(void)
+{
+ int fds[2];
+
+ if (pipe(fds) != 0)
+ return -1;
+
+ if (signal(SIGUSR2, send_to_fd) == SIG_ERR) {
+ int saved_errno = errno;
+ close(fds[0]);
+ close(fds[1]);
+ errno = saved_errno;
+ return -1;
+ }
+ sigfd = fds[1];
+ return fds[0];
+}
diff --git a/tools/xenstore/list.h b/tools/xenstore/list.h
new file mode 100644
index 0000000000..eb35293d7f
--- /dev/null
+++ b/tools/xenstore/list.h
@@ -0,0 +1,508 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+/* Taken from Linux kernel code, but de-kernelized for userspace. */
+#include <stddef.h>
+
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *) 0x00100100)
+#define LIST_POISON2 ((void *) 0x00200200)
+
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+#define list_top(head, type, member) \
+({ \
+ struct list_head *_head = (head); \
+ list_empty(_head) ? NULL : list_entry(_head->next, type, member); \
+})
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add_rcu(struct list_head * new,
+ struct list_head * prev,
+ struct list_head * next)
+{
+ new->next = next;
+ new->prev = prev;
+ next->prev = new;
+ prev->next = new;
+}
+
+/**
+ * list_add_rcu - add a new entry to rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add_rcu(struct list_head *new, struct list_head *head)
+{
+ __list_add_rcu(new, head, head->next);
+}
+
+/**
+ * list_add_tail_rcu - add a new entry to rcu-protected list
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail_rcu(struct list_head *new, struct list_head *head)
+{
+ __list_add_rcu(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_rcu - deletes entry from list without re-initialization
+ * @entry: the element to delete from the list.
+ *
+ * Note: list_empty on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the list.
+ */
+static inline void list_del_rcu(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+ struct list_head *head)
+{
+ __list_del(list->prev, list->next);
+ list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(struct list_head *head)
+{
+ return head->next == head;
+}
+
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+ if (!list_empty(list))
+ __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list)) {
+ __list_splice(list, head);
+ INIT_LIST_HEAD(list);
+ }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev; pos != (head); pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+
+/**
+ * list_for_each_entry_continue - iterate over list of given type
+ * continuing after existing point
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_continue(pos, head, member) \
+ for (pos = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+#define INIT_HLIST_NODE(ptr) ((ptr)->next = NULL, (ptr)->pprev = NULL)
+
+static __inline__ int hlist_unhashed(struct hlist_node *h)
+{
+ return !h->pprev;
+}
+
+static __inline__ int hlist_empty(struct hlist_head *h)
+{
+ return !h->first;
+}
+
+static __inline__ void __hlist_del(struct hlist_node *n)
+{
+ struct hlist_node *next = n->next;
+ struct hlist_node **pprev = n->pprev;
+ *pprev = next;
+ if (next)
+ next->pprev = pprev;
+}
+
+static __inline__ void hlist_del(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_del_rcu - deletes entry from hash list without re-initialization
+ * @entry: the element to delete from the hash list.
+ *
+ * Note: list_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ */
+static inline void hlist_del_rcu(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->pprev = LIST_POISON2;
+}
+
+static __inline__ void hlist_del_init(struct hlist_node *n)
+{
+ if (n->pprev) {
+ __hlist_del(n);
+ INIT_HLIST_NODE(n);
+ }
+}
+
+#define hlist_del_rcu_init hlist_del_init
+
+static __inline__ void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+ n->pprev = &h->first;
+}
+
+static __inline__ void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ n->pprev = &h->first;
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+}
+
+/* next must be != NULL */
+static __inline__ void hlist_add_before(struct hlist_node *n, struct hlist_node *next)
+{
+ n->pprev = next->pprev;
+ n->next = next;
+ next->pprev = &n->next;
+ *(n->pprev) = n;
+}
+
+static __inline__ void hlist_add_after(struct hlist_node *n,
+ struct hlist_node *next)
+{
+ next->next = n->next;
+ *(next->pprev) = n;
+ n->next = next;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+/* Cannot easily do prefetch unfortunately */
+#define hlist_for_each(pos, head) \
+ for (pos = (head)->first; pos; pos = pos->next)
+
/* Iterate over an hlist, safe against removal of the current entry
 * (uses NULL, not the integer literal 0, as the null pointer). */
#define hlist_for_each_safe(pos, n, head) \
	for (pos = (head)->first; n = pos ? pos->next : NULL, pos; \
	     pos = n)
+
+/**
+ * hlist_for_each_entry - iterate over list of given type
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member) \
+ for (pos = (pos)->next; \
+ pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from existing point
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member) \
+ for (; pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos: the type * to use as a loop counter.
+ * @pos: the &struct hlist_node to use as a loop counter.
+ * @n: another &struct hlist_node to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \
+ for (pos = (head)->first; \
+ pos && ({ n = pos->next; 1; }) && \
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = n)
+
+#endif
diff --git a/tools/xenstore/talloc.c b/tools/xenstore/talloc.c
new file mode 100644
index 0000000000..8e93c28fe3
--- /dev/null
+++ b/tools/xenstore/talloc.c
@@ -0,0 +1,1143 @@
+/*
+ Samba Unix SMB/CIFS implementation.
+
+ Samba trivial allocation library - new interface
+
+ NOTE: Please read talloc_guide.txt for full documentation
+
+ Copyright (C) Andrew Tridgell 2004
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/*
+ inspired by http://swapped.cc/halloc/
+*/
+
+
+#ifdef _SAMBA_BUILD_
+#include "includes.h"
+#if ((SAMBA_VERSION_MAJOR==3)&&(SAMBA_VERSION_MINOR<9))
+/* This is to circumvent SAMBA3's paranoid malloc checker. Here in this file
+ * we trust ourselves... */
+#ifdef malloc
+#undef malloc
+#endif
+#ifdef realloc
+#undef realloc
+#endif
+#endif
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include "talloc.h"
+/* assume a modern system */
+#define HAVE_VA_COPY
+#endif
+
+/* use this to force every realloc to change the pointer, to stress test
+ code that might not cope */
+#ifdef TESTING
+#define ALWAYS_REALLOC 1
+void *test_malloc(size_t size);
+#define malloc test_malloc
+#endif
+
+#define MAX_TALLOC_SIZE 0x10000000
+#define TALLOC_MAGIC 0xe814ec4f
+#define TALLOC_MAGIC_FREE 0x7faebef3
+#define TALLOC_MAGIC_REFERENCE ((const char *)1)
+
+/* by default we abort when given a bad pointer (such as when talloc_free() is called
+ on a pointer that came from malloc() */
+#ifndef TALLOC_ABORT
+#define TALLOC_ABORT(reason) abort()
+#endif
+
+#ifndef discard_const_p
+#if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T)
+# define discard_const_p(type, ptr) ((type *)((intptr_t)(ptr)))
+#else
+# define discard_const_p(type, ptr) ((type *)(ptr))
+#endif
+#endif
+
+/* this null_context is only used if talloc_enable_leak_report() or
+ talloc_enable_leak_report_full() is called, otherwise it remains
+ NULL
+*/
+static const void *null_context;
+static void *cleanup_context;
+static int (*malloc_fail_handler)(void *);
+static void *malloc_fail_data;
+
+struct talloc_reference_handle {
+ struct talloc_reference_handle *next, *prev;
+ void *ptr;
+};
+
+typedef int (*talloc_destructor_t)(void *);
+
+struct talloc_chunk {
+ struct talloc_chunk *next, *prev;
+ struct talloc_chunk *parent, *child;
+ struct talloc_reference_handle *refs;
+ size_t size;
+ unsigned magic;
+ talloc_destructor_t destructor;
+ const char *name;
+};
+
/* panic if we get a bad magic value */

/*
 * Map a user pointer back to its talloc_chunk header, which lives
 * immediately before the memory handed out by _talloc().  Aborts the
 * process via TALLOC_ABORT on a corrupted or already-freed chunk.
 */
static struct talloc_chunk *talloc_chunk_from_ptr(const void *ptr)
{
	struct talloc_chunk *tc = discard_const_p(struct talloc_chunk, ptr)-1;
	if (tc->magic != TALLOC_MAGIC) {
		if (tc->magic == TALLOC_MAGIC_FREE) {
			TALLOC_ABORT("Bad talloc magic value - double free");
		} else {
			TALLOC_ABORT("Bad talloc magic value - unknown value");
		}
	}

	return tc;
}
+
+/* hook into the front of the list */
+#define _TLIST_ADD(list, p) \
+do { \
+ if (!(list)) { \
+ (list) = (p); \
+ (p)->next = (p)->prev = NULL; \
+ } else { \
+ (list)->prev = (p); \
+ (p)->next = (list); \
+ (p)->prev = NULL; \
+ (list) = (p); \
+ }\
+} while (0)
+
+/* remove an element from a list - element doesn't have to be in list. */
+#define _TLIST_REMOVE(list, p) \
+do { \
+ if ((p) == (list)) { \
+ (list) = (p)->next; \
+ if (list) (list)->prev = NULL; \
+ } else { \
+ if ((p)->prev) (p)->prev->next = (p)->next; \
+ if ((p)->next) (p)->next->prev = (p)->prev; \
+ } \
+ if ((p) && ((p) != (list))) (p)->next = (p)->prev = NULL; \
+} while (0)
+
+
+/*
+ return the parent chunk of a pointer
+*/
+static struct talloc_chunk *talloc_parent_chunk(const void *ptr)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ while (tc->prev) tc=tc->prev;
+ return tc->parent;
+}
+
+void *talloc_parent(const void *ptr)
+{
+ struct talloc_chunk *tc = talloc_parent_chunk(ptr);
+ return (void *)(tc+1);
+}
+
/*
  Allocate a bit of memory as a child of an existing pointer
*/

/*
 * Core allocator: reserves a talloc_chunk header followed by 'size'
 * bytes and links the chunk at the head of the parent's child list.
 * A NULL context falls back to null_context (the leak-report root,
 * which may itself be NULL).  Returns the usable region (tc+1), or
 * NULL if the request is >= MAX_TALLOC_SIZE or malloc fails even
 * after the registered malloc_fail_handler has had one chance to
 * reclaim memory.
 */
void *_talloc(const void *context, size_t size)
{
	struct talloc_chunk *tc;

	if (context == NULL) {
		context = null_context;
	}

	if (size >= MAX_TALLOC_SIZE) {
		return NULL;
	}

	tc = malloc(sizeof(*tc)+size);
	if (tc == NULL) {
		/* Give the fail handler a chance to free memory, retry once. */
		if (malloc_fail_handler)
			if (malloc_fail_handler(malloc_fail_data))
				tc = malloc(sizeof(*tc)+size);
		if (!tc)
			return NULL;
	}

	tc->size = size;
	tc->magic = TALLOC_MAGIC;
	tc->destructor = NULL;
	tc->child = NULL;
	tc->name = NULL;
	tc->refs = NULL;

	if (context) {
		struct talloc_chunk *parent = talloc_chunk_from_ptr(context);

		tc->parent = parent;

		/* Invariant: only the head of a child list keeps a parent
		 * back-pointer; clear the old head's before pushing. */
		if (parent->child) {
			parent->child->parent = NULL;
		}

		_TLIST_ADD(parent->child, tc);
	} else {
		tc->next = tc->prev = tc->parent = NULL;
	}

	return (void *)(tc+1);
}
+
+
+/*
+ setup a destructor to be called on free of a pointer
+ the destructor should return 0 on success, or -1 on failure.
+ if the destructor fails then the free is failed, and the memory can
+ be continued to be used
+*/
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *))
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ tc->destructor = destructor;
+}
+
+/*
+ increase the reference count on a piece of memory.
+*/
+void talloc_increase_ref_count(const void *ptr)
+{
+ talloc_reference(null_context, ptr);
+}
+
+/*
+ helper for talloc_reference()
+*/
+static int talloc_reference_destructor(void *ptr)
+{
+ struct talloc_reference_handle *handle = ptr;
+ struct talloc_chunk *tc1 = talloc_chunk_from_ptr(ptr);
+ struct talloc_chunk *tc2 = talloc_chunk_from_ptr(handle->ptr);
+ if (tc1->destructor != (talloc_destructor_t)-1) {
+ tc1->destructor = NULL;
+ }
+ _TLIST_REMOVE(tc2->refs, handle);
+ talloc_free(handle);
+ return 0;
+}
+
+/*
+ make a secondary reference to a pointer, hanging off the given context.
+ the pointer remains valid until both the original caller and this given
+ context are freed.
+
+ the major use for this is when two different structures need to reference the
+ same underlying data, and you want to be able to free the two instances separately,
+ and in either order
+*/
+void *talloc_reference(const void *context, const void *ptr)
+{
+ struct talloc_chunk *tc;
+ struct talloc_reference_handle *handle;
+ if (ptr == NULL) return NULL;
+
+ tc = talloc_chunk_from_ptr(ptr);
+ handle = talloc_named_const(context, sizeof(*handle), TALLOC_MAGIC_REFERENCE);
+
+ if (handle == NULL) return NULL;
+
+ /* note that we hang the destructor off the handle, not the
+ main context as that allows the caller to still setup their
+ own destructor on the context if they want to */
+ talloc_set_destructor(handle, talloc_reference_destructor);
+ handle->ptr = discard_const_p(void, ptr);
+ _TLIST_ADD(tc->refs, handle);
+ return handle->ptr;
+}
+
/*
  remove a secondary reference to a pointer. This undoes what
  talloc_reference() has done. The context and pointer arguments
  must match those given to a talloc_reference()
*/
+static int talloc_unreference(const void *context, const void *ptr)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ struct talloc_reference_handle *h;
+
+ if (context == NULL) {
+ context = null_context;
+ }
+
+ for (h=tc->refs;h;h=h->next) {
+ struct talloc_chunk *p = talloc_parent_chunk(h);
+ if ((p==NULL && context==NULL) || p+1 == context) break;
+ }
+ if (h == NULL) {
+ return -1;
+ }
+
+ talloc_set_destructor(h, NULL);
+ _TLIST_REMOVE(tc->refs, h);
+ talloc_free(h);
+ return 0;
+}
+
/*
  remove a specific parent context from a pointer. This is a more
  controlled variant of talloc_free()
*/

/*
 * Detach @ptr from @context only.  If @context holds a secondary
 * reference it is dropped; if @context is the actual parent, the
 * pointer is either freed outright (no remaining references) or
 * re-parented onto the holder of one remaining reference.  Returns 0
 * on success, -1 if @context is neither the parent nor a reference
 * holder of @ptr.
 */
int talloc_unlink(const void *context, void *ptr)
{
	struct talloc_chunk *tc_p, *new_p;
	void *new_parent;

	if (ptr == NULL) {
		return -1;
	}

	if (context == NULL) {
		context = null_context;
	}

	if (talloc_unreference(context, ptr) == 0) {
		return 0;
	}

	if (context == NULL) {
		if (talloc_parent_chunk(ptr) != NULL) {
			return -1;
		}
	} else {
		if (talloc_chunk_from_ptr(context) != talloc_parent_chunk(ptr)) {
			return -1;
		}
	}

	tc_p = talloc_chunk_from_ptr(ptr);

	if (tc_p->refs == NULL) {
		return talloc_free(ptr);
	}

	/* Hand the chunk over to whoever holds the first remaining ref. */
	new_p = talloc_parent_chunk(tc_p->refs);
	if (new_p) {
		new_parent = new_p+1;
	} else {
		new_parent = NULL;
	}

	if (talloc_unreference(new_parent, ptr) != 0) {
		return -1;
	}

	talloc_steal(new_parent, ptr);

	return 0;
}
+
+/*
+ add a name to an existing pointer - va_list version
+*/
+static void talloc_set_name_v(const void *ptr, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+
+static void talloc_set_name_v(const void *ptr, const char *fmt, va_list ap)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ tc->name = talloc_vasprintf(ptr, fmt, ap);
+ if (tc->name) {
+ talloc_set_name_const(tc->name, ".name");
+ }
+}
+
+/*
+ add a name to an existing pointer
+*/
+void talloc_set_name(const void *ptr, const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+}
+
+/*
+ more efficient way to add a name to a pointer - the name must point to a
+ true string constant
+*/
+void talloc_set_name_const(const void *ptr, const char *name)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ tc->name = name;
+}
+
+/*
+ create a named talloc pointer. Any talloc pointer can be named, and
+ talloc_named() operates just like talloc() except that it allows you
+ to name the pointer.
+*/
+void *talloc_named(const void *context, size_t size, const char *fmt, ...)
+{
+ va_list ap;
+ void *ptr;
+
+ ptr = _talloc(context, size);
+ if (ptr == NULL) return NULL;
+
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+
+ return ptr;
+}
+
+/*
+ create a named talloc pointer. Any talloc pointer can be named, and
+ talloc_named() operates just like talloc() except that it allows you
+ to name the pointer.
+*/
+void *talloc_named_const(const void *context, size_t size, const char *name)
+{
+ void *ptr;
+
+ ptr = _talloc(context, size);
+ if (ptr == NULL) {
+ return NULL;
+ }
+
+ talloc_set_name_const(ptr, name);
+
+ return ptr;
+}
+
+/*
+ return the name of a talloc ptr, or "UNNAMED"
+*/
+const char *talloc_get_name(const void *ptr)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ if (tc->name == TALLOC_MAGIC_REFERENCE) {
+ return ".reference";
+ }
+ if (tc->name) {
+ return tc->name;
+ }
+ return "UNNAMED";
+}
+
+
+/*
+ check if a pointer has the given name. If it does, return the pointer,
+ otherwise return NULL
+*/
+void *talloc_check_name(const void *ptr, const char *name)
+{
+ const char *pname;
+ if (ptr == NULL) return NULL;
+ pname = talloc_get_name(ptr);
+ if (pname == name || strcmp(pname, name) == 0) {
+ return discard_const_p(void, ptr);
+ }
+ return NULL;
+}
+
+
+/*
+ this is for compatibility with older versions of talloc
+*/
+void *talloc_init(const char *fmt, ...)
+{
+ va_list ap;
+ void *ptr;
+
+ ptr = _talloc(NULL, 0);
+ if (ptr == NULL) return NULL;
+
+ va_start(ap, fmt);
+ talloc_set_name_v(ptr, fmt, ap);
+ va_end(ap);
+
+ return ptr;
+}
+
+/*
+ this is a replacement for the Samba3 talloc_destroy_pool functionality. It
+ should probably not be used in new code. It's in here to keep the talloc
+ code consistent across Samba 3 and 4.
+*/
+void talloc_free_children(void *ptr)
+{
+ struct talloc_chunk *tc;
+
+ if (ptr == NULL) {
+ return;
+ }
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ while (tc->child) {
+ /* we need to work out who will own an abandoned child
+ if it cannot be freed. In priority order, the first
+ choice is owner of any remaining reference to this
+ pointer, the second choice is our parent, and the
+ final choice is the null context. */
+ void *child = tc->child+1;
+ const void *new_parent = null_context;
+ if (tc->child->refs) {
+ struct talloc_chunk *p = talloc_parent_chunk(tc->child->refs);
+ if (p) new_parent = p+1;
+ }
+ if (talloc_free(child) == -1) {
+ if (new_parent == null_context) {
+ struct talloc_chunk *p = talloc_parent_chunk(ptr);
+ if (p) new_parent = p+1;
+ }
+ talloc_steal(new_parent, child);
+ }
+ }
+}
+
/*
  free a talloc pointer. This also frees all child pointers of this
  pointer recursively

  return 0 if the memory is actually freed, otherwise -1. The memory
  will not be freed if the ref_count is > 1 or the destructor (if
  any) returns non-zero
*/
int talloc_free(void *ptr)
{
	struct talloc_chunk *tc;

	if (ptr == NULL) {
		return -1;
	}

	tc = talloc_chunk_from_ptr(ptr);

	if (tc->refs) {
		/* Still referenced: drop one reference instead of freeing. */
		talloc_reference_destructor(tc->refs);
		return -1;
	}

	if (tc->destructor) {
		talloc_destructor_t d = tc->destructor;
		/* (talloc_destructor_t)-1 is a sentinel meaning "destructor
		 * already running" — blocks recursive frees from within it. */
		if (d == (talloc_destructor_t)-1) {
			return -1;
		}
		tc->destructor = (talloc_destructor_t)-1;
		if (d(ptr) == -1) {
			/* Destructor vetoed the free; restore it, keep memory. */
			tc->destructor = d;
			return -1;
		}
		tc->destructor = NULL;
	}

	talloc_free_children(ptr);

	/* Unlink from the parent's child list (or the sibling list when
	 * this chunk has no parent back-pointer, i.e. is not the head). */
	if (tc->parent) {
		_TLIST_REMOVE(tc->parent->child, tc);
		if (tc->parent->child) {
			tc->parent->child->parent = tc->parent;
		}
	} else {
		if (tc->prev) tc->prev->next = tc->next;
		if (tc->next) tc->next->prev = tc->prev;
	}

	/* Poison the magic so a later double free is caught. */
	tc->magic = TALLOC_MAGIC_FREE;

	free(tc);
	return 0;
}
+
+
+
/*
  A talloc version of realloc. The context argument is only used if
  ptr is NULL
*/
void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *name)
{
	struct talloc_chunk *tc;
	void *new_ptr;

	/* size zero is equivalent to free() */
	if (size == 0) {
		talloc_free(ptr);
		return NULL;
	}

	if (size >= MAX_TALLOC_SIZE) {
		return NULL;
	}

	/* realloc(NULL) is equivalent to malloc() */
	if (ptr == NULL) {
		return talloc_named_const(context, size, name);
	}

	tc = talloc_chunk_from_ptr(ptr);

	/* don't allow realloc on referenced pointers */
	if (tc->refs) {
		return NULL;
	}

	/* by resetting magic we catch users of the old memory */
	tc->magic = TALLOC_MAGIC_FREE;

#if ALWAYS_REALLOC
	/* Forced-move mode (stress testing): always copy to a fresh block. */
	new_ptr = malloc(size + sizeof(*tc));
	if (!new_ptr) {
		tc->magic = TALLOC_MAGIC;
		if (malloc_fail_handler)
			if (malloc_fail_handler(malloc_fail_data))
				new_ptr = malloc(size + sizeof(*tc));
	}
	if (new_ptr) {
		memcpy(new_ptr, tc, tc->size + sizeof(*tc));
		free(tc);
	}
#else
	new_ptr = realloc(tc, size + sizeof(*tc));
	if (!new_ptr) {
		/* Restore magic, let the fail handler reclaim, retry once. */
		tc->magic = TALLOC_MAGIC;
		if (malloc_fail_handler)
			if (malloc_fail_handler(malloc_fail_data))
				new_ptr = realloc(tc, size + sizeof(*tc));
	}
#endif
	if (!new_ptr) {
		tc->magic = TALLOC_MAGIC;
		return NULL;
	}

	/* The chunk may have moved: fix up every pointer that refers to it. */
	tc = new_ptr;
	tc->magic = TALLOC_MAGIC;
	if (tc->parent) {
		tc->parent->child = new_ptr;
	}
	if (tc->child) {
		tc->child->parent = new_ptr;
	}

	if (tc->prev) {
		tc->prev->next = tc;
	}
	if (tc->next) {
		tc->next->prev = tc;
	}

	tc->size = size;
	talloc_set_name_const(tc+1, name);

	return (void *)(tc+1);
}
+
+/*
+ move a lump of memory from one talloc context to another return the
+ ptr on success, or NULL if it could not be transferred.
+ passing NULL as ptr will always return NULL with no side effects.
+*/
+void *talloc_steal(const void *new_ctx, const void *ptr)
+{
+ struct talloc_chunk *tc, *new_tc;
+
+ if (!ptr) {
+ return NULL;
+ }
+
+ if (new_ctx == NULL) {
+ new_ctx = null_context;
+ }
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ if (new_ctx == NULL) {
+ if (tc->parent) {
+ _TLIST_REMOVE(tc->parent->child, tc);
+ if (tc->parent->child) {
+ tc->parent->child->parent = tc->parent;
+ }
+ } else {
+ if (tc->prev) tc->prev->next = tc->next;
+ if (tc->next) tc->next->prev = tc->prev;
+ }
+
+ tc->parent = tc->next = tc->prev = NULL;
+ return discard_const_p(void, ptr);
+ }
+
+ new_tc = talloc_chunk_from_ptr(new_ctx);
+
+ if (tc == new_tc) {
+ return discard_const_p(void, ptr);
+ }
+
+ if (tc->parent) {
+ _TLIST_REMOVE(tc->parent->child, tc);
+ if (tc->parent->child) {
+ tc->parent->child->parent = tc->parent;
+ }
+ } else {
+ if (tc->prev) tc->prev->next = tc->next;
+ if (tc->next) tc->next->prev = tc->prev;
+ }
+
+ tc->parent = new_tc;
+ if (new_tc->child) new_tc->child->parent = NULL;
+ _TLIST_ADD(new_tc->child, tc);
+
+ return discard_const_p(void, ptr);
+}
+
+/*
+ return the total size of a talloc pool (subtree)
+*/
+off_t talloc_total_size(const void *ptr)
+{
+ off_t total = 0;
+ struct talloc_chunk *c, *tc;
+
+ if (ptr == NULL) {
+ ptr = null_context;
+ }
+ if (ptr == NULL) {
+ return 0;
+ }
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ total = tc->size;
+ for (c=tc->child;c;c=c->next) {
+ total += talloc_total_size(c+1);
+ }
+ return total;
+}
+
+/*
+ return the total number of chunks in a talloc pool (this chunk plus
+ all of its children, recursively)
+*/
+off_t talloc_total_blocks(const void *ptr)
+{
+ off_t total = 0;
+ struct talloc_chunk *c, *tc;
+
+ /* NULL means the global null context; if tracking is off there is
+ nothing to count */
+ if (ptr == NULL) {
+ ptr = null_context;
+ }
+ if (ptr == NULL) {
+ return 0;
+ }
+ tc = talloc_chunk_from_ptr(ptr);
+
+ /* count this block, then recurse into each child (c+1 is the
+ user pointer for chunk header c) */
+ total++;
+ for (c=tc->child;c;c=c->next) {
+ total += talloc_total_blocks(c+1);
+ }
+ return total;
+}
+
+/*
+ return the number of external references to a pointer (the length of
+ the chunk's refs list)
+*/
+static int talloc_reference_count(const void *ptr)
+{
+ struct talloc_chunk *tc = talloc_chunk_from_ptr(ptr);
+ struct talloc_reference_handle *h;
+ int ret = 0;
+
+ for (h=tc->refs;h;h=h->next) {
+ ret++;
+ }
+ return ret;
+}
+
+/*
+ report on memory usage by all children of a pointer, giving a full tree view.
+ "depth" controls the indentation (4 spaces per level); output goes to f.
+*/
+void talloc_report_depth(const void *ptr, FILE *f, int depth)
+{
+ struct talloc_chunk *c, *tc = talloc_chunk_from_ptr(ptr);
+
+ for (c=tc->child;c;c=c->next) {
+ /* a chunk whose name field is the TALLOC_MAGIC_REFERENCE sentinel
+ (pointer equality is intentional) is a reference handle, not a
+ normal allocation; print what it points at instead */
+ if (c->name == TALLOC_MAGIC_REFERENCE) {
+ struct talloc_reference_handle *handle = (void *)(c+1);
+ const char *name2 = talloc_get_name(handle->ptr);
+ fprintf(f, "%*sreference to: %s\n", depth*4, "", name2);
+ } else {
+ const char *name = talloc_get_name(c+1);
+ fprintf(f, "%*s%-30s contains %6lu bytes in %3lu blocks (ref %d)\n",
+ depth*4, "",
+ name,
+ (unsigned long)talloc_total_size(c+1),
+ (unsigned long)talloc_total_blocks(c+1),
+ talloc_reference_count(c+1));
+ talloc_report_depth(c+1, f, depth+1);
+ }
+ }
+
+}
+
+/*
+ report on memory usage by all children of a pointer, giving a full tree view.
+ ptr may be NULL, meaning the null context (a no-op unless null tracking
+ has been enabled).
+*/
+void talloc_report_full(const void *ptr, FILE *f)
+{
+ if (ptr == NULL) {
+ ptr = null_context;
+ }
+ if (ptr == NULL) return;
+
+ fprintf(f,"full talloc report on '%s' (total %lu bytes in %lu blocks)\n",
+ talloc_get_name(ptr),
+ (unsigned long)talloc_total_size(ptr),
+ (unsigned long)talloc_total_blocks(ptr));
+
+ talloc_report_depth(ptr, f, 1);
+ fflush(f);
+}
+
+/*
+ report on memory usage by all children of a pointer — one summary line
+ per immediate child only (use talloc_report_full() for the whole tree).
+ ptr may be NULL, meaning the null context (a no-op unless null tracking
+ has been enabled).
+*/
+void talloc_report(const void *ptr, FILE *f)
+{
+ struct talloc_chunk *c, *tc;
+
+ if (ptr == NULL) {
+ ptr = null_context;
+ }
+ if (ptr == NULL) return;
+
+ fprintf(f,"talloc report on '%s' (total %lu bytes in %lu blocks)\n",
+ talloc_get_name(ptr),
+ (unsigned long)talloc_total_size(ptr),
+ (unsigned long)talloc_total_blocks(ptr));
+
+ tc = talloc_chunk_from_ptr(ptr);
+
+ for (c=tc->child;c;c=c->next) {
+ fprintf(f, "\t%-30s contains %6lu bytes in %3lu blocks\n",
+ talloc_get_name(c+1),
+ (unsigned long)talloc_total_size(c+1),
+ (unsigned long)talloc_total_blocks(c+1));
+ }
+ fflush(f);
+}
+
+/*
+ report on any memory hanging off the null context (summary form).
+ Registered via atexit() by talloc_enable_leak_report().
+*/
+static void talloc_report_null(void)
+{
+ if (talloc_total_size(null_context) != 0) {
+ talloc_report(null_context, stderr);
+ }
+}
+
+/*
+ report on any memory hanging off the null context (full tree form).
+ Registered via atexit() by talloc_enable_leak_report_full().
+*/
+static void talloc_report_null_full(void)
+{
+ if (talloc_total_size(null_context) != 0) {
+ talloc_report_full(null_context, stderr);
+ }
+}
+
+/*
+ enable tracking of the NULL context, so that top-level allocations
+ become children of a zero-length "null_context" chunk. Idempotent.
+*/
+void talloc_enable_null_tracking(void)
+{
+ if (null_context == NULL) {
+ null_context = talloc_named_const(NULL, 0, "null_context");
+ }
+}
+
+/*
+ enable summary leak reporting on exit (talloc_report to stderr).
+ Must be called before other talloc calls for the report to be complete.
+*/
+void talloc_enable_leak_report(void)
+{
+ talloc_enable_null_tracking();
+ atexit(talloc_report_null);
+}
+
+/*
+ enable full-tree leak reporting on exit (talloc_report_full to stderr).
+ Must be called before other talloc calls for the report to be complete.
+*/
+void talloc_enable_leak_report_full(void)
+{
+ talloc_enable_null_tracking();
+ atexit(talloc_report_null_full);
+}
+
+/*
+ talloc and zero memory. Returns NULL on allocation failure.
+ Backend for the talloc_zero()/talloc_zero_size() macros.
+*/
+void *_talloc_zero(const void *ctx, size_t size, const char *name)
+{
+ void *p = talloc_named_const(ctx, size, name);
+
+ if (p) {
+ memset(p, '\0', size);
+ }
+
+ return p;
+}
+
+
+/*
+ memdup with a talloc. Returns NULL on allocation failure.
+ Backend for the talloc_memdup() macro.
+ NOTE(review): p is not checked — passing NULL with size > 0 would be
+ undefined behaviour in memcpy; confirm callers guarantee p != NULL.
+*/
+void *_talloc_memdup(const void *t, const void *p, size_t size, const char *name)
+{
+ void *newp = talloc_named_const(t, size, name);
+
+ if (newp) {
+ memcpy(newp, p, size);
+ }
+
+ return newp;
+}
+
+/*
+ strdup with a talloc. NULL input yields NULL; otherwise the new
+ string is also used as its own talloc name (handy in reports).
+*/
+char *talloc_strdup(const void *t, const char *p)
+{
+ char *ret;
+ if (!p) {
+ return NULL;
+ }
+ ret = talloc_memdup(t, p, strlen(p) + 1);
+ if (ret) {
+ talloc_set_name_const(ret, ret);
+ }
+ return ret;
+}
+
+/*
+ strndup with a talloc: copy at most n characters of p, always
+ NUL-terminating the result. The new string is also its own talloc name.
+ NOTE(review): unlike talloc_strdup(), p is dereferenced without a NULL
+ check — confirm callers never pass NULL.
+*/
+char *talloc_strndup(const void *t, const char *p, size_t n)
+{
+ size_t len;
+ char *ret;
+
+ /* bounded strlen: stop at NUL or at n characters */
+ for (len=0; p[len] && len<n; len++) ;
+
+ ret = _talloc(t, len + 1);
+ if (!ret) { return NULL; }
+ memcpy(ret, p, len);
+ ret[len] = 0;
+ talloc_set_name_const(ret, ret);
+ return ret;
+}
+
+/* portable va_list duplication: C99 va_copy, the older GNU __va_copy,
+ or plain assignment on platforms where va_list is a simple value */
+#ifndef VA_COPY
+#ifdef HAVE_VA_COPY
+#define VA_COPY(dest, src) va_copy(dest, src)
+#elif defined(HAVE___VA_COPY)
+#define VA_COPY(dest, src) __va_copy(dest, src)
+#else
+#define VA_COPY(dest, src) (dest) = (src)
+#endif
+#endif
+
+/*
+ talloc equivalent of vasprintf(): format into a freshly talloc'd
+ string (which also becomes its own talloc name). Returns NULL on
+ allocation failure.
+*/
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap)
+{
+ int len;
+ char *ret;
+ va_list ap2;
+
+ VA_COPY(ap2, ap);
+
+ /* first pass with a NULL buffer just measures the needed length.
+ NOTE(review): a negative (error) return from vsnprintf is not
+ checked here and would make the allocation below size 0 */
+ len = vsnprintf(NULL, 0, fmt, ap2);
+
+ ret = _talloc(t, len+1);
+ if (ret) {
+ VA_COPY(ap2, ap);
+ vsnprintf(ret, len+1, fmt, ap2);
+ talloc_set_name_const(ret, ret);
+ }
+
+ return ret;
+}
+
+
+/*
+ Perform string formatting, and return a pointer to newly allocated
+ memory holding the result, inside a memory pool. Thin varargs wrapper
+ around talloc_vasprintf(); returns NULL on allocation failure.
+ */
+char *talloc_asprintf(const void *t, const char *fmt, ...)
+{
+ va_list ap;
+ char *ret;
+
+ va_start(ap, fmt);
+ ret = talloc_vasprintf(t, fmt, ap);
+ va_end(ap);
+ return ret;
+}
+
+
+/**
+ * Realloc @p s to append the formatted result of @p fmt and @p ap,
+ * and return @p s, which may have moved. Good for gradually
+ * accumulating output into a string buffer. Returns NULL on
+ * allocation failure (the old @p s is freed by talloc_realloc in
+ * that case only if the realloc itself succeeded — see below).
+ **/
+
+static char *talloc_vasprintf_append(char *s, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+
+static char *talloc_vasprintf_append(char *s, const char *fmt, va_list ap)
+{
+ struct talloc_chunk *tc;
+ int len, s_len;
+ va_list ap2;
+
+ /* appending to NULL is just a fresh vasprintf */
+ if (s == NULL) {
+ return talloc_vasprintf(NULL, fmt, ap);
+ }
+
+ tc = talloc_chunk_from_ptr(s);
+
+ VA_COPY(ap2, ap);
+
+ /* assumes s is a talloc'd string whose chunk size is exactly
+ strlen(s)+1 (true for the talloc string functions, which all
+ allocate len+1) — TODO confirm no caller passes anything else */
+ s_len = tc->size - 1;
+ /* measuring pass: NULL buffer, zero size */
+ len = vsnprintf(NULL, 0, fmt, ap2);
+
+ s = talloc_realloc(NULL, s, char, s_len + len+1);
+ if (!s) return NULL;
+
+ VA_COPY(ap2, ap);
+
+ /* format directly after the existing contents */
+ vsnprintf(s+s_len, len+1, fmt, ap2);
+ talloc_set_name_const(s, s);
+
+ return s;
+}
+
+/*
+ Realloc @p s to append the formatted result of @p fmt and return @p
+ s, which may have moved. Good for gradually accumulating output
+ into a string buffer. Varargs wrapper around talloc_vasprintf_append().
+ */
+char *talloc_asprintf_append(char *s, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ s = talloc_vasprintf_append(s, fmt, ap);
+ va_end(ap);
+ return s;
+}
+
+/*
+ alloc an array, checking for integer overflow in the array size.
+ Backend for the talloc_array()/talloc_array_size() macros.
+ NOTE(review): el_size == 0 would divide by zero here — the macros
+ always pass sizeof(type) (never 0), but confirm for direct callers.
+*/
+void *_talloc_array(const void *ctx, size_t el_size, unsigned count, const char *name)
+{
+ if (count >= MAX_TALLOC_SIZE/el_size) {
+ return NULL;
+ }
+ return talloc_named_const(ctx, el_size * count, name);
+}
+
+/*
+ alloc a zeroed array, checking for integer overflow in the array size.
+ Backend for the talloc_zero_array() macro.
+*/
+void *_talloc_zero_array(const void *ctx, size_t el_size, unsigned count, const char *name)
+{
+ if (count >= MAX_TALLOC_SIZE/el_size) {
+ return NULL;
+ }
+ return _talloc_zero(ctx, el_size * count, name);
+}
+
+
+/*
+ realloc an array, checking for integer overflow in the array size.
+ Backend for the talloc_realloc() macro.
+*/
+void *_talloc_realloc_array(const void *ctx, void *ptr, size_t el_size, unsigned count, const char *name)
+{
+ if (count >= MAX_TALLOC_SIZE/el_size) {
+ return NULL;
+ }
+ return _talloc_realloc(ctx, ptr, el_size * count, name);
+}
+
+/*
+ a function version of talloc_realloc(), so it can be passed as a function pointer
+ to libraries that want a realloc function (a realloc function encapsulates
+ all the basic capabilities of an allocation library, which is why this is useful)
+*/
+void *talloc_realloc_fn(const void *context, void *ptr, size_t size)
+{
+ return _talloc_realloc(context, ptr, size, NULL);
+}
+
+
+/* atexit() handler: free everything under the autofree context */
+static void talloc_autofree(void)
+{
+ talloc_free(cleanup_context);
+ cleanup_context = NULL;
+}
+
+/*
+ return a context which will be auto-freed on exit.
+ this is useful for reducing the noise in leak reports.
+ Created lazily on first call; the atexit() hook is registered once.
+*/
+void *talloc_autofree_context(void)
+{
+ if (cleanup_context == NULL) {
+ cleanup_context = talloc_named_const(NULL, 0, "autofree_context");
+ atexit(talloc_autofree);
+ }
+ return cleanup_context;
+}
+
+/* return the requested size of a talloc chunk (0 for NULL) */
+size_t talloc_get_size(const void *context)
+{
+ struct talloc_chunk *tc;
+
+ if (context == NULL)
+ return 0;
+
+ tc = talloc_chunk_from_ptr(context);
+
+ return tc->size;
+}
+
+/*
+ install a handler (plus opaque data) to be called on allocation
+ failure; returns the previously installed handler so callers can
+ chain or restore it.
+*/
+talloc_fail_handler *talloc_set_fail_handler(talloc_fail_handler *handler,
+ void *data)
+{
+ talloc_fail_handler *old = malloc_fail_handler;
+ malloc_fail_handler = handler;
+ malloc_fail_data = data;
+ return old;
+}
diff --git a/tools/xenstore/talloc.h b/tools/xenstore/talloc.h
new file mode 100644
index 0000000000..39bcb53fb7
--- /dev/null
+++ b/tools/xenstore/talloc.h
@@ -0,0 +1,134 @@
+#ifndef _TALLOC_H_
+#define _TALLOC_H_
+/*
+ Unix SMB/CIFS implementation.
+ Samba temporary memory allocation functions
+
+ Copyright (C) Andrew Tridgell 2004-2005
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/* this is only needed for compatibility with the old talloc */
+typedef void TALLOC_CTX;
+
+/*
+ this uses a little trick to allow __LINE__ to be stringified;
+ __location__ expands to a "file:line" string literal used as the
+ default name for anonymous allocations
+*/
+#define _STRING_LINE_(s) #s
+#define _STRING_LINE2_(s) _STRING_LINE_(s)
+#define __LINESTR__ _STRING_LINE2_(__LINE__)
+#define __location__ __FILE__ ":" __LINESTR__
+
+#ifndef TALLOC_DEPRECATED
+#define TALLOC_DEPRECATED 0
+#endif
+
+/* useful macros for creating type checked pointers; the #type or
+ __location__ string becomes the chunk's name for debug reports */
+#define talloc(ctx, type) (type *)talloc_named_const(ctx, sizeof(type), #type)
+#define talloc_size(ctx, size) talloc_named_const(ctx, size, __location__)
+
+#define talloc_new(ctx) talloc_named_const(ctx, 0, "talloc_new: " __location__)
+
+#define talloc_zero(ctx, type) (type *)_talloc_zero(ctx, sizeof(type), #type)
+#define talloc_zero_size(ctx, size) _talloc_zero(ctx, size, __location__)
+
+#define talloc_zero_array(ctx, type, count) (type *)_talloc_zero_array(ctx, sizeof(type), count, #type)
+#define talloc_array(ctx, type, count) (type *)_talloc_array(ctx, sizeof(type), count, #type)
+#define talloc_array_size(ctx, size, count) _talloc_array(ctx, size, count, __location__)
+
+#define talloc_realloc(ctx, p, type, count) (type *)_talloc_realloc_array(ctx, p, sizeof(type), count, #type)
+#define talloc_realloc_size(ctx, ptr, size) _talloc_realloc(ctx, ptr, size, __location__)
+
+#define talloc_memdup(t, p, size) _talloc_memdup(t, p, size, __location__)
+
+/* plain-malloc convenience wrappers (not talloc-tracked) */
+#define malloc_p(type) (type *)malloc(sizeof(type))
+#define malloc_array_p(type, count) (type *)realloc_array(NULL, sizeof(type), count)
+#define realloc_p(p, type, count) (type *)realloc_array(p, sizeof(type), count)
+
+#define data_blob(ptr, size) data_blob_named(ptr, size, "DATA_BLOB: "__location__)
+#define data_blob_talloc(ctx, ptr, size) data_blob_talloc_named(ctx, ptr, size, "DATA_BLOB: "__location__)
+#define data_blob_dup_talloc(ctx, blob) data_blob_talloc_named(ctx, (blob)->data, (blob)->length, "DATA_BLOB: "__location__)
+
+/* poor-man's runtime type tagging via the chunk name */
+#define talloc_set_type(ptr, type) talloc_set_name_const(ptr, #type)
+#define talloc_get_type(ptr, type) (type *)talloc_check_name(ptr, #type)
+
+
+/* deprecated Samba3-era aliases, kept only for transition */
+#if TALLOC_DEPRECATED
+#define talloc_zero_p(ctx, type) talloc_zero(ctx, type)
+#define talloc_p(ctx, type) talloc(ctx, type)
+#define talloc_array_p(ctx, type, count) talloc_array(ctx, type, count)
+#define talloc_realloc_p(ctx, p, type, count) talloc_realloc(ctx, p, type, count)
+#define talloc_destroy(ctx) talloc_free(ctx)
+#endif
+
+#ifndef PRINTF_ATTRIBUTE
+#if (__GNUC__ >= 3)
+/** Use gcc attribute to check printf fns. a1 is the 1-based index of
+ * the parameter containing the format, and a2 the index of the first
+ * argument. Note that some gcc 2.x versions don't handle this
+ * properly **/
+#define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format (__printf__, a1, a2)))
+#else
+#define PRINTF_ATTRIBUTE(a1, a2)
+#endif
+#endif
+
+
+/* The following definitions come from talloc.c */
+void *_talloc(const void *context, size_t size);
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *));
+void talloc_increase_ref_count(const void *ptr);
+void *talloc_reference(const void *context, const void *ptr);
+int talloc_unlink(const void *context, void *ptr);
+void talloc_set_name(const void *ptr, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+void talloc_set_name_const(const void *ptr, const char *name);
+void *talloc_named(const void *context, size_t size,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(3,4);
+void *talloc_named_const(const void *context, size_t size, const char *name);
+const char *talloc_get_name(const void *ptr);
+void *talloc_check_name(const void *ptr, const char *name);
+void talloc_report_depth(const void *ptr, FILE *f, int depth);
+void *talloc_parent(const void *ptr);
+void *talloc_init(const char *fmt, ...) PRINTF_ATTRIBUTE(1,2);
+int talloc_free(void *ptr);
+void *_talloc_realloc(const void *context, void *ptr, size_t size, const char *name);
+void *talloc_steal(const void *new_ctx, const void *ptr);
+off_t talloc_total_size(const void *ptr);
+off_t talloc_total_blocks(const void *ptr);
+void talloc_report_full(const void *ptr, FILE *f);
+void talloc_report(const void *ptr, FILE *f);
+void talloc_enable_null_tracking(void);
+void talloc_enable_leak_report(void);
+void talloc_enable_leak_report_full(void);
+void *_talloc_zero(const void *ctx, size_t size, const char *name);
+void *_talloc_memdup(const void *t, const void *p, size_t size, const char *name);
+char *talloc_strdup(const void *t, const char *p);
+char *talloc_strndup(const void *t, const char *p, size_t n);
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap) PRINTF_ATTRIBUTE(2,0);
+char *talloc_asprintf(const void *t, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+char *talloc_asprintf_append(char *s,
+ const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
+void *_talloc_array(const void *ctx, size_t el_size, unsigned count, const char *name);
+void *_talloc_zero_array(const void *ctx, size_t el_size, unsigned count, const char *name);
+void *_talloc_realloc_array(const void *ctx, void *ptr, size_t el_size, unsigned count, const char *name);
+void *talloc_realloc_fn(const void *context, void *ptr, size_t size);
+void *talloc_autofree_context(void);
+size_t talloc_get_size(const void *ctx);
+
+/* handler invoked on allocation failure (stored via
+ talloc_set_fail_handler(); invocation semantics live in talloc.c) */
+typedef int talloc_fail_handler(void *);
+talloc_fail_handler *talloc_set_fail_handler(talloc_fail_handler *, void *);
+#endif
+
diff --git a/tools/xenstore/talloc_guide.txt b/tools/xenstore/talloc_guide.txt
new file mode 100644
index 0000000000..c23ac77cad
--- /dev/null
+++ b/tools/xenstore/talloc_guide.txt
@@ -0,0 +1,569 @@
+Using talloc in Samba4
+----------------------
+
+Andrew Tridgell
+September 2004
+
+The most current version of this document is available at
+ http://samba.org/ftp/unpacked/samba4/source/lib/talloc/talloc_guide.txt
+
+If you are used to talloc from Samba3 then please read this carefully,
+as talloc has changed a lot.
+
+The new talloc is a hierarchical, reference counted memory pool system
+with destructors. Quite a mouthful really, but not too bad once you
+get used to it.
+
+Perhaps the biggest change from Samba3 is that there is no distinction
+between a "talloc context" and a "talloc pointer". Any pointer
+returned from talloc() is itself a valid talloc context. This means
+you can do this:
+
+ struct foo *X = talloc(mem_ctx, struct foo);
+ X->name = talloc_strdup(X, "foo");
+
+and the pointer X->name would be a "child" of the talloc context "X"
+which is itself a child of mem_ctx. So if you do talloc_free(mem_ctx)
+then it is all destroyed, whereas if you do talloc_free(X) then just X
+and X->name are destroyed, and if you do talloc_free(X->name) then
+just the name element of X is destroyed.
+
+If you think about this, then what this effectively gives you is an
+n-ary tree, where you can free any part of the tree with
+talloc_free().
+
+If you find this confusing, then I suggest you run the testsuite to
+watch talloc in action. You may also like to add your own tests to
+testsuite.c to clarify how some particular situation is handled.
+
+
+Performance
+-----------
+
+All the additional features of talloc() over malloc() do come at a
+price. We have a simple performance test in Samba4 that measures
+talloc() versus malloc() performance, and it seems that talloc() is
+about 10% slower than malloc() on my x86 Debian Linux box. For Samba,
+the great reduction in code complexity that we get by using talloc
+makes this worthwhile, especially as the total overhead of
+talloc/malloc in Samba is already quite small.
+
+
+talloc API
+----------
+
+The following is a complete guide to the talloc API. Read it all at
+least twice.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc(const void *context, type);
+
+The talloc() macro is the core of the talloc library. It takes a
+memory context and a type, and returns a pointer to a new area of
+memory of the given type.
+
+The returned pointer is itself a talloc context, so you can use it as
+the context argument to more calls to talloc if you wish.
+
+The returned pointer is a "child" of the supplied context. This means
+that if you talloc_free() the context then the new child disappears as
+well. Alternatively you can free just the child.
+
+The context argument to talloc() can be NULL, in which case a new top
+level context is created.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_size(const void *context, size_t size);
+
+The function talloc_size() should be used when you don't have a
+convenient type to pass to talloc(). Unlike talloc(), it is not type
+safe (as it returns a void *), so you are on your own for type checking.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_free(void *ptr);
+
+The talloc_free() function frees a piece of talloc memory, and all its
+children. You can call talloc_free() on any pointer returned by
+talloc().
+
+The return value of talloc_free() indicates success or failure, with 0
+returned for success and -1 for failure. The only possible failure
+condition is if the pointer had a destructor attached to it and the
+destructor returned -1. See talloc_set_destructor() for details on
+destructors.
+
+If this pointer has an additional parent when talloc_free() is called
+then the memory is not actually released, but instead the most
+recently established parent is destroyed. See talloc_reference() for
+details on establishing additional parents.
+
+For more control on which parent is removed, see talloc_unlink()
+
+talloc_free() operates recursively on its children.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_free_children(void *ptr);
+
+The talloc_free_children() walks along the list of all children of a
+talloc context and talloc_free()s only the children, not the context
+itself.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_reference(const void *context, const void *ptr);
+
+The talloc_reference() function makes "context" an additional parent
+of "ptr".
+
+The return value of talloc_reference() is always the original pointer
+"ptr", unless talloc ran out of memory in creating the reference in
+which case it will return NULL (each additional reference consumes
+around 48 bytes of memory on intel x86 platforms).
+
+If "ptr" is NULL, then the function is a no-op, and simply returns NULL.
+
+After creating a reference you can free it in one of the following
+ways:
+
+ - you can talloc_free() any parent of the original pointer. That
+ will reduce the number of parents of this pointer by 1, and will
+ cause this pointer to be freed if it runs out of parents.
+
+ - you can talloc_free() the pointer itself. That will destroy the
+ most recently established parent to the pointer and leave the
+ pointer as a child of its current parent.
+
+For more control on which parent to remove, see talloc_unlink()
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+int talloc_unlink(const void *context, const void *ptr);
+
+The talloc_unlink() function removes a specific parent from ptr. The
+context passed must either be a context used in talloc_reference()
+with this pointer, or must be a direct parent of ptr.
+
+Note that if the parent has already been removed using talloc_free()
+then this function will fail and will return -1. Likewise, if "ptr"
+is NULL, then the function will make no modifications and return -1.
+
+Usually you can just use talloc_free() instead of talloc_unlink(), but
+sometimes it is useful to have the additional control on which parent
+is removed.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_destructor(const void *ptr, int (*destructor)(void *));
+
+The function talloc_set_destructor() sets the "destructor" for the
+pointer "ptr". A destructor is a function that is called when the
+memory used by a pointer is about to be released. The destructor
+receives the pointer as an argument, and should return 0 for success
+and -1 for failure.
+
+The destructor can do anything it wants to, including freeing other
+pieces of memory. A common use for destructors is to clean up
+operating system resources (such as open file descriptors) contained
+in the structure the destructor is placed on.
+
+You can only place one destructor on a pointer. If you need more than
+one destructor then you can create a zero-length child of the pointer
+and place an additional destructor on that.
+
+To remove a destructor call talloc_set_destructor() with NULL for the
+destructor.
+
+If your destructor attempts to talloc_free() the pointer that it is
+the destructor for then talloc_free() will return -1 and the free will
+be ignored. This would be a pointless operation anyway, as the
+destructor is only called when the memory is just about to go away.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_increase_ref_count(const void *ptr);
+
+The talloc_increase_ref_count(ptr) function is exactly equivalent to:
+
+ talloc_reference(NULL, ptr);
+
+You can use either syntax, depending on which you think is clearer in
+your code.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_name(const void *ptr, const char *fmt, ...);
+
+Each talloc pointer has a "name". The name is used principally for
+debugging purposes, although it is also possible to set and get the
+name on a pointer as a way of "marking" pointers in your code.
+
+The main use for names on pointer is for "talloc reports". See
+talloc_report() and talloc_report_full() for details. Also see
+talloc_enable_leak_report() and talloc_enable_leak_report_full().
+
+The talloc_set_name() function allocates memory as a child of the
+pointer. It is logically equivalent to:
+ talloc_set_name_const(ptr, talloc_asprintf(ptr, fmt, ...));
+
+Note that multiple calls to talloc_set_name() will allocate more
+memory without releasing the name. All of the memory is released when
+the ptr is freed using talloc_free().
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_set_name_const(const void *ptr, const char *name);
+
+The function talloc_set_name_const() is just like talloc_set_name(),
+but it takes a string constant, and is much faster. It is extensively
+used by the "auto naming" macros, such as talloc_p().
+
+This function does not allocate any memory. It just copies the
+supplied pointer into the internal representation of the talloc
+ptr. This means you must not pass a name pointer to memory that will
+disappear before the ptr is freed with talloc_free().
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_named(const void *context, size_t size, const char *fmt, ...);
+
+The talloc_named() function creates a named talloc pointer. It is
+equivalent to:
+
+ ptr = talloc_size(context, size);
+ talloc_set_name(ptr, fmt, ....);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_named_const(const void *context, size_t size, const char *name);
+
+This is equivalent to:
+
+ ptr = talloc_size(context, size);
+ talloc_set_name_const(ptr, name);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+const char *talloc_get_name(const void *ptr);
+
+This returns the current name for the given talloc pointer. See
+talloc_set_name() for details.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_init(const char *fmt, ...);
+
+This function creates a zero length named talloc context as a top
+level context. It is equivalent to:
+
+ talloc_named(NULL, 0, fmt, ...);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_new(void *ctx);
+
+This is a utility macro that creates a new memory context hanging
+off an existing context, automatically naming it "talloc_new: __location__"
+where __location__ is the source line it is called from. It is
+particularly useful for creating a new temporary working context.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_realloc(const void *context, void *ptr, type, count);
+
+The talloc_realloc() macro changes the size of a talloc
+pointer. The "count" argument is the number of elements of type "type"
+that you want the resulting pointer to hold.
+
+talloc_realloc() has the following equivalences:
+
+ talloc_realloc(context, NULL, type, 1) ==> talloc(context, type);
+ talloc_realloc(context, NULL, type, N) ==> talloc_array(context, type, N);
+ talloc_realloc(context, ptr, type, 0) ==> talloc_free(ptr);
+
+The "context" argument is only used if "ptr" is not NULL, otherwise it
+is ignored.
+
+talloc_realloc() returns the new pointer, or NULL on failure. The call
+will fail either due to a lack of memory, or because the pointer has
+more than one parent (see talloc_reference()).
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_realloc_size(const void *context, void *ptr, size_t size);
+
+the talloc_realloc_size() function is useful when the type is not
+known so the typesafe talloc_realloc() cannot be used.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_steal(const void *new_ctx, const void *ptr);
+
+The talloc_steal() function changes the parent context of a talloc
+pointer. It is typically used when the context that the pointer is
+currently a child of is going to be freed and you wish to keep the
+memory for a longer time.
+
+The talloc_steal() function returns the pointer that you pass it. It
+does not have any failure modes.
+
+NOTE: It is possible to produce loops in the parent/child relationship
+if you are not careful with talloc_steal(). No guarantees are provided
+as to your sanity or the safety of your data if you do this.
+
+talloc_steal (new_ctx, NULL) will return NULL with no side effects.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+off_t talloc_total_size(const void *ptr);
+
+The talloc_total_size() function returns the total size in bytes used
+by this pointer and all child pointers. Mostly useful for debugging.
+
+Passing NULL is allowed, but it will only give a meaningful result if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+off_t talloc_total_blocks(const void *ptr);
+
+The talloc_total_blocks() function returns the total memory block
+count used by this pointer and all child pointers. Mostly useful for
+debugging.
+
+Passing NULL is allowed, but it will only give a meaningful result if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_report(const void *ptr, FILE *f);
+
+The talloc_report() function prints a summary report of all memory
+used by ptr. One line of report is printed for each immediate child of
+ptr, showing the total memory and number of blocks used by that child.
+
+You can pass NULL for the pointer, in which case a report is printed
+for the top level memory context, but only if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_report_full(const void *ptr, FILE *f);
+
+This provides a more detailed report than talloc_report(). It will
+recursively print the entire tree of memory referenced by the
+pointer. References in the tree are shown by giving the name of the
+pointer that is referenced.
+
+You can pass NULL for the pointer, in which case a report is printed
+for the top level memory context, but only if
+talloc_enable_leak_report() or talloc_enable_leak_report_full() has
+been called.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_leak_report(void);
+
+This enables calling of talloc_report(NULL, stderr) when the program
+exits. In Samba4 this is enabled by using the --leak-report command
+line option.
+
+For it to be useful, this function must be called before any other
+talloc function as it establishes a "null context" that acts as the
+top of the tree. If you don't call this function first then passing
+NULL to talloc_report() or talloc_report_full() won't give you the
+full tree printout.
+
+Here is a typical talloc report:
+
+talloc report on 'null_context' (total 267 bytes in 15 blocks)
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ iconv(UTF8,CP850) contains 42 bytes in 2 blocks
+ libcli/auth/spnego_parse.c:55 contains 31 bytes in 2 blocks
+ iconv(CP850,UTF8) contains 42 bytes in 2 blocks
+ iconv(UTF8,UTF-16LE) contains 45 bytes in 2 blocks
+ iconv(UTF-16LE,UTF8) contains 45 bytes in 2 blocks
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_leak_report_full(void);
+
+This enables calling of talloc_report_full(NULL, stderr) when the
+program exits. In Samba4 this is enabled by using the
+--leak-report-full command line option.
+
+For it to be useful, this function must be called before any other
+talloc function as it establishes a "null context" that acts as the
+top of the tree. If you don't call this function first then passing
+NULL to talloc_report() or talloc_report_full() won't give you the
+full tree printout.
+
+Here is a typical full report:
+
+full talloc report on 'root' (total 18 bytes in 8 blocks)
+ p1 contains 18 bytes in 7 blocks (ref 0)
+ r1 contains 13 bytes in 2 blocks (ref 0)
+ reference to: p2
+ p2 contains 1 bytes in 1 blocks (ref 1)
+ x3 contains 1 bytes in 1 blocks (ref 0)
+ x2 contains 1 bytes in 1 blocks (ref 0)
+ x1 contains 1 bytes in 1 blocks (ref 0)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void talloc_enable_null_tracking(void);
+
+This enables tracking of the NULL memory context without enabling leak
+reporting on exit. Useful for when you want to do your own leak
+reporting call via talloc_report_full(NULL, f) (the internal
+talloc_report_null_full() helper is static to talloc.c).
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_zero(const void *ctx, type);
+
+The talloc_zero() macro is equivalent to:
+
+ ptr = talloc(ctx, type);
+ if (ptr) memset(ptr, 0, sizeof(type));
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_zero_size(const void *ctx, size_t size)
+
+The talloc_zero_size() function is useful when you don't have a known type
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_memdup(const void *ctx, const void *p, size_t size);
+
+The talloc_memdup() function is equivalent to:
+
+ ptr = talloc_size(ctx, size);
+ if (ptr) memcpy(ptr, p, size);
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_strdup(const void *ctx, const char *p);
+
+The talloc_strdup() function is equivalent to:
+
+ ptr = talloc_size(ctx, strlen(p)+1);
+ if (ptr) memcpy(ptr, p, strlen(p)+1);
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_strndup(const void *t, const char *p, size_t n);
+
+The talloc_strndup() function is the talloc equivalent of the C
+library function strndup()
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_vasprintf(const void *t, const char *fmt, va_list ap);
+
+The talloc_vasprintf() function is the talloc equivalent of the C
+library function vasprintf()
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_asprintf(const void *t, const char *fmt, ...);
+
+The talloc_asprintf() function is the talloc equivalent of the C
+library function asprintf()
+
+This function sets the name of the new pointer to the passed
+string. This is equivalent to:
+ talloc_set_name_const(ptr, ptr)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+char *talloc_asprintf_append(char *s, const char *fmt, ...);
+
+The talloc_asprintf_append() function appends the given formatted
+string to the given string.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_array(const void *ctx, type, uint_t count);
+
+The talloc_array() macro is equivalent to:
+
+ (type *)talloc_size(ctx, sizeof(type) * count);
+
+except that it provides integer overflow protection for the multiply,
+returning NULL if the multiply overflows.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_array_size(const void *ctx, size_t size, uint_t count);
+
+The talloc_array_size() function is useful when the type is not
+known. It operates in the same way as talloc_array(), but takes a size
+instead of a type.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_realloc_fn(const void *ctx, void *ptr, size_t size);
+
+This is a non-macro version of talloc_realloc(), which is useful
+as libraries sometimes want a realloc function pointer. A realloc()
+implementation encapsulates the functionality of malloc(), free() and
+realloc() in one call, which is why it is useful to be able to pass
+around a single function pointer.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_autofree_context(void);
+
+This is a handy utility function that returns a talloc context
+which will be automatically freed on program exit. This can be used
+to reduce the noise in memory leak reports.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+void *talloc_check_name(const void *ptr, const char *name);
+
+This function checks if a pointer has the specified name. If it does
+then the pointer is returned. If it doesn't then NULL is returned.
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+(type *)talloc_get_type(const void *ptr, type);
+
+This macro allows you to do type checking on talloc pointers. It is
+particularly useful for void* private pointers. It is equivalent to
+this:
+
+ (type *)talloc_check_name(ptr, #type)
+
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+talloc_set_type(const void *ptr, type);
+
+This macro allows you to force the name of a pointer to be a
+particular type. This can be used in conjunction with
+talloc_get_type() to do type checking on void* pointers.
+
+It is equivalent to this:
+ talloc_set_name_const(ptr, #type)
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+talloc_get_size(const void *ctx);
+
+This function lets you know the amount of memory alloced so far by
+this context. It does NOT account for subcontext memory.
+This can be used to calculate the size of an array.
+
diff --git a/tools/xenstore/testsuite/01simple.sh b/tools/xenstore/testsuite/01simple.sh
new file mode 100644
index 0000000000..9b1eb8f5c3
--- /dev/null
+++ b/tools/xenstore/testsuite/01simple.sh
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+# Create an entry, read it.
+[ "`echo -e 'write /test create contents\nread /test' | ./xs_test 2>&1`" = "contents" ]
diff --git a/tools/xenstore/testsuite/02directory.sh b/tools/xenstore/testsuite/02directory.sh
new file mode 100644
index 0000000000..f63ef1ff3d
--- /dev/null
+++ b/tools/xenstore/testsuite/02directory.sh
@@ -0,0 +1,31 @@
+#! /bin/sh
+
+# Root directory has nothing in it.
+[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "" ]
+
+# Create a file.
+[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ]
+
+# Directory shows it.
+[ "`echo -e 'dir /' | ./xs_test 2>&1`" = "test" ]
+
+# Make a new directory.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+
+# Check it's there.
+DIR="`echo -e 'dir /' | ./xs_test 2>&1`"
+[ "$DIR" = "test
+dir" ] || [ "$DIR" = "dir
+test" ]
+
+# Check it's empty.
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "" ]
+
+# Create a file, check it exists.
+[ "`echo -e 'write /dir/test2 create contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1`" = "test2" ]
+[ "`echo -e 'read /dir/test2' | ./xs_test 2>&1`" = "contents2" ]
+
+# Creating dir over the top should fail.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "FATAL: mkdir: File exists" ]
+[ "`echo -e 'mkdir /dir/test2' | ./xs_test 2>&1`" = "FATAL: mkdir: File exists" ]
diff --git a/tools/xenstore/testsuite/03write.sh b/tools/xenstore/testsuite/03write.sh
new file mode 100644
index 0000000000..cf5f897c54
--- /dev/null
+++ b/tools/xenstore/testsuite/03write.sh
@@ -0,0 +1,17 @@
+#! /bin/sh
+
+# Write without create fails.
+[ "`echo -e 'write /test none contents' | ./xs_test 2>&1`" = "FATAL: write: No such file or directory" ]
+
+# Exclusive write succeeds
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents" ]
+
+# Exclusive write fails to overwrite.
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "FATAL: write: File exists" ]
+
+# Non-exclusive overwrite succeeds.
+[ "`echo -e 'write /test none contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents2" ]
+[ "`echo -e 'write /test create contents3' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents3" ]
diff --git a/tools/xenstore/testsuite/04rm.sh b/tools/xenstore/testsuite/04rm.sh
new file mode 100644
index 0000000000..abadd6110a
--- /dev/null
+++ b/tools/xenstore/testsuite/04rm.sh
@@ -0,0 +1,18 @@
+#! /bin/sh
+
+# Remove non-existent fails.
+[ "`echo -e 'rm /test' | ./xs_test 2>&1`" = "FATAL: rm: No such file or directory" ]
+[ "`echo -e 'rm /dir/test' | ./xs_test 2>&1`" = "FATAL: rm: No such file or directory" ]
+
+# Create file and remove it
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /test' | ./xs_test 2>&1`" = "" ]
+
+# Create directory and remove it.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /dir' | ./xs_test 2>&1`" = "" ]
+
+# Create directory, create file, remove all.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'write /dir/test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'rm /dir' | ./xs_test 2>&1`" = "" ]
diff --git a/tools/xenstore/testsuite/05filepermissions.sh b/tools/xenstore/testsuite/05filepermissions.sh
new file mode 100644
index 0000000000..9d9043f191
--- /dev/null
+++ b/tools/xenstore/testsuite/05filepermissions.sh
@@ -0,0 +1,49 @@
+#! /bin/sh
+
+# Fail to get perms on non-existent file.
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: No such file or directory" ]
+[ "`echo -e 'getperm /dir/test' | ./xs_test 2>&1`" = "FATAL: getperm: No such file or directory" ]
+
+# Create file: we own it, no one else has access.
+[ "`echo -e 'write /test excl contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "0 NONE" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone read access to file.
+[ "`echo -e 'setperm /test 0 READ' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "0 READ" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone write access to file.
+[ "`echo -e 'setperm /test 0 WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /test none contents2' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents2" ]
+
+# Grant everyone both read and write access.
+[ "`echo -e 'setperm /test 0 READ/WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "0 READ/WRITE" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents2" ]
+[ "`echo -e 'setid 1\nwrite /test none contents3' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents3" ]
+
+# Change so that user 1 owns it, noone else can do anything.
+[ "`echo -e 'setperm /test 1 NONE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /test' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'setid 1\nread /test' | ./xs_test 2>&1`" = "contents3" ]
+[ "`echo -e 'setid 1\nwrite /test none contents4' | ./xs_test 2>&1`" = "" ]
+
+# User 2 can do nothing.
+[ "`echo -e 'setid 2\nsetperm /test 2 NONE' | ./xs_test 2>&1`" = "FATAL: setperm: Permission denied" ]
+[ "`echo -e 'setid 2\ngetperm /test' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 2\nread /test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /test none contents4' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Tools can always access things.
+[ "`echo -e 'getperm /test' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'read /test' | ./xs_test 2>&1`" = "contents4" ]
+[ "`echo -e 'write /test none contents5' | ./xs_test 2>&1`" = "" ]
diff --git a/tools/xenstore/testsuite/06dirpermissions.sh b/tools/xenstore/testsuite/06dirpermissions.sh
new file mode 100644
index 0000000000..922a794f04
--- /dev/null
+++ b/tools/xenstore/testsuite/06dirpermissions.sh
@@ -0,0 +1,61 @@
+#! /bin/sh
+
+# Root directory: owned by tool, everyone has read access.
+[ "`echo -e 'getperm /' | ./xs_test 2>&1`" = "0 READ" ]
+
+# Create directory: we own it, noone has access.
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'getperm /dir' | ./xs_test 2>&1`" = "0 NONE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 1\nread /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone read access to directory.
+[ "`echo -e 'setperm /dir 0 READ' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "0 READ" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents2' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Grant everyone write access to directory.
+[ "`echo -e 'setperm /dir 0 WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 1\nwrite /dir/test create contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'read /dir/test' | ./xs_test 2>&1`" = "contents" ]
+
+# Grant everyone both read and write access.
+[ "`echo -e 'setperm /dir 0 READ/WRITE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "0 READ/WRITE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1`" = "test" ]
+[ "`echo -e 'setid 1\nwrite /dir/test2 create contents' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\nread /dir/test2' | ./xs_test 2>&1`" = "contents" ]
+
+# Change so that user 1 owns it, noone else can do anything.
+[ "`echo -e 'setperm /dir 1 NONE' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e 'setid 1\ngetperm /dir' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'setid 1\ndir /dir' | ./xs_test 2>&1 | sort`" = "test
+test2" ]
+[ "`echo -e 'setid 1\nwrite /dir/test3 create contents' | ./xs_test 2>&1`" = "" ]
+
+# User 2 can do nothing. Can't even tell if file exists.
+[ "`echo -e 'setid 2\nsetperm /dir 2 NONE' | ./xs_test 2>&1`" = "FATAL: setperm: Permission denied" ]
+[ "`echo -e 'setid 2\ngetperm /dir' | ./xs_test 2>&1`" = "FATAL: getperm: Permission denied" ]
+[ "`echo -e 'setid 2\ndir /dir' | ./xs_test 2>&1`" = "FATAL: dir: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test2' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test3' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nread /dir/test4' | ./xs_test 2>&1`" = "FATAL: read: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test none contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test create contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test excl contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 none contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 create contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+[ "`echo -e 'setid 2\nwrite /dir/test4 excl contents' | ./xs_test 2>&1`" = "FATAL: write: Permission denied" ]
+
+# Tools can always access things.
+[ "`echo -e 'getperm /dir' | ./xs_test 2>&1`" = "1 NONE" ]
+[ "`echo -e 'dir /dir' | ./xs_test 2>&1 | sort`" = "test
+test2
+test3" ]
+[ "`echo -e 'write /dir/test4 create contents' | ./xs_test 2>&1`" = "" ]
+
diff --git a/tools/xenstore/testsuite/07watch.sh b/tools/xenstore/testsuite/07watch.sh
new file mode 100644
index 0000000000..bedce6ad5b
--- /dev/null
+++ b/tools/xenstore/testsuite/07watch.sh
@@ -0,0 +1,32 @@
+#! /bin/sh
+
+# Watch something, write to it, check watch has fired.
+[ "`echo -e 'write /test create contents' | ./xs_test 2>&1`" = "" ]
+
+[ "`echo -e '1 watch /test 100\n2 write /test create contents2\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/test" ]
+
+# Check that reads don't set it off.
+[ "`echo -e '1 watch /test 100\n2 read /test\n1 waitwatch' | ./xs_test 2>&1`" = "2:contents2
+1:waitwatch timeout" ]
+
+# mkdir, setperm and rm should fire watches too (also tests watching dirs).
+[ "`echo -e 'mkdir /dir' | ./xs_test 2>&1`" = "" ]
+[ "`echo -e '1 watch /dir 100\n2 mkdir /dir/newdir\n1 waitwatch\n1 ackwatch\n2 setperm /dir/newdir 0 READ\n1 waitwatch\n1 ackwatch\n2 rm /dir/newdir\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/dir/newdir
+1:/dir/newdir
+1:/dir/newdir" ]
+
+# ignore watches while doing commands, should work.
+[ "`echo -e 'watch /dir 100\nwrite /dir/test create contents\nread /dir/test\nwaitwatch\nackwatch' | ./xs_test 2>&1`" = "contents
+/dir/test" ]
+
+# watch priority /test.
+[ "`echo -e '1 watch /dir 1\n3 watch /dir 3\n2 watch /dir 2\nwrite /dir/test create contents\n3 waitwatch\n3 ackwatch\n2 waitwatch\n2 ackwatch\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "3:/dir/test
+2:/dir/test
+1:/dir/test" ]
+
+# If one dies (without acking), the other should still get ack.
+[ "`echo -e '1 watch /dir 0\n2 watch /dir 1\nwrite /dir/test create contents\n2 waitwatch\n2 close\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "2:/dir/test
+1:/dir/test" ]
+
+# If one dies (without reading at all), the other should still get ack.
+[ "`echo -e '1 watch /dir 0\n2 watch /dir 1\nwrite /dir/test create contents\n2 close\n1 waitwatch\n1 ackwatch' | ./xs_test 2>&1`" = "1:/dir/test" ]
diff --git a/tools/xenstore/testsuite/08transaction.sh b/tools/xenstore/testsuite/08transaction.sh
new file mode 100644
index 0000000000..2c23ed2496
--- /dev/null
+++ b/tools/xenstore/testsuite/08transaction.sh
@@ -0,0 +1,54 @@
+#! /bin/sh
+# Test transactions.
+
+# Simple transaction: create a file inside transaction.
+[ "`echo -e '1 start /
+1 write /entry1 create contents
+2 dir /
+1 dir /
+1 commit
+2 read /entry1' | ./xs_test`" = "1:entry1
+2:contents" ]
+echo rm /entry1 | ./xs_test
+
+# Create a file and abort transaction.
+[ "`echo -e '1 start /
+1 write /entry1 create contents
+2 dir /
+1 dir /
+1 abort
+2 dir /' | ./xs_test`" = "1:entry1" ]
+
+echo write /entry1 create contents | ./xs_test
+# Delete in transaction, commit
+[ "`echo -e '1 start /
+1 rm /entry1
+2 dir /
+1 dir /
+1 commit
+2 dir /' | ./xs_test`" = "2:entry1" ]
+
+# Delete in transaction, abort.
+echo write /entry1 create contents | ./xs_test
+[ "`echo -e '1 start /
+1 rm /entry1
+2 dir /
+1 dir /
+1 abort
+2 dir /' | ./xs_test`" = "2:entry1
+2:entry1" ]
+
+# Transactions can take as long as they want...
+[ "`echo -e 'start /
+sleep 1
+rm /entry1
+commit
+dir /' | ./xs_test`" = "" ]
+
+# ... as long as no one is waiting.
+[ "`echo -e '1 start /
+2 mkdir /dir
+1 mkdir /dir
+1 dir /
+1 commit' | ./xs_test 2>&1`" = "1:dir
+FATAL: 1: commit: Connection timed out" ]
diff --git a/tools/xenstore/testsuite/09domain.sh b/tools/xenstore/testsuite/09domain.sh
new file mode 100644
index 0000000000..9208dda0ec
--- /dev/null
+++ b/tools/xenstore/testsuite/09domain.sh
@@ -0,0 +1,15 @@
+#! /bin/sh
+# Test domain communication.
+
+# Create a domain, write an entry.
+[ "`echo -e 'introduce 1 100 7 /my/home
+1 write /entry1 create contents
+dir /' | ./xs_test 2>&1`" = "handle is 1
+entry1" ]
+
+# Release that domain.
+[ "`echo -e 'release 1' | ./xs_test`" = "" ]
+
+# Introduce and release by same connection.
+[ "`echo -e 'introduce 1 100 7 /my/home
+release 1' | ./xs_test 2>&1`" = "handle is 1" ]
diff --git a/tools/xenstore/testsuite/test.sh b/tools/xenstore/testsuite/test.sh
new file mode 100755
index 0000000000..5718e84a15
--- /dev/null
+++ b/tools/xenstore/testsuite/test.sh
@@ -0,0 +1,44 @@
+#! /bin/sh
+
+set -e
+set -m
+
+run_test()
+{
+ rm -rf $XENSTORED_ROOTDIR
+ mkdir $XENSTORED_ROOTDIR
+# Weird failures with this.
+ if type valgrind >/dev/null 2>&1; then
+ valgrind -q --logfile-fd=3 ./xenstored_test --output-pid --no-fork 3>testsuite/tmp/vgout > /tmp/pid &
+ while [ ! -s /tmp/pid ]; do sleep 0; done
+ PID=`cat /tmp/pid`
+ rm /tmp/pid
+ else
+ PID=`./xenstored_test --output-pid`
+ fi
+ if sh -e $2 $1; then
+ if [ -s testsuite/tmp/vgout ]; then
+ kill $PID
+ echo VALGRIND errors:
+ cat testsuite/tmp/vgout
+ return 1
+ fi
+ echo shutdown | ./xs_test
+ return 0
+ else
+ # In case daemon is wedged.
+ kill $PID
+ sleep 1
+ return 1
+ fi
+}
+
+for f in testsuite/[0-9]*.sh; do
+ if run_test $f; then
+ echo Test $f passed...
+ else
+ echo Test $f failed, running verbosely...
+ run_test $f -x
+ exit 1
+ fi
+done
diff --git a/tools/xenstore/utils.c b/tools/xenstore/utils.c
new file mode 100644
index 0000000000..2345021f70
--- /dev/null
+++ b/tools/xenstore/utils.c
@@ -0,0 +1,143 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#include "utils.h"
+
+void xprintf(const char *fmt, ...)
+{
+ static FILE *out = NULL;
+ va_list args;
+ if (!out)
+ out = fopen("/dev/console", "w");
+ if (!out)
+ out = stderr;
+
+ va_start(args, fmt);
+ vfprintf(out, fmt, args);
+ va_end(args);
+ fflush(out);
+}
+
+void barf(const char *fmt, ...)
+{
+ char *str;
+ va_list arglist;
+
+ xprintf("FATAL: ");
+
+ va_start(arglist, fmt);
+ vasprintf(&str, fmt, arglist);
+ va_end(arglist);
+
+ xprintf("%s\n", str);
+ free(str);
+ exit(1);
+}
+
+void barf_perror(const char *fmt, ...)
+{
+ char *str;
+ int err = errno;
+ va_list arglist;
+
+ xprintf("FATAL: ");
+
+ va_start(arglist, fmt);
+ vasprintf(&str, fmt, arglist);
+ va_end(arglist);
+
+ xprintf("%s: %s\n", str, strerror(err));
+ free(str);
+ exit(1);
+}
+
+void *_realloc_array(void *ptr, size_t size, size_t num)
+{
+ if (num >= SIZE_MAX/size)
+ return NULL;
+ return realloc_nofail(ptr, size * num);
+}
+
+void *realloc_nofail(void *ptr, size_t size)
+{
+ ptr = realloc(ptr, size);
+ if (ptr)
+ return ptr;
+ barf("realloc of %zu failed", size);
+}
+
+void *malloc_nofail(size_t size)
+{
+ void *ptr = malloc(size);
+ if (ptr)
+ return ptr;
+ barf("malloc of %zu failed", size);
+}
+
+/* Stevens. */
+void daemonize(void)
+{
+ pid_t pid;
+
+ /* Separate from our parent via fork, so init inherits us. */
+ if ((pid = fork()) < 0)
+ barf_perror("Failed to fork daemon");
+ if (pid != 0)
+ exit(0);
+
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+
+ /* Session leader so ^C doesn't whack us. */
+ setsid();
+ /* Move off any mount points we might be in. */
+ chdir("/");
+ /* Discard our parent's old-fashioned umask prejudices. */
+ umask(0);
+}
+
+
+/* This version adds one byte (for nul term) */
+void *grab_file(const char *filename, unsigned long *size)
+{
+ unsigned int max = 16384;
+ int ret, fd;
+ void *buffer;
+
+ if (streq(filename, "-"))
+ fd = dup(STDIN_FILENO);
+ else
+ fd = open(filename, O_RDONLY, 0);
+
+ if (fd < 0)
+ return NULL;
+
+ buffer = malloc(max+1);
+ *size = 0;
+ while ((ret = read(fd, buffer + *size, max - *size)) > 0) {
+ *size += ret;
+ if (*size == max)
+ buffer = realloc(buffer, max *= 2 + 1);
+ }
+ if (ret < 0) {
+ free(buffer);
+ buffer = NULL;
+ } else
+ ((char *)buffer)[*size] = '\0';
+ close(fd);
+ return buffer;
+}
+
+void release_file(void *data, unsigned long size __attribute__((unused)))
+{
+ free(data);
+}
diff --git a/tools/xenstore/utils.h b/tools/xenstore/utils.h
new file mode 100644
index 0000000000..a84f19a22a
--- /dev/null
+++ b/tools/xenstore/utils.h
@@ -0,0 +1,61 @@
+#ifndef _UTILS_H
+#define _UTILS_H
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+
+/* Is A == B ? */
+#define streq(a,b) (strcmp((a),(b)) == 0)
+
+/* Does A start with B ? */
+#define strstarts(a,b) (strncmp((a),(b),strlen(b)) == 0)
+
+/* Does A end in B ? */
+static inline bool strends(const char *a, const char *b)
+{
+ if (strlen(a) < strlen(b))
+ return false;
+
+ return streq(a + strlen(a) - strlen(b), b);
+}
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define ___stringify(x) #x
+#define __stringify(x) ___stringify(x)
+
+/* Convenient wrappers for malloc and realloc. Use them. */
+#define new(type) ((type *)malloc_nofail(sizeof(type)))
+#define new_array(type, num) realloc_array((type *)0, (num))
+#define realloc_array(ptr, num) ((__typeof__(ptr))_realloc_array((ptr), sizeof((*ptr)), (num)))
+
+void *malloc_nofail(size_t size);
+void *realloc_nofail(void *ptr, size_t size);
+void *_realloc_array(void *ptr, size_t size, size_t num);
+
+void barf(const char *fmt, ...) __attribute__((noreturn));
+void barf_perror(const char *fmt, ...) __attribute__((noreturn));
+
+/* This version adds one byte (for nul term) */
+void *grab_file(const char *filename, unsigned long *size);
+void release_file(void *data, unsigned long size);
+
+/* For writing daemons, based on Stevens. */
+void daemonize(void);
+
+/* Signal handling: returns fd to listen on. */
+int signal_to_fd(int signal);
+void close_signal(int fd);
+
+void xprintf(const char *fmt, ...);
+
+#define eprintf(_fmt, _args...) xprintf("[ERR] %s" _fmt, __FUNCTION__, ##_args)
+#define iprintf(_fmt, _args...) xprintf("[INF] %s" _fmt, __FUNCTION__, ##_args)
+
+#ifdef DEBUG
+#define dprintf(_fmt, _args...) xprintf("[DBG] %s" _fmt, __FUNCTION__, ##_args)
+#else
+#define dprintf(_fmt, _args...) ((void)0)
+#endif
+
+#endif /* _UTILS_H */
diff --git a/tools/xenstore/xenstored.h b/tools/xenstore/xenstored.h
new file mode 100644
index 0000000000..784ec987a8
--- /dev/null
+++ b/tools/xenstore/xenstored.h
@@ -0,0 +1,81 @@
+/*
+ Simple prototype Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_H
+#define _XENSTORED_H
+
+enum xsd_sockmsg_type
+{
+ XS_DEBUG,
+ XS_SHUTDOWN,
+ XS_DIRECTORY,
+ XS_READ,
+ XS_GET_PERMS,
+ XS_WATCH,
+ XS_WATCH_ACK,
+ XS_UNWATCH,
+ XS_TRANSACTION_START,
+ XS_TRANSACTION_END,
+ XS_OP_READ_ONLY = XS_TRANSACTION_END,
+ XS_INTRODUCE,
+ XS_RELEASE,
+ XS_GETDOMAINPATH,
+ XS_WRITE,
+ XS_MKDIR,
+ XS_RM,
+ XS_SET_PERMS,
+ XS_WATCH_EVENT,
+ XS_ERROR,
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We hand errors as strings, for portability. */
+struct xsd_errors
+{
+ int errnum;
+ const char *errstring;
+};
+#define XSD_ERROR(x) { x, #x }
+static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
+ XSD_ERROR(EINVAL),
+ XSD_ERROR(EACCES),
+ XSD_ERROR(EEXIST),
+ XSD_ERROR(EISDIR),
+ XSD_ERROR(ENOENT),
+ XSD_ERROR(ENOMEM),
+ XSD_ERROR(ENOSPC),
+ XSD_ERROR(EIO),
+ XSD_ERROR(ENOTEMPTY),
+ XSD_ERROR(ENOSYS),
+ XSD_ERROR(EROFS),
+ XSD_ERROR(EBUSY),
+ XSD_ERROR(ETIMEDOUT),
+ XSD_ERROR(EISCONN),
+};
+struct xsd_sockmsg
+{
+ u32 type;
+ u32 len; /* Length of data following this. */
+
+ /* Generally followed by nul-terminated string(s). */
+};
+
+#endif /* _XENSTORED_H */
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
new file mode 100644
index 0000000000..1df00f37b4
--- /dev/null
+++ b/tools/xenstore/xenstored_core.c
@@ -0,0 +1,1362 @@
+/*
+ Simple prototype Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <string.h>
+#include <errno.h>
+#include <dirent.h>
+#include <getopt.h>
+#include <signal.h>
+#include <assert.h>
+#include <setjmp.h>
+
+//#define DEBUG
+#include "utils.h"
+#include "list.h"
+#include "talloc.h"
+#include "xs_lib.h"
+#include "xenstored.h"
+#include "xenstored_core.h"
+#include "xenstored_watch.h"
+#include "xenstored_transaction.h"
+#include "xenstored_domain.h"
+
+static bool verbose;
+static LIST_HEAD(connections);
+
+#ifdef TESTING
+static bool failtest = false;
+
+/* We override talloc's malloc. */
+void *test_malloc(size_t size)
+{
+	/* Fail roughly 1 in 32 allocations; since a connection makes
+	 * many allocations, only about half of connections establish. */
+	if (failtest && (random() % 32) == 0)
+		return NULL;
+	return malloc(size);
+}
+
+static void stop_failtest(int signum __attribute__((unused)))
+{
+ failtest = false;
+}
+
+/* Need these before we #define away write_all/mkdir in testing.h */
+bool test_write_all(int fd, void *contents, unsigned int len);
+bool test_write_all(int fd, void *contents, unsigned int len)
+{
+ if (failtest && (random() % 8) == 0) {
+ if (len)
+ len = random() % len;
+ write(fd, contents, len);
+ errno = ENOSPC;
+ return false;
+ }
+ return xs_write_all(fd, contents, len);
+}
+
+int test_mkdir(const char *dir, int perms);
+int test_mkdir(const char *dir, int perms)
+{
+ if (failtest && (random() % 8) == 0) {
+ errno = ENOSPC;
+ return -1;
+ }
+ return mkdir(dir, perms);
+}
+#endif /* TESTING */
+
+#include "xenstored_test.h"
+
+/* FIXME: Ideally, this should never be called. Some can be eliminated. */
+/* Something is horribly wrong: shutdown immediately. */
+void __attribute__((noreturn)) corrupt(struct connection *conn,
+ const char *fmt, ...)
+{
+ va_list arglist;
+ char *str;
+ int saved_errno = errno;
+
+ va_start(arglist, fmt);
+ str = talloc_vasprintf(NULL, fmt, arglist);
+ va_end(arglist);
+
+ eprintf("xenstored corruption: connection id %i: err %s: %s",
+ conn ? (int)conn->id : -1, strerror(saved_errno), str);
+#ifdef TESTING
+ /* Allow them to attach debugger. */
+ sleep(30);
+#endif
+ syslog(LOG_DAEMON,
+ "xenstored corruption: connection id %i: err %s: %s",
+ conn ? (int)conn->id : -1, strerror(saved_errno), str);
+ _exit(2);
+}
+
+/* Push pending output to the connection without blocking: first the
+ * fixed-size header, then the payload.  Returns false on a write
+ * error (caller drops the connection), true otherwise -- including
+ * the "partially sent, try again later" case. */
+static bool write_message(struct connection *conn)
+{
+	int ret;
+	struct buffered_data *out = conn->out;
+
+	if (out->inhdr) {
+		if (verbose)
+			xprintf("Writing msg %i out to %p\n",
+				out->hdr.msg.type, conn);
+		ret = conn->write(conn, out->hdr.raw + out->used,
+				  sizeof(out->hdr) - out->used);
+		if (ret < 0)
+			return false;
+
+		out->used += ret;
+		if (out->used < sizeof(out->hdr))
+			return true;
+
+		/* Header done: "used" now counts payload bytes. */
+		out->inhdr = false;
+		out->used = 0;
+
+		/* Second write might block if non-zero. */
+		if (out->hdr.msg.len)
+			return true;
+	}
+
+	if (verbose)
+		xprintf("Writing data len %i out to %p\n",
+			out->hdr.msg.len, conn);
+	ret = conn->write(conn, out->buffer + out->used,
+			  out->hdr.msg.len - out->used);
+
+	if (ret < 0)
+		return false;
+
+	out->used += ret;
+	if (out->used != out->hdr.msg.len)
+		return true;
+
+	/* Message fully sent. */
+	conn->out = NULL;
+
+	/* If this was an event, we wait for ack, otherwise we're done. */
+	if (!is_watch_event(conn, out))
+		talloc_free(out);
+
+	queue_next_event(conn);
+	return true;
+}
+
+static int destroy_conn(void *_conn)
+{
+ struct connection *conn = _conn;
+
+ /* Flush outgoing if possible, but don't block. */
+ if (!conn->domain) {
+ fd_set set;
+ struct timeval none;
+
+ FD_ZERO(&set);
+ FD_SET(conn->fd, &set);
+ none.tv_sec = none.tv_usec = 0;
+
+ while (conn->out
+ && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
+ if (!write_message(conn))
+ break;
+ close(conn->fd);
+ }
+ list_del(&conn->list);
+ return 0;
+}
+
+/* Build the select() sets: the two listening sockets and the event
+ * channel always watch for input; each socket connection watches for
+ * input unless blocked, and for output when it has queued data.
+ * Returns the highest fd seen. */
+static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
+			  int event_fd)
+{
+	struct connection *conn;
+	int max = -1;
+
+	FD_ZERO(inset);
+	FD_ZERO(outset);
+
+	FD_SET(sock, inset);
+	if (sock > max)
+		max = sock;
+	FD_SET(ro_sock, inset);
+	if (ro_sock > max)
+		max = ro_sock;
+	FD_SET(event_fd, inset);
+	if (event_fd > max)
+		max = event_fd;
+
+	list_for_each_entry(conn, &connections, list) {
+		/* Domain connections use the event channel, not an fd. */
+		if (conn->domain)
+			continue;
+		if (!conn->blocked)
+			FD_SET(conn->fd, inset);
+		if (conn->out)
+			FD_SET(conn->fd, outset);
+		if (conn->fd > max)
+			max = conn->fd;
+	}
+	return max;
+}
+
+/* Read everything from a talloc_open'ed fd. */
+/* Slurps until EOF, growing the talloc buffer by doubling.  Returns
+ * NULL (errno from read) on a read error; the buffer is owned by the
+ * fd's talloc context. */
+static void *read_all(int *fd, unsigned int *size)
+{
+	unsigned int max = 4;
+	int ret;
+	void *buffer = talloc_size(fd, max);
+
+	*size = 0;
+	while ((ret = read(*fd, buffer + *size, max - *size)) > 0) {
+		*size += ret;
+		if (*size == max)
+			buffer = talloc_realloc_size(fd, buffer, max *= 2);
+	}
+	if (ret < 0)
+		return NULL;
+	return buffer;
+}
+
+/* talloc destructor: close the wrapped file descriptor. */
+static int destroy_fd(void *_fd)
+{
+	close(*(int *)_fd);
+	return 0;
+}
+
+/* Return a pointer to an fd, self-closing and attached to this pathname. */
+static int *talloc_open(const char *pathname, int flags, int mode)
+{
+	int *fd;
+
+	/* The fd lives as a talloc child of the pathname string, so
+	 * freeing the path closes the file via destroy_fd. */
+	fd = talloc(pathname, int);
+	*fd = open(pathname, flags, mode);
+	if (*fd < 0) {
+		/* talloc_free may clobber errno; preserve open()'s. */
+		int saved_errno = errno;
+		talloc_free(fd);
+		errno = saved_errno;
+		return NULL;
+	}
+	talloc_set_destructor(fd, destroy_fd);
+	return fd;
+}
+
+/* Is child equal to parent, or somewhere beneath it? */
+bool is_child(const char *child, const char *parent)
+{
+	size_t plen;
+
+	/* The root matches everything; "/" would need to be "" for the
+	 * plain prefix test below, but "/" is what users write. */
+	if (streq(parent, "/"))
+		return true;
+
+	plen = strlen(parent);
+	if (strncmp(child, parent, plen) != 0)
+		return false;
+
+	switch (child[plen]) {
+	case '/':
+	case '\0':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Answer never ends in /. */
+char *node_dir_outside_transaction(const char *node)
+{
+ if (streq(node, "/"))
+ return talloc_strdup(node, xs_daemon_store());
+ return talloc_asprintf(node, "%s%s", xs_daemon_store(), node);
+}
+
+static char *node_dir(struct transaction *trans, const char *node)
+{
+ if (!trans || !within_transaction(trans, node))
+ return node_dir_outside_transaction(node);
+ return node_dir_inside_transaction(trans, node);
+}
+
+static char *node_datafile(struct transaction *trans, const char *node)
+{
+ return talloc_asprintf(node, "%s/.data", node_dir(trans, node));
+}
+
+static char *node_permfile(struct transaction *trans, const char *node)
+{
+ return talloc_asprintf(node, "%s/.perms", node_dir(trans, node));
+}
+
+/* Allocate an empty buffered_data under ctx, ready to receive a
+ * message header. */
+struct buffered_data *new_buffer(void *ctx)
+{
+	struct buffered_data *bdata = talloc(ctx, struct buffered_data);
+
+	bdata->inhdr = true;
+	bdata->used = 0;
+	bdata->buffer = NULL;
+	return bdata;
+}
+
+/* Return the length (including the nul) of the string starting at
+ * offset, or 0 if offset is out of range or no terminator lies within
+ * the used portion of the buffer. */
+unsigned int get_string(const struct buffered_data *data, unsigned int offset)
+{
+	const char *end;
+
+	if (offset >= data->used)
+		return 0;
+
+	end = memchr(data->buffer + offset, '\0', data->used - offset);
+	return end ? (unsigned int)(end - data->buffer - offset + 1) : 0;
+}
+
+/* Split buffered data into nul-terminated strings.  Fills in at most
+ * num entries of vec; returns the total number of strings found. */
+unsigned int get_strings(struct buffered_data *data,
+			 char *vec[], unsigned int num)
+{
+	unsigned int count = 0, offset = 0, len;
+
+	for (;;) {
+		len = get_string(data, offset);
+		if (len == 0)
+			break;
+		if (count < num)
+			vec[count] = data->buffer + offset;
+		count++;
+		offset += len;
+	}
+	return count;
+}
+
+/* Returns "false", meaning "connection is not blocked". */
+bool send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len)
+{
+ struct buffered_data *bdata;
+
+ /* When data gets freed, we want list entry is destroyed (so
+ * list entry is a child). */
+ bdata = new_buffer(conn);
+ bdata->buffer = talloc_array(bdata, char, len);
+
+ bdata->hdr.msg.type = type;
+ bdata->hdr.msg.len = len;
+ memcpy(bdata->buffer, data, len);
+
+ /* There might be an event going out now. Queue behind it. */
+ if (conn->out) {
+ assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
+ assert(!conn->waiting_reply);
+ conn->waiting_reply = bdata;
+ } else
+ conn->out = bdata;
+ return false;
+}
+
+/* Some routines (write, mkdir, etc) just need a non-error return */
+bool send_ack(struct connection *conn, enum xsd_sockmsg_type type)
+{
+ return send_reply(conn, type, "OK", sizeof("OK"));
+}
+
+/* Report an errno value to the client as its symbolic string.
+ * An errno missing from xsd_errors is an internal inconsistency and
+ * aborts the daemon via corrupt() (which does not return). */
+bool send_error(struct connection *conn, int error)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(xsd_errors); i++)
+		if (xsd_errors[i].errnum == error)
+			return send_reply(conn, XS_ERROR,
+					  xsd_errors[i].errstring,
+					  strlen(xsd_errors[i].errstring) + 1);
+
+	corrupt(conn, "Unknown error %i (%s)", error, strerror(error));
+}
+
+/* Node names may contain only this conservative character set. */
+static bool valid_chars(const char *node)
+{
+	static const char allowed[] =
+		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		"abcdefghijklmnopqrstuvwxyz"
+		"0123456789-/_@";
+
+	/* strspn spans the whole string iff every char is allowed. */
+	return strspn(node, allowed) == strlen(node);
+}
+
+/* A valid node name is absolute, contains no empty components, has no
+ * trailing slash (except the root itself), and uses only permitted
+ * characters. */
+static bool is_valid_nodename(const char *node)
+{
+	if (node[0] != '/')
+		return false;
+
+	if (strstr(node, "//") != NULL)
+		return false;
+
+	if (!streq(node, "/") && strends(node, "/"))
+		return false;
+
+	return valid_chars(node);
+}
+
+/* We expect one arg in the input: return NULL otherwise. */
+static const char *onearg(struct buffered_data *in)
+{
+ if (get_string(in, 0) != in->used)
+ return NULL;
+ return in->buffer;
+}
+
+/* If it fails, returns NULL and sets errno. */
+static struct xs_permissions *get_perms(struct transaction *transaction,
+ const char *node, unsigned int *num)
+{
+ unsigned int size;
+ char *strings;
+ struct xs_permissions *ret;
+ int *fd;
+
+ fd = talloc_open(node_permfile(transaction, node), O_RDONLY, 0);
+ if (!fd)
+ return NULL;
+ strings = read_all(fd, &size);
+ if (!strings)
+ return NULL;
+
+ *num = xs_count_strings(strings, size);
+ ret = talloc_array(node, struct xs_permissions, *num);
+ if (!xs_strings_to_perms(ret, *num, strings))
+ corrupt(NULL, "Permissions corrupt for %s", node);
+
+ return ret;
+}
+
+static char *perms_to_strings(const char *node,
+ struct xs_permissions *perms, unsigned int num,
+ unsigned int *len)
+{
+ unsigned int i;
+ char *strings = NULL;
+ char buffer[MAX_STRLEN(domid_t) + 1];
+
+ for (*len = 0, i = 0; i < num; i++) {
+ if (!xs_perm_to_string(&perms[i], buffer))
+ return NULL;
+
+ strings = talloc_realloc(node, strings, char,
+ *len + strlen(buffer) + 1);
+ strcpy(strings + *len, buffer);
+ *len += strlen(buffer) + 1;
+ }
+ return strings;
+}
+
+/* Destroy this, and its children, and its children's children. */
+/* talloc destructor / recursive helper: removes a file, or a whole
+ * directory tree depth-first.  Always returns 0; unrecoverable
+ * filesystem errors abort the daemon via corrupt(). */
+int destroy_path(void *path)
+{
+	DIR *dir;
+	struct dirent *dirent;
+
+	dir = opendir(path);
+	if (!dir) {
+		/* Not a directory: plain unlink.  ENOENT means it was
+		 * already gone, which is fine. */
+		if (unlink(path) == 0 || errno == ENOENT)
+			return 0;
+		corrupt(NULL, "Destroying path %s", path);
+	}
+
+	while ((dirent = readdir(dir)) != NULL) {
+		/* VLA sized for "path/name\0". */
+		char fullpath[strlen(path) + 1 + strlen(dirent->d_name) + 1];
+		sprintf(fullpath, "%s/%s", (char *)path, dirent->d_name);
+		if (!streq(dirent->d_name,".") && !streq(dirent->d_name,".."))
+			destroy_path(fullpath);
+	}
+	closedir(dir);
+	if (rmdir(path) != 0)
+		corrupt(NULL, "Destroying directory %s", path);
+	return 0;
+}
+
+/* Create a self-destructing temporary file */
+static char *tempfile(const char *path, void *contents, unsigned int len)
+{
+ int *fd;
+ char *tmppath = talloc_asprintf(path, "%s.tmp", path);
+
+ fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd)
+ return NULL;
+ talloc_set_destructor(tmppath, destroy_path);
+ if (!xs_write_all(*fd, contents, len))
+ return NULL;
+
+ return tmppath;
+}
+
+/* We assume rename() doesn't fail on moves in same dir. */
+/* Atomically publish a tempfile() result by stripping its ".tmp"
+ * suffix and renaming over the real file. */
+static void commit_tempfile(const char *path)
+{
+	char realname[strlen(path) + 1];
+	unsigned int len = strrchr(path, '.') - path;
+
+	memcpy(realname, path, len);
+	realname[len] = '\0';
+	if (rename(path, realname) != 0)
+		corrupt(NULL, "Committing %s", realname);
+	/* Success: disarm the cleanup destructor set by tempfile(). */
+	talloc_set_destructor(path, NULL);
+}
+
+static bool set_perms(struct transaction *transaction,
+ const char *node,
+ struct xs_permissions *perms, unsigned int num)
+{
+ unsigned int len;
+ char *permpath, *strings;
+
+ strings = perms_to_strings(node, perms, num, &len);
+ if (!strings)
+ return false;
+
+ /* Create then move. */
+ permpath = tempfile(node_permfile(transaction, node), strings, len);
+ if (!permpath)
+ return false;
+
+ commit_tempfile(permpath);
+ return true;
+}
+
+/* Return the parent path of a node ("/foo/bar" -> "/foo"; the parent
+ * of a top-level node is "/").  Allocated off the node string. */
+static char *get_parent(const char *node)
+{
+	char *slash = strrchr(node + 1, '/');
+	if (!slash)
+		return talloc_strdup(node, "/");
+	/* The "%.*s" precision argument must be an int: slash - node
+	 * is ptrdiff_t, which is wider than int on LP64 targets and
+	 * would be read incorrectly by the varargs machinery. */
+	return talloc_asprintf(node, "%.*s", (int)(slash - node), node);
+}
+
+/* Effective permissions of domain "id" on a node, given the node's
+ * permission array (entry 0 is the owner; later entries are explicit
+ * per-domain grants). */
+static enum xs_perm_type perm_for_id(domid_t id,
+				     struct xs_permissions *perms,
+				     unsigned int num)
+{
+	unsigned int i;
+
+	/* Domain 0 (the tools) and the node's owner can do anything. */
+	if (id == 0 || id == perms[0].id)
+		return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_CREATE|XS_PERM_OWNER;
+
+	/* An explicit per-domain entry overrides the default. */
+	for (i = 1; i < num; i++)
+		if (id == perms[i].id)
+			return perms[i].perms;
+
+	/* Otherwise entry 0's bits act as the default for everyone. */
+	return perms[0].perms;
+}
+
+/* We have a weird permissions system. You can allow someone into a
+ * specific node without allowing it in the parents. If it's going to
+ * fail, however, we don't want the errno to indicate any information
+ * about the node. */
+static int check_with_parents(struct connection *conn, const char *node,
+ int errnum)
+{
+ struct xs_permissions *perms;
+ unsigned int num;
+
+ /* We always tell them about memory failures. */
+ if (errnum == ENOMEM)
+ return errnum;
+
+ do {
+ node = get_parent(node);
+ perms = get_perms(conn->transaction, node, &num);
+ if (perms)
+ break;
+ } while (!streq(node, "/"));
+
+ /* No permission at root? We're in trouble. */
+ if (!perms)
+ corrupt(conn, "No permissions file at root");
+
+ if (!(perm_for_id(conn->id, perms, num) & XS_PERM_READ))
+ return EACCES;
+
+ return errnum;
+}
+
+/* Check whether conn may access node with the given permission mask.
+ * Returns true if allowed; otherwise sets errno (EINVAL for a bad
+ * name, EROFS on a read-only connection wanting write, or the result
+ * of the parent walk in check_with_parents) and returns false. */
+bool check_node_perms(struct connection *conn, const char *node,
+		      enum xs_perm_type perm)
+{
+	struct xs_permissions *perms;
+	unsigned int num;
+
+	/* One guard suffices: the original had two back-to-back,
+	 * identical NULL checks. */
+	if (!node || !is_valid_nodename(node)) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if (!conn->can_write && (perm & XS_PERM_WRITE)) {
+		errno = EROFS;
+		return false;
+	}
+
+	perms = get_perms(conn->transaction, node, &num);
+	/* No permissions. If we want to create it and
+	 * it doesn't exist, check parent directory. */
+	if (!perms && errno == ENOENT && (perm & XS_PERM_CREATE)) {
+		char *parent = get_parent(node);
+		if (!parent)
+			return false;
+
+		perms = get_perms(conn->transaction, parent, &num);
+	}
+	if (!perms) {
+		errno = check_with_parents(conn, node, errno);
+		return false;
+	}
+
+	if (perm_for_id(conn->id, perms, num) & perm)
+		return true;
+
+	errno = check_with_parents(conn, node, EACCES);
+	return false;
+}
+
+static bool send_directory(struct connection *conn, const char *node)
+{
+ char *path, *reply = talloc_strdup(node, "");
+ unsigned int reply_len = 0;
+ DIR *dir;
+ struct dirent *dirent;
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ path = node_dir(conn->transaction, node);
+ dir = opendir(path);
+ if (!dir)
+ return send_error(conn, errno);
+
+ while ((dirent = readdir(dir)) != NULL) {
+ int len = strlen(dirent->d_name) + 1;
+
+ if (!valid_chars(dirent->d_name))
+ continue;
+
+ reply = talloc_realloc(path, reply, char, reply_len + len);
+ strcpy(reply + reply_len, dirent->d_name);
+ reply_len += len;
+ }
+ closedir(dir);
+
+ return send_reply(conn, XS_DIRECTORY, reply, reply_len);
+}
+
+static bool do_read(struct connection *conn, const char *node)
+{
+ char *value;
+ unsigned int size;
+ int *fd;
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
+ if (!fd) {
+ /* Data file doesn't exist? We call that a directory */
+ if (errno == ENOENT)
+ errno = EISDIR;
+ return send_error(conn, errno);
+ }
+
+ value = read_all(fd, &size);
+ if (!value)
+ return send_error(conn, errno);
+
+ return send_reply(conn, XS_READ, value, size);
+}
+
+/* Create a new directory. Optionally put data in it (if data != NULL) */
+static bool new_directory(struct connection *conn,
+ const char *node, void *data, unsigned int datalen)
+{
+ struct xs_permissions perms;
+ char *permstr;
+ unsigned int len;
+ int *fd;
+ char *dir = node_dir(conn->transaction, node);
+
+ if (mkdir(dir, 0750) != 0)
+ return false;
+
+ /* Set destructor so we clean up if neccesary. */
+ talloc_set_destructor(dir, destroy_path);
+
+ /* Default permisisons: we own it, noone else has permission. */
+ perms.id = conn->id;
+ perms.perms = XS_PERM_NONE;
+
+ permstr = perms_to_strings(dir, &perms, 1, &len);
+ fd = talloc_open(node_permfile(conn->transaction, node),
+ O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd || !xs_write_all(*fd, permstr, len))
+ return false;
+
+ if (data) {
+ char *datapath = node_datafile(conn->transaction, node);
+
+ fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
+ if (!fd || !xs_write_all(*fd, data, datalen))
+ return false;
+ }
+
+ /* Finished! */
+ talloc_set_destructor(dir, NULL);
+ return true;
+}
+
+/* path, flags, data... */
+static bool do_write(struct connection *conn, struct buffered_data *in)
+{
+ unsigned int offset, datalen;
+ char *vec[2];
+ char *node, *tmppath;
+ enum xs_perm_type mode;
+ struct stat st;
+
+ /* Extra "strings" can be created by binary data. */
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
+ return send_error(conn, EINVAL);
+
+ node = vec[0];
+ if (!within_transaction(conn->transaction, node))
+ return send_error(conn, EROFS);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ offset = strlen(vec[0]) + strlen(vec[1]) + 2;
+ datalen = in->used - offset;
+
+ if (streq(vec[1], XS_WRITE_NONE))
+ mode = XS_PERM_WRITE;
+ else if (streq(vec[1], XS_WRITE_CREATE))
+ mode = XS_PERM_WRITE|XS_PERM_CREATE;
+ else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
+ mode = XS_PERM_WRITE|XS_PERM_CREATE;
+ else
+ return send_error(conn, EINVAL);
+
+ if (!check_node_perms(conn, node, mode))
+ return send_error(conn, errno);
+
+ if (lstat(node_dir(conn->transaction, node), &st) != 0) {
+ /* Does not exist... */
+ if (errno != ENOENT)
+ return send_error(conn, errno);
+
+ /* Not going to create it? */
+ if (!(mode & XS_PERM_CREATE))
+ return send_error(conn, ENOENT);
+
+ if (!new_directory(conn, node, in->buffer + offset, datalen))
+ return send_error(conn, errno);
+ } else {
+ /* Exists... */
+ if (streq(vec[1], XS_WRITE_CREATE_EXCL))
+ return send_error(conn, EEXIST);
+
+ tmppath = tempfile(node_datafile(conn->transaction, node),
+ in->buffer + offset, datalen);
+ if (!tmppath)
+ return send_error(conn, errno);
+
+ commit_tempfile(tmppath);
+ }
+
+ add_change_node(conn->transaction, node);
+ send_ack(conn, XS_WRITE);
+ fire_watches(conn->transaction, node);
+ return false;
+}
+
+static bool do_mkdir(struct connection *conn, const char *node)
+{
+ if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_CREATE))
+ return send_error(conn, errno);
+
+ if (!within_transaction(conn->transaction, node))
+ return send_error(conn, EROFS);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ if (!new_directory(conn, node, NULL, 0))
+ return send_error(conn, errno);
+
+ add_change_node(conn->transaction, node);
+ send_ack(conn, XS_MKDIR);
+ fire_watches(conn->transaction, node);
+ return false;
+}
+
+static bool do_rm(struct connection *conn, const char *node)
+{
+ char *tmppath, *path;
+
+ if (!check_node_perms(conn, node, XS_PERM_WRITE))
+ return send_error(conn, errno);
+
+ if (!within_transaction(conn->transaction, node))
+ return send_error(conn, EROFS);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ if (streq(node, "/"))
+ return send_error(conn, EINVAL);
+
+ /* We move the directory to temporary name, destructor cleans up. */
+ path = node_dir(conn->transaction, node);
+ tmppath = talloc_asprintf(node, "%s.tmp", path);
+ talloc_set_destructor(tmppath, destroy_path);
+
+ if (rename(path, tmppath) != 0)
+ return send_error(conn, errno);
+
+ add_change_node(conn->transaction, node);
+ send_ack(conn, XS_RM);
+ fire_watches(conn->transaction, node);
+ return false;
+}
+
+static bool do_get_perms(struct connection *conn, const char *node)
+{
+ struct xs_permissions *perms;
+ char *strings;
+ unsigned int len, num;
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ perms = get_perms(conn->transaction, node, &num);
+ if (!perms)
+ return send_error(conn, errno);
+
+ strings = perms_to_strings(node, perms, num, &len);
+ if (!strings)
+ return send_error(conn, errno);
+
+ return send_reply(conn, XS_GET_PERMS, strings, len);
+}
+
+static bool do_set_perms(struct connection *conn, struct buffered_data *in)
+{
+ unsigned int num;
+ char *node;
+ struct xs_permissions *perms;
+
+ num = xs_count_strings(in->buffer, in->used);
+ if (num < 2)
+ return send_error(conn, EINVAL);
+
+ /* First arg is node name. */
+ node = in->buffer;
+ in->buffer += strlen(in->buffer) + 1;
+ num--;
+
+ if (!within_transaction(conn->transaction, node))
+ return send_error(conn, EROFS);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ /* We must own node to do this (tools can do this too). */
+ if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER))
+ return send_error(conn, errno);
+
+ perms = talloc_array(node, struct xs_permissions, num);
+ if (!xs_strings_to_perms(perms, num, in->buffer))
+ return send_error(conn, errno);
+
+ if (!set_perms(conn->transaction, node, perms, num))
+ return send_error(conn, errno);
+ add_change_node(conn->transaction, node);
+ send_ack(conn, XS_SET_PERMS);
+ fire_watches(conn->transaction, node);
+ return false;
+}
+
+/* Process "in" for conn: "in" will vanish after this conversation, so
+ * we can talloc off it for temporary variables. May free "conn".
+ * Returns true if can't complete due to block.
+ */
+static bool process_message(struct connection *conn, struct buffered_data *in)
+{
+ switch (in->hdr.msg.type) {
+ case XS_DIRECTORY:
+ return send_directory(conn, onearg(in));
+
+ case XS_READ:
+ return do_read(conn, onearg(in));
+
+ case XS_WRITE:
+ return do_write(conn, in);
+
+ case XS_MKDIR:
+ return do_mkdir(conn, onearg(in));
+
+ case XS_RM:
+ return do_rm(conn, onearg(in));
+
+ case XS_GET_PERMS:
+ return do_get_perms(conn, onearg(in));
+
+ case XS_SET_PERMS:
+ return do_set_perms(conn, in);
+
+ case XS_SHUTDOWN:
+ /* FIXME: Implement gentle shutdown too. */
+ /* Only tools can do this. */
+ if (conn->id != 0)
+ return send_error(conn, EACCES);
+ if (!conn->can_write)
+ return send_error(conn, EROFS);
+ send_ack(conn, XS_SHUTDOWN);
+ /* Everything hangs off auto-free context, freed at exit. */
+ exit(0);
+
+#ifdef TESTING
+ case XS_DEBUG: {
+ /* For testing, we allow them to set id. */
+ if (streq(in->buffer, "setid")) {
+ conn->id = atoi(in->buffer + get_string(in, 0));
+ send_ack(conn, XS_DEBUG);
+ } else if (streq(in->buffer, "failtest")) {
+ if (get_string(in, 0) < in->used)
+ srandom(atoi(in->buffer + get_string(in, 0)));
+ send_ack(conn, XS_DEBUG);
+ failtest = true;
+ }
+ return false;
+ }
+#endif /* TESTING */
+
+ case XS_WATCH:
+ return do_watch(conn, in);
+
+ case XS_WATCH_ACK:
+ return do_watch_ack(conn);
+
+ case XS_UNWATCH:
+ return do_unwatch(conn, onearg(in));
+
+ case XS_TRANSACTION_START:
+ return do_transaction_start(conn, onearg(in));
+
+ case XS_TRANSACTION_END:
+ return do_transaction_end(conn, onearg(in));
+
+ case XS_INTRODUCE:
+ return do_introduce(conn, in);
+
+ case XS_RELEASE:
+ return do_release(conn, onearg(in));
+
+ case XS_GETDOMAINPATH:
+ return do_get_domain_path(conn, onearg(in));
+
+ case XS_WATCH_EVENT:
+ default:
+ eprintf("Client unknown operation %i", in->hdr.msg.type);
+ send_error(conn, ENOSYS);
+ return false;
+ }
+}
+
+static int out_of_mem(void *data)
+{
+ longjmp(*(jmp_buf *)data, 1);
+}
+
+/* Act on a complete incoming message.  Allocation failure anywhere in
+ * the handler longjmps back here (via the talloc fail handler) and
+ * drops the whole connection. */
+static void consider_message(struct connection *conn)
+{
+	struct buffered_data *in = NULL;
+	enum xsd_sockmsg_type type = conn->in->hdr.msg.type;
+	jmp_buf talloc_fail;
+
+	/* For simplicity, we kill the connection on OOM. */
+	talloc_set_fail_handler(out_of_mem, &talloc_fail);
+	if (setjmp(talloc_fail)) {
+		talloc_free(conn);
+		goto end;
+	}
+
+	if (verbose)
+		xprintf("Got message %i len %i from %p\n",
+			type, conn->in->hdr.msg.len, conn);
+
+	/* We might get a command while waiting for an ack: this means
+	 * the other end discarded it: we will re-transmit. */
+	if (type != XS_WATCH_ACK)
+		reset_watch_event(conn);
+
+	/* Careful: process_message may free connection. We detach
+	 * "in" beforehand and allocate the new buffer to avoid
+	 * touching conn after process_message.
+	 */
+	in = talloc_steal(talloc_autofree_context(), conn->in);
+	conn->in = new_buffer(conn);
+	if (process_message(conn, in)) {
+		/* Blocked by transaction: queue for re-xmit. */
+		talloc_free(conn->in);
+		conn->in = in;
+		in = NULL;
+	}
+
+end:
+	talloc_free(in);
+	talloc_set_fail_handler(NULL, NULL);
+	/* Leak check: report if anything outside the autofree context
+	 * unexpectedly survived this message. */
+	if (talloc_total_blocks(NULL)
+	    != talloc_total_blocks(talloc_autofree_context()) + 1)
+		talloc_report_full(NULL, stderr);
+}
+
+/* Errors in reading or allocating here mean we get out of sync, so we
+ * drop the whole client connection. */
+void handle_input(struct connection *conn)
+{
+	int bytes;
+	struct buffered_data *in;
+
+	assert(!conn->blocked);
+	in = conn->in;
+
+	/* Not finished header yet? */
+	if (in->inhdr) {
+		bytes = conn->read(conn, in->hdr.raw + in->used,
+				   sizeof(in->hdr) - in->used);
+		/* 0 here is EOF: the client went away mid-header. */
+		if (bytes <= 0)
+			goto bad_client;
+		in->used += bytes;
+		if (in->used != sizeof(in->hdr))
+			return;
+
+		/* Sanity-limit the payload length before allocating. */
+		if (in->hdr.msg.len > PATH_MAX) {
+			syslog(LOG_DAEMON, "Client tried to feed us %i",
+			       in->hdr.msg.len);
+			goto bad_client;
+		}
+
+		in->buffer = talloc_array(in, char, in->hdr.msg.len);
+		if (!in->buffer)
+			goto bad_client;
+		in->used = 0;
+		in->inhdr = false;
+		return;
+	}
+
+	/* NOTE(review): "< 0" (not "<= 0") lets a zero-length payload
+	 * complete below, but an EOF arriving mid-payload (read() == 0)
+	 * is not treated as bad_client here -- confirm the select loop
+	 * copes with that case. */
+	bytes = conn->read(conn, in->buffer + in->used,
+			   in->hdr.msg.len - in->used);
+	if (bytes < 0)
+		goto bad_client;
+
+	in->used += bytes;
+	if (in->used != in->hdr.msg.len)
+		return;
+
+	/* Whole message received: act on it. */
+	consider_message(conn);
+	return;
+
+bad_client:
+	/* Kill it. */
+	talloc_free(conn);
+}
+
+void handle_output(struct connection *conn)
+{
+ if (!write_message(conn))
+ talloc_free(conn);
+}
+
+/* If a transaction has ended, see if we can unblock any connections. */
+static void unblock_connections(void)
+{
+ struct connection *i, *tmp;
+
+ list_for_each_entry_safe(i, tmp, &connections, list) {
+ if (!i->blocked)
+ continue;
+
+ if (!transaction_covering_node(i->blocked)) {
+ talloc_free(i->blocked);
+ i->blocked = NULL;
+ consider_message(i);
+ }
+ }
+
+ /* To balance bias, move first entry to end. */
+ if (!list_empty(&connections)) {
+ i = list_top(&connections, struct connection, list);
+ list_del(&i->list);
+ list_add_tail(&i->list, &connections);
+ }
+}
+
+struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
+{
+ struct connection *new;
+ jmp_buf talloc_fail;
+
+ new = talloc(talloc_autofree_context(), struct connection);
+ if (!new)
+ return NULL;
+
+ new->blocked = false;
+ new->out = new->waiting_reply = NULL;
+ new->event = NULL;
+ new->fd = -1;
+ new->id = 0;
+ new->domain = NULL;
+ new->transaction = NULL;
+ new->write = write;
+ new->read = read;
+ new->can_write = true;
+
+ talloc_set_fail_handler(out_of_mem, &talloc_fail);
+ if (setjmp(talloc_fail)) {
+ talloc_free(new);
+ return NULL;
+ }
+ new->in = new_buffer(new);
+ talloc_set_fail_handler(NULL, NULL);
+
+ list_add_tail(&new->list, &connections);
+ talloc_set_destructor(new, destroy_conn);
+ return new;
+}
+
+static int writefd(struct connection *conn, const void *data, unsigned int len)
+{
+ return write(conn->fd, data, len);
+}
+
+static int readfd(struct connection *conn, void *data, unsigned int len)
+{
+ return read(conn->fd, data, len);
+}
+
+/* Take one pending connection off a listening socket; canwrite says
+ * whether it arrived on the read-write or read-only socket. */
+static void accept_connection(int sock, bool canwrite)
+{
+	struct connection *conn;
+	int fd = accept(sock, NULL, NULL);
+
+	if (fd < 0)
+		return;
+
+	conn = new_connection(writefd, readfd);
+	if (!conn) {
+		close(fd);
+		return;
+	}
+	conn->fd = fd;
+	conn->can_write = canwrite;
+}
+
+/* Calc timespan from now to absolute time. */
+/* Converts *tv in place from an absolute deadline to a relative
+ * timeout suitable for select(). */
+static void time_relative_to_now(struct timeval *tv)
+{
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+	/* Deadline already passed: clamp to zero, don't go negative. */
+	if (timercmp(&now, tv, >))
+		timerclear(tv);
+	else {
+		tv->tv_sec -= now.tv_sec;
+		/* Manual borrow for the microseconds subtraction. */
+		if (now.tv_usec > tv->tv_usec) {
+			tv->tv_sec--;
+			tv->tv_usec += 1000000;
+		}
+		tv->tv_usec -= now.tv_usec;
+	}
+}
+
+static struct option options[] = { { "no-fork", 0, NULL, 'N' },
+ { "verbose", 0, NULL, 'V' },
+ { "output-pid", 0, NULL, 'P' },
+ { NULL, 0, NULL, 0 } };
+
+/* Daemon entry point: parse options, create the read-write and
+ * read-only listening sockets, initialise the store root, optionally
+ * daemonize, then run the select() loop dispatching connections. */
+int main(int argc, char *argv[])
+{
+	int opt, *sock, *ro_sock, event_fd, max, tmpout;
+	struct sockaddr_un addr;
+	fd_set inset, outset;
+	bool dofork = true;
+	bool outputpid = false;
+
+	/* Fix: the short-option string must cover the options the
+	 * switch below handles ('N', 'P', 'V'); it previously read
+	 * "DV", so -N and -P only worked in their --long forms. */
+	while ((opt = getopt_long(argc, argv, "NPV", options, NULL)) != -1) {
+		switch (opt) {
+		case 'N':
+			dofork = false;
+			break;
+		case 'V':
+			verbose = true;
+			break;
+		case 'P':
+			outputpid = true;
+			break;
+		}
+	}
+	if (optind != argc)
+		barf("%s: No arguments desired", argv[0]);
+
+	talloc_enable_leak_report_full();
+
+	/* Create sockets for them to listen to. */
+	sock = talloc(talloc_autofree_context(), int);
+	*sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (*sock < 0)
+		barf_perror("Could not create socket");
+	ro_sock = talloc(talloc_autofree_context(), int);
+	*ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (*ro_sock < 0)
+		barf_perror("Could not create socket");
+	talloc_set_destructor(sock, destroy_fd);
+	talloc_set_destructor(ro_sock, destroy_fd);
+
+	/* Don't kill us with SIGPIPE. */
+	signal(SIGPIPE, SIG_IGN);
+
+	/* FIXME: Be more sophisticated, don't mug running daemon. */
+	unlink(xs_daemon_socket());
+	unlink(xs_daemon_socket_ro());
+
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, xs_daemon_socket());
+	if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
+		barf_perror("Could not bind socket to %s", xs_daemon_socket());
+	strcpy(addr.sun_path, xs_daemon_socket_ro());
+	if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
+		barf_perror("Could not bind socket to %s",
+			    xs_daemon_socket_ro());
+	/* Read-write socket for the owner only; read-only socket also
+	 * for the group. */
+	if (chmod(xs_daemon_socket(), 0600) != 0
+	    || chmod(xs_daemon_socket_ro(), 0660) != 0)
+		barf_perror("Could not chmod sockets");
+
+	if (listen(*sock, 1) != 0
+	    || listen(*ro_sock, 1) != 0)
+		barf_perror("Could not listen on sockets");
+
+	/* If we're the first, create .perms file for root. */
+	if (mkdir(xs_daemon_store(), 0750) == 0) {
+		struct xs_permissions perms;
+		char *root = talloc_strdup(talloc_autofree_context(), "/");
+
+		perms.id = 0;
+		perms.perms = XS_PERM_READ;
+		if (!set_perms(NULL, root, &perms, 1))
+			barf_perror("Could not create permissions in root");
+		talloc_free(root);
+		mkdir(xs_daemon_transactions(), 0750);
+	} else if (errno != EEXIST)
+		barf_perror("Could not create root %s", xs_daemon_store());
+
+	/* Listen to hypervisor. */
+	event_fd = domain_init();
+
+	/* Debugging: daemonize() closes standard fds, so dup here. */
+	tmpout = dup(STDOUT_FILENO);
+	if (dofork) {
+		openlog("xenstored", 0, LOG_DAEMON);
+		daemonize();
+	}
+
+	if (outputpid) {
+		char buffer[20];
+		sprintf(buffer, "%i\n", getpid());
+		write(tmpout, buffer, strlen(buffer));
+	}
+	close(tmpout);
+
+#ifdef TESTING
+	signal(SIGUSR1, stop_failtest);
+#endif
+
+	/* Get ready to listen to the tools. */
+	max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd);
+
+	/* Main loop. */
+	for (;;) {
+		struct connection *i;
+		struct timeval *tvp = NULL, tv;
+
+		/* Use a select timeout only when a transaction has a
+		 * deadline pending. */
+		timerclear(&tv);
+		shortest_transaction_timeout(&tv);
+		if (timerisset(&tv)) {
+			time_relative_to_now(&tv);
+			tvp = &tv;
+		}
+
+		if (select(max+1, &inset, &outset, NULL, tvp) < 0) {
+			if (errno == EINTR)
+				continue;
+			barf_perror("Select failed");
+		}
+
+		if (FD_ISSET(*sock, &inset))
+			accept_connection(*sock, true);
+
+		if (FD_ISSET(*ro_sock, &inset))
+			accept_connection(*ro_sock, false);
+
+		if (FD_ISSET(event_fd, &inset))
+			handle_event(event_fd);
+
+		list_for_each_entry(i, &connections, list) {
+			if (i->domain)
+				continue;
+
+			/* Operations can delete themselves or others
+			 * (xs_release): list is not safe after input,
+			 * so break. */
+			if (FD_ISSET(i->fd, &inset)) {
+				handle_input(i);
+				break;
+			}
+			if (FD_ISSET(i->fd, &outset)) {
+				handle_output(i);
+				break;
+			}
+		}
+
+		if (tvp)
+			check_transaction_timeout();
+
+		/* If transactions ended, we might be able to do more work. */
+		unblock_connections();
+
+		max = initialize_set(&inset, &outset, *sock,*ro_sock,event_fd);
+	}
+}
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
new file mode 100644
index 0000000000..0d0ebcaae0
--- /dev/null
+++ b/tools/xenstore/xenstored_core.h
@@ -0,0 +1,126 @@
+/*
+ Internal interfaces for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_INTERNAL_H
+#define _XENSTORED_INTERNAL_H
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include "xs_lib.h"
+#include "xenstored.h"
+#include "list.h"
+
+struct buffered_data
+{
+ /* Are we still doing the header? */
+ bool inhdr;
+ /* How far are we? */
+ unsigned int used;
+ union {
+ struct xsd_sockmsg msg;
+ char raw[sizeof(struct xsd_sockmsg)];
+ } hdr;
+ /* The actual data. */
+ char *buffer;
+};
+
+struct connection;
+typedef int connwritefn_t(struct connection *, const void *, unsigned int);
+typedef int connreadfn_t(struct connection *, void *, unsigned int);
+
+struct connection
+{
+ struct list_head list;
+
+ /* The file descriptor we came in on. */
+ int fd;
+
+ /* Who am I? 0 for socket connections. */
+ domid_t id;
+
+ /* Are we blocked waiting for a transaction to end? Contains node. */
+ char *blocked;
+
+ /* Is this a read-only connection? */
+ bool can_write;
+
+ /* Our current event. If all used, we're waiting for ack. */
+ struct watch_event *event;
+
+ /* Buffered incoming data. */
+ struct buffered_data *in;
+
+ /* Buffered output data */
+ struct buffered_data *out;
+
+ /* If we had a watch fire outgoing when we needed to reply... */
+ struct buffered_data *waiting_reply;
+
+ /* My transaction, if any. */
+ struct transaction *transaction;
+
+ /* The domain I'm associated with, if any. */
+ struct domain *domain;
+
+ /* Methods for communicating over this connection: write can be NULL */
+ connwritefn_t *write;
+ connreadfn_t *read;
+};
+
+/* Return length of string (including nul) at this offset. */
+unsigned int get_string(const struct buffered_data *data,
+ unsigned int offset);
+
+/* Break input into vectors, return the number, fill in up to num of them. */
+unsigned int get_strings(struct buffered_data *data,
+ char *vec[], unsigned int num);
+
+/* Is child node a child or equal to parent node? */
+bool is_child(const char *child, const char *parent);
+
+/* Create a new buffer with lifetime of context. */
+struct buffered_data *new_buffer(void *ctx);
+
+bool send_reply(struct connection *conn, enum xsd_sockmsg_type type,
+ const void *data, unsigned int len);
+
+/* Some routines (write, mkdir, etc) just need a non-error return */
+bool send_ack(struct connection *conn, enum xsd_sockmsg_type type);
+
+/* Send an error: error is usually "errno". */
+bool send_error(struct connection *conn, int error);
+
+/* Check permissions on this node. */
+bool check_node_perms(struct connection *conn, const char *node,
+ enum xs_perm_type perm);
+
+/* Path to this node outside transaction. */
+char *node_dir_outside_transaction(const char *node);
+
+/* Report excessive store corruption; logs the message and never returns. */
+void __attribute__((noreturn)) corrupt(struct connection *conn,
+ const char *fmt, ...);
+
+struct connection *new_connection(connwritefn_t *write, connreadfn_t *read);
+
+void handle_input(struct connection *conn);
+void handle_output(struct connection *conn);
+
+/* Convenient talloc-style destructor for paths. */
+int destroy_path(void *path);
+#endif /* _XENSTORED_INTERNAL_H */
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
new file mode 100644
index 0000000000..a6f69ddf5b
--- /dev/null
+++ b/tools/xenstore/xenstored_domain.c
@@ -0,0 +1,390 @@
+/*
+ Domain communications for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <linux/ioctl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+//#define DEBUG
+#include "utils.h"
+#include "talloc.h"
+#include "xenstored_core.h"
+#include "xenstored_domain.h"
+#include "xenstored_test.h"
+
+static int *xc_handle;
+static int eventchn_fd;
+static unsigned int ringbuf_datasize;
+
+struct domain
+{
+ struct list_head list;
+
+ /* The id of this domain */
+ domid_t domid;
+
+ /* Event channel port */
+ u16 port;
+
+ /* Domain path in store. */
+ char *path;
+
+ /* Shared page. */
+ void *page;
+
+ /* Input and output ringbuffer heads. */
+ struct ringbuf_head *input, *output;
+
+ /* The connection associated with this. */
+ struct connection *conn;
+
+};
+
+static LIST_HEAD(domains);
+
+void domain_set_conn(struct domain *domain, struct connection *conn)
+{
+ domain->conn = conn;
+}
+
+struct ringbuf_head
+{
+ u32 write; /* Next place to write to */
+ u32 read; /* Next place to read from */
+ u8 flags;
+ char buf[0];
+} __attribute__((packed));
+
+#define EVENTCHN_BIND _IO('E', 2)
+#define EVENTCHN_UNBIND _IO('E', 3)
+
+/* FIXME: Mark connection as broken (close it?) when this happens. */
+static bool check_buffer(const struct ringbuf_head *h)
+{
+ return (h->write < ringbuf_datasize && h->read < ringbuf_datasize);
+}
+
+/* We can't fill last byte: would look like empty buffer. */
+static void *get_output_chunk(const struct ringbuf_head *h,
+ void *buf, u32 *len)
+{
+ u32 read_mark;
+
+ if (h->read == 0)
+ read_mark = ringbuf_datasize - 1;
+ else
+ read_mark = h->read - 1;
+
+ /* Here to the end of buffer, unless they haven't read some out. */
+ *len = ringbuf_datasize - h->write;
+ if (read_mark >= h->write)
+ *len = read_mark - h->write;
+ return buf + h->write;
+}
+
+static const void *get_input_chunk(const struct ringbuf_head *h,
+ const void *buf, u32 *len)
+{
+ /* Here to the end of buffer, unless they haven't written some. */
+ *len = ringbuf_datasize - h->read;
+ if (h->write >= h->read)
+ *len = h->write - h->read;
+ return buf + h->read;
+}
+
+static void update_output_chunk(struct ringbuf_head *h, u32 len)
+{
+ h->write += len;
+ if (h->write == ringbuf_datasize)
+ h->write = 0;
+}
+
+static void update_input_chunk(struct ringbuf_head *h, u32 len)
+{
+ h->read += len;
+ if (h->read == ringbuf_datasize)
+ h->read = 0;
+}
+
+static bool buffer_has_input(const struct ringbuf_head *h)
+{
+ u32 len;
+
+ get_input_chunk(h, NULL, &len);
+ return (len != 0);
+}
+
+static bool buffer_has_output_room(const struct ringbuf_head *h)
+{
+ u32 len;
+
+ get_output_chunk(h, NULL, &len);
+ return (len != 0);
+}
+
+static int writechn(struct connection *conn, const void *data, unsigned int len)
+{
+ u32 avail;
+ void *dest;
+ struct ringbuf_head h;
+
+ /* Must read head once, and before anything else, and verified. */
+ h = *conn->domain->output;
+ mb();
+ if (!check_buffer(&h)) {
+ errno = EIO;
+ return -1;
+ }
+
+ dest = get_output_chunk(&h, conn->domain->output->buf, &avail);
+ if (avail < len)
+ len = avail;
+
+ memcpy(dest, data, len);
+ mb();
+ update_output_chunk(conn->domain->output, len);
+	/* FIXME: Probably not necessary. */
+ mb();
+ xc_evtchn_send(*xc_handle, conn->domain->port);
+ return len;
+}
+
+static int readchn(struct connection *conn, void *data, unsigned int len)
+{
+ u32 avail;
+ const void *src;
+ struct ringbuf_head h;
+ bool was_full;
+
+ /* Must read head once, and before anything else, and verified. */
+ h = *conn->domain->input;
+ mb();
+
+ if (!check_buffer(&h)) {
+ errno = EIO;
+ return -1;
+ }
+
+ src = get_input_chunk(&h, conn->domain->input->buf, &avail);
+ if (avail < len)
+ len = avail;
+
+ was_full = !buffer_has_output_room(&h);
+ memcpy(data, src, len);
+ mb();
+ update_input_chunk(conn->domain->input, len);
+	/* FIXME: Probably not necessary. */
+ mb();
+
+ /* If it was full, tell them we've taken some. */
+ if (was_full)
+ xc_evtchn_send(*xc_handle, conn->domain->port);
+ return len;
+}
+
+static int destroy_domain(void *_domain)
+{
+ struct domain *domain = _domain;
+
+ list_del(&domain->list);
+
+ if (domain->port &&
+ (ioctl(eventchn_fd, EVENTCHN_UNBIND, domain->port) != 0))
+ eprintf("> Unbinding port %i failed!\n", domain->port);
+
+ if(domain->page)
+ munmap(domain->page, getpagesize());
+
+ return 0;
+}
+
+static struct domain *find_domain(u16 port)
+{
+ struct domain *i;
+
+ list_for_each_entry(i, &domains, list) {
+ if (i->port == port)
+ return i;
+ }
+ return NULL;
+}
+
+void handle_event(int event_fd)
+{
+ u16 port;
+ struct domain *domain;
+
+ if (read(event_fd, &port, sizeof(port)) != sizeof(port))
+ barf_perror("Failed to read from event fd");
+
+ /* We have to handle *all* the data available before we ack:
+ * careful that handle_input/handle_output can destroy conn.
+ */
+ while ((domain = find_domain(port)) != NULL) {
+ if (!domain->conn->blocked && buffer_has_input(domain->input))
+ handle_input(domain->conn);
+ else if (domain->conn->out
+ && buffer_has_output_room(domain->output))
+ handle_output(domain->conn);
+ else
+ break;
+ }
+
+#ifndef TESTING
+ if (write(event_fd, &port, sizeof(port)) != sizeof(port))
+ barf_perror("Failed to write to event fd");
+#endif
+}
+
+/* domid, mfn, evtchn, path */
+bool do_introduce(struct connection *conn, struct buffered_data *in)
+{
+ struct domain *domain;
+ char *vec[4];
+
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
+ return send_error(conn, EINVAL);
+
+ if (!conn->can_write)
+ return send_error(conn, EROFS);
+
+ /* Hang domain off "in" until we're finished. */
+ domain = talloc(in, struct domain);
+ domain->domid = atoi(vec[0]);
+ domain->port = atoi(vec[2]);
+ domain->path = talloc_strdup(domain, vec[3]);
+ talloc_set_destructor(domain, destroy_domain);
+ if (!domain->port || !domain->domid)
+ return send_error(conn, EINVAL);
+ domain->page = xc_map_foreign_range(*xc_handle, domain->domid,
+ getpagesize(),
+ PROT_READ|PROT_WRITE,
+ atol(vec[1]));
+ if (!domain->page)
+ return send_error(conn, errno);
+
+ /* One in each half of page. */
+ domain->input = domain->page;
+ domain->output = domain->page + getpagesize()/2;
+
+ /* Tell kernel we're interested in this event. */
+ if (ioctl(eventchn_fd, EVENTCHN_BIND, domain->port) != 0)
+ return send_error(conn, errno);
+
+ domain->conn = new_connection(writechn, readchn);
+ domain->conn->domain = domain;
+
+ talloc_steal(domain->conn, domain);
+ list_add(&domain->list, &domains);
+
+ return send_ack(conn, XS_INTRODUCE);
+}
+
+static struct domain *find_domain_by_domid(domid_t domid)
+{
+ struct domain *i;
+
+ list_for_each_entry(i, &domains, list) {
+ if (i->domid == domid)
+ return i;
+ }
+ return NULL;
+}
+
+/* domid */
+bool do_release(struct connection *conn, const char *domid_str)
+{
+ struct domain *domain;
+ domid_t domid;
+
+ if (!domid_str)
+ return send_error(conn, EINVAL);
+
+ domid = atoi(domid_str);
+ if (!domid)
+ return send_error(conn, EINVAL);
+
+ domain = find_domain_by_domid(domid);
+ if (!domain)
+ return send_error(conn, ENOENT);
+
+ if (!domain->conn)
+ return send_error(conn, EINVAL);
+
+ talloc_free(domain->conn);
+ return send_ack(conn, XS_RELEASE);
+}
+
+bool do_get_domain_path(struct connection *conn, const char *domid_str)
+{
+ struct domain *domain;
+ domid_t domid;
+
+ if (!domid_str)
+ return send_error(conn, EINVAL);
+
+ domid = atoi(domid_str);
+ if (domid == 0)
+ domain = conn->domain;
+ else
+ domain = find_domain_by_domid(domid);
+
+ if (!domain)
+ return send_error(conn, ENOENT);
+
+ return send_reply(conn, XS_GETDOMAINPATH, domain->path,
+ strlen(domain->path) + 1);
+}
+
+static int close_xc_handle(void *_handle)
+{
+ xc_interface_close(*(int *)_handle);
+ return 0;
+}
+
+/* Returns the event channel handle. */
+int domain_init(void)
+{
+ /* The size of the ringbuffer: half a page minus head structure. */
+ ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
+
+ xc_handle = talloc(talloc_autofree_context(), int);
+ if (!xc_handle)
+ barf_perror("Failed to allocate domain handle");
+ *xc_handle = xc_interface_open();
+ if (*xc_handle < 0)
+ barf_perror("Failed to open connection to hypervisor");
+ talloc_set_destructor(xc_handle, close_xc_handle);
+
+#ifdef TESTING
+ eventchn_fd = fake_open_eventchn();
+#else
+ eventchn_fd = open("/dev/xen/evtchn", O_RDWR);
+#endif
+ if (eventchn_fd < 0)
+ barf_perror("Failed to open connection to hypervisor");
+ return eventchn_fd;
+}
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
new file mode 100644
index 0000000000..20e85a54b5
--- /dev/null
+++ b/tools/xenstore/xenstored_domain.h
@@ -0,0 +1,38 @@
+/*
+ Domain communications for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_DOMAIN_H
+#define _XENSTORED_DOMAIN_H
+
+void handle_event(int event_fd);
+
+/* domid, mfn, evtchn, path */
+bool do_introduce(struct connection *conn, struct buffered_data *in);
+
+/* domid */
+bool do_release(struct connection *conn, const char *domid_str);
+
+/* domid */
+bool do_get_domain_path(struct connection *conn, const char *domid_str);
+
+/* Returns the event channel handle */
+int domain_init(void);
+
+void domain_set_conn(struct domain *domain, struct connection *conn);
+
+#endif /* _XENSTORED_DOMAIN_H */
diff --git a/tools/xenstore/xenstored_test.h b/tools/xenstore/xenstored_test.h
new file mode 100644
index 0000000000..cf607cf2e0
--- /dev/null
+++ b/tools/xenstore/xenstored_test.h
@@ -0,0 +1,37 @@
+/*
+ Testing replacements for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_TEST_H
+#define _XENSTORED_TEST_H
+
+#ifdef TESTING
+bool test_write_all(int fd, void *contents, unsigned int len);
+#define xs_write_all test_write_all
+
+int test_mkdir(const char *dir, int perms);
+#define mkdir test_mkdir
+
+int fake_open_eventchn(void);
+void fake_block_events(void);
+void fake_ack_event(void);
+
+#define ioctl(a,b,c) 0
+
+#endif
+
+#endif /* _XENSTORED_TEST_H */
diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c
new file mode 100644
index 0000000000..ca37307f8c
--- /dev/null
+++ b/tools/xenstore/xenstored_transaction.c
@@ -0,0 +1,284 @@
+/*
+ Transaction code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include "talloc.h"
+#include "list.h"
+#include "xenstored_transaction.h"
+#include "xenstored_watch.h"
+#include "xs_lib.h"
+#include "utils.h"
+#include "xenstored_test.h"
+
+struct changed_node
+{
+ /* The list within this transaction. */
+ struct list_head list;
+
+ /* The name of the node. */
+ char *node;
+};
+
+struct transaction
+{
+ /* Global list of transactions. */
+ struct list_head list;
+
+ /* My owner (conn->transaction == me). */
+ struct connection *conn;
+
+ /* Subtree this transaction covers */
+ char *node;
+
+ /* Base for this transaction. */
+ char *divert;
+
+ /* List of changed nodes. */
+ struct list_head changes;
+
+ /* Someone's waiting: time limit. */
+ struct timeval timeout;
+
+ /* We've timed out. */
+ bool destined_to_fail;
+};
+static LIST_HEAD(transactions);
+
+bool within_transaction(struct transaction *trans, const char *node)
+{
+ if (!trans)
+ return true;
+ return is_child(node, trans->node);
+}
+
+/* You are on notice: this transaction is blocking someone. */
+static void start_transaction_timeout(struct transaction *trans)
+{
+ if (timerisset(&trans->timeout))
+ return;
+
+ /* One second timeout. */
+ gettimeofday(&trans->timeout, NULL);
+ trans->timeout.tv_sec += 1;
+}
+
+struct transaction *transaction_covering_node(const char *node)
+{
+ struct transaction *i;
+
+ list_for_each_entry(i, &transactions, list) {
+ if (i->destined_to_fail)
+ continue;
+ if (is_child(i->node, node) || is_child(node, i->node))
+ return i;
+ }
+ return NULL;
+}
+
+bool transaction_block(struct connection *conn, const char *node)
+{
+ struct transaction *trans;
+
+ /* Transactions don't overlap, so we can't be blocked by
+ * others if we're in one. */
+ if (conn->transaction)
+ return false;
+
+ trans = transaction_covering_node(node);
+ if (trans) {
+ start_transaction_timeout(trans);
+ conn->blocked = talloc_strdup(conn, node);
+ return true;
+ }
+ return false;
+}
+
+/* Callers get a change node (which can fail) and only commit after they've
+ * finished. This way they don't have to unwind eg. a write. */
+void add_change_node(struct transaction *trans, const char *node)
+{
+ struct changed_node *i;
+
+ if (!trans)
+ return;
+
+ list_for_each_entry(i, &trans->changes, list)
+ if (streq(i->node, node))
+ return;
+
+ i = talloc(trans, struct changed_node);
+ i->node = talloc_strdup(i, node);
+ INIT_LIST_HEAD(&i->list);
+ list_add_tail(&i->list, &trans->changes);
+}
+
+char *node_dir_inside_transaction(struct transaction *trans, const char *node)
+{
+ return talloc_asprintf(node, "%s%s", trans->divert,
+ node + strlen(trans->node));
+}
+
+void shortest_transaction_timeout(struct timeval *tv)
+{
+ struct transaction *i;
+
+ list_for_each_entry(i, &transactions, list) {
+ if (!timerisset(&i->timeout))
+ continue;
+
+ if (!timerisset(tv) || timercmp(&i->timeout, tv, <))
+ *tv = i->timeout;
+ }
+}
+
+void check_transaction_timeout(void)
+{
+ struct transaction *i;
+ struct timeval now;
+
+ gettimeofday(&now, NULL);
+
+ list_for_each_entry(i, &transactions, list) {
+ if (!timerisset(&i->timeout))
+ continue;
+
+ if (timercmp(&i->timeout, &now, <))
+ i->destined_to_fail = true;
+ }
+}
+
+/* FIXME: Eliminate all uses of this */
+static bool do_command(const char *cmd)
+{
+ int ret;
+
+ ret = system(cmd);
+ if (ret == -1)
+ return false;
+ if (!WIFEXITED(ret) || WEXITSTATUS(ret) != 0) {
+ errno = EIO;
+ return false;
+ }
+ return true;
+}
+
+static int destroy_transaction(void *_transaction)
+{
+ struct transaction *trans = _transaction;
+
+ list_del(&trans->list);
+ return destroy_path(trans->divert);
+}
+
+bool do_transaction_start(struct connection *conn, const char *node)
+{
+ struct transaction *transaction;
+ char *dir, *cmd;
+
+ if (conn->transaction)
+ return send_error(conn, EBUSY);
+
+ if (!check_node_perms(conn, node, XS_PERM_READ))
+ return send_error(conn, errno);
+
+ if (transaction_block(conn, node))
+ return true;
+
+ dir = node_dir_outside_transaction(node);
+
+ /* Attach transaction to node for autofree until it's complete */
+ transaction = talloc(node, struct transaction);
+ transaction->node = talloc_strdup(transaction, node);
+ transaction->divert = talloc_asprintf(transaction, "%s/%p/",
+ xs_daemon_transactions(),
+ transaction);
+ cmd = talloc_asprintf(node, "cp -a %s %s", dir, transaction->divert);
+ if (!do_command(cmd))
+ corrupt(conn, "Creating transaction %s", transaction->divert);
+
+ talloc_steal(conn, transaction);
+ INIT_LIST_HEAD(&transaction->changes);
+ transaction->conn = conn;
+ timerclear(&transaction->timeout);
+ transaction->destined_to_fail = false;
+ list_add_tail(&transaction->list, &transactions);
+ conn->transaction = transaction;
+ talloc_set_destructor(transaction, destroy_transaction);
+ return send_ack(transaction->conn, XS_TRANSACTION_START);
+}
+
+static bool commit_transaction(struct transaction *trans)
+{
+ char *tmp, *dir;
+ struct changed_node *i;
+
+ /* Move: orig -> .old, repl -> orig. Cleanup deletes .old. */
+ dir = node_dir_outside_transaction(trans->node);
+ tmp = talloc_asprintf(trans, "%s.old", dir);
+
+ if (rename(dir, tmp) != 0)
+ return false;
+ if (rename(trans->divert, dir) != 0)
+ corrupt(trans->conn, "Failed rename %s to %s",
+ trans->divert, dir);
+
+ trans->divert = tmp;
+
+ /* Fire off the watches for everything that changed. */
+ list_for_each_entry(i, &trans->changes, list)
+ fire_watches(NULL, i->node);
+ return true;
+}
+
+bool do_transaction_end(struct connection *conn, const char *arg)
+{
+ if (!arg || (!streq(arg, "T") && !streq(arg, "F")))
+ return send_error(conn, EINVAL);
+
+ if (!conn->transaction)
+ return send_error(conn, ENOENT);
+
+ if (streq(arg, "T")) {
+ if (conn->transaction->destined_to_fail) {
+ send_error(conn, ETIMEDOUT);
+ goto failed;
+ }
+ if (!commit_transaction(conn->transaction)) {
+ send_error(conn, errno);
+ goto failed;
+ }
+ }
+
+ talloc_free(conn->transaction);
+ conn->transaction = NULL;
+ return send_ack(conn, XS_TRANSACTION_END);
+
+failed:
+ talloc_free(conn->transaction);
+ conn->transaction = NULL;
+ return false;
+}
+
diff --git a/tools/xenstore/xenstored_transaction.h b/tools/xenstore/xenstored_transaction.h
new file mode 100644
index 0000000000..a21bccad72
--- /dev/null
+++ b/tools/xenstore/xenstored_transaction.h
@@ -0,0 +1,50 @@
+/*
+ Transaction code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_TRANSACTION_H
+#define _XENSTORED_TRANSACTION_H
+#include "xenstored_core.h"
+
+struct transaction;
+
+bool do_transaction_start(struct connection *conn, const char *node);
+bool do_transaction_end(struct connection *conn, const char *arg);
+
+/* Is node covered by this transaction? */
+bool within_transaction(struct transaction *trans, const char *node);
+
+/* If a write op on this node is blocked by another connection's transaction,
+ * mark conn, set up the transaction timeout and return true.
+ */
+bool transaction_block(struct connection *conn, const char *node);
+
+/* Return transaction which covers this node. */
+struct transaction *transaction_covering_node(const char *node);
+
+/* Return directory of node within transaction t. */
+char *node_dir_inside_transaction(struct transaction *t, const char *node);
+
+/* Record that this node changed within the transaction (no-op if trans is NULL). */
+void add_change_node(struct transaction *trans, const char *node);
+
+/* Get shortest timeout: leave tv unset if none. */
+void shortest_transaction_timeout(struct timeval *tv);
+
+/* Have any transactions timed out yet? */
+void check_transaction_timeout(void);
+#endif /* _XENSTORED_TRANSACTION_H */
diff --git a/tools/xenstore/xenstored_watch.c b/tools/xenstore/xenstored_watch.c
new file mode 100644
index 0000000000..2df83e1a54
--- /dev/null
+++ b/tools/xenstore/xenstored_watch.c
@@ -0,0 +1,279 @@
+/*
+ Watch code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include "talloc.h"
+#include "list.h"
+#include "xenstored_watch.h"
+#include "xs_lib.h"
+#include "utils.h"
+#include "xenstored_test.h"
+
+/* We create this if anyone is interested in "node", then we pass it from
+ * watch to watch as each connection acks it.
+ */
+struct watch_event
+{
+ /* The watch we are firing for (watch->events) */
+ struct list_head list;
+
+ /* Watch we are currently attached to. */
+ struct watch *watch;
+
+ struct buffered_data *data;
+};
+
+struct watch
+{
+ struct list_head list;
+ unsigned int priority;
+
+ /* Current outstanding events applying to this watch. */
+ struct list_head events;
+
+ char *node;
+ struct connection *conn;
+};
+static LIST_HEAD(watches);
+
+static void reset_event(struct watch_event *event)
+{
+ event->data->inhdr = true;
+ event->data->used = 0;
+}
+
+/* We received a non-ACK response: re-queue any watch we just sent. */
+void reset_watch_event(struct connection *conn)
+{
+ if (waiting_for_ack(conn))
+ reset_event(conn->event);
+}
+
+/* We're waiting if we have an event and we sent it all. */
+bool waiting_for_ack(struct connection *conn)
+{
+ if (!conn->event)
+ return false;
+
+ if (conn->event->data->inhdr)
+ return false;
+ return conn->event->data->used == conn->event->data->hdr.msg.len;
+}
+
+bool is_watch_event(struct connection *conn, struct buffered_data *out)
+{
+ return (conn->event && out == conn->event->data);
+}
+
+/* Look through our watches: if any of them have an event, queue it. */
+void queue_next_event(struct connection *conn)
+{
+ struct watch *watch;
+
+ /* We had a reply queued already? Send it. */
+ if (conn->waiting_reply) {
+ conn->out = conn->waiting_reply;
+ conn->waiting_reply = NULL;
+ return;
+ }
+
+ /* If we're waiting for ack, don't queue more. */
+ if (waiting_for_ack(conn))
+ return;
+
+ /* Find a good event to send. */
+ if (!conn->event) {
+ list_for_each_entry(watch, &watches, list) {
+ if (watch->conn != conn)
+ continue;
+
+ conn->event = list_top(&watch->events,
+ struct watch_event, list);
+ if (conn->event)
+ break;
+ }
+ if (!conn->event)
+ return;
+ }
+
+ conn->out = conn->event->data;
+}
+
+/* Watch on DIR applies to DIR, DIR/FILE, but not DIRLONG. */
+static bool watch_applies(const struct watch *watch, const char *node)
+{
+ return is_child(node, watch->node);
+}
+
+static struct watch *find_watch(const char *node)
+{
+ struct watch *watch;
+
+ list_for_each_entry(watch, &watches, list) {
+ if (watch_applies(watch, node))
+ return watch;
+ }
+ return NULL;
+}
+
+static struct watch *find_next_watch(struct watch *watch, const char *node)
+{
+ list_for_each_entry_continue(watch, &watches, list) {
+ if (watch_applies(watch, node))
+ return watch;
+ }
+ return NULL;
+}
+
+/* FIXME: we fail to fire on out of memory. Should drop connections. */
+void fire_watches(struct transaction *trans, const char *node)
+{
+ struct watch *watch;
+ struct watch_event *event;
+
+ /* During transactions, don't fire watches. */
+ if (trans)
+ return;
+
+ watch = find_watch(node);
+ if (!watch)
+ return;
+
+ /* Create and fill in info about event. */
+ event = talloc(talloc_autofree_context(), struct watch_event);
+ event->data = new_buffer(event);
+ event->data->hdr.msg.type = XS_WATCH_EVENT;
+ event->data->hdr.msg.len = strlen(node) + 1;
+ event->data->buffer = talloc_strdup(event->data, node);
+
+ /* Tie event to this watch. */
+ event->watch = watch;
+ list_add(&event->list, &watch->events);
+
+ /* If connection not doing anything, queue this. */
+ if (!watch->conn->out)
+ queue_next_event(watch->conn);
+}
+
+/* We're done with this event: see if anyone else wants it. */
+static void move_event_onwards(struct watch_event *event)
+{
+ list_del(&event->list);
+ reset_event(event);
+
+ /* Remove from this watch, and find next watch to put this on. */
+ event->watch = find_next_watch(event->watch, event->data->buffer);
+ if (!event->watch) {
+ talloc_free(event);
+ return;
+ }
+
+ list_add(&event->list, &event->watch->events);
+
+ /* If connection not doing anything, queue this. */
+ if (!event->watch->conn->out)
+ queue_next_event(event->watch->conn);
+}
+
+static int destroy_watch(void *_watch)
+{
+ struct watch *watch = _watch;
+ struct watch_event *event;
+
+ /* Forget about sending out or waiting for acks for this watch. */
+ if (watch->conn->event && watch->conn->event->watch == watch)
+ watch->conn->event = NULL;
+
+ /* If we have pending events, pass them on to others. */
+ while ((event = list_top(&watch->events, struct watch_event, list)))
+ move_event_onwards(event);
+
+ /* Remove from global list. */
+ list_del(&watch->list);
+ return 0;
+}
+
+/* We keep watches in priority order. */
+static void insert_watch(struct watch *watch)
+{
+ struct watch *i;
+
+ list_for_each_entry(i, &watches, list) {
+ if (i->priority <= watch->priority) {
+ list_add_tail(&watch->list, &i->list);
+ return;
+ }
+ }
+
+ list_add_tail(&watch->list, &watches);
+}
+
+bool do_watch(struct connection *conn, struct buffered_data *in)
+{
+ struct watch *watch;
+ char *vec[2];
+
+ if (get_strings(in, vec, ARRAY_SIZE(vec)) != ARRAY_SIZE(vec))
+ return send_error(conn, EINVAL);
+
+ if (!check_node_perms(conn, vec[0], XS_PERM_READ))
+ return send_error(conn, errno);
+
+ watch = talloc(conn, struct watch);
+ watch->node = talloc_strdup(watch, vec[0]);
+ watch->conn = conn;
+ watch->priority = strtoul(vec[1], NULL, 0);
+ INIT_LIST_HEAD(&watch->events);
+
+ insert_watch(watch);
+ talloc_set_destructor(watch, destroy_watch);
+ return send_ack(conn, XS_WATCH);
+}
+
+bool do_watch_ack(struct connection *conn)
+{
+ struct watch_event *event;
+
+ if (!waiting_for_ack(conn))
+ return send_error(conn, ENOENT);
+
+ /* Remove this watch event. */
+ event = conn->event;
+ conn->event = NULL;
+
+ move_event_onwards(event);
+ return send_ack(conn, XS_WATCH_ACK);
+}
+
+bool do_unwatch(struct connection *conn, const char *node)
+{
+ struct watch *watch;
+
+ list_for_each_entry(watch, &watches, list) {
+ if (watch->conn == conn
+ && streq(watch->node, node)) {
+ talloc_free(watch);
+ return send_ack(conn, XS_UNWATCH);
+ }
+ }
+ return send_error(conn, ENOENT);
+}
diff --git a/tools/xenstore/xenstored_watch.h b/tools/xenstore/xenstored_watch.h
new file mode 100644
index 0000000000..656ce4c36b
--- /dev/null
+++ b/tools/xenstore/xenstored_watch.h
@@ -0,0 +1,42 @@
+/*
+ Watch code for Xen Store Daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#ifndef _XENSTORED_WATCH_H
+#define _XENSTORED_WATCH_H
+#include "xenstored_core.h"
+
+bool do_watch(struct connection *conn, struct buffered_data *in);
+bool do_watch_ack(struct connection *conn);
+bool do_unwatch(struct connection *conn, const char *node);
+
+/* Is this a watch event message for this connection? */
+bool is_watch_event(struct connection *conn, struct buffered_data *out);
+
+/* Look through our watches: if any of them have an event, queue it. */
+void queue_next_event(struct connection *conn);
+
+/* Is this connection waiting for a watch acknowledgement? */
+bool waiting_for_ack(struct connection *conn);
+
+/* Reset event if we were sending one */
+void reset_watch_event(struct connection *conn);
+
+/* Fire all watches. */
+void fire_watches(struct transaction *trans, const char *node);
+
+#endif /* _XENSTORED_WATCH_H */
diff --git a/tools/xenstore/xs.c b/tools/xenstore/xs.c
new file mode 100644
index 0000000000..d6e41380f9
--- /dev/null
+++ b/tools/xenstore/xs.c
@@ -0,0 +1,551 @@
+/*
+ Xen Store Daemon interface providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdint.h>
+#include <errno.h>
+#include "xs.h"
+#include "xenstored.h"
+#include "xs_lib.h"
+#include "utils.h"
+
+/* Opaque client handle: just wraps the socket fd to the daemon.
+ * fd is set to -1 after a protocol failure (see xs_talkv). */
+struct xs_handle
+{
+	int fd;
+};
+
+/* Get the socket from the store daemon handle.
+ * Callers poll/select on this fd to detect pending watch events.
+ */
+int xs_fileno(struct xs_handle *h)
+{
+	return h->fd;
+}
+
+/* Connect to the daemon's unix-domain socket at connect_to.
+ * Returns a malloc'ed handle, or NULL with errno set. */
+static struct xs_handle *get_socket(const char *connect_to)
+{
+	struct sockaddr_un addr;
+	int sock, saved_errno;
+	struct xs_handle *h = NULL;
+
+	/* sun_path is a small fixed-size buffer; reject paths that would
+	 * overflow it (the path comes from an environment variable). */
+	if (strlen(connect_to) >= sizeof(addr.sun_path)) {
+		errno = ENAMETOOLONG;
+		return NULL;
+	}
+
+	sock = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (sock < 0)
+		return NULL;
+
+	/* Zero the whole struct: avoids passing uninitialized padding
+	 * bytes to connect(). */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, connect_to);
+
+	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
+		h = malloc(sizeof(*h));
+		if (h) {
+			h->fd = sock;
+			return h;
+		}
+	}
+
+	/* Failure: report the original error, not close()/free()'s. */
+	saved_errno = errno;
+	close(sock);
+	free(h);
+	errno = saved_errno;
+	return NULL;
+}
+
+/* Connect to the daemon's read-write socket. */
+struct xs_handle *xs_daemon_open(void)
+{
+	return get_socket(xs_daemon_socket());
+}
+
+/* Connect to the read-only socket (for non-root clients). */
+struct xs_handle *xs_daemon_open_readonly(void)
+{
+	return get_socket(xs_daemon_socket_ro());
+}
+
+/* Close the connection and free the handle.
+ * fd may be -1 if a previous protocol failure already closed it. */
+void xs_daemon_close(struct xs_handle *h)
+{
+	if (h->fd >= 0)
+		close(h->fd);
+	free(h);
+}
+
+/* Read exactly len bytes from fd into data, retrying on EINTR.
+ * Returns false (errno set) on error or premature EOF. */
+static bool read_all(int fd, void *data, unsigned int len)
+{
+	/* char cursor: pointer arithmetic on void * is a GNU extension,
+	 * not ISO C. */
+	char *p = data;
+
+	while (len) {
+		int done;
+
+		done = read(fd, p, len);
+		if (done < 0) {
+			if (errno == EINTR)
+				continue;
+			return false;
+		}
+		if (done == 0) {
+			/* It closed fd on us? EBADF is appropriate. */
+			errno = EBADF;
+			return false;
+		}
+		p += done;
+		len -= done;
+	}
+
+	return true;
+}
+
+#ifdef XSTEST
+#define read_all read_all_choice
+#define xs_write_all write_all_choice
+#endif
+
+/* Map the daemon's textual error reply back to an errno value.
+ * Unknown strings map to EINVAL (the bounds check fires on the last
+ * table entry before the loop can run off the end of xsd_errors). */
+static int get_error(const char *errorstring)
+{
+	unsigned int i;
+
+	for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++)
+		if (i == ARRAY_SIZE(xsd_errors) - 1)
+			return EINVAL;
+	return xsd_errors[i].errnum;
+}
+
+/* Read one reply (header + body) from fd.  Returns a malloc'ed body
+ * (caller frees), stores the message type in *type and, if len is
+ * non-NULL, the body length.  NULL with errno set on failure. */
+static void *read_reply(int fd, enum xsd_sockmsg_type *type, unsigned int *len)
+{
+	struct xsd_sockmsg msg;
+	void *ret;
+	int saved_errno;
+
+	if (!read_all(fd, &msg, sizeof(msg)))
+		return NULL;
+
+	/* malloc(0) may legitimately return NULL: request at least one
+	 * byte so a zero-length reply isn't mistaken for OOM. */
+	ret = malloc(msg.len ? msg.len : 1);
+	if (!ret)
+		return NULL;
+
+	if (!read_all(fd, ret, msg.len)) {
+		saved_errno = errno;
+		free(ret);
+		errno = saved_errno;
+		return NULL;
+	}
+
+	*type = msg.type;
+	if (len)
+		*len = msg.len;
+	return ret;
+}
+
+/* Send message to xs, get malloc'ed reply.  NULL and set errno on error.
+ *
+ * SIGPIPE is ignored for the duration so a dead daemon surfaces as a
+ * write error rather than killing the process.  On any send/receive
+ * failure the socket is in an unknown state, so it is closed and
+ * h->fd set to -1. */
+static void *xs_talkv(struct xs_handle *h, enum xsd_sockmsg_type type,
+		      const struct iovec *iovec,
+		      unsigned int num_vecs,
+		      unsigned int *len)
+{
+	struct xsd_sockmsg msg;
+	void *ret = NULL;
+	int saved_errno;
+	unsigned int i;
+	struct sigaction ignorepipe, oldact;
+
+	msg.type = type;
+	msg.len = 0;
+	/* Total payload length is the sum of all iovec segments. */
+	for (i = 0; i < num_vecs; i++)
+		msg.len += iovec[i].iov_len;
+
+	ignorepipe.sa_handler = SIG_IGN;
+	sigemptyset(&ignorepipe.sa_mask);
+	ignorepipe.sa_flags = 0;
+	sigaction(SIGPIPE, &ignorepipe, &oldact);
+
+	if (!xs_write_all(h->fd, &msg, sizeof(msg)))
+		goto fail;
+
+	for (i = 0; i < num_vecs; i++)
+		if (!xs_write_all(h->fd, iovec[i].iov_base, iovec[i].iov_len))
+			goto fail;
+
+	/* Watches can have fired before reply comes: daemon detects
+	 * and re-transmits, so we can ignore this. */
+	do {
+		free(ret);
+		ret = read_reply(h->fd, &msg.type, len);
+		if (!ret)
+			goto fail;
+	} while (msg.type == XS_WATCH_EVENT);
+
+	sigaction(SIGPIPE, &oldact, NULL);
+	if (msg.type == XS_ERROR) {
+		/* Reply body is an error string; convert it to errno. */
+		saved_errno = get_error(ret);
+		free(ret);
+		errno = saved_errno;
+		return NULL;
+	}
+
+	assert(msg.type == type);
+	return ret;
+
+fail:
+	/* We're in a bad state, so close fd. */
+	saved_errno = errno;
+	sigaction(SIGPIPE, &oldact, NULL);
+	close(h->fd);
+	h->fd = -1;
+	errno = saved_errno;
+	return NULL;
+}
+
+/* free(), but don't change errno.  Used on cleanup paths where the
+ * caller needs to report the original failure. */
+static void free_no_errno(void *p)
+{
+	int saved_errno = errno;
+	free(p);
+	errno = saved_errno;
+}
+
+/* Simplified version of xs_talkv: single nul-terminated string payload. */
+static void *xs_single(struct xs_handle *h, enum xsd_sockmsg_type type,
+		       const char *string, unsigned int *len)
+{
+	struct iovec iovec;
+
+	/* Cast drops const: iov_base is non-const but is only read here. */
+	iovec.iov_base = (void *)string;
+	iovec.iov_len = strlen(string) + 1;
+	return xs_talkv(h, type, &iovec, 1, len);
+}
+
+/* Convert a talkv/single reply into a boolean result, freeing the
+ * (unwanted) reply body.  NULL reply means failure (errno already set). */
+static bool xs_bool(char *reply)
+{
+	if (!reply)
+		return false;
+	free(reply);
+	return true;
+}
+
+/* Get contents of a directory.
+ * Returns a single malloc'ed allocation holding both the pointer array
+ * and the string data, so the caller frees everything with one free().
+ * *num is set to the number of entries.  NULL with errno set on error. */
+char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num)
+{
+	char *strings, *p, **ret;
+	unsigned int len;
+
+	strings = xs_single(h, XS_DIRECTORY, path, &len);
+	if (!strings)
+		return NULL;
+
+	/* Count the strings. */
+	*num = xs_count_strings(strings, len);
+
+	/* Transfer to one big alloc for easy freeing. */
+	ret = malloc(*num * sizeof(char *) + len);
+	if (!ret) {
+		free_no_errno(strings);
+		return NULL;
+	}
+	/* String data lives immediately after the pointer array. */
+	memcpy(&ret[*num], strings, len);
+	free_no_errno(strings);
+
+	/* Point each array slot at its nul-terminated string. */
+	strings = (char *)&ret[*num];
+	for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
+		ret[(*num)++] = p;
+	return ret;
+}
+
+/* Get the value of a single file.
+ * Returns a malloced value: call free() on it after use.
+ * len indicates length in bytes.
+ */
+void *xs_read(struct xs_handle *h, const char *path, unsigned int *len)
+{
+	return xs_single(h, XS_READ, path, len);
+}
+
+/* Write the value of a single file.
+ * Returns false on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Any other flag combination is rejected with EINVAL before touching
+ * the wire.
+ */
+bool xs_write(struct xs_handle *h, const char *path,
+	      const void *data, unsigned int len, int createflags)
+{
+	const char *flags;
+	struct iovec iovec[3];
+
+	/* Format: Flags (as string), path, data. */
+	if (createflags == 0)
+		flags = XS_WRITE_NONE;
+	else if (createflags == O_CREAT)
+		flags = XS_WRITE_CREATE;
+	else if (createflags == (O_CREAT|O_EXCL))
+		flags = XS_WRITE_CREATE_EXCL;
+	else {
+		errno = EINVAL;
+		return false;
+	}
+
+	/* Note: wire order is path, flags, data. */
+	iovec[0].iov_base = (void *)path;
+	iovec[0].iov_len = strlen(path) + 1;
+	iovec[1].iov_base = (void *)flags;
+	iovec[1].iov_len = strlen(flags) + 1;
+	iovec[2].iov_base = (void *)data;
+	iovec[2].iov_len = len;
+
+	return xs_bool(xs_talkv(h, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+}
+
+/* Create a new directory.
+ * Returns false on failure.
+ */
+bool xs_mkdir(struct xs_handle *h, const char *path)
+{
+	return xs_bool(xs_single(h, XS_MKDIR, path, NULL));
+}
+
+/* Destroy a file or directory (directories must be empty).
+ * Returns false on failure.
+ */
+bool xs_rm(struct xs_handle *h, const char *path)
+{
+	return xs_bool(xs_single(h, XS_RM, path, NULL));
+}
+
+/* Get permissions of node (first element is owner).
+ * Returns malloced array, or NULL: call free() after use.
+ * *num is set to the number of entries in the array.
+ */
+struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+					  const char *path,
+					  unsigned int *num)
+{
+	char *strings;
+	unsigned int len;
+	struct xs_permissions *ret;
+
+	strings = xs_single(h, XS_GET_PERMS, path, &len);
+	if (!strings)
+		return NULL;
+
+	/* Count the strings: each one perms then domid. */
+	*num = xs_count_strings(strings, len);
+
+	/* Transfer to one big alloc for easy freeing. */
+	ret = malloc(*num * sizeof(struct xs_permissions));
+	if (!ret) {
+		free_no_errno(strings);
+		return NULL;
+	}
+
+	/* Parse "r<domid>" / "w<domid>" / etc. entries; NULL on bad data. */
+	if (!xs_strings_to_perms(ret, *num, strings)) {
+		free_no_errno(ret);
+		ret = NULL;
+	}
+
+	free(strings);
+	return ret;
+}
+
+/* Set permissions of node (must be owner).
+ * Returns false on failure.
+ *
+ * Builds one iovec per permission entry (each a strdup'ed string);
+ * the unwind path frees exactly the i entries allocated so far,
+ * preserving errno.
+ */
+bool xs_set_permissions(struct xs_handle *h, const char *path,
+			struct xs_permissions *perms,
+			unsigned int num_perms)
+{
+	unsigned int i;
+	struct iovec iov[1+num_perms];
+
+	iov[0].iov_base = (void *)path;
+	iov[0].iov_len = strlen(path) + 1;
+
+	for (i = 0; i < num_perms; i++) {
+		char buffer[MAX_STRLEN(domid_t)+1];
+
+		if (!xs_perm_to_string(&perms[i], buffer))
+			goto unwind;
+
+		/* Copy: buffer is reused on every iteration. */
+		iov[i+1].iov_base = strdup(buffer);
+		iov[i+1].iov_len = strlen(buffer) + 1;
+		if (!iov[i+1].iov_base)
+			goto unwind;
+	}
+
+	if (!xs_bool(xs_talkv(h, XS_SET_PERMS, iov, 1+num_perms, NULL)))
+		goto unwind;
+	for (i = 0; i < num_perms; i++)
+		free(iov[i+1].iov_base);
+	return true;
+
+unwind:
+	/* Free only the entries successfully allocated before failure. */
+	num_perms = i;
+	for (i = 0; i < num_perms; i++)
+		free_no_errno(iov[i+1].iov_base);
+	return false;
+}
+
+/* Watch a node for changes (poll on fd to detect, or call read_watch()).
+ * When the node (or any child) changes, fd will become readable.
+ * Priority indicates order if multiple watchers: higher is first.
+ * Returns false on failure.
+ */
+bool xs_watch(struct xs_handle *h, const char *path, unsigned int priority)
+{
+	/* MAX_STRLEN sizes the buffer for the decimal digits + nul. */
+	char prio[MAX_STRLEN(priority)];
+	struct iovec iov[2];
+
+	sprintf(prio, "%u", priority);
+	iov[0].iov_base = (void *)path;
+	iov[0].iov_len = strlen(path) + 1;
+	iov[1].iov_base = prio;
+	iov[1].iov_len = strlen(prio) + 1;
+
+	return xs_bool(xs_talkv(h, XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
+}
+
+/* Find out what node change was on (will block if nothing pending).
+ * Returns malloced path, or NULL: call free() after use.
+ *
+ * Reads directly from the socket (not via xs_talkv): the only
+ * unsolicited traffic is watch events, asserted below.
+ */
+char *xs_read_watch(struct xs_handle *h)
+{
+	struct xsd_sockmsg msg;
+	char *path;
+
+	if (!read_all(h->fd, &msg, sizeof(msg)))
+		return NULL;
+
+	assert(msg.type == XS_WATCH_EVENT);
+	/* Body is the nul-terminated path; msg.len includes the nul. */
+	path = malloc(msg.len);
+	if (!path)
+		return NULL;
+
+	if (!read_all(h->fd, path, msg.len)) {
+		free_no_errno(path);
+		return NULL;
+	}
+	return path;
+}
+
+/* Acknowledge watch on node.  Watches must be acknowledged before
+ * any other watches can be read.
+ * Returns false on failure.
+ * (The "OK" payload is a placeholder; the daemon ignores its content.)
+ */
+bool xs_acknowledge_watch(struct xs_handle *h)
+{
+	return xs_bool(xs_single(h, XS_WATCH_ACK, "OK", NULL));
+}
+
+/* Remove a watch on a node.
+ * Returns false on failure (no watch on that node).
+ */
+bool xs_unwatch(struct xs_handle *h, const char *path)
+{
+	return xs_bool(xs_single(h, XS_UNWATCH, path, NULL));
+}
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ * Transaction only applies to the given subtree.
+ * You can only have one transaction at any time.
+ * Returns false on failure.
+ */
+bool xs_transaction_start(struct xs_handle *h, const char *subtree)
+{
+	return xs_bool(xs_single(h, XS_TRANSACTION_START, subtree, NULL));
+}
+
+/* End a transaction.
+ * If abort is true, transaction is discarded instead of committed.
+ * Returns false on failure, which indicates an error: transactions will
+ * not fail spuriously.
+ */
+bool xs_transaction_end(struct xs_handle *h, bool abort)
+{
+	/* Wire argument: "T" commits, "F" discards. */
+	return xs_bool(xs_single(h, XS_TRANSACTION_END,
+				 abort ? "F" : "T", NULL));
+}
+
+/* Introduce a new domain.
+ * This tells the store daemon about a shared memory page and event channel
+ * associated with a domain: the domain uses these to communicate.
+ * Payload: domid, mfn, event channel (all as decimal strings), then
+ * the domain's home path in the store.
+ */
+bool xs_introduce_domain(struct xs_handle *h,
+			 domid_t domid,
+			 unsigned long mfn,
+			 unsigned int eventchn,
+			 const char *path)
+{
+	char domid_str[MAX_STRLEN(domid)];
+	char mfn_str[MAX_STRLEN(mfn)];
+	char eventchn_str[MAX_STRLEN(eventchn)];
+	struct iovec iov[4];
+
+	sprintf(domid_str, "%u", domid);
+	sprintf(mfn_str, "%lu", mfn);
+	sprintf(eventchn_str, "%u", eventchn);
+
+	iov[0].iov_base = domid_str;
+	iov[0].iov_len = strlen(domid_str) + 1;
+	iov[1].iov_base = mfn_str;
+	iov[1].iov_len = strlen(mfn_str) + 1;
+	iov[2].iov_base = eventchn_str;
+	iov[2].iov_len = strlen(eventchn_str) + 1;
+	iov[3].iov_base = (char *)path;
+	iov[3].iov_len = strlen(path) + 1;
+
+	return xs_bool(xs_talkv(h, XS_INTRODUCE, iov, ARRAY_SIZE(iov), NULL));
+}
+
+/* Release a domain: tells the daemon to drop its state for domid. */
+bool xs_release_domain(struct xs_handle *h,
+		       domid_t domid)
+{
+	char domid_str[MAX_STRLEN(domid)];
+
+	sprintf(domid_str, "%u", domid);
+
+	return xs_bool(xs_single(h, XS_RELEASE, domid_str, NULL));
+}
+
+/* Ask the daemon to shut down; blocks until it has actually exited
+ * (the read returns when the daemon closes our socket). */
+bool xs_shutdown(struct xs_handle *h)
+{
+	bool ret = xs_bool(xs_single(h, XS_SHUTDOWN, "", NULL));
+	if (ret) {
+		char c;
+		/* Wait for it to actually shutdown: read result is
+		 * deliberately ignored, we only care that it returns. */
+		read(h->fd, &c, 1);
+	}
+	return ret;
+}
+
+/* Only useful for DEBUG versions.
+ * Sends a command name plus an opaque data blob; returns the daemon's
+ * malloc'ed reply (caller frees), or NULL with errno set. */
+char *xs_debug_command(struct xs_handle *h, const char *cmd,
+		       void *data, unsigned int len)
+{
+	struct iovec iov[2];
+
+	iov[0].iov_base = (void *)cmd;
+	iov[0].iov_len = strlen(cmd) + 1;
+	iov[1].iov_base = data;
+	iov[1].iov_len = len;
+
+	return xs_talkv(h, XS_DEBUG, iov, ARRAY_SIZE(iov), NULL);
+}
diff --git a/tools/xenstore/xs.h b/tools/xenstore/xs.h
new file mode 100644
index 0000000000..ff9481c3a6
--- /dev/null
+++ b/tools/xenstore/xs.h
@@ -0,0 +1,146 @@
+#ifndef _XS_H
+#define _XS_H
+/*
+ Xen Store Daemon providing simple tree-like database.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/* On failure, these routines set errno. */
+#include "xs_lib.h"
+
+struct xs_handle;
+
+/* Connect to the xs daemon.
+ * Returns a handle or NULL.
+ */
+struct xs_handle *xs_daemon_open(void);
+
+/* Connect to the xs daemon (readonly for non-root clients).
+ * Returns a handle or NULL.
+ */
+struct xs_handle *xs_daemon_open_readonly(void);
+
+/* Close the connection to the xs daemon. */
+void xs_daemon_close(struct xs_handle *);
+
+/* Get contents of a directory.
+ * Returns a malloced array: call free() on it after use.
+ * Num indicates size.
+ */
+char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num);
+
+/* Get the value of a single file.
+ * Returns a malloced value: call free() on it after use.
+ * len indicates length in bytes.
+ */
+void *xs_read(struct xs_handle *h, const char *path, unsigned int *len);
+
+/* Write the value of a single file.
+ * Returns false on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ */
+bool xs_write(struct xs_handle *h, const char *path, const void *data, unsigned int len,
+ int createflags);
+
+/* Create a new directory.
+ * Returns false on failure.
+ */
+bool xs_mkdir(struct xs_handle *h, const char *path);
+
+/* Destroy a file or directory (and children).
+ * Returns false on failure.
+ */
+bool xs_rm(struct xs_handle *h, const char *path);
+
+/* Get permissions of node (first element is owner, first perms is "other").
+ * Returns malloced array, or NULL: call free() after use.
+ */
+struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+ const char *path,
+ unsigned int *num);
+
+/* Set permissions of node (must be owner).
+ * Returns false on failure.
+ */
+bool xs_set_permissions(struct xs_handle *h,
+ const char *path,
+ struct xs_permissions *perms,
+ unsigned int num_perms);
+
+/* Watch a node for changes (poll on fd to detect, or call read_watch()).
+ * When the node (or any child) changes, fd will become readable.
+ * Priority indicates order if multiple watchers: higher is first.
+ * Returns false on failure.
+ */
+bool xs_watch(struct xs_handle *h, const char *path, unsigned int priority);
+
+/* Return the FD to poll on to see if a watch has fired. */
+int xs_fileno(struct xs_handle *h);
+
+/* Find out what node change was on (will block if nothing pending).
+ * Returns malloced path, or NULL: call free() after use.
+ */
+char *xs_read_watch(struct xs_handle *h);
+
+/* Acknowledge watch on node. Watches must be acknowledged before
+ * any other watches can be read.
+ * Returns false on failure.
+ */
+bool xs_acknowledge_watch(struct xs_handle *h);
+
+/* Remove a watch on a node.
+ * Returns false on failure (no watch on that node).
+ */
+bool xs_unwatch(struct xs_handle *h, const char *path);
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ * Transaction only applies to the given subtree.
+ * You can only have one transaction at any time.
+ * Returns false on failure.
+ */
+bool xs_transaction_start(struct xs_handle *h, const char *subtree);
+
+/* End a transaction.
+ * If abandon is true, transaction is discarded instead of committed.
+ * Returns false on failure, which indicates an error: transactions will
+ * not fail spuriously.
+ */
+bool xs_transaction_end(struct xs_handle *h, bool abort);
+
+/* Introduce a new domain.
+ * This tells the store daemon about a shared memory page, event channel
+ * and store path associated with a domain: the domain uses these to communicate.
+ */
+bool xs_introduce_domain(struct xs_handle *h,
+ domid_t domid,
+ unsigned long mfn,
+ unsigned int eventchn,
+ const char *path);
+
+/* Release a domain.
+ * Tells the store domain to release the memory page to the domain.
+ */
+bool xs_release_domain(struct xs_handle *h, domid_t domid);
+
+/* Only useful for DEBUG versions */
+char *xs_debug_command(struct xs_handle *h, const char *cmd,
+ void *data, unsigned int len);
+
+/* Shut down the daemon. */
+bool xs_shutdown(struct xs_handle *h);
+
+#endif /* _XS_H */
diff --git a/tools/xenstore/xs_lib.c b/tools/xenstore/xs_lib.c
new file mode 100644
index 0000000000..3f4f4b0899
--- /dev/null
+++ b/tools/xenstore/xs_lib.c
@@ -0,0 +1,141 @@
+#include "xs_lib.h"
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/* Common routines for the Xen store daemon and client library. */
+
+/* Persistent-state directory; XENSTORED_ROOTDIR env var overrides. */
+static const char *xs_daemon_rootdir(void)
+{
+	char *s = getenv("XENSTORED_ROOTDIR");
+	return (s ? s : "/var/lib/xenstored");
+}
+
+/* Runtime (socket) directory; XENSTORED_RUNDIR env var overrides. */
+static const char *xs_daemon_rundir(void)
+{
+	char *s = getenv("XENSTORED_RUNDIR");
+	return (s ? s : "/var/run/xenstored");
+}
+
+/* Path of the read-write daemon socket.  Returns a static buffer:
+ * not thread-safe, valid until the next call. */
+const char *xs_daemon_socket(void)
+{
+	static char buf[PATH_MAX];
+	/* snprintf: the directory comes from an environment variable and
+	 * could otherwise overflow the PATH_MAX buffer. */
+	snprintf(buf, sizeof(buf), "%s/socket", xs_daemon_rundir());
+	return buf;
+}
+
+/* Path of the read-only daemon socket.  Returns a static buffer:
+ * not thread-safe, valid until the next call. */
+const char *xs_daemon_socket_ro(void)
+{
+	static char buf[PATH_MAX];
+	/* snprintf guards against an oversized XENSTORED_RUNDIR. */
+	snprintf(buf, sizeof(buf), "%s/socket_ro", xs_daemon_rundir());
+	return buf;
+}
+
+/* Path of the on-disk store.  Returns a static buffer:
+ * not thread-safe, valid until the next call. */
+const char *xs_daemon_store(void)
+{
+	static char buf[PATH_MAX];
+	/* snprintf guards against an oversized XENSTORED_ROOTDIR. */
+	snprintf(buf, sizeof(buf), "%s/store", xs_daemon_rootdir());
+	return buf;
+}
+
+/* Path of the transactions directory.  Returns a static buffer:
+ * not thread-safe, valid until the next call. */
+const char *xs_daemon_transactions(void)
+{
+	static char buf[PATH_MAX];
+	/* snprintf guards against an oversized XENSTORED_ROOTDIR. */
+	snprintf(buf, sizeof(buf), "%s/transactions", xs_daemon_rootdir());
+	return buf;
+}
+
+/* Simple routines for writing to sockets, etc.
+ * Write exactly len bytes, retrying on EINTR; false on any other
+ * error or a zero-length write. */
+bool xs_write_all(int fd, const void *data, unsigned int len)
+{
+	/* char cursor: pointer arithmetic on void * is a GNU extension,
+	 * not ISO C. */
+	const char *p = data;
+
+	while (len) {
+		int done;
+
+		done = write(fd, p, len);
+		if (done < 0 && errno == EINTR)
+			continue;
+		if (done <= 0)
+			return false;
+		p += done;
+		len -= done;
+	}
+
+	return true;
+}
+
+/* Convert strings to permissions.  False if a problem.
+ * Input is num consecutive nul-terminated entries, each a permission
+ * letter followed by a decimal domain id, e.g. "r0", "w5", "b12", "n3".
+ */
+bool xs_strings_to_perms(struct xs_permissions *perms, unsigned int num,
+			 const char *strings)
+{
+	const char *p;
+	char *end;
+	unsigned int i;
+
+	for (p = strings, i = 0; i < num; i++) {
+		/* "r", "w", or "b" for both. */
+		switch (*p) {
+		case 'r':
+			perms[i].perms = XS_PERM_READ;
+			break;
+		case 'w':
+			perms[i].perms = XS_PERM_WRITE;
+			break;
+		case 'b':
+			perms[i].perms = XS_PERM_READ|XS_PERM_WRITE;
+			break;
+		case 'n':
+			perms[i].perms = XS_PERM_NONE;
+			break;
+		default:
+			errno = EINVAL;
+			return false;
+		}
+		p++;
+		perms[i].id = strtol(p, &end, 0);
+		/* Reject trailing junk after the id, or an empty id. */
+		if (*end || !*p) {
+			errno = EINVAL;
+			return false;
+		}
+		/* Skip the entry's nul terminator to the next entry. */
+		p = end + 1;
+	}
+	return true;
+}
+
+/* Convert permissions to a string (up to len MAX_STRLEN(domid_t)+1).
+ * Inverse of xs_strings_to_perms for a single entry: one letter then
+ * the decimal domain id.  False with EINVAL for unknown perm bits.
+ */
+bool xs_perm_to_string(const struct xs_permissions *perm, char *buffer)
+{
+	switch (perm->perms) {
+	case XS_PERM_WRITE:
+		*buffer = 'w';
+		break;
+	case XS_PERM_READ:
+		*buffer = 'r';
+		break;
+	case XS_PERM_READ|XS_PERM_WRITE:
+		*buffer = 'b';
+		break;
+	case XS_PERM_NONE:
+		*buffer = 'n';
+		break;
+	default:
+		errno = EINVAL;
+		return false;
+	}
+	sprintf(buffer+1, "%i", (int)perm->id);
+	return true;
+}
+
+/* Given a string and a length, count how many strings (nul terms). */
+unsigned int xs_count_strings(const char *strings, unsigned int len)
+{
+	unsigned int count = 0;
+	const char *cur = strings;
+	const char *end = strings + len;
+
+	while (cur < end) {
+		count++;
+		cur += strlen(cur) + 1;
+	}
+
+	return count;
+}
+
diff --git a/tools/xenstore/xs_lib.h b/tools/xenstore/xs_lib.h
new file mode 100644
index 0000000000..76ea9b67fe
--- /dev/null
+++ b/tools/xenstore/xs_lib.h
@@ -0,0 +1,63 @@
+#ifndef _XR_LIB_H
+#define _XR_LIB_H
+/*
+ Common routines between Xen store user library and daemon.
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <stdbool.h>
+#include <limits.h>
+#include <xc.h>
+
+/* Bitmask of permissions. */
+enum xs_perm_type {
+ XS_PERM_NONE = 0,
+ XS_PERM_READ = 1,
+ XS_PERM_WRITE = 2,
+ /* Internal use. */
+ XS_PERM_CREATE = 4,
+ XS_PERM_OWNER = 8,
+};
+
+struct xs_permissions
+{
+ domid_t id;
+ enum xs_perm_type perms;
+};
+
+/* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. */
+#define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2)
+
+/* Path for various daemon things: env vars can override. */
+const char *xs_daemon_socket(void);
+const char *xs_daemon_socket_ro(void);
+const char *xs_daemon_store(void);
+const char *xs_daemon_transactions(void);
+
+/* Simple write function: loops for you. */
+bool xs_write_all(int fd, const void *data, unsigned int len);
+
+/* Convert strings to permissions. False if a problem. */
+bool xs_strings_to_perms(struct xs_permissions *perms, unsigned int num,
+ const char *strings);
+
+/* Convert permissions to a string (up to len MAX_STRLEN(domid_t)+1). */
+bool xs_perm_to_string(const struct xs_permissions *perm, char *buffer);
+
+/* Given a string and a length, count how many strings (nul terms). */
+unsigned int xs_count_strings(const char *strings, unsigned int len);
+
+#endif /* _XS_LIB_H */
diff --git a/tools/xenstore/xs_random.c b/tools/xenstore/xs_random.c
new file mode 100644
index 0000000000..675cc89093
--- /dev/null
+++ b/tools/xenstore/xs_random.c
@@ -0,0 +1,1646 @@
+/* Random tests.
+
+ We check that the results from a real filesystem are the same.
+*/
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include "xs.h"
+#include "talloc.h"
+#include "utils.h"
+
+struct ops
+{
+ char *name;
+
+ char **(*dir)(void *h, const char *path, unsigned int *num);
+
+ void *(*read)(void *h, const char *path, unsigned int *len);
+
+ bool (*write)(void *h, const char *path, const void *data,
+ unsigned int len, int createflags);
+
+ bool (*mkdir)(void *h, const char *path);
+
+ bool (*rm)(void *h, const char *path);
+
+ struct xs_permissions *(*get_perms)(void *h,
+ const char *path,
+ unsigned int *num);
+
+ bool (*set_perms)(void *h,
+ const char *path,
+ struct xs_permissions *perms,
+ unsigned int num);
+
+ bool (*transaction_start)(void *h, const char *subtree);
+ bool (*transaction_end)(void *h, bool abort);
+
+ /* Create and destroy a new handle. */
+ void *(*handle)(const char *path);
+ void (*close)(void *);
+};
+
+struct file_ops_info
+{
+ const char *base;
+ char *transact_base;
+ char *transact;
+};
+
+/* Turn a plain file into a directory holding the file's old contents
+ * as ".DATA" (and its perms as ".perms"), mimicking the daemon's model
+ * where any node can have both a value and children. */
+static void convert_to_dir(const char *dirname)
+{
+	/* talloc_asprintf parents the temp name on dirname's context. */
+	char *tmpname = talloc_asprintf(dirname, "%s.tmp", dirname);
+	if (rename(dirname, tmpname) != 0)
+		barf_perror("Failed to rename %s to %s", dirname, tmpname);
+	if (mkdir(dirname, 0700) != 0)
+		barf_perror("Failed to mkdir %s", dirname);
+	if (rename(tmpname,talloc_asprintf(dirname, "%s/.DATA", dirname)) != 0)
+		barf_perror("Failed to rename into %s", dirname);
+	/* If perms exists, move it in. */
+	rename(talloc_asprintf(dirname, "%s.perms", dirname),
+	       talloc_asprintf(dirname, "%s/.perms", dirname));
+}
+
+/* Files can be used as dirs, too.  Convert them when they are:
+ * if the parent path of filename is currently a regular file, turn
+ * it into a directory (see convert_to_dir). */
+static void maybe_convert_to_directory(const char *filename)
+{
+	struct stat st;
+	/* Cast to int: the %.*s precision argument must be int, but a
+	 * pointer difference is ptrdiff_t (UB to pass it as-is). */
+	char *dirname = talloc_asprintf(filename, "%.*s",
+					(int)(strrchr(filename, '/') - filename),
+					filename);
+	if (lstat(dirname, &st) == 0 && S_ISREG(st.st_mode))
+		convert_to_dir(dirname);
+}
+
+/* Map a store path to a filesystem path, under the transaction copy
+ * if one is active, otherwise under the base directory. */
+static char *get_name(struct file_ops_info *info, const char *path)
+{
+	if (info->transact_base)
+		return talloc_asprintf(path, "%s%s", info->transact_base,
+				       path);
+	return talloc_asprintf(path, "%s%s", info->base, path);
+}
+
+/* Same as get_name, but also converts a file blocking the path into
+ * a directory first (files may be used as dirs). */
+static char *path_to_name(struct file_ops_info *info, const char *path)
+{
+	char *filename = get_name(info, path);
+	maybe_convert_to_directory(filename);
+	return filename;
+}
+
+/* Is child a subnode of parent, or equal? */
+static bool is_child(const char *child, const char *parent)
+{
+	unsigned int plen = strlen(parent);
+
+	/* / should really be "" for this algorithm to work, but that's a
+	 * usability nightmare. */
+	if (streq(parent, "/"))
+		return true;
+
+	if (strncmp(child, parent, plen) != 0)
+		return false;
+
+	/* Prefix matches: it's a child only at a path boundary. */
+	switch (child[plen]) {
+	case '/':
+	case '\0':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* During a transaction, writes are only allowed inside the transaction
+ * subtree; elsewhere the tree is effectively read-only (EROFS). */
+static bool write_ok(struct file_ops_info *info, const char *path)
+{
+	if (info->transact && !is_child(path, info->transact)) {
+		errno = EROFS;
+		return false;
+	}
+	return true;
+}
+
+/* List a directory's entries (names containing '.' are internal
+ * metadata and hidden).  Returns one malloc'ed allocation holding the
+ * pointer array followed by the string data; NULL with errno on error. */
+static char **file_directory(struct file_ops_info *info,
+			     const char *path, unsigned int *num)
+{
+	char **ret;
+	DIR *dir;
+	struct dirent *dirent;
+	char *p, *dirname = path_to_name(info, path);
+	unsigned int i, len = 0;
+	struct stat st;
+
+	/* If it exists, but isn't a directory, we convert it. */
+	if (lstat(dirname, &st) == 0 && !S_ISDIR(st.st_mode))
+		convert_to_dir(dirname);
+
+	*num = 0;
+	dir = opendir(dirname);
+	if (!dir)
+		return NULL;
+
+	/* Once to count them. */
+	while ((dirent = readdir(dir)) != NULL) {
+		if (strchr(dirent->d_name, '.'))
+			continue;
+		len += strlen(dirent->d_name) + 1;
+		(*num)++;
+	}
+	rewinddir(dir);
+
+	/* Now allocate and fill in. */
+	ret = malloc(sizeof(char *) * *num + len);
+	if (!ret) {
+		/* Don't dereference a failed allocation below. */
+		closedir(dir);
+		return NULL;
+	}
+	p = (char *)&ret[*num];
+	i = 0;
+	while ((dirent = readdir(dir)) != NULL) {
+		if (strchr(dirent->d_name, '.'))
+			continue;
+		ret[i] = p;
+		strcpy(p, dirent->d_name);
+		p += strlen(p) + 1;
+		i++;
+	}
+	closedir(dir);
+
+	return ret;
+}
+
+/* If filename is a directory, its value lives in the ".DATA" file
+ * inside it; otherwise the file itself holds the value. */
+static char *filename_to_data(const char *filename)
+{
+	struct stat st;
+
+	if (lstat(filename, &st) == 0 && S_ISDIR(st.st_mode))
+		return talloc_asprintf(filename, "%s/.DATA", filename);
+	/* Cast drops const; callers treat the result as read-only. */
+	return (char *)filename;
+}
+
+/* Read a node's value from the filesystem backend.
+ * Returns the file contents (via grab_file) with *len set, or NULL
+ * with errno set. */
+static void *file_read(struct file_ops_info *info,
+		       const char *path, unsigned int *len)
+{
+	void *ret;
+	char *filename = filename_to_data(path_to_name(info, path));
+	unsigned long size;
+
+	ret = grab_file(filename, &size);
+	/* Directory exists, .DATA doesn't: report as EISDIR to match
+	 * the daemon's behaviour for valueless directories. */
+	if (!ret && errno == ENOENT && strends(filename, ".DATA"))
+		errno = EISDIR;
+	*len = size;
+	return ret;
+}
+
+/* Read a node's permissions from its ".perms" sidecar file (inside the
+ * node for directories, alongside it for plain files).  If no perms
+ * file was written, synthesize the default: owner 0, readable for "/"
+ * and no access otherwise.  NULL if the node itself doesn't exist. */
+static struct xs_permissions *file_get_perms(struct file_ops_info *info,
+					     const char *path,
+					     unsigned int *num)
+{
+	void *perms;
+	struct xs_permissions *ret;
+	char *filename = path_to_name(info, path);
+	char *permfile;
+	unsigned long size;
+	struct stat st;
+
+	/* No permfile: we didn't bother, return defaults. */
+	if (lstat(filename, &st) != 0)
+		return NULL;
+
+	if (S_ISDIR(st.st_mode))
+		permfile = talloc_asprintf(path, "%s/.perms", filename);
+	else
+		permfile = talloc_asprintf(path, "%s.perms", filename);
+
+	perms = grab_file(permfile, &size);
+	if (!perms) {
+		ret = new(struct xs_permissions);
+		ret[0].id = 0;
+		/* Default for root is readable. */
+		if (streq(path, "/"))
+			ret[0].perms = XS_PERM_READ;
+		else
+			ret[0].perms = XS_PERM_NONE;
+		*num = 1;
+		release_file(perms, size);
+		return ret;
+	}
+	*num = xs_count_strings(perms, size);
+
+	ret = new_array(struct xs_permissions, *num);
+	if (!xs_strings_to_perms(ret, *num, perms))
+		barf("Reading permissions from %s", permfile);
+	release_file(perms, size);
+	return ret;
+}
+
+/* Write a node's permissions to its ".perms" sidecar file.  At least
+ * one entry (the owner) is required; the node must already exist and
+ * be writable in the current transaction. */
+static bool file_set_perms(struct file_ops_info *info,
+			   const char *path,
+			   struct xs_permissions *perms,
+			   unsigned int num)
+{
+	unsigned int i;
+	char *filename = path_to_name(info, path);
+	char *permfile;
+	int fd;
+	struct stat st;
+
+	if (num < 1) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if (!write_ok(info, path))
+		return false;
+
+	/* Check non-perm file exists/ */
+	if (lstat(filename, &st) != 0)
+		return false;
+
+	if (S_ISDIR(st.st_mode))
+		permfile = talloc_asprintf(path, "%s/.perms", filename);
+	else
+		permfile = talloc_asprintf(path, "%s.perms", filename);
+
+	fd = open(permfile, O_WRONLY|O_CREAT|O_TRUNC, 0600);
+	if (fd < 0)
+		return false;
+
+	/* One nul-terminated entry per permission, as the daemon does. */
+	for (i = 0; i < num; i++) {
+		char buffer[100];
+
+		if (!xs_perm_to_string(&perms[i], buffer)) {
+			/* Preserve the conversion error across close(). */
+			int saved_errno = errno;
+			close(fd);
+			errno = saved_errno;
+			return false;
+		}
+		if (write(fd, buffer, strlen(buffer) + 1)
+		    != (int)strlen(buffer) + 1)
+			barf_perror("Failed to write perm");
+	}
+	close(fd);
+	return true;
+}
+
+/* FILE backend: write @len bytes of @data to node @path.
+ * @createflags must be 0, O_CREAT or O_CREAT|O_EXCL to mirror the xs
+ * library's checks.  Returns false with errno set on failure. */
+static bool file_write(struct file_ops_info *info,
+		       const char *path, const void *data,
+		       unsigned int len, int createflags)
+{
+	/* Presumably filename_to_data() appends ".DATA" when the node's
+	 * directory already exists -- TODO confirm against its source. */
+	char *filename = filename_to_data(path_to_name(info, path));
+	int fd;
+
+	/* Kernel isn't strict, but library is. */
+	if (createflags & ~(O_CREAT|O_EXCL)) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if (!write_ok(info, path))
+		return false;
+
+	/* We regard it as existing if dir exists. */
+	if (strends(filename, ".DATA")) {
+		if (!createflags)
+			createflags = O_CREAT;
+		if (createflags & O_EXCL) {
+			errno = EEXIST;
+			return false;
+		}
+	}
+
+	fd = open(filename, createflags|O_TRUNC|O_WRONLY, 0600);
+	if (fd < 0) {
+		/* FIXME: Another hack: open() on an existing directory
+		 * without O_CREAT gives EISDIR; daemon would say EEXIST. */
+		if (!(createflags & O_CREAT) && errno == EISDIR)
+			errno = EEXIST;
+		return false;
+	}
+
+	if (write(fd, data, len) != (int)len)
+		barf_perror("Bad write to %s", filename);
+
+	close(fd);
+	return true;
+}
+
+/* FILE backend: create directory node @path.
+ * Check order deliberately mimics the daemon so errno values agree. */
+static bool file_mkdir(struct file_ops_info *info, const char *path)
+{
+	char *dirname = path_to_name(info, path);
+
+	/* Same effective order as daemon, so error returns are right. */
+	if (mkdir(dirname, 0700) != 0) {
+		/* write_ok() may replace errno, apparently to match the
+		 * daemon's permission-check precedence. */
+		if (errno != ENOENT && errno != ENOTDIR)
+			write_ok(info, path);
+		return false;
+	}
+
+	if (!write_ok(info, path)) {
+		/* Roll back the mkdir, preserving write_ok's errno. */
+		int saved_errno = errno;
+		rmdir(dirname);
+		errno = saved_errno;
+		return false;
+	}
+	return true;
+}
+
+/* Run a shell command; abort the whole test if it fails.
+ * NOTE(review): on a non-zero exit status errno is unrelated to the
+ * failure, so barf_perror's strerror text may be misleading. */
+static void do_command(const char *cmd)
+{
+	int ret;
+
+	ret = system(cmd);
+	if (ret == -1 || !WIFEXITED(ret) || WEXITSTATUS(ret) != 0)
+		barf_perror("Failed '%s': %i", cmd, ret);
+}
+
+/* FILE backend: delete @path, its children and its .perms file.
+ * Removing "/" or the root of the open transaction is EINVAL. */
+static bool file_rm(struct file_ops_info *info, const char *path)
+{
+	char *filename = path_to_name(info, path);
+	struct stat st;
+
+	/* Can't delete the subtree a transaction is open on. */
+	if (info->transact && streq(info->transact, path)) {
+		errno = EINVAL;
+		return false;
+	}
+
+	if (lstat(filename, &st) != 0)
+		return false;
+
+	if (!write_ok(info, path))
+		return false;
+
+	if (streq(path, "/")) {
+		errno = EINVAL;
+		return false;
+	}
+
+	do_command(talloc_asprintf(path, "rm -f %s.perms; rm -r %s",
+				   filename, filename));
+	return true;
+}
+
+/* FILE backend: start a transaction on @subtree.
+ * Implemented by "cp -r"-ing the entire store to "<base>.transact"
+ * (the whole tree, not just the subtree).  Only one transaction may be
+ * open per handle: EBUSY otherwise, ENOENT-style failure via lstat if
+ * the subtree doesn't exist. */
+static bool file_transaction_start(struct file_ops_info *info,
+				   const char *subtree)
+{
+	char *cmd;
+	char *filename = path_to_name(info, subtree);
+	struct stat st;
+
+	if (info->transact) {
+		errno = EBUSY;
+		return false;
+	}
+
+	if (lstat(filename, &st) != 0)
+		return false;
+
+	cmd = talloc_asprintf(NULL, "cp -r %s %s.transact",
+			      info->base, info->base);
+	do_command(cmd);
+	talloc_free(cmd);
+
+	info->transact_base = talloc_asprintf(NULL, "%s.transact", info->base);
+	info->transact = talloc_strdup(NULL, subtree);
+	return true;
+}
+
+/* FILE backend: end the current transaction.
+ * On abort the working copy is discarded; on commit it replaces the
+ * original tree wholesale.  ENOENT if no transaction is open. */
+static bool file_transaction_end(struct file_ops_info *info, bool abort)
+{
+	char *old, *cmd;
+
+	if (!info->transact) {
+		errno = ENOENT;
+		return false;
+	}
+
+	if (abort) {
+		cmd = talloc_asprintf(NULL, "rm -r %s", info->transact_base);
+		do_command(cmd);
+		goto success;
+	}
+
+	old = talloc_asprintf(NULL, "rm -rf %s", info->base);
+	do_command(old);
+	talloc_free(old);
+
+	cmd = talloc_asprintf(NULL, "mv %s %s",
+			      info->transact_base, info->base);
+	do_command(cmd);
+
+success:
+	talloc_free(cmd);
+	talloc_free(info->transact);
+	talloc_free(info->transact_base);
+	info->transact = NULL;
+	info->transact_base = NULL;
+	return true;
+}
+
+/* Create a FILE-backend handle rooted at @dir.
+ * NOTE(review): @dir is stored without copying -- the caller must keep
+ * it alive for the lifetime of the handle. */
+static struct file_ops_info *file_handle(const char *dir)
+{
+	struct file_ops_info *info = talloc(NULL, struct file_ops_info);
+
+	info->base = dir;
+	info->transact_base = NULL;
+	info->transact = NULL;
+	return info;
+}
+
+/* Destroy a FILE-backend handle (talloc frees any children too). */
+static void file_close(struct file_ops_info *handle)
+{
+	talloc_free(handle);
+}
+
+/* Connect to the real xenstored; @dir is unused for this backend. */
+static struct xs_handle *xs_handle(const char *dir __attribute__((unused)))
+{
+	struct xs_handle *h;
+
+	h = xs_daemon_open();
+	if (!h)
+		barf_perror("Connecting to xs daemon");
+	return h;
+}
+
+/* Disconnect from xenstored. */
+static void xs_close(struct xs_handle *handle)
+{
+	xs_daemon_close(handle);
+}
+
+/* Generic operations vtable for the FILE backend.  The (void *) casts
+ * erase the per-backend handle type so both backends share "struct ops".
+ * NOTE(review): calling through a function pointer cast to a different
+ * type is technically undefined behaviour, though it works on the
+ * targets this test runs on. */
+struct ops file_ops = {
+	.name = "FILE",
+	.dir = (void *)file_directory,
+	.read = (void *)file_read,
+	.write = (void *)file_write,
+	.mkdir = (void *)file_mkdir,
+	.rm = (void *)file_rm,
+	.get_perms = (void *)file_get_perms,
+	.set_perms = (void *)file_set_perms,
+	.transaction_start = (void *)file_transaction_start,
+	.transaction_end = (void *)file_transaction_end,
+	.handle = (void *)file_handle,
+	.close = (void *)file_close,
+};
+
+/* Operations vtable for the real xenstored daemon backend. */
+struct ops xs_ops = {
+	.name = "XS",
+	.dir = (void *)xs_directory,
+	.read = (void *)xs_read,
+	.write = (void *)xs_write,
+	.mkdir = (void *)xs_mkdir,
+	.rm = (void *)xs_rm,
+	.get_perms = (void *)xs_get_permissions,
+	.set_perms = (void *)xs_set_permissions,
+	.transaction_start = (void *)xs_transaction_start,
+	.transaction_end = (void *)xs_transaction_end,
+	.handle = (void *)xs_handle,
+	.close = (void *)xs_close,
+};
+
+/* qsort() comparator for an array of C strings (char *). */
+static int strptrcmp(const void *a, const void *b)
+{
+	return strcmp(*(char **)a, *(char **)b);
+}
+
+/* Sort directory entries so both backends list in a comparable order. */
+static void sort_dir(char **dir, unsigned int num)
+{
+	qsort(dir, num, sizeof(char *), strptrcmp);
+}
+
+/* Recursively render the entries of @node (@dir, @numdirs) as indented
+ * text: one "name: perms" line per node, an optional "(contents)" line,
+ * then the children.  Entries are sorted first so dumps from different
+ * backends compare equal with streq().  Returns NULL on backend error.
+ * NOTE(review): the perms/contents/subdirs allocated in the loop are
+ * leaked on the early NULL returns -- tolerable in a test tool. */
+static char *dump_dir(struct ops *ops,
+		      void *h,
+		      const char *node,
+		      char **dir,
+		      unsigned int numdirs,
+		      unsigned int depth)
+{
+	char *ret = talloc_strdup(node, "");
+	unsigned int i;
+	char spacing[depth+1];
+
+	memset(spacing, ' ', depth);
+	spacing[depth] = '\0';
+
+	sort_dir(dir, numdirs);
+
+	for (i = 0; i < numdirs; i++) {
+		struct xs_permissions *perms;
+		unsigned int j, numperms;
+		unsigned int len;
+		char *contents;
+		unsigned int subnum;
+		char **subdirs;
+		char *subret;
+		char *subnode = talloc_asprintf(node, "%s/%s", node, dir[i]);
+
+		perms = ops->get_perms(h, subnode, &numperms);
+		if (!perms)
+			return NULL;
+		ret = talloc_asprintf_append(ret, "%s%s: ", spacing, dir[i]);
+		for (j = 0; j < numperms; j++) {
+			char buffer[100];
+			if (!xs_perm_to_string(&perms[j], buffer))
+				barf("perm to string");
+			ret = talloc_asprintf_append(ret, "%s ", buffer);
+		}
+		free(perms);
+		ret = talloc_asprintf_append(ret, "\n");
+
+		/* Even directories can have contents. */
+		contents = ops->read(h, subnode, &len);
+		if (!contents) {
+			/* EISDIR just means "pure directory": not an error. */
+			if (errno != EISDIR)
+				return NULL;
+		} else {
+			ret = talloc_asprintf_append(ret, " %s(%.*s)\n",
+						     spacing, len, contents);
+			free(contents);
+		}
+
+		/* Every node is a directory. */
+		subdirs = ops->dir(h, subnode, &subnum);
+		if (!subdirs)
+			return NULL;
+		subret = dump_dir(ops, h, subnode, subdirs, subnum, depth+1);
+		if (!subret)
+			return NULL;
+		ret = talloc_asprintf_append(ret, "%s", subret);
+		free(subdirs);
+	}
+	return ret;
+}
+
+/* Dump a backend's entire tree as text, or NULL on error.
+ * The result is reparented onto the NULL talloc context; the caller
+ * owns (and must talloc_free) it. */
+static char *dump(struct ops *ops, void *h)
+{
+	char **subdirs;
+	unsigned int subnum;
+	char *ret = NULL, *root = talloc_strdup(NULL, "/");
+
+	subdirs = ops->dir(h, root, &subnum);
+	if (subdirs) {
+		ret = dump_dir(ops, h, talloc_strdup(root, ""), subdirs,
+			       subnum, 0);
+		free(subdirs);
+		/* Detach from root before root is freed below. */
+		if (ret)
+			talloc_steal(NULL, ret);
+	}
+	talloc_free(root);
+	return ret;
+}
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+ * hash(), hash2(), hash3, and mix() are externally useful functions.
+ * Routines to test the hash are included if SELF_TEST is defined.
+ * You can use this free for any purpose. It has no warranty.
+ *
+ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are surely my fault. -DaveM
+ */
+
+/* NOTE: Arguments are modified. */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/* The golden ration: an arbitrary value */
+#define JHASH_GOLDEN_RATIO 0x9e3779b9
+
+/* The most generic version, hashes an arbitrary sequence
+ * of bytes. No alignment or length assumptions are made about
+ * the input key.
+ */
+/* NOTE: u32/u8 here are project typedefs, not <stdint.h> names. */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c, len;
+	const u8 *k = key;
+
+	len = length;
+	a = b = JHASH_GOLDEN_RATIO;
+	c = initval;
+
+	while (len >= 12) {
+		a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
+		b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
+		c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
+
+		__jhash_mix(a,b,c);
+
+		k += 12;
+		len -= 12;
+	}
+
+	c += length;
+	/* Deliberate fallthrough on every case: fold in trailing bytes. */
+	switch (len) {
+	case 11: c += ((u32)k[10]<<24);
+	case 10: c += ((u32)k[9]<<16);
+	case 9 : c += ((u32)k[8]<<8);
+	case 8 : b += ((u32)k[7]<<24);
+	case 7 : b += ((u32)k[6]<<16);
+	case 6 : b += ((u32)k[5]<<8);
+	case 5 : b += k[4];
+	case 4 : a += ((u32)k[3]<<24);
+	case 3 : a += ((u32)k[2]<<16);
+	case 2 : a += ((u32)k[1]<<8);
+	case 1 : a += k[0];
+	};
+
+	__jhash_mix(a,b,c);
+
+	return c;
+}
+
+/* A special optimized version that handles 1 or more of u32s.
+ * The length parameter here is the number of u32s in the key.
+ */
+static inline u32 jhash2(u32 *k, u32 length, u32 initval)
+{
+	u32 a, b, c, len;
+
+	a = b = JHASH_GOLDEN_RATIO;
+	c = initval;
+	len = length;
+
+	while (len >= 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		__jhash_mix(a, b, c);
+		k += 3; len -= 3;
+	}
+
+	c += length * 4;
+
+	/* Deliberate fallthrough: fold in trailing words. */
+	switch (len) {
+	case 2 : b += k[1];
+	case 1 : a += k[0];
+	};
+
+	__jhash_mix(a,b,c);
+
+	return c;
+}
+
+
+/* A special ultra-optimized versions that knows they are hashing exactly
+ * 3, 2 or 1 word(s).
+ *
+ * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
+ * done at the end is not done here.
+ */
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += JHASH_GOLDEN_RATIO;
+	b += JHASH_GOLDEN_RATIO;
+	c += initval;
+
+	__jhash_mix(a, b, c);
+
+	return c;
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return jhash_3words(a, b, 0, initval);
+}
+
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+	return jhash_3words(a, 0, 0, initval);
+}
+
+/* Deterministic pseudo-random stream derived from *state; advances
+ * *state by one per call.  The previous version read and modified
+ * *state in two unsequenced function arguments -- undefined behaviour
+ * (CERT EXP30-C) -- so the evaluation order is now made explicit:
+ * hash the old value, salted by the incremented value. */
+static unsigned int get_randomness(int *state)
+{
+	int cur = (*state)++;
+
+	return jhash_1word(cur, *state * 1103515243);
+}
+
+/* Build a random node path like "/3/14/7" (1 in 20 chance of "/").
+ * Each component is 0..14; length is geometrically distributed.
+ * Returned string is talloc'ed on the NULL context. */
+static char *random_path(int *state)
+{
+	unsigned int i;
+	char *ret = NULL;
+
+	if (get_randomness(state) % 20 == 0)
+		return talloc_strdup(NULL, "/");
+
+	for (i = 0; i < 1 || (get_randomness(state) % 2); i++) {
+		ret = talloc_asprintf_append(ret, "/%i",
+					     get_randomness(state) % 15);
+	}
+	return ret;
+}
+
+/* Turn a bool result into a canonical "OK"/"FAILED:<errno text>"
+ * string so both backends' outcomes can be compared with streq(). */
+static char *bool_to_errstring(bool result)
+{
+	if (result)
+		return talloc_strdup(NULL, "OK");
+
+	/* Real daemon can never return this. */
+	if (errno == ENOTDIR)
+		errno = ENOENT;
+	return talloc_asprintf(NULL, "FAILED:%s", strerror(errno));
+}
+
+/* Canonicalize a dir listing: sorted, newline-separated.  Consumes
+ * (frees) @dir; NULL @dir means the op failed (errno describes why). */
+static char *linearize_dir(char **dir, unsigned int *num)
+{
+	char *result = NULL;
+	unsigned int i;
+
+	if (!dir)
+		return bool_to_errstring(false);
+
+	if (!*num) {
+		free(dir);
+		return talloc_strdup(NULL, "");
+	}
+
+	sort_dir(dir, *num);
+	for (i = 0; i < *num; i++)
+		result = talloc_asprintf_append(result, "%s\n", dir[i]);
+	free(dir);
+	return result;
+}
+
+/* Canonicalize a read result as "<len>:<bytes>".  Consumes @read. */
+static char *linearize_read(char *read, unsigned int *size)
+{
+	char *ret;
+
+	if (!read)
+		return bool_to_errstring(false);
+
+	ret = talloc_asprintf(NULL, "%i:%.*s", *size, *size, read);
+	free(read);
+	return ret;
+}
+
+/* Canonicalize a permission array as "(id perms)(id perms)...".
+ * Consumes @perms. */
+static char *linearize_perms(struct xs_permissions *perms, unsigned int *size)
+{
+	char *ret = NULL;
+	unsigned int i;
+
+	if (!perms)
+		return bool_to_errstring(false);
+
+	for (i = 0; i < *size; i++)
+		ret = talloc_asprintf_append(ret, "(%u %u)",
+					     perms[i].id, perms[i].perms);
+
+	free(perms);
+	return ret;
+}
+
+/* Pick write flags: usually valid (0 / O_CREAT / O_CREAT|O_EXCL), and
+ * 1 in 4 deliberately garbage to exercise the EINVAL path. */
+static int random_flags(int *state)
+{
+	switch (get_randomness(state) % 4) {
+	case 0:
+		return 0;
+	case 1:
+		return O_CREAT;
+	case 2:
+		return O_CREAT|O_EXCL;
+	default:
+		return get_randomness(state);
+	}
+}
+
+/* Do the next operation, return the results.
+ * @state seeds the deterministic op stream: the same state always
+ * produces the same operation, which is what lets reduce() replay.
+ * The returned talloc string starts "OK" on success ("OK:START-
+ * TRANSACT:<path>" / "OK:STOP-TRANSACT" for transactions) or
+ * "FAILED:<errno text>"; @name is stolen onto it. */
+static char *do_next_op(struct ops *ops, void *h, int state, bool verbose)
+{
+	char *name;
+	unsigned int num;
+	char *ret;
+
+	if (verbose)
+		printf("State %i: ", state);
+
+	name = random_path(&state);
+	switch (get_randomness(&state) % 9) {
+	case 0:
+		if (verbose)
+			printf("DIR %s\n", name);
+		ret = linearize_dir(ops->dir(h, name, &num), &num);
+		break;
+	case 1:
+		if (verbose)
+			printf("READ %s\n", name);
+		ret = linearize_read(ops->read(h, name, &num), &num);
+		break;
+	case 2: {
+		int flags = random_flags(&state);
+		char *contents = talloc_asprintf(NULL, "%i",
+						 get_randomness(&state));
+		unsigned int len = get_randomness(&state)%(strlen(contents)+1);
+		if (verbose)
+			printf("WRITE %s %s %.*s\n", name,
+			       flags == O_CREAT ? "O_CREAT"
+			       : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
+			       : flags == 0 ? "0" : "CRAPFLAGS",
+			       len, contents);
+		ret = bool_to_errstring(ops->write(h, name, contents, len,
+						   flags));
+		talloc_steal(ret, contents);
+		break;
+	}
+	case 3:
+		if (verbose)
+			printf("MKDIR %s\n", name);
+		ret = bool_to_errstring(ops->mkdir(h, name));
+		break;
+	case 4:
+		if (verbose)
+			printf("RM %s\n", name);
+		ret = bool_to_errstring(ops->rm(h, name));
+		break;
+	case 5:
+		if (verbose)
+			printf("GETPERMS %s\n", name);
+		ret = linearize_perms(ops->get_perms(h, name, &num),
+				      &num);
+		break;
+	case 6: {
+		/* NOTE(review): num may be 0 here, and a zero-length VLA
+		 * is undefined behaviour in C -- confirm/guard. */
+		unsigned int i, num = get_randomness(&state)%8;
+		struct xs_permissions perms[num];
+
+		if (verbose)
+			printf("SETPERMS %s: ", name);
+		for (i = 0; i < num; i++) {
+			perms[i].id = get_randomness(&state)%8;
+			perms[i].perms = get_randomness(&state)%4;
+			if (verbose)
+				printf("%i%c ", perms[i].id,
+				       perms[i].perms == XS_PERM_WRITE ? 'W'
+				       : perms[i].perms == XS_PERM_READ ? 'R'
+				       : perms[i].perms ==
+				       (XS_PERM_READ|XS_PERM_WRITE) ? 'B'
+				       : 'N');
+		}
+		if (verbose)
+			printf("\n");
+		ret = bool_to_errstring(ops->set_perms(h, name, perms,
+						       num));
+		break;
+	}
+	case 7: {
+		if (verbose)
+			printf("START %s\n", name);
+		ret = bool_to_errstring(ops->transaction_start(h, name));
+		if (streq(ret, "OK")) {
+			/* Embed the path so callers can snapshot it. */
+			talloc_free(ret);
+			ret = talloc_asprintf(NULL, "OK:START-TRANSACT:%s",
+					      name);
+		}
+
+		break;
+	}
+	case 8: {
+		bool abort = (get_randomness(&state) % 2);
+
+		if (verbose)
+			printf("STOP %s\n", abort ? "ABORT" : "COMMIT");
+		ret = bool_to_errstring(ops->transaction_end(h, abort));
+		if (streq(ret, "OK")) {
+			talloc_free(ret);
+			ret = talloc_strdup(NULL, "OK:STOP-TRANSACT");
+		}
+		break;
+	}
+	default:
+		barf("Impossible randomness");
+	}
+
+	talloc_steal(ret, name);
+	return ret;
+}
+
+/* PID of the forked xenstored test daemon, or 0 if not running. */
+static int daemon_pid;
+
+/* Shut the daemon down (politely via xs_shutdown, then SIGTERM) and
+ * clear out the testsuite temp directory. */
+static void cleanup_xs_ops(void)
+{
+	char *cmd;
+	if (daemon_pid) {
+		struct xs_handle *h;
+		h = xs_daemon_open();
+		if (h) {
+			if (xs_shutdown(h)) {
+				waitpid(daemon_pid, NULL, 0);
+				daemon_pid = 0;
+			}
+			xs_daemon_close(h);
+		}
+		/* Polite shutdown failed: kill it. */
+		if (daemon_pid) {
+			kill(daemon_pid, SIGTERM);
+			waitpid(daemon_pid, NULL, 0);
+		}
+	}
+
+	cmd = talloc_asprintf(NULL, "rm -rf testsuite/tmp/*");
+	do_command(cmd);
+	talloc_free(cmd);
+}
+
+/* Remove the FILE backend tree and any stale transaction copy. */
+static void cleanup_file_ops(const char *dir)
+{
+	char *cmd;
+
+	cmd = talloc_asprintf(NULL, "rm -rf %s %s.transact", dir, dir);
+	do_command(cmd);
+	talloc_free(cmd);
+}
+
+/* Reset both backends to a pristine state. */
+static void cleanup(const char *dir)
+{
+	cleanup_xs_ops();
+	cleanup_file_ops(dir);
+}
+
+/* Create the FILE backend root; it must not already exist. */
+static void setup_file_ops(const char *dir)
+{
+	if (mkdir(dir, 0700) != 0)
+		barf_perror("Creating directory %s", dir);
+}
+
+/* Fork xenstored_test; the child prints its PID to the pipe when it's
+ * ready and the parent blocks on that.
+ * NOTE(review): pipe() is unchecked, and read() returning 0 (child
+ * died before writing) passes the "< 0" test -- confirm intended. */
+static void setup_xs_ops(void)
+{
+	int fds[2];
+
+	/* Start daemon. */
+	pipe(fds);
+	if ((daemon_pid = fork())) {
+		/* Child writes PID when it's ready: we wait for that. */
+		char buffer[20];
+		close(fds[1]);
+		if (read(fds[0], buffer, sizeof(buffer)) < 0)
+			barf("Failed to summon daemon");
+		close(fds[0]);
+	} else {
+		dup2(fds[1], STDOUT_FILENO);
+		close(fds[0]);
+#if 0
+		execlp("valgrind", "valgrind", "xenstored_test", "--output-pid",
+		       "--no-fork", NULL);
+#else
+		execlp("./xenstored_test", "xenstored_test", "--output-pid",
+		       "--no-fork", NULL);
+#endif
+		exit(1);
+	}
+}
+
+/* Bring up both backends.  (NOTE(review): stray ';' after body.) */
+static void setup(const char *dir)
+{
+	setup_file_ops(dir);
+	setup_xs_ops();
+};
+
+/* Parameters threaded through the --simple random test. */
+struct simple_data
+{
+	unsigned int seed;	/* Base seed for the op stream. */
+	bool print_progress;	/* Emit progress dots while running. */
+	bool fast;		/* Skip the transaction snapshot checks. */
+	struct ops *ops;	/* Backend under test. */
+	const char *dir;	/* FILE backend root directory. */
+};
+
+/* Just a random test.  Don't care about results, just that it doesn't
+ * go boom.  Runs ops i = 0..number-1 (skipping those trymap marks
+ * false) and returns the index reached: == number means full success.
+ * Unless data->fast, also checks that while a transaction is open an
+ * independent handle still sees the pre-transaction snapshot. */
+static unsigned int try_simple(const bool *trymap,
+			       unsigned int number,
+			       bool verbose,
+			       void *_data)
+{
+	unsigned int i, print;
+	void *h;
+	char *snapshot = NULL;
+	struct simple_data *data = _data;
+
+	/* Fresh backend state for every run. */
+	if (data->ops == &xs_ops) {
+		cleanup_xs_ops();
+		setup_xs_ops();
+	} else {
+		cleanup_file_ops(data->dir);
+		setup_file_ops(data->dir);
+	}
+	h = data->ops->handle(data->dir);
+
+	/* Roughly one dot per 1/76th of the run. */
+	print = number / 76;
+	if (!print)
+		print = 1;
+
+	for (i = 0; i < number; i++) {
+		char *ret;
+
+		if (data->print_progress) {
+			if (i % print == 0) {
+				printf(".");
+				fflush(stdout);
+			}
+		}
+
+		if (trymap && !trymap[i])
+			continue;
+
+		ret = do_next_op(data->ops, h, i + data->seed, verbose);
+		/* NOTE(review): ret may hold no '\n' (e.g. "OK"), making
+		 * strchr() return NULL and this subtraction undefined. */
+		if (verbose)
+			printf("-> %.*s\n", strchr(ret, '\n') - ret, ret);
+		if (streq(ret, "FAILED:Bad file descriptor"))
+			goto out;
+		/* Daemon gone?  Stop here. */
+		if (kill(daemon_pid, 0) != 0)
+			goto out;
+
+		if (!data->fast) {
+			if (strstarts(ret, "OK:START-TRANSACT:")) {
+				void *pre = data->ops->handle(data->dir);
+
+				snapshot = dump(data->ops, pre);
+				if (!snapshot)
+					goto out;
+				data->ops->close(pre);
+			} else if (streq(ret, "OK:STOP-TRANSACT")) {
+				talloc_free(snapshot);
+				snapshot = NULL;
+			}
+		}
+
+		talloc_free(ret);
+
+		/* While a transaction is open, outside view must still
+		 * match the snapshot taken at its start. */
+		if (snapshot) {
+			void *pre = data->ops->handle(data->dir);
+			char *contents;
+
+			contents = dump(data->ops, pre);
+			if (!contents)
+				goto out;
+
+			if (!streq(contents, snapshot))
+				goto out;
+
+			talloc_free(contents);
+			data->ops->close(pre);
+		}
+	}
+	if (data->print_progress)
+		printf("\n");
+
+out:
+	data->ops->close(h);
+	return i;
+}
+
+/* Binary elimination: try eliminating all of them, then reduce.
+ * Attempts to clear map[try_start..try_start+try_num) (skip those ops);
+ * a skip-set is kept only if the replay still fails at exactly op
+ * number-1 (the *same* failure).  Otherwise recurse on each half. */
+static void reduce(bool *map,
+		   unsigned int number,
+		   unsigned int try_start, unsigned int try_num,
+		   unsigned int (*try)(const bool *map,
+				       unsigned int number,
+				       bool verbose,
+				       void *),
+		   void *data)
+{
+	bool newmap[number];
+
+	if (try_num == 0)
+		return;
+
+	/* Try skipping everything between start and end. */
+	memcpy(newmap, map, sizeof(newmap));
+	memset(newmap + try_start, 0, try_num * sizeof(bool));
+
+	/* We want the *same* failure: must fail at "number-1". */
+	if (try(newmap, number, false, data) == number - 1) {
+		memset(map + try_start, 0, try_num * sizeof(bool));
+		return;
+	}
+
+	if (try_num == 1)
+		return;
+
+	/* Try each half... */
+	reduce(map, number, try_start, try_num/2, try, data);
+	reduce(map, number, try_start + try_num/2, try_num - try_num/2,
+	       try, data);
+}
+
+/* Minimize a failure seen at op @failed-1, then replay the reduced
+ * case verbosely.  Exits 1 on a reproduced failure, 2 if the reduced
+ * case unexpectedly passes. */
+static void reduce_problem(unsigned int failed,
+			   unsigned int (*try)(const bool *map,
+					       unsigned int number,
+					       bool verbose,
+					       void *data),
+			   void *data)
+{
+	bool map[failed];
+
+	memset(map, 1, sizeof(map));
+	reduce(map, failed, 0, failed-1, try, data);
+
+	printf("Cut down:\n");
+	if (try(map, failed, true, data) != failed - 1) {
+		printf("Except, that didn't actually fail.  Bugger!");
+		exit(2);
+	}
+	exit(1);
+}
+
+/* Just a random test.  Don't care about results, just that it doesn't
+ * go boom.  Runs try_simple over the XS backend; exits 0 on success,
+ * otherwise minimizes and replays via reduce_problem (never returns). */
+static void simple_test(const char *dir,
+			unsigned int iters, unsigned int seed,
+			bool fast, bool verbose)
+{
+	struct simple_data data;
+	unsigned int try;
+
+	data.seed = seed;
+	data.print_progress = !verbose;
+	data.fast = fast;
+	data.ops = &xs_ops;
+	data.dir = dir;
+
+	try = try_simple(NULL, iters, verbose, &data);
+	if (try == iters) {
+		cleanup_xs_ops();
+		printf("Succeeded\n");
+		exit(0);
+	}
+	printf("Failed on iteration %u\n", try + 1);
+	data.print_progress = false;
+	reduce_problem(try + 1, try_simple, &data);
+}
+
+/* Recursively compare node @node in backends a and b: permissions,
+ * contents and (sorted) children must all match.  Returns true when
+ * equal.  If a backend *errors* (rather than merely differing), *fail
+ * is set to that backend; on a plain mismatch *fail is left alone. */
+static bool ops_equal(struct ops *a, void *ah,
+		      struct ops *b, void *bh,
+		      const char *node,
+		      struct ops **fail)
+{
+	char **dira = NULL, **dirb = NULL;
+	char *dataa = NULL, *datab = NULL;
+	unsigned int i, numa, numb, lena, lenb;
+	struct xs_permissions *permsa = NULL, *permsb = NULL;
+	unsigned int numpermsa, numpermsb;
+	char *nodename;
+	bool ret = false;
+
+	/* FILE backend expects talloc'ed pointer. */
+	nodename = talloc_strdup(NULL, node);
+	permsa = a->get_perms(ah, nodename, &numpermsa);
+	if (!permsa) {
+		*fail = a;
+		goto out;
+	}
+	permsb = b->get_perms(bh, nodename, &numpermsb);
+	if (!permsb) {
+		*fail = b;
+		goto out;
+	}
+	if (numpermsa != numpermsb)
+		goto out;
+	for (i = 0; i < numpermsa; i++) {
+		if (permsa[i].perms != permsb[i].perms)
+			goto out;
+		if (permsa[i].id != permsb[i].id)
+			goto out;
+	}
+
+	/* Non-pure-directory nodes contain data; EISDIR is not an error. */
+	dataa = a->read(ah, nodename, &lena);
+	if (!dataa && errno != EISDIR) {
+		*fail = a;
+		goto out;
+	}
+	datab = b->read(bh, nodename, &lenb);
+	if (!datab && errno != EISDIR) {
+		*fail = b;
+		goto out;
+	}
+
+	if (dataa) {
+		if (!datab)
+			goto out;
+		if (lena != lenb)
+			goto out;
+
+		if (memcmp(dataa, datab, lena) != 0)
+			goto out;
+	} else
+		if (datab)
+			goto out;
+
+	/* Everything is a directory. */
+	dira = a->dir(ah, nodename, &numa);
+	if (!dira) {
+		*fail = a;
+		goto out;
+	}
+	dirb = b->dir(bh, nodename, &numb);
+	if (!dirb) {
+		*fail = b;
+		goto out;
+	}
+	if (numa != numb)
+		goto out;
+	/* Sort both so entry order can't cause a false mismatch. */
+	sort_dir(dira, numa);
+	sort_dir(dirb, numb);
+	for (i = 0; i < numa; i++) {
+		char subnode[strlen(node) + 1 + strlen(dira[i]) + 1];
+
+		if (!streq(dira[i], dirb[i]))
+			goto out;
+
+		strcpy(subnode, node);
+		if (!streq(node, "/"))
+			strcat(subnode, "/");
+		strcat(subnode, dira[i]);
+		if (!ops_equal(a, ah, b, bh, subnode, fail))
+			goto out;
+	}
+
+	ret = true;
+out:
+	/* Single cleanup point: everything was NULL-initialized. */
+	free(permsa);
+	free(permsb);
+	free(dataa);
+	free(datab);
+	free(dira);
+	free(dirb);
+	talloc_free(nodename);
+	return ret;
+}
+
+/* Parameters for the differential (default) test mode. */
+struct diff_data
+{
+	unsigned int seed;	/* Base seed for the op stream. */
+	bool print_progress;	/* Emit progress dots while running. */
+	bool fast;		/* Only compare trees at the very end. */
+	const char *dir;	/* FILE backend root directory. */
+};
+
+/* Differential: try both file and xs backend, watch for differences.
+ * Runs the identical op stream against both, requiring identical
+ * result strings and (unless fast) identical trees after every op.
+ * Returns the index reached; == number means full success. */
+static unsigned int try_diff(const bool *trymap,
+			     unsigned int number,
+			     bool verbose,
+			     void *_data)
+{
+	void *fileh, *xsh;
+	char *transact = NULL;
+	struct ops *fail;
+	struct diff_data *data = _data;
+	unsigned int i, print;
+
+	cleanup(data->dir);
+	setup(data->dir);
+
+	fileh = file_handle(data->dir);
+	xsh = xs_handle(data->dir);
+
+	print = number / 76;
+	if (!print)
+		print = 1;
+
+	for (i = 0; i < number; i++) {
+		char *file, *xs;
+
+		if (data->print_progress) {
+			if (i % print == 0) {
+				printf(".");
+				fflush(stdout);
+			}
+		}
+		if (trymap && !trymap[i])
+			continue;
+
+		if (verbose)
+			printf("FILE: ");
+
+		file = do_next_op(&file_ops, fileh, i+data->seed, verbose);
+		/* NOTE(review): '/' looks wrong -- try_simple truncates at
+		 * '\n', and strchr() returning NULL makes the subtraction
+		 * undefined.  Confirm intended. */
+		if (verbose)
+			printf("-> %.*s\n", strchr(file, '/') - file, file);
+
+		if (verbose)
+			printf("XS: ");
+		xs = do_next_op(&xs_ops, xsh, i+data->seed, verbose);
+		if (verbose)
+			printf("-> %.*s\n", strchr(xs, '/') - xs, xs);
+
+		/* Both backends must report the same outcome string. */
+		if (!streq(file, xs))
+			goto out;
+
+		/* Track the open transaction's subtree for extra checks. */
+		if (strstarts(file, "OK:START-TRANSACT:"))
+			transact = talloc_strdup(NULL,
+						 file +
+						 strlen("OK:START-TRANSACT:"));
+		else if (streq(file, "OK:STOP-TRANSACT")) {
+			talloc_free(transact);
+			transact = NULL;
+		}
+
+		talloc_free(file);
+		talloc_free(xs);
+
+		if (data->fast)
+			continue;
+
+		fail = NULL;
+		if (!ops_equal(&xs_ops, xsh, &file_ops, fileh, "/", &fail)) {
+			if (fail)
+				barf("%s failed during test\n", fail->name);
+			if (verbose)
+				printf("Trees differ:\nXS:%s\nFILE%s\n",
+				       dump(&xs_ops, xsh),
+				       dump(&file_ops, fileh));
+			goto out;
+		}
+
+		/* During a transaction, fresh handles (outside it) must
+		 * also agree on the transacted subtree. */
+		if (transact) {
+			void *fileh_pre = file_handle(data->dir);
+			void *xsh_pre = xs_handle(data->dir);
+
+			fail = NULL;
+			if (!ops_equal(&xs_ops, xsh_pre, &file_ops, fileh_pre,
+				       transact, &fail)) {
+				if (fail)
+					barf("%s failed during transact\n",
+					     fail->name);
+
+				xs_daemon_close(xsh_pre);
+				talloc_free(fileh_pre);
+				goto out;
+			}
+			xs_daemon_close(xsh_pre);
+			talloc_free(fileh_pre);
+		}
+	}
+	if (data->print_progress)
+		printf("\n");
+
+	fail = NULL;
+	if (data->fast)
+		if (!ops_equal(&xs_ops, xsh, &file_ops, fileh, "/", &fail))
+			barf("Final result not the same: try without --fast");
+out:
+	file_ops.close(fileh);
+	xs_ops.close(xsh);
+	return i;
+}
+
+/* Differential random test: compare results against file backend.
+ * Exits 0 on success, otherwise minimizes via reduce_problem. */
+static void diff_test(const char *dir,
+		      unsigned int iters, unsigned int seed, bool fast,
+		      bool verbose)
+{
+	struct diff_data data;
+	unsigned int try;
+
+	data.seed = seed;
+	data.print_progress = !verbose;
+	data.fast = fast;
+	data.dir = dir;
+
+	try = try_diff(NULL, iters, verbose, &data);
+	if (try == iters) {
+		cleanup_xs_ops();
+		printf("Succeeded\n");
+		exit(0);
+	}
+	printf("Failed on iteration %u\n", try + 1);
+	data.print_progress = false;
+	reduce_problem(try + 1, try_diff, &data);
+}
+
+/* Parameters for the --fail (failure-injection) test mode. */
+struct fail_data
+{
+	unsigned int seed;	/* Base seed for the op stream. */
+	bool print_progress;	/* Emit progress dots while running. */
+	const char *dir;	/* FILE backend root directory. */
+};
+
+/* Try xs with inserted failures: every op should either succeed or fail.
+ * The daemon's "failtest" debug command makes it inject failures; after
+ * each op we compare the daemon's tree to the FILE backend's to verify
+ * the op applied atomically (fully or not at all).  SIGUSR1 toggles
+ * injection off so the comparison itself can't be failed. */
+static unsigned int try_fail(const bool *trymap,
+			     unsigned int number,
+			     bool verbose,
+			     void *_data)
+{
+	unsigned int i, print, tried = 0, aborted = 0;
+	struct fail_data *data = _data;
+	struct xs_handle *tmpxsh;
+	struct file_ops_info *tmpfileh;
+	void *fileh, *xsh;
+	struct ops *fail;
+	char seed[20];
+
+	/* Make sure failure injection is off so the old daemon can
+	 * shut down cleanly. */
+	if (daemon_pid)
+		kill(daemon_pid, SIGUSR1);
+	cleanup(data->dir);
+	setup(data->dir);
+
+	fileh = file_handle(data->dir);
+	xsh = xs_handle(data->dir);
+
+	/* Arm failure injection, seeded for reproducibility. */
+	sprintf(seed, "%i", data->seed);
+	free(xs_debug_command(xsh, "failtest", seed, strlen(seed)+1));
+
+	print = number / 76;
+	if (!print)
+		print = 1;
+
+	for (i = 0; i < number; i++) {
+		unsigned int limit, failed;
+		char *ret;
+
+		/* A few times we fail due to other end OOM. */
+		limit = 0;
+		while (!xsh) {
+			xsh = xs_handle(data->dir);
+			if (!xsh && errno == ECONNREFUSED) {
+				if (verbose)
+					printf("Daemon refused connection\n");
+				goto out;
+			}
+			if (!xsh && limit++ == 5) {
+				printf("Daemon failed conn 5 times\n");
+				goto out;
+			}
+		}
+
+		if (data->print_progress) {
+			if (i % print == 0) {
+				printf(".");
+				fflush(stdout);
+			}
+		}
+		if (trymap && !trymap[i])
+			continue;
+
+		if (verbose)
+			printf("(%i) ", i);
+		ret = do_next_op(&xs_ops, xsh, i + data->seed, verbose);
+		/* failed: 0 = op succeeded, 1 = op cleanly failed,
+		 * 2 = connection died, outcome unknown. */
+		if (streq(ret, "FAILED:Connection reset by peer")
+		    || streq(ret, "FAILED:Bad file descriptor")
+		    || streq(ret, "FAILED:Broken pipe")) {
+			xs_close(xsh);
+			xsh = NULL;
+			failed = 2;
+		} else if (strstarts(ret, "OK"))
+			failed = 0;
+		else
+			failed = 1;
+
+		tried++;
+		if (xsh)
+			aborted++;
+
+		/* NOTE(review): ret may contain no '\n'; strchr() NULL
+		 * makes this subtraction undefined. */
+		if (verbose)
+			printf("-> %.*s\n", strchr(ret, '\n') - ret, ret);
+
+		talloc_free(ret);
+
+		/* Turn off failures using signal. */
+		if (kill(daemon_pid, SIGUSR1) != 0) {
+			if (verbose)
+				printf("Failed to signal daemon\n");
+			goto out;
+		}
+
+		if (failed == 0) {
+			/* Succeeded?  Do same thing to file backend
+			 * to compare */
+		try_applying:
+			ret = do_next_op(&file_ops, fileh, i + data->seed,
+					 false);
+			/* NOTE(review): message printed only when NOT
+			 * verbose -- looks inverted, confirm. */
+			if (!strstarts(ret, "OK")) {
+				if (!verbose)
+					printf("File op failed on %i\n",
+					       i + data->seed);
+				talloc_free(ret);
+				goto out;
+			}
+			talloc_free(ret);
+		}
+
+		tmpxsh = xs_handle(data->dir);
+		if (!tmpxsh) {
+			if (verbose)
+				printf("Failed to open signalled daemon");
+			goto out;
+		}
+		tmpfileh = file_handle(data->dir);
+
+		fail = NULL;
+		if (!ops_equal(&xs_ops, tmpxsh, &file_ops, tmpfileh, "/",
+			       &fail)) {
+			xs_close(tmpxsh);
+			file_close(tmpfileh);
+			if (fail) {
+				if (verbose)
+					printf("%s failed\n", fail->name);
+				goto out;
+			}
+			/* Maybe op succeeded: try comparing after local op? */
+			if (failed == 2) {
+				failed = 0;
+				if (verbose)
+					printf("(Looks like it succeeded)\n");
+				goto try_applying;
+			}
+			if (verbose)
+				printf("Two backends not equal\n");
+			goto out;
+		}
+
+		/* If we lost the xs handle, that ended the transaction */
+		if (!xsh)
+			file_transaction_end(fileh, true);
+
+		/* Turn failures back on. */
+		free(xs_debug_command(tmpxsh, "failtest", NULL, 0));
+		xs_close(tmpxsh);
+		file_close(tmpfileh);
+	}
+
+	printf("Total %u of %u not aborted\n", tried - aborted, tried);
+out:
+	if (xsh)
+		xs_close(xsh);
+	return i;
+}
+
+/* Failure-injection test driver: exits 0 on success, otherwise
+ * minimizes the failing op sequence via reduce_problem. */
+static void fail_test(const char *dir,
+		      unsigned int iters, unsigned int seed,
+		      bool fast __attribute__((unused)), bool verbose)
+{
+	struct fail_data data;
+	unsigned int try;
+
+	data.seed = seed;
+	data.print_progress = !verbose;
+	data.dir = dir;
+
+	try = try_fail(NULL, iters, verbose, &data);
+	if (try == iters) {
+		cleanup_xs_ops();
+		printf("Succeeded\n");
+		exit(0);
+	}
+	printf("Failed on iteration %u\n", try + 1);
+	fflush(stdout);
+	data.print_progress = false;
+	reduce_problem(try + 1, try_fail, &data);
+}
+
+/* Entry point.  Note the flags are positional: they must appear in
+ * the order [--fail|--simple] [--fast] [--verbose].  The chosen test
+ * function exits itself; the final exit(2) is unreachable in normal
+ * operation. */
+int main(int argc, char *argv[])
+{
+	bool verbose = false;
+	bool simple = false;
+	bool fast = false;
+	bool fail = false;
+
+	if (argv[1] && streq(argv[1], "--fail")) {
+		fail = true;
+		argv++;
+		argc--;
+	}
+
+	if (argv[1] && streq(argv[1], "--simple")) {
+		simple = true;
+		argv++;
+		argc--;
+	}
+
+	if (argv[1] && streq(argv[1], "--fast")) {
+		fast = true;
+		argv++;
+		argc--;
+	}
+
+	if (argv[1] && streq(argv[1], "--verbose")) {
+		verbose = true;
+		argv++;
+		argc--;
+	}
+
+	if (argc != 4)
+		barf("Usage: xs_random [--fail|--simple] [--fast] [--verbose] <directory> <iterations> <seed>");
+
+	talloc_enable_null_tracking();
+
+	if (fail)
+		fail_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+	else if (simple)
+		simple_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+	else
+		diff_test(argv[1], atoi(argv[2]), atoi(argv[3]), fast, verbose);
+	exit(2);
+}
diff --git a/tools/xenstore/xs_stress.c b/tools/xenstore/xs_stress.c
new file mode 100644
index 0000000000..9c480b1553
--- /dev/null
+++ b/tools/xenstore/xs_stress.c
@@ -0,0 +1,207 @@
+/* Stress test for Xen Store: multiple people hammering transactions */
+#include "xs.h"
+#include "utils.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#define NUM_HANDLES 2
+#define DIR_FANOUT 3
+#define DIR_DEPTH 3
+
+/* How often to print progress */
+static int print;
+
+/* Layout looks like /<num>/<num>/count. */
+static void work(unsigned int cycles, unsigned int childnum)
+{
+	unsigned int i;
+	struct xs_handle *handles[NUM_HANDLES];
+	char id;
+
+	if (childnum < 10)
+		id = '0' + childnum;
+	else
+		id = 'A' + childnum - 10;
+
+	for (i = 0; i < NUM_HANDLES; i++) {
+		handles[i] = xs_daemon_open();
+		if (!handles[i])
+			barf_perror("Opening handle %i", i);
+	}
+
+	srandom(childnum);
+	for (i = 0; i < cycles; i++) {
+		unsigned int lockdepth, j, len;
+		char file[100] = "", lockdir[100];
+		char *contents, tmp[100];
+		struct xs_handle *h = handles[random() % NUM_HANDLES];
+
+		lockdepth = random() % DIR_DEPTH;
+		for (j = 0; j < DIR_DEPTH; j++) {
+			if (j == lockdepth)
+				strcpy(lockdir, file);
+			sprintf(file + strlen(file), "/%li",
+				random()%DIR_FANOUT);
+		}
+		if (streq(lockdir, ""))
+			strcpy(lockdir, "/");
+
+		if (!xs_transaction_start(h, lockdir))
+			barf_perror("%i: starting transaction %i on %s",
+				    childnum, i, lockdir);
+		sprintf(file + strlen(file), "/count");
+		contents = xs_read(h, file, &len);	/* malloc'd */
+		if (!contents)
+			barf_perror("%i: can't read %s iter %i",
+				    childnum, file, i);
+		sprintf(tmp, "%i", atoi(contents) + 1);
+		free(contents);	/* don't leak the xs_read buffer each cycle */
+		if (!xs_write(h, file, tmp, strlen(tmp)+1, 0))
+			barf_perror("%i: can't write %s iter %i",
+				    childnum, file, i);
+
+		/* Abandon 1 in 10 */
+		if (random() % 10 == 0) {
+			if (!xs_transaction_end(h, true))
+				barf_perror("%i: can't abort transact %s",
+					    childnum, lockdir);
+			i--;
+		} else {
+			if (!xs_transaction_end(h, false))
+				barf_perror("%i: can't commit transact %s",
+					    childnum, lockdir);
+
+			/* Offset when we print . so kids don't all
+			 * print at once. */
+			if ((i + print/(childnum+1)) % print == 0)
+				write(STDOUT_FILENO, &id, 1);
+		}
+	}
+}
+
+static void create_dirs(struct xs_handle *h, const char *base, int togo)
+{
+ unsigned int i;
+ char filename[100];
+
+ if (togo == 0) {
+ sprintf(filename, "%s/count", base);
+ if (!xs_write(h, filename, "0", 2, O_EXCL|O_CREAT))
+ barf_perror("Writing to %s", filename);
+ return;
+ }
+
+ for (i = 0; i < DIR_FANOUT; i++) {
+ sprintf(filename, "%s/%i", base, i);
+ if (!xs_mkdir(h, filename))
+ barf_perror("xs_mkdir %s", filename);
+ create_dirs(h, filename, togo-1);
+ }
+}
+
+static unsigned int add_count(struct xs_handle *h, const char *base, int togo)
+{
+ unsigned int i, count;
+ char filename[100];
+
+ if (togo == 0) {
+ char *answer;
+ unsigned int len;
+
+ sprintf(filename, "%s/count", base);
+ answer = xs_read(h, filename, &len);
+ if (!answer)
+ barf_perror("Reading %s", filename);
+ count = atoi(answer);
+ free(answer);
+ return count;
+ }
+
+ count = 0;
+ for (i = 0; i < DIR_FANOUT; i++) {
+ sprintf(filename, "%s/%i", base, i);
+ count += add_count(h, filename, togo-1);
+ }
+ return count;
+}
+
+static void setup(void)
+{
+ struct xs_handle *h;
+
+ /* Do setup. */
+ h = xs_daemon_open();
+ if (!h)
+ barf_perror("Contacting daemon");
+ create_dirs(h, "", DIR_DEPTH);
+ xs_daemon_close(h);
+}
+
+static unsigned int tally_counts(void)
+{
+ struct xs_handle *h;
+ unsigned int ret;
+
+ h = xs_daemon_open();
+ if (!h)
+ barf_perror("Contacting daemon");
+
+ ret = add_count(h, "", DIR_DEPTH);
+ xs_daemon_close(h);
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	bool failed = false;
+	int kids[10];
+
+	if (argc != 2)
+		barf("Usage: xs_stress <iterations>");
+
+	printf("Setting up directories...\n");
+	setup();
+
+	print = atoi(argv[1]) / 76;	/* progress-dot interval */
+	if (!print)
+		print = 1;
+
+	/* %i needs an int: ARRAY_SIZE yields size_t, so cast. */
+	printf("Running %i children...\n", (int)ARRAY_SIZE(kids));
+	for (i = 0; i < ARRAY_SIZE(kids); i++) {
+		kids[i] = fork();
+		if (kids[i] == -1)
+			barf_perror("fork");
+		if (kids[i] == 0) {
+			work(atoi(argv[1]) / ARRAY_SIZE(kids), i);
+			exit(0);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(kids); i++) {
+		int status;
+		if (waitpid(kids[i], &status, 0) == -1)
+			barf_perror("waitpid");
+		if (!WIFEXITED(status))
+			barf("Kid %i died via signal %i\n",
+			     i, WTERMSIG(status));
+		if (WEXITSTATUS(status) != 0) {
+			printf("Child %i exited %i\n", i, WEXITSTATUS(status));
+			failed = true;
+		}
+	}
+	if (failed)
+		exit(1);
+
+	printf("\nCounting results...\n");
+	i = tally_counts();
+	if (i != (unsigned)atoi(argv[1]))
+		barf("Total counts %i not %s", i, argv[1]);
+	printf("Success!\n");
+	exit(0);
+}
diff --git a/tools/xenstore/xs_test.c b/tools/xenstore/xs_test.c
new file mode 100644
index 0000000000..4d769e220d
--- /dev/null
+++ b/tools/xenstore/xs_test.c
@@ -0,0 +1,647 @@
+/*
+ Xen Store Daemon Test tool
+ Copyright (C) 2005 Rusty Russell IBM Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include "utils.h"
+#include "xs_lib.h"
+
+#define XSTEST
+
+static struct xs_handle *handles[10] = { NULL };
+
+struct ringbuf_head
+{
+ uint32_t write; /* Next place to write to */
+ uint32_t read; /* Next place to read from */
+ uint8_t flags;
+ char buf[0];
+} __attribute__((packed));
+
+static struct ringbuf_head *out, *in;
+static unsigned int ringbuf_datasize;
+static int daemon_pid;
+
+/* FIXME: Mark connection as broken (close it?) when this happens. */
+static bool check_buffer(const struct ringbuf_head *h)
+{
+ return (h->write < ringbuf_datasize && h->read < ringbuf_datasize);
+}
+
+/* We can't fill last byte: would look like empty buffer. */
+static void *get_output_chunk(const struct ringbuf_head *h,
+ void *buf, uint32_t *len)
+{
+ uint32_t read_mark;
+
+ if (h->read == 0)
+ read_mark = ringbuf_datasize - 1;
+ else
+ read_mark = h->read - 1;
+
+ /* Here to the end of buffer, unless they haven't read some out. */
+ *len = ringbuf_datasize - h->write;
+ if (read_mark >= h->write)
+ *len = read_mark - h->write;
+ return buf + h->write;
+}
+
+static const void *get_input_chunk(const struct ringbuf_head *h,
+ const void *buf, uint32_t *len)
+{
+ /* Here to the end of buffer, unless they haven't written some. */
+ *len = ringbuf_datasize - h->read;
+ if (h->write >= h->read)
+ *len = h->write - h->read;
+ return buf + h->read;
+}
+
+static void update_output_chunk(struct ringbuf_head *h, uint32_t len)
+{
+ h->write += len;
+ if (h->write == ringbuf_datasize)
+ h->write = 0;
+}
+
+static void update_input_chunk(struct ringbuf_head *h, uint32_t len)
+{
+ h->read += len;
+ if (h->read == ringbuf_datasize)
+ h->read = 0;
+}
+
+/* FIXME: We spin, and we're sloppy. */
+static bool read_all_shmem(int fd __attribute__((unused)),
+			   void *data, unsigned int len)
+{
+	uint32_t avail;	/* must match get_input_chunk()'s uint32_t *len */
+
+	if (!check_buffer(in))
+		barf("Corrupt buffer");
+
+	while (len) {
+		const void *src = get_input_chunk(in, in->buf, &avail);
+		if (avail > len)
+			avail = len;
+		memcpy(data, src, avail);
+		data += avail;
+		len -= avail;
+		update_input_chunk(in, avail);
+	}
+
+	/* Tell other end we read something. */
+	kill(daemon_pid, SIGUSR2);
+	return true;
+}
+
+static bool write_all_shmem(int fd __attribute__((unused)),
+ const void *data, unsigned int len)
+{
+ uint32_t avail;
+
+ if (!check_buffer(out))
+ barf("Corrupt buffer");
+
+ while (len) {
+ void *dst = get_output_chunk(out, out->buf, &avail);
+ if (avail > len)
+ avail = len;
+ memcpy(dst, data, avail);
+ data += avail;
+ len -= avail;
+ update_output_chunk(out, avail);
+ }
+
+ /* Tell other end we wrote something. */
+ kill(daemon_pid, SIGUSR2);
+ return true;
+}
+
+static bool read_all(int fd, void *data, unsigned int len);
+static bool read_all_choice(int fd, void *data, unsigned int len)
+{
+ if (fd == -2)
+ return read_all_shmem(fd, data, len);
+ return read_all(fd, data, len);
+}
+
+static bool write_all_choice(int fd, const void *data, unsigned int len)
+{
+ if (fd == -2)
+ return write_all_shmem(fd, data, len);
+ return xs_write_all(fd, data, len);
+}
+
+/* We want access to internal functions. */
+#include "xs.c"
+
+static void __attribute__((noreturn)) usage(void)
+{
+ barf("Usage:\n"
+ " xs_test [--readonly] [--notimeout]\n"
+ "Reads commands from stdin, one per line:"
+ " dir <path>\n"
+ " read <path>\n"
+ " write <path> <flags> <value>...\n"
+ " setid <id>\n"
+ " mkdir <path>\n"
+ " rm <path>\n"
+ " getperm <path>\n"
+ " setperm <path> <id> <flags> ...\n"
+ " shutdown\n"
+ " watch <path> <prio>\n"
+ " waitwatch\n"
+ " ackwatch\n"
+ " unwatch <path> <token>\n"
+ " close\n"
+ " start <node>\n"
+ " abort\n"
+ " introduce <domid> <mfn> <eventchn> <path>\n"
+ " commit\n"
+ " sleep <seconds>\n"
+ " dump\n");
+}
+
+static char *arg(char *line, unsigned int num)
+{
+	static char *args[10];
+	unsigned int i, len = 0;
+
+	for (i = 0; i <= num; i++) {
+		line += len;
+		line += strspn(line, " \t\n");
+		len = strcspn(line, " \t\n");
+		if (!len)
+			barf("Can't get arg %u", num);
+	}
+	free(args[num]);
+	args[num] = malloc(len + 1);
+	if (!args[num])	/* check allocation before the memcpy below */
+		barf_perror("Allocating arg %u", num);
+	memcpy(args[num], line, len);
+	args[num][len] = '\0';
+	return args[num];
+}
+
+static char *command;
+static void __attribute__((noreturn)) failed(int handle)
+{
+ if (handle)
+ barf_perror("%i: %s", handle, command);
+ barf_perror("%s", command);
+}
+
+static void do_dir(unsigned int handle, char *path)
+{
+ char **entries;
+ unsigned int i, num;
+
+ entries = xs_directory(handles[handle], path, &num);
+ if (!entries)
+ failed(handle);
+
+ for (i = 0; i < num; i++)
+ if (handle)
+ printf("%i:%s\n", handle, entries[i]);
+ else
+ printf("%s\n", entries[i]);
+ free(entries);
+}
+
+static void do_read(unsigned int handle, char *path)
+{
+ char *value;
+ unsigned int len;
+
+ value = xs_read(handles[handle], path, &len);
+ if (!value)
+ failed(handle);
+
+ if (handle)
+ printf("%i:%.*s\n", handle, len, value);
+ else
+ printf("%.*s\n", len, value);
+}
+
+static void do_write(unsigned int handle, char *path, char *flags, char *data)
+{
+ int f;
+
+ if (streq(flags, "none"))
+ f = 0;
+ else if (streq(flags, "create"))
+ f = O_CREAT;
+ else if (streq(flags, "excl"))
+ f = O_CREAT | O_EXCL;
+ else if (streq(flags, "crap"))
+ f = 100;
+ else
+ barf("write flags 'none', 'create' or 'excl' only");
+
+ if (!xs_write(handles[handle], path, data, strlen(data)+1, f))
+ failed(handle);
+}
+
+static void do_setid(unsigned int handle, char *id)
+{
+ if (!xs_bool(xs_debug_command(handles[handle], "setid", id,
+ strlen(id)+1)))
+ failed(handle);
+}
+
+static void do_mkdir(unsigned int handle, char *path)
+{
+ if (!xs_mkdir(handles[handle], path))
+ failed(handle);
+}
+
+static void do_rm(unsigned int handle, char *path)
+{
+ if (!xs_rm(handles[handle], path))
+ failed(handle);
+}
+
+static void do_getperm(unsigned int handle, char *path)
+{
+ unsigned int i, num;
+ struct xs_permissions *perms;
+
+ perms = xs_get_permissions(handles[handle], path, &num);
+ if (!perms)
+ failed(handle);
+
+ for (i = 0; i < num; i++) {
+ char *permstring;
+
+ switch (perms[i].perms) {
+ case XS_PERM_NONE:
+ permstring = "NONE";
+ break;
+ case XS_PERM_WRITE:
+ permstring = "WRITE";
+ break;
+ case XS_PERM_READ:
+ permstring = "READ";
+ break;
+ case XS_PERM_READ|XS_PERM_WRITE:
+ permstring = "READ/WRITE";
+ break;
+ default:
+ barf("bad perm value %i", perms[i].perms);
+ }
+
+ if (handle)
+ printf("%i:%i %s\n", handle, perms[i].id, permstring);
+ else
+ printf("%i %s\n", perms[i].id, permstring);
+ }
+ free(perms);
+}
+
+static void do_setperm(unsigned int handle, char *path, char *line)
+{
+	unsigned int i;
+	struct xs_permissions perms[100];
+	strtok(line, " \t\n");
+	strtok(NULL, " \t\n");
+	/* Bounded: never write past the end of perms[]. */
+	for (i = 0; i < ARRAY_SIZE(perms); i++) {
+		char *arg = strtok(NULL, " \t\n");
+		if (!arg)
+			break;
+		perms[i].id = atoi(arg);
+		arg = strtok(NULL, " \t\n");
+		if (!arg)
+			break;
+		if (streq(arg, "WRITE"))
+			perms[i].perms = XS_PERM_WRITE;
+		else if (streq(arg, "READ"))
+			perms[i].perms = XS_PERM_READ;
+		else if (streq(arg, "READ/WRITE"))
+			perms[i].perms = XS_PERM_READ|XS_PERM_WRITE;
+		else if (streq(arg, "NONE"))
+			perms[i].perms = XS_PERM_NONE;
+		else
+			barf("bad flags %s\n", arg);
+	}
+
+	if (!xs_set_permissions(handles[handle], path, perms, i))
+		failed(handle);
+}
+
+static void do_shutdown(unsigned int handle)
+{
+ if (!xs_shutdown(handles[handle]))
+ failed(handle);
+}
+
+static void do_watch(unsigned int handle, const char *node, const char *pri)
+{
+ if (!xs_watch(handles[handle], node, atoi(pri)))
+ failed(handle);
+}
+
+static void do_waitwatch(unsigned int handle)
+{
+ char *node;
+
+ node = xs_read_watch(handles[handle]);
+ if (!node)
+ failed(handle);
+
+ if (handle)
+ printf("%i:%s\n", handle, node);
+ else
+ printf("%s\n", node);
+ free(node);
+}
+
+static void do_ackwatch(unsigned int handle)
+{
+ if (!xs_acknowledge_watch(handles[handle]))
+ failed(handle);
+}
+
+static void do_unwatch(unsigned int handle, const char *node)
+{
+ if (!xs_unwatch(handles[handle], node))
+ failed(handle);
+}
+
+static void do_start(unsigned int handle, const char *node)
+{
+ if (!xs_transaction_start(handles[handle], node))
+ failed(handle);
+}
+
+static void do_end(unsigned int handle, bool abort)
+{
+ if (!xs_transaction_end(handles[handle], abort))
+ failed(handle);
+}
+
+static void do_introduce(unsigned int handle,
+ const char *domid,
+ const char *mfn,
+ const char *eventchn,
+ const char *path)
+{
+ unsigned int i;
+ int fd;
+
+ /* We poll, so ignore signal */
+ signal(SIGUSR2, SIG_IGN);
+ for (i = 0; i < ARRAY_SIZE(handles); i++)
+ if (!handles[i])
+ break;
+
+ fd = open("/tmp/xcmap", O_RDWR);
+ /* Set in and out pointers. */
+ out = mmap(NULL, getpagesize(), PROT_WRITE|PROT_READ, MAP_SHARED,fd,0);
+ if (out == MAP_FAILED)
+ barf_perror("Failed to map /tmp/xcmap page");
+ in = (void *)out + getpagesize() / 2;
+ close(fd);
+
+ /* Tell them the event channel and our PID. */
+ *(int *)((void *)out + 32) = getpid();
+ *(u16 *)((void *)out + 36) = atoi(eventchn);
+
+ /* Create new handle. */
+ handles[i] = new(struct xs_handle);
+ handles[i]->fd = -2;
+
+ if (!xs_introduce_domain(handles[handle], atoi(domid),
+ atol(mfn), atoi(eventchn), path))
+ failed(handle);
+ printf("handle is %i\n", i);
+
+ /* Read in daemon pid. */
+ daemon_pid = *(int *)((void *)out + 32);
+}
+
+static void do_release(unsigned int handle, const char *domid)
+{
+ if (!xs_release_domain(handles[handle], atoi(domid)))
+ failed(handle);
+}
+
+static int strptrcmp(const void *a, const void *b)
+{
+ return strcmp(*(char **)a, *(char **)b);
+}
+
+static void sort_dir(char **dir, unsigned int num)
+{
+ qsort(dir, num, sizeof(char *), strptrcmp);
+}
+
+static void dump_dir(unsigned int handle,
+ const char *node,
+ char **dir,
+ unsigned int numdirs,
+ unsigned int depth)
+{
+ unsigned int i;
+ char spacing[depth+1];
+
+ memset(spacing, ' ', depth);
+ spacing[depth] = '\0';
+
+ sort_dir(dir, numdirs);
+
+ for (i = 0; i < numdirs; i++) {
+ struct xs_permissions *perms;
+ unsigned int j, numperms;
+ unsigned int len;
+ char *contents;
+ unsigned int subnum;
+ char **subdirs;
+ char subnode[strlen(node) + 1 + strlen(dir[i]) + 1];
+
+ sprintf(subnode, "%s/%s", node, dir[i]);
+
+ perms = xs_get_permissions(handles[handle], subnode,&numperms);
+ if (!perms)
+ failed(handle);
+
+ printf("%s%s: ", spacing, dir[i]);
+ for (j = 0; j < numperms; j++) {
+ char buffer[100];
+ if (!xs_perm_to_string(&perms[j], buffer))
+ barf("perm to string");
+ printf("%s ", buffer);
+ }
+ free(perms);
+ printf("\n");
+
+ /* Even directories can have contents. */
+ contents = xs_read(handles[handle], subnode, &len);
+ if (!contents) {
+ if (errno != EISDIR)
+ failed(handle);
+ } else {
+ printf(" %s(%.*s)\n", spacing, len, contents);
+ free(contents);
+ }
+
+ /* Every node is a directory. */
+ subdirs = xs_directory(handles[handle], subnode, &subnum);
+ if (!subdirs)
+ failed(handle);
+ dump_dir(handle, subnode, subdirs, subnum, depth+1);
+ free(subdirs);
+ }
+}
+
+static void dump(int handle)
+{
+ char **subdirs;
+ unsigned int subnum;
+
+ subdirs = xs_directory(handles[handle], "/", &subnum);
+ if (!subdirs)
+ failed(handle);
+
+ dump_dir(handle, "", subdirs, subnum, 0);
+ free(subdirs);
+}
+
+int main(int argc, char *argv[])
+{
+ char line[1024];
+ bool readonly = false, timeout = true;
+ int handle;
+
+ static void alarmed(int sig __attribute__((unused)))
+ {
+ if (handle) {
+ char handlename[10];
+ sprintf(handlename, "%u:", handle);
+ write(STDOUT_FILENO, handlename, strlen(handlename));
+ }
+ write(STDOUT_FILENO, command, strlen(command));
+ write(STDOUT_FILENO, " timeout\n", strlen(" timeout\n"));
+ exit(1);
+ }
+
+ if (argc > 1 && streq(argv[1], "--readonly")) {
+ readonly = true;
+ argc--;
+ argv++;
+ }
+
+ if (argc > 1 && streq(argv[1], "--notimeout")) {
+ timeout = false;
+ argc--;
+ argv++;
+ }
+
+ if (argc != 1)
+ usage();
+
+ /* The size of the ringbuffer: half a page minus head structure. */
+ ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
+
+ signal(SIGALRM, alarmed);
+ while (fgets(line, sizeof(line), stdin)) {
+ char *endp;
+
+ if (strspn(line, " \n") == strlen(line))
+ continue;
+ if (strstarts(line, "#"))
+ continue;
+
+ handle = strtoul(line, &endp, 10);
+ if (endp != line)
+ memmove(line, endp+1, strlen(endp));
+ else
+ handle = 0;
+
+ if (!handles[handle]) {
+ if (readonly)
+ handles[handle] = xs_daemon_open_readonly();
+ else
+ handles[handle] = xs_daemon_open();
+ if (!handles[handle])
+ barf_perror("Opening connection to daemon");
+ }
+ command = arg(line, 0);
+
+ if (timeout)
+ alarm(5);
+ if (streq(command, "dir"))
+ do_dir(handle, arg(line, 1));
+ else if (streq(command, "read"))
+ do_read(handle, arg(line, 1));
+ else if (streq(command, "write"))
+ do_write(handle,
+ arg(line, 1), arg(line, 2), arg(line, 3));
+ else if (streq(command, "setid"))
+ do_setid(handle, arg(line, 1));
+ else if (streq(command, "mkdir"))
+ do_mkdir(handle, arg(line, 1));
+ else if (streq(command, "rm"))
+ do_rm(handle, arg(line, 1));
+ else if (streq(command, "getperm"))
+ do_getperm(handle, arg(line, 1));
+ else if (streq(command, "setperm"))
+ do_setperm(handle, arg(line, 1), line);
+ else if (streq(command, "shutdown"))
+ do_shutdown(handle);
+ else if (streq(command, "watch"))
+ do_watch(handle, arg(line, 1), arg(line, 2));
+ else if (streq(command, "waitwatch"))
+ do_waitwatch(handle);
+ else if (streq(command, "ackwatch"))
+ do_ackwatch(handle);
+ else if (streq(command, "unwatch"))
+ do_unwatch(handle, arg(line, 1));
+ else if (streq(command, "close")) {
+ xs_daemon_close(handles[handle]);
+ handles[handle] = NULL;
+ } else if (streq(command, "start"))
+ do_start(handle, arg(line, 1));
+ else if (streq(command, "commit"))
+ do_end(handle, false);
+ else if (streq(command, "abort"))
+ do_end(handle, true);
+ else if (streq(command, "introduce"))
+ do_introduce(handle, arg(line, 1), arg(line, 2),
+ arg(line, 3), arg(line, 4));
+ else if (streq(command, "release"))
+ do_release(handle, arg(line, 1));
+ else if (streq(command, "dump"))
+ dump(handle);
+ else if (streq(command, "sleep"))
+ sleep(atoi(arg(line, 1)));
+ else
+ barf("Unknown command %s", command);
+ fflush(stdout);
+ alarm(0);
+ }
+ return 0;
+}
diff --git a/tools/xentrace/Makefile b/tools/xentrace/Makefile
index 8e7dcfdbc2..329d71c3ee 100644
--- a/tools/xentrace/Makefile
+++ b/tools/xentrace/Makefile
@@ -6,12 +6,10 @@ INSTALL_DATA = $(INSTALL) -m0644
XEN_ROOT=../..
include $(XEN_ROOT)/tools/Rules.mk
-CC = gcc
CFLAGS += -Wall -Werror -O3
CFLAGS += -I $(XEN_XC)
CFLAGS += -I $(XEN_LIBXC)
-CFLAGS += -I $(XEN_LIBXUTIL)
HDRS = $(wildcard *.h)
OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
@@ -38,4 +36,4 @@ clean:
$(RM) *.a *.so *.o *.rpm $(BIN)
%: %.c $(HDRS) Makefile
- $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -lxc -lxutil
+ $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc
diff --git a/tools/xentrace/formats b/tools/xentrace/formats
index 0452385484..8577590f20 100644
--- a/tools/xentrace/formats
+++ b/tools/xentrace/formats
@@ -1,35 +1,17 @@
-0x00010000 CPU%(cpu)d %(tsc).6f sched_add_domain(0x%(3)08x) [ dom id = 0x%(2)08x ]
-0x00010001 CPU%(cpu)d %(tsc).6f sched_rem_domain(0x%08(3)x) [ dom id = 0x%(2)08x ]
-0x00010002 CPU%(cpu)d %(tsc).6f __wake_up(0x%(3)08x) [ dom id = 0x%(2)08x ]
-0x00010003 CPU%(cpu)d %(tsc).6f do_block() [ current = 0x%(2)08x ]
-0x00010004 CPU%(cpu)d %(tsc).6f do_yield() [ current = %(2)08x ]
-0x00010005 CPU%(cpu)d %(tsc).6f do_set_timer_op(0x%(4)08x, 0x%(5)08x) [ current = 0x%(3)08x ]
-0x00010006 CPU%(cpu)d %(tsc).6f sched_ctl(0x%(1)08x)
-0x00010007 CPU%(cpu)d %(tsc).6f sched_adjdom(params) [ dom id = 0x%(2)08x ]
-0x00010008 CPU%(cpu)d %(tsc).6f __reschedule(0x%(3)08x) [ dom id = 0x(2)08x ]
-0x00010009 CPU%(cpu)d %(tsc).6f switching to task_struct 0x%(1)08x [ dom id = 0x%(1)x ]
-0x0001000A CPU%(cpu)d %(tsc).6f s_timer_fn(unused)
-0x0001000B CPU%(cpu)d %(tsc).6f t_timer_fn(unused)
-0x0001000C CPU%(cpu)d %(tsc).6f dom_timer_fn(data)
-0x0001000D CPU%(cpu)d %(tsc).6f fallback_timer_fn(unused)
+0x00020001 CPU%(cpu)d %(tsc)d sched_add_domain [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00020002 CPU%(cpu)d %(tsc)d sched_rem_domain [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00020003 CPU%(cpu)d %(tsc)d domain_sleep [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00020004 CPU%(cpu)d %(tsc)d domain_wake [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00020005 CPU%(cpu)d %(tsc)d do_yield [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00020006 CPU%(cpu)d %(tsc)d do_block [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
+0x00020007 CPU%(cpu)d %(tsc)d domain_shutdown [ domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
+0x00020008 CPU%(cpu)d %(tsc)d sched_ctl
+0x00020009 CPU%(cpu)d %(tsc)d sched_adjdom [ domid = 0x%(1)08x ]
+0x0002000a CPU%(cpu)d %(tsc)d __enter_scheduler [ prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 0x%(4)08x ]
+0x0002000B CPU%(cpu)d %(tsc)d s_timer_fn
+0x0002000c CPU%(cpu)d %(tsc)d t_timer_fn
+0x0002000d CPU%(cpu)d %(tsc)d dom_timer_fn
-
-0x00020008 CPU%(cpu)d %(tsc).6f enter: dom0_create_dom ( )
-0x00030008 CPU%(cpu)d %(tsc).6f leave: dom0_create_dom ( )
-
-0x00020009 CPU%(cpu)d %(tsc).6f enter: dom0_destroy_dom ( dom=0x%(2)x )
-0x00030009 CPU%(cpu)d %(tsc).6f leave: dom0_destroy_dom ( dom=0x%(2)x ) = %(1)d
-
-0x0002000A CPU%(cpu)d %(tsc).6f enter: dom0_start_dom ( dom=0x%(2)x )
-0x0003000A CPU%(cpu)d %(tsc).6f leave: dom0_start_dom ( dom=0x%(2)x ) = %(1)d
-0x0002000B CPU%(cpu)d %(tsc).6f enter: dom0_stop_dom ( dom=0x%(2)x )
-0x0003000B CPU%(cpu)d %(tsc).6f leave: dom0_stop_dom ( dom=0x%(2)x ) = %(1)d
-0x0002000C CPU%(cpu)d %(tsc).6f enter: dom0_getinfo ( dom=0x%(2)x )
-0x0003000C CPU%(cpu)d %(tsc).6f leave: dom0_getinfo ( dom=0x%(2)x ) = %(1)d
-0x0002000D CPU%(cpu)d %(tsc).6f enter: dom0_build ( dom=0x%(2)x )
-0x0003000D CPU%(cpu)d %(tsc).6f leave: dom0_build ( dom=0x%(2)x ) = %(1)d
-
-0x00020019 CPU%(cpu)d %(tsc).6f enter: dom0_shadow_op ( dom=0x%(2)x, %(3)d )
-0x00030019 CPU%(cpu)d %(tsc).6f leave: dom0_shadow_op ( dom=0x%(2)x, %(3)d ) = %(1)d
-
-#0x0 CPU%(cpu)d %(tsc).6f %(event)x
+0x00080001 CPU%(cpu)d %(tsc)d VMX_VMEXIT [ domid = 0x%(1)08x, eip = 0x%(2)08x, reason = 0x%(3)08x ]
+0x00080002 CPU%(cpu)d %(tsc)d VMX_VECTOR [ domid = 0x%(1)08x, eip = 0x%(2)08x, vector = 0x%(3)08x ]
+0x00080003 CPU%(cpu)d %(tsc)d VMX_INT [ domid = 0x%(1)08x, trap = 0x%(2)08x, va = 0x%(3)08x ]
diff --git a/tools/xentrace/xenctx.c b/tools/xentrace/xenctx.c
new file mode 100644
index 0000000000..28dfd360ac
--- /dev/null
+++ b/tools/xentrace/xenctx.c
@@ -0,0 +1,83 @@
+/******************************************************************************
+ * tools/xentrace/xenctx.c
+ *
+ * Tool for dumping the cpu context
+ *
+ * Copyright (C) 2005 by Intel Corp
+ *
+ * Author: Arun Sharma <arun.sharma@intel.com>
+ * Date: February 2005
+ */
+
+#include <time.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <argp.h>
+#include <signal.h>
+
+#include "xc.h"
+
+void print_ctx(vcpu_guest_context_t *ctx1)
+{
+#ifdef __i386__
+    struct cpu_user_regs *regs = &ctx1->user_regs;
+    printf("eip: %08lx\t", regs->eip);
+    printf("esp: %08lx\n", regs->esp);
+    printf("eax: %08lx\t", regs->eax);
+    printf("ebx: %08lx\t", regs->ebx);
+    printf("ecx: %08lx\t", regs->ecx);
+    printf("edx: %08lx\n", regs->edx);
+    printf("esi: %08lx\t", regs->esi);
+    printf("edi: %08lx\t", regs->edi);
+    printf("ebp: %08lx\n", regs->ebp);
+    printf(" cs: %08lx\t", regs->cs);
+    printf(" ds: %08lx\t", regs->ds);
+    printf(" fs: %08lx\t", regs->fs);
+    printf(" gs: %08lx\n", regs->gs);
+#else
+    /* dump_ctx() calls this unconditionally: keep a definition on
+     * every arch so the link succeeds; only x86-32 prints registers. */
+    (void)ctx1;
+    printf("xenctx: register dump unsupported on this architecture\n");
+#endif
+}
+
+void dump_ctx(u32 domid, u32 vcpu)
+{
+ int ret;
+ xc_domaininfo_t info;
+ vcpu_guest_context_t ctx;
+
+ int xc_handle = xc_interface_open(); /* for accessing control interface */
+
+ ret = xc_domain_getfullinfo(xc_handle, domid, vcpu, &info, &ctx);
+ if (ret != 0) {
+ perror("xc_domain_getfullinfo");
+ exit(-1);
+ }
+ print_ctx(&ctx);
+ xc_interface_close(xc_handle);
+}
+
+int main(int argc, char **argv)
+{
+ int vcpu = 0;
+
+ if (argc < 2) {
+ printf("usage: xenctx <domid> <optional vcpu>\n");
+ exit(-1);
+ }
+
+ if (argc == 3)
+ vcpu = atoi(argv[2]);
+
+ dump_ctx(atoi(argv[1]), vcpu);
+
+ return 0;
+}
diff --git a/tools/xentrace/xentrace.c b/tools/xentrace/xentrace.c
index 351ecd71a8..50988c90f8 100644
--- a/tools/xentrace/xentrace.c
+++ b/tools/xentrace/xentrace.c
@@ -11,10 +11,10 @@
#include <time.h>
#include <stdlib.h>
-#include <sys/mman.h>
#include <stdio.h>
-#include <sys/types.h>
+#include <sys/mman.h>
#include <sys/stat.h>
+#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
@@ -22,6 +22,10 @@
#include <signal.h>
#include "xc_private.h"
+
+typedef struct { int counter; } atomic_t;
+#define _atomic_read(v) ((v).counter)
+
#include <xen/trace.h>
extern FILE *stderr;
@@ -77,8 +81,14 @@ struct timespec millis_to_timespec(unsigned long millis)
*/
void write_rec(unsigned int cpu, struct t_rec *rec, FILE *out)
{
- fwrite(&cpu, sizeof(cpu), 1, out);
- fwrite(rec, sizeof(*rec), 1, out);
+ size_t written = 0;
+ written += fwrite(&cpu, sizeof(cpu), 1, out);
+ written += fwrite(rec, sizeof(*rec), 1, out);
+ if ( written != 2 )
+ {
+ PERROR("Failed to write trace record");
+ exit(EXIT_FAILURE);
+ }
}
/**
@@ -95,8 +105,9 @@ void get_tbufs(unsigned long *mach_addr, unsigned long *size)
dom0_op_t op; /* dom0 op we'll build */
int xc_handle = xc_interface_open(); /* for accessing control interface */
- op.cmd = DOM0_GETTBUFS;
+ op.cmd = DOM0_TBUFCONTROL;
op.interface_version = DOM0_INTERFACE_VERSION;
+ op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO;
ret = do_dom0_op(xc_handle, &op);
@@ -108,8 +119,8 @@ void get_tbufs(unsigned long *mach_addr, unsigned long *size)
exit(EXIT_FAILURE);
}
- *mach_addr = op.u.gettbufs.mach_addr;
- *size = op.u.gettbufs.size;
+ *mach_addr = op.u.tbufcontrol.mach_addr;
+ *size = op.u.tbufcontrol.size;
}
/**
@@ -135,8 +146,8 @@ struct t_buf *map_tbufs(unsigned long tbufs_mach, unsigned int num,
}
tbufs_mapped = xc_map_foreign_range(xc_handle, 0 /* Dom 0 ID */,
- size * num, PROT_READ,
- tbufs_mach >> PAGE_SHIFT);
+ size * num, PROT_READ,
+ tbufs_mach >> PAGE_SHIFT);
xc_interface_close(xc_handle);
@@ -146,7 +157,7 @@ struct t_buf *map_tbufs(unsigned long tbufs_mach, unsigned int num,
exit(EXIT_FAILURE);
}
- return (struct t_buf *)tbufs_mapped;
+ return tbufs_mapped;
}
@@ -175,8 +186,7 @@ struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
/* initialise pointers to the trace buffers - given the size of a trace
* buffer and the value of bufs_maped, we can easily calculate these */
for ( i = 0; i<num; i++ )
- user_ptrs[i] = (struct t_buf *)(
- (unsigned long)bufs_mapped + size * i);
+ user_ptrs[i] = (struct t_buf *)((unsigned long)bufs_mapped + size * i);
return user_ptrs;
}
@@ -208,9 +218,9 @@ struct t_rec **init_rec_ptrs(unsigned long tbufs_mach,
exit(EXIT_FAILURE);
}
- for ( i = 0; i<num; i++ )
- data[i] = (struct t_rec *)(meta[i]->data - tbufs_mach
- + (unsigned long)tbufs_mapped);
+ for ( i = 0; i < num; i++ )
+ data[i] = (struct t_rec *)(meta[i]->rec_addr - tbufs_mach
+ + (unsigned long)tbufs_mapped);
return data;
}
@@ -236,7 +246,7 @@ unsigned long *init_tail_idxs(struct t_buf **bufs, unsigned int num)
}
for ( i = 0; i<num; i++ )
- tails[i] = bufs[i]->head;
+ tails[i] = _atomic_read(bufs[i]->rec_idx);
return tails;
}
@@ -293,7 +303,7 @@ int monitor_tbufs(FILE *logfile)
get_tbufs(&tbufs_mach, &size);
tbufs_mapped = map_tbufs(tbufs_mach, num, size);
- size_in_recs = (size / sizeof(struct t_rec) )-1;
+ size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);
/* build arrays of convenience ptrs */
meta = init_bufs_ptrs (tbufs_mapped, num, size);
@@ -304,11 +314,11 @@ int monitor_tbufs(FILE *logfile)
while ( !interrupted )
{
for ( i = 0; ( i < num ) && !interrupted; i++ )
- while( cons[i] != meta[i]->head )
- {
- write_rec(i, data[i] + (cons[i] % size_in_recs), logfile);
- cons[i]++;
- }
+ while( cons[i] != _atomic_read(meta[i]->rec_idx) )
+ {
+ write_rec(i, data[i] + cons[i], logfile);
+ cons[i] = (cons[i] + 1) % size_in_recs;
+ }
nanosleep(&opts.poll_sleep, NULL);
}
@@ -439,9 +449,11 @@ int main(int argc, char **argv)
/* ensure that if we get a signal, we'll do cleanup, then exit */
act.sa_handler = close_handler;
- sigaction(SIGHUP, &act, 0);
- sigaction(SIGTERM, &act, 0);
- sigaction(SIGINT, &act, 0);
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGHUP, &act, NULL);
+ sigaction(SIGTERM, &act, NULL);
+ sigaction(SIGINT, &act, NULL);
ret = monitor_tbufs(logfile);
diff --git a/tools/xentrace/xentrace_format b/tools/xentrace/xentrace_format
index d2e51265d4..82b406afe6 100644
--- a/tools/xentrace/xentrace_format
+++ b/tools/xentrace/xentrace_format
@@ -75,8 +75,6 @@ try:
except getopt.GetoptError:
usage()
-print mhz
-
signal.signal(signal.SIGTERM, sighand)
signal.signal(signal.SIGHUP, sighand)
signal.signal(signal.SIGINT, sighand)
@@ -85,8 +83,6 @@ interrupted = 0
defs = read_defs(arg[0])
-print defs
-
# structure of trace record + prepended CPU id (as output by xentrace):
# CPU(I) TSC(Q) EVENT(L) D1(L) D2(L) D3(L) D4(L) D5(L)
TRCREC = "IQLLLLLL"
diff --git a/tools/xfrd/Make.xfrd b/tools/xfrd/Make.xfrd
deleted file mode 100644
index 3bbfe4d5d6..0000000000
--- a/tools/xfrd/Make.xfrd
+++ /dev/null
@@ -1,33 +0,0 @@
-# -*- mode: Makefile; -*-
-#============================================================================
-
-UTIL_LIB = libutil.a
-
-UTIL_LIB_SRC =
-UTIL_LIB_SRC += allocate.c
-UTIL_LIB_SRC += enum.c
-UTIL_LIB_SRC += file_stream.c
-UTIL_LIB_SRC += gzip_stream.c
-UTIL_LIB_SRC += hash_table.c
-UTIL_LIB_SRC += iostream.c
-UTIL_LIB_SRC += lexis.c
-UTIL_LIB_SRC += lzi_stream.c
-UTIL_LIB_SRC += marshal.c
-UTIL_LIB_SRC += string_stream.c
-UTIL_LIB_SRC += sxpr.c
-UTIL_LIB_SRC += sxpr_parser.c
-UTIL_LIB_SRC += sys_net.c
-UTIL_LIB_SRC += sys_string.c
-#UTIL_LIB_SRC += util.c
-UTIL_LIB_SRC += xdr.c
-
-#----------------------------------------------------------------------------
-# Xfrd.
-
-XFRD_PROG_SRC =
-XFRD_PROG_SRC += xfrd.c
-XFRD_PROG_SRC += xen_domain.c
-XFRD_PROG_SRC += select.c
-XFRD_PROG_SRC += connection.c
-
-#============================================================================
diff --git a/tools/xfrd/Makefile b/tools/xfrd/Makefile
deleted file mode 100644
index fb6f2ae244..0000000000
--- a/tools/xfrd/Makefile
+++ /dev/null
@@ -1,91 +0,0 @@
-# -*- mode: Makefile; -*-
-#============================================================================
-#
-# Mike Wray <mike.wray@hp.com>
-#============================================================================
-
-INSTALL = install
-INSTALL_PROG = $(INSTALL) -m0755
-INSTALL_DIR = $(INSTALL) -d -m0755
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-XFRD_INSTALL_DIR = /usr/sbin
-
-vpath %.h $(XEN_LIBXC)
-INCLUDES += -I $(XEN_LIBXC)
-
-vpath %c $(XEN_LIBXUTIL)
-INCLUDES += -I $(XEN_LIBXUTIL)
-
-include Make.xfrd
-
-UTIL_LIB_OBJ = $(UTIL_LIB_SRC:.c=.o)
-
-XFRD_PROG_OBJ = $(XFRD_PROG_SRC:.c=.o)
-XFRD_PROG_OBJ += $(UTIL_LIB)
-
-# Flag controlling whether to use stubs.
-# Define to use stubs, undefine to use the real Xen functions.
-#CPPFLAGS += -D _XEN_XFR_STUB_
-
-ifeq ($(SXPR_DEBUG),1)
-CPPFLAGS += -D _XEN_XFR_STUB_ -D SXPR_PARSER_MAIN
-endif
-
-CC := gcc
-
-CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing
-CFLAGS += $(INCLUDES)
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-PROG_DEP = .*.d
-
-#$(warning XFRD_PROG_OBJ= $(XFRD_PROG_OBJ))
-#$(warning UTIL_LIB= $(UTIL_LIB))
-#$(warning UTIL_LIB_OBJ= $(UTIL_LIB_OBJ))
-
-# Libraries for xfrd.
-XFRD_LIBS :=
-
-XFRD_LIBS += -L $(XEN_LIBXC) -lxc
-XFRD_LIBS += -L $(XEN_LIBXUTIL) -lxutil
-
-# zlib library.
-XFRD_LIBS += -lz
-
-CURL_FLAGS = $(shell curl-config --cflags)
-CURL_LIBS = $(shell curl-config --libs)
-CFLAGS += $(CURL_FLAGS)
-# libcurl libraries.
-XFRD_LIBS += $(CURL_LIBS)
-
-#$(warning XFRD_LIBS = $(XFRD_LIBS))
-
-all: build
-build: xfrd
-
-xfrd: $(XFRD_PROG_OBJ)
- $(CC) -o $@ $^ $(XFRD_LIBS)
-
-.PHONY: install
-install: xfrd
- [ -d $(DESTDIR)$(XFRD_INSTALL_DIR) ] || \
- $(INSTALL_DIR) $(DESTDIR)$(XFRD_INSTALL_DIR)
- $(INSTALL_PROG) xfrd $(DESTDIR)$(XFRD_INSTALL_DIR)
-
-.PHONY: libutil
-libutil: $(UTIL_LIB)
-
-$(UTIL_LIB): $(UTIL_LIB_OBJ)
- $(AR) rc $@ $^
-
-.PHONY: clean
-clean:
- $(RM) *.o *.a *.so *~ xfrd
- $(RM) $(PROG_DEP)
-
-$(XFRD_PROG_OBJ): Makefile
--include $(PROG_DEP)
-
diff --git a/tools/xfrd/connection.c b/tools/xfrd/connection.c
deleted file mode 100644
index 36f2ca8475..0000000000
--- a/tools/xfrd/connection.c
+++ /dev/null
@@ -1,195 +0,0 @@
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-
-#include "connection.h"
-#include "file_stream.h"
-#include "lzi_stream.h"
-#include "sxpr_parser.h"
-
-#define dprintf(fmt, args...) fprintf(stdout, "[DEBUG] %s" fmt, __FUNCTION__, ##args)
-#define wprintf(fmt, args...) fprintf(stderr, "[WARN] %s" fmt, __FUNCTION__, ##args)
-#define iprintf(fmt, args...) fprintf(stdout, "[INFO] %s" fmt, __FUNCTION__, ##args)
-#define eprintf(fmt, args...) fprintf(stderr, "[ERROR] %s" fmt, __FUNCTION__, ##args)
-
-/** Compress magic header. */
-char compress_magic[2] = { 0x1f, 0x8b };
-
-/** Plain magic header. */
-char plain_magic[2] = { 0x0, 0x0 };
-
-int Conn_read_header(int sock, int *flags){
- int err = 0;
- char magic[2] = {};
- int k, n = sizeof(magic);
- k = read(sock, magic, n);
- if(k != n){
- err = -EINVAL;
- goto exit;
- }
- dprintf("> magic={ 0x%x, 0x%x }\n", magic[0], magic[1]);
- if(magic[0] == compress_magic[0] && magic[1] == compress_magic[1]){
- *flags |= CONN_READ_COMPRESS;
- dprintf("> Using compress read.\n");
- } else {
- dprintf("> Using plain read.\n");
- }
- exit:
- return err;
-}
-
-int Conn_write_header(int sock, int flags){
- int err = 0;
- if(flags & CONN_WRITE_COMPRESS){
- dprintf("> Using compress write.\n");
- err = write(sock, compress_magic, 2);
- } else {
- dprintf("> Using plain write.\n");
- err = write(sock, plain_magic, 2);
- }
- if(err == 2) err = 0;
- return err;
-}
-
-/** Initialize a file stream from a file desciptor.
- *
- * @param fd file descriptor
- * @param mode file mode
- * @param flags control compression and buffering
- * @param io return parameter for the stream
- * @return 0 on success, error code otherwise
- */
-int stream_init(int fd, const char *mode, int flags, int compress, IOStream **io){
- int err = 0;
- dprintf(">mode=%s flags=%x compress=%d\n", mode, flags, compress);
- if(compress){
- *io = lzi_stream_fdopen(fd, mode);
- } else {
- *io = file_stream_fdopen(fd, mode);
- }
- if(!*io){
- err = -errno;
- perror("fdopen");
- goto exit;
- }
- if(1 && (flags & CONN_NOBUFFER)){
- // Make unbuffered.
- dprintf("> unbuffer...\n");
- err = file_stream_setvbuf((compress ? lzi_stream_io(*io) : *io), NULL, _IONBF, 0);
- if(err){
- err = -errno;
- perror("setvbuf");
- goto exit;
- }
- }
- exit:
- if(err && *io){
- dprintf("> close err=%d\n", err);
- IOStream_close(*io);
- *io = NULL;
- }
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Initialize a connection.
- *
- * @param conn connection
- * @param flags
- * @param sock socket
- * @param ipaddr ip address
- * @return 0 on success, error code otherwise
- */
-int Conn_init(Conn *conn, int flags, int sock, struct sockaddr_in addr){
- int err = 0;
- dprintf("> flags=%x\n", flags);
- conn->addr = addr;
- conn->sock = sock;
- dprintf("> write stream...\n");
- err = stream_init(sock, "w", flags, (flags & CONN_WRITE_COMPRESS), &conn->out);
- if(err) goto exit;
- IOStream_flush(conn->out);
- dprintf("> read stream...\n");
- err = stream_init(sock, "r", flags, (flags & CONN_READ_COMPRESS) , &conn->in);
- if(err) goto exit;
- exit:
- if(err) eprintf("< err=%d\n", err);
- return err;
-}
-
-/** Open a connection.
- *
- * @param conn connection
- * @param flags
- * @param ipaddr ip address to connect to
- * @param port port
- * @return 0 on success, error code otherwise
- */
-int Conn_connect(Conn *conn, int flags, struct in_addr ipaddr, uint16_t port){
- int err = 0;
- int sock;
- struct sockaddr_in addr_in;
- struct sockaddr *addr = (struct sockaddr *)&addr_in;
- socklen_t addr_n = sizeof(addr_in);
- dprintf("> addr=%s:%d\n", inet_ntoa(ipaddr), ntohs(port));
- sock = socket(AF_INET, SOCK_STREAM, 0);
- if(sock < 0){
- err = -errno;
- goto exit;
- }
- addr_in.sin_family = AF_INET;
- addr_in.sin_addr = ipaddr;
- addr_in.sin_port = port;
- err = connect(sock, addr, addr_n);
- if(err) goto exit;
- //err = Conn_write_header(sock, flags);
- //if(err < 0) goto exit;
- err = Conn_init(conn, flags, sock, addr_in);
- exit:
- if(err) eprintf("< err=%d\n", err);
- return err;
-}
-
-/** Close a connection.
- *
- * @param conn connection
- */
-void Conn_close(Conn *conn){
- if(conn->in) IOStream_close(conn->in);
- if(conn->out) IOStream_close(conn->out);
- shutdown(conn->sock, 2);
-}
-
-int Conn_sxpr(Conn *conn, Sxpr *sxpr){
- int err = 0;
- Sxpr val = ONONE;
- int c = 0;
-
- dprintf(">\n");
- if(!conn->parser){
- conn->parser = Parser_new();
- set_error_stream(conn->parser, iostdout);
- }
- while(!err && c >= 0 && !Parser_ready(conn->parser)){
- c = IOStream_getc(conn->in);
- printf("%c", (char)c);
- if(c < 0){
- err = Parser_input_eof(conn->parser);
- } else {
- err = Parser_input_char(conn->parser, c);
- }
- }
- if(Parser_ready(conn->parser)){
- val = Parser_get_val(conn->parser);
- }
- if(err){
- objfree(val);
- val = ONONE;
- }
- *sxpr = val;
- dprintf("< err=%d\n", err);
- return err;
-}
diff --git a/tools/xfrd/connection.h b/tools/xfrd/connection.h
deleted file mode 100644
index 2b67e767ba..0000000000
--- a/tools/xfrd/connection.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* $Id: connection.h,v 1.1 2003/10/17 15:48:43 mjw Exp $ */
-#ifndef _VFC_CONNECTION_H_
-#define _VFC_CONNECTION_H_
-
-#include <netinet/in.h>
-
-#include "iostream.h"
-#include "sxpr_parser.h"
-
-/** A connection.
- * The underlying transport is a socket.
- * Contains in and out streams using the socket.
- */
-typedef struct Conn {
- struct sockaddr_in addr;
- int sock;
- IOStream *in;
- IOStream *out;
- Parser *parser;
-} Conn;
-
-enum {
- CONN_NOBUFFER=1,
- CONN_READ_COMPRESS=2,
- CONN_WRITE_COMPRESS=4,
-};
-
-extern int Conn_read_header(int sock, int *flags);
-extern int Conn_write_header(int sock, int flags);
-extern int Conn_init(Conn *conn, int flags, int sock, struct sockaddr_in addr);
-extern int Conn_connect(Conn *conn, int flags, struct in_addr ipaddr, uint16_t port);
-extern void Conn_close(Conn *conn);
-
-extern int Conn_sxpr(Conn *conn, Sxpr *sxpr);
-
-#endif /* ! _VFC_CONNECTION_H_ */
diff --git a/tools/xfrd/debug.h b/tools/xfrd/debug.h
deleted file mode 100644
index 3df5345095..0000000000
--- a/tools/xfrd/debug.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
- *
- * This library is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#ifndef _XUTIL_DEBUG_H_
-#define _XUTIL_DEBUG_H_
-
-#ifndef MODULE_NAME
-#define MODULE_NAME ""
-#endif
-
-#ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/kernel.h>
-
-#ifdef DEBUG
-
-#define dprintf(fmt, args...) printk(KERN_DEBUG "[DBG] " MODULE_NAME ">%s" fmt, __FUNCTION__, ##args)
-#define wprintf(fmt, args...) printk(KERN_WARNING "[WRN] " MODULE_NAME ">%s" fmt, __FUNCTION__, ##args)
-#define iprintf(fmt, args...) printk(KERN_INFO "[INF] " MODULE_NAME ">%s" fmt, __FUNCTION__, ##args)
-#define eprintf(fmt, args...) printk(KERN_ERR "[ERR] " MODULE_NAME ">%s" fmt, __FUNCTION__, ##args)
-
-#else
-
-#define dprintf(fmt, args...) do {} while(0)
-#define wprintf(fmt, args...) printk(KERN_WARNING "[WRN] " MODULE_NAME fmt, ##args)
-#define iprintf(fmt, args...) printk(KERN_INFO "[INF] " MODULE_NAME fmt, ##args)
-#define eprintf(fmt, args...) printk(KERN_ERR "[ERR] " MODULE_NAME fmt, ##args)
-
-#endif
-
-#else
-
-#include <stdio.h>
-#include <unistd.h>
-
-#ifdef DEBUG
-
-#define dprintf(fmt, args...) fprintf(stdout, "%d [DBG] " MODULE_NAME ">%s" fmt, getpid(), __FUNCTION__, ##args)
-#define wprintf(fmt, args...) fprintf(stderr, "%d [WRN] " MODULE_NAME ">%s" fmt, getpid(),__FUNCTION__, ##args)
-#define iprintf(fmt, args...) fprintf(stderr, "%d [INF] " MODULE_NAME ">%s" fmt, getpid(),__FUNCTION__, ##args)
-#define eprintf(fmt, args...) fprintf(stderr, "%d [ERR] " MODULE_NAME ">%s" fmt, getpid(),__FUNCTION__, ##args)
-
-#else
-
-#define dprintf(fmt, args...) do {} while(0)
-#define wprintf(fmt, args...) fprintf(stderr, "%d [WRN] " MODULE_NAME fmt, getpid(), ##args)
-#define iprintf(fmt, args...) fprintf(stderr, "%d [INF] " MODULE_NAME fmt, getpid(), ##args)
-#define eprintf(fmt, args...) fprintf(stderr, "%d [ERR] " MODULE_NAME fmt, getpid(), ##args)
-
-#endif
-
-#endif
-
-/** Print format for an IP address.
- * See NIPQUAD(), HIPQUAD()
- */
-#define IPFMT "%u.%u.%u.%u"
-
-#endif /* ! _XUTIL_DEBUG_H_ */
diff --git a/tools/xfrd/http.h b/tools/xfrd/http.h
deleted file mode 100644
index 711ccc9787..0000000000
--- a/tools/xfrd/http.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef _XFRD_HTTP_H_
-#define _XFRD_HTTP_H_
-
-enum {
- HTTP_OK = 200,
- HTTP_CREATED = 201,
- HTTP_ACCEPTED = 202,
- HTTP_NON_AUTHORITATIVE_INFORMATION = 203,
- HTTP_NO_CONTENT = 204,
- HTTP_RESET_CONTENT = 205,
- HTTP_PARTIAL_CONTENT = 206,
- HTTP_MULTI_STATUS = 207,
-
- HTTP_MULTIPLE_CHOICE = 300,
- HTTP_MOVED_PERMANENTLY = 301,
- HTTP_FOUND = 302,
- HTTP_SEE_OTHER = 303,
- HTTP_NOT_MODIFIED = 304,
- HTTP_USE_PROXY = 305,
- HTTP_TEMPORARY_REDIRECT = 307,
-
- HTTP_BAD_REQUEST = 400,
- HTTP_UNAUTHORIZED = 401,
- HTTP_PAYMENT_REQUIRED = 402,
- HTTP_FORBIDDEN = 403,
- HTTP_NOT_FOUND = 404,
- HTTP_NOT_ALLOWED = 405,
- HTTP_NOT_ACCEPTABLE = 406,
- HTTP_PROXY_AUTH_REQUIRED = 407,
- HTTP_REQUEST_TIMEOUT = 408,
- HTTP_CONFLICT = 409,
- HTTP_GONE = 410,
- HTTP_LENGTH_REQUIRED = 411,
- HTTP_PRECONDITION_FAILED = 412,
- HTTP_REQUEST_ENTITY_TOO_LARGE = 413,
- HTTP_REQUEST_URI_TOO_LONG = 414,
- HTTP_UNSUPPORTED_MEDIA_TYPE = 415,
- HTTP_REQUESTED_RANGE_NOT_SATISFIABLE = 416,
- HTTP_EXPECTATION_FAILED = 417,
-
- HTTP_INTERNAL_SERVER_ERROR = 500,
- HTTP_NOT_IMPLEMENTED = 501,
- HTTP_BAD_GATEWAY = 502,
- HTTP_SERVICE_UNAVAILABLE = 503,
- HTTP_GATEWAY_TIMEOUT = 504,
- HTTP_VERSION_NOT_SUPPORTED = 505,
- HTTP_INSUFFICIENT_STORAGE_SPACE = 507,
- HTTP_NOT_EXTENDED = 510,
-};
-#endif /* ! _XFRD_HTTP_H_ */
diff --git a/tools/xfrd/lzi_stream.c b/tools/xfrd/lzi_stream.c
deleted file mode 100644
index 5fbec775c4..0000000000
--- a/tools/xfrd/lzi_stream.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- * Copyright (C) 2003 Hewlett-Packard Company.
- *
- * This library is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/** @file
- * An IOStream implementation using LZI to provide compression and decompression.
- * This is designed to provide compression without output latency.
- * Flushing an LZI stream flushes all pending data to the underlying stream.
- * This is essential for stream-based (e.g. networked) applications.
- *
- * A compressed data stream is a sequence of blocks.
- * Each block is the block size followed by the compressed data.
- * The last block has size zero.
- * Sizes are 4-byte unsigned in network order.
- *
- * This format allows compressed data to be read from a stream without reading
- * past the logical end of compressed data.
- *
- * @author Mike Wray <mike.wray@hpl.hp.com>
- */
-#ifndef __KERNEL__
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-
-#include "zlib.h"
-
-#include "allocate.h"
-#include "lzi_stream.h"
-#include "file_stream.h"
-#include "marshal.h"
-
-#define dprintf(fmt, args...) fprintf(stdout, "[DEBUG] LZI>%s" fmt, __FUNCTION__, ##args)
-#define wprintf(fmt, args...) fprintf(stderr, "[WARN] LZI>%s" fmt, __FUNCTION__, ##args)
-#define iprintf(fmt, args...) fprintf(stdout, "[INFO] LZI>%s" fmt, __FUNCTION__, ##args)
-#define eprintf(fmt, args...) fprintf(stderr, "[ERROR] LZI>%s" fmt, __FUNCTION__, ##args)
-
-static int lzi_read(IOStream *s, void *buf, size_t n);
-static int lzi_write(IOStream *s, const void *buf, size_t n);
-static int lzi_error(IOStream *s);
-static int lzi_close(IOStream *s);
-static void lzi_free(IOStream *s);
-static int lzi_flush(IOStream *s);
-
-enum {
- LZI_WRITE = 1,
- LZI_READ = 2,
-};
-
-/** Methods used by a gzFile* IOStream. */
-static const IOMethods lzi_methods = {
- read: lzi_read,
- write: lzi_write,
- error: lzi_error,
- close: lzi_close,
- free: lzi_free,
- flush: lzi_flush,
-};
-
-#define BUFFER_SIZE (512 * 1024)
-
-typedef struct LZIState {
- z_stream zstream;
- void *inbuf;
- uint32_t inbuf_size;
- void *outbuf;
- uint32_t outbuf_size;
- /** Underlying stream for I/O. */
- IOStream *io;
- /** Flags. */
- int flags;
- /** Error indicator. */
- int error;
- int eof;
- int plain_bytes;
- int comp_bytes;
- int zstream_initialized;
- int flushed;
-} LZIState;
-
-static inline int LZIState_writeable(LZIState *s){
- return (s->flags & LZI_WRITE) != 0;
-}
-
-static inline int LZIState_readable(LZIState *s){
- return (s->flags & LZI_READ) != 0;
-}
-
-void LZIState_free(LZIState *z){
- if(!z) return;
- if(z->zstream_initialized){
- if(LZIState_writeable(z)){
- deflateEnd(&z->zstream);
- } else if(LZIState_readable(z)){
- inflateEnd(&z->zstream);
- }
- }
- deallocate(z->inbuf);
- deallocate(z->outbuf);
- deallocate(z);
-}
-
-static int mode_flags(const char *mode, int *flags){
- int err = 0;
- int r=0, w=0;
- if(!mode){
- err = -EINVAL;
- goto exit;
- }
- for(; *mode; mode++){
- if(*mode == 'w') w = 1;
- if(*mode == 'r') r = 1;
- }
- if(r + w != 1){
- err = -EINVAL;
- goto exit;
- }
- if(r) *flags |= LZI_READ;
- if(w) *flags |= LZI_WRITE;
- exit:
- return err;
-}
-
-/** Get the stream state.
- *
- * @param s lzi stream
- * @return stream state.
- */
-static inline LZIState * lzi_state(IOStream *io){
- return (LZIState*)io->data;
-}
-
-IOStream *lzi_stream_io(IOStream *io){
- LZIState *s = lzi_state(io);
- return s->io;
-}
-
-static inline void set_error(LZIState *s, int err){
- if(err < 0 && !s->error){
- s->error = err;
- }
-}
-
-static int zerror(LZIState *s, int err){
- if(err){
- //dprintf("> err=%d\n", err);
- if(err < 0) set_error(s, -EIO);
- }
- return s->error;
-}
-
-int lzi_stream_plain_bytes(IOStream *io){
- LZIState *s = lzi_state(io);
- return s->plain_bytes;
-}
-
-int lzi_stream_comp_bytes(IOStream *io){
- LZIState *s = lzi_state(io);
- return s->comp_bytes;
-}
-
-float lzi_stream_ratio(IOStream *io){
- LZIState *s = lzi_state(io);
- float ratio = 0.0;
- if(s->comp_bytes){
- ratio = ((float) s->comp_bytes)/((float) s->plain_bytes);
- }
- return ratio;
-}
-
-static int alloc(void **p, int n){
- *p = allocate(n);
- return (p ? 0 : -ENOMEM);
-}
-
-LZIState * LZIState_new(IOStream *io, int flags){
- int err = -ENOMEM;
- int zlevel = Z_BEST_SPEED; // Level 1 compression - fastest.
- int zstrategy = Z_DEFAULT_STRATEGY;
- int zwindow = MAX_WBITS;
- int zmemory = 8;
- LZIState *z = ALLOCATE(LZIState);
-
- //dprintf(">\n");
- if(!z) goto exit;
- z->io = io;
- z->flags = flags;
-
- if(LZIState_writeable(z)){
- z->outbuf_size = BUFFER_SIZE;
- /* windowBits is passed < 0 to suppress zlib header */
- err = deflateInit2(&z->zstream, zlevel, Z_DEFLATED, -zwindow, zmemory, zstrategy);
- if (err != Z_OK) goto exit;
- z->zstream_initialized = 1;
- err = alloc(&z->outbuf, z->outbuf_size);
- if(err) goto exit;
- z->zstream.next_out = z->outbuf;
- z->zstream.avail_out = z->outbuf_size;
- } else {
- z->inbuf_size = BUFFER_SIZE;
- err = alloc(&z->inbuf, z->inbuf_size);
- if(err) goto exit;
- ///z->zstream.next_in = z->inbuf;
-
- /* windowBits is passed < 0 to tell that there is no zlib header.
- * Note that in this case inflate *requires* an extra "dummy" byte
- * after the compressed stream in order to complete decompression and
- * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are
- * present after the compressed stream.
- */
- err = inflateInit2(&z->zstream, -zwindow);
- if(err != Z_OK) goto exit;
- z->zstream_initialized = 1;
- }
-
- exit:
- if(err){
- LZIState_free(z);
- z = NULL;
- }
- //dprintf("< z=%p\n", z);
- return z;
-}
-
-int read_block(LZIState *s){
- int err = 0, k = 0;
- //dprintf(">\n");
- if(s->eof) goto exit;
- err = unmarshal_uint32(s->io, &k);
- if(err) goto exit;
- if(k > s->inbuf_size){
- err = -EINVAL;
- goto exit;
- }
- if(k){
- err = unmarshal_bytes(s->io, s->inbuf, k);
- if(err) goto exit;
- } else {
- s->eof = 1;
- }
- s->zstream.avail_in = k;
- s->zstream.next_in = s->inbuf;
- s->comp_bytes += 4;
- s->comp_bytes += k;
- exit:
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int write_block(LZIState *s){
- int err = 0;
- int k = ((char*)s->zstream.next_out) - ((char*)s->outbuf);
- //int k2 = s->outbuf_size - s->zstream.avail_out;
- //dprintf("> k=%d k2=%d\n", k, k2);
- if(!k) goto exit;
- err = marshal_uint32(s->io, k);
- if(err) goto exit;
- err = marshal_bytes(s->io, s->outbuf, k);
- if(err) goto exit;
- s->zstream.next_out = s->outbuf;
- s->zstream.avail_out = s->outbuf_size;
- s->comp_bytes += 4;
- s->comp_bytes += k;
- exit:
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int write_terminator(LZIState *s){
- int err = 0;
- char c = 0;
- err = marshal_uint32(s->io, 1);
- if(err) goto exit;
- err = marshal_bytes(s->io, &c, 1);
- if(err) goto exit;
- err = marshal_uint32(s->io, 0);
- if(err) goto exit;
- s->comp_bytes += 9;
- exit:
- return err;
-}
-
-/** Write to the underlying stream using fwrite();
- *
- * @param io destination
- * @param buf data
- * @param n number of bytes to write
- * @return number of bytes written
- */
-static int lzi_write(IOStream *io, const void *buf, size_t n){
- int err = 0;
- LZIState *s = lzi_state(io);
-
- //dprintf("> buf=%p n=%d\n", buf, n);
- if(!LZIState_writeable(s)){
- err = -EINVAL;
- goto exit;
- }
- s->flushed = 0;
- s->zstream.next_in = (void*)buf;
- s->zstream.avail_in = n;
- while(s->zstream.avail_in){
- if(s->zstream.avail_out == 0){
- err = write_block(s);
- if(err) goto exit;
- }
- //dprintf("> 1 deflate avail_in=%d avail_out=%d\n", s->zstream.avail_in, s->zstream.avail_out);
- //dprintf("> 1 deflate next_in=%p next_out=%p\n", s->zstream.next_in, s->zstream.next_out);
- err = zerror(s, deflate(&s->zstream, Z_NO_FLUSH));
- //dprintf("> 2 deflate avail_in=%d avail_out=%d\n", s->zstream.avail_in, s->zstream.avail_out);
- //dprintf("> 2 deflate next_in=%p next_out=%p\n", s->zstream.next_in, s->zstream.next_out);
- if(err) goto exit;
- }
- err = n;
- s->plain_bytes += n;
- exit:
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-
-/** Read from the underlying stream.
- *
- * @param io input
- * @param buf where to put input
- * @param n number of bytes to read
- * @return number of bytes read
- */
-static int lzi_read(IOStream *io, void *buf, size_t n){
- int err, zerr;
- LZIState *s = lzi_state(io);
-
- //dprintf("> n=%d\n", n);
- if(!LZIState_readable(s)){
- err = -EINVAL;
- goto exit;
- }
- s->zstream.next_out = buf;
- s->zstream.avail_out = n;
- while(s->zstream.avail_out){
- if(s->zstream.avail_in == 0){
- err = read_block(s);
- }
- //dprintf("> 1 deflate avail_in=%d avail_out=%d\n", s->zstream.avail_in, s->zstream.avail_out);
- zerr = inflate(&s->zstream, Z_NO_FLUSH);
- //dprintf("> 2 deflate avail_in=%d avail_out=%d\n", s->zstream.avail_in, s->zstream.avail_out);
- if(zerr == Z_STREAM_END) break;
- //dprintf("> zerr=%d\n", zerr);
- err = zerror(s, zerr);
- if(err) goto exit;
- }
- err = n - s->zstream.avail_out;
- s->plain_bytes += err;
- exit:
- set_error(s, err);
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-static int flush_output(LZIState *s, int mode){
- int err = 0, zerr;
- int done = 0;
- int avail_out_old;
-
- //dprintf("> avail_in=%d avail_out=%d\n", s->zstream.avail_in, s->zstream.avail_out);
- if(s->flushed == 1 + mode) goto exit;
- //s->zstream.avail_in = 0; /* should be zero already anyway */
- for(;;){
- // Write any available output.
- if(done || s->zstream.avail_out == 0){
- err = write_block(s);
- if(err) goto exit;
- if(done) break;
- }
- //dprintf("> 1 deflate avail_in=%d avail_out=%d\n", s->zstream.avail_in, s->zstream.avail_out);
- avail_out_old = s->zstream.avail_out;
- zerr = deflate(&s->zstream, mode);
- err = zerror(s, zerr);
- //dprintf("> 2 deflate avail_in=%d avail_out=%d\n", s->zstream.avail_in, s->zstream.avail_out);
- //dprintf("> deflate=%d\n", err);
- //done = (s->zstream.avail_out != 0);
- //done = (s->zstream.avail_in == 0) && (s->zstream.avail_out == avail_out_old);
- if(0 && mode == Z_FINISH){
- done = (zerr == Z_STREAM_END);
- } else {
- done = (s->zstream.avail_in == 0)
- //&& (s->zstream.avail_out == avail_out_old)
- && (s->zstream.avail_out != 0);
- }
- }
- s->flushed = 1 + mode;
- exit:
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Flush any pending input to the underlying stream.
- *
- * @param s lzi stream
- * @return 0 on success, error code otherwise
- */
-static int lzi_flush(IOStream *io){
- int err = 0;
- LZIState *s = lzi_state(io);
- //dprintf(">\n");
- if(!LZIState_writeable(s)){
- err = -EINVAL;
- goto exit;
- }
- err = flush_output(s, Z_SYNC_FLUSH);
- if(err) goto exit;
- err = IOStream_flush(s->io);
- exit:
- set_error(s, err);
- //dprintf("< err=%d\n", err);
- return (err < 0 ? err : 0);
-}
-
-/** Check if a stream has an error.
- *
- * @param s lzi stream
- * @return code if has an error, 0 otherwise
- */
-static int lzi_error(IOStream *s){
- int err = 0;
- LZIState *state = lzi_state(s);
- err = state->error;
- if(err) goto exit;
- err = IOStream_error(state->io);
- exit:
- return err;
-}
-
-/** Close an lzi stream.
- *
- * @param s lzi stream to close
- * @return result of the close
- */
-static int lzi_close(IOStream *io){
- int err = 0;
- LZIState *s = lzi_state(io);
- if(LZIState_writeable(s)){
- err = flush_output(s, Z_FINISH);
- if(err) goto exit;
- err = write_terminator(s);
- if(err) goto exit;
- err = IOStream_flush(s->io);
- }
- exit:
- err = IOStream_close(s->io);
- set_error(s, err);
- return err;
-}
-
-/** Free an lzi stream.
- *
- * @param s lzi stream
- */
-static void lzi_free(IOStream *s){
- LZIState *state = lzi_state(s);
- IOStream_free(state->io);
- LZIState_free(state);
- s->data = NULL;
-}
-
-/** Create an lzi stream for an IOStream.
- *
- * @param io stream to wrap
- * @return new IOStream using f for i/o
- */
-IOStream *lzi_stream_new(IOStream *io, const char *mode){
- int err = -ENOMEM;
- int flags = 0;
- IOStream *zio = NULL;
- LZIState *state = NULL;
-
- zio = ALLOCATE(IOStream);
- if(!zio) goto exit;
- err = mode_flags(mode, &flags);
- if(err) goto exit;
- state = LZIState_new(io, flags);
- if(!state) goto exit;
- err = 0;
- zio->data = state;
- zio->methods = &lzi_methods;
- exit:
- if(err){
- if(state) LZIState_free(state);
- if(zio) deallocate(zio);
- zio = NULL;
- }
- return zio;
-}
-
-/** IOStream version of fdopen().
- *
- * @param fd file descriptor
- * @param flags giving the mode to open in (as for fdopen())
- * @return new stream for the open file, or NULL if failed
- */
-IOStream *lzi_stream_fdopen(int fd, const char *mode){
- int err = -ENOMEM;
- IOStream *io = NULL, *zio = NULL;
- io = file_stream_fdopen(fd, mode);
- if(!io) goto exit;
- zio = lzi_stream_new(io, mode);
- if(!io) goto exit;
- err = 0;
- exit:
- if(err){
- IOStream_free(io);
- IOStream_free(zio);
- zio = NULL;
- }
- return zio;
-}
-#endif
diff --git a/tools/xfrd/marshal.c b/tools/xfrd/marshal.c
deleted file mode 100644
index 21691d4412..0000000000
--- a/tools/xfrd/marshal.c
+++ /dev/null
@@ -1,207 +0,0 @@
-#include <errno.h>
-#include "sys_net.h"
-#include "allocate.h"
-#include "marshal.h"
-
-#define dprintf(fmt, args...) IOStream_print(iostdout, "[DEBUG] %s" fmt, __FUNCTION__, ##args)
-#define wprintf(fmt, args...) IOStream_print(iostderr, "[WARN] %s" fmt, __FUNCTION__, ##args)
-#define iprintf(fmt, args...) IOStream_print(iostdout, "[INFO] %s" fmt, __FUNCTION__, ##args)
-#define eprintf(fmt, args...) IOStream_print(iostderr, "[ERROR] %s" fmt, __FUNCTION__, ##args)
-
-
-#define ARRAY_SIZE(ary) (sizeof(ary)/sizeof((ary)[0]))
-
-/* Messages are coded as msgid followed by message fields.
- * Initial message on any channel is hello - so can check version
- * compatibility.
- *
- * char* -> uint16_t:n <n bytes>
- * ints/uints go as suitable number of bytes (e.g. uint16_t is 2 bytes).
- * optional fields go as '1' <val> or '0' (the 0/1 is 1 byte).
- * lists go as ('1' <elt>)* '0'
- */
-
-int marshal_flush(IOStream *io){
- int err = 0;
- err = IOStream_flush(io);
- return err;
-}
-
-int marshal_bytes(IOStream *io, void *s, uint32_t s_n){
- int err = 0;
- int n;
- n = IOStream_write(io, s, s_n);
- if(n < 0){
- err = n;
- } else if (n < s_n){
- wprintf("> Wanted %d, got %d\n", s_n, n);
- err = -EIO;
- }
- return err;
-}
-
-int unmarshal_bytes(IOStream *io, void *s, uint32_t s_n){
- int err = 0;
- int n;
- //dprintf("> s_n=%d\n", s_n);
- n = IOStream_read(io, s, s_n);
- //dprintf("> n=%d\n", n);
- if(n < 0){
- err = n;
- } else if(n < s_n){
- wprintf("> Wanted %d, got %d\n", s_n, n);
- err = -EIO;
- }
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int marshal_uint8(IOStream *io, uint8_t x){
- return marshal_bytes(io, &x, sizeof(x));
-}
-
-int unmarshal_uint8(IOStream *io, uint8_t *x){
- return unmarshal_bytes(io, x, sizeof(*x));
-}
-
-int marshal_uint16(IOStream *io, uint16_t x){
- x = htons(x);
- return marshal_bytes(io, &x, sizeof(x));
-}
-
-int unmarshal_uint16(IOStream *io, uint16_t *x){
- int err = 0;
- err = unmarshal_bytes(io, x, sizeof(*x));
- *x = ntohs(*x);
- return err;
-}
-
-int marshal_int32(IOStream *io, int32_t x){
- int err = 0;
- //dprintf("> x=%d\n", x);
- x = htonl(x);
- err = marshal_bytes(io, &x, sizeof(x));
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unmarshal_int32(IOStream *io, int32_t *x){
- int err = 0;
- //dprintf(">\n");
- err = unmarshal_bytes(io, x, sizeof(*x));
- *x = ntohl(*x);
- //dprintf("< err=%d x=%d\n", err, *x);
- return err;
-}
-
-int marshal_uint32(IOStream *io, uint32_t x){
- int err = 0;
- //dprintf("> x=%u\n", x);
- x = htonl(x);
- err = marshal_bytes(io, &x, sizeof(x));
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unmarshal_uint32(IOStream *io, uint32_t *x){
- int err = 0;
- //dprintf(">\n");
- err = unmarshal_bytes(io, x, sizeof(*x));
- *x = ntohl(*x);
- //dprintf("< err=%d x=%u\n", err, *x);
- return err;
-}
-
-int marshal_uint64(IOStream *io, uint64_t x){
- int err;
- err = marshal_uint32(io, (uint32_t) ((x >> 32) & 0xffffffff));
- if(err) goto exit;
- err = marshal_uint32(io, (uint32_t) ( x & 0xffffffff));
- exit:
- return err;
-}
-
-int unmarshal_uint64(IOStream *io, uint64_t *x){
- int err = 0;
- uint32_t hi, lo;
- err = unmarshal_uint32(io, &hi);
- if(err) goto exit;
- err = unmarshal_uint32(io, &lo);
- *x = (((uint64_t) hi) << 32) | lo;
- exit:
- return err;
-}
-
-int marshal_net16(IOStream *io, net16_t x){
- return marshal_bytes(io, &x, sizeof(x));
-}
-
-int unmarshal_net16(IOStream *io, net16_t *x){
- int err = 0;
- err = unmarshal_bytes(io, x, sizeof(*x));
- return err;
-}
-
-int marshal_net32(IOStream *io, net32_t x){
- return marshal_bytes(io, &x, sizeof(x));
-}
-
-int unmarshal_net32(IOStream *io, net32_t *x){
- int err = 0;
- err = unmarshal_bytes(io, x, sizeof(*x));
- return err;
-}
-
-int marshal_string(IOStream *io, char *s, uint32_t s_n){
- int err;
- //dprintf("> s=%s\n", s);
- err = marshal_uint32(io, s_n);
- if(err) goto exit;
- err = marshal_bytes(io, s, s_n);
- exit:
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unmarshal_string(IOStream *io, char *s, uint32_t s_n){
- int err = 0, val_n = 0;
- //dprintf(">\n");
- err = unmarshal_uint32(io, &val_n);
- if(err) goto exit;
- if(val_n >= s_n){
- err = -EINVAL;
- goto exit;
- }
- err = unmarshal_bytes(io, s, val_n);
- if(err) goto exit;
- s[val_n] = '\0';
- exit:
- //dprintf("< err=%d s=%s\n", err, s);
- return err;
-}
-
-int unmarshal_new_string(IOStream *io, char **s, uint32_t *s_n){
- int err = 0, val_n = 0;
- char *val = NULL;
- //dprintf(">\n");
- err = unmarshal_uint32(io, &val_n);
- if(err) goto exit;
- val = allocate(val_n + 1);
- if(!val){
- err = -ENOMEM;
- goto exit;
- }
- err = unmarshal_bytes(io, val, val_n);
- if(err) goto exit;
- val[val_n] = '\0';
- exit:
- if(err){
- if(val) deallocate(val);
- val = NULL;
- val_n = 0;
- }
- *s = val;
- if(s_n) *s_n = val_n;
- //dprintf("< err=%d s=%s\n", err, *s);
- return err;
-}
diff --git a/tools/xfrd/marshal.h b/tools/xfrd/marshal.h
deleted file mode 100644
index 65e9682cea..0000000000
--- a/tools/xfrd/marshal.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _XUTIL_MARSHAL_H_
-#define _XUTIL_MARSHAL_H_
-
-#include "iostream.h"
-
-/** A 16-bit uint in network order, e.g. a port number. */
-typedef uint16_t net16_t;
-
-/** A 32-bit uint in network order, e.g. an IP address. */
-typedef uint32_t net32_t;
-
-extern int marshal_flush(IOStream *io);
-
-extern int marshal_bytes(IOStream *io, void *s, uint32_t s_n);
-extern int unmarshal_bytes(IOStream *io, void *s, uint32_t s_n);
-
-extern int marshal_uint8(IOStream *io, uint8_t x);
-extern int unmarshal_uint8(IOStream *io, uint8_t *x);
-
-extern int marshal_uint16(IOStream *io, uint16_t x);
-extern int unmarshal_uint16(IOStream *io, uint16_t *x);
-
-extern int marshal_uint32(IOStream *io, uint32_t x);
-extern int unmarshal_uint32(IOStream *io, uint32_t *x);
-
-extern int marshal_int32(IOStream *io, int32_t x);
-extern int unmarshal_int32(IOStream *io, int32_t *x);
-
-extern int marshal_uint64(IOStream *io, uint64_t x);
-extern int unmarshal_uint64(IOStream *io, uint64_t *x);
-
-extern int marshal_net16(IOStream *io, net16_t x);
-extern int unmarshal_net16(IOStream *io, net16_t *x);
-
-extern int marshal_net32(IOStream *io, net32_t x);
-extern int unmarshal_net32(IOStream *io, net32_t *x);
-
-extern int marshal_string(IOStream *io, char *s, uint32_t s_n);
-extern int unmarshal_string(IOStream *io, char *s, uint32_t s_n);
-extern int unmarshal_new_string(IOStream *io, char **s, uint32_t *s_n);
-
-#endif /* ! _XUTIL_MARSHAL_H_ */
diff --git a/tools/xfrd/select.c b/tools/xfrd/select.c
deleted file mode 100644
index bdaccfe000..0000000000
--- a/tools/xfrd/select.c
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include "select.h"
-
-/** Zero all the file descriptor sets.
- *
- * @param set select set
- * @param fd file descriptor
- * @return 0 on success, -1 otherwise
- */
-void SelectSet_zero(SelectSet *set){
- set->n = 0;
- FD_ZERO(&set->rd);
- FD_ZERO(&set->wr);
- FD_ZERO(&set->er);
-}
-
-/** Add a file descriptor to the write set.
- *
- * @param set select set
- * @param fd file descriptor
- * @return 0 on success, -1 otherwise
- */
-void SelectSet_add_read(SelectSet *set, int fd){
- FD_SET(fd, &set->rd);
- if(fd > set->n) set->n = fd;
-}
-
-/** Add a file descriptor to the write set.
- *
- * @param set select set
- * @param fd file descriptor
- * @return 0 on success, -1 otherwise
- */
-void SelectSet_add_write(SelectSet *set, int fd){
- FD_SET(fd, &set->wr);
- if(fd > set->n) set->n = fd;
-}
-
-/** Select on file descriptors.
- *
- * @param set select set
- * @param timeout timeout (may be NULL for no timeout)
- * @return 0 on success, -1 otherwise
- */
-int SelectSet_select(SelectSet *set, struct timeval *timeout){
- return select(set->n+1, &set->rd, &set->wr, &set->er, timeout);
-}
diff --git a/tools/xfrd/select.h b/tools/xfrd/select.h
deleted file mode 100644
index 2453f98536..0000000000
--- a/tools/xfrd/select.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _XFRD_SELECT_H_
-#define _XFRD_SELECT_H_
-
-/** Set of file descriptors for select.
- */
-typedef struct SelectSet {
- int n;
- fd_set rd, wr, er;
-} SelectSet;
-
-extern void SelectSet_zero(SelectSet *set);
-extern void SelectSet_add_read(SelectSet *set, int fd);
-extern void SelectSet_add_write(SelectSet *set, int fd);
-extern int SelectSet_select(SelectSet *set, struct timeval *timeout);
-
-#endif /* ! _XFRD_SELECT_H_ */
diff --git a/tools/xfrd/xdr.c b/tools/xfrd/xdr.c
deleted file mode 100644
index 61dbe44ed6..0000000000
--- a/tools/xfrd/xdr.c
+++ /dev/null
@@ -1,316 +0,0 @@
-#include <errno.h>
-#include "xdr.h"
-
-#define MODULE_NAME "XDR"
-//#define DEBUG 1
-#undef DEBUG
-#include "debug.h"
-
-/** @file
- * XDR packer/unpacker for elements.
- *
- * string -> [T_STRING] [len:u16] <len bytes>
- * atom -> [T_ATOM] [len:u16] <len bytes>
- * uint -> [T_UINT] [value]
- * cons -> [T_LIST] {1 elt}* 0
- * null -> [T_NULL]
- * none -> [T_NONE]
- * bool -> [T_BOOL] { 0:u8 | 1:u8 }
- *
- * types packed as u16.
- *
- * So (a b c) -> [T_CONS] a [T_CONS] b [T_CONS] c [T_NULL]
- * () -> [T_NULL]
- */
-
-int pack_bool(IOStream *io, int x){
- int err=0;
- //dprintf("> x=%d\n", x);
- err = IOStream_print(io, "%c", 0xff & x);
- if(err > 0) err = 0;
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unpack_bool(IOStream *io, int *x){
- int err = 0;
- int c;
- //dprintf(">\n");
- c = IOStream_getc(io);
- *x = (c < 0 ? 0 : c);
- err = IOStream_error(io);
- if(c < 0 && !err) err = -EIO;
- //dprintf("< err=%d x=%d\n", err, *x);
- return err;
-}
-
-int pack_ushort(IOStream *io, unsigned short x){
- int err=0;
- //dprintf("> x=%u\n", x);
- err = IOStream_print(io, "%c%c",
- 0xff & (x >> 8),
- 0xff & (x ));
- if(err > 0) err = 0;
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unpack_ushort(IOStream *io, unsigned short *x){
- int err = 0;
- int i, c = 0;
- //dprintf(">\n");
- *x = 0;
- for(i = 0; i< 2; i++){
- c = IOStream_getc(io);
- if(c < 0) break;
- *x <<= 8;
- *x |= (0xff & c);
- }
- err = IOStream_error(io);
-
- if(c < 0 && !err) err = -EIO;
- //dprintf("< err=%d x=%u\n", err, *x);
- return err;
-}
-
-int pack_type(IOStream *io, unsigned short x){
- return pack_ushort(io, x);
-}
-
-int unpack_type(IOStream *io, unsigned short *x){
- return unpack_ushort(io, x);
-}
-
-int pack_uint(IOStream *io, unsigned int x){
- int err=0;
- //dprintf("> x=%u\n", x);
- err = IOStream_print(io, "%c%c%c%c",
- 0xff & (x >> 24),
- 0xff & (x >> 16),
- 0xff & (x >> 8),
- 0xff & (x ));
- if(err > 0) err = 0;
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unpack_uint(IOStream *io, unsigned int *x){
- int err = 0;
- int i, c = 0;
- //dprintf(">\n");
- *x = 0;
- for(i = 0; i< 4; i++){
- c = IOStream_getc(io);
- if(c < 0) break;
- *x <<= 8;
- *x |= (0xff & c);
- }
- err = IOStream_error(io);
- if(c < 0 && !err) err = -EIO;
- //dprintf("< err=%d x=%u\n", err, *x);
- return err;
-}
-
-int pack_string(IOStream *io, Sxpr x){
- int err = 0;
- unsigned short n = 0xffff & string_length(x);
- char *s = string_string(x);
- int i;
- //dprintf("> n=%d s=%s\n", n, s);
- err = pack_ushort(io, n);
- if(err) goto exit;
- for(i = 0; i < n; i++){
- err = IOStream_print(io, "%c", s[i]);
- if(err < 0) break;
- }
- if(err > 0) err = 0;
- exit:
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unpack_string(IOStream *io, Sxpr *x){
- int err;
- unsigned short n;
- int i, c = 0;
- char *s;
- Sxpr val = ONONE;
-
- //dprintf(">\n");
- err = unpack_ushort(io, &n);
- if(err) goto exit;
- val = halloc(n+1, T_STRING);
- if(NOMEMP(val)){
- err = -ENOMEM;
- goto exit;
- }
- s = string_string(val);
- for(i=0; i<n; i++){
- c = IOStream_getc(io);
- if(c < 0) break;
- s[i] = (char)c;
- }
- s[n] = '\0';
- exit:
- err = IOStream_error(io);
- if(c < 0 && !err) err = -EIO;
- if(err){
- objfree(val);
- val = ONONE;
- }
- *x = val;
- //IOStream_print(iostdout, "n=%d str=", n);
- //objprint(iostdout, *x, 0);
- //IOStream_print(iostdout, "\n");
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int pack_cons(IOStream *io, Sxpr x){
- int err = 0;
- Sxpr l;
- //dprintf(">\n");
- for(l = x; CONSP(l); l = CDR(l)){
- err = pack_bool(io, 1);
- if(err) goto exit;
- err = pack_sxpr(io, CAR(l));
- if(err) goto exit;
- }
- err = pack_bool(io, 0);
- exit:
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unpack_cons(IOStream *io, Sxpr *x){
- int err = 0;
- int more = 0;
- Sxpr u = ONONE, v = ONONE, val = ONULL;
-
- dprintf(">\n");
- while(1){
- err = unpack_bool(io, &more);
- if(err) goto exit;
- if(!more){
- //IOStream_print(iostdout, "unpack_cons 1 val=");
- ////objprint(iostdout, val, 0);
- IOStream_print(iostdout, "\n");
-
- val = nrev(val);
-
- //IOStream_print(iostdout, "unpack_cons 2 val=");
- //objprint(iostdout, val, 0);
- //IOStream_print(iostdout, "\n");
-
- break;
- }
- err = unpack_sxpr(io, &u);
- if(err) goto exit;
- v = cons_new(u, val);
- if(NOMEMP(v)){
- err = -ENOMEM;
- objfree(u);
- goto exit;
- }
- val = v;
- }
- exit:
- if(err){
- objfree(val);
- val = ONONE;
- }
- *x = val;
- dprintf("< err=%d\n", err);
- return err;
-}
-
-int pack_sxpr(IOStream *io, Sxpr x){
- int err = 0;
- unsigned short type = get_type(x);
- //dprintf(">\n");
- //objprint(iostdout, x, 0);
- //IOStream_print(iostdout, "\n");
-
- err = pack_type(io, type);
- if(err) goto exit;
- switch(type){
- case T_NULL:
- break;
- case T_NONE:
- break;
- case T_BOOL:
- err = pack_bool(io, get_ul(x));
- break;
- case T_CONS:
- err = pack_cons(io, x);
- break;
- case T_ATOM:
- err = pack_string(io, OBJ_ATOM(x)->name);
- break;
- case T_STRING:
- err = pack_string(io, x);
- break;
- case T_UINT:
- err = pack_uint(io, get_ul(x));
- break;
- default:
- err = -EINVAL;
- IOStream_print(iostderr, "%s> invalid type %d\n", __FUNCTION__, type);
- break;
- }
- exit:
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int unpack_sxpr(IOStream *io, Sxpr *x){
- int err = 0;
- unsigned short type;
- unsigned int u;
- Sxpr val = ONONE, y;
-
- //dprintf(">\n");
- err = unpack_type(io, &type);
- if(err) goto exit;
- switch(type){
- case T_NULL:
- val = ONULL;
- break;
- case T_NONE:
- val = ONONE;
- break;
- case T_CONS:
- err = unpack_cons(io, &val);
- break;
- case T_BOOL:
- err = unpack_bool(io, &u);
- if(err) goto exit;
- val = (u ? OTRUE : OFALSE);
- break;
- case T_ATOM:
- err = unpack_string(io, &y);
- if(err) goto exit;
- val = intern(string_string(y));
- objfree(y);
- break;
- case T_STRING:
- err = unpack_string(io, &val);
- break;
- case T_UINT:
- err = unpack_uint(io, &u);
- if(err) goto exit;
- val = OBJI(type, u);
- break;
- default:
- err = -EINVAL;
- IOStream_print(iostderr, "%s> invalid type %d\n", __FUNCTION__, type);
- break;
- }
- exit:
- *x = (err ? ONONE : val);
- //IOStream_print(iostdout, "sxpr=");
- //objprint(iostdout, *x, 0);
- //IOStream_print(iostdout, "\n");
- //dprintf("< err=%d\n", err);
- return err;
-}
diff --git a/tools/xfrd/xdr.h b/tools/xfrd/xdr.h
deleted file mode 100644
index 793cd34a29..0000000000
--- a/tools/xfrd/xdr.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _XUTIL_XDR_H_
-#define _XUTIL_XDR_H_
-#include "iostream.h"
-#include "sxpr.h"
-
-int pack_type(IOStream *io, unsigned short x);
-
-int unpack_type(IOStream *io, unsigned short *x);
-
-int pack_bool(IOStream *io, int x);
-
-int unpack_bool(IOStream *io, int *x);
-
-int pack_uint(IOStream *out, unsigned int x);
-
-int unpack_uint(IOStream *in, unsigned int *x);
-
-int pack_string(IOStream *out, Sxpr x);
-
-int unpack_string(IOStream *in, Sxpr *x);
-
-int pack_cons(IOStream *out, Sxpr x);
-
-int unpack_cons(IOStream *in, Sxpr *x);
-
-int pack_sxpr(IOStream *out, Sxpr x);
-
-int unpack_sxpr(IOStream *in, Sxpr *x);
-
-#endif /* _XUTIL_XDR_H_ */
diff --git a/tools/xfrd/xen_domain.c b/tools/xfrd/xen_domain.c
deleted file mode 100644
index 8a374eba97..0000000000
--- a/tools/xfrd/xen_domain.c
+++ /dev/null
@@ -1,395 +0,0 @@
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#ifdef _XEN_XFR_STUB_
-typedef unsigned long u32;
-#else
-#include "xc.h"
-#include "xc_io.h"
-#endif
-
-#include "xen_domain.h"
-#include "marshal.h"
-#include "xdr.h"
-#include "xfrd.h"
-
-#define MODULE_NAME "XFRD"
-#define DEBUG 1
-#undef DEBUG
-#include "debug.h"
-
-int domain_suspend(void *data, u32 dom){
- int err = 0;
- Conn *xend = data;
-
- dprintf("> dom=%lu data=%p\n", dom, data);
- err = xfr_vm_suspend(xend, dom);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-int domain_configure(void *data, u32 dom, char *vmconfig, int vmconfig_n){
- return xen_domain_configure(dom, vmconfig, vmconfig_n);
-}
-
-#ifndef _XEN_XFR_STUB_
-static int xc_handle = 0;
-
-int xcinit(void){
- if(xc_handle <= 0){
- xc_handle = xc_interface_open();
- }
- dprintf("< xc_handle=%d\n", xc_handle);
- return xc_handle;
-}
-
-void xcfini(void){
- if(xc_handle > 0){
- xc_interface_close(xc_handle);
- xc_handle = 0;
- }
-}
-#endif
-
-/** Write domain state.
- *
- * At some point during this the domain is suspended, and then there's no way back.
- * Even if something later goes wrong we can't restart the domain.
- */
-int xen_domain_snd(Conn *xend, IOStream *io,
- uint32_t dom,
- char *vmconfig, int vmconfig_n,
- int live, int resource){
- int err = 0;
-#ifdef _XEN_XFR_STUB_
- char buf[1024];
- int n, k, d, buf_n;
- dprintf("> dom=%d\n", dom);
- err = marshal_uint32(io, dom);
- if(err) goto exit;
- err = marshal_string(io, vmconfig, vmconfig_n);
- if(err) goto exit;
- n = 32 * 1024 * 1024;
- n = 32 * 1024;
- buf_n = sizeof(buf);
- err = marshal_uint32(io, n);
- for(k = 0; k < n; k += d){
- d = n - k;
- if(d > buf_n) d = buf_n;
- err = marshal_bytes(io, buf, d);
- if(err) goto exit;
- dprintf("> k=%d n=%d\n", k, n);
- }
-
- dom = 99;
- err = domain_suspend(xend, dom);
- IOStream_close(io);
- exit:
-#else
- XcIOContext _ioctxt = {}, *ioctxt = &_ioctxt;
- ioctxt->domain = dom;
- ioctxt->io = io;
- ioctxt->info = iostdout;
- ioctxt->err = iostderr;
- ioctxt->data = xend;
- ioctxt->suspend = domain_suspend;
- ioctxt->vmconfig = vmconfig;
- ioctxt->vmconfig_n = vmconfig_n;
- if(live){
- ioctxt->flags |= XCFLAGS_LIVE;
- }
- ioctxt->resource = resource;
- err = xc_linux_save(xcinit(), ioctxt);
-#endif
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Receive domain state.
- * Create a new domain and store the received state into it.
- */
-int xen_domain_rcv(IOStream *io,
- uint32_t *dom,
- char **vmconfig, int *vmconfig_n,
- int *configured){
- int err = 0;
-#ifdef _XEN_XFR_STUB_
- char buf[1024];
- int n, k, d, buf_n;
- dprintf(">\n");
- err = unmarshal_uint32(io, dom);
- if(err) goto exit;
- err = unmarshal_new_string(io, vmconfig, vmconfig_n);
- if(err) goto exit;
- err = unmarshal_uint32(io, &n);
- buf_n = sizeof(buf);
- for(k = 0; k < n; k += d){
- d = n - k;
- if(d > buf_n) d = buf_n;
- err = unmarshal_bytes(io, buf, d);
- if(err) goto exit;
- dprintf("> k=%d n=%d\n", k, n);
- }
- exit:
-#else
- XcIOContext _ioctxt = {}, *ioctxt = &_ioctxt;
- dprintf(">\n");
- ioctxt->io = io;
- ioctxt->info = iostdout;
- ioctxt->err = iostderr;
- ioctxt->configure = domain_configure;
- if ( !*configured )
- ioctxt->flags |= XCFLAGS_CONFIGURE;
-
- err = xc_linux_restore(xcinit(), ioctxt);
- *dom = ioctxt->domain;
- *vmconfig = ioctxt->vmconfig;
- *vmconfig_n = ioctxt->vmconfig_n;
- *configured = (ioctxt->flags & XCFLAGS_CONFIGURE);
-#endif
- dprintf("< err=%d\n", err);
- return err;
-}
-
-#include <curl/curl.h>
-#include "http.h"
-
-/** Flag indicating whether we need to initialize libcurl.
- */
-static int do_curl_global_init = 1;
-
-/** Get a curl handle, initializing libcurl if needed.
- *
- * @return curl handle
- */
-static CURL *curlinit(void){
- if(do_curl_global_init){
- do_curl_global_init = 0;
- // Stop libcurl using the proxy. There's a curl option to
- // set the proxy - but no option to defeat it.
- unsetenv("http_proxy");
- curl_global_init(CURL_GLOBAL_ALL);
- }
- return curl_easy_init();
-}
-
-/** Curl debug function.
- */
-int curldebug(CURL *curl, curl_infotype ty, char *buf, int buf_n, void *data){
- // printf("%*s\n", buf_n, buf); /* Does not compile correctly on non 32bit platforms */
- fwrite(data, buf_n, 1, stdout);
- printf("\n");
- return 0;
-}
-
-/** Setup a curl handle with a url.
- * Creates the url by formatting 'fmt' and the remaining arguments.
- *
- * @param pcurl return parameter for the curl handle
- * @param url url buffer
- * @param url_n size of url
- * @param fmt url format string, followed by parameters
- * @return 0 on success, error code otherwise
- */
-static int curlsetup(CURL **pcurl, struct curl_slist **pheaders, char *url, int url_n, char *fmt, ...){
- int err = 0;
- va_list args;
- CURL *curl = NULL;
- struct curl_slist *headers = NULL;
- int n = 0;
-
- curl = curlinit();
- if(!curl){
- eprintf("> Could not init libcurl\n");
- err = -ENOMEM;
- goto exit;
- }
- url_n -= 1;
- va_start(args, fmt);
- n = vsnprintf(url, url_n, fmt, args);
- va_end(args);
- if(n <= 0 || n >= url_n){
- err = -ENOMEM;
- eprintf("> Out of memory in url\n");
- goto exit;
- }
- dprintf("> url=%s\n", url);
-#if DEBUG
- // Verbose.
- curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
- // Call the debug function on data received.
- curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, curldebug);
-#else
- // No progress meter.
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1);
- // Completely quiet.
- curl_easy_setopt(curl, CURLOPT_MUTE, 1);
-#endif
- // Set the URL.
- curl_easy_setopt(curl, CURLOPT_URL, url);
-
- headers = curl_slist_append(headers, "Expect:");
- curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
-
- exit:
- if(err && curl){
- curl_easy_cleanup(curl);
- curl = NULL;
- }
- *pcurl = curl;
- if (pheaders)
- *pheaders = headers;
- return err;
-}
-
-static void curlcleanup(CURL **pcurl, struct curl_slist **pheaders){
- if (*pcurl)
- curl_easy_cleanup(*pcurl);
- if (*pheaders)
- curl_slist_free_all(*pheaders);
- *pcurl = NULL;
- *pheaders = NULL;
-}
-/** Make the http request stored in the curl handle and get
- * the result code from the curl code and the http return code.
- *
- * @param curl curl handle
- * @return 0 for success, error code otherwise
- */
-int curlresult(CURL *curl){
- int err = 0;
- CURLcode curlcode = 0;
- long httpcode = 0;
-
- curlcode = curl_easy_perform(curl);
- if(curlcode){
- eprintf("> curlcode=%d\n", curlcode);
- err = -EINVAL;
- goto exit;
- }
- curl_easy_getinfo(curl, CURLINFO_HTTP_CODE, &httpcode);
- if(httpcode != HTTP_OK){
- eprintf("> httpcode=%d\n", (int)httpcode);
- err = -EINVAL;
- goto exit;
- }
- exit:
- return err;
-}
-
-/** Get xend to list domains.
- * We use this to force xend to refresh its domain list.
- *
- * @return 0 on success, error code otherwise
- */
-int xen_domain_ls(void){
- int err = 0;
- CURL *curl = NULL;
- struct curl_slist *headers = NULL;
- char url[128] = {};
- int url_n = sizeof(url);
-
- dprintf(">\n");
- err = curlsetup(&curl, &headers, url, url_n, "http://localhost:%d/xend/domain", XEND_PORT);
- if(err) goto exit;
- err = curlresult(curl);
- exit:
- curlcleanup(&curl, &headers);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Get xend to configure a new domain.
- *
- * @param dom domain id
- * @param vmconfig configuration string
- * @param vmconfig_n length of vmconfig
- * @return 0 on success, error code otherwise
- */
-int xen_domain_configure(uint32_t dom, char *vmconfig, int vmconfig_n){
- int err = 0;
- CURL *curl = NULL;
- struct curl_slist *headers = NULL;
- char url[128] = {};
- int url_n = sizeof(url);
- struct curl_httppost *form = NULL, *last = NULL;
- CURLFORMcode formcode = 0;
-
- dprintf("> dom=%u\n", dom);
- // List domains so that xend will update its domain list and notice the new domain.
- xen_domain_ls();
-
- err = curlsetup(&curl, &headers, url, url_n, "http://localhost:%d/xend/domain/%u", XEND_PORT, dom);
- if(err) goto exit;
-
- // Config field - set from vmconfig.
- formcode = curl_formadd(&form, &last,
- CURLFORM_COPYNAME, "config",
- CURLFORM_BUFFER, "config",
- CURLFORM_BUFFERPTR, vmconfig,
- CURLFORM_BUFFERLENGTH, vmconfig_n,
- CURLFORM_CONTENTTYPE, "application/octet-stream",
- CURLFORM_END);
- if(formcode){
- eprintf("> Error adding config field.\n");
- goto exit;
- }
- // Op field.
- formcode = curl_formadd(&form, &last,
- CURLFORM_COPYNAME, "op",
- CURLFORM_COPYCONTENTS, "configure",
- CURLFORM_END);
- if(formcode){
- eprintf("> Error adding op field.\n");
- err = -EINVAL;
- goto exit;
- }
- // POST the form.
- curl_easy_setopt(curl, CURLOPT_HTTPPOST, form);
- err = curlresult(curl);
- exit:
- curlcleanup(&curl, &headers);
- if(form) curl_formfree(form);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Get xend to unpause a domain.
- *
- * @param dom domain id
- * @return 0 on success, error code otherwise
- */
-int xen_domain_unpause(uint32_t dom){
- int err = 0;
- CURL *curl = NULL;
- struct curl_slist *headers = NULL;
- char url[128] = {};
- int url_n = sizeof(url);
- struct curl_httppost *form = NULL, *last = NULL;
- CURLFORMcode formcode = 0;
-
- dprintf("> dom=%u\n", dom);
-
- err = curlsetup(&curl, &headers, url, url_n, "http://localhost:%d/xend/domain/%u", XEND_PORT, dom);
- if(err) goto exit;
-
- // Op field.
- formcode = curl_formadd(&form, &last,
- CURLFORM_COPYNAME, "op",
- CURLFORM_COPYCONTENTS, "unpause",
- CURLFORM_END);
- if(formcode){
- eprintf("> Error adding op field.\n");
- err = -EINVAL;
- goto exit;
- }
- // POST the form.
- curl_easy_setopt(curl, CURLOPT_HTTPPOST, form);
- err = curlresult(curl);
- exit:
- curlcleanup(&curl, &headers);
- if(form) curl_formfree(form);
- dprintf("< err=%d\n", err);
- return err;
-}
diff --git a/tools/xfrd/xen_domain.h b/tools/xfrd/xen_domain.h
deleted file mode 100644
index c84e8b8d63..0000000000
--- a/tools/xfrd/xen_domain.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _XFRD_XEN_DOMAIN_H_
-#define _XFRD_XEN_DOMAIN_H_
-#include <sys/types.h>
-#include <iostream.h>
-#include "connection.h"
-
-/** Define to use stubs. Undefine to use Xen ops. */
-//#define _XEN_XFR_STUB_
-
-extern int xen_domain_snd(Conn *xend, IOStream *io,
- uint32_t dom,
- char *vmconfig, int vmconfig_n,
- int live, int resource);
-extern int xen_domain_rcv(IOStream *io,
- uint32_t *dom,
- char **vmconfig, int *vmconfig_n,
- int *configured);
-
-
-extern int xen_domain_configure(uint32_t dom, char *vmconfig, int vmconfig_n);
-extern int xen_domain_unpause(uint32_t dom);
-#endif
diff --git a/tools/xfrd/xfrd.c b/tools/xfrd/xfrd.c
deleted file mode 100644
index bd25ad8cd2..0000000000
--- a/tools/xfrd/xfrd.c
+++ /dev/null
@@ -1,1272 +0,0 @@
-/** @file
- * XFRD - Domain Transfer Daemon for Xen.
- *
- * The xfrd is forked by xend to transfer a vm to a remote system.
- *
- * The vm is suspended, then its state and memory are transferred to the remote system.
- * The remote system attempts to create a vm and copy the transferred state and memory into it,
- * finally resuming the vm. If all is OK the vm ends up running on the remote
- * system and is removed from the originating system. If the transfer does not complete
- * successfully the originating system attempts to resume the vm.
- * The children exit when the transfer completes.
- *
- * @author Mike Wray <mike.wray@hpl.hp.com>
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <getopt.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <time.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <string.h>
-
-#include <signal.h>
-#include <sys/wait.h>
-#include <sys/select.h>
-
-#include "allocate.h"
-#include "file_stream.h"
-#include "string_stream.h"
-#include "lzi_stream.h"
-#include "gzip_stream.h"
-#include "sys_net.h"
-#include "sys_string.h"
-
-//#include "xdr.h"
-#include "enum.h"
-#include "xfrd.h"
-
-#include "xen_domain.h"
-
-#include "connection.h"
-#include "select.h"
-
-#define MODULE_NAME "XFRD"
-
-#include "debug.h"
-
-/*
-sender:
- xend connects to xfrd and writes migrate message
- xend writes domain config to xfrd
-
- xfrd forks
-
- xfrd connects to peer
- xfrd sends hello, reads response
- xfrd sends domain
- xfrd reads response
- reports progress/status to xend
-
- xend reads xfrd for progress/status, disconnects
- If ok, destroys domain.
- If not ok, unpauses domain.
-
-receiver:
- xfrd accepts connection on inbound port
- xfrd forks and accepts connection
- xfrd receives hello, writes response
- xfrd receives domain
- xfrd connects to xend, configures new domain
- xfrd writes status back to peer, child exits
-
-
- (xfr.hello <major> <minor>)
- (xfr.err <code> <reason>)
-
- xend->xfrd (xfr.migrate <domain> <vmconfig> <host> <port> <live>)
- (xfr.save <domain> <vmconfig> <file>)
- xfrd->xend (xfr.suspend <domain>)
- xfrd->xend (xfr.progress <percent> <rate: kb/s>)
- xfrd->xend (xfr.err <code> <reason>) | (xfr.ok <domain>)
- xfrd->xfrd (xfr.xfr <domain>)
- xfrd->xfrd (xfr.err <code>) | (xfr.ok <domain>)
-
- xfrd->xend (xfr.configure <domain> <vmconfig>)
- */
-
-Sxpr oxfr_configure; // (xfr.configure <vmid> <vmconfig>)
-Sxpr oxfr_err; // (xfr.err <code>)
-Sxpr oxfr_hello; // (xfr.hello <major> <minor>)
-Sxpr oxfr_migrate; // (xfr.migrate <vmid> <vmconfig> <host> <port> <live> <resource>)
-Sxpr oxfr_migrate_ok;// (xfr.migrate.ok <value>)
-Sxpr oxfr_progress; // (xfr.progress <percent> <rate: kb/s>)
-Sxpr oxfr_restore; // (xfr.restore <file>)
-Sxpr oxfr_restore_ok;// (xfr.restore.ok <vmid>)
-Sxpr oxfr_save; // (xfr.save <vmid> <vmconfig> <file>)
-Sxpr oxfr_save_ok; // (xfr.save.ok)
-Sxpr oxfr_vm_destroy;// (xfr.vm.destroy <vmid>)
-Sxpr oxfr_vm_suspend;// (xfr.vm.suspend <vmid>)
-Sxpr oxfr_xfr; // (xfr.xfr <vmid>)
-Sxpr oxfr_xfr_ok; // (xfr.xfr.ok <vmid>)
-
-void xfr_init(void){
- oxfr_configure = intern("xfr.configure");
- oxfr_err = intern("xfr.err");
- oxfr_hello = intern("xfr.hello");
- oxfr_migrate = intern("xfr.migrate");
- oxfr_migrate_ok = intern("xfr.migrate.ok");
- oxfr_progress = intern("xfr.progress");
- oxfr_restore = intern("xfr.restore");
- oxfr_restore_ok = intern("xfr.restore.ok");
- oxfr_save = intern("xfr.save");
- oxfr_save_ok = intern("xfr.save.ok");
- oxfr_vm_destroy = intern("xfr.vm.destroy");
- oxfr_vm_suspend = intern("xfr.vm.suspend");
- oxfr_xfr = intern("xfr.xfr");
- oxfr_xfr_ok = intern("xfr.xfr.ok");
-}
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-#define PROGRAM "xfrd"
-
-#define OPT_PORT 'P'
-#define KEY_PORT "port"
-#define DOC_PORT "<port>\n\txfr port (as a number or service name)"
-
-#define OPT_COMPRESS 'Z'
-#define KEY_COMPRESS "compress"
-#define DOC_COMPRESS "\n\tuse compression for migration"
-
-#define OPT_HELP 'h'
-#define KEY_HELP "help"
-#define DOC_HELP "\n\tprint help"
-
-#define OPT_VERSION 'v'
-#define KEY_VERSION "version"
-#define DOC_VERSION "\n\tprint version"
-
-#define OPT_VERBOSE 'V'
-#define KEY_VERBOSE "verbose"
-#define DOC_VERBOSE "\n\tverbose flag"
-
-/** Print a usage message.
- * Prints to stdout if err is zero, and exits with 0.
- * Prints to stderr if err is non-zero, and exits with 1.
- */
-void usage(int err){
- FILE *out = (err ? stderr : stdout);
-
- fprintf(out, "Usage: %s [options]\n", PROGRAM);
- fprintf(out, "-%c, --%s %s\n", OPT_PORT, KEY_PORT, DOC_PORT);
- fprintf(out, "-%c, --%s %s\n", OPT_COMPRESS, KEY_COMPRESS, DOC_COMPRESS);
- fprintf(out, "-%c, --%s %s\n", OPT_VERBOSE, KEY_VERBOSE, DOC_VERBOSE);
- fprintf(out, "-%c, --%s %s\n", OPT_VERSION, KEY_VERSION, DOC_VERSION);
- fprintf(out, "-%c, --%s %s\n", OPT_HELP, KEY_HELP, DOC_HELP);
- exit(err ? 1 : 0);
-}
-
-typedef struct Args {
- int bufsize;
- unsigned long port;
- int verbose;
- int compress;
-} Args;
-
-/** Transfer states. */
-enum {
- XFR_INIT,
- XFR_HELLO,
- XFR_STATE,
- XFR_RUN,
- XFR_FAIL,
- XFR_DONE,
- XFR_MAX
-};
-
-#ifndef SXPR_PARSER_MAIN
-/** Short options. Options followed by ':' take an argument. */
-static char *short_opts = (char[]){
- OPT_PORT, ':',
- OPT_COMPRESS,
- OPT_HELP,
- OPT_VERSION,
- OPT_VERBOSE,
- 0 };
-
-/** Long options. */
-static struct option const long_opts[] = {
- { KEY_PORT, required_argument, NULL, OPT_PORT },
- { KEY_COMPRESS, no_argument, NULL, OPT_COMPRESS },
- { KEY_HELP, no_argument, NULL, OPT_HELP },
- { KEY_VERSION, no_argument, NULL, OPT_VERSION },
- { KEY_VERBOSE, no_argument, NULL, OPT_VERBOSE },
- { NULL, 0, NULL, 0 }
-};
-
-/** Xfrd arguments. */
-static Args _args = {};
-
-/** Xfrd arguments. */
-static Args *args = &_args;
-#endif
-
-/** Initialize an array element for a constant to its string name. */
-#define VALDEF(val) { val, #val }
-
-/** Names for the transfer states. */
-static EnumDef xfr_states[] = {
- VALDEF(XFR_INIT),
- VALDEF(XFR_HELLO),
- VALDEF(XFR_STATE),
- VALDEF(XFR_RUN),
- VALDEF(XFR_FAIL),
- VALDEF(XFR_DONE),
- { 0, NULL }
-};
-
-
-/** State machine for transfer. */
-typedef struct XfrState {
- /** Current state. */
- int state;
- /** Error codes for the states. */
- int state_err[XFR_MAX];
- /** First error. */
- int err;
- /** State when first error happened. */
- int err_state;
-
- uint32_t vmid;
- char* vmconfig;
- int vmconfig_n;
- unsigned long xfr_port;
- char *xfr_host;
- uint32_t vmid_new;
- int live;
- int resource;
-} XfrState;
-
-/** Get the name of a transfer state.
- *
- * @param s state
- * @return name
- */
-char * xfr_state_name(int s){
- return enum_val_to_name(s, xfr_states);
-}
-
-/** Set the state of a transfer.
- *
- * @param s transfer
- * @param state state
- * @return state
- */
-int XfrState_set_state(XfrState *s, int state){
- s->state = state;
- return s->state;
-}
-
-/** Get the state of a transfer.
- *
- * @param s transfer
- * @return state
- */
-int XfrState_get_state(XfrState *s){
- return s->state;
-}
-
-/** Set an error in the current state.
- * Does nothing if an error is already set.
- *
- * @param s transfer
- * @param err error
- * @return error
- */
-int XfrState_set_err(XfrState *s, int err){
- if(!s->state_err[s->state]){
- s->state_err[s->state] = err;
- }
- if(!s->err){
- s->err = err;
- s->err_state = s->state;
- }
- return err;
-}
-
-/** Get the error in the current state.
- *
- * @param s transfer
- * @return error
- */
-int XfrState_get_err(XfrState *s){
- return s->state_err[s->state];
-}
-
-/** Get the first error of a transfer.
- *
- * @param s transfer
- * @return error
- */
-int XfrState_first_err(XfrState *s){
- return s->err;
-}
-
-/** Get the state a transfer was in when it had its first error.
- *
- * @param s transfer
- * @return error state
- */
-int XfrState_first_err_state(XfrState *s){
- return s->err_state;
-}
-
-/** Set xfrd default arguments.
- *
- * @param args arguments to set
- */
-void set_defaults(Args *args){
- args->compress = FALSE;
- args->bufsize = 128 * 1024;
- args->port = htons(XFRD_PORT);
-}
-
-int stringof(Sxpr exp, char **s){
- int err = 0;
- //dprintf(">\n"); objprint(iostdout, exp, PRINT_TYPE); IOStream_print(iostdout, "\n");
- if(ATOMP(exp)){
- *s = atom_name(exp);
- } else if(STRINGP(exp)){
- *s = string_string(exp);
- } else {
- err = -EINVAL;
- *s = NULL;
- }
- //dprintf("< err=%d s=%s\n", err, *s);
- return err;
-}
-
-int intof(Sxpr exp, int *v){
- int err = 0;
- char *s;
- unsigned long l;
- //dprintf(">\n"); objprint(iostdout, exp, 0); IOStream_print(iostdout, "\n");
- if(INTP(exp)){
- *v = OBJ_INT(exp);
- } else {
- err = stringof(exp, &s);
- if(err) goto exit;
- err = convert_atoul(s, &l);
- *v = (int)l;
- }
- exit:
- //dprintf("< err=%d v=%d\n", err, *v);
- return err;
-}
-
-int addrof(Sxpr exp, uint32_t *v){
- char *h;
- unsigned long a;
- int err = 0;
- //dprintf(">\n"); objprint(iostdout, exp, 0); IOStream_print(iostdout, "\n");
- err = stringof(exp, &h);
- if(err) goto exit;
- if(get_host_address(h, &a)){
- err = -EINVAL;
- goto exit;
- }
- *v = a;
- exit:
- //dprintf("< err=%d v=%x\n", err, *v);
- return err;
-}
-
-int portof(Sxpr exp, uint16_t *v){
- char *s;
- int err = 0;
- //dprintf(">\n"); objprint(iostdout, exp, 0); IOStream_print(iostdout, "\n");
- if(INTP(exp)){
- *v = get_ul(exp);
- *v = htons(*v);
- } else {
- unsigned long p;
- err = stringof(exp, &s);
- if(err) goto exit;
- err = convert_service_to_port(s, &p);
- if(err){
- err = -EINVAL;
- goto exit;
- }
- *v = p;
- }
- exit:
- //dprintf("< err=%d v=%u\n", err, *v);
- return err;
-}
-
-static inline struct in_addr inaddr(uint32_t addr){
- return (struct in_addr){ .s_addr = addr };
-}
-
-time_t stats(time_t t0, uint64_t offset, uint64_t memory, float *percent, float *rate){
- time_t t1 = time(NULL);
- *percent = (offset * 100.0f) / memory;
- t1 = time(NULL) - t0;
- *rate = (t1 ? offset/(t1 * 1024.0f) : 0.0f);
- return t1;
-}
-
-/** Notify success or error.
- *
- * @param conn connection
- * @param errcode error code
- * @return 0 on success, error code otherwise
- */
-int xfr_error(Conn *conn, int errcode){
- int err = 0;
-
- if(!conn->out) return -ENOTCONN;
- if(errcode <0) errcode = -errcode;
- err = IOStream_print(conn->out, "(%s %d)",
- atom_name(oxfr_err), errcode);
- return (err < 0 ? err : 0);
-}
-
-/** Read a response message - error or ok.
- *
- * @param conn connection
- * @return 0 on success, error code otherwise
- */
-int xfr_response(Conn *conn){
- int err;
- Sxpr sxpr;
-
- dprintf(">\n");
- if(!conn->out) return -ENOTCONN;
- err = Conn_sxpr(conn, &sxpr);
- if(err) goto exit;
- if(sxpr_elementp(sxpr, oxfr_err)){
- int errcode;
- err = intof(sxpr_childN(sxpr, 0, ONONE), &errcode);
- if(err) goto exit;
- err = errcode;
- }
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Get the initial hello message and check the protocol version.
- * It is an error to receive anything other than a hello message
- * with the correct protocol version.
- *
- * @param conn connection
- * @return 0 on success, error code otherwise
- */
-int xfr_hello(Conn *conn){
- int err;
- uint32_t major = XFR_PROTO_MAJOR, minor = XFR_PROTO_MINOR;
- uint32_t hello_major, hello_minor;
- Sxpr sxpr;
- if(!conn->in) return -ENOTCONN;
- dprintf(">\n");
- err = Conn_sxpr(conn, &sxpr);
- if(err) goto exit;
- if(!sxpr_elementp(sxpr, oxfr_hello)){
- wprintf("> sxpr_elementp test failed\n");
- err = -EINVAL;
- goto exit;
- }
- err = intof(sxpr_childN(sxpr, 0, ONONE), &hello_major);
- if(err) goto exit;
- err = intof(sxpr_childN(sxpr, 1, ONONE), &hello_minor);
- if(err) goto exit;
- if(hello_major != major || hello_minor != minor){
- eprintf("> Wanted protocol version %d.%d, got %d.%d",
- major, minor, hello_major, hello_minor);
- err = -EINVAL;
- goto exit;
- }
- exit:
- xfr_error(conn, err);
- if(err){
- eprintf("> Hello failed: %d\n", err);
- }
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Send the initial hello message.
- *
- * @param conn connection
- * @param msg message
- * @return 0 on success, error code otherwise
- */
-int xfr_send_hello(Conn *conn){
- int err = 0;
- dprintf(">\n");
-
- err = IOStream_print(conn->out, "(%s %d %d)",
- atom_name(oxfr_hello),
- XFR_PROTO_MAJOR,
- XFR_PROTO_MINOR);
- if(err < 0) goto exit;
- IOStream_flush(conn->out);
- err = xfr_response(conn);
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-int xfr_send_xfr(Conn *conn, uint32_t vmid){
- int err;
-
- err = IOStream_print(conn->out, "(%s %d)",
- atom_name(oxfr_xfr), vmid);
- return (err < 0 ? err : 0);
-}
-
-int xfr_send_xfr_ok(Conn *conn, uint32_t vmid){
- int err = 0;
-
- err = IOStream_print(conn->out, "(%s %d)",
- atom_name(oxfr_xfr_ok), vmid);
- return (err < 0 ? err : 0);
-}
-
-int xfr_send_migrate_ok(Conn *conn, uint32_t vmid){
- int err = 0;
-
- err = IOStream_print(conn->out, "(%s %d)",
- atom_name(oxfr_migrate_ok), vmid);
- return (err < 0 ? err : 0);
-}
-
-int xfr_send_restore_ok(Conn *conn, uint32_t vmid){
- int err = 0;
-
- err = IOStream_print(conn->out, "(%s %d)",
- atom_name(oxfr_restore_ok), vmid);
- return (err < 0 ? err : 0);
-}
-
-int xfr_send_save_ok(Conn *conn){
- int err = 0;
-
- err = IOStream_print(conn->out, "(%s)",
- atom_name(oxfr_save_ok));
- return (err < 0 ? err : 0);
-}
-
-int xfr_send_suspend(Conn *conn, uint32_t vmid){
- int err = 0;
-
- err = IOStream_print(conn->out, "(%s %d)",
- atom_name(oxfr_vm_suspend), vmid);
- return (err < 0 ? err : 0);
-}
-
-/** Suspend a vm on behalf of save/migrate.
- */
-int xfr_vm_suspend(Conn *xend, uint32_t vmid){
- int err = 0;
- dprintf("> vmid=%u\n", vmid);
- err = xfr_send_suspend(xend, vmid);
- if(err) goto exit;
- IOStream_flush(xend->out);
- err = xfr_response(xend);
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-int xfr_send_destroy(Conn *conn, uint32_t vmid){
- int err = 0;
-
- err = IOStream_print(conn->out, "(%s %d)",
- atom_name(oxfr_vm_destroy), vmid);
- return (err < 0 ? err : 0);
-}
-
-/** Destroy a vm on behalf of save/migrate.
- */
-int xfr_vm_destroy(Conn *xend, uint32_t vmid){
- int err = 0;
- dprintf("> vmid=%u\n", vmid);
- err = xfr_send_destroy(xend, vmid);
- if(err) goto exit;
- IOStream_flush(xend->out);
- err = xfr_response(xend);
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Get vm state. Send transfer message.
- *
- * @param peer connection
- * @param msg message
- * @return 0 on success, error code otherwise
- */
-int xfr_send_state(XfrState *state, Conn *xend, Conn *peer){
- int err = 0;
- Sxpr sxpr;
-
- dprintf(">\n");
- XfrState_set_state(state, XFR_STATE);
- // Send xfr message and the domain state.
- err = xfr_send_xfr(peer, state->vmid);
- if(err) goto exit;
- dprintf(">*** Sending domain %u\n", state->vmid);
- err = xen_domain_snd(xend, peer->out,
- state->vmid,
- state->vmconfig, state->vmconfig_n,
- state->live, state->resource);
- dprintf(">*** Sent domain %u\n", state->vmid);
- if(err) goto exit;
- // Sending the domain suspends it, and there's no way back.
- // So destroy it now. If anything goes wrong now it's too late.
- dprintf(">*** Destroying domain %u\n", state->vmid);
- err = xfr_vm_destroy(xend, state->vmid);
- if(err) goto exit;
- err = xfr_error(peer, err);
- if(err) goto exit;
- IOStream_flush(peer->out);
- // Read the response from the peer.
- err = Conn_sxpr(peer, &sxpr);
- if(err) goto exit;
- if(sxpr_elementp(sxpr, oxfr_err)){
- // Error.
- int errcode;
- err = intof(sxpr_childN(sxpr, 0, ONONE), &errcode);
- if(!err) err = errcode;
- } else if(sxpr_elementp(sxpr, oxfr_xfr_ok)){
- // Ok - get the new domain id.
- err = intof(sxpr_childN(sxpr, 0, ONONE), &state->vmid_new);
- xfr_error(peer, err);
- } else {
- // Anything else is invalid. But it may be too late.
- err = -EINVAL;
- xfr_error(peer, err);
- }
- exit:
- XfrState_set_err(state, err);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Finish the transfer.
- */
-int xfr_send_done(XfrState *state, Conn *xend){
- int err = 0;
- int first_err = 0;
-
- first_err = XfrState_first_err(state);
- if(first_err){
- XfrState_set_state(state, XFR_FAIL);
- } else {
- XfrState_set_state(state, XFR_DONE);
- }
- if(first_err){
- err = xfr_error(xend, first_err);
- } else {
- // Report new domain id to xend.
- err = xfr_send_migrate_ok(xend, state->vmid_new);
- }
-
- XfrState_set_err(state, err);
- if(XfrState_first_err(state)){
- int s, serr;
-
- wprintf("> Transfer errors:\n");
- for(s = 0; s < XFR_MAX; s++){
- serr = state->state_err[s];
- if(!serr) continue;
- wprintf("> state=%-12s err=%d\n", xfr_state_name(s), serr);
- }
- } else {
- wprintf("> Transfer OK\n");
- }
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Migrate a vm to another node.
- *
- * @param xend connection
- * @return 0 on success, error code otherwise
- */
-int xfr_send(Args *args, XfrState *state, Conn *xend, uint32_t addr, uint32_t port){
- int err = 0;
- Conn _peer = {}, *peer = &_peer;
- int flags = 0;
- struct in_addr xfr_addr;
- uint16_t xfr_port;
- time_t t0 = time(NULL), t1;
-
- dprintf(">\n");
- flags |= CONN_NOBUFFER;
- if(args->compress){
- flags |= CONN_WRITE_COMPRESS;
- }
- xfr_addr.s_addr = addr;
- xfr_port = port;
- if(!xfr_port) xfr_port = htons(XFRD_PORT);
- dprintf("> Xfr vmid=%u\n", state->vmid);
- dprintf("> Xfr xfr_addr=%s:%d\n", inet_ntoa(xfr_addr), ntohs(xfr_port));
- err = Conn_connect(peer, flags, xfr_addr, xfr_port);
- if(err) goto exit;
- XfrState_set_state(state, XFR_HELLO);
- // Send hello message.
- err = xfr_send_hello(peer);
- if(err) goto exit;
- printf("\n");
- // Send vm state.
- err = xfr_send_state(state, xend, peer);
- if(err) goto exit;
- if(args->compress){
- IOStream *zio = peer->out;
- int plain_bytes = lzi_stream_plain_bytes(zio);
- int comp_bytes = lzi_stream_comp_bytes(zio);
- float ratio = lzi_stream_ratio(zio);
- iprintf("> Compression: plain %d bytes, compressed %d bytes, ratio %3.2f\n",
- plain_bytes, comp_bytes, ratio);
- }
- exit:
- dprintf("> err=%d\n", err);
- if(err && !XfrState_get_err(state)){
- XfrState_set_err(state, err);
- }
- Conn_close(peer);
- if(!err){
- t1 = time(NULL) - t0;
- iprintf("> Transfer complete in %lu seconds\n", t1);
- }
- dprintf("> done err=%d, notifying xend...\n", err);
- xfr_send_done(state, xend);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Save a vm to file.
- */
-int xfr_save(Args *args, XfrState *state, Conn *xend, char *file){
- int err = 0;
- int compress = 0;
- IOStream *io = NULL;
-
- dprintf("> file=%s\n", file);
- if(compress){
- io = gzip_stream_fopen(file, "wb1");
- } else {
- io = file_stream_fopen(file, "wb");
- }
- if(!io){
- eprintf("> Failed to open %s\n", file);
- err = -EINVAL;
- goto exit;
- }
- err = xen_domain_snd(xend, io,
- state->vmid,
- state->vmconfig, state->vmconfig_n,
- 0, 0);
- if(err){
- err = xfr_error(xend, err);
- } else {
- err = xfr_send_save_ok(xend);
- }
- exit:
- if(io){
- IOStream_close(io);
- IOStream_free(io);
- }
- if(err){
- unlink(file);
- }
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Restore a vm from file.
- *
- * @return 0 on success, error code otherwise
- */
-int xfr_restore(Args *args, XfrState *state, Conn *xend, char *file){
- int err = 0;
- IOStream *io = NULL;
- int configured=0;
-
- dprintf("> file=%s\n", file);
- io = gzip_stream_fopen(file, "rb");
- if(!io){
- eprintf("> Failed to open %s\n", file);
- err = -EINVAL;
- goto exit;
- }
- err = xen_domain_rcv(io,
- &state->vmid_new,
- &state->vmconfig, &state->vmconfig_n,
- &configured);
- if(err) goto exit;
- if(!configured){
- err = xen_domain_configure(state->vmid_new, state->vmconfig, state->vmconfig_n);
- if(err) goto exit;
- }
- err = xen_domain_unpause(state->vmid_new);
- exit:
- if(io){
- IOStream_close(io);
- IOStream_free(io);
- }
- if(err){
- xfr_error(xend, err);
- } else {
- xfr_send_restore_ok(xend, state->vmid_new);
- }
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Accept the transfer of a vm from another node.
- *
- * @param peer connection
- * @param msg message
- * @return 0 on success, error code otherwise
- */
-int xfr_recv(Args *args, XfrState *state, Conn *peer){
- int err = 0;
- time_t t0 = time(NULL), t1;
- Sxpr sxpr;
- int configured=0;
-
- dprintf("> peer=%s\n", inet_ntoa(peer->addr.sin_addr));
- // If receiving from localhost set configured so that that xen_domain_rcv()
- // does not attempt to configure the new domain. This is because the old
- // domain still exists and will make it fail.
- if(peer->addr.sin_addr.s_addr == htonl(INADDR_LOOPBACK)){
- dprintf("> Peer is localhost\n");
- configured = 1;
- }
- err = xen_domain_rcv(peer->in,
- &state->vmid_new,
- &state->vmconfig, &state->vmconfig_n,
- &configured);
- if(err) goto exit;
- // Read from the peer. This is just so we wait before configuring.
- // When migrating to the same host the peer must destroy the domain
- // before we configure the new one.
- err = Conn_sxpr(peer, &sxpr);
- if(err) goto exit;
- if(!configured){
- dprintf("> Configuring...\n");
- err = xen_domain_configure(state->vmid_new, state->vmconfig, state->vmconfig_n);
- if(err) goto exit;
- err = xen_domain_unpause(state->vmid_new);
- if(err) goto exit;
- }
- // Report new domain id to peer.
- err = xfr_send_xfr_ok(peer, state->vmid_new);
- if(err) goto exit;
- // Get the final ok.
- err = xfr_response(peer);
- exit:
- if(!err){
- t1 = time(NULL) - t0;
- iprintf("> Transfer complete in %lu seconds\n", t1);
- }
- if(err){
- xfr_error(peer, err);
- }
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Listen for a hello followed by a service request.
- * The request can be from the local xend or from xfrd on another node.
- *
- * @param peersock socket
- * @param peer_in peer address
- * @return 0 on success, error code otherwise
- */
-int xfrd_service(Args *args, int peersock, struct sockaddr_in peer_in){
- int err = 0;
- Sxpr sxpr;
- Conn _conn = {}, *conn = &_conn;
- int flags = CONN_NOBUFFER;
-
- dprintf(">\n");
- err = Conn_init(conn, flags, peersock, peer_in);
- if(err) goto exit;
- //dprintf(">xfr_hello... \n");
- err = xfr_hello(conn);
- if(err) goto exit;
- //dprintf("> sxpr...\n");
- err = Conn_sxpr(conn, &sxpr);
- if(err) goto exit;
- //dprintf("> sxpr=\n");
- //objprint(iostdout, sxpr, PRINT_TYPE); IOStream_print(iostdout, "\n");
- if(sxpr_elementp(sxpr, oxfr_migrate)){
- // Migrate message from xend.
- uint32_t addr;
- uint16_t port;
- XfrState _state = {}, *state = &_state;
- int n = 0;
-
- dprintf("> xfr.migrate\n");
- err = intof(sxpr_childN(sxpr, n++, ONONE), &state->vmid);
- if(err) goto exit;
- err = stringof(sxpr_childN(sxpr, n++, ONONE), &state->vmconfig);
- if(err) goto exit;
- state->vmconfig_n = strlen(state->vmconfig);
- err = addrof(sxpr_childN(sxpr, n++, ONONE), &addr);
- if(err) goto exit;
- err = portof(sxpr_childN(sxpr, n++, ONONE), &port);
- if(err) goto exit;
- err = intof(sxpr_childN(sxpr, n++, ONONE), &state->live);
- if(err) goto exit;
- err = intof(sxpr_childN(sxpr, n++, ONONE), &state->resource);
- if(err) goto exit;
- err = xfr_send(args, state, conn, addr, port);
-
- } else if(sxpr_elementp(sxpr, oxfr_save)){
- // Save message from xend.
- char *file;
- XfrState _state = {}, *state = &_state;
- int n = 0;
-
- dprintf("> xfr.save\n");
- err = intof(sxpr_childN(sxpr, n++, ONONE), &state->vmid);
- if(err) goto exit;
- err = stringof(sxpr_childN(sxpr, n++, ONONE), &state->vmconfig);
- if(err) goto exit;
- state->vmconfig_n = strlen(state->vmconfig);
- err = stringof(sxpr_childN(sxpr, n++, ONONE), &file);
- if(err) goto exit;
- err = xfr_save(args, state, conn, file);
-
- } else if(sxpr_elementp(sxpr, oxfr_restore)){
- // Restore message from xend.
- char *file;
- XfrState _state = {}, *state = &_state;
- int n = 0;
-
- dprintf("> xfr.restore\n");
- err = stringof(sxpr_childN(sxpr, n++, ONONE), &file);
- if(err) goto exit;
- err = xfr_restore(args, state, conn, file);
-
- } else if(sxpr_elementp(sxpr, oxfr_xfr)){
- // Xfr message from peer xfrd.
- XfrState _state = {}, *state = &_state;
- int n = 0;
-
- dprintf("> xfr.xfr\n");
- err = intof(sxpr_childN(sxpr, n++, ONONE), &state->vmid);
- if(err) goto exit;
- err = xfr_recv(args, state, conn);
-
- } else{
- // Anything else is invalid.
- err = -EINVAL;
- eprintf("> Invalid message: ");
- objprint(iostderr, sxpr, 0);
- IOStream_print(iostderr, "\n");
- xfr_error(conn, err);
- }
- exit:
- Conn_close(conn);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Accept an incoming connection.
- *
- * @param sock tcp socket
- * @return 0 on success, error code otherwise
- */
-int xfrd_accept(Args *args, int sock){
- struct sockaddr_in peer_in;
- struct sockaddr *peer = (struct sockaddr *)&peer_in;
- socklen_t peer_n = sizeof(peer_in);
- int peersock;
- pid_t pid;
- int err = 0;
-
- dprintf("> sock=%d\n", sock);
- dprintf("> accept...\n");
- peersock = accept(sock, peer, &peer_n);
- dprintf("> accept=%d\n", peersock);
- if(peersock < 0){
- perror("accept");
- err = -errno;
- goto exit;
- }
- iprintf("> Accepted connection from %s:%d on %d\n",
- inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port), sock);
- fflush(stdout);
- fflush(stderr);
- pid = fork();
- if(pid > 0){
- // Parent, fork succeeded.
- iprintf("> Forked child pid=%d\n", pid);
- close(peersock);
- } else if (pid < 0){
- // Parent, fork failed.
- perror("fork");
- close(peersock);
- } else {
- // Child.
- iprintf("> Xfr service for %s:%d\n",
- inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port));
- err = xfrd_service(args, peersock, peer_in);
- iprintf("> Xfr service err=%d\n", err);
- shutdown(peersock, 2);
- exit(err ? 1 : 0);
- }
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Socket select loop.
- * Accepts connections on the tcp socket.
- *
- * @param listen_sock tcp listen socket
- * @return 0 on success, error code otherwise
- */
-int xfrd_select(Args *args, int listen_sock){
- int err = 0;
- SelectSet set = {};
- dprintf("> socks: %d\n", listen_sock);
- while(1){
- SelectSet_zero(&set);
- SelectSet_add_read(&set, listen_sock);
- err = SelectSet_select(&set, NULL);
- if(err < 0){
- if(errno == EINTR) continue;
- perror("select");
- goto exit;
- }
- if(FD_ISSET(listen_sock, &set.rd)){
- xfrd_accept(args, listen_sock);
- }
- }
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Create a socket.
- *
- * @param args program arguments
- * @param socktype socket type
- * @param reuse whether to set SO_REUSEADDR
- * @param val return value for the socket
- * @return 0 on success, error code otherwise
- */
-int create_socket(Args *args, int socktype, int reuse, int *val){
- int err = 0;
- int sock = 0;
- struct sockaddr_in addr_in;
- struct sockaddr *addr = (struct sockaddr *)&addr_in;
- socklen_t addr_n = sizeof(addr_in);
-
- dprintf(">\n");
- // Create socket and bind it.
- sock = socket(AF_INET, socktype, 0);
- if(sock < 0){
- err = -errno;
- goto exit;
- }
- addr_in.sin_family = AF_INET;
- addr_in.sin_addr.s_addr = INADDR_ANY;
- addr_in.sin_port = args->port;
- dprintf("> port=%d\n", ntohs(addr_in.sin_port));
- if(reuse){
- // Set socket option to reuse address.
- int val = 1;
- err = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
- if(err < 0){
- err = -errno;
- perror("setsockopt");
- goto exit;
- }
- }
- err = bind(sock, addr, addr_n);
- if(err < 0){
- err = -errno;
- perror("bind");
- goto exit;
- }
- exit:
- *val = (err ? -1 : sock);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Create the tcp listen socket.
- *
- * @param args program arguments
- * @param val return value for the socket
- * @return 0 on success, error code otherwise
- */
-int xfrd_listen_socket(Args *args, int *val){
- int err = 0;
- int sock;
- dprintf(">\n");
- err = create_socket(args, SOCK_STREAM, 1, &sock);
- if(err) goto exit;
- dprintf("> listen...\n");
- err = listen(sock, 5);
- if(err < 0){
- err = -errno;
- perror("listen");
- goto exit;
- }
- exit:
- *val = (err ? -1 : sock);
- if(err) close(sock);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Type for signal handling functions. */
-typedef void SignalAction(int code, siginfo_t *info, void *data);
-
-/** Handle SIGCHLD by getting child exit status.
- * This prevents child processes being defunct.
- *
- * @param code signal code
- * @param info signal info
- * @param data
- */
-void sigaction_SIGCHLD(int code, siginfo_t *info, void *data){
- int status;
- pid_t pid;
- //dprintf("> child_exit=%d waiting...\n", child_exit);
- pid = wait(&status);
- dprintf("> child pid=%d status=%d\n", pid, status);
-}
-
-/** Handle SIGPIPE.
- *
- * @param code signal code
- * @param info signal info
- * @param data
- */
-void sigaction_SIGPIPE(int code, siginfo_t *info, void *data){
- dprintf("> SIGPIPE\n");
- //fflush(stdout);
- //fflush(stderr);
- //exit(1);
-}
-
-/** Handle SIGALRM.
- *
- * @param code signal code
- * @param info signal info
- * @param data
- */
-void sigaction_SIGALRM(int code, siginfo_t *info, void *data){
- dprintf("> SIGALRM\n");
-}
-
-/** Install a handler for a signal.
- *
- * @param signum signal
- * @param action handler
- * @return 0 on success, error code otherwise
- */
-int catch_signal(int signum, SignalAction *action){
- int err = 0;
- struct sigaction sig = {};
- sig.sa_sigaction = action;
- sig.sa_flags = SA_SIGINFO;
- err = sigaction(signum, &sig, NULL);
- if(err){
- perror("sigaction");
- }
- return err;
-}
-
-/** Transfer daemon main program.
- *
- * @param args program arguments
- * @return 0 on success, error code otherwise
- */
-int xfrd_main(Args *args){
- int err = 0;
- int listen_sock;
-
- dprintf(">\n");
- catch_signal(SIGCHLD,sigaction_SIGCHLD);
- catch_signal(SIGPIPE,sigaction_SIGPIPE);
- catch_signal(SIGALRM,sigaction_SIGALRM);
- err = xfrd_listen_socket(args, &listen_sock);
- if(err) goto exit;
- err = xfrd_select(args, listen_sock);
- exit:
- close(listen_sock);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-#ifndef SXPR_PARSER_MAIN
-/** Parse command-line arguments and call the xfrd main program.
- *
- * @param arg argument count
- * @param argv arguments
- * @return 0 on success, 1 otherwise
- */
-int main(int argc, char *argv[]){
- int err = 0;
- int key = 0;
- int long_index = 0;
- static const char * LOGFILE = "/var/log/xfrd.log";
-
-#ifndef DEBUG
- freopen(LOGFILE, "w+", stdout);
- fclose(stderr);
- stderr = stdout;
-#endif
- dprintf(">\n");
- set_defaults(args);
- while(1){
- key = getopt_long(argc, argv, short_opts, long_opts, &long_index);
- if(key == -1) break;
- switch(key){
- case OPT_PORT:
- err = !convert_service_to_port(optarg, &args->port);
- if(err) goto exit;
- break;
- case OPT_COMPRESS:
- args->compress = TRUE;
- break;
- case OPT_HELP:
- usage(0);
- break;
- case OPT_VERBOSE:
- args->verbose = TRUE;
- break;
- case OPT_VERSION:
- printf("> Version %d.%d\n", XFR_PROTO_MAJOR, XFR_PROTO_MINOR);
- exit(0);
- break;
- default:
- usage(EINVAL);
- break;
- }
- }
- xfr_init();
- err = xfrd_main(args);
- exit:
- if(err && key > 0){
- fprintf(stderr, "Error in arg %c\n", key);
- }
- return (err ? 1 : 0);
-}
-#endif
diff --git a/tools/xfrd/xfrd.h b/tools/xfrd/xfrd.h
deleted file mode 100644
index 0671b383eb..0000000000
--- a/tools/xfrd/xfrd.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _XFRD_XFRD_H_
-#define _XFRD_XFRD_H_
-
-/** Xend port in host order. */
-#define XEND_PORT 8000
-
-/** Xfrd port in host order. */
-#define XFRD_PORT 8002
-
-/** Protocol version. */
-#define XFR_PROTO_MAJOR 1
-#define XFR_PROTO_MINOR 0
-
-struct Conn;
-extern int xfr_vm_suspend(struct Conn *xend, uint32_t vmid);
-extern int xfr_vm_destroy(struct Conn *xend, uint32_t vmid);
-#endif
diff --git a/tools/xfrd/xfrdClient.py b/tools/xfrd/xfrdClient.py
deleted file mode 100755
index 4badf454db..0000000000
--- a/tools/xfrd/xfrdClient.py
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/bin/env python
-"""
-Test client for the migration daemon (xfrd).
-
-Author: Mike Wray <mike.wray@hp.com>
-
-"""
-import getopt
-import sys
-import os
-from socket import *
-import StringIO
-
-sys.path.append("/home/mjw/repos-bk/xeno-unstable.bk/tools/python")
-
-import xen.xend.sxp as sxp
-
-XFRD_PORT = 8002
-
-verbose = 0
-
-class TCPClient:
-
- def __init__(self, host, port):
- print ">TCPClient"
- self.sock = socket(AF_INET, SOCK_STREAM, 0)
- print ">TCPClient sock=", self.sock
- print ">TCPClient> connect ", host, port
- v = self.sock.connect((host, port))
- print ">TCPClient> connect=", v
- # Send plain header (no gzip).
- #self.sock.send("\0\0")
-
- self.sockin = self.sock.makefile("r")
- self.sockout = self.sock.makefile("w")
- #pass
-
- def request(self, req):
- print "request>", req
- sxp.show(req, out=self.sockout)
- self.sockout.flush()
- print "request<"
-
- def request_hello(self):
- self.request(['xfr.hello', XFR_PROTO_MAJOR, XFR_PROTO_MINOR])
-
- def request_migrate(self, vmid, vhost, vport, vmconfig='(vm)'):
- self.request(['xfr.migrate', vmid, vmconfig, vhost, vport])
-
- def read(self):
- while(1):
- v = self.sockin.read()
- print 'read>', v
- #if v[0] == 'xfr.err' and v[1]: return
- #if v[0] == 'xfr.ok': return
-
-XFR_PROTO_MAJOR = 1
-XFR_PROTO_MINOR = 0
-
-host_default = "localhost"
-port_default = XFRD_PORT
-vhost_default = "localhost"
-vport_default = 8003
-vmid_default = 1
-
-# Short options. Options followed by ':' need a parameter.
-short_opts = 'h'
-
-# Long options. Options ending in '=' need a parameter.
-long_opts = [ 'host=', 'port=', 'vhost=', 'vport=', 'vmid=', 'verbose', 'help']
-
-def usage(err=None):
- if err:
- out = sys.stderr
- else:
- out = sys.stdout
- print >> out, 'Usage: %s [options] [command...]\n' % sys.argv[0]
- print >> out, '--host <host>\n\tHost to initiate transfer on. Default %s.' % host_default
- print >> out, '--port <port>\n\tPort to initiate transfer on. Default %d.' % port_default
- print >> out, '--vhost <vhost>\n\tHost to transfer VM to. Default %s.' % vhost_default
- print >> out, '--vport <vport>\n\tPort to transfer VM to. Default %d.' % vport_default
- print >> out, '--vmid <vmid>\n\tVM id. Default %d.' % vmid_default
- print >> out, '--help\n\tPrint help.'
-
-def main(argv):
- global verbose
- host = host_default
- port = port_default
- vhost = vhost_default
- vport = vport_default
- vmid = vmid_default
-
- try:
- opts, args = getopt.getopt(argv[1:], short_opts, long_opts)
- except getopt.GetoptError, ex:
- print >>sys.stderr, 'Error:', ex
- usage(1)
- sys.exit(1)
-
- for key, val in opts:
- if key == '--help':
- usage()
- sys.exit(0)
- elif key == '--host':
- host = val
- elif key == '--port':
- port = int(val)
- elif key == '--vhost':
- vhost = val
- elif key == '--vport':
- vport = int(val)
- elif key == '--vmid':
- vmid = int(val)
-
- print "host=%s port=%d" % (host, port)
- print "vhost=%s vport=%d vmid=%d" % (vhost, vport, vmid)
- client = TCPClient(gethostbyname(host), port)
- client.request_hello()
- client.request_migrate(vmid, gethostbyname(vhost), vport)
- client.read()
-
-if __name__ == '__main__':
- main(sys.argv)
-
diff --git a/xen/Makefile b/xen/Makefile
index adb540638a..e71898cf4d 100644
--- a/xen/Makefile
+++ b/xen/Makefile
@@ -8,9 +8,9 @@ INSTALL_DIR = $(INSTALL) -d -m0755
# This is the correct place to edit the build version.
# All other places this is stored (eg. compile.h) should be autogenerated.
-export XEN_VERSION = 2
+export XEN_VERSION = 3
export XEN_SUBVERSION = 0
-export XEN_EXTRAVERSION = -testing
+export XEN_EXTRAVERSION = -devel
export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
export BASEDIR := $(CURDIR)
@@ -83,9 +83,9 @@ include/xen/compile.h: include/xen/compile.h.in include/xen/banner.h
@mv -f $@.new $@
tools/figlet/figlet: tools/figlet/figlet.o
- $(CC) -o $@ $<
+ $(HOSTCC) -o $@ $<
tools/figlet/figlet.o: tools/figlet/figlet.c
- $(CC) -o $@ -c $<
+ $(HOSTCC) -o $@ -c $<
include/xen/banner.h: tools/figlet/figlet tools/figlet/xen.flf
tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) > $@.new
diff --git a/xen/Rules.mk b/xen/Rules.mk
index 82599afec0..221882814a 100644
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -1,28 +1,28 @@
verbose ?= n
debug ?= n
-debugger ?= n
perfc ?= n
+perfc_arrays?= n
trace ?= n
optimize ?= y
+domu_debug ?= n
+crash_debug ?= n
-# Currently supported architectures: x86_32, x86_64
-COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/)
-TARGET_ARCH ?= $(COMPILE_ARCH)
+include $(BASEDIR)/../Config.mk
# Set ARCH/SUBARCH appropriately.
-override COMPILE_SUBARCH := $(COMPILE_ARCH)
-override TARGET_SUBARCH := $(TARGET_ARCH)
-override COMPILE_ARCH := $(patsubst x86%,x86,$(COMPILE_ARCH))
-override TARGET_ARCH := $(patsubst x86%,x86,$(TARGET_ARCH))
+override COMPILE_SUBARCH := $(XEN_COMPILE_ARCH)
+override TARGET_SUBARCH := $(XEN_TARGET_ARCH)
+override COMPILE_ARCH := $(patsubst x86%,x86,$(XEN_COMPILE_ARCH))
+override TARGET_ARCH := $(patsubst x86%,x86,$(XEN_TARGET_ARCH))
TARGET := $(BASEDIR)/xen
HDRS := $(wildcard $(BASEDIR)/include/xen/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/scsi/*.h)
HDRS += $(wildcard $(BASEDIR)/include/public/*.h)
HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h)
HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h)
-# compile.h is always regenerated, but other files shouldn't be rebuilt
+# Do not depend on auto-generated header files.
+HDRS := $(subst $(BASEDIR)/include/asm-$(TARGET_ARCH)/asm-offsets.h,,$(HDRS))
HDRS := $(subst $(BASEDIR)/include/xen/banner.h,,$(HDRS))
HDRS := $(subst $(BASEDIR)/include/xen/compile.h,,$(HDRS))
@@ -35,13 +35,10 @@ OBJS += $(patsubst %.c,%.o,$(C_SRCS))
ALL_OBJS := $(BASEDIR)/common/common.o
ALL_OBJS += $(BASEDIR)/drivers/char/driver.o
ALL_OBJS += $(BASEDIR)/drivers/acpi/driver.o
-ALL_OBJS += $(BASEDIR)/drivers/pci/driver.o
ALL_OBJS += $(BASEDIR)/arch/$(TARGET_ARCH)/arch.o
-HOSTCC = gcc
-HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
-test-gcc-flag = $(shell gcc -v --help 2>&1 | grep -q " $(1) " && echo $(1))
+test-gcc-flag = $(shell $(CC) -v --help 2>&1 | grep -q " $(1) " && echo $(1))
include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
@@ -51,15 +48,22 @@ ifeq ($(verbose),y)
CFLAGS += -DVERBOSE
endif
else
-CFLAGS += -DVERBOSE
+CFLAGS += -g -DVERBOSE
+endif
+
+ifeq ($(domu_debug),y)
+CFLAGS += -DDOMU_DEBUG
endif
-ifeq ($(debugger),y)
-CFLAGS += -DXEN_DEBUGGER
+ifeq ($(crash_debug),y)
+CFLAGS += -g -DCRASH_DEBUG
endif
ifeq ($(perfc),y)
CFLAGS += -DPERF_COUNTERS
+ifeq ($(perfc_arrays),y)
+CFLAGS += -DPERF_ARRAYS
+endif
endif
ifeq ($(trace),y)
diff --git a/xen/arch/ia64/Makefile b/xen/arch/ia64/Makefile
new file mode 100644
index 0000000000..2e59a7d19d
--- /dev/null
+++ b/xen/arch/ia64/Makefile
@@ -0,0 +1,71 @@
+include $(BASEDIR)/Rules.mk
+
+# libs-y += arch/ia64/lib/lib.a
+
+OBJS = xensetup.o setup.o time.o irq.o ia64_ksyms.o process.o smp.o \
+ xenmisc.o pdb-stub.o acpi.o hypercall.o \
+ machvec.o dom0_ops.o domain.o hpsimserial.o pcdp.o \
+ idle0_task.o pal.o hpsim.o efi.o efi_stub.o ivt.o mm_contig.o \
+ xenmem.o sal.o cmdline.o mm_init.o tlb.o smpboot.o \
+ extable.o linuxextable.o xenirq.o xentime.o \
+ regionreg.o entry.o unaligned.o privop.o vcpu.o \
+ irq_ia64.o irq_lsapic.o vhpt.o xenasm.o hyperprivop.o dom_fw.o \
+ sn_console.o
+
+ifeq ($(CONFIG_VTI),y)
+OBJS += vmx_init.o vmx_virt.o vmx_vcpu.o vmx_process.o vmx_vsa.o vmx_ivt.o \
+ vmx_phy_mode.o vmx_utility.o vmx_interrupt.o vmx_entry.o vmmu.o \
+ vtlb.o mmio.o vlsapic.o
+endif
+# perfmon.o
+# unwind.o needed for kernel unwinding (rare)
+
+OBJS := $(subst $(TARGET_ARCH)/asm-offsets.o,,$(OBJS))
+
+# remove following line if not privifying in memory
+# OBJS += privify.o
+
+default: $(OBJS) head.o ia64lib.o xen.lds.s
+ $(LD) -r -o arch.o $(OBJS) ia64lib.o
+ $(LD) $(LDFLAGS) -T $(BASEDIR)/arch/$(TARGET_ARCH)/xen.lds.s -N \
+ -Map map.out head.o $(ALL_OBJS) -o $(TARGET)-syms
+ $(OBJCOPY) -R .note -R .comment -S $(TARGET)-syms $(TARGET)
+ $(NM) -n $(TARGET)-syms | grep -v '\( [aUw] \)\|\(__crc_\)\|\( \$[adt]\)'\
+ > $(BASEDIR)/System.map
+
+
+asm-offsets.s: asm-offsets.c $(BASEDIR)/include/asm-ia64/.offsets.h.stamp
+ $(CC) $(CFLAGS) -S -o $@ $<
+
+$(BASEDIR)/include/asm-ia64/.offsets.h.stamp:
+# Need such symbol link to make linux headers available
+ [ -e $(BASEDIR)/include/linux ] \
+ || ln -s $(BASEDIR)/include/xen $(BASEDIR)/include/linux
+ [ -e $(BASEDIR)/include/asm-ia64/xen ] \
+ || ln -s $(BASEDIR)/include/asm-ia64/linux $(BASEDIR)/include/asm-ia64/xen
+# Solve circular reference on asm-offsets.h
+ [ -f $(BASEDIR)/include/asm-ia64/asm-offsets.h ] \
+ || echo "#define IA64_TASK_SIZE 0" > $(BASEDIR)/include/asm-ia64/asm-offsets.h
+#Bad hack. Force asm-offsets.h out-of-date
+ sleep 1
+ touch $@
+
+# I'm sure a Makefile wizard would know a better way to do this
+xen.lds.s: xen.lds.S
+ $(CC) -E $(CPPFLAGS) -P -DXEN -D__ASSEMBLY__ \
+ -o xen.lds.s xen.lds.S
+
+ia64lib.o:
+ $(MAKE) -C lib && cp lib/ia64lib.o .
+
+clean:
+ rm -f *.o *~ core xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s
+ rm -f lib/*.o
+
+# setup.o contains bits of compile.h so it must be blown away
+delete-unfresh-files:
+ echo any unfresh-files to delete for ia64\?
+# rm -f setup.o
+
+.PHONY: default clean delete-unfresh-files
+
diff --git a/xen/arch/ia64/Rules.mk b/xen/arch/ia64/Rules.mk
new file mode 100644
index 0000000000..44fb4b40ba
--- /dev/null
+++ b/xen/arch/ia64/Rules.mk
@@ -0,0 +1,24 @@
+########################################
+# ia64-specific definitions
+
+CONFIG_VTI ?= n
+ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
+CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
+endif
+AFLAGS += -D__ASSEMBLY__
+CPPFLAGS += -I$(BASEDIR)/include -I$(BASEDIR)/include/asm-ia64
+CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
+#CFLAGS += -O3 # -O3 over-inlines making debugging tough!
+CFLAGS += -O2 # but no optimization causes compile errors!
+#CFLAGS += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE)
+CFLAGS += -iwithprefix include -Wall
+CFLAGS += -fomit-frame-pointer -I$(BASEDIR)/include -D__KERNEL__
+CFLAGS += -I$(BASEDIR)/include/asm-ia64
+CFLAGS += -Wno-pointer-arith -Wredundant-decls
+CFLAGS += -DIA64 -DXEN -DLINUX_2_6
+CFLAGS += -ffixed-r13 -mfixed-range=f12-f15,f32-f127
+CFLAGS += -w -g
+ifeq ($(CONFIG_VTI),y)
+CFLAGS += -DCONFIG_VTI
+endif
+LDFLAGS := -g
diff --git a/xen/arch/ia64/acpi.c b/xen/arch/ia64/acpi.c
new file mode 100644
index 0000000000..6dbc687b8b
--- /dev/null
+++ b/xen/arch/ia64/acpi.c
@@ -0,0 +1,678 @@
+/*
+ * acpi.c - Architecture-Specific Low-Level ACPI Support
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000 Intel Corp.
+ * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com>
+ * Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
+ * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/mmzone.h>
+#include <asm/io.h>
+//#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/numa.h>
+#include <asm/sal.h>
+//#include <asm/cyclone.h>
+
+#define BAD_MADT_ENTRY(entry, end) ( \
+ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
+ ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+
+#define PREFIX "ACPI: "
+
+void (*pm_idle) (void);
+EXPORT_SYMBOL(pm_idle);
+void (*pm_power_off) (void);
+
+unsigned char acpi_kbd_controller_present = 1;
+unsigned char acpi_legacy_devices;
+
+const char *
+acpi_get_sysname (void)
+{
+/* #ifdef CONFIG_IA64_GENERIC */
+ unsigned long rsdp_phys;
+ struct acpi20_table_rsdp *rsdp;
+ struct acpi_table_xsdt *xsdt;
+ struct acpi_table_header *hdr;
+
+ rsdp_phys = acpi_find_rsdp();
+ if (!rsdp_phys) {
+ printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n");
+ return "dig";
+ }
+
+ rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
+ if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
+ printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
+ return "dig";
+ }
+
+ xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
+ hdr = &xsdt->header;
+ if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
+ printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
+ return "dig";
+ }
+
+ if (!strcmp(hdr->oem_id, "HP")) {
+ return "hpzx1";
+ }
+ else if (!strcmp(hdr->oem_id, "SGI")) {
+ return "sn2";
+ }
+
+ return "dig";
+/*
+#else
+# if defined (CONFIG_IA64_HP_SIM)
+ return "hpsim";
+# elif defined (CONFIG_IA64_HP_ZX1)
+ return "hpzx1";
+# elif defined (CONFIG_IA64_SGI_SN2)
+ return "sn2";
+# elif defined (CONFIG_IA64_DIG)
+ return "dig";
+# else
+# error Unknown platform. Fix acpi.c.
+# endif
+#endif
+*/
+}
+
+#ifdef CONFIG_ACPI_BOOT
+
+#define ACPI_MAX_PLATFORM_INTERRUPTS 256
+
+#if 0
+/* Array to record platform interrupt vectors for generic interrupt routing. */
+int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
+ [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
+};
+
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
+
+/*
+ * Interrupt routing API for device drivers. Provides interrupt vector for
+ * a generic platform event. Currently only CPEI is implemented.
+ */
+int
+acpi_request_vector (u32 int_type)
+{
+ int vector = -1;
+
+ if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) {
+ /* corrected platform error interrupt */
+ vector = platform_intr_list[int_type];
+ } else
+ printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n");
+ return vector;
+}
+#endif
+char *
+__acpi_map_table (unsigned long phys_addr, unsigned long size)
+{
+ return __va(phys_addr);
+}
+
+/* --------------------------------------------------------------------------
+ Boot-time Table Parsing
+ -------------------------------------------------------------------------- */
+
+static int total_cpus __initdata;
+static int available_cpus __initdata;
+struct acpi_table_madt * acpi_madt __initdata;
+static u8 has_8259;
+
+#if 0
+static int __init
+acpi_parse_lapic_addr_ovr (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_addr_ovr *lapic;
+
+ lapic = (struct acpi_table_lapic_addr_ovr *) header;
+
+ if (BAD_MADT_ENTRY(lapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (lapic->address) {
+ iounmap((void *) ipi_base_addr);
+ ipi_base_addr = (unsigned long) ioremap(lapic->address, 0);
+ }
+ return 0;
+}
+
+
+static int __init
+acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lsapic *lsapic;
+
+ lsapic = (struct acpi_table_lsapic *) header;
+
+ if (BAD_MADT_ENTRY(lsapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ printk(KERN_INFO "CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) | lsapic->eid);
+
+ if (!lsapic->flags.enabled)
+ printk(" disabled");
+ else {
+ printk(" enabled");
+#ifdef CONFIG_SMP
+ smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid;
+ if (hard_smp_processor_id()
+ == (unsigned int) smp_boot_data.cpu_phys_id[available_cpus])
+ printk(" (BSP)");
+#endif
+ ++available_cpus;
+ }
+
+ printk("\n");
+
+ total_cpus++;
+ return 0;
+}
+
+
+static int __init
+acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_nmi *lacpi_nmi;
+
+ lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
+
+ if (BAD_MADT_ENTRY(lacpi_nmi, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* TBD: Support lapic_nmi entries */
+ return 0;
+}
+
+
+static int __init
+acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_iosapic *iosapic;
+
+ iosapic = (struct acpi_table_iosapic *) header;
+
+ if (BAD_MADT_ENTRY(iosapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ iosapic_init(iosapic->address, iosapic->global_irq_base);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_plat_int_src (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_plat_int_src *plintsrc;
+ int vector;
+
+ plintsrc = (struct acpi_table_plat_int_src *) header;
+
+ if (BAD_MADT_ENTRY(plintsrc, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /*
+ * Get vector assignment for this interrupt, set attributes,
+ * and program the IOSAPIC routing table.
+ */
+ vector = iosapic_register_platform_intr(plintsrc->type,
+ plintsrc->global_irq,
+ plintsrc->iosapic_vector,
+ plintsrc->eid,
+ plintsrc->id,
+ (plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+ (plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+
+ platform_intr_list[plintsrc->type] = vector;
+ return 0;
+}
+
+
+static int __init
+acpi_parse_int_src_ovr (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_int_src_ovr *p;
+
+ p = (struct acpi_table_int_src_ovr *) header;
+
+ if (BAD_MADT_ENTRY(p, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ iosapic_override_isa_irq(p->bus_irq, p->global_irq,
+ (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+ (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+ return 0;
+}
+
+
+static int __init
+acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_nmi_src *nmi_src;
+
+ nmi_src = (struct acpi_table_nmi_src*) header;
+
+ if (BAD_MADT_ENTRY(nmi_src, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* TBD: Support nimsrc entries */
+ return 0;
+}
+/* Hook from generic ACPI tables.c */
+void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strncmp(oem_id, "IBM", 3) &&
+ (!strncmp(oem_table_id, "SERMOW", 6))){
+
+ /* Unfortunatly ITC_DRIFT is not yet part of the
+ * official SAL spec, so the ITC_DRIFT bit is not
+ * set by the BIOS on this hardware.
+ */
+ sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
+
+ /*Start cyclone clock*/
+ cyclone_setup(0);
+ }
+}
+
+static int __init
+acpi_parse_madt (unsigned long phys_addr, unsigned long size)
+{
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+
+ /* remember the value for reference after free_initmem() */
+#ifdef CONFIG_ITANIUM
+ has_8259 = 1; /* Firmware on old Itanium systems is broken */
+#else
+ has_8259 = acpi_madt->flags.pcat_compat;
+#endif
+ iosapic_system_init(has_8259);
+
+ /* Get base address of IPI Message Block */
+
+ if (acpi_madt->lapic_address)
+ ipi_base_addr = (unsigned long) ioremap(acpi_madt->lapic_address, 0);
+
+ printk(KERN_INFO PREFIX "Local APIC address 0x%lx\n", ipi_base_addr);
+
+ acpi_madt_oem_check(acpi_madt->header.oem_id,
+ acpi_madt->header.oem_table_id);
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+
+#undef SLIT_DEBUG
+
+#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
+
+static int __initdata srat_num_cpus; /* number of cpus */
+static u32 __initdata pxm_flag[PXM_FLAG_LEN];
+#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
+#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
+/* maps to convert between proximity domain and logical node ID */
+int __initdata pxm_to_nid_map[MAX_PXM_DOMAINS];
+int __initdata nid_to_pxm_map[MAX_NUMNODES];
+static struct acpi_table_slit __initdata *slit_table;
+
+/*
+ * ACPI 2.0 SLIT (System Locality Information Table)
+ * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
+ */
+void __init
+acpi_numa_slit_init (struct acpi_table_slit *slit)
+{
+ u32 len;
+
+ len = sizeof(struct acpi_table_header) + 8
+ + slit->localities * slit->localities;
+ if (slit->header.length != len) {
+ printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
+ len, slit->header.length);
+ memset(numa_slit, 10, sizeof(numa_slit));
+ return;
+ }
+ slit_table = slit;
+}
+
+void __init
+acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
+{
+ /* record this node in proximity bitmap */
+ pxm_bit_set(pa->proximity_domain);
+
+ node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid);
+ /* nid should be overridden as logical node id later */
+ node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
+ srat_num_cpus++;
+}
+
+void __init
+acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
+{
+ unsigned long paddr, size;
+ u8 pxm;
+ struct node_memblk_s *p, *q, *pend;
+
+ pxm = ma->proximity_domain;
+
+ /* fill node memory chunk structure */
+ paddr = ma->base_addr_hi;
+ paddr = (paddr << 32) | ma->base_addr_lo;
+ size = ma->length_hi;
+ size = (size << 32) | ma->length_lo;
+
+ /* Ignore disabled entries */
+ if (!ma->flags.enabled)
+ return;
+
+ /* record this node in proximity bitmap */
+ pxm_bit_set(pxm);
+
+ /* Insertion sort based on base address */
+ pend = &node_memblk[num_node_memblks];
+ for (p = &node_memblk[0]; p < pend; p++) {
+ if (paddr < p->start_paddr)
+ break;
+ }
+ if (p < pend) {
+ for (q = pend - 1; q >= p; q--)
+ *(q + 1) = *q;
+ }
+ p->start_paddr = paddr;
+ p->size = size;
+ p->nid = pxm;
+ num_node_memblks++;
+}
+
+void __init
+acpi_numa_arch_fixup (void)
+{
+ int i, j, node_from, node_to;
+
+ /* If there's no SRAT, fix the phys_id */
+ if (srat_num_cpus == 0) {
+ node_cpuid[0].phys_id = hard_smp_processor_id();
+ return;
+ }
+
+ /* calculate total number of nodes in system from PXM bitmap */
+ numnodes = 0; /* init total nodes in system */
+
+ memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
+ memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
+ for (i = 0; i < MAX_PXM_DOMAINS; i++) {
+ if (pxm_bit_test(i)) {
+ pxm_to_nid_map[i] = numnodes;
+ node_set_online(numnodes);
+ nid_to_pxm_map[numnodes++] = i;
+ }
+ }
+
+ /* set logical node id in memory chunk structure */
+ for (i = 0; i < num_node_memblks; i++)
+ node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
+
+ /* assign memory bank numbers for each chunk on each node */
+ for (i = 0; i < numnodes; i++) {
+ int bank;
+
+ bank = 0;
+ for (j = 0; j < num_node_memblks; j++)
+ if (node_memblk[j].nid == i)
+ node_memblk[j].bank = bank++;
+ }
+
+ /* set logical node id in cpu structure */
+ for (i = 0; i < srat_num_cpus; i++)
+ node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
+
+ printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes);
+ printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks);
+
+ if (!slit_table) return;
+ memset(numa_slit, -1, sizeof(numa_slit));
+ for (i=0; i<slit_table->localities; i++) {
+ if (!pxm_bit_test(i))
+ continue;
+ node_from = pxm_to_nid_map[i];
+ for (j=0; j<slit_table->localities; j++) {
+ if (!pxm_bit_test(j))
+ continue;
+ node_to = pxm_to_nid_map[j];
+ node_distance(node_from, node_to) =
+ slit_table->entry[i*slit_table->localities + j];
+ }
+ }
+
+#ifdef SLIT_DEBUG
+ printk("ACPI 2.0 SLIT locality table:\n");
+ for (i = 0; i < numnodes; i++) {
+ for (j = 0; j < numnodes; j++)
+ printk("%03d ", node_distance(i,j));
+ printk("\n");
+ }
+#endif
+}
+#endif /* CONFIG_ACPI_NUMA */
+
+#if 0
+unsigned int
+acpi_register_gsi (u32 gsi, int polarity, int trigger)
+{
+ return acpi_register_irq(gsi, polarity, trigger);
+}
+EXPORT_SYMBOL(acpi_register_gsi);
+static int __init
+acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_header *fadt_header;
+ struct fadt_descriptor_rev2 *fadt;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ fadt_header = (struct acpi_table_header *) __va(phys_addr);
+ if (fadt_header->revision != 3)
+ return -ENODEV; /* Only deal with ACPI 2.0 FADT */
+
+ fadt = (struct fadt_descriptor_rev2 *) fadt_header;
+
+ if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
+ acpi_kbd_controller_present = 0;
+
+ if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
+ acpi_legacy_devices = 1;
+
+ acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE);
+ return 0;
+}
+#endif
+
+unsigned long __init
+acpi_find_rsdp (void)
+{
+ unsigned long rsdp_phys = 0;
+
+ if (efi.acpi20)
+ rsdp_phys = __pa(efi.acpi20);
+ else if (efi.acpi)
+ printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n");
+ return rsdp_phys;
+}
+
+#if 0
+int __init
+acpi_boot_init (void)
+{
+
+ /*
+ * MADT
+ * ----
+ * Parse the Multiple APIC Description Table (MADT), if exists.
+ * Note that this table provides platform SMP configuration
+ * information -- the successor to MPS tables.
+ */
+
+ if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
+ printk(KERN_ERR PREFIX "Can't find MADT\n");
+ goto skip_madt;
+ }
+
+ /* Local APIC */
+
+ if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1)
+ printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+
+ /* I/O APIC */
+
+ if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
+ printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n");
+
+ /* System-Level Interrupt Routing */
+
+ if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
+ printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+ skip_madt:
+
+ /*
+ * FADT says whether a legacy keyboard controller is present.
+ * The FADT also contains an SCI_INT line, by which the system
+ * gets interrupts such as power and sleep buttons. If it's not
+ * on a Legacy interrupt, it needs to be setup.
+ */
+ if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
+ printk(KERN_ERR PREFIX "Can't find FADT\n");
+
+#ifdef CONFIG_SMP
+ if (available_cpus == 0) {
+ printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+ printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+ smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id();
+ available_cpus = 1; /* We've got at least one of these, no? */
+ }
+ smp_boot_data.cpu_count = available_cpus;
+
+ smp_build_cpu_map();
+# ifdef CONFIG_ACPI_NUMA
+ if (srat_num_cpus == 0) {
+ int cpu, i = 1;
+ for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
+ if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
+ node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
+ }
+ build_cpu_to_node_map();
+# endif
+#endif
+ /* Make boot-up look pretty */
+ printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
+ return 0;
+}
+int
+acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
+{
+ int vector;
+
+ if (has_8259 && gsi < 16)
+ *irq = isa_irq_to_vector(gsi);
+ else {
+ vector = gsi_to_vector(gsi);
+ if (vector == -1)
+ return -1;
+
+ *irq = vector;
+ }
+ return 0;
+}
+
+int
+acpi_register_irq (u32 gsi, u32 polarity, u32 trigger)
+{
+ if (has_8259 && gsi < 16)
+ return isa_irq_to_vector(gsi);
+
+ return iosapic_register_intr(gsi,
+ (polarity == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+ (trigger == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+}
+EXPORT_SYMBOL(acpi_register_irq);
+#endif
+#endif /* CONFIG_ACPI_BOOT */
diff --git a/xen/arch/ia64/asm-offsets.c b/xen/arch/ia64/asm-offsets.c
new file mode 100644
index 0000000000..41bbbc7d5b
--- /dev/null
+++ b/xen/arch/ia64/asm-offsets.c
@@ -0,0 +1,276 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <public/xen.h>
+#ifdef CONFIG_VTI
+#include <asm/tlb.h>
+#include <asm/regs.h>
+#endif // CONFIG_VTI
+
+#define task_struct vcpu
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+#define OFFSET(_sym, _str, _mem) \
+ DEFINE(_sym, offsetof(_str, _mem));
+
+void foo(void)
+{
+ DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
+ DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
+ DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
+ DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
+ //DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
+ DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
+ //DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
+ DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
+
+ BLANK();
+
+ DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+ DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+
+ BLANK();
+
+ DEFINE(XSI_PSR_IC_OFS, offsetof(vcpu_info_t, arch.interrupt_collection_enabled));
+ DEFINE(XSI_PSR_IC, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.interrupt_collection_enabled)));
+ DEFINE(XSI_PSR_I_OFS, offsetof(vcpu_info_t, arch.interrupt_delivery_enabled));
+ DEFINE(XSI_IIP_OFS, offsetof(vcpu_info_t, arch.iip));
+ DEFINE(XSI_IPSR, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.ipsr)));
+ DEFINE(XSI_IPSR_OFS, offsetof(vcpu_info_t, arch.ipsr));
+ DEFINE(XSI_IFS_OFS, offsetof(vcpu_info_t, arch.ifs));
+ DEFINE(XSI_ISR_OFS, offsetof(vcpu_info_t, arch.isr));
+ DEFINE(XSI_IIM_OFS, offsetof(vcpu_info_t, arch.iim));
+ DEFINE(XSI_BANKNUM_OFS, offsetof(vcpu_info_t, arch.banknum));
+ DEFINE(XSI_BANK0_OFS, offsetof(vcpu_info_t, arch.bank0_regs[0]));
+ DEFINE(XSI_BANK1_OFS, offsetof(vcpu_info_t, arch.bank1_regs[0]));
+ DEFINE(XSI_METAPHYS_OFS, offsetof(vcpu_info_t, arch.metaphysical_mode));
+ DEFINE(XSI_PRECOVER_IFS_OFS, offsetof(vcpu_info_t, arch.precover_ifs));
+ DEFINE(XSI_INCOMPL_REG_OFS, offsetof(vcpu_info_t, arch.incomplete_regframe));
+ DEFINE(XSI_PEND_OFS, offsetof(vcpu_info_t, arch.pending_interruption));
+ DEFINE(XSI_RR0_OFS, offsetof(vcpu_info_t, arch.rrs[0]));
+ //DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
+ //DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
+ //DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+ //DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
+ //DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
+ //DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
+ //DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
+ //DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
+ //DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
+ DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct vcpu, arch._thread.ksp));
+ DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct vcpu, arch._thread.on_ustack));
+
+ DEFINE(IA64_VCPU_META_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_rr0));
+ DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0));
+ DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm));
+ DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva));
+
+ BLANK();
+
+ //DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
+
+ BLANK();
+
+ //DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
+ //group_stop_count));
+ //DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
+
+ BLANK();
+
+ DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
+ DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
+ DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
+ DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
+ DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
+ DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
+ DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
+ DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
+ DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
+ DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
+ DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
+ DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
+ DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
+ DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
+ DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
+
+ DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
+ DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
+ DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
+ DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
+ DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
+ DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
+ DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
+ DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
+ DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
+ DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
+ DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
+ DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
+#ifdef CONFIG_VTI
+ DEFINE(IA64_PT_REGS_R4_OFFSET, offsetof (struct xen_regs, r4));
+ DEFINE(IA64_PT_REGS_R5_OFFSET, offsetof (struct xen_regs, r5));
+ DEFINE(IA64_PT_REGS_R6_OFFSET, offsetof (struct xen_regs, r6));
+ DEFINE(IA64_PT_REGS_R7_OFFSET, offsetof (struct xen_regs, r7));
+ DEFINE(IA64_PT_REGS_CR_IIPA_OFFSET, offsetof (struct xen_regs, cr_iipa));
+ DEFINE(IA64_PT_REGS_CR_ISR_OFFSET, offsetof (struct xen_regs, cr_isr));
+ DEFINE(IA64_PT_REGS_EML_UNAT_OFFSET, offsetof (struct xen_regs, eml_unat));
+ DEFINE(IA64_PT_REGS_RFI_PFS_OFFSET, offsetof (struct xen_regs, rfi_pfs));
+ DEFINE(RFI_IIP_OFFSET, offsetof(struct vcpu, arch.arch_vmx.rfi_iip));
+ DEFINE(RFI_IPSR_OFFSET, offsetof(struct vcpu, arch.arch_vmx.rfi_ipsr));
+ DEFINE(RFI_IFS_OFFSET,offsetof(struct vcpu ,arch.arch_vmx.rfi_ifs));
+ DEFINE(RFI_PFS_OFFSET,offsetof(struct vcpu ,arch.arch_vmx.rfi_pfs));
+ DEFINE(SWITCH_MRR5_OFFSET,offsetof(struct vcpu ,arch.arch_vmx.mrr5));
+ DEFINE(SWITCH_MRR6_OFFSET,offsetof(struct vcpu ,arch.arch_vmx.mrr6));
+ DEFINE(SWITCH_MRR7_OFFSET,offsetof(struct vcpu ,arch.arch_vmx.mrr7));
+ DEFINE(SWITCH_MPTA_OFFSET,offsetof(struct vcpu ,arch.arch_vmx.mpta));
+#endif //CONFIG_VTI
+ DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
+ DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
+ DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
+ DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
+ DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
+ DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
+ DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
+ DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
+ DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
+ DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
+ DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
+ DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
+ DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
+ DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
+ DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
+ DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
+ DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
+ DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
+ DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
+ DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
+ DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
+ DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
+ DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
+
+ BLANK();
+
+ DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
+ DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
+ DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
+ DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
+ DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
+ DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
+ DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
+ DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
+ DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
+ DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
+ DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
+ DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
+ DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
+ DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
+ DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
+ DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
+ DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
+ DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
+ DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
+ DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
+ DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
+ DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
+ DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
+ DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
+ DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
+ DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
+ DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
+ DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
+ DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
+ DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
+ DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
+ DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
+ DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
+ DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
+ DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
+ DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
+ DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
+ DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
+ DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
+ DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
+ DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
+ DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
+
+ BLANK();
+
+#ifdef CONFIG_VTI
+ DEFINE(IA64_VPD_BASE_OFFSET, offsetof (struct vcpu, arch.arch_vmx.vpd));
+ DEFINE(IA64_VPD_CR_VPTA_OFFSET, offsetof (cr_t, pta));
+ DEFINE(XXX_THASH_SIZE, sizeof (thash_data_t));
+
+ BLANK();
+#endif //CONFIG_VTI
+ //DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
+ //DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
+ //DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
+ //DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
+ //DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
+ //DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
+ //DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
+ //DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
+ //DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
+ //DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
+ //DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
+ //DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
+ //DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
+
+ //BLANK();
+
+ //DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
+
+ //BLANK();
+
+ //DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
+ //DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
+ //DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
+ //DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
+ //DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
+ //BLANK();
+ /* for assembly files which can't include sched.h: */
+ //DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
+ //DEFINE(IA64_CLONE_VM, CLONE_VM);
+
+ BLANK();
+ DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, offsetof (struct cpuinfo_ia64, nsec_per_cyc));
+ DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
+
+
+ DEFINE(CLONE_IDLETASK_BIT, 12);
+ DEFINE(CLONE_SETTLS_BIT, 19);
+//#if CLONE_SETTLS != (1<<19)
+//# error "CLONE_SETTLS_BIT incorrect, please fix"
+//#endif
+
+ //BLANK();
+ //DEFINE(IA64_MCA_TLB_INFO_SIZE, sizeof (struct ia64_mca_tlb_info));
+ /* used by head.S */
+ DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, offsetof (struct cpuinfo_ia64, nsec_per_cyc));
+
+ BLANK();
+ /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
+ //DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
+ //DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
+ //DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
+ //DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
+ //DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
+ //DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
+ //DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
+ //DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
+ //DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask));
+ //DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
+ //DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
+ //DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
+ //DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
+}
diff --git a/xen/arch/ia64/dom0_ops.c b/xen/arch/ia64/dom0_ops.c
new file mode 100644
index 0000000000..e0b48080bc
--- /dev/null
+++ b/xen/arch/ia64/dom0_ops.c
@@ -0,0 +1,52 @@
+/******************************************************************************
+ * Arch-specific dom0_ops.c
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <public/dom0_ops.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <asm/pdb.h>
+#include <xen/trace.h>
+#include <xen/console.h>
+#include <public/sched_ctl.h>
+
+#define TRC_DOM0OP_ENTER_BASE 0x00020000
+#define TRC_DOM0OP_LEAVE_BASE 0x00030000
+
+static int msr_cpu_mask;
+static unsigned long msr_addr;
+static unsigned long msr_lo;
+static unsigned long msr_hi;
+
+/*
+ * Dispatch an arch-specific dom0 control operation.
+ * Only privileged (dom0) callers are allowed; no ia64-specific
+ * commands are implemented yet, so every command yields -ENOSYS.
+ */
+long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
+{
+    long ret;
+
+    if ( !IS_PRIV(current->domain) )
+        return -EPERM;
+
+    switch ( op->cmd )
+    {
+    default:
+        ret = -ENOSYS;
+        break;
+    }
+
+    return ret;
+}
+
+/*
+ * Fill in the arch-specific portion of a vcpu guest context for
+ * getdomaininfo.  Not implemented on ia64 yet; reports via dummy().
+ * (Removed the unused local 'i', which only produced a warning.)
+ */
+void arch_getdomaininfo_ctxt(struct domain *d, struct vcpu_guest_context *c)
+{
+    dummy();
+}
diff --git a/xen/arch/ia64/dom_fw.c b/xen/arch/ia64/dom_fw.c
new file mode 100644
index 0000000000..b57a727a50
--- /dev/null
+++ b/xen/arch/ia64/dom_fw.c
@@ -0,0 +1,576 @@
+/*
+ * Xen domain firmware emulation support
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ */
+
+#include <xen/config.h>
+#include <asm/system.h>
+#include <asm/pgalloc.h>
+
+#include <linux/efi.h>
+#include <asm/io.h>
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <xen/acpi.h>
+
+#include <asm/dom_fw.h>
+
+struct ia64_boot_param *dom_fw_init(struct domain *, char *,int,char *,int);
+extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr);
+extern struct domain *dom0;
+extern unsigned long dom0_start;
+
+extern unsigned long running_on_sim;
+
+
+unsigned long dom_fw_base_mpa = -1;
+unsigned long imva_fw_base = -1;
+
+// return domain (meta)physical address for a given imva
+// this function is a call-back from dom_fw_init
+/*
+ * Requires dom_fw_setup() to have initialized dom_fw_base_mpa and
+ * imva_fw_base; spins forever on misuse so the error is visible.
+ */
+unsigned long dom_pa(unsigned long imva)
+{
+	if (dom_fw_base_mpa == -1 || imva_fw_base == -1) {
+		printf("dom_pa: uninitialized! (spinning...)\n");
+		while(1);
+	}
+	/* the firmware area is exactly one page: an offset equal to
+	 * PAGE_SIZE is already out of range (was '>', an off-by-one) */
+	if (imva - imva_fw_base >= PAGE_SIZE) {
+		/* %p with unsigned long arguments is undefined; print hex */
+		printf("dom_pa: bad offset! imva=0x%lx, imva_fw_base=0x%lx (spinning...)\n",
+		       imva, imva_fw_base);
+		while(1);
+	}
+	return dom_fw_base_mpa + (imva - imva_fw_base);
+}
+
+// builds a hypercall bundle at domain physical address
+void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall)
+{
+	unsigned long bundle_imva;
+
+	/* dom0 memory is physically contiguous starting at dom0_start */
+	if (d == dom0)
+		paddr += dom0_start;
+	bundle_imva = domain_mpa_to_imva(d, paddr);
+	build_hypercall_bundle(bundle_imva, d->arch.breakimm, hypercall, 1);
+}
+
+
+// builds a hypercall bundle at domain physical address
+void dom_fw_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall,unsigned long ret)
+{
+	unsigned long bundle_imva;
+
+	/* dom0 memory is physically contiguous starting at dom0_start */
+	if (d == dom0)
+		paddr += dom0_start;
+	bundle_imva = domain_mpa_to_imva(d, paddr);
+	build_hypercall_bundle(bundle_imva, d->arch.breakimm, hypercall, ret);
+}
+
+
+// FIXME: This is really a hack: Forcing the boot parameter block
+// at domain mpaddr 0 page, then grabbing only the low bits of the
+// Xen imva, which is the offset into the page
+/*
+ * Build the fake firmware tables for domain d and return the domain
+ * physical address of the resulting ia64_boot_param block.
+ */
+unsigned long dom_fw_setup(struct domain *d, char *args, int arglen)
+{
+	struct ia64_boot_param *bp;
+
+	dom_fw_base_mpa = 0;
+	if (d == dom0) dom_fw_base_mpa += dom0_start;
+	imva_fw_base = domain_mpa_to_imva(d, dom_fw_base_mpa);
+	/* imva_fw_base is an address held in an unsigned long; cast it
+	 * explicitly rather than relying on an implicit int-to-pointer
+	 * conversion for dom_fw_init's char* parameter */
+	bp = dom_fw_init(d, args, arglen, (char *) imva_fw_base, PAGE_SIZE);
+	return dom_pa((unsigned long) bp);
+}
+
+
+/* the following heavily leveraged from linux/arch/ia64/hp/sim/fw-emu.c */
+
+#define MB (1024*1024UL)
+
+#define NUM_EFI_SYS_TABLES 6
+#define PASS_THRU_IOPORT_SPACE
+#ifdef PASS_THRU_IOPORT_SPACE
+# define NUM_MEM_DESCS 4
+#else
+# define NUM_MEM_DESCS 3
+#endif
+
+
+#define SECS_PER_HOUR	(60 * 60)
+#define SECS_PER_DAY	(SECS_PER_HOUR * 24)
+
+/* Compute the `struct tm' representation of *T,
+   offset OFFSET seconds east of UTC,
+   and store year, yday, mon, mday, wday, hour, min, sec into *TP.
+   Return nonzero if successful.  */
+/* NOTE(review): adapted from glibc's offtime(); unlike the comment above,
+   this variant takes no OFFSET argument and fills an efi_time_t (only
+   year/month/day/hour/minute/second) from a Unix epoch timestamp. */
+int
+offtime (unsigned long t, efi_time_t *tp)
+{
+	const unsigned short int __mon_yday[2][13] =
+	{
+		/* Normal years.  */
+		{ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+		/* Leap years.  */
+		{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+	};
+	long int days, rem, y;
+	const unsigned short int *ip;
+
+	/* split into whole days and the remaining seconds of that day */
+	days = t / SECS_PER_DAY;
+	rem = t % SECS_PER_DAY;
+	while (rem < 0) {
+		rem += SECS_PER_DAY;
+		--days;
+	}
+	while (rem >= SECS_PER_DAY) {
+		rem -= SECS_PER_DAY;
+		++days;
+	}
+	tp->hour = rem / SECS_PER_HOUR;
+	rem %= SECS_PER_HOUR;
+	tp->minute = rem / 60;
+	tp->second = rem % 60;
+	/* January 1, 1970 was a Thursday. */
+	y = 1970;
+
+# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
+# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
+# define __isleap(year) \
+  ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+
+	/* walk DAYS into the correct year, correcting for leap years */
+	while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
+		/* Guess a corrected year, assuming 365 days per year. */
+		long int yg = y + days / 365 - (days % 365 < 0);
+
+		/* Adjust DAYS and Y to match the guessed year. */
+		days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
+			 - LEAPS_THRU_END_OF (y - 1));
+		y = yg;
+	}
+	tp->year = y;
+	ip = __mon_yday[__isleap(y)];
+	/* find the month containing DAYS via the cumulative-day table */
+	for (y = 11; days < (long int) ip[y]; --y)
+		continue;
+	days -= ip[y];
+	tp->month = y + 1;
+	tp->day = days + 1;
+	return 1;
+}
+
+extern void pal_emulator_static (void);
+
+/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
+
+#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3)
+
+#define REG_OFFSET(addr) (0x00000000000000FF & (addr))
+#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr))
+#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr))
+
+#ifndef XEN
+/*
+ * Simulator-only EFI GetTime() emulation: reads the time of day from
+ * the SKI simulator via an SSC call and converts it to efi_time_t.
+ * Compiled out for Xen (see the enclosing #ifndef XEN).
+ */
+static efi_status_t
+fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+	struct {
+		int tv_sec;	/* must be 32bits to work */
+		int tv_usec;
+	} tv32bits;
+
+	ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
+
+	memset(tm, 0, sizeof(*tm));
+	offtime(tv32bits.tv_sec, tm);
+
+	/* time capabilities are not modelled by the simulator */
+	if (tc)
+		memset(tc, 0, sizeof(*tc));
+#else
+# error Not implemented yet...
+#endif
+	return EFI_SUCCESS;
+}
+
+/*
+ * Simulator-only EFI ResetSystem() emulation: exits the SKI simulator.
+ * reset_type/data_size/data are ignored; only status reaches SSC_EXIT.
+ */
+static void
+efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+	ssc(status, 0, 0, 0, SSC_EXIT);
+#else
+# error Not implemented yet...
+#endif
+}
+
+/* Catch-all stub for EFI runtime services that are not emulated. */
+static efi_status_t
+efi_unimplemented (void)
+{
+	return EFI_UNSUPPORTED;
+}
+#endif /* !XEN */
+
+/*
+ * Emulate the SAL procedure-call interface for guests.  Returns the
+ * SAL status plus r9..r11 per the SAL calling convention.  Dom0 may
+ * pass PCI config accesses through to the real SAL; everything else
+ * is logged and ignored.
+ */
+struct sal_ret_values
+sal_emulator (long index, unsigned long in1, unsigned long in2,
+	      unsigned long in3, unsigned long in4, unsigned long in5,
+	      unsigned long in6, unsigned long in7)
+{
+	long r9  = 0;
+	long r10 = 0;
+	long r11 = 0;
+	long status;
+
+	/*
+	 * Don't do a "switch" here since that gives us code that
+	 * isn't self-relocatable.
+	 */
+	status = 0;
+	if (index == SAL_FREQ_BASE) {
+		switch (in1) {
+		      case SAL_FREQ_BASE_PLATFORM:
+			r9 = 200000000;
+			break;
+
+		      case SAL_FREQ_BASE_INTERVAL_TIMER:
+			/*
+			 * Is this supposed to be the cr.itc frequency
+			 * or something platform specific?  The SAL
+			 * doc ain't exactly clear on this...
+			 */
+			r9 = 700000000;
+			break;
+
+		      case SAL_FREQ_BASE_REALTIME_CLOCK:
+			r9 = 1;
+			break;
+
+		      default:
+			status = -1;
+			break;
+		}
+	} else if (index == SAL_PCI_CONFIG_READ) {
+		if (current->domain == dom0) {
+			u64 value;
+			// note that args 2&3 are swapped!!
+			status = ia64_sal_pci_config_read(in1,in3,in2,&value);
+			r9 = value;
+		}
+		else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_READ\n");
+	} else if (index == SAL_PCI_CONFIG_WRITE) {
+		if (current->domain == dom0) {
+			if (((in1 & ~0xffffffffUL) && (in4 == 0)) ||
+			    (in4 > 1) ||
+			    (in2 > 8) || (in2 & (in2-1)))
+				/* was %p with unsigned long args (undefined); print hex */
+				printf("*** SAL_PCI_CONF_WRITE?!?(adr=0x%lx,typ=0x%lx,sz=0x%lx,val=0x%lx)\n",in1,in4,in2,in3);
+			// note that args are in a different order!!
+			status = ia64_sal_pci_config_write(in1,in4,in2,in3);
+		}
+		else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_WRITE\n");
+	} else if (index == SAL_SET_VECTORS) {
+		printf("*** CALLED SAL_SET_VECTORS.  IGNORED...\n");
+	} else if (index == SAL_GET_STATE_INFO) {
+		printf("*** CALLED SAL_GET_STATE_INFO.  IGNORED...\n");
+	} else if (index == SAL_GET_STATE_INFO_SIZE) {
+		printf("*** CALLED SAL_GET_STATE_INFO_SIZE.  IGNORED...\n");
+	} else if (index == SAL_CLEAR_STATE_INFO) {
+		printf("*** CALLED SAL_CLEAR_STATE_INFO.  IGNORED...\n");
+	} else if (index == SAL_MC_RENDEZ) {
+		printf("*** CALLED SAL_MC_RENDEZ.  IGNORED...\n");
+	} else if (index == SAL_MC_SET_PARAMS) {
+		printf("*** CALLED SAL_MC_SET_PARAMS.  IGNORED...\n");
+	} else if (index == SAL_CACHE_FLUSH) {
+		printf("*** CALLED SAL_CACHE_FLUSH.  IGNORED...\n");
+	} else if (index == SAL_CACHE_INIT) {
+		printf("*** CALLED SAL_CACHE_INIT.  IGNORED...\n");
+	} else if (index == SAL_UPDATE_PAL) {
+		printf("*** CALLED SAL_UPDATE_PAL.  IGNORED...\n");
+	} else {
+		printf("*** CALLED SAL_ WITH UNKNOWN INDEX.  IGNORED...\n");
+		status = -1;
+	}
+	return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+
+#define NFUNCPTRS 20
+
+/* Log one EFI memory descriptor from the domain's constructed memmap. */
+void print_md(efi_memory_desc_t *md)
+{
+#if 1
+	printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
+		md->type, md->attribute, md->phys_addr,
+		md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+		md->num_pages >> (20 - EFI_PAGE_SHIFT));
+#endif
+}
+
+#define LSAPIC_NUM 16 // TEMP
+static u32 lsapic_flag=1;
+
+/* Provide only one LP to guest */
+/*
+ * MADT walker callback: keep the first enabled local SAPIC entry and
+ * mark every subsequent one disabled, so the guest sees a single CPU.
+ * Uses the file-scope lsapic_flag as "first enabled entry not yet seen".
+ */
+static int
+acpi_update_lsapic (acpi_table_entry_header *header)
+{
+	struct acpi_table_lsapic *lsapic;
+
+	lsapic = (struct acpi_table_lsapic *) header;
+	if (!lsapic)
+		return -EINVAL;
+
+	if (lsapic->flags.enabled && lsapic_flag) {
+		printk("enable lsapic entry: 0x%lx\n", (u64)lsapic);
+		lsapic_flag = 0; /* disable all the following processors */
+	} else if (lsapic->flags.enabled) {
+		printk("DISABLE lsapic entry: 0x%lx\n", (u64)lsapic);
+		lsapic->flags.enabled = 0;
+	} else
+		printk("lsapic entry is already disabled: 0x%lx\n", (u64)lsapic);
+
+	return 0;
+}
+
+/*
+ * Recompute the MADT header checksum after the table has been edited,
+ * so that the byte sum of the whole table is zero again.
+ */
+static int
+acpi_update_madt_checksum (unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_madt* acpi_madt;
+	u8* p;
+	u8 sum = 0;
+	int remaining;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+	acpi_madt->header.checksum = 0;
+
+	/* re-calculate MADT checksum */
+	for (p = (u8*)acpi_madt, remaining = acpi_madt->header.length;
+	     remaining > 0; remaining--)
+		sum = (u8)(sum + *p++);
+	acpi_madt->header.checksum = 0x0 - sum;
+
+	return 0;
+}
+
+/* base is physical address of acpi table */
+/*
+ * Patch the ACPI MADT so the guest sees only one enabled LSAPIC entry,
+ * then fix up the table checksum.
+ */
+void touch_acpi_table(void)
+{
+	/* acpi_table_parse_madt() returns an int (negative on error);
+	 * the old u64 local made 'count < 1' miss error returns and
+	 * mismatched the %d format below */
+	int count;
+
+	count = acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, NR_CPUS);
+	if (count < 1)
+		printk("Error parsing MADT - no LAPIC entries\n");
+	printk("Total %d lsapic entry\n", count);
+	acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);
+
+	return;
+}
+
+
+/*
+ * Build the fake EFI/SAL firmware environment for domain d inside the
+ * fw_mem area (one page, see dom_fw_setup) and return a pointer to the
+ * ia64_boot_param block describing it.  All addresses stored in the
+ * tables are domain (meta)physical addresses obtained via dom_pa().
+ */
+struct ia64_boot_param *
+dom_fw_init (struct domain *d, char *args, int arglen, char *fw_mem, int fw_mem_size)
+{
+	efi_system_table_t *efi_systab;
+	efi_runtime_services_t *efi_runtime;
+	efi_config_table_t *efi_tables;
+	struct ia64_sal_systab *sal_systab;
+	efi_memory_desc_t *efi_memmap, *md;
+	unsigned long *pal_desc, *sal_desc;
+	struct ia64_sal_desc_entry_point *sal_ed;
+	struct ia64_boot_param *bp;
+	unsigned long *pfn;
+	unsigned char checksum = 0;
+	char *cp, *cmd_line, *fw_vendor;
+	int i = 0;
+	unsigned long maxmem = d->max_pages * PAGE_SIZE;
+	unsigned long start_mpaddr = ((d==dom0)?dom0_start:0);
+
+	/* Emit one EFI memory descriptor; 'abs' selects absolute vs
+	 * start_mpaddr-relative physical addresses. */
+# define MAKE_MD(typ, attr, start, end, abs) 	\
+	do {					\
+		md = efi_memmap + i++;		\
+		md->type = typ;			\
+		md->pad = 0;			\
+		md->phys_addr = abs ? start : start_mpaddr + start;	\
+		md->virt_addr = 0;		\
+		md->num_pages = (end - start) >> 12;	\
+		md->attribute = attr;		\
+		print_md(md);			\
+	} while (0)
+
+/* FIXME: should check size but for now we have a whole MB to play with.
+   And if stealing code from fw-emu.c, watch out for new fw_vendor on the end!
+	if (fw_mem_size < sizeof(fw_mem_proto)) {
+		printf("sys_fw_init: insufficient space for fw_mem\n");
+		return 0;
+	}
+*/
+	memset(fw_mem, 0, fw_mem_size);
+
+#ifdef XEN
+#else
+	pal_desc = (unsigned long *) &pal_emulator_static;
+	sal_desc = (unsigned long *) &sal_emulator;
+#endif
+
+	/* carve the firmware page into the individual tables */
+	cp = fw_mem;
+	efi_systab  = (void *) cp; cp += sizeof(*efi_systab);
+	efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
+	efi_tables  = (void *) cp; cp += NUM_EFI_SYS_TABLES * sizeof(*efi_tables);
+	sal_systab  = (void *) cp; cp += sizeof(*sal_systab);
+	sal_ed      = (void *) cp; cp += sizeof(*sal_ed);
+	efi_memmap  = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
+	bp	    = (void *) cp; cp += sizeof(*bp);
+	/* room for NFUNCPTRS (addr,gp) pairs of unsigned long; was
+	 * sizeof(pfn) (pointer size) -- same value on ia64, but
+	 * sizeof(*pfn) states the intent */
+	pfn         = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(*pfn);
+	cmd_line    = (void *) cp;
+
+	if (args) {
+		if (arglen >= 1024)
+			arglen = 1023;
+		memcpy(cmd_line, args, arglen);
+	} else {
+		arglen = 0;
+	}
+	cmd_line[arglen] = '\0';
+
+	/* was sizeof(efi_systab), which only cleared pointer-size bytes */
+	memset(efi_systab, 0, sizeof(*efi_systab));
+	efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
+	efi_systab->hdr.revision  = EFI_SYSTEM_TABLE_REVISION;
+	efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
+	cp = fw_vendor = &cmd_line[arglen] + (2-(arglen&1)); // round to 16-bit boundary
+#define FW_VENDOR "X\0e\0n\0/\0i\0a\0\066\0\064\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+	cp += sizeof(FW_VENDOR) + (8-((unsigned long)cp & 7)); // round to 64-bit boundary
+
+	memcpy(fw_vendor,FW_VENDOR,sizeof(FW_VENDOR));
+	efi_systab->fw_vendor = dom_pa(fw_vendor);
+
+	efi_systab->fw_revision = 1;
+	efi_systab->runtime = (void *) dom_pa(efi_runtime);
+	efi_systab->nr_tables = NUM_EFI_SYS_TABLES;
+	efi_systab->tables = dom_pa(efi_tables);
+
+	efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
+	efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
+	efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
+	/* Patch a hypercall bundle at the fixed firmware address and point
+	 * the runtime-services function descriptor (addr,gp pair) at it. */
+#define EFI_HYPERCALL_PATCH(tgt,call) do { \
+    dom_efi_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call); \
+    tgt = dom_pa(pfn); \
+    *pfn++ = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \
+    *pfn++ = 0; \
+    } while (0)
+
+	EFI_HYPERCALL_PATCH(efi_runtime->get_time,EFI_GET_TIME);
+	EFI_HYPERCALL_PATCH(efi_runtime->set_time,EFI_SET_TIME);
+	EFI_HYPERCALL_PATCH(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME);
+	EFI_HYPERCALL_PATCH(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME);
+	EFI_HYPERCALL_PATCH(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP);
+	EFI_HYPERCALL_PATCH(efi_runtime->get_variable,EFI_GET_VARIABLE);
+	EFI_HYPERCALL_PATCH(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE);
+	EFI_HYPERCALL_PATCH(efi_runtime->set_variable,EFI_SET_VARIABLE);
+	EFI_HYPERCALL_PATCH(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT);
+	EFI_HYPERCALL_PATCH(efi_runtime->reset_system,EFI_RESET_SYSTEM);
+
+	efi_tables[0].guid = SAL_SYSTEM_TABLE_GUID;
+	efi_tables[0].table = dom_pa(sal_systab);
+	for (i = 1; i < NUM_EFI_SYS_TABLES; i++) {
+		efi_tables[i].guid = NULL_GUID;
+		efi_tables[i].table = 0;
+	}
+	if (d == dom0) {
+		printf("Domain0 EFI passthrough:");
+		i = 1;
+		if (efi.mps) {
+			efi_tables[i].guid = MPS_TABLE_GUID;
+			efi_tables[i].table = __pa(efi.mps);
+			/* was "%0xlx", which printed the value as int hex
+			 * followed by a literal "lx" (same fix below) */
+			printf(" MPS=0x%lx",efi_tables[i].table);
+			i++;
+		}
+
+		touch_acpi_table();
+
+		if (efi.acpi20) {
+			efi_tables[i].guid = ACPI_20_TABLE_GUID;
+			efi_tables[i].table = __pa(efi.acpi20);
+			printf(" ACPI 2.0=0x%lx",efi_tables[i].table);
+			i++;
+		}
+		if (efi.acpi) {
+			efi_tables[i].guid = ACPI_TABLE_GUID;
+			efi_tables[i].table = __pa(efi.acpi);
+			printf(" ACPI=0x%lx",efi_tables[i].table);
+			i++;
+		}
+		if (efi.smbios) {
+			efi_tables[i].guid = SMBIOS_TABLE_GUID;
+			efi_tables[i].table = __pa(efi.smbios);
+			printf(" SMBIOS=0x%lx",efi_tables[i].table);
+			i++;
+		}
+		if (efi.hcdp) {
+			efi_tables[i].guid = HCDP_TABLE_GUID;
+			efi_tables[i].table = __pa(efi.hcdp);
+			printf(" HCDP=0x%lx",efi_tables[i].table);
+			i++;
+		}
+		printf("\n");
+	}
+
+	/* fill in the SAL system table: */
+	memcpy(sal_systab->signature, "SST_", 4);
+	sal_systab->size = sizeof(*sal_systab);
+	sal_systab->sal_rev_minor = 1;
+	sal_systab->sal_rev_major = 0;
+	sal_systab->entry_count = 1;
+
+	strcpy(sal_systab->oem_id, "Xen/ia64");
+	strcpy(sal_systab->product_id, "Xen/ia64");
+
+	/* fill in an entry point: */
+	sal_ed->type = SAL_DESC_ENTRY_POINT;
+#define FW_HYPERCALL_PATCH(tgt,call,ret) do { \
+    dom_fw_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call,ret); \
+    tgt = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \
+    } while (0)
+	FW_HYPERCALL_PATCH(sal_ed->pal_proc,PAL_CALL,0);
+	FW_HYPERCALL_PATCH(sal_ed->sal_proc,SAL_CALL,1);
+	sal_ed->gp = 0;  // will be ignored
+
+	/* simple byte checksum over the SAL systab and entry point */
+	for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
+		checksum += *cp;
+
+	sal_systab->checksum = -checksum;
+
+	/* simulate 1MB free memory at physical address zero */
+	i = 0;
+	MAKE_MD(EFI_BOOT_SERVICES_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);
+	/* hypercall patches live here, masquerade as reserved PAL memory */
+	MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0);
+	MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 0);
+#ifdef PASS_THRU_IOPORT_SPACE
+	if (d == dom0 && !running_on_sim) {
+		/* pass through the I/O port space */
+		efi_memory_desc_t *efi_get_io_md(void);
+		efi_memory_desc_t *ia64_efi_io_md = efi_get_io_md();
+		u32 type;
+		u64 iostart, ioend, ioattr;
+
+		type = ia64_efi_io_md->type;
+		iostart = ia64_efi_io_md->phys_addr;
+		ioend = ia64_efi_io_md->phys_addr +
+			(ia64_efi_io_md->num_pages << 12);
+		ioattr = ia64_efi_io_md->attribute;
+		MAKE_MD(type,ioattr,iostart,ioend, 1);
+	}
+	else
+		MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0);
+#endif
+
+	/* hand the descriptors to the guest via the boot parameter block */
+	bp->efi_systab = dom_pa(fw_mem);
+	bp->efi_memmap = dom_pa(efi_memmap);
+	bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
+	bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
+	bp->efi_memdesc_version = 1;
+	bp->command_line = dom_pa(cmd_line);
+	bp->console_info.num_cols = 80;
+	bp->console_info.num_rows = 25;
+	bp->console_info.orig_x = 0;
+	bp->console_info.orig_y = 24;
+	bp->fpswa = 0;
+
+	return bp;
+}
diff --git a/xen/arch/ia64/domain.c b/xen/arch/ia64/domain.c
new file mode 100644
index 0000000000..869396ed06
--- /dev/null
+++ b/xen/arch/ia64/domain.c
@@ -0,0 +1,1255 @@
+/*
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * Copyright (C) 2005 Intel Co
+ * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
+ *
+ * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add CONFIG_VTI domain support
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <xen/softirq.h>
+#include <xen/mm.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+//#include <asm/mpspec.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+//#include <xen/shadow.h>
+#include <xen/console.h>
+
+#include <xen/elf.h>
+//#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h> /* for MAX_DMA_ADDRESS */
+
+#include <asm/asm-offsets.h> /* for IA64_THREAD_INFO_SIZE */
+
+#include <asm/vcpu.h> /* for function declarations */
+#ifdef CONFIG_VTI
+#include <asm/vmx.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/pal.h>
+#endif // CONFIG_VTI
+
+#define CONFIG_DOMAIN0_CONTIGUOUS
+unsigned long dom0_start = -1L;
+#ifdef CONFIG_VTI
+unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
+//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
+unsigned long dom0_align = 256*1024*1024;
+#else // CONFIG_VTI
+unsigned long dom0_size = 256*1024*1024; //FIXME: Should be configurable
+//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
+unsigned long dom0_align = 64*1024*1024;
+#endif // CONFIG_VTI
+#ifdef DOMU_BUILD_STAGING
+unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
+unsigned long domU_staging_start;
+unsigned long domU_staging_align = 64*1024;
+unsigned long *domU_staging_area;
+#endif
+
+// initialized by arch/ia64/setup.c:find_initrd()
+unsigned long initrd_start = 0, initrd_end = 0;
+
+#define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend))
+
+//extern int loadelfimage(char *);
+extern int readelfimage_base_and_size(char *, unsigned long,
+ unsigned long *, unsigned long *, unsigned long *);
+
+unsigned long map_domain_page0(struct domain *);
+extern unsigned long dom_fw_setup(struct domain *, char *, int);
+
+/* this belongs in include/asm, but there doesn't seem to be a suitable place */
+/* Free a domain's per-domain page table.  Still a stub on ia64. */
+void free_perdomain_pt(struct domain *d)
+{
+	dummy();
+	//free_page((unsigned long)d->mm.perdomain_pt);
+}
+
+/* >0 disables use of the halt instruction in the idle loop (see
+ * default_idle()); adjusted via disable_hlt()/enable_hlt(). */
+int hlt_counter;
+
+void disable_hlt(void)
+{
+	hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+	hlt_counter--;
+}
+
+/*
+ * One iteration of the idle policy: halt the CPU (with interrupts
+ * disabled so the softirq check is race-free) unless halting has been
+ * disabled via disable_hlt().
+ * NOTE(review): with the 'else' commented out, local_irq_enable() also
+ * runs after safe_halt(); harmless only if safe_halt() returns with
+ * interrupts already enabled -- confirm.
+ */
+static void default_idle(void)
+{
+	if ( hlt_counter == 0 )
+	{
+	local_irq_disable();
+	    if ( !softirq_pending(smp_processor_id()) )
+	        safe_halt();
+	    //else
+	        local_irq_enable();
+	}
+}
+
+/* Per-CPU idle loop: run default_idle() until softirq work appears,
+ * then schedule and process it.  Never returns. */
+void continue_cpu_idle_loop(void)
+{
+	int cpu = smp_processor_id();
+	for ( ; ; )
+	{
+#ifdef IA64
+//        __IRQ_STAT(cpu, idle_timestamp) = jiffies
+#else
+	    irq_stat[cpu].idle_timestamp = jiffies;
+#endif
+	    while ( !softirq_pending(cpu) )
+	        default_idle();
+	    raise_softirq(SCHEDULE_SOFTIRQ);
+	    do_softirq();
+	}
+}
+
+/* Entry point for a CPU entering the idle loop for the first time:
+ * kick the scheduler once, publish setup-done, then idle forever. */
+void startup_cpu_idle_loop(void)
+{
+	/* Just some sanity to ensure that the scheduler is set up okay. */
+	ASSERT(current->domain == IDLE_DOMAIN_ID);
+	raise_softirq(SCHEDULE_SOFTIRQ);
+	do_softirq();
+
+	/*
+	 * Declares CPU setup done to the boot processor.
+	 * Therefore memory barrier to ensure state is visible.
+	 */
+	smp_mb();
+#if 0
+//do we have to ensure the idle task has a shared page so that, for example,
+//region registers can be loaded from it. Apparently not...
+	idle0_task.shared_info = (void *)alloc_xenheap_page();
+	memset(idle0_task.shared_info, 0, PAGE_SIZE);
+	/* pin mapping */
+	// FIXME: Does this belong here?  Or do only at domain switch time?
+	{
+		/* WARNING: following must be inlined to avoid nested fault */
+		unsigned long psr = ia64_clear_ic();
+		ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR,
+		 pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)),
+		 PAGE_SHIFT);
+		ia64_set_psr(psr);
+		ia64_srlz_i();
+	}
+#endif
+
+	continue_cpu_idle_loop();
+}
+
+/* Allocate a vcpu struct together with its kernel stack pages. */
+struct vcpu *arch_alloc_vcpu_struct(void)
+{
+	/* Per-vp stack is used here. So we need keep vcpu
+	 * same page as per-vp stack */
+	return alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER);
+}
+
+/* Release the vcpu struct and the co-allocated kernel stack pages. */
+void arch_free_vcpu_struct(struct vcpu *v)
+{
+	free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
+}
+
+#ifdef CONFIG_VTI
+/*
+ * CONFIG_VTI variant: arch-specific domain creation.  Allocates the
+ * vcpu_info page and the per-domain vTLB/VHPT; shared_info allocation
+ * is deferred (see comment below).
+ */
+void arch_do_createdomain(struct vcpu *v)
+{
+	struct domain *d = v->domain;
+	struct thread_info *ti = alloc_thread_info(v);
+
+	/* If domain is VMX domain, shared info area is created
+	 * by domain and then domain notifies HV by specific hypercall.
+	 * If domain is xenolinux, shared info area is created by
+	 * HV.
+	 * Since we have no idea about whether domain is VMX now,
+	 * (dom0 when parse and domN when build), postpone possible
+	 * allocation.
+	 */
+
+	/* FIXME: Because full virtual cpu info is placed in this area,
+	 * it's unlikely to put it into one shareinfo page. Later
+	 * need split vcpu context from vcpu_info and conforms to
+	 * normal xen convention.
+	 */
+	d->shared_info = NULL;
+	v->vcpu_info = (void *)alloc_xenheap_page();
+	if (!v->vcpu_info) {
+		printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
+		while (1);
+	}
+	memset(v->vcpu_info, 0, PAGE_SIZE);
+
+	/* Clear thread_info to clear some important fields, like preempt_count */
+	memset(ti, 0, sizeof(struct thread_info));
+
+	/* Allocate per-domain vTLB and vhpt */
+	v->arch.vtlb = init_domain_tlb(v);
+
+	/* Physical->machine page table will be allocated when
+	 * final setup, since we have no the maximum pfn number in
+	 * this stage
+	 */
+
+	/* FIXME: This is identity mapped address for xenheap.
+	 * Do we need it at all?
+	 */
+	d->xen_vastart = 0xf000000000000000;
+	d->xen_vaend = 0xf300000000000000;
+	d->arch.breakimm = 0x1000;
+
+	// stay on kernel stack because may get interrupts!
+	// ia64_ret_from_clone (which b0 gets in new_thread) switches
+	// to user stack
+	v->arch._thread.on_ustack = 0;
+}
+#else // CONFIG_VTI
+/*
+ * Non-VTI variant: arch-specific domain creation.  Allocates shared_info
+ * and vcpu_info pages, sets up metaphysical/RID state and the fixed
+ * Xen virtual-address layout for the domain.
+ * NOTE(review): the shared_info allocation result is not checked (only
+ * vcpu_info is) -- confirm whether a NULL shared_info can reach users.
+ */
+void arch_do_createdomain(struct vcpu *v)
+{
+	struct domain *d = v->domain;
+
+	d->shared_info = (void *)alloc_xenheap_page();
+	v->vcpu_info = (void *)alloc_xenheap_page();
+	if (!v->vcpu_info) {
+		printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
+		while (1);
+	}
+	memset(v->vcpu_info, 0, PAGE_SIZE);
+	/* pin mapping */
+	// FIXME: Does this belong here?  Or do only at domain switch time?
+#if 0
+	// this is now done in ia64_new_rr7
+	{
+		/* WARNING: following must be inlined to avoid nested fault */
+		unsigned long psr = ia64_clear_ic();
+		ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR,
+		 pte_val(pfn_pte(ia64_tpa(d->shared_info) >> PAGE_SHIFT, PAGE_KERNEL)),
+		 PAGE_SHIFT);
+		ia64_set_psr(psr);
+		ia64_srlz_i();
+	}
+#endif
+	d->max_pages = (128*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
+	if ((d->arch.metaphysical_rr0 = allocate_metaphysical_rr0()) == -1UL)
+		BUG();
+	v->vcpu_info->arch.metaphysical_mode = 1;
+	v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
+	v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
+#define DOMAIN_RID_BITS_DEFAULT 18
+	if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME
+		BUG();
+	// the following will eventually need to be negotiated dynamically
+	d->xen_vastart = 0xf000000000000000;
+	d->xen_vaend = 0xf300000000000000;
+	d->shared_info_va = 0xf100000000000000;
+	d->arch.breakimm = 0x1000;
+	v->arch.breakimm = d->arch.breakimm;
+	// stay on kernel stack because may get interrupts!
+	// ia64_ret_from_clone (which b0 gets in new_thread) switches
+	// to user stack
+	v->arch._thread.on_ustack = 0;
+}
+#endif // CONFIG_VTI
+
+/* Nothing arch-specific to do when booting a secondary vcpu on ia64. */
+void arch_do_boot_vcpu(struct vcpu *v)
+{
+}
+
+/* Load a guest-supplied vcpu context.  Unimplemented stub on ia64.
+ * NOTE(review): returns 1 unconditionally -- confirm callers treat
+ * the return value of this stub consistently. */
+int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
+{
+	dummy();
+	return 1;
+}
+
+/* Final arch-specific guest setup.  Unimplemented stub on ia64
+ * (returns 1 unconditionally, like arch_set_info_guest above). */
+int arch_final_setup_guest(struct vcpu *v, struct vcpu_guest_context *c)
+{
+	dummy();
+	return 1;
+}
+
+/* Release a dying domain's arch resources.  Unimplemented stub. */
+void domain_relinquish_resources(struct domain *d)
+{
+	dummy();
+}
+
+#ifdef CONFIG_VTI
+/*
+ * CONFIG_VTI variant: initialize the register/switch-stack frames of a
+ * brand-new vcpu so its first schedule returns through
+ * ia64_ret_from_clone into the guest at start_pc.
+ */
+void new_thread(struct vcpu *v,
+                unsigned long start_pc,
+                unsigned long start_stack,
+                unsigned long start_info)
+{
+	struct domain *d = v->domain;
+	struct switch_stack *sw;
+	struct xen_regs *regs;
+	struct ia64_boot_param *bp;
+	extern char ia64_ret_from_clone;
+	extern char saved_command_line[];
+	//char *dom0_cmdline = "BOOT_IMAGE=scsi0:\EFI\redhat\xenlinux nomca root=/dev/sdb1 ro";
+
+
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+	if (d == dom0) start_pc += dom0_start;
+#endif
+	/* pt_regs and switch_stack live at the top of the vcpu stack page */
+	regs = (struct xen_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+	sw = (struct switch_stack *) regs - 1;
+	/* Sanity Clear */
+	memset(sw, 0, sizeof(struct xen_regs) + sizeof(struct switch_stack));
+
+	if (VMX_DOMAIN(v)) {
+		/* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
+		regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */
+	} else {
+		/* NOTE(review): '&' binds tighter than '|', so the ~(...)
+		 * mask below applies only to IA64_PSR_BN, not to the whole
+		 * expression -- confirm intent against copy_thread(), which
+		 * parenthesizes (psr | BITS_TO_SET) & ~BITS_TO_CLEAR. */
+		regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
+			| IA64_PSR_BITS_TO_SET | IA64_PSR_BN
+			& ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
+		regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
+	}
+	regs->cr_iip = start_pc;
+	regs->ar_rsc = 0x0;
+	regs->cr_ifs = 0x0;
+	regs->ar_fpsr = sw->ar_fpsr = FPSR_DEFAULT;
+	sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
+	printf("new_thread: v=%p, regs=%p, sw=%p, new_rbs=%p, IA64_STK_OFFSET=%p, &r8=%p\n",
+	       v,regs,sw,sw->ar_bspstore,IA64_STK_OFFSET,&regs->r8);
+	printf("iip:0x%lx,ipsr:0x%lx\n", regs->cr_iip, regs->cr_ipsr);
+
+	sw->b0 = (unsigned long) &ia64_ret_from_clone;
+	v->arch._thread.ksp = (unsigned long) sw - 16;
+	printk("new_thread, about to call init_all_rr\n");
+	if (VMX_DOMAIN(v)) {
+		vmx_init_all_rr(v);
+	} else
+		init_all_rr(v);
+	// set up boot parameters (and fake firmware)
+	printk("new_thread, about to call dom_fw_setup\n");
+	VMX_VPD(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L);  //FIXME
+	printk("new_thread, done with dom_fw_setup\n");
+
+	if (VMX_DOMAIN(v)) {
+		/* Virtual processor context setup */
+		VMX_VPD(v, vpsr) = IA64_PSR_BN;
+		VPD_CR(v, dcr) = 0;
+	} else {
+		// don't forget to set this!
+		v->vcpu_info->arch.banknum = 1;
+	}
+}
+#else // CONFIG_VTI
+
+// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
+// and linux/arch/ia64/kernel/process.c:kernel_thread()
+/*
+ * Non-VTI variant: initialize the register/switch-stack frames of a
+ * brand-new vcpu so its first schedule returns through
+ * ia64_ret_from_clone into the guest at start_pc.
+ */
+void new_thread(struct vcpu *v,
+	            unsigned long start_pc,
+	            unsigned long start_stack,
+	            unsigned long start_info)
+{
+	struct domain *d = v->domain;
+	struct switch_stack *sw;
+	struct pt_regs *regs;
+	unsigned long new_rbs;
+	struct ia64_boot_param *bp;
+	extern char ia64_ret_from_clone;
+	extern char saved_command_line[];
+
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+	if (d == dom0) start_pc += dom0_start;
+#endif
+	/* pt_regs and switch_stack live at the top of the vcpu stack page */
+	regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+	sw = (struct switch_stack *) regs - 1;
+	memset(sw,0,sizeof(struct switch_stack)+sizeof(struct pt_regs));
+	new_rbs = (unsigned long) v + IA64_RBS_OFFSET;
+	/* NOTE(review): '&' binds tighter than '|', so the ~(...) mask
+	 * applies only to IA64_PSR_BN, not to the whole expression --
+	 * confirm intent against copy_thread(), which parenthesizes
+	 * (psr | BITS_TO_SET) & ~BITS_TO_CLEAR. */
+	regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
+		| IA64_PSR_BITS_TO_SET | IA64_PSR_BN
+		& ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
+	regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
+	regs->cr_iip = start_pc;
+	regs->ar_rsc = 0;	/* lazy mode */
+	regs->ar_rnat = 0;
+	regs->ar_fpsr = sw->ar_fpsr = FPSR_DEFAULT;
+	regs->loadrs = 0;
+	//regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */
+	//regs->r8 = 0x01234567890abcdef; // FIXME: temp marker
+	//regs->r12 = ((unsigned long) regs - 16); /* 16 byte scratch */
+	regs->cr_ifs = 1UL << 63;
+	regs->pr = 0;
+	sw->pr = 0;
+	regs->ar_pfs = 0;
+	sw->caller_unat = 0;
+	sw->ar_pfs = 0;
+	sw->ar_bspstore = new_rbs;
+	//regs->r13 = (unsigned long) v;
+printf("new_thread: v=%p, start_pc=%p, regs=%p, sw=%p, new_rbs=%p, IA64_STK_OFFSET=%p, &r8=%p\n",
+v,start_pc,regs,sw,new_rbs,IA64_STK_OFFSET,&regs->r8);
+	sw->b0 = (unsigned long) &ia64_ret_from_clone;
+	v->arch._thread.ksp = (unsigned long) sw - 16;
+	//v->thread_info->flags = 0;
+printk("new_thread, about to call init_all_rr\n");
+	init_all_rr(v);
+	// set up boot parameters (and fake firmware)
+printk("new_thread, about to call dom_fw_setup\n");
+	/* r28 carries the boot-param address into the guest */
+	regs->r28 = dom_fw_setup(d,saved_command_line,256L);  //FIXME
+printk("new_thread, done with dom_fw_setup\n");
+	// don't forget to set this!
+	v->vcpu_info->arch.banknum = 1;
+	memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
+}
+#endif // CONFIG_VTI
+
+/*
+ * Return the page struct for a dom0 metaphysical address; dom0 memory
+ * is identity-placed at [dom0_start, dom0_start+dom0_size), so no
+ * allocation is needed.  Spins forever on an out-of-range address so
+ * the error is visible.
+ */
+static struct page * map_new_domain0_page(unsigned long mpaddr)
+{
+	if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+		/* %p with unsigned long arguments is undefined; print hex */
+		printk("map_new_domain0_page: bad domain0 mpaddr 0x%lx!\n", mpaddr);
+		printk("map_new_domain0_page: start=0x%lx,end=0x%lx!\n",
+		       dom0_start, dom0_start + dom0_size);
+		while(1);
+	}
+	return pfn_to_page((mpaddr >> PAGE_SHIFT));
+}
+
+/* allocate new page for domain and map it to the specified metaphysical addr */
+/*
+ * Walk (and populate as needed) the domain's pgd/pud/pmd/pte for
+ * mpaddr; if no page is mapped there yet, allocate one (or, for dom0,
+ * use the identity-placed page) and install the pte.
+ */
+struct page * map_new_domain_page(struct domain *d, unsigned long mpaddr)
+{
+	struct mm_struct *mm = d->arch.mm;
+	struct page *p = (struct page *)0;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	extern unsigned long vhpt_paddr, vhpt_pend;
+
+	if (!mm->pgd) {
+		printk("map_new_domain_page: domain pgd must exist!\n");
+		return(p);
+	}
+	pgd = pgd_offset(mm,mpaddr);
+	if (pgd_none(*pgd))
+		pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
+
+	pud = pud_offset(pgd, mpaddr);
+	if (pud_none(*pud))
+		pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
+
+	pmd = pmd_offset(pud, mpaddr);
+	if (pmd_none(*pmd))
+		pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
+//		pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
+
+	pte = pte_offset_map(pmd, mpaddr);
+	if (pte_none(*pte)) {
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+		if (d == dom0) p = map_new_domain0_page(mpaddr);
+		else
+#endif
+		{
+			p = alloc_domheap_page(d);
+			/* check BEFORE touching the page: the old code
+			 * memset()'d it first, dereferencing NULL when
+			 * the allocation failed */
+			if (unlikely(!p)) {
+				printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
+				return(p);
+			}
+			// zero out pages for security reasons
+			memset(__va(page_to_phys(p)),0,PAGE_SIZE);
+		}
+		if (unlikely(page_to_phys(p) > vhpt_paddr && page_to_phys(p) < vhpt_pend)) {
+			/* was %p with an unsigned long argument (undefined) */
+			printf("map_new_domain_page: reassigned vhpt page 0x%lx!!\n",
+			       page_to_phys(p));
+		}
+		set_pte(pte, pfn_pte(page_to_phys(p) >> PAGE_SHIFT,
+			__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+	}
+	else printk("map_new_domain_page: page %p already mapped!\n",p);
+	return p;
+}
+
/* Debug hook: turn on privileged-op tracing when the magic metaphysical
 * address 0x3800 is looked up. */
void mpafoo(unsigned long mpaddr)
{
	extern unsigned long privop_trace;

	if (mpaddr != 0x3800)
		return;
	privop_trace = 1;
}
+
+unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
+{
+ struct mm_struct *mm = d->arch.mm;
+ pgd_t *pgd = pgd_offset(mm, mpaddr);
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+ if (d == dom0) {
+ if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+ //printk("lookup_domain_mpa: bad dom0 mpaddr %p!\n",mpaddr);
+//printk("lookup_domain_mpa: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
+ mpafoo(mpaddr);
+ }
+ pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
+ __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
+ pte = &pteval;
+ return *(unsigned long *)pte;
+ }
+#endif
+tryagain:
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd,mpaddr);
+ if (pud_present(*pud)) {
+ pmd = pmd_offset(pud,mpaddr);
+ if (pmd_present(*pmd)) {
+ pte = pte_offset_map(pmd,mpaddr);
+ if (pte_present(*pte)) {
+//printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
+ return *(unsigned long *)pte;
+ }
+ }
+ }
+ }
+ /* if lookup fails and mpaddr is "legal", "create" the page */
+ if ((mpaddr >> PAGE_SHIFT) < d->max_pages) {
+ if (map_new_domain_page(d,mpaddr)) goto tryagain;
+ }
+ printk("lookup_domain_mpa: bad mpa %p (> %p\n",
+ mpaddr,d->max_pages<<PAGE_SHIFT);
+ mpafoo(mpaddr);
+ return 0;
+}
+
+// FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
+unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
+{
+ unsigned long pte = lookup_domain_mpa(d,mpaddr);
+ unsigned long imva;
+
+ pte &= _PAGE_PPN_MASK;
+ imva = __va(pte);
+ imva |= mpaddr & ~PAGE_MASK;
+ return(imva);
+}
+
+// remove following line if not privifying in memory
+//#define HAVE_PRIVIFY_MEMORY
+#ifndef HAVE_PRIVIFY_MEMORY
+#define privify_memory(x,y) do {} while(0)
+#endif
+
+// see arch/x86/xxx/domain_build.c
+int elf_sanity_check(Elf_Ehdr *ehdr)
+{
+ return (IS_ELF(*ehdr));
+}
+
+static void copy_memory(void *dst, void *src, int size)
+{
+ int remain;
+
+ if (IS_XEN_ADDRESS(dom0,src)) {
+ memcpy(dst,src,size);
+ }
+ else {
+ printf("About to call __copy_from_user(%p,%p,%d)\n",
+ dst,src,size);
+ while (remain = __copy_from_user(dst,src,size)) {
+ printf("incomplete user copy, %d remain of %d\n",
+ remain,size);
+ dst += size - remain; src += size - remain;
+ size -= remain;
+ }
+ }
+}
+
/* Load an ELF kernel image into domain d's metaphysical memory.
 * Each PT_LOAD segment is copied from the image; BSS (memsz > filesz)
 * is zeroed.  For a contiguous dom0 the copy is done in one shot into
 * the reserved region; for other domains it proceeds page by page,
 * mapping (or, under DOMU_AUTO_RESTART, re-looking-up) each page. */
void loaddomainelfimage(struct domain *d, unsigned long image_start)
{
	char *elfbase = image_start;
	//Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
	Elf_Ehdr ehdr;
	Elf_Phdr phdr;
	int h, filesz, memsz, paddr;
	unsigned long elfaddr, dom_mpaddr, dom_imva;
	struct page *p;
	unsigned long pteval;

	copy_memory(&ehdr,image_start,sizeof(Elf_Ehdr));
	/* walk the program header table */
	for ( h = 0; h < ehdr.e_phnum; h++ ) {
		copy_memory(&phdr,elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
			sizeof(Elf_Phdr));
		//if ( !is_loadable_phdr(phdr) )
		if ((phdr.p_type != PT_LOAD)) {
		    continue;
		}
		filesz = phdr.p_filesz; memsz = phdr.p_memsz;
		elfaddr = elfbase + phdr.p_offset;
		dom_mpaddr = phdr.p_paddr;
//printf("p_offset: %x, size=%x\n",elfaddr,filesz);
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
		if (d == dom0) {
			/* dom0 is physically contiguous: one bulk copy */
			if (dom_mpaddr+memsz>dom0_size || dom_mpaddr+filesz>dom0_size) {
				printf("Domain0 doesn't fit in allocated space!\n");
				while(1);
			}
			dom_imva = __va(dom_mpaddr + dom0_start);
			copy_memory(dom_imva,elfaddr,filesz);
			/* zero the BSS portion of the segment */
			if (memsz > filesz) memset(dom_imva+filesz,0,memsz-filesz);
//FIXME: This test for code seems to find a lot more than objdump -x does
			if (phdr.p_flags & PF_X) privify_memory(dom_imva,filesz);
		}
		else
#endif
		/* non-dom0: map/copy one page at a time.  NOTE(review):
		 * filesz and memsz are int and go negative past segment end,
		 * which is what terminates the partial-page logic below. */
		while (memsz > 0) {
#ifdef DOMU_AUTO_RESTART
			/* restart path: pages already exist, just find them */
			pteval = lookup_domain_mpa(d,dom_mpaddr);
			if (pteval) dom_imva = __va(pteval & _PFN_MASK);
			else { printf("loaddomainelfimage: BAD!\n"); while(1); }
#else
			p = map_new_domain_page(d,dom_mpaddr);
			if (unlikely(!p)) BUG();
			dom_imva = __va(page_to_phys(p));
#endif
			if (filesz > 0) {
				if (filesz >= PAGE_SIZE)
					copy_memory(dom_imva,elfaddr,PAGE_SIZE);
				else { // copy partial page, zero the rest of page
					copy_memory(dom_imva,elfaddr,filesz);
					memset(dom_imva+filesz,0,PAGE_SIZE-filesz);
				}
//FIXME: This test for code seems to find a lot more than objdump -x does
				if (phdr.p_flags & PF_X)
					privify_memory(dom_imva,PAGE_SIZE);
			}
			else if (memsz > 0) // always zero out entire page
				memset(dom_imva,0,PAGE_SIZE);
			memsz -= PAGE_SIZE; filesz -= PAGE_SIZE;
			elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE;
		}
	}
}
+
+int
+parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry)
+{
+ Elf_Ehdr ehdr;
+
+ copy_memory(&ehdr,elfbase,sizeof(Elf_Ehdr));
+
+ if ( !elf_sanity_check(&ehdr) ) {
+ printk("ELF sanity check failed.\n");
+ return -EINVAL;
+ }
+
+ if ( (ehdr.e_phoff + (ehdr.e_phnum * ehdr.e_phentsize)) > elfsize )
+ {
+ printk("ELF program headers extend beyond end of image.\n");
+ return -EINVAL;
+ }
+
+ if ( (ehdr.e_shoff + (ehdr.e_shnum * ehdr.e_shentsize)) > elfsize )
+ {
+ printk("ELF section headers extend beyond end of image.\n");
+ return -EINVAL;
+ }
+
+#if 0
+ /* Find the section-header strings table. */
+ if ( ehdr.e_shstrndx == SHN_UNDEF )
+ {
+ printk("ELF image has no section-header strings table (shstrtab).\n");
+ return -EINVAL;
+ }
+#endif
+
+ *entry = ehdr.e_entry;
+printf("parsedomainelfimage: entry point = %p\n",*entry);
+
+ return 0;
+}
+
+
/* Reserve dom0's memory from the boot allocator.  When dom0 is built
 * contiguous (CONFIG_DOMAIN0_CONTIGUOUS) a single aligned trunk of
 * dom0_size bytes is allocated and its start recorded in dom0_start;
 * otherwise dom0_start is simply zeroed.  Spins forever on failure. */
void alloc_dom0(void)
{
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
	if (platform_is_hp_ski()) {
		dom0_size = 128*1024*1024; //FIXME: Should be configurable
	}
	/* NOTE(review): %d with dom0_size — presumably an unsigned long;
	 * specifier likely should be %lu, confirm dom0_size's declaration */
	printf("alloc_dom0: starting (initializing %d MB...)\n",dom0_size/(1024*1024));

	/* FIXME: The first trunk (say 256M) should always be assigned to
	 * Dom0, since Dom0's physical == machine address for DMA purpose.
	 * Some old version linux, like 2.4, assumes physical memory existing
	 * in 2nd 64M space.
	 */
	dom0_start = alloc_boot_pages(
		dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
	/* alloc_boot_pages returns a pfn; convert to a byte address */
	dom0_start <<= PAGE_SHIFT;
	if (!dom0_start) {
	printf("construct_dom0: can't allocate contiguous memory size=%p\n",
		dom0_size);
	while(1);
	}
	printf("alloc_dom0: dom0_start=%p\n",dom0_start);
#else
	dom0_start = 0;
#endif

}
+
+#ifdef DOMU_BUILD_STAGING
+void alloc_domU_staging(void)
+{
+ domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
+ printf("alloc_domU_staging: starting (initializing %d MB...)\n",domU_staging_size/(1024*1024));
+ domU_staging_start = alloc_boot_pages(
+ domU_staging_size >> PAGE_SHIFT, domU_staging_align >> PAGE_SHIFT);
+ domU_staging_start <<= PAGE_SHIFT;
+ if (!domU_staging_size) {
+ printf("alloc_domU_staging: can't allocate, spinning...\n");
+ while(1);
+ }
+ else domU_staging_area = (unsigned long *)__va(domU_staging_start);
+ printf("alloc_domU_staging: domU_staging_area=%p\n",domU_staging_area);
+
+}
+
+unsigned long
+domU_staging_read_8(unsigned long at)
+{
+ // no way to return errors so just do it
+ return domU_staging_area[at>>3];
+
+}
+
+unsigned long
+domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b,
+ unsigned long c, unsigned long d)
+{
+ if (at + 32 > domU_staging_size) return -1;
+ if (at & 0x1f) return -1;
+ at >>= 3;
+ domU_staging_area[at++] = a;
+ domU_staging_area[at++] = b;
+ domU_staging_area[at++] = c;
+ domU_staging_area[at] = d;
+ return 0;
+
+}
+#endif
+
+#ifdef CONFIG_VTI
+/* Up to whether domain is vmx one, different context may be setup
+ * here.
+ */
+void
+post_arch_do_create_domain(struct vcpu *v, int vmx_domain)
+{
+ struct domain *d = v->domain;
+
+ if (!vmx_domain) {
+ d->shared_info = (void*)alloc_xenheap_page();
+ if (!d->shared_info)
+ panic("Allocate share info for non-vmx domain failed.\n");
+ d->shared_info_va = 0xfffd000000000000;
+
+ printk("Build shared info for non-vmx domain\n");
+ build_shared_info(d);
+ /* Setup start info area */
+ }
+}
+
+/* For VMX domain, this is invoked when kernel model in domain
+ * request actively
+ */
+void build_shared_info(struct domain *d)
+{
+ int i;
+
+ /* Set up shared-info area. */
+ update_dom_time(d);
+ d->shared_info->domain_time = 0;
+
+ /* Mask all upcalls... */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+ /* ... */
+}
+
+extern unsigned long running_on_sim;
+unsigned int vmx_dom0 = 0;
/* Build and start domain 0 (CONFIG_VTI variant).
 * Loads the dom0 ELF image into the pre-reserved contiguous region,
 * constructs the frame list with a 1:1 machine/phys mapping, performs
 * VMX-specific setup when hardware VMX is available and the image is
 * unmodified, then creates dom0's initial thread.
 * Returns 0 on success or a negative errno on a bad image.
 * NOTE(review): image_start/image_len parameters are overwritten below
 * from ia64_boot_param — the dom0 image is taken from the initrd slot. */
int construct_dom0(struct domain *d,
	unsigned long image_start, unsigned long image_len,
	unsigned long initrd_start, unsigned long initrd_len,
	char *cmdline)
{
	char *dst;
	int i, rc;
	unsigned long pfn, mfn;
	unsigned long nr_pt_pages;
	unsigned long count;
	unsigned long alloc_start, alloc_end;
	struct pfn_info *page = NULL;
	start_info_t *si;
	struct vcpu *v = d->vcpu[0];
	struct domain_setup_info dsi;
	unsigned long p_start;
	unsigned long pkern_start;
	unsigned long pkern_entry;
	unsigned long pkern_end;
	unsigned long ret;
	unsigned long progress = 0;

//printf("construct_dom0: starting\n");
	/* Sanity! */
#ifndef CLONE_DOMAIN0
	if ( d != dom0 )
	    BUG();
	if ( test_bit(_DOMF_constructed, &d->domain_flags) )
	    BUG();
#endif

	printk("##Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
	memset(&dsi, 0, sizeof(struct domain_setup_info));

	printk("*** LOADING DOMAIN 0 ***\n");

	/* dom0 owns the whole contiguous trunk reserved by alloc_dom0() */
	alloc_start = dom0_start;
	alloc_end = dom0_start + dom0_size;
	d->tot_pages = d->max_pages = (alloc_end - alloc_start)/PAGE_SIZE;
	/* the dom0 kernel image is passed in the initrd slot of boot params */
	image_start = __va(ia64_boot_param->initrd_start);
	image_len = ia64_boot_param->initrd_size;

	dsi.image_addr = (unsigned long)image_start;
	dsi.image_len  = image_len;
	rc = parseelfimage(&dsi);
	if ( rc != 0 )
	    return rc;

	/* Temp workaround */
	if (running_on_sim)
	    dsi.xen_section_string = (char *)1;

	/* a missing __xen_guest section means an unmodified (VMX) image */
	if ((!vmx_enabled) && !dsi.xen_section_string) {
	    printk("Lack of hardware support for unmodified vmx dom0\n");
	    panic("");
	}

	if (vmx_enabled && !dsi.xen_section_string) {
	    printk("Dom0 is vmx domain!\n");
	    vmx_dom0 = 1;
	}

	p_start = dsi.v_start;
	pkern_start = dsi.v_kernstart;
	pkern_end = dsi.v_kernend;
	pkern_entry = dsi.v_kernentry;

	printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",
		p_start,pkern_start,pkern_end,pkern_entry);

	if ( (p_start & (PAGE_SIZE-1)) != 0 )
	{
	    printk("Initial guest OS must load to a page boundary.\n");
	    return -EINVAL;
	}

	printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
	       " Kernel image:  %lx->%lx\n"
	       " Entry address: %lx\n"
	       " Init. ramdisk:   (NOT IMPLEMENTED YET)\n",
	       pkern_start, pkern_end, pkern_entry);

	if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
	{
	    printk("Initial guest OS requires too much space\n"
	           "(%luMB is greater than %luMB limit)\n",
	           (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
	    return -ENOMEM;
	}

	// Other sanity check about Dom0 image

	/* Construct a frame-allocation list for the initial domain, since these
	 * pages are allocated by boot allocator and pfns are not set properly
	 */
	for ( mfn = (alloc_start>>PAGE_SHIFT);
	      mfn < (alloc_end>>PAGE_SHIFT);
	      mfn++ )
	{
	    page = &frame_table[mfn];
	    page_set_owner(page, d);
	    page->u.inuse.type_info = 0;
	    page->count_info = PGC_allocated | 1;
	    list_add_tail(&page->list, &d->page_list);

	    /* Construct 1:1 mapping */
	    machine_to_phys_mapping[mfn] = mfn;
	}

	/* allocates/builds shared_info for the non-vmx case */
	post_arch_do_create_domain(v, vmx_dom0);

	/* Load Dom0 image to its own memory */
	loaddomainelfimage(d,image_start);

	/* Copy the initial ramdisk. */

	/* Sync d/i cache conservatively */
	ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
	if (ret != PAL_STATUS_SUCCESS)
	    panic("PAL CACHE FLUSH failed for dom0.\n");
	printk("Sync i/d cache for dom0 image SUCC\n");

	/* Physical mode emulation initialization, including
	 * emulation ID allcation and related memory request
	 */
	physical_mode_init(v);
	/* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
	 * for dom0
	 */
	d->arch.pmt = NULL;

	/* Give up the VGA console if DOM0 is configured to grab it. */
	if (cmdline != NULL)
	    console_endboot(strstr(cmdline, "tty0") != NULL);

	/* VMX specific construction for Dom0, if hardware supports VMX
	 * and Dom0 is unmodified image
	 */
	printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
	if (vmx_dom0)
	    vmx_final_setup_domain(dom0);

	/* vpd is ready now */
	vlsapic_reset(v);
	vtm_init(v);

	set_bit(_DOMF_constructed, &d->domain_flags);
	new_thread(v, pkern_entry, 0, 0);

	// FIXME: Hack for keyboard input
#ifdef CLONE_DOMAIN0
if (d == dom0)
#endif
	serial_input_init();
	if (d == dom0) {
		/* dom0 accepts all interrupt vectors */
		v->vcpu_info->arch.delivery_mask[0] = -1L;
		v->vcpu_info->arch.delivery_mask[1] = -1L;
		v->vcpu_info->arch.delivery_mask[2] = -1L;
		v->vcpu_info->arch.delivery_mask[3] = -1L;
	}
	else __set_bit(0x30,v->vcpu_info->arch.delivery_mask);

	return 0;
}
+#else //CONFIG_VTI
+
/* Build and start domain 0 (non-VTI variant).
 * Parses the dom0 ELF image (taken from the boot initrd slot), builds
 * dom0's metaphysical->physical page table, loads the image, and starts
 * dom0's initial thread.  Returns 0 on success or a negative errno. */
int construct_dom0(struct domain *d,
	unsigned long image_start, unsigned long image_len,
	unsigned long initrd_start, unsigned long initrd_len,
	char *cmdline)
{
	char *dst;
	int i, rc;
	unsigned long pfn, mfn;
	unsigned long nr_pt_pages;
	unsigned long count;
	//l2_pgentry_t *l2tab, *l2start;
	//l1_pgentry_t *l1tab = NULL, *l1start = NULL;
	struct pfn_info *page = NULL;
	start_info_t *si;
	struct vcpu *v = d->vcpu[0];

	struct domain_setup_info dsi;
	unsigned long p_start;
	unsigned long pkern_start;
	unsigned long pkern_entry;
	unsigned long pkern_end;

//printf("construct_dom0: starting\n");
	/* Sanity! */
#ifndef CLONE_DOMAIN0
	if ( d != dom0 )
	    BUG();
	if ( test_bit(_DOMF_constructed, &d->domain_flags) )
	    BUG();
#endif

	memset(&dsi, 0, sizeof(struct domain_setup_info));

	printk("*** LOADING DOMAIN 0 ***\n");

	d->max_pages = dom0_size/PAGE_SIZE;
	/* the dom0 kernel image is passed in the initrd slot of boot params */
	image_start = __va(ia64_boot_param->initrd_start);
	image_len = ia64_boot_param->initrd_size;
//printk("image_start=%lx, image_len=%lx\n",image_start,image_len);
//printk("First word of image: %lx\n",*(unsigned long *)image_start);

//printf("construct_dom0: about to call parseelfimage\n");
	dsi.image_addr = (unsigned long)image_start;
	dsi.image_len  = image_len;
	rc = parseelfimage(&dsi);
	if ( rc != 0 )
	    return rc;

	p_start = dsi.v_start;
	pkern_start = dsi.v_kernstart;
	pkern_end = dsi.v_kernend;
	pkern_entry = dsi.v_kernentry;

//printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);

	if ( (p_start & (PAGE_SIZE-1)) != 0 )
	{
	    printk("Initial guest OS must load to a page boundary.\n");
	    return -EINVAL;
	}

	printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
	       " Kernel image:  %lx->%lx\n"
	       " Entry address: %lx\n"
	       " Init. ramdisk:   (NOT IMPLEMENTED YET)\n",
	       pkern_start, pkern_end, pkern_entry);

	if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
	{
	    printk("Initial guest OS requires too much space\n"
	           "(%luMB is greater than %luMB limit)\n",
	           (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
	    return -ENOMEM;
	}

	// if high 3 bits of pkern start are non-zero, error

	// if pkern end is after end of metaphysical memory, error
	//  (we should be able to deal with this... later)


	//

#if 0
	strcpy(d->name,"Domain0");
#endif

	// prepare domain0 pagetable (maps METAphysical to physical)
	// following is roughly mm_init() in linux/kernel/fork.c
	d->arch.mm = xmalloc(struct mm_struct);
	if (unlikely(!d->arch.mm)) {
	    printk("Can't allocate mm_struct for domain0\n");
	    return -ENOMEM;
	}
	memset(d->arch.mm, 0, sizeof(*d->arch.mm));
	d->arch.mm->pgd = pgd_alloc(d->arch.mm);
	if (unlikely(!d->arch.mm->pgd)) {
	    printk("Can't allocate pgd for domain0\n");
	    return -ENOMEM;
	}


	/* Mask all upcalls... */
	for ( i = 0; i < MAX_VIRT_CPUS; i++ )
	    d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

	/* Copy the OS image. */
	//(void)loadelfimage(image_start);
	loaddomainelfimage(d,image_start);

	/* Copy the initial ramdisk. */
	//if ( initrd_len != 0 )
	//    memcpy((void *)vinitrd_start, initrd_start, initrd_len);

#if 0
	/* Set up start info area. */
	//si = (start_info_t *)vstartinfo_start;
	memset(si, 0, PAGE_SIZE);
	si->nr_pages     = d->tot_pages;
	si->shared_info  = virt_to_phys(d->shared_info);
	si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
	//si->pt_base      = vpt_start;
	//si->nr_pt_frames = nr_pt_pages;
	//si->mfn_list     = vphysmap_start;

	if ( initrd_len != 0 )
	{
	    //si->mod_start = vinitrd_start;
	    si->mod_len   = initrd_len;
	    printk("Initrd len 0x%lx, start at 0x%08lx\n",
	           si->mod_len, si->mod_start);
	}

	dst = si->cmd_line;
	if ( cmdline != NULL )
	{
	    for ( i = 0; i < 255; i++ )
	    {
	        if ( cmdline[i] == '\0' )
	            break;
	        *dst++ = cmdline[i];
	    }
	}
	*dst = '\0';

	zap_low_mappings(); /* Do the same for the idle page tables. */
#endif

	/* Give up the VGA console if DOM0 is configured to grab it. */
	/* NOTE(review): if IA64 is NOT defined and cmdline is NULL,
	 * strstr(NULL, ...) is called — confirm IA64 is always defined here */
#ifdef IA64
	if (cmdline != NULL)
#endif
	console_endboot(strstr(cmdline, "tty0") != NULL);

	set_bit(_DOMF_constructed, &d->domain_flags);

	new_thread(v, pkern_entry, 0, 0);
	// FIXME: Hack for keyboard input
#ifdef CLONE_DOMAIN0
if (d == dom0)
#endif
	serial_input_init();
	if (d == dom0) {
		/* dom0 accepts all interrupt vectors */
		v->vcpu_info->arch.delivery_mask[0] = -1L;
		v->vcpu_info->arch.delivery_mask[1] = -1L;
		v->vcpu_info->arch.delivery_mask[2] = -1L;
		v->vcpu_info->arch.delivery_mask[3] = -1L;
	}
	else __set_bit(0x30,v->vcpu_info->arch.delivery_mask);

	return 0;
}
+#endif // CONFIG_VTI
+
// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
/* Build an unprivileged domain from an ELF image (normally staged into
 * the domU staging area by dom0).  Parses the image, builds the domain's
 * page table, loads the image page by page, and creates its first thread.
 * Returns 0 on success or a negative errno. */
int construct_domU(struct domain *d,
		   unsigned long image_start, unsigned long image_len,
	           unsigned long initrd_start, unsigned long initrd_len,
	           char *cmdline)
{
	int i, rc;
	struct vcpu *v = d->vcpu[0];
	unsigned long pkern_entry;

#ifndef DOMU_AUTO_RESTART
	if ( test_bit(_DOMF_constructed, &d->domain_flags) ) BUG();
#endif

	printk("*** LOADING DOMAIN %d ***\n",d->domain_id);

	d->max_pages = dom0_size/PAGE_SIZE;	// FIXME: use dom0 size
	// FIXME: use domain0 command line
	rc = parsedomainelfimage(image_start, image_len, &pkern_entry);
	printk("parsedomainelfimage returns %d\n",rc);
	if ( rc != 0 ) return rc;

	/* prepare the domain's metaphysical->physical page table */
	d->arch.mm = xmalloc(struct mm_struct);
	if (unlikely(!d->arch.mm)) {
	    printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
	    return -ENOMEM;
	}
	memset(d->arch.mm, 0, sizeof(*d->arch.mm));
	d->arch.mm->pgd = pgd_alloc(d->arch.mm);
	if (unlikely(!d->arch.mm->pgd)) {
	    printk("Can't allocate pgd for domain %d\n",d->domain_id);
	    return -ENOMEM;
	}


	/* Mask all upcalls... */
	for ( i = 0; i < MAX_VIRT_CPUS; i++ )
		d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

	/* Copy the OS image. */
	printk("calling loaddomainelfimage(%p,%p)\n",d,image_start);
	loaddomainelfimage(d,image_start);
	printk("loaddomainelfimage returns\n");

	set_bit(_DOMF_constructed, &d->domain_flags);

	printk("calling new_thread, entry=%p\n",pkern_entry);
#ifdef DOMU_AUTO_RESTART
	/* remember image location/entry so reconstruct_domU can restart */
	v->domain->arch.image_start = image_start;
	v->domain->arch.image_len = image_len;
	v->domain->arch.entry = pkern_entry;
#endif
	new_thread(v, pkern_entry, 0, 0);
	printk("new_thread returns\n");
	/* deliver only vector 0x30 to unprivileged domains */
	__set_bit(0x30,v->vcpu_info->arch.delivery_mask);

	return 0;
}
+
+#ifdef DOMU_AUTO_RESTART
+void reconstruct_domU(struct vcpu *v)
+{
+ /* re-copy the OS image to reset data values to original */
+ printk("reconstruct_domU: restarting domain %d...\n",
+ v->domain->domain_id);
+ loaddomainelfimage(v->domain,v->domain->arch.image_start);
+ new_thread(v, v->domain->arch.entry, 0, 0);
+}
+#endif
+
+// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
+int launch_domainU(unsigned long size)
+{
+#ifdef CLONE_DOMAIN0
+ static int next = CLONE_DOMAIN0+1;
+#else
+ static int next = 1;
+#endif
+
+ struct domain *d = do_createdomain(next,0);
+ if (!d) {
+ printf("launch_domainU: couldn't create\n");
+ return 1;
+ }
+ else next++;
+ if (construct_domU(d, (unsigned long)domU_staging_area, size,0,0,0)) {
+ printf("launch_domainU: couldn't construct(id=%d,%lx,%lx)\n",
+ d->domain_id,domU_staging_area,size);
+ return 2;
+ }
+ domain_unpause_by_systemcontroller(d);
+}
+
/* Restart is not implemented: poke the Ski simulator if present, then
 * spin forever. */
void machine_restart(char * __unused)
{
	if (platform_is_hp_ski())
		dummy();
	printf("machine_restart called: spinning....\n");
	for (;;)
		;
}
+
/* Halt is not implemented: poke the Ski simulator if present, then
 * spin forever. */
void machine_halt(void)
{
	if (platform_is_hp_ski())
		dummy();
	printf("machine_halt called: spinning....\n");
	for (;;)
		;
}
+
/* Debug stop: trap into the Ski simulator debugger via 'break 0' when
 * running on it, otherwise spin forever. */
void dummy(void)
{
	if (platform_is_hp_ski())
		asm("break 0;;");
	printf("dummy called: spinning....\n");
	for (;;)
		;
}
+
+
#if 0
/* Disabled: thin wrapper that would delegate a context switch to
 * __switch_to; kept for reference only (compiled out). */
void switch_to(struct vcpu *prev, struct vcpu *next)
{
 	struct vcpu *last;

 	__switch_to(prev,next,last);
	//set_current(next);
}
#endif
+
+void domain_pend_keyboard_interrupt(int irq)
+{
+ vcpu_pend_interrupt(dom0->vcpu[0],irq);
+}
diff --git a/xen/arch/ia64/hpsimserial.c b/xen/arch/ia64/hpsimserial.c
new file mode 100644
index 0000000000..3e87aa3332
--- /dev/null
+++ b/xen/arch/ia64/hpsimserial.c
@@ -0,0 +1,23 @@
+/*
+ * HP Ski simulator serial I/O
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+
+#include <linux/config.h>
+#include <xen/sched.h>
+#include <xen/serial.h>
+#include <asm/hpsim_ssc.h>
+
/* Emit one character through the HP Ski simulator's SSC console service.
 * 'port' is unused: the simulator has a single console. */
static void hp_ski_putc(struct serial_port *port, char c)
{
	ia64_ssc(c,0,0,0,SSC_PUTCHAR);
}
+
+static struct uart_driver hp_ski = { .putc = hp_ski_putc };
+
/* Register the Ski simulator console as serial port 0 with Xen's
 * serial subsystem (output-only: hp_ski only provides putc). */
void hpsim_serial_init(void)
{
	serial_register_uart(0, &hp_ski, 0);
}
diff --git a/xen/arch/ia64/hypercall.c b/xen/arch/ia64/hypercall.c
new file mode 100644
index 0000000000..0fcc6f7cf8
--- /dev/null
+++ b/xen/arch/ia64/hypercall.c
@@ -0,0 +1,127 @@
+/*
+ * Hypercall implementations
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+
+#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
+#include <asm/sal.h> /* FOR struct ia64_sal_retval */
+
+#include <asm/vcpu.h>
+#include <asm/dom_fw.h>
+
+extern unsigned long translate_domain_mpaddr(unsigned long);
+extern struct ia64_sal_retval pal_emulator_static(UINT64);
+extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+
void fooefi(void) {}	/* empty placeholder — presumably a debugger/probe hook; confirm before removing */
+
+int
+ia64_hypercall (struct pt_regs *regs)
+{
+ struct vcpu *v = (struct domain *) current;
+ struct ia64_sal_retval x;
+ unsigned long *tv, *tc;
+
+ switch (regs->r2) {
+ case FW_HYPERCALL_PAL_CALL:
+ //printf("*** PAL hypercall: index=%d\n",regs->r28);
+ //FIXME: This should call a C routine
+#if 1
+ // This is very conservative, but avoids a possible
+ // (and deadly) freeze in paravirtualized domains due
+ // to a yet-to-be-found bug where pending_interruption
+ // is zero when it shouldn't be. Since PAL is called
+ // in the idle loop, this should resolve it
+ v->vcpu_info->arch.pending_interruption = 1;
+#endif
+ x = pal_emulator_static(regs->r28);
+ if (regs->r28 == PAL_HALT_LIGHT) {
+ do_sched_op(SCHEDOP_yield);
+ //break;
+ }
+ regs->r8 = x.status; regs->r9 = x.v0;
+ regs->r10 = x.v1; regs->r11 = x.v2;
+ break;
+ case FW_HYPERCALL_SAL_CALL:
+ x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33),
+ vcpu_get_gr(v,34),vcpu_get_gr(v,35),
+ vcpu_get_gr(v,36),vcpu_get_gr(v,37),
+ vcpu_get_gr(v,38),vcpu_get_gr(v,39));
+ regs->r8 = x.status; regs->r9 = x.v0;
+ regs->r10 = x.v1; regs->r11 = x.v2;
+ break;
+ case FW_HYPERCALL_EFI_RESET_SYSTEM:
+ printf("efi.reset_system called ");
+ if (current->domain == dom0) {
+ printf("(by dom0)\n ");
+ (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+ }
+#ifdef DOMU_AUTO_RESTART
+ else {
+ reconstruct_domU(current);
+ return 0; // don't increment ip!
+ }
+#else
+ printf("(not supported for non-0 domain)\n");
+ regs->r8 = EFI_UNSUPPORTED;
+#endif
+ break;
+ case FW_HYPERCALL_EFI_GET_TIME:
+ tv = vcpu_get_gr(v,32);
+ tc = vcpu_get_gr(v,33);
+ //printf("efi_get_time(%p,%p) called...",tv,tc);
+ tv = __va(translate_domain_mpaddr(tv));
+ if (tc) tc = __va(translate_domain_mpaddr(tc));
+ regs->r8 = (*efi.get_time)(tv,tc);
+ //printf("and returns %lx\n",regs->r8);
+ break;
+ case FW_HYPERCALL_EFI_SET_TIME:
+ case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+ case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+ // FIXME: need fixes in efi.h from 2.6.9
+ case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+ // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
+ // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
+ // POINTER ARGUMENTS WILL BE VIRTUAL!!
+ case FW_HYPERCALL_EFI_GET_VARIABLE:
+ // FIXME: need fixes in efi.h from 2.6.9
+ case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+ case FW_HYPERCALL_EFI_SET_VARIABLE:
+ case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+ // FIXME: need fixes in efi.h from 2.6.9
+ regs->r8 = EFI_UNSUPPORTED;
+ break;
+ case 0xffff: // test dummy hypercall
+ regs->r8 = dump_privop_counts_to_user(
+ vcpu_get_gr(v,32),
+ vcpu_get_gr(v,33));
+ break;
+ case 0xfffe: // test dummy hypercall
+ regs->r8 = zero_privop_counts_to_user(
+ vcpu_get_gr(v,32),
+ vcpu_get_gr(v,33));
+ break;
+ case 0xfffd: // test dummy hypercall
+ regs->r8 = launch_domainU(
+ vcpu_get_gr(v,32));
+ break;
+ case 0xfffc: // test dummy hypercall
+ regs->r8 = domU_staging_write_32(
+ vcpu_get_gr(v,32),
+ vcpu_get_gr(v,33),
+ vcpu_get_gr(v,34),
+ vcpu_get_gr(v,35),
+ vcpu_get_gr(v,36));
+ break;
+ case 0xfffb: // test dummy hypercall
+ regs->r8 = domU_staging_read_8(vcpu_get_gr(v,32));
+ break;
+ }
+ return 1;
+}
diff --git a/xen/arch/ia64/hyperprivop.S b/xen/arch/ia64/hyperprivop.S
new file mode 100644
index 0000000000..6903c66782
--- /dev/null
+++ b/xen/arch/ia64/hyperprivop.S
@@ -0,0 +1,513 @@
+/*
+ * arch/ia64/kernel/hyperprivop.S
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <public/arch-ia64.h>
+
+#define FAST_HYPERPRIVOP_CNT
+#define FAST_REFLECT_CNT
+
+// Should be included from common header file (also in process.c)
+// NO PSR_CLR IS DIFFERENT! (CPL)
+#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
+#define IA64_PSR_CPL0 (__IA64_UL(1) << IA64_PSR_CPL0_BIT)
+// note IA64_PSR_PK removed from following, why is this necessary?
+#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
+ IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
+ IA64_PSR_IT | IA64_PSR_BN)
+
+#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
+ IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \
+ IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
+ IA64_PSR_MC | IA64_PSR_IS | \
+ IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
+ IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
+
+// Note: not hand-scheduled for now
+// Registers at entry
+// r16 == cr.isr
+// r17 == cr.iim
+// r18 == XSI_PSR_IC_OFS
+// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+// r31 == pr
+GLOBAL_ENTRY(fast_hyperprivop)	// dispatch on hyperprivop number in r17; fall back to slow path
+#if 1
+	// HYPERPRIVOP_SSM_I?
+	// assumes domain interrupts pending, so just do it
+	cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
+(p7)	br.sptk.many hyper_ssm_i;;
+#endif
+#if 1
+	// if domain interrupts pending, give up for now and do it the slow way
+	adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;	// r20 = &shared-info pending word
+	ld8 r20=[r20] ;;
+	cmp.ne p7,p0=r0,r20
+(p7)	br.sptk.many dispatch_break_fault ;;
+
+	// HYPERPRIVOP_RFI?
+	cmp.eq p7,p6=XEN_HYPER_RFI,r17
+(p7)	br.sptk.many hyper_rfi;;
+
+// hard to test, because only called from rbs_switch
+	// HYPERPRIVOP_COVER?
+	cmp.eq p7,p6=XEN_HYPER_COVER,r17
+(p7)	br.sptk.many hyper_cover;;
+#endif
+
+#if 1
+	// HYPERPRIVOP_SSM_DT?
+	cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
+(p7)	br.sptk.many hyper_ssm_dt;;
+#endif
+
+#if 1
+	// HYPERPRIVOP_RSM_DT?
+	cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
+(p7)	br.sptk.many hyper_rsm_dt;;
+#endif
+
+	// if not one of the above, give up for now and do it the slow way
+	br.sptk.many dispatch_break_fault ;;
+
+
+// give up for now if: ipsr.be==1, ipsr.pp==1
+// from reflect_interruption, don't need to:
+// - printf first extint (debug only)
+// - check for interrupt collection enabled (routine will force on)
+// - set ifa (not valid for extint)
+// - set iha (not valid for extint)
+// - set itir (not valid for extint)
+// DO need to
+// - increment the HYPER_SSM_I fast_hyperprivop counter
+// - set shared_mem iip to instruction after HYPER_SSM_I
+// - set cr.iip to guest iva+0x3000
+// - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
+// be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
+// i = shared_mem interrupt_delivery_enabled
+// ic = shared_mem interrupt_collection_enabled
+// ri = instruction after HYPER_SSM_I
+// all other bits unchanged from real cr.ipsr
+// - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
+// - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
+// and isr.ri to cr.isr.ri (all other bits zero)
+// - cover and set shared_mem precover_ifs to cr.ifs
+// ^^^ MISSED THIS FOR fast_break??
+// - set shared_mem ifs and incomplete_regframe to 0
+// - set shared_mem interrupt_delivery_enabled to 0
+// - set shared_mem interrupt_collection_enabled to 0
+// - set r31 to SHAREDINFO_ADDR
+// - virtual bank switch 0
+// maybe implement later
+// - verify that there really IS a deliverable interrupt pending
+// - set shared_mem iva
+// needs to be done but not implemented (in reflect_interruption)
+// - set shared_mem iipa
+// don't know for sure
+// - set shared_mem unat
+// r16 == cr.isr
+// r17 == cr.iim
+// r18 == XSI_PSR_IC
+// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+// r31 == pr
+ENTRY(hyper_ssm_i)	// fast path for SSM_I: deliver pending interrupt to guest (see header above)
+	// give up for now if: ipsr.be==1, ipsr.pp==1
+	mov r30=cr.ipsr;;
+	mov r29=cr.iip;;
+	extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+	cmp.ne p7,p0=r21,r0
+(p7)	br.sptk.many dispatch_break_fault ;;
+	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+	cmp.ne p7,p0=r21,r0
+(p7)	br.sptk.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;	// bump per-op fast counter
+	ld8 r21=[r20];;
+	adds r21=1,r21;;
+	st8 [r20]=r21;;
+#endif
+	// set shared_mem iip to instruction after HYPER_SSM_I
+	extr.u r20=r30,41,2 ;;		// r20 = ipsr.ri (bundle slot)
+	cmp.eq p6,p7=2,r20 ;;
+(p6)	mov r20=0
+(p6)	adds r29=16,r29			// slot 2: advance iip to next bundle
+(p7)	adds r20=1,r20 ;;
+	dep r30=r20,r30,41,2;;	// adjust cr.ipsr.ri but don't save yet
+	adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r29 ;;
+	// set shared_mem isr
+	extr.u r16=r16,38,1;;	// grab cr.isr.ir bit
+	dep r16=r16,r0,38,1 ;;	// insert into cr.isr (rest of bits zero)
+	dep r16=r20,r16,41,2 ;;	// deposit cr.isr.ri
+	adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r16 ;;
+	// set cr.ipsr
+	mov r29=r30 ;;
+	movl r28=DELIVER_PSR_SET;;
+	movl r27=~DELIVER_PSR_CLR;;	// note: DELIVER_PSR_SET forces CPL1 (guest runs unprivileged)
+	or r29=r29,r28;;
+	and r29=r29,r27;;
+	mov cr.ipsr=r29;;
+	// set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
+	extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;	// virtual cpl: collapse to 3 or 0
+	cmp.eq p6,p7=3,r29;;
+(p6)	dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
+(p7)	dep r30=0,r30,IA64_PSR_CPL0_BIT,2
+	;;
+	// FOR SSM_I ONLY, also turn on psr.i and psr.ic
+	movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);;
+	movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+	or r30=r30,r28;;
+	and r30=r30,r27;;
+	adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r30 ;;
+	// set shared_mem interrupt_delivery_enabled to 0
+	// set shared_mem interrupt_collection_enabled to 0
+	st8 [r18]=r0;;			// r18 points at the combined vpsr.ic/vpsr.i word
+	// cover and set shared_mem precover_ifs to cr.ifs
+	// set shared_mem ifs and incomplete_regframe to 0
+	cover ;;
+	mov r20=cr.ifs;;
+	adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st4 [r21]=r0 ;;
+	adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r0 ;;
+	adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r20 ;;
+	// leave cr.ifs alone for later rfi
+	// set iip to go to domain IVA break instruction vector
+	mov r22=IA64_KR(CURRENT);;
+	adds r22=IA64_VCPU_IVA_OFFSET,r22;;
+	ld8 r23=[r22];;			// r23 = guest IVA base
+	movl r24=0x3000;;		// 0x3000 = external-interrupt vector offset in guest IVT
+	add r24=r24,r23;;
+	mov cr.iip=r24;;
+	// OK, now all set to go except for switch to virtual bank0
+	mov r30=r2; mov r29=r3;;	// preserve r2/r3 across the bank switch
+	adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+	adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+	bsw.1;;				// spill guest bank1 r16-r31 into shared area
+	st8 [r2]=r16,16; st8 [r3]=r17,16 ;;
+	st8 [r2]=r18,16; st8 [r3]=r19,16 ;;
+	st8 [r2]=r20,16; st8 [r3]=r21,16 ;;
+	st8 [r2]=r22,16; st8 [r3]=r23,16 ;;
+	st8 [r2]=r24,16; st8 [r3]=r25,16 ;;
+	st8 [r2]=r26,16; st8 [r3]=r27,16 ;;
+	st8 [r2]=r28,16; st8 [r3]=r29,16 ;;
+	st8 [r2]=r30,16; st8 [r3]=r31,16 ;;
+	movl r31=XSI_IPSR;;
+	bsw.0 ;;
+	mov r2=r30; mov r3=r29;;
+	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st4 [r20]=r0 ;;			// record virtual bank 0 now active
+	mov pr=r31,-1 ;;
+	rfi
+	;;
+
+// reflect domain breaks directly to domain
+// FIXME: DOES NOT WORK YET
+// r16 == cr.isr
+// r17 == cr.iim
+// r18 == XSI_PSR_IC
+// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+// r31 == pr
+GLOBAL_ENTRY(fast_break_reflect)	// reflect a guest break fault directly into the guest IVT
+#define FAST_BREAK
+#ifndef FAST_BREAK
+	br.sptk.many dispatch_break_fault ;;
+#endif
+	mov r30=cr.ipsr;;
+	mov r29=cr.iip;;
+	extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;	// give up if ipsr.be==1
+	cmp.ne p7,p0=r21,r0 ;;
+(p7)	br.sptk.many dispatch_break_fault ;;
+	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;	// give up if ipsr.pp==1
+	cmp.ne p7,p0=r21,r0 ;;
+(p7)	br.sptk.many dispatch_break_fault ;;
+#if 1 /* special handling in case running on simulator */
+	movl r20=first_break;;
+	ld4 r23=[r20];;
+	movl r21=0x80001;	// simulator console/breakpoint immediates go slow path
+	movl r22=0x80002;;
+	cmp.ne p7,p0=r23,r0;;
+(p7)	br.sptk.many dispatch_break_fault ;;
+	cmp.eq p7,p0=r21,r17;
+(p7)	br.sptk.many dispatch_break_fault ;;
+	cmp.eq p7,p0=r22,r17;
+(p7)	br.sptk.many dispatch_break_fault ;;
+#endif
+#ifdef FAST_REFLECT_CNT
+	movl r20=fast_reflect_count+((0x2c00>>8)*8);;	// count fast reflects of vector 0x2c00
+	ld8 r21=[r20];;
+	adds r21=1,r21;;
+	st8 [r20]=r21;;
+#endif
+	// save iim in shared_info
+	adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r17;;
+	// save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
+	adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r29;;
+	// set shared_mem isr
+	adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r16 ;;
+	// set cr.ipsr
+	mov r29=r30 ;;
+	movl r28=DELIVER_PSR_SET;;
+	movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;	// unlike ssm_i: also clear CPL0 bit
+	or r29=r29,r28;;
+	and r29=r29,r27;;
+	mov cr.ipsr=r29;;
+	// set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
+	extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;	// virtual cpl: collapse to 3 or 0
+	cmp.eq p6,p7=3,r29;;
+(p6)	dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
+(p7)	dep r30=0,r30,IA64_PSR_CPL0_BIT,2
+	;;
+	movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
+	movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+	or r30=r30,r28;;
+	and r30=r30,r27;;
+	// also set shared_mem ipsr.i and ipsr.ic appropriately
+	ld8 r20=[r18];;			// low word = vpsr.ic, high word = vpsr.i
+	extr.u r22=r20,32,32
+	cmp4.eq p6,p7=r20,r0;;
+(p6)	dep r30=0,r30,IA64_PSR_IC_BIT,1
+(p7)	dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
+	cmp4.eq p6,p7=r22,r0;;
+(p6)	dep r30=0,r30,IA64_PSR_I_BIT,1
+(p7)	dep r30=-1,r30,IA64_PSR_I_BIT,1 ;;
+	adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r30 ;;
+	// set shared_mem interrupt_delivery_enabled to 0
+	// set shared_mem interrupt_collection_enabled to 0
+	st8 [r18]=r0;;
+	// cover and set shared_mem precover_ifs to cr.ifs
+	// set shared_mem ifs and incomplete_regframe to 0
+	cover ;;
+	mov r20=cr.ifs;;
+	adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st4 [r21]=r0 ;;
+	adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r0 ;;
+	adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r20 ;;
+	// vpsr.i = vpsr.ic = 0 on delivery of interruption
+	st8 [r18]=r0;;			// NOTE(review): duplicate of the st8 [r18]=r0 above
+	// FIXME: need to save iipa and isr to be arch-compliant
+	// set iip to go to domain IVA break instruction vector
+	mov r22=IA64_KR(CURRENT);;
+	adds r22=IA64_VCPU_IVA_OFFSET,r22;;
+	ld8 r23=[r22];;			// r23 = guest IVA base
+	movl r24=0x2c00;;		// 0x2c00 = break-instruction vector offset in guest IVT
+	add r24=r24,r23;;
+	mov cr.iip=r24;;
+	// OK, now all set to go except for switch to virtual bank0
+	mov r30=r2; mov r29=r3;;	// preserve r2/r3 across the bank switch
+	adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+	adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+	bsw.1;;				// spill guest bank1 r16-r31 into shared area
+	st8 [r2]=r16,16; st8 [r3]=r17,16 ;;
+	st8 [r2]=r18,16; st8 [r3]=r19,16 ;;
+	st8 [r2]=r20,16; st8 [r3]=r21,16 ;;
+	st8 [r2]=r22,16; st8 [r3]=r23,16 ;;
+	st8 [r2]=r24,16; st8 [r3]=r25,16 ;;
+	st8 [r2]=r26,16; st8 [r3]=r27,16 ;;
+	st8 [r2]=r28,16; st8 [r3]=r29,16 ;;
+	st8 [r2]=r30,16; st8 [r3]=r31,16 ;;
+	movl r31=XSI_IPSR;;
+	bsw.0 ;;
+	mov r2=r30; mov r3=r29;;
+	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st4 [r20]=r0 ;;			// record virtual bank 0 now active
+	mov pr=r31,-1 ;;
+	rfi
+	;;
+
+
+// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
+ENTRY(hyper_rfi)	// fast emulation of guest rfi; bails to slow path on any tricky case
+#ifdef FAST_HYPERPRIVOP_CNT
+	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;	// bump per-op fast counter
+	ld8 r21=[r20];;
+	adds r21=1,r21;;
+	st8 [r20]=r21;;
+#endif
+	adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+	ld8 r21=[r20];;	// r21 = vcr.ipsr
+	extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
+	// if turning on psr.be, give up for now and do it the slow way
+	cmp.ne p7,p0=r22,r0
+(p7)	br.sptk.many dispatch_break_fault ;;
+	// if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
+	movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
+	and r22=r20,r21
+	;;
+	cmp.ne p7,p0=r22,r20
+(p7)	br.sptk.many dispatch_break_fault ;;
+	// if was in metaphys mode, do it the slow way (FIXME later?)
+	adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+	ld4 r20=[r20];;
+	cmp.ne p7,p0=r20,r0
+(p7)	br.sptk.many dispatch_break_fault ;;
+	// if domain hasn't already done virtual bank switch
+	//  do it the slow way (FIXME later?)
+	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+	ld4 r20=[r20];;
+	cmp.eq p7,p0=r20,r0
+(p7)	br.sptk.many dispatch_break_fault ;;
+	// validate vcr.iip, if in Xen range, do it the slow way
+	adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+	ld8 r22=[r20];;
+	movl r23=XEN_VIRT_SPACE_LOW
+	movl r24=XEN_VIRT_SPACE_HIGH ;;
+	cmp.ltu p0,p7=r22,r23 ;;	// if !(iip<low) &&
+(p7)	cmp.geu p0,p7=r22,r24 ;;	//    !(iip>=high)
+(p7)	br.sptk.many dispatch_break_fault ;;
+
+	// OK now, let's do an rfi.
+	// r18=&vpsr.i|vpsr.ic, r21==vpsr, r20==&vcr.iip, r22=vcr.iip
+	mov cr.iip=r22;;
+	adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st4 [r20]=r0 ;;
+	adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+	ld8 r20=[r20];;
+	dep r20=0,r20,38,25;;	// ensure ifs has no reserved bits set
+	mov cr.ifs=r20 ;;
+	// ipsr.cpl = (vcr.ipsr.cpl == 0) ? 2 : 3;
+	dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;	// setting cpl1 forces cpl to 2 (was 0) or 3 (was 1/3)
+	// vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
+	mov r19=r0 ;;
+	extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
+	cmp.ne p7,p6=r22,r0 ;;
+(p7)	dep r19=-1,r19,32,1	// vpsr.i lives in the high word of [r18]
+	extr.u r22=r21,IA64_PSR_IC_BIT,1 ;;
+	cmp.ne p7,p6=r22,r0 ;;
+(p7)	dep r19=-1,r19,0,1 ;;	// vpsr.ic lives in the low word
+	st8 [r18]=r19 ;;
+	// force on psr.ic, i, dt, rt, it, bn
+	movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN)
+	;;
+	or r21=r21,r20
+	;;
+	mov cr.ipsr=r21
+	mov pr=r31,-1
+	;;
+	rfi
+	;;
+
+ENTRY(hyper_cover)	// fast emulation of guest "cover" (stash ifs in shared area, clear cr.ifs)
+#ifdef FAST_HYPERPRIVOP_CNT
+	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;	// bump per-op fast counter
+	ld8 r21=[r20];;
+	adds r21=1,r21;;
+	st8 [r20]=r21;;
+#endif
+	mov r24=cr.ipsr
+	mov r25=cr.iip;;
+	// skip test for vpsr.ic.. it's a prerequisite for hyperprivops
+	cover ;;
+	adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+	mov r30=cr.ifs;;
+	adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18
+	ld4 r21=[r20] ;;		// r21 = incomplete_regframe flag
+	cmp.eq p6,p7=r21,r0 ;;
+(p6)	st8 [r22]=r30;;			// frame complete: save covered ifs for guest
+(p7)	st4 [r20]=r0;;			// incomplete: just clear the flag
+	mov cr.ifs=r0;;
+	// adjust return address to skip over break instruction
+	extr.u r26=r24,41,2 ;;		// r26 = ipsr.ri (bundle slot)
+	cmp.eq p6,p7=2,r26 ;;
+(p6)	mov r26=0
+(p6)	adds r25=16,r25			// slot 2: advance iip to next bundle
+(p7)	adds r26=1,r26
+	;;
+	dep r24=r26,r24,41,2
+	;;
+	mov cr.ipsr=r24
+	mov cr.iip=r25
+	mov pr=r31,-1 ;;
+	rfi
+	;;
+
+#if 1
+// return from metaphysical mode (meta=1) to virtual mode (meta=0)
+ENTRY(hyper_ssm_dt)	// fast ssm psr.dt: leave metaphysical mode, restore saved rr0
+#ifdef FAST_HYPERPRIVOP_CNT
+	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;	// bump per-op fast counter
+	ld8 r21=[r20];;
+	adds r21=1,r21;;
+	st8 [r20]=r21;;
+#endif
+	mov r24=cr.ipsr
+	mov r25=cr.iip;;
+	adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+	ld4 r21=[r20];;
+	cmp.eq p7,p0=r21,r0	// meta==0?
+(p7)	br.spnt.many	1f ;;	// already in virtual mode
+	mov r22=IA64_KR(CURRENT);;
+	adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
+	ld4 r23=[r22];;
+	mov rr[r0]=r23;;	// reinstall the guest's saved region register 0
+	srlz.i;;
+	st4 [r20]=r0 ;;		// meta = 0
+	// adjust return address to skip over break instruction
+1:	extr.u r26=r24,41,2 ;;	// r26 = ipsr.ri (bundle slot)
+	cmp.eq p6,p7=2,r26 ;;
+(p6)	mov r26=0
+(p6)	adds r25=16,r25		// slot 2: advance iip to next bundle
+(p7)	adds r26=1,r26
+	;;
+	dep r24=r26,r24,41,2
+	;;
+	mov cr.ipsr=r24
+	mov cr.iip=r25
+	mov pr=r31,-1 ;;
+	rfi
+	;;
+
+// go to metaphysical mode (meta=1) from virtual mode (meta=0)
+ENTRY(hyper_rsm_dt)	// fast rsm psr.dt: enter metaphysical mode, install metaphysical rr0
+#ifdef FAST_HYPERPRIVOP_CNT
+	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;	// bump per-op fast counter
+	ld8 r21=[r20];;
+	adds r21=1,r21;;
+	st8 [r20]=r21;;
+#endif
+	mov r24=cr.ipsr
+	mov r25=cr.iip;;
+	adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+	ld4 r21=[r20];;
+	cmp.ne p7,p0=r21,r0	// meta==0?
+(p7)	br.spnt.many	1f ;;	// already in metaphysical mode
+	mov r22=IA64_KR(CURRENT);;
+	adds r22=IA64_VCPU_META_RR0_OFFSET,r22;;
+	ld4 r23=[r22];;
+	mov rr[r0]=r23;;	// install the metaphysical region register 0
+	srlz.i;;
+	adds r21=1,r0 ;;
+	st4 [r20]=r21 ;;	// meta = 1
+	// adjust return address to skip over break instruction
+1:	extr.u r26=r24,41,2 ;;	// r26 = ipsr.ri (bundle slot)
+	cmp.eq p6,p7=2,r26 ;;
+(p6)	mov r26=0
+(p6)	adds r25=16,r25		// slot 2: advance iip to next bundle
+(p7)	adds r26=1,r26
+	;;
+	dep r24=r26,r24,41,2
+	;;
+	mov cr.ipsr=r24
+	mov cr.iip=r25
+	mov pr=r31,-1 ;;
+	rfi
+	;;
+#endif
diff --git a/xen/arch/ia64/idle0_task.c b/xen/arch/ia64/idle0_task.c
new file mode 100644
index 0000000000..bfb49f7591
--- /dev/null
+++ b/xen/arch/ia64/idle0_task.c
@@ -0,0 +1,58 @@
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/desc.h>
+
+#define INIT_MM(name) /* static initializer for a kernel mm_struct */ \
+{ \
+	.pgd		= swapper_pg_dir, \
+	.mm_users	= ATOMIC_INIT(2), \
+	.mm_count	= ATOMIC_INIT(1), \
+	.page_table_lock = SPIN_LOCK_UNLOCKED, \
+	.mmlist		= LIST_HEAD_INIT(name.mmlist), \
+}
+
+#define IDLE0_EXEC_DOMAIN(_ed,_d) /* static initializer for the idle vcpu (GNU label: style) */ \
+{ \
+	processor:	0, \
+	mm:		0, \
+	thread:		INIT_THREAD, \
+	domain:		(_d) \
+}
+
+#define IDLE0_DOMAIN(_t) /* static initializer for the idle domain */ \
+{ \
+	domain_id:	IDLE_DOMAIN_ID, \
+	domain_flags:	DOMF_idle_domain, \
+	refcnt:		ATOMIC_INIT(1) \
+}
+
+struct mm_struct init_mm = INIT_MM(init_mm);	/* the initial address space */
+EXPORT_SYMBOL(init_mm);
+
+struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain);	/* the idle domain itself */
+#if 0
+struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu,
+                                           &idle0_domain);
+#endif
+
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is properly aligned due to the way process stacks are
+ * handled. This is done by having a special ".data.init_task" section...
+ */
+union {
+	struct {
+		struct domain task;	/* domain struct overlaid on the base of its stack */
+	} s;
+	unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
+} init_task_mem asm ("init_task") __attribute__((section(".data.init_task")));	/* left zero-initialized */
+// = {{
+	;	/* NOTE(review): stray semicolon left over from the commented-out initializer above */
+//.task =		IDLE0_EXEC_DOMAIN(init_task_mem.s.task,&idle0_domain),
+//};
+//};
+
+EXPORT_SYMBOL(init_task);
+
+
diff --git a/xen/arch/ia64/irq.c b/xen/arch/ia64/irq.c
new file mode 100644
index 0000000000..473e0afdae
--- /dev/null
+++ b/xen/arch/ia64/irq.c
@@ -0,0 +1,1496 @@
+/*
+ * linux/arch/ia64/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
+ *
+ * 4/14/2004: Added code to handle cpu migration and do safe irq
+ *			migration without losing interrupts for iosapic
+ * architecture.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#ifndef XEN
+#include <linux/signal.h>
+#endif
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#ifndef XEN
+#include <linux/random.h>
+#include <linux/cpu.h>
+#endif
+#include <linux/ctype.h>
+#ifndef XEN
+#include <linux/smp_lock.h>
+#endif
+#include <linux/init.h>
+#ifndef XEN
+#include <linux/kernel_stat.h>
+#endif
+#include <linux/irq.h>
+#ifndef XEN
+#include <linux/proc_fs.h>
+#endif
+#include <linux/seq_file.h>
+#ifndef XEN
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#endif
+
+#include <asm/atomic.h>
+#ifndef XEN
+#include <asm/cpu.h>
+#endif
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#ifndef XEN
+#include <asm/tlbflush.h>
+#endif
+#include <asm/delay.h>
+#include <asm/irq.h>
+
+#ifdef XEN
+#include <xen/event.h>
+#define _irq_desc irq_desc
+#define irq_descp(irq) &irq_desc[irq]
+#define apicid_to_phys_cpu_present(x) 1
+#endif
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t _irq_desc[NR_IRQS] __cacheline_aligned = {
+	[0 ... NR_IRQS-1] = {
+		.status = IRQ_DISABLED,
+		.handler = &no_irq_type,	/* default: the no-op controller defined below */
+		.lock = SPIN_LOCK_UNLOCKED
+	}
+};
+
+/*
+ * This is updated when the user sets irq affinity via /proc
+ */
+cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
+
+#ifdef CONFIG_IA64_GENERIC
+irq_desc_t * __ia64_irq_desc (unsigned int irq)		/* irq -> descriptor: direct array index */
+{
+	return _irq_desc + irq;
+}
+
+ia64_vector __ia64_irq_to_vector (unsigned int irq)	/* irq number == hardware vector here */
+{
+	return (ia64_vector) irq;
+}
+
+unsigned int __ia64_local_vector_to_irq (ia64_vector vec)	/* inverse of the identity mapping above */
+{
+	return (unsigned int) vec;
+}
+#endif
+
+static void register_irq_proc (unsigned int irq);	/* forward declaration */
+
+/*
+ * Special irq handlers.
+ */
+
+#ifdef XEN
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }	/* placeholder handler: no-op */
+#else
+irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
+{ return IRQ_NONE; }
+#endif
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }		/* no-op: nothing to enable */
+static unsigned int startup_none(unsigned int irq) { return 0; }	/* no-op: never pending */
+static void disable_none(unsigned int irq) { }		/* no-op: nothing to disable */
+static void ack_none(unsigned int irq)
+{
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves, it doesn't deserve
+ * a generic callback i think.
+ */
+#ifdef CONFIG_X86
+	printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 */
+	ack_APIC_irq();
+#endif
+#endif
+#ifdef CONFIG_IA64
+	printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none	disable_none
+#define end_none	enable_none
+
+struct hw_interrupt_type no_irq_type = {	/* controller used before a real one is installed */
+	"none",
+	startup_none,
+	shutdown_none,
+	enable_none,
+	disable_none,
+	ack_none,
+	end_none
+};
+
+atomic_t irq_err_count;		/* count of spurious/unhandled interrupt events */
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+#ifndef XEN
+int show_interrupts(struct seq_file *p, void *v)	/* seq_file "show" callback: one irq per call */
+{
+	int j, i = *(loff_t *) v;	/* i = irq index supplied by the seq iterator */
+	struct irqaction * action;
+	irq_desc_t *idesc;
+	unsigned long flags;
+
+	if (i == 0) {
+		seq_puts(p, " ");
+		for (j=0; j<NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "CPU%d ",j);
+		seq_putc(p, '\n');
+	}
+
+	if (i < NR_IRQS) {
+		idesc = irq_descp(i);
+		spin_lock_irqsave(&idesc->lock, flags);
+		action = idesc->action;
+		if (!action)	/* no handler registered: print nothing for this irq */
+			goto skip;
+		seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+		seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+		seq_printf(p, " %14s", idesc->handler->typename);
+		seq_printf(p, " %s", action->name);
+
+		for (action=action->next; action; action = action->next)	/* shared-irq co-handlers */
+			seq_printf(p, ", %s", action->name);
+
+		seq_putc(p, '\n');
+skip:
+		spin_unlock_irqrestore(&idesc->lock, flags);
+	} else if (i == NR_IRQS) {	/* trailing summary lines after the per-irq rows */
+		seq_puts(p, "NMI: ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ", nmi_count(j));
+		seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+		seq_puts(p, "LOC: ");
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
+		seq_putc(p, '\n');
+#endif
+		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+#endif
+	}
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+inline void synchronize_irq(unsigned int irq)	/* spin until no handler for irq is running on any CPU */
+{
+	while (irq_descp(irq)->status & IRQ_INPROGRESS)
+		cpu_relax();
+}
+EXPORT_SYMBOL(synchronize_irq);
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq,
+		struct pt_regs *regs, struct irqaction *action)	/* run the handler(s) for one irq */
+{
+	int status = 1;	/* Force the "do bottom halves" bit */
+	int retval = 0;
+
+#ifndef XEN
+	if (!(action->flags & SA_INTERRUPT))	/* fast handlers run with irqs disabled */
+#endif
+		local_irq_enable();
+
+#ifdef XEN
+	action->handler(irq, action->dev_id, regs);	/* single handler; its return is discarded */
+#else
+	do {
+		status |= action->flags;
+		retval |= action->handler(irq, action->dev_id, regs);
+		action = action->next;
+	} while (action);
+	if (status & SA_SAMPLE_RANDOM)
+		add_interrupt_randomness(irq);
+#endif
+	local_irq_disable();
+	return retval;	/* NOTE(review): always 0 under XEN since the handler return is not collected */
+}
+
+#ifndef XEN
+static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)	/* dump diagnostics for a misbehaving irq */
+{
+	struct irqaction *action;
+
+	if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
+		printk(KERN_ERR "irq event %d: bogus return value %x\n",
+				irq, action_ret);
+	} else {
+		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
+	}
+	dump_stack();
+	printk(KERN_ERR "handlers:\n");
+	action = desc->action;
+	do {	/* list every registered handler for this irq */
+		printk(KERN_ERR "[<%p>]", action->handler);
+		print_symbol(" (%s)",
+			(unsigned long)action->handler);
+		printk("\n");
+		action = action->next;
+	} while (action);
+}
+
+static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)	/* rate-limited wrapper */
+{
+	static int count = 100;	/* report at most 100 bad events per boot */
+
+	if (count) {
+		count--;
+		__report_bad_irq(irq, desc, action_ret);
+	}
+}
+#endif
+
+static int noirqdebug;	/* set by the "noirqdebug" boot parameter */
+
+static int __init noirqdebug_setup(char *str)	/* boot-param hook: disable stuck-irq detection */
+{
+	noirqdebug = 1;
+	printk("IRQ lockup detection disabled\n");
+	return 1;
+}
+
+__setup("noirqdebug", noirqdebug_setup);
+
+/*
+ * If 99,900 of the previous 100,000 interrupts have not been handled then
+ * assume that the IRQ is stuck in some manner. Drop a diagnostic and try to
+ * turn the IRQ off.
+ *
+ * (The other 100-of-100,000 interrupts may have been a correctly-functioning
+ * device sharing an IRQ with the failing one)
+ *
+ * Called under desc->lock
+ */
+#ifndef XEN
+static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)	/* stuck-irq bookkeeping */
+{
+	if (action_ret != IRQ_HANDLED) {
+		desc->irqs_unhandled++;
+		if (action_ret != IRQ_NONE)	/* bogus return value, not merely unhandled */
+			report_bad_irq(irq, desc, action_ret);
+	}
+
+	desc->irq_count++;
+	if (desc->irq_count < 100000)	/* evaluate only once per 100,000 events */
+		return;
+
+	desc->irq_count = 0;
+	if (desc->irqs_unhandled > 99900) {
+		/*
+		 * The interrupt is stuck
+		 */
+		__report_bad_irq(irq, desc, action_ret);
+		/*
+		 * Now kill the IRQ
+		 */
+		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
+		desc->status |= IRQ_DISABLED;
+		desc->handler->disable(irq);
+	}
+	desc->irqs_unhandled = 0;
+}
+#endif
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ */
+
+/**
+ *	disable_irq_nosync - disable an irq without waiting
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line.  Disables and Enables are
+ *	nested.
+ *	Unlike disable_irq(), this function does not ensure existing
+ *	instances of the IRQ handler have completed before returning.
+ *
+ *	This function may be called from IRQ context.
+ */
+
+inline void disable_irq_nosync(unsigned int irq)
+{
+	irq_desc_t *desc = irq_descp(irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	if (!desc->depth++) {	/* only the first (outermost) disable touches hardware */
+		desc->status |= IRQ_DISABLED;
+		desc->handler->disable(irq);
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+EXPORT_SYMBOL(disable_irq_nosync);
+
+/**
+ *	disable_irq - disable an irq and wait for completion
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line.  Enables and Disables are
+ *	nested.
+ *	This function waits for any pending IRQ handlers for this interrupt
+ *	to complete before returning. If you use this function while
+ *	holding a resource the IRQ handler may need you will deadlock.
+ *
+ *	This function may be called - with care - from IRQ context.
+ */
+
+void disable_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_descp(irq);
+
+	disable_irq_nosync(irq);
+	if (desc->action)	/* only wait if a handler could actually be running */
+		synchronize_irq(irq);
+}
+EXPORT_SYMBOL(disable_irq);
+
+/**
+ *	enable_irq - enable handling of an irq
+ *	@irq: Interrupt to enable
+ *
+ *	Undoes the effect of one call to disable_irq().  If this
+ *	matches the last disable, processing of interrupts on this
+ *	IRQ line is re-enabled.
+ *
+ *	This function may be called from IRQ context.
+ */
+
+void enable_irq(unsigned int irq)
+{
+	irq_desc_t *desc = irq_descp(irq);
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	switch (desc->depth) {
+	case 1: {	/* last nested disable: re-enable the hardware line */
+		unsigned int status = desc->status & ~IRQ_DISABLED;
+		desc->status = status;
+#ifndef XEN
+		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+			desc->status = status | IRQ_REPLAY;
+			hw_resend_irq(desc->handler,irq);	/* replay an event dropped while disabled */
+		}
+#endif
+		desc->handler->enable(irq);
+		/* fall-through */
+	}
+	default:
+		desc->depth--;	/* just unwind one nesting level */
+		break;
+	case 0:
+		printk(KERN_ERR "enable_irq(%u) unbalanced from %p\n",
+		       irq, (void *) __builtin_return_address(0));
+	}
+	spin_unlock_irqrestore(&desc->lock, flags);
+}
+EXPORT_SYMBOL(enable_irq);
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	struct irqaction * action;
+	unsigned int status;
+
+#ifndef XEN
+	kstat_this_cpu.irqs[irq]++;
+#endif
+	if (desc->status & IRQ_PER_CPU) {
+		irqreturn_t action_ret;
+
+		/*
+		 * No locking required for CPU-local interrupts:
+		 */
+		desc->handler->ack(irq);
+		action_ret = handle_IRQ_event(irq, regs, desc->action);
+#ifndef XEN
+		if (!noirqdebug)
+			note_interrupt(irq, desc, action_ret);
+#endif
+		desc->handler->end(irq);
+		return 1;
+	}
+
+	spin_lock(&desc->lock);
+	desc->handler->ack(irq);
+	/*
+	 * REPLAY is when Linux resends an IRQ that was dropped earlier
+	 * WAITING is used by probe to mark irqs that are being tested
+	 */
+#ifdef XEN
+	status = desc->status & ~IRQ_REPLAY;	/* Xen has no irq autoprobe, so no WAITING bit */
+#else
+	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+#endif
+	status |= IRQ_PENDING; /* we _want_ to handle it */
+
+	/*
+	 * If the IRQ is disabled for whatever reason, we cannot
+	 * use the action we have.
+	 */
+	action = NULL;
+	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
+		action = desc->action;
+		status &= ~IRQ_PENDING; /* we commit to handling */
+		status |= IRQ_INPROGRESS; /* we are handling it */
+	}
+	desc->status = status;
+
+	/*
+	 * If there is no IRQ handler or it was disabled, exit early.
+	 * Since we set PENDING, if another processor is handling
+	 * a different instance of this same irq, the other processor
+	 * will take care of it.
+	 */
+	if (unlikely(!action))
+		goto out;
+
+	/*
+	 * Edge triggered interrupts need to remember
+	 * pending events.
+	 * This applies to any hw interrupts that allow a second
+	 * instance of the same irq to arrive while we are in do_IRQ
+	 * or in the handler. But the code here only handles the _second_
+	 * instance of the irq, not the third or fourth. So it is mostly
+	 * useful for irq hardware that does not mask cleanly in an
+	 * SMP environment.
+	 */
+	for (;;) {
+		irqreturn_t action_ret;
+
+		spin_unlock(&desc->lock);	/* run the handler with the descriptor unlocked */
+
+		action_ret = handle_IRQ_event(irq, regs, action);
+
+		spin_lock(&desc->lock);
+#ifndef XEN
+		if (!noirqdebug)
+			note_interrupt(irq, desc, action_ret);
+#endif
+		if (likely(!(desc->status & IRQ_PENDING)))	/* no new event arrived meanwhile: done */
+			break;
+		desc->status &= ~IRQ_PENDING;
+	}
+	desc->status &= ~IRQ_INPROGRESS;
+
+out:
+	/*
+	 * The ->end() handler has to deal with interrupts which got
+	 * disabled while the handler was running.
+	 */
+	desc->handler->end(irq);
+	spin_unlock(&desc->lock);
+
+	return 1;
+}
+
+/**
+ *	request_irq - allocate an interrupt line
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs
+ *	@irqflags: Interrupt type flags
+ *	@devname: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	This call allocates interrupt resources and enables the
+ *	interrupt line and IRQ handling. From the point this
+ *	call is made your handler function may be invoked. Since
+ *	your handler function must clear any interrupt the board
+ *	raises, you must take care both to initialise your hardware
+ *	and to set up the interrupt handler in the right order.
+ *
+ *	Dev_id must be globally unique. Normally the address of the
+ *	device data structure is used as the cookie. Since the handler
+ *	receives this value it makes sense to use it.
+ *
+ *	If your interrupt is shared you must pass a non NULL dev_id
+ *	as this is required when freeing the interrupt.
+ *
+ *	Flags:
+ *
+ *	SA_SHIRQ		Interrupt is shared
+ *
+ *	SA_INTERRUPT		Disable local interrupts while processing
+ *
+ *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
+ *
+ */
+
+int request_irq(unsigned int irq,
+		irqreturn_t (*handler)(int, void *, struct pt_regs *),
+		unsigned long irqflags,
+		const char * devname,
+		void *dev_id)
+{
+	int retval;
+	struct irqaction * action;
+
+#if 1
+	/*
+	 * Sanity-check: shared interrupts should REALLY pass in
+	 * a real dev-ID, otherwise we'll have trouble later trying
+	 * to figure out which interrupt is which (messes up the
+	 * interrupt freeing logic etc).
+	 */
+	if (irqflags & SA_SHIRQ) {
+		if (!dev_id)
+			printk(KERN_ERR "Bad boy: %s called us without a dev_id!\n", devname);
+	}
+#endif
+
+	if (irq >= NR_IRQS)
+		return -EINVAL;
+	if (!handler)
+		return -EINVAL;
+
+	action = xmalloc(struct irqaction);	/* note: xmalloc does not zero; fields set below */
+	if (!action)
+		return -ENOMEM;
+
+	action->handler = handler;
+#ifndef XEN
+	action->flags = irqflags;
+	action->mask = 0;
+#endif
+	action->name = devname;
+#ifndef XEN
+	action->next = NULL;
+#endif
+	action->dev_id = dev_id;
+
+	retval = setup_irq(irq, action);
+	if (retval)	/* setup failed: caller keeps nothing, free the action */
+		xfree(action);
+	return retval;
+}
+
+EXPORT_SYMBOL(request_irq);
+
+/**
+ * free_irq - free an interrupt
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
+ *
+ * Remove an interrupt handler. The handler is removed and if the
+ * interrupt line is no longer in use by any driver it is disabled.
+ * On a shared IRQ the caller must ensure the interrupt is disabled
+ * on the card it drives before calling this function. The function
+ * does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function must not be called from interrupt context.
+ */
+
+#ifdef XEN
+void free_irq(unsigned int irq)
+#else
+void free_irq(unsigned int irq, void *dev_id)
+#endif
+{
+ irq_desc_t *desc;
+ struct irqaction **p;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS)
+ return;
+
+ desc = irq_descp(irq);
+ spin_lock_irqsave(&desc->lock,flags);
+#ifdef XEN
+ if (desc->action) {
+ struct irqaction * action = desc->action;
+ desc->action = NULL;
+#else
+ p = &desc->action;
+ for (;;) {
+ struct irqaction * action = *p;
+ if (action) {
+ struct irqaction **pp = p;
+ p = &action->next;
+ if (action->dev_id != dev_id)
+ continue;
+
+ /* Found it - now remove it from the list of entries */
+ *pp = action->next;
+ if (!desc->action) {
+#endif
+ desc->status |= IRQ_DISABLED;
+ desc->handler->shutdown(irq);
+#ifndef XEN
+ }
+#endif
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ /* Wait to make sure it's not being used on another CPU */
+ synchronize_irq(irq);
+ xfree(action);
+ return;
+ }
+ printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
+ spin_unlock_irqrestore(&desc->lock,flags);
+#ifndef XEN
+ return;
+ }
+#endif
+}
+
+EXPORT_SYMBOL(free_irq);
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static DECLARE_MUTEX(probe_sem);
+
+/**
+ * probe_irq_on - begin an interrupt autodetect
+ *
+ * Commence probing for an interrupt. The interrupts are scanned
+ * and a mask of potential interrupt lines is returned.
+ *
+ */
+
+#ifndef XEN
+unsigned long probe_irq_on(void)
+{
+ unsigned int i;
+ irq_desc_t *desc;
+ unsigned long val;
+ unsigned long delay;
+
+ down(&probe_sem);
+ /*
+ * something may have generated an irq long ago and we want to
+ * flush such a longstanding irq before considering it as spurious.
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_descp(i);
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action)
+ desc->handler->startup(i);
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /* Wait for longstanding interrupts to trigger. */
+ for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+ /* about 20ms delay */ barrier();
+
+ /*
+ * enable any unassigned irqs
+ * (we must startup again here because if a longstanding irq
+ * happened in the previous stage, it may have masked itself)
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_descp(i);
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action) {
+ desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ if (desc->handler->startup(i))
+ desc->status |= IRQ_PENDING;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /*
+ * Wait for spurious interrupts to trigger
+ */
+ for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+ /* about 100ms delay */ barrier();
+
+ /*
+ * Now filter out any obviously spurious interrupts
+ */
+ val = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_descp(i);
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ /* It triggered already - consider it spurious. */
+ if (!(status & IRQ_WAITING)) {
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ } else
+ if (i < 32)
+ val |= 1 << i;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ return val;
+}
+
+EXPORT_SYMBOL(probe_irq_on);
+
+/**
+ * probe_irq_mask - scan a bitmap of interrupt lines
+ * @val: mask of interrupts to consider
+ *
+ * Scan the ISA bus interrupt lines and return a bitmap of
+ * active interrupts. The interrupt probe logic state is then
+ * returned to its previous value.
+ *
+ * Note: we need to scan all the irq's even though we will
+ * only return ISA irq numbers - just so that we reset them
+ * all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+ int i;
+ unsigned int mask;
+
+ mask = 0;
+ for (i = 0; i < 16; i++) {
+ irq_desc_t *desc = irq_descp(i);
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING))
+ mask |= 1 << i;
+
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ up(&probe_sem);
+
+ return mask & val;
+}
+EXPORT_SYMBOL(probe_irq_mask);
+
+/**
+ * probe_irq_off - end an interrupt autodetect
+ * @val: mask of potential interrupts (unused)
+ *
+ * Scans the unused interrupt lines and returns the line which
+ * appears to have triggered the interrupt. If no interrupt was
+ * found then zero is returned. If more than one interrupt is
+ * found then minus the first candidate is returned to indicate
+ * there is doubt.
+ *
+ * The interrupt probe logic state is returned to its previous
+ * value.
+ *
+ * BUGS: When used in a module (which arguably shouldn't happen)
+ * nothing prevents two IRQ probe callers from overlapping. The
+ * results of this are non-optimal.
+ */
+
+int probe_irq_off(unsigned long val)
+{
+ int i, irq_found, nr_irqs;
+
+ nr_irqs = 0;
+ irq_found = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_descp(i);
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = i;
+ nr_irqs++;
+ }
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ up(&probe_sem);
+
+ if (nr_irqs > 1)
+ irq_found = -irq_found;
+ return irq_found;
+}
+
+EXPORT_SYMBOL(probe_irq_off);
+#endif
+
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+ int shared = 0;
+ unsigned long flags;
+ struct irqaction *old, **p;
+ irq_desc_t *desc = irq_descp(irq);
+
+#ifndef XEN
+ if (desc->handler == &no_irq_type)
+ return -ENOSYS;
+ /*
+ * Some drivers like serial.c use request_irq() heavily,
+ * so we have to be careful not to interfere with a
+ * running system.
+ */
+ if (new->flags & SA_SAMPLE_RANDOM) {
+ /*
+ * This function might sleep, we want to call it first,
+ * outside of the atomic block.
+ * Yes, this might clear the entropy pool if the wrong
+ * driver is attempted to be loaded, without actually
+ * installing a new handler, but is this really a problem,
+ * only the sysadmin is able to do this.
+ */
+ rand_initialize_irq(irq);
+ }
+
+ if (new->flags & SA_PERCPU_IRQ) {
+ desc->status |= IRQ_PER_CPU;
+ desc->handler = &irq_type_ia64_lsapic;
+ }
+#endif
+
+ /*
+ * The following block of code has to be executed atomically
+ */
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ if ((old = *p) != NULL) {
+#ifdef XEN
+ if (1) {
+ /* Can't share interrupts unless both agree to */
+#else
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+#endif
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return -EBUSY;
+ }
+
+#ifndef XEN
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+#endif
+ }
+
+ *p = new;
+
+#ifndef XEN
+ if (!shared) {
+#else
+ {
+#endif
+ desc->depth = 0;
+#ifdef XEN
+ desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS);
+#else
+ desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
+#endif
+ desc->handler->startup(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifndef XEN
+ register_irq_proc(irq);
+#endif
+ return 0;
+}
+
+static struct proc_dir_entry * root_irq_dir;
+static struct proc_dir_entry * irq_dir [NR_IRQS];
+
+#ifdef CONFIG_SMP
+
+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+static cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
+
+static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
+
+void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
+{
+ cpumask_t mask = CPU_MASK_NONE;
+
+ cpu_set(cpu_logical_id(hwid), mask);
+
+ if (irq < NR_IRQS) {
+ irq_affinity[irq] = mask;
+ irq_redir[irq] = (char) (redir & 0xff);
+ }
+}
+
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len = sprintf(page, "%s", irq_redir[(long)data] ? "r " : "");
+
+ len += cpumask_scnprintf(page+len, count, irq_affinity[(long)data]);
+ if (count - len < 2)
+ return -EINVAL;
+ len += sprintf(page + len, "\n");
+ return len;
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ unsigned int irq = (unsigned long) data;
+ int full_count = count, err;
+ cpumask_t new_value, tmp;
+# define R_PREFIX_LEN 16
+ char rbuf[R_PREFIX_LEN];
+ int rlen;
+ int prelen;
+ irq_desc_t *desc = irq_descp(irq);
+ unsigned long flags;
+
+ if (!desc->handler->set_affinity)
+ return -EIO;
+
+ /*
+ * If string being written starts with a prefix of 'r' or 'R'
+ * and some limited number of spaces, set IA64_IRQ_REDIRECTED.
+ * If more than (R_PREFIX_LEN - 2) spaces are passed, they won't
+ * all be trimmed as part of prelen, the untrimmed spaces will
+ * cause the hex parsing to fail, and this write() syscall will
+ * fail with EINVAL.
+ */
+
+ if (!count)
+ return -EINVAL;
+ rlen = min(sizeof(rbuf)-1, count);
+ if (copy_from_user(rbuf, buffer, rlen))
+ return -EFAULT;
+ rbuf[rlen] = 0;
+ prelen = 0;
+ if (tolower(*rbuf) == 'r') {
+ prelen = strspn(rbuf, "Rr ");
+ irq |= IA64_IRQ_REDIRECTED;
+ }
+
+ err = cpumask_parse(buffer+prelen, count-prelen, new_value);
+ if (err)
+ return err;
+
+ /*
+ * Do not allow disabling IRQs completely - it's a too easy
+ * way to make the system unusable accidentally :-) At least
+ * one online CPU still has to be targeted.
+ */
+ cpus_and(tmp, new_value, cpu_online_map);
+ if (cpus_empty(tmp))
+ return -EINVAL;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ pending_irq_cpumask[irq] = new_value;
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ return full_count;
+}
+
+void move_irq(int irq)
+{
+ /* note - we hold desc->lock */
+ cpumask_t tmp;
+ irq_desc_t *desc = irq_descp(irq);
+
+ if (!cpus_empty(pending_irq_cpumask[irq])) {
+ cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
+ if (unlikely(!cpus_empty(tmp))) {
+ desc->handler->set_affinity(irq, pending_irq_cpumask[irq]);
+ }
+ cpus_clear(pending_irq_cpumask[irq]);
+ }
+}
+
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_HOTPLUG_CPU
+unsigned int vectors_in_migration[NR_IRQS];
+
+/*
+ * Since cpu_online_map is already updated, we just need to check for
+ * affinity that has zeros
+ */
+static void migrate_irqs(void)
+{
+ cpumask_t mask;
+ irq_desc_t *desc;
+ int irq, new_cpu;
+
+ for (irq=0; irq < NR_IRQS; irq++) {
+ desc = irq_descp(irq);
+
+ /*
+ * No handling for now.
+ * TBD: Implement a disable function so we can now
+ * tell CPU not to respond to these local intr sources.
+ * such as ITV,CPEI,MCA etc.
+ */
+ if (desc->status == IRQ_PER_CPU)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], cpu_online_map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ /*
+ * Save it for phase 2 processing
+ */
+ vectors_in_migration[irq] = irq;
+
+ new_cpu = any_online_cpu(cpu_online_map);
+ mask = cpumask_of_cpu(new_cpu);
+
+ /*
+ * All three are essential, currently WARN_ON.. maybe panic?
+ */
+ if (desc->handler && desc->handler->disable &&
+ desc->handler->enable && desc->handler->set_affinity) {
+ desc->handler->disable(irq);
+ desc->handler->set_affinity(irq, mask);
+ desc->handler->enable(irq);
+ } else {
+ WARN_ON((!(desc->handler) || !(desc->handler->disable) ||
+ !(desc->handler->enable) ||
+ !(desc->handler->set_affinity)));
+ }
+ }
+ }
+}
+
+void fixup_irqs(void)
+{
+ unsigned int irq;
+ extern void ia64_process_pending_intr(void);
+
+ ia64_set_itv(1<<16);
+ /*
+ * Phase 1: Locate irq's bound to this cpu and
+ * relocate them for cpu removal.
+ */
+ migrate_irqs();
+
+ /*
+ * Phase 2: Perform interrupt processing for all entries reported in
+ * local APIC.
+ */
+ ia64_process_pending_intr();
+
+ /*
+ * Phase 3: Now handle any interrupts not captured in local APIC.
+ * This is to account for cases where the device interrupted during the time the
+ * rte was being disabled and re-programmed.
+ */
+ for (irq=0; irq < NR_IRQS; irq++) {
+ if (vectors_in_migration[irq]) {
+ vectors_in_migration[irq]=0;
+ do_IRQ(irq, NULL);
+ }
+ }
+
+ /*
+ * Now let processor die. We do irq disable and max_xtp() to
+ * ensure there is no more interrupts routed to this processor.
+ * But the local timer interrupt can have 1 pending which we
+ * take care in timer_interrupt().
+ */
+ max_xtp();
+ local_irq_disable();
+}
+#endif
+
+#ifndef XEN
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+ if (count - len < 2)
+ return -EINVAL;
+ len += sprintf(page + len, "\n");
+ return len;
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ cpumask_t *mask = (cpumask_t *)data;
+ unsigned long full_count = count, err;
+ cpumask_t new_value;
+
+ err = cpumask_parse(buffer, count, new_value);
+ if (err)
+ return err;
+
+ *mask = new_value;
+ return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+ char name [MAX_NAMELEN];
+
+ if (!root_irq_dir || (irq_descp(irq)->handler == &no_irq_type) || irq_dir[irq])
+ return;
+
+ memset(name, 0, MAX_NAMELEN);
+ sprintf(name, "%d", irq);
+
+ /* create /proc/irq/1234 */
+ irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+#ifdef CONFIG_SMP
+ {
+ struct proc_dir_entry *entry;
+
+ /* create /proc/irq/1234/smp_affinity */
+ entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+ if (entry) {
+ entry->nlink = 1;
+ entry->data = (void *)(long)irq;
+ entry->read_proc = irq_affinity_read_proc;
+ entry->write_proc = irq_affinity_write_proc;
+ }
+
+ smp_affinity_entry[irq] = entry;
+ }
+#endif
+}
+
+cpumask_t prof_cpu_mask = CPU_MASK_ALL;
+
+void init_irq_proc (void)
+{
+ struct proc_dir_entry *entry;
+ int i;
+
+ /* create /proc/irq */
+ root_irq_dir = proc_mkdir("irq", 0);
+
+ /* create /proc/irq/prof_cpu_mask */
+ entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+ if (!entry)
+ return;
+
+ entry->nlink = 1;
+ entry->data = (void *)&prof_cpu_mask;
+ entry->read_proc = prof_cpu_mask_read_proc;
+ entry->write_proc = prof_cpu_mask_write_proc;
+
+ /*
+ * Create entries for all existing IRQs.
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ if (irq_descp(i)->handler == &no_irq_type)
+ continue;
+ register_irq_proc(i);
+ }
+}
+#endif
+
+
+#ifdef XEN
+/*
+ * HANDLING OF GUEST-BOUND PHYSICAL IRQS
+ */
+
+#define IRQ_MAX_GUESTS 7
+typedef struct {
+ u8 nr_guests;
+ u8 in_flight;
+ u8 shareable;
+ struct domain *guest[IRQ_MAX_GUESTS];
+} irq_guest_action_t;
+
+static void __do_IRQ_guest(int irq)
+{
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+ struct domain *d;
+ int i;
+
+ for ( i = 0; i < action->nr_guests; i++ )
+ {
+ d = action->guest[i];
+ if ( !test_and_set_bit(irq, &d->pirq_mask) )
+ action->in_flight++;
+ send_guest_pirq(d, irq);
+ }
+}
+
+int pirq_guest_unmask(struct domain *d)
+{
+ irq_desc_t *desc;
+ int i, j, pirq;
+ u32 m;
+ shared_info_t *s = d->shared_info;
+
+ for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ )
+ {
+ m = d->pirq_mask[i];
+ while ( (j = ffs(m)) != 0 )
+ {
+ m &= ~(1 << --j);
+ pirq = (i << 5) + j;
+ desc = &irq_desc[pirq];
+ spin_lock_irq(&desc->lock);
+ if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
+ test_and_clear_bit(pirq, &d->pirq_mask) &&
+ (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
+ desc->handler->end(pirq);
+ spin_unlock_irq(&desc->lock);
+ }
+ }
+
+ return 0;
+}
+
+int pirq_guest_bind(struct vcpu *d, int irq, int will_share)
+{
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action;
+ unsigned long flags;
+ int rc = 0;
+
+ if ( !IS_CAPABLE_PHYSDEV(d->domain) )
+ return -EPERM;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ action = (irq_guest_action_t *)desc->action;
+
+ if ( !(desc->status & IRQ_GUEST) )
+ {
+ if ( desc->action != NULL )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n",
+ irq, desc->action->name);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ action = xmalloc(irq_guest_action_t);
+ if ( (desc->action = (struct irqaction *)action) == NULL )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ action->nr_guests = 0;
+ action->in_flight = 0;
+ action->shareable = will_share;
+
+ desc->depth = 0;
+ desc->status |= IRQ_GUEST;
+ desc->status &= ~IRQ_DISABLED;
+ desc->handler->startup(irq);
+
+ /* Attempt to bind the interrupt target to the correct CPU. */
+#if 0 /* FIXME CONFIG_SMP ??? */
+ if ( desc->handler->set_affinity != NULL )
+ desc->handler->set_affinity(
+ irq, apicid_to_phys_cpu_present(d->processor));
+#endif
+ }
+ else if ( !will_share || !action->shareable )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n",
+ irq);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ if ( action->nr_guests == IRQ_MAX_GUESTS )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ action->guest[action->nr_guests++] = d;
+
+ out:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return rc;
+}
+
+int pirq_guest_unbind(struct domain *d, int irq)
+{
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ action = (irq_guest_action_t *)desc->action;
+
+ if ( test_and_clear_bit(irq, &d->pirq_mask) &&
+ (--action->in_flight == 0) )
+ desc->handler->end(irq);
+
+ if ( action->nr_guests == 1 )
+ {
+ desc->action = NULL;
+ xfree(action);
+ desc->depth = 1;
+ desc->status |= IRQ_DISABLED;
+ desc->status &= ~IRQ_GUEST;
+ desc->handler->shutdown(irq);
+ }
+ else
+ {
+ i = 0;
+ while ( action->guest[i] != d )
+ i++;
+ memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
+ action->nr_guests--;
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return 0;
+}
+
+#endif
+
+#ifdef XEN
+#ifdef IA64
+// this is a temporary hack until real console input is implemented
+irqreturn_t guest_forward_keyboard_input(int irq, void *nada, struct pt_regs *regs)
+{
+ domain_pend_keyboard_interrupt(irq);
+}
+
+void serial_input_init(void)
+{
+ int retval;
+ int irq = 0x30; // FIXME
+
+ retval = request_irq(irq,guest_forward_keyboard_input,SA_INTERRUPT,"siminput",NULL);
+ if (retval) {
+ printk("serial_input_init: broken request_irq call\n");
+ while(1);
+ }
+}
+#endif
+#endif
diff --git a/xen/arch/ia64/ivt.S b/xen/arch/ia64/ivt.S
new file mode 100644
index 0000000000..4d6785c310
--- /dev/null
+++ b/xen/arch/ia64/ivt.S
@@ -0,0 +1,1898 @@
+
+#ifdef XEN
+//#define CONFIG_DISABLE_VHPT // FIXME: change when VHPT is enabled??
+// these are all hacked out for now as the entire IVT
+// will eventually be replaced... just want to use it
+// for startup code to handle TLB misses
+//#define ia64_leave_kernel 0
+//#define ia64_ret_from_syscall 0
+//#define ia64_handle_irq 0
+//#define ia64_fault 0
+#define ia64_illegal_op_fault 0
+#define ia64_prepare_handle_unaligned 0
+#define ia64_bad_break 0
+#define ia64_trace_syscall 0
+#define sys_call_table 0
+#define sys_ni_syscall 0
+#include <asm/vhpt.h>
+#endif
+/*
+ * arch/ia64/kernel/ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ * Asit Mallick <asit.k.mallick@intel.com>
+ * Suresh Siddha <suresh.b.siddha@intel.com>
+ * Kenneth Chen <kenneth.w.chen@intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
+ */
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ * For each entry, the comment is as follows:
+ *
+ * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ * entry offset ----/ / / / /
+ * entry number ---------/ / / /
+ * size of the entry -------------/ / /
+ * vector name -------------------------------------/ /
+ * interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/ia32.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+
+#if 1
+# define PSR_DEFAULT_BITS psr.ac
+#else
+# define PSR_DEFAULT_BITS 0
+#endif
+
+#if 0
+ /*
+ * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't
+ * needed for something else before enabling this...
+ */
+# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16
+#else
+# define DBG_FAULT(i)
+#endif
+
+#define MINSTATE_VIRT /* needed by minstate.h */
+#include "minstate.h"
+
+#define FAULT(n) \
+ mov r31=pr; \
+ mov r19=n;; /* prepare to save predicates */ \
+ br.sptk.many dispatch_to_fault_handler
+
+#ifdef XEN
+#define REFLECT(n) \
+ mov r31=pr; \
+ mov r19=n;; /* prepare to save predicates */ \
+ br.sptk.many dispatch_reflection
+#endif
+
+ .section .text.ivt,"ax"
+
+ .align 32768 // align on 32KB boundary
+ .global ia64_ivt
+ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vhpt_miss)
+ DBG_FAULT(0)
+ /*
+ * The VHPT vector is invoked when the TLB entry for the virtual page table
+ * is missing. This happens only as a result of a previous
+ * (the "original") TLB miss, which may either be caused by an instruction
+ * fetch or a data access (or non-access).
+ *
+ * What we do here is normal TLB miss handling for the _original_ miss, followed
+ * by inserting the TLB entry for the virtual page table page that the VHPT
+ * walker was attempting to access. The latter gets inserted as long
+ * as both L1 and L2 have valid mappings for the faulting address.
+ * The TLB entry for the original miss gets inserted only if
+ * the L3 entry indicates that the page is present.
+ *
+ * do_page_fault gets invoked in the following cases:
+ * - the faulting virtual address uses unimplemented address bits
+ * - the faulting virtual address has no L1, L2, or L3 mapping
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+#ifdef CONFIG_HUGETLB_PAGE
+ movl r18=PAGE_SHIFT
+ mov r25=cr.itir
+#endif
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=IA64_KR(PT_BASE) // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ shr r22=r21,3
+#ifdef CONFIG_HUGETLB_PAGE
+ extr.u r26=r25,2,6
+ ;;
+ cmp.ne p8,p0=r18,r26
+ sub r27=r26,r18
+ ;;
+(p8) dep r25=r18,r25,2,6
+(p8) shr r22=r22,r27
+#endif
+ ;;
+ cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5?
+ shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+
+ srlz.d
+ LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+
+ .pred.rel "mutex", p6, p7
+(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r22,PMD_SHIFT // shift L2 index into position
+ ;;
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL?
+ dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r21] // read the L3 PTE
+ mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss
+ ;;
+(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
+ mov r22=cr.iha // get the VHPT address that caused the TLB miss
+ ;; // avoid RAW on p7
+(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
+ dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address
+ ;;
+(p10) itc.i r18 // insert the instruction TLB entry
+(p11) itc.d r18 // insert the data TLB entry
+(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault)
+ mov cr.ifa=r22
+
+#ifdef CONFIG_HUGETLB_PAGE
+(p8) mov cr.itir=r25 // change to default page-size for VHPT
+#endif
+
+ /*
+ * Now compute and insert the TLB entry for the virtual page table. We never
+ * execute in a page table page so there is no need to set the exception deferral
+ * bit.
+ */
+ adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
+ ;;
+(p7) itc.d r24
+ ;;
+#ifdef CONFIG_SMP
+ /*
+ * Tell the assemblers dependency-violation checker that the above "itc" instructions
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ /*
+ * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g
+ * between reading the pagetable and the "itc". If so, flush the entry we
+ * inserted and retry.
+ */
+ ld8 r25=[r21] // read L3 PTE again
+ ld8 r26=[r17] // read L2 entry again
+ ;;
+ cmp.ne p6,p7=r26,r20 // did L2 entry change
+ mov r27=PAGE_SHIFT<<2
+ ;;
+(p6) ptc.l r22,r27 // purge PTE page translation
+(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change
+ ;;
+(p6) ptc.l r16,r27 // purge translation
+#endif
+
+ mov pr=r31,-1 // restore predicate registers
+ rfi
+END(vhpt_miss)
+
+ .org ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(itlb_miss)
+ DBG_FAULT(1)
+#ifdef XEN
+ VHPT_CCHAIN_LOOKUP(itlb_miss,i)
+#ifdef VHPT_GLOBAL
+ br.cond.sptk page_fault
+ ;;
+#endif
+#endif
+ /*
+ * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+ * mode, walk the page table, and then re-execute the L3 PTE read
+ * and go on normally after that.
+ */
+ mov r16=cr.ifa // get virtual address
+ mov r29=b0 // save b0
+ mov r31=pr // save predicates
+.itlb_fault:
+ mov r17=cr.iha // get virtual address of L3 PTE
+ movl r30=1f // load nested fault continuation point
+ ;;
+1: ld8 r18=[r17] // read L3 PTE
+ ;;
+ mov b0=r29
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.cond.spnt page_fault
+ ;;
+ itc.i r18
+ ;;
+#ifdef CONFIG_SMP
+ /*
+ * Tell the assemblers dependency-violation checker that the above "itc" instructions
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ ld8 r19=[r17] // read L3 PTE again and see if same
+ mov r20=PAGE_SHIFT<<2 // setup page size for purge
+ ;;
+ cmp.ne p7,p0=r18,r19
+ ;;
+(p7) ptc.l r16,r20
+#endif
+ mov pr=r31,-1
+ rfi
+END(itlb_miss)
+
+ .org ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(dtlb_miss)
+ DBG_FAULT(2)
+#ifdef XEN
+ VHPT_CCHAIN_LOOKUP(dtlb_miss,d)
+#ifdef VHPT_GLOBAL
+ br.cond.sptk page_fault
+ ;;
+#endif
+#endif
+ /*
+ * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+ * mode, walk the page table, and then re-execute the L3 PTE read
+ * and go on normally after that.
+ */
+ mov r16=cr.ifa // get virtual address
+ mov r29=b0 // save b0
+ mov r31=pr // save predicates
+dtlb_fault:
+ mov r17=cr.iha // get virtual address of L3 PTE
+ movl r30=1f // load nested fault continuation point
+ ;;
+1: ld8 r18=[r17] // read L3 PTE
+ ;;
+ mov b0=r29
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.cond.spnt page_fault
+ ;;
+ itc.d r18
+ ;;
+#ifdef CONFIG_SMP
+ /*
+ * Tell the assemblers dependency-violation checker that the above "itc" instructions
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ ld8 r19=[r17] // read L3 PTE again and see if same
+ mov r20=PAGE_SHIFT<<2 // setup page size for purge
+ ;;
+ cmp.ne p7,p0=r18,r19
+ ;;
+(p7) ptc.l r16,r20
+#endif
+ mov pr=r31,-1
+ rfi
+END(dtlb_miss)
+
+ .org ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(alt_itlb_miss)
+ DBG_FAULT(3)
+#ifdef XEN
+//#ifdef VHPT_GLOBAL
+// VHPT_CCHAIN_LOOKUP(alt_itlb_miss,i)
+// br.cond.sptk page_fault
+// ;;
+//#endif
+#endif
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r21=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r31=pr
+ ;;
+#ifdef CONFIG_DISABLE_VHPT
+ shr.u r22=r16,61 // get the region number into r22
+ ;;
+ cmp.gt p8,p0=6,r22 // user mode
+ ;;
+(p8) thash r17=r16
+ ;;
+(p8) mov cr.iha=r17
+(p8) mov r29=b0 // save b0
+(p8) br.cond.dptk .itlb_fault
+#endif
+ extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
+ or r19=r17,r19 // insert PTE control bits into r19
+ ;;
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
+(p8) br.cond.spnt page_fault // non-privilege-0 access: take the slow path
+ ;;
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+END(alt_itlb_miss)
+
+ .org ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(alt_dtlb_miss)
+ DBG_FAULT(4)
+#ifdef XEN
+//#ifdef VHPT_GLOBAL
+// VHPT_CCHAIN_LOOKUP(alt_dtlb_miss,d)
+// br.cond.sptk page_fault
+// ;;
+//#endif
+#endif
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r20=cr.isr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r21=cr.ipsr
+ mov r31=pr
+ ;;
+#ifdef CONFIG_DISABLE_VHPT
+ shr.u r22=r16,61 // get the region number into r22
+ ;;
+ cmp.gt p8,p0=6,r22 // access to region 0-5
+ ;;
+(p8) thash r17=r16
+ ;;
+(p8) mov cr.iha=r17
+(p8) mov r29=b0 // save b0
+(p8) br.cond.dptk dtlb_fault
+#endif
+ extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
+ and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
+ tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ cmp.ne p8,p0=r0,r23
+(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
+(p8) br.cond.spnt page_fault
+#ifdef XEN
+ ;;
+ // FIXME: inadequate test, this is where we test for Xen address
+ // note that 0xf000 (cached) and 0xd000 (uncached) addresses
+ // should be OK. (Though no I/O is done in Xen, EFI needs uncached
+ // addresses and some domain EFI calls are passed through)
+ tbit.nz p0,p8=r16,60
+(p8) br.cond.spnt page_fault
+//(p8) br.cond.spnt 0
+ ;;
+#endif
+
+ dep r21=-1,r21,IA64_PSR_ED_BIT,1 // set psr.ed (applied on the speculative path, p6)
+ or r19=r19,r17 // insert PTE control bits into r19
+ ;;
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
+(p6) mov cr.ipsr=r21
+ ;;
+(p7) itc.d r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+END(alt_dtlb_miss)
+
+ .org ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(nested_dtlb_miss)
+ /*
+ * In the absence of kernel bugs, we get here when the virtually mapped linear
+ * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
+ * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page
+ * table is missing, a nested TLB miss fault is triggered and control is
+ * transferred to this point. When this happens, we lookup the pte for the
+ * faulting address by walking the page table in physical mode and return to the
+ * continuation point passed in register r30 (or call page_fault if the address is
+ * not mapped).
+ *
+ * Input: r16: faulting address
+ * r29: saved b0
+ * r30: continuation address
+ * r31: saved pr
+ *
+ * Output: r17: physical address of L3 PTE of faulting address
+ * r29: saved b0
+ * r30: continuation address
+ * r31: saved pr
+ *
+ * Clobbered: b0, r18, r19, r21, psr.dt (cleared)
+ */
+ rsm psr.dt // switch to using physical data addressing
+ mov r19=IA64_KR(PT_BASE) // get the page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ ;;
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is faulting address in region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+
+ srlz.d
+ LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+
+ .pred.rel "mutex", p6, p7
+(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+(p6) br.cond.spnt page_fault // some level was NULL, or unused bits set: real fault
+ mov b0=r30
+ br.sptk.many b0 // return to continuation point
+END(nested_dtlb_miss)
+
+ .org ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(ikey_miss)
+#ifdef XEN
+ REFLECT(6) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(6)
+ FAULT(6)
+END(ikey_miss)
+
+ //-----------------------------------------------------------------------------------
+ // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+ ssm psr.dt
+ ;;
+ srlz.i
+ ;;
+ SAVE_MIN_WITH_COVER
+#ifdef XEN
+ alloc r15=ar.pfs,0,0,4,0 // Xen passes an extra arg (cr.itir) to the handler
+ mov out0=cr.ifa
+ mov out1=cr.isr
+ mov out3=cr.itir
+#else
+ alloc r15=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+#endif
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ movl r14=ia64_leave_kernel
+ ;;
+ SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12 // out2 = pointer to pt_regs
+ br.call.sptk.many b6=ia64_do_page_fault // ignore return address
+END(page_fault)
+
+ .org ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(dkey_miss)
+#ifdef XEN
+ REFLECT(7) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(7)
+ FAULT(7)
+END(dkey_miss)
+
+ .org ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(dirty_bit)
+#ifdef XEN
+ REFLECT(8) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(8)
+ /*
+ * What we do here is to simply turn on the dirty bit in the PTE. We need to
+ * update both the page-table and the TLB entry. To efficiently access the PTE,
+ * we address it through the virtual page table. Most likely, the TLB entry for
+ * the relevant virtual page table page is still present in the TLB so we can
+ * normally do this without additional TLB misses. In case the necessary virtual
+ * page table TLB entry isn't present, we take a nested TLB miss hit where we look
+ * up the physical address of the L3 PTE and then continue at label 1 below.
+ */
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+ mov r31=pr // save pr
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.d r25 // install updated PTE
+ ;;
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instructions
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+ ;;
+(p7) ptc.l r16,r24
+ mov b0=r29 // restore b0
+ mov ar.ccv=r28
+#else
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18 // install updated PTE
+#endif
+ mov pr=r31,-1 // restore pr
+ rfi
+END(dirty_bit)
+
+ .org ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(iaccess_bit)
+#ifdef XEN
+ REFLECT(9) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(9)
+ // Like Entry 8, except for instruction access
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ mov r31=pr // save predicates
+#ifdef CONFIG_ITANIUM
+ /*
+ * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
+ */
+ mov r17=cr.ipsr
+ ;;
+ mov r18=cr.iip
+ tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set?
+ ;;
+(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa
+#endif /* CONFIG_ITANIUM */
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;;
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_A,r18 // set the accessed bit
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.i r25 // install updated PTE
+ ;;
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instructions
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+ ;;
+(p7) ptc.l r16,r24
+ mov b0=r29 // restore b0
+ mov ar.ccv=r28
+#else /* !CONFIG_SMP */
+ ;;
+1: ld8 r18=[r17]
+ ;;
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.i r18 // install updated PTE
+#endif /* !CONFIG_SMP */
+ mov pr=r31,-1
+ rfi
+END(iaccess_bit)
+
+ .org ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(daccess_bit)
+#ifdef XEN
+ REFLECT(10) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(10)
+ // Like Entry 8, except for data access
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r31=pr
+ mov r29=b0 // save b0 in case of nested fault
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_A,r18 // set the accessed bit
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.d r25 // install updated PTE
+ /*
+ * Tell the assembler's dependency-violation checker that the above "itc" instructions
+ * cannot possibly affect the following loads:
+ */
+ dv_serialize_data
+ ;;
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+ ;;
+(p7) ptc.l r16,r24
+ mov ar.ccv=r28
+#else
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18 // install updated PTE
+#endif
+ mov b0=r29 // restore b0
+ mov pr=r31,-1
+ rfi
+END(daccess_bit)
+
+ .org ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(break_fault)
+ /*
+ * The streamlined system call entry/exit paths only save/restore the initial part
+ * of pt_regs. This implies that the callers of system-calls must adhere to the
+ * normal procedure calling conventions.
+ *
+ * Registers to be saved & restored:
+ * CR registers: cr.ipsr, cr.iip, cr.ifs
+ * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
+ * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
+ * Registers to be restored only:
+ * r8-r11: output value from the system call.
+ *
+ * During system call exit, scratch registers (including r15) are modified/cleared
+ * to prevent leaking bits from kernel to user level.
+ */
+ DBG_FAULT(11)
+#ifdef XEN
+ mov r16=cr.isr
+ mov r17=cr.iim
+ mov r31=pr
+ ;;
+ movl r18=XSI_PSR_IC
+ ;;
+ ld8 r19=[r18]
+ ;;
+ cmp.eq p7,p0=r0,r17 // is this a pseudo-cover?
+(p7) br.spnt.many dispatch_privop_fault
+ ;;
+ // if vpsr.ic is off, we have a hyperprivop
+ // A hyperprivop is hand-coded assembly with psr.ic off
+ // which means no calls, no use of r1-r15 and no memory accesses
+ // except to pinned addresses!
+ cmp4.eq p7,p0=r0,r19
+(p7) br.sptk.many fast_hyperprivop
+ ;;
+ mov r22=IA64_KR(CURRENT);;
+ adds r22=IA64_VCPU_BREAKIMM_OFFSET,r22;;
+ ld4 r23=[r22];;
+ cmp4.eq p6,p7=r23,r17 // Xen-reserved breakimm?
+(p6) br.spnt.many dispatch_break_fault
+ ;;
+ br.sptk.many fast_break_reflect
+ ;;
+#endif
+ mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
+ mov r17=cr.iim
+ mov r18=__IA64_BREAK_SYSCALL
+ mov r21=ar.fpsr
+ mov r29=cr.ipsr
+ mov r19=b6
+ mov r25=ar.unat
+ mov r27=ar.rsc
+ mov r26=ar.pfs
+ mov r28=cr.iip
+#ifndef XEN
+ mov r31=pr // prepare to save predicates
+#endif
+ mov r20=r1
+ ;;
+ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
+ cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so)
+(p7) br.cond.spnt non_syscall
+ ;;
+ ld1 r17=[r16] // load current->thread.on_ustack flag
+ st1 [r16]=r0 // clear current->thread.on_ustack flag
+ add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
+ ;;
+ invala
+
+ /* adjust return address so we skip over the break instruction: */
+
+ extr.u r8=r29,41,2 // extract ei field from cr.ipsr
+ ;;
+ cmp.eq p6,p7=2,r8 // ipsr.ei==2?
+ mov r2=r1 // setup r2 for ia64_syscall_setup
+ ;;
+(p6) mov r8=0 // clear ei to 0
+(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped
+(p7) adds r8=1,r8 // increment ei to next slot
+ ;;
+ cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already?
+ dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
+ ;;
+
+ // switch from user to kernel RBS:
+ MINSTATE_START_SAVE_MIN_VIRT
+ br.call.sptk.many b7=ia64_syscall_setup
+ ;;
+ MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ mov r3=NR_syscalls - 1
+ ;;
+(p15) ssm psr.i // restore psr.i
+ // p10==true means out registers are more than 8 or r15's Nat is true
+(p10) br.cond.spnt.many ia64_ret_from_syscall
+ ;;
+ movl r16=sys_call_table
+
+ adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
+ movl r2=ia64_ret_from_syscall
+ ;;
+ shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
+ cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
+ mov rp=r2 // set the real return addr
+ ;;
+(p6) ld8 r20=[r20] // load address of syscall entry point
+(p7) movl r20=sys_ni_syscall
+
+ add r2=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ ld4 r2=[r2] // r2 = current_thread_info()->flags
+ ;;
+ and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
+ ;;
+ cmp.eq p8,p0=r2,r0
+ mov b6=r20
+ ;;
+(p8) br.call.sptk.many b6=b6 // ignore this return addr
+ br.cond.sptk ia64_trace_syscall
+ // NOT REACHED
+END(break_fault)
+
+ .org ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(interrupt)
+ // External interrupt: build pt_regs, then call ia64_handle_irq(cr.ivr, &pt_regs).
+ DBG_FAULT(12)
+ mov r31=pr // prepare to save predicates
+ ;;
+ SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ srlz.i // ensure everybody knows psr.ic is back on
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+ mov out0=cr.ivr // pass cr.ivr as first arg
+ add out1=16,sp // pass pointer to pt_regs as second arg
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=ia64_handle_irq
+END(interrupt)
+
+ .org ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ DBG_FAULT(13)
+ FAULT(13)
+
+#ifdef XEN
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+GLOBAL_ENTRY(dispatch_break_fault)
+ // Heavyweight break handling: out0=cr.ifa, out1=&pt_regs, out2=cr.isr,
+ // out3=cr.iim; continues at ia64_prepare_handle_break with rp=ia64_leave_kernel.
+ SAVE_MIN_WITH_COVER
+ ;;
+dispatch_break_fault_post_save:
+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa
+ adds out1=16,sp
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.iim // FIXME: pity to make this slow access twice
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_break
+END(dispatch_break_fault)
+#endif
+
+ .org ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ DBG_FAULT(14)
+ FAULT(14)
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ *
+ * ia64_syscall_setup() is a separate subroutine so that it can
+ * allocate stacked registers so it can safely demine any
+ * potential NaT values from the input registers.
+ *
+ * On entry:
+ * - executing on bank 0 or bank 1 register set (doesn't matter)
+ * - r1: stack pointer
+ * - r2: current task pointer
+ * - r3: preserved
+ * - r11: original contents (saved ar.pfs to be saved)
+ * - r12: original contents (sp to be saved)
+ * - r13: original contents (tp to be saved)
+ * - r15: original contents (syscall # to be saved)
+ * - r18: saved bsp (after switching to kernel stack)
+ * - r19: saved b6
+ * - r20: saved r1 (gp)
+ * - r21: saved ar.fpsr
+ * - r22: kernel's register backing store base (krbs_base)
+ * - r23: saved ar.bspstore
+ * - r24: saved ar.rnat
+ * - r25: saved ar.unat
+ * - r26: saved ar.pfs
+ * - r27: saved ar.rsc
+ * - r28: saved cr.iip
+ * - r29: saved cr.ipsr
+ * - r31: saved pr
+ * - b0: original contents (to be saved)
+ * On exit:
+ * - executing on bank 1 registers
+ * - psr.ic enabled, interrupts restored
+ * - p10: TRUE if syscall is invoked with more than 8 out
+ * registers or r15's Nat is true
+ * - r1: kernel's gp
+ * - r3: preserved (same as on entry)
+ * - r8: -EINVAL if p10 is true
+ * - r12: points to kernel stack
+ * - r13: points to current task
+ * - p15: TRUE if interrupts need to be re-enabled
+ * - ar.fpsr: set to kernel settings
+ */
+GLOBAL_ENTRY(ia64_syscall_setup)
+#ifndef XEN
+#if PT(B6) != 0
+# error This code assumes that b6 is the first field in pt_regs.
+#endif
+#endif
+ st8 [r1]=r19 // save b6 (relies on PT(B6)==0; NOTE(review): not statically checked under XEN)
+ add r16=PT(CR_IPSR),r1 // initialize first base pointer
+ add r17=PT(R11),r1 // initialize second base pointer
+ ;;
+ alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
+ st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
+ tnat.nz p8,p0=in0
+
+ st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
+ tnat.nz p9,p0=in1
+(pKStk) mov r18=r0 // make sure r18 isn't NaT
+ ;;
+
+ st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
+ st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
+ mov r28=b0 // save b0 (2 cyc)
+ ;;
+
+ st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
+ dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
+(p8) mov in0=-1 // demine NaT'd input register
+ ;;
+
+ st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
+ extr.u r11=r19,7,7 // I0 // get sol of ar.pfs
+ and r8=0x7f,r19 // A // get sof of ar.pfs
+
+ st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
+ tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
+(p9) mov in1=-1
+ ;;
+
+(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
+ tnat.nz p10,p0=in2
+ add r11=8,r11
+ ;;
+(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field
+(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
+ tnat.nz p11,p0=in3
+ ;;
+(p10) mov in2=-1
+ tnat.nz p12,p0=in4 // [I0]
+(p11) mov in3=-1
+ ;;
+(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
+(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
+ shl r18=r18,16 // compute ar.rsc to be used for "loadrs"
+ ;;
+ st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
+ st8 [r17]=r28,PT(R1)-PT(B0) // save b0
+ tnat.nz p13,p0=in5 // [I0]
+ ;;
+ st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs"
+ st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
+(p12) mov in4=-1
+ ;;
+
+.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
+.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
+(p13) mov in5=-1
+ ;;
+ st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
+ tnat.nz p14,p0=in6
+ cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
+ ;;
+ stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p9) tnat.nz p10,p0=r15
+ adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
+
+ st8.spill [r17]=r15 // save r15
+ tnat.nz p8,p0=in7
+ nop.i 0
+
+ mov r13=r2 // establish `current'
+ movl r1=__gp // establish kernel global pointer
+ ;;
+(p14) mov in6=-1
+(p8) mov in7=-1
+ nop.i 0
+
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ movl r17=FPSR_DEFAULT
+ ;;
+ mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
+(p10) mov r8=-EINVAL // bad frame (or NaT'd r15): fail the syscall
+ br.ret.sptk.many b7
+END(ia64_syscall_setup)
+
+ .org ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ DBG_FAULT(15)
+ FAULT(15)
+
+ /*
+ * Squatting in this space ...
+ *
+ * This special case dispatcher for illegal operation faults allows preserved
+ * registers to be modified through a callback function (asm only) that is handed
+ * back from the fault handler in r8. Up to three arguments can be passed to the
+ * callback function by returning an aggregate with the callback as its first
+ * element, followed by the arguments.
+ */
+ENTRY(dispatch_illegal_op_fault)
+ SAVE_MIN_WITH_COVER
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
+ mov out0=ar.ec
+ ;;
+ SAVE_REST
+ ;;
+ br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0: ;;
+ alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
+ mov out0=r9
+ mov out1=r10
+ mov out2=r11
+ movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ mov b6=r8 // b6 = callback address handed back by the fault handler (may be 0)
+ ;;
+ cmp.ne p6,p0=0,r8
+(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
+ br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
+
+ .org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ DBG_FAULT(16)
+ FAULT(16)
+
+#ifdef XEN
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+ENTRY(dispatch_privop_fault)
+ // Privileged-op dispatch: out0=cr.ifa, out1=&pt_regs, out2=cr.isr,
+ // out3=cr.itir; continues at ia64_prepare_handle_privop with rp=ia64_leave_kernel.
+ SAVE_MIN_WITH_COVER
+ ;;
+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa
+ adds out1=16,sp
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.itir
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_privop
+END(dispatch_privop_fault)
+#endif
+
+
+ .org ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ DBG_FAULT(17)
+ FAULT(17)
+
+ENTRY(non_syscall)
+ SAVE_MIN_WITH_COVER
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+ alloc r14=ar.pfs,0,0,2,0
+ mov out0=cr.iim
+ add out1=16,sp
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ movl r15=ia64_leave_kernel
+ ;;
+ SAVE_REST
+ mov rp=r15
+ ;;
+ br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
+END(non_syscall)
+
+ .org ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ DBG_FAULT(18)
+ FAULT(18)
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ */
+
+ENTRY(dispatch_unaligned_handler)
+ // Unaligned-reference dispatch: out0=cr.ifa, out1=&pt_regs;
+ // continues at ia64_prepare_handle_unaligned with rp=ia64_leave_kernel.
+ SAVE_MIN_WITH_COVER
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa
+ adds out1=16,sp
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_unaligned
+END(dispatch_unaligned_handler)
+
+ .org ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ DBG_FAULT(19)
+ FAULT(19)
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ */
+
+ENTRY(dispatch_to_fault_handler)
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: fault vector number (e.g., 24 for General Exception)
+ * r31: contains saved predicates (pr)
+ */
+ SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ mov out0=r15 // vector number (presumably r19 copied to r15 by SAVE_MIN_WITH_COVER_R19 -- confirm)
+ mov out1=cr.isr
+ mov out2=cr.ifa
+ mov out3=cr.iim
+ mov out4=cr.itir
+ ;;
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=ia64_fault
+END(dispatch_to_fault_handler)
+
+//
+// --- End of long entries, Beginning of short entries
+//
+
+ .org ia64_ivt+0x5000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ENTRY(page_not_present)
+#ifdef XEN
+ REFLECT(20) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(20)
+ mov r16=cr.ifa
+ rsm psr.dt
+ /*
+ * The Linux page fault handler doesn't expect non-present pages to be in
+ * the TLB. Flush the existing entry now, so we meet that expectation.
+ */
+ mov r17=PAGE_SHIFT<<2
+ ;;
+ ptc.l r16,r17
+ ;;
+ mov r31=pr
+ srlz.d
+ br.sptk.many page_fault
+END(page_not_present)
+
+ .org ia64_ivt+0x5100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ENTRY(key_permission)
+#ifdef XEN
+ REFLECT(21) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(21)
+ mov r16=cr.ifa // faulting address, handed to page_fault via r16
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.sptk.many page_fault
+END(key_permission)
+
+ .org ia64_ivt+0x5200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(iaccess_rights)
+#ifdef XEN
+ REFLECT(22) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(22)
+ mov r16=cr.ifa // faulting address, handed to page_fault via r16
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.sptk.many page_fault
+END(iaccess_rights)
+
+ .org ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(daccess_rights)
+#ifdef XEN
+ REFLECT(23) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(23)
+ mov r16=cr.ifa // faulting address, handed to page_fault via r16
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.sptk.many page_fault
+END(daccess_rights)
+
+ .org ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(general_exception)
+ DBG_FAULT(24)
+ mov r16=cr.isr
+ mov r31=pr
+ ;;
+#ifdef XEN
+ cmp4.ge p6,p0=0x20,r16 // p6 <- (low 32 bits of cr.isr <= 0x20): privileged-op case
+(p6) br.sptk.many dispatch_privop_fault
+#else
+ cmp4.eq p6,p0=0,r16
+(p6) br.sptk.many dispatch_illegal_op_fault
+#endif
+ ;;
+ mov r19=24 // fault number
+ br.sptk.many dispatch_to_fault_handler
+END(general_exception)
+
+ .org ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(disabled_fp_reg)
+#ifdef XEN
+ REFLECT(25) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(25)
+ rsm psr.dfh // ensure we can access fph
+ ;;
+ srlz.d
+ mov r31=pr
+ mov r19=25 // fault number, passed to dispatch_to_fault_handler in r19
+ br.sptk.many dispatch_to_fault_handler
+END(disabled_fp_reg)
+
+ .org ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(nat_consumption)
+#ifdef XEN
+ REFLECT(26) // Xen: presumably reflects this fault to the guest (macro defined elsewhere) -- confirm
+#endif
+ DBG_FAULT(26)
+ FAULT(26)
+END(nat_consumption)
+
+ .org ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(speculation_vector)
+#ifdef XEN
+ // this probably need not reflect...
+ REFLECT(27)
+#endif
+ DBG_FAULT(27)
+ /*
+ * A [f]chk.[as] instruction needs to take the branch to the recovery code but
+ * this part of the architecture is not implemented in hardware on some CPUs, such
+ * as Itanium. Thus, in general we need to emulate the behavior. IIM contains
+ * the relative target (not yet sign extended). So after sign extending it we
+ * simply add it to IIP. We also need to reset the EI field of the IPSR to zero,
+ * i.e., the slot to restart into.
+ *
+ * cr.iim contains zero_ext(imm21)
+ */
+ mov r18=cr.iim
+ ;;
+ mov r17=cr.iip
+ shl r18=r18,43 // put sign bit in position (43=64-21)
+ ;;
+
+ mov r16=cr.ipsr
+ shr r18=r18,39 // sign extend (39=43-4)
+ ;;
+
+ add r17=r17,r18 // now add the offset
+ ;;
+ mov cr.iip=r17
+ dep r16=0,r16,41,2 // clear EI
+ ;;
+
+ mov cr.ipsr=r16
+ ;;
+
+ rfi // and go back
+END(speculation_vector)
+
+ .org ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ DBG_FAULT(28)
+ FAULT(28)
+
+ .org ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(debug_vector)
+#ifdef XEN
+ REFLECT(29)
+#endif
+ DBG_FAULT(29)
+ FAULT(29)
+END(debug_vector)
+
+ .org ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(unaligned_access)
+#ifdef XEN
+ REFLECT(30)
+#endif
+ DBG_FAULT(30)
+ mov r16=cr.ipsr
+ mov r31=pr // prepare to save predicates
+ ;;
+ br.sptk.many dispatch_unaligned_handler
+END(unaligned_access)
+
+ .org ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(unsupported_data_reference)
+#ifdef XEN
+ REFLECT(31)
+#endif
+ DBG_FAULT(31)
+ FAULT(31)
+END(unsupported_data_reference)
+
+ .org ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(floating_point_fault)
+#ifdef XEN
+ REFLECT(32)
+#endif
+ DBG_FAULT(32)
+ FAULT(32)
+END(floating_point_fault)
+
+ .org ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(floating_point_trap)
+#ifdef XEN
+ REFLECT(33)
+#endif
+ DBG_FAULT(33)
+ FAULT(33)
+END(floating_point_trap)
+
+ .org ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(lower_privilege_trap)
+#ifdef XEN
+ REFLECT(34)
+#endif
+ DBG_FAULT(34)
+ FAULT(34)
+END(lower_privilege_trap)
+
+ .org ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(taken_branch_trap)
+#ifdef XEN
+ REFLECT(35)
+#endif
+ DBG_FAULT(35)
+ FAULT(35)
+END(taken_branch_trap)
+
+ .org ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(single_step_trap)
+#ifdef XEN
+ REFLECT(36)
+#endif
+ DBG_FAULT(36)
+ FAULT(36)
+END(single_step_trap)
+
+ .org ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+ DBG_FAULT(37)
+ FAULT(37)
+
+ .org ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ DBG_FAULT(38)
+ FAULT(38)
+
+ .org ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ DBG_FAULT(39)
+ FAULT(39)
+
+ .org ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ DBG_FAULT(40)
+ FAULT(40)
+
+ .org ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ DBG_FAULT(41)
+ FAULT(41)
+
+ .org ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ DBG_FAULT(42)
+ FAULT(42)
+
+ .org ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+ DBG_FAULT(43)
+ FAULT(43)
+
+ .org ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ DBG_FAULT(44)
+ FAULT(44)
+
+ .org ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(ia32_exception)
+#ifdef XEN
+ REFLECT(45)
+#endif
+ DBG_FAULT(45)
+ FAULT(45)
+END(ia32_exception)
+
+ .org ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ENTRY(ia32_intercept)
+#ifdef XEN
+ REFLECT(46)
+#endif
+ DBG_FAULT(46)
+#ifdef CONFIG_IA32_SUPPORT
+ mov r31=pr
+ mov r16=cr.isr
+ ;;
+ extr.u r17=r16,16,8 // get ISR.code
+ mov r18=ar.eflag
+ mov r19=cr.iim // old eflag value
+ ;;
+ cmp.ne p6,p0=2,r17
+(p6) br.cond.spnt 1f // not a system flag fault
+ xor r16=r18,r19
+ ;;
+ extr.u r17=r16,18,1 // get the eflags.ac bit
+ ;;
+ cmp.eq p6,p0=0,r17
+(p6) br.cond.spnt 1f // eflags.ac bit didn't change
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi
+
+1:
+#endif // CONFIG_IA32_SUPPORT
+ FAULT(46)
+END(ia32_intercept)
+
+ .org ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+ENTRY(ia32_interrupt)
+#ifdef XEN
+ REFLECT(47)
+#endif
+ DBG_FAULT(47)
+#ifdef CONFIG_IA32_SUPPORT
+ mov r31=pr
+ br.sptk.many dispatch_to_ia32_handler
+#else
+ FAULT(47)
+#endif
+END(ia32_interrupt)
+
+ .org ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ DBG_FAULT(48)
+ FAULT(48)
+
+ .org ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ DBG_FAULT(49)
+ FAULT(49)
+
+ .org ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ DBG_FAULT(50)
+ FAULT(50)
+
+ .org ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ DBG_FAULT(51)
+ FAULT(51)
+
+ .org ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+ DBG_FAULT(52)
+ FAULT(52)
+
+ .org ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ DBG_FAULT(53)
+ FAULT(53)
+
+ .org ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ DBG_FAULT(54)
+ FAULT(54)
+
+ .org ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ DBG_FAULT(55)
+ FAULT(55)
+
+ .org ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ DBG_FAULT(56)
+ FAULT(56)
+
+ .org ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ DBG_FAULT(57)
+ FAULT(57)
+
+ .org ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ DBG_FAULT(58)
+ FAULT(58)
+
+ .org ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ DBG_FAULT(59)
+ FAULT(59)
+
+ .org ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ DBG_FAULT(60)
+ FAULT(60)
+
+ .org ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ DBG_FAULT(61)
+ FAULT(61)
+
+ .org ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ DBG_FAULT(62)
+ FAULT(62)
+
+ .org ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+ DBG_FAULT(63)
+ FAULT(63)
+
+ .org ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+ DBG_FAULT(64)
+ FAULT(64)
+
+ .org ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+ DBG_FAULT(65)
+ FAULT(65)
+
+ .org ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+ DBG_FAULT(66)
+ FAULT(66)
+
+ .org ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+ DBG_FAULT(67)
+ FAULT(67)
+
+#ifdef XEN
+ .org ia64_ivt+0x8000
+ENTRY(dispatch_reflection)
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: intr type (offset into ivt, see ia64_int.h)
+ * r31: contains saved predicates (pr)
+ */
+ SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ mov out4=r15
+ mov out0=cr.ifa
+ adds out1=16,sp
+ mov out2=cr.isr
+ mov out3=cr.iim
+// mov out3=cr.itir
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_reflection
+END(dispatch_reflection)
+
+#define SAVE_MIN_COVER_DONE DO_SAVE_MIN(,mov r30=cr.ifs,)
+
+// same as dispatch_break_fault except cover has already been done
+GLOBAL_ENTRY(dispatch_slow_hyperprivop)
+ SAVE_MIN_COVER_DONE
+ ;;
+ br.sptk.many dispatch_break_fault_post_save
+END(dispatch_slow_hyperprivop)
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ */
+
+ // IA32 interrupt entry point
+
+ENTRY(dispatch_to_ia32_handler)
+ SAVE_MIN
+ ;;
+ mov r14=cr.isr
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i
+ adds r3=8,r2 // Base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;;
+ mov r15=0x80
+ shr r14=r14,16 // Get interrupt number
+ ;;
+ cmp.ne p6,p0=r14,r15
+(p6) br.call.dpnt.many b6=non_ia32_syscall
+
+ adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
+ adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
+ ;;
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ ld8 r8=[r14] // get r8
+ ;;
+ st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP)
+ ;;
+ alloc r15=ar.pfs,0,0,6,0 // must be first in an insn group
+ ;;
+ ld4 r8=[r14],8 // r8 == eax (syscall number)
+ mov r15=IA32_NR_syscalls
+ ;;
+ cmp.ltu.unc p6,p7=r8,r15
+ ld4 out1=[r14],8 // r9 == ecx
+ ;;
+ ld4 out2=[r14],8 // r10 == edx
+ ;;
+ ld4 out0=[r14] // r11 == ebx
+ adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
+ ;;
+ ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp
+ ;;
+ ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi
+ adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ ld4 out4=[r14] // r15 == edi
+ movl r16=ia32_syscall_table
+ ;;
+(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
+ ld4 r2=[r2] // r2 = current_thread_info()->flags
+ ;;
+ ld8 r16=[r16]
+ and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
+ ;;
+ mov b6=r16
+ movl r15=ia32_ret_from_syscall
+ cmp.eq p8,p0=r2,r0
+ ;;
+ mov rp=r15
+(p8) br.call.sptk.many b6=b6
+ br.cond.sptk ia32_trace_syscall
+
+non_ia32_syscall:
+ alloc r15=ar.pfs,0,0,2,0
+ mov out0=r14 // interrupt #
+ add out1=16,sp // pointer to pt_regs
+ ;; // avoid WAW on CFM
+ br.call.sptk.many rp=ia32_bad_interrupt
+.ret1: movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ br.ret.sptk.many rp
+END(dispatch_to_ia32_handler)
+
+#endif /* CONFIG_IA32_SUPPORT */
diff --git a/xen/arch/ia64/lib/Makefile b/xen/arch/ia64/lib/Makefile
new file mode 100644
index 0000000000..77e94724ce
--- /dev/null
+++ b/xen/arch/ia64/lib/Makefile
@@ -0,0 +1,44 @@
+#
+# Makefile for ia64-specific library routines..
+#
+
+include $(BASEDIR)/Rules.mk
+
+OBJS := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
+ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \
+ bitop.o checksum.o clear_page.o csum_partial_copy.o copy_page.o \
+ clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \
+ flush.o ip_fast_csum.o do_csum.o copy_user.o \
+ memset.o strlen.o memcpy.o
+
+default: $(OBJS)
+ $(LD) -r -o ia64lib.o $(OBJS)
+
+AFLAGS += -I$(BASEDIR)/include -D__ASSEMBLY__
+
+__divdi3.o: idiv64.S
+ $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $<
+
+__udivdi3.o: idiv64.S
+ $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DUNSIGNED -c -o $@ $<
+
+__moddi3.o: idiv64.S
+ $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -c -o $@ $<
+
+__umoddi3.o: idiv64.S
+ $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -DUNSIGNED -c -o $@ $<
+
+__divsi3.o: idiv32.S
+ $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $<
+
+__udivsi3.o: idiv32.S
+ $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DUNSIGNED -c -o $@ $<
+
+__modsi3.o: idiv32.S
+ $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -c -o $@ $<
+
+__umodsi3.o: idiv32.S
+ $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -DMODULO -DUNSIGNED -c -o $@ $<
+
+clean:
+ rm -f *.o *~
diff --git a/xen/arch/ia64/mm_init.c b/xen/arch/ia64/mm_init.c
new file mode 100644
index 0000000000..0e3ce45dec
--- /dev/null
+++ b/xen/arch/ia64/mm_init.c
@@ -0,0 +1,547 @@
+/*
+ * Initialize MMU support.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#ifdef XEN
+#include <xen/sched.h>
+#endif
+#include <linux/bootmem.h>
+#include <linux/efi.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#ifndef XEN
+#include <linux/personality.h>
+#endif
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#ifndef XEN
+#include <linux/proc_fs.h>
+#endif
+
+#ifndef XEN
+#include <asm/a.out.h>
+#endif
+#include <asm/bitops.h>
+#include <asm/dma.h>
+#ifndef XEN
+#include <asm/ia32.h>
+#endif
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/numa.h>
+#include <asm/patch.h>
+#include <asm/pgalloc.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/tlb.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/mca.h>
+
+#ifndef XEN
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+#endif
+
+extern void ia64_tlb_init (void);
+
+unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+unsigned long vmalloc_end = VMALLOC_END_INIT;
+EXPORT_SYMBOL(vmalloc_end);
+struct page *vmem_map;
+EXPORT_SYMBOL(vmem_map);
+#endif
+
+static int pgt_cache_water[2] = { 25, 50 };
+
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
+EXPORT_SYMBOL(zero_page_memmap_ptr);
+
+#ifdef XEN
+void *high_memory;
+EXPORT_SYMBOL(high_memory);
+
+/////////////////////////////////////////////
+// following from linux-2.6.7/mm/mmap.c
+/* description of effects of mapping type and prot in current implementation.
+ * this is due to the limited x86 page protection hardware. The expected
+ * behavior is in parens:
+ *
+ * map_type prot
+ * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
+ * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (yes) yes w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (copy) copy w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ */
+pgprot_t protection_map[16] = {
+ __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
+ __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
+};
+
+void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+ printf("insert_vm_struct: called, not implemented yet\n");
+}
+
+/////////////////////////////////////////////
+//following from linux/mm/memory.c
+
+#ifndef __ARCH_HAS_4LEVEL_HACK
+/*
+ * Allocate page upper directory.
+ *
+ * We've already handled the fast-path in-line, and we own the
+ * page table lock.
+ *
+ * On a two-level or three-level page table, this ends up actually being
+ * entirely optimized away.
+ */
+pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+ pud_t *new;
+
+ spin_unlock(&mm->page_table_lock);
+ new = pud_alloc_one(mm, address);
+ spin_lock(&mm->page_table_lock);
+ if (!new)
+ return NULL;
+
+ /*
+ * Because we dropped the lock, we should re-check the
+ * entry, as somebody else could have populated it..
+ */
+ if (pgd_present(*pgd)) {
+ pud_free(new);
+ goto out;
+ }
+ pgd_populate(mm, pgd, new);
+ out:
+ return pud_offset(pgd, address);
+}
+
+/*
+ * Allocate page middle directory.
+ *
+ * We've already handled the fast-path in-line, and we own the
+ * page table lock.
+ *
+ * On a two-level page table, this ends up actually being entirely
+ * optimized away.
+ */
+pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+ pmd_t *new;
+
+ spin_unlock(&mm->page_table_lock);
+ new = pmd_alloc_one(mm, address);
+ spin_lock(&mm->page_table_lock);
+ if (!new)
+ return NULL;
+
+ /*
+ * Because we dropped the lock, we should re-check the
+ * entry, as somebody else could have populated it..
+ */
+ if (pud_present(*pud)) {
+ pmd_free(new);
+ goto out;
+ }
+ pud_populate(mm, pud, new);
+ out:
+ return pmd_offset(pud, address);
+}
+#endif
+
+pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+{
+ if (!pmd_present(*pmd)) {
+ struct page *new;
+
+ spin_unlock(&mm->page_table_lock);
+ new = pte_alloc_one(mm, address);
+ spin_lock(&mm->page_table_lock);
+ if (!new)
+ return NULL;
+
+ /*
+ * Because we dropped the lock, we should re-check the
+ * entry, as somebody else could have populated it..
+ */
+ if (pmd_present(*pmd)) {
+ pte_free(new);
+ goto out;
+ }
+ inc_page_state(nr_page_table_pages);
+ pmd_populate(mm, pmd, new);
+ }
+out:
+ return pte_offset_map(pmd, address);
+}
+/////////////////////////////////////////////
+#endif /* XEN */
+
+void
+update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
+{
+ unsigned long addr;
+ struct page *page;
+
+ if (!pte_exec(pte))
+ return; /* not an executable page... */
+
+ page = pte_page(pte);
+ /* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
+ addr = (unsigned long) page_address(page);
+
+ if (test_bit(PG_arch_1, &page->flags))
+ return; /* i-cache is already coherent with d-cache */
+
+ flush_icache_range(addr, addr + PAGE_SIZE);
+ set_bit(PG_arch_1, &page->flags); /* mark page as clean */
+}
+
+inline void
+ia64_set_rbs_bot (void)
+{
+#ifdef XEN
+ unsigned stack_size = MAX_USER_STACK_SIZE;
+#else
+ unsigned long stack_size = current->rlim[RLIMIT_STACK].rlim_max & -16;
+#endif
+
+ if (stack_size > MAX_USER_STACK_SIZE)
+ stack_size = MAX_USER_STACK_SIZE;
+ current->arch._thread.rbs_bot = STACK_TOP - stack_size;
+}
+
+/*
+ * This performs some platform-dependent address space initialization.
+ * On IA-64, we want to setup the VM area for the register backing
+ * store (which grows upwards) and install the gateway page which is
+ * used for signal trampolines, etc.
+ */
+void
+ia64_init_addr_space (void)
+{
+#ifdef XEN
+printf("ia64_init_addr_space: called, not implemented\n");
+#else
+ struct vm_area_struct *vma;
+
+ ia64_set_rbs_bot();
+
+ /*
+ * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
+ * the problem. When the process attempts to write to the register backing store
+ * for the first time, it will get a SEGFAULT in this case.
+ */
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma) {
+ memset(vma, 0, sizeof(*vma));
+ vma->vm_mm = current->mm;
+ vma->vm_start = current->arch._thread.rbs_bot & PAGE_MASK;
+ vma->vm_end = vma->vm_start + PAGE_SIZE;
+ vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
+ vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
+ insert_vm_struct(current->mm, vma);
+ }
+
+ /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
+ if (!(current->personality & MMAP_PAGE_ZERO)) {
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma) {
+ memset(vma, 0, sizeof(*vma));
+ vma->vm_mm = current->mm;
+ vma->vm_end = PAGE_SIZE;
+ vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
+ vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+ insert_vm_struct(current->mm, vma);
+ }
+ }
+#endif
+}
+
+setup_gate (void)
+{
+ printk("setup_gate not-implemented.\n");
+}
+
+void __devinit
+ia64_mmu_init (void *my_cpu_data)
+{
+ unsigned long psr, pta, impl_va_bits;
+ extern void __devinit tlb_init (void);
+ int cpu;
+
+#ifdef CONFIG_DISABLE_VHPT
+# define VHPT_ENABLE_BIT 0
+#else
+# define VHPT_ENABLE_BIT 1
+#endif
+
+ /* Pin mapping for percpu area into TLB */
+ psr = ia64_clear_ic();
+ ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
+ pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
+ PERCPU_PAGE_SHIFT);
+
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+
+ /*
+ * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
+ * address space. The IA-64 architecture guarantees that at least 50 bits of
+ * virtual address space are implemented but if we pick a large enough page size
+ * (e.g., 64KB), the mapped address space is big enough that it will overlap with
+ * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages,
+ * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
+ * problem in practice. Alternatively, we could truncate the top of the mapped
+ * address space to not permit mappings that would overlap with the VMLPT.
+ * --davidm 00/12/06
+ */
+# define pte_bits 3
+# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
+ /*
+ * The virtual page table has to cover the entire implemented address space within
+ * a region even though not all of this space may be mappable. The reason for
+ * this is that the Access bit and Dirty bit fault handlers perform
+ * non-speculative accesses to the virtual page table, so the address range of the
+ * virtual page table itself needs to be covered by virtual page table.
+ */
+# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+# define POW2(n) (1ULL << (n))
+
+ impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+
+ if (impl_va_bits < 51 || impl_va_bits > 61)
+ panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
+
+#ifdef XEN
+ vhpt_init();
+#endif
+#if 0
+ /* place the VMLPT at the end of each page-table mapped region: */
+ pta = POW2(61) - POW2(vmlpt_bits);
+
+ if (POW2(mapped_space_bits) >= pta)
+ panic("mm/init: overlap between virtually mapped linear page table and "
+ "mapped kernel space!");
+ /*
+ * Set the (virtually mapped linear) page table address. Bit
+ * 8 selects between the short and long format, bits 2-7 the
+ * size of the table, and bit 0 whether the VHPT walker is
+ * enabled.
+ */
+ ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
+#endif
+ ia64_tlb_init();
+
+#ifdef CONFIG_HUGETLB_PAGE
+ ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
+ ia64_srlz_d();
+#endif
+
+ cpu = smp_processor_id();
+
+#ifndef XEN
+ /* mca handler uses cr.lid as key to pick the right entry */
+ ia64_mca_tlb_list[cpu].cr_lid = ia64_getreg(_IA64_REG_CR_LID);
+
+ /* insert this percpu data information into our list for MCA recovery purposes */
+ ia64_mca_tlb_list[cpu].percpu_paddr = pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL));
+ /* Also save per-cpu tlb flush recipe for use in physical mode mca handler */
+ ia64_mca_tlb_list[cpu].ptce_base = local_cpu_data->ptce_base;
+ ia64_mca_tlb_list[cpu].ptce_count[0] = local_cpu_data->ptce_count[0];
+ ia64_mca_tlb_list[cpu].ptce_count[1] = local_cpu_data->ptce_count[1];
+ ia64_mca_tlb_list[cpu].ptce_stride[0] = local_cpu_data->ptce_stride[0];
+ ia64_mca_tlb_list[cpu].ptce_stride[1] = local_cpu_data->ptce_stride[1];
+#endif
+}
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+
+int
+create_mem_map_page_table (u64 start, u64 end, void *arg)
+{
+ unsigned long address, start_page, end_page;
+ struct page *map_start, *map_end;
+ int node;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+ map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+ start_page = (unsigned long) map_start & PAGE_MASK;
+ end_page = PAGE_ALIGN((unsigned long) map_end);
+ node = paddr_to_nid(__pa(start));
+
+ for (address = start_page; address < end_page; address += PAGE_SIZE) {
+ pgd = pgd_offset_k(address);
+ if (pgd_none(*pgd))
+ pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pmd = pmd_offset(pgd, address);
+
+ if (pmd_none(*pmd))
+ pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pte = pte_offset_kernel(pmd, address);
+
+ if (pte_none(*pte))
+ set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
+ PAGE_KERNEL));
+ }
+ return 0;
+}
+
+struct memmap_init_callback_data {
+ struct page *start;
+ struct page *end;
+ int nid;
+ unsigned long zone;
+};
+
+static int
+virtual_memmap_init (u64 start, u64 end, void *arg)
+{
+ struct memmap_init_callback_data *args;
+ struct page *map_start, *map_end;
+
+ args = (struct memmap_init_callback_data *) arg;
+
+ map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+ map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+ if (map_start < args->start)
+ map_start = args->start;
+ if (map_end > args->end)
+ map_end = args->end;
+
+ /*
+ * We have to initialize "out of bounds" struct page elements that fit completely
+ * on the same pages that were allocated for the "in bounds" elements because they
+ * may be referenced later (and found to be "reserved").
+ */
+ map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
+ map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
+ / sizeof(struct page));
+
+ if (map_start < map_end)
+ memmap_init_zone(map_start, (unsigned long) (map_end - map_start),
+ args->nid, args->zone, page_to_pfn(map_start));
+ return 0;
+}
+
+void
+memmap_init (struct page *start, unsigned long size, int nid,
+ unsigned long zone, unsigned long start_pfn)
+{
+ if (!vmem_map)
+ memmap_init_zone(start, size, nid, zone, start_pfn);
+ else {
+ struct memmap_init_callback_data args;
+
+ args.start = start;
+ args.end = start + size;
+ args.nid = nid;
+ args.zone = zone;
+
+ efi_memmap_walk(virtual_memmap_init, &args);
+ }
+}
+
+int
+ia64_pfn_valid (unsigned long pfn)
+{
+ char byte;
+ struct page *pg = pfn_to_page(pfn);
+
+ return (__get_user(byte, (char *) pg) == 0)
+ && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
+ || (__get_user(byte, (char *) (pg + 1) - 1) == 0));
+}
+EXPORT_SYMBOL(ia64_pfn_valid);
+
+int
+find_largest_hole (u64 start, u64 end, void *arg)
+{
+ u64 *max_gap = arg;
+
+ static u64 last_end = PAGE_OFFSET;
+
+ /* NOTE: this algorithm assumes efi memmap table is ordered */
+
+#ifdef XEN
+//printf("find_largest_hole: start=%lx,end=%lx,max_gap=%lx\n",start,end,*(unsigned long *)arg);
+#endif
+ if (*max_gap < (start - last_end))
+ *max_gap = start - last_end;
+ last_end = end;
+#ifdef XEN
+//printf("find_largest_hole2: max_gap=%lx,last_end=%lx\n",*max_gap,last_end);
+#endif
+ return 0;
+}
+#endif /* CONFIG_VIRTUAL_MEM_MAP */
+
+static int
+count_reserved_pages (u64 start, u64 end, void *arg)
+{
+ unsigned long num_reserved = 0;
+ unsigned long *count = arg;
+
+ for (; start < end; start += PAGE_SIZE)
+ if (PageReserved(virt_to_page(start)))
+ ++num_reserved;
+ *count += num_reserved;
+ return 0;
+}
+
+/*
+ * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
+ * system call handler. When this option is in effect, all fsyscalls will end up bubbling
+ * down into the kernel and calling the normal (heavy-weight) syscall handler. This is
+ * useful for performance testing, but conceivably could also come in handy for debugging
+ * purposes.
+ */
+
+static int nolwsys;
+
+static int __init
+nolwsys_setup (char *s)
+{
+ nolwsys = 1;
+ return 1;
+}
+
+__setup("nolwsys", nolwsys_setup);
+
+void
+mem_init (void)
+{
+#ifdef CONFIG_PCI
+ /*
+ * This needs to be called _after_ the command line has been parsed but _before_
+ * any drivers that may need the PCI DMA interface are initialized or bootmem has
+ * been freed.
+ */
+ platform_dma_init();
+#endif
+
+}
diff --git a/xen/arch/ia64/mmio.c b/xen/arch/ia64/mmio.c
new file mode 100644
index 0000000000..2fb5c34251
--- /dev/null
+++ b/xen/arch/ia64/mmio.c
@@ -0,0 +1,325 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * mmio.c: MMIO emulation components.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ * Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
+ */
+
+#include <linux/sched.h>
+#include <asm/tlb.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/gcc_intrin.h>
+#include <xen/interrupt.h>
+#include <asm/vmx_vcpu.h>
+
+struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base)
+{
+ int i;
+ for (i=0; mio_base[i].iot != NOT_IO; i++ ) {
+ if ( gpa >= mio_base[i].start && gpa <= mio_base[i].end )
+ return &mio_base[i];
+ }
+ return NULL;
+}
+
+
+extern void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma);
+static inline void mmio_write(VCPU *vcpu, void *src, u64 dest_pa, size_t s, int ma)
+{
+ struct virutal_platform_def *v_plat;
+ struct mmio_list *mio;
+
+ v_plat = vmx_vcpu_get_plat(vcpu);
+ mio = lookup_mmio(dest_pa, v_plat->mmio);
+ if ( mio == NULL )
+ panic ("Wrong address for MMIO\n");
+
+ switch (mio->iot) {
+ case PIB_MMIO:
+ pib_write(vcpu, src, dest_pa - v_plat->pib_base, s, ma);
+ break;
+ case VGA_BUFF:
+ case CHIPSET_IO:
+ case LOW_MMIO:
+ case LEGACY_IO:
+ case IO_SAPIC:
+ default:
+ break;
+ }
+ return;
+}
+
+static inline void mmio_read(VCPU *vcpu, u64 src_pa, void *dest, size_t s, int ma)
+{
+ struct virutal_platform_def *v_plat;
+ struct mmio_list *mio;
+
+ v_plat = vmx_vcpu_get_plat(vcpu);
+ mio = lookup_mmio(src_pa, v_plat->mmio);
+ if ( mio == NULL )
+ panic ("Wrong address for MMIO\n");
+
+ switch (mio->iot) {
+ case PIB_MMIO:
+ pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma);
+ break;
+ case VGA_BUFF:
+ case CHIPSET_IO:
+ case LOW_MMIO:
+ case LEGACY_IO:
+ case IO_SAPIC:
+ default:
+ break;
+ }
+ return;
+}
+
+/*
+ * Read or write data in guest virtual address mode.
+ */
+
+void
+memwrite_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s)
+{
+ uint64_t pa;
+
+ if (!vtlb->nomap)
+ panic("Normal memory write shouldn't go to this point!");
+ pa = PPN_2_PA(vtlb->ppn);
+ pa += POFFSET((u64)dest, vtlb->ps);
+ mmio_write (vcpu, src, pa, s, vtlb->ma);
+}
+
+
+void
+memwrite_p(VCPU *vcpu, void *src, void *dest, size_t s)
+{
+ uint64_t pa = (uint64_t)dest;
+ int ma;
+
+ if ( pa & (1UL <<63) ) {
+ // UC
+ ma = 4;
+ pa <<=1;
+ pa >>=1;
+ }
+ else {
+ // WBL
+ ma = 0; // using WB for WBL
+ }
+ mmio_write (vcpu, src, pa, s, ma);
+}
+
+void
+memread_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s)
+{
+ uint64_t pa;
+
+ if (!vtlb->nomap)
+ panic("Normal memory write shouldn't go to this point!");
+ pa = PPN_2_PA(vtlb->ppn);
+ pa += POFFSET((u64)src, vtlb->ps);
+
+ mmio_read(vcpu, pa, dest, s, vtlb->ma);
+}
+
+void
+memread_p(VCPU *vcpu, void *src, void *dest, size_t s)
+{
+ uint64_t pa = (uint64_t)src;
+ int ma;
+
+ if ( pa & (1UL <<63) ) {
+ // UC
+ ma = 4;
+ pa <<=1;
+ pa >>=1;
+ }
+ else {
+ // WBL
+ ma = 0; // using WB for WBL
+ }
+ mmio_read(vcpu, pa, dest, s, ma);
+}
+
+#define PIB_LOW_HALF(ofst) !(ofst&(1<<20))
+#define PIB_OFST_INTA 0x1E0000
+#define PIB_OFST_XTP 0x1E0008
+
+
+/*
+ * Deliver IPI message. (Only U-VP is supported now)
+ * dm: delivery mode (0=INT, 2=PMI, 4=NMI, 5=INIT, 7=ExtINT).
+ * vector: interrupt vector to deliver.
+ */
+static void deliver_ipi (VCPU *vcpu, uint64_t dm, uint64_t vector)
+{
+#ifdef IPI_DEBUG
+ printf ("deliver_ipi %lx %lx\n",dm,vector);
+#endif
+ switch ( dm ) {
+ case 0: // INT
+ vmx_vcpu_pend_interrupt (vcpu, vector);
+ break;
+ case 2: // PMI
+ // TODO -- inject guest PMI
+ panic ("Inject guest PMI!\n");
+ break;
+ case 4: // NMI
+ vmx_vcpu_pend_interrupt (vcpu, 2);
+ break;
+ case 5: // INIT
+ // TODO -- inject guest INIT
+ panic ("Inject guest INIT!\n");
+ break;
+ case 7: // ExtINT
+ vmx_vcpu_pend_interrupt (vcpu, 0);
+ break;
+
+ case 1:
+ case 3:
+ case 6:
+ default:
+ panic ("Deliver reserved IPI!\n");
+ break;
+ }
+}
+
+/*
+ * TODO: Use hash table for the lookup.
+ */
+static inline VCPU *lid_2_vcpu (struct domain *d, u64 id, u64 eid)
+{
+ int i;
+ VCPU *vcpu;
+ LID lid;
+
+ for (i=0; i<MAX_VIRT_CPUS; i++) {
+ vcpu = d->vcpu[i];
+ lid.val = VPD_CR(vcpu, lid);
+ if ( lid.id == id && lid.eid == eid ) {
+ return vcpu;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * execute write IPI op.
+ */
+static int write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value)
+{
+ VCPU *target_cpu;
+
+ target_cpu = lid_2_vcpu(vcpu->domain,
+ ((ipi_a_t)addr).id, ((ipi_a_t)addr).eid);
+ if ( target_cpu == NULL ) panic("Unknown IPI cpu\n");
+ if ( target_cpu == vcpu ) {
+ // IPI to self
+ deliver_ipi (vcpu, ((ipi_d_t)value).dm,
+ ((ipi_d_t)value).vector);
+ return 1;
+ }
+ else {
+ // TODO: send Host IPI to inject guest SMP IPI interruption
+ panic ("No SM-VP supported!\n");
+ return 0;
+ }
+}
+
+void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma)
+{
+
+ switch (pib_off) {
+ case PIB_OFST_INTA:
+ panic("Undefined write on PIB INTA\n");
+ break;
+ case PIB_OFST_XTP:
+ if ( s == 1 && ma == 4 /* UC */) {
+ vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src;
+ }
+ else {
+ panic("Undefined write on PIB XTP\n");
+ }
+ break;
+ default:
+ if ( PIB_LOW_HALF(pib_off) ) { // lower half
+ if ( s != 8 || ma != 0x4 /* UC */ ) {
+ panic("Undefined IPI-LHF write!\n");
+ }
+ else {
+ write_ipi(vcpu, pib_off, *(uint64_t *)src);
+ // TODO for SM-VP
+ }
+ }
+ else { // upper half
+ printf("IPI-UHF write %lx\n",pib_off);
+ panic("Not support yet for SM-VP\n");
+ }
+ break;
+ }
+}
+
+void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int ma)
+{
+ switch (pib_off) {
+ case PIB_OFST_INTA:
+ // todo --- emit on processor system bus.
+ if ( s == 1 && ma == 4) { // 1 byte load
+ // TODO: INTA read from IOSAPIC
+ }
+ else {
+ panic("Undefined read on PIB INTA\n");
+ }
+ break;
+ case PIB_OFST_XTP:
+ if ( s == 1 && ma == 4) {
+ *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp;
+ }
+ else {
+ panic("Undefined read on PIB XTP\n");
+ }
+ break;
+ default:
+ if ( PIB_LOW_HALF(pib_off) ) { // lower half
+ if ( s != 8 || ma != 4 ) {
+ panic("Undefined IPI-LHF read!\n");
+ }
+ else {
+#ifdef IPI_DEBUG
+ printf("IPI-LHF read %lx\n",pib_off);
+#endif
+ *(uint64_t *)dest = 0; // TODO for SM-VP
+ }
+ }
+ else { // upper half
+ if ( s != 1 || ma != 4 ) {
+ panic("Undefined PIB-UHF read!\n");
+ }
+ else {
+#ifdef IPI_DEBUG
+ printf("IPI-UHF read %lx\n",pib_off);
+#endif
+ *(uint8_t *)dest = 0; // TODO for SM-VP
+ }
+ }
+ break;
+ }
+}
+
diff --git a/xen/arch/ia64/patch/linux-2.6.11/cpumask.h b/xen/arch/ia64/patch/linux-2.6.11/cpumask.h
new file mode 100644
index 0000000000..c5866b5350
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/cpumask.h
@@ -0,0 +1,12 @@
+--- ../../linux-2.6.11/include/linux/cpumask.h 2005-03-02 00:38:00.000000000 -0700
++++ include/asm-ia64/linux/cpumask.h 2005-04-28 13:21:20.000000000 -0600
+@@ -342,7 +342,9 @@
+ */
+
+ extern cpumask_t cpu_possible_map;
++#ifndef XEN
+ extern cpumask_t cpu_online_map;
++#endif
+ extern cpumask_t cpu_present_map;
+
+ #if NR_CPUS > 1
diff --git a/xen/arch/ia64/patch/linux-2.6.11/efi.c b/xen/arch/ia64/patch/linux-2.6.11/efi.c
new file mode 100644
index 0000000000..e79d178edc
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/efi.c
@@ -0,0 +1,50 @@
+--- ../../linux-2.6.11/arch/ia64/kernel/efi.c 2005-03-02 00:37:47.000000000 -0700
++++ arch/ia64/efi.c 2005-06-09 06:15:36.000000000 -0600
+@@ -320,6 +320,16 @@
+ if (!(md->attribute & EFI_MEMORY_WB))
+ continue;
+
++#ifdef XEN
++// this works around a problem in the ski bootloader
++{
++ extern long running_on_sim;
++ if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY)
++ continue;
++}
++// this is a temporary hack to avoid CONFIG_VIRTUAL_MEM_MAP
++ if (md->phys_addr >= 0x100000000) continue;
++#endif
+ /*
+ * granule_addr is the base of md's first granule.
+ * [granule_addr - first_non_wb_addr) is guaranteed to
+@@ -719,6 +729,30 @@
+ return 0;
+ }
+
++#ifdef XEN
++// variation of efi_get_iobase which returns entire memory descriptor
++efi_memory_desc_t *
++efi_get_io_md (void)
++{
++ void *efi_map_start, *efi_map_end, *p;
++ efi_memory_desc_t *md;
++ u64 efi_desc_size;
++
++ efi_map_start = __va(ia64_boot_param->efi_memmap);
++ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
++ efi_desc_size = ia64_boot_param->efi_memdesc_size;
++
++ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
++ md = p;
++ if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
++ if (md->attribute & EFI_MEMORY_UC)
++ return md;
++ }
++ }
++ return 0;
++}
++#endif
++
+ u32
+ efi_mem_type (unsigned long phys_addr)
+ {
diff --git a/xen/arch/ia64/patch/linux-2.6.11/entry.S b/xen/arch/ia64/patch/linux-2.6.11/entry.S
new file mode 100644
index 0000000000..2b7f690aa8
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/entry.S
@@ -0,0 +1,237 @@
+--- ../../linux-2.6.11/arch/ia64/kernel/entry.S 2005-03-02 00:37:50.000000000 -0700
++++ arch/ia64/entry.S 2005-05-23 16:49:23.000000000 -0600
+@@ -46,6 +46,7 @@
+
+ #include "minstate.h"
+
++#ifndef XEN
+ /*
+ * execve() is special because in case of success, we need to
+ * setup a null register window frame.
+@@ -174,6 +175,7 @@
+ mov rp=loc0
+ br.ret.sptk.many rp
+ END(sys_clone)
++#endif /* !XEN */
+
+ /*
+ * prev_task <- ia64_switch_to(struct task_struct *next)
+@@ -191,7 +193,11 @@
+ movl r25=init_task
+ mov r27=IA64_KR(CURRENT_STACK)
+ adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
++#ifdef XEN
++ dep r20=0,in0,60,4 // physical address of "next"
++#else
+ dep r20=0,in0,61,3 // physical address of "next"
++#endif
+ ;;
+ st8 [r22]=sp // save kernel stack pointer of old task
+ shr.u r26=r20,IA64_GRANULE_SHIFT
+@@ -220,6 +226,16 @@
+ br.ret.sptk.many rp // boogie on out in new context
+
+ .map:
++#ifdef XEN
++ // avoid overlapping with kernel TR
++ movl r25=KERNEL_START
++ dep r23=0,in0,0,KERNEL_TR_PAGE_SHIFT
++ ;;
++ cmp.eq p7,p0=r25,r23
++ ;;
++(p7) mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
++(p7) br.cond.sptk .done
++#endif
+ rsm psr.ic // interrupts (psr.i) are already disabled here
+ movl r25=PAGE_KERNEL
+ ;;
+@@ -376,7 +392,11 @@
+ * - b7 holds address to return to
+ * - must not touch r8-r11
+ */
++#ifdef XEN
++GLOBAL_ENTRY(load_switch_stack)
++#else
+ ENTRY(load_switch_stack)
++#endif
+ .prologue
+ .altrp b7
+
+@@ -470,6 +490,7 @@
+ br.cond.sptk.many b7
+ END(load_switch_stack)
+
++#ifndef XEN
+ GLOBAL_ENTRY(__ia64_syscall)
+ .regstk 6,0,0,0
+ mov r15=in5 // put syscall number in place
+@@ -588,6 +609,7 @@
+ }
+ .ret4: br.cond.sptk ia64_leave_kernel
+ END(ia64_strace_leave_kernel)
++#endif
+
+ GLOBAL_ENTRY(ia64_ret_from_clone)
+ PT_REGS_UNWIND_INFO(0)
+@@ -604,6 +626,15 @@
+ */
+ br.call.sptk.many rp=ia64_invoke_schedule_tail
+ }
++#ifdef XEN
++ // new domains are cloned but not exec'ed so switch to user mode here
++ cmp.ne pKStk,pUStk=r0,r0
++#ifdef CONFIG_VTI
++ br.cond.spnt ia64_leave_hypervisor
++#else // CONFIG_VTI
++ br.cond.spnt ia64_leave_kernel
++#endif // CONFIG_VTI
++#else
+ .ret8:
+ adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+@@ -614,6 +645,7 @@
+ ;;
+ cmp.ne p6,p0=r2,r0
+ (p6) br.cond.spnt .strace_check_retval
++#endif
+ ;; // added stop bits to prevent r8 dependency
+ END(ia64_ret_from_clone)
+ // fall through
+@@ -700,19 +732,27 @@
+ .work_processed_syscall:
+ adds r2=PT(LOADRS)+16,r12
+ adds r3=PT(AR_BSPSTORE)+16,r12
++#ifdef XEN
++ ;;
++#else
+ adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ (p6) ld4 r31=[r18] // load current_thread_info()->flags
++#endif
+ ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
+ mov b7=r0 // clear b7
+ ;;
+ ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
+ ld8 r18=[r2],PT(R9)-PT(B6) // load b6
++#ifndef XEN
+ (p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
++#endif
+ ;;
+ mov r16=ar.bsp // M2 get existing backing store pointer
++#ifndef XEN
+ (p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
+ (p6) br.cond.spnt .work_pending_syscall
++#endif
+ ;;
+ // start restoring the state saved on the kernel stack (struct pt_regs):
+ ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
+@@ -757,7 +797,11 @@
+ ;;
+ ld8.fill r12=[r2] // restore r12 (sp)
+ ld8.fill r15=[r3] // restore r15
++#ifdef XEN
++ movl r3=THIS_CPU(ia64_phys_stacked_size_p8)
++#else
+ addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
++#endif
+ ;;
+ (pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8
+ (pUStk) st1 [r14]=r17
+@@ -814,9 +858,18 @@
+ (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
+ #endif
+ .work_processed_kernel:
++#ifdef XEN
++ alloc loc0=ar.pfs,0,1,1,0
++ adds out0=16,r12
++ ;;
++(p6) br.call.sptk.many b0=deliver_pending_interrupt
++ mov ar.pfs=loc0
++ mov r31=r0
++#else
+ adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ (p6) ld4 r31=[r17] // load current_thread_info()->flags
++#endif
+ adds r21=PT(PR)+16,r12
+ ;;
+
+@@ -828,17 +881,20 @@
+ ld8 r28=[r2],8 // load b6
+ adds r29=PT(R24)+16,r12
+
+- ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
++ ld8.fill r16=[r3]
+ adds r30=PT(AR_CCV)+16,r12
+ (p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
+ ;;
++ adds r3=PT(AR_CSD)-PT(R16),r3
+ ld8.fill r24=[r29]
+ ld8 r15=[r30] // load ar.ccv
+ (p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending?
+ ;;
+ ld8 r29=[r2],16 // load b7
+ ld8 r30=[r3],16 // load ar.csd
++#ifndef XEN
+ (p6) br.cond.spnt .work_pending
++#endif
+ ;;
+ ld8 r31=[r2],16 // load ar.ssd
+ ld8.fill r8=[r3],16
+@@ -934,7 +990,11 @@
+ shr.u r18=r19,16 // get byte size of existing "dirty" partition
+ ;;
+ mov r16=ar.bsp // get existing backing store pointer
++#ifdef XEN
++ movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
++#else
+ addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
++#endif
+ ;;
+ ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
+ (pKStk) br.cond.dpnt skip_rbs_switch
+@@ -1069,6 +1129,7 @@
+ mov pr=r31,-1 // I0
+ rfi // B
+
++#ifndef XEN
+ /*
+ * On entry:
+ * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
+@@ -1130,6 +1191,7 @@
+ ld8 r8=[r2]
+ ld8 r10=[r3]
+ br.cond.sptk.many .work_processed_syscall // re-check
++#endif
+
+ END(ia64_leave_kernel)
+
+@@ -1166,6 +1228,7 @@
+ br.ret.sptk.many rp
+ END(ia64_invoke_schedule_tail)
+
++#ifndef XEN
+ /*
+ * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to
+ * be set up by the caller. We declare 8 input registers so the system call
+@@ -1264,6 +1327,7 @@
+ mov ar.unat=r9
+ br.many b7
+ END(sys_rt_sigreturn)
++#endif
+
+ GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
+ .prologue
+@@ -1278,6 +1342,7 @@
+ br.cond.sptk.many rp // goes to ia64_leave_kernel
+ END(ia64_prepare_handle_unaligned)
+
++#ifndef XEN
+ //
+ // unw_init_running(void (*callback)(info, arg), void *arg)
+ //
+@@ -1585,3 +1650,4 @@
+ data8 sys_ni_syscall
+
+ .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
++#endif
diff --git a/xen/arch/ia64/patch/linux-2.6.11/entry.h b/xen/arch/ia64/patch/linux-2.6.11/entry.h
new file mode 100644
index 0000000000..8ccad88412
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/entry.h
@@ -0,0 +1,37 @@
+--- /home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/arch/ia64/kernel/entry.h 2005-03-01 23:38:07.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/arch/ia64/entry.h 2005-05-18 14:00:53.000000000 -0700
+@@ -7,6 +7,12 @@
+ #define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */
+ #define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */
+ #define PRED_USER_STACK 3 /* returning to user-stacks? */
++#ifdef CONFIG_VTI
++#define PRED_EMUL 2 /* Need to save r4-r7 for inst emulation */
++#define PRED_NON_EMUL 3 /* No need to save r4-r7 for normal path */
++#define PRED_BN0 6 /* Guest is in bank 0 */
++#define PRED_BN1 7 /* Guest is in bank 1 */
++#endif // CONFIG_VTI
+ #define PRED_SYSCALL 4 /* inside a system call? */
+ #define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */
+
+@@ -17,12 +23,21 @@
+ # define pLvSys PASTE(p,PRED_LEAVE_SYSCALL)
+ # define pKStk PASTE(p,PRED_KERNEL_STACK)
+ # define pUStk PASTE(p,PRED_USER_STACK)
++#ifdef CONFIG_VTI
++# define pEml PASTE(p,PRED_EMUL)
++# define pNonEml PASTE(p,PRED_NON_EMUL)
++# define pBN0 PASTE(p,PRED_BN0)
++# define pBN1 PASTE(p,PRED_BN1)
++#endif // CONFIG_VTI
+ # define pSys PASTE(p,PRED_SYSCALL)
+ # define pNonSys PASTE(p,PRED_NON_SYSCALL)
+ #endif
+
+ #define PT(f) (IA64_PT_REGS_##f##_OFFSET)
+ #define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET)
++#ifdef CONFIG_VTI
++#define VPD(f) (VPD_##f##_START_OFFSET)
++#endif // CONFIG_VTI
+
+ #define PT_REGS_SAVES(off) \
+ .unwabi 3, 'i'; \
diff --git a/xen/arch/ia64/patch/linux-2.6.11/gcc_intrin.h b/xen/arch/ia64/patch/linux-2.6.11/gcc_intrin.h
new file mode 100644
index 0000000000..e2966b8a5e
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/gcc_intrin.h
@@ -0,0 +1,69 @@
+--- /home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/include/asm-ia64/gcc_intrin.h 2005-03-01 23:38:08.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/include/asm-ia64/gcc_intrin.h 2005-05-18 14:00:53.000000000 -0700
+@@ -368,6 +368,66 @@
+ #define ia64_mf() asm volatile ("mf" ::: "memory")
+ #define ia64_mfa() asm volatile ("mf.a" ::: "memory")
+
++#ifdef CONFIG_VTI
++/*
++ * Flush the register stack engine contents to the backing store (flushrs).
++ */
++#define ia64_flushrs() asm volatile ("flushrs;;":::"memory")
++
++#define ia64_loadrs() asm volatile ("loadrs;;":::"memory")
++
++#define ia64_get_rsc() \
++({ \
++ unsigned long val; \
++ asm volatile ("mov %0=ar.rsc;;" : "=r"(val) :: "memory"); \
++ val; \
++})
++
++#define ia64_set_rsc(val) \
++ asm volatile ("mov ar.rsc=%0;;" :: "r"(val) : "memory")
++
++#define ia64_get_bspstore() \
++({ \
++ unsigned long val; \
++ asm volatile ("mov %0=ar.bspstore;;" : "=r"(val) :: "memory"); \
++ val; \
++})
++
++#define ia64_set_bspstore(val) \
++ asm volatile ("mov ar.bspstore=%0;;" :: "r"(val) : "memory")
++
++#define ia64_get_rnat() \
++({ \
++ unsigned long val; \
++ asm volatile ("mov %0=ar.rnat;" : "=r"(val) :: "memory"); \
++ val; \
++})
++
++#define ia64_set_rnat(val) \
++ asm volatile ("mov ar.rnat=%0;;" :: "r"(val) : "memory")
++
++#define ia64_ttag(addr) \
++({ \
++ __u64 ia64_intri_res; \
++ asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \
++ ia64_intri_res; \
++})
++
++#define ia64_get_dcr() \
++({ \
++ __u64 result; \
++ asm volatile ("mov %0=cr.dcr" : "=r"(result) : ); \
++ result; \
++})
++
++#define ia64_set_dcr(val) \
++({ \
++ asm volatile ("mov cr.dcr=%0" :: "r"(val) ); \
++})
++
++#endif // CONFIG_VTI
++
++
+ #define ia64_invala() asm volatile ("invala" ::: "memory")
+
+ #define ia64_thash(addr) \
diff --git a/xen/arch/ia64/patch/linux-2.6.11/hardirq.h b/xen/arch/ia64/patch/linux-2.6.11/hardirq.h
new file mode 100644
index 0000000000..76b2115d66
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/hardirq.h
@@ -0,0 +1,14 @@
+--- ../../linux-2.6.11/include/linux/hardirq.h 2005-03-02 00:38:00.000000000 -0700
++++ include/asm-ia64/linux/hardirq.h 2005-04-28 16:34:39.000000000 -0600
+@@ -60,7 +60,11 @@
+ */
+ #define in_irq() (hardirq_count())
+ #define in_softirq() (softirq_count())
++#ifndef XEN
+ #define in_interrupt() (irq_count())
++#else
++#define in_interrupt() 0 // FIXME LATER
++#endif
+
+ #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
+ # define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
diff --git a/xen/arch/ia64/patch/linux-2.6.11/head.S b/xen/arch/ia64/patch/linux-2.6.11/head.S
new file mode 100644
index 0000000000..52ab758d85
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/head.S
@@ -0,0 +1,120 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/head.S 2005-03-01 23:38:13.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/head.S 2005-05-18 12:40:50.000000000 -0700
+@@ -76,21 +76,21 @@
+ * We initialize all of them to prevent inadvertently assuming
+ * something about the state of address translation early in boot.
+ */
+- mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
++ movl r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r7=(0<<61)
+- mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
++ movl r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r9=(1<<61)
+- mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
++ movl r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r11=(2<<61)
+- mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
++ movl r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r13=(3<<61)
+- mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
++ movl r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r15=(4<<61)
+- mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
++ movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r17=(5<<61)
+- mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
++ movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
+ movl r19=(6<<61)
+- mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
++ movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
+ movl r21=(7<<61)
+ ;;
+ mov rr[r7]=r6
+@@ -129,8 +129,13 @@
+ /*
+ * Switch into virtual mode:
+ */
++#ifdef CONFIG_VTI
++ movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH \
++ |IA64_PSR_DI)
++#else // CONFIG_VTI
+ movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
+ |IA64_PSR_DI)
++#endif // CONFIG_VTI
+ ;;
+ mov cr.ipsr=r16
+ movl r17=1f
+@@ -143,7 +148,11 @@
+ 1: // now we are in virtual mode
+
+ // set IVT entry point---can't access I/O ports without it
++#ifdef CONFIG_VTI
++ movl r3=vmx_ia64_ivt
++#else // CONFIG_VTI
+ movl r3=ia64_ivt
++#endif // CONFIG_VTI
+ ;;
+ mov cr.iva=r3
+ movl r2=FPSR_DEFAULT
+@@ -187,7 +196,11 @@
+ dep r18=0,r3,0,12
+ ;;
+ or r18=r17,r18
++#ifdef XEN
++ dep r2=-1,r3,60,4 // IMVA of task
++#else
+ dep r2=-1,r3,61,3 // IMVA of task
++#endif
+ ;;
+ mov r17=rr[r2]
+ shr.u r16=r3,IA64_GRANULE_SHIFT
+@@ -207,8 +220,15 @@
+
+ .load_current:
+ // load the "current" pointer (r13) and ar.k6 with the current task
++#ifdef CONFIG_VTI
++ mov r21=r2 // virtual address
++ ;;
++ bsw.1
++ ;;
++#else // CONFIG_VTI
+ mov IA64_KR(CURRENT)=r2 // virtual address
+ mov IA64_KR(CURRENT_STACK)=r16
++#endif // CONFIG_VTI
+ mov r13=r2
+ /*
+ * Reserve space at the top of the stack for "struct pt_regs". Kernel threads
+@@ -227,7 +247,11 @@
+ ;;
+ mov ar.rsc=0x3 // place RSE in eager mode
+
++#ifdef XEN
++(isBP) dep r28=-1,r28,60,4 // make address virtual
++#else
+ (isBP) dep r28=-1,r28,61,3 // make address virtual
++#endif
+ (isBP) movl r2=ia64_boot_param
+ ;;
+ (isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader
+@@ -254,7 +278,9 @@
+ br.call.sptk.many b0=console_print
+
+ self: hint @pause
++ ;;
+ br.sptk.many self // endless loop
++ ;;
+ END(_start)
+
+ GLOBAL_ENTRY(ia64_save_debug_regs)
+@@ -850,7 +876,11 @@
+ * intermediate precision so that we can produce a full 64-bit result.
+ */
+ GLOBAL_ENTRY(sched_clock)
++#ifdef XEN
++ movl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET
++#else
+ addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
++#endif
+ mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
+ ;;
+ ldf8 f8=[r8]
diff --git a/xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h b/xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h
new file mode 100644
index 0000000000..9c653e982b
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h
@@ -0,0 +1,26 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/hp/sim/hpsim_ssc.h 2005-03-01 23:38:17.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/hpsim_ssc.h 2005-05-18 12:40:19.000000000 -0700
+@@ -33,4 +33,23 @@
+ */
+ extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
+
++#ifdef XEN
++/* Note: These are declared in linux/arch/ia64/hp/sim/simscsi.c but belong
++ * in linux/include/asm-ia64/hpsim_ssc.h, hence their addition here */
++#define SSC_OPEN 50
++#define SSC_CLOSE 51
++#define SSC_READ 52
++#define SSC_WRITE 53
++#define SSC_GET_COMPLETION 54
++#define SSC_WAIT_COMPLETION 55
++
++#define SSC_WRITE_ACCESS 2
++#define SSC_READ_ACCESS 1
++
++struct ssc_disk_req {
++ unsigned long addr;
++ unsigned long len;
++};
++#endif
++
+ #endif /* _IA64_PLATFORM_HPSIM_SSC_H */
diff --git a/xen/arch/ia64/patch/linux-2.6.11/ia64regs.h b/xen/arch/ia64/patch/linux-2.6.11/ia64regs.h
new file mode 100644
index 0000000000..6efd973a7d
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/ia64regs.h
@@ -0,0 +1,38 @@
+--- /home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/include/asm-ia64/ia64regs.h 2005-03-01 23:38:07.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/include/asm-ia64/ia64regs.h 2005-05-18 14:00:53.000000000 -0700
+@@ -87,6 +87,35 @@
+ #define _IA64_REG_CR_LRR0 4176
+ #define _IA64_REG_CR_LRR1 4177
+
++#ifdef CONFIG_VTI
++#define IA64_REG_CR_DCR 0
++#define IA64_REG_CR_ITM 1
++#define IA64_REG_CR_IVA 2
++#define IA64_REG_CR_PTA 8
++#define IA64_REG_CR_IPSR 16
++#define IA64_REG_CR_ISR 17
++#define IA64_REG_CR_IIP 19
++#define IA64_REG_CR_IFA 20
++#define IA64_REG_CR_ITIR 21
++#define IA64_REG_CR_IIPA 22
++#define IA64_REG_CR_IFS 23
++#define IA64_REG_CR_IIM 24
++#define IA64_REG_CR_IHA 25
++#define IA64_REG_CR_LID 64
++#define IA64_REG_CR_IVR 65
++#define IA64_REG_CR_TPR 66
++#define IA64_REG_CR_EOI 67
++#define IA64_REG_CR_IRR0 68
++#define IA64_REG_CR_IRR1 69
++#define IA64_REG_CR_IRR2 70
++#define IA64_REG_CR_IRR3 71
++#define IA64_REG_CR_ITV 72
++#define IA64_REG_CR_PMV 73
++#define IA64_REG_CR_CMCV 74
++#define IA64_REG_CR_LRR0 80
++#define IA64_REG_CR_LRR1 81
++#endif // CONFIG_VTI
++
+ /* Indirect Registers for getindreg() and setindreg() */
+
+ #define _IA64_REG_INDR_CPUID 9000 /* getindreg only */
diff --git a/xen/arch/ia64/patch/linux-2.6.11/interrupt.h b/xen/arch/ia64/patch/linux-2.6.11/interrupt.h
new file mode 100644
index 0000000000..29491bd442
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/interrupt.h
@@ -0,0 +1,27 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/linux/interrupt.h 2005-03-01 23:38:09.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/linux/interrupt.h 2005-05-18 12:40:50.000000000 -0700
+@@ -33,6 +33,7 @@
+ #define IRQ_HANDLED (1)
+ #define IRQ_RETVAL(x) ((x) != 0)
+
++#ifndef XEN
+ struct irqaction {
+ irqreturn_t (*handler)(int, void *, struct pt_regs *);
+ unsigned long flags;
+@@ -49,6 +50,7 @@
+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long, const char *, void *);
+ extern void free_irq(unsigned int, void *);
++#endif
+
+
+ #ifdef CONFIG_GENERIC_HARDIRQS
+@@ -121,7 +123,7 @@
+ };
+
+ asmlinkage void do_softirq(void);
+-extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
++//extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
+ extern void softirq_init(void);
+ #define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0)
+ extern void FASTCALL(raise_softirq_irqoff(unsigned int nr));
diff --git a/xen/arch/ia64/patch/linux-2.6.11/io.h b/xen/arch/ia64/patch/linux-2.6.11/io.h
new file mode 100644
index 0000000000..c935f35cf3
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/io.h
@@ -0,0 +1,14 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/io.h 2005-03-01 23:38:34.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/io.h 2005-05-18 12:40:50.000000000 -0700
+@@ -23,7 +23,11 @@
+ #define __SLOW_DOWN_IO do { } while (0)
+ #define SLOW_DOWN_IO do { } while (0)
+
++#ifdef XEN
++#define __IA64_UNCACHED_OFFSET 0xd000000000000000UL /* region 6 */
++#else
+ #define __IA64_UNCACHED_OFFSET 0xc000000000000000UL /* region 6 */
++#endif
+
+ /*
+ * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but
diff --git a/xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c b/xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c
new file mode 100644
index 0000000000..51f8fe05cf
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c
@@ -0,0 +1,118 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/irq_ia64.c 2005-03-01 23:38:07.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/irq_ia64.c 2005-05-18 12:40:51.000000000 -0700
+@@ -106,6 +106,9 @@
+ unsigned long saved_tpr;
+
+ #if IRQ_DEBUG
++#ifdef XEN
++ xen_debug_irq(vector, regs);
++#endif
+ {
+ unsigned long bsp, sp;
+
+@@ -148,6 +151,9 @@
+ ia64_setreg(_IA64_REG_CR_TPR, vector);
+ ia64_srlz_d();
+
++#ifdef XEN
++ if (!xen_do_IRQ(vector))
++#endif
+ __do_IRQ(local_vector_to_irq(vector), regs);
+
+ /*
+@@ -167,6 +173,95 @@
+ irq_exit();
+ }
+
++#ifdef CONFIG_VTI
++/*
++ * That's where the IVT branches when we get an external
++ * interrupt. This branches to the correct hardware IRQ handler via
++ * function ptr.
++ */
++void
++vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
++{
++ unsigned long saved_tpr;
++ int wake_dom0 = 0;
++
++
++#if IRQ_DEBUG
++ {
++ unsigned long bsp, sp;
++
++ /*
++ * Note: if the interrupt happened while executing in
++ * the context switch routine (ia64_switch_to), we may
++ * get a spurious stack overflow here. This is
++ * because the register and the memory stack are not
++ * switched atomically.
++ */
++ bsp = ia64_getreg(_IA64_REG_AR_BSP);
++ sp = ia64_getreg(_IA64_REG_AR_SP);
++
++ if ((sp - bsp) < 1024) {
++ static unsigned char count;
++ static long last_time;
++
++ if (jiffies - last_time > 5*HZ)
++ count = 0;
++ if (++count < 5) {
++ last_time = jiffies;
++ printk("ia64_handle_irq: DANGER: less than "
++ "1KB of free stack space!!\n"
++ "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
++ }
++ }
++ }
++#endif /* IRQ_DEBUG */
++
++ /*
++ * Always set TPR to limit maximum interrupt nesting depth to
++ * 16 (without this, it would be ~240, which could easily lead
++ * to kernel stack overflows).
++ */
++ irq_enter();
++ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
++ ia64_srlz_d();
++ while (vector != IA64_SPURIOUS_INT_VECTOR) {
++ if (!IS_RESCHEDULE(vector)) {
++ ia64_setreg(_IA64_REG_CR_TPR, vector);
++ ia64_srlz_d();
++
++ if (vector != IA64_TIMER_VECTOR) {
++ /* FIXME: Leave IRQ re-route later */
++ vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector);
++ wake_dom0 = 1;
++ }
++ else { // FIXME: Handle Timer only now
++ __do_IRQ(local_vector_to_irq(vector), regs);
++ }
++
++ /*
++ * Disable interrupts and send EOI:
++ */
++ local_irq_disable();
++ ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
++ }
++ else {
++ printf("Oops: RESCHEDULE IPI absorbed by HV\n");
++ }
++ ia64_eoi();
++ vector = ia64_get_ivr();
++ }
++ /*
++ * This must be done *after* the ia64_eoi(). For example, the keyboard softirq
++ * handler needs to be able to wait for further keyboard interrupts, which can't
++ * come through until ia64_eoi() has been done.
++ */
++ irq_exit();
++ if ( wake_dom0 && current != dom0 )
++ domain_wake(dom0->vcpu[0]);
++}
++#endif
++
++
+ #ifdef CONFIG_HOTPLUG_CPU
+ /*
+ * This function emulates a interrupt processing when a cpu is about to be
diff --git a/xen/arch/ia64/patch/linux-2.6.11/kregs.h b/xen/arch/ia64/patch/linux-2.6.11/kregs.h
new file mode 100644
index 0000000000..1268171987
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/kregs.h
@@ -0,0 +1,65 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/kregs.h 2005-03-01 23:37:49.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/kregs.h 2005-05-18 12:40:50.000000000 -0700
+@@ -29,8 +29,20 @@
+ */
+ #define IA64_TR_KERNEL 0 /* itr0, dtr0: maps kernel image (code & data) */
+ #define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */
++#ifdef CONFIG_VTI
++#define IA64_TR_XEN_IN_DOM 6 /* itr6, dtr6: Double mapping for xen image in domain space */
++#endif // CONFIG_VTI
+ #define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */
+ #define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- & register-stacks */
++#ifdef XEN
++#define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */
++#define IA64_TR_VHPT 4 /* dtr4: vhpt */
++#ifdef CONFIG_VTI
++#define IA64_TR_VHPT_IN_DOM 5 /* dtr5: Double mapping for vhpt table in domain space */
++#define IA64_TR_RR7_SWITCH_STUB 7 /* dtr7: mapping for rr7 switch stub */
++#define IA64_TEMP_PHYSICAL 8 /* itr8, dtr8: temp mapping for guest physical memory 256M */
++#endif // CONFIG_VTI
++#endif
+
+ /* Processor status register bits: */
+ #define IA64_PSR_BE_BIT 1
+@@ -66,6 +78,9 @@
+ #define IA64_PSR_ED_BIT 43
+ #define IA64_PSR_BN_BIT 44
+ #define IA64_PSR_IA_BIT 45
++#ifdef CONFIG_VTI
++#define IA64_PSR_VM_BIT 46
++#endif // CONFIG_VTI
+
+ /* A mask of PSR bits that we generally don't want to inherit across a clone2() or an
+ execve(). Only list flags here that need to be cleared/set for BOTH clone2() and
+@@ -107,6 +122,9 @@
+ #define IA64_PSR_ED (__IA64_UL(1) << IA64_PSR_ED_BIT)
+ #define IA64_PSR_BN (__IA64_UL(1) << IA64_PSR_BN_BIT)
+ #define IA64_PSR_IA (__IA64_UL(1) << IA64_PSR_IA_BIT)
++#ifdef CONFIG_VTI
++#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
++#endif // CONFIG_VTI
+
+ /* User mask bits: */
+ #define IA64_PSR_UM (IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH)
+@@ -160,4 +178,21 @@
+ #define IA64_ISR_CODE_LFETCH 4
+ #define IA64_ISR_CODE_PROBEF 5
+
++#ifdef XEN
++/* Interruption Function State */
++#define IA64_IFS_V_BIT 63
++#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT)
++
++/* Page Table Address */
++#define IA64_PTA_VE_BIT 0
++#define IA64_PTA_SIZE_BIT 2
++#define IA64_PTA_VF_BIT 8
++#define IA64_PTA_BASE_BIT 15
++
++#define IA64_PTA_VE (__IA64_UL(1) << IA64_PTA_VE_BIT)
++#define IA64_PTA_SIZE (__IA64_UL(0x3f) << IA64_PTA_SIZE_BIT)
++#define IA64_PTA_VF (__IA64_UL(1) << IA64_PTA_VF_BIT)
++#define IA64_PTA_BASE (__IA64_UL(0) - ((__IA64_UL(1) << IA64_PTA_BASE_BIT)))
++#endif
++
+ #endif /* _ASM_IA64_kREGS_H */
diff --git a/xen/arch/ia64/patch/linux-2.6.11/mca_asm.h b/xen/arch/ia64/patch/linux-2.6.11/mca_asm.h
new file mode 100644
index 0000000000..b9acc231e9
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/mca_asm.h
@@ -0,0 +1,32 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/mca_asm.h 2005-03-01 23:38:38.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/mca_asm.h 2005-05-18 12:40:19.000000000 -0700
+@@ -26,8 +26,13 @@
+ * direct mapped to physical addresses.
+ * 1. Lop off bits 61 thru 63 in the virtual address
+ */
++#ifdef XEN
++#define INST_VA_TO_PA(addr) \
++ dep addr = 0, addr, 60, 4
++#else // XEN
+ #define INST_VA_TO_PA(addr) \
+ dep addr = 0, addr, 61, 3
++#endif // XEN
+ /*
+ * This macro converts a data virtual address to a physical address
+ * Right now for simulation purposes the virtual addresses are
+@@ -42,9 +47,15 @@
+ * direct mapped to physical addresses.
+ * 1. Put 0x7 in bits 61 thru 63.
+ */
++#ifdef XEN
++#define DATA_PA_TO_VA(addr,temp) \
++ mov temp = 0xf ;; \
++ dep addr = temp, addr, 60, 4
++#else // XEN
+ #define DATA_PA_TO_VA(addr,temp) \
+ mov temp = 0x7 ;; \
+ dep addr = temp, addr, 61, 3
++#endif // XEN
+
+ #define GET_THIS_PADDR(reg, var) \
+ mov reg = IA64_KR(PER_CPU_DATA);; \
diff --git a/xen/arch/ia64/patch/linux-2.6.11/minstate.h b/xen/arch/ia64/patch/linux-2.6.11/minstate.h
new file mode 100644
index 0000000000..4fc12a53f1
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/minstate.h
@@ -0,0 +1,25 @@
+ minstate.h | 4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+Index: linux-2.6.11-xendiffs/arch/ia64/kernel/minstate.h
+===================================================================
+--- linux-2.6.11-xendiffs.orig/arch/ia64/kernel/minstate.h 2005-04-06 22:51:31.170261541 -0500
++++ linux-2.6.11-xendiffs/arch/ia64/kernel/minstate.h 2005-04-06 22:54:03.210575034 -0500
+@@ -48,7 +48,7 @@
+ (pUStk) mov r24=ar.rnat; \
+ (pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+ (pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
+-(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
++(pUStk) dep r22=-1,r22,60,4; /* compute kernel virtual addr of RBS */ \
+ ;; \
+ (pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
+ (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
+@@ -57,7 +57,7 @@
+ (pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
+
+ #define MINSTATE_END_SAVE_MIN_PHYS \
+- dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \
++ dep r12=-1,r12,60,4; /* make sp a kernel virtual address */ \
+ ;;
+
+ #ifdef MINSTATE_VIRT
diff --git a/xen/arch/ia64/patch/linux-2.6.11/mm_contig.c b/xen/arch/ia64/patch/linux-2.6.11/mm_contig.c
new file mode 100644
index 0000000000..df5eb199a6
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/mm_contig.c
@@ -0,0 +1,47 @@
+--- ../../linux-2.6.11/arch/ia64/mm/contig.c 2005-03-02 00:37:55.000000000 -0700
++++ arch/ia64/mm_contig.c 2005-04-28 16:13:52.000000000 -0600
+@@ -35,6 +35,7 @@
+ *
+ * Just walks the pages in the system and describes where they're allocated.
+ */
++#ifndef XEN
+ void
+ show_mem (void)
+ {
+@@ -63,6 +64,7 @@
+ printk("%d pages swap cached\n", cached);
+ printk("%ld pages in page table cache\n", pgtable_cache_size);
+ }
++#endif
+
+ /* physical address where the bootmem map is located */
+ unsigned long bootmap_start;
+@@ -140,6 +142,7 @@
+ * Walk the EFI memory map and find usable memory for the system, taking
+ * into account reserved areas.
+ */
++#ifndef XEN
+ void
+ find_memory (void)
+ {
+@@ -168,6 +171,7 @@
+
+ find_initrd();
+ }
++#endif
+
+ #ifdef CONFIG_SMP
+ /**
+@@ -225,6 +229,7 @@
+ * Set up the page tables.
+ */
+
++#ifndef XEN
+ void
+ paging_init (void)
+ {
+@@ -297,3 +302,4 @@
+ #endif /* !CONFIG_VIRTUAL_MEM_MAP */
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+ }
++#endif /* !CONFIG_XEN */
diff --git a/xen/arch/ia64/patch/linux-2.6.11/page.h b/xen/arch/ia64/patch/linux-2.6.11/page.h
new file mode 100644
index 0000000000..aac3ae04f8
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/page.h
@@ -0,0 +1,74 @@
+--- /home/adsharma/xeno-unstable-ia64-staging.bk/xen/../../linux-2.6.11/include/asm-ia64/page.h 2005-03-01 23:37:48.000000000 -0800
++++ /home/adsharma/xeno-unstable-ia64-staging.bk/xen/include/asm-ia64/page.h 2005-05-20 09:36:02.000000000 -0700
+@@ -32,6 +32,7 @@
+ #define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
+
+ #define PERCPU_PAGE_SHIFT 16 /* log2() of max. size of per-CPU area */
++
+ #define PERCPU_PAGE_SIZE (__IA64_UL_CONST(1) << PERCPU_PAGE_SHIFT)
+
+ #define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per region addr limit */
+@@ -95,9 +96,15 @@
+ #endif
+
+ #ifndef CONFIG_DISCONTIGMEM
++#ifdef XEN
++# define pfn_valid(pfn) (0)
++# define page_to_pfn(_page) ((unsigned long)((_page) - frame_table))
++# define pfn_to_page(_pfn) (frame_table + (_pfn))
++#else
+ # define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
+ # define page_to_pfn(page) ((unsigned long) (page - mem_map))
+ # define pfn_to_page(pfn) (mem_map + (pfn))
++#endif
+ #else
+ extern struct page *vmem_map;
+ extern unsigned long max_low_pfn;
+@@ -109,6 +116,11 @@
+ #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+ #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+
++#ifdef XEN
++#define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
++#define phys_to_page(kaddr) pfn_to_page(((kaddr) >> PAGE_SHIFT))
++#endif
++
+ typedef union ia64_va {
+ struct {
+ unsigned long off : 61; /* intra-region offset */
+@@ -124,8 +136,23 @@
+ * expressed in this way to ensure they result in a single "dep"
+ * instruction.
+ */
++#ifdef XEN
++typedef union xen_va {
++ struct {
++ unsigned long off : 60;
++ unsigned long reg : 4;
++ } f;
++ unsigned long l;
++ void *p;
++} xen_va;
++
++// xen/drivers/console.c uses __va in a declaration (should be fixed!)
++#define __pa(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
++#define __va(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
++#else
+ #define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
+ #define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
++#endif
+
+ #define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;})
+ #define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;})
+@@ -197,7 +224,11 @@
+ # define __pgprot(x) (x)
+ #endif /* !STRICT_MM_TYPECHECKS */
+
++#ifdef XEN
++#define PAGE_OFFSET __IA64_UL_CONST(0xf000000000000000)
++#else
+ #define PAGE_OFFSET __IA64_UL_CONST(0xe000000000000000)
++#endif
+
+ #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \
diff --git a/xen/arch/ia64/patch/linux-2.6.11/pal.S b/xen/arch/ia64/patch/linux-2.6.11/pal.S
new file mode 100644
index 0000000000..6e1fa22ca6
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/pal.S
@@ -0,0 +1,26 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/pal.S 2005-03-01 23:38:33.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/pal.S 2005-05-18 12:40:19.000000000 -0700
+@@ -166,7 +166,11 @@
+ adds r8 = 1f-1b,r8 // calculate return address for call
+ ;;
+ mov loc4=ar.rsc // save RSE configuration
++#ifdef XEN
++ dep.z loc2=loc2,0,60 // convert pal entry point to physical
++#else // XEN
+ dep.z loc2=loc2,0,61 // convert pal entry point to physical
++#endif // XEN
+ tpa r8=r8 // convert rp to physical
+ ;;
+ mov b7 = loc2 // install target to branch reg
+@@ -225,7 +229,11 @@
+ mov loc3 = psr // save psr
+ ;;
+ mov loc4=ar.rsc // save RSE configuration
++#ifdef XEN
++ dep.z loc2=loc2,0,60 // convert pal entry point to physical
++#else // XEN
+ dep.z loc2=loc2,0,61 // convert pal entry point to physical
++#endif // XEN
+ ;;
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ movl r16=PAL_PSR_BITS_TO_CLEAR
diff --git a/xen/arch/ia64/patch/linux-2.6.11/pal.h b/xen/arch/ia64/patch/linux-2.6.11/pal.h
new file mode 100644
index 0000000000..40da4e259a
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/pal.h
@@ -0,0 +1,12 @@
+--- /home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/include/asm-ia64/pal.h 2005-03-01 23:38:13.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/include/asm-ia64/pal.h 2005-05-18 14:00:53.000000000 -0700
+@@ -1559,6 +1559,9 @@
+ return iprv.status;
+ }
+
++#ifdef CONFIG_VTI
++#include <asm/vmx_pal.h>
++#endif // CONFIG_VTI
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _ASM_IA64_PAL_H */
diff --git a/xen/arch/ia64/patch/linux-2.6.11/pgalloc.h b/xen/arch/ia64/patch/linux-2.6.11/pgalloc.h
new file mode 100644
index 0000000000..64ea618a6d
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/pgalloc.h
@@ -0,0 +1,76 @@
+--- ../../linux-2.6.11/include/asm-ia64/pgalloc.h 2005-03-02 00:37:31.000000000 -0700
++++ include/asm-ia64/pgalloc.h 2005-06-09 13:40:48.000000000 -0600
+@@ -61,7 +61,12 @@
+ pgd_t *pgd = pgd_alloc_one_fast(mm);
+
+ if (unlikely(pgd == NULL)) {
++#ifdef XEN
++ pgd = (pgd_t *)alloc_xenheap_page();
++ memset(pgd,0,PAGE_SIZE);
++#else
+ pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
++#endif
+ }
+ return pgd;
+ }
+@@ -104,7 +109,12 @@
+ static inline pmd_t*
+ pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
+ {
++#ifdef XEN
++ pmd_t *pmd = (pmd_t *)alloc_xenheap_page();
++ memset(pmd,0,PAGE_SIZE);
++#else
+ pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
++#endif
+
+ return pmd;
+ }
+@@ -136,7 +146,12 @@
+ static inline struct page *
+ pte_alloc_one (struct mm_struct *mm, unsigned long addr)
+ {
++#ifdef XEN
++ struct page *pte = alloc_xenheap_page();
++ memset(pte,0,PAGE_SIZE);
++#else
+ struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
++#endif
+
+ return pte;
+ }
+@@ -144,7 +159,12 @@
+ static inline pte_t *
+ pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr)
+ {
++#ifdef XEN
++ pte_t *pte = (pte_t *)alloc_xenheap_page();
++ memset(pte,0,PAGE_SIZE);
++#else
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
++#endif
+
+ return pte;
+ }
+@@ -152,13 +172,21 @@
+ static inline void
+ pte_free (struct page *pte)
+ {
++#ifdef XEN
++ free_xenheap_page(pte);
++#else
+ __free_page(pte);
++#endif
+ }
+
+ static inline void
+ pte_free_kernel (pte_t *pte)
+ {
++#ifdef XEN
++ free_xenheap_page((unsigned long) pte);
++#else
+ free_page((unsigned long) pte);
++#endif
+ }
+
+ #define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte))
diff --git a/xen/arch/ia64/patch/linux-2.6.11/processor.h b/xen/arch/ia64/patch/linux-2.6.11/processor.h
new file mode 100644
index 0000000000..b15d152acd
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/processor.h
@@ -0,0 +1,37 @@
+--- /home/adsharma/xeno-unstable-ia64-staging.bk/xen/../../linux-2.6.11/include/asm-ia64/processor.h 2005-03-01 23:37:58.000000000 -0800
++++ /home/adsharma/xeno-unstable-ia64-staging.bk/xen/include/asm-ia64/processor.h 2005-05-20 09:36:02.000000000 -0700
+@@ -94,7 +94,11 @@
+ #ifdef CONFIG_NUMA
+ #include <asm/nodedata.h>
+ #endif
++#ifdef XEN
++#include <asm/xenprocessor.h>
++#endif
+
++#ifndef XEN
+ /* like above but expressed as bitfields for more efficient access: */
+ struct ia64_psr {
+ __u64 reserved0 : 1;
+@@ -133,6 +137,7 @@
+ __u64 bn : 1;
+ __u64 reserved4 : 19;
+ };
++#endif
+
+ /*
+ * CPU type, hardware bug flags, and per-CPU state. Frequently used
+@@ -408,12 +413,14 @@
+ */
+
+ /* Return TRUE if task T owns the fph partition of the CPU we're running on. */
++#ifndef XEN
+ #define ia64_is_local_fpu_owner(t) \
+ ({ \
+ struct task_struct *__ia64_islfo_task = (t); \
+ (__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id() \
+ && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER)); \
+ })
++#endif
+
+ /* Mark task T as owning the fph partition of the CPU we're running on. */
+ #define ia64_set_local_fpu_owner(t) do { \
diff --git a/xen/arch/ia64/patch/linux-2.6.11/ptrace.h b/xen/arch/ia64/patch/linux-2.6.11/ptrace.h
new file mode 100644
index 0000000000..dd79914f59
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/ptrace.h
@@ -0,0 +1,20 @@
+--- /home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/include/asm-ia64/ptrace.h 2005-03-01 23:38:38.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/include/asm-ia64/ptrace.h 2005-05-18 14:00:53.000000000 -0700
+@@ -95,6 +95,9 @@
+ * (because the memory stack pointer MUST ALWAYS be aligned this way)
+ *
+ */
++#ifdef CONFIG_VTI
++#include "vmx_ptrace.h"
++#else //CONFIG_VTI
+ struct pt_regs {
+ /* The following registers are saved by SAVE_MIN: */
+ unsigned long b6; /* scratch */
+@@ -170,6 +173,7 @@
+ struct ia64_fpreg f10; /* scratch */
+ struct ia64_fpreg f11; /* scratch */
+ };
++#endif // CONFIG_VTI
+
+ /*
+ * This structure contains the addition registers that need to
diff --git a/xen/arch/ia64/patch/linux-2.6.11/series b/xen/arch/ia64/patch/linux-2.6.11/series
new file mode 100644
index 0000000000..ea26a175fb
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/series
@@ -0,0 +1,40 @@
+bootmem.h
+current.h
+efi.c
+efi.h
+entry.S
+gcc_intrin.h
+hardirq.h
+head.S
+hpsim_irq.c
+hpsim_ssc.h
+hw_irq.h
+ide.h
+init_task.c
+init_task.h
+interrupt.h
+io.h
+irq.h
+irq_ia64.c
+ivt.S
+kregs.h
+lds.S
+linuxtime.h
+minstate.h
+mm_bootmem.c
+mm_contig.c
+mmzone.h
+page_alloc.c
+page.h
+processor.h
+sal.h
+setup.c
+slab.c
+slab.h
+system.h
+time.c
+kernel-time.c
+tlb.c
+types.h
+unaligned.c
+wait.h
diff --git a/xen/arch/ia64/patch/linux-2.6.11/setup.c b/xen/arch/ia64/patch/linux-2.6.11/setup.c
new file mode 100644
index 0000000000..2fea5662fd
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/setup.c
@@ -0,0 +1,151 @@
+--- ../../linux-2.6.11/arch/ia64/kernel/setup.c 2005-03-02 00:37:49.000000000 -0700
++++ arch/ia64/setup.c 2005-06-03 10:14:24.000000000 -0600
+@@ -51,6 +51,10 @@
+ #include <asm/smp.h>
+ #include <asm/system.h>
+ #include <asm/unistd.h>
++#ifdef CONFIG_VTI
++#include <asm/vmx.h>
++#endif // CONFIG_VTI
++#include <asm/io.h>
+
+ #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
+ # error "struct cpuinfo_ia64 too big!"
+@@ -127,7 +131,16 @@
+ range_end = min(end, rsvd_region[i].start);
+
+ if (range_start < range_end)
++#ifdef XEN
++ {
++ /* init_boot_pages requires "ps, pe" */
++ printk("Init boot pages: 0x%lx -> 0x%lx.\n",
++ __pa(range_start), __pa(range_end));
++ (*func)(__pa(range_start), __pa(range_end), 0);
++ }
++#else
+ call_pernode_memory(__pa(range_start), range_end - range_start, func);
++#endif
+
+ /* nothing more available in this segment */
+ if (range_end == end) return 0;
+@@ -185,7 +198,12 @@
+ n++;
+
+ rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
++#ifdef XEN
++ /* Reserve xen image/bitmap/xen-heap */
++ rsvd_region[n].end = rsvd_region[n].start + xenheap_size;
++#else
+ rsvd_region[n].end = (unsigned long) ia64_imva(_end);
++#endif
+ n++;
+
+ #ifdef CONFIG_BLK_DEV_INITRD
+@@ -299,17 +317,25 @@
+ }
+
+ void __init
++#ifdef XEN
++early_setup_arch (char **cmdline_p)
++#else
+ setup_arch (char **cmdline_p)
++#endif
+ {
+ unw_init();
+
+ ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+
+ *cmdline_p = __va(ia64_boot_param->command_line);
++#ifdef XEN
++ efi_init();
++#else
+ strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+
+ efi_init();
+ io_port_init();
++#endif
+
+ #ifdef CONFIG_IA64_GENERIC
+ {
+@@ -336,6 +362,11 @@
+ }
+ #endif
+
++#ifdef XEN
++ early_cmdline_parse(cmdline_p);
++ cmdline_parse(*cmdline_p);
++#undef CONFIG_ACPI_BOOT
++#endif
+ if (early_console_setup(*cmdline_p) == 0)
+ mark_bsp_online();
+
+@@ -351,8 +382,18 @@
+ # endif
+ #endif /* CONFIG_APCI_BOOT */
+
++#ifndef XEN
+ find_memory();
++#else
++ io_port_init();
++}
+
++void __init
++late_setup_arch (char **cmdline_p)
++{
++#undef CONFIG_ACPI_BOOT
++ acpi_table_init();
++#endif
+ /* process SAL system table: */
+ ia64_sal_init(efi.sal_systab);
+
+@@ -360,6 +401,10 @@
+ cpu_physical_id(0) = hard_smp_processor_id();
+ #endif
+
++#ifdef CONFIG_VTI
++ identify_vmx_feature();
++#endif // CONFIG_VTI
++
+ cpu_init(); /* initialize the bootstrap CPU */
+
+ #ifdef CONFIG_ACPI_BOOT
+@@ -492,12 +537,14 @@
+ {
+ }
+
++#ifndef XEN
+ struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo
+ };
++#endif
+
+ void
+ identify_cpu (struct cpuinfo_ia64 *c)
+@@ -551,6 +598,12 @@
+ }
+ c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
+ c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
++
++#ifdef CONFIG_VTI
++ /* If vmx feature is on, do necessary initialization for vmx */
++ if (vmx_enabled)
++ vmx_init_env();
++#endif
+ }
+
+ void
+@@ -659,7 +712,11 @@
+ | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
++#ifdef XEN
++ if (current->domain->arch.mm)
++#else
+ if (current->mm)
++#endif
+ BUG();
+
+ ia64_mmu_init(ia64_imva(cpu_data));
diff --git a/xen/arch/ia64/patch/linux-2.6.11/sn_sal.h b/xen/arch/ia64/patch/linux-2.6.11/sn_sal.h
new file mode 100644
index 0000000000..b38c1300f1
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/sn_sal.h
@@ -0,0 +1,33 @@
+--- /data/lwork/attica1/edwardsg/linux-2.6.11/include/asm-ia64/sn/sn_sal.h 2005-03-02 01:38:33 -06:00
++++ include/asm-ia64/sn/sn_sal.h 2005-06-01 14:31:47 -05:00
+@@ -123,6 +123,7 @@
+ #define SALRET_ERROR (-3)
+
+
++#ifndef XEN
+ /**
+ * sn_sal_rev_major - get the major SGI SAL revision number
+ *
+@@ -226,6 +227,7 @@ ia64_sn_get_klconfig_addr(nasid_t nasid)
+ }
+ return ret_stuff.v0 ? __va(ret_stuff.v0) : NULL;
+ }
++#endif /* !XEN */
+
+ /*
+ * Returns the next console character.
+@@ -304,6 +306,7 @@ ia64_sn_console_putb(const char *buf, in
+ return (u64)0;
+ }
+
++#ifndef XEN
+ /*
+ * Print a platform error record
+ */
+@@ -987,5 +990,5 @@ ia64_sn_hwperf_op(nasid_t nasid, u64 opc
+ *v0 = (int) rv.v0;
+ return (int) rv.status;
+ }
+-
++#endif /* !XEN */
+ #endif /* _ASM_IA64_SN_SN_SAL_H */
diff --git a/xen/arch/ia64/patch/linux-2.6.11/system.h b/xen/arch/ia64/patch/linux-2.6.11/system.h
new file mode 100644
index 0000000000..901b512535
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/system.h
@@ -0,0 +1,38 @@
+--- /home/adsharma/xeno-unstable-ia64-staging.bk/xen/../../linux-2.6.11/include/asm-ia64/system.h 2005-03-01 23:38:07.000000000 -0800
++++ /home/adsharma/xeno-unstable-ia64-staging.bk/xen/include/asm-ia64/system.h 2005-05-20 09:36:02.000000000 -0700
+@@ -18,14 +18,19 @@
+ #include <asm/page.h>
+ #include <asm/pal.h>
+ #include <asm/percpu.h>
++#ifdef XEN
++#include <asm/xensystem.h>
++#endif
+
+ #define GATE_ADDR __IA64_UL_CONST(0xa000000000000000)
+ /*
+ * 0xa000000000000000+2*PERCPU_PAGE_SIZE
+ * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page)
+ */
++#ifndef XEN
+ #define KERNEL_START __IA64_UL_CONST(0xa000000100000000)
+ #define PERCPU_ADDR (-PERCPU_PAGE_SIZE)
++#endif
+
+ #ifndef __ASSEMBLY__
+
+@@ -218,6 +223,7 @@
+ # define PERFMON_IS_SYSWIDE() (0)
+ #endif
+
++#ifndef XEN
+ #define IA64_HAS_EXTRA_STATE(t) \
+ ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID) \
+ || IS_IA32_PROCESS(ia64_task_regs(t)) || PERFMON_IS_SYSWIDE())
+@@ -230,6 +236,7 @@
+ ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \
+ (last) = ia64_switch_to((next)); \
+ } while (0)
++#endif
+
+ #ifdef CONFIG_SMP
+ /*
diff --git a/xen/arch/ia64/patch/linux-2.6.11/time.c b/xen/arch/ia64/patch/linux-2.6.11/time.c
new file mode 100644
index 0000000000..9e480b969e
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/time.c
@@ -0,0 +1,56 @@
+--- ../../linux-2.6.11/arch/ia64/kernel/time.c 2005-03-02 00:37:50.000000000 -0700
++++ arch/ia64/time.c 2005-05-02 11:19:29.000000000 -0600
+@@ -29,6 +29,9 @@
+ #include <asm/sal.h>
+ #include <asm/sections.h>
+ #include <asm/system.h>
++#ifdef XEN
++#include <linux/jiffies.h> // not included by xen/sched.h
++#endif
+
+ extern unsigned long wall_jiffies;
+
+@@ -45,6 +48,7 @@
+
+ #endif
+
++#ifndef XEN
+ static struct time_interpolator itc_interpolator = {
+ .shift = 16,
+ .mask = 0xffffffffffffffffLL,
+@@ -110,6 +114,7 @@
+ } while (time_after_eq(ia64_get_itc(), new_itm));
+ return IRQ_HANDLED;
+ }
++#endif
+
+ /*
+ * Encapsulate access to the itm structure for SMP.
+@@ -212,6 +217,7 @@
+ + itc_freq/2)/itc_freq;
+
+ if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
++#ifndef XEN
+ itc_interpolator.frequency = local_cpu_data->itc_freq;
+ itc_interpolator.drift = itc_drift;
+ #ifdef CONFIG_SMP
+@@ -228,12 +234,14 @@
+ if (!nojitter) itc_interpolator.jitter = 1;
+ #endif
+ register_time_interpolator(&itc_interpolator);
++#endif
+ }
+
+ /* Setup the CPU local timer tick */
+ ia64_cpu_local_tick();
+ }
+
++#ifndef XEN
+ static struct irqaction timer_irqaction = {
+ .handler = timer_interrupt,
+ .flags = SA_INTERRUPT,
+@@ -253,3 +261,4 @@
+ */
+ set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
+ }
++#endif
diff --git a/xen/arch/ia64/patch/linux-2.6.11/tlb.c b/xen/arch/ia64/patch/linux-2.6.11/tlb.c
new file mode 100644
index 0000000000..1a13908c62
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/tlb.c
@@ -0,0 +1,38 @@
+--- ../../linux-2.6.11/arch/ia64/mm/tlb.c 2005-03-02 00:38:38.000000000 -0700
++++ arch/ia64/tlb.c 2005-05-02 10:23:09.000000000 -0600
+@@ -43,6 +43,9 @@
+ void
+ wrap_mmu_context (struct mm_struct *mm)
+ {
++#ifdef XEN
++printf("wrap_mmu_context: called, not implemented\n");
++#else
+ unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
+ struct task_struct *tsk;
+ int i;
+@@ -83,6 +86,7 @@
+ put_cpu();
+ }
+ local_flush_tlb_all();
++#endif
+ }
+
+ void
+@@ -132,6 +136,9 @@
+ void
+ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+ {
++#ifdef XEN
++printf("flush_tlb_range: called, not implemented\n");
++#else
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long size = end - start;
+ unsigned long nbits;
+@@ -163,6 +170,7 @@
+ # endif
+
+ ia64_srlz_i(); /* srlz.i implies srlz.d */
++#endif
+ }
+ EXPORT_SYMBOL(flush_tlb_range);
+
diff --git a/xen/arch/ia64/patch/linux-2.6.11/types.h b/xen/arch/ia64/patch/linux-2.6.11/types.h
new file mode 100644
index 0000000000..9cf610326b
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/types.h
@@ -0,0 +1,44 @@
+--- ../../linux-2.6.11/include/asm-ia64/types.h 2005-03-04 10:26:30.000000000 -0700
++++ include/asm-ia64/types.h 2005-04-11 15:23:49.000000000 -0600
+@@ -1,5 +1,12 @@
+ #ifndef _ASM_IA64_TYPES_H
+ #define _ASM_IA64_TYPES_H
++#ifdef XEN
++#ifndef __ASSEMBLY__
++typedef unsigned long ssize_t;
++typedef unsigned long size_t;
++typedef long long loff_t;
++#endif
++#endif
+
+ /*
+ * This file is never included by application software unless explicitly requested (e.g.,
+@@ -61,6 +68,28 @@
+ typedef __s64 s64;
+ typedef __u64 u64;
+
++#ifdef XEN
++/*
++ * Below are truly Linux-specific types that should never collide with
++ * any application/library that wants linux/types.h.
++ */
++
++#ifdef __CHECKER__
++#define __bitwise __attribute__((bitwise))
++#else
++#define __bitwise
++#endif
++
++typedef __u16 __bitwise __le16;
++typedef __u16 __bitwise __be16;
++typedef __u32 __bitwise __le32;
++typedef __u32 __bitwise __be32;
++#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
++typedef __u64 __bitwise __le64;
++typedef __u64 __bitwise __be64;
++#endif
++#endif
++
+ #define BITS_PER_LONG 64
+
+ /* DMA addresses are 64-bits wide, in general. */
diff --git a/xen/arch/ia64/patch/linux-2.6.11/unaligned.c b/xen/arch/ia64/patch/linux-2.6.11/unaligned.c
new file mode 100644
index 0000000000..942cce7ec1
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.11/unaligned.c
@@ -0,0 +1,227 @@
+--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/unaligned.c 2005-03-01 23:38:25.000000000 -0800
++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/unaligned.c 2005-05-18 12:40:50.000000000 -0700
+@@ -201,7 +201,11 @@
+
+ RPT(r1), RPT(r2), RPT(r3),
+
++#ifdef CONFIG_VTI
++ RPT(r4), RPT(r5), RPT(r6), RPT(r7),
++#else //CONFIG_VTI
+ RSW(r4), RSW(r5), RSW(r6), RSW(r7),
++#endif //CONFIG_VTI
+
+ RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+ RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+@@ -291,6 +295,121 @@
+ return reg;
+ }
+
++#ifdef CONFIG_VTI
++static void
++set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, unsigned long nat)
++{
++ struct switch_stack *sw = (struct switch_stack *) regs - 1;
++ unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
++ unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
++ unsigned long rnats, nat_mask;
++ unsigned long old_rsc,new_rsc;
++ unsigned long on_kbs,rnat;
++ long sof = (regs->cr_ifs) & 0x7f;
++ long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
++ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
++ long ridx = r1 - 32;
++
++ if (ridx >= sof) {
++ /* this should never happen, as the "rsvd register fault" has higher priority */
++ DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
++ return;
++ }
++
++ if (ridx < sor)
++ ridx = rotate_reg(sor, rrb_gr, ridx);
++
++ old_rsc=ia64_get_rsc();
++ new_rsc=old_rsc&(~0x3);
++ ia64_set_rsc(new_rsc);
++
++ bspstore = ia64_get_bspstore();
++ bsp =kbs + (regs->loadrs >> 19);//16+3
++
++ addr = ia64_rse_skip_regs(bsp, -sof + ridx);
++ nat_mask = 1UL << ia64_rse_slot_num(addr);
++ rnat_addr = ia64_rse_rnat_addr(addr);
++
++ if(addr >= bspstore){
++
++ ia64_flushrs ();
++ ia64_mf ();
++ *addr = val;
++ bspstore = ia64_get_bspstore();
++ rnat = ia64_get_rnat ();
++ if(bspstore < rnat_addr){
++ rnat=rnat&(~nat_mask);
++ }else{
++ *rnat_addr = (*rnat_addr)&(~nat_mask);
++ }
++ ia64_mf();
++ ia64_loadrs();
++ ia64_set_rnat(rnat);
++ }else{
++
++ rnat = ia64_get_rnat ();
++ *addr = val;
++ if(bspstore < rnat_addr){
++ rnat=rnat&(~nat_mask);
++ }else{
++ *rnat_addr = (*rnat_addr)&(~nat_mask);
++ }
++ ia64_set_bspstore (bspstore);
++ ia64_set_rnat(rnat);
++ }
++ ia64_set_rsc(old_rsc);
++}
++
++
++static void
++get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, unsigned long *nat)
++{
++ struct switch_stack *sw = (struct switch_stack *) regs - 1;
++ unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
++ unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
++ unsigned long rnats, nat_mask;
++ unsigned long on_kbs;
++ unsigned long old_rsc, new_rsc;
++ long sof = (regs->cr_ifs) & 0x7f;
++ long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
++ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
++ long ridx = r1 - 32;
++
++ if (ridx >= sof) {
++ /* read of out-of-frame register returns an undefined value; 0 in our case. */
++ DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
++ panic("wrong stack register number");
++ }
++
++ if (ridx < sor)
++ ridx = rotate_reg(sor, rrb_gr, ridx);
++
++ old_rsc=ia64_get_rsc();
++ new_rsc=old_rsc&(~(0x3));
++ ia64_set_rsc(new_rsc);
++
++ bspstore = ia64_get_bspstore();
++ bsp =kbs + (regs->loadrs >> 19); //16+3;
++
++ addr = ia64_rse_skip_regs(bsp, -sof + ridx);
++ nat_mask = 1UL << ia64_rse_slot_num(addr);
++ rnat_addr = ia64_rse_rnat_addr(addr);
++
++ if(addr >= bspstore){
++
++ ia64_flushrs ();
++ ia64_mf ();
++ bspstore = ia64_get_bspstore();
++ }
++ *val=*addr;
++ if(bspstore < rnat_addr){
++ *nat=!!(ia64_get_rnat()&nat_mask);
++ }else{
++ *nat = !!((*rnat_addr)&nat_mask);
++ }
++ ia64_set_rsc(old_rsc);
++}
++#else // CONFIG_VTI
+ static void
+ set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
+ {
+@@ -435,9 +554,14 @@
+ *nat = 0;
+ return;
+ }
++#endif // CONFIG_VTI
+
+
++#ifdef XEN
++void
++#else
+ static void
++#endif
+ setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
+ {
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+@@ -466,7 +590,11 @@
+ unat = &sw->ar_unat;
+ } else {
+ addr = (unsigned long)regs;
++#ifdef CONFIG_VTI
++ unat = &regs->eml_unat;
++#else //CONFIG_VTI
+ unat = &sw->caller_unat;
++#endif //CONFIG_VTI
+ }
+ DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
+ addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
+@@ -522,7 +650,11 @@
+ */
+ if (regnum >= IA64_FIRST_ROTATING_FR) {
+ ia64_sync_fph(current);
++#ifdef XEN
++ current->arch._thread.fph[fph_index(regs, regnum)] = *fpval;
++#else
+ current->thread.fph[fph_index(regs, regnum)] = *fpval;
++#endif
+ } else {
+ /*
+ * pt_regs or switch_stack ?
+@@ -581,7 +713,11 @@
+ */
+ if (regnum >= IA64_FIRST_ROTATING_FR) {
+ ia64_flush_fph(current);
++#ifdef XEN
++ *fpval = current->arch._thread.fph[fph_index(regs, regnum)];
++#else
+ *fpval = current->thread.fph[fph_index(regs, regnum)];
++#endif
+ } else {
+ /*
+ * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
+@@ -611,7 +747,11 @@
+ }
+
+
++#ifdef XEN
++void
++#else
+ static void
++#endif
+ getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
+ {
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+@@ -640,7 +780,11 @@
+ unat = &sw->ar_unat;
+ } else {
+ addr = (unsigned long)regs;
++#ifdef CONFIG_VTI
++ unat = &regs->eml_unat;;
++#else //CONFIG_VTI
+ unat = &sw->caller_unat;
++#endif //CONFIG_VTI
+ }
+
+ DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
+@@ -1294,6 +1438,9 @@
+ void
+ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
+ {
++#ifdef XEN
++printk("ia64_handle_unaligned: called, not working yet\n");
++#else
+ struct ia64_psr *ipsr = ia64_psr(regs);
+ mm_segment_t old_fs = get_fs();
+ unsigned long bundle[2];
+@@ -1502,4 +1649,5 @@
+ si.si_imm = 0;
+ force_sig_info(SIGBUS, &si, current);
+ goto done;
++#endif
+ }
diff --git a/xen/arch/ia64/patch/linux-2.6.7/bootmem.h b/xen/arch/ia64/patch/linux-2.6.7/bootmem.h
new file mode 100644
index 0000000000..442f0aea98
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/bootmem.h
@@ -0,0 +1,12 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/bootmem.h 2004-06-15 23:19:52.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/bootmem.h 2004-08-25 19:28:13.000000000 -0600
+@@ -41,7 +41,9 @@
+ extern void __init free_bootmem (unsigned long addr, unsigned long size);
+ extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
+ #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
++#ifndef XEN
+ extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
++#endif
+ #define alloc_bootmem(x) \
+ __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ #define alloc_bootmem_low(x) \
diff --git a/xen/arch/ia64/patch/linux-2.6.7/current.h b/xen/arch/ia64/patch/linux-2.6.7/current.h
new file mode 100644
index 0000000000..cf11820756
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/current.h
@@ -0,0 +1,17 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/current.h 2004-06-15 23:19:52.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/current.h 2004-08-25 19:28:12.000000000 -0600
+@@ -12,6 +12,14 @@
+ * In kernel mode, thread pointer (r13) is used to point to the current task
+ * structure.
+ */
++#ifdef XEN
++struct domain;
++#define get_current() ((struct vcpu *) ia64_getreg(_IA64_REG_TP))
++#define current get_current()
++//#define set_current(d) ia64_setreg(_IA64_REG_TP,(void *)d);
++#define set_current(d) (ia64_r13 = (void *)d)
++#else
+ #define current ((struct task_struct *) ia64_getreg(_IA64_REG_TP))
++#endif
+
+ #endif /* _ASM_IA64_CURRENT_H */
diff --git a/xen/arch/ia64/patch/linux-2.6.7/efi.c b/xen/arch/ia64/patch/linux-2.6.7/efi.c
new file mode 100644
index 0000000000..6cf15ecabb
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/efi.c
@@ -0,0 +1,85 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/kernel/efi.c 2004-06-15 23:18:55.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/efi.c 2004-12-17 13:47:03.000000000 -0700
+@@ -25,6 +25,9 @@
+ #include <linux/types.h>
+ #include <linux/time.h>
+ #include <linux/efi.h>
++#ifdef XEN
++#include <xen/sched.h>
++#endif
+
+ #include <asm/io.h>
+ #include <asm/kregs.h>
+@@ -49,7 +52,10 @@
+ { \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
++ efi_time_cap_t *atc = NULL; \
+ \
++ if (tc) \
++ atc = adjust_arg(tc); \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), \
+ adjust_arg(tc)); \
+@@ -201,6 +207,7 @@
+ if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS)
+ return;
+
++ dummy();
+ ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
+ ts->tv_nsec = tm.nanosecond;
+ }
+@@ -303,6 +310,10 @@
+ if (!(md->attribute & EFI_MEMORY_WB))
+ continue;
+
++#ifdef XEN
++// this is a temporary hack to avoid CONFIG_VIRTUAL_MEM_MAP
++ if (md->phys_addr >= 0x100000000) continue;
++#endif
+ /*
+ * granule_addr is the base of md's first granule.
+ * [granule_addr - first_non_wb_addr) is guaranteed to
+@@ -456,9 +467,11 @@
+
+ cpu = smp_processor_id();
+
++#ifndef XEN
+ /* insert this TR into our list for MCA recovery purposes */
+ ia64_mca_tlb_list[cpu].pal_base = vaddr & mask;
+ ia64_mca_tlb_list[cpu].pal_paddr = pte_val(mk_pte_phys(md->phys_addr, PAGE_KERNEL));
++#endif
+ }
+ }
+
+@@ -680,6 +693,30 @@
+ return 0;
+ }
+
++#ifdef XEN
++// variation of efi_get_iobase which returns entire memory descriptor
++efi_memory_desc_t *
++efi_get_io_md (void)
++{
++ void *efi_map_start, *efi_map_end, *p;
++ efi_memory_desc_t *md;
++ u64 efi_desc_size;
++
++ efi_map_start = __va(ia64_boot_param->efi_memmap);
++ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
++ efi_desc_size = ia64_boot_param->efi_memdesc_size;
++
++ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
++ md = p;
++ if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
++ if (md->attribute & EFI_MEMORY_UC)
++ return md;
++ }
++ }
++ return 0;
++}
++#endif
++
+ u32
+ efi_mem_type (unsigned long phys_addr)
+ {
diff --git a/xen/arch/ia64/patch/linux-2.6.7/efi.h b/xen/arch/ia64/patch/linux-2.6.7/efi.h
new file mode 100644
index 0000000000..2aee7f9677
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/efi.h
@@ -0,0 +1,13 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/efi.h 2004-06-15 23:20:03.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/efi.h 2004-08-25 19:28:13.000000000 -0600
+@@ -15,8 +15,10 @@
+ #include <linux/string.h>
+ #include <linux/time.h>
+ #include <linux/types.h>
++#ifndef XEN
+ #include <linux/proc_fs.h>
+ #include <linux/rtc.h>
++#endif
+ #include <linux/ioport.h>
+
+ #include <asm/page.h>
diff --git a/xen/arch/ia64/patch/linux-2.6.7/entry.S b/xen/arch/ia64/patch/linux-2.6.7/entry.S
new file mode 100644
index 0000000000..c163008f27
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/entry.S
@@ -0,0 +1,195 @@
+--- ../../linux-2.6.7/arch/ia64/kernel/entry.S 2005-03-24 19:39:56.000000000 -0700
++++ arch/ia64/entry.S 2005-04-01 12:56:01.000000000 -0700
+@@ -35,7 +35,9 @@
+
+ #include <asm/asmmacro.h>
+ #include <asm/cache.h>
++#ifndef XEN
+ #include <asm/errno.h>
++#endif
+ #include <asm/kregs.h>
+ #include <asm/offsets.h>
+ #include <asm/pgtable.h>
+@@ -46,6 +48,23 @@
+
+ #include "minstate.h"
+
++#ifdef XEN
++#define sys_execve 0
++#define do_fork 0
++#define syscall_trace 0
++#define schedule 0
++#define do_notify_resume_user 0
++#define ia64_rt_sigsuspend 0
++#define ia64_rt_sigreturn 0
++#define ia64_handle_unaligned 0
++#define errno 0
++#define sys_ni_syscall 0
++#define unw_init_frame_info 0
++#define sys_call_table 0
++#endif
++
++ /*
++
+ /*
+ * execve() is special because in case of success, we need to
+ * setup a null register window frame.
+@@ -178,11 +197,14 @@
+ DO_SAVE_SWITCH_STACK
+ .body
+
++#ifdef XEN
++//#undef IA64_TASK_THREAD_KSP_OFFSET
++//#define IA64_TASK_THREAD_KSP_OFFSET 0x38
+ adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+ movl r25=init_task
+ mov r27=IA64_KR(CURRENT_STACK)
+ adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
+- dep r20=0,in0,61,3 // physical address of "current"
++ dep r20=0,in0,60,4 // physical address of "current"
+ ;;
+ st8 [r22]=sp // save kernel stack pointer of old task
+ shr.u r26=r20,IA64_GRANULE_SHIFT
+@@ -194,6 +216,22 @@
+ (p6) cmp.eq p7,p6=r26,r27
+ (p6) br.cond.dpnt .map
+ ;;
++#else
++ adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
++ mov r27=IA64_KR(CURRENT_STACK)
++ dep r20=0,in0,61,3 // physical address of "current"
++ ;;
++ st8 [r22]=sp // save kernel stack pointer of old task
++ shr.u r26=r20,IA64_GRANULE_SHIFT
++ adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
++ ;;
++ /*
++ * If we've already mapped this task's page, we can skip doing it again.
++ */
++ cmp.eq p7,p6=r26,r27
++(p6) br.cond.dpnt .map
++ ;;
++#endif
+ .done:
+ (p6) ssm psr.ic // if we we had to map, renable the psr.ic bit FIRST!!!
+ ;;
+@@ -211,6 +249,16 @@
+ br.ret.sptk.many rp // boogie on out in new context
+
+ .map:
++#ifdef XEN
++ // avoid overlapping with kernel TR
++ movl r25=KERNEL_START
++ dep r23=0,in0,0,KERNEL_TR_PAGE_SHIFT
++ ;;
++ cmp.eq p7,p0=r25,r23
++ ;;
++(p7) mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
++(p7) br.cond.sptk .done
++#endif
+ rsm psr.ic // interrupts (psr.i) are already disabled here
+ movl r25=PAGE_KERNEL
+ ;;
+@@ -367,7 +415,11 @@
+ * - b7 holds address to return to
+ * - must not touch r8-r11
+ */
++#ifdef XEN
++GLOBAL_ENTRY(load_switch_stack)
++#else
+ ENTRY(load_switch_stack)
++#endif
+ .prologue
+ .altrp b7
+
+@@ -595,6 +647,11 @@
+ */
+ br.call.sptk.many rp=ia64_invoke_schedule_tail
+ }
++#ifdef XEN
++ // new domains are cloned but not exec'ed so switch to user mode here
++ cmp.ne pKStk,pUStk=r0,r0
++ br.cond.spnt ia64_leave_kernel
++#else
+ .ret8:
+ adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+@@ -603,6 +660,7 @@
+ mov r8=0
+ tbit.nz p6,p0=r2,TIF_SYSCALL_TRACE
+ (p6) br.cond.spnt .strace_check_retval
++#endif
+ ;; // added stop bits to prevent r8 dependency
+ END(ia64_ret_from_clone)
+ // fall through
+@@ -684,9 +742,14 @@
+ #endif /* CONFIG_PREEMPT */
+ adds r16=PT(LOADRS)+16,r12
+ adds r17=PT(AR_BSPSTORE)+16,r12
++#ifdef XEN
++ mov r31=r0
++ ;;
++#else
+ adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ (p6) ld4 r31=[r18] // load current_thread_info()->flags
++#endif
+ ld8 r19=[r16],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
+ nop.i 0
+ ;;
+@@ -745,7 +808,11 @@
+ mov b7=r0 // clear b7
+ ;;
+ (pUStk) st1 [r14]=r3
++#ifdef XEN
++ movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
++#else
+ addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
++#endif
+ ;;
+ mov r16=ar.bsp // get existing backing store pointer
+ srlz.i // ensure interruption collection is off
+@@ -796,9 +863,18 @@
+ ;;
+ (p6) cmp.eq.unc p6,p0=r21,r0 // p6 <- p6 && (r21 == 0)
+ #endif /* CONFIG_PREEMPT */
++#ifdef XEN
++ alloc loc0=ar.pfs,0,1,1,0
++ adds out0=16,r12
++ ;;
++(p6) br.call.sptk.many b0=deliver_pending_interrupt
++ mov ar.pfs=loc0
++ mov r31=r0
++#else
+ adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ (p6) ld4 r31=[r17] // load current_thread_info()->flags
++#endif
+ adds r21=PT(PR)+16,r12
+ ;;
+
+@@ -912,7 +988,11 @@
+ shr.u r18=r19,16 // get byte size of existing "dirty" partition
+ ;;
+ mov r16=ar.bsp // get existing backing store pointer
++#ifdef XEN
++ movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
++#else
+ addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
++#endif
+ ;;
+ ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
+ (pKStk) br.cond.dpnt skip_rbs_switch
+@@ -1264,6 +1344,7 @@
+ br.ret.sptk.many rp
+ END(unw_init_running)
+
++#ifndef XEN
+ .rodata
+ .align 8
+ .globl sys_call_table
+@@ -1526,3 +1607,4 @@
+ data8 sys_ni_syscall
+
+ .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
++#endif
diff --git a/xen/arch/ia64/patch/linux-2.6.7/gcc_intrin.h b/xen/arch/ia64/patch/linux-2.6.7/gcc_intrin.h
new file mode 100644
index 0000000000..abc9ca729c
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/gcc_intrin.h
@@ -0,0 +1,20 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/gcc_intrin.h 2005-01-23 13:23:36.000000000 -0700
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/gcc_intrin.h 2004-08-25 19:28:13.000000000 -0600
+@@ -92,6 +92,9 @@
+
+ #define ia64_hint_pause 0
+
++#ifdef XEN
++#define ia64_hint(mode) 0
++#else
+ #define ia64_hint(mode) \
+ ({ \
+ switch (mode) { \
+@@ -100,6 +103,7 @@
+ break; \
+ } \
+ })
++#endif
+
+
+ /* Integer values for mux1 instruction */
diff --git a/xen/arch/ia64/patch/linux-2.6.7/hardirq.h b/xen/arch/ia64/patch/linux-2.6.7/hardirq.h
new file mode 100644
index 0000000000..5b63a95f8a
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/hardirq.h
@@ -0,0 +1,22 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/hardirq.h 2004-06-15 23:19:02.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/hardirq.h 2004-12-17 13:47:03.000000000 -0700
+@@ -81,10 +81,19 @@
+ */
+ #define in_irq() (hardirq_count())
+ #define in_softirq() (softirq_count())
++#ifdef XEN
+ #define in_interrupt() (irq_count())
++#else
++#define in_interrupt() 0 // FIXME LATER
++#endif
+
++#ifdef XEN
++#define hardirq_trylock(cpu) (!in_interrupt())
++#define hardirq_endlock(cpu) do { } while (0)
++#else
+ #define hardirq_trylock() (!in_interrupt())
+ #define hardirq_endlock() do { } while (0)
++#endif
+
+ #ifdef CONFIG_PREEMPT
+ # include <linux/smp_lock.h>
diff --git a/xen/arch/ia64/patch/linux-2.6.7/head.S b/xen/arch/ia64/patch/linux-2.6.7/head.S
new file mode 100644
index 0000000000..007f0f77fc
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/head.S
@@ -0,0 +1,93 @@
+--- ../../linux-2.6.7/arch/ia64/kernel/head.S 2005-03-24 19:39:56.000000000 -0700
++++ arch/ia64/head.S 2005-04-01 12:56:01.000000000 -0700
+@@ -1,3 +1,8 @@
++#ifdef XEN
++#define console_print printf
++#define kernel_thread_helper 0
++#define sys_exit 0
++#endif
+ /*
+ * Here is where the ball gets rolling as far as the kernel is concerned.
+ * When control is transferred to _start, the bootload has already
+@@ -166,7 +171,11 @@
+ dep r18=0,r3,0,12
+ ;;
+ or r18=r17,r18
++#ifdef XEN
++ dep r2=-1,r3,60,4 // IMVA of task
++#else
+ dep r2=-1,r3,61,3 // IMVA of task
++#endif
+ ;;
+ mov r17=rr[r2]
+ ;;
+@@ -205,7 +214,11 @@
+ ;;
+ mov ar.rsc=0x3 // place RSE in eager mode
+
++#ifdef XEN
++(isBP) dep r28=-1,r28,60,4 // make address virtual
++#else
+ (isBP) dep r28=-1,r28,61,3 // make address virtual
++#endif
+ (isBP) movl r2=ia64_boot_param
+ ;;
+ (isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader
+@@ -238,14 +251,30 @@
+ br.call.sptk.many rp=sys_fw_init
+ .ret1:
+ #endif
++#ifdef XEN
++ alloc r2=ar.pfs,8,0,2,0
++ ;;
++#define fake_mbi_magic 0
++#define MULTIBOOT_INFO_SIZE 1024
++ .rodata
++fake_mbi:
++ .skip MULTIBOOT_INFO_SIZE
++ .previous
++ movl out0=fake_mbi
++ ;;
++ br.call.sptk.many rp=cmain
++#else
+ br.call.sptk.many rp=start_kernel
++#endif
+ .ret2: addl r3=@ltoff(halt_msg),gp
+ ;;
+ alloc r2=ar.pfs,8,0,2,0
+ ;;
+ ld8 out0=[r3]
+ br.call.sptk.many b0=console_print
++ ;;
+ self: br.sptk.many self // endless loop
++ ;;
+ END(_start)
+
+ GLOBAL_ENTRY(ia64_save_debug_regs)
+@@ -781,8 +810,13 @@
+ movl r18=KERNEL_START
+ dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
++#ifdef XEN
++ dep r17=-1,r17,60,4
++ dep sp=-1,sp,60,4
++#else
+ dep r17=-1,r17,61,3
+ dep sp=-1,sp,61,3
++#endif
+ ;;
+ or r3=r3,r18
+ or r14=r14,r18
+@@ -838,7 +872,12 @@
+ * intermediate precision so that we can produce a full 64-bit result.
+ */
+ GLOBAL_ENTRY(sched_clock)
++#ifdef XEN
++ break 0;; // FIX IA64_CPUINFO_NSEC_PER_CYC_OFFSET
++ //movl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET
++#else
+ addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
++#endif
+ mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
+ ;;
+ ldf8 f8=[r8]
diff --git a/xen/arch/ia64/patch/linux-2.6.7/hpsim_irq.c b/xen/arch/ia64/patch/linux-2.6.7/hpsim_irq.c
new file mode 100644
index 0000000000..a7f576e16b
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/hpsim_irq.c
@@ -0,0 +1,36 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/hp/sim/hpsim_irq.c 2004-06-15 23:20:26.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/hpsim_irq.c 2004-11-01 17:54:15.000000000 -0700
+@@ -9,7 +9,17 @@
+ #include <linux/kernel.h>
+ #include <linux/sched.h>
+ #include <linux/irq.h>
++#ifdef XEN
++#include <asm/hw_irq.h>
++#endif
+
++#if 1
++void __init
++hpsim_irq_init (void)
++{
++ printf("*** hpsim_irq_init called: NOT NEEDED?!?!?\n");
++}
++#else
+ static unsigned int
+ hpsim_irq_startup (unsigned int irq)
+ {
+@@ -19,6 +29,10 @@
+ static void
+ hpsim_irq_noop (unsigned int irq)
+ {
++#if 1
++printf("hpsim_irq_noop: irq=%d\n",irq);
++while(irq);
++#endif
+ }
+
+ static struct hw_interrupt_type irq_type_hp_sim = {
+@@ -44,3 +58,4 @@
+ idesc->handler = &irq_type_hp_sim;
+ }
+ }
++#endif
diff --git a/xen/arch/ia64/patch/linux-2.6.7/hpsim_ssc.h b/xen/arch/ia64/patch/linux-2.6.7/hpsim_ssc.h
new file mode 100644
index 0000000000..e8bf15eda0
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/hpsim_ssc.h
@@ -0,0 +1,26 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/hp/sim/hpsim_ssc.h 2004-06-15 23:19:43.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/hpsim_ssc.h 2004-08-29 01:04:23.000000000 -0600
+@@ -33,4 +33,23 @@
+ */
+ extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
+
++#ifdef XEN
++/* Note: These are declared in linux/arch/ia64/hp/sim/simscsi.c but belong
++ * in linux/include/asm-ia64/hpsim_ssc.h, hence their addition here */
++#define SSC_OPEN 50
++#define SSC_CLOSE 51
++#define SSC_READ 52
++#define SSC_WRITE 53
++#define SSC_GET_COMPLETION 54
++#define SSC_WAIT_COMPLETION 55
++
++#define SSC_WRITE_ACCESS 2
++#define SSC_READ_ACCESS 1
++
++struct ssc_disk_req {
++ unsigned long addr;
++ unsigned long len;
++};
++#endif
++
+ #endif /* _IA64_PLATFORM_HPSIM_SSC_H */
diff --git a/xen/arch/ia64/patch/linux-2.6.7/hw_irq.h b/xen/arch/ia64/patch/linux-2.6.7/hw_irq.h
new file mode 100644
index 0000000000..8607ff4234
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/hw_irq.h
@@ -0,0 +1,24 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/hw_irq.h 2004-06-15 23:19:22.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/hw_irq.h 2004-08-27 09:07:38.000000000 -0600
+@@ -9,7 +9,9 @@
+ #include <linux/interrupt.h>
+ #include <linux/sched.h>
+ #include <linux/types.h>
++#ifndef XEN
+ #include <linux/profile.h>
++#endif
+
+ #include <asm/machvec.h>
+ #include <asm/ptrace.h>
+@@ -96,7 +98,11 @@
+ * Default implementations for the irq-descriptor API:
+ */
+
++#ifdef XEN
++#define _irq_desc irq_desc
++#else
+ extern irq_desc_t _irq_desc[NR_IRQS];
++#endif
+
+ #ifndef CONFIG_IA64_GENERIC
+ static inline irq_desc_t *
diff --git a/xen/arch/ia64/patch/linux-2.6.7/ide.h b/xen/arch/ia64/patch/linux-2.6.7/ide.h
new file mode 100644
index 0000000000..b32b79bf3d
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/ide.h
@@ -0,0 +1,35 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/ide.h 2004-06-15 23:19:36.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/ide.h 2004-08-25 19:28:13.000000000 -0600
+@@ -64,6 +64,32 @@
+ #define ide_init_default_irq(base) ide_default_irq(base)
+ #endif
+
++#ifdef XEN
++// this is moved to linux/ide.h in newer versions of linux
++typedef union {
++ unsigned all : 8; /* all of the bits together */
++ struct {
++ unsigned head : 4; /* always zeros here */
++ unsigned unit : 1; /* drive select number, 0 or 1 */
++ unsigned bit5 : 1; /* always 1 */
++ unsigned lba : 1; /* using LBA instead of CHS */
++ unsigned bit7 : 1; /* always 1 */
++ } b;
++} select_t;
++
++typedef union {
++ unsigned all : 8; /* all of the bits together */
++ struct {
++ unsigned bit0 : 1;
++ unsigned nIEN : 1; /* device INTRQ to host */
++ unsigned SRST : 1; /* host soft reset bit */
++ unsigned bit3 : 1; /* ATA-2 thingy */
++ unsigned reserved456 : 3;
++ unsigned HOB : 1; /* 48-bit address ordering */
++ } b;
++} control_t;
++#endif
++
+ #include <asm-generic/ide_iops.h>
+
+ #endif /* __KERNEL__ */
diff --git a/xen/arch/ia64/patch/linux-2.6.7/init_task.c b/xen/arch/ia64/patch/linux-2.6.7/init_task.c
new file mode 100644
index 0000000000..8dea5b81f9
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/init_task.c
@@ -0,0 +1,35 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/kernel/init_task.c 2004-06-15 23:20:26.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/init_task.c 2004-08-27 00:06:35.000000000 -0600
+@@ -15,10 +15,12 @@
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+
++#ifndef XEN
+ static struct fs_struct init_fs = INIT_FS;
+ static struct files_struct init_files = INIT_FILES;
+ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
++#endif
+ struct mm_struct init_mm = INIT_MM(init_mm);
+
+ EXPORT_SYMBOL(init_mm);
+@@ -33,13 +35,19 @@
+
+ union {
+ struct {
++#ifdef XEN
++ struct domain task;
++#else
+ struct task_struct task;
+ struct thread_info thread_info;
++#endif
+ } s;
+ unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
+ } init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
+ .task = INIT_TASK(init_task_mem.s.task),
++#ifndef XEN
+ .thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
++#endif
+ }};
+
+ EXPORT_SYMBOL(init_task);
diff --git a/xen/arch/ia64/patch/linux-2.6.7/init_task.h b/xen/arch/ia64/patch/linux-2.6.7/init_task.h
new file mode 100644
index 0000000000..e1092f416a
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/init_task.h
@@ -0,0 +1,53 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/init_task.h 2004-06-15 23:18:57.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/init_task.h 2004-11-15 17:06:20.000000000 -0700
+@@ -31,6 +31,18 @@
+ .max_reqs = ~0U, \
+ }
+
++#ifdef XEN
++#define INIT_MM(name) \
++{ \
++ .mm_rb = RB_ROOT, \
++ .pgd = swapper_pg_dir, \
++ .mm_users = ATOMIC_INIT(2), \
++ .mm_count = ATOMIC_INIT(1), \
++ .page_table_lock = SPIN_LOCK_UNLOCKED, \
++ .mmlist = LIST_HEAD_INIT(name.mmlist), \
++ .cpu_vm_mask = CPU_MASK_ALL, \
++}
++#else
+ #define INIT_MM(name) \
+ { \
+ .mm_rb = RB_ROOT, \
+@@ -43,6 +55,7 @@
+ .cpu_vm_mask = CPU_MASK_ALL, \
+ .default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
+ }
++#endif
+
+ #define INIT_SIGNALS(sig) { \
+ .count = ATOMIC_INIT(1), \
+@@ -64,6 +77,15 @@
+ * INIT_TASK is used to set up the first task table, touch at
+ * your own risk!. Base=0, limit=0x1fffff (=2MB)
+ */
++#ifdef XEN
++#define INIT_TASK(tsk) \
++{ \
++ /*processor: 0,*/ \
++ /*domain_id: IDLE_DOMAIN_ID,*/ \
++ /*domain_flags: DOMF_idle_domain,*/ \
++ refcnt: ATOMIC_INIT(1) \
++}
++#else
+ #define INIT_TASK(tsk) \
+ { \
+ .state = 0, \
+@@ -113,6 +135,7 @@
+ .switch_lock = SPIN_LOCK_UNLOCKED, \
+ .journal_info = NULL, \
+ }
++#endif
+
+
+
diff --git a/xen/arch/ia64/patch/linux-2.6.7/interrupt.h b/xen/arch/ia64/patch/linux-2.6.7/interrupt.h
new file mode 100644
index 0000000000..d6b1f6b6ae
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/interrupt.h
@@ -0,0 +1,18 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/interrupt.h 2004-06-15 23:19:29.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/interrupt.h 2004-08-25 19:28:13.000000000 -0600
+@@ -32,6 +32,7 @@
+ #define IRQ_HANDLED (1)
+ #define IRQ_RETVAL(x) ((x) != 0)
+
++#ifndef XEN
+ struct irqaction {
+ irqreturn_t (*handler)(int, void *, struct pt_regs *);
+ unsigned long flags;
+@@ -46,6 +47,7 @@
+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long, const char *, void *);
+ extern void free_irq(unsigned int, void *);
++#endif
+
+ /*
+ * Temporary defines for UP kernels, until all code gets fixed.
diff --git a/xen/arch/ia64/patch/linux-2.6.7/io.h b/xen/arch/ia64/patch/linux-2.6.7/io.h
new file mode 100644
index 0000000000..a3c7f51575
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/io.h
@@ -0,0 +1,14 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/io.h 2004-06-15 23:18:57.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/io.h 2004-11-05 16:53:36.000000000 -0700
+@@ -23,7 +23,11 @@
+ #define __SLOW_DOWN_IO do { } while (0)
+ #define SLOW_DOWN_IO do { } while (0)
+
++#ifdef XEN
++#define __IA64_UNCACHED_OFFSET 0xdffc000000000000 /* region 6 */
++#else
+ #define __IA64_UNCACHED_OFFSET 0xc000000000000000 /* region 6 */
++#endif
+
+ /*
+ * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but
diff --git a/xen/arch/ia64/patch/linux-2.6.7/irq.h b/xen/arch/ia64/patch/linux-2.6.7/irq.h
new file mode 100644
index 0000000000..65b09cd01f
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/irq.h
@@ -0,0 +1,18 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/irq.h 2005-01-23 13:23:36.000000000 -0700
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/irq.h 2004-08-25 19:28:13.000000000 -0600
+@@ -30,6 +30,15 @@
+ extern void enable_irq (unsigned int);
+ extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
+
++#ifdef XEN
++// dup'ed from signal.h to avoid changes to includes
++#define SA_NOPROFILE 0x02000000
++#define SA_SHIRQ 0x04000000
++#define SA_RESTART 0x10000000
++#define SA_INTERRUPT 0x20000000
++#define SA_SAMPLE_RANDOM SA_RESTART
++#endif
++
+ #ifdef CONFIG_SMP
+ extern void move_irq(int irq);
+ #else
diff --git a/xen/arch/ia64/patch/linux-2.6.7/irq_ia64.c b/xen/arch/ia64/patch/linux-2.6.7/irq_ia64.c
new file mode 100644
index 0000000000..8d991ddd2c
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/irq_ia64.c
@@ -0,0 +1,82 @@
+--- /home/djm/linux-2.6.7/arch/ia64/kernel/irq_ia64.c 2004-06-15 23:19:13.000000000 -0600
++++ arch/ia64/irq_ia64.c 2005-02-17 13:17:16.000000000 -0700
+@@ -17,18 +17,26 @@
+ #include <linux/config.h>
+ #include <linux/module.h>
+
++#ifndef XEN
+ #include <linux/jiffies.h>
++#endif
+ #include <linux/errno.h>
+ #include <linux/init.h>
+ #include <linux/interrupt.h>
+ #include <linux/ioport.h>
++#ifndef XEN
+ #include <linux/kernel_stat.h>
++#endif
+ #include <linux/slab.h>
++#ifndef XEN
+ #include <linux/ptrace.h>
+ #include <linux/random.h> /* for rand_initialize_irq() */
+ #include <linux/signal.h>
++#endif
+ #include <linux/smp.h>
++#ifndef XEN
+ #include <linux/smp_lock.h>
++#endif
+ #include <linux/threads.h>
+
+ #include <asm/bitops.h>
+@@ -101,6 +109,24 @@
+ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
+ {
+ unsigned long saved_tpr;
++#if 0
++//FIXME: For debug only, can be removed
++ static char firstirq = 1;
++ static char firsttime[256];
++ static char firstpend[256];
++ if (firstirq) {
++ int i;
++ for (i=0;i<256;i++) firsttime[i] = 1;
++ for (i=0;i<256;i++) firstpend[i] = 1;
++ firstirq = 0;
++ }
++ if (firsttime[vector]) {
++ printf("**** (entry) First received int on vector=%d,itc=%lx\n",
++ (unsigned long) vector, ia64_get_itc());
++ firsttime[vector] = 0;
++ }
++#endif
++
+
+ #if IRQ_DEBUG
+ {
+@@ -145,6 +171,27 @@
+ ia64_setreg(_IA64_REG_CR_TPR, vector);
+ ia64_srlz_d();
+
++#ifdef XEN
++ if (vector != 0xef) {
++ extern void vcpu_pend_interrupt(void *, int);
++#if 0
++ if (firsttime[vector]) {
++ printf("**** (iterate) First received int on vector=%d,itc=%lx\n",
++ (unsigned long) vector, ia64_get_itc());
++ firsttime[vector] = 0;
++ }
++ if (firstpend[vector]) {
++ printf("**** First pended int on vector=%d,itc=%lx\n",
++ (unsigned long) vector,ia64_get_itc());
++ firstpend[vector] = 0;
++ }
++#endif
++ //FIXME: TEMPORARY HACK!!!!
++ vcpu_pend_interrupt(dom0->vcpu[0],vector);
++ domain_wake(dom0->vcpu[0]);
++ }
++ else
++#endif
+ do_IRQ(local_vector_to_irq(vector), regs);
+
+ /*
diff --git a/xen/arch/ia64/patch/linux-2.6.7/ivt.S b/xen/arch/ia64/patch/linux-2.6.7/ivt.S
new file mode 100644
index 0000000000..83909bb110
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/ivt.S
@@ -0,0 +1,528 @@
+--- ../../linux-2.6.7/arch/ia64/kernel/ivt.S 2004-06-15 23:18:59.000000000 -0600
++++ arch/ia64/ivt.S 2005-04-01 12:56:01.000000000 -0700
+@@ -1,3 +1,21 @@
++
++#ifdef XEN
++//#define CONFIG_DISABLE_VHPT // FIXME: change when VHPT is enabled??
++// these are all hacked out for now as the entire IVT
++// will eventually be replaced... just want to use it
++// for startup code to handle TLB misses
++//#define ia64_leave_kernel 0
++//#define ia64_ret_from_syscall 0
++//#define ia64_handle_irq 0
++//#define ia64_fault 0
++#define ia64_illegal_op_fault 0
++#define ia64_prepare_handle_unaligned 0
++#define ia64_bad_break 0
++#define ia64_trace_syscall 0
++#define sys_call_table 0
++#define sys_ni_syscall 0
++#include <asm/vhpt.h>
++#endif
+ /*
+ * arch/ia64/kernel/ivt.S
+ *
+@@ -76,6 +94,13 @@
+ mov r19=n;; /* prepare to save predicates */ \
+ br.sptk.many dispatch_to_fault_handler
+
++#ifdef XEN
++#define REFLECT(n) \
++ mov r31=pr; \
++ mov r19=n;; /* prepare to save predicates */ \
++ br.sptk.many dispatch_reflection
++#endif
++
+ .section .text.ivt,"ax"
+
+ .align 32768 // align on 32KB boundary
+@@ -213,6 +238,13 @@
+ // 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ ENTRY(itlb_miss)
+ DBG_FAULT(1)
++#ifdef XEN
++ VHPT_CCHAIN_LOOKUP(itlb_miss,i)
++#ifdef VHPT_GLOBAL
++ br.cond.sptk page_fault
++ ;;
++#endif
++#endif
+ /*
+ * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+@@ -257,6 +289,13 @@
+ // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ ENTRY(dtlb_miss)
+ DBG_FAULT(2)
++#ifdef XEN
++ VHPT_CCHAIN_LOOKUP(dtlb_miss,d)
++#ifdef VHPT_GLOBAL
++ br.cond.sptk page_fault
++ ;;
++#endif
++#endif
+ /*
+ * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+@@ -301,6 +340,13 @@
+ // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ ENTRY(alt_itlb_miss)
+ DBG_FAULT(3)
++#ifdef XEN
++//#ifdef VHPT_GLOBAL
++// VHPT_CCHAIN_LOOKUP(alt_itlb_miss,i)
++// br.cond.sptk page_fault
++// ;;
++//#endif
++#endif
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r21=cr.ipsr
+@@ -339,6 +385,13 @@
+ // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ ENTRY(alt_dtlb_miss)
+ DBG_FAULT(4)
++#ifdef XEN
++//#ifdef VHPT_GLOBAL
++// VHPT_CCHAIN_LOOKUP(alt_dtlb_miss,d)
++// br.cond.sptk page_fault
++// ;;
++//#endif
++#endif
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r20=cr.isr
+@@ -368,6 +421,17 @@
+ cmp.ne p8,p0=r0,r23
+ (p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
+ (p8) br.cond.spnt page_fault
++#ifdef XEN
++ ;;
++ // FIXME: inadequate test, this is where we test for Xen address
++ // note that 0xf000 (cached) and 0xd000 (uncached) addresses
++ // should be OK. (Though no I/O is done in Xen, EFI needs uncached
++ // addresses and some domain EFI calls are passed through)
++ tbit.nz p0,p8=r16,60
++(p8) br.cond.spnt page_fault
++//(p8) br.cond.spnt 0
++ ;;
++#endif
+
+ dep r21=-1,r21,IA64_PSR_ED_BIT,1
+ or r19=r19,r17 // insert PTE control bits into r19
+@@ -448,6 +512,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ ENTRY(ikey_miss)
++#ifdef XEN
++ REFLECT(6)
++#endif
+ DBG_FAULT(6)
+ FAULT(6)
+ END(ikey_miss)
+@@ -460,9 +527,16 @@
+ srlz.i
+ ;;
+ SAVE_MIN_WITH_COVER
++#ifdef XEN
++ alloc r15=ar.pfs,0,0,4,0
++ mov out0=cr.ifa
++ mov out1=cr.isr
++ mov out3=cr.itir
++#else
+ alloc r15=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
++#endif
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic | PSR_DEFAULT_BITS
+@@ -483,6 +557,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ ENTRY(dkey_miss)
++#ifdef XEN
++ REFLECT(7)
++#endif
+ DBG_FAULT(7)
+ FAULT(7)
+ END(dkey_miss)
+@@ -491,6 +568,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ ENTRY(dirty_bit)
++#ifdef XEN
++ REFLECT(8)
++#endif
+ DBG_FAULT(8)
+ /*
+ * What we do here is to simply turn on the dirty bit in the PTE. We need to
+@@ -553,6 +633,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ ENTRY(iaccess_bit)
++#ifdef XEN
++ REFLECT(9)
++#endif
+ DBG_FAULT(9)
+ // Like Entry 8, except for instruction access
+ mov r16=cr.ifa // get the address that caused the fault
+@@ -618,6 +701,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ ENTRY(daccess_bit)
++#ifdef XEN
++ REFLECT(10)
++#endif
+ DBG_FAULT(10)
+ // Like Entry 8, except for data access
+ mov r16=cr.ifa // get the address that caused the fault
+@@ -686,6 +772,16 @@
+ * to prevent leaking bits from kernel to user level.
+ */
+ DBG_FAULT(11)
++#ifdef XEN
++ mov r16=cr.isr
++ mov r17=cr.iim
++ mov r31=pr
++ ;;
++ cmp.eq p7,p0=r0,r17 // is this a psuedo-cover?
++ // FIXME: may also need to check slot==2?
++(p7) br.sptk.many dispatch_privop_fault
++ br.sptk.many dispatch_break_fault
++#endif
+ mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
+ mov r17=cr.iim
+ mov r18=__IA64_BREAK_SYSCALL
+@@ -696,7 +792,9 @@
+ mov r27=ar.rsc
+ mov r26=ar.pfs
+ mov r28=cr.iip
++#ifndef XEN
+ mov r31=pr // prepare to save predicates
++#endif
+ mov r20=r1
+ ;;
+ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
+@@ -792,6 +890,36 @@
+ DBG_FAULT(13)
+ FAULT(13)
+
++#ifdef XEN
++ // There is no particular reason for this code to be here, other than that
++ // there happens to be space here that would go unused otherwise. If this
++ // fault ever gets "unreserved", simply moved the following code to a more
++ // suitable spot...
++
++ENTRY(dispatch_break_fault)
++ SAVE_MIN_WITH_COVER
++ ;;
++ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
++ mov out0=cr.ifa
++ adds out1=16,sp
++ mov out2=cr.isr // FIXME: pity to make this slow access twice
++ mov out3=cr.iim // FIXME: pity to make this slow access twice
++
++ ssm psr.ic | PSR_DEFAULT_BITS
++ ;;
++ srlz.i // guarantee that interruption collection is on
++ ;;
++(p15) ssm psr.i // restore psr.i
++ adds r3=8,r2 // set up second base pointer
++ ;;
++ SAVE_REST
++ movl r14=ia64_leave_kernel
++ ;;
++ mov rp=r14
++ br.sptk.many ia64_prepare_handle_break
++END(dispatch_break_fault)
++#endif
++
+ .org ia64_ivt+0x3800
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x3800 Entry 14 (size 64 bundles) Reserved
+@@ -842,9 +970,11 @@
+ * - ar.fpsr: set to kernel settings
+ */
+ GLOBAL_ENTRY(ia64_syscall_setup)
++#ifndef XEN
+ #if PT(B6) != 0
+ # error This code assumes that b6 is the first field in pt_regs.
+ #endif
++#endif
+ st8 [r1]=r19 // save b6
+ add r16=PT(CR_IPSR),r1 // initialize first base pointer
+ add r17=PT(R11),r1 // initialize second base pointer
+@@ -974,6 +1104,37 @@
+ DBG_FAULT(16)
+ FAULT(16)
+
++#ifdef XEN
++ // There is no particular reason for this code to be here, other than that
++ // there happens to be space here that would go unused otherwise. If this
++ // fault ever gets "unreserved", simply moved the following code to a more
++ // suitable spot...
++
++ENTRY(dispatch_privop_fault)
++ SAVE_MIN_WITH_COVER
++ ;;
++ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
++ mov out0=cr.ifa
++ adds out1=16,sp
++ mov out2=cr.isr // FIXME: pity to make this slow access twice
++ mov out3=cr.itir
++
++ ssm psr.ic | PSR_DEFAULT_BITS
++ ;;
++ srlz.i // guarantee that interruption collection is on
++ ;;
++(p15) ssm psr.i // restore psr.i
++ adds r3=8,r2 // set up second base pointer
++ ;;
++ SAVE_REST
++ movl r14=ia64_leave_kernel
++ ;;
++ mov rp=r14
++ br.sptk.many ia64_prepare_handle_privop
++END(dispatch_privop_fault)
++#endif
++
++
+ .org ia64_ivt+0x4400
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x4400 Entry 17 (size 64 bundles) Reserved
+@@ -1090,6 +1251,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ ENTRY(page_not_present)
++#ifdef XEN
++ REFLECT(20)
++#endif
+ DBG_FAULT(20)
+ mov r16=cr.ifa
+ rsm psr.dt
+@@ -1110,6 +1274,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ ENTRY(key_permission)
++#ifdef XEN
++ REFLECT(21)
++#endif
+ DBG_FAULT(21)
+ mov r16=cr.ifa
+ rsm psr.dt
+@@ -1123,6 +1290,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ ENTRY(iaccess_rights)
++#ifdef XEN
++ REFLECT(22)
++#endif
+ DBG_FAULT(22)
+ mov r16=cr.ifa
+ rsm psr.dt
+@@ -1136,6 +1306,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ ENTRY(daccess_rights)
++#ifdef XEN
++ REFLECT(23)
++#endif
+ DBG_FAULT(23)
+ mov r16=cr.ifa
+ rsm psr.dt
+@@ -1153,8 +1326,13 @@
+ mov r16=cr.isr
+ mov r31=pr
+ ;;
++#ifdef XEN
++ cmp4.ge p6,p0=0x20,r16
++(p6) br.sptk.many dispatch_privop_fault
++#else
+ cmp4.eq p6,p0=0,r16
+ (p6) br.sptk.many dispatch_illegal_op_fault
++#endif
+ ;;
+ mov r19=24 // fault number
+ br.sptk.many dispatch_to_fault_handler
+@@ -1164,6 +1342,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ ENTRY(disabled_fp_reg)
++#ifdef XEN
++ REFLECT(25)
++#endif
+ DBG_FAULT(25)
+ rsm psr.dfh // ensure we can access fph
+ ;;
+@@ -1177,6 +1358,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ ENTRY(nat_consumption)
++#ifdef XEN
++ REFLECT(26)
++#endif
+ DBG_FAULT(26)
+ FAULT(26)
+ END(nat_consumption)
+@@ -1185,6 +1369,10 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ ENTRY(speculation_vector)
++#ifdef XEN
++ // this probably need not reflect...
++ REFLECT(27)
++#endif
+ DBG_FAULT(27)
+ /*
+ * A [f]chk.[as] instruction needs to take the branch to the recovery code but
+@@ -1228,6 +1416,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ ENTRY(debug_vector)
++#ifdef XEN
++ REFLECT(29)
++#endif
+ DBG_FAULT(29)
+ FAULT(29)
+ END(debug_vector)
+@@ -1236,6 +1427,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ ENTRY(unaligned_access)
++#ifdef XEN
++ REFLECT(30)
++#endif
+ DBG_FAULT(30)
+ mov r16=cr.ipsr
+ mov r31=pr // prepare to save predicates
+@@ -1247,6 +1441,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ ENTRY(unsupported_data_reference)
++#ifdef XEN
++ REFLECT(31)
++#endif
+ DBG_FAULT(31)
+ FAULT(31)
+ END(unsupported_data_reference)
+@@ -1255,6 +1452,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ ENTRY(floating_point_fault)
++#ifdef XEN
++ REFLECT(32)
++#endif
+ DBG_FAULT(32)
+ FAULT(32)
+ END(floating_point_fault)
+@@ -1263,6 +1463,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ ENTRY(floating_point_trap)
++#ifdef XEN
++ REFLECT(33)
++#endif
+ DBG_FAULT(33)
+ FAULT(33)
+ END(floating_point_trap)
+@@ -1271,6 +1474,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ ENTRY(lower_privilege_trap)
++#ifdef XEN
++ REFLECT(34)
++#endif
+ DBG_FAULT(34)
+ FAULT(34)
+ END(lower_privilege_trap)
+@@ -1279,6 +1485,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ ENTRY(taken_branch_trap)
++#ifdef XEN
++ REFLECT(35)
++#endif
+ DBG_FAULT(35)
+ FAULT(35)
+ END(taken_branch_trap)
+@@ -1287,6 +1496,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ ENTRY(single_step_trap)
++#ifdef XEN
++ REFLECT(36)
++#endif
+ DBG_FAULT(36)
+ FAULT(36)
+ END(single_step_trap)
+@@ -1343,6 +1555,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ ENTRY(ia32_exception)
++#ifdef XEN
++ REFLECT(45)
++#endif
+ DBG_FAULT(45)
+ FAULT(45)
+ END(ia32_exception)
+@@ -1351,6 +1566,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ ENTRY(ia32_intercept)
++#ifdef XEN
++ REFLECT(46)
++#endif
+ DBG_FAULT(46)
+ #ifdef CONFIG_IA32_SUPPORT
+ mov r31=pr
+@@ -1381,6 +1599,9 @@
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+ ENTRY(ia32_interrupt)
++#ifdef XEN
++ REFLECT(47)
++#endif
+ DBG_FAULT(47)
+ #ifdef CONFIG_IA32_SUPPORT
+ mov r31=pr
+@@ -1510,6 +1731,39 @@
+ DBG_FAULT(67)
+ FAULT(67)
+
++#ifdef XEN
++ .org ia64_ivt+0x8000
++ENTRY(dispatch_reflection)
++ /*
++ * Input:
++ * psr.ic: off
++ * r19: intr type (offset into ivt, see ia64_int.h)
++ * r31: contains saved predicates (pr)
++ */
++ SAVE_MIN_WITH_COVER_R19
++ alloc r14=ar.pfs,0,0,5,0
++ mov out4=r15
++ mov out0=cr.ifa
++ adds out1=16,sp
++ mov out2=cr.isr
++ mov out3=cr.iim
++// mov out3=cr.itir
++
++ ssm psr.ic | PSR_DEFAULT_BITS
++ ;;
++ srlz.i // guarantee that interruption collection is on
++ ;;
++(p15) ssm psr.i // restore psr.i
++ adds r3=8,r2 // set up second base pointer
++ ;;
++ SAVE_REST
++ movl r14=ia64_leave_kernel
++ ;;
++ mov rp=r14
++ br.sptk.many ia64_prepare_handle_reflection
++END(dispatch_reflection)
++#endif
++
+ #ifdef CONFIG_IA32_SUPPORT
+
+ /*
diff --git a/xen/arch/ia64/patch/linux-2.6.7/kregs.h b/xen/arch/ia64/patch/linux-2.6.7/kregs.h
new file mode 100644
index 0000000000..926fbe3171
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/kregs.h
@@ -0,0 +1,13 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/kregs.h 2004-06-15 23:19:01.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/kregs.h 2004-09-17 18:27:22.000000000 -0600
+@@ -30,6 +30,10 @@
+ #define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */
+ #define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */
+ #define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- & register-stacks */
++#ifdef XEN
++#define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */
++#define IA64_TR_VHPT 4 /* dtr4: vhpt */
++#endif
+
+ /* Processor status register bits: */
+ #define IA64_PSR_BE_BIT 1
diff --git a/xen/arch/ia64/patch/linux-2.6.7/lds.S b/xen/arch/ia64/patch/linux-2.6.7/lds.S
new file mode 100644
index 0000000000..a081b4be22
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/lds.S
@@ -0,0 +1,17 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/kernel/vmlinux.lds.S 2004-06-15 23:19:52.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/xen.lds.S 2004-08-25 19:28:12.000000000 -0600
+@@ -11,12 +11,14 @@
+ OUTPUT_FORMAT("elf64-ia64-little")
+ OUTPUT_ARCH(ia64)
+ ENTRY(phys_start)
++#ifndef XEN
+ jiffies = jiffies_64;
+ PHDRS {
+ code PT_LOAD;
+ percpu PT_LOAD;
+ data PT_LOAD;
+ }
++#endif
+ SECTIONS
+ {
+ /* Sections to be discarded */
diff --git a/xen/arch/ia64/patch/linux-2.6.7/linuxtime.h b/xen/arch/ia64/patch/linux-2.6.7/linuxtime.h
new file mode 100644
index 0000000000..75f92a5a35
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/linuxtime.h
@@ -0,0 +1,34 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/time.h 2004-06-15 23:19:37.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/xen/linuxtime.h 2004-11-15 17:42:04.000000000 -0700
+@@ -1,6 +1,11 @@
+ #ifndef _LINUX_TIME_H
+ #define _LINUX_TIME_H
+
++#ifdef XEN
++typedef s64 time_t;
++typedef s64 suseconds_t;
++#endif
++
+ #include <asm/param.h>
+ #include <linux/types.h>
+
+@@ -25,7 +30,9 @@
+ #ifdef __KERNEL__
+
+ #include <linux/spinlock.h>
++#ifndef XEN
+ #include <linux/seqlock.h>
++#endif
+ #include <linux/timex.h>
+ #include <asm/div64.h>
+ #ifndef div_long_long_rem
+@@ -322,7 +329,9 @@
+
+ extern struct timespec xtime;
+ extern struct timespec wall_to_monotonic;
++#ifndef XEN
+ extern seqlock_t xtime_lock;
++#endif
+
+ static inline unsigned long get_seconds(void)
+ {
diff --git a/xen/arch/ia64/patch/linux-2.6.7/mca_asm.h b/xen/arch/ia64/patch/linux-2.6.7/mca_asm.h
new file mode 100644
index 0000000000..35e5bce4a2
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/mca_asm.h
@@ -0,0 +1,32 @@
+--- ../../linux-2.6.7/include/asm-ia64/mca_asm.h 2004-06-15 23:20:03.000000000 -0600
++++ include/asm-ia64/mca_asm.h 2005-04-01 12:56:37.000000000 -0700
+@@ -26,8 +26,13 @@
+ * direct mapped to physical addresses.
+ * 1. Lop off bits 61 thru 63 in the virtual address
+ */
++#ifdef XEN
++#define INST_VA_TO_PA(addr) \
++ dep addr = 0, addr, 60, 4
++#else // XEN
+ #define INST_VA_TO_PA(addr) \
+ dep addr = 0, addr, 61, 3
++#endif // XEN
+ /*
+ * This macro converts a data virtual address to a physical address
+ * Right now for simulation purposes the virtual addresses are
+@@ -42,9 +47,15 @@
+ * direct mapped to physical addresses.
+ * 1. Put 0x7 in bits 61 thru 63.
+ */
++#ifdef XEN
++#define DATA_PA_TO_VA(addr,temp) \
++ mov temp = 0xf ;; \
++ dep addr = temp, addr, 60, 4
++#else // XEN
+ #define DATA_PA_TO_VA(addr,temp) \
+ mov temp = 0x7 ;; \
+ dep addr = temp, addr, 61, 3
++#endif // XEN
+
+ /*
+ * This macro jumps to the instruction at the given virtual address
diff --git a/xen/arch/ia64/patch/linux-2.6.7/minstate.h b/xen/arch/ia64/patch/linux-2.6.7/minstate.h
new file mode 100644
index 0000000000..f73a8d3874
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/minstate.h
@@ -0,0 +1,29 @@
+--- ../../linux-2.6.7/arch/ia64/kernel/minstate.h 2004-06-15 23:19:52.000000000 -0600
++++ arch/ia64/minstate.h 2005-04-01 12:56:01.000000000 -0700
+@@ -45,7 +45,7 @@
+ (pKStk) tpa r1=sp; /* compute physical addr of sp */ \
+ (pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+ (pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
+-(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
++(pUStk) dep r22=-1,r22,60,4; /* compute kernel virtual addr of RBS */ \
+ ;; \
+ (pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
+ (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
+@@ -65,7 +65,7 @@
+ #endif
+
+ #ifdef MINSTATE_PHYS
+-# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; dep reg=0,reg,61,3
++# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; dep reg=0,reg,60,4
+ # define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS
+ # define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS
+ #endif
+@@ -172,7 +172,7 @@
+ ;; \
+ .mem.offset 0,0; st8.spill [r16]=r15,16; \
+ .mem.offset 8,0; st8.spill [r17]=r14,16; \
+- dep r14=-1,r0,61,3; \
++ dep r14=-1,r0,60,4; \
+ ;; \
+ .mem.offset 0,0; st8.spill [r16]=r2,16; \
+ .mem.offset 8,0; st8.spill [r17]=r3,16; \
diff --git a/xen/arch/ia64/patch/linux-2.6.7/mm_bootmem.c b/xen/arch/ia64/patch/linux-2.6.7/mm_bootmem.c
new file mode 100644
index 0000000000..0aefe25d80
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/mm_bootmem.c
@@ -0,0 +1,92 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/mm/bootmem.c 2004-06-15 23:19:09.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/mm_bootmem.c 2004-12-17 13:47:03.000000000 -0700
+@@ -10,7 +10,9 @@
+ */
+
+ #include <linux/mm.h>
++#ifndef XEN
+ #include <linux/kernel_stat.h>
++#endif
+ #include <linux/swap.h>
+ #include <linux/interrupt.h>
+ #include <linux/init.h>
+@@ -55,6 +57,9 @@
+ bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
+ bdata->node_boot_start = (start << PAGE_SHIFT);
+ bdata->node_low_pfn = end;
++#ifdef XEN
++//printk("init_bootmem_core: mapstart=%lx,start=%lx,end=%lx,bdata->node_bootmem_map=%lx,bdata->node_boot_start=%lx,bdata->node_low_pfn=%lx\n",mapstart,start,end,bdata->node_bootmem_map,bdata->node_boot_start,bdata->node_low_pfn);
++#endif
+
+ /*
+ * Initially all pages are reserved - setup_arch() has to
+@@ -146,6 +151,9 @@
+ unsigned long i, start = 0, incr, eidx;
+ void *ret;
+
++#ifdef XEN
++//printf("__alloc_bootmem_core(%lx,%lx,%lx,%lx) called\n",bdata,size,align,goal);
++#endif
+ if(!size) {
+ printk("__alloc_bootmem_core(): zero-sized request\n");
+ BUG();
+@@ -153,6 +161,9 @@
+ BUG_ON(align & (align-1));
+
+ eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
++#ifdef XEN
++//printf("__alloc_bootmem_core: eidx=%lx\n",eidx);
++#endif
+ offset = 0;
+ if (align &&
+ (bdata->node_boot_start & (align - 1UL)) != 0)
+@@ -182,6 +193,9 @@
+ unsigned long j;
+ i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+ i = ALIGN(i, incr);
++#ifdef XEN
++//if (i >= eidx) goto fail_block;
++#endif
+ if (test_bit(i, bdata->node_bootmem_map))
+ continue;
+ for (j = i + 1; j < i + areasize; ++j) {
+@@ -203,6 +217,9 @@
+ return NULL;
+
+ found:
++#ifdef XEN
++//printf("__alloc_bootmem_core: start=%lx\n",start);
++#endif
+ bdata->last_success = start << PAGE_SHIFT;
+ BUG_ON(start >= eidx);
+
+@@ -262,6 +279,9 @@
+ page = virt_to_page(phys_to_virt(bdata->node_boot_start));
+ idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+ map = bdata->node_bootmem_map;
++#ifdef XEN
++//printk("free_all_bootmem_core: bdata=%lx, bdata->node_boot_start=%lx, bdata->node_low_pfn=%lx, bdata->node_bootmem_map=%lx\n",bdata,bdata->node_boot_start,bdata->node_low_pfn,bdata->node_bootmem_map);
++#endif
+ for (i = 0; i < idx; ) {
+ unsigned long v = ~map[i / BITS_PER_LONG];
+ if (v) {
+@@ -285,6 +305,9 @@
+ * Now free the allocator bitmap itself, it's not
+ * needed anymore:
+ */
++#ifdef XEN
++//printk("About to free the allocator bitmap itself\n");
++#endif
+ page = virt_to_page(bdata->node_bootmem_map);
+ count = 0;
+ for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
+@@ -327,6 +350,9 @@
+ return(init_bootmem_core(&contig_page_data, start, 0, pages));
+ }
+
++#ifdef XEN
++#undef reserve_bootmem
++#endif
+ #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
+ void __init reserve_bootmem (unsigned long addr, unsigned long size)
+ {
diff --git a/xen/arch/ia64/patch/linux-2.6.7/mm_contig.c b/xen/arch/ia64/patch/linux-2.6.7/mm_contig.c
new file mode 100644
index 0000000000..87c9f8b651
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/mm_contig.c
@@ -0,0 +1,216 @@
+--- ../../linux-2.6.7/arch/ia64/mm/contig.c 2004-06-15 23:19:12.000000000 -0600
++++ arch/ia64/mm_contig.c 2005-03-23 14:54:06.000000000 -0700
+@@ -15,11 +15,21 @@
+ * memory.
+ */
+ #include <linux/config.h>
++#ifdef XEN
++#include <xen/sched.h>
++#endif
+ #include <linux/bootmem.h>
+ #include <linux/efi.h>
+ #include <linux/mm.h>
+ #include <linux/swap.h>
+
++#ifdef XEN
++#undef reserve_bootmem
++extern struct page *zero_page_memmap_ptr;
++struct page *mem_map;
++#define MAX_DMA_ADDRESS ~0UL // FIXME???
++#endif
++
+ #include <asm/meminit.h>
+ #include <asm/pgalloc.h>
+ #include <asm/pgtable.h>
+@@ -37,30 +47,7 @@
+ void
+ show_mem (void)
+ {
+- int i, total = 0, reserved = 0;
+- int shared = 0, cached = 0;
+-
+- printk("Mem-info:\n");
+- show_free_areas();
+-
+- printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+- i = max_mapnr;
+- while (i-- > 0) {
+- if (!pfn_valid(i))
+- continue;
+- total++;
+- if (PageReserved(mem_map+i))
+- reserved++;
+- else if (PageSwapCache(mem_map+i))
+- cached++;
+- else if (page_count(mem_map + i))
+- shared += page_count(mem_map + i) - 1;
+- }
+- printk("%d pages of RAM\n", total);
+- printk("%d reserved pages\n", reserved);
+- printk("%d pages shared\n", shared);
+- printk("%d pages swap cached\n", cached);
+- printk("%ld pages in page table cache\n", pgtable_cache_size);
++ printk("Dummy show_mem\n");
+ }
+
+ /* physical address where the bootmem map is located */
+@@ -80,6 +67,9 @@
+ {
+ unsigned long *max_pfnp = arg, pfn;
+
++#ifdef XEN
++//printf("find_max_pfn: start=%lx, end=%lx, *arg=%lx\n",start,end,*(unsigned long *)arg);
++#endif
+ pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
+ if (pfn > *max_pfnp)
+ *max_pfnp = pfn;
+@@ -133,41 +123,6 @@
+ return 0;
+ }
+
+-/**
+- * find_memory - setup memory map
+- *
+- * Walk the EFI memory map and find usable memory for the system, taking
+- * into account reserved areas.
+- */
+-void
+-find_memory (void)
+-{
+- unsigned long bootmap_size;
+-
+- reserve_memory();
+-
+- /* first find highest page frame number */
+- max_pfn = 0;
+- efi_memmap_walk(find_max_pfn, &max_pfn);
+-
+- /* how many bytes to cover all the pages */
+- bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
+-
+- /* look for a location to hold the bootmap */
+- bootmap_start = ~0UL;
+- efi_memmap_walk(find_bootmap_location, &bootmap_size);
+- if (bootmap_start == ~0UL)
+- panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
+-
+- bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
+-
+- /* Free all available memory, then mark bootmem-map as being in use. */
+- efi_memmap_walk(filter_rsvd_memory, free_bootmem);
+- reserve_bootmem(bootmap_start, bootmap_size);
+-
+- find_initrd();
+-}
+-
+ #ifdef CONFIG_SMP
+ /**
+ * per_cpu_init - setup per-cpu variables
+@@ -227,73 +182,42 @@
+ void
+ paging_init (void)
+ {
+- unsigned long max_dma;
+- unsigned long zones_size[MAX_NR_ZONES];
+-#ifdef CONFIG_VIRTUAL_MEM_MAP
+- unsigned long zholes_size[MAX_NR_ZONES];
+- unsigned long max_gap;
+-#endif
+-
+- /* initialize mem_map[] */
++ struct pfn_info *pg;
++ /* Allocate and map the machine-to-phys table */
++ if ((pg = alloc_domheap_pages(NULL, 10)) == NULL)
++ panic("Not enough memory to bootstrap Xen.\n");
++ memset(page_to_virt(pg), 0x55, 16UL << 20);
+
+- memset(zones_size, 0, sizeof(zones_size));
++ /* Other mapping setup */
+
+- num_physpages = 0;
+- efi_memmap_walk(count_pages, &num_physpages);
+
+- max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+-
+-#ifdef CONFIG_VIRTUAL_MEM_MAP
+- memset(zholes_size, 0, sizeof(zholes_size));
+-
+- num_dma_physpages = 0;
+- efi_memmap_walk(count_dma_pages, &num_dma_physpages);
+-
+- if (max_low_pfn < max_dma) {
+- zones_size[ZONE_DMA] = max_low_pfn;
+- zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
+- } else {
+- zones_size[ZONE_DMA] = max_dma;
+- zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
+- if (num_physpages > num_dma_physpages) {
+- zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
+- zholes_size[ZONE_NORMAL] =
+- ((max_low_pfn - max_dma) -
+- (num_physpages - num_dma_physpages));
+- }
+- }
+-
+- max_gap = 0;
+- efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
+- if (max_gap < LARGE_GAP) {
+- vmem_map = (struct page *) 0;
+- free_area_init_node(0, &contig_page_data, NULL, zones_size, 0,
+- zholes_size);
+- mem_map = contig_page_data.node_mem_map;
+- } else {
+- unsigned long map_size;
+-
+- /* allocate virtual_mem_map */
+-
+- map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
+- vmalloc_end -= map_size;
+- vmem_map = (struct page *) vmalloc_end;
+- efi_memmap_walk(create_mem_map_page_table, 0);
+-
+- free_area_init_node(0, &contig_page_data, vmem_map, zones_size,
+- 0, zholes_size);
+-
+- mem_map = contig_page_data.node_mem_map;
+- printk("Virtual mem_map starts at 0x%p\n", mem_map);
+- }
+-#else /* !CONFIG_VIRTUAL_MEM_MAP */
+- if (max_low_pfn < max_dma)
+- zones_size[ZONE_DMA] = max_low_pfn;
+- else {
+- zones_size[ZONE_DMA] = max_dma;
+- zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
+- }
+- free_area_init(zones_size);
+-#endif /* !CONFIG_VIRTUAL_MEM_MAP */
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+ }
++
++struct pfn_info *frame_table;
++unsigned long frame_table_size;
++unsigned long max_page;
++
++/* FIXME: postpone support to machines with big holes between physical memorys.
++ * Current hack allows only efi memdesc upto 4G place. (See efi.c)
++ */
++#ifndef CONFIG_VIRTUAL_MEM_MAP
++#define FT_ALIGN_SIZE (16UL << 20)
++void __init init_frametable(void)
++{
++ unsigned long i, p;
++ frame_table_size = max_page * sizeof(struct pfn_info);
++ frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
++
++ /* Request continuous trunk from boot allocator, since HV
++ * address is identity mapped */
++ p = alloc_boot_pages(frame_table_size>>PAGE_SHIFT, FT_ALIGN_SIZE>>PAGE_SHIFT) << PAGE_SHIFT;
++ if (p == 0)
++ panic("Not enough memory for frame table.\n");
++
++ frame_table = __va(p);
++ memset(frame_table, 0, frame_table_size);
++ printk("size of frame_table: %lukB\n",
++ frame_table_size >> 10);
++}
++#endif
diff --git a/xen/arch/ia64/patch/linux-2.6.7/mmzone.h b/xen/arch/ia64/patch/linux-2.6.7/mmzone.h
new file mode 100644
index 0000000000..20c6df5a5c
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/mmzone.h
@@ -0,0 +1,14 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/mmzone.h 2004-06-15 23:19:36.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/mmzone.h 2004-08-25 19:28:13.000000000 -0600
+@@ -185,7 +185,11 @@
+ char *name;
+ unsigned long spanned_pages; /* total size, including holes */
+ unsigned long present_pages; /* amount of memory (excluding holes) */
++#ifdef XEN
++};
++#else
+ } ____cacheline_maxaligned_in_smp;
++#endif
+
+
+ /*
diff --git a/xen/arch/ia64/patch/linux-2.6.7/page.h b/xen/arch/ia64/patch/linux-2.6.7/page.h
new file mode 100644
index 0000000000..c212887024
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/page.h
@@ -0,0 +1,84 @@
+--- ../../linux-2.6.7/include/asm-ia64/page.h 2004-06-15 23:18:58.000000000 -0600
++++ include/asm-ia64/page.h 2005-04-01 12:56:37.000000000 -0700
+@@ -12,6 +12,9 @@
+ #include <asm/intrinsics.h>
+ #include <asm/types.h>
+
++#ifndef __ASSEMBLY__
++#include <asm/flushtlb.h>
++#endif
+ /*
+ * PAGE_SHIFT determines the actual kernel page size.
+ */
+@@ -84,14 +87,22 @@
+ #endif
+
+ #ifndef CONFIG_DISCONTIGMEM
++#ifdef XEN
++#define pfn_valid(pfn) (0)
++#else
+ #define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
+-#define page_to_pfn(page) ((unsigned long) (page - mem_map))
+-#define pfn_to_page(pfn) (mem_map + (pfn))
++#endif
+ #endif /* CONFIG_DISCONTIGMEM */
+
+-#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
++#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table))
++#define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
++
++#define page_to_phys(_page) (page_to_pfn(_page) << PAGE_SHIFT)
+ #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+
++#define pfn_to_page(_pfn) (frame_table + (_pfn))
++#define phys_to_page(kaddr) pfn_to_page(((kaddr) >> PAGE_SHIFT))
++
+ typedef union ia64_va {
+ struct {
+ unsigned long off : 61; /* intra-region offset */
+@@ -107,8 +118,25 @@
+ * expressed in this way to ensure they result in a single "dep"
+ * instruction.
+ */
++#ifdef XEN
++typedef union xen_va {
++ struct {
++ unsigned long off : 60;
++ unsigned long reg : 4;
++ } f;
++ unsigned long l;
++ void *p;
++} xen_va;
++
++// xen/drivers/console.c uses __va in a declaration (should be fixed!)
++#define __pa(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
++#define __va(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
++//# define __pa(x) ((unsigned long)(((unsigned long)x) - PAGE_OFFSET))
++//# define __va(x) ((void *)((char *)(x) + PAGE_OFFSET))
++#else
+ #define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
+ #define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
++#endif
+
+ #define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;})
+ #define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;})
+@@ -180,11 +208,19 @@
+ # define __pgprot(x) (x)
+ #endif /* !STRICT_MM_TYPECHECKS */
+
++#ifdef XEN
++#define PAGE_OFFSET 0xf000000000000000
++#else
+ #define PAGE_OFFSET 0xe000000000000000
++#endif
+
+ #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \
+ (((current->thread.flags & IA64_THREAD_XSTACK) != 0) \
+ ? VM_EXEC : 0))
+
++#ifdef XEN
++#define __flush_tlb() do {} while(0);
++#endif
++
+ #endif /* _ASM_IA64_PAGE_H */
diff --git a/xen/arch/ia64/patch/linux-2.6.7/page_alloc.c b/xen/arch/ia64/patch/linux-2.6.7/page_alloc.c
new file mode 100644
index 0000000000..41da21c18d
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/page_alloc.c
@@ -0,0 +1,305 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/mm/page_alloc.c 2004-06-15 23:18:57.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/page_alloc.c 2004-12-17 13:47:03.000000000 -0700
+@@ -19,20 +19,28 @@
+ #include <linux/mm.h>
+ #include <linux/swap.h>
+ #include <linux/interrupt.h>
++#ifndef XEN
+ #include <linux/pagemap.h>
++#endif
+ #include <linux/bootmem.h>
+ #include <linux/compiler.h>
+ #include <linux/module.h>
++#ifndef XEN
+ #include <linux/suspend.h>
+ #include <linux/pagevec.h>
+ #include <linux/blkdev.h>
++#endif
+ #include <linux/slab.h>
++#ifndef XEN
+ #include <linux/notifier.h>
++#endif
+ #include <linux/topology.h>
++#ifndef XEN
+ #include <linux/sysctl.h>
+ #include <linux/cpu.h>
+
+ #include <asm/tlbflush.h>
++#endif
+
+ DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
+ struct pglist_data *pgdat_list;
+@@ -71,6 +79,9 @@
+
+ static void bad_page(const char *function, struct page *page)
+ {
++#ifdef XEN
++printk("bad_page: called but disabled\n");
++#else
+ printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
+ function, current->comm, page);
+ printk(KERN_EMERG "flags:0x%08lx mapping:%p mapcount:%d count:%d\n",
+@@ -91,6 +102,7 @@
+ set_page_count(page, 0);
+ page->mapping = NULL;
+ page->mapcount = 0;
++#endif
+ }
+
+ #ifndef CONFIG_HUGETLB_PAGE
+@@ -218,6 +230,7 @@
+
+ static inline void free_pages_check(const char *function, struct page *page)
+ {
++#ifndef XEN
+ if ( page_mapped(page) ||
+ page->mapping != NULL ||
+ page_count(page) != 0 ||
+@@ -233,6 +246,7 @@
+ 1 << PG_swapcache |
+ 1 << PG_writeback )))
+ bad_page(function, page);
++#endif
+ if (PageDirty(page))
+ ClearPageDirty(page);
+ }
+@@ -276,6 +290,9 @@
+
+ void __free_pages_ok(struct page *page, unsigned int order)
+ {
++#ifdef XEN
++printk("__free_pages_ok: called but disabled\n");
++#else
+ LIST_HEAD(list);
+ int i;
+
+@@ -285,6 +302,7 @@
+ list_add(&page->lru, &list);
+ kernel_map_pages(page, 1<<order, 0);
+ free_pages_bulk(page_zone(page), 1, &list, order);
++#endif
+ }
+
+ #define MARK_USED(index, order, area) \
+@@ -330,6 +348,7 @@
+ */
+ static void prep_new_page(struct page *page, int order)
+ {
++#ifndef XEN
+ if (page->mapping || page_mapped(page) ||
+ (page->flags & (
+ 1 << PG_private |
+@@ -343,11 +362,14 @@
+ 1 << PG_swapcache |
+ 1 << PG_writeback )))
+ bad_page(__FUNCTION__, page);
++#endif
+
+ page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
+ 1 << PG_referenced | 1 << PG_arch_1 |
+ 1 << PG_checked | 1 << PG_mappedtodisk);
++#ifndef XEN
+ page->private = 0;
++#endif
+ set_page_refs(page, order);
+ }
+
+@@ -590,13 +612,17 @@
+ unsigned long min;
+ struct zone **zones;
+ struct page *page;
++#ifndef XEN
+ struct reclaim_state reclaim_state;
++#endif
+ struct task_struct *p = current;
+ int i;
+ int alloc_type;
+ int do_retry;
+
++#ifndef XEN
+ might_sleep_if(wait);
++#endif
+
+ zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
+ if (zones[0] == NULL) /* no zones in the zonelist */
+@@ -610,12 +636,14 @@
+
+ min = (1<<order) + z->protection[alloc_type];
+
++#ifndef XEN
+ /*
+ * We let real-time tasks dip their real-time paws a little
+ * deeper into reserves.
+ */
+ if (rt_task(p))
+ min -= z->pages_low >> 1;
++#endif
+
+ if (z->free_pages >= min ||
+ (!wait && z->free_pages >= z->pages_high)) {
+@@ -627,9 +655,11 @@
+ }
+ }
+
++#ifndef XEN
+ /* we're somewhat low on memory, failed to find what we needed */
+ for (i = 0; zones[i] != NULL; i++)
+ wakeup_kswapd(zones[i]);
++#endif
+
+ /* Go through the zonelist again, taking __GFP_HIGH into account */
+ for (i = 0; zones[i] != NULL; i++) {
+@@ -639,8 +669,10 @@
+
+ if (gfp_mask & __GFP_HIGH)
+ min -= z->pages_low >> 2;
++#ifndef XEN
+ if (rt_task(p))
+ min -= z->pages_low >> 1;
++#endif
+
+ if (z->free_pages >= min ||
+ (!wait && z->free_pages >= z->pages_high)) {
+@@ -654,6 +686,7 @@
+
+ /* here we're in the low on memory slow path */
+
++#ifndef XEN
+ rebalance:
+ if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+ /* go through the zonelist yet again, ignoring mins */
+@@ -681,6 +714,7 @@
+
+ p->reclaim_state = NULL;
+ p->flags &= ~PF_MEMALLOC;
++#endif
+
+ /* go through the zonelist yet one more time */
+ for (i = 0; zones[i] != NULL; i++) {
+@@ -698,6 +732,11 @@
+ }
+ }
+
++#ifdef XEN
++printk(KERN_WARNING "%s: page allocation failure."
++ " order:%d, mode:0x%x\n",
++ "(xen tasks have no comm)", order, gfp_mask);
++#else
+ /*
+ * Don't let big-order allocations loop unless the caller explicitly
+ * requests that. Wait for some write requests to complete then retry.
+@@ -724,6 +763,7 @@
+ p->comm, order, gfp_mask);
+ dump_stack();
+ }
++#endif
+ return NULL;
+ got_pg:
+ kernel_map_pages(page, 1 << order, 1);
+@@ -808,6 +848,7 @@
+
+ EXPORT_SYMBOL(get_zeroed_page);
+
++#ifndef XEN
+ void __pagevec_free(struct pagevec *pvec)
+ {
+ int i = pagevec_count(pvec);
+@@ -815,10 +856,15 @@
+ while (--i >= 0)
+ free_hot_cold_page(pvec->pages[i], pvec->cold);
+ }
++#endif
+
+ fastcall void __free_pages(struct page *page, unsigned int order)
+ {
++#ifdef XEN
++ if (!PageReserved(page)) {
++#else
+ if (!PageReserved(page) && put_page_testzero(page)) {
++#endif
+ if (order == 0)
+ free_hot_page(page);
+ else
+@@ -914,6 +960,13 @@
+ return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+ }
+
++#ifdef XEN
++unsigned int nr_free_highpages (void)
++{
++printf("nr_free_highpages: called but not implemented\n");
++}
++#endif
++
+ #ifdef CONFIG_HIGHMEM
+ unsigned int nr_free_highpages (void)
+ {
+@@ -1022,6 +1075,7 @@
+
+ void si_meminfo(struct sysinfo *val)
+ {
++#ifndef XEN
+ val->totalram = totalram_pages;
+ val->sharedram = 0;
+ val->freeram = nr_free_pages();
+@@ -1034,6 +1088,7 @@
+ val->freehigh = 0;
+ #endif
+ val->mem_unit = PAGE_SIZE;
++#endif
+ }
+
+ EXPORT_SYMBOL(si_meminfo);
+@@ -1165,7 +1220,9 @@
+ printk("= %lukB\n", K(total));
+ }
+
++#ifndef XEN
+ show_swap_cache_info();
++#endif
+ }
+
+ /*
+@@ -1530,6 +1587,9 @@
+ zone->wait_table_size = wait_table_size(size);
+ zone->wait_table_bits =
+ wait_table_bits(zone->wait_table_size);
++#ifdef XEN
++//printf("free_area_init_core-1: calling alloc_bootmem_node(%lx,%lx)\n",pgdat,zone->wait_table_size * sizeof(wait_queue_head_t));
++#endif
+ zone->wait_table = (wait_queue_head_t *)
+ alloc_bootmem_node(pgdat, zone->wait_table_size
+ * sizeof(wait_queue_head_t));
+@@ -1584,6 +1644,9 @@
+ */
+ bitmap_size = (size-1) >> (i+4);
+ bitmap_size = LONG_ALIGN(bitmap_size+1);
++#ifdef XEN
++//printf("free_area_init_core-2: calling alloc_bootmem_node(%lx,%lx)\n",pgdat, bitmap_size);
++#endif
+ zone->free_area[i].map =
+ (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
+ }
+@@ -1601,6 +1664,9 @@
+ calculate_zone_totalpages(pgdat, zones_size, zholes_size);
+ if (!node_mem_map) {
+ size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
++#ifdef XEN
++//printf("free_area_init_node: calling alloc_bootmem_node(%lx,%lx)\n",pgdat,size);
++#endif
+ node_mem_map = alloc_bootmem_node(pgdat, size);
+ }
+ pgdat->node_mem_map = node_mem_map;
+@@ -1784,6 +1850,7 @@
+
+ #endif /* CONFIG_PROC_FS */
+
++#ifndef XEN
+ #ifdef CONFIG_HOTPLUG_CPU
+ static int page_alloc_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+@@ -2011,3 +2078,4 @@
+ setup_per_zone_protection();
+ return 0;
+ }
++#endif
diff --git a/xen/arch/ia64/patch/linux-2.6.7/pal.S b/xen/arch/ia64/patch/linux-2.6.7/pal.S
new file mode 100644
index 0000000000..94519e6435
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/pal.S
@@ -0,0 +1,26 @@
+--- ../../linux-2.6.7/arch/ia64/kernel/pal.S 2004-06-15 23:20:25.000000000 -0600
++++ arch/ia64/pal.S 2005-04-01 12:56:01.000000000 -0700
+@@ -163,7 +163,11 @@
+ adds r8 = 1f-1b,r8 // calculate return address for call
+ ;;
+ mov loc4=ar.rsc // save RSE configuration
++#ifdef XEN
++ dep.z loc2=loc2,0,60 // convert pal entry point to physical
++#else // XEN
+ dep.z loc2=loc2,0,61 // convert pal entry point to physical
++#endif // XEN
+ tpa r8=r8 // convert rp to physical
+ ;;
+ mov b7 = loc2 // install target to branch reg
+@@ -218,7 +222,11 @@
+ mov loc3 = psr // save psr
+ ;;
+ mov loc4=ar.rsc // save RSE configuration
++#ifdef XEN
++ dep.z loc2=loc2,0,60 // convert pal entry point to physical
++#else // XEN
+ dep.z loc2=loc2,0,61 // convert pal entry point to physical
++#endif // XEN
+ ;;
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ movl r16=PAL_PSR_BITS_TO_CLEAR
diff --git a/xen/arch/ia64/patch/linux-2.6.7/pgalloc.h b/xen/arch/ia64/patch/linux-2.6.7/pgalloc.h
new file mode 100644
index 0000000000..d2a9ee4f4b
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/pgalloc.h
@@ -0,0 +1,65 @@
+--- ../../linux-2.6.7/include/asm-ia64/pgalloc.h 2004-06-15 23:18:54.000000000 -0600
++++ include/asm-ia64/pgalloc.h 2005-03-23 14:54:11.000000000 -0700
+@@ -34,6 +34,10 @@
+ #define pmd_quicklist (local_cpu_data->pmd_quick)
+ #define pgtable_cache_size (local_cpu_data->pgtable_cache_sz)
+
++/* FIXME: Later 3 level page table should be over, to create
++ * new interface upon xen memory allocator. To simplify first
++ * effort moving to xen allocator, use xenheap pages temporarily.
++ */
+ static inline pgd_t*
+ pgd_alloc_one_fast (struct mm_struct *mm)
+ {
+@@ -55,7 +59,7 @@
+ pgd_t *pgd = pgd_alloc_one_fast(mm);
+
+ if (unlikely(pgd == NULL)) {
+- pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
++ pgd = (pgd_t *)alloc_xenheap_page();
+ if (likely(pgd != NULL))
+ clear_page(pgd);
+ }
+@@ -93,7 +97,7 @@
+ static inline pmd_t*
+ pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
+ {
+- pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
++ pmd_t *pmd = (pmd_t *)alloc_xenheap_page();
+
+ if (likely(pmd != NULL))
+ clear_page(pmd);
+@@ -125,7 +129,7 @@
+ static inline struct page *
+ pte_alloc_one (struct mm_struct *mm, unsigned long addr)
+ {
+- struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
++ struct page *pte = alloc_xenheap_page();
+
+ if (likely(pte != NULL))
+ clear_page(page_address(pte));
+@@ -135,7 +139,7 @@
+ static inline pte_t *
+ pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr)
+ {
+- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
++ pte_t *pte = (pte_t *)alloc_xenheap_page();
+
+ if (likely(pte != NULL))
+ clear_page(pte);
+@@ -145,13 +149,13 @@
+ static inline void
+ pte_free (struct page *pte)
+ {
+- __free_page(pte);
++ free_xenheap_page(pte);
+ }
+
+ static inline void
+ pte_free_kernel (pte_t *pte)
+ {
+- free_page((unsigned long) pte);
++ free_xenheap_page((unsigned long) pte);
+ }
+
+ #define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte))
diff --git a/xen/arch/ia64/patch/linux-2.6.7/processor.h b/xen/arch/ia64/patch/linux-2.6.7/processor.h
new file mode 100644
index 0000000000..6d34f0b4bd
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/processor.h
@@ -0,0 +1,19 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/processor.h 2005-01-23 13:23:36.000000000 -0700
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/processor.h 2004-08-25 19:28:13.000000000 -0600
+@@ -406,12 +406,16 @@
+ */
+
+ /* Return TRUE if task T owns the fph partition of the CPU we're running on. */
++#ifdef XEN
++#define ia64_is_local_fpu_owner(t) 0
++#else
+ #define ia64_is_local_fpu_owner(t) \
+ ({ \
+ struct task_struct *__ia64_islfo_task = (t); \
+ (__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id() \
+ && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER)); \
+ })
++#endif
+
+ /* Mark task T as owning the fph partition of the CPU we're running on. */
+ #define ia64_set_local_fpu_owner(t) do { \
diff --git a/xen/arch/ia64/patch/linux-2.6.7/sal.h b/xen/arch/ia64/patch/linux-2.6.7/sal.h
new file mode 100644
index 0000000000..72c8f84dbb
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/sal.h
@@ -0,0 +1,26 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/sal.h 2004-06-15 23:20:04.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/sal.h 2004-10-27 13:55:23.000000000 -0600
+@@ -646,7 +646,23 @@
+ {
+ struct ia64_sal_retval isrv;
+
++//#ifdef XEN
++#if 0
++ unsigned long *x = (unsigned long *)ia64_sal;
++ unsigned long *inst = (unsigned long *)*x;
++ unsigned long __ia64_sc_flags;
++ struct ia64_fpreg __ia64_sc_fr[6];
++printf("ia64_sal_freq_base: about to save_scratch_fpregs\n");
++ ia64_save_scratch_fpregs(__ia64_sc_fr);
++ spin_lock_irqsave(&sal_lock, __ia64_sc_flags);
++printf("ia64_sal_freq_base: about to call, ia64_sal=%p, ia64_sal[0]=%p, ia64_sal[1]=%p\n",x,x[0],x[1]);
++printf("first inst=%p,%p\n",inst[0],inst[1]);
++ isrv = (*ia64_sal)(SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
++ spin_unlock_irqrestore(&sal_lock, __ia64_sc_flags);
++ ia64_load_scratch_fpregs(__ia64_sc_fr);
++#else
+ SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
++#endif
+ *ticks_per_second = isrv.v0;
+ *drift_info = isrv.v1;
+ return isrv.status;
diff --git a/xen/arch/ia64/patch/linux-2.6.7/setup.c b/xen/arch/ia64/patch/linux-2.6.7/setup.c
new file mode 100644
index 0000000000..c35d18c058
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/setup.c
@@ -0,0 +1,203 @@
+--- ../../linux-2.6.7/arch/ia64/kernel/setup.c 2004-06-15 23:18:58.000000000 -0600
++++ arch/ia64/setup.c 2005-04-04 22:31:09.000000000 -0600
+@@ -21,6 +21,9 @@
+ #include <linux/init.h>
+
+ #include <linux/acpi.h>
++#ifdef XEN
++#include <xen/sched.h>
++#endif
+ #include <linux/bootmem.h>
+ #include <linux/console.h>
+ #include <linux/delay.h>
+@@ -30,13 +33,17 @@
+ #include <linux/seq_file.h>
+ #include <linux/string.h>
+ #include <linux/threads.h>
++#ifndef XEN
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#endif
+ #include <linux/efi.h>
+ #include <linux/initrd.h>
+
++#ifndef XEN
+ #include <asm/ia32.h>
++#endif
+ #include <asm/machvec.h>
+ #include <asm/mca.h>
+ #include <asm/meminit.h>
+@@ -50,6 +57,11 @@
+ #include <asm/smp.h>
+ #include <asm/system.h>
+ #include <asm/unistd.h>
++#ifdef XEN
++#include <linux/mm.h>
++#include <asm/mmu_context.h>
++extern unsigned long loops_per_jiffy; // from linux/init/main.c
++#endif
+
+ #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
+ # error "struct cpuinfo_ia64 too big!"
+@@ -65,7 +77,9 @@
+ DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
+ unsigned long ia64_cycles_per_usec;
+ struct ia64_boot_param *ia64_boot_param;
++#ifndef XEN
+ struct screen_info screen_info;
++#endif
+
+ unsigned long ia64_max_cacheline_size;
+ unsigned long ia64_iobase; /* virtual address for I/O accesses */
+@@ -98,7 +112,6 @@
+ struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
+ int num_rsvd_regions;
+
+-
+ /*
+ * Filter incoming memory segments based on the primitive map created from the boot
+ * parameters. Segments contained in the map are removed from the memory ranges. A
+@@ -128,9 +141,12 @@
+ for (i = 0; i < num_rsvd_regions; ++i) {
+ range_start = max(start, prev_start);
+ range_end = min(end, rsvd_region[i].start);
+-
+- if (range_start < range_end)
+- call_pernode_memory(__pa(range_start), range_end - range_start, func);
++ /* init_boot_pages requires "ps, pe" */
++ if (range_start < range_end) {
++ printk("Init boot pages: 0x%lx -> 0x%lx.\n",
++ __pa(range_start), __pa(range_end));
++ (*func)(__pa(range_start), __pa(range_end), 0);
++ }
+
+ /* nothing more available in this segment */
+ if (range_end == end) return 0;
+@@ -187,17 +203,17 @@
+ + strlen(__va(ia64_boot_param->command_line)) + 1);
+ n++;
+
++ /* Reserve xen image/bitmap/xen-heap */
+ rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
+- rsvd_region[n].end = (unsigned long) ia64_imva(_end);
++ rsvd_region[n].end = rsvd_region[n].start + xenheap_size;
+ n++;
+
+-#ifdef CONFIG_BLK_DEV_INITRD
++ /* This is actually dom0 image */
+ if (ia64_boot_param->initrd_start) {
+ rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
+ rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size;
+ n++;
+ }
+-#endif
+
+ /* end of memory marker */
+ rsvd_region[n].start = ~0UL;
+@@ -207,6 +223,16 @@
+ num_rsvd_regions = n;
+
+ sort_regions(rsvd_region, num_rsvd_regions);
++
++ {
++ int i;
++ printk("Reserved regions: \n");
++ for (i = 0; i < num_rsvd_regions; i++)
++ printk(" [%d] -> [0x%lx, 0x%lx]\n",
++ i,
++ rsvd_region[i].start,
++ rsvd_region[i].end);
++ }
+ }
+
+ /**
+@@ -280,23 +306,26 @@
+ }
+ #endif
+
++#ifdef XEN
+ void __init
+-setup_arch (char **cmdline_p)
++early_setup_arch(char **cmdline_p)
+ {
+ unw_init();
+-
+- ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+-
++
+ *cmdline_p = __va(ia64_boot_param->command_line);
+ strlcpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
+-
++ cmdline_parse(*cmdline_p);
++
+ efi_init();
+- io_port_init();
+-
++
+ #ifdef CONFIG_IA64_GENERIC
+ machvec_init(acpi_get_sysname());
+ #endif
+
++#ifdef XEN
++#undef CONFIG_ACPI_BOOT
++#endif
++
+ #ifdef CONFIG_ACPI_BOOT
+ /* Initialize the ACPI boot-time table parser */
+ acpi_table_init();
+@@ -308,9 +337,13 @@
+ smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */
+ # endif
+ #endif /* CONFIG_APCI_BOOT */
++ io_port_init();
++}
++#endif
+
+- find_memory();
+-
++void __init
++setup_arch (void)
++{
+ /* process SAL system table: */
+ ia64_sal_init(efi.sal_systab);
+
+@@ -353,7 +386,6 @@
+ /* enable IA-64 Machine Check Abort Handling */
+ ia64_mca_init();
+
+- platform_setup(cmdline_p);
+ paging_init();
+ }
+
+@@ -413,6 +445,9 @@
+ sprintf(cp, " 0x%lx", mask);
+ }
+
++#ifdef XEN
++#define seq_printf(a,b...) printf(b)
++#endif
+ seq_printf(m,
+ "processor : %d\n"
+ "vendor : %s\n"
+@@ -616,7 +651,11 @@
+ | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
++#ifdef XEN
++ if (current->domain->arch.mm)
++#else
+ if (current->mm)
++#endif
+ BUG();
+
+ ia64_mmu_init(ia64_imva(cpu_data));
+@@ -667,6 +706,8 @@
+ void
+ check_bugs (void)
+ {
++#ifndef XEN
+ ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
+ (unsigned long) __end___mckinley_e9_bundles);
++#endif
+ }
diff --git a/xen/arch/ia64/patch/linux-2.6.7/slab.c b/xen/arch/ia64/patch/linux-2.6.7/slab.c
new file mode 100644
index 0000000000..56a28f2de1
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/slab.c
@@ -0,0 +1,139 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/mm/slab.c 2004-06-15 23:19:44.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/slab.c 2004-12-17 13:47:03.000000000 -0700
+@@ -86,15 +86,30 @@
+ #include <linux/init.h>
+ #include <linux/compiler.h>
+ #include <linux/seq_file.h>
++#ifndef XEN
+ #include <linux/notifier.h>
+ #include <linux/kallsyms.h>
+ #include <linux/cpu.h>
+ #include <linux/sysctl.h>
+ #include <linux/module.h>
++#endif
+
+ #include <asm/uaccess.h>
+ #include <asm/cacheflush.h>
++#ifndef XEN
+ #include <asm/tlbflush.h>
++#endif
++
++#ifdef XEN
++#define lock_cpu_hotplug() do { } while (0)
++#define unlock_cpu_hotplug() do { } while (0)
++#define might_sleep_if(x) do { } while (0)
++#define dump_stack() do { } while (0)
++#define start_cpu_timer(cpu) do { } while (0)
++static inline void __down(struct semaphore *sem) { }
++static inline void __up(struct semaphore *sem) { }
++static inline void might_sleep(void) { }
++#endif
+
+ /*
+ * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
+@@ -530,7 +545,9 @@
+ FULL
+ } g_cpucache_up;
+
++#ifndef XEN
+ static DEFINE_PER_CPU(struct timer_list, reap_timers);
++#endif
+
+ static void reap_timer_fnc(unsigned long data);
+ static void free_block(kmem_cache_t* cachep, void** objpp, int len);
+@@ -588,6 +605,7 @@
+ * Add the CPU number into the expiry time to minimize the possibility of the
+ * CPUs getting into lockstep and contending for the global cache chain lock.
+ */
++#ifndef XEN
+ static void __devinit start_cpu_timer(int cpu)
+ {
+ struct timer_list *rt = &per_cpu(reap_timers, cpu);
+@@ -600,6 +618,7 @@
+ add_timer_on(rt, cpu);
+ }
+ }
++#endif
+
+ #ifdef CONFIG_HOTPLUG_CPU
+ static void stop_cpu_timer(int cpu)
+@@ -634,6 +653,7 @@
+ return nc;
+ }
+
++#ifndef XEN
+ static int __devinit cpuup_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+@@ -693,6 +713,7 @@
+ }
+
+ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
++#endif
+
+ /* Initialisation.
+ * Called after the gfp() functions have been enabled, and before smp_init().
+@@ -805,10 +826,14 @@
+ /* Done! */
+ g_cpucache_up = FULL;
+
++#ifdef XEN
++printk("kmem_cache_init: some parts commented out, ignored\n");
++#else
+ /* Register a cpu startup notifier callback
+ * that initializes ac_data for all new cpus
+ */
+ register_cpu_notifier(&cpucache_notifier);
++#endif
+
+
+ /* The reap timers are started later, with a module init call:
+@@ -886,8 +911,10 @@
+ page++;
+ }
+ sub_page_state(nr_slab, nr_freed);
++#ifndef XEN
+ if (current->reclaim_state)
+ current->reclaim_state->reclaimed_slab += nr_freed;
++#endif
+ free_pages((unsigned long)addr, cachep->gfporder);
+ if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+ atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
+@@ -1363,8 +1390,10 @@
+ + cachep->num;
+ }
+
++#ifndef XEN
+ cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
+ ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
++#endif
+
+ /* Need the semaphore to access the chain. */
+ down(&cache_chain_sem);
+@@ -2237,8 +2266,10 @@
+
+ if (unlikely(addr < min_addr))
+ goto out;
++#ifndef XEN
+ if (unlikely(addr > (unsigned long)high_memory - size))
+ goto out;
++#endif
+ if (unlikely(addr & align_mask))
+ goto out;
+ if (unlikely(!kern_addr_valid(addr)))
+@@ -2769,6 +2800,7 @@
+ */
+ static void reap_timer_fnc(unsigned long cpu)
+ {
++#ifndef XEN
+ struct timer_list *rt = &__get_cpu_var(reap_timers);
+
+ /* CPU hotplug can drag us off cpu: don't run on wrong CPU */
+@@ -2776,6 +2808,7 @@
+ cache_reap();
+ mod_timer(rt, jiffies + REAPTIMEOUT_CPUC + cpu);
+ }
++#endif
+ }
+
+ #ifdef CONFIG_PROC_FS
diff --git a/xen/arch/ia64/patch/linux-2.6.7/slab.h b/xen/arch/ia64/patch/linux-2.6.7/slab.h
new file mode 100644
index 0000000000..9b9bad6935
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/slab.h
@@ -0,0 +1,14 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/slab.h 2004-06-15 23:20:26.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/slab.h 2004-08-25 19:28:13.000000000 -0600
+@@ -83,7 +83,11 @@
+ goto found; \
+ else \
+ i++;
++#ifdef XEN
++#include <linux/kmalloc_sizes.h>
++#else
+ #include "kmalloc_sizes.h"
++#endif
+ #undef CACHE
+ {
+ extern void __you_cannot_kmalloc_that_much(void);
diff --git a/xen/arch/ia64/patch/linux-2.6.7/swiotlb.c b/xen/arch/ia64/patch/linux-2.6.7/swiotlb.c
new file mode 100644
index 0000000000..63507430f5
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/swiotlb.c
@@ -0,0 +1,47 @@
+--- ../../linux-2.6.7/arch/ia64/lib/swiotlb.c 2004-06-15 23:19:43.000000000 -0600
++++ arch/ia64/lib/swiotlb.c 2005-03-23 14:54:05.000000000 -0700
+@@ -100,7 +100,11 @@
+ /*
+ * Get IO TLB memory from the low pages
+ */
+- io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
++ /* FIXME: Do we really need swiotlb in HV? If all memory trunks
++ * presented to guest as <4G, are actually <4G in machine range,
++ * no DMA intervention from HV...
++ */
++ io_tlb_start = alloc_xenheap_pages(get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT)));
+ if (!io_tlb_start)
+ BUG();
+ io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
+@@ -110,11 +114,11 @@
+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+ * between io_tlb_start and io_tlb_end.
+ */
+- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
++ io_tlb_list = alloc_xenheap_pages(get_order(io_tlb_nslabs * sizeof(int)));
+ for (i = 0; i < io_tlb_nslabs; i++)
+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_index = 0;
+- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
++ io_tlb_orig_addr = alloc_xenheap_pages(get_order(io_tlb_nslabs * sizeof(char *)));
+
+ printk(KERN_INFO "Placing software IO TLB between 0x%p - 0x%p\n",
+ (void *) io_tlb_start, (void *) io_tlb_end);
+@@ -279,7 +283,7 @@
+ /* XXX fix me: the DMA API should pass us an explicit DMA mask instead: */
+ flags |= GFP_DMA;
+
+- ret = (void *)__get_free_pages(flags, get_order(size));
++ ret = (void *)alloc_xenheap_pages(get_order(size));
+ if (!ret)
+ return NULL;
+
+@@ -294,7 +298,7 @@
+ void
+ swiotlb_free_coherent (struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
+ {
+- free_pages((unsigned long) vaddr, get_order(size));
++ free_xenheap_pages((unsigned long) vaddr, get_order(size));
+ }
+
+ /*
diff --git a/xen/arch/ia64/patch/linux-2.6.7/system.h b/xen/arch/ia64/patch/linux-2.6.7/system.h
new file mode 100644
index 0000000000..ab1516ef7a
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/system.h
@@ -0,0 +1,43 @@
+--- ../../linux-2.6.7/include/asm-ia64/system.h 2005-03-24 19:39:56.000000000 -0700
++++ include/asm-ia64/system.h 2005-04-01 12:56:37.000000000 -0700
+@@ -24,8 +24,16 @@
+ * 0xa000000000000000+2*PERCPU_PAGE_SIZE
+ * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page)
+ */
++#ifdef XEN
++//#define KERNEL_START 0xf000000100000000
++#define KERNEL_START 0xf000000004000000
++#define PERCPU_ADDR 0xf100000000000000-PERCPU_PAGE_SIZE
++#define SHAREDINFO_ADDR 0xf100000000000000
++#define VHPT_ADDR 0xf200000000000000
++#else
+ #define KERNEL_START 0xa000000100000000
+ #define PERCPU_ADDR (-PERCPU_PAGE_SIZE)
++#endif
+
+ #ifndef __ASSEMBLY__
+
+@@ -218,9 +226,13 @@
+ # define PERFMON_IS_SYSWIDE() (0)
+ #endif
+
++#ifdef XEN
++#define IA64_HAS_EXTRA_STATE(t) 0
++#else
+ #define IA64_HAS_EXTRA_STATE(t) \
+ ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID) \
+ || IS_IA32_PROCESS(ia64_task_regs(t)) || PERFMON_IS_SYSWIDE())
++#endif
+
+ #define __switch_to(prev,next,last) do { \
+ if (IA64_HAS_EXTRA_STATE(prev)) \
+@@ -249,6 +261,9 @@
+ #else
+ # define switch_to(prev,next,last) __switch_to(prev, next, last)
+ #endif
++//#ifdef XEN
++//#undef switch_to
++//#endif
+
+ /*
+ * On IA-64, we don't want to hold the runqueue's lock during the low-level context-switch,
diff --git a/xen/arch/ia64/patch/linux-2.6.7/time.c b/xen/arch/ia64/patch/linux-2.6.7/time.c
new file mode 100644
index 0000000000..1b6263ce4f
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/time.c
@@ -0,0 +1,338 @@
+--- ../../linux-2.6.7/arch/ia64/kernel/time.c 2004-06-15 23:19:01.000000000 -0600
++++ arch/ia64/time.c 2005-03-14 17:27:11.000000000 -0700
+@@ -10,16 +10,22 @@
+ */
+ #include <linux/config.h>
+
++#ifndef XEN
+ #include <linux/cpu.h>
++#endif
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
++#ifndef XEN
+ #include <linux/profile.h>
++#endif
+ #include <linux/sched.h>
+ #include <linux/time.h>
+ #include <linux/interrupt.h>
+ #include <linux/efi.h>
++#ifndef XEN
+ #include <linux/profile.h>
++#endif
+ #include <linux/timex.h>
+
+ #include <asm/machvec.h>
+@@ -29,6 +35,9 @@
+ #include <asm/sal.h>
+ #include <asm/sections.h>
+ #include <asm/system.h>
++#ifdef XEN
++#include <asm/ia64_int.h>
++#endif
+
+ extern unsigned long wall_jiffies;
+
+@@ -45,6 +54,59 @@
+
+ #endif
+
++#ifdef XEN
++volatile unsigned long last_nsec_offset;
++extern rwlock_t xtime_lock;
++unsigned long cpu_khz; /* Detected as we calibrate the TSC */
++static s_time_t stime_irq; /* System time at last 'time update' */
++
++static inline u64 get_time_delta(void)
++{
++ return ia64_get_itc();
++}
++
++s_time_t get_s_time(void)
++{
++ s_time_t now;
++ unsigned long flags;
++
++ read_lock_irqsave(&xtime_lock, flags);
++
++ now = stime_irq + get_time_delta();
++
++ /* Ensure that the returned system time is monotonically increasing. */
++ {
++ static s_time_t prev_now = 0;
++ if ( unlikely(now < prev_now) )
++ now = prev_now;
++ prev_now = now;
++ }
++
++ read_unlock_irqrestore(&xtime_lock, flags);
++
++ return now;
++}
++
++void update_dom_time(struct vcpu *v)
++{
++// FIXME: implement this?
++// printf("update_dom_time: called, not implemented, skipping\n");
++ return;
++}
++
++/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
++void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
++{
++// FIXME: Should this be do_settimeofday (from linux)???
++ printf("do_settime: called, not implemented, stopping\n");
++ dummy();
++}
++#endif
++
++#if 0 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
++#endif /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
++
++#ifndef XEN
+ static void
+ itc_reset (void)
+ {
+@@ -80,12 +142,15 @@
+ return (elapsed_cycles*local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT;
+ }
+
++#ifndef XEN
+ static struct time_interpolator itc_interpolator = {
+ .get_offset = itc_get_offset,
+ .update = itc_update,
+ .reset = itc_reset
+ };
++#endif
+
++#ifndef XEN
+ int
+ do_settimeofday (struct timespec *tv)
+ {
+@@ -95,7 +160,9 @@
+ if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
++#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ write_seqlock_irq(&xtime_lock);
++#endif
+ {
+ /*
+ * This is revolting. We need to set "xtime" correctly. However, the value
+@@ -117,12 +184,15 @@
+ time_esterror = NTP_PHASE_LIMIT;
+ time_interpolator_reset();
+ }
++#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ write_sequnlock_irq(&xtime_lock);
++#endif
+ clock_was_set();
+ return 0;
+ }
+
+ EXPORT_SYMBOL(do_settimeofday);
++#endif
+
+ void
+ do_gettimeofday (struct timeval *tv)
+@@ -185,6 +255,7 @@
+ }
+
+ EXPORT_SYMBOL(do_gettimeofday);
++#endif
+
+ /*
+ * The profiling function is SMP safe. (nothing can mess
+@@ -195,6 +266,9 @@
+ static inline void
+ ia64_do_profile (struct pt_regs * regs)
+ {
++#ifdef XEN
++}
++#else
+ unsigned long ip, slot;
+ extern cpumask_t prof_cpu_mask;
+
+@@ -231,24 +305,89 @@
+ ip = prof_len-1;
+ atomic_inc((atomic_t *)&prof_buffer[ip]);
+ }
++#endif
++
++#ifdef XEN
++unsigned long domain0_ready = 0; // FIXME (see below)
++#define typecheck(a,b) 1
++/* FROM linux/include/linux/jiffies.h */
++/*
++ * These inlines deal with timer wrapping correctly. You are
++ * strongly encouraged to use them
++ * 1. Because people otherwise forget
++ * 2. Because if the timer wrap changes in future you won't have to
++ * alter your driver code.
++ *
++ * time_after(a,b) returns true if the time a is after time b.
++ *
++ * Do this with "<0" and ">=0" to only test the sign of the result. A
++ * good compiler would generate better code (and a really good compiler
++ * wouldn't care). Gcc is currently neither.
++ */
++#define time_after(a,b) \
++ (typecheck(unsigned long, a) && \
++ typecheck(unsigned long, b) && \
++ ((long)(b) - (long)(a) < 0))
++#define time_before(a,b) time_after(b,a)
++
++#define time_after_eq(a,b) \
++ (typecheck(unsigned long, a) && \
++ typecheck(unsigned long, b) && \
++ ((long)(a) - (long)(b) >= 0))
++#define time_before_eq(a,b) time_after_eq(b,a)
++#endif
+
+ static irqreturn_t
+ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+ {
+ unsigned long new_itm;
+
++#ifndef XEN
+ if (unlikely(cpu_is_offline(smp_processor_id()))) {
+ return IRQ_HANDLED;
+ }
++#endif
++#ifdef XEN
++ if (current->domain == dom0) {
++ // FIXME: there's gotta be a better way of doing this...
++ // We have to ensure that domain0 is launched before we
++ // call vcpu_timer_expired on it
++ //domain0_ready = 1; // moved to xensetup.c
++ }
++ if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) {
++ vcpu_pend_timer(dom0->vcpu[0]);
++ //vcpu_set_next_timer(dom0->vcpu[0]);
++ domain_wake(dom0->vcpu[0]);
++ }
++ if (!is_idle_task(current->domain) && current->domain != dom0) {
++ if (vcpu_timer_expired(current)) {
++ vcpu_pend_timer(current);
++ // ensure another timer interrupt happens even if domain doesn't
++ vcpu_set_next_timer(current);
++ domain_wake(current);
++ }
++ }
++ raise_actimer_softirq();
++#endif
+
++#ifndef XEN
+ platform_timer_interrupt(irq, dev_id, regs);
++#endif
+
+ new_itm = local_cpu_data->itm_next;
+
+ if (!time_after(ia64_get_itc(), new_itm))
++#ifdef XEN
++ return;
++#else
+ printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
+ ia64_get_itc(), new_itm);
++#endif
+
++#ifdef XEN
++// printf("GOT TO HERE!!!!!!!!!!!\n");
++ //while(1);
++#endif
+ ia64_do_profile(regs);
+
+ while (1) {
+@@ -269,10 +408,16 @@
+ * another CPU. We need to avoid to SMP race by acquiring the
+ * xtime_lock.
+ */
++#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ write_seqlock(&xtime_lock);
++#endif
++#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ do_timer(regs);
++#endif
+ local_cpu_data->itm_next = new_itm;
++#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ write_sequnlock(&xtime_lock);
++#endif
+ } else
+ local_cpu_data->itm_next = new_itm;
+
+@@ -292,7 +437,12 @@
+ */
+ while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
+ new_itm += local_cpu_data->itm_delta;
++//#ifdef XEN
++// vcpu_set_next_timer(current);
++//#else
++//printf("***** timer_interrupt: Setting itm to %lx\n",new_itm);
+ ia64_set_itm(new_itm);
++//#endif
+ /* double check, in case we got hit by a (slow) PMI: */
+ } while (time_after_eq(ia64_get_itc(), new_itm));
+ return IRQ_HANDLED;
+@@ -307,6 +457,7 @@
+ int cpu = smp_processor_id();
+ unsigned long shift = 0, delta;
+
++printf("ia64_cpu_local_tick: about to call ia64_set_itv\n");
+ /* arrange for the cycle counter to generate a timer interrupt: */
+ ia64_set_itv(IA64_TIMER_VECTOR);
+
+@@ -320,6 +471,7 @@
+ shift = (2*(cpu - hi) + 1) * delta/hi/2;
+ }
+ local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
++printf("***** ia64_cpu_local_tick: Setting itm to %lx\n",local_cpu_data->itm_next);
+ ia64_set_itm(local_cpu_data->itm_next);
+ }
+
+@@ -335,6 +487,7 @@
+ * frequency and then a PAL call to determine the frequency ratio between the ITC
+ * and the base frequency.
+ */
++
+ status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+ &platform_base_freq, &platform_base_drift);
+ if (status != 0) {
+@@ -384,9 +537,11 @@
+ + itc_freq/2)/itc_freq;
+
+ if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
++#ifndef XEN
+ itc_interpolator.frequency = local_cpu_data->itc_freq;
+ itc_interpolator.drift = itc_drift;
+ register_time_interpolator(&itc_interpolator);
++#endif
+ }
+
+ /* Setup the CPU local timer tick */
+@@ -395,7 +550,9 @@
+
+ static struct irqaction timer_irqaction = {
+ .handler = timer_interrupt,
++#ifndef XEN
+ .flags = SA_INTERRUPT,
++#endif
+ .name = "timer"
+ };
+
+@@ -403,12 +560,16 @@
+ time_init (void)
+ {
+ register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
++#ifndef XEN
+ efi_gettimeofday(&xtime);
++#endif
+ ia64_init_itm();
+
++#ifndef XEN
+ /*
+ * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
+ * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
+ */
+ set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
++#endif
+ }
diff --git a/xen/arch/ia64/patch/linux-2.6.7/tlb.c b/xen/arch/ia64/patch/linux-2.6.7/tlb.c
new file mode 100644
index 0000000000..ca49c7b797
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/tlb.c
@@ -0,0 +1,48 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/mm/tlb.c 2004-06-15 23:19:43.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/tlb.c 2004-08-25 19:28:12.000000000 -0600
+@@ -21,7 +21,9 @@
+ #include <asm/mmu_context.h>
+ #include <asm/pgalloc.h>
+ #include <asm/pal.h>
++#ifndef XEN
+ #include <asm/tlbflush.h>
++#endif
+
+ static struct {
+ unsigned long mask; /* mask of supported purge page-sizes */
+@@ -43,6 +45,9 @@
+ void
+ wrap_mmu_context (struct mm_struct *mm)
+ {
++#ifdef XEN
++printf("wrap_mmu_context: called, not implemented\n");
++#else
+ unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
+ struct task_struct *tsk;
+ int i;
+@@ -83,6 +88,7 @@
+ put_cpu();
+ }
+ local_flush_tlb_all();
++#endif
+ }
+
+ void
+@@ -132,6 +138,9 @@
+ void
+ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+ {
++#ifdef XEN
++printf("flush_tlb_range: called, not implemented\n");
++#else
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long size = end - start;
+ unsigned long nbits;
+@@ -163,6 +172,7 @@
+ # endif
+
+ ia64_srlz_i(); /* srlz.i implies srlz.d */
++#endif
+ }
+ EXPORT_SYMBOL(flush_tlb_range);
+
diff --git a/xen/arch/ia64/patch/linux-2.6.7/types.h b/xen/arch/ia64/patch/linux-2.6.7/types.h
new file mode 100644
index 0000000000..a70a53db72
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/types.h
@@ -0,0 +1,15 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/types.h 2004-06-15 23:19:01.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/types.h 2004-11-11 17:08:30.000000000 -0700
+@@ -1,5 +1,12 @@
+ #ifndef _ASM_IA64_TYPES_H
+ #define _ASM_IA64_TYPES_H
++#ifdef XEN
++#ifndef __ASSEMBLY__
++typedef unsigned long ssize_t;
++typedef unsigned long size_t;
++typedef long long loff_t;
++#endif
++#endif
+
+ /*
+ * This file is never included by application software unless explicitly requested (e.g.,
diff --git a/xen/arch/ia64/patch/linux-2.6.7/unaligned.c b/xen/arch/ia64/patch/linux-2.6.7/unaligned.c
new file mode 100644
index 0000000000..b18beb9759
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/unaligned.c
@@ -0,0 +1,97 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/kernel/unaligned.c 2004-06-15 23:20:03.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/unaligned.c 2004-08-25 19:28:12.000000000 -0600
+@@ -15,8 +15,10 @@
+ */
+ #include <linux/kernel.h>
+ #include <linux/sched.h>
++#ifndef XEN
+ #include <linux/smp_lock.h>
+ #include <linux/tty.h>
++#endif
+
+ #include <asm/intrinsics.h>
+ #include <asm/processor.h>
+@@ -24,7 +26,16 @@
+ #include <asm/uaccess.h>
+ #include <asm/unaligned.h>
+
++#ifdef XEN
++#define ia64_peek(x...) printk("ia64_peek: called, not implemented\n")
++#define ia64_poke(x...) printk("ia64_poke: called, not implemented\n")
++#define ia64_sync_fph(x...) printk("ia64_sync_fph: called, not implemented\n")
++#define ia64_flush_fph(x...) printk("ia64_flush_fph: called, not implemented\n")
++#define die_if_kernel(x...) printk("die_if_kernel: called, not implemented\n")
++#define jiffies 0
++#else
+ extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
++#endif
+
+ #undef DEBUG_UNALIGNED_TRAP
+
+@@ -437,7 +448,11 @@
+ }
+
+
++#ifdef XEN
++void
++#else
+ static void
++#endif
+ setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
+ {
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+@@ -611,7 +626,11 @@
+ }
+
+
++#ifdef XEN
++void
++#else
+ static void
++#endif
+ getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
+ {
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+@@ -1298,7 +1317,9 @@
+ mm_segment_t old_fs = get_fs();
+ unsigned long bundle[2];
+ unsigned long opcode;
++#ifndef XEN
+ struct siginfo si;
++#endif
+ const struct exception_table_entry *eh = NULL;
+ union {
+ unsigned long l;
+@@ -1317,6 +1338,9 @@
+ * user-level unaligned accesses. Otherwise, a clever program could trick this
+ * handler into reading an arbitrary kernel addresses...
+ */
++#ifdef XEN
++printk("ia64_handle_unaligned: called, not working yet\n");
++#else
+ if (!user_mode(regs))
+ eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
+ if (user_mode(regs) || eh) {
+@@ -1353,6 +1377,7 @@
+
+ if (__copy_from_user(bundle, (void *) regs->cr_iip, 16))
+ goto failure;
++#endif
+
+ /*
+ * extract the instruction from the bundle given the slot number
+@@ -1493,6 +1518,7 @@
+ /* NOT_REACHED */
+ }
+ force_sigbus:
++#ifndef XEN
+ si.si_signo = SIGBUS;
+ si.si_errno = 0;
+ si.si_code = BUS_ADRALN;
+@@ -1501,5 +1527,6 @@
+ si.si_isr = 0;
+ si.si_imm = 0;
+ force_sig_info(SIGBUS, &si, current);
++#endif
+ goto done;
+ }
diff --git a/xen/arch/ia64/patch/linux-2.6.7/wait.h b/xen/arch/ia64/patch/linux-2.6.7/wait.h
new file mode 100644
index 0000000000..661bf03c0a
--- /dev/null
+++ b/xen/arch/ia64/patch/linux-2.6.7/wait.h
@@ -0,0 +1,26 @@
+--- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/wait.h 2004-06-15 23:19:31.000000000 -0600
++++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/wait.h 2004-08-25 19:28:13.000000000 -0600
+@@ -104,10 +104,15 @@
+ list_del(&old->task_list);
+ }
+
++#ifdef XEN
++void FASTCALL(__wake_up(struct task_struct *p));
++#else
+ void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key));
++#endif
+ extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
+ extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
+
++#ifndef XEN
+ #define wake_up(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
+ #define wake_up_nr(x, nr) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL)
+ #define wake_up_all(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0, NULL)
+@@ -117,6 +122,7 @@
+ #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
+ #define wake_up_locked(x) __wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
+ #define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
++#endif
+
+ #define __wait_event(wq, condition) \
+ do { \
diff --git a/xen/arch/ia64/pcdp.c b/xen/arch/ia64/pcdp.c
new file mode 100644
index 0000000000..469047a69d
--- /dev/null
+++ b/xen/arch/ia64/pcdp.c
@@ -0,0 +1,120 @@
+/*
+ * Parse the EFI PCDP table to locate the console device.
+ *
+ * (c) Copyright 2002, 2003, 2004 Hewlett-Packard Development Company, L.P.
+ * Khalid Aziz <khalid.aziz@hp.com>
+ * Alex Williamson <alex.williamson@hp.com>
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/console.h>
+#include <linux/efi.h>
+#include <linux/serial.h>
+#ifdef XEN
+#include <linux/errno.h>
+#endif
+#include "pcdp.h"
+
+static int __init
+setup_serial_console(struct pcdp_uart *uart)
+{
+#ifdef XEN
+ extern char opt_com1[1];
+ if (opt_com1[0]) return 0;
+ sprintf(&opt_com1[0], "%lu,%dn1,0x%lx,9",
+ uart->baud, uart->bits ? uart->bits : 8,
+ uart->addr.address);
+ return 0;
+#else
+#ifdef CONFIG_SERIAL_8250_CONSOLE
+ int mmio;
+ static char options[64];
+
+ mmio = (uart->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY);
+ snprintf(options, sizeof(options), "console=uart,%s,0x%lx,%lun%d",
+ mmio ? "mmio" : "io", uart->addr.address, uart->baud,
+ uart->bits ? uart->bits : 8);
+
+ return early_serial_console_init(options);
+#else
+ return -ENODEV;
+#endif
+#endif
+}
+
+#ifndef XEN
+static int __init
+setup_vga_console(struct pcdp_vga *vga)
+{
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+ if (efi_mem_type(0xA0000) == EFI_CONVENTIONAL_MEMORY) {
+ printk(KERN_ERR "PCDP: VGA selected, but frame buffer is not MMIO!\n");
+ return -ENODEV;
+ }
+
+ conswitchp = &vga_con;
+ printk(KERN_INFO "PCDP: VGA console\n");
+ return 0;
+#else
+ return -ENODEV;
+#endif
+}
+#endif
+
+int __init
+efi_setup_pcdp_console(char *cmdline)
+{
+ struct pcdp *pcdp;
+ struct pcdp_uart *uart;
+ struct pcdp_device *dev, *end;
+ int i, serial = 0;
+
+ pcdp = efi.hcdp;
+ if (!pcdp)
+ return -ENODEV;
+
+#ifndef XEN
+ printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, __pa(pcdp));
+#endif
+
+ if (strstr(cmdline, "console=hcdp")) {
+ if (pcdp->rev < 3)
+ serial = 1;
+ } else if (strstr(cmdline, "console=")) {
+#ifndef XEN
+ printk(KERN_INFO "Explicit \"console=\"; ignoring PCDP\n");
+#endif
+ return -ENODEV;
+ }
+
+ if (pcdp->rev < 3 && efi_uart_console_only())
+ serial = 1;
+
+ for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) {
+ if (uart->flags & PCDP_UART_PRIMARY_CONSOLE || serial) {
+ if (uart->type == PCDP_CONSOLE_UART) {
+ return setup_serial_console(uart);
+ }
+ }
+ }
+
+#ifndef XEN
+ end = (struct pcdp_device *) ((u8 *) pcdp + pcdp->length);
+ for (dev = (struct pcdp_device *) (pcdp->uart + pcdp->num_uarts);
+ dev < end;
+ dev = (struct pcdp_device *) ((u8 *) dev + dev->length)) {
+ if (dev->flags & PCDP_PRIMARY_CONSOLE) {
+ if (dev->type == PCDP_CONSOLE_VGA) {
+ return setup_vga_console((struct pcdp_vga *) dev);
+ }
+ }
+ }
+#endif
+
+ return -ENODEV;
+}
diff --git a/xen/arch/ia64/pdb-stub.c b/xen/arch/ia64/pdb-stub.c
new file mode 100644
index 0000000000..49c8131312
--- /dev/null
+++ b/xen/arch/ia64/pdb-stub.c
@@ -0,0 +1,59 @@
+
+/*
+ * pervasive debugger
+ * www.cl.cam.ac.uk/netos/pdb
+ *
+ * alex ho
+ * 2004
+ * university of cambridge computer laboratory
+ *
+ * code adapted originally from kgdb, nemesis, & gdbserver
+ */
+
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <asm/ptrace.h>
+#include <xen/keyhandler.h>
+#include <asm/processor.h>
+#include <asm/pdb.h>
+#include <xen/list.h>
+#include <xen/serial.h>
+
+#define __PDB_GET_VAL 1
+#define __PDB_SET_VAL 2
+
+/*
+ * Read or write memory in an address space
+ */
+int pdb_change_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr, int rw)
+{
+ dummy();
+ return 0;
+}
+
+/*
+ * Set memory in a domain's address space
+ * Set "length" bytes at "address" from "domain" to the values in "buffer".
+ * Return the number of bytes set, 0 if there was a problem.
+ */
+
+int pdb_set_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr)
+{
+ int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL);
+ return count;
+}
+
+/*
+ * Read memory from a domain's address space.
+ * Fetch "length" bytes at "address" from "domain" into "buffer".
+ * Return the number of bytes read, 0 if there was a problem.
+ */
+
+int pdb_get_values(u_char *buffer, int length,
+ unsigned long cr3, unsigned long addr)
+{
+ return pdb_change_values(buffer, length, cr3, addr, __PDB_GET_VAL);
+}
+
diff --git a/xen/arch/ia64/privop.c b/xen/arch/ia64/privop.c
index 0dfa9c164b..1f50ea2448 100644
--- a/xen/arch/ia64/privop.c
+++ b/xen/arch/ia64/privop.c
@@ -7,7 +7,6 @@
*/
#include <asm/privop.h>
-#include <asm/privify.h>
#include <asm/vcpu.h>
#include <asm/processor.h>
#include <asm/delay.h> // Debug only
@@ -20,7 +19,7 @@ Hypercall bundle creation
**************************************************************************/
-void build_hypercall_bundle(UINT64 *imva, UINT64 breakimm, UINT64 hypnum, UINT64 ret)
+void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, UINT64 ret)
{
INST64_A5 slot0;
INST64_I19 slot1;
@@ -32,10 +31,10 @@ void build_hypercall_bundle(UINT64 *imva, UINT64 breakimm, UINT64 hypnum, UINT64
slot0.qp = 0; slot0.r1 = 2; slot0.r3 = 0; slot0.major = 0x9;
slot0.imm7b = hypnum; slot0.imm9d = hypnum >> 7;
slot0.imm5c = hypnum >> 16; slot0.s = 0;
- // slot1: break breakimm
+ // slot1: break brkimm
slot1.inst = 0;
slot1.qp = 0; slot1.x6 = 0; slot1.x3 = 0; slot1.major = 0x0;
- slot1.imm20 = breakimm; slot1.i = breakimm >> 20;
+ slot1.imm20 = brkimm; slot1.i = brkimm >> 20;
// if ret slot2: br.ret.sptk.many rp
// else slot2: br.cond.sptk.many rp
slot2.inst = 0; slot2.qp = 0; slot2.p = 1; slot2.b2 = 0;
@@ -206,7 +205,8 @@ IA64FAULT priv_itc_d(VCPU *vcpu, INST64 inst)
return(IA64_ILLOP_FAULT);
if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
return(IA64_ILLOP_FAULT);
- pte = vcpu_get_gr(vcpu,inst.M41.r2);
+ if (!inst.inst) pte = vcpu_get_tmp(vcpu,0);
+ else pte = vcpu_get_gr(vcpu,inst.M41.r2);
return (vcpu_itc_d(vcpu,pte,itir,ifa));
}
@@ -220,7 +220,8 @@ IA64FAULT priv_itc_i(VCPU *vcpu, INST64 inst)
return(IA64_ILLOP_FAULT);
if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
return(IA64_ILLOP_FAULT);
- pte = vcpu_get_gr(vcpu,inst.M41.r2);
+ if (!inst.inst) pte = vcpu_get_tmp(vcpu,0);
+ else pte = vcpu_get_gr(vcpu,inst.M41.r2);
return (vcpu_itc_i(vcpu,pte,itir,ifa));
}
@@ -418,10 +419,17 @@ IA64FAULT priv_mov_from_pmc(VCPU *vcpu, INST64 inst)
UINT64 val;
IA64FAULT fault;
- fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M43.r1, val);
- else return fault;
+ if (inst.M43.r1 > 63) { // privified mov from pmd
+ fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1-64, val);
+ }
+ else {
+ fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1, val);
+ }
+ return fault;
}
unsigned long from_cr_cnt[128] = { 0 };
@@ -525,12 +533,15 @@ PPEFCN Mpriv_funcs[64] = {
struct {
unsigned long mov_to_ar_imm;
unsigned long mov_to_ar_reg;
+ unsigned long mov_from_ar;
unsigned long ssm;
unsigned long rsm;
unsigned long rfi;
unsigned long bsw0;
unsigned long bsw1;
unsigned long cover;
+ unsigned long fc;
+ unsigned long cpuid;
unsigned long Mpriv_cnt[64];
} privcnt = { 0 };
@@ -539,7 +550,8 @@ unsigned long privop_trace = 0;
IA64FAULT
priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl)
{
- IA64_BUNDLE bundle, __get_domain_bundle(UINT64);
+ IA64_BUNDLE bundle;
+ IA64_BUNDLE __get_domain_bundle(UINT64);
int slot;
IA64_SLOT_TYPE slot_type;
INST64 inst;
@@ -551,19 +563,14 @@ priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl)
// make a local copy of the bundle containing the privop
#if 1
bundle = __get_domain_bundle(iip);
- if (!bundle.i64[0] && !bundle.i64[1]) return IA64_RETRY;
-#else
-#ifdef AVOIDING_POSSIBLE_DOMAIN_TLB_MISS
- //TODO: this needs to check for faults and behave accordingly
- if (!vcpu_get_iip_bundle(&bundle)) return IA64_DTLB_FAULT;
+ if (!bundle.i64[0] && !bundle.i64[1])
#else
-if (iip < 0x10000) {
- printf("priv_handle_op: unlikely iip=%p,b0=%p\n",iip,regs->b0);
- dummy();
-}
- bundle = *(IA64_BUNDLE *)iip;
-#endif
+ if (__copy_from_user(&bundle,iip,sizeof(bundle)))
#endif
+ {
+//printf("*** priv_handle_op: privop bundle @%p not mapped, retrying\n",iip);
+ return vcpu_force_data_miss(vcpu,regs->cr_iip);
+ }
#if 0
if (iip==0xa000000100001820) {
static int firstpagefault = 1;
@@ -622,7 +629,9 @@ if (iip < 0x10000) {
else if (inst.generic.major != 1) break;
x6 = inst.M29.x6;
if (x6 == 0x2a) {
- privcnt.mov_to_ar_reg++;
+ if (inst.M29.r2 > 63 && inst.M29.ar3 < 8)
+ privcnt.mov_from_ar++; // privified mov from kr
+ else privcnt.mov_to_ar_reg++;
return priv_mov_to_ar_reg(vcpu,inst);
}
if (inst.M29.x3 != 0) break;
@@ -633,7 +642,11 @@ if (iip < 0x10000) {
else x6 = 0x1a;
}
}
- privcnt.Mpriv_cnt[x6]++;
+ if (x6 == 52 && inst.M28.r3 > 63)
+ privcnt.fc++;
+ else if (x6 == 16 && inst.M43.r3 > 63)
+ privcnt.cpuid++;
+ else privcnt.Mpriv_cnt[x6]++;
return (*pfunc)(vcpu,inst);
break;
case B:
@@ -668,7 +681,9 @@ if (iip < 0x10000) {
#endif
if (inst.I26.x3 != 0) break; // I26.x3 == I27.x3
if (inst.I26.x6 == 0x2a) {
- privcnt.mov_to_ar_reg++;
+ if (inst.I26.r2 > 63 && inst.I26.ar3 < 8)
+ privcnt.mov_from_ar++; // privified mov from kr
+ else privcnt.mov_to_ar_reg++;
return priv_mov_to_ar_reg(vcpu,inst);
}
if (inst.I27.x6 == 0x0a) {
@@ -682,7 +697,7 @@ if (iip < 0x10000) {
//printf("We who are about do die salute you\n");
printf("handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d)\n",
iip, (UINT64)inst.inst, slot, slot_type);
- //printf("vtop(0x%lx)==0x%lx\r\n", iip, tr_vtop(iip));
+ //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip));
//thread_mozambique("privop fault\n");
return (IA64_ILLOP_FAULT);
}
@@ -717,34 +732,76 @@ priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr)
privlvl = (ipsr & IA64_PSR_CPL) >> IA64_PSR_CPL0_BIT;
// its OK for a privified-cover to be executed in user-land
fault = priv_handle_op(vcpu,regs,privlvl);
- if (fault == IA64_NO_FAULT) { // success!!
+ if ((fault == IA64_NO_FAULT) || (fault == IA64_EXTINT_VECTOR)) { // success!!
// update iip/ipsr to point to the next instruction
(void)vcpu_increment_iip(vcpu);
}
- else if (fault == IA64_EXTINT_VECTOR) {
- // update iip/ipsr before delivering interrupt
- (void)vcpu_increment_iip(vcpu);
- }
- else if (fault == IA64_RFI_IN_PROGRESS) return fault;
- // success but don't update to next instruction
- else if (fault == IA64_RETRY) {
- //printf("Priv emulate gets IA64_RETRY\n");
- //printf("priv_emulate: returning RETRY, not implemented!\n");
- //while (1);
- // don't update iip/ipsr, deliver
-
- vcpu_force_data_miss(vcpu,regs->cr_iip);
- return IA64_RETRY;
- }
- else if (priv_verbose) printf("unhandled operation from handle_op\n");
-// if (fault == IA64_ILLOP_FAULT) {
-// printf("priv_emulate: returning ILLOP, not implemented!\n");
-// while (1);
-// }
return fault;
}
+// FIXME: Move these to include/public/arch-ia64?
+#define HYPERPRIVOP_RFI 0x1
+#define HYPERPRIVOP_RSM_DT 0x2
+#define HYPERPRIVOP_SSM_DT 0x3
+#define HYPERPRIVOP_COVER 0x4
+#define HYPERPRIVOP_ITC_D 0x5
+#define HYPERPRIVOP_ITC_I 0x6
+#define HYPERPRIVOP_SSM_I 0x7
+#define HYPERPRIVOP_MAX 0x7
+
+char *hyperpriv_str[HYPERPRIVOP_MAX+1] = {
+ 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i",
+ 0
+};
+
+unsigned long slow_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
+unsigned long fast_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
+
+/* hyperprivops are generally executed in assembly (with physical psr.ic off)
+ * so this code is primarily used for debugging them */
+int
+ia64_hyperprivop(unsigned long iim, REGS *regs)
+{
+ struct vcpu *v = (struct domain *) current;
+ INST64 inst;
+ UINT64 val;
+
+// FIXME: Handle faults appropriately for these
+ if (!iim || iim > HYPERPRIVOP_MAX) {
+ printf("bad hyperprivop; ignored\n");
+ return 1;
+ }
+ slow_hyperpriv_cnt[iim]++;
+ switch(iim) {
+ case HYPERPRIVOP_RFI:
+ (void)vcpu_rfi(v);
+ return 0; // don't update iip
+ case HYPERPRIVOP_RSM_DT:
+ (void)vcpu_reset_psr_dt(v);
+ return 1;
+ case HYPERPRIVOP_SSM_DT:
+ (void)vcpu_set_psr_dt(v);
+ return 1;
+ case HYPERPRIVOP_COVER:
+ (void)vcpu_cover(v);
+ return 1;
+ case HYPERPRIVOP_ITC_D:
+ inst.inst = 0;
+ (void)priv_itc_d(v,inst);
+ return 1;
+ case HYPERPRIVOP_ITC_I:
+ inst.inst = 0;
+ (void)priv_itc_i(v,inst);
+ return 1;
+ case HYPERPRIVOP_SSM_I:
+ (void)vcpu_set_psr_i(v);
+ return 1;
+ }
+ return 0;
+}
+
+
/**************************************************************************
Privileged operation instrumentation routines
**************************************************************************/
@@ -784,10 +841,12 @@ char *cr_str[128] = {
RS,RS,RS,RS,RS,RS,RS,RS
};
-void dump_privop_counts(void)
+// FIXME: should use snprintf to ensure no buffer overflow
+int dump_privop_counts(char *buf)
{
int i, j;
UINT64 sum = 0;
+ char *s = buf;
// this is ugly and should probably produce sorted output
// but it will have to do for now
@@ -796,68 +855,187 @@ void dump_privop_counts(void)
sum += privcnt.rfi; sum += privcnt.bsw0;
sum += privcnt.bsw1; sum += privcnt.cover;
for (i=0; i < 64; i++) sum += privcnt.Mpriv_cnt[i];
- printf("Privop statistics: (Total privops: %ld)\r\n",sum);
+ s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum);
if (privcnt.mov_to_ar_imm)
- printf("%10d %s [%d%%]\r\n", privcnt.mov_to_ar_imm,
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_imm,
"mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum);
if (privcnt.mov_to_ar_reg)
- printf("%10d %s [%d%%]\r\n", privcnt.mov_to_ar_reg,
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_reg,
"mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum);
+ if (privcnt.mov_from_ar)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_from_ar,
+ "privified-mov_from_ar", (privcnt.mov_from_ar*100L)/sum);
if (privcnt.ssm)
- printf("%10d %s [%d%%]\r\n", privcnt.ssm,
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.ssm,
"ssm", (privcnt.ssm*100L)/sum);
if (privcnt.rsm)
- printf("%10d %s [%d%%]\r\n", privcnt.rsm,
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rsm,
"rsm", (privcnt.rsm*100L)/sum);
if (privcnt.rfi)
- printf("%10d %s [%d%%]\r\n", privcnt.rfi,
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rfi,
"rfi", (privcnt.rfi*100L)/sum);
if (privcnt.bsw0)
- printf("%10d %s [%d%%]\r\n", privcnt.bsw0,
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw0,
"bsw0", (privcnt.bsw0*100L)/sum);
if (privcnt.bsw1)
- printf("%10d %s [%d%%]\r\n", privcnt.bsw1,
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw1,
"bsw1", (privcnt.bsw1*100L)/sum);
if (privcnt.cover)
- printf("%10d %s [%d%%]\r\n", privcnt.cover,
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cover,
"cover", (privcnt.cover*100L)/sum);
+ if (privcnt.fc)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.fc,
+ "privified-fc", (privcnt.fc*100L)/sum);
+ if (privcnt.cpuid)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cpuid,
+ "privified-getcpuid", (privcnt.cpuid*100L)/sum);
for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) {
- if (!Mpriv_str[i]) printf("PRIVSTRING NULL!!\r\n");
- else printf("%10d %s [%d%%]\r\n", privcnt.Mpriv_cnt[i],
+ if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n");
+ else s += sprintf(s,"%10d %s [%d%%]\n", privcnt.Mpriv_cnt[i],
Mpriv_str[i], (privcnt.Mpriv_cnt[i]*100L)/sum);
if (i == 0x24) { // mov from CR
- printf(" [");
+ s += sprintf(s," [");
for (j=0; j < 128; j++) if (from_cr_cnt[j]) {
if (!cr_str[j])
- printf("PRIVSTRING NULL!!\r\n");
- printf("%s(%d),",cr_str[j],from_cr_cnt[j]);
+ s += sprintf(s,"PRIVSTRING NULL!!\n");
+ s += sprintf(s,"%s(%d),",cr_str[j],from_cr_cnt[j]);
}
- printf("]\r\n");
+ s += sprintf(s,"]\n");
}
else if (i == 0x2c) { // mov to CR
- printf(" [");
+ s += sprintf(s," [");
for (j=0; j < 128; j++) if (to_cr_cnt[j]) {
if (!cr_str[j])
- printf("PRIVSTRING NULL!!\r\n");
- printf("%s(%d),",cr_str[j],to_cr_cnt[j]);
+ s += sprintf(s,"PRIVSTRING NULL!!\n");
+ s += sprintf(s,"%s(%d),",cr_str[j],to_cr_cnt[j]);
}
- printf("]\r\n");
+ s += sprintf(s,"]\n");
}
}
+ return s - buf;
}
-void zero_privop_counts(void)
+int zero_privop_counts(char *buf)
{
int i, j;
+ char *s = buf;
// this is ugly and should probably produce sorted output
// but it will have to do for now
- printf("Zeroing privop statistics\r\n");
privcnt.mov_to_ar_imm = 0; privcnt.mov_to_ar_reg = 0;
+ privcnt.mov_from_ar = 0;
privcnt.ssm = 0; privcnt.rsm = 0;
privcnt.rfi = 0; privcnt.bsw0 = 0;
privcnt.bsw1 = 0; privcnt.cover = 0;
+ privcnt.fc = 0; privcnt.cpuid = 0;
for (i=0; i < 64; i++) privcnt.Mpriv_cnt[i] = 0;
for (j=0; j < 128; j++) from_cr_cnt[j] = 0;
for (j=0; j < 128; j++) to_cr_cnt[j] = 0;
+ s += sprintf(s,"All privop statistics zeroed\n");
+ return s - buf;
+}
+
+#ifdef PRIVOP_ADDR_COUNT
+
+extern struct privop_addr_count privop_addr_counter[];
+
+void privop_count_addr(unsigned long iip, int inst)
+{
+ struct privop_addr_count *v = &privop_addr_counter[inst];
+ int i;
+
+ for (i = 0; i < PRIVOP_COUNT_NADDRS; i++) {
+ if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; }
+ else if (v->addr[i] == iip) { v->count[i]++; return; }
+ }
+ v->overflow++;;
+}
+
+int dump_privop_addrs(char *buf)
+{
+ int i,j;
+ char *s = buf;
+ s += sprintf(s,"Privop addresses:\n");
+ for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
+ struct privop_addr_count *v = &privop_addr_counter[i];
+ s += sprintf(s,"%s:\n",v->instname);
+ for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) {
+ if (!v->addr[j]) break;
+ s += sprintf(s," @%p #%ld\n",v->addr[j],v->count[j]);
+ }
+ if (v->overflow)
+ s += sprintf(s," other #%ld\n",v->overflow);
+ }
+ return s - buf;
+}
+
+void zero_privop_addrs(void)
+{
+ int i,j;
+ for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
+ struct privop_addr_count *v = &privop_addr_counter[i];
+ for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+ v->addr[j] = v->count[j] = 0;
+ v->overflow = 0;
+ }
+}
+#endif
+
+int dump_hyperprivop_counts(char *buf)
+{
+ int i;
+ char *s = buf;
+ unsigned long total = 0;
+ for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += slow_hyperpriv_cnt[i];
+ s += sprintf(s,"Slow hyperprivops (total %d):\n",total);
+ for (i = 1; i <= HYPERPRIVOP_MAX; i++)
+ if (slow_hyperpriv_cnt[i])
+ s += sprintf(s,"%10d %s\n",
+ slow_hyperpriv_cnt[i], hyperpriv_str[i]);
+ total = 0;
+ for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += fast_hyperpriv_cnt[i];
+ s += sprintf(s,"Fast hyperprivops (total %d):\n",total);
+ for (i = 1; i <= HYPERPRIVOP_MAX; i++)
+ if (fast_hyperpriv_cnt[i])
+ s += sprintf(s,"%10d %s\n",
+ fast_hyperpriv_cnt[i], hyperpriv_str[i]);
+ return s - buf;
+}
+
+void zero_hyperprivop_counts(void)
+{
+ int i;
+ for (i = 0; i <= HYPERPRIVOP_MAX; i++) slow_hyperpriv_cnt[i] = 0;
+ for (i = 0; i <= HYPERPRIVOP_MAX; i++) fast_hyperpriv_cnt[i] = 0;
+}
+
+#define TMPBUFLEN 8*1024
+int dump_privop_counts_to_user(char __user *ubuf, int len)
+{
+ char buf[TMPBUFLEN];
+ int n = dump_privop_counts(buf);
+
+ n += dump_hyperprivop_counts(buf + n);
+ n += dump_reflect_counts(buf + n);
+#ifdef PRIVOP_ADDR_COUNT
+ n += dump_privop_addrs(buf + n);
+#endif
+ if (len < TMPBUFLEN) return -1;
+ if (__copy_to_user(ubuf,buf,n)) return -1;
+ return n;
+}
+
+int zero_privop_counts_to_user(char __user *ubuf, int len)
+{
+ char buf[TMPBUFLEN];
+ int n = zero_privop_counts(buf);
+
+ zero_hyperprivop_counts();
+#ifdef PRIVOP_ADDR_COUNT
+ zero_privop_addrs();
+#endif
+ zero_reflect_counts();
+ if (len < TMPBUFLEN) return -1;
+ if (__copy_to_user(ubuf,buf,n)) return -1;
+ return n;
}
diff --git a/xen/arch/ia64/process.c b/xen/arch/ia64/process.c
index 9201446bce..f664b74a42 100644
--- a/xen/arch/ia64/process.c
+++ b/xen/arch/ia64/process.c
@@ -21,7 +21,7 @@
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
-#include <asm/ldt.h>
+//#include <asm/ldt.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <asm/regionreg.h>
@@ -31,6 +31,7 @@
#include <asm/hpsim_ssc.h>
#include <asm/dom_fw.h>
+extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64);
extern struct ia64_sal_retval pal_emulator_static(UINT64);
extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
@@ -49,7 +50,8 @@ extern unsigned long dom0_start, dom0_size;
IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
-#define PSCB(x) x->shared_info->arch
+#define PSCB(x,y) x->vcpu_info->arch.y
+#define PSCBX(x,y) x->arch.y
extern unsigned long vcpu_verbose;
@@ -59,17 +61,22 @@ long do_iopl(domid_t domain, unsigned int new_io_pl)
return 0;
}
-void schedule_tail(struct domain *next)
+void schedule_tail(struct vcpu *next)
{
unsigned long rr7;
- printk("current=%lx,shared_info=%lx\n",current,current->shared_info);
- printk("next=%lx,shared_info=%lx\n",next,next->shared_info);
+ //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
+ //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
+#ifdef CONFIG_VTI
+ /* rr7 will be postponed to last point when resuming back to guest */
+ vmx_load_all_rr(current);
+#else // CONFIG_VTI
if (rr7 = load_region_regs(current)) {
printk("schedule_tail: change to rr7 not yet implemented\n");
}
+#endif // CONFIG_VTI
}
-extern TR_ENTRY *match_tr(struct domain *d, unsigned long ifa);
+extern TR_ENTRY *match_tr(struct vcpu *v, unsigned long ifa);
void tdpfoo(void) { }
@@ -80,7 +87,7 @@ void tdpfoo(void) { }
unsigned long translate_domain_pte(unsigned long pteval,
unsigned long address, unsigned long itir)
{
- struct domain *d = (struct domain *) current;
+ struct domain *d = current->domain;
unsigned long mask, pteval2, mpaddr;
unsigned long lookup_domain_mpa(struct domain *,unsigned long);
extern struct domain *dom0;
@@ -113,22 +120,58 @@ unsigned long translate_domain_mpaddr(unsigned long mpaddr)
extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
unsigned long pteval;
- if (current == dom0) {
+ if (current->domain == dom0) {
if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr %p! continuing...\n",mpaddr);
tdpfoo();
}
}
- pteval = lookup_domain_mpa(current,mpaddr);
+ pteval = lookup_domain_mpa(current->domain,mpaddr);
return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK));
}
+unsigned long slow_reflect_count[0x80] = { 0 };
+unsigned long fast_reflect_count[0x80] = { 0 };
+
+#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++;
+
+void zero_reflect_counts(void)
+{
+ int i;
+ for (i=0; i<0x80; i++) slow_reflect_count[i] = 0;
+ for (i=0; i<0x80; i++) fast_reflect_count[i] = 0;
+}
+
+int dump_reflect_counts(char *buf)
+{
+ int i,j,cnt;
+ char *s = buf;
+
+ s += sprintf(s,"Slow reflections by vector:\n");
+ for (i = 0, j = 0; i < 0x80; i++) {
+ if (cnt = slow_reflect_count[i]) {
+ s += sprintf(s,"0x%02x00:%10d, ",i,cnt);
+ if ((j++ & 3) == 3) s += sprintf(s,"\n");
+ }
+ }
+ if (j & 3) s += sprintf(s,"\n");
+ s += sprintf(s,"Fast reflections by vector:\n");
+ for (i = 0, j = 0; i < 0x80; i++) {
+ if (cnt = fast_reflect_count[i]) {
+ s += sprintf(s,"0x%02x00:%10d, ",i,cnt);
+ if ((j++ & 3) == 3) s += sprintf(s,"\n");
+ }
+ }
+ if (j & 3) s += sprintf(s,"\n");
+ return s - buf;
+}
+
void reflect_interruption(unsigned long ifa, unsigned long isr, unsigned long itiriim, struct pt_regs *regs, unsigned long vector)
{
- unsigned long vcpu_get_ipsr_int_state(struct domain *,unsigned long);
- unsigned long vcpu_get_rr_ve(struct domain *,unsigned long);
- unsigned long vcpu_get_itir_on_fault(struct domain *,unsigned long);
- struct domain *d = (struct domain *) current;
+ unsigned long vcpu_get_ipsr_int_state(struct vcpu *,unsigned long);
+ unsigned long vcpu_get_rr_ve(struct vcpu *,unsigned long);
+ struct domain *d = current->domain;
+ struct vcpu *v = current;
if (vector == IA64_EXTINT_VECTOR) {
@@ -140,86 +183,91 @@ void reflect_interruption(unsigned long ifa, unsigned long isr, unsigned long it
first_extint = 0;
}
}
- if (!PSCB(d).interrupt_collection_enabled) {
- if (!(PSCB(d).ipsr & IA64_PSR_DT)) {
- printf("psr.dt off, trying to deliver nested dtlb!\n");
- while(1);
+ if (!PSCB(v,interrupt_collection_enabled)) {
+ if (!(PSCB(v,ipsr) & IA64_PSR_DT)) {
+ panic_domain(regs,"psr.dt off, trying to deliver nested dtlb!\n");
}
vector &= ~0xf;
if (vector != IA64_DATA_TLB_VECTOR &&
- vector != IA64_DATA_TLB_VECTOR) {
-printf("psr.ic off, delivering fault=%lx,iip=%p,isr=%p,PSCB.iip=%p\n",
- vector,regs->cr_iip,isr,PSCB(d).iip);
- while(1);
+ vector != IA64_ALT_DATA_TLB_VECTOR &&
+ vector != IA64_VHPT_TRANS_VECTOR) {
+panic_domain(regs,"psr.ic off, delivering fault=%lx,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n",
+ vector,regs->cr_iip,ifa,isr,PSCB(v,iip));
}
//printf("Delivering NESTED DATA TLB fault\n");
vector = IA64_DATA_NESTED_TLB_VECTOR;
- regs->cr_iip = ((unsigned long) PSCB(d).iva + vector) & ~0xffUL;
+ regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL;
regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
// NOTE: nested trap must NOT pass PSCB address
- //regs->r31 = (unsigned long) &PSCB(d);
+ //regs->r31 = (unsigned long) &PSCB(v);
+ inc_slow_reflect_count(vector);
return;
}
- if ((vector & 0xf) != IA64_FORCED_IFA) PSCB(d).ifa = ifa;
- else ifa = PSCB(d).ifa;
+ if ((vector & 0xf) == IA64_FORCED_IFA)
+ ifa = PSCB(v,tmp[0]);
vector &= ~0xf;
-// always deliver on ALT vector (for now?) because no VHPT
-// if (!vcpu_get_rr_ve(d,ifa)) {
- if (vector == IA64_DATA_TLB_VECTOR)
- vector = IA64_ALT_DATA_TLB_VECTOR;
- else if (vector == IA64_INST_TLB_VECTOR)
- vector = IA64_ALT_INST_TLB_VECTOR;
-// }
- PSCB(d).unat = regs->ar_unat; // not sure if this is really needed?
- PSCB(d).precover_ifs = regs->cr_ifs;
- vcpu_bsw0(d);
- PSCB(d).ipsr = vcpu_get_ipsr_int_state(d,regs->cr_ipsr);
+ PSCB(v,ifa) = ifa;
+ if (vector < IA64_DATA_NESTED_TLB_VECTOR) /* VHPT miss, TLB miss, Alt TLB miss */
+ vcpu_thash(v,ifa,&PSCB(current,iha));
+ PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed?
+ PSCB(v,precover_ifs) = regs->cr_ifs;
+ vcpu_bsw0(v);
+ PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr);
if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
- PSCB(d).iim = itiriim;
- else PSCB(d).itir = vcpu_get_itir_on_fault(d,ifa);
- PSCB(d).isr = isr; // this is unnecessary except for interrupts!
- PSCB(d).iip = regs->cr_iip;
- PSCB(d).ifs = 0;
- PSCB(d).incomplete_regframe = 0;
-
- regs->cr_iip = ((unsigned long) PSCB(d).iva + vector) & ~0xffUL;
+ PSCB(v,iim) = itiriim;
+ else PSCB(v,itir) = vcpu_get_itir_on_fault(v,ifa);
+ PSCB(v,isr) = isr; // this is unnecessary except for interrupts!
+ PSCB(v,iip) = regs->cr_iip;
+ PSCB(v,ifs) = 0;
+ PSCB(v,incomplete_regframe) = 0;
+
+ regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL;
regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
-// FIXME: NEED TO PASS PSCB, BUT **NOT** IN R31 WHICH IS BEING USED FOR ar.pr
-// IN ANY CASE, PASS PINNED ADDRESS, NOT THIS ONE
- //regs->r31 = (unsigned long) &PSCB(d);
+#ifdef CONFIG_SMP
+#error "sharedinfo doesn't handle smp yet"
+#endif
+ regs->r31 = &((shared_info_t *)SHAREDINFO_ADDR)->vcpu_data[0].arch;
+
+ PSCB(v,interrupt_delivery_enabled) = 0;
+ PSCB(v,interrupt_collection_enabled) = 0;
- PSCB(d).interrupt_delivery_enabled = 0;
- PSCB(d).interrupt_collection_enabled = 0;
+ inc_slow_reflect_count(vector);
}
void foodpi(void) {}
+unsigned long pending_false_positive = 0;
+
// ONLY gets called from ia64_leave_kernel
// ONLY call with interrupts disabled?? (else might miss one?)
// NEVER successful if already reflecting a trap/fault because psr.i==0
void deliver_pending_interrupt(struct pt_regs *regs)
{
- struct domain *d = (struct domain *) current;
+ struct domain *d = current->domain;
+ struct vcpu *v = current;
// FIXME: Will this work properly if doing an RFI???
if (!is_idle_task(d) && user_mode(regs)) {
- vcpu_poke_timer(d);
- if (vcpu_deliverable_interrupts(d)) {
+ //vcpu_poke_timer(v);
+ if (vcpu_deliverable_interrupts(v)) {
unsigned long isr = regs->cr_ipsr & IA64_PSR_RI;
- foodpi();
+ if (vcpu_timer_pending_early(v))
+printf("*#*#*#* about to deliver early timer to domain %d!!!\n",v->domain->domain_id);
reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR);
}
+ else if (PSCB(v,pending_interruption))
+ ++pending_false_positive;
}
}
-int handle_lazy_cover(struct domain *d, unsigned long isr, struct pt_regs *regs)
+int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs)
{
- if (!PSCB(d).interrupt_collection_enabled) {
+ if (!PSCB(v,interrupt_collection_enabled)) {
if (isr & IA64_ISR_IR) {
// printf("Handling lazy cover\n");
- PSCB(d).ifs = regs->cr_ifs;
- PSCB(d).incomplete_regframe = 1;
+ PSCB(v,ifs) = regs->cr_ifs;
+ PSCB(v,incomplete_regframe) = 1;
regs->cr_ifs = 0;
return(1); // retry same instruction with cr.ifs off
}
@@ -231,15 +279,16 @@ int handle_lazy_cover(struct domain *d, unsigned long isr, struct pt_regs *regs)
void xen_handle_domain_access(unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir)
{
- struct domain *d = (struct domain *) current;
+ struct domain *d = (struct domain *) current->domain;
+ struct domain *ed = (struct vcpu *) current;
TR_ENTRY *trp;
unsigned long psr = regs->cr_ipsr, mask, flags;
unsigned long iip = regs->cr_iip;
// FIXME should validate address here
- unsigned long pteval, mpaddr;
+ unsigned long pteval, mpaddr, ps;
unsigned long lookup_domain_mpa(struct domain *,unsigned long);
+ unsigned long match_dtlb(struct vcpu *,unsigned long, unsigned long *, unsigned long *);
IA64FAULT fault;
- extern void __get_domain_bundle(void);
// NEED TO HANDLE THREE CASES:
// 1) domain is in metaphysical mode
@@ -248,7 +297,7 @@ void xen_handle_domain_access(unsigned long address, unsigned long isr, struct p
// got here trying to read a privop bundle
//if (d->metaphysical_mode) {
- if (d->metaphysical_mode && !(address>>61)) { //FIXME
+ if (PSCB(current,metaphysical_mode) && !(address>>61)) { //FIXME
if (d == dom0) {
if (address < dom0_start || address >= dom0_start + dom0_size) {
printk("xen_handle_domain_access: out-of-bounds"
@@ -259,45 +308,60 @@ void xen_handle_domain_access(unsigned long address, unsigned long isr, struct p
pteval = lookup_domain_mpa(d,address);
//FIXME: check return value?
// would be nice to have a counter here
- vcpu_itc_no_srlz(d,2,address,pteval,PAGE_SHIFT);
+ vcpu_itc_no_srlz(ed,2,address,pteval,-1UL,PAGE_SHIFT);
return;
}
if (address < 0x4000) printf("WARNING: page_fault @%p, iip=%p\n",address,iip);
- if (*(unsigned long *)__get_domain_bundle != iip) {
- printf("Bad user space access @%p ",address);
- printf("iip=%p, ipsr=%p, b0=%p\n",iip,psr,regs->b0);
- while(1);
- }
- fault = vcpu_tpa(d,address,&mpaddr);
- if (fault != IA64_NO_FAULT) {
- // this is hardcoded to handle __get_domain_bundle only
- regs->r8 = 0; regs->r9 = 0;
- regs->cr_iip += 0x20;
- //regs->cr_iip |= (2UL << IA64_PSR_RI_BIT);
+ // if we are fortunate enough to have it in the 1-entry TLB...
+ if (pteval = match_dtlb(ed,address,&ps,NULL)) {
+ vcpu_itc_no_srlz(ed,6,address,pteval,-1UL,ps);
return;
}
+ // look in the TRs
+ fault = vcpu_tpa(ed,address,&mpaddr);
+ if (fault != IA64_NO_FAULT) {
+ static int uacnt = 0;
+ // can't translate it, just fail (poor man's exception)
+ // which results in retrying execution
+//printk("*** xen_handle_domain_access: poor man's exception cnt=%i iip=%p, addr=%p...\n",uacnt++,iip,address);
+ if (ia64_done_with_exception(regs)) {
+//if (!(uacnt++ & 0x3ff)) printk("*** xen_handle_domain_access: successfully handled cnt=%d iip=%p, addr=%p...\n",uacnt,iip,address);
+ return;
+ }
+ else {
+ // should never happen. If it does, region 0 addr may
+ // indicate a bad xen pointer
+ printk("*** xen_handle_domain_access: exception table"
+ " lookup failed, iip=%p, addr=%p, spinning...\n",
+ iip,address);
+ panic_domain(regs,"*** xen_handle_domain_access: exception table"
+ " lookup failed, iip=%p, addr=%p, spinning...\n",
+ iip,address);
+ }
+ }
if (d == dom0) {
if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
printk("xen_handle_domain_access: vcpu_tpa returned out-of-bounds dom0 mpaddr %p! continuing...\n",mpaddr);
tdpfoo();
}
}
+//printk("*** xen_handle_domain_access: tpa resolved miss @%p...\n",address);
pteval = lookup_domain_mpa(d,mpaddr);
// would be nice to have a counter here
//printf("Handling privop data TLB miss\n");
// FIXME, must be inlined or potential for nested fault here!
- vcpu_itc_no_srlz(d,2,address,pteval,PAGE_SHIFT);
+ vcpu_itc_no_srlz(ed,2,address,pteval,-1UL,PAGE_SHIFT);
}
void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir)
{
- struct domain *d = (struct domain *) current;
+ struct domain *d = (struct domain *) current->domain;
TR_ENTRY *trp;
unsigned long psr = regs->cr_ipsr, mask, flags;
unsigned long iip = regs->cr_iip;
// FIXME should validate address here
- unsigned long pteval, mpaddr;
+ unsigned long iha, pteval, mpaddr;
unsigned long lookup_domain_mpa(struct domain *,unsigned long);
unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
unsigned long vector;
@@ -317,31 +381,33 @@ void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_reg
// FIXME: no need to pass itir in to this routine as we need to
// compute the virtual itir anyway (based on domain's RR.ps)
// AND ACTUALLY reflect_interruption doesn't use it anyway!
- itir = vcpu_get_itir_on_fault(d,address);
+ itir = vcpu_get_itir_on_fault(current,address);
- if (d->metaphysical_mode && (is_data || !(address>>61))) { //FIXME
+ if (PSCB(current,metaphysical_mode) && (is_data || !(address>>61))) { //FIXME
// FIXME should validate mpaddr here
if (d == dom0) {
if (address < dom0_start || address >= dom0_start + dom0_size) {
printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, iip=%p! continuing...\n",address,iip);
- printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, old iip=%p!\n",address,d->shared_info->arch.iip);
+ printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, old iip=%p!\n",address,current->vcpu_info->arch.iip);
tdpfoo();
}
}
pteval = lookup_domain_mpa(d,address);
// FIXME, must be inlined or potential for nested fault here!
- vcpu_itc_no_srlz(d,is_data?2:1,address,pteval,PAGE_SHIFT);
+ vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,PAGE_SHIFT);
return;
}
- if (trp = match_tr(d,address)) {
+ if (trp = match_tr(current,address)) {
// FIXME address had better be pre-validated on insert
pteval = translate_domain_pte(trp->page_flags,address,trp->itir);
- vcpu_itc_no_srlz(d,is_data?2:1,address,pteval,(trp->itir>>2)&0x3f);
+ vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(trp->itir>>2)&0x3f);
return;
}
- vector = is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR;
- if (handle_lazy_cover(d, isr, regs)) return;
-if (!(address>>61)) { printf("ia64_do_page_fault: @%p???, iip=%p, itc=%p (spinning...)\n",address,iip,ia64_get_itc()); while(1); }
+
+ if (handle_lazy_cover(current, isr, regs)) return;
+if (!(address>>61)) {
+panic_domain(0,"ia64_do_page_fault: @%p???, iip=%p, b0=%p, itc=%p (spinning...)\n",address,iip,regs->b0,ia64_get_itc());
+}
if ((isr & IA64_ISR_SP)
|| ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
{
@@ -353,6 +419,37 @@ if (!(address>>61)) { printf("ia64_do_page_fault: @%p???, iip=%p, itc=%p (spinni
ia64_psr(regs)->ed = 1;
return;
}
+
+ if (vcpu_get_rr_ve(current, address) && (PSCB(current,pta) & IA64_PTA_VE))
+ {
+ if (PSCB(current,pta) & IA64_PTA_VF)
+ {
+ /* long format VHPT - not implemented */
+ vector = is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR;
+ }
+ else
+ {
+ /* short format VHPT */
+ vcpu_thash(current, address, &iha);
+ if (__copy_from_user(&pteval, iha, sizeof(pteval)) == 0)
+ {
+ /*
+ * Optimisation: this VHPT walker aborts on not-present pages
+ * instead of inserting a not-present translation, this allows
+ * vectoring directly to the miss handler.
+ \ */
+ if (pteval & _PAGE_P)
+ {
+ pteval = translate_domain_pte(pteval,address,itir);
+ vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3f);
+ return;
+ }
+ else vector = is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR;
+ }
+ else vector = IA64_VHPT_TRANS_VECTOR;
+ }
+ }
+ else vector = is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR;
reflect_interruption(address, isr, itir, regs, vector);
}
@@ -492,7 +589,7 @@ printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n",
case 32: /* fp fault */
case 33: /* fp trap */
//result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
- if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
+ //if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
//siginfo.si_signo = SIGFPE;
//siginfo.si_errno = 0;
//siginfo.si_code = FPE_FLTINV;
@@ -501,7 +598,7 @@ printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n",
//siginfo.si_isr = isr;
//siginfo.si_imm = 0;
//force_sig_info(SIGFPE, &siginfo, current);
- }
+ //}
//return;
sprintf(buf, "FP fault/trap");
break;
@@ -671,88 +768,37 @@ if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware. (ignoring
vcpu_set_gr(current,8,-1L);
break;
default:
- printf("ia64_handle_break: bad ssc code %lx\n",ssc);
+ printf("ia64_handle_break: bad ssc code %lx, iip=%p\n",ssc,regs->cr_iip);
break;
}
vcpu_increment_iip(current);
}
-void fooefi(void) {}
+int first_break = 1;
void
ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim)
{
- static int first_time = 1;
- struct domain *d = (struct domain *) current;
+ struct domain *d = (struct domain *) current->domain;
+ struct vcpu *v = (struct domain *) current;
extern unsigned long running_on_sim;
- if (first_time) {
+ if (first_break) {
if (platform_is_hp_ski()) running_on_sim = 1;
else running_on_sim = 0;
- first_time = 0;
+ first_break = 0;
}
if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant
if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs);
else do_ssc(vcpu_get_gr(current,36), regs);
}
- else if (iim == d->breakimm) {
- struct ia64_sal_retval x;
- switch (regs->r2) {
- case FW_HYPERCALL_PAL_CALL:
- //printf("*** PAL hypercall: index=%d\n",regs->r28);
- //FIXME: This should call a C routine
- x = pal_emulator_static(regs->r28);
- regs->r8 = x.status; regs->r9 = x.v0;
- regs->r10 = x.v1; regs->r11 = x.v2;
- break;
- case FW_HYPERCALL_SAL_CALL:
- x = sal_emulator(vcpu_get_gr(d,32),vcpu_get_gr(d,33),
- vcpu_get_gr(d,34),vcpu_get_gr(d,35),
- vcpu_get_gr(d,36),vcpu_get_gr(d,37),
- vcpu_get_gr(d,38),vcpu_get_gr(d,39));
- regs->r8 = x.status; regs->r9 = x.v0;
- regs->r10 = x.v1; regs->r11 = x.v2;
- break;
- case FW_HYPERCALL_EFI_RESET_SYSTEM:
- printf("efi.reset_system called ");
- if (current == dom0) {
- printf("(by dom0)\n ");
- (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
- }
- printf("(not supported for non-0 domain)\n");
- regs->r8 = EFI_UNSUPPORTED;
- break;
- case FW_HYPERCALL_EFI_GET_TIME:
- {
- unsigned long *tv, *tc;
- fooefi();
- tv = vcpu_get_gr(d,32);
- tc = vcpu_get_gr(d,33);
- //printf("efi_get_time(%p,%p) called...",tv,tc);
- tv = __va(translate_domain_mpaddr(tv));
- if (tc) tc = __va(translate_domain_mpaddr(tc));
- regs->r8 = (*efi.get_time)(tv,tc);
- //printf("and returns %lx\n",regs->r8);
- }
- break;
- case FW_HYPERCALL_EFI_SET_TIME:
- case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
- case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
- // FIXME: need fixes in efi.h from 2.6.9
- case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
- // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
- // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
- // POINTER ARGUMENTS WILL BE VIRTUAL!!
- case FW_HYPERCALL_EFI_GET_VARIABLE:
- // FIXME: need fixes in efi.h from 2.6.9
- case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
- case FW_HYPERCALL_EFI_SET_VARIABLE:
- case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
- // FIXME: need fixes in efi.h from 2.6.9
- regs->r8 = EFI_UNSUPPORTED;
- break;
- }
- vcpu_increment_iip(current);
+ else if (iim == d->arch.breakimm) {
+ if (ia64_hypercall(regs))
+ vcpu_increment_iip(current);
+ }
+ else if (!PSCB(v,interrupt_collection_enabled)) {
+ if (ia64_hyperprivop(iim,regs))
+ vcpu_increment_iip(current);
}
else reflect_interruption(ifa,isr,iim,regs,IA64_BREAK_VECTOR);
}
@@ -761,17 +807,14 @@ void
ia64_handle_privop (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long itir)
{
IA64FAULT vector;
- struct domain *d = (struct domain *) current;
+ struct domain *d = current->domain;
+ struct vcpu *v = current;
// FIXME: no need to pass itir in to this routine as we need to
// compute the virtual itir anyway (based on domain's RR.ps)
// AND ACTUALLY reflect_interruption doesn't use it anyway!
- itir = vcpu_get_itir_on_fault(d,ifa);
- vector = priv_emulate((struct domain *)current,regs,isr);
- if (vector == IA64_RETRY) {
- reflect_interruption(ifa,isr,itir,regs,
- IA64_ALT_DATA_TLB_VECTOR | IA64_FORCED_IFA);
- }
- else if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) {
+ itir = vcpu_get_itir_on_fault(v,ifa);
+ vector = priv_emulate(current,regs,isr);
+ if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) {
reflect_interruption(ifa,isr,itir,regs,vector);
}
}
@@ -782,20 +825,19 @@ UINT64 int_counts[INTR_TYPE_MAX];
void
ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim, unsigned long vector)
{
- extern unsigned long vcpu_get_itir_on_fault(struct domain *vcpu, UINT64 ifa);
- struct domain *d = (struct domain *) current;
+ struct domain *d = (struct domain *) current->domain;
+ struct vcpu *v = (struct domain *) current;
unsigned long check_lazy_cover = 0;
unsigned long psr = regs->cr_ipsr;
- unsigned long itir = vcpu_get_itir_on_fault(d,ifa);
+ unsigned long itir = vcpu_get_itir_on_fault(v,ifa);
if (!(psr & IA64_PSR_CPL)) {
- printf("ia64_handle_reflection: reflecting with priv=0!!\n");
- while(1);
+ printk("ia64_handle_reflection: reflecting with priv=0!!\n");
}
// FIXME: no need to pass itir in to this routine as we need to
// compute the virtual itir anyway (based on domain's RR.ps)
// AND ACTUALLY reflect_interruption doesn't use it anyway!
- itir = vcpu_get_itir_on_fault(d,ifa);
+ itir = vcpu_get_itir_on_fault(v,ifa);
switch(vector) {
case 8:
vector = IA64_DIRTY_BIT_VECTOR; break;
@@ -804,16 +846,20 @@ ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long i
case 10:
check_lazy_cover = 1;
vector = IA64_DATA_ACCESS_BIT_VECTOR; break;
+ case 20:
+ check_lazy_cover = 1;
+ vector = IA64_PAGE_NOT_PRESENT_VECTOR; break;
case 22:
vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break;
case 23:
check_lazy_cover = 1;
vector = IA64_DATA_ACCESS_RIGHTS_VECTOR; break;
case 25:
- vector = IA64_DISABLED_FPREG_VECTOR; break;
+ vector = IA64_DISABLED_FPREG_VECTOR;
+ break;
case 26:
printf("*** NaT fault... attempting to handle as privop\n");
- vector = priv_emulate(d,regs,isr);
+ vector = priv_emulate(v,regs,isr);
if (vector == IA64_NO_FAULT) {
printf("*** Handled privop masquerading as NaT fault\n");
return;
@@ -831,6 +877,6 @@ printf("*** Handled privop masquerading as NaT fault\n");
while(vector);
return;
}
- if (check_lazy_cover && handle_lazy_cover(d, isr, regs)) return;
+ if (check_lazy_cover && handle_lazy_cover(v, isr, regs)) return;
reflect_interruption(ifa,isr,itir,regs,vector);
}
diff --git a/xen/arch/ia64/regionreg.c b/xen/arch/ia64/regionreg.c
index bb1803a71d..6653d4b6a8 100644
--- a/xen/arch/ia64/regionreg.c
+++ b/xen/arch/ia64/regionreg.c
@@ -63,9 +63,14 @@ unsigned long allocate_reserved_rid(void)
// returns -1 if none available
-unsigned long allocate_metaphysical_rid(void)
+unsigned long allocate_metaphysical_rr0(void)
{
- unsigned long rid = allocate_reserved_rid();
+ ia64_rr rrv;
+
+ rrv.rid = allocate_reserved_rid();
+ rrv.ps = PAGE_SHIFT;
+ rrv.ve = 0;
+ return rrv.rrval;
}
int deallocate_metaphysical_rid(unsigned long rid)
@@ -146,6 +151,8 @@ int allocate_rid_range(struct domain *d, unsigned long ridbits)
d->rid_bits = ridbits;
d->starting_rid = i << IA64_MIN_IMPL_RID_BITS;
d->ending_rid = (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS;
+printf("###allocating rid_range, domain %p: starting_rid=%lx, ending_rid=%lx\n",
+d,d->starting_rid, d->ending_rid);
return 1;
}
@@ -187,7 +194,7 @@ int deallocate_rid_range(struct domain *d)
// it should be unmangled
//This appears to work in Xen... turn it on later so no complications yet
-//#define CONFIG_MANGLE_RIDS
+#define CONFIG_MANGLE_RIDS
#ifdef CONFIG_MANGLE_RIDS
static inline unsigned long
vmMangleRID(unsigned long RIDVal)
@@ -249,7 +256,7 @@ static inline int validate_page_size(unsigned long ps)
// NOTE: DOES NOT SET VCPU's rrs[x] value!!
int set_one_rr(unsigned long rr, unsigned long val)
{
- struct domain *d = current;
+ struct vcpu *v = current;
unsigned long rreg = REGION_NUMBER(rr);
ia64_rr rrv, newrrv, memrrv;
unsigned long newrid;
@@ -258,16 +265,21 @@ int set_one_rr(unsigned long rr, unsigned long val)
rrv.rrval = val;
newrrv.rrval = 0;
- newrid = d->starting_rid + rrv.rid;
+ newrid = v->domain->starting_rid + rrv.rid;
- if (newrid > d->ending_rid) return 0;
+ if (newrid > v->domain->ending_rid) {
+ printk("can't set rr%d to %lx, starting_rid=%lx,"
+ "ending_rid=%lx, val=%lx\n", rreg, newrid,
+ v->domain->starting_rid,v->domain->ending_rid,val);
+ return 0;
+ }
memrrv.rrval = rrv.rrval;
if (rreg == 7) {
newrrv.rid = newrid;
newrrv.ve = VHPT_ENABLED_REGION_7;
newrrv.ps = IA64_GRANULE_SHIFT;
- ia64_new_rr7(vmMangleRID(newrrv.rrval));
+ ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info);
}
else {
newrrv.rid = newrid;
@@ -275,22 +287,20 @@ int set_one_rr(unsigned long rr, unsigned long val)
if (rreg == 6) newrrv.ve = VHPT_ENABLED_REGION_7;
else newrrv.ve = VHPT_ENABLED_REGION_0_TO_6;
newrrv.ps = PAGE_SHIFT;
+ if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval;
set_rr(rr,newrrv.rrval);
}
return 1;
}
// set rr0 to the passed rid (for metaphysical mode so don't use domain offset
-int set_metaphysical_rr(unsigned long rr, unsigned long rid)
+int set_metaphysical_rr0(void)
{
+ struct vcpu *v = current;
ia64_rr rrv;
- rrv.rrval = 0;
- rrv.rid = rid;
- rrv.ps = PAGE_SHIFT;
// rrv.ve = 1; FIXME: TURN ME BACK ON WHEN VHPT IS WORKING
- rrv.ve = 0;
- set_rr(rr,rrv.rrval);
+ set_rr(0,v->arch.metaphysical_rr0);
}
// validates/changes region registers 0-6 in the currently executing domain
@@ -310,43 +320,45 @@ int set_all_rr( u64 rr0, u64 rr1, u64 rr2, u64 rr3,
return 1;
}
-void init_all_rr(struct domain *d)
+void init_all_rr(struct vcpu *v)
{
ia64_rr rrv;
rrv.rrval = 0;
- rrv.rid = d->metaphysical_rid;
+ rrv.rrval = v->domain->arch.metaphysical_rr0;
rrv.ps = PAGE_SHIFT;
rrv.ve = 1;
- d->shared_info->arch.rrs[0] = -1;
- d->shared_info->arch.rrs[1] = rrv.rrval;
- d->shared_info->arch.rrs[2] = rrv.rrval;
- d->shared_info->arch.rrs[3] = rrv.rrval;
- d->shared_info->arch.rrs[4] = rrv.rrval;
- d->shared_info->arch.rrs[5] = rrv.rrval;
- d->shared_info->arch.rrs[6] = rrv.rrval;
-// d->shared_info->arch.rrs[7] = rrv.rrval;
+if (!v->vcpu_info) { printf("Stopping in init_all_rr\n"); dummy(); }
+ v->vcpu_info->arch.rrs[0] = -1;
+ v->vcpu_info->arch.rrs[1] = rrv.rrval;
+ v->vcpu_info->arch.rrs[2] = rrv.rrval;
+ v->vcpu_info->arch.rrs[3] = rrv.rrval;
+ v->vcpu_info->arch.rrs[4] = rrv.rrval;
+ v->vcpu_info->arch.rrs[5] = rrv.rrval;
+ rrv.ve = 0;
+ v->vcpu_info->arch.rrs[6] = rrv.rrval;
+// v->shared_info->arch.rrs[7] = rrv.rrval;
}
/* XEN/ia64 INTERNAL ROUTINES */
-unsigned long physicalize_rid(struct domain *d, unsigned long rid)
+unsigned long physicalize_rid(struct vcpu *v, unsigned long rrval)
{
ia64_rr rrv;
- rrv.rrval = rid;
- rrv.rid += d->starting_rid;
+ rrv.rrval = rrval;
+ rrv.rid += v->domain->starting_rid;
return rrv.rrval;
}
unsigned long
-virtualize_rid(struct domain *d, unsigned long rid)
+virtualize_rid(struct vcpu *v, unsigned long rrval)
{
ia64_rr rrv;
- rrv.rrval = rid;
- rrv.rid -= d->starting_rid;
+ rrv.rrval = rrval;
+ rrv.rid -= v->domain->starting_rid;
return rrv.rrval;
}
@@ -357,43 +369,42 @@ virtualize_rid(struct domain *d, unsigned long rid)
// rr7 (because we have to to assembly and physical mode
// to change rr7). If no change to rr7 is required, returns 0.
//
-unsigned long load_region_regs(struct domain *d)
+unsigned long load_region_regs(struct vcpu *v)
{
- unsigned long rr0, rr1,rr2, rr3, rr4, rr5, rr6;
- unsigned long oldrr7, newrr7;
+ unsigned long rr0, rr1,rr2, rr3, rr4, rr5, rr6, rr7;
// TODO: These probably should be validated
+ unsigned long bad = 0;
- if (d->metaphysical_mode) {
+ if (v->vcpu_info->arch.metaphysical_mode) {
ia64_rr rrv;
- rrv.rid = d->metaphysical_rid;
+ rrv.rrval = 0;
+ rrv.rid = v->domain->arch.metaphysical_rr0;
rrv.ps = PAGE_SHIFT;
rrv.ve = 1;
- rr0 = rr1 = rr2 = rr3 = rr4 = rr5 = rr6 = newrr7 = rrv.rrval;
+ rr0 = rrv.rrval;
+ set_rr_no_srlz(0x0000000000000000L, rr0);
+ ia64_srlz_d();
}
else {
- rr0 = physicalize_rid(d, d->shared_info->arch.rrs[0]);
- rr1 = physicalize_rid(d, d->shared_info->arch.rrs[1]);
- rr2 = physicalize_rid(d, d->shared_info->arch.rrs[2]);
- rr3 = physicalize_rid(d, d->shared_info->arch.rrs[3]);
- rr4 = physicalize_rid(d, d->shared_info->arch.rrs[4]);
- rr5 = physicalize_rid(d, d->shared_info->arch.rrs[5]);
- rr6 = physicalize_rid(d, d->shared_info->arch.rrs[6]);
- newrr7 = physicalize_rid(d, d->shared_info->arch.rrs[7]);
+ rr0 = v->vcpu_info->arch.rrs[0];
+ if (!set_one_rr(0x0000000000000000L, rr0)) bad |= 1;
}
-
- set_rr_no_srlz(0x0000000000000000L, rr0);
- set_rr_no_srlz(0x2000000000000000L, rr1);
- set_rr_no_srlz(0x4000000000000000L, rr2);
- set_rr_no_srlz(0x6000000000000000L, rr3);
- set_rr_no_srlz(0x8000000000000000L, rr4);
- set_rr_no_srlz(0xa000000000000000L, rr5);
- set_rr_no_srlz(0xc000000000000000L, rr6);
- ia64_srlz_d();
- oldrr7 = get_rr(0xe000000000000000L);
- if (oldrr7 != newrr7) {
- newrr7 = (newrr7 & ~0xff) | (PAGE_SHIFT << 2) | 1;
- return vmMangleRID(newrr7);
+ rr1 = v->vcpu_info->arch.rrs[1];
+ rr2 = v->vcpu_info->arch.rrs[2];
+ rr3 = v->vcpu_info->arch.rrs[3];
+ rr4 = v->vcpu_info->arch.rrs[4];
+ rr5 = v->vcpu_info->arch.rrs[5];
+ rr6 = v->vcpu_info->arch.rrs[6];
+ rr7 = v->vcpu_info->arch.rrs[7];
+ if (!set_one_rr(0x2000000000000000L, rr1)) bad |= 2;
+ if (!set_one_rr(0x4000000000000000L, rr2)) bad |= 4;
+ if (!set_one_rr(0x6000000000000000L, rr3)) bad |= 8;
+ if (!set_one_rr(0x8000000000000000L, rr4)) bad |= 0x10;
+ if (!set_one_rr(0xa000000000000000L, rr5)) bad |= 0x20;
+ if (!set_one_rr(0xc000000000000000L, rr6)) bad |= 0x40;
+ if (!set_one_rr(0xe000000000000000L, rr7)) bad |= 0x80;
+ if (bad) {
+ panic_domain(0,"load_region_regs: can't set! bad=%lx\n",bad);
}
- else return 0;
}
diff --git a/xen/arch/ia64/smp.c b/xen/arch/ia64/smp.c
new file mode 100644
index 0000000000..ae581f3f56
--- /dev/null
+++ b/xen/arch/ia64/smp.c
@@ -0,0 +1,43 @@
+/*
+ * Intel SMP support routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+
+//#include <xen/irq.h>
+#include <xen/sched.h>
+#include <xen/delay.h>
+#include <xen/spinlock.h>
+#include <asm/smp.h>
+//#include <asm/mc146818rtc.h>
+#include <asm/pgalloc.h>
+//#include <asm/smpboot.h>
+#include <asm/hardirq.h>
+
+
+//Huh? This seems to be used on ia64 even if !CONFIG_SMP
+void flush_tlb_mask(cpumask_t mask)
+{
+ dummy();
+}
+//#if CONFIG_SMP || IA64
+#if CONFIG_SMP
+//Huh? This seems to be used on ia64 even if !CONFIG_SMP
+void smp_send_event_check_mask(cpumask_t mask)
+{
+ dummy();
+ //send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
+}
+
+
+//Huh? This seems to be used on ia64 even if !CONFIG_SMP
+int try_flush_tlb_mask(cpumask_t mask)
+{
+ dummy();
+ return 1;
+}
+#endif
diff --git a/xen/arch/ia64/smpboot.c b/xen/arch/ia64/smpboot.c
new file mode 100644
index 0000000000..482349121b
--- /dev/null
+++ b/xen/arch/ia64/smpboot.c
@@ -0,0 +1,2 @@
+// expand later
+int ht_per_core = 1;
diff --git a/xen/arch/ia64/sn_console.c b/xen/arch/ia64/sn_console.c
new file mode 100644
index 0000000000..d29a82935c
--- /dev/null
+++ b/xen/arch/ia64/sn_console.c
@@ -0,0 +1,84 @@
+/*
+ * C-Brick Serial Port (and console) driver for SGI Altix machines.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <asm/acpi.h>
+#include <asm/sn/sn_sal.h>
+#include <xen/serial.h>
+
+void sn_putc(struct serial_port *, char);
+
+static struct uart_driver sn_sal_console = {
+ .putc = sn_putc,
+};
+
+/**
+ * early_sn_setup - early setup routine for SN platforms
+ *
+ * pulled from arch/ia64/sn/kernel/setup.c
+ */
+static void __init early_sn_setup(void)
+{
+ efi_system_table_t *efi_systab;
+ efi_config_table_t *config_tables;
+ struct ia64_sal_systab *sal_systab;
+ struct ia64_sal_desc_entry_point *ep;
+ char *p;
+ int i, j;
+
+ /*
+ * Parse enough of the SAL tables to locate the SAL entry point. Since, console
+ * IO on SN2 is done via SAL calls, early_printk won't work without this.
+ *
+ * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
 + * Any changes to those files may have to be made here as well.
+ */
+ efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
+ config_tables = __va(efi_systab->tables);
+ for (i = 0; i < efi_systab->nr_tables; i++) {
+ if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
+ 0) {
+ sal_systab = __va(config_tables[i].table);
+ p = (char *)(sal_systab + 1);
+ for (j = 0; j < sal_systab->entry_count; j++) {
+ if (*p == SAL_DESC_ENTRY_POINT) {
+ ep = (struct ia64_sal_desc_entry_point
+ *)p;
+ ia64_sal_handler_init(__va
+ (ep->sal_proc),
+ __va(ep->gp));
+ return;
+ }
+ p += SAL_DESC_SIZE(*p);
+ }
+ }
+ }
+ /* Uh-oh, SAL not available?? */
+ printk(KERN_ERR "failed to find SAL entry point\n");
+}
+
+/**
+ * sn_serial_console_early_setup - Sets up early console output support
+ *
+ * pulled from drivers/serial/sn_console.c
+ */
+int __init sn_serial_console_early_setup(void)
+{
+ if (strcmp("sn2",acpi_get_sysname()))
+ return -1;
+
+ early_sn_setup(); /* Find SAL entry points */
+ serial_register_uart(0, &sn_sal_console, NULL);
+
+ return 0;
+}
+
+/*
+ * sn_putc - Send a character to the console, polled or interrupt mode
+ */
+void sn_putc(struct serial_port *port, char c)
+{
+ return ia64_sn_console_putc(c);
+}
diff --git a/xen/arch/ia64/tools/README.RunVT b/xen/arch/ia64/tools/README.RunVT
new file mode 100644
index 0000000000..f93e85b3f3
--- /dev/null
+++ b/xen/arch/ia64/tools/README.RunVT
@@ -0,0 +1,59 @@
+INSTRUCTIONS FOR Running IPF/Xen with VT-enabled Tiger4 platform
+
+Note: the Domain0 must be an unmodified Linux
+
+1) Perform operations in README.xenia64 to get a flattened Xen IPF source tree
+
+2) Build an unmodified Linux 2.6 kernel
+ a) tar xvfz linux-2.6.11.tar.gz
+ b) cp arch/ia64/configs/tiger_defconfig .config
+ c) Build linux.
+ 1) yes "" | make oldconfig
+ 2) make
+
+3) Build IPF VT-enabled Xen image
+ edit xen/arch/ia64/Rules.mk for
+ CONFIG_VTI ?= y to enable VT-enable build
+4) Setup ELILO.CONF
+ image=xen
+ label=xen
+ initrd=vmlinux2.6.11 // unmodified Linux kernel image
+ read-only
+ append="nomca root=/dev/sda3"
+
+STATUS as of 4/28/05 - Features implemented for Domain0
+
+0. Runs unmodified Linux kernel as Domain0
+ Validated with Linux 2.6.11 to run Xwindow and NIC on UP logical processor
+
+1. Take advantage of VT-enabled processor
+ a. Processor intercepts guest privileged instruction and deliver Opcode/Cause to Hypervisor
+ b. One VPD (Virtual Processor Descriptor) per Virtual Processor
+ c. Domains are in a different virtual address space from hypervisor. Domains have one less VA bit than hypervisor, where hypervisor runs in 0xF00000... address protected by the processor from Domains.
+
+2. vTLB and guest_VHPT
+ a. vTLB extending machine TLB entries through hypervisor internal data structure
+ vTLB caches Domains installed TR's and TC's, and then installs TC's for Domains instead.
+ vTLB implements collision chains
+ b. Processor walks hypervisor internal VHPT, not the domain VHPT. On TLB miss, vTLB is consulted first to put hypervisor cached entry into VHPT without inject TLB miss to domain.
+
+3. Region ID fix-partitioning
+ a. currently hard partition 24bits of RIDs into 16 partitions by using top 4bit.
+ b. Hypervisor uses the very last partition RIDs, i.e., 0xFxxxxx RIDs
+ c. Effectively supports Domain0 and 14 other DomainN
+
+4. HyperVisor is mapped with 2 sets of RIDs during runtime, its own RIDs and the active Domain RIDs
+ a. Domain RIDs are used by processor to access guest_VHPT during Domain runtime
+ b. Hypervisor RIDs are used when Hypervisor is running
+ c. Implies there are some Region registers transition on entering/exiting hypervisor
+
+5. Linux styled pt_regs with minor modification for VT and instruction emulation
+ a. Part of Domain registers are saved/restored from VPD
+ b. Extended pt_regs to include r4~r7 and Domain's iipa & isr for possible instruction emulation, so no need to save a complete set of switch_stack on IVT entry
+
+6. Linux styled per virtual processor memory/RSE stacks, which is the same as non-VT domain0
+
+7. Handles split I/D cache design
+ Newer IPF processors have split I/D caches. The design takes this into consideration when Xen recopies Domain0 to the target address for execution
+
+
diff --git a/xen/arch/ia64/tools/README.xenia64 b/xen/arch/ia64/tools/README.xenia64
new file mode 100644
index 0000000000..f0f8d8ff4e
--- /dev/null
+++ b/xen/arch/ia64/tools/README.xenia64
@@ -0,0 +1,21 @@
+# Xen/ia64 heavily leverages/reuses many files from Linux/ia64
+# you need the following files from kernel.org
+# linux-2.6.11.tar.gz
+# place these in the parent directory of the xenXXX.bk tree
+# e.g. xen-unstable.bk should be in the same directory as linux-2.6.11.tar.gz
+
+# unpack linux-2.6.11 in the xenXXX.bk/.. directory
+tar xzf linux-2.6.11.tar.gz
+
+# go back to the xen subdirectory of xenXXX.bk
+cd xenXXX.bk/xen
+
+# create and patch the linux/ia64 files
+# this should print out many patch messages but no errors
+bash arch/ia64/tools/mkbuildtree
+
+# build xen/ia64
+# if using cross-compiler
+make XEN_TARGET_ARCH=ia64
+# else if native
+make
diff --git a/xen/arch/ia64/tools/README.xenia64linux b/xen/arch/ia64/tools/README.xenia64linux
new file mode 100644
index 0000000000..5c1555ebdf
--- /dev/null
+++ b/xen/arch/ia64/tools/README.xenia64linux
@@ -0,0 +1,50 @@
+INSTRUCTIONS FOR BUILDING XENLINUX/IA64
+
+1) In linux-2.6.9: (also tested with 2.6.10, 2.6.11.2)
+ cp arch/ia64/configs/zx1_defconfig .config
+2) vi .config
+ unset CONFIG_IA32_SUPPORT
+ unset CONFIG_IDE [for now, need to fix later]
+ unset CONFIG_VIRTUAL_MEM_MAP [for now, need to fix later]
+3) if running on ski, it is useful to make the following change:
+ a) at the beginning of drivers/acpi/motherboard.c:acpi_reserve_resources()
+ add the line:
+ if (!acpi_gbl_FADT) return;
+4) Build linux.
+ a) yes "" | make oldconfig
+ b) check the resulting .config to ensure there are no modules used (because
+ Xen/ia64 doesn't support them yet). Change '=m' to '=n' and remake
+ c) yes "" | make oldconfig
+ d) make
+5) Linux must be "privified" to run on Xen/ia64. This process converts all
+ privilege-sensitive instructions into privileged instructions.
+ Usage: privify infile outfile
+ Privify is very dumb... it will not overwrite outfile. It also prints
+ out a bunch of useless info that can be safely ignored (except for "panic").
+ The privify program can be obtained from:
+ ftp://ftp.hpl.hp.com/pub/xen-ia64/privify
+6) debug fixes:
+ a) periodically xenlinux/ia64 goes into a fit of printing
+ "Oops: timer tick before it is due..." This can be changed
+ in arch/ia64/kernel/time.c to either ignore it or print something
+ shorter
+ b) The hp simulator (ski) console drivers can be turned on to allow
+ output of early boot information from xenlinux. This results
+ in some duplication of later output (which can be ignored).
+ i) in linux/arch/ia64/Makefile, force the sim drivers by changing
+ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim to
+ drivers-y += arch/ia64/hp/sim
+ ii) in linux/arch/ia64/hp/sim/Makefile, force the sim drivers
+ by changing obj-$(CONFIG_HP_SIMSERIAL) to obj-y and
+ obj-$(CONFIG_HP_SIM_SERIAL_CONSOLE) to obj-y
+ iii) in arch/ia64/kernel/setup.c:early_console_setup(), replace
+ the contents of the routine with:
+ extern struct console hpsim_cons;
+ register_console(&hpsim_cons);
+ return 0;
+ (It may be necessary also to un-inline the routine, not sure.)
+ c) It can be useful to modify linux/init/main.c to add a printf before
+ or after a lot of the init calls
+6) Start Xen from elilo, passing the Linux kernel as an initrd.
+ NOTE: mca currently has a problem with binary translation,
+ must run with "nomca" as a kernel argument
diff --git a/xen/arch/ia64/tools/mkbuildtree b/xen/arch/ia64/tools/mkbuildtree
new file mode 100644
index 0000000000..5964c836c8
--- /dev/null
+++ b/xen/arch/ia64/tools/mkbuildtree
@@ -0,0 +1,345 @@
+#!/bin/sh
+#
+# run in xen-X.X/xen directory after unpacking linux in same directory
+
+XEN=$PWD
+LINUX=$XEN/../../linux-2.6.11
+LINUXPATCH=$XEN/arch/ia64/patch/linux-2.6.11
+XENPATCH=$XEN/arch/ia64/patch/xen-2.0.1
+
+cp_patch ()
+{
+ #diff -u $LINUX/$1 $XEN/$2 > $LINUXPATCH/$3
+ cp $LINUX/$1 $XEN/$2
+ patch <$LINUXPATCH/$3 $XEN/$2
+}
+
+xen_patch ()
+{
+ #patch <$XENPATCH/$2 $XEN/$1
+ echo 'skipping patch of' $XEN/$1
+}
+
+softlink ()
+{
+ ln -s $LINUX/$1 $XEN/$2
+}
+
+null ()
+{
+ touch $XEN/$1
+}
+
+
+# ensure linux directory is set up
+if [ ! -d $LINUX ]; then
+ echo "ERROR: $LINUX directory doesn't exist"
+ exit
+fi
+
+# setup
+
+#mkdir arch/ia64
+#mkdir arch/ia64/lib
+#mkdir include/asm-ia64
+mkdir include/asm-generic
+mkdir include/asm-ia64/linux
+mkdir include/asm-ia64/linux/byteorder
+mkdir include/asm-ia64/sn
+# use "gcc -Iinclude/asm-ia64" to find these linux includes
+#ln -s $XEN/include/xen $XEN/include/linux
+#ln -s $XEN/include/asm-ia64/linux $XEN/include/asm-ia64/xen
+ln -s ../slab.h include/asm-ia64/linux/slab.h
+
+# prepare for building asm-offsets (circular dependency)
+#echo '#define IA64_TASK_SIZE 0' > include/asm-ia64/asm-offsets.h
+#sleep 2
+#touch arch/ia64/asm-offsets.c
+
+# patches to xen/common files
+#xen_patch common/domain.c domain.c
+#xen_patch common/dom_mem_ops.c dom_mem_ops.c
+#xen_patch common/grant_table.c grant_table.c
+#xen_patch common/kernel.c kernel.c
+#xen_patch common/dom0_ops.c dom0_ops.c
+#xen_patch common/memory.c memory.c
+#xen_patch common/keyhandler.c keyhandler.c
+#xen_patch common/softirq.c softirq.c
+#xen_patch common/string.c string.c
+#xen_patch common/elf.c elf.c
+#xen_patch common/schedule.c schedule.c
+#xen_patch drivers/char/serial.c serial.c
+#xen_patch drivers/char/console.c console.c
+#xen_patch include/public/xen.h xen.h
+#xen_patch include/xen/grant_table.h grant_table.h
+#xen_patch include/xen/init.h init.h
+#xen_patch include/xen/irq.h irq.h
+#xen_patch include/xen/list.h list.h
+#xen_patch include/xen/sched.h sched.h
+#xen_patch include/xen/slab.h slab.h
+#xen_patch include/xen/time.h time.h
+
+
+# arch/ia64 files
+
+cp_patch arch/ia64/kernel/efi.c arch/ia64/efi.c efi.c
+cp_patch arch/ia64/kernel/entry.S arch/ia64/entry.S entry.S
+cp_patch arch/ia64/kernel/head.S arch/ia64/head.S head.S
+#cp_patch arch/ia64/kernel/init_task.c arch/ia64/init_task.c init_task.c
+cp_patch arch/ia64/kernel/irq_ia64.c arch/ia64/irq_ia64.c irq_ia64.c
+#cp_patch arch/ia64/kernel/ivt.S arch/ia64/ivt.S ivt.S
+#cp_patch arch/ia64/kernel/minstate.h arch/ia64/minstate.h minstate.h
+cp_patch arch/ia64/kernel/setup.c arch/ia64/setup.c setup.c
+cp_patch arch/ia64/kernel/time.c arch/ia64/time.c time.c
+cp_patch arch/ia64/kernel/unaligned.c arch/ia64/unaligned.c unaligned.c
+#cp_patch arch/ia64/kernel/vmlinux.lds.S arch/ia64/xen.lds.S lds.S
+softlink arch/ia64/kernel/vmlinux.lds.S arch/ia64/xen.lds.S
+
+#cp_patch mm/bootmem.c arch/ia64/mm_bootmem.c mm_bootmem.c
+#cp_patch mm/page_alloc.c arch/ia64/page_alloc.c page_alloc.c
+#cp_patch mm/slab.c arch/ia64/slab.c slab.c
+
+# following renamed to avoid conflict
+#cp_patch kernel/extable.c arch/ia64/linuxextable.c linuxextable.c
+softlink kernel/extable.c arch/ia64/linuxextable.c
+
+cp_patch arch/ia64/mm/contig.c arch/ia64/mm_contig.c mm_contig.c
+cp_patch arch/ia64/mm/tlb.c arch/ia64/tlb.c tlb.c
+
+#cp_patch arch/ia64/hp/sim/hpsim_irq.c arch/ia64/hpsim_irq.c hpsim_irq.c
+
+softlink arch/ia64/kernel/efi_stub.S arch/ia64/efi_stub.S
+cp_patch arch/ia64/kernel/entry.h arch/ia64/entry.h entry.h
+softlink arch/ia64/kernel/ia64_ksyms.c arch/ia64/ia64_ksyms.c
+softlink arch/ia64/kernel/irq_lsapic.c arch/ia64/irq_lsapic.c
+softlink arch/ia64/kernel/machvec.c arch/ia64/machvec.c
+softlink arch/ia64/mm/extable.c arch/ia64/extable.c
+#softlink arch/ia64/kernel/pal.S arch/ia64/pal.S
+cp_patch arch/ia64/kernel/pal.S arch/ia64/pal.S pal.S
+softlink arch/ia64/kernel/patch.c arch/ia64/patch.c
+softlink arch/ia64/kernel/sal.c arch/ia64/sal.c
+softlink arch/ia64/kernel/minstate.h arch/ia64/minstate.h
+
+softlink arch/ia64/lib/bitop.c arch/ia64/lib/bitop.c
+softlink arch/ia64/lib/carta_random.S arch/ia64/lib/carta_random.S
+softlink arch/ia64/lib/checksum.c arch/ia64/lib/checksum.c
+softlink arch/ia64/lib/clear_page.S arch/ia64/lib/clear_page.S
+softlink arch/ia64/lib/clear_user.S arch/ia64/lib/clear_user.S
+softlink arch/ia64/lib/copy_page_mck.S arch/ia64/lib/copy_page_mck.S
+softlink arch/ia64/lib/copy_page.S arch/ia64/lib/copy_page.S
+softlink arch/ia64/lib/copy_user.S arch/ia64/lib/copy_user.S
+softlink arch/ia64/lib/csum_partial_copy.c arch/ia64/lib/csum_partial_copy.c
+softlink arch/ia64/lib/dec_and_lock.c arch/ia64/lib/dec_and_lock.c
+softlink arch/ia64/lib/do_csum.S arch/ia64/lib/do_csum.S
+softlink arch/ia64/lib/flush.S arch/ia64/lib/flush.S
+softlink arch/ia64/lib/idiv32.S arch/ia64/lib/idiv32.S
+softlink arch/ia64/lib/idiv64.S arch/ia64/lib/idiv64.S
+softlink arch/ia64/lib/io.c arch/ia64/lib/io.c
+softlink arch/ia64/lib/ip_fast_csum.S arch/ia64/lib/ip_fast_csum.S
+softlink arch/ia64/lib/memcpy_mck.S arch/ia64/lib/memcpy_mck.S
+softlink arch/ia64/lib/memcpy.S arch/ia64/lib/memcpy.S
+softlink arch/ia64/lib/memset.S arch/ia64/lib/memset.S
+softlink arch/ia64/lib/strlen.S arch/ia64/lib/strlen.S
+softlink arch/ia64/lib/strlen_user.S arch/ia64/lib/strlen_user.S
+softlink arch/ia64/lib/strncpy_from_user.S arch/ia64/lib/strncpy_from_user.S
+softlink arch/ia64/lib/strnlen_user.S arch/ia64/lib/strnlen_user.S
+softlink arch/ia64/lib/xor.S arch/ia64/lib/xor.S
+
+softlink lib/cmdline.c arch/ia64/cmdline.c
+
+softlink arch/ia64/hp/sim/hpsim.S arch/ia64/hpsim.S
+
+# xen/include/asm-generic files
+
+softlink include/asm-generic/bug.h include/asm-generic/bug.h
+softlink include/asm-generic/div64.h include/asm-generic/div64.h
+softlink include/asm-generic/errno.h include/asm-generic/errno.h
+softlink include/asm-generic/errno-base.h include/asm-generic/errno-base.h
+softlink include/asm-generic/ide_iops.h include/asm-generic/ide_iops.h
+softlink include/asm-generic/iomap.h include/asm-generic/iomap.h
+softlink include/asm-generic/pci-dma-compat.h include/asm-generic/pci-dma-compat.h
+softlink include/asm-generic/pci.h include/asm-generic/pci.h
+softlink include/asm-generic/pgtable.h include/asm-generic/pgtable.h
+softlink include/asm-generic/pgtable-nopud.h include/asm-generic/pgtable-nopud.h
+softlink include/asm-generic/sections.h include/asm-generic/sections.h
+softlink include/asm-generic/topology.h include/asm-generic/topology.h
+softlink include/asm-generic/vmlinux.lds.h include/asm-generic/vmlinux.lds.h
+
+
+# xen/include/asm-ia64 files
+
+cp_patch arch/ia64/hp/sim/hpsim_ssc.h include/asm-ia64/hpsim_ssc.h hpsim_ssc.h
+
+#cp_patch include/asm-ia64/current.h include/asm-ia64/current.h current.h
+softlink include/asm-ia64/current.h include/asm-ia64/current.h
+cp_patch include/asm-ia64/gcc_intrin.h include/asm-ia64/gcc_intrin.h gcc_intrin.h
+#softlink include/asm-ia64/gcc_intrin.h include/asm-ia64/gcc_intrin.h
+#cp_patch include/asm-ia64/hardirq.h include/asm-ia64/hardirq.h hardirq.h
+softlink include/asm-ia64/hardirq.h include/asm-ia64/hardirq.h
+#cp_patch include/asm-ia64/hw_irq.h include/asm-ia64/hw_irq.h hw_irq.h
+softlink include/asm-ia64/hw_irq.h include/asm-ia64/hw_irq.h
+#cp_patch include/asm-ia64/ide.h include/asm-ia64/ide.h ide.h
+cp_patch include/asm-ia64/io.h include/asm-ia64/io.h io.h
+#cp_patch include/asm-ia64/irq.h include/asm-ia64/irq.h irq.h
+softlink include/asm-ia64/irq.h include/asm-ia64/irq.h
+cp_patch include/asm-ia64/kregs.h include/asm-ia64/kregs.h kregs.h
+cp_patch include/asm-ia64/page.h include/asm-ia64/page.h page.h
+cp_patch include/asm-ia64/processor.h include/asm-ia64/processor.h processor.h
+#cp_patch include/asm-ia64/sal.h include/asm-ia64/sal.h sal.h
+softlink include/asm-ia64/sal.h include/asm-ia64/sal.h
+cp_patch include/asm-ia64/system.h include/asm-ia64/system.h system.h
+cp_patch include/asm-ia64/types.h include/asm-ia64/types.h types.h
+
+null include/asm-ia64/desc.h
+#null include/asm-ia64/domain_page.h
+#null include/asm-ia64/flushtlb.h
+null include/asm-ia64/io_apic.h
+null include/asm-ia64/pdb.h
+null include/asm-ia64/module.h
+null include/asm-ia64/ia32.h
+null include/asm-ia64/tlbflush.h
+
+null include/asm-ia64/sn/arch.h
+null include/asm-ia64/sn/geo.h
+null include/asm-ia64/sn/nodepda.h
+null include/asm-ia64/sn/sn_cpuid.h
+cp_patch include/asm-ia64/sn/sn_sal.h include/asm-ia64/sn/sn_sal.h sn_sal.h
+
+softlink include/asm-ia64/acpi.h include/asm-ia64/acpi.h
+softlink include/asm-ia64/asmmacro.h include/asm-ia64/asmmacro.h
+softlink include/asm-ia64/atomic.h include/asm-ia64/atomic.h
+softlink include/asm-ia64/bitops.h include/asm-ia64/bitops.h
+softlink include/asm-ia64/break.h include/asm-ia64/break.h
+softlink include/asm-ia64/bug.h include/asm-ia64/bug.h
+softlink include/asm-ia64/byteorder.h include/asm-ia64/byteorder.h
+softlink include/asm-ia64/cacheflush.h include/asm-ia64/cacheflush.h
+softlink include/asm-ia64/cache.h include/asm-ia64/cache.h
+softlink include/asm-ia64/checksum.h include/asm-ia64/checksum.h
+softlink include/asm-ia64/delay.h include/asm-ia64/delay.h
+softlink include/asm-ia64/div64.h include/asm-ia64/div64.h
+softlink include/asm-ia64/dma.h include/asm-ia64/dma.h
+softlink include/asm-ia64/dma-mapping.h include/asm-ia64/dma-mapping.h
+softlink include/asm-ia64/errno.h include/asm-ia64/errno.h
+softlink include/asm-ia64/fpu.h include/asm-ia64/fpu.h
+softlink include/asm-ia64/hdreg.h include/asm-ia64/hdreg.h
+#softlink include/asm-ia64/ia32.h include/asm-ia64/ia32.h
+cp_patch include/asm-ia64/ia64regs.h include/asm-ia64/ia64regs.h ia64regs.h
+softlink include/asm-ia64/intrinsics.h include/asm-ia64/intrinsics.h
+softlink include/asm-ia64/ioctl.h include/asm-ia64/ioctl.h
+softlink include/asm-ia64/linkage.h include/asm-ia64/linkage.h
+softlink include/asm-ia64/machvec.h include/asm-ia64/machvec.h
+softlink include/asm-ia64/machvec_hpsim.h include/asm-ia64/machvec_hpsim.h
+#softlink include/asm-ia64/mca_asm.h include/asm-ia64/mca_asm.h
+cp_patch include/asm-ia64/mca_asm.h include/asm-ia64/mca_asm.h mca_asm.h
+softlink include/asm-ia64/mca.h include/asm-ia64/mca.h
+softlink include/asm-ia64/meminit.h include/asm-ia64/meminit.h
+softlink include/asm-ia64/mman.h include/asm-ia64/mman.h
+softlink include/asm-ia64/numa.h include/asm-ia64/numa.h
+cp_patch include/asm-ia64/pal.h include/asm-ia64/pal.h pal.h
+softlink include/asm-ia64/param.h include/asm-ia64/param.h
+softlink include/asm-ia64/patch.h include/asm-ia64/patch.h
+softlink include/asm-ia64/pci.h include/asm-ia64/pci.h
+softlink include/asm-ia64/percpu.h include/asm-ia64/percpu.h
+#softlink include/asm-ia64/pgalloc.h include/asm-ia64/pgalloc.h
+cp_patch include/asm-ia64/pgalloc.h include/asm-ia64/pgalloc.h pgalloc.h
+softlink include/asm-ia64/pgtable.h include/asm-ia64/pgtable.h
+cp_patch include/asm-ia64/ptrace.h include/asm-ia64/ptrace.h ptrace.h
+softlink include/asm-ia64/ptrace_offsets.h include/asm-ia64/ptrace_offsets.h
+softlink include/asm-ia64/rse.h include/asm-ia64/rse.h
+softlink include/asm-ia64/rwsem.h include/asm-ia64/rwsem.h
+softlink include/asm-ia64/scatterlist.h include/asm-ia64/scatterlist.h
+softlink include/asm-ia64/sections.h include/asm-ia64/sections.h
+softlink include/asm-ia64/semaphore.h include/asm-ia64/semaphore.h
+softlink include/asm-ia64/setup.h include/asm-ia64/setup.h
+softlink include/asm-ia64/sigcontext.h include/asm-ia64/sigcontext.h
+softlink include/asm-ia64/signal.h include/asm-ia64/signal.h
+softlink include/asm-ia64/smp.h include/asm-ia64/smp.h
+softlink include/asm-ia64/spinlock.h include/asm-ia64/spinlock.h
+softlink include/asm-ia64/string.h include/asm-ia64/string.h
+softlink include/asm-ia64/thread_info.h include/asm-ia64/thread_info.h
+softlink include/asm-ia64/timex.h include/asm-ia64/timex.h
+softlink include/asm-ia64/topology.h include/asm-ia64/topology.h
+softlink include/asm-ia64/uaccess.h include/asm-ia64/uaccess.h
+softlink include/asm-ia64/unaligned.h include/asm-ia64/unaligned.h
+softlink include/asm-ia64/unistd.h include/asm-ia64/unistd.h
+softlink include/asm-ia64/unwind.h include/asm-ia64/unwind.h
+softlink include/asm-ia64/ustack.h include/asm-ia64/ustack.h
+
+#rename this one because xen/include/asm/serial.h already exists
+#there is only one use of it that must be patched -- arch/ia64/setup.c
+#softlink include/asm-ia64/serial.h include/asm-ia64/asmserial.h
+
+# xen/include/asm-ia64/linux/*.h (== linux/include/linux/*.h)
+
+#cp_patch include/linux/bootmem.h include/asm-ia64/linux/bootmem.h bootmem.h
+cp_patch include/linux/cpumask.h include/asm-ia64/linux/cpumask.h cpumask.h
+#cp_patch include/linux/dma-mapping.h include/asm-ia64/linux/dma-mapping.h dma-mapping.h
+softlink include/linux/dma-mapping.h include/asm-ia64/linux/dma-mapping.h
+#cp_patch include/linux/efi.h include/asm-ia64/linux/efi.h efi.h
+softlink include/linux/efi.h include/asm-ia64/linux/efi.h
+cp_patch include/linux/hardirq.h include/asm-ia64/linux/hardirq.h hardirq.h
+#cp_patch include/linux/init_task.h include/asm-ia64/linux/init_task.h init_task.h
+cp_patch include/linux/interrupt.h include/asm-ia64/linux/interrupt.h interrupt.h
+#cp_patch include/linux/mmzone.h include/asm-ia64/linux/mmzone.h mmzone.h
+softlink include/linux/mmzone.h include/asm-ia64/linux/mmzone.h
+
+#cp_patch include/linux/wait.h include/asm-ia64/linux/wait.h wait.h
+softlink include/linux/wait.h include/asm-ia64/linux/wait.h
+
+#cp_patch include/linux/slab.h include/asm-ia64/slab.h slab.h
+
+# following renamed to avoid conflict
+#cp_patch include/linux/time.h include/xen/linuxtime.h linuxtime.h
+softlink include/linux/time.h include/asm-ia64/linux/linuxtime.h
+
+softlink include/linux/bcd.h include/asm-ia64/linux/bcd.h
+softlink include/linux/bitmap.h include/asm-ia64/linux/bitmap.h
+softlink include/linux/bitops.h include/asm-ia64/linux/bitops.h
+softlink include/linux/err.h include/asm-ia64/linux/err.h
+softlink include/linux/gfp.h include/asm-ia64/linux/gfp.h
+softlink include/linux/initrd.h include/asm-ia64/linux/initrd.h
+softlink include/linux/kmalloc_sizes.h include/asm-ia64/linux/kmalloc_sizes.h
+softlink include/linux/linkage.h include/asm-ia64/linux/linkage.h
+softlink include/linux/numa.h include/asm-ia64/linux/numa.h
+softlink include/linux/page-flags.h include/asm-ia64/linux/page-flags.h
+softlink include/linux/percpu.h include/asm-ia64/linux/percpu.h
+softlink include/linux/preempt.h include/asm-ia64/linux/preempt.h
+softlink include/linux/rbtree.h include/asm-ia64/linux/rbtree.h
+softlink include/linux/rwsem.h include/asm-ia64/linux/rwsem.h
+#softlink include/linux/seq_file.h include/asm-ia64/linux/seq_file.h
+#softlink include/linux/serial_core.h include/asm-ia64/linux/serial_core.h
+softlink include/linux/stddef.h include/asm-ia64/linux/stddef.h
+softlink include/linux/thread_info.h include/asm-ia64/linux/thread_info.h
+softlink include/linux/threads.h include/asm-ia64/linux/threads.h
+softlink include/linux/timex.h include/asm-ia64/linux/timex.h
+softlink include/linux/topology.h include/asm-ia64/linux/topology.h
+softlink include/linux/seqlock.h include/asm-ia64/linux/seqlock.h
+softlink include/linux/jiffies.h include/asm-ia64/linux/jiffies.h
+
+softlink drivers/firmware/pcdp.h arch/ia64/pcdp.h
+
+null include/asm-ia64/linux/file.h
+null include/asm-ia64/linux/module.h
+null include/asm-ia64/linux/swap.h
+null include/asm-ia64/linux/device.h
+null include/asm-ia64/linux/proc_fs.h
+null include/asm-ia64/linux/rtc.h
+null include/asm-ia64/linux/profile.h
+null include/asm-ia64/linux/seqlock.h
+null include/asm-ia64/linux/smp_lock.h
+null include/asm-ia64/linux/tty.h
+null include/asm-ia64/linux/kernel_stat.h
+null include/asm-ia64/linux/ptrace.h
+null include/asm-ia64/linux/random.h
+null include/asm-ia64/linux/signal.h
+null include/asm-ia64/linux/bootmem.h
+null include/asm-ia64/linux/serial.h
+null include/asm-ia64/linux/serial_core.h
+null include/asm-ia64/linux/seq_file.h
+null include/asm-ia64/linux/cpu.h
+null include/asm-ia64/linux/ioport.h
+
+softlink include/linux/byteorder/generic.h include/asm-ia64/linux/byteorder/generic.h
+softlink include/linux/byteorder/little_endian.h include/asm-ia64/linux/byteorder/little_endian.h
+softlink include/linux/byteorder/swab.h include/asm-ia64/linux/byteorder/swab.h
+
diff --git a/xen/arch/ia64/tools/privify/Makefile b/xen/arch/ia64/tools/privify/Makefile
new file mode 100644
index 0000000000..9283c0b20d
--- /dev/null
+++ b/xen/arch/ia64/tools/privify/Makefile
@@ -0,0 +1,9 @@
+privify: privify_elf64.o privify.o
+ gcc -g privify.o privify_elf64.o -o privify
+
+
+privify_elf64.o: privify_elf64.c
+ gcc -g -D__KERNEL__ -c privify_elf64.c
+
+privify.o: privify.c
+ gcc -nostdinc -g -D__KERNEL__ -c privify.c
diff --git a/xen/arch/ia64/tools/privify/README.privify b/xen/arch/ia64/tools/privify/README.privify
new file mode 100644
index 0000000000..77e3b00449
--- /dev/null
+++ b/xen/arch/ia64/tools/privify/README.privify
@@ -0,0 +1,8 @@
+In this directory, just "make".
+
+Run the resulting program on a vmlinux that has been adjusted
+to run on Xen (see arch/ia64/tools/README.xenia64linux):
+
+ ./privify vmlinux xenlinux
+
+Use the resulting xenlinux file as domain0
diff --git a/xen/arch/ia64/tools/privify/privify.c b/xen/arch/ia64/tools/privify/privify.c
new file mode 100644
index 0000000000..2b10186778
--- /dev/null
+++ b/xen/arch/ia64/tools/privify/privify.c
@@ -0,0 +1,360 @@
+/*
+ * Binary translate privilege-sensitive ops to privileged
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ */
+
+#include "privify.h"
+
+typedef unsigned long long u64;
+typedef unsigned long long IA64_INST;
+
+typedef union U_IA64_BUNDLE {
+ u64 i64[2];
+ struct { u64 template:5,slot0:41,slot1a:18,slot1b:23,slot2:41; };
+ // NOTE: following doesn't work because bitfields can't cross natural
+ // size boundaries
+ //struct { u64 template:5, slot0:41, slot1:41, slot2:41; };
+} IA64_BUNDLE;
+
+typedef enum E_IA64_SLOT_TYPE { I, M, F, B, L, ILLEGAL } IA64_SLOT_TYPE;
+
+typedef union U_INST64_A5 {
+ IA64_INST inst;
+ struct { u64 qp:6, r1:7, imm7b:7, r3:2, imm5c:5, imm9d:9, s:1, major:4; };
+} INST64_A5;
+
+typedef union U_INST64_B4 {
+ IA64_INST inst;
+ struct { u64 qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, wh:2, d:1, un1:1, major:4; };
+} INST64_B4;
+
+typedef union U_INST64_B8 {
+ IA64_INST inst;
+ struct { u64 qp:6, un21:21, x6:6, un4:4, major:4; };
+} INST64_B8;
+
+typedef union U_INST64_B9 {
+ IA64_INST inst;
+ struct { u64 qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
+} INST64_B9;
+
+typedef union U_INST64_I19 {
+ IA64_INST inst;
+ struct { u64 qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
+} INST64_I19;
+
+typedef union U_INST64_I26 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4;};
+} INST64_I26;
+
+typedef union U_INST64_I27 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4;};
+} INST64_I27;
+
+typedef union U_INST64_I28 { // not privileged (mov from AR)
+ IA64_INST inst;
+ struct { u64 qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4;};
+} INST64_I28;
+
+typedef union U_INST64_M28 {
+ IA64_INST inst;
+ struct { u64 qp:6, :14, r3:7, x6:6, x3:3, :1, major:4;};
+} INST64_M28;
+
+typedef union U_INST64_M29 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4;};
+} INST64_M29;
+
+typedef union U_INST64_M30 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, imm:7, ar3:7,x4:4,x2:2,x3:3,s:1,major:4;};
+} INST64_M30;
+
+typedef union U_INST64_M31 {
+ IA64_INST inst;
+ struct { u64 qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4;};
+} INST64_M31;
+
+typedef union U_INST64_M32 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4;};
+} INST64_M32;
+
+typedef union U_INST64_M33 {
+ IA64_INST inst;
+ struct { u64 qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M33;
+
+typedef union U_INST64_M35 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+
+} INST64_M35;
+
+typedef union U_INST64_M36 {
+ IA64_INST inst;
+ struct { u64 qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
+} INST64_M36;
+
+typedef union U_INST64_M41 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+} INST64_M41;
+
+typedef union U_INST64_M42 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M42;
+
+typedef union U_INST64_M43 {
+ IA64_INST inst;
+ struct { u64 qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M43;
+
+typedef union U_INST64_M44 {
+ IA64_INST inst;
+ struct { u64 qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
+} INST64_M44;
+
+typedef union U_INST64_M45 {
+ IA64_INST inst;
+ struct { u64 qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M45;
+
+typedef union U_INST64_M46 {
+ IA64_INST inst;
+ struct { u64 qp:6, r1:7, un7:7, r3:7, x6:6, x3:3, un1:1, major:4; };
+} INST64_M46;
+
+typedef union U_INST64 {
+ IA64_INST inst;
+ struct { u64 :37, major:4; } generic;
+ INST64_A5 A5; // used in build_hypercall_bundle only
+ INST64_B4 B4; // used in build_hypercall_bundle only
+ INST64_B8 B8; // rfi, bsw.[01]
+ INST64_B9 B9; // break.b
+ INST64_I19 I19; // used in build_hypercall_bundle only
+ INST64_I26 I26; // mov register to ar (I unit)
+ INST64_I27 I27; // mov immediate to ar (I unit)
+ INST64_I28 I28; // mov from ar (I unit)
+ INST64_M28 M28; // purge translation cache entry
+ INST64_M29 M29; // mov register to ar (M unit)
+ INST64_M30 M30; // mov immediate to ar (M unit)
+ INST64_M31 M31; // mov from ar (M unit)
+ INST64_M32 M32; // mov reg to cr
+ INST64_M33 M33; // mov from cr
+ INST64_M35 M35; // mov to psr
+ INST64_M36 M36; // mov from psr
+ INST64_M41 M41; // translation cache insert
+ INST64_M42 M42; // mov to indirect reg/translation reg insert
+ INST64_M43 M43; // mov from indirect reg
+ INST64_M44 M44; // set/reset system mask
+ INST64_M45 M45; // translation purge
+ INST64_M46 M46; // translation access (tpa,tak)
+} INST64;
+
+#define MASK_41 ((u64)0x1ffffffffff)
+
+long priv_verbose = 0;
+#define verbose(a...) do { if (priv_verbose) printf(a); } while(0)
+
+/*
+ * privify_inst
+ *
+ * Replaces privilege-sensitive instructions (and reads from write-trapping
+ * registers) with privileged/trapping instructions as follows:
+ * mov rx=ar.cflg -> mov ar.cflg=r(x+64) [**]
+ * mov rx=ar.ky -> mov ar.ky=r(x+64)
+ * fc rx -> ptc r(x+64)
+ * thash rx=ry -> tak rx=r(y+64)
+ * ttag rx=ry -> tpa rx=r(y+64)
+ * mov rx=cpuid[ry] -> mov r(x+64)=rr[ry]
+ * mov rx=pmd[ry] -> mov r(x+64)=pmc[ry] [**]
+ * cover -> break.b 0x1fffff
+ *
+ * [**] not currently implemented
+ */
+IA64_INST privify_inst(IA64_INST inst_val,
+ IA64_SLOT_TYPE slot_type, IA64_BUNDLE *bp, char **msg)
+{
+ INST64 inst = *(INST64 *)&inst_val;
+
+ *msg = 0;
+ switch (slot_type) {
+ case M:
+ // FIXME: Also use for mov_to/from_ar.cflag (M29/M30) (IA32 only)
+ if (inst.generic.major != 1) break;
+ if (inst.M46.x3 != 0) break;
+ if (inst.M31.x6 == 0x22 && inst.M31.ar3 < 8) {
+ // mov r1=kr -> mov kr=r1+64
+ verbose("privify_inst: privified mov r1=kr @%p\n",bp);
+ if (inst.M31.r1 >= 64) *msg = "mov r1=kr w/r1>63";
+ else privify_mov_from_kr_m(inst);
+ break;
+ }
+ if (inst.M29.x6 == 0x2a && inst.M29.ar3 < 8) {// mov kr=r1
+ if (inst.M29.r2 >= 64) *msg = "mov kr=r2 w/r2>63";
+ break;
+ }
+ if (inst.M28.x6 == 0x30) {
+ // fc r3-> ptc r3+64
+ verbose("privify_inst: privified fc r3 @%p\n",bp);
+ if (inst.M28.r3 >= 64) *msg = "fc r3 w/r3>63";
+ else privify_fc(inst);
+ break;
+ }
+ if (inst.M28.x6 == 0x34) {
+ if (inst.M28.r3 >= 64) *msg = "ptc.e w/r3>63";
+ break;
+ }
+ if (inst.M46.un7 != 0) break;
+ if (inst.M46.un1 != 0) break;
+ if (inst.M46.x6 == 0x1a) { // thash -> tak r1=r3+64
+ verbose("privify_inst: privified thash @%p\n",bp);
+ if (inst.M46.r3 >= 64) *msg = "thash w/r3>63";
+ else privify_thash(inst);
+ }
+ else if (inst.M46.x6 == 0x1b) { // ttag -> tpa r1=r3+64
+ verbose("privify_inst: privified ttag @%p\n",bp);
+ if (inst.M46.r3 >= 64) *msg = "ttag w/r3>63";
+ else privify_ttag(inst);
+ }
+ else if (inst.M43.x6 == 0x17) {
+ verbose("privify_inst: privified mov_from_cpuid @%p\n",bp);
+ if (inst.M43.r1 >= 64) *msg = "mov_from_cpuid w/r1>63";
+ else privify_mov_from_cpuid(inst);
+ }
+ else if (inst.M46.x6 == 0x1e) { // tpa
+ if (inst.M46.r3 >= 64) *msg = "tpa w/r3>63";
+ }
+ else if (inst.M46.x6 == 0x1f) { // tak
+ if (inst.M46.r3 >= 64) *msg = "tak w/r3>63";
+ }
+ else if (inst.M43.x6 == 0x10) {
+ if (inst.M43.r1 >= 64) *msg = "mov_to_rr w/r1>63";
+ }
+ break;
+ case B:
+ if (inst.generic.major != 0) break;
+ if (inst.B8.x6 == 0x2) { // cover -> break.b 0x1fffff
+ if (inst.B8.un21 != 0) break;
+ if (inst.B8.un4 != 0) break;
+ privify_cover(inst);
+ verbose("privify_inst: privified cover @%p\n",bp);
+ }
+ if (inst.B9.x6 == 0x0) { // (p15) break.b 0x1fffff -> cover
+ if (inst.B9.qp != 15) break;
+ if (inst.B9.imm20 != 0xfffff) break;
+ if (inst.B9.i != 1) break;
+ inst.B8.x6 = 0x2;
+ inst.B8.un21 = 0;
+ inst.B8.un4 = 0;
+ inst.B8.qp = 0;
+ verbose("privify_inst: unprivified pseudo-cover @%p\n",
+ bp);
+ }
+ break;
+ case I: // only used for privifying mov_from_ar
+ // FIXME: Also use for mov_to/from_ar.cflag (I26/I27) (IA32 only)
+ if (inst.generic.major != 0) break;
+ if (inst.I28.x6 == 0x32 && !inst.I28.x3 && inst.I28.ar3 < 8) {
+ // mov r1=kr -> mov kr=r1+64
+ verbose("privify_inst: privified mov r1=kr @%p\n",bp);
+ if (inst.I28.r1 >= 64) *msg = "mov r1=kr w/r1>63";
+ else privify_mov_from_kr_i(inst);
+ }
+ else if (inst.I26.x6 == 0x2a && !inst.I26.x3 &&
+ inst.I26.ar3 < 8) {// mov kr=r1
+ if (inst.I26.r2 >= 64) *msg = "mov kr=r2 w/r2>63";
+ }
+ break;
+ case F: case L: case ILLEGAL:
+ break;
+ }
+ return *(IA64_INST *)&inst;
+}
+
+#define read_slot1(b) (((b.i64[0]>>46L) | (b.i64[1]<<18UL)) & MASK_41)
+// Not sure why, but this more obvious definition of read_slot1 doesn't work
+// because the compiler treats (b.slot1b<<18UL) as a signed 32-bit integer
+// so not enough bits get used and it gets sign extended to boot!
+//#define read_slot1(b) ((b.slot1a | (b.slot1b<<18UL)) & MASK_41)
+#define write_slot1(b,inst) do { b.slot1a=inst;b.slot1b=inst>>18UL;} while (0)
+
+
+void privify_memory(void *start, unsigned long len)
+{
+ IA64_BUNDLE bundle, *bp = (IA64_BUNDLE *)start;
+ IA64_INST tmp;
+ char *msg;
+
+printf("privifying %ld bytes of memory at %p\n",len,start);
+ if ((unsigned long)start & 0xfL) {
+ printf("unaligned memory block in privify_memory\n");
+ }
+ len &= ~0xf;
+ for (bundle = *bp; len; len -= 16) {
+ switch(bundle.template) {
+ case 0x06: case 0x07: case 0x14: case 0x15:
+ case 0x1a: case 0x1b: case 0x1e: case 0x1f:
+ break;
+ case 0x16: case 0x17:
+ // may be B in slot0/1 but cover can only be slot2
+ bundle.slot2 = privify_inst(bundle.slot2,B,bp,&msg);
+ break;
+ case 0x00: case 0x01: case 0x02: case 0x03:
+ tmp = privify_inst(read_slot1(bundle),I,bp,&msg);
+ write_slot1(bundle,tmp);
+ case 0x0c: case 0x0d:
+ bundle.slot2 = privify_inst(bundle.slot2,I,bp,&msg);
+ case 0x04: case 0x05:
+ // could a privified cover be in slot2 here?
+ bundle.slot0 = privify_inst(bundle.slot0,M,bp,&msg);
+ break;
+ case 0x08: case 0x09: case 0x0a: case 0x0b:
+ bundle.slot2 = privify_inst(bundle.slot2,I,bp,&msg);
+ case 0x0e: case 0x0f:
+ bundle.slot0 = privify_inst(bundle.slot0,M,bp,&msg);
+ if (msg) break;
+ tmp = privify_inst(read_slot1(bundle),M,bp,&msg);
+ write_slot1(bundle,tmp);
+ break;
+ case 0x10: case 0x11:
+ tmp = privify_inst(read_slot1(bundle),I,bp,&msg);
+ write_slot1(bundle,tmp);
+ case 0x12: case 0x13:
+ // may be B in slot1 but cover can only be slot2
+ case 0x1c: case 0x1d:
+ bundle.slot0 = privify_inst(bundle.slot0,M,bp,&msg);
+ if (msg) break;
+ bundle.slot2 = privify_inst(bundle.slot2,B,bp,&msg);
+ break;
+ case 0x18: case 0x19:
+ bundle.slot0 = privify_inst(bundle.slot0,M,bp,&msg);
+ if (msg) break;
+ tmp = privify_inst(read_slot1(bundle),M,bp,&msg);
+ write_slot1(bundle,tmp);
+ if (msg) break;
+ bundle.slot2 = privify_inst(bundle.slot2,B,bp,&msg);
+ break;
+ }
+ if (msg) {
+ if (bundle.slot2)
+ printf("privify_memory: %s @%p\n",msg,bp);
+ else
+ printf("privify_memory: %s @%p probably not insts\n",
+ msg,bp);
+ printf("privify_memory: bundle=%p,%p\n",
+ bundle.i64[1],bundle.i64[0]);
+ }
+ *bp = bundle;
+ bundle = *++bp;
+ }
+
+}
diff --git a/xen/arch/ia64/tools/privify/privify.h b/xen/arch/ia64/tools/privify/privify.h
new file mode 100644
index 0000000000..49291b3139
--- /dev/null
+++ b/xen/arch/ia64/tools/privify/privify.h
@@ -0,0 +1,34 @@
+/*
+ * Binary translate privilege-sensitive ops to privileged
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ */
+
+/*
+ * Macros to replace privilege-sensitive instructions (and reads from
+ * write-trapping registers) with privileged/trapping instructions as follows:
+ * mov rx=ar.cflg -> mov ar.cflg=r(x+64) [**]
+ * mov rx=ar.ky -> mov ar.ky=r(x+64)
+ * fc rx -> ptc r(x+64)
+ * thash rx=ry -> tak rx=r(y+64)
+ * ttag rx=ry -> tpa rx=r(y+64)
+ * mov rx=cpuid[ry] -> mov r(x+64)=rr[ry]
+ * mov rx=pmd[ry] -> mov r(x+64)=pmc[ry] [**]
+ * cover -> break.b 0x1fffff
+ * [**] not implemented yet
+ */
+
+#define notimpl(s) printk(s##" not implemented");
+#define privify_mov_from_cflg_m(i) do { notimpl("mov from ar.cflg"); } while(0)
+#define privify_mov_from_cflg_i(i) do { notimpl("mov from ar.cflg"); } while(0)
+#define privify_mov_from_kr_m(i) do { i.M31.x6 = 0x2a; i.M29.r2 = i.M31.r1 + 64; } while(0)
+#define privify_mov_from_kr_i(i) do { i.I28.x6 = 0x2a; i.I26.r2 = i.I28.r1 + 64; } while(0)
+#define privify_fc(i) do { i.M28.x6 = 0x34; i.M28.r3 = i.M28.r3 + 64; } while(0)
+#define privify_thash(i) do { i.M46.x6 = 0x1f; i.M46.r3 += 64; } while(0)
+#define privify_ttag(i) do { i.M46.x6 = 0x1f; i.M46.r3 += 64; } while(0)
+#define privify_mov_from_cpuid(i) do { i.M43.x6 = 0x10; i.M43.r1 += 64; } while(0)
+#define privify_mov_from_pmd(i) do { notimpl("mov from pmd"); } while(0)
+#define privify_cover(x) do { x.B8.x6 = 0x0; x.B9.imm20 = 0xfffff; x.B9.i = 0x1; } while(0)
+
diff --git a/xen/arch/ia64/tools/privify/privify_elf64.c b/xen/arch/ia64/tools/privify/privify_elf64.c
new file mode 100644
index 0000000000..2fa9e49256
--- /dev/null
+++ b/xen/arch/ia64/tools/privify/privify_elf64.c
@@ -0,0 +1,120 @@
+/*
+ * Binary translate privilege-sensitive ops to privileged
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define ELFSIZE 64
+#include <linux/elf.h>
+
+#define MAX_FILSIZ (32*1024*1024)
+unsigned long buf[MAX_FILSIZ/sizeof(unsigned long)];
+
+static void
+usage (FILE *fp)
+{
+ fprintf(fp, "Usage: privify elf64filein elf64fileout\n");
+}
+
+static void
+panic (char *s)
+{
+ fprintf(stderr, "panic: %s\n",s);
+ exit(1);
+}
+
+static int
+read_file(const char *in_path, char *buf, int maxsize)
+{
+ ssize_t nread, totread = 0, ssize_inc = 8192;
+ int from;
+
+ if ((from = open (in_path, O_RDONLY)) < 0) return -1;
+ maxsize -= ssize_inc; // create safety zone
+ if (maxsize < 0) panic("input file exceeds max size");
+ while ((nread = read(from, buf, ssize_inc)) > 0) {
+ if (nread < 0) return -1; // problem
+ totread += nread;
+ if (nread < ssize_inc) return totread; // done
+ buf += ssize_inc;
+ if (totread > maxsize) // buffer too small
+ panic("file exceeds max size\n");
+ }
+ return totread;
+}
+
+static int
+write_file(const char *out_path, char *buf, int size)
+{
+ int to;
+
+ if ((to = open(out_path, O_WRONLY|O_CREAT|O_EXCL,0644)) < 0)
+ return -1;
+
+ if (write(to,buf,size) < 0) return -1;
+
+ return 0;
+}
+
+#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \
+ (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \
+ (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \
+ (ehdr).e_ident[EI_MAG3] == ELFMAG3)
+
+
+static void
+privify_elf(char *elfbase)
+{
+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfbase;
+ Elf64_Phdr *phdr;
+ Elf64_Shdr *shdr;
+ char *elfaddr;
+ unsigned long size;
+ int h;
+
+ if ( !IS_ELF(*ehdr) )
+ panic("Kernel image does not have an ELF header.\n");
+ for ( h = 0; h < ehdr->e_phnum; h++ ) {
+ phdr = (Elf64_Phdr *)(elfbase +
+ ehdr->e_phoff + (h*ehdr->e_phentsize));
+ printf("h=%d, phdr=%p,phdr->p_type=%lx",h,phdr,phdr->p_type);
+ if ((phdr->p_type != PT_LOAD)) {
+ printf("\n");
+ continue;
+ }
+ size = phdr->p_filesz;
+ elfaddr = elfbase + phdr->p_offset;
+ printf(",elfaddr=%p,size=%d,phdr->p_flags=%lx\n",
+ elfaddr,size,phdr->p_flags);
+ if (phdr->p_flags & PF_X) privify_memory(elfaddr,size);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ char *in_path, *out_path;
+ int fsize;
+
+ if (argc != 3) {
+ usage(stdout);
+ exit(1);
+ }
+ in_path = argv[1];
+ out_path = argv[2];
+ if ((fsize = read_file(in_path,(char *)buf,MAX_FILSIZ)) < 0) {
+ perror("read_file");
+ panic("failed");
+ }
+ privify_elf((char *)buf);
+ fflush(stdout);
+ if (write_file(out_path,(char *)buf,fsize) < 0) {
+ perror("write_file");
+ panic("failed");
+ }
+}
diff --git a/xen/arch/ia64/vcpu.c b/xen/arch/ia64/vcpu.c
index d0d62771e8..b55e5b6bd7 100644
--- a/xen/arch/ia64/vcpu.c
+++ b/xen/arch/ia64/vcpu.c
@@ -1,21 +1,25 @@
/*
* Virtualized CPU functions
- *
+ *
* Copyright (C) 2004 Hewlett-Packard Co.
* Dan Magenheimer (dan.magenheimer@hp.com)
*
*/
#include <linux/sched.h>
+#include <public/arch-ia64.h>
#include <asm/ia64_int.h>
#include <asm/vcpu.h>
#include <asm/regionreg.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/delay.h>
+#ifdef CONFIG_VTI
+#include <asm/vmx_vcpu.h>
+#endif // CONFIG_VTI
typedef union {
- struct ia64_psr;
+ struct ia64_psr ia64_psr;
unsigned long i64;
} PSR;
@@ -23,8 +27,9 @@ typedef union {
//typedef struct domain VCPU;
// this def for vcpu_regs won't work if kernel stack is present
-#define vcpu_regs(vcpu) ((struct pt_regs *) vcpu->regs)
-#define PSCB(x) x->shared_info->arch
+#define vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs)
+#define PSCB(x,y) x->vcpu_info->arch.y
+#define PSCBX(x,y) x->arch.y
#define TRUE 1
#define FALSE 0
@@ -36,6 +41,17 @@ typedef union {
#define STATIC
+#ifdef PRIVOP_ADDR_COUNT
+struct privop_addr_count privop_addr_counter[PRIVOP_COUNT_NINSTS] = {
+ { "rsm", { 0 }, { 0 }, 0 },
+ { "ssm", { 0 }, { 0 }, 0 }
+};
+extern void privop_count_addr(unsigned long addr, int inst);
+#define PRIVOP_COUNT_ADDR(regs,inst) privop_count_addr(regs->cr_iip,inst)
+#else
+#define PRIVOP_COUNT_ADDR(x,y) do {} while (0)
+#endif
+
unsigned long vcpu_verbose = 0;
#define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0)
@@ -76,30 +92,20 @@ vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value)
IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val)
{
if (reg == 44) return (vcpu_set_itc(vcpu,val));
- if (reg == 27) return (IA64_ILLOP_FAULT);
- if (reg > 7) return (IA64_ILLOP_FAULT);
- PSCB(vcpu).krs[reg] = val;
-#if 0
-// for now, privify kr read's so all kr accesses are privileged
- switch (reg) {
- case 0: asm volatile ("mov ar.k0=%0" :: "r"(val)); break;
- case 1: asm volatile ("mov ar.k1=%0" :: "r"(val)); break;
- case 2: asm volatile ("mov ar.k2=%0" :: "r"(val)); break;
- case 3: asm volatile ("mov ar.k3=%0" :: "r"(val)); break;
- case 4: asm volatile ("mov ar.k4=%0" :: "r"(val)); break;
- case 5: asm volatile ("mov ar.k5=%0" :: "r"(val)); break;
- case 6: asm volatile ("mov ar.k6=%0" :: "r"(val)); break;
- case 7: asm volatile ("mov ar.k7=%0" :: "r"(val)); break;
- case 27: asm volatile ("mov ar.cflg=%0" :: "r"(val)); break;
- }
-#endif
+ else if (reg == 27) return (IA64_ILLOP_FAULT);
+ else if (reg == 24)
+ printf("warning: setting ar.eflg is a no-op; no IA-32 support\n");
+ else if (reg > 7) return (IA64_ILLOP_FAULT);
+ else PSCB(vcpu,krs[reg]) = val;
return IA64_NO_FAULT;
}
IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val)
{
- if (reg > 7) return (IA64_ILLOP_FAULT);
- *val = PSCB(vcpu).krs[reg];
+ if (reg == 24)
+ printf("warning: getting ar.eflg is a no-op; no IA-32 support\n");
+ else if (reg > 7) return (IA64_ILLOP_FAULT);
+ else *val = PSCB(vcpu,krs[reg]);
return IA64_NO_FAULT;
}
@@ -110,29 +116,36 @@ IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val)
void vcpu_set_metaphysical_mode(VCPU *vcpu, BOOLEAN newmode)
{
/* only do something if mode changes */
- if (!!newmode ^ !!vcpu->metaphysical_mode) {
- if (newmode) set_metaphysical_rr(0,vcpu->metaphysical_rid);
- else if (PSCB(vcpu).rrs[0] != -1)
- set_one_rr(0, PSCB(vcpu).rrs[0]);
- vcpu->metaphysical_mode = newmode;
+ if (!!newmode ^ !!PSCB(vcpu,metaphysical_mode)) {
+ if (newmode) set_metaphysical_rr0();
+ else if (PSCB(vcpu,rrs[0]) != -1)
+ set_one_rr(0, PSCB(vcpu,rrs[0]));
+ PSCB(vcpu,metaphysical_mode) = newmode;
}
}
+IA64FAULT vcpu_reset_psr_dt(VCPU *vcpu)
+{
+ vcpu_set_metaphysical_mode(vcpu,TRUE);
+ return IA64_NO_FAULT;
+}
+
IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
{
struct ia64_psr psr, imm, *ipsr;
REGS *regs = vcpu_regs(vcpu);
+ PRIVOP_COUNT_ADDR(regs,_RSM);
// TODO: All of these bits need to be virtualized
// TODO: Only allowed for current vcpu
__asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
ipsr = (struct ia64_psr *)&regs->cr_ipsr;
imm = *(struct ia64_psr *)&imm24;
// interrupt flag
- if (imm.i) PSCB(vcpu).interrupt_delivery_enabled = 0;
- if (imm.ic) PSCB(vcpu).interrupt_collection_enabled = 0;
+ if (imm.i) PSCB(vcpu,interrupt_delivery_enabled) = 0;
+ if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 0;
// interrupt collection flag
- //if (imm.ic) PSCB(vcpu).interrupt_delivery_enabled = 0;
+ //if (imm.ic) PSCB(vcpu,interrupt_delivery_enabled) = 0;
// just handle psr.up and psr.pp for now
if (imm24 & ~(IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP
| IA64_PSR_I | IA64_PSR_IC | IA64_PSR_DT
@@ -151,12 +164,26 @@ IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu);
#define SPURIOUS_VECTOR 0xf
+IA64FAULT vcpu_set_psr_dt(VCPU *vcpu)
+{
+ vcpu_set_metaphysical_mode(vcpu,FALSE);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_psr_i(VCPU *vcpu)
+{
+ PSCB(vcpu,interrupt_delivery_enabled) = 1;
+ PSCB(vcpu,interrupt_collection_enabled) = 1;
+ return IA64_NO_FAULT;
+}
+
IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
{
struct ia64_psr psr, imm, *ipsr;
REGS *regs = vcpu_regs(vcpu);
UINT64 mask, enabling_interrupts = 0;
+ PRIVOP_COUNT_ADDR(regs,_SSM);
// TODO: All of these bits need to be virtualized
__asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
imm = *(struct ia64_psr *)&imm24;
@@ -170,13 +197,13 @@ IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
if (imm.pp) { ipsr->pp = 1; psr.pp = 1; }
if (imm.sp) { ipsr->sp = 1; psr.sp = 1; }
if (imm.i) {
- if (!PSCB(vcpu).interrupt_delivery_enabled) {
+ if (!PSCB(vcpu,interrupt_delivery_enabled)) {
//printf("vcpu_set_psr_sm: psr.ic 0->1 ");
enabling_interrupts = 1;
}
- PSCB(vcpu).interrupt_delivery_enabled = 1;
+ PSCB(vcpu,interrupt_delivery_enabled) = 1;
}
- if (imm.ic) PSCB(vcpu).interrupt_collection_enabled = 1;
+ if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
// TODO: do this faster
if (imm.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
if (imm.ac) { ipsr->ac = 1; psr.ac = 1; }
@@ -218,11 +245,11 @@ IA64FAULT vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
if (newpsr.sp) { ipsr->sp = 1; psr.sp = 1; }
if (newpsr.i) {
- if (!PSCB(vcpu).interrupt_delivery_enabled)
+ if (!PSCB(vcpu,interrupt_delivery_enabled))
enabling_interrupts = 1;
- PSCB(vcpu).interrupt_delivery_enabled = 1;
+ PSCB(vcpu,interrupt_delivery_enabled) = 1;
}
- if (newpsr.ic) PSCB(vcpu).interrupt_collection_enabled = 1;
+ if (newpsr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
if (newpsr.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
if (newpsr.ac) { ipsr->ac = 1; psr.ac = 1; }
if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
@@ -253,9 +280,9 @@ IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT64 *pval)
__asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
newpsr = *(struct ia64_psr *)&psr;
if (newpsr.cpl == 2) newpsr.cpl = 0;
- if (PSCB(vcpu).interrupt_delivery_enabled) newpsr.i = 1;
+ if (PSCB(vcpu,interrupt_delivery_enabled)) newpsr.i = 1;
else newpsr.i = 0;
- if (PSCB(vcpu).interrupt_collection_enabled) newpsr.ic = 1;
+ if (PSCB(vcpu,interrupt_collection_enabled)) newpsr.ic = 1;
else newpsr.ic = 0;
*pval = *(unsigned long *)&newpsr;
return IA64_NO_FAULT;
@@ -263,28 +290,28 @@ IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT64 *pval)
BOOLEAN vcpu_get_psr_ic(VCPU *vcpu)
{
- return !!PSCB(vcpu).interrupt_collection_enabled;
+ return !!PSCB(vcpu,interrupt_collection_enabled);
}
BOOLEAN vcpu_get_psr_i(VCPU *vcpu)
{
- return !!PSCB(vcpu).interrupt_delivery_enabled;
+ return !!PSCB(vcpu,interrupt_delivery_enabled);
}
UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr)
{
- UINT64 dcr = PSCB(vcpu).dcr;
+ UINT64 dcr = PSCBX(vcpu,dcr);
PSR psr = {0};
-
+
//printf("*** vcpu_get_ipsr_int_state (0x%016lx)...",prevpsr);
psr.i64 = prevpsr;
- psr.be = 0; if (dcr & IA64_DCR_BE) psr.be = 1;
- psr.pp = 0; if (dcr & IA64_DCR_PP) psr.pp = 1;
- psr.ic = PSCB(vcpu).interrupt_collection_enabled;
- psr.i = PSCB(vcpu).interrupt_delivery_enabled;
- psr.bn = PSCB(vcpu).banknum;
- psr.dt = 1; psr.it = 1; psr.rt = 1;
- if (psr.cpl == 2) psr.cpl = 0; // !!!! fool domain
+ psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1;
+ psr.ia64_psr.pp = 0; if (dcr & IA64_DCR_PP) psr.ia64_psr.pp = 1;
+ psr.ia64_psr.ic = PSCB(vcpu,interrupt_collection_enabled);
+ psr.ia64_psr.i = PSCB(vcpu,interrupt_delivery_enabled);
+ psr.ia64_psr.bn = PSCB(vcpu,banknum);
+ psr.ia64_psr.dt = 1; psr.ia64_psr.it = 1; psr.ia64_psr.rt = 1;
+ if (psr.ia64_psr.cpl == 2) psr.ia64_psr.cpl = 0; // !!!! fool domain
// psr.pk = 1;
//printf("returns 0x%016lx...",psr.i64);
return psr.i64;
@@ -298,22 +325,22 @@ IA64FAULT vcpu_get_dcr(VCPU *vcpu, UINT64 *pval)
{
extern unsigned long privop_trace;
//privop_trace=0;
-//verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu).iip);
+//verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu,iip));
// Reads of cr.dcr on Xen always have the sign bit set, so
// a domain can differentiate whether it is running on SP or not
- *pval = PSCB(vcpu).dcr | 0x8000000000000000L;
+ *pval = PSCBX(vcpu,dcr) | 0x8000000000000000L;
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval)
{
- *pval = PSCB(vcpu).iva & ~0x7fffL;
+ *pval = PSCBX(vcpu,iva) & ~0x7fffL;
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_pta(VCPU *vcpu, UINT64 *pval)
{
- *pval = PSCB(vcpu).pta;
+ *pval = PSCB(vcpu,pta);
return (IA64_NO_FAULT);
}
@@ -321,13 +348,13 @@ IA64FAULT vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval)
{
//REGS *regs = vcpu_regs(vcpu);
//*pval = regs->cr_ipsr;
- *pval = PSCB(vcpu).ipsr;
+ *pval = PSCB(vcpu,ipsr);
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_isr(VCPU *vcpu, UINT64 *pval)
{
- *pval = PSCB(vcpu).isr;
+ *pval = PSCB(vcpu,isr);
return (IA64_NO_FAULT);
}
@@ -335,13 +362,13 @@ IA64FAULT vcpu_get_iip(VCPU *vcpu, UINT64 *pval)
{
//REGS *regs = vcpu_regs(vcpu);
//*pval = regs->cr_iip;
- *pval = PSCB(vcpu).iip;
+ *pval = PSCB(vcpu,iip);
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_ifa(VCPU *vcpu, UINT64 *pval)
{
- UINT64 val = PSCB(vcpu).ifa;
+ UINT64 val = PSCB(vcpu,ifa);
*pval = val;
return (IA64_NO_FAULT);
}
@@ -360,14 +387,14 @@ unsigned long vcpu_get_itir_on_fault(VCPU *vcpu, UINT64 ifa)
IA64FAULT vcpu_get_itir(VCPU *vcpu, UINT64 *pval)
{
- UINT64 val = PSCB(vcpu).itir;
+ UINT64 val = PSCB(vcpu,itir);
*pval = val;
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval)
{
- UINT64 val = PSCB(vcpu).iipa;
+ UINT64 val = PSCB(vcpu,iipa);
// SP entry code does not save iipa yet nor does it get
// properly delivered in the pscb
printf("*** vcpu_get_iipa: cr.iipa not fully implemented yet!!\n");
@@ -377,23 +404,26 @@ IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval)
IA64FAULT vcpu_get_ifs(VCPU *vcpu, UINT64 *pval)
{
- //PSCB(vcpu).ifs = PSCB(vcpu)->regs.cr_ifs;
- //*pval = PSCB(vcpu).regs.cr_ifs;
- *pval = PSCB(vcpu).ifs;
- PSCB(vcpu).incomplete_regframe = 0;
+ //PSCB(vcpu,ifs) = PSCB(vcpu)->regs.cr_ifs;
+ //*pval = PSCB(vcpu,regs).cr_ifs;
+ *pval = PSCB(vcpu,ifs);
+ PSCB(vcpu,incomplete_regframe) = 0;
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_iim(VCPU *vcpu, UINT64 *pval)
{
- UINT64 val = PSCB(vcpu).iim;
+ UINT64 val = PSCB(vcpu,iim);
*pval = val;
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_iha(VCPU *vcpu, UINT64 *pval)
{
- return vcpu_thash(vcpu,PSCB(vcpu).ifa,pval);
+ //return vcpu_thash(vcpu,PSCB(vcpu,ifa),pval);
+ UINT64 val = PSCB(vcpu,iha);
+ *pval = val;
+ return (IA64_NO_FAULT);
}
IA64FAULT vcpu_set_dcr(VCPU *vcpu, UINT64 val)
@@ -404,13 +434,13 @@ extern unsigned long privop_trace;
// a domain can differentiate whether it is running on SP or not
// Thus, writes of DCR should ignore the sign bit
//verbose("vcpu_set_dcr: called\n");
- PSCB(vcpu).dcr = val & ~0x8000000000000000L;
+ PSCBX(vcpu,dcr) = val & ~0x8000000000000000L;
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val)
{
- PSCB(vcpu).iva = val & ~0x7fffL;
+ PSCBX(vcpu,iva) = val & ~0x7fffL;
return (IA64_NO_FAULT);
}
@@ -422,25 +452,25 @@ IA64FAULT vcpu_set_pta(VCPU *vcpu, UINT64 val)
}
if (val & (0x3f<<9)) /* reserved fields */ return IA64_RSVDREG_FAULT;
if (val & 2) /* reserved fields */ return IA64_RSVDREG_FAULT;
- PSCB(vcpu).pta = val;
+ PSCB(vcpu,pta) = val;
return IA64_NO_FAULT;
}
IA64FAULT vcpu_set_ipsr(VCPU *vcpu, UINT64 val)
{
- PSCB(vcpu).ipsr = val;
+ PSCB(vcpu,ipsr) = val;
return IA64_NO_FAULT;
}
IA64FAULT vcpu_set_isr(VCPU *vcpu, UINT64 val)
{
- PSCB(vcpu).isr = val;
+ PSCB(vcpu,isr) = val;
return IA64_NO_FAULT;
}
IA64FAULT vcpu_set_iip(VCPU *vcpu, UINT64 val)
{
- PSCB(vcpu).iip = val;
+ PSCB(vcpu,iip) = val;
return IA64_NO_FAULT;
}
@@ -455,13 +485,13 @@ IA64FAULT vcpu_increment_iip(VCPU *vcpu)
IA64FAULT vcpu_set_ifa(VCPU *vcpu, UINT64 val)
{
- PSCB(vcpu).ifa = val;
+ PSCB(vcpu,ifa) = val;
return IA64_NO_FAULT;
}
IA64FAULT vcpu_set_itir(VCPU *vcpu, UINT64 val)
{
- PSCB(vcpu).itir = val;
+ PSCB(vcpu,itir) = val;
return IA64_NO_FAULT;
}
@@ -470,26 +500,26 @@ IA64FAULT vcpu_set_iipa(VCPU *vcpu, UINT64 val)
// SP entry code does not save iipa yet nor does it get
// properly delivered in the pscb
printf("*** vcpu_set_iipa: cr.iipa not fully implemented yet!!\n");
- PSCB(vcpu).iipa = val;
+ PSCB(vcpu,iipa) = val;
return IA64_NO_FAULT;
}
IA64FAULT vcpu_set_ifs(VCPU *vcpu, UINT64 val)
{
//REGS *regs = vcpu_regs(vcpu);
- PSCB(vcpu).ifs = val;
+ PSCB(vcpu,ifs) = val;
return IA64_NO_FAULT;
}
IA64FAULT vcpu_set_iim(VCPU *vcpu, UINT64 val)
{
- PSCB(vcpu).iim = val;
+ PSCB(vcpu,iim) = val;
return IA64_NO_FAULT;
}
IA64FAULT vcpu_set_iha(VCPU *vcpu, UINT64 val)
{
- PSCB(vcpu).iha = val;
+ PSCB(vcpu,iha) = val;
return IA64_NO_FAULT;
}
@@ -503,11 +533,26 @@ void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
printf("vcpu_pend_interrupt: bad vector\n");
return;
}
- if (!test_bit(vector,PSCB(vcpu).delivery_mask)) return;
- if (test_bit(vector,PSCB(vcpu).irr)) {
+#ifdef CONFIG_VTI
+ if ( VMX_DOMAIN(vcpu) ) {
+ set_bit(vector,VPD_CR(vcpu,irr));
+ } else
+#endif // CONFIG_VTI
+ {
+ if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return;
+ if (test_bit(vector,PSCBX(vcpu,irr))) {
//printf("vcpu_pend_interrupt: overrun\n");
}
- set_bit(vector,PSCB(vcpu).irr);
+ set_bit(vector,PSCBX(vcpu,irr));
+ PSCB(vcpu,pending_interruption) = 1;
+ }
+}
+
+void early_tick(VCPU *vcpu)
+{
+ UINT64 *p = &PSCBX(vcpu,irr[3]);
+ printf("vcpu_check_pending: about to deliver early tick\n");
+ printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p);
}
#define IA64_TPR_MMI 0x10000
@@ -523,9 +568,9 @@ UINT64 vcpu_check_pending_interrupts(VCPU *vcpu)
{
UINT64 *p, *q, *r, bits, bitnum, mask, i, vector;
- p = &PSCB(vcpu).irr[3];
- q = &PSCB(vcpu).delivery_mask[3];
- r = &PSCB(vcpu).insvc[3];
+ p = &PSCBX(vcpu,irr[3]);
+ q = &PSCB(vcpu,delivery_mask[3]);
+ r = &PSCBX(vcpu,insvc[3]);
for (i = 3; ; p--, q--, r--, i--) {
bits = *p & *q;
if (bits) break; // got a potential interrupt
@@ -550,18 +595,26 @@ UINT64 vcpu_check_pending_interrupts(VCPU *vcpu)
//printf("but masked by equal inservice\n");
return SPURIOUS_VECTOR;
}
- if (PSCB(vcpu).tpr & IA64_TPR_MMI) {
+ if (PSCB(vcpu,tpr) & IA64_TPR_MMI) {
// tpr.mmi is set
//printf("but masked by tpr.mmi\n");
return SPURIOUS_VECTOR;
}
- if (((PSCB(vcpu).tpr & IA64_TPR_MIC) + 15) >= vector) {
+ if (((PSCB(vcpu,tpr) & IA64_TPR_MIC) + 15) >= vector) {
//tpr.mic masks class
//printf("but masked by tpr.mic\n");
return SPURIOUS_VECTOR;
}
//printf("returned to caller\n");
+#if 0
+if (vector == (PSCB(vcpu,itv) & 0xff)) {
+ UINT64 now = ia64_get_itc();
+ UINT64 itm = PSCBX(vcpu,domain_itm);
+ if (now < itm) early_tick(vcpu);
+
+}
+#endif
return vector;
}
@@ -571,13 +624,20 @@ UINT64 vcpu_deliverable_interrupts(VCPU *vcpu)
vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR);
}
+UINT64 vcpu_deliverable_timer(VCPU *vcpu)
+{
+ return (vcpu_get_psr_i(vcpu) &&
+ vcpu_check_pending_interrupts(vcpu) == PSCB(vcpu,itv));
+}
+
IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval)
{
extern unsigned long privop_trace;
//privop_trace=1;
//TODO: Implement this
printf("vcpu_get_lid: WARNING: Getting cr.lid always returns zero\n");
- *pval = 0;
+ //*pval = 0;
+ *pval = ia64_getreg(_IA64_REG_CR_LID);
return IA64_NO_FAULT;
}
@@ -585,7 +645,16 @@ IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval)
{
int i;
UINT64 vector, mask;
-#if 1
+
+#define HEARTBEAT_FREQ 16 // period in seconds
+#ifdef HEARTBEAT_FREQ
+#define N_DOMS 16 // period in seconds
+ static long count[N_DOMS] = { 0 };
+ static long nonclockcount[N_DOMS] = { 0 };
+ REGS *regs = vcpu_regs(vcpu);
+ unsigned domid = vcpu->domain->domain_id;
+#endif
+#ifdef IRQ_DEBUG
static char firstivr = 1;
static char firsttime[256];
if (firstivr) {
@@ -597,13 +666,25 @@ IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval)
vector = vcpu_check_pending_interrupts(vcpu);
if (vector == SPURIOUS_VECTOR) {
- PSCB(vcpu).pending_interruption = 0;
+ PSCB(vcpu,pending_interruption) = 0;
*pval = vector;
return IA64_NO_FAULT;
}
+#ifdef HEARTBEAT_FREQ
+ if (domid >= N_DOMS) domid = N_DOMS-1;
+ if (vector == (PSCB(vcpu,itv) & 0xff)) {
+ if (!(++count[domid] & ((HEARTBEAT_FREQ*1024)-1))) {
+ printf("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n",
+ domid, count[domid], nonclockcount[domid]);
+ //count[domid] = 0;
+ //dump_runq();
+ }
+ }
+ else nonclockcount[domid]++;
+#endif
// now have an unmasked, pending, deliverable vector!
// getting ivr has "side effects"
-#if 0
+#ifdef IRQ_DEBUG
if (firsttime[vector]) {
printf("*** First get_ivr on vector=%d,itc=%lx\n",
vector,ia64_get_itc());
@@ -613,16 +694,20 @@ IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval)
i = vector >> 6;
mask = 1L << (vector & 0x3f);
//printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %ld\n",vector);
- PSCB(vcpu).insvc[i] |= mask;
- PSCB(vcpu).irr[i] &= ~mask;
- PSCB(vcpu).pending_interruption--;
+ PSCBX(vcpu,insvc[i]) |= mask;
+ PSCBX(vcpu,irr[i]) &= ~mask;
+ //PSCB(vcpu,pending_interruption)--;
*pval = vector;
+ // if delivering a timer interrupt, remember domain_itm
+ if (vector == (PSCB(vcpu,itv) & 0xff)) {
+ PSCBX(vcpu,domain_itm_last) = PSCBX(vcpu,domain_itm);
+ }
return IA64_NO_FAULT;
}
IA64FAULT vcpu_get_tpr(VCPU *vcpu, UINT64 *pval)
{
- *pval = PSCB(vcpu).tpr;
+ *pval = PSCB(vcpu,tpr);
return (IA64_NO_FAULT);
}
@@ -678,19 +763,19 @@ IA64FAULT vcpu_get_irr3(VCPU *vcpu, UINT64 *pval)
IA64FAULT vcpu_get_itv(VCPU *vcpu, UINT64 *pval)
{
- *pval = PSCB(vcpu).itv;
+ *pval = PSCB(vcpu,itv);
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_pmv(VCPU *vcpu, UINT64 *pval)
{
- *pval = PSCB(vcpu).pmv;
+ *pval = PSCB(vcpu,pmv);
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval)
{
- *pval = PSCB(vcpu).cmcv;
+ *pval = PSCB(vcpu,cmcv);
return (IA64_NO_FAULT);
}
@@ -719,7 +804,9 @@ IA64FAULT vcpu_set_lid(VCPU *vcpu, UINT64 val)
IA64FAULT vcpu_set_tpr(VCPU *vcpu, UINT64 val)
{
if (val & 0xff00) return IA64_RSVDREG_FAULT;
- PSCB(vcpu).tpr = val;
+ PSCB(vcpu,tpr) = val;
+ if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ PSCB(vcpu,pending_interruption) = 1;
return (IA64_NO_FAULT);
}
@@ -728,7 +815,7 @@ IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val)
UINT64 *p, bits, vec, bitnum;
int i;
- p = &PSCB(vcpu).insvc[3];
+ p = &PSCBX(vcpu,insvc[3]);
for (i = 3; (i >= 0) && !(bits = *p); i--, p--);
if (i < 0) {
printf("Trying to EOI interrupt when none are in-service.\r\n");
@@ -740,11 +827,13 @@ IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val)
bits &= ~(1L << bitnum);
*p = bits;
/* clearing an eoi bit may unmask another pending interrupt... */
- if (PSCB(vcpu).interrupt_delivery_enabled) { // but only if enabled...
+ if (PSCB(vcpu,interrupt_delivery_enabled)) { // but only if enabled...
// worry about this later... Linux only calls eoi
// with interrupts disabled
printf("Trying to EOI interrupt with interrupts enabled\r\n");
}
+ if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ PSCB(vcpu,pending_interruption) = 1;
//printf("YYYYY vcpu_set_eoi: Successful\n");
return (IA64_NO_FAULT);
}
@@ -775,10 +864,10 @@ IA64FAULT vcpu_set_itv(VCPU *vcpu, UINT64 val)
extern unsigned long privop_trace;
//privop_trace=1;
if (val & 0xef00) return (IA64_ILLOP_FAULT);
- PSCB(vcpu).itv = val;
+ PSCB(vcpu,itv) = val;
if (val & 0x10000) {
-printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCB(vcpu).domain_itm);
- PSCB(vcpu).domain_itm = 0;
+printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCBX(vcpu,domain_itm));
+ PSCBX(vcpu,domain_itm) = 0;
}
else vcpu_enable_timer(vcpu,1000000L);
return (IA64_NO_FAULT);
@@ -787,35 +876,56 @@ printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCB(vcpu).domain_i
IA64FAULT vcpu_set_pmv(VCPU *vcpu, UINT64 val)
{
if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT;
- PSCB(vcpu).pmv = val;
+ PSCB(vcpu,pmv) = val;
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT64 val)
{
if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT;
- PSCB(vcpu).cmcv = val;
+ PSCB(vcpu,cmcv) = val;
return (IA64_NO_FAULT);
}
/**************************************************************************
+ VCPU temporary register access routines
+**************************************************************************/
+UINT64 vcpu_get_tmp(VCPU *vcpu, UINT64 index)
+{
+ if (index > 7) return 0;
+ return PSCB(vcpu,tmp[index]);
+}
+
+void vcpu_set_tmp(VCPU *vcpu, UINT64 index, UINT64 val)
+{
+ if (index <= 7) PSCB(vcpu,tmp[index]) = val;
+}
+
+/**************************************************************************
Interval timer routines
**************************************************************************/
BOOLEAN vcpu_timer_disabled(VCPU *vcpu)
{
- UINT64 itv = PSCB(vcpu).itv;
+ UINT64 itv = PSCB(vcpu,itv);
return(!itv || !!(itv & 0x10000));
}
+BOOLEAN vcpu_timer_inservice(VCPU *vcpu)
+{
+ UINT64 itv = PSCB(vcpu,itv);
+ return (test_bit(itv, PSCBX(vcpu,insvc)));
+}
+
BOOLEAN vcpu_timer_expired(VCPU *vcpu)
{
- unsigned long domain_itm = PSCB(vcpu).domain_itm;
+ unsigned long domain_itm = PSCBX(vcpu,domain_itm);
unsigned long now = ia64_get_itc();
-
- if (domain_itm && (now > domain_itm) &&
- !vcpu_timer_disabled(vcpu)) return TRUE;
- return FALSE;
+
+ if (!domain_itm) return FALSE;
+ if (now < domain_itm) return FALSE;
+ if (vcpu_timer_disabled(vcpu)) return FALSE;
+ return TRUE;
}
void vcpu_safe_set_itm(unsigned long val)
@@ -836,36 +946,36 @@ void vcpu_safe_set_itm(unsigned long val)
void vcpu_set_next_timer(VCPU *vcpu)
{
- UINT64 d = PSCB(vcpu).domain_itm;
- //UINT64 s = PSCB(vcpu).xen_itm;
+ UINT64 d = PSCBX(vcpu,domain_itm);
+ //UINT64 s = PSCBX(vcpu,xen_itm);
UINT64 s = local_cpu_data->itm_next;
UINT64 now = ia64_get_itc();
- //UINT64 interval = PSCB(vcpu).xen_timer_interval;
+ //UINT64 interval = PSCBX(vcpu,xen_timer_interval);
/* gloss over the wraparound problem for now... we know it exists
* but it doesn't matter right now */
#if 0
/* ensure at least next SP tick is in the future */
- if (!interval) PSCB(vcpu).xen_itm = now +
+ if (!interval) PSCBX(vcpu,xen_itm) = now +
#if 0
(running_on_sim() ? SIM_DEFAULT_CLOCK_RATE :
- DEFAULT_CLOCK_RATE);
+ DEFAULT_CLOCK_RATE);
#else
3000000;
//printf("vcpu_set_next_timer: HACK!\n");
#endif
#if 0
- if (PSCB(vcpu).xen_itm < now)
- while (PSCB(vcpu).xen_itm < now + (interval>>1))
- PSCB(vcpu).xen_itm += interval;
+ if (PSCBX(vcpu,xen_itm) < now)
+ while (PSCBX(vcpu,xen_itm) < now + (interval>>1))
+ PSCBX(vcpu,xen_itm) += interval;
#endif
#endif
- if (is_idle_task(vcpu)) {
+ if (is_idle_task(vcpu->domain)) {
printf("****** vcpu_set_next_timer called during idle!!\n");
}
- //s = PSCB(vcpu).xen_itm;
+ //s = PSCBX(vcpu,xen_itm);
if (d && (d > now) && (d < s)) {
vcpu_safe_set_itm(d);
//using_domain_as_itm++;
@@ -879,11 +989,11 @@ void vcpu_set_next_timer(VCPU *vcpu)
// parameter is a time interval specified in cycles
void vcpu_enable_timer(VCPU *vcpu,UINT64 cycles)
{
- PSCB(vcpu).xen_timer_interval = cycles;
+ PSCBX(vcpu,xen_timer_interval) = cycles;
vcpu_set_next_timer(vcpu);
printf("vcpu_enable_timer(%d): interval set to %d cycles\n",
- PSCB(vcpu).xen_timer_interval);
- __set_bit(PSCB(vcpu).itv, PSCB(vcpu).delivery_mask);
+ PSCBX(vcpu,xen_timer_interval));
+ __set_bit(PSCB(vcpu,itv), PSCB(vcpu,delivery_mask));
}
IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val)
@@ -892,30 +1002,34 @@ IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val)
//if (val < now) val = now + 1000;
//printf("*** vcpu_set_itm: called with %lx\n",val);
- PSCB(vcpu).domain_itm = val;
+ PSCBX(vcpu,domain_itm) = val;
vcpu_set_next_timer(vcpu);
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val)
{
-
+
UINT64 oldnow = ia64_get_itc();
- UINT64 olditm = PSCB(vcpu).domain_itm;
+ UINT64 olditm = PSCBX(vcpu,domain_itm);
unsigned long d = olditm - oldnow;
unsigned long x = local_cpu_data->itm_next - oldnow;
-
+
UINT64 newnow = val, min_delta;
+#define DISALLOW_SETTING_ITC_FOR_NOW
+#ifdef DISALLOW_SETTING_ITC_FOR_NOW
+printf("vcpu_set_itc: Setting ar.itc is currently disabled\n");
+#else
local_irq_disable();
if (olditm) {
printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d);
- PSCB(vcpu).domain_itm = newnow + d;
+ PSCBX(vcpu,domain_itm) = newnow + d;
}
local_cpu_data->itm_next = newnow + x;
- d = PSCB(vcpu).domain_itm;
+ d = PSCBX(vcpu,domain_itm);
x = local_cpu_data->itm_next;
-
+
ia64_set_itc(newnow);
if (d && (d > newnow) && (d < x)) {
vcpu_safe_set_itm(d);
@@ -926,6 +1040,7 @@ printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d);
//using_xen_as_itm++;
}
local_irq_enable();
+#endif
return (IA64_NO_FAULT);
}
@@ -946,18 +1061,43 @@ IA64FAULT vcpu_get_itc(VCPU *vcpu, UINT64 *pval)
void vcpu_pend_timer(VCPU *vcpu)
{
- UINT64 itv = PSCB(vcpu).itv & 0xff;
+ UINT64 itv = PSCB(vcpu,itv) & 0xff;
if (vcpu_timer_disabled(vcpu)) return;
+ //if (vcpu_timer_inservice(vcpu)) return;
+ if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) {
+ // already delivered an interrupt for this so
+ // don't deliver another
+ return;
+ }
+#if 0
+ // attempt to flag "timer tick before its due" source
+ {
+ UINT64 itm = PSCBX(vcpu,domain_itm);
+ UINT64 now = ia64_get_itc();
+ if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n");
+ }
+#endif
vcpu_pend_interrupt(vcpu, itv);
}
+// returns true if ready to deliver a timer interrupt too early
+UINT64 vcpu_timer_pending_early(VCPU *vcpu)
+{
+ UINT64 now = ia64_get_itc();
+ UINT64 itm = PSCBX(vcpu,domain_itm);
+
+ if (vcpu_timer_disabled(vcpu)) return 0;
+ if (!itm) return 0;
+ return (vcpu_deliverable_timer(vcpu) && (now < itm));
+}
+
//FIXME: This is a hack because everything dies if a timer tick is lost
void vcpu_poke_timer(VCPU *vcpu)
{
- UINT64 itv = PSCB(vcpu).itv & 0xff;
+ UINT64 itv = PSCB(vcpu,itv) & 0xff;
UINT64 now = ia64_get_itc();
- UINT64 itm = PSCB(vcpu).domain_itm;
+ UINT64 itm = PSCBX(vcpu,domain_itm);
UINT64 irr;
if (vcpu_timer_disabled(vcpu)) return;
@@ -967,13 +1107,13 @@ void vcpu_poke_timer(VCPU *vcpu)
while(1);
}
// using 0xef instead of itv so can get real irr
- if (now > itm && !test_bit(0xefL, PSCB(vcpu).insvc)) {
- if (!test_bit(0xefL,PSCB(vcpu).irr)) {
+ if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) {
+ if (!test_bit(0xefL,PSCBX(vcpu,irr))) {
irr = ia64_getreg(_IA64_REG_CR_IRR3);
if (irr & (1L<<(0xef-0xc0))) return;
if (now-itm>0x800000)
printf("*** poking timer: now=%lx,vitm=%lx,xitm=%lx,itm=%lx\n",now,itm,local_cpu_data->itm_next,ia64_get_itm());
- vcpu_pend_interrupt(vcpu, 0xefL);
+ vcpu_pend_timer(vcpu);
}
}
}
@@ -985,8 +1125,8 @@ Privileged operation emulation routines
IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa)
{
- PSCB(vcpu).ifa = ifa; // privop traps don't set ifa so do it here
- return (IA64_DATA_TLB_VECTOR | IA64_FORCED_IFA);
+ PSCB(vcpu,tmp[0]) = ifa; // save ifa in vcpu structure, then specify IA64_FORCED_IFA
+ return (vcpu_get_rr_ve(vcpu,ifa) ? IA64_DATA_TLB_VECTOR : IA64_ALT_DATA_TLB_VECTOR) | IA64_FORCED_IFA;
}
@@ -999,34 +1139,35 @@ IA64FAULT vcpu_rfi(VCPU *vcpu)
REGS *regs = vcpu_regs(vcpu);
extern void dorfirfi(void);
- psr.i64 = PSCB(vcpu).ipsr;
- if (psr.cpl < 3) psr.cpl = 2;
- if (psr.i) PSCB(vcpu).interrupt_delivery_enabled = 1;
- int_enable = psr.i;
- if (psr.ic) PSCB(vcpu).interrupt_collection_enabled = 1;
- if (psr.dt && psr.rt && psr.it) vcpu_set_metaphysical_mode(vcpu,FALSE);
+ psr.i64 = PSCB(vcpu,ipsr);
+ if (psr.ia64_psr.cpl < 3) psr.ia64_psr.cpl = 2;
+ if (psr.ia64_psr.i) PSCB(vcpu,interrupt_delivery_enabled) = 1;
+ int_enable = psr.ia64_psr.i;
+ if (psr.ia64_psr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
+ if (psr.ia64_psr.dt && psr.ia64_psr.rt && psr.ia64_psr.it) vcpu_set_metaphysical_mode(vcpu,FALSE);
else vcpu_set_metaphysical_mode(vcpu,TRUE);
- psr.ic = 1; psr.i = 1;
- psr.dt = 1; psr.rt = 1; psr.it = 1;
- psr.bn = 1;
+ psr.ia64_psr.ic = 1; psr.ia64_psr.i = 1;
+ psr.ia64_psr.dt = 1; psr.ia64_psr.rt = 1; psr.ia64_psr.it = 1;
+ psr.ia64_psr.bn = 1;
//psr.pk = 1; // checking pkeys shouldn't be a problem but seems broken
- if (psr.be) {
+ if (psr.ia64_psr.be) {
printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
return (IA64_ILLOP_FAULT);
}
- PSCB(vcpu).incomplete_regframe = 0; // is this necessary?
- ifs = PSCB(vcpu).ifs;
+ PSCB(vcpu,incomplete_regframe) = 0; // is this necessary?
+ ifs = PSCB(vcpu,ifs);
//if ((ifs & regs->cr_ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) {
//if ((ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) {
if (ifs & regs->cr_ifs & 0x8000000000000000L) {
-#define SI_OFS(x) ((char *)(&PSCB(vcpu).x) - (char *)(vcpu->shared_info))
-if (SI_OFS(iip)!=0x150 || SI_OFS(ipsr)!=0x148 || SI_OFS(ifs)!=0x158) {
+#define SI_OFS(x) ((char *)(&PSCB(vcpu,x)) - (char *)(vcpu->vcpu_info))
+if (SI_OFS(iip)!=0x10 || SI_OFS(ipsr)!=0x08 || SI_OFS(ifs)!=0x18) {
printf("SI_CR_IIP/IPSR/IFS_OFFSET CHANGED, SEE dorfirfi\n");
+printf("SI_CR_IIP=0x%x,IPSR=0x%x,IFS_OFFSET=0x%x\n",SI_OFS(iip),SI_OFS(ipsr),SI_OFS(ifs));
while(1);
}
- // TODO: validate PSCB(vcpu).iip
- // TODO: PSCB(vcpu).ipsr = psr;
- PSCB(vcpu).ipsr = psr.i64;
+ // TODO: validate PSCB(vcpu,iip)
+ // TODO: PSCB(vcpu,ipsr) = psr;
+ PSCB(vcpu,ipsr) = psr.i64;
// now set up the trampoline
regs->cr_iip = *(unsigned long *)dorfirfi; // function pointer!!
__asm__ __volatile ("mov %0=psr;;":"=r"(regspsr)::"memory");
@@ -1034,22 +1175,23 @@ while(1);
}
else {
regs->cr_ipsr = psr.i64;
- regs->cr_iip = PSCB(vcpu).iip;
+ regs->cr_iip = PSCB(vcpu,iip);
}
- PSCB(vcpu).interrupt_collection_enabled = 1;
+ PSCB(vcpu,interrupt_collection_enabled) = 1;
vcpu_bsw1(vcpu);
- PSCB(vcpu).interrupt_delivery_enabled = int_enable;
+ PSCB(vcpu,interrupt_delivery_enabled) = int_enable;
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_cover(VCPU *vcpu)
{
+ // TODO: Only allowed for current vcpu
REGS *regs = vcpu_regs(vcpu);
- if (!PSCB(vcpu).interrupt_collection_enabled) {
- if (!PSCB(vcpu).incomplete_regframe)
- PSCB(vcpu).ifs = regs->cr_ifs;
- else PSCB(vcpu).incomplete_regframe = 0;
+ if (!PSCB(vcpu,interrupt_collection_enabled)) {
+ if (!PSCB(vcpu,incomplete_regframe))
+ PSCB(vcpu,ifs) = regs->cr_ifs;
+ else PSCB(vcpu,incomplete_regframe) = 0;
}
regs->cr_ifs = 0;
return (IA64_NO_FAULT);
@@ -1058,7 +1200,7 @@ IA64FAULT vcpu_cover(VCPU *vcpu)
IA64FAULT vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
{
extern unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr);
- UINT64 pta = PSCB(vcpu).pta;
+ UINT64 pta = PSCB(vcpu,pta);
UINT64 pta_sz = (pta & IA64_PTA_SZ(0x3f)) >> IA64_PTA_SZ_BIT;
UINT64 pta_base = pta & ~((1UL << IA64_PTA_BASE_BIT)-1);
UINT64 Mask = (1L << pta_sz) - 1;
@@ -1072,15 +1214,17 @@ IA64FAULT vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
((pta_base >> 15) & 0x3fffffffffff) & compMask_60_15;
UINT64 VHPT_addr2b =
((VHPT_offset >> 15) & 0x3fffffffffff) & Mask_60_15;;
- UINT64 VHPT_addr3 = VHPT_offset & 0x3fff;
+ UINT64 VHPT_addr3 = VHPT_offset & 0x7fff;
UINT64 VHPT_addr = VHPT_addr1 | ((VHPT_addr2a | VHPT_addr2b) << 15) |
VHPT_addr3;
+#if 0
if (VHPT_addr1 == 0xe000000000000000L) {
printf("vcpu_thash: thash unsupported with rr7 @%lx\n",
- PSCB(vcpu).iip);
+ PSCB(vcpu,iip));
return (IA64_ILLOP_FAULT);
}
+#endif
//verbose("vcpu_thash: vadr=%p, VHPT_addr=%p\n",vadr,VHPT_addr);
*pval = VHPT_addr;
return (IA64_NO_FAULT);
@@ -1095,18 +1239,24 @@ IA64FAULT vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
{
extern TR_ENTRY *match_tr(VCPU *,UINT64);
- extern TR_ENTRY *match_dtlb(VCPU *,UINT64);
+ unsigned long match_dtlb(VCPU *, unsigned long, unsigned long *, unsigned long *);
TR_ENTRY *trp;
- UINT64 mask;
+ UINT64 mask, pteval, mp_pte, ps;
extern unsigned long privop_trace;
- if ((trp=match_tr(current,vadr)) || (trp=match_dtlb(current,vadr))) {
+ if (pteval = match_dtlb(vcpu, vadr, &ps, &mp_pte) && (mp_pte != -1UL)) {
+ mask = (1L << ps) - 1;
+ *padr = ((mp_pte & _PAGE_PPN_MASK) & ~mask) | (vadr & mask);
+ verbose("vcpu_tpa: addr=%p @%p, successful, padr=%p\n",vadr,PSCB(vcpu,iip),*padr);
+ return (IA64_NO_FAULT);
+ }
+ if (trp=match_tr(current,vadr)) {
mask = (1L << trp->ps) - 1;
*padr = ((trp->ppn << 12) & ~mask) | (vadr & mask);
- verbose("vcpu_tpa: addr=%p @%p, successful, padr=%p\n",vadr,PSCB(vcpu).iip,*padr);
+ verbose("vcpu_tpa: addr=%p @%p, successful, padr=%p\n",vadr,PSCB(vcpu,iip),*padr);
return (IA64_NO_FAULT);
}
- verbose("vcpu_tpa addr=%p, @%p, forcing data miss\n",vadr,PSCB(vcpu).iip);
+ verbose("vcpu_tpa addr=%p, @%p, forcing data miss\n",vadr,PSCB(vcpu,iip));
return vcpu_force_data_miss(vcpu, vadr);
}
@@ -1196,30 +1346,32 @@ IA64FAULT vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval)
IA64FAULT vcpu_bsw0(VCPU *vcpu)
{
+ // TODO: Only allowed for current vcpu
REGS *regs = vcpu_regs(vcpu);
unsigned long *r = &regs->r16;
- unsigned long *b0 = &PSCB(vcpu).bank0_regs[0];
- unsigned long *b1 = &PSCB(vcpu).bank1_regs[0];
+ unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]);
+ unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]);
int i;
- if (PSCB(vcpu).banknum) {
+ if (PSCB(vcpu,banknum)) {
for (i = 0; i < 16; i++) { *b1++ = *r; *r++ = *b0++; }
- PSCB(vcpu).banknum = 0;
+ PSCB(vcpu,banknum) = 0;
}
return (IA64_NO_FAULT);
}
IA64FAULT vcpu_bsw1(VCPU *vcpu)
{
+ // TODO: Only allowed for current vcpu
REGS *regs = vcpu_regs(vcpu);
unsigned long *r = &regs->r16;
- unsigned long *b0 = &PSCB(vcpu).bank0_regs[0];
- unsigned long *b1 = &PSCB(vcpu).bank1_regs[0];
+ unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]);
+ unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]);
int i;
- if (!PSCB(vcpu).banknum) {
+ if (!PSCB(vcpu,banknum)) {
for (i = 0; i < 16; i++) { *b0++ = *r; *r++ = *b1++; }
- PSCB(vcpu).banknum = 1;
+ PSCB(vcpu,banknum) = 1;
}
return (IA64_NO_FAULT);
}
@@ -1260,30 +1412,27 @@ IA64FAULT vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval)
unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr)
{
-
ia64_rr rr;
- rr.rrval = PSCB(vcpu).rrs[vadr>>61];
+ rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
return(rr.ve);
}
unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr)
{
-
ia64_rr rr;
- rr.rrval = PSCB(vcpu).rrs[vadr>>61];
+ rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
return(rr.ps);
}
unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr)
{
-
ia64_rr rr;
- rr.rrval = PSCB(vcpu).rrs[vadr>>61];
+ rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
return(rr.rid);
}
@@ -1291,7 +1440,7 @@ unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr)
IA64FAULT vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
{
extern void set_one_rr(UINT64, UINT64);
- PSCB(vcpu).rrs[reg>>61] = val;
+ PSCB(vcpu,rrs)[reg>>61] = val;
// warning: set_one_rr() does it "live"
set_one_rr(reg,val);
return (IA64_NO_FAULT);
@@ -1299,7 +1448,7 @@ IA64FAULT vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
IA64FAULT vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
{
- UINT val = PSCB(vcpu).rrs[reg>>61];
+ UINT val = PSCB(vcpu,rrs)[reg>>61];
*pval = val;
return (IA64_NO_FAULT);
}
@@ -1367,8 +1516,8 @@ TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu, TR_ENTRY *trp, UINT64 ifa, int count)
for (i = 0; i < count; i++, trp++) {
if (!trp->p) continue;
if (physicalize_rid(vcpu,trp->rid) != rid) continue;
- if (ifa < trp->vadr) continue;
- if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue;
+ if (ifa < trp->vadr) continue;
+ if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue;
//if (trp->key && !match_pkr(vcpu,trp->key)) continue;
return trp;
}
@@ -1379,9 +1528,9 @@ TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa)
{
TR_ENTRY *trp;
- trp = vcpu_match_tr_entry(vcpu,vcpu->shared_info->arch.dtrs,ifa,NDTRS);
+ trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS);
if (trp) return trp;
- trp = vcpu_match_tr_entry(vcpu,vcpu->shared_info->arch.itrs,ifa,NITRS);
+ trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS);
if (trp) return trp;
return 0;
}
@@ -1392,7 +1541,8 @@ IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte,
TR_ENTRY *trp;
if (slot >= NDTRS) return IA64_RSVDREG_FAULT;
- trp = &PSCB(vcpu).dtrs[slot];
+ trp = &PSCBX(vcpu,dtrs[slot]);
+//printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa);
vcpu_set_tr_entry(trp,pte,itir,ifa);
return IA64_NO_FAULT;
}
@@ -1403,7 +1553,8 @@ IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte,
TR_ENTRY *trp;
if (slot >= NITRS) return IA64_RSVDREG_FAULT;
- trp = &PSCB(vcpu).itrs[slot];
+ trp = &PSCBX(vcpu,itrs[slot]);
+//printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa);
vcpu_set_tr_entry(trp,pte,itir,ifa);
return IA64_NO_FAULT;
}
@@ -1414,26 +1565,59 @@ IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte,
void foobar(void) { /*vcpu_verbose = 1;*/ }
-extern VCPU *dom0;
+extern struct domain *dom0;
-void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 logps)
+void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps)
{
unsigned long psr;
- unsigned long ps = (vcpu==dom0) ? logps : PAGE_SHIFT;
+ unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
// FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
// FIXME, must be inlined or potential for nested fault here!
+ if ((vcpu->domain==dom0) && (logps < PAGE_SHIFT)) {
+ printf("vcpu_itc_no_srlz: domain0 use of smaller page size!\n");
+ //FIXME: kill domain here
+ while(1);
+ }
psr = ia64_clear_ic();
ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings
ia64_set_psr(psr);
// ia64_srlz_i(); // no srls req'd, will rfi later
- if (IorD & 0x1) vcpu_set_tr_entry(&PSCB(vcpu).itlb,pte,logps<<2,vaddr);
- if (IorD & 0x2) vcpu_set_tr_entry(&PSCB(vcpu).dtlb,pte,logps<<2,vaddr);
+#ifdef VHPT_GLOBAL
+ if (vcpu->domain==dom0 && ((vaddr >> 61) == 7)) {
+ // FIXME: this is dangerous... vhpt_flush_address ensures these
+ // addresses never get flushed. More work needed if this
+ // ever happens.
+//printf("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
+ vhpt_insert(vaddr,pte,logps<<2);
+ }
+ // even if domain pagesize is larger than PAGE_SIZE, just put
+ // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
+ else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2);
+#endif
+ if (IorD & 0x4) return; // don't place in 1-entry TLB
+ if (IorD & 0x1) {
+ vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr);
+ PSCBX(vcpu,itlb_pte) = mp_pte;
+ }
+ if (IorD & 0x2) {
+ vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr);
+ PSCBX(vcpu,dtlb_pte) = mp_pte;
+ }
}
-TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa)
+// NOTE: returns a physical pte, NOT a "metaphysical" pte, so do not check
+// the physical address contained for correctness
+unsigned long match_dtlb(VCPU *vcpu, unsigned long ifa, unsigned long *ps, unsigned long *mp_pte)
{
- return vcpu_match_tr_entry(vcpu,&vcpu->shared_info->arch.dtlb,ifa,1);
+ TR_ENTRY *trp;
+
+ if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1)) {
+ if (ps) *ps = trp->ps;
+ if (mp_pte) *mp_pte = vcpu->arch.dtlb_pte;
+ return (trp->page_flags);
+ }
+ return 0UL;
}
IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
@@ -1441,7 +1625,7 @@ IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
unsigned long pteval, logps = (itir >> 2) & 0x3f;
unsigned long translate_domain_pte(UINT64,UINT64,UINT64);
- if (((itir & ~0xfc) >> 2) < PAGE_SHIFT) {
+ if (logps < PAGE_SHIFT) {
printf("vcpu_itc_d: domain trying to use smaller page size!\n");
//FIXME: kill domain here
while(1);
@@ -1449,7 +1633,7 @@ IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
//itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
pteval = translate_domain_pte(pte,ifa,itir);
if (!pteval) return IA64_ILLOP_FAULT;
- vcpu_itc_no_srlz(vcpu,2,ifa,pteval,logps);
+ vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps);
return IA64_NO_FAULT;
}
@@ -1459,7 +1643,7 @@ IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
unsigned long translate_domain_pte(UINT64,UINT64,UINT64);
// FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
- if (((itir & ~0xfc) >> 2) < PAGE_SHIFT) {
+ if (logps < PAGE_SHIFT) {
printf("vcpu_itc_i: domain trying to use smaller page size!\n");
//FIXME: kill domain here
while(1);
@@ -1468,7 +1652,7 @@ IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
pteval = translate_domain_pte(pte,ifa,itir);
// FIXME: what to do if bad physical address? (machine check?)
if (!pteval) return IA64_ILLOP_FAULT;
- vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,logps);
+ vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps);
return IA64_NO_FAULT;
}
@@ -1478,13 +1662,26 @@ IA64FAULT vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 addr_range)
return IA64_ILLOP_FAULT;
}
+// At privlvl=0, fc performs no access rights or protection key checks, while
+// at privlvl!=0, fc performs access rights checks as if it were a 1-byte
+// read but no protection key check. Thus in order to avoid an unexpected
+// access rights fault, we have to translate the virtual address to a
+// physical address (possibly via a metaphysical address) and do the fc
+// on the physical address, which is guaranteed to flush the same cache line
IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vadr)
{
- UINT64 mpaddr;
+ // TODO: Only allowed for current vcpu
+ UINT64 mpaddr, ps;
IA64FAULT fault;
- unsigned long lookup_domain_mpa(VCPU *,unsigned long);
+ unsigned long match_dtlb(VCPU *, unsigned long, unsigned long *, unsigned long *);
+ unsigned long lookup_domain_mpa(struct domain *,unsigned long);
unsigned long pteval, dom_imva;
+ if (pteval = match_dtlb(vcpu, vadr, NULL, NULL)) {
+ dom_imva = __va(pteval & _PFN_MASK);
+ ia64_fc(dom_imva);
+ return IA64_NO_FAULT;
+ }
fault = vcpu_tpa(vcpu, vadr, &mpaddr);
if (fault == IA64_NO_FAULT) {
struct domain *dom0;
@@ -1494,7 +1691,7 @@ IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vadr)
printk("vcpu_fc: bad dom0 mpaddr %p!\n",mpaddr);
}
}
- pteval = lookup_domain_mpa(vcpu,mpaddr);
+ pteval = lookup_domain_mpa(vcpu->domain,mpaddr);
if (pteval) {
dom_imva = __va(pteval & _PFN_MASK);
ia64_fc(dom_imva);
@@ -1516,10 +1713,13 @@ IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
// base = stride1 = stride2 = 0, count0 = count 1 = 1
// FIXME: When VHPT is in place, flush that too!
+#ifdef VHPT_GLOBAL
+ vhpt_flush(); // FIXME: This is overdoing it
+#endif
local_flush_tlb_all();
// just invalidate the "whole" tlb
- vcpu_purge_tr_entry(&PSCB(vcpu).dtlb);
- vcpu_purge_tr_entry(&PSCB(vcpu).itlb);
+ vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
+ vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
return IA64_NO_FAULT;
}
@@ -1535,9 +1735,12 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
// FIXME: validate not flushing Xen addresses
// if (Xen address) return(IA64_ILLOP_FAULT);
// FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE
+#ifdef VHPT_GLOBAL
+ vhpt_flush_address(vadr,addr_range);
+#endif
ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
- vcpu_purge_tr_entry(&PSCB(vcpu).dtlb);
- vcpu_purge_tr_entry(&PSCB(vcpu).itlb);
+ vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
+ vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
return IA64_NO_FAULT;
}
@@ -1555,5 +1758,5 @@ IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
void vcpu_set_regs(VCPU *vcpu, REGS *regs)
{
- vcpu->regs = regs;
+ vcpu->arch.regs = regs;
}
diff --git a/xen/arch/ia64/vhpt.c b/xen/arch/ia64/vhpt.c
new file mode 100644
index 0000000000..b535f9fc56
--- /dev/null
+++ b/xen/arch/ia64/vhpt.c
@@ -0,0 +1,119 @@
+/*
+ * Initialize VHPT support.
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/dma.h>
+#include <asm/vhpt.h>
+
+unsigned long vhpt_paddr, vhpt_pend, vhpt_pte;
+
+void vhpt_flush(void)
+{
+ struct vhpt_lf_entry *v = (void *)VHPT_ADDR;
+ int i, cnt = 0;
+#if 0
+static int firsttime = 2;
+
+if (firsttime) firsttime--;
+else {
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+}
+#endif
+ for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
+ v->itir = 0;
+ v->CChain = 0;
+ v->page_flags = 0;
+ v->ti_tag = INVALID_TI_TAG;
+ }
+ // initialize cache too???
+}
+
+#ifdef VHPT_GLOBAL
+void vhpt_flush_address(unsigned long vadr, unsigned long addr_range)
+{
+ unsigned long ps;
+ struct vhpt_lf_entry *vlfe;
+
+ if ((vadr >> 61) == 7) {
+ // no vhpt for region 7 yet, see vcpu_itc_no_srlz
+ printf("vhpt_flush_address: region 7, spinning...\n");
+ while(1);
+ }
+#if 0
+ // this only seems to occur at shutdown, but it does occur
+ if ((!addr_range) || addr_range & (addr_range - 1)) {
+ printf("vhpt_flush_address: weird range, spinning...\n");
+ while(1);
+ }
+//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range);
+#endif
+ while ((long)addr_range > 0) {
+ vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
+ // FIXME: for now, just blow it away even if it belongs to
+ // another domain. Later, use ttag to check for match
+//if (!(vlfe->ti_tag & INVALID_TI_TAG)) {
+//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr);
+//}
+ vlfe->ti_tag |= INVALID_TI_TAG;
+ addr_range -= PAGE_SIZE;
+ vadr += PAGE_SIZE;
+ }
+}
+#endif
+
+void vhpt_map(void)
+{
+ unsigned long psr;
+
+ psr = ia64_clear_ic();
+ ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, vhpt_pte, VHPT_SIZE_LOG2);
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+}
+
+void vhpt_init(void)
+{
+ unsigned long vhpt_total_size, vhpt_alignment, vhpt_imva;
+#if !VHPT_ENABLED
+ return;
+#endif
+ // allocate a huge chunk of physical memory.... how???
+ vhpt_total_size = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB
+ vhpt_alignment = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB
+ printf("vhpt_init: vhpt size=%p, align=%p\n",vhpt_total_size,vhpt_alignment);
+ /* This allocation only holds true if vhpt table is unique for
+ * all domains. Or else later new vhpt table should be allocated
+ * from domain heap when each domain is created. Assume xen buddy
+ * allocator can provide natural aligned page by order?
+ */
+ vhpt_imva = alloc_xenheap_pages(VHPT_SIZE_LOG2 - PAGE_SHIFT);
+ if (!vhpt_imva) {
+ printf("vhpt_init: can't allocate VHPT!\n");
+ while(1);
+ }
+ vhpt_paddr = __pa(vhpt_imva);
+ vhpt_pend = vhpt_paddr + vhpt_total_size - 1;
+ printf("vhpt_init: vhpt paddr=%p, end=%p\n",vhpt_paddr,vhpt_pend);
+ vhpt_pte = pte_val(pfn_pte(vhpt_paddr >> PAGE_SHIFT, PAGE_KERNEL));
+ vhpt_map();
+ ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
+ VHPT_ENABLED);
+ vhpt_flush();
+}
+
diff --git a/xen/arch/ia64/vlsapic.c b/xen/arch/ia64/vlsapic.c
new file mode 100644
index 0000000000..c25c91d1fa
--- /dev/null
+++ b/xen/arch/ia64/vlsapic.c
@@ -0,0 +1,471 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vlsapic.c: virtual lsapic model including ITC timer.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ */
+
+#include <linux/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+//u64 fire_itc;
+//u64 fire_itc2;
+//u64 fire_itm;
+//u64 fire_itm2;
+/*
+ * Update the checked last_itc.
+ */
+static void update_last_itc(vtime_t *vtm, uint64_t cur_itc)
+{
+ vtm->last_itc = cur_itc;
+}
+
+/*
+ * ITC value saw in guest (host+offset+drift).
+ */
+static uint64_t now_itc(vtime_t *vtm)
+{
+ uint64_t guest_itc=vtm->vtm_offset+ia64_get_itc(); // guest ITC = host ITC + per-vcpu offset
+
+ if ( vtm->vtm_local_drift ) {
+// guest_itc -= vtm->vtm_local_drift;
+ }
+ if ( (long)(guest_itc - vtm->last_itc) > 0 ) {
+ return guest_itc;
+
+ }
+ else {
+ /* guest ITC appears to have gone backward (e.g. after a switch to another logical processor); return last_itc so the value stays monotonic */
+ return vtm->last_itc;
+ }
+}
+
+/*
+ * Interval time components reset.
+ */
+static void vtm_reset(VCPU *vcpu)
+{
+ uint64_t cur_itc;
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ vtm->vtm_offset = 0;
+ vtm->vtm_local_drift = 0;
+ VPD_CR(vcpu, itm) = 0;
+ VPD_CR(vcpu, itv) = 0x10000;
+ cur_itc = ia64_get_itc();
+ vtm->last_itc = vtm->vtm_offset + cur_itc;
+}
+
+/* callback function when vtm_timer expires */
+static void vtm_timer_fn(void *data)
+{
+ vtime_t *vtm;
+ VCPU *vcpu = data;
+ u64 cur_itc,vitm;
+
+ UINT64 vec;
+
+ vec = VPD_CR(vcpu, itv) & 0xff;
+ vmx_vcpu_pend_interrupt(vcpu, vec);
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ cur_itc = now_itc(vtm);
+ vitm =VPD_CR(vcpu, itm);
+ //fire_itc2 = cur_itc;
+ //fire_itm2 = vitm;
+ update_last_itc(vtm,cur_itc); // pseudo read to update vITC
+}
+
+void vtm_init(VCPU *vcpu)
+{
+ vtime_t *vtm;
+ uint64_t itc_freq;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+
+ itc_freq = local_cpu_data->itc_freq;
+ vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000;
+ vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000;
+ init_ac_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0);
+ vtm_reset(vcpu);
+}
+
+/*
+ * Action when guest read ITC.
+ */
+uint64_t vtm_get_itc(VCPU *vcpu)
+{
+ uint64_t guest_itc, spsr;
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ // FIXME: should use local_irq_disable & local_irq_enable ??
+ local_irq_save(spsr);
+ guest_itc = now_itc(vtm);
+ update_last_itc(vtm, guest_itc);
+
+ local_irq_restore(spsr);
+ return guest_itc;
+}
+
+void vtm_set_itc(VCPU *vcpu, uint64_t new_itc)
+{
+ uint64_t spsr;
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ local_irq_save(spsr);
+ vtm->vtm_offset = new_itc - ia64_get_itc();
+ vtm->last_itc = new_itc;
+ vtm_interruption_update(vcpu, vtm);
+ local_irq_restore(spsr);
+}
+
+void vtm_set_itv(VCPU *vcpu)
+{
+ uint64_t spsr,itv;
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ local_irq_save(spsr);
+ itv = VPD_CR(vcpu, itv);
+ if ( ITV_IRQ_MASK(itv) )
+ rem_ac_timer(&vtm->vtm_timer);
+ vtm_interruption_update(vcpu, vtm);
+ local_irq_restore(spsr);
+}
+
+
+/*
+ * Update interrupt or hook the vtm ac_timer for fire
+ * At this point vtm_timer should be removed if itv is masked.
+ */
+/* Interrupt must be disabled at this point */
+
+extern u64 tick_to_ns(u64 tick);
+#define TIMER_SLOP (50*1000) /* ns */ /* copy from ac_timer.c */
+void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
+{
+ uint64_t cur_itc,vitm,vitv;
+ uint64_t expires;
+ long diff_now, diff_last;
+ uint64_t spsr;
+
+ vitv = VPD_CR(vcpu, itv);
+ if ( ITV_IRQ_MASK(vitv) ) {
+ return;
+ }
+
+ vitm =VPD_CR(vcpu, itm);
+ local_irq_save(spsr);
+ cur_itc =now_itc(vtm);
+ diff_last = vtm->last_itc - vitm;
+ diff_now = cur_itc - vitm;
+ update_last_itc (vtm,cur_itc);
+
+ if ( diff_last >= 0 ) {
+ // interrupt already fired.
+ rem_ac_timer(&vtm->vtm_timer);
+ }
+ else if ( diff_now >= 0 ) {
+ // ITV is fired.
+ vmx_vcpu_pend_interrupt(vcpu, vitv&0xff);
+ }
+ /* Both last_itc & cur_itc < itm, wait for fire condition */
+ else {
+ expires = NOW() + tick_to_ns(0-diff_now) + TIMER_SLOP;
+ set_ac_timer(&vtm->vtm_timer, expires);
+ }
+ local_irq_restore(spsr);
+}
+
+/*
+ * Action for vtm when the domain is scheduled out.
+ * Remove the ac_timer for vtm.
+ */
+void vtm_domain_out(VCPU *vcpu)
+{
+ rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
+}
+
+/*
+ * Action for vtm when the domain is scheduled in.
+ * Fire vtm IRQ or add the ac_timer for vtm.
+ */
+void vtm_domain_in(VCPU *vcpu)
+{
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ vtm_interruption_update(vcpu, vtm);
+}
+
+/*
+ * Next for vLSapic
+ */
+
+#define NMI_VECTOR 2
+#define ExtINT_VECTOR 0
+
+#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.arch_vmx.in_service[i])
+/*
+ * LID-CR64: Keep in vpd.
+ * IVR-CR65: (RO) see guest_read_ivr().
+ * TPR-CR66: Keep in vpd, acceleration enabled.
+ * EOI-CR67: see guest_write_eoi().
+ * IRR0-3 - CR68-71: (RO) Keep in vpd irq_pending[]
+ * can move to vpd for optimization.
+ * ITV: in time virtualization.
+ * PMV: Keep in vpd initialized as 0x10000.
+ * CMCV: Keep in vpd initialized as 0x10000.
+ * LRR0-1: Keep in vpd, initialized as 0x10000.
+ *
+ */
+
+void vlsapic_reset(VCPU *vcpu)
+{
+ int i;
+ VPD_CR(vcpu, lid) = 0;
+ VPD_CR(vcpu, ivr) = 0;
+ VPD_CR(vcpu,tpr) = 0x10000;
+ VPD_CR(vcpu, eoi) = 0;
+ VPD_CR(vcpu, irr[0]) = 0;
+ VPD_CR(vcpu, irr[1]) = 0;
+ VPD_CR(vcpu, irr[2]) = 0;
+ VPD_CR(vcpu, irr[3]) = 0;
+ VPD_CR(vcpu, pmv) = 0x10000;
+ VPD_CR(vcpu, cmcv) = 0x10000;
+ VPD_CR(vcpu, lrr0) = 0x10000; // default reset value?
+ VPD_CR(vcpu, lrr1) = 0x10000; // default reset value?
+ for ( i=0; i<4; i++) {
+ VLSAPIC_INSVC(vcpu,i) = 0;
+ }
+}
+
+/*
+ * Find highest signaled bits in 4 words (long).
+ *
+ * return 0-255: highest bits.
+ * -1 : Not found.
+ */
+static __inline__ int highest_bits(uint64_t *dat)
+{
+ uint64_t bits, bitnum=-1;
+ int i;
+
+ /* loop for all 256 bits */
+ for ( i=3; i >= 0 ; i -- ) {
+ bits = dat[i];
+ if ( bits ) {
+ bitnum = ia64_fls(bits);
+ return i*64+bitnum;
+ }
+ }
+ return -1;
+}
+
+/*
+ * Return 0-255 for pending irq.
+ * -1 when no pending.
+ */
+static int highest_pending_irq(VCPU *vcpu)
+{
+ if ( VPD_CR(vcpu, irr[0]) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
+ if ( VPD_CR(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
+ return highest_bits(&VPD_CR(vcpu, irr[0]));
+}
+
+static int highest_inservice_irq(VCPU *vcpu)
+{
+ if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
+ if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
+ return highest_bits(&(VLSAPIC_INSVC(vcpu, 0)));
+}
+
+/*
+ * The pending irq is higher than the inservice one.
+ *
+ */
+static int is_higher_irq(int pending, int inservice)
+{
+ return ( (pending >> 4) > (inservice>>4) ||
+ ((pending != -1) && (inservice == -1)) );
+}
+
+static int is_higher_class(int pending, int mic)
+{
+ return ( (pending >> 4) > mic );
+}
+
+static int is_invalid_irq(int vec)
+{
+ return (vec == 1 || ((vec <= 14 && vec >= 3)));
+}
+
+/* See Table 5-8 in SDM vol2 for the definition */
+static int
+irq_masked(VCPU *vcpu, int h_pending, int h_inservice)
+{
+ uint64_t vtpr;
+
+ vtpr = VPD_CR(vcpu, tpr);
+
+ if ( h_pending == NMI_VECTOR && h_inservice != NMI_VECTOR )
+ // Non Maskable Interrupt
+ return 0;
+
+ if ( h_pending == ExtINT_VECTOR && h_inservice >= 16)
+ return (vtpr>>16)&1; // vtpr.mmi
+
+ if ( !(vtpr&(1UL<<16)) &&
+ is_higher_irq(h_pending, h_inservice) &&
+ is_higher_class(h_pending, (vtpr>>4)&0xf) )
+ return 0;
+
+ return 1;
+}
+
+void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
+{
+ uint64_t spsr;
+
+ if (vector & ~0xff) {
+ printf("vmx_vcpu_pend_interrupt: bad vector\n");
+ return;
+ }
+ local_irq_save(spsr);
+ VPD_CR(vcpu,irr[vector>>6]) |= 1UL<<(vector&63);
+ local_irq_restore(spsr);
+}
+
+/*
+ * If the new pending interrupt is enabled and not masked, we directly inject
+ * it into the guest. Otherwise, we set the VHPI if vac.a_int=1 so that when
+ * the interrupt becomes unmasked, it gets injected.
+ * RETURN:
+ * TRUE: Interrupt is injected.
+ * FALSE: Not injected but may be in VHPI when vac.a_int=1
+ *
+ * Optimization: We defer setting the VHPI until the EOI time, if a higher
+ * priority interrupt is in-service. The idea is to reduce the
+ * number of unnecessary calls to inject_vhpi.
+ */
+int vmx_check_pending_irq(VCPU *vcpu)
+{
+ uint64_t spsr;
+ int h_pending, h_inservice;
+ int injected=0;
+ uint64_t isr;
+ IA64_PSR vpsr;
+
+ local_irq_save(spsr);
+ h_pending = highest_pending_irq(vcpu);
+ if ( h_pending == -1 ) goto chk_irq_exit;
+ h_inservice = highest_inservice_irq(vcpu);
+
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.i &&
+ !irq_masked(vcpu, h_pending, h_inservice) ) {
+ //inject_guest_irq(v);
+ isr = vpsr.val & IA64_PSR_RI;
+ if ( !vpsr.ic )
+ panic("Interrupt when IC=0\n");
+ vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
+ injected = 1;
+ }
+ else if ( VMX_VPD(vcpu,vac).a_int &&
+ is_higher_irq(h_pending,h_inservice) ) {
+ vmx_inject_vhpi(vcpu,h_pending);
+ }
+
+chk_irq_exit:
+ local_irq_restore(spsr);
+ return injected;
+}
+
+void guest_write_eoi(VCPU *vcpu)
+{
+ int vec;
+ uint64_t spsr;
+
+ vec = highest_inservice_irq(vcpu);
+ if ( vec < 0 ) panic("Wrong vector to EOI\n");
+ local_irq_save(spsr);
+ VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
+ local_irq_restore(spsr);
+ VPD_CR(vcpu, eoi)=0; // overwrite the data
+}
+
+uint64_t guest_read_vivr(VCPU *vcpu)
+{
+ int vec, next, h_inservice;
+ uint64_t spsr;
+
+ local_irq_save(spsr);
+ vec = highest_pending_irq(vcpu);
+ h_inservice = highest_inservice_irq(vcpu);
+ if ( vec < 0 || irq_masked(vcpu, vec, h_inservice) ) {
+ local_irq_restore(spsr);
+ return IA64_SPURIOUS_INT_VECTOR;
+ }
+
+ VLSAPIC_INSVC(vcpu,vec>>6) |= (1UL <<(vec&63));
+ VPD_CR(vcpu, irr[vec>>6]) &= ~(1UL <<(vec&63));
+
+ h_inservice = highest_inservice_irq(vcpu);
+ next = highest_pending_irq(vcpu);
+ if ( VMX_VPD(vcpu,vac).a_int &&
+ (is_higher_irq(next, h_inservice) || (next == -1)) )
+ vmx_inject_vhpi(vcpu, next);
+ local_irq_restore(spsr);
+ return (uint64_t)vec;
+}
+
+void vmx_inject_vhpi(VCPU *vcpu, u8 vec)
+{
+ VMX_VPD(vcpu,vhpi) = vec / 16; // default: VHPI encodes the priority class (vector>>4)
+
+
+ // non-maskable
+ if ( vec == NMI_VECTOR ) // NMI
+ VMX_VPD(vcpu,vhpi) = 32;
+ else if (vec == ExtINT_VECTOR) //ExtINT
+ VMX_VPD(vcpu,vhpi) = 16;
+ else if (vec == -1) // BUG? vec is u8 so this compares 0..255 with -1 and is always false on most ABIs; callers pass -1 for "none", which arrives here as 0xff — TODO confirm intended check (vec == 0xff?)
+ VMX_VPD(vcpu,vhpi) = 0; /* Nothing pending */
+
+ ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT,
+ (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0,0,0,0,0);
+}
+
diff --git a/xen/arch/ia64/vmmu.c b/xen/arch/ia64/vmmu.c
new file mode 100644
index 0000000000..c39d6f2851
--- /dev/null
+++ b/xen/arch/ia64/vmmu.c
@@ -0,0 +1,802 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmmu.c: virtual memory management unit components.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/tlb.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vcpu.h>
+#include <xen/interrupt.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+/*
+ * Architecture ppn is in 4KB unit while XEN
+ * page may be different(1<<PAGE_SHIFT).
+ */
+static inline u64 arch_ppn_to_xen_ppn(u64 appn)
+{
+ return (appn << ARCH_PAGE_SHIFT) >> PAGE_SHIFT;
+}
+
+static inline u64 xen_ppn_to_arch_ppn(u64 xppn)
+{
+ return (xppn << PAGE_SHIFT) >> ARCH_PAGE_SHIFT;
+}
+
+
+/*
+ * Get the machine page frame number in 16KB unit
+ * Input:
+ * d:
+ */
+u64 get_mfn(domid_t domid, u64 gpfn, u64 pages)
+{
+ struct domain *d;
+ u64 i, xen_gppn, xen_mppn, mpfn;
+
+ if ( domid == DOMID_SELF ) {
+ d = current->domain;
+ }
+ else {
+ d = find_domain_by_id(domid);
+ }
+ xen_gppn = arch_ppn_to_xen_ppn(gpfn);
+ xen_mppn = __gpfn_to_mfn(d, xen_gppn);
+/*
+ for (i=0; i<pages; i++) {
+ if ( __gpfn_to_mfn(d, gpfn+i) == INVALID_MFN ) {
+ return INVALID_MFN;
+ }
+ }
+*/
+ mpfn= xen_ppn_to_arch_ppn(xen_mppn);
+ mpfn = mpfn | (((1UL <<(PAGE_SHIFT-12))-1)&gpfn);
+ return mpfn;
+
+}
+
+/*
+ * The VRN bits of va stand for which rr to get.
+ */
+rr_t vmmu_get_rr(VCPU *vcpu, u64 va)
+{
+ rr_t vrr;
+ vmx_vcpu_get_rr(vcpu, va, &vrr.value);
+ return vrr;
+}
+
+
+void recycle_message(thash_cb_t *hcb, u64 para)
+{
+ printk("hcb=%p recycled with %lx\n",hcb,para);
+}
+
+
+/*
+ * Purge all guest TCs in logical processor.
+ * Instead of purging all LP TCs, we should only purge
+ * TCs that belong to this guest.
+ */
+void
+purge_machine_tc_by_domid(domid_t domid)
+{
+#ifndef PURGE_GUEST_TC_ONLY
+ // purge all TCs
+ struct ia64_pal_retval result;
+ u64 addr;
+ u32 count1,count2;
+ u32 stride1,stride2;
+ u32 i,j;
+ u64 psr;
+
+
+ result = ia64_pal_call_static(PAL_PTCE_INFO,0,0,0, 0);
+ if ( result.status != 0 ) {
+ panic ("PAL_PTCE_INFO failed\n");
+ }
+ addr = result.v0;
+ count1 = HIGH_32BITS(result.v1);
+ count2 = LOW_32BITS (result.v1);
+ stride1 = HIGH_32BITS(result.v2);
+ stride2 = LOW_32BITS (result.v2);
+
+ local_irq_save(psr);
+ for (i=0; i<count1; i++) {
+ for (j=0; j<count2; j++) {
+ ia64_ptce(addr);
+ addr += stride2;
+ }
+ addr += stride1;
+ }
+ local_irq_restore(psr);
+#else
+ // purge all TCs belong to this guest.
+#endif
+}
+
+static thash_cb_t *init_domain_vhpt(struct vcpu *d)
+{
+ struct pfn_info *page;
+ void *vbase,*vcur;
+ vhpt_special *vs;
+ thash_cb_t *vhpt;
+ PTA pta_value;
+
+ page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER);
+ if ( page == NULL ) {
+ panic("No enough contiguous memory for init_domain_mm\n");
+ }
+ vbase = page_to_virt(page);
+ printk("Allocate domain vhpt at 0x%lx\n", (u64)vbase);
+ memset(vbase, 0, VCPU_TLB_SIZE);
+ vcur = (void*)((u64)vbase + VCPU_TLB_SIZE);
+ vhpt = --((thash_cb_t*)vcur);
+ vhpt->ht = THASH_VHPT;
+ vhpt->vcpu = d;
+ vhpt->hash_func = machine_thash;
+ vs = --((vhpt_special *)vcur);
+
+ /* Setup guest pta */
+ pta_value.val = 0;
+ pta_value.ve = 1;
+ pta_value.vf = 1;
+ pta_value.size = VCPU_TLB_SHIFT - 1; /* 2M */
+ pta_value.base = ((u64)vbase) >> PTA_BASE_SHIFT;
+ d->arch.arch_vmx.mpta = pta_value.val;
+
+ vhpt->vs = vs;
+ vhpt->vs->get_mfn = get_mfn;
+ vhpt->vs->tag_func = machine_ttag;
+ vhpt->hash = vbase;
+ vhpt->hash_sz = VCPU_TLB_SIZE/2;
+ vhpt->cch_buf = (u64)vbase + vhpt->hash_sz;
+ vhpt->cch_sz = (u64)vcur - (u64)vhpt->cch_buf;
+ vhpt->recycle_notifier = recycle_message;
+ thash_init(vhpt,VCPU_TLB_SHIFT-1);
+ return vhpt;
+}
+
+
+thash_cb_t *init_domain_tlb(struct vcpu *d)
+{
+ struct pfn_info *page;
+ void *vbase,*vcur;
+ tlb_special_t *ts;
+ thash_cb_t *tlb;
+
+ page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER);
+ if ( page == NULL ) {
+ panic("No enough contiguous memory for init_domain_mm\n");
+ }
+ vbase = page_to_virt(page);
+ printk("Allocate domain tlb at 0x%lx\n", (u64)vbase);
+ memset(vbase, 0, VCPU_TLB_SIZE);
+ vcur = (void*)((u64)vbase + VCPU_TLB_SIZE);
+ tlb = --((thash_cb_t*)vcur);
+ tlb->ht = THASH_TLB;
+ tlb->vcpu = d;
+ ts = --((tlb_special_t *)vcur);
+ tlb->ts = ts;
+ tlb->ts->vhpt = init_domain_vhpt(d);
+ tlb->hash_func = machine_thash;
+ tlb->hash = vbase;
+ tlb->hash_sz = VCPU_TLB_SIZE/2;
+ tlb->cch_buf = (u64)vbase + tlb->hash_sz;
+ tlb->cch_sz = (u64)vcur - (u64)tlb->cch_buf;
+ tlb->recycle_notifier = recycle_message;
+ thash_init(tlb,VCPU_TLB_SHIFT-1);
+ return tlb;
+}
+
+/* Allocate physical to machine mapping table for domN
+ * FIXME: Later this interface may be removed, if that table is provided
+ * by control panel. Dom0 has gpfn identical to mfn, which doesn't need
+ * this interface at all.
+ */
+void
+alloc_pmt(struct domain *d)
+{
+ struct pfn_info *page;
+
+ /* Only called once */
+ ASSERT(!d->arch.pmt); // table must NOT already exist; original asserted the inverse
+
+ page = alloc_domheap_pages(NULL, get_order(d->max_pages));
+ ASSERT(page);
+
+ d->arch.pmt = page_to_virt(page);
+ memset(d->arch.pmt, 0x55, d->max_pages * 8);
+}
+
+/*
+ * Insert guest TLB to machine TLB.
+ * data: In TLB format
+ */
+void machine_tlb_insert(struct vcpu *d, thash_data_t *tlb)
+{
+ u64 saved_itir, saved_ifa, saved_rr;
+ u64 pages;
+ thash_data_t mtlb;
+ rr_t vrr;
+ unsigned int cl = tlb->cl;
+
+ mtlb.ifa = tlb->vadr;
+ mtlb.itir = tlb->itir & ~ITIR_RV_MASK;
+ vrr = vmmu_get_rr(d,mtlb.ifa);
+ //vmx_vcpu_get_rr(d, mtlb.ifa, &vrr.value);
+ pages = PSIZE(vrr.ps) >> PAGE_SHIFT;
+ mtlb.page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK;
+ mtlb.ppn = get_mfn(DOMID_SELF,tlb->ppn, pages);
+ if (mtlb.ppn == INVALID_MFN)
+ panic("Machine tlb insert with invalid mfn number.\n");
+
+ __asm __volatile("rsm psr.ic|psr.i;; srlz.i" );
+
+ saved_itir = ia64_getreg(_IA64_REG_CR_ITIR);
+ saved_ifa = ia64_getreg(_IA64_REG_CR_IFA);
+ saved_rr = ia64_get_rr(mtlb.ifa);
+
+ ia64_setreg(_IA64_REG_CR_ITIR, mtlb.itir);
+ ia64_setreg(_IA64_REG_CR_IFA, mtlb.ifa);
+ /* Only access memory stack which is mapped by TR,
+ * after rr is switched.
+ */
+ ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.value));
+ ia64_srlz_d();
+ if ( cl == ISIDE_TLB ) {
+ ia64_itci(mtlb.page_flags);
+ ia64_srlz_i();
+ }
+ else {
+ ia64_itcd(mtlb.page_flags);
+ ia64_srlz_d();
+ }
+ ia64_set_rr(mtlb.ifa,saved_rr);
+ ia64_srlz_d();
+ ia64_setreg(_IA64_REG_CR_IFA, saved_ifa);
+ ia64_setreg(_IA64_REG_CR_ITIR, saved_itir);
+ __asm __volatile("ssm psr.ic|psr.i;; srlz.i" );
+}
+
+u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps)
+{
+ u64 saved_pta, saved_rr0;
+ u64 hash_addr; // (unused local 'tag' removed)
+ unsigned long psr;
+ struct vcpu *v = current;
+ rr_t vrr;
+
+
+ saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
+ saved_rr0 = ia64_get_rr(0);
+ vrr.value = saved_rr0;
+ vrr.rid = rid;
+ vrr.ps = ps;
+
+ va = (va << 3) >> 3; // set VRN to 0.
+ // TODO: Set to enforce lazy mode
+ local_irq_save(psr);
+ ia64_setreg(_IA64_REG_CR_PTA, pta.val);
+ ia64_set_rr(0, vmx_vrrtomrr(v, vrr.value));
+ ia64_srlz_d();
+
+ hash_addr = ia64_thash(va); // compute hash with guest PTA/rr temporarily installed
+ ia64_setreg(_IA64_REG_CR_PTA, saved_pta);
+
+ ia64_set_rr(0, saved_rr0);
+ ia64_srlz_d();
+ local_irq_restore(psr);
+ return hash_addr;
+}
+
+u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps)
+{
+ u64 saved_pta, saved_rr0;
+ u64 tag; // (unused local 'hash_addr' removed)
+ u64 psr;
+ struct vcpu *v = current;
+ rr_t vrr;
+
+ // TODO: Set to enforce lazy mode
+ saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
+ saved_rr0 = ia64_get_rr(0);
+ vrr.value = saved_rr0;
+ vrr.rid = rid;
+ vrr.ps = ps;
+
+ va = (va << 3) >> 3; // set VRN to 0.
+ local_irq_save(psr);
+ ia64_setreg(_IA64_REG_CR_PTA, pta.val);
+ ia64_set_rr(0, vmx_vrrtomrr(v, vrr.value));
+ ia64_srlz_d();
+
+ tag = ia64_ttag(va); // compute tag with guest PTA/rr temporarily installed
+ ia64_setreg(_IA64_REG_CR_PTA, saved_pta);
+
+ ia64_set_rr(0, saved_rr0);
+ ia64_srlz_d();
+ local_irq_restore(psr);
+ return tag;
+}
+
+/*
+ * Purge machine tlb.
+ * INPUT
+ * rr: guest rr.
+ * va: only bits 0:60 is valid
+ * size: bits format (1<<size) for the address range to purge.
+ *
+ */
+void machine_tlb_purge(u64 rid, u64 va, u64 ps)
+{
+ u64 saved_rr0;
+ u64 psr;
+ rr_t vrr;
+
+ va = (va << 3) >> 3; // set VRN to 0.
+ saved_rr0 = ia64_get_rr(0);
+ vrr.value = saved_rr0;
+ vrr.rid = rid;
+ vrr.ps = ps;
+ local_irq_save(psr);
+ ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.value) );
+ ia64_srlz_d();
+ ia64_ptcl(va, ps << 2);
+ ia64_set_rr( 0, saved_rr0 );
+ ia64_srlz_d();
+ local_irq_restore(psr);
+}
+
+
+int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref)
+{
+ ia64_rr vrr;
+ PTA vpta;
+ IA64_PSR vpsr;
+
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ vrr = vmx_vcpu_rr(vcpu, vadr);
+ vmx_vcpu_get_pta(vcpu,&vpta.val);
+
+ if ( vrr.ve & vpta.ve ) {
+ switch ( ref ) {
+ case DATA_REF:
+ case NA_REF:
+ return vpsr.dt;
+ case INST_REF:
+ return vpsr.dt && vpsr.it && vpsr.ic;
+ case RSE_REF:
+ return vpsr.dt && vpsr.rt;
+
+ }
+ }
+ return 0;
+}
+
+
+int unimplemented_gva(VCPU *vcpu,u64 vadr)
+{
+ int bit=vcpu->domain->arch.imp_va_msb;
+ u64 ladr =(vadr<<3)>>(3+bit); // strip VRN, keep unimplemented-address bits
+ if(!ladr||ladr==(1UL<<(61-bit))-1){ // 1UL: 61-bit can be >=32, so a 32-bit 1U shift would be UB and the mask is compared with a u64
+ return 0;
+ }else{
+ return 1;
+ }
+}
+
+
+/*
+ * Prefetch guest bundle code.
+ * INPUT:
+ * code: buffer pointer to hold the read data.
+ * num: number of dword (8byts) to read.
+ */
+int
+fetch_code(VCPU *vcpu, u64 gip, u64 *code)
+{
+ u64 gpip; // guest physical IP
+ u64 mpa;
+ thash_data_t *tlb;
+ rr_t vrr;
+ u64 mfn;
+
+ if ( !(VMX_VPD(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode
+ gpip = gip;
+ }
+ else {
+ vmx_vcpu_get_rr(vcpu, gip, &vrr.value);
+ tlb = vtlb_lookup_ex (vmx_vcpu_get_vtlb(vcpu),
+ vrr.rid, gip, ISIDE_TLB );
+ if ( tlb == NULL ) panic("No entry found in ITLB\n");
+ gpip = (tlb->ppn << 12) | ( gip & (PSIZE(tlb->ps)-1) );
+ }
+ mfn = __gpfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT);
+ if ( mfn == INVALID_MFN ) return 0;
+
+ mpa = (gpip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT);
+ *code = *(u64*)__va(mpa);
+ return 1;
+}
+
+IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ rr_t vrr;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=itir;
+ data.vadr=PAGEALIGN(ifa,data.ps);
+ data.section=THASH_TLB_TC;
+ data.cl=ISIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+ data.rid = vrr.rid;
+
+ sections.v = THASH_SECTION_TR;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+ if (ovl) { // was "while": body unconditionally panics, so a plain conditional
+ // generate MCA.
+ panic("Tlb conflict!!");
+ return IA64_FAULT; // was bare "return;" — invalid in a function returning IA64FAULT
+ }
+ sections.v = THASH_SECTION_TC;
+ thash_purge_entries(hcb, &data, sections);
+ thash_insert(hcb, &data, ifa);
+ return IA64_NO_FAULT;
+}
+
+
+
+
+IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ rr_t vrr;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=itir;
+ data.vadr=PAGEALIGN(ifa,data.ps);
+ data.section=THASH_TLB_TC;
+ data.cl=DSIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+ data.rid = vrr.rid;
+ sections.v = THASH_SECTION_TR;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+ if (ovl) {
+ // generate MCA.
+ panic("Tlb conflict!!");
+ return IA64_FAULT; // was bare "return;" — invalid in a function returning IA64FAULT
+ }
+ sections.v = THASH_SECTION_TC;
+ thash_purge_entries(hcb, &data, sections);
+ thash_insert(hcb, &data, ifa);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT insert_foreignmap(VCPU *vcpu, UINT64 pte, UINT64 ps, UINT64 va)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ rr_t vrr;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=0;
+ data.ps = ps;
+ data.vadr=PAGEALIGN(va,ps);
+ data.section=THASH_TLB_FM;
+ data.cl=DSIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, va, &vrr);
+ data.rid = vrr.rid;
+ sections.v = THASH_SECTION_TR|THASH_SECTION_TC|THASH_SECTION_FM;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+ if (ovl) {
+ // generate MCA.
+ panic("Foreignmap Tlb conflict!!");
+ return IA64_FAULT; // was bare "return;" — invalid in a function returning IA64FAULT
+ }
+ thash_insert(hcb, &data, va);
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ rr_t vrr;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=itir;
+ data.vadr=PAGEALIGN(ifa,data.ps);
+ data.section=THASH_TLB_TR;
+ data.cl=ISIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+ data.rid = vrr.rid;
+ sections.v = THASH_SECTION_TR;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+ if (ovl) {
+ // generate MCA.
+ panic("Tlb conflict!!");
+ return IA64_FAULT; // was bare "return;" — invalid in a function returning IA64FAULT
+ }
+ sections.v=THASH_SECTION_TC;
+ thash_purge_entries(hcb, &data, sections);
+ thash_tr_insert(hcb, &data, ifa, idx);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ rr_t vrr;
+
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=itir;
+ data.vadr=PAGEALIGN(ifa,data.ps);
+ data.section=THASH_TLB_TR;
+ data.cl=DSIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+ data.rid = vrr.rid;
+ sections.v = THASH_SECTION_TR;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+ if (ovl) { // was "while": body unconditionally panics, so a plain conditional
+ // generate MCA.
+ panic("Tlb conflict!!");
+ return IA64_FAULT; // was bare "return;" — invalid in a function returning IA64FAULT
+ }
+ sections.v=THASH_SECTION_TC;
+ thash_purge_entries(hcb, &data, sections);
+ thash_tr_insert(hcb, &data, ifa, idx);
+ return IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+ thash_cb_t *hcb;
+ ia64_rr rr;
+ search_section_t sections;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ rr=vmx_vcpu_rr(vcpu,vadr);
+ sections.v = THASH_SECTION_TR | THASH_SECTION_TC;
+ thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,DSIDE_TLB);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+ thash_cb_t *hcb;
+ ia64_rr rr;
+ search_section_t sections;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ rr=vmx_vcpu_rr(vcpu,vadr);
+ sections.v = THASH_SECTION_TR | THASH_SECTION_TC;
+ thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,ISIDE_TLB);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps)
+{
+ thash_cb_t *hcb;
+ ia64_rr vrr;
+ search_section_t sections;
+ thash_data_t data, *ovl;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ vrr=vmx_vcpu_rr(vcpu,vadr);
+ sections.v = THASH_SECTION_TC;
+ vadr = PAGEALIGN(vadr, ps);
+
+ thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,DSIDE_TLB);
+ thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,ISIDE_TLB);
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
+{
+ thash_cb_t *hcb;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ thash_purge_all(hcb);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps)
+{
+ vmx_vcpu_ptc_l(vcpu, vadr, ps);
+ return IA64_ILLOP_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+ vmx_vcpu_ptc_l(vcpu, vadr, ps);
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
+{
+ PTA vpta;
+ ia64_rr vrr;
+ u64 vhpt_offset,tmp;
+ vmx_vcpu_get_pta(vcpu, &vpta.val);
+ vrr=vmx_vcpu_rr(vcpu, vadr);
+ if(vpta.vf){
+ panic("THASH,Don't support long format VHPT");
+ *pval = ia64_call_vsa(PAL_VPS_THASH,vadr,vrr.rrval,vpta.val,0,0,0,0);
+ }else{
+ vhpt_offset=((vadr>>vrr.ps)<<3)&((1UL<<(vpta.size))-1);
+ *pval = (vadr&VRN_MASK)|
+ (vpta.val<<3>>(vpta.size+3)<<(vpta.size))|
+ vhpt_offset;
+ }
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
+{
+ ia64_rr vrr;
+ PTA vpta;
+ vmx_vcpu_get_pta(vcpu, &vpta.val);
+ vrr=vmx_vcpu_rr(vcpu, vadr);
+ if(vpta.vf){
+ panic("THASH,Don't support long format VHPT");
+ *pval = ia64_call_vsa(PAL_VPS_TTAG,vadr,vrr.rrval,0,0,0,0,0);
+ }else{
+ *pval = 1;
+ }
+ return IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
+{
+ thash_data_t *data;
+ thash_cb_t *hcb;
+ ia64_rr vrr;
+ ISR visr,pt_isr;
+ REGS *regs;
+ u64 vhpt_adr;
+ IA64_PSR vpsr;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ vrr=vmx_vcpu_rr(vcpu,vadr);
+ regs=vcpu_regs(vcpu);
+ pt_isr.val=regs->cr_isr;
+ visr.val=0;
+ visr.ei=pt_isr.ei;
+ visr.ir=pt_isr.ir;
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ if(vpsr.ic==0){
+ visr.ni=1;
+ }
+ visr.na=1;
+ data = vtlb_lookup_ex(hcb, vrr.rid, vadr, DSIDE_TLB);
+ if(data){
+ if(data->p==0){
+ visr.na=1;
+ vmx_vcpu_set_isr(vcpu,visr.val);
+ page_not_present(vcpu, vadr);
+ return IA64_FAULT;
+ }else if(data->ma == VA_MATTR_NATPAGE){
+ visr.na = 1;
+ vmx_vcpu_set_isr(vcpu, visr.val);
+ dnat_page_consumption(vcpu, vadr);
+ return IA64_FAULT;
+ }else{
+ *padr = (data->ppn<<12) | (vadr&(PSIZE(data->ps)-1));
+ return IA64_NO_FAULT;
+ }
+ }else{
+ if(!vhpt_enabled(vcpu, vadr, NA_REF)){
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, visr.val);
+ alt_dtlb(vcpu, vadr);
+ return IA64_FAULT;
+ }
+ else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ else{
+ vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+ vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+ data = vtlb_lookup_ex(hcb, vrr.rid, vhpt_adr, DSIDE_TLB);
+ if(data){
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, visr.val);
+ dtlb_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }
+ else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ else{
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, visr.val);
+ dvhpt_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }
+ else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ }
+ }
+}
+
+IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
+{
+ thash_data_t *data;
+ thash_cb_t *hcb;
+ ia64_rr rr;
+ PTA vpta;
+ vmx_vcpu_get_pta(vcpu, &vpta.val);
+ if(vpta.vf==0 || unimplemented_gva(vcpu, vadr)){
+ *key=1;
+ return IA64_NO_FAULT;
+ }
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ rr=vmx_vcpu_rr(vcpu,vadr);
+ data = vtlb_lookup_ex(hcb, rr.rid, vadr, DSIDE_TLB);
+ if(!data||!data->p){
+ *key=1;
+ }else{
+ *key=data->key;
+ }
+ return IA64_NO_FAULT;
+}
+
diff --git a/xen/arch/ia64/vmx_entry.S b/xen/arch/ia64/vmx_entry.S
new file mode 100644
index 0000000000..682a69df8a
--- /dev/null
+++ b/xen/arch/ia64/vmx_entry.S
@@ -0,0 +1,611 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_entry.S:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ */
+
+#ifndef VCPU_TLB_SHIFT
+#define VCPU_TLB_SHIFT 22
+#endif
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/percpu.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+#include "vmx_minstate.h"
+
+/*
+ * prev_task <- vmx_ia64_switch_to(struct task_struct *next)
+ * With Ingo's new scheduler, interrupts are disabled when this routine gets
+ * called. The code starting at .map relies on this. The rest of the code
+ * doesn't care about the interrupt masking status.
+ *
+ * Since we allocate domain stack in xenheap, there's no need to map new
+ * domain's stack since all xenheap is mapped by TR. Another different task
+ * for vmx_ia64_switch_to is to switch to bank0 and change current pointer.
+ */
+GLOBAL_ENTRY(vmx_ia64_switch_to)
+	.prologue
+	alloc r16=ar.pfs,1,0,0,0
+	DO_SAVE_SWITCH_STACK
+	.body
+
+	bsw.0	// Switch to bank0, because bank0 r21 is current pointer
+	;;
+	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+	movl r25=init_task	// NOTE(review): r25 is not used below -- confirm it can be dropped
+	adds r26=IA64_TASK_THREAD_KSP_OFFSET,in0
+	;;
+	st8 [r22]=sp	// save kernel stack pointer of old task
+	;;
+	/*
+	 * TR always mapped this task's page, we can skip doing it again.
+	 */
+	ld8 sp=[r26]	// load kernel stack pointer of new task
+	mov r21=in0	// update "current" application register (bank0 r21)
+	mov r8=r13	// return pointer to previously running task
+	mov r13=in0	// set "current" pointer
+	;;
+	bsw.1	// back to bank1 before restoring the switch stack
+	;;
+	DO_LOAD_SWITCH_STACK
+
+#ifdef CONFIG_SMP
+	sync.i	// ensure "fc"s done by this CPU are visible on other CPUs
+#endif
+	br.ret.sptk.many rp	// boogie on out in new context
+END(vmx_ia64_switch_to)
+
+/*
+ * Return path used when unwinding a fault taken while already inside the
+ * VMM: restores the complete pt_regs frame (static r1-r31, f6-f11,
+ * b0/b6/b7, ar.* and cr.ipsr/iip/ifs) from the frame at [r12+16] and
+ * issues rfi.  Interruption collection is disabled (rsm psr.i|psr.ic +
+ * srlz.i) before the bank-0 portion of the restore.
+ */
+GLOBAL_ENTRY(ia64_leave_nested)
+	rsm psr.i
+	;;
+	adds r21=PT(PR)+16,r12
+	;;
+
+	lfetch [r21],PT(CR_IPSR)-PT(PR)
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	;;
+	lfetch [r21]
+	ld8 r28=[r2],8	// load b6
+	adds r29=PT(R24)+16,r12
+
+	ld8.fill r16=[r3]
+	adds r3=PT(AR_CSD)-PT(R16),r3
+	adds r30=PT(AR_CCV)+16,r12
+	;;
+	ld8.fill r24=[r29]
+	ld8 r15=[r30]	// load ar.ccv
+	;;
+	ld8 r29=[r2],16	// load b7
+	ld8 r30=[r3],16	// load ar.csd
+	;;
+	ld8 r31=[r2],16	// load ar.ssd
+	ld8.fill r8=[r3],16
+	;;
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
+	invala			// invalidate ALAT
+	;;
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
+	;;
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
+	;;
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
+	;;
+	ld8.fill r31=[r2],PT(F9)-PT(R31)
+	adds r3=PT(F10)-PT(F6),r3
+	;;
+	ldf.fill f9=[r2],PT(F6)-PT(F9)
+	ldf.fill f10=[r3],PT(F8)-PT(F10)
+	;;
+	ldf.fill f6=[r2],PT(F7)-PT(F6)
+	;;
+	ldf.fill f7=[r2],PT(F11)-PT(F7)
+	ldf.fill f8=[r3],32
+	;;
+	srlz.i			// ensure interruption collection is off
+	mov ar.ccv=r15
+	;;
+	bsw.0	// switch back to bank 0 (no stop bit required beforehand...)
+	;;
+	ldf.fill f11=[r2]
+// mov r18=r13
+// mov r21=r13
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
+	;;
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	;;
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
+	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
+	;;
+	ld8 r31=[r16],16	// load predicates
+	ld8 r22=[r17],16	// load b0
+	;;
+	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16	// load r1
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+	;;
+	ld8 r20=[r16],16	// ar.fpsr
+	ld8.fill r15=[r17],16
+	;;
+	ld8.fill r14=[r16],16
+	ld8.fill r2=[r17]
+	;;
+	ld8.fill r3=[r16]
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	;;
+	mov b0=r22
+	mov ar.pfs=r26
+	mov cr.ifs=r30
+	mov cr.ipsr=r29
+	mov ar.fpsr=r20
+	mov cr.iip=r28
+	;;
+	mov ar.rsc=r27
+	mov ar.unat=r25
+	mov pr=r31,-1
+	rfi
+END(ia64_leave_nested)
+
+
+
+/*
+ * Exit path back into the guest: delivers any pending interrupt, restores
+ * the guest's banked registers from the VPD (bank selected by vpsr.bn),
+ * restores the pt_regs frame, switches rr5/rr6/rr7 and cr.pta to the
+ * guest's values via the PAL_VPS_SYNC_WRITE service, then falls through
+ * into ia64_vmm_entry which resumes the guest through PAL.
+ */
+GLOBAL_ENTRY(ia64_leave_hypervisor)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
+	 * user- or fsys-mode, hence we disable interrupts early on:
+	 */
+	rsm psr.i
+	;;
+	alloc loc0=ar.pfs,0,1,1,0
+	adds out0=16,r12	// out0 = &pt_regs frame
+	;;
+	br.call.sptk.many b0=vmx_deliver_pending_interrupt
+	mov ar.pfs=loc0
+	adds r8=IA64_VPD_BASE_OFFSET,r13
+	;;
+	ld8 r8=[r8]		// r8 = VPD base
+	;;
+	adds r9=VPD(VPSR),r8
+	;;
+	ld8 r9=[r9]		// r9 = guest vpsr
+	;;
+	tbit.z pBN0,pBN1=r9,IA64_PSR_BN_BIT	// which bank is the guest in?
+	;;
+(pBN0)	add r7=VPD(VBNAT),r8;
+(pBN1)	add r7=VPD(VNAT),r8;
+	;;
+	ld8 r7=[r7]
+	;;
+	mov ar.unat=r7
+(pBN0)	add r4=VPD(VBGR),r8;
+(pBN1)	add r4=VPD(VGR),r8;
+(pBN0)	add r5=VPD(VBGR)+0x8,r8;
+(pBN1)	add r5=VPD(VGR)+0x8,r8;
+	;;
+	// restore guest r16-r31 from the selected VPD bank area
+	ld8.fill r16=[r4],16
+	ld8.fill r17=[r5],16
+	;;
+	ld8.fill r18=[r4],16
+	ld8.fill r19=[r5],16
+	;;
+	ld8.fill r20=[r4],16
+	ld8.fill r21=[r5],16
+	;;
+	ld8.fill r22=[r4],16
+	ld8.fill r23=[r5],16
+	;;
+	ld8.fill r24=[r4],16
+	ld8.fill r25=[r5],16
+	;;
+	ld8.fill r26=[r4],16
+	ld8.fill r27=[r5],16
+	;;
+	ld8.fill r28=[r4],16
+	ld8.fill r29=[r5],16
+	;;
+	ld8.fill r30=[r4],16
+	ld8.fill r31=[r5],16
+	;;
+	bsw.0
+	;;
+	mov r18=r8	//vpd
+	mov r19=r9	//vpsr
+	adds r20=PT(PR)+16,r12
+	;;
+	lfetch [r20],PT(CR_IPSR)-PT(PR)
+	adds r16=PT(B6)+16,r12
+	adds r17=PT(B7)+16,r12
+	;;
+	lfetch [r20]
+	mov r21=r13		// get current
+	;;
+	ld8 r30=[r16],16	// load b6
+	ld8 r31=[r17],16	// load b7
+	add r20=PT(EML_UNAT)+16,r12
+	;;
+	ld8 r29=[r20]		//load ar_unat
+	mov b6=r30
+	mov b7=r31
+	ld8 r30=[r16],16	//load ar_csd
+	ld8 r31=[r17],16	//load ar_ssd
+	;;
+	mov ar.unat=r29
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+	ld8.fill r8=[r16],16	//load r8
+	ld8.fill r9=[r17],16	//load r9
+	;;
+	ld8.fill r10=[r16],PT(R1)-PT(R10)	//load r10
+	ld8.fill r11=[r17],PT(R12)-PT(R11)	//load r11
+	;;
+	ld8.fill r1=[r16],16	//load r1
+	ld8.fill r12=[r17],16	//load r12
+	;;
+	ld8.fill r13=[r16],16	//load r13
+	ld8 r30=[r17],16	//load ar_fpsr
+	;;
+	ld8.fill r15=[r16],16	//load r15
+	ld8.fill r14=[r17],16	//load r14
+	mov ar.fpsr=r30
+	;;
+	ld8.fill r2=[r16],16	//load r2
+	ld8.fill r3=[r17],16	//load r3
+	;;
+/*
+(pEml) ld8.fill r4=[r16],16 //load r4
+(pEml) ld8.fill r5=[r17],16 //load r5
+	;;
+(pEml) ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6
+(pEml) ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7
+	;;
+(pNonEml) adds r16=PT(AR_CCV)-PT(R4),r16
+(pNonEml) adds r17=PT(F7)-PT(R5),r17
+	;;
+*/
+	ld8.fill r4=[r16],16	//load r4
+	ld8.fill r5=[r17],16	//load r5
+	;;
+	ld8.fill r6=[r16],PT(AR_CCV)-PT(R6)	//load r6
+	ld8.fill r7=[r17],PT(F7)-PT(R7)	//load r7
+	;;
+
+	ld8 r30=[r16],PT(F6)-PT(AR_CCV)
+	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
+	;;
+	srlz.i			// ensure interruption collection is off
+	;;
+	invala			// invalidate ALAT
+	;;
+	ldf.fill f6=[r16],32
+	ldf.fill f7=[r17],32
+	;;
+	ldf.fill f8=[r16],32
+	ldf.fill f9=[r17],32
+	;;
+	ldf.fill f10=[r16]
+	ldf.fill f11=[r17]
+	;;
+	mov ar.ccv=r30
+	adds r16=PT(CR_IPSR)-PT(F10),r16
+	adds r17=PT(CR_IIP)-PT(F11),r17
+	;;
+	ld8 r31=[r16],16	// load cr.ipsr
+	ld8 r30=[r17],16	// load cr.iip
+	;;
+	ld8 r29=[r16],16	// load cr.ifs
+	ld8 r28=[r17],16	// load ar.unat
+	;;
+	ld8 r27=[r16],16	// load ar.pfs
+	ld8 r26=[r17],16	// load ar.rsc
+	;;
+	ld8 r25=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r24=[r17],16	// load ar.bspstore (may be garbage)
+	;;
+	ld8 r23=[r16],16	// load predicates
+	ld8 r22=[r17],PT(RFI_PFS)-PT(B0)	// load b0
+	;;
+	ld8 r20=[r16],16	// load ar.rsc value for "loadrs"
+	;;
+//rbs_switch
+	// loadrs has already been shifted
+	alloc r16=ar.pfs,0,0,0,0	// drop current register frame
+	;;
+	mov ar.rsc=r20
+	;;
+	loadrs
+	;;
+	mov ar.bspstore=r24
+	;;
+	ld8 r24=[r17]		//load rfi_pfs
+	mov ar.unat=r28
+	mov ar.rnat=r25
+	mov ar.rsc=r26
+	;;
+	mov cr.ipsr=r31
+	mov cr.iip=r30
+	mov cr.ifs=r29
+	cmp.ne p6,p0=r24,r0	// non-zero rfi_pfs => take the dorfirfi detour
+(p6)	br.sptk vmx_dorfirfi
+	;;
+vmx_dorfirfi_back:
+	mov ar.pfs=r27
+
+//vsa_sync_write_start
+	movl r20=__vsa_base
+	;;
+	ld8 r20=[r20]		// read entry point
+	mov r25=r18
+	;;
+	add r16=PAL_VPS_SYNC_WRITE,r20
+	movl r24=switch_rr7	// calculate return address
+	;;
+	mov b0=r16
+	br.cond.sptk b0		// call the service
+	;;
+// switch rr5, rr6 and rr7 (plus cr.pta) to the values saved in current
+switch_rr7:
+	adds r24=SWITCH_MRR5_OFFSET, r21
+	adds r26=SWITCH_MRR6_OFFSET, r21
+	adds r16=SWITCH_MRR7_OFFSET ,r21
+	movl r25=(5<<61)
+	movl r27=(6<<61)
+	movl r17=(7<<61)
+	;;
+	ld8 r24=[r24]
+	ld8 r26=[r26]
+	ld8 r16=[r16]
+	;;
+	mov rr[r25]=r24
+	mov rr[r27]=r26
+	mov rr[r17]=r16
+	;;
+	srlz.i
+	;;
+	add r24=SWITCH_MPTA_OFFSET, r21
+	;;
+	ld8 r24=[r24]
+	;;
+	mov cr.pta=r24
+	;;
+	srlz.i
+	;;
+// fall through
+GLOBAL_ENTRY(ia64_vmm_entry)
+/*
+ * must be at bank 0
+ * parameter:
+ * r18:vpd
+ * r19:vpsr
+ * r20:__vsa_base
+ * r22:b0
+ * r23:predicate
+ */
+	mov r24=r22
+	mov r25=r18
+	tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT	// p1=vpsr.ic
+	;;
+	(p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+	(p2) add r29=PAL_VPS_RESUME_HANDLER,r20
+	;;
+	mov pr=r23,-2
+	mov b0=r29
+	;;
+	br.cond.sptk b0		// call pal service
+END(ia64_leave_hypervisor)
+
+/*
+ * Branched to from ia64_leave_hypervisor when the saved rfi_pfs slot is
+ * non-zero: refreshes pfs.pec from ar.ec, installs the value into ar.pfs,
+ * clears the slot, and returns to vmx_dorfirfi_back.
+ */
+//r24 rfi_pfs value
+//r17 address of rfi_pfs slot
+GLOBAL_ENTRY(vmx_dorfirfi)
+	mov r16=ar.ec
+	movl r20 = vmx_dorfirfi_back
+	;;
+// clean rfi_pfs
+	st8 [r17]=r0
+	mov b0=r20
+// pfs.pec=ar.ec (bits 52..57)
+	dep r24 = r16, r24, 52, 6
+	;;
+	mov ar.pfs=r24
+	;;
+	br.ret.sptk b0
+	;;
+END(vmx_dorfirfi)
+
+
+#define VMX_PURGE_RR7	0	// bit index selecting the purge path in vmx_switch_rr7
+#define VMX_INSERT_RR7	1	// bit index selecting the insert path in vmx_switch_rr7
+/*
+ * Purge the double mapping (xen image + vhpt) under the given rr7 by
+ * jumping through the TR-pinned stub at XEN_RR7_SWITCH_STUB with
+ * psr.i/psr.ic off and the RSE in enforced-lazy mode (ar.rsc=0).
+ * in0: old rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ * NOTE(review): alloc declares 5 inputs/9 locals although only in0-in2
+ * are documented -- the frame layout must match vmx_switch_rr7, confirm.
+ */
+GLOBAL_ENTRY(vmx_purge_double_mapping)
+	alloc loc1 = ar.pfs,5,9,0,0
+	mov loc0 = rp
+	movl r8 = 1f
+	;;
+	movl loc4 = KERNEL_TR_PAGE_SHIFT
+	movl loc5 = VCPU_TLB_SHIFT
+	mov loc6 = psr
+	movl loc7 = XEN_RR7_SWITCH_STUB
+	mov loc8 = (1<<VMX_PURGE_RR7)	// tells the stub to purge, not insert
+	;;
+	srlz.i
+	;;
+	rsm psr.i | psr.ic
+	;;
+	srlz.i
+	;;
+	mov ar.rsc = 0		// enforced lazy mode: no RSE traffic inside stub
+	mov b6 = loc7
+	mov rp = r8
+	;;
+	br.sptk b6
+1:
+	mov ar.rsc = 3		// back to eager mode
+	mov rp = loc0
+	;;
+	mov psr.l = loc6	// restore interrupt/collection state
+	;;
+	srlz.i
+	;;
+	br.ret.sptk rp
+END(vmx_purge_double_mapping)
+
+/*
+ * Insert the double mapping (xen image + vhpt TRs) for the new rr7 by
+ * jumping through the TR-pinned stub at XEN_RR7_SWITCH_STUB with
+ * psr.i/psr.ic off and the RSE in enforced-lazy mode (ar.rsc=0).
+ * in0: new rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ * in3: pte entry of xen image
+ * in4: pte entry of vhpt table
+ */
+GLOBAL_ENTRY(vmx_insert_double_mapping)
+	alloc loc1 = ar.pfs,5,9,0,0
+	mov loc0 = rp
+	movl loc2 = IA64_TR_XEN_IN_DOM	// TR number for xen image
+	;;
+	movl loc3 = IA64_TR_VHPT_IN_DOM	// TR number for vhpt table
+	movl r8 = 1f
+	movl loc4 = KERNEL_TR_PAGE_SHIFT
+	;;
+	movl loc5 = VCPU_TLB_SHIFT
+	mov loc6 = psr
+	movl loc7 = XEN_RR7_SWITCH_STUB
+	;;
+	srlz.i
+	;;
+	rsm psr.i | psr.ic
+	mov loc8 = (1<<VMX_INSERT_RR7)	// tells the stub to insert, not purge
+	;;
+	srlz.i
+	;;
+	mov ar.rsc = 0		// enforced lazy mode: no RSE traffic inside stub
+	mov b6 = loc7
+	mov rp = r8
+	;;
+	br.sptk b6
+1:
+	mov ar.rsc = 3		// back to eager mode
+	mov rp = loc0
+	;;
+	mov psr.l = loc6	// restore interrupt/collection state
+	;;
+	srlz.i
+	;;
+	br.ret.sptk rp
+END(vmx_insert_double_mapping)
+
+ .align PAGE_SIZE
+/*
+ * Stub to add double mapping for new domain, which shouldn't
+ * access any memory when active. Before reaching this point,
+ * both psr.i/ic is cleared and rse is set in lazy mode.
+ * Runs with rr7 temporarily set to in0 so the itr/ptr operations
+ * below target the new region; rr7 is restored (to r8) at the end.
+ *
+ * in0: new rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ * in3: pte entry of xen image
+ * in4: pte entry of vhpt table
+ * loc2: TR number for xen image
+ * loc3: TR number for vhpt table
+ * loc4: page size for xen image
+ * loc5: page size of vhpt table
+ * loc7: free to use
+ * loc8: purge or insert (p6=insert, p7=purge below)
+ * r8: will contain old rid value
+ */
+GLOBAL_ENTRY(vmx_switch_rr7)
+	movl loc7 = (7<<61)
+	dep.z loc4 = loc4, 2, 6		// page-shift -> itir.ps position
+	dep.z loc5 = loc5, 2, 6
+	;;
+	tbit.nz p6,p7=loc8, VMX_INSERT_RR7
+	mov r8 = rr[loc7]		// save old rr7
+	;;
+	mov rr[loc7] = in0
+(p6)	mov cr.ifa = in1
+(p6)	mov cr.itir = loc4
+	;;
+	srlz.i
+	;;
+(p6)	itr.i itr[loc2] = in3		// insert xen image TRs (i-side and d-side)
+(p7)	ptr.i in1, loc4			// or purge them
+	;;
+(p6)	itr.d dtr[loc2] = in3
+(p7)	ptr.d in1, loc4
+	;;
+	srlz.i
+	;;
+(p6)	mov cr.ifa = in2
+(p6)	mov cr.itir = loc5
+	;;
+(p6)	itr.d dtr[loc3] = in4		// insert (or purge) the vhpt TR
+(p7)	ptr.d in2, loc5
+	;;
+	srlz.i
+	;;
+	mov rr[loc7] = r8		// restore old rr7
+	;;
+	srlz.i
+	br.sptk rp
+END(vmx_switch_rr7)
+ .align PAGE_SIZE
diff --git a/xen/arch/ia64/vmx_init.c b/xen/arch/ia64/vmx_init.c
new file mode 100644
index 0000000000..f89659ba0c
--- /dev/null
+++ b/xen/arch/ia64/vmx_init.c
@@ -0,0 +1,297 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_init.c: initialization work for vt specific domain
+ * Copyright (c) 2005, Intel Corporation.
+ * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
+ * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ * Fred Yang <fred.yang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@intel.com>:
+ * Simplified design in first step:
+ * - One virtual environment
+ * - Domain is bound to one LP
+ * Later to support guest SMP:
+ * - Need interface to handle VP scheduled to different LP
+ */
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <asm/pal.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/vmx_vcpu.h>
+#include <xen/lib.h>
+#include <asm/vmmu.h>
+#include <public/arch-ia64.h>
+#include <asm/vmx_phy_mode.h>
+#include <asm/vmx.h>
+#include <xen/mm.h>
+
+/* Global flag to identify whether Intel vmx feature is on */
+u32 vmx_enabled = 0;
+static u32 vm_order;
+static u64 buffer_size;
+static u64 vp_env_info;
+static u64 vm_buffer = 0; /* Buffer required to bring up VMX feature */
+u64 __vsa_base = 0; /* Run-time service base of VMX */
+
+/*
+ * Check whether the VT-i (VMX) feature is available on this processor.
+ * Queries PAL for the VM bit and, when present, for the per-VM
+ * environment buffer size.  On success sets the global 'vmx_enabled'
+ * and caches buffer_size / vm_order / vp_env_info; on any PAL failure
+ * or missing feature, leaves vmx_enabled at 0.
+ */
+void
+identify_vmx_feature(void)
+{
+    pal_status_t ret;
+    u64 avail = 1, status = 1, control = 1;
+
+    vmx_enabled = 0;
+    /* Check VT-i feature */
+    ret = ia64_pal_proc_get_features(&avail, &status, &control);
+    if (ret != PAL_STATUS_SUCCESS) {
+        printk("Get proc features failed.\n");
+        goto no_vti;
+    }
+
+    /* FIXME: do we need to check the status field, to see whether
+     * PSR.vm is actually enabled? If yes, another call to
+     * ia64_pal_proc_set_features may be required then.
+     */
+    printk("avail:0x%lx, status:0x%lx,control:0x%lx, vm?0x%lx\n",
+        avail, status, control, avail & PAL_PROC_VM_BIT);
+    if (!(avail & PAL_PROC_VM_BIT)) {
+        printk("No VT feature supported.\n");
+        goto no_vti;
+    }
+
+    ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
+    if (ret != PAL_STATUS_SUCCESS) {
+        printk("Get vp environment info failed.\n");
+        goto no_vti;
+    }
+
+    /* Does xen have the ability to decode itself? */
+    if (!(vp_env_info & VP_OPCODE))
+        printk("WARNING: no opcode provided from hardware(%lx)!!!\n", vp_env_info);
+    vm_order = get_order(buffer_size);
+    /* buffer_size is u64: "%lu", not "%d", or the output is garbage */
+    printk("vm buffer size: %lu, order: %d\n", buffer_size, vm_order);
+
+    vmx_enabled = 1;
+no_vti:
+    return;
+}
+
+/*
+ * Init virtual environment on current LP.
+ * __vsa_base is the indicator whether it's the first LP to be
+ * initialized for the current domain: it is set on the first call and
+ * only checked afterwards.  The shared vm_buffer is allocated lazily on
+ * first use.
+ */
+void
+vmx_init_env(void)
+{
+    u64 status, tmp_base;
+
+    if (!vm_buffer) {
+        vm_buffer = alloc_xenheap_pages(vm_order);
+        ASSERT(vm_buffer);
+        printk("vm_buffer: 0x%lx\n", vm_buffer);
+    }
+
+    status=ia64_pal_vp_init_env(__vsa_base ? VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
+                __pa(vm_buffer),
+                vm_buffer,
+                &tmp_base);
+
+    if (status != PAL_STATUS_SUCCESS) {
+        printk("ia64_pal_vp_init_env failed.\n");
+        return;   /* function is void: the old 'return -1;' was invalid */
+    }
+
+    if (!__vsa_base)
+        __vsa_base = tmp_base;
+    else
+        /* NOTE(review): one would expect every LP to report the SAME
+         * VSA base, so asserting '!=' looks inverted -- confirm whether
+         * this should be ASSERT(tmp_base == __vsa_base). */
+        ASSERT(tmp_base != __vsa_base);
+
+    /* Init stub for rr7 switch */
+    vmx_init_double_mapping_stub();
+}
+
+/* Bit layout of CPUID register 3 (version information); used by
+ * alloc_vpd() below to clamp the 'number' field reported to the guest. */
+typedef union {
+    u64 value;
+    struct {
+        u64 number   : 8;   /* number of CPUID registers implemented */
+        u64 revision : 8;
+        u64 model    : 8;
+        u64 family   : 8;
+        u64 archrev  : 8;
+        u64 rv       : 24;  /* reserved */
+    };
+} cpuid3_t;
+
+/*
+ * Allocate and initialize a VPD (virtual processor descriptor) from the
+ * xenheap.  Copies the host CPUID registers, clamps the reported CPUID
+ * register count to 5, and forces vdc.d_vmsw.  Returns NULL on
+ * allocation failure; caller owns (and must eventually free) the VPD.
+ */
+static vpd_t *alloc_vpd(void)
+{
+    int i;
+    cpuid3_t cpuid3;
+    vpd_t *vpd;
+
+    vpd = alloc_xenheap_pages(get_order(VPD_SIZE));
+    if (!vpd) {
+        printk("VPD allocation failed.\n");
+        return NULL;
+    }
+
+    /* cast + %lu: vpd is a pointer and sizeof yields an unsigned long;
+     * the old "%lx"/"%d" pair mismatched both arguments */
+    printk("vpd base: 0x%lx, vpd size:%lu\n", (u64)vpd, sizeof(vpd_t));
+    memset(vpd, 0, VPD_SIZE);
+    /* CPUID init */
+    for (i = 0; i < 5; i++)
+        vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+    /* Limit the CPUID number to 5 */
+    cpuid3.value = vpd->vcpuid[3];
+    cpuid3.number = 4; /* 5 - 1 */
+    vpd->vcpuid[3] = cpuid3.value;
+
+    vpd->vdc.d_vmsw = 1;
+    return vpd;
+}
+
+
+
+/*
+ * Create a VP on an initialized VMX environment: registers this vcpu's
+ * VPD and the VMX interruption vector table with PAL.  Panics if PAL
+ * refuses, since the domain cannot run without a VP.
+ */
+static void
+vmx_create_vp(struct vcpu *v)
+{
+    u64 ret;
+    vpd_t *vpd = v->arch.arch_vmx.vpd;
+    u64 ivt_base;
+    extern char vmx_ia64_ivt;
+    /* ia64_ivt is a function pointer (descriptor), so need this translation */
+    ivt_base = (u64) &vmx_ia64_ivt;
+    printk("ivt_base: 0x%lx\n", ivt_base);
+    ret = ia64_pal_vp_create(vpd, ivt_base, 0);
+    if (ret != PAL_STATUS_SUCCESS)
+        panic("ia64_pal_vp_create failed. \n");
+}
+
+/*
+ * Pin the rr7-switch stub (vmx_switch_rr7) with an instruction TR at
+ * XEN_RR7_SWITCH_STUB so it stays mapped while region registers are
+ * being changed.  Done with PSR.ic temporarily cleared.
+ */
+void vmx_init_double_mapping_stub(void)
+{
+    u64 base, psr;
+    extern void vmx_switch_rr7(void);
+
+    base = (u64) &vmx_switch_rr7;
+    /* function "pointer" is an ia64 descriptor; first word is the entry */
+    base = *((u64*)base);
+
+    psr = ia64_clear_ic();
+    ia64_itr(0x1, IA64_TR_RR7_SWITCH_STUB, XEN_RR7_SWITCH_STUB,
+         pte_val(pfn_pte(__pa(base) >> PAGE_SHIFT, PAGE_KERNEL)),
+         RR7_SWITCH_SHIFT);
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    printk("Add TR mapping for rr7 switch stub, with physical: 0x%lx\n", (u64)(__pa(base)));
+}
+
+/*
+ * Save VP state when this vcpu is switched out: let PAL save the VPD
+ * state, then purge the rr7 double mapping (xen image + vhpt) that was
+ * installed for it.  Other non-context related tasks can be done in
+ * context switch.
+ */
+void
+vmx_save_state(struct vcpu *v)
+{
+    u64 status;
+    u64 dom_rr7;
+
+    /* FIXME: about setting of pal_proc_vector... time consuming */
+    status = ia64_pal_vp_save(v->arch.arch_vmx.vpd, 0);
+    if (status != PAL_STATUS_SUCCESS)
+        panic("Save vp status failed\n");
+
+    /* FIXME: Do we really need purge double mapping for old vcpu?
+     * Since rid is completely different between prev and next,
+     * it's not overlap and thus no MCA possible... */
+    dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
+    vmx_purge_double_mapping(dom_rr7, KERNEL_START,
+                 (u64)v->arch.vtlb->ts->vhpt->hash);
+
+}
+
+/*
+ * Restore VP state when this vcpu is switched in: let PAL restore the
+ * VPD state, then re-insert the rr7 double mapping for the xen image
+ * and this vcpu's VHPT.  Even when the guest is in physical mode, we
+ * still need such double mapping.
+ */
+void
+vmx_load_state(struct vcpu *v)
+{
+    u64 status;
+    u64 dom_rr7;
+    u64 pte_xen, pte_vhpt;
+
+    status = ia64_pal_vp_restore(v->arch.arch_vmx.vpd, 0);
+    if (status != PAL_STATUS_SUCCESS)
+        panic("Restore vp status failed\n");
+
+    dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
+    pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
+    pte_vhpt = pte_val(pfn_pte((__pa(v->arch.vtlb->ts->vhpt->hash) >> PAGE_SHIFT), PAGE_KERNEL));
+    vmx_insert_double_mapping(dom_rr7, KERNEL_START,
+                  (u64)v->arch.vtlb->ts->vhpt->hash,
+                  pte_xen, pte_vhpt);
+
+    /* Guest vTLB is not required to be switched explicitly, since
+     * anchored in vcpu */
+}
+
+/* Purge the double mapping under the old rr7 and insert it under the
+ * new one -- called when the guest changes rr7.  The xen-image PTE and
+ * the vhpt PTE are rebuilt the same way as in vmx_load_state(). */
+void
+vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7)
+{
+    u64 pte_xen, pte_vhpt, vhpt_base;
+
+    vhpt_base = (u64)v->arch.vtlb->ts->vhpt->hash;
+    vmx_purge_double_mapping(oldrr7, KERNEL_START,
+                 vhpt_base);
+
+    pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
+    pte_vhpt = pte_val(pfn_pte((__pa(vhpt_base) >> PAGE_SHIFT), PAGE_KERNEL));
+    vmx_insert_double_mapping(newrr7, KERNEL_START,
+                  vhpt_base,
+                  pte_xen, pte_vhpt);
+}
+
+/*
+ * Initialize VMX environment for guest. Only the 1st vp/vcpu
+ * is registered here.
+ */
+void
+vmx_final_setup_domain(struct domain *d)
+{
+    struct vcpu *v = d->vcpu[0];   /* only vcpu 0 is set up here */
+    vpd_t *vpd;
+
+    /* Allocate resources for vcpu 0 */
+    //memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct));
+
+    vpd = alloc_vpd();
+    /* NOTE(review): alloc_vpd() returns NULL on OOM and ASSERT may be
+     * compiled out -- consider an explicit failure path. */
+    ASSERT(vpd);
+
+    v->arch.arch_vmx.vpd = vpd;
+    /* guest reaches its VM environment buffer through the VPD */
+    vpd->virt_env_vaddr = vm_buffer;
+
+    /* v->arch.schedule_tail = arch_vmx_do_launch; */
+    vmx_create_vp(v);
+
+    /* Set this ed to be vmx */
+    v->arch.arch_vmx.flags = 1;
+
+    /* Other vmx specific initialization work */
+}
+
diff --git a/xen/arch/ia64/vmx_interrupt.c b/xen/arch/ia64/vmx_interrupt.c
new file mode 100644
index 0000000000..056d8c9700
--- /dev/null
+++ b/xen/arch/ia64/vmx_interrupt.c
@@ -0,0 +1,388 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_interrupt.c: handle inject interruption.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Shaofan Li (Susue Li) <susie.li@intel.com>
+ * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+
+#include <xen/types.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx_pal_vsa.h>
+/* SDM vol2 5.5 - IVA based interruption handling */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
+/*
+ * Collect interruption state into the guest's virtual CRs before an
+ * interruption is injected (SDM vol2 5.5, IVA-based handling):
+ * when vpsr.ic is set, snapshot vIPSR/vIIP/vIIPA, clear vIFS.v, and
+ * then compute the new guest PSR from the interruption mask and vDCR's
+ * be/pp bits.
+ */
+void
+collect_interruption(VCPU *vcpu)
+{
+    u64 ipsr;
+    u64 vdcr;
+    u64 vifs;
+    IA64_PSR vpsr;
+    REGS * regs = vcpu_regs(vcpu);
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+
+    if(vpsr.ic){
+        extern void vmx_dorfirfi(void);
+        /* deref: ia64 function "pointers" are descriptors; first word is
+         * the real entry address */
+        if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi)
+            panic("COLLECT interruption for vmx_dorfirfi\n");
+
+        /* Sync mpsr id/da/dd/ss/ed bits to vipsr
+         * since after guest do rfi, we still want these bits on in
+         * mpsr
+         */
+        ipsr = regs->cr_ipsr;
+        vpsr.val = vpsr.val | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
+             | IA64_PSR_DD |IA64_PSR_SS |IA64_PSR_ED));
+        vmx_vcpu_set_ipsr(vcpu, vpsr.val);
+
+        /* Currently, for trap, we do not advance IIP to next
+         * instruction. That's because we assume caller already
+         * set up IIP correctly
+         */
+        vmx_vcpu_set_iip(vcpu , regs->cr_iip);
+
+        /* set vifs.v to zero */
+        vifs = VPD_CR(vcpu,ifs);
+        vifs &= ~IA64_IFS_V;
+        vmx_vcpu_set_ifs(vcpu, vifs);
+
+        vmx_vcpu_set_iipa(vcpu, regs->cr_iipa);
+    }
+
+    vdcr = VPD_CR(vcpu,dcr);
+
+    /* Set guest psr
+     * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
+     * be: set to the value of dcr.be
+     * pp: set to the value of dcr.pp
+     */
+    vpsr.val &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
+    vpsr.val |= ( vdcr & IA64_DCR_BE);
+
+    /* VDCR pp bit position is different from VPSR pp bit */
+    if ( vdcr & IA64_DCR_PP ) {
+        vpsr.val |= IA64_PSR_PP;
+    } else {
+        vpsr.val &= ~IA64_PSR_PP;   /* was followed by a stray ';;' */
+    }
+
+    vmx_vcpu_set_psr(vcpu, vpsr.val);
+
+}
+/*
+ * Deliver interruption vector 'vec' to the guest: collect interruption
+ * state into the VPD, then redirect guest execution to vIVA + vec.
+ * Always returns 0; the function is declared int, and previously fell
+ * off the end without a return (undefined if a caller read the value).
+ */
+int
+inject_guest_interruption(VCPU *vcpu, u64 vec)
+{
+    u64 viva;
+    REGS *regs;
+    regs=vcpu_regs(vcpu);
+
+    collect_interruption(vcpu);
+
+    vmx_vcpu_get_iva(vcpu,&viva);
+    regs->cr_iip = viva + vec;
+    return 0;
+}
+
+
+/*
+ * Set vIFA & vITIR & vIHA, when vPSR.ic == 1 (otherwise these CRs must
+ * not be written -- see SDM Vol2 Table 8-1).
+ * Parameter:
+ *  set_ifa:  if true, set vIFA to the faulting address
+ *  set_itir: if true, set vITIR from the fault's region attributes
+ *  set_iha:  if true, set vIHA to thash(vadr)
+ */
+void
+set_ifa_itir_iha (VCPU *vcpu, u64 vadr,
+          int set_ifa, int set_itir, int set_iha)
+{
+    IA64_PSR vpsr;
+    u64 value;
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    /* Vol2, Table 8-1: these CRs are only collected while vpsr.ic==1 */
+    if ( vpsr.ic ) {
+        if ( set_ifa){
+            vmx_vcpu_set_ifa(vcpu, vadr);
+        }
+        if ( set_itir) {
+            value = vmx_vcpu_get_itir_on_fault(vcpu, vadr);
+            vmx_vcpu_set_itir(vcpu, value);
+        }
+
+        if ( set_iha) {
+            /* vIHA is the guest VHPT slot address for vadr */
+            vmx_vcpu_thash(vcpu, vadr, &value);
+            vmx_vcpu_set_iha(vcpu, value);
+        }
+    }
+
+
+}
+
+/*
+ * Data TLB Fault
+ * @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dtlb_fault (VCPU *vcpu, u64 vadr)
+{
+    /* If vPSR.ic: record IFA, ITIR and IHA before injecting */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+    inject_guest_interruption(vcpu,IA64_DATA_TLB_VECTOR);
+}
+
+/*
+ * Instruction TLB Fault
+ * @ Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+itlb_fault (VCPU *vcpu, u64 vadr)
+{
+    /* If vPSR.ic: record IFA, ITIR and IHA before injecting */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+    inject_guest_interruption(vcpu,IA64_INST_TLB_VECTOR);
+}
+
+
+
+/*
+ * Data Nested TLB Fault
+ * @ Data Nested TLB Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ * No IFA/ITIR/IHA collection: nested faults occur with vpsr.ic==0.
+ */
+void
+nested_dtlb (VCPU *vcpu)
+{
+    inject_guest_interruption(vcpu,IA64_DATA_NESTED_TLB_VECTOR);
+}
+
+/*
+ * Alternate Data TLB Fault
+ * @ Alternate Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+alt_dtlb (VCPU *vcpu, u64 vadr)
+{
+    /* IFA and ITIR only -- no IHA for the alternate vector */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+    inject_guest_interruption(vcpu,IA64_ALT_DATA_TLB_VECTOR);
+}
+
+
+/*
+ * Alternate Instruction TLB Fault
+ * @ Alternate Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ * (header previously mislabelled this as the Data TLB fault)
+ */
+void
+alt_itlb (VCPU *vcpu, u64 vadr)
+{
+    /* IFA and ITIR only -- no IHA for the alternate vector */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+    inject_guest_interruption(vcpu,IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ * VHPT Translation Vector
+ * Common helper for the instruction- and data-side wrappers below.
+ */
+static void
+_vhpt_fault(VCPU *vcpu, u64 vadr)
+{
+    /* If vPSR.ic: record IFA, ITIR and IHA before injecting */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+    inject_guest_interruption(vcpu,IA64_VHPT_TRANS_VECTOR);
+}
+
+/*
+ * VHPT Instruction Fault
+ * @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+ivhpt_fault (VCPU *vcpu, u64 vadr)
+{
+    _vhpt_fault(vcpu, vadr);
+}
+
+
+/*
+ * VHPT Data Fault
+ * @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dvhpt_fault (VCPU *vcpu, u64 vadr)
+{
+    _vhpt_fault(vcpu, vadr);
+}
+
+
+
+/*
+ * Deal with:
+ * General Exception vector
+ * Common helper for all general-exception class faults below; no
+ * IFA/ITIR/IHA collection is required for this vector.
+ */
+void
+_general_exception (VCPU *vcpu)
+{
+    inject_guest_interruption(vcpu,IA64_GENEX_VECTOR);
+}
+
+
+/*
+ * Illegal Operation Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+illegal_op (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+illegal_dep (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+rsv_reg_field (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+/*
+ * Privileged Operation Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+privilege_op (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Unimplemented Data Address Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+unimpl_daddr (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+privilege_reg (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/* Deal with
+ * Nat consumption vector
+ * Parameter:
+ *  vadr: faulting address; only meaningful when t is DATA or INSTRUCTION
+ *        (pass 0 for t == REGISTER)
+ *  t:    which kind of NaT was consumed
+ */
+static void
+_nat_consumption_fault(VCPU *vcpu, u64 vadr, miss_type t)
+{
+    /* If vPSR.ic && t == DATA/INST, collect IFA only (no ITIR/IHA) */
+    if ( t == DATA || t == INSTRUCTION ) {
+        /* IFA */
+        set_ifa_itir_iha (vcpu, vadr, 1, 0, 0);
+    }
+
+    inject_guest_interruption(vcpu,IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * IR Data Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ * NOTE(review): static and not referenced in the visible portion of
+ * this file -- confirm it has a caller before removing.
+ */
+static void
+ir_nat_page_consumption (VCPU *vcpu, u64 vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+inat_page_consumption (VCPU *vcpu, u64 vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ * No faulting address exists for the register case, hence vadr == 0.
+ */
+void
+rnat_consumption (VCPU *vcpu)
+{
+    _nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dnat_page_consumption (VCPU *vcpu, uint64_t vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ * Page not present vector
+ */
+void
+page_not_present(VCPU *vcpu, u64 vadr)
+{
+    /* If vPSR.ic: record IFA and ITIR (no IHA for this vector) */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+    inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
+}
+
diff --git a/xen/arch/ia64/vmx_ivt.S b/xen/arch/ia64/vmx_ivt.S
new file mode 100644
index 0000000000..9647386a8c
--- /dev/null
+++ b/xen/arch/ia64/vmx_ivt.S
@@ -0,0 +1,978 @@
+/*
+ * arch/ia64/kernel/vmx_ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ * Asit Mallick <asit.k.mallick@intel.com>
+ * Suresh Siddha <suresh.b.siddha@intel.com>
+ * Kenneth Chen <kenneth.w.chen@intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ *
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
+ *
+ * 05/3/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Supporting Intel virtualization architecture
+ *
+ */
+
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ * For each entry, the comment is as follows:
+ *
+ * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ * entry offset ----/ / / / /
+ * entry number ---------/ / / /
+ * size of the entry -------------/ / /
+ * vector name -------------------------------------/ /
+ * interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/ia32.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/vhpt.h>
+
+
+#if 0
+ /*
+ * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't
+ * needed for something else before enabling this...
+ */
+# define VMX_DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16
+#else
+# define VMX_DBG_FAULT(i)
+#endif
+
+#include "vmx_minstate.h"
+
+
+
+#define VMX_FAULT(n) \
+vmx_fault_##n:; \
+ br.sptk vmx_fault_##n; \
+ ;; \
+
+
+#define VMX_REFLECT(n) \
+ mov r31=pr; \
+ mov r19=n; /* prepare to save predicates */ \
+ mov r29=cr.ipsr; \
+ ;; \
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
+(p7) br.sptk.many vmx_dispatch_reflection; \
+ VMX_FAULT(n); \
+
+
+GLOBAL_ENTRY(vmx_panic)
+ br.sptk.many vmx_panic
+ ;;
+END(vmx_panic)
+
+
+
+
+
+ .section .text.ivt,"ax"
+
+ .align 32768 // align on 32KB boundary
+ .global vmx_ia64_ivt
+vmx_ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vmx_vhpt_miss)
+ VMX_FAULT(0)
+END(vmx_vhpt_miss)
+
+ .org vmx_ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(vmx_itlb_miss)
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6) br.sptk vmx_fault_1
+ mov r16 = cr.ifa
+ ;;
+ thash r17 = r16
+ ttag r20 = r16
+ ;;
+vmx_itlb_loop:
+ cmp.eq p6,p0 = r0, r17
+(p6) br vmx_itlb_out
+ ;;
+ adds r22 = VLE_TITAG_OFFSET, r17
+ adds r23 = VLE_CCHAIN_OFFSET, r17
+ ;;
+ ld8 r24 = [r22]
+ ld8 r25 = [r23]
+ ;;
+ lfetch [r25]
+ cmp.eq p6,p7 = r20, r24
+ ;;
+(p7) mov r17 = r25;
+(p7) br.sptk vmx_itlb_loop
+ ;;
+ adds r23 = VLE_PGFLAGS_OFFSET, r17
+ adds r24 = VLE_ITIR_OFFSET, r17
+ ;;
+ ld8 r26 = [r23]
+ ld8 r25 = [r24]
+ ;;
+ mov cr.itir = r25
+ ;;
+ itc.i r26
+ ;;
+ srlz.i
+ ;;
+ mov r23=r31
+ mov r22=b0
+ adds r16=IA64_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r16]
+ ;;
+ adds r19=VPD(VPSR),r18
+ movl r20=__vsa_base
+ ;;
+ ld8 r19=[r19]
+ ld8 r20=[r20]
+ ;;
+ br.sptk ia64_vmm_entry
+ ;;
+vmx_itlb_out:
+ mov r19 = 1
+ br.sptk vmx_dispatch_tlb_miss
+ VMX_FAULT(1);
+END(vmx_itlb_miss)
+
+ .org vmx_ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(vmx_dtlb_miss)
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)br.sptk vmx_fault_1
+ mov r16 = cr.ifa
+ ;;
+ thash r17 = r16
+ ttag r20 = r16
+ ;;
+vmx_dtlb_loop:
+ cmp.eq p6,p0 = r0, r17
+(p6)br vmx_dtlb_out
+ ;;
+ adds r22 = VLE_TITAG_OFFSET, r17
+ adds r23 = VLE_CCHAIN_OFFSET, r17
+ ;;
+ ld8 r24 = [r22]
+ ld8 r25 = [r23]
+ ;;
+ lfetch [r25]
+ cmp.eq p6,p7 = r20, r24
+ ;;
+(p7)mov r17 = r25;
+(p7)br.sptk vmx_dtlb_loop
+ ;;
+ adds r23 = VLE_PGFLAGS_OFFSET, r17
+ adds r24 = VLE_ITIR_OFFSET, r17
+ ;;
+ ld8 r26 = [r23]
+ ld8 r25 = [r24]
+ ;;
+ mov cr.itir = r25
+ ;;
+ itc.d r26
+ ;;
+ srlz.d;
+ ;;
+ mov r23=r31
+ mov r22=b0
+ adds r16=IA64_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r16]
+ ;;
+ adds r19=VPD(VPSR),r18
+ movl r20=__vsa_base
+ ;;
+ ld8 r19=[r19]
+ ld8 r20=[r20]
+ ;;
+ br.sptk ia64_vmm_entry
+ ;;
+vmx_dtlb_out:
+ mov r19 = 2
+ br.sptk vmx_dispatch_tlb_miss
+ VMX_FAULT(2);
+END(vmx_dtlb_miss)
+
+ .org vmx_ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(vmx_alt_itlb_miss)
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p7)br.sptk vmx_fault_3
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r24=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ ;;
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ or r19=r17,r19 // insert PTE control bits into r19
+ ;;
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
+ ;;
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+ VMX_FAULT(3);
+END(vmx_alt_itlb_miss)
+
+
+ .org vmx_ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(vmx_alt_dtlb_miss)
+ mov r31=pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p7)br.sptk vmx_fault_4
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r20=cr.isr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r24=cr.ipsr
+ ;;
+ and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
+ tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
+ dep r24=-1,r24,IA64_PSR_ED_BIT,1
+ or r19=r19,r17 // insert PTE control bits into r19
+ ;;
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
+(p6) mov cr.ipsr=r24
+ ;;
+(p7) itc.d r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+ VMX_FAULT(4);
+END(vmx_alt_dtlb_miss)
+
+ .org vmx_ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(vmx_nested_dtlb_miss)
+ VMX_FAULT(5)
+END(vmx_nested_dtlb_miss)
+
+ .org vmx_ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(vmx_ikey_miss)
+ VMX_REFLECT(6)
+END(vmx_ikey_miss)
+
+ .org vmx_ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(vmx_dkey_miss)
+ VMX_REFLECT(7)
+END(vmx_dkey_miss)
+
+ .org vmx_ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(vmx_dirty_bit)
+ VMX_REFLECT(8)
+END(vmx_idirty_bit)
+
+ .org vmx_ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(vmx_iaccess_bit)
+ VMX_REFLECT(9)
+END(vmx_iaccess_bit)
+
+ .org vmx_ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(vmx_daccess_bit)
+ VMX_REFLECT(10)
+END(vmx_daccess_bit)
+
+ .org vmx_ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(vmx_break_fault)
+ mov r31=pr
+ mov r19=11
+ br.sptk.many vmx_dispatch_break_fault
+END(vmx_break_fault)
+
+ .org vmx_ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(vmx_interrupt)
+ mov r31=pr // prepare to save predicates
+ mov r19=12
+ mov r29=cr.ipsr
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+(p7) br.sptk vmx_dispatch_interrupt
+ ;;
+ mov r27=ar.rsc /* M */
+ mov r20=r1 /* A */
+ mov r25=ar.unat /* M */
+ mov r26=ar.pfs /* I */
+ mov r28=cr.iip /* M */
+ cover /* B (or nothing) */
+ ;;
+ mov r1=sp
+ ;;
+ invala /* M */
+ mov r30=cr.ifs
+ ;;
+ addl r1=-IA64_PT_REGS_SIZE,r1
+ ;;
+ adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
+ adds r16=PT(CR_IPSR),r1
+ ;;
+ lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+ st8 [r16]=r29 /* save cr.ipsr */
+ ;;
+ lfetch.fault.excl.nt1 [r17]
+ mov r29=b0
+ ;;
+ adds r16=PT(R8),r1 /* initialize first base pointer */
+ adds r17=PT(R9),r1 /* initialize second base pointer */
+ mov r18=r0 /* make sure r18 isn't NaT */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r8,16
+.mem.offset 8,0; st8.spill [r17]=r9,16
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r10,24
+.mem.offset 8,0; st8.spill [r17]=r11,24
+ ;;
+ st8 [r16]=r28,16 /* save cr.iip */
+ st8 [r17]=r30,16 /* save cr.ifs */
+ mov r8=ar.fpsr /* M */
+ mov r9=ar.csd
+ mov r10=ar.ssd
+ movl r11=FPSR_DEFAULT /* L-unit */
+ ;;
+ st8 [r16]=r25,16 /* save ar.unat */
+ st8 [r17]=r26,16 /* save ar.pfs */
+ shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
+ ;;
+ st8 [r16]=r27,16 /* save ar.rsc */
+ adds r17=16,r17 /* skip over ar_rnat field */
+ ;; /* avoid RAW on r16 & r17 */
+ st8 [r17]=r31,16 /* save predicates */
+ adds r16=16,r16 /* skip over ar_bspstore field */
+ ;;
+ st8 [r16]=r29,16 /* save b0 */
+ st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
+.mem.offset 8,0; st8.spill [r17]=r12,16
+ adds r12=-16,r1 /* switch to kernel memory stack (with 16 bytes of scratch) */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r13,16
+.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
+ mov r13=r21 /* establish `current' */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r15,16
+.mem.offset 8,0; st8.spill [r17]=r14,16
+ dep r14=-1,r0,60,4
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r2,16
+.mem.offset 8,0; st8.spill [r17]=r3,16
+ adds r2=IA64_PT_REGS_R16_OFFSET,r1
+ ;;
+ mov r8=ar.ccv
+ movl r1=__gp /* establish kernel global pointer */
+ ;; \
+ bsw.1
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+ mov out0=cr.ivr // pass cr.ivr as first arg
+ add out1=16,sp // pass pointer to pt_regs as second arg
+
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ ssm psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ srlz.i // ensure everybody knows psr.ic is back on
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+ mov r18=b6
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+ mov r19=b7
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,32
+ ;;
+ mov ar.fpsr=r11 /* M-unit */
+ st8 [r2]=r8,8 /* ar.ccv */
+ adds r24=PT(B6)-PT(F7),r3
+ ;;
+ stf.spill [r2]=f6,32
+ stf.spill [r3]=f7,32
+ ;;
+ stf.spill [r2]=f8,32
+ stf.spill [r3]=f9,32
+ ;;
+ stf.spill [r2]=f10
+ stf.spill [r3]=f11
+ adds r25=PT(B7)-PT(F11),r3
+ ;;
+ st8 [r24]=r18,16 /* b6 */
+ st8 [r25]=r19,16 /* b7 */
+ ;;
+ st8 [r24]=r9 /* ar.csd */
+ st8 [r25]=r10 /* ar.ssd */
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_nested
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_ia64_handle_irq
+ ;;
+END(vmx_interrupt)
+
+ .org vmx_ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(13)
+ VMX_FAULT(13)
+
+
+ .org vmx_ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(14)
+ VMX_FAULT(14)
+
+
+ .org vmx_ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(15)
+ VMX_FAULT(15)
+
+
+ .org vmx_ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(16)
+ VMX_FAULT(16)
+
+ .org vmx_ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(17)
+ VMX_FAULT(17)
+
+ .org vmx_ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(18)
+ VMX_FAULT(18)
+
+ .org vmx_ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(19)
+ VMX_FAULT(19)
+
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(vmx_iaccess_rights)
+ VMX_REFLECT(22)
+END(vmx_iaccess_rights)
+
+ .org vmx_ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(vmx_daccess_rights)
+ VMX_REFLECT(23)
+END(vmx_daccess_rights)
+
+ .org vmx_ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(vmx_general_exception)
+ VMX_FAULT(24)
+// VMX_REFLECT(24)
+END(vmx_general_exception)
+
+ .org vmx_ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(vmx_disabled_fp_reg)
+ VMX_REFLECT(25)
+END(vmx_disabled_fp_reg)
+
+ .org vmx_ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(vmx_nat_consumption)
+ VMX_REFLECT(26)
+END(vmx_nat_consumption)
+
+ .org vmx_ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(vmx_speculation_vector)
+ VMX_REFLECT(27)
+END(vmx_speculation_vector)
+
+ .org vmx_ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(28)
+ VMX_FAULT(28)
+
+ .org vmx_ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(vmx_debug_vector)
+ VMX_DBG_FAULT(29)
+ VMX_FAULT(29)
+END(vmx_debug_vector)
+
+ .org vmx_ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(vmx_unaligned_access)
+ VMX_REFLECT(30)
+END(vmx_unaligned_access)
+
+ .org vmx_ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(vmx_unsupported_data_reference)
+ VMX_REFLECT(31)
+END(vmx_unsupported_data_reference)
+
+ .org vmx_ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(vmx_floating_point_fault)
+ VMX_REFLECT(32)
+END(vmx_floating_point_fault)
+
+ .org vmx_ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(vmx_floating_point_trap)
+ VMX_REFLECT(33)
+END(vmx_floating_point_trap)
+
+ .org vmx_ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(vmx_lower_privilege_trap)
+ VMX_REFLECT(34)
+END(vmx_lower_privilege_trap)
+
+ .org vmx_ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(vmx_taken_branch_trap)
+ VMX_REFLECT(35)
+END(vmx_taken_branch_trap)
+
+ .org vmx_ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(vmx_single_step_trap)
+ VMX_REFLECT(36)
+END(vmx_single_step_trap)
+
+ .org vmx_ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
+ENTRY(vmx_virtualization_fault)
+ VMX_DBG_FAULT(37)
+ mov r31=pr
+ mov r19=37
+ br.sptk vmx_dispatch_virtualization_fault
+END(vmx_virtualization_fault)
+
+ .org vmx_ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(38)
+ VMX_FAULT(38)
+
+ .org vmx_ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(39)
+ VMX_FAULT(39)
+
+ .org vmx_ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(40)
+ VMX_FAULT(40)
+
+ .org vmx_ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(41)
+ VMX_FAULT(41)
+
+ .org vmx_ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(42)
+ VMX_FAULT(42)
+
+ .org vmx_ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(43)
+ VMX_FAULT(43)
+
+ .org vmx_ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(44)
+ VMX_FAULT(44)
+
+ .org vmx_ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(vmx_ia32_exception)
+ VMX_DBG_FAULT(45)
+ VMX_FAULT(45)
+END(vmx_ia32_exception)
+
+ .org vmx_ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ENTRY(vmx_ia32_intercept)
+ VMX_DBG_FAULT(46)
+ VMX_FAULT(46)
+END(vmx_ia32_intercept)
+
+ .org vmx_ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+ENTRY(vmx_ia32_interrupt)
+ VMX_DBG_FAULT(47)
+ VMX_FAULT(47)
+END(vmx_ia32_interrupt)
+
+ .org vmx_ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(48)
+ VMX_FAULT(48)
+
+ .org vmx_ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(49)
+ VMX_FAULT(49)
+
+ .org vmx_ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(50)
+ VMX_FAULT(50)
+
+ .org vmx_ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(51)
+ VMX_FAULT(51)
+
+ .org vmx_ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(52)
+ VMX_FAULT(52)
+
+ .org vmx_ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(53)
+ VMX_FAULT(53)
+
+ .org vmx_ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(54)
+ VMX_FAULT(54)
+
+ .org vmx_ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(55)
+ VMX_FAULT(55)
+
+ .org vmx_ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(56)
+ VMX_FAULT(56)
+
+ .org vmx_ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(57)
+ VMX_FAULT(57)
+
+ .org vmx_ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(58)
+ VMX_FAULT(58)
+
+ .org vmx_ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(59)
+ VMX_FAULT(59)
+
+ .org vmx_ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(60)
+ VMX_FAULT(60)
+
+ .org vmx_ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(61)
+ VMX_FAULT(61)
+
+ .org vmx_ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(62)
+ VMX_FAULT(62)
+
+ .org vmx_ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(63)
+ VMX_FAULT(63)
+
+ .org vmx_ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(64)
+ VMX_FAULT(64)
+
+ .org vmx_ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(65)
+ VMX_FAULT(65)
+
+ .org vmx_ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(66)
+ VMX_FAULT(66)
+
+ .org vmx_ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(67)
+ VMX_FAULT(67)
+
+ .org vmx_ia64_ivt+0x8000
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply moved the following code to a more
+ // suitable spot...
+
+
+ENTRY(vmx_dispatch_reflection)
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: intr type (offset into ivt, see ia64_int.h)
+ * r31: contains saved predicates (pr)
+ */
+ VMX_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,4,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+ mov out2=cr.iim
+ mov out3=r15
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_reflect_interruption
+END(vmx_dispatch_reflection)
+
+ENTRY(vmx_dispatch_virtualization_fault)
+ cmp.eq pEml,pNonEml=r0,r0 /* force pEml =1, save r4 ~ r7 */
+ ;;
+ VMX_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,3,0 // now it's safe (must be first in insn group!)
+ mov out0=r13 //vcpu
+ mov out1=r4 //cause
+ mov out2=r5 //opcode
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_emulate
+END(vmx_dispatch_virtualization_fault)
+
+
+
+ENTRY(vmx_dispatch_tlb_miss)
+ VMX_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,3,0
+ mov out0=r13
+ mov out1=r15
+ mov out2=cr.ifa
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_hpw_miss
+END(vmx_dispatch_tlb_miss)
+
+
+ENTRY(vmx_dispatch_break_fault)
+ cmp.ne pEml,pNonEml=r0,r0 /* force pNonEml =1, don't save r4 ~ r7 */
+ ;;
+ VMX_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa
+ adds out1=16,sp
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.iim // FIXME: pity to make this slow access twice
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_ia64_handle_break
+END(vmx_dispatch_break_fault)
+
+
+ENTRY(vmx_dispatch_interrupt)
+ cmp.ne pEml,pNonEml=r0,r0 /* force pNonEml =1, don't save r4 ~ r7 */
+ ;;
+ VMX_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+ mov out0=cr.ivr // pass cr.ivr as first arg
+ add out1=16,sp // pass pointer to pt_regs as second arg
+
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ ssm psr.i
+ adds r3=16,r2 // set up second base pointer for SAVE_REST
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_ia64_handle_irq
+END(vmx_dispatch_interrupt)
diff --git a/xen/arch/ia64/vmx_minstate.h b/xen/arch/ia64/vmx_minstate.h
new file mode 100644
index 0000000000..afee6516d9
--- /dev/null
+++ b/xen/arch/ia64/vmx_minstate.h
@@ -0,0 +1,329 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_minstate.h:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/fpu.h>
+#include <asm/mmu_context.h>
+#include <asm/offsets.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/vmx_vpd.h>
+#include <asm/cache.h>
+#include "entry.h"
+
+/*
+ * VMX_MINSTATE_START_SAVE_MIN: switch the RSE onto the kernel register
+ * backing store.  Enters enforced-lazy mode, captures the old ar.rnat (r28)
+ * and ar.bspstore (r23), points ar.bspstore at the kernel RBS (r22,
+ * computed from r1+IA64_RBS_OFFSET) and r1 at the kernel memory-stack base,
+ * reads the new ar.bsp into r18, then re-enables eager mode.
+ */
+#define VMX_MINSTATE_START_SAVE_MIN \
+    mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
+    ;; \
+    mov.m r28=ar.rnat; \
+    addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
+    ;; \
+    lfetch.fault.excl.nt1 [r22]; \
+    addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+    mov r23=ar.bspstore; /* save ar.bspstore */ \
+    ;; \
+    mov ar.bspstore=r22; /* switch to kernel RBS */ \
+    ;; \
+    mov r18=ar.bsp; \
+    mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
+
+
+
+/*
+ * VMX_MINSTATE_END_SAVE_MIN: switch back to register bank 1; must be the
+ * last instruction group of the SAVE_MIN sequence.
+ */
+#define VMX_MINSTATE_END_SAVE_MIN \
+    bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
+    ;;
+
+
+/*
+ * PAL_VSA_SYNC_READ_CLEANUP_PSR_PL: call the PAL VPS sync-read service
+ * (entry = *__vsa_base + PAL_VPS_SYNC_READ) for the current vcpu's vpd
+ * (loaded from [r21 + IA64_VPD_BASE_OFFSET]), then deposit the live
+ * cr.ipsr's cpl, ri, and the field group starting at the BE bit (5 bits;
+ * NOTE(review): width 5 covers BE and its neighbours — confirm intent)
+ * back into vpd.vpsr, since the epc-style call changes them.
+ * Clobbers r4-r7, r16, r20, r22, r24, r25, r30, b0.
+ */
+#define PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \
+    /* begin to call pal vps sync_read and cleanup psr.pl */ \
+    add r25=IA64_VPD_BASE_OFFSET, r21; \
+    movl r20=__vsa_base; \
+    ;; \
+    ld8 r25=[r25]; /* read vpd base */ \
+    ld8 r20=[r20]; /* read entry point */ \
+    ;; \
+    mov r6=r25; \
+    add r20=PAL_VPS_SYNC_READ,r20; \
+    ;; \
+{ .mii; \
+    add r22=VPD(VPSR),r25; \
+    mov r24=ip; \
+    mov b0=r20; \
+    ;; \
+}; \
+{ .mmb; \
+    add r24 = 0x20, r24; \
+    mov r16 = cr.ipsr; /* Temp workaround since psr.ic is off */ \
+    br.cond.sptk b0; /* call the service */ \
+    ;; \
+}; \
+    ld8 r7=[r22]; \
+    /* deposite ipsr bit cpl into vpd.vpsr, since epc will change */ \
+    extr.u r30=r16, IA64_PSR_CPL0_BIT, 2; \
+    ;; \
+    dep r7=r30, r7, IA64_PSR_CPL0_BIT, 2; \
+    ;; \
+    extr.u r30=r16, IA64_PSR_BE_BIT, 5; \
+    ;; \
+    dep r7=r30, r7, IA64_PSR_BE_BIT, 5; \
+    ;; \
+    extr.u r30=r16, IA64_PSR_RI_BIT, 2; \
+    ;; \
+    dep r7=r30, r7, IA64_PSR_RI_BIT, 2; \
+    ;; \
+    st8 [r22]=r7; \
+    ;;
+
+
+
+/* The `current' vcpu pointer lives in r21 on this path; the kernel-register
+ * form (IA64_KR(CURRENT)) is kept for reference but unused. */
+#define IA64_CURRENT_REG    IA64_KR(CURRENT)  /* r21 is reserved for current pointer */
+//#define VMX_MINSTATE_GET_CURRENT(reg)   mov reg=IA64_CURRENT_REG
+#define VMX_MINSTATE_GET_CURRENT(reg)   mov reg=r21
+
+/*
+ * VMX_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ * psr.ic: off
+ * r31: contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ * psr.ic: off
+ * r2 = points to &pt_regs.r16
+ * r8 = contents of ar.ccv
+ * r9 = contents of ar.csd
+ * r10 = contents of ar.ssd
+ * r11 = FPSR_DEFAULT
+ * r12 = kernel sp (kernel virtual address)
+ * r13 = points to current task_struct (kernel virtual address)
+ * p15 = TRUE if psr.i is set in cr.ipsr
+ * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ * preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro. This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+/*
+ * Implementation notes (contract documented in the comment block above):
+ *  - First installs Xen's region registers for regions 5, 6 and 7
+ *    (IA64_REGION_ID_KERNEL rid; granule-sized for 6/7, PAGE_SHIFT+VHPT
+ *    for 5) and serializes.
+ *  - Panics via vmx_panic if cr.ipsr.vm is clear (p6 set when the bit is 0),
+ *    i.e. this path must only be entered from VMX guest context.
+ *  - Then performs the standard minstate save into pt_regs at r1, spilling
+ *    r4-r7 unconditionally here (callers gate that with pEml/pNonEml
+ *    elsewhere — NOTE(review): the predicates set by the dispatchers are
+ *    not tested in this macro; confirm against VMX_SAVE_MIN callers).
+ *  - COVER / SAVE_IFS / EXTRA are spliced in at the usual minstate points.
+ */
+#define VMX_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
+/* switch rr7 */ \
+    movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \
+    movl r17=(7<<61); \
+    movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \
+    movl r22=(6<<61); \
+    movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1); \
+    movl r23=(5<<61); \
+    ;; \
+    mov rr[r17]=r16; \
+    mov rr[r22]=r20; \
+    mov rr[r23]=r18; \
+    ;; \
+    srlz.i; \
+    ;; \
+    VMX_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
+    mov r27=ar.rsc; /* M */ \
+    mov r20=r1; /* A */ \
+    mov r26=ar.unat; /* M */ \
+    mov r29=cr.ipsr; /* M */ \
+    COVER; /* B;; (or nothing) */ \
+    ;; \
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
+(p6) br.sptk.few vmx_panic; \
+    mov r1=r16; \
+/* mov r21=r16; */ \
+    /* switch from user to kernel RBS: */ \
+    ;; \
+    invala; /* M */ \
+    SAVE_IFS; \
+    ;; \
+    VMX_MINSTATE_START_SAVE_MIN \
+    adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
+    adds r16=PT(CR_IPSR),r1; \
+    ;; \
+    lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
+    st8 [r16]=r29; /* save cr.ipsr */ \
+    ;; \
+    lfetch.fault.excl.nt1 [r17]; \
+    tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
+    mov r29=b0 \
+    ;; \
+    adds r16=PT(R8),r1; /* initialize first base pointer */ \
+    adds r17=PT(R9),r1; /* initialize second base pointer */ \
+    ;; \
+.mem.offset 0,0; st8.spill [r16]=r8,16; \
+.mem.offset 8,0; st8.spill [r17]=r9,16; \
+    ;; \
+.mem.offset 0,0; st8.spill [r16]=r10,24; \
+.mem.offset 8,0; st8.spill [r17]=r11,24; \
+    ;; \
+    mov r8=ar.pfs; /* I */ \
+    mov r9=cr.iip; /* M */ \
+    mov r10=ar.fpsr; /* M */ \
+    ;; \
+    st8 [r16]=r9,16; /* save cr.iip */ \
+    st8 [r17]=r30,16; /* save cr.ifs */ \
+    sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
+    ;; \
+    st8 [r16]=r26,16; /* save ar.unat */ \
+    st8 [r17]=r8,16; /* save ar.pfs */ \
+    shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
+    ;; \
+    st8 [r16]=r27,16; /* save ar.rsc */ \
+    st8 [r17]=r28,16; /* save ar.rnat */ \
+    ;; /* avoid RAW on r16 & r17 */ \
+    st8 [r16]=r23,16; /* save ar.bspstore */ \
+    st8 [r17]=r31,16; /* save predicates */ \
+    ;; \
+    st8 [r16]=r29,16; /* save b0 */ \
+    st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
+    ;; \
+.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
+.mem.offset 8,0; st8.spill [r17]=r12,16; \
+    adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
+    ;; \
+.mem.offset 0,0; st8.spill [r16]=r13,16; \
+.mem.offset 8,0; st8.spill [r17]=r10,16; /* save ar.fpsr */ \
+    mov r13=r21; /* establish `current' */ \
+    ;; \
+.mem.offset 0,0; st8.spill [r16]=r15,16; \
+.mem.offset 8,0; st8.spill [r17]=r14,16; \
+    ;; \
+.mem.offset 0,0; st8.spill [r16]=r2,16; \
+.mem.offset 8,0; st8.spill [r17]=r3,16; \
+    adds r2=PT(F6),r1; \
+    ;; \
+ .mem.offset 0,0; st8.spill [r16]=r4,16; \
+ .mem.offset 8,0; st8.spill [r17]=r5,16; \
+    ;; \
+ .mem.offset 0,0; st8.spill [r16]=r6,16; \
+ .mem.offset 8,0; st8.spill [r17]=r7,16; \
+    mov r20=ar.ccv; \
+    ;; \
+    mov r18=cr.iipa; \
+    mov r4=cr.isr; \
+    mov r22=ar.unat; \
+    ;; \
+    st8 [r16]=r18,16; \
+    st8 [r17]=r4; \
+    ;; \
+    adds r16=PT(EML_UNAT),r1; \
+    adds r17=PT(AR_CCV),r1; \
+    ;; \
+    st8 [r16]=r22,8; \
+    st8 [r17]=r20; \
+    mov r4=r24; \
+    mov r5=r25; \
+    ;; \
+    st8 [r16]=r0; \
+    EXTRA; \
+    mov r9=ar.csd; \
+    mov r10=ar.ssd; \
+    movl r11=FPSR_DEFAULT; /* L-unit */ \
+    movl r1=__gp; /* establish kernel global pointer */ \
+    ;; \
+    PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \
+    VMX_MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ * psr.ic: on
+ * r2: points to &pt_regs.f6
+ * r3: points to &pt_regs.f7
+ * r4,r5,scrach
+ * r6: points to vpd
+ * r7: vpsr
+ * r9: contents of ar.csd
+ * r10: contents of ar.ssd
+ * r11: FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+/*
+ * Implementation notes (contract documented in the comment block above):
+ * the guest's active register bank is chosen from vpsr.bn (r7): pBN0
+ * selects the VPD's bank-0 save area (VBGR/VBNAT), pBN1 the bank-1 area
+ * (VGR/VNAT).  r16-r31 are spilled there, the resulting ar.unat snapshot
+ * is stored to the chosen NaT slot, then f6-f11, b6/b7 and ar.csd/ar.ssd
+ * are saved into pt_regs via r2/r3.
+ */
+#define VMX_SAVE_REST \
+    tbit.z pBN0,pBN1=r7,IA64_PSR_BN_BIT; /* guest bank0 or bank1 ? */ \
+    ;; \
+(pBN0) add r4=VPD(VBGR),r6; \
+(pBN0) add r5=VPD(VBGR)+0x8,r6; \
+(pBN0) add r7=VPD(VBNAT),r6; \
+    ;; \
+(pBN1) add r5=VPD(VGR)+0x8,r6; \
+(pBN1) add r4=VPD(VGR),r6; \
+(pBN1) add r7=VPD(VNAT),r6; \
+    ;; \
+.mem.offset 0,0; st8.spill [r4]=r16,16; \
+.mem.offset 8,0; st8.spill [r5]=r17,16; \
+    ;; \
+.mem.offset 0,0; st8.spill [r4]=r18,16; \
+.mem.offset 8,0; st8.spill [r5]=r19,16; \
+    ;; \
+.mem.offset 0,0; st8.spill [r4]=r20,16; \
+.mem.offset 8,0; st8.spill [r5]=r21,16; \
+    mov r18=b6; \
+    ;; \
+.mem.offset 0,0; st8.spill [r4]=r22,16; \
+.mem.offset 8,0; st8.spill [r5]=r23,16; \
+    mov r19=b7; \
+    ;; \
+.mem.offset 0,0; st8.spill [r4]=r24,16; \
+.mem.offset 8,0; st8.spill [r5]=r25,16; \
+    ;; \
+.mem.offset 0,0; st8.spill [r4]=r26,16; \
+.mem.offset 8,0; st8.spill [r5]=r27,16; \
+    ;; \
+.mem.offset 0,0; st8.spill [r4]=r28,16; \
+.mem.offset 8,0; st8.spill [r5]=r29,16; \
+    ;; \
+.mem.offset 0,0; st8.spill [r4]=r30,16; \
+.mem.offset 8,0; st8.spill [r5]=r31,16; \
+    ;; \
+    mov r30=ar.unat; \
+    ;; \
+    st8 [r7]=r30; \
+    mov ar.fpsr=r11; /* M-unit */ \
+    ;; \
+    stf.spill [r2]=f6,32; \
+    stf.spill [r3]=f7,32; \
+    ;; \
+    stf.spill [r2]=f8,32; \
+    stf.spill [r3]=f9,32; \
+    ;; \
+    stf.spill [r2]=f10; \
+    stf.spill [r3]=f11; \
+    ;; \
+    adds r2=PT(B6)-PT(F10),r2; \
+    adds r3=PT(B7)-PT(F11),r3; \
+    ;; \
+    st8 [r2]=r18,16; /* b6 */ \
+    st8 [r3]=r19,16; /* b7 */ \
+    ;; \
+    st8 [r2]=r9; /* ar.csd */ \
+    st8 [r3]=r10; /* ar.ssd */ \
+    ;;
+
+/* Convenience wrappers: with `cover' cr.ifs is captured into r30; the
+ * _R19 variant additionally latches r19 into r15 (the EXTRA slot). */
+#define VMX_SAVE_MIN_WITH_COVER   VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs,)
+#define VMX_SAVE_MIN_WITH_COVER_R19 VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
+#define VMX_SAVE_MIN      VMX_DO_SAVE_MIN(     , mov r30=r0, )
diff --git a/xen/arch/ia64/vmx_phy_mode.c b/xen/arch/ia64/vmx_phy_mode.c
new file mode 100644
index 0000000000..def87baf9e
--- /dev/null
+++ b/xen/arch/ia64/vmx_phy_mode.c
@@ -0,0 +1,393 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_phy_mode.c: emulating domain physical mode.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Arun Sharma (arun.sharma@intel.com)
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ */
+
+
+#include <asm/processor.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_phy_mode.h>
+#include <xen/sched.h>
+#include <asm/pgtable.h>
+
+
+/*
+ * Classification of the eight (it,dt,rt) PSR-bit combinations: indices
+ * with all three set run fully virtual, three specific partial/none
+ * combinations are emulated as guest physical mode, the rest are invalid.
+ * Indexed the same way as mm_switch_table (see mm_switch_action()).
+ */
+int valid_mm_mode[8] = {
+    GUEST_PHYS, /* (it, dt, rt) -> (0, 0, 0) */
+    INV_MODE,
+    INV_MODE,
+    GUEST_PHYS, /* (it, dt, rt) -> (0, 1, 1) */
+    INV_MODE,
+    GUEST_PHYS, /* (it, dt, rt) -> (1, 0, 1) */
+    INV_MODE,
+    GUEST_VIRT, /* (it, dt, rt) -> (1, 1, 1).*/
+};
+
+/*
+ * Special notes:
+ * - Index by it/dt/rt sequence
+ * - Only existing mode transitions are allowed in this table
+ * - RSE is placed at lazy mode when emulating guest partial mode
+ * - If gva happens to be rr0 and rr4, only allowed case is identity
+ * mapping (gva=gpa), or panic! (How?)
+ */
+/*
+ * Transition-action table: rows are the old mode index, columns the new
+ * mode index (both MODE_IND(psr), i.e. the it/dt/rt bits — see
+ * mm_switch_action()).  Entries are SW_V2P / SW_P2V / SW_SELF / SW_NOP;
+ * 0 marks transitions that never occur and trip the panic in
+ * switch_mm_mode().
+ */
+int mm_switch_table[8][8] = {
+    /* 2004/09/12(Kevin): Allow switch to self */
+    /*
+     *  (it,dt,rt): (0,0,0) -> (1,1,1)
+     *  This kind of transition usually occurs in the very early
+     *  stage of Linux boot up procedure. Another case is in efi
+     *  and pal calls. (see "arch/ia64/kernel/head.S")
+     *
+     *  (it,dt,rt): (0,0,0) -> (0,1,1)
+     *  This kind of transition is found when OSYa exits efi boot
+     *  service. Due to gva = gpa in this case (Same region),
+     *  data access can be satisfied though itlb entry for physical
+     *  emulation is hit.
+     */
+    SW_SELF,0,  0,  SW_NOP, 0,  0,  0,  SW_P2V,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    /*
+     *  (it,dt,rt): (0,1,1) -> (1,1,1)
+     *  This kind of transition is found in OSYa.
+     *
+     *  (it,dt,rt): (0,1,1) -> (0,0,0)
+     *  This kind of transition is found in OSYa
+     */
+    SW_NOP, 0,  0,  SW_SELF,0,  0,  0,  SW_P2V,
+    /* (1,0,0)->(1,1,1) */
+    0,  0,  0,  0,  0,  0,  0,  SW_P2V,
+    /*
+     *  (it,dt,rt): (1,0,1) -> (1,1,1)
+     *  This kind of transition usually occurs when Linux returns
+     *  from the low level TLB miss handlers.
+     *  (see "arch/ia64/kernel/ivt.S")
+     */
+    0,  0,  0,  0,  0,  SW_SELF,0,  SW_P2V,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    /*
+     *  (it,dt,rt): (1,1,1) -> (1,0,1)
+     *  This kind of transition usually occurs in Linux low level
+     *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
+     *
+     *  (it,dt,rt): (1,1,1) -> (0,0,0)
+     *  This kind of transition usually occurs in pal and efi calls,
+     *  which requires running in physical mode.
+     *  (see "arch/ia64/kernel/head.S")
+     *  (1,1,1)->(1,0,0)
+     */
+
+    SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF,
+};
+
+/*
+ * One-time setup of guest physical-mode emulation for a vcpu:
+ * builds the two emulation region registers (rr0/rr4) with per-domain
+ * rids derived from XEN_RR7_RID and the domain id, 4K page size and the
+ * VHPT walker enabled, marks the vcpu as starting in physical mode, and
+ * installs the emulation rids into machine rr0/rr4 with psr.ic off.
+ */
+void
+physical_mode_init(VCPU *vcpu)
+{
+    UINT64 psr;
+    struct domain * d = vcpu->domain;
+
+    /* Per-domain rid space: rr0 gets slot 0, rr4 gets slot 4 (below). */
+    vcpu->domain->arch.emul_phy_rr0.rid = XEN_RR7_RID+((d->domain_id)<<3);
+    /* FIXME */
+#if 0
+    vcpu->domain->arch.emul_phy_rr0.ps = 28;  /* set page size to 256M */
+#endif
+    vcpu->domain->arch.emul_phy_rr0.ps = EMUL_PHY_PAGE_SHIFT;  /* set page size to 4k */
+    vcpu->domain->arch.emul_phy_rr0.ve = 1; /* enable VHPT walker on this region */
+
+    vcpu->domain->arch.emul_phy_rr4.rid = XEN_RR7_RID + ((d->domain_id)<<3) + 4;
+    vcpu->domain->arch.emul_phy_rr4.ps = EMUL_PHY_PAGE_SHIFT;  /* set page size to 4k */
+    vcpu->domain->arch.emul_phy_rr4.ve = 1; /* enable VHPT walker on this region */
+
+    vcpu->arch.old_rsc = 0;
+    vcpu->arch.mode_flags = GUEST_IN_PHY;
+
+    /* Region-register updates must be done with interruption collection off. */
+    psr = ia64_clear_ic();
+
+    ia64_set_rr((VRN0<<VRN_SHIFT), vcpu->domain->arch.emul_phy_rr0.rrval);
+    ia64_srlz_d();
+    ia64_set_rr((VRN4<<VRN_SHIFT), vcpu->domain->arch.emul_phy_rr4.rrval);
+    ia64_srlz_d();
+#if 0
+    /* FIXME: temp workaround to support guest physical mode */
+ia64_itr(0x1, IA64_TEMP_PHYSICAL, dom0_start,
+     pte_val(pfn_pte((dom0_start >> PAGE_SHIFT), PAGE_KERNEL)),
+     28);
+ia64_itr(0x2, IA64_TEMP_PHYSICAL, dom0_start,
+     pte_val(pfn_pte((dom0_start >> PAGE_SHIFT), PAGE_KERNEL)),
+     28);
+ia64_srlz_i();
+#endif
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages);
+/*
+ * Handle an instruction-TLB miss while the guest runs with psr.it off
+ * (emulated physical mode): treat vadr as a guest physical address
+ * (bit 63 stripped by the <<1 >>13 shift pair, 4K frame number), map it
+ * to a machine frame via get_mfn(), and insert a write-back ITC entry
+ * with the guest's privilege level, with psr.ic temporarily cleared.
+ */
+void
+physical_itlb_miss(VCPU *vcpu, u64 vadr)
+{
+    u64 psr;
+    IA64_PSR vpsr;
+    u64 mppn,gppn;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    gppn=(vadr<<1)>>13;              /* guest pfn: drop bit 63, 4K pages */
+    mppn = get_mfn(DOMID_SELF,gppn,1);
+    /* build pte: frame | guest cpl as pl | write-back memory attribute */
+    mppn=(mppn<<12)|(vpsr.cpl<<7)|PHY_PAGE_WB;
+
+    psr=ia64_clear_ic();
+    ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+/*
+ * Handle a data-TLB miss while the guest runs with psr.dt off (emulated
+ * physical mode).  Same translation as physical_itlb_miss(), but the
+ * memory attribute follows the ia64 physical-addressing convention:
+ * bit 63 set selects uncacheable, otherwise write-back.  Inserts a DTC
+ * entry with psr.ic temporarily cleared.
+ */
+void
+physical_dtlb_miss(VCPU *vcpu, u64 vadr)
+{
+    u64 psr;
+    IA64_PSR vpsr;
+    u64 mppn,gppn;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    gppn=(vadr<<1)>>13;              /* guest pfn: drop bit 63, 4K pages */
+    mppn = get_mfn(DOMID_SELF,gppn,1);
+    mppn=(mppn<<12)|(vpsr.cpl<<7);
+    if(vadr>>63)                     /* bit 63: UC space in physical mode */
+        mppn |= PHY_PAGE_UC;
+    else
+        mppn |= PHY_PAGE_WB;
+
+    psr=ia64_clear_ic();
+    ia64_itc(2,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+/*
+ * Initialize the vcpu's virtual region registers: 0x38 for regions 0-5
+ * and 0x60 for regions 6-7 (raw rrval encodings — presumably differing in
+ * page size/ve; TODO confirm field meaning), and precompute the machine
+ * values for rr5-rr7 via vmx_vrrtomrr().
+ */
+void
+vmx_init_all_rr(VCPU *vcpu)
+{
+    VMX(vcpu,vrr[VRN0]) = 0x38;
+    VMX(vcpu,vrr[VRN1]) = 0x38;
+    VMX(vcpu,vrr[VRN2]) = 0x38;
+    VMX(vcpu,vrr[VRN3]) = 0x38;
+    VMX(vcpu,vrr[VRN4]) = 0x38;
+    VMX(vcpu,vrr[VRN5]) = 0x38;
+    VMX(vcpu,vrr[VRN6]) = 0x60;
+    VMX(vcpu,vrr[VRN7]) = 0x60;
+
+    VMX(vcpu,mrr5) = vmx_vrrtomrr(vcpu, 0x38);
+    VMX(vcpu,mrr6) = vmx_vrrtomrr(vcpu, 0x60);
+    VMX(vcpu,mrr7) = vmx_vrrtomrr(vcpu, 0x60);
+}
+
+/*
+ * Load this vcpu's region registers into the machine (context switch-in).
+ * rr0/rr4 get either the physical-emulation values or the translated
+ * virtual values depending on the vcpu's current mode; rr1-rr3 are loaded
+ * unconditionally.  Runs with psr.ic cleared around the rr updates.
+ */
+void
+vmx_load_all_rr(VCPU *vcpu)
+{
+    unsigned long psr;
+
+    psr = ia64_clear_ic();
+
+    /* WARNING: not allow co-exist of both virtual mode and physical
+     * mode in same region
+     */
+    if (is_physical_mode(vcpu)) {
+        ia64_set_rr((VRN0 << VRN_SHIFT),
+            vcpu->domain->arch.emul_phy_rr0.rrval);
+        ia64_set_rr((VRN4 << VRN_SHIFT),
+            vcpu->domain->arch.emul_phy_rr4.rrval);
+    } else {
+        ia64_set_rr((VRN0 << VRN_SHIFT),
+            vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0])));
+        ia64_set_rr((VRN4 << VRN_SHIFT),
+            vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4])));
+    }
+
+#if 1
+    /* rr567 will be postponed to last point when resuming back to guest */
+    ia64_set_rr((VRN1 << VRN_SHIFT),
+            vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN1])));
+    ia64_set_rr((VRN2 << VRN_SHIFT),
+            vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN2])));
+    ia64_set_rr((VRN3 << VRN_SHIFT),
+            vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN3])));
+#endif
+    ia64_srlz_d();
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+}
+
+/*
+ * Install the physical-mode emulation rids into machine rr0/rr4
+ * (entering emulated guest physical mode).  psr.ic is cleared around
+ * the region-register updates.
+ */
+void
+switch_to_physical_rid(VCPU *vcpu)
+{
+    UINT64 psr;
+
+    /* Save original virtual mode rr[0] and rr[4] */
+
+    psr=ia64_clear_ic();
+    ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->domain->arch.emul_phy_rr0.rrval);
+    ia64_srlz_d();
+    ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->domain->arch.emul_phy_rr4.rrval);
+    ia64_srlz_d();
+
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+
+/*
+ * Install the guest's virtual rids into machine rr0/rr4 (leaving emulated
+ * physical mode): read the virtual rr value, map the rid through
+ * VRID_2_MRID, force ve=1, and write it back, with psr.ic cleared.
+ */
+void
+switch_to_virtual_rid(VCPU *vcpu)
+{
+    UINT64 psr;
+    ia64_rr mrr;
+
+    psr=ia64_clear_ic();
+
+    mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT);
+    mrr.rid = VRID_2_MRID(vcpu,mrr.rid);
+//VRID_2_MRID(vcpu,mrr.rid);
+    mrr.ve = 1;
+    ia64_set_rr(VRN0<<VRN_SHIFT, mrr.rrval );
+    ia64_srlz_d();
+    mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT);
+    mrr.rid = VRID_2_MRID(vcpu,mrr.rid);
+    mrr.ve = 1;
+    ia64_set_rr(VRN4<<VRN_SHIFT, mrr.rrval );
+    ia64_srlz_d();
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+/* Look up the transition action (SW_V2P/SW_P2V/SW_SELF/SW_NOP/0) for a
+ * mode change, indexing mm_switch_table by the it/dt/rt mode index of the
+ * old and new PSR values. */
+static int mm_switch_action(IA64_PSR opsr, IA64_PSR npsr)
+{
+    int old_mode = MODE_IND(opsr);
+    int new_mode = MODE_IND(npsr);
+
+    return mm_switch_table[old_mode][new_mode];
+}
+
+/*
+ * Perform a guest virtual<->physical mode transition according to the
+ * action in mm_switch_table.  V2P saves ar.rsc and forces the RSE into
+ * enforced-lazy mode while in physical emulation; P2V restores it.
+ * SW_SELF/SW_NOP only log; an unlisted transition is a fatal bug.
+ */
+void
+switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
+{
+    int act;
+    REGS * regs=vcpu_regs(vcpu);
+    act = mm_switch_action(old_psr, new_psr);
+    switch (act) {
+    case SW_V2P:
+        vcpu->arch.old_rsc = regs->ar_rsc;
+        switch_to_physical_rid(vcpu);
+        /*
+         * Set rse to enforced lazy, to prevent active rse save/restor when
+         * guest physical mode.
+         */
+        regs->ar_rsc &= ~(IA64_RSC_MODE);
+        vcpu->arch.mode_flags |= GUEST_IN_PHY;
+        break;
+    case SW_P2V:
+        switch_to_virtual_rid(vcpu);
+        /*
+         * recover old mode which is saved when entering
+         * guest physical mode
+         */
+        regs->ar_rsc = vcpu->arch.old_rsc;
+        vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
+        break;
+    case SW_SELF:
+        printf("Switch to self-0x%lx!!! MM mode doesn't change...\n",
+            old_psr.val);
+        break;
+    case SW_NOP:
+        printf("No action required for mode transition: (0x%lx -> 0x%lx)\n",
+            old_psr.val, new_psr.val);
+        break;
+    default:
+        /* Sanity check */
+        printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val);
+        panic("Unexpected virtual <--> physical mode transition");
+        break;
+    }
+    return;
+}
+
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ * - insertions (itc.*, itr.*)
+ * - purges (ptc.* and ptr.*)
+ * - tpa
+ * - tak
+ * - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+/*
+ * Compare the translation-related PSR bits (it/dt/rt) of the old and new
+ * guest PSR values and perform a virtual<->physical mode switch via
+ * switch_mm_mode() when any of them changed.
+ */
+void
+check_mm_mode_switch (VCPU *vcpu,  IA64_PSR old_psr, IA64_PSR new_psr)
+{
+    /* Only the it/dt/rt bits select the memory-management mode. */
+    if ( (old_psr.dt != new_psr.dt ) ||
+         (old_psr.it != new_psr.it ) ||
+         (old_psr.rt != new_psr.rt )
+         ) {
+        switch_mm_mode (vcpu, old_psr, new_psr);
+    }
+
+    return;   /* BUG FIX: function is void; original ended with `return 0;' */
+}
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ * - insertions (itc.*, itr.*)
+ * - purges (ptc.* and ptr.*)
+ * - tpa
+ * - tak
+ * - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+/*
+ * If the vcpu is in emulated physical mode, temporarily restore the
+ * guest's virtual rids so a following TLB insertion/purge targets the
+ * virtual rid rather than the physical-emulation rid (see the comment
+ * block above for the list of such operations).
+ */
+void
+prepare_if_physical_mode(VCPU *vcpu)
+{
+    if (!is_physical_mode(vcpu))
+        return;
+    switch_to_virtual_rid(vcpu);
+}
+
+/* Recover always follows prepare: re-install the physical-emulation rids
+ * if the vcpu is (still) in emulated physical mode. */
+void
+recover_if_physical_mode(VCPU *vcpu)
+{
+    if (!is_physical_mode(vcpu))
+        return;
+    switch_to_physical_rid(vcpu);
+}
+
diff --git a/xen/arch/ia64/vmx_process.c b/xen/arch/ia64/vmx_process.c
new file mode 100644
index 0000000000..2c541af113
--- /dev/null
+++ b/xen/arch/ia64/vmx_process.c
@@ -0,0 +1,345 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_process.c: handling VMX architecture-related VM exits
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <asm/ptrace.h>
+#include <xen/delay.h>
+
+#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
+#include <asm/sal.h> /* FOR struct ia64_sal_retval */
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+//#include <asm/ldt.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <asm/regionreg.h>
+#include <asm/privop.h>
+#include <asm/ia64_int.h>
+#include <asm/hpsim_ssc.h>
+#include <asm/dom_fw.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/kregs.h>
+#include <asm/vmx_mm_def.h>
+/* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
+
+
+extern struct ia64_sal_retval pal_emulator_static(UINT64);
+extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+extern void rnat_consumption (VCPU *vcpu);
+
+/*
+ * Handle a break fault from a VMX guest.  If the immediate matches the
+ * domain's hypercall break (d->arch.breakimm), emulate the firmware
+ * hypercall selected by regs->r2 (PAL / SAL / EFI) and advance the guest
+ * iip; otherwise reflect the break (vector 11) back into the guest.
+ */
+IA64FAULT
+vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim)
+{
+    static int first_time = 1;
+    struct domain *d = (struct domain *) current->domain;
+    /* BUG FIX: `current' is a struct vcpu *; the original cast it to
+     * (struct domain *), which only compiled by accident. */
+    struct vcpu *v = current;
+    extern unsigned long running_on_sim;
+    unsigned long i, sal_param[8];
+
+#if 0
+    if (first_time) {
+        if (platform_is_hp_ski()) running_on_sim = 1;
+        else running_on_sim = 0;
+        first_time = 0;
+    }
+    if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant
+        if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs);
+        else do_ssc(vcpu_get_gr(current,36), regs);
+    }
+#endif
+    if (iim == d->arch.breakimm) {
+        struct ia64_sal_retval x;
+        switch (regs->r2) {
+            case FW_HYPERCALL_PAL_CALL:
+                //printf("*** PAL hypercall: index=%d\n",regs->r28);
+                //FIXME: This should call a C routine
+                x = pal_emulator_static(VMX_VPD(v, vgr[12]));
+                regs->r8 = x.status; regs->r9 = x.v0;
+                regs->r10 = x.v1; regs->r11 = x.v2;
+#if 0
+                if (regs->r8)
+                    printk("Failed vpal emulation, with index:0x%lx\n",
+                            VMX_VPD(v, vgr[12]));
+#endif
+                break;
+            case FW_HYPERCALL_SAL_CALL:
+                for (i = 0; i < 8; i++)
+                    vmx_vcpu_get_gr(v, 32+i, &sal_param[i]);
+                x = sal_emulator(sal_param[0], sal_param[1],
+                                 sal_param[2], sal_param[3],
+                                 sal_param[4], sal_param[5],
+                                 sal_param[6], sal_param[7]);
+                regs->r8 = x.status; regs->r9 = x.v0;
+                regs->r10 = x.v1; regs->r11 = x.v2;
+#if 0
+                if (regs->r8)
+                    printk("Failed vsal emulation, with index:0x%lx\n",
+                           sal_param[0]);
+#endif
+                break;
+            case FW_HYPERCALL_EFI_RESET_SYSTEM:
+                printf("efi.reset_system called ");
+                if (current->domain == dom0) {
+                    printf("(by dom0)\n ");
+                    (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+                }
+                printf("(not supported for non-0 domain)\n");
+                regs->r8 = EFI_UNSUPPORTED;
+                break;
+            case FW_HYPERCALL_EFI_GET_TIME:
+                {
+                unsigned long *tv, *tc;
+                fooefi();
+                vmx_vcpu_get_gr(v, 32, &tv);
+                vmx_vcpu_get_gr(v, 33, &tc);
+                printf("efi_get_time(%p,%p) called...",tv,tc);
+                tv = __va(translate_domain_mpaddr(tv));
+                if (tc) tc = __va(translate_domain_mpaddr(tc));
+                regs->r8 = (*efi.get_time)(tv,tc);
+                printf("and returns %lx\n",regs->r8);
+                }
+                break;
+            case FW_HYPERCALL_EFI_SET_TIME:
+            case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+            case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+                // FIXME: need fixes in efi.h from 2.6.9
+            case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+                // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
+                // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
+                // POINTER ARGUMENTS WILL BE VIRTUAL!!
+            case FW_HYPERCALL_EFI_GET_VARIABLE:
+                // FIXME: need fixes in efi.h from 2.6.9
+            case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+            case FW_HYPERCALL_EFI_SET_VARIABLE:
+            case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+                // FIXME: need fixes in efi.h from 2.6.9
+                regs->r8 = EFI_UNSUPPORTED;
+                break;
+        }
+#if 0
+        if (regs->r8)
+            printk("Failed vgfw emulation, with index:0x%lx\n",
+                    regs->r2);
+#endif
+        vmx_vcpu_increment_iip(current);
+    } else
+        vmx_reflect_interruption(ifa,isr,iim,11);
+
+    /* BUG FIX: the function is declared IA64FAULT but originally fell off
+     * the end without returning a value. */
+    return IA64_NO_FAULT;
+}
+
+/* Map an interruption vector number (0..67) to its offset in the IVT;
+ * used by vmx_reflect_interruption() to build the guest-visible vector. */
+static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800,
+    0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000,
+    0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600,
+    0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000,
+    0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00,
+    0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400,
+    0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00,
+    0x7f00,
+};
+
+
+
+/*
+ * Reflect an interruption into the guest: record isr/iipa (and iim for
+ * break/speculation vectors, or ifa/itir/iha otherwise) in the vpd
+ * control registers, then inject the IVT-offset vector.  A fault other
+ * than vector 5 arriving with guest psr.ic clear is a nested guest
+ * fault and is fatal.
+ */
+void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim,
+     UINT64 vector)
+{
+    VCPU *vcpu = current;
+    REGS *regs=vcpu_regs(vcpu);
+    /* BUG FIX (cleanup): dropped unused local `viha'. */
+    UINT64 vpsr = vmx_vcpu_get_psr(vcpu);
+    if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){
+        panic("Guest nested fault!");
+    }
+    VPD_CR(vcpu,isr)=isr;
+    VPD_CR(vcpu,iipa) = regs->cr_iip;
+    vector=vec2off[vector];       /* vector number -> IVT offset */
+    if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+        VPD_CR(vcpu,iim) = iim;
+    else {
+        set_ifa_itir_iha(vcpu,ifa,1,1,1);
+    }
+    inject_guest_interruption(vcpu, vector);
+}
+
+// ONLY gets called from ia64_leave_kernel
+// ONLY call with interrupts disabled?? (else might miss one?)
+// NEVER successful if already reflecting a trap/fault because psr.i==0
+// ONLY gets called from ia64_leave_kernel
+// ONLY call with interrupts disabled?? (else might miss one?)
+// NEVER successful if already reflecting a trap/fault because psr.i==0
+void vmx_deliver_pending_interrupt(struct pt_regs *regs)
+{
+    struct domain *d = current->domain;
+    struct vcpu *v = current;
+    // FIXME: Will this work properly if doing an RFI???
+    if (!is_idle_task(d) ) { // always comes from guest
+        //vcpu_poke_timer(v);
+        //if (vcpu_deliverable_interrupts(v)) {
+        //  unsigned long isr = regs->cr_ipsr & IA64_PSR_RI;
+        //  foodpi();
+        //  reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR);
+        //}
+        extern void vmx_dorfirfi(void);
+        struct pt_regs *user_regs = vcpu_regs(current);
+
+        // Nested interrupts would see the inner frame, not the guest's.
+        if (user_regs != regs)
+            printk("WARNING: checking pending interrupt in nested interrupt!!!\n");
+        // Skip if we are about to execute dorfirfi; the first word at the
+        // symbol is dereferenced (presumably the function-descriptor entry
+        // address — NOTE(review): confirm the descriptor indirection).
+        if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi)
+            return;
+        vmx_check_pending_irq(v);
+    }
+}
+
+extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr);
+
+/* We came here because the H/W VHPT walker failed to find an entry */
+void vmx_hpw_miss(VCPU *vcpu, u64 vec, u64 vadr)
+{
+ IA64_PSR vpsr;
+ CACHE_LINE_TYPE type;
+ u64 vhpt_adr;
+ ISR misr;
+ ia64_rr vrr;
+ REGS *regs;
+ thash_cb_t *vtlb, *vhpt;
+ thash_data_t *data, me;
+ vtlb=vmx_vcpu_get_vtlb(vcpu);
+#ifdef VTLB_DEBUG
+ check_vtlb_sanity(vtlb);
+ dump_vtlb(vtlb);
+#endif
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ regs = vcpu_regs(vcpu);
+ misr.val=regs->cr_isr;
+/* TODO
+ if(vcpu->domain->id && vec == 2 &&
+ vpsr.dt == 0 && is_gpa_io(MASK_PMA(vaddr))){
+ emulate_ins(&v);
+ return;
+ }
+*/
+
+ if((vec==1)&&(!vpsr.it)){
+ physical_itlb_miss(vcpu, vadr);
+ return;
+ }
+ if((vec==2)&&(!vpsr.dt)){
+ physical_dtlb_miss(vcpu, vadr);
+ return;
+ }
+ vrr = vmx_vcpu_rr(vcpu,vadr);
+ if(vec == 1) type = ISIDE_TLB;
+ else if(vec == 2) type = DSIDE_TLB;
+ else panic("wrong vec\n");
+
+// prepare_if_physical_mode(vcpu);
+
+ if(data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type)){
+ if ( data->ps != vrr.ps ) {
+ machine_tlb_insert(vcpu, data);
+ }
+ else {
+ thash_insert(vtlb->ts->vhpt,data,vadr);
+ }
+ }else if(type == DSIDE_TLB){
+ if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ alt_dtlb(vcpu, vadr);
+ return IA64_FAULT;
+ } else{
+ if(misr.sp){
+ //TODO lds emulation
+ panic("Don't support speculation load");
+ }else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ } else{
+ vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+ vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+ data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB);
+ if(data){
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ dtlb_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }else{
+ if(misr.sp){
+ //TODO lds emulation
+ panic("Don't support speculation load");
+ }else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ }else{
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ dvhpt_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }else{
+ if(misr.sp){
+ //TODO lds emulation
+ panic("Don't support speculation load");
+ }else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ }
+ }
+ }else if(type == ISIDE_TLB){
+ if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){
+ if(!vpsr.ic){
+ misr.ni=1;
+ }
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ alt_itlb(vcpu, vadr);
+ return IA64_FAULT;
+ } else{
+ vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+ vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+ data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB);
+ if(data){
+ if(!vpsr.ic){
+ misr.ni=1;
+ }
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ itlb_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }else{
+ if(!vpsr.ic){
+ misr.ni=1;
+ }
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ ivhpt_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }
+ }
+ }
+}
+
+
diff --git a/xen/arch/ia64/vmx_utility.c b/xen/arch/ia64/vmx_utility.c
new file mode 100644
index 0000000000..05239d5b3e
--- /dev/null
+++ b/xen/arch/ia64/vmx_utility.c
@@ -0,0 +1,659 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_utility.c:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Shaofan Li (Susue Li) <susie.li@intel.com>
+ * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+#include <xen/types.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/processor.h>
+#include <asm/vmx_mm_def.h>
+
+
+/*
+ * Return:
+ * 0: Not reserved indirect registers
+ * 1: Is reserved indirect registers
+ */
+int
+is_reserved_indirect_register (
+    int type,
+    int index )
+{
+    /* Return 1 when (type, index) names a reserved indirect register,
+     * 0 otherwise.  Unknown types are fatal. */
+    switch (type) {
+        case IA64_CPUID:
+            /* Only cpuid0..cpuid4 are implemented here. */
+            if ( index >= 5 ) {
+                return 1;
+            }
+            /* BUG FIX: was an implicit fallthrough into the next case
+             * (harmless, as it only hit a break, but now explicit). */
+            break;
+
+        case IA64_DBR:
+        case IA64_IBR:
+            //bugbugbug:check with pal about the max ibr/dbr!!!!
+            break;
+
+        case IA64_PMC:
+            //bugbugbug:check with pal about the max ibr/dbr!!!!
+            break;
+
+        case IA64_PMD:
+            //bugbugbug:check with pal about the max ibr/dbr!!!!
+            break;
+
+        case IA64_PKR:
+            //bugbugbug:check with pal about the max pkr!!!!
+            break;
+
+        case IA64_RR:
+            //bugbugbug:check with pal about the max rr!!!!
+            break;
+
+        default:
+            panic ("Unsupported instruction!");
+    }
+
+    return 0;
+
+}
+
+/*
+ * Return:
+ * Set all ignored fields in value to 0 and return
+ */
+u64
+indirect_reg_igfld_MASK (
+    int type,
+    int index,
+    u64 value
+    )
+{
+    /* Clear the architecturally "ignored" fields of an indirect-register
+     * write value; unmodified value returned for types with none. */
+    u64 nvalue;
+
+    nvalue = value;
+    switch ( type ) {
+        case IA64_CPUID:
+            /* NOTE(review): cpuid[2] writes appear to be fully ignored
+             * (whole value masked) — confirm against SDM. */
+            if ( index == 2 ) {
+                nvalue = 0;
+            }
+            break;
+
+        case IA64_DBR:
+        case IA64_IBR:
+            /* Refer to SDM Vol2 Table 7-1,7-2 */
+            if ( index % 2 != 0) {
+                /* Ignore field: {61:60} */
+                nvalue = value & (~MASK (60, 2));
+            }
+            break;
+        case IA64_PMC:
+            if ( index == 0 ) {
+                /* Ignore field: 3:1 */
+                nvalue = value & (~MASK (1, 3));
+            }
+            break;
+        case IA64_PMD:
+            if ( index >= 4 ) {
+                /* Ignore field: 7:7 */
+                /* bugbug: this code is correct for generic
+                 * PMD. However, for implementation specific
+                 * PMD, it's WRONG. need more info to judge
+                 * what's implementation specific PMD.
+                 */
+                nvalue = value & (~MASK (7, 1));
+            }
+            break;
+        case IA64_PKR:
+        case IA64_RR:
+            break;
+        default:
+            panic ("Unsupported instruction!");
+    }
+
+    return nvalue;
+}
+
+/*
+ * Return:
+ * Set all ignored fields in value to 0 and return
+ */
+u64
+cr_igfld_mask (int index, u64 value)
+{
+    /* Clear the ignored fields of a control-register write value;
+     * CRs not listed are returned unchanged. */
+    u64 nvalue;
+
+    nvalue = value;
+
+    switch ( index ) {
+        case IA64_REG_CR_IVA:
+            /* Ignored field: 14:0 (keeps IVA 32KB-aligned) */
+            nvalue = value & (~MASK (0, 15));
+            break;
+
+        case IA64_REG_CR_IHA:
+            /* Ignored field: 1:0 */
+            nvalue = value & (~MASK (0, 2));
+            break;
+
+        case IA64_REG_CR_LID:
+            /* Ignored field: 63:32 */
+            nvalue = value & (~MASK (32, 32));
+            break;
+
+        case IA64_REG_CR_TPR:
+            /* Ignored fields: 63:17, 3:0 */
+            nvalue = value & (~MASK (17, 47));
+            nvalue = nvalue & (~MASK (0, 4));
+            break;
+
+        case IA64_REG_CR_EOI:
+            /* Ignored field: 63:0 — any written value reads as 0 */
+            nvalue = 0;
+            break;
+
+        case IA64_REG_CR_ITV:
+        case IA64_REG_CR_PMV:
+        case IA64_REG_CR_CMCV:
+        case IA64_REG_CR_LRR0:
+        case IA64_REG_CR_LRR1:
+            /* Ignored fields: 63:17, 12:12 */
+            nvalue = value & (~MASK (17, 47));
+            nvalue = nvalue & (~MASK (12, 1));
+            break;
+    }
+
+    return nvalue;
+}
+
+
+/*
+ * Return:
+ * 1: PSR reserved fields are not zero
+ * 0: PSR reserved fields are all zero
+ */
+int
+check_psr_rsv_fields (u64 value)
+{
+    /* PSR reserved fields: bit 0, bits 12:6, bit 16, bits 31:28 and
+     * bits 63:46.  They must all be zero; report 1 if any is set.
+     */
+    u64 rsv_mask;
+
+    rsv_mask = MASK (0, 1) | MASK (6, 7) | MASK (16, 1) |
+               MASK (28, 4) | MASK (46, 18);
+
+    return (value & rsv_mask) ? 1 : 0;
+}
+
+
+
+/*
+ * Return:
+ * 1: CR reserved fields are not zero
+ * 0: CR reserved fields are all zero
+ */
+int
+check_cr_rsv_fields (int index, u64 value)
+{
+    /* Per-CR reserved-field check: returns 1 when any reserved bit of
+     * 'value' is set, 0 when all are clear.  Bit positions follow the
+     * SDM CR layouts. */
+    switch (index) {
+        case IA64_REG_CR_DCR:
+            if ( (value & MASK ( 3, 5 )) ||
+                (value & MASK (15, 49))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_ITM:
+        case IA64_REG_CR_IVA:
+        case IA64_REG_CR_IIP:
+        case IA64_REG_CR_IFA:
+        case IA64_REG_CR_IIPA:
+        case IA64_REG_CR_IIM:
+        case IA64_REG_CR_IHA:
+        case IA64_REG_CR_EOI:
+            /* These CRs have no reserved fields. */
+            return 0;
+
+        case IA64_REG_CR_PTA:
+            if ( (value & MASK ( 1, 1 )) ||
+                (value & MASK (9, 6))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_IPSR:
+            /* IPSR shares the PSR layout. */
+            return check_psr_rsv_fields (value);
+
+
+        case IA64_REG_CR_ISR:
+            if ( (value & MASK ( 24, 8 )) ||
+                (value & MASK (44, 20))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_ITIR:
+            if ( (value & MASK ( 0, 2 )) ||
+                (value & MASK (32, 32))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_IFS:
+            if ( (value & MASK ( 38, 25 ))) {
+                return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_LID:
+            if ( (value & MASK ( 0, 16 ))) {
+                return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_IVR:
+            if ( (value & MASK ( 8, 56 ))) {
+                return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_TPR:
+            if ( (value & MASK ( 8, 8 ))) {
+                return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_IRR0:
+            if ( (value & MASK ( 1, 1 )) ||
+                (value & MASK (3, 13))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_ITV:
+        case IA64_REG_CR_PMV:
+        case IA64_REG_CR_CMCV:
+            if ( (value & MASK ( 8, 4 )) ||
+                (value & MASK (13, 3))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_LRR0:
+        case IA64_REG_CR_LRR1:
+            if ( (value & MASK ( 11, 1 )) ||
+                (value & MASK (14, 1))) {
+                    return 1;
+            }
+            return 0;
+    }
+
+
+    /* Not reached for valid CR indexes; panic() does not return. */
+    panic ("Unsupported CR");
+}
+
+
+
+/*
+ * Return:
+ * 0: Indirect Reg reserved fields are not zero
+ * 1: Indirect Reg reserved fields are all zero
+ */
+int
+check_indirect_reg_rsv_fields ( int type, int index, u64 value )
+{
+    /* NOTE(review): return convention is INVERTED relative to
+     * check_cr_rsv_fields(): 1 = all reserved fields zero (OK),
+     * 0 = some reserved field set. */
+
+    switch ( type ) {
+        case IA64_CPUID:
+            if ( index == 3 ) {
+                if ( value & MASK (40, 24 )) {
+                    return 0;
+                }
+            } else if ( index == 4 ) {
+                if ( value & MASK (2, 62 )) {
+                    return 0;
+                }
+            }
+            break;
+
+        case IA64_DBR:
+        case IA64_IBR:
+        case IA64_PMC:
+        case IA64_PMD:
+            /* No reserved-field checks performed for these. */
+            break;
+
+        case IA64_PKR:
+            if ( value & MASK (4, 4) ||
+                value & MASK (32, 32 )) {
+                return 0;
+            }
+            break;
+
+        case IA64_RR:
+            if ( value & MASK (1, 1) ||
+                value & MASK (32, 32 )) {
+                return 0;
+            }
+            break;
+
+        default:
+            panic ("Unsupported instruction!");
+    }
+
+    return 1;
+}
+
+
+
+
+/* Return
+ * Same format as isr_t
+ * Only ei/ni bits are valid, all other bits are zero
+ */
+u64
+set_isr_ei_ni (VCPU *vcpu)
+{
+    /* Build an ISR-formatted value with only ei/ni populated:
+     * ni is set when the guest's psr.ic is 0; ei mirrors machine
+     * ipsr.ri (the faulting instruction slot). */
+    IA64_PSR vpsr,ipsr;
+    ISR visr;
+    REGS *regs;
+
+    regs=vcpu_regs(vcpu);
+
+    visr.val = 0;
+
+    vpsr.val = vmx_vcpu_get_psr (vcpu);
+
+    /* BUG FIX (readability): was "if (!vpsr.ic == 1)", which parses as
+     * "(!vpsr.ic) == 1" — same result, but needlessly confusing. */
+    if ( !vpsr.ic ) {
+        /* Interruption collection off: set ISR.ni */
+        visr.ni = 1;
+    }
+    ipsr.val = regs->cr_ipsr;
+
+    visr.ei = ipsr.ri;
+    return visr.val;
+}
+
+
+/* Set up ISR.na/code{3:0} for no-access instructions
+ * Refer to SDM Vol 2 Table 5-1
+ * Parameter:
+ *   op: the no-access instruction being emulated
+ *       (IA64_INST_TPA or IA64_INST_TAK)
+ * Return:
+ *   Same format as ISR. All fields are zero, except na/code{3:0}
+ */
+u64
+set_isr_for_na_inst(VCPU *vcpu, int op)
+{
+    /* Build an ISR value with only na/code{3:0} set, per SDM Vol 2
+     * Table 5-1, for the no-access instruction being emulated. */
+    ISR visr;
+
+    visr.val = 0;
+    if (op == IA64_INST_TPA) {
+        visr.na = 1;
+        visr.code = 0;
+    } else if (op == IA64_INST_TAK) {
+        visr.na = 1;
+        visr.code = 3;
+    }
+    return visr.val;
+}
+
+
+
+/*
+ * Set up ISR for register NaT consumption fault
+ * Parameters:
+ *   read: if 1, indicates this is a read access;
+ *   write: if 1, indicates this is a write access;
+ */
+void
+set_rnat_consumption_isr (VCPU *vcpu,int inst,int read,int write)
+{
+    /* Build and install ISR for a register NaT consumption fault:
+     * code{7:4}=1, plus na/code{3:0} (when 'inst' names a no-access
+     * instruction), r/w flags and ei/ni. */
+    ISR visr;
+    u64 value;
+    /* Need set up ISR: code, ei, ni, na, r/w */
+    visr.val = 0;
+
+    /* ISR.code{7:4} =1,
+     * Set up ISR.code{3:0}, ISR.na
+     */
+    visr.code = (1 << 4);
+    if (inst) {
+
+        value = set_isr_for_na_inst (vcpu,inst);
+        visr.val = visr.val | value;
+    }
+
+    /* Set up ISR.r/w */
+    visr.r = read;
+    visr.w = write;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu,visr.val);
+}
+
+
+
+/*
+ * Set up ISR for break fault
+ */
+void set_break_isr (VCPU *vcpu)
+{
+    /* A break fault only needs ei/ni in ISR; everything else is 0. */
+    ISR visr;
+
+    visr.val = set_isr_ei_ni (vcpu);
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+
+
+
+/*
+ * Set up ISR for Privileged Operation fault
+ */
+void set_privileged_operation_isr (VCPU *vcpu,int inst)
+{
+    /* Build and install ISR for a Privileged Operation fault:
+     * code{7:4}=1, plus na/code{3:0} for no-access instructions,
+     * plus ei/ni. */
+    ISR visr;
+    u64 value;
+
+    /* Need set up ISR: code, ei, ni, na */
+
+    visr.val = 0;
+
+    /* Set up na, code{3:0} for no-access instruction */
+    value = set_isr_for_na_inst (vcpu, inst);
+    visr.val = visr.val | value;
+
+
+    /* ISR.code{7:4} =1 */
+    visr.code = (1 << 4) | visr.code;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+
+/*
+ * Set up ISR for Privileged Register fault
+ */
+void set_privileged_reg_isr (VCPU *vcpu, int inst)
+{
+    /* Build and install ISR for a Privileged Register fault:
+     * code{7:4}=2 plus ei/ni.  'inst' is currently unused. */
+    ISR visr;
+
+    visr.val = set_isr_ei_ni (vcpu);
+
+    /* ISR.code{7:4} = 2 (privileged register); code does not overlap
+     * the ei/ni bits, so setting the bitfield afterwards is safe. */
+    visr.code = 2 << 4;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+
+
+/*
+ * Set up ISR for Reserved Register/Field fault
+ */
+void set_rsv_reg_field_isr (VCPU *vcpu)
+{
+    /* Build and install ISR for a Reserved Register/Field fault. */
+    ISR visr;
+    u64 value;
+
+    /* Need set up ISR: code, ei, ni */
+
+    visr.val = 0;
+
+    /* ISR.code{7:4} = 3 (reserved register/field; the old comment's
+     * "=4" disagreed with the code — 3 matches the sibling helpers'
+     * 1=priv-op / 2=priv-reg sequence) */
+    visr.code = (3 << 4) | visr.code;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+/*
+ * Set up ISR for Illegal Operation fault
+ */
+void set_illegal_op_isr (VCPU *vcpu)
+{
+    /* An Illegal Operation fault only needs ei/ni in ISR (code=0). */
+    ISR visr;
+
+    visr.val = set_isr_ei_ni (vcpu);
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+void set_isr_reg_nat_consumption(VCPU *vcpu, u64 flag, u64 non_access)
+{
+    /* Alternate NaT-consumption ISR builder used by the emulation
+     * paths; hard-codes a read access (r=1, w=0). */
+    ISR isr;
+
+    isr.val = 0;
+    isr.val = set_isr_ei_ni(vcpu);
+    isr.code = IA64_REG_NAT_CONSUMPTION_FAULT | flag;
+    isr.na = non_access;
+    /* NOTE(review): r/w are fixed to read — confirm callers never
+     * need a write-side NaT consumption. */
+    isr.r = 1;
+    isr.w = 0;
+    vmx_vcpu_set_isr(vcpu, isr.val);
+    return;
+}
+
+void set_isr_for_priv_fault(VCPU *vcpu, u64 non_access)
+{
+    /* Build and install ISR for a privileged-operation fault:
+     * ei/ni from current state, code = IA64_PRIV_OP_FAULT, na flag. */
+    ISR isr;
+
+    isr.val = set_isr_ei_ni(vcpu);
+    isr.code = IA64_PRIV_OP_FAULT;
+    isr.na = non_access;
+    vmx_vcpu_set_isr(vcpu, isr.val);
+    /* (removed an unused local and a redundant trailing return) */
+}
+
+
+/*
+ * Check that a target GR index lies inside the current register frame.
+ * Returns IA64_FAULT when reg_index is at or beyond sof+32 (would be
+ * an Illegal Operation), IA64_NO_FAULT otherwise.
+ */
+IA64FAULT check_target_register(VCPU *vcpu, u64 reg_index)
+{
+    u64 sof;
+    REGS *regs;
+    regs=vcpu_regs(vcpu);
+    sof = regs->cr_ifs & 0x7f;      /* size of current frame */
+    if(reg_index >= sof + 32)
+        return IA64_FAULT;
+    return IA64_NO_FAULT;   /* fixed stray double semicolon */
+}
+
+
+/* Only region registers rr0..rr7 exist; higher indexes are reserved. */
+int is_reserved_rr_register(VCPU* vcpu, int reg_index)
+{
+    if (reg_index >= 8)
+        return 1;
+    return 0;
+}
+
+#define ITIR_RSV_MASK (0x3UL | (((1UL<<32)-1) << 32))
+/* Non-zero when itir has a reserved bit ({1:0} or {63:32}) set. */
+int is_reserved_itir_field(VCPU* vcpu, u64 itir)
+{
+    return (itir & ITIR_RSV_MASK) ? 1 : 0;
+}
+
+int is_reserved_rr_field(VCPU* vcpu, u64 reg_value)
+{
+    /* Validate an RR write value: reserved bits must be zero and the
+     * page-size field must be a supported encoding. */
+    ia64_rr rr;
+    rr.rrval = reg_value;
+
+    if(rr.reserved0 != 0 || rr.reserved1 != 0){
+        return 1;
+    }
+    if(rr.ps < 12 || rr.ps > 28){
+        // page too big or small.
+        return 1;
+    }
+    /* NOTE(review): this rejects only odd ps above 15, so ps==15
+     * (32KB, not an architected size) slips through — confirm
+     * whether that is intentional. */
+    if(rr.ps > 15 && rr.ps % 2 != 0){
+        // unsupported page size.
+        return 1;
+    }
+    return 0;
+}
+
diff --git a/xen/arch/ia64/vmx_vcpu.c b/xen/arch/ia64/vmx_vcpu.c
new file mode 100644
index 0000000000..05c211d428
--- /dev/null
+++ b/xen/arch/ia64/vmx_vcpu.c
@@ -0,0 +1,436 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vcpu.c: handling all virtual cpu related thing.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Fred yang (fred.yang@intel.com)
+ * Arun Sharma (arun.sharma@intel.com)
+ * Shaofan Li (Susue Li) <susie.li@intel.com>
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+
+
+#include <linux/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+
+//u64 fire_itc;
+//u64 fire_itc2;
+//u64 fire_itm;
+//u64 fire_itm2;
+/*
+ * Copyright (c) 2005 Intel Corporation.
+ * Anthony Xu (anthony.xu@intel.com)
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/**************************************************************************
+ VCPU general register access routines
+**************************************************************************/
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+//unsigned long last_guest_rsm = 0x0;
+/* Debug ring buffer recording the last 100 guest PSR writes
+ * (guest ip at the time of the write, and the new psr value). */
+struct guest_psr_bundle{
+    unsigned long ip;
+    unsigned long psr;
+};
+
+struct guest_psr_bundle guest_psr_buf[100];
+unsigned long guest_psr_index = 0;
+
+/*
+ * Write the guest's virtual PSR and fold the delegated bits into the
+ * machine cr.ipsr.  Guests setting psr.pk/psr.is/psr.vm are fatal.
+ */
+void
+vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value)
+{
+    UINT64 mask;
+    REGS *regs;
+    IA64_PSR old_psr, new_psr;
+
+    old_psr.val = vmx_vcpu_get_psr(vcpu);
+    regs = vcpu_regs(vcpu);
+
+    /* We only support guest as:
+     *  vpsr.pk = 0
+     *  vpsr.is = 0
+     * Otherwise panic
+     */
+    if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) {
+        panic ("Setting unsupport guest psr!");
+    }
+
+    /*
+     * id/da/dd/ss/ed/ia clear themselves after each successfully
+     * executed instruction, so never latch them into the virtual psr.
+     */
+    VMX_VPD(vcpu,vpsr) = value &
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    new_psr.val = vmx_vcpu_get_psr(vcpu);
+
+    /* Record the write in the debug ring buffer (no inner 'regs'
+     * shadow as before — same pointer either way). */
+    guest_psr_buf[guest_psr_index].ip = regs->cr_iip;
+    guest_psr_buf[guest_psr_index].psr = new_psr.val;
+    if (++guest_psr_index >= 100)
+        guest_psr_index = 0;
+
+    /*
+     * All virtual psr bits go to the machine cr.ipsr except
+     * ic/i/dt/si/rt/mc/it/bn/vm, which stay under hypervisor control.
+     */
+    mask = IA64_PSR_IC | IA64_PSR_I | IA64_PSR_DT | IA64_PSR_SI |
+           IA64_PSR_RT | IA64_PSR_MC | IA64_PSR_IT | IA64_PSR_BN |
+           IA64_PSR_VM;
+
+    regs->cr_ipsr = (regs->cr_ipsr & mask ) | ( value & (~mask) );
+
+    check_mm_mode_switch(vcpu, old_psr, new_psr);
+    /* BUG FIX: this function is void; the original ended with
+     * "return IA64_NO_FAULT;", a constraint violation. */
+}
+
+/* Adjust slot both in xen_regs and vpd, upon vpsr.ri which
+ * should have sync with ipsr in entry.
+ *
+ * Clear some bits due to successfully emulation.
+ */
+IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu)
+{
+    /* Advance the guest instruction pointer by one slot: bump vpsr.ri,
+     * wrapping to the next 16-byte bundle after slot 2, mirror it into
+     * the machine ipsr, and clear the single-step/self-clearing bits
+     * (id/da/dd/ss/ed/ia) in both virtual and machine psr. */
+    // TODO: trap_bounce?? Eddie
+    REGS *regs = vcpu_regs(vcpu);
+    IA64_PSR vpsr;
+    IA64_PSR *ipsr = (IA64_PSR *)&regs->cr_ipsr;
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if (vpsr.ri == 2) {
+        vpsr.ri = 0;
+        regs->cr_iip += 16;
+    } else {
+        vpsr.ri++;
+    }
+
+    ipsr->ri = vpsr.ri;
+    vpsr.val &=
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    VMX_VPD(vcpu, vpsr) = vpsr.val;
+
+    ipsr->val &=
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    return (IA64_NO_FAULT);
+}
+
+
+/* Emulate "cover": when interruption collection is off, preserve
+ * cr.ifs into the VPD, then invalidate cr.ifs (valid bit only). */
+IA64FAULT vmx_vcpu_cover(VCPU *vcpu)
+{
+    REGS *regs = vcpu_regs(vcpu);
+    IA64_PSR vpsr;
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if (!vpsr.ic) {
+        VPD_CR(vcpu, ifs) = regs->cr_ifs;
+    }
+    regs->cr_ifs = IA64_IFS_V;
+    return (IA64_NO_FAULT);
+}
+
+
+/* Accessor for the per-vcpu virtual TLB control block. */
+thash_cb_t *
+vmx_vcpu_get_vtlb(VCPU *vcpu)
+{
+    return (vcpu->arch.vtlb);
+}
+
+
+/* Accessor for the per-vcpu virtual platform definition.
+ * NOTE(review): "virutal_platform_def" is misspelled at its
+ * declaration site; kept here to match the header. */
+struct virutal_platform_def *
+vmx_vcpu_get_plat(VCPU *vcpu)
+{
+    return &vcpu->arch.arch_vmx.vmx_platform;
+}
+
+
+/* Virtual region register covering vadr (vadr{63:61} picks vrr[0..7]).
+ * NOTE(review): the (ia64_rr) cast of a scalar relies on the union
+ * having a matching single-field representation — confirm. */
+ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr)
+{
+    return (ia64_rr)VMX(vcpu,vrr[vadr>>61]);
+}
+
+
+IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+    /* Write virtual region register for address 'reg' (its {63:61}
+     * selects the region).  Purges the vTLB on a page-size change,
+     * records the write in the VPD, and installs the machine RR
+     * translation (VRN5/6/7 are cached; VRN7 also remaps the
+     * hypervisor double mapping). */
+    extern void set_one_rr(UINT64, UINT64);
+    ia64_rr oldrr,newrr;
+    thash_cb_t *hcb;
+    oldrr=vmx_vcpu_rr(vcpu,reg);
+    newrr.rrval=val;
+#if 1
+    /* A changed page size invalidates every cached translation. */
+    if(oldrr.ps!=newrr.ps){
+        hcb = vmx_vcpu_get_vtlb(vcpu);
+        thash_purge_all(hcb);
+    }
+#endif
+    VMX(vcpu,vrr[reg>>61]) = val;
+    switch((u64)(reg>>61)) {
+        case VRN5:
+            VMX(vcpu,mrr5)=vmx_vrrtomrr(vcpu,val);
+            break;
+        case VRN6:
+            VMX(vcpu,mrr6)=vmx_vrrtomrr(vcpu,val);
+            break;
+        case VRN7:
+            VMX(vcpu,mrr7)=vmx_vrrtomrr(vcpu,val);
+            /* Change double mapping for this domain */
+            vmx_change_double_mapping(vcpu,
+                          vmx_vrrtomrr(vcpu,oldrr.rrval),
+                          vmx_vrrtomrr(vcpu,newrr.rrval));
+            break;
+        default:
+            ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val));
+            break;
+    }
+
+    return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+ VCPU protection key register access routines
+**************************************************************************/
+
+/* Read machine protection key register 'reg' into *pval. */
+IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+    *pval = (UINT64)ia64_get_pkr(reg);
+    return (IA64_NO_FAULT);
+}
+
+/* Write machine protection key register 'reg'. */
+IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+    ia64_set_pkr(reg, val);
+    return (IA64_NO_FAULT);
+}
+
+#if 0
+int tlb_debug=0;
+check_entry(u64 va, u64 ps, char *str)
+{
+ va &= ~ (PSIZE(ps)-1);
+ if ( va == 0x2000000002908000UL ||
+ va == 0x600000000000C000UL ) {
+ stop();
+ }
+ if (tlb_debug) printf("%s at %lx %lx\n", str, va, 1UL<<ps);
+}
+#endif
+
+
+/* Synthesize the ITIR value (only ps and rid) for a fault at ifa,
+ * taken from the region register covering that address. */
+u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa)
+{
+    ia64_rr vrr, itir;
+
+    vrr = vmx_vcpu_rr(vcpu, ifa);
+    itir.rrval = 0;
+    itir.ps  = vrr.ps;
+    itir.rid = vrr.rid;
+    return itir.rrval;
+}
+
+
+
+
+IA64FAULT vmx_vcpu_rfi(VCPU *vcpu)
+{
+    /* Emulate "rfi": restore psr from cr.ipsr, restore the register
+     * frame from cr.ifs when its valid bit is set and the frame is
+     * non-empty, and resume at cr.iip. */
+    // TODO: Only allowed for current vcpu
+    UINT64 ifs, psr;
+    REGS *regs = vcpu_regs(vcpu);
+    psr = VPD_CR(vcpu,ipsr);
+    vmx_vcpu_set_psr(vcpu,psr);
+    ifs=VPD_CR(vcpu,ifs);
+    /* (ifs>>63) is the valid bit; (ifs<<1) is non-zero when any other
+     * field is set. */
+    if((ifs>>63)&&(ifs<<1)){
+        ifs=(regs->cr_ifs)&0x7f;
+        regs->rfi_pfs = (ifs<<7)|ifs;
+        regs->cr_ifs = VPD_CR(vcpu,ifs);
+    }
+    regs->cr_iip = VPD_CR(vcpu,iip);
+    return (IA64_NO_FAULT);
+}
+
+
+/* Read the guest's virtual PSR straight from the VPD. */
+UINT64
+vmx_vcpu_get_psr(VCPU *vcpu)
+{
+    return VMX_VPD(vcpu,vpsr);
+}
+
+
+/*
+ * Read banked general register reg (16..31), honoring psr.bn.
+ * Returns IA64_FAULT when the register's NaT bit is set.
+ */
+IA64FAULT
+vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val)
+{
+    IA64_PSR vpsr;
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.bn ) {
+        *val=VMX_VPD(vcpu,vgr[reg-16]);
+        // Check NAT bit
+        if ( VMX_VPD(vcpu,vnat) & (1UL<<(reg-16)) ) {
+            // TODO
+            //panic ("NAT consumption fault\n");
+            return IA64_FAULT;
+        }
+
+    }
+    else {
+        *val=VMX_VPD(vcpu,vbgr[reg-16]);
+        /* BUG FIX: NaT bit index must match vbgr[reg-16]; the old
+         * code tested bit "reg" of vbnat. */
+        if ( VMX_VPD(vcpu,vbnat) & (1UL<<(reg-16)) ) {
+            //panic ("NAT consumption fault\n");
+            return IA64_FAULT;
+        }
+
+    }
+    return IA64_NO_FAULT;
+}
+
+/*
+ * Write banked general register reg (16..31) and its NaT bit,
+ * honoring psr.bn.
+ */
+IA64FAULT
+vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val,int nat)
+{
+    IA64_PSR vpsr;
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.bn ) {
+        VMX_VPD(vcpu,vgr[reg-16]) = val;
+        /* BUG FIX: clear the same NaT set the "set" side uses; the
+         * old code cleared vbnat here instead of vnat. */
+        if(nat){
+            VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg-16) );
+        }else{
+            VMX_VPD(vcpu,vnat) &= ~( 1UL<<(reg-16) );
+        }
+    }
+    else {
+        VMX_VPD(vcpu,vbgr[reg-16]) = val;
+        /* BUG FIX: bank-0 NaT bits live in vbnat at bit reg-16; the
+         * old code toggled vnat/vbnat at bit "reg". */
+        if(nat){
+            VMX_VPD(vcpu,vbnat) |= ( 1UL<<(reg-16) );
+        }else{
+            VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg-16) );
+        }
+    }
+    return IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT
+vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 * val)
+{
+    /* Read guest GR 'reg'; banked regs 16..31 come from the VPD,
+     * everything else via getreg().  Returns IA64_FAULT on a set
+     * NaT bit. */
+    REGS *regs=vcpu_regs(vcpu);
+    u64 nat;
+    //TODO, Eddie
+    /* NOTE(review): returns 0 here while the setter returns
+     * IA64_ILLOP_FAULT for the same condition — presumably
+     * 0 == IA64_NO_FAULT; confirm and make consistent. */
+    if (!regs) return 0;
+    if (reg >= 16 && reg < 32) {
+        return vmx_vcpu_get_bgr(vcpu,reg,val);
+    }
+    getreg(reg,val,&nat,regs);    // FIXME: handle NATs later
+    if(nat){
+        return IA64_FAULT;
+    }
+    return IA64_NO_FAULT;
+}
+
+// returns:
+// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault
+// IA64_NO_FAULT otherwise
+
+IA64FAULT
+vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int nat)
+{
+    /* Write guest GR 'reg' (and its NaT bit); banked regs 16..31 go
+     * to the VPD, everything else via setreg(). */
+    REGS *regs = vcpu_regs(vcpu);
+    long sof;
+    //TODO Eddie
+
+    /* BUG FIX: null-check regs BEFORE reading regs->cr_ifs (the old
+     * code computed sof first, dereferencing a possibly-null regs). */
+    if (!regs) return IA64_ILLOP_FAULT;
+    sof = (regs->cr_ifs) & 0x7f;    /* size of current frame */
+    if (reg >= sof + 32) return IA64_ILLOP_FAULT;
+    if ( reg >= 16 && reg < 32 ) {
+        return vmx_vcpu_set_bgr(vcpu,reg, value, nat);
+    }
+    setreg(reg,value,nat,regs);
+    return IA64_NO_FAULT;
+}
+
+
+/* Emulate "rsm": clear the psr bits selected by imm24. */
+IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+    UINT64 new_psr = vmx_vcpu_get_psr(vcpu) & ~imm24;
+
+    vmx_vcpu_set_psr(vcpu, new_psr);
+    return IA64_NO_FAULT;
+}
+
+
+/* Emulate "ssm": set the psr bits selected by imm24. */
+IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+    UINT64 new_psr = vmx_vcpu_get_psr(vcpu) | imm24;
+
+    vmx_vcpu_set_psr(vcpu, new_psr);
+    return IA64_NO_FAULT;
+}
+
+
+/* Emulate "mov psr.l = val": delegate to the full psr writer. */
+IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
+{
+    vmx_vcpu_set_psr(vcpu, val);
+    return IA64_NO_FAULT;
+}
+
+
diff --git a/xen/arch/ia64/vmx_virt.c b/xen/arch/ia64/vmx_virt.c
new file mode 100644
index 0000000000..29b1164196
--- /dev/null
+++ b/xen/arch/ia64/vmx_virt.c
@@ -0,0 +1,1501 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_virt.c:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Fred yang (fred.yang@intel.com)
+ * Shaofan Li (Susue Li) <susie.li@intel.com>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+
+
+#include <asm/privop.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/processor.h>
+#include <asm/delay.h> // Debug only
+#include <asm/vmmu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/smp.h>
+
+#include <asm/virt_event.h>
+extern UINT64 privop_trace;
+
+void
+ia64_priv_decoder(IA64_SLOT_TYPE slot_type, INST64 inst, UINT64 * cause)
+{
+    /* Decode a privileged/virtualized instruction into its EVENT_*
+     * cause code.  *cause stays 0 for any encoding (or slot type)
+     * not recognized below. */
+    *cause=0;
+    switch (slot_type) {
+        case M:
+        /* M-unit: major opcode 0 covers ssm/rsm/mov-to-ar-imm;
+         * major opcode 1 covers the system/TLB instructions. */
+        if (inst.generic.major==0){
+            if(inst.M28.x3==0){
+                if(inst.M44.x4==6){
+                    *cause=EVENT_SSM;
+                }else if(inst.M44.x4==7){
+                    *cause=EVENT_RSM;
+                }else if(inst.M30.x4==8&&inst.M30.x2==2){
+                    *cause=EVENT_MOV_TO_AR_IMM;
+                }
+            }
+        }
+        else if(inst.generic.major==1){
+            if(inst.M28.x3==0){
+                if(inst.M32.x6==0x2c){
+                    *cause=EVENT_MOV_TO_CR;
+                }else if(inst.M33.x6==0x24){
+                    *cause=EVENT_MOV_FROM_CR;
+                }else if(inst.M35.x6==0x2d){
+                    *cause=EVENT_MOV_TO_PSR;
+                }else if(inst.M36.x6==0x25){
+                    *cause=EVENT_MOV_FROM_PSR;
+                }else if(inst.M29.x6==0x2A){
+                    *cause=EVENT_MOV_TO_AR;
+                }else if(inst.M31.x6==0x22){
+                    *cause=EVENT_MOV_FROM_AR;
+                }else if(inst.M45.x6==0x09){
+                    *cause=EVENT_PTC_L;
+                }else if(inst.M45.x6==0x0A){
+                    *cause=EVENT_PTC_G;
+                }else if(inst.M45.x6==0x0B){
+                    *cause=EVENT_PTC_GA;
+                }else if(inst.M45.x6==0x0C){
+                    *cause=EVENT_PTR_D;
+                }else if(inst.M45.x6==0x0D){
+                    *cause=EVENT_PTR_I;
+                }else if(inst.M46.x6==0x1A){
+                    *cause=EVENT_THASH;
+                }else if(inst.M46.x6==0x1B){
+                    *cause=EVENT_TTAG;
+                }else if(inst.M46.x6==0x1E){
+                    *cause=EVENT_TPA;
+                }else if(inst.M46.x6==0x1F){
+                    *cause=EVENT_TAK;
+                }else if(inst.M47.x6==0x34){
+                    *cause=EVENT_PTC_E;
+                }else if(inst.M41.x6==0x2E){
+                    *cause=EVENT_ITC_D;
+                }else if(inst.M41.x6==0x2F){
+                    *cause=EVENT_ITC_I;
+                }else if(inst.M42.x6==0x00){
+                    *cause=EVENT_MOV_TO_RR;
+                }else if(inst.M42.x6==0x01){
+                    *cause=EVENT_MOV_TO_DBR;
+                }else if(inst.M42.x6==0x02){
+                    *cause=EVENT_MOV_TO_IBR;
+                }else if(inst.M42.x6==0x03){
+                    *cause=EVENT_MOV_TO_PKR;
+                }else if(inst.M42.x6==0x04){
+                    *cause=EVENT_MOV_TO_PMC;
+                }else if(inst.M42.x6==0x05){
+                    *cause=EVENT_MOV_TO_PMD;
+                }else if(inst.M42.x6==0x0E){
+                    *cause=EVENT_ITR_D;
+                }else if(inst.M42.x6==0x0F){
+                    *cause=EVENT_ITR_I;
+                }else if(inst.M43.x6==0x10){
+                    *cause=EVENT_MOV_FROM_RR;
+                }else if(inst.M43.x6==0x11){
+                    *cause=EVENT_MOV_FROM_DBR;
+                }else if(inst.M43.x6==0x12){
+                    *cause=EVENT_MOV_FROM_IBR;
+                }else if(inst.M43.x6==0x13){
+                    *cause=EVENT_MOV_FROM_PKR;
+                }else if(inst.M43.x6==0x14){
+                    *cause=EVENT_MOV_FROM_PMC;
+/*
+                }else if(inst.M43.x6==0x15){
+                    *cause=EVENT_MOV_FROM_PMD;
+*/
+                }else if(inst.M43.x6==0x17){
+                    *cause=EVENT_MOV_FROM_CPUID;
+                }
+            }
+        }
+        break;
+        case B:
+        /* B-unit: cover/rfi/bsw.  Other slot types fall out of the
+         * switch with *cause left at 0. */
+        if(inst.generic.major==0){
+            if(inst.B8.x6==0x02){
+                *cause=EVENT_COVER;
+            }else if(inst.B8.x6==0x08){
+                *cause=EVENT_RFI;
+            }else if(inst.B8.x6==0x0c){
+                *cause=EVENT_BSW_0;
+            }else if(inst.B8.x6==0x0d){
+                *cause=EVENT_BSW_1;
+            }
+        }
+    }
+}
+
+/* Emulate "rsm imm24": assemble the immediate and clear those bits. */
+IA64FAULT vmx_emul_rsm(VCPU *vcpu, INST64 inst)
+{
+    UINT64 imm24;
+
+    imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) | inst.M44.imm;
+    return vmx_vcpu_reset_psr_sm(vcpu, imm24);
+}
+
+/* Emulate "ssm imm24": assemble the immediate and set those bits. */
+IA64FAULT vmx_emul_ssm(VCPU *vcpu, INST64 inst)
+{
+    UINT64 imm24;
+
+    imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) | inst.M44.imm;
+    return vmx_vcpu_set_psr_sm(vcpu, imm24);
+}
+
+/* Debug: last psr value handed to the guest by mov-from-psr. */
+unsigned long last_guest_psr = 0x0;
+IA64FAULT vmx_emul_mov_from_psr(VCPU *vcpu, INST64 inst)
+{
+    /* Emulate "mov r1 = psr": expose psr{31:0} plus bits {36:35}
+     * to the guest (NOTE(review): confirm which fields {36:35}
+     * correspond to in the SDM psr layout). */
+    UINT64 tgt = inst.M33.r1;
+    UINT64 val;
+    IA64FAULT fault;
+
+/*
+    if ((fault = vmx_vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT)
+        return vmx_vcpu_set_gr(vcpu, tgt, val);
+    else return fault;
+    */
+    val = vmx_vcpu_get_psr(vcpu);
+    val = (val & MASK(0, 32)) | (val & MASK(35, 2));
+    last_guest_psr = val;
+    return vmx_vcpu_set_gr(vcpu, tgt, val, 0);
+}
+
+/**
+ * @todo Check for reserved bits and return IA64_RSVDREG_FAULT.
+ */
+IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst)
+{
+    /* Emulate "mov psr.l = r2": only psr{31:0} may come from the
+     * guest; vpsr{63:32} is preserved from the VPD. */
+    UINT64 val;
+
+    /* A NaT'd source register would be a NaT consumption fault;
+     * currently treated as fatal. */
+    if(vmx_vcpu_get_gr(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT)
+        panic(" get_psr nat bit fault\n");
+
+    val = (val & MASK(0, 32)) | (VMX_VPD(vcpu, vpsr) & MASK(32, 32));
+    /* (removed an unused local and an #if 0 block that referenced an
+     * undeclared symbol) */
+    return vmx_vcpu_set_psr_l(vcpu,val);
+}
+
+
+/**************************************************************************
+Privileged operation emulation routines
+**************************************************************************/
+
+IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST64 inst)
+{
+    /* Emulate "rfi"; refuses to resume into IA-32 mode (ipsr.is). */
+    IA64_PSR vpsr;
+    REGS *regs;
+#ifdef  CHECK_FAULT
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    regs=vcpu_regs(vcpu);
+    vpsr.val=regs->cr_ipsr;
+    if ( vpsr.is == 1 ) {
+        panic ("We do not support IA32 instruction yet");
+    }
+
+    return vmx_vcpu_rfi(vcpu);
+}
+
+IA64FAULT vmx_emul_bsw0(VCPU *vcpu, INST64 inst)
+{
+    /* Emulate "bsw.0" (switch to register bank 0); privilege check
+     * only when built with CHECK_FAULT. */
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+   return vmx_vcpu_bsw0(vcpu);
+}
+
+IA64FAULT vmx_emul_bsw1(VCPU *vcpu, INST64 inst)
+{
+    /* Emulate "bsw.1" (switch to register bank 1); privilege check
+     * only when built with CHECK_FAULT. */
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    return vmx_vcpu_bsw1(vcpu);
+}
+
+/* Emulate the "cover" instruction (thin wrapper). */
+IA64FAULT vmx_emul_cover(VCPU *vcpu, INST64 inst)
+{
+    return vmx_vcpu_cover(vcpu);
+}
+
+IA64FAULT vmx_emul_ptc_l(VCPU *vcpu, INST64 inst)
+{
+    /* Emulate "ptc.l r3, r2": purge local TLB at address r3 with
+     * page size r2{7:2}.
+     * NOTE(review): unlike the sibling emulators, the cpl check here
+     * is NOT guarded by VMAL_NO_FAULT_CHECK — confirm intentional.
+     * 'isr' is only used inside the VMAL_NO_FAULT_CHECK blocks. */
+    u64 r2,r3;
+    ISR isr;
+    IA64_PSR vpsr;
+
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&r2)){
+#ifdef  VMAL_NO_FAULT_CHECK
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if (unimplemented_gva(vcpu,r3) ) {
+        isr.val = set_isr_ei_ni(vcpu);
+        isr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, isr.val);
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+   }
+#endif // VMAL_NO_FAULT_CHECK
+    return vmx_vcpu_ptc_l(vcpu,r3,bits(r2,2,7));
+}
+
+IA64FAULT vmx_emul_ptc_e(VCPU *vcpu, INST64 inst)
+{
+    /* Emulate "ptc.e r3": purge the entire local TLB.
+     * NOTE(review): 'isr' and the vpsr read are only meaningful when
+     * VMAL_NO_FAULT_CHECK is defined. */
+    u64 r3;
+    ISR isr;
+    IA64_PSR vpsr;
+
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+#ifdef  VMAL_NO_FAULT_CHECK
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    if(vmx_vcpu_get_gr(vcpu,inst.M47.r3,&r3)){
+#ifdef  VMAL_NO_FAULT_CHECK
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+    }
+    return vmx_vcpu_ptc_e(vcpu,r3);
+}
+
+/* ptc.g is emulated as ptc.l.
+ * NOTE(review): global-purge semantics across CPUs are not handled
+ * here — confirm they are handled elsewhere (or UP-only). */
+IA64FAULT vmx_emul_ptc_g(VCPU *vcpu, INST64 inst)
+{
+    return vmx_emul_ptc_l(vcpu, inst);
+}
+
+/* ptc.ga is emulated as ptc.l (see the ptc.g note above re: global
+ * semantics). */
+IA64FAULT vmx_emul_ptc_ga(VCPU *vcpu, INST64 inst)
+{
+    return vmx_emul_ptc_l(vcpu, inst);
+}
+
+/*
+ * Common operand fetch/validation for ptr.d / ptr.i: reads r3
+ * (address) into *pr3 and r2 (size) into *pr2.  Returns IA64_FAULT
+ * when fault checking is compiled in and a fault was injected.
+ */
+IA64FAULT ptr_fault_check(VCPU *vcpu, INST64 inst, u64 *pr2, u64 *pr3)
+{
+    ISR isr;
+    IA64FAULT ret1, ret2;
+
+#ifdef  VMAL_NO_FAULT_CHECK
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r3,pr3);
+    ret2 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pr2);
+#ifdef  VMAL_NO_FAULT_CHECK
+    if ( ret1 != IA64_NO_FAULT || ret2 != IA64_NO_FAULT ) {
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+    }
+    /* BUG FIX: "r3" was undeclared in this scope; the fetched
+     * address lives in *pr3. */
+    if (unimplemented_gva(vcpu,*pr3) ) {
+        isr.val = set_isr_ei_ni(vcpu);
+        isr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, isr.val);
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    return IA64_NO_FAULT;
+}
+
+/* Emulate "ptr.d r3, r2": purge a data TR at r3, size r2{7:2}. */
+IA64FAULT vmx_emul_ptr_d(VCPU *vcpu, INST64 inst)
+{
+    u64 r2, r3;
+
+    if (ptr_fault_check(vcpu, inst, &r2, &r3) == IA64_FAULT)
+        return IA64_FAULT;
+    return vmx_vcpu_ptr_d(vcpu, r3, bits(r2, 2, 7));
+}
+
+/* Emulate "ptr.i r3, r2": purge an instruction TR at r3, size r2{7:2}. */
+IA64FAULT vmx_emul_ptr_i(VCPU *vcpu, INST64 inst)
+{
+    u64 r2, r3;
+
+    if (ptr_fault_check(vcpu, inst, &r2, &r3) == IA64_FAULT)
+        return IA64_FAULT;
+    return vmx_vcpu_ptr_i(vcpu, r3, bits(r2, 2, 7));
+}
+
+
+IA64FAULT vmx_emul_thash(VCPU *vcpu, INST64 inst)
+{
+    /* Emulate "thash r1 = r3": compute the VHPT hash address of r3
+     * into r1.  'visr'/'vpsr' are unused unless CHECK_FAULT code
+     * paths grow to need them. */
+    u64 r1,r3;
+    ISR visr;
+    IA64_PSR vpsr;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M46.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+#endif //CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef  CHECK_FAULT
+        /* NaT'd source: propagate a NaT'd result instead of faulting. */
+        vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+        return IA64_NO_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(unimplemented_gva(vcpu, r3)){
+        vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+        return IA64_NO_FAULT;
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_thash(vcpu, r3, &r1);
+    vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+    return(IA64_NO_FAULT);
+}
+
+
+IA64FAULT vmx_emul_ttag(VCPU *vcpu, INST64 inst)
+{
+ u64 r1,r3;
+ ISR visr;
+ IA64_PSR vpsr;
+ #ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M46.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef CHECK_FAULT
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+ return IA64_NO_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(unimplemented_gva(vcpu, r3)){
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+ return IA64_NO_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_ttag(vcpu, r3, &r1);
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+ return(IA64_NO_FAULT);
+}
+
+
+/*
+ * Emulate tpa: translate the virtual address in r3 to a physical
+ * address via vmx_vcpu_tpa() and deposit it in r1.
+ */
+IA64FAULT vmx_emul_tpa(VCPU *vcpu, INST64 inst)
+{
+    u64 r1,r3;
+    ISR visr;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M46.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if(vpsr.cpl!=0){
+        /* NOTE(review): unlike sibling handlers this privileged path
+         * sets a zero ISR and returns without calling privilege_op()
+         * — confirm intended. */
+        visr.val=0;
+        vcpu_set_isr(vcpu, visr.val);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,1);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if (unimplemented_gva(vcpu,r3) ) {
+        // inject unimplemented_data_address_fault
+        visr.val = set_isr_ei_ni(vcpu);
+        visr.code = IA64_RESERVED_REG_FAULT;
+        /* BUG FIX: was 'isr.val' — 'isr' is not declared in this
+         * function; the ISR being built is 'visr'. */
+        vcpu_set_isr(vcpu, visr.val);
+        // FAULT_UNIMPLEMENTED_DATA_ADDRESS.
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+
+    if(vmx_vcpu_tpa(vcpu, r3, &r1)){
+        return IA64_FAULT;
+    }
+    vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+    return(IA64_NO_FAULT);
+}
+
+/*
+ * Emulate tak: r1 = access key for the virtual address in r3, via
+ * vmx_vcpu_tak().  Privileged (cpl 0 only) under CHECK_FAULT.
+ * NOTE(review): local 'fault' is never used — candidate for removal.
+ */
+IA64FAULT vmx_emul_tak(VCPU *vcpu, INST64 inst)
+{
+ u64 r1,r3;
+ ISR visr;
+ IA64_PSR vpsr;
+ int fault=IA64_NO_FAULT;
+#ifdef CHECK_FAULT
+ visr.val=0;
+ if(check_target_register(vcpu, inst.M46.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if(vpsr.cpl!=0){
+ vcpu_set_isr(vcpu, visr.val);
+ return IA64_FAULT;
+ }
+#endif
+ if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,1);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif
+ }
+ if(vmx_vcpu_tak(vcpu, r3, &r1)){
+ return IA64_FAULT;
+ }
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+ return(IA64_NO_FAULT);
+}
+
+
+/************************************
+ * Insert translation register/cache
+************************************/
+
+/*
+ * Emulate itr.d: insert the pte in r2 into the data translation
+ * register slot selected by r3, with itir/ifa read from the guest
+ * CRs.  Injects an illegal-op fault when guest psr.ic is set.
+ * NOTE(review): 'fault' is unused, and 'isr' only under
+ * VMAL_NO_FAULT_CHECK.
+ */
+IA64FAULT vmx_emul_itr_d(VCPU *vcpu, INST64 inst)
+{
+ UINT64 fault, itir, ifa, pte, slot;
+ ISR isr;
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.ic ) {
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+ if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){
+#ifdef VMAL_NO_FAULT_CHECK
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if(is_reserved_rr_register(vcpu, slot)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ if (vmx_vcpu_get_itir(vcpu,&itir)){
+ return(IA64_FAULT);
+ }
+ if (vmx_vcpu_get_ifa(vcpu,&ifa)){
+ return(IA64_FAULT);
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if (is_reserved_itir_field(vcpu, itir)) {
+ // TODO
+ return IA64_FAULT;
+ }
+ if (unimplemented_gva(vcpu,ifa) ) {
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_RESERVED_REG_FAULT;
+ vcpu_set_isr(vcpu, isr.val);
+ unimpl_daddr(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ return (vmx_vcpu_itr_d(vcpu,pte,itir,ifa,slot));
+}
+
+/*
+ * Emulate itr.i: insert the pte in r2 into the instruction
+ * translation register slot selected by r3.  Structure mirrors
+ * vmx_emul_itr_d; only the final accessor differs.
+ */
+IA64FAULT vmx_emul_itr_i(VCPU *vcpu, INST64 inst)
+{
+ UINT64 fault, itir, ifa, pte, slot;
+ ISR isr;
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.ic ) {
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+ if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){
+#ifdef VMAL_NO_FAULT_CHECK
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if(is_reserved_rr_register(vcpu, slot)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ if (vmx_vcpu_get_itir(vcpu,&itir)){
+ return(IA64_FAULT);
+ }
+ if (vmx_vcpu_get_ifa(vcpu,&ifa)){
+ return(IA64_FAULT);
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if (is_reserved_itir_field(vcpu, itir)) {
+ // TODO
+ return IA64_FAULT;
+ }
+ if (unimplemented_gva(vcpu,ifa) ) {
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_RESERVED_REG_FAULT;
+ vcpu_set_isr(vcpu, isr.val);
+ unimpl_daddr(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ return (vmx_vcpu_itr_i(vcpu,pte,itir,ifa,slot));
+}
+
+/*
+ * Shared screening for itc.d / itc.i emulation.  itc is legal only
+ * with guest psr.ic clear; otherwise an illegal-op fault is injected.
+ * Fetches the pte (GR r2) and the guest cr.itir / cr.ifa values into
+ * the out parameters.  Returns IA64_FAULT when a fault was injected.
+ */
+IA64FAULT itc_fault_check(VCPU *vcpu, INST64 inst, u64 *itir, u64 *ifa,u64 *pte)
+{
+    ISR isr;
+    IA64_PSR vpsr;
+    IA64FAULT ret1;
+
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.ic ) {
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+
+#ifdef  VMAL_NO_FAULT_CHECK
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pte);
+#ifdef  VMAL_NO_FAULT_CHECK
+    if( ret1 != IA64_NO_FAULT ){
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+
+    if (vmx_vcpu_get_itir(vcpu,itir)){
+        return(IA64_FAULT);
+    }
+    if (vmx_vcpu_get_ifa(vcpu,ifa)){
+        return(IA64_FAULT);
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    /* BUG FIX: original passed the out-pointer 'ifa' itself instead
+     * of the fetched address '*ifa'. */
+    if (unimplemented_gva(vcpu,*ifa) ) {
+        isr.val = set_isr_ei_ni(vcpu);
+        isr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, isr.val);
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    return IA64_NO_FAULT;
+}
+
+/* Emulate itc.d: insert a data translation-cache entry from pte/itir/
+ * ifa after the common operand screening. */
+IA64FAULT vmx_emul_itc_d(VCPU *vcpu, INST64 inst)
+{
+ UINT64 itir, ifa, pte;
+
+ if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) {
+ return IA64_FAULT;
+ }
+
+ return (vmx_vcpu_itc_d(vcpu,pte,itir,ifa));
+}
+
+/* Emulate itc.i: instruction-side counterpart of vmx_emul_itc_d. */
+IA64FAULT vmx_emul_itc_i(VCPU *vcpu, INST64 inst)
+{
+ UINT64 itir, ifa, pte;
+
+ if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) {
+ return IA64_FAULT;
+ }
+
+ return (vmx_vcpu_itc_i(vcpu,pte,itir,ifa));
+
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+*************************************/
+
+/* Emulate mov ar.itc = imm (I27/M30).  Only ar44 (itc) is supported;
+ * any other application register panics. */
+IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *vcpu, INST64 inst)
+{
+ // I27 and M30 are identical for these fields
+ if(inst.M30.ar3!=44){
+ panic("Can't support ar register other than itc");
+ }
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ UINT64 imm;
+ // the s bit selects a negated immediate
+ if(inst.M30.s){
+ imm = -inst.M30.imm;
+ }else{
+ imm = inst.M30.imm;
+ }
+ return (vmx_vcpu_set_itc(vcpu, imm));
+}
+
+/* Emulate mov ar.itc = r2 (I26/M29); only ar44 is supported. */
+IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *vcpu, INST64 inst)
+{
+ // I26 and M29 are identical for these fields
+ u64 r2;
+ if(inst.M29.ar3!=44){
+ panic("Can't support ar register other than itc");
+ }
+ if(vmx_vcpu_get_gr(vcpu,inst.M29.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ return (vmx_vcpu_set_itc(vcpu, r2));
+}
+
+
+/* Emulate mov r1 = ar.itc (M31); only ar44 is supported.  Faults
+ * when psr.si is set and the guest is not at cpl 0 (CHECK_FAULT). */
+IA64FAULT vmx_emul_mov_from_ar_reg(VCPU *vcpu, INST64 inst)
+{
+ // I27 and M30 are identical for these fields
+ if(inst.M31.ar3!=44){
+ panic("Can't support ar register other than itc");
+ }
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu,inst.M31.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.si&& vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ u64 r1;
+ vmx_vcpu_get_itc(vcpu,&r1);
+ vmx_vcpu_set_gr(vcpu,inst.M31.r1,r1,0);
+ return IA64_NO_FAULT;
+}
+
+
+/********************************
+ * Moves to privileged registers
+********************************/
+
+/* The six movers below share one shape (M42 format): read the index
+ * (r3) and value (r2) operands, then delegate to the matching
+ * vmx_vcpu_set_* accessor.  CHECK_FAULT builds additionally inject
+ * privileged-op and register-NaT-consumption faults. */
+
+/* Emulate mov pkr[r3] = r2. */
+IA64FAULT vmx_emul_mov_to_pkr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_pkr(vcpu,r3,r2));
+}
+
+/* Emulate mov rr[r3] = r2 (region register). */
+IA64FAULT vmx_emul_mov_to_rr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_rr(vcpu,r3,r2));
+}
+
+/* Emulate mov dbr[r3] = r2 (data breakpoint register). */
+IA64FAULT vmx_emul_mov_to_dbr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_dbr(vcpu,r3,r2));
+}
+
+/* Emulate mov ibr[r3] = r2 (instruction breakpoint register). */
+IA64FAULT vmx_emul_mov_to_ibr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_ibr(vcpu,r3,r2));
+}
+
+/* Emulate mov pmc[r3] = r2 (performance monitor configuration). */
+IA64FAULT vmx_emul_mov_to_pmc(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_pmc(vcpu,r3,r2));
+}
+
+/* Emulate mov pmd[r3] = r2 (performance monitor data). */
+IA64FAULT vmx_emul_mov_to_pmd(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_pmd(vcpu,r3,r2));
+}
+
+
+/**********************************
+ * Moves from privileged registers
+ **********************************/
+
+/*
+ * Emulate mov r1 = rr[r3]: read the virtual region register selected
+ * by the region number in r3 and deposit it in the target GR.
+ */
+IA64FAULT vmx_emul_mov_from_rr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r1;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M43.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+
+#endif //CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(is_reserved_rr_register(vcpu,r3>>VRN_SHIFT)){
+        set_rsv_reg_field_isr(vcpu);
+        rsv_reg_field(vcpu);
+        /* BUG FIX: missing return — the fault was injected but the
+         * read still completed.  Sibling handlers such as
+         * vmx_emul_mov_from_pkr return IA64_FAULT here. */
+        return IA64_FAULT;
+    }
+#endif //CHECK_FAULT
+    vmx_vcpu_get_rr(vcpu,r3,&r1);
+    return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+/* Emulate mov r1 = pkr[r3].  The readers below share one shape (M43):
+ * fetch the index from r3, reject reserved indirect registers
+ * (CHECK_FAULT builds), then copy the value into the target GR. */
+IA64FAULT vmx_emul_mov_from_pkr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_pkr(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+/* Emulate mov r1 = dbr[r3] (data breakpoint register read). */
+IA64FAULT vmx_emul_mov_from_dbr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_dbr(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+/* Emulate mov r1 = ibr[r3] (instruction breakpoint register read). */
+IA64FAULT vmx_emul_mov_from_ibr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_ibr(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+/* Emulate mov r1 = pmc[r3] (performance monitor configuration read). */
+IA64FAULT vmx_emul_mov_from_pmc(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_pmc(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+/* Emulate mov r1 = cpuid[r3].  No cpl check here — cpuid reads are
+ * not privileged. */
+IA64FAULT vmx_emul_mov_from_cpuid(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_cpuid(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+/*
+ * Emulate mov cr[cr3] = r2 (M32).  The value is masked by
+ * cr_igfld_mask(), cached in the VPD vcr[] array, then routed to the
+ * per-CR virtual accessor; writes to unhandled CR numbers are
+ * silently accepted (IA64_NO_FAULT).
+ */
+IA64FAULT vmx_emul_mov_to_cr(VCPU *vcpu, INST64 inst)
+{
+ u64 r2,cr3;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if(is_reserved_cr(inst.M32.cr3)||(vpsr.ic&&is_interruption_control_cr(inst.M32.cr3))){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu, inst.M32.r2, &r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if ( check_cr_rsv_fields (inst.M32.cr3, r2)) {
+ /* Inject Reserved Register/Field fault
+ * into guest */
+ set_rsv_reg_field_isr (vcpu,0);
+ rsv_reg_field (vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ extern u64 cr_igfld_mask(int index, u64 value);
+ // drop guest writes to ignored CR fields before caching/applying
+ r2 = cr_igfld_mask(inst.M32.cr3,r2);
+ VMX_VPD(vcpu, vcr[inst.M32.cr3]) = r2;
+ switch (inst.M32.cr3) {
+ case 0: return vmx_vcpu_set_dcr(vcpu,r2);
+ case 1: return vmx_vcpu_set_itm(vcpu,r2);
+ case 2: return vmx_vcpu_set_iva(vcpu,r2);
+ case 8: return vmx_vcpu_set_pta(vcpu,r2);
+ case 16:return vmx_vcpu_set_ipsr(vcpu,r2);
+ case 17:return vmx_vcpu_set_isr(vcpu,r2);
+ case 19:return vmx_vcpu_set_iip(vcpu,r2);
+ case 20:return vmx_vcpu_set_ifa(vcpu,r2);
+ case 21:return vmx_vcpu_set_itir(vcpu,r2);
+ case 22:return vmx_vcpu_set_iipa(vcpu,r2);
+ case 23:return vmx_vcpu_set_ifs(vcpu,r2);
+ case 24:return vmx_vcpu_set_iim(vcpu,r2);
+ case 25:return vmx_vcpu_set_iha(vcpu,r2);
+ case 64:return vmx_vcpu_set_lid(vcpu,r2);
+ case 65:return IA64_NO_FAULT;
+ case 66:return vmx_vcpu_set_tpr(vcpu,r2);
+ case 67:return vmx_vcpu_set_eoi(vcpu,r2);
+ case 68:return IA64_NO_FAULT;
+ case 69:return IA64_NO_FAULT;
+ case 70:return IA64_NO_FAULT;
+ case 71:return IA64_NO_FAULT;
+ case 72:return vmx_vcpu_set_itv(vcpu,r2);
+ case 73:return vmx_vcpu_set_pmv(vcpu,r2);
+ case 74:return vmx_vcpu_set_cmcv(vcpu,r2);
+ case 80:return vmx_vcpu_set_lrr0(vcpu,r2);
+ case 81:return vmx_vcpu_set_lrr1(vcpu,r2);
+ default: return IA64_NO_FAULT;
+ }
+}
+
+
+/* Fetch CR 'cr' through its accessor into 'val' and move it to the
+ * target GR 'tgt'; on accessor failure yield the fault code instead.
+ * Relies on 'fault', 'val' and 'tgt' locals of the enclosing scope. */
+#define cr_get(cr) \
+    ((fault=vmx_vcpu_get_##cr(vcpu,&val))==IA64_NO_FAULT)?\
+        vmx_vcpu_set_gr(vcpu, tgt, val,0):fault;
+
+
+/*
+ * Emulate mov r1 = cr[cr3] (M33).  cr64 (lid) is read from the
+ * physical CR, cr65 (ivr) goes through the virtual-interrupt path,
+ * cr67 (eoi) reads as zero; reserved CR numbers panic.
+ */
+IA64FAULT vmx_emul_mov_from_cr(VCPU *vcpu, INST64 inst)
+{
+    UINT64 tgt = inst.M33.r1;
+    UINT64 val;
+    IA64FAULT fault;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    /* BUG FIX: a misplaced parenthesis made the original read
+     * is_read_only_cr(inst.M33.cr3 || (...)), folding the psr.ic /
+     * interruption-control test into the macro's argument. */
+    if(is_reserved_cr(inst.M33.cr3)||is_read_only_cr(inst.M33.cr3)||
+        (vpsr.ic&&is_interruption_control_cr(inst.M33.cr3))){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+
+//    from_cr_cnt[inst.M33.cr3]++;
+    switch (inst.M33.cr3) {
+    case 0: return cr_get(dcr);
+    case 1: return cr_get(itm);
+    case 2: return cr_get(iva);
+    case 8: return cr_get(pta);
+    case 16:return cr_get(ipsr);
+    case 17:return cr_get(isr);
+    case 19:return cr_get(iip);
+    case 20:return cr_get(ifa);
+    case 21:return cr_get(itir);
+    case 22:return cr_get(iipa);
+    case 23:return cr_get(ifs);
+    case 24:return cr_get(iim);
+    case 25:return cr_get(iha);
+    case 64:val = ia64_getreg(_IA64_REG_CR_LID);
+        return vmx_vcpu_set_gr(vcpu,tgt,val,0);
+//    case 64:return cr_get(lid);
+    case 65:
+        vmx_vcpu_get_ivr(vcpu,&val);
+        return vmx_vcpu_set_gr(vcpu,tgt,val,0);
+    case 66:return cr_get(tpr);
+    case 67:return vmx_vcpu_set_gr(vcpu,tgt,0L,0);
+    case 68:return cr_get(irr0);
+    case 69:return cr_get(irr1);
+    case 70:return cr_get(irr2);
+    case 71:return cr_get(irr3);
+    case 72:return cr_get(itv);
+    case 73:return cr_get(pmv);
+    case 74:return cr_get(cmcv);
+    case 80:return cr_get(lrr0);
+    case 81:return cr_get(lrr1);
+    default:
+        panic("Read reserved cr register");
+    }
+}
+
+
+
+
+//#define  BYPASS_VMAL_OPCODE
+extern IA64_SLOT_TYPE  slot_types[0x20][3];
+/* Fetch the 16-byte instruction bundle at guest IP 'iip' for the
+ * current domain as two 8-byte fetch_code() reads. */
+IA64_BUNDLE __vmx_get_domain_bundle(u64 iip)
+{
+ IA64_BUNDLE bundle;
+
+ fetch_code( current,iip, &bundle.i64[0]);
+ fetch_code( current,iip+8, &bundle.i64[1]);
+ return bundle;
+}
+
+/** Emulate a privileged operation intercepted via a virtualization
+ * fault.
+ *
+ * @param vcpu   virtual cpu
+ * @param cause  the virtualization-fault cause code
+ * @param opcode the instruction encoding that caused the fault
+ */
+
+void
+vmx_emulate(VCPU *vcpu, UINT64 cause, UINT64 opcode)
+{
+    IA64_BUNDLE bundle;
+    int slot;
+    IA64_SLOT_TYPE slot_type;
+    IA64FAULT status;
+    INST64 inst;
+    REGS * regs;
+    UINT64 iip;
+    regs = vcpu_regs(vcpu);
+    iip = regs->cr_iip;
+/*
+    if (privop_trace) {
+        static long i = 400;
+        //if (i > 0) printf("privop @%p\n",iip);
+        if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n",
+            iip,ia64_get_itc(),ia64_get_itm());
+        i--;
+    }
+*/
+#ifdef  VTLB_DEBUG
+    check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu));
+    dump_vtlb(vmx_vcpu_get_vtlb(vcpu));
+#endif
+#if 0
+if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) {
+    printf ("VMAL decode error: cause - %lx; op - %lx\n",
+        cause, opcode );
+    return;
+}
+#endif
+#ifdef BYPASS_VMAL_OPCODE
+    // make a local copy of the bundle containing the privop
+    bundle = __vmx_get_domain_bundle(iip);
+    slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+    if (!slot) inst.inst = bundle.slot0;
+    else if (slot == 1)
+        inst.inst = bundle.slot1a + (bundle.slot1b<<23);
+    else if (slot == 2) inst.inst = bundle.slot2;
+    else printf("priv_handle_op: illegal slot: %d\n", slot);
+    slot_type = slot_types[bundle.template][slot];
+    ia64_priv_decoder(slot_type, inst, &cause);
+    if(cause==0){
+        printf("This instruction at 0x%lx slot %d can't be virtualized", iip, slot);
+        panic("123456\n");
+    }
+#else
+    inst.inst=opcode;
+#endif /* BYPASS_VMAL_OPCODE */
+
+    /*
+     * Switch to actual virtual rid in rr0 and rr4,
+     * which is required by some tlb related instructions.
+     */
+    prepare_if_physical_mode(vcpu);
+
+    /* Dispatch to the per-cause emulation handler. */
+    switch(cause) {
+    case EVENT_RSM:
+        status=vmx_emul_rsm(vcpu, inst);
+        break;
+    case EVENT_SSM:
+        status=vmx_emul_ssm(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_PSR:
+        status=vmx_emul_mov_to_psr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_PSR:
+        status=vmx_emul_mov_from_psr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_CR:
+        status=vmx_emul_mov_from_cr(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_CR:
+        status=vmx_emul_mov_to_cr(vcpu, inst);
+        break;
+    case EVENT_BSW_0:
+        status=vmx_emul_bsw0(vcpu, inst);
+        break;
+    case EVENT_BSW_1:
+        status=vmx_emul_bsw1(vcpu, inst);
+        break;
+    case EVENT_COVER:
+        status=vmx_emul_cover(vcpu, inst);
+        break;
+    case EVENT_RFI:
+        status=vmx_emul_rfi(vcpu, inst);
+        break;
+    case EVENT_ITR_D:
+        status=vmx_emul_itr_d(vcpu, inst);
+        break;
+    case EVENT_ITR_I:
+        status=vmx_emul_itr_i(vcpu, inst);
+        break;
+    case EVENT_PTR_D:
+        status=vmx_emul_ptr_d(vcpu, inst);
+        break;
+    case EVENT_PTR_I:
+        status=vmx_emul_ptr_i(vcpu, inst);
+        break;
+    case EVENT_ITC_D:
+        status=vmx_emul_itc_d(vcpu, inst);
+        break;
+    case EVENT_ITC_I:
+        status=vmx_emul_itc_i(vcpu, inst);
+        break;
+    case EVENT_PTC_L:
+        status=vmx_emul_ptc_l(vcpu, inst);
+        break;
+    case EVENT_PTC_G:
+        status=vmx_emul_ptc_g(vcpu, inst);
+        break;
+    case EVENT_PTC_GA:
+        status=vmx_emul_ptc_ga(vcpu, inst);
+        break;
+    case EVENT_PTC_E:
+        status=vmx_emul_ptc_e(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_RR:
+        status=vmx_emul_mov_to_rr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_RR:
+        status=vmx_emul_mov_from_rr(vcpu, inst);
+        break;
+    case EVENT_THASH:
+        status=vmx_emul_thash(vcpu, inst);
+        break;
+    case EVENT_TTAG:
+        status=vmx_emul_ttag(vcpu, inst);
+        break;
+    case EVENT_TPA:
+        status=vmx_emul_tpa(vcpu, inst);
+        break;
+    case EVENT_TAK:
+        status=vmx_emul_tak(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_AR_IMM:
+        status=vmx_emul_mov_to_ar_imm(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_AR:
+        status=vmx_emul_mov_to_ar_reg(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_AR:
+        status=vmx_emul_mov_from_ar_reg(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_DBR:
+        status=vmx_emul_mov_to_dbr(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_IBR:
+        status=vmx_emul_mov_to_ibr(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_PMC:
+        status=vmx_emul_mov_to_pmc(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_PMD:
+        status=vmx_emul_mov_to_pmd(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_PKR:
+        status=vmx_emul_mov_to_pkr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_DBR:
+        status=vmx_emul_mov_from_dbr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_IBR:
+        status=vmx_emul_mov_from_ibr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_PMC:
+        status=vmx_emul_mov_from_pmc(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_PKR:
+        status=vmx_emul_mov_from_pkr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_CPUID:
+        status=vmx_emul_mov_from_cpuid(vcpu, inst);
+        break;
+    case EVENT_VMSW:
+        /* BUG FIX: 'cause' is UINT64; %d mismatched the argument
+         * (undefined behavior in printf). */
+        printf ("Unimplemented instruction %lu\n", cause);
+        status=IA64_FAULT;
+        break;
+    default:
+        printf("unknown cause %lu:\n", cause);
+        /* For unknown cause, let hardware to re-execute */
+        status=IA64_RETRY;
+//        panic("unknown cause in virtualization intercept");
+    };
+
+#if 0
+    if (status == IA64_FAULT)
+        panic("Emulation failed with cause %d:\n", cause);
+#endif
+
+    /* Advance guest IP past the emulated instruction unless the
+     * handler already redirected control (rfi) or emulation must
+     * retry. */
+    if ( status == IA64_NO_FAULT && cause !=EVENT_RFI ) {
+        vmx_vcpu_increment_iip(vcpu);
+    }
+
+    recover_if_physical_mode(vcpu);
+//TODO    set_irq_check(v);
+    return;
+
+}
+
diff --git a/xen/arch/ia64/vmx_vsa.S b/xen/arch/ia64/vmx_vsa.S
new file mode 100644
index 0000000000..5ceea44fb6
--- /dev/null
+++ b/xen/arch/ia64/vmx_vsa.S
@@ -0,0 +1,84 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vsa.S: Call PAL virtualization services.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Arun Sharma <arun.sharma@intel.com>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+#include <asm/asmmacro.h>
+
+
+ .text
+
+/*
+ * extern UINT64 ia64_call_vsa(UINT64 proc,UINT64 arg1, UINT64 arg2,
+ * UINT64 arg3, UINT64 arg4, UINT64 arg5,
+ * UINT64 arg6, UINT64 arg7);
+ *
+ * XXX: The currently defined services use only 4 args at the max. The
+ * rest are not consumed.
+ */
+GLOBAL_ENTRY(ia64_call_vsa)
+ .regstk 4,4,0,0
+
+// aliases: saved return pointer / pfs / psr, and the service entry
+rpsave = loc0
+pfssave = loc1
+psrsave = loc2
+entry = loc3
+hostret = r24
+
+ alloc pfssave=ar.pfs,4,4,0,0
+ mov rpsave=rp
+ movl entry=@gprel(__vsa_base)
+1: mov hostret=ip
+ mov r25=in1 // copy arguments
+ mov r26=in2
+ mov r27=in3
+ mov psrsave=psr
+ ;;
+ add entry=entry,gp
+ tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
+ tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
+ ;;
+ ld8 entry=[entry] // read entry point
+ ;;
+ add hostret=2f-1b,hostret // calculate return address
+ add entry=entry,in0 // entry point + service number (proc)
+ ;;
+ rsm psr.i | psr.ic // call with interruption disabled
+ ;;
+ srlz.d
+ mov b6=entry
+ br.cond.sptk b6 // call the service
+2:
+ // Architectural sequence for enabling interrupts if necessary
+(p7) ssm psr.ic
+ ;;
+(p7) srlz.d
+ ;;
+(p6) ssm psr.i
+ ;;
+ mov rp=rpsave
+ mov ar.pfs=pfssave
+ mov r8=r31 // service result becomes the C return value
+ ;;
+ srlz.d
+ br.ret.sptk rp
+
+END(ia64_call_vsa)
+
diff --git a/xen/arch/ia64/vtlb.c b/xen/arch/ia64/vtlb.c
new file mode 100644
index 0000000000..6cbb4478b7
--- /dev/null
+++ b/xen/arch/ia64/vtlb.c
@@ -0,0 +1,1004 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vtlb.c: guest virtual tlb handling module.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ * XiaoYan Feng (Fleming Feng) (Fleming.feng@intel.com)
+ */
+
+#include <linux/sched.h>
+#include <asm/tlb.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/gcc_intrin.h>
+#include <xen/interrupt.h>
+#include <asm/vcpu.h>
+#define MAX_CCH_LENGTH 40
+
+
+static void cch_mem_init(thash_cb_t *hcb)
+{
+ thash_cch_mem_t *p, *q;
+
+ hcb->cch_freelist = p = hcb->cch_buf;
+
+ for ( q=p+1; (u64)(q + 1) <= (u64)hcb->cch_buf + hcb->cch_sz;
+ p++, q++ ) {
+ p->next = q;
+ }
+ p->next = NULL;
+}
+
+static thash_data_t *cch_alloc(thash_cb_t *hcb)
+{
+ thash_cch_mem_t *p;
+
+ if ( (p = hcb->cch_freelist) != NULL ) {
+ hcb->cch_freelist = p->next;
+ }
+ return &(p->data);
+}
+
+static void cch_free(thash_cb_t *hcb, thash_data_t *cch)
+{
+ thash_cch_mem_t *p = (thash_cch_mem_t*)cch;
+
+ p->next = hcb->cch_freelist;
+ hcb->cch_freelist = p;
+}
+
+/*
+ * Check to see if the address rid:va is translated by the TLB
+ */
+static int __is_translated(thash_data_t *tlb, u64 rid, u64 va, CACHE_LINE_TYPE cl)
+{
+ u64 size1,sa1,ea1;
+
+ if ( tlb->rid != rid || tlb->cl != cl )
+ return 0;
+ size1 = PSIZE(tlb->ps);
+ sa1 = tlb->vadr & ~(size1-1); // mask the low address bits
+ ea1 = sa1 + size1;
+
+ if ( va >= sa1 && (va < ea1 || ea1 == 0) )
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * Only for TLB format.
+ */
+static int
+__is_tlb_overlap(thash_cb_t *hcb,thash_data_t *entry,int rid, char cl, u64 sva, u64 eva)
+{
+ uint64_t size1,size2,sa1,ea1,ea2;
+
+ if ( entry->invalid || entry->rid != rid || entry->cl != cl ) {
+ return 0;
+ }
+ size1=PSIZE(entry->ps);
+ sa1 = entry->vadr & ~(size1-1); // mask the low address bits
+ ea1 = sa1 + size1;
+ if ( (sva >= ea1 && ea1 != 0) || (eva <= sa1 && eva != 0) )
+ return 0;
+ else
+ return 1;
+
+}
+
+static void __rem_tr (thash_cb_t *hcb, thash_data_t *tr)
+{
+ if ( hcb->remove_notifier ) {
+ (hcb->remove_notifier)(hcb,tr);
+ }
+ tr->invalid = 1;
+}
+
+static inline void __set_tr (thash_data_t *tr, thash_data_t *data, int idx)
+{
+ *tr = *data;
+ tr->tr_idx = idx;
+}
+
+
+static void __init_tr(thash_cb_t *hcb)
+{
+ int i;
+ thash_data_t *tr;
+
+ for ( i=0, tr = &ITR(hcb,0); i<NITRS; i++ ) {
+ tr[i].invalid = 1;
+ }
+ for ( i=0, tr = &DTR(hcb,0); i<NDTRS; i++ ) {
+ tr[i].invalid = 1;
+ }
+}
+
+/*
+ * Replace TR entry.
+ */
+static void rep_tr(thash_cb_t *hcb,thash_data_t *insert, int idx)
+{
+ thash_data_t *tr;
+
+ if ( insert->cl == ISIDE_TLB ) {
+ tr = &ITR(hcb,idx);
+ }
+ else {
+ tr = &DTR(hcb,idx);
+ }
+ if ( !INVALID_TLB(tr) ) {
+ __rem_tr(hcb, tr);
+ }
+ __set_tr (tr, insert, idx);
+}
+
+/*
+ * remove TR entry.
+ */
+static void rem_tr(thash_cb_t *hcb,CACHE_LINE_TYPE cl, int idx)
+{
+ thash_data_t *tr;
+
+ if ( cl == ISIDE_TLB ) {
+ tr = &ITR(hcb,idx);
+ }
+ else {
+ tr = &DTR(hcb,idx);
+ }
+ if ( !INVALID_TLB(tr) ) {
+ __rem_tr(hcb, tr);
+ }
+}
+
+/*
+ * Delete a thash entry in the collision chain.
+ * prev: the previous entry.
+ * rem: the removed entry.
+ */
+static void __rem_chain(thash_cb_t *hcb/*, thash_data_t *prev*/, thash_data_t *rem)
+{
+ //prev->next = rem->next;
+ if ( hcb->remove_notifier ) {
+ (hcb->remove_notifier)(hcb,rem);
+ }
+ cch_free (hcb, rem);
+}
+
+/*
+ * Delete a thash entry heading a collision chain.
+ */
+static void __rem_hash_head(thash_cb_t *hcb, thash_data_t *hash)
+{
+ thash_data_t *next=hash->next;
+
+ if ( hcb->remove_notifier ) {
+ (hcb->remove_notifier)(hcb,hash);
+ }
+ if ( next != NULL ) {
+ *hash = *next;
+ cch_free (hcb, next);
+ }
+ else {
+ INVALIDATE_HASH(hcb, hash);
+ }
+}
+
+thash_data_t *__vtr_lookup(thash_cb_t *hcb,
+ u64 rid, u64 va,
+ CACHE_LINE_TYPE cl)
+{
+ thash_data_t *tr;
+ int num,i;
+
+ if ( cl == ISIDE_TLB ) {
+ tr = &ITR(hcb,0);
+ num = NITRS;
+ }
+ else {
+ tr = &DTR(hcb,0);
+ num = NDTRS;
+ }
+ for ( i=0; i<num; i++ ) {
+ if ( !INVALID_ENTRY(hcb,&tr[i]) &&
+ __is_translated(&tr[i], rid, va, cl) )
+ return &tr[i];
+ }
+ return NULL;
+}
+
+
+/*
+ * Find overlap VHPT entry within current collision chain
+ * based on internal priv info.
+ */
+static inline thash_data_t* _vhpt_next_overlap_in_chain(thash_cb_t *hcb)
+{
+ thash_data_t *cch;
+ thash_internal_t *priv = &hcb->priv;
+
+
+ for (cch=priv->cur_cch; cch; cch = cch->next) {
+ if ( priv->tag == cch->etag ) {
+ return cch;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Find overlap TLB/VHPT entry within current collision chain
+ * based on internal priv info.
+ */
+static thash_data_t *_vtlb_next_overlap_in_chain(thash_cb_t *hcb)
+{
+ thash_data_t *cch;
+ thash_internal_t *priv = &hcb->priv;
+
+ /* Find overlap TLB entry */
+ for (cch=priv->cur_cch; cch; cch = cch->next) {
+ if ( ((1UL<<cch->section) & priv->s_sect.v) &&
+ __is_tlb_overlap(hcb, cch, priv->rid, priv->cl,
+ priv->_curva, priv->_eva) ) {
+ return cch;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Get the machine format of VHPT entry.
+ * PARAS:
+ * 1: tlb: means the tlb format hash entry converting to VHPT.
+ * 2: va means the guest virtual address that must be covered by
+ * the translated machine VHPT.
+ * 3: vhpt: means the machine format VHPT converting from tlb.
+ * NOTES:
+ * 1: In case the machine address is discontiguous,
+ * "tlb" needs to be covered by several machine VHPT. va
+ * is used to choose one of them.
+ * 2: Foreign map is supported in this API.
+ * RETURN:
+ * 0/1: failure or success.
+ *
+ */
+int __tlb_to_vhpt(thash_cb_t *hcb,
+ thash_data_t *tlb, u64 va,
+ thash_data_t *vhpt)
+{
+ u64 pages,mfn;
+ rr_t vrr;
+
+ ASSERT ( hcb->ht == THASH_VHPT );
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+ pages = PSIZE(vrr.ps) >> PAGE_SHIFT;
+ mfn = (hcb->vs->get_mfn)(DOMID_SELF,tlb->ppn, pages);
+ if ( mfn == INVALID_MFN ) return 0;
+
+ // TODO with machine discontinuous address space issue.
+ vhpt->etag = (hcb->vs->tag_func)( hcb->pta,
+ tlb->vadr, tlb->rid, tlb->ps);
+ //vhpt->ti = 0;
+ vhpt->itir = tlb->itir & ~ITIR_RV_MASK;
+ vhpt->page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK;
+ vhpt->ppn = mfn;
+ vhpt->next = 0;
+ return 1;
+}
+
+
+/*
+ * Insert an entry to hash table.
+ * NOTES:
+ * 1: TLB entry may be TR, TC or Foreign Map. For TR entry,
+ * itr[]/dtr[] need to be updated too.
+ * 2: Inserting to collision chain may trigger recycling if
+ * the buffer for collision chain is empty.
+ * 3: The new entry is inserted at the next of hash table.
+ * (I.e. head of the collision chain)
+ * 4: The buffer holding the entry is allocated internally
+ * from cch_buf or just in the hash table.
+ * 5: Return the entry in hash table or collision chain.
+ * 6: Input parameter, entry, should be in TLB format.
+ * I.e. Has va, rid, ps...
+ * 7: This API is invoked by emulating ITC/ITR and tlb_miss.
+ *
+ */
+
+void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx)
+{
+ if ( hcb->ht != THASH_TLB || entry->section != THASH_TLB_TR ) {
+ panic("wrong parameter\n");
+ }
+ entry->vadr = PAGEALIGN(entry->vadr,entry->ps);
+ entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12);
+ rep_tr(hcb, entry, idx);
+ return ;
+}
+
+thash_data_t *__alloc_chain(thash_cb_t *hcb,thash_data_t *entry)
+{
+ thash_data_t *cch;
+
+ cch = cch_alloc(hcb);
+ if(cch == NULL){
+ // recycle
+ if ( hcb->recycle_notifier ) {
+ hcb->recycle_notifier(hcb,(u64)entry);
+ }
+ thash_purge_all(hcb);
+ cch = cch_alloc(hcb);
+ }
+ return cch;
+}
+
+/*
+ * Insert an entry into hash TLB or VHPT.
+ * NOTES:
+ * 1: When inserting VHPT to thash, "va" is a must covered
+ * address by the inserted machine VHPT entry.
+ * 2: The format of entry is always in TLB.
+ * 3: The caller need to make sure the new entry will not overlap
+ * with any existed entry.
+ */
+static void vtlb_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+ thash_data_t *hash_table, *cch;
+ rr_t vrr;
+
+ hash_table = (hcb->hash_func)(hcb->pta,
+ va, entry->rid, entry->ps);
+ if( INVALID_ENTRY(hcb, hash_table) ) {
+ *hash_table = *entry;
+ hash_table->next = 0;
+ }
+ else {
+ // TODO: Add collision chain length limitation.
+ cch = __alloc_chain(hcb,entry);
+
+ *cch = *hash_table;
+ *hash_table = *entry;
+ hash_table->next = cch;
+ }
+ thash_insert (hcb->ts->vhpt, entry, va);
+ return ;
+}
+
+static void vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+ thash_data_t *hash_table, *cch;
+ rr_t vrr;
+
+ hash_table = (hcb->hash_func)(hcb->pta,
+ va, entry->rid, entry->ps);
+ if( INVALID_ENTRY(hcb, hash_table) ) {
+ if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) {
+ panic("Can't convert to machine VHPT entry\n");
+ }
+ hash_table->next = 0;
+ }
+ else {
+ // TODO: Add collision chain length limitation.
+ cch = __alloc_chain(hcb,entry);
+
+ *cch = *hash_table;
+ if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) {
+ panic("Can't convert to machine VHPT entry\n");
+ }
+ hash_table->next = cch;
+ }
+ return /*hash_table*/;
+}
+
+void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+ thash_data_t *hash_table;
+ rr_t vrr;
+
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,entry->vadr);
+ if ( entry->ps != vrr.ps && entry->section==THASH_TLB_TC) {
+ panic("Not support for multiple page size now\n");
+ }
+ entry->vadr = PAGEALIGN(entry->vadr,entry->ps);
+ entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12);
+ (hcb->ins_hash)(hcb, entry, va);
+
+}
+
+static void rem_thash(thash_cb_t *hcb, thash_data_t *entry)
+{
+ thash_data_t *hash_table, *p, *q;
+ thash_internal_t *priv = &hcb->priv;
+ int idx;
+
+ hash_table = priv->hash_base;
+ if ( hash_table == entry ) {
+ __rem_hash_head (hcb, entry);
+ return ;
+ }
+ // remove from collision chain
+ p = hash_table;
+ for ( q=p->next; q; q = p->next ) {
+ if ( q == entry ) {
+ p->next = q->next;
+ __rem_chain(hcb, entry);
+ return ;
+ }
+ p = q;
+ }
+ panic("Entry not existed or bad sequence\n");
+}
+
+static void rem_vtlb(thash_cb_t *hcb, thash_data_t *entry)
+{
+ thash_data_t *hash_table, *p, *q;
+ thash_internal_t *priv = &hcb->priv;
+ int idx;
+
+ if ( entry->section == THASH_TLB_TR ) {
+ return rem_tr(hcb, entry->cl, entry->tr_idx);
+ }
+ rem_thash(hcb, entry);
+}
+
+int cch_depth=0;
+/*
+ * Purge the collision chain starting from cch.
+ * NOTE:
+ * For those UN-Purgable entries(FM), this function will return
+ * the head of left collision chain.
+ */
+static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch)
+{
+ thash_data_t *next;
+
+ if ( ++cch_depth > MAX_CCH_LENGTH ) {
+ printf ("cch length > MAX_CCH_LENGTH, exceed the expected length\n");
+ }
+ if ( cch -> next ) {
+ next = thash_rem_cch(hcb, cch->next);
+ }
+ else {
+ next = NULL;
+ }
+ if ( PURGABLE_ENTRY(hcb, cch) ) {
+ __rem_chain(hcb, cch);
+ return next;
+ }
+ else {
+ cch->next = next;
+ return cch;
+ }
+}
+
+/*
+ * Purge one hash line (include the entry in hash table).
+ * Can only be called by thash_purge_all.
+ * Input:
+ * hash: The head of collision chain (hash table)
+ *
+ */
+static void thash_rem_line(thash_cb_t *hcb, thash_data_t *hash)
+{
+ if ( INVALID_ENTRY(hcb, hash) ) return;
+
+ if ( hash->next ) {
+ cch_depth = 0;
+ hash->next = thash_rem_cch(hcb, hash->next);
+ }
+ // Then hash table itself.
+ if ( PURGABLE_ENTRY(hcb, hash) ) {
+ __rem_hash_head(hcb, hash);
+ }
+}
+
+
+/*
+ * Find an overlap entry in hash table and its collision chain.
+ * Refer to SDM2 4.1.1.4 for overlap definition.
+ * PARAS:
+ * 1: in: TLB format entry, rid:ps must be the same as in vrr[].
+ * va & ps identify the address space for overlap lookup
+ * 2: section can be combination of TR, TC and FM. (THASH_SECTION_XX)
+ * 3: cl means I side or D side.
+ * RETURNS:
+ * NULL to indicate the end of findings.
+ * NOTES:
+ *
+ */
+thash_data_t *thash_find_overlap(thash_cb_t *hcb,
+ thash_data_t *in, search_section_t s_sect)
+{
+ return (hcb->find_overlap)(hcb, in->vadr,
+ in->ps, in->rid, in->cl, s_sect);
+}
+
+static thash_data_t *vtlb_find_overlap(thash_cb_t *hcb,
+ u64 va, u64 ps, int rid, char cl, search_section_t s_sect)
+{
+ thash_data_t *hash_table;
+ thash_internal_t *priv = &hcb->priv;
+ u64 tag;
+ rr_t vrr;
+
+ priv->_curva = PAGEALIGN(va,ps);
+ priv->_eva = priv->_curva + PSIZE(ps);
+ priv->rid = rid;
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+ priv->ps = vrr.ps;
+ hash_table = (hcb->hash_func)(hcb->pta,
+ priv->_curva, rid, priv->ps);
+
+ priv->s_sect = s_sect;
+ priv->cl = cl;
+ priv->_tr_idx = 0;
+ priv->hash_base = hash_table;
+ priv->cur_cch = hash_table;
+ return (hcb->next_overlap)(hcb);
+}
+
+static thash_data_t *vhpt_find_overlap(thash_cb_t *hcb,
+ u64 va, u64 ps, int rid, char cl, search_section_t s_sect)
+{
+ thash_data_t *hash_table;
+ thash_internal_t *priv = &hcb->priv;
+ u64 tag;
+ rr_t vrr;
+
+ priv->_curva = PAGEALIGN(va,ps);
+ priv->_eva = priv->_curva + PSIZE(ps);
+ priv->rid = rid;
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+ priv->ps = vrr.ps;
+ hash_table = (hcb->hash_func)( hcb->pta,
+ priv->_curva, rid, priv->ps);
+ tag = (hcb->vs->tag_func)( hcb->pta,
+ priv->_curva, rid, priv->ps);
+
+ priv->tag = tag;
+ priv->hash_base = hash_table;
+ priv->cur_cch = hash_table;
+ return (hcb->next_overlap)(hcb);
+}
+
+
+static thash_data_t *vtr_find_next_overlap(thash_cb_t *hcb)
+{
+ thash_data_t *tr;
+ thash_internal_t *priv = &hcb->priv;
+ int num;
+
+ if ( priv->cl == ISIDE_TLB ) {
+ num = NITRS;
+ tr = &ITR(hcb,0);
+ }
+ else {
+ num = NDTRS;
+ tr = &DTR(hcb,0);
+ }
+ for (; priv->_tr_idx < num; priv->_tr_idx ++ ) {
+ if ( __is_tlb_overlap(hcb, &tr[priv->_tr_idx],
+ priv->rid, priv->cl,
+ priv->_curva, priv->_eva) ) {
+ return &tr[priv->_tr_idx++];
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Similar to vtlb_find_overlap, but finds the next overlapping entry.
+ * NOTES:
+ * Intermediate position information is stored in hcb->priv.
+ */
+static thash_data_t *vtlb_next_overlap(thash_cb_t *hcb)
+{
+ thash_data_t *ovl;
+ thash_internal_t *priv = &hcb->priv;
+ u64 addr,rr_psize;
+ rr_t vrr;
+
+ if ( priv->s_sect.tr ) {
+ ovl = vtr_find_next_overlap (hcb);
+ if ( ovl ) return ovl;
+ priv->s_sect.tr = 0;
+ }
+ if ( priv->s_sect.v == 0 ) return NULL;
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
+ rr_psize = PSIZE(vrr.ps);
+
+ while ( priv->_curva < priv->_eva ) {
+ if ( !INVALID_ENTRY(hcb, priv->hash_base) ) {
+ ovl = _vtlb_next_overlap_in_chain(hcb);
+ if ( ovl ) {
+ priv->cur_cch = ovl->next;
+ return ovl;
+ }
+ }
+ priv->_curva += rr_psize;
+ priv->hash_base = (hcb->hash_func)( hcb->pta,
+ priv->_curva, priv->rid, priv->ps);
+ priv->cur_cch = priv->hash_base;
+ }
+ return NULL;
+}
+
+static thash_data_t *vhpt_next_overlap(thash_cb_t *hcb)
+{
+ thash_data_t *ovl;
+ thash_internal_t *priv = &hcb->priv;
+ u64 addr,rr_psize;
+ rr_t vrr;
+
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
+ rr_psize = PSIZE(vrr.ps);
+
+ while ( priv->_curva < priv->_eva ) {
+ if ( !INVALID_ENTRY(hcb, priv->hash_base) ) {
+ ovl = _vhpt_next_overlap_in_chain(hcb);
+ if ( ovl ) {
+ priv->cur_cch = ovl->next;
+ return ovl;
+ }
+ }
+ priv->_curva += rr_psize;
+ priv->hash_base = (hcb->hash_func)( hcb->pta,
+ priv->_curva, priv->rid, priv->ps);
+ priv->tag = (hcb->vs->tag_func)( hcb->pta,
+ priv->_curva, priv->rid, priv->ps);
+ priv->cur_cch = priv->hash_base;
+ }
+ return NULL;
+}
+
+
+/*
+ * Find and purge overlap entries in hash table and its collision chain.
+ * PARAS:
+ * 1: in: TLB format entry, rid:ps must be the same as in vrr[].
+ * rid, va & ps identify the address space for purge
+ * 2: section can be combination of TR, TC and FM. (thash_SECTION_XX)
+ * 3: cl means I side or D side.
+ * NOTES:
+ *
+ */
+void thash_purge_entries(thash_cb_t *hcb,
+ thash_data_t *in, search_section_t p_sect)
+{
+ return thash_purge_entries_ex(hcb, in->rid, in->vadr,
+ in->ps, p_sect, in->cl);
+}
+
+void thash_purge_entries_ex(thash_cb_t *hcb,
+ u64 rid, u64 va, u64 ps,
+ search_section_t p_sect,
+ CACHE_LINE_TYPE cl)
+{
+ thash_data_t *ovl;
+
+ ovl = (hcb->find_overlap)(hcb, va, ps, rid, cl, p_sect);
+ while ( ovl != NULL ) {
+ (hcb->rem_hash)(hcb, ovl);
+ ovl = (hcb->next_overlap)(hcb);
+ };
+}
+
+
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+
+// TODO: add sections.
+void thash_purge_all(thash_cb_t *hcb)
+{
+ thash_data_t *hash_table;
+
+#ifdef VTLB_DEBUG
+ extern u64 sanity_check;
+ static u64 statistics_before_purge_all=0;
+ if ( statistics_before_purge_all ) {
+ sanity_check = 1;
+ check_vtlb_sanity(hcb);
+ }
+#endif
+
+ hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz);
+
+ for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) {
+ thash_rem_line(hcb, hash_table);
+ }
+}
+
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va or the entry.
+ *
+ * INPUT:
+ * in: TLB format for both VHPT & TLB.
+ */
+thash_data_t *vtlb_lookup(thash_cb_t *hcb,
+ thash_data_t *in)
+{
+ return vtlb_lookup_ex(hcb, in->rid, in->vadr, in->cl);
+}
+
+thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb,
+ u64 rid, u64 va,
+ CACHE_LINE_TYPE cl)
+{
+ thash_data_t *hash_table, *cch;
+ u64 tag;
+ rr_t vrr;
+
+ ASSERT ( hcb->ht == THASH_VTLB );
+
+ cch = __vtr_lookup(hcb, rid, va, cl);;
+ if ( cch ) return cch;
+
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+ hash_table = (hcb->hash_func)( hcb->pta,va, rid, vrr.ps);
+
+ if ( INVALID_ENTRY(hcb, hash_table ) )
+ return NULL;
+
+
+ for (cch=hash_table; cch; cch = cch->next) {
+ if ( __is_translated(cch, rid, va, cl) )
+ return cch;
+ }
+ return NULL;
+}
+
+
+/*
+ * Notifier when TLB is deleted from hash table and its collision chain.
+ * NOTES:
+ * The typical situation is that TLB remove needs to inform
+ * VHPT to remove too.
+ * PARAS:
+ * 1: hcb is TLB object.
+ * 2: The format of entry is always in TLB.
+ *
+ */
+void tlb_remove_notifier(thash_cb_t *hcb, thash_data_t *entry)
+{
+ thash_cb_t *vhpt;
+ search_section_t s_sect;
+
+ s_sect.v = 0;
+ thash_purge_entries(hcb->ts->vhpt, entry, s_sect);
+ machine_tlb_purge(entry->rid, entry->vadr, entry->ps);
+}
+
+/*
+ * Initialize internal control data before service.
+ */
+void thash_init(thash_cb_t *hcb, u64 sz)
+{
+ thash_data_t *hash_table;
+
+ cch_mem_init (hcb);
+ hcb->magic = THASH_CB_MAGIC;
+ hcb->pta.val = hcb->hash;
+ hcb->pta.vf = 1;
+ hcb->pta.ve = 1;
+ hcb->pta.size = sz;
+ hcb->get_rr_fn = vmmu_get_rr;
+ ASSERT ( hcb->hash_sz % sizeof(thash_data_t) == 0 );
+ if ( hcb->ht == THASH_TLB ) {
+ hcb->remove_notifier = tlb_remove_notifier;
+ hcb->find_overlap = vtlb_find_overlap;
+ hcb->next_overlap = vtlb_next_overlap;
+ hcb->rem_hash = rem_vtlb;
+ hcb->ins_hash = vtlb_insert;
+ __init_tr(hcb);
+ }
+ else {
+ hcb->remove_notifier = NULL;
+ hcb->find_overlap = vhpt_find_overlap;
+ hcb->next_overlap = vhpt_next_overlap;
+ hcb->rem_hash = rem_thash;
+ hcb->ins_hash = vhpt_insert;
+ }
+ hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz);
+
+ for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) {
+ INVALIDATE_HASH(hcb,hash_table);
+ }
+}
+
+
+#ifdef VTLB_DEBUG
+static u64 cch_length_statistics[MAX_CCH_LENGTH+1];
+u64 sanity_check=0;
+u64 vtlb_chain_sanity(thash_cb_t *vtlb, thash_cb_t *vhpt, thash_data_t *hash)
+{
+ thash_data_t *cch;
+ thash_data_t *ovl;
+ search_section_t s_sect;
+ u64 num=0;
+
+ s_sect.v = 0;
+ for (cch=hash; cch; cch=cch->next) {
+ ovl = thash_find_overlap(vhpt, cch, s_sect);
+ while ( ovl != NULL ) {
+ ovl->checked = 1;
+ ovl = (vhpt->next_overlap)(vhpt);
+ };
+ num ++;
+ }
+ if ( num >= MAX_CCH_LENGTH ) {
+ cch_length_statistics[MAX_CCH_LENGTH] ++;
+ }
+ else {
+ cch_length_statistics[num] ++;
+ }
+ return num;
+}
+
+void check_vtlb_sanity(thash_cb_t *vtlb)
+{
+// struct pfn_info *page;
+ u64 hash_num, i, psr;
+ static u64 check_ok_num, check_fail_num,check_invalid;
+// void *vb1, *vb2;
+ thash_data_t *hash, *cch;
+ thash_data_t *ovl;
+ search_section_t s_sect;
+ thash_cb_t *vhpt = vtlb->ts->vhpt;
+ u64 invalid_ratio;
+
+ if ( sanity_check == 0 ) return;
+ sanity_check --;
+ s_sect.v = 0;
+// page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER);
+// if ( page == NULL ) {
+// panic("No enough contiguous memory for init_domain_mm\n");
+// };
+// vb1 = page_to_virt(page);
+// printf("Allocated page=%lp vbase=%lp\n", page, vb1);
+// vb2 = vb1 + vtlb->hash_sz;
+ hash_num = vhpt->hash_sz / sizeof(thash_data_t);
+// printf("vb2=%lp, size=%lx hash_num=%lx\n", vb2, vhpt->hash_sz, hash_num);
+ printf("vtlb=%lp, hash=%lp size=0x%lx; vhpt=%lp, hash=%lp size=0x%lx\n",
+ vtlb, vtlb->hash,vtlb->hash_sz,
+ vhpt, vhpt->hash, vhpt->hash_sz);
+ //memcpy(vb1, vtlb->hash, vtlb->hash_sz);
+ //memcpy(vb2, vhpt->hash, vhpt->hash_sz);
+ for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) {
+ cch_length_statistics[i] = 0;
+ }
+
+ local_irq_save(psr);
+
+ hash = vhpt->hash;
+ for (i=0; i < hash_num; i++) {
+ if ( !INVALID_ENTRY(vhpt, hash) ) {
+ for ( cch= hash; cch; cch=cch->next) {
+ cch->checked = 0;
+ }
+ }
+ hash ++;
+ }
+ printf("Done vhpt clear checked flag, hash_num=0x%lx\n", hash_num);
+ check_invalid = 0;
+ check_ok_num=0;
+ hash = vtlb->hash;
+ for ( i=0; i< hash_num; i++ ) {
+ if ( !INVALID_ENTRY(vtlb, hash) ) {
+ check_ok_num += vtlb_chain_sanity(vtlb, vhpt, hash);
+ }
+ else {
+ check_invalid++;
+ }
+ hash ++;
+ }
+ printf("Done vtlb entry check, hash=%lp\n", hash);
+ printf("check_ok_num = 0x%lx check_invalid=0x%lx\n", check_ok_num,check_invalid);
+ invalid_ratio = 1000*check_invalid / hash_num;
+ printf("%02ld.%01ld%% entries are invalid\n",
+ invalid_ratio/10, invalid_ratio % 10 );
+ for (i=0; i<NDTRS; i++) {
+ ovl = thash_find_overlap(vhpt, &vtlb->ts->dtr[i], s_sect);
+ while ( ovl != NULL ) {
+ ovl->checked = 1;
+ ovl = (vhpt->next_overlap)(vhpt);
+ };
+ }
+ printf("Done dTR\n");
+ for (i=0; i<NITRS; i++) {
+ ovl = thash_find_overlap(vhpt, &vtlb->ts->itr[i], s_sect);
+ while ( ovl != NULL ) {
+ ovl->checked = 1;
+ ovl = (vhpt->next_overlap)(vhpt);
+ };
+ }
+ printf("Done iTR\n");
+ check_fail_num = 0;
+ check_invalid = 0;
+ check_ok_num=0;
+ hash = vhpt->hash;
+ for (i=0; i < hash_num; i++) {
+ if ( !INVALID_ENTRY(vhpt, hash) ) {
+ for ( cch= hash; cch; cch=cch->next) {
+ if ( !cch->checked ) {
+ printf ("!!!Hash=%lp cch=%lp not within vtlb\n", hash, cch);
+ check_fail_num ++;
+ }
+ else {
+ check_ok_num++;
+ }
+ }
+ }
+ else {
+ check_invalid ++;
+ }
+ hash ++;
+ }
+ local_irq_restore(psr);
+ printf("check_ok_num=0x%lx check_fail_num=0x%lx check_invalid=0x%lx\n",
+ check_ok_num, check_fail_num, check_invalid);
+ //memcpy(vtlb->hash, vb1, vtlb->hash_sz);
+ //memcpy(vhpt->hash, vb2, vhpt->hash_sz);
+ printf("The statistics of collision chain length is listed\n");
+ for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) {
+ printf("CCH length=%02ld, chain number=%ld\n", i, cch_length_statistics[i]);
+ }
+// free_domheap_pages(page, VCPU_TLB_ORDER);
+ printf("Done check_vtlb\n");
+}
+
+void dump_vtlb(thash_cb_t *vtlb)
+{
+ static u64 dump_vtlb=0;
+ thash_data_t *hash, *cch, *tr;
+ u64 hash_num,i;
+
+ if ( dump_vtlb == 0 ) return;
+ dump_vtlb --;
+ hash_num = vtlb->hash_sz / sizeof(thash_data_t);
+ hash = vtlb->hash;
+
+ printf("Dump vTC\n");
+ for ( i = 0; i < hash_num; i++ ) {
+ if ( !INVALID_ENTRY(vtlb, hash) ) {
+ printf("VTLB at hash=%lp\n", hash);
+ for (cch=hash; cch; cch=cch->next) {
+ printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+ cch, cch->vadr, cch->ps, cch->rid);
+ }
+ }
+ hash ++;
+ }
+ printf("Dump vDTR\n");
+ for (i=0; i<NDTRS; i++) {
+ tr = &DTR(vtlb,i);
+ printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+ tr, tr->vadr, tr->ps, tr->rid);
+ }
+ printf("Dump vITR\n");
+ for (i=0; i<NITRS; i++) {
+ tr = &ITR(vtlb,i);
+ printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+ tr, tr->vadr, tr->ps, tr->rid);
+ }
+ printf("End of vTLB dump\n");
+}
+#endif
diff --git a/xen/arch/ia64/xenasm.S b/xen/arch/ia64/xenasm.S
index 8616e078eb..f04dfea8fa 100644
--- a/xen/arch/ia64/xenasm.S
+++ b/xen/arch/ia64/xenasm.S
@@ -11,6 +11,9 @@
#include <asm/pgtable.h>
#include <asm/vhpt.h>
+#if 0
+// FIXME: there's gotta be a better way...
+// ski and spaski are different... moved to xenmisc.c
#define RunningOnHpSki(rx,ry,pn) \
addl rx = 2, r0; \
addl ry = 3, r0; \
@@ -22,7 +25,7 @@
;; \
(pn) movl rx = 0x7000004 ; \
;; \
- (pn) cmp.eq pn,p0 = ry, rx; \
+ (pn) cmp.ge pn,p0 = ry, rx; \
;;
//int platform_is_hp_ski(void)
@@ -32,9 +35,12 @@ GLOBAL_ENTRY(platform_is_hp_ski)
(p8) mov r8 = 1
br.ret.sptk.many b0
END(platform_is_hp_ski)
+#endif
// Change rr7 to the passed value while ensuring
-// Xen is mapped into the new region
+// Xen is mapped into the new region.
+// in0: new rr7 value
+// in1: Xen virtual address of shared info (to be pinned)
#define PSR_BITS_TO_CLEAR \
(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
@@ -45,7 +51,7 @@ END(platform_is_hp_ski)
GLOBAL_ENTRY(ia64_new_rr7)
// not sure this unwind statement is correct...
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1)
- alloc loc1 = ar.pfs, 1, 7, 0, 0
+ alloc loc1 = ar.pfs, 2, 7, 0, 0
1: {
mov r28 = in0 // copy procedure index
mov r8 = ip // save ip to compute branch
@@ -62,7 +68,7 @@ GLOBAL_ENTRY(ia64_new_rr7)
tpa loc6=loc6 // grab this BEFORE changing rr7
;;
#endif
- movl loc5=SHAREDINFO_ADDR
+ mov loc5=in1
;;
tpa loc5=loc5 // grab this BEFORE changing rr7
;;
@@ -126,9 +132,9 @@ GLOBAL_ENTRY(ia64_new_rr7)
(p7) br.cond.sptk .stack_overlaps
;;
movl r25=PAGE_KERNEL
- dep r20=0,r13,50,14 // physical address of "current"
+ dep r21=0,r13,60,4 // physical address of "current"
;;
- or r23=r25,r20 // construct PA | page properties
+ or r23=r25,r21 // construct PA | page properties
mov r25=IA64_GRANULE_SHIFT<<2
;;
ptr.d r13,r25
@@ -146,9 +152,9 @@ GLOBAL_ENTRY(ia64_new_rr7)
;;
movl r25=PAGE_KERNEL
;;
- mov r20=loc2 // saved percpu physical address
+ mov r21=loc2 // saved percpu physical address
;;
- or r23=r25,r20 // construct PA | page properties
+ or r23=r25,r21 // construct PA | page properties
mov r24=PERCPU_PAGE_SHIFT<<2
;;
ptr.d r22,r24
@@ -166,9 +172,9 @@ GLOBAL_ENTRY(ia64_new_rr7)
;;
movl r25=PAGE_KERNEL
;;
- mov r20=loc6 // saved vhpt physical address
+ mov r21=loc6 // saved vhpt physical address
;;
- or r23=r25,r20 // construct PA | page properties
+ or r23=r25,r21 // construct PA | page properties
mov r24=VHPT_PAGE_SHIFT<<2
;;
ptr.d r22,r24
@@ -184,11 +190,11 @@ GLOBAL_ENTRY(ia64_new_rr7)
movl r22=SHAREDINFO_ADDR
;;
- movl r25=PAGE_KERNEL
+ movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
;;
- mov r20=loc5 // saved sharedinfo physical address
+ mov r21=loc5 // saved sharedinfo physical address
;;
- or r23=r25,r20 // construct PA | page properties
+ or r23=r25,r21 // construct PA | page properties
mov r24=PAGE_SHIFT<<2
;;
ptr.d r22,r24
@@ -255,26 +261,26 @@ GLOBAL_ENTRY(ia64_prepare_handle_reflection)
br.cond.sptk.many rp // goes to ia64_leave_kernel
END(ia64_prepare_handle_reflection)
-// NOTE: instruction spacing must be explicit for recovery on miss
GLOBAL_ENTRY(__get_domain_bundle)
- ld8 r8=[r32],8
- nop 0
- nop 0
+ EX(.failure_in_get_bundle,ld8 r8=[r32],8)
+ ;;
+ EX(.failure_in_get_bundle,ld8 r9=[r32])
+ ;;
+ br.ret.sptk.many rp
+ ;;
+.failure_in_get_bundle:
+ mov r8=0
;;
- ld8 r9=[r32]
- nop 0
- nop 0
+ mov r9=0
;;
br.ret.sptk.many rp
- nop 0
- nop 0
;;
END(__get_domain_bundle)
GLOBAL_ENTRY(dorfirfi)
-#define SI_CR_IIP_OFFSET 0x150
-#define SI_CR_IPSR_OFFSET 0x148
-#define SI_CR_IFS_OFFSET 0x158
+#define SI_CR_IIP_OFFSET 0x10
+#define SI_CR_IPSR_OFFSET 0x08
+#define SI_CR_IFS_OFFSET 0x18
movl r16 = SHAREDINFO_ADDR+SI_CR_IIP_OFFSET
movl r17 = SHAREDINFO_ADDR+SI_CR_IPSR_OFFSET
movl r18 = SHAREDINFO_ADDR+SI_CR_IFS_OFFSET
@@ -459,3 +465,16 @@ static: cmp.eq p7,p8=6,r32 /* PAL_PTCE_INFO */
stacked:
br.ret.sptk.few rp
END(pal_emulator_static)
+
+GLOBAL_ENTRY(vhpt_insert)
+// alloc loc0 = ar.pfs, 3, 1, 0, 0
+ mov r16=r32
+ mov r26=r33
+ mov r27=r34
+ ;;
+ VHPT_INSERT()
+// VHPT_INSERT1() ... add collision chains later
+// mov ar.pfs = loc0
+ br.ret.sptk.few rp
+ ;;
+END(vhpt_insert)
diff --git a/xen/arch/ia64/xenirq.c b/xen/arch/ia64/xenirq.c
new file mode 100644
index 0000000000..5bf09171c8
--- /dev/null
+++ b/xen/arch/ia64/xenirq.c
@@ -0,0 +1,77 @@
+/*
+ * Xen irq routines
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ */
+
+#include <asm/ptrace.h>
+#include <asm/hw_irq.h>
+
+
+void
+xen_debug_irq(ia64_vector vector, struct pt_regs *regs)
+{
+//FIXME: For debug only, can be removed
+ static char firstirq = 1;
+ static char firsttime[256];
+ static char firstpend[256];
+ if (firstirq) {
+ int i;
+ for (i=0;i<256;i++) firsttime[i] = 1;
+ for (i=0;i<256;i++) firstpend[i] = 1;
+ firstirq = 0;
+ }
+ if (firsttime[vector]) {
+ printf("**** (entry) First received int on vector=%d,itc=%lx\n",
+ (unsigned long) vector, ia64_get_itc());
+ firsttime[vector] = 0;
+ }
+}
+
+
+int
+xen_do_IRQ(ia64_vector vector)
+{
+ if (vector != 0xef) {
+ extern void vcpu_pend_interrupt(void *, int);
+#if 0
+ if (firsttime[vector]) {
+ printf("**** (iterate) First received int on vector=%d,itc=%lx\n",
+ (unsigned long) vector, ia64_get_itc());
+ firsttime[vector] = 0;
+ }
+ if (firstpend[vector]) {
+ printf("**** First pended int on vector=%d,itc=%lx\n",
+ (unsigned long) vector,ia64_get_itc());
+ firstpend[vector] = 0;
+ }
+#endif
+ //FIXME: TEMPORARY HACK!!!!
+ vcpu_pend_interrupt(dom0->vcpu[0],vector);
+ domain_wake(dom0->vcpu[0]);
+ return(1);
+ }
+ return(0);
+}
+
+/* From linux/kernel/softirq.c */
+#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
+# define invoke_softirq() __do_softirq()
+#else
+# define invoke_softirq() do_softirq()
+#endif
+
+/*
+ * Exit an interrupt context. Process softirqs if needed and possible:
+ */
+void irq_exit(void)
+{
+ //account_system_vtime(current);
+ //sub_preempt_count(IRQ_EXIT_OFFSET);
+ if (!in_interrupt() && local_softirq_pending())
+ invoke_softirq();
+ //preempt_enable_no_resched();
+}
+/* end from linux/kernel/softirq.c */
diff --git a/xen/arch/ia64/xenmem.c b/xen/arch/ia64/xenmem.c
new file mode 100644
index 0000000000..3a749840a0
--- /dev/null
+++ b/xen/arch/ia64/xenmem.c
@@ -0,0 +1,101 @@
+/*
+ * Xen memory allocator routines
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@hp.com>
+ * Copyright (C) 2005 Intel Corp.
+ *
+ * Routines used by ia64 machines with contiguous (or virtually contiguous)
+ * memory.
+ */
+
+#include <linux/config.h>
+#include <asm/pgtable.h>
+#include <xen/mm.h>
+
+extern struct page *zero_page_memmap_ptr;
+struct pfn_info *frame_table;
+unsigned long frame_table_size;
+unsigned long max_page;
+
+struct page *mem_map;
+#define MAX_DMA_ADDRESS ~0UL // FIXME???
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+static unsigned long num_dma_physpages;
+#endif
+
+/*
+ * Set up the page tables.
+ */
+#ifdef CONFIG_VTI
+unsigned long *mpt_table;
+unsigned long *mpt_table_size;
+#endif
+
+void
+paging_init (void)
+{
+ struct pfn_info *pg;
+
+#ifdef CONFIG_VTI
+ unsigned int mpt_order;
+	/* Create the machine-to-physical mapping table.
+	 * NOTE: similar to the frame table, we may later need a virtually
+	 * mapped mpt table if a large hole exists. Also, MAX_ORDER needs
+	 * to be changed in common code, which only supports 16M so far.
+	 */
+ mpt_table_size = max_page * sizeof(unsigned long);
+ mpt_order = get_order(mpt_table_size);
+ ASSERT(mpt_order <= MAX_ORDER);
+ if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL)
+ panic("Not enough memory to bootstrap Xen.\n");
+
+ printk("machine to physical table: 0x%lx\n", (u64)mpt_table);
+ memset(mpt_table, 0x55, mpt_table_size);
+
+ /* Any more setup here? On VMX enabled platform,
+ * there's no need to keep guest linear pg table,
+ * and read only mpt table. MAP cache is not used
+ * in this stage, and later it will be in region 5.
+ * IO remap is in region 6 with identity mapping.
+ */
+ /* HV_tlb_init(); */
+
+#else // CONFIG_VTI
+
+ /* Allocate and map the machine-to-phys table */
+ if ((pg = alloc_domheap_pages(NULL, 10)) == NULL)
+ panic("Not enough memory to bootstrap Xen.\n");
+ memset(page_to_virt(pg), 0x55, 16UL << 20);
+#endif // CONFIG_VTI
+
+ /* Other mapping setup */
+
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+}
+
+/* FIXME: postpone support for machines with big holes between physical memories.
+ * The current hack only allows EFI memory descriptors placed below 4G. (See efi.c)
+ */
+#ifndef CONFIG_VIRTUAL_MEM_MAP
+#define FT_ALIGN_SIZE (16UL << 20)
+void __init init_frametable(void)
+{
+ unsigned long i, pfn;
+ frame_table_size = max_page * sizeof(struct pfn_info);
+ frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
+
+	/* Request a contiguous chunk from the boot allocator, since the HV
+	 * address space is identity mapped */
+ pfn = alloc_boot_pages(
+ frame_table_size >> PAGE_SHIFT, FT_ALIGN_SIZE >> PAGE_SHIFT);
+ if (pfn == 0)
+ panic("Not enough memory for frame table.\n");
+
+ frame_table = __va(pfn << PAGE_SHIFT);
+ memset(frame_table, 0, frame_table_size);
+ printk("size of frame_table: %lukB\n",
+ frame_table_size >> 10);
+}
+#endif
diff --git a/xen/arch/ia64/xenmisc.c b/xen/arch/ia64/xenmisc.c
new file mode 100644
index 0000000000..bb9f83019a
--- /dev/null
+++ b/xen/arch/ia64/xenmisc.c
@@ -0,0 +1,317 @@
+/*
+ * Xen misc
+ *
+ * Functions/decls that are/may be needed to link with Xen because
+ * of x86 dependencies
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ */
+
+#include <linux/config.h>
+#include <xen/sched.h>
+#include <linux/efi.h>
+#include <asm/processor.h>
+#include <xen/serial.h>
+#include <asm/io.h>
+#include <xen/softirq.h>
+
+efi_memory_desc_t ia64_efi_io_md;
+EXPORT_SYMBOL(ia64_efi_io_md);
+unsigned long wait_init_idle;
+int phys_proc_id[NR_CPUS];
+unsigned long loops_per_jiffy = (1<<12); // from linux/init/main.c
+
+void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n"); }
+void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check abort handling)\n"); }
+void ia64_mca_cpu_init(void *x) { }
+void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { }
+void ia64_patch_vtop(unsigned long a, unsigned long b) { }
+void hpsim_setup(char **x) { }
+
+// called from mem_init... don't think s/w I/O tlb is needed in Xen
+//void swiotlb_init(void) { } ...looks like it IS needed
+
+long
+is_platform_hp_ski(void)
+{
+ int i;
+ long cpuid[6];
+
+ for (i = 0; i < 5; ++i)
+ cpuid[i] = ia64_get_cpuid(i);
+ if ((cpuid[0] & 0xff) != 'H') return 0;
+ if ((cpuid[3] & 0xff) != 0x4) return 0;
+ if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0;
+ if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0;
+ if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0;
+ return 1;
+}
+
+long
+platform_is_hp_ski(void)
+{
+ extern long running_on_sim;
+ return running_on_sim;
+}
+
+/* calls in xen/common code that are unused on ia64 */
+
+void sync_lazy_execstate_cpu(unsigned int cpu) {}
+void sync_lazy_execstate_mask(cpumask_t mask) {}
+void sync_lazy_execstate_all(void) {}
+
+int grant_table_create(struct domain *d) { return 0; }
+void grant_table_destroy(struct domain *d)
+{
+ printf("grant_table_destroy: domain_destruct not tested!!!\n");
+ printf("grant_table_destroy: ensure atomic_* calls work in domain_destruct!!\n");
+ dummy();
+ return;
+}
+
+struct pt_regs *guest_cpu_user_regs(void) { return ia64_task_regs(current); }
+
+void raise_actimer_softirq(void)
+{
+ raise_softirq(AC_TIMER_SOFTIRQ);
+}
+
+unsigned long __hypercall_create_continuation(
+ unsigned int op, unsigned int nr_args, ...)
+{
+ printf("__hypercall_create_continuation: not implemented!!!\n");
+}
+
+///////////////////////////////
+
+///////////////////////////////
+// from arch/x86/apic.c
+///////////////////////////////
+
+int reprogram_ac_timer(s_time_t timeout)
+{
+ struct vcpu *v = current;
+
+#ifdef CONFIG_VTI
+ if(VMX_DOMAIN(v))
+ return 1;
+#endif // CONFIG_VTI
+ local_cpu_data->itm_next = timeout;
+ if (is_idle_task(v->domain)) vcpu_safe_set_itm(timeout);
+ else vcpu_set_next_timer(current);
+ return 1;
+}
+
+///////////////////////////////
+// from arch/ia64/page_alloc.c
+///////////////////////////////
+DEFINE_PER_CPU(struct page_state, page_states) = {0};
+unsigned long totalram_pages;
+
+void __mod_page_state(unsigned offset, unsigned long delta)
+{
+ unsigned long flags;
+ void* ptr;
+
+ local_irq_save(flags);
+ ptr = &__get_cpu_var(page_states);
+ *(unsigned long*)(ptr + offset) += delta;
+ local_irq_restore(flags);
+}
+
+///////////////////////////////
+// from arch/x86/flushtlb.c
+///////////////////////////////
+
+u32 tlbflush_clock;
+u32 tlbflush_time[NR_CPUS];
+
+///////////////////////////////
+// from arch/x86/memory.c
+///////////////////////////////
+
+void init_percpu_info(void)
+{
+ dummy();
+ //memset(percpu_info, 0, sizeof(percpu_info));
+}
+
+void free_page_type(struct pfn_info *page, unsigned int type)
+{
+ dummy();
+}
+
+///////////////////////////////
+// from arch/ia64/traps.c
+///////////////////////////////
+
+void show_registers(struct pt_regs *regs)
+{
+ printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n");
+}
+
+///////////////////////////////
+// from common/keyhandler.c
+///////////////////////////////
+void dump_pageframe_info(struct domain *d)
+{
+ printk("dump_pageframe_info not implemented\n");
+}
+
+///////////////////////////////
+// called from arch/ia64/head.S
+///////////////////////////////
+
+void console_print(char *msg)
+{
+ printk("console_print called, how did start_kernel return???\n");
+}
+
+void kernel_thread_helper(void)
+{
+ printk("kernel_thread_helper not implemented\n");
+ dummy();
+}
+
+void sys_exit(void)
+{
+ printk("sys_exit not implemented\n");
+ dummy();
+}
+
+////////////////////////////////////
+// called from unaligned.c
+////////////////////////////////////
+
+void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */
+{
+ printk("die_if_kernel: called, not implemented\n");
+}
+
+long
+ia64_peek (struct task_struct *child, struct switch_stack *child_stack,
+ unsigned long user_rbs_end, unsigned long addr, long *val)
+{
+ printk("ia64_peek: called, not implemented\n");
+}
+
+long
+ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
+ unsigned long user_rbs_end, unsigned long addr, long val)
+{
+ printk("ia64_poke: called, not implemented\n");
+}
+
+void
+ia64_sync_fph (struct task_struct *task)
+{
+ printk("ia64_sync_fph: called, not implemented\n");
+}
+
+void
+ia64_flush_fph (struct task_struct *task)
+{
+ printk("ia64_flush_fph: called, not implemented\n");
+}
+
+////////////////////////////////////
+// called from irq_ia64.c:init_IRQ()
+// (because CONFIG_IA64_HP_SIM is specified)
+////////////////////////////////////
+void hpsim_irq_init(void) { }
+
+
+// accommodate linux extable.c
+//const struct exception_table_entry *
+void *search_module_extables(unsigned long addr) { return NULL; }
+void *__module_text_address(unsigned long addr) { return NULL; }
+void *module_text_address(unsigned long addr) { return NULL; }
+
+void cs10foo(void) {}
+void cs01foo(void) {}
+
+// context_switch
+void context_switch(struct vcpu *prev, struct vcpu *next)
+{
+//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
+//printk("@@@@@@ context switch from domain %d (%x) to domain %d (%x)\n",
+//prev->domain->domain_id,(long)prev&0xffffff,next->domain->domain_id,(long)next&0xffffff);
+//if (prev->domain->domain_id == 1 && next->domain->domain_id == 0) cs10foo();
+//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
+//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
+#ifdef CONFIG_VTI
+ unsigned long psr;
+ /* Interrupt is enabled after next task is chosen.
+ * So we have to disable it for stack switch.
+ */
+ local_irq_save(psr);
+ vtm_domain_out(prev);
+ /* Housekeeping for prev domain */
+#endif // CONFIG_VTI
+
+ switch_to(prev,next,prev);
+#ifdef CONFIG_VTI
+ /* Post-setup for new domain */
+ vtm_domain_in(current);
+ local_irq_restore(psr);
+#endif // CONFIG_VTI
+// leave this debug for now: it acts as a heartbeat when more than
+// one domain is active
+{
+static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50};
+static int i = 100;
+int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
+if (!cnt[id]--) { printk("%x",id); cnt[id] = 500; }
+if (!i--) { printk("+",id); cnt[id] = 1000; }
+}
+ clear_bit(_VCPUF_running, &prev->vcpu_flags);
+ //if (!is_idle_task(next->domain) )
+ //send_guest_virq(next, VIRQ_TIMER);
+#ifdef CONFIG_VTI
+ if (VMX_DOMAIN(current))
+ vmx_load_all_rr(current);
+ return;
+#else // CONFIG_VTI
+ if (!is_idle_task(current->domain)) {
+ load_region_regs(current);
+ if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
+ }
+ if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
+#endif // CONFIG_VTI
+}
+
+void continue_running(struct vcpu *same)
+{
+ /* nothing to do */
+}
+
+void panic_domain(struct pt_regs *regs, const char *fmt, ...)
+{
+ va_list args;
+ char buf[128];
+ struct vcpu *v = current;
+ static volatile int test = 1; // so can continue easily in debug
+ extern spinlock_t console_lock;
+ unsigned long flags;
+
+loop:
+ printf("$$$$$ PANIC in domain %d (k6=%p): ",
+ v->domain->domain_id, ia64_get_kr(IA64_KR_CURRENT));
+ va_start(args, fmt);
+ (void)vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+ printf(buf);
+ if (regs) show_registers(regs);
+ domain_pause_by_systemcontroller(current->domain);
+ v->domain->shutdown_code = SHUTDOWN_crash;
+ set_bit(_DOMF_shutdown, v->domain->domain_flags);
+ if (v->domain->domain_id == 0) {
+ int i = 1000000000L;
+ // if domain0 crashes, just periodically print out panic
+ // message to make post-mortem easier
+ while(i--);
+ goto loop;
+ }
+}
diff --git a/xen/arch/ia64/xensetup.c b/xen/arch/ia64/xensetup.c
new file mode 100644
index 0000000000..d95eebfc9f
--- /dev/null
+++ b/xen/arch/ia64/xensetup.c
@@ -0,0 +1,330 @@
+/******************************************************************************
+ * xensetup.c
+ * Copyright (c) 2004-2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+//#include <xen/spinlock.h>
+#include <xen/multiboot.h>
+#include <xen/sched.h>
+#include <xen/mm.h>
+//#include <xen/delay.h>
+#include <xen/compile.h>
+//#include <xen/console.h>
+//#include <xen/serial.h>
+#include <xen/trace.h>
+#include <asm/meminit.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <xen/string.h>
+
+unsigned long xenheap_phys_end;
+
+char saved_command_line[COMMAND_LINE_SIZE];
+
+struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+
+#ifdef CLONE_DOMAIN0
+struct domain *clones[CLONE_DOMAIN0];
+#endif
+extern struct domain *dom0;
+extern unsigned long domain0_ready;
+
+int find_max_pfn (unsigned long, unsigned long, void *);
+void start_of_day(void);
+
+/* opt_nosmp: If true, secondary processors are ignored. */
+static int opt_nosmp = 0;
+boolean_param("nosmp", opt_nosmp);
+
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus);
+
+/*
+ * opt_xenheap_megabytes: Size of Xen heap in megabytes, including:
+ * xen image
+ * bootmap bits
+ * xen heap
+ * Note: To make the xenheap size configurable, the prerequisite is
+ * to configure elilo to allow relocation by default. Then, since
+ * elilo chooses 256M as the alignment when relocating, the alignment
+ * issue on IPF can be addressed.
+ */
+unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
+unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE;
+extern long running_on_sim;
+unsigned long xen_pstart;
+
+static int
+xen_count_pages(u64 start, u64 end, void *arg)
+{
+ unsigned long *count = arg;
+
+    /* FIXME: do we need to consider the difference between DMA-usable memory
+     * and normal memory? It seems the HV has no requirement to perform DMA
+     * that is owned by Dom0? */
+ *count += (end - start) >> PAGE_SHIFT;
+ return 0;
+}
+
+/* Find first hole after trunk for xen image */
+static int
+xen_find_first_hole(u64 start, u64 end, void *arg)
+{
+ unsigned long *first_hole = arg;
+
+ if ((*first_hole) == 0) {
+ if ((start <= KERNEL_START) && (KERNEL_START < end))
+ *first_hole = __pa(end);
+ }
+
+ return 0;
+}
+
+static void __init do_initcalls(void)
+{
+ initcall_t *call;
+ for ( call = &__initcall_start; call < &__initcall_end; call++ )
+ (*call)();
+}
+
+/*
+ * The IPF loader currently supports only one command line, shared
+ * by both xen and the guest kernel. This function pre-parses the
+ * mixed command line to split it into two parts.
+ *
+ * Users should separate the parameters with "--"; the strings after
+ * the separator are for the guest kernel. A missing "--" means the
+ * whole line belongs to the guest. Example:
+ * "com2=57600,8n1 console=com2 -- console=ttyS1 console=tty
+ * root=/dev/sda3 ro"
+ */
+static char null[4] = { 0 };
+
+void early_cmdline_parse(char **cmdline_p)
+{
+ char *guest_cmd;
+ char *split = "--";
+
+ if (*cmdline_p == NULL) {
+ *cmdline_p = &null[0];
+ saved_command_line[0] = '\0';
+ return;
+ }
+
+ guest_cmd = strstr(*cmdline_p, split);
+    /* If there is no separator, the whole line is for the guest */
+ if (guest_cmd == NULL) {
+ guest_cmd = *cmdline_p;
+ *cmdline_p = &null[0];
+ } else {
+ *guest_cmd = '\0'; /* Split boot parameters for xen and guest */
+ guest_cmd += strlen(split);
+ while (*guest_cmd == ' ') guest_cmd++;
+ }
+
+ strlcpy(saved_command_line, guest_cmd, COMMAND_LINE_SIZE);
+ return;
+}
+
+void start_kernel(void)
+{
+ unsigned char *cmdline;
+ void *heap_start;
+ int i;
+ unsigned long max_mem, nr_pages, firsthole_start;
+ unsigned long dom0_memory_start, dom0_memory_end;
+ unsigned long initial_images_start, initial_images_end;
+
+ running_on_sim = is_platform_hp_ski();
+ /* Kernel may be relocated by EFI loader */
+ xen_pstart = ia64_tpa(KERNEL_START);
+
+ /* Must do this early -- e.g., spinlocks rely on get_current(). */
+ //set_current(&idle0_vcpu);
+ ia64_r13 = (void *)&idle0_vcpu;
+ idle0_vcpu.domain = &idle0_domain;
+
+ early_setup_arch(&cmdline);
+
+ /* We initialise the serial devices very early so we can get debugging. */
+ if (running_on_sim) hpsim_serial_init();
+ else ns16550_init();
+ serial_init_preirq();
+
+ init_console();
+ set_printk_prefix("(XEN) ");
+
+ /* xenheap should be in same TR-covered range with xen image */
+ xenheap_phys_end = xen_pstart + xenheap_size;
+ printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
+ xen_pstart, xenheap_phys_end);
+
+#ifdef CONFIG_VTI
+    /* If we want to enable the VHPT for all regions, the related HV TLB
+     * initialization must be done before the first TLB miss
+     */
+#endif // CONFIG_VTI
+ /* Find next hole */
+ firsthole_start = 0;
+ efi_memmap_walk(xen_find_first_hole, &firsthole_start);
+
+ initial_images_start = xenheap_phys_end;
+ initial_images_end = initial_images_start + ia64_boot_param->initrd_size;
+
+ /* Later may find another memory trunk, even away from xen image... */
+ if (initial_images_end > firsthole_start) {
+ printk("Not enough memory to stash the DOM0 kernel image.\n");
+ printk("First hole:0x%lx, relocation end: 0x%lx\n",
+ firsthole_start, initial_images_end);
+ for ( ; ; );
+ }
+
+ /* This copy is time consuming, but elilo may load Dom0 image
+ * within xenheap range */
+ printk("ready to move Dom0 to 0x%lx...", initial_images_start);
+ memmove(__va(initial_images_start),
+ __va(ia64_boot_param->initrd_start),
+ ia64_boot_param->initrd_size);
+ ia64_boot_param->initrd_start = initial_images_start;
+ printk("Done\n");
+
+ /* first find highest page frame number */
+ max_page = 0;
+ efi_memmap_walk(find_max_pfn, &max_page);
+ printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page);
+
+ heap_start = memguard_init(ia64_imva(&_end));
+ printf("Before heap_start: 0x%lx\n", heap_start);
+ heap_start = __va(init_boot_allocator(__pa(heap_start)));
+ printf("After heap_start: 0x%lx\n", heap_start);
+
+ reserve_memory();
+
+ efi_memmap_walk(filter_rsvd_memory, init_boot_pages);
+ efi_memmap_walk(xen_count_pages, &nr_pages);
+
+ printk("System RAM: %luMB (%lukB)\n",
+ nr_pages >> (20 - PAGE_SHIFT),
+ nr_pages << (PAGE_SHIFT - 10));
+
+ init_frametable();
+
+ ia64_fph_enable();
+ __ia64_init_fpu();
+
+ alloc_dom0();
+#ifdef DOMU_BUILD_STAGING
+ alloc_domU_staging();
+#endif
+
+ end_boot_allocator();
+
+ init_xenheap_pages(__pa(heap_start), xenheap_phys_end);
+ printk("Xen heap: %luMB (%lukB)\n",
+ (xenheap_phys_end-__pa(heap_start)) >> 20,
+ (xenheap_phys_end-__pa(heap_start)) >> 10);
+
+ late_setup_arch(&cmdline);
+ setup_per_cpu_areas();
+ mem_init();
+
+printk("About to call scheduler_init()\n");
+ scheduler_init();
+ local_irq_disable();
+printk("About to call xen_time_init()\n");
+ xen_time_init();
+#ifdef CONFIG_VTI
+ init_xen_time(); /* initialise the time */
+#endif // CONFIG_VTI
+printk("About to call ac_timer_init()\n");
+ ac_timer_init();
+// init_xen_time(); ???
+ schedulers_start();
+ do_initcalls();
+printk("About to call sort_main_extable()\n");
+ sort_main_extable();
+
+ /* Create initial domain 0. */
+printk("About to call do_createdomain()\n");
+ dom0 = do_createdomain(0, 0);
+ init_task.domain = &idle0_domain;
+ init_task.processor = 0;
+// init_task.mm = &init_mm;
+ init_task.domain->arch.mm = &init_mm;
+// init_task.thread = INIT_THREAD;
+ //arch_do_createdomain(current);
+#ifdef CLONE_DOMAIN0
+ {
+ int i;
+ for (i = 0; i < CLONE_DOMAIN0; i++) {
+ clones[i] = do_createdomain(i+1, 0);
+ if ( clones[i] == NULL )
+ panic("Error creating domain0 clone %d\n",i);
+ }
+ }
+#endif
+ if ( dom0 == NULL )
+ panic("Error creating domain 0\n");
+
+ set_bit(_DOMF_privileged, &dom0->domain_flags);
+
+ /*
+ * We're going to setup domain0 using the module(s) that we stashed safely
+ * above our heap. The second module, if present, is an initrd ramdisk.
+ */
+printk("About to call construct_dom0()\n");
+ dom0_memory_start = __va(ia64_boot_param->initrd_start);
+ dom0_memory_end = ia64_boot_param->initrd_size;
+ if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
+ 0,
+ 0,
+ 0) != 0)
+ panic("Could not set up DOM0 guest OS\n");
+#ifdef CLONE_DOMAIN0
+ {
+ int i;
+ dom0_memory_start = __va(ia64_boot_param->initrd_start);
+ dom0_memory_end = ia64_boot_param->initrd_size;
+ for (i = 0; i < CLONE_DOMAIN0; i++) {
+printk("CONSTRUCTING DOMAIN0 CLONE #%d\n",i+1);
+ if ( construct_domU(clones[i], dom0_memory_start, dom0_memory_end,
+ 0,
+ 0,
+ 0) != 0)
+ panic("Could not set up DOM0 clone %d\n",i);
+ }
+ }
+#endif
+
+ /* The stash space for the initial kernel image can now be freed up. */
+ init_domheap_pages(ia64_boot_param->initrd_start,
+ ia64_boot_param->initrd_start + ia64_boot_param->initrd_size);
+ if (!running_on_sim) // slow on ski and pages are pre-initialized to zero
+ scrub_heap_pages();
+
+printk("About to call init_trace_bufs()\n");
+ init_trace_bufs();
+
+ /* Give up the VGA console if DOM0 is configured to grab it. */
+#ifndef IA64
+ console_endboot(cmdline && strstr(cmdline, "tty0"));
+#endif
+
+#ifdef CLONE_DOMAIN0
+ {
+ int i;
+ for (i = 0; i < CLONE_DOMAIN0; i++)
+ domain_unpause_by_systemcontroller(clones[i]);
+ }
+#endif
+ domain_unpause_by_systemcontroller(dom0);
+ domain0_ready = 1;
+ local_irq_enable();
+printk("About to call startup_cpu_idle_loop()\n");
+ startup_cpu_idle_loop();
+}
diff --git a/xen/arch/ia64/xentime.c b/xen/arch/ia64/xentime.c
new file mode 100644
index 0000000000..8031bedf34
--- /dev/null
+++ b/xen/arch/ia64/xentime.c
@@ -0,0 +1,380 @@
+/*
+ * xen/arch/ia64/time.c
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+
+#include <linux/config.h>
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+#include <linux/efi.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+
+#include <asm/machvec.h>
+#include <asm/delay.h>
+#include <asm/hw_irq.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#ifdef XEN
+#include <linux/jiffies.h> // not included by xen/sched.h
+#endif
+#include <xen/softirq.h>
+
+#define TIME_KEEPER_ID 0
+extern unsigned long wall_jiffies;
+
+static s_time_t stime_irq; /* System time at last 'time update' */
+
+unsigned long domain0_ready = 0;
+
+#ifndef CONFIG_VTI
+static inline u64 get_time_delta(void)
+{
+ return ia64_get_itc();
+}
+#else // CONFIG_VTI
+static s_time_t stime_irq = 0x0; /* System time at last 'time update' */
+unsigned long itc_scale;
+unsigned long itc_at_irq;
+static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
+//static rwlock_t time_lock = RW_LOCK_UNLOCKED;
+static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs);
+
+static inline u64 get_time_delta(void)
+{
+ s64 delta_itc;
+ u64 delta, cur_itc;
+
+ cur_itc = ia64_get_itc();
+
+ delta_itc = (s64)(cur_itc - itc_at_irq);
+ if ( unlikely(delta_itc < 0) ) delta_itc = 0;
+ delta = ((u64)delta_itc) * itc_scale;
+ delta = delta >> 32;
+
+ return delta;
+}
+
+u64 tick_to_ns(u64 tick)
+{
+ return (tick * itc_scale) >> 32;
+}
+#endif // CONFIG_VTI
+
+s_time_t get_s_time(void)
+{
+ s_time_t now;
+ unsigned long flags;
+
+ read_lock_irqsave(&xtime_lock, flags);
+
+ now = stime_irq + get_time_delta();
+
+ /* Ensure that the returned system time is monotonically increasing. */
+ {
+ static s_time_t prev_now = 0;
+ if ( unlikely(now < prev_now) )
+ now = prev_now;
+ prev_now = now;
+ }
+
+ read_unlock_irqrestore(&xtime_lock, flags);
+
+ return now;
+}
+
+void update_dom_time(struct vcpu *v)
+{
+// FIXME: implement this?
+// printf("update_dom_time: called, not implemented, skipping\n");
+ return;
+}
+
+/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
+void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
+{
+#ifdef CONFIG_VTI
+ s64 delta;
+ long _usecs = (long)usecs;
+
+ write_lock_irq(&xtime_lock);
+
+ delta = (s64)(stime_irq - system_time_base);
+
+ _usecs += (long)(delta/1000);
+ while ( _usecs >= 1000000 )
+ {
+ _usecs -= 1000000;
+ secs++;
+ }
+
+ wc_sec = secs;
+ wc_usec = _usecs;
+
+ write_unlock_irq(&xtime_lock);
+
+ update_dom_time(current->domain);
+#else
+// FIXME: Should this be do_settimeofday (from linux)???
+ printf("do_settime: called, not implemented, stopping\n");
+ dummy();
+#endif
+}
+
+irqreturn_t
+xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long new_itm;
+
+#define HEARTBEAT_FREQ 16 // period in seconds
+#ifdef HEARTBEAT_FREQ
+ static long count = 0;
+ if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) {
+ printf("Heartbeat... iip=%p,psr.i=%d,pend=%d\n",
+ regs->cr_iip,
+ current->vcpu_info->arch.interrupt_delivery_enabled,
+ current->vcpu_info->arch.pending_interruption);
+ count = 0;
+ }
+#endif
+#ifndef XEN
+ if (unlikely(cpu_is_offline(smp_processor_id()))) {
+ return IRQ_HANDLED;
+ }
+#endif
+#ifdef XEN
+ if (current->domain == dom0) {
+ // FIXME: there's gotta be a better way of doing this...
+ // We have to ensure that domain0 is launched before we
+ // call vcpu_timer_expired on it
+ //domain0_ready = 1; // moved to xensetup.c
+ current->vcpu_info->arch.pending_interruption = 1;
+ }
+ if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) {
+ vcpu_pend_timer(dom0->vcpu[0]);
+ //vcpu_set_next_timer(dom0->vcpu[0]);
+ domain_wake(dom0->vcpu[0]);
+ }
+ if (!is_idle_task(current->domain) && current->domain != dom0) {
+ if (vcpu_timer_expired(current)) {
+ vcpu_pend_timer(current);
+ // ensure another timer interrupt happens even if domain doesn't
+ vcpu_set_next_timer(current);
+ domain_wake(current);
+ }
+ }
+ raise_actimer_softirq();
+#endif
+
+#ifndef XEN
+ platform_timer_interrupt(irq, dev_id, regs);
+#endif
+
+ new_itm = local_cpu_data->itm_next;
+
+ if (!time_after(ia64_get_itc(), new_itm))
+#ifdef XEN
+ return;
+#else
+ printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
+ ia64_get_itc(), new_itm);
+#endif
+
+#ifdef XEN
+// printf("GOT TO HERE!!!!!!!!!!!\n");
+ //while(1);
+#else
+ profile_tick(CPU_PROFILING, regs);
+#endif
+
+ while (1) {
+#ifndef XEN
+ update_process_times(user_mode(regs));
+#endif
+
+ new_itm += local_cpu_data->itm_delta;
+
+ if (smp_processor_id() == TIME_KEEPER_ID) {
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+ * another CPU. We need to avoid to SMP race by acquiring the
+ * xtime_lock.
+ */
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ write_seqlock(&xtime_lock);
+#endif
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ do_timer(regs);
+#endif
+ local_cpu_data->itm_next = new_itm;
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ write_sequnlock(&xtime_lock);
+#endif
+ } else
+ local_cpu_data->itm_next = new_itm;
+
+ if (time_after(new_itm, ia64_get_itc()))
+ break;
+ }
+
+ do {
+ /*
+ * If we're too close to the next clock tick for
+ * comfort, we increase the safety margin by
+ * intentionally dropping the next tick(s). We do NOT
+ * update itm.next because that would force us to call
+ * do_timer() which in turn would let our clock run
+ * too fast (with the potentially devastating effect
+ * of losing monotony of time).
+ */
+ while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
+ new_itm += local_cpu_data->itm_delta;
+//#ifdef XEN
+// vcpu_set_next_timer(current);
+//#else
+//printf("***** timer_interrupt: Setting itm to %lx\n",new_itm);
+ ia64_set_itm(new_itm);
+//#endif
+ /* double check, in case we got hit by a (slow) PMI: */
+ } while (time_after_eq(ia64_get_itc(), new_itm));
+ return IRQ_HANDLED;
+}
+
+static struct irqaction xen_timer_irqaction = {
+#ifdef CONFIG_VTI
+ .handler = vmx_timer_interrupt,
+#else // CONFIG_VTI
+ .handler = xen_timer_interrupt,
+#endif // CONFIG_VTI
+#ifndef XEN
+ .flags = SA_INTERRUPT,
+#endif
+ .name = "timer"
+};
+
+void __init
+xen_time_init (void)
+{
+ register_percpu_irq(IA64_TIMER_VECTOR, &xen_timer_irqaction);
+ ia64_init_itm();
+}
+
+
+#ifdef CONFIG_VTI
+
+/* Late init function (after all CPUs are booted). */
+int __init init_xen_time()
+{
+ struct timespec tm;
+
+ itc_scale = 1000000000UL << 32 ;
+ itc_scale /= local_cpu_data->itc_freq;
+
+ /* System time ticks from zero. */
+ stime_irq = (s_time_t)0;
+ itc_at_irq = ia64_get_itc();
+
+ /* Wallclock time starts as the initial RTC time. */
+ efi_gettimeofday(&tm);
+ wc_sec = tm.tv_sec;
+ wc_usec = tm.tv_nsec/1000;
+
+
+ printk("Time init:\n");
+ printk(".... System Time: %ldns\n", NOW());
+ printk(".... scale: %16lX\n", itc_scale);
+ printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_usec);
+
+ return 0;
+}
+
+static irqreturn_t
+vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long new_itm;
+ struct vcpu *v = current;
+
+
+ new_itm = local_cpu_data->itm_next;
+
+ if (!time_after(ia64_get_itc(), new_itm))
+ return;
+
+ while (1) {
+#ifdef CONFIG_SMP
+ /*
+ * For UP, this is done in do_timer(). Weird, but
+ * fixing that would require updates to all
+ * platforms.
+ */
+ update_process_times(user_mode(v, regs));
+#endif
+ new_itm += local_cpu_data->itm_delta;
+
+ if (smp_processor_id() == TIME_KEEPER_ID) {
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+ * another CPU. We need to avoid to SMP race by acquiring the
+ * xtime_lock.
+ */
+ local_cpu_data->itm_next = new_itm;
+
+ write_lock_irq(&xtime_lock);
+ /* Update jiffies counter. */
+ (*(unsigned long *)&jiffies_64)++;
+
+ /* Update wall time. */
+ wc_usec += 1000000/HZ;
+ if ( wc_usec >= 1000000 )
+ {
+ wc_usec -= 1000000;
+ wc_sec++;
+ }
+
+ /* Updates system time (nanoseconds since boot). */
+ stime_irq += MILLISECS(1000/HZ);
+ itc_at_irq = ia64_get_itc();
+
+ write_unlock_irq(&xtime_lock);
+
+ } else
+ local_cpu_data->itm_next = new_itm;
+
+ if (time_after(new_itm, ia64_get_itc()))
+ break;
+ }
+
+ do {
+ /*
+ * If we're too close to the next clock tick for
+ * comfort, we increase the safety margin by
+ * intentionally dropping the next tick(s). We do NOT
+ * update itm.next because that would force us to call
+ * do_timer() which in turn would let our clock run
+ * too fast (with the potentially devastating effect
+ * of losing monotony of time).
+ */
+ while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
+ new_itm += local_cpu_data->itm_delta;
+ ia64_set_itm(new_itm);
+ /* double check, in case we got hit by a (slow) PMI: */
+ } while (time_after_eq(ia64_get_itc(), new_itm));
+ raise_softirq(AC_TIMER_SOFTIRQ);
+
+ return IRQ_HANDLED;
+}
+#endif // CONFIG_VTI
+
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 9a6029dcaf..d23182897c 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -1,17 +1,26 @@
include $(BASEDIR)/Rules.mk
-ifneq ($(debugger),y)
-OBJS := $(subst pdb-linux.o,,$(OBJS))
-OBJS := $(subst pdb-stub.o,,$(OBJS))
-endif
-
OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S))
OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c))
+OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c))
OBJS += $(patsubst %.c,%.o,$(wildcard mtrr/*.c))
+OBJS += $(patsubst %.c,%.o,$(wildcard genapic/*.c))
+OBJS += $(patsubst %.c,%.o,$(wildcard cpu/*.c))
+
+ifeq ($(TARGET_SUBARCH),x86_64)
+OBJS := $(subst cpu/centaur.o,,$(OBJS))
+OBJS := $(subst cpu/cyrix.o,,$(OBJS))
+OBJS := $(subst cpu/rise.o,,$(OBJS))
+OBJS := $(subst cpu/transmeta.o,,$(OBJS))
+endif
OBJS := $(subst $(TARGET_SUBARCH)/asm-offsets.o,,$(OBJS))
+ifneq ($(crash_debug),y)
+OBJS := $(patsubst cdb%.o,,$(OBJS))
+endif
+
default: $(TARGET)
$(TARGET): $(TARGET)-syms boot/mkelf32
@@ -24,7 +33,7 @@ $(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) $(TARGET_SUBARCH)/xen.lds
$(LD) $(LDFLAGS) -T $(TARGET_SUBARCH)/xen.lds -N \
boot/$(TARGET_SUBARCH).o $(ALL_OBJS) -o $@
-asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c
+asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
$(CC) $(CFLAGS) -S -o $@ $<
boot/mkelf32: boot/mkelf32.c
@@ -35,6 +44,9 @@ clean:
rm -f x86_32/*.o x86_32/*~ x86_32/core
rm -f x86_64/*.o x86_64/*~ x86_64/core
rm -f mtrr/*.o mtrr/*~ mtrr/core
+ rm -f acpi/*.o acpi/*~ acpi/core
+ rm -f genapic/*.o genapic/*~ genapic/core
+ rm -f cpu/*.o cpu/*~ cpu/core
delete-unfresh-files:
# nothing
diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk
index c3e1c2aea1..647640797d 100644
--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -1,12 +1,11 @@
########################################
# x86-specific definitions
-CC := gcc
-LD := ld
-
-CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
-CFLAGS += -iwithprefix include -Wall -Werror -pipe
-CFLAGS += -I$(BASEDIR)/include -Wno-pointer-arith -Wredundant-decls
+CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing
+CFLAGS += -iwithprefix include -Wall -Werror -Wno-pointer-arith -pipe
+CFLAGS += -I$(BASEDIR)/include
+CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-generic
+CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-default
ifeq ($(optimize),y)
CFLAGS += -O3 -fomit-frame-pointer
@@ -24,13 +23,13 @@ CFLAGS += $(call test-gcc-flag,-fno-stack-protector-all)
ifeq ($(TARGET_SUBARCH),x86_32)
CFLAGS += -m32 -march=i686
-LDFLAGS := -m elf_i386
+LDFLAGS += -m elf_i386
endif
ifeq ($(TARGET_SUBARCH),x86_64)
CFLAGS += -m64 -mno-red-zone -fpic -fno-reorder-blocks
CFLAGS += -fno-asynchronous-unwind-tables
-LDFLAGS := -m elf_x86_64
+LDFLAGS += -m elf_x86_64
endif
# Test for at least GCC v3.2.x.
diff --git a/xen/arch/x86/acpi.c b/xen/arch/x86/acpi.c
deleted file mode 100644
index 5bbd0cd272..0000000000
--- a/xen/arch/x86/acpi.c
+++ /dev/null
@@ -1,721 +0,0 @@
-/*
- * acpi.c - Architecture-Specific Low-Level ACPI Support
- *
- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
- * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <xen/config.h>
-#include <xen/kernel.h>
-#include <xen/init.h>
-#include <xen/types.h>
-#include <xen/slab.h>
-#include <xen/pci.h>
-#include <xen/irq.h>
-#include <xen/acpi.h>
-#include <asm/mpspec.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <asm/io_apic.h>
-#include <asm/acpi.h>
-#include <asm/smpboot.h>
-
-
-#define PREFIX "ACPI: "
-
-int acpi_lapic;
-int acpi_ioapic;
-int acpi_strict;
-
-acpi_interrupt_flags acpi_sci_flags __initdata;
-int acpi_sci_override_gsi __initdata;
-/* --------------------------------------------------------------------------
- Boot-time Configuration
- -------------------------------------------------------------------------- */
-
-int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */
-int acpi_ht __initdata = 1; /* enable HT */
-
-enum acpi_irq_model_id acpi_irq_model;
-
-
-/*
- * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
- * to map the target physical address. The problem is that set_fixmap()
- * provides a single page, and it is possible that the page is not
- * sufficient.
- * By using this area, we can map up to MAX_IO_APICS pages temporarily,
- * i.e. until the next __va_range() call.
- *
- * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
- * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
- * count idx down while incrementing the phys address.
- */
-char *__acpi_map_table(unsigned long phys, unsigned long size)
-{
- unsigned long base, offset, mapped_size;
- int idx;
-
- if (phys + size < 8*1024*1024)
- return __va(phys);
-
- offset = phys & (PAGE_SIZE - 1);
- mapped_size = PAGE_SIZE - offset;
- set_fixmap(FIX_ACPI_END, phys);
- base = fix_to_virt(FIX_ACPI_END);
-
- /*
- * Most cases can be covered by the below.
- */
- idx = FIX_ACPI_END;
- while (mapped_size < size) {
- if (--idx < FIX_ACPI_BEGIN)
- return 0; /* cannot handle this */
- phys += PAGE_SIZE;
- set_fixmap(idx, phys);
- mapped_size += PAGE_SIZE;
- }
-
- return ((unsigned char *) base + offset);
-}
-
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
-
-
-static int __init
-acpi_parse_madt (
- unsigned long phys_addr,
- unsigned long size)
-{
- struct acpi_table_madt *madt = NULL;
-
- if (!phys_addr || !size)
- return -EINVAL;
-
- madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
- if (!madt) {
- printk(KERN_WARNING PREFIX "Unable to map MADT\n");
- return -ENODEV;
- }
-
- if (madt->lapic_address)
- acpi_lapic_addr = (u64) madt->lapic_address;
-
- printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n",
- madt->lapic_address);
-
- detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id);
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_lapic (
- acpi_table_entry_header *header)
-{
- struct acpi_table_lapic *processor = NULL;
-
- processor = (struct acpi_table_lapic*) header;
- if (!processor)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- mp_register_lapic (
- processor->id, /* APIC ID */
- processor->flags.enabled); /* Enabled? */
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_lapic_addr_ovr (
- acpi_table_entry_header *header)
-{
- struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
-
- lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
- if (!lapic_addr_ovr)
- return -EINVAL;
-
- acpi_lapic_addr = lapic_addr_ovr->address;
-
- return 0;
-}
-
-static int __init
-acpi_parse_lapic_nmi (
- acpi_table_entry_header *header)
-{
- struct acpi_table_lapic_nmi *lapic_nmi = NULL;
-
- lapic_nmi = (struct acpi_table_lapic_nmi*) header;
- if (!lapic_nmi)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- if (lapic_nmi->lint != 1)
- printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
-
- return 0;
-}
-
-#endif /*CONFIG_X86_LOCAL_APIC*/
-
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
-
-static int __init
-acpi_parse_ioapic (
- acpi_table_entry_header *header)
-{
- struct acpi_table_ioapic *ioapic = NULL;
-
- ioapic = (struct acpi_table_ioapic*) header;
- if (!ioapic)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- mp_register_ioapic (
- ioapic->id,
- ioapic->address,
- ioapic->global_irq_base);
-
- return 0;
-}
-
-/*
- * Parse Interrupt Source Override for the ACPI SCI
- */
-static void
-acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
-{
- if (trigger == 0) /* compatible SCI trigger is level */
- trigger = 3;
-
- if (polarity == 0) /* compatible SCI polarity is low */
- polarity = 3;
-
- /* Command-line over-ride via acpi_sci= */
- if (acpi_sci_flags.trigger)
- trigger = acpi_sci_flags.trigger;
-
- if (acpi_sci_flags.polarity)
- polarity = acpi_sci_flags.polarity;
-
- /*
- * mp_config_acpi_legacy_irqs() already setup IRQs < 16
- * If GSI is < 16, this will update its flags,
- * else it will create a new mp_irqs[] entry.
- */
- mp_override_legacy_irq(gsi, polarity, trigger, gsi);
-
- /*
- * stash over-ride to indicate we've been here
- * and for later update of acpi_fadt
- */
- acpi_sci_override_gsi = gsi;
- return;
-}
-
-static int __init
-acpi_parse_fadt(unsigned long phys, unsigned long size)
-{
- struct fadt_descriptor_rev2 *fadt =0;
-
- fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
- if (!fadt) {
- printk(KERN_WARNING PREFIX "Unable to map FADT\n");
- return 0;
- }
-
-#ifdef CONFIG_ACPI_INTERPRETER
- /* initialize sci_int early for INT_SRC_OVR MADT parsing */
- acpi_fadt.sci_int = fadt->sci_int;
-#endif
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_int_src_ovr (
- acpi_table_entry_header *header)
-{
- struct acpi_table_int_src_ovr *intsrc = NULL;
-
- intsrc = (struct acpi_table_int_src_ovr*) header;
- if (!intsrc)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- if (intsrc->bus_irq == acpi_fadt.sci_int) {
- acpi_sci_ioapic_setup(intsrc->global_irq,
- intsrc->flags.polarity, intsrc->flags.trigger);
- return 0;
- }
-
- mp_override_legacy_irq (
- intsrc->bus_irq,
- intsrc->flags.polarity,
- intsrc->flags.trigger,
- intsrc->global_irq);
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_nmi_src (
- acpi_table_entry_header *header)
-{
- struct acpi_table_nmi_src *nmi_src = NULL;
-
- nmi_src = (struct acpi_table_nmi_src*) header;
- if (!nmi_src)
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- /* TBD: Support nimsrc entries? */
-
- return 0;
-}
-
-#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
-
-
-static unsigned long __init
-acpi_scan_rsdp (
- unsigned long start,
- unsigned long length)
-{
- unsigned long offset = 0;
- unsigned long sig_len = sizeof("RSD PTR ") - 1;
-
- /*
- * Scan all 16-byte boundaries of the physical memory region for the
- * RSDP signature.
- */
- for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
- continue;
- return (start + offset);
- }
-
- return 0;
-}
-
-
-unsigned long __init
-acpi_find_rsdp (void)
-{
- unsigned long rsdp_phys = 0;
-
- /*
- * Scan memory looking for the RSDP signature. First search EBDA (low
- * memory) paragraphs and then search upper memory (E0000-FFFFF).
- */
- rsdp_phys = acpi_scan_rsdp (0, 0x400);
- if (!rsdp_phys)
- rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
-
- return rsdp_phys;
-}
-
-
-/*
- * acpi_boot_init()
- * called from setup_arch(), always.
- * 1. maps ACPI tables for later use
- * 2. enumerates lapics
- * 3. enumerates io-apics
- *
- * side effects:
- * acpi_lapic = 1 if LAPIC found
- * acpi_ioapic = 1 if IOAPIC found
- * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
- * if acpi_blacklisted() disable_acpi()
- * acpi_irq_model=...
- * ...
- *
- * return value: (currently ignored)
- * 0: success
- * !0: failure
- */
-int __init
-acpi_boot_init (void)
-{
- int result = 0;
-
- if (acpi_disabled && !acpi_ht)
- return(1);
-
- /*
- * The default interrupt routing model is PIC (8259). This gets
- * overriden if IOAPICs are enumerated (below).
- */
- acpi_irq_model = ACPI_IRQ_MODEL_PIC;
-
- /*
- * Initialize the ACPI boot-time table parser.
- */
- result = acpi_table_init();
- if (result) {
- disable_acpi();
- return result;
- }
-
- result = acpi_blacklisted();
- if (result) {
- printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
- disable_acpi();
- return result;
- }
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
- /*
- * MADT
- * ----
- * Parse the Multiple APIC Description Table (MADT), if exists.
- * Note that this table provides platform SMP configuration
- * information -- the successor to MPS tables.
- */
-
- result = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
- if (!result) {
- return 0;
- }
- else if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing MADT\n");
- return result;
- }
- else if (result > 1)
- printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n");
-
- /*
- * Local APIC
- * ----------
- * Note that the LAPIC address is obtained from the MADT (32-bit value)
- * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
- */
-
- result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr);
- if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
- return result;
- }
-
- mp_register_lapic_address(acpi_lapic_addr);
-
- result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic);
- if (!result) {
- printk(KERN_ERR PREFIX "No LAPIC entries present\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return -ENODEV;
- }
- else if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return result;
- }
-
- result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi);
- if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return result;
- }
-
- acpi_lapic = 1;
-
-#endif /*CONFIG_X86_LOCAL_APIC*/
-
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
-
- /*
- * I/O APIC
- * --------
- */
-
- /*
- * ACPI interpreter is required to complete interrupt setup,
- * so if it is off, don't enumerate the io-apics with ACPI.
- * If MPS is present, it will handle them,
- * otherwise the system will stay in PIC mode
- */
- if (acpi_disabled || acpi_noirq) {
- return 1;
- }
-
- /*
- * if "noapic" boot option, don't look for IO-APICs
- */
- if (ioapic_setup_disabled()) {
- printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
- "due to 'noapic' option.\n");
- return 1;
- }
-
-
- result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic);
- if (!result) {
- printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
- return -ENODEV;
- }
- else if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
- return result;
- }
-
- /* Build a default routing table for legacy (ISA) interrupts. */
- mp_config_acpi_legacy_irqs();
-
- /* Record sci_int for use when looking for MADT sci_int override */
- acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
-
- result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr);
- if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return result;
- }
-
- /*
- * If BIOS did not supply an INT_SRC_OVR for the SCI
- * pretend we got one so we can set the SCI flags.
- */
- if (!acpi_sci_override_gsi)
- acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
-
- result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src);
- if (result < 0) {
- printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
- /* TBD: Cleanup to allow fallback to MPS */
- return result;
- }
-
- acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
-
- acpi_irq_balance_set(NULL);
-
- acpi_ioapic = 1;
-
- if (acpi_lapic && acpi_ioapic)
- smp_found_config = 1;
-
-#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
-
- return 0;
-}
-
-
-#ifdef CONFIG_ACPI_BUS
-/*
- * acpi_pic_sci_set_trigger()
- *
- * use ELCR to set PIC-mode trigger type for SCI
- *
- * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
- * it may require Edge Trigger -- use "acpi_sci=edge"
- *
- * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
- * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
- * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
- * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
- */
-
-void __init
-acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
-{
- unsigned char mask = 1 << (irq & 7);
- unsigned int port = 0x4d0 + (irq >> 3);
- unsigned char val = inb(port);
-
-
- printk(PREFIX "IRQ%d SCI:", irq);
- if (!(val & mask)) {
- printk(" Edge");
-
- if (trigger == 3) {
- printk(" set to Level");
- outb(val | mask, port);
- }
- } else {
- printk(" Level");
-
- if (trigger == 1) {
- printk(" set to Edge");
- outb(val & ~mask, port);
- }
- }
- printk(" Trigger.\n");
-}
-
-#endif /* CONFIG_ACPI_BUS */
-
-
-/* --------------------------------------------------------------------------
- Low-Level Sleep Support
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI_SLEEP
-
-#define DEBUG
-
-#ifdef DEBUG
-#include <xen/serial.h>
-#endif
-
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
-
-/* new page directory that we will be using */
-static pmd_t *pmd;
-
-/* saved page directory */
-static pmd_t saved_pmd;
-
-/* page which we'll use for the new page directory */
-static pte_t *ptep;
-
-extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
-
-/*
- * acpi_create_identity_pmd
- *
- * Create a new, identity mapped pmd.
- *
- * Do this by creating new page directory, and marking all the pages as R/W
- * Then set it as the new Page Middle Directory.
- * And, of course, flush the TLB so it takes effect.
- *
- * We save the address of the old one, for later restoration.
- */
-static void acpi_create_identity_pmd (void)
-{
- pgd_t *pgd;
- int i;
-
- ptep = (pte_t*)__get_free_page(GFP_KERNEL);
-
- /* fill page with low mapping */
- for (i = 0; i < PTRS_PER_PTE; i++)
- set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED));
-
- pgd = pgd_offset(current->active_mm, 0);
- pmd = pmd_alloc(current->mm,pgd, 0);
-
- /* save the old pmd */
- saved_pmd = *pmd;
-
- /* set the new one */
- set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep)));
-
- /* flush the TLB */
- local_flush_tlb();
-}
-
-/*
- * acpi_restore_pmd
- *
- * Restore the old pmd saved by acpi_create_identity_pmd and
- * free the page that said function alloc'd
- */
-static void acpi_restore_pmd (void)
-{
- set_pmd(pmd, saved_pmd);
- local_flush_tlb();
- free_page((unsigned long)ptep);
-}
-
-/**
- * acpi_save_state_mem - save kernel state
- *
- * Create an identity mapped page table and copy the wakeup routine to
- * low memory.
- */
-int acpi_save_state_mem (void)
-{
- acpi_create_identity_pmd();
- acpi_copy_wakeup_routine(acpi_wakeup_address);
-
- return 0;
-}
-
-/**
- * acpi_save_state_disk - save kernel state to disk
- *
- */
-int acpi_save_state_disk (void)
-{
- return 1;
-}
-
-/*
- * acpi_restore_state
- */
-void acpi_restore_state_mem (void)
-{
- acpi_restore_pmd();
-}
-
-/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
- *
- * We allocate a page in low memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16M pages, but not
- * <1M pages.
- */
-void __init acpi_reserve_bootmem(void)
-{
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
- if (!acpi_wakeup_address)
- printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
-}
-
-void do_suspend_lowlevel_s4bios(int resume)
-{
- if (!resume) {
- save_processor_context();
- acpi_save_register_state((unsigned long)&&acpi_sleep_done);
- acpi_enter_sleep_state_s4bios();
- return;
- }
-acpi_sleep_done:
- restore_processor_context();
-}
-
-
-#endif /*CONFIG_ACPI_SLEEP*/
-
diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
new file mode 100644
index 0000000000..c209012950
--- /dev/null
+++ b/xen/arch/x86/acpi/boot.c
@@ -0,0 +1,723 @@
+/*
+ * boot.c - Architecture-Specific Low-Level ACPI Boot Support
+ *
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <xen/acpi.h>
+#include <xen/irq.h>
+#include <asm/fixmap.h>
+#include <asm/page.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#include <asm/io.h>
+#include <asm/mpspec.h>
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+
+int sbf_port;
+#define CONFIG_ACPI_PCI
+
+#define BAD_MADT_ENTRY(entry, end) ( \
+ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
+ ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+
+#define PREFIX "ACPI: "
+
+#ifdef CONFIG_ACPI_PCI
+int acpi_noirq __initdata; /* skip ACPI IRQ initialization */
+int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
+#else
+int acpi_noirq __initdata = 1;
+int acpi_pci_disabled __initdata = 1;
+#endif
+int acpi_ht __initdata = 1; /* enable HT */
+
+int acpi_lapic;
+int acpi_ioapic;
+int acpi_strict;
+EXPORT_SYMBOL(acpi_strict);
+
+acpi_interrupt_flags acpi_sci_flags __initdata;
+int acpi_sci_override_gsi __initdata;
+int acpi_skip_timer_override __initdata;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+#endif
+
+#ifndef __HAVE_ARCH_CMPXCHG
+#warning ACPI uses CMPXCHG, i486 and later hardware
+#endif
+
+#define MAX_MADT_ENTRIES 256
+u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
+ { [0 ... MAX_MADT_ENTRIES-1] = 0xff };
+EXPORT_SYMBOL(x86_acpiid_to_apicid);
+
+/* --------------------------------------------------------------------------
+ Boot-time Configuration
+ -------------------------------------------------------------------------- */
+
+/*
+ * The default interrupt routing model is PIC (8259). This gets
+ * overriden if IOAPICs are enumerated (below).
+ */
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __va_range() call.
+ *
+ * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+char *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+ unsigned long base, offset, mapped_size;
+ int idx;
+
+ if (phys + size < 8*1024*1024)
+ return __va(phys);
+
+ offset = phys & (PAGE_SIZE - 1);
+ mapped_size = PAGE_SIZE - offset;
+ set_fixmap(FIX_ACPI_END, phys);
+ base = fix_to_virt(FIX_ACPI_END);
+
+ /*
+ * Most cases can be covered by the below.
+ */
+ idx = FIX_ACPI_END;
+ while (mapped_size < size) {
+ if (--idx < FIX_ACPI_BEGIN)
+ return NULL; /* cannot handle this */
+ phys += PAGE_SIZE;
+ set_fixmap(idx, phys);
+ mapped_size += PAGE_SIZE;
+ }
+
+ return ((char *) base + offset);
+}
+
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_mcfg *mcfg;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+ if (!mcfg) {
+ printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+ return -ENODEV;
+ }
+
+ if (mcfg->base_reserved) {
+ printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+ return -ENODEV;
+ }
+
+ pci_mmcfg_base_addr = mcfg->base_address;
+
+ return 0;
+}
+#else
+#define acpi_parse_mcfg NULL
+#endif /* !CONFIG_PCI_MMCONFIG */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static int __init
+acpi_parse_madt (
+ unsigned long phys_addr,
+ unsigned long size)
+{
+ struct acpi_table_madt *madt = NULL;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
+ if (!madt) {
+ printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+ return -ENODEV;
+ }
+
+ if (madt->lapic_address) {
+ acpi_lapic_addr = (u64) madt->lapic_address;
+
+ printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+ madt->lapic_address);
+ }
+
+ acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_lapic (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic *processor = NULL;
+
+ processor = (struct acpi_table_lapic*) header;
+
+ if (BAD_MADT_ENTRY(processor, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* no utility in registering a disabled processor */
+ if (processor->flags.enabled == 0)
+ return 0;
+
+ x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
+
+ mp_register_lapic (
+ processor->id, /* APIC ID */
+ processor->flags.enabled); /* Enabled? */
+
+ return 0;
+}
+
+static int __init
+acpi_parse_lapic_addr_ovr (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+
+ lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;
+
+ if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
+ return -EINVAL;
+
+ acpi_lapic_addr = lapic_addr_ovr->address;
+
+ return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+
+ lapic_nmi = (struct acpi_table_lapic_nmi*) header;
+
+ if (BAD_MADT_ENTRY(lapic_nmi, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (lapic_nmi->lint != 1)
+ printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+ return 0;
+}
+
+
+#endif /*CONFIG_X86_LOCAL_APIC*/
+
+#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/
+
+static int __init
+acpi_parse_ioapic (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_ioapic *ioapic = NULL;
+
+ ioapic = (struct acpi_table_ioapic*) header;
+
+ if (BAD_MADT_ENTRY(ioapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ mp_register_ioapic (
+ ioapic->id,
+ ioapic->address,
+ ioapic->global_irq_base);
+
+ return 0;
+}
+
+static int __init
+acpi_parse_int_src_ovr (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_int_src_ovr *intsrc = NULL;
+
+ intsrc = (struct acpi_table_int_src_ovr*) header;
+
+ if (BAD_MADT_ENTRY(intsrc, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (acpi_skip_timer_override &&
+ intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+ printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ return 0;
+ }
+
+ mp_override_legacy_irq (
+ intsrc->bus_irq,
+ intsrc->flags.polarity,
+ intsrc->flags.trigger,
+ intsrc->global_irq);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_nmi_src (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_nmi_src *nmi_src = NULL;
+
+ nmi_src = (struct acpi_table_nmi_src*) header;
+
+ if (BAD_MADT_ENTRY(nmi_src, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* TBD: Support nimsrc entries? */
+
+ return 0;
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+
+static unsigned long __init
+acpi_scan_rsdp (
+ unsigned long start,
+ unsigned long length)
+{
+ unsigned long offset = 0;
+ unsigned long sig_len = sizeof("RSD PTR ") - 1;
+
+ /*
+ * Scan all 16-byte boundaries of the physical memory region for the
+ * RSDP signature.
+ */
+ for (offset = 0; offset < length; offset += 16) {
+ if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
+ continue;
+ return (start + offset);
+ }
+
+ return 0;
+}
+
+static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_sbf *sb;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
+ if (!sb) {
+ printk(KERN_WARNING PREFIX "Unable to map SBF\n");
+ return -ENODEV;
+ }
+
+ sbf_port = sb->sbf_cmos; /* Save CMOS port */
+
+ return 0;
+}
+
+
+#ifdef CONFIG_HPET_TIMER
+
+static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+{
+ struct acpi_table_hpet *hpet_tbl;
+
+ if (!phys || !size)
+ return -EINVAL;
+
+ hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
+ if (!hpet_tbl) {
+ printk(KERN_WARNING PREFIX "Unable to map HPET\n");
+ return -ENODEV;
+ }
+
+ if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
+ printk(KERN_WARNING PREFIX "HPET timers must be located in "
+ "memory.\n");
+ return -1;
+ }
+
+#ifdef CONFIG_X86_64
+ vxtime.hpet_address = hpet_tbl->addr.addrl |
+ ((long) hpet_tbl->addr.addrh << 32);
+
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, vxtime.hpet_address);
+#else /* X86 */
+ {
+ extern unsigned long hpet_address;
+
+ hpet_address = hpet_tbl->addr.addrl;
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, hpet_address);
+ }
+#endif /* X86 */
+
+ return 0;
+}
+#else
+#define acpi_parse_hpet NULL
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+extern u32 pmtmr_ioport;
+#endif
+
+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+{
+ struct fadt_descriptor_rev2 *fadt = NULL;
+
+ fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size);
+ if(!fadt) {
+ printk(KERN_WARNING PREFIX "Unable to map FADT\n");
+ return 0;
+ }
+
+#ifdef CONFIG_ACPI_INTERPRETER
+ /* initialize sci_int early for INT_SRC_OVR MADT parsing */
+ acpi_fadt.sci_int = fadt->sci_int;
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+ /* detect the location of the ACPI PM Timer */
+ if (fadt->revision >= FADT2_REVISION_ID) {
+ /* FADT rev. 2 */
+ if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
+ return 0;
+
+ pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ } else {
+ /* FADT rev. 1 */
+ pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ }
+ if (pmtmr_ioport)
+ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
+#endif
+ return 0;
+}
+
+
+unsigned long __init
+acpi_find_rsdp (void)
+{
+ unsigned long rsdp_phys = 0;
+
+#if 0
+ if (efi_enabled) {
+ if (efi.acpi20)
+ return __pa(efi.acpi20);
+ else if (efi.acpi)
+ return __pa(efi.acpi);
+ }
+#endif
+ /*
+ * Scan memory looking for the RSDP signature. First search EBDA (low
+ * memory) paragraphs and then search upper memory (E0000-FFFFF).
+ */
+ rsdp_phys = acpi_scan_rsdp (0, 0x400);
+ if (!rsdp_phys)
+ rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
+
+ return rsdp_phys;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Parse LAPIC entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init
+acpi_parse_madt_lapic_entries(void)
+{
+ int count;
+
+ /*
+ * Note that the LAPIC address is obtained from the MADT (32-bit value)
+ * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+ */
+
+ count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+ return count;
+ }
+
+ mp_register_lapic_address(acpi_lapic_addr);
+
+ count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
+ MAX_APICS);
+ if (!count) {
+ printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return -ENODEV;
+ }
+ else if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+ count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+ return 0;
+}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/
+/*
+ * Parse IOAPIC related entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init
+acpi_parse_madt_ioapic_entries(void)
+{
+ int count;
+
+ /*
+ * ACPI interpreter is required to complete interrupt setup,
+ * so if it is off, don't enumerate the io-apics with ACPI.
+ * If MPS is present, it will handle them,
+ * otherwise the system will stay in PIC mode
+ */
+ if (acpi_disabled || acpi_noirq) {
+ return -ENODEV;
+ }
+
+ /*
+ * if "noapic" boot option, don't look for IO-APICs
+ */
+ if (skip_ioapic_setup) {
+ printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
+ "due to 'noapic' option.\n");
+ return -ENODEV;
+ }
+
+ count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
+ if (!count) {
+ printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+ return -ENODEV;
+ }
+ else if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+ return count;
+ }
+
+ count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+#ifdef CONFIG_ACPI_INTERPRETER
+ /*
+ * If BIOS did not supply an INT_SRC_OVR for the SCI
+ * pretend we got one so we can set the SCI flags.
+ */
+ if (!acpi_sci_override_gsi)
+ acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+#endif
+
+ /* Fill in identity legacy mappings where no override */
+ mp_config_acpi_legacy_irqs();
+
+ count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
+ if (count < 0) {
+ printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+ /* TBD: Cleanup to allow fallback to MPS */
+ return count;
+ }
+
+ return 0;
+}
+#else
+static inline int acpi_parse_madt_ioapic_entries(void)
+{
+ return -1;
+}
+#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */
+
+
+static void __init
+acpi_process_madt(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ int count, error;
+
+ count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
+ if (count >= 1) {
+
+ /*
+ * Parse MADT LAPIC entries
+ */
+ error = acpi_parse_madt_lapic_entries();
+ if (!error) {
+ acpi_lapic = 1;
+
+ /*
+ * Parse MADT IO-APIC entries
+ */
+ error = acpi_parse_madt_ioapic_entries();
+ if (!error) {
+ acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+ acpi_irq_balance_set(NULL);
+ acpi_ioapic = 1;
+
+ smp_found_config = 1;
+ clustered_apic_check();
+ }
+ }
+ if (error == -EINVAL) {
+ /*
+ * Dell Precision Workstation 410, 610 come here.
+ */
+ printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n");
+ disable_acpi();
+ }
+ }
+#endif
+ return;
+}
+
+/*
+ * acpi_boot_table_init() and acpi_boot_init()
+ * called from setup_arch(), always.
+ * 1. checksums all tables
+ * 2. enumerates lapics
+ * 3. enumerates io-apics
+ *
+ * acpi_table_init() is separate to allow reading SRAT without
+ * other side effects.
+ *
+ * side effects of acpi_boot_init:
+ * acpi_lapic = 1 if LAPIC found
+ * acpi_ioapic = 1 if IOAPIC found
+ * if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ * if acpi_blacklisted() acpi_disabled = 1;
+ * acpi_irq_model=...
+ * ...
+ *
+ * return value: (currently ignored)
+ * 0: success
+ * !0: failure
+ */
+
+int __init
+acpi_boot_table_init(void)
+{
+ int error;
+
+ /*
+ * If acpi_disabled, bail out
+ * One exception: acpi=ht continues far enough to enumerate LAPICs
+ */
+ if (acpi_disabled && !acpi_ht)
+ return 1;
+
+ /*
+ * Initialize the ACPI boot-time table parser.
+ */
+ error = acpi_table_init();
+ if (error) {
+ disable_acpi();
+ return error;
+ }
+
+ acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+ /*
+ * blacklist may disable ACPI entirely
+ */
+ error = acpi_blacklisted();
+ if (error) {
+ extern int acpi_force;
+
+ if (acpi_force) {
+ printk(KERN_WARNING PREFIX "acpi=force override\n");
+ } else {
+ printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
+ disable_acpi();
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+
+int __init acpi_boot_init(void)
+{
+ /*
+ * If acpi_disabled, bail out
+ * One exception: acpi=ht continues far enough to enumerate LAPICs
+ */
+ if (acpi_disabled && !acpi_ht)
+ return 1;
+
+ acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+ /*
+ * set sci_int and PM timer address
+ */
+ acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+
+ /*
+ * Process the Multiple APIC Description Table (MADT), if present
+ */
+ acpi_process_madt();
+
+ acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+ acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+
+ return 0;
+}
+
diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
index aa8907e0ba..50497c55c7 100644
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1,4 +1,6 @@
/*
+ * based on linux-2.6.11/arch/i386/kernel/apic.c
+ *
* Local APIC handling, local APIC timers
*
* (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
@@ -10,11 +12,11 @@
* for testing these extensively.
* Maciej W. Rozycki : Various updates and fixes.
* Mikael Pettersson : Power Management for UP-APIC.
+ * Pavel Machek and
+ * Mikael Pettersson : PM converted to driver model.
*/
-
#include <xen/config.h>
-#include <xen/ac_timer.h>
#include <xen/perfc.h>
#include <xen/errno.h>
#include <xen/init.h>
@@ -32,13 +34,30 @@
#include <asm/hardirq.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
+#include <mach_apic.h>
+#include <io_ports.h>
+/*
+ * Debug level
+ */
+int apic_verbosity;
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;
static int enabled_via_apicbase;
+int get_physical_broadcast(void)
+{
+ unsigned int lvr, version;
+ lvr = apic_read(APIC_LVR);
+ version = GET_APIC_VERSION(lvr);
+ if (!APIC_INTEGRATED(version) || version >= 0x14)
+ return 0xff;
+ else
+ return 0xf;
+}
+
int get_maxlvt(void)
{
unsigned int v, ver, maxlvt;
@@ -80,6 +99,13 @@ void clear_local_APIC(void)
apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
}
+/* lets not touch this if we didn't frob it */
+#ifdef CONFIG_X86_MCE_P4THERMAL
+ if (maxlvt >= 5) {
+ v = apic_read(APIC_LVTTHMR);
+ apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
+ }
+#endif
/*
* Clean APIC state for other OSs:
*/
@@ -90,9 +116,14 @@ void clear_local_APIC(void)
apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
if (maxlvt >= 4)
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+ if (maxlvt >= 5)
+ apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
+#endif
v = GET_APIC_VERSION(apic_read(APIC_LVR));
if (APIC_INTEGRATED(v)) { /* !82489DX */
- if (maxlvt > 3)
+ if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
}
@@ -109,10 +140,12 @@ void __init connect_bsp_APIC(void)
* PIC mode, enable APIC mode in the IMCR, i.e.
* connect BSP's local APIC to INT and NMI lines.
*/
- printk("leaving PIC mode, enabling APIC mode.\n");
+ apic_printk(APIC_VERBOSE, "leaving PIC mode, "
+ "enabling APIC mode.\n");
outb(0x70, 0x22);
outb(0x01, 0x23);
}
+ enable_apic_mode();
}
void disconnect_bsp_APIC(void)
@@ -124,7 +157,8 @@ void disconnect_bsp_APIC(void)
* interrupts, including IPIs, won't work beyond
* this point! The only exception are INIT IPIs.
*/
- printk("disabling APIC mode, entering PIC mode.\n");
+ apic_printk(APIC_VERBOSE, "disabling APIC mode, "
+ "entering PIC mode.\n");
outb(0x70, 0x22);
outb(0x00, 0x23);
}
@@ -165,10 +199,10 @@ int __init verify_local_APIC(void)
* The version register is read-only in a real APIC.
*/
reg0 = apic_read(APIC_LVR);
- Dprintk("Getting VERSION: %x\n", reg0);
+ apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
reg1 = apic_read(APIC_LVR);
- Dprintk("Getting VERSION: %x\n", reg1);
+ apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
/*
* The two version reads above should print the same
@@ -192,13 +226,7 @@ int __init verify_local_APIC(void)
* The ID register is read/write in a real APIC.
*/
reg0 = apic_read(APIC_ID);
- Dprintk("Getting ID: %x\n", reg0);
- apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
- reg1 = apic_read(APIC_ID);
- Dprintk("Getting ID: %x\n", reg1);
- apic_write(APIC_ID, reg0);
- if (reg1 != (reg0 ^ APIC_ID_MASK))
- return 0;
+ apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
/*
* The next two are just to see if we have sane values.
@@ -206,21 +234,25 @@ int __init verify_local_APIC(void)
* compatibility mode, but most boxes are anymore.
*/
reg0 = apic_read(APIC_LVT0);
- Dprintk("Getting LVT0: %x\n", reg0);
+ apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
reg1 = apic_read(APIC_LVT1);
- Dprintk("Getting LVT1: %x\n", reg1);
+ apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
return 1;
}
void __init sync_Arb_IDs(void)
{
+ /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
+ unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
+ if (ver >= 0x14) /* P4 or higher */
+ return;
/*
* Wait for idle.
*/
apic_wait_icr_idle();
- Dprintk("Synchronizing Arb IDs.\n");
+ apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
| APIC_DM_INIT);
}
@@ -228,27 +260,63 @@ void __init sync_Arb_IDs(void)
extern void __error_in_apic_c (void);
/*
- * WAS: An initial setup of the virtual wire mode.
- * NOW: We don't bother doing anything. All we need at this point
- * is to receive timer ticks, so that 'jiffies' is incremented.
- * If we're SMP, then we can assume BIOS did setup for us.
- * If we're UP, then the APIC should be disabled (it is at reset).
- * If we're UP and APIC is enabled, then BIOS is clever and has
- * probably done initial interrupt routing for us.
+ * An initial setup of the virtual wire mode.
*/
void __init init_bsp_APIC(void)
{
-}
+ unsigned long value, ver;
-static unsigned long calculate_ldr(unsigned long old)
-{
- unsigned long id = 1UL << smp_processor_id();
- return (old & ~APIC_LDR_MASK)|SET_APIC_LOGICAL_ID(id);
+ /*
+ * Don't do the setup now if we have a SMP BIOS as the
+ * through-I/O-APIC virtual wire mode might be active.
+ */
+ if (smp_found_config || !cpu_has_apic)
+ return;
+
+ value = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(value);
+
+ /*
+ * Do not trust the local APIC being empty at bootup.
+ */
+ clear_local_APIC();
+
+ /*
+ * Enable APIC.
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+
+ /* This bit is reserved on P4/Xeon and should be cleared */
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
+ value &= ~APIC_SPIV_FOCUS_DISABLED;
+ else
+ value |= APIC_SPIV_FOCUS_DISABLED;
+ value |= SPURIOUS_APIC_VECTOR;
+ apic_write_around(APIC_SPIV, value);
+
+ /*
+ * Set up the virtual wire mode.
+ */
+ apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+ value = APIC_DM_NMI;
+ if (!APIC_INTEGRATED(ver)) /* 82489DX */
+ value |= APIC_LVT_LEVEL_TRIGGER;
+ apic_write_around(APIC_LVT1, value);
}
void __init setup_local_APIC (void)
{
- unsigned long value, ver, maxlvt;
+ unsigned long oldvalue, value, ver, maxlvt;
+
+ /* Pound the ESR really hard over the head with a big hammer - mbligh */
+ if (esr_disable) {
+ apic_write(APIC_ESR, 0);
+ apic_write(APIC_ESR, 0);
+ apic_write(APIC_ESR, 0);
+ apic_write(APIC_ESR, 0);
+ }
value = apic_read(APIC_LVR);
ver = GET_APIC_VERSION(value);
@@ -256,8 +324,10 @@ void __init setup_local_APIC (void)
if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
__error_in_apic_c();
- /* Double-check wether this APIC is really registered. */
- if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
+ /*
+ * Double-check whether this APIC is really registered.
+ */
+ if (!apic_id_registered())
BUG();
/*
@@ -265,19 +335,7 @@ void __init setup_local_APIC (void)
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
-
- /*
- * In clustered apic mode, the firmware does this for us
- * Put the APIC into flat delivery mode.
- * Must be "all ones" explicitly for 82489DX.
- */
- apic_write_around(APIC_DFR, APIC_DFR_FLAT);
-
- /*
- * Set up the logical destination ID.
- */
- value = apic_read(APIC_LDR);
- apic_write_around(APIC_LDR, calculate_ldr(value));
+ init_apic_ldr();
/*
* Set Task Priority to 'accept all'. We never change this
@@ -297,10 +355,35 @@ void __init setup_local_APIC (void)
*/
value |= APIC_SPIV_APIC_ENABLED;
+ /*
+ * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+ * certain networking cards. If high frequency interrupts are
+ * happening on a particular IOAPIC pin, plus the IOAPIC routing
+ * entry is masked/unmasked at a high rate as well then sooner or
+ * later IOAPIC line gets 'stuck', no more interrupts are received
+ * from the device. If focus CPU is disabled then the hang goes
+ * away, oh well :-(
+ *
+ * [ This bug can be reproduced easily with a level-triggered
+ * PCI Ne2000 networking cards and PII/PIII processors, dual
+ * BX chipset. ]
+ */
+ /*
+ * Actually disabling the focus CPU check just makes the hang less
+ * frequent as it makes the interrupt distribution model be more
+ * like LRU than MRU (the short-term load is more even across CPUs).
+ * See also the comment in end_level_ioapic_irq(). --macro
+ */
+#if 1
/* Enable focus processor (bit==0) */
value &= ~APIC_SPIV_FOCUS_DISABLED;
-
- /* Set spurious IRQ vector */
+#else
+ /* Disable focus processor (bit==1) */
+ value |= APIC_SPIV_FOCUS_DISABLED;
+#endif
+ /*
+ * Set spurious IRQ vector
+ */
value |= SPURIOUS_APIC_VECTOR;
apic_write_around(APIC_SPIV, value);
@@ -315,12 +398,14 @@ void __init setup_local_APIC (void)
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!smp_processor_id()) {
+ if (!smp_processor_id() && (pic_mode || !value)) {
value = APIC_DM_EXTINT;
- printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
+ apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
+ smp_processor_id());
} else {
value = APIC_DM_EXTINT | APIC_LVT_MASKED;
- printk("masked ExtINT on CPU#%d\n", smp_processor_id());
+ apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
+ smp_processor_id());
}
apic_write_around(APIC_LVT0, value);
@@ -335,56 +420,95 @@ void __init setup_local_APIC (void)
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
+ if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */
maxlvt = get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- printk("ESR value before enabling vector: %08lx\n", value);
+ oldvalue = apic_read(APIC_ESR);
- value = ERROR_APIC_VECTOR; /* enables sending errors */
+ value = ERROR_APIC_VECTOR; // enables sending errors
apic_write_around(APIC_LVTERR, value);
- /* spec says clear errors after enabling vector. */
+ /*
+ * spec says clear errors after enabling vector.
+ */
if (maxlvt > 3)
apic_write(APIC_ESR, 0);
value = apic_read(APIC_ESR);
- printk("ESR value after enabling vector: %08lx\n", value);
+ if (value != oldvalue)
+ apic_printk(APIC_VERBOSE, "ESR value before enabling "
+ "vector: 0x%08lx after: 0x%08lx\n",
+ oldvalue, value);
} else {
- printk("No ESR for 82489DX.\n");
+ if (esr_disable)
+ /*
+ * Something untraceable is creating bad interrupts on
+ * secondary quads ... for the moment, just leave the
+ * ESR disabled - we can't do anything useful with the
+ * errors anyway - mbligh
+ */
+ printk("Leaving ESR disabled.\n");
+ else
+ printk("No ESR for 82489DX.\n");
}
- if ( (smp_processor_id() == 0) && (nmi_watchdog == NMI_LOCAL_APIC) )
+ if (nmi_watchdog == NMI_LOCAL_APIC)
setup_apic_nmi_watchdog();
}
-
-static inline void apic_pm_init1(void) { }
-static inline void apic_pm_init2(void) { }
-
-
/*
* Detect and enable local APICs on non-SMP boards.
* Original code written by Keir Fraser.
*/
+/*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+static void __init lapic_disable(char *str)
+{
+ enable_local_apic = -1;
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+}
+custom_param("nolapic", lapic_disable);
+
+static void __init lapic_enable(char *str)
+{
+ enable_local_apic = 1;
+}
+custom_param("lapic", lapic_enable);
+
+static void __init apic_set_verbosity(char *str)
+{
+ if (strcmp("debug", str) == 0)
+ apic_verbosity = APIC_DEBUG;
+ else if (strcmp("verbose", str) == 0)
+ apic_verbosity = APIC_VERBOSE;
+ else
+ printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+ " use apic_verbosity=verbose or apic_verbosity=debug", str);
+}
+custom_param("apic_verbosity", apic_set_verbosity);
+
static int __init detect_init_APIC (void)
{
u32 h, l, features;
- extern void get_cpu_vendor(struct cpuinfo_x86*);
+
+ /* Disabled by kernel option? */
+ if (enable_local_apic < 0)
+ return -1;
/* Workaround for us being called before identify_cpu(). */
- get_cpu_vendor(&boot_cpu_data);
+ /*get_cpu_vendor(&boot_cpu_data); Not for Xen */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1)
- break;
- if (boot_cpu_data.x86 == 15 && cpu_has_apic)
+ if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
+ (boot_cpu_data.x86 == 15))
break;
goto no_apic;
case X86_VENDOR_INTEL:
- if (boot_cpu_data.x86 == 6 ||
- (boot_cpu_data.x86 == 15 && cpu_has_apic) ||
+ if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
(boot_cpu_data.x86 == 5 && cpu_has_apic))
break;
goto no_apic;
@@ -394,9 +518,19 @@ static int __init detect_init_APIC (void)
if (!cpu_has_apic) {
/*
+ * Over-ride BIOS and try to enable the local
+ * APIC only if "lapic" specified.
+ */
+ if (enable_local_apic <= 0) {
+ printk("Local APIC disabled by BIOS -- "
+ "you can enable it with \"lapic\"\n");
+ return -1;
+ }
+ /*
* Some BIOSes disable the local APIC in the
* APIC_BASE MSR. This can only be done in
- * software for Intel P6 and AMD K7 (Model > 1).
+ * software for Intel P6 or later and AMD K7
+ * (Model > 1) or later.
*/
rdmsr(MSR_IA32_APICBASE, l, h);
if (!(l & MSR_IA32_APICBASE_ENABLE)) {
@@ -407,51 +541,53 @@ static int __init detect_init_APIC (void)
enabled_via_apicbase = 1;
}
}
-
- /* The APIC feature bit should now be enabled in `cpuid' */
+ /*
+ * The APIC feature bit should now be enabled
+ * in `cpuid'
+ */
features = cpuid_edx(1);
if (!(features & (1 << X86_FEATURE_APIC))) {
printk("Could not enable APIC!\n");
return -1;
}
- set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
+ set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- boot_cpu_physical_apicid = 0;
/* The BIOS may have set up the APIC at some other address */
rdmsr(MSR_IA32_APICBASE, l, h);
if (l & MSR_IA32_APICBASE_ENABLE)
mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
- if (nmi_watchdog != NMI_NONE)
- nmi_watchdog = NMI_LOCAL_APIC;
+ if (nmi_watchdog != NMI_NONE)
+ nmi_watchdog = NMI_LOCAL_APIC;
printk("Found and enabled local APIC!\n");
- apic_pm_init1();
+
return 0;
- no_apic:
+no_apic:
printk("No local APIC present or hardware disabled\n");
return -1;
}
void __init init_apic_mappings(void)
{
- unsigned long apic_phys = 0;
+ unsigned long apic_phys;
/*
- * If no local APIC can be found then set up a fake all zeroes page to
- * simulate the local APIC and another one for the IO-APIC.
+ * If no local APIC can be found then set up a fake all
+ * zeroes page to simulate the local APIC and another
+ * one for the IO-APIC.
*/
- if (!smp_found_config && detect_init_APIC()) {
- apic_phys = alloc_xenheap_page();
- apic_phys = __pa(apic_phys);
- } else
+ if (!smp_found_config && detect_init_APIC())
+ apic_phys = __pa(alloc_xenheap_page());
+ else
apic_phys = mp_lapic_addr;
set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
- Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
+ apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
+ apic_phys);
/*
* Fetch the APIC ID of the BSP in case we have a
@@ -462,15 +598,28 @@ void __init init_apic_mappings(void)
#ifdef CONFIG_X86_IO_APIC
{
- unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0;
+ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
int i;
for (i = 0; i < nr_ioapics; i++) {
- if (smp_found_config)
+ if (smp_found_config) {
ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+ if (!ioapic_phys) {
+ printk(KERN_ERR
+ "WARNING: bogus zero IO-APIC "
+ "address found in MPTABLE, "
+ "disabling IO/APIC support!\n");
+ smp_found_config = 0;
+ skip_ioapic_setup = 1;
+ goto fake_ioapic_page;
+ }
+ } else {
+fake_ioapic_page:
+ ioapic_phys = __pa(alloc_xenheap_page());
+ }
set_fixmap_nocache(idx, ioapic_phys);
- Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
- fix_to_virt(idx), ioapic_phys);
+ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
idx++;
}
}
@@ -504,78 +653,100 @@ static unsigned int __init get_8254_timer_count(void)
{
/*extern spinlock_t i8253_lock;*/
/*unsigned long flags;*/
+
unsigned int count;
+
/*spin_lock_irqsave(&i8253_lock, flags);*/
- outb_p(0x00, 0x43);
- count = inb_p(0x40);
- count |= inb_p(0x40) << 8;
+
+ outb_p(0x00, PIT_MODE);
+ count = inb_p(PIT_CH0);
+ count |= inb_p(PIT_CH0) << 8;
+
/*spin_unlock_irqrestore(&i8253_lock, flags);*/
+
return count;
}
-void __init wait_8254_wraparound(void)
+/* next tick in 8254 can be caught by catching timer wraparound */
+static void __init wait_8254_wraparound(void)
{
- unsigned int curr_count, prev_count=~0;
- int delta;
+ unsigned int curr_count, prev_count;
+
curr_count = get_8254_timer_count();
do {
prev_count = curr_count;
curr_count = get_8254_timer_count();
- delta = curr_count-prev_count;
- /*
- * This limit for delta seems arbitrary, but it isn't, it's slightly
- * above the level of error a buggy Mercury/Neptune chipset timer can
- * cause.
- */
- } while (delta < 300);
+
+ /* workaround for broken Mercury/Neptune */
+ if (prev_count >= curr_count + 0x100)
+ curr_count = get_8254_timer_count();
+
+ } while (prev_count >= curr_count);
}
/*
+ * Default initialization for 8254 timers. If we use other timers like HPET,
+ * we override this later
+ */
+void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
+
+/*
* This function sets up the local APIC timer, with a timeout of
* 'clocks' APIC bus clock. During calibration we actually call
- * this function with a very large value and read the current time after
- * a well defined period of time as expired.
- *
- * Calibration is only performed once, for CPU0!
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
*
* We do reads before writes even if unnecessary, to get around the
* P5 APIC double write bug.
*/
+
#define APIC_DIVISOR 1
-static void __setup_APIC_LVTT(unsigned int clocks)
+
+void __setup_APIC_LVTT(unsigned int clocks)
{
- unsigned int lvtt1_value, tmp_value;
- lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV)|LOCAL_TIMER_VECTOR;
- apic_write_around(APIC_LVTT, lvtt1_value);
+ unsigned int lvtt_value, tmp_value, ver;
+
+ ver = GET_APIC_VERSION(apic_read(APIC_LVR));
+ /* NB. Xen uses local APIC timer in one-shot mode. */
+ lvtt_value = /*APIC_LVT_TIMER_PERIODIC |*/ LOCAL_TIMER_VECTOR;
+ if (!APIC_INTEGRATED(ver))
+ lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+ apic_write_around(APIC_LVTT, lvtt_value);
+
tmp_value = apic_read(APIC_TDCR);
apic_write_around(APIC_TDCR, (tmp_value | APIC_TDR_DIV_1));
+
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
-/*
- * this is done for every CPU from setup_APIC_clocks() below.
- * We setup each local APIC with a zero timeout value for now.
- * Unlike Linux, we don't have to wait for slices etc.
- */
-void setup_APIC_timer(void * data)
+static void __init setup_APIC_timer(unsigned int clocks)
{
unsigned long flags;
- __save_flags(flags);
- __sti();
- __setup_APIC_LVTT(0);
- __restore_flags(flags);
+
+ local_irq_save(flags);
+
+ /*
+ * Wait for IRQ0's slice:
+ */
+ wait_timer_tick();
+
+ __setup_APIC_LVTT(clocks);
+
+ local_irq_restore(flags);
}
/*
- * In this function we calibrate APIC bus clocks to the external timer.
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
*
- * As a result we have the Bys Speed and CPU speed in Hz.
- *
- * We want to do the calibration only once (for CPU0). CPUs connected by the
- * same APIC bus have the very same bus frequency.
- *
- * This bit is a bit shoddy since we use the very same periodic timer interrupt
- * we try to eliminate to calibrate the APIC.
+ * We want to do the calibration only once since we
+ * want to have local timer irqs synchronous. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyways, no accidental
+ * APIC irq that way.
*/
int __init calibrate_APIC_clock(void)
@@ -586,76 +757,114 @@ int __init calibrate_APIC_clock(void)
int i;
const int LOOPS = HZ/10;
- printk("Calibrating APIC timer for CPU%d...\n", smp_processor_id());
+ apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");
- /* Put whatever arbitrary (but long enough) timeout
+ /*
+ * Put whatever arbitrary (but long enough) timeout
* value into the APIC clock, we just want to get the
- * counter running for calibration. */
+ * counter running for calibration.
+ */
__setup_APIC_LVTT(1000000000);
- /* The timer chip counts down to zero. Let's wait
+ /*
+ * The timer chip counts down to zero. Let's wait
* for a wraparound to start exact measurement:
- * (the current tick might have been already half done) */
- wait_8254_wraparound();
+ * (the current tick might have been already half done)
+ */
+ wait_timer_tick();
- /* We wrapped around just now. Let's start: */
- rdtscll(t1);
+ /*
+ * We wrapped around just now. Let's start:
+ */
+ if (cpu_has_tsc)
+ rdtscll(t1);
tt1 = apic_read(APIC_TMCCT);
- /* Let's wait LOOPS wraprounds: */
+ /*
+ * Let's wait LOOPS wraprounds:
+ */
for (i = 0; i < LOOPS; i++)
- wait_8254_wraparound();
+ wait_timer_tick();
tt2 = apic_read(APIC_TMCCT);
- rdtscll(t2);
+ if (cpu_has_tsc)
+ rdtscll(t2);
- /* The APIC bus clock counter is 32 bits only, it
+ /*
+ * The APIC bus clock counter is 32 bits only, it
* might have overflown, but note that we use signed
* longs, thus no extra care needed.
- * underflown to be exact, as the timer counts down ;) */
+ *
+ * underflown to be exact, as the timer counts down ;)
+ */
+
result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
- printk("..... CPU speed is %ld.%04ld MHz.\n",
- ((long)(t2-t1)/LOOPS) / (1000000/HZ),
- ((long)(t2-t1)/LOOPS) % (1000000/HZ));
+ if (cpu_has_tsc)
+ apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+ "%ld.%04ld MHz.\n",
+ ((long)(t2-t1)/LOOPS)/(1000000/HZ),
+ ((long)(t2-t1)/LOOPS)%(1000000/HZ));
- printk("..... Bus speed is %ld.%04ld MHz.\n",
- result / (1000000/HZ),
- result % (1000000/HZ));
-
- /*
- * KAF: Moved this to time.c where it's calculated relative to the TSC.
- * Therefore works on machines with no local APIC.
- */
- /*cpu_freq = (u64)(((t2-t1)/LOOPS)*HZ);*/
+ apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+ "%ld.%04ld MHz.\n",
+ result/(1000000/HZ),
+ result%(1000000/HZ));
/* set up multipliers for accurate timer code */
bus_freq = result*HZ;
bus_cycle = (u32) (1000000000000LL/bus_freq); /* in pico seconds */
bus_scale = (1000*262144)/bus_cycle;
- printk("..... bus_scale = 0x%08X\n", bus_scale);
+ apic_printk(APIC_VERBOSE, "..... bus_scale = 0x%08X\n", bus_scale);
/* reset APIC to zero timeout value */
__setup_APIC_LVTT(0);
+
return result;
}
-/*
- * initialise the APIC timers for all CPUs
- * we start with the first and find out processor frequency and bus speed
- */
-void __init setup_APIC_clocks (void)
+
+static unsigned int calibration_result;
+
+void __init setup_boot_APIC_clock(void)
{
- printk("Using local APIC timer interrupts.\n");
+ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
using_apic_timer = 1;
- __cli();
- /* calibrate CPU0 for CPU speed and BUS speed */
- bus_freq = calibrate_APIC_clock();
- /* Now set up the timer for real. */
- setup_APIC_timer((void *)bus_freq);
- __sti();
- /* and update all other cpus */
- smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
+
+ local_irq_disable();
+
+ calibration_result = calibrate_APIC_clock();
+ /*
+ * Now set up the timer for real.
+ */
+ setup_APIC_timer(calibration_result);
+
+ local_irq_enable();
+}
+
+void __init setup_secondary_APIC_clock(void)
+{
+ setup_APIC_timer(calibration_result);
+}
+
+void __init disable_APIC_timer(void)
+{
+ if (using_apic_timer) {
+ unsigned long v;
+
+ v = apic_read(APIC_LVTT);
+ apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ }
+}
+
+void enable_APIC_timer(void)
+{
+ if (using_apic_timer) {
+ unsigned long v;
+
+ v = apic_read(APIC_LVTT);
+ apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
+ }
}
#undef APIC_DIVISOR
@@ -722,22 +931,17 @@ int reprogram_ac_timer(s_time_t timeout)
return 1;
}
-unsigned int apic_timer_irqs [NR_CPUS];
-
-void smp_apic_timer_interrupt(struct xen_regs * regs)
+void smp_apic_timer_interrupt(struct cpu_user_regs * regs)
{
ack_APIC_irq();
-
- apic_timer_irqs[smp_processor_id()]++;
perfc_incrc(apic_timer);
-
raise_softirq(AC_TIMER_SOFTIRQ);
}
/*
* This interrupt should _never_ happen with our APIC/SMP architecture
*/
-asmlinkage void smp_spurious_interrupt(void)
+asmlinkage void smp_spurious_interrupt(struct cpu_user_regs *regs)
{
unsigned long v;
@@ -751,7 +955,7 @@ asmlinkage void smp_spurious_interrupt(void)
ack_APIC_irq();
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
- printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
+ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
smp_processor_id());
}
@@ -759,7 +963,7 @@ asmlinkage void smp_spurious_interrupt(void)
* This interrupt should never happen with our APIC/SMP architecture
*/
-asmlinkage void smp_error_interrupt(void)
+asmlinkage void smp_error_interrupt(struct cpu_user_regs *regs)
{
unsigned long v, v1;
@@ -780,7 +984,7 @@ asmlinkage void smp_error_interrupt(void)
6: Received illegal vector
7: Illegal register address
*/
- printk ("APIC error on CPU%d: %02lx(%02lx)\n",
+ printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
smp_processor_id(), v , v1);
}
@@ -790,15 +994,17 @@ asmlinkage void smp_error_interrupt(void)
*/
int __init APIC_init_uniprocessor (void)
{
+ if (enable_local_apic < 0)
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+
if (!smp_found_config && !cpu_has_apic)
return -1;
/*
* Complain if the BIOS pretends there is one.
*/
- if (!cpu_has_apic&&APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]))
- {
- printk("BIOS bug, local APIC #%d not detected!...\n",
+ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+ printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
return -1;
}
@@ -807,21 +1013,18 @@ int __init APIC_init_uniprocessor (void)
connect_bsp_APIC();
-#ifdef CONFIG_SMP
- cpu_online_map = 1;
-#endif
- phys_cpu_present_map = 1;
- apic_write_around(APIC_ID, boot_cpu_physical_apicid);
-
- apic_pm_init2();
+ phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
setup_local_APIC();
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ check_nmi_watchdog();
#ifdef CONFIG_X86_IO_APIC
- if (smp_found_config && nr_ioapics)
- setup_IO_APIC();
+ if (smp_found_config)
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
#endif
- setup_APIC_clocks();
+ setup_boot_APIC_clock();
return 0;
}
diff --git a/xen/arch/x86/audit.c b/xen/arch/x86/audit.c
new file mode 100644
index 0000000000..dc2a14979a
--- /dev/null
+++ b/xen/arch/x86/audit.c
@@ -0,0 +1,976 @@
+/******************************************************************************
+ * arch/x86/audit.c
+ *
+ * Copyright (c) 2002-2005 K A Fraser
+ * Copyright (c) 2004 Christian Limpach
+ * Copyright (c) 2005 Michael A Fetterman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/kernel.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/perfc.h>
+#include <asm/shadow.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+
+/* XXX SMP bug -- these should not be statics... */
+static int ttot=0, ctot=0, io_mappings=0, lowmem_mappings=0;
+static int l1, l2, oos_count, page_count;
+
+#define FILE_AND_LINE 0
+
+#if FILE_AND_LINE
+#define adjust(_p, _a) _adjust((_p), (_a), __FILE__, __LINE__)
+#define ADJUST_EXTRA_ARGS ,const char *file, int line
+#define APRINTK(_f, _a...) printk(_f " %s:%d\n", ## _a, file, line)
+#else
+#define adjust _adjust
+#define ADJUST_EXTRA_ARGS
+#define APRINTK(_f, _a...) printk(_f "\n", ##_a)
+#endif
+
+int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
+{
+ int errors = 0;
+ int shadow_refcounts = !!shadow_mode_refcounts(d);
+ int shadow_enabled = !!shadow_mode_enabled(d);
+ int l2limit;
+
+ void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
+ {
+ if ( adjtype )
+ {
+ // adjust the type count
+ //
+ int tcount = page->u.inuse.type_info & PGT_count_mask;
+ tcount += dir;
+ ttot++;
+
+ if ( page_get_owner(page) == NULL )
+ {
+ APRINTK("adjust(mfn=%lx, dir=%d, adjtype=%d) owner=NULL",
+ page_to_pfn(page), dir, adjtype);
+ errors++;
+ }
+
+ if ( tcount < 0 )
+ {
+ APRINTK("Audit %d: type count went below zero "
+ "mfn=%lx t=%x ot=%x",
+ d->domain_id, page_to_pfn(page),
+ page->u.inuse.type_info,
+ page->tlbflush_timestamp);
+ errors++;
+ }
+ else if ( (tcount & ~PGT_count_mask) != 0 )
+ {
+ APRINTK("Audit %d: type count overflowed "
+ "mfn=%lx t=%x ot=%x",
+ d->domain_id, page_to_pfn(page),
+ page->u.inuse.type_info,
+ page->tlbflush_timestamp);
+ errors++;
+ }
+ else
+ page->u.inuse.type_info += dir;
+ }
+
+ // adjust the general count
+ //
+ int count = page->count_info & PGC_count_mask;
+ count += dir;
+ ctot++;
+
+ if ( count < 0 )
+ {
+ APRINTK("Audit %d: general count went below zero "
+ "mfn=%lx t=%x ot=%x",
+ d->domain_id, page_to_pfn(page),
+ page->u.inuse.type_info,
+ page->tlbflush_timestamp);
+ errors++;
+ }
+ else if ( (count & ~PGT_count_mask) != 0 )
+ {
+ APRINTK("Audit %d: general count overflowed "
+ "mfn=%lx t=%x ot=%x",
+ d->domain_id, page_to_pfn(page),
+ page->u.inuse.type_info,
+ page->tlbflush_timestamp);
+ errors++;
+ }
+ else
+ page->count_info += dir;
+ }
+
+ void adjust_l2_page(unsigned long mfn, int shadow)
+ {
+ unsigned long *pt = map_domain_page(mfn);
+ int i;
+
+ for ( i = 0; i < l2limit; i++ )
+ {
+ if ( pt[i] & _PAGE_PRESENT )
+ {
+ unsigned long l1mfn = pt[i] >> PAGE_SHIFT;
+ struct pfn_info *l1page = pfn_to_page(l1mfn);
+
+ if ( noisy )
+ {
+ if ( shadow )
+ {
+ if ( page_get_owner(l1page) != NULL )
+ {
+ printk("L2: Bizarre shadow L1 page mfn=%lx "
+ "belonging to a domain %p (id=%d)\n",
+ l1mfn,
+ page_get_owner(l1page),
+ page_get_owner(l1page)->domain_id);
+ errors++;
+ continue;
+ }
+
+ u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
+
+ if ( page_type != PGT_l1_shadow )
+ {
+ printk("Audit %d: [Shadow L2 mfn=%lx i=%x] "
+ "Expected Shadow L1 t=%x mfn=%lx\n",
+ d->domain_id, mfn, i,
+ l1page->u.inuse.type_info, l1mfn);
+ errors++;
+ }
+ }
+ else
+ {
+ if ( page_get_owner(l1page) != d )
+ {
+ printk("L2: Skip bizarre L1 page mfn=%lx "
+ "belonging to other dom %p (id=%d)\n",
+ l1mfn,
+ page_get_owner(l1page),
+ (page_get_owner(l1page)
+ ? page_get_owner(l1page)->domain_id
+ : -1));
+ errors++;
+ continue;
+ }
+
+ u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
+
+ if ( page_type == PGT_l2_page_table )
+ {
+ printk("Audit %d: [%x] Found %s Linear PT "
+ "t=%x mfn=%lx\n",
+ d->domain_id, i, (l1mfn==mfn) ? "Self" : "Other",
+ l1page->u.inuse.type_info, l1mfn);
+ }
+ else if ( page_type != PGT_l1_page_table )
+ {
+ printk("Audit %d: [L2 mfn=%lx i=%x] "
+ "Expected L1 t=%x mfn=%lx\n",
+ d->domain_id, mfn, i,
+ l1page->u.inuse.type_info, l1mfn);
+ errors++;
+ }
+ }
+ }
+
+ adjust(l1page, !shadow);
+ }
+ }
+
+ if ( shadow_mode_translate(d) && !shadow_mode_external(d) )
+ {
+ unsigned long hl2mfn =
+ pt[l2_table_offset(LINEAR_PT_VIRT_START)] >> PAGE_SHIFT;
+ struct pfn_info *hl2page = pfn_to_page(hl2mfn);
+ adjust(hl2page, 0);
+ }
+
+ unmap_domain_page(pt);
+ }
+
+ void adjust_hl2_page(unsigned long hl2mfn)
+ {
+ unsigned long *pt = map_domain_page(hl2mfn);
+ int i;
+
+ for ( i = 0; i < l2limit; i++ )
+ {
+ if ( pt[i] & _PAGE_PRESENT )
+ {
+ unsigned long gmfn = pt[i] >> PAGE_SHIFT;
+ struct pfn_info *gpage = pfn_to_page(gmfn);
+
+ if ( gmfn < 0x100 )
+ {
+ lowmem_mappings++;
+ continue;
+ }
+
+ if ( gmfn > max_page )
+ {
+ io_mappings++;
+ continue;
+ }
+
+ if ( noisy )
+ {
+ if ( page_get_owner(gpage) != d )
+ {
+ printk("Audit %d: [hl2mfn=%lx,i=%x] Skip foreign page "
+ "dom=%p (id=%d) mfn=%lx c=%08x t=%08x\n",
+ d->domain_id, hl2mfn, i,
+ page_get_owner(gpage),
+ page_get_owner(gpage)->domain_id,
+ gmfn,
+ gpage->count_info,
+ gpage->u.inuse.type_info);
+ continue;
+ }
+ }
+ adjust(gpage, 0);
+ }
+ }
+
+ unmap_domain_page(pt);
+ }
+
+ void adjust_l1_page(unsigned long l1mfn)
+ {
+ unsigned long *pt = map_domain_page(l1mfn);
+ int i;
+
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ if ( pt[i] & _PAGE_PRESENT )
+ {
+ unsigned long gmfn = pt[i] >> PAGE_SHIFT;
+ struct pfn_info *gpage = pfn_to_page(gmfn);
+
+ if ( gmfn < 0x100 )
+ {
+ lowmem_mappings++;
+ continue;
+ }
+
+ if ( gmfn > max_page )
+ {
+ io_mappings++;
+ continue;
+ }
+
+ if ( noisy )
+ {
+ if ( pt[i] & _PAGE_RW )
+ {
+ // If it's not a writable page, complain.
+ //
+ if ( !((gpage->u.inuse.type_info & PGT_type_mask) ==
+ PGT_writable_page) )
+ {
+ printk("Audit %d: [l1mfn=%lx, i=%x] Illegal RW "
+ "t=%x mfn=%lx\n",
+ d->domain_id, l1mfn, i,
+ gpage->u.inuse.type_info, gmfn);
+ errors++;
+ }
+
+ if ( shadow_refcounts &&
+ page_is_page_table(gpage) &&
+ ! page_out_of_sync(gpage) )
+ {
+ printk("Audit %d: [l1mfn=%lx, i=%x] Illegal RW of "
+ "page table gmfn=%lx\n",
+ d->domain_id, l1mfn, i, gmfn);
+ errors++;
+ }
+ }
+
+ if ( page_get_owner(gpage) != d )
+ {
+ printk("Audit %d: [l1mfn=%lx,i=%x] Skip foreign page "
+ "dom=%p (id=%d) mfn=%lx c=%08x t=%08x\n",
+ d->domain_id, l1mfn, i,
+ page_get_owner(gpage),
+ page_get_owner(gpage)->domain_id,
+ gmfn,
+ gpage->count_info,
+ gpage->u.inuse.type_info);
+ continue;
+ }
+ }
+
+ adjust(gpage, (pt[i] & _PAGE_RW) ? 1 : 0);
+ }
+ }
+
+ unmap_domain_page(pt);
+ }
+
+ void adjust_shadow_tables()
+ {
+ struct shadow_status *a;
+ unsigned long smfn, gmfn;
+ struct pfn_info *page;
+ int i;
+
+ for ( i = 0; i < shadow_ht_buckets; i++ )
+ {
+ a = &d->arch.shadow_ht[i];
+ while ( a && a->gpfn_and_flags )
+ {
+ gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
+ smfn = a->smfn;
+ page = &frame_table[smfn];
+
+ switch ( a->gpfn_and_flags & PGT_type_mask ) {
+ case PGT_writable_pred:
+ break;
+ case PGT_snapshot:
+ adjust(pfn_to_page(gmfn), 0);
+ break;
+ case PGT_l1_shadow:
+ adjust(pfn_to_page(gmfn), 0);
+ if ( shadow_refcounts )
+ adjust_l1_page(smfn);
+ if ( page->u.inuse.type_info & PGT_pinned )
+ adjust(page, 0);
+ break;
+ case PGT_hl2_shadow:
+ adjust(pfn_to_page(gmfn), 0);
+ if ( shadow_refcounts )
+ adjust_hl2_page(smfn);
+ if ( page->u.inuse.type_info & PGT_pinned )
+ adjust(page, 0);
+ break;
+ case PGT_l2_shadow:
+ adjust(pfn_to_page(gmfn), 0);
+ adjust_l2_page(smfn, 1);
+ if ( page->u.inuse.type_info & PGT_pinned )
+ adjust(page, 0);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ a = a->next;
+ }
+ }
+ }
+
+ void adjust_oos_list()
+ {
+ struct out_of_sync_entry *oos;
+
+ if ( (oos = d->arch.out_of_sync) )
+ ASSERT(shadow_enabled);
+
+ while ( oos )
+ {
+ adjust(pfn_to_page(oos->gmfn), 0);
+
+ // Only use entries that have low bits clear...
+ //
+ if ( !(oos->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
+ adjust(pfn_to_page(oos->writable_pl1e >> PAGE_SHIFT), 0);
+
+ if ( oos->snapshot_mfn != SHADOW_SNAPSHOT_ELSEWHERE )
+ adjust(pfn_to_page(oos->snapshot_mfn), 0);
+
+ oos = oos->next;
+ oos_count++;
+ }
+ }
+
+ void adjust_for_pgtbase()
+ {
+ struct vcpu *v;
+
+ for_each_vcpu(d, v)
+ {
+ if ( pagetable_get_paddr(v->arch.guest_table) )
+ adjust(&frame_table[pagetable_get_pfn(v->arch.guest_table)], 1);
+ if ( pagetable_get_paddr(v->arch.shadow_table) )
+ adjust(&frame_table[pagetable_get_pfn(v->arch.shadow_table)], 0);
+ if ( v->arch.monitor_shadow_ref )
+ adjust(&frame_table[v->arch.monitor_shadow_ref], 0);
+ }
+ }
+
+ void adjust_guest_pages()
+ {
+ struct list_head *list_ent = d->page_list.next;
+ struct pfn_info *page;
+ unsigned long mfn, snapshot_mfn;
+
+ while ( list_ent != &d->page_list )
+ {
+ u32 page_type;
+
+ page = list_entry(list_ent, struct pfn_info, list);
+ snapshot_mfn = mfn = page_to_pfn(page);
+ page_type = page->u.inuse.type_info & PGT_type_mask;
+
+ BUG_ON(page_get_owner(page) != d);
+
+ page_count++;
+
+ if ( shadow_enabled && !shadow_refcounts &&
+ page_out_of_sync(page) )
+ {
+ unsigned long gpfn = __mfn_to_gpfn(d, mfn);
+ ASSERT( VALID_M2P(gpfn) );
+ snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
+ ASSERT( snapshot_mfn );
+ }
+
+ switch ( page_type )
+ {
+ case PGT_l2_page_table:
+ l2++;
+
+ if ( noisy )
+ {
+ if ( shadow_refcounts )
+ {
+ printk("Audit %d: found an L2 guest page "
+ "mfn=%lx t=%08x c=%08x while in shadow mode\n",
+ d->domain_id, mfn, page->u.inuse.type_info,
+ page->count_info);
+ errors++;
+ }
+
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
+ {
+ if ( (page->u.inuse.type_info & PGT_validated) !=
+ PGT_validated )
+ {
+ printk("Audit %d: L2 mfn=%lx not validated %08x\n",
+ d->domain_id, mfn, page->u.inuse.type_info);
+ errors++;
+ }
+
+ if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
+ {
+ printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
+ d->domain_id, mfn, page->u.inuse.type_info);
+ errors++;
+ }
+ }
+ }
+
+ if ( page->u.inuse.type_info & PGT_pinned )
+ adjust(page, 1);
+
+ if ( page->u.inuse.type_info & PGT_validated )
+ adjust_l2_page(snapshot_mfn, 0);
+
+ break;
+
+ case PGT_l1_page_table:
+ l1++;
+
+ if ( noisy )
+ {
+ if ( shadow_refcounts )
+ {
+ printk("found an L1 guest page mfn=%lx t=%08x c=%08x "
+ "while in shadow mode\n",
+ mfn, page->u.inuse.type_info, page->count_info);
+ errors++;
+ }
+
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
+ {
+ if ( (page->u.inuse.type_info & PGT_validated) !=
+ PGT_validated )
+ {
+ printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
+ d->domain_id, mfn, page->u.inuse.type_info);
+ errors++;
+ }
+
+ if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
+ {
+ if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
+ {
+ printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
+ d->domain_id, mfn, page->u.inuse.type_info);
+ }
+ }
+ }
+ }
+
+ if ( page->u.inuse.type_info & PGT_pinned )
+ adjust(page, 1);
+
+ if ( page->u.inuse.type_info & PGT_validated )
+ adjust_l1_page(snapshot_mfn);
+
+ break;
+
+ case PGT_gdt_page:
+ ASSERT( !page_out_of_sync(page) );
+ adjust(page, 1);
+ break;
+
+ case PGT_ldt_page:
+ ASSERT( !page_out_of_sync(page) );
+ adjust(page, 1);
+ break;
+
+ case PGT_writable_page:
+ if ( shadow_refcounts )
+ {
+ // In shadow mode, writable pages can get pinned by
+ // paravirtualized guests that think they are pinning
+ // their L1s and/or L2s.
+ //
+ if ( page->u.inuse.type_info & PGT_pinned )
+ adjust(page, 1);
+ }
+ }
+
+ list_ent = page->list.next;
+ }
+ }
+
+#ifdef __i386__
+ if ( shadow_mode_external(d) )
+ l2limit = L2_PAGETABLE_ENTRIES;
+ else
+ l2limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
+#else
+ l2limit = 0; /* XXX x86/64 XXX */
+#endif
+
+ adjust_for_pgtbase();
+
+ adjust_guest_pages();
+
+ if ( shadow_enabled )
+ {
+ adjust_oos_list();
+ adjust_shadow_tables();
+ }
+
+ adjust(virt_to_page(d->shared_info), 1);
+
+ return errors;
+}
+
+
+#ifndef NDEBUG
+
+void audit_pagelist(struct domain *d)
+{
+ struct list_head *list_ent;
+ int xenpages, totpages;
+
+ list_ent = d->xenpage_list.next;
+ for ( xenpages = 0; (list_ent != &d->xenpage_list); xenpages++ )
+ {
+ list_ent = list_ent->next;
+ }
+ list_ent = d->page_list.next;
+ for ( totpages = 0; (list_ent != &d->page_list); totpages++ )
+ {
+ list_ent = list_ent->next;
+ }
+
+ if ( xenpages != d->xenheap_pages ||
+ totpages != d->tot_pages )
+ {
+ printk("ARGH! dom %d: xen=%d %d, pages=%d %d\n", d->domain_id,
+ xenpages, d->xenheap_pages,
+ totpages, d->tot_pages );
+ }
+}
+
+void _audit_domain(struct domain *d, int flags)
+{
+ int shadow_refcounts = !!shadow_mode_refcounts(d);
+
+ void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn,
+ unsigned long mfn)
+ {
+ struct pfn_info *page = &frame_table[mfn];
+ unsigned long *pt = map_domain_page(mfn);
+ int i;
+
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ if ( (pt[i] & _PAGE_PRESENT) && ((pt[i] >> PAGE_SHIFT) == xmfn) )
+ printk(" found dom=%d mfn=%lx t=%08x c=%08x "
+ "pt[i=%x]=%lx\n",
+ d->domain_id, mfn, page->u.inuse.type_info,
+ page->count_info, i, pt[i]);
+ }
+
+ unmap_domain_page(pt);
+ }
+
+ void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn)
+ {
+ int i;
+ active_grant_entry_t *act = d->grant_table->active;
+
+ spin_lock(&d->grant_table->lock);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ {
+ if ( act[i].pin && (act[i].frame == xmfn) )
+ {
+ printk(" found active grant table entry i=%d dom=%d pin=%d\n",
+ i, act[i].domid, act[i].pin);
+ }
+ }
+
+ spin_unlock(&d->grant_table->lock);
+ }
+
+ void scan_for_pfn(struct domain *d, unsigned long xmfn)
+ {
+ scan_for_pfn_in_grant_table(d, xmfn);
+
+ if ( !shadow_mode_enabled(d) )
+ {
+ struct list_head *list_ent = d->page_list.next;
+ struct pfn_info *page;
+
+ while ( list_ent != &d->page_list )
+ {
+ page = list_entry(list_ent, struct pfn_info, list);
+
+ switch ( page->u.inuse.type_info & PGT_type_mask )
+ {
+ case PGT_l1_page_table:
+ case PGT_l2_page_table:
+ scan_for_pfn_in_mfn(d, xmfn, page_to_pfn(page));
+ break;
+ default:
+ break;
+ }
+
+ list_ent = page->list.next;
+ }
+ }
+ else
+ {
+ struct shadow_status *a;
+ int i;
+
+ for ( i = 0; i < shadow_ht_buckets; i++ )
+ {
+ a = &d->arch.shadow_ht[i];
+ while ( a && a->gpfn_and_flags )
+ {
+ switch ( a->gpfn_and_flags & PGT_type_mask )
+ {
+ case PGT_l1_shadow:
+ case PGT_l2_shadow:
+ case PGT_hl2_shadow:
+ scan_for_pfn_in_mfn(d, xmfn, a->smfn);
+ break;
+ case PGT_snapshot:
+ case PGT_writable_pred:
+ break;
+ default:
+ BUG();
+ break;
+ }
+ a = a->next;
+ }
+ }
+ }
+ }
+
+ void scan_for_pfn_remote(unsigned long xmfn)
+ {
+ struct domain *e;
+ for_each_domain ( e )
+ scan_for_pfn( e, xmfn );
+ }
+
+ unsigned long mfn;
+ struct list_head *list_ent;
+ struct pfn_info *page;
+ int errors = 0;
+
+ if ( (d != current->domain) && shadow_mode_translate(d) )
+ {
+ printk("skipping audit domain of translated domain %d "
+ "from other context\n",
+ d->domain_id);
+ return;
+ }
+
+ if ( d != current->domain )
+ domain_pause(d);
+ sync_lazy_execstate_all();
+
+ // Maybe we should just be using BIGLOCK?
+ //
+ if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
+ shadow_lock(d);
+
+ spin_lock(&d->page_alloc_lock);
+
+ audit_pagelist(d);
+
+ /* PHASE 0 */
+
+ list_ent = d->page_list.next;
+ while ( list_ent != &d->page_list )
+ {
+ u32 page_type;
+
+ page = list_entry(list_ent, struct pfn_info, list);
+ mfn = page_to_pfn(page);
+ page_type = page->u.inuse.type_info & PGT_type_mask;
+
+ BUG_ON(page_get_owner(page) != d);
+
+ if ( (page->u.inuse.type_info & PGT_count_mask) >
+ (page->count_info & PGC_count_mask) )
+ {
+ printk("taf(%08x) > caf(%08x) mfn=%lx\n",
+ page->u.inuse.type_info, page->count_info, mfn);
+ errors++;
+ }
+
+ if ( shadow_mode_refcounts(d) &&
+ (page_type == PGT_writable_page) &&
+ !(page->u.inuse.type_info & PGT_validated) )
+ {
+ printk("shadow mode writable page not validated mfn=%lx "
+ "t=%08x c=%08x\n",
+ mfn, page->u.inuse.type_info, page->count_info);
+ errors++;
+ }
+
+#if 0 /* SYSV shared memory pages plus writeable files. */
+ if ( page_type == PGT_writable_page &&
+ (page->u.inuse.type_info & PGT_count_mask) > 1 )
+ {
+ printk("writeable page with type count >1: "
+ "mfn=%lx t=%08x c=%08x\n",
+ mfn,
+ page->u.inuse.type_info,
+ page->count_info );
+ errors++;
+ scan_for_pfn_remote(mfn);
+ }
+#endif
+
+ if ( page_type == PGT_none &&
+ (page->u.inuse.type_info & PGT_count_mask) > 0 )
+ {
+ printk("normal page with type count >0: mfn=%lx t=%08x c=%08x\n",
+ mfn,
+ page->u.inuse.type_info,
+ page->count_info );
+ errors++;
+ }
+
+ if ( page_out_of_sync(page) )
+ {
+ if ( !page_is_page_table(page) )
+ {
+ printk("out of sync page mfn=%lx is not a page table\n", mfn);
+ errors++;
+ }
+ unsigned long pfn = __mfn_to_gpfn(d, mfn);
+ if ( !__shadow_status(d, pfn, PGT_snapshot) )
+ {
+ printk("out of sync page mfn=%lx doesn't have a snapshot\n",
+ mfn);
+ errors++;
+ }
+ if ( shadow_refcounts
+ ? (page_type != PGT_writable_page)
+ : !(page_type && (page_type <= PGT_l4_page_table)) )
+ {
+ printk("out of sync page mfn=%lx has strange type "
+ "t=%08x c=%08x\n",
+ mfn, page->u.inuse.type_info, page->count_info);
+ errors++;
+ }
+ }
+
+ /* Use tlbflush_timestamp to store original type_info. */
+ page->tlbflush_timestamp = page->u.inuse.type_info;
+
+ list_ent = page->list.next;
+ }
+
+ /* PHASE 1 */
+ io_mappings = lowmem_mappings = 0;
+
+ errors += audit_adjust_pgtables(d, -1, 1);
+
+ if ( !(flags & AUDIT_QUIET) &&
+ ((io_mappings > 0) || (lowmem_mappings > 0)) )
+ printk("Audit %d: Found %d lowmem mappings and %d io mappings\n",
+ d->domain_id, lowmem_mappings, io_mappings);
+
+ /* PHASE 2 */
+
+ list_ent = d->page_list.next;
+ while ( list_ent != &d->page_list )
+ {
+ page = list_entry(list_ent, struct pfn_info, list);
+ mfn = page_to_pfn(page);
+
+ switch ( page->u.inuse.type_info & PGT_type_mask)
+ {
+ case PGT_l1_page_table:
+ case PGT_l2_page_table:
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
+ {
+ printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
+ d->domain_id, page->u.inuse.type_info,
+ page->tlbflush_timestamp,
+ page->count_info, mfn);
+ errors++;
+ scan_for_pfn_remote(mfn);
+ }
+ break;
+ case PGT_none:
+ case PGT_writable_page:
+ case PGT_gdt_page:
+ case PGT_ldt_page:
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
+ {
+ printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
+ d->domain_id, page->u.inuse.type_info,
+ page->tlbflush_timestamp,
+ page->count_info, mfn);
+ //errors++;
+ }
+ break;
+ default:
+ BUG(); // XXX fix me...
+ }
+
+ if ( (page->count_info & PGC_count_mask) != 1 )
+ {
+ printk("Audit %d: gen count!=1 (c=%x) t=%x ot=%x mfn=%lx\n",
+ d->domain_id,
+ page->count_info,
+ page->u.inuse.type_info,
+ page->tlbflush_timestamp, mfn );
+ //errors++;
+ scan_for_pfn_remote(mfn);
+ }
+
+ list_ent = page->list.next;
+ }
+
+ if ( shadow_mode_enabled(d) )
+ {
+ struct shadow_status *a;
+ struct pfn_info *page;
+ u32 page_type;
+ int i;
+
+ for ( i = 0; i < shadow_ht_buckets; i++ )
+ {
+ a = &d->arch.shadow_ht[i];
+ while ( a && a->gpfn_and_flags )
+ {
+ page = pfn_to_page(a->smfn);
+ page_type = a->gpfn_and_flags & PGT_type_mask;
+
+ switch ( page_type ) {
+ case PGT_l1_shadow:
+ case PGT_l2_shadow:
+ case PGT_hl2_shadow:
+ case PGT_snapshot:
+ if ( ((page->u.inuse.type_info & PGT_type_mask) != page_type ) ||
+ (page->count_info != 0) )
+ {
+ printk("Audit %d: shadow page counts wrong "
+ "mfn=%lx t=%08x c=%08x\n",
+ d->domain_id, page_to_pfn(page),
+ page->u.inuse.type_info,
+ page->count_info);
+ printk("a->gpfn_and_flags=%p\n",
+ (void *)a->gpfn_and_flags);
+ errors++;
+ }
+ break;
+ case PGT_writable_pred:
+ // XXX - nothing to check?
+ break;
+
+ default:
+ BUG();
+ break;
+ }
+
+ a = a->next;
+ }
+ }
+ }
+
+ /* PHASE 3 */
+ ctot = ttot = page_count = l1 = l2 = oos_count = 0;
+
+ audit_adjust_pgtables(d, 1, 0);
+
+#if 0
+ // This covers our sins of trashing the tlbflush_timestamps...
+ //
+ local_flush_tlb();
+#endif
+
+ spin_unlock(&d->page_alloc_lock);
+
+ if ( !(flags & AUDIT_QUIET) )
+ printk("Audit dom%d Done. "
+ "pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n",
+ d->domain_id, page_count, oos_count, l1, l2, ctot, ttot);
+
+ if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
+ shadow_unlock(d);
+
+ if ( d != current->domain )
+ domain_unpause(d);
+
+ if ( errors && !(flags & AUDIT_ERRORS_OK) )
+ BUG();
+}
+
+void audit_domains(void)
+{
+ struct domain *d;
+ for_each_domain ( d )
+ audit_domain(d);
+}
+
+void audit_domains_key(unsigned char key)
+{
+ audit_domains();
+}
+#endif
diff --git a/xen/arch/x86/bitops.c b/xen/arch/x86/bitops.c
new file mode 100644
index 0000000000..695a609a23
--- /dev/null
+++ b/xen/arch/x86/bitops.c
@@ -0,0 +1,99 @@
+
+#include <xen/bitops.h>
+#include <xen/lib.h>
+
+unsigned int __find_first_bit(
+ const unsigned long *addr, unsigned int size)
+{
+ unsigned long d0, d1, res;
+
+ __asm__ __volatile__ (
+ " xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */
+ " repe; scas"__OS"\n\t"
+ " je 1f\n\t"
+ " lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
+ " bsf (%2),%0\n"
+ "1: sub %%ebx,%%edi\n\t"
+ " shl $3,%%edi\n\t"
+ " add %%edi,%%eax"
+ : "=&a" (res), "=&c" (d0), "=&D" (d1)
+ : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG),
+ "2" (addr), "b" ((int)(long)addr) : "memory" );
+
+ return res;
+}
+
+unsigned int __find_next_bit(
+ const unsigned long *addr, unsigned int size, unsigned int offset)
+{
+ const unsigned long *p = addr + (offset / BITS_PER_LONG);
+ unsigned int set, bit = offset & (BITS_PER_LONG - 1);
+
+ ASSERT(offset < size);
+
+ if ( bit != 0 )
+ {
+ /* Look for a bit in the first word. */
+ __asm__ ( "bsf %1,%%"__OP"ax"
+ : "=a" (set) : "r" (*p >> bit), "0" (BITS_PER_LONG) );
+ if ( set < (BITS_PER_LONG - bit) )
+ return (offset + set);
+ offset += BITS_PER_LONG - bit;
+ p++;
+ }
+
+ if ( offset >= size )
+ return size;
+
+ /* Search remaining full words for a bit. */
+ set = __find_first_bit(p, size - offset);
+ return (offset + set);
+}
+
+unsigned int __find_first_zero_bit(
+ const unsigned long *addr, unsigned int size)
+{
+ unsigned long d0, d1, d2, res;
+
+ __asm__ (
+ " xor %%edx,%%edx\n\t" /* also ensures ZF==1 if size==0 */
+ " repe; scas"__OS"\n\t"
+ " je 1f\n\t"
+ " lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
+ " xor (%2),%3\n\t"
+ " bsf %3,%0\n"
+ "1: sub %%ebx,%%edi\n\t"
+ " shl $3,%%edi\n\t"
+ " add %%edi,%%edx"
+ : "=&d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
+ : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG),
+ "2" (addr), "b" ((int)(long)addr), "3" (-1L) : "memory" );
+
+ return res;
+}
+
+unsigned int __find_next_zero_bit(
+ const unsigned long *addr, unsigned int size, unsigned int offset)
+{
+ const unsigned long *p = addr + (offset / BITS_PER_LONG);
+ unsigned int set, bit = offset & (BITS_PER_LONG - 1);
+
+ ASSERT(offset < size);
+
+ if ( bit != 0 )
+ {
+ /* Look for zero in the first word. */
+ __asm__ ( "bsf %1,%%"__OP"ax" : "=a" (set) : "r" (~(*p >> bit)) );
+ if ( set < (BITS_PER_LONG - bit) )
+ return (offset + set);
+ offset += BITS_PER_LONG - bit;
+ p++;
+ }
+
+ if ( offset >= size )
+ return size;
+
+ /* Search remaining full words for a zero. */
+ set = __find_first_zero_bit(p, size - offset);
+ return (offset + set);
+}
diff --git a/xen/arch/x86/boot/mkelf32.c b/xen/arch/x86/boot/mkelf32.c
index 3f1f98cc60..4b7515c8cc 100644
--- a/xen/arch/x86/boot/mkelf32.c
+++ b/xen/arch/x86/boot/mkelf32.c
@@ -406,4 +406,5 @@ int main(int argc, char **argv)
* c-basic-offset: 4
* tab-width: 4
* indent-tabs-mode: nil
+ * End:
*/
diff --git a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S
index b8cf16dc83..75d3b57bf0 100644
--- a/xen/arch/x86/boot/x86_32.S
+++ b/xen/arch/x86/boot/x86_32.S
@@ -1,5 +1,6 @@
#include <xen/config.h>
#include <public/xen.h>
+#include <asm/desc.h>
#include <asm/page.h>
#define SECONDARY_CPU_FLAG 0xA5A5A5A5
@@ -15,19 +16,19 @@ ENTRY(start)
/* Magic number indicating a Multiboot header. */
.long 0x1BADB002
/* Flags to bootloader (see Multiboot spec). */
- .long 0x00000002
+ .long 0x00000003
/* Checksum: must be the negated sum of the first two fields. */
- .long -0x1BADB004
+ .long -0x1BADB005
bad_cpu_msg:
.asciz "ERR: Not a P6-compatible CPU!"
not_multiboot_msg:
.asciz "ERR: Not a Multiboot bootloader!"
bad_cpu:
- mov $SYMBOL_NAME(bad_cpu_msg)-__PAGE_OFFSET,%esi
+ mov $bad_cpu_msg-__PAGE_OFFSET,%esi
jmp print_err
not_multiboot:
- mov $SYMBOL_NAME(not_multiboot_msg)-__PAGE_OFFSET,%esi
+ mov $not_multiboot_msg-__PAGE_OFFSET,%esi
print_err:
mov $0xB8000,%edi # VGA framebuffer
1: mov (%esi),%bl
@@ -100,6 +101,22 @@ __start:
xor %eax,%eax
rep stosb
+#ifdef CONFIG_X86_PAE
+ /* Initialize low and high mappings of all memory with 2MB pages */
+ mov $idle_pg_table_l2-__PAGE_OFFSET,%edi
+ mov $0xe3,%eax /* PRESENT+RW+A+D+2MB */
+1: mov %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
+ stosl /* low mapping */
+ add $4,%edi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $DIRECTMAP_PHYS_END+0xe3,%eax
+ jne 1b
+1: stosl /* low mappings cover as much physmem as possible */
+ add $4,%edi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
+ jne 1b
+#else
/* Initialize low and high mappings of all memory with 4MB pages */
mov $idle_pg_table-__PAGE_OFFSET,%edi
mov $0xe3,%eax /* PRESENT+RW+A+D+4MB */
@@ -112,13 +129,14 @@ __start:
add $(1<<L2_PAGETABLE_SHIFT),%eax
cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
jne 1b
+#endif
/* Initialise IDT with simple error defaults. */
lea ignore_int,%edx
mov $(__HYPERVISOR_CS << 16),%eax
mov %dx,%ax /* selector = 0x0010 = cs */
mov $0x8E00,%dx /* interrupt gate - dpl=0, present */
- lea SYMBOL_NAME(idt_table)-__PAGE_OFFSET,%edi
+ lea idt_table-__PAGE_OFFSET,%edi
mov $256,%ecx
1: mov %eax,(%edi)
mov %edx,4(%edi)
@@ -163,58 +181,85 @@ ignore_int:
mov %eax,%ds
mov %eax,%es
pushl $int_msg
- call SYMBOL_NAME(printf)
+ call printf
1: jmp 1b
/*** STACK LOCATION ***/
ENTRY(stack_start)
- .long SYMBOL_NAME(cpu0_stack) + 8100 - __PAGE_OFFSET
+ .long cpu0_stack + STACK_SIZE - 200 - __PAGE_OFFSET
.long __HYPERVISOR_DS
/*** DESCRIPTOR TABLES ***/
-.globl SYMBOL_NAME(idt)
-.globl SYMBOL_NAME(gdt)
+.globl idt
+.globl gdt
ALIGN
.word 0
idt_descr:
- .word 256*8-1
-SYMBOL_NAME(idt):
- .long SYMBOL_NAME(idt_table)
+ .word 256*8-1
+idt:
+ .long idt_table
.word 0
gdt_descr:
- .word (LAST_RESERVED_GDT_ENTRY*8)+7
-SYMBOL_NAME(gdt):
- .long SYMBOL_NAME(gdt_table) /* gdt base */
+ .word LAST_RESERVED_GDT_BYTE
+gdt:
+ .long gdt_table - FIRST_RESERVED_GDT_BYTE
.word 0
nopaging_gdt_descr:
- .word (LAST_RESERVED_GDT_ENTRY*8)+7
- .long SYMBOL_NAME(gdt_table)-__PAGE_OFFSET
+ .word LAST_RESERVED_GDT_BYTE
+ .long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
- ALIGN
+ .org 0x1000
/* NB. Rings != 0 get access up to 0xFC400000. This allows access to the */
/* machine->physical mapping table. Ring 0 can access all memory. */
ENTRY(gdt_table)
- .fill FIRST_RESERVED_GDT_ENTRY,8,0
.quad 0x0000000000000000 /* unused */
- .quad 0x00cf9a000000ffff /* 0x0808 ring 0 4.00GB code at 0x0 */
- .quad 0x00cf92000000ffff /* 0x0810 ring 0 4.00GB data at 0x0 */
- .quad 0x00cfba000000c3ff /* 0x0819 ring 1 3.95GB code at 0x0 */
- .quad 0x00cfb2000000c3ff /* 0x0821 ring 1 3.95GB data at 0x0 */
- .quad 0x00cffa000000c3ff /* 0x082b ring 3 3.95GB code at 0x0 */
- .quad 0x00cff2000000c3ff /* 0x0833 ring 3 3.95GB data at 0x0 */
+ .quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */
+ .quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */
+#ifdef CONFIG_X86_PAE
+ .quad 0x00cfba00000067ff
+ .quad 0x00cfb200000067ff
+ .quad 0x00cffa00000067ff
+ .quad 0x00cff200000067ff
+#else
+ .quad 0x00cfba000000c3ff /* 0xe019 ring 1 3.95GB code at 0x0 */
+ .quad 0x00cfb2000000c3ff /* 0xe021 ring 1 3.95GB data at 0x0 */
+ .quad 0x00cffa000000c3ff /* 0xe02b ring 3 3.95GB code at 0x0 */
+ .quad 0x00cff2000000c3ff /* 0xe033 ring 3 3.95GB data at 0x0 */
+#endif
.quad 0x0000000000000000 /* unused */
.fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
- .org 0x1000
-ENTRY(idle_pg_table) # Initial page directory is 4kB
.org 0x2000
-ENTRY(cpu0_stack) # Initial stack is 8kB
- .org 0x4000
+/* Maximum STACK_ORDER for x86/32 is 1. We must therefore ensure that the */
+/* CPU0 stack is aligned on an even page boundary! */
+ENTRY(cpu0_stack)
+ .org 0x2000 + STACK_SIZE
+
+#ifdef CONFIG_X86_PAE
+
+ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l3)
+ .quad 0x100000 + 0x2000 + STACK_SIZE + 1*PAGE_SIZE + 0x01
+ .quad 0x100000 + 0x2000 + STACK_SIZE + 2*PAGE_SIZE + 0x01
+ .quad 0x100000 + 0x2000 + STACK_SIZE + 3*PAGE_SIZE + 0x01
+ .quad 0x100000 + 0x2000 + STACK_SIZE + 4*PAGE_SIZE + 0x01
+ .org 0x2000 + STACK_SIZE + 1*PAGE_SIZE
+ENTRY(idle_pg_table_l2)
+ .org 0x2000 + STACK_SIZE + 5*PAGE_SIZE
+
+#else /* CONFIG_X86_PAE */
+
+ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l2) # Initial page directory is 4kB
+ .org 0x2000 + STACK_SIZE + PAGE_SIZE
+
+#endif /* CONFIG_X86_PAE */
+
ENTRY(stext)
ENTRY(_stext)
diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
index a8253a4ce1..a2b2410ae9 100644
--- a/xen/arch/x86/boot/x86_64.S
+++ b/xen/arch/x86/boot/x86_64.S
@@ -1,5 +1,6 @@
#include <xen/config.h>
#include <public/xen.h>
+#include <asm/desc.h>
#include <asm/page.h>
#include <asm/msr.h>
@@ -7,18 +8,18 @@
.text
.code32
-
+
ENTRY(start)
jmp __start
-
+
.org 0x004
/*** MULTIBOOT HEADER ****/
/* Magic number indicating a Multiboot header. */
.long 0x1BADB002
/* Flags to bootloader (see Multiboot spec). */
- .long 0x00000002
+ .long 0x00000003
/* Checksum: must be the negated sum of the first two fields. */
- .long -0x1BADB004
+ .long -0x1BADB005
.org 0x010
.asciz "ERR: Not a 64-bit CPU!"
@@ -45,26 +46,26 @@ print_err:
mov $7,%al
stosb # Write an attribute to the VGA framebuffer
jmp 1b
-
+
__start:
cld
cli
/* Set up a few descriptors: on entry only CS is guaranteed good. */
- lgdt %cs:0x1001f0
- mov $(__HYPERVISOR_DS),%ecx
+ lgdt %cs:0x100306 # nopaging_gdt_descr
+ mov $(__HYPERVISOR_DS32),%ecx
mov %ecx,%ds
mov %ecx,%es
- /* Check for Multiboot bootloader */
cmp $(SECONDARY_CPU_FLAG),%ebx
- je skip_multiboot_check
+ je skip_boot_checks
+
+ /* Check for Multiboot bootloader */
cmp $0x2BADB002,%eax
jne not_multiboot
-skip_multiboot_check:
/* Save the Multiboot info structure for later use. */
- mov %ebx,0x1001e0
+ mov %ebx,0x100300 # multiboot_ptr
/* We begin by interrogating the CPU for the presence of long mode. */
mov $0x80000000,%eax
@@ -75,7 +76,9 @@ skip_multiboot_check:
cpuid
bt $29,%edx # Long mode feature?
jnc bad_cpu
-
+ mov %edx,%edi
+skip_boot_checks:
+
/* Set up FPU. */
fninit
@@ -84,15 +87,18 @@ skip_multiboot_check:
mov %ecx,%cr4
/* Load pagetable base register. */
- mov $0x101000,%eax /* idle_pg_table */
+ mov $0x102000,%eax /* idle_pg_table */
mov %eax,%cr3
/* Set up EFER (Extended Feature Enable Register). */
movl $MSR_EFER, %ecx
rdmsr
- /* Long Mode, SYSCALL/SYSRET, No-Execute */
- movl $(EFER_LME|EFER_SCE|EFER_NX),%eax
- wrmsr
+ btsl $_EFER_LME,%eax /* Long Mode */
+ btsl $_EFER_SCE,%eax /* SYSCALL/SYSRET */
+ btl $20,%edi /* CPUID 0x80000001, EDX[20] */
+ jnc 1f
+ btsl $_EFER_NX,%eax /* No-Execute */
+1: wrmsr
mov $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
mov %eax,%cr0
@@ -106,10 +112,6 @@ skip_multiboot_check:
/* Install relocated selectors (FS/GS unused). */
lgdt gdt_descr(%rip)
- mov $(__HYPERVISOR_DS),%ecx
- mov %ecx,%ds
- mov %ecx,%es
- mov %ecx,%ss
/* Enable full CR4 features. */
mov mmu_cr4_features(%rip),%rcx
@@ -127,6 +129,13 @@ skip_multiboot_check:
ret
__high_start:
+ mov $(__HYPERVISOR_DS64),%ecx
+ mov %ecx,%ds
+ mov %ecx,%es
+ mov %ecx,%fs
+ mov %ecx,%gs
+ mov %ecx,%ss
+
lidt idt_descr(%rip)
cmp $(SECONDARY_CPU_FLAG),%ebx
@@ -140,97 +149,109 @@ __high_start:
rep stosb
/* Initialise IDT with simple error defaults. */
- lea ignore_int(%rip),%rdx
- mov $(__HYPERVISOR_CS64 << 16),%eax
- mov %dx,%ax /* selector = 0x0010 = cs */
- mov $0x8E00,%dx /* interrupt gate - dpl=0, present */
- lea idt_table(%rip),%rdi
- mov $256,%rcx
-1: mov %eax,(%rdi)
- mov %edx,4(%rdi)
- add $8,%rdi
+ leaq ignore_int(%rip),%rcx
+ movl %ecx,%eax
+ andl $0xFFFF0000,%eax
+ orl $0x00008E00,%eax
+ shlq $32,%rax
+ movl %ecx,%edx
+ andl $0x0000FFFF,%edx
+ orl $(__HYPERVISOR_CS64<<16),%edx
+ orq %rdx,%rax
+ shrq $32,%rcx
+ movl %ecx,%edx
+ leaq idt_table(%rip),%rdi
+ movl $256,%ecx
+1: movq %rax,(%rdi)
+ movq %rdx,8(%rdi)
+ addq $16,%rdi
loop 1b
/* Pass off the Multiboot info structure to C land. */
- mov 0x1001e0,%edi
- lea start(%rip),%rax
- sub $0x100000,%rax
+ mov multiboot_ptr(%rip),%edi
+ lea start-0x100000(%rip),%rax
add %rax,%rdi
- call cmain
+ call __start_xen
+ ud2 /* Force a panic (invalid opcode). */
/* This is the default interrupt handler. */
int_msg:
.asciz "Unknown interrupt\n"
ignore_int:
cld
- mov $(__HYPERVISOR_DS),%eax
- mov %eax,%ds
- mov %eax,%es
- lea int_msg(%rip),%rdi
- call SYMBOL_NAME(printf)
+ leaq int_msg(%rip),%rdi
+ call printf
1: jmp 1b
- .code32
- .org 0x1e0
-
/*** DESCRIPTOR TABLES ***/
-.globl SYMBOL_NAME(idt)
-.globl SYMBOL_NAME(gdt)
+.globl idt
+.globl gdt
- .org 0x1f0
- .word (LAST_RESERVED_GDT_ENTRY*8)+7
- .long 0x100200 # gdt_table
-
- .org 0x200
-ENTRY(gdt_table)
- .fill FIRST_RESERVED_GDT_ENTRY,8,0
- .quad 0x0000000000000000 /* unused */
- .quad 0x00cf9a000000ffff /* 0x0808 ring 0 code, compatability */
- .quad 0x00af9a000000ffff /* 0x0810 ring 0 code, 64-bit mode */
- .quad 0x00cf92000000ffff /* 0x0818 ring 0 data */
- .quad 0x00cffa000000ffff /* 0x0823 ring 3 code, compatibility */
- .quad 0x00affa000000ffff /* 0x082b ring 3 code, 64-bit mode */
- .quad 0x00cff2000000ffff /* 0x0833 ring 3 data */
- .quad 0x0000000000000000 /* unused */
- .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+ .org 0x300
+ .code32
+
+multiboot_ptr: /* 0x300 */
+ .long 0
.word 0
+nopaging_gdt_descr: /* 0x306 */
+ .word LAST_RESERVED_GDT_BYTE
+ .quad gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
+
+ .word 0,0,0
gdt_descr:
- .word (LAST_RESERVED_GDT_ENTRY*8)+7
-SYMBOL_NAME(gdt):
- .quad SYMBOL_NAME(gdt_table)
+ .word LAST_RESERVED_GDT_BYTE
+gdt:
+ .quad gdt_table - FIRST_RESERVED_GDT_BYTE
- .word 0
+ .word 0,0,0
idt_descr:
- .word 256*8-1
-SYMBOL_NAME(idt):
- .quad SYMBOL_NAME(idt_table)
+ .word 256*16-1
+idt:
+ .quad idt_table
ENTRY(stack_start)
- .quad SYMBOL_NAME(cpu0_stack) + 8100
+ .quad cpu0_stack + STACK_SIZE - 200
high_start:
.quad __high_start
-
-/* Initial PML4 -- level-4 page table */
- .org 0x1000
+
+ .org 0x1000
+ENTRY(gdt_table)
+ .quad 0x0000000000000000 /* unused */
+ .quad 0x00cf9a000000ffff /* 0xe008 ring 0 code, compatibility */
+ .quad 0x00af9a000000ffff /* 0xe010 ring 0 code, 64-bit mode */
+ .quad 0x00cf92000000ffff /* 0xe018 ring 0 data */
+ .quad 0x00cffa000000ffff /* 0xe023 ring 3 code, compatibility */
+ .quad 0x00cff2000000ffff /* 0xe02b ring 3 data */
+ .quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */
+ .quad 0x0000000000000000 /* unused */
+ .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+
+/* Initial PML4 -- level-4 page table. */
+ .org 0x2000
ENTRY(idle_pg_table)
ENTRY(idle_pg_table_4)
- .quad 0x0000000000102007 # PML4[0]
+ .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[0]
.fill 261,8,0
- .quad 0x0000000000102007 # PML4[262]
+ .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[262]
-/* Initial PDP -- level-3 page table */
- .org 0x2000
+/* Initial PDP -- level-3 page table. */
+ .org 0x3000
ENTRY(idle_pg_table_l3)
- .quad 0x0000000000103007
+ .quad idle_pg_table_l2 - __PAGE_OFFSET + 7
-/* Initial PDE -- level-2 page table. */
- .org 0x3000
+ .org 0x4000
+/* Maximum STACK_ORDER for x86/64 is 2. We must therefore ensure that the */
+/* CPU0 stack is aligned on a 4-page boundary. */
+ENTRY(cpu0_stack)
+
+/* Initial PDE -- level-2 page table. Maps first 64MB physical memory. */
+ .org 0x4000 + STACK_SIZE
ENTRY(idle_pg_table_l2)
- .macro identmap from=0, count=512
+ .macro identmap from=0, count=32
.if \count-1
identmap "(\from+0)","(\count/2)"
identmap "(\from+(0x200000*(\count/2)))","(\count/2)"
@@ -240,39 +261,7 @@ ENTRY(idle_pg_table_l2)
.endm
identmap /* Too orangey for crows :-) */
- .org 0x4000
-ENTRY(cpu0_stack) # Initial stack is 8kB
-
- .org 0x6000
+ .org 0x4000 + STACK_SIZE + PAGE_SIZE
+ .code64
ENTRY(stext)
ENTRY(_stext)
-
-.globl map_domain_mem, unmap_domain_mem, ret_from_intr
-map_domain_mem:
-unmap_domain_mem:
-ret_from_intr:
-#undef machine_to_phys_mapping
-.globl copy_to_user, set_intr_gate, die, machine_to_phys_mapping
-copy_to_user:
-set_intr_gate:
-die:
-machine_to_phys_mapping:
-.globl copy_from_user, show_registers, do_iopl
-copy_from_user:
-show_registers:
-do_iopl:
-.globl idt_table, copy_user_generic, memcmp, idt_tables, new_thread
-idt_table:
-copy_user_generic:
-memcmp:
-idt_tables:
-new_thread:
-.globl switch_to, __get_user_1, __get_user_4, __get_user_8, trap_init
-switch_to:
-__get_user_1:
-__get_user_4:
-__get_user_8:
-trap_init:
-.globl set_debugreg
-set_debugreg:
-
diff --git a/xen/arch/x86/cdb.c b/xen/arch/x86/cdb.c
new file mode 100644
index 0000000000..f4b3eedc01
--- /dev/null
+++ b/xen/arch/x86/cdb.c
@@ -0,0 +1,414 @@
+/* Simple hacked-up version of pdb for use in post-mortem debugging of
+ Xen and domain 0. This should be a little cleaner, hopefully. Note
+ that we can't share a serial line with PDB. */
+/* We try to avoid assuming much about what the rest of the system is
+ doing. In particular, dynamic memory allocation is out of the
+ question. */
+/* Resuming after we've stopped used to work, but more through luck
+ than any actual intention. It doesn't at the moment. */
+#include <xen/lib.h>
+#include <asm/uaccess.h>
+#include <xen/spinlock.h>
+#include <xen/serial.h>
+#include <xen/irq.h>
+#include <asm/debugger.h>
+#include <xen/init.h>
+#include <xen/smp.h>
+#include <xen/console.h>
+#include <asm/apic.h>
+
+/* Printk isn't particularly safe just after we've trapped to the
+ debugger. so avoid it. */
+#define dbg_printk(...)
+
+static unsigned char opt_cdb[30] = "none";
+string_param("cdb", opt_cdb);
+
+struct xendbg_context {
+ int serhnd;
+ u8 reply_csum;
+ int currently_attached:1;
+};
+
+/* Like copy_from_user, but safe to call with interrupts disabled.
+
+ Trust me, and don't look behind the curtain. */
+static unsigned
+dbg_copy_from_user(void *dest, const void *src, unsigned len)
+{
+ int __d0, __d1, __d2;
+ ASSERT(!local_irq_is_enabled());
+ __asm__ __volatile__(
+ "1: rep; movsb\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: addl $4, %%esp\n"
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __pre_ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,3b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,2b\n"
+ ".previous\n"
+ : "=c"(__d2), "=D" (__d0), "=S" (__d1)
+ : "0"(len), "1"(dest), "2"(src)
+ : "memory");
+ ASSERT(!local_irq_is_enabled());
+ return __d2;
+}
+
+static void
+xendbg_put_char(u8 data, struct xendbg_context *ctx)
+{
+ ctx->reply_csum += data;
+ serial_putc(ctx->serhnd, data);
+}
+
+static int
+hex_char_val(unsigned char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ else if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ else if (c >= 'A' && c <= 'F')
+ return c - 'A' + 10;
+ else
+ BUG();
+ return -1;
+}
+
+/* Receive a command. Returns -1 on csum error, 0 otherwise. */
+/* Does not acknowledge. */
+static int
+attempt_receive_packet(char *recv_buf, struct xendbg_context *ctx)
+{
+ int count;
+ u8 csum;
+ u8 received_csum;
+ u8 ch;
+
+ /* Skip over everything up to the first '$' */
+ while ((ch = serial_getc(ctx->serhnd)) != '$')
+ ;
+ csum = 0;
+ for (count = 0; count < 4096; count++) {
+ ch = serial_getc(ctx->serhnd);
+ if (ch == '#')
+ break;
+ recv_buf[count] = ch;
+ csum += ch;
+ }
+ if (count == 4096) {
+ dbg_printk("WARNING: GDB sent a stupidly big packet.\n");
+ return -1;
+ }
+ recv_buf[count] = 0;
+ received_csum = hex_char_val(serial_getc(ctx->serhnd)) * 16 +
+ hex_char_val(serial_getc(ctx->serhnd));
+ if (received_csum == csum) {
+ return 0;
+ } else {
+ return -1;
+ }
+}
+
+/* Send a string of bytes to the debugger. */
+static void
+xendbg_send(const char *buf, int count, struct xendbg_context *ctx)
+{
+ int x;
+ for (x = 0; x < count; x++)
+ xendbg_put_char(buf[x], ctx);
+}
+
+/* Receive a command, discarding up to ten packets with csum
+ * errors. Acknowledges all received packets. */
+static int
+receive_command(char *recv_buf, struct xendbg_context *ctx)
+{
+ int r;
+ int count;
+
+ count = 0;
+ do {
+ r = attempt_receive_packet(recv_buf, ctx);
+ if (r < 0)
+ xendbg_put_char('-', ctx);
+ else
+ xendbg_put_char('+', ctx);
+ count++;
+ } while (r < 0 && count < 10);
+ return r;
+}
+
+static void
+xendbg_start_reply(struct xendbg_context *ctx)
+{
+ xendbg_put_char('$', ctx);
+ ctx->reply_csum = 0;
+}
+
+/* Return 0 if the reply was successfully received, !0 otherwise. */
+static int
+xendbg_finish_reply(struct xendbg_context *ctx)
+{
+ char ch;
+ char buf[3];
+
+ sprintf(buf, "%.02x\n", ctx->reply_csum);
+
+ xendbg_put_char('#', ctx);
+ xendbg_send(buf, 2, ctx);
+
+ ch = serial_getc(ctx->serhnd);
+ if (ch == '+')
+ return 0;
+ else
+ return 1;
+}
+
+/* Swap the order of the bytes in a work. */
+static inline unsigned
+bswab32(unsigned val)
+{
+ return (((val >> 0) & 0xff) << 24) |
+ (((val >> 8) & 0xff) << 16) |
+ (((val >> 16) & 0xff) << 8) |
+ (((val >> 24) & 0xff) << 0);
+}
+
+static int
+handle_memory_read_command(unsigned long addr, unsigned long length,
+ struct xendbg_context *ctx)
+{
+ int x;
+ unsigned char val;
+ int r;
+ char buf[2];
+
+ dbg_printk("Memory read starting at %lx, length %lx.\n", addr,
+ length);
+ xendbg_start_reply(ctx);
+ for (x = 0; x < length; x++) {
+ r = dbg_copy_from_user(&val, (void *)(addr + x), 1);
+ if (r != 0) {
+ dbg_printk("Error reading from %lx.\n", addr + x);
+ break;
+ }
+ sprintf(buf, "%.02x", val);
+ xendbg_send(buf, 2, ctx);
+ }
+ if (x == 0)
+ xendbg_send("E05", 3, ctx);
+ dbg_printk("Read done.\n");
+ return xendbg_finish_reply(ctx);
+}
+
+static int
+xendbg_send_reply(const char *buf, struct xendbg_context *ctx)
+{
+ xendbg_start_reply(ctx);
+ xendbg_send(buf, strlen(buf), ctx);
+ return xendbg_finish_reply(ctx);
+}
+
+static int
+handle_register_read_command(struct cpu_user_regs *regs, struct xendbg_context *ctx)
+{
+ char buf[121];
+
+ sprintf(buf,
+ "%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x%.08x",
+ bswab32(regs->eax),
+ bswab32(regs->ecx),
+ bswab32(regs->edx),
+ bswab32(regs->ebx),
+ bswab32(regs->esp),
+ bswab32(regs->ebp),
+ bswab32(regs->esi),
+ bswab32(regs->edi),
+ bswab32(regs->eip),
+ bswab32(regs->eflags),
+ bswab32(regs->cs),
+ bswab32(regs->ss),
+ bswab32(regs->ds),
+ bswab32(regs->es),
+ bswab32(regs->fs),
+ bswab32(regs->gs));
+ return xendbg_send_reply(buf, ctx);
+}
+
+static int
+process_command(char *received_packet, struct cpu_user_regs *regs,
+ struct xendbg_context *ctx)
+{
+ char *ptr;
+ unsigned long addr, length;
+ int retry;
+ int counter;
+ int resume = 0;
+
+ /* Repeat until gdb acks the reply */
+ counter = 0;
+ do {
+ switch (received_packet[0]) {
+ case 'g': /* Read registers */
+ retry = handle_register_read_command(regs, ctx);
+ ASSERT(!local_irq_is_enabled());
+ break;
+ case 'm': /* Read memory */
+ addr = simple_strtoul(received_packet + 1, &ptr, 16);
+ if (ptr == received_packet + 1 ||
+ ptr[0] != ',') {
+ xendbg_send_reply("E03", ctx);
+ return 0;
+ }
+ length = simple_strtoul(ptr + 1, &ptr, 16);
+ if (ptr[0] != 0) {
+ xendbg_send_reply("E04", ctx);
+ return 0;
+ }
+ retry =
+ handle_memory_read_command(addr,
+ length,
+ ctx);
+ ASSERT(!local_irq_is_enabled());
+ break;
+ case 'G': /* Write registers */
+ case 'M': /* Write memory */
+ retry = xendbg_send_reply("E02", ctx);
+ break;
+ case 'D':
+ resume = 1;
+ ctx->currently_attached = 0;
+ retry = xendbg_send_reply("", ctx);
+ break;
+ case 'c': /* Resume at current address */
+ ctx->currently_attached = 1;
+ resume = 1;
+ retry = 0;
+ break;
+ case 'Z': /* We need to claim to support these or gdb
+ won't let you continue the process. */
+ case 'z':
+ retry = xendbg_send_reply("OK", ctx);
+ break;
+
+ case 's': /* Single step */
+ case '?':
+ retry = xendbg_send_reply("S01", ctx);
+ break;
+ default:
+ retry = xendbg_send_reply("", ctx);
+ break;
+ }
+ counter++;
+ } while (retry == 1 && counter < 10);
+ if (retry) {
+ dbg_printk("WARNING: gdb disappeared when we were trying to send it a reply.\n");
+ return 1;
+ }
+ return resume;
+}
+
+static struct xendbg_context
+xdb_ctx = {
+ serhnd : -1
+};
+
+int
+__trap_to_cdb(struct cpu_user_regs *regs)
+{
+ int resume = 0;
+ int r;
+ static atomic_t xendbg_running = ATOMIC_INIT(1);
+ static char recv_buf[4096];
+ unsigned flags;
+
+ if (xdb_ctx.serhnd < 0) {
+ dbg_printk("Debugger not ready yet.\n");
+ return 0;
+ }
+
+ /* We rely on our caller to ensure we're only on one processor
+ * at a time... We should probably panic here, but given that
+ * we're a debugger we should probably be a little tolerant of
+ * things going wrong. */
+ /* We don't want to use a spin lock here, because we're doing
+ two distinct things:
+
+ 1 -- we don't want to run on more than one processor at a time,
+ and
+ 2 -- we want to do something sensible if we re-enter ourselves.
+
+ Spin locks are good for 1, but useless for 2. */
+ if (!atomic_dec_and_test(&xendbg_running)) {
+ printk("WARNING WARNING WARNING: Avoiding recursive xendbg.\n");
+ atomic_inc(&xendbg_running);
+ return 0;
+ }
+
+ smp_send_stop();
+
+ /* Try to make things a little more stable by disabling
+ interrupts while we're here. */
+ local_irq_save(flags);
+
+ watchdog_disable();
+ console_start_sync();
+
+ /* Shouldn't really do this, but otherwise we stop for no
+ obvious reason, which is Bad */
+ printk("Waiting for GDB to attach to XenDBG\n");
+
+ /* If gdb is already attached, tell it we've stopped again. */
+ if (xdb_ctx.currently_attached) {
+ do {
+ r = xendbg_send_reply("S01", &xdb_ctx);
+ } while (r != 0);
+ }
+
+ while (resume == 0) {
+ ASSERT(!local_irq_is_enabled());
+ r = receive_command(recv_buf, &xdb_ctx);
+ ASSERT(!local_irq_is_enabled());
+ if (r < 0) {
+ dbg_printk("GDB disappeared, trying to resume Xen...\n");
+ resume = 1;
+ } else {
+ ASSERT(!local_irq_is_enabled());
+ resume = process_command(recv_buf, regs, &xdb_ctx);
+ ASSERT(!local_irq_is_enabled());
+ }
+ }
+
+ console_end_sync();
+ watchdog_enable();
+ atomic_inc(&xendbg_running);
+
+ local_irq_restore(flags);
+
+ return 0;
+}
+
+static int
+initialize_xendbg(void)
+{
+ if (!strcmp(opt_cdb, "none"))
+ return 0;
+ xdb_ctx.serhnd = serial_parse_handle(opt_cdb);
+ if (xdb_ctx.serhnd == -1)
+ panic("Can't parse %s as CDB serial info.\n", opt_cdb);
+
+ /* Acknowledge any spurious GDB packets. */
+ xendbg_put_char('+', &xdb_ctx);
+
+ printk("Xendbg initialised.\n");
+ return 0;
+}
+
+__initcall(initialize_xendbg);
diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
new file mode 100644
index 0000000000..1241e50921
--- /dev/null
+++ b/xen/arch/x86/cpu/amd.c
@@ -0,0 +1,254 @@
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/bitops.h>
+#include <xen/mm.h>
+#include <xen/smp.h>
+#include <asm/io.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+#define num_physpages 0
+
+/*
+ * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
+ * misexecution of code under Linux. Owners of such processors should
+ * contact AMD for precise details and a CPU swap.
+ *
+ * See http://www.multimania.com/poulot/k6bug.html
+ * http://www.amd.com/K6/k6docs/revgd.html
+ *
+ * The following test is erm.. interesting. AMD neglected to up
+ * the chip setting when fixing the bug but they also tweaked some
+ * performance at the same time..
+ */
+
+extern void vide(void);
+__asm__(".align 4\nvide: ret");
+
+static void __init init_amd(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ int mbytes = num_physpages >> (20-PAGE_SHIFT);
+ int r;
+
+ /*
+ * FIXME: We should handle the K5 here. Set up the write
+ * range and also turn on MSR 83 bits 4 and 31 (write alloc,
+ * no bus pipeline)
+ */
+
+ /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+ clear_bit(0*32+31, c->x86_capability);
+
+ r = get_model_name(c);
+
+ switch(c->x86)
+ {
+ case 4:
+ /*
+ * General Systems BIOSen alias the cpu frequency registers
+ * of the Elan at 0x000df000. Unfortuantly, one of the Linux
+ * drivers subsequently pokes it, and changes the CPU speed.
+ * Workaround : Remove the unneeded alias.
+ */
+#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
+#define CBAR_ENB (0x80000000)
+#define CBAR_KEY (0X000000CB)
+ if (c->x86_model==9 || c->x86_model == 10) {
+ if (inl (CBAR) & CBAR_ENB)
+ outl (0 | CBAR_KEY, CBAR);
+ }
+ break;
+ case 5:
+ if( c->x86_model < 6 )
+ {
+ /* Based on AMD doc 20734R - June 2000 */
+ if ( c->x86_model == 0 ) {
+ clear_bit(X86_FEATURE_APIC, c->x86_capability);
+ set_bit(X86_FEATURE_PGE, c->x86_capability);
+ }
+ break;
+ }
+
+ if ( c->x86_model == 6 && c->x86_mask == 1 ) {
+ const int K6_BUG_LOOP = 1000000;
+ int n;
+ void (*f_vide)(void);
+ unsigned long d, d2;
+
+ printk(KERN_INFO "AMD K6 stepping B detected - ");
+
+ /*
+ * It looks like AMD fixed the 2.6.2 bug and improved indirect
+ * calls at the same time.
+ */
+
+ n = K6_BUG_LOOP;
+ f_vide = vide;
+ rdtscl(d);
+ while (n--)
+ f_vide();
+ rdtscl(d2);
+ d = d2-d;
+
+ /* Knock these two lines out if it debugs out ok */
+ printk(KERN_INFO "AMD K6 stepping B detected - ");
+ /* -- cut here -- */
+ if (d > 20*K6_BUG_LOOP)
+ printk("system stability may be impaired when more than 32 MB are used.\n");
+ else
+ printk("probably OK (after B9730xxxx).\n");
+ printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
+ }
+
+ /* K6 with old style WHCR */
+ if (c->x86_model < 8 ||
+ (c->x86_model== 8 && c->x86_mask < 8)) {
+ /* We can only write allocate on the low 508Mb */
+ if(mbytes>508)
+ mbytes=508;
+
+ rdmsr(MSR_K6_WHCR, l, h);
+ if ((l&0x0000FFFF)==0) {
+ unsigned long flags;
+ l=(1<<0)|((mbytes/4)<<1);
+ local_irq_save(flags);
+ wbinvd();
+ wrmsr(MSR_K6_WHCR, l, h);
+ local_irq_restore(flags);
+ printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+ mbytes);
+ }
+ break;
+ }
+
+ if ((c->x86_model == 8 && c->x86_mask >7) ||
+ c->x86_model == 9 || c->x86_model == 13) {
+ /* The more serious chips .. */
+
+ if(mbytes>4092)
+ mbytes=4092;
+
+ rdmsr(MSR_K6_WHCR, l, h);
+ if ((l&0xFFFF0000)==0) {
+ unsigned long flags;
+ l=((mbytes>>2)<<22)|(1<<16);
+ local_irq_save(flags);
+ wbinvd();
+ wrmsr(MSR_K6_WHCR, l, h);
+ local_irq_restore(flags);
+ printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+ mbytes);
+ }
+
+ /* Set MTRR capability flag if appropriate */
+ if (c->x86_model == 13 || c->x86_model == 9 ||
+ (c->x86_model == 8 && c->x86_mask >= 8))
+ set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
+ break;
+ }
+ break;
+
+ case 6: /* An Athlon/Duron */
+
+ /* Bit 15 of Athlon specific MSR 15, needs to be 0
+ * to enable SSE on Palomino/Morgan/Barton CPU's.
+ * If the BIOS didn't enable it already, enable it here.
+ */
+ if (c->x86_model >= 6 && c->x86_model <= 10) {
+ if (!cpu_has(c, X86_FEATURE_XMM)) {
+ printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+ rdmsr(MSR_K7_HWCR, l, h);
+ l &= ~0x00008000;
+ wrmsr(MSR_K7_HWCR, l, h);
+ set_bit(X86_FEATURE_XMM, c->x86_capability);
+ }
+ }
+
+ /* It's been determined by AMD that Athlons since model 8 stepping 1
+ * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
+ * As per AMD technical note 27212 0.2
+ */
+ if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
+ rdmsr(MSR_K7_CLK_CTL, l, h);
+ if ((l & 0xfff00000) != 0x20000000) {
+ printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
+ ((l & 0x000fffff)|0x20000000));
+ wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
+ }
+ }
+ break;
+ }
+
+ switch (c->x86) {
+ case 15:
+ set_bit(X86_FEATURE_K8, c->x86_capability);
+ break;
+ case 6:
+ set_bit(X86_FEATURE_K7, c->x86_capability);
+ break;
+ }
+
+ display_cacheinfo(c);
+ detect_ht(c);
+
+#ifdef CONFIG_X86_HT
+ /* AMD dual core looks like HT but isn't really. Hide it from the
+ scheduler. This works around problems with the domain scheduler.
+ Also probably gives slightly better scheduling and disables
+ SMT nice which is harmful on dual core.
+ TBD tune the domain scheduler for dual core. */
+ if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+ smp_num_siblings = 1;
+#endif
+
+ if (cpuid_eax(0x80000000) >= 0x80000008) {
+ c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+ if (c->x86_num_cores & (c->x86_num_cores - 1))
+ c->x86_num_cores = 1;
+ }
+}
+
+static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+ /* AMD errata T13 (order #21922) */
+ if ((c->x86 == 6)) {
+ if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */
+ size = 64;
+ if (c->x86_model == 4 &&
+ (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */
+ size = 256;
+ }
+ return size;
+}
+
+static struct cpu_dev amd_cpu_dev __initdata = {
+ .c_vendor = "AMD",
+ .c_ident = { "AuthenticAMD" },
+ .c_models = {
+ { .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
+ {
+ [3] = "486 DX/2",
+ [7] = "486 DX/2-WB",
+ [8] = "486 DX/4",
+ [9] = "486 DX/4-WB",
+ [14] = "Am5x86-WT",
+ [15] = "Am5x86-WB"
+ }
+ },
+ },
+ .c_init = init_amd,
+ .c_identify = generic_identify,
+ .c_size_cache = amd_size_cache,
+};
+
+int __init amd_init_cpu(void)
+{
+ cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
+ return 0;
+}
+
+//early_arch_initcall(amd_init_cpu);
diff --git a/xen/arch/x86/cpu/centaur.c b/xen/arch/x86/cpu/centaur.c
new file mode 100644
index 0000000000..09e5498c2d
--- /dev/null
+++ b/xen/arch/x86/cpu/centaur.c
@@ -0,0 +1,477 @@
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/bitops.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/e820.h>
+#include "cpu.h"
+
+#ifdef CONFIG_X86_OOSTORE
+
+static u32 __init power2(u32 x)
+{
+ u32 s=1;
+ while(s<=x)
+ s<<=1;
+ return s>>=1;
+}
+
+
+/*
+ * Set up an actual MCR
+ */
+
+static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
+{
+ u32 lo, hi;
+
+ hi = base & ~0xFFF;
+ lo = ~(size-1); /* Size is a power of 2 so this makes a mask */
+ lo &= ~0xFFF; /* Remove the ctrl value bits */
+ lo |= key; /* Attribute we wish to set */
+ wrmsr(reg+MSR_IDT_MCR0, lo, hi);
+ mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */
+}
+
+/*
+ * Figure what we can cover with MCR's
+ *
+ * Shortcut: We know you can't put 4Gig of RAM on a winchip
+ */
+
+static u32 __init ramtop(void) /* 16388 */
+{
+ int i;
+ u32 top = 0;
+ u32 clip = 0xFFFFFFFFUL;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ unsigned long start, end;
+
+ if (e820.map[i].addr > 0xFFFFFFFFUL)
+ continue;
+ /*
+ * Don't MCR over reserved space. Ignore the ISA hole
+ * we frob around that catastrophy already
+ */
+
+ if (e820.map[i].type == E820_RESERVED)
+ {
+ if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip)
+ clip = e820.map[i].addr;
+ continue;
+ }
+ start = e820.map[i].addr;
+ end = e820.map[i].addr + e820.map[i].size;
+ if (start >= end)
+ continue;
+ if (end > top)
+ top = end;
+ }
+ /* Everything below 'top' should be RAM except for the ISA hole.
+ Because of the limited MCR's we want to map NV/ACPI into our
+ MCR range for gunk in RAM
+
+ Clip might cause us to MCR insufficient RAM but that is an
+ acceptable failure mode and should only bite obscure boxes with
+ a VESA hole at 15Mb
+
+ The second case Clip sometimes kicks in is when the EBDA is marked
+ as reserved. Again we fail safe with reasonable results
+ */
+
+ if(top>clip)
+ top=clip;
+
+ return top;
+}
+
+/*
+ * Compute a set of MCR's to give maximum coverage
+ */
+
+static int __init centaur_mcr_compute(int nr, int key)
+{
+ u32 mem = ramtop();
+ u32 root = power2(mem);
+ u32 base = root;
+ u32 top = root;
+ u32 floor = 0;
+ int ct = 0;
+
+ while(ct<nr)
+ {
+ u32 fspace = 0;
+
+ /*
+ * Find the largest block we will fill going upwards
+ */
+
+ u32 high = power2(mem-top);
+
+ /*
+ * Find the largest block we will fill going downwards
+ */
+
+ u32 low = base/2;
+
+ /*
+ * Don't fill below 1Mb going downwards as there
+ * is an ISA hole in the way.
+ */
+
+ if(base <= 1024*1024)
+ low = 0;
+
+ /*
+ * See how much space we could cover by filling below
+ * the ISA hole
+ */
+
+ if(floor == 0)
+ fspace = 512*1024;
+ else if(floor ==512*1024)
+ fspace = 128*1024;
+
+ /* And forget ROM space */
+
+ /*
+ * Now install the largest coverage we get
+ */
+
+ if(fspace > high && fspace > low)
+ {
+ centaur_mcr_insert(ct, floor, fspace, key);
+ floor += fspace;
+ }
+ else if(high > low)
+ {
+ centaur_mcr_insert(ct, top, high, key);
+ top += high;
+ }
+ else if(low > 0)
+ {
+ base -= low;
+ centaur_mcr_insert(ct, base, low, key);
+ }
+ else break;
+ ct++;
+ }
+ /*
+ * We loaded ct values. We now need to set the mask. The caller
+ * must do this bit.
+ */
+
+ return ct;
+}
+
+static void __init centaur_create_optimal_mcr(void)
+{
+ int i;
+ /*
+ * Allocate up to 6 mcrs to mark as much of ram as possible
+ * as write combining and weak write ordered.
+ *
+ * To experiment with: Linux never uses stack operations for
+ * mmio spaces so we could globally enable stack operation wc
+ *
+ * Load the registers with type 31 - full write combining, all
+ * writes weakly ordered.
+ */
+ int used = centaur_mcr_compute(6, 31);
+
+ /*
+ * Wipe unused MCRs
+ */
+
+ for(i=used;i<8;i++)
+ wrmsr(MSR_IDT_MCR0+i, 0, 0);
+}
+
+static void __init winchip2_create_optimal_mcr(void)
+{
+ u32 lo, hi;
+ int i;
+
+ /*
+ * Allocate up to 6 mcrs to mark as much of ram as possible
+ * as write combining, weak store ordered.
+ *
+ * Load the registers with type 25
+ * 8 - weak write ordering
+ * 16 - weak read ordering
+ * 1 - write combining
+ */
+
+ int used = centaur_mcr_compute(6, 25);
+
+ /*
+ * Mark the registers we are using.
+ */
+
+ rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ for(i=0;i<used;i++)
+ lo|=1<<(9+i);
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+
+ /*
+ * Wipe unused MCRs
+ */
+
+ for(i=used;i<8;i++)
+ wrmsr(MSR_IDT_MCR0+i, 0, 0);
+}
+
+/*
+ * Handle the MCR key on the Winchip 2.
+ */
+
+static void __init winchip2_unprotect_mcr(void)
+{
+ u32 lo, hi;
+ u32 key;
+
+ rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ lo&=~0x1C0; /* blank bits 8-6 */
+ key = (lo>>17) & 7;
+ lo |= key<<6; /* replace with unlock key */
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+
+static void __init winchip2_protect_mcr(void)
+{
+ u32 lo, hi;
+
+ rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ lo&=~0x1C0; /* blank bits 8-6 */
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+#endif /* CONFIG_X86_OOSTORE */
+
+#define ACE_PRESENT (1 << 6)
+#define ACE_ENABLED (1 << 7)
+#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */
+
+#define RNG_PRESENT (1 << 2)
+#define RNG_ENABLED (1 << 3)
+#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
+
+static void __init init_c3(struct cpuinfo_x86 *c)
+{
+ u32 lo, hi;
+
+ /* Test for Centaur Extended Feature Flags presence */
+ if (cpuid_eax(0xC0000000) >= 0xC0000001) {
+ u32 tmp = cpuid_edx(0xC0000001);
+
+ /* enable ACE unit, if present and disabled */
+ if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
+ rdmsr (MSR_VIA_FCR, lo, hi);
+ lo |= ACE_FCR; /* enable ACE unit */
+ wrmsr (MSR_VIA_FCR, lo, hi);
+ printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+ }
+
+ /* enable RNG unit, if present and disabled */
+ if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
+ rdmsr (MSR_VIA_RNG, lo, hi);
+ lo |= RNG_ENABLE; /* enable RNG unit */
+ wrmsr (MSR_VIA_RNG, lo, hi);
+ printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+ }
+
+ /* store Centaur Extended Feature Flags as
+ * word 5 of the CPU capability bit array
+ */
+ c->x86_capability[5] = cpuid_edx(0xC0000001);
+ }
+
+ /* Cyrix III family needs CX8 & PGE explicity enabled. */
+ if (c->x86_model >=6 && c->x86_model <= 9) {
+ rdmsr (MSR_VIA_FCR, lo, hi);
+ lo |= (1<<1 | 1<<7);
+ wrmsr (MSR_VIA_FCR, lo, hi);
+ set_bit(X86_FEATURE_CX8, c->x86_capability);
+ }
+
+ /* Before Nehemiah, the C3's had 3dNOW! */
+ if (c->x86_model >=6 && c->x86_model <9)
+ set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+
+ get_model_name(c);
+ display_cacheinfo(c);
+}
+
+static void __init init_centaur(struct cpuinfo_x86 *c)
+{
+ enum {
+ ECX8=1<<1,
+ EIERRINT=1<<2,
+ DPM=1<<3,
+ DMCE=1<<4,
+ DSTPCLK=1<<5,
+ ELINEAR=1<<6,
+ DSMC=1<<7,
+ DTLOCK=1<<8,
+ EDCTLB=1<<8,
+ EMMX=1<<9,
+ DPDC=1<<11,
+ EBRPRED=1<<12,
+ DIC=1<<13,
+ DDC=1<<14,
+ DNA=1<<15,
+ ERETSTK=1<<16,
+ E2MMX=1<<19,
+ EAMD3D=1<<20,
+ };
+
+ char *name;
+ u32 fcr_set=0;
+ u32 fcr_clr=0;
+ u32 lo,hi,newlo;
+ u32 aa,bb,cc,dd;
+
+ /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+ clear_bit(0*32+31, c->x86_capability);
+
+ switch (c->x86) {
+
+ case 5:
+ switch(c->x86_model) {
+ case 4:
+ name="C6";
+ fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
+ fcr_clr=DPDC;
+ printk(KERN_NOTICE "Disabling bugged TSC.\n");
+ clear_bit(X86_FEATURE_TSC, c->x86_capability);
+#ifdef CONFIG_X86_OOSTORE
+ centaur_create_optimal_mcr();
+ /* Enable
+ write combining on non-stack, non-string
+ write combining on string, all types
+ weak write ordering
+
+ The C6 original lacks weak read order
+
+ Note 0x120 is write only on Winchip 1 */
+
+ wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
+#endif
+ break;
+ case 8:
+ switch(c->x86_mask) {
+ default:
+ name="2";
+ break;
+ case 7 ... 9:
+ name="2A";
+ break;
+ case 10 ... 15:
+ name="2B";
+ break;
+ }
+ fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
+ fcr_clr=DPDC;
+#ifdef CONFIG_X86_OOSTORE
+ winchip2_unprotect_mcr();
+ winchip2_create_optimal_mcr();
+ rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ /* Enable
+ write combining on non-stack, non-string
+ write combining on string, all types
+ weak write ordering
+ */
+ lo|=31;
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ winchip2_protect_mcr();
+#endif
+ break;
+ case 9:
+ name="3";
+ fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
+ fcr_clr=DPDC;
+#ifdef CONFIG_X86_OOSTORE
+ winchip2_unprotect_mcr();
+ winchip2_create_optimal_mcr();
+ rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ /* Enable
+ write combining on non-stack, non-string
+ write combining on string, all types
+ weak write ordering
+ */
+ lo|=31;
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ winchip2_protect_mcr();
+#endif
+ break;
+ case 10:
+ name="4";
+ /* no info on the WC4 yet */
+ break;
+ default:
+ name="??";
+ }
+
+ rdmsr(MSR_IDT_FCR1, lo, hi);
+ newlo=(lo|fcr_set) & (~fcr_clr);
+
+ if (newlo!=lo) {
+ printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo );
+ wrmsr(MSR_IDT_FCR1, newlo, hi );
+ } else {
+ printk(KERN_INFO "Centaur FCR is 0x%X\n",lo);
+ }
+ /* Emulate MTRRs using Centaur's MCR. */
+ set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability);
+ /* Report CX8 */
+ set_bit(X86_FEATURE_CX8, c->x86_capability);
+ /* Set 3DNow! on Winchip 2 and above. */
+ if (c->x86_model >=8)
+ set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+ /* See if we can find out some more. */
+ if ( cpuid_eax(0x80000000) >= 0x80000005 ) {
+ /* Yes, we can. */
+ cpuid(0x80000005,&aa,&bb,&cc,&dd);
+ /* Add L1 data and code cache sizes. */
+ c->x86_cache_size = (cc>>24)+(dd>>24);
+ }
+ sprintf( c->x86_model_id, "WinChip %s", name );
+ break;
+
+ case 6:
+ init_c3(c);
+ break;
+ }
+}
+
+static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+ /* VIA C3 CPUs (670-68F) need further shifting. */
+ if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
+ size >>= 8;
+
+ /* VIA also screwed up Nehemiah stepping 1, and made
+ it return '65KB' instead of '64KB'
+ - Note, it seems this may only be in engineering samples. */
+ if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65))
+ size -=1;
+
+ return size;
+}
+
+static struct cpu_dev centaur_cpu_dev __initdata = {
+ .c_vendor = "Centaur",
+ .c_ident = { "CentaurHauls" },
+ .c_init = init_centaur,
+ .c_size_cache = centaur_size_cache,
+};
+
+int __init centaur_init_cpu(void)
+{
+ cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
+ return 0;
+}
+
+//early_arch_initcall(centaur_init_cpu);
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
new file mode 100644
index 0000000000..fcb5c16ecb
--- /dev/null
+++ b/xen/arch/x86/cpu/common.c
@@ -0,0 +1,581 @@
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/string.h>
+#include <xen/delay.h>
+#include <xen/smp.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+
+#include "cpu.h"
+
+#define tsc_disable 0
+#define disable_pse 0
+
+static int cachesize_override __initdata = -1;
+static int disable_x86_fxsr __initdata = 0;
+static int disable_x86_serial_nr __initdata = 1;
+
+struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+
+extern void mcheck_init(struct cpuinfo_x86 *c);
+
+static void default_init(struct cpuinfo_x86 * c)
+{
+ /* Not much we can do here... */
+ /* Check if at least it has cpuid */
+ if (c->cpuid_level == -1) {
+ /* No cpuid. It must be an ancient CPU */
+ if (c->x86 == 4)
+ strcpy(c->x86_model_id, "486");
+ else if (c->x86 == 3)
+ strcpy(c->x86_model_id, "386");
+ }
+}
+
+static struct cpu_dev default_cpu = {
+ .c_init = default_init,
+};
+static struct cpu_dev * this_cpu = &default_cpu;
+
+int __init get_model_name(struct cpuinfo_x86 *c)
+{
+ unsigned int *v;
+ char *p, *q;
+
+ if (cpuid_eax(0x80000000) < 0x80000004)
+ return 0;
+
+ v = (unsigned int *) c->x86_model_id;
+ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+ c->x86_model_id[48] = 0;
+
+ /* Intel chips right-justify this string for some dumb reason;
+ undo that brain damage */
+ p = q = &c->x86_model_id[0];
+ while ( *p == ' ' )
+ p++;
+ if ( p != q ) {
+ while ( *p )
+ *q++ = *p++;
+ while ( q <= &c->x86_model_id[48] )
+ *q++ = '\0'; /* Zero-pad the rest */
+ }
+
+ return 1;
+}
+
+
+void __init display_cacheinfo(struct cpuinfo_x86 *c)
+{
+ unsigned int n, dummy, ecx, edx, l2size;
+
+ n = cpuid_eax(0x80000000);
+
+ if (n >= 0x80000005) {
+ cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+ c->x86_cache_size=(ecx>>24)+(edx>>24);
+ }
+
+ if (n < 0x80000006) /* Some chips just has a large L1. */
+ return;
+
+ ecx = cpuid_ecx(0x80000006);
+ l2size = ecx >> 16;
+
+ /* do processor-specific cache resizing */
+ if (this_cpu->c_size_cache)
+ l2size = this_cpu->c_size_cache(c,l2size);
+
+ /* Allow user to override all this if necessary. */
+ if (cachesize_override != -1)
+ l2size = cachesize_override;
+
+ if ( l2size == 0 )
+ return; /* Again, no L2 cache is possible */
+
+ c->x86_cache_size = l2size;
+
+ printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+ l2size, ecx & 0xFF);
+}
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table only is used unless init_<vendor>() below doesn't set it; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
+
+/* Look up CPU names by table lookup. */
+static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+{
+ struct cpu_model_info *info;
+
+ if ( c->x86_model >= 16 )
+ return NULL; /* Range check */
+
+ if (!this_cpu)
+ return NULL;
+
+ info = this_cpu->c_models;
+
+ while (info && info->family) {
+ if (info->family == c->x86)
+ return info->model_names[c->x86_model];
+ info++;
+ }
+ return NULL; /* Not found */
+}
+
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+{
+ char *v = c->x86_vendor_id;
+ int i;
+
+ for (i = 0; i < X86_VENDOR_NUM; i++) {
+ if (cpu_devs[i]) {
+ if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
+ (cpu_devs[i]->c_ident[1] &&
+ !strcmp(v,cpu_devs[i]->c_ident[1]))) {
+ c->x86_vendor = i;
+ if (!early)
+ this_cpu = cpu_devs[i];
+ break;
+ }
+ }
+ }
+}
+
+
+static int __init x86_fxsr_setup(char * s)
+{
+ disable_x86_fxsr = 1;
+ return 1;
+}
+__setup("nofxsr", x86_fxsr_setup);
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(unsigned long flag)
+{
+ unsigned long f1, f2;
+
+ asm("pushf\n\t"
+ "pushf\n\t"
+ "pop %0\n\t"
+ "mov %0,%1\n\t"
+ "xor %2,%0\n\t"
+ "push %0\n\t"
+ "popf\n\t"
+ "pushf\n\t"
+ "pop %0\n\t"
+ "popf\n\t"
+ : "=&r" (f1), "=&r" (f2)
+ : "ir" (flag));
+
+ return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+int __init have_cpuid_p(void)
+{
+ return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+/* Do minimum CPU detection early.
+ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+ The others are not touched to avoid unwanted side effects. */
+void __init early_cpu_detect(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ c->x86_cache_alignment = 32;
+
+ if (!have_cpuid_p())
+ return;
+
+ /* Get vendor name */
+ cpuid(0x00000000, &c->cpuid_level,
+ (int *)&c->x86_vendor_id[0],
+ (int *)&c->x86_vendor_id[8],
+ (int *)&c->x86_vendor_id[4]);
+
+ get_cpu_vendor(c, 1);
+
+ c->x86 = 4;
+ if (c->cpuid_level >= 0x00000001) {
+ u32 junk, tfms, cap0, misc;
+ cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
+ c->x86 = (tfms >> 8) & 15;
+ c->x86_model = (tfms >> 4) & 15;
+ if (c->x86 == 0xf) {
+ c->x86 += (tfms >> 20) & 0xff;
+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
+ }
+ c->x86_mask = tfms & 15;
+ if (cap0 & (1<<19))
+ c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
+ c->x86_capability[0] = cap0; /* Added for Xen bootstrap */
+ }
+
+ early_intel_workaround(c);
+}
+
+void __init generic_identify(struct cpuinfo_x86 * c)
+{
+ u32 tfms, xlvl;
+ int junk;
+
+ if (have_cpuid_p()) {
+ /* Get vendor name */
+ cpuid(0x00000000, &c->cpuid_level,
+ (int *)&c->x86_vendor_id[0],
+ (int *)&c->x86_vendor_id[8],
+ (int *)&c->x86_vendor_id[4]);
+
+ get_cpu_vendor(c, 0);
+ /* Initialize the standard set of capabilities */
+ /* Note that the vendor-specific code below might override */
+
+ /* Intel-defined flags: level 0x00000001 */
+ if ( c->cpuid_level >= 0x00000001 ) {
+ u32 capability, excap;
+ cpuid(0x00000001, &tfms, &junk, &excap, &capability);
+ c->x86_capability[0] = capability;
+ c->x86_capability[4] = excap;
+ c->x86 = (tfms >> 8) & 15;
+ c->x86_model = (tfms >> 4) & 15;
+ if (c->x86 == 0xf) {
+ c->x86 += (tfms >> 20) & 0xff;
+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
+ }
+ c->x86_mask = tfms & 15;
+ } else {
+ /* Have CPUID level 0 only - unheard of */
+ c->x86 = 4;
+ }
+
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+ if ( xlvl >= 0x80000001 ) {
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
+ }
+ if ( xlvl >= 0x80000004 )
+ get_model_name(c); /* Default name */
+ }
+ }
+}
+
+static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
+ /* Disable processor serial number */
+ unsigned long lo,hi;
+ rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+ lo |= 0x200000;
+ wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+ printk(KERN_NOTICE "CPU serial number disabled.\n");
+ clear_bit(X86_FEATURE_PN, c->x86_capability);
+
+ /* Disabling the serial number may affect the cpuid level */
+ c->cpuid_level = cpuid_eax(0);
+ }
+}
+
+static int __init x86_serial_nr_setup(char *s)
+{
+ disable_x86_serial_nr = 0;
+ return 1;
+}
+__setup("serialnumber", x86_serial_nr_setup);
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ c->x86_cache_size = -1;
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+ c->cpuid_level = -1; /* CPUID not detected */
+ c->x86_model = c->x86_mask = 0; /* So far unknown... */
+ c->x86_vendor_id[0] = '\0'; /* Unset */
+ c->x86_model_id[0] = '\0'; /* Unset */
+ c->x86_num_cores = 1;
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+ if (!have_cpuid_p()) {
+ /* First of all, decide if this is a 486 or higher */
+ /* It's a 486 if we can modify the AC flag */
+ if ( flag_is_changeable_p(X86_EFLAGS_AC) )
+ c->x86 = 4;
+ else
+ c->x86 = 3;
+ }
+
+ generic_identify(c);
+
+#ifdef NOISY_CAPS
+ printk(KERN_DEBUG "CPU: After generic identify, caps:");
+ for (i = 0; i < NCAPINTS; i++)
+ printk(" %08x", c->x86_capability[i]);
+ printk("\n");
+#endif
+
+ if (this_cpu->c_identify) {
+ this_cpu->c_identify(c);
+#ifdef NOISY_CAPS
+ printk(KERN_DEBUG "CPU: After vendor identify, caps:");
+ for (i = 0; i < NCAPINTS; i++)
+ printk(" %08x", c->x86_capability[i]);
+ printk("\n");
+#endif
+ }
+
+ /*
+ * Vendor-specific initialization. In this section we
+ * canonicalize the feature flags, meaning if there are
+ * features a certain CPU supports which CPUID doesn't
+ * tell us, CPUID claiming incorrect flags, or other bugs,
+ * we handle them here.
+ *
+ * At the end of this section, c->x86_capability better
+ * indicate the features this CPU genuinely supports!
+ */
+ if (this_cpu->c_init)
+ this_cpu->c_init(c);
+
+ /* Disable the PN if appropriate */
+ squash_the_stupid_serial_number(c);
+
+ /*
+ * The vendor-specific functions might have changed features. Now
+ * we do "generic changes."
+ */
+
+ /* TSC disabled? */
+ if ( tsc_disable )
+ clear_bit(X86_FEATURE_TSC, c->x86_capability);
+
+ /* FXSR disabled? */
+ if (disable_x86_fxsr) {
+ clear_bit(X86_FEATURE_FXSR, c->x86_capability);
+ clear_bit(X86_FEATURE_XMM, c->x86_capability);
+ }
+
+ if (disable_pse)
+ clear_bit(X86_FEATURE_PSE, c->x86_capability);
+
+ /* If the model name is still unset, do table lookup. */
+ if ( !c->x86_model_id[0] ) {
+ char *p;
+ p = table_lookup_model(c);
+ if ( p )
+ strcpy(c->x86_model_id, p);
+ else
+ /* Last resort... */
+ sprintf(c->x86_model_id, "%02x/%02x",
+ c->x86_vendor, c->x86_model);
+ }
+
+ /* Now the feature flags better reflect actual CPU features! */
+#ifdef NOISY_CAPS
+ printk(KERN_DEBUG "CPU: After all inits, caps:");
+ for (i = 0; i < NCAPINTS; i++)
+ printk(" %08x", c->x86_capability[i]);
+ printk("\n");
+#endif
+ /*
+ * On SMP, boot_cpu_data holds the common feature set between
+ * all CPUs; so make sure that we indicate which features are
+ * common between the CPUs. The first time this routine gets
+ * executed, c == &boot_cpu_data.
+ */
+ if ( c != &boot_cpu_data ) {
+ /* AND the already accumulated flags with these */
+ for ( i = 0 ; i < NCAPINTS ; i++ )
+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+ }
+
+ /* Init Machine Check Exception if available. */
+#ifdef CONFIG_X86_MCE
+ mcheck_init(c);
+#endif
+}
+/*
+ * Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
+ */
+
+void __init dodgy_tsc(void)
+{
+ if (( boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX ) ||
+ ( boot_cpu_data.x86_vendor == X86_VENDOR_NSC ))
+ cpu_devs[X86_VENDOR_CYRIX]->c_init(&boot_cpu_data);
+}
+
+#ifdef CONFIG_X86_HT
+void __init detect_ht(struct cpuinfo_x86 *c)
+{
+ u32 eax, ebx, ecx, edx;
+ int index_lsb, index_msb, tmp;
+ int cpu = smp_processor_id();
+
+ if (!cpu_has(c, X86_FEATURE_HT))
+ return;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+ if (smp_num_siblings == 1) {
+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+ } else if (smp_num_siblings > 1 ) {
+ index_lsb = 0;
+ index_msb = 31;
+
+ if (smp_num_siblings > NR_CPUS) {
+ printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
+ smp_num_siblings = 1;
+ return;
+ }
+ tmp = smp_num_siblings;
+ while ((tmp & 1) == 0) {
+ tmp >>=1 ;
+ index_lsb++;
+ }
+ tmp = smp_num_siblings;
+ while ((tmp & 0x80000000 ) == 0) {
+ tmp <<=1 ;
+ index_msb--;
+ }
+ if (index_lsb != index_msb )
+ index_msb++;
+ phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ phys_proc_id[cpu]);
+ }
+}
+#endif
+
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+ char *vendor = NULL;
+
+ if (c->x86_vendor < X86_VENDOR_NUM)
+ vendor = this_cpu->c_vendor;
+ else if (c->cpuid_level >= 0)
+ vendor = c->x86_vendor_id;
+
+ if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+ printk("%s ", vendor);
+
+ if (!c->x86_model_id[0])
+ printk("%d86", c->x86);
+ else
+ printk("%s", c->x86_model_id);
+
+ if (c->x86_mask || c->cpuid_level >= 0)
+ printk(" stepping %02x\n", c->x86_mask);
+ else
+ printk("\n");
+}
+
+cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+
+/* This is hacky. :)
+ * We're emulating future behavior.
+ * In the future, the cpu-specific init functions will be called implicitly
+ * via the magic of initcalls.
+ * They will insert themselves into the cpu_devs structure.
+ * Then, when cpu_init() is called, we can just iterate over that array.
+ */
+
+extern int intel_cpu_init(void);
+extern int cyrix_init_cpu(void);
+extern int nsc_init_cpu(void);
+extern int amd_init_cpu(void);
+extern int centaur_init_cpu(void);
+extern int transmeta_init_cpu(void);
+extern int rise_init_cpu(void);
+void early_cpu_detect(void);
+
+void __init early_cpu_init(void)
+{
+ intel_cpu_init();
+ amd_init_cpu();
+#ifdef CONFIG_X86_32
+ cyrix_init_cpu();
+ nsc_init_cpu();
+ centaur_init_cpu();
+ transmeta_init_cpu();
+ rise_init_cpu();
+#endif
+ early_cpu_detect();
+}
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __init cpu_init (void)
+{
+ int cpu = smp_processor_id();
+ struct tss_struct *t = &init_tss[cpu];
+ char gdt_load[10];
+
+ if (cpu_test_and_set(cpu, cpu_initialized)) {
+ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+ for (;;) local_irq_enable();
+ }
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+ if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+
+ *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
+ *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(current);
+ __asm__ __volatile__ ( "lgdt %0" : "=m" (gdt_load) );
+
+ /* No nested task. */
+ __asm__("pushf ; andw $0xbfff,(%"__OP"sp) ; popf");
+
+ /* Ensure FPU gets initialised for each domain. */
+ stts();
+
+ /* Set up and load the per-CPU TSS and LDT. */
+ t->bitmap = IOBMP_INVALID_OFFSET;
+#if defined(CONFIG_X86_32)
+ t->ss0 = __HYPERVISOR_DS;
+ t->esp0 = get_stack_bottom();
+#elif defined(CONFIG_X86_64)
+ /* Bottom-of-stack must be 16-byte aligned! */
+ BUG_ON((get_stack_bottom() & 15) != 0);
+ t->rsp0 = get_stack_bottom();
+#endif
+ set_tss_desc(cpu,t);
+ load_TR(cpu);
+ __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );
+
+ /* Clear all 6 debug registers: */
+#define CD(register) __asm__("mov %0,%%db" #register ::"r"(0UL) );
+ CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
+#undef CD
+
+ /* Install correct page table. */
+ write_ptbase(current);
+}
diff --git a/xen/arch/x86/cpu/cpu.h b/xen/arch/x86/cpu/cpu.h
new file mode 100644
index 0000000000..9df38d993c
--- /dev/null
+++ b/xen/arch/x86/cpu/cpu.h
@@ -0,0 +1,31 @@
+
+struct cpu_model_info {
+ int vendor;
+ int family;
+ char *model_names[16];
+};
+
+/* attempt to consolidate cpu attributes */
+struct cpu_dev {
+ char * c_vendor;
+
+ /* some have two possibilities for cpuid string */
+ char * c_ident[2];
+
+ struct cpu_model_info c_models[4];
+
+ void (*c_init)(struct cpuinfo_x86 * c);
+ void (*c_identify)(struct cpuinfo_x86 * c);
+ unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
+};
+
+extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
+
+extern int get_model_name(struct cpuinfo_x86 *c);
+extern void display_cacheinfo(struct cpuinfo_x86 *c);
+
+extern void generic_identify(struct cpuinfo_x86 * c);
+extern int have_cpuid_p(void);
+
+extern void early_intel_workaround(struct cpuinfo_x86 *c);
+
diff --git a/xen/arch/x86/cpu/cyrix.c b/xen/arch/x86/cpu/cyrix.c
new file mode 100644
index 0000000000..6a3b98ea71
--- /dev/null
+++ b/xen/arch/x86/cpu/cyrix.c
@@ -0,0 +1,400 @@
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/irq.h>
+#include <xen/bitops.h>
+#include <xen/delay.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+/*
+ * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
+ */
+void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+{
+ unsigned char ccr2, ccr3;
+ unsigned long flags;
+
+ /* we test for DEVID by checking whether CCR3 is writable */
+ local_irq_save(flags);
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, ccr3 ^ 0x80);
+ getCx86(0xc0); /* dummy to change bus */
+
+ if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */
+ ccr2 = getCx86(CX86_CCR2);
+ setCx86(CX86_CCR2, ccr2 ^ 0x04);
+ getCx86(0xc0); /* dummy */
+
+ if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */
+ *dir0 = 0xfd;
+ else { /* Cx486S A step */
+ setCx86(CX86_CCR2, ccr2);
+ *dir0 = 0xfe;
+ }
+ }
+ else {
+ setCx86(CX86_CCR3, ccr3); /* restore CCR3 */
+
+ /* read DIR0 and DIR1 CPU registers */
+ *dir0 = getCx86(CX86_DIR0);
+ *dir1 = getCx86(CX86_DIR1);
+ }
+ local_irq_restore(flags);
+}
+
+/*
+ * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
+ * order to identify the Cyrix CPU model after we're out of setup.c
+ *
+ * Actually since bugs.h doesn't even reference this perhaps someone should
+ * fix the documentation ???
+ */
+static unsigned char Cx86_dir0_msb __initdata = 0;
+
+static char Cx86_model[][9] __initdata = {
+ "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
+ "M II ", "Unknown"
+};
+static char Cx486_name[][5] __initdata = {
+ "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
+ "SRx2", "DRx2"
+};
+static char Cx486S_name[][4] __initdata = {
+ "S", "S2", "Se", "S2e"
+};
+static char Cx486D_name[][4] __initdata = {
+ "DX", "DX2", "?", "?", "?", "DX4"
+};
+static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
+static char cyrix_model_mult1[] __initdata = "12??43";
+static char cyrix_model_mult2[] __initdata = "12233445";
+
+/*
+ * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
+ * BIOSes for compatibility with DOS games. This makes the udelay loop
+ * work correctly, and improves performance.
+ *
+ * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
+ */
+
+static void __init check_cx686_slop(struct cpuinfo_x86 *c)
+{
+ unsigned long flags;
+
+ if (Cx86_dir0_msb == 3) {
+ unsigned char ccr3, ccr5;
+
+ local_irq_save(flags);
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
+ ccr5 = getCx86(CX86_CCR5);
+ if (ccr5 & 2)
+ setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */
+ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+ local_irq_restore(flags);
+ }
+}
+
+
+static void __init set_cx86_reorder(void)
+{
+ u8 ccr3;
+
+ printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
+
+ /* Load/Store Serialize to mem access disable (=reorder it)  */
+ setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
+ /* set load/store serialize from 1GB to 4GB */
+ ccr3 |= 0xe0;
+ setCx86(CX86_CCR3, ccr3);
+}
+
+static void __init set_cx86_memwb(void)
+{
+ u32 cr0;
+
+ printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+
+ /* CCR2 bit 2: unlock NW bit */
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+ /* set 'Not Write-through' */
+ cr0 = 0x20000000;
+ __asm__("movl %%cr0,%%eax\n\t"
+ "orl %0,%%eax\n\t"
+ "movl %%eax,%%cr0\n"
+ : : "r" (cr0)
+ :"ax");
+ /* CCR2 bit 2: lock NW bit and set WT1 */
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
+}
+
+static void __init set_cx86_inc(void)
+{
+ unsigned char ccr3;
+
+ printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
+
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
+ /* PCR1 -- Performance Control */
+ /* Incrementor on, whatever that is */
+ setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
+ /* PCR0 -- Performance Control */
+ /* Incrementor Margin 10 */
+ setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
+ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+}
+
+/*
+ * Configure later MediaGX and/or Geode processor.
+ */
+
+static void __init geode_configure(void)
+{
+ unsigned long flags;
+ u8 ccr3, ccr4;
+ local_irq_save(flags);
+
+ /* Suspend on halt power saving and enable #SUSP pin */
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
+
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */
+
+ ccr4 = getCx86(CX86_CCR4);
+ ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */
+
+ setCx86(CX86_CCR3, ccr3);
+
+ set_cx86_memwb();
+ set_cx86_reorder();
+ set_cx86_inc();
+
+ local_irq_restore(flags);
+}
+
+
+static void __init init_cyrix(struct cpuinfo_x86 *c)
+{
+ unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
+ char *buf = c->x86_model_id;
+ const char *p = NULL;
+
+ /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+ clear_bit(0*32+31, c->x86_capability);
+
+ /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */
+ if ( test_bit(1*32+24, c->x86_capability) ) {
+ clear_bit(1*32+24, c->x86_capability);
+ set_bit(X86_FEATURE_CXMMX, c->x86_capability);
+ }
+
+ do_cyrix_devid(&dir0, &dir1);
+
+ check_cx686_slop(c);
+
+ Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */
+ dir0_lsn = dir0 & 0xf; /* model or clock multiplier */
+
+ /* common case step number/rev -- exceptions handled below */
+ c->x86_model = (dir1 >> 4) + 1;
+ c->x86_mask = dir1 & 0xf;
+
+ /* Now cook; the original recipe is by Channing Corn, from Cyrix.
+ * We do the same thing for each generation: we work out
+ * the model, multiplier and stepping. Black magic included,
+ * to make the silicon step/rev numbers match the printed ones.
+ */
+
+ switch (dir0_msn) {
+ unsigned char tmp;
+
+ case 0: /* Cx486SLC/DLC/SRx/DRx */
+ p = Cx486_name[dir0_lsn & 7];
+ break;
+
+ case 1: /* Cx486S/DX/DX2/DX4 */
+ p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5]
+ : Cx486S_name[dir0_lsn & 3];
+ break;
+
+ case 2: /* 5x86 */
+ Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
+ p = Cx86_cb+2;
+ break;
+
+ case 3: /* 6x86/6x86L */
+ Cx86_cb[1] = ' ';
+ Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5];
+ if (dir1 > 0x21) { /* 686L */
+ Cx86_cb[0] = 'L';
+ p = Cx86_cb;
+ (c->x86_model)++;
+ } else /* 686 */
+ p = Cx86_cb+1;
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+ /* 6x86's contain this bug */
+ c->coma_bug = 1;
+ break;
+
+ case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
+ c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
+
+ /* GXm supports extended cpuid levels 'ala' AMD */
+ if (c->cpuid_level == 2) {
+ /* Enable cxMMX extensions (GX1 Datasheet 54) */
+ setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+
+ /* GXlv/GXm/GX1 */
+ if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63)
+ geode_configure();
+ get_model_name(c); /* get CPU marketing name */
+ return;
+ }
+ else { /* MediaGX */
+ Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
+ p = Cx86_cb+2;
+ c->x86_model = (dir1 & 0x20) ? 1 : 2;
+ }
+ break;
+
+ case 5: /* 6x86MX/M II */
+ if (dir1 > 7)
+ {
+ dir0_msn++; /* M II */
+ /* Enable MMX extensions (App note 108) */
+ setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+ }
+ else
+ {
+ c->coma_bug = 1; /* 6x86MX, it has the bug. */
+ }
+ tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
+ Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
+ p = Cx86_cb+tmp;
+ if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
+ (c->x86_model)++;
+ /* Emulate MTRRs using Cyrix's ARRs. */
+ set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+ break;
+
+ case 0xf: /* Cyrix 486 without DEVID registers */
+ switch (dir0_lsn) {
+ case 0xd: /* either a 486SLC or DLC w/o DEVID */
+ dir0_msn = 0;
+ p = Cx486_name[(c->hard_math) ? 1 : 0];
+ break;
+
+ case 0xe: /* a 486S A step */
+ dir0_msn = 0;
+ p = Cx486S_name[0];
+ break;
+ }
+ break;
+
+ default: /* unknown (shouldn't happen, we know everyone ;-) */
+ dir0_msn = 7;
+ break;
+ }
+ strcpy(buf, Cx86_model[dir0_msn & 7]);
+ if (p) strcat(buf, p);
+ return;
+}
+
+/*
+ * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
+ * by the fact that they preserve the flags across the division of 5/2.
+ * PII and PPro exhibit this behavior too, but they have cpuid available.
+ */
+
+/*
+ * Perform the Cyrix 5/2 test. A Cyrix won't change
+ * the flags, while other 486 chips will.
+ */
+static inline int test_cyrix_52div(void)
+{
+ unsigned int test;
+
+ __asm__ __volatile__(
+ "sahf\n\t" /* clear flags (%eax = 0x0005) */
+ "div %b2\n\t" /* divide 5 by 2 */
+ "lahf" /* store flags into %ah */
+ : "=a" (test)
+ : "0" (5), "q" (2)
+ : "cc");
+
+ /* AH is 0x02 on Cyrix after the divide.. */
+ return (unsigned char) (test >> 8) == 0x02;
+}
+
+static void cyrix_identify(struct cpuinfo_x86 * c)
+{
+ /* Detect Cyrix with disabled CPUID */
+ if ( c->x86 == 4 && test_cyrix_52div() ) {
+ unsigned char dir0, dir1;
+
+ strcpy(c->x86_vendor_id, "CyrixInstead");
+ c->x86_vendor = X86_VENDOR_CYRIX;
+
+ /* Actually enable cpuid on the older cyrix */
+
+ /* Retrieve CPU revisions */
+
+ do_cyrix_devid(&dir0, &dir1);
+
+ dir0>>=4;
+
+ /* Check it is an affected model */
+
+ if (dir0 == 5 || dir0 == 3)
+ {
+ unsigned char ccr3, ccr4;
+ unsigned long flags;
+ printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+ local_irq_save(flags);
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
+ ccr4 = getCx86(CX86_CCR4);
+ setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */
+ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+ local_irq_restore(flags);
+ }
+ }
+ generic_identify(c);
+}
+
+static struct cpu_dev cyrix_cpu_dev __initdata = {
+ .c_vendor = "Cyrix",
+ .c_ident = { "CyrixInstead" },
+ .c_init = init_cyrix,
+ .c_identify = cyrix_identify,
+};
+
+int __init cyrix_init_cpu(void)
+{
+ cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev;
+ return 0;
+}
+
+//early_arch_initcall(cyrix_init_cpu);
+
+static struct cpu_dev nsc_cpu_dev __initdata = {
+ .c_vendor = "NSC",
+ .c_ident = { "Geode by NSC" },
+ .c_init = init_cyrix,
+ .c_identify = generic_identify,
+};
+
+int __init nsc_init_cpu(void)
+{
+ cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev;
+ return 0;
+}
+
+//early_arch_initcall(nsc_init_cpu);
diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
new file mode 100644
index 0000000000..861723719b
--- /dev/null
+++ b/xen/arch/x86/cpu/intel.c
@@ -0,0 +1,249 @@
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/kernel.h>
+#include <xen/string.h>
+#include <xen/bitops.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#include <asm/vmx_vmcs.h>
+
+#include "cpu.h"
+
+#define select_idle_routine(x) ((void)0)
+
+extern int trap_init_f00f_bug(void);
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+/*
+ * Alignment at which movsl is preferred for bulk memory copies.
+ */
+struct movsl_mask movsl_mask;
+#endif
+
+void __init early_intel_workaround(struct cpuinfo_x86 *c)
+{
+ if (c->x86_vendor != X86_VENDOR_INTEL)
+ return;
+ /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
+ if (c->x86 == 15 && c->x86_cache_alignment == 64)
+ c->x86_cache_alignment = 128;
+}
+
+/*
+ * Early probe support logic for ppro memory erratum #50
+ *
+ * This is called before we do cpu ident work
+ */
+
+int __init ppro_with_ram_bug(void)
+{
+ /* Uses data from early_cpu_detect now */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6 &&
+ boot_cpu_data.x86_model == 1 &&
+ boot_cpu_data.x86_mask < 8) {
+ printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+ return 1;
+ }
+ return 0;
+}
+
+
+/*
+ * P4 Xeon errata 037 workaround.
+ * Hardware prefetcher may cause stale data to be loaded into the cache.
+ */
+static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
+{
+ unsigned long lo, hi;
+
+ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+ rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+ if ((lo & (1<<9)) == 0) {
+ printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
+ printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
+ lo |= (1<<9); /* Disable hw prefetching */
+ wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+ }
+ }
+}
+
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+ unsigned int l2 = 0;
+ char *p = NULL;
+
+#ifdef CONFIG_X86_F00F_BUG
+ /*
+ * All current models of Pentium and Pentium with MMX technology CPUs
+ * have the F0 0F bug, which lets nonprivileged users lock up the system.
+ * Note that the workaround only should be initialized once...
+ */
+ c->f00f_bug = 0;
+ if ( c->x86 == 5 ) {
+ static int f00f_workaround_enabled = 0;
+
+ c->f00f_bug = 1;
+ if ( !f00f_workaround_enabled ) {
+ trap_init_f00f_bug();
+ printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+ f00f_workaround_enabled = 1;
+ }
+ }
+#endif
+
+ select_idle_routine(c);
+ l2 = init_intel_cacheinfo(c);
+
+ /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
+ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
+ /* Names for the Pentium II/Celeron processors
+ detectable only by also checking the cache size.
+ Dixon is NOT a Celeron. */
+ if (c->x86 == 6) {
+ switch (c->x86_model) {
+ case 5:
+ if (c->x86_mask == 0) {
+ if (l2 == 0)
+ p = "Celeron (Covington)";
+ else if (l2 == 256)
+ p = "Mobile Pentium II (Dixon)";
+ }
+ break;
+
+ case 6:
+ if (l2 == 128)
+ p = "Celeron (Mendocino)";
+ else if (c->x86_mask == 0 || c->x86_mask == 5)
+ p = "Celeron-A";
+ break;
+
+ case 8:
+ if (l2 == 128)
+ p = "Celeron (Coppermine)";
+ break;
+ }
+ }
+
+ if ( p )
+ strcpy(c->x86_model_id, p);
+
+ detect_ht(c);
+
+ /* Work around errata */
+ Intel_errata_workarounds(c);
+
+#ifdef CONFIG_X86_INTEL_USERCOPY
+ /*
+ * Set up the preferred alignment for movsl bulk memory moves
+ */
+ switch (c->x86) {
+ case 4: /* 486: untested */
+ break;
+ case 5: /* Old Pentia: untested */
+ break;
+ case 6: /* PII/PIII only like movsl with 8-byte alignment */
+ movsl_mask.mask = 7;
+ break;
+ case 15: /* P4 is OK down to 8-byte alignment */
+ movsl_mask.mask = 7;
+ break;
+ }
+#endif
+
+ if (c->x86 == 15)
+ set_bit(X86_FEATURE_P4, c->x86_capability);
+ if (c->x86 == 6)
+ set_bit(X86_FEATURE_P3, c->x86_capability);
+
+#ifdef CONFIG_VMX
+ start_vmx();
+#endif
+}
+
+
+static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+{
+ /* Intel PIII Tualatin. This comes in two flavours.
+ * One has 256kb of cache, the other 512. We have no way
+ * to determine which, so we use a boottime override
+ * for the 512kb model, and assume 256 otherwise.
+ */
+ if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
+ size = 256;
+ return size;
+}
+
+static struct cpu_dev intel_cpu_dev __initdata = {
+ .c_vendor = "Intel",
+ .c_ident = { "GenuineIntel" },
+ .c_models = {
+ { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
+ {
+ [0] = "486 DX-25/33",
+ [1] = "486 DX-50",
+ [2] = "486 SX",
+ [3] = "486 DX/2",
+ [4] = "486 SL",
+ [5] = "486 SX/2",
+ [7] = "486 DX/2-WB",
+ [8] = "486 DX/4",
+ [9] = "486 DX/4-WB"
+ }
+ },
+ { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
+ {
+ [0] = "Pentium 60/66 A-step",
+ [1] = "Pentium 60/66",
+ [2] = "Pentium 75 - 200",
+ [3] = "OverDrive PODP5V83",
+ [4] = "Pentium MMX",
+ [7] = "Mobile Pentium 75 - 200",
+ [8] = "Mobile Pentium MMX"
+ }
+ },
+ { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
+ {
+ [0] = "Pentium Pro A-step",
+ [1] = "Pentium Pro",
+ [3] = "Pentium II (Klamath)",
+ [4] = "Pentium II (Deschutes)",
+ [5] = "Pentium II (Deschutes)",
+ [6] = "Mobile Pentium II",
+ [7] = "Pentium III (Katmai)",
+ [8] = "Pentium III (Coppermine)",
+ [10] = "Pentium III (Cascades)",
+ [11] = "Pentium III (Tualatin)",
+ }
+ },
+ { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names =
+ {
+ [0] = "Pentium 4 (Unknown)",
+ [1] = "Pentium 4 (Willamette)",
+ [2] = "Pentium 4 (Northwood)",
+ [4] = "Pentium 4 (Foster)",
+ [5] = "Pentium 4 (Foster)",
+ }
+ },
+ },
+ .c_init = init_intel,
+ .c_identify = generic_identify,
+ .c_size_cache = intel_size_cache,
+};
+
+__init int intel_cpu_init(void)
+{
+ cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
+ return 0;
+}
+
+// arch_initcall(intel_cpu_init);
+
diff --git a/xen/arch/x86/cpu/intel_cacheinfo.c b/xen/arch/x86/cpu/intel_cacheinfo.c
new file mode 100644
index 0000000000..f309467e29
--- /dev/null
+++ b/xen/arch/x86/cpu/intel_cacheinfo.c
@@ -0,0 +1,142 @@
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <asm/processor.h>
+
+#define LVL_1_INST 1
+#define LVL_1_DATA 2
+#define LVL_2 3
+#define LVL_3 4
+#define LVL_TRACE 5
+
+struct _cache_table
+{
+ unsigned char descriptor;
+ char cache_type;
+ short size;
+};
+
+/* all the cache descriptor types we care about (no TLB entries; trace cache descriptors are included) */
+static struct _cache_table cache_table[] __initdata =
+{
+ { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
+ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
+ { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
+ { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
+ { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
+ { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
+ { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
+ { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
+ { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
+ { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
+ { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
+ { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
+ { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
+ { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
+ { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
+ { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
+ { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
+ { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
+ { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
+ { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
+ { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
+ { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
+ { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
+ { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
+ { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
+ { 0x00, 0, 0}
+};
+
+unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c)
+{
+ unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+
+ if (c->cpuid_level > 1) {
+ /* supports eax=2 call */
+ int i, j, n;
+ int regs[4];
+ unsigned char *dp = (unsigned char *)regs;
+
+ /* Number of times to iterate */
+ n = cpuid_eax(2) & 0xFF;
+
+ for ( i = 0 ; i < n ; i++ ) {
+ cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+ /* If bit 31 is set, this is an unknown format */
+ for ( j = 0 ; j < 3 ; j++ ) {
+ if ( regs[j] < 0 ) regs[j] = 0;
+ }
+
+ /* Byte 0 is level count, not a descriptor */
+ for ( j = 1 ; j < 16 ; j++ ) {
+ unsigned char des = dp[j];
+ unsigned char k = 0;
+
+ /* look up this descriptor in the table */
+ while (cache_table[k].descriptor != 0)
+ {
+ if (cache_table[k].descriptor == des) {
+ switch (cache_table[k].cache_type) {
+ case LVL_1_INST:
+ l1i += cache_table[k].size;
+ break;
+ case LVL_1_DATA:
+ l1d += cache_table[k].size;
+ break;
+ case LVL_2:
+ l2 += cache_table[k].size;
+ break;
+ case LVL_3:
+ l3 += cache_table[k].size;
+ break;
+ case LVL_TRACE:
+ trace += cache_table[k].size;
+ break;
+ }
+
+ break;
+ }
+
+ k++;
+ }
+ }
+ }
+
+ if ( trace )
+ printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+ else if ( l1i )
+ printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+ if ( l1d )
+ printk(", L1 D cache: %dK\n", l1d);
+ else
+ printk("\n");
+ if ( l2 )
+ printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+ if ( l3 )
+ printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+ /*
+ * This assumes the L3 cache is shared; it typically lives in
+ * the northbridge. The L1 caches are included by the L2
+ * cache, and so should not be included for the purpose of
+ * SMP switching weights.
+ */
+ c->x86_cache_size = l2 ? l2 : (l1i+l1d);
+ }
+
+ return l2;
+}
diff --git a/xen/arch/x86/cpu/rise.c b/xen/arch/x86/cpu/rise.c
new file mode 100644
index 0000000000..3b1b0f4048
--- /dev/null
+++ b/xen/arch/x86/cpu/rise.c
@@ -0,0 +1,54 @@
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/bitops.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+static void __init init_rise(struct cpuinfo_x86 *c)
+{
+ printk("CPU: Rise iDragon");
+ if (c->x86_model > 2)
+ printk(" II");
+ printk("\n");
+
+ /* Unhide possibly hidden capability flags
+ The mp6 iDragon family don't have MSRs.
+ We switch on extra features with this cpuid weirdness: */
+ __asm__ (
+ "movl $0x6363452a, %%eax\n\t"
+ "movl $0x3231206c, %%ecx\n\t"
+ "movl $0x2a32313a, %%edx\n\t"
+ "cpuid\n\t"
+ "movl $0x63634523, %%eax\n\t"
+ "movl $0x32315f6c, %%ecx\n\t"
+ "movl $0x2333313a, %%edx\n\t"
+ "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx"
+ );
+ set_bit(X86_FEATURE_CX8, c->x86_capability);
+}
+
+static struct cpu_dev rise_cpu_dev __initdata = {
+ .c_vendor = "Rise",
+ .c_ident = { "RiseRiseRise" },
+ .c_models = {
+ { .vendor = X86_VENDOR_RISE, .family = 5, .model_names =
+ {
+ [0] = "iDragon",
+ [2] = "iDragon",
+ [8] = "iDragon II",
+ [9] = "iDragon II"
+ }
+ },
+ },
+ .c_init = init_rise,
+};
+
+int __init rise_init_cpu(void)
+{
+ cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev;
+ return 0;
+}
+
+//early_arch_initcall(rise_init_cpu);
diff --git a/xen/arch/x86/cpu/transmeta.c b/xen/arch/x86/cpu/transmeta.c
new file mode 100644
index 0000000000..c296006256
--- /dev/null
+++ b/xen/arch/x86/cpu/transmeta.c
@@ -0,0 +1,108 @@
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include "cpu.h"
+
+static void __init init_transmeta(struct cpuinfo_x86 *c)
+{
+ unsigned int cap_mask, uk, max, dummy;
+ unsigned int cms_rev1, cms_rev2;
+ unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev;
+ char cpu_info[65];
+
+ get_model_name(c); /* Same as AMD/Cyrix */
+ display_cacheinfo(c);
+
+ /* Print CMS and CPU revision */
+ max = cpuid_eax(0x80860000);
+ cpu_rev = 0;
+ if ( max >= 0x80860001 ) {
+ cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
+ if (cpu_rev != 0x02000000) {
+ printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+ (cpu_rev >> 24) & 0xff,
+ (cpu_rev >> 16) & 0xff,
+ (cpu_rev >> 8) & 0xff,
+ cpu_rev & 0xff,
+ cpu_freq);
+ }
+ }
+ if ( max >= 0x80860002 ) {
+ cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
+ if (cpu_rev == 0x02000000) {
+ printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+ new_cpu_rev, cpu_freq);
+ }
+ printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+ (cms_rev1 >> 24) & 0xff,
+ (cms_rev1 >> 16) & 0xff,
+ (cms_rev1 >> 8) & 0xff,
+ cms_rev1 & 0xff,
+ cms_rev2);
+ }
+ if ( max >= 0x80860006 ) {
+ cpuid(0x80860003,
+ (void *)&cpu_info[0],
+ (void *)&cpu_info[4],
+ (void *)&cpu_info[8],
+ (void *)&cpu_info[12]);
+ cpuid(0x80860004,
+ (void *)&cpu_info[16],
+ (void *)&cpu_info[20],
+ (void *)&cpu_info[24],
+ (void *)&cpu_info[28]);
+ cpuid(0x80860005,
+ (void *)&cpu_info[32],
+ (void *)&cpu_info[36],
+ (void *)&cpu_info[40],
+ (void *)&cpu_info[44]);
+ cpuid(0x80860006,
+ (void *)&cpu_info[48],
+ (void *)&cpu_info[52],
+ (void *)&cpu_info[56],
+ (void *)&cpu_info[60]);
+ cpu_info[64] = '\0';
+ printk(KERN_INFO "CPU: %s\n", cpu_info);
+ }
+
+ /* Unhide possibly hidden capability flags */
+ rdmsr(0x80860004, cap_mask, uk);
+ wrmsr(0x80860004, ~0, uk);
+ c->x86_capability[0] = cpuid_edx(0x00000001);
+ wrmsr(0x80860004, cap_mask, uk);
+
+ /* If we can run i686 user-space code, call us an i686 */
+#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
+ if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 )
+ c->x86 = 6;
+}
+
+static void transmeta_identify(struct cpuinfo_x86 * c)
+{
+ u32 xlvl;
+ generic_identify(c);
+
+ /* Transmeta-defined flags: level 0x80860001 */
+ xlvl = cpuid_eax(0x80860000);
+ if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+ if ( xlvl >= 0x80860001 )
+ c->x86_capability[2] = cpuid_edx(0x80860001);
+ }
+}
+
+static struct cpu_dev transmeta_cpu_dev __initdata = {
+ .c_vendor = "Transmeta",
+ .c_ident = { "GenuineTMx86", "TransmetaCPU" },
+ .c_init = init_transmeta,
+ .c_identify = transmeta_identify,
+};
+
+int __init transmeta_init_cpu(void)
+{
+ cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
+ return 0;
+}
+
+//early_arch_initcall(transmeta_init_cpu);
diff --git a/xen/arch/x86/dmi_scan.c b/xen/arch/x86/dmi_scan.c
new file mode 100644
index 0000000000..74a487aade
--- /dev/null
+++ b/xen/arch/x86/dmi_scan.c
@@ -0,0 +1,493 @@
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/kernel.h>
+#include <xen/string.h>
+#include <xen/init.h>
+#include <xen/cache.h>
+#include <xen/acpi.h>
+#include <asm/io.h>
+#include <asm/system.h>
+#include <xen/dmi.h>
+
+#define bt_ioremap(b,l) ((u8 *)__acpi_map_table(b,l))
+#define bt_iounmap(b,l) ((void)0)
+#define memcpy_fromio memcpy
+#define alloc_bootmem(l) xmalloc_bytes(l)
+
+int es7000_plat = 0;
+
+struct dmi_header
+{
+ u8 type;
+ u8 length;
+ u16 handle;
+};
+
+#undef DMI_DEBUG
+
+#ifdef DMI_DEBUG
+#define dmi_printk(x) printk x
+#else
+#define dmi_printk(x)
+#endif
+
+static char * __init dmi_string(struct dmi_header *dm, u8 s)
+{
+ char *bp=(char *)dm;
+ bp+=dm->length;
+ if(!s)
+ return "";
+ s--;
+ while(s>0 && *bp)
+ {
+ bp+=strlen(bp);
+ bp++;
+ s--;
+ }
+ return bp;
+}
+
+/*
+ * We have to be cautious here. We have seen BIOSes with DMI pointers
+ * pointing to completely the wrong place for example
+ */
+
+static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *))
+{
+ u8 *buf;
+ struct dmi_header *dm;
+ u8 *data;
+ int i=0;
+
+ buf = bt_ioremap(base, len);
+ if(buf==NULL)
+ return -1;
+
+ data = buf;
+
+ /*
+ * Stop when we see all the items the table claimed to have
+ * OR we run off the end of the table (also happens)
+ */
+
+ while(i<num && data-buf+sizeof(struct dmi_header)<=len)
+ {
+ dm=(struct dmi_header *)data;
+ /*
+ * We want to know the total length (formatted area and strings)
+ * before decoding to make sure we won't run off the table in
+ * dmi_decode or dmi_string
+ */
+ data+=dm->length;
+ while(data-buf<len-1 && (data[0] || data[1]))
+ data++;
+ if(data-buf<len-1)
+ decode(dm);
+ data+=2;
+ i++;
+ }
+ bt_iounmap(buf, len);
+ return 0;
+}
+
+
+inline static int __init dmi_checksum(u8 *buf)
+{
+ u8 sum=0;
+ int a;
+
+ for(a=0; a<15; a++)
+ sum+=buf[a];
+ return (sum==0);
+}
+
+static int __init dmi_iterate(void (*decode)(struct dmi_header *))
+{
+ u8 buf[15];
+ char __iomem *p, *q;
+
+ /*
+ * no iounmap() for that ioremap(); it would be a no-op, but it's
+ * so early in setup that sucker gets confused into doing what
+ * it shouldn't if we actually call it.
+ */
+ p = ioremap(0xF0000, 0x10000);
+ if (p == NULL)
+ return -1;
+ for (q = p; q < p + 0x10000; q += 16) {
+ memcpy_fromio(buf, q, 15);
+ if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf))
+ {
+ u16 num=buf[13]<<8|buf[12];
+ u16 len=buf[7]<<8|buf[6];
+ u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
+
+ /*
+ * DMI version 0.0 means that the real version is taken from
+ * the SMBIOS version, which we don't know at this point.
+ */
+ if(buf[14]!=0)
+ printk(KERN_INFO "DMI %d.%d present.\n",
+ buf[14]>>4, buf[14]&0x0F);
+ else
+ printk(KERN_INFO "DMI present.\n");
+ dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
+ num, len));
+ dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
+ base));
+ if(dmi_table(base,len, num, decode)==0)
+ return 0;
+ }
+ }
+ return -1;
+}
+
+static char *dmi_ident[DMI_STRING_MAX];
+
+/*
+ * Save a DMI string
+ */
+
+static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
+{
+ char *d = (char*)dm;
+ char *p = dmi_string(dm, d[string]);
+ if(p==NULL || *p == 0)
+ return;
+ if (dmi_ident[slot])
+ return;
+ dmi_ident[slot] = alloc_bootmem(strlen(p)+1);
+ if(dmi_ident[slot])
+ strcpy(dmi_ident[slot], p);
+ else
+ printk(KERN_ERR "dmi_save_ident: out of memory.\n");
+}
+
+/*
+ * Ugly compatibility crap.
+ */
+#define dmi_blacklist dmi_system_id
+#define NO_MATCH { DMI_NONE, NULL}
+#define MATCH DMI_MATCH
+
+/*
+ * Toshiba keyboard likes to repeat keys when they are not repeated.
+ */
+
+static __init int broken_toshiba_keyboard(struct dmi_blacklist *d)
+{
+ printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n");
+ return 0;
+}
+
+
+#ifdef CONFIG_ACPI_SLEEP
+static __init int reset_videomode_after_s3(struct dmi_blacklist *d)
+{
+ /* See acpi_wakeup.S */
+ extern long acpi_video_flags;
+ acpi_video_flags |= 2;
+ return 0;
+}
+#endif
+
+
+#ifdef CONFIG_ACPI_BOOT
+extern int acpi_force;
+
+static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: acpi off\n",d->ident);
+ disable_acpi();
+ } else {
+ printk(KERN_NOTICE
+ "Warning: DMI blacklist says broken, but acpi forced\n");
+ }
+ return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident);
+ disable_acpi();
+ acpi_ht = 1;
+ } else {
+ printk(KERN_NOTICE
+ "Warning: acpi=force overrules DMI blacklist: acpi=ht\n");
+ }
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_ACPI_PCI
+static __init int disable_acpi_irq(struct dmi_blacklist *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+ d->ident);
+ acpi_noirq_set();
+ }
+ return 0;
+}
+static __init int disable_acpi_pci(struct dmi_blacklist *d)
+{
+ if (!acpi_force) {
+ printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+ d->ident);
+ acpi_disable_pci();
+ }
+ return 0;
+}
+#endif
+
+/*
+ * Process the DMI blacklists
+ */
+
+
+/*
+ * This will be expanded over time to force things like the APM
+ * interrupt mask settings according to the laptop
+ */
+
+static __initdata struct dmi_blacklist dmi_blacklist[]={
+
+ { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */
+ MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+ NO_MATCH, NO_MATCH, NO_MATCH
+ } },
+#ifdef CONFIG_ACPI_SLEEP
+ { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */
+ MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
+ NO_MATCH, NO_MATCH, NO_MATCH
+ } },
+#endif
+
+#ifdef CONFIG_ACPI_BOOT
+ /*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@sourceforge.net
+ */
+
+ /*
+ * Boxes that need ACPI disabled
+ */
+
+ { dmi_disable_acpi, "IBM Thinkpad", {
+ MATCH(DMI_BOARD_VENDOR, "IBM"),
+ MATCH(DMI_BOARD_NAME, "2629H1G"),
+ NO_MATCH, NO_MATCH }},
+
+ /*
+ * Boxes that need acpi=ht
+ */
+
+ { force_acpi_ht, "FSC Primergy T850", {
+ MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "DELL GX240", {
+ MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+ MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "HP VISUALIZE NT Workstation", {
+ MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+ MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "Compaq Workstation W8000", {
+ MATCH(DMI_SYS_VENDOR, "Compaq"),
+ MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "ASUS P4B266", {
+ MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ MATCH(DMI_BOARD_NAME, "P4B266"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "ASUS P2B-DS", {
+ MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ MATCH(DMI_BOARD_NAME, "P2B-DS"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "ASUS CUR-DLS", {
+ MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "ABIT i440BX-W83977", {
+ MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+ MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "IBM Bladecenter", {
+ MATCH(DMI_BOARD_VENDOR, "IBM"),
+ MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "IBM eServer xSeries 360", {
+ MATCH(DMI_BOARD_VENDOR, "IBM"),
+ MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "IBM eserver xSeries 330", {
+ MATCH(DMI_BOARD_VENDOR, "IBM"),
+ MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+ NO_MATCH, NO_MATCH }},
+
+ { force_acpi_ht, "IBM eserver xSeries 440", {
+ MATCH(DMI_BOARD_VENDOR, "IBM"),
+ MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+ NO_MATCH, NO_MATCH }},
+
+#endif // CONFIG_ACPI_BOOT
+
+#ifdef CONFIG_ACPI_PCI
+ /*
+ * Boxes that need ACPI PCI IRQ routing disabled
+ */
+
+ { disable_acpi_irq, "ASUS A7V", {
+ MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+ MATCH(DMI_BOARD_NAME, "<A7V>"),
+ /* newer BIOS, Revision 1011, does work */
+ MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"),
+ NO_MATCH }},
+
+ /*
+ * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+ */
+ { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */
+ MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ MATCH(DMI_BOARD_NAME, "PR-DLS"),
+ MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"),
+ MATCH(DMI_BIOS_DATE, "03/21/2003") }},
+
+ { disable_acpi_pci, "Acer TravelMate 36x Laptop", {
+ MATCH(DMI_SYS_VENDOR, "Acer"),
+ MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+ NO_MATCH, NO_MATCH
+ } },
+
+#endif
+
+ { NULL, }
+};
+
+/*
+ * Process a DMI table entry. Right now all we care about are the BIOS
+ * and machine entries. For 2.5 we should pull the smbus controller info
+ * out of here.
+ */
+
+static void __init dmi_decode(struct dmi_header *dm)
+{
+#ifdef DMI_DEBUG
+ u8 *data = (u8 *)dm;
+#endif
+
+ switch(dm->type)
+ {
+ case 0:
+ dmi_printk(("BIOS Vendor: %s\n",
+ dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
+ dmi_printk(("BIOS Version: %s\n",
+ dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
+ dmi_printk(("BIOS Release: %s\n",
+ dmi_string(dm, data[8])));
+ dmi_save_ident(dm, DMI_BIOS_DATE, 8);
+ break;
+ case 1:
+ dmi_printk(("System Vendor: %s\n",
+ dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
+ dmi_printk(("Product Name: %s\n",
+ dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
+ dmi_printk(("Version: %s\n",
+ dmi_string(dm, data[6])));
+ dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
+ dmi_printk(("Serial Number: %s\n",
+ dmi_string(dm, data[7])));
+ break;
+ case 2:
+ dmi_printk(("Board Vendor: %s\n",
+ dmi_string(dm, data[4])));
+ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
+ dmi_printk(("Board Name: %s\n",
+ dmi_string(dm, data[5])));
+ dmi_save_ident(dm, DMI_BOARD_NAME, 5);
+ dmi_printk(("Board Version: %s\n",
+ dmi_string(dm, data[6])));
+ dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
+ break;
+ }
+}
+
+void __init dmi_scan_machine(void)
+{
+ int err = dmi_iterate(dmi_decode);
+ if(err == 0)
+ dmi_check_system(dmi_blacklist);
+ else
+ printk(KERN_INFO "DMI not present.\n");
+}
+
+
+/**
+ * dmi_check_system - check system DMI data
+ * @list: array of dmi_system_id structures to match against
+ *
+ * Walk the blacklist table running matching functions until someone
+ * returns non-zero or we hit the end. Callback function is called for
+ * each successful match. Returns the number of matches.
+ */
+int dmi_check_system(struct dmi_system_id *list)
+{
+ int i, count = 0;
+ struct dmi_system_id *d = list;
+
+ while (d->ident) {
+ for (i = 0; i < ARRAY_SIZE(d->matches); i++) {
+ int s = d->matches[i].slot;
+ if (s == DMI_NONE)
+ continue;
+ if (dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr))
+ continue;
+ /* No match */
+ goto fail;
+ }
+ if (d->callback && d->callback(d))
+ break;
+ count++;
+fail: d++;
+ }
+
+ return count;
+}
+
+EXPORT_SYMBOL(dmi_check_system);
+
+/**
+ * dmi_get_system_info - return DMI data value
+ * @field: data index (see enum dmi_field)
+ *
+ * Returns one DMI data value, can be used to perform
+ * complex DMI data checks.
+ */
+char * dmi_get_system_info(int field)
+{
+ return dmi_ident[field];
+}
+
+EXPORT_SYMBOL(dmi_get_system_info);
diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c
index b6ade93ee7..e8979417ec 100644
--- a/xen/arch/x86/dom0_ops.c
+++ b/xen/arch/x86/dom0_ops.c
@@ -13,9 +13,8 @@
#include <public/dom0_ops.h>
#include <xen/sched.h>
#include <xen/event.h>
-#include <asm/domain_page.h>
+#include <xen/domain_page.h>
#include <asm/msr.h>
-#include <asm/pdb.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <asm/shadow.h>
@@ -27,8 +26,6 @@
#define TRC_DOM0OP_ENTER_BASE 0x00020000
#define TRC_DOM0OP_LEAVE_BASE 0x00030000
-extern unsigned int alloc_new_dom_mem(struct domain *, unsigned int);
-
static int msr_cpu_mask;
static unsigned long msr_addr;
static unsigned long msr_lo;
@@ -37,20 +34,20 @@ static unsigned long msr_hi;
static void write_msr_for(void *unused)
{
if (((1 << current->processor) & msr_cpu_mask))
- wrmsr(msr_addr, msr_lo, msr_hi);
+ (void)wrmsr_user(msr_addr, msr_lo, msr_hi);
}
static void read_msr_for(void *unused)
{
if (((1 << current->processor) & msr_cpu_mask))
- rdmsr(msr_addr, msr_lo, msr_hi);
+ (void)rdmsr_user(msr_addr, msr_lo, msr_hi);
}
long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
{
long ret = 0;
- if ( !IS_PRIV(current) )
+ if ( !IS_PRIV(current->domain) )
return -EPERM;
switch ( op->cmd )
@@ -137,10 +134,44 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
}
break;
- case DOM0_IOPL:
+ case DOM0_IOPORT_PERMISSION:
{
- extern long do_iopl(domid_t, unsigned int);
- ret = do_iopl(op->u.iopl.domain, op->u.iopl.iopl);
+ struct domain *d;
+ unsigned int fp = op->u.ioport_permission.first_port;
+ unsigned int np = op->u.ioport_permission.nr_ports;
+ unsigned int p;
+
+ ret = -EINVAL;
+ if ( (fp + np) >= 65536 )
+ break;
+
+ ret = -ESRCH;
+ if ( unlikely((d = find_domain_by_id(
+ op->u.ioport_permission.domain)) == NULL) )
+ break;
+
+ ret = -ENOMEM;
+ if ( d->arch.iobmp_mask != NULL )
+ {
+ if ( (d->arch.iobmp_mask = xmalloc_array(
+ u8, IOBMP_BYTES)) == NULL )
+ {
+ put_domain(d);
+ break;
+ }
+ memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
+ }
+
+ ret = 0;
+ for ( p = fp; p < (fp + np); p++ )
+ {
+ if ( op->u.ioport_permission.allow_access )
+ clear_bit(p, d->arch.iobmp_mask);
+ else
+ set_bit(p, d->arch.iobmp_mask);
+ }
+
+ put_domain(d);
}
break;
@@ -148,8 +179,8 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
{
dom0_physinfo_t *pi = &op->u.physinfo;
- pi->ht_per_core = opt_noht ? 1 : ht_per_core;
- pi->cores = smp_num_cpus / pi->ht_per_core;
+ pi->ht_per_core = ht_per_core;
+ pi->cores = num_online_cpus() / ht_per_core;
pi->total_pages = max_page;
pi->free_pages = avail_domheap_pages();
pi->cpu_khz = cpu_khz;
@@ -228,7 +259,7 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
break;
}
- l_arr = (unsigned long *)alloc_xenheap_page();
+ l_arr = alloc_xenheap_page();
ret = 0;
for( n = 0; n < num; )
@@ -293,12 +324,50 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
n += j;
}
- free_xenheap_page((unsigned long)l_arr);
+ free_xenheap_page(l_arr);
put_domain(d);
}
break;
+ case DOM0_GETMEMLIST:
+ {
+ int i;
+ struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
+ unsigned long max_pfns = op->u.getmemlist.max_pfns;
+ unsigned long pfn;
+ unsigned long *buffer = op->u.getmemlist.buffer;
+ struct list_head *list_ent;
+
+ ret = -EINVAL;
+ if ( d != NULL )
+ {
+ ret = 0;
+
+ spin_lock(&d->page_alloc_lock);
+ list_ent = d->page_list.next;
+ for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
+ {
+ pfn = list_entry(list_ent, struct pfn_info, list) -
+ frame_table;
+ if ( put_user(pfn, buffer) )
+ {
+ ret = -EFAULT;
+ break;
+ }
+ buffer++;
+ list_ent = frame_table[pfn].list.next;
+ }
+ spin_unlock(&d->page_alloc_lock);
+
+ op->u.getmemlist.num_pfns = i;
+ copy_to_user(u_dom0_op, op, sizeof(*op));
+
+ put_domain(d);
+ }
+ }
+ break;
+
default:
ret = -ENOSYS;
@@ -307,49 +376,39 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
return ret;
}
-void arch_getdomaininfo_ctxt(struct domain *d, full_execution_context_t *c)
+void arch_getdomaininfo_ctxt(
+ struct vcpu *v, struct vcpu_guest_context *c)
{
- int i;
+#ifdef __i386__ /* Remove when x86_64 VMX is implemented */
+#ifdef CONFIG_VMX
+ extern void save_vmx_cpu_user_regs(struct cpu_user_regs *);
+#endif
+#endif
+
+ memcpy(c, &v->arch.guest_context, sizeof(*c));
+
+ /* IOPL privileges are virtualised -- merge back into returned eflags. */
+ BUG_ON((c->user_regs.eflags & EF_IOPL) != 0);
+ c->user_regs.eflags |= v->arch.iopl << 12;
+
+#ifdef __i386__
+#ifdef CONFIG_VMX
+ if ( VMX_DOMAIN(v) )
+ save_vmx_cpu_user_regs(&c->user_regs);
+#endif
+#endif
c->flags = 0;
- memcpy(&c->cpu_ctxt,
- &d->thread.user_ctxt,
- sizeof(d->thread.user_ctxt));
- if ( test_bit(DF_DONEFPUINIT, &d->flags) )
- c->flags |= ECF_I387_VALID;
- memcpy(&c->fpu_ctxt,
- &d->thread.i387,
- sizeof(d->thread.i387));
- memcpy(&c->trap_ctxt,
- d->thread.traps,
- sizeof(d->thread.traps));
-#ifdef ARCH_HAS_FAST_TRAP
- if ( (d->thread.fast_trap_desc.a == 0) &&
- (d->thread.fast_trap_desc.b == 0) )
- c->fast_trap_idx = 0;
- else
- c->fast_trap_idx =
- d->thread.fast_trap_idx;
+ if ( test_bit(_VCPUF_fpu_initialised, &v->vcpu_flags) )
+ c->flags |= VGCF_I387_VALID;
+ if ( KERNEL_MODE(v, &v->arch.guest_context.user_regs) )
+ c->flags |= VGCF_IN_KERNEL;
+#ifdef CONFIG_VMX
+ if (VMX_DOMAIN(v))
+ c->flags |= VGCF_VMX_GUEST;
#endif
- c->ldt_base = d->mm.ldt_base;
- c->ldt_ents = d->mm.ldt_ents;
- c->gdt_ents = 0;
- if ( GET_GDT_ADDRESS(d) == GDT_VIRT_START )
- {
- for ( i = 0; i < 16; i++ )
- c->gdt_frames[i] =
- l1_pgentry_to_pagenr(d->mm.perdomain_pt[i]);
- c->gdt_ents = GET_GDT_ENTRIES(d);
- }
- c->guestos_ss = d->thread.guestos_ss;
- c->guestos_esp = d->thread.guestos_sp;
- c->pt_base =
- pagetable_val(d->mm.pagetable);
- memcpy(c->debugreg,
- d->thread.debugreg,
- sizeof(d->thread.debugreg));
- c->event_callback_cs = d->thread.event_selector;
- c->event_callback_eip = d->thread.event_address;
- c->failsafe_callback_cs = d->thread.failsafe_selector;
- c->failsafe_callback_eip = d->thread.failsafe_address;
+
+ c->pt_base = pagetable_get_paddr(v->arch.guest_table);
+
+ c->vm_assist = v->domain->vm_assist;
}
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index aeb2b0c547..169138667d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -19,6 +19,7 @@
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
+#include <xen/grant_table.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
@@ -33,170 +34,151 @@
#include <asm/shadow.h>
#include <xen/console.h>
#include <xen/elf.h>
+#include <asm/vmx.h>
+#include <asm/vmx_vmcs.h>
+#include <asm/msr.h>
+#include <asm/physdev.h>
+#include <xen/kernel.h>
+#include <public/io/ioreq.h>
#include <xen/multicall.h>
/* opt_noreboot: If true, machine will need manual reset on error. */
static int opt_noreboot = 0;
boolean_param("noreboot", opt_noreboot);
-#if !defined(CONFIG_X86_64BITMODE)
-/* No ring-3 access in initial page tables. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
-#else
-/* Allow ring-3 access in long mode as guest cannot use ring 1. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#endif
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+struct percpu_ctxt {
+ struct vcpu *curr_vcpu;
+} __cacheline_aligned;
+static struct percpu_ctxt percpu_ctxt[NR_CPUS];
+
+static void continue_idle_task(struct vcpu *v)
+{
+ reset_stack_and_jump(idle_loop);
+}
-#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-#define round_pgdown(_p) ((_p)&PAGE_MASK)
+static void continue_nonidle_task(struct vcpu *v)
+{
+ reset_stack_and_jump(ret_from_intr);
+}
static void default_idle(void)
{
- __cli();
+ local_irq_disable();
if ( !softirq_pending(smp_processor_id()) )
safe_halt();
else
- __sti();
+ local_irq_enable();
}
-static __attribute_used__ void idle_loop(void)
+void idle_loop(void)
{
int cpu = smp_processor_id();
+
for ( ; ; )
{
irq_stat[cpu].idle_timestamp = jiffies;
+
while ( !softirq_pending(cpu) )
{
page_scrub_schedule_work();
default_idle();
}
+
do_softirq();
}
}
void startup_cpu_idle_loop(void)
{
- /* Just some sanity to ensure that the scheduler is set up okay. */
- ASSERT(current->id == IDLE_DOMAIN_ID);
- domain_unpause_by_systemcontroller(current);
- raise_softirq(SCHEDULE_SOFTIRQ);
- do_softirq();
+ struct vcpu *v = current;
- /*
- * Declares CPU setup done to the boot processor.
- * Therefore memory barrier to ensure state is visible.
- */
- smp_mb();
- init_idle();
+ ASSERT(is_idle_task(v->domain));
+ percpu_ctxt[smp_processor_id()].curr_vcpu = v;
+ cpu_set(smp_processor_id(), v->domain->cpumask);
+ v->arch.schedule_tail = continue_idle_task;
idle_loop();
}
static long no_idt[2];
static int reboot_mode;
-int reboot_thru_bios = 0;
-
-#ifdef CONFIG_SMP
-int reboot_smp = 0;
-static int reboot_cpu = -1;
-/* shamelessly grabbed from lib/vsprintf.c for readability */
-#define is_digit(c) ((c) >= '0' && (c) <= '9')
-#endif
-
static inline void kb_wait(void)
{
int i;
- for (i=0; i<0x10000; i++)
- if ((inb_p(0x64) & 0x02) == 0)
+ for ( i = 0; i < 0x10000; i++ )
+ if ( (inb_p(0x64) & 0x02) == 0 )
break;
}
-
void machine_restart(char * __unused)
{
-#ifdef CONFIG_SMP
- int cpuid;
-#endif
+ int i;
if ( opt_noreboot )
{
printk("Reboot disabled on cmdline: require manual reset\n");
- for ( ; ; ) __asm__ __volatile__ ("hlt");
+ for ( ; ; )
+ safe_halt();
}
-#ifdef CONFIG_SMP
- cpuid = GET_APIC_ID(apic_read(APIC_ID));
-
- /* KAF: Need interrupts enabled for safe IPI. */
- __sti();
-
- if (reboot_smp) {
+ watchdog_disable();
+ console_start_sync();
- /* check to see if reboot_cpu is valid
- if its not, default to the BSP */
- if ((reboot_cpu == -1) ||
- (reboot_cpu > (NR_CPUS -1)) ||
- !(phys_cpu_present_map & (1<<cpuid)))
- reboot_cpu = boot_cpu_physical_apicid;
+ local_irq_enable();
- reboot_smp = 0; /* use this as a flag to only go through this once*/
- /* re-run this function on the other CPUs
- it will fall though this section since we have
- cleared reboot_smp, and do the reboot if it is the
- correct CPU, otherwise it halts. */
- if (reboot_cpu != cpuid)
- smp_call_function((void *)machine_restart , NULL, 1, 0);
+ /* Ensure we are the boot CPU. */
+ if ( GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid )
+ {
+ smp_call_function((void *)machine_restart, NULL, 1, 0);
+ for ( ; ; )
+ safe_halt();
}
- /* if reboot_cpu is still -1, then we want a tradional reboot,
- and if we are not running on the reboot_cpu,, halt */
- if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
- for (;;)
- __asm__ __volatile__ ("hlt");
- }
/*
* Stop all CPUs and turn off local APICs and the IO-APIC, so
* other OSs see a clean IRQ state.
*/
smp_send_stop();
disable_IO_APIC();
+
+#ifdef CONFIG_VMX
+ stop_vmx();
#endif
- if(!reboot_thru_bios) {
- /* rebooting needs to touch the page at absolute addr 0 */
- *((unsigned short *)__va(0x472)) = reboot_mode;
- for (;;) {
- int i;
- for (i=0; i<100; i++) {
- kb_wait();
- udelay(50);
- outb(0xfe,0x64); /* pulse reset low */
- udelay(50);
- }
- /* That didn't work - force a triple fault.. */
- __asm__ __volatile__("lidt %0": "=m" (no_idt));
- __asm__ __volatile__("int3");
+ /* Rebooting needs to touch the page at absolute address 0. */
+ *((unsigned short *)__va(0x472)) = reboot_mode;
+
+ for ( ; ; )
+ {
+ /* Pulse the keyboard reset line. */
+ for ( i = 0; i < 100; i++ )
+ {
+ kb_wait();
+ udelay(50);
+ outb(0xfe,0x64); /* pulse reset low */
+ udelay(50);
}
- }
- panic("Need to reinclude BIOS reboot code\n");
+ /* That didn't work - force a triple fault.. */
+ __asm__ __volatile__("lidt %0": "=m" (no_idt));
+ __asm__ __volatile__("int3");
+ }
}
void __attribute__((noreturn)) __machine_halt(void *unused)
{
for ( ; ; )
- __asm__ __volatile__ ( "cli; hlt" );
+ safe_halt();
}
void machine_halt(void)
{
- smp_call_function(__machine_halt, NULL, 1, 1);
+ watchdog_disable();
+ console_start_sync();
+ smp_call_function(__machine_halt, NULL, 1, 0);
__machine_halt(NULL);
}
@@ -208,280 +190,665 @@ void dump_pageframe_info(struct domain *d)
{
list_for_each_entry ( page, &d->page_list, list )
{
- printk("Page %08x: caf=%08x, taf=%08x\n",
- page_to_phys(page), page->count_info,
+ printk("Page %p: caf=%08x, taf=%08x\n",
+ _p(page_to_phys(page)), page->count_info,
page->u.inuse.type_info);
}
}
+
+ list_for_each_entry ( page, &d->xenpage_list, list )
+ {
+ printk("XenPage %p: caf=%08x, taf=%08x\n",
+ _p(page_to_phys(page)), page->count_info,
+ page->u.inuse.type_info);
+ }
+
page = virt_to_page(d->shared_info);
- printk("Shared_info@%08x: caf=%08x, taf=%08x\n",
- page_to_phys(page), page->count_info,
+ printk("Shared_info@%p: caf=%08x, taf=%08x\n",
+ _p(page_to_phys(page)), page->count_info,
page->u.inuse.type_info);
}
-xmem_cache_t *domain_struct_cachep;
-void __init domain_startofday(void)
+struct vcpu *arch_alloc_vcpu_struct(void)
{
- domain_struct_cachep = xmem_cache_create(
- "domain_cache", sizeof(struct domain),
- 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
- if ( domain_struct_cachep == NULL )
- panic("No slab cache for domain structs.");
+ return xmalloc(struct vcpu);
}
-struct domain *arch_alloc_domain_struct(void)
+void arch_free_vcpu_struct(struct vcpu *v)
{
- return xmem_cache_alloc(domain_struct_cachep);
+ xfree(v);
}
-void arch_free_domain_struct(struct domain *d)
+void free_perdomain_pt(struct domain *d)
{
- xmem_cache_free(domain_struct_cachep, d);
+ free_xenheap_page(d->arch.mm_perdomain_pt);
+#ifdef __x86_64__
+ free_xenheap_page(d->arch.mm_perdomain_l2);
+ free_xenheap_page(d->arch.mm_perdomain_l3);
+#endif
}
-void free_perdomain_pt(struct domain *d)
+void arch_do_createdomain(struct vcpu *v)
{
- free_xenheap_page((unsigned long)d->mm.perdomain_pt);
+ struct domain *d = v->domain;
+
+ v->arch.flags = TF_kernel_mode;
+
+ if ( is_idle_task(d) )
+ return;
+
+ v->arch.schedule_tail = continue_nonidle_task;
+
+ d->shared_info = alloc_xenheap_page();
+ memset(d->shared_info, 0, PAGE_SIZE);
+ v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
+ v->cpumap = CPUMAP_RUNANYWHERE;
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
+ machine_to_phys_mapping[virt_to_phys(d->shared_info) >>
+ PAGE_SHIFT] = INVALID_M2P_ENTRY;
+
+ d->arch.mm_perdomain_pt = alloc_xenheap_page();
+ memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE);
+ machine_to_phys_mapping[virt_to_phys(d->arch.mm_perdomain_pt) >>
+ PAGE_SHIFT] = INVALID_M2P_ENTRY;
+ v->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
+ v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
+ l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
+
+ v->arch.guest_vtable = __linear_l2_table;
+ v->arch.shadow_vtable = __shadow_linear_l2_table;
+
+#ifdef __x86_64__
+ v->arch.guest_vl3table = __linear_l3_table;
+ v->arch.guest_vl4table = __linear_l4_table;
+
+ d->arch.mm_perdomain_l2 = alloc_xenheap_page();
+ memset(d->arch.mm_perdomain_l2, 0, PAGE_SIZE);
+ d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)] =
+ l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt),
+ __PAGE_HYPERVISOR);
+ d->arch.mm_perdomain_l3 = alloc_xenheap_page();
+ memset(d->arch.mm_perdomain_l3, 0, PAGE_SIZE);
+ d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
+ l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
+ __PAGE_HYPERVISOR);
+#endif
+
+ (void)ptwr_init(d);
+
+ shadow_lock_init(d);
+ INIT_LIST_HEAD(&d->arch.free_shadow_frames);
}
-static void continue_idle_task(struct domain *d)
+void arch_do_boot_vcpu(struct vcpu *v)
{
- reset_stack_and_jump(idle_loop);
+ struct domain *d = v->domain;
+
+ v->arch.flags = TF_kernel_mode;
+
+ v->arch.schedule_tail = d->vcpu[0]->arch.schedule_tail;
+
+ v->arch.perdomain_ptes =
+ d->arch.mm_perdomain_pt + (v->vcpu_id << PDPT_VCPU_SHIFT);
+ v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
+ l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
}
-static void continue_nonidle_task(struct domain *d)
+#ifdef CONFIG_VMX
+void arch_vmx_do_resume(struct vcpu *v)
{
- reset_stack_and_jump(ret_from_intr);
+ u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
+
+ load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
+ vmx_do_resume(v);
+ reset_stack_and_jump(vmx_asm_do_resume);
}
-void arch_do_createdomain(struct domain *d)
+void arch_vmx_do_launch(struct vcpu *v)
{
-#ifdef ARCH_HAS_FAST_TRAP
- SET_DEFAULT_FAST_TRAP(&d->thread);
-#endif
+ u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
- if ( d->id == IDLE_DOMAIN_ID )
- {
- d->thread.schedule_tail = continue_idle_task;
+ load_vmcs(&v->arch.arch_vmx, vmcs_phys_ptr);
+ vmx_do_launch(v);
+ reset_stack_and_jump(vmx_asm_do_launch);
+}
+
+static int vmx_final_setup_guest(
+ struct vcpu *v, struct vcpu_guest_context *ctxt)
+{
+ int error;
+ struct cpu_user_regs *regs;
+ struct vmcs_struct *vmcs;
+
+ regs = &ctxt->user_regs;
+
+ /*
+ * Create a new VMCS
+ */
+ if (!(vmcs = alloc_vmcs())) {
+ printk("Failed to create a new VMCS\n");
+ return -ENOMEM;
}
- else
+
+ memset(&v->arch.arch_vmx, 0, sizeof (struct arch_vmx_struct));
+
+ v->arch.arch_vmx.vmcs = vmcs;
+ error = construct_vmcs(
+ &v->arch.arch_vmx, regs, ctxt, VMCS_USE_HOST_ENV);
+ if ( error < 0 )
{
- d->thread.schedule_tail = continue_nonidle_task;
-
- d->shared_info = (void *)alloc_xenheap_page();
- memset(d->shared_info, 0, PAGE_SIZE);
- d->shared_info->arch.mfn_to_pfn_start = m2p_start_mfn;
- SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
- machine_to_phys_mapping[virt_to_phys(d->shared_info) >>
- PAGE_SHIFT] = INVALID_P2M_ENTRY;
-
- d->mm.perdomain_pt = (l1_pgentry_t *)alloc_xenheap_page();
- memset(d->mm.perdomain_pt, 0, PAGE_SIZE);
- machine_to_phys_mapping[virt_to_phys(d->mm.perdomain_pt) >>
- PAGE_SHIFT] = INVALID_P2M_ENTRY;
+ printk("Failed to construct a new VMCS\n");
+ goto out;
}
+
+ v->arch.schedule_tail = arch_vmx_do_launch;
+
+#if defined (__i386)
+ v->arch.arch_vmx.vmx_platform.real_mode_data =
+ (unsigned long *) regs->esi;
+#endif
+
+ if (v == v->domain->vcpu[0]) {
+ /*
+ * Required to do this once per domain
+ * XXX todo: add a seperate function to do these.
+ */
+ memset(&v->domain->shared_info->evtchn_mask[0], 0xff,
+ sizeof(v->domain->shared_info->evtchn_mask));
+ clear_bit(IOPACKET_PORT, &v->domain->shared_info->evtchn_mask[0]);
+
+ /* Put the domain in shadow mode even though we're going to be using
+ * the shared 1:1 page table initially. It shouldn't hurt */
+ shadow_mode_enable(v->domain,
+ SHM_enable|SHM_refcounts|
+ SHM_translate|SHM_external);
+ }
+
+ return 0;
+
+out:
+ free_vmcs(vmcs);
+ v->arch.arch_vmx.vmcs = 0;
+ return error;
}
+#endif
-int arch_final_setup_guestos(struct domain *d, full_execution_context_t *c)
+
+/* This is called by arch_final_setup_guest and do_boot_vcpu */
+int arch_set_info_guest(
+ struct vcpu *v, struct vcpu_guest_context *c)
{
+ struct domain *d = v->domain;
unsigned long phys_basetab;
int i, rc;
- clear_bit(DF_DONEFPUINIT, &d->flags);
- if ( c->flags & ECF_I387_VALID )
- set_bit(DF_DONEFPUINIT, &d->flags);
-
- memcpy(&d->thread.user_ctxt,
- &c->cpu_ctxt,
- sizeof(d->thread.user_ctxt));
-
/*
* This is sufficient! If the descriptor DPL differs from CS RPL then we'll
* #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
* If SS RPL or DPL differs from CS RPL then we'll #GP.
*/
- if ( ((d->thread.user_ctxt.cs & 3) == 0) ||
- ((d->thread.user_ctxt.ss & 3) == 0) )
- return -EINVAL;
+ if ( !(c->flags & VGCF_VMX_GUEST) )
+ {
+ if ( ((c->user_regs.cs & 3) == 0) ||
+ ((c->user_regs.ss & 3) == 0) )
+ return -EINVAL;
+ }
- memcpy(&d->thread.i387,
- &c->fpu_ctxt,
- sizeof(d->thread.i387));
+ clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
+ if ( c->flags & VGCF_I387_VALID )
+ set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
- memcpy(d->thread.traps,
- &c->trap_ctxt,
- sizeof(d->thread.traps));
+ v->arch.flags &= ~TF_kernel_mode;
+ if ( c->flags & VGCF_IN_KERNEL )
+ v->arch.flags |= TF_kernel_mode;
-#ifdef ARCH_HAS_FAST_TRAP
- if ( (rc = (int)set_fast_trap(d, c->fast_trap_idx)) != 0 )
- return rc;
-#endif
+ memcpy(&v->arch.guest_context, c, sizeof(*c));
- d->mm.ldt_base = c->ldt_base;
- d->mm.ldt_ents = c->ldt_ents;
+ if ( !(c->flags & VGCF_VMX_GUEST) )
+ {
+ /* IOPL privileges are virtualised. */
+ v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3;
+ v->arch.guest_context.user_regs.eflags &= ~EF_IOPL;
+
+ /* Ensure real hardware interrupts are enabled. */
+ v->arch.guest_context.user_regs.eflags |= EF_IE;
+ } else {
+ __vmwrite(GUEST_RFLAGS, v->arch.guest_context.user_regs.eflags);
+ if (v->arch.guest_context.user_regs.eflags & EF_TF)
+ __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+ else
+ __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+ }
- d->thread.guestos_ss = c->guestos_ss;
- d->thread.guestos_sp = c->guestos_esp;
+ if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+ return 0;
+ memset(v->arch.guest_context.debugreg, 0,
+ sizeof(v->arch.guest_context.debugreg));
for ( i = 0; i < 8; i++ )
- (void)set_debugreg(d, i, c->debugreg[i]);
+ (void)set_debugreg(v, i, c->debugreg[i]);
+
+ if ( v->vcpu_id == 0 )
+ d->vm_assist = c->vm_assist;
- d->thread.event_selector = c->event_callback_cs;
- d->thread.event_address = c->event_callback_eip;
- d->thread.failsafe_selector = c->failsafe_callback_cs;
- d->thread.failsafe_address = c->failsafe_callback_eip;
-
phys_basetab = c->pt_base;
- d->mm.pagetable = mk_pagetable(phys_basetab);
- if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d,
- PGT_base_page_table) )
- return -EINVAL;
-
- /* Failure to set GDT is harmless. */
- SET_GDT_ENTRIES(d, DEFAULT_GDT_ENTRIES);
- SET_GDT_ADDRESS(d, DEFAULT_GDT_ADDRESS);
- if ( c->gdt_ents != 0 )
+ v->arch.guest_table = mk_pagetable(phys_basetab);
+
+ if ( shadow_mode_refcounts(d) )
{
- if ( (rc = (int)set_gdt(d, c->gdt_frames, c->gdt_ents)) != 0 )
- {
- put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
- return rc;
- }
+ if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
+ return -EINVAL;
+ }
+ else
+ {
+ if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d,
+ PGT_base_page_table) )
+ return -EINVAL;
}
+ if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
+ {
+ put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
+ return rc;
+ }
+
+#ifdef CONFIG_VMX
+ if ( c->flags & VGCF_VMX_GUEST )
+ {
+ int error;
+
+ // VMX uses the initially provided page tables as the P2M map.
+ //
+ // XXX: This creates a security issue -- Xen can't necessarily
+ // trust the VMX domain builder. Xen should validate this
+ // page table, and/or build the table itself, or ???
+ //
+ if ( !pagetable_get_paddr(d->arch.phys_table) )
+ d->arch.phys_table = v->arch.guest_table;
+
+ if ( (error = vmx_final_setup_guest(v, c)) )
+ return error;
+ }
+#endif
+
+ update_pagetables(v);
+
+ /* Don't redo final setup */
+ set_bit(_VCPUF_initialised, &v->vcpu_flags);
+
return 0;
}
-#if defined(__i386__)
-void new_thread(struct domain *d,
+void new_thread(struct vcpu *d,
unsigned long start_pc,
unsigned long start_stack,
unsigned long start_info)
{
- execution_context_t *ec = &d->thread.user_ctxt;
+ struct cpu_user_regs *regs = &d->arch.guest_context.user_regs;
/*
* Initial register values:
- * DS,ES,FS,GS = FLAT_RING1_DS
- * CS:EIP = FLAT_RING1_CS:start_pc
- * SS:ESP = FLAT_RING1_DS:start_stack
+ * DS,ES,FS,GS = FLAT_KERNEL_DS
+ * CS:EIP = FLAT_KERNEL_CS:start_pc
+ * SS:ESP = FLAT_KERNEL_SS:start_stack
* ESI = start_info
* [EAX,EBX,ECX,EDX,EDI,EBP are zero]
*/
- ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS;
- ec->cs = FLAT_RING1_CS;
- ec->eip = start_pc;
- ec->esp = start_stack;
- ec->esi = start_info;
-
- __save_flags(ec->eflags);
- ec->eflags |= X86_EFLAGS_IF;
+ regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS;
+ regs->ss = FLAT_KERNEL_SS;
+ regs->cs = FLAT_KERNEL_CS;
+ regs->eip = start_pc;
+ regs->esp = start_stack;
+ regs->esi = start_info;
+
+ __save_flags(regs->eflags);
+ regs->eflags |= X86_EFLAGS_IF;
}
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,register) \
- __asm__("movl %0,%%db" #register \
- : /* no output */ \
- :"r" (thread->debugreg[register]))
+#ifdef __x86_64__
+void toggle_guest_mode(struct vcpu *v)
+{
+ v->arch.flags ^= TF_kernel_mode;
+ __asm__ __volatile__ ( "swapgs" );
+ update_pagetables(v);
+ write_ptbase(v);
+}
-void switch_to(struct domain *prev_p, struct domain *next_p)
+#define loadsegment(seg,value) ({ \
+ int __r = 1; \
+ __asm__ __volatile__ ( \
+ "1: movl %k1,%%" #seg "\n2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: xorl %k0,%k0\n" \
+ " movl %k0,%%" #seg "\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 8\n" \
+ " .quad 1b,3b\n" \
+ ".previous" \
+ : "=r" (__r) : "r" (value), "0" (__r) );\
+ __r; })
+
+static void load_segments(struct vcpu *p, struct vcpu *n)
{
- struct thread_struct *next = &next_p->thread;
- struct tss_struct *tss = init_tss + smp_processor_id();
- execution_context_t *stack_ec = get_execution_context();
- int i;
-
- __cli();
+ struct vcpu_guest_context *pctxt = &p->arch.guest_context;
+ struct vcpu_guest_context *nctxt = &n->arch.guest_context;
+ int all_segs_okay = 1;
+
+ /* Either selector != 0 ==> reload. */
+ if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+ all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
- /* Switch guest general-register state. */
- if ( !is_idle_task(prev_p) )
+ /* Either selector != 0 ==> reload. */
+ if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+ all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
+
+ /*
+ * Either selector != 0 ==> reload.
+ * Also reload to reset FS_BASE if it was non-zero.
+ */
+ if ( unlikely(pctxt->user_regs.fs |
+ pctxt->fs_base |
+ nctxt->user_regs.fs) )
+ {
+ all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
+ if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
+ pctxt->fs_base = 0;
+ }
+
+ /*
+ * Either selector != 0 ==> reload.
+ * Also reload to reset GS_BASE if it was non-zero.
+ */
+ if ( unlikely(pctxt->user_regs.gs |
+ pctxt->gs_base_user |
+ nctxt->user_regs.gs) )
{
- memcpy(&prev_p->thread.user_ctxt,
- stack_ec,
- sizeof(*stack_ec));
- unlazy_fpu(prev_p);
- CLEAR_FAST_TRAP(&prev_p->thread);
+ /* Reset GS_BASE with user %gs? */
+ if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+ all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
+ if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
+ pctxt->gs_base_user = 0;
}
- if ( !is_idle_task(next_p) )
+ /* This can only be non-zero if selector is NULL. */
+ if ( nctxt->fs_base )
+ wrmsr(MSR_FS_BASE,
+ nctxt->fs_base,
+ nctxt->fs_base>>32);
+
+ /* Most kernels have non-zero GS base, so don't bother testing. */
+ /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
+ wrmsr(MSR_SHADOW_GS_BASE,
+ nctxt->gs_base_kernel,
+ nctxt->gs_base_kernel>>32);
+
+ /* This can only be non-zero if selector is NULL. */
+ if ( nctxt->gs_base_user )
+ wrmsr(MSR_GS_BASE,
+ nctxt->gs_base_user,
+ nctxt->gs_base_user>>32);
+
+ /* If in kernel mode then switch the GS bases around. */
+ if ( n->arch.flags & TF_kernel_mode )
+ __asm__ __volatile__ ( "swapgs" );
+
+ if ( unlikely(!all_segs_okay) )
+ {
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ unsigned long *rsp =
+ (n->arch.flags & TF_kernel_mode) ?
+ (unsigned long *)regs->rsp :
+ (unsigned long *)nctxt->kernel_sp;
+
+ if ( !(n->arch.flags & TF_kernel_mode) )
+ toggle_guest_mode(n);
+ else
+ regs->cs &= ~3;
+
+ if ( put_user(regs->ss, rsp- 1) |
+ put_user(regs->rsp, rsp- 2) |
+ put_user(regs->rflags, rsp- 3) |
+ put_user(regs->cs, rsp- 4) |
+ put_user(regs->rip, rsp- 5) |
+ put_user(nctxt->user_regs.gs, rsp- 6) |
+ put_user(nctxt->user_regs.fs, rsp- 7) |
+ put_user(nctxt->user_regs.es, rsp- 8) |
+ put_user(nctxt->user_regs.ds, rsp- 9) |
+ put_user(regs->r11, rsp-10) |
+ put_user(regs->rcx, rsp-11) )
+ {
+ DPRINTK("Error while creating failsafe callback frame.\n");
+ domain_crash();
+ }
+
+ regs->entry_vector = TRAP_syscall;
+ regs->rflags &= 0xFFFCBEFFUL;
+ regs->ss = __GUEST_SS;
+ regs->rsp = (unsigned long)(rsp-11);
+ regs->cs = __GUEST_CS;
+ regs->rip = nctxt->failsafe_callback_eip;
+ }
+}
+
+static void save_segments(struct vcpu *v)
+{
+ struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+ __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (regs->ds) );
+ __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
+ __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
+ __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
+}
+
+static void clear_segments(void)
+{
+ __asm__ __volatile__ (
+ " movl %0,%%ds; "
+ " movl %0,%%es; "
+ " movl %0,%%fs; "
+ " movl %0,%%gs; "
+ ""safe_swapgs" "
+ " movl %0,%%gs"
+ : : "r" (0) );
+}
+
+long do_switch_to_user(void)
+{
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ struct switch_to_user stu;
+ struct vcpu *v = current;
+
+ if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
+ unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
+ return -EFAULT;
+
+ toggle_guest_mode(v);
+
+ regs->rip = stu.rip;
+ regs->cs = stu.cs | 3; /* force guest privilege */
+ regs->rflags = stu.rflags;
+ regs->rsp = stu.rsp;
+ regs->ss = stu.ss | 3; /* force guest privilege */
+
+ if ( !(stu.flags & VGCF_IN_SYSCALL) )
{
- memcpy(stack_ec,
- &next_p->thread.user_ctxt,
- sizeof(*stack_ec));
+ regs->entry_vector = 0;
+ regs->r11 = stu.r11;
+ regs->rcx = stu.rcx;
+ }
+
+ /* Saved %rax gets written back to regs->rax in entry.S. */
+ return stu.rax;
+}
+
+#define switch_kernel_stack(_n,_c) ((void)0)
+
+#elif defined(__i386__)
+
+#define load_segments(_p, _n) ((void)0)
+#define save_segments(_p) ((void)0)
+#define clear_segments() ((void)0)
+
+static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
+{
+ struct tss_struct *tss = &init_tss[cpu];
+ tss->esp1 = n->arch.guest_context.kernel_sp;
+ tss->ss1 = n->arch.guest_context.kernel_ss;
+}
+
+#endif
- SET_FAST_TRAP(&next_p->thread);
+#define loaddebug(_v,_reg) \
+ __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
- /* Switch the guest OS ring-1 stack. */
- tss->esp1 = next->guestos_sp;
- tss->ss1 = next->guestos_ss;
+static void __context_switch(void)
+{
+ struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
+ unsigned int cpu = smp_processor_id();
+ struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
+ struct vcpu *n = current;
+
+ if ( !is_idle_task(p->domain) )
+ {
+ memcpy(&p->arch.guest_context.user_regs,
+ stack_regs,
+ CTXT_SWITCH_STACK_BYTES);
+ unlazy_fpu(p);
+ save_segments(p);
+ }
+
+ if ( !is_idle_task(n->domain) )
+ {
+ memcpy(stack_regs,
+ &n->arch.guest_context.user_regs,
+ CTXT_SWITCH_STACK_BYTES);
/* Maybe switch the debug registers. */
- if ( unlikely(next->debugreg[7]) )
+ if ( unlikely(n->arch.guest_context.debugreg[7]) )
{
- loaddebug(next, 0);
- loaddebug(next, 1);
- loaddebug(next, 2);
- loaddebug(next, 3);
+ loaddebug(&n->arch.guest_context, 0);
+ loaddebug(&n->arch.guest_context, 1);
+ loaddebug(&n->arch.guest_context, 2);
+ loaddebug(&n->arch.guest_context, 3);
/* no 4 and 5 */
- loaddebug(next, 6);
- loaddebug(next, 7);
+ loaddebug(&n->arch.guest_context, 6);
+ loaddebug(&n->arch.guest_context, 7);
}
- /* Switch page tables. */
- write_ptbase(&next_p->mm);
+ if ( !VMX_DOMAIN(n) )
+ {
+ set_int80_direct_trap(n);
+ switch_kernel_stack(n, cpu);
+ }
}
- if ( unlikely(prev_p->thread.io_bitmap != NULL) )
+ if ( p->domain != n->domain )
+ cpu_set(cpu, n->domain->cpumask);
+
+ write_ptbase(n);
+
+ if ( p->vcpu_id != n->vcpu_id )
{
- for ( i = 0; i < sizeof(prev_p->thread.io_bitmap_sel) * 8; i++ )
- if ( !test_bit(i, &prev_p->thread.io_bitmap_sel) )
- memset(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
- ~0U, IOBMP_BYTES_PER_SELBIT);
- tss->bitmap = IOBMP_INVALID_OFFSET;
+ char gdt_load[10];
+ *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
+ *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(n);
+ __asm__ __volatile__ ( "lgdt %0" : "=m" (gdt_load) );
}
- if ( unlikely(next_p->thread.io_bitmap != NULL) )
+ if ( p->domain != n->domain )
+ cpu_clear(cpu, p->domain->cpumask);
+
+ percpu_ctxt[cpu].curr_vcpu = n;
+}
+
+
+void context_switch(struct vcpu *prev, struct vcpu *next)
+{
+ struct vcpu *realprev;
+
+ local_irq_disable();
+
+ set_current(next);
+
+ if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) ||
+ is_idle_task(next->domain) )
{
- for ( i = 0; i < sizeof(next_p->thread.io_bitmap_sel) * 8; i++ )
- if ( !test_bit(i, &next_p->thread.io_bitmap_sel) )
- memcpy(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
- &next_p->thread.io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
- IOBMP_BYTES_PER_SELBIT);
- tss->bitmap = IOBMP_OFFSET;
+ local_irq_enable();
}
+ else
+ {
+ __context_switch();
- set_current(next_p);
+ local_irq_enable();
+
+ if ( !VMX_DOMAIN(next) )
+ {
+ load_LDT(next);
+ load_segments(realprev, next);
+ }
+ }
- /* Switch GDT and LDT. */
- __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
- load_LDT(next_p);
+ /*
+ * We do this late on because it doesn't need to be protected by the
+ * schedule_lock, and because we want this to be the very last use of
+ * 'prev' (after this point, a dying domain's info structure may be freed
+ * without warning).
+ */
+ clear_bit(_VCPUF_running, &prev->vcpu_flags);
- __sti();
+ schedule_tail(next);
+ BUG();
}
+void continue_running(struct vcpu *same)
+{
+ schedule_tail(same);
+ BUG();
+}
-/* XXX Currently the 'domain' field is ignored! XXX */
-long do_iopl(domid_t domain, unsigned int new_io_pl)
+int __sync_lazy_execstate(void)
{
- execution_context_t *ec = get_execution_context();
- ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12);
- return 0;
+ if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
+ return 0;
+ __context_switch();
+ load_LDT(current);
+ clear_segments();
+ return 1;
}
-#endif
+void sync_lazy_execstate_cpu(unsigned int cpu)
+{
+ if ( cpu == smp_processor_id() )
+ (void)__sync_lazy_execstate();
+ else
+ flush_tlb_mask(cpumask_of_cpu(cpu));
+}
+
+void sync_lazy_execstate_mask(cpumask_t mask)
+{
+ if ( cpu_isset(smp_processor_id(), mask) )
+ (void)__sync_lazy_execstate();
+ /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
+ flush_tlb_mask(mask);
+}
+
+void sync_lazy_execstate_all(void)
+{
+ __sync_lazy_execstate();
+ /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
+ flush_tlb_mask(cpu_online_map);
+}
-unsigned long hypercall_create_continuation(
+unsigned long __hypercall_create_continuation(
unsigned int op, unsigned int nr_args, ...)
{
struct mc_state *mcs = &mc_state[smp_processor_id()];
- execution_context_t *ec;
- unsigned long *preg;
+ struct cpu_user_regs *regs;
unsigned int i;
va_list args;
@@ -496,15 +863,39 @@ unsigned long hypercall_create_continuation(
}
else
{
- ec = get_execution_context();
+ regs = guest_cpu_user_regs();
#if defined(__i386__)
- ec->eax = op;
- ec->eip -= 2; /* re-execute 'int 0x82' */
+ regs->eax = op;
+ regs->eip -= 2; /* re-execute 'int 0x82' */
- for ( i = 0, preg = &ec->ebx; i < nr_args; i++, preg++ )
- *preg = va_arg(args, unsigned long);
-#else
- preg = NULL; /* XXX x86/64 */
+ for ( i = 0; i < nr_args; i++ )
+ {
+ switch ( i )
+ {
+ case 0: regs->ebx = va_arg(args, unsigned long); break;
+ case 1: regs->ecx = va_arg(args, unsigned long); break;
+ case 2: regs->edx = va_arg(args, unsigned long); break;
+ case 3: regs->esi = va_arg(args, unsigned long); break;
+ case 4: regs->edi = va_arg(args, unsigned long); break;
+ case 5: regs->ebp = va_arg(args, unsigned long); break;
+ }
+ }
+#elif defined(__x86_64__)
+ regs->rax = op;
+ regs->rip -= 2; /* re-execute 'syscall' */
+
+ for ( i = 0; i < nr_args; i++ )
+ {
+ switch ( i )
+ {
+ case 0: regs->rdi = va_arg(args, unsigned long); break;
+ case 1: regs->rsi = va_arg(args, unsigned long); break;
+ case 2: regs->rdx = va_arg(args, unsigned long); break;
+ case 3: regs->r10 = va_arg(args, unsigned long); break;
+ case 4: regs->r8 = va_arg(args, unsigned long); break;
+ case 5: regs->r9 = va_arg(args, unsigned long); break;
+ }
+ }
#endif
}
@@ -513,7 +904,24 @@ unsigned long hypercall_create_continuation(
return op;
}
-static void relinquish_list(struct domain *d, struct list_head *list)
+#ifdef CONFIG_VMX
+static void vmx_relinquish_resources(struct vcpu *v)
+{
+ if ( !VMX_DOMAIN(v) )
+ return;
+
+ BUG_ON(v->arch.arch_vmx.vmcs == NULL);
+ free_vmcs(v->arch.arch_vmx.vmcs);
+ v->arch.arch_vmx.vmcs = 0;
+
+ free_monitor_pagetable(v);
+ rem_ac_timer(&v->arch.arch_vmx.vmx_platform.vmx_pit.pit_timer);
+}
+#else
+#define vmx_relinquish_resources(_v) ((void)0)
+#endif
+
+static void relinquish_memory(struct domain *d, struct list_head *list)
{
struct list_head *ent;
struct pfn_info *page;
@@ -571,394 +979,66 @@ static void relinquish_list(struct domain *d, struct list_head *list)
spin_unlock_recursive(&d->page_alloc_lock);
}
-
-void domain_relinquish_memory(struct domain *d)
+void domain_relinquish_resources(struct domain *d)
{
- /* Ensure that noone is running over the dead domain's page tables. */
- synchronise_pagetables(~0UL);
-
- /* Exit shadow mode before deconstructing final guest page table. */
- shadow_mode_disable(d);
-
- /* Drop the in-use reference to the page-table base. */
- if ( pagetable_val(d->mm.pagetable) != 0 )
- {
- put_page_and_type(&frame_table[pagetable_val(d->mm.pagetable) >>
- PAGE_SHIFT]);
- d->mm.pagetable = mk_pagetable(0);
- }
-
- /*
- * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
- * it automatically gets squashed when the guest's mappings go away.
- */
- destroy_gdt(d);
-
- /* Relinquish every page of memory. */
- relinquish_list(d, &d->xenpage_list);
- relinquish_list(d, &d->page_list);
-}
-
-
-int construct_dom0(struct domain *p,
- unsigned long alloc_start,
- unsigned long alloc_end,
- char *image_start, unsigned long image_len,
- char *initrd_start, unsigned long initrd_len,
- char *cmdline)
-{
- char *dst;
- int i, rc;
- unsigned long pfn, mfn;
- unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
- unsigned long nr_pt_pages;
- unsigned long count;
- l2_pgentry_t *l2tab, *l2start;
- l1_pgentry_t *l1tab = NULL, *l1start = NULL;
- struct pfn_info *page = NULL;
- start_info_t *si;
-
- /*
- * This fully describes the memory layout of the initial domain. All
- * *_start address are page-aligned, except v_start (and v_end) which are
- * superpage-aligned.
- */
- struct domain_setup_info dsi;
- unsigned long vinitrd_start;
- unsigned long vinitrd_end;
- unsigned long vphysmap_start;
- unsigned long vphysmap_end;
- unsigned long vstartinfo_start;
- unsigned long vstartinfo_end;
- unsigned long vstack_start;
- unsigned long vstack_end;
- unsigned long vpt_start;
- unsigned long vpt_end;
- unsigned long v_end;
-
- /* Machine address of next candidate page-table page. */
- unsigned long mpt_alloc;
-
- extern void physdev_init_dom0(struct domain *);
-
- /* Sanity! */
- if ( p->id != 0 )
- BUG();
- if ( test_bit(DF_CONSTRUCTED, &p->flags) )
- BUG();
-
- memset(&dsi, 0, sizeof(struct domain_setup_info));
-
- printk("*** LOADING DOMAIN 0 ***\n");
-
- /*
- * This is all a bit grim. We've moved the modules to the "safe" physical
- * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this
- * routine we're going to copy it down into the region that's actually
- * been allocated to domain 0. This is highly likely to be overlapping, so
- * we use a forward copy.
- *
- * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
- * 4GB and lots of network/disk cards that allocate loads of buffers.
- * We'll have to revisit this if we ever support PAE (64GB).
- */
+ struct vcpu *v;
- rc = parseelfimage(image_start, image_len, &dsi);
- if ( rc != 0 )
- return rc;
-
- if (dsi.load_bsd_symtab)
- loadelfsymtab(image_start, 0, &dsi);
-
- /* Set up domain options */
- if ( dsi.use_writable_pagetables )
- vm_assist(p, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+ BUG_ON(!cpus_empty(d->cpumask));
- if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
- {
- printk("Initial guest OS must load to a page boundary.\n");
- return -EINVAL;
- }
+ physdev_destroy_state(d);
- /*
- * Why do we need this? The number of page-table frames depends on the
- * size of the bootstrap address space. But the size of the address space
- * depends on the number of page-table frames (since each one is mapped
- * read-only). We have a pair of simultaneous equations in two unknowns,
- * which we solve by exhaustive search.
- */
- vinitrd_start = round_pgup(dsi.v_end);
- vinitrd_end = vinitrd_start + initrd_len;
- vphysmap_start = round_pgup(vinitrd_end);
- vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
- vpt_start = round_pgup(vphysmap_end);
- for ( nr_pt_pages = 2; ; nr_pt_pages++ )
- {
- vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
- vstartinfo_start = vpt_end;
- vstartinfo_end = vstartinfo_start + PAGE_SIZE;
- vstack_start = vstartinfo_end;
- vstack_end = vstack_start + PAGE_SIZE;
- v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
- if ( (v_end - vstack_end) < (512 << 10) )
- v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
- if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
- L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
- break;
- }
+ ptwr_destroy(d);
- printk("PHYSICAL MEMORY ARRANGEMENT:\n"
- " Kernel image: %p->%p\n"
- " Initrd image: %p->%p\n"
- " Dom0 alloc.: %08lx->%08lx\n",
- image_start, image_start + image_len,
- initrd_start, initrd_start + initrd_len,
- alloc_start, alloc_end);
- printk("VIRTUAL MEMORY ARRANGEMENT:\n"
- " Loaded kernel: %08lx->%08lx\n"
- " Init. ramdisk: %08lx->%08lx\n"
- " Phys-Mach map: %08lx->%08lx\n"
- " Page tables: %08lx->%08lx\n"
- " Start info: %08lx->%08lx\n"
- " Boot stack: %08lx->%08lx\n"
- " TOTAL: %08lx->%08lx\n",
- dsi.v_kernstart, dsi.v_kernend,
- vinitrd_start, vinitrd_end,
- vphysmap_start, vphysmap_end,
- vpt_start, vpt_end,
- vstartinfo_start, vstartinfo_end,
- vstack_start, vstack_end,
- dsi.v_start, v_end);
- printk(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
-
- if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
- {
- printk("Initial guest OS requires too much space\n"
- "(%luMB is greater than %luMB limit)\n",
- (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
- return -ENOMEM;
- }
+ /* Release device mappings of other domains */
+ gnttab_release_dev_mappings(d->grant_table);
- /*
- * Protect the lowest 1GB of memory. We use a temporary mapping there
- * from which we copy the kernel and ramdisk images.
- */
- if ( dsi.v_start < (1<<30) )
+ /* Drop the in-use references to page-table bases. */
+ for_each_vcpu ( d, v )
{
- printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
- return -EINVAL;
- }
-
- /* Paranoia: scrub DOM0's memory allocation. */
- printk("Scrubbing DOM0 RAM: ");
- dst = (char *)alloc_start;
- while ( dst < (char *)alloc_end )
- {
-#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
- printk(".");
- touch_nmi_watchdog();
- if ( ((char *)alloc_end - dst) > SCRUB_BYTES )
- {
- memset(dst, 0, SCRUB_BYTES);
- dst += SCRUB_BYTES;
- }
- else
+ if ( pagetable_get_paddr(v->arch.guest_table) != 0 )
{
- memset(dst, 0, (char *)alloc_end - dst);
- break;
- }
- }
- printk("done.\n");
+ if ( shadow_mode_refcounts(d) )
+ put_page(&frame_table[pagetable_get_pfn(v->arch.guest_table)]);
+ else
+ put_page_and_type(&frame_table[pagetable_get_pfn(v->arch.guest_table)]);
- /* Construct a frame-allocation list for the initial domain. */
- p->max_pages = ~0U;
- for ( mfn = (alloc_start>>PAGE_SHIFT);
- mfn < (alloc_end>>PAGE_SHIFT);
- mfn++ )
- {
- page = &frame_table[mfn];
- page->u.inuse.domain = p;
- page->u.inuse.type_info = 0;
- page->count_info = PGC_allocated | 1;
- list_add_tail(&page->list, &p->page_list);
- p->tot_pages++;
- }
-
- mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
-
- SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
- SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
-
- /*
- * We're basically forcing default RPLs to 1, so that our "what privilege
- * level are we returning to?" logic works.
- */
- p->thread.failsafe_selector = FLAT_GUESTOS_CS;
- p->thread.event_selector = FLAT_GUESTOS_CS;
- p->thread.guestos_ss = FLAT_GUESTOS_DS;
- for ( i = 0; i < 256; i++ )
- p->thread.traps[i].cs = FLAT_GUESTOS_CS;
-
- /* WARNING: The new domain must have its 'processor' field filled in! */
- l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
- memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
- l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
- l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(p->mm.perdomain_pt) | __PAGE_HYPERVISOR);
- p->mm.pagetable = mk_pagetable((unsigned long)l2start);
-
- l2tab += l2_table_offset(dsi.v_start);
- mfn = alloc_start >> PAGE_SHIFT;
- for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
- {
- if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
- {
- l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
- mpt_alloc += PAGE_SIZE;
- *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
- clear_page(l1tab);
- if ( count == 0 )
- l1tab += l1_table_offset(dsi.v_start);
+ v->arch.guest_table = mk_pagetable(0);
}
- *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
-
- page = &frame_table[mfn];
- if ( !get_page_and_type(page, p, PGT_writable_page) )
- BUG();
- mfn++;
- }
-
- /* Pages that are part of page tables must be read only. */
- l2tab = l2start + l2_table_offset(vpt_start);
- l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
- l1tab += l1_table_offset(vpt_start);
- l2tab++;
- for ( count = 0; count < nr_pt_pages; count++ )
- {
- *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
- page = &frame_table[l1_pgentry_to_pagenr(*l1tab)];
- if ( count == 0 )
- {
- page->u.inuse.type_info &= ~PGT_type_mask;
- page->u.inuse.type_info |= PGT_l2_page_table;
-
- /*
- * No longer writable: decrement the type_count.
- * Installed as CR3: increment both the ref_count and type_count.
- * Net: just increment the ref_count.
- */
- get_page(page, p); /* an extra ref because of readable mapping */
-
- /* Get another ref to L2 page so that it can be pinned. */
- if ( !get_page_and_type(page, p, PGT_l2_page_table) )
- BUG();
- set_bit(_PGT_pinned, &page->u.inuse.type_info);
- }
- else
+ if ( pagetable_get_paddr(v->arch.guest_table_user) != 0 )
{
- page->u.inuse.type_info &= ~PGT_type_mask;
- page->u.inuse.type_info |= PGT_l1_page_table;
- page->u.inuse.type_info |=
- ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
-
- /*
- * No longer writable: decrement the type_count.
- * This is an L1 page, installed in a validated L2 page:
- * increment both the ref_count and type_count.
- * Net: just increment the ref_count.
- */
- get_page(page, p); /* an extra ref because of readable mapping */
- }
- l1tab++;
- if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
- l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
- }
-
- /* Mask all upcalls... */
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
-
- /* Install the new page tables. */
- __cli();
- write_ptbase(&p->mm);
-
- /* Copy the OS image. */
- (void)loadelfimage(image_start);
-
- if (dsi.load_bsd_symtab)
- loadelfsymtab(image_start, 1, &dsi);
-
- /* Copy the initial ramdisk. */
- if ( initrd_len != 0 )
- memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-
- /* Set up start info area. */
- si = (start_info_t *)vstartinfo_start;
- memset(si, 0, PAGE_SIZE);
- si->nr_pages = p->tot_pages;
- si->shared_info = virt_to_phys(p->shared_info);
- si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
- si->pt_base = vpt_start;
- si->nr_pt_frames = nr_pt_pages;
- si->mfn_list = vphysmap_start;
-
- /* Write the phys->machine and machine->phys table entries. */
- for ( pfn = 0; pfn < p->tot_pages; pfn++ )
- {
- mfn = pfn + (alloc_start>>PAGE_SHIFT);
-#ifndef NDEBUG
-#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
- if ( pfn > REVERSE_START )
- mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
-#endif
- ((unsigned long *)vphysmap_start)[pfn] = mfn;
- machine_to_phys_mapping[mfn] = pfn;
- }
-
- if ( initrd_len != 0 )
- {
- si->mod_start = vinitrd_start;
- si->mod_len = initrd_len;
- printk("Initrd len 0x%lx, start at 0x%08lx\n",
- si->mod_len, si->mod_start);
- }
+ if ( shadow_mode_refcounts(d) )
+ put_page(&frame_table[pagetable_get_pfn(v->arch.guest_table_user)]);
+ else
+ put_page_and_type(&frame_table[pagetable_get_pfn(v->arch.guest_table_user)]);
- dst = si->cmd_line;
- if ( cmdline != NULL )
- {
- for ( i = 0; i < 255; i++ )
- {
- if ( cmdline[i] == '\0' )
- break;
- *dst++ = cmdline[i];
+ v->arch.guest_table_user = mk_pagetable(0);
}
- }
- *dst = '\0';
- /* Reinstate the caller's page tables. */
- write_ptbase(&current->mm);
- __sti();
+ vmx_relinquish_resources(v);
+ }
- /* Destroy low mappings - they were only for our convenience. */
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
- l2start[i] = mk_l2_pgentry(0);
- zap_low_mappings(); /* Do the same for the idle page tables. */
-
- /* DOM0 gets access to everything. */
- physdev_init_dom0(p);
+ shadow_mode_disable(d);
- set_bit(DF_CONSTRUCTED, &p->flags);
+ /*
+ * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
+ * it automatically gets squashed when the guest's mappings go away.
+ */
+ for_each_vcpu(d, v)
+ destroy_gdt(v);
- new_thread(p, dsi.v_kernentry, vstack_end, vstartinfo_start);
+ /* Relinquish every page of memory. */
+ relinquish_memory(d, &d->xenpage_list);
+ relinquish_memory(d, &d->page_list);
+}
-#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
- shadow_lock(&p->mm);
- shadow_mode_enable(p, SHM_test);
- shadow_unlock(&p->mm);
-#endif
- return 0;
-}
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
new file mode 100644
index 0000000000..78dea289e6
--- /dev/null
+++ b/xen/arch/x86/domain_build.c
@@ -0,0 +1,691 @@
+/******************************************************************************
+ * domain_build.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/ctype.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/elf.h>
+#include <xen/kernel.h>
+#include <asm/regs.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/shadow.h>
+
+/* opt_dom0_mem: memory allocated to domain 0. */
+static unsigned int opt_dom0_mem;
+static void parse_dom0_mem(char *s)
+{
+ unsigned long long bytes = parse_size_and_unit(s);
+ /* If no unit is specified we default to kB units, not bytes. */
+ if ( isdigit(s[strlen(s)-1]) )
+ opt_dom0_mem = (unsigned int)bytes;
+ else
+ opt_dom0_mem = (unsigned int)(bytes >> 10);
+}
+custom_param("dom0_mem", parse_dom0_mem);
+
+static unsigned int opt_dom0_shadow = 0;
+boolean_param("dom0_shadow", opt_dom0_shadow);
+
+static unsigned int opt_dom0_translate = 0;
+boolean_param("dom0_translate", opt_dom0_translate);
+
+#if defined(__i386__)
+/* No ring-3 access in initial leaf page tables. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
+#elif defined(__x86_64__)
+/* Allow ring-3 access in long mode as guest cannot use ring 1. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#endif
+
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+
+static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
+{
+ struct pfn_info *page;
+ unsigned int order = get_order(max * PAGE_SIZE);
+ if ( (max & (max-1)) != 0 )
+ order--;
+ while ( (page = alloc_domheap_pages(d, order)) == NULL )
+ if ( order-- == 0 )
+ break;
+ return page;
+}
+
+int construct_dom0(struct domain *d,
+ unsigned long _image_start, unsigned long image_len,
+ unsigned long _initrd_start, unsigned long initrd_len,
+ char *cmdline)
+{
+ int i, rc, dom0_pae, xen_pae;
+ unsigned long pfn, mfn;
+ unsigned long nr_pages;
+ unsigned long nr_pt_pages;
+ unsigned long alloc_start;
+ unsigned long alloc_end;
+ unsigned long count;
+ struct pfn_info *page = NULL;
+ start_info_t *si;
+ struct vcpu *v = d->vcpu[0];
+#if defined(__i386__)
+ char *image_start = (char *)_image_start; /* use lowmem mappings */
+ char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
+#elif defined(__x86_64__)
+ char *image_start = __va(_image_start);
+ char *initrd_start = __va(_initrd_start);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
+ l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+ l3_pgentry_t *l3tab = NULL, *l3start = NULL;
+#endif
+ l2_pgentry_t *l2tab = NULL, *l2start = NULL;
+ l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+
+ /*
+ * This fully describes the memory layout of the initial domain. All
+ * *_start address are page-aligned, except v_start (and v_end) which are
+ * superpage-aligned.
+ */
+ struct domain_setup_info dsi;
+ unsigned long vinitrd_start;
+ unsigned long vinitrd_end;
+ unsigned long vphysmap_start;
+ unsigned long vphysmap_end;
+ unsigned long vstartinfo_start;
+ unsigned long vstartinfo_end;
+ unsigned long vstack_start;
+ unsigned long vstack_end;
+ unsigned long vpt_start;
+ unsigned long vpt_end;
+ unsigned long v_end;
+
+ /* Machine address of next candidate page-table page. */
+ unsigned long mpt_alloc;
+
+ extern void physdev_init_dom0(struct domain *);
+ extern void translate_l2pgtable(
+ struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn);
+
+ /* Sanity! */
+ if ( d->domain_id != 0 )
+ BUG();
+ if ( test_bit(_DOMF_constructed, &d->domain_flags) )
+ BUG();
+
+ memset(&dsi, 0, sizeof(struct domain_setup_info));
+ dsi.image_addr = (unsigned long)image_start;
+ dsi.image_len = image_len;
+
+ printk("*** LOADING DOMAIN 0 ***\n");
+
+ /* By default DOM0 is allocated all available memory. */
+ d->max_pages = ~0U;
+ if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 )
+ nr_pages = avail_domheap_pages() +
+ ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+ ((image_len + PAGE_SIZE - 1) >> PAGE_SHIFT);
+ if ( (page = alloc_largest(d, nr_pages)) == NULL )
+ panic("Not enough RAM for DOM0 reservation.\n");
+ alloc_start = page_to_phys(page);
+ alloc_end = alloc_start + (d->tot_pages << PAGE_SHIFT);
+
+ if ( (rc = parseelfimage(&dsi)) != 0 )
+ return rc;
+
+ if ( dsi.xen_section_string == NULL )
+ {
+ printk("Not a Xen-ELF image: '__xen_guest' section not found.\n");
+ return -EINVAL;
+ }
+
+ dom0_pae = !!strstr(dsi.xen_section_string, "PAE=yes");
+ xen_pae = (CONFIG_PAGING_LEVELS == 3);
+ if ( dom0_pae != xen_pae )
+ {
+ printk("PAE mode mismatch between Xen and DOM0 (xen=%s, dom0=%s)\n",
+ xen_pae ? "yes" : "no", dom0_pae ? "yes" : "no");
+ return -EINVAL;
+ }
+
+ /* Align load address to 4MB boundary. */
+ dsi.v_start &= ~((1UL<<22)-1);
+
+ /*
+ * Why do we need this? The number of page-table frames depends on the
+ * size of the bootstrap address space. But the size of the address space
+ * depends on the number of page-table frames (since each one is mapped
+ * read-only). We have a pair of simultaneous equations in two unknowns,
+ * which we solve by exhaustive search.
+ */
+ vinitrd_start = round_pgup(dsi.v_end);
+ vinitrd_end = vinitrd_start + initrd_len;
+ vphysmap_start = round_pgup(vinitrd_end);
+ vphysmap_end = vphysmap_start + (nr_pages * sizeof(u32));
+ vpt_start = round_pgup(vphysmap_end);
+ for ( nr_pt_pages = 2; ; nr_pt_pages++ )
+ {
+ vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
+ vstartinfo_start = vpt_end;
+ vstartinfo_end = vstartinfo_start + PAGE_SIZE;
+ vstack_start = vstartinfo_end;
+ vstack_end = vstack_start + PAGE_SIZE;
+ v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
+ if ( (v_end - vstack_end) < (512UL << 10) )
+ v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
+#if defined(__i386__) && !defined(CONFIG_X86_PAE)
+ if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
+ L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
+ break;
+#elif defined(__i386__) && defined(CONFIG_X86_PAE)
+ /* 5 pages: 1x 3rd + 4x 2nd level */
+ if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
+ L2_PAGETABLE_SHIFT) + 5) <= nr_pt_pages )
+ break;
+#elif defined(__x86_64__)
+#define NR(_l,_h,_s) \
+ (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
+ ((_l) & ~((1UL<<(_s))-1))) >> (_s))
+ if ( (1 + /* # L4 */
+ NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
+ NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
+ NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
+ <= nr_pt_pages )
+ break;
+#endif
+ }
+
+ if ( (v_end - dsi.v_start) > (alloc_end - alloc_start) )
+ panic("Insufficient contiguous RAM to build kernel image.\n");
+
+ printk("PHYSICAL MEMORY ARRANGEMENT:\n"
+ " Dom0 alloc.: %p->%p",
+ _p(alloc_start), _p(alloc_end));
+ if ( d->tot_pages < nr_pages )
+ printk(" (%lu pages to be allocated)",
+ nr_pages - d->tot_pages);
+ printk("\nVIRTUAL MEMORY ARRANGEMENT:\n"
+ " Loaded kernel: %p->%p\n"
+ " Init. ramdisk: %p->%p\n"
+ " Phys-Mach map: %p->%p\n"
+ " Page tables: %p->%p\n"
+ " Start info: %p->%p\n"
+ " Boot stack: %p->%p\n"
+ " TOTAL: %p->%p\n",
+ _p(dsi.v_kernstart), _p(dsi.v_kernend),
+ _p(vinitrd_start), _p(vinitrd_end),
+ _p(vphysmap_start), _p(vphysmap_end),
+ _p(vpt_start), _p(vpt_end),
+ _p(vstartinfo_start), _p(vstartinfo_end),
+ _p(vstack_start), _p(vstack_end),
+ _p(dsi.v_start), _p(v_end));
+ printk(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));
+
+ if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+ {
+ printk("Initial guest OS requires too much space\n"
+ "(%luMB is greater than %luMB limit)\n",
+ (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+ return -ENOMEM;
+ }
+
+ mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
+
+ /*
+ * We're basically forcing default RPLs to 1, so that our "what privilege
+ * level are we returning to?" logic works.
+ */
+ v->arch.guest_context.kernel_ss = FLAT_KERNEL_SS;
+ for ( i = 0; i < 256; i++ )
+ v->arch.guest_context.trap_ctxt[i].cs = FLAT_KERNEL_CS;
+
+#if defined(__i386__)
+
+ v->arch.guest_context.failsafe_callback_cs = FLAT_KERNEL_CS;
+ v->arch.guest_context.event_callback_cs = FLAT_KERNEL_CS;
+
+ /*
+ * Protect the lowest 1GB of memory. We use a temporary mapping there
+ * from which we copy the kernel and ramdisk images.
+ */
+ if ( dsi.v_start < (1UL<<30) )
+ {
+ printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
+ return -EINVAL;
+ }
+
+ /* WARNING: The new domain must have its 'processor' field filled in! */
+#if CONFIG_PAGING_LEVELS == 3
+ l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+ l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
+ memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
+ for (i = 0; i < 4; i++) {
+ l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT);
+ l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
+ l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
+ }
+ {
+ unsigned long va;
+ for (va = PERDOMAIN_VIRT_START; va < PERDOMAIN_VIRT_END;
+ va += (1 << L2_PAGETABLE_SHIFT)) {
+ l2tab[va >> L2_PAGETABLE_SHIFT] =
+ l2e_from_paddr(__pa(d->arch.mm_perdomain_pt) +
+ (va-PERDOMAIN_VIRT_START),
+ __PAGE_HYPERVISOR);
+ }
+ }
+ v->arch.guest_table = mk_pagetable((unsigned long)l3start);
+#else
+ l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+ memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
+ l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
+ l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
+ v->arch.guest_table = mk_pagetable((unsigned long)l2start);
+#endif
+
+ l2tab += l2_linear_offset(dsi.v_start);
+ mfn = alloc_start >> PAGE_SHIFT;
+ for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+ {
+ if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+ {
+ l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
+ mpt_alloc += PAGE_SIZE;
+ *l2tab = l2e_from_paddr((unsigned long)l1start, L2_PROT);
+ l2tab++;
+ clear_page(l1tab);
+ if ( count == 0 )
+ l1tab += l1_table_offset(dsi.v_start);
+ }
+ *l1tab = l1e_from_pfn(mfn, L1_PROT);
+ l1tab++;
+
+ page = &frame_table[mfn];
+ if ( !get_page_and_type(page, d, PGT_writable_page) )
+ BUG();
+
+ mfn++;
+ }
+
+ /* Pages that are part of page tables must be read only. */
+ l2tab = l2start + l2_linear_offset(vpt_start);
+ l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*l2tab);
+ l1tab += l1_table_offset(vpt_start);
+ for ( count = 0; count < nr_pt_pages; count++ )
+ {
+ page = &frame_table[l1e_get_pfn(*l1tab)];
+ if ( !opt_dom0_shadow )
+ l1e_remove_flags(*l1tab, _PAGE_RW);
+ else
+ if ( !get_page_type(page, PGT_writable_page) )
+ BUG();
+
+#if CONFIG_PAGING_LEVELS == 3
+ switch (count) {
+ case 0:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l3_page_table;
+ get_page(page, d); /* an extra ref because of readable mapping */
+
+ /* Get another ref to L3 page so that it can be pinned. */
+ if ( !get_page_and_type(page, d, PGT_l3_page_table) )
+ BUG();
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ break;
+ case 1 ... 4:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l2_page_table;
+ page->u.inuse.type_info |=
+ (count-1) << PGT_va_shift;
+ get_page(page, d); /* an extra ref because of readable mapping */
+ break;
+ default:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l1_page_table;
+ page->u.inuse.type_info |=
+ ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-5))<<PGT_va_shift;
+ get_page(page, d); /* an extra ref because of readable mapping */
+ break;
+ }
+#else
+ if ( count == 0 )
+ {
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l2_page_table;
+
+ /*
+ * No longer writable: decrement the type_count.
+ * Installed as CR3: increment both the ref_count and type_count.
+ * Net: just increment the ref_count.
+ */
+ get_page(page, d); /* an extra ref because of readable mapping */
+
+ /* Get another ref to L2 page so that it can be pinned. */
+ if ( !get_page_and_type(page, d, PGT_l2_page_table) )
+ BUG();
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ }
+ else
+ {
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l1_page_table;
+ page->u.inuse.type_info |=
+ ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
+
+ /*
+ * No longer writable: decrement the type_count.
+ * This is an L1 page, installed in a validated L2 page:
+ * increment both the ref_count and type_count.
+ * Net: just increment the ref_count.
+ */
+ get_page(page, d); /* an extra ref because of readable mapping */
+ }
+#endif
+ if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
+ l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*++l2tab);
+ }
+
+#elif defined(__x86_64__)
+
+ /* Overlap with Xen protected area? */
+ if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
+ (v_end > HYPERVISOR_VIRT_START) )
+ {
+ printk("DOM0 image overlaps with Xen private area.\n");
+ return -EINVAL;
+ }
+
+ /* WARNING: The new domain must have its 'processor' field filled in! */
+ phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
+ l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
+ l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
+ l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
+ l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
+ v->arch.guest_table = mk_pagetable(__pa(l4start));
+
+ l4tab += l4_table_offset(dsi.v_start);
+ mfn = alloc_start >> PAGE_SHIFT;
+ for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+ {
+ if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+ {
+ phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
+ l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ clear_page(l1tab);
+ if ( count == 0 )
+ l1tab += l1_table_offset(dsi.v_start);
+ if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
+ {
+ phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
+ l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ clear_page(l2tab);
+ if ( count == 0 )
+ l2tab += l2_table_offset(dsi.v_start);
+ if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
+ {
+ phys_to_page(mpt_alloc)->u.inuse.type_info =
+ PGT_l3_page_table;
+ l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+ clear_page(l3tab);
+ if ( count == 0 )
+ l3tab += l3_table_offset(dsi.v_start);
+ *l4tab = l4e_from_paddr(__pa(l3start), L4_PROT);
+ l4tab++;
+ }
+ *l3tab = l3e_from_paddr(__pa(l2start), L3_PROT);
+ l3tab++;
+ }
+ *l2tab = l2e_from_paddr(__pa(l1start), L2_PROT);
+ l2tab++;
+ }
+ *l1tab = l1e_from_pfn(mfn, L1_PROT);
+ l1tab++;
+
+ page = &frame_table[mfn];
+ if ( (page->u.inuse.type_info == 0) &&
+ !get_page_and_type(page, d, PGT_writable_page) )
+ BUG();
+
+ mfn++;
+ }
+
+ /* Pages that are part of page tables must be read only. */
+ l4tab = l4start + l4_table_offset(vpt_start);
+ l3start = l3tab = l4e_to_l3e(*l4tab);
+ l3tab += l3_table_offset(vpt_start);
+ l2start = l2tab = l3e_to_l2e(*l3tab);
+ l2tab += l2_table_offset(vpt_start);
+ l1start = l1tab = l2e_to_l1e(*l2tab);
+ l1tab += l1_table_offset(vpt_start);
+ for ( count = 0; count < nr_pt_pages; count++ )
+ {
+ l1e_remove_flags(*l1tab, _PAGE_RW);
+ page = &frame_table[l1e_get_pfn(*l1tab)];
+
+ /* Read-only mapping + PGC_allocated + page-table page. */
+ page->count_info = PGC_allocated | 3;
+ page->u.inuse.type_info |= PGT_validated | 1;
+
+ /* Top-level p.t. is pinned. */
+ if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
+ {
+ page->count_info += 1;
+ page->u.inuse.type_info += 1 | PGT_pinned;
+ }
+
+ /* Iterate. */
+ if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
+ {
+ if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
+ {
+ if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
+ l3start = l3tab = l4e_to_l3e(*++l4tab);
+ l2start = l2tab = l3e_to_l2e(*l3tab);
+ }
+ l1start = l1tab = l2e_to_l1e(*l2tab);
+ }
+ }
+
+#endif /* __x86_64__ */
+
+ /* Mask all upcalls... */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+ d->shared_info->n_vcpu = num_online_cpus();
+
+ /* Set up monitor table */
+ update_pagetables(v);
+
+ /* Install the new page tables. */
+ local_irq_disable();
+ write_ptbase(v);
+
+ /* Copy the OS image and free temporary buffer. */
+ (void)loadelfimage(&dsi);
+
+ init_domheap_pages(
+ _image_start, (_image_start+image_len+PAGE_SIZE-1) & PAGE_MASK);
+
+ /* Copy the initial ramdisk and free temporary buffer. */
+ if ( initrd_len != 0 )
+ {
+ memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+ init_domheap_pages(
+ _initrd_start, (_initrd_start+initrd_len+PAGE_SIZE-1) & PAGE_MASK);
+ }
+
+ d->next_io_page = max_page;
+
+ /* Set up start info area. */
+ si = (start_info_t *)vstartinfo_start;
+ memset(si, 0, PAGE_SIZE);
+ si->nr_pages = nr_pages;
+
+ if ( opt_dom0_translate )
+ {
+ si->shared_info = d->next_io_page << PAGE_SHIFT;
+ set_machinetophys(virt_to_phys(d->shared_info) >> PAGE_SHIFT,
+ d->next_io_page);
+ d->next_io_page++;
+ }
+ else
+ si->shared_info = virt_to_phys(d->shared_info);
+
+ si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
+ si->pt_base = vpt_start;
+ si->nr_pt_frames = nr_pt_pages;
+ si->mfn_list = vphysmap_start;
+
+ /* Write the phys->machine and machine->phys table entries. */
+ for ( pfn = 0; pfn < d->tot_pages; pfn++ )
+ {
+ mfn = pfn + (alloc_start>>PAGE_SHIFT);
+#ifndef NDEBUG
+#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
+ if ( !opt_dom0_translate && (pfn > REVERSE_START) )
+ mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
+#endif
+ ((u32 *)vphysmap_start)[pfn] = mfn;
+ machine_to_phys_mapping[mfn] = pfn;
+ }
+ while ( pfn < nr_pages )
+ {
+ if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
+ panic("Not enough RAM for DOM0 reservation.\n");
+ while ( pfn < d->tot_pages )
+ {
+ mfn = page_to_pfn(page);
+#ifndef NDEBUG
+#define pfn (nr_pages - 1 - (pfn - ((alloc_end - alloc_start) >> PAGE_SHIFT)))
+#endif
+ ((u32 *)vphysmap_start)[pfn] = mfn;
+ machine_to_phys_mapping[mfn] = pfn;
+#undef pfn
+ page++; pfn++;
+ }
+ }
+
+ if ( initrd_len != 0 )
+ {
+ si->mod_start = vinitrd_start;
+ si->mod_len = initrd_len;
+ printk("Initrd len 0x%lx, start at 0x%lx\n",
+ si->mod_len, si->mod_start);
+ }
+
+ memset(si->cmd_line, 0, sizeof(si->cmd_line));
+ if ( cmdline != NULL )
+ strncpy((char *)si->cmd_line, cmdline, sizeof(si->cmd_line)-1);
+
+ /* Reinstate the caller's page tables. */
+ write_ptbase(current);
+ local_irq_enable();
+
+#if defined(__i386__)
+ /* Destroy low mappings - they were only for our convenience. */
+ zap_low_mappings(l2start);
+ zap_low_mappings(idle_pg_table_l2);
+#endif
+
+ /* DOM0 gets access to everything. */
+ physdev_init_dom0(d);
+
+ set_bit(_DOMF_constructed, &d->domain_flags);
+
+ new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
+
+ if ( opt_dom0_shadow || opt_dom0_translate )
+ {
+ shadow_mode_enable(d, (opt_dom0_translate
+ ? SHM_enable | SHM_refcounts | SHM_translate
+ : SHM_enable));
+ if ( opt_dom0_translate )
+ {
+#if defined(__i386__) && defined(CONFIG_X86_PAE)
+ printk("FIXME: PAE code needed here: %s:%d (%s)\n",
+ __FILE__, __LINE__, __FUNCTION__);
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+#else
+ /* Hmm, what does this do?
+ It looks like it isn't portable across 32/64-bit and PAE/non-PAE ...
+ -- kraxel */
+
+ /* mafetter: This code is mostly a hack in order to be able to
+ * test with dom0's which are running with shadow translate.
+ * I expect we'll rip this out once we have a stable set of
+ * domU clients which use the various shadow modes, but it's
+ * useful to leave this here for now...
+ */
+
+ // map this domain's p2m table into current page table,
+ // so that we can easily access it.
+ //
+ ASSERT( root_get_intpte(idle_pg_table[1]) == 0 );
+ ASSERT( pagetable_get_paddr(d->arch.phys_table) );
+ idle_pg_table[1] = root_from_paddr(
+ pagetable_get_paddr(d->arch.phys_table), __PAGE_HYPERVISOR);
+ translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
+ pagetable_get_pfn(v->arch.guest_table));
+ idle_pg_table[1] = root_empty();
+ local_flush_tlb();
+#endif
+ }
+
+ update_pagetables(v); /* XXX SMP */
+ }
+
+ return 0;
+}
+
+int elf_sanity_check(Elf_Ehdr *ehdr)
+{
+ if ( !IS_ELF(*ehdr) ||
+#if defined(__i386__)
+ (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
+ (ehdr->e_machine != EM_386) ||
+#elif defined(__x86_64__)
+ (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
+ (ehdr->e_machine != EM_X86_64) ||
+#endif
+ (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
+ (ehdr->e_type != ET_EXEC) )
+ {
+ printk("DOM0 image is not a Xen-compatible Elf image.\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c
index 50f1b5fc8b..aa41ac78da 100644
--- a/xen/arch/x86/e820.c
+++ b/xen/arch/x86/e820.c
@@ -3,6 +3,11 @@
#include <xen/lib.h>
#include <asm/e820.h>
+/* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
+unsigned long long opt_mem;
+static void parse_mem(char *s) { opt_mem = parse_size_and_unit(s); }
+custom_param("mem", parse_mem);
+
struct e820map e820;
static void __init add_memory_region(unsigned long long start,
@@ -27,15 +32,22 @@ static void __init add_memory_region(unsigned long long start,
#define E820_DEBUG 1
-static void __init print_memory_map(void)
+#ifndef NDEBUG
+#define __init_unless_debugging
+#else
+#define __init_unless_debugging __init
+#endif
+
+void __init_unless_debugging
+print_e820_memory_map(struct e820entry *map, int entries)
{
int i;
- for (i = 0; i < e820.nr_map; i++) {
+ for (i = 0; i < entries; i++) {
printk(" %016Lx - %016Lx ",
- e820.map[i].addr,
- e820.map[i].addr + e820.map[i].size);
- switch (e820.map[i].type) {
+ (unsigned long long)(map[i].addr),
+ (unsigned long long)(map[i].addr + map[i].size));
+ switch (map[i].type) {
case E820_RAM: printk("(usable)\n");
break;
case E820_RESERVED:
@@ -47,7 +59,7 @@ static void __init print_memory_map(void)
case E820_NVS:
printk("(ACPI NVS)\n");
break;
- default: printk("type %u\n", e820.map[i].type);
+ default: printk("type %u\n", map[i].type);
break;
}
}
@@ -305,17 +317,11 @@ static unsigned long __init find_max_pfn(void)
return max_pfn;
}
-static void __init machine_specific_memory_setup(
- struct e820entry *raw, int raw_nr)
+#ifdef __i386__
+static void __init clip_4gb(void)
{
- char nr = (char)raw_nr;
int i;
- sanitize_e820_map(raw, &nr);
-
- (void)copy_e820_map(raw, nr);
-
-#ifdef __i386__
/* 32-bit systems restricted to a 4GB physical memory map. */
for ( i = 0; i < e820.nr_map; i++ )
{
@@ -335,13 +341,51 @@ static void __init machine_specific_memory_setup(
e820.nr_map = i + 1;
}
}
+}
+#else
+#define clip_4gb() ((void)0)
#endif
+
+static void __init clip_mem(void)
+{
+ int i;
+
+ if ( !opt_mem )
+ return;
+
+ for ( i = 0; i < e820.nr_map; i++ )
+ {
+ if ( (e820.map[i].addr + e820.map[i].size) <= opt_mem )
+ continue;
+ printk("Truncating memory map to %lukB\n",
+ (unsigned long)(opt_mem >> 10));
+ if ( e820.map[i].addr >= opt_mem )
+ {
+ e820.nr_map = i;
+ }
+ else
+ {
+ e820.map[i].size = opt_mem - e820.map[i].addr;
+ e820.nr_map = i + 1;
+ }
+ }
+}
+
+static void __init machine_specific_memory_setup(
+ struct e820entry *raw, int *raw_nr)
+{
+ char nr = (char)*raw_nr;
+ sanitize_e820_map(raw, &nr);
+ *raw_nr = nr;
+ (void)copy_e820_map(raw, nr);
+ clip_4gb();
+ clip_mem();
}
-unsigned long init_e820(struct e820entry *raw, int raw_nr)
+unsigned long __init init_e820(struct e820entry *raw, int *raw_nr)
{
machine_specific_memory_setup(raw, raw_nr);
printk(KERN_INFO "Physical RAM map:\n");
- print_memory_map();
+ print_e820_memory_map(e820.map, e820.nr_map);
return find_max_pfn();
}
diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c
index bb70a3842b..3d7fe36151 100644
--- a/xen/arch/x86/extable.c
+++ b/xen/arch/x86/extable.c
@@ -3,6 +3,12 @@
#include <xen/spinlock.h>
#include <asm/uaccess.h>
+#ifdef PERF_COUNTERS
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <asm/current.h>
+#endif
+
extern struct exception_table_entry __start___ex_table[];
extern struct exception_table_entry __stop___ex_table[];
extern struct exception_table_entry __start___pre_ex_table[];
@@ -63,11 +69,15 @@ search_exception_table(unsigned long addr)
}
unsigned long
-search_pre_exception_table(struct xen_regs *regs)
+search_pre_exception_table(struct cpu_user_regs *regs)
{
unsigned long addr = (unsigned long)regs->eip;
unsigned long fixup = search_one_table(
__start___pre_ex_table, __stop___pre_ex_table-1, addr);
- DPRINTK("Pre-exception: %08lx -> %08lx\n", addr, fixup);
+ DPRINTK("Pre-exception: %p -> %p\n", _p(addr), _p(fixup));
+#ifdef PERF_COUNTERS
+ if ( fixup )
+ perfc_incrc(exception_fixed);
+#endif
return fixup;
}
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index 2079bf51c6..5e132a7a14 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -57,7 +57,7 @@ void write_cr3(unsigned long cr3)
*/
skip_clocktick:
- __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (cr3) : "memory" );
+ __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
/*
* STEP 3. Update this CPU's timestamp. Note that this happens *after*
diff --git a/xen/arch/x86/genapic/bigsmp.c b/xen/arch/x86/genapic/bigsmp.c
new file mode 100644
index 0000000000..93c00b9208
--- /dev/null
+++ b/xen/arch/x86/genapic/bigsmp.c
@@ -0,0 +1,52 @@
+/*
+ * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs.
+ * Drives the local APIC in "clustered mode".
+ */
+#define APIC_DEFINITION 1
+#include <xen/config.h>
+#include <xen/cpumask.h>
+#include <asm/current.h>
+#include <asm/mpspec.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <xen/init.h>
+#include <xen/dmi.h>
+#include <asm/mach-bigsmp/mach_apic.h>
+#include <asm/mach-bigsmp/mach_apicdef.h>
+#include <asm/mach-bigsmp/mach_ipi.h>
+#include <asm/mach-default/mach_mpparse.h>
+
+static int dmi_bigsmp; /* can be set by dmi scanners */
+
+static __init int hp_ht_bigsmp(struct dmi_system_id *d)
+{
+ printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
+ dmi_bigsmp = 1;
+ return 0;
+}
+
+
+static struct dmi_system_id __initdata bigsmp_dmi_table[] = {
+ { hp_ht_bigsmp, "HP ProLiant DL760 G2", {
+ DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
+ }},
+
+ { hp_ht_bigsmp, "HP ProLiant DL740", {
+ DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
+ }},
+ { }
+};
+
+
+static __init int probe_bigsmp(void)
+{
+ dmi_check_system(bigsmp_dmi_table);
+ return dmi_bigsmp;
+}
+
+struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp);
diff --git a/xen/arch/x86/genapic/default.c b/xen/arch/x86/genapic/default.c
new file mode 100644
index 0000000000..d84cf41141
--- /dev/null
+++ b/xen/arch/x86/genapic/default.c
@@ -0,0 +1,27 @@
+/*
+ * Default generic APIC driver. This handles up to 8 CPUs.
+ */
+#define APIC_DEFINITION 1
+#include <xen/config.h>
+#include <xen/cpumask.h>
+#include <asm/current.h>
+#include <asm/mpspec.h>
+#include <asm/mach-default/mach_apicdef.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <xen/kernel.h>
+#include <xen/string.h>
+#include <xen/smp.h>
+#include <xen/init.h>
+#include <asm/mach-default/mach_apic.h>
+#include <asm/mach-default/mach_ipi.h>
+#include <asm/mach-default/mach_mpparse.h>
+
+/* should be called last. */
+static __init int probe_default(void)
+{
+ return 1;
+}
+
+struct genapic apic_default = APIC_INIT("default", probe_default);
diff --git a/xen/arch/x86/genapic/es7000.c b/xen/arch/x86/genapic/es7000.c
new file mode 100644
index 0000000000..05e94b0f04
--- /dev/null
+++ b/xen/arch/x86/genapic/es7000.c
@@ -0,0 +1,29 @@
+/*
+ * APIC driver for the Unisys ES7000 chipset.
+ */
+#define APIC_DEFINITION 1
+#include <xen/config.h>
+#include <xen/cpumask.h>
+#include <asm/current.h>
+#include <asm/mpspec.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <asm/atomic.h>
+#include <xen/kernel.h>
+#include <xen/string.h>
+#include <xen/smp.h>
+#include <xen/init.h>
+#include <asm/mach-es7000/mach_apicdef.h>
+#include <asm/mach-es7000/mach_apic.h>
+#include <asm/mach-es7000/mach_ipi.h>
+#include <asm/mach-es7000/mach_mpparse.h>
+#include <asm/mach-es7000/mach_wakecpu.h>
+
+static __init int probe_es7000(void)
+{
+ /* probed later in mptable/ACPI hooks */
+ return 0;
+}
+
+struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/xen/arch/x86/genapic/es7000.h b/xen/arch/x86/genapic/es7000.h
new file mode 100644
index 0000000000..70691f0c4c
--- /dev/null
+++ b/xen/arch/x86/genapic/es7000.h
@@ -0,0 +1,110 @@
+/*
+ * Written by: Garry Forsgren, Unisys Corporation
+ * Natalie Protasevich, Unisys Corporation
+ * This file contains the code to configure and interface
+ * with Unisys ES7000 series hardware system manager.
+ *
+ * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Unisys Corporation, Township Line & Union Meeting
+ * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
+ *
+ * http://www.unisys.com
+ */
+
+#define MIP_REG 1
+#define MIP_PSAI_REG 4
+
+#define MIP_BUSY 1
+#define MIP_SPIN 0xf0000
+#define MIP_VALID 0x0100000000000000ULL
+#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
+
+#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
+
+struct mip_reg_info {
+ unsigned long long mip_info;
+ unsigned long long delivery_info;
+ unsigned long long host_reg;
+ unsigned long long mip_reg;
+};
+
+struct part_info {
+ unsigned char type;
+ unsigned char length;
+ unsigned char part_id;
+ unsigned char apic_mode;
+ unsigned long snum;
+ char ptype[16];
+ char sname[64];
+ char pname[64];
+};
+
+struct psai {
+ unsigned long long entry_type;
+ unsigned long long addr;
+ unsigned long long bep_addr;
+};
+
+struct es7000_mem_info {
+ unsigned char type;
+ unsigned char length;
+ unsigned char resv[6];
+ unsigned long long start;
+ unsigned long long size;
+};
+
+struct es7000_oem_table {
+ unsigned long long hdr;
+ struct mip_reg_info mip;
+ struct part_info pif;
+ struct es7000_mem_info shm;
+ struct psai psai;
+};
+
+struct acpi_table_sdt {
+ unsigned long pa;
+ unsigned long count;
+ struct {
+ unsigned long pa;
+ enum acpi_table_id id;
+ unsigned long size;
+ } entry[50];
+};
+
+struct oem_table {
+ struct acpi_table_header Header;
+ u32 OEMTableAddr;
+ u32 OEMTableSize;
+};
+
+struct mip_reg {
+ unsigned long long off_0;
+ unsigned long long off_8;
+ unsigned long long off_10;
+ unsigned long long off_18;
+ unsigned long long off_20;
+ unsigned long long off_28;
+ unsigned long long off_30;
+ unsigned long long off_38;
+};
+
+#define MIP_SW_APIC 0x1020b
+#define MIP_FUNC(VALUE) (VALUE & 0xff)
+
+extern int parse_unisys_oem (char *oemptr, int oem_entries);
+extern int find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length);
+extern int es7000_start_cpu(int cpu, unsigned long eip);
+extern void es7000_sw_apic(void);
diff --git a/xen/arch/x86/genapic/es7000plat.c b/xen/arch/x86/genapic/es7000plat.c
new file mode 100644
index 0000000000..8bf571bd27
--- /dev/null
+++ b/xen/arch/x86/genapic/es7000plat.c
@@ -0,0 +1,302 @@
+/*
+ * Written by: Garry Forsgren, Unisys Corporation
+ * Natalie Protasevich, Unisys Corporation
+ * This file contains the code to configure and interface
+ * with Unisys ES7000 series hardware system manager.
+ *
+ * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Unisys Corporation, Township Line & Union Meeting
+ * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
+ *
+ * http://www.unisys.com
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <xen/string.h>
+#include <xen/spinlock.h>
+#include <xen/errno.h>
+#include <xen/reboot.h>
+#include <xen/init.h>
+#include <xen/acpi.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/apicdef.h>
+#include "es7000.h"
+
+/*
+ * ES7000 Globals
+ */
+
+volatile unsigned long *psai = NULL;
+struct mip_reg *mip_reg;
+struct mip_reg *host_reg;
+int mip_port;
+unsigned long mip_addr, host_addr;
+
+#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
+
+/*
+ * GSI override for ES7000 platforms.
+ */
+
+static unsigned int base;
+
+static int
+es7000_rename_gsi(int ioapic, int gsi)
+{
+ if (!base) {
+ int i;
+ for (i = 0; i < nr_ioapics; i++)
+ base += nr_ioapic_registers[i];
+ }
+
+ if (!ioapic && (gsi < 16))
+ gsi += base;
+ return gsi;
+}
+
+#endif // (CONFIG_X86_IO_APIC) && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)
+
+/*
+ * Parse the OEM Table
+ */
+
+int __init
+parse_unisys_oem (char *oemptr, int oem_entries)
+{
+ int i;
+ int success = 0;
+ unsigned char type, size;
+ unsigned long val;
+ char *tp = NULL;
+ struct psai *psaip = NULL;
+ struct mip_reg_info *mi;
+ struct mip_reg *host, *mip;
+
+ tp = oemptr;
+
+ tp += 8;
+
+ for (i=0; i <= oem_entries; i++) {
+ type = *tp++;
+ size = *tp++;
+ tp -= 2;
+ switch (type) {
+ case MIP_REG:
+ mi = (struct mip_reg_info *)tp;
+ val = MIP_RD_LO(mi->host_reg);
+ host_addr = val;
+ host = (struct mip_reg *)val;
+ host_reg = __va(host);
+ val = MIP_RD_LO(mi->mip_reg);
+ mip_port = MIP_PORT(mi->mip_info);
+ mip_addr = val;
+ mip = (struct mip_reg *)val;
+ mip_reg = __va(mip);
+ Dprintk("es7000_mipcfg: host_reg = 0x%lx \n",
+ (unsigned long)host_reg);
+ Dprintk("es7000_mipcfg: mip_reg = 0x%lx \n",
+ (unsigned long)mip_reg);
+ success++;
+ break;
+ case MIP_PSAI_REG:
+ psaip = (struct psai *)tp;
+ if (tp != NULL) {
+ if (psaip->addr)
+ psai = __va(psaip->addr);
+ else
+ psai = NULL;
+ success++;
+ }
+ break;
+ default:
+ break;
+ }
+ if (i == 6) break;
+ tp += size;
+ }
+
+ if (success < 2) {
+ es7000_plat = 0;
+ } else {
+ printk("\nEnabling ES7000 specific features...\n");
+ es7000_plat = 1;
+ ioapic_renumber_irq = es7000_rename_gsi;
+ }
+ return es7000_plat;
+}
+
+int __init
+find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length)
+{
+ struct acpi_table_rsdp *rsdp = NULL;
+ unsigned long rsdp_phys = 0;
+ struct acpi_table_header *header = NULL;
+ int i;
+ struct acpi_table_sdt sdt = { 0 };
+
+ rsdp_phys = acpi_find_rsdp();
+ rsdp = __va(rsdp_phys);
+ if (rsdp->rsdt_address) {
+ struct acpi_table_rsdt *mapped_rsdt = NULL;
+ sdt.pa = rsdp->rsdt_address;
+
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt.pa, sizeof(struct acpi_table_header));
+ if (!header)
+ return -ENODEV;
+
+ sdt.count = (header->length - sizeof(struct acpi_table_header)) >> 3;
+ mapped_rsdt = (struct acpi_table_rsdt *)
+ __acpi_map_table(sdt.pa, header->length);
+ if (!mapped_rsdt)
+ return -ENODEV;
+
+ header = &mapped_rsdt->header;
+
+ for (i = 0; i < sdt.count; i++)
+ sdt.entry[i].pa = (unsigned long) mapped_rsdt->entry[i];
+ };
+ for (i = 0; i < sdt.count; i++) {
+
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt.entry[i].pa,
+ sizeof(struct acpi_table_header));
+ if (!header)
+ continue;
+ if (!strncmp((char *) &header->signature, "OEM1", 4)) {
+ if (!strncmp((char *) &header->oem_id, "UNISYS", 6)) {
+ void *addr;
+ struct oem_table *t;
+ acpi_table_print(header, sdt.entry[i].pa);
+ t = (struct oem_table *) __acpi_map_table(sdt.entry[i].pa, header->length);
+ addr = (void *) __acpi_map_table(t->OEMTableAddr, t->OEMTableSize);
+ *length = header->length;
+ *oem_addr = (unsigned long) addr;
+ return 0;
+ }
+ }
+ }
+ Dprintk("ES7000: did not find Unisys ACPI OEM table!\n");
+ return -1;
+}
+
+static void
+es7000_spin(int n)
+{
+ int i = 0;
+
+ while (i++ < n)
+ rep_nop();
+}
+
+static int __init
+es7000_mip_write(struct mip_reg *mip_reg)
+{
+ int status = 0;
+ int spin;
+
+ spin = MIP_SPIN;
+ while (((unsigned long long)host_reg->off_38 &
+ (unsigned long long)MIP_VALID) != 0) {
+ if (--spin <= 0) {
+ printk("es7000_mip_write: Timeout waiting for Host Valid Flag");
+ return -1;
+ }
+ es7000_spin(MIP_SPIN);
+ }
+
+ memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
+ outb(1, mip_port);
+
+ spin = MIP_SPIN;
+
+ while (((unsigned long long)mip_reg->off_38 &
+ (unsigned long long)MIP_VALID) == 0) {
+ if (--spin <= 0) {
+ printk("es7000_mip_write: Timeout waiting for MIP Valid Flag");
+ return -1;
+ }
+ es7000_spin(MIP_SPIN);
+ }
+
+ status = ((unsigned long long)mip_reg->off_0 &
+ (unsigned long long)0xffff0000000000ULL) >> 48;
+ mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 &
+ (unsigned long long)~MIP_VALID);
+ return status;
+}
+
+int
+es7000_start_cpu(int cpu, unsigned long eip)
+{
+ unsigned long vect = 0, psaival = 0;
+
+ if (psai == NULL)
+ return -1;
+
+ vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+ psaival = (0x1000000 | vect | cpu);
+
+ while (*psai & 0x1000000)
+ ;
+
+ *psai = psaival;
+
+ return 0;
+
+}
+
+int
+es7000_stop_cpu(int cpu)
+{
+ int startup;
+
+ if (psai == NULL)
+ return -1;
+
+ startup= (0x1000000 | cpu);
+
+ while ((*psai & 0xff00ffff) != startup)
+ ;
+
+ startup = (*psai & 0xff0000) >> 16;
+ *psai &= 0xffffff;
+
+ return 0;
+
+}
+
+void __init
+es7000_sw_apic()
+{
+ if (es7000_plat) {
+ int mip_status;
+ struct mip_reg es7000_mip_reg;
+
+ printk("ES7000: Enabling APIC mode.\n");
+ memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
+ es7000_mip_reg.off_0 = MIP_SW_APIC;
+ es7000_mip_reg.off_38 = (MIP_VALID);
+ while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
+ printk("es7000_sw_apic: command failed, status = %x\n",
+ mip_status);
+ return;
+ }
+}
diff --git a/xen/arch/x86/genapic/probe.c b/xen/arch/x86/genapic/probe.c
new file mode 100644
index 0000000000..a3a94fe753
--- /dev/null
+++ b/xen/arch/x86/genapic/probe.c
@@ -0,0 +1,91 @@
+/* Copyright 2003 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic x86 APIC driver probe layer.
+ */
+#include <xen/config.h>
+#include <xen/cpumask.h>
+#include <xen/string.h>
+#include <xen/kernel.h>
+#include <xen/ctype.h>
+#include <xen/init.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/genapic.h>
+
+extern struct genapic apic_summit;
+extern struct genapic apic_bigsmp;
+extern struct genapic apic_es7000;
+extern struct genapic apic_default;
+
+struct genapic *genapic;
+
+struct genapic *apic_probe[] __initdata = {
+ &apic_summit,
+ &apic_bigsmp,
+ &apic_es7000,
+ &apic_default, /* must be last */
+ NULL,
+};
+
+static void __init genapic_apic_force(char *str)
+{
+ int i;
+ for (i = 0; apic_probe[i]; i++)
+ if (!strcmp(apic_probe[i]->name, str))
+ genapic = apic_probe[i];
+}
+custom_param("apic", genapic_apic_force);
+
+void __init generic_apic_probe(void)
+{
+ int i;
+ int changed = (genapic != NULL);
+
+ for (i = 0; !changed && apic_probe[i]; i++) {
+ if (apic_probe[i]->probe()) {
+ changed = 1;
+ genapic = apic_probe[i];
+ }
+ }
+ if (!changed)
+ genapic = &apic_default;
+
+ printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
+}
+
+/* These functions can switch the APIC even after the initial ->probe() */
+
+int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid)
+{
+ int i;
+ for (i = 0; apic_probe[i]; ++i) {
+ if (apic_probe[i]->mps_oem_check(mpc,oem,productid)) {
+ genapic = apic_probe[i];
+ printk(KERN_INFO "Switched to APIC driver `%s'.\n",
+ genapic->name);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ int i;
+ for (i = 0; apic_probe[i]; ++i) {
+ if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
+ genapic = apic_probe[i];
+ printk(KERN_INFO "Switched to APIC driver `%s'.\n",
+ genapic->name);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int hard_smp_processor_id(void)
+{
+ return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
diff --git a/xen/arch/x86/genapic/summit.c b/xen/arch/x86/genapic/summit.c
new file mode 100644
index 0000000000..28a47f09de
--- /dev/null
+++ b/xen/arch/x86/genapic/summit.c
@@ -0,0 +1,27 @@
+/*
+ * APIC driver for the IBM "Summit" chipset.
+ */
+#define APIC_DEFINITION 1
+#include <xen/config.h>
+#include <xen/cpumask.h>
+#include <asm/current.h>
+#include <asm/mpspec.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <xen/kernel.h>
+#include <xen/string.h>
+#include <xen/smp.h>
+#include <xen/init.h>
+#include <asm/mach-summit/mach_apic.h>
+#include <asm/mach-summit/mach_apicdef.h>
+#include <asm/mach-summit/mach_ipi.h>
+#include <asm/mach-summit/mach_mpparse.h>
+
+static __init int probe_summit(void)
+{
+ /* probed later in mptable/ACPI hooks */
+ return 0;
+}
+
+struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index 6cc15dd3b5..eb5ff9479b 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -10,47 +10,83 @@
#include <xen/config.h>
#include <xen/sched.h>
+#include <asm/current.h>
#include <asm/processor.h>
#include <asm/i387.h>
void init_fpu(void)
{
- __asm__("fninit");
- if ( cpu_has_xmm ) load_mxcsr(0x1f80);
- set_bit(DF_DONEFPUINIT, &current->flags);
+ __asm__ __volatile__ ( "fninit" );
+ if ( cpu_has_xmm )
+ load_mxcsr(0x1f80);
+ set_bit(_VCPUF_fpu_initialised, &current->vcpu_flags);
}
-static inline void __save_init_fpu( struct domain *tsk )
-{
- if ( cpu_has_fxsr ) {
- asm volatile( "fxsave %0 ; fnclex"
- : "=m" (tsk->thread.i387) );
- } else {
- asm volatile( "fnsave %0 ; fwait"
- : "=m" (tsk->thread.i387) );
- }
- clear_bit(DF_USEDFPU, &tsk->flags);
-}
-
-void save_init_fpu( struct domain *tsk )
+void save_init_fpu(struct vcpu *tsk)
{
/*
* The guest OS may have set the 'virtual STTS' flag.
* This causes us to set the real flag, so we'll need
* to temporarily clear it while saving f-p state.
*/
- if ( test_bit(DF_GUEST_STTS, &tsk->flags) ) clts();
- __save_init_fpu(tsk);
+ if ( test_bit(_VCPUF_guest_stts, &tsk->vcpu_flags) )
+ clts();
+
+ if ( cpu_has_fxsr )
+ __asm__ __volatile__ (
+ "fxsave %0 ; fnclex"
+ : "=m" (tsk->arch.guest_context.fpu_ctxt) );
+ else
+ __asm__ __volatile__ (
+ "fnsave %0 ; fwait"
+ : "=m" (tsk->arch.guest_context.fpu_ctxt) );
+
+ clear_bit(_VCPUF_fpu_dirtied, &tsk->vcpu_flags);
stts();
}
-void restore_fpu( struct domain *tsk )
+void restore_fpu(struct vcpu *tsk)
{
- if ( cpu_has_fxsr ) {
- asm volatile( "fxrstor %0"
- : : "m" (tsk->thread.i387) );
- } else {
- asm volatile( "frstor %0"
- : : "m" (tsk->thread.i387) );
- }
+ /*
+ * FXRSTOR can fault if passed a corrupted data block. We handle this
+ * possibility, which may occur if the block was passed to us by control
+ * tools, by silently clearing the block.
+ */
+ if ( cpu_has_fxsr )
+ __asm__ __volatile__ (
+ "1: fxrstor %0 \n"
+ ".section .fixup,\"ax\" \n"
+ "2: push %%"__OP"ax \n"
+ " push %%"__OP"cx \n"
+ " push %%"__OP"di \n"
+ " lea %0,%%"__OP"di \n"
+ " mov %1,%%ecx \n"
+ " xor %%eax,%%eax \n"
+ " rep ; stosl \n"
+ " pop %%"__OP"di \n"
+ " pop %%"__OP"cx \n"
+ " pop %%"__OP"ax \n"
+ " jmp 1b \n"
+ ".previous \n"
+ ".section __ex_table,\"a\"\n"
+ " "__FIXUP_ALIGN" \n"
+ " "__FIXUP_WORD" 1b,2b \n"
+ ".previous \n"
+ :
+ : "m" (tsk->arch.guest_context.fpu_ctxt),
+ "i" (sizeof(tsk->arch.guest_context.fpu_ctxt)/4) );
+ else
+ __asm__ __volatile__ (
+ "frstor %0"
+ : : "m" (tsk->arch.guest_context.fpu_ctxt) );
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/i8259.c b/xen/arch/x86/i8259.c
index 62eb07ada0..7fc9340d78 100644
--- a/xen/arch/x86/i8259.c
+++ b/xen/arch/x86/i8259.c
@@ -35,36 +35,18 @@
BUILD_COMMON_IRQ()
#define BI(x,y) \
- BUILD_IRQ(x##y)
+ BUILD_IRQ(x##y)
#define BUILD_16_IRQS(x) \
- BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
- BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
- BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+ BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+ BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+ BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+ BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
-BUILD_16_IRQS(0x0)
-
-#ifdef CONFIG_X86_IO_APIC
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
-BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+BUILD_16_IRQS(0x0) BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
-BUILD_16_IRQS(0xc)
-#endif
+BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
#undef BUILD_16_IRQS
#undef BI
@@ -75,11 +57,9 @@ BUILD_16_IRQS(0xc)
* is no hardware IRQ pin equivalent for them, they are triggered
* through the ICC by us (IPIs)
*/
-#ifdef CONFIG_SMP
BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR)
BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
-#endif
/*
* Every pentium local APIC has two 'local interrupts', with a
@@ -93,23 +73,19 @@ BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
#define IRQ(x,y) \
- IRQ##x##y##_interrupt
+ IRQ##x##y##_interrupt
#define IRQLIST_16(x) \
- IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
- IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
- IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-
- void *interrupt[NR_IRQS] = {
- IRQLIST_16(0x0),
-
-#ifdef CONFIG_X86_IO_APIC
- IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
- IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
- IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
- IRQLIST_16(0xc)
-#endif
+ IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+ IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+ IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+
+ static void (*interrupt[])(void) = {
+ IRQLIST_16(0x0), IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+ IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+ IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+ IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
};
#undef IRQ
@@ -126,31 +102,38 @@ BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
-static void end_8259A_irq (unsigned int irq)
+static void disable_8259A_vector(unsigned int vector)
{
- if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
- enable_8259A_irq(irq);
+ disable_8259A_irq(LEGACY_IRQ_FROM_VECTOR(vector));
}
-#define shutdown_8259A_irq disable_8259A_irq
+static void enable_8259A_vector(unsigned int vector)
+{
+ enable_8259A_irq(LEGACY_IRQ_FROM_VECTOR(vector));
+}
-void mask_and_ack_8259A(unsigned int);
+static void mask_and_ack_8259A_vector(unsigned int);
-static unsigned int startup_8259A_irq(unsigned int irq)
+static void end_8259A_vector(unsigned int vector)
+{
+ if (!(irq_desc[vector].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+ enable_8259A_vector(vector);
+}
+
+static unsigned int startup_8259A_vector(unsigned int vector)
{
- enable_8259A_irq(irq);
+ enable_8259A_vector(vector);
return 0; /* never anything pending */
}
static struct hw_interrupt_type i8259A_irq_type = {
- "XT-PIC",
- startup_8259A_irq,
- shutdown_8259A_irq,
- enable_8259A_irq,
- disable_8259A_irq,
- mask_and_ack_8259A,
- end_8259A_irq,
- NULL
+ .typename = "XT-PIC",
+ .startup = startup_8259A_vector,
+ .shutdown = disable_8259A_vector,
+ .enable = enable_8259A_vector,
+ .disable = disable_8259A_vector,
+ .ack = mask_and_ack_8259A_vector,
+ .end = end_8259A_vector
};
/*
@@ -162,9 +145,9 @@ static struct hw_interrupt_type i8259A_irq_type = {
*/
static unsigned int cached_irq_mask = 0xffff;
-#define __byte(x,y) (((unsigned char *)&(y))[x])
-#define cached_21 (__byte(0,cached_irq_mask))
-#define cached_A1 (__byte(1,cached_irq_mask))
+#define __byte(x,y) (((unsigned char *)&(y))[x])
+#define cached_21 (__byte(0,cached_irq_mask))
+#define cached_A1 (__byte(1,cached_irq_mask))
/*
* Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
@@ -221,14 +204,6 @@ int i8259A_irq_pending(unsigned int irq)
return ret;
}
-void make_8259A_irq(unsigned int irq)
-{
- disable_irq_nosync(irq);
- io_apic_irqs &= ~(1<<irq);
- irq_desc[irq].handler = &i8259A_irq_type;
- enable_irq(irq);
-}
-
/*
* This function assumes to be called rarely. Switching between
* 8259A registers is slow.
@@ -241,14 +216,14 @@ static inline int i8259A_irq_real(unsigned int irq)
int irqmask = 1<<irq;
if (irq < 8) {
- outb(0x0B,0x20); /* ISR register */
+ outb(0x0B,0x20); /* ISR register */
value = inb(0x20) & irqmask;
- outb(0x0A,0x20); /* back to the IRR register */
+ outb(0x0A,0x20); /* back to the IRR register */
return value;
}
- outb(0x0B,0xA0); /* ISR register */
+ outb(0x0B,0xA0); /* ISR register */
value = inb(0xA0) & (irqmask >> 8);
- outb(0x0A,0xA0); /* back to the IRR register */
+ outb(0x0A,0xA0); /* back to the IRR register */
return value;
}
@@ -258,8 +233,9 @@ static inline int i8259A_irq_real(unsigned int irq)
* first, _then_ send the EOI, and the order of EOI
* to the two 8259s is important!
*/
-void mask_and_ack_8259A(unsigned int irq)
+static void mask_and_ack_8259A_vector(unsigned int vector)
{
+ unsigned int irq = LEGACY_IRQ_FROM_VECTOR(vector);
unsigned int irqmask = 1 << irq;
unsigned long flags;
@@ -285,14 +261,14 @@ void mask_and_ack_8259A(unsigned int irq)
handle_real_irq:
if (irq & 8) {
- inb(0xA1); /* DUMMY - (do we need this?) */
+ inb(0xA1); /* DUMMY - (do we need this?) */
outb(cached_A1,0xA1);
outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
- outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */
+ outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */
} else {
- inb(0x21); /* DUMMY - (do we need this?) */
+ inb(0x21); /* DUMMY - (do we need this?) */
outb(cached_21,0x21);
- outb(0x60+irq,0x20); /* 'Specific EOI' to master */
+ outb(0x60+irq,0x20); /* 'Specific EOI' to master */
}
spin_unlock_irqrestore(&i8259A_lock, flags);
return;
@@ -334,39 +310,39 @@ void __init init_8259A(int auto_eoi)
spin_lock_irqsave(&i8259A_lock, flags);
- outb(0xff, 0x21); /* mask all of 8259A-1 */
- outb(0xff, 0xA1); /* mask all of 8259A-2 */
+ outb(0xff, 0x21); /* mask all of 8259A-1 */
+ outb(0xff, 0xA1); /* mask all of 8259A-2 */
/*
* outb_p - this has to work on a wide range of PC hardware.
*/
- outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */
- outb_p(0x30 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
- outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */
+ outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */
+ outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+ outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */
if (auto_eoi)
- outb_p(0x03, 0x21); /* master does Auto EOI */
+ outb_p(0x03, 0x21); /* master does Auto EOI */
else
- outb_p(0x01, 0x21); /* master expects normal EOI */
+ outb_p(0x01, 0x21); /* master expects normal EOI */
- outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */
- outb_p(0x30 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
- outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */
- outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode
- is to be investigated) */
+ outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */
+ outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+ outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */
+ outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode
+ is to be investigated) */
if (auto_eoi)
/*
* in AEOI mode we just have to mask the interrupt
* when acking.
*/
- i8259A_irq_type.ack = disable_8259A_irq;
+ i8259A_irq_type.ack = disable_8259A_vector;
else
- i8259A_irq_type.ack = mask_and_ack_8259A;
+ i8259A_irq_type.ack = mask_and_ack_8259A_vector;
- udelay(100); /* wait for 8259A to initialize */
+ udelay(100); /* wait for 8259A to initialize */
- outb(cached_21, 0x21); /* restore master IRQ mask */
- outb(cached_A1, 0xA1); /* restore slave IRQ mask */
+ outb(cached_21, 0x21); /* restore master IRQ mask */
+ outb(cached_A1, 0xA1); /* restore slave IRQ mask */
spin_unlock_irqrestore(&i8259A_lock, flags);
}
@@ -384,25 +360,30 @@ void __init init_IRQ(void)
for ( i = 0; i < NR_IRQS; i++ )
{
irq_desc[i].status = IRQ_DISABLED;
- irq_desc[i].handler = (i<16) ? &i8259A_irq_type : &no_irq_type;
+ irq_desc[i].handler = &no_irq_type;
irq_desc[i].action = NULL;
irq_desc[i].depth = 1;
spin_lock_init(&irq_desc[i].lock);
- set_intr_gate(FIRST_EXTERNAL_VECTOR+i, interrupt[i]);
+ set_intr_gate(i, interrupt[i]);
+ }
+
+ for ( i = 0; i < 16; i++ )
+ {
+ vector_irq[LEGACY_VECTOR(i)] = i;
+ irq_desc[LEGACY_VECTOR(i)].handler = &i8259A_irq_type;
}
-#ifdef CONFIG_SMP
/*
* IRQ0 must be given a fixed assignment and initialized,
* because it's used before the IO-APIC is set up.
*/
- set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+ irq_vector[0] = FIRST_DEVICE_VECTOR;
+ vector_irq[FIRST_DEVICE_VECTOR] = 0;
/* Various IPI functions. */
set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-#endif
/* Self-generated IPI for local APIC timer. */
set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
@@ -414,9 +395,9 @@ void __init init_IRQ(void)
/* Set the clock to HZ Hz */
#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
- outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff , 0x40); /* LSB */
- outb(LATCH >> 8 , 0x40); /* MSB */
+ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff , 0x40); /* LSB */
+ outb(LATCH >> 8 , 0x40); /* MSB */
setup_irq(2, &cascade);
}
diff --git a/xen/arch/x86/idle0_task.c b/xen/arch/x86/idle0_task.c
index a8c670c573..b876c619ef 100644
--- a/xen/arch/x86/idle0_task.c
+++ b/xen/arch/x86/idle0_task.c
@@ -1,17 +1,27 @@
+
#include <xen/config.h>
#include <xen/sched.h>
#include <asm/desc.h>
-#define IDLE0_TASK(_t) \
-{ \
- processor: 0, \
- id: IDLE_DOMAIN_ID, \
- mm: IDLE0_MM, \
- thread: INIT_THREAD, \
- flags: 1<<DF_IDLETASK, \
- refcnt: ATOMIC_INIT(1) \
-}
+struct domain idle0_domain = {
+ domain_id: IDLE_DOMAIN_ID,
+ domain_flags:DOMF_idle_domain,
+ refcnt: ATOMIC_INIT(1)
+};
-struct domain idle0_task = IDLE0_TASK(idle0_task);
+struct vcpu idle0_vcpu = {
+ processor: 0,
+ domain: &idle0_domain
+};
struct tss_struct init_tss[NR_CPUS];
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index 5e3fa047f5..470cf3b593 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -21,30 +21,27 @@
*/
#include <xen/config.h>
+#include <xen/lib.h>
#include <xen/init.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/sched.h>
-#include <xen/config.h>
-#include <asm/mc146818rtc.h>
+#include <xen/acpi.h>
#include <asm/io.h>
-#include <asm/mpspec.h>
-#include <asm/io_apic.h>
+#include <asm/mc146818rtc.h>
#include <asm/smp.h>
#include <asm/desc.h>
-#include <asm/smpboot.h>
-
-#ifdef CONFIG_X86_IO_APIC
+#include <mach_apic.h>
+#include <io_ports.h>
-#undef APIC_LOCKUP_DEBUG
+#define make_8259A_irq(irq) (io_apic_irqs &= ~(1<<(irq)))
-#define APIC_LOCKUP_DEBUG
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+atomic_t irq_mis_count;
-static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
-
-unsigned int int_dest_addr_mode = APIC_DEST_LOGICAL;
-unsigned char int_delivery_mode = dest_LowestPrio;
+static DEFINE_SPINLOCK(ioapic_lock);
+int skip_ioapic_setup;
/*
* # of IRQ routing registers
@@ -66,30 +63,32 @@ int nr_ioapic_registers[MAX_IO_APICS];
*/
static struct irq_pin_list {
- int apic, pin, next;
+ int apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
+int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
{
- static int first_free_entry = NR_IRQS;
- struct irq_pin_list *entry = irq_2_pin + irq;
+ static int first_free_entry = NR_IRQS;
+ struct irq_pin_list *entry = irq_2_pin + irq;
- while (entry->next)
- entry = irq_2_pin + entry->next;
+ while (entry->next)
+ entry = irq_2_pin + entry->next;
- if (entry->pin != -1) {
- entry->next = first_free_entry;
- entry = irq_2_pin + entry->next;
- if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: whoops");
- }
- entry->apic = apic;
- entry->pin = pin;
+ if (entry->pin != -1) {
+ entry->next = first_free_entry;
+ entry = irq_2_pin + entry->next;
+ if (++first_free_entry >= PIN_MAP_SIZE)
+ panic("io_apic.c: whoops");
+ }
+ entry->apic = apic;
+ entry->pin = pin;
}
/*
@@ -99,212 +98,174 @@ static void __init replace_pin_at_irq(unsigned int irq,
int oldapic, int oldpin,
int newapic, int newpin)
{
- struct irq_pin_list *entry = irq_2_pin + irq;
-
- while (1) {
- if (entry->apic == oldapic && entry->pin == oldpin) {
- entry->apic = newapic;
- entry->pin = newpin;
- }
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-}
+ struct irq_pin_list *entry = irq_2_pin + irq;
-#define __DO_ACTION(R, ACTION, FINAL) \
- \
-{ \
- int pin; \
- struct irq_pin_list *entry = irq_2_pin + irq; \
- \
- for (;;) { \
- unsigned int reg; \
- pin = entry->pin; \
- if (pin == -1) \
- break; \
- reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
- reg ACTION; \
- io_apic_write(entry->apic, 0x10 + R + pin*2, reg); \
- if (!entry->next) \
- break; \
- entry = irq_2_pin + entry->next; \
- } \
- FINAL; \
+ while (1) {
+ if (entry->apic == oldapic && entry->pin == oldpin) {
+ entry->apic = newapic;
+ entry->pin = newpin;
+ }
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
}
-#define DO_ACTION(name,R,ACTION, FINAL) \
- \
- static void name##_IO_APIC_irq (unsigned int irq) \
- __DO_ACTION(R, ACTION, FINAL)
-
-DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
-DO_ACTION( __unmask, 0, &= 0xfffeffff, )
-DO_ACTION( __edge, 0, &= 0xffff7fff, )
-DO_ACTION( __level, 0, |= 0x00008000, )
-
-static void mask_IO_APIC_irq (unsigned int irq)
+static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
{
- unsigned long flags;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+ unsigned int pin, reg;
- spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (;;) {
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ reg &= ~disable;
+ reg |= enable;
+ io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
}
-static void unmask_IO_APIC_irq (unsigned int irq)
+/* mask = 1 */
+static void __mask_IO_APIC_irq (unsigned int irq)
{
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ __modify_IO_APIC_irq(irq, 0x00010000, 0);
}
-void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+/* mask = 0 */
+static void __unmask_IO_APIC_irq (unsigned int irq)
{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- /* Check delivery_mode to be sure we're not clearing an SMI pin */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (entry.delivery_mode == dest_SMI)
- return;
-
- /*
- * Disable it in the IO-APIC irq-routing table:
- */
- memset(&entry, 0, sizeof(entry));
- entry.mask = 1;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ __modify_IO_APIC_irq(irq, 0, 0x00010000);
}
-static void clear_IO_APIC (void)
+/* trigger = 0 */
+static void __edge_IO_APIC_irq (unsigned int irq)
{
- int apic, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
- clear_IO_APIC_pin(apic, pin);
+ __modify_IO_APIC_irq(irq, 0, 0x00008000);
}
-static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+/* trigger = 1 */
+static void __level_IO_APIC_irq (unsigned int irq)
{
- unsigned long flags;
-
- /*
- * Only the first 8 bits are valid.
- */
- mask = mask << 24;
- spin_lock_irqsave(&ioapic_lock, flags);
- __DO_ACTION(1, = mask, )
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ __modify_IO_APIC_irq(irq, 0x00008000, 0);
}
-#define balance_irq(_irq) ((void)0)
-
-/*
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
- * specific CPU-side IRQs.
- */
-
-#define MAX_PIRQS 8
-int pirq_entries [MAX_PIRQS];
-int pirqs_enabled;
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
-int skip_ioapic_setup;
-#if 0
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
-static int __init noioapic_setup(char *str)
+static void unmask_IO_APIC_irq (unsigned int irq)
{
- skip_ioapic_setup = 1;
- return 1;
-}
+ unsigned long flags;
-__setup("noapic", noioapic_setup);
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
-static int __init ioapic_setup(char *str)
+void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
- skip_ioapic_setup = 0;
- return 1;
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+ *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 1;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
}
-__setup("apic", ioapic_setup);
-
+static void clear_IO_APIC (void)
+{
+ int apic, pin;
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
+}
-static int __init ioapic_pirq_setup(char *str)
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
{
- int i, max;
- int ints[MAX_PIRQS+1];
-
- get_options(str, ARRAY_SIZE(ints), ints);
-
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
-
- pirqs_enabled = 1;
- printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
- max = MAX_PIRQS;
- if (ints[0] < MAX_PIRQS)
- max = ints[0];
-
- for (i = 0; i < max; i++) {
- printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
- /*
- * PIRQs are mapped upside down, usually.
- */
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
- }
- return 1;
+ unsigned long flags;
+ int pin;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+ unsigned int apicid_value;
+
+ apicid_value = cpu_mask_to_apicid(cpumask);
+ /* Prepare to do the io_apic_write */
+ apicid_value = apicid_value << 24;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ for (;;) {
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+ io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
}
-__setup("pirq=", ioapic_pirq_setup);
-
-#endif
-
/*
* Find the IRQ entry number of a certain pin.
*/
-static int __init find_irq_entry(int apic, int pin, int type)
+static int find_irq_entry(int apic, int pin, int type)
{
- int i;
+ int i;
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_irqtype == type &&
- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
- mp_irqs[i].mpc_dstirq == pin)
- return i;
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_irqtype == type &&
+ (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+ mp_irqs[i].mpc_dstirq == pin)
+ return i;
- return -1;
+ return -1;
}
/*
* Find the pin to which IRQ[irq] (ISA) is connected
*/
-static int __init find_isa_irq_pin(int irq, int type)
+static int find_isa_irq_pin(int irq, int type)
{
- int i;
+ int i;
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
- (mp_irqs[i].mpc_irqtype == type) &&
- (mp_irqs[i].mpc_srcbusirq == irq))
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+ ) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
- return mp_irqs[i].mpc_dstirq;
- }
- return -1;
+ return mp_irqs[i].mpc_dstirq;
+ }
+ return -1;
}
/*
@@ -313,57 +274,42 @@ static int __init find_isa_irq_pin(int irq, int type)
*/
static int pin_2_irq(int idx, int apic, int pin);
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come
+ * online so mask in all cases should simply be TARGET_CPUS
+ */
+void __init setup_ioapic_dest(void)
{
- int apic, i, best_guess = -1;
+ int pin, ioapic, irq, irq_entry;
+
+ if (skip_ioapic_setup == 1)
+ return;
+
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
+ set_ioapic_affinity_irq(irq, TARGET_CPUS);
+ }
- Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
- bus, slot, pin);
- if ((mp_bus_id_to_pci_bus==NULL) || (mp_bus_id_to_pci_bus[bus] == -1)) {
- printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].mpc_srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
- break;
-
- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
- !mp_irqs[i].mpc_irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- return best_guess;
+ }
}
/*
* EISA Edge/Level control register, ELCR
*/
-static int __init EISA_ELCR(unsigned int irq)
+static int EISA_ELCR(unsigned int irq)
{
- if (irq < 16) {
- unsigned int port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
- }
- printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
- return 0;
+ if (irq < 16) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
}
/* EISA interrupts are always polarity zero and can be edge or level
@@ -392,497 +338,490 @@ static int __init EISA_ELCR(unsigned int irq)
#define default_MCA_trigger(idx) (1)
#define default_MCA_polarity(idx) (0)
+/* NEC98 interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_NEC98_trigger(idx) (0)
+#define default_NEC98_polarity(idx) (0)
+
static int __init MPBIOS_polarity(int idx)
{
- int bus = mp_irqs[idx].mpc_srcbus;
- int polarity;
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int polarity;
- /*
- * Determine IRQ line polarity (high active or low active):
- */
- switch (mp_irqs[idx].mpc_irqflag & 3)
- {
- case 0: /* conforms, ie. bus-type dependent polarity */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- polarity = default_ISA_polarity(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- polarity = default_EISA_polarity(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- polarity = default_PCI_polarity(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- polarity = default_MCA_polarity(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- break;
- }
- case 1: /* high active */
- {
- polarity = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- case 3: /* low active */
- {
- polarity = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
- return polarity;
+ /*
+ * Determine IRQ line polarity (high active or low active):
+ */
+ switch (mp_irqs[idx].mpc_irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ polarity = default_ISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ polarity = default_EISA_polarity(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ polarity = default_PCI_polarity(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ polarity = default_MCA_polarity(idx);
+ break;
+ }
+ case MP_BUS_NEC98: /* NEC 98 pin */
+ {
+ polarity = default_NEC98_polarity(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* high active */
+ {
+ polarity = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ case 3: /* low active */
+ {
+ polarity = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ polarity = 1;
+ break;
+ }
+ }
+ return polarity;
}
-static int __init MPBIOS_trigger(int idx)
+static int MPBIOS_trigger(int idx)
{
- int bus = mp_irqs[idx].mpc_srcbus;
- int trigger;
+ int bus = mp_irqs[idx].mpc_srcbus;
+ int trigger;
- /*
- * Determine IRQ trigger mode (edge or level sensitive):
- */
- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
- {
- case 0: /* conforms, ie. bus-type dependent */
- {
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- trigger = default_ISA_trigger(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- trigger = default_EISA_trigger(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- trigger = default_PCI_trigger(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- trigger = default_MCA_trigger(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- }
- break;
- }
- case 1: /* edge */
- {
- trigger = 0;
- break;
- }
- case 2: /* reserved */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 1;
- break;
- }
- case 3: /* level */
- {
- trigger = 1;
- break;
- }
- default: /* invalid */
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- trigger = 0;
- break;
- }
- }
- return trigger;
+ /*
+ * Determine IRQ trigger mode (edge or level sensitive):
+ */
+ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ {
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+ trigger = default_ISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+ {
+ trigger = default_EISA_trigger(idx);
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ trigger = default_PCI_trigger(idx);
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+ {
+ trigger = default_MCA_trigger(idx);
+ break;
+ }
+ case MP_BUS_NEC98: /* NEC 98 pin */
+ {
+ trigger = default_NEC98_trigger(idx);
+ break;
+ }
+ default:
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ }
+ break;
+ }
+ case 1: /* edge */
+ {
+ trigger = 0;
+ break;
+ }
+ case 2: /* reserved */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 1;
+ break;
+ }
+ case 3: /* level */
+ {
+ trigger = 1;
+ break;
+ }
+ default: /* invalid */
+ {
+ printk(KERN_WARNING "broken BIOS!!\n");
+ trigger = 0;
+ break;
+ }
+ }
+ return trigger;
}
static inline int irq_polarity(int idx)
{
- return MPBIOS_polarity(idx);
+ return MPBIOS_polarity(idx);
}
static inline int irq_trigger(int idx)
{
- return MPBIOS_trigger(idx);
+ return MPBIOS_trigger(idx);
}
static int pin_2_irq(int idx, int apic, int pin)
{
- int irq, i;
- int bus = mp_irqs[idx].mpc_srcbus;
+ int irq, i;
+ int bus = mp_irqs[idx].mpc_srcbus;
- /*
- * Debugging check, we are in big trouble if this message pops up!
- */
- if (mp_irqs[idx].mpc_dstirq != pin)
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
-
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- case MP_BUS_EISA:
- case MP_BUS_MCA:
- {
- irq = mp_irqs[idx].mpc_srcbusirq;
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /*
- * PCI IRQs are mapped in order
- */
- i = irq = 0;
- while (i < apic)
- irq += nr_ioapic_registers[i++];
- irq += pin;
- break;
- }
- default:
- {
- printk(KERN_ERR "unknown bus type %d.\n",bus);
- irq = 0;
- break;
- }
- }
+ /*
+ * Debugging check, we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].mpc_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
- /*
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
- */
- if ((pin >= 16) && (pin <= 23)) {
- if (pirq_entries[pin-16] != -1) {
- if (!pirq_entries[pin-16]) {
- printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
- } else {
- irq = pirq_entries[pin-16];
- printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
- pin-16, irq);
- }
- }
- }
- return irq;
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ case MP_BUS_EISA:
+ case MP_BUS_MCA:
+ case MP_BUS_NEC98:
+ {
+ irq = mp_irqs[idx].mpc_srcbusirq;
+ break;
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+ /*
+ * PCI IRQs are mapped in order
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
+
+ /*
+ * For MPS mode, so far only needed by ES7000 platform
+ */
+ if (ioapic_renumber_irq)
+ irq = ioapic_renumber_irq(apic, irq);
+
+ break;
+ }
+ default:
+ {
+ printk(KERN_ERR "unknown bus type %d.\n",bus);
+ irq = 0;
+ break;
+ }
+ }
+
+ return irq;
}
static inline int IO_APIC_irq_trigger(int irq)
{
- int apic, idx, pin;
-
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- idx = find_irq_entry(apic,pin,mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
- return irq_trigger(idx);
- }
- }
- /*
- * nonexistent IRQs are edge default
- */
- return 0;
+ int apic, idx, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+ return irq_trigger(idx);
+ }
+ }
+ /*
+ * nonexistent IRQs are edge default
+ */
+ return 0;
}
-int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
+/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQ_VECTORS];
-static int __init assign_irq_vector(int irq)
+int assign_irq_vector(int irq)
{
- static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
- if (IO_APIC_VECTOR(irq) > 0)
- return IO_APIC_VECTOR(irq);
-next:
- current_vector += 8;
-
- /* Skip the hypercall vector. */
- if (current_vector == HYPERCALL_VECTOR) goto next;
- /* Skip the Linux/BSD fast-trap vector. */
- if (current_vector == 0x80) goto next;
-
- if (current_vector > FIRST_SYSTEM_VECTOR) {
- offset++;
- current_vector = FIRST_DEVICE_VECTOR + offset;
- }
+ static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+
+ BUG_ON(irq >= NR_IRQ_VECTORS);
+ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+ return IO_APIC_VECTOR(irq);
+ next:
+ current_vector += 8;
+
+ /* Skip the hypercall vector. */
+ if (current_vector == HYPERCALL_VECTOR)
+ goto next;
+
+ /* Skip the Linux/BSD fast-trap vector. */
+ if (current_vector == 0x80)
+ goto next;
+
+ if (current_vector >= FIRST_SYSTEM_VECTOR) {
+ offset++;
+ if (!(offset%8))
+ return -ENOSPC;
+ current_vector = FIRST_DEVICE_VECTOR + offset;
+ }
- if (current_vector == FIRST_SYSTEM_VECTOR)
- panic("ran out of interrupt sources!");
+ vector_irq[current_vector] = irq;
+ if (irq != AUTO_ASSIGN)
+ IO_APIC_VECTOR(irq) = current_vector;
- IO_APIC_VECTOR(irq) = current_vector;
- return current_vector;
+ return current_vector;
}
-extern void (*interrupt[NR_IRQS])(void);
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
+static struct hw_interrupt_type ioapic_level_type;
+static struct hw_interrupt_type ioapic_edge_type;
-static unsigned int startup_edge_ioapic_irq(unsigned int irq);
-#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq
-#define enable_edge_ioapic_irq unmask_IO_APIC_irq
-static void disable_edge_ioapic_irq (unsigned int irq);
-static void ack_edge_ioapic_irq(unsigned int irq);
-static void end_edge_ioapic_irq (unsigned int i);
-static struct hw_interrupt_type ioapic_edge_irq_type = {
- "IO-APIC-edge",
- startup_edge_ioapic_irq,
- shutdown_edge_ioapic_irq,
- enable_edge_ioapic_irq,
- disable_edge_ioapic_irq,
- ack_edge_ioapic_irq,
- end_edge_ioapic_irq,
- set_ioapic_affinity,
-};
+#define IOAPIC_AUTO -1
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
-static unsigned int startup_level_ioapic_irq (unsigned int irq);
-#define shutdown_level_ioapic_irq mask_IO_APIC_irq
-#define enable_level_ioapic_irq unmask_IO_APIC_irq
-#define disable_level_ioapic_irq mask_IO_APIC_irq
-static void mask_and_ack_level_ioapic_irq (unsigned int irq);
-static void end_level_ioapic_irq (unsigned int irq);
-static struct hw_interrupt_type ioapic_level_irq_type = {
- "IO-APIC-level",
- startup_level_ioapic_irq,
- shutdown_level_ioapic_irq,
- enable_level_ioapic_irq,
- disable_level_ioapic_irq,
- mask_and_ack_level_ioapic_irq,
- end_level_ioapic_irq,
- set_ioapic_affinity,
-};
+static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+{
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ irq_desc[vector].handler = &ioapic_level_type;
+ else
+ irq_desc[vector].handler = &ioapic_edge_type;
+}
void __init setup_IO_APIC_irqs(void)
{
- struct IO_APIC_route_entry entry;
- int apic, pin, idx, irq, vector;
- unsigned long flags;
+ struct IO_APIC_route_entry entry;
+ int apic, pin, idx, irq, first_notcon = 1, vector;
+ unsigned long flags;
- printk(KERN_DEBUG "init IO_APIC IRQs\n");
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for (apic = 0; apic < nr_ioapics; apic++) {
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
- entry.mask = 0; /* enable IRQ */
- entry.dest.logical.logical_dest = target_cpus();
-
- idx = find_irq_entry(apic,pin,mp_INT);
- if (idx == -1)
- continue;
-
- entry.trigger = irq_trigger(idx);
- entry.polarity = irq_polarity(idx);
-
- if (irq_trigger(idx)) {
- entry.trigger = 1;
- entry.mask = 1;
- }
-
- irq = pin_2_irq(idx, apic, pin);
- /*
- * skip adding the timer int on secondary nodes, which causes
- * a small but painful rift in the time-space continuum
- */
- if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- && (apic != 0) && (irq == 0))
- continue;
- else
- add_pin_to_irq(irq, apic, pin);
-
- if (!apic && !IO_APIC_IRQ(irq))
- continue;
-
- if (IO_APIC_IRQ(irq)) {
- vector = assign_irq_vector(irq);
- entry.vector = vector;
-
- if (IO_APIC_irq_trigger(irq))
- irq_desc[irq].handler = &ioapic_level_irq_type;
- else
- irq_desc[irq].handler = &ioapic_edge_irq_type;
-
- set_intr_gate(vector, interrupt[irq]);
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(&entry,0,sizeof(entry));
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+ entry.mask = 0; /* enable IRQ */
+ entry.dest.logical.logical_dest =
+ cpu_mask_to_apicid(TARGET_CPUS);
+
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if (idx == -1) {
+ if (first_notcon) {
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
+ " IO-APIC (apicid-pin) %d-%d",
+ mp_ioapics[apic].mpc_apicid,
+ pin);
+ first_notcon = 0;
+ } else
+ apic_printk(APIC_VERBOSE, ", %d-%d",
+ mp_ioapics[apic].mpc_apicid, pin);
+ continue;
+ }
+
+ entry.trigger = irq_trigger(idx);
+ entry.polarity = irq_polarity(idx);
+
+ if (irq_trigger(idx)) {
+ entry.trigger = 1;
+ entry.mask = 1;
+ }
+
+ irq = pin_2_irq(idx, apic, pin);
+ /*
+ * skip adding the timer int on secondary nodes, which causes
+ * a small but painful rift in the time-space continuum
+ */
+ if (multi_timer_check(apic, irq))
+ continue;
+ else
+ add_pin_to_irq(irq, apic, pin);
+
+ if (!apic && !IO_APIC_IRQ(irq))
+ continue;
+
+ if (IO_APIC_IRQ(irq)) {
+ vector = assign_irq_vector(irq);
+ entry.vector = vector;
+ ioapic_register_intr(irq, vector, IOAPIC_AUTO);
- if (!apic && (irq < 16))
- disable_8259A_irq(irq);
- }
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
- }
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
}
+ }
+
+ if (!first_notcon)
+ apic_printk(APIC_VERBOSE, " not connected.\n");
}
/*
- * Set up the 8259A-master output pin as broadcast to all
- * CPUs.
+ * Set up the 8259A-master output pin:
*/
void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- memset(&entry,0,sizeof(entry));
-
- disable_8259A_irq(0);
-
- /* mask LVT0 */
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
-
- /*
- * We use logical delivery to get the timer IRQ
- * to the first CPU.
- */
- entry.dest_mode = (INT_DEST_ADDR_MODE != 0);
- entry.mask = 0; /* unmask IRQ now */
- entry.dest.logical.logical_dest = target_cpus();
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.polarity = 0;
- entry.trigger = 0;
- entry.vector = vector;
-
- /*
- * The timer IRQ doesn't have to know that behind the
- * scene we have a 8259A-master in AEOI mode ...
- */
- irq_desc[0].handler = &ioapic_edge_irq_type;
-
- /*
- * Add it to the IO-APIC irq-routing table:
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- enable_8259A_irq(0);
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
+
+ memset(&entry,0,sizeof(entry));
+
+ disable_8259A_irq(0);
+
+ /* mask LVT0 */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+ /*
+ * We use logical delivery to get the timer IRQ
+ * to the first CPU.
+ */
+ entry.dest_mode = INT_DEST_MODE;
+ entry.mask = 0; /* unmask IRQ now */
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.polarity = 0;
+ entry.trigger = 0;
+ entry.vector = vector;
+
+ /*
+ * The timer IRQ doesn't have to know that behind the
+ * scene we have a 8259A-master in AEOI mode ...
+ */
+ irq_desc[IO_APIC_VECTOR(0)].handler = &ioapic_edge_type;
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ enable_8259A_irq(0);
}
-void __init UNEXPECTED_IO_APIC(void)
+static inline void UNEXPECTED_IO_APIC(void)
{
- printk(KERN_WARNING
- "An unexpected IO-APIC was found. If this kernel release is less than\n"
- "three months old please report this to linux-smp@vger.kernel.org\n");
}
void __init print_IO_APIC(void)
{
-#ifdef VERBOSE
- int apic, i;
- struct IO_APIC_reg_00 reg_00;
- struct IO_APIC_reg_01 reg_01;
- struct IO_APIC_reg_02 reg_02;
- struct IO_APIC_reg_03 reg_03;
- unsigned long flags;
-
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (i = 0; i < nr_ioapics; i++)
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+ int apic, i;
+ union IO_APIC_reg_00 reg_00;
+ union IO_APIC_reg_01 reg_01;
+ union IO_APIC_reg_02 reg_02;
+ union IO_APIC_reg_03 reg_03;
+ unsigned long flags;
- /*
- * We are a bit conservative about what we expect. We have to
- * know about every hardware change ASAP.
- */
- printk(KERN_INFO "testing the IO APIC.......................\n");
+ if (apic_verbosity == APIC_QUIET)
+ return;
+
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+ for (i = 0; i < nr_ioapics; i++)
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+ mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
- for (apic = 0; apic < nr_ioapics; apic++) {
+ /*
+ * We are a bit conservative about what we expect. We have to
+ * know about every hardware change ASAP.
+ */
+ printk(KERN_INFO "testing the IO APIC.......................\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_00 = io_apic_read(apic, 0);
- *(int *)&reg_01 = io_apic_read(apic, 1);
- if (reg_01.version >= 0x10)
- *(int *)&reg_02 = io_apic_read(apic, 2);
- if (reg_01.version >= 0x20)
- *(int *)&reg_03 = io_apic_read(apic, 3);
+ reg_00.raw = io_apic_read(apic, 0);
+ reg_01.raw = io_apic_read(apic, 1);
+ if (reg_01.bits.version >= 0x10)
+ reg_02.raw = io_apic_read(apic, 2);
+ if (reg_01.bits.version >= 0x20)
+ reg_03.raw = io_apic_read(apic, 3);
spin_unlock_irqrestore(&ioapic_lock, flags);
- printk("\n");
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
- printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID);
- printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.delivery_type);
- printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.LTS);
- if (reg_00.__reserved_0 || reg_00.__reserved_1 || reg_00.__reserved_2)
- UNEXPECTED_IO_APIC();
-
- printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries);
- if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */
- (reg_01.entries != 0x17) && /* typical ISA+PCI boards */
- (reg_01.entries != 0x1b) && /* Compaq Proliant boards */
- (reg_01.entries != 0x1f) && /* dual Xeon boards */
- (reg_01.entries != 0x22) && /* bigger Xeon boards */
- (reg_01.entries != 0x2E) &&
- (reg_01.entries != 0x3F)
- )
- UNEXPECTED_IO_APIC();
-
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ);
- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version);
- if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */
- (reg_01.version != 0x02) && /* VIA */
- (reg_01.version != 0x03) && /* later VIA */
- (reg_01.version != 0x10) && /* oldest IO-APICs */
- (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
- (reg_01.version != 0x13) && /* Xeon IO-APICs */
- (reg_01.version != 0x20) /* Intel P64H (82806 AA) */
- )
- UNEXPECTED_IO_APIC();
- if (reg_01.__reserved_1 || reg_01.__reserved_2)
- UNEXPECTED_IO_APIC();
+ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
+ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
+ if (reg_00.bits.ID >= get_physical_broadcast())
+ UNEXPECTED_IO_APIC();
+ if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
+ if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+ (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+ (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+ (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+ (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+ (reg_01.bits.entries != 0x2E) &&
+ (reg_01.bits.entries != 0x3F)
+ )
+ UNEXPECTED_IO_APIC();
+
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
+ if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+ (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+ (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+ (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+ (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
+ )
+ UNEXPECTED_IO_APIC();
+ if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
/*
* Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
* but the value of reg_02 is read as the previous read register
* value, so ignore it if reg_02 == reg_01.
*/
- if (reg_01.version >= 0x10 && *(int *)&reg_02 != *(int *)&reg_01) {
- printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration);
- if (reg_02.__reserved_1 || reg_02.__reserved_2)
- UNEXPECTED_IO_APIC();
+ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
+ if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+ UNEXPECTED_IO_APIC();
}
/*
@@ -890,232 +829,91 @@ void __init print_IO_APIC(void)
* or reg_03, but the value of reg_0[23] is read as the previous read
* register value, so ignore it if reg_03 == reg_0[12].
*/
- if (reg_01.version >= 0x20 && *(int *)&reg_03 != *(int *)&reg_02 &&
- *(int *)&reg_03 != *(int *)&reg_01) {
- printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)&reg_03);
- printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.boot_DT);
- if (reg_03.__reserved_1)
- UNEXPECTED_IO_APIC();
+ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+ reg_03.raw != reg_01.raw) {
+ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
+ if (reg_03.bits.__reserved_1)
+ UNEXPECTED_IO_APIC();
}
printk(KERN_DEBUG ".... IRQ redirection table:\n");
printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
- " Stat Dest Deli Vect: \n");
+ " Stat Dest Deli Vect: \n");
- for (i = 0; i <= reg_01.entries; i++) {
- struct IO_APIC_route_entry entry;
+ for (i = 0; i <= reg_01.bits.entries; i++) {
+ struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
- printk(KERN_DEBUG " %02x %03X %02X ",
- i,
- entry.dest.logical.logical_dest,
- entry.dest.physical.physical_dest
+ printk(KERN_DEBUG " %02x %03X %02X ",
+ i,
+ entry.dest.logical.logical_dest,
+ entry.dest.physical.physical_dest
);
- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
+ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector
);
}
- }
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
- struct irq_pin_list *entry = irq_2_pin + i;
- if (entry->pin < 0)
- continue;
- printk(KERN_DEBUG "IRQ%d ", i);
- for (;;) {
- printk("-> %d:%d", entry->apic, entry->pin);
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
- printk("\n");
- }
-
- printk(KERN_INFO ".................................... done.\n");
-#endif
-}
-
-
-#if 0 /* Maybe useful for debugging, but not currently used anywhere. */
-
-static void print_APIC_bitfield (int base)
-{
- unsigned int v;
- int i, j;
-
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
- for (i = 0; i < 8; i++) {
- v = apic_read(base + i*0x10);
- for (j = 0; j < 32; j++) {
- if (v & (1<<j))
- printk("1");
- else
- printk("0");
- }
- printk("\n");
- }
-}
-
-
-void /*__init*/ print_local_APIC(void * dummy)
-{
- unsigned int v, ver, maxlvt;
-
- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
- smp_processor_id(), hard_smp_processor_id());
- v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
- v = apic_read(APIC_LVR);
- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
- ver = GET_APIC_VERSION(v);
- maxlvt = get_maxlvt();
-
- v = apic_read(APIC_TASKPRI);
- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- v = apic_read(APIC_ARBPRI);
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
- v & APIC_ARBPRI_MASK);
- v = apic_read(APIC_PROCPRI);
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
- }
-
- v = apic_read(APIC_EOI);
- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
- v = apic_read(APIC_RRR);
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
- v = apic_read(APIC_LDR);
- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
- v = apic_read(APIC_DFR);
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
- v = apic_read(APIC_SPIV);
- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
- printk(KERN_DEBUG "... APIC ISR field:\n");
- print_APIC_bitfield(APIC_ISR);
- printk(KERN_DEBUG "... APIC TMR field:\n");
- print_APIC_bitfield(APIC_TMR);
- printk(KERN_DEBUG "... APIC IRR field:\n");
- print_APIC_bitfield(APIC_IRR);
-
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- v = apic_read(APIC_ESR);
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- }
-
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
-
- v = apic_read(APIC_LVTT);
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
- if (maxlvt > 3) { /* PC is LVT#4. */
- v = apic_read(APIC_LVTPC);
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
- }
- v = apic_read(APIC_LVT0);
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
- v = apic_read(APIC_LVT1);
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
- if (maxlvt > 2) { /* ERR is LVT#3. */
- v = apic_read(APIC_LVTERR);
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
- }
-
- v = apic_read(APIC_TMICT);
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
- v = apic_read(APIC_TMCCT);
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
- v = apic_read(APIC_TDCR);
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
- printk("\n");
-}
-
-void print_all_local_APICs (void)
-{
- smp_call_function(print_local_APIC, NULL, 1, 1);
- print_local_APIC(NULL);
-}
-
-void /*__init*/ print_PIC(void)
-{
- extern spinlock_t i8259A_lock;
- unsigned int v, flags;
-
- printk(KERN_DEBUG "\nprinting PIC contents\n");
-
- spin_lock_irqsave(&i8259A_lock, flags);
-
- v = inb(0xa1) << 8 | inb(0x21);
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
-
- v = inb(0xa0) << 8 | inb(0x20);
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
-
- outb(0x0b,0xa0);
- outb(0x0b,0x20);
- v = inb(0xa0) << 8 | inb(0x20);
- outb(0x0a,0xa0);
- outb(0x0a,0x20);
-
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ }
+ printk(KERN_INFO "Using vector-based indexing\n");
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for (i = 0; i < NR_IRQS; i++) {
+ struct irq_pin_list *entry = irq_2_pin + i;
+ if (entry->pin < 0)
+ continue;
+ printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ printk("\n");
+ }
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
+ printk(KERN_INFO ".................................... done.\n");
- v = inb(0x4d1) << 8 | inb(0x4d0);
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+ return;
}
-#endif /* 0 */
-
-
static void __init enable_IO_APIC(void)
{
- struct IO_APIC_reg_01 reg_01;
- int i;
- unsigned long flags;
+ union IO_APIC_reg_01 reg_01;
+ int i;
+ unsigned long flags;
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
- if (!pirqs_enabled)
- for (i = 0; i < MAX_PIRQS; i++)
- pirq_entries[i] = -1;
+ for (i = 0; i < PIN_MAP_SIZE; i++) {
+ irq_2_pin[i].pin = -1;
+ irq_2_pin[i].next = 0;
+ }
- /*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (i = 0; i < nr_ioapics; i++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_01 = io_apic_read(i, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[i] = reg_01.entries+1;
- }
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (i = 0; i < nr_ioapics; i++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(i, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[i] = reg_01.bits.entries+1;
+ }
- /*
- * Do not trust the IO-APIC being empty at bootup
- */
- clear_IO_APIC();
+ /*
+ * Do not trust the IO-APIC being empty at bootup
+ */
+ clear_IO_APIC();
}
/*
@@ -1123,12 +921,12 @@ static void __init enable_IO_APIC(void)
*/
void disable_IO_APIC(void)
{
- /*
- * Clear the IO-APIC before rebooting:
+ /*
+ * Clear the IO-APIC before rebooting:
*/
- clear_IO_APIC();
+ clear_IO_APIC();
- disconnect_bsp_APIC();
+ disconnect_bsp_APIC();
}
/*
@@ -1138,97 +936,112 @@ void disable_IO_APIC(void)
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/
-static void __init setup_ioapic_ids_from_mpc (void)
+#ifndef CONFIG_X86_NUMAQ
+static void __init setup_ioapic_ids_from_mpc(void)
{
- struct IO_APIC_reg_00 reg_00;
- unsigned long phys_id_present_map = phys_cpu_present_map;
- int apic;
- int i;
- unsigned char old_id;
- unsigned long flags;
-
- if (clustered_apic_mode)
- /* We don't have a good way to do this yet - hack */
- phys_id_present_map = (u_long) 0xf;
- /*
- * Set the IOAPIC ID to the value stored in the MPC table.
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
-
- /* Read the register 0 value */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_00 = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ union IO_APIC_reg_00 reg_00;
+ physid_mask_t phys_id_present_map;
+ int apic;
+ int i;
+ unsigned char old_id;
+ unsigned long flags;
+
+ /*
+ * This is broken; anything with a real cpu count has to
+ * circumvent this idiocy regardless.
+ */
+ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ /*
+ * Set the IOAPIC ID to the value stored in the MPC table.
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ /* Read the register 0 value */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
- old_id = mp_ioapics[apic].mpc_apicid;
-
- if (mp_ioapics[apic].mpc_apicid >= apic_broadcast_id) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
- apic, mp_ioapics[apic].mpc_apicid);
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- reg_00.ID);
- mp_ioapics[apic].mpc_apicid = reg_00.ID;
- }
-
- /*
- * Sanity check, is the ID really free? Every APIC in a
- * system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- * I/O APIC IDs no longer have any meaning for xAPICs and SAPICs.
- */
- if ((clustered_apic_mode != CLUSTERED_APIC_XAPIC) &&
- (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid))) {
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
- apic, mp_ioapics[apic].mpc_apicid);
- for (i = 0; i < 0xf; i++)
- if (!(phys_id_present_map & (1 << i)))
- break;
- if (i >= apic_broadcast_id)
- panic("Max APIC ID exceeded!\n");
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
- i);
- phys_id_present_map |= 1 << i;
- mp_ioapics[apic].mpc_apicid = i;
- } else {
- printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
- phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
- }
-
-
- /*
- * We need to adjust the IRQ routing table
- * if the ID changed.
- */
- if (old_id != mp_ioapics[apic].mpc_apicid)
- for (i = 0; i < mp_irq_entries; i++)
- if (mp_irqs[i].mpc_dstapic == old_id)
- mp_irqs[i].mpc_dstapic
- = mp_ioapics[apic].mpc_apicid;
-
- /*
- * Read the right value from the MPC table and
- * write it into the ID register.
- */
- printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
- mp_ioapics[apic].mpc_apicid);
-
- reg_00.ID = mp_ioapics[apic].mpc_apicid;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0, *(int *)&reg_00);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- /*
- * Sanity check
- */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_00 = io_apic_read(apic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
- panic("could not set ID!\n");
- else
- printk(" ok.\n");
- }
+ old_id = mp_ioapics[apic].mpc_apicid;
+
+ if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ reg_00.bits.ID);
+ mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+ }
+
+ /* Don't check I/O APIC IDs for some xAPIC systems. They have
+ * no meaning without the serial APIC bus. */
+ if (NO_IOAPIC_CHECK)
+ continue;
+ /*
+ * Sanity check, is the ID really free? Every APIC in a
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(phys_id_present_map,
+ mp_ioapics[apic].mpc_apicid)) {
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+ apic, mp_ioapics[apic].mpc_apicid);
+ for (i = 0; i < get_physical_broadcast(); i++)
+ if (!physid_isset(i, phys_id_present_map))
+ break;
+ if (i >= get_physical_broadcast())
+ panic("Max APIC ID exceeded!\n");
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+ i);
+ physid_set(i, phys_id_present_map);
+ mp_ioapics[apic].mpc_apicid = i;
+ } else {
+ physid_mask_t tmp;
+ tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+ apic_printk(APIC_VERBOSE, "Setting %d in the "
+ "phys_id_present_map\n",
+ mp_ioapics[apic].mpc_apicid);
+ physids_or(phys_id_present_map, phys_id_present_map, tmp);
+ }
+
+
+ /*
+ * We need to adjust the IRQ routing table
+ * if the ID changed.
+ */
+ if (old_id != mp_ioapics[apic].mpc_apicid)
+ for (i = 0; i < mp_irq_entries; i++)
+ if (mp_irqs[i].mpc_dstapic == old_id)
+ mp_irqs[i].mpc_dstapic
+ = mp_ioapics[apic].mpc_apicid;
+
+ /*
+ * Read the right value from the MPC table and
+ * write it into the ID register.
+ */
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "...changing IO-APIC physical APIC ID to %d ...",
+ mp_ioapics[apic].mpc_apicid);
+
+ reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0, reg_00.raw);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ /*
+ * Sanity check
+ */
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(apic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+ printk("could not set ID!\n");
+ else
+ apic_printk(APIC_VERBOSE, " ok.\n");
+ }
}
+#else
+static void __init setup_ioapic_ids_from_mpc(void) { }
+#endif
/*
* There is a nasty bug in some older SMP boards, their mptable lies
@@ -1240,26 +1053,37 @@ static void __init setup_ioapic_ids_from_mpc (void)
*/
static int __init timer_irq_works(void)
{
- unsigned int t1 = jiffies;
+ unsigned long t1 = jiffies;
+
+ local_irq_enable();
+ /* Let ten ticks pass... */
+ mdelay((10 * 1000) / HZ);
+
+ /*
+ * Expect a few ticks at least, to be sure some possible
+ * glue logic does not lock up after one or two first
+ * ticks in a non-ExtINT mode. Also the local APIC
+ * might have cached one ExtINT interrupt. Finally, at
+ * least one tick may be lost due to delays.
+ */
+ if (jiffies - t1 > 4)
+ return 1;
- __sti();
- /* Let ten ticks pass... */
- mdelay((10 * 1000) / HZ);
-
- /*
- * Expect a few ticks at least, to be sure some possible
- * glue logic does not lock up after one or two first
- * ticks in a non-ExtINT mode. Also the local APIC
- * might have cached one ExtINT interrupt. Finally, at
- * least one tick may be lost due to delays.
- */
- if (jiffies - t1 > 4)
- return 1;
-
- return 0;
+ return 0;
}
-static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
/*
* Starting up a edge-triggered IO-APIC interrupt is
@@ -1270,22 +1094,21 @@ static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
* This is not complete - we should be able to fake
* an edge even if it isn't on the 8259A...
*/
-
static unsigned int startup_edge_ioapic_irq(unsigned int irq)
{
- int was_pending = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
- disable_8259A_irq(irq);
- if (i8259A_irq_pending(irq))
- was_pending = 1;
- }
- __unmask_IO_APIC_irq(irq);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ int was_pending = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ if (irq < 16) {
+ disable_8259A_irq(irq);
+ if (i8259A_irq_pending(irq))
+ was_pending = 1;
+ }
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
- return was_pending;
+ return was_pending;
}
/*
@@ -1295,16 +1118,12 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
*/
static void ack_edge_ioapic_irq(unsigned int irq)
{
- balance_irq(irq);
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
- == (IRQ_PENDING | IRQ_DISABLED))
- mask_IO_APIC_irq(irq);
- ack_APIC_irq();
+ if ((irq_desc[IO_APIC_VECTOR(irq)].status & (IRQ_PENDING | IRQ_DISABLED))
+ == (IRQ_PENDING | IRQ_DISABLED))
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
}
-static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
-
-
/*
* Level triggered interrupts can just be masked,
* and shutting down and starting up the interrupt
@@ -1321,20 +1140,17 @@ static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
*/
static unsigned int startup_level_ioapic_irq (unsigned int irq)
{
- unmask_IO_APIC_irq(irq);
+ unmask_IO_APIC_irq(irq);
- return 0; /* don't check for pending */
+ return 0; /* don't check for pending */
}
-static void mask_and_ack_level_ioapic_irq(unsigned int irq)
+static void mask_and_ack_level_ioapic_irq (unsigned int irq)
{
- unsigned long v;
- int i;
-
- balance_irq(irq);
-
- mask_IO_APIC_irq(irq);
+ unsigned long v;
+ int i;
+ mask_IO_APIC_irq(irq);
/*
* It appears there is an erratum which affects at least version 0x11
* of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1354,108 +1170,152 @@ static void mask_and_ack_level_ioapic_irq(unsigned int irq)
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
*/
- i = IO_APIC_VECTOR(irq);
- v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+ i = IO_APIC_VECTOR(irq);
- ack_APIC_irq();
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
- if (!(v & (1 << (i & 0x1f)))) {
-#ifdef APIC_LOCKUP_DEBUG
- struct irq_pin_list *entry;
-#endif
+ ack_APIC_irq();
-#ifdef APIC_MISMATCH_DEBUG
- atomic_inc(&irq_mis_count);
-#endif
- spin_lock(&ioapic_lock);
- __edge_IO_APIC_irq(irq);
-#ifdef APIC_LOCKUP_DEBUG
- for (entry = irq_2_pin + irq;;) {
- unsigned int reg;
-
- if (entry->pin == -1)
- break;
- reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
- if (reg & 0x00004000)
- printk(KERN_CRIT "Aieee!!! Remote IRR"
- " still set after unlock!\n");
- if (!entry->next)
- break;
- entry = irq_2_pin + entry->next;
- }
-#endif
- __level_IO_APIC_irq(irq);
- spin_unlock(&ioapic_lock);
- }
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+ spin_lock(&ioapic_lock);
+ __edge_IO_APIC_irq(irq);
+ __level_IO_APIC_irq(irq);
+ spin_unlock(&ioapic_lock);
+ }
}
-static void end_level_ioapic_irq(unsigned int irq)
+static void end_level_ioapic_irq (unsigned int irq)
{
- unmask_IO_APIC_irq(irq);
+ unmask_IO_APIC_irq(irq);
}
-static inline void init_IO_APIC_traps(void)
+static unsigned int startup_edge_ioapic_vector(unsigned int vector)
{
- int irq;
+ int irq = vector_to_irq(vector);
+ return startup_edge_ioapic_irq(irq);
+}
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- for (irq = 0; irq < NR_IRQS ; irq++) {
- if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
- /*
- * Hmm.. We don't have an entry for this,
- * so default to an old-fashioned 8259
- * interrupt if we can..
- */
- if (irq < 16)
- make_8259A_irq(irq);
- else
- /* Strange. Oh, well.. */
- irq_desc[irq].handler = &no_irq_type;
- }
- }
+static void ack_edge_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+ ack_edge_ioapic_irq(irq);
+}
+
+static unsigned int startup_level_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+ return startup_level_ioapic_irq (irq);
+}
+
+static void mask_and_ack_level_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+ mask_and_ack_level_ioapic_irq(irq);
+}
+
+static void end_level_ioapic_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+ end_level_ioapic_irq(irq);
+}
+
+static void mask_IO_APIC_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+ mask_IO_APIC_irq(irq);
+}
+
+static void unmask_IO_APIC_vector(unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+ unmask_IO_APIC_irq(irq);
+}
+
+static void set_ioapic_affinity_vector(
+ unsigned int vector, cpumask_t cpu_mask)
+{
+ int irq = vector_to_irq(vector);
+ set_ioapic_affinity_irq(irq, cpu_mask);
+}
+
+static void disable_edge_ioapic_vector(unsigned int vector)
+{
+}
+
+static void end_edge_ioapic_vector(unsigned int vector)
+{
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+static struct hw_interrupt_type ioapic_edge_type = {
+ .typename = "IO-APIC-edge",
+ .startup = startup_edge_ioapic_vector,
+ .shutdown = disable_edge_ioapic_vector,
+ .enable = unmask_IO_APIC_vector,
+ .disable = disable_edge_ioapic_vector,
+ .ack = ack_edge_ioapic_vector,
+ .end = end_edge_ioapic_vector,
+ .set_affinity = set_ioapic_affinity_vector,
+};
+
+static struct hw_interrupt_type ioapic_level_type = {
+ .typename = "IO-APIC-level",
+ .startup = startup_level_ioapic_vector,
+ .shutdown = mask_IO_APIC_vector,
+ .enable = unmask_IO_APIC_vector,
+ .disable = mask_IO_APIC_vector,
+ .ack = mask_and_ack_level_ioapic_vector,
+ .end = end_level_ioapic_vector,
+ .set_affinity = set_ioapic_affinity_vector,
+};
+
+static inline void init_IO_APIC_traps(void)
+{
+ int irq;
+ for (irq = 0; irq < 16 ; irq++)
+ if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq))
+ make_8259A_irq(irq);
}
-static void enable_lapic_irq (unsigned int irq)
+static void enable_lapic_vector(unsigned int vector)
{
- unsigned long v;
+ unsigned long v;
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
-static void disable_lapic_irq (unsigned int irq)
+static void disable_lapic_vector(unsigned int vector)
{
- unsigned long v;
+ unsigned long v;
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
}
-static void ack_lapic_irq (unsigned int irq)
+static void ack_lapic_vector(unsigned int vector)
{
- ack_APIC_irq();
+ ack_APIC_irq();
}
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
+static void end_lapic_vector(unsigned int vector) { /* nothing */ }
static struct hw_interrupt_type lapic_irq_type = {
- "local-APIC-edge",
- NULL, /* startup_irq() not used for IRQ0 */
- NULL, /* shutdown_irq() not used for IRQ0 */
- enable_lapic_irq,
- disable_lapic_irq,
- ack_lapic_irq,
- end_lapic_irq
+ .typename = "local-APIC-edge",
+ .startup = NULL, /* startup_irq() not used for IRQ0 */
+ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
+ .enable = enable_lapic_vector,
+ .disable = disable_lapic_vector,
+ .ack = ack_lapic_vector,
+ .end = end_lapic_vector
};
/*
@@ -1467,57 +1327,57 @@ static struct hw_interrupt_type lapic_irq_type = {
*/
static inline void unlock_ExtINT_logic(void)
{
- int pin, i;
- struct IO_APIC_route_entry entry0, entry1;
- unsigned char save_control, save_freq_select;
- unsigned long flags;
-
- pin = find_isa_irq_pin(8, mp_INT);
- if (pin == -1)
- return;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- clear_IO_APIC_pin(0, pin);
-
- memset(&entry1, 0, sizeof(entry1));
-
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
- entry1.dest.physical.physical_dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
- entry1.vector = 0;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- save_control = CMOS_READ(RTC_CONTROL);
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
- RTC_FREQ_SELECT);
- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
- i = 100;
- while (i-- > 0) {
- mdelay(10);
- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
- i -= 10;
- }
+ int pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+ unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ if (pin == -1)
+ return;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ clear_IO_APIC_pin(0, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+
+ entry1.dest_mode = 0; /* physical delivery */
+ entry1.mask = 0; /* unmask IRQ now */
+ entry1.dest.physical.physical_dest = hard_smp_processor_id();
+ entry1.delivery_mode = dest_ExtINT;
+ entry1.polarity = entry0.polarity;
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+ RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+ i = 100;
+ while (i-- > 0) {
+ mdelay(10);
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+ i -= 10;
+ }
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(0, pin);
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(0, pin);
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
}
/*
@@ -1528,416 +1388,386 @@ static inline void unlock_ExtINT_logic(void)
*/
static inline void check_timer(void)
{
- extern int timer_ack;
- int pin1, pin2;
- int vector;
+ int pin1, pin2;
+ int vector;
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+ vector = assign_irq_vector(0);
+
+ irq_desc[IO_APIC_VECTOR(0)].action = irq_desc[LEGACY_VECTOR(0)].action;
+ irq_desc[IO_APIC_VECTOR(0)].depth = 0;
+ irq_desc[IO_APIC_VECTOR(0)].status &= ~IRQ_DISABLED;
+
+ /*
+ * Subtle, code in do_timer_interrupt() expects an AEOI
+ * mode for the 8259A whenever interrupts are routed
+ * through I/O APICs. Also IRQ0 has to be enabled in
+ * the 8259A which implies the virtual wire has to be
+ * disabled in the local APIC.
+ */
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+ init_8259A(1);
+ timer_ack = 1;
+ enable_8259A_irq(0);
+
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+ printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+ if (pin1 != -1) {
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ unmask_IO_APIC_irq(0);
+ if (timer_irq_works()) {
+ return;
+ }
+ clear_IO_APIC_pin(0, pin1);
+ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+ }
- /*
- * get/set the timer IRQ vector:
- */
- disable_8259A_irq(0);
- vector = assign_irq_vector(0);
- set_intr_gate(vector, interrupt[0]);
+ printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+ if (pin2 != -1) {
+ printk("\n..... (found pin %d) ...", pin2);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ setup_ExtINT_IRQ0_pin(pin2, vector);
+ if (timer_irq_works()) {
+ printk("works.\n");
+ if (pin1 != -1)
+ replace_pin_at_irq(0, 0, pin1, 0, pin2);
+ else
+ add_pin_to_irq(0, 0, pin2);
+ return;
+ }
+ /*
+ * Cleanup, just in case ...
+ */
+ clear_IO_APIC_pin(0, pin2);
+ }
+ printk(" failed.\n");
- /*
- * Subtle, code in do_timer_interrupt() expects an AEOI
- * mode for the 8259A whenever interrupts are routed
- * through I/O APICs. Also IRQ0 has to be enabled in
- * the 8259A which implies the virtual wire has to be
- * disabled in the local APIC.
- */
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- init_8259A(1);
- timer_ack = 1;
- enable_8259A_irq(0);
-
- pin1 = find_isa_irq_pin(0, mp_INT);
- pin2 = find_isa_irq_pin(0, mp_ExtINT);
-
- printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
-
- if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
- unmask_IO_APIC_irq(0);
- if (timer_irq_works())
- return;
- clear_IO_APIC_pin(0, pin1);
- printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
- }
+ if (nmi_watchdog == NMI_IO_APIC) {
+ printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+ nmi_watchdog = 0;
+ }
- printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
- if (pin2 != -1) {
- printk("\n..... (found pin %d) ...", pin2);
- /*
- * legacy devices should be connected to IO APIC #0
- */
- setup_ExtINT_IRQ0_pin(pin2, vector);
- if (timer_irq_works()) {
- printk("works.\n");
- if (pin1 != -1)
- replace_pin_at_irq(0, 0, pin1, 0, pin2);
- else
- add_pin_to_irq(0, 0, pin2);
- return;
- }
- /*
- * Cleanup, just in case ...
- */
- clear_IO_APIC_pin(0, pin2);
- }
- printk(" failed.\n");
+ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
- printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+ disable_8259A_irq(0);
+ irq_desc[vector].handler = &lapic_irq_type;
+ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ enable_8259A_irq(0);
- disable_8259A_irq(0);
- irq_desc[0].handler = &lapic_irq_type;
- apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
- enable_8259A_irq(0);
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+ printk(" failed.\n");
- if (timer_irq_works()) {
- printk(" works.\n");
- return;
- }
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
- printk(" failed.\n");
+ printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
- printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+ timer_ack = 0;
+ init_8259A(0);
+ make_8259A_irq(0);
+ apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
- init_8259A(0);
- make_8259A_irq(0);
- apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+ unlock_ExtINT_logic();
- unlock_ExtINT_logic();
+ if (timer_irq_works()) {
+ printk(" works.\n");
+ return;
+ }
+ printk(" failed :(.\n");
+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
+ "report. Then try booting with the 'noapic' option");
+}
- if (timer_irq_works()) {
- printk(" works.\n");
- return;
- }
- printk(" failed :(.\n");
- panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
+#define NR_IOAPIC_BIOSIDS 256
+static u8 ioapic_biosid_to_apic_enum[NR_IOAPIC_BIOSIDS];
+static void store_ioapic_biosid_mapping(void)
+{
+ u8 apic;
+ memset(ioapic_biosid_to_apic_enum, ~0, NR_IOAPIC_BIOSIDS);
+ for ( apic = 0; apic < nr_ioapics; apic++ )
+ ioapic_biosid_to_apic_enum[mp_ioapics[apic].mpc_apicid] = apic;
}
/*
*
- * IRQ's that are handled by the old PIC in all cases:
+ * IRQ's that are handled by the PIC in the MPS IOAPIC case.
* - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
* Linux doesn't really care, as it's not actually used
* for any interrupt handling anyway.
- * - There used to be IRQ13 here as well, but all
- * MPS-compliant must not use it for FPU coupling and we
- * want to use exception 16 anyway. And there are
- * systems who connect it to an I/O APIC for other uses.
- * Thus we don't mark it special any longer.
- *
- * Additionally, something is definitely wrong with irq9
- * on PIIX4 boards.
*/
-#define PIC_IRQS (1<<2)
+#define PIC_IRQS (1 << PIC_CASCADE_IR)
void __init setup_IO_APIC(void)
{
- enable_IO_APIC();
-
- io_apic_irqs = ~PIC_IRQS;
- printk("ENABLING IO-APIC IRQs\n");
-
- /*
- * Set up IO-APIC IRQ routing.
- */
- if (!acpi_ioapic)
- setup_ioapic_ids_from_mpc();
- sync_Arb_IDs();
- setup_IO_APIC_irqs();
- init_IO_APIC_traps();
- check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
+ store_ioapic_biosid_mapping();
+
+ enable_IO_APIC();
+
+ if (acpi_ioapic)
+ io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
+ else
+ io_apic_irqs = ~PIC_IRQS;
+
+ printk("ENABLING IO-APIC IRQs\n");
+
+ /*
+ * Set up IO-APIC IRQ routing.
+ */
+ if (!acpi_ioapic)
+ setup_ioapic_ids_from_mpc();
+ sync_Arb_IDs();
+ setup_IO_APIC_irqs();
+ init_IO_APIC_traps();
+ check_timer();
+ print_IO_APIC();
}
-#endif /* CONFIG_X86_IO_APIC */
-
-
-
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
#ifdef CONFIG_ACPI_BOOT
-#define IO_APIC_MAX_ID 15
-
int __init io_apic_get_unique_id (int ioapic, int apic_id)
{
- struct IO_APIC_reg_00 reg_00;
- static unsigned long apic_id_map = 0;
- unsigned long flags;
- int i = 0;
-
- /*
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
- * supports up to 16 on one shared APIC bus.
- *
- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
- * advantage of new APIC bus architecture.
- */
-
- if (!apic_id_map)
- apic_id_map = phys_cpu_present_map;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_00 = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-
- if (apic_id >= IO_APIC_MAX_ID) {
- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
- "%d\n", ioapic, apic_id, reg_00.ID);
- apic_id = reg_00.ID;
- }
+ union IO_APIC_reg_00 reg_00;
+ static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+ physid_mask_t tmp;
+ unsigned long flags;
+ int i = 0;
+
+ /*
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
+ * supports up to 16 on one shared APIC bus.
+ *
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+ * advantage of new APIC bus architecture.
+ */
+
+ if (physids_empty(apic_id_map))
+ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ if (apic_id >= get_physical_broadcast()) {
+ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+ "%d\n", ioapic, apic_id, reg_00.bits.ID);
+ apic_id = reg_00.bits.ID;
+ }
- /* XAPICs do not need unique IDs */
- if (clustered_apic_mode == CLUSTERED_APIC_XAPIC){
- printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n",
- ioapic, apic_id);
- return apic_id;
- }
+ /*
+ * Every APIC in a system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (check_apicid_used(apic_id_map, apic_id)) {
- /*
- * Every APIC in a system must have a unique ID or we get lots of nice
- * 'stuck on smp_invalidate_needed IPI wait' messages.
- */
- if (apic_id_map & (1 << apic_id)) {
-
- for (i = 0; i < IO_APIC_MAX_ID; i++) {
- if (!(apic_id_map & (1 << i)))
- break;
- }
+ for (i = 0; i < get_physical_broadcast(); i++) {
+ if (!check_apicid_used(apic_id_map, i))
+ break;
+ }
- if (i == IO_APIC_MAX_ID)
- panic("Max apic_id exceeded!\n");
+ if (i == get_physical_broadcast())
+ panic("Max apic_id exceeded!\n");
- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
- "trying %d\n", ioapic, apic_id, i);
+ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+ "trying %d\n", ioapic, apic_id, i);
- apic_id = i;
- }
+ apic_id = i;
+ }
- apic_id_map |= (1 << apic_id);
+ tmp = apicid_to_cpu_present(apic_id);
+ physids_or(apic_id_map, apic_id_map, tmp);
- if (reg_00.ID != apic_id) {
- reg_00.ID = apic_id;
+ if (reg_00.bits.ID != apic_id) {
+ reg_00.bits.ID = apic_id;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0, *(int *)&reg_00);
- *(int *)&reg_00 = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0, reg_00.raw);
+ reg_00.raw = io_apic_read(ioapic, 0);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
- /* Sanity check */
- if (reg_00.ID != apic_id)
- panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
- }
+ /* Sanity check */
+ if (reg_00.bits.ID != apic_id)
+ panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
+ }
- printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
- return apic_id;
+ return apic_id;
}
int __init io_apic_get_version (int ioapic)
{
- struct IO_APIC_reg_01 reg_01;
- unsigned long flags;
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_01 = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
- return reg_01.version;
+ return reg_01.bits.version;
}
int __init io_apic_get_redir_entries (int ioapic)
{
- struct IO_APIC_reg_01 reg_01;
- unsigned long flags;
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(int *)&reg_01 = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(ioapic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
- return reg_01.entries;
+ return reg_01.bits.entries;
}
int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
{
- struct IO_APIC_route_entry entry;
- unsigned long flags;
-
- if (!IO_APIC_IRQ(irq)) {
- printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0/n",
- ioapic);
- return -EINVAL;
- }
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
- /*
- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
- * Note that we mask (disable) IRQs now -- these get enabled when the
- * corresponding device driver registers for this IRQ.
- */
+ if (!IO_APIC_IRQ(irq)) {
+ printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ ioapic);
+ return -EINVAL;
+ }
- memset(&entry,0,sizeof(entry));
+ /*
+ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+ * Note that we mask (disable) IRQs now -- these get enabled when the
+ * corresponding device driver registers for this IRQ.
+ */
- entry.delivery_mode = dest_LowestPrio;
- entry.dest_mode = INT_DELIVERY_MODE;
- entry.dest.logical.logical_dest = target_cpus();
- entry.mask = 1; /* Disabled (masked) */
- entry.trigger = edge_level;
- entry.polarity = active_high_low;
+ memset(&entry,0,sizeof(entry));
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.trigger = edge_level;
+ entry.polarity = active_high_low;
+ entry.mask = 1;
- entry.vector = assign_irq_vector(irq);
+ /*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= 16)
+ add_pin_to_irq(irq, ioapic, pin);
- printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
- "IRQ %d Mode:%i Active:%i)\n", ioapic,
- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
+ entry.vector = assign_irq_vector(irq);
- if (edge_level) {
- irq_desc[irq].handler = &ioapic_level_irq_type;
- } else {
- irq_desc[irq].handler = &ioapic_edge_irq_type;
- }
+ apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
+ "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
+ mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+ edge_level, active_high_low);
- set_intr_gate(entry.vector, interrupt[irq]);
+ ioapic_register_intr(irq, entry.vector, edge_level);
- if (!ioapic && (irq < 16))
- disable_8259A_irq(irq);
+ if (!ioapic && (irq < 16))
+ disable_8259A_irq(irq);
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
- return 0;
+ return 0;
}
#endif /*CONFIG_ACPI_BOOT*/
-/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
-/* level- or edge-triggered. */
-/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
-static char opt_leveltrigger[30] = "", opt_edgetrigger[30] = "";
-string_param("leveltrigger", opt_leveltrigger);
-string_param("edgetrigger", opt_edgetrigger);
-static int __init ioapic_trigger_setup(void)
+int ioapic_guest_read(int apicid, int address, u32 *pval)
{
- char *p;
- irq_desc_t *desc;
- long irq;
+ u32 val;
+ int apicenum;
+ union IO_APIC_reg_00 reg_00;
+ unsigned long flags;
- p = opt_leveltrigger;
- while ( *p != '\0' )
- {
- irq = simple_strtol(p, &p, 10);
- if ( (irq <= 0) || (irq >= NR_IRQS) )
- {
- printk("IRQ '%ld' out of range in level-trigger list '%s'\n",
- irq, opt_leveltrigger);
- break;
- }
+ if ( (apicid >= NR_IOAPIC_BIOSIDS) ||
+ ((apicenum = ioapic_biosid_to_apic_enum[apicid]) >= nr_ioapics) )
+ return -EINVAL;
- printk("Forcing IRQ %ld to level-trigger: ", irq);
+ spin_lock_irqsave(&ioapic_lock, flags);
+ val = io_apic_read(apicenum, address);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
- desc = &irq_desc[irq];
- spin_lock_irq(&desc->lock);
+ /* Rewrite APIC ID to what the BIOS originally specified. */
+ if ( address == 0 )
+ {
+ reg_00.raw = val;
+ reg_00.bits.ID = apicid;
+ val = reg_00.raw;
+ }
- if ( desc->handler == &ioapic_level_irq_type )
- {
- printk("already level-triggered (no force applied).\n");
- }
- else if ( desc->handler != &ioapic_edge_irq_type )
- {
- printk("cannot force (can only force IO-APIC-edge IRQs).\n");
- }
- else
- {
- desc->handler = &ioapic_level_irq_type;
- __mask_IO_APIC_irq(irq);
- __level_IO_APIC_irq(irq);
- printk("done.\n");
- }
+ *pval = val;
+ return 0;
+}
- spin_unlock_irq(&desc->lock);
+int ioapic_guest_write(int apicid, int address, u32 val)
+{
+ int apicenum, pin, irq;
+ struct IO_APIC_route_entry rte = { 0 };
+ struct irq_pin_list *entry;
+ unsigned long flags;
- if ( *p == '\0' )
- break;
+ if ( (apicid >= NR_IOAPIC_BIOSIDS) ||
+ ((apicenum = ioapic_biosid_to_apic_enum[apicid]) >= nr_ioapics) )
+ return -EINVAL;
- if ( *p != ',' )
- {
- printk("Unexpected character '%c' in level-trigger list '%s'\n",
- *p, opt_leveltrigger);
- break;
- }
+ /* Only write to the first half of a route entry. */
+ if ( (address < 0x10) || (address & 1) )
+ return 0;
+
+ pin = (address - 0x10) >> 1;
- p++;
- }
+ rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ *(int *)&rte = val;
- p = opt_edgetrigger;
- while ( *p != '\0' )
+ if ( rte.vector >= FIRST_DEVICE_VECTOR )
{
- irq = simple_strtol(p, &p, 10);
- if ( (irq <= 0) || (irq >= NR_IRQS) )
- {
- printk("IRQ '%ld' out of range in edge-trigger list '%s'\n",
- irq, opt_edgetrigger);
- break;
- }
-
- printk("Forcing IRQ %ld to edge-trigger: ", irq);
-
- desc = &irq_desc[irq];
- spin_lock_irq(&desc->lock);
+ /* Is there a valid irq mapped to this vector? */
+ irq = vector_irq[rte.vector];
+ if ( !IO_APIC_IRQ(irq) )
+ return 0;
- if ( desc->handler == &ioapic_edge_irq_type )
- {
- printk("already edge-triggered (no force applied).\n");
- }
- else if ( desc->handler != &ioapic_level_irq_type )
- {
- printk("cannot force (can only force IO-APIC-level IRQs).\n");
- }
- else
- {
- desc->handler = &ioapic_edge_irq_type;
- __edge_IO_APIC_irq(irq);
- desc->status |= IRQ_PENDING; /* may have lost a masked edge */
- printk("done.\n");
- }
+ /* Set the correct irq-handling type. */
+ irq_desc[IO_APIC_VECTOR(irq)].handler = rte.trigger ?
+ &ioapic_level_type: &ioapic_edge_type;
- spin_unlock_irq(&desc->lock);
-
- if ( *p == '\0' )
- break;
-
- if ( *p != ',' )
+ /* Record the pin<->irq mapping. */
+ for ( entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next] )
{
- printk("Unexpected character '%c' in edge-trigger list '%s'\n",
- *p, opt_edgetrigger);
- break;
+ if ( (entry->apic == apicenum) && (entry->pin == pin) )
+ break;
+ if ( !entry->next )
+ {
+ add_pin_to_irq(irq, apicenum, pin);
+ break;
+ }
}
-
- p++;
}
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apicenum, 0x10 + 2 * pin, *(((int *)&rte) + 0));
+ io_apic_write(apicenum, 0x11 + 2 * pin, *(((int *)&rte) + 1));
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
return 0;
}
-
-__initcall(ioapic_trigger_setup);
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index fd8648d454..92b51f7d6e 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -11,20 +11,21 @@
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/sched.h>
+#include <asm/current.h>
#include <asm/smpboot.h>
irq_desc_t irq_desc[NR_IRQS];
-static void __do_IRQ_guest(int irq);
+static void __do_IRQ_guest(int vector);
-void no_action(int cpl, void *dev_id, struct xen_regs *regs) { }
+void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
-static void enable_none(unsigned int irq) { }
-static unsigned int startup_none(unsigned int irq) { return 0; }
-static void disable_none(unsigned int irq) { }
-static void ack_none(unsigned int irq)
+static void enable_none(unsigned int vector) { }
+static unsigned int startup_none(unsigned int vector) { return 0; }
+static void disable_none(unsigned int vector) { }
+static void ack_none(unsigned int vector)
{
- printk("Unexpected IRQ trap at vector %02x.\n", irq);
+ printk("Unexpected IRQ trap at vector %02x.\n", vector);
ack_APIC_irq();
}
@@ -42,69 +43,21 @@ struct hw_interrupt_type no_irq_type = {
};
atomic_t irq_err_count;
-atomic_t irq_mis_count;
-inline void disable_irq_nosync(unsigned int irq)
+asmlinkage void do_IRQ(struct cpu_user_regs *regs)
{
- irq_desc_t *desc = &irq_desc[irq];
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
-
- if ( desc->depth++ == 0 )
- {
- desc->status |= IRQ_DISABLED;
- desc->handler->disable(irq);
- }
-
- spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-void disable_irq(unsigned int irq)
-{
- disable_irq_nosync(irq);
- do { smp_mb(); } while ( irq_desc[irq].status & IRQ_INPROGRESS );
-}
-
-void enable_irq(unsigned int irq)
-{
- irq_desc_t *desc = &irq_desc[irq];
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
-
- if ( --desc->depth == 0 )
- {
- desc->status &= ~IRQ_DISABLED;
- if ( (desc->status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING )
- {
- desc->status |= IRQ_REPLAY;
- hw_resend_irq(desc->handler,irq);
- }
- desc->handler->enable(irq);
- }
-
- spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-asmlinkage void do_IRQ(struct xen_regs regs)
-{
-#if defined(__i386__)
- unsigned int irq = regs.entry_vector;
-#else
- unsigned int irq = 0; /* XXX */
-#endif
- irq_desc_t *desc = &irq_desc[irq];
+ unsigned int vector = regs->entry_vector;
+ irq_desc_t *desc = &irq_desc[vector];
struct irqaction *action;
perfc_incrc(irqs);
spin_lock(&desc->lock);
- desc->handler->ack(irq);
+ desc->handler->ack(vector);
if ( likely(desc->status & IRQ_GUEST) )
{
- __do_IRQ_guest(irq);
+ __do_IRQ_guest(vector);
spin_unlock(&desc->lock);
return;
}
@@ -125,23 +78,24 @@ asmlinkage void do_IRQ(struct xen_regs regs)
while ( desc->status & IRQ_PENDING )
{
desc->status &= ~IRQ_PENDING;
- irq_enter(smp_processor_id(), irq);
+ irq_enter(smp_processor_id());
spin_unlock_irq(&desc->lock);
- action->handler(irq, action->dev_id, &regs);
+ action->handler(vector_to_irq(vector), action->dev_id, regs);
spin_lock_irq(&desc->lock);
- irq_exit(smp_processor_id(), irq);
+ irq_exit(smp_processor_id());
}
desc->status &= ~IRQ_INPROGRESS;
out:
- desc->handler->end(irq);
+ desc->handler->end(vector);
spin_unlock(&desc->lock);
}
void free_irq(unsigned int irq)
{
- irq_desc_t *desc = &irq_desc[irq];
+ unsigned int vector = irq_to_vector(irq);
+ irq_desc_t *desc = &irq_desc[vector];
unsigned long flags;
spin_lock_irqsave(&desc->lock,flags);
@@ -152,12 +106,13 @@ void free_irq(unsigned int irq)
spin_unlock_irqrestore(&desc->lock,flags);
/* Wait to make sure it's not being used on another CPU */
- do { smp_mb(); } while ( irq_desc[irq].status & IRQ_INPROGRESS );
+ do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
}
int setup_irq(unsigned int irq, struct irqaction *new)
{
- irq_desc_t *desc = &irq_desc[irq];
+ unsigned int vector = irq_to_vector(irq);
+ irq_desc_t *desc = &irq_desc[vector];
unsigned long flags;
spin_lock_irqsave(&desc->lock,flags);
@@ -171,7 +126,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
desc->action = new;
desc->depth = 0;
desc->status &= ~IRQ_DISABLED;
- desc->handler->startup(irq);
+ desc->handler->startup(vector);
spin_unlock_irqrestore(&desc->lock,flags);
@@ -191,9 +146,10 @@ typedef struct {
struct domain *guest[IRQ_MAX_GUESTS];
} irq_guest_action_t;
-static void __do_IRQ_guest(int irq)
+static void __do_IRQ_guest(int vector)
{
- irq_desc_t *desc = &irq_desc[irq];
+ unsigned int irq = vector_to_irq(vector);
+ irq_desc_t *desc = &irq_desc[vector];
irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
struct domain *d;
int i;
@@ -222,12 +178,12 @@ int pirq_guest_unmask(struct domain *d)
j = find_first_set_bit(m);
m &= ~(1 << j);
pirq = (i << 5) + j;
- desc = &irq_desc[pirq];
+ desc = &irq_desc[irq_to_vector(pirq)];
spin_lock_irq(&desc->lock);
if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
test_and_clear_bit(pirq, &d->pirq_mask) &&
(--((irq_guest_action_t *)desc->action)->in_flight == 0) )
- desc->handler->end(pirq);
+ desc->handler->end(irq_to_vector(pirq));
spin_unlock_irq(&desc->lock);
}
}
@@ -235,16 +191,22 @@ int pirq_guest_unmask(struct domain *d)
return 0;
}
-int pirq_guest_bind(struct domain *d, int irq, int will_share)
+int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
{
- irq_desc_t *desc = &irq_desc[irq];
+ unsigned int vector = irq_to_vector(irq);
+ struct domain *d = v->domain;
+ irq_desc_t *desc = &irq_desc[vector];
irq_guest_action_t *action;
unsigned long flags;
int rc = 0;
+ cpumask_t cpumask = CPU_MASK_NONE;
if ( !IS_CAPABLE_PHYSDEV(d) )
return -EPERM;
+ if ( vector == 0 )
+ return -EBUSY;
+
spin_lock_irqsave(&desc->lock, flags);
action = (irq_guest_action_t *)desc->action;
@@ -259,7 +221,7 @@ int pirq_guest_bind(struct domain *d, int irq, int will_share)
goto out;
}
- action = xmalloc(sizeof(irq_guest_action_t));
+ action = xmalloc(irq_guest_action_t);
if ( (desc->action = (struct irqaction *)action) == NULL )
{
DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
@@ -274,12 +236,12 @@ int pirq_guest_bind(struct domain *d, int irq, int will_share)
desc->depth = 0;
desc->status |= IRQ_GUEST;
desc->status &= ~IRQ_DISABLED;
- desc->handler->startup(irq);
+ desc->handler->startup(vector);
/* Attempt to bind the interrupt target to the correct CPU. */
+ cpu_set(v->processor, cpumask);
if ( desc->handler->set_affinity != NULL )
- desc->handler->set_affinity(
- irq, apicid_to_phys_cpu_present(d->processor));
+ desc->handler->set_affinity(vector, cpumask);
}
else if ( !will_share || !action->shareable )
{
@@ -296,7 +258,7 @@ int pirq_guest_bind(struct domain *d, int irq, int will_share)
goto out;
}
- action->guest[action->nr_guests++] = d;
+ action->guest[action->nr_guests++] = v->domain;
out:
spin_unlock_irqrestore(&desc->lock, flags);
@@ -305,18 +267,21 @@ int pirq_guest_bind(struct domain *d, int irq, int will_share)
int pirq_guest_unbind(struct domain *d, int irq)
{
- irq_desc_t *desc = &irq_desc[irq];
+ unsigned int vector = irq_to_vector(irq);
+ irq_desc_t *desc = &irq_desc[vector];
irq_guest_action_t *action;
unsigned long flags;
int i;
+ BUG_ON(vector == 0);
+
spin_lock_irqsave(&desc->lock, flags);
action = (irq_guest_action_t *)desc->action;
if ( test_and_clear_bit(irq, &d->pirq_mask) &&
(--action->in_flight == 0) )
- desc->handler->end(irq);
+ desc->handler->end(vector);
if ( action->nr_guests == 1 )
{
@@ -325,12 +290,12 @@ int pirq_guest_unbind(struct domain *d, int irq)
desc->depth = 1;
desc->status |= IRQ_DISABLED;
desc->status &= ~IRQ_GUEST;
- desc->handler->shutdown(irq);
+ desc->handler->shutdown(vector);
}
else
{
i = 0;
- while ( action->guest[i] != d )
+ while ( action->guest[i] && (action->guest[i] != d) )
i++;
memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
action->nr_guests--;
@@ -339,26 +304,3 @@ int pirq_guest_unbind(struct domain *d, int irq)
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
-
-int pirq_guest_bindable(int irq, int will_share)
-{
- irq_desc_t *desc = &irq_desc[irq];
- irq_guest_action_t *action;
- unsigned long flags;
- int okay;
-
- spin_lock_irqsave(&desc->lock, flags);
-
- action = (irq_guest_action_t *)desc->action;
-
- /*
- * To be bindable the IRQ must either be not currently bound (1), or
- * it must be shareable (2) and not at its share limit (3).
- */
- okay = ((!(desc->status & IRQ_GUEST) && (action == NULL)) || /* 1 */
- (action->shareable && will_share && /* 2 */
- (action->nr_guests != IRQ_MAX_GUESTS))); /* 3 */
-
- spin_unlock_irqrestore(&desc->lock, flags);
- return okay;
-}
diff --git a/xen/arch/x86/memory.c b/xen/arch/x86/memory.c
deleted file mode 100644
index eaf324eb21..0000000000
--- a/xen/arch/x86/memory.c
+++ /dev/null
@@ -1,2400 +0,0 @@
-/******************************************************************************
- * arch/x86/memory.c
- *
- * Copyright (c) 2002-2004 K A Fraser
- * Copyright (c) 2004 Christian Limpach
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * A description of the x86 page table API:
- *
- * Domains trap to do_mmu_update with a list of update requests.
- * This is a list of (ptr, val) pairs, where the requested operation
- * is *ptr = val.
- *
- * Reference counting of pages:
- * ----------------------------
- * Each page has two refcounts: tot_count and type_count.
- *
- * TOT_COUNT is the obvious reference count. It counts all uses of a
- * physical page frame by a domain, including uses as a page directory,
- * a page table, or simple mappings via a PTE. This count prevents a
- * domain from releasing a frame back to the free pool when it still holds
- * a reference to it.
- *
- * TYPE_COUNT is more subtle. A frame can be put to one of three
- * mutually-exclusive uses: it might be used as a page directory, or a
- * page table, or it may be mapped writable by the domain [of course, a
- * frame may not be used in any of these three ways!].
- * So, type_count is a count of the number of times a frame is being
- * referred to in its current incarnation. Therefore, a page can only
- * change its type when its type count is zero.
- *
- * Pinning the page type:
- * ----------------------
- * The type of a page can be pinned/unpinned with the commands
- * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
- * pinning is not reference counted, so it can't be nested).
- * This is useful to prevent a page's type count falling to zero, at which
- * point safety checks would need to be carried out next time the count
- * is increased again.
- *
- * A further note on writable page mappings:
- * -----------------------------------------
- * For simplicity, the count of writable mappings for a page may not
- * correspond to reality. The 'writable count' is incremented for every
- * PTE which maps the page with the _PAGE_RW flag set. However, for
- * write access to be possible the page directory entry must also have
- * its _PAGE_RW bit set. We do not check this as it complicates the
- * reference counting considerably [consider the case of multiple
- * directory entries referencing a single page table, some with the RW
- * bit set, others not -- it starts getting a bit messy].
- * In normal use, this simplification shouldn't be a problem.
- * However, the logic can be added if required.
- *
- * One more note on read-only page mappings:
- * -----------------------------------------
- * We want domains to be able to map pages for read-only access. The
- * main reason is that page tables and directories should be readable
- * by a domain, but it would not be safe for them to be writable.
- * However, domains have free access to rings 1 & 2 of the Intel
- * privilege model. In terms of page protection, these are considered
- * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
- * read-only restrictions are respected in supervisor mode -- if the
- * bit is clear then any mapped page is writable.
- *
- * We get round this by always setting the WP bit and disallowing
- * updates to it. This is very unlikely to cause a problem for guest
- * OS's, which will generally use the WP bit to simplify copy-on-write
- * implementation (in that case, OS wants a fault when it writes to
- * an application-supplied buffer).
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/kernel.h>
-#include <xen/lib.h>
-#include <xen/mm.h>
-#include <xen/sched.h>
-#include <xen/errno.h>
-#include <xen/perfc.h>
-#include <xen/irq.h>
-#include <xen/softirq.h>
-#include <asm/shadow.h>
-#include <asm/page.h>
-#include <asm/flushtlb.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/domain_page.h>
-#include <asm/ldt.h>
-#include <asm/e820.h>
-
-#ifdef VERBOSE
-#define MEM_LOG(_f, _a...) \
- printk("DOM%u: (file=memory.c, line=%d) " _f "\n", \
- current->id , __LINE__ , ## _a )
-#else
-#define MEM_LOG(_f, _a...) ((void)0)
-#endif
-
-static int alloc_l2_table(struct pfn_info *page);
-static int alloc_l1_table(struct pfn_info *page);
-static int get_page_from_pagenr(unsigned long page_nr, struct domain *d);
-static int get_page_and_type_from_pagenr(unsigned long page_nr,
- u32 type,
- struct domain *d);
-
-static void free_l2_table(struct pfn_info *page);
-static void free_l1_table(struct pfn_info *page);
-
-static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long);
-static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
-
-/* Used to defer flushing of memory structures. */
-static struct {
-#define DOP_FLUSH_TLB (1<<0) /* Flush the TLB. */
-#define DOP_RELOAD_LDT (1<<1) /* Reload the LDT shadow mapping. */
- unsigned long deferred_ops;
- /* If non-NULL, specifies a foreign subject domain for some operations. */
- struct domain *foreign;
-} __cacheline_aligned percpu_info[NR_CPUS];
-
-/*
- * Returns the current foreign domain; defaults to the currently-executing
- * domain if a foreign override hasn't been specified.
- */
-#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ? : current)
-
-/* Private domain structs for DOMID_XEN and DOMID_IO. */
-static struct domain *dom_xen, *dom_io;
-
-/* Frame table and its size in pages. */
-struct pfn_info *frame_table;
-unsigned long frame_table_size;
-unsigned long max_page;
-
-void __init init_frametable(void)
-{
- unsigned long i, p;
-
- frame_table = (struct pfn_info *)FRAMETABLE_VIRT_START;
- frame_table_size = max_page * sizeof(struct pfn_info);
- frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
-
- for ( i = 0; i < frame_table_size; i += (4UL << 20) )
- {
- p = alloc_boot_pages(min(frame_table_size - i, 4UL << 20), 4UL << 20);
- if ( p == 0 )
- panic("Not enough memory for frame table\n");
- idle_pg_table[(FRAMETABLE_VIRT_START + i) >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(p | __PAGE_HYPERVISOR | _PAGE_PSE);
- }
-
- memset(frame_table, 0, frame_table_size);
-}
-
-void arch_init_memory(void)
-{
- unsigned long i, j, pfn, nr_pfns;
- struct pfn_info *page;
-
- /*
- * We are rather picky about the layout of 'struct pfn_info'. The
- * count_info and domain fields must be adjacent, as we perform atomic
- * 64-bit operations on them. Also, just for sanity, we assert the size
- * of the structure here.
- */
- if ( (offsetof(struct pfn_info, u.inuse.domain) !=
- (offsetof(struct pfn_info, count_info) + sizeof(u32))) ||
- (sizeof(struct pfn_info) != 24) )
- {
- printk("Weird pfn_info layout (%ld,%ld,%d)\n",
- offsetof(struct pfn_info, count_info),
- offsetof(struct pfn_info, u.inuse.domain),
- sizeof(struct pfn_info));
- for ( ; ; ) ;
- }
-
- memset(percpu_info, 0, sizeof(percpu_info));
-
- /* Initialise to a magic of 0x55555555 so easier to spot bugs later. */
- memset(machine_to_phys_mapping, 0x55, 4<<20);
-
- /*
- * Initialise our DOMID_XEN domain.
- * Any Xen-heap pages that we will allow to be mapped will have
- * their domain field set to dom_xen.
- */
- dom_xen = alloc_domain_struct();
- atomic_set(&dom_xen->refcnt, 1);
- dom_xen->id = DOMID_XEN;
-
- /*
- * Initialise our DOMID_IO domain.
- * This domain owns I/O pages that are within the range of the pfn_info
- * array. Mappings occur at the priv of the caller.
- */
- dom_io = alloc_domain_struct();
- atomic_set(&dom_io->refcnt, 1);
- dom_io->id = DOMID_IO;
-
- /* M2P table is mappable read-only by privileged domains. */
- for ( i = 0; i < 1024; i++ )
- {
- /* Ensure it's mapped read-only by guests (use GDT type). */
- page = &frame_table[m2p_start_mfn+i];
- page->count_info = PGC_allocated | 1;
- page->u.inuse.type_info = PGT_gdt_page | PGT_validated | 1;
- page->u.inuse.domain = dom_xen;
- }
-
- /* First 1MB of RAM is historically marked as I/O. */
- for ( i = 0; i < 0x100; i++ )
- {
- page = &frame_table[i];
- page->count_info = PGC_allocated | 1;
- page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
- page->u.inuse.domain = dom_io;
- }
-
- /* Any non-RAM areas in the e820 map are considered to be for I/O. */
- for ( i = 0; i < e820.nr_map; i++ )
- {
- if ( e820.map[i].type == E820_RAM )
- continue;
- pfn = e820.map[i].addr >> PAGE_SHIFT;
- nr_pfns = (e820.map[i].size +
- (e820.map[i].addr & ~PAGE_MASK) +
- ~PAGE_MASK) >> PAGE_SHIFT;
- for ( j = 0; j < nr_pfns; j++ )
- {
- if ( !pfn_valid(pfn+j) )
- continue;
- page = &frame_table[pfn+j];
- page->count_info = PGC_allocated | 1;
- page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
- page->u.inuse.domain = dom_io;
- }
- }
-}
-
-static void __invalidate_shadow_ldt(struct domain *d)
-{
- int i;
- unsigned long pfn;
- struct pfn_info *page;
-
- d->mm.shadow_ldt_mapcnt = 0;
-
- for ( i = 16; i < 32; i++ )
- {
- pfn = l1_pgentry_to_pagenr(d->mm.perdomain_pt[i]);
- if ( pfn == 0 ) continue;
- d->mm.perdomain_pt[i] = mk_l1_pgentry(0);
- page = &frame_table[pfn];
- ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
- ASSERT_PAGE_IS_DOMAIN(page, d);
- put_page_and_type(page);
- }
-
- /* Dispose of the (now possibly invalid) mappings from the TLB. */
- percpu_info[d->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT;
-}
-
-
-static inline void invalidate_shadow_ldt(struct domain *d)
-{
- if ( d->mm.shadow_ldt_mapcnt != 0 )
- __invalidate_shadow_ldt(d);
-}
-
-
-static int alloc_segdesc_page(struct pfn_info *page)
-{
- unsigned long *descs = map_domain_mem((page-frame_table) << PAGE_SHIFT);
- int i;
-
- for ( i = 0; i < 512; i++ )
- if ( unlikely(!check_descriptor(&descs[i*2])) )
- goto fail;
-
- unmap_domain_mem(descs);
- return 1;
-
- fail:
- unmap_domain_mem(descs);
- return 0;
-}
-
-
-/* Map shadow page at offset @off. */
-int map_ldt_shadow_page(unsigned int off)
-{
- struct domain *d = current;
- unsigned long l1e;
-
- if ( unlikely(in_irq()) )
- BUG();
-
- __get_user(l1e, (unsigned long *)&linear_pg_table[(d->mm.ldt_base >>
- PAGE_SHIFT) + off]);
-
- if ( unlikely(!(l1e & _PAGE_PRESENT)) ||
- unlikely(!get_page_and_type(&frame_table[l1e >> PAGE_SHIFT],
- d, PGT_ldt_page)) )
- return 0;
-
- d->mm.perdomain_pt[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW);
- d->mm.shadow_ldt_mapcnt++;
-
- return 1;
-}
-
-
-static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
-{
- struct pfn_info *page = &frame_table[page_nr];
-
- if ( unlikely(!pfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
- {
- MEM_LOG("Could not get page ref for pfn %08lx", page_nr);
- return 0;
- }
-
- return 1;
-}
-
-
-static int get_page_and_type_from_pagenr(unsigned long page_nr,
- u32 type,
- struct domain *d)
-{
- struct pfn_info *page = &frame_table[page_nr];
-
- if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
- return 0;
-
- if ( unlikely(!get_page_type(page, type)) )
- {
-#ifdef VERBOSE
- if ( (type & PGT_type_mask) != PGT_l1_page_table )
- MEM_LOG("Bad page type for pfn %08lx (%08x)",
- page_nr, page->u.inuse.type_info);
-#endif
- put_page(page);
- return 0;
- }
-
- return 1;
-}
-
-
-/*
- * We allow an L2 tables to map each other (a.k.a. linear page tables). It
- * needs some special care with reference counst and access permissions:
- * 1. The mapping entry must be read-only, or the guest may get write access
- * to its own PTEs.
- * 2. We must only bump the reference counts for an *already validated*
- * L2 table, or we can end up in a deadlock in get_page_type() by waiting
- * on a validation that is required to complete that validation.
- * 3. We only need to increment the reference counts for the mapped page
- * frame if it is mapped by a different L2 table. This is sufficient and
- * also necessary to allow validation of an L2 table mapping itself.
- */
-static int
-get_linear_pagetable(
- l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
-{
- u32 x, y;
- struct pfn_info *page;
-
- if ( (l2_pgentry_val(l2e) & _PAGE_RW) )
- {
- MEM_LOG("Attempt to create linear p.t. with write perms");
- return 0;
- }
-
- if ( (l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn )
- {
- /* Make sure the mapped frame belongs to the correct domain. */
- if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e), d)) )
- return 0;
-
- /*
- * Make sure that the mapped frame is an already-validated L2 table.
- * If so, atomically increment the count (checking for overflow).
- */
- page = &frame_table[l2_pgentry_to_pagenr(l2e)];
- y = page->u.inuse.type_info;
- do {
- x = y;
- if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
- unlikely((x & (PGT_type_mask|PGT_validated)) !=
- (PGT_l2_page_table|PGT_validated)) )
- {
- put_page(page);
- return 0;
- }
- }
- while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );
- }
-
- return 1;
-}
-
-
-static int
-get_page_from_l1e(
- l1_pgentry_t l1e, struct domain *d)
-{
- unsigned long l1v = l1_pgentry_val(l1e);
- unsigned long pfn = l1_pgentry_to_pagenr(l1e);
- struct pfn_info *page = &frame_table[pfn];
- extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
-
- if ( !(l1v & _PAGE_PRESENT) )
- return 1;
-
- if ( unlikely(l1v & (_PAGE_GLOBAL|_PAGE_PAT)) )
- {
- MEM_LOG("Bad L1 type settings %04lx", l1v & (_PAGE_GLOBAL|_PAGE_PAT));
- return 0;
- }
-
- if ( unlikely(!pfn_valid(pfn)) ||
- unlikely(page->u.inuse.domain == dom_io) )
- {
- /* DOMID_IO reverts to caller for privilege checks. */
- if ( d == dom_io )
- d = current;
-
- if ( (!IS_PRIV(d)) &&
- (!IS_CAPABLE_PHYSDEV(d) || !domain_iomem_in_pfn(d, pfn)) )
- {
- MEM_LOG("Non-privileged attempt to map I/O space %08lx", pfn);
- return 0;
- }
-
- /* No reference counting for out-of-range I/O pages. */
- if ( !pfn_valid(pfn) )
- return 1;
-
- d = dom_io;
- }
-
- return ((l1v & _PAGE_RW) ?
- get_page_and_type(page, d, PGT_writable_page) :
- get_page(page, d));
-}
-
-
-/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
-static int
-get_page_from_l2e(
- l2_pgentry_t l2e, unsigned long pfn,
- struct domain *d, unsigned long va_idx)
-{
- int rc;
-
- if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
- return 1;
-
- if ( unlikely((l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE))) )
- {
- MEM_LOG("Bad L2 page type settings %04lx",
- l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE));
- return 0;
- }
-
- rc = get_page_and_type_from_pagenr(
- l2_pgentry_to_pagenr(l2e),
- PGT_l1_page_table | (va_idx<<PGT_va_shift), d);
-
- if ( unlikely(!rc) )
- return get_linear_pagetable(l2e, pfn, d);
-
- return 1;
-}
-
-
-static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
-{
- unsigned long l1v = l1_pgentry_val(l1e);
- unsigned long pfn = l1_pgentry_to_pagenr(l1e);
- struct pfn_info *page = &frame_table[pfn];
- struct domain *e;
-
- if ( !(l1v & _PAGE_PRESENT) || !pfn_valid(pfn) )
- return;
-
- e = page->u.inuse.domain;
- if ( unlikely(e != d) )
- {
- /*
- * Unmap a foreign page that may have been mapped via a grant table.
- * Note that this can fail for a privileged domain that can map foreign
- * pages via MMUEXT_SET_FOREIGNDOM. Such domains can have some mappings
- * counted via a grant entry and some counted directly in the page
- * structure's reference count. Note that reference counts won't get
- * dangerously confused as long as we always try to decrement the
- * grant entry first. We may end up with a mismatch between which
- * mappings and which unmappings are counted via the grant entry, but
- * really it doesn't matter as privileged domains have carte blanche.
- */
- if ( likely(gnttab_check_unmap(e, d, pfn, !(l1v & _PAGE_RW))) )
- return;
- /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
- }
-
- if ( l1v & _PAGE_RW )
- {
- put_page_and_type(page);
- }
- else
- {
- /* We expect this is rare so we blow the entire shadow LDT. */
- if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
- PGT_ldt_page)) &&
- unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) )
- invalidate_shadow_ldt(e);
- put_page(page);
- }
-}
-
-
-/*
- * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
- * Note also that this automatically deals correctly with linear p.t.'s.
- */
-static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
-{
- if ( (l2_pgentry_val(l2e) & _PAGE_PRESENT) &&
- ((l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn) )
- put_page_and_type(&frame_table[l2_pgentry_to_pagenr(l2e)]);
-}
-
-
-static int alloc_l2_table(struct pfn_info *page)
-{
- struct domain *d = page->u.inuse.domain;
- unsigned long page_nr = page_to_pfn(page);
- l2_pgentry_t *pl2e;
- int i;
-
- pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
-
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- if ( unlikely(!get_page_from_l2e(pl2e[i], page_nr, d, i)) )
- goto fail;
-
-#if defined(__i386__)
- /* Now we add our private high mappings. */
- memcpy(&pl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
- pl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((page_nr << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- pl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(page->u.inuse.domain->mm.perdomain_pt) |
- __PAGE_HYPERVISOR);
-#endif
-
- unmap_domain_mem(pl2e);
- return 1;
-
- fail:
- while ( i-- > 0 )
- put_page_from_l2e(pl2e[i], page_nr);
-
- unmap_domain_mem(pl2e);
- return 0;
-}
-
-
-static int alloc_l1_table(struct pfn_info *page)
-{
- struct domain *d = page->u.inuse.domain;
- unsigned long page_nr = page_to_pfn(page);
- l1_pgentry_t *pl1e;
- int i;
-
- pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
-
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- if ( unlikely(!get_page_from_l1e(pl1e[i], d)) )
- goto fail;
-
- unmap_domain_mem(pl1e);
- return 1;
-
- fail:
- while ( i-- > 0 )
- put_page_from_l1e(pl1e[i], d);
-
- unmap_domain_mem(pl1e);
- return 0;
-}
-
-
-static void free_l2_table(struct pfn_info *page)
-{
- unsigned long page_nr = page - frame_table;
- l2_pgentry_t *pl2e;
- int i;
-
- pl2e = map_domain_mem(page_nr << PAGE_SHIFT);
-
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- put_page_from_l2e(pl2e[i], page_nr);
-
- unmap_domain_mem(pl2e);
-}
-
-
-static void free_l1_table(struct pfn_info *page)
-{
- struct domain *d = page->u.inuse.domain;
- unsigned long page_nr = page - frame_table;
- l1_pgentry_t *pl1e;
- int i;
-
- pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
-
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- put_page_from_l1e(pl1e[i], d);
-
- unmap_domain_mem(pl1e);
-}
-
-
-static inline int update_l2e(l2_pgentry_t *pl2e,
- l2_pgentry_t ol2e,
- l2_pgentry_t nl2e)
-{
- unsigned long o = cmpxchg((unsigned long *)pl2e,
- l2_pgentry_val(ol2e),
- l2_pgentry_val(nl2e));
- if ( o != l2_pgentry_val(ol2e) )
- MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
- l2_pgentry_val(ol2e), l2_pgentry_val(nl2e), o);
- return (o == l2_pgentry_val(ol2e));
-}
-
-
-/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
-static int mod_l2_entry(l2_pgentry_t *pl2e,
- l2_pgentry_t nl2e,
- unsigned long pfn)
-{
- l2_pgentry_t ol2e;
- unsigned long _ol2e;
-
- if ( unlikely((((unsigned long)pl2e & (PAGE_SIZE-1)) >> 2) >=
- DOMAIN_ENTRIES_PER_L2_PAGETABLE) )
- {
- MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
- return 0;
- }
-
- if ( unlikely(__get_user(_ol2e, (unsigned long *)pl2e) != 0) )
- return 0;
- ol2e = mk_l2_pgentry(_ol2e);
-
- if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT )
- {
- /* Differ in mapping (bits 12-31) or presence (bit 0)? */
- if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 )
- return update_l2e(pl2e, ol2e, nl2e);
-
- if ( unlikely(!get_page_from_l2e(nl2e, pfn, current,
- ((unsigned long)pl2e &
- ~PAGE_MASK) >> 2)) )
- return 0;
-
- if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
- {
- put_page_from_l2e(nl2e, pfn);
- return 0;
- }
-
- put_page_from_l2e(ol2e, pfn);
- return 1;
- }
-
- if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
- return 0;
-
- put_page_from_l2e(ol2e, pfn);
- return 1;
-}
-
-
-static inline int update_l1e(l1_pgentry_t *pl1e,
- l1_pgentry_t ol1e,
- l1_pgentry_t nl1e)
-{
- unsigned long o = l1_pgentry_val(ol1e);
- unsigned long n = l1_pgentry_val(nl1e);
-
- if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
- unlikely(o != l1_pgentry_val(ol1e)) )
- {
- MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
- l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
- return 0;
- }
-
- return 1;
-}
-
-
-/* Update the L1 entry at pl1e to new value nl1e. */
-static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
-{
- l1_pgentry_t ol1e;
- unsigned long _ol1e;
- struct domain *d = current;
-
- if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
- {
- MEM_LOG("Bad get_user\n");
- return 0;
- }
-
- ol1e = mk_l1_pgentry(_ol1e);
-
- if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT )
- {
- /* Differ in mapping (bits 12-31), r/w (bit 1), or presence (bit 0)? */
- if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) == 0 )
- return update_l1e(pl1e, ol1e, nl1e);
-
- if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
- return 0;
-
- if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
- {
- put_page_from_l1e(nl1e, d);
- return 0;
- }
-
- put_page_from_l1e(ol1e, d);
- return 1;
- }
-
- if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
- return 0;
-
- put_page_from_l1e(ol1e, d);
- return 1;
-}
-
-
-int alloc_page_type(struct pfn_info *page, unsigned int type)
-{
- switch ( type )
- {
- case PGT_l1_page_table:
- return alloc_l1_table(page);
- case PGT_l2_page_table:
- return alloc_l2_table(page);
- case PGT_gdt_page:
- case PGT_ldt_page:
- return alloc_segdesc_page(page);
- default:
- printk("Bad type in alloc_page_type %x t=%x c=%x\n",
- type, page->u.inuse.type_info,
- page->count_info);
- BUG();
- }
-
- return 0;
-}
-
-
-void free_page_type(struct pfn_info *page, unsigned int type)
-{
- struct domain *d = page->u.inuse.domain;
-
- switch ( type )
- {
- case PGT_l1_page_table:
- free_l1_table(page);
- break;
-
- case PGT_l2_page_table:
- free_l2_table(page);
- break;
-
- default:
- BUG();
- }
-
- if ( unlikely(d->mm.shadow_mode) &&
- (get_shadow_status(&d->mm, page_to_pfn(page)) & PSH_shadowed) )
- {
- unshadow_table(page_to_pfn(page), type);
- put_shadow_status(&d->mm);
- }
-}
-
-
-void put_page_type(struct pfn_info *page)
-{
- u32 nx, x, y = page->u.inuse.type_info;
-
- again:
- do {
- x = y;
- nx = x - 1;
-
- ASSERT((x & PGT_count_mask) != 0);
-
- /*
- * The page should always be validated while a reference is held. The
- * exception is during domain destruction, when we forcibly invalidate
- * page-table pages if we detect a referential loop.
- * See domain.c:relinquish_list().
- */
- ASSERT((x & PGT_validated) ||
- test_bit(DF_DYING, &page->u.inuse.domain->flags));
-
- if ( unlikely((nx & PGT_count_mask) == 0) )
- {
- /* Record TLB information for flush later. Races are harmless. */
- page->tlbflush_timestamp = tlbflush_current_time();
-
- if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
- likely(nx & PGT_validated) )
- {
- /*
- * Page-table pages must be unvalidated when count is zero. The
- * 'free' is safe because the refcnt is non-zero and validated
- * bit is clear => other ops will spin or fail.
- */
- if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
- x & ~PGT_validated)) != x) )
- goto again;
- /* We cleared the 'valid bit' so we do the clear up. */
- free_page_type(page, x & PGT_type_mask);
- /* Carry on, but with the 'valid bit' now clear. */
- x &= ~PGT_validated;
- nx &= ~PGT_validated;
- }
- }
- else if ( unlikely((nx & (PGT_pinned | PGT_count_mask)) ==
- (PGT_pinned | 1)) )
- {
- /* Page is now only pinned. Make the back pointer mutable again. */
- nx |= PGT_va_mutable;
- }
- }
- while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
-}
-
-
-int get_page_type(struct pfn_info *page, u32 type)
-{
- u32 nx, x, y = page->u.inuse.type_info;
-
- again:
- do {
- x = y;
- nx = x + 1;
- if ( unlikely((nx & PGT_count_mask) == 0) )
- {
- MEM_LOG("Type count overflow on pfn %08lx\n", page_to_pfn(page));
- return 0;
- }
- else if ( unlikely((x & PGT_count_mask) == 0) )
- {
- if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
- {
- /*
- * On type change we check to flush stale TLB entries. This
- * may be unnecessary (e.g., page was GDT/LDT) but those
- * circumstances should be very rare.
- */
- struct domain *d = page->u.inuse.domain;
- if ( unlikely(NEED_FLUSH(tlbflush_time[d->processor],
- page->tlbflush_timestamp)) )
- {
- perfc_incr(need_flush_tlb_flush);
- flush_tlb_cpu(d->processor);
- }
-
- /* We lose existing type, back pointer, and validity. */
- nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
- nx |= type;
-
- /* No special validation needed for writable pages. */
- /* Page tables and GDT/LDT need to be scanned for validity. */
- if ( type == PGT_writable_page )
- nx |= PGT_validated;
- }
- }
- else if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
- {
- if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
- {
- if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
- ((type & PGT_type_mask) != PGT_l1_page_table) )
- MEM_LOG("Bad type (saw %08x != exp %08x) for pfn %08lx\n",
- x & PGT_type_mask, type, page_to_pfn(page));
- return 0;
- }
- else if ( (x & PGT_va_mask) == PGT_va_mutable )
- {
- /* The va backpointer is mutable, hence we update it. */
- nx &= ~PGT_va_mask;
- nx |= type; /* we know the actual type is correct */
- }
- else if ( unlikely((x & PGT_va_mask) != (type & PGT_va_mask)) )
- {
- /* This table is potentially mapped at multiple locations. */
- nx &= ~PGT_va_mask;
- nx |= PGT_va_unknown;
- }
- }
- else if ( unlikely(!(x & PGT_validated)) )
- {
- /* Someone else is updating validation of this page. Wait... */
- while ( (y = page->u.inuse.type_info) == x )
- cpu_relax();
- goto again;
- }
- }
- while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
-
- if ( unlikely(!(nx & PGT_validated)) )
- {
- /* Try to validate page type; drop the new reference on failure. */
- if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) )
- {
- MEM_LOG("Error while validating pfn %08lx for type %08x."
- " caf=%08x taf=%08x\n",
- page_to_pfn(page), type,
- page->count_info,
- page->u.inuse.type_info);
- /* Noone else can get a reference. We hold the only ref. */
- page->u.inuse.type_info = 0;
- return 0;
- }
-
- /* Noone else is updating simultaneously. */
- __set_bit(_PGT_validated, &page->u.inuse.type_info);
- }
-
- return 1;
-}
-
-
-static int do_extended_command(unsigned long ptr, unsigned long val)
-{
- int okay = 1, cpu = smp_processor_id();
- unsigned int cmd = val & MMUEXT_CMD_MASK;
- unsigned long pfn = ptr >> PAGE_SHIFT;
- unsigned long old_base_pfn;
- struct pfn_info *page = &frame_table[pfn];
- struct domain *d = current, *nd, *e;
- u32 x, y;
- domid_t domid;
- grant_ref_t gntref;
-
- switch ( cmd )
- {
- case MMUEXT_PIN_L1_TABLE:
- case MMUEXT_PIN_L2_TABLE:
- /*
- * We insist that, if you pin an L1 page, it's the first thing that
- * you do to it. This is because we require the backptr to still be
- * mutable. This assumption seems safe.
- */
- okay = get_page_and_type_from_pagenr(
- pfn,
- ((cmd==MMUEXT_PIN_L2_TABLE) ?
- PGT_l2_page_table : (PGT_l1_page_table|PGT_va_mutable)),
- FOREIGNDOM);
-
- if ( unlikely(!okay) )
- {
- MEM_LOG("Error while pinning pfn %08lx", pfn);
- break;
- }
-
- if ( unlikely(test_and_set_bit(_PGT_pinned,
- &page->u.inuse.type_info)) )
- {
- MEM_LOG("Pfn %08lx already pinned", pfn);
- put_page_and_type(page);
- okay = 0;
- break;
- }
-
- break;
-
- case MMUEXT_UNPIN_TABLE:
- if ( unlikely(!(okay = get_page_from_pagenr(pfn, FOREIGNDOM))) )
- {
- MEM_LOG("Page %08lx bad domain (dom=%p)",
- ptr, page->u.inuse.domain);
- }
- else if ( likely(test_and_clear_bit(_PGT_pinned,
- &page->u.inuse.type_info)) )
- {
- put_page_and_type(page);
- put_page(page);
- }
- else
- {
- okay = 0;
- put_page(page);
- MEM_LOG("Pfn %08lx not pinned", pfn);
- }
- break;
-
- case MMUEXT_NEW_BASEPTR:
- okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, d);
- if ( likely(okay) )
- {
- invalidate_shadow_ldt(d);
-
- percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
- old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT;
- d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
-
- shadow_mk_pagetable(&d->mm);
-
- write_ptbase(&d->mm);
-
- put_page_and_type(&frame_table[old_base_pfn]);
- }
- else
- {
- MEM_LOG("Error while installing new baseptr %08lx", ptr);
- }
- break;
-
- case MMUEXT_TLB_FLUSH:
- percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
- break;
-
- case MMUEXT_INVLPG:
- __flush_tlb_one(ptr);
- break;
-
- case MMUEXT_FLUSH_CACHE:
- if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
- {
- MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
- okay = 0;
- }
- else
- {
- wbinvd();
- }
- break;
-
- case MMUEXT_SET_LDT:
- {
- unsigned long ents = val >> MMUEXT_CMD_SHIFT;
- if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
- (ents > 8192) ||
- ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
- ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
- {
- okay = 0;
- MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents);
- }
- else if ( (d->mm.ldt_ents != ents) ||
- (d->mm.ldt_base != ptr) )
- {
- invalidate_shadow_ldt(d);
- d->mm.ldt_base = ptr;
- d->mm.ldt_ents = ents;
- load_LDT(d);
- percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
- if ( ents != 0 )
- percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
- }
- break;
- }
-
- case MMUEXT_SET_FOREIGNDOM:
- domid = (domid_t)(val >> 16);
-
- if ( (e = percpu_info[cpu].foreign) != NULL )
- put_domain(e);
- percpu_info[cpu].foreign = NULL;
-
- if ( !IS_PRIV(d) )
- {
- switch ( domid )
- {
- case DOMID_IO:
- get_knownalive_domain(dom_io);
- percpu_info[cpu].foreign = dom_io;
- break;
- default:
- MEM_LOG("Dom %u cannot set foreign dom\n", d->id);
- okay = 0;
- break;
- }
- }
- else
- {
- percpu_info[cpu].foreign = e = find_domain_by_id(domid);
- if ( e == NULL )
- {
- switch ( domid )
- {
- case DOMID_XEN:
- get_knownalive_domain(dom_xen);
- percpu_info[cpu].foreign = dom_xen;
- break;
- case DOMID_IO:
- get_knownalive_domain(dom_io);
- percpu_info[cpu].foreign = dom_io;
- break;
- default:
- MEM_LOG("Unknown domain '%u'", domid);
- okay = 0;
- break;
- }
- }
- }
- break;
-
- case MMUEXT_TRANSFER_PAGE:
- domid = (domid_t)(val >> 16);
- gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
-
- if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
- unlikely(!pfn_valid(pfn)) ||
- unlikely((e = find_domain_by_id(domid)) == NULL) )
- {
- MEM_LOG("Bad frame (%08lx) or bad domid (%d).\n", pfn, domid);
- okay = 0;
- break;
- }
-
- spin_lock(&d->page_alloc_lock);
-
- /*
- * The tricky bit: atomically release ownership while there is just one
- * benign reference to the page (PGC_allocated). If that reference
- * disappears then the deallocation routine will safely spin.
- */
- nd = page->u.inuse.domain;
- y = page->count_info;
- do {
- x = y;
- if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
- (1|PGC_allocated)) ||
- unlikely(nd != d) )
- {
- MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p,"
- " caf=%08x, taf=%08x\n", page_to_pfn(page),
- d, d->id, nd, x, page->u.inuse.type_info);
- spin_unlock(&d->page_alloc_lock);
- put_domain(e);
- return 0;
- }
- __asm__ __volatile__(
- LOCK_PREFIX "cmpxchg8b %2"
- : "=d" (nd), "=a" (y),
- "=m" (*(volatile u64 *)(&page->count_info))
- : "0" (d), "1" (x), "c" (NULL), "b" (x) );
- }
- while ( unlikely(nd != d) || unlikely(y != x) );
-
- /*
- * Unlink from 'd'. At least one reference remains (now anonymous), so
- * noone else is spinning to try to delete this page from 'd'.
- */
- d->tot_pages--;
- list_del(&page->list);
-
- spin_unlock(&d->page_alloc_lock);
-
- spin_lock(&e->page_alloc_lock);
-
- /*
- * Check that 'e' will accept the page and has reservation headroom.
- * Also, a domain mustn't have PGC_allocated pages when it is dying.
- */
- ASSERT(e->tot_pages <= e->max_pages);
- if ( unlikely(test_bit(DF_DYING, &e->flags)) ||
- unlikely(e->tot_pages == e->max_pages) ||
- unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) )
- {
- MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
- "provided a bad grant ref, or is dying (%08lx).\n",
- e->tot_pages, e->max_pages, e->flags);
- spin_unlock(&e->page_alloc_lock);
- put_domain(e);
- okay = 0;
- break;
- }
-
- /* Okay, add the page to 'e'. */
- if ( unlikely(e->tot_pages++ == 0) )
- get_knownalive_domain(e);
- list_add_tail(&page->list, &e->page_list);
- page->u.inuse.domain = e;
-
- spin_unlock(&e->page_alloc_lock);
-
- /* Transfer is all done: tell the guest about its new page frame. */
- gnttab_notify_transfer(e, gntref, pfn);
-
- put_domain(e);
- break;
-
- case MMUEXT_REASSIGN_PAGE:
- if ( unlikely(!IS_PRIV(d)) )
- {
- MEM_LOG("Dom %u has no reassignment priv", d->id);
- okay = 0;
- break;
- }
-
- e = percpu_info[cpu].foreign;
- if ( unlikely(e == NULL) )
- {
- MEM_LOG("No FOREIGNDOM to reassign pfn %08lx to", pfn);
- okay = 0;
- break;
- }
-
- /*
- * Grab both page_list locks, in order. This prevents the page from
- * disappearing elsewhere while we modify the owner, and we'll need
- * both locks if we're successful so that we can change lists.
- */
- if ( d < e )
- {
- spin_lock(&d->page_alloc_lock);
- spin_lock(&e->page_alloc_lock);
- }
- else
- {
- spin_lock(&e->page_alloc_lock);
- spin_lock(&d->page_alloc_lock);
- }
-
- /* A domain shouldn't have PGC_allocated pages when it is dying. */
- if ( unlikely(test_bit(DF_DYING, &e->flags)) ||
- unlikely(IS_XEN_HEAP_FRAME(page)) )
- {
- MEM_LOG("Reassignment page is Xen heap, or dest dom is dying.");
- okay = 0;
- goto reassign_fail;
- }
-
- /*
- * The tricky bit: atomically change owner while there is just one
- * benign reference to the page (PGC_allocated). If that reference
- * disappears then the deallocation routine will safely spin.
- */
- nd = page->u.inuse.domain;
- y = page->count_info;
- do {
- x = y;
- if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
- (1|PGC_allocated)) ||
- unlikely(nd != d) )
- {
- MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p,"
- " caf=%08x, taf=%08x\n", page_to_pfn(page),
- d, d->id, nd, x, page->u.inuse.type_info);
- okay = 0;
- goto reassign_fail;
- }
- __asm__ __volatile__(
- LOCK_PREFIX "cmpxchg8b %3"
- : "=d" (nd), "=a" (y), "=c" (e),
- "=m" (*(volatile u64 *)(&page->count_info))
- : "0" (d), "1" (x), "c" (e), "b" (x) );
- }
- while ( unlikely(nd != d) || unlikely(y != x) );
-
- /*
- * Unlink from 'd'. We transferred at least one reference to 'e', so
- * noone else is spinning to try to delete this page from 'd'.
- */
- d->tot_pages--;
- list_del(&page->list);
-
- /*
- * Add the page to 'e'. Someone may already have removed the last
- * reference and want to remove the page from 'e'. However, we have
- * the lock so they'll spin waiting for us.
- */
- if ( unlikely(e->tot_pages++ == 0) )
- get_knownalive_domain(e);
- list_add_tail(&page->list, &e->page_list);
-
- reassign_fail:
- spin_unlock(&d->page_alloc_lock);
- spin_unlock(&e->page_alloc_lock);
- break;
-
- case MMUEXT_CLEAR_FOREIGNDOM:
- if ( (e = percpu_info[cpu].foreign) != NULL )
- put_domain(e);
- percpu_info[cpu].foreign = NULL;
- break;
-
- default:
- MEM_LOG("Invalid extended pt command 0x%08lx", val & MMUEXT_CMD_MASK);
- okay = 0;
- break;
- }
-
- return okay;
-}
-
-int do_mmu_update(
- mmu_update_t *ureqs, unsigned int count, unsigned int *pdone)
-{
-/*
- * We steal the m.s.b. of the @count parameter to indicate whether this
- * invocation of do_mmu_update() is resuming a previously preempted call.
- * We steal the next 15 bits to remember the current FOREIGNDOM.
- */
-#define MMU_UPDATE_PREEMPTED (~(~0U>>1))
-#define MMU_UPDATE_PREEMPT_FDOM_SHIFT ((sizeof(int)*8)-16)
-#define MMU_UPDATE_PREEMPT_FDOM_MASK (0x7FFFU<<MMU_UPDATE_PREEMPT_FDOM_SHIFT)
-
- mmu_update_t req;
- unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
- struct pfn_info *page;
- int rc = 0, okay = 1, i = 0, cpu = smp_processor_id();
- unsigned int cmd, done = 0;
- unsigned long prev_spfn = 0;
- l1_pgentry_t *prev_spl1e = 0;
- struct domain *d = current;
- u32 type_info;
- domid_t domid;
-
- cleanup_writable_pagetable(d);
-
- /*
- * If we are resuming after preemption, read how much work we have already
- * done. This allows us to set the @done output parameter correctly.
- * We also reset FOREIGNDOM here.
- */
- if ( unlikely(count&(MMU_UPDATE_PREEMPTED|MMU_UPDATE_PREEMPT_FDOM_MASK)) )
- {
- if ( !(count & MMU_UPDATE_PREEMPTED) )
- {
- /* Count overflow into private FOREIGNDOM field. */
- MEM_LOG("do_mmu_update count is too large");
- rc = -EINVAL;
- goto out;
- }
- count &= ~MMU_UPDATE_PREEMPTED;
- domid = count >> MMU_UPDATE_PREEMPT_FDOM_SHIFT;
- count &= ~MMU_UPDATE_PREEMPT_FDOM_MASK;
- if ( unlikely(pdone != NULL) )
- (void)get_user(done, pdone);
- if ( (domid != current->id) &&
- !do_extended_command(0, MMUEXT_SET_FOREIGNDOM | (domid << 16)) )
- {
- rc = -EINVAL;
- goto out;
- }
- }
-
- perfc_incrc(calls_to_mmu_update);
- perfc_addc(num_page_updates, count);
-
- if ( unlikely(!array_access_ok(VERIFY_READ, ureqs, count, sizeof(req))) )
- {
- rc = -EFAULT;
- goto out;
- }
-
- for ( i = 0; i < count; i++ )
- {
- if ( hypercall_preempt_check() )
- {
- rc = hypercall_create_continuation(
- __HYPERVISOR_mmu_update, 3, ureqs,
- (count - i) |
- (FOREIGNDOM->id << MMU_UPDATE_PREEMPT_FDOM_SHIFT) |
- MMU_UPDATE_PREEMPTED, pdone);
- break;
- }
-
- if ( unlikely(__copy_from_user(&req, ureqs, sizeof(req)) != 0) )
- {
- MEM_LOG("Bad __copy_from_user");
- rc = -EFAULT;
- break;
- }
-
- cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
- pfn = req.ptr >> PAGE_SHIFT;
-
- okay = 0;
-
- switch ( cmd )
- {
- /*
- * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
- */
- case MMU_NORMAL_PT_UPDATE:
- if ( unlikely(!get_page_from_pagenr(pfn, current)) )
- {
- MEM_LOG("Could not get page for normal update");
- break;
- }
-
- if ( likely(prev_pfn == pfn) )
- {
- va = (va & PAGE_MASK) | (req.ptr & ~PAGE_MASK);
- }
- else
- {
- if ( prev_pfn != 0 )
- unmap_domain_mem((void *)va);
- va = (unsigned long)map_domain_mem(req.ptr);
- prev_pfn = pfn;
- }
-
- page = &frame_table[pfn];
- switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
- {
- case PGT_l1_page_table:
- if ( likely(get_page_type(
- page, type_info & (PGT_type_mask|PGT_va_mask))) )
- {
- okay = mod_l1_entry((l1_pgentry_t *)va,
- mk_l1_pgentry(req.val));
-
- if ( unlikely(d->mm.shadow_mode) && okay &&
- (get_shadow_status(&d->mm, page-frame_table) &
- PSH_shadowed) )
- {
- shadow_l1_normal_pt_update(
- req.ptr, req.val, &prev_spfn, &prev_spl1e);
- put_shadow_status(&d->mm);
- }
-
- put_page_type(page);
- }
- break;
- case PGT_l2_page_table:
- if ( likely(get_page_type(page, PGT_l2_page_table)) )
- {
- okay = mod_l2_entry((l2_pgentry_t *)va,
- mk_l2_pgentry(req.val),
- pfn);
-
- if ( unlikely(d->mm.shadow_mode) && okay &&
- (get_shadow_status(&d->mm, page-frame_table) &
- PSH_shadowed) )
- {
- shadow_l2_normal_pt_update(req.ptr, req.val);
- put_shadow_status(&d->mm);
- }
-
- put_page_type(page);
- }
- break;
- default:
- if ( likely(get_page_type(page, PGT_writable_page)) )
- {
- *(unsigned long *)va = req.val;
- okay = 1;
- put_page_type(page);
- }
- break;
- }
-
- put_page(page);
- break;
-
- case MMU_MACHPHYS_UPDATE:
- if ( unlikely(!get_page_from_pagenr(pfn, FOREIGNDOM)) )
- {
- MEM_LOG("Could not get page for mach->phys update");
- break;
- }
-
- machine_to_phys_mapping[pfn] = req.val;
- okay = 1;
-
- /*
- * If in log-dirty mode, mark the corresponding pseudo-physical
- * page as dirty.
- */
- if ( unlikely(d->mm.shadow_mode == SHM_logdirty) &&
- mark_dirty(&d->mm, pfn) )
- d->mm.shadow_dirty_block_count++;
-
- put_page(&frame_table[pfn]);
- break;
-
- /*
- * MMU_EXTENDED_COMMAND: Extended command is specified
- * in the least-siginificant bits of the 'value' field.
- */
- case MMU_EXTENDED_COMMAND:
- req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
- okay = do_extended_command(req.ptr, req.val);
- break;
-
- default:
- MEM_LOG("Invalid page update command %08lx", req.ptr);
- break;
- }
-
- if ( unlikely(!okay) )
- {
- rc = -EINVAL;
- break;
- }
-
- ureqs++;
- }
-
- out:
- if ( prev_pfn != 0 )
- unmap_domain_mem((void *)va);
-
- if ( unlikely(prev_spl1e != 0) )
- unmap_domain_mem((void *)prev_spl1e);
-
- deferred_ops = percpu_info[cpu].deferred_ops;
- percpu_info[cpu].deferred_ops = 0;
-
- if ( deferred_ops & DOP_FLUSH_TLB )
- local_flush_tlb();
-
- if ( deferred_ops & DOP_RELOAD_LDT )
- (void)map_ldt_shadow_page(0);
-
- if ( unlikely(percpu_info[cpu].foreign != NULL) )
- {
- put_domain(percpu_info[cpu].foreign);
- percpu_info[cpu].foreign = NULL;
- }
-
- /* Add incremental work we have done to the @done output parameter. */
- if ( unlikely(pdone != NULL) )
- __put_user(done + i, pdone);
-
- return rc;
-}
-
-
-int do_update_va_mapping(unsigned long page_nr,
- unsigned long val,
- unsigned long flags)
-{
- struct domain *d = current;
- int err = 0;
- unsigned int cpu = d->processor;
- unsigned long deferred_ops;
-
- perfc_incrc(calls_to_update_va);
-
- if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
- return -EINVAL;
-
- cleanup_writable_pagetable(d);
-
- /*
- * XXX When we make this support 4MB superpages we should also deal with
- * the case of updating L2 entries.
- */
-
- if ( unlikely(!mod_l1_entry(&linear_pg_table[page_nr],
- mk_l1_pgentry(val))) )
- err = -EINVAL;
-
- if ( unlikely(d->mm.shadow_mode) )
- {
- unsigned long sval;
-
- l1pte_propagate_from_guest(&d->mm, &val, &sval);
-
- if ( unlikely(__put_user(sval, ((unsigned long *)(
- &shadow_linear_pg_table[page_nr])))) )
- {
- /*
- * Since L2's are guranteed RW, failure indicates either that the
- * page was not shadowed, or that the L2 entry has not yet been
- * updated to reflect the shadow.
- */
- unsigned l2_idx = page_nr >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT);
- l2_pgentry_t gpde = linear_l2_table[l2_idx];
- unsigned long gpfn = l2_pgentry_val(gpde) >> PAGE_SHIFT;
- unsigned long spfn;
-
- if ((spfn = (get_shadow_status(&d->mm, gpfn) & PSH_pfn_mask)))
- {
- unsigned long *sl1e = map_domain_mem(spfn << PAGE_SHIFT);
- unsigned l1_idx = page_nr & (ENTRIES_PER_L1_PAGETABLE - 1);
- sl1e[l1_idx] = sval;
- unmap_domain_mem(sl1e);
- put_shadow_status(&d->mm);
-
- perfc_incrc(shadow_update_va_fail1);
- }
- else
- perfc_incrc(shadow_update_va_fail2);
- }
-
- /*
- * If we're in log-dirty mode then we need to note that we've updated
- * the PTE in the PT-holding page. We need the machine frame number
- * for this.
- */
- if ( d->mm.shadow_mode == SHM_logdirty )
- mark_dirty(&current->mm, va_to_l1mfn(page_nr << PAGE_SHIFT));
-
- check_pagetable(&d->mm, d->mm.pagetable, "va"); /* debug */
- }
-
- deferred_ops = percpu_info[cpu].deferred_ops;
- percpu_info[cpu].deferred_ops = 0;
-
- if ( unlikely(deferred_ops & DOP_FLUSH_TLB) ||
- unlikely(flags & UVMF_FLUSH_TLB) )
- local_flush_tlb();
- else if ( unlikely(flags & UVMF_INVLPG) )
- __flush_tlb_one(page_nr << PAGE_SHIFT);
-
- if ( unlikely(deferred_ops & DOP_RELOAD_LDT) )
- (void)map_ldt_shadow_page(0);
-
- return err;
-}
-
-int do_update_va_mapping_otherdomain(unsigned long page_nr,
- unsigned long val,
- unsigned long flags,
- domid_t domid)
-{
- unsigned int cpu = smp_processor_id();
- struct domain *d;
- int rc;
-
- if ( unlikely(!IS_PRIV(current)) )
- return -EPERM;
-
- percpu_info[cpu].foreign = d = find_domain_by_id(domid);
- if ( unlikely(d == NULL) )
- {
- MEM_LOG("Unknown domain '%u'", domid);
- return -ESRCH;
- }
-
- rc = do_update_va_mapping(page_nr, val, flags);
-
- put_domain(d);
- percpu_info[cpu].foreign = NULL;
-
- return rc;
-}
-
-
-
-/*************************
- * Writable Pagetables
- */
-
-ptwr_info_t ptwr_info[NR_CPUS];
-
-#ifdef VERBOSE
-int ptwr_debug = 0x0;
-#define PTWR_PRINTK(_f, _a...) \
- do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 )
-#define PTWR_PRINT_WHICH (which ? 'I' : 'A')
-#else
-#define PTWR_PRINTK(_f, _a...) ((void)0)
-#endif
-
-/* Flush the given writable p.t. page and write-protect it again. */
-void ptwr_flush(const int which)
-{
- unsigned long sstat, spte, pte, *ptep, l1va;
- l1_pgentry_t *sl1e = NULL, *pl1e, ol1e, nl1e;
- l2_pgentry_t *pl2e;
- int i, cpu = smp_processor_id();
- struct domain *d = current;
-
- l1va = ptwr_info[cpu].ptinfo[which].l1va;
- ptep = (unsigned long *)&linear_pg_table[l1va>>PAGE_SHIFT];
-
- /*
- * STEP 1. Write-protect the p.t. page so no more updates can occur.
- */
-
- if ( unlikely(__get_user(pte, ptep)) )
- {
- MEM_LOG("ptwr: Could not read pte at %p\n", ptep);
- /*
- * Really a bug. We could read this PTE during the initial fault,
- * and pagetables can't have changed meantime.
- */
- BUG();
- }
- PTWR_PRINTK("[%c] disconnected_l1va at %p is %08lx\n",
- PTWR_PRINT_WHICH, ptep, pte);
- pte &= ~_PAGE_RW;
-
- if ( unlikely(d->mm.shadow_mode) )
- {
- /* Write-protect the p.t. page in the shadow page table. */
- l1pte_propagate_from_guest(&d->mm, &pte, &spte);
- __put_user(
- spte, (unsigned long *)&shadow_linear_pg_table[l1va>>PAGE_SHIFT]);
-
- /* Is the p.t. page itself shadowed? Map it into Xen space if so. */
- sstat = get_shadow_status(&d->mm, pte >> PAGE_SHIFT);
- if ( sstat & PSH_shadowed )
- sl1e = map_domain_mem((sstat & PSH_pfn_mask) << PAGE_SHIFT);
- }
-
- /* Write-protect the p.t. page in the guest page table. */
- if ( unlikely(__put_user(pte, ptep)) )
- {
- MEM_LOG("ptwr: Could not update pte at %p\n", ptep);
- /*
- * Really a bug. We could write this PTE during the initial fault,
- * and pagetables can't have changed meantime.
- */
- BUG();
- }
-
- /* Ensure that there are no stale writable mappings in any TLB. */
- /* NB. INVLPG is a serialising instruction: flushes pending updates. */
- __flush_tlb_one(l1va); /* XXX Multi-CPU guests? */
- PTWR_PRINTK("[%c] disconnected_l1va at %p now %08lx\n",
- PTWR_PRINT_WHICH, ptep, pte);
-
- /*
- * STEP 2. Validate any modified PTEs.
- */
-
- pl1e = ptwr_info[cpu].ptinfo[which].pl1e;
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- {
- ol1e = ptwr_info[cpu].ptinfo[which].page[i];
- nl1e = pl1e[i];
-
- if ( likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)) )
- continue;
-
- /*
- * Fast path for PTEs that have merely been write-protected
- * (e.g., during a Unix fork()). A strict reduction in privilege.
- */
- if ( likely(l1_pgentry_val(ol1e) == (l1_pgentry_val(nl1e)|_PAGE_RW)) )
- {
- if ( likely(l1_pgentry_val(nl1e) & _PAGE_PRESENT) )
- {
- if ( unlikely(sl1e != NULL) )
- l1pte_propagate_from_guest(
- &d->mm, &l1_pgentry_val(nl1e),
- &l1_pgentry_val(sl1e[i]));
- put_page_type(&frame_table[l1_pgentry_to_pagenr(nl1e)]);
- }
- continue;
- }
-
- if ( unlikely(!get_page_from_l1e(nl1e, d)) )
- {
- MEM_LOG("ptwr: Could not re-validate l1 page\n");
- /*
- * Make the remaining p.t's consistent before crashing, so the
- * reference counts are correct.
- */
- memcpy(&pl1e[i], &ptwr_info[cpu].ptinfo[which].page[i],
- (ENTRIES_PER_L1_PAGETABLE - i) * sizeof(l1_pgentry_t));
- unmap_domain_mem(pl1e);
- ptwr_info[cpu].ptinfo[which].l1va = 0;
- if ( (which == PTWR_PT_ACTIVE) && likely(!d->mm.shadow_mode) )
- {
- pl2e = &linear_l2_table[ptwr_info[cpu].ptinfo[which].l2_idx];
- *pl2e = mk_l2_pgentry(l2_pgentry_val(*pl2e) | _PAGE_PRESENT);
- }
- domain_crash();
- return;
- }
-
- if ( unlikely(sl1e != NULL) )
- l1pte_propagate_from_guest(
- &d->mm, &l1_pgentry_val(nl1e), &l1_pgentry_val(sl1e[i]));
-
- if ( unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT) )
- put_page_from_l1e(ol1e, d);
- }
- unmap_domain_mem(pl1e);
-
- /*
- * STEP 3. Reattach the L1 p.t. page into the current address space.
- */
-
- if ( (which == PTWR_PT_ACTIVE) && likely(!d->mm.shadow_mode) )
- {
- pl2e = &linear_l2_table[ptwr_info[cpu].ptinfo[which].l2_idx];
- *pl2e = mk_l2_pgentry(l2_pgentry_val(*pl2e) | _PAGE_PRESENT);
- }
-
- /*
- * STEP 4. Final tidy-up.
- */
-
- ptwr_info[cpu].ptinfo[which].l1va = 0;
-
- if ( unlikely(sl1e != NULL) )
- {
- unmap_domain_mem(sl1e);
- put_shadow_status(&d->mm);
- }
-}
-
-/* Write page fault handler: check if guest is trying to modify a PTE. */
-int ptwr_do_page_fault(unsigned long addr)
-{
- unsigned long pte, pfn, l2e;
- struct pfn_info *page;
- l2_pgentry_t *pl2e;
- int which, cpu = smp_processor_id();
- u32 l2_idx;
-
- /*
- * Attempt to read the PTE that maps the VA being accessed. By checking for
- * PDE validity in the L2 we avoid many expensive fixups in __get_user().
- */
- if ( !(l2_pgentry_val(linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
- _PAGE_PRESENT) ||
- __get_user(pte, (unsigned long *)&linear_pg_table[addr>>PAGE_SHIFT]) )
- return 0;
-
- pfn = pte >> PAGE_SHIFT;
- page = &frame_table[pfn];
-
- /* We are looking only for read-only mappings of p.t. pages. */
- if ( ((pte & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
- ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) )
- return 0;
-
- /* Get the L2 index at which this L1 p.t. is always mapped. */
- l2_idx = page->u.inuse.type_info & PGT_va_mask;
- if ( unlikely(l2_idx >= PGT_va_unknown) )
- {
- domain_crash(); /* Urk! This L1 is mapped in multiple L2 slots! */
- return 0;
- }
- l2_idx >>= PGT_va_shift;
-
- if ( l2_idx == (addr >> L2_PAGETABLE_SHIFT) )
- {
- MEM_LOG("PTWR failure! Pagetable maps itself at %08lx\n", addr);
- domain_crash();
- return 0;
- }
-
- /*
- * Is the L1 p.t. mapped into the current address space? If so we call it
- * an ACTIVE p.t., otherwise it is INACTIVE.
- */
- pl2e = &linear_l2_table[l2_idx];
- l2e = l2_pgentry_val(*pl2e);
- which = PTWR_PT_INACTIVE;
- if ( (l2e >> PAGE_SHIFT) == pfn )
- {
- /* Check the PRESENT bit to set ACTIVE. */
- if ( likely(l2e & _PAGE_PRESENT) )
- which = PTWR_PT_ACTIVE;
- else {
- /*
- * If the PRESENT bit is clear, we may be conflicting with
- * the current ACTIVE p.t. (it may be the same p.t. mapped
- * at another virt addr).
- * The ptwr_flush call below will restore the PRESENT bit.
- */
- if ( ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va &&
- l2_idx == ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx )
- which = PTWR_PT_ACTIVE;
- }
- }
-
- PTWR_PRINTK("[%c] page_fault on l1 pt at va %08lx, pt for %08x, "
- "pfn %08lx\n", PTWR_PRINT_WHICH,
- addr, l2_idx << L2_PAGETABLE_SHIFT, pfn);
-
- /*
- * We only allow one ACTIVE and one INACTIVE p.t. to be updated at at
- * time. If there is already one, we must flush it out.
- */
- if ( ptwr_info[cpu].ptinfo[which].l1va )
- ptwr_flush(which);
-
- ptwr_info[cpu].ptinfo[which].l1va = addr | 1;
- ptwr_info[cpu].ptinfo[which].l2_idx = l2_idx;
-
- /* For safety, disconnect the L1 p.t. page from current space. */
- if ( (which == PTWR_PT_ACTIVE) && likely(!current->mm.shadow_mode) )
- {
- *pl2e = mk_l2_pgentry(l2e & ~_PAGE_PRESENT);
- flush_tlb(); /* XXX Multi-CPU guests? */
- }
-
- /* Temporarily map the L1 page, and make a copy of it. */
- ptwr_info[cpu].ptinfo[which].pl1e = map_domain_mem(pfn << PAGE_SHIFT);
- memcpy(ptwr_info[cpu].ptinfo[which].page,
- ptwr_info[cpu].ptinfo[which].pl1e,
- ENTRIES_PER_L1_PAGETABLE * sizeof(l1_pgentry_t));
-
- /* Finally, make the p.t. page writable by the guest OS. */
- pte |= _PAGE_RW;
- PTWR_PRINTK("[%c] update %p pte to %08lx\n", PTWR_PRINT_WHICH,
- &linear_pg_table[addr>>PAGE_SHIFT], pte);
- if ( unlikely(__put_user(pte, (unsigned long *)
- &linear_pg_table[addr>>PAGE_SHIFT])) )
- {
- MEM_LOG("ptwr: Could not update pte at %p\n", (unsigned long *)
- &linear_pg_table[addr>>PAGE_SHIFT]);
- /* Toss the writable pagetable state and crash. */
- unmap_domain_mem(ptwr_info[cpu].ptinfo[which].pl1e);
- ptwr_info[cpu].ptinfo[which].l1va = 0;
- domain_crash();
- return 0;
- }
-
- return EXCRET_fault_fixed;
-}
-
-static __init int ptwr_init(void)
-{
- int i;
-
- for ( i = 0; i < smp_num_cpus; i++ )
- {
- ptwr_info[i].ptinfo[PTWR_PT_ACTIVE].page =
- (void *)alloc_xenheap_page();
- ptwr_info[i].ptinfo[PTWR_PT_INACTIVE].page =
- (void *)alloc_xenheap_page();
- }
-
- return 0;
-}
-__initcall(ptwr_init);
-
-
-
-
-/************************************************************************/
-/************************************************************************/
-/************************************************************************/
-
-#ifndef NDEBUG
-
-void audit_domain(struct domain *d)
-{
- int ttot=0, ctot=0, io_mappings=0, lowmem_mappings=0;
-
- void adjust (struct pfn_info *page, int dir, int adjtype)
- {
- int count = page->count_info & PGC_count_mask;
-
- if ( adjtype )
- {
- int tcount = page->u.inuse.type_info & PGT_count_mask;
-
- ttot++;
-
- tcount += dir;
-
- if ( tcount < 0 )
- {
- /* This will only come out once. */
- printk("Audit %d: type count whent below zero pfn=%x "
- "taf=%x otaf=%x\n",
- d->id, page-frame_table,
- page->u.inuse.type_info,
- page->tlbflush_timestamp);
- }
-
- page->u.inuse.type_info =
- (page->u.inuse.type_info & ~PGT_count_mask) |
- (tcount & PGT_count_mask);
- }
-
- ctot++;
- count += dir;
- if ( count < 0 )
- {
- /* This will only come out once. */
- printk("Audit %d: general count whent below zero pfn=%x "
- "taf=%x otaf=%x\n",
- d->id, page-frame_table,
- page->u.inuse.type_info,
- page->tlbflush_timestamp);
- }
-
- page->count_info =
- (page->count_info & ~PGC_count_mask) |
- (count & PGC_count_mask);
-
- }
-
- void scan_for_pfn(struct domain *d, unsigned long xpfn)
- {
- unsigned long pfn, *pt;
- struct list_head *list_ent;
- struct pfn_info *page;
- int i;
-
- list_ent = d->page_list.next;
- for ( i = 0; (list_ent != &d->page_list); i++ )
- {
- pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
- page = &frame_table[pfn];
-
- switch ( page->u.inuse.type_info & PGT_type_mask )
- {
- case PGT_l1_page_table:
- case PGT_l2_page_table:
- pt = map_domain_mem(pfn<<PAGE_SHIFT);
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- if ( (pt[i] & _PAGE_PRESENT) &&
- ((pt[i] >> PAGE_SHIFT) == xpfn) )
- printk(" found dom=%d i=%x pfn=%lx t=%x c=%x\n",
- d->id, i, pfn, page->u.inuse.type_info,
- page->count_info);
- unmap_domain_mem(pt);
- }
-
- list_ent = frame_table[pfn].list.next;
- }
-
- }
-
- void scan_for_pfn_remote(unsigned long xpfn)
- {
- struct domain *e;
- for_each_domain ( e )
- scan_for_pfn( e, xpfn );
- }
-
- int i, l1, l2;
- unsigned long pfn;
- struct list_head *list_ent;
- struct pfn_info *page;
-
- if ( d != current )
- domain_pause(d);
- synchronise_pagetables(~0UL);
-
- printk("pt base=%lx sh_info=%x\n",
- pagetable_val(d->mm.pagetable)>>PAGE_SHIFT,
- virt_to_page(d->shared_info)-frame_table);
-
- spin_lock(&d->page_alloc_lock);
-
- /* PHASE 0 */
-
- list_ent = d->page_list.next;
- for ( i = 0; (list_ent != &d->page_list); i++ )
- {
- pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
- page = &frame_table[pfn];
-
- if ( page->u.inuse.domain != d )
- BUG();
-
- if ( (page->u.inuse.type_info & PGT_count_mask) >
- (page->count_info & PGC_count_mask) )
- printk("taf > caf %x %x pfn=%lx\n",
- page->u.inuse.type_info, page->count_info, pfn );
-
-#if 0 /* SYSV shared memory pages plus writeable files. */
- if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page &&
- (page->u.inuse.type_info & PGT_count_mask) > 1 )
- {
- printk("writeable page with type count >1: pfn=%lx t=%x c=%x\n",
- pfn,
- page->u.inuse.type_info,
- page->count_info );
- scan_for_pfn_remote(pfn);
- }
-#endif
- if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_none &&
- (page->u.inuse.type_info & PGT_count_mask) > 1 )
- {
- printk("normal page with type count >1: pfn=%lx t=%x c=%x\n",
- pfn,
- page->u.inuse.type_info,
- page->count_info );
- }
-
- /* Use tlbflush_timestamp to store original type_info. */
- page->tlbflush_timestamp = page->u.inuse.type_info;
-
- list_ent = frame_table[pfn].list.next;
- }
-
-
- /* PHASE 1 */
- if( pagetable_val(d->mm.pagetable) )
- adjust(&frame_table[pagetable_val(d->mm.pagetable)>>PAGE_SHIFT], -1, 1);
-
- list_ent = d->page_list.next;
- for ( i = 0; (list_ent != &d->page_list); i++ )
- {
- unsigned long *pt;
- pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
- page = &frame_table[pfn];
-
- if ( page->u.inuse.domain != d )
- BUG();
-
- switch ( page->u.inuse.type_info & PGT_type_mask )
- {
- case PGT_l2_page_table:
-
- if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
- printk("Audit %d: L2 not validated %x\n",
- d->id, page->u.inuse.type_info);
-
- if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
- printk("Audit %d: L2 not pinned %x\n",
- d->id, page->u.inuse.type_info);
- else
- adjust( page, -1, 1 );
-
- pt = map_domain_mem( pfn<<PAGE_SHIFT );
-
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- {
- if ( pt[i] & _PAGE_PRESENT )
- {
- unsigned long l1pfn = pt[i]>>PAGE_SHIFT;
- struct pfn_info *l1page = &frame_table[l1pfn];
-
- if ( l1page->u.inuse.domain != d )
- {
- printk("L2: Skip bizarre page belonging to other "
- "dom %p\n", l1page->u.inuse.domain);
- continue;
- }
-
- if ( (l1page->u.inuse.type_info & PGT_type_mask) ==
- PGT_l2_page_table )
- printk("Audit %d: [%x] Found %s Linear PT "
- "t=%x pfn=%lx\n", d->id, i,
- (l1pfn==pfn) ? "Self" : "Other",
- l1page->u.inuse.type_info,
- l1pfn);
- else if ( (l1page->u.inuse.type_info & PGT_type_mask) !=
- PGT_l1_page_table )
- printk("Audit %d: [%x] Expected L1 t=%x pfn=%lx\n",
- d->id, i,
- l1page->u.inuse.type_info,
- l1pfn);
-
- adjust(l1page, -1, 1);
- }
- }
-
- unmap_domain_mem(pt);
-
- break;
-
-
- case PGT_l1_page_table:
-
- if ( (page->u.inuse.type_info & PGT_pinned) == PGT_pinned )
- adjust( page, -1, 1 );
-
- if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
- printk("Audit %d: L1 not validated %x\n",
- d->id, page->u.inuse.type_info);
-#if 0
- if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
- printk("Audit %d: L1 not pinned %x\n",
- d->id, page->u.inuse.type_info);
-#endif
- pt = map_domain_mem( pfn<<PAGE_SHIFT );
-
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- {
- if ( pt[i] & _PAGE_PRESENT )
- {
- unsigned long l1pfn = pt[i]>>PAGE_SHIFT;
- struct pfn_info *l1page = &frame_table[l1pfn];
-
- if ( l1pfn < 0x100 )
- {
- lowmem_mappings++;
- continue;
- }
-
- if ( l1pfn > max_page )
- {
- io_mappings++;
- continue;
- }
-
- if ( pt[i] & _PAGE_RW )
- {
-
- if ( (l1page->u.inuse.type_info & PGT_type_mask) ==
- PGT_l1_page_table ||
- (l1page->u.inuse.type_info & PGT_type_mask) ==
- PGT_l2_page_table )
- printk("Audit %d: [%x] Ilegal RW t=%x pfn=%lx\n",
- d->id, i,
- l1page->u.inuse.type_info,
- l1pfn);
-
- }
-
- if ( l1page->u.inuse.domain != d )
- {
- printk("Audit %d: [%lx,%x] Skip foreign page dom=%lx "
- "pfn=%lx c=%08x t=%08x m2p=%lx\n",
- d->id, pfn, i,
- (unsigned long)l1page->u.inuse.domain,
- l1pfn,
- l1page->count_info,
- l1page->u.inuse.type_info,
- machine_to_phys_mapping[l1pfn]);
- continue;
- }
-
- adjust(l1page, -1, 0);
- }
- }
-
- unmap_domain_mem(pt);
-
- break;
- }
-
- list_ent = frame_table[pfn].list.next;
- }
-
- if ( (io_mappings > 0) || (lowmem_mappings > 0) )
- printk("Audit %d: Found %d lowmem mappings and %d io mappings\n",
- d->id, lowmem_mappings, io_mappings);
-
- /* PHASE 2 */
-
- ctot = ttot = 0;
- list_ent = d->page_list.next;
- for ( i = 0; (list_ent != &d->page_list); i++ )
- {
- pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
- page = &frame_table[pfn];
-
- switch ( page->u.inuse.type_info & PGT_type_mask)
- {
- case PGT_l1_page_table:
- case PGT_l2_page_table:
- if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
- {
- printk("Audit %d: type count!=0 t=%x ot=%x c=%x pfn=%lx\n",
- d->id, page->u.inuse.type_info,
- page->tlbflush_timestamp,
- page->count_info, pfn );
- scan_for_pfn_remote(pfn);
- }
- default:
- if ( (page->count_info & PGC_count_mask) != 1 )
- {
- printk("Audit %d: gen count!=1 (c=%x) t=%x ot=%x pfn=%lx\n",
- d->id,
- page->count_info,
- page->u.inuse.type_info,
- page->tlbflush_timestamp, pfn );
- scan_for_pfn_remote(pfn);
- }
- break;
- }
-
- list_ent = frame_table[pfn].list.next;
- }
-
- /* PHASE 3 */
- list_ent = d->page_list.next;
- l1 = l2 = 0;
- for ( i = 0; (list_ent != &d->page_list); i++ )
- {
- unsigned long *pt;
- pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
- page = &frame_table[pfn];
-
- switch ( page->u.inuse.type_info & PGT_type_mask )
- {
- case PGT_l2_page_table:
- l2++;
- if ( (page->u.inuse.type_info & PGT_pinned) == PGT_pinned )
- adjust( page, 1, 1 );
-
- pt = map_domain_mem( pfn<<PAGE_SHIFT );
-
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- {
- if ( pt[i] & _PAGE_PRESENT )
- {
- unsigned long l1pfn = pt[i]>>PAGE_SHIFT;
- struct pfn_info *l1page;
-
- if (l1pfn>max_page)
- continue;
-
- l1page = &frame_table[l1pfn];
-
- if ( l1page->u.inuse.domain == d)
- adjust(l1page, 1, 1);
- }
- }
-
- unmap_domain_mem(pt);
- break;
-
- case PGT_l1_page_table:
- l1++;
- if ( (page->u.inuse.type_info & PGT_pinned) == PGT_pinned )
- adjust( page, 1, 1 );
-
- pt = map_domain_mem( pfn<<PAGE_SHIFT );
-
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- {
- if ( pt[i] & _PAGE_PRESENT )
- {
- unsigned long l1pfn = pt[i]>>PAGE_SHIFT;
- struct pfn_info *l1page;
-
- if (l1pfn>max_page)
- continue;
-
- l1page = &frame_table[l1pfn];
-
- if ( (l1page->u.inuse.domain != d) ||
- (l1pfn < 0x100) || (l1pfn > max_page) )
- continue;
-
- adjust(l1page, 1, 0);
- }
- }
-
- unmap_domain_mem(pt);
- break;
- }
-
-
- page->tlbflush_timestamp = 0;
-
- list_ent = frame_table[pfn].list.next;
- }
-
- spin_unlock(&d->page_alloc_lock);
-
- if( pagetable_val(d->mm.pagetable) )
- adjust(&frame_table[pagetable_val(d->mm.pagetable)>>PAGE_SHIFT], 1, 1);
-
- printk("Audit %d: Done. pages=%d l1=%d l2=%d ctot=%d ttot=%d\n", d->id, i, l1, l2, ctot, ttot );
-
- if ( d != current )
- domain_unpause(d);
-}
-
-void audit_domains(void)
-{
- struct domain *d;
- for_each_domain ( d )
- audit_domain(d);
-}
-
-void audit_domains_key(unsigned char key)
-{
- audit_domains();
-}
-
-#endif
diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
index 2b5a18da50..a7bd0e5d68 100644
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -70,12 +70,14 @@
*/
#include <xen/config.h>
+#include <xen/lib.h>
#include <xen/kernel.h>
#include <xen/init.h>
#include <xen/sched.h>
-#include <xen/slab.h>
+#include <xen/smp.h>
#include <xen/spinlock.h>
+#include <asm/current.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -84,16 +86,8 @@
#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
#define down(_m) spin_lock(_m)
#define up(_m) spin_unlock(_m)
-#define vmalloc(_s) xmalloc(_s)
+#define vmalloc(_s) xmalloc_bytes(_s)
#define vfree(_p) xfree(_p)
-#define num_online_cpus() smp_num_cpus
-static inline int on_each_cpu(
- void (*func) (void *info), void *info, int retry, int wait)
-{
- int ret = smp_call_function(func, info, retry, wait);
- func(info);
- return ret;
-}
#if 0
MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
new file mode 100644
index 0000000000..9b3d631bc0
--- /dev/null
+++ b/xen/arch/x86/mm.c
@@ -0,0 +1,3124 @@
+/******************************************************************************
+ * arch/x86/mm.c
+ *
+ * Copyright (c) 2002-2005 K A Fraser
+ * Copyright (c) 2004 Christian Limpach
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * A description of the x86 page table API:
+ *
+ * Domains trap to do_mmu_update with a list of update requests.
+ * This is a list of (ptr, val) pairs, where the requested operation
+ * is *ptr = val.
+ *
+ * Reference counting of pages:
+ * ----------------------------
+ * Each page has two refcounts: tot_count and type_count.
+ *
+ * TOT_COUNT is the obvious reference count. It counts all uses of a
+ * physical page frame by a domain, including uses as a page directory,
+ * a page table, or simple mappings via a PTE. This count prevents a
+ * domain from releasing a frame back to the free pool when it still holds
+ * a reference to it.
+ *
+ * TYPE_COUNT is more subtle. A frame can be put to one of three
+ * mutually-exclusive uses: it might be used as a page directory, or a
+ * page table, or it may be mapped writable by the domain [of course, a
+ * frame may not be used in any of these three ways!].
+ * So, type_count is a count of the number of times a frame is being
+ * referred to in its current incarnation. Therefore, a page can only
+ * change its type when its type count is zero.
+ *
+ * Pinning the page type:
+ * ----------------------
+ * The type of a page can be pinned/unpinned with the commands
+ * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
+ * pinning is not reference counted, so it can't be nested).
+ * This is useful to prevent a page's type count falling to zero, at which
+ * point safety checks would need to be carried out next time the count
+ * is increased again.
+ *
+ * A further note on writable page mappings:
+ * -----------------------------------------
+ * For simplicity, the count of writable mappings for a page may not
+ * correspond to reality. The 'writable count' is incremented for every
+ * PTE which maps the page with the _PAGE_RW flag set. However, for
+ * write access to be possible the page directory entry must also have
+ * its _PAGE_RW bit set. We do not check this as it complicates the
+ * reference counting considerably [consider the case of multiple
+ * directory entries referencing a single page table, some with the RW
+ * bit set, others not -- it starts getting a bit messy].
+ * In normal use, this simplification shouldn't be a problem.
+ * However, the logic can be added if required.
+ *
+ * One more note on read-only page mappings:
+ * -----------------------------------------
+ * We want domains to be able to map pages for read-only access. The
+ * main reason is that page tables and directories should be readable
+ * by a domain, but it would not be safe for them to be writable.
+ * However, domains have free access to rings 1 & 2 of the Intel
+ * privilege model. In terms of page protection, these are considered
+ * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
+ * read-only restrictions are respected in supervisor mode -- if the
+ * bit is clear then any mapped page is writable.
+ *
+ * We get round this by always setting the WP bit and disallowing
+ * updates to it. This is very unlikely to cause a problem for guest
+ * OS's, which will generally use the WP bit to simplify copy-on-write
+ * implementation (in that case, OS wants a fault when it writes to
+ * an application-supplied buffer).
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/kernel.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/sched.h>
+#include <xen/errno.h>
+#include <xen/perfc.h>
+#include <xen/irq.h>
+#include <xen/softirq.h>
+#include <xen/domain_page.h>
+#include <asm/shadow.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/ldt.h>
+#include <asm/x86_emulate.h>
+
+#ifdef VERBOSE
+#define MEM_LOG(_f, _a...) \
+ printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
+ current->domain->domain_id , __LINE__ , ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+/*
+ * Both do_mmuext_op() and do_mmu_update():
+ * We steal the m.s.b. of the @count parameter to indicate whether this
+ * invocation of do_mmu_update() is resuming a previously preempted call.
+ */
+#define MMU_UPDATE_PREEMPTED (~(~0U>>1))
+
+static void free_l2_table(struct pfn_info *page);
+static void free_l1_table(struct pfn_info *page);
+
+static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
+ unsigned int type);
+static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
+
+/* Used to defer flushing of memory structures. */
+static struct {
+#define DOP_FLUSH_TLB (1<<0) /* Flush the TLB. */
+#define DOP_RELOAD_LDT (1<<1) /* Reload the LDT shadow mapping. */
+ unsigned int deferred_ops;
+ /* If non-NULL, specifies a foreign subject domain for some operations. */
+ struct domain *foreign;
+} __cacheline_aligned percpu_info[NR_CPUS];
+
+/*
+ * Returns the current foreign domain; defaults to the currently-executing
+ * domain if a foreign override hasn't been specified.
+ */
+#define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ? : current->domain)
+
+/* Private domain structs for DOMID_XEN and DOMID_IO. */
+static struct domain *dom_xen, *dom_io;
+
+/* Frame table and its size in pages. */
+struct pfn_info *frame_table;
+unsigned long max_page;
+
+void __init init_frametable(void)
+{
+ unsigned long nr_pages, page_step, i, pfn;
+
+ frame_table = (struct pfn_info *)FRAMETABLE_VIRT_START;
+
+ nr_pages = PFN_UP(max_page * sizeof(*frame_table));
+ page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;
+
+ for ( i = 0; i < nr_pages; i += page_step )
+ {
+ pfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
+ if ( pfn == 0 )
+ panic("Not enough memory for frame table\n");
+ map_pages_to_xen(
+ FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
+ pfn, page_step, PAGE_HYPERVISOR);
+ }
+
+ memset(frame_table, 0, nr_pages << PAGE_SHIFT);
+}
+
+void arch_init_memory(void)
+{
+ extern void subarch_init_memory(struct domain *);
+
+ unsigned long i, pfn, rstart_pfn, rend_pfn;
+ struct pfn_info *page;
+
+ memset(percpu_info, 0, sizeof(percpu_info));
+
+ /*
+ * Initialise our DOMID_XEN domain.
+ * Any Xen-heap pages that we will allow to be mapped will have
+ * their domain field set to dom_xen.
+ */
+ dom_xen = alloc_domain_struct();
+ atomic_set(&dom_xen->refcnt, 1);
+ dom_xen->domain_id = DOMID_XEN;
+
+ /*
+ * Initialise our DOMID_IO domain.
+ * This domain owns I/O pages that are within the range of the pfn_info
+ * array. Mappings occur at the priv of the caller.
+ */
+ dom_io = alloc_domain_struct();
+ atomic_set(&dom_io->refcnt, 1);
+ dom_io->domain_id = DOMID_IO;
+
+ /* First 1MB of RAM is historically marked as I/O. */
+ for ( i = 0; i < 0x100; i++ )
+ {
+ page = &frame_table[i];
+ page->count_info = PGC_allocated | 1;
+ page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
+ page_set_owner(page, dom_io);
+ }
+
+ /* Any areas not specified as RAM by the e820 map are considered I/O. */
+ for ( i = 0, pfn = 0; i < e820.nr_map; i++ )
+ {
+ if ( e820.map[i].type != E820_RAM )
+ continue;
+ /* Every page from cursor to start of next RAM region is I/O. */
+ rstart_pfn = PFN_UP(e820.map[i].addr);
+ rend_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+ while ( pfn < rstart_pfn )
+ {
+ BUG_ON(!pfn_valid(pfn));
+ page = &frame_table[pfn++];
+ page->count_info = PGC_allocated | 1;
+ page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
+ page_set_owner(page, dom_io);
+ }
+ /* Skip the RAM region. */
+ pfn = rend_pfn;
+ }
+ BUG_ON(pfn != max_page);
+
+ subarch_init_memory(dom_xen);
+}
+
+void write_ptbase(struct vcpu *v)
+{
+ write_cr3(pagetable_get_paddr(v->arch.monitor_table));
+}
+
+void invalidate_shadow_ldt(struct vcpu *v)
+{
+ int i;
+ unsigned long pfn;
+ struct pfn_info *page;
+
+ if ( v->arch.shadow_ldt_mapcnt == 0 )
+ return;
+
+ v->arch.shadow_ldt_mapcnt = 0;
+
+ for ( i = 16; i < 32; i++ )
+ {
+ pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
+ if ( pfn == 0 ) continue;
+ v->arch.perdomain_ptes[i] = l1e_empty();
+ page = &frame_table[pfn];
+ ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
+ ASSERT_PAGE_IS_DOMAIN(page, v->domain);
+ put_page_and_type(page);
+ }
+
+ /* Dispose of the (now possibly invalid) mappings from the TLB. */
+ percpu_info[v->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT;
+}
+
+
+static int alloc_segdesc_page(struct pfn_info *page)
+{
+ struct desc_struct *descs;
+ int i;
+
+ descs = map_domain_page(page_to_pfn(page));
+
+ for ( i = 0; i < 512; i++ )
+ if ( unlikely(!check_descriptor(&descs[i])) )
+ goto fail;
+
+ unmap_domain_page(descs);
+ return 1;
+
+ fail:
+ unmap_domain_page(descs);
+ return 0;
+}
+
+
+/* Map shadow page at offset @off. */
+int map_ldt_shadow_page(unsigned int off)
+{
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ unsigned long gpfn, gmfn;
+ l1_pgentry_t l1e, nl1e;
+ unsigned gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
+ int res;
+
+#if defined(__x86_64__)
+ /* If in user mode, switch to kernel mode just to read LDT mapping. */
+ extern void toggle_guest_mode(struct vcpu *);
+ int user_mode = !(v->arch.flags & TF_kernel_mode);
+#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
+#elif defined(__i386__)
+#define TOGGLE_MODE() ((void)0)
+#endif
+
+ BUG_ON(unlikely(in_irq()));
+
+ shadow_sync_va(v, gva);
+
+ TOGGLE_MODE();
+ __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)],
+ sizeof(l1e));
+ TOGGLE_MODE();
+
+ if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
+ return 0;
+
+ gpfn = l1e_get_pfn(l1e);
+ gmfn = __gpfn_to_mfn(d, gpfn);
+ if ( unlikely(!VALID_MFN(gmfn)) )
+ return 0;
+
+ res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
+
+ if ( !res && unlikely(shadow_mode_refcounts(d)) )
+ {
+ shadow_lock(d);
+ shadow_remove_all_write_access(d, gpfn, gmfn);
+ res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
+ shadow_unlock(d);
+ }
+
+ if ( unlikely(!res) )
+ return 0;
+
+ nl1e = l1e_from_pfn(gmfn, l1e_get_flags(l1e) | _PAGE_RW);
+
+ v->arch.perdomain_ptes[off + 16] = nl1e;
+ v->arch.shadow_ldt_mapcnt++;
+
+ return 1;
+}
+
+
+static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
+{
+ struct pfn_info *page = &frame_table[page_nr];
+
+ if ( unlikely(!pfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
+ {
+ MEM_LOG("Could not get page ref for pfn %lx", page_nr);
+ return 0;
+ }
+
+ return 1;
+}
+
+
+static int get_page_and_type_from_pagenr(unsigned long page_nr,
+ u32 type,
+ struct domain *d)
+{
+ struct pfn_info *page = &frame_table[page_nr];
+
+ if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
+ return 0;
+
+ if ( unlikely(!get_page_type(page, type)) )
+ {
+ if ( (type & PGT_type_mask) != PGT_l1_page_table )
+ MEM_LOG("Bad page type for pfn %lx (%08x)",
+ page_nr, page->u.inuse.type_info);
+ put_page(page);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * We allow root tables to map each other (a.k.a. linear page tables). It
+ * needs some special care with reference counts and access permissions:
+ * 1. The mapping entry must be read-only, or the guest may get write access
+ * to its own PTEs.
+ * 2. We must only bump the reference counts for an *already validated*
+ * L2 table, or we can end up in a deadlock in get_page_type() by waiting
+ * on a validation that is required to complete that validation.
+ * 3. We only need to increment the reference counts for the mapped page
+ * frame if it is mapped by a different root table. This is sufficient and
+ * also necessary to allow validation of a root table mapping itself.
+ */
+static int
+get_linear_pagetable(
+ root_pgentry_t re, unsigned long re_pfn, struct domain *d)
+{
+ u32 x, y;
+ struct pfn_info *page;
+ unsigned long pfn;
+
+ ASSERT( !shadow_mode_refcounts(d) );
+
+ if ( (root_get_flags(re) & _PAGE_RW) )
+ {
+ MEM_LOG("Attempt to create linear p.t. with write perms");
+ return 0;
+ }
+
+ if ( (pfn = root_get_pfn(re)) != re_pfn )
+ {
+ /* Make sure the mapped frame belongs to the correct domain. */
+ if ( unlikely(!get_page_from_pagenr(pfn, d)) )
+ return 0;
+
+ /*
+ * Make sure that the mapped frame is an already-validated L2 table.
+ * If so, atomically increment the count (checking for overflow).
+ */
+ page = &frame_table[pfn];
+ y = page->u.inuse.type_info;
+ do {
+ x = y;
+ if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
+ unlikely((x & (PGT_type_mask|PGT_validated)) !=
+ (PGT_root_page_table|PGT_validated)) )
+ {
+ put_page(page);
+ return 0;
+ }
+ }
+ while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );
+ }
+
+ return 1;
+}
+
+int
+get_page_from_l1e(
+ l1_pgentry_t l1e, struct domain *d)
+{
+ unsigned long mfn = l1e_get_pfn(l1e);
+ struct pfn_info *page = &frame_table[mfn];
+ extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
+
+ if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
+ return 1;
+
+ if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
+ {
+ MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
+ return 0;
+ }
+
+ if ( unlikely(!pfn_valid(mfn)) ||
+ unlikely(page_get_owner(page) == dom_io) )
+ {
+ /* DOMID_IO reverts to caller for privilege checks. */
+ if ( d == dom_io )
+ d = current->domain;
+
+ if ( (!IS_PRIV(d)) &&
+ (!IS_CAPABLE_PHYSDEV(d) || !domain_iomem_in_pfn(d, mfn)) )
+ {
+ MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
+ return 0;
+ }
+
+ /* No reference counting for out-of-range I/O pages. */
+ if ( !pfn_valid(mfn) )
+ return 1;
+
+ d = dom_io;
+ }
+
+ return ((l1e_get_flags(l1e) & _PAGE_RW) ?
+ get_page_and_type(page, d, PGT_writable_page) :
+ get_page(page, d));
+}
+
+
+/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
+static int
+get_page_from_l2e(
+ l2_pgentry_t l2e, unsigned long pfn,
+ struct domain *d, unsigned long vaddr)
+{
+ int rc;
+
+ ASSERT(!shadow_mode_refcounts(d));
+
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ return 1;
+
+ if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
+ {
+ MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
+ return 0;
+ }
+
+ vaddr >>= L2_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
+ rc = get_page_and_type_from_pagenr(
+ l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d);
+
+#if CONFIG_PAGING_LEVELS == 2
+ if (!rc)
+ rc = get_linear_pagetable(l2e, pfn, d);
+#endif
+ return rc;
+}
+
+
+#if CONFIG_PAGING_LEVELS >= 3
+
+static int
+get_page_from_l3e(
+ l3_pgentry_t l3e, unsigned long pfn,
+ struct domain *d, unsigned long vaddr)
+{
+ ASSERT( !shadow_mode_refcounts(d) );
+
+ int rc;
+
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+ return 1;
+
+ if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
+ {
+ MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
+ return 0;
+ }
+
+ vaddr >>= L3_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
+ rc = get_page_and_type_from_pagenr(
+ l3e_get_pfn(l3e),
+ PGT_l2_page_table | vaddr, d);
+#if CONFIG_PAGING_LEVELS == 3
+ if (!rc)
+ rc = get_linear_pagetable(l3e, pfn, d);
+#endif
+ return rc;
+}
+
+#endif /* 3 level */
+
+#if CONFIG_PAGING_LEVELS >= 4
+
+static int
+get_page_from_l4e(
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
+{
+ int rc;
+
+ ASSERT( !shadow_mode_refcounts(d) );
+
+ if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+ return 1;
+
+ if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
+ {
+ MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
+ return 0;
+ }
+
+ rc = get_page_and_type_from_pagenr(
+ l4e_get_pfn(l4e), PGT_l3_page_table, d);
+
+ if ( unlikely(!rc) )
+ return get_linear_pagetable(l4e, pfn, d);
+
+ return 1;
+}
+
+#endif /* 4 level */
+
+
+void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
+{
+ unsigned long pfn = l1e_get_pfn(l1e);
+ struct pfn_info *page = &frame_table[pfn];
+ struct domain *e;
+
+ if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !pfn_valid(pfn) )
+ return;
+
+ e = page_get_owner(page);
+ if ( unlikely(e != d) )
+ {
+ /*
+ * Unmap a foreign page that may have been mapped via a grant table.
+ * Note that this can fail for a privileged domain that can map foreign
+ * pages via MMUEXT_SET_FOREIGNDOM. Such domains can have some mappings
+ * counted via a grant entry and some counted directly in the page
+ * structure's reference count. Note that reference counts won't get
+ * dangerously confused as long as we always try to decrement the
+ * grant entry first. We may end up with a mismatch between which
+ * mappings and which unmappings are counted via the grant entry, but
+ * really it doesn't matter as privileged domains have carte blanche.
+ */
+ if (likely(gnttab_check_unmap(e, d, pfn,
+ !(l1e_get_flags(l1e) & _PAGE_RW))))
+ return;
+ /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
+ }
+
+ if ( l1e_get_flags(l1e) & _PAGE_RW )
+ {
+ put_page_and_type(page);
+ }
+ else
+ {
+ /* We expect this is rare so we blow the entire shadow LDT. */
+ if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
+ PGT_ldt_page)) &&
+ unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) )
+
+ // XXX SMP BUG?
+ invalidate_shadow_ldt(e->vcpu[0]);
+ put_page(page);
+ }
+}
+
+
+/*
+ * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
+ * Note also that this automatically deals correctly with linear p.t.'s.
+ */
+static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
+{
+ if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
+ (l2e_get_pfn(l2e) != pfn) )
+ put_page_and_type(&frame_table[l2e_get_pfn(l2e)]);
+}
+
+
+#if CONFIG_PAGING_LEVELS >= 3
+
+static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
+{
+ if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
+ (l3e_get_pfn(l3e) != pfn) )
+ put_page_and_type(&frame_table[l3e_get_pfn(l3e)]);
+}
+
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+
+static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
+{
+ if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
+ (l4e_get_pfn(l4e) != pfn) )
+ put_page_and_type(&frame_table[l4e_get_pfn(l4e)]);
+}
+
+#endif
+
+
+static int alloc_l1_table(struct pfn_info *page)
+{
+ struct domain *d = page_get_owner(page);
+ unsigned long pfn = page_to_pfn(page);
+ l1_pgentry_t *pl1e;
+ int i;
+
+ ASSERT(!shadow_mode_refcounts(d));
+
+ pl1e = map_domain_page(pfn);
+
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l1_slot(i) &&
+ unlikely(!get_page_from_l1e(pl1e[i], d)) )
+ goto fail;
+
+ unmap_domain_page(pl1e);
+ return 1;
+
+ fail:
+ while ( i-- > 0 )
+ if ( is_guest_l1_slot(i) )
+ put_page_from_l1e(pl1e[i], d);
+
+ unmap_domain_page(pl1e);
+ return 0;
+}
+
+#ifdef CONFIG_X86_PAE
+static int create_pae_xen_mappings(l3_pgentry_t *pl3e)
+{
+ struct pfn_info *page;
+ l2_pgentry_t *pl2e;
+ l3_pgentry_t l3e3;
+ int i;
+
+ pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
+
+ /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
+ l3e3 = pl3e[3];
+ if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
+ {
+ MEM_LOG("PAE L3 3rd slot is empty");
+ return 0;
+ }
+
+ /*
+ * The Xen-private mappings include linear mappings. The L2 thus cannot
+ * be shared by multiple L3 tables. The test here is adequate because:
+ * 1. Cannot appear in slots != 3 because the page would then then have
+ * unknown va backpointer, which get_page_type() explicitly disallows.
+ * 2. Cannot appear in another page table's L3:
+ * a. alloc_l3_table() calls this function and this check will fail
+ * b. mod_l3_entry() disallows updates to slot 3 in an existing table
+ */
+ page = l3e_get_page(l3e3);
+ BUG_ON(page->u.inuse.type_info & PGT_pinned);
+ BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
+ {
+ MEM_LOG("PAE L3 3rd slot is shared");
+ return 0;
+ }
+
+ /* Xen private mappings. */
+ pl2e = map_domain_page(l3e_get_pfn(l3e3));
+ memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+ &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+ for ( i = 0; i < (PERDOMAIN_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
+ pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(
+ virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
+ for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
+ pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+ (l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ?
+ l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR) :
+ l2e_empty();
+ unmap_domain_page(pl2e);
+
+ return 1;
+}
+
+static inline int l1_backptr(
+ unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
+{
+ unsigned long l2_backptr = l2_type & PGT_va_mask;
+ BUG_ON(l2_backptr == PGT_va_unknown);
+ if ( l2_backptr == PGT_va_mutable )
+ return 0;
+ *backptr =
+ ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) |
+ (offset_in_l2 << L2_PAGETABLE_SHIFT);
+ return 1;
+}
+
+#else
+# define create_pae_xen_mappings(pl3e) (1)
+# define l1_backptr(bp,l2o,l2t) \
+ ({ *(bp) = (l2o) << L2_PAGETABLE_SHIFT; 1; })
+#endif
+
+static int alloc_l2_table(struct pfn_info *page, unsigned int type)
+{
+ struct domain *d = page_get_owner(page);
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long vaddr;
+ l2_pgentry_t *pl2e;
+ int i;
+
+ /* See the code in shadow_promote() to understand why this is here. */
+ if ( (PGT_base_page_table == PGT_l2_page_table) &&
+ unlikely(shadow_mode_refcounts(d)) )
+ return 1;
+ ASSERT(!shadow_mode_refcounts(d));
+
+ pl2e = map_domain_page(pfn);
+
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ {
+ if ( !l1_backptr(&vaddr, i, type) )
+ goto fail;
+ if ( is_guest_l2_slot(type, i) &&
+ unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
+ goto fail;
+ }
+
+#if CONFIG_PAGING_LEVELS == 2
+ /* Xen private mappings. */
+ memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+ pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
+ pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
+ l2e_from_page(
+ virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt),
+ __PAGE_HYPERVISOR);
+#endif
+
+ unmap_domain_page(pl2e);
+ return 1;
+
+ fail:
+ while ( i-- > 0 )
+ if ( is_guest_l2_slot(type, i) )
+ put_page_from_l2e(pl2e[i], pfn);
+
+ unmap_domain_page(pl2e);
+ return 0;
+}
+
+
+#if CONFIG_PAGING_LEVELS >= 3
+static int alloc_l3_table(struct pfn_info *page)
+{
+ struct domain *d = page_get_owner(page);
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long vaddr;
+ l3_pgentry_t *pl3e;
+ int i;
+
+ ASSERT(!shadow_mode_refcounts(d));
+
+ pl3e = map_domain_page(pfn);
+ for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ {
+ vaddr = i << L3_PAGETABLE_SHIFT;
+ if ( is_guest_l3_slot(i) &&
+ unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
+ goto fail;
+ }
+
+ if ( !create_pae_xen_mappings(pl3e) )
+ goto fail;
+
+ unmap_domain_page(pl3e);
+ return 1;
+
+ fail:
+ while ( i-- > 0 )
+ if ( is_guest_l3_slot(i) )
+ put_page_from_l3e(pl3e[i], pfn);
+
+ unmap_domain_page(pl3e);
+ return 0;
+}
+#else
+#define alloc_l3_table(page) (0)
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+static int alloc_l4_table(struct pfn_info *page)
+{
+ struct domain *d = page_get_owner(page);
+ unsigned long pfn = page_to_pfn(page);
+ l4_pgentry_t *pl4e = page_to_virt(page);
+ int i;
+
+ /* See the code in shadow_promote() to understand why this is here. */
+ if ( (PGT_base_page_table == PGT_l4_page_table) &&
+ shadow_mode_refcounts(d) )
+ return 1;
+ ASSERT(!shadow_mode_refcounts(d));
+
+ for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l4_slot(i) &&
+ unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
+ goto fail;
+
+ /* Xen private mappings. */
+ memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+ ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+ pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
+ l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
+ pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_page(
+ virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3),
+ __PAGE_HYPERVISOR);
+
+ return 1;
+
+ fail:
+ while ( i-- > 0 )
+ if ( is_guest_l4_slot(i) )
+ put_page_from_l4e(pl4e[i], pfn);
+
+ return 0;
+}
+#else
+#define alloc_l4_table(page) (0)
+#endif
+
+
+static void free_l1_table(struct pfn_info *page)
+{
+ struct domain *d = page_get_owner(page);
+ unsigned long pfn = page_to_pfn(page);
+ l1_pgentry_t *pl1e;
+ int i;
+
+ pl1e = map_domain_page(pfn);
+
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l1_slot(i) )
+ put_page_from_l1e(pl1e[i], d);
+
+ unmap_domain_page(pl1e);
+}
+
+
+static void free_l2_table(struct pfn_info *page)
+{
+ unsigned long pfn = page_to_pfn(page);
+ l2_pgentry_t *pl2e;
+ int i;
+
+ pl2e = map_domain_page(pfn);
+
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l2_slot(page->u.inuse.type_info, i) )
+ put_page_from_l2e(pl2e[i], pfn);
+
+ unmap_domain_page(pl2e);
+}
+
+
+#if CONFIG_PAGING_LEVELS >= 3
+
+static void free_l3_table(struct pfn_info *page)
+{
+ unsigned long pfn = page_to_pfn(page);
+ l3_pgentry_t *pl3e;
+ int i;
+
+ pl3e = map_domain_page(pfn);
+
+ for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l3_slot(i) )
+ put_page_from_l3e(pl3e[i], pfn);
+
+ unmap_domain_page(pl3e);
+}
+
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+
+static void free_l4_table(struct pfn_info *page)
+{
+ unsigned long pfn = page_to_pfn(page);
+ l4_pgentry_t *pl4e = page_to_virt(page);
+ int i;
+
+ for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l4_slot(i) )
+ put_page_from_l4e(pl4e[i], pfn);
+}
+
+#endif
+
+static inline int update_l1e(l1_pgentry_t *pl1e,
+ l1_pgentry_t ol1e,
+ l1_pgentry_t nl1e)
+{
+ intpte_t o = l1e_get_intpte(ol1e);
+ intpte_t n = l1e_get_intpte(nl1e);
+
+ if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
+ unlikely(o != l1e_get_intpte(ol1e)) )
+ {
+ MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
+ ": saw %" PRIpte "\n",
+ l1e_get_intpte(ol1e),
+ l1e_get_intpte(nl1e),
+ o);
+ return 0;
+ }
+ return 1;
+}
+
+
+/* Update the L1 entry at pl1e to new value nl1e. */
+static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
+{
+ l1_pgentry_t ol1e;
+ struct domain *d = current->domain;
+
+ if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
+ return 0;
+
+ if ( unlikely(shadow_mode_refcounts(d)) )
+ return update_l1e(pl1e, ol1e, nl1e);
+
+ if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
+ {
+ if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
+ {
+ MEM_LOG("Bad L1 flags %x\n",
+ l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
+ return 0;
+ }
+
+ /* Fast path for identical mapping, r/w and presence. */
+ if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT))
+ return update_l1e(pl1e, ol1e, nl1e);
+
+ if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
+ return 0;
+
+ if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+ {
+ put_page_from_l1e(nl1e, d);
+ return 0;
+ }
+ }
+ else
+ {
+ if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+ return 0;
+ }
+
+ put_page_from_l1e(ol1e, d);
+ return 1;
+}
+
+#define UPDATE_ENTRY(_t,_p,_o,_n) ({ \
+ intpte_t __o = cmpxchg((intpte_t *)(_p), \
+ _t ## e_get_intpte(_o), \
+ _t ## e_get_intpte(_n)); \
+ if ( __o != _t ## e_get_intpte(_o) ) \
+ MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte \
+ ": saw %" PRIpte "", \
+ (_t ## e_get_intpte(_o)), \
+ (_t ## e_get_intpte(_n)), \
+ (__o)); \
+ (__o == _t ## e_get_intpte(_o)); })
+
+/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
+static int mod_l2_entry(l2_pgentry_t *pl2e,
+ l2_pgentry_t nl2e,
+ unsigned long pfn,
+ unsigned int type)
+{
+ l2_pgentry_t ol2e;
+ unsigned long vaddr;
+
+ if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
+ {
+ MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
+ return 0;
+ }
+
+ if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
+ return 0;
+
+ if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
+ {
+ if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
+ {
+ MEM_LOG("Bad L2 flags %x\n",
+ l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
+ return 0;
+ }
+
+ /* Fast path for identical mapping and presence. */
+ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
+ return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e);
+
+ if ( unlikely(!l1_backptr(&vaddr, pgentry_ptr_to_slot(pl2e), type)) ||
+ unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) )
+ return 0;
+
+ if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
+ {
+ put_page_from_l2e(nl2e, pfn);
+ return 0;
+ }
+ }
+ else
+ {
+ if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
+ return 0;
+ }
+
+ put_page_from_l2e(ol2e, pfn);
+ return 1;
+}
+
+
+#if CONFIG_PAGING_LEVELS >= 3
+
+/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
+static int mod_l3_entry(l3_pgentry_t *pl3e,
+ l3_pgentry_t nl3e,
+ unsigned long pfn)
+{
+ l3_pgentry_t ol3e;
+ unsigned long vaddr;
+
+ if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
+ {
+ MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
+ return 0;
+ }
+
+#ifdef CONFIG_X86_PAE
+ /*
+ * Disallow updates to final L3 slot. It contains Xen mappings, and it
+ * would be a pain to ensure they remain continuously valid throughout.
+ */
+ if ( pgentry_ptr_to_slot(pl3e) >= 3 )
+ return 0;
+#endif
+
+ if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
+ return 0;
+
+ if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
+ {
+ if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
+ {
+ MEM_LOG("Bad L3 flags %x\n",
+ l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
+ return 0;
+ }
+
+ /* Fast path for identical mapping and presence. */
+ if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
+ return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e);
+
+ vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
+ << L3_PAGETABLE_SHIFT;
+ if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
+ return 0;
+
+ if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+ {
+ BUG_ON(!create_pae_xen_mappings(pl3e));
+ put_page_from_l3e(nl3e, pfn);
+ return 0;
+ }
+
+ put_page_from_l3e(ol3e, pfn);
+ return 1;
+ }
+
+ if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+ {
+ BUG_ON(!create_pae_xen_mappings(pl3e));
+ return 0;
+ }
+
+ put_page_from_l3e(ol3e, pfn);
+ return 1;
+}
+
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+
+/* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
+/*
+ * Returns 1 on success, 0 on failure.  Reference-count discipline: a
+ * type/ref count is taken on the frame named by the new entry *before*
+ * the entry is published, and the reference held via the old entry is
+ * dropped only *after* the update commits, so no window exists in which
+ * a visible entry lacks a live reference.
+ */
+static int mod_l4_entry(l4_pgentry_t *pl4e,
+                        l4_pgentry_t nl4e,
+                        unsigned long pfn)
+{
+    l4_pgentry_t ol4e;
+
+    /* Refuse updates to slots reserved for Xen's own mappings. */
+    if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) )
+    {
+        MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
+        return 0;
+    }
+
+    /* Snapshot the current entry (pl4e may be a guest-accessible mapping). */
+    if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
+        return 0;
+
+    if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
+    {
+        if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
+        {
+            MEM_LOG("Bad L4 flags %x\n",
+                    l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
+            return 0;
+        }
+
+        /* Fast path for identical mapping and presence. */
+        if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
+            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e);
+
+        /* Take a reference on the new target frame before publishing. */
+        if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
+            return 0;
+
+        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+        {
+            /* Lost a race against a concurrent update: undo our ref. */
+            put_page_from_l4e(nl4e, pfn);
+            return 0;
+        }
+
+        put_page_from_l4e(ol4e, pfn);
+        return 1;
+    }
+
+    /* New entry is not present: just clear and drop the old reference. */
+    if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
+        return 0;
+
+    put_page_from_l4e(ol4e, pfn);
+    return 1;
+}
+
+#endif
+
+/*
+ * Validate and initialise @page for use as the given type.  Dispatches to
+ * the per-type validation routine; the full 'type' word is forwarded to
+ * the L2 handler, which needs more than the type bits.  Any unrecognised
+ * type is a hypervisor bug.
+ */
+int alloc_page_type(struct pfn_info *page, unsigned int type)
+{
+    unsigned int t = type & PGT_type_mask;
+
+    if ( t == PGT_l1_page_table )
+        return alloc_l1_table(page);
+    if ( t == PGT_l2_page_table )
+        return alloc_l2_table(page, type);
+    if ( t == PGT_l3_page_table )
+        return alloc_l3_table(page);
+    if ( t == PGT_l4_page_table )
+        return alloc_l4_table(page);
+    if ( (t == PGT_gdt_page) || (t == PGT_ldt_page) )
+        return alloc_segdesc_page(page);
+
+    printk("Bad type in alloc_page_type %x t=%x c=%x\n",
+           type, page->u.inuse.type_info,
+           page->count_info);
+    BUG();
+
+    return 0;
+}
+
+
+/*
+ * Tear down the type-specific state of @page when its type count falls to
+ * zero (dropping the references its entries hold).  In shadow-refcount
+ * mode the shadow code owns all such bookkeeping, so this is a no-op; in
+ * other shadow modes any shadow of the page is destroyed first.
+ */
+void free_page_type(struct pfn_info *page, unsigned int type)
+{
+    struct domain *owner = page_get_owner(page);
+    unsigned long gpfn;
+
+    if ( owner != NULL )
+    {
+        /* Shadow-refcounted guests: shadow code manages PT refcounts. */
+        if ( unlikely(shadow_mode_refcounts(owner)) )
+            return;
+        if ( unlikely(shadow_mode_enabled(owner)) )
+        {
+            /* Remove any shadow of this (about to be invalid) table. */
+            gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
+            ASSERT(VALID_M2P(gpfn));
+            remove_shadow(owner, gpfn, type & PGT_type_mask);
+        }
+    }
+
+    switch (type & PGT_type_mask)
+    {
+    case PGT_l1_page_table:
+        free_l1_table(page);
+        break;
+
+    case PGT_l2_page_table:
+        free_l2_table(page);
+        break;
+
+#if CONFIG_PAGING_LEVELS >= 3
+    case PGT_l3_page_table:
+        free_l3_table(page);
+        break;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGT_l4_page_table:
+        free_l4_table(page);
+        break;
+#endif
+
+    default:
+        /* GDT/LDT pages hold no references, so nothing to free here. */
+        printk("%s: type %x pfn %lx\n",__FUNCTION__,
+               type, page_to_pfn(page));
+        BUG();
+    }
+}
+
+
+/*
+ * Drop one type reference on @page.  Lock-free: the whole transition is
+ * made with a cmpxchg on type_info, retried until it commits against a
+ * consistent snapshot.  When the count reaches zero the page is
+ * unvalidated (its type-specific state torn down via free_page_type)
+ * before the final decrement becomes visible.
+ */
+void put_page_type(struct pfn_info *page)
+{
+    u32 nx, x, y = page->u.inuse.type_info;
+
+ again:
+    do {
+        x  = y;
+        nx = x - 1;
+
+        ASSERT((x & PGT_count_mask) != 0);
+
+        /*
+         * The page should always be validated while a reference is held. The
+         * exception is during domain destruction, when we forcibly invalidate
+         * page-table pages if we detect a referential loop.
+         * See domain.c:relinquish_list().
+         */
+        ASSERT((x & PGT_validated) ||
+               test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
+
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            /* Record TLB information for flush later. Races are harmless. */
+            page->tlbflush_timestamp = tlbflush_current_time();
+
+            if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
+                 likely(nx & PGT_validated) )
+            {
+                /*
+                 * Page-table pages must be unvalidated when count is zero. The
+                 * 'free' is safe because the refcnt is non-zero and validated
+                 * bit is clear => other ops will spin or fail.
+                 */
+                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
+                                           x & ~PGT_validated)) != x) )
+                    goto again;
+                /* We cleared the 'valid bit' so we do the clean up. */
+                free_page_type(page, x);
+                /* Carry on, but with the 'valid bit' now clear. */
+                x  &= ~PGT_validated;
+                nx &= ~PGT_validated;
+            }
+        }
+        else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) ==
+                            (PGT_pinned | 1)) &&
+                           ((nx & PGT_type_mask) != PGT_writable_page)) )
+        {
+            /* Page is now only pinned. Make the back pointer mutable again. */
+            nx |= PGT_va_mutable;
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+}
+
+
+/*
+ * Take one type reference on @page for the requested 'type' (type bits
+ * plus optional va backpointer bits).  Lock-free cmpxchg loop, mirroring
+ * put_page_type().  On a 0->1 transition the page is (re)validated via
+ * alloc_page_type(); concurrent callers spin until validation completes.
+ * Returns 1 on success, 0 on failure (reference not taken).
+ */
+int get_page_type(struct pfn_info *page, u32 type)
+{
+    u32 nx, x, y = page->u.inuse.type_info;
+
+ again:
+    do {
+        x  = y;
+        nx = x + 1;
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            MEM_LOG("Type count overflow on pfn %lx", page_to_pfn(page));
+            return 0;
+        }
+        else if ( unlikely((x & PGT_count_mask) == 0) )
+        {
+            /* First reference: we may be changing the page's type. */
+            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
+            {
+                /*
+                 * On type change we check to flush stale TLB entries. This
+                 * may be unnecessary (e.g., page was GDT/LDT) but those
+                 * circumstances should be very rare.
+                 */
+                cpumask_t mask = page_get_owner(page)->cpumask;
+                tlbflush_filter(mask, page->tlbflush_timestamp);
+
+                if ( unlikely(!cpus_empty(mask)) )
+                {
+                    perfc_incrc(need_flush_tlb_flush);
+                    flush_tlb_mask(mask);
+                }
+
+                /* We lose existing type, back pointer, and validity. */
+                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
+                nx |= type;
+
+                /* No special validation needed for writable pages. */
+                /* Page tables and GDT/LDT need to be scanned for validity. */
+                if ( type == PGT_writable_page )
+                    nx |= PGT_validated;
+            }
+        }
+        else
+        {
+            /* Additional reference: requested type must be compatible. */
+            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+            {
+                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+                {
+                    if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
+                         ((type & PGT_type_mask) != PGT_l1_page_table) )
+                        MEM_LOG("Bad type (saw %08x != exp %08x) for pfn %lx",
+                                x, type, page_to_pfn(page));
+                    return 0;
+                }
+                else if ( (x & PGT_va_mask) == PGT_va_mutable )
+                {
+                    /* The va backpointer is mutable, hence we update it. */
+                    nx &= ~PGT_va_mask;
+                    nx |= type; /* we know the actual type is correct */
+                }
+                else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
+                          ((type & PGT_va_mask) != (x & PGT_va_mask)) )
+                {
+#ifdef CONFIG_X86_PAE
+                    /* We use backptr as extra typing. Cannot be unknown. */
+                    if ( (type & PGT_type_mask) == PGT_l2_page_table )
+                        return 0;
+#endif
+                    /* This table is possibly mapped at multiple locations. */
+                    nx &= ~PGT_va_mask;
+                    nx |= PGT_va_unknown;
+                }
+            }
+            if ( unlikely(!(x & PGT_validated)) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                    cpu_relax();
+                goto again;
+            }
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+
+    if ( unlikely(!(nx & PGT_validated)) )
+    {
+        /* Try to validate page type; drop the new reference on failure. */
+        if ( unlikely(!alloc_page_type(page, type)) )
+        {
+            MEM_LOG("Error while validating pfn %lx for type %08x."
+                    " caf=%08x taf=%08x",
+                    page_to_pfn(page), type,
+                    page->count_info,
+                    page->u.inuse.type_info);
+            /* No one else can get a reference. We hold the only ref. */
+            page->u.inuse.type_info = 0;
+            return 0;
+        }
+
+        /* No one else is updating simultaneously. */
+        __set_bit(_PGT_validated, &page->u.inuse.type_info);
+    }
+
+    return 1;
+}
+
+
+/*
+ * Install machine frame 'mfn' as the current VCPU's new guest page-table
+ * base (CR3).  Takes the appropriate reference on the new root before
+ * switching, updates shadow/monitor tables, then drops the reference on
+ * the old root.  Returns 1 on success, 0 on failure (state unchanged).
+ */
+int new_guest_cr3(unsigned long mfn)
+{
+    struct vcpu *v = current;
+    struct domain *d = v->domain;
+    int okay;
+    unsigned long old_base_mfn;
+
+    /* Shadow-refcounted guests need only a plain ref; others a root type. */
+    if ( shadow_mode_refcounts(d) )
+        okay = get_page_from_pagenr(mfn, d);
+    else
+        okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
+
+    if ( likely(okay) )
+    {
+        /* The shadow LDT mapping belongs to the old address space. */
+        invalidate_shadow_ldt(v);
+
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        update_pagetables(v); /* update shadow_table and monitor_table */
+
+        write_ptbase(v);
+
+        if ( shadow_mode_refcounts(d) )
+            put_page(&frame_table[old_base_mfn]);
+        else
+            put_page_and_type(&frame_table[old_base_mfn]);
+
+        /* CR3 also holds a ref to its shadow... */
+        if ( shadow_mode_enabled(d) )
+        {
+            if ( v->arch.monitor_shadow_ref )
+                put_shadow_ref(v->arch.monitor_shadow_ref);
+            v->arch.monitor_shadow_ref =
+                pagetable_get_pfn(v->arch.monitor_table);
+            ASSERT(!page_get_owner(&frame_table[v->arch.monitor_shadow_ref]));
+            get_shadow_ref(v->arch.monitor_shadow_ref);
+        }
+    }
+    else
+    {
+        MEM_LOG("Error while installing new baseptr %lx", mfn);
+    }
+
+    return okay;
+}
+
+/*
+ * Run and clear any per-cpu work deferred during a batched hypercall:
+ * a local TLB flush (preceded by a full shadow sync when shadowing is
+ * enabled), an LDT reload, and release of any foreign-domain reference.
+ */
+static void process_deferred_ops(unsigned int cpu)
+{
+    struct domain *d = current->domain;
+    unsigned int ops = percpu_info[cpu].deferred_ops;
+
+    percpu_info[cpu].deferred_ops = 0;
+
+    if ( ops & DOP_FLUSH_TLB )
+    {
+        /* Bring shadow page tables up to date before flushing. */
+        if ( shadow_mode_enabled(d) )
+            shadow_sync_all(d);
+        local_flush_tlb();
+    }
+
+    if ( ops & DOP_RELOAD_LDT )
+        (void)map_ldt_shadow_page(0);
+
+    if ( unlikely(percpu_info[cpu].foreign != NULL) )
+    {
+        put_domain(percpu_info[cpu].foreign);
+        percpu_info[cpu].foreign = NULL;
+    }
+}
+
+/*
+ * Select the foreign target domain for subsequent MMU operations on this
+ * cpu.  Drops any previously-held foreign reference, then takes a new one
+ * for 'domid' (DOMID_SELF clears the selection).  Unprivileged domains
+ * may only target DOMID_IO; privileged ones may target any domain, plus
+ * the special DOMID_XEN/DOMID_IO pseudo-domains.  Returns 1 on success.
+ */
+static int set_foreigndom(unsigned int cpu, domid_t domid)
+{
+    struct domain *e, *d = current->domain;
+    int okay = 1;
+
+    if ( (e = percpu_info[cpu].foreign) != NULL )
+        put_domain(e);
+    percpu_info[cpu].foreign = NULL;
+
+    if ( domid == DOMID_SELF )
+        goto out;
+
+    if ( !IS_PRIV(d) )
+    {
+        switch ( domid )
+        {
+        case DOMID_IO:
+            /* dom_io is never destroyed, so only a "knownalive" ref. */
+            get_knownalive_domain(dom_io);
+            percpu_info[cpu].foreign = dom_io;
+            break;
+        default:
+            MEM_LOG("Dom %u cannot set foreign dom\n", d->domain_id);
+            okay = 0;
+            break;
+        }
+    }
+    else
+    {
+        percpu_info[cpu].foreign = e = find_domain_by_id(domid);
+        if ( e == NULL )
+        {
+            switch ( domid )
+            {
+            case DOMID_XEN:
+                get_knownalive_domain(dom_xen);
+                percpu_info[cpu].foreign = dom_xen;
+                break;
+            case DOMID_IO:
+                get_knownalive_domain(dom_io);
+                percpu_info[cpu].foreign = dom_io;
+                break;
+            default:
+                MEM_LOG("Unknown domain '%u'", domid);
+                okay = 0;
+                break;
+            }
+        }
+    }
+
+ out:
+    return okay;
+}
+
+/*
+ * Convert a guest-supplied bitmap of VCPU ids ('vmask') into the mask of
+ * physical cpus those VCPUs of domain 'd' are currently running on.
+ * Out-of-range ids and non-existent VCPUs are silently ignored.
+ */
+static inline cpumask_t vcpumask_to_pcpumask(
+    struct domain *d, unsigned long vmask)
+{
+    unsigned int vcpu_id;
+    cpumask_t pmask;
+    struct vcpu *v;
+
+    /*
+     * Fix: pmask was used uninitialised.  cpu_set() only ORs bits into the
+     * mask, so stale stack contents could leak into the returned value and
+     * cause flushes (or skipped flushes) on arbitrary cpus.  Start empty.
+     */
+    cpus_clear(pmask);
+
+    while ( vmask != 0 )
+    {
+        vcpu_id = find_first_set_bit(vmask);
+        vmask &= ~(1UL << vcpu_id);
+        if ( (vcpu_id < MAX_VIRT_CPUS) &&
+             ((v = d->vcpu[vcpu_id]) != NULL) )
+            cpu_set(v->processor, pmask);
+    }
+
+    return pmask;
+}
+
+/*
+ * Hypercall handler: execute a batch of 'count' extended MMU operations
+ * read from the guest array 'uops' (pin/unpin page-table pages, CR3
+ * switches, TLB/cache flushes, LDT changes, page reassignment).  If
+ * 'pdone' is non-NULL the running total of completed ops is written back
+ * through it, allowing a preempted call to resume via a continuation.
+ * 'foreigndom' optionally selects another domain (FOREIGNDOM) as the
+ * target.  Returns 0 on success, or -EINVAL/-EFAULT at the first failure.
+ */
+int do_mmuext_op(
+    struct mmuext_op *uops,
+    unsigned int count,
+    unsigned int *pdone,
+    unsigned int foreigndom)
+{
+    struct mmuext_op op;
+    int rc = 0, i = 0, okay, cpu = smp_processor_id();
+    unsigned int type, done = 0;
+    struct pfn_info *page;
+    struct vcpu *v = current;
+    struct domain *d = v->domain, *e;
+    u32 x, y, _d, _nd;
+
+    LOCK_BIGLOCK(d);
+
+    cleanup_writable_pagetable(d);
+
+    /* Resumed continuation? Recover the running 'done' count. */
+    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+    {
+        count &= ~MMU_UPDATE_PREEMPTED;
+        if ( unlikely(pdone != NULL) )
+            (void)get_user(done, pdone);
+    }
+
+    if ( !set_foreigndom(cpu, foreigndom) )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    if ( unlikely(!array_access_ok(uops, count, sizeof(op))) )
+    {
+        rc = -EFAULT;
+        goto out;
+    }
+
+    for ( i = 0; i < count; i++ )
+    {
+        /* Yield into a continuation if other work is pending. */
+        if ( hypercall_preempt_check() )
+        {
+            rc = hypercall4_create_continuation(
+                __HYPERVISOR_mmuext_op, uops,
+                (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+            break;
+        }
+
+        if ( unlikely(__copy_from_user(&op, uops, sizeof(op)) != 0) )
+        {
+            MEM_LOG("Bad __copy_from_user");
+            rc = -EFAULT;
+            break;
+        }
+
+        okay = 1;
+        page = &frame_table[op.mfn];
+
+        switch ( op.cmd )
+        {
+        case MMUEXT_PIN_L1_TABLE:
+            type = PGT_l1_page_table | PGT_va_mutable;
+
+        pin_page:
+            /* Shadow-refcounted guests pin plain writable frames instead. */
+            if ( shadow_mode_refcounts(FOREIGNDOM) )
+                type = PGT_writable_page;
+
+            okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
+            if ( unlikely(!okay) )
+            {
+                MEM_LOG("Error while pinning mfn %lx", op.mfn);
+                break;
+            }
+
+            if ( unlikely(test_and_set_bit(_PGT_pinned,
+                                           &page->u.inuse.type_info)) )
+            {
+                MEM_LOG("Mfn %lx already pinned", op.mfn);
+                put_page_and_type(page);
+                okay = 0;
+                break;
+            }
+
+            break;
+
+#ifndef CONFIG_X86_PAE /* Unsafe on PAE because of Xen-private mappings. */
+        case MMUEXT_PIN_L2_TABLE:
+            type = PGT_l2_page_table;
+            goto pin_page;
+#endif
+
+        case MMUEXT_PIN_L3_TABLE:
+            type = PGT_l3_page_table;
+            goto pin_page;
+
+        case MMUEXT_PIN_L4_TABLE:
+            type = PGT_l4_page_table;
+            goto pin_page;
+
+        case MMUEXT_UNPIN_TABLE:
+            if ( unlikely(!(okay = get_page_from_pagenr(op.mfn, FOREIGNDOM))) )
+            {
+                MEM_LOG("Mfn %lx bad domain (dom=%p)",
+                        op.mfn, page_get_owner(page));
+            }
+            else if ( likely(test_and_clear_bit(_PGT_pinned,
+                                                &page->u.inuse.type_info)) )
+            {
+                /* Drop the ref the pin held, plus our temporary one. */
+                put_page_and_type(page);
+                put_page(page);
+            }
+            else
+            {
+                okay = 0;
+                put_page(page);
+                MEM_LOG("Mfn %lx not pinned", op.mfn);
+            }
+            break;
+
+        case MMUEXT_NEW_BASEPTR:
+            okay = new_guest_cr3(op.mfn);
+            /* new_guest_cr3() reloaded ptbase: cancel any deferred flush. */
+            percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
+            break;
+
+#ifdef __x86_64__
+        case MMUEXT_NEW_USER_BASEPTR:
+            okay = get_page_and_type_from_pagenr(
+                op.mfn, PGT_root_page_table, d);
+            if ( unlikely(!okay) )
+            {
+                MEM_LOG("Error while installing new mfn %lx", op.mfn);
+            }
+            else
+            {
+                unsigned long old_mfn =
+                    pagetable_get_pfn(v->arch.guest_table_user);
+                v->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT);
+                if ( old_mfn != 0 )
+                    put_page_and_type(&frame_table[old_mfn]);
+            }
+            break;
+#endif
+
+        case MMUEXT_TLB_FLUSH_LOCAL:
+            percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
+            break;
+
+        case MMUEXT_INVLPG_LOCAL:
+            if ( shadow_mode_enabled(d) )
+                shadow_invlpg(v, op.linear_addr);
+            local_flush_tlb_one(op.linear_addr);
+            break;
+
+        case MMUEXT_TLB_FLUSH_MULTI:
+        case MMUEXT_INVLPG_MULTI:
+        {
+            unsigned long vmask;
+            cpumask_t pmask;
+            if ( unlikely(get_user(vmask, (unsigned long *)op.vcpumask)) )
+            {
+                okay = 0;
+                break;
+            }
+            pmask = vcpumask_to_pcpumask(d, vmask);
+            /* Restrict to cpus the domain is actually running on. */
+            cpus_and(pmask, pmask, d->cpumask);
+            if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
+                flush_tlb_mask(pmask);
+            else
+                flush_tlb_one_mask(pmask, op.linear_addr);
+            break;
+        }
+
+        case MMUEXT_TLB_FLUSH_ALL:
+            flush_tlb_mask(d->cpumask);
+            break;
+
+        case MMUEXT_INVLPG_ALL:
+            flush_tlb_one_mask(d->cpumask, op.linear_addr);
+            break;
+
+        case MMUEXT_FLUSH_CACHE:
+            if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
+            {
+                MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n");
+                okay = 0;
+            }
+            else
+            {
+                wbinvd();
+            }
+            break;
+
+        case MMUEXT_SET_LDT:
+        {
+            if ( shadow_mode_external(d) )
+            {
+                MEM_LOG("ignoring SET_LDT hypercall from external "
+                        "domain %u\n", d->domain_id);
+                okay = 0;
+                break;
+            }
+
+            unsigned long ptr  = op.linear_addr;
+            unsigned long ents = op.nr_ents;
+            if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
+                 (ents > 8192) ||
+                 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
+            {
+                okay = 0;
+                MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
+            }
+            else if ( (v->arch.guest_context.ldt_ents != ents) ||
+                      (v->arch.guest_context.ldt_base != ptr) )
+            {
+                invalidate_shadow_ldt(v);
+                v->arch.guest_context.ldt_base = ptr;
+                v->arch.guest_context.ldt_ents = ents;
+                load_LDT(v);
+                /* Defer the reload; an empty LDT needs none. */
+                percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
+                if ( ents != 0 )
+                    percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
+            }
+            break;
+        }
+
+        case MMUEXT_REASSIGN_PAGE:
+            if ( unlikely(!IS_PRIV(d)) )
+            {
+                MEM_LOG("Dom %u has no reassignment priv", d->domain_id);
+                okay = 0;
+                break;
+            }
+
+            e = percpu_info[cpu].foreign;
+            if ( unlikely(e == NULL) )
+            {
+                MEM_LOG("No FOREIGNDOM to reassign mfn %lx to", op.mfn);
+                okay = 0;
+                break;
+            }
+
+            /*
+             * Grab both page_list locks, in order. This prevents the page from
+             * disappearing elsewhere while we modify the owner, and we'll need
+             * both locks if we're successful so that we can change lists.
+             */
+            if ( d < e )
+            {
+                spin_lock(&d->page_alloc_lock);
+                spin_lock(&e->page_alloc_lock);
+            }
+            else
+            {
+                spin_lock(&e->page_alloc_lock);
+                spin_lock(&d->page_alloc_lock);
+            }
+
+            /*
+             * Check that 'e' will accept the page and has reservation
+             * headroom. Also, a domain mustn't have PGC_allocated pages when
+             * it is dying.
+             */
+            ASSERT(e->tot_pages <= e->max_pages);
+            if ( unlikely(test_bit(_DOMF_dying, &e->domain_flags)) ||
+                 unlikely(e->tot_pages == e->max_pages) ||
+                 unlikely(IS_XEN_HEAP_FRAME(page)) )
+            {
+                MEM_LOG("Transferee has no reservation headroom (%d,%d), or "
+                        "page is in Xen heap (%lx), or dom is dying (%ld).\n",
+                        e->tot_pages, e->max_pages, op.mfn, e->domain_flags);
+                okay = 0;
+                goto reassign_fail;
+            }
+
+            /*
+             * The tricky bit: atomically change owner while there is just one
+             * benign reference to the page (PGC_allocated). If that reference
+             * disappears then the deallocation routine will safely spin.
+             */
+            _d  = pickle_domptr(d);
+            _nd = page->u.inuse._domain;
+            y   = page->count_info;
+            do {
+                x = y;
+                if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+                              (1|PGC_allocated)) ||
+                     unlikely(_nd != _d) )
+                {
+                    MEM_LOG("Bad page values %lx: ed=%p(%u), sd=%p,"
+                            " caf=%08x, taf=%08x\n", page_to_pfn(page),
+                            d, d->domain_id, unpickle_domptr(_nd), x,
+                            page->u.inuse.type_info);
+                    okay = 0;
+                    goto reassign_fail;
+                }
+                /* 64-bit cmpxchg swaps owner and count_info atomically. */
+                __asm__ __volatile__(
+                    LOCK_PREFIX "cmpxchg8b %3"
+                    : "=d" (_nd), "=a" (y), "=c" (e),
+                    "=m" (*(volatile u64 *)(&page->count_info))
+                    : "0" (_d), "1" (x), "c" (e), "b" (x) );
+            }
+            while ( unlikely(_nd != _d) || unlikely(y != x) );
+
+            /*
+             * Unlink from 'd'. We transferred at least one reference to 'e',
+             * so noone else is spinning to try to delete this page from 'd'.
+             */
+            d->tot_pages--;
+            list_del(&page->list);
+
+            /*
+             * Add the page to 'e'. Someone may already have removed the last
+             * reference and want to remove the page from 'e'. However, we have
+             * the lock so they'll spin waiting for us.
+             */
+            if ( unlikely(e->tot_pages++ == 0) )
+                get_knownalive_domain(e);
+            list_add_tail(&page->list, &e->page_list);
+
+        reassign_fail:
+            spin_unlock(&d->page_alloc_lock);
+            spin_unlock(&e->page_alloc_lock);
+            break;
+
+        default:
+            MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
+            okay = 0;
+            break;
+        }
+
+        if ( unlikely(!okay) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
+        uops++;
+    }
+
+ out:
+    process_deferred_ops(cpu);
+
+    /* Add incremental work we have done to the @done output parameter. */
+    if ( unlikely(pdone != NULL) )
+        __put_user(done + i, pdone);
+
+    UNLOCK_BIGLOCK(d);
+    return rc;
+}
+
+/*
+ * Hypercall handler: apply a batch of 'count' page-table updates from the
+ * guest array 'ureqs'.  Each request encodes the command in the low bits
+ * of req.ptr: MMU_NORMAL_PT_UPDATE writes a PTE at machine address
+ * req.ptr (validated per the target frame's current PT type), and
+ * MMU_MACHPHYS_UPDATE updates the machine-to-physical table.  Progress
+ * is written through 'pdone' to support preemption/continuation.
+ * Returns 0 on success, or -EINVAL/-EFAULT at the first failure.
+ */
+int do_mmu_update(
+    mmu_update_t *ureqs,
+    unsigned int count,
+    unsigned int *pdone,
+    unsigned int foreigndom)
+{
+    mmu_update_t req;
+    void *va;
+    unsigned long gpfn, mfn;
+    struct pfn_info *page;
+    int rc = 0, okay = 1, i = 0, cpu = smp_processor_id();
+    unsigned int cmd, done = 0;
+    struct vcpu *v = current;
+    struct domain *d = v->domain;
+    u32 type_info;
+    struct domain_mmap_cache mapcache, sh_mapcache;
+
+    LOCK_BIGLOCK(d);
+
+    cleanup_writable_pagetable(d);
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+        check_pagetable(v, "pre-mmu"); /* debug */
+
+    /* Resumed continuation? Recover the running 'done' count. */
+    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+    {
+        count &= ~MMU_UPDATE_PREEMPTED;
+        if ( unlikely(pdone != NULL) )
+            (void)get_user(done, pdone);
+    }
+
+    domain_mmap_cache_init(&mapcache);
+    domain_mmap_cache_init(&sh_mapcache);
+
+    if ( !set_foreigndom(cpu, foreigndom) )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    perfc_incrc(calls_to_mmu_update);
+    perfc_addc(num_page_updates, count);
+    perfc_incr_histo(bpt_updates, count, PT_UPDATES);
+
+    if ( unlikely(!array_access_ok(ureqs, count, sizeof(req))) )
+    {
+        rc = -EFAULT;
+        goto out;
+    }
+
+    for ( i = 0; i < count; i++ )
+    {
+        /* Yield into a continuation if other work is pending. */
+        if ( hypercall_preempt_check() )
+        {
+            rc = hypercall4_create_continuation(
+                __HYPERVISOR_mmu_update, ureqs,
+                (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+            break;
+        }
+
+        if ( unlikely(__copy_from_user(&req, ureqs, sizeof(req)) != 0) )
+        {
+            MEM_LOG("Bad __copy_from_user");
+            rc = -EFAULT;
+            break;
+        }
+
+        /* Command is encoded in the low (alignment) bits of req.ptr. */
+        cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
+        okay = 0;
+
+        switch ( cmd )
+        {
+            /*
+             * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
+             */
+        case MMU_NORMAL_PT_UPDATE:
+
+            gpfn = req.ptr >> PAGE_SHIFT;
+            mfn = __gpfn_to_mfn(d, gpfn);
+
+            if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
+            {
+                MEM_LOG("Could not get page for normal update");
+                break;
+            }
+
+            va = map_domain_page_with_cache(mfn, &mapcache);
+            va = (void *)((unsigned long)va + (req.ptr & ~PAGE_MASK));
+            page = &frame_table[mfn];
+
+            /* Validate against the frame's current page-table type. */
+            switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
+            {
+            case PGT_l1_page_table:
+                ASSERT( !shadow_mode_refcounts(d) );
+                if ( likely(get_page_type(
+                    page, type_info & (PGT_type_mask|PGT_va_mask))) )
+                {
+                    l1_pgentry_t l1e;
+
+                    /* FIXME: doesn't work with PAE */
+                    l1e = l1e_from_intpte(req.val);
+                    okay = mod_l1_entry(va, l1e);
+                    if ( okay && unlikely(shadow_mode_enabled(d)) )
+                        shadow_l1_normal_pt_update(d, req.ptr, l1e, &sh_mapcache);
+                    put_page_type(page);
+                }
+                break;
+            case PGT_l2_page_table:
+                ASSERT( !shadow_mode_refcounts(d) );
+                if ( likely(get_page_type(
+                    page, type_info & (PGT_type_mask|PGT_va_mask))) )
+                {
+                    l2_pgentry_t l2e;
+
+                    /* FIXME: doesn't work with PAE */
+                    l2e = l2e_from_intpte(req.val);
+                    okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn, type_info);
+                    if ( okay && unlikely(shadow_mode_enabled(d)) )
+                        shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache);
+                    put_page_type(page);
+                }
+                break;
+#if CONFIG_PAGING_LEVELS >= 3
+            case PGT_l3_page_table:
+                ASSERT( !shadow_mode_refcounts(d) );
+                if ( likely(get_page_type(page, PGT_l3_page_table)) )
+                {
+                    l3_pgentry_t l3e;
+
+                    /* FIXME: doesn't work with PAE */
+                    l3e = l3e_from_intpte(req.val);
+                    okay = mod_l3_entry(va, l3e, mfn);
+                    if ( okay && unlikely(shadow_mode_enabled(d)) )
+                        shadow_l3_normal_pt_update(d, req.ptr, l3e, &sh_mapcache);
+                    put_page_type(page);
+                }
+                break;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
+            case PGT_l4_page_table:
+                ASSERT( !shadow_mode_refcounts(d) );
+                if ( likely(get_page_type(page, PGT_l4_page_table)) )
+                {
+                    l4_pgentry_t l4e;
+
+                    l4e = l4e_from_intpte(req.val);
+                    okay = mod_l4_entry(va, l4e, mfn);
+                    if ( okay && unlikely(shadow_mode_enabled(d)) )
+                        shadow_l4_normal_pt_update(d, req.ptr, l4e, &sh_mapcache);
+                    put_page_type(page);
+                }
+                break;
+#endif
+            default:
+                /* Not a page table: treat as a raw write to a data frame. */
+                if ( likely(get_page_type(page, PGT_writable_page)) )
+                {
+                    if ( shadow_mode_enabled(d) )
+                    {
+                        shadow_lock(d);
+
+                        if ( shadow_mode_log_dirty(d) )
+                            __mark_dirty(d, mfn);
+
+                        if ( page_is_page_table(page) &&
+                             !page_out_of_sync(page) )
+                        {
+                            shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
+                        }
+                    }
+
+                    *(unsigned long *)va = req.val;
+                    okay = 1;
+
+                    if ( shadow_mode_enabled(d) )
+                        shadow_unlock(d);
+
+                    put_page_type(page);
+                }
+                break;
+            }
+
+            unmap_domain_page_with_cache(va, &mapcache);
+
+            put_page(page);
+            break;
+
+        case MMU_MACHPHYS_UPDATE:
+
+            mfn = req.ptr >> PAGE_SHIFT;
+            gpfn = req.val;
+
+            /* HACK ALERT... Need to think about this some more... */
+            if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
+            {
+                shadow_lock(FOREIGNDOM);
+                printk("privileged guest dom%d requests pfn=%lx to map mfn=%lx for dom%d\n",
+                       d->domain_id, gpfn, mfn, FOREIGNDOM->domain_id);
+                set_machinetophys(mfn, gpfn);
+                set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache);
+                okay = 1;
+                shadow_unlock(FOREIGNDOM);
+                break;
+            }
+
+            if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
+            {
+                MEM_LOG("Could not get page for mach->phys update");
+                break;
+            }
+
+            if ( unlikely(shadow_mode_translate(FOREIGNDOM) && !IS_PRIV(d)) )
+            {
+                MEM_LOG("can't mutate the m2p of translated guests");
+                break;
+            }
+
+            set_machinetophys(mfn, gpfn);
+            okay = 1;
+
+            /*
+             * If in log-dirty mode, mark the corresponding
+             * page as dirty.
+             */
+            if ( unlikely(shadow_mode_log_dirty(FOREIGNDOM)) &&
+                 mark_dirty(FOREIGNDOM, mfn) )
+                FOREIGNDOM->arch.shadow_dirty_block_count++;
+
+            put_page(&frame_table[mfn]);
+            break;
+
+        default:
+            MEM_LOG("Invalid page update command %lx", req.ptr);
+            break;
+        }
+
+        if ( unlikely(!okay) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
+        ureqs++;
+    }
+
+ out:
+    domain_mmap_cache_destroy(&mapcache);
+    domain_mmap_cache_destroy(&sh_mapcache);
+
+    process_deferred_ops(cpu);
+
+    /* Add incremental work we have done to the @done output parameter. */
+    if ( unlikely(pdone != NULL) )
+        __put_user(done + i, pdone);
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+        check_pagetable(v, "post-mmu"); /* debug */
+
+    UNLOCK_BIGLOCK(d);
+    return rc;
+}
+
+/* This function assumes the caller is holding the domain's BIGLOCK
+ * and is running in a shadow mode
+ */
+/*
+ * Install grant-table PTE '_nl1e' at linear address 'va' in domain 'd'
+ * (vcpu 'v').  Returns 0 if the caller must invalidate the TLB entry,
+ * 1 if it need not, or -EINVAL on failure.
+ */
+int update_grant_va_mapping(unsigned long va,
+                            l1_pgentry_t _nl1e,
+                            struct domain *d,
+                            struct vcpu *v)
+{
+    /* Caller must:
+     * . own d's BIGLOCK
+     * . already have 'get_page' correctly on the to-be-installed nl1e
+     * . be responsible for flushing the TLB
+     * . check PTE being installed isn't DISALLOWED
+     */
+
+    int             rc = 0;
+    l1_pgentry_t   *pl1e;
+    l1_pgentry_t    ol1e;
+
+    cleanup_writable_pagetable(d);
+
+    // This is actually overkill - we don't need to sync the L1 itself,
+    // just everything involved in getting to this L1 (i.e. we need
+    // linear_pg_table[l1_linear_offset(va)] to be in sync)...
+    //
+    __shadow_sync_va(v, va);
+
+    pl1e = &linear_pg_table[l1_linear_offset(va)];
+
+    if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
+        rc = -EINVAL;
+    else if ( !shadow_mode_refcounts(d) )
+    {
+        if ( update_l1e(pl1e, ol1e, _nl1e) )
+        {
+            /* Drop the ref the old entry held; decide on TLB invalidation. */
+            put_page_from_l1e(ol1e, d);
+            if ( l1e_get_flags(ol1e) & _PAGE_PRESENT )
+                rc = 0; /* Caller needs to invalidate TLB entry */
+            else
+                rc = 1; /* Caller need not invalidate TLB entry */
+        }
+        else
+            rc = -EINVAL;
+    }
+    else
+    {
+        printk("grant tables and shadow mode currently don't work together\n");
+        BUG();
+    }
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+        shadow_do_update_va_mapping(va, _nl1e, v);
+
+    return rc;
+}
+
+
+/*
+ * Hypercall handler: install PTE 'val32' at linear address 'va' in the
+ * current address space, then perform the TLB maintenance requested by
+ * 'flags' (UVMF_* flush type plus local/all/bitmap target selector).
+ * Returns 0 on success or a negative errno.
+ */
+int do_update_va_mapping(unsigned long va,
+                         unsigned long val32,
+                         unsigned long flags)
+{
+    l1_pgentry_t   val  = l1e_from_intpte(val32);
+    struct vcpu   *v    = current;
+    struct domain *d    = v->domain;
+    unsigned int   cpu  = v->processor;
+    unsigned long  vmask, bmap_ptr;
+    cpumask_t      pmask;
+    int            rc = 0;
+
+    perfc_incrc(calls_to_update_va);
+
+    if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
+        return -EINVAL;
+
+    LOCK_BIGLOCK(d);
+
+    cleanup_writable_pagetable(d);
+
+    if ( unlikely(shadow_mode_enabled(d)) )
+        check_pagetable(v, "pre-va"); /* debug */
+
+    if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
+                                val)) )
+        rc = -EINVAL;
+
+    if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
+    {
+        if ( unlikely(percpu_info[cpu].foreign &&
+                      (shadow_mode_translate(d) ||
+                       shadow_mode_translate(percpu_info[cpu].foreign))) )
+        {
+            /*
+             * The foreign domain's pfn's are in a different namespace.
+             * There's not enough information in just a gpte to figure out
+             * how to (re-)shadow this entry.
+             */
+            domain_crash();
+        }
+
+        rc = shadow_do_update_va_mapping(va, val, v);
+
+        check_pagetable(v, "post-va"); /* debug */
+    }
+
+    switch ( flags & UVMF_FLUSHTYPE_MASK )
+    {
+    case UVMF_TLB_FLUSH:
+        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
+        {
+        case UVMF_LOCAL:
+            if ( unlikely(shadow_mode_enabled(d)) )
+                shadow_sync_all(d);
+            local_flush_tlb();
+            break;
+        case UVMF_ALL:
+            flush_tlb_mask(d->cpumask);
+            break;
+        default:
+            /*
+             * Fix: on a faulting bitmap pointer, bail out rather than
+             * building a flush mask from an uninitialised 'vmask'.
+             */
+            if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
+            {
+                rc = -EFAULT;
+                break;
+            }
+            pmask = vcpumask_to_pcpumask(d, vmask);
+            cpus_and(pmask, pmask, d->cpumask);
+            flush_tlb_mask(pmask);
+            break;
+        }
+        break;
+
+    case UVMF_INVLPG:
+        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
+        {
+        case UVMF_LOCAL:
+            if ( unlikely(shadow_mode_enabled(d)) )
+                shadow_invlpg(current, va);
+            local_flush_tlb_one(va);
+            break;
+        case UVMF_ALL:
+            flush_tlb_one_mask(d->cpumask, va);
+            break;
+        default:
+            /* Same fix as above: don't use 'vmask' if the copy failed. */
+            if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
+            {
+                rc = -EFAULT;
+                break;
+            }
+            pmask = vcpumask_to_pcpumask(d, vmask);
+            cpus_and(pmask, pmask, d->cpumask);
+            flush_tlb_one_mask(pmask, va);
+            break;
+        }
+        break;
+    }
+
+    process_deferred_ops(cpu);
+
+    UNLOCK_BIGLOCK(d);
+
+    return rc;
+}
+
+/*
+ * Privileged variant of do_update_va_mapping(): performs the update with
+ * FOREIGNDOM set to 'domid'.  The domain reference taken here is dropped
+ * by process_deferred_ops(), which do_update_va_mapping() always calls
+ * before returning, so there is no explicit put_domain() on this path.
+ */
+int do_update_va_mapping_otherdomain(unsigned long va,
+                                     unsigned long val32,
+                                     unsigned long flags,
+                                     domid_t domid)
+{
+    unsigned int cpu = smp_processor_id();
+    struct domain *d;
+    int rc;
+
+    if ( unlikely(!IS_PRIV(current->domain)) )
+        return -EPERM;
+
+    percpu_info[cpu].foreign = d = find_domain_by_id(domid);
+    if ( unlikely(d == NULL) )
+    {
+        MEM_LOG("Unknown domain '%u'", domid);
+        return -ESRCH;
+    }
+
+    rc = do_update_va_mapping(va, val32, flags);
+
+    return rc;
+}
+
+
+
+/*************************
+ * Descriptor Tables
+ */
+
+/*
+ * Tear down vcpu v's GDT: zero the entry count, drop the type/ref count
+ * held on every mapped GDT frame, and clear the per-domain mapping slots
+ * and recorded frame numbers.
+ */
+void destroy_gdt(struct vcpu *v)
+{
+    unsigned long mfn;
+    int slot;
+
+    v->arch.guest_context.gdt_ents = 0;
+
+    for ( slot = 0; slot < FIRST_RESERVED_GDT_PAGE; slot++ )
+    {
+        mfn = l1e_get_pfn(v->arch.perdomain_ptes[slot]);
+        if ( mfn != 0 )
+            put_page_and_type(&frame_table[mfn]);
+        v->arch.perdomain_ptes[slot] = l1e_empty();
+        v->arch.guest_context.gdt_frames[slot] = 0;
+    }
+}
+
+
+/*
+ * Install a new GDT for vcpu 'v' from the frame list 'frames' covering
+ * 'entries' descriptors.  Each frame is type-validated as a GDT page
+ * before the old GDT is torn down, so failure leaves the old GDT intact.
+ * Returns 0 on success or -EINVAL.
+ */
+long set_gdt(struct vcpu *v,
+             unsigned long *frames,
+             unsigned int entries)
+{
+    struct domain *d = v->domain;
+    /* NB. There are 512 8-byte entries per GDT page. */
+    int i, nr_pages = (entries + 511) / 512;
+    unsigned long pfn;
+
+    if ( entries > FIRST_RESERVED_GDT_ENTRY )
+        return -EINVAL;
+
+    shadow_sync_all(d);
+
+    /* Check the pages in the new GDT. */
+    for ( i = 0; i < nr_pages; i++ )
+        if ( ((pfn = frames[i]) >= max_page) ||
+             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
+            goto fail;
+
+    /* Tear down the old GDT. */
+    destroy_gdt(v);
+
+    /* Install the new GDT. */
+    v->arch.guest_context.gdt_ents = entries;
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        v->arch.guest_context.gdt_frames[i] = frames[i];
+        v->arch.perdomain_ptes[i] =
+            l1e_from_pfn(frames[i], __PAGE_HYPERVISOR);
+    }
+
+    return 0;
+
+ fail:
+    /* Undo the references taken on the frames validated so far. */
+    while ( i-- > 0 )
+        put_page_and_type(&frame_table[frames[i]]);
+    return -EINVAL;
+}
+
+
+/*
+ * Hypercall handler: set the current vcpu's GDT from a guest-supplied
+ * frame list.  Returns 0 on success, or -EINVAL/-EFAULT.
+ */
+long do_set_gdt(unsigned long *frame_list, unsigned int entries)
+{
+    int nr_pages = (entries + 511) / 512;
+    unsigned long frames[16];
+    long ret;
+
+    /*
+     * Fix: bound 'entries' BEFORE the copy below.  'frames' holds only 16
+     * words; previously a huge guest-supplied 'entries' made nr_pages
+     * exceed that and copy_from_user() smashed the hypervisor stack.
+     * set_gdt() repeats this check, but only after the copy has happened.
+     */
+    if ( entries > FIRST_RESERVED_GDT_ENTRY )
+        return -EINVAL;
+
+    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
+        return -EFAULT;
+
+    LOCK_BIGLOCK(current->domain);
+
+    if ( (ret = set_gdt(current, frames, entries)) == 0 )
+        local_flush_tlb();
+
+    UNLOCK_BIGLOCK(current->domain);
+
+    return ret;
+}
+
+
+/*
+ * Hypercall handler: write the 8-byte descriptor 'desc' at guest-physical
+ * address 'pa'.  The descriptor is vetted by check_descriptor(), and the
+ * target frame's current type (GDT/LDT/writable) is pinned across the
+ * write so a concurrently retyped frame cannot be corrupted.
+ * Returns 0 on success or -EINVAL.
+ */
+long do_update_descriptor(unsigned long pa, u64 desc)
+{
+    struct domain *dom = current->domain;
+    unsigned long gpfn = pa >> PAGE_SHIFT;
+    unsigned long mfn;
+    unsigned int  offset = (pa & ~PAGE_MASK) / sizeof(struct desc_struct);
+    struct desc_struct *gdt_pent, d;
+    struct pfn_info *page;
+    long ret = -EINVAL;
+
+    *(u64 *)&d = desc;
+
+    LOCK_BIGLOCK(dom);
+
+    /* Require a valid, aligned, in-range target and a safe descriptor. */
+    if ( !VALID_MFN(mfn = __gpfn_to_mfn(dom, gpfn)) ||
+         ((pa % sizeof(struct desc_struct)) != 0) ||
+         (mfn >= max_page) ||
+         !check_descriptor(&d) )
+    {
+        UNLOCK_BIGLOCK(dom);
+        return -EINVAL;
+    }
+
+    page = &frame_table[mfn];
+    if ( unlikely(!get_page(page, dom)) )
+    {
+        UNLOCK_BIGLOCK(dom);
+        return -EINVAL;
+    }
+
+    /* Check if the given frame is in use in an unsafe context. */
+    switch ( page->u.inuse.type_info & PGT_type_mask )
+    {
+    case PGT_gdt_page:
+        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
+            goto out;
+        break;
+    case PGT_ldt_page:
+        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
+            goto out;
+        break;
+    default:
+        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
+            goto out;
+        break;
+    }
+
+    if ( shadow_mode_enabled(dom) )
+    {
+        shadow_lock(dom);
+
+        if ( shadow_mode_log_dirty(dom) )
+            __mark_dirty(dom, mfn);
+
+        if ( page_is_page_table(page) && !page_out_of_sync(page) )
+            shadow_mark_mfn_out_of_sync(current, gpfn, mfn);
+    }
+
+    /* All is good so make the update. */
+    gdt_pent = map_domain_page(mfn);
+    memcpy(&gdt_pent[offset], &d, 8);
+    unmap_domain_page(gdt_pent);
+
+    if ( shadow_mode_enabled(dom) )
+        shadow_unlock(dom);
+
+    put_page_type(page);
+
+    ret = 0; /* success */
+
+ out:
+    put_page(page);
+
+    UNLOCK_BIGLOCK(dom);
+
+    return ret;
+}
+
+
+
+/*************************
+ * Writable Pagetables
+ */
+
+/*
+ * Debug tracing for the writable-pagetable code: PTWR_PRINTK() prints
+ * only when the runtime flag 'ptwr_debug' is set, and compiles away
+ * entirely without VERBOSE.  PTWR_PRINT_WHICH renders the enclosing
+ * function's 'which' selector as 'I' (non-zero) or 'A' (zero).
+ */
+#ifdef VERBOSE
+int ptwr_debug = 0x0;
+#define PTWR_PRINTK(_f, _a...) \
+ do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 )
+#define PTWR_PRINT_WHICH (which ? 'I' : 'A')
+#else
+#define PTWR_PRINTK(_f, _a...) ((void)0)
+#endif
+
+/* Re-validate a given p.t. page, given its prior snapshot */
+/*
+ * Compare the live L1 page 'l1page' against 'snapshot' (taken before the
+ * guest was given write access) and re-establish reference counts for
+ * every entry the guest changed.  Returns the number of modified entries.
+ * On an unvalidatable entry the page is rolled back to the snapshot from
+ * that point on and the domain is crashed.
+ */
+int revalidate_l1(struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
+{
+    l1_pgentry_t ol1e, nl1e;
+    int modified = 0, i;
+
+#if 0
+    if ( d->domain_id )
+        printk("%s: l1page mfn=%lx snapshot mfn=%lx\n", __func__,
+               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)l1page)]),
+               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)snapshot)]));
+#endif
+
+    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+    {
+        ol1e = snapshot[i];
+        nl1e = l1page[i];
+
+        if ( likely(l1e_get_intpte(ol1e) == l1e_get_intpte(nl1e)) )
+            continue;
+
+        /* Update number of entries modified. */
+        modified++;
+
+        /*
+         * Fast path for PTEs that have merely been write-protected
+         * (e.g., during a Unix fork()). A strict reduction in privilege.
+         */
+        if ( likely(l1e_get_intpte(ol1e) == (l1e_get_intpte(nl1e)|_PAGE_RW)) )
+        {
+            if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+                put_page_type(&frame_table[l1e_get_pfn(nl1e)]);
+            continue;
+        }
+
+        if ( unlikely(!get_page_from_l1e(nl1e, d)) )
+        {
+            MEM_LOG("ptwr: Could not re-validate l1 page\n");
+            /*
+             * Make the remaining p.t's consistent before crashing, so the
+             * reference counts are correct.
+             */
+            memcpy(&l1page[i], &snapshot[i],
+                   (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
+            domain_crash();
+            break;
+        }
+
+        put_page_from_l1e(ol1e, d);
+    }
+
+    return modified;
+}
+
+
+/* Flush the given writable p.t. page and write-protect it again. */
+void ptwr_flush(struct domain *d, const int which)
+{
+ unsigned long pte, *ptep, l1va;
+ l1_pgentry_t *pl1e;
+ l2_pgentry_t *pl2e;
+ unsigned int modified;
+
+ ASSERT(!shadow_mode_enabled(d));
+
+ if ( unlikely(d->arch.ptwr[which].vcpu != current) )
+ write_ptbase(d->arch.ptwr[which].vcpu);
+
+ l1va = d->arch.ptwr[which].l1va;
+ ptep = (unsigned long *)&linear_pg_table[l1_linear_offset(l1va)];
+
+ /*
+ * STEP 1. Write-protect the p.t. page so no more updates can occur.
+ */
+
+ if ( unlikely(__get_user(pte, ptep)) )
+ {
+ MEM_LOG("ptwr: Could not read pte at %p", ptep);
+ /*
+ * Really a bug. We could read this PTE during the initial fault,
+ * and pagetables can't have changed in the meantime.
+ */
+ BUG();
+ }
+ PTWR_PRINTK("[%c] disconnected_l1va at %p is %lx\n",
+ PTWR_PRINT_WHICH, ptep, pte);
+ pte &= ~_PAGE_RW;
+
+ /* Write-protect the p.t. page in the guest page table. */
+ if ( unlikely(__put_user(pte, ptep)) )
+ {
+ MEM_LOG("ptwr: Could not update pte at %p", ptep);
+ /*
+ * Really a bug. We could write this PTE during the initial fault,
+ * and pagetables can't have changed in the meantime.
+ */
+ BUG();
+ }
+
+ /* Ensure that there are no stale writable mappings in any TLB. */
+ /* NB. INVLPG is a serialising instruction: flushes pending updates. */
+ flush_tlb_one_mask(d->cpumask, l1va);
+ PTWR_PRINTK("[%c] disconnected_l1va at %p now %lx\n",
+ PTWR_PRINT_WHICH, ptep, pte);
+
+ /*
+ * STEP 2. Validate any modified PTEs.
+ */
+
+ pl1e = d->arch.ptwr[which].pl1e;
+ modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
+ unmap_domain_page(pl1e);
+ perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
+ d->arch.ptwr[which].prev_nr_updates = modified;
+
+ /*
+ * STEP 3. Reattach the L1 p.t. page into the current address space.
+ */
+
+ if ( which == PTWR_PT_ACTIVE )
+ {
+ pl2e = &__linear_l2_table[d->arch.ptwr[which].l2_idx];
+ l2e_add_flags(*pl2e, _PAGE_PRESENT);
+ }
+
+ /*
+ * STEP 4. Final tidy-up.
+ */
+
+ d->arch.ptwr[which].l1va = 0;
+
+ if ( unlikely(d->arch.ptwr[which].vcpu != current) )
+ write_ptbase(current);
+}
+
+static int ptwr_emulated_update(
+ unsigned long addr,
+ physaddr_t old,
+ physaddr_t val,
+ unsigned int bytes,
+ unsigned int do_cmpxchg)
+{
+ unsigned long pfn;
+ struct pfn_info *page;
+ l1_pgentry_t pte, ol1e, nl1e, *pl1e;
+ struct domain *d = current->domain;
+
+ /* Aligned access only, thank you. */
+ if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
+ {
+ MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)\n",
+ bytes, addr);
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ /* Turn a sub-word access into a full-word access. */
+ if (bytes != sizeof(physaddr_t))
+ {
+ int rc;
+ physaddr_t full;
+ unsigned int offset = addr & (sizeof(physaddr_t)-1);
+
+ /* Align address; read full word. */
+ addr &= ~(sizeof(physaddr_t)-1);
+ if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
+ sizeof(physaddr_t))) )
+ return rc;
+ /* Mask out bits provided by caller. */
+ full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8));
+ /* Shift the caller value and OR in the missing bits. */
+ val &= (((physaddr_t)1 << (bytes*8)) - 1);
+ val <<= (offset)*8;
+ val |= full;
+ }
+
+ /* Read the PTE that maps the page being updated. */
+ if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
+ sizeof(pte)))
+ {
+ MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n");
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ pfn = l1e_get_pfn(pte);
+ page = &frame_table[pfn];
+
+ /* We are looking only for read-only mappings of p.t. pages. */
+ if ( ((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) != _PAGE_PRESENT) ||
+ ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
+ (page_get_owner(page) != d) )
+ {
+ MEM_LOG("ptwr_emulate: Page is mistyped or bad pte (%lx, %08x)\n",
+ l1e_get_pfn(pte), page->u.inuse.type_info);
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ /* Check the new PTE. */
+ nl1e = l1e_from_intpte(val);
+ if ( unlikely(!get_page_from_l1e(nl1e, d)) )
+ return X86EMUL_UNHANDLEABLE;
+
+ /* Checked successfully: do the update (write or cmpxchg). */
+ pl1e = map_domain_page(page_to_pfn(page));
+ pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
+ if ( do_cmpxchg )
+ {
+ ol1e = l1e_from_intpte(old);
+ if ( cmpxchg((unsigned long *)pl1e, old, val) != old )
+ {
+ unmap_domain_page(pl1e);
+ put_page_from_l1e(nl1e, d);
+ return X86EMUL_CMPXCHG_FAILED;
+ }
+ }
+ else
+ {
+ ol1e = *pl1e;
+ *pl1e = nl1e;
+ }
+ unmap_domain_page(pl1e);
+
+ /* Finally, drop the old PTE. */
+ put_page_from_l1e(ol1e, d);
+
+ return X86EMUL_CONTINUE;
+}
+
+static int ptwr_emulated_write(
+ unsigned long addr,
+ unsigned long val,
+ unsigned int bytes)
+{
+ return ptwr_emulated_update(addr, 0, val, bytes, 0);
+}
+
+static int ptwr_emulated_cmpxchg(
+ unsigned long addr,
+ unsigned long old,
+ unsigned long new,
+ unsigned int bytes)
+{
+ return ptwr_emulated_update(addr, old, new, bytes, 1);
+}
+
+static struct x86_mem_emulator ptwr_mem_emulator = {
+ .read_std = x86_emulate_read_std,
+ .write_std = x86_emulate_write_std,
+ .read_emulated = x86_emulate_read_std,
+ .write_emulated = ptwr_emulated_write,
+ .cmpxchg_emulated = ptwr_emulated_cmpxchg
+};
+
+/* Write page fault handler: check if guest is trying to modify a PTE. */
+int ptwr_do_page_fault(struct domain *d, unsigned long addr)
+{
+ unsigned long pfn;
+ struct pfn_info *page;
+ l1_pgentry_t pte;
+ l2_pgentry_t *pl2e;
+ int which;
+ u32 l2_idx;
+
+ if ( unlikely(shadow_mode_enabled(d)) )
+ return 0;
+
+ /*
+ * Attempt to read the PTE that maps the VA being accessed. By checking for
+ * PDE validity in the L2 we avoid many expensive fixups in __get_user().
+ */
+ if ( !(l2e_get_flags(__linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
+ _PAGE_PRESENT) ||
+ __copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)],
+ sizeof(pte)) )
+ {
+ return 0;
+ }
+
+ pfn = l1e_get_pfn(pte);
+ page = &frame_table[pfn];
+
+ /* We are looking only for read-only mappings of p.t. pages. */
+ if ( ((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) != _PAGE_PRESENT) ||
+ ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
+ ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
+ (page_get_owner(page) != d) )
+ {
+ return 0;
+ }
+
+ /* x86/64: Writable pagetable code needs auditing. Use emulator for now. */
+#if defined(__x86_64__)
+ goto emulate;
+#endif
+
+ /* Get the L2 index at which this L1 p.t. is always mapped. */
+ l2_idx = page->u.inuse.type_info & PGT_va_mask;
+ if ( unlikely(l2_idx >= PGT_va_unknown) )
+ goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
+ l2_idx >>= PGT_va_shift;
+
+ if ( unlikely(l2_idx == (addr >> L2_PAGETABLE_SHIFT)) )
+ goto emulate; /* Urk! Pagetable maps itself! */
+
+ /*
+ * Is the L1 p.t. mapped into the current address space? If so we call it
+ * an ACTIVE p.t., otherwise it is INACTIVE.
+ */
+ pl2e = &__linear_l2_table[l2_idx];
+ which = PTWR_PT_INACTIVE;
+ if ( (l2e_get_pfn(*pl2e)) == pfn )
+ {
+ /*
+ * Check the PRESENT bit to set ACTIVE mode.
+ * If the PRESENT bit is clear, we may be conflicting with the current
+ * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
+ * The ptwr_flush call below will restore the PRESENT bit.
+ */
+ if ( likely(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
+ (d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
+ (l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
+ which = PTWR_PT_ACTIVE;
+ }
+
+ /*
+ * If this is a multi-processor guest then ensure that the page is hooked
+ * into at most one L2 table, which must be the one running on this VCPU.
+ */
+ if ( (d->vcpu[0]->next_in_list != NULL) &&
+ ((page->u.inuse.type_info & PGT_count_mask) !=
+ (!!(page->u.inuse.type_info & PGT_pinned) +
+ (which == PTWR_PT_ACTIVE))) )
+ {
+ /* Could be conflicting writable mappings from other VCPUs. */
+ cleanup_writable_pagetable(d);
+ goto emulate;
+ }
+
+ PTWR_PRINTK("[%c] page_fault on l1 pt at va %lx, pt for %08x, "
+ "pfn %lx\n", PTWR_PRINT_WHICH,
+ addr, l2_idx << L2_PAGETABLE_SHIFT, pfn);
+
+ /*
+ * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
+ * time. If there is already one, we must flush it out.
+ */
+ if ( d->arch.ptwr[which].l1va )
+ ptwr_flush(d, which);
+
+ /*
+ * If last batch made no updates then we are probably stuck. Emulate this
+ * update to ensure we make progress.
+ */
+ if ( d->arch.ptwr[which].prev_nr_updates == 0 )
+ {
+ /* Ensure that we don't get stuck in an emulation-only rut. */
+ d->arch.ptwr[which].prev_nr_updates = 1;
+ goto emulate;
+ }
+
+ d->arch.ptwr[which].l1va = addr | 1;
+ d->arch.ptwr[which].l2_idx = l2_idx;
+ d->arch.ptwr[which].vcpu = current;
+
+ /* For safety, disconnect the L1 p.t. page from current space. */
+ if ( which == PTWR_PT_ACTIVE )
+ {
+ l2e_remove_flags(*pl2e, _PAGE_PRESENT);
+ flush_tlb_mask(d->cpumask);
+ }
+
+ /* Temporarily map the L1 page, and make a copy of it. */
+ d->arch.ptwr[which].pl1e = map_domain_page(pfn);
+ memcpy(d->arch.ptwr[which].page,
+ d->arch.ptwr[which].pl1e,
+ L1_PAGETABLE_ENTRIES * sizeof(l1_pgentry_t));
+
+ /* Finally, make the p.t. page writable by the guest OS. */
+ l1e_add_flags(pte, _PAGE_RW);
+ if ( unlikely(__copy_to_user(&linear_pg_table[addr>>PAGE_SHIFT],
+ &pte, sizeof(pte))) )
+ {
+ MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
+ &linear_pg_table[addr>>PAGE_SHIFT]);
+ /* Toss the writable pagetable state and crash. */
+ unmap_domain_page(d->arch.ptwr[which].pl1e);
+ d->arch.ptwr[which].l1va = 0;
+ domain_crash();
+ return 0;
+ }
+
+ return EXCRET_fault_fixed;
+
+ emulate:
+ if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
+ &ptwr_mem_emulator, BITS_PER_LONG/8) )
+ return 0;
+ perfc_incrc(ptwr_emulations);
+ return EXCRET_fault_fixed;
+}
+
+int ptwr_init(struct domain *d)
+{
+ void *x = alloc_xenheap_page();
+ void *y = alloc_xenheap_page();
+
+ if ( (x == NULL) || (y == NULL) )
+ {
+ if ( x != NULL )
+ free_xenheap_page(x);
+ if ( y != NULL )
+ free_xenheap_page(y);
+ return -ENOMEM;
+ }
+
+ d->arch.ptwr[PTWR_PT_ACTIVE].page = x;
+ d->arch.ptwr[PTWR_PT_INACTIVE].page = y;
+
+ return 0;
+}
+
+void ptwr_destroy(struct domain *d)
+{
+ cleanup_writable_pagetable(d);
+ free_xenheap_page(d->arch.ptwr[PTWR_PT_ACTIVE].page);
+ free_xenheap_page(d->arch.ptwr[PTWR_PT_INACTIVE].page);
+}
+
+void cleanup_writable_pagetable(struct domain *d)
+{
+ if ( unlikely(!VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
+ return;
+
+ if ( unlikely(shadow_mode_enabled(d)) )
+ {
+ shadow_sync_all(d);
+ }
+ else
+ {
+ if ( d->arch.ptwr[PTWR_PT_ACTIVE].l1va )
+ ptwr_flush(d, PTWR_PT_ACTIVE);
+ if ( d->arch.ptwr[PTWR_PT_INACTIVE].l1va )
+ ptwr_flush(d, PTWR_PT_INACTIVE);
+ }
+}
+
+int map_pages_to_xen(
+ unsigned long virt,
+ unsigned long pfn,
+ unsigned long nr_pfns,
+ unsigned long flags)
+{
+ l2_pgentry_t *pl2e, ol2e;
+ l1_pgentry_t *pl1e, ol1e;
+ unsigned int i;
+
+ unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES);
+ flags &= ~MAP_SMALL_PAGES;
+
+ while ( nr_pfns != 0 )
+ {
+ pl2e = virt_to_xen_l2e(virt);
+
+ if ( ((((virt>>PAGE_SHIFT) | pfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
+ (nr_pfns >= (1<<PAGETABLE_ORDER)) &&
+ !map_small_pages )
+ {
+ /* Super-page mapping. */
+ ol2e = *pl2e;
+ *pl2e = l2e_from_pfn(pfn, flags|_PAGE_PSE);
+
+ if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
+ {
+ local_flush_tlb_pge();
+ if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
+ free_xen_pagetable(l2e_get_page(*pl2e));
+ }
+
+ virt += 1UL << L2_PAGETABLE_SHIFT;
+ pfn += 1UL << PAGETABLE_ORDER;
+ nr_pfns -= 1UL << PAGETABLE_ORDER;
+ }
+ else
+ {
+ /* Normal page mapping. */
+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+ {
+ pl1e = page_to_virt(alloc_xen_pagetable());
+ clear_page(pl1e);
+ *pl2e = l2e_from_page(virt_to_page(pl1e), __PAGE_HYPERVISOR);
+ }
+ else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
+ {
+ pl1e = page_to_virt(alloc_xen_pagetable());
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ pl1e[i] = l1e_from_pfn(
+ l2e_get_pfn(*pl2e) + i,
+ l2e_get_flags(*pl2e) & ~_PAGE_PSE);
+ *pl2e = l2e_from_page(virt_to_page(pl1e), __PAGE_HYPERVISOR);
+ local_flush_tlb_pge();
+ }
+
+ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
+ ol1e = *pl1e;
+ *pl1e = l1e_from_pfn(pfn, flags);
+ if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
+ local_flush_tlb_one(virt);
+
+ virt += 1UL << L1_PAGETABLE_SHIFT;
+ pfn += 1UL;
+ nr_pfns -= 1UL;
+ }
+ }
+
+ return 0;
+}
+
+void __set_fixmap(
+ enum fixed_addresses idx, unsigned long p, unsigned long flags)
+{
+ if ( unlikely(idx >= __end_of_fixed_addresses) )
+ BUG();
+ map_pages_to_xen(fix_to_virt(idx), p >> PAGE_SHIFT, 1, flags);
+}
+
+#ifdef MEMORY_GUARD
+
+void memguard_init(void)
+{
+ map_pages_to_xen(
+ PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
+ __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
+}
+
+static void __memguard_change_range(void *p, unsigned long l, int guard)
+{
+ unsigned long _p = (unsigned long)p;
+ unsigned long _l = (unsigned long)l;
+ unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
+
+ /* Ensure we are dealing with a page-aligned whole number of pages. */
+ ASSERT((_p&PAGE_MASK) != 0);
+ ASSERT((_l&PAGE_MASK) != 0);
+ ASSERT((_p&~PAGE_MASK) == 0);
+ ASSERT((_l&~PAGE_MASK) == 0);
+
+ if ( guard )
+ flags &= ~_PAGE_PRESENT;
+
+ map_pages_to_xen(
+ _p, virt_to_phys(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags);
+}
+
+void memguard_guard_range(void *p, unsigned long l)
+{
+ __memguard_change_range(p, l, 1);
+}
+
+void memguard_unguard_range(void *p, unsigned long l)
+{
+ __memguard_change_range(p, l, 0);
+}
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
index 7db6f8a6da..234b14106f 100644
--- a/xen/arch/x86/mpparse.c
+++ b/xen/arch/x86/mpparse.c
@@ -1,5 +1,5 @@
/*
- * Intel Multiprocessor Specificiation 1.1 and 1.4
+ * Intel Multiprocessor Specification 1.1 and 1.4
* compliant MP-table parsing routines.
*
* (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
@@ -14,44 +14,46 @@
*/
#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/kernel.h>
+#include <xen/types.h>
#include <xen/irq.h>
-#include <xen/smp.h>
-#include <xen/mm.h>
+#include <xen/init.h>
#include <xen/acpi.h>
+#include <xen/delay.h>
+#include <xen/sched.h>
+
+#include <asm/mc146818rtc.h>
+#include <asm/bitops.h>
+#include <asm/smp.h>
#include <asm/acpi.h>
-#include <asm/io.h>
-#include <asm/apic.h>
+#include <asm/mtrr.h>
#include <asm/mpspec.h>
-#include <asm/flushtlb.h>
-#include <asm/smpboot.h>
+#include <asm/io_apic.h>
-int numnodes = 1; /* XXX Xen */
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#include <bios_ebda.h>
/* Have we found an MP table */
int smp_found_config;
+unsigned int __initdata maxcpus = NR_CPUS;
/*
* Various Linux-internal data structures created from the
* MP-table.
*/
int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES];
+int mp_bus_id_to_node [MAX_MP_BUSSES];
+int mp_bus_id_to_local [MAX_MP_BUSSES];
int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
int mp_current_pci_id;
-int *mp_bus_id_to_type;
-int *mp_bus_id_to_node;
-int *mp_bus_id_to_local;
-int *mp_bus_id_to_pci_bus;
-int max_mp_busses;
-int max_irq_sources;
/* I/O APIC entries */
struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
/* # of MP IRQ source entries */
-struct mpc_config_intsrc *mp_irqs;
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* MP IRQ source entries */
int mp_irq_entries;
@@ -65,24 +67,18 @@ unsigned long mp_lapic_addr;
unsigned int boot_cpu_physical_apicid = -1U;
unsigned int boot_cpu_logical_apicid = -1U;
/* Internal processor count */
-static unsigned int num_processors;
+static unsigned int __initdata num_processors;
/* Bitmask of physically existing CPUs */
-unsigned long phys_cpu_present_map;
-unsigned long logical_cpu_present_map;
+physid_mask_t phys_cpu_present_map;
-#ifdef CONFIG_X86_CLUSTERED_APIC
-unsigned char esr_disable = 0;
-unsigned char clustered_apic_mode = CLUSTERED_APIC_NONE;
-unsigned int apic_broadcast_id = APIC_BROADCAST_ID_APIC;
-#endif
-unsigned char raw_phys_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
/*
* Intel MP BIOS table parsing routines:
*/
-#ifndef CONFIG_X86_VISWS_APIC
+
/*
* Checksum an MP configuration block.
*/
@@ -98,48 +94,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
}
/*
- * Processor encoding in an MP configuration block
- */
-
-static char __init *mpc_family(int family,int model)
-{
- static char n[32];
- static char *model_defs[]=
- {
- "80486DX","80486DX",
- "80486SX","80486DX/2 or 80487",
- "80486SL","80486SX/2",
- "Unknown","80486DX/2-WB",
- "80486DX/4","80486DX/4-WB"
- };
-
- switch (family) {
- case 0x04:
- if (model < 10)
- return model_defs[model];
- break;
-
- case 0x05:
- return("Pentium(tm)");
-
- case 0x06:
- return("Pentium(tm) Pro");
-
- case 0x0F:
- if (model == 0x00)
- return("Pentium 4(tm)");
- if (model == 0x01)
- return("Pentium 4(tm)");
- if (model == 0x02)
- return("Pentium 4(tm) XEON(tm)");
- if (model == 0x0F)
- return("Special controller");
- }
- sprintf(n,"Unknown CPU [%d:%d]",family, model);
- return n;
-}
-
-/*
* Have to match translation table entries to main table entries by counter
* hence the mpc_record variable .... can't see a less disgusting way of
* doing this ....
@@ -148,30 +102,30 @@ static char __init *mpc_family(int family,int model)
static int mpc_record;
static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
+#ifdef CONFIG_X86_NUMAQ
+static int MP_valid_apicid(int apicid, int version)
+{
+ return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
+}
+#else
+static int MP_valid_apicid(int apicid, int version)
+{
+ if (version >= 0x14)
+ return apicid < 0xff;
+ else
+ return apicid < 0xf;
+}
+#endif
+
void __init MP_processor_info (struct mpc_config_processor *m)
{
- int ver, quad, logical_apicid;
+ int ver, apicid;
+ physid_mask_t tmp;
if (!(m->mpc_cpuflag & CPU_ENABLED))
return;
- logical_apicid = m->mpc_apicid;
- if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
- quad = translation_table[mpc_record]->trans_quad;
- logical_apicid = (quad << 4) +
- (m->mpc_apicid ? m->mpc_apicid << 1 : 1);
- printk("Processor #%d %s APIC version %d (quad %d, apic %d)\n",
- m->mpc_apicid,
- mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
- m->mpc_apicver, quad, logical_apicid);
- } else {
- printk("Processor #%d %s APIC version %d\n",
- m->mpc_apicid,
- mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
- m->mpc_apicver);
- }
+ apicid = mpc_apic_id(m, translation_table[mpc_record]);
if (m->mpc_featureflag&(1<<0))
Dprintk(" Floating point unit present.\n");
@@ -224,68 +178,68 @@ void __init MP_processor_info (struct mpc_config_processor *m)
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
Dprintk(" Bootup CPU\n");
boot_cpu_physical_apicid = m->mpc_apicid;
- boot_cpu_logical_apicid = logical_apicid;
+ boot_cpu_logical_apicid = apicid;
}
- if (num_processors >= NR_CPUS){
- printk(KERN_WARNING "NR_CPUS limit of %i reached. Cannot "
- "boot CPU(apicid 0x%x).\n", NR_CPUS, m->mpc_apicid);
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ " Processor ignored.\n", NR_CPUS);
+ return;
+ }
+
+ if (num_processors >= maxcpus) {
+ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+ " Processor ignored.\n", maxcpus);
return;
}
num_processors++;
+ ver = m->mpc_apicver;
- if (m->mpc_apicid > MAX_APICS) {
- printk("Processor #%d INVALID. (Max ID: %d).\n",
+ if (!MP_valid_apicid(apicid, ver)) {
+ printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n",
m->mpc_apicid, MAX_APICS);
--num_processors;
return;
}
- ver = m->mpc_apicver;
- logical_cpu_present_map |= 1 << (num_processors-1);
- phys_cpu_present_map |= apicid_to_phys_cpu_present(m->mpc_apicid);
-
+ tmp = apicid_to_cpu_present(apicid);
+ physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
+
/*
* Validate version
*/
if (ver == 0x0) {
- printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+ printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
ver = 0x10;
}
apic_version[m->mpc_apicid] = ver;
- raw_phys_apicid[num_processors - 1] = m->mpc_apicid;
+ bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
}
static void __init MP_bus_info (struct mpc_config_bus *m)
{
char str[7];
- int quad;
memcpy(str, m->mpc_bustype, 6);
str[6] = 0;
-
- if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
- quad = translation_table[mpc_record]->trans_quad;
- mp_bus_id_to_node[m->mpc_busid] = quad;
- mp_bus_id_to_local[m->mpc_busid] = translation_table[mpc_record]->trans_local;
- quad_local_to_mp_bus_id[quad][translation_table[mpc_record]->trans_local] = m->mpc_busid;
- printk("Bus #%d is %s (node %d)\n", m->mpc_busid, str, quad);
- } else {
- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
- }
+
+ mpc_oem_bus_info(m, str, translation_table[mpc_record]);
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+ mpc_oem_pci_bus(m, translation_table[mpc_record]);
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
mp_current_pci_id++;
} else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
} else {
- printk("Unknown bustype %s - ignoring\n", str);
+ printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
}
}
@@ -294,10 +248,10 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
if (!(m->mpc_flags & MPC_APIC_USABLE))
return;
- printk("I/O APIC #%d Version %d at 0x%X.\n",
+ printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
if (nr_ioapics >= MAX_IO_APICS) {
- printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
+ printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
MAX_IO_APICS, nr_ioapics);
panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
}
@@ -318,7 +272,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- if (++mp_irq_entries == max_irq_sources)
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!!\n");
}
@@ -344,16 +298,17 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
BUG();
}
+#ifdef CONFIG_X86_NUMAQ
static void __init MP_translation_info (struct mpc_config_translation *m)
{
- printk("Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+ printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
if (mpc_record >= MAX_MPC_ENTRY)
- printk("MAX_MPC_ENTRY exceeded!\n");
+ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
else
translation_table[mpc_record] = m; /* stash this for later */
- if (m->trans_quad+1 > numnodes)
- numnodes = m->trans_quad+1;
+ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+ node_set_online(m->trans_quad);
}
/*
@@ -366,10 +321,11 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
int count = sizeof (*oemtable); /* the header size */
unsigned char *oemptr = ((unsigned char *)oemtable)+count;
- printk("Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+ mpc_record = 0;
+ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
{
- printk("SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
oemtable->oem_signature[0],
oemtable->oem_signature[1],
oemtable->oem_signature[2],
@@ -378,7 +334,7 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
}
if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
{
- printk("SMP oem mptable: checksum error!\n");
+ printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
return;
}
while (count < oemtable->oem_length) {
@@ -395,36 +351,42 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
}
default:
{
- printk("Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+ printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
return;
}
}
}
}
+static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+ char *productid)
+{
+ if (strncmp(oem, "IBM NUMA", 8))
+ printk("Warning! May not be a NUMA-Q system!\n");
+ if (mpc->mpc_oemptr)
+ smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+ mpc->mpc_oemsize);
+}
+#endif /* CONFIG_X86_NUMAQ */
+
/*
* Read/parse the MPC
*/
static int __init smp_read_mpc(struct mp_config_table *mpc)
{
- char oem[16], prod[14];
+ char str[16];
+ char oem[10];
int count=sizeof(*mpc);
unsigned char *mpt=((unsigned char *)mpc)+count;
- int num_bus = 0;
- int num_irq = 0;
- unsigned char *bus_data;
if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
- panic("SMP mptable: bad signature [%c%c%c%c]!\n",
- mpc->mpc_signature[0],
- mpc->mpc_signature[1],
- mpc->mpc_signature[2],
- mpc->mpc_signature[3]);
+ printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+ *(u32 *)mpc->mpc_signature);
return 0;
}
if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
- panic("SMP mptable: checksum error!\n");
+ printk(KERN_ERR "SMP mptable: checksum error!\n");
return 0;
}
if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
@@ -438,14 +400,14 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
memcpy(oem,mpc->mpc_oem,8);
oem[8]=0;
- printk("OEM ID: %s ",oem);
+ printk(KERN_INFO "OEM ID: %s ",oem);
- memcpy(prod,mpc->mpc_productid,12);
- prod[12]=0;
- printk("Product ID: %s ",prod);
+ memcpy(str,mpc->mpc_productid,12);
+ str[12]=0;
+ printk("Product ID: %s ",str);
+
+ mps_oem_check(mpc, oem, str);
- detect_clustered_apic(oem, prod);
-
printk("APIC at: 0x%X\n",mpc->mpc_lapic);
/*
@@ -455,77 +417,10 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
if (!acpi_lapic)
mp_lapic_addr = mpc->mpc_lapic;
- if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) {
- /* We need to process the oem mpc tables to tell us which quad things are in ... */
- mpc_record = 0;
- smp_read_mpc_oem((struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr, mpc->mpc_oemsize);
- mpc_record = 0;
- }
-
- /* Pre-scan to determine the number of bus and
- * interrupts records we have
- */
- while (count < mpc->mpc_length) {
- switch (*mpt) {
- case MP_PROCESSOR:
- mpt += sizeof(struct mpc_config_processor);
- count += sizeof(struct mpc_config_processor);
- break;
- case MP_BUS:
- ++num_bus;
- mpt += sizeof(struct mpc_config_bus);
- count += sizeof(struct mpc_config_bus);
- break;
- case MP_INTSRC:
- ++num_irq;
- mpt += sizeof(struct mpc_config_intsrc);
- count += sizeof(struct mpc_config_intsrc);
- break;
- case MP_IOAPIC:
- mpt += sizeof(struct mpc_config_ioapic);
- count += sizeof(struct mpc_config_ioapic);
- break;
- case MP_LINTSRC:
- mpt += sizeof(struct mpc_config_lintsrc);
- count += sizeof(struct mpc_config_lintsrc);
- break;
- default:
- count = mpc->mpc_length;
- break;
- }
- }
- /*
- * Paranoia: Allocate one extra of both the number of busses and number
- * of irqs, and make sure that we have at least 4 interrupts per PCI
- * slot. But some machines do not report very many busses, so we need
- * to fall back on the older defaults.
- */
- ++num_bus;
- max_mp_busses = max(num_bus, MAX_MP_BUSSES);
- if (num_irq < (4 * max_mp_busses))
- num_irq = 4 * num_bus; /* 4 intr/PCI slot */
- ++num_irq;
- max_irq_sources = max(num_irq, MAX_IRQ_SOURCES);
-
- count = (max_mp_busses * sizeof(int)) * 4;
- count += (max_irq_sources * sizeof(struct mpc_config_intsrc));
- bus_data = (void *)alloc_xenheap_pages(get_order(count));
- if (!bus_data) {
- printk(KERN_ERR "SMP mptable: out of memory!\n");
- return 0;
- }
- mp_bus_id_to_type = (int *)&bus_data[0];
- mp_bus_id_to_node = (int *)&bus_data[(max_mp_busses * sizeof(int))];
- mp_bus_id_to_local = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 2];
- mp_bus_id_to_pci_bus = (int *)&bus_data[(max_mp_busses * sizeof(int)) * 3];
- mp_irqs = (struct mpc_config_intsrc *)&bus_data[(max_mp_busses * sizeof(int)) * 4];
- memset(mp_bus_id_to_pci_bus, -1, max_mp_busses * sizeof(int));
-
/*
* Now process the configuration blocks.
*/
- count = sizeof(*mpc);
- mpt = ((unsigned char *)mpc)+count;
+ mpc_record = 0;
while (count < mpc->mpc_length) {
switch(*mpt) {
case MP_PROCESSOR:
@@ -584,21 +479,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
++mpc_record;
}
-
- if (clustered_apic_mode){
- phys_cpu_present_map = logical_cpu_present_map;
- }
-
-
- printk("Enabling APIC mode: ");
- if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- printk("Clustered Logical. ");
- else if(clustered_apic_mode == CLUSTERED_APIC_XAPIC)
- printk("Physical. ");
- else
- printk("Flat. ");
- printk("Using %d I/O APICs\n",nr_ioapics);
-
+ clustered_apic_check();
if (!num_processors)
printk(KERN_ERR "SMP mptable: no processors registered!\n");
return num_processors;
@@ -634,12 +515,12 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
* If it does, we assume it's valid.
*/
if (mpc_default_type == 5) {
- printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+ printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
- printk("ELCR contains invalid data... not using ELCR\n");
+ printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
else {
- printk("Using ELCR to identify PCI interrupts\n");
+ printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
ELCR_fallback = 1;
}
}
@@ -686,24 +567,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
struct mpc_config_lintsrc lintsrc;
int linttypes[2] = { mp_ExtINT, mp_NMI };
int i;
- struct {
- int mp_bus_id_to_type[MAX_MP_BUSSES];
- int mp_bus_id_to_node[MAX_MP_BUSSES];
- int mp_bus_id_to_local[MAX_MP_BUSSES];
- int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
- struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
- } *bus_data;
-
- bus_data = (void *)alloc_xenheap_pages(get_order(sizeof(*bus_data)));
- if (!bus_data)
- panic("SMP mptable: out of memory!\n");
- mp_bus_id_to_type = bus_data->mp_bus_id_to_type;
- mp_bus_id_to_node = bus_data->mp_bus_id_to_node;
- mp_bus_id_to_local = bus_data->mp_bus_id_to_local;
- mp_bus_id_to_pci_bus = bus_data->mp_bus_id_to_pci_bus;
- mp_irqs = bus_data->mp_irqs;
- for (i = 0; i < MAX_MP_BUSSES; ++i)
- mp_bus_id_to_pci_bus[i] = -1;
/*
* local APIC has default address
@@ -732,7 +595,8 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
bus.mpc_busid = 0;
switch (mpc_default_type) {
default:
- printk("???\nUnknown standard configuration %d\n",
+ printk("???\n");
+ printk(KERN_ERR "Unknown standard configuration %d\n",
mpc_default_type);
/* fall through */
case 1:
@@ -790,7 +654,7 @@ void __init get_smp_config (void)
/*
* ACPI may be used to obtain the entire SMP configuration or just to
- * enumerate/configure processors (CONFIG_ACPI_HT_ONLY). Note that
+ * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that
* ACPI supports both logical (e.g. Hyper-Threading) and physical
* processors, where MPS only supports physical.
*/
@@ -801,12 +665,12 @@ void __init get_smp_config (void)
else if (acpi_lapic)
printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
- printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+ printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
if (mpf->mpf_feature2 & (1<<7)) {
- printk(" IMCR and PIC compatibility mode.\n");
+ printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
pic_mode = 1;
} else {
- printk(" Virtual Wire compatibility mode.\n");
+ printk(KERN_INFO " Virtual Wire compatibility mode.\n");
pic_mode = 0;
}
@@ -815,7 +679,7 @@ void __init get_smp_config (void)
*/
if (mpf->mpf_feature1 != 0) {
- printk("Default MP configuration #%d\n", mpf->mpf_feature1);
+ printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
construct_default_ISA_mptable(mpf->mpf_feature1);
} else if (mpf->mpf_physptr) {
@@ -838,7 +702,7 @@ void __init get_smp_config (void)
if (!mp_irq_entries) {
struct mpc_config_bus bus;
- printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
bus.mpc_type = MP_BUS;
bus.mpc_busid = 0;
@@ -851,7 +715,7 @@ void __init get_smp_config (void)
} else
BUG();
- printk("Processors: %d\n", num_processors);
+ printk(KERN_INFO "Processors: %d\n", num_processors);
/*
* Only use the first configuration found.
*/
@@ -859,7 +723,7 @@ void __init get_smp_config (void)
static int __init smp_scan_config (unsigned long base, unsigned long length)
{
- unsigned long *bp = phys_to_virt(base);
+ unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
@@ -875,11 +739,27 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
|| (mpf->mpf_specification == 4)) ) {
smp_found_config = 1;
- printk("found SMP MP-table at %08lx\n",
+ printk(KERN_INFO "found SMP MP-table at %08lx\n",
virt_to_phys(mpf));
+#if 0
reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
- if (mpf->mpf_physptr)
- reserve_bootmem((unsigned long)mpf->mpf_physptr, PAGE_SIZE);
+ if (mpf->mpf_physptr) {
+ /*
+ * We cannot access to MPC table to compute
+ * table size yet, as only few megabytes from
+ * the bottom is mapped now.
+ * PC-9800's MPC table places on the very last
+ * of physical memory; so that simply reserving
+ * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+ * in reserve_bootmem.
+ */
+ unsigned long size = PAGE_SIZE;
+ unsigned long end = max_low_pfn * PAGE_SIZE;
+ if (mpf->mpf_physptr + size > end)
+ size = end - mpf->mpf_physptr;
+ reserve_bootmem(mpf->mpf_physptr, size);
+ }
+#endif
mpf_found = mpf;
return 1;
}
@@ -889,7 +769,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
return 0;
}
-void __init find_intel_smp (void)
+void __init find_smp_config (void)
{
unsigned int address;
@@ -913,53 +793,20 @@ void __init find_intel_smp (void)
* there is a real-mode segmented pointer pointing to the
* 4K EBDA area at 0x40E, calculate and scan it here.
*
- * NOTE! There were Linux loaders that will corrupt the EBDA
+ * NOTE! There are Linux loaders that will corrupt the EBDA
* area, and as such this kind of SMP config may be less
* trustworthy, simply because the SMP table may have been
- * stomped on during early boot. Thankfully the bootloaders
- * now honour the EBDA.
+ * stomped on during early boot. These loaders are buggy and
+ * should be fixed.
+ *
+ * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
*/
- address = *(unsigned short *)phys_to_virt(0x40E);
- address <<= 4;
- smp_scan_config(address, 0x1000);
+ address = get_bios_ebda();
+ if (address)
+ smp_scan_config(address, 0x400);
}
-#else
-
-/*
- * The Visual Workstation is Intel MP compliant in the hardware
- * sense, but it doesn't have a BIOS(-configuration table).
- * No problem for Linux.
- */
-void __init find_visws_smp(void)
-{
- smp_found_config = 1;
-
- phys_cpu_present_map |= 2; /* or in id 1 */
- apic_version[1] |= 0x10; /* integrated APIC */
- apic_version[0] |= 0x10;
-
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-}
-
-#endif
-
-/*
- * - Intel MP Configuration Table
- * - or SGI Visual Workstation configuration
- */
-void __init find_smp_config (void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- find_intel_smp();
-#endif
-#ifdef CONFIG_VISWS
- find_visws_smp();
-#endif
-}
-
-
/* --------------------------------------------------------------------------
ACPI-based MP Configuration
-------------------------------------------------------------------------- */
@@ -987,7 +834,7 @@ void __init mp_register_lapic (
struct mpc_config_processor processor;
int boot_cpu = 0;
- if (id >= MAX_APICS) {
+ if (MAX_APICS - id <= 0) {
printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
id, MAX_APICS);
return;
@@ -998,14 +845,7 @@ void __init mp_register_lapic (
processor.mpc_type = MP_PROCESSOR;
processor.mpc_apicid = id;
-
- /*
- * mp_register_lapic_address() which is called before the
- * current function does the fixmap of FIX_APIC_BASE.
- * Read in the correct APIC version from there
- */
- processor.mpc_apicver = apic_read(APIC_LVR);
-
+ processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
@@ -1017,32 +857,32 @@ void __init mp_register_lapic (
MP_processor_info(&processor);
}
-#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
+#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT))
#define MP_ISA_BUS 0
#define MP_MAX_IOAPIC_PIN 127
struct mp_ioapic_routing {
int apic_id;
- int irq_start;
- int irq_end;
+ int gsi_base;
+ int gsi_end;
u32 pin_programmed[4];
} mp_ioapic_routing[MAX_IO_APICS];
-static int __init mp_find_ioapic (
- int irq)
+static int mp_find_ioapic (
+ int gsi)
{
int i = 0;
- /* Find the IOAPIC that manages this IRQ. */
+ /* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
- if ((irq >= mp_ioapic_routing[i].irq_start)
- && (irq <= mp_ioapic_routing[i].irq_end))
+ if ((gsi >= mp_ioapic_routing[i].gsi_base)
+ && (gsi <= mp_ioapic_routing[i].gsi_end))
return i;
}
- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for IRQ %d\n", irq);
+ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
return -1;
}
@@ -1051,7 +891,7 @@ static int __init mp_find_ioapic (
void __init mp_register_ioapic (
u8 id,
u32 address,
- u32 irq_base)
+ u32 gsi_base)
{
int idx = 0;
@@ -1077,19 +917,19 @@ void __init mp_register_ioapic (
mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
/*
- * Build basic IRQ lookup table to facilitate irq->io_apic lookups
- * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
+ * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+ * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
- mp_ioapic_routing[idx].irq_start = irq_base;
- mp_ioapic_routing[idx].irq_end = irq_base +
+ mp_ioapic_routing[idx].gsi_base = gsi_base;
+ mp_ioapic_routing[idx].gsi_end = gsi_base +
io_apic_get_redir_entries(idx);
- printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
- "IRQ %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+ printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
- mp_ioapic_routing[idx].irq_start,
- mp_ioapic_routing[idx].irq_end);
+ mp_ioapic_routing[idx].gsi_base,
+ mp_ioapic_routing[idx].gsi_end);
return;
}
@@ -1099,21 +939,19 @@ void __init mp_override_legacy_irq (
u8 bus_irq,
u8 polarity,
u8 trigger,
- u32 global_irq)
+ u32 gsi)
{
struct mpc_config_intsrc intsrc;
- int i = 0;
- int found = 0;
int ioapic = -1;
int pin = -1;
/*
- * Convert 'global_irq' to 'ioapic.pin'.
+ * Convert 'gsi' to 'ioapic.pin'.
*/
- ioapic = mp_find_ioapic(global_irq);
+ ioapic = mp_find_ioapic(gsi);
if (ioapic < 0)
return;
- pin = global_irq - mp_ioapic_routing[ioapic].irq_start;
+ pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
/*
* TBD: This check is for faulty timer entries, where the override
@@ -1136,23 +974,9 @@ void __init mp_override_legacy_irq (
(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
- /*
- * If an existing [IOAPIC.PIN -> IRQ] routing entry exists we override it.
- * Otherwise create a new entry (e.g. global_irq == 2).
- */
- for (i = 0; i < mp_irq_entries; i++) {
- if ((mp_irqs[i].mpc_srcbus == intsrc.mpc_srcbus)
- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) {
- mp_irqs[i] = intsrc;
- found = 1;
- break;
- }
- }
- if (!found) {
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
- }
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
return;
}
@@ -1160,35 +984,22 @@ void __init mp_override_legacy_irq (
void __init mp_config_acpi_legacy_irqs (void)
{
+ struct mpc_config_intsrc intsrc;
int i = 0;
int ioapic = -1;
- /*
- * Initialize mp_irqs for IRQ configuration.
- */
- unsigned char *bus_data;
- int count;
-
- count = (MAX_MP_BUSSES * sizeof(int)) * 4;
- count += (MAX_IRQ_SOURCES * sizeof(int)) * 4;
- bus_data = (void *)alloc_xenheap_pages(get_order(count));
- if (!bus_data) {
- panic("Fatal: can't allocate bus memory for ACPI legacy IRQ!");
- }
- mp_bus_id_to_type = (int *)&bus_data[0];
- mp_bus_id_to_node = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int))];
- mp_bus_id_to_local = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 2];
- mp_bus_id_to_pci_bus = (int *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 3];
- mp_irqs = (struct mpc_config_intsrc *)&bus_data[(MAX_MP_BUSSES * sizeof(int)) * 4];
- for (i = 0; i < MAX_MP_BUSSES; ++i)
- mp_bus_id_to_pci_bus[i] = -1;
-
/*
* Fabricate the legacy ISA bus (bus #31).
*/
mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+ /*
+ * ES7000 has no legacy identity mappings
+ */
+ if (es7000_plat)
+ return;
+
/*
* Locate the IOAPIC that manages the ISA IRQs (0-15).
*/
@@ -1196,118 +1007,101 @@ void __init mp_config_acpi_legacy_irqs (void)
if (ioapic < 0)
return;
+ intsrc.mpc_type = MP_INTSRC;
+ intsrc.mpc_irqflag = 0; /* Conforming */
+ intsrc.mpc_srcbus = MP_ISA_BUS;
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+
/*
- * Use the default configuration for the IRQs 0-15. These may be
+ * Use the default configuration for the IRQs 0-15. Unless
* overriden by (MADT) interrupt source override entries.
*/
for (i = 0; i < 16; i++) {
+ int idx;
- if (i == 2)
- continue; /* Don't connect IRQ2 */
+ for (idx = 0; idx < mp_irq_entries; idx++) {
+ struct mpc_config_intsrc *irq = mp_irqs + idx;
- mp_irqs[mp_irq_entries].mpc_type = MP_INTSRC;
- mp_irqs[mp_irq_entries].mpc_irqflag = 0; /* Conforming */
- mp_irqs[mp_irq_entries].mpc_srcbus = MP_ISA_BUS;
- mp_irqs[mp_irq_entries].mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
- mp_irqs[mp_irq_entries].mpc_irqtype = mp_INT;
- mp_irqs[mp_irq_entries].mpc_srcbusirq = i; /* Identity mapped */
- mp_irqs[mp_irq_entries].mpc_dstirq = i;
+ /* Do we already have a mapping for this ISA IRQ? */
+ if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+ break;
+
+ /* Do we already have a mapping for this IOAPIC pin */
+ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+ (irq->mpc_dstirq == i))
+ break;
+ }
+
+ if (idx != mp_irq_entries) {
+ printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+ continue; /* IRQ already used */
+ }
+
+ intsrc.mpc_irqtype = mp_INT;
+ intsrc.mpc_srcbusirq = i; /* Identity mapped */
+ intsrc.mpc_dstirq = i;
Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n",
- mp_irqs[mp_irq_entries].mpc_irqtype,
- mp_irqs[mp_irq_entries].mpc_irqflag & 3,
- (mp_irqs[mp_irq_entries].mpc_irqflag >> 2) & 3,
- mp_irqs[mp_irq_entries].mpc_srcbus,
- mp_irqs[mp_irq_entries].mpc_srcbusirq,
- mp_irqs[mp_irq_entries].mpc_dstapic,
- mp_irqs[mp_irq_entries].mpc_dstirq);
+ "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
+ intsrc.mpc_dstirq);
+ mp_irqs[mp_irq_entries] = intsrc;
if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!\n");
}
}
-#ifdef CONFIG_ACPI_PCI
-
-void __init mp_parse_prt (void)
+int mp_register_gsi (u32 gsi, int edge_level, int active_high_low)
{
- struct acpi_prt_entry *entry = NULL;
int ioapic = -1;
int ioapic_pin = 0;
- int irq = 0;
int idx, bit = 0;
- int edge_level = 0;
- int active_high_low = 0;
- /*
- * Parsing through the PCI Interrupt Routing Table (PRT) and program
- * routing for all entries.
- */
- list_for_each_entry(entry, &acpi_prt.entries, node) {
- /* Need to get irq for dynamic entry */
- if (entry->link.handle) {
- irq = acpi_pci_link_get_irq(entry->link.handle, entry->link.index, &edge_level, &active_high_low);
- if (!irq)
- continue;
- }
- else {
- /* Hardwired IRQ. Assume PCI standard settings */
- irq = entry->link.index;
- edge_level = 1;
- active_high_low = 1;
- }
+#ifdef CONFIG_ACPI_BUS
+ /* Don't set up the ACPI SCI because it's already set up */
+ if (acpi_fadt.sci_int == gsi)
+ return gsi;
+#endif
- /* Don't set up the ACPI SCI because it's already set up */
- if (acpi_fadt.sci_int == irq) {
- entry->irq = irq; /*we still need to set entry's irq*/
- continue;
- }
-
- ioapic = mp_find_ioapic(irq);
- if (ioapic < 0)
- continue;
- ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start;
-
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->irq mappings (but unique PCI devices);
- * we only only program the IOAPIC on the first.
- */
- bit = ioapic_pin % 32;
- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
- if (idx > 3) {
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
- ioapic_pin);
- continue;
- }
- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- entry->irq = irq;
- continue;
- }
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+ return gsi;
+ }
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+ ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
- if (!io_apic_set_pci_routing(ioapic, ioapic_pin, irq, edge_level, active_high_low))
- entry->irq = irq;
+ if (ioapic_renumber_irq)
+ gsi = ioapic_renumber_irq(ioapic, gsi);
- printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n",
- entry->id.segment, entry->id.bus,
- entry->id.device, ('A' + entry->pin),
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin,
- entry->irq);
+ /*
+ * Avoid pin reprogramming. PRTs typically include entries
+ * with redundant pin->gsi mappings (but unique PCI devices);
+ * we only program the IOAPIC on the first.
+ */
+ bit = ioapic_pin % 32;
+ idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+ if (idx > 3) {
+ printk(KERN_ERR "Invalid reference to IOAPIC pin "
+ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+ ioapic_pin);
+ return gsi;
+ }
+ if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+ return gsi;
}
-
- print_IO_APIC();
-
- return;
-}
-#endif /*CONFIG_ACPI_PCI*/
+ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
-#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/
+ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+ edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
+ active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);
+ return gsi;
+}
-#endif /*CONFIG_ACPI*/
+#endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/
+#endif /*CONFIG_ACPI_BOOT*/
diff --git a/xen/arch/x86/mtrr/generic.c b/xen/arch/x86/mtrr/generic.c
index 56c59107a1..9ab5be4ec6 100644
--- a/xen/arch/x86/mtrr/generic.c
+++ b/xen/arch/x86/mtrr/generic.c
@@ -1,8 +1,9 @@
/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
because MTRRs can span upto 40 bits (36bits on most modern x86) */
+#include <xen/lib.h>
#include <xen/init.h>
-#include <xen/slab.h>
#include <xen/mm.h>
+#include <asm/flushtlb.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
@@ -51,7 +52,8 @@ void __init get_mtrr_state(void)
unsigned lo, dummy;
if (!mtrr_state.var_ranges) {
- mtrr_state.var_ranges = xmalloc(num_var_ranges * sizeof (struct mtrr_var_range));
+ mtrr_state.var_ranges = xmalloc_array(struct mtrr_var_range,
+ num_var_ranges);
if (!mtrr_state.var_ranges)
return;
}
@@ -260,7 +262,7 @@ static void prepare_set(void)
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
- __flush_tlb();
+ local_flush_tlb();
/* Save MTRR state */
rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
@@ -272,7 +274,7 @@ static void prepare_set(void)
static void post_set(void)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
- __flush_tlb();
+ local_flush_tlb();
/* Intel (P6) standard MTRRs */
wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
diff --git a/xen/arch/x86/mtrr/main.c b/xen/arch/x86/mtrr/main.c
index b0f7396f6b..48ea94ae1f 100644
--- a/xen/arch/x86/mtrr/main.c
+++ b/xen/arch/x86/mtrr/main.c
@@ -33,8 +33,9 @@
#include <xen/config.h>
#include <xen/init.h>
-#include <xen/pci.h>
+#include <xen/lib.h>
#include <xen/smp.h>
+#include <xen/spinlock.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
@@ -48,8 +49,6 @@
#define down(_m) spin_lock(_m)
#define up(_m) spin_unlock(_m)
-#define num_booting_cpus() smp_num_cpus
-
u32 num_var_ranges = 0;
unsigned int *usage_table;
@@ -95,25 +94,6 @@ void set_mtrr_ops(struct mtrr_ops * ops)
/* Returns non-zero if we have the write-combining memory type */
static int have_wrcomb(void)
{
- struct pci_dev *dev;
-
- if ((dev = pci_find_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
- /* ServerWorks LE chipsets have problems with write-combining
- Don't allow it and leave room for other chipsets to be tagged */
- if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
- dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
- printk(KERN_INFO "mtrr: Serverworks LE detected. Write-combining disabled.\n");
- return 0;
- }
- /* Intel 450NX errata # 23. Non ascending cachline evictions to
- write combining memory may resulting in data corruption */
- if (dev->vendor == PCI_VENDOR_ID_INTEL &&
- dev->device == PCI_DEVICE_ID_INTEL_82451NX)
- {
- printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
- return 0;
- }
- }
return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
}
@@ -136,8 +116,7 @@ static void __init init_table(void)
int i, max;
max = num_var_ranges;
- if ((usage_table = xmalloc(max * sizeof *usage_table))
- == NULL) {
+ if ((usage_table = xmalloc_array(unsigned int, max)) == NULL) {
printk(KERN_ERR "mtrr: could not allocate\n");
return;
}
diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
index 556a661b57..0e03b6a674 100644
--- a/xen/arch/x86/nmi.c
+++ b/xen/arch/x86/nmi.c
@@ -21,18 +21,19 @@
#include <xen/time.h>
#include <xen/sched.h>
#include <xen/console.h>
+#include <xen/smp.h>
+#include <asm/current.h>
#include <asm/mc146818rtc.h>
-#include <asm/smp.h>
#include <asm/msr.h>
#include <asm/mpspec.h>
#include <asm/debugger.h>
unsigned int nmi_watchdog = NMI_NONE;
-unsigned int watchdog_on = 0;
static unsigned int nmi_hz = HZ;
-unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
-
-extern int logical_proc_id[];
+static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
+static unsigned int nmi_p4_cccr_val;
+static struct ac_timer nmi_timer[NR_CPUS];
+static unsigned int nmi_timer_ticks[NR_CPUS];
#define K7_EVNTSEL_ENABLE (1 << 22)
#define K7_EVNTSEL_INT (1 << 20)
@@ -48,16 +49,9 @@ extern int logical_proc_id[];
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
-#define MSR_P4_MISC_ENABLE 0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
#define MSR_P4_PERFCTR0 0x300
#define MSR_P4_CCCR0 0x360
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
-#define P4_ESCR_OS0 (1<<3)
-#define P4_ESCR_USR0 (1<<2)
-#define P4_ESCR_OS1 (1<<1)
-#define P4_ESCR_USR1 (1<<0)
#define P4_CCCR_OVF_PMI0 (1<<26)
#define P4_CCCR_OVF_PMI1 (1<<27)
#define P4_CCCR_THRESHOLD(N) ((N)<<20)
@@ -67,46 +61,38 @@ extern int logical_proc_id[];
#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
#define P4_CCCR_ENABLE (1<<12)
/*
- * Set up IQ_COUNTER{0,1} to behave like a clock, by having IQ_CCCR{0,1} filter
+ * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
* CRU_ESCR0 (with any non-null event selector) through a complemented
* max threshold. [IA32-Vol3, Section 14.9.9]
*/
#define MSR_P4_IQ_COUNTER0 0x30C
-#define MSR_P4_IQ_COUNTER1 0x30D
#define MSR_P4_IQ_CCCR0 0x36C
-#define MSR_P4_IQ_CCCR1 0x36D
#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */
-#define P4_NMI_CRU_ESCR0 \
- (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \
- P4_ESCR_OS1|P4_ESCR_USR1)
+#define P4_NMI_CRU_ESCR0 P4_ESCR_EVENT_SELECT(0x3F)
#define P4_NMI_IQ_CCCR0 \
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
-#define P4_NMI_IQ_CCCR1 \
- (P4_CCCR_OVF_PMI1|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
- P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
int __init check_nmi_watchdog (void)
{
unsigned int prev_nmi_count[NR_CPUS];
- int j, cpu;
+ int cpu;
if ( !nmi_watchdog )
return 0;
printk("Testing NMI watchdog --- ");
- for ( j = 0; j < smp_num_cpus; j++ )
- {
- cpu = cpu_logical_map(j);
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ )
prev_nmi_count[cpu] = nmi_count(cpu);
- }
- __sti();
+ local_irq_enable();
mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
- for ( j = 0; j < smp_num_cpus; j++ )
+ for ( cpu = 0; cpu < NR_CPUS; cpu++ )
{
- cpu = cpu_logical_map(j);
+ if ( !cpu_isset(cpu, cpu_callin_map) &&
+ !cpu_isset(cpu, cpu_online_map) )
+ continue;
if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
printk("CPU#%d stuck. ", cpu);
else
@@ -123,6 +109,13 @@ int __init check_nmi_watchdog (void)
return 0;
}
+static void nmi_timer_fn(void *unused)
+{
+ int cpu = smp_processor_id();
+ nmi_timer_ticks[cpu]++;
+ set_ac_timer(&nmi_timer[cpu], NOW() + MILLISECS(1000));
+}
+
static inline void nmi_pm_init(void) { }
#define __pminit __init
@@ -186,50 +179,40 @@ static int __pminit setup_p4_watchdog(void)
{
unsigned int misc_enable, dummy;
- rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
+ rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
+ if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL))
return 0;
nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
-
- if ( logical_proc_id[smp_processor_id()] == 0 )
- {
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
- clear_msr_range(0x3F1, 2);
- /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
- docs doesn't fully define it, so leave it alone for now. */
- clear_msr_range(0x3A0, 31);
- clear_msr_range(0x3C0, 6);
- clear_msr_range(0x3C8, 6);
- clear_msr_range(0x3E0, 2);
- clear_msr_range(MSR_P4_CCCR0, 18);
- clear_msr_range(MSR_P4_PERFCTR0, 18);
+ nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
+ if ( smp_num_siblings == 2 )
+ nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+
+ if (!(misc_enable & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
+ clear_msr_range(0x3F1, 2);
+ /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
+ docs doesn't fully define it, so leave it alone for now. */
+ clear_msr_range(0x3A0, 31);
+ clear_msr_range(0x3C0, 6);
+ clear_msr_range(0x3C8, 6);
+ clear_msr_range(0x3E0, 2);
+ clear_msr_range(MSR_P4_CCCR0, 18);
+ clear_msr_range(MSR_P4_PERFCTR0, 18);
- wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
- Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
- }
- else if ( logical_proc_id[smp_processor_id()] == 1 )
- {
- wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1 & ~P4_CCCR_ENABLE, 0);
- Dprintk("setting P4_IQ_COUNTER2 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
- wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
- }
- else
- {
- return 0;
- }
+ wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
+ wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
+ Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
+ wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
return 1;
}
void __pminit setup_apic_nmi_watchdog(void)
{
+ int cpu = smp_processor_id();
+
if (!nmi_watchdog)
return;
@@ -255,6 +238,10 @@ void __pminit setup_apic_nmi_watchdog(void)
default:
return;
}
+
+ init_ac_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
+ nmi_timer_fn(NULL);
+
nmi_pm_init();
}
@@ -263,18 +250,33 @@ static unsigned int
last_irq_sums [NR_CPUS],
alert_counter [NR_CPUS];
-void touch_nmi_watchdog (void)
+static spinlock_t watchdog_lock = SPIN_LOCK_UNLOCKED;
+static unsigned int watchdog_disable_count = 1;
+static unsigned int watchdog_on;
+
+void watchdog_disable(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&watchdog_lock, flags);
+ if ( watchdog_disable_count++ == 0 )
+ watchdog_on = 0;
+ spin_unlock_irqrestore(&watchdog_lock, flags);
+}
+
+void watchdog_enable(void)
{
- int i;
- for (i = 0; i < smp_num_cpus; i++)
- alert_counter[i] = 0;
+ unsigned long flags;
+ spin_lock_irqsave(&watchdog_lock, flags);
+ if ( --watchdog_disable_count == 0 )
+ watchdog_on = 1;
+ spin_unlock_irqrestore(&watchdog_lock, flags);
}
-void nmi_watchdog_tick (struct xen_regs * regs)
+void nmi_watchdog_tick(struct cpu_user_regs * regs)
{
int sum, cpu = smp_processor_id();
- sum = apic_timer_irqs[cpu];
+ sum = nmi_timer_ticks[cpu];
if ( (last_irq_sums[cpu] == sum) && watchdog_on )
{
@@ -300,22 +302,24 @@ void nmi_watchdog_tick (struct xen_regs * regs)
{
if ( nmi_perfctr_msr == MSR_P4_IQ_COUNTER0 )
{
- if ( logical_proc_id[cpu] == 0 )
- {
- wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
- }
- else
- {
- wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
- }
+ /*
+ * P4 quirks:
+ * - An overflown perfctr will assert its interrupt
+ * until the OVF flag in its CCCR is cleared.
+ * - LVTPC is masked on interrupt and must be
+ * unmasked by the LVTPC handler.
+ */
+ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- else
+ else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
{
- wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+ /*
+ * Only P6 based Pentium M need to re-unmask the apic vector but
+ * it doesn't hurt other P6 variants.
+ */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
}
+ wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
}
}
diff --git a/xen/arch/x86/pci-irq.c b/xen/arch/x86/pci-irq.c
deleted file mode 100644
index e0f157846e..0000000000
--- a/xen/arch/x86/pci-irq.c
+++ /dev/null
@@ -1,1084 +0,0 @@
-/*
- * Low-Level PCI Support for PC -- Routing of Interrupts
- *
- * (c) 1999--2000 Martin Mares <mj@ucw.cz>
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/pci.h>
-#include <xen/init.h>
-#include <xen/slab.h>
-#include <xen/irq.h>
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/io_apic.h>
-#include "pci-x86.h"
-
-#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
-#define PIRQ_VERSION 0x0100
-
-int broken_hp_bios_irq9;
-
-static struct irq_routing_table *pirq_table;
-
-/*
- * Never use: 0, 1, 2 (timer, keyboard, and cascade)
- * Avoid using: 13, 14 and 15 (FP error and IDE).
- * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
- */
-unsigned int pcibios_irq_mask = 0xfff8;
-
-static int pirq_penalty[16] = {
- 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
- 0, 0, 0, 0, 1000, 100000, 100000, 100000
-};
-
-struct irq_router {
- char *name;
- u16 vendor, device;
- int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
- int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
-};
-
-struct irq_router_handler {
- u16 vendor;
- int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
-};
-
-/*
- * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
- */
-
-static struct irq_routing_table * __init pirq_find_routing_table(void)
-{
- u8 *addr;
- struct irq_routing_table *rt;
- int i;
- u8 sum;
-
- for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
- rt = (struct irq_routing_table *) addr;
- if (rt->signature != PIRQ_SIGNATURE ||
- rt->version != PIRQ_VERSION ||
- rt->size % 16 ||
- rt->size < sizeof(struct irq_routing_table))
- continue;
- sum = 0;
- for(i=0; i<rt->size; i++)
- sum += addr[i];
- if (!sum) {
- DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
- return rt;
- }
- }
- return NULL;
-}
-
-/*
- * If we have a IRQ routing table, use it to search for peer host
- * bridges. It's a gross hack, but since there are no other known
- * ways how to get a list of buses, we have to go this way.
- */
-
-static void __init pirq_peer_trick(void)
-{
- struct irq_routing_table *rt = pirq_table;
- u8 busmap[256];
- int i;
- struct irq_info *e;
-
- memset(busmap, 0, sizeof(busmap));
- for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
- e = &rt->slots[i];
-#ifdef DEBUG
- {
- int j;
- DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
- for(j=0; j<4; j++)
- DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
- DBG("\n");
- }
-#endif
- busmap[e->bus] = 1;
- }
- for(i=1; i<256; i++)
- /*
- * It might be a secondary bus, but in this case its parent is already
- * known (ascending bus order) and therefore pci_scan_bus returns immediately.
- */
- if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL))
- printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
- pcibios_last_bus = -1;
-}
-
-/*
- * Code for querying and setting of IRQ routes on various interrupt routers.
- */
-
-void eisa_set_level_irq(unsigned int irq)
-{
- unsigned char mask = 1 << (irq & 7);
- unsigned int port = 0x4d0 + (irq >> 3);
- unsigned char val = inb(port);
-
- if (!(val & mask)) {
- DBG(" -> edge");
- outb(val | mask, port);
- }
-}
-
-/*
- * Common IRQ routing practice: nybbles in config space,
- * offset by some magic constant.
- */
-static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
-{
- u8 x;
- unsigned reg = offset + (nr >> 1);
-
- pci_read_config_byte(router, reg, &x);
- return (nr & 1) ? (x >> 4) : (x & 0xf);
-}
-
-static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
-{
- u8 x;
- unsigned reg = offset + (nr >> 1);
-
- pci_read_config_byte(router, reg, &x);
- x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
- pci_write_config_byte(router, reg, x);
-}
-
-/*
- * ALI pirq entries are damn ugly, and completely undocumented.
- * This has been figured out from pirq tables, and it's not a pretty
- * picture.
- */
-static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
-
- return irqmap[read_config_nybble(router, 0x48, pirq-1)];
-}
-
-static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
- unsigned int val = irqmap[irq];
-
- if (val) {
- write_config_nybble(router, 0x48, pirq-1, val);
- return 1;
- }
- return 0;
-}
-
-/*
- * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
- * just a pointer to the config space.
- */
-static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- u8 x;
-
- pci_read_config_byte(router, pirq, &x);
- return (x < 16) ? x : 0;
-}
-
-static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- pci_write_config_byte(router, pirq, irq);
- return 1;
-}
-
-/*
- * The VIA pirq rules are nibble-based, like ALI,
- * but without the ugly irq number munging.
- * However, PIRQD is in the upper instead of lower nibble.
- */
-static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq);
-}
-
-static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq);
- return 1;
-}
-
-/*
- * ITE 8330G pirq rules are nibble-based
- * FIXME: pirqmap may be { 1, 0, 3, 2 },
- * 2+3 are both mapped to irq 9 on my system
- */
-static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
- return read_config_nybble(router,0x43, pirqmap[pirq-1]);
-}
-
-static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
- write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
- return 1;
-}
-
-/*
- * OPTI: high four bits are nibble pointer..
- * I wonder what the low bits do?
- */
-static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- return read_config_nybble(router, 0xb8, pirq >> 4);
-}
-
-static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- write_config_nybble(router, 0xb8, pirq >> 4, irq);
- return 1;
-}
-
-/*
- * Cyrix: nibble offset 0x5C
- */
-static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- return read_config_nybble(router, 0x5C, (pirq-1)^1);
-}
-
-static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- write_config_nybble(router, 0x5C, (pirq-1)^1, irq);
- return 1;
-}
-
-/*
- * PIRQ routing for SiS 85C503 router used in several SiS chipsets.
- * We have to deal with the following issues here:
- * - vendors have different ideas about the meaning of link values
- * - some onboard devices (integrated in the chipset) have special
- * links and are thus routed differently (i.e. not via PCI INTA-INTD)
- * - different revision of the router have a different layout for
- * the routing registers, particularly for the onchip devices
- *
- * For all routing registers the common thing is we have one byte
- * per routeable link which is defined as:
- * bit 7 IRQ mapping enabled (0) or disabled (1)
- * bits [6:4] reserved (sometimes used for onchip devices)
- * bits [3:0] IRQ to map to
- * allowed: 3-7, 9-12, 14-15
- * reserved: 0, 1, 2, 8, 13
- *
- * The config-space registers located at 0x41/0x42/0x43/0x44 are
- * always used to route the normal PCI INT A/B/C/D respectively.
- * Apparently there are systems implementing PCI routing table using
- * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
- * We try our best to handle both link mappings.
- *
- * Currently (2003-05-21) it appears most SiS chipsets follow the
- * definition of routing registers from the SiS-5595 southbridge.
- * According to the SiS 5595 datasheets the revision id's of the
- * router (ISA-bridge) should be 0x01 or 0xb0.
- *
- * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1.
- * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets.
- * They seem to work with the current routing code. However there is
- * some concern because of the two USB-OHCI HCs (original SiS 5595
- * had only one). YMMV.
- *
- * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1:
- *
- * 0x61: IDEIRQ:
- * bits [6:5] must be written 01
- * bit 4 channel-select primary (0), secondary (1)
- *
- * 0x62: USBIRQ:
- * bit 6 OHCI function disabled (0), enabled (1)
- *
- * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved
- *
- * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved
- *
- * We support USBIRQ (in addition to INTA-INTD) and keep the
- * IDE, ACPI and DAQ routing untouched as set by the BIOS.
- *
- * Currently the only reported exception is the new SiS 65x chipset
- * which includes the SiS 69x southbridge. Here we have the 85C503
- * router revision 0x04 and there are changes in the register layout
- * mostly related to the different USB HCs with USB 2.0 support.
- *
- * Onchip routing for router rev-id 0x04 (try-and-error observation)
- *
- * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs
- * bit 6-4 are probably unused, not like 5595
- */
-
-#define PIRQ_SIS_IRQ_MASK 0x0f
-#define PIRQ_SIS_IRQ_DISABLE 0x80
-#define PIRQ_SIS_USB_ENABLE 0x40
-#define PIRQ_SIS_DETECT_REGISTER 0x40
-
-/* return value:
- * -1 on error
- * 0 for PCI INTA-INTD
- * 0 or enable bit mask to check or set for onchip functions
- */
-static inline int pirq_sis5595_onchip(int pirq, int *reg)
-{
- int ret = -1;
-
- *reg = pirq;
- switch(pirq) {
- case 0x01:
- case 0x02:
- case 0x03:
- case 0x04:
- *reg += 0x40;
- case 0x41:
- case 0x42:
- case 0x43:
- case 0x44:
- ret = 0;
- break;
-
- case 0x62:
- ret = PIRQ_SIS_USB_ENABLE; /* documented for 5595 */
- break;
-
- case 0x61:
- case 0x6a:
- case 0x7e:
- printk(KERN_INFO "SiS pirq: IDE/ACPI/DAQ mapping not implemented: (%u)\n",
- (unsigned) pirq);
- /* fall thru */
- default:
- printk(KERN_INFO "SiS router unknown request: (%u)\n",
- (unsigned) pirq);
- break;
- }
- return ret;
-}
-
-/* return value:
- * -1 on error
- * 0 for PCI INTA-INTD
- * 0 or enable bit mask to check or set for onchip functions
- */
-static inline int pirq_sis96x_onchip(int pirq, int *reg)
-{
- int ret = -1;
-
- *reg = pirq;
- switch(pirq) {
- case 0x01:
- case 0x02:
- case 0x03:
- case 0x04:
- *reg += 0x40;
- case 0x41:
- case 0x42:
- case 0x43:
- case 0x44:
- case 0x60:
- case 0x61:
- case 0x62:
- case 0x63:
- ret = 0;
- break;
-
- default:
- printk(KERN_INFO "SiS router unknown request: (%u)\n",
- (unsigned) pirq);
- break;
- }
- return ret;
-}
-
-
-static int pirq_sis5595_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- u8 x;
- int reg, check;
-
- check = pirq_sis5595_onchip(pirq, &reg);
- if (check < 0)
- return 0;
-
- pci_read_config_byte(router, reg, &x);
- if (check != 0 && !(x & check))
- return 0;
-
- return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
-}
-
-static int pirq_sis96x_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- u8 x;
- int reg, check;
-
- check = pirq_sis96x_onchip(pirq, &reg);
- if (check < 0)
- return 0;
-
- pci_read_config_byte(router, reg, &x);
- if (check != 0 && !(x & check))
- return 0;
-
- return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK);
-}
-
-static int pirq_sis5595_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- u8 x;
- int reg, set;
-
- set = pirq_sis5595_onchip(pirq, &reg);
- if (set < 0)
- return 0;
-
- x = (irq & PIRQ_SIS_IRQ_MASK);
- if (x == 0)
- x = PIRQ_SIS_IRQ_DISABLE;
- else
- x |= set;
-
- pci_write_config_byte(router, reg, x);
-
- return 1;
-}
-
-static int pirq_sis96x_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- u8 x;
- int reg, set;
-
- set = pirq_sis96x_onchip(pirq, &reg);
- if (set < 0)
- return 0;
-
- x = (irq & PIRQ_SIS_IRQ_MASK);
- if (x == 0)
- x = PIRQ_SIS_IRQ_DISABLE;
- else
- x |= set;
-
- pci_write_config_byte(router, reg, x);
-
- return 1;
-}
-
-
-/*
- * VLSI: nibble offset 0x74 - educated guess due to routing table and
- * config space of VLSI 82C534 PCI-bridge/router (1004:0102)
- * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
- * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
- * for the busbridge to the docking station.
- */
-
-static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- if (pirq > 8) {
- printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
- return 0;
- }
- return read_config_nybble(router, 0x74, pirq-1);
-}
-
-static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- if (pirq > 8) {
- printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
- return 0;
- }
- write_config_nybble(router, 0x74, pirq-1, irq);
- return 1;
-}
-
-/*
- * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
- * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register
- * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect
- * register is a straight binary coding of desired PIC IRQ (low nibble).
- *
- * The 'link' value in the PIRQ table is already in the correct format
- * for the Index register. There are some special index values:
- * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
- * and 0x03 for SMBus.
- */
-static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- outb_p(pirq, 0xc00);
- return inb(0xc01) & 0xf;
-}
-
-static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- outb_p(pirq, 0xc00);
- outb_p(irq, 0xc01);
- return 1;
-}
-
-/* Support for AMD756 PCI IRQ Routing
- * Jhon H. Caicedo <jhcaiced@osso.org.co>
- * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
- * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
- * The AMD756 pirq rules are nibble-based
- * offset 0x56 0-3 PIRQA 4-7 PIRQB
- * offset 0x57 0-3 PIRQC 4-7 PIRQD
- */
-static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
-{
- u8 irq;
- irq = 0;
- if (pirq <= 4)
- {
- irq = read_config_nybble(router, 0x56, pirq - 1);
- }
- printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
- dev->vendor, dev->device, pirq, irq);
- return irq;
-}
-
-static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
- dev->vendor, dev->device, pirq, irq);
- if (pirq <= 4)
- {
- write_config_nybble(router, 0x56, pirq - 1, irq);
- }
- return 1;
-}
-
-#ifdef CONFIG_PCI_BIOS
-
-static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
-{
- struct pci_dev *bridge;
- int pin = pci_get_interrupt_pin(dev, &bridge);
- return pcibios_set_irq_routing(bridge, pin, irq);
-}
-
-#endif
-
-
-static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- /* We must not touch 440GX even if we have tables. 440GX has
- different IRQ routing weirdness */
- if(pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0, NULL) ||
- pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2, NULL))
- return 0;
- switch(device)
- {
- case PCI_DEVICE_ID_INTEL_82371FB_0:
- case PCI_DEVICE_ID_INTEL_82371SB_0:
- case PCI_DEVICE_ID_INTEL_82371AB_0:
- case PCI_DEVICE_ID_INTEL_82371MX:
- case PCI_DEVICE_ID_INTEL_82443MX_0:
- case PCI_DEVICE_ID_INTEL_82801AA_0:
- case PCI_DEVICE_ID_INTEL_82801AB_0:
- case PCI_DEVICE_ID_INTEL_82801BA_0:
- case PCI_DEVICE_ID_INTEL_82801BA_10:
- case PCI_DEVICE_ID_INTEL_82801CA_0:
- case PCI_DEVICE_ID_INTEL_82801CA_12:
- case PCI_DEVICE_ID_INTEL_82801DB_0:
- case PCI_DEVICE_ID_INTEL_82801E_0:
- case PCI_DEVICE_ID_INTEL_82801EB_0:
- case PCI_DEVICE_ID_INTEL_ESB_0:
- case PCI_DEVICE_ID_INTEL_ICH6_0:
- r->name = "PIIX/ICH";
- r->get = pirq_piix_get;
- r->set = pirq_piix_set;
- return 1;
- }
- return 0;
-}
-
-static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- /* FIXME: We should move some of the quirk fixup stuff here */
- switch(device)
- {
- case PCI_DEVICE_ID_VIA_82C586_0:
- case PCI_DEVICE_ID_VIA_82C596:
- case PCI_DEVICE_ID_VIA_82C686:
- case PCI_DEVICE_ID_VIA_8231:
- /* FIXME: add new ones for 8233/5 */
- r->name = "VIA";
- r->get = pirq_via_get;
- r->set = pirq_via_set;
- return 1;
- }
- return 0;
-}
-
-static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_VLSI_82C534:
- r->name = "VLSI 82C534";
- r->get = pirq_vlsi_get;
- r->set = pirq_vlsi_set;
- return 1;
- }
- return 0;
-}
-
-
-static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_SERVERWORKS_OSB4:
- case PCI_DEVICE_ID_SERVERWORKS_CSB5:
- r->name = "ServerWorks";
- r->get = pirq_serverworks_get;
- r->set = pirq_serverworks_set;
- return 1;
- }
- return 0;
-}
-
-static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- u8 reg;
- u16 devid;
-
- if (device != PCI_DEVICE_ID_SI_503)
- return 0;
-
- /*
- * In case of SiS south bridge, we need to detect the two
- * kinds of routing tables we have seen so far (5595 and 96x).
- * Since the maintain the same device ID, we need to do poke
- * the PCI configuration space to find the router type we are
- * dealing with.
- */
-
- /*
- * Factoid: writing bit6 of register 0x40 of the router config space
- * will make the SB to show up 0x096x inside the device id. Note,
- * we need to restore register 0x40 after the device id poke.
- */
-
- pci_read_config_byte(router, PIRQ_SIS_DETECT_REGISTER, &reg);
- pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg | (1 << 6));
- pci_read_config_word(router, PCI_DEVICE_ID, &devid);
- pci_write_config_byte(router, PIRQ_SIS_DETECT_REGISTER, reg);
-
- if ((devid & 0xfff0) == 0x0960) {
- r->name = "SIS96x";
- r->get = pirq_sis96x_get;
- r->set = pirq_sis96x_set;
- DBG("PCI: Detecting SiS router at %02x:%02x : SiS096x detected\n",
- rt->rtr_bus, rt->rtr_devfn);
- } else {
- r->name = "SIS5595";
- r->get = pirq_sis5595_get;
- r->set = pirq_sis5595_set;
- DBG("PCI: Detecting SiS router at %02x:%02x : SiS5595 detected\n",
- rt->rtr_bus, rt->rtr_devfn);
- }
- return 1;
-}
-
-static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_CYRIX_5520:
- r->name = "NatSemi";
- r->get = pirq_cyrix_get;
- r->set = pirq_cyrix_set;
- return 1;
- }
- return 0;
-}
-
-static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_OPTI_82C700:
- r->name = "OPTI";
- r->get = pirq_opti_get;
- r->set = pirq_opti_set;
- return 1;
- }
- return 0;
-}
-
-static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_ITE_IT8330G_0:
- r->name = "ITE";
- r->get = pirq_ite_get;
- r->set = pirq_ite_set;
- return 1;
- }
- return 0;
-}
-
-static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_AL_M1533:
- r->name = "ALI";
- r->get = pirq_ali_get;
- r->set = pirq_ali_set;
- return 1;
- /* Should add 156x some day */
- }
- return 0;
-}
-
-static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
-{
- switch(device)
- {
- case PCI_DEVICE_ID_AMD_VIPER_740B:
- r->name = "AMD756";
- break;
- case PCI_DEVICE_ID_AMD_VIPER_7413:
- r->name = "AMD766";
- break;
- case PCI_DEVICE_ID_AMD_VIPER_7443:
- r->name = "AMD768";
- break;
- default:
- return 0;
- }
- r->get = pirq_amd756_get;
- r->set = pirq_amd756_set;
- return 1;
-}
-
-static __initdata struct irq_router_handler pirq_routers[] = {
- { PCI_VENDOR_ID_INTEL, intel_router_probe },
- { PCI_VENDOR_ID_AL, ali_router_probe },
- { PCI_VENDOR_ID_ITE, ite_router_probe },
- { PCI_VENDOR_ID_VIA, via_router_probe },
- { PCI_VENDOR_ID_OPTI, opti_router_probe },
- { PCI_VENDOR_ID_SI, sis_router_probe },
- { PCI_VENDOR_ID_CYRIX, cyrix_router_probe },
- { PCI_VENDOR_ID_VLSI, vlsi_router_probe },
- { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe },
- { PCI_VENDOR_ID_AMD, amd_router_probe },
- /* Someone with docs needs to add the ATI Radeon IGP */
- { 0, NULL }
-};
-static struct irq_router pirq_router;
-static struct pci_dev *pirq_router_dev;
-
-/*
- * FIXME: should we have an option to say "generic for
- * chipset" ?
- */
-
-static void __init pirq_find_router(struct irq_router *r)
-{
- struct irq_routing_table *rt = pirq_table;
- struct irq_router_handler *h;
-
-#ifdef CONFIG_PCI_BIOS
- if (!rt->signature) {
- printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
- r->set = pirq_bios_set;
- r->name = "BIOS";
- return;
- }
-#endif
-
- /* Default unless a driver reloads it */
- r->name = "default";
- r->get = NULL;
- r->set = NULL;
-
- DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
- rt->rtr_vendor, rt->rtr_device);
-
- pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
- if (!pirq_router_dev) {
- DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
- return;
- }
-
- for( h = pirq_routers; h->vendor; h++) {
- /* First look for a router match */
- if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
- break;
- /* Fall back to a device match */
- if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
- break;
- }
- printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
- pirq_router.name,
- pirq_router_dev->vendor,
- pirq_router_dev->device,
- pirq_router_dev->slot_name);
-}
-
-static struct irq_info *pirq_get_info(struct pci_dev *dev)
-{
- struct irq_routing_table *rt = pirq_table;
- int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
- struct irq_info *info;
-
- for (info = rt->slots; entries--; info++)
- if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
- return info;
- return NULL;
-}
-
-static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
-{
- u8 pin;
- struct irq_info *info;
- int i, pirq, newirq;
- int irq = 0;
- u32 mask;
- struct irq_router *r = &pirq_router;
- struct pci_dev *dev2;
- char *msg = NULL;
-
- if (!pirq_table)
- return 0;
-
- /* Find IRQ routing entry */
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (!pin) {
- DBG(" -> no interrupt pin\n");
- return 0;
- }
- pin = pin - 1;
-
- DBG("IRQ for %s:%d", dev->slot_name, pin);
- info = pirq_get_info(dev);
- if (!info) {
- DBG(" -> not found in routing table\n");
- return 0;
- }
- pirq = info->irq[pin].link;
- mask = info->irq[pin].bitmap;
- if (!pirq) {
- DBG(" -> not routed\n");
- return 0;
- }
- DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
- mask &= pcibios_irq_mask;
-
- /* Work around broken HP Pavilion Notebooks which assign USB to
- IRQ 9 even though it is actually wired to IRQ 11 */
-
- if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) {
- dev->irq = 11;
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11);
- r->set(pirq_router_dev, dev, pirq, 11);
- }
-
- /*
- * Find the best IRQ to assign: use the one
- * reported by the device if possible.
- */
- newirq = dev->irq;
- if (!newirq && assign) {
- for (i = 0; i < 16; i++) {
- if (!(mask & (1 << i)))
- continue;
- if (pirq_penalty[i] < pirq_penalty[newirq] &&
- pirq_guest_bindable(i,1))
- newirq = i;
- }
- }
- DBG(" -> newirq=%d", newirq);
-
- /* Check if it is hardcoded */
- if ((pirq & 0xf0) == 0xf0) {
- irq = pirq & 0xf;
- DBG(" -> hardcoded IRQ %d\n", irq);
- msg = "Hardcoded";
- } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) {
- DBG(" -> got IRQ %d\n", irq);
- msg = "Found";
- } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
- DBG(" -> assigning IRQ %d", newirq);
- if (r->set(pirq_router_dev, dev, pirq, newirq)) {
- eisa_set_level_irq(newirq);
- DBG(" ... OK\n");
- msg = "Assigned";
- irq = newirq;
- }
- }
-
- if (!irq) {
- DBG(" ... failed\n");
- if (newirq && mask == (1 << newirq)) {
- msg = "Guessed";
- irq = newirq;
- } else
- return 0;
- }
- printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name);
-
- /* Update IRQ for all devices with the same pirq value */
- pci_for_each_dev(dev2) {
- pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
- if (!pin)
- continue;
- pin--;
- info = pirq_get_info(dev2);
- if (!info)
- continue;
- if (info->irq[pin].link == pirq) {
- /* We refuse to override the dev->irq information. Give a warning! */
- if (dev2->irq && dev2->irq != irq) {
- printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
- dev2->slot_name, dev2->irq, irq);
- continue;
- }
- dev2->irq = irq;
- pirq_penalty[irq]++;
- if (dev != dev2)
- printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name);
- }
- }
- return 1;
-}
-
-void __init pcibios_irq_init(void)
-{
- DBG("PCI: IRQ init\n");
- pirq_table = pirq_find_routing_table();
-#ifdef CONFIG_PCI_BIOS
- if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
- pirq_table = pcibios_get_irq_routing_table();
-#endif
- if (pirq_table) {
- pirq_peer_trick();
- pirq_find_router(&pirq_router);
- if (pirq_table->exclusive_irqs) {
- int i;
- for (i=0; i<16; i++)
- if (!(pirq_table->exclusive_irqs & (1 << i)))
- pirq_penalty[i] += 100;
- }
- /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
- if (io_apic_assign_pci_irqs)
- pirq_table = NULL;
- }
-}
-
-void __init pcibios_fixup_irqs(void)
-{
- struct pci_dev *dev;
- u8 pin;
-
- DBG("PCI: IRQ fixup\n");
- pci_for_each_dev(dev) {
- /*
- * If the BIOS has set an out of range IRQ number, just ignore it.
- * Also keep track of which IRQ's are already in use.
- */
- if (dev->irq >= 16) {
- DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq);
- dev->irq = 0;
- }
- /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
- if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
- pirq_penalty[dev->irq] = 0;
- pirq_penalty[dev->irq]++;
- }
-
- pci_for_each_dev(dev) {
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Recalculate IRQ numbers if we use the I/O APIC.
- */
- if (io_apic_assign_pci_irqs)
- {
- int irq;
-
- if (pin) {
- pin--; /* interrupt pins are numbered starting from 1 */
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
- /*
- * Busses behind bridges are typically not listed in the MP-table.
- * In this case we have to look up the IRQ based on the parent bus,
- * parent slot, and pin number. The SMP code detects such bridged
- * busses itself so we should get into this branch reliably.
- */
- if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
- struct pci_dev * bridge = dev->bus->self;
-
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
- PCI_SLOT(bridge->devfn), pin);
- if (irq >= 0)
- printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n",
- bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq);
- }
- if (irq >= 0) {
- printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
- dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
- dev->irq = irq;
- }
- }
- }
-#endif
- /*
- * Still no IRQ? Try to lookup one...
- */
- if (pin && !dev->irq)
- pcibios_lookup_irq(dev, 0);
- }
-}
-
-void pcibios_penalize_isa_irq(int irq)
-{
- /*
- * If any ISAPnP device reports an IRQ in its list of possible
- * IRQ's, we try to avoid assigning it to PCI devices.
- */
- pirq_penalty[irq] += 100;
-}
-
-void pcibios_enable_irq(struct pci_dev *dev)
-{
- u8 pin;
- extern int interrupt_line_quirk;
-
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
- char *msg;
-
- /* With IDE legacy devices the IRQ lookup failure is not a problem.. */
- if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
- return;
-
- if (io_apic_assign_pci_irqs)
- msg = " Probably buggy MP table.";
- else if (pci_probe & PCI_BIOS_IRQ_SCAN)
- msg = "";
- else
- msg = " Please try using pci=biosirq.";
- printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
- 'A' + pin - 1, dev->slot_name, msg);
- }
- /* VIA bridges use interrupt line for apic/pci steering across
- the V-Link */
- else if (interrupt_line_quirk)
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
-
-}
diff --git a/xen/arch/x86/pci-pc.c b/xen/arch/x86/pci-pc.c
deleted file mode 100644
index c3c097efdc..0000000000
--- a/xen/arch/x86/pci-pc.c
+++ /dev/null
@@ -1,1548 +0,0 @@
-/* -*- Mode:C; c-basic-offset:8; tab-width:8; indent-tabs-mode:t -*- */
-/*
- * Low-Level PCI Support for PC
- *
- * (c) 1999--2000 Martin Mares <mj@ucw.cz>
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/sched.h>
-#include <xen/pci.h>
-#include <xen/init.h>
-#include <xen/ioport.h>
-#include <xen/acpi.h>
-
-/*#include <asm/segment.h>*/
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/smpboot.h>
-
-#include "pci-x86.h"
-
-extern int numnodes;
-#define __KERNEL_CS __HYPERVISOR_CS
-#define __KERNEL_DS __HYPERVISOR_DS
-
-unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
-
-int pcibios_last_bus = -1;
-struct pci_bus *pci_root_bus = NULL;
-struct pci_ops *pci_root_ops = NULL;
-
-int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
-int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
-
-static int pci_using_acpi_prt = 0;
-
-#ifdef CONFIG_MULTIQUAD
-#define BUS2QUAD(global) (mp_bus_id_to_node[global])
-#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
-#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
-#else
-#define BUS2QUAD(global) (0)
-#define BUS2LOCAL(global) (global)
-#define QUADLOCAL2BUS(quad,local) (local)
-#endif
-
-/*
- * This interrupt-safe spinlock protects all accesses to PCI
- * configuration space.
- */
-static spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED;
-
-
-/*
- * Functions for accessing PCI configuration space with type 1 accesses
- */
-
-#ifdef CONFIG_PCI_DIRECT
-
-#ifdef CONFIG_MULTIQUAD
-#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
- (0x80000000 | (BUS2LOCAL(bus) << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
-
-static int pci_conf1_mq_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* CONFIG_MULTIQUAD */
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
-
- switch (len) {
- case 1:
- *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
- break;
- case 2:
- *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
- break;
- case 4:
- *value = inl_quad(0xCFC, BUS2QUAD(bus));
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf1_mq_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* CONFIG_MULTIQUAD */
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outl_quad(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8, BUS2QUAD(bus));
-
- switch (len) {
- case 1:
- outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
- break;
- case 2:
- outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
- break;
- case 4:
- outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf1_read_mq_config_byte(struct pci_dev *dev, int where, u8 *value)
-{
- int result;
- u32 data;
-
- result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, &data);
-
- *value = (u8)data;
-
- return result;
-}
-
-static int pci_conf1_read_mq_config_word(struct pci_dev *dev, int where, u16 *value)
-{
- int result;
- u32 data;
-
- result = pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, &data);
-
- *value = (u16)data;
-
- return result;
-}
-
-static int pci_conf1_read_mq_config_dword(struct pci_dev *dev, int where, u32 *value)
-{
- if (!value)
- return -EINVAL;
-
- return pci_conf1_mq_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static int pci_conf1_write_mq_config_byte(struct pci_dev *dev, int where, u8 value)
-{
- return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, value);
-}
-
-static int pci_conf1_write_mq_config_word(struct pci_dev *dev, int where, u16 value)
-{
- return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, value);
-}
-
-static int pci_conf1_write_mq_config_dword(struct pci_dev *dev, int where, u32 value)
-{
- return pci_conf1_mq_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static struct pci_ops pci_direct_mq_conf1 = {
- pci_conf1_read_mq_config_byte,
- pci_conf1_read_mq_config_word,
- pci_conf1_read_mq_config_dword,
- pci_conf1_write_mq_config_byte,
- pci_conf1_write_mq_config_word,
- pci_conf1_write_mq_config_dword
-};
-
-#endif /* !CONFIG_MULTIQUAD */
-#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
- (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
-
-static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) /* !CONFIG_MULTIQUAD */
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
-
- switch (len) {
- case 1:
- *value = inb(0xCFC + (reg & 3));
- break;
- case 2:
- *value = inw(0xCFC + (reg & 2));
- break;
- case 4:
- *value = inl(0xCFC);
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) /* !CONFIG_MULTIQUAD */
-{
- unsigned long flags;
-
- if ((bus > 255 || dev > 31 || fn > 7 || reg > 255))
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
-
- switch (len) {
- case 1:
- outb((u8)value, 0xCFC + (reg & 3));
- break;
- case 2:
- outw((u16)value, 0xCFC + (reg & 2));
- break;
- case 4:
- outl((u32)value, 0xCFC);
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-#undef PCI_CONF1_ADDRESS
-
-static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value)
-{
- int result;
- u32 data;
-
- result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, &data);
-
- *value = (u8)data;
-
- return result;
-}
-
-static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value)
-{
- int result;
- u32 data;
-
- result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, &data);
-
- *value = (u16)data;
-
- return result;
-}
-
-static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value)
-{
- return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value)
-{
- return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, value);
-}
-
-static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value)
-{
- return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, value);
-}
-
-static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value)
-{
- return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static struct pci_ops pci_direct_conf1 = {
- pci_conf1_read_config_byte,
- pci_conf1_read_config_word,
- pci_conf1_read_config_dword,
- pci_conf1_write_config_byte,
- pci_conf1_write_config_word,
- pci_conf1_write_config_dword
-};
-
-
-/*
- * Functions for accessing PCI configuration space with type 2 accesses
- */
-
-#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg)
-
-static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
-{
- unsigned long flags;
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- if (dev & 0x10)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outb((u8)(0xF0 | (fn << 1)), 0xCF8);
- outb((u8)bus, 0xCFA);
-
- switch (len) {
- case 1:
- *value = inb(PCI_CONF2_ADDRESS(dev, reg));
- break;
- case 2:
- *value = inw(PCI_CONF2_ADDRESS(dev, reg));
- break;
- case 4:
- *value = inl(PCI_CONF2_ADDRESS(dev, reg));
- break;
- }
-
- outb (0, 0xCF8);
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
-{
- unsigned long flags;
-
- if ((bus > 255 || dev > 31 || fn > 7 || reg > 255))
- return -EINVAL;
-
- if (dev & 0x10)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- outb((u8)(0xF0 | (fn << 1)), 0xCF8);
- outb((u8)bus, 0xCFA);
-
- switch (len) {
- case 1:
- outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg));
- break;
- case 2:
- outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg));
- break;
- case 4:
- outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg));
- break;
- }
-
- outb (0, 0xCF8);
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-#undef PCI_CONF2_ADDRESS
-
-static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value)
-{
- int result;
- u32 data;
- result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, &data);
- *value = (u8)data;
- return result;
-}
-
-static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value)
-{
- int result;
- u32 data;
- result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, &data);
- *value = (u16)data;
- return result;
-}
-
-static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value)
-{
- return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value)
-{
- return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, value);
-}
-
-static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value)
-{
- return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, value);
-}
-
-static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value)
-{
- return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static struct pci_ops pci_direct_conf2 = {
- pci_conf2_read_config_byte,
- pci_conf2_read_config_word,
- pci_conf2_read_config_dword,
- pci_conf2_write_config_byte,
- pci_conf2_write_config_word,
- pci_conf2_write_config_dword
-};
-
-
-/*
- * Before we decide to use direct hardware access mechanisms, we try to do some
- * trivial checks to ensure it at least _seems_ to be working -- we just test
- * whether bus 00 contains a host bridge (this is similar to checking
- * techniques used in XFree86, but ours should be more reliable since we
- * attempt to make use of direct access hints provided by the PCI BIOS).
- *
- * This should be close to trivial, but it isn't, because there are buggy
- * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
- */
-static int __devinit pci_sanity_check(struct pci_ops *o)
-{
- u16 x;
- /* XEN: static is important to prevent stack overflow! */
- static struct pci_bus bus; /* Fake bus and device */
- static struct pci_dev dev;
-
- if (pci_probe & PCI_NO_CHECKS)
- return 1;
- bus.number = 0;
- dev.bus = &bus;
- for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++)
- if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) &&
- (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
- (!o->read_word(&dev, PCI_VENDOR_ID, &x) &&
- (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
- return 1;
- DBG("PCI: Sanity check failed\n");
- return 0;
-}
-
-static struct pci_ops * __devinit pci_check_direct(void)
-{
- unsigned int tmp;
- unsigned long flags;
-
- __save_flags(flags); __cli();
-
- /*
- * Check if configuration type 1 works.
- */
- if (pci_probe & PCI_PROBE_CONF1) {
- outb (0x01, 0xCFB);
- tmp = inl (0xCF8);
- outl (0x80000000, 0xCF8);
- if (inl (0xCF8) == 0x80000000 &&
- pci_sanity_check(&pci_direct_conf1)) {
- outl (tmp, 0xCF8);
- __restore_flags(flags);
- printk(KERN_INFO "PCI: Using configuration type 1\n");
- request_region(0xCF8, 8, "PCI conf1");
-
-#ifdef CONFIG_MULTIQUAD
- /* Multi-Quad has an extended PCI Conf1 */
- if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- return &pci_direct_mq_conf1;
-#endif
- return &pci_direct_conf1;
- }
- outl (tmp, 0xCF8);
- }
-
- /*
- * Check if configuration type 2 works.
- */
- if (pci_probe & PCI_PROBE_CONF2) {
- outb (0x00, 0xCFB);
- outb (0x00, 0xCF8);
- outb (0x00, 0xCFA);
- if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 &&
- pci_sanity_check(&pci_direct_conf2)) {
- __restore_flags(flags);
- printk(KERN_INFO "PCI: Using configuration type 2\n");
- request_region(0xCF8, 4, "PCI conf2");
- return &pci_direct_conf2;
- }
- }
-
- __restore_flags(flags);
- return NULL;
-}
-
-#endif
-
-/*
- * BIOS32 and PCI BIOS handling.
- */
-
-#ifdef CONFIG_PCI_BIOS
-
-#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX
-#define PCIBIOS_PCI_BIOS_PRESENT 0xb101
-#define PCIBIOS_FIND_PCI_DEVICE 0xb102
-#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103
-#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
-#define PCIBIOS_READ_CONFIG_BYTE 0xb108
-#define PCIBIOS_READ_CONFIG_WORD 0xb109
-#define PCIBIOS_READ_CONFIG_DWORD 0xb10a
-#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b
-#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c
-#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d
-#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e
-#define PCIBIOS_SET_PCI_HW_INT 0xb10f
-
-/* BIOS32 signature: "_32_" */
-#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
-
-/* PCI signature: "PCI " */
-#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
-
-/* PCI service signature: "$PCI" */
-#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
-
-/* PCI BIOS hardware mechanism flags */
-#define PCIBIOS_HW_TYPE1 0x01
-#define PCIBIOS_HW_TYPE2 0x02
-#define PCIBIOS_HW_TYPE1_SPEC 0x10
-#define PCIBIOS_HW_TYPE2_SPEC 0x20
-
-/*
- * This is the standard structure used to identify the entry point
- * to the BIOS32 Service Directory, as documented in
- * Standard BIOS 32-bit Service Directory Proposal
- * Revision 0.4 May 24, 1993
- * Phoenix Technologies Ltd.
- * Norwood, MA
- * and the PCI BIOS specification.
- */
-
-union bios32 {
- struct {
- unsigned long signature; /* _32_ */
- unsigned long entry; /* 32 bit physical address */
- unsigned char revision; /* Revision level, 0 */
- unsigned char length; /* Length in paragraphs should be 01 */
- unsigned char checksum; /* All bytes must add up to zero */
- unsigned char reserved[5]; /* Must be zero */
- } fields;
- char chars[16];
-};
-
-/*
- * Physical address of the service directory. I don't know if we're
- * allowed to have more than one of these or not, so just in case
- * we'll make pcibios_present() take a memory start parameter and store
- * the array there.
- */
-
-static struct {
- unsigned long address;
- unsigned short segment;
-} bios32_indirect = { 0, __KERNEL_CS };
-
-/*
- * Returns the entry point for the given service, NULL on error
- */
-
-static unsigned long bios32_service(unsigned long service)
-{
- unsigned char return_code; /* %al */
- unsigned long address; /* %ebx */
- unsigned long length; /* %ecx */
- unsigned long entry; /* %edx */
- unsigned long flags;
-
- __save_flags(flags); __cli();
- __asm__("lcall *(%%edi); cld"
- : "=a" (return_code),
- "=b" (address),
- "=c" (length),
- "=d" (entry)
- : "0" (service),
- "1" (0),
- "D" (&bios32_indirect));
- __restore_flags(flags);
-
- switch (return_code) {
- case 0:
- return address + entry;
- case 0x80: /* Not present */
- printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
- return 0;
- default: /* Shouldn't happen */
- printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
- service, return_code);
- return 0;
- }
-}
-
-static struct {
- unsigned long address;
- unsigned short segment;
-} pci_indirect = { 0, __KERNEL_CS };
-
-static int pci_bios_present;
-
-static int __devinit check_pcibios(void)
-{
- u32 signature, eax, ebx, ecx;
- u8 status, major_ver, minor_ver, hw_mech;
- unsigned long flags, pcibios_entry;
-
- if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
- pci_indirect.address = pcibios_entry + PAGE_OFFSET;
-
- __save_flags(flags); __cli();
- __asm__(
- "lcall *(%%edi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=d" (signature),
- "=a" (eax),
- "=b" (ebx),
- "=c" (ecx)
- : "1" (PCIBIOS_PCI_BIOS_PRESENT),
- "D" (&pci_indirect)
- : "memory");
- __restore_flags(flags);
-
- status = (eax >> 8) & 0xff;
- hw_mech = eax & 0xff;
- major_ver = (ebx >> 8) & 0xff;
- minor_ver = ebx & 0xff;
- if (pcibios_last_bus < 0)
- pcibios_last_bus = ecx & 0xff;
- DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n",
- status, hw_mech, major_ver, minor_ver, pcibios_last_bus);
- if (status || signature != PCI_SIGNATURE) {
- printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n",
- status, signature);
- return 0;
- }
- printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n",
- major_ver, minor_ver, pcibios_entry, pcibios_last_bus);
-#ifdef CONFIG_PCI_DIRECT
- if (!(hw_mech & PCIBIOS_HW_TYPE1))
- pci_probe &= ~PCI_PROBE_CONF1;
- if (!(hw_mech & PCIBIOS_HW_TYPE2))
- pci_probe &= ~PCI_PROBE_CONF2;
-#endif
- return 1;
- }
- return 0;
-}
-
-static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id,
- unsigned short index, unsigned char *bus, unsigned char *device_fn)
-{
- unsigned short bx;
- unsigned short ret;
- unsigned long flags;
-
- __save_flags(flags); __cli();
- __asm__("lcall *(%%edi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=b" (bx),
- "=a" (ret)
- : "1" (PCIBIOS_FIND_PCI_DEVICE),
- "c" (device_id),
- "d" (vendor),
- "S" ((int) index),
- "D" (&pci_indirect));
- __restore_flags(flags);
- *bus = (bx >> 8) & 0xff;
- *device_fn = bx & 0xff;
- return (int) (ret & 0xff00) >> 8;
-}
-
-static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
-{
- unsigned long result = 0;
- unsigned long flags;
- unsigned long bx = ((bus << 8) | (dev << 3) | fn);
-
- if (bus > 255 || dev > 31 || fn > 7 || reg > 255)
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- switch (len) {
- case 1:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_BYTE),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- case 2:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_WORD),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- case 4:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=c" (*value),
- "=a" (result)
- : "1" (PCIBIOS_READ_CONFIG_DWORD),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return (int)((result & 0xff00) >> 8);
-}
-
-static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
-{
- unsigned long result = 0;
- unsigned long flags;
- unsigned long bx = ((bus << 8) | (dev << 3) | fn);
-
- if ((bus > 255 || dev > 31 || fn > 7 || reg > 255))
- return -EINVAL;
-
- spin_lock_irqsave(&pci_config_lock, flags);
-
- switch (len) {
- case 1:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- case 2:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_WORD),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- case 4:
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (result)
- : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
- "c" (value),
- "b" (bx),
- "D" ((long)reg),
- "S" (&pci_indirect));
- break;
- }
-
- spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return (int)((result & 0xff00) >> 8);
-}
-
-static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value)
-{
- int result;
- u32 data;
-
- if (!value)
- BUG();
-
- result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, &data);
-
- *value = (u8)data;
-
- return result;
-}
-
-static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value)
-{
- int result;
- u32 data;
-
- if (!value)
- BUG();
-
- result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, &data);
-
- *value = (u16)data;
-
- return result;
-}
-
-static int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value)
-{
- if (!value)
- BUG();
-
- return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value)
-{
- return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 1, value);
-}
-
-static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value)
-{
- return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 2, value);
-}
-
-static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value)
-{
- return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), where, 4, value);
-}
-
-
-/*
- * Function table for BIOS32 access
- */
-
-static struct pci_ops pci_bios_access = {
- pci_bios_read_config_byte,
- pci_bios_read_config_word,
- pci_bios_read_config_dword,
- pci_bios_write_config_byte,
- pci_bios_write_config_word,
- pci_bios_write_config_dword
-};
-
-/*
- * Try to find PCI BIOS.
- */
-
-static struct pci_ops * __devinit pci_find_bios(void)
-{
- union bios32 *check;
- unsigned char sum;
- int i, length;
-
- /*
- * Follow the standard procedure for locating the BIOS32 Service
- * directory by scanning the permissible address range from
- * 0xe0000 through 0xfffff for a valid BIOS32 structure.
- */
-
- for (check = (union bios32 *) __va(0xe0000);
- check <= (union bios32 *) __va(0xffff0);
- ++check) {
- if (check->fields.signature != BIOS32_SIGNATURE)
- continue;
- length = check->fields.length * 16;
- if (!length)
- continue;
- sum = 0;
- for (i = 0; i < length ; ++i)
- sum += check->chars[i];
- if (sum != 0)
- continue;
- if (check->fields.revision != 0) {
- printk("PCI: unsupported BIOS32 revision %d at 0x%p\n",
- check->fields.revision, check);
- continue;
- }
- DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
- if (check->fields.entry >= 0x100000) {
- printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
- return NULL;
- } else {
- unsigned long bios32_entry = check->fields.entry;
- DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
- bios32_indirect.address = bios32_entry + PAGE_OFFSET;
- if (check_pcibios())
- return &pci_bios_access;
- }
- break; /* Hopefully more than one BIOS32 cannot happen... */
- }
-
- return NULL;
-}
-
-/*
- * Sort the device list according to PCI BIOS. Nasty hack, but since some
- * fool forgot to define the `correct' device order in the PCI BIOS specs
- * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels
- * which used BIOS ordering, we are bound to do this...
- */
-
-static void __devinit pcibios_sort(void)
-{
- LIST_HEAD(sorted_devices);
- struct list_head *ln;
- struct pci_dev *dev, *d;
- int idx, found;
- unsigned char bus, devfn;
-
- DBG("PCI: Sorting device list...\n");
- while (!list_empty(&pci_devices)) {
- ln = pci_devices.next;
- dev = pci_dev_g(ln);
- idx = found = 0;
- while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
- idx++;
- for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) {
- d = pci_dev_g(ln);
- if (d->bus->number == bus && d->devfn == devfn) {
- list_del(&d->global_list);
- list_add_tail(&d->global_list, &sorted_devices);
- if (d == dev)
- found = 1;
- break;
- }
- }
- if (ln == &pci_devices) {
- printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
- /*
- * We must not continue scanning as several buggy BIOSes
- * return garbage after the last device. Grr.
- */
- break;
- }
- }
- if (!found) {
- printk(KERN_WARNING "PCI: Device %02x:%02x not found by BIOS\n",
- dev->bus->number, dev->devfn);
- list_del(&dev->global_list);
- list_add_tail(&dev->global_list, &sorted_devices);
- }
- }
- list_splice(&sorted_devices, &pci_devices);
-}
-
-/*
- * BIOS Functions for IRQ Routing
- */
-
-struct irq_routing_options {
- u16 size;
- struct irq_info *table;
- u16 segment;
-} __attribute__((packed));
-
-struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
-{
- struct irq_routing_options opt;
- struct irq_routing_table *rt = NULL;
- int ret, map;
- unsigned long page;
- unsigned long flags;
-
- if (!pci_bios_present)
- return NULL;
- page = alloc_xenheap_page();
- if (!page)
- return NULL;
- opt.table = (struct irq_info *) page;
- opt.size = PAGE_SIZE;
- opt.segment = __KERNEL_DS;
-
- DBG("PCI: Fetching IRQ routing table... ");
- __save_flags(flags); __cli();
- __asm__("push %%es\n\t"
- "push %%ds\n\t"
- "pop %%es\n\t"
- "lcall *(%%esi); cld\n\t"
- "pop %%es\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (ret),
- "=b" (map)
- : "0" (PCIBIOS_GET_ROUTING_OPTIONS),
- "1" (0),
- "D" (&opt),
- "S" (&pci_indirect)
- : "memory");
- __restore_flags(flags);
- DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map);
- if (ret & 0xff00)
- printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
- else if (opt.size) {
- rt = xmalloc(sizeof(struct irq_routing_table) + opt.size);
- if (rt) {
- memset(rt, 0, sizeof(struct irq_routing_table));
- rt->size = opt.size + sizeof(struct irq_routing_table);
- rt->exclusive_irqs = map;
- memcpy(rt->slots, (void *) page, opt.size);
- printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n");
- }
- }
- free_xenheap_page(page);
- return rt;
-}
-
-
-int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
-{
- int ret;
- unsigned long flags;
-
- __save_flags(flags); __cli();
- __asm__("lcall *(%%esi); cld\n\t"
- "jc 1f\n\t"
- "xor %%ah, %%ah\n"
- "1:"
- : "=a" (ret)
- : "0" (PCIBIOS_SET_PCI_HW_INT),
- "b" ((dev->bus->number << 8) | dev->devfn),
- "c" ((irq << 8) | (pin + 10)),
- "S" (&pci_indirect));
- __restore_flags(flags);
- return !(ret & 0xff00);
-}
-
-#endif
-
-/*
- * Several buggy motherboards address only 16 devices and mirror
- * them to next 16 IDs. We try to detect this `feature' on all
- * primary buses (those containing host bridges as they are
- * expected to be unique) and remove the ghost devices.
- */
-
-static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
-{
- struct list_head *ln, *mn;
- struct pci_dev *d, *e;
- int mirror = PCI_DEVFN(16,0);
- int seen_host_bridge = 0;
- int i;
-
- DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
- for (ln=b->devices.next; ln != &b->devices; ln=ln->next) {
- d = pci_dev_b(ln);
- if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
- seen_host_bridge++;
- for (mn=ln->next; mn != &b->devices; mn=mn->next) {
- e = pci_dev_b(mn);
- if (e->devfn != d->devfn + mirror ||
- e->vendor != d->vendor ||
- e->device != d->device ||
- e->class != d->class)
- continue;
- for(i=0; i<PCI_NUM_RESOURCES; i++)
- if (e->resource[i].start != d->resource[i].start ||
- e->resource[i].end != d->resource[i].end ||
- e->resource[i].flags != d->resource[i].flags)
- continue;
- break;
- }
- if (mn == &b->devices)
- return;
- }
- if (!seen_host_bridge)
- return;
- printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number);
-
- ln = &b->devices;
- while (ln->next != &b->devices) {
- d = pci_dev_b(ln->next);
- if (d->devfn >= mirror) {
- list_del(&d->global_list);
- list_del(&d->bus_list);
- xfree(d);
- } else
- ln = ln->next;
- }
-}
-
-/*
- * Discover remaining PCI buses in case there are peer host bridges.
- * We use the number of last PCI bus provided by the PCI BIOS.
- */
-static void __devinit pcibios_fixup_peer_bridges(void)
-{
- int n;
- /* XEN: static is important to prevent stack overflow! */
- static struct pci_bus bus;
- static struct pci_dev dev;
- u16 l;
-
- if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
- return;
- DBG("PCI: Peer bridge fixup\n");
- for (n=0; n <= pcibios_last_bus; n++) {
- if (pci_bus_exists(&pci_root_buses, n))
- continue;
- bus.number = n;
- bus.ops = pci_root_ops;
- dev.bus = &bus;
- for(dev.devfn=0; dev.devfn<256; dev.devfn += 8)
- if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) &&
- l != 0x0000 && l != 0xffff) {
- DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l);
- printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
- pci_scan_bus(n, pci_root_ops, NULL);
- break;
- }
- }
-}
-
-/*
- * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
- */
-
-static void __devinit pci_fixup_i450nx(struct pci_dev *d)
-{
- /*
- * i450NX -- Find and scan all secondary buses on all PXB's.
- */
- int pxb, reg;
- u8 busno, suba, subb;
-#ifdef CONFIG_MULTIQUAD
- int quad = BUS2QUAD(d->bus->number);
-#endif
- printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name);
- reg = 0xd0;
- for(pxb=0; pxb<2; pxb++) {
- pci_read_config_byte(d, reg++, &busno);
- pci_read_config_byte(d, reg++, &suba);
- pci_read_config_byte(d, reg++, &subb);
- DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
- if (busno)
- pci_scan_bus(QUADLOCAL2BUS(quad,busno), pci_root_ops, NULL); /* Bus A */
- if (suba < subb)
- pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), pci_root_ops, NULL); /* Bus B */
- }
- pcibios_last_bus = -1;
-}
-
-static void __devinit pci_fixup_i450gx(struct pci_dev *d)
-{
- /*
- * i450GX and i450KX -- Find and scan all secondary buses.
- * (called separately for each PCI bridge found)
- */
- u8 busno;
- pci_read_config_byte(d, 0x4a, &busno);
- printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno);
- pci_scan_bus(busno, pci_root_ops, NULL);
- pcibios_last_bus = -1;
-}
-
-static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
-{
- /*
- * UM8886BF IDE controller sets region type bits incorrectly,
- * therefore they look like memory despite of them being I/O.
- */
- int i;
-
- printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", d->slot_name);
- for(i=0; i<4; i++)
- d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
-}
-
-static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
-{
- /*
- * NCR 53C810 returns class code 0 (at least on some systems).
- * Fix class to be PCI_CLASS_STORAGE_SCSI
- */
- if (!d->class) {
- printk("PCI: fixing NCR 53C810 class code for %s\n", d->slot_name);
- d->class = PCI_CLASS_STORAGE_SCSI << 8;
- }
-}
-
-static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
-{
- int i;
-
- /*
- * PCI IDE controllers use non-standard I/O port decoding, respect it.
- */
- if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
- return;
- DBG("PCI: IDE base address fixup for %s\n", d->slot_name);
- for(i=0; i<4; i++) {
- struct resource *r = &d->resource[i];
- if ((r->start & ~0x80) == 0x374) {
- r->start |= 2;
- r->end = r->start;
- }
- }
-}
-
-static void __devinit pci_fixup_ide_trash(struct pci_dev *d)
-{
- int i;
-
- /*
- * There exist PCI IDE controllers which have utter garbage
- * in first four base registers. Ignore that.
- */
- DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name);
- for(i=0; i<4; i++)
- d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
-}
-
-static void __devinit pci_fixup_latency(struct pci_dev *d)
-{
- /*
- * SiS 5597 and 5598 chipsets require latency timer set to
- * at most 32 to avoid lockups.
- */
- DBG("PCI: Setting max latency to 32\n");
- pcibios_max_latency = 32;
-}
-
-static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
-{
- /*
- * PIIX4 ACPI device: hardwired IRQ9
- */
- d->irq = 9;
-}
-
-/*
- * Addresses issues with problems in the memory write queue timer in
- * certain VIA Northbridges. This bugfix is per VIA's specifications,
- * except for the KL133/KM133: clearing bit 5 on those Northbridges seems
- * to trigger a bug in its integrated ProSavage video card, which
- * causes screen corruption. We only clear bits 6 and 7 for that chipset,
- * until VIA can provide us with definitive information on why screen
- * corruption occurs, and what exactly those bits do.
- *
- * VIA 8363,8622,8361 Northbridges:
- * - bits 5, 6, 7 at offset 0x55 need to be turned off
- * VIA 8367 (KT266x) Northbridges:
- * - bits 5, 6, 7 at offset 0x95 need to be turned off
- * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges:
- * - bits 6, 7 at offset 0x55 need to be turned off
- */
-
-#define VIA_8363_KL133_REVISION_ID 0x81
-#define VIA_8363_KM133_REVISION_ID 0x84
-
-static void __init pci_fixup_via_northbridge_bug(struct pci_dev *d)
-{
- u8 v;
- u8 revision;
- int where = 0x55;
- int mask = 0x1f; /* clear bits 5, 6, 7 by default */
-
- pci_read_config_byte(d, PCI_REVISION_ID, &revision);
-
- if (d->device == PCI_DEVICE_ID_VIA_8367_0) {
- /* fix pci bus latency issues resulted by NB bios error
- it appears on bug free^Wreduced kt266x's bios forces
- NB latency to zero */
- pci_write_config_byte(d, PCI_LATENCY_TIMER, 0);
-
- where = 0x95; /* the memory write queue timer register is
- different for the KT266x's: 0x95 not 0x55 */
- } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 &&
- (revision == VIA_8363_KL133_REVISION_ID ||
- revision == VIA_8363_KM133_REVISION_ID)) {
- mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5
- causes screen corruption on the KL133/KM133 */
- }
-
- pci_read_config_byte(d, where, &v);
- if (v & ~mask) {
- printk("Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \
- d->device, revision, where, v, mask, v & mask);
- v &= mask;
- pci_write_config_byte(d, where, v);
- }
-}
-
-/*
- * For some reasons Intel decided that certain parts of their
- * 815, 845 and some other chipsets must look like PCI-to-PCI bridges
- * while they are obviously not. The 82801 family (AA, AB, BAM/CAM,
- * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according
- * to Intel terminology. These devices do forward all addresses from
- * system to PCI bus no matter what are their window settings, so they are
- * "transparent" (or subtractive decoding) from programmers point of view.
- */
-static void __init pci_fixup_transparent_bridge(struct pci_dev *dev)
-{
- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
- (dev->device & 0xff00) == 0x2400)
- dev->transparent = 1;
-}
-
-struct pci_fixup pcibios_fixups[] = {
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash },
- { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge },
- { 0 }
-};
-
-/*
- * Called after each bus is probed, but before its children
- * are examined.
- */
-
-void __devinit pcibios_fixup_bus(struct pci_bus *b)
-{
- pcibios_fixup_ghosts(b);
- pci_read_bridge_bases(b);
-}
-
-struct pci_bus * __devinit pcibios_scan_root(int busnum)
-{
- struct pci_bus *bus;
-
- pci_for_each_bus(bus) {
- if (bus->number == busnum) {
- /* Already scanned */
- return bus;
- }
- }
-
- printk("PCI: Probing PCI hardware (bus %02x)\n", busnum);
-
- return pci_scan_bus(busnum, pci_root_ops, NULL);
-}
-
-void __devinit pcibios_config_init(void)
-{
- /*
- * Try all known PCI access methods. Note that we support using
- * both PCI BIOS and direct access, with a preference for direct.
- */
-
-#ifdef CONFIG_PCI_DIRECT
- struct pci_ops *tmp = NULL;
-#endif
-
-
-#ifdef CONFIG_PCI_BIOS
- if ((pci_probe & PCI_PROBE_BIOS)
- && ((pci_root_ops = pci_find_bios()))) {
- pci_probe |= PCI_BIOS_SORT;
- pci_bios_present = 1;
- pci_config_read = pci_bios_read;
- pci_config_write = pci_bios_write;
- }
-#endif
-
-#ifdef CONFIG_PCI_DIRECT
- if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2))
- && (tmp = pci_check_direct())) {
- pci_root_ops = tmp;
- if (pci_root_ops == &pci_direct_conf1) {
- pci_config_read = pci_conf1_read;
- pci_config_write = pci_conf1_write;
- }
- else {
- pci_config_read = pci_conf2_read;
- pci_config_write = pci_conf2_write;
- }
- }
-#endif
-
- return;
-}
-
-void __init pcibios_init(void)
-{
- int quad;
-
- if (!pci_root_ops)
- pcibios_config_init();
- if (!pci_root_ops) {
- printk(KERN_WARNING "PCI: System does not support PCI\n");
- return;
- }
-
- pcibios_set_cacheline_size();
-
- printk(KERN_INFO "PCI: Probing PCI hardware\n");
-#ifdef CONFIG_ACPI_PCI
- if (!acpi_noirq && !acpi_pci_irq_init()) {
- pci_using_acpi_prt = 1;
- printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
- printk(KERN_INFO "PCI: if you experience problems, try using option 'pci=noacpi' or even 'acpi=off'\n");
- }
-#endif
- if (!pci_using_acpi_prt) {
- pci_root_bus = pcibios_scan_root(0);
- pcibios_irq_init();
- pcibios_fixup_peer_bridges();
- pcibios_fixup_irqs();
- }
- if (clustered_apic_mode && (numnodes > 1)) {
- for (quad = 1; quad < numnodes; ++quad) {
- printk("Scanning PCI bus %d for quad %d\n",
- QUADLOCAL2BUS(quad,0), quad);
- pci_scan_bus(QUADLOCAL2BUS(quad,0),
- pci_root_ops, NULL);
- }
- }
-
- pcibios_resource_survey();
-
-#ifdef CONFIG_PCI_BIOS
- if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
- pcibios_sort();
-#endif
-}
-
-char * __devinit pcibios_setup(char *str)
-{
- if (!strcmp(str, "off")) {
- pci_probe = 0;
- return NULL;
- }
-#ifdef CONFIG_PCI_BIOS
- else if (!strcmp(str, "bios")) {
- pci_probe = PCI_PROBE_BIOS;
- return NULL;
- } else if (!strcmp(str, "nobios")) {
- pci_probe &= ~PCI_PROBE_BIOS;
- return NULL;
- } else if (!strcmp(str, "nosort")) {
- pci_probe |= PCI_NO_SORT;
- return NULL;
- } else if (!strcmp(str, "biosirq")) {
- pci_probe |= PCI_BIOS_IRQ_SCAN;
- return NULL;
- }
-#endif
-#ifdef CONFIG_PCI_DIRECT
- else if (!strcmp(str, "conf1")) {
- pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
- return NULL;
- }
- else if (!strcmp(str, "conf2")) {
- pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
- return NULL;
- }
-#endif
- else if (!strcmp(str, "rom")) {
- pci_probe |= PCI_ASSIGN_ROMS;
- return NULL;
- } else if (!strcmp(str, "assign-busses")) {
- pci_probe |= PCI_ASSIGN_ALL_BUSSES;
- return NULL;
- } else if (!strncmp(str, "irqmask=", 8)) {
- pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
- return NULL;
- } else if (!strncmp(str, "lastbus=", 8)) {
- pcibios_last_bus = simple_strtol(str+8, NULL, 0);
- return NULL;
- } else if (!strncmp(str, "noacpi", 6)) {
- acpi_noirq_set();
- return NULL;
- }
- return str;
-}
-
-unsigned int pcibios_assign_all_busses(void)
-{
- return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
-}
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
- int err;
-
- if ((err = pcibios_enable_resources(dev, mask)) < 0)
- return err;
-
-#ifdef CONFIG_ACPI_PCI
- if (pci_using_acpi_prt) {
- acpi_pci_irq_enable(dev);
- return 0;
- }
-#endif
-
- pcibios_enable_irq(dev);
-
- return 0;
-}
diff --git a/xen/arch/x86/pci-x86.c b/xen/arch/x86/pci-x86.c
deleted file mode 100644
index 7efc79c2da..0000000000
--- a/xen/arch/x86/pci-x86.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * Low-Level PCI Access for i386 machines
- *
- * Copyright 1993, 1994 Drew Eckhardt
- * Visionary Computing
- * (Unix and Linux consulting and custom programming)
- * Drew@Colorado.EDU
- * +1 (303) 786-7975
- *
- * Drew's work was sponsored by:
- * iX Multiuser Multitasking Magazine
- * Hannover, Germany
- * hm@ix.de
- *
- * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
- *
- * For more information, please consult the following manuals (look at
- * http://www.pcisig.com/ for how to get them):
- *
- * PCI BIOS Specification
- * PCI Local Bus Specification
- * PCI to PCI Bridge Specification
- * PCI System Design Guide
- *
- *
- * CHANGELOG :
- * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
- * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
- *
- * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic
- * Potter, potter@cao-vlsi.ibp.fr
- *
- * Jan 10, 1995 : Modified to store the information about configured pci
- * devices into a list, which can be accessed via /proc/pci by
- * Curtis Varner, cvarner@cs.ucr.edu
- *
- * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter.
- * Alpha version. Intel & UMC chipset support only.
- *
- * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code
- * moved to drivers/pci/pci.c.
- *
- * Dec 7, 1996 : Added support for direct configuration access of boards
- * with Intel compatible access schemes (tsbogend@alpha.franken.de)
- *
- * Feb 3, 1997 : Set internal functions to static, save/restore flags
- * avoid dead locks reading broken PCI BIOS, werner@suse.de
- *
- * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS
- * (mj@atrey.karlin.mff.cuni.cz)
- *
- * May 7, 1997 : Added some missing cli()'s. [mj]
- *
- * Jun 20, 1997 : Corrected problems in "conf1" type accesses.
- * (paubert@iram.es)
- *
- * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts
- * and cleaned it up... Martin Mares <mj@atrey.karlin.mff.cuni.cz>
- *
- * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj]
- *
- * May 1, 1998 : Support for peer host bridges. [mj]
- *
- * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space
- * can be accessed from interrupts even on SMP systems. [mj]
- *
- * August 1998 : Better support for peer host bridges and more paranoid
- * checks for direct hardware access. Ugh, this file starts to look as
- * a large gallery of common hardware bug workarounds (watch the comments)
- * -- the PCI specs themselves are sane, but most implementors should be
- * hit hard with \hammer scaled \magstep5. [mj]
- *
- * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj]
- *
- * Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj]
- *
- * August 1999 : New resource management and configuration access stuff. [mj]
- *
- * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges.
- * Based on ideas by Chris Frantz and David Hinds. [mj]
- *
- * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi
- * for a lot of patience during testing. [mj]
- *
- * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj]
- */
-
-#include <xen/types.h>
-#include <xen/lib.h>
-#include <xen/pci.h>
-#include <xen/init.h>
-#include <xen/ioport.h>
-#include <xen/errno.h>
-
-#include "pci-x86.h"
-
-void
-pcibios_update_resource(struct pci_dev *dev, struct resource *root,
- struct resource *res, int resource)
-{
- u32 new, check;
- int reg;
-
- new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
- if (resource < 6) {
- reg = PCI_BASE_ADDRESS_0 + 4*resource;
- } else if (resource == PCI_ROM_RESOURCE) {
- res->flags |= PCI_ROM_ADDRESS_ENABLE;
- new |= PCI_ROM_ADDRESS_ENABLE;
- reg = dev->rom_base_reg;
- } else {
- /* Somebody might have asked allocation of a non-standard resource */
- return;
- }
-
- pci_write_config_dword(dev, reg, new);
- pci_read_config_dword(dev, reg, &check);
- if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
- printk(KERN_ERR "PCI: Error while updating region "
- "%s/%d (%08x != %08x)\n", dev->slot_name, resource,
- new, check);
- }
-}
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might have be mirrored at 0x0100-0x03ff..
- */
-void
-pcibios_align_resource(void *data, struct resource *res,
- unsigned long size, unsigned long align)
-{
- if (res->flags & IORESOURCE_IO) {
- unsigned long start = res->start;
-
- if (start & 0x300) {
- start = (start + 0x3ff) & ~0x3ff;
- res->start = start;
- }
- }
-}
-
-
-/*
- * Handle resources of PCI devices. If the world were perfect, we could
- * just allocate all the resource regions and do nothing more. It isn't.
- * On the other hand, we cannot just re-allocate all devices, as it would
- * require us to know lots of host bridge internals. So we attempt to
- * keep as much of the original configuration as possible, but tweak it
- * when it's found to be wrong.
- *
- * Known BIOS problems we have to work around:
- * - I/O or memory regions not configured
- * - regions configured, but not enabled in the command register
- * - bogus I/O addresses above 64K used
- * - expansion ROMs left enabled (this may sound harmless, but given
- * the fact the PCI specs explicitly allow address decoders to be
- * shared between expansion ROMs and other resource regions, it's
- * at least dangerous)
- *
- * Our solution:
- * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
- * This gives us fixed barriers on where we can allocate.
- * (2) Allocate resources for all enabled devices. If there is
- * a collision, just mark the resource as unallocated. Also
- * disable expansion ROMs during this step.
- * (3) Try to allocate resources for disabled devices. If the
- * resources were assigned correctly, everything goes well,
- * if they weren't, they won't disturb allocation of other
- * resources.
- * (4) Assign new addresses to resources which were either
- * not configured at all or misconfigured. If explicitly
- * requested by the user, configure expansion ROM address
- * as well.
- */
-
-static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
-{
- struct list_head *ln;
- struct pci_bus *bus;
- struct pci_dev *dev;
- int idx;
- struct resource *r, *pr;
-
- /* Depth-First Search on bus tree */
- for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
- bus = pci_bus_b(ln);
- if ((dev = bus->self)) {
- for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
- r = &dev->resource[idx];
- if (!r->start)
- continue;
- pr = pci_find_parent_resource(dev, r);
- if (!pr || request_resource(pr, r) < 0)
- printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name);
- }
- }
- pcibios_allocate_bus_resources(&bus->children);
- }
-}
-
-static void __init pcibios_allocate_resources(int pass)
-{
- struct pci_dev *dev;
- int idx, disabled;
- u16 command;
- struct resource *r, *pr;
-
- pci_for_each_dev(dev) {
- pci_read_config_word(dev, PCI_COMMAND, &command);
- for(idx = 0; idx < 6; idx++) {
- r = &dev->resource[idx];
- if (r->parent) /* Already allocated */
- continue;
- if (!r->start) /* Address not assigned at all */
- continue;
- if (r->flags & IORESOURCE_IO)
- disabled = !(command & PCI_COMMAND_IO);
- else
- disabled = !(command & PCI_COMMAND_MEMORY);
- if (pass == disabled) {
- DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
- r->start, r->end, r->flags, disabled, pass);
- pr = pci_find_parent_resource(dev, r);
- if (!pr || request_resource(pr, r) < 0) {
- printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name);
- /* We'll assign a new address later */
- r->end -= r->start;
- r->start = 0;
- }
- }
- }
- if (!pass) {
- r = &dev->resource[PCI_ROM_RESOURCE];
- if (r->flags & PCI_ROM_ADDRESS_ENABLE) {
- /* Turn the ROM off, leave the resource region, but keep it unregistered. */
- u32 reg;
- DBG("PCI: Switching off ROM of %s\n", dev->slot_name);
- r->flags &= ~PCI_ROM_ADDRESS_ENABLE;
- pci_read_config_dword(dev, dev->rom_base_reg, &reg);
- pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
- }
- }
- }
-}
-
-static void __init pcibios_assign_resources(void)
-{
- struct pci_dev *dev;
- int idx;
- struct resource *r;
-
- pci_for_each_dev(dev) {
- int class = dev->class >> 8;
-
- /* Don't touch classless devices and host bridges */
- if (!class || class == PCI_CLASS_BRIDGE_HOST)
- continue;
-
- for(idx=0; idx<6; idx++) {
- r = &dev->resource[idx];
-
- /*
- * Don't touch IDE controllers and I/O ports of video cards!
- */
- if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
- (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
- continue;
-
- /*
- * We shall assign a new address to this resource, either because
- * the BIOS forgot to do so or because we have decided the old
- * address was unusable for some reason.
- */
- if (!r->start && r->end)
- pci_assign_resource(dev, idx);
- }
-
- if (pci_probe & PCI_ASSIGN_ROMS) {
- r = &dev->resource[PCI_ROM_RESOURCE];
- r->end -= r->start;
- r->start = 0;
- if (r->end)
- pci_assign_resource(dev, PCI_ROM_RESOURCE);
- }
- }
-}
-
-void __init pcibios_set_cacheline_size(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- pci_cache_line_size = 32 >> 2;
- if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
- pci_cache_line_size = 64 >> 2; /* K7 & K8 */
- else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
- pci_cache_line_size = 128 >> 2; /* P4 */
-}
-
-void __init pcibios_resource_survey(void)
-{
- DBG("PCI: Allocating resources\n");
- pcibios_allocate_bus_resources(&pci_root_buses);
- pcibios_allocate_resources(0);
- pcibios_allocate_resources(1);
- pcibios_assign_resources();
-}
-
-int pcibios_enable_resources(struct pci_dev *dev, int mask)
-{
- u16 cmd, old_cmd;
- int idx;
- struct resource *r;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- old_cmd = cmd;
- for(idx=0; idx<6; idx++) {
- /* Only set up the requested stuff */
- if (!(mask & (1<<idx)))
- continue;
-
- r = &dev->resource[idx];
- if (!r->start && r->end) {
- printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
- return -EINVAL;
- }
- if (r->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (r->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- if (dev->resource[PCI_ROM_RESOURCE].start)
- cmd |= PCI_COMMAND_MEMORY;
- if (cmd != old_cmd) {
- printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
-/*
- * If we set up a device for bus mastering, we need to check the latency
- * timer as certain crappy BIOSes forget to set it properly.
- */
-unsigned int pcibios_max_latency = 255;
-
-void pcibios_set_master(struct pci_dev *dev)
-{
- u8 lat;
- pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
- if (lat < 16)
- lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
- else if (lat > pcibios_max_latency)
- lat = pcibios_max_latency;
- else
- return;
- printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat);
- pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
-}
-
-#if 0
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
-{
- unsigned long prot;
-
- /* I/O space cannot be accessed via normal processor loads and
- * stores on this platform.
- */
- if (mmap_state == pci_mmap_io)
- return -EINVAL;
-
- /* Leave vm_pgoff as-is, the PCI space address is the physical
- * address on this platform.
- */
- vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
-
- prot = pgprot_val(vma->vm_page_prot);
- if (boot_cpu_data.x86 > 3)
- prot |= _PAGE_PCD | _PAGE_PWT;
- vma->vm_page_prot = __pgprot(prot);
-
- /* Write-combine setting is ignored, it is changed via the mtrr
- * interfaces on this platform.
- */
- if (remap_page_range(vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot))
- return -EAGAIN;
-
- return 0;
-}
-#endif
diff --git a/xen/arch/x86/pci-x86.h b/xen/arch/x86/pci-x86.h
deleted file mode 100644
index fe70b10166..0000000000
--- a/xen/arch/x86/pci-x86.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Low-Level PCI Access for i386 machines.
- *
- * (c) 1999 Martin Mares <mj@ucw.cz>
- */
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(x...) printk(x)
-#else
-#define DBG(x...)
-#endif
-
-#define PCI_PROBE_BIOS 0x0001
-#define PCI_PROBE_CONF1 0x0002
-#define PCI_PROBE_CONF2 0x0004
-#define PCI_NO_SORT 0x0100
-#define PCI_BIOS_SORT 0x0200
-#define PCI_NO_CHECKS 0x0400
-#define PCI_ASSIGN_ROMS 0x1000
-#define PCI_BIOS_IRQ_SCAN 0x2000
-#define PCI_ASSIGN_ALL_BUSSES 0x4000
-
-extern unsigned int pci_probe;
-
-/* pci-i386.c */
-
-extern unsigned int pcibios_max_latency;
-extern u8 pci_cache_line_size;
-
-void pcibios_resource_survey(void);
-void pcibios_set_cacheline_size(void);
-int pcibios_enable_resources(struct pci_dev *, int);
-
-/* pci-pc.c */
-
-extern int pcibios_last_bus;
-extern struct pci_bus *pci_root_bus;
-extern struct pci_ops *pci_root_ops;
-
-/* pci-irq.c */
-
-struct irq_info {
- u8 bus, devfn; /* Bus, device and function */
- struct {
- u8 link; /* IRQ line ID, chipset dependent, 0=not routed */
- u16 bitmap; /* Available IRQs */
- } __attribute__((packed)) irq[4];
- u8 slot; /* Slot number, 0=onboard */
- u8 rfu;
-} __attribute__((packed));
-
-struct irq_routing_table {
- u32 signature; /* PIRQ_SIGNATURE should be here */
- u16 version; /* PIRQ_VERSION */
- u16 size; /* Table size in bytes */
- u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */
- u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */
- u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */
- u32 miniport_data; /* Crap */
- u8 rfu[11];
- u8 checksum; /* Modulo 256 checksum must give zero */
- struct irq_info slots[0];
-} __attribute__((packed));
-
-extern unsigned int pcibios_irq_mask;
-
-void pcibios_irq_init(void);
-void pcibios_fixup_irqs(void);
-void pcibios_enable_irq(struct pci_dev *dev);
diff --git a/xen/arch/x86/pdb-linux.c b/xen/arch/x86/pdb-linux.c
deleted file mode 100644
index cb4f0e0e78..0000000000
--- a/xen/arch/x86/pdb-linux.c
+++ /dev/null
@@ -1,100 +0,0 @@
-
-/*
- * pervasive debugger
- * www.cl.cam.ac.uk/netos/pdb
- *
- * alex ho
- * 2004
- * university of cambridge computer laboratory
- *
- * linux & i386 dependent code. bleech.
- */
-
-#include <asm/pdb.h>
-
-/* offset to the first instruction in the linux system call code
- where we can safely set a breakpoint */
-unsigned int pdb_linux_syscall_enter_bkpt_offset = 20;
-
-/* offset to eflags saved on the stack after an int 80 */
-unsigned int pdb_linux_syscall_eflags_offset = 48;
-
-/* offset to the instruction pointer saved on the stack after an int 80 */
-unsigned int pdb_linux_syscall_eip_offset = 40;
-
-unsigned char
-pdb_linux_set_bkpt (unsigned long addr)
-{
- unsigned char old_instruction = *(unsigned char *)addr;
- *(unsigned char *)addr = 0xcc;
- return old_instruction;
-}
-
-void
-pdb_linux_clr_bkpt (unsigned long addr, unsigned char value)
-{
- *(unsigned char *)addr = value;
-}
-
-void
-pdb_linux_syscall_enter_bkpt (struct xen_regs *regs, long error_code,
- trap_info_t *ti)
-{
- /* set at breakpoint at the beginning of the
- system call in the target domain */
-
- pdb_system_call_enter_instr = pdb_linux_set_bkpt(ti->address +
- pdb_linux_syscall_enter_bkpt_offset);
- pdb_system_call = 1;
-}
-
-void
-pdb_linux_syscall_exit_bkpt (struct xen_regs *regs, struct pdb_context *pdb_ctx)
-{
- /*
- we've hit an int 0x80 in a user's program, jumped into xen
- (traps.c::do_general_protection()) which re-wrote the next
- instruction in the os kernel to 0xcc, and then hit that
- exception.
-
- we need to re-write the return instruction in the user's
- program so that we know when we have finished the system call
- and are back in the user's program.
-
- at this point our stack should look something like this:
-
- esp = 0x80a59f0
- esp + 4 = 0x0
- esp + 8 = 0x80485a0
- esp + 12 = 0x2d
- esp + 16 = 0x80485f4
- esp + 20 = 0xbffffa48
- esp + 24 = 0xd
- esp + 28 = 0xc00a0833
- esp + 32 = 0x833
- esp + 36 = 0xd
- esp + 40 = 0x804dcdd saved eip
- esp + 44 = 0x82b saved cs
- esp + 48 = 0x213392 saved eflags
- esp + 52 = 0xbffffa2c saved esp
- esp + 56 = 0x833 saved ss
- esp + 60 = 0x1000000
- */
-
- /* restore the entry instruction for the system call */
- pdb_linux_clr_bkpt(regs->eip - 1, pdb_system_call_enter_instr);
-
- /* save the address of eflags that was saved on the stack */
- pdb_system_call_eflags_addr = (regs->esp +
- pdb_linux_syscall_eflags_offset);
-
- /* muck with the return instruction so that we trap back into the
- debugger when re-entering user space */
- pdb_system_call_next_addr = *(unsigned long *)(regs->esp +
- pdb_linux_syscall_eip_offset);
- pdb_linux_get_values (&pdb_system_call_leave_instr, 1,
- pdb_system_call_next_addr,
- pdb_ctx->process, pdb_ctx->ptbr);
- pdb_linux_set_values ("cc", 1, pdb_system_call_next_addr,
- pdb_ctx->process, pdb_ctx->ptbr);
-}
diff --git a/xen/arch/x86/pdb-stub.c b/xen/arch/x86/pdb-stub.c
deleted file mode 100644
index 568bcea113..0000000000
--- a/xen/arch/x86/pdb-stub.c
+++ /dev/null
@@ -1,1280 +0,0 @@
-
-/*
- * pervasive debugger
- * www.cl.cam.ac.uk/netos/pdb
- *
- * alex ho
- * 2004
- * university of cambridge computer laboratory
- *
- * code adapted originally from kgdb, nemesis, & gdbserver
- */
-
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <asm/regs.h>
-#include <xen/keyhandler.h>
-#include <asm/apic.h>
-#include <asm/domain_page.h> /* [un]map_domain_mem */
-#include <asm/processor.h>
-#include <asm/pdb.h>
-#include <xen/list.h>
-#include <xen/serial.h>
-#include <xen/softirq.h>
-#include <xen/init.h>
-
-/* opt_pdb: Name of serial port for Xen pervasive debugger (and enable pdb) */
-static unsigned char opt_pdb[10] = "none";
-string_param("pdb", opt_pdb);
-
-#define PDB_DEBUG_TRACE
-#ifdef PDB_DEBUG_TRACE
-#define TRC(_x) _x
-#else
-#define TRC(_x)
-#endif
-
-#define DEBUG_EXCEPTION 0x01
-#define BREAKPT_EXCEPTION 0x03
-#define PDB_LIVE_EXCEPTION 0x58
-#define KEYPRESS_EXCEPTION 0x88
-
-#define BUFMAX 400
-
-static const char hexchars[] = "0123456789abcdef";
-
-static int remote_debug;
-
-#define PDB_BUFMAX 1024
-static char pdb_in_buffer[PDB_BUFMAX];
-static char pdb_out_buffer[PDB_BUFMAX];
-static char pdb_buffer[PDB_BUFMAX];
-
-struct pdb_context pdb_ctx;
-int pdb_continue_thread = 0;
-int pdb_general_thread = 0;
-
-void pdb_put_packet (unsigned char *buffer, int ack);
-void pdb_bkpt_check (u_char *buffer, int length,
- unsigned long cr3, unsigned long addr);
-
-int pdb_initialized = 0;
-int pdb_page_fault_possible = 0;
-int pdb_page_fault_scratch = 0; /* just a handy variable */
-int pdb_page_fault = 0;
-static int pdb_serhnd = -1;
-static int pdb_stepping = 0;
-
-int pdb_system_call = 0;
-unsigned char pdb_system_call_enter_instr = 0; /* original enter instr */
-unsigned char pdb_system_call_leave_instr = 0; /* original next instr */
-unsigned long pdb_system_call_next_addr = 0; /* instr after int 0x80 */
-unsigned long pdb_system_call_eflags_addr = 0; /* saved eflags on stack */
-
-static inline void pdb_put_char(unsigned char c)
-{
- serial_putc(pdb_serhnd, c);
-}
-
-static inline unsigned char pdb_get_char(void)
-{
- return serial_getc(pdb_serhnd);
-}
-
-int
-get_char (char *addr)
-{
- return *addr;
-}
-
-void
-set_char (char *addr, int val)
-{
- *addr = val;
-}
-
-void
-pdb_process_query (char *ptr)
-{
- if (strcmp(ptr, "C") == 0)
- {
- /* empty string */
- }
- else if (strcmp(ptr, "fThreadInfo") == 0)
- {
-#ifdef PDB_PAST
- struct domain *p;
-#endif /* PDB_PAST */
-
- int buf_idx = 0;
-
- pdb_out_buffer[buf_idx++] = 'l';
- pdb_out_buffer[buf_idx++] = 0;
-
-#ifdef PDB_PAST
- switch (pdb_level)
- {
- case PDB_LVL_XEN: /* return a list of domains */
- {
- int count = 0;
-
- read_lock(&domlist_lock);
-
- pdb_out_buffer[buf_idx++] = 'm';
- for_each_domain ( p )
- {
- domid_t domain = p->domain + PDB_ID_OFFSET;
-
- if (count > 0)
- {
- pdb_out_buffer[buf_idx++] = ',';
- }
- if (domain > 15)
- {
- pdb_out_buffer[buf_idx++] = hexchars[domain >> 4];
- }
- pdb_out_buffer[buf_idx++] = hexchars[domain % 16];
- count++;
- }
- pdb_out_buffer[buf_idx++] = 0;
-
- read_unlock(&domlist_lock);
- break;
- }
- case PDB_LVL_GUESTOS: /* return a list of processes */
- {
- int foobar[20];
- int loop, total;
-
- /* this cr3 is wrong! */
- total = pdb_linux_process_list(pdb_ctx[pdb_level].info_cr3,
- foobar, 20);
-
- pdb_out_buffer[buf_idx++] = 'm';
- pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */
- for (loop = 0; loop < total; loop++)
- {
- int pid = foobar[loop] + PDB_ID_OFFSET;
-
- pdb_out_buffer[buf_idx++] = ',';
- if (pid > 15)
- {
- pdb_out_buffer[buf_idx++] = hexchars[pid >> 4];
- }
- pdb_out_buffer[buf_idx++] = hexchars[pid % 16];
- }
- pdb_out_buffer[buf_idx++] = 0;
- break;
- }
- case PDB_LVL_PROCESS: /* hmmm... */
- {
- pdb_out_buffer[buf_idx++] = 'm';
- pdb_out_buffer[buf_idx++] = '1'; /* 1 is to go back */
- break;
- }
- default:
- break;
- }
-#endif /* PDB_PAST */
-
- }
- else if (strcmp(ptr, "sThreadInfo") == 0)
- {
- int buf_idx = 0;
-
- pdb_out_buffer[buf_idx++] = 'l';
- pdb_out_buffer[buf_idx++] = 0;
- }
- else if (strncmp(ptr, "ThreadExtraInfo,", 16) == 0)
- {
- int thread = 0;
- char *message = "foobar ?";
-
- ptr += 16;
- if (hexToInt (&ptr, &thread))
- {
- mem2hex (message, pdb_out_buffer, strlen(message) + 1);
- }
-
-#ifdef PDB_PAST
- int thread = 0;
- char message[16];
- struct domain *p;
-
- strncpy (message, dom0->name, 16);
-
- ptr += 16;
- if (hexToInt (&ptr, &thread))
- {
- mem2hex ((char *)message, pdb_out_buffer, strlen(message) + 1);
- }
-#endif /* PDB_PAST */
-
-#ifdef PDB_FUTURE
- {
- char string[task_struct_comm_length];
-
- string[0] = 0;
- pdb_linux_process_details (cr3, pid, string);
- printk (" (%s)", string);
- }
-#endif /* PDB_FUTURE*/
-
- }
- else if (strcmp(ptr, "Offsets") == 0)
- {
- /* empty string */
- }
- else if (strncmp(ptr, "Symbol", 6) == 0)
- {
- strcpy (pdb_out_buffer, "OK");
- }
- else
- {
- printk("pdb: error, unknown query [%s]\n", ptr);
- }
-}
-
-void
-pdb_x86_to_gdb_regs (char *buffer, struct xen_regs *regs)
-{
- int idx = 0;
-
- mem2hex ((char *)&regs->eax, &buffer[idx], sizeof(regs->eax));
- idx += sizeof(regs->eax) * 2;
- mem2hex ((char *)&regs->ecx, &buffer[idx], sizeof(regs->ecx));
- idx += sizeof(regs->ecx) * 2;
- mem2hex ((char *)&regs->edx, &buffer[idx], sizeof(regs->edx));
- idx += sizeof(regs->edx) * 2;
- mem2hex ((char *)&regs->ebx, &buffer[idx], sizeof(regs->ebx));
- idx += sizeof(regs->ebx) * 2;
- mem2hex ((char *)&regs->esp, &buffer[idx], sizeof(regs->esp));
- idx += sizeof(regs->esp) * 2;
- mem2hex ((char *)&regs->ebp, &buffer[idx], sizeof(regs->ebp));
- idx += sizeof(regs->ebp) * 2;
- mem2hex ((char *)&regs->esi, &buffer[idx], sizeof(regs->esi));
- idx += sizeof(regs->esi) * 2;
- mem2hex ((char *)&regs->edi, &buffer[idx], sizeof(regs->edi));
- idx += sizeof(regs->edi) * 2;
- mem2hex ((char *)&regs->eip, &buffer[idx], sizeof(regs->eip));
- idx += sizeof(regs->eip) * 2;
- mem2hex ((char *)&regs->eflags, &buffer[idx], sizeof(regs->eflags));
- idx += sizeof(regs->eflags) * 2;
- mem2hex ((char *)&regs->cs, &buffer[idx], sizeof(regs->cs));
- idx += sizeof(regs->cs) * 2;
- mem2hex ((char *)&regs->ss, &buffer[idx], sizeof(regs->ss));
- idx += sizeof(regs->ss) * 2;
- mem2hex ((char *)&regs->ds, &buffer[idx], sizeof(regs->ds));
- idx += sizeof(regs->ds) * 2;
- mem2hex ((char *)&regs->es, &buffer[idx], sizeof(regs->es));
- idx += sizeof(regs->es) * 2;
- mem2hex ((char *)&regs->fs, &buffer[idx], sizeof(regs->fs));
- idx += sizeof(regs->fs) * 2;
- mem2hex ((char *)&regs->gs, &buffer[idx], sizeof(regs->gs));
-}
-
-/* at this point we allow any register to be changed, caveat emptor */
-void
-pdb_gdb_to_x86_regs (struct xen_regs *regs, char *buffer)
-{
- hex2mem(buffer, (char *)&regs->eax, sizeof(regs->eax));
- buffer += sizeof(regs->eax) * 2;
- hex2mem(buffer, (char *)&regs->ecx, sizeof(regs->ecx));
- buffer += sizeof(regs->ecx) * 2;
- hex2mem(buffer, (char *)&regs->edx, sizeof(regs->edx));
- buffer += sizeof(regs->edx) * 2;
- hex2mem(buffer, (char *)&regs->ebx, sizeof(regs->ebx));
- buffer += sizeof(regs->ebx) * 2;
- hex2mem(buffer, (char *)&regs->esp, sizeof(regs->esp));
- buffer += sizeof(regs->esp) * 2;
- hex2mem(buffer, (char *)&regs->ebp, sizeof(regs->ebp));
- buffer += sizeof(regs->ebp) * 2;
- hex2mem(buffer, (char *)&regs->esi, sizeof(regs->esi));
- buffer += sizeof(regs->esi) * 2;
- hex2mem(buffer, (char *)&regs->edi, sizeof(regs->edi));
- buffer += sizeof(regs->edi) * 2;
- hex2mem(buffer, (char *)&regs->eip, sizeof(regs->eip));
- buffer += sizeof(regs->eip) * 2;
- hex2mem(buffer, (char *)&regs->eflags, sizeof(regs->eflags));
- buffer += sizeof(regs->eflags) * 2;
- hex2mem(buffer, (char *)&regs->cs, sizeof(regs->cs));
- buffer += sizeof(regs->cs) * 2;
- hex2mem(buffer, (char *)&regs->ss, sizeof(regs->ss));
- buffer += sizeof(regs->ss) * 2;
- hex2mem(buffer, (char *)&regs->ds, sizeof(regs->ds));
- buffer += sizeof(regs->ds) * 2;
- hex2mem(buffer, (char *)&regs->es, sizeof(regs->es));
- buffer += sizeof(regs->es) * 2;
- hex2mem(buffer, (char *)&regs->fs, sizeof(regs->fs));
- buffer += sizeof(regs->fs) * 2;
- hex2mem(buffer, (char *)&regs->gs, sizeof(regs->gs));
-}
-
-int
-pdb_process_command (char *ptr, struct xen_regs *regs, unsigned long cr3,
- int sigval)
-{
- int length;
- unsigned long addr;
- int ack = 1; /* wait for ack in pdb_put_packet */
- int go = 0;
-
- TRC(printf("pdb: [%s]\n", ptr));
-
- pdb_out_buffer[0] = 0;
-
- if (pdb_ctx.valid == 1)
- {
- if (pdb_ctx.domain == -1) /* pdb context: xen */
- {
- struct domain *p;
-
- p = &idle0_task;
- if (p->mm.shadow_mode)
- pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table);
- else
- pdb_ctx.ptbr = pagetable_val(p->mm.pagetable);
- }
- else if (pdb_ctx.process == -1) /* pdb context: guest os */
- {
- struct domain *p;
-
- if (pdb_ctx.domain == -2)
- {
- p = find_last_domain();
- }
- else
- {
- p = find_domain_by_id(pdb_ctx.domain);
- }
- if (p == NULL)
- {
- printk ("pdb error: unknown domain [0x%x]\n", pdb_ctx.domain);
- strcpy (pdb_out_buffer, "E01");
- pdb_ctx.domain = -1;
- goto exit;
- }
- if (p->mm.shadow_mode)
- pdb_ctx.ptbr = pagetable_val(p->mm.shadow_table);
- else
- pdb_ctx.ptbr = pagetable_val(p->mm.pagetable);
- put_domain(p);
- }
- else /* pdb context: process */
- {
- struct domain *p;
- unsigned long domain_ptbr;
-
- p = find_domain_by_id(pdb_ctx.domain);
- if (p == NULL)
- {
- printk ("pdb error: unknown domain [0x%x][0x%x]\n",
- pdb_ctx.domain, pdb_ctx.process);
- strcpy (pdb_out_buffer, "E01");
- pdb_ctx.domain = -1;
- goto exit;
- }
- if (p->mm.shadow_mode)
- domain_ptbr = pagetable_val(p->mm.shadow_table);
- else
- domain_ptbr = pagetable_val(p->mm.pagetable);
- put_domain(p);
-
- pdb_ctx.ptbr = domain_ptbr;
- /*pdb_ctx.ptbr=pdb_linux_pid_ptbr(domain_ptbr, pdb_ctx.process);*/
- }
-
- pdb_ctx.valid = 0;
- TRC(printk ("pdb change context (dom:%d, proc:%d) now 0x%lx\n",
- pdb_ctx.domain, pdb_ctx.process, pdb_ctx.ptbr));
- }
-
- switch (*ptr++)
- {
- case '?':
- pdb_out_buffer[0] = 'S';
- pdb_out_buffer[1] = hexchars[sigval >> 4];
- pdb_out_buffer[2] = hexchars[sigval % 16];
- pdb_out_buffer[3] = 0;
- break;
- case 'S': /* step with signal */
- case 's': /* step */
- {
- if ( pdb_system_call_eflags_addr != 0 )
- {
- unsigned long eflags;
- char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */
-
- pdb_linux_get_values((u_char*)&eflags, sizeof(eflags),
- pdb_system_call_eflags_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- eflags |= X86_EFLAGS_TF;
- mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags));
- pdb_linux_set_values(eflags_buf, sizeof(eflags),
- pdb_system_call_eflags_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- }
-
- regs->eflags |= X86_EFLAGS_TF;
- pdb_stepping = 1;
- return 1;
- /* not reached */
- }
- case 'C': /* continue with signal */
- case 'c': /* continue */
- {
- if ( pdb_system_call_eflags_addr != 0 )
- {
- unsigned long eflags;
- char eflags_buf[sizeof(eflags)*2]; /* STUPID STUPID STUPID */
-
- pdb_linux_get_values((u_char*)&eflags, sizeof(eflags),
- pdb_system_call_eflags_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- eflags &= ~X86_EFLAGS_TF;
- mem2hex ((u_char *)&eflags, eflags_buf, sizeof(eflags));
- pdb_linux_set_values(eflags_buf, sizeof(eflags),
- pdb_system_call_eflags_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- }
-
- regs->eflags &= ~X86_EFLAGS_TF;
- return 1; /* jump out before replying to gdb */
- /* not reached */
- }
- case 'd':
- remote_debug = !(remote_debug); /* toggle debug flag */
- break;
- case 'D': /* detach */
- return go;
- /* not reached */
- case 'g': /* return the value of the CPU registers */
- {
- pdb_x86_to_gdb_regs (pdb_out_buffer, regs);
- break;
- }
- case 'G': /* set the value of the CPU registers - return OK */
- {
- pdb_gdb_to_x86_regs (regs, ptr);
- break;
- }
- case 'H':
- {
- int thread;
- char *next = &ptr[1];
-
- if (hexToInt (&next, &thread))
- {
- if (*ptr == 'c')
- {
- pdb_continue_thread = thread;
- }
- else if (*ptr == 'g')
- {
- pdb_general_thread = thread;
- }
- else
- {
- printk ("pdb error: unknown set thread command %c (%d)\n",
- *ptr, thread);
- strcpy (pdb_out_buffer, "E00");
- break;
- }
- }
- strcpy (pdb_out_buffer, "OK");
- break;
- }
- case 'k': /* kill request */
- {
- strcpy (pdb_out_buffer, "OK"); /* ack for fun */
- printk ("don't kill bill...\n");
- ack = 0;
- break;
- }
-
- case 'q':
- {
- pdb_process_query(ptr);
- break;
- }
-
- /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
- case 'm':
- {
- /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */
- if (hexToInt (&ptr, (int *)&addr))
- if (*(ptr++) == ',')
- if (hexToInt (&ptr, &length))
- {
- ptr = 0;
-
- pdb_page_fault_possible = 1;
- pdb_page_fault = 0;
- if (addr >= PAGE_OFFSET)
- {
- mem2hex ((char *) addr, pdb_out_buffer, length);
- }
- else if (pdb_ctx.process != -1)
- {
- pdb_linux_get_values(pdb_buffer, length, addr,
- pdb_ctx.process, pdb_ctx.ptbr);
- mem2hex (pdb_buffer, pdb_out_buffer, length);
- }
- else
- {
- pdb_get_values (pdb_buffer, length,
- pdb_ctx.ptbr, addr);
- mem2hex (pdb_buffer, pdb_out_buffer, length);
- }
-
- pdb_page_fault_possible = 0;
- if (pdb_page_fault)
- {
- strcpy (pdb_out_buffer, "E03");
- }
- }
-
- if (ptr)
- {
- strcpy (pdb_out_buffer, "E01");
- }
- break;
- }
-
- /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */
- case 'M':
- {
- /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */
- if (hexToInt (&ptr, (int *)&addr))
- if (*(ptr++) == ',')
- if (hexToInt (&ptr, &length))
- if (*(ptr++) == ':')
- {
-
- pdb_page_fault_possible = 1;
- pdb_page_fault = 0;
- if (addr >= PAGE_OFFSET)
- {
- hex2mem (ptr, (char *)addr, length);
- pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
- }
- else if (pdb_ctx.process != -1)
- {
- pdb_linux_set_values(ptr, length, addr,
- pdb_ctx.process,
- pdb_ctx.ptbr);
- pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
- }
- else
- {
- pdb_set_values (ptr, length,
- pdb_ctx.ptbr, addr);
- pdb_bkpt_check(ptr, length, pdb_ctx.ptbr, addr);
- }
- pdb_page_fault_possible = 0;
- if (pdb_page_fault)
- {
- strcpy (pdb_out_buffer, "E03");
- }
- else
- {
- strcpy (pdb_out_buffer, "OK");
- }
-
- ptr = 0;
- }
- if (ptr)
- {
- strcpy (pdb_out_buffer, "E02");
- }
- break;
- }
- case 'T':
- {
- int id;
-
- if (hexToInt (&ptr, &id))
- {
- strcpy (pdb_out_buffer, "E00");
-
-#ifdef PDB_PAST
-
- switch (pdb_level) /* previous level */
- {
- case PDB_LVL_XEN:
- {
- struct domain *p;
- id -= PDB_ID_OFFSET;
- if ( (p = find_domain_by_id(id)) == NULL)
- strcpy (pdb_out_buffer, "E00");
- else
- strcpy (pdb_out_buffer, "OK");
- put_domain(p);
-
- pdb_level = PDB_LVL_GUESTOS;
- pdb_ctx[pdb_level].ctrl = id;
- pdb_ctx[pdb_level].info = id;
- break;
- }
- case PDB_LVL_GUESTOS:
- {
- if (pdb_level == -1)
- {
- pdb_level = PDB_LVL_XEN;
- }
- else
- {
- pdb_level = PDB_LVL_PROCESS;
- pdb_ctx[pdb_level].ctrl = id;
- pdb_ctx[pdb_level].info = id;
- }
- break;
- }
- case PDB_LVL_PROCESS:
- {
- if (pdb_level == -1)
- {
- pdb_level = PDB_LVL_GUESTOS;
- }
- break;
- }
- default:
- {
- printk ("pdb internal error: invalid level [%d]\n",
- pdb_level);
- }
- }
-
-#endif /* PDB_PAST */
- }
- break;
- }
- }
-
-exit:
- /* reply to the request */
- pdb_put_packet (pdb_out_buffer, ack);
-
- return go;
-}
-
-/*
- * process an input character from the serial line.
- *
- * return "1" if the character is a gdb debug string
- * (and hence shouldn't be further processed).
- */
-
-int pdb_debug_state = 0; /* small parser state machine */
-
-int hex(char ch)
-{
- if ((ch >= 'a') && (ch <= 'f')) return (ch-'a'+10);
- if ((ch >= '0') && (ch <= '9')) return (ch-'0');
- if ((ch >= 'A') && (ch <= 'F')) return (ch-'A'+10);
- return (-1);
-}
-
-/* convert the memory pointed to by mem into hex, placing result in buf */
-/* return a pointer to the last char put in buf (null) */
-char *
-mem2hex (mem, buf, count)
- char *mem;
- char *buf;
- int count;
-{
- int i;
- unsigned char ch;
-
- for (i = 0; i < count; i++)
- {
- ch = get_char (mem++);
- *buf++ = hexchars[ch >> 4];
- *buf++ = hexchars[ch % 16];
- }
- *buf = 0;
- return (buf);
-}
-
-/* convert the hex array pointed to by buf into binary to be placed in mem */
-/* return a pointer to the character AFTER the last byte written */
-char *
-hex2mem (buf, mem, count)
- char *buf;
- char *mem;
- int count;
-{
- int i;
- unsigned char ch;
-
- for (i = 0; i < count; i++)
- {
- ch = hex (*buf++) << 4;
- ch = ch + hex (*buf++);
- set_char (mem++, ch);
- }
- return (mem);
-}
-
-int
-hexToInt (char **ptr, int *intValue)
-{
- int numChars = 0;
- int hexValue;
- int negative = 0;
-
- *intValue = 0;
-
- if (**ptr == '-')
- {
- negative = 1;
- numChars++;
- (*ptr)++;
- }
-
- while (**ptr)
- {
- hexValue = hex (**ptr);
- if (hexValue >= 0)
- {
- *intValue = (*intValue << 4) | hexValue;
- numChars++;
- }
- else
- break;
-
- (*ptr)++;
- }
-
- if ( negative )
- *intValue *= -1;
-
- return (numChars);
-}
-
-/***********************************************************************/
-/***********************************************************************/
-
-
-/*
- * Add a breakpoint to the list of known breakpoints.
- * For now there should only be two or three breakpoints so
- * we use a simple linked list. In the future, maybe a red-black tree?
- */
-struct pdb_breakpoint breakpoints;
-
-void pdb_bkpt_add (unsigned long cr3, unsigned long address)
-{
- struct pdb_breakpoint *bkpt = xmalloc(sizeof(*bkpt));
- bkpt->cr3 = cr3;
- bkpt->address = address;
- list_add(&bkpt->list, &breakpoints.list);
-}
-
-/*
- * Check to see of the breakpoint is in the list of known breakpoints
- * Return 1 if it has been set, NULL otherwise.
- */
-struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3,
- unsigned long address)
-{
- struct pdb_breakpoint *bkpt;
-
- list_for_each_entry ( bkpt, &breakpoints.list, list )
- {
- if ( bkpt->cr3 == cr3 && bkpt->address == address )
- return bkpt;
- }
-
- return NULL;
-}
-
-/*
- * Remove a breakpoint to the list of known breakpoints.
- * Return 1 if the element was not found, otherwise 0.
- */
-int pdb_bkpt_remove (unsigned long cr3, unsigned long address)
-{
- struct pdb_breakpoint *bkpt;
-
- list_for_each_entry ( bkpt, &breakpoints.list, list )
- {
- if ( bkpt->cr3 == cr3 && bkpt->address == address )
- {
- list_del(&bkpt->list);
- xfree(bkpt);
- return 0;
- }
- }
-
- return 1;
-}
-
-/*
- * Check to see if a memory write is really gdb setting a breakpoint
- */
-void pdb_bkpt_check (u_char *buffer, int length,
- unsigned long cr3, unsigned long addr)
-{
- if (length == 1 && buffer[0] == 'c' && buffer[1] == 'c')
- {
- /* inserting a new breakpoint */
- pdb_bkpt_add(cr3, addr);
- TRC(printk("pdb breakpoint detected at 0x%lx:0x%lx\n", cr3, addr));
- }
- else if ( pdb_bkpt_remove(cr3, addr) == 0 )
- {
- /* removing a breakpoint */
- TRC(printk("pdb breakpoint cleared at 0x%lx:0x%lx\n", cr3, addr));
- }
-}
-
-/***********************************************************************/
-
-int pdb_change_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr, int rw);
-int pdb_change_values_one_page(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr, int rw);
-
-#define __PDB_GET_VAL 1
-#define __PDB_SET_VAL 2
-
-/*
- * Set memory in a domain's address space
- * Set "length" bytes at "address" from "domain" to the values in "buffer".
- * Return the number of bytes set, 0 if there was a problem.
- */
-
-int pdb_set_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr)
-{
- int count = pdb_change_values(buffer, length, cr3, addr, __PDB_SET_VAL);
- return count;
-}
-
-/*
- * Read memory from a domain's address space.
- * Fetch "length" bytes at "address" from "domain" into "buffer".
- * Return the number of bytes read, 0 if there was a problem.
- */
-
-int pdb_get_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr)
-{
- return pdb_change_values(buffer, length, cr3, addr, __PDB_GET_VAL);
-}
-
-/*
- * Read or write memory in an address space
- */
-int pdb_change_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr, int rw)
-{
- int remaining; /* number of bytes to touch past this page */
- int bytes = 0;
-
- while ( (remaining = (addr + length - 1) - (addr | (PAGE_SIZE - 1))) > 0)
- {
- bytes += pdb_change_values_one_page(buffer, length - remaining,
- cr3, addr, rw);
- buffer = buffer + (2 * (length - remaining));
- length = remaining;
- addr = (addr | (PAGE_SIZE - 1)) + 1;
- }
-
- bytes += pdb_change_values_one_page(buffer, length, cr3, addr, rw);
- return bytes;
-}
-
-/*
- * Change memory in a process' address space in one page
- * Read or write "length" bytes at "address" into/from "buffer"
- * from the virtual address space referenced by "cr3".
- * Return the number of bytes read, 0 if there was a problem.
- */
-
-int pdb_change_values_one_page(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr, int rw)
-{
- l2_pgentry_t* l2_table = NULL; /* page directory */
- l1_pgentry_t* l1_table = NULL; /* page table */
- u_char *page; /* 4k page */
- int bytes = 0;
-
- l2_table = map_domain_mem(cr3);
- l2_table += l2_table_offset(addr);
- if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT))
- {
- if (pdb_page_fault_possible == 1)
- {
- pdb_page_fault = 1;
- TRC(printk("pdb: L2 error (0x%lx)\n", addr));
- }
- else
- {
- printk ("pdb error: cr3: 0x%lx dom0cr3: 0x%lx\n", cr3,
- dom0->mm.shadow_mode ? pagetable_val(dom0->mm.shadow_table)
- : pagetable_val(dom0->mm.pagetable));
- printk ("pdb error: L2:0x%p (0x%lx)\n",
- l2_table, l2_pgentry_val(*l2_table));
- }
- goto exit2;
- }
-
- if (l2_pgentry_val(*l2_table) & _PAGE_PSE)
- {
-#define PSE_PAGE_SHIFT L2_PAGETABLE_SHIFT
-#define PSE_PAGE_SIZE (1UL << PSE_PAGE_SHIFT)
-#define PSE_PAGE_MASK (~(PSE_PAGE_SIZE-1))
-
-#define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << L1_PAGETABLE_SHIFT )
-
-#define pse_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PSE_PAGE_MASK)
-
- page = map_domain_mem(pse_pgentry_to_phys(*l2_table) + /* 10 bits */
- (addr & L1_PAGE_BITS)); /* 10 bits */
- page += addr & (PAGE_SIZE - 1); /* 12 bits */
- }
- else
- {
- l1_table = map_domain_mem(l2_pgentry_to_phys(*l2_table));
- l1_table += l1_table_offset(addr);
- if (!(l1_pgentry_val(*l1_table) & _PAGE_PRESENT))
- {
- if (pdb_page_fault_possible == 1)
- {
- pdb_page_fault = 1;
- TRC(printk ("pdb: L1 error (0x%lx)\n", addr));
- }
- else
- {
- printk ("L2:0x%p (0x%lx) L1:0x%p (0x%lx)\n",
- l2_table, l2_pgentry_val(*l2_table),
- l1_table, l1_pgentry_val(*l1_table));
- }
- goto exit1;
- }
-
- page = map_domain_mem(l1_pgentry_to_phys(*l1_table));
- page += addr & (PAGE_SIZE - 1);
- }
-
- switch (rw)
- {
- case __PDB_GET_VAL: /* read */
- memcpy (buffer, page, length);
- bytes = length;
- break;
- case __PDB_SET_VAL: /* write */
- hex2mem (buffer, page, length);
- bytes = length;
- break;
- default: /* unknown */
- printk ("error: unknown RW flag: %d\n", rw);
- return 0;
- }
-
- unmap_domain_mem((void *)page);
-exit1:
- if (l1_table != NULL)
- unmap_domain_mem((void *)l1_table);
-exit2:
- unmap_domain_mem((void *)l2_table);
-
- return bytes;
-}
-
-/***********************************************************************/
-
-void breakpoint(void);
-
-/* send the packet in buffer. */
-void pdb_put_packet (unsigned char *buffer, int ack)
-{
- unsigned char checksum;
- int count;
- char ch;
-
- /* $<packet info>#<checksum> */
- /* do */
- {
- pdb_put_char ('$');
- checksum = 0;
- count = 0;
-
- while ((ch = buffer[count]))
- {
- pdb_put_char (ch);
- checksum += ch;
- count += 1;
- }
-
- pdb_put_char('#');
- pdb_put_char(hexchars[checksum >> 4]);
- pdb_put_char(hexchars[checksum % 16]);
- }
-
- if (ack)
- {
- if ((ch = pdb_get_char()) != '+')
- {
- printk(" pdb return error: %c 0x%x [%s]\n", ch, ch, buffer);
- }
- }
-}
-
-void pdb_get_packet(char *buffer)
-{
- int count;
- char ch;
- unsigned char checksum = 0;
- unsigned char xmitcsum = 0;
-
- do
- {
- while ((ch = pdb_get_char()) != '$');
-
- count = 0;
- checksum = 0;
-
- while (count < BUFMAX)
- {
- ch = pdb_get_char();
- if (ch == '#') break;
- checksum += ch;
- buffer[count] = ch;
- count++;
- }
- buffer[count] = 0;
-
- if (ch == '#')
- {
- xmitcsum = hex(pdb_get_char()) << 4;
- xmitcsum += hex(pdb_get_char());
-
- if (xmitcsum == checksum)
- {
- pdb_put_char('+');
- if (buffer[2] == ':')
- {
- printk ("pdb: obsolete gdb packet (sequence ID)\n");
- }
- }
- else
- {
- pdb_put_char('-');
- }
- }
- } while (checksum != xmitcsum);
-
- return;
-}
-
-/*
- * process a machine interrupt or exception
- * Return 1 if pdb is not interested in the exception; it should
- * be propagated to the guest os.
- */
-
-int pdb_handle_exception(int exceptionVector,
- struct xen_regs *xen_regs)
-{
- int signal = 0;
- struct pdb_breakpoint* bkpt;
- int watchdog_save;
- unsigned long cr3 = read_cr3();
-
- /* No vm86 handling here as yet. */
- if ( VM86_MODE(xen_regs) )
- return 1;
-
- /* If the exception is an int3 from user space then pdb is only
- interested if it re-wrote an instruction set the breakpoint.
- This occurs when leaving a system call from a domain.
- */
- if ( (exceptionVector == 3) &&
- RING_3(xen_regs) &&
- (xen_regs->eip != (pdb_system_call_next_addr + 1)) )
- {
- TRC(printf("pdb: user bkpt (0x%x) at 0x%x:0x%lx:0x%x\n",
- exceptionVector, xen_regs->cs & 3, cr3, xen_regs->eip));
- return 1;
- }
-
- /*
- * If PDB didn't set the breakpoint, is not single stepping,
- * is not entering a system call in a domain,
- * the user didn't press the magic debug key,
- * then we don't handle the exception.
- */
- bkpt = pdb_bkpt_search(cr3, xen_regs->eip - 1);
- if ( (bkpt == NULL) &&
- !pdb_stepping &&
- !pdb_system_call &&
- xen_regs->eip != pdb_system_call_next_addr + 1 &&
- (exceptionVector != KEYPRESS_EXCEPTION) &&
- xen_regs->eip < 0xc0000000) /* Linux-specific for now! */
- {
- TRC(printf("pdb: user bkpt (0x%x) at 0x%lx:0x%x\n",
- exceptionVector, cr3, xen_regs->eip));
- return 1;
- }
-
- printk("pdb_handle_exception [0x%x][0x%lx:0x%x]\n",
- exceptionVector, cr3, xen_regs->eip);
-
- if ( pdb_stepping )
- {
- /* Stepped one instruction; now return to normal execution. */
- xen_regs->eflags &= ~X86_EFLAGS_TF;
- pdb_stepping = 0;
- }
-
- if ( pdb_system_call )
- {
- pdb_system_call = 0;
-
- pdb_linux_syscall_exit_bkpt (xen_regs, &pdb_ctx);
-
- /* we don't have a saved breakpoint so we need to rewind eip */
- xen_regs->eip--;
-
- /* if ther user doesn't care about breaking when entering a
- system call then we'll just ignore the exception */
- if ( (pdb_ctx.system_call & 0x01) == 0 )
- {
- return 0;
- }
- }
-
- if ( exceptionVector == BREAKPT_EXCEPTION && bkpt != NULL)
- {
- /* Executed Int3: replace breakpoint byte with real program byte. */
- xen_regs->eip--;
- }
-
- /* returning to user space after a system call */
- if ( xen_regs->eip == pdb_system_call_next_addr + 1)
- {
- u_char instr[2]; /* REALLY REALLY REALLY STUPID */
-
- mem2hex (&pdb_system_call_leave_instr, instr, sizeof(instr));
-
- pdb_linux_set_values (instr, 1, pdb_system_call_next_addr,
- pdb_ctx.process, pdb_ctx.ptbr);
-
- pdb_system_call_next_addr = 0;
- pdb_system_call_leave_instr = 0;
-
- /* manually rewind eip */
- xen_regs->eip--;
-
- /* if the user doesn't care about breaking when returning
- to user space after a system call then we'll just ignore
- the exception */
- if ( (pdb_ctx.system_call & 0x02) == 0 )
- {
- return 0;
- }
- }
-
- /* Generate a signal for GDB. */
- switch ( exceptionVector )
- {
- case KEYPRESS_EXCEPTION:
- signal = 2; break; /* SIGINT */
- case DEBUG_EXCEPTION:
- signal = 5; break; /* SIGTRAP */
- case BREAKPT_EXCEPTION:
- signal = 5; break; /* SIGTRAP */
- default:
- printk("pdb: can't generate signal for unknown exception vector %d\n",
- exceptionVector);
- break;
- }
-
- pdb_out_buffer[0] = 'S';
- pdb_out_buffer[1] = hexchars[signal >> 4];
- pdb_out_buffer[2] = hexchars[signal % 16];
- pdb_out_buffer[3] = 0;
- pdb_put_packet(pdb_out_buffer, 1);
-
- watchdog_save = watchdog_on;
- watchdog_on = 0;
-
- do {
- pdb_out_buffer[0] = 0;
- pdb_get_packet(pdb_in_buffer);
- }
- while ( pdb_process_command(pdb_in_buffer, xen_regs, cr3, signal) == 0 );
-
- watchdog_on = watchdog_save;
-
- return 0;
-}
-
-void pdb_key_pressed(unsigned char key)
-{
- struct xen_regs *regs = (struct xen_regs *)get_execution_context();
- pdb_handle_exception(KEYPRESS_EXCEPTION, regs);
-}
-
-void pdb_handle_debug_trap(struct xen_regs *regs, long error_code)
-{
- unsigned int condition;
- struct domain *d = current;
- struct trap_bounce *tb = &d->thread.trap_bounce;
-
- __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
- if ( (condition & (1 << 14)) != (1 << 14) )
- printk("\nwarning: debug trap w/o BS bit [0x%x]\n\n", condition);
- __asm__("movl %0,%%db6" : : "r" (0));
-
- if ( pdb_handle_exception(1, regs) != 0 )
- {
- d->thread.debugreg[6] = condition;
-
- tb->flags = TBF_EXCEPTION;
- tb->cs = d->thread.traps[1].cs;
- tb->eip = d->thread.traps[1].address;
- }
-}
-
-void initialize_pdb()
-{
- /* Certain state must be initialised even when PDB will not be used. */
- memset((void *) &breakpoints, 0, sizeof(breakpoints));
- INIT_LIST_HEAD(&breakpoints.list);
- pdb_stepping = 0;
-
- if ( strcmp(opt_pdb, "none") == 0 )
- return;
-
- if ( (pdb_serhnd = parse_serial_handle(opt_pdb)) == -1 )
- {
- printk("error: failed to initialize PDB on port %s\n", opt_pdb);
- return;
- }
-
- pdb_ctx.valid = 1;
- pdb_ctx.domain = -1;
- pdb_ctx.process = -1;
- pdb_ctx.system_call = 0;
- pdb_ctx.ptbr = 0;
-
- printk("pdb: pervasive debugger (%s) www.cl.cam.ac.uk/netos/pdb\n",
- opt_pdb);
-
- /* Acknowledge any spurious GDB packets. */
- pdb_put_char('+');
-
- register_keyhandler('D', pdb_key_pressed, "enter pervasive debugger");
-
- pdb_initialized = 1;
-}
-
-void breakpoint(void)
-{
- if ( pdb_initialized )
- asm("int $3");
-}
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
new file mode 100644
index 0000000000..1cbed74240
--- /dev/null
+++ b/xen/arch/x86/physdev.c
@@ -0,0 +1,143 @@
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <asm/current.h>
+#include <asm/smpboot.h>
+#include <public/xen.h>
+#include <public/physdev.h>
+
+extern int ioapic_guest_read(int apicid, int address, u32 *pval);
+extern int ioapic_guest_write(int apicid, int address, u32 pval);
+
+void physdev_modify_ioport_access_range(
+ struct domain *d, int enable, int port, int num)
+{
+ int i;
+ for ( i = port; i < (port + num); i++ )
+ (enable ? clear_bit : set_bit)(i, d->arch.iobmp_mask);
+}
+
+void physdev_destroy_state(struct domain *d)
+{
+ xfree(d->arch.iobmp_mask);
+ d->arch.iobmp_mask = NULL;
+}
+
+/* Check if a domain controls a device with IO memory within frame @pfn.
+ * Returns: 1 if the domain should be allowed to map @pfn, 0 otherwise. */
+int domain_iomem_in_pfn(struct domain *p, unsigned long pfn)
+{
+ return 0;
+}
+
+/*
+ * Demuxing hypercall.
+ */
+long do_physdev_op(physdev_op_t *uop)
+{
+ physdev_op_t op;
+ long ret;
+ int irq;
+
+ if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
+ return -EFAULT;
+
+ switch ( op.cmd )
+ {
+ case PHYSDEVOP_IRQ_UNMASK_NOTIFY:
+ ret = pirq_guest_unmask(current->domain);
+ break;
+
+ case PHYSDEVOP_IRQ_STATUS_QUERY:
+ irq = op.u.irq_status_query.irq;
+ ret = -EINVAL;
+ if ( (irq < 0) || (irq >= NR_IRQS) )
+ break;
+ op.u.irq_status_query.flags = 0;
+ /* Edge-triggered interrupts don't need an explicit unmask downcall. */
+ if ( strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") == NULL )
+ op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
+ ret = 0;
+ break;
+
+ case PHYSDEVOP_APIC_READ:
+ ret = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+ ret = ioapic_guest_read(
+ op.u.apic_op.apic, op.u.apic_op.offset, &op.u.apic_op.value);
+ break;
+
+ case PHYSDEVOP_APIC_WRITE:
+ ret = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+ ret = ioapic_guest_write(
+ op.u.apic_op.apic, op.u.apic_op.offset, op.u.apic_op.value);
+ break;
+
+ case PHYSDEVOP_ASSIGN_VECTOR:
+ if ( !IS_PRIV(current->domain) )
+ return -EPERM;
+
+ if ( (irq = op.u.irq_op.irq) >= NR_IRQS )
+ return -EINVAL;
+
+ op.u.irq_op.vector = assign_irq_vector(irq);
+ ret = 0;
+ break;
+
+ case PHYSDEVOP_SET_IOPL:
+ ret = -EINVAL;
+ if ( op.u.set_iopl.iopl > 3 )
+ break;
+ ret = 0;
+ current->arch.iopl = op.u.set_iopl.iopl;
+ break;
+
+ case PHYSDEVOP_SET_IOBITMAP:
+ ret = -EINVAL;
+ if ( !access_ok(op.u.set_iobitmap.bitmap, IOBMP_BYTES) ||
+ (op.u.set_iobitmap.nr_ports > 65536) )
+ break;
+ ret = 0;
+ current->arch.iobmp = (u8 *)op.u.set_iobitmap.bitmap;
+ current->arch.iobmp_limit = op.u.set_iobitmap.nr_ports;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if ( copy_to_user(uop, &op, sizeof(op)) )
+ ret = -EFAULT;
+
+ return ret;
+}
+
+/* Domain 0 has read access to all devices. */
+void physdev_init_dom0(struct domain *d)
+{
+ /* Access to all I/O ports. */
+ d->arch.iobmp_mask = xmalloc_array(u8, IOBMP_BYTES);
+ BUG_ON(d->arch.iobmp_mask == NULL);
+ memset(d->arch.iobmp_mask, 0, IOBMP_BYTES);
+
+ set_bit(_DOMF_physdev_access, &d->domain_flags);
+}
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 3b1bfc2092..0903967796 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -3,344 +3,133 @@
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
-#include <xen/pci.h>
+#include <xen/domain.h>
#include <xen/serial.h>
#include <xen/softirq.h>
#include <xen/acpi.h>
#include <xen/console.h>
+#include <xen/serial.h>
#include <xen/trace.h>
#include <xen/multiboot.h>
+#include <xen/domain_page.h>
#include <asm/bitops.h>
#include <asm/smp.h>
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/desc.h>
-#include <asm/domain_page.h>
-#include <asm/pdb.h>
#include <asm/shadow.h>
#include <asm/e820.h>
-/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
-static unsigned int opt_dom0_mem = 64000;
-integer_param("dom0_mem", opt_dom0_mem);
+extern void dmi_scan_machine(void);
+extern void generic_apic_probe(void);
/*
* opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
* pfn_info table and allocation bitmap.
*/
static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
-#if defined(__x86_64__)
+#if defined(CONFIG_X86_64)
integer_param("xenheap_megabytes", opt_xenheap_megabytes);
#endif
-/* opt_noht: If true, Hyperthreading is ignored. */
-int opt_noht = 0;
-boolean_param("noht", opt_noht);
-
-/* opt_noacpi: If true, ACPI tables are not parsed. */
-static int opt_noacpi = 0;
-boolean_param("noacpi", opt_noacpi);
-
/* opt_nosmp: If true, secondary processors are ignored. */
static int opt_nosmp = 0;
boolean_param("nosmp", opt_nosmp);
-/* opt_ignorebiostables: If true, ACPI and MP tables are ignored. */
-/* NB. This flag implies 'nosmp' and 'noacpi'. */
-static int opt_ignorebiostables = 0;
-boolean_param("ignorebiostables", opt_ignorebiostables);
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus);
/* opt_watchdog: If true, run a watchdog NMI on each processor. */
static int opt_watchdog = 0;
boolean_param("watchdog", opt_watchdog);
-unsigned long xenheap_phys_end;
+/* **** Linux config option: propagated to domain0. */
+/* "acpi=off": Sisables both ACPI table parsing and interpreter. */
+/* "acpi=force": Override the disable blacklist. */
+/* "acpi=strict": Disables out-of-spec workarounds. */
+/* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
+/* "acpi=noirq": Disables ACPI interrupt routing. */
+static void parse_acpi_param(char *s);
+custom_param("acpi", parse_acpi_param);
+
+/* **** Linux config option: propagated to domain0. */
+/* acpi_skip_timer_override: Skip IRQ0 overrides. */
+extern int acpi_skip_timer_override;
+boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
+
+/* **** Linux config option: propagated to domain0. */
+/* noapic: Disable IOAPIC setup. */
+extern int skip_ioapic_setup;
+boolean_param("noapic", skip_ioapic_setup);
+
+int early_boot = 1;
+
+int ht_per_core = 1;
+cpumask_t cpu_present_map;
+
+/* Limits of Xen heap, used to initialise the allocator. */
+unsigned long xenheap_phys_start, xenheap_phys_end;
extern void arch_init_memory(void);
extern void init_IRQ(void);
extern void trap_init(void);
extern void time_init(void);
extern void ac_timer_init(void);
-extern void initialize_keytable();
-extern int do_timer_lists_from_pit;
+extern void initialize_keytable(void);
+extern void early_cpu_init(void);
+
+extern unsigned long cpu0_stack[];
-char ignore_irq13; /* set if exception 16 works */
-struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };
+struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
-#if defined(__x86_64__)
+#if CONFIG_PAGING_LEVELS > 2
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
#else
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
#endif
EXPORT_SYMBOL(mmu_cr4_features);
-unsigned long wait_init_idle;
+struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
-struct domain *idle_task[NR_CPUS] = { &idle0_task };
+int acpi_disabled;
-#ifdef CONFIG_ACPI_INTERPRETER
-int acpi_disabled = 0;
-#else
-int acpi_disabled = 1;
-#endif
-EXPORT_SYMBOL(acpi_disabled);
-
-int phys_proc_id[NR_CPUS];
-int logical_proc_id[NR_CPUS];
-
-#if defined(__i386__)
-
-/* Standard macro to see if a specific flag is changeable */
-static inline int flag_is_changeable_p(u32 flag)
+int acpi_force;
+char acpi_param[10] = "";
+static void parse_acpi_param(char *s)
{
- u32 f1, f2;
-
- asm("pushfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "movl %0,%1\n\t"
- "xorl %2,%0\n\t"
- "pushl %0\n\t"
- "popfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "popfl\n\t"
- : "=&r" (f1), "=&r" (f2)
- : "ir" (flag));
-
- return ((f1^f2) & flag) != 0;
-}
+ /* Save the parameter so it can be propagated to domain0. */
+ strncpy(acpi_param, s, sizeof(acpi_param));
+ acpi_param[sizeof(acpi_param)-1] = '\0';
-/* Probe for the CPUID instruction */
-static int __init have_cpuid_p(void)
-{
- return flag_is_changeable_p(X86_EFLAGS_ID);
-}
-
-#elif defined(__x86_64__)
-
-#define have_cpuid_p() (1)
-
-#endif
-
-void __init get_cpu_vendor(struct cpuinfo_x86 *c)
-{
- char *v = c->x86_vendor_id;
-
- if (!strcmp(v, "GenuineIntel"))
- c->x86_vendor = X86_VENDOR_INTEL;
- else if (!strcmp(v, "AuthenticAMD"))
- c->x86_vendor = X86_VENDOR_AMD;
- else if (!strcmp(v, "CyrixInstead"))
- c->x86_vendor = X86_VENDOR_CYRIX;
- else if (!strcmp(v, "UMC UMC UMC "))
- c->x86_vendor = X86_VENDOR_UMC;
- else if (!strcmp(v, "CentaurHauls"))
- c->x86_vendor = X86_VENDOR_CENTAUR;
- else if (!strcmp(v, "NexGenDriven"))
- c->x86_vendor = X86_VENDOR_NEXGEN;
- else if (!strcmp(v, "RiseRiseRise"))
- c->x86_vendor = X86_VENDOR_RISE;
- else if (!strcmp(v, "GenuineTMx86") ||
- !strcmp(v, "TransmetaCPU"))
- c->x86_vendor = X86_VENDOR_TRANSMETA;
- else
- c->x86_vendor = X86_VENDOR_UNKNOWN;
-}
-
-static void __init init_intel(struct cpuinfo_x86 *c)
-{
- /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
- if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
- clear_bit(X86_FEATURE_SEP, &c->x86_capability);
-
-#ifdef CONFIG_SMP
- if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
+ /* Interpret the parameter for use within Xen. */
+ if ( !strcmp(s, "off") )
{
- u32 eax, ebx, ecx, edx;
- int initial_apic_id, siblings, cpu = smp_processor_id();
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
- ht_per_core = siblings = (ebx & 0xff0000) >> 16;
-
- if ( opt_noht )
- clear_bit(X86_FEATURE_HT, &c->x86_capability[0]);
-
- if ( siblings <= 1 )
- {
- printk(KERN_INFO "CPU#%d: Hyper-Threading is disabled\n", cpu);
- }
- else if ( siblings > 2 )
- {
- panic("We don't support more than two logical CPUs per package!");
- }
- else
- {
- initial_apic_id = ebx >> 24 & 0xff;
- phys_proc_id[cpu] = initial_apic_id >> 1;
- logical_proc_id[cpu] = initial_apic_id & 1;
- printk(KERN_INFO "CPU#%d: Physical ID: %d, Logical ID: %d\n",
- cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
- }
+ disable_acpi();
}
-#endif
-}
-
-static void __init init_amd(struct cpuinfo_x86 *c)
-{
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, &c->x86_capability);
-
- switch(c->x86)
+ else if ( !strcmp(s, "force") )
{
- case 5:
- panic("AMD K6 is not supported.\n");
- case 6: /* An Athlon/Duron. We can trust the BIOS probably */
- break;
+ acpi_force = 1;
+ acpi_ht = 1;
+ acpi_disabled = 0;
}
-}
-
-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __init identify_cpu(struct cpuinfo_x86 *c)
-{
- int junk, i, cpu = smp_processor_id();
- u32 xlvl, tfms;
-
- phys_proc_id[cpu] = cpu;
- logical_proc_id[cpu] = 0;
-
- c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->cpuid_level = -1; /* CPUID not detected */
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
- c->x86_vendor_id[0] = '\0'; /* Unset */
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
-
- if ( !have_cpuid_p() )
- panic("Ancient processors not supported\n");
-
- /* Get vendor name */
- cpuid(0x00000000, &c->cpuid_level,
- (int *)&c->x86_vendor_id[0],
- (int *)&c->x86_vendor_id[8],
- (int *)&c->x86_vendor_id[4]);
-
- get_cpu_vendor(c);
-
- if ( c->cpuid_level == 0 )
- panic("Decrepit CPUID not supported\n");
-
- cpuid(0x00000001, &tfms, &junk, &junk,
- &c->x86_capability[0]);
- c->x86 = (tfms >> 8) & 15;
- c->x86_model = (tfms >> 4) & 15;
- c->x86_mask = tfms & 15;
-
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- if ( (xlvl & 0xffff0000) == 0x80000000 ) {
- if ( xlvl >= 0x80000001 )
- c->x86_capability[1] = cpuid_edx(0x80000001);
- }
-
- /* Transmeta-defined flags: level 0x80860001 */
- xlvl = cpuid_eax(0x80860000);
- if ( (xlvl & 0xffff0000) == 0x80860000 ) {
- if ( xlvl >= 0x80860001 )
- c->x86_capability[2] = cpuid_edx(0x80860001);
+ else if ( !strcmp(s, "strict") )
+ {
+ acpi_strict = 1;
}
-
- printk("CPU%d: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
- smp_processor_id(),
- c->x86_capability[0],
- c->x86_capability[1],
- c->x86_capability[2],
- c->x86_vendor);
-
- switch ( c->x86_vendor ) {
- case X86_VENDOR_INTEL:
- init_intel(c);
- break;
- case X86_VENDOR_AMD:
- init_amd(c);
- break;
- case X86_VENDOR_UNKNOWN: /* Connectix Virtual PC reports this */
- break;
- case X86_VENDOR_CENTAUR:
- break;
- default:
- printk("Unknown CPU identifier (%d): continuing anyway, "
- "but might fail.\n", c->x86_vendor);
+ else if ( !strcmp(s, "ht") )
+ {
+ if ( !acpi_force )
+ disable_acpi();
+ acpi_ht = 1;
}
-
- printk("CPU caps: %08x %08x %08x %08x\n",
- c->x86_capability[0],
- c->x86_capability[1],
- c->x86_capability[2],
- c->x86_capability[3]);
-
- /*
- * On SMP, boot_cpu_data holds the common feature set between
- * all CPUs; so make sure that we indicate which features are
- * common between the CPUs. The first time this routine gets
- * executed, c == &boot_cpu_data.
- */
- if ( c != &boot_cpu_data ) {
- /* AND the already accumulated flags with these */
- for ( i = 0 ; i < NCAPINTS ; i++ )
- boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+ else if ( !strcmp(s, "noirq") )
+ {
+ acpi_noirq_set();
}
}
-
-unsigned long cpu_initialized;
-void __init cpu_init(void)
-{
-#if defined(__i386__) /* XXX */
- int nr = smp_processor_id();
- struct tss_struct * t = &init_tss[nr];
-
- if ( test_and_set_bit(nr, &cpu_initialized) )
- panic("CPU#%d already initialized!!!\n", nr);
- printk("Initializing CPU#%d\n", nr);
-
- t->bitmap = IOBMP_INVALID_OFFSET;
- memset(t->io_bitmap, ~0, sizeof(t->io_bitmap));
-
- /* Set up GDT and IDT. */
- SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
- SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
- __asm__ __volatile__("lgdt %0": "=m" (*current->mm.gdt));
- __asm__ __volatile__("lidt %0": "=m" (idt_descr));
-
- /* No nested task. */
- __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
-
- /* Ensure FPU gets initialised for each domain. */
- stts();
-
- /* Set up and load the per-CPU TSS and LDT. */
- t->ss0 = __HYPERVISOR_DS;
- t->esp0 = get_stack_top();
- set_tss_desc(nr,t);
- load_TR(nr);
- __asm__ __volatile__("lldt %%ax"::"a" (0));
-
- /* Clear all 6 debug registers. */
-#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
- CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-#undef CD
-
- /* Install correct page table. */
- write_ptbase(&current->mm);
-
- init_idle_task();
-#endif
-}
-
static void __init do_initcalls(void)
{
initcall_t *call;
@@ -348,17 +137,16 @@ static void __init do_initcalls(void)
(*call)();
}
-unsigned long pci_mem_start = 0x10000000;
-
static void __init start_of_day(void)
{
- unsigned long low_mem_size;
-
-#ifdef MEMORY_GUARD
+ int i;
+
+ early_cpu_init();
+
+ paging_init();
+
/* Unmap the first page of CPU0's stack. */
- extern unsigned long cpu0_stack[];
- memguard_guard_range(cpu0_stack, PAGE_SIZE);
-#endif
+ memguard_guard_stack(cpu0_stack);
open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
@@ -368,103 +156,89 @@ static void __init start_of_day(void)
sort_exception_tables();
arch_do_createdomain(current);
-
- /* Tell the PCI layer not to allocate too close to the RAM area.. */
- low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
- if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
- identify_cpu(&boot_cpu_data); /* get CPU type info */
- if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
- if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT);
-#ifdef CONFIG_SMP
- if ( opt_ignorebiostables )
- {
- opt_nosmp = 1; /* No SMP without configuration */
- opt_noacpi = 1; /* ACPI will just confuse matters also */
- }
- else
- {
- find_smp_config();
- smp_alloc_memory(); /* trampoline which other CPUs jump at */
- }
-#endif
- paging_init(); /* not much here now, but sets up fixmap */
- if ( !opt_noacpi )
- acpi_boot_init();
-#ifdef CONFIG_SMP
+ /* Map default GDT into their final position in the idle page table. */
+ map_pages_to_xen(
+ GDT_VIRT_START(current) + FIRST_RESERVED_GDT_BYTE,
+ virt_to_phys(gdt_table) >> PAGE_SHIFT, 1, PAGE_HYPERVISOR);
+
+ find_smp_config();
+
+ smp_alloc_memory();
+
+ dmi_scan_machine();
+
+ generic_apic_probe();
+
+ acpi_boot_table_init();
+ acpi_boot_init();
+
if ( smp_found_config )
get_smp_config();
-#endif
- scheduler_init();
- init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */
+
+ init_apic_mappings();
+
+ init_IRQ();
+
trap_init();
- time_init(); /* installs software handler for HZ clock. */
- init_apic_mappings(); /* make APICs addressable in our pagetables. */
+
+ ac_timer_init();
+
+ time_init();
arch_init_memory();
-#ifndef CONFIG_SMP
- APIC_init_uniprocessor();
-#else
+ scheduler_init();
+
+ identify_cpu(&boot_cpu_data);
+ if ( cpu_has_fxsr )
+ set_in_cr4(X86_CR4_OSFXSR);
+ if ( cpu_has_xmm )
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
+
if ( opt_nosmp )
- APIC_init_uniprocessor();
- else
- smp_boot_cpus();
- /*
- * Does loads of stuff, including kicking the local
- * APIC, and the IO APIC after other CPUs are booted.
- * Each IRQ is preferably handled by IO-APIC, but
- * fall thru to 8259A if we have to (but slower).
- */
-#endif
+ max_cpus = 0;
+ smp_prepare_cpus(max_cpus);
- __sti();
+ /* We aren't hotplug-capable yet. */
+ BUG_ON(!cpus_empty(cpu_present_map));
+ for_each_cpu ( i )
+ cpu_set(i, cpu_present_map);
- initialize_keytable(); /* call back handling for key codes */
+ /* Sanity: We ought to be taking interrupts by now. */
+ local_irq_enable();
- serial_init_stage2();
+ initialize_keytable();
-#ifdef XEN_DEBUGGER
- initialize_pdb(); /* pervasive debugger */
-#endif
+ serial_init_postirq();
- if ( !cpu_has_apic )
+ init_xen_time();
+
+ for_each_present_cpu ( i )
{
- do_timer_lists_from_pit = 1;
- if ( smp_num_cpus != 1 )
- panic("We need local APICs on SMP machines!");
+ if ( num_online_cpus() >= max_cpus )
+ break;
+ if ( !cpu_online(i) )
+ __cpu_up(i);
}
- ac_timer_init(); /* init accurate timers */
- init_xen_time(); /* initialise the time */
- schedulers_start(); /* start scheduler for each CPU */
+ printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+ smp_cpus_done(max_cpus);
- check_nmi_watchdog();
-
-#ifdef CONFIG_PCI
- pci_init();
-#endif
do_initcalls();
-#ifdef CONFIG_SMP
- wait_init_idle = cpu_online_map;
- clear_bit(smp_processor_id(), &wait_init_idle);
- smp_threads_ready = 1;
- smp_commence(); /* Tell other CPUs that state of the world is stable. */
- while ( wait_init_idle != 0 )
- cpu_relax();
-#endif
+ schedulers_start();
- watchdog_on = 1;
+ watchdog_enable();
}
+#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
+
void __init __start_xen(multiboot_info_t *mbi)
{
- unsigned char *cmdline;
+ char *cmdline;
module_t *mod = (module_t *)__va(mbi->mods_addr);
- void *heap_start;
unsigned long firsthole_start, nr_pages;
- unsigned long dom0_memory_start, dom0_memory_end;
unsigned long initial_images_start, initial_images_end;
struct e820entry e820_raw[E820MAX];
int i, e820_raw_nr = 0, bytes = 0;
@@ -474,10 +248,14 @@ void __init __start_xen(multiboot_info_t *mbi)
cmdline_parse(__va(mbi->cmdline));
/* Must do this early -- e.g., spinlocks rely on get_current(). */
- set_current(&idle0_task);
+ set_current(&idle0_vcpu);
+ set_processor_id(0);
+
+ smp_prepare_boot_cpu();
/* We initialise the serial devices very early so we can get debugging. */
- serial_init_stage1();
+ ns16550_init();
+ serial_init_preirq();
init_console();
@@ -485,7 +263,13 @@ void __init __start_xen(multiboot_info_t *mbi)
if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
{
printk("FATAL ERROR: Require at least one Multiboot module.\n");
- for ( ; ; ) ;
+ EARLY_FAIL();
+ }
+
+ if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
+ {
+ printk("FATAL ERROR: Misaligned CPU0 stack.\n");
+ EARLY_FAIL();
}
xenheap_phys_end = opt_xenheap_megabytes << 20;
@@ -500,7 +284,7 @@ void __init __start_xen(multiboot_info_t *mbi)
e820_raw[e820_raw_nr].size =
((u64)map->length_high << 32) | (u64)map->length_low;
e820_raw[e820_raw_nr].type =
- (map->type > E820_NVS) ? E820_RESERVED : map->type;
+ (map->type > E820_SHARED_PAGE) ? E820_RESERVED : map->type;
e820_raw_nr++;
bytes += map->size + 4;
}
@@ -521,7 +305,7 @@ void __init __start_xen(multiboot_info_t *mbi)
for ( ; ; ) ;
}
- max_page = init_e820(e820_raw, e820_raw_nr);
+ max_page = init_e820(e820_raw, &e820_raw_nr);
/* Find the first high-memory RAM hole. */
for ( i = 0; i < e820.nr_map; i++ )
@@ -539,19 +323,18 @@ void __init __start_xen(multiboot_info_t *mbi)
printk("Not enough memory to stash the DOM0 kernel image.\n");
for ( ; ; ) ;
}
-#if defined(__i386__)
+#if defined(CONFIG_X86_32)
memmove((void *)initial_images_start, /* use low mapping */
(void *)mod[0].mod_start, /* use low mapping */
mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
-#elif defined(__x86_64__)
+#elif defined(CONFIG_X86_64)
memmove(__va(initial_images_start),
__va(mod[0].mod_start),
mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
#endif
/* Initialise boot-time allocator with all RAM situated after modules. */
- heap_start = memguard_init(&_end);
- heap_start = __va(init_boot_allocator(__pa(heap_start)));
+ xenheap_phys_start = init_boot_allocator(__pa(&_end));
nr_pages = 0;
for ( i = 0; i < e820.nr_map; i++ )
{
@@ -562,35 +345,48 @@ void __init __start_xen(multiboot_info_t *mbi)
init_boot_pages((e820.map[i].addr < initial_images_end) ?
initial_images_end : e820.map[i].addr,
e820.map[i].addr + e820.map[i].size);
+#if defined (CONFIG_X86_64)
+ /*
+ * x86/64 maps all registered RAM. Points to note:
+ * 1. The initial pagetable already maps low 64MB, so skip that.
+ * 2. We must map *only* RAM areas, taking care to avoid I/O holes.
+ * Failure to do this can cause coherency problems and deadlocks
+ * due to cache-attribute mismatches (e.g., AMD/AGP Linux bug).
+ */
+ {
+ /* Calculate page-frame range, discarding partial frames. */
+ unsigned long start, end;
+ start = PFN_UP(e820.map[i].addr);
+ end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+ /* Clip the range to above 64MB. */
+ if ( end < (64UL << (20-PAGE_SHIFT)) )
+ continue;
+ if ( start < (64UL << (20-PAGE_SHIFT)) )
+ start = 64UL << (20-PAGE_SHIFT);
+ /* Request the mapping. */
+ map_pages_to_xen(
+ PAGE_OFFSET + (start << PAGE_SHIFT),
+ start, end-start, PAGE_HYPERVISOR);
+ }
+#endif
}
+ memguard_init();
+
printk("System RAM: %luMB (%lukB)\n",
nr_pages >> (20 - PAGE_SHIFT),
nr_pages << (PAGE_SHIFT - 10));
- /* Allocate an aligned chunk of RAM for DOM0. */
- dom0_memory_start = alloc_boot_pages(opt_dom0_mem << 10, 4UL << 20);
- dom0_memory_end = dom0_memory_start + (opt_dom0_mem << 10);
- if ( dom0_memory_start == 0 )
- {
- printk("Not enough memory for DOM0 memory reservation.\n");
- for ( ; ; ) ;
- }
-
init_frametable();
end_boot_allocator();
- init_xenheap_pages(__pa(heap_start), xenheap_phys_end);
+ init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
printk("Xen heap: %luMB (%lukB)\n",
- (xenheap_phys_end-__pa(heap_start)) >> 20,
- (xenheap_phys_end-__pa(heap_start)) >> 10);
-
- /* Initialise the slab allocator. */
- xmem_cache_init();
- xmem_cache_sizes_init(max_page);
+ (xenheap_phys_end-xenheap_phys_start) >> 20,
+ (xenheap_phys_end-xenheap_phys_start) >> 10);
- domain_startofday();
+ early_boot = 0;
start_of_day();
@@ -603,35 +399,52 @@ void __init __start_xen(multiboot_info_t *mbi)
if ( dom0 == NULL )
panic("Error creating domain 0\n");
- set_bit(DF_PRIVILEGED, &dom0->flags);
+ set_bit(_DOMF_privileged, &dom0->domain_flags);
- /* Grab the DOM0 command line. Skip past the image name. */
- cmdline = (unsigned char *)(mod[0].string ? __va(mod[0].string) : NULL);
+ /* Grab the DOM0 command line. */
+ cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
if ( cmdline != NULL )
{
+ static char dom0_cmdline[MAX_GUEST_CMDLINE];
+
+ /* Skip past the image name. */
while ( *cmdline == ' ' ) cmdline++;
if ( (cmdline = strchr(cmdline, ' ')) != NULL )
while ( *cmdline == ' ' ) cmdline++;
+
+ /* Copy the command line to a local buffer. */
+ strcpy(dom0_cmdline, cmdline);
+ cmdline = dom0_cmdline;
+
+ /* Append any extra parameters. */
+ if ( skip_ioapic_setup && !strstr(cmdline, "noapic") )
+ strcat(cmdline, " noapic");
+ if ( acpi_skip_timer_override &&
+ !strstr(cmdline, "acpi_skip_timer_override") )
+ strcat(cmdline, " acpi_skip_timer_override");
+ if ( (strlen(acpi_param) != 0) && !strstr(cmdline, "acpi=") )
+ {
+ strcat(cmdline, " acpi=");
+ strcat(cmdline, acpi_param);
+ }
}
/*
* We're going to setup domain0 using the module(s) that we stashed safely
* above our heap. The second module, if present, is an initrd ramdisk.
*/
- if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
- (char *)initial_images_start,
+ if ( construct_dom0(dom0,
+ initial_images_start,
mod[0].mod_end-mod[0].mod_start,
(mbi->mods_count == 1) ? 0 :
- (char *)initial_images_start +
+ initial_images_start +
(mod[1].mod_start-mod[0].mod_start),
(mbi->mods_count == 1) ? 0 :
mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
cmdline) != 0)
panic("Could not set up DOM0 guest OS\n");
- /* The stash space for the initial kernel image can now be freed up. */
- init_domheap_pages(initial_images_start, initial_images_end);
-
+ /* Scrub RAM that is still free and so may go to an unprivileged domain. */
scrub_heap_pages();
init_trace_bufs();
@@ -639,7 +452,20 @@ void __init __start_xen(multiboot_info_t *mbi)
/* Give up the VGA console if DOM0 is configured to grab it. */
console_endboot(cmdline && strstr(cmdline, "tty0"));
- domain_unpause_by_systemcontroller(current);
+ /* Hide UART from DOM0 if we're using it */
+ serial_endboot();
+
domain_unpause_by_systemcontroller(dom0);
+
startup_cpu_idle_loop();
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c
index 465563fb8f..bac8f23ad6 100644
--- a/xen/arch/x86/shadow.c
+++ b/xen/arch/x86/shadow.c
@@ -1,21 +1,47 @@
-/* -*- Mode:C++; c-file-style:BSD; c-basic-offset:4; tab-width:4 -*- */
+/******************************************************************************
+ * arch/x86/shadow.c
+ *
+ * Copyright (c) 2005 Michael A Fetterman
+ * Based on an earlier implementation by Ian Pratt et al
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
+#include <xen/domain_page.h>
#include <asm/shadow.h>
-#include <asm/domain_page.h>
#include <asm/page.h>
#include <xen/event.h>
+#include <xen/sched.h>
#include <xen/trace.h>
-/********
+#define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
-To use these shadow page tables, guests must not rely on the ACCESSED
-and DIRTY bits on L2 pte's being accurate -- they will typically all be set.
+static void shadow_free_snapshot(struct domain *d,
+ struct out_of_sync_entry *entry);
+static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
+static void free_writable_pte_predictions(struct domain *d);
+
+#if SHADOW_DEBUG
+static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
+#endif
-I doubt this will break anything. (If guests want to use the va_update
-mechanism they've signed up for this anyhow...)
+/********
There's a per-domain shadow table spin lock which works fine for SMP
hosts. We don't have to worry about interrupts as no shadow operations
@@ -27,261 +53,1209 @@ hypercall lock anyhow (at least initially).
********/
-static inline void free_shadow_page(
- struct mm_struct *m, struct pfn_info *page)
+static inline int
+shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
+ unsigned long new_type)
+{
+ struct pfn_info *page = pfn_to_page(gmfn);
+ int pinned = 0, okay = 1;
+
+ if ( page_out_of_sync(page) )
+ {
+ // Don't know how long ago this snapshot was taken.
+ // Can't trust it to be recent enough.
+ //
+ __shadow_sync_mfn(d, gmfn);
+ }
+
+ if ( !shadow_mode_refcounts(d) )
+ return 1;
+
+ if ( unlikely(page_is_page_table(page)) )
+ return 1;
+
+ FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
+
+ if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
+ {
+ FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
+ __func__, gpfn, gmfn);
+#if 1 || defined(LIVE_DANGEROUSLY)
+ set_bit(_PGC_page_table, &page->count_info);
+ return 1;
+#endif
+ return 0;
+
+ }
+
+ // To convert this page to use as a page table, the writable count
+    // should now be zero.  Test this by grabbing the page as a page table,
+ // and then immediately releasing. This will also deal with any
+ // necessary TLB flushing issues for us.
+ //
+ // The cruft here about pinning doesn't really work right. This
+ // needs rethinking/rewriting... Need to gracefully deal with the
+ // TLB flushes required when promoting a writable page, and also deal
+ // with any outstanding (external) writable refs to this page (by
+ // refusing to promote it). The pinning headache complicates this
+ // code -- it would all get much simpler if we stop using
+ // shadow_lock() and move the shadow code to BIGLOCK().
+ //
+ if ( unlikely(!get_page(page, d)) )
+ BUG(); // XXX -- needs more thought for a graceful failure
+ if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
+ {
+ pinned = 1;
+ put_page_and_type(page);
+ }
+ if ( get_page_type(page, PGT_base_page_table) )
+ {
+ set_bit(_PGC_page_table, &page->count_info);
+ put_page_type(page);
+ }
+ else
+ {
+ printk("shadow_promote: get_page_type failed "
+ "dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
+ d->domain_id, gpfn, gmfn, new_type);
+ okay = 0;
+ }
+
+ // Now put the type back to writable...
+ if ( unlikely(!get_page_type(page, PGT_writable_page)) )
+ BUG(); // XXX -- needs more thought for a graceful failure
+ if ( unlikely(pinned) )
+ {
+ if ( unlikely(test_and_set_bit(_PGT_pinned,
+ &page->u.inuse.type_info)) )
+ BUG(); // hmm... someone pinned this again?
+ }
+ else
+ put_page_and_type(page);
+
+ return okay;
+}
+
+static inline void
+shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return;
+
+ ASSERT(frame_table[gmfn].count_info & PGC_page_table);
+
+ if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
+ {
+ clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
+
+ if ( page_out_of_sync(pfn_to_page(gmfn)) )
+ {
+ remove_out_of_sync_entries(d, gmfn);
+ }
+ }
+}
+
+/*
+ * Things in shadow mode that collect get_page() refs to the domain's
+ * pages are:
+ * - PGC_allocated takes a gen count, just like normal.
+ * - A writable page can be pinned (paravirtualized guests may consider
+ * these pages to be L1s or L2s, and don't know the difference).
+ * Pinning a page takes a gen count (but, for domains in shadow mode,
+ * it *doesn't* take a type count)
+ * - CR3 grabs a ref to whatever it points at, just like normal.
+ *  - Shadow mode grabs an initial gen count for itself, as a placeholder
+ * for whatever references will exist.
+ * - Shadow PTEs that point to a page take a gen count, just like regular
+ * PTEs. However, they don't get a type count, as get_page_type() is
+ * hardwired to keep writable pages' counts at 1 for domains in shadow
+ * mode.
+ * - Whenever we shadow a page, the entry in the shadow hash grabs a
+ * general ref to the page.
+ * - Whenever a page goes out of sync, the out of sync entry grabs a
+ * general ref to the page.
+ */
+/*
+ * pfn_info fields for pages allocated as shadow pages:
+ *
+ * All 32 bits of count_info are a simple count of refs to this shadow
+ * from a) other shadow pages, b) current CR3's (aka ed->arch.shadow_table),
+ * c) if it's a pinned shadow root pgtable, d) outstanding out-of-sync
+ * references.
+ *
+ * u.inuse._domain is left NULL, to prevent accidentally allowing some random
+ * domain from gaining permissions to map this page.
+ *
+ * u.inuse.type_info & PGT_type_mask remembers what kind of page is being
+ * shadowed.
+ * u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
+ * u.inuse.type_info & PGT_pinned says that an extra reference to this shadow
+ * currently exists because this is a shadow of a root page, and we
+ * don't want to let those disappear just because no CR3 is currently pointing
+ * at it.
+ *
+ * tlbflush_timestamp holds a min & max index of valid page table entries
+ * within the shadow page.
+ */
+
+static inline unsigned long
+alloc_shadow_page(struct domain *d,
+ unsigned long gpfn, unsigned long gmfn,
+ u32 psh_type)
+{
+ struct pfn_info *page;
+ unsigned long smfn;
+ int pin = 0;
+
+ // Currently, we only keep pre-zero'ed pages around for use as L1's...
+ // This will change. Soon.
+ //
+ if ( psh_type == PGT_l1_shadow )
+ {
+ if ( !list_empty(&d->arch.free_shadow_frames) )
+ {
+ struct list_head *entry = d->arch.free_shadow_frames.next;
+ page = list_entry(entry, struct pfn_info, list);
+ list_del(entry);
+ perfc_decr(free_l1_pages);
+ }
+ else
+ {
+ page = alloc_domheap_page(NULL);
+ void *l1 = map_domain_page(page_to_pfn(page));
+ memset(l1, 0, PAGE_SIZE);
+ unmap_domain_page(l1);
+ }
+ }
+ else
+ page = alloc_domheap_page(NULL);
+
+ if ( unlikely(page == NULL) )
+ {
+ printk("Couldn't alloc shadow page! dom%d count=%d\n",
+ d->domain_id, d->arch.shadow_page_count);
+ printk("Shadow table counts: l1=%d l2=%d hl2=%d snapshot=%d\n",
+ perfc_value(shadow_l1_pages),
+ perfc_value(shadow_l2_pages),
+ perfc_value(hl2_table_pages),
+ perfc_value(snapshot_pages));
+ BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+ }
+
+ smfn = page_to_pfn(page);
+
+ ASSERT( (gmfn & ~PGT_mfn_mask) == 0 );
+ page->u.inuse.type_info = psh_type | gmfn;
+ page->count_info = 0;
+ page->tlbflush_timestamp = 0;
+
+ switch ( psh_type )
+ {
+ case PGT_l1_shadow:
+ if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
+ goto fail;
+ perfc_incr(shadow_l1_pages);
+ d->arch.shadow_page_count++;
+ break;
+
+ case PGT_l2_shadow:
+ if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
+ goto fail;
+ perfc_incr(shadow_l2_pages);
+ d->arch.shadow_page_count++;
+ if ( PGT_l2_page_table == PGT_root_page_table )
+ pin = 1;
+
+ break;
+
+ case PGT_hl2_shadow:
+ // Treat an hl2 as an L1 for purposes of promotion.
+ // For external mode domains, treat them as an L2 for purposes of
+ // pinning.
+ //
+ if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
+ goto fail;
+ perfc_incr(hl2_table_pages);
+ d->arch.hl2_page_count++;
+ if ( shadow_mode_external(d) &&
+ (PGT_l2_page_table == PGT_root_page_table) )
+ pin = 1;
+
+ break;
+
+ case PGT_snapshot:
+ perfc_incr(snapshot_pages);
+ d->arch.snapshot_page_count++;
+ break;
+
+ default:
+ printk("Alloc shadow weird page type type=%08x\n", psh_type);
+ BUG();
+ break;
+ }
+
+ // Don't add a new shadow of something that already has a snapshot.
+ //
+ ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
+
+ set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+
+ if ( pin )
+ shadow_pin(smfn);
+
+ return smfn;
+
+ fail:
+ FSH_LOG("promotion of pfn=%lx mfn=%lx failed! external gnttab refs?",
+ gpfn, gmfn);
+ free_domheap_page(page);
+ return 0;
+}
+
+static void inline
+free_shadow_l1_table(struct domain *d, unsigned long smfn)
+{
+ l1_pgentry_t *pl1e = map_domain_page(smfn);
+ int i;
+ struct pfn_info *spage = pfn_to_page(smfn);
+ u32 min_max = spage->tlbflush_timestamp;
+ int min = SHADOW_MIN(min_max);
+ int max = SHADOW_MAX(min_max);
+
+ for ( i = min; i <= max; i++ )
+ {
+ shadow_put_page_from_l1e(pl1e[i], d);
+ pl1e[i] = l1e_empty();
+ }
+
+ unmap_domain_page(pl1e);
+}
+
+static void inline
+free_shadow_hl2_table(struct domain *d, unsigned long smfn)
{
- m->shadow_page_count--;
+ l1_pgentry_t *hl2 = map_domain_page(smfn);
+ int i, limit;
+
+ SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
+
+#ifdef __i386__
+ if ( shadow_mode_external(d) )
+ limit = L2_PAGETABLE_ENTRIES;
+ else
+ limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
+#else
+ limit = 0; /* XXX x86/64 XXX */
+#endif
+
+ for ( i = 0; i < limit; i++ )
+ {
+ if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
+ put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
+ }
+
+ unmap_domain_page(hl2);
+}
+
+static void inline
+free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
+{
+ l2_pgentry_t *pl2e = map_domain_page(smfn);
+ int i, external = shadow_mode_external(d);
+
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ if ( external || is_guest_l2_slot(type, i) )
+ if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
+ put_shadow_ref(l2e_get_pfn(pl2e[i]));
+
+ if ( (PGT_base_page_table == PGT_l2_page_table) &&
+ shadow_mode_translate(d) && !external )
+ {
+ // free the ref to the hl2
+ //
+ put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
+ }
+
+ unmap_domain_page(pl2e);
+}
+
+void free_shadow_page(unsigned long smfn)
+{
+ struct pfn_info *page = &frame_table[smfn];
+ unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
+ struct domain *d = page_get_owner(pfn_to_page(gmfn));
+ unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
+ unsigned long type = page->u.inuse.type_info & PGT_type_mask;
+
+ SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
+
+ ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
- switch ( page->u.inuse.type_info & PGT_type_mask )
+ delete_shadow_status(d, gpfn, gmfn, type);
+
+ switch ( type )
{
- case PGT_l1_page_table:
+ case PGT_l1_shadow:
perfc_decr(shadow_l1_pages);
+ shadow_demote(d, gpfn, gmfn);
+ free_shadow_l1_table(d, smfn);
break;
- case PGT_l2_page_table:
+ case PGT_l2_shadow:
perfc_decr(shadow_l2_pages);
+ shadow_demote(d, gpfn, gmfn);
+ free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
+ break;
+
+ case PGT_hl2_shadow:
+ perfc_decr(hl2_table_pages);
+ shadow_demote(d, gpfn, gmfn);
+ free_shadow_hl2_table(d, smfn);
+ break;
+
+ case PGT_snapshot:
+ perfc_decr(snapshot_pages);
break;
default:
- printk("Free shadow weird page type pfn=%08x type=%08x\n",
- frame_table-page, page->u.inuse.type_info);
+ printk("Free shadow weird page type mfn=%lx type=%08x\n",
+ page_to_pfn(page), page->u.inuse.type_info);
break;
}
- free_domheap_page(page);
+ d->arch.shadow_page_count--;
+
+ // No TLB flushes are needed the next time this page gets allocated.
+ //
+ page->tlbflush_timestamp = 0;
+ page->u.free.cpumask = CPU_MASK_NONE;
+
+ if ( type == PGT_l1_shadow )
+ {
+ list_add(&page->list, &d->arch.free_shadow_frames);
+ perfc_incr(free_l1_pages);
+ }
+ else
+ free_domheap_page(page);
}
-static void free_shadow_state(struct mm_struct *m)
+void
+remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
{
- int i, free = 0;
- struct shadow_status *x, *n;
+ unsigned long smfn;
+
+ //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
+
+ shadow_lock(d);
+
+ while ( stype >= PGT_l1_shadow )
+ {
+ smfn = __shadow_status(d, gpfn, stype);
+ if ( smfn && MFN_PINNED(smfn) )
+ shadow_unpin(smfn);
+ stype -= PGT_l1_shadow;
+ }
+
+ shadow_unlock(d);
+}
+
+static void inline
+release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
+{
+ struct pfn_info *page;
+
+ page = &frame_table[entry->gmfn];
+
+ // Decrement ref count of guest & shadow pages
+ //
+ put_page(page);
+
+ // Only use entries that have low bits clear...
+ //
+ if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
+ {
+ put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
+ entry->writable_pl1e = -2;
+ }
+ else
+ ASSERT( entry->writable_pl1e == -1 );
+
+ // Free the snapshot
+ //
+ shadow_free_snapshot(d, entry);
+}
+
+static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
+{
+ struct out_of_sync_entry *entry = d->arch.out_of_sync;
+ struct out_of_sync_entry **prev = &d->arch.out_of_sync;
+ struct out_of_sync_entry *found = NULL;
+
+ // NB: Be careful not to call something that manipulates this list
+ // while walking it. Collect the results into a separate list
+ // first, then walk that list.
+ //
+ while ( entry )
+ {
+ if ( entry->gmfn == gmfn )
+ {
+ // remove from out of sync list
+ *prev = entry->next;
+
+ // add to found list
+ entry->next = found;
+ found = entry;
+
+ entry = *prev;
+ continue;
+ }
+ prev = &entry->next;
+ entry = entry->next;
+ }
+
+ prev = NULL;
+ entry = found;
+ while ( entry )
+ {
+ release_out_of_sync_entry(d, entry);
+
+ prev = &entry->next;
+ entry = entry->next;
+ }
+
+ // Add found list to free list
+ if ( prev )
+ {
+ *prev = d->arch.out_of_sync_free;
+ d->arch.out_of_sync_free = found;
+ }
+}
+
+static void free_out_of_sync_state(struct domain *d)
+{
+ struct out_of_sync_entry *entry;
+
+ // NB: Be careful not to call something that manipulates this list
+ // while walking it. Remove one item at a time, and always
+ // restart from start of list.
+ //
+ while ( (entry = d->arch.out_of_sync) )
+ {
+ d->arch.out_of_sync = entry->next;
+ release_out_of_sync_entry(d, entry);
+
+ entry->next = d->arch.out_of_sync_free;
+ d->arch.out_of_sync_free = entry;
+ }
+}
+
+static void free_shadow_pages(struct domain *d)
+{
+ int i;
+ struct shadow_status *x;
+ struct vcpu *v;
/*
* WARNING! The shadow page table must not currently be in use!
* e.g., You are expected to have paused the domain and synchronized CR3.
*/
- shadow_audit(m, 1);
+ if( !d->arch.shadow_ht ) return;
+
+ shadow_audit(d, 1);
+
+ // first, remove any outstanding refs from out_of_sync entries...
+ //
+ free_out_of_sync_state(d);
- /* Free each hash chain in turn. */
+ // second, remove any outstanding refs from v->arch.shadow_table
+ // and CR3.
+ //
+ for_each_vcpu(d, v)
+ {
+ if ( pagetable_get_paddr(v->arch.shadow_table) )
+ {
+ put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
+ v->arch.shadow_table = mk_pagetable(0);
+ }
+
+ if ( v->arch.monitor_shadow_ref )
+ {
+ put_shadow_ref(v->arch.monitor_shadow_ref);
+ v->arch.monitor_shadow_ref = 0;
+ }
+ }
+
+ // For external shadows, remove the monitor table's refs
+ //
+ if ( shadow_mode_external(d) )
+ {
+ for_each_vcpu(d, v)
+ {
+ l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
+
+ if ( mpl2e )
+ {
+ l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
+ l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
+
+ if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
+ {
+ put_shadow_ref(l2e_get_pfn(hl2e));
+ mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
+ }
+ if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
+ {
+ put_shadow_ref(l2e_get_pfn(smfn));
+ mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
+ }
+ }
+ }
+ }
+
+ // Now, the only refs to shadow pages that are left are from the shadow
+ // pages themselves. We just unpin the pinned pages, and the rest
+ // should automatically disappear.
+ //
+ // NB: Beware: each explicit or implicit call to free_shadow_page
+ // can/will result in the hash bucket getting rewritten out from
+ // under us... First, collect the list of pinned pages, then
+ // free them.
+ //
for ( i = 0; i < shadow_ht_buckets; i++ )
{
+ u32 count;
+ unsigned long *mfn_list;
+
/* Skip empty buckets. */
- x = &m->shadow_ht[i];
- if ( x->pfn == 0 )
+ if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
+ continue;
+
+ count = 0;
+ for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
+ if ( MFN_PINNED(x->smfn) )
+ count++;
+ if ( !count )
continue;
- /* Free the head page. */
- free_shadow_page(
- m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);
+ mfn_list = xmalloc_array(unsigned long, count);
+ count = 0;
+ for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
+ if ( MFN_PINNED(x->smfn) )
+ mfn_list[count++] = x->smfn;
- /* Reinitialise the head node. */
- x->pfn = 0;
- x->spfn_and_flags = 0;
- n = x->next;
- x->next = NULL;
+ while ( count )
+ {
+ shadow_unpin(mfn_list[--count]);
+ }
+ xfree(mfn_list);
+ }
- free++;
+ // Now free the pre-zero'ed pages from the domain
+ //
+ struct list_head *list_ent, *tmp;
+ list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
+ {
+ list_del(list_ent);
+ perfc_decr(free_l1_pages);
- /* Iterate over non-head nodes. */
- for ( x = n; x != NULL; x = n )
- {
- /* Free the shadow page. */
- free_shadow_page(
- m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);
+ struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
+ free_domheap_page(page);
+ }
- /* Re-initialise the chain node. */
- x->pfn = 0;
- x->spfn_and_flags = 0;
+ shadow_audit(d, 0);
- /* Add to the free list. */
- n = x->next;
- x->next = m->shadow_ht_free;
- m->shadow_ht_free = x;
+ SH_LOG("Free shadow table.");
+}
- free++;
- }
+void shadow_mode_init(void)
+{
+}
- shadow_audit(m, 0);
+int _shadow_mode_refcounts(struct domain *d)
+{
+ return shadow_mode_refcounts(d);
+}
+
+static void alloc_monitor_pagetable(struct vcpu *v)
+{
+ unsigned long mmfn;
+ l2_pgentry_t *mpl2e;
+ struct pfn_info *mmfn_info;
+ struct domain *d = v->domain;
+
+ ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
+
+ mmfn_info = alloc_domheap_page(NULL);
+ ASSERT(mmfn_info != NULL);
+
+ mmfn = page_to_pfn(mmfn_info);
+ mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
+ memset(mpl2e, 0, PAGE_SIZE);
+
+#ifdef __i386__ /* XXX screws x86/64 build */
+ memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+ &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+ HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+#endif
+
+ mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
+ l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
+ __PAGE_HYPERVISOR);
+
+ // map the phys_to_machine map into the Read-Only MPT space for this domain
+ mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
+ l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
+ __PAGE_HYPERVISOR);
+
+ // Don't (yet) have mappings for these...
+ // Don't want to accidentally see the idle_pg_table's linear mapping.
+ //
+ mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
+ mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
+
+ v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
+ v->arch.monitor_vtable = mpl2e;
+}
+
+/*
+ * Free the pages for monitor_table and hl2_table
+ */
+void free_monitor_pagetable(struct vcpu *v)
+{
+ l2_pgentry_t *mpl2e, hl2e, sl2e;
+ unsigned long mfn;
+
+ ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
+
+ mpl2e = v->arch.monitor_vtable;
+
+ /*
+ * First get the mfn for hl2_table by looking at monitor_table
+ */
+ hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
+ if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
+ {
+ mfn = l2e_get_pfn(hl2e);
+ ASSERT(mfn);
+ put_shadow_ref(mfn);
+ }
+
+ sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
+ if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
+ {
+ mfn = l2e_get_pfn(sl2e);
+ ASSERT(mfn);
+ put_shadow_ref(mfn);
}
- SH_LOG("Free shadow table. Freed=%d.", free);
+ unmap_domain_page(mpl2e);
+
+ /*
+ * Then free monitor_table.
+ */
+ mfn = pagetable_get_pfn(v->arch.monitor_table);
+ free_domheap_page(&frame_table[mfn]);
+
+ v->arch.monitor_table = mk_pagetable(0);
+ v->arch.monitor_vtable = 0;
}
-static inline int clear_shadow_page(
- struct mm_struct *m, struct shadow_status *x)
+int
+set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
+ struct domain_mmap_cache *l2cache,
+ struct domain_mmap_cache *l1cache)
{
- unsigned long *p;
- int restart = 0;
- struct pfn_info *spage = &frame_table[x->spfn_and_flags & PSH_pfn_mask];
+ unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
+ l2_pgentry_t *l2, l2e;
+ l1_pgentry_t *l1;
+ struct pfn_info *l1page;
+ unsigned long va = pfn << PAGE_SHIFT;
+
+ ASSERT(tabpfn != 0);
- switch ( spage->u.inuse.type_info & PGT_type_mask )
+ l2 = map_domain_page_with_cache(tabpfn, l2cache);
+ l2e = l2[l2_table_offset(va)];
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
{
- /* We clear L2 pages by zeroing the guest entries. */
- case PGT_l2_page_table:
- p = map_domain_mem((spage - frame_table) << PAGE_SHIFT);
- memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
- unmap_domain_mem(p);
- break;
+ l1page = alloc_domheap_page(NULL);
+ if ( !l1page )
+ {
+ unmap_domain_page_with_cache(l2, l2cache);
+ return 0;
+ }
- /* We clear L1 pages by freeing them: no benefit from zeroing them. */
- case PGT_l1_page_table:
- delete_shadow_status(m, x->pfn);
- free_shadow_page(m, spage);
- restart = 1; /* We need to go to start of list again. */
- break;
+ l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
+ memset(l1, 0, PAGE_SIZE);
+ unmap_domain_page_with_cache(l1, l1cache);
+
+ l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
+ l2[l2_table_offset(va)] = l2e;
}
+ unmap_domain_page_with_cache(l2, l2cache);
+
+ l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
+ l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
+ unmap_domain_page_with_cache(l1, l1cache);
- return restart;
+ return 1;
}
-static void clear_shadow_state(struct mm_struct *m)
+static int
+alloc_p2m_table(struct domain *d)
{
- int i;
- struct shadow_status *x;
-
- shadow_audit(m, 1);
+ struct list_head *list_ent;
+ struct pfn_info *page, *l2page;
+ l2_pgentry_t *l2;
+ unsigned long mfn, pfn;
+ struct domain_mmap_cache l1cache, l2cache;
+
+ l2page = alloc_domheap_page(NULL);
+ if ( l2page == NULL )
+ return 0;
- for ( i = 0; i < shadow_ht_buckets; i++ )
+ domain_mmap_cache_init(&l1cache);
+ domain_mmap_cache_init(&l2cache);
+
+ d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
+ l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
+ memset(l2, 0, PAGE_SIZE);
+ unmap_domain_page_with_cache(l2, &l2cache);
+
+ list_ent = d->page_list.next;
+ while ( list_ent != &d->page_list )
{
- retry:
- /* Skip empty buckets. */
- x = &m->shadow_ht[i];
- if ( x->pfn == 0 )
- continue;
+ page = list_entry(list_ent, struct pfn_info, list);
+ mfn = page_to_pfn(page);
+ pfn = machine_to_phys_mapping[mfn];
+ ASSERT(pfn != INVALID_M2P_ENTRY);
+ ASSERT(pfn < (1u<<20));
- if ( clear_shadow_page(m, x) )
- goto retry;
+ set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
- for ( x = x->next; x != NULL; x = x->next )
- if ( clear_shadow_page(m, x) )
- goto retry;
+ list_ent = page->list.next;
+ }
+
+ list_ent = d->xenpage_list.next;
+ while ( list_ent != &d->xenpage_list )
+ {
+ page = list_entry(list_ent, struct pfn_info, list);
+ mfn = page_to_pfn(page);
+ pfn = machine_to_phys_mapping[mfn];
+ if ( (pfn != INVALID_M2P_ENTRY) &&
+ (pfn < (1u<<20)) )
+ {
+ set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
+ }
- shadow_audit(m, 0);
+ list_ent = page->list.next;
}
- SH_VLOG("Scan shadow table. l1=%d l2=%d",
- perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
-}
+ domain_mmap_cache_destroy(&l2cache);
+ domain_mmap_cache_destroy(&l1cache);
+ return 1;
+}
-void shadow_mode_init(void)
+static void
+free_p2m_table(struct domain *d)
{
+ // uh, this needs some work... :)
+ BUG();
}
-int shadow_mode_enable(struct domain *p, unsigned int mode)
+int __shadow_mode_enable(struct domain *d, unsigned int mode)
{
- struct mm_struct *m = &p->mm;
+ struct vcpu *v;
+ int new_modes = (mode & ~d->arch.shadow_mode);
+
+ // Gotta be adding something to call this function.
+ ASSERT(new_modes);
+
+ // can't take anything away by calling this function.
+ ASSERT(!(d->arch.shadow_mode & ~mode));
- m->shadow_ht = xmalloc(
- shadow_ht_buckets * sizeof(struct shadow_status));
- if ( m->shadow_ht == NULL )
- goto nomem;
- memset(m->shadow_ht, 0, shadow_ht_buckets * sizeof(struct shadow_status));
+ for_each_vcpu(d, v)
+ {
+ invalidate_shadow_ldt(v);
+
+ // We need to set these up for __update_pagetables().
+ // See the comment there.
- if ( mode == SHM_logdirty )
+ /*
+ * arch.guest_vtable
+ */
+ if ( v->arch.guest_vtable &&
+ (v->arch.guest_vtable != __linear_l2_table) )
+ {
+ unmap_domain_page(v->arch.guest_vtable);
+ }
+ if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
+ v->arch.guest_vtable = __linear_l2_table;
+ else
+ v->arch.guest_vtable = NULL;
+
+ /*
+ * arch.shadow_vtable
+ */
+ if ( v->arch.shadow_vtable &&
+ (v->arch.shadow_vtable != __shadow_linear_l2_table) )
+ {
+ unmap_domain_page(v->arch.shadow_vtable);
+ }
+ if ( !(mode & SHM_external) )
+ v->arch.shadow_vtable = __shadow_linear_l2_table;
+ else
+ v->arch.shadow_vtable = NULL;
+
+ /*
+ * arch.hl2_vtable
+ */
+ if ( v->arch.hl2_vtable &&
+ (v->arch.hl2_vtable != __linear_hl2_table) )
+ {
+ unmap_domain_page(v->arch.hl2_vtable);
+ }
+ if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
+ v->arch.hl2_vtable = __linear_hl2_table;
+ else
+ v->arch.hl2_vtable = NULL;
+
+ /*
+ * arch.monitor_table & arch.monitor_vtable
+ */
+ if ( v->arch.monitor_vtable )
+ {
+ free_monitor_pagetable(v);
+ }
+ if ( mode & SHM_external )
+ {
+ alloc_monitor_pagetable(v);
+ }
+ }
+
+ if ( new_modes & SHM_enable )
{
- m->shadow_dirty_bitmap_size = (p->max_pages + 63) & ~63;
- m->shadow_dirty_bitmap =
- xmalloc(m->shadow_dirty_bitmap_size/8);
- if ( m->shadow_dirty_bitmap == NULL )
+ ASSERT( !d->arch.shadow_ht );
+ d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
+ if ( d->arch.shadow_ht == NULL )
+ goto nomem;
+
+ memset(d->arch.shadow_ht, 0,
+ shadow_ht_buckets * sizeof(struct shadow_status));
+ }
+
+ if ( new_modes & SHM_log_dirty )
+ {
+ ASSERT( !d->arch.shadow_dirty_bitmap );
+ d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
+ d->arch.shadow_dirty_bitmap =
+ xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
+ (8 * sizeof(unsigned long)));
+ if ( d->arch.shadow_dirty_bitmap == NULL )
{
- m->shadow_dirty_bitmap_size = 0;
+ d->arch.shadow_dirty_bitmap_size = 0;
goto nomem;
}
- memset(m->shadow_dirty_bitmap, 0, m->shadow_dirty_bitmap_size/8);
+ memset(d->arch.shadow_dirty_bitmap, 0,
+ d->arch.shadow_dirty_bitmap_size/8);
+ }
+
+ if ( new_modes & SHM_translate )
+ {
+ if ( !(new_modes & SHM_external) )
+ {
+ ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
+ if ( !alloc_p2m_table(d) )
+ {
+ printk("alloc_p2m_table failed (out-of-memory?)\n");
+ goto nomem;
+ }
+ }
+ else
+ {
+ // external guests provide their own memory for their P2M maps.
+ //
+ ASSERT( d == page_get_owner(
+ &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
+ }
+ }
+
+ printk("audit1\n");
+ _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
+ printk("audit1 done\n");
+
+ // Get rid of any shadow pages from any previous shadow mode.
+ //
+ free_shadow_pages(d);
+
+ printk("audit2\n");
+ _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
+ printk("audit2 done\n");
+
+ /*
+ * Tear down its counts by disassembling its page-table-based ref counts.
+ * Also remove CR3's gcount/tcount.
+ * That leaves things like GDTs and LDTs and external refs intact.
+ *
+ * Most pages will be writable tcount=0.
+ * Some will still be L1 tcount=0 or L2 tcount=0.
+ * Maybe some pages will be type none tcount=0.
+ * Pages granted external writable refs (via grant tables?) will
+ * still have a non-zero tcount. That's OK.
+ *
+ * gcounts will generally be 1 for PGC_allocated.
+ * GDTs and LDTs will have additional gcounts.
+ * Any grant-table based refs will still be in the gcount.
+ *
+ * We attempt to grab writable refs to each page (thus setting its type).
+ * Immediately put back those type refs.
+ *
+ * Assert that no pages are left with L1/L2/L3/L4 type.
+ */
+ audit_adjust_pgtables(d, -1, 1);
+
+ d->arch.shadow_mode = mode;
+
+ if ( shadow_mode_refcounts(d) )
+ {
+ struct list_head *list_ent = d->page_list.next;
+ while ( list_ent != &d->page_list )
+ {
+ struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
+ if ( !get_page_type(page, PGT_writable_page) )
+ BUG();
+ put_page_type(page);
+
+ list_ent = page->list.next;
+ }
}
- m->shadow_mode = mode;
+ audit_adjust_pgtables(d, 1, 1);
+
+ printk("audit3\n");
+ _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
+ printk("audit3 done\n");
- __shadow_mk_pagetable(m);
return 0;
nomem:
- if ( m->shadow_ht != NULL )
- xfree( m->shadow_ht );
- m->shadow_ht = NULL;
+ if ( (new_modes & SHM_enable) )
+ {
+ xfree(d->arch.shadow_ht);
+ d->arch.shadow_ht = NULL;
+ }
+ if ( (new_modes & SHM_log_dirty) )
+ {
+ xfree(d->arch.shadow_dirty_bitmap);
+ d->arch.shadow_dirty_bitmap = NULL;
+ }
+ if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
+ pagetable_get_paddr(d->arch.phys_table) )
+ {
+ free_p2m_table(d);
+ }
return -ENOMEM;
}
-void __shadow_mode_disable(struct domain *d)
+int shadow_mode_enable(struct domain *d, unsigned int mode)
{
- struct mm_struct *m = &d->mm;
- struct shadow_status *x, *n;
+ int rc;
+ shadow_lock(d);
+ rc = __shadow_mode_enable(d, mode);
+ shadow_unlock(d);
+ return rc;
+}
+
+static void
+translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
+{
+ int i;
+ l1_pgentry_t *l1;
+
+ l1 = map_domain_page(l1mfn);
+ for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
+ {
+ if ( is_guest_l1_slot(i) &&
+ (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
+ {
+ unsigned long mfn = l1e_get_pfn(l1[i]);
+ unsigned long gpfn = __mfn_to_gpfn(d, mfn);
+ ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
+ l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
+ }
+ }
+ unmap_domain_page(l1);
+}
+
+// This is not general enough to handle arbitrary pagetables
+// with shared L1 pages, etc., but it is sufficient for bringing
+// up dom0.
+//
+void
+translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
+ unsigned int type)
+{
+ int i;
+ l2_pgentry_t *l2;
- free_shadow_state(m);
- m->shadow_mode = 0;
+ ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
+
+ l2 = map_domain_page(l2mfn);
+ for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
+ {
+ if ( is_guest_l2_slot(type, i) &&
+ (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
+ {
+ unsigned long mfn = l2e_get_pfn(l2[i]);
+ unsigned long gpfn = __mfn_to_gpfn(d, mfn);
+ ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
+ l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
+ translate_l1pgtable(d, p2m, mfn);
+ }
+ }
+ unmap_domain_page(l2);
+}
+
+static void free_shadow_ht_entries(struct domain *d)
+{
+ struct shadow_status *x, *n;
SH_VLOG("freed tables count=%d l1=%d l2=%d",
- m->shadow_page_count, perfc_value(shadow_l1_pages),
+ d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
perfc_value(shadow_l2_pages));
- n = m->shadow_ht_extras;
+ n = d->arch.shadow_ht_extras;
while ( (x = n) != NULL )
{
- m->shadow_extras_count--;
+ d->arch.shadow_extras_count--;
n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
xfree(x);
}
- m->shadow_ht_extras = NULL;
- ASSERT(m->shadow_extras_count == 0);
- SH_LOG("freed extras, now %d", m->shadow_extras_count);
+ d->arch.shadow_ht_extras = NULL;
+ d->arch.shadow_ht_free = NULL;
- if ( m->shadow_dirty_bitmap != NULL )
+ ASSERT(d->arch.shadow_extras_count == 0);
+ SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
+
+ if ( d->arch.shadow_dirty_bitmap != NULL )
+ {
+ xfree(d->arch.shadow_dirty_bitmap);
+ d->arch.shadow_dirty_bitmap = 0;
+ d->arch.shadow_dirty_bitmap_size = 0;
+ }
+
+ xfree(d->arch.shadow_ht);
+ d->arch.shadow_ht = NULL;
+}
+
+static void free_out_of_sync_entries(struct domain *d)
+{
+ struct out_of_sync_entry *x, *n;
+
+ n = d->arch.out_of_sync_extras;
+ while ( (x = n) != NULL )
{
- xfree(m->shadow_dirty_bitmap);
- m->shadow_dirty_bitmap = 0;
- m->shadow_dirty_bitmap_size = 0;
+ d->arch.out_of_sync_extras_count--;
+ n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
+ xfree(x);
}
- xfree(m->shadow_ht);
- m->shadow_ht = NULL;
+ d->arch.out_of_sync_extras = NULL;
+ d->arch.out_of_sync_free = NULL;
+ d->arch.out_of_sync = NULL;
+
+ ASSERT(d->arch.out_of_sync_extras_count == 0);
+ FSH_LOG("freed extra out_of_sync entries, now %d",
+ d->arch.out_of_sync_extras_count);
+}
+
+void __shadow_mode_disable(struct domain *d)
+{
+ if ( unlikely(!shadow_mode_enabled(d)) )
+ return;
+
+ /*
+ * Currently this does not fix up page ref counts, so it is valid to call
+ * only when a domain is being destroyed.
+ */
+ BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags) &&
+ shadow_mode_refcounts(d));
+ d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
+
+ free_shadow_pages(d);
+ free_writable_pte_predictions(d);
+
+#ifndef NDEBUG
+ int i;
+ for ( i = 0; i < shadow_ht_buckets; i++ )
+ {
+ if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
+ {
+ printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
+ __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
+ BUG();
+ }
+ }
+#endif
+
+ d->arch.shadow_mode = 0;
+
+ free_shadow_ht_entries(d);
+ free_out_of_sync_entries(d);
+
+ struct vcpu *v;
+ for_each_vcpu(d, v)
+ {
+ update_pagetables(v);
+ }
}
static int shadow_mode_table_op(
struct domain *d, dom0_shadow_control_t *sc)
{
unsigned int op = sc->op;
- struct mm_struct *m = &d->mm;
int i, rc = 0;
+ struct vcpu *v;
- ASSERT(spin_is_locked(&m->shadow_lock));
+ ASSERT(shadow_lock_is_acquired(d));
- SH_VLOG("shadow mode table op %08lx %08lx count %d",
- pagetable_val(m->pagetable), pagetable_val(m->shadow_table),
- m->shadow_page_count);
+ SH_VLOG("shadow mode table op %lx %lx count %d",
+ (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
+ (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
+ d->arch.shadow_page_count);
- shadow_audit(m, 1);
+ shadow_audit(d, 1);
switch ( op )
{
case DOM0_SHADOW_CONTROL_OP_FLUSH:
- free_shadow_state(m);
+ free_shadow_pages(d);
- m->shadow_fault_count = 0;
- m->shadow_dirty_count = 0;
- m->shadow_dirty_net_count = 0;
- m->shadow_dirty_block_count = 0;
+ d->arch.shadow_fault_count = 0;
+ d->arch.shadow_dirty_count = 0;
+ d->arch.shadow_dirty_net_count = 0;
+ d->arch.shadow_dirty_block_count = 0;
break;
case DOM0_SHADOW_CONTROL_OP_CLEAN:
- clear_shadow_state(m);
+ free_shadow_pages(d);
- sc->stats.fault_count = m->shadow_fault_count;
- sc->stats.dirty_count = m->shadow_dirty_count;
- sc->stats.dirty_net_count = m->shadow_dirty_net_count;
- sc->stats.dirty_block_count = m->shadow_dirty_block_count;
+ sc->stats.fault_count = d->arch.shadow_fault_count;
+ sc->stats.dirty_count = d->arch.shadow_dirty_count;
+ sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
+ sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
- m->shadow_fault_count = 0;
- m->shadow_dirty_count = 0;
- m->shadow_dirty_net_count = 0;
- m->shadow_dirty_block_count = 0;
+ d->arch.shadow_fault_count = 0;
+ d->arch.shadow_dirty_count = 0;
+ d->arch.shadow_dirty_net_count = 0;
+ d->arch.shadow_dirty_block_count = 0;
if ( (d->max_pages > sc->pages) ||
(sc->dirty_bitmap == NULL) ||
- (m->shadow_dirty_bitmap == NULL) )
+ (d->arch.shadow_dirty_bitmap == NULL) )
{
rc = -EINVAL;
break;
@@ -297,34 +1271,35 @@ static int shadow_mode_table_op(
if (copy_to_user(
sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
- m->shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
bytes))
{
// copy_to_user can fail when copying to guest app memory.
// app should zero buffer after mallocing, and pin it
rc = -EINVAL;
memset(
- m->shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ d->arch.shadow_dirty_bitmap +
+ (i/(8*sizeof(unsigned long))),
0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
break;
}
memset(
- m->shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
0, bytes);
}
break;
case DOM0_SHADOW_CONTROL_OP_PEEK:
- sc->stats.fault_count = m->shadow_fault_count;
- sc->stats.dirty_count = m->shadow_dirty_count;
- sc->stats.dirty_net_count = m->shadow_dirty_net_count;
- sc->stats.dirty_block_count = m->shadow_dirty_block_count;
+ sc->stats.fault_count = d->arch.shadow_fault_count;
+ sc->stats.dirty_count = d->arch.shadow_dirty_count;
+ sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
+ sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
if ( (d->max_pages > sc->pages) ||
(sc->dirty_bitmap == NULL) ||
- (m->shadow_dirty_bitmap == NULL) )
+ (d->arch.shadow_dirty_bitmap == NULL) )
{
rc = -EINVAL;
break;
@@ -332,7 +1307,7 @@ static int shadow_mode_table_op(
sc->pages = d->max_pages;
if (copy_to_user(
- sc->dirty_bitmap, m->shadow_dirty_bitmap, (d->max_pages+7)/8))
+ sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
{
rc = -EINVAL;
break;
@@ -345,9 +1320,12 @@ static int shadow_mode_table_op(
break;
}
- SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);
- shadow_audit(m, 1);
- __shadow_mk_pagetable(m);
+ SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
+ shadow_audit(d, 1);
+
+ for_each_vcpu(d,v)
+ __update_pagetables(v);
+
return rc;
}
@@ -355,349 +1333,1578 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
{
unsigned int op = sc->op;
int rc = 0;
+ struct vcpu *v;
- if ( unlikely(d == current) )
+ if ( unlikely(d == current->domain) )
{
DPRINTK("Don't try to do a shadow op on yourself!\n");
return -EINVAL;
}
domain_pause(d);
- synchronise_pagetables(~0UL);
- shadow_lock(&d->mm);
+ shadow_lock(d);
switch ( op )
{
case DOM0_SHADOW_CONTROL_OP_OFF:
- shadow_mode_disable(d);
+ __shadow_sync_all(d);
+ __shadow_mode_disable(d);
break;
case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
- shadow_mode_disable(d);
- rc = shadow_mode_enable(d, SHM_test);
+ free_shadow_pages(d);
+ rc = __shadow_mode_enable(d, SHM_enable);
break;
case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
- shadow_mode_disable(d);
- rc = shadow_mode_enable(d, SHM_logdirty);
+ free_shadow_pages(d);
+ rc = __shadow_mode_enable(
+ d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
+ break;
+
+ case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
+ free_shadow_pages(d);
+ rc = __shadow_mode_enable(
+ d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
break;
default:
- rc = shadow_mode(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
+ rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
break;
}
- shadow_unlock(&d->mm);
+ shadow_unlock(d);
+
+ for_each_vcpu(d,v)
+ update_pagetables(v);
domain_unpause(d);
return rc;
}
-static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m)
+/*
+ * XXX KAF: Why is this VMX specific?
+ */
+void vmx_shadow_clear_state(struct domain *d)
{
- struct pfn_info *page = alloc_domheap_page(NULL);
+ SH_VVLOG("%s:", __func__);
+ shadow_lock(d);
+ free_shadow_pages(d);
+ shadow_unlock(d);
+ update_pagetables(d->vcpu[0]);
+}
- m->shadow_page_count++;
+unsigned long
+gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+{
+ ASSERT( shadow_mode_translate(d) );
- if ( unlikely(page == NULL) )
+ perfc_incrc(gpfn_to_mfn_foreign);
+
+ unsigned long va = gpfn << PAGE_SHIFT;
+ unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
+ l2_pgentry_t *l2 = map_domain_page(tabpfn);
+ l2_pgentry_t l2e = l2[l2_table_offset(va)];
+ unmap_domain_page(l2);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
{
- printk("Couldn't alloc shadow page! count=%d\n",
- m->shadow_page_count);
- SH_VLOG("Shadow tables l1=%d l2=%d",
- perfc_value(shadow_l1_pages),
- perfc_value(shadow_l2_pages));
- BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+ printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
+ d->domain_id, gpfn, l2e_get_intpte(l2e));
+ return INVALID_MFN;
}
+ l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
+ l1_pgentry_t l1e = l1[l1_table_offset(va)];
+ unmap_domain_page(l1);
- return page;
+#if 0
+ printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
+ d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
+#endif
+
+ if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
+ {
+ printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
+ d->domain_id, gpfn, l1e_get_intpte(l1e));
+ return INVALID_MFN;
+ }
+
+ return l1e_get_pfn(l1e);
}
-void unshadow_table(unsigned long gpfn, unsigned int type)
+static unsigned long
+shadow_hl2_table(struct domain *d, unsigned long gpfn, unsigned long gmfn,
+ unsigned long smfn)
{
- unsigned long spfn;
- struct domain *d = frame_table[gpfn].u.inuse.domain;
+ unsigned long hl2mfn;
+ l1_pgentry_t *hl2;
+ int limit;
- SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, gpfn);
+ ASSERT(PGT_base_page_table == PGT_l2_page_table);
- perfc_incrc(unshadow_table_count);
+ if ( unlikely(!(hl2mfn = alloc_shadow_page(d, gpfn, gmfn, PGT_hl2_shadow))) )
+ {
+ printk("Couldn't alloc an HL2 shadow for pfn=%lx mfn=%lx\n",
+ gpfn, gmfn);
+ BUG(); /* XXX Deal gracefully with failure. */
+ }
- /*
- * This function is the same for all p.t. pages. Even for multi-processor
- * guests there won't be a race here as this CPU was the one that
- * cmpxchg'ed the page to invalid.
- */
- spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask;
- delete_shadow_status(&d->mm, gpfn);
- free_shadow_page(&d->mm, &frame_table[spfn]);
+ SH_VVLOG("shadow_hl2_table(gpfn=%lx, gmfn=%lx, smfn=%lx) => %lx",
+ gpfn, gmfn, smfn, hl2mfn);
+ perfc_incrc(shadow_hl2_table_count);
+
+ hl2 = map_domain_page(hl2mfn);
+
+#ifdef __i386__
+ if ( shadow_mode_external(d) )
+ limit = L2_PAGETABLE_ENTRIES;
+ else
+ limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
+#else
+ limit = 0; /* XXX x86/64 XXX */
+#endif
+
+ memset(hl2, 0, limit * sizeof(l1_pgentry_t));
+
+ if ( !shadow_mode_external(d) )
+ {
+ memset(&hl2[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 0,
+ HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+
+ // Setup easy access to the GL2, SL2, and HL2 frames.
+ //
+ hl2[l2_table_offset(LINEAR_PT_VIRT_START)] =
+ l1e_from_pfn(gmfn, __PAGE_HYPERVISOR);
+ hl2[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
+ l1e_from_pfn(smfn, __PAGE_HYPERVISOR);
+ hl2[l2_table_offset(PERDOMAIN_VIRT_START)] =
+ l1e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
+ }
+
+ unmap_domain_page(hl2);
+
+ return hl2mfn;
}
-unsigned long shadow_l2_table(
- struct mm_struct *m, unsigned long gpfn)
+/*
+ * This could take and use a snapshot, and validate the entire page at
+ * once, or it could continue to fault in entries one at a time...
+ * Might be worth investigating...
+ */
+static unsigned long shadow_l2_table(
+ struct domain *d, unsigned long gpfn, unsigned long gmfn)
{
- struct pfn_info *spfn_info;
- unsigned long spfn;
- l2_pgentry_t *spl2e;
+ unsigned long smfn;
+ l2_pgentry_t *spl2e;
- SH_VVLOG("shadow_l2_table( %08lx )", gpfn);
+ SH_VVLOG("shadow_l2_table(gpfn=%lx, gmfn=%lx)", gpfn, gmfn);
perfc_incrc(shadow_l2_table_count);
- if ( (spfn_info = alloc_shadow_page(m)) == NULL )
+ if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l2_shadow))) )
+ {
+ printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n",
+ gpfn, gmfn);
BUG(); /* XXX Deal gracefully with failure. */
+ }
- spfn_info->u.inuse.type_info = PGT_l2_page_table;
- perfc_incr(shadow_l2_pages);
+ spl2e = (l2_pgentry_t *)map_domain_page(smfn);
- spfn = spfn_info - frame_table;
+ /* Install hypervisor and 2x linear p.t. mapings. */
+ if ( (PGT_base_page_table == PGT_l2_page_table) &&
+ !shadow_mode_external(d) )
+ {
+ /*
+ * We could proactively fill in PDEs for pages that are already
+ * shadowed *and* where the guest PDE has _PAGE_ACCESSED set
+ * (restriction required for coherence of the accessed bit). However,
+ * we tried it and it didn't help performance. This is simpler.
+ */
+ memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
+
+ /* Install hypervisor and 2x linear p.t. mapings. */
+ memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+ &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+ HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+
+ spl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
+
+ spl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
+ l2e_from_paddr(__pa(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt),
+ __PAGE_HYPERVISOR);
+
+ if ( shadow_mode_translate(d) ) // NB: not external
+ {
+ unsigned long hl2mfn;
+
+ spl2e[l2_table_offset(RO_MPT_VIRT_START)] =
+ l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
+ __PAGE_HYPERVISOR);
+
+ if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
+ hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
+
+ // shadow_mode_translate (but not external) sl2 tables hold a
+ // ref to their hl2.
+ //
+ if ( !get_shadow_ref(hl2mfn) )
+ BUG();
+
+ spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
+ }
+ else
+ spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(gmfn, __PAGE_HYPERVISOR);
+ }
+ else
+ {
+ memset(spl2e, 0, L2_PAGETABLE_ENTRIES*sizeof(l2_pgentry_t));
+ }
- /* Mark pfn as being shadowed; update field to point at shadow. */
- set_shadow_status(m, gpfn, spfn | PSH_shadowed);
-
- spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
+ unmap_domain_page(spl2e);
- /*
- * We could proactively fill in PDEs for pages that are already shadowed.
- * However, we tried it and it didn't help performance. This is simpler.
- */
- memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+ SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn);
+ return smfn;
+}
-#ifdef __i386__
- /* Install hypervisor and 2x linear p.t. mapings. */
- memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
- spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
- spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
- __PAGE_HYPERVISOR);
+void shadow_map_l1_into_current_l2(unsigned long va)
+{
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ l1_pgentry_t *gpl1e, *spl1e;
+ l2_pgentry_t gl2e, sl2e;
+ unsigned long gl1pfn, gl1mfn, sl1mfn;
+ int i, init_table = 0;
+
+ __guest_get_l2e(v, va, &gl2e);
+ ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
+ gl1pfn = l2e_get_pfn(gl2e);
+
+ if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
+ {
+ /* This L1 is NOT already shadowed so we need to shadow it. */
+ SH_VVLOG("4a: l1 not shadowed");
+
+ gl1mfn = __gpfn_to_mfn(d, gl1pfn);
+ if ( unlikely(!VALID_MFN(gl1mfn)) )
+ {
+ // Attempt to use an invalid pfn as an L1 page.
+ // XXX this needs to be more graceful!
+ BUG();
+ }
+
+ if ( unlikely(!(sl1mfn =
+ alloc_shadow_page(d, gl1pfn, gl1mfn, PGT_l1_shadow))) )
+ {
+ printk("Couldn't alloc an L1 shadow for pfn=%lx mfn=%lx\n",
+ gl1pfn, gl1mfn);
+ BUG(); /* XXX Need to deal gracefully with failure. */
+ }
+
+ perfc_incrc(shadow_l1_table_count);
+ init_table = 1;
+ }
+ else
+ {
+ /* This L1 is shadowed already, but the L2 entry is missing. */
+ SH_VVLOG("4b: was shadowed, l2 missing (%lx)", sl1mfn);
+ }
+
+#ifndef NDEBUG
+ l2_pgentry_t old_sl2e;
+ __shadow_get_l2e(v, va, &old_sl2e);
+ ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) );
#endif
- unmap_domain_mem(spl2e);
+ if ( !get_shadow_ref(sl1mfn) )
+ BUG();
+ l2pde_general(d, &gl2e, &sl2e, sl1mfn);
+ __guest_set_l2e(v, va, gl2e);
+ __shadow_set_l2e(v, va, sl2e);
- SH_VLOG("shadow_l2_table( %08lx -> %08lx)", gpfn, spfn);
- return spfn;
+ if ( init_table )
+ {
+ l1_pgentry_t sl1e;
+ int index = l1_table_offset(va);
+ int min = 1, max = 0;
+
+ gpl1e = &(linear_pg_table[l1_linear_offset(va) &
+ ~(L1_PAGETABLE_ENTRIES-1)]);
+
+ spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
+ ~(L1_PAGETABLE_ENTRIES-1)]);
+
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
+ if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
+ unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
+ sl1e = l1e_empty();
+ if ( l1e_get_flags(sl1e) == 0 )
+ {
+ // First copy entries from 0 until first invalid.
+ // Then copy entries from index until first invalid.
+ //
+ if ( i < index ) {
+ i = index - 1;
+ continue;
+ }
+ break;
+ }
+ spl1e[i] = sl1e;
+ if ( unlikely(i < min) )
+ min = i;
+ if ( likely(i > max) )
+ max = i;
+ }
+
+ frame_table[sl1mfn].tlbflush_timestamp =
+ SHADOW_ENCODE_MIN_MAX(min, max);
+ }
}
-static void shadow_map_l1_into_current_l2(unsigned long va)
-{
- struct mm_struct *m = &current->mm;
- unsigned long *gpl1e, *spl1e, gpde, spde, gl1pfn, sl1pfn, sl1ss;
- struct pfn_info *sl1pfn_info;
- int i;
+void shadow_invlpg(struct vcpu *v, unsigned long va)
+{
+ struct domain *d = v->domain;
+ l1_pgentry_t gpte, spte;
+
+ ASSERT(shadow_mode_enabled(d));
- gpde = l2_pgentry_val(linear_l2_table[va >> L2_PAGETABLE_SHIFT]);
+ shadow_lock(d);
- gl1pfn = gpde >> PAGE_SHIFT;
+ __shadow_sync_va(v, va);
- sl1ss = __shadow_status(m, gl1pfn);
- if ( !(sl1ss & PSH_shadowed) )
+ // XXX mafetter: will need to think about 4MB pages...
+
+ // It's not strictly necessary to update the shadow here,
+ // but it might save a fault later.
+ //
+ if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT],
+ sizeof(gpte))) {
+ perfc_incrc(shadow_invlpg_faults);
+ return;
+ }
+ l1pte_propagate_from_guest(d, gpte, &spte);
+ shadow_set_l1e(va, spte, 1);
+
+ shadow_unlock(d);
+}
+
+struct out_of_sync_entry *
+shadow_alloc_oos_entry(struct domain *d)
+{
+ struct out_of_sync_entry *f, *extra;
+ unsigned size, i;
+
+ if ( unlikely(d->arch.out_of_sync_free == NULL) )
{
- /* This L1 is NOT already shadowed so we need to shadow it. */
- SH_VVLOG("4a: l1 not shadowed ( %08lx )", sl1pfn);
+ FSH_LOG("Allocate more fullshadow tuple blocks.");
- sl1pfn_info = alloc_shadow_page(m);
- sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;
-
- sl1pfn = sl1pfn_info - frame_table;
+ size = sizeof(void *) + (out_of_sync_extra_size * sizeof(*f));
+ extra = xmalloc_bytes(size);
- perfc_incrc(shadow_l1_table_count);
- perfc_incr(shadow_l1_pages);
+ /* XXX Should be more graceful here. */
+ if ( extra == NULL )
+ BUG();
+
+ memset(extra, 0, size);
+
+ /* Record the allocation block so it can be correctly freed later. */
+ d->arch.out_of_sync_extras_count++;
+ *((struct out_of_sync_entry **)&extra[out_of_sync_extra_size]) =
+ d->arch.out_of_sync_extras;
+ d->arch.out_of_sync_extras = &extra[0];
- set_shadow_status(m, gl1pfn, PSH_shadowed | sl1pfn);
+ /* Thread a free chain through the newly-allocated nodes. */
+ for ( i = 0; i < (out_of_sync_extra_size - 1); i++ )
+ extra[i].next = &extra[i+1];
+ extra[i].next = NULL;
- l2pde_general(m, &gpde, &spde, sl1pfn);
+ /* Add the new nodes to the free list. */
+ d->arch.out_of_sync_free = &extra[0];
+ }
- linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
- shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(spde);
+ /* Allocate a new node from the quicklist. */
+ f = d->arch.out_of_sync_free;
+ d->arch.out_of_sync_free = f->next;
- gpl1e = (unsigned long *) &(linear_pg_table[
- (va>>L1_PAGETABLE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);
+ return f;
+}
- spl1e = (unsigned long *) &(shadow_linear_pg_table[
- (va>>L1_PAGETABLE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);
+static inline unsigned long
+shadow_make_snapshot(
+ struct domain *d, unsigned long gpfn, unsigned long gmfn)
+{
+ unsigned long smfn, sl1mfn = 0;
+ void *original, *snapshot;
+ u32 min_max = 0;
+ int min, max, length;
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- l1pte_propagate_from_guest(m, &gpl1e[i], &spl1e[i]);
+ if ( test_and_set_bit(_PGC_out_of_sync, &frame_table[gmfn].count_info) )
+ {
+ ASSERT(__shadow_status(d, gpfn, PGT_snapshot));
+ return SHADOW_SNAPSHOT_ELSEWHERE;
}
- else
+
+ perfc_incrc(shadow_make_snapshot);
+
+ if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_snapshot))) )
{
- /* This L1 is shadowed already, but the L2 entry is missing. */
- SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )", sl1pfn);
+ printk("Couldn't alloc fullshadow snapshot for pfn=%lx mfn=%lx!\n"
+ "Dom%d snapshot_count_count=%d\n",
+ gpfn, gmfn, d->domain_id, d->arch.snapshot_page_count);
+ BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+ }
+
+ if ( !get_shadow_ref(smfn) )
+ BUG();
+
+ if ( shadow_mode_refcounts(d) &&
+ (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
+ min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
+ pfn_to_page(smfn)->tlbflush_timestamp = min_max;
- sl1pfn = sl1ss & PSH_pfn_mask;
- l2pde_general(m, &gpde, &spde, sl1pfn);
+ min = SHADOW_MIN(min_max);
+ max = SHADOW_MAX(min_max);
+ length = max - min + 1;
+ perfc_incr_histo(snapshot_copies, length, PT_UPDATES);
- linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
- shadow_linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
- }
+ min *= sizeof(l1_pgentry_t);
+ length *= sizeof(l1_pgentry_t);
+
+ original = map_domain_page(gmfn);
+ snapshot = map_domain_page(smfn);
+ memcpy(snapshot + min, original + min, length);
+ unmap_domain_page(original);
+ unmap_domain_page(snapshot);
+
+ return smfn;
}
-int shadow_fault(unsigned long va, long error_code)
+static void
+shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
{
- unsigned long gpte, spte;
- struct mm_struct *m = &current->mm;
+ void *snapshot;
- SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
+ if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
+ return;
- check_pagetable(m, current->mm.pagetable, "pre-sf");
+ // Clear the out_of_sync bit.
+ //
+ clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
- /*
- * STEP 1. A fast-reject set of checks with no locking.
- */
+ // XXX Need to think about how to protect the domain's
+ // information less expensively.
+ //
+ snapshot = map_domain_page(entry->snapshot_mfn);
+ memset(snapshot, 0, PAGE_SIZE);
+ unmap_domain_page(snapshot);
+
+ put_shadow_ref(entry->snapshot_mfn);
+}
- if ( unlikely(__get_user(gpte, (unsigned long *)
- &linear_pg_table[va >> PAGE_SHIFT])) )
+struct out_of_sync_entry *
+shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
+ unsigned long mfn)
+{
+ struct domain *d = v->domain;
+ struct pfn_info *page = &frame_table[mfn];
+ struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d);
+
+ ASSERT(shadow_lock_is_acquired(d));
+ ASSERT(pfn_valid(mfn));
+
+#ifndef NDEBUG
+ u32 type = page->u.inuse.type_info & PGT_type_mask;
+ if ( shadow_mode_refcounts(d) )
{
- SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
- return 0;
+ ASSERT(type == PGT_writable_page);
}
+ else
+ {
+ ASSERT(type && (type < PGT_l4_page_table));
+ }
+#endif
+
+ FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08x", __func__,
+ gpfn, mfn, page->count_info, page->u.inuse.type_info);
+
+ // XXX this will require some more thought... Cross-domain sharing and
+ // modification of page tables? Hmm...
+ //
+ if ( d != page_get_owner(page) )
+ BUG();
+
+ perfc_incrc(shadow_mark_mfn_out_of_sync_calls);
- if ( !(gpte & _PAGE_PRESENT) )
+ entry->gpfn = gpfn;
+ entry->gmfn = mfn;
+ entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
+ entry->writable_pl1e = -1;
+
+#if SHADOW_DEBUG
+ mark_shadows_as_reflecting_snapshot(d, gpfn);
+#endif
+
+ // increment guest's ref count to represent the entry in the
+ // full shadow out-of-sync list.
+ //
+ get_page(page, d);
+
+ // Add to the out-of-sync list
+ //
+ entry->next = d->arch.out_of_sync;
+ d->arch.out_of_sync = entry;
+
+ return entry;
+}
+
+void shadow_mark_va_out_of_sync(
+ struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long va)
+{
+ struct out_of_sync_entry *entry =
+ shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
+ l2_pgentry_t sl2e;
+
+ // We need the address of shadow PTE that maps @va.
+ // It might not exist yet. Make sure it's there.
+ //
+ __shadow_get_l2e(v, va, &sl2e);
+ if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
{
- SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+ // either this L1 isn't shadowed yet, or the shadow isn't linked into
+ // the current L2.
+ shadow_map_l1_into_current_l2(va);
+ __shadow_get_l2e(v, va, &sl2e);
+ }
+ ASSERT(l2e_get_flags(sl2e) & _PAGE_PRESENT);
+
+ // NB: this is stored as a machine address.
+ entry->writable_pl1e =
+ l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * l1_table_offset(va));
+ ASSERT( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) );
+
+ // Increment shadow's page count to represent the reference
+ // inherent in entry->writable_pl1e
+ //
+ if ( !get_shadow_ref(l2e_get_pfn(sl2e)) )
+ BUG();
+
+ FSH_LOG("mark_out_of_sync(va=%lx -> writable_pl1e=%lx)",
+ va, entry->writable_pl1e);
+}
+
+/*
+ * Returns 1 if the snapshot for @gmfn exists and its @index'th entry matches.
+ * Returns 0 otherwise.
+ */
+static int snapshot_entry_matches(
+ struct domain *d, l1_pgentry_t *guest_pt,
+ unsigned long gpfn, unsigned index)
+{
+ unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot);
+ l1_pgentry_t *snapshot; // could be L1s or L2s or ...
+ int entries_match;
+
+ perfc_incrc(snapshot_entry_matches_calls);
+
+ if ( !smfn )
+ return 0;
+
+ snapshot = map_domain_page(smfn);
+
+ // This could probably be smarter, but this is sufficent for
+ // our current needs.
+ //
+ entries_match = !l1e_has_changed(guest_pt[index], snapshot[index],
+ PAGE_FLAG_MASK);
+
+ unmap_domain_page(snapshot);
+
+#ifdef PERF_COUNTERS
+ if ( entries_match )
+ perfc_incrc(snapshot_entry_matches_true);
+#endif
+
+ return entries_match;
+}
+
+/*
+ * Returns 1 if va's shadow mapping is out-of-sync.
+ * Returns 0 otherwise.
+ */
+int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
+{
+ struct domain *d = v->domain;
+ unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table);
+ unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn);
+ l2_pgentry_t l2e;
+ unsigned long l1pfn, l1mfn;
+
+ ASSERT(shadow_lock_is_acquired(d));
+ ASSERT(VALID_M2P(l2pfn));
+
+ perfc_incrc(shadow_out_of_sync_calls);
+
+ if ( page_out_of_sync(&frame_table[l2mfn]) &&
+ !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable,
+ l2pfn, l2_table_offset(va)) )
+ return 1;
+
+ __guest_get_l2e(v, va, &l2e);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
return 0;
+
+ l1pfn = l2e_get_pfn(l2e);
+ l1mfn = __gpfn_to_mfn(d, l1pfn);
+
+ // If the l1 pfn is invalid, it can't be out of sync...
+ if ( !VALID_MFN(l1mfn) )
+ return 0;
+
+ if ( page_out_of_sync(&frame_table[l1mfn]) &&
+ !snapshot_entry_matches(
+ d, &linear_pg_table[l1_linear_offset(va) & ~(L1_PAGETABLE_ENTRIES-1)],
+ l1pfn, l1_table_offset(va)) )
+ return 1;
+
+ return 0;
+}
+
+#define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
+static inline unsigned long
+predict_writable_pte_page(struct domain *d, unsigned long gpfn)
+{
+ return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
+}
+
+static inline void
+increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
+{
+ unsigned long score = prediction & PGT_score_mask;
+ int create = (score == 0);
+
+ // saturating addition
+ score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
+ score = score ? score : PGT_score_mask;
+
+ prediction = (prediction & PGT_mfn_mask) | score;
+
+ //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+
+ if ( create )
+ perfc_incr(writable_pte_predictions);
+}
+
+static inline void
+decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
+{
+ unsigned long score = prediction & PGT_score_mask;
+ ASSERT(score);
+
+ // divide score by 2... We don't like bad predictions.
+ //
+ score = (score >> 1) & PGT_score_mask;
+
+ prediction = (prediction & PGT_mfn_mask) | score;
+
+ //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
+
+ if ( score )
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+ else
+ {
+ delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+ perfc_decr(writable_pte_predictions);
+ }
+}
+
+static void
+free_writable_pte_predictions(struct domain *d)
+{
+ int i;
+ struct shadow_status *x;
+
+ for ( i = 0; i < shadow_ht_buckets; i++ )
+ {
+ u32 count;
+ unsigned long *gpfn_list;
+
+ /* Skip empty buckets. */
+ if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
+ continue;
+
+ count = 0;
+ for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
+ if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
+ count++;
+
+ gpfn_list = xmalloc_array(unsigned long, count);
+ count = 0;
+ for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
+ if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
+ gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
+
+ while ( count )
+ {
+ count--;
+ delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+ }
+
+ xfree(gpfn_list);
+ }
+}
+
+static u32 remove_all_write_access_in_ptpage(
+ struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
+ unsigned long readonly_gpfn, unsigned long readonly_gmfn,
+ u32 max_refs_to_find, unsigned long prediction)
+{
+ l1_pgentry_t *pt = map_domain_page(pt_mfn);
+ l1_pgentry_t match;
+ unsigned long flags = _PAGE_RW | _PAGE_PRESENT;
+ int i;
+ u32 found = 0;
+ int is_l1_shadow =
+ ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
+ PGT_l1_shadow);
+
+ match = l1e_from_pfn(readonly_gmfn, flags);
+
+ // returns true if all refs have been found and fixed.
+ //
+ int fix_entry(int i)
+ {
+ l1_pgentry_t old = pt[i];
+ l1_pgentry_t new = old;
+
+ l1e_remove_flags(new,_PAGE_RW);
+ if ( is_l1_shadow && !shadow_get_page_from_l1e(new, d) )
+ BUG();
+ found++;
+ pt[i] = new;
+ if ( is_l1_shadow )
+ shadow_put_page_from_l1e(old, d);
+
+#if 0
+ printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
+ "is_l1_shadow=%d\n",
+ readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
+#endif
+
+ return (found == max_refs_to_find);
+ }
+
+ i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1);
+ if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) )
+ {
+ perfc_incrc(remove_write_fast_exit);
+ increase_writable_pte_prediction(d, readonly_gpfn, prediction);
+ unmap_domain_page(pt);
+ return found;
+ }
+
+ for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
+ {
+ if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) )
+ break;
+ }
+
+ unmap_domain_page(pt);
+
+ return found;
+#undef MATCH_ENTRY
+}
+
+int shadow_remove_all_write_access(
+ struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
+{
+ int i;
+ struct shadow_status *a;
+ u32 found = 0, fixups, write_refs;
+ unsigned long prediction, predicted_gpfn, predicted_smfn;
+
+ ASSERT(shadow_lock_is_acquired(d));
+ ASSERT(VALID_MFN(readonly_gmfn));
+
+ perfc_incrc(remove_write_access);
+
+ // If it's not a writable page, then no writable refs can be outstanding.
+ //
+ if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
+ PGT_writable_page )
+ {
+ perfc_incrc(remove_write_not_writable);
+ return 1;
+ }
+
+ // How many outstanding writable PTEs for this page are there?
+ //
+ write_refs =
+ (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
+ if ( write_refs && MFN_PINNED(readonly_gmfn) )
+ {
+ write_refs--;
+ }
+
+ if ( write_refs == 0 )
+ {
+ perfc_incrc(remove_write_no_work);
+ return 1;
+ }
+
+ // Before searching all the L1 page tables, check the typical culprit first
+ //
+ if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
+ {
+ predicted_gpfn = prediction & PGT_mfn_mask;
+ if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
+ (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
+ {
+ found += fixups;
+ if ( found == write_refs )
+ {
+ perfc_incrc(remove_write_predicted);
+ return 1;
+ }
+ }
+ else
+ {
+ perfc_incrc(remove_write_bad_prediction);
+ decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
+ }
}
- if ( (error_code & 2) && !(gpte & _PAGE_RW) )
+ // Search all the shadow L1 page tables...
+ //
+ for (i = 0; i < shadow_ht_buckets; i++)
{
- /* Write fault on a read-only mapping. */
+ a = &d->arch.shadow_ht[i];
+ while ( a && a->gpfn_and_flags )
+ {
+ if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
+ {
+ found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
+ if ( found == write_refs )
+ return 1;
+ }
+
+ a = a->next;
+ }
+ }
+
+ FSH_LOG("%s: looking for %d refs, found %d refs",
+ __func__, write_refs, found);
+
+ return 0;
+}
+
+static u32 remove_all_access_in_page(
+ struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
+{
+ l1_pgentry_t *pl1e = map_domain_page(l1mfn);
+ l1_pgentry_t match;
+ unsigned long flags = _PAGE_PRESENT;
+ int i;
+ u32 count = 0;
+ int is_l1_shadow =
+ ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
+ PGT_l1_shadow);
+
+ match = l1e_from_pfn(forbidden_gmfn, flags);
+
+ for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
+ {
+ if ( unlikely(!l1e_has_changed(pl1e[i], match, flags) == 0) )
+ {
+ l1_pgentry_t ol2e = pl1e[i];
+ pl1e[i] = l1e_empty();
+ count++;
+
+ if ( is_l1_shadow )
+ shadow_put_page_from_l1e(ol2e, d);
+ else /* must be an hl2 page */
+ put_page(&frame_table[forbidden_gmfn]);
+ }
+ }
+
+ unmap_domain_page(pl1e);
+
+ return count;
+}
+
+u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
+{
+ int i;
+ struct shadow_status *a;
+ u32 count = 0;
+
+ if ( unlikely(!shadow_mode_enabled(d)) )
return 0;
+
+ ASSERT(shadow_lock_is_acquired(d));
+ perfc_incrc(remove_all_access);
+
+ for (i = 0; i < shadow_ht_buckets; i++)
+ {
+ a = &d->arch.shadow_ht[i];
+ while ( a && a->gpfn_and_flags )
+ {
+ switch (a->gpfn_and_flags & PGT_type_mask)
+ {
+ case PGT_l1_shadow:
+ case PGT_l2_shadow:
+ case PGT_l3_shadow:
+ case PGT_l4_shadow:
+ case PGT_hl2_shadow:
+ count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
+ break;
+ case PGT_snapshot:
+ case PGT_writable_pred:
+ // these can't hold refs to the forbidden page
+ break;
+ default:
+ BUG();
+ }
+
+ a = a->next;
+ }
}
+ return count;
+}
+
+static int resync_all(struct domain *d, u32 stype)
+{
+ struct out_of_sync_entry *entry;
+ unsigned i;
+ unsigned long smfn;
+ void *guest, *shadow, *snapshot;
+ int need_flush = 0, external = shadow_mode_external(d);
+ int unshadow;
+ int changed;
+
+ ASSERT(shadow_lock_is_acquired(d));
+
+ for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
+ {
+ if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
+ continue;
+
+ smfn = __shadow_status(d, entry->gpfn, stype);
+
+ if ( !smfn )
+ {
+ if ( shadow_mode_refcounts(d) )
+ continue;
+
+ // For light weight shadows, even when no shadow page exists,
+ // we need to resync the refcounts to the new contents of the
+ // guest page.
+ // This only applies when we have writable page tables.
+ //
+ if ( !shadow_mode_write_all(d) &&
+ !((stype == PGT_l1_shadow) &&
+ VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
+ // Page is not writable -- no resync necessary
+ continue;
+ }
+
+ FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
+ stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
+
+ // Compare guest's new contents to its snapshot, validating
+ // and updating its shadow as appropriate.
+ //
+ guest = map_domain_page(entry->gmfn);
+ snapshot = map_domain_page(entry->snapshot_mfn);
+
+ if ( smfn )
+ shadow = map_domain_page(smfn);
+ else
+ shadow = NULL;
+
+ unshadow = 0;
+
+ switch ( stype ) {
+ case PGT_l1_shadow:
+ {
+ l1_pgentry_t *guest1 = guest;
+ l1_pgentry_t *shadow1 = shadow;
+ l1_pgentry_t *snapshot1 = snapshot;
+
+ ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) ||
+ shadow_mode_write_all(d));
+
+ if ( !shadow_mode_refcounts(d) )
+ revalidate_l1(d, guest1, snapshot1);
+
+ if ( !smfn )
+ break;
+
+ u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
+ int min_shadow = SHADOW_MIN(min_max_shadow);
+ int max_shadow = SHADOW_MAX(min_max_shadow);
+
+ u32 min_max_snapshot =
+ pfn_to_page(entry->snapshot_mfn)->tlbflush_timestamp;
+ int min_snapshot = SHADOW_MIN(min_max_snapshot);
+ int max_snapshot = SHADOW_MAX(min_max_snapshot);
+
+ changed = 0;
+
+ for ( i = min_shadow; i <= max_shadow; i++ )
+ {
+ if ( (i < min_snapshot) || (i > max_snapshot) ||
+ l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) )
+ {
+ need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]);
+
+ // can't update snapshots of linear page tables -- they
+ // are used multiple times...
+ //
+ // snapshot[i] = new_pte;
+
+ changed++;
+ }
+ }
+ perfc_incrc(resync_l1);
+ perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
+ perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
+ break;
+ }
+ case PGT_l2_shadow:
+ {
+ int max = -1;
+
+ l2_pgentry_t *guest2 = guest;
+ l2_pgentry_t *shadow2 = shadow;
+ l2_pgentry_t *snapshot2 = snapshot;
+
+ ASSERT(shadow_mode_write_all(d));
+ BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
+
+ changed = 0;
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ {
+#if CONFIG_X86_PAE
+ BUG(); /* FIXME: need type_info */
+#endif
+ if ( !is_guest_l2_slot(0,i) && !external )
+ continue;
+
+ l2_pgentry_t new_pde = guest2[i];
+ if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK))
+ {
+ need_flush |= validate_pde_change(d, new_pde, &shadow2[i]);
+
+ // can't update snapshots of linear page tables -- they
+ // are used multiple times...
+ //
+ // snapshot[i] = new_pde;
+
+ changed++;
+ }
+ if ( l2e_get_intpte(new_pde) != 0 ) /* FIXME: check flags? */
+ max = i;
+
+ // XXX - This hack works for linux guests.
+ // Need a better solution long term.
+ if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
+ unlikely(l2e_get_intpte(new_pde) != 0) &&
+ !unshadow && MFN_PINNED(smfn) )
+ unshadow = 1;
+ }
+ if ( max == -1 )
+ unshadow = 1;
+ perfc_incrc(resync_l2);
+ perfc_incr_histo(shm_l2_updates, changed, PT_UPDATES);
+ break;
+ }
+ case PGT_hl2_shadow:
+ {
+ l2_pgentry_t *guest2 = guest;
+ l2_pgentry_t *snapshot2 = snapshot;
+ l1_pgentry_t *shadow2 = shadow;
+
+ ASSERT(shadow_mode_write_all(d));
+ BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
+
+ changed = 0;
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ {
+#if CONFIG_X86_PAE
+ BUG(); /* FIXME: need type_info */
+#endif
+ if ( !is_guest_l2_slot(0, i) && !external )
+ continue;
+
+ l2_pgentry_t new_pde = guest2[i];
+ if ( l2e_has_changed(new_pde, snapshot2[i], PAGE_FLAG_MASK) )
+ {
+ need_flush |= validate_hl2e_change(d, new_pde, &shadow2[i]);
+
+ // can't update snapshots of linear page tables -- they
+ // are used multiple times...
+ //
+ // snapshot[i] = new_pde;
+
+ changed++;
+ }
+ }
+ perfc_incrc(resync_hl2);
+ perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES);
+ break;
+ }
+ default:
+ BUG();
+ }
+
+ if ( smfn )
+ unmap_domain_page(shadow);
+ unmap_domain_page(snapshot);
+ unmap_domain_page(guest);
+
+ if ( unlikely(unshadow) )
+ {
+ perfc_incrc(unshadow_l2_count);
+ shadow_unpin(smfn);
+ if ( unlikely(shadow_mode_external(d)) )
+ {
+ unsigned long hl2mfn;
+
+ if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
+ MFN_PINNED(hl2mfn) )
+ shadow_unpin(hl2mfn);
+ }
+ }
+ }
+
+ return need_flush;
+}
+
+void __shadow_sync_all(struct domain *d)
+{
+ struct out_of_sync_entry *entry;
+ int need_flush = 0;
+
+ perfc_incrc(shadow_sync_all);
+
+ ASSERT(shadow_lock_is_acquired(d));
+
+ // First, remove all write permissions to the page tables
+ //
+ for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
+ {
+ // Skip entries that have low bits set... Those aren't
+ // real PTEs.
+ //
+ if ( entry->writable_pl1e & (sizeof(l1_pgentry_t)-1) )
+ continue;
+
+ l1_pgentry_t *ppte = (l1_pgentry_t *)(
+ (char *)map_domain_page(entry->writable_pl1e >> PAGE_SHIFT) +
+ (entry->writable_pl1e & ~PAGE_MASK));
+ l1_pgentry_t opte = *ppte;
+ l1_pgentry_t npte = opte;
+ l1e_remove_flags(npte, _PAGE_RW);
+
+ if ( (l1e_get_flags(npte) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(npte, d) )
+ BUG();
+ *ppte = npte;
+ shadow_put_page_from_l1e(opte, d);
+
+ unmap_domain_page(ppte);
+ }
+
+ // XXX mafetter: SMP
+ //
+ // With the current algorithm, we've gotta flush all the TLBs
+ // before we can safely continue. I don't think we want to
+ // do it this way, so I think we should consider making
+ // entirely private copies of the shadow for each vcpu, and/or
+ // possibly having a mix of private and shared shadow state
+ // (any path from a PTE that grants write access to an out-of-sync
+ // page table page needs to be vcpu private).
+ //
+#if 0 // this should be enabled for SMP guests...
+ flush_tlb_mask(cpu_online_map);
+#endif
+ need_flush = 1;
+
+ // Second, resync all L1 pages, then L2 pages, etc...
+ //
+ need_flush |= resync_all(d, PGT_l1_shadow);
+ if ( shadow_mode_translate(d) )
+ need_flush |= resync_all(d, PGT_hl2_shadow);
+ need_flush |= resync_all(d, PGT_l2_shadow);
+
+ if ( need_flush && !unlikely(shadow_mode_external(d)) )
+ local_flush_tlb();
+
+ free_out_of_sync_state(d);
+}
+
+int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
+{
+ l1_pgentry_t gpte, spte, orig_gpte;
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ l2_pgentry_t gpde;
+
+ spte = l1e_empty();
+
+ SH_VVLOG("shadow_fault( va=%lx, code=%lu )",
+ va, (unsigned long)regs->error_code);
+ perfc_incrc(shadow_fault_calls);
+
+ check_pagetable(v, "pre-sf");
+
/*
- * STEP 2. Take the shadow lock and re-check the guest PTE.
+ * Don't let someone else take the guest's table pages out-of-sync.
*/
+ shadow_lock(d);
- shadow_lock(m);
-
- if ( unlikely(__get_user(gpte, (unsigned long *)
- &linear_pg_table[va >> PAGE_SHIFT])) )
+ /* XXX - FIX THIS COMMENT!!!
+ * STEP 1. Check to see if this fault might have been caused by an
+ * out-of-sync table page entry, or if we should pass this
+ * fault onto the guest.
+ */
+ __shadow_sync_va(v, va);
+
+ /*
+ * STEP 2. Check the guest PTE.
+ */
+ __guest_get_l2e(v, va, &gpde);
+ if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
{
- SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+ SH_VVLOG("shadow_fault - EXIT: L1 not present");
+ perfc_incrc(shadow_fault_bail_pde_not_present);
goto fail;
}
- if ( unlikely(!(gpte & _PAGE_PRESENT)) )
+ // This can't fault because we hold the shadow lock and we've ensured that
+ // the mapping is in-sync, so the check of the PDE's present bit, above,
+ // covers this access.
+ //
+ orig_gpte = gpte = linear_pg_table[l1_linear_offset(va)];
+ if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_PRESENT)) )
{
- SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+ SH_VVLOG("shadow_fault - EXIT: gpte not present (%" PRIpte ")",
+ l1e_get_intpte(gpte));
+ perfc_incrc(shadow_fault_bail_pte_not_present);
goto fail;
}
/* Write fault? */
- if ( error_code & 2 )
+ if ( regs->error_code & 2 )
{
- if ( unlikely(!(gpte & _PAGE_RW)) )
+ int allow_writes = 0;
+
+ if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
{
- /* Write fault on a read-only mapping. */
- SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", gpte);
- goto fail;
+ if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) )
+ {
+ allow_writes = 1;
+ l1e_add_flags(gpte, _PAGE_RW);
+ }
+ else
+ {
+ /* Write fault on a read-only mapping. */
+ SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")",
+ l1e_get_intpte(gpte));
+ perfc_incrc(shadow_fault_bail_ro_mapping);
+ goto fail;
+ }
}
- l1pte_write_fault(m, &gpte, &spte);
+ if ( !l1pte_write_fault(v, &gpte, &spte, va) )
+ {
+ SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
+ perfc_incrc(write_fault_bail);
+ shadow_unlock(d);
+ return 0;
+ }
+
+ if ( allow_writes )
+ l1e_remove_flags(gpte, _PAGE_RW);
}
else
{
- l1pte_read_fault(m, &gpte, &spte);
+ if ( !l1pte_read_fault(d, &gpte, &spte) )
+ {
+ SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
+ perfc_incrc(read_fault_bail);
+ shadow_unlock(d);
+ return 0;
+ }
}
/*
* STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
*/
-
- /* XXX Watch out for read-only L2 entries! (not used in Linux). */
- if ( unlikely(__put_user(gpte, (unsigned long *)
- &linear_pg_table[va >> PAGE_SHIFT])) )
+ if ( l1e_has_changed(orig_gpte, gpte, PAGE_FLAG_MASK) )
{
- domain_crash();
- goto fail;
- }
+ /* XXX Watch out for read-only L2 entries! (not used in Linux). */
+ if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
+ &gpte, sizeof(gpte))) )
+ {
+ printk("%s() failed, crashing domain %d "
+ "due to a read-only L2 page table (gpde=%" PRIpte "), va=%lx\n",
+ __func__,d->domain_id, l2e_get_intpte(gpde), va);
+ domain_crash_synchronous();
+ }
- /*
- * Update of shadow PTE can fail because the L1 p.t. is not shadowed,
- * or because the shadow isn't linked into this shadow L2 p.t.
- */
- if ( unlikely(__put_user(spte, (unsigned long *)
- &shadow_linear_pg_table[va >> PAGE_SHIFT])) )
- {
- SH_VVLOG("3: not shadowed/mapped gpte=%08lx spte=%08lx", gpte, spte);
- shadow_map_l1_into_current_l2(va);
- shadow_linear_pg_table[va >> PAGE_SHIFT] = mk_l1_pgentry(spte);
+ // if necessary, record the page table page as dirty
+ if ( unlikely(shadow_mode_log_dirty(d)) )
+ __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
}
- perfc_incrc(shadow_fixup_count);
- m->shadow_fault_count++;
+ shadow_set_l1e(va, spte, 1);
- shadow_unlock(m);
+ perfc_incrc(shadow_fault_fixed);
+ d->arch.shadow_fault_count++;
- check_pagetable(m, current->mm.pagetable, "post-sf");
+ shadow_unlock(d);
+
+ check_pagetable(v, "post-sf");
return EXCRET_fault_fixed;
fail:
- shadow_unlock(m);
+ shadow_unlock(d);
return 0;
}
-
void shadow_l1_normal_pt_update(
- unsigned long pa, unsigned long gpte,
- unsigned long *prev_spfn_ptr,
- l1_pgentry_t **prev_spl1e_ptr)
+ struct domain *d,
+ unsigned long pa, l1_pgentry_t gpte,
+ struct domain_mmap_cache *cache)
{
- unsigned long spfn, spte, prev_spfn = *prev_spfn_ptr;
- l1_pgentry_t *spl1e, *prev_spl1e = *prev_spl1e_ptr;
+ unsigned long sl1mfn;
+ l1_pgentry_t *spl1e, spte;
- /* N.B. To get here, we know the l1 page *must* be shadowed. */
- SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, "
- "prev_spfn=%08lx, prev_spl1e=%p\n",
- pa, gpte, prev_spfn, prev_spl1e);
+ shadow_lock(d);
- spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;
-
- if ( spfn == prev_spfn )
+ sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
+ if ( sl1mfn )
{
- spl1e = prev_spl1e;
+ SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
+ (void *)pa, l1e_get_intpte(gpte));
+ l1pte_propagate_from_guest(current->domain, gpte, &spte);
+
+ spl1e = map_domain_page_with_cache(sl1mfn, cache);
+ spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
+ unmap_domain_page_with_cache(spl1e, cache);
}
- else
+
+ shadow_unlock(d);
+}
+
+void shadow_l2_normal_pt_update(
+ struct domain *d,
+ unsigned long pa, l2_pgentry_t gpde,
+ struct domain_mmap_cache *cache)
+{
+ unsigned long sl2mfn;
+ l2_pgentry_t *spl2e;
+
+ shadow_lock(d);
+
+ sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
+ if ( sl2mfn )
{
- if ( prev_spl1e != NULL )
- unmap_domain_mem( prev_spl1e );
- spl1e = (l1_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
- *prev_spfn_ptr = spfn;
- *prev_spl1e_ptr = spl1e;
+ SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
+ (void *)pa, l2e_get_intpte(gpde));
+ spl2e = map_domain_page_with_cache(sl2mfn, cache);
+ validate_pde_change(d, gpde,
+ &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
+ unmap_domain_page_with_cache(spl2e, cache);
}
- l1pte_propagate_from_guest(&current->mm, &gpte, &spte);
- spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry(spte);
+ shadow_unlock(d);
}
-void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte)
+#if CONFIG_PAGING_LEVELS >= 3
+void shadow_l3_normal_pt_update(
+ struct domain *d,
+ unsigned long pa, l3_pgentry_t gpde,
+ struct domain_mmap_cache *cache)
{
- unsigned long spfn, spte;
- l2_pgentry_t *spl2e;
- unsigned long s_sh;
+ BUG(); // not yet implemented
+}
+#endif
- /* N.B. To get here, we know the l2 page *must* be shadowed. */
- SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
+#if CONFIG_PAGING_LEVELS >= 4
+void shadow_l4_normal_pt_update(
+ struct domain *d,
+ unsigned long pa, l4_pgentry_t gpde,
+ struct domain_mmap_cache *cache)
+{
+ BUG(); // not yet implemented
+}
+#endif
+
+int shadow_do_update_va_mapping(unsigned long va,
+ l1_pgentry_t val,
+ struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ l1_pgentry_t spte;
+ int rc = 0;
+
+ shadow_lock(d);
+
+ //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val));
+
+ // This is actually overkill - we don't need to sync the L1 itself,
+ // just everything involved in getting to this L1 (i.e. we need
+ // linear_pg_table[l1_linear_offset(va)] to be in sync)...
+ //
+ __shadow_sync_va(v, va);
- spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;
+ l1pte_propagate_from_guest(d, val, &spte);
+ shadow_set_l1e(va, spte, 0);
- s_sh = (gpte & _PAGE_PRESENT) ?
- __shadow_status(&current->mm, gpte >> PAGE_SHIFT) : 0;
+ /*
+ * If we're in log-dirty mode then we need to note that we've updated
+ * the PTE in the PT-holding page. We need the machine frame number
+ * for this.
+ */
+ if ( shadow_mode_log_dirty(d) )
+ __mark_dirty(d, va_to_l1mfn(v, va));
- /* XXXX Should mark guest pte as DIRTY and ACCESSED too! */
- l2pde_general(&current->mm, &gpte, &spte, s_sh);
- spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
- spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] = mk_l2_pgentry(spte);
- unmap_domain_mem(spl2e);
+// out:
+ shadow_unlock(d);
+
+ return rc;
}
+/*
+ * What lives where in the 32-bit address space in the various shadow modes,
+ * and what it uses to get/maintain that mapping.
+ *
+ * SHADOW MODE: none enable translate external
+ *
+ * 4KB things:
+ * guest_vtable lin_l2 mapped per gl2 lin_l2 via hl2 mapped per gl2
+ * shadow_vtable n/a sh_lin_l2 sh_lin_l2 mapped per gl2
+ * hl2_vtable n/a n/a lin_hl2 via hl2 mapped per gl2
+ * monitor_vtable n/a n/a n/a mapped once
+ *
+ * 4MB things:
+ * guest_linear lin via gl2 lin via gl2 lin via hl2 lin via hl2
+ * shadow_linear n/a sh_lin via sl2 sh_lin via sl2 sh_lin via sl2
+ * monitor_linear n/a n/a n/a ???
+ * perdomain perdomain perdomain perdomain perdomain
+ * R/O M2P R/O M2P R/O M2P n/a n/a
+ * R/W M2P R/W M2P R/W M2P R/W M2P R/W M2P
+ * P2M n/a n/a R/O M2P R/O M2P
+ *
+ * NB:
+ * update_pagetables(), __update_pagetables(), shadow_mode_enable(),
+ * shadow_l2_table(), shadow_hl2_table(), and alloc_monitor_pagetable()
+ * all play a part in maintaining these mappings.
+ */
+void __update_pagetables(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ unsigned long gmfn = pagetable_get_pfn(v->arch.guest_table);
+ unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
+ unsigned long smfn, hl2mfn, old_smfn;
+
+ int max_mode = ( shadow_mode_external(d) ? SHM_external
+ : shadow_mode_translate(d) ? SHM_translate
+ : shadow_mode_enabled(d) ? SHM_enable
+ : 0 );
+
+ ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
+ ASSERT( max_mode );
+
+ /*
+ * arch.guest_vtable
+ */
+ if ( max_mode & (SHM_enable | SHM_external) )
+ {
+ if ( likely(v->arch.guest_vtable != NULL) )
+ unmap_domain_page(v->arch.guest_vtable);
+ v->arch.guest_vtable = map_domain_page(gmfn);
+ }
+
+ /*
+ * arch.shadow_table
+ */
+ if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
+ smfn = shadow_l2_table(d, gpfn, gmfn);
+ if ( !get_shadow_ref(smfn) )
+ BUG();
+ old_smfn = pagetable_get_pfn(v->arch.shadow_table);
+ v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
+ if ( old_smfn )
+ put_shadow_ref(old_smfn);
+
+ SH_VVLOG("__update_pagetables(gmfn=%lx, smfn=%lx)", gmfn, smfn);
+
+ /*
+ * arch.shadow_vtable
+ */
+ if ( max_mode == SHM_external )
+ {
+ if ( v->arch.shadow_vtable )
+ unmap_domain_page(v->arch.shadow_vtable);
+ v->arch.shadow_vtable = map_domain_page(smfn);
+ }
+
+ /*
+ * arch.hl2_vtable
+ */
+
+ // if max_mode == SHM_translate, then the hl2 is already installed
+ // correctly in its smfn, and there's nothing to do.
+ //
+ if ( max_mode == SHM_external )
+ {
+ if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
+ hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
+ if ( v->arch.hl2_vtable )
+ unmap_domain_page(v->arch.hl2_vtable);
+ v->arch.hl2_vtable = map_domain_page(hl2mfn);
+ }
+
+ /*
+ * fixup pointers in monitor table, as necessary
+ */
+ if ( max_mode == SHM_external )
+ {
+ l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
+ l2_pgentry_t old_hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
+ l2_pgentry_t old_sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
+
+ ASSERT( shadow_mode_translate(d) );
+
+ if ( !get_shadow_ref(hl2mfn) )
+ BUG();
+ mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(hl2mfn, __PAGE_HYPERVISOR);
+ if ( l2e_get_flags(old_hl2e) & _PAGE_PRESENT )
+ put_shadow_ref(l2e_get_pfn(old_hl2e));
+
+ if ( !get_shadow_ref(smfn) )
+ BUG();
+ mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
+ if ( l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
+ put_shadow_ref(l2e_get_pfn(old_sl2e));
+
+ // XXX - maybe this can be optimized somewhat??
+ local_flush_tlb();
+ }
+}
/************************************************************************/
@@ -706,56 +2913,173 @@ void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte)
#if SHADOW_DEBUG
+// The following is entirely for _check_pagetable()'s benefit.
+// _check_pagetable() wants to know whether a given entry in a
+// shadow page table is supposed to be the shadow of the guest's
+// current entry, or the shadow of the entry held in the snapshot
+// taken above.
+//
+// Here, we mark all currently existing entries as reflecting
+// the snapshot, above. All other places in xen that update
+// the shadow will keep the shadow in sync with the guest's
+// entries (via l1pte_propagate_from_guest and friends), which clear
+// the SHADOW_REFLECTS_SNAPSHOT bit.
+//
+static void
+mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
+{
+ unsigned long smfn;
+ l1_pgentry_t *l1e;
+ l2_pgentry_t *l2e;
+ unsigned i;
+
+ if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
+ {
+ l1e = map_domain_page(smfn);
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l1_slot(i) &&
+ (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
+ l1e_add_flags(l1e[i], SHADOW_REFLECTS_SNAPSHOT);
+ unmap_domain_page(l1e);
+ }
+
+ if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
+ {
+ l2e = map_domain_page(smfn);
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ if ( is_guest_l2_slot(0, i) &&
+ (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
+ l2e_add_flags(l2e[i], SHADOW_REFLECTS_SNAPSHOT);
+ unmap_domain_page(l2e);
+ }
+}
+
+// BUG: these are not SMP safe...
static int sh_l2_present;
static int sh_l1_present;
char * sh_check_name;
-
-#define FAIL(_f, _a...) \
- do { \
- printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n", \
- sh_check_name, level, i, ## _a , gpte, spte); \
- BUG(); \
+int shadow_status_noswap;
+
+#define v2m(_v, _adr) ({ \
+ unsigned long _a = (unsigned long)(_adr); \
+ l2_pgentry_t _pde = shadow_linear_l2_table(_v)[l2_table_offset(_a)]; \
+ unsigned long _pa = -1; \
+ if ( l2e_get_flags(_pde) & _PAGE_PRESENT ) \
+ { \
+ l1_pgentry_t _pte; \
+ _pte = shadow_linear_pg_table[l1_linear_offset(_a)]; \
+ if ( l1e_get_flags(_pte) & _PAGE_PRESENT ) \
+ _pa = l1e_get_paddr(_pte); \
+ } \
+ _pa | (_a & ~PAGE_MASK); \
+})
+
+#define FAIL(_f, _a...) \
+ do { \
+ printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n", \
+ sh_check_name, level, l2_idx, l1_idx, ## _a, \
+ __FILE__, __LINE__); \
+ printk("guest_pte=%" PRIpte " eff_guest_pte=%" PRIpte \
+ " shadow_pte=%" PRIpte " snapshot_pte=%" PRIpte \
+ " &guest=%p &shadow=%p &snap=%p v2m(&guest)=%p" \
+ " v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n", \
+ l1e_get_intpte(guest_pte), l1e_get_intpte(eff_guest_pte), \
+ l1e_get_intpte(shadow_pte), l1e_get_intpte(snapshot_pte), \
+ p_guest_pte, p_shadow_pte, p_snapshot_pte, \
+ (void *)v2m(v, p_guest_pte), (void *)v2m(v, p_shadow_pte), \
+ (void *)v2m(v, p_snapshot_pte), \
+ (l2_idx << L2_PAGETABLE_SHIFT) | \
+ (l1_idx << L1_PAGETABLE_SHIFT)); \
+ errors++; \
} while ( 0 )
static int check_pte(
- struct mm_struct *m, unsigned long gpte, unsigned long spte,
- int level, int i)
+ struct vcpu *v,
+ l1_pgentry_t *p_guest_pte,
+ l1_pgentry_t *p_shadow_pte,
+ l1_pgentry_t *p_snapshot_pte,
+ int level, int l2_idx, int l1_idx)
{
- unsigned long mask, gpfn, spfn;
-
- if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
- return 1; /* always safe */
-
- if ( !(spte & _PAGE_PRESENT) )
- FAIL("Non zero not present spte");
+ struct domain *d = v->domain;
+ l1_pgentry_t guest_pte = *p_guest_pte;
+ l1_pgentry_t shadow_pte = *p_shadow_pte;
+ l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
+ l1_pgentry_t eff_guest_pte;
+ unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
+ int errors = 0, guest_writable;
+ int page_table_page;
+
+ if ( (l1e_get_intpte(shadow_pte) == 0) ||
+ (l1e_get_intpte(shadow_pte) == 0xdeadface) ||
+ (l1e_get_intpte(shadow_pte) == 0x00000E00) )
+ return errors; /* always safe */
+
+ if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
+ FAIL("Non zero not present shadow_pte");
if ( level == 2 ) sh_l2_present++;
if ( level == 1 ) sh_l1_present++;
- if ( !(gpte & _PAGE_PRESENT) )
+ if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
+ eff_guest_pte = snapshot_pte;
+ else
+ eff_guest_pte = guest_pte;
+
+ if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
FAIL("Guest not present yet shadow is");
- mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);
+ mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
- if ( (spte & mask) != (gpte & mask) )
+ if ( ((l1e_get_intpte(shadow_pte) & mask) != (l1e_get_intpte(eff_guest_pte) & mask)) )
FAIL("Corrupt?");
- if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
+ if ( (level == 1) &&
+ (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
+ !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
FAIL("Dirty coherence");
- if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
+ if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
+ !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
FAIL("Accessed coherence");
- if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
+ if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
+ FAIL("global bit set in shadow");
+
+ eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
+ eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
+ shadow_mfn = l1e_get_pfn(shadow_pte);
+
+ if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
+ FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%" PRIpte "\n",
+ __func__, eff_guest_pfn, l1e_get_intpte(eff_guest_pte));
+
+ page_table_page = mfn_is_page_table(eff_guest_mfn);
+
+ guest_writable =
+ (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
+ (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
+
+ if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
+ {
+ printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
+ eff_guest_pfn, eff_guest_mfn, shadow_mfn,
+ frame_table[eff_guest_mfn].u.inuse.type_info,
+ page_table_page);
FAIL("RW coherence");
+ }
- if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) )
+ if ( (level == 1) &&
+ (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
+ !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
+ {
+ printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
+ eff_guest_pfn, eff_guest_mfn, shadow_mfn,
+ frame_table[eff_guest_mfn].u.inuse.type_info,
+ page_table_page);
FAIL("RW2 coherence");
+ }
- spfn = spte >> PAGE_SHIFT;
- gpfn = gpte >> PAGE_SHIFT;
-
- if ( gpfn == spfn )
+ if ( eff_guest_mfn == shadow_mfn )
{
if ( level > 1 )
FAIL("Linear map ???"); /* XXX this will fail on BSD */
@@ -765,80 +3089,89 @@ static int check_pte(
if ( level < 2 )
FAIL("Shadow in L1 entry?");
- if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
- FAIL("spfn problem g.sf=%08lx", __shadow_status(m, gpfn));
+ if ( level == 2 )
+ {
+ if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
+ FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
+ __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
+ }
+ else
+ BUG(); // XXX -- not handled yet.
}
- return 1;
+ return errors;
}
-
+#undef FAIL
+#undef v2m
static int check_l1_table(
- struct mm_struct *m, unsigned long va,
- unsigned long g2, unsigned long s2)
+ struct vcpu *v, unsigned long gpfn,
+ unsigned long gmfn, unsigned long smfn, unsigned l2_idx)
{
+ struct domain *d = v->domain;
int i;
- unsigned long *gpl1e, *spl1e;
+ unsigned long snapshot_mfn;
+ l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
+ int errors = 0;
+
+ if ( page_out_of_sync(pfn_to_page(gmfn)) )
+ {
+ snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
+ ASSERT(snapshot_mfn);
+ p_snapshot = map_domain_page(snapshot_mfn);
+ }
- gpl1e = map_domain_mem(g2 << PAGE_SHIFT);
- spl1e = map_domain_mem(s2 << PAGE_SHIFT);
+ p_guest = map_domain_page(gmfn);
+ p_shadow = map_domain_page(smfn);
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- check_pte(m, gpl1e[i], spl1e[i], 1, i);
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ errors += check_pte(v, p_guest+i, p_shadow+i,
+ p_snapshot ? p_snapshot+i : NULL,
+ 1, l2_idx, i);
- unmap_domain_mem(spl1e);
- unmap_domain_mem(gpl1e);
+ unmap_domain_page(p_shadow);
+ unmap_domain_page(p_guest);
+ if ( p_snapshot )
+ unmap_domain_page(p_snapshot);
- return 1;
+ return errors;
}
-#define FAILPT(_f, _a...) \
- do { \
- printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); \
- BUG(); \
+#define FAILPT(_f, _a...) \
+ do { \
+ printk("XXX FAIL %s-PT " _f "\n", sh_check_name, ## _a ); \
+ errors++; \
} while ( 0 )
-int _check_pagetable(struct mm_struct *m, pagetable_t pt, char *s)
+int check_l2_table(
+ struct vcpu *v, unsigned long gmfn, unsigned long smfn, int oos_pdes)
{
- unsigned long gptbase = pagetable_val(pt);
- unsigned long gpfn, spfn;
- int i;
- l2_pgentry_t *gpl2e, *spl2e;
-
- sh_check_name = s;
-
- SH_VVLOG("%s-PT Audit", s);
-
- sh_l2_present = sh_l1_present = 0;
-
- gpfn = gptbase >> PAGE_SHIFT;
-
- if ( !(__shadow_status(m, gpfn) & PSH_shadowed) )
- {
- printk("%s-PT %08lx not shadowed\n", s, gptbase);
- if ( __shadow_status(m, gpfn) != 0 )
- BUG();
- return 0;
- }
-
- spfn = __shadow_status(m, gpfn) & PSH_pfn_mask;
-
- if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
- FAILPT("ptbase shadow inconsistent1");
-
- gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
- spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
-
+ struct domain *d = v->domain;
+ l2_pgentry_t *gpl2e = (l2_pgentry_t *)map_domain_page(gmfn);
+ l2_pgentry_t *spl2e = (l2_pgentry_t *)map_domain_page(smfn);
+ l2_pgentry_t match;
+ int i;
+ int errors = 0;
+ int limit;
+
+ if ( !oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != d) )
+ FAILPT("domain doesn't own page");
+ if ( oos_pdes && (page_get_owner(pfn_to_page(gmfn)) != NULL) )
+ FAILPT("bogus owner for snapshot page");
+ if ( page_get_owner(pfn_to_page(smfn)) != NULL )
+ FAILPT("shadow page mfn=0x%lx is owned by someone, domid=%d",
+ smfn, page_get_owner(pfn_to_page(smfn))->domain_id);
+
+#if 0
if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
&gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
{
- printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
i++ )
- printk("+++ (%d) %08lx %08lx\n",i,
+ printk("+++ (%d) %lx %lx\n",i,
l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
FAILPT("hypervisor entries inconsistent");
}
@@ -846,42 +3179,206 @@ int _check_pagetable(struct mm_struct *m, pagetable_t pt, char *s)
if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
FAILPT("hypervisor linear map inconsistent");
+#endif
- if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
- L2_PAGETABLE_SHIFT]) !=
- ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
- FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
- l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
+ match = l2e_from_pfn(smfn, __PAGE_HYPERVISOR);
+ if ( !shadow_mode_external(d) &&
+ l2e_has_changed(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT],
+ match, PAGE_FLAG_MASK))
+ {
+ FAILPT("hypervisor shadow linear map inconsistent %" PRIpte " %" PRIpte,
+ l2e_get_intpte(spl2e[SH_LINEAR_PT_VIRT_START >>
L2_PAGETABLE_SHIFT]),
- (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ l2e_get_intpte(match));
+ }
- if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
- ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
- __PAGE_HYPERVISOR))) )
- FAILPT("hypervisor per-domain map inconsistent");
+ match = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
+ if ( !shadow_mode_external(d) &&
+ l2e_has_changed(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT],
+ match, PAGE_FLAG_MASK))
+ {
+ FAILPT("hypervisor per-domain map inconsistent saw %" PRIpte ", expected (va=%p) %" PRIpte,
+ l2e_get_intpte(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]),
+ d->arch.mm_perdomain_pt,
+ l2e_get_intpte(match));
+ }
+#ifdef __i386__
+ if ( shadow_mode_external(d) )
+ limit = L2_PAGETABLE_ENTRIES;
+ else
+ limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
+#else
+ limit = 0; /* XXX x86/64 XXX */
+#endif
/* Check the whole L2. */
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- check_pte(m, l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]), 2, i);
+ for ( i = 0; i < limit; i++ )
+ errors += check_pte(v,
+ (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
+ (l1_pgentry_t*)(&spl2e[i]),
+ NULL,
+ 2, i, 0);
+
+ unmap_domain_page(spl2e);
+ unmap_domain_page(gpl2e);
+
+#if 1
+ if ( errors )
+ printk("check_l2_table returning %d errors\n", errors);
+#endif
+
+ return errors;
+}
+#undef FAILPT
+
+int _check_pagetable(struct vcpu *v, char *s)
+{
+ struct domain *d = v->domain;
+ pagetable_t pt = v->arch.guest_table;
+ unsigned long gptbase = pagetable_get_paddr(pt);
+ unsigned long ptbase_pfn, smfn;
+ unsigned long i;
+ l2_pgentry_t *gpl2e, *spl2e;
+ unsigned long ptbase_mfn = 0;
+ int errors = 0, limit, oos_pdes = 0;
+
+ //_audit_domain(d, AUDIT_QUIET);
+ shadow_lock(d);
+
+ sh_check_name = s;
+ //SH_VVLOG("%s-PT Audit", s);
+ sh_l2_present = sh_l1_present = 0;
+ perfc_incrc(check_pagetable);
+
+ ptbase_mfn = gptbase >> PAGE_SHIFT;
+ ptbase_pfn = __mfn_to_gpfn(d, ptbase_mfn);
+
+ if ( !(smfn = __shadow_status(d, ptbase_pfn, PGT_base_page_table)) )
+ {
+ printk("%s-PT %lx not shadowed\n", s, gptbase);
+ goto out;
+ }
+ if ( page_out_of_sync(pfn_to_page(ptbase_mfn)) )
+ {
+ ptbase_mfn = __shadow_status(d, ptbase_pfn, PGT_snapshot);
+ oos_pdes = 1;
+ ASSERT(ptbase_mfn);
+ }
+
+ errors += check_l2_table(v, ptbase_mfn, smfn, oos_pdes);
+
+ gpl2e = (l2_pgentry_t *) map_domain_page(ptbase_mfn);
+ spl2e = (l2_pgentry_t *) map_domain_page(smfn);
/* Go back and recurse. */
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+#ifdef __i386__
+ if ( shadow_mode_external(d) )
+ limit = L2_PAGETABLE_ENTRIES;
+ else
+ limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
+#else
+ limit = 0; /* XXX x86/64 XXX */
+#endif
+
+ for ( i = 0; i < limit; i++ )
{
- if ( l2_pgentry_val(spl2e[i]) != 0 )
- check_l1_table(
- m, i << L2_PAGETABLE_SHIFT,
- l2_pgentry_val(gpl2e[i]) >> PAGE_SHIFT,
- l2_pgentry_val(spl2e[i]) >> PAGE_SHIFT);
+ unsigned long gl1pfn = l2e_get_pfn(gpl2e[i]);
+ unsigned long gl1mfn = __gpfn_to_mfn(d, gl1pfn);
+ unsigned long sl1mfn = l2e_get_pfn(spl2e[i]);
+
+ if ( l2e_get_intpte(spl2e[i]) != 0 ) /* FIXME: check flags? */
+ {
+ errors += check_l1_table(v, gl1pfn, gl1mfn, sl1mfn, i);
+ }
}
- unmap_domain_mem(spl2e);
- unmap_domain_mem(gpl2e);
+ unmap_domain_page(spl2e);
+ unmap_domain_page(gpl2e);
- SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
+#if 0
+ SH_VVLOG("PT verified : l2_present = %d, l1_present = %d",
sh_l2_present, sh_l1_present);
-
- return 1;
+#endif
+
+ out:
+ if ( errors )
+ BUG();
+
+ shadow_unlock(d);
+
+ return errors;
}
-#endif
+int _check_all_pagetables(struct vcpu *v, char *s)
+{
+ struct domain *d = v->domain;
+ int i;
+ struct shadow_status *a;
+ unsigned long gmfn;
+ int errors = 0;
+
+ shadow_status_noswap = 1;
+
+ sh_check_name = s;
+ SH_VVLOG("%s-PT Audit domid=%d", s, d->domain_id);
+ sh_l2_present = sh_l1_present = 0;
+ perfc_incrc(check_all_pagetables);
+
+ for (i = 0; i < shadow_ht_buckets; i++)
+ {
+ a = &d->arch.shadow_ht[i];
+ while ( a && a->gpfn_and_flags )
+ {
+ gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
+
+ switch ( a->gpfn_and_flags & PGT_type_mask )
+ {
+ case PGT_l1_shadow:
+ errors += check_l1_table(v, a->gpfn_and_flags & PGT_mfn_mask,
+ gmfn, a->smfn, 0);
+ break;
+ case PGT_l2_shadow:
+ errors += check_l2_table(v, gmfn, a->smfn,
+ page_out_of_sync(pfn_to_page(gmfn)));
+ break;
+ case PGT_l3_shadow:
+ case PGT_l4_shadow:
+ case PGT_hl2_shadow:
+ BUG(); // XXX - ought to fix this...
+ break;
+ case PGT_snapshot:
+ case PGT_writable_pred:
+ break;
+ default:
+ errors++;
+ printk("unexpected shadow type %lx, gpfn=%lx, "
+ "gmfn=%lx smfn=%lx\n",
+ a->gpfn_and_flags & PGT_type_mask,
+ a->gpfn_and_flags & PGT_mfn_mask,
+ gmfn, a->smfn);
+ BUG();
+ }
+ a = a->next;
+ }
+ }
+
+ shadow_status_noswap = 0;
+
+ if ( errors )
+ BUG();
+
+ return errors;
+}
+
+#endif // SHADOW_DEBUG
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 120acaef00..b0782ec5d7 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -8,17 +8,19 @@
* later.
*/
+#include <xen/config.h>
#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/delay.h>
+#include <xen/perfc.h>
#include <xen/spinlock.h>
+#include <asm/current.h>
#include <asm/smp.h>
#include <asm/mc146818rtc.h>
#include <asm/flushtlb.h>
#include <asm/smpboot.h>
#include <asm/hardirq.h>
-
-#ifdef CONFIG_SMP
+#include <mach_apic.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@@ -59,9 +61,7 @@
*/
/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ * The following functions deal with sending IPIs between CPUs.
*/
static inline int __prepare_ICR (unsigned int shortcut, int vector)
@@ -74,7 +74,7 @@ static inline int __prepare_ICR2 (unsigned int mask)
return SET_APIC_DEST_FIELD(mask);
}
-static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
{
/*
* Subtle. In the case of the 'never do double writes' workaround
@@ -82,22 +82,22 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
* of the value read we use an atomic rmw access to avoid costly
* cli/sti. Otherwise we use an even cheaper single atomic write
* to the APIC.
- */
+ */
unsigned int cfg;
/*
- * Wait for idle.
- */
+ * Wait for idle.
+ */
apic_wait_icr_idle();
/*
- * No need to touch the target chip field
- */
+ * No need to touch the target chip field
+ */
cfg = __prepare_ICR(shortcut, vector);
/*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
apic_write_around(APIC_ICR, cfg);
}
@@ -106,15 +106,17 @@ void send_IPI_self(int vector)
__send_IPI_shortcut(APIC_DEST_SELF, vector);
}
-static inline void send_IPI_mask(int mask, int vector)
+/*
+ * This is only used on smaller machines.
+ */
+void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
{
+ unsigned long mask = cpus_addr(cpumask)[0];
unsigned long cfg;
unsigned long flags;
- __save_flags(flags);
- __cli();
+ local_irq_save(flags);
-
/*
* Wait for idle.
*/
@@ -127,7 +129,7 @@ static inline void send_IPI_mask(int mask, int vector)
apic_write_around(APIC_ICR2, cfg);
/*
- * program the ICR
+ * program the ICR
*/
cfg = __prepare_ICR(0, vector);
@@ -135,112 +137,89 @@ static inline void send_IPI_mask(int mask, int vector)
* Send the IPI. The write to APIC_ICR fires this off.
*/
apic_write_around(APIC_ICR, cfg);
-
- __restore_flags(flags);
+
+ local_irq_restore(flags);
}
-static inline void send_IPI_allbutself(int vector)
+inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
{
+ unsigned long cfg, flags;
+ unsigned int query_cpu;
+
/*
- * if there are no other CPUs in the system then
- * we get an APIC send error if we try to broadcast.
- * thus we have to avoid sending IPIs in this case.
- */
- if (!(smp_num_cpus > 1))
- return;
+ * Hack. The clustered APIC addressing mode doesn't allow us to send
+ * to an arbitrary mask, so I do a unicasts to each CPU instead. This
+ * should be modified to do 1 message per cluster ID - mbligh
+ */
+
+ local_irq_save(flags);
- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+ for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
+ if (cpu_isset(query_cpu, mask)) {
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
+ apic_write_around(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+ }
+ }
+ local_irq_restore(flags);
}
-/*
- * ********* XEN NOTICE **********
- * I've left the following comments lying around as they look liek they might
- * be useful to get multiprocessor guest OSes going. However, I suspect the
- * issues we face will be quite different so I've ripped out all the
- * TLBSTATE logic (I didn't understand it anyway :-). These comments do
- * not apply to Xen, therefore! -- Keir (8th Oct 2003).
- */
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superflous
- * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- *
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
+#include <mach_ipi.h>
static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
-static unsigned long flush_cpumask;
+static cpumask_t flush_cpumask;
+static unsigned long flush_va;
asmlinkage void smp_invalidate_interrupt(void)
{
ack_APIC_irq();
perfc_incrc(ipis);
- local_flush_tlb();
- clear_bit(smp_processor_id(), &flush_cpumask);
+ if ( !__sync_lazy_execstate() )
+ {
+ if ( flush_va == FLUSHVA_ALL )
+ local_flush_tlb();
+ else
+ local_flush_tlb_one(flush_va);
+ }
+ cpu_clear(smp_processor_id(), flush_cpumask);
}
-void flush_tlb_mask(unsigned long mask)
+void __flush_tlb_mask(cpumask_t mask, unsigned long va)
{
ASSERT(local_irq_is_enabled());
- if ( mask & (1 << smp_processor_id()) )
+ if ( cpu_isset(smp_processor_id(), mask) )
{
local_flush_tlb();
- mask &= ~(1 << smp_processor_id());
+ cpu_clear(smp_processor_id(), mask);
}
- if ( mask != 0 )
+ if ( !cpus_empty(mask) )
{
spin_lock(&flush_lock);
-
flush_cpumask = mask;
+ flush_va = va;
send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
- while ( flush_cpumask != 0 )
+ while ( !cpus_empty(flush_cpumask) )
cpu_relax();
-
spin_unlock(&flush_lock);
}
}
@@ -251,12 +230,14 @@ void new_tlbflush_clock_period(void)
ASSERT(local_irq_is_enabled());
/* Flush everyone else. We definitely flushed just before entry. */
- if ( smp_num_cpus > 1 )
+ if ( num_online_cpus() > 1 )
{
spin_lock(&flush_lock);
- flush_cpumask = ((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id());
+ flush_cpumask = cpu_online_map;
+ flush_va = FLUSHVA_ALL;
send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
- while ( flush_cpumask != 0 )
+ cpu_clear(smp_processor_id(), flush_cpumask);
+ while ( !cpus_empty(flush_cpumask) )
cpu_relax();
spin_unlock(&flush_lock);
}
@@ -266,124 +247,98 @@ void new_tlbflush_clock_period(void)
tlbflush_clock++;
}
-static void flush_tlb_all_pge_ipi(void* info)
+static void flush_tlb_all_pge_ipi(void *info)
{
- __flush_tlb_pge();
+ local_flush_tlb_pge();
}
void flush_tlb_all_pge(void)
{
- smp_call_function (flush_tlb_all_pge_ipi,0,1,1);
- __flush_tlb_pge();
+ smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1);
+ local_flush_tlb_pge();
}
-void smp_send_event_check_mask(unsigned long cpu_mask)
+void smp_send_event_check_mask(cpumask_t mask)
{
- cpu_mask &= ~(1<<smp_processor_id());
- if ( cpu_mask != 0 )
- send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
+ cpu_clear(smp_processor_id(), mask);
+ if ( !cpus_empty(mask) )
+ send_IPI_mask(mask, EVENT_CHECK_VECTOR);
}
/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
+ * Structure and data for smp_call_function().
*/
-static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
struct call_data_struct {
void (*func) (void *info);
void *info;
+ int wait;
atomic_t started;
atomic_t finished;
- int wait;
};
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+static struct call_data_struct *call_data;
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler, or bottom halfs.
+ * Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, spin until function has completed on other CPUs.
+ * Returns: 0 on success, else a negative status code.
*/
+int smp_call_function(
+ void (*func) (void *info), void *info, int unused, int wait)
{
struct call_data_struct data;
- int cpus = smp_num_cpus-1;
+ unsigned int nr_cpus = num_online_cpus() - 1;
- if (!cpus)
+ ASSERT(local_irq_is_enabled());
+
+ if ( nr_cpus == 0 )
return 0;
data.func = func;
data.info = info;
- atomic_set(&data.started, 0);
data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- ASSERT(local_irq_is_enabled());
+ atomic_set(&data.started, 0);
+ atomic_set(&data.finished, 0);
spin_lock(&call_lock);
call_data = &data;
wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- barrier();
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- barrier();
+ while ( atomic_read(wait ? &data.finished : &data.started) != nr_cpus )
+ cpu_relax();
spin_unlock(&call_lock);
return 0;
}
-static void stop_this_cpu (void * dummy)
+static void stop_this_cpu (void *dummy)
{
- /*
- * Remove this CPU:
- */
clear_bit(smp_processor_id(), &cpu_online_map);
- __cli();
+
disable_local_APIC();
- for(;;) __asm__("hlt");
-}
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+}
void smp_send_stop(void)
{
+ /* Stop all other CPUs in the system. */
smp_call_function(stop_this_cpu, NULL, 1, 0);
- smp_num_cpus = 1;
- __cli();
+ local_irq_disable();
disable_local_APIC();
- __sti();
+ local_irq_enable();
}
-/*
- * Nothing to do, as all the work is done automatically when
- * we return from the interrupt.
- */
asmlinkage void smp_event_check_interrupt(void)
{
ack_APIC_irq();
@@ -392,27 +347,22 @@ asmlinkage void smp_event_check_interrupt(void)
asmlinkage void smp_call_function_interrupt(void)
{
- void (*func) (void *info) = call_data->func;
+ void (*func)(void *info) = call_data->func;
void *info = call_data->info;
- int wait = call_data->wait;
ack_APIC_irq();
perfc_incrc(ipis);
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- (*func)(info);
- if (wait) {
+ if ( call_data->wait )
+ {
+ (*func)(info);
mb();
atomic_inc(&call_data->finished);
}
+ else
+ {
+ mb();
+ atomic_inc(&call_data->started);
+ (*func)(info);
+ }
}
-
-#endif /* CONFIG_SMP */
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 6cf1c5f3da..80fe8122a4 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -17,7 +17,7 @@
* Fixes
* Felix Koop : NR_CPUS used properly
* Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIP report.
+ * Alan Cox : By repeated request 8) - Total BogoMIPS report.
* Greg Wright : Fix for kernel stacks panic.
* Erich Boleyn : MP v1.4 and additional changes.
* Matthias Sattler : Changes for 2.1 kernel map.
@@ -30,49 +30,52 @@
* Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
* Maciej W. Rozycki : Bits for genuine 82489DX APICs
* Martin J. Bligh : Added support for multi-quad systems
- */
+ * Dave Jones : Report invalid combinations of Athlon CPUs.
+ * Rusty Russell : Hacked into shape for new "hotplug" boot process. */
#include <xen/config.h>
#include <xen/init.h>
-#include <xen/irq.h>
+#include <xen/kernel.h>
#include <xen/mm.h>
-#include <xen/slab.h>
-#include <asm/flushtlb.h>
-#include <asm/mc146818rtc.h>
-#include <asm/smpboot.h>
-#include <xen/smp.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <asm/mpspec.h>
-#include <asm/io_apic.h>
#include <xen/sched.h>
+#include <xen/irq.h>
#include <xen/delay.h>
-#include <xen/lib.h>
-
-#ifdef CONFIG_SMP
+#include <asm/current.h>
+#include <asm/mc146818rtc.h>
+#include <asm/desc.h>
+#include <asm/div64.h>
+#include <asm/flushtlb.h>
+#include <asm/msr.h>
+#include <mach_apic.h>
+#include <mach_wakecpu.h>
-/* Cconfigured maximum number of CPUs to activate. We name the parameter
-"maxcpus" rather than max_cpus to be compatible with Linux */
-static int max_cpus = -1;
-integer_param("maxcpus", max_cpus);
+static int _foo;
+#define set_kernel_exec(x,y) (_foo=0)
+#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
+int tainted;
+#define TAINT_UNSAFE_SMP 0
-/* Total count of live CPUs */
-int smp_num_cpus = 1;
+/* Set if we find a B stepping CPU */
+static int __initdata smp_b_stepping;
-/* Number of hyperthreads per core */
-int ht_per_core = 1;
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+EXPORT_SYMBOL(phys_proc_id);
-/* Bitmask of currently online CPUs */
-unsigned long cpu_online_map;
+/* bitmap of online cpus */
+cpumask_t cpu_online_map;
-static volatile unsigned long cpu_callin_map;
-static volatile unsigned long cpu_callout_map;
+cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+static cpumask_t smp_commenced_mask;
/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS];
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-/* Set when the idlers are all forked */
-int smp_threads_ready;
+u8 x86_cpu_to_apicid[NR_CPUS] =
+ { [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
/*
* Trampoline 80x86 program as an array.
@@ -81,6 +84,7 @@ int smp_threads_ready;
extern unsigned char trampoline_data [];
extern unsigned char trampoline_end [];
static unsigned char *trampoline_base;
+static int trampoline_exec;
/*
* Currently trivial. Write the real->protected mode
@@ -90,8 +94,8 @@ static unsigned char *trampoline_base;
static unsigned long __init setup_trampoline(void)
{
- memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(trampoline_base);
+ memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
}
/*
@@ -100,11 +104,17 @@ static unsigned long __init setup_trampoline(void)
*/
void __init smp_alloc_memory(void)
{
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- trampoline_base = __va(0x90000);
+ trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ if (__pa(trampoline_base) >= 0x9F000)
+ BUG();
+ /*
+ * Make the SMP trampoline executable:
+ */
+ trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}
/*
@@ -112,39 +122,63 @@ void __init smp_alloc_memory(void)
* a given CPU
*/
-void __init smp_store_cpu_info(int id)
-{
- cpu_data[id] = boot_cpu_data;
- identify_cpu(&cpu_data[id]);
-}
-
-/*
- * Architecture specific routine called by the kernel just before init is
- * fired off. This allows the BP to have everything in order [we hope].
- * At the end of this all the APs will hit the system scheduling and off
- * we go. Each AP will load the system gdt's and jump through the kernel
- * init into idle(). At this point the scheduler will one day take over
- * and give them jobs to do. smp_callin is a standard routine
- * we use to track CPUs as they power up.
- */
-
-static atomic_t smp_commenced = ATOMIC_INIT(0);
-
-void __init smp_commence(void)
+static void __init smp_store_cpu_info(int id)
{
- /*
- * Lets the callins below out of their loop.
- */
- Dprintk("Setting commenced=1, go go go\n");
-
- wmb();
- atomic_set(&smp_commenced,1);
+ struct cpuinfo_x86 *c = cpu_data + id;
+
+ *c = boot_cpu_data;
+ if (id!=0)
+ identify_cpu(c);
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3)
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ smp_b_stepping = 1;
+
+ /*
+ * Certain Athlons might work (for various values of 'work') in SMP
+ * but they are not certified as MP capable.
+ */
+ if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+
+ /* Athlon 660/661 is valid. */
+ if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
+ goto valid_k7;
+
+ /* Duron 670 is valid */
+ if ((c->x86_model==7) && (c->x86_mask==0))
+ goto valid_k7;
+
+ /*
+ * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
+ * It's worth noting that the A5 stepping (662) of some Athlon XP's
+ * have the MP bit set.
+ * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
+ */
+ if (((c->x86_model==6) && (c->x86_mask>=2)) ||
+ ((c->x86_model==7) && (c->x86_mask>=1)) ||
+ (c->x86_model> 7))
+ if (cpu_has_mp)
+ goto valid_k7;
+
+ /* If we get here, it's not a certified SMP capable AMD system. */
+ tainted |= TAINT_UNSAFE_SMP;
+ }
+
+valid_k7:
+ ;
}
/*
* TSC synchronization.
*
- * We first check wether all CPUs have their TSC's synchronized,
+ * We first check whether all CPUs have their TSC's synchronized,
* then we print a warning if not, and always resync.
*/
@@ -155,604 +189,730 @@ static unsigned long long tsc_values[NR_CPUS];
#define NR_LOOPS 5
-/*
- * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
- * multiplication. Not terribly optimized but we need it at boot time only
- * anyway.
- *
- * result == a / b
- * == (a1 + a2*(2^32)) / b
- * == a1/b + a2*(2^32/b)
- * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
- * ^---- (this multiplication can overflow)
- */
-
-static unsigned long long div64 (unsigned long long a, unsigned long b0)
-{
- unsigned int a1, a2;
- unsigned long long res;
-
- a1 = ((unsigned int*)&a)[0];
- a2 = ((unsigned int*)&a)[1];
-
- res = a1/b0 +
- (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
- a2 / b0 +
- (a2 * (0xffffffff % b0)) / b0;
-
- return res;
-}
-
static void __init synchronize_tsc_bp (void)
{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- int buggy = 0;
-
- printk("checking TSC synchronization across CPUs: ");
-
- atomic_set(&tsc_start_flag, 1);
- wmb();
-
- /*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
- */
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
-
- rdtscll(tsc_values[smp_processor_id()]);
- /*
- * We clear the TSC in the last loop:
- */
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- /*
- * Wait for all APs to leave the synchronization point:
- */
- while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
- }
-
- sum = 0;
- for (i = 0; i < smp_num_cpus; i++) {
- t0 = tsc_values[i];
- sum += t0;
- }
- avg = div64(sum, smp_num_cpus);
-
- sum = 0;
- for (i = 0; i < smp_num_cpus; i++) {
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*ticks_per_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = div64(delta, ticks_per_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
-
- printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
- i, realdelta);
- }
-
- sum += delta;
- }
- if (!buggy)
- printk("passed.\n");
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ unsigned long one_usec;
+ int buggy = 0;
+
+ printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
+
+ /* convert from kcyc/sec to cyc/usec */
+ one_usec = cpu_khz / 1000;
+
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * all APs synchronize but they loop on '== num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
+ mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * this lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
+ mb();
+ atomic_set(&tsc_count_start, 0);
+ wmb();
+ atomic_inc(&tsc_count_stop);
+ }
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_isset(i, cpu_callout_map)) {
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ }
+ avg = sum;
+ do_div(avg, num_booting_cpus());
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report bigger than 2 microseconds clock differences.
+ */
+ if (delta > 2*one_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = delta;
+ do_div(realdelta, one_usec);
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;
+
+ printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+ }
+
+ sum += delta;
+ }
+ if (!buggy)
+ printk("passed.\n");
}
static void __init synchronize_tsc_ap (void)
{
- int i;
-
- /*
- * smp_num_cpus is not necessarily known at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
- */
- while (!atomic_read(&tsc_start_flag)) mb();
-
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
-
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
- }
+ int i;
+
+ /*
+ * Not every cpu is online at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);
+ while (atomic_read(&tsc_count_start) != num_booting_cpus())
+ mb();
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ }
}
#undef NR_LOOPS
+extern void calibrate_delay(void);
+
static atomic_t init_deasserted;
void __init smp_callin(void)
{
- int cpuid, phys_id, i;
-
- /*
- * If waken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- while (!atomic_read(&init_deasserted));
-
- /*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
- cpuid = smp_processor_id();
- if (test_and_set_bit(cpuid, &cpu_online_map)) {
- printk("huh, phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- BUG();
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- for ( i = 0; i < 200; i++ )
- {
- if ( test_bit(cpuid, &cpu_callout_map) ) break;
- mdelay(10);
- }
-
- if (!test_bit(cpuid, &cpu_callout_map)) {
- printk("BUG: CPU%d started up but did not get a callout!\n",
- cpuid);
- BUG();
- }
-
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
-
- setup_local_APIC();
-
- __sti();
-
- Dprintk("Stack at about %p\n",&cpuid);
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- setup_apic_nmi_watchdog();
-
- /*
- * Allow the master to continue.
- */
- set_bit(cpuid, &cpu_callin_map);
-
- /*
- * Synchronize the TSC with the BP
- */
- synchronize_tsc_ap();
+ int cpuid, phys_id, i;
+
+ /*
+ * If woken up by an INIT in an 82489DX configuration
+ * we may get here before an INIT-deassert IPI reaches
+ * our local APIC. We have to wait for the IPI or we'll
+ * lock up on an APIC access.
+ */
+ wait_for_init_deassert(&init_deasserted);
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ cpuid = smp_processor_id();
+ if (cpu_isset(cpuid, cpu_callin_map)) {
+ printk("huh, phys CPU#%d, CPU#%d already present??\n",
+ phys_id, cpuid);
+ BUG();
+ }
+ Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+ /*
+ * STARTUP IPIs are fragile beasts as they might sometimes
+ * trigger some glue motherboard logic. Complete APIC bus
+ * silence for 1 second, this overestimates the time the
+ * boot CPU is spending to send the up to 2 STARTUP IPIs
+ * by a factor of two. This should be enough.
+ */
+
+ /*
+ * Waiting 2s total for startup
+ */
+ for (i = 0; i < 200; i++) {
+ /*
+ * Has the boot CPU finished its STARTUP sequence?
+ */
+ if (cpu_isset(cpuid, cpu_callout_map))
+ break;
+ rep_nop();
+ mdelay(10);
+ }
+
+ if (!cpu_isset(cpuid, cpu_callout_map)) {
+ printk("BUG: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ BUG();
+ }
+
+ /*
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
+ */
+
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+ smp_callin_clear_local_apic();
+ setup_local_APIC();
+ map_cpu_to_logical_apicid();
+
+#if 0
+ /*
+ * Get our bogomips.
+ */
+ calibrate_delay();
+ Dprintk("Stack at about %p\n",&cpuid);
+#endif
+
+ /*
+ * Save our processor parameters
+ */
+ smp_store_cpu_info(cpuid);
+
+ disable_APIC_timer();
+
+ /*
+ * Allow the master to continue.
+ */
+ cpu_set(cpuid, cpu_callin_map);
+
+ /*
+ * Synchronize the TSC with the BP
+ */
+ if (cpu_has_tsc && cpu_khz)
+ synchronize_tsc_ap();
}
-static int cpucount;
+int cpucount;
+
+#ifdef CONFIG_X86_32
+static void construct_percpu_idt(unsigned int cpu)
+{
+ unsigned char idt_load[10];
+
+ idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
+ memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
+
+ *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
+ *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
+ __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
+}
+#endif
/*
* Activate a secondary processor.
*/
-void __init start_secondary(void)
+void __init start_secondary(void *unused)
{
- unsigned int cpu = cpucount;
- /* 6 bytes suitable for passing to LIDT instruction. */
- unsigned char idt_load[6];
-
- extern void cpu_init(void);
-
- set_current(idle_task[cpu]);
-
- /*
- * Dont put anything before smp_callin(), SMP
- * booting is too fragile that we want to limit the
- * things done here to the most necessary things.
- */
- cpu_init();
- smp_callin();
-
- while (!atomic_read(&smp_commenced))
- cpu_relax();
-
- /*
- * At this point, boot CPU has fully initialised the IDT. It is
- * now safe to make ourselves a private copy.
- */
- idt_tables[cpu] = xmalloc(IDT_ENTRIES*8);
- memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8);
- *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1;
- *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
- __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
-
- /*
- * low-memory mappings have been cleared, flush them from the local TLBs
- * too.
- */
- local_flush_tlb();
-
- startup_cpu_idle_loop();
-
- BUG();
+ unsigned int cpu = cpucount;
+
+ extern void percpu_traps_init(void);
+ extern void cpu_init(void);
+
+ set_current(idle_task[cpu]);
+ set_processor_id(cpu);
+
+ percpu_traps_init();
+
+ cpu_init();
+ smp_callin();
+ while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+ rep_nop();
+
+#ifdef CONFIG_X86_32
+ /*
+ * At this point, boot CPU has fully initialised the IDT. It is
+ * now safe to make ourselves a private copy.
+ */
+ construct_percpu_idt(cpu);
+#endif
+
+ setup_secondary_APIC_clock();
+ enable_APIC_timer();
+
+ /*
+ * low-memory mappings have been cleared, flush them from
+ * the local TLBs too.
+ */
+ local_flush_tlb();
+ cpu_set(smp_processor_id(), cpu_online_map);
+
+ /* We can take interrupts now: we're officially "up". */
+ local_irq_enable();
+
+ wmb();
+ startup_cpu_idle_loop();
}
extern struct {
- unsigned long esp, ss;
+ void * esp;
+ unsigned short ss;
} stack_start;
-/* which physical APIC ID maps to which logical CPU number */
-volatile int physical_apicid_2_cpu[MAX_APICID];
-/* which logical CPU number maps to which physical APIC ID */
-volatile int cpu_2_physical_apicid[NR_CPUS];
+#ifdef CONFIG_NUMA
-/* which logical APIC ID maps to which logical CPU number */
-volatile int logical_apicid_2_cpu[MAX_APICID];
-/* which logical CPU number maps to which logical APIC ID */
-volatile int cpu_2_logical_apicid[NR_CPUS];
+/* which logical CPUs are on which nodes */
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+ { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+/* which node each logical CPU is on */
+int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_2_node);
-static inline void init_cpu_to_apicid(void)
-/* Initialize all maps between cpu number and apicids */
+/* set up a mapping between cpu and node. */
+static inline void map_cpu_to_node(int cpu, int node)
{
- int apicid, cpu;
-
- for (apicid = 0; apicid < MAX_APICID; apicid++) {
- physical_apicid_2_cpu[apicid] = -1;
- logical_apicid_2_cpu[apicid] = -1;
- }
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpu_2_physical_apicid[cpu] = -1;
- cpu_2_logical_apicid[cpu] = -1;
- }
+ printk("Mapping cpu %d to node %d\n", cpu, node);
+ cpu_set(cpu, node_2_cpu_mask[node]);
+ cpu_2_node[cpu] = node;
}
-static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
-/*
- * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
- * else physical apic ids
- */
+/* undo a mapping between cpu and node. */
+static inline void unmap_cpu_to_node(int cpu)
{
- physical_apicid_2_cpu[apicid] = cpu;
- cpu_2_physical_apicid[cpu] = apicid;
+ int node;
+
+ printk("Unmapping cpu %d from all nodes\n", cpu);
+ for (node = 0; node < MAX_NUMNODES; node ++)
+ cpu_clear(cpu, node_2_cpu_mask[node]);
+ cpu_2_node[cpu] = 0;
}
+#else /* !CONFIG_NUMA */
-static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
-/*
- * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
- * else physical apic ids
- */
+#define map_cpu_to_node(cpu, node) ({})
+#define unmap_cpu_to_node(cpu) ({})
+
+#endif /* CONFIG_NUMA */
+
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+void map_cpu_to_logical_apicid(void)
{
- physical_apicid_2_cpu[apicid] = -1;
- cpu_2_physical_apicid[cpu] = -1;
+ int cpu = smp_processor_id();
+ int apicid = logical_smp_processor_id();
+
+ cpu_2_logical_apicid[cpu] = apicid;
+ map_cpu_to_node(cpu, apicid_to_node(apicid));
+}
+
+void unmap_cpu_to_logical_apicid(int cpu)
+{
+ cpu_2_logical_apicid[cpu] = BAD_APICID;
+ unmap_cpu_to_node(cpu);
}
#if APIC_DEBUG
-static inline void inquire_remote_apic(int apicid)
+static inline void __inquire_remote_apic(int apicid)
{
- int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
-
- printk("Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk("%08x\n", status);
- break;
- default:
- printk("failed\n");
- }
- }
+ int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ char *names[] = { "ID", "VERSION", "SPIV" };
+ int timeout, status;
+
+ printk("Inquiring remote APIC #%d...\n", apicid);
+
+ for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ printk("... APIC #%d %s: ", apicid, names[i]);
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+ timeout = 0;
+ do {
+ udelay(100);
+ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+ switch (status) {
+ case APIC_ICR_RR_VALID:
+ status = apic_read(APIC_RRR);
+ printk("%08x\n", status);
+ break;
+ default:
+ printk("failed\n");
+ }
+ }
}
#endif
+#ifdef WAKE_SECONDARY_VIA_NMI
+/*
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ */
+static int __init
+wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int timeout, maxlvt;
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ maxlvt = get_maxlvt();
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ Dprintk("NMI sent.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_NMI */
-static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
+#ifdef WAKE_SECONDARY_VIA_INIT
+static int __init
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- atomic_set(&init_deasserted, 1);
-
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
-
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ Dprintk("Asserting INIT.\n");
+
+ /*
+ * Turn INIT on target chip
+ */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /*
+ * Send IPI
+ */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ mdelay(10);
+
+ Dprintk("Deasserting INIT.\n");
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Send IPI */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ atomic_set(&init_deasserted, 1);
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+ Dprintk("#startup loops: %d.\n", num_starts);
+
+ maxlvt = get_maxlvt();
+
+ for (j = 1; j <= num_starts; j++) {
+ Dprintk("Sending STARTUP #%d.\n",j);
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ Dprintk("After apic_write.\n");
+
+ /*
+ * STARTUP IPI
+ */
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_STARTUP
+ | (start_eip >> 12));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (send_status || accept_status)
+ break;
+ }
+ Dprintk("After Startup.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
}
+#endif /* WAKE_SECONDARY_VIA_INIT */
-extern unsigned long cpu_initialized;
+extern cpumask_t cpu_initialized;
-static void __init do_boot_cpu (int apicid)
+static int __init do_boot_cpu(int apicid)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
*/
{
- struct domain *idle;
- unsigned long boot_error = 0;
- int timeout, cpu;
- unsigned long start_eip, stack;
-
- cpu = ++cpucount;
-
- if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
- panic("failed 'createdomain' for CPU %d", cpu);
-
- set_bit(DF_IDLETASK, &idle->flags);
-
- idle->mm.pagetable = mk_pagetable(__pa(idle_pg_table));
-
- map_cpu_to_boot_apicid(cpu, apicid);
-
- idle_task[cpu] = idle;
-
- /* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
-
- /* So we see what's up. */
- printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
-
- stack = __pa(alloc_xenheap_pages(1));
- stack_start.esp = stack + STACK_SIZE - STACK_RESERVED;
-
- /* Debug build: detect stack overflow by setting up a guard page. */
- memguard_guard_range(__va(stack), PAGE_SIZE);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
- Dprintk("Setting warm reset code and vector.\n");
-
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
- Dprintk("3.\n");
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if ( APIC_INTEGRATED(apic_version[apicid]) )
- {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- /*
- * Status is now clean
- */
- boot_error = 0;
-
- /*
- * Starting actual IPI sequence...
- */
-
- boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- set_bit(cpu, &cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (test_bit(cpu, &cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (test_bit(cpu, &cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- printk("CPU%d has booted.\n", cpu);
- } else {
- boot_error= 1;
- if (*((volatile unsigned long *)phys_to_virt(start_eip))
- == 0xA5A5A5A5)
+ struct domain *idle;
+ struct vcpu *v;
+ void *stack;
+ unsigned long boot_error;
+ int timeout, cpu;
+ unsigned long start_eip;
+ unsigned short nmi_high = 0, nmi_low = 0;
+
+ cpu = ++cpucount;
+
+ if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
+ panic("failed 'createdomain' for CPU %d", cpu);
+
+ v = idle_task[cpu] = idle->vcpu[0];
+
+ set_bit(_DOMF_idle_domain, &idle->domain_flags);
+
+ v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+
+ /* start_eip had better be page-aligned! */
+ start_eip = setup_trampoline();
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+
+ stack = alloc_xenheap_pages(STACK_ORDER);
+#if defined(__i386__)
+ stack_start.esp = (void *)__pa(stack);
+#elif defined(__x86_64__)
+ stack_start.esp = stack;
+#endif
+ stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
+
+ /* Debug build: detect stack overflow by setting up a guard page. */
+ memguard_guard_stack(stack);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+ Dprintk("Setting warm reset code and vector.\n");
+
+ store_NMI_vector(&nmi_high, &nmi_low);
+
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ Dprintk("3.\n");
+
+ /*
+ * Starting actual IPI sequence...
+ */
+ boot_error = wakeup_secondary_cpu(apicid, start_eip);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ printk("CPU%d: ", cpu);
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ if (*((volatile unsigned char *)trampoline_base)
+ == 0xA5)
/* trampoline started but...? */
- printk("Stuck ??\n");
- else
+ printk("Stuck ??\n");
+ else
/* trampoline code not run */
- printk("Not responding.\n");
-#if APIC_DEBUG
- inquire_remote_apic(apicid);
-#endif
- }
- }
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_boot_apicid(cpu, apicid);
- clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
- clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
- clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
- cpucount--;
- }
+ printk("Not responding.\n");
+ inquire_remote_apic(apicid);
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_logical_apicid(cpu);
+ cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+ cpucount--;
+ }
+
+ /* mark "stuck" area as not stuck */
+ *((volatile unsigned long *)trampoline_base) = 0;
+
+ return boot_error;
}
+#if 0
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+ unsigned long cachesize; /* kB */
+ unsigned long bandwidth = 350; /* MB/s */
+ /*
+ * Rough estimation for SMP scheduling, this is the number of
+ * cycles it takes for a fully memory-limited process to flush
+ * the SMP-local cache.
+ *
+ * (For a P5 this pretty much means we will choose another idle
+ * CPU almost always at wakeup time (this is due to the small
+ * L1 cache), on PIIs it's around 50-100 usecs, depending on
+ * the cache size)
+ */
+
+ if (!cpu_khz) {
+ /*
+ * this basically disables processor-affinity
+ * scheduling on SMP without a TSC.
+ */
+ cacheflush_time = 0;
+ return;
+ } else {
+ cachesize = boot_cpu_data.x86_cache_size;
+ if (cachesize == -1) {
+ cachesize = 16; /* Pentiums, 2x8kB cache */
+ bandwidth = 100;
+ }
+
+ cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+ }
+
+ cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
+
+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+ (long)cacheflush_time/(cpu_khz/1000),
+ ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+ printk("task migration cache decay timeout: %ld msecs.\n",
+ cache_decay_ticks);
+}
+#else
+#define smp_tune_scheduling() ((void)0)
+#endif
/*
* Cycle through the processors sending APIC IPIs to boot each.
@@ -760,165 +920,273 @@ static void __init do_boot_cpu (int apicid)
static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio = NULL;
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+
+static void __init smp_boot_cpus(unsigned int max_cpus)
+{
+ int apicid, cpu, bit, kicked;
+#ifdef BOGOMIPS
+ unsigned long bogosum = 0;
+#endif
+
+ /*
+ * Setup boot CPU information
+ */
+ smp_store_cpu_info(0); /* Final full version of the data */
+ printk("CPU%d: ", 0);
+ print_cpu_info(&cpu_data[0]);
+
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+
+ /*current_thread_info()->cpu = 0;*/
+ smp_tune_scheduling();
+ cpus_clear(cpu_sibling_map[0]);
+ cpu_set(0, cpu_sibling_map[0]);
+
+ /*
+ * If we couldn't find an SMP configuration at boot time,
+ * get out of here now!
+ */
+ if (!smp_found_config && !acpi_lapic) {
+ printk(KERN_NOTICE "SMP motherboard not detected.\n");
+ init_uniprocessor:
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ if (APIC_init_uniprocessor())
+ printk(KERN_NOTICE "Local APIC not detected."
+ " Using dummy APIC emulation.\n");
+ map_cpu_to_logical_apicid();
+ return;
+ }
+
+ /*
+ * Should not be necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ * Makes no sense to do this check in clustered apic mode, so skip it
+ */
+ if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_physical_apicid);
+ physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+ }
+
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+ printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ goto init_uniprocessor;
+ }
+
+ verify_local_APIC();
+
+ /*
+ * If SMP should be disabled, then really disable it!
+ */
+ if (!max_cpus)
+ goto init_uniprocessor;
+
+ connect_bsp_APIC();
+ setup_local_APIC();
+ map_cpu_to_logical_apicid();
+
+
+ setup_portio_remap();
+
+ /*
+ * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+ *
+ * In clustered apic mode, phys_cpu_present_map is a constructed thus:
+ * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+ * clustered apic ID.
+ */
+ Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+
+ kicked = 1;
+ for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
+ apicid = cpu_present_to_apicid(bit);
+ /*
+ * Don't even attempt to start the boot CPU!
+ */
+ if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
+ continue;
+
+ if (!check_apicid_present(bit))
+ continue;
+ if (max_cpus <= cpucount+1)
+ continue;
+
+ if (do_boot_cpu(apicid))
+ printk("CPU #%d not responding - cannot use it.\n",
+ apicid);
+ else
+ ++kicked;
+ }
+
+ /*
+ * Install writable page 0 entry to set BIOS data area.
+ */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
+
+ *((volatile long *) phys_to_virt(0x467)) = 0;
+
+#ifdef BOGOMIPS
+ /*
+ * Allow the user to impress friends.
+ */
+ Dprintk("Before bogomips.\n");
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (cpu_isset(cpu, cpu_callout_map))
+ bogosum += cpu_data[cpu].loops_per_jiffy;
+ printk(KERN_INFO
+ "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ cpucount+1,
+ bogosum/(500000/HZ),
+ (bogosum/(5000/HZ))%100);
+#else
+ printk("Total of %d processors activated.\n", cpucount+1);
+#endif
+
+ Dprintk("Before bogocount - setting activated=1.\n");
+
+ if (smp_b_stepping)
+ printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+
+ /*
+ * Don't taint if we are running SMP kernel on a single non-MP
+ * approved Athlon
+ */
+ if (tainted & TAINT_UNSAFE_SMP) {
+ if (cpucount)
+ printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
+ else
+ tainted &= ~TAINT_UNSAFE_SMP;
+ }
+
+ Dprintk("Boot done.\n");
+
+ /*
+ * construct cpu_sibling_map[], so that we can tell sibling CPUs
+ * efficiently.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ cpus_clear(cpu_sibling_map[cpu]);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ int siblings = 0;
+ int i;
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+
+ if (smp_num_siblings > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ siblings++;
+ cpu_set(i, cpu_sibling_map[cpu]);
+ }
+ }
+ } else {
+ siblings++;
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
+
+ if (siblings != smp_num_siblings)
+ printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+ }
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ check_nmi_watchdog();
+
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+
+ setup_boot_APIC_clock();
+
+ /*
+ * Synchronize the TSC with the AP
+ */
+ if (cpu_has_tsc && cpucount && cpu_khz)
+ synchronize_tsc_bp();
+}
+
+/* These are wrappers to interface to the new boot process. Someone
+ who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_boot_cpus(max_cpus);
+}
-void __init smp_boot_cpus(void)
+void __devinit smp_prepare_boot_cpu(void)
{
- int apicid, bit;
-
- /* Initialize the logical to physical CPU number mapping */
- init_cpu_to_apicid();
-
- /*
- * Setup boot CPU information
- */
- smp_store_cpu_info(0); /* Final full version of the data */
- printk("CPU%d booted\n", 0);
-
- /*
- * We have the boot CPU online for sure.
- */
- set_bit(0, &cpu_online_map);
- boot_cpu_logical_apicid = logical_smp_processor_id();
- map_cpu_to_boot_apicid(0, boot_cpu_apicid);
-
- /*
- * If we couldnt find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config) {
- printk("SMP motherboard not detected.\n");
- io_apic_irqs = 0;
- cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
- if (APIC_init_uniprocessor())
- printk("Local APIC not detected."
- " Using dummy APIC emulation.\n");
- goto smp_done;
- }
-
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- */
- if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_physical_apicid);
- phys_cpu_present_map |= (1 << hard_smp_processor_id());
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
- !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
- printk("BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- io_apic_irqs = 0;
- cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
- goto smp_done;
- }
-
- verify_local_APIC();
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- smp_found_config = 0;
- printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
- io_apic_irqs = 0;
- cpu_online_map = phys_cpu_present_map = 1;
- smp_num_cpus = 1;
- goto smp_done;
- }
-
- connect_bsp_APIC();
- setup_local_APIC();
-
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
- BUG();
-
- /*
- * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
- *
- * In clustered apic mode, phys_cpu_present_map is a constructed thus:
- * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
- * clustered apic ID.
- */
- Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
-
- for (bit = 0; bit < NR_CPUS; bit++) {
- apicid = cpu_present_to_apicid(bit);
- /*
- * Don't even attempt to start the boot CPU!
- */
- if (apicid == boot_cpu_apicid)
- continue;
-
- /*
- * Don't start hyperthreads if option noht requested.
- */
- if (opt_noht && (apicid & (ht_per_core - 1)))
- continue;
-
- if (!(phys_cpu_present_map & (1 << bit)))
- continue;
- if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
- continue;
-
- do_boot_cpu(apicid);
-
- /*
- * Make sure we unmap all failed CPUs
- */
- if ((boot_apicid_to_cpu(apicid) == -1) &&
- (phys_cpu_present_map & (1 << bit)))
- printk("CPU #%d not responding - cannot use it.\n",
- apicid);
- }
-
- /*
- * Cleanup possible dangling ends...
- */
- /*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- *((volatile long *) phys_to_virt(0x467)) = 0;
-
- if (!cpucount) {
- printk("Error: only one processor found.\n");
- } else {
- printk("Total of %d processors activated.\n", cpucount+1);
- }
- smp_num_cpus = cpucount + 1;
-
- Dprintk("Boot done.\n");
-
- /*
- * Here we can be sure that there is an IO-APIC in the system. Let's
- * go and set it up:
- */
- if ( nr_ioapics ) setup_IO_APIC();
-
- /* Set up all local APIC timers in the system. */
- setup_APIC_clocks();
-
- /* Synchronize the TSC with the AP(s). */
- if ( cpucount ) synchronize_tsc_bp();
-
- smp_done:
- ;
+ cpu_set(smp_processor_id(), cpu_online_map);
+ cpu_set(smp_processor_id(), cpu_callout_map);
}
-#endif /* CONFIG_SMP */
+int __devinit __cpu_up(unsigned int cpu)
+{
+ /* This only works at boot for x86. See "rewrite" above. */
+ if (cpu_isset(cpu, smp_commenced_mask)) {
+ local_irq_enable();
+ return -ENOSYS;
+ }
+
+ /* In case one didn't come up */
+ if (!cpu_isset(cpu, cpu_callin_map)) {
+ local_irq_enable();
+ return -EIO;
+ }
+
+ local_irq_enable();
+ /* Unleash the CPU! */
+ cpu_set(cpu, smp_commenced_mask);
+ while (!cpu_isset(cpu, cpu_online_map))
+ mb();
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#ifdef CONFIG_X86_IO_APIC
+ setup_ioapic_dest();
+#endif
+#ifdef CONFIG_X86_64
+ zap_low_mappings();
+#endif
+ /*
+ * Disable executability of the SMP trampoline:
+ */
+ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+}
+
+#if 0
+void __init smp_intr_init(void)
+{
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+ /* IPI for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for generic function call */
+ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+}
+#endif
diff --git a/xen/arch/x86/string.c b/xen/arch/x86/string.c
new file mode 100644
index 0000000000..745670b926
--- /dev/null
+++ b/xen/arch/x86/string.c
@@ -0,0 +1,63 @@
+/******************************************************************************
+ * string.c
+ *
+ * These provide something for compiler-emitted string operations to link
+ * against.
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+
+#undef memcpy
+void *memcpy(void *dest, const void *src, size_t n)
+{
+ int d0, d1, d2;
+
+ __asm__ __volatile__ (
+ " rep ; movsl ; "
+ " testb $2,%b4 ; "
+ " je 1f ; "
+ " movsw ; "
+ "1: testb $1,%b4 ; "
+ " je 2f ; "
+ " movsb ; "
+ "2: "
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n/4), "q" (n), "1" (dest), "2" (src)
+ : "memory");
+
+ return dest;
+}
+
+#undef memset
+void *memset(void *s, int c, size_t n)
+{
+ int d0, d1;
+
+ __asm__ __volatile__ (
+ "rep ; stosb"
+ : "=&c" (d0), "=&D" (d1)
+ : "a" (c), "1" (s), "0" (n)
+ : "memory");
+
+ return s;
+}
+
+#undef memmove
+void *memmove(void *dest, const void *src, size_t n)
+{
+ int d0, d1, d2;
+
+ if ( dest < src )
+ return memcpy(dest, src, n);
+
+ __asm__ __volatile__ (
+ " std ; "
+ " rep ; movsb ; "
+ " cld "
+ : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+ : "0" (n), "1" (n-1+(const char *)src), "2" (n-1+(char *)dest)
+ : "memory");
+
+ return dest;
+}
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index eef9c16100..2efd0187b0 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1,5 +1,4 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
+/****************************************************************************
* (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
* (C) 2002-2003 University of Cambridge
****************************************************************************
@@ -38,7 +37,6 @@ unsigned long cpu_khz; /* Detected as we calibrate the TSC */
unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
int timer_ack = 0;
-int do_timer_lists_from_pit = 0;
unsigned long volatile jiffies;
/* PRIVATE */
@@ -52,7 +50,7 @@ static s_time_t stime_irq; /* System time at last 'time update' */
static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
static rwlock_t time_lock = RW_LOCK_UNLOCKED;
-static void timer_interrupt(int irq, void *dev_id, struct xen_regs *regs)
+void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
write_lock_irq(&time_lock);
@@ -92,7 +90,7 @@ static void timer_interrupt(int irq, void *dev_id, struct xen_regs *regs)
write_unlock_irq(&time_lock);
/* Rough hack to allow accurate timers to sort-of-work with no APIC. */
- if ( do_timer_lists_from_pit )
+ if ( !cpu_has_apic )
raise_softirq(AC_TIMER_SOFTIRQ);
}
@@ -275,19 +273,13 @@ s_time_t get_s_time(void)
return now;
}
-
-int update_dom_time(struct domain *d)
+static inline void __update_dom_time(struct vcpu *v)
{
+ struct domain *d = v->domain;
shared_info_t *si = d->shared_info;
- unsigned long flags;
- if ( d->last_propagated_timestamp == full_tsc_irq )
- return 0;
-
- read_lock_irqsave(&time_lock, flags);
+ spin_lock(&d->time_lock);
- d->last_propagated_timestamp = full_tsc_irq;
-
si->time_version1++;
wmb();
@@ -300,11 +292,20 @@ int update_dom_time(struct domain *d)
wmb();
si->time_version2++;
- read_unlock_irqrestore(&time_lock, flags);
-
- return 1;
+ spin_unlock(&d->time_lock);
}
+void update_dom_time(struct vcpu *v)
+{
+ unsigned long flags;
+
+ if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq )
+ {
+ read_lock_irqsave(&time_lock, flags);
+ __update_dom_time(v);
+ read_unlock_irqrestore(&time_lock, flags);
+ }
+}
/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
@@ -326,12 +327,11 @@ void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
wc_sec = secs;
wc_usec = _usecs;
- write_unlock_irq(&time_lock);
-
/* Others will pick up the change at the next tick. */
- current->last_propagated_timestamp = 0; /* force propagation */
- (void)update_dom_time(current);
+ __update_dom_time(current);
send_guest_virq(current, VIRQ_TIMER);
+
+ write_unlock_irq(&time_lock);
}
@@ -359,7 +359,6 @@ int __init init_xen_time()
wc_sec = get_cmos_time();
printk("Time init:\n");
- printk(".... System Time: %lldns\n", NOW());
printk(".... cpu_freq: %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq);
printk(".... scale: %08X:%08X\n", (u32)(scale>>32),(u32)scale);
printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_usec);
@@ -386,3 +385,13 @@ void __init time_init(void)
setup_irq(0, &irq0);
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/trampoline.S b/xen/arch/x86/trampoline.S
index 9c89ecf059..44baea0bbb 100644
--- a/xen/arch/x86/trampoline.S
+++ b/xen/arch/x86/trampoline.S
@@ -17,6 +17,7 @@
#include <xen/config.h>
#include <public/xen.h>
+#include <asm/desc.h>
#include <asm/page.h>
#ifdef CONFIG_SMP
@@ -54,14 +55,13 @@ idt_48:
.word 0, 0 # idt base = 0L
gdt_48:
- .word (LAST_RESERVED_GDT_ENTRY*8)+7
+ .word LAST_RESERVED_GDT_BYTE
#ifdef __i386__
- .long gdt_table-__PAGE_OFFSET
+ .long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
#else
- .long 0x100200 # gdt_table
+ .long 0x101000 - FIRST_RESERVED_GDT_BYTE
#endif
-
-.globl SYMBOL_NAME(trampoline_end)
-SYMBOL_NAME_LABEL(trampoline_end)
+
+ENTRY(trampoline_end)
#endif /* CONFIG_SMP */
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 9e761a3050..2986d9c2b5 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1,5 +1,5 @@
/******************************************************************************
- * arch/i386/traps.c
+ * arch/x86/traps.c
*
* Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
*
@@ -39,8 +39,8 @@
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
+#include <xen/domain_page.h>
#include <asm/shadow.h>
-#include <asm/domain_page.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
@@ -51,6 +51,8 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>
+#include <asm/msr.h>
+#include <asm/x86_emulate.h>
/*
* opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
@@ -65,45 +67,39 @@ char opt_nmi[10] = "fatal";
#endif
string_param("nmi", opt_nmi);
-#if defined(__i386__)
-
-#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r)))
-
-#define DOUBLEFAULT_STACK_SIZE 1024
-static struct tss_struct doublefault_tss;
-static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+/* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/
+idt_entry_t idt_table[IDT_ENTRIES];
-asmlinkage int hypercall(void);
+#define DECLARE_TRAP_HANDLER(_name) \
+asmlinkage void _name(void); \
+asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
-/* Master table, and the one used by CPU0. */
-struct desc_struct idt_table[256] = { {0, 0}, };
-/* All other CPUs have their own copy. */
-struct desc_struct *idt_tables[NR_CPUS] = { 0 };
-
-asmlinkage void divide_error(void);
-asmlinkage void debug(void);
asmlinkage void nmi(void);
-asmlinkage void int3(void);
-asmlinkage void overflow(void);
-asmlinkage void bounds(void);
-asmlinkage void invalid_op(void);
-asmlinkage void device_not_available(void);
-asmlinkage void coprocessor_segment_overrun(void);
-asmlinkage void invalid_TSS(void);
-asmlinkage void segment_not_present(void);
-asmlinkage void stack_segment(void);
-asmlinkage void general_protection(void);
-asmlinkage void page_fault(void);
-asmlinkage void coprocessor_error(void);
-asmlinkage void simd_coprocessor_error(void);
-asmlinkage void alignment_check(void);
-asmlinkage void spurious_interrupt_bug(void);
-asmlinkage void machine_check(void);
-
-int kstack_depth_to_print = 8*20;
+DECLARE_TRAP_HANDLER(divide_error);
+DECLARE_TRAP_HANDLER(debug);
+DECLARE_TRAP_HANDLER(int3);
+DECLARE_TRAP_HANDLER(overflow);
+DECLARE_TRAP_HANDLER(bounds);
+DECLARE_TRAP_HANDLER(invalid_op);
+DECLARE_TRAP_HANDLER(device_not_available);
+DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
+DECLARE_TRAP_HANDLER(invalid_TSS);
+DECLARE_TRAP_HANDLER(segment_not_present);
+DECLARE_TRAP_HANDLER(stack_segment);
+DECLARE_TRAP_HANDLER(general_protection);
+DECLARE_TRAP_HANDLER(page_fault);
+DECLARE_TRAP_HANDLER(coprocessor_error);
+DECLARE_TRAP_HANDLER(simd_coprocessor_error);
+DECLARE_TRAP_HANDLER(alignment_check);
+DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
+DECLARE_TRAP_HANDLER(machine_check);
+
+static int debug_stack_lines = 20;
+integer_param("debug_stack_lines", debug_stack_lines);
static inline int kernel_text_address(unsigned long addr)
{
+ extern char _stext, _etext;
if (addr >= (unsigned long) &_stext &&
addr <= (unsigned long) &_etext)
return 1;
@@ -111,111 +107,91 @@ static inline int kernel_text_address(unsigned long addr)
}
-void show_guest_stack()
+void show_guest_stack(void)
{
int i;
- execution_context_t *ec = get_execution_context();
- unsigned long *stack = (unsigned long *)ec->esp;
- printk("Guest EIP is %lx\n",ec->eip);
+ struct cpu_user_regs *regs = guest_cpu_user_regs();
+ unsigned long *stack = (unsigned long *)regs->esp, addr;
+
+ printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
- for ( i = 0; i < kstack_depth_to_print; i++ )
+ for ( i = 0; i < (debug_stack_lines*8); i++ )
{
if ( ((long)stack & (STACK_SIZE-1)) == 0 )
break;
- if ( i && ((i % 8) == 0) )
- printk("\n ");
- printk("%08lx ", *stack++);
+ if ( get_user(addr, stack) )
+ {
+ if ( i != 0 )
+ printk("\n ");
+ printk("Fault while accessing guest memory.");
+ i = 1;
+ break;
+ }
+ if ( (i != 0) && ((i % 8) == 0) )
+ printk("\n ");
+ printk("%p ", _p(addr));
+ stack++;
}
+ if ( i == 0 )
+ printk("Stack empty.");
printk("\n");
-
}
void show_trace(unsigned long *esp)
{
- unsigned long *stack, addr;
- int i;
+ unsigned long *stack = esp, addr;
+ int i = 0;
- printk("Call Trace from ESP=%p: ", esp);
- stack = esp;
- i = 0;
- while (((long) stack & (STACK_SIZE-1)) != 0) {
+ printk("Xen call trace from "__OP"sp=%p:\n ", stack);
+
+ while ( ((long) stack & (STACK_SIZE-1)) != 0 )
+ {
addr = *stack++;
- if (kernel_text_address(addr)) {
- if (i && ((i % 6) == 0))
+ if ( kernel_text_address(addr) )
+ {
+ if ( (i != 0) && ((i % 6) == 0) )
printk("\n ");
- printk("[<%08lx>] ", addr);
+ printk("[<%p>] ", _p(addr));
i++;
}
}
+ if ( i == 0 )
+ printk("Trace empty.");
printk("\n");
}
void show_stack(unsigned long *esp)
{
- unsigned long *stack;
+ unsigned long *stack = esp, addr;
int i;
- printk("Stack trace from ESP=%p:\n", esp);
+ printk("Xen stack trace from "__OP"sp=%p:\n ", stack);
- stack = esp;
- for ( i = 0; i < kstack_depth_to_print; i++ )
+ for ( i = 0; i < (debug_stack_lines*8); i++ )
{
if ( ((long)stack & (STACK_SIZE-1)) == 0 )
break;
- if ( i && ((i % 8) == 0) )
- printk("\n ");
- if ( kernel_text_address(*stack) )
- printk("[%08lx] ", *stack++);
+ if ( (i != 0) && ((i % 8) == 0) )
+ printk("\n ");
+ addr = *stack++;
+ if ( kernel_text_address(addr) )
+ printk("[%p] ", _p(addr));
else
- printk("%08lx ", *stack++);
+ printk("%p ", _p(addr));
}
+ if ( i == 0 )
+ printk("Stack empty.");
printk("\n");
- show_trace( esp );
+ show_trace(esp);
}
-void show_registers(struct xen_regs *regs)
-{
- unsigned long esp;
- unsigned short ss, ds, es, fs, gs;
-
- if ( GUEST_FAULT(regs) )
- {
- esp = regs->esp;
- ss = regs->ss & 0xffff;
- ds = regs->ds & 0xffff;
- es = regs->es & 0xffff;
- fs = regs->fs & 0xffff;
- gs = regs->gs & 0xffff;
- }
- else
- {
- esp = (unsigned long)(&regs->esp);
- ss = __HYPERVISOR_DS;
- ds = __HYPERVISOR_DS;
- es = __HYPERVISOR_DS;
- fs = __HYPERVISOR_DS;
- gs = __HYPERVISOR_DS;
- }
-
- printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
- smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags);
- printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
- regs->eax, regs->ebx, regs->ecx, regs->edx);
- printk("esi: %08x edi: %08x ebp: %08x esp: %08lx\n",
- regs->esi, regs->edi, regs->ebp, esp);
- printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
- ds, es, fs, gs, ss);
-
- show_stack((unsigned long *)&regs->esp);
-}
-
/*
* This is called for faults at very unexpected times (e.g., when interrupts
* are disabled). In such situations we can't do much that is safe. We try to
* print out some tracing and then we just spin.
*/
-asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
+asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
{
int cpu = smp_processor_id();
unsigned long cr2;
@@ -228,12 +204,16 @@ asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
"machine check", "simd error"
};
+ watchdog_disable();
+ console_start_sync();
+
show_registers(regs);
if ( trapnr == TRAP_page_fault )
{
- __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
- printk("Faulting linear address might be %08lx\n", cr2);
+ __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
+ printk("Faulting linear address: %p\n", _p(cr2));
+ show_page_walk(cr2);
}
printk("************************************\n");
@@ -243,6 +223,8 @@ asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
printk("System shutting down -- need manual reset.\n");
printk("************************************\n");
+ (void)debugger_trap_fatal(trapnr, regs);
+
/* Lock up the console to prevent spurious output from other CPUs. */
console_force_lock();
@@ -252,20 +234,20 @@ asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
}
static inline int do_trap(int trapnr, char *str,
- struct xen_regs *regs,
+ struct cpu_user_regs *regs,
int use_error_code)
{
- struct domain *d = current;
- struct trap_bounce *tb = &d->thread.trap_bounce;
+ struct vcpu *v = current;
+ struct trap_bounce *tb = &v->arch.trap_bounce;
trap_info_t *ti;
unsigned long fixup;
DEBUGGER_trap_entry(trapnr, regs);
- if ( !GUEST_FAULT(regs) )
+ if ( !GUEST_MODE(regs) )
goto xen_fault;
- ti = current->thread.traps + trapnr;
+ ti = &current->arch.guest_context.trap_ctxt[trapnr];
tb->flags = TBF_EXCEPTION;
tb->cs = ti->cs;
tb->eip = ti->address;
@@ -275,14 +257,14 @@ static inline int do_trap(int trapnr, char *str,
tb->error_code = regs->error_code;
}
if ( TI_GET_IF(ti) )
- d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+ tb->flags |= TBF_INTERRUPT;
return 0;
xen_fault:
if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
{
- DPRINTK("Trap %d: %08x -> %08lx\n", trapnr, regs->eip, fixup);
+ DPRINTK("Trap %d: %p -> %p\n", trapnr, _p(regs->eip), _p(fixup));
regs->eip = fixup;
return 0;
}
@@ -297,13 +279,13 @@ static inline int do_trap(int trapnr, char *str,
}
#define DO_ERROR_NOCODE(trapnr, str, name) \
-asmlinkage int do_##name(struct xen_regs *regs) \
+asmlinkage int do_##name(struct cpu_user_regs *regs) \
{ \
return do_trap(trapnr, str, regs, 0); \
}
#define DO_ERROR(trapnr, str, name) \
-asmlinkage int do_##name(struct xen_regs *regs) \
+asmlinkage int do_##name(struct cpu_user_regs *regs) \
{ \
return do_trap(trapnr, str, regs, 1); \
}
@@ -320,136 +302,149 @@ DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
-asmlinkage int do_int3(struct xen_regs *regs)
+asmlinkage int do_int3(struct cpu_user_regs *regs)
{
- struct domain *d = current;
- struct trap_bounce *tb = &d->thread.trap_bounce;
+ struct vcpu *v = current;
+ struct trap_bounce *tb = &v->arch.trap_bounce;
trap_info_t *ti;
DEBUGGER_trap_entry(TRAP_int3, regs);
- if ( !GUEST_FAULT(regs) )
+ if ( !GUEST_MODE(regs) )
{
DEBUGGER_trap_fatal(TRAP_int3, regs);
show_registers(regs);
panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
- }
+ }
- ti = current->thread.traps + 3;
+ ti = &current->arch.guest_context.trap_ctxt[TRAP_int3];
tb->flags = TBF_EXCEPTION;
tb->cs = ti->cs;
tb->eip = ti->address;
if ( TI_GET_IF(ti) )
- d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+ tb->flags |= TBF_INTERRUPT;
return 0;
}
-asmlinkage void do_double_fault(void)
-{
- struct tss_struct *tss = &doublefault_tss;
- unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
-
- /* Disable the NMI watchdog. It's useless now. */
- watchdog_on = 0;
-
- /* Find information saved during fault and dump it to the console. */
- tss = &init_tss[cpu];
- printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
- cpu, tss->cs, tss->eip, tss->eflags);
- printk("CR3: %08x\n", tss->__cr3);
- printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
- tss->eax, tss->ebx, tss->ecx, tss->edx);
- printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
- tss->esi, tss->edi, tss->ebp, tss->esp);
- printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
- tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
- printk("************************************\n");
- printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
- printk("System needs manual reset.\n");
- printk("************************************\n");
-
- /* Lock up the console to prevent spurious output from other CPUs. */
- console_force_lock();
-
- /* Wait for manual reset. */
- for ( ; ; )
- __asm__ __volatile__ ( "hlt" );
-}
-
-asmlinkage void do_machine_check(struct xen_regs *regs)
+asmlinkage int do_machine_check(struct cpu_user_regs *regs)
{
fatal_trap(TRAP_machine_check, regs);
+ return 0;
}
void propagate_page_fault(unsigned long addr, u16 error_code)
{
trap_info_t *ti;
- struct domain *d = current;
- struct trap_bounce *tb = &d->thread.trap_bounce;
+ struct vcpu *v = current;
+ struct trap_bounce *tb = &v->arch.trap_bounce;
- ti = d->thread.traps + 14;
+ ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
tb->cr2 = addr;
tb->error_code = error_code;
tb->cs = ti->cs;
tb->eip = ti->address;
if ( TI_GET_IF(ti) )
- d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+ tb->flags |= TBF_INTERRUPT;
+
+ v->arch.guest_cr2 = addr;
}
-asmlinkage int do_page_fault(struct xen_regs *regs)
+static int handle_perdomain_mapping_fault(
+ unsigned long offset, struct cpu_user_regs *regs)
{
- unsigned long off, addr, fixup;
- struct domain *d = current;
extern int map_ldt_shadow_page(unsigned int);
- int cpu = d->processor;
- __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ int ret;
+
+ /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
+ unsigned int is_ldt_area = (offset >> (PDPT_VCPU_VA_SHIFT-1)) & 1;
+ unsigned int vcpu_area = (offset >> PDPT_VCPU_VA_SHIFT);
+
+ /* Should never fault in another vcpu's area. */
+ BUG_ON(vcpu_area != current->vcpu_id);
+
+ /* Byte offset within the gdt/ldt sub-area. */
+ offset &= (1UL << (PDPT_VCPU_VA_SHIFT-1)) - 1UL;
+
+ if ( likely(is_ldt_area) )
+ {
+ /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
+ LOCK_BIGLOCK(d);
+ ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
+ UNLOCK_BIGLOCK(d);
+
+ if ( unlikely(ret == 0) )
+ {
+ /* In hypervisor mode? Leave it to the #PF handler to fix up. */
+ if ( !GUEST_MODE(regs) )
+ return 0;
+ /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
+ propagate_page_fault(
+ v->arch.guest_context.ldt_base + offset, regs->error_code);
+ }
+ }
+ else
+ {
+ /* GDT fault: handle the fault as #GP(selector). */
+ regs->error_code = (u16)offset & ~7;
+ (void)do_general_protection(regs);
+ }
+
+ return EXCRET_fault_fixed;
+}
+
+asmlinkage int do_page_fault(struct cpu_user_regs *regs)
+{
+ unsigned long addr, fixup;
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+
+ __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
DEBUGGER_trap_entry(TRAP_page_fault, regs);
perfc_incrc(page_faults);
- if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
+ if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
+ !shadow_mode_enabled(d)) )
{
- if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
+ LOCK_BIGLOCK(d);
+ if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
unlikely((addr >> L2_PAGETABLE_SHIFT) ==
- ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
+ d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
{
- ptwr_flush(PTWR_PT_ACTIVE);
+ ptwr_flush(d, PTWR_PT_ACTIVE);
+ UNLOCK_BIGLOCK(d);
return EXCRET_fault_fixed;
}
- if ( (addr < PAGE_OFFSET) &&
- !VM86_MODE(regs) && ((regs->cs & 3) == 1) && /* ring 1 */
+ if ( (addr < HYPERVISOR_VIRT_START) &&
+ KERNEL_MODE(v, regs) &&
((regs->error_code & 3) == 3) && /* write-protection fault */
- ptwr_do_page_fault(addr) )
+ ptwr_do_page_fault(d, addr) )
{
- if ( unlikely(d->mm.shadow_mode) )
- (void)shadow_fault(addr, regs->error_code);
+ UNLOCK_BIGLOCK(d);
return EXCRET_fault_fixed;
}
+ UNLOCK_BIGLOCK(d);
}
- if ( unlikely(d->mm.shadow_mode) &&
- (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
+ if ( unlikely(shadow_mode_enabled(d)) &&
+ ((addr < HYPERVISOR_VIRT_START) ||
+ (shadow_mode_external(d) && GUEST_CONTEXT(v, regs))) &&
+ shadow_fault(addr, regs) )
return EXCRET_fault_fixed;
- if ( unlikely(addr >= LDT_VIRT_START) &&
- (addr < (LDT_VIRT_START + (d->mm.ldt_ents*LDT_ENTRY_SIZE))) )
- {
- /*
- * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
- * send the fault up to the guest OS to be handled.
- */
- off = addr - LDT_VIRT_START;
- addr = d->mm.ldt_base + off;
- if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) )
- return EXCRET_fault_fixed; /* successfully copied the mapping */
- }
+ if ( unlikely(addr >= PERDOMAIN_VIRT_START) &&
+ unlikely(addr < PERDOMAIN_VIRT_END) &&
+ handle_perdomain_mapping_fault(addr - PERDOMAIN_VIRT_START, regs) )
+ return EXCRET_fault_fixed;
- if ( !GUEST_FAULT(regs) )
+ if ( !GUEST_MODE(regs) )
goto xen_fault;
propagate_page_fault(addr, regs->error_code);
@@ -460,52 +455,428 @@ asmlinkage int do_page_fault(struct xen_regs *regs)
if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
{
perfc_incrc(copy_user_faults);
- if ( !d->mm.shadow_mode )
- DPRINTK("Page fault: %08x -> %08lx\n", regs->eip, fixup);
+ if ( !shadow_mode_enabled(d) )
+ DPRINTK("Page fault: %p -> %p\n", _p(regs->eip), _p(fixup));
regs->eip = fixup;
return 0;
}
DEBUGGER_trap_fatal(TRAP_page_fault, regs);
- if ( addr >= PAGE_OFFSET )
+ show_registers(regs);
+ show_page_walk(addr);
+ panic("CPU%d FATAL PAGE FAULT\n"
+ "[error_code=%04x]\n"
+ "Faulting linear address: %p\n",
+ smp_processor_id(), regs->error_code, addr);
+ return 0;
+}
+
+long do_fpu_taskswitch(int set)
+{
+ struct vcpu *v = current;
+
+ if ( set )
+ {
+ set_bit(_VCPUF_guest_stts, &v->vcpu_flags);
+ stts();
+ }
+ else
{
- unsigned long page;
- page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
- printk("*pde = %08lx\n", page);
- if ( page & _PAGE_PRESENT )
+ clear_bit(_VCPUF_guest_stts, &v->vcpu_flags);
+ if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
+ clts();
+ }
+
+ return 0;
+}
+
+/* Has the guest requested sufficient permission for this I/O access? */
+static inline int guest_io_okay(
+ unsigned int port, unsigned int bytes,
+ struct vcpu *v, struct cpu_user_regs *regs)
+{
+ u16 x;
+#if defined(__x86_64__)
+ /* If in user mode, switch to kernel mode just to read I/O bitmap. */
+ extern void toggle_guest_mode(struct vcpu *);
+ int user_mode = !(v->arch.flags & TF_kernel_mode);
+#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
+#elif defined(__i386__)
+#define TOGGLE_MODE() ((void)0)
+#endif
+
+ if ( v->arch.iopl >= (KERNEL_MODE(v, regs) ? 1 : 3) )
+ return 1;
+
+ if ( v->arch.iobmp_limit > (port + bytes) )
+ {
+ TOGGLE_MODE();
+ __get_user(x, (u16 *)(v->arch.iobmp+(port>>3)));
+ TOGGLE_MODE();
+ if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Has the administrator granted sufficient permission for this I/O access? */
+static inline int admin_io_okay(
+ unsigned int port, unsigned int bytes,
+ struct vcpu *v, struct cpu_user_regs *regs)
+{
+ struct domain *d = v->domain;
+ u16 x;
+
+ if ( d->arch.iobmp_mask != NULL )
+ {
+ x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
+ if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Check admin limits. Silently fail the access if it is disallowed. */
+#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
+#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
+#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
+#define outb_user(_v, _p, _d, _r) \
+ (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
+#define outw_user(_v, _p, _d, _r) \
+ (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
+#define outl_user(_v, _p, _d, _r) \
+ (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
+
+/* Propagate a fault back to the guest kernel. */
+#define USER_READ_FAULT 4 /* user mode, read fault */
+#define USER_WRITE_FAULT 6 /* user mode, write fault */
+#define PAGE_FAULT(_faultaddr, _errcode) \
+({ propagate_page_fault(_faultaddr, _errcode); \
+ return EXCRET_fault_fixed; \
+})
+
+/* Instruction fetch with error handling. */
+#define insn_fetch(_type, _size, _ptr) \
+({ unsigned long _x; \
+ if ( get_user(_x, (_type *)eip) ) \
+ PAGE_FAULT(eip, USER_READ_FAULT); \
+ eip += _size; (_type)_x; })
+
+static int emulate_privileged_op(struct cpu_user_regs *regs)
+{
+ struct vcpu *v = current;
+ unsigned long *reg, eip = regs->eip;
+ u8 opcode, modrm_reg = 0, rep_prefix = 0;
+ unsigned int port, i, op_bytes = 4, data;
+
+ /* Legacy prefixes. */
+ for ( i = 0; i < 8; i++ )
+ {
+ switch ( opcode = insn_fetch(u8, 1, eip) )
{
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
- printk(" *pte = %08lx\n", page);
+ case 0x66: /* operand-size override */
+ op_bytes ^= 6; /* switch between 2/4 bytes */
+ break;
+ case 0x67: /* address-size override */
+ case 0x2e: /* CS override */
+ case 0x3e: /* DS override */
+ case 0x26: /* ES override */
+ case 0x64: /* FS override */
+ case 0x65: /* GS override */
+ case 0x36: /* SS override */
+ case 0xf0: /* LOCK */
+ case 0xf2: /* REPNE/REPNZ */
+ break;
+ case 0xf3: /* REP/REPE/REPZ */
+ rep_prefix = 1;
+ break;
+ default:
+ goto done_prefixes;
}
-#ifdef MEMORY_GUARD
- if ( !(regs->error_code & 1) )
- printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
+ }
+ done_prefixes:
+
+#ifdef __x86_64__
+ /* REX prefix. */
+ if ( (opcode & 0xf0) == 0x40 )
+ {
+ modrm_reg = (opcode & 4) << 1; /* REX.R */
+ /* REX.W, REX.B and REX.X do not need to be decoded. */
+ opcode = insn_fetch(u8, 1, eip);
+ }
#endif
+
+ /* Input/Output String instructions. */
+ if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
+ {
+ if ( rep_prefix && (regs->ecx == 0) )
+ goto done;
+
+ continue_io_string:
+ switch ( opcode )
+ {
+ case 0x6c: /* INSB */
+ op_bytes = 1;
+ case 0x6d: /* INSW/INSL */
+ if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
+ goto fail;
+ switch ( op_bytes )
+ {
+ case 1:
+ data = (u8)inb_user((u16)regs->edx, v, regs);
+ if ( put_user((u8)data, (u8 *)regs->edi) )
+ PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+ break;
+ case 2:
+ data = (u16)inw_user((u16)regs->edx, v, regs);
+ if ( put_user((u16)data, (u16 *)regs->edi) )
+ PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+ break;
+ case 4:
+ data = (u32)inl_user((u16)regs->edx, v, regs);
+ if ( put_user((u32)data, (u32 *)regs->edi) )
+ PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+ break;
+ }
+ regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+ break;
+
+ case 0x6e: /* OUTSB */
+ op_bytes = 1;
+ case 0x6f: /* OUTSW/OUTSL */
+ if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
+ goto fail;
+ switch ( op_bytes )
+ {
+ case 1:
+ if ( get_user(data, (u8 *)regs->esi) )
+ PAGE_FAULT(regs->esi, USER_READ_FAULT);
+ outb_user((u8)data, (u16)regs->edx, v, regs);
+ break;
+ case 2:
+ if ( get_user(data, (u16 *)regs->esi) )
+ PAGE_FAULT(regs->esi, USER_READ_FAULT);
+ outw_user((u16)data, (u16)regs->edx, v, regs);
+ break;
+ case 4:
+ if ( get_user(data, (u32 *)regs->esi) )
+ PAGE_FAULT(regs->esi, USER_READ_FAULT);
+ outl_user((u32)data, (u16)regs->edx, v, regs);
+ break;
+ }
+ regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+ break;
+ }
+
+ if ( rep_prefix && (--regs->ecx != 0) )
+ {
+ if ( !hypercall_preempt_check() )
+ goto continue_io_string;
+ eip = regs->eip;
+ }
+
+ goto done;
}
- show_registers(regs);
- panic("CPU%d FATAL PAGE FAULT\n"
- "[error_code=%04x]\n"
- "Faulting linear address might be %08lx\n",
- smp_processor_id(), regs->error_code, addr);
+ /* I/O Port and Interrupt Flag instructions. */
+ switch ( opcode )
+ {
+ case 0xe4: /* IN imm8,%al */
+ op_bytes = 1;
+ case 0xe5: /* IN imm8,%eax */
+ port = insn_fetch(u8, 1, eip);
+ exec_in:
+ if ( !guest_io_okay(port, op_bytes, v, regs) )
+ goto fail;
+ switch ( op_bytes )
+ {
+ case 1:
+ regs->eax &= ~0xffUL;
+ regs->eax |= (u8)inb_user(port, v, regs);
+ break;
+ case 2:
+ regs->eax &= ~0xffffUL;
+ regs->eax |= (u16)inw_user(port, v, regs);
+ break;
+ case 4:
+ regs->eax = (u32)inl_user(port, v, regs);
+ break;
+ }
+ goto done;
+
+ case 0xec: /* IN %dx,%al */
+ op_bytes = 1;
+ case 0xed: /* IN %dx,%eax */
+ port = (u16)regs->edx;
+ goto exec_in;
+
+ case 0xe6: /* OUT %al,imm8 */
+ op_bytes = 1;
+ case 0xe7: /* OUT %eax,imm8 */
+ port = insn_fetch(u8, 1, eip);
+ exec_out:
+ if ( !guest_io_okay(port, op_bytes, v, regs) )
+ goto fail;
+ switch ( op_bytes )
+ {
+ case 1:
+ outb_user((u8)regs->eax, port, v, regs);
+ break;
+ case 2:
+ outw_user((u16)regs->eax, port, v, regs);
+ break;
+ case 4:
+ outl_user((u32)regs->eax, port, v, regs);
+ break;
+ }
+ goto done;
+
+ case 0xee: /* OUT %al,%dx */
+ op_bytes = 1;
+ case 0xef: /* OUT %eax,%dx */
+ port = (u16)regs->edx;
+ goto exec_out;
+
+ case 0xfa: /* CLI */
+ case 0xfb: /* STI */
+ if ( v->arch.iopl < (KERNEL_MODE(v, regs) ? 1 : 3) )
+ goto fail;
+ /*
+ * This is just too dangerous to allow, in my opinion. Consider if the
+ * caller then tries to reenable interrupts using POPF: we can't trap
+ * that and we'll end up with hard-to-debug lockups. Fast & loose will
+ * do for us. :-)
+ */
+ /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
+ goto done;
+
+ case 0x0f: /* Two-byte opcode */
+ break;
+
+ default:
+ goto fail;
+ }
+
+ /* Remaining instructions only emulated from guest kernel. */
+ if ( !KERNEL_MODE(v, regs) )
+ goto fail;
+
+ /* Privileged (ring 0) instructions. */
+ opcode = insn_fetch(u8, 1, eip);
+ switch ( opcode )
+ {
+ case 0x06: /* CLTS */
+ (void)do_fpu_taskswitch(0);
+ break;
+
+ case 0x09: /* WBINVD */
+ /* Ignore the instruction if unprivileged. */
+ if ( !IS_CAPABLE_PHYSDEV(v->domain) )
+ DPRINTK("Non-physdev domain attempted WBINVD.\n");
+ else
+ wbinvd();
+ break;
+
+ case 0x20: /* MOV CR?,<reg> */
+ opcode = insn_fetch(u8, 1, eip);
+ if ( (opcode & 0xc0) != 0xc0 )
+ goto fail;
+ modrm_reg |= opcode & 7;
+ reg = decode_register(modrm_reg, regs, 0);
+ switch ( (opcode >> 3) & 7 )
+ {
+ case 0: /* Read CR0 */
+ *reg =
+ (read_cr0() & ~X86_CR0_TS) |
+ (test_bit(_VCPUF_guest_stts, &v->vcpu_flags) ? X86_CR0_TS:0);
+ break;
+
+ case 2: /* Read CR2 */
+ *reg = v->arch.guest_cr2;
+ break;
+
+ case 3: /* Read CR3 */
+ *reg = pagetable_get_paddr(v->arch.guest_table);
+ break;
+
+ default:
+ goto fail;
+ }
+ break;
+
+ case 0x22: /* MOV <reg>,CR? */
+ opcode = insn_fetch(u8, 1, eip);
+ if ( (opcode & 0xc0) != 0xc0 )
+ goto fail;
+ modrm_reg |= opcode & 7;
+ reg = decode_register(modrm_reg, regs, 0);
+ switch ( (opcode >> 3) & 7 )
+ {
+ case 0: /* Write CR0 */
+ (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
+ break;
+
+ case 2: /* Write CR2 */
+ v->arch.guest_cr2 = *reg;
+ break;
+
+ case 3: /* Write CR3 */
+ LOCK_BIGLOCK(v->domain);
+ (void)new_guest_cr3(*reg);
+ UNLOCK_BIGLOCK(v->domain);
+ break;
+
+ default:
+ goto fail;
+ }
+ break;
+
+ case 0x30: /* WRMSR */
+ /* Ignore the instruction if unprivileged. */
+ if ( !IS_PRIV(v->domain) )
+ DPRINTK("Non-priv domain attempted WRMSR(%p,%08lx,%08lx).\n",
+ _p(regs->ecx), (long)regs->eax, (long)regs->edx);
+ else if ( wrmsr_user(regs->ecx, regs->eax, regs->edx) )
+ goto fail;
+ break;
+
+ case 0x32: /* RDMSR */
+ if ( !IS_PRIV(v->domain) )
+ DPRINTK("Non-priv domain attempted RDMSR(%p,%08lx,%08lx).\n",
+ _p(regs->ecx), (long)regs->eax, (long)regs->edx);
+ /* Everyone can read the MSR space. */
+ if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
+ goto fail;
+ break;
+
+ default:
+ goto fail;
+ }
+
+ done:
+ regs->eip = eip;
+ return EXCRET_fault_fixed;
+
+ fail:
return 0;
}
-asmlinkage int do_general_protection(struct xen_regs *regs)
+asmlinkage int do_general_protection(struct cpu_user_regs *regs)
{
- struct domain *d = current;
- struct trap_bounce *tb = &d->thread.trap_bounce;
+ struct vcpu *v = current;
+ struct trap_bounce *tb = &v->arch.trap_bounce;
trap_info_t *ti;
unsigned long fixup;
DEBUGGER_trap_entry(TRAP_gp_fault, regs);
-
+
if ( regs->error_code & 1 )
goto hardware_gp;
- if ( !GUEST_FAULT(regs) )
+ if ( !GUEST_MODE(regs) )
goto gp_in_kernel;
/*
@@ -531,8 +902,8 @@ asmlinkage int do_general_protection(struct xen_regs *regs)
if ( (regs->error_code & 3) == 2 )
{
/* This fault must be due to <INT n> instruction. */
- ti = current->thread.traps + (regs->error_code>>3);
- if ( TI_GET_DPL(ti) >= (VM86_MODE(regs) ? 3 : (regs->cs & 3)) )
+ ti = &current->arch.guest_context.trap_ctxt[regs->error_code>>3];
+ if ( PERMIT_SOFTINT(TI_GET_DPL(ti), v, regs) )
{
tb->flags = TBF_EXCEPTION;
regs->eip += 2;
@@ -540,30 +911,35 @@ asmlinkage int do_general_protection(struct xen_regs *regs)
}
}
+ /* Emulate some simple privileged and I/O instructions. */
+ if ( (regs->error_code == 0) &&
+ emulate_privileged_op(regs) )
+ return 0;
+
#if defined(__i386__)
- if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) &&
+ if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
(regs->error_code == 0) &&
gpf_emulate_4gb(regs) )
return 0;
#endif
/* Pass on GPF as is. */
- ti = current->thread.traps + 13;
+ ti = &current->arch.guest_context.trap_ctxt[TRAP_gp_fault];
tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
tb->error_code = regs->error_code;
finish_propagation:
tb->cs = ti->cs;
tb->eip = ti->address;
if ( TI_GET_IF(ti) )
- d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+ tb->flags |= TBF_INTERRUPT;
return 0;
gp_in_kernel:
if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
{
- DPRINTK("GPF (%04x): %08x -> %08lx\n",
- regs->error_code, regs->eip, fixup);
+ DPRINTK("GPF (%04x): %p -> %p\n",
+ regs->error_code, _p(regs->eip), _p(fixup));
regs->eip = fixup;
return 0;
}
@@ -577,19 +953,55 @@ asmlinkage int do_general_protection(struct xen_regs *regs)
return 0;
}
-asmlinkage void mem_parity_error(struct xen_regs *regs)
+unsigned long nmi_softirq_reason;
+static void nmi_softirq(void)
+{
+ if ( dom0 == NULL )
+ return;
+
+ if ( test_and_clear_bit(0, &nmi_softirq_reason) )
+ send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);
+
+ if ( test_and_clear_bit(1, &nmi_softirq_reason) )
+ send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
+}
+
+asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
{
- console_force_unlock();
- printk("\n\nNMI - MEMORY ERROR\n");
- fatal_trap(TRAP_nmi, regs);
+ /* Clear and disable the parity-error line. */
+ outb((inb(0x61)&15)|4,0x61);
+
+ switch ( opt_nmi[0] )
+ {
+ case 'd': /* 'dom0' */
+ set_bit(0, &nmi_softirq_reason);
+ raise_softirq(NMI_SOFTIRQ);
+ case 'i': /* 'ignore' */
+ break;
+ default: /* 'fatal' */
+ console_force_unlock();
+ printk("\n\nNMI - MEMORY ERROR\n");
+ fatal_trap(TRAP_nmi, regs);
+ }
}
-asmlinkage void io_check_error(struct xen_regs *regs)
+asmlinkage void io_check_error(struct cpu_user_regs *regs)
{
- console_force_unlock();
+ /* Clear and disable the I/O-error line. */
+ outb((inb(0x61)&15)|8,0x61);
- printk("\n\nNMI - I/O ERROR\n");
- fatal_trap(TRAP_nmi, regs);
+ switch ( opt_nmi[0] )
+ {
+ case 'd': /* 'dom0' */
+ set_bit(0, &nmi_softirq_reason);
+ raise_softirq(NMI_SOFTIRQ);
+ case 'i': /* 'ignore' */
+ break;
+ default: /* 'fatal' */
+ console_force_unlock();
+ printk("\n\nNMI - I/O ERROR\n");
+ fatal_trap(TRAP_nmi, regs);
+ }
}
static void unknown_nmi_error(unsigned char reason)
@@ -599,75 +1011,58 @@ static void unknown_nmi_error(unsigned char reason)
printk("Do you have a strange power saving mode enabled?\n");
}
-asmlinkage void do_nmi(struct xen_regs *regs, unsigned long reason)
+asmlinkage void do_nmi(struct cpu_user_regs *regs, unsigned long reason)
{
++nmi_count(smp_processor_id());
-#if CONFIG_X86_LOCAL_APIC
if ( nmi_watchdog )
nmi_watchdog_tick(regs);
- else
-#endif
- unknown_nmi_error((unsigned char)(reason&0xff));
-}
-
-unsigned long nmi_softirq_reason;
-static void nmi_softirq(void)
-{
- if ( dom0 == NULL )
- return;
-
- if ( test_and_clear_bit(0, &nmi_softirq_reason) )
- send_guest_virq(dom0, VIRQ_PARITY_ERR);
- if ( test_and_clear_bit(1, &nmi_softirq_reason) )
- send_guest_virq(dom0, VIRQ_IO_ERR);
+ if ( reason & 0x80 )
+ mem_parity_error(regs);
+ else if ( reason & 0x40 )
+ io_check_error(regs);
+ else if ( !nmi_watchdog )
+ unknown_nmi_error((unsigned char)(reason&0xff));
}
-asmlinkage int math_state_restore(struct xen_regs *regs)
+asmlinkage int math_state_restore(struct cpu_user_regs *regs)
{
/* Prevent recursion. */
clts();
- if ( !test_bit(DF_USEDFPU, &current->flags) )
- {
- if ( test_bit(DF_DONEFPUINIT, &current->flags) )
- restore_fpu(current);
- else
- init_fpu();
- set_bit(DF_USEDFPU, &current->flags); /* so we fnsave on switch_to() */
- }
+ setup_fpu(current);
- if ( test_and_clear_bit(DF_GUEST_STTS, &current->flags) )
+ if ( test_and_clear_bit(_VCPUF_guest_stts, &current->vcpu_flags) )
{
- struct trap_bounce *tb = &current->thread.trap_bounce;
- tb->flags = TBF_EXCEPTION;
- tb->cs = current->thread.traps[7].cs;
- tb->eip = current->thread.traps[7].address;
+ struct trap_bounce *tb = &current->arch.trap_bounce;
+ tb->flags = TBF_EXCEPTION;
+ tb->cs = current->arch.guest_context.trap_ctxt[7].cs;
+ tb->eip = current->arch.guest_context.trap_ctxt[7].address;
}
return EXCRET_fault_fixed;
}
-asmlinkage int do_debug(struct xen_regs *regs)
+asmlinkage int do_debug(struct cpu_user_regs *regs)
{
- unsigned int condition;
- struct domain *d = current;
- struct trap_bounce *tb = &d->thread.trap_bounce;
+ unsigned long condition;
+ struct vcpu *v = current;
+ struct trap_bounce *tb = &v->arch.trap_bounce;
- __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+ __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
/* Mask out spurious debug traps due to lazy DR7 setting */
if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
- (d->thread.debugreg[7] == 0) )
+ (v->arch.guest_context.debugreg[7] == 0) )
{
- __asm__("movl %0,%%db7" : : "r" (0));
+ __asm__("mov %0,%%db7" : : "r" (0UL));
goto out;
}
DEBUGGER_trap_entry(TRAP_debug, regs);
- if ( !GUEST_FAULT(regs) )
+ if ( !GUEST_MODE(regs) )
{
/* Clear TF just for absolute sanity. */
regs->eflags &= ~EF_TF;
@@ -678,110 +1073,60 @@ asmlinkage int do_debug(struct xen_regs *regs)
* breakpoint, which can't happen to us.
*/
goto out;
- }
+ }
/* Save debug status register where guest OS can peek at it */
- d->thread.debugreg[6] = condition;
+ v->arch.guest_context.debugreg[6] = condition;
tb->flags = TBF_EXCEPTION;
- tb->cs = d->thread.traps[1].cs;
- tb->eip = d->thread.traps[1].address;
+ tb->cs = v->arch.guest_context.trap_ctxt[TRAP_debug].cs;
+ tb->eip = v->arch.guest_context.trap_ctxt[TRAP_debug].address;
out:
return EXCRET_not_a_fault;
}
-asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
+asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
{
return EXCRET_not_a_fault;
}
-BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
-asmlinkage void smp_deferred_nmi(struct xen_regs regs)
-{
- ack_APIC_irq();
- do_nmi(&regs, 0);
-}
-
-#define _set_gate(gate_addr,type,dpl,addr) \
-do { \
- int __d0, __d1; \
- __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %4,%%dx\n\t" \
- "movl %%eax,%0\n\t" \
- "movl %%edx,%1" \
- :"=m" (*((long *) (gate_addr))), \
- "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
- :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
- "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
-} while (0)
-
void set_intr_gate(unsigned int n, void *addr)
{
- _set_gate(idt_table+n,14,0,addr);
+#ifdef __i386__
+ int i;
+ /* Keep secondary tables in sync with IRQ updates. */
+ for ( i = 1; i < NR_CPUS; i++ )
+ if ( idt_tables[i] != NULL )
+ _set_gate(&idt_tables[i][n], 14, 0, addr);
+#endif
+ _set_gate(&idt_table[n], 14, 0, addr);
}
-static void __init set_system_gate(unsigned int n, void *addr)
+void set_system_gate(unsigned int n, void *addr)
{
_set_gate(idt_table+n,14,3,addr);
}
-static void set_task_gate(unsigned int n, unsigned int sel)
+void set_task_gate(unsigned int n, unsigned int sel)
{
idt_table[n].a = sel << 16;
idt_table[n].b = 0x8500;
}
-#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
- *((gate_addr)+1) = ((base) & 0xff000000) | \
- (((base) & 0x00ff0000)>>16) | \
- ((limit) & 0xf0000) | \
- ((dpl)<<13) | \
- (0x00408000) | \
- ((type)<<8); \
- *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
- ((limit) & 0x0ffff); }
-
-#define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
- "movw %%ax,2(%2)\n\t" \
- "rorl $16,%%eax\n\t" \
- "movb %%al,4(%2)\n\t" \
- "movb %4,5(%2)\n\t" \
- "movb $0,6(%2)\n\t" \
- "movb %%ah,7(%2)\n\t" \
- "rorl $16,%%eax" \
- : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
-
void set_tss_desc(unsigned int n, void *addr)
{
_set_tssldt_desc(
- gdt_table + __TSS(n),
- (int)addr,
+ gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+ (unsigned long)addr,
offsetof(struct tss_struct, __cacheline_filler) - 1,
- 0x89);
+ 9);
}
void __init trap_init(void)
{
- /*
- * Make a separate task for double faults. This will get us debug output if
- * we blow the kernel stack.
- */
- struct tss_struct *tss = &doublefault_tss;
- memset(tss, 0, sizeof(*tss));
- tss->ds = __HYPERVISOR_DS;
- tss->es = __HYPERVISOR_DS;
- tss->ss = __HYPERVISOR_DS;
- tss->esp = (unsigned long)
- &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
- tss->__cr3 = __pa(idle_pg_table);
- tss->cs = __HYPERVISOR_CS;
- tss->eip = (unsigned long)do_double_fault;
- tss->eflags = 2;
- tss->bitmap = IOBMP_INVALID_OFFSET;
- _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
- (int)tss, 235, 0x89);
+ extern void percpu_traps_init(void);
+ extern void cpu_init(void);
/*
* Note that interrupt gates are always used, rather than trap gates. We
@@ -799,7 +1144,6 @@ void __init trap_init(void)
set_intr_gate(TRAP_bounds,&bounds);
set_intr_gate(TRAP_invalid_op,&invalid_op);
set_intr_gate(TRAP_no_device,&device_not_available);
- set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
set_intr_gate(TRAP_no_segment,&segment_not_present);
@@ -811,21 +1155,10 @@ void __init trap_init(void)
set_intr_gate(TRAP_alignment_check,&alignment_check);
set_intr_gate(TRAP_machine_check,&machine_check);
set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
- set_intr_gate(TRAP_deferred_nmi,&deferred_nmi);
- /* Only ring 1 can access Xen services. */
- _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall);
+ percpu_traps_init();
- /* CPU0 uses the master IDT. */
- idt_tables[0] = idt_table;
-
- /*
- * Should be a barrier for any external CPU state.
- */
- {
- extern void cpu_init(void);
- cpu_init();
- }
+ cpu_init();
open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
@@ -834,131 +1167,78 @@ void __init trap_init(void)
long do_set_trap_table(trap_info_t *traps)
{
trap_info_t cur;
- trap_info_t *dst = current->thread.traps;
+ trap_info_t *dst = current->arch.guest_context.trap_ctxt;
+ long rc = 0;
+
+ LOCK_BIGLOCK(current->domain);
for ( ; ; )
{
if ( hypercall_preempt_check() )
- return hypercall_create_continuation(
- __HYPERVISOR_set_trap_table, 1, traps);
-
- if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
-
- if ( cur.address == 0 ) break;
-
- if ( !VALID_CODESEL(cur.cs) ) return -EPERM;
-
- memcpy(dst+cur.vector, &cur, sizeof(cur));
- traps++;
- }
-
- return 0;
-}
-
-
-long do_set_callbacks(unsigned long event_selector,
- unsigned long event_address,
- unsigned long failsafe_selector,
- unsigned long failsafe_address)
-{
- struct domain *d = current;
+ {
+ rc = hypercall1_create_continuation(
+ __HYPERVISOR_set_trap_table, traps);
+ break;
+ }
- if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
- return -EPERM;
+ if ( copy_from_user(&cur, traps, sizeof(cur)) )
+ {
+ rc = -EFAULT;
+ break;
+ }
- d->thread.event_selector = event_selector;
- d->thread.event_address = event_address;
- d->thread.failsafe_selector = failsafe_selector;
- d->thread.failsafe_address = failsafe_address;
+ if ( cur.address == 0 )
+ break;
- return 0;
-}
+ if ( !VALID_CODESEL(cur.cs) )
+ {
+ rc = -EPERM;
+ break;
+ }
+ memcpy(&dst[cur.vector], &cur, sizeof(cur));
-long set_fast_trap(struct domain *p, int idx)
-{
- trap_info_t *ti;
+ if ( cur.vector == 0x80 )
+ init_int80_direct_trap(current);
- /* Index 0 is special: it disables fast traps. */
- if ( idx == 0 )
- {
- if ( p == current )
- CLEAR_FAST_TRAP(&p->thread);
- SET_DEFAULT_FAST_TRAP(&p->thread);
- return 0;
+ traps++;
}
- /*
- * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
- * The former range is used by Windows and MS-DOS.
- * Vector 0x80 is used by Linux and the BSD variants.
- */
- if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
- return -1;
-
- ti = p->thread.traps + idx;
-
- /*
- * We can't virtualise interrupt gates, as there's no way to get
- * the CPU to automatically clear the events_mask variable.
- */
- if ( TI_GET_IF(ti) )
- return -1;
-
- if ( p == current )
- CLEAR_FAST_TRAP(&p->thread);
-
- p->thread.fast_trap_idx = idx;
- p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
- p->thread.fast_trap_desc.b =
- (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;
+ UNLOCK_BIGLOCK(current->domain);
- if ( p == current )
- SET_FAST_TRAP(&p->thread);
-
- return 0;
-}
-
-
-long do_set_fast_trap(int idx)
-{
- return set_fast_trap(current, idx);
-}
-
-
-long do_fpu_taskswitch(void)
-{
- set_bit(DF_GUEST_STTS, &current->flags);
- stts();
- return 0;
+ return rc;
}
-long set_debugreg(struct domain *p, int reg, unsigned long value)
+long set_debugreg(struct vcpu *p, int reg, unsigned long value)
{
int i;
switch ( reg )
{
case 0:
- if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ if ( !access_ok(value, sizeof(long)) )
+ return -EPERM;
if ( p == current )
- __asm__ ( "movl %0, %%db0" : : "r" (value) );
+ __asm__ ( "mov %0, %%db0" : : "r" (value) );
break;
case 1:
- if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ if ( !access_ok(value, sizeof(long)) )
+ return -EPERM;
if ( p == current )
- __asm__ ( "movl %0, %%db1" : : "r" (value) );
+ __asm__ ( "mov %0, %%db1" : : "r" (value) );
break;
case 2:
- if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ if ( !access_ok(value, sizeof(long)) )
+ return -EPERM;
if ( p == current )
- __asm__ ( "movl %0, %%db2" : : "r" (value) );
+ __asm__ ( "mov %0, %%db2" : : "r" (value) );
break;
case 3:
- if ( value > (PAGE_OFFSET-4) ) return -EPERM;
+ if ( !access_ok(value, sizeof(long)) )
+ return -EPERM;
if ( p == current )
- __asm__ ( "movl %0, %%db3" : : "r" (value) );
+ __asm__ ( "mov %0, %%db3" : : "r" (value) );
break;
case 6:
/*
@@ -968,7 +1248,7 @@ long set_debugreg(struct domain *p, int reg, unsigned long value)
value &= 0xffffefff; /* reserved bits => 0 */
value |= 0xffff0ff0; /* reserved bits => 1 */
if ( p == current )
- __asm__ ( "movl %0, %%db6" : : "r" (value) );
+ __asm__ ( "mov %0, %%db6" : : "r" (value) );
break;
case 7:
/*
@@ -989,13 +1269,13 @@ long set_debugreg(struct domain *p, int reg, unsigned long value)
if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
}
if ( p == current )
- __asm__ ( "movl %0, %%db7" : : "r" (value) );
+ __asm__ ( "mov %0, %%db7" : : "r" (value) );
break;
default:
return -EINVAL;
}
- p->thread.debugreg[reg] = value;
+ p->arch.guest_context.debugreg[reg] = value;
return 0;
}
@@ -1007,13 +1287,15 @@ long do_set_debugreg(int reg, unsigned long value)
unsigned long do_get_debugreg(int reg)
{
if ( (reg < 0) || (reg > 7) ) return -EINVAL;
- return current->thread.debugreg[reg];
+ return current->arch.guest_context.debugreg[reg];
}
-#else
-
-asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
-{
-}
-
-#endif /* __i386__ */
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/usercopy.c b/xen/arch/x86/usercopy.c
new file mode 100644
index 0000000000..f16c4da102
--- /dev/null
+++ b/xen/arch/x86/usercopy.c
@@ -0,0 +1,139 @@
+/*
+ * User address space access functions.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ * Copyright 2002 Andi Kleen <ak@suse.de>
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <asm/uaccess.h>
+
+unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned n)
+{
+ unsigned long __d0, __d1, __d2, __n = n;
+ __asm__ __volatile__(
+ " cmp $"STR(2*BYTES_PER_LONG-1)",%0\n"
+ " jbe 1f\n"
+ " mov %1,%0\n"
+ " neg %0\n"
+ " and $"STR(BYTES_PER_LONG-1)",%0\n"
+ " sub %0,%3\n"
+ "4: rep; movsb\n" /* make 'to' address aligned */
+ " mov %3,%0\n"
+ " shr $"STR(LONG_BYTEORDER)",%0\n"
+ " and $"STR(BYTES_PER_LONG-1)",%3\n"
+ " .align 2,0x90\n"
+ "0: rep; movs"__OS"\n" /* as many words as possible... */
+ " mov %3,%0\n"
+ "1: rep; movsb\n" /* ...remainder copied as bytes */
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "5: add %3,%0\n"
+ " jmp 2b\n"
+ "3: lea 0(%3,%0,"STR(BYTES_PER_LONG)"),%0\n"
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " "__FIXUP_ALIGN"\n"
+ " "__FIXUP_WORD" 4b,5b\n"
+ " "__FIXUP_WORD" 0b,3b\n"
+ " "__FIXUP_WORD" 1b,2b\n"
+ ".previous"
+ : "=&c"(__n), "=&D" (__d0), "=&S" (__d1), "=r"(__d2)
+ : "3"(__n), "0"(__n), "1"(to), "2"(from)
+ : "memory");
+ return (unsigned)__n;
+}
+
+unsigned long
+__copy_from_user_ll(void *to, const void __user *from, unsigned n)
+{
+ unsigned long __d0, __d1, __d2, __n = n;
+ __asm__ __volatile__(
+ " cmp $"STR(2*BYTES_PER_LONG-1)",%0\n"
+ " jbe 1f\n"
+ " mov %1,%0\n"
+ " neg %0\n"
+ " and $"STR(BYTES_PER_LONG-1)",%0\n"
+ " sub %0,%3\n"
+ "4: rep; movsb\n" /* make 'to' address aligned */
+ " mov %3,%0\n"
+ " shr $"STR(LONG_BYTEORDER)",%0\n"
+ " and $"STR(BYTES_PER_LONG-1)",%3\n"
+ " .align 2,0x90\n"
+ "0: rep; movs"__OS"\n" /* as many words as possible... */
+ " mov %3,%0\n"
+ "1: rep; movsb\n" /* ...remainder copied as bytes */
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "5: add %3,%0\n"
+ " jmp 6f\n"
+ "3: lea 0(%3,%0,"STR(BYTES_PER_LONG)"),%0\n"
+ "6: push %0\n"
+ " push %%"__OP"ax\n"
+ " xor %%eax,%%eax\n"
+ " rep; stosb\n"
+ " pop %%"__OP"ax\n"
+ " pop %0\n"
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " "__FIXUP_ALIGN"\n"
+ " "__FIXUP_WORD" 4b,5b\n"
+ " "__FIXUP_WORD" 0b,3b\n"
+ " "__FIXUP_WORD" 1b,6b\n"
+ ".previous"
+ : "=&c"(__n), "=&D" (__d0), "=&S" (__d1), "=r"(__d2)
+ : "3"(__n), "0"(__n), "1"(to), "2"(from)
+ : "memory");
+ return (unsigned)__n;
+}
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to: Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long
+copy_to_user(void __user *to, const void *from, unsigned n)
+{
+ if (access_ok(to, n))
+ n = __copy_to_user(to, from, n);
+ return n;
+}
+
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to: Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long
+copy_from_user(void *to, const void __user *from, unsigned n)
+{
+ if (access_ok(from, n))
+ n = __copy_from_user(to, from, n);
+ else
+ memset(to, 0, n);
+ return n;
+}
diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c
new file mode 100644
index 0000000000..d4a16823eb
--- /dev/null
+++ b/xen/arch/x86/vmx.c
@@ -0,0 +1,1377 @@
+/*
+ * vmx.c: handling VMX architecture-related VM exits
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <xen/softirq.h>
+#include <xen/domain_page.h>
+#include <asm/current.h>
+#include <asm/io.h>
+#include <asm/shadow.h>
+#include <asm/regs.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/spinlock.h>
+#include <asm/vmx.h>
+#include <asm/vmx_vmcs.h>
+#include <asm/vmx_intercept.h>
+#include <asm/shadow.h>
+#include <public/io/ioreq.h>
+
+#ifdef CONFIG_VMX
+
+int vmcs_size;
+unsigned int opt_vmx_debug_level = 0;
+integer_param("vmx_debug", opt_vmx_debug_level);
+
+extern long evtchn_send(int lport);
+extern long do_block(void);
+void do_nmi(struct cpu_user_regs *, unsigned long);
+
+int start_vmx(void)
+{
+ struct vmcs_struct *vmcs;
+ u32 ecx;
+ u32 eax, edx;
+ u64 phys_vmcs; /* debugging */
+
+ /*
+ * Xen does not fill x86_capability words except 0.
+ */
+ ecx = cpuid_ecx(1);
+ boot_cpu_data.x86_capability[4] = ecx;
+
+ if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
+ return 0;
+
+ rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
+
+ if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
+ if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
+ printk("VMX disabled by Feature Control MSR.\n");
+ return 0;
+ }
+ }
+ else {
+ wrmsr(IA32_FEATURE_CONTROL_MSR,
+ IA32_FEATURE_CONTROL_MSR_LOCK |
+ IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
+ }
+
+ set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
+
+ if (!(vmcs = alloc_vmcs())) {
+ printk("Failed to allocate VMCS\n");
+ return 0;
+ }
+
+ phys_vmcs = (u64) virt_to_phys(vmcs);
+
+ if (!(__vmxon(phys_vmcs))) {
+ printk("VMXON is done\n");
+ }
+
+ return 1;
+}
+
+void stop_vmx(void)
+{
+ if (read_cr4() & X86_CR4_VMXE)
+ __vmxoff();
+}
+
+/*
+ * Not all cases receive valid value in the VM-exit instruction length field.
+ */
+#define __get_instruction_length(len) \
+ __vmread(INSTRUCTION_LEN, &(len)); \
+ if ((len) < 1 || (len) > 15) \
+ __vmx_bug(&regs);
+
+static void inline __update_guest_eip(unsigned long inst_len)
+{
+ unsigned long current_eip;
+
+ __vmread(GUEST_RIP, &current_eip);
+ __vmwrite(GUEST_RIP, current_eip + inst_len);
+}
+
+
+static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
+{
+ unsigned long eip;
+ l1_pgentry_t gpte;
+ unsigned long gpa; /* FIXME: PAE */
+ int result;
+
+#if VMX_DEBUG
+ {
+ __vmread(GUEST_RIP, &eip);
+ VMX_DBG_LOG(DBG_LEVEL_VMMU,
+ "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
+ va, eip, (unsigned long)regs->error_code);
+ }
+#endif
+
+ if (!vmx_paging_enabled(current))
+ handle_mmio(va, va);
+
+ gpte = gva_to_gpte(va);
+ if (!(l1e_get_flags(gpte) & _PAGE_PRESENT) )
+ return 0;
+ gpa = l1e_get_paddr(gpte) + (va & ~PAGE_MASK);
+
+ /* Use 1:1 page table to identify MMIO address space */
+ if (mmio_space(gpa))
+ handle_mmio(va, gpa);
+
+ result = shadow_fault(va, regs);
+
+#if 0
+ if ( !result )
+ {
+ __vmread(GUEST_RIP, &eip);
+ printk("vmx pgfault to guest va=%p eip=%p\n", va, eip);
+ }
+#endif
+
+ return result;
+}
+
+static void vmx_do_no_device_fault(void)
+{
+ unsigned long cr0;
+
+ clts();
+ setup_fpu(current);
+ __vmread(CR0_READ_SHADOW, &cr0);
+ if (!(cr0 & X86_CR0_TS)) {
+ __vmread(GUEST_CR0, &cr0);
+ cr0 &= ~X86_CR0_TS;
+ __vmwrite(GUEST_CR0, cr0);
+ }
+ __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
+}
+
+static void vmx_do_general_protection_fault(struct cpu_user_regs *regs)
+{
+ unsigned long eip, error_code;
+ unsigned long intr_fields;
+
+ __vmread(GUEST_RIP, &eip);
+ __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
+
+ VMX_DBG_LOG(DBG_LEVEL_1,
+ "vmx_general_protection_fault: eip = %lx, error_code = %lx",
+ eip, error_code);
+
+ VMX_DBG_LOG(DBG_LEVEL_1,
+ "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
+ (unsigned long)regs->eax, (unsigned long)regs->ebx,
+ (unsigned long)regs->ecx, (unsigned long)regs->edx,
+ (unsigned long)regs->esi, (unsigned long)regs->edi);
+
+ /* Reflect it back into the guest */
+ intr_fields = (INTR_INFO_VALID_MASK |
+ INTR_TYPE_EXCEPTION |
+ INTR_INFO_DELIEVER_CODE_MASK |
+ TRAP_gp_fault);
+ __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
+ __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+}
+
+static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned long eip;
+
+ __vmread(GUEST_RIP, &eip);
+
+ VMX_DBG_LOG(DBG_LEVEL_1,
+ "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
+ " (esi) %lx, (edi) %lx",
+ (unsigned long)regs->eax, (unsigned long)regs->ebx,
+ (unsigned long)regs->ecx, (unsigned long)regs->edx,
+ (unsigned long)regs->esi, (unsigned long)regs->edi);
+
+ cpuid(input, &eax, &ebx, &ecx, &edx);
+
+ if (input == 1) {
+ clear_bit(X86_FEATURE_PSE, &edx);
+ clear_bit(X86_FEATURE_PAE, &edx);
+ clear_bit(X86_FEATURE_PSE36, &edx);
+ }
+
+ regs->eax = (unsigned long) eax;
+ regs->ebx = (unsigned long) ebx;
+ regs->ecx = (unsigned long) ecx;
+ regs->edx = (unsigned long) edx;
+
+ VMX_DBG_LOG(DBG_LEVEL_1,
+ "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
+ eip, input, eax, ebx, ecx, edx);
+
+}
+
+#define CASE_GET_REG_P(REG, reg) \
+ case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
+
+static void vmx_dr_access (unsigned long exit_qualification, struct cpu_user_regs *regs)
+{
+ unsigned int reg;
+ unsigned long *reg_p = 0;
+ struct vcpu *v = current;
+ unsigned long eip;
+
+ __vmread(GUEST_RIP, &eip);
+
+ reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+ VMX_DBG_LOG(DBG_LEVEL_1,
+ "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
+ eip, reg, exit_qualification);
+
+ switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
+ CASE_GET_REG_P(EAX, eax);
+ CASE_GET_REG_P(ECX, ecx);
+ CASE_GET_REG_P(EDX, edx);
+ CASE_GET_REG_P(EBX, ebx);
+ CASE_GET_REG_P(EBP, ebp);
+ CASE_GET_REG_P(ESI, esi);
+ CASE_GET_REG_P(EDI, edi);
+ case REG_ESP:
+ break;
+ default:
+ __vmx_bug(regs);
+ }
+
+ switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
+ case TYPE_MOV_TO_DR:
+ /* don't need to check the range */
+ if (reg != REG_ESP)
+ v->arch.guest_context.debugreg[reg] = *reg_p;
+ else {
+ unsigned long value;
+ __vmread(GUEST_RSP, &value);
+ v->arch.guest_context.debugreg[reg] = value;
+ }
+ break;
+ case TYPE_MOV_FROM_DR:
+ if (reg != REG_ESP)
+ *reg_p = v->arch.guest_context.debugreg[reg];
+ else {
+ __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
+ }
+ break;
+ }
+}
+
+/*
+ * Invalidate the TLB for va. Invalidate the shadow page corresponding
+ * the address va.
+ */
+static void vmx_vmexit_do_invlpg(unsigned long va)
+{
+ unsigned long eip;
+ struct vcpu *v = current;
+
+ __vmread(GUEST_RIP, &eip);
+
+ VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
+ eip, va);
+
+ /*
+ * We do the safest thing first, then try to update the shadow
+ * by copying from the guest.
+ */
+ shadow_invlpg(v, va);
+}
+
+static int check_for_null_selector(unsigned long eip)
+{
+ unsigned char inst[MAX_INST_LEN];
+ unsigned long sel;
+ int i, inst_len;
+ int inst_copy_from_guest(unsigned char *, unsigned long, int);
+
+ __vmread(INSTRUCTION_LEN, &inst_len);
+ memset(inst, 0, MAX_INST_LEN);
+ if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
+ printf("check_for_null_selector: get guest instruction failed\n");
+ domain_crash_synchronous();
+ }
+
+ for (i = 0; i < inst_len; i++) {
+ switch (inst[i]) {
+ case 0xf3: /* REPZ */
+ case 0xf2: /* REPNZ */
+ case 0xf0: /* LOCK */
+ case 0x66: /* data32 */
+ case 0x67: /* addr32 */
+ continue;
+ case 0x2e: /* CS */
+ __vmread(GUEST_CS_SELECTOR, &sel);
+ break;
+ case 0x36: /* SS */
+ __vmread(GUEST_SS_SELECTOR, &sel);
+ break;
+ case 0x26: /* ES */
+ __vmread(GUEST_ES_SELECTOR, &sel);
+ break;
+ case 0x64: /* FS */
+ __vmread(GUEST_FS_SELECTOR, &sel);
+ break;
+ case 0x65: /* GS */
+ __vmread(GUEST_GS_SELECTOR, &sel);
+ break;
+ case 0x3e: /* DS */
+ /* FALLTHROUGH */
+ default:
+ /* DS is the default */
+ __vmread(GUEST_DS_SELECTOR, &sel);
+ }
+ return sel == 0 ? 1 : 0;
+ }
+
+ return 0;
+}
+
+static void vmx_io_instruction(struct cpu_user_regs *regs,
+ unsigned long exit_qualification, unsigned long inst_len)
+{
+ struct vcpu *d = current;
+ vcpu_iodata_t *vio;
+ ioreq_t *p;
+ unsigned long addr;
+ unsigned long eip, cs, eflags;
+ int vm86;
+
+ __vmread(GUEST_RIP, &eip);
+ __vmread(GUEST_CS_SELECTOR, &cs);
+ __vmread(GUEST_RFLAGS, &eflags);
+ vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
+
+ VMX_DBG_LOG(DBG_LEVEL_1,
+ "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
+ "exit_qualification = %lx",
+ vm86, cs, eip, exit_qualification);
+
+ if (test_bit(6, &exit_qualification))
+ addr = (exit_qualification >> 16) & (0xffff);
+ else
+ addr = regs->edx & 0xffff;
+
+ if (addr == 0x80) {
+ __update_guest_eip(inst_len);
+ return;
+ }
+
+ vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
+ if (vio == 0) {
+ printk("bad shared page: %lx", (unsigned long) vio);
+ domain_crash_synchronous();
+ }
+ p = &vio->vp_ioreq;
+ p->dir = test_bit(3, &exit_qualification); /* direction */
+
+ p->pdata_valid = 0;
+ p->count = 1;
+ p->size = (exit_qualification & 7) + 1;
+
+ if (test_bit(4, &exit_qualification)) { /* string instruction */
+ unsigned long laddr;
+
+ __vmread(GUEST_LINEAR_ADDRESS, &laddr);
+ /*
+ * In protected mode, guest linear address is invalid if the
+ * selector is null.
+ */
+ if (!vm86 && check_for_null_selector(eip)) {
+ printf("String I/O with null selector (cs:eip=0x%lx:0x%lx)\n",
+ cs, eip);
+ laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi;
+ }
+ p->pdata_valid = 1;
+
+ p->u.data = laddr;
+ if (vmx_paging_enabled(d))
+ p->u.pdata = (void *) gva_to_gpa(p->u.data);
+ p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
+
+ if (test_bit(5, &exit_qualification)) /* "rep" prefix */
+ p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
+
+ /*
+ * Split up string I/O operations that cross page boundaries. Don't
+ * advance %eip so that "rep insb" will restart at the next page.
+ */
+ if ((p->u.data & PAGE_MASK) !=
+ ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
+ VMX_DBG_LOG(DBG_LEVEL_2,
+ "String I/O crosses page boundary (cs:eip=0x%lx:0x%lx)\n",
+ cs, eip);
+ if (p->u.data & (p->size - 1)) {
+ printf("Unaligned string I/O operation (cs:eip=0x%lx:0x%lx)\n",
+ cs, eip);
+ domain_crash_synchronous();
+ }
+ p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
+ } else {
+ __update_guest_eip(inst_len);
+ }
+ } else if (p->dir == IOREQ_WRITE) {
+ p->u.data = regs->eax;
+ __update_guest_eip(inst_len);
+ } else
+ __update_guest_eip(inst_len);
+
+ p->addr = addr;
+ p->port_mm = 0;
+
+ /* Check if the packet needs to be intercepted */
+ if (vmx_io_intercept(p)) {
+ /* no blocking & no evtchn notification */
+ return;
+ }
+
+ set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
+ p->state = STATE_IOREQ_READY;
+ evtchn_send(IOPACKET_PORT);
+ vmx_wait_io();
+}
+
+enum { COPY_IN = 0, COPY_OUT };
+
+static inline int
+vmx_copy(void *buf, unsigned long laddr, int size, int dir)
+{
+ char *addr;
+ unsigned long mfn;
+
+ if ( (size + (laddr & (PAGE_SIZE - 1))) >= PAGE_SIZE )
+ {
+ printf("vmx_copy exceeds page boundary\n");
+ return 0;
+ }
+
+ mfn = phys_to_machine_mapping(laddr >> PAGE_SHIFT);
+ addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
+
+ if (dir == COPY_IN)
+ memcpy(buf, addr, size);
+ else
+ memcpy(addr, buf, size);
+
+ unmap_domain_page(addr);
+ return 1;
+}
+
+int
+vmx_world_save(struct vcpu *d, struct vmx_assist_context *c)
+{
+ unsigned long inst_len;
+ int error = 0;
+
+ error |= __vmread(INSTRUCTION_LEN, &inst_len);
+ error |= __vmread(GUEST_RIP, &c->eip);
+ c->eip += inst_len; /* skip transition instruction */
+ error |= __vmread(GUEST_RSP, &c->esp);
+ error |= __vmread(GUEST_RFLAGS, &c->eflags);
+
+ error |= __vmread(CR0_READ_SHADOW, &c->cr0);
+ c->cr3 = d->arch.arch_vmx.cpu_cr3;
+ error |= __vmread(CR4_READ_SHADOW, &c->cr4);
+
+ error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
+ error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
+
+ error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
+ error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
+
+ error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
+ error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
+ error |= __vmread(GUEST_CS_BASE, &c->cs_base);
+ error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
+
+ error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
+ error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
+ error |= __vmread(GUEST_DS_BASE, &c->ds_base);
+ error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
+
+ error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
+ error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
+ error |= __vmread(GUEST_ES_BASE, &c->es_base);
+ error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
+
+ error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
+ error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
+ error |= __vmread(GUEST_SS_BASE, &c->ss_base);
+ error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
+
+ error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
+ error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
+ error |= __vmread(GUEST_FS_BASE, &c->fs_base);
+ error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
+
+ error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
+ error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
+ error |= __vmread(GUEST_GS_BASE, &c->gs_base);
+ error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
+
+ error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
+ error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
+ error |= __vmread(GUEST_TR_BASE, &c->tr_base);
+ error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
+
+ error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
+ error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
+ error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
+ error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
+
+ return !error;
+}
+
+int
+vmx_world_restore(struct vcpu *d, struct vmx_assist_context *c)
+{
+ unsigned long mfn, old_cr4;
+ int error = 0;
+
+ error |= __vmwrite(GUEST_RIP, c->eip);
+ error |= __vmwrite(GUEST_RSP, c->esp);
+ error |= __vmwrite(GUEST_RFLAGS, c->eflags);
+
+ error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
+
+ if (!vmx_paging_enabled(d)) {
+ VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
+ __vmwrite(GUEST_CR3, pagetable_get_paddr(d->domain->arch.phys_table));
+ goto skip_cr3;
+ }
+
+ if (c->cr3 == d->arch.arch_vmx.cpu_cr3) {
+ /*
+ * This is simple TLB flush, implying the guest has
+ * removed some translation or changed page attributes.
+ * We simply invalidate the shadow.
+ */
+ mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT);
+ if (mfn != pagetable_get_pfn(d->arch.guest_table)) {
+ printk("Invalid CR3 value=%lx", c->cr3);
+ domain_crash_synchronous();
+ return 0;
+ }
+ shadow_sync_all(d->domain);
+ } else {
+ /*
+ * If different, make a shadow. Check if the PDBR is valid
+ * first.
+ */
+ VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %lx", c->cr3);
+ if ((c->cr3 >> PAGE_SHIFT) > d->domain->max_pages) {
+ printk("Invalid CR3 value=%lx", c->cr3);
+ domain_crash_synchronous();
+ return 0;
+ }
+ mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT);
+ d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+ update_pagetables(d);
+ /*
+ * arch.shadow_table should now hold the next CR3 for shadow
+ */
+ d->arch.arch_vmx.cpu_cr3 = c->cr3;
+ VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", c->cr3);
+ __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table));
+ }
+
+skip_cr3:
+
+ error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+ error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE));
+ error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
+
+ error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+ error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+ error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+ error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+ error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+ error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+ error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
+ error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
+
+ error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+ error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+ error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
+ error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
+
+ error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+ error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+ error |= __vmwrite(GUEST_ES_BASE, c->es_base);
+ error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
+
+ error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+ error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+ error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
+ error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
+
+ error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+ error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+ error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
+ error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
+
+ error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+ error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+ error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
+ error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
+
+ error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+ error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+ error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
+ error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
+
+ error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+ error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+ error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+ error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
+
+ return !error;
+}
+
+enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
+
+/*
+ * World-switch to or from the vmxassist real-mode emulator that lives in
+ * the guest's address space.
+ *
+ * mode == VMX_ASSIST_INVOKE:  save the current guest context to the
+ *     location named by the pointer at VMXASSIST_OLD_CONTEXT, then load
+ *     the context found via VMXASSIST_NEW_CONTEXT, transferring control
+ *     to vmxassist.
+ * mode == VMX_ASSIST_RESTORE: reload the context previously saved via
+ *     VMXASSIST_OLD_CONTEXT.
+ *
+ * Returns 1 after a successful world switch, and 0 when vmxassist is not
+ * present in the guest (deliberately not treated as an error).  Any
+ * failure mid-switch crashes the domain and does not return.
+ */
+int
+vmx_assist(struct vcpu *d, int mode)
+{
+    struct vmx_assist_context c;
+    unsigned long magic, cp;
+
+    /* make sure vmxassist exists (this is not an error) */
+    if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
+        return 0;
+    if (magic != VMXASSIST_MAGIC)
+        return 0;
+
+    switch (mode) {
+    /*
+     * Transfer control to vmxassist.
+     * Store the current context in VMXASSIST_OLD_CONTEXT and load
+     * the new VMXASSIST_NEW_CONTEXT context. This context was created
+     * by vmxassist and will transfer control to it.
+     */
+    case VMX_ASSIST_INVOKE:
+        /* save the old context */
+        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN))
+            goto error;
+        if (cp != 0) {
+            if (!vmx_world_save(d, &c))
+                goto error;
+            if (!vmx_copy(&c, cp, sizeof(c), COPY_OUT))
+                goto error;
+        }
+
+        /* restore the new context, this should activate vmxassist */
+        if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), COPY_IN))
+            goto error;
+        if (cp != 0) {
+            if (!vmx_copy(&c, cp, sizeof(c), COPY_IN))
+                goto error;
+            if (!vmx_world_restore(d, &c))
+                goto error;
+            return 1;
+        }
+        break;
+
+    /*
+     * Restore the VMXASSIST_OLD_CONTEXT that was saved by VMX_ASSIST_INVOKE
+     * above.
+     */
+    case VMX_ASSIST_RESTORE:
+        /* save the old context */
+        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN))
+            goto error;
+        if (cp != 0) {
+            if (!vmx_copy(&c, cp, sizeof(c), COPY_IN))
+                goto error;
+            if (!vmx_world_restore(d, &c))
+                goto error;
+            return 1;
+        }
+        break;
+    }
+
+    /*
+     * Deliberate fall-through: reaching here (unknown mode, or a context
+     * pointer of 0 after the copies succeeded) means the world switch did
+     * not happen, which is fatal for the guest.
+     */
+error:
+    printf("Failed to transfer to vmxassist\n");
+    domain_crash_synchronous();
+    return 0;
+}
+
+/*
+ * Emulate a guest write to CR0.
+ *
+ * The real GUEST_CR0 always keeps PE and PG set (VMX cannot run with them
+ * clear); the value the guest wrote is stored in CR0_READ_SHADOW so guest
+ * reads see what was written.  When the write enables paging for the first
+ * time, the guest's CR3 is translated and a shadow pagetable is installed.
+ * When the guest clears CR0.PE, real mode is emulated by world-switching
+ * to vmxassist; setting PE again while vmxassist is active switches back.
+ *
+ * Returns 1 when the caller should advance the guest's eip past the
+ * faulting instruction, 0 when eip must NOT be updated (control has been
+ * transferred to/from vmxassist, which set its own eip).
+ */
+static int vmx_set_cr0(unsigned long value)
+{
+    struct vcpu *d = current;
+    unsigned long old_base_mfn, mfn;
+    unsigned long eip;
+    int paging_enabled;
+
+    /*
+     * CR0: We don't want to lose PE and PG.
+     */
+    paging_enabled = vmx_paging_enabled(d);
+    __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
+    __vmwrite(CR0_READ_SHADOW, value);
+
+    VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
+
+    if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) {
+        /*
+         * The guest CR3 must be pointing to the guest physical.
+         */
+        if ( !VALID_MFN(mfn = phys_to_machine_mapping(
+                            d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
+             !get_page(pfn_to_page(mfn), d->domain) )
+        {
+            printk("Invalid CR3 value = %lx", d->arch.arch_vmx.cpu_cr3);
+            domain_crash_synchronous(); /* need to take a clean path */
+        }
+        old_base_mfn = pagetable_get_pfn(d->arch.guest_table);
+        if (old_base_mfn)
+            put_page(pfn_to_page(old_base_mfn));
+
+        /*
+         * Now arch.guest_table points to machine physical.
+         */
+        d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        update_pagetables(d);
+
+        VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
+                    (unsigned long) (mfn << PAGE_SHIFT));
+
+        __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table));
+        /*
+         * arch->shadow_table should hold the next CR3 for shadow
+         */
+        VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
+                    d->arch.arch_vmx.cpu_cr3, mfn);
+    }
+
+    /*
+     * VMX does not implement real-mode virtualization. We emulate
+     * real-mode by performing a world switch to VMXAssist whenever
+     * a partition disables the CR0.PE bit.
+     */
+    if ((value & X86_CR0_PE) == 0) {
+        __vmread(GUEST_RIP, &eip);
+        VMX_DBG_LOG(DBG_LEVEL_1,
+                    "Disabling CR0.PE at %%eip 0x%lx\n", eip);
+        if (vmx_assist(d, VMX_ASSIST_INVOKE)) {
+            set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &d->arch.arch_vmx.cpu_state);
+            __vmread(GUEST_RIP, &eip);
+            VMX_DBG_LOG(DBG_LEVEL_1,
+                        "Transfering control to vmxassist %%eip 0x%lx\n", eip);
+            return 0; /* do not update eip! */
+        }
+    } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
+                        &d->arch.arch_vmx.cpu_state)) {
+        __vmread(GUEST_RIP, &eip);
+        VMX_DBG_LOG(DBG_LEVEL_1,
+                    "Enabling CR0.PE at %%eip 0x%lx\n", eip);
+        if (vmx_assist(d, VMX_ASSIST_RESTORE)) {
+            clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
+                      &d->arch.arch_vmx.cpu_state);
+            __vmread(GUEST_RIP, &eip);
+            VMX_DBG_LOG(DBG_LEVEL_1,
+                        "Restoring to %%eip 0x%lx\n", eip);
+            return 0; /* do not update eip! */
+        }
+    }
+
+    return 1;
+}
+
+/* Fetch the value of general-purpose register 'reg' from the saved frame. */
+#define CASE_GET_REG(REG, reg) \
+    case REG_ ## REG: value = regs->reg; break
+
+/*
+ * Write to control registers
+ *
+ * Emulates "mov %gp, %crN" on behalf of the guest.  The source value is
+ * read from the saved register frame (or from GUEST_RSP in the VMCS for
+ * %esp, which is not held in the frame while the guest runs).
+ *
+ * Returns 1 when the caller should advance the guest's eip, 0 when it
+ * must not (vmx_set_cr0 may have world-switched to/from vmxassist).
+ * Unsupported registers or operands crash via __vmx_bug().
+ */
+static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
+{
+    unsigned long value;
+    unsigned long old_cr;
+    struct vcpu *d = current;
+
+    switch (gp) {
+    CASE_GET_REG(EAX, eax);
+    CASE_GET_REG(ECX, ecx);
+    CASE_GET_REG(EDX, edx);
+    CASE_GET_REG(EBX, ebx);
+    CASE_GET_REG(EBP, ebp);
+    CASE_GET_REG(ESI, esi);
+    CASE_GET_REG(EDI, edi);
+    case REG_ESP:
+        __vmread(GUEST_RSP, &value);
+        break;
+    default:
+        printk("invalid gp: %d\n", gp);
+        __vmx_bug(regs);
+    }
+
+    VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
+    VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
+
+    switch(cr) {
+    case 0:
+    {
+        return vmx_set_cr0(value);
+    }
+    case 3:
+    {
+        unsigned long old_base_mfn, mfn;
+
+        /*
+         * If paging is not enabled yet, simply copy the value to CR3.
+         */
+        if (!vmx_paging_enabled(d)) {
+            d->arch.arch_vmx.cpu_cr3 = value;
+            break;
+        }
+
+        /*
+         * We make a new one if the shadow does not exist.
+         */
+        if (value == d->arch.arch_vmx.cpu_cr3) {
+            /*
+             * This is simple TLB flush, implying the guest has
+             * removed some translation or changed page attributes.
+             * We simply invalidate the shadow.
+             */
+            mfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
+            if (mfn != pagetable_get_pfn(d->arch.guest_table))
+                __vmx_bug(regs);
+            shadow_sync_all(d->domain);
+        } else {
+            /*
+             * If different, make a shadow. Check if the PDBR is valid
+             * first.
+             */
+            VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
+            if ( ((value >> PAGE_SHIFT) > d->domain->max_pages ) ||
+                 !VALID_MFN(mfn = phys_to_machine_mapping(value >> PAGE_SHIFT)) ||
+                 !get_page(pfn_to_page(mfn), d->domain) )
+            {
+                printk("Invalid CR3 value=%lx", value);
+                domain_crash_synchronous(); /* need to take a clean path */
+            }
+            old_base_mfn = pagetable_get_pfn(d->arch.guest_table);
+            d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+            if (old_base_mfn)
+                put_page(pfn_to_page(old_base_mfn));
+            update_pagetables(d);
+            /*
+             * arch.shadow_table should now hold the next CR3 for shadow
+             */
+            d->arch.arch_vmx.cpu_cr3 = value;
+            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
+                        value);
+            __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table));
+        }
+        break;
+    }
+    case 4:
+        /* CR4 */
+        if (value & X86_CR4_PAE)
+            __vmx_bug(regs); /* not implemented */
+        __vmread(CR4_READ_SHADOW, &old_cr);
+
+        __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
+        __vmwrite(CR4_READ_SHADOW, value);
+
+        /*
+         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
+         * all TLB entries except global entries.
+         */
+        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
+            vmx_shadow_clear_state(d->domain);
+            shadow_sync_all(d->domain);
+        }
+        break;
+    default:
+        /* Fix: report the offending control register, not the GP register. */
+        printk("invalid cr: %d\n", cr);
+        __vmx_bug(regs);
+    }
+
+    return 1;
+}
+
+/* Store 'value' into general-purpose register 'reg' of the saved frame. */
+#define CASE_SET_REG(REG, reg) \
+    case REG_ ## REG: \
+    regs->reg = value; \
+    break
+
+/*
+ * Read from control registers. CR0 and CR4 are read from the shadow.
+ *
+ * Only "mov %cr3, %gp" actually exits here (CR0/CR4 reads are satisfied
+ * from the read shadows by hardware); any other cr is a bug.  The value
+ * returned is the guest's virtualised CR3, not the shadow CR3.
+ */
+static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+    unsigned long value;
+    struct vcpu *d = current;
+
+    if (cr != 3)
+        __vmx_bug(regs);
+
+    value = (unsigned long) d->arch.arch_vmx.cpu_cr3;
+
+    switch (gp) {
+    CASE_SET_REG(EAX, eax);
+    CASE_SET_REG(ECX, ecx);
+    CASE_SET_REG(EDX, edx);
+    CASE_SET_REG(EBX, ebx);
+    CASE_SET_REG(EBP, ebp);
+    CASE_SET_REG(ESI, esi);
+    CASE_SET_REG(EDI, edi);
+    case REG_ESP:
+        /* %esp lives in the VMCS while the guest runs; update both copies. */
+        __vmwrite(GUEST_RSP, value);
+        regs->esp = value;
+        break;
+    default:
+        printk("invalid gp: %d\n", gp);
+        __vmx_bug(regs);
+    }
+
+    VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
+}
+
+/*
+ * Dispatch a CR-access VM exit according to the exit qualification:
+ * mov-to-CR, mov-from-CR, CLTS, or LMSW.
+ *
+ * Returns 1 when the caller should advance the guest's eip past the
+ * instruction, 0 when it must not (a CR0 write may have transferred
+ * control to/from vmxassist).
+ */
+static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
+{
+    unsigned int gp, cr;
+    unsigned long value;
+
+    switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
+    case TYPE_MOV_TO_CR:
+        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
+        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
+        return mov_to_cr(gp, cr, regs);
+    case TYPE_MOV_FROM_CR:
+        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
+        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
+        mov_from_cr(cr, gp, regs);
+        break;
+    case TYPE_CLTS:
+        /* Give the guest the FPU and clear TS in both real and shadow CR0. */
+        clts();
+        setup_fpu(current);
+
+        __vmread(GUEST_CR0, &value);
+        value &= ~X86_CR0_TS; /* clear TS */
+        __vmwrite(GUEST_CR0, value);
+
+        __vmread(CR0_READ_SHADOW, &value);
+        value &= ~X86_CR0_TS; /* clear TS */
+        __vmwrite(CR0_READ_SHADOW, value);
+        break;
+    case TYPE_LMSW:
+        /* LMSW only replaces the low 4 bits (PE/MP/EM/TS) of CR0. */
+        __vmread(CR0_READ_SHADOW, &value);
+        value = (value & ~0xF) |
+            (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
+        return vmx_set_cr0(value);   /* fix: dropped unreachable break */
+    default:
+        __vmx_bug(regs);
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Handle an MSR-read exit by executing rdmsr on the host and returning
+ * the result in the guest's edx:eax.
+ *
+ * NOTE(review): the MSR index in guest %ecx is passed straight to the
+ * host rdmsr with no filtering — a guest-chosen index reads a real host
+ * MSR (and an invalid index would #GP in the hypervisor).  Flagging for
+ * follow-up; no virtualised MSR handling is visible here.
+ */
+static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
+{
+    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
+                (unsigned long)regs->ecx, (unsigned long)regs->eax,
+                (unsigned long)regs->edx);
+
+    rdmsr(regs->ecx, regs->eax, regs->edx);
+
+    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
+                "ecx=%lx, eax=%lx, edx=%lx",
+                (unsigned long)regs->ecx, (unsigned long)regs->eax,
+                (unsigned long)regs->edx);
+}
+
+/*
+ * Need to use this exit to reschedule
+ *
+ * The guest executed HLT: yield the physical CPU by raising the schedule
+ * softirq.  'eip' exists only under VMX_DEBUG; presumably VMX_DBG_LOG
+ * compiles to nothing otherwise — confirm the macro definition.
+ */
+static inline void vmx_vmexit_do_hlt(void)
+{
+#if VMX_DEBUG
+    unsigned long eip;
+    __vmread(GUEST_RIP, &eip);
+#endif
+    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%lx", eip);
+    raise_softirq(SCHEDULE_SOFTIRQ);
+}
+
+/*
+ * The guest executed MWAIT: treat it like HLT and reschedule.  As with
+ * vmx_vmexit_do_hlt(), 'eip' is only declared under VMX_DEBUG.
+ */
+static inline void vmx_vmexit_do_mwait(void)
+{
+#if VMX_DEBUG
+    unsigned long eip;
+    __vmread(GUEST_RIP, &eip);
+#endif
+    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%lx", eip);
+    raise_softirq(SCHEDULE_SOFTIRQ);
+}
+
+#define BUF_SIZ 256
+#define MAX_LINE 80
+/*
+ * Line buffer for guest console output via VMCALL.
+ * NOTE(review): this is a single hypervisor-wide buffer with no locking;
+ * concurrent output from multiple vcpus/domains would interleave —
+ * confirm whether callers serialise.  'index' also shadows libc index().
+ */
+char print_buf[BUF_SIZ];
+static int index;
+
+/*
+ * Accumulate one character of guest console output; flush the buffer to
+ * the Xen console (tagged with the domain id) on '\n' or once MAX_LINE
+ * characters have accumulated.
+ */
+static void vmx_print_line(const char c, struct vcpu *d)
+{
+
+    if (index == MAX_LINE || c == '\n') {
+        if (index == MAX_LINE) {
+            /* keep the character that overflowed the line */
+            print_buf[index++] = c;
+        }
+        print_buf[index] = '\0';
+        printk("(GUEST: %u) %s\n", d->domain->domain_id, (char *) &print_buf);
+        index = 0;
+    }
+    else
+        print_buf[index++] = c;
+}
+
+/*
+ * Snapshot the guest's execution state (cs:eip, ss:esp, eflags, data
+ * segment selectors) from the VMCS into a cpu_user_regs frame.
+ * NOTE(review): __vmread stores through the address of each field;
+ * assumes the field widths match what __vmread writes — confirm against
+ * the cpu_user_regs layout.
+ */
+void save_vmx_cpu_user_regs(struct cpu_user_regs *ctxt)
+{
+    __vmread(GUEST_SS_SELECTOR, &ctxt->ss);
+    __vmread(GUEST_RSP, &ctxt->esp);
+    __vmread(GUEST_RFLAGS, &ctxt->eflags);
+    __vmread(GUEST_CS_SELECTOR, &ctxt->cs);
+    __vmread(GUEST_RIP, &ctxt->eip);
+
+    __vmread(GUEST_GS_SELECTOR, &ctxt->gs);
+    __vmread(GUEST_FS_SELECTOR, &ctxt->fs);
+    __vmread(GUEST_ES_SELECTOR, &ctxt->es);
+    __vmread(GUEST_DS_SELECTOR, &ctxt->ds);
+}
+
+#ifdef XEN_DEBUGGER
+/*
+ * Debugger support: copy guest state from the VMCS into 'regs' so the
+ * pervasive debugger (pdb) can inspect it.  Mirrors restore_cpu_user_regs()
+ * below, which writes any debugger modifications back.
+ */
+void save_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    __vmread(GUEST_SS_SELECTOR, &regs->xss);
+    __vmread(GUEST_RSP, &regs->esp);
+    __vmread(GUEST_RFLAGS, &regs->eflags);
+    __vmread(GUEST_CS_SELECTOR, &regs->xcs);
+    __vmread(GUEST_RIP, &regs->eip);
+
+    __vmread(GUEST_GS_SELECTOR, &regs->xgs);
+    __vmread(GUEST_FS_SELECTOR, &regs->xfs);
+    __vmread(GUEST_ES_SELECTOR, &regs->xes);
+    __vmread(GUEST_DS_SELECTOR, &regs->xds);
+}
+
+/* Write (possibly debugger-modified) guest state back into the VMCS. */
+void restore_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    __vmwrite(GUEST_SS_SELECTOR, regs->xss);
+    __vmwrite(GUEST_RSP, regs->esp);
+    __vmwrite(GUEST_RFLAGS, regs->eflags);
+    __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
+    __vmwrite(GUEST_RIP, regs->eip);
+
+    __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
+    __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
+    __vmwrite(GUEST_ES_SELECTOR, regs->xes);
+    __vmwrite(GUEST_DS_SELECTOR, regs->xds);
+}
+#endif
+
+/*
+ * Main VM-exit dispatcher, entered from the VMX exit stub with the saved
+ * guest register frame on the stack.  Reads the exit reason from the
+ * VMCS, handles (or re-injects) the event, then calls vmx_intr_assist()
+ * to arrange pending interrupt injection before the guest resumes.
+ */
+asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
+{
+    unsigned int exit_reason, idtv_info_field;
+    unsigned long exit_qualification, eip, inst_len = 0;
+    struct vcpu *v = current;
+    int error;
+
+    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
+        __vmx_bug(&regs);
+
+    perfc_incra(vmexits, exit_reason);
+
+    /* Re-inject any event whose delivery was interrupted by this exit. */
+    __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
+    if (idtv_info_field & INTR_INFO_VALID_MASK) {
+        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+        if ((idtv_info_field & 0xff) == 14) {
+            unsigned long error_code;
+
+            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
+            printk("#PG error code: %lx\n", error_code);
+        }
+        VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x",
+                    idtv_info_field);
+    }
+
+    /* Don't log the frequent exits: H/W interrupts, VMCALL, and I/O. */
+    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
+        exit_reason != EXIT_REASON_VMCALL &&
+        exit_reason != EXIT_REASON_IO_INSTRUCTION)
+        VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
+
+    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+        printk("Failed vm entry\n");
+        domain_crash_synchronous();
+        return;
+    }
+
+    __vmread(GUEST_RIP, &eip);
+    TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
+
+    switch (exit_reason) {
+    case EXIT_REASON_EXCEPTION_NMI:
+    {
+        /*
+         * We don't set the software-interrupt exiting (INT n).
+         * (1) We can get an exception (e.g. #PG) in the guest, or
+         * (2) NMI
+         */
+        int error;
+        unsigned int vector;
+        unsigned long va;
+
+        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
+            || !(vector & INTR_INFO_VALID_MASK))
+            __vmx_bug(&regs);
+        vector &= 0xff;
+
+        perfc_incra(cause_vector, vector);
+
+        TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
+        switch (vector) {
+#ifdef XEN_DEBUGGER
+        case TRAP_debug:
+        {
+            save_cpu_user_regs(&regs);
+            pdb_handle_exception(1, &regs, 1);
+            restore_cpu_user_regs(&regs);
+            break;
+        }
+        case TRAP_int3:
+        {
+            save_cpu_user_regs(&regs);
+            pdb_handle_exception(3, &regs, 1);
+            restore_cpu_user_regs(&regs);
+            break;
+        }
+#else
+        case TRAP_debug:
+        {
+            void store_cpu_user_regs(struct cpu_user_regs *regs);
+            long do_sched_op(unsigned long op);
+
+
+            /* Pause the vcpu so an external debugger can attach. */
+            store_cpu_user_regs(&regs);
+            __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
+
+            set_bit(_VCPUF_ctrl_pause, &current->vcpu_flags);
+            do_sched_op(SCHEDOP_yield);
+
+            break;
+        }
+#endif
+        case TRAP_no_device:
+        {
+            vmx_do_no_device_fault();
+            break;
+        }
+        case TRAP_gp_fault:
+        {
+            vmx_do_general_protection_fault(&regs);
+            break;
+        }
+        case TRAP_page_fault:
+        {
+            /* Exit qualification holds the faulting linear address. */
+            __vmread(EXIT_QUALIFICATION, &va);
+            __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
+            VMX_DBG_LOG(DBG_LEVEL_VMMU,
+                        "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
+                        (unsigned long)regs.eax, (unsigned long)regs.ebx,
+                        (unsigned long)regs.ecx, (unsigned long)regs.edx,
+                        (unsigned long)regs.esi, (unsigned long)regs.edi);
+            v->arch.arch_vmx.vmx_platform.mpci.inst_decoder_regs = &regs;
+
+            if (!(error = vmx_do_page_fault(va, &regs))) {
+                /*
+                 * Inject #PG using Interruption-Information Fields
+                 */
+                unsigned long intr_fields;
+
+                intr_fields = (INTR_INFO_VALID_MASK |
+                               INTR_TYPE_EXCEPTION |
+                               INTR_INFO_DELIEVER_CODE_MASK |
+                               TRAP_page_fault);
+                __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
+                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, regs.error_code);
+                v->arch.arch_vmx.cpu_cr2 = va;
+                TRACE_3D(TRC_VMX_INT, v->domain->domain_id, TRAP_page_fault, va);
+            }
+            break;
+        }
+        case TRAP_nmi:
+            do_nmi(&regs, 0);
+            break;
+        default:
+            printk("unexpected VMexit for exception vector 0x%x\n", vector);
+            //__vmx_bug(&regs);
+            break;
+        }
+        break;
+    }
+    case EXIT_REASON_EXTERNAL_INTERRUPT:
+    {
+        extern asmlinkage void do_IRQ(struct cpu_user_regs *);
+        extern void smp_apic_timer_interrupt(struct cpu_user_regs *);
+        extern void timer_interrupt(int, void *, struct cpu_user_regs *);
+        unsigned int vector;
+
+        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
+            && !(vector & INTR_INFO_VALID_MASK))
+            __vmx_bug(&regs);
+
+        vector &= 0xff;
+        local_irq_disable();
+
+        /* A host interrupt arrived while the guest ran: service it here. */
+        if (vector == LOCAL_TIMER_VECTOR) {
+            smp_apic_timer_interrupt(&regs);
+        } else {
+            regs.entry_vector = vector;
+            do_IRQ(&regs);
+        }
+        break;
+    }
+    case EXIT_REASON_PENDING_INTERRUPT:
+        /* The interrupt-window closed; stop asking for interrupt exits. */
+        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                  MONITOR_CPU_BASED_EXEC_CONTROLS);
+        break;
+    case EXIT_REASON_TASK_SWITCH:
+        __vmx_bug(&regs);
+        break;
+    case EXIT_REASON_CPUID:
+        __get_instruction_length(inst_len);
+        vmx_vmexit_do_cpuid(regs.eax, &regs);
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_HLT:
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        vmx_vmexit_do_hlt();
+        break;
+    case EXIT_REASON_INVLPG:
+    {
+        unsigned long va;
+
+        __vmread(EXIT_QUALIFICATION, &va);
+        vmx_vmexit_do_invlpg(va);
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        break;
+    }
+    case EXIT_REASON_VMCALL:
+        /* Guest console output: one character per VMCALL in %eax. */
+        __get_instruction_length(inst_len);
+        __vmread(GUEST_RIP, &eip);
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+
+        vmx_print_line(regs.eax, v); /* provides the current domain */
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_CR_ACCESS:
+    {
+        __vmread(GUEST_RIP, &eip);
+        __get_instruction_length(inst_len);
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+
+        VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx",
+                    eip, inst_len, exit_qualification);
+        /* eip is advanced only when control did not go to/from vmxassist. */
+        if (vmx_cr_access(exit_qualification, &regs))
+            __update_guest_eip(inst_len);
+        break;
+    }
+    case EXIT_REASON_DR_ACCESS:
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+        vmx_dr_access(exit_qualification, &regs);
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_IO_INSTRUCTION:
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+        __get_instruction_length(inst_len);
+        vmx_io_instruction(&regs, exit_qualification, inst_len);
+        break;
+    case EXIT_REASON_MSR_READ:
+        __get_instruction_length(inst_len);
+        vmx_do_msr_read(&regs);
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_MSR_WRITE:
+        __vmread(GUEST_RIP, &eip);
+        VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%lx, eax=%lx, edx=%lx",
+                    eip, (unsigned long)regs.eax, (unsigned long)regs.edx);
+        /* just ignore this point */
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_MWAIT_INSTRUCTION:
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        vmx_vmexit_do_mwait();
+        break;
+    default:
+        __vmx_bug(&regs); /* should not happen */
+    }
+
+    vmx_intr_assist(v);
+    return;
+}
+
+/*
+ * Load the guest's virtualised %cr2 into the real %cr2 just before
+ * resuming the guest (hardware does not save/restore CR2 across VM
+ * entry/exit).  Interrupts are disabled here and stay disabled for the
+ * subsequent VM entry.
+ */
+asmlinkage void load_cr2(void)
+{
+    struct vcpu *v = current;
+
+    local_irq_disable();
+#ifdef __i386__
+    asm volatile("movl %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
+#else
+    asm volatile("movq %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
+#endif
+}
+
+#endif /* CONFIG_VMX */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/vmx_intercept.c b/xen/arch/x86/vmx_intercept.c
new file mode 100644
index 0000000000..cd7e464904
--- /dev/null
+++ b/xen/arch/x86/vmx_intercept.c
@@ -0,0 +1,264 @@
+/*
+ * vmx_intercept.c: Handle performance critical I/O packets in hypervisor space
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <asm/vmx.h>
+#include <asm/vmx_platform.h>
+#include <asm/vmx_virpit.h>
+#include <asm/vmx_intercept.h>
+#include <public/io/ioreq.h>
+
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+
+#ifdef CONFIG_VMX
+
+/*
+ * Intercept an I/O request after VM exit.  Scan the current domain's
+ * table of registered handlers and dispatch to the first whose
+ * [addr, addr + offset) range contains the request address.
+ *
+ * Returns the handler's result (non-zero == handled in the hypervisor),
+ * or 0 when no registered range matches and the request must go to the
+ * device model.
+ */
+int vmx_io_intercept(ioreq_t *p)
+{
+    struct vcpu *v = current;
+    struct vmx_handler_t *handler = &(v->arch.arch_vmx.vmx_platform.vmx_handler);
+    int i;
+
+    for (i = 0; i < handler->num_slot; i++) {
+        unsigned long start = handler->hdl_list[i].addr;
+        unsigned long len   = handler->hdl_list[i].offset;
+
+        if ((p->addr >= start) && (p->addr < start + len))
+            return handler->hdl_list[i].action(p);
+    }
+
+    return 0;
+}
+
+/*
+ * Register an intercept handler covering [addr, addr + offset) for the
+ * current domain.  The table is fixed-size (MAX_IO_HANDLER slots); when
+ * it is full the domain is crashed.  Always returns 1.
+ */
+int register_io_handler(unsigned long addr, unsigned long offset, intercept_action_t action)
+{
+    struct vcpu *v = current;
+    struct vmx_handler_t *handler = &(v->arch.arch_vmx.vmx_platform.vmx_handler);
+    int slot = handler->num_slot;
+
+    if (slot >= MAX_IO_HANDLER) {
+        printk("no extra space, register io interceptor failed!\n");
+        domain_crash_synchronous();
+    }
+
+    handler->hdl_list[slot].addr   = addr;
+    handler->hdl_list[slot].offset = offset;
+    handler->hdl_list[slot].action = action;
+    handler->num_slot = slot + 1;
+
+    return 1;
+}
+
+/*
+ * Recompute the virtual PIT's current count from the time elapsed since
+ * the last interrupt injection: the counter decrements at PIT_FREQ and
+ * wraps modulo its programmed initial value.  usec_delta is in
+ * microseconds; vpit->period is in milliseconds (hence the *1000).
+ */
+static void pit_cal_count(struct vmx_virpit_t *vpit)
+{
+    unsigned int usec_delta = (unsigned int)((NOW() - vpit->inject_point) / 1000);
+    if (usec_delta > vpit->period * 1000)
+        VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT:long time has passed from last injection!");
+    vpit->count = vpit->init_val - ((usec_delta * PIT_FREQ / 1000000) % vpit->init_val );
+}
+
+/*
+ * Emulate the PIT counter-latch command: freeze the current count so the
+ * guest can read it back byte-by-byte.  Which byte(s) get latched
+ * depends on the programmed read state (LSB only, MSB only, or both for
+ * the two-byte access modes).
+ */
+static void pit_latch_io(struct vmx_virpit_t *vpit)
+{
+    pit_cal_count(vpit);
+
+    switch(vpit->read_state) {
+    case MSByte:
+        vpit->count_MSB_latched=1;
+        break;
+    case LSByte:
+        vpit->count_LSB_latched=1;
+        break;
+    case LSByte_multiple:
+        vpit->count_LSB_latched=1;
+        vpit->count_MSB_latched=1;
+        break;
+    case MSByte_multiple:
+        /* Latch mid-sequence: restart the LSB/MSB pair from the LSB. */
+        VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT:latch PIT counter before MSB_multiple!");
+        vpit->read_state=LSByte_multiple;
+        vpit->count_LSB_latched=1;
+        vpit->count_MSB_latched=1;
+        break;
+    default:
+        BUG();
+    }
+}
+
+/*
+ * Emulate a one-byte read of the PIT count register.  Latched bytes are
+ * consumed first (LSB before MSB); an unlatched read returns the live
+ * count, selecting the byte by read_state.
+ * NOTE(review): the unlatched path tests (read_state & 0x1) to choose
+ * LSB vs MSB — this relies on the numeric encoding of the read_state
+ * enum; confirm against its definition in the headers.
+ */
+static int pit_read_io(struct vmx_virpit_t *vpit)
+{
+    if(vpit->count_LSB_latched) {
+        /* Read Least Significant Byte */
+        if(vpit->read_state==LSByte_multiple) {
+            vpit->read_state=MSByte_multiple;
+        }
+        vpit->count_LSB_latched=0;
+        return (vpit->count & 0xFF);
+    } else if(vpit->count_MSB_latched) {
+        /* Read Most Significant Byte */
+        if(vpit->read_state==MSByte_multiple) {
+            vpit->read_state=LSByte_multiple;
+        }
+        vpit->count_MSB_latched=0;
+        return ((vpit->count>>8) & 0xFF);
+    } else {
+        /* Unlatched Count Read */
+        VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT: unlatched read");
+        pit_cal_count(vpit);
+        if(!(vpit->read_state & 0x1)) {
+            /* Read Least Significant Byte */
+            if(vpit->read_state==LSByte_multiple) {
+                vpit->read_state=MSByte_multiple;
+            }
+            return (vpit->count & 0xFF);
+        } else {
+            /* Read Most Significant Byte */
+            if(vpit->read_state==MSByte_multiple) {
+                vpit->read_state=LSByte_multiple;
+            }
+            return ((vpit->count>>8) & 0xFF);
+        }
+    }
+}
+
+/*
+ * Light-weight equivalent of vmx_io_assist() for PIT reads handled
+ * entirely inside the hypervisor: mark the request consumed and merge
+ * the result into the low 1/2/4 bytes of guest %eax according to the
+ * access size.
+ */
+static void resume_pit_io(ioreq_t *p)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    unsigned long eax = regs->eax;
+
+    p->state = STATE_INVALID;
+
+    switch (p->size) {
+    case 1:
+        eax = (eax & 0xffffff00) | (p->u.data & 0xff);
+        break;
+    case 2:
+        eax = (eax & 0xffff0000) | (p->u.data & 0xffff);
+        break;
+    case 4:
+        eax = (p->u.data & 0xffffffff);
+        break;
+    default:
+        BUG();
+    }
+
+    regs->eax = eax;
+}
+
+/*
+ * Intercept action for the PIT.  Handles only single-byte port I/O
+ * (not memory-mapped, no data pointer): a latch command written to the
+ * control port 0x43 for our channel, or a read of that channel's count
+ * port (0x40 + channel).  Everything else returns 0 and falls through
+ * to the device model.  Returns 1 when handled here.
+ */
+int intercept_pit_io(ioreq_t *p)
+{
+    struct vcpu *d = current;
+    struct vmx_virpit_t *vpit = &(d->arch.arch_vmx.vmx_platform.vmx_pit);
+
+    if (p->size != 1 ||
+        p->pdata_valid ||
+        p->port_mm)
+        return 0;
+
+    if (p->addr == 0x43 &&
+        p->dir == 0 &&				/* write */
+        ((p->u.data >> 4) & 0x3) == 0 &&	/* latch command */
+        ((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */
+        pit_latch_io(vpit);
+        return 1;
+    }
+
+    if (p->addr == (0x40 + vpit->channel) &&
+        p->dir == 1) {	/* read */
+        p->u.data = pit_read_io(vpit);
+        resume_pit_io(p);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Periodic timer callback for the virtual PIT: mark the PIT interrupt
+ * pending in the guest's interrupt bitmap (counting missed ticks if the
+ * previous one has not been delivered yet) and re-arm the timer for the
+ * next period.
+ */
+static void pit_timer_fn(void *data)
+{
+    struct vmx_virpit_t *vpit = data;
+
+    /* Set the pending intr bit, and send evtchn notification to myself. */
+    if (test_and_set_bit(vpit->vector, vpit->intr_bitmap))
+        vpit->pending_intr_nr++; /* already set, then count the pending intr */
+
+    set_ac_timer(&vpit->pit_timer, NOW() + MILLISECS(vpit->period));
+}
+
+
+/* Only some PIT operations such as load init counter need a hypervisor hook.
+ * leave all other operations in user space DM
+ *
+ * Invoked when the device model returns a response in STATE_IORESP_HOOK:
+ * decode the PIT "load initial count" parameters packed into p->u.data
+ * (bits 0-15 init count, 16-23 interrupt vector, 24-25 channel, 26-27
+ * read/write mode), initialise the virtual PIT state, start the periodic
+ * timer, and register the port intercept so subsequent PIT I/O is handled
+ * in the hypervisor.
+ */
+void vmx_hooks_assist(struct vcpu *d)
+{
+    vcpu_iodata_t *vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
+    ioreq_t *p = &vio->vp_ioreq;
+    unsigned long *intr = &(vio->vp_intr[0]);
+    struct vmx_virpit_t *vpit = &(d->arch.arch_vmx.vmx_platform.vmx_pit);
+    int rw_mode;
+
+    /* load init count*/
+    if (p->state == STATE_IORESP_HOOK) {
+        /* init count for this channel */
+        vpit->init_val = (p->u.data & 0xFFFF) ;
+        /* frequency(ms) of pit */
+        vpit->period = DIV_ROUND(((vpit->init_val) * 1000), PIT_FREQ);
+        if (vpit->period < 1) {
+            /* clamp: the ac_timer granularity here is 1ms */
+            printk("VMX_PIT: guest programmed too small an init_val: %x\n",
+                   vpit->init_val);
+            vpit->period = 1;
+        }
+        vpit->vector = ((p->u.data >> 16) & 0xFF);
+        vpit->channel = ((p->u.data >> 24) & 0x3);
+        vpit->first_injected = 0;
+
+        vpit->count_LSB_latched = 0;
+        vpit->count_MSB_latched = 0;
+
+        rw_mode = ((p->u.data >> 26) & 0x3);
+        switch(rw_mode) {
+        case 0x1:
+            vpit->read_state=LSByte;
+            break;
+        case 0x2:
+            vpit->read_state=MSByte;
+            break;
+        case 0x3:
+            vpit->read_state=LSByte_multiple;
+            break;
+        default:
+            printk("VMX_PIT:wrong PIT rw_mode!\n");
+            break;
+        }
+
+        vpit->intr_bitmap = intr;
+
+        /* set up the actimer */
+        init_ac_timer(&vpit->pit_timer, pit_timer_fn, vpit, 0);
+        pit_timer_fn(vpit); /* timer seed */
+
+        /*restore the state*/
+        p->state = STATE_IORESP_READY;
+
+        /* register handler to intercept the PIT io when vm_exit */
+        register_io_handler(0x40, 4, intercept_pit_io);
+    }
+
+}
+
+#endif /* CONFIG_VMX */
diff --git a/xen/arch/x86/vmx_io.c b/xen/arch/x86/vmx_io.c
new file mode 100644
index 0000000000..370c3bb5db
--- /dev/null
+++ b/xen/arch/x86/vmx_io.c
@@ -0,0 +1,508 @@
+/*
+ * vmx_io.c: handling I/O, interrupts related VMX entry/exit
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/trace.h>
+
+#include <asm/current.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/vmx.h>
+#include <asm/vmx_vmcs.h>
+#include <xen/event.h>
+#include <public/io/ioreq.h>
+#include <asm/vmx_platform.h>
+#include <asm/vmx_virpit.h>
+
+#ifdef CONFIG_VMX
+#if defined (__i386__)
+/*
+ * Propagate the mutable guest execution state (ss:esp, eflags, cs:eip)
+ * from the saved register frame back into the VMCS after I/O emulation.
+ */
+static void load_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    /*
+     * Write the guest register value into VMCS
+     */
+    __vmwrite(GUEST_SS_SELECTOR, regs->ss);
+    __vmwrite(GUEST_RSP, regs->esp);
+    __vmwrite(GUEST_RFLAGS, regs->eflags);
+    __vmwrite(GUEST_CS_SELECTOR, regs->cs);
+    __vmwrite(GUEST_RIP, regs->eip);
+}
+
+/*
+ * Store 'value' into the guest register selected by the x86 instruction
+ * encoding: 'size' is the operand width (BYTE/WORD/LONG) and 'index' the
+ * 3-bit register number.  For BYTE, indices 0-3 address the low byte of
+ * eax/ecx/edx/ebx and 4-7 the second byte (ah/ch/dh/bh), matching the
+ * x86 8-bit register encoding.  'seg' is currently unused here.  Invalid
+ * size/index combinations are logged and ignored.
+ */
+static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
+{
+    switch (size) {
+    case BYTE:
+        switch (index) {
+        case 0:
+            regs->eax &= 0xFFFFFF00;
+            regs->eax |= (value & 0xFF);
+            break;
+        case 1:
+            regs->ecx &= 0xFFFFFF00;
+            regs->ecx |= (value & 0xFF);
+            break;
+        case 2:
+            regs->edx &= 0xFFFFFF00;
+            regs->edx |= (value & 0xFF);
+            break;
+        case 3:
+            regs->ebx &= 0xFFFFFF00;
+            regs->ebx |= (value & 0xFF);
+            break;
+        case 4:
+            regs->eax &= 0xFFFF00FF;
+            regs->eax |= ((value & 0xFF) << 8);
+            break;
+        case 5:
+            regs->ecx &= 0xFFFF00FF;
+            regs->ecx |= ((value & 0xFF) << 8);
+            break;
+        case 6:
+            regs->edx &= 0xFFFF00FF;
+            regs->edx |= ((value & 0xFF) << 8);
+            break;
+        case 7:
+            regs->ebx &= 0xFFFF00FF;
+            regs->ebx |= ((value & 0xFF) << 8);
+            break;
+        default:
+            printk("size:%x, index:%x are invalid!\n", size, index);
+            break;
+
+        }
+        break;
+    case WORD:
+        switch (index) {
+        case 0:
+            regs->eax &= 0xFFFF0000;
+            regs->eax |= (value & 0xFFFF);
+            break;
+        case 1:
+            regs->ecx &= 0xFFFF0000;
+            regs->ecx |= (value & 0xFFFF);
+            break;
+        case 2:
+            regs->edx &= 0xFFFF0000;
+            regs->edx |= (value & 0xFFFF);
+            break;
+        case 3:
+            regs->ebx &= 0xFFFF0000;
+            regs->ebx |= (value & 0xFFFF);
+            break;
+        case 4:
+            regs->esp &= 0xFFFF0000;
+            regs->esp |= (value & 0xFFFF);
+            break;
+
+        case 5:
+            regs->ebp &= 0xFFFF0000;
+            regs->ebp |= (value & 0xFFFF);
+            break;
+        case 6:
+            regs->esi &= 0xFFFF0000;
+            regs->esi |= (value & 0xFFFF);
+            break;
+        case 7:
+            regs->edi &= 0xFFFF0000;
+            regs->edi |= (value & 0xFFFF);
+            break;
+        default:
+            printk("size:%x, index:%x are invalid!\n", size, index);
+            break;
+        }
+        break;
+    case LONG:
+        switch (index) {
+        case 0:
+            regs->eax = value;
+            break;
+        case 1:
+            regs->ecx = value;
+            break;
+        case 2:
+            regs->edx = value;
+            break;
+        case 3:
+            regs->ebx = value;
+            break;
+        case 4:
+            regs->esp = value;
+            break;
+        case 5:
+            regs->ebp = value;
+            break;
+        case 6:
+            regs->esi = value;
+            break;
+        case 7:
+            regs->edi = value;
+            break;
+        default:
+            printk("size:%x, index:%x are invalid!\n", size, index);
+            break;
+        }
+        break;
+    default:
+        printk("size:%x, index:%x are invalid!\n", size, index);
+        break;
+    }
+}
+#else
+/* x86_64 variants: not implemented yet — silent no-ops. */
+static void load_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    /* XXX: TBD */
+    return;
+}
+static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
+{
+    /* XXX: TBD */
+    return;
+}
+#endif
+
+/*
+ * Complete an I/O request for which the device model has produced a
+ * response: consume the response from the shared page, update the guest
+ * registers that the emulated instruction would have modified (string-op
+ * index registers and %ecx for rep counts, destination register or %eax
+ * for reads), and clear the vcpu's IO-wait flag.  Returns without doing
+ * anything if no response is actually ready.
+ */
+void vmx_io_assist(struct vcpu *v)
+{
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    unsigned long old_eax;
+    int sign;
+    struct mi_per_cpu_info *mpci_p;
+    struct cpu_user_regs *inst_decoder_regs;
+
+    mpci_p = &v->arch.arch_vmx.vmx_platform.mpci;
+    inst_decoder_regs = mpci_p->inst_decoder_regs;
+
+    vio = (vcpu_iodata_t *) v->arch.arch_vmx.vmx_platform.shared_page_va;
+    if (vio == 0) {
+        VMX_DBG_LOG(DBG_LEVEL_1,
+                    "bad shared page: %lx", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+    p = &vio->vp_ioreq;
+
+    /* Responses needing hypervisor post-processing (PIT init) go here. */
+    if (p->state == STATE_IORESP_HOOK){
+        vmx_hooks_assist(v);
+    }
+
+    /* clear IO wait VMX flag */
+    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
+        if (p->state != STATE_IORESP_READY) {
+            /* An interrupt send event raced us */
+            return;
+        } else {
+            p->state = STATE_INVALID;
+        }
+        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+    } else {
+        return;
+    }
+
+    /* Direction flag: string ops move the index registers up or down. */
+    sign = (p->df) ? -1 : 1;
+    if (p->port_mm) {
+        /* Memory-mapped I/O */
+        if (p->pdata_valid) {
+            regs->esi += sign * p->count * p->size;
+            regs->edi += sign * p->count * p->size;
+        } else {
+            if (p->dir == IOREQ_WRITE) {
+                return;
+            }
+            /* C99 mixed declaration (mid-block) */
+            int size = -1, index = -1;
+
+            size = operand_size(v->arch.arch_vmx.vmx_platform.mpci.mmio_target);
+            index = operand_index(v->arch.arch_vmx.vmx_platform.mpci.mmio_target);
+
+            if (v->arch.arch_vmx.vmx_platform.mpci.mmio_target & WZEROEXTEND) {
+                p->u.data = p->u.data & 0xffff;
+            }
+            set_reg_value(size, index, 0, regs, p->u.data);
+
+        }
+        load_cpu_user_regs(regs);
+        return;
+    }
+
+    /* Port I/O: adjust string-op registers, or merge the result into %eax. */
+    if (p->dir == IOREQ_WRITE) {
+        if (p->pdata_valid) {
+            regs->esi += sign * p->count * p->size;
+            regs->ecx -= p->count;
+        }
+        return;
+    } else {
+        if (p->pdata_valid) {
+            regs->edi += sign * p->count * p->size;
+            regs->ecx -= p->count;
+            return;
+        }
+    }
+
+    old_eax = regs->eax;
+
+    switch(p->size) {
+    case 1:
+        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
+        break;
+    case 2:
+        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
+        break;
+    case 4:
+        regs->eax = (p->u.data & 0xffffffff);
+        break;
+    default:
+        BUG();
+    }
+}
+
+/*
+ * Acknowledge a pending event on IOPACKET_PORT.  Clears the selector and
+ * upcall bits first when nothing else in the same word remains pending,
+ * then returns nonzero iff an I/O packet event was actually pending.
+ */
+int vmx_clear_pending_io_event(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    /* evtchn_pending is shared by other event channels in 0-31 range */
+    if (!d->shared_info->evtchn_pending[IOPACKET_PORT>>5])
+        clear_bit(IOPACKET_PORT>>5, &v->vcpu_info->evtchn_pending_sel);
+
+    /* Note: VMX domains may need upcalls as well */
+    if (!v->vcpu_info->evtchn_pending_sel)
+        clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
+
+    /* clear the pending bit for IOPACKET_PORT */
+    return test_and_clear_bit(IOPACKET_PORT,
+                              &d->shared_info->evtchn_pending[0]);
+}
+
+/* Because we've cleared the pending events first, we need to guarantee that
+ * all events to be handled by xen for VMX domains are taken care of here.
+ *
+ * interrupts are guaranteed to be checked before resuming guest.
+ * VMX upcalls have been already arranged for if necessary.
+ */
+void vmx_check_events(struct vcpu *d)
+{
+    /* clear the event *before* checking for work. This should avoid
+       the set-and-check races */
+    /* NOTE(review): the event is cleared on `current` while the assist is
+       applied to the parameter `d`.  All visible callers pass current, so
+       the two coincide today -- confirm before adding new callers. */
+    if (vmx_clear_pending_io_event(current))
+        vmx_io_assist(d);
+}
+
+/* On exit from vmx_wait_io, we're guaranteed to have a I/O response from
+   the device model */
+void vmx_wait_io()
+{
+    extern void do_block();
+
+    do {
+        /* Sleep until some event (hopefully ours) is pending. */
+        if(!test_bit(IOPACKET_PORT,
+                     &current->domain->shared_info->evtchn_pending[0]))
+            do_block();
+        /* Process the response; this clears ARCH_VMX_IO_WAIT on success. */
+        vmx_check_events(current);
+        if (!test_bit(ARCH_VMX_IO_WAIT, &current->arch.arch_vmx.flags))
+            break;
+        /* Events other than IOPACKET_PORT might have woken us up. In that
+           case, safely go back to sleep. */
+        clear_bit(IOPACKET_PORT>>5, &current->vcpu_info->evtchn_pending_sel);
+        clear_bit(0, &current->vcpu_info->evtchn_upcall_pending);
+    } while(1);
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * Find-last-set: 0-based index of the most significant set bit, or -1
+ * when the word is zero.  Note this differs from the conventional fls()
+ * (which is 1-based and returns 0 for zero input).
+ */
+static inline int __fls(u32 word)
+{
+    int bit;
+
+    __asm__("bsrl %1,%0"
+            :"=r" (bit)
+            :"rm" (word));
+    return word ? bit : -1;
+}
+#else
+#define __fls(x) generic_fls(x)
+/* Portable fallback: binary search down for the highest set bit. */
+static __inline__ int generic_fls(u32 x)
+{
+    int r = 31;
+
+    if (!x)
+        return -1;
+    if (!(x & 0xffff0000u)) {
+        x <<= 16;
+        r -= 16;
+    }
+    if (!(x & 0xff000000u)) {
+        x <<= 8;
+        r -= 8;
+    }
+    if (!(x & 0xf0000000u)) {
+        x <<= 4;
+        r -= 4;
+    }
+    if (!(x & 0xc0000000u)) {
+        x <<= 2;
+        r -= 2;
+    }
+    if (!(x & 0x80000000u)) {
+        x <<= 1;
+        r -= 1;
+    }
+    return r;
+}
+#endif
+
+/* Simple minded Local APIC priority implementation. Fix later */
+/*
+ * Scan the 256-bit pending-interrupt bitmap (eight 32-bit words; word 7
+ * covers vectors 224-255) from the top down and return the highest
+ * pending vector, or -1 (from __fls) if none is set in word 0 either.
+ */
+static __inline__ int find_highest_irq(u32 *pintr)
+{
+    if (pintr[7])
+        return __fls(pintr[7]) + (256-32*1);
+    if (pintr[6])
+        return __fls(pintr[6]) + (256-32*2);
+    if (pintr[5])
+        return __fls(pintr[5]) + (256-32*3);
+    if (pintr[4])
+        return __fls(pintr[4]) + (256-32*4);
+    if (pintr[3])
+        return __fls(pintr[3]) + (256-32*5);
+    if (pintr[2])
+        return __fls(pintr[2]) + (256-32*6);
+    if (pintr[1])
+        return __fls(pintr[1]) + (256-32*7);
+    return __fls(pintr[0]);
+}
+
+/*
+ * Return 0-255 for pending irq.
+ * -1 when no pending.
+ */
+static inline int find_highest_pending_irq(struct vcpu *d)
+{
+    vcpu_iodata_t *vio;
+
+    vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
+    if (vio == 0) {
+        VMX_DBG_LOG(DBG_LEVEL_1,
+                    "bad shared page: %lx", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+
+    /* vp_intr is the interrupt bitmap shared with the device model. */
+    return find_highest_irq((unsigned int *)&vio->vp_intr[0]);
+}
+
+/* Retire `vector` from the shared pending-interrupt bitmap once injected. */
+static inline void clear_highest_bit(struct vcpu *d, int vector)
+{
+    vcpu_iodata_t *vio;
+
+    vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
+    if (vio == 0) {
+        VMX_DBG_LOG(DBG_LEVEL_1,
+                    "bad shared page: %lx", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+
+    clear_bit(vector, &vio->vp_intr[0]);
+}
+
+/* True when the guest has interrupts disabled (EFLAGS.IF clear). */
+static inline int irq_masked(unsigned long eflags)
+{
+    return ((eflags & X86_EFLAGS_IF) == 0);
+}
+
+/*
+ * Inject the highest-priority pending interrupt into the guest via the
+ * VM-entry interruption-information field, unless an event is already
+ * queued or the guest has interrupts masked.
+ */
+void vmx_intr_assist(struct vcpu *d)
+{
+    int highest_vector = find_highest_pending_irq(d);
+    unsigned long intr_fields, eflags;
+    struct vmx_virpit_t *vpit = &(d->arch.arch_vmx.vmx_platform.vmx_pit);
+
+    if (highest_vector == -1)
+        return;
+
+    /* Don't clobber an injection already queued for this VM entry. */
+    __vmread(VM_ENTRY_INTR_INFO_FIELD, &intr_fields);
+    if (intr_fields & INTR_INFO_VALID_MASK) {
+        VMX_DBG_LOG(DBG_LEVEL_1, "vmx_intr_assist: intr_fields: %lx",
+                    intr_fields);
+        return;
+    }
+
+    __vmread(GUEST_RFLAGS, &eflags);
+    if (irq_masked(eflags)) {
+        VMX_DBG_LOG(DBG_LEVEL_1, "guesting pending: %x, eflags: %lx",
+                    highest_vector, eflags);
+        return;
+    }
+
+    /* Virtual PIT ticks are counted, not bitmapped, so account separately. */
+    if (vpit->pending_intr_nr && highest_vector == vpit->vector)
+        vpit->pending_intr_nr--;
+    else
+        clear_highest_bit(d, highest_vector);
+
+    /* close the window between guest PIT initialization and sti */
+    if (highest_vector == vpit->vector && !vpit->first_injected){
+        vpit->first_injected = 1;
+        vpit->pending_intr_nr = 0;
+    }
+
+    intr_fields = (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | highest_vector);
+    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
+
+    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+
+    TRACE_3D(TRC_VMX_INT, d->domain->domain_id, highest_vector, 0);
+    /* Timestamp PIT injections so delivery latency can be measured. */
+    if (highest_vector == vpit->vector)
+        vpit->inject_point = NOW();
+
+    return;
+}
+
+/*
+ * Per-resume VMCS fixup: refresh CR3/host state, drain any pending I/O
+ * (blocking until the device model responds if necessary), then inject
+ * pending interrupts before re-entering the guest.
+ */
+void vmx_do_resume(struct vcpu *d)
+{
+    vmx_stts();
+    if ( vmx_paging_enabled(d) )
+        __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table));
+    else
+        // paging is not enabled in the guest
+        __vmwrite(GUEST_CR3, pagetable_get_paddr(d->domain->arch.phys_table));
+
+    __vmwrite(HOST_CR3, pagetable_get_paddr(d->arch.monitor_table));
+    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
+
+    if (event_pending(d)) {
+        vmx_check_events(d);
+
+        if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags))
+            vmx_wait_io();
+    }
+
+    /* We can't resume the guest if we're waiting on I/O */
+    ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags));
+
+    /* We always check for interrupts before resuming guest */
+    vmx_intr_assist(d);
+}
+
+#endif /* CONFIG_VMX */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/vmx_platform.c b/xen/arch/x86/vmx_platform.c
new file mode 100644
index 0000000000..339f4da59b
--- /dev/null
+++ b/xen/arch/x86/vmx_platform.c
@@ -0,0 +1,674 @@
+/*
+ * vmx_platform.c: handling x86 platform related MMIO instructions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <asm/shadow.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/trace.h>
+#include <asm/vmx.h>
+#include <asm/vmx_platform.h>
+#include <public/io/ioreq.h>
+
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+
+#ifdef CONFIG_VMX
+
+#define DECODE_success 1
+#define DECODE_failure 0
+
+#if defined (__x86_64__)
+/* x86_64 stub: register snapshot for the MMIO decoder is not wired up yet. */
+void store_cpu_user_regs(struct cpu_user_regs *regs)
+{
+
+}
+
+/* x86_64 stub: always returns 0; guest register reads are TBD here. */
+static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+{
+    return 0;
+}
+#elif defined (__i386__)
+/* Snapshot guest ss/esp/eflags/cs/ds/es/eip from the VMCS into *regs. */
+void store_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    __vmread(GUEST_SS_SELECTOR, &regs->ss);
+    __vmread(GUEST_RSP, &regs->esp);
+    __vmread(GUEST_RFLAGS, &regs->eflags);
+    __vmread(GUEST_CS_SELECTOR, &regs->cs);
+    __vmread(GUEST_DS_SELECTOR, &regs->ds);
+    __vmread(GUEST_ES_SELECTOR, &regs->es);
+    __vmread(GUEST_RIP, &regs->eip);
+}
+
+/*
+ * Fetch a guest general-purpose register by (size, index) as encoded in
+ * the ModR/M reg field.  Byte/word reads are sign-extended through the
+ * (char)/(short) casts; `seg` is currently unused.  Returns -1 and logs
+ * on an out-of-range size or index.
+ */
+static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+{
+    /*
+     * Reference the db_reg[] table
+     */
+    switch (size) {
+    case BYTE:
+        switch (index) {
+        case 0: //%al
+            return (char)(regs->eax & 0xFF);
+        case 1: //%cl
+            return (char)(regs->ecx & 0xFF);
+        case 2: //%dl
+            return (char)(regs->edx & 0xFF);
+        case 3: //%bl
+            return (char)(regs->ebx & 0xFF);
+        case 4: //%ah
+            return (char)((regs->eax & 0xFF00) >> 8);
+        case 5: //%ch
+            return (char)((regs->ecx & 0xFF00) >> 8);
+        case 6: //%dh
+            return (char)((regs->edx & 0xFF00) >> 8);
+        case 7: //%bh
+            return (char)((regs->ebx & 0xFF00) >> 8);
+        default:
+            printk("(get_reg_value)size case 0 error\n");
+            return -1;
+        }
+    case WORD:
+        switch (index) {
+        case 0: //%ax
+            return (short)(regs->eax & 0xFFFF);
+        case 1: //%cx
+            return (short)(regs->ecx & 0xFFFF);
+        case 2: //%dx
+            return (short)(regs->edx & 0xFFFF);
+        case 3: //%bx
+            return (short)(regs->ebx & 0xFFFF);
+        case 4: //%sp
+            return (short)(regs->esp & 0xFFFF);
+            break;
+        case 5: //%bp
+            return (short)(regs->ebp & 0xFFFF);
+        case 6: //%si
+            return (short)(regs->esi & 0xFFFF);
+        case 7: //%di
+            return (short)(regs->edi & 0xFFFF);
+        default:
+            printk("(get_reg_value)size case 1 error\n");
+            return -1;
+        }
+    case LONG:
+        switch (index) {
+        case 0: //%eax
+            return regs->eax;
+        case 1: //%ecx
+            return regs->ecx;
+        case 2: //%edx
+            return regs->edx;
+
+        case 3: //%ebx
+            return regs->ebx;
+        case 4: //%esp
+            return regs->esp;
+        case 5: //%ebp
+            return regs->ebp;
+        case 6: //%esi
+            return regs->esi;
+        case 7: //%edi
+            return regs->edi;
+        default:
+            printk("(get_reg_value)size case 2 error\n");
+            return -1;
+        }
+    default:
+        printk("(get_reg_value)size case error\n");
+        return -1;
+    }
+}
+#endif
+
+/*
+ * Consume legacy instruction prefixes, recording their effects in
+ * *thread_inst, and return a pointer to the first opcode byte.
+ */
+static inline unsigned char *check_prefix(unsigned char *inst, struct instruction *thread_inst)
+{
+    while (1) {
+        switch (*inst) {
+        case 0xf3: //REPZ
+            /* NOTE(review): flags is assigned, not OR-ed, so a later
+               prefix overwrites an earlier one -- confirm intended. */
+            thread_inst->flags = REPZ;
+            break;
+        case 0xf2: //REPNZ
+            thread_inst->flags = REPNZ;
+            break;
+        case 0xf0: //LOCK
+            /* LOCK is irrelevant to MMIO emulation; just skip it. */
+            break;
+        case 0x2e: //CS
+        case 0x36: //SS
+        case 0x3e: //DS
+        case 0x26: //ES
+        case 0x64: //FS
+        case 0x65: //GS
+            thread_inst->seg_sel = *inst;
+            break;
+        case 0x66: //32bit->16bit
+            thread_inst->op_size = WORD;
+            break;
+        case 0x67:
+            /* Address-size override is not supported. */
+            printf("Not handling 0x67 (yet)\n");
+            domain_crash_synchronous();
+            break;
+        default:
+            /* Not a prefix: this is the opcode byte. */
+            return inst;
+        }
+        inst++;
+    }
+}
+
+/*
+ * Extract the immediate operand that follows the ModR/M (and optional
+ * SIB / displacement) bytes.  `op16` selects 16-bit addressing, so
+ * displacements are 2 bytes instead of 4; `op_size` is the immediate
+ * width in bytes.  The value is assembled little-endian.
+ */
+static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size)
+{
+    int mod, reg, rm;
+    unsigned long val = 0;
+    int i;
+
+    /* `reg` is decoded but unused here; kept for symmetry with get_index(). */
+    mod = (*inst >> 6) & 3;
+    reg = (*inst >> 3) & 7;
+    rm = *inst & 7;
+
+    inst++; //skip ModR/M byte
+    if (mod != 3 && rm == 4) {
+        /* NOTE(review): a SIB base of 5 with mod == 0 implies an extra
+           disp32 that is not skipped here -- confirm whether such forms
+           can reach this decoder. */
+        inst++; //skip SIB byte
+    }
+
+    switch(mod) {
+    case 0:
+        if (rm == 5) {
+            if (op16)
+                inst = inst + 2; //disp16, skip 2 bytes
+            else
+                inst = inst + 4; //disp32, skip 4 bytes
+        }
+        break;
+    case 1:
+        inst++; //disp8, skip 1 byte
+        break;
+    case 2:
+        if (op16)
+            inst = inst + 2; //disp16, skip 2 bytes
+        else
+            inst = inst + 4; //disp32, skip 4 bytes
+        break;
+    }
+    /* Assemble the little-endian immediate one byte at a time. */
+    for (i = 0; i < op_size; i++) {
+        val |= (*inst++ & 0xff) << (8 * i);
+    }
+
+    return val;
+}
+
+/*
+ * Return the register number of the single register operand encoded in
+ * a ModR/M byte: the r/m field for register-direct forms (mod == 3),
+ * the reg field otherwise.
+ */
+static inline int get_index(const unsigned char *inst)
+{
+    int mod, reg, rm;
+
+    mod = (*inst >> 6) & 3;
+    reg = (*inst >> 3) & 7;
+    rm = *inst & 7;
+
+    //Only one operand in the instruction is register
+    if (mod == 3) {
+        return rm;
+    } else {
+        return reg;
+    }
+    /* Unreachable: both branches above return. */
+    return 0;
+}
+
+/*
+ * Decode the (prefix-stripped) instruction at `inst` into *thread_inst.
+ * Only the mov family plus movs/stos/movz is recognised; anything else
+ * returns DECODE_failure.  In vm86 mode the default operand size is
+ * 16-bit, so the meaning of the 0x66 prefix (already folded into
+ * op_size by check_prefix()) is inverted first.
+ */
+static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst)
+{
+    unsigned long eflags;
+    int index, vm86 = 0;
+
+    __vmread(GUEST_RFLAGS, &eflags);
+    if (eflags & X86_EFLAGS_VM)
+        vm86 = 1;
+
+    if (vm86) { /* meaning is reversed */
+        if (thread_inst->op_size == WORD)
+            thread_inst->op_size = LONG;
+        else if (thread_inst->op_size == LONG)
+            thread_inst->op_size = WORD;
+        else if (thread_inst->op_size == 0)
+            thread_inst->op_size = WORD;
+    }
+
+    switch(*inst) {
+    case 0x88:
+        /* mov r8 to m8 */
+        thread_inst->op_size = BYTE;
+        index = get_index((inst + 1));
+        thread_inst->operand[0] = mk_operand(BYTE, index, 0, REGISTER);
+        break;
+    case 0x89:
+        /* mov r32/16 to m32/16 */
+        index = get_index((inst + 1));
+        if (thread_inst->op_size == WORD) {
+            thread_inst->operand[0] = mk_operand(WORD, index, 0, REGISTER);
+        } else {
+            thread_inst->op_size = LONG;
+            thread_inst->operand[0] = mk_operand(LONG, index, 0, REGISTER);
+        }
+        break;
+    case 0x8a:
+        /* mov m8 to r8 */
+        thread_inst->op_size = BYTE;
+        index = get_index((inst + 1));
+        thread_inst->operand[1] = mk_operand(BYTE, index, 0, REGISTER);
+        break;
+    case 0x8b:
+        /* mov m32/16 to r32/16 (note: opcode comment direction) */
+        index = get_index((inst + 1));
+        if (thread_inst->op_size == WORD) {
+            thread_inst->operand[1] = mk_operand(WORD, index, 0, REGISTER);
+        } else {
+            thread_inst->op_size = LONG;
+            thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
+        }
+        break;
+    case 0x8c:
+    case 0x8e:
+        printk("%x, This opcode hasn't been handled yet!", *inst);
+        return DECODE_failure;
+        /* Not handle it yet. */
+    case 0xa0:
+        /* mov byte to al */
+        thread_inst->op_size = BYTE;
+        thread_inst->operand[1] = mk_operand(BYTE, 0, 0, REGISTER);
+        break;
+    case 0xa1:
+        /* mov word/doubleword to ax/eax */
+        if (thread_inst->op_size == WORD) {
+            thread_inst->operand[1] = mk_operand(WORD, 0, 0, REGISTER);
+        } else {
+            thread_inst->op_size = LONG;
+            thread_inst->operand[1] = mk_operand(LONG, 0, 0, REGISTER);
+        }
+        break;
+    case 0xa2:
+        /* mov al to (seg:offset) */
+        thread_inst->op_size = BYTE;
+        thread_inst->operand[0] = mk_operand(BYTE, 0, 0, REGISTER);
+        break;
+    case 0xa3:
+        /* mov ax/eax to (seg:offset) */
+        if (thread_inst->op_size == WORD) {
+            thread_inst->operand[0] = mk_operand(WORD, 0, 0, REGISTER);
+        } else {
+            thread_inst->op_size = LONG;
+            thread_inst->operand[0] = mk_operand(LONG, 0, 0, REGISTER);
+        }
+        break;
+    case 0xa4:
+        /* movsb */
+        thread_inst->op_size = BYTE;
+        strcpy((char *)thread_inst->i_name, "movs");
+        return DECODE_success;
+    case 0xa5:
+        /* movsw/movsl */
+        if (thread_inst->op_size == WORD) {
+        } else {
+            thread_inst->op_size = LONG;
+        }
+        strcpy((char *)thread_inst->i_name, "movs");
+        return DECODE_success;
+    case 0xaa:
+        /* stosb */
+        thread_inst->op_size = BYTE;
+        strcpy((char *)thread_inst->i_name, "stosb");
+        return DECODE_success;
+    case 0xab:
+        /* stosw/stosl */
+        if (thread_inst->op_size == WORD) {
+            strcpy((char *)thread_inst->i_name, "stosw");
+        } else {
+            thread_inst->op_size = LONG;
+            strcpy((char *)thread_inst->i_name, "stosl");
+        }
+        return DECODE_success;
+    case 0xc6:
+        /* mov imm8 to m8 */
+        thread_inst->op_size = BYTE;
+        thread_inst->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
+        thread_inst->immediate = get_immediate(vm86,
+                                               (inst+1), thread_inst->op_size);
+        break;
+    case 0xc7:
+        /* mov imm16/32 to m16/32 */
+        if (thread_inst->op_size == WORD) {
+            thread_inst->operand[0] = mk_operand(WORD, 0, 0, IMMEDIATE);
+        } else {
+            thread_inst->op_size = LONG;
+            thread_inst->operand[0] = mk_operand(LONG, 0, 0, IMMEDIATE);
+        }
+        thread_inst->immediate = get_immediate(vm86,
+                                               (inst+1), thread_inst->op_size);
+        break;
+    case 0x0f:
+        /* Two-byte opcode; decoded below. */
+        break;
+    default:
+        printk("%x, This opcode hasn't been handled yet!", *inst);
+        return DECODE_failure;
+    }
+
+    strcpy((char *)thread_inst->i_name, "mov");
+    if (*inst != 0x0f) {
+        return DECODE_success;
+    }
+
+    /* 0x0f escape: only movzx byte/word forms are supported. */
+    inst++;
+    switch (*inst) {
+
+    /* movz */
+    case 0xb6:
+        index = get_index((inst + 1));
+        if (thread_inst->op_size == WORD) {
+            thread_inst->operand[1] = mk_operand(WORD, index, 0, REGISTER);
+        } else {
+            thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
+
+        }
+        thread_inst->op_size = BYTE;
+        strcpy((char *)thread_inst->i_name, "movzb");
+
+        return DECODE_success;
+    case 0xb7:
+        thread_inst->op_size = WORD;
+        index = get_index((inst + 1));
+        thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER);
+        strcpy((char *)thread_inst->i_name, "movzw");
+
+        return DECODE_success;
+    default:
+        printk("0f %x, This opcode hasn't been handled yet!", *inst);
+        return DECODE_failure;
+    }
+
+    /* will never reach here */
+    return DECODE_failure;
+}
+
+/*
+ * Copy up to MAX_INST_LEN instruction bytes from guest address
+ * `guest_eip` into `buf`, translating through the guest page tables
+ * when paging is enabled and handling an instruction that straddles a
+ * page boundary (two map/copy passes).  Returns the number of bytes
+ * copied, or 0 on a bad length.
+ */
+int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
+                         int inst_len)
+{
+    l1_pgentry_t gpte;
+    unsigned long mfn;
+    unsigned char *inst_start;
+    int remaining = 0;
+
+    if ( (inst_len > MAX_INST_LEN) || (inst_len <= 0) )
+        return 0;
+
+    if ( vmx_paging_enabled(current) )
+    {
+        gpte = gva_to_gpte(guest_eip);
+        mfn = phys_to_machine_mapping(l1e_get_pfn(gpte));
+        /* Does this cross a page boundary ? */
+        if ( (guest_eip & PAGE_MASK) != ((guest_eip + inst_len) & PAGE_MASK) )
+        {
+            /* Split: copy only up to the page end now, the rest below. */
+            remaining = (guest_eip + inst_len) & ~PAGE_MASK;
+            inst_len -= remaining;
+        }
+    }
+    else
+    {
+        /* No paging: guest_eip is a guest-physical address. */
+        mfn = phys_to_machine_mapping(guest_eip >> PAGE_SHIFT);
+    }
+
+    inst_start = map_domain_page(mfn);
+    memcpy((char *)buf, inst_start + (guest_eip & ~PAGE_MASK), inst_len);
+    unmap_domain_page(inst_start);
+
+    if ( remaining )
+    {
+        /* Second pass: the tail of the instruction on the next page. */
+        gpte = gva_to_gpte(guest_eip+inst_len+remaining);
+        mfn = phys_to_machine_mapping(l1e_get_pfn(gpte));
+        inst_start = map_domain_page(mfn);
+        memcpy((char *)buf+inst_len, inst_start, remaining);
+        unmap_domain_page(inst_start);
+    }
+
+    return inst_len+remaining;
+}
+
+/* Reset a decoder scratch `struct instruction` to a known state. */
+static void init_instruction(struct instruction *mmio_inst)
+{
+    /* NOTE(review): this fills i_name with the character '0' (0x30),
+       not NUL bytes.  It works because i_name is only ever read via
+       strcpy/strncmp afterwards, but '\0' was probably intended. */
+    memset(mmio_inst->i_name, '0', I_NAME_LEN);
+    mmio_inst->op_size = 0;
+    mmio_inst->offset = 0;
+    mmio_inst->immediate = 0;
+    mmio_inst->seg_sel = 0;
+    mmio_inst->op_num = 0;
+
+    mmio_inst->operand[0] = 0;
+    mmio_inst->operand[1] = 0;
+    mmio_inst->operand[2] = 0;
+
+    mmio_inst->flags = 0;
+}
+
+/*
+ * True when the decoded instruction reads from MMIO into a register
+ * (i.e. its destination operand is a register).
+ */
+static int read_from_mmio(struct instruction *inst_p)
+{
+    // Only for mov instruction now!!!
+    if (inst_p->operand[1] & REGISTER)
+        return 1;
+
+    return 0;
+}
+
+// dir: 1 read from mmio
+// 0 write to mmio
+/*
+ * Build an MMIO ioreq in the shared page, notify the device model via
+ * IOPACKET_PORT, and block (vmx_wait_io) until the response arrives.
+ * `pvalid` marks `value` as a guest virtual address (string ops) rather
+ * than immediate data.
+ */
+static void send_mmio_req(unsigned long gpa,
+                          struct instruction *inst_p, long value, int dir, int pvalid)
+{
+    struct vcpu *d = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+    int vm86;
+    struct mi_per_cpu_info *mpci_p;
+    struct cpu_user_regs *inst_decoder_regs;
+    extern long evtchn_send(int lport);
+
+    mpci_p = &current->arch.arch_vmx.vmx_platform.mpci;
+    inst_decoder_regs = mpci_p->inst_decoder_regs;
+
+    vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
+    if (vio == NULL) {
+        printk("bad shared page\n");
+        domain_crash_synchronous();
+    }
+    p = &vio->vp_ioreq;
+
+    vm86 = inst_decoder_regs->eflags & X86_EFLAGS_VM;
+
+    /* Only one request may be outstanding per vcpu. */
+    if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) {
+        printf("VMX I/O has not yet completed\n");
+        domain_crash_synchronous();
+    }
+
+    set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
+    p->dir = dir;
+    p->pdata_valid = pvalid;
+
+    p->port_mm = 1;
+    p->size = inst_p->op_size;
+    p->addr = gpa;
+    p->u.data = value;
+
+    p->state = STATE_IOREQ_READY;
+
+    /* REP-prefixed string ops carry a count from (e)cx. */
+    if (inst_p->flags & REPZ) {
+        if (vm86)
+            p->count = inst_decoder_regs->ecx & 0xFFFF;
+        else
+            p->count = inst_decoder_regs->ecx;
+        p->df = (inst_decoder_regs->eflags & EF_DF) ? 1 : 0;
+    } else
+        p->count = 1;
+
+    /* Translate the guest-virtual data pointer for the device model. */
+    if ((pvalid) && vmx_paging_enabled(current))
+        p->u.pdata = (void *) gva_to_gpa(p->u.data);
+
+#if 0
+    printf("send_mmio_req: eip 0x%lx:0x%lx, dir %d, pdata_valid %d, ",
+           inst_decoder_regs->cs, inst_decoder_regs->eip, p->dir, p->pdata_valid);
+    printf("port_mm %d, size %lld, addr 0x%llx, value 0x%lx, count %lld\n",
+           p->port_mm, p->size, p->addr, value, p->count);
+#endif
+
+    evtchn_send(IOPACKET_PORT);
+    vmx_wait_io();
+}
+
+/*
+ * Emulate the instruction that faulted on MMIO address `va` (guest
+ * physical `gpa`): fetch and decode it, advance the guest RIP past it,
+ * then forward the access to the device model.  Any unrecognised or
+ * mishandled case crashes the domain.
+ */
+void handle_mmio(unsigned long va, unsigned long gpa)
+{
+    unsigned long eip, eflags, cs;
+    unsigned long inst_len, inst_addr;
+    struct mi_per_cpu_info *mpci_p;
+    struct cpu_user_regs *inst_decoder_regs;
+    struct instruction mmio_inst;
+    unsigned char inst[MAX_INST_LEN];
+    int vm86, ret;
+
+    mpci_p = &current->arch.arch_vmx.vmx_platform.mpci;
+    inst_decoder_regs = mpci_p->inst_decoder_regs;
+
+    __vmread(GUEST_RIP, &eip);
+    __vmread(INSTRUCTION_LEN, &inst_len);
+
+    __vmread(GUEST_RFLAGS, &eflags);
+    vm86 = eflags & X86_EFLAGS_VM;
+
+    /* In vm86 mode the linear address is segment<<4 + offset. */
+    if (vm86) {
+        __vmread(GUEST_CS_SELECTOR, &cs);
+        inst_addr = (cs << 4) + eip;
+    } else
+        inst_addr = eip; /* XXX should really look at GDT[cs].base too */
+
+    /* NOTE(review): fills with '0' (0x30), not NUL -- harmless since the
+       buffer is fully overwritten by the copy below, but suspicious. */
+    memset(inst, '0', MAX_INST_LEN);
+    ret = inst_copy_from_guest(inst, inst_addr, inst_len);
+    if (ret != inst_len) {
+        printk("handle_mmio - EXIT: get guest instruction fault\n");
+        domain_crash_synchronous();
+    }
+
+#if 0
+    printk("handle_mmio: cs:eip 0x%lx:0x%lx(0x%lx): opcode",
+           cs, eip, inst_addr, inst_len);
+    for (ret = 0; ret < inst_len; ret++)
+        printk(" %02x", inst[ret]);
+    printk("\n");
+#endif
+
+    init_instruction(&mmio_inst);
+
+    if (vmx_decode(check_prefix(inst, &mmio_inst), &mmio_inst) == DECODE_failure)
+        domain_crash_synchronous();
+
+    /* Skip the emulated instruction before handing off. */
+    __vmwrite(GUEST_RIP, eip + inst_len);
+    store_cpu_user_regs(inst_decoder_regs);
+
+    // Only handle "mov" and "movs" instructions!
+    if (!strncmp((char *)mmio_inst.i_name, "movz", 4)) {
+        if (read_from_mmio(&mmio_inst)) {
+            // Send the request and waiting for return value.
+            mpci_p->mmio_target = mmio_inst.operand[1] | WZEROEXTEND;
+            send_mmio_req(gpa, &mmio_inst, 0, IOREQ_READ, 0);
+            return ;
+        } else {
+            printk("handle_mmio - EXIT: movz error!\n");
+            domain_crash_synchronous();
+        }
+    }
+
+    if (!strncmp((char *)mmio_inst.i_name, "movs", 4)) {
+        unsigned long addr = 0;
+        int dir;
+
+        /* Decide direction by which side of the movs hit the MMIO page. */
+        if (vm86) {
+            unsigned long seg;
+
+            __vmread(GUEST_ES_SELECTOR, &seg);
+            if (((seg << 4) + (inst_decoder_regs->edi & 0xFFFF)) == va) {
+                dir = IOREQ_WRITE;
+                __vmread(GUEST_DS_SELECTOR, &seg);
+                addr = (seg << 4) + (inst_decoder_regs->esi & 0xFFFF);
+            } else {
+                dir = IOREQ_READ;
+                addr = (seg << 4) + (inst_decoder_regs->edi & 0xFFFF);
+            }
+        } else { /* XXX should really look at GDT[ds/es].base too */
+            if (va == inst_decoder_regs->edi) {
+                dir = IOREQ_WRITE;
+                addr = inst_decoder_regs->esi;
+            } else {
+                dir = IOREQ_READ;
+                addr = inst_decoder_regs->edi;
+            }
+        }
+
+        send_mmio_req(gpa, &mmio_inst, addr, dir, 1);
+        return;
+    }
+
+    if (!strncmp((char *)mmio_inst.i_name, "mov", 3)) {
+        long value = 0;
+        int size, index;
+
+        if (read_from_mmio(&mmio_inst)) {
+            // Send the request and waiting for return value.
+            mpci_p->mmio_target = mmio_inst.operand[1];
+            send_mmio_req(gpa, &mmio_inst, value, IOREQ_READ, 0);
+            /* NOTE(review): no `return` here -- after the read completes
+               control falls through to domain_crash_synchronous() below.
+               Looks like a missing return; confirm against later fixes. */
+        } else {
+            // Write to MMIO
+            if (mmio_inst.operand[0] & IMMEDIATE) {
+                value = mmio_inst.immediate;
+            } else if (mmio_inst.operand[0] & REGISTER) {
+                size = operand_size(mmio_inst.operand[0]);
+                index = operand_index(mmio_inst.operand[0]);
+                value = get_reg_value(size, index, 0, inst_decoder_regs);
+            } else {
+                domain_crash_synchronous();
+            }
+            send_mmio_req(gpa, &mmio_inst, value, IOREQ_WRITE, 0);
+            return;
+        }
+    }
+
+    if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) {
+        send_mmio_req(gpa, &mmio_inst,
+                      inst_decoder_regs->eax, IOREQ_WRITE, 0);
+        /* NOTE(review): also falls through to the crash below -- a
+           `return` after the send appears to be missing. */
+    }
+
+    domain_crash_synchronous();
+}
+
+#endif /* CONFIG_VMX */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/vmx_vmcs.c b/xen/arch/x86/vmx_vmcs.c
new file mode 100644
index 0000000000..9b32d1d502
--- /dev/null
+++ b/xen/arch/x86/vmx_vmcs.c
@@ -0,0 +1,466 @@
+/*
+ * vmx_vmcs.c: VMCS management
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/domain_page.h>
+#include <asm/current.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/vmx.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <public/io/ioreq.h>
+
+#ifdef CONFIG_VMX
+
+/*
+ * Allocate and zero a VMCS region.  The required size and revision id
+ * come from the IA32_VMX_BASIC MSR; the size is also recorded in the
+ * file-scope `vmcs_size` (declared elsewhere) for free_vmcs().
+ * NOTE(review): the alloc_xenheap_pages() result is not checked for
+ * NULL before the memset -- confirm callers tolerate/expect a crash.
+ */
+struct vmcs_struct *alloc_vmcs(void)
+{
+    struct vmcs_struct *vmcs;
+    u32 vmx_msr_low, vmx_msr_high;
+
+    rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
+    vmcs_size = vmx_msr_high & 0x1fff;
+    vmcs = alloc_xenheap_pages(get_order(vmcs_size));
+    memset((char *)vmcs, 0, vmcs_size); /* don't remove this */
+
+    vmcs->vmcs_revision_id = vmx_msr_low;
+    return vmcs;
+}
+
+/*
+ * Release a VMCS region previously returned by alloc_vmcs().
+ *
+ * The free must mirror the allocation, which used
+ * get_order(vmcs_size).  The previous computation,
+ * "(vmcs_size >> PAGE_SHIFT) - 1", evaluates to -1 for any
+ * vmcs_size <= PAGE_SIZE (the common case, since the VMX-reported size
+ * is at most 4096), passing a bogus order to free_xenheap_pages().
+ */
+void free_vmcs(struct vmcs_struct *vmcs)
+{
+    free_xenheap_pages(vmcs, get_order(vmcs_size));
+}
+
+/*
+ * Program the pin-based, processor-based, VM-exit and VM-entry control
+ * fields from the MONITOR_* constants.  Returns the OR of the
+ * __vmwrite() results (nonzero on any failure).
+ */
+static inline int construct_vmcs_controls(void)
+{
+    int error = 0;
+
+    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+                       MONITOR_PIN_BASED_EXEC_CONTROLS);
+
+    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                       MONITOR_CPU_BASED_EXEC_CONTROLS);
+
+    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
+    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);
+
+    return error;
+}
+
+#define GUEST_SEGMENT_LIMIT 0xffffffff
+#define HOST_SEGMENT_LIMIT 0xffffffff
+
+/*
+ * Scratch record of the host's segmentation and control-register state,
+ * captured while building a VMCS so the guest can initially mirror it.
+ */
+struct host_execution_env {
+    /* selectors */
+    unsigned short ldtr_selector;
+    unsigned short tr_selector;
+    unsigned short ds_selector;
+    unsigned short cs_selector;
+    /* limits */
+    unsigned short gdtr_limit;
+    unsigned short ldtr_limit;
+    unsigned short idtr_limit;
+    unsigned short tr_limit;
+    /* base */
+    unsigned long gdtr_base;
+    unsigned long ldtr_base;
+    unsigned long idtr_base;
+    unsigned long tr_base;
+    unsigned long ds_base;
+    unsigned long cs_base;
+    /* control registers */
+    unsigned long cr3;
+    unsigned long cr0;
+    unsigned long cr4;
+    unsigned long dr7;
+};
+
+#define round_pgdown(_p) ((_p)&PAGE_MASK) /* coped from domain.c */
+
+/*
+ * Locate the E820_SHARED_PAGE entry in the guest-supplied e820 map
+ * (address in edi, entry count in ecx), map that page and record it as
+ * the vcpu's device-model shared page.  The mapping is deliberately
+ * left in place (shared_page_va).  Returns 0 on success, -1 on a bad
+ * map or missing shared-page entry.
+ */
+int vmx_setup_platform(struct vcpu *d, struct cpu_user_regs *regs)
+{
+    int i;
+    unsigned int n;
+    unsigned long *p, mpfn, offset, addr;
+    struct e820entry *e820p;
+    unsigned long gpfn = 0;
+
+    regs->ebx = 0; /* Linux expects ebx to be 0 for boot proc */
+
+    n = regs->ecx;
+    if (n > 32) {
+        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %d", n);
+        return -1;
+    }
+
+    /* The e820 map may not be page-aligned; map its page and offset in. */
+    addr = regs->edi;
+    offset = (addr & ~PAGE_MASK);
+    addr = round_pgdown(addr);
+
+    mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT);
+    p = map_domain_page(mpfn);
+
+    e820p = (struct e820entry *) ((unsigned long) p + offset);
+
+#ifndef NDEBUG
+    print_e820_memory_map(e820p, n);
+#endif
+
+    for ( i = 0; i < n; i++ )
+    {
+        if ( e820p[i].type == E820_SHARED_PAGE )
+        {
+            gpfn = (e820p[i].addr >> PAGE_SHIFT);
+            break;
+        }
+    }
+
+    if ( gpfn == 0 )
+    {
+        unmap_domain_page(p);
+        return -1;
+    }
+
+    unmap_domain_page(p);
+
+    /* Initialise shared page */
+    mpfn = phys_to_machine_mapping(gpfn);
+    p = map_domain_page(mpfn);
+    memset(p, 0, PAGE_SIZE);
+    d->arch.arch_vmx.vmx_platform.shared_page_va = (unsigned long)p;
+
+    return 0;
+}
+
+/*
+ * First-launch VMCS setup for a vcpu: capture host GDT/TR state, clear
+ * guest LDT/TR, program CR3/host stack, set up the device-model shared
+ * page, and install the resume handler.
+ * NOTE(review): `page`/`pfn` are computed from a fresh domheap
+ * allocation but never used afterwards, and the accumulated `error`
+ * value is never checked -- confirm both are intentional.
+ */
+void vmx_do_launch(struct vcpu *v)
+{
+/* Update CR3, GDT, LDT, TR */
+    unsigned int tr, cpu, error = 0;
+    struct host_execution_env host_env;
+    struct Xgt_desc_struct desc;
+    unsigned long pfn = 0;
+    struct pfn_info *page;
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+    vmx_stts();
+    set_bit(_VCPUF_guest_stts, &v->vcpu_flags);
+
+    cpu = smp_processor_id();
+
+    page = (struct pfn_info *) alloc_domheap_page(NULL);
+    pfn = (unsigned long) (page - frame_table);
+
+    vmx_setup_platform(v, regs);
+
+    /* Mirror the host GDT into the VMCS host-state area. */
+    __asm__ __volatile__ ("sgdt (%0) \n" :: "a"(&desc) : "memory");
+    host_env.gdtr_limit = desc.size;
+    host_env.gdtr_base = desc.address;
+
+    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);
+
+    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
+    error |= __vmwrite(GUEST_LDTR_BASE, 0);
+    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);
+
+    __asm__ __volatile__ ("str (%0) \n" :: "a"(&tr) : "memory");
+    host_env.tr_selector = tr;
+    host_env.tr_limit = sizeof(struct tss_struct);
+    host_env.tr_base = (unsigned long) &init_tss[cpu];
+
+    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
+    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
+    error |= __vmwrite(GUEST_TR_BASE, 0);
+    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
+
+    __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.guest_table));
+    __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
+    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
+
+    v->arch.schedule_tail = arch_vmx_do_resume;
+}
+
+/*
+ * Initially set the same environement as host.
+ *
+ * Populate the guest-state area of a new VMCS from the boot-time
+ * register file (`regs`) and the captured host environment, with
+ * flat 4GB segments, host CR0/CR4 (shadowed with PG/PE and PGE/VMXE
+ * masked off), and sane AR bytes.  Returns the OR of all __vmwrite()
+ * results (nonzero on any failure).
+ */
+static inline int
+construct_init_vmcs_guest(struct cpu_user_regs *regs,
+                          struct vcpu_guest_context *ctxt,
+                          struct host_execution_env *host_env)
+{
+    int error = 0;
+    union vmcs_arbytes arbytes;
+    unsigned long dr7;
+    unsigned long eflags, shadow_cr;
+
+    /* MSR */
+    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
+    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
+
+    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+    /* interrupt */
+    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+    /* mask: trap all guest CR0/CR4 writes */
+    error |= __vmwrite(CR0_GUEST_HOST_MASK, 0xffffffff);
+    error |= __vmwrite(CR4_GUEST_HOST_MASK, 0xffffffff);
+
+    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+
+    /* TSC */
+    error |= __vmwrite(TSC_OFFSET, 0);
+    error |= __vmwrite(CR3_TARGET_COUNT, 0);
+
+    /* Guest Selectors */
+    error |= __vmwrite(GUEST_CS_SELECTOR, regs->cs);
+    error |= __vmwrite(GUEST_ES_SELECTOR, regs->es);
+    error |= __vmwrite(GUEST_SS_SELECTOR, regs->ss);
+    error |= __vmwrite(GUEST_DS_SELECTOR, regs->ds);
+    error |= __vmwrite(GUEST_FS_SELECTOR, regs->fs);
+    error |= __vmwrite(GUEST_GS_SELECTOR, regs->gs);
+
+    /* Guest segment Limits: flat 4GB */
+    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);
+
+    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);
+
+    /* AR bytes */
+    arbytes.bytes = 0;
+    arbytes.fields.seg_type = 0x3; /* type = 3 */
+    arbytes.fields.s = 1; /* code or data, i.e. not system */
+    arbytes.fields.dpl = 0; /* DPL = 0 (comment previously said 3) */
+    arbytes.fields.p = 1; /* segment present */
+    arbytes.fields.default_ops_size = 1; /* 32-bit */
+    arbytes.fields.g = 1;
+    arbytes.fields.null_bit = 0; /* not null */
+
+    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
+    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
+    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
+    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
+    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);
+
+    arbytes.fields.seg_type = 0xb; /* type = 0xb */
+    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);
+
+    /* Boot protocol: GDT base/limit arrive in edx/eax; consume them. */
+    error |= __vmwrite(GUEST_GDTR_BASE, regs->edx);
+    regs->edx = 0;
+    error |= __vmwrite(GUEST_GDTR_LIMIT, regs->eax);
+    regs->eax = 0;
+
+    arbytes.fields.s = 0; /* not code or data segement */
+    arbytes.fields.seg_type = 0x2; /* LTD */
+    arbytes.fields.default_ops_size = 0; /* 16-bit */
+    arbytes.fields.g = 0;
+    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);
+
+    arbytes.fields.seg_type = 0xb; /* 32-bit TSS (busy) */
+    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);
+
+    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */
+
+    /* Initally PG, PE are not set*/
+    shadow_cr = host_env->cr0;
+    shadow_cr &= ~X86_CR0_PG;
+    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
+    /* CR3 is set in vmx_final_setup_guest */
+    error |= __vmwrite(GUEST_CR4, host_env->cr4);
+    shadow_cr = host_env->cr4;
+    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
+    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);
+
+    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
+    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
+    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
+    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
+    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
+    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
+    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);
+
+    error |= __vmwrite(GUEST_RSP, regs->esp);
+    error |= __vmwrite(GUEST_RIP, regs->eip);
+
+    eflags = regs->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
+    eflags |= VMCS_EFLAGS_RESERVED_1; /* set 1s */
+
+    error |= __vmwrite(GUEST_RFLAGS, eflags);
+
+    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
+    error |= __vmwrite(GUEST_DR7, dr7);
+    /* No shadow VMCS: link pointer must be all-ones. */
+    error |= __vmwrite(VMCS_LINK_POINTER, 0xffffffff);
+    error |= __vmwrite(VMCS_LINK_POINTER_HIGH, 0xffffffff);
+
+    return error;
+}
+
+static inline int construct_vmcs_host(struct host_execution_env *host_env)
+{
+ int error = 0;
+ unsigned long crn;
+ struct Xgt_desc_struct desc;
+
+ /* Host Selectors */
+ host_env->ds_selector = __HYPERVISOR_DS;
+ error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
+ error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
+ error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
+ error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
+ error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);
+
+ host_env->cs_selector = __HYPERVISOR_CS;
+ error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);
+
+ host_env->ds_base = 0;
+ host_env->cs_base = 0;
+ error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
+ error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);
+
+/* Debug */
+ __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
+ host_env->idtr_limit = desc.size;
+ host_env->idtr_base = desc.address;
+ error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base);
+
+ __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
+
+ host_env->cr0 = crn;
+ error |= __vmwrite(HOST_CR0, crn); /* same CR0 */
+
+ /* CR3 is set in vmx_final_setup_hostos */
+ __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
+ host_env->cr4 = crn;
+ error |= __vmwrite(HOST_CR4, crn);
+ error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
+
+ return error;
+}
+
+/*
+ * Need to extend to support full virtualization.
+ * The variable use_host_env indicates if the new VMCS needs to use
+ * the same setups as the host has (xenolinux).
+ */
+
+int construct_vmcs(struct arch_vmx_struct *arch_vmx,
+ struct cpu_user_regs *regs,
+ struct vcpu_guest_context *ctxt,
+ int use_host_env)
+{
+ int error;
+ u64 vmcs_phys_ptr;
+
+ struct host_execution_env host_env;
+
+ if (use_host_env != VMCS_USE_HOST_ENV)
+ return -EINVAL;
+
+ memset(&host_env, 0, sizeof(struct host_execution_env));
+
+ vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);
+
+ if ((error = __vmpclear (vmcs_phys_ptr))) {
+ printk("construct_vmcs: VMCLEAR failed\n");
+ return -EINVAL;
+ }
+ if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
+ printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
+ (unsigned long) vmcs_phys_ptr);
+ return -EINVAL;
+ }
+ if ((error = construct_vmcs_controls())) {
+ printk("construct_vmcs: construct_vmcs_controls failed\n");
+ return -EINVAL;
+ }
+ /* host selectors */
+ if ((error = construct_vmcs_host(&host_env))) {
+ printk("construct_vmcs: construct_vmcs_host failed\n");
+ return -EINVAL;
+ }
+ /* guest selectors */
+ if ((error = construct_init_vmcs_guest(regs, ctxt, &host_env))) {
+ printk("construct_vmcs: construct_vmcs_guest failed\n");
+ return -EINVAL;
+ }
+
+ if ((error |= __vmwrite(EXCEPTION_BITMAP,
+ MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
+ printk("construct_vmcs: setting Exception bitmap failed\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
+{
+ int error;
+
+ if ((error = __vmptrld(phys_ptr))) {
+ clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
+ return error;
+ }
+ set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
+ return 0;
+}
+
+int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
+{
+ /* take the current VMCS */
+ __vmptrst(phys_ptr);
+ clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
+ return 0;
+}
+
+void vm_launch_fail(unsigned long eflags)
+{
+ __vmx_bug(guest_cpu_user_regs());
+}
+
+void vm_resume_fail(unsigned long eflags)
+{
+ __vmx_bug(guest_cpu_user_regs());
+}
+
+#endif /* CONFIG_VMX */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/x86_32/asm-offsets.c b/xen/arch/x86/x86_32/asm-offsets.c
index a85aa15db4..2a9f84ba4c 100644
--- a/xen/arch/x86/x86_32/asm-offsets.c
+++ b/xen/arch/x86/x86_32/asm-offsets.c
@@ -4,7 +4,11 @@
* to extract and format the required data.
*/
+#include <xen/config.h>
+#include <xen/perfc.h>
#include <xen/sched.h>
+#include <asm/fixmap.h>
+#include <asm/hardirq.h>
#define DEFINE(_sym, _val) \
__asm__ __volatile__ ( "\n->" #_sym " %0 " #_val : : "i" (_val) )
@@ -13,44 +17,58 @@
#define OFFSET(_sym, _str, _mem) \
DEFINE(_sym, offsetof(_str, _mem));
+/* base-2 logarithm */
+#define __L2(_x) (((_x) & 0x00000002) ? 1 : 0)
+#define __L4(_x) (((_x) & 0x0000000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x))
+#define __L8(_x) (((_x) & 0x000000f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x))
+#define __L16(_x) (((_x) & 0x0000ff00) ? ( 8 + __L8( (_x)>> 8)) : __L8( _x))
+#define LOG_2(_x) (((_x) & 0xffff0000) ? (16 + __L16((_x)>>16)) : __L16(_x))
+
void __dummy__(void)
{
- OFFSET(XREGS_eax, struct xen_regs, eax);
- OFFSET(XREGS_ebx, struct xen_regs, ebx);
- OFFSET(XREGS_ecx, struct xen_regs, ecx);
- OFFSET(XREGS_edx, struct xen_regs, edx);
- OFFSET(XREGS_esi, struct xen_regs, esi);
- OFFSET(XREGS_edi, struct xen_regs, edi);
- OFFSET(XREGS_esp, struct xen_regs, esp);
- OFFSET(XREGS_ebp, struct xen_regs, ebp);
- OFFSET(XREGS_eip, struct xen_regs, eip);
- OFFSET(XREGS_cs, struct xen_regs, cs);
- OFFSET(XREGS_ds, struct xen_regs, ds);
- OFFSET(XREGS_es, struct xen_regs, es);
- OFFSET(XREGS_fs, struct xen_regs, fs);
- OFFSET(XREGS_gs, struct xen_regs, gs);
- OFFSET(XREGS_ss, struct xen_regs, ss);
- OFFSET(XREGS_eflags, struct xen_regs, eflags);
- OFFSET(XREGS_error_code, struct xen_regs, error_code);
- OFFSET(XREGS_entry_vector, struct xen_regs, entry_vector);
- OFFSET(XREGS_kernel_sizeof, struct xen_regs, esp);
- DEFINE(XREGS_user_sizeof, sizeof(struct xen_regs));
+ OFFSET(UREGS_eax, struct cpu_user_regs, eax);
+ OFFSET(UREGS_ebx, struct cpu_user_regs, ebx);
+ OFFSET(UREGS_ecx, struct cpu_user_regs, ecx);
+ OFFSET(UREGS_edx, struct cpu_user_regs, edx);
+ OFFSET(UREGS_esi, struct cpu_user_regs, esi);
+ OFFSET(UREGS_edi, struct cpu_user_regs, edi);
+ OFFSET(UREGS_esp, struct cpu_user_regs, esp);
+ OFFSET(UREGS_ebp, struct cpu_user_regs, ebp);
+ OFFSET(UREGS_eip, struct cpu_user_regs, eip);
+ OFFSET(UREGS_cs, struct cpu_user_regs, cs);
+ OFFSET(UREGS_ds, struct cpu_user_regs, ds);
+ OFFSET(UREGS_es, struct cpu_user_regs, es);
+ OFFSET(UREGS_fs, struct cpu_user_regs, fs);
+ OFFSET(UREGS_gs, struct cpu_user_regs, gs);
+ OFFSET(UREGS_ss, struct cpu_user_regs, ss);
+ OFFSET(UREGS_eflags, struct cpu_user_regs, eflags);
+ OFFSET(UREGS_error_code, struct cpu_user_regs, error_code);
+ OFFSET(UREGS_entry_vector, struct cpu_user_regs, entry_vector);
+ OFFSET(UREGS_saved_upcall_mask, struct cpu_user_regs, saved_upcall_mask);
+ OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, esp);
+ DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
BLANK();
- OFFSET(DOMAIN_processor, struct domain, processor);
- OFFSET(DOMAIN_shared_info, struct domain, shared_info);
- OFFSET(DOMAIN_event_sel, struct domain, thread.event_selector);
- OFFSET(DOMAIN_event_addr, struct domain, thread.event_address);
- OFFSET(DOMAIN_failsafe_sel, struct domain, thread.failsafe_selector);
- OFFSET(DOMAIN_failsafe_addr, struct domain, thread.failsafe_address);
- OFFSET(DOMAIN_trap_bounce, struct domain, thread.trap_bounce);
- OFFSET(DOMAIN_thread_flags, struct domain, thread.flags);
+ OFFSET(VCPU_processor, struct vcpu, processor);
+ OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
+ OFFSET(VCPU_trap_bounce, struct vcpu, arch.trap_bounce);
+ OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
+ OFFSET(VCPU_event_sel, struct vcpu,
+ arch.guest_context.event_callback_cs);
+ OFFSET(VCPU_event_addr, struct vcpu,
+ arch.guest_context.event_callback_eip);
+ OFFSET(VCPU_failsafe_sel, struct vcpu,
+ arch.guest_context.failsafe_callback_cs);
+ OFFSET(VCPU_failsafe_addr, struct vcpu,
+ arch.guest_context.failsafe_callback_eip);
+ OFFSET(VCPU_kernel_ss, struct vcpu,
+ arch.guest_context.kernel_ss);
+ OFFSET(VCPU_kernel_sp, struct vcpu,
+ arch.guest_context.kernel_sp);
BLANK();
- OFFSET(SHINFO_upcall_pending, shared_info_t,
- vcpu_data[0].evtchn_upcall_pending);
- OFFSET(SHINFO_upcall_mask, shared_info_t,
- vcpu_data[0].evtchn_upcall_mask);
+ OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
+ OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask);
BLANK();
OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code);
@@ -60,14 +78,25 @@ void __dummy__(void)
OFFSET(TRAPBOUNCE_eip, struct trap_bounce, eip);
BLANK();
+#if PERF_COUNTERS
+ OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
+ OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
+ BLANK();
+#endif
+
OFFSET(MULTICALL_op, multicall_entry_t, op);
OFFSET(MULTICALL_arg0, multicall_entry_t, args[0]);
OFFSET(MULTICALL_arg1, multicall_entry_t, args[1]);
OFFSET(MULTICALL_arg2, multicall_entry_t, args[2]);
OFFSET(MULTICALL_arg3, multicall_entry_t, args[3]);
OFFSET(MULTICALL_arg4, multicall_entry_t, args[4]);
- OFFSET(MULTICALL_result, multicall_entry_t, args[5]);
+ OFFSET(MULTICALL_arg5, multicall_entry_t, args[5]);
+ OFFSET(MULTICALL_arg6, multicall_entry_t, args[6]);
+ OFFSET(MULTICALL_result, multicall_entry_t, result);
BLANK();
DEFINE(FIXMAP_apic_base, fix_to_virt(FIX_APIC_BASE));
+ BLANK();
+
+ DEFINE(IRQSTAT_shift, LOG_2(sizeof(irq_cpustat_t)));
}
diff --git a/xen/arch/x86/x86_32/domain_page.c b/xen/arch/x86/x86_32/domain_page.c
index 5666513719..6bec96bc87 100644
--- a/xen/arch/x86/x86_32/domain_page.c
+++ b/xen/arch/x86/x86_32/domain_page.c
@@ -15,38 +15,29 @@
#include <xen/sched.h>
#include <xen/mm.h>
#include <xen/perfc.h>
-#include <asm/domain_page.h>
+#include <xen/domain_page.h>
+#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
-unsigned long *mapcache;
+#define MAPCACHE_ORDER 10
+#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
+
+l1_pgentry_t *mapcache;
static unsigned int map_idx, epoch, shadow_epoch[NR_CPUS];
static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
-/* Use a spare PTE bit to mark entries ready for recycling. */
-#define READY_FOR_TLB_FLUSH (1<<10)
-
-static void flush_all_ready_maps(void)
-{
- unsigned long *cache = mapcache;
-
- /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
- do {
- if ( (*cache & READY_FOR_TLB_FLUSH) )
- *cache = 0;
- }
- while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
-}
-
-
-void *map_domain_mem(unsigned long pa)
+void *map_domain_page(unsigned long pfn)
{
unsigned long va;
unsigned int idx, cpu = smp_processor_id();
- unsigned long *cache = mapcache;
+ l1_pgentry_t *cache = mapcache;
+#ifndef NDEBUG
+ unsigned int flush_count = 0;
+#endif
ASSERT(!in_irq());
- perfc_incrc(map_domain_mem_count);
+ perfc_incrc(map_domain_page_count);
spin_lock(&map_lock);
@@ -62,25 +53,27 @@ void *map_domain_mem(unsigned long pa)
idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
if ( unlikely(idx == 0) )
{
- flush_all_ready_maps();
+ ASSERT(flush_count++ == 0);
perfc_incrc(domain_page_tlb_flush);
local_flush_tlb();
shadow_epoch[cpu] = ++epoch;
}
}
- while ( cache[idx] != 0 );
+ while ( l1e_get_flags(cache[idx]) & _PAGE_PRESENT );
- cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
+ cache[idx] = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
spin_unlock(&map_lock);
- va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
+ va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
return (void *)va;
}
-void unmap_domain_mem(void *va)
+void unmap_domain_page(void *va)
{
unsigned int idx;
+ ASSERT((void *)MAPCACHE_VIRT_START <= va);
+ ASSERT(va < (void *)MAPCACHE_VIRT_END);
idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
- mapcache[idx] |= READY_FOR_TLB_FLUSH;
+ mapcache[idx] = l1e_empty();
}
diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S
index b155938310..475474b99a 100644
--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -58,24 +58,128 @@
#include <xen/softirq.h>
#include <asm/asm_defns.h>
#include <asm/apicdef.h>
+#include <asm/page.h>
#include <public/xen.h>
-#define GET_CURRENT(reg) \
- movl $8192-4, reg; \
- orl %esp, reg; \
- andl $~3,reg; \
+#define GET_CURRENT(reg) \
+ movl $STACK_SIZE-4, reg; \
+ orl %esp, reg; \
+ andl $~3,reg; \
movl (reg),reg;
+#ifdef CONFIG_VMX
+/*
+ * At VMExit time the processor saves the guest selectors, esp, eip,
+ * and eflags. Therefore we don't save them, but simply decrement
+ * the kernel stack pointer to make it consistent with the stack frame
+ * at usual interruption time. The eflags of the host is not saved by VMX,
+ * and we set it to the fixed value.
+ *
+ * We also need the room, especially because orig_eax field is used
+ * by do_IRQ(). Compared to cpu_user_regs, we skip pushing the following:
+ * (10) u32 gs;
+ * (9) u32 fs;
+ * (8) u32 ds;
+ * (7) u32 es;
+ * <- get_stack_bottom() (= HOST_ESP)
+ * (6) u32 ss;
+ * (5) u32 esp;
+ * (4) u32 eflags;
+ * (3) u32 cs;
+ * (2) u32 eip;
+ * (2/1) u16 entry_vector;
+ * (1/1) u16 error_code;
+ * However, get_stack_bottom() actually returns 20 bytes before the real
+ * bottom of the stack to allow space for:
+ * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
+ */
+#define VMX_MONITOR_EFLAGS 0x202 /* IF on */
+#define NR_SKIPPED_REGS 6 /* See the above explanation */
+#define VMX_SAVE_ALL_NOSEGREGS \
+ pushl $VMX_MONITOR_EFLAGS; \
+ popf; \
+ subl $(NR_SKIPPED_REGS*4), %esp; \
+ movl $0, 0xc(%esp); /* eflags==0 identifies cpu_user_regs as VMX guest */ \
+ pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx;
+
+ENTRY(vmx_asm_vmexit_handler)
+ /* selectors are restored/saved by VMX */
+ VMX_SAVE_ALL_NOSEGREGS
+ call vmx_vmexit_handler
+ jmp vmx_asm_do_resume
+
+ENTRY(vmx_asm_do_launch)
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ addl $(NR_SKIPPED_REGS*4), %esp
+ /* VMLAUNCH */
+ .byte 0x0f,0x01,0xc2
+ pushf
+ call vm_launch_fail
+ hlt
+
+ ALIGN
+
+ENTRY(vmx_asm_do_resume)
+vmx_test_all_events:
+ GET_CURRENT(%ebx)
+/*test_all_events:*/
+ xorl %ecx,%ecx
+ notl %ecx
+ cli # tests must not race interrupts
+/*test_softirqs:*/
+ movl VCPU_processor(%ebx),%eax
+ shl $IRQSTAT_shift,%eax
+ test %ecx,irq_stat(%eax,1)
+ jnz vmx_process_softirqs
+
+vmx_restore_all_guest:
+ call load_cr2
+ /*
+ * Check if we are going back to VMX-based VM
+ * By this time, all the setups in the VMCS must be complete.
+ */
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ addl $(NR_SKIPPED_REGS*4), %esp
+ /* VMRESUME */
+ .byte 0x0f,0x01,0xc3
+ pushf
+ call vm_resume_fail
+ /* Should never reach here */
+ hlt
+
+ ALIGN
+vmx_process_softirqs:
+ sti
+ call do_softirq
+ jmp vmx_test_all_events
+#endif
+
ALIGN
restore_all_guest:
- testb $TF_failsafe_return,DOMAIN_thread_flags(%ebx)
- jnz failsafe_callback
- testl $X86_EFLAGS_VM,XREGS_eflags(%esp)
+ testl $X86_EFLAGS_VM,UREGS_eflags(%esp)
jnz restore_all_vm86
-FLT1: movl XREGS_ds(%esp),%ds
-FLT2: movl XREGS_es(%esp),%es
-FLT3: movl XREGS_fs(%esp),%fs
-FLT4: movl XREGS_gs(%esp),%gs
+FLT1: mov UREGS_ds(%esp),%ds
+FLT2: mov UREGS_es(%esp),%es
+FLT3: mov UREGS_fs(%esp),%fs
+FLT4: mov UREGS_gs(%esp),%gs
restore_all_vm86:
popl %ebx
popl %ecx
@@ -89,13 +193,13 @@ FLT5: iret
.section .fixup,"ax"
FIX5: subl $28,%esp
pushl 28(%esp) # error_code/entry_vector
- movl %eax,XREGS_eax+4(%esp)
- movl %ebp,XREGS_ebp+4(%esp)
- movl %edi,XREGS_edi+4(%esp)
- movl %esi,XREGS_esi+4(%esp)
- movl %edx,XREGS_edx+4(%esp)
- movl %ecx,XREGS_ecx+4(%esp)
- movl %ebx,XREGS_ebx+4(%esp)
+ movl %eax,UREGS_eax+4(%esp)
+ movl %ebp,UREGS_ebp+4(%esp)
+ movl %edi,UREGS_edi+4(%esp)
+ movl %esi,UREGS_esi+4(%esp)
+ movl %edx,UREGS_edx+4(%esp)
+ movl %ecx,UREGS_ecx+4(%esp)
+ movl %ebx,UREGS_ebx+4(%esp)
FIX1: SET_XEN_SEGMENTS(a)
movl %eax,%fs
movl %eax,%gs
@@ -110,11 +214,21 @@ FIX1: SET_XEN_SEGMENTS(a)
jmp error_code
DBLFLT1:GET_CURRENT(%ebx)
jmp test_all_events
-DBLFIX1:GET_CURRENT(%ebx)
- testb $TF_failsafe_return,DOMAIN_thread_flags(%ebx)
- jnz domain_crash_synchronous # cannot reenter failsafe code
- orb $TF_failsafe_return,DOMAIN_thread_flags(%ebx)
- jmp test_all_events # will return via failsafe code
+failsafe_callback:
+ GET_CURRENT(%ebx)
+ leal VCPU_trap_bounce(%ebx),%edx
+ movl VCPU_failsafe_addr(%ebx),%eax
+ movl %eax,TRAPBOUNCE_eip(%edx)
+ movl VCPU_failsafe_sel(%ebx),%eax
+ movw %ax,TRAPBOUNCE_cs(%edx)
+ movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx)
+ call create_bounce_frame
+ xorl %eax,%eax
+ movl %eax,UREGS_ds(%esp)
+ movl %eax,UREGS_es(%esp)
+ movl %eax,UREGS_fs(%esp)
+ movl %eax,UREGS_gs(%esp)
+ jmp test_all_events
.previous
.section __pre_ex_table,"a"
.long FLT1,FIX1
@@ -124,37 +238,7 @@ DBLFIX1:GET_CURRENT(%ebx)
.long FLT5,FIX5
.previous
.section __ex_table,"a"
- .long DBLFLT1,DBLFIX1
-.previous
-
-/* No special register assumptions */
-failsafe_callback:
- GET_CURRENT(%ebx)
- andb $~TF_failsafe_return,DOMAIN_thread_flags(%ebx)
- leal DOMAIN_trap_bounce(%ebx),%edx
- movl DOMAIN_failsafe_addr(%ebx),%eax
- movl %eax,TRAPBOUNCE_eip(%edx)
- movl DOMAIN_failsafe_sel(%ebx),%eax
- movw %ax,TRAPBOUNCE_cs(%edx)
- movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx)
- call create_bounce_frame
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- addl $4,%esp
-FLT6: iret
-.section .fixup,"ax"
-FIX6: pushl %ebx
- GET_CURRENT(%ebx)
- orb $TF_failsafe_return,DOMAIN_thread_flags(%ebx)
- pop %ebx
- jmp FIX5
-.section __pre_ex_table,"a"
- .long FLT6,FIX6
+ .long DBLFLT1,failsafe_callback
.previous
ALIGN
@@ -175,178 +259,165 @@ ENTRY(hypercall)
SAVE_ALL(b)
sti
GET_CURRENT(%ebx)
- andl $(NR_hypercalls-1),%eax
- call *SYMBOL_NAME(hypercall_table)(,%eax,4)
-
-ret_from_hypercall:
- movl %eax,XREGS_eax(%esp) # save the return value
+ andl $(NR_hypercalls-1),%eax
+ PERFC_INCR(PERFC_hypercalls, %eax)
+ call *hypercall_table(,%eax,4)
+ movl %eax,UREGS_eax(%esp) # save the return value
test_all_events:
xorl %ecx,%ecx
notl %ecx
cli # tests must not race interrupts
/*test_softirqs:*/
- movl DOMAIN_processor(%ebx),%eax
- shl $6,%eax # sizeof(irq_cpustat) == 64
- test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
+ movl VCPU_processor(%ebx),%eax
+ shl $IRQSTAT_shift,%eax
+ test %ecx,irq_stat(%eax,1)
jnz process_softirqs
/*test_guest_events:*/
- movl DOMAIN_shared_info(%ebx),%eax
- testb $0xFF,SHINFO_upcall_mask(%eax)
+ movl VCPU_vcpu_info(%ebx),%eax
+ testb $0xFF,VCPUINFO_upcall_mask(%eax)
jnz restore_all_guest
- testb $0xFF,SHINFO_upcall_pending(%eax)
+ testb $0xFF,VCPUINFO_upcall_pending(%eax)
jz restore_all_guest
/*process_guest_events:*/
- leal DOMAIN_trap_bounce(%ebx),%edx
- movl DOMAIN_event_addr(%ebx),%eax
+ sti
+ leal VCPU_trap_bounce(%ebx),%edx
+ movl VCPU_event_addr(%ebx),%eax
movl %eax,TRAPBOUNCE_eip(%edx)
- movl DOMAIN_event_sel(%ebx),%eax
+ movl VCPU_event_sel(%ebx),%eax
movw %ax,TRAPBOUNCE_cs(%edx)
movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
call create_bounce_frame
- movl DOMAIN_shared_info(%ebx),%eax
- movb $1,SHINFO_upcall_mask(%eax) # Upcalls are masked during delivery
- jmp restore_all_guest
+ jmp test_all_events
ALIGN
process_softirqs:
sti
- call SYMBOL_NAME(do_softirq)
+ call do_softirq
jmp test_all_events
/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */
/* {EIP, CS, EFLAGS, [ESP, SS]} */
-/* %edx == trap_bounce, %ebx == task_struct */
-/* %eax,%ecx are clobbered. %gs:%esi contain new XREGS_ss/XREGS_esp. */
+/* %edx == trap_bounce, %ebx == struct vcpu */
+/* %eax,%ecx are clobbered. %gs:%esi contain new UREGS_ss/UREGS_esp. */
create_bounce_frame:
- movl XREGS_eflags+4(%esp),%ecx
- movb XREGS_cs+4(%esp),%cl
+ movl UREGS_eflags+4(%esp),%ecx
+ movb UREGS_cs+4(%esp),%cl
testl $(2|X86_EFLAGS_VM),%ecx
jz ring1 /* jump if returning to an existing ring-1 activation */
- /* obtain ss/esp from TSS -- no current ring-1 activations */
- movl DOMAIN_processor(%ebx),%eax
- /* next 4 lines multiply %eax by 8320, which is sizeof(tss_struct) */
- movl %eax, %ecx
- shll $7, %ecx
- shll $13, %eax
- addl %ecx,%eax
- addl $init_tss + 12,%eax
- movl (%eax),%esi /* tss->esp1 */
-FLT7: movl 4(%eax),%gs /* tss->ss1 */
- testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp)
+ movl VCPU_kernel_sp(%ebx),%esi
+FLT6: mov VCPU_kernel_ss(%ebx),%gs
+ testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
jz nvm86_1
- subl $16,%esi /* push ES/DS/FS/GS (VM86 stack frame) */
- movl XREGS_es+4(%esp),%eax
-FLT8: movl %eax,%gs:(%esi)
- movl XREGS_ds+4(%esp),%eax
-FLT9: movl %eax,%gs:4(%esi)
- movl XREGS_fs+4(%esp),%eax
-FLT10: movl %eax,%gs:8(%esi)
- movl XREGS_gs+4(%esp),%eax
-FLT11: movl %eax,%gs:12(%esi)
+ subl $16,%esi /* push ES/DS/FS/GS (VM86 stack frame) */
+ movl UREGS_es+4(%esp),%eax
+FLT7: movl %eax,%gs:(%esi)
+ movl UREGS_ds+4(%esp),%eax
+FLT8: movl %eax,%gs:4(%esi)
+ movl UREGS_fs+4(%esp),%eax
+FLT9: movl %eax,%gs:8(%esi)
+ movl UREGS_gs+4(%esp),%eax
+FLT10: movl %eax,%gs:12(%esi)
nvm86_1:subl $8,%esi /* push SS/ESP (inter-priv iret) */
- movl XREGS_esp+4(%esp),%eax
-FLT12: movl %eax,%gs:(%esi)
- movl XREGS_ss+4(%esp),%eax
-FLT13: movl %eax,%gs:4(%esi)
+ movl UREGS_esp+4(%esp),%eax
+FLT11: movl %eax,%gs:(%esi)
+ movl UREGS_ss+4(%esp),%eax
+FLT12: movl %eax,%gs:4(%esi)
jmp 1f
ring1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
- movl XREGS_esp+4(%esp),%esi
-FLT14: movl XREGS_ss+4(%esp),%gs
+ movl UREGS_esp+4(%esp),%esi
+FLT13: mov UREGS_ss+4(%esp),%gs
1: /* Construct a stack frame: EFLAGS, CS/EIP */
- subl $12,%esi
- movl XREGS_eip+4(%esp),%eax
-FLT15: movl %eax,%gs:(%esi)
- movl XREGS_cs+4(%esp),%eax
-FLT16: movl %eax,%gs:4(%esi)
- movl XREGS_eflags+4(%esp),%eax
-FLT17: movl %eax,%gs:8(%esi)
movb TRAPBOUNCE_flags(%edx),%cl
+ subl $12,%esi
+ movl UREGS_eip+4(%esp),%eax
+FLT14: movl %eax,%gs:(%esi)
+ movl VCPU_vcpu_info(%ebx),%eax
+ pushl VCPUINFO_upcall_mask(%eax)
+ testb $TBF_INTERRUPT,%cl
+ setnz VCPUINFO_upcall_mask(%eax) # TBF_INTERRUPT -> clear upcall mask
+ popl %eax
+ shll $16,%eax # Bits 16-23: saved_upcall_mask
+ movw UREGS_cs+4(%esp),%ax # Bits 0-15: CS
+FLT15: movl %eax,%gs:4(%esi)
+ movl UREGS_eflags+4(%esp),%eax
+FLT16: movl %eax,%gs:8(%esi)
test $TBF_EXCEPTION_ERRCODE,%cl
jz 1f
subl $4,%esi # push error_code onto guest frame
movl TRAPBOUNCE_error_code(%edx),%eax
-FLT18: movl %eax,%gs:(%esi)
+FLT17: movl %eax,%gs:(%esi)
testb $TBF_EXCEPTION_CR2,%cl
jz 2f
subl $4,%esi # push %cr2 onto guest frame
movl TRAPBOUNCE_cr2(%edx),%eax
-FLT19: movl %eax,%gs:(%esi)
+FLT18: movl %eax,%gs:(%esi)
1: testb $TBF_FAILSAFE,%cl
jz 2f
subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame
- testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp)
+ testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
jz nvm86_2
xorl %eax,%eax # VM86: we write zero selector values
-FLT20: movl %eax,%gs:(%esi)
-FLT21: movl %eax,%gs:4(%esi)
-FLT22: movl %eax,%gs:8(%esi)
-FLT23: movl %eax,%gs:12(%esi)
+FLT19: movl %eax,%gs:(%esi)
+FLT20: movl %eax,%gs:4(%esi)
+FLT21: movl %eax,%gs:8(%esi)
+FLT22: movl %eax,%gs:12(%esi)
jmp 2f
-nvm86_2:movl XREGS_ds+4(%esp),%eax # non-VM86: write real selector values
-FLT24: movl %eax,%gs:(%esi)
- movl XREGS_es+4(%esp),%eax
-FLT25: movl %eax,%gs:4(%esi)
- movl XREGS_fs+4(%esp),%eax
-FLT26: movl %eax,%gs:8(%esi)
- movl XREGS_gs+4(%esp),%eax
-FLT27: movl %eax,%gs:12(%esi)
-2: movb $0,TRAPBOUNCE_flags(%edx)
- testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp)
+nvm86_2:movl UREGS_ds+4(%esp),%eax # non-VM86: write real selector values
+FLT23: movl %eax,%gs:(%esi)
+ movl UREGS_es+4(%esp),%eax
+FLT24: movl %eax,%gs:4(%esi)
+ movl UREGS_fs+4(%esp),%eax
+FLT25: movl %eax,%gs:8(%esi)
+ movl UREGS_gs+4(%esp),%eax
+FLT26: movl %eax,%gs:12(%esi)
+2: testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
jz nvm86_3
xorl %eax,%eax /* zero DS-GS, just as a real CPU would */
- movl %eax,XREGS_ds+4(%esp)
- movl %eax,XREGS_es+4(%esp)
- movl %eax,XREGS_fs+4(%esp)
- movl %eax,XREGS_gs+4(%esp)
+ movl %eax,UREGS_ds+4(%esp)
+ movl %eax,UREGS_es+4(%esp)
+ movl %eax,UREGS_fs+4(%esp)
+ movl %eax,UREGS_gs+4(%esp)
nvm86_3:/* Rewrite our stack frame and return to ring 1. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
- andl $0xfffcbeff,XREGS_eflags+4(%esp)
- movl %gs,XREGS_ss+4(%esp)
- movl %esi,XREGS_esp+4(%esp)
+ andl $0xfffcbeff,UREGS_eflags+4(%esp)
+ mov %gs,UREGS_ss+4(%esp)
+ movl %esi,UREGS_esp+4(%esp)
movzwl TRAPBOUNCE_cs(%edx),%eax
- movl %eax,XREGS_cs+4(%esp)
+ movl %eax,UREGS_cs+4(%esp)
movl TRAPBOUNCE_eip(%edx),%eax
- movl %eax,XREGS_eip+4(%esp)
+ test %eax,%eax
+ jz domain_crash_synchronous
+ movl %eax,UREGS_eip+4(%esp)
+ movb $0,TRAPBOUNCE_flags(%edx)
ret
-.section .fixup,"ax"
-FIX7: sti
- popl %esi
- addl $4,%esp # Discard create_b_frame return address
- pushfl # EFLAGS
- movl $__HYPERVISOR_CS,%eax
- pushl %eax # CS
- movl $DBLFLT2,%eax
- pushl %eax # EIP
- pushl %esi # error_code/entry_vector
- jmp error_code
-DBLFLT2:jmp process_guest_exception_and_events
-.previous
-.section __pre_ex_table,"a"
- .long FLT7,FIX7 , FLT8,FIX7 , FLT9,FIX7 , FLT10,FIX7
- .long FLT11,FIX7 , FLT12,FIX7 , FLT13,FIX7 , FLT14,FIX7
- .long FLT15,FIX7 , FLT16,FIX7 , FLT17,FIX7 , FLT18,FIX7
- .long FLT19,FIX7 , FLT20,FIX7 , FLT21,FIX7 , FLT22,FIX7
- .long FLT23,FIX7 , FLT24,FIX7 , FLT25,FIX7 , FLT26,FIX7 , FLT27,FIX7
-.previous
.section __ex_table,"a"
- .long DBLFLT2,domain_crash_synchronous
+ .long FLT6,domain_crash_synchronous , FLT7,domain_crash_synchronous
+ .long FLT8,domain_crash_synchronous , FLT9,domain_crash_synchronous
+ .long FLT10,domain_crash_synchronous , FLT11,domain_crash_synchronous
+ .long FLT12,domain_crash_synchronous , FLT13,domain_crash_synchronous
+ .long FLT14,domain_crash_synchronous , FLT15,domain_crash_synchronous
+ .long FLT16,domain_crash_synchronous , FLT17,domain_crash_synchronous
+ .long FLT18,domain_crash_synchronous , FLT19,domain_crash_synchronous
+ .long FLT20,domain_crash_synchronous , FLT21,domain_crash_synchronous
+ .long FLT22,domain_crash_synchronous , FLT23,domain_crash_synchronous
+ .long FLT24,domain_crash_synchronous , FLT25,domain_crash_synchronous
+ .long FLT26,domain_crash_synchronous
.previous
ALIGN
process_guest_exception_and_events:
- leal DOMAIN_trap_bounce(%ebx),%edx
+ leal VCPU_trap_bounce(%ebx),%edx
testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx)
jz test_all_events
- cli # create_bounce_frame needs CLI for pre-exceptions to work
call create_bounce_frame
jmp test_all_events
ALIGN
ENTRY(ret_from_intr)
GET_CURRENT(%ebx)
- movl XREGS_eflags(%esp),%eax
- movb XREGS_cs(%esp),%al
+ movl UREGS_eflags(%esp),%eax
+ movb UREGS_cs(%esp),%al
testl $(3|X86_EFLAGS_VM),%eax
jnz test_all_events
jmp restore_all_xen
@@ -357,50 +428,50 @@ ENTRY(divide_error)
error_code:
SAVE_ALL_NOSEGREGS(a)
SET_XEN_SEGMENTS(a)
- testb $X86_EFLAGS_IF>>8,XREGS_eflags+1(%esp)
+ testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
jz exception_with_ints_disabled
-1: sti # re-enable interrupts
+ sti # re-enable interrupts
xorl %eax,%eax
- movw XREGS_entry_vector(%esp),%ax
+ movw UREGS_entry_vector(%esp),%ax
movl %esp,%edx
- pushl %edx # push the xen_regs pointer
+ pushl %edx # push the cpu_user_regs pointer
GET_CURRENT(%ebx)
- call *SYMBOL_NAME(exception_table)(,%eax,4)
+ PERFC_INCR(PERFC_exceptions, %eax)
+ call *exception_table(,%eax,4)
addl $4,%esp
- movl XREGS_eflags(%esp),%eax
- movb XREGS_cs(%esp),%al
+ movl UREGS_eflags(%esp),%eax
+ movb UREGS_cs(%esp),%al
testl $(3|X86_EFLAGS_VM),%eax
jz restore_all_xen
jmp process_guest_exception_and_events
exception_with_ints_disabled:
- movl XREGS_eflags(%esp),%eax
- movb XREGS_cs(%esp),%al
+ movl UREGS_eflags(%esp),%eax
+ movb UREGS_cs(%esp),%al
testl $(3|X86_EFLAGS_VM),%eax # interrupts disabled outside Xen?
- jnz 1b # it really does happen!
- # (e.g., DOM0 X server)
+ jnz FATAL_exception_with_ints_disabled
pushl %esp
call search_pre_exception_table
addl $4,%esp
testl %eax,%eax # no fixup code for faulting EIP?
jz FATAL_exception_with_ints_disabled
- movl %eax,XREGS_eip(%esp)
+ movl %eax,UREGS_eip(%esp)
movl %esp,%esi
subl $4,%esp
movl %esp,%edi
- movl $XREGS_kernel_sizeof/4,%ecx
+ movl $UREGS_kernel_sizeof/4,%ecx
rep; movsl # make room for error_code/entry_vector
- movl XREGS_error_code(%esp),%eax # error_code/entry_vector
- movl %eax,XREGS_kernel_sizeof(%esp)
+ movl UREGS_error_code(%esp),%eax # error_code/entry_vector
+ movl %eax,UREGS_kernel_sizeof(%esp)
jmp restore_all_xen # return to fixup code
FATAL_exception_with_ints_disabled:
xorl %esi,%esi
- movw XREGS_entry_vector(%esp),%si
+ movw UREGS_entry_vector(%esp),%si
movl %esp,%edx
- pushl %edx # push the xen_regs pointer
+ pushl %edx # push the cpu_user_regs pointer
pushl %esi # push the trapnr (entry vector)
- call SYMBOL_NAME(fatal_trap)
+ call fatal_trap
ud2
ENTRY(coprocessor_error)
@@ -488,12 +559,12 @@ ENTRY(nmi)
# Okay, its almost a normal NMI tick. We can only process it if:
# A. We are the outermost Xen activation (in which case we have
# the selectors safely saved on our stack)
- # B. DS-GS all contain sane Xen values.
+ # B. DS and ES contain sane Xen values.
# In all other cases we bail without touching DS-GS, as we have
# interrupted an enclosing Xen activation in tricky prologue or
# epilogue code.
- movl XREGS_eflags(%esp),%eax
- movb XREGS_cs(%esp),%al
+ movl UREGS_eflags(%esp),%eax
+ movb UREGS_cs(%esp),%al
testl $(3|X86_EFLAGS_VM),%eax
jnz do_watchdog_tick
movl %ds,%eax
@@ -508,16 +579,11 @@ do_watchdog_tick:
movl %edx,%ds
movl %edx,%es
movl %esp,%edx
- pushl %ebx # reason
- pushl %edx # regs
- call SYMBOL_NAME(do_nmi)
- addl $8,%esp
- movl XREGS_eflags(%esp),%eax
- movb XREGS_cs(%esp),%al
- testl $(3|X86_EFLAGS_VM),%eax
- jz restore_all_xen
- GET_CURRENT(%ebx)
- jmp restore_all_guest
+ pushl %ebx # reason
+ pushl %edx # regs
+ call do_nmi
+ addl $8,%esp
+ jmp ret_from_intr
defer_nmi:
movl $FIXMAP_apic_base,%eax
@@ -535,40 +601,48 @@ nmi_parity_err:
andb $0xf,%al
orb $0x4,%al
outb %al,$0x61
- cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore
- je restore_all_xen
- bts $0,%ss:SYMBOL_NAME(nmi_softirq_reason)
- bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat)
- cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0
- je restore_all_xen
+ cmpb $'i',%ss:opt_nmi # nmi=ignore
+ je nmi_out
+ bts $0,%ss:nmi_softirq_reason
+ bts $NMI_SOFTIRQ,%ss:irq_stat
+ cmpb $'d',%ss:opt_nmi # nmi=dom0
+ je nmi_out
movl $(__HYPERVISOR_DS),%edx # nmi=fatal
movl %edx,%ds
movl %edx,%es
movl %esp,%edx
push %edx
- call SYMBOL_NAME(mem_parity_error)
+ call mem_parity_error
addl $4,%esp
- jmp ret_from_intr
+nmi_out:movl %ss:UREGS_eflags(%esp),%eax
+ movb %ss:UREGS_cs(%esp),%al
+ testl $(3|X86_EFLAGS_VM),%eax
+ jz restore_all_xen
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
+ GET_CURRENT(%ebx)
+ jmp test_all_events
nmi_io_err:
# Clear and disable the I/O-error line
andb $0xf,%al
orb $0x8,%al
outb %al,$0x61
- cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore
- je restore_all_xen
- bts $1,%ss:SYMBOL_NAME(nmi_softirq_reason)
- bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat)
- cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0
- je restore_all_xen
+ cmpb $'i',%ss:opt_nmi # nmi=ignore
+ je nmi_out
+ bts $1,%ss:nmi_softirq_reason
+ bts $NMI_SOFTIRQ,%ss:irq_stat
+ cmpb $'d',%ss:opt_nmi # nmi=dom0
+ je nmi_out
movl $(__HYPERVISOR_DS),%edx # nmi=fatal
movl %edx,%ds
movl %edx,%es
movl %esp,%edx
push %edx
- call SYMBOL_NAME(io_check_error)
+ call io_check_error
addl $4,%esp
- jmp ret_from_intr
+ jmp nmi_out
ENTRY(setup_vm86_frame)
@@ -584,38 +658,38 @@ ENTRY(setup_vm86_frame)
addl $16,%esp
ret
+do_arch_sched_op:
+ # Ensure we return success even if we return via schedule_tail()
+ xorl %eax,%eax
+ movl %eax,UREGS_eax+4(%esp)
+ jmp do_sched_op
+
do_switch_vm86:
# Discard the return address
addl $4,%esp
- movl XREGS_eflags(%esp),%edx
-
# GS:ESI == Ring-1 stack activation
- movl XREGS_esp(%esp),%esi
-VFLT1: movl XREGS_ss(%esp),%gs
+ movl UREGS_esp(%esp),%esi
+VFLT1: mov UREGS_ss(%esp),%gs
# ES:EDI == Ring-0 stack activation
- leal XREGS_eip(%esp),%edi
+ leal UREGS_eip(%esp),%edi
# Restore the hypercall-number-clobbered EAX on our stack frame
VFLT2: movl %gs:(%esi),%eax
- movl %eax,XREGS_eax(%esp)
+ movl %eax,UREGS_eax(%esp)
addl $4,%esi
# Copy the VM86 activation from the ring-1 stack to the ring-0 stack
- movl $(XREGS_user_sizeof-XREGS_eip)/4,%ecx
+ movl $(UREGS_user_sizeof-UREGS_eip)/4,%ecx
VFLT3: movl %gs:(%esi),%eax
stosl
addl $4,%esi
loop VFLT3
- # Fix up EFLAGS
- andl $~X86_EFLAGS_IOPL,XREGS_eflags(%esp)
- andl $X86_EFLAGS_IOPL,%edx # Ignore attempts to change EFLAGS.IOPL
- jnz 1f
- orl $X86_EFLAGS_IF,%edx # EFLAGS.IOPL=0 => no messing with EFLAGS.IF
-1: orl $X86_EFLAGS_VM,%edx # Force EFLAGS.VM
- orl %edx,XREGS_eflags(%esp)
+ # Fix up EFLAGS: IOPL=0, IF=1, VM=1
+ andl $~X86_EFLAGS_IOPL,UREGS_eflags(%esp)
+ orl $X86_EFLAGS_IF|X86_EFLAGS_VM,UREGS_eflags(%esp)
jmp test_all_events
@@ -628,52 +702,55 @@ VFLT3: movl %gs:(%esi),%eax
.data
ENTRY(exception_table)
- .long SYMBOL_NAME(do_divide_error)
- .long SYMBOL_NAME(do_debug)
+ .long do_divide_error
+ .long do_debug
.long 0 # nmi
- .long SYMBOL_NAME(do_int3)
- .long SYMBOL_NAME(do_overflow)
- .long SYMBOL_NAME(do_bounds)
- .long SYMBOL_NAME(do_invalid_op)
- .long SYMBOL_NAME(math_state_restore)
+ .long do_int3
+ .long do_overflow
+ .long do_bounds
+ .long do_invalid_op
+ .long math_state_restore
.long 0 # double fault
- .long SYMBOL_NAME(do_coprocessor_segment_overrun)
- .long SYMBOL_NAME(do_invalid_TSS)
- .long SYMBOL_NAME(do_segment_not_present)
- .long SYMBOL_NAME(do_stack_segment)
- .long SYMBOL_NAME(do_general_protection)
- .long SYMBOL_NAME(do_page_fault)
- .long SYMBOL_NAME(do_spurious_interrupt_bug)
- .long SYMBOL_NAME(do_coprocessor_error)
- .long SYMBOL_NAME(do_alignment_check)
- .long SYMBOL_NAME(do_machine_check)
- .long SYMBOL_NAME(do_simd_coprocessor_error)
+ .long do_coprocessor_segment_overrun
+ .long do_invalid_TSS
+ .long do_segment_not_present
+ .long do_stack_segment
+ .long do_general_protection
+ .long do_page_fault
+ .long do_spurious_interrupt_bug
+ .long do_coprocessor_error
+ .long do_alignment_check
+ .long do_machine_check
+ .long do_simd_coprocessor_error
ENTRY(hypercall_table)
- .long SYMBOL_NAME(do_set_trap_table) /* 0 */
- .long SYMBOL_NAME(do_mmu_update)
- .long SYMBOL_NAME(do_set_gdt)
- .long SYMBOL_NAME(do_stack_switch)
- .long SYMBOL_NAME(do_set_callbacks)
- .long SYMBOL_NAME(do_fpu_taskswitch) /* 5 */
- .long SYMBOL_NAME(do_sched_op)
- .long SYMBOL_NAME(do_dom0_op)
- .long SYMBOL_NAME(do_set_debugreg)
- .long SYMBOL_NAME(do_get_debugreg)
- .long SYMBOL_NAME(do_update_descriptor) /* 10 */
- .long SYMBOL_NAME(do_set_fast_trap)
- .long SYMBOL_NAME(do_dom_mem_op)
- .long SYMBOL_NAME(do_multicall)
- .long SYMBOL_NAME(do_update_va_mapping)
- .long SYMBOL_NAME(do_set_timer_op) /* 15 */
- .long SYMBOL_NAME(do_event_channel_op)
- .long SYMBOL_NAME(do_xen_version)
- .long SYMBOL_NAME(do_console_io)
- .long SYMBOL_NAME(do_physdev_op)
- .long SYMBOL_NAME(do_grant_table_op) /* 20 */
- .long SYMBOL_NAME(do_vm_assist)
- .long SYMBOL_NAME(do_update_va_mapping_otherdomain)
- .long SYMBOL_NAME(do_switch_vm86)
+ .long do_set_trap_table /* 0 */
+ .long do_mmu_update
+ .long do_set_gdt
+ .long do_stack_switch
+ .long do_set_callbacks
+ .long do_fpu_taskswitch /* 5 */
+ .long do_arch_sched_op
+ .long do_dom0_op
+ .long do_set_debugreg
+ .long do_get_debugreg
+ .long do_update_descriptor /* 10 */
+ .long do_ni_hypercall
+ .long do_dom_mem_op
+ .long do_multicall
+ .long do_update_va_mapping
+ .long do_set_timer_op /* 15 */
+ .long do_event_channel_op
+ .long do_xen_version
+ .long do_console_io
+ .long do_physdev_op
+ .long do_grant_table_op /* 20 */
+ .long do_vm_assist
+ .long do_update_va_mapping_otherdomain
+ .long do_switch_vm86
+ .long do_boot_vcpu
+ .long do_ni_hypercall /* 25 */
+ .long do_mmuext_op
.rept NR_hypercalls-((.-hypercall_table)/4)
- .long SYMBOL_NAME(do_ni_hypercall)
+ .long do_ni_hypercall
.endr
diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c
index 4fc5623dc8..b388c1cc49 100644
--- a/xen/arch/x86/x86_32/mm.c
+++ b/xen/arch/x86/x86_32/mm.c
@@ -22,135 +22,187 @@
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
+#include <xen/sched.h>
+#include <asm/current.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
-#include <asm/domain_page.h>
-unsigned long m2p_start_mfn;
+extern l1_pgentry_t *mapcache;
-static inline void set_pte_phys(unsigned long vaddr,
- l1_pgentry_t entry)
+unsigned int PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
+unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
+
+static unsigned long mpt_size;
+
+struct pfn_info *alloc_xen_pagetable(void)
{
- l2_pgentry_t *l2ent;
- l1_pgentry_t *l1ent;
+ extern int early_boot;
+ extern unsigned long xenheap_phys_start;
+ struct pfn_info *pg;
- l2ent = &idle_pg_table[l2_table_offset(vaddr)];
- l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
- *l1ent = entry;
+ if ( !early_boot )
+ {
+ void *v = alloc_xenheap_page();
+ return ((v == NULL) ? NULL : virt_to_page(v));
+ }
- /* It's enough to flush this one mapping. */
- __flush_tlb_one(vaddr);
+ pg = phys_to_page(xenheap_phys_start);
+ xenheap_phys_start += PAGE_SIZE;
+ return pg;
}
-
-void __set_fixmap(enum fixed_addresses idx,
- l1_pgentry_t entry)
+void free_xen_pagetable(struct pfn_info *pg)
{
- unsigned long address = fix_to_virt(idx);
-
- if ( likely(idx < __end_of_fixed_addresses) )
- set_pte_phys(address, entry);
- else
- printk("Invalid __set_fixmap\n");
+ free_xenheap_page(page_to_virt(pg));
}
+l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
+{
+ return &idle_pg_table_l2[l2_linear_offset(v)];
+}
void __init paging_init(void)
{
void *ioremap_pt;
- unsigned long v, l2e;
+ unsigned long v;
struct pfn_info *pg;
+ int i, mapcache_order;
- /* Allocate and map the machine-to-phys table. */
- if ( (pg = alloc_domheap_pages(NULL, 10)) == NULL )
- panic("Not enough memory to bootstrap Xen.\n");
- m2p_start_mfn = page_to_pfn(pg);
- idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(page_to_phys(pg) | __PAGE_HYPERVISOR | _PAGE_PSE);
+#ifdef CONFIG_X86_PAE
+ printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
+#else
+ printk("PAE disabled.\n");
+#endif
+
+ idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
- /* Xen 4MB mappings can all be GLOBAL. */
if ( cpu_has_pge )
{
+ /* Suitable Xen mapping can be GLOBAL. */
+ PAGE_HYPERVISOR |= _PAGE_GLOBAL;
+ PAGE_HYPERVISOR_NOCACHE |= _PAGE_GLOBAL;
+ /* Transform early mappings (e.g., the frametable). */
for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
- {
- l2e = l2_pgentry_val(idle_pg_table[v >> L2_PAGETABLE_SHIFT]);
- if ( l2e & _PAGE_PSE )
- l2e |= _PAGE_GLOBAL;
- idle_pg_table[v >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(l2e);
- }
+ if ( (l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) &
+ (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT) )
+ l2e_add_flags(idle_pg_table_l2[l2_linear_offset(v)],
+ _PAGE_GLOBAL);
}
- /* Create page table for ioremap(). */
- ioremap_pt = (void *)alloc_xenheap_page();
- clear_page(ioremap_pt);
- idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);
+ /*
+ * Allocate and map the machine-to-phys table and create read-only mapping
+ * of MPT for guest-OS use. Without PAE we'll end up with one 4MB page,
+ * with PAE we'll allocate 2MB pages depending on the amount of memory
+ * installed, but at least 4MB to cover 4GB address space. This is needed
+ * to make PCI I/O memory address lookups work in guests.
+ */
+ if ( (mpt_size = max_page * 4) < (4*1024*1024) )
+ mpt_size = 4*1024*1024;
+ for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
+ {
+ if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL )
+ panic("Not enough memory to bootstrap Xen.\n");
+ idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i] =
+ l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE);
+ idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i] =
+ l2e_from_page(pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW);
+ }
+ memset((void *)RDWR_MPT_VIRT_START, 0x55, mpt_size);
- /* Create read-only mapping of MPT for guest-OS use. */
- idle_pg_table[RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(l2_pgentry_val(
- idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT]) &
- ~_PAGE_RW);
+ /* Create page tables for ioremap(). */
+ for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
+ {
+ ioremap_pt = alloc_xenheap_page();
+ clear_page(ioremap_pt);
+ idle_pg_table_l2[l2_linear_offset(IOREMAP_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
+ }
/* Set up mapping cache for domain pages. */
- mapcache = (unsigned long *)alloc_xenheap_page();
- clear_page(mapcache);
- idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);
-
- /* Set up linear page table mapping. */
- idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
+ mapcache_order = get_order(MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER));
+ mapcache = alloc_xenheap_pages(mapcache_order);
+ memset(mapcache, 0, PAGE_SIZE << mapcache_order);
+ for ( i = 0; i < (MAPCACHE_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
+ idle_pg_table_l2[l2_linear_offset(MAPCACHE_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(mapcache) + i, __PAGE_HYPERVISOR);
}
-void __init zap_low_mappings(void)
+void __init zap_low_mappings(l2_pgentry_t *base)
{
int i;
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- idle_pg_table[i] = mk_l2_pgentry(0);
+ u32 addr;
+
+ for (i = 0; ; i++) {
+ addr = (i << L2_PAGETABLE_SHIFT);
+ if (addr >= HYPERVISOR_VIRT_START)
+ break;
+ if (l2e_get_paddr(base[i]) != addr)
+ continue;
+ base[i] = l2e_empty();
+ }
flush_tlb_all_pge();
}
-
-/*
- * Allows shooting down of borrowed page-table use on specific CPUs.
- * Specifically, we borrow page tables when running the idle domain.
- */
-static void __synchronise_pagetables(void *mask)
+void subarch_init_memory(struct domain *dom_xen)
{
- struct domain *d = current;
- if ( ((unsigned long)mask & (1<<d->processor)) && is_idle_task(d) )
- write_ptbase(&d->mm);
-}
-void synchronise_pagetables(unsigned long cpu_mask)
-{
- __synchronise_pagetables((void *)cpu_mask);
- smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
+ unsigned long m2p_start_mfn;
+ int i;
+
+ /*
+ * We are rather picky about the layout of 'struct pfn_info'. The
+ * count_info and domain fields must be adjacent, as we perform atomic
+ * 64-bit operations on them. Also, just for sanity, we assert the size
+ * of the structure here.
+ */
+ if ( (offsetof(struct pfn_info, u.inuse._domain) !=
+ (offsetof(struct pfn_info, count_info) + sizeof(u32))) ||
+ (sizeof(struct pfn_info) != 24) )
+ {
+ printk("Weird pfn_info layout (%ld,%ld,%d)\n",
+ offsetof(struct pfn_info, count_info),
+ offsetof(struct pfn_info, u.inuse._domain),
+ sizeof(struct pfn_info));
+ BUG();
+ }
+
+ /* M2P table is mappable read-only by privileged domains. */
+ for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
+ {
+ m2p_start_mfn = l2e_get_pfn(
+ idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
+ {
+ frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
+ /* Ensure it's only mapped read-only by domains. */
+ frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
+ page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
+ }
+ }
}
+
long do_stack_switch(unsigned long ss, unsigned long esp)
{
int nr = smp_processor_id();
struct tss_struct *t = &init_tss[nr];
- /* We need to do this check as we load and use SS on guest's behalf. */
- if ( (ss & 3) == 0 )
+ if ( (ss & 3) != 1 )
return -EPERM;
- current->thread.guestos_ss = ss;
- current->thread.guestos_sp = esp;
+ current->arch.guest_context.kernel_ss = ss;
+ current->arch.guest_context.kernel_sp = esp;
t->ss1 = ss;
t->esp1 = esp;
return 0;
}
-
/* Returns TRUE if given descriptor is valid for GDT or LDT. */
-int check_descriptor(unsigned long *d)
+int check_descriptor(struct desc_struct *d)
{
- unsigned long base, limit, a = d[0], b = d[1];
+ unsigned long base, limit;
+ u32 a = d->a, b = d->b;
/* A not-present descriptor will always fault, so is safe. */
if ( !(b & _SEGMENT_P) )
@@ -159,7 +211,7 @@ int check_descriptor(unsigned long *d)
/*
* We don't allow a DPL of zero. There is no legitimate reason for
* specifying DPL==0, and it gets rather dangerous if we also accept call
- * gates (consider a call gate pointing at another guestos descriptor with
+ * gates (consider a call gate pointing at another kernel descriptor with
* DPL 0 -- this would get the OS ring-0 privileges).
*/
if ( (b & _SEGMENT_DPL) == 0 )
@@ -197,7 +249,7 @@ int check_descriptor(unsigned long *d)
/* Check that base is at least a page away from Xen-private area. */
base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
- if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
+ if ( base >= (GUEST_SEGMENT_MAX_ADDR - PAGE_SIZE) )
goto bad;
/* Check and truncate the limit if necessary. */
@@ -209,6 +261,7 @@ int check_descriptor(unsigned long *d)
if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
{
/*
+ * DATA, GROWS-DOWN.
* Grows-down limit check.
* NB. limit == 0xFFFFF provides no access (if G=1).
* limit == 0x00000 provides 4GB-4kB access (if G=1).
@@ -222,20 +275,22 @@ int check_descriptor(unsigned long *d)
else
{
/*
+ * DATA, GROWS-UP.
+ * CODE (CONFORMING AND NON-CONFORMING).
* Grows-up limit check.
* NB. limit == 0xFFFFF provides 4GB access (if G=1).
* limit == 0x00000 provides 4kB access (if G=1).
*/
if ( ((base + limit) <= base) ||
- ((base + limit) > PAGE_OFFSET) )
+ ((base + limit) > GUEST_SEGMENT_MAX_ADDR) )
{
- limit = PAGE_OFFSET - base;
+ limit = GUEST_SEGMENT_MAX_ADDR - base;
truncate:
if ( !(b & _SEGMENT_G) )
goto bad; /* too dangerous; too hard to work out... */
limit = (limit >> 12) - 1;
- d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
- d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
+ d->a &= ~0x0ffff; d->a |= limit & 0x0ffff;
+ d->b &= ~0xf0000; d->b |= limit & 0xf0000;
}
}
@@ -245,231 +300,17 @@ int check_descriptor(unsigned long *d)
return 0;
}
-
-void destroy_gdt(struct domain *d)
-{
- int i;
- unsigned long pfn;
-
- for ( i = 0; i < 16; i++ )
- {
- if ( (pfn = l1_pgentry_to_pagenr(d->mm.perdomain_pt[i])) != 0 )
- put_page_and_type(&frame_table[pfn]);
- d->mm.perdomain_pt[i] = mk_l1_pgentry(0);
- }
-}
-
-
-long set_gdt(struct domain *d,
- unsigned long *frames,
- unsigned int entries)
-{
- /* NB. There are 512 8-byte entries per GDT page. */
- int i = 0, nr_pages = (entries + 511) / 512;
- struct desc_struct *vgdt;
- unsigned long pfn;
-
- /* Check the first page in the new GDT. */
- if ( (pfn = frames[0]) >= max_page )
- goto fail;
-
- /* The first page is special because Xen owns a range of entries in it. */
- if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
- {
- /* GDT checks failed: try zapping the Xen reserved entries. */
- if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
- goto fail;
- vgdt = map_domain_mem(pfn << PAGE_SHIFT);
- memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
- NR_RESERVED_GDT_ENTRIES*8);
- unmap_domain_mem(vgdt);
- put_page_and_type(&frame_table[pfn]);
-
- /* Okay, we zapped the entries. Now try the GDT checks again. */
- if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
- goto fail;
- }
-
- /* Check the remaining pages in the new GDT. */
- for ( i = 1; i < nr_pages; i++ )
- if ( ((pfn = frames[i]) >= max_page) ||
- !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
- goto fail;
-
- /* Copy reserved GDT entries to the new GDT. */
- vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
- memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
- gdt_table + FIRST_RESERVED_GDT_ENTRY,
- NR_RESERVED_GDT_ENTRIES*8);
- unmap_domain_mem(vgdt);
-
- /* Tear down the old GDT. */
- destroy_gdt(d);
-
- /* Install the new GDT. */
- for ( i = 0; i < nr_pages; i++ )
- d->mm.perdomain_pt[i] =
- mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
-
- SET_GDT_ADDRESS(d, GDT_VIRT_START);
- SET_GDT_ENTRIES(d, entries);
-
- return 0;
-
- fail:
- while ( i-- > 0 )
- put_page_and_type(&frame_table[frames[i]]);
- return -EINVAL;
-}
-
-
-long do_set_gdt(unsigned long *frame_list, unsigned int entries)
+void memguard_guard_stack(void *p)
{
- int nr_pages = (entries + 511) / 512;
- unsigned long frames[16];
- long ret;
-
- if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
- return -EINVAL;
-
- if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
- return -EFAULT;
-
- if ( (ret = set_gdt(current, frames, entries)) == 0 )
- {
- local_flush_tlb();
- __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
- }
-
- return ret;
+ memguard_guard_range(p, PAGE_SIZE);
}
-
-long do_update_descriptor(
- unsigned long pa, unsigned long word1, unsigned long word2)
-{
- unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
- struct pfn_info *page;
- long ret = -EINVAL;
-
- d[0] = word1;
- d[1] = word2;
-
- if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) )
- return -EINVAL;
-
- page = &frame_table[pfn];
- if ( unlikely(!get_page(page, current)) )
- return -EINVAL;
-
- /* Check if the given frame is in use in an unsafe context. */
- switch ( page->u.inuse.type_info & PGT_type_mask )
- {
- case PGT_gdt_page:
- /* Disallow updates of Xen-reserved descriptors in the current GDT. */
- if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) &&
- (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
- (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
- goto out;
- if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
- goto out;
- break;
- case PGT_ldt_page:
- if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
- goto out;
- break;
- default:
- if ( unlikely(!get_page_type(page, PGT_writable_page)) )
- goto out;
- break;
- }
-
- /* All is good so make the update. */
- gdt_pent = map_domain_mem(pa);
- memcpy(gdt_pent, d, 8);
- unmap_domain_mem(gdt_pent);
-
- put_page_type(page);
-
- ret = 0; /* success */
-
- out:
- put_page(page);
- return ret;
-}
-
-#ifdef MEMORY_GUARD
-
-void *memguard_init(void *heap_start)
-{
- l1_pgentry_t *l1;
- int i, j;
-
- /* Round the allocation pointer up to a page boundary. */
- heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
- PAGE_MASK);
-
- /* Memory guarding is incompatible with super pages. */
- for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
- {
- l1 = (l1_pgentry_t *)heap_start;
- heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
- for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
- l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
- (j << L1_PAGETABLE_SHIFT) |
- __PAGE_HYPERVISOR);
- idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
- mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
- }
-
- return heap_start;
-}
-
-static void __memguard_change_range(void *p, unsigned long l, int guard)
-{
- l1_pgentry_t *l1;
- l2_pgentry_t *l2;
- unsigned long _p = (unsigned long)p;
- unsigned long _l = (unsigned long)l;
-
- /* Ensure we are dealing with a page-aligned whole number of pages. */
- ASSERT((_p&PAGE_MASK) != 0);
- ASSERT((_l&PAGE_MASK) != 0);
- ASSERT((_p&~PAGE_MASK) == 0);
- ASSERT((_l&~PAGE_MASK) == 0);
-
- while ( _l != 0 )
- {
- l2 = &idle_pg_table[l2_table_offset(_p)];
- l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
- if ( guard )
- *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
- else
- *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
- _p += PAGE_SIZE;
- _l -= PAGE_SIZE;
- }
-}
-
-void memguard_guard_range(void *p, unsigned long l)
-{
- __memguard_change_range(p, l, 1);
- local_flush_tlb();
-}
-
-void memguard_unguard_range(void *p, unsigned long l)
-{
- __memguard_change_range(p, l, 0);
-}
-
-int memguard_is_guarded(void *p)
-{
- l1_pgentry_t *l1;
- l2_pgentry_t *l2;
- unsigned long _p = (unsigned long)p;
- l2 = &idle_pg_table[l2_table_offset(_p)];
- l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
- return !(l1_pgentry_val(*l1) & _PAGE_PRESENT);
-}
-
-#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/x86_32/seg_fixup.c b/xen/arch/x86/x86_32/seg_fixup.c
index 8a04f507d5..d7d318551c 100644
--- a/xen/arch/x86/x86_32/seg_fixup.c
+++ b/xen/arch/x86/x86_32/seg_fixup.c
@@ -27,7 +27,10 @@
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/perfc.h>
+#include <asm/current.h>
#include <asm/processor.h>
+#include <asm/regs.h>
+#include <asm/x86_emulate.h>
/* Make the scary benign errors go away. */
#undef DPRINTK
@@ -105,7 +108,7 @@ static unsigned char insn_decode[256] = {
*/
int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
{
- struct domain *d = current;
+ struct vcpu *d = current;
unsigned long *table, a, b;
int ldt = !!(seg & 4);
int idx = (seg >> 3) & 8191;
@@ -113,14 +116,14 @@ int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
/* Get base and check limit. */
if ( ldt )
{
- table = (unsigned long *)LDT_VIRT_START;
- if ( idx >= d->mm.ldt_ents )
+ table = (unsigned long *)LDT_VIRT_START(d);
+ if ( idx >= d->arch.guest_context.ldt_ents )
goto fail;
}
else /* gdt */
{
- table = (unsigned long *)GET_GDT_ADDRESS(d);
- if ( idx >= GET_GDT_ENTRIES(d) )
+ table = (unsigned long *)GDT_VIRT_START(d);
+ if ( idx >= d->arch.guest_context.gdt_ents )
goto fail;
}
@@ -144,7 +147,7 @@ int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
* Anything that looks like a truncated segment we assume ought really
* to be a 4GB segment. DANGER!
*/
- if ( (PAGE_OFFSET - (*base + *limit)) < PAGE_SIZE )
+ if ( (GUEST_SEGMENT_MAX_ADDR - (*base + *limit)) < PAGE_SIZE )
*limit = 0;
return 1;
@@ -171,7 +174,7 @@ int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
int fixup_seg(u16 seg, unsigned long offset)
{
- struct domain *d = current;
+ struct vcpu *d = current;
unsigned long *table, a, b, base, limit;
int ldt = !!(seg & 4);
int idx = (seg >> 3) & 8191;
@@ -179,21 +182,21 @@ int fixup_seg(u16 seg, unsigned long offset)
/* Get base and check limit. */
if ( ldt )
{
- table = (unsigned long *)LDT_VIRT_START;
- if ( idx >= d->mm.ldt_ents )
+ table = (unsigned long *)LDT_VIRT_START(d);
+ if ( idx >= d->arch.guest_context.ldt_ents )
{
DPRINTK("Segment %04x out of LDT range (%ld)\n",
- seg, d->mm.ldt_ents);
+ seg, d->arch.guest_context.ldt_ents);
goto fail;
}
}
else /* gdt */
{
- table = (unsigned long *)GET_GDT_ADDRESS(d);
- if ( idx >= GET_GDT_ENTRIES(d) )
+ table = (unsigned long *)GDT_VIRT_START(d);
+ if ( idx >= d->arch.guest_context.gdt_ents )
{
- DPRINTK("Segment %04x out of GDT range (%d)\n",
- seg, GET_GDT_ENTRIES(d));
+ DPRINTK("Segment %04x out of GDT range (%ld)\n",
+ seg, d->arch.guest_context.gdt_ents);
goto fail;
}
}
@@ -225,14 +228,14 @@ int fixup_seg(u16 seg, unsigned long offset)
if ( ((base + limit) < PAGE_SIZE) && (offset <= limit) )
{
/* Flip to expands-up. */
- limit = PAGE_OFFSET - base;
+ limit = GUEST_SEGMENT_MAX_ADDR - base;
goto flip;
}
}
else
{
/* Expands-up: All the way to Xen space? Assume 4GB if so. */
- if ( ((PAGE_OFFSET - (base + limit)) < PAGE_SIZE) &&
+ if ( ((GUEST_SEGMENT_MAX_ADDR - (base + limit)) < PAGE_SIZE) &&
(offset > limit) )
{
/* Flip to expands-down. */
@@ -258,31 +261,13 @@ int fixup_seg(u16 seg, unsigned long offset)
return 1;
}
-/* Decode Reg field of a ModRM byte: return a pointer into a register block. */
-void *decode_reg(struct xen_regs *regs, u8 b)
-{
- switch ( b & 7 )
- {
- case 0: return &regs->eax;
- case 1: return &regs->ecx;
- case 2: return &regs->edx;
- case 3: return &regs->ebx;
- case 4: return &regs->esp;
- case 5: return &regs->ebp;
- case 6: return &regs->esi;
- case 7: return &regs->edi;
- }
-
- return NULL;
-}
-
/*
* Called from the general-protection fault handler to attempt to decode
* and emulate an instruction that depends on 4GB segments.
*/
-int gpf_emulate_4gb(struct xen_regs *regs)
+int gpf_emulate_4gb(struct cpu_user_regs *regs)
{
- struct domain *d = current;
+ struct vcpu *d = current;
trap_info_t *ti;
struct trap_bounce *tb;
u8 modrm, mod, reg, rm, decode;
@@ -399,8 +384,8 @@ int gpf_emulate_4gb(struct xen_regs *regs)
}
/* Decode Reg and R/M fields. */
- regreg = decode_reg(regs, reg);
- memreg = decode_reg(regs, rm);
+ regreg = decode_register(reg, regs, 0);
+ memreg = decode_register(rm, regs, 0);
/* Decode Mod field. */
switch ( modrm >> 6 )
@@ -455,16 +440,16 @@ int gpf_emulate_4gb(struct xen_regs *regs)
perfc_incrc(seg_fixups);
/* If requested, give a callback on otherwise unused vector 15. */
- if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments_notify) )
+ if ( VM_ASSIST(d->domain, VMASST_TYPE_4gb_segments_notify) )
{
- ti = &d->thread.traps[15];
- tb = &d->thread.trap_bounce;
+ ti = &d->arch.guest_context.trap_ctxt[15];
+ tb = &d->arch.trap_bounce;
tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
tb->error_code = pb - eip;
tb->cs = ti->cs;
tb->eip = ti->address;
if ( TI_GET_IF(ti) )
- d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+ tb->flags |= TBF_INTERRUPT;
}
return EXCRET_fault_fixed;
@@ -482,3 +467,13 @@ int gpf_emulate_4gb(struct xen_regs *regs)
propagate_page_fault((unsigned long)pb, 4);
return EXCRET_fault_fixed;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c
new file mode 100644
index 0000000000..29bb56a0b2
--- /dev/null
+++ b/xen/arch/x86/x86_32/traps.c
@@ -0,0 +1,235 @@
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/sched.h>
+#include <xen/lib.h>
+#include <xen/console.h>
+#include <xen/mm.h>
+#include <xen/irq.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/vmx.h>
+
+/* All CPUs have their own IDT to allow int80 direct trap. */
+idt_entry_t *idt_tables[NR_CPUS] = { 0 };
+
+void show_registers(struct cpu_user_regs *regs)
+{
+ unsigned long ss, ds, es, fs, gs, cs;
+ unsigned long eip, esp, eflags, cr0, cr3;
+ const char *context;
+
+ if ( VMX_DOMAIN(current) && (regs->eflags == 0) )
+ {
+ __vmread(GUEST_RIP, &eip);
+ __vmread(GUEST_RSP, &esp);
+ __vmread(GUEST_RFLAGS, &eflags);
+ __vmread(GUEST_SS_SELECTOR, &ss);
+ __vmread(GUEST_DS_SELECTOR, &ds);
+ __vmread(GUEST_ES_SELECTOR, &es);
+ __vmread(GUEST_FS_SELECTOR, &fs);
+ __vmread(GUEST_GS_SELECTOR, &gs);
+ __vmread(GUEST_CS_SELECTOR, &cs);
+ __vmread(CR0_READ_SHADOW, &cr0);
+ __vmread(GUEST_CR3, &cr3);
+ context = "vmx guest";
+ }
+ else
+ {
+ eip = regs->eip;
+ eflags = regs->eflags;
+ cr0 = read_cr0();
+ cr3 = read_cr3();
+
+ __asm__ ( "movl %%fs,%0 ; movl %%gs,%1" : "=r" (fs), "=r" (gs) );
+
+ if ( GUEST_MODE(regs) )
+ {
+ esp = regs->esp;
+ ss = regs->ss & 0xffff;
+ ds = regs->ds & 0xffff;
+ es = regs->es & 0xffff;
+ cs = regs->cs & 0xffff;
+ context = "guest";
+ }
+ else
+ {
+ esp = (unsigned long)&regs->esp;
+ ss = __HYPERVISOR_DS;
+ ds = __HYPERVISOR_DS;
+ es = __HYPERVISOR_DS;
+ cs = __HYPERVISOR_CS;
+ context = "hypervisor";
+ }
+ }
+
+ printk("CPU: %d\nEIP: %04lx:[<%08lx>] \nEFLAGS: %08lx "
+ "CONTEXT: %s\n",
+ smp_processor_id(), (unsigned long)0xffff & regs->cs,
+ eip, eflags, context);
+ printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("esi: %08x edi: %08x ebp: %08x esp: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("cr0: %08lx cr3: %08lx\n", cr0, cr3);
+ printk("ds: %04lx es: %04lx fs: %04lx gs: %04lx "
+ "ss: %04lx cs: %04lx\n",
+ ds, es, fs, gs, ss, cs);
+
+ if ( GUEST_MODE(regs) )
+ show_guest_stack();
+ else
+ show_stack((unsigned long *)&regs->esp);
+}
+
+void show_page_walk(unsigned long addr)
+{
+ l2_pgentry_t pmd;
+ l1_pgentry_t *pte;
+
+ if ( addr < PAGE_OFFSET )
+ return;
+
+ printk("Pagetable walk from %08lx:\n", addr);
+
+ pmd = idle_pg_table_l2[l2_linear_offset(addr)];
+ printk(" L2 = %"PRIpte" %s\n", l2e_get_intpte(pmd),
+ (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : "");
+ if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) ||
+ (l2e_get_flags(pmd) & _PAGE_PSE) )
+ return;
+
+ pte = __va(l2e_get_paddr(pmd));
+ pte += l1_table_offset(addr);
+ printk(" L1 = %"PRIpte"\n", l1e_get_intpte(*pte));
+}
+
+#define DOUBLEFAULT_STACK_SIZE 1024
+static struct tss_struct doublefault_tss;
+static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+
+asmlinkage void do_double_fault(void)
+{
+ struct tss_struct *tss = &doublefault_tss;
+ unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
+
+ watchdog_disable();
+
+ console_force_unlock();
+
+ /* Find information saved during fault and dump it to the console. */
+ tss = &init_tss[cpu];
+ printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
+ cpu, tss->cs, tss->eip, tss->eflags);
+ printk("CR3: %08x\n", tss->__cr3);
+ printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
+ tss->eax, tss->ebx, tss->ecx, tss->edx);
+ printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
+ tss->esi, tss->edi, tss->ebp, tss->esp);
+ printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
+ tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
+ printk("************************************\n");
+ printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
+ printk("System needs manual reset.\n");
+ printk("************************************\n");
+
+ /* Lock up the console to prevent spurious output from other CPUs. */
+ console_force_lock();
+
+ /* Wait for manual reset. */
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+}
+
+BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
+asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs)
+{
+ asmlinkage void do_nmi(struct cpu_user_regs *, unsigned long);
+ ack_APIC_irq();
+ do_nmi(&regs, 0);
+}
+
+void __init percpu_traps_init(void)
+{
+ asmlinkage int hypercall(void);
+
+ if ( smp_processor_id() != 0 )
+ return;
+
+ /* CPU0 uses the master IDT. */
+ idt_tables[0] = idt_table;
+
+ /* The hypercall entry vector is only accessible from ring 1. */
+ _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
+
+ set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);
+
+ /*
+ * Make a separate task for double faults. This will get us debug output if
+ * we blow the kernel stack.
+ */
+ struct tss_struct *tss = &doublefault_tss;
+ memset(tss, 0, sizeof(*tss));
+ tss->ds = __HYPERVISOR_DS;
+ tss->es = __HYPERVISOR_DS;
+ tss->ss = __HYPERVISOR_DS;
+ tss->esp = (unsigned long)
+ &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+ tss->__cr3 = __pa(idle_pg_table);
+ tss->cs = __HYPERVISOR_CS;
+ tss->eip = (unsigned long)do_double_fault;
+ tss->eflags = 2;
+ tss->bitmap = IOBMP_INVALID_OFFSET;
+ _set_tssldt_desc(
+ gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
+ (unsigned long)tss, 235, 9);
+
+ set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
+}
+
+void init_int80_direct_trap(struct vcpu *v)
+{
+ trap_info_t *ti = &v->arch.guest_context.trap_ctxt[0x80];
+
+ /*
+ * We can't virtualise interrupt gates, as there's no way to get
+ * the CPU to automatically clear the events_mask variable.
+ */
+ if ( TI_GET_IF(ti) )
+ return;
+
+ v->arch.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
+ v->arch.int80_desc.b =
+ (ti->address & 0xffff0000) | 0x8f00 | ((TI_GET_DPL(ti) & 3) << 13);
+
+ if ( v == current )
+ set_int80_direct_trap(v);
+}
+
+long do_set_callbacks(unsigned long event_selector,
+ unsigned long event_address,
+ unsigned long failsafe_selector,
+ unsigned long failsafe_address)
+{
+ struct vcpu *d = current;
+
+ if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
+ return -EPERM;
+
+ d->arch.guest_context.event_callback_cs = event_selector;
+ d->arch.guest_context.event_callback_eip = event_address;
+ d->arch.guest_context.failsafe_callback_cs = failsafe_selector;
+ d->arch.guest_context.failsafe_callback_eip = failsafe_address;
+
+ return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/x86_32/usercopy.c b/xen/arch/x86/x86_32/usercopy.c
deleted file mode 100644
index df30b4849c..0000000000
--- a/xen/arch/x86/x86_32/usercopy.c
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * User address space access functions.
- * The non inlined parts of asm-i386/uaccess.h are here.
- *
- * Copyright 1997 Andi Kleen <ak@muc.de>
- * Copyright 1997 Linus Torvalds
- */
-#include <xen/config.h>
-#include <xen/mm.h>
-#include <asm/uaccess.h>
-
-#define might_sleep() ((void)0)
-
-static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
-{
-#ifdef CONFIG_X86_INTEL_USERCOPY
- if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask))
- return 0;
-#endif
- return 1;
-}
-#define movsl_is_ok(a1,a2,n) \
- __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n))
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-#define __do_strncpy_from_user(dst,src,count,res) \
-do { \
- int __d0, __d1, __d2; \
- __asm__ __volatile__( \
- " testl %1,%1\n" \
- " jz 2f\n" \
- "0: lodsb\n" \
- " stosb\n" \
- " testb %%al,%%al\n" \
- " jz 1f\n" \
- " decl %1\n" \
- " jnz 0b\n" \
- "1: subl %1,%0\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl %5,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- ".previous" \
- : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
- "=&D" (__d2) \
- : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
- : "memory"); \
-} while (0)
-
-/**
- * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
- * @dst: Destination address, in kernel space. This buffer must be at
- * least @count bytes long.
- * @src: Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- * Caller must check the specified block with access_ok() before calling
- * this function.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-long
-__strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res;
- __do_strncpy_from_user(dst, src, count, res);
- return res;
-}
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace.
- * @dst: Destination address, in kernel space. This buffer must be at
- * least @count bytes long.
- * @src: Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-long
-strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res = -EFAULT;
- if (access_ok(VERIFY_READ, src, 1))
- __do_strncpy_from_user(dst, src, count, res);
- return res;
-}
-
-
-/*
- * Zero Userspace
- */
-
-#define __do_clear_user(addr,size) \
-do { \
- int __d0; \
- __asm__ __volatile__( \
- "0: rep; stosl\n" \
- " movl %2,%0\n" \
- "1: rep; stosb\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: lea 0(%2,%0,4),%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,2b\n" \
- ".previous" \
- : "=&c"(size), "=&D" (__d0) \
- : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
-} while (0)
-
-/**
- * clear_user: - Zero a block of memory in user space.
- * @to: Destination address, in user space.
- * @n: Number of bytes to zero.
- *
- * Zero a block of memory in user space.
- *
- * Returns number of bytes that could not be cleared.
- * On success, this will be zero.
- */
-unsigned long
-clear_user(void __user *to, unsigned long n)
-{
- might_sleep();
- if (access_ok(VERIFY_WRITE, to, n))
- __do_clear_user(to, n);
- return n;
-}
-
-/**
- * __clear_user: - Zero a block of memory in user space, with less checking.
- * @to: Destination address, in user space.
- * @n: Number of bytes to zero.
- *
- * Zero a block of memory in user space. Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be cleared.
- * On success, this will be zero.
- */
-unsigned long
-__clear_user(void __user *to, unsigned long n)
-{
- __do_clear_user(to, n);
- return n;
-}
-
-/**
- * strlen_user: - Get the size of a string in user space.
- * @s: The string to measure.
- * @n: The maximum valid length
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- * If the string is too long, returns a value greater than @n.
- */
-long strnlen_user(const char __user *s, long n)
-{
- unsigned long mask = -__addr_ok(s);
- unsigned long res, tmp;
-
- might_sleep();
-
- __asm__ __volatile__(
- " testl %0, %0\n"
- " jz 3f\n"
- " andl %0,%%ecx\n"
- "0: repne; scasb\n"
- " setne %%al\n"
- " subl %%ecx,%0\n"
- " addl %0,%%eax\n"
- "1:\n"
- ".section .fixup,\"ax\"\n"
- "2: xorl %%eax,%%eax\n"
- " jmp 1b\n"
- "3: movb $1,%%al\n"
- " jmp 1b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,2b\n"
- ".previous"
- :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
- :"0" (n), "1" (s), "2" (0), "3" (mask)
- :"cc");
- return res & mask;
-}
-
-#ifdef CONFIG_X86_INTEL_USERCOPY
-static unsigned long
-__copy_user_intel(void __user *to, const void *from, unsigned long size)
-{
- int d0, d1;
- __asm__ __volatile__(
- " .align 2,0x90\n"
- "1: movl 32(%4), %%eax\n"
- " cmpl $67, %0\n"
- " jbe 3f\n"
- "2: movl 64(%4), %%eax\n"
- " .align 2,0x90\n"
- "3: movl 0(%4), %%eax\n"
- "4: movl 4(%4), %%edx\n"
- "5: movl %%eax, 0(%3)\n"
- "6: movl %%edx, 4(%3)\n"
- "7: movl 8(%4), %%eax\n"
- "8: movl 12(%4),%%edx\n"
- "9: movl %%eax, 8(%3)\n"
- "10: movl %%edx, 12(%3)\n"
- "11: movl 16(%4), %%eax\n"
- "12: movl 20(%4), %%edx\n"
- "13: movl %%eax, 16(%3)\n"
- "14: movl %%edx, 20(%3)\n"
- "15: movl 24(%4), %%eax\n"
- "16: movl 28(%4), %%edx\n"
- "17: movl %%eax, 24(%3)\n"
- "18: movl %%edx, 28(%3)\n"
- "19: movl 32(%4), %%eax\n"
- "20: movl 36(%4), %%edx\n"
- "21: movl %%eax, 32(%3)\n"
- "22: movl %%edx, 36(%3)\n"
- "23: movl 40(%4), %%eax\n"
- "24: movl 44(%4), %%edx\n"
- "25: movl %%eax, 40(%3)\n"
- "26: movl %%edx, 44(%3)\n"
- "27: movl 48(%4), %%eax\n"
- "28: movl 52(%4), %%edx\n"
- "29: movl %%eax, 48(%3)\n"
- "30: movl %%edx, 52(%3)\n"
- "31: movl 56(%4), %%eax\n"
- "32: movl 60(%4), %%edx\n"
- "33: movl %%eax, 56(%3)\n"
- "34: movl %%edx, 60(%3)\n"
- " addl $-64, %0\n"
- " addl $64, %4\n"
- " addl $64, %3\n"
- " cmpl $63, %0\n"
- " ja 1b\n"
- "35: movl %0, %%eax\n"
- " shrl $2, %0\n"
- " andl $3, %%eax\n"
- " cld\n"
- "99: rep; movsl\n"
- "36: movl %%eax, %0\n"
- "37: rep; movsb\n"
- "100:\n"
- ".section .fixup,\"ax\"\n"
- "101: lea 0(%%eax,%0,4),%0\n"
- " jmp 100b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b,100b\n"
- " .long 2b,100b\n"
- " .long 3b,100b\n"
- " .long 4b,100b\n"
- " .long 5b,100b\n"
- " .long 6b,100b\n"
- " .long 7b,100b\n"
- " .long 8b,100b\n"
- " .long 9b,100b\n"
- " .long 10b,100b\n"
- " .long 11b,100b\n"
- " .long 12b,100b\n"
- " .long 13b,100b\n"
- " .long 14b,100b\n"
- " .long 15b,100b\n"
- " .long 16b,100b\n"
- " .long 17b,100b\n"
- " .long 18b,100b\n"
- " .long 19b,100b\n"
- " .long 20b,100b\n"
- " .long 21b,100b\n"
- " .long 22b,100b\n"
- " .long 23b,100b\n"
- " .long 24b,100b\n"
- " .long 25b,100b\n"
- " .long 26b,100b\n"
- " .long 27b,100b\n"
- " .long 28b,100b\n"
- " .long 29b,100b\n"
- " .long 30b,100b\n"
- " .long 31b,100b\n"
- " .long 32b,100b\n"
- " .long 33b,100b\n"
- " .long 34b,100b\n"
- " .long 35b,100b\n"
- " .long 36b,100b\n"
- " .long 37b,100b\n"
- " .long 99b,101b\n"
- ".previous"
- : "=&c"(size), "=&D" (d0), "=&S" (d1)
- : "1"(to), "2"(from), "0"(size)
- : "eax", "edx", "memory");
- return size;
-}
-
-static unsigned long
-__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
-{
- int d0, d1;
- __asm__ __volatile__(
- " .align 2,0x90\n"
- "0: movl 32(%4), %%eax\n"
- " cmpl $67, %0\n"
- " jbe 2f\n"
- "1: movl 64(%4), %%eax\n"
- " .align 2,0x90\n"
- "2: movl 0(%4), %%eax\n"
- "21: movl 4(%4), %%edx\n"
- " movl %%eax, 0(%3)\n"
- " movl %%edx, 4(%3)\n"
- "3: movl 8(%4), %%eax\n"
- "31: movl 12(%4),%%edx\n"
- " movl %%eax, 8(%3)\n"
- " movl %%edx, 12(%3)\n"
- "4: movl 16(%4), %%eax\n"
- "41: movl 20(%4), %%edx\n"
- " movl %%eax, 16(%3)\n"
- " movl %%edx, 20(%3)\n"
- "10: movl 24(%4), %%eax\n"
- "51: movl 28(%4), %%edx\n"
- " movl %%eax, 24(%3)\n"
- " movl %%edx, 28(%3)\n"
- "11: movl 32(%4), %%eax\n"
- "61: movl 36(%4), %%edx\n"
- " movl %%eax, 32(%3)\n"
- " movl %%edx, 36(%3)\n"
- "12: movl 40(%4), %%eax\n"
- "71: movl 44(%4), %%edx\n"
- " movl %%eax, 40(%3)\n"
- " movl %%edx, 44(%3)\n"
- "13: movl 48(%4), %%eax\n"
- "81: movl 52(%4), %%edx\n"
- " movl %%eax, 48(%3)\n"
- " movl %%edx, 52(%3)\n"
- "14: movl 56(%4), %%eax\n"
- "91: movl 60(%4), %%edx\n"
- " movl %%eax, 56(%3)\n"
- " movl %%edx, 60(%3)\n"
- " addl $-64, %0\n"
- " addl $64, %4\n"
- " addl $64, %3\n"
- " cmpl $63, %0\n"
- " ja 0b\n"
- "5: movl %0, %%eax\n"
- " shrl $2, %0\n"
- " andl $3, %%eax\n"
- " cld\n"
- "6: rep; movsl\n"
- " movl %%eax,%0\n"
- "7: rep; movsb\n"
- "8:\n"
- ".section .fixup,\"ax\"\n"
- "9: lea 0(%%eax,%0,4),%0\n"
- "16: pushl %0\n"
- " pushl %%eax\n"
- " xorl %%eax,%%eax\n"
- " rep; stosb\n"
- " popl %%eax\n"
- " popl %0\n"
- " jmp 8b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,16b\n"
- " .long 1b,16b\n"
- " .long 2b,16b\n"
- " .long 21b,16b\n"
- " .long 3b,16b\n"
- " .long 31b,16b\n"
- " .long 4b,16b\n"
- " .long 41b,16b\n"
- " .long 10b,16b\n"
- " .long 51b,16b\n"
- " .long 11b,16b\n"
- " .long 61b,16b\n"
- " .long 12b,16b\n"
- " .long 71b,16b\n"
- " .long 13b,16b\n"
- " .long 81b,16b\n"
- " .long 14b,16b\n"
- " .long 91b,16b\n"
- " .long 6b,9b\n"
- " .long 7b,16b\n"
- ".previous"
- : "=&c"(size), "=&D" (d0), "=&S" (d1)
- : "1"(to), "2"(from), "0"(size)
- : "eax", "edx", "memory");
- return size;
-}
-#else
-/*
- * Leave these declared but undefined. They should not be any references to
- * them
- */
-unsigned long
-__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size);
-unsigned long
-__copy_user_intel(void __user *to, const void *from, unsigned long size);
-#endif /* CONFIG_X86_INTEL_USERCOPY */
-
-/* Generic arbitrary sized copy. */
-#define __copy_user(to,from,size) \
-do { \
- int __d0, __d1, __d2; \
- __asm__ __volatile__( \
- " cmp $7,%0\n" \
- " jbe 1f\n" \
- " movl %1,%0\n" \
- " negl %0\n" \
- " andl $7,%0\n" \
- " subl %0,%3\n" \
- "4: rep; movsb\n" \
- " movl %3,%0\n" \
- " shrl $2,%0\n" \
- " andl $3,%3\n" \
- " .align 2,0x90\n" \
- "0: rep; movsl\n" \
- " movl %3,%0\n" \
- "1: rep; movsb\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "5: addl %3,%0\n" \
- " jmp 2b\n" \
- "3: lea 0(%3,%0,4),%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 4b,5b\n" \
- " .long 0b,3b\n" \
- " .long 1b,2b\n" \
- ".previous" \
- : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
- : "3"(size), "0"(size), "1"(to), "2"(from) \
- : "memory"); \
-} while (0)
-
-#define __copy_user_zeroing(to,from,size) \
-do { \
- int __d0, __d1, __d2; \
- __asm__ __volatile__( \
- " cmp $7,%0\n" \
- " jbe 1f\n" \
- " movl %1,%0\n" \
- " negl %0\n" \
- " andl $7,%0\n" \
- " subl %0,%3\n" \
- "4: rep; movsb\n" \
- " movl %3,%0\n" \
- " shrl $2,%0\n" \
- " andl $3,%3\n" \
- " .align 2,0x90\n" \
- "0: rep; movsl\n" \
- " movl %3,%0\n" \
- "1: rep; movsb\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "5: addl %3,%0\n" \
- " jmp 6f\n" \
- "3: lea 0(%3,%0,4),%0\n" \
- "6: pushl %0\n" \
- " pushl %%eax\n" \
- " xorl %%eax,%%eax\n" \
- " rep; stosb\n" \
- " popl %%eax\n" \
- " popl %0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 4b,5b\n" \
- " .long 0b,3b\n" \
- " .long 1b,6b\n" \
- ".previous" \
- : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
- : "3"(size), "0"(size), "1"(to), "2"(from) \
- : "memory"); \
-} while (0)
-
-
-unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
-{
- if (movsl_is_ok(to, from, n))
- __copy_user(to, from, n);
- else
- n = __copy_user_intel(to, from, n);
- return n;
-}
-
-unsigned long
-__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
-{
- if (movsl_is_ok(to, from, n))
- __copy_user_zeroing(to, from, n);
- else
- n = __copy_user_zeroing_intel(to, from, n);
- return n;
-}
-
-/**
- * copy_to_user: - Copy a block of data into user space.
- * @to: Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep.
- *
- * Copy data from kernel space to user space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long
-copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- might_sleep();
- if (access_ok(VERIFY_WRITE, to, n))
- n = __copy_to_user(to, from, n);
- return n;
-}
-EXPORT_SYMBOL(copy_to_user);
-
-/**
- * copy_from_user: - Copy a block of data from user space.
- * @to: Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep.
- *
- * Copy data from user space to kernel space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-unsigned long
-copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- might_sleep();
- if (access_ok(VERIFY_READ, from, n))
- n = __copy_from_user(to, from, n);
- else
- memset(to, 0, n);
- return n;
-}
-EXPORT_SYMBOL(copy_from_user);
diff --git a/xen/arch/x86/x86_32/xen.lds b/xen/arch/x86/x86_32/xen.lds
index 298c9fee4d..fec62dbf24 100644
--- a/xen/arch/x86/x86_32/xen.lds
+++ b/xen/arch/x86/x86_32/xen.lds
@@ -11,7 +11,7 @@ PHDRS
}
SECTIONS
{
- . = 0xFC400000 + 0x100000;
+ . = 0xFF000000 + 0x100000;
_text = .; /* Text and read-only data */
.text : {
*(.text)
@@ -25,12 +25,12 @@ SECTIONS
.rodata : { *(.rodata) *(.rodata.*) } :text
.kstrtab : { *(.kstrtab) } :text
- . = ALIGN(16); /* Exception table */
+ . = ALIGN(32); /* Exception table */
__start___ex_table = .;
__ex_table : { *(__ex_table) } :text
__stop___ex_table = .;
- . = ALIGN(16); /* Pre-exception table */
+ . = ALIGN(32); /* Pre-exception table */
__start___pre_ex_table = .;
__pre_ex_table : { *(__pre_ex_table) } :text
__stop___pre_ex_table = .;
@@ -57,7 +57,7 @@ SECTIONS
__init_begin = .;
.text.init : { *(.text.init) } :text
.data.init : { *(.data.init) } :text
- . = ALIGN(16);
+ . = ALIGN(32);
__setup_start = .;
.setup.init : { *(.setup.init) } :text
__setup_end = .;
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 2e6c3b396e..d19610a99f 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -4,7 +4,11 @@
* to extract and format the required data.
*/
+#include <xen/config.h>
+#include <xen/perfc.h>
#include <xen/sched.h>
+#include <asm/fixmap.h>
+#include <asm/hardirq.h>
#define DEFINE(_sym, _val) \
__asm__ __volatile__ ( "\n->" #_sym " %0 " #_val : : "i" (_val) )
@@ -13,45 +17,58 @@
#define OFFSET(_sym, _str, _mem) \
DEFINE(_sym, offsetof(_str, _mem));
+/* base-2 logarithm */
+#define __L2(_x) (((_x) & 0x00000002) ? 1 : 0)
+#define __L4(_x) (((_x) & 0x0000000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x))
+#define __L8(_x) (((_x) & 0x000000f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x))
+#define __L16(_x) (((_x) & 0x0000ff00) ? ( 8 + __L8( (_x)>> 8)) : __L8( _x))
+#define LOG_2(_x) (((_x) & 0xffff0000) ? (16 + __L16((_x)>>16)) : __L16(_x))
+
void __dummy__(void)
{
- OFFSET(XREGS_r15, struct xen_regs, r15);
- OFFSET(XREGS_r14, struct xen_regs, r14);
- OFFSET(XREGS_r13, struct xen_regs, r13);
- OFFSET(XREGS_r12, struct xen_regs, r12);
- OFFSET(XREGS_rbp, struct xen_regs, rbp);
- OFFSET(XREGS_rbx, struct xen_regs, rbx);
- OFFSET(XREGS_r11, struct xen_regs, r11);
- OFFSET(XREGS_r10, struct xen_regs, r10);
- OFFSET(XREGS_r9, struct xen_regs, r9);
- OFFSET(XREGS_r8, struct xen_regs, r8);
- OFFSET(XREGS_rax, struct xen_regs, rax);
- OFFSET(XREGS_rcx, struct xen_regs, rcx);
- OFFSET(XREGS_rdx, struct xen_regs, rdx);
- OFFSET(XREGS_rsi, struct xen_regs, rsi);
- OFFSET(XREGS_rdi, struct xen_regs, rdi);
- OFFSET(XREGS_orig_rax, struct xen_regs, orig_rax);
- OFFSET(XREGS_rip, struct xen_regs, rip);
- OFFSET(XREGS_cs, struct xen_regs, cs);
- OFFSET(XREGS_eflags, struct xen_regs, eflags);
- OFFSET(XREGS_rsp, struct xen_regs, rsp);
- OFFSET(XREGS_ss, struct xen_regs, ss);
+ OFFSET(UREGS_r15, struct cpu_user_regs, r15);
+ OFFSET(UREGS_r14, struct cpu_user_regs, r14);
+ OFFSET(UREGS_r13, struct cpu_user_regs, r13);
+ OFFSET(UREGS_r12, struct cpu_user_regs, r12);
+ OFFSET(UREGS_rbp, struct cpu_user_regs, rbp);
+ OFFSET(UREGS_rbx, struct cpu_user_regs, rbx);
+ OFFSET(UREGS_r11, struct cpu_user_regs, r11);
+ OFFSET(UREGS_r10, struct cpu_user_regs, r10);
+ OFFSET(UREGS_r9, struct cpu_user_regs, r9);
+ OFFSET(UREGS_r8, struct cpu_user_regs, r8);
+ OFFSET(UREGS_rax, struct cpu_user_regs, rax);
+ OFFSET(UREGS_rcx, struct cpu_user_regs, rcx);
+ OFFSET(UREGS_rdx, struct cpu_user_regs, rdx);
+ OFFSET(UREGS_rsi, struct cpu_user_regs, rsi);
+ OFFSET(UREGS_rdi, struct cpu_user_regs, rdi);
+ OFFSET(UREGS_error_code, struct cpu_user_regs, error_code);
+ OFFSET(UREGS_entry_vector, struct cpu_user_regs, entry_vector);
+ OFFSET(UREGS_rip, struct cpu_user_regs, rip);
+ OFFSET(UREGS_cs, struct cpu_user_regs, cs);
+ OFFSET(UREGS_eflags, struct cpu_user_regs, eflags);
+ OFFSET(UREGS_rsp, struct cpu_user_regs, rsp);
+ OFFSET(UREGS_ss, struct cpu_user_regs, ss);
+ OFFSET(UREGS_saved_upcall_mask, struct cpu_user_regs, saved_upcall_mask);
+ OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es);
+ DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
BLANK();
- OFFSET(DOMAIN_processor, struct domain, processor);
- OFFSET(DOMAIN_shared_info, struct domain, shared_info);
- OFFSET(DOMAIN_event_sel, struct domain, thread.event_selector);
- OFFSET(DOMAIN_event_addr, struct domain, thread.event_address);
- OFFSET(DOMAIN_failsafe_sel, struct domain, thread.failsafe_selector);
- OFFSET(DOMAIN_failsafe_addr, struct domain, thread.failsafe_address);
- OFFSET(DOMAIN_trap_bounce, struct domain, thread.trap_bounce);
- OFFSET(DOMAIN_thread_flags, struct domain, thread.flags);
+ OFFSET(VCPU_processor, struct vcpu, processor);
+ OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
+ OFFSET(VCPU_trap_bounce, struct vcpu, arch.trap_bounce);
+ OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
+ OFFSET(VCPU_event_addr, struct vcpu,
+ arch.guest_context.event_callback_eip);
+ OFFSET(VCPU_failsafe_addr, struct vcpu,
+ arch.guest_context.failsafe_callback_eip);
+ OFFSET(VCPU_syscall_addr, struct vcpu,
+ arch.guest_context.syscall_callback_eip);
+ OFFSET(VCPU_kernel_sp, struct vcpu,
+ arch.guest_context.kernel_sp);
BLANK();
- OFFSET(SHINFO_upcall_pending, shared_info_t,
- vcpu_data[0].evtchn_upcall_pending);
- OFFSET(SHINFO_upcall_mask, shared_info_t,
- vcpu_data[0].evtchn_upcall_mask);
+ OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
+ OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask);
BLANK();
OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code);
@@ -61,11 +78,22 @@ void __dummy__(void)
OFFSET(TRAPBOUNCE_eip, struct trap_bounce, eip);
BLANK();
+#if PERF_COUNTERS
+ OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
+ OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
+ BLANK();
+#endif
+
OFFSET(MULTICALL_op, multicall_entry_t, op);
OFFSET(MULTICALL_arg0, multicall_entry_t, args[0]);
OFFSET(MULTICALL_arg1, multicall_entry_t, args[1]);
OFFSET(MULTICALL_arg2, multicall_entry_t, args[2]);
OFFSET(MULTICALL_arg3, multicall_entry_t, args[3]);
OFFSET(MULTICALL_arg4, multicall_entry_t, args[4]);
- OFFSET(MULTICALL_result, multicall_entry_t, args[5]);
+ OFFSET(MULTICALL_arg5, multicall_entry_t, args[5]);
+ OFFSET(MULTICALL_arg6, multicall_entry_t, args[6]);
+ OFFSET(MULTICALL_result, multicall_entry_t, result);
+ BLANK();
+
+ DEFINE(IRQSTAT_shift, LOG_2(sizeof(irq_cpustat_t)));
}
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index e69de29bb2..b0f3ffb0fb 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -0,0 +1,592 @@
+/*
+ * Hypercall and fault low-level handling routines.
+ *
+ * Copyright (c) 2005, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define GET_CURRENT(reg) \
+ movq $STACK_SIZE-8, reg; \
+ orq %rsp, reg; \
+ andq $~7,reg; \
+ movq (reg),reg;
+
+ ALIGN
+/* %rbx: struct vcpu */
+switch_to_kernel:
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ movq VCPU_syscall_addr(%rbx),%rax
+ movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw $0,TRAPBOUNCE_flags(%rdx)
+ call create_bounce_frame
+ jmp test_all_events
+
+/* %rbx: struct vcpu, interrupts disabled */
+restore_all_guest:
+ RESTORE_ALL
+ testw $TRAP_syscall,4(%rsp)
+ jz iret_exit_to_guest
+
+ addq $8,%rsp
+ popq %rcx # RIP
+ popq %r11 # CS
+ cmpw $__GUEST_CS32,%r11
+ popq %r11 # RFLAGS
+ popq %rsp # RSP
+ je 1f
+ sysretq
+1: sysretl
+
+ ALIGN
+/* No special register assumptions. */
+iret_exit_to_guest:
+ addq $8,%rsp
+FLT1: iretq
+
+.section .fixup,"ax"
+FIX1: popq -15*8-8(%rsp) # error_code/entry_vector
+ SAVE_ALL # 15*8 bytes pushed
+ movq -8(%rsp),%rsi # error_code/entry_vector
+ sti # after stack abuse (-1024(%rsp))
+ pushq $__HYPERVISOR_DS # SS
+ leaq 8(%rsp),%rax
+ pushq %rax # RSP
+ pushf # RFLAGS
+ pushq $__HYPERVISOR_CS # CS
+ leaq DBLFLT1(%rip),%rax
+ pushq %rax # RIP
+ pushq %rsi # error_code/entry_vector
+ jmp error_code
+DBLFLT1:GET_CURRENT(%rbx)
+ jmp test_all_events
+failsafe_callback:
+ GET_CURRENT(%rbx)
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ movq VCPU_failsafe_addr(%rbx),%rax
+ movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
+ call create_bounce_frame
+ jmp test_all_events
+.previous
+.section __pre_ex_table,"a"
+ .quad FLT1,FIX1
+.previous
+.section __ex_table,"a"
+ .quad DBLFLT1,failsafe_callback
+.previous
+
+ ALIGN
+/* No special register assumptions. */
+restore_all_xen:
+ RESTORE_ALL
+ addq $8,%rsp
+ iretq
+
+/*
+ * When entering SYSCALL from kernel mode:
+ * %rax = hypercall vector
+ * %rdi, %rsi, %rdx, %r10, %r8, %9 = hypercall arguments
+ * %r11, %rcx = SYSCALL-saved %rflags and %rip
+ * NB. We must move %r10 to %rcx for C function-calling ABI.
+ *
+ * When entering SYSCALL from user mode:
+ * Vector directly to the registered arch.syscall_addr.
+ *
+ * Initial work is done by per-CPU stack trampolines. At this point %rsp
+ * has been initialised to point at the correct Xen stack, and %rsp, %rflags
+ * and %cs have been saved. All other registers are still to be saved onto
+ * the stack, starting with %rip, and an appropriate %ss must be saved into
+ * the space left by the trampoline.
+ */
+ ALIGN
+ENTRY(syscall_enter)
+ sti
+ movl $__GUEST_SS,24(%rsp)
+ pushq %rcx
+ pushq $0
+ movl $TRAP_syscall,4(%rsp)
+ SAVE_ALL
+ GET_CURRENT(%rbx)
+ testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
+ jz switch_to_kernel
+
+/*hypercall:*/
+ movq %r10,%rcx
+ andq $(NR_hypercalls-1),%rax
+ leaq hypercall_table(%rip),%r10
+ PERFC_INCR(PERFC_hypercalls, %rax)
+ callq *(%r10,%rax,8)
+ movq %rax,UREGS_rax(%rsp) # save the return value
+
+/* %rbx: struct vcpu */
+test_all_events:
+ cli # tests must not race interrupts
+/*test_softirqs:*/
+ movl VCPU_processor(%rbx),%eax
+ shl $IRQSTAT_shift,%rax
+ leaq irq_stat(%rip),%rcx
+ testl $~0,(%rcx,%rax,1)
+ jnz process_softirqs
+/*test_guest_events:*/
+ movq VCPU_vcpu_info(%rbx),%rax
+ testb $0xFF,VCPUINFO_upcall_mask(%rax)
+ jnz restore_all_guest
+ testb $0xFF,VCPUINFO_upcall_pending(%rax)
+ jz restore_all_guest
+/*process_guest_events:*/
+ sti
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ movq VCPU_event_addr(%rbx),%rax
+ movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
+ call create_bounce_frame
+ jmp test_all_events
+
+#ifdef CONFIG_VMX
+/*
+ * At VMExit time the processor saves the guest selectors, rsp, rip,
+ * and rflags. Therefore we don't save them, but simply decrement
+ * the kernel stack pointer to make it consistent with the stack frame
+ * at usual interruption time. The rflags of the host is not saved by VMX,
+ * and we set it to the fixed value.
+ *
+ * We also need the room, especially because orig_eax field is used
+ * by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following:
+ * (10) u64 gs;
+ * (9) u64 fs;
+ * (8) u64 ds;
+ * (7) u64 es;
+ * <- get_stack_bottom() (= HOST_ESP)
+ * (6) u64 ss;
+ * (5) u64 rsp;
+ * (4) u64 rflags;
+ * (3) u64 cs;
+ * (2) u64 rip;
+ * (2/1) u32 entry_vector;
+ * (1/1) u32 error_code;
+ */
+#define VMX_MONITOR_RFLAGS 0x202 /* IF on */
+#define NR_SKIPPED_REGS 6 /* See the above explanation */
+#define VMX_SAVE_ALL_NOSEGREGS \
+ pushq $VMX_MONITOR_RFLAGS; \
+ popfq; \
+ subq $(NR_SKIPPED_REGS*8), %rsp; \
+ pushq %rdi; \
+ pushq %rsi; \
+ pushq %rdx; \
+ pushq %rcx; \
+ pushq %rax; \
+ pushq %r8; \
+ pushq %r9; \
+ pushq %r10; \
+ pushq %r11; \
+ pushq %rbx; \
+ pushq %rbp; \
+ pushq %r12; \
+ pushq %r13; \
+ pushq %r14; \
+ pushq %r15; \
+
+ENTRY(vmx_asm_vmexit_handler)
+ /* selectors are restored/saved by VMX */
+ VMX_SAVE_ALL_NOSEGREGS
+ call vmx_vmexit_handler
+ jmp vmx_asm_do_resume
+
+ENTRY(vmx_asm_do_launch)
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ addq $(NR_SKIPPED_REGS*8), %rsp
+ /* VMLUANCH */
+ .byte 0x0f,0x01,0xc2
+ pushfq
+ call vm_launch_fail
+ hlt
+
+ ALIGN
+
+ENTRY(vmx_asm_do_resume)
+vmx_test_all_events:
+ GET_CURRENT(%rbx)
+/* test_all_events: */
+ cli # tests must not race interrupts
+/*test_softirqs:*/
+ movl VCPU_processor(%rbx),%eax
+ shl $IRQSTAT_shift,%rax
+ leaq irq_stat(%rip), %rdx
+ testl $~0,(%rdx,%rax,1)
+ jnz vmx_process_softirqs
+
+vmx_restore_all_guest:
+ call load_cr2
+ /*
+ * Check if we are going back to VMX-based VM
+ * By this time, all the setups in the VMCS must be complete.
+ */
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ addq $(NR_SKIPPED_REGS*8), %rsp
+ /* VMRESUME */
+ .byte 0x0f,0x01,0xc3
+ pushfq
+ call vm_resume_fail
+ /* Should never reach here */
+ hlt
+
+ ALIGN
+vmx_process_softirqs:
+ sti
+ call do_softirq
+ jmp vmx_test_all_events
+#endif
+
+ ALIGN
+/* %rbx: struct vcpu */
+process_softirqs:
+ sti
+ call do_softirq
+ jmp test_all_events
+
+/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK: */
+/* { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS } */
+/* %rdx: trap_bounce, %rbx: struct vcpu */
+/* On return only %rbx is guaranteed non-clobbered. */
+create_bounce_frame:
+ testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
+ jnz 1f
+ /* Push new frame at registered guest-OS stack base. */
+ pushq %rdx
+ movq %rbx,%rdi
+ call toggle_guest_mode
+ popq %rdx
+ movq VCPU_kernel_sp(%rbx),%rsi
+ jmp 2f
+1: /* In kernel context already: push new frame at existing %rsp. */
+ movq UREGS_rsp+8(%rsp),%rsi
+ andb $0xfc,UREGS_cs+8(%rsp) # Indicate kernel context to guest.
+2: movq $HYPERVISOR_VIRT_START,%rax
+ cmpq %rax,%rsi
+ jb 1f # In +ve address space? Then okay.
+ movq $HYPERVISOR_VIRT_END+60,%rax
+ cmpq %rax,%rsi
+ jb domain_crash_synchronous # Above Xen private area? Then okay.
+1: movb TRAPBOUNCE_flags(%rdx),%cl
+ subq $40,%rsi
+ movq UREGS_ss+8(%rsp),%rax
+FLT2: movq %rax,32(%rsi) # SS
+ movq UREGS_rsp+8(%rsp),%rax
+FLT3: movq %rax,24(%rsi) # RSP
+ movq UREGS_eflags+8(%rsp),%rax
+FLT4: movq %rax,16(%rsi) # RFLAGS
+ movq VCPU_vcpu_info(%rbx),%rax
+ pushq VCPUINFO_upcall_mask(%rax)
+ testb $TBF_INTERRUPT,%cl
+ setnz VCPUINFO_upcall_mask(%rax)# TBF_INTERRUPT -> clear upcall mask
+ popq %rax
+ shlq $32,%rax # Bits 32-39: saved_upcall_mask
+ movw UREGS_cs+8(%rsp),%ax # Bits 0-15: CS
+FLT5: movq %rax,8(%rsi) # CS/saved_upcall_mask
+ movq UREGS_rip+8(%rsp),%rax
+FLT6: movq %rax,(%rsi) # RIP
+ testb $TBF_EXCEPTION_ERRCODE,%cl
+ jz 1f
+ subq $8,%rsi
+ movl TRAPBOUNCE_error_code(%rdx),%eax
+FLT7: movq %rax,(%rsi) # ERROR CODE
+ testb $TBF_EXCEPTION_CR2,%cl
+ jz 2f
+ subq $8,%rsi
+ movq TRAPBOUNCE_cr2(%rdx),%rax
+FLT8: movq %rax,(%rsi) # CR2
+1: testb $TBF_FAILSAFE,%cl
+ jz 2f
+ subq $32,%rsi
+ movl %gs,%eax
+FLT9: movq %rax,24(%rsi) # GS
+ movl %fs,%eax
+FLT10: movq %rax,16(%rsi) # FS
+ movl %es,%eax
+FLT11: movq %rax,8(%rsi) # ES
+ movl %ds,%eax
+FLT12: movq %rax,(%rsi) # DS
+2: subq $16,%rsi
+ movq UREGS_r11+8(%rsp),%rax
+FLT13: movq %rax,8(%rsi) # R11
+ movq UREGS_rcx+8(%rsp),%rax
+FLT14: movq %rax,(%rsi) # RCX
+ /* Rewrite our stack frame and return to guest-OS mode. */
+ /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
+ movq $TRAP_syscall,UREGS_entry_vector+8(%rsp)
+ andl $0xfffcbeff,UREGS_eflags+8(%rsp)
+ movq $__GUEST_SS,UREGS_ss+8(%rsp)
+ movq %rsi,UREGS_rsp+8(%rsp)
+ movq $__GUEST_CS,UREGS_cs+8(%rsp)
+ movq TRAPBOUNCE_eip(%rdx),%rax
+ testq %rax,%rax
+ jz domain_crash_synchronous
+ movq %rax,UREGS_rip+8(%rsp)
+ movb $0,TRAPBOUNCE_flags(%rdx)
+ ret
+.section __ex_table,"a"
+ .quad FLT2,domain_crash_synchronous , FLT3,domain_crash_synchronous
+ .quad FLT4,domain_crash_synchronous , FLT5,domain_crash_synchronous
+ .quad FLT6,domain_crash_synchronous , FLT7,domain_crash_synchronous
+ .quad FLT8,domain_crash_synchronous , FLT9,domain_crash_synchronous
+ .quad FLT10,domain_crash_synchronous , FLT11,domain_crash_synchronous
+ .quad FLT12,domain_crash_synchronous , FLT13,domain_crash_synchronous
+ .quad FLT14,domain_crash_synchronous
+.previous
+
+ ALIGN
+/* %rbx: struct vcpu */
+process_guest_exception_and_events:
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
+ jz test_all_events
+ call create_bounce_frame
+ jmp test_all_events
+
+ ALIGN
+/* No special register assumptions. */
+ENTRY(ret_from_intr)
+ GET_CURRENT(%rbx)
+ testb $3,UREGS_cs(%rsp)
+ jnz test_all_events
+ jmp restore_all_xen
+
+ ALIGN
+/* No special register assumptions. */
+error_code:
+ SAVE_ALL
+ testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
+ jz exception_with_ints_disabled
+ sti
+ movq %rsp,%rdi
+ movl UREGS_entry_vector(%rsp),%eax
+ leaq exception_table(%rip),%rdx
+ GET_CURRENT(%rbx)
+ PERFC_INCR(PERFC_exceptions, %rax)
+ callq *(%rdx,%rax,8)
+ testb $3,UREGS_cs(%rsp)
+ jz restore_all_xen
+ jmp process_guest_exception_and_events
+
+/* No special register assumptions. */
+exception_with_ints_disabled:
+ testb $3,UREGS_cs(%rsp) # interrupts disabled outside Xen?
+ jnz FATAL_exception_with_ints_disabled
+ movq %rsp,%rdi
+ call search_pre_exception_table
+ testq %rax,%rax # no fixup code for faulting EIP?
+ jz FATAL_exception_with_ints_disabled
+ movq %rax,UREGS_rip(%rsp)
+ subq $8,UREGS_rsp(%rsp) # add ec/ev to previous stack frame
+ testb $15,UREGS_rsp(%rsp) # return %rsp is now aligned?
+ jz 1f # then there is a pad quadword already
+ movq %rsp,%rsi
+ subq $8,%rsp
+ movq %rsp,%rdi
+ movq $UREGS_kernel_sizeof/8,%rcx
+ rep; movsq # make room for ec/ev
+1: movq UREGS_error_code(%rsp),%rax # ec/ev
+ movq %rax,UREGS_kernel_sizeof(%rsp)
+ jmp restore_all_xen # return to fixup code
+
+/* No special register assumptions. */
+FATAL_exception_with_ints_disabled:
+ movl UREGS_entry_vector(%rsp),%edi
+ movq %rsp,%rsi
+ call fatal_trap
+ ud2
+
+ENTRY(divide_error)
+ pushq $0
+ movl $TRAP_divide_error,4(%rsp)
+ jmp error_code
+
+ENTRY(coprocessor_error)
+ pushq $0
+ movl $TRAP_copro_error,4(%rsp)
+ jmp error_code
+
+ENTRY(simd_coprocessor_error)
+ pushq $0
+ movl $TRAP_simd_error,4(%rsp)
+ jmp error_code
+
+ENTRY(device_not_available)
+ pushq $0
+ movl $TRAP_no_device,4(%rsp)
+ jmp error_code
+
+ENTRY(debug)
+ pushq $0
+ movl $TRAP_debug,4(%rsp)
+ jmp error_code
+
+ENTRY(int3)
+ pushq $0
+ movl $TRAP_int3,4(%rsp)
+ jmp error_code
+
+ENTRY(overflow)
+ pushq $0
+ movl $TRAP_overflow,4(%rsp)
+ jmp error_code
+
+ENTRY(bounds)
+ pushq $0
+ movl $TRAP_bounds,4(%rsp)
+ jmp error_code
+
+ENTRY(invalid_op)
+ pushq $0
+ movl $TRAP_invalid_op,4(%rsp)
+ jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+ pushq $0
+ movl $TRAP_copro_seg,4(%rsp)
+ jmp error_code
+
+ENTRY(invalid_TSS)
+ movl $TRAP_invalid_tss,4(%rsp)
+ jmp error_code
+
+ENTRY(segment_not_present)
+ movl $TRAP_no_segment,4(%rsp)
+ jmp error_code
+
+ENTRY(stack_segment)
+ movl $TRAP_stack_error,4(%rsp)
+ jmp error_code
+
+ENTRY(general_protection)
+ movl $TRAP_gp_fault,4(%rsp)
+ jmp error_code
+
+ENTRY(alignment_check)
+ movl $TRAP_alignment_check,4(%rsp)
+ jmp error_code
+
+ENTRY(page_fault)
+ movl $TRAP_page_fault,4(%rsp)
+ jmp error_code
+
+ENTRY(machine_check)
+ pushq $0
+ movl $TRAP_machine_check,4(%rsp)
+ jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+ pushq $0
+ movl $TRAP_spurious_int,4(%rsp)
+ jmp error_code
+
+ENTRY(double_fault)
+ movl $TRAP_double_fault,4(%rsp)
+ jmp error_code
+
+ENTRY(nmi)
+ pushq $0
+ SAVE_ALL
+ inb $0x61,%al
+ movl %eax,%esi # reason
+ movq %rsp,%rdi # regs
+ call do_nmi
+ jmp restore_all_xen
+
+do_arch_sched_op:
+ # Ensure we return success even if we return via schedule_tail()
+ xorl %eax,%eax
+ movq %rax,UREGS_rax+8(%rsp)
+ jmp do_sched_op
+
+.data
+
+ENTRY(exception_table)
+ .quad do_divide_error
+ .quad do_debug
+ .quad 0 # nmi
+ .quad do_int3
+ .quad do_overflow
+ .quad do_bounds
+ .quad do_invalid_op
+ .quad math_state_restore
+ .quad do_double_fault
+ .quad do_coprocessor_segment_overrun
+ .quad do_invalid_TSS
+ .quad do_segment_not_present
+ .quad do_stack_segment
+ .quad do_general_protection
+ .quad do_page_fault
+ .quad do_spurious_interrupt_bug
+ .quad do_coprocessor_error
+ .quad do_alignment_check
+ .quad do_machine_check
+ .quad do_simd_coprocessor_error
+
+ENTRY(hypercall_table)
+ .quad do_set_trap_table /* 0 */
+ .quad do_mmu_update
+ .quad do_set_gdt
+ .quad do_stack_switch
+ .quad do_set_callbacks
+ .quad do_fpu_taskswitch /* 5 */
+ .quad do_arch_sched_op
+ .quad do_dom0_op
+ .quad do_set_debugreg
+ .quad do_get_debugreg
+ .quad do_update_descriptor /* 10 */
+ .quad do_ni_hypercall
+ .quad do_dom_mem_op
+ .quad do_multicall
+ .quad do_update_va_mapping
+ .quad do_set_timer_op /* 15 */
+ .quad do_event_channel_op
+ .quad do_xen_version
+ .quad do_console_io
+ .quad do_physdev_op
+ .quad do_grant_table_op /* 20 */
+ .quad do_vm_assist
+ .quad do_update_va_mapping_otherdomain
+ .quad do_switch_to_user
+ .quad do_boot_vcpu
+ .quad do_set_segment_base /* 25 */
+ .quad do_mmuext_op
+ .rept NR_hypercalls-((.-hypercall_table)/4)
+ .quad do_ni_hypercall
+ .endr
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index c7bcb17805..ae3dac0b6b 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1,455 +1,289 @@
/******************************************************************************
* arch/x86/x86_64/mm.c
*
- * Modifications to Linux original are copyright (c) 2004, K A Fraser
+ * Modifications to Linux original are copyright (c) 2004, K A Fraser. This
+ * program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
*
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/asm_defns.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
-#include <asm/domain_page.h>
+#include <asm/msr.h>
-static inline void set_pte_phys(unsigned long vaddr,
- l1_pgentry_t entry)
+struct pfn_info *alloc_xen_pagetable(void)
{
- l4_pgentry_t *l4ent;
- l3_pgentry_t *l3ent;
- l2_pgentry_t *l2ent;
- l1_pgentry_t *l1ent;
-
- l4ent = &idle_pg_table[l4_table_offset(vaddr)];
- l3ent = l4_pgentry_to_l3(*l4ent) + l3_table_offset(vaddr);
- l2ent = l3_pgentry_to_l2(*l3ent) + l2_table_offset(vaddr);
- l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
- *l1ent = entry;
-
- /* It's enough to flush this one mapping. */
- __flush_tlb_one(vaddr);
-}
-
+ extern int early_boot;
+ unsigned long pfn;
-void __set_fixmap(enum fixed_addresses idx,
- l1_pgentry_t entry)
-{
- unsigned long address = fix_to_virt(idx);
+ if ( !early_boot )
+ return alloc_domheap_page(NULL);
- if ( likely(idx < __end_of_fixed_addresses) )
- set_pte_phys(address, entry);
- else
- printk("Invalid __set_fixmap\n");
+ pfn = alloc_boot_pages(1, 1);
+ return ((pfn == 0) ? NULL : pfn_to_page(pfn));
}
-
-void __init paging_init(void)
+void free_xen_pagetable(struct pfn_info *pg)
{
- void *ioremap_pt;
- int i;
-
- /* Create page table for ioremap(). */
- ioremap_pt = (void *)alloc_xenheap_page();
- clear_page(ioremap_pt);
- idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);
-
- /* Create read-only mapping of MPT for guest-OS use. */
- idle_pg_table[RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(l2_pgentry_val(
- idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT]) &
- ~_PAGE_RW);
-
- /* Set up mapping cache for domain pages. */
- mapcache = (unsigned long *)alloc_xenheap_page();
- clear_page(mapcache);
- idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);
-
- /* Set up linear page table mapping. */
- idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
-
+ free_domheap_page(pg);
}
-void __init zap_low_mappings(void)
+l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
- idle_pg_table[0] = 0;
-}
-
+ l4_pgentry_t *pl4e;
+ l3_pgentry_t *pl3e;
+ l2_pgentry_t *pl2e;
-/*
- * Allows shooting down of borrowed page-table use on specific CPUs.
- * Specifically, we borrow page tables when running the idle domain.
- */
-static void __synchronise_pagetables(void *mask)
-{
- struct domain *d = current;
- if ( ((unsigned long)mask & (1<<d->processor)) && is_idle_task(d) )
- write_ptbase(&d->mm);
-}
-void synchronise_pagetables(unsigned long cpu_mask)
-{
- __synchronise_pagetables((void *)cpu_mask);
- smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
+ pl4e = &idle_pg_table[l4_table_offset(v)];
+ if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
+ {
+ pl3e = page_to_virt(alloc_xen_pagetable());
+ clear_page(pl3e);
+ *pl4e = l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR);
+ }
+
+ pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v);
+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
+ {
+ pl2e = page_to_virt(alloc_xen_pagetable());
+ clear_page(pl2e);
+ *pl3e = l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR);
+ }
+
+ pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);
+ return pl2e;
}
-long do_stack_switch(unsigned long ss, unsigned long esp)
+void __init paging_init(void)
{
- int nr = smp_processor_id();
- struct tss_struct *t = &init_tss[nr];
+ unsigned long i;
+ l3_pgentry_t *l3_ro_mpt;
+ l2_pgentry_t *l2_ro_mpt;
+ struct pfn_info *pg;
+
+ idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
+
+ /* Create user-accessible L2 directory to map the MPT for guests. */
+ l3_ro_mpt = alloc_xenheap_page();
+ clear_page(l3_ro_mpt);
+ idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] =
+ l4e_from_page(
+ virt_to_page(l3_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER);
+ l2_ro_mpt = alloc_xenheap_page();
+ clear_page(l2_ro_mpt);
+ l3_ro_mpt[l3_table_offset(RO_MPT_VIRT_START)] =
+ l3e_from_page(
+ virt_to_page(l2_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER);
+ l2_ro_mpt += l2_table_offset(RO_MPT_VIRT_START);
- /* We need to do this check as we load and use SS on guest's behalf. */
- if ( (ss & 3) == 0 )
- return -EPERM;
-
- current->thread.guestos_ss = ss;
- current->thread.guestos_sp = esp;
- t->ss1 = ss;
- t->esp1 = esp;
+ /*
+ * Allocate and map the machine-to-phys table.
+ * This also ensures L3 is present for fixmaps.
+ */
+ for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
+ {
+ pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER);
+ if ( pg == NULL )
+ panic("Not enough memory for m2p table\n");
+ map_pages_to_xen(
+ RDWR_MPT_VIRT_START + i*8, page_to_pfn(pg),
+ 1UL << PAGETABLE_ORDER,
+ PAGE_HYPERVISOR);
+ memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55,
+ 1UL << L2_PAGETABLE_SHIFT);
+ *l2_ro_mpt++ = l2e_from_page(
+ pg, _PAGE_GLOBAL|_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT);
+ BUG_ON(((unsigned long)l2_ro_mpt & ~PAGE_MASK) == 0);
+ }
- return 0;
+ /* Set up linear page table mapping. */
+ idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
+ l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR);
}
-
-/* Returns TRUE if given descriptor is valid for GDT or LDT. */
-int check_descriptor(unsigned long *d)
+void __init zap_low_mappings(void)
{
- unsigned long base, limit, a = d[0], b = d[1];
+ idle_pg_table[0] = l4e_empty();
+ flush_tlb_all_pge();
+}
- /* A not-present descriptor will always fault, so is safe. */
- if ( !(b & _SEGMENT_P) )
- goto good;
+void subarch_init_memory(struct domain *dom_xen)
+{
+ unsigned long i, v, m2p_start_mfn;
+ l3_pgentry_t l3e;
+ l2_pgentry_t l2e;
/*
- * We don't allow a DPL of zero. There is no legitimate reason for
- * specifying DPL==0, and it gets rather dangerous if we also accept call
- * gates (consider a call gate pointing at another guestos descriptor with
- * DPL 0 -- this would get the OS ring-0 privileges).
+ * We are rather picky about the layout of 'struct pfn_info'. The
+ * count_info and domain fields must be adjacent, as we perform atomic
+ * 64-bit operations on them.
*/
- if ( (b & _SEGMENT_DPL) == 0 )
- goto bad;
-
- if ( !(b & _SEGMENT_S) )
+ if ( (offsetof(struct pfn_info, u.inuse._domain) !=
+ (offsetof(struct pfn_info, count_info) + sizeof(u32))) )
{
- /*
- * System segment:
- * 1. Don't allow interrupt or trap gates as they belong in the IDT.
- * 2. Don't allow TSS descriptors or task gates as we don't
- * virtualise x86 tasks.
- * 3. Don't allow LDT descriptors because they're unnecessary and
- * I'm uneasy about allowing an LDT page to contain LDT
- * descriptors. In any case, Xen automatically creates the
- * required descriptor when reloading the LDT register.
- * 4. We allow call gates but they must not jump to a private segment.
- */
-
- /* Disallow everything but call gates. */
- if ( (b & _SEGMENT_TYPE) != 0xc00 )
- goto bad;
-
- /* Can't allow far jump to a Xen-private segment. */
- if ( !VALID_CODESEL(a>>16) )
- goto bad;
-
- /* Reserved bits must be zero. */
- if ( (b & 0xe0) != 0 )
- goto bad;
-
- /* No base/limit check is needed for a call gate. */
- goto good;
+ printk("Weird pfn_info layout (%ld,%ld,%ld)\n",
+ offsetof(struct pfn_info, count_info),
+ offsetof(struct pfn_info, u.inuse._domain),
+ sizeof(struct pfn_info));
+ for ( ; ; ) ;
}
-
- /* Check that base is at least a page away from Xen-private area. */
- base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
- if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
- goto bad;
-
- /* Check and truncate the limit if necessary. */
- limit = (b&0xf0000) | (a&0xffff);
- limit++; /* We add one because limit is inclusive. */
- if ( (b & _SEGMENT_G) )
- limit <<= 12;
- if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
+ /* M2P table is mappable read-only by privileged domains. */
+ for ( v = RDWR_MPT_VIRT_START;
+ v != RDWR_MPT_VIRT_END;
+ v += 1 << L2_PAGETABLE_SHIFT )
{
- /*
- * Grows-down limit check.
- * NB. limit == 0xFFFFF provides no access (if G=1).
- * limit == 0x00000 provides 4GB-4kB access (if G=1).
- */
- if ( (base + limit) > base )
+ l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
+ l3_table_offset(v)];
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+ continue;
+ l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ continue;
+ m2p_start_mfn = l2e_get_pfn(l2e);
+
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
- limit = -(base & PAGE_MASK);
- goto truncate;
+ frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
+ /* Use the GDT page type so the M2P table can only be mapped
+ read-only by non-privileged domains. */
+ frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
+ page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
}
}
- else
- {
- /*
- * Grows-up limit check.
- * NB. limit == 0xFFFFF provides 4GB access (if G=1).
- * limit == 0x00000 provides 4kB access (if G=1).
- */
- if ( ((base + limit) <= base) ||
- ((base + limit) > PAGE_OFFSET) )
- {
- limit = PAGE_OFFSET - base;
- truncate:
- if ( !(b & _SEGMENT_G) )
- goto bad; /* too dangerous; too hard to work out... */
- limit = (limit >> 12) - 1;
- d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
- d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
- }
- }
-
- good:
- return 1;
- bad:
- return 0;
-}
-
-
-void destroy_gdt(struct domain *d)
-{
- int i;
- unsigned long pfn;
-
- for ( i = 0; i < 16; i++ )
- {
- if ( (pfn = l1_pgentry_to_pagenr(d->mm.perdomain_pt[i])) != 0 )
- put_page_and_type(&frame_table[pfn]);
- d->mm.perdomain_pt[i] = mk_l1_pgentry(0);
- }
}
-
-long set_gdt(struct domain *d,
- unsigned long *frames,
- unsigned int entries)
+long do_stack_switch(unsigned long ss, unsigned long esp)
{
- /* NB. There are 512 8-byte entries per GDT page. */
- int i = 0, nr_pages = (entries + 511) / 512;
- struct desc_struct *vgdt;
- unsigned long pfn;
-
- /* Check the first page in the new GDT. */
- if ( (pfn = frames[0]) >= max_page )
- goto fail;
-
- /* The first page is special because Xen owns a range of entries in it. */
- if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
- {
- /* GDT checks failed: try zapping the Xen reserved entries. */
- if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
- goto fail;
- vgdt = map_domain_mem(pfn << PAGE_SHIFT);
- memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
- NR_RESERVED_GDT_ENTRIES*8);
- unmap_domain_mem(vgdt);
- put_page_and_type(&frame_table[pfn]);
-
- /* Okay, we zapped the entries. Now try the GDT checks again. */
- if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
- goto fail;
- }
-
- /* Check the remaining pages in the new GDT. */
- for ( i = 1; i < nr_pages; i++ )
- if ( ((pfn = frames[i]) >= max_page) ||
- !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
- goto fail;
-
- /* Copy reserved GDT entries to the new GDT. */
- vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
- memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
- gdt_table + FIRST_RESERVED_GDT_ENTRY,
- NR_RESERVED_GDT_ENTRIES*8);
- unmap_domain_mem(vgdt);
-
- /* Tear down the old GDT. */
- destroy_gdt(d);
-
- /* Install the new GDT. */
- for ( i = 0; i < nr_pages; i++ )
- d->mm.perdomain_pt[i] =
- mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
-
- SET_GDT_ADDRESS(d, GDT_VIRT_START);
- SET_GDT_ENTRIES(d, entries);
-
+ if ( (ss & 3) != 3 )
+ return -EPERM;
+ current->arch.guest_context.kernel_ss = ss;
+ current->arch.guest_context.kernel_sp = esp;
return 0;
-
- fail:
- while ( i-- > 0 )
- put_page_and_type(&frame_table[frames[i]]);
- return -EINVAL;
}
-
-long do_set_gdt(unsigned long *frame_list, unsigned int entries)
+long do_set_segment_base(unsigned int which, unsigned long base)
{
- int nr_pages = (entries + 511) / 512;
- unsigned long frames[16];
- long ret;
-
- if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
- return -EINVAL;
-
- if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
- return -EFAULT;
+ struct vcpu *v = current;
+ long ret = 0;
- if ( (ret = set_gdt(current, frames, entries)) == 0 )
+ switch ( which )
{
- local_flush_tlb();
- __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
- }
-
- return ret;
-}
-
-
-long do_update_descriptor(
- unsigned long pa, unsigned long word1, unsigned long word2)
-{
- unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
- struct pfn_info *page;
- long ret = -EINVAL;
-
- d[0] = word1;
- d[1] = word2;
-
- if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) )
- return -EINVAL;
+ case SEGBASE_FS:
+ if ( wrmsr_user(MSR_FS_BASE, base, base>>32) )
+ ret = -EFAULT;
+ else
+ v->arch.guest_context.fs_base = base;
+ break;
- page = &frame_table[pfn];
- if ( unlikely(!get_page(page, current)) )
- return -EINVAL;
+ case SEGBASE_GS_USER:
+ if ( wrmsr_user(MSR_SHADOW_GS_BASE, base, base>>32) )
+ ret = -EFAULT;
+ else
+ v->arch.guest_context.gs_base_user = base;
+ break;
- /* Check if the given frame is in use in an unsafe context. */
- switch ( page->u.inuse.type_info & PGT_type_mask )
- {
- case PGT_gdt_page:
- /* Disallow updates of Xen-reserved descriptors in the current GDT. */
- if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) &&
- (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
- (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
- goto out;
- if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
- goto out;
+ case SEGBASE_GS_KERNEL:
+ if ( wrmsr_user(MSR_GS_BASE, base, base>>32) )
+ ret = -EFAULT;
+ else
+ v->arch.guest_context.gs_base_kernel = base;
break;
- case PGT_ldt_page:
- if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
- goto out;
+
+ case SEGBASE_GS_USER_SEL:
+ __asm__ __volatile__ (
+ " swapgs \n"
+ "1: movl %k0,%%gs \n"
+ " "safe_swapgs" \n"
+ ".section .fixup,\"ax\" \n"
+ "2: xorl %k0,%k0 \n"
+ " jmp 1b \n"
+ ".previous \n"
+ ".section __ex_table,\"a\"\n"
+ " .align 8 \n"
+ " .quad 1b,2b \n"
+ ".previous "
+ : : "r" (base&0xffff) );
break;
+
default:
- if ( unlikely(!get_page_type(page, PGT_writable_page)) )
- goto out;
+ ret = -EINVAL;
break;
}
- /* All is good so make the update. */
- gdt_pent = map_domain_mem(pa);
- memcpy(gdt_pent, d, 8);
- unmap_domain_mem(gdt_pent);
-
- put_page_type(page);
-
- ret = 0; /* success */
-
- out:
- put_page(page);
return ret;
}
-#ifdef MEMORY_GUARD
-void *memguard_init(void *heap_start)
+/* Returns TRUE if given descriptor is valid for GDT or LDT. */
+int check_descriptor(struct desc_struct *d)
{
- l1_pgentry_t *l1;
- int i, j;
+ u32 a = d->a, b = d->b;
- /* Round the allocation pointer up to a page boundary. */
- heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
- PAGE_MASK);
+ /* A not-present descriptor will always fault, so is safe. */
+ if ( !(b & _SEGMENT_P) )
+ goto good;
- /* Memory guarding is incompatible with super pages. */
- for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
- {
- l1 = (l1_pgentry_t *)heap_start;
- heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
- for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
- l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
- (j << L1_PAGETABLE_SHIFT) |
- __PAGE_HYPERVISOR);
- idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
- mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
- }
+ /* The guest can only safely be executed in ring 3. */
+ if ( (b & _SEGMENT_DPL) != _SEGMENT_DPL )
+ goto bad;
- return heap_start;
-}
+ /* All code and data segments are okay. No base/limit checking. */
+ if ( (b & _SEGMENT_S) )
+ goto good;
-static void __memguard_change_range(void *p, unsigned long l, int guard)
-{
- l1_pgentry_t *l1;
- l2_pgentry_t *l2;
- unsigned long _p = (unsigned long)p;
- unsigned long _l = (unsigned long)l;
-
- /* Ensure we are dealing with a page-aligned whole number of pages. */
- ASSERT((_p&PAGE_MASK) != 0);
- ASSERT((_l&PAGE_MASK) != 0);
- ASSERT((_p&~PAGE_MASK) == 0);
- ASSERT((_l&~PAGE_MASK) == 0);
-
- while ( _l != 0 )
- {
- l2 = &idle_pg_table[l2_table_offset(_p)];
- l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
- if ( guard )
- *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
- else
- *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
- _p += PAGE_SIZE;
- _l -= PAGE_SIZE;
- }
-}
+ /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
+ if ( (b & _SEGMENT_TYPE) == 0x000 )
+ goto good;
-void memguard_guard_range(void *p, unsigned long l)
-{
- __memguard_change_range(p, l, 1);
- local_flush_tlb();
-}
+ /* Everything but a call gate is discarded here. */
+ if ( (b & _SEGMENT_TYPE) != 0xc00 )
+ goto bad;
-void memguard_unguard_range(void *p, unsigned long l)
-{
- __memguard_change_range(p, l, 0);
+ /* Can't allow far jump to a Xen-private segment. */
+ if ( !VALID_CODESEL(a>>16) )
+ goto bad;
+
+ /* Reserved bits must be zero. */
+ if ( (b & 0xe0) != 0 )
+ goto bad;
+
+ good:
+ return 1;
+ bad:
+ return 0;
}
-int memguard_is_guarded(void *p)
+void memguard_guard_stack(void *p)
{
- l1_pgentry_t *l1;
- l2_pgentry_t *l2;
- unsigned long _p = (unsigned long)p;
- l2 = &idle_pg_table[l2_table_offset(_p)];
- l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
- return !(l1_pgentry_val(*l1) & _PAGE_PRESENT);
+ p = (void *)((unsigned long)p + PAGE_SIZE);
+ memguard_guard_range(p, 2 * PAGE_SIZE);
}
-#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
new file mode 100644
index 0000000000..0d7429cf14
--- /dev/null
+++ b/xen/arch/x86/x86_64/traps.c
@@ -0,0 +1,192 @@
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/sched.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/mm.h>
+#include <xen/irq.h>
+#include <xen/console.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/msr.h>
+
+void show_registers(struct cpu_user_regs *regs)
+{
+ printk("CPU: %d\nEIP: %04x:[<%016lx>] \nEFLAGS: %016lx\n",
+ smp_processor_id(), 0xffff & regs->cs, regs->rip, regs->eflags);
+ printk("rax: %016lx rbx: %016lx rcx: %016lx rdx: %016lx\n",
+ regs->rax, regs->rbx, regs->rcx, regs->rdx);
+ printk("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
+ regs->rsi, regs->rdi, regs->rbp, regs->rsp);
+ printk("r8: %016lx r9: %016lx r10: %016lx r11: %016lx\n",
+ regs->r8, regs->r9, regs->r10, regs->r11);
+ printk("r12: %016lx r13: %016lx r14: %016lx r15: %016lx\n",
+ regs->r12, regs->r13, regs->r14, regs->r15);
+
+ if ( GUEST_MODE(regs) )
+ show_guest_stack();
+ else
+ show_stack((unsigned long *)regs->rsp);
+}
+
+void show_page_walk(unsigned long addr)
+{
+ unsigned long page = read_cr3();
+
+ printk("Pagetable walk from %016lx:\n", addr);
+
+ page &= PAGE_MASK;
+ page = ((unsigned long *) __va(page))[l4_table_offset(addr)];
+ printk(" L4 = %016lx\n", page);
+ if ( !(page & _PAGE_PRESENT) )
+ return;
+
+ page &= PAGE_MASK;
+ page = ((unsigned long *) __va(page))[l3_table_offset(addr)];
+ printk(" L3 = %016lx\n", page);
+ if ( !(page & _PAGE_PRESENT) )
+ return;
+
+ page &= PAGE_MASK;
+ page = ((unsigned long *) __va(page))[l2_table_offset(addr)];
+ printk(" L2 = %016lx %s\n", page, (page & _PAGE_PSE) ? "(2MB)" : "");
+ if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
+ return;
+
+ page &= PAGE_MASK;
+ page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
+ printk(" L1 = %016lx\n", page);
+}
+
+asmlinkage void double_fault(void);
+asmlinkage void do_double_fault(struct cpu_user_regs *regs)
+{
+ watchdog_disable();
+
+ console_force_unlock();
+
+ /* Find information saved during fault and dump it to the console. */
+ printk("************************************\n");
+ show_registers(regs);
+ printk("************************************\n");
+ printk("CPU%d DOUBLE FAULT -- system shutdown\n", smp_processor_id());
+ printk("System needs manual reset.\n");
+ printk("************************************\n");
+
+ /* Lock up the console to prevent spurious output from other CPUs. */
+ console_force_lock();
+
+ /* Wait for manual reset. */
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+}
+
+asmlinkage void syscall_enter(void);
+void __init percpu_traps_init(void)
+{
+ char *stack_bottom, *stack;
+ int cpu = smp_processor_id();
+
+ if ( cpu == 0 )
+ {
+ /* Specify dedicated interrupt stacks for NMIs and double faults. */
+ set_intr_gate(TRAP_double_fault, &double_fault);
+ idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
+ idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ }
+
+ stack_bottom = (char *)get_stack_bottom();
+ stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
+
+ /* Double-fault handler has its own per-CPU 1kB stack. */
+ init_tss[cpu].ist[0] = (unsigned long)&stack[1024];
+
+ /* NMI handler has its own per-CPU 1kB stack. */
+ init_tss[cpu].ist[1] = (unsigned long)&stack[2048];
+
+ /*
+ * Trampoline for SYSCALL entry from long mode.
+ */
+
+ /* Skip the NMI and DF stacks. */
+ stack = &stack[2048];
+ wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+
+ /* movq %rsp, saversp(%rip) */
+ stack[0] = 0x48;
+ stack[1] = 0x89;
+ stack[2] = 0x25;
+ *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
+
+ /* leaq saversp(%rip), %rsp */
+ stack[7] = 0x48;
+ stack[8] = 0x8d;
+ stack[9] = 0x25;
+ *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
+
+ /* pushq %r11 */
+ stack[14] = 0x41;
+ stack[15] = 0x53;
+
+ /* pushq $__GUEST_CS64 */
+ stack[16] = 0x68;
+ *(u32 *)&stack[17] = __GUEST_CS64;
+
+ /* jmp syscall_enter */
+ stack[21] = 0xe9;
+ *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
+
+ /*
+ * Trampoline for SYSCALL entry from compatibility mode.
+ */
+
+ /* Skip the long-mode entry trampoline. */
+ stack = &stack[26];
+ wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+
+ /* movq %rsp, saversp(%rip) */
+ stack[0] = 0x48;
+ stack[1] = 0x89;
+ stack[2] = 0x25;
+ *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
+
+ /* leaq saversp(%rip), %rsp */
+ stack[7] = 0x48;
+ stack[8] = 0x8d;
+ stack[9] = 0x25;
+ *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
+
+ /* pushq %r11 */
+ stack[14] = 0x41;
+ stack[15] = 0x53;
+
+ /* pushq $__GUEST_CS32 */
+ stack[16] = 0x68;
+ *(u32 *)&stack[17] = __GUEST_CS32;
+
+ /* jmp syscall_enter */
+ stack[21] = 0xe9;
+ *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
+
+ /*
+ * Common SYSCALL parameters.
+ */
+
+ wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
+ wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
+}
+
+long do_set_callbacks(unsigned long event_address,
+ unsigned long failsafe_address,
+ unsigned long syscall_address)
+{
+ struct vcpu *d = current;
+
+ d->arch.guest_context.event_callback_eip = event_address;
+ d->arch.guest_context.failsafe_callback_eip = failsafe_address;
+ d->arch.guest_context.syscall_callback_eip = syscall_address;
+
+ return 0;
+}
diff --git a/xen/arch/x86/x86_64/usercopy.c b/xen/arch/x86/x86_64/usercopy.c
deleted file mode 100644
index e7c11fa501..0000000000
--- a/xen/arch/x86/x86_64/usercopy.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * User address space access functions.
- *
- * Copyright 1997 Andi Kleen <ak@muc.de>
- * Copyright 1997 Linus Torvalds
- * Copyright 2002 Andi Kleen <ak@suse.de>
- */
-#include <asm/uaccess.h>
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-#define __do_strncpy_from_user(dst,src,count,res) \
-do { \
- long __d0, __d1, __d2; \
- __asm__ __volatile__( \
- " testq %1,%1\n" \
- " jz 2f\n" \
- "0: lodsb\n" \
- " stosb\n" \
- " testb %%al,%%al\n" \
- " jz 1f\n" \
- " decq %1\n" \
- " jnz 0b\n" \
- "1: subq %1,%0\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movq %5,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 0b,3b\n" \
- ".previous" \
- : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
- "=&D" (__d2) \
- : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
- : "memory"); \
-} while (0)
-
-long
-__strncpy_from_user(char *dst, const char *src, long count)
-{
- long res;
- __do_strncpy_from_user(dst, src, count, res);
- return res;
-}
-
-long
-strncpy_from_user(char *dst, const char *src, long count)
-{
- long res = -EFAULT;
- if (access_ok(VERIFY_READ, src, 1))
- __do_strncpy_from_user(dst, src, count, res);
- return res;
-}
-
-/*
- * Zero Userspace
- */
-
-unsigned long __clear_user(void *addr, unsigned long size)
-{
- long __d0;
- /* no memory constraint because it doesn't change any memory gcc knows
- about */
- asm volatile(
- " testq %[size8],%[size8]\n"
- " jz 4f\n"
- "0: movq %[zero],(%[dst])\n"
- " addq %[eight],%[dst]\n"
- " decl %%ecx ; jnz 0b\n"
- "4: movq %[size1],%%rcx\n"
- " testl %%ecx,%%ecx\n"
- " jz 2f\n"
- "1: movb %b[zero],(%[dst])\n"
- " incq %[dst]\n"
- " decl %%ecx ; jnz 1b\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: lea 0(%[size1],%[size8],8),%[size8]\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 0b,3b\n"
- " .quad 1b,2b\n"
- ".previous"
- : [size8] "=c"(size), [dst] "=&D" (__d0)
- : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst] "(addr),
- [zero] "r" (0UL), [eight] "r" (8UL));
- return size;
-}
-
-
-unsigned long clear_user(void *to, unsigned long n)
-{
- if (access_ok(VERIFY_WRITE, to, n))
- return __clear_user(to, n);
- return n;
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-
-long strnlen_user(const char *s, long n)
-{
- unsigned long res = 0;
- char c;
-
- if (!access_ok(VERIFY_READ, s, n))
- return 0;
-
- while (1) {
- if (get_user(c, s))
- return 0;
- if (!c)
- return res+1;
- if (res>n)
- return n+1;
- res++;
- s++;
- }
-}
-
-unsigned long copy_in_user(void *to, const void *from, unsigned len)
-{
- if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) {
- return copy_user_generic(to, from, len);
- }
- return len;
-}
diff --git a/xen/arch/x86/x86_64/xen.lds b/xen/arch/x86/x86_64/xen.lds
index 2bb2d9ed49..30a2b0ca67 100644
--- a/xen/arch/x86/x86_64/xen.lds
+++ b/xen/arch/x86/x86_64/xen.lds
@@ -23,12 +23,12 @@ SECTIONS
.rodata : { *(.rodata) *(.rodata.*) } :text
.kstrtab : { *(.kstrtab) } :text
- . = ALIGN(16); /* Exception table */
+ . = ALIGN(32); /* Exception table */
__start___ex_table = .;
__ex_table : { *(__ex_table) } :text
__stop___ex_table = .;
- . = ALIGN(16); /* Pre-exception table */
+ . = ALIGN(32); /* Pre-exception table */
__start___pre_ex_table = .;
__pre_ex_table : { *(__pre_ex_table) } :text
__stop___pre_ex_table = .;
@@ -55,7 +55,7 @@ SECTIONS
__init_begin = .;
.text.init : { *(.text.init) } :text
.data.init : { *(.data.init) } :text
- . = ALIGN(16);
+ . = ALIGN(32);
__setup_start = .;
.setup.init : { *(.setup.init) } :text
__setup_end = .;
diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c
new file mode 100644
index 0000000000..c13e28de41
--- /dev/null
+++ b/xen/arch/x86/x86_emulate.c
@@ -0,0 +1,1071 @@
+/******************************************************************************
+ * x86_emulate.c
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005 Keir Fraser
+ */
+
+#ifdef __TEST_HARNESS__
+#include <stdio.h>
+#include <stdint.h>
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+#include <public/xen.h>
+#define DPRINTF(_f, _a...) printf( _f , ## _a )
+#else
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <asm/regs.h>
+#define DPRINTF DPRINTK
+#endif
+#include <asm-x86/x86_emulate.h>
+
+/*
+ * Opcode effective-address decode tables.
+ * Note that we only emulate instructions that have at least one memory
+ * operand (excluding implicit stack references). We assume that stack
+ * references and instruction fetches will never occur in special memory
+ * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
+ * not be handled.
+ */
+
+/* Operand sizes: 8-bit operands or specified/overridden size. */
+#define ByteOp (1<<0) /* 8-bit operands. */
+/* Destination operand type. */
+#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
+#define DstReg (2<<1) /* Register operand. */
+#define DstMem (3<<1) /* Memory operand. */
+#define DstMask (3<<1)
+/* Source operand type. */
+#define SrcNone (0<<3) /* No source operand. */
+#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
+#define SrcReg (1<<3) /* Register operand. */
+#define SrcMem (2<<3) /* Memory operand. */
+#define SrcImm (3<<3) /* Immediate operand. */
+#define SrcImmByte (4<<3) /* 8-bit sign-extended immediate operand. */
+#define SrcMask (7<<3)
+/* Generic ModRM decode. */
+#define ModRM (1<<6)
+/* Destination is only written; never read. */
+#define Mov (1<<7)
+
+static u8 opcode_table[256] = {
+ /* 0x00 - 0x07 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, 0,
+ /* 0x08 - 0x0F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, 0,
+ /* 0x10 - 0x17 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, 0,
+ /* 0x18 - 0x1F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, 0,
+ /* 0x20 - 0x27 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, 0,
+ /* 0x28 - 0x2F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, 0,
+ /* 0x30 - 0x37 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, 0,
+ /* 0x38 - 0x3F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, 0,
+ /* 0x40 - 0x4F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x50 - 0x5F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x60 - 0x6F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x70 - 0x7F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x80 - 0x87 */
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ /* 0x88 - 0x8F */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, DstMem|SrcNone|ModRM|Mov,
+ /* 0x90 - 0x9F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xA0 - 0xA7 */
+ ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov,
+ ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov,
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps, ImplicitOps,
+ /* 0xA8 - 0xAF */
+ 0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps, ImplicitOps,
+ /* 0xB0 - 0xBF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xC0 - 0xC7 */
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
+ 0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
+ /* 0xC8 - 0xCF */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xD0 - 0xD7 */
+ ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
+ ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
+ 0, 0, 0, 0,
+ /* 0xD8 - 0xDF */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xE0 - 0xEF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xF0 - 0xF7 */
+ 0, 0, 0, 0,
+ 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
+ /* 0xF8 - 0xFF */
+ 0, 0, 0, 0,
+ 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
+};
+
+static u8 twobyte_table[256] = {
+ /* 0x00 - 0x0F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
+ /* 0x10 - 0x1F */
+ 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x20 - 0x2F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x30 - 0x3F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x40 - 0x47 */
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ /* 0x48 - 0x4F */
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ /* 0x50 - 0x5F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x60 - 0x6F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x70 - 0x7F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x80 - 0x8F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x90 - 0x9F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xA0 - 0xA7 */
+ 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
+ /* 0xA8 - 0xAF */
+ 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
+ /* 0xB0 - 0xB7 */
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM,
+ 0, 0, 0, 0,
+ /* 0xB8 - 0xBF */
+ 0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
+ /* 0xC0 - 0xCF */
+ 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xD0 - 0xDF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xE0 - 0xEF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xF0 - 0xFF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Type, address-of, and value of an instruction's operand. */
+struct operand {
+ enum { OP_REG, OP_MEM, OP_IMM } type;
+ unsigned int bytes;
+ unsigned long val, orig_val, *ptr;
+};
+
+/* EFLAGS bit definitions. */
+#define EFLG_OF (1<<11)
+#define EFLG_DF (1<<10)
+#define EFLG_SF (1<<7)
+#define EFLG_ZF (1<<6)
+#define EFLG_AF (1<<4)
+#define EFLG_PF (1<<2)
+#define EFLG_CF (1<<0)
+
+/*
+ * Instruction emulation:
+ * Most instructions are emulated directly via a fragment of inline assembly
+ * code. This allows us to save/restore EFLAGS and thus very easily pick up
+ * any modified flags.
+ */
+
+#if defined(__x86_64__)
+#define _LO32 "k" /* force 32-bit operand */
+#define _STK "%%rsp" /* stack pointer */
+#elif defined(__i386__)
+#define _LO32 "" /* force 32-bit operand */
+#define _STK "%%esp" /* stack pointer */
+#endif
+
+/*
+ * These EFLAGS bits are restored from saved value during emulation, and
+ * any changes are written back to the saved value after emulation.
+ */
+#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+
+/* Before executing instruction: restore necessary bits in EFLAGS. */
+#define _PRE_EFLAGS(_sav, _msk, _tmp) \
+/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); */\
+"push %"_sav"; " \
+"movl %"_msk",%"_LO32 _tmp"; " \
+"andl %"_LO32 _tmp",("_STK"); " \
+"pushf; " \
+"notl %"_LO32 _tmp"; " \
+"andl %"_LO32 _tmp",("_STK"); " \
+"pop %"_tmp"; " \
+"orl %"_LO32 _tmp",("_STK"); " \
+"popf; " \
+/* _sav &= ~msk; */ \
+"movl %"_msk",%"_LO32 _tmp"; " \
+"notl %"_LO32 _tmp"; " \
+"andl %"_LO32 _tmp",%"_sav"; "
+
+/* After executing instruction: write-back necessary bits in EFLAGS. */
+#define _POST_EFLAGS(_sav, _msk, _tmp) \
+/* _sav |= EFLAGS & _msk; */ \
+"pushf; " \
+"pop %"_tmp"; " \
+"andl %"_msk",%"_LO32 _tmp"; " \
+"orl %"_LO32 _tmp",%"_sav"; "
+
+/* Raw emulation: instruction has two explicit operands. */
+#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy)\
+do{ unsigned long _tmp; \
+ switch ( (_dst).bytes ) \
+ { \
+ case 2: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0","4","2") \
+ _op"w %"_wx"3,%1; " \
+ _POST_EFLAGS("0","4","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _wy ((_src).val), "i" (EFLAGS_MASK) ); \
+ break; \
+ case 4: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0","4","2") \
+ _op"l %"_lx"3,%1; " \
+ _POST_EFLAGS("0","4","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _ly ((_src).val), "i" (EFLAGS_MASK) ); \
+ break; \
+ case 8: \
+ __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy); \
+ break; \
+ } \
+} while (0)
+#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\
+do{ unsigned long _tmp; \
+ switch ( (_dst).bytes ) \
+ { \
+ case 1: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0","4","2") \
+ _op"b %"_bx"3,%1; " \
+ _POST_EFLAGS("0","4","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _by ((_src).val), "i" (EFLAGS_MASK) ); \
+ break; \
+ default: \
+ __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy);\
+ break; \
+ } \
+} while (0)
+/* Source operand is byte-sized and may be restricted to just %cl. */
+#define emulate_2op_SrcB(_op, _src, _dst, _eflags) \
+ __emulate_2op(_op, _src, _dst, _eflags, \
+ "b", "c", "b", "c", "b", "c", "b", "c")
+/* Source operand is byte, word, long or quad sized. */
+#define emulate_2op_SrcV(_op, _src, _dst, _eflags) \
+ __emulate_2op(_op, _src, _dst, _eflags, \
+ "b", "q", "w", "r", _LO32, "r", "", "r")
+/* Source operand is word, long or quad sized. */
+#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \
+ __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
+ "w", "r", _LO32, "r", "", "r")
+
+/* Instruction has only one explicit operand (no source operand). */
+#define emulate_1op(_op,_dst,_eflags) \
+do{ unsigned long _tmp; \
+ switch ( (_dst).bytes ) \
+ { \
+ case 1: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0","3","2") \
+ _op"b %1; " \
+ _POST_EFLAGS("0","3","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK) ); \
+ break; \
+ case 2: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0","3","2") \
+ _op"w %1; " \
+ _POST_EFLAGS("0","3","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK) ); \
+ break; \
+ case 4: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0","3","2") \
+ _op"l %1; " \
+ _POST_EFLAGS("0","3","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK) ); \
+ break; \
+ case 8: \
+ __emulate_1op_8byte(_op, _dst, _eflags); \
+ break; \
+ } \
+} while (0)
+
+/* Emulate an instruction with quadword operands (x86/64 only). */
+#if defined(__x86_64__)
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \
+do{ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0","4","2") \
+ _op"q %"_qx"3,%1; " \
+ _POST_EFLAGS("0","4","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _qy ((_src).val), "i" (EFLAGS_MASK) ); \
+} while (0)
+#define __emulate_1op_8byte(_op, _dst, _eflags) \
+do{ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0","3","2") \
+ _op"q %1; " \
+ _POST_EFLAGS("0","3","2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK) ); \
+} while (0)
+#elif defined(__i386__)
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
+#define __emulate_1op_8byte(_op, _dst, _eflags)
+#endif /* __i386__ */
+
+/* Fetch next part of the instruction being emulated. */
+#define insn_fetch(_type, _size, _eip) \
+({ unsigned long _x; \
+ if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
+ goto done; \
+ (_eip) += (_size); \
+ (_type)_x; \
+})
+
+void *
+decode_register(
+ u8 modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
+{
+ void *p;
+
+ switch ( modrm_reg )
+ {
+ case 0: p = &regs->eax; break;
+ case 1: p = &regs->ecx; break;
+ case 2: p = &regs->edx; break;
+ case 3: p = &regs->ebx; break;
+ case 4: p = (highbyte_regs ?
+ ((unsigned char *)&regs->eax + 1) :
+ (unsigned char *)&regs->esp); break;
+ case 5: p = (highbyte_regs ?
+ ((unsigned char *)&regs->ecx + 1) :
+ (unsigned char *)&regs->ebp); break;
+ case 6: p = (highbyte_regs ?
+ ((unsigned char *)&regs->edx + 1) :
+ (unsigned char *)&regs->esi); break;
+ case 7: p = (highbyte_regs ?
+ ((unsigned char *)&regs->ebx + 1) :
+ (unsigned char *)&regs->edi); break;
+#if defined(__x86_64__)
+ case 8: p = &regs->r8; break;
+ case 9: p = &regs->r9; break;
+ case 10: p = &regs->r10; break;
+ case 11: p = &regs->r11; break;
+ case 12: p = &regs->r12; break;
+ case 13: p = &regs->r13; break;
+ case 14: p = &regs->r14; break;
+ case 15: p = &regs->r15; break;
+#endif
+ default: p = NULL; break;
+ }
+
+ return p;
+}
+
+int
+x86_emulate_memop(
+ struct cpu_user_regs *regs,
+ unsigned long cr2,
+ struct x86_mem_emulator *ops,
+ int mode)
+{
+ u8 b, d, sib, twobyte = 0, rex_prefix = 0;
+ u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+ unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
+ unsigned int lock_prefix = 0, rep_prefix = 0, i;
+ int rc = 0;
+ struct operand src, dst;
+
+ /* Shadow copy of register state. Committed on successful emulation. */
+ struct cpu_user_regs _regs = *regs;
+
+ /* Legacy prefixes. */
+ for ( i = 0; i < 8; i++ )
+ {
+ switch ( b = insn_fetch(u8, 1, _regs.eip) )
+ {
+ case 0x66: /* operand-size override */
+ op_bytes ^= 6; /* switch between 2/4 bytes */
+ break;
+ case 0x67: /* address-size override */
+ ad_bytes ^= (mode == 8) ? 12 : 6; /* switch between 2/4/8 bytes */
+ break;
+ case 0x2e: /* CS override */
+ case 0x3e: /* DS override */
+ case 0x26: /* ES override */
+ case 0x64: /* FS override */
+ case 0x65: /* GS override */
+ case 0x36: /* SS override */
+ DPRINTF("Warning: ignoring a segment override.\n");
+ break;
+ case 0xf0: /* LOCK */
+ lock_prefix = 1;
+ break;
+ case 0xf3: /* REP/REPE/REPZ */
+ rep_prefix = 1;
+ break;
+ case 0xf2: /* REPNE/REPNZ */
+ break;
+ default:
+ goto done_prefixes;
+ }
+ }
+ done_prefixes:
+
+ if ( ad_bytes == 2 )
+ {
+ DPRINTF("Cannot parse 16-bit effective addresses.\n");
+ goto cannot_emulate;
+ }
+
+ /* REX prefix. */
+ if ( (mode == 8) && ((b & 0xf0) == 0x40) )
+ {
+ rex_prefix = b;
+ if ( b & 8 )
+ op_bytes = 8; /* REX.W */
+ modrm_reg = (b & 4) << 1; /* REX.R */
+ /* REX.B and REX.X do not need to be decoded. */
+ b = insn_fetch(u8, 1, _regs.eip);
+ }
+
+ /* Opcode byte(s). */
+ d = opcode_table[b];
+ if ( d == 0 )
+ {
+ /* Two-byte opcode? */
+ if ( b == 0x0f )
+ {
+ twobyte = 1;
+ b = insn_fetch(u8, 1, _regs.eip);
+ d = twobyte_table[b];
+ }
+
+ /* Unrecognised? */
+ if ( d == 0 )
+ goto cannot_emulate;
+ }
+
+ /* ModRM and SIB bytes. */
+ if ( d & ModRM )
+ {
+ modrm = insn_fetch(u8, 1, _regs.eip);
+ modrm_mod |= (modrm & 0xc0) >> 6;
+ modrm_reg |= (modrm & 0x38) >> 3;
+ modrm_rm |= (modrm & 0x07);
+ switch ( modrm_mod )
+ {
+ case 0:
+ if ( (modrm_rm == 4) &&
+ (((sib = insn_fetch(u8, 1, _regs.eip)) & 7) == 5) )
+ _regs.eip += 4; /* skip disp32 specified by SIB.base */
+ else if ( modrm_rm == 5 )
+ _regs.eip += 4; /* skip disp32 */
+ break;
+ case 1:
+ if ( modrm_rm == 4 )
+ sib = insn_fetch(u8, 1, _regs.eip);
+ _regs.eip += 1; /* skip disp8 */
+ break;
+ case 2:
+ if ( modrm_rm == 4 )
+ sib = insn_fetch(u8, 1, _regs.eip);
+ _regs.eip += 4; /* skip disp32 */
+ break;
+ case 3:
+ DPRINTF("Cannot parse ModRM.mod == 3.\n");
+ goto cannot_emulate;
+ }
+ }
+
+ /* Decode and fetch the destination operand: register or memory. */
+ switch ( d & DstMask )
+ {
+ case ImplicitOps:
+ /* Special instructions do their own operand decoding. */
+ goto special_insn;
+ case DstReg:
+ dst.type = OP_REG;
+ if ( d & ByteOp )
+ {
+ dst.ptr = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
+ dst.val = *(u8 *)dst.ptr;
+ dst.bytes = 1;
+ }
+ else
+ {
+ dst.ptr = decode_register(modrm_reg, &_regs, 0);
+ switch ( (dst.bytes = op_bytes) )
+ {
+ case 2: dst.val = *(u16 *)dst.ptr; break;
+ case 4: dst.val = *(u32 *)dst.ptr; break;
+ case 8: dst.val = *(u64 *)dst.ptr; break;
+ }
+ }
+ break;
+ case DstMem:
+ dst.type = OP_MEM;
+ dst.ptr = (unsigned long *)cr2;
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
+ ((rc = ops->read_emulated((unsigned long)dst.ptr,
+ &dst.val, dst.bytes)) != 0) )
+ goto done;
+ break;
+ }
+ dst.orig_val = dst.val;
+
+ /* Decode and fetch the source operand: register, memory or immediate. */
+ switch ( d & SrcMask )
+ {
+ case SrcNone:
+ break;
+ case SrcReg:
+ src.type = OP_REG;
+ if ( d & ByteOp )
+ {
+ src.ptr = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
+ src.val = src.orig_val = *(u8 *)src.ptr;
+ src.bytes = 1;
+ }
+ else
+ {
+ src.ptr = decode_register(modrm_reg, &_regs, 0);
+ switch ( (src.bytes = op_bytes) )
+ {
+ case 2: src.val = src.orig_val = *(u16 *)src.ptr; break;
+ case 4: src.val = src.orig_val = *(u32 *)src.ptr; break;
+ case 8: src.val = src.orig_val = *(u64 *)src.ptr; break;
+ }
+ }
+ break;
+ case SrcMem:
+ src.type = OP_MEM;
+ src.ptr = (unsigned long *)cr2;
+ src.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( (rc = ops->read_emulated((unsigned long)src.ptr,
+ &src.val, src.bytes)) != 0 )
+ goto done;
+ src.orig_val = src.val;
+ break;
+ case SrcImm:
+ src.type = OP_IMM;
+ src.ptr = (unsigned long *)_regs.eip;
+ src.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( src.bytes == 8 ) src.bytes = 4;
+ /* NB. Immediates are sign-extended as necessary. */
+ switch ( src.bytes )
+ {
+ case 1: src.val = insn_fetch(s8, 1, _regs.eip); break;
+ case 2: src.val = insn_fetch(s16, 2, _regs.eip); break;
+ case 4: src.val = insn_fetch(s32, 4, _regs.eip); break;
+ }
+ break;
+ case SrcImmByte:
+ src.type = OP_IMM;
+ src.ptr = (unsigned long *)_regs.eip;
+ src.bytes = 1;
+ src.val = insn_fetch(s8, 1, _regs.eip);
+ break;
+ }
+
+ if ( twobyte )
+ goto twobyte_insn;
+
+ switch ( b )
+ {
+ case 0x00 ... 0x05: add: /* add */
+ emulate_2op_SrcV("add", src, dst, _regs.eflags);
+ break;
+ case 0x08 ... 0x0d: or: /* or */
+ emulate_2op_SrcV("or", src, dst, _regs.eflags);
+ break;
+ case 0x10 ... 0x15: adc: /* adc */
+ emulate_2op_SrcV("adc", src, dst, _regs.eflags);
+ break;
+ case 0x18 ... 0x1d: sbb: /* sbb */
+ emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
+ break;
+ case 0x20 ... 0x25: and: /* and */
+ emulate_2op_SrcV("and", src, dst, _regs.eflags);
+ break;
+ case 0x28 ... 0x2d: sub: /* sub */
+ emulate_2op_SrcV("sub", src, dst, _regs.eflags);
+ break;
+ case 0x30 ... 0x35: xor: /* xor */
+ emulate_2op_SrcV("xor", src, dst, _regs.eflags);
+ break;
+ case 0x38 ... 0x3d: cmp: /* cmp */
+ emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
+ break;
+ case 0x80 ... 0x83: /* Grp1 */
+ switch ( modrm_reg )
+ {
+ case 0: goto add;
+ case 1: goto or;
+ case 2: goto adc;
+ case 3: goto sbb;
+ case 4: goto and;
+ case 5: goto sub;
+ case 6: goto xor;
+ case 7: goto cmp;
+ }
+ break;
+ case 0x84 ... 0x85: test: /* test */
+ emulate_2op_SrcV("test", src, dst, _regs.eflags);
+ break;
+ case 0x86 ... 0x87: /* xchg */
+ /* Write back the register source. */
+ switch ( dst.bytes )
+ {
+ case 1: *(u8 *)src.ptr = (u8)dst.val; break;
+ case 2: *(u16 *)src.ptr = (u16)dst.val; break;
+ case 4: *src.ptr = (u32)dst.val; break; /* 64b mode: zero-extend */
+ case 8: *src.ptr = dst.val; break;
+ }
+ /* Write back the memory destination with implicit LOCK prefix. */
+ dst.val = src.val;
+ lock_prefix = 1;
+ break;
+ case 0xa0 ... 0xa1: /* mov */
+ dst.ptr = (unsigned long *)&_regs.eax;
+ dst.val = src.val;
+ _regs.eip += ad_bytes; /* skip src displacement */
+ break;
+ case 0xa2 ... 0xa3: /* mov */
+ dst.val = (unsigned long)_regs.eax;
+ _regs.eip += ad_bytes; /* skip dst displacement */
+ break;
+ case 0x88 ... 0x8b: /* mov */
+ case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
+ dst.val = src.val;
+ break;
+ case 0x8f: /* pop (sole member of Grp1a) */
+ /* 64-bit mode: POP defaults to 64-bit operands. */
+ if ( (mode == 8) && (dst.bytes == 4) )
+ dst.bytes = 8;
+ if ( (rc = ops->read_std(_regs.esp, &dst.val, dst.bytes)) != 0 )
+ goto done;
+ _regs.esp += dst.bytes;
+ break;
+ case 0xc0 ... 0xc1: grp2: /* Grp2 */
+ switch ( modrm_reg )
+ {
+ case 0: /* rol */
+ emulate_2op_SrcB("rol", src, dst, _regs.eflags);
+ break;
+ case 1: /* ror */
+ emulate_2op_SrcB("ror", src, dst, _regs.eflags);
+ break;
+ case 2: /* rcl */
+ emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
+ break;
+ case 3: /* rcr */
+ emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
+ break;
+ case 4: /* sal/shl */
+ case 6: /* sal/shl */
+ emulate_2op_SrcB("sal", src, dst, _regs.eflags);
+ break;
+ case 5: /* shr */
+ emulate_2op_SrcB("shr", src, dst, _regs.eflags);
+ break;
+ case 7: /* sar */
+ emulate_2op_SrcB("sar", src, dst, _regs.eflags);
+ break;
+ }
+ break;
+ case 0xd0 ... 0xd1: /* Grp2 */
+ src.val = 1;
+ goto grp2;
+ case 0xd2 ... 0xd3: /* Grp2 */
+ src.val = _regs.ecx;
+ goto grp2;
+ case 0xf6 ... 0xf7: /* Grp3 */
+ switch ( modrm_reg )
+ {
+ case 0 ... 1: /* test */
+ /* Special case in Grp3: test has an immediate source operand. */
+ src.type = OP_IMM;
+ src.ptr = (unsigned long *)_regs.eip;
+ src.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( src.bytes == 8 ) src.bytes = 4;
+ switch ( src.bytes )
+ {
+ case 1: src.val = insn_fetch(s8, 1, _regs.eip); break;
+ case 2: src.val = insn_fetch(s16, 2, _regs.eip); break;
+ case 4: src.val = insn_fetch(s32, 4, _regs.eip); break;
+ }
+ goto test;
+ case 2: /* not */
+ dst.val = ~dst.val;
+ break;
+ case 3: /* neg */
+ emulate_1op("neg", dst, _regs.eflags);
+ break;
+ default:
+ goto cannot_emulate;
+ }
+ break;
+ case 0xfe ... 0xff: /* Grp4/Grp5 */
+ switch ( modrm_reg )
+ {
+ case 0: /* inc */
+ emulate_1op("inc", dst, _regs.eflags);
+ break;
+ case 1: /* dec */
+ emulate_1op("dec", dst, _regs.eflags);
+ break;
+ case 6: /* push */
+ /* 64-bit mode: PUSH defaults to 64-bit operands. */
+ if ( (mode == 8) && (dst.bytes == 4) )
+ {
+ dst.bytes = 8;
+ if ( (rc = ops->read_std((unsigned long)dst.ptr,
+ &dst.val, 8)) != 0 )
+ goto done;
+ }
+ _regs.esp -= dst.bytes;
+ if ( (rc = ops->write_std(_regs.esp, dst.val, dst.bytes)) != 0 )
+ goto done;
+ dst.val = dst.orig_val; /* skanky: disable writeback */
+ break;
+ default:
+ goto cannot_emulate;
+ }
+ break;
+ }
+
+ writeback:
+ if ( (d & Mov) || (dst.orig_val != dst.val) )
+ {
+ switch ( dst.type )
+ {
+ case OP_REG:
+ /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+ switch ( dst.bytes )
+ {
+ case 1: *(u8 *)dst.ptr = (u8)dst.val; break;
+ case 2: *(u16 *)dst.ptr = (u16)dst.val; break;
+ case 4: *dst.ptr = (u32)dst.val; break; /* 64b mode: zero-extend */
+ case 8: *dst.ptr = dst.val; break;
+ }
+ break;
+ case OP_MEM:
+ if ( lock_prefix )
+ rc = ops->cmpxchg_emulated(
+ (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
+ else
+ rc = ops->write_emulated(
+ (unsigned long)dst.ptr, dst.val, dst.bytes);
+ if ( rc != 0 )
+ goto done;
+ default:
+ break;
+ }
+ }
+
+ /* Commit shadow register state. */
+ *regs = _regs;
+
+ done:
+ return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+
+ special_insn:
+ if ( twobyte )
+ goto twobyte_special_insn;
+ if ( rep_prefix )
+ {
+ if ( _regs.ecx == 0 )
+ {
+ regs->eip = _regs.eip;
+ goto done;
+ }
+ _regs.ecx--;
+ _regs.eip = regs->eip;
+ }
+ switch ( b )
+ {
+ case 0xa4 ... 0xa5: /* movs */
+ dst.type = OP_MEM;
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ if ( _regs.error_code & 2 )
+ {
+ /* Write fault: destination is special memory. */
+ dst.ptr = (unsigned long *)cr2;
+ if ( (rc = ops->read_std(_regs.esi - _regs.edi + cr2,
+ &dst.val, dst.bytes)) != 0 )
+ goto done;
+ }
+ else
+ {
+ /* Read fault: source is special memory. */
+ dst.ptr = (unsigned long *)(_regs.edi - _regs.esi + cr2);
+ if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+ goto done;
+ }
+ _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ break;
+ case 0xa6 ... 0xa7: /* cmps */
+ DPRINTF("Urk! I don't handle CMPS.\n");
+ goto cannot_emulate;
+ case 0xaa ... 0xab: /* stos */
+ dst.type = OP_MEM;
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ dst.ptr = (unsigned long *)cr2;
+ dst.val = _regs.eax;
+ _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ break;
+ case 0xac ... 0xad: /* lods */
+ dst.type = OP_REG;
+ dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+ dst.ptr = (unsigned long *)&_regs.eax;
+ if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+ goto done;
+ _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
+ break;
+ case 0xae ... 0xaf: /* scas */
+ DPRINTF("Urk! I don't handle SCAS.\n");
+ goto cannot_emulate;
+ }
+ goto writeback;
+
+ twobyte_insn:
+ switch ( b )
+ {
+ case 0x40 ... 0x4f: /* cmov */
+ dst.val = dst.orig_val = src.val;
+ d &= ~Mov; /* default to no move */
+ /* First, assume we're decoding an even cmov opcode (lsb == 0). */
+ switch ( (b & 15) >> 1 )
+ {
+ case 0: /* cmovo */
+ d |= (_regs.eflags & EFLG_OF) ? Mov : 0;
+ break;
+ case 1: /* cmovb/cmovc/cmovnae */
+ d |= (_regs.eflags & EFLG_CF) ? Mov : 0;
+ break;
+ case 2: /* cmovz/cmove */
+ d |= (_regs.eflags & EFLG_ZF) ? Mov : 0;
+ break;
+ case 3: /* cmovbe/cmovna */
+ d |= (_regs.eflags & (EFLG_CF|EFLG_ZF)) ? Mov : 0;
+ break;
+ case 4: /* cmovs */
+ d |= (_regs.eflags & EFLG_SF) ? Mov : 0;
+ break;
+ case 5: /* cmovp/cmovpe */
+ d |= (_regs.eflags & EFLG_PF) ? Mov : 0;
+ break;
+ case 7: /* cmovle/cmovng */
+ d |= (_regs.eflags & EFLG_ZF) ? Mov : 0;
+ /* fall through */
+ case 6: /* cmovl/cmovnge */
+ d |= (!(_regs.eflags & EFLG_SF) != !(_regs.eflags & EFLG_OF)) ?
+ Mov : 0;
+ break;
+ }
+ /* Odd cmov opcodes (lsb == 1) have inverted sense. */
+ d ^= (b & 1) ? Mov : 0;
+ break;
+ case 0xb0 ... 0xb1: /* cmpxchg */
+ /* Save real source value, then compare EAX against destination. */
+ src.orig_val = src.val;
+ src.val = _regs.eax;
+ emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
+ /* Always write back. The question is: where to? */
+ d |= Mov;
+ if ( _regs.eflags & EFLG_ZF )
+ {
+ /* Success: write back to memory. */
+ dst.val = src.orig_val;
+ }
+ else
+ {
+ /* Failure: write the value we saw to EAX. */
+ dst.type = OP_REG;
+ dst.ptr = (unsigned long *)&_regs.eax;
+ }
+ break;
+ case 0xa3: bt: /* bt */
+ src.val &= (dst.bytes << 3) - 1; /* only subword offset */
+ emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
+ break;
+ case 0xb3: btr: /* btr */
+ src.val &= (dst.bytes << 3) - 1; /* only subword offset */
+ emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
+ break;
+ case 0xab: bts: /* bts */
+ src.val &= (dst.bytes << 3) - 1; /* only subword offset */
+ emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
+ break;
+ case 0xbb: btc: /* btc */
+ src.val &= (dst.bytes << 3) - 1; /* only subword offset */
+ emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
+ break;
+ case 0xba: /* Grp8 */
+ switch ( modrm_reg & 3 )
+ {
+ case 0: goto bt;
+ case 1: goto bts;
+ case 2: goto btr;
+ case 3: goto btc;
+ }
+ break;
+ }
+ goto writeback;
+
+ twobyte_special_insn:
+ /* Disable writeback. */
+ dst.orig_val = dst.val;
+ switch ( b )
+ {
+ case 0x0d: /* GrpP (prefetch) */
+ case 0x18: /* Grp16 (prefetch/nop) */
+ break;
+ case 0xc7: /* Grp9 (cmpxchg8b) */
+#if defined(__i386__)
+ {
+ unsigned long old_lo, old_hi;
+ if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) ||
+ ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) )
+ goto done;
+ if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
+ {
+ _regs.eax = old_lo;
+ _regs.edx = old_hi;
+ _regs.eflags &= ~EFLG_ZF;
+ }
+ else if ( ops->cmpxchg8b_emulated == NULL )
+ {
+ rc = X86EMUL_UNHANDLEABLE;
+ goto done;
+ }
+ else
+ {
+ if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi,
+ _regs.ebx, _regs.ecx)) != 0 )
+ goto done;
+ _regs.eflags |= EFLG_ZF;
+ }
+ break;
+ }
+#elif defined(__x86_64__)
+ {
+ unsigned long old, new;
+ if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 )
+ goto done;
+ if ( ((u32)(old>>0) != (u32)_regs.eax) ||
+ ((u32)(old>>32) != (u32)_regs.edx) )
+ {
+ _regs.eax = (u32)(old>>0);
+ _regs.edx = (u32)(old>>32);
+ _regs.eflags &= ~EFLG_ZF;
+ }
+ else
+ {
+ new = (_regs.ecx<<32)|(u32)_regs.ebx;
+ if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 )
+ goto done;
+ _regs.eflags |= EFLG_ZF;
+ }
+ break;
+ }
+#endif
+ }
+ goto writeback;
+
+ cannot_emulate:
+ DPRINTF("Cannot emulate %02x\n", b);
+ return -1;
+}
+
+#ifndef __TEST_HARNESS__
+
+#include <asm/mm.h>
+#include <asm/uaccess.h>
+
+int
+x86_emulate_read_std(
+ unsigned long addr,
+ unsigned long *val,
+ unsigned int bytes)
+{
+ *val = 0;
+ if ( copy_from_user((void *)val, (void *)addr, bytes) )
+ {
+ propagate_page_fault(addr, 4); /* user mode, read fault */
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ return X86EMUL_CONTINUE;
+}
+
+int
+x86_emulate_write_std(
+ unsigned long addr,
+ unsigned long val,
+ unsigned int bytes)
+{
+ if ( copy_to_user((void *)addr, (void *)&val, bytes) )
+ {
+ propagate_page_fault(addr, 6); /* user mode, write fault */
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ return X86EMUL_CONTINUE;
+}
+
+#endif
diff --git a/xen/common/Makefile b/xen/common/Makefile
index 1ae6148be2..ee312fde92 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -4,13 +4,6 @@ include $(BASEDIR)/Rules.mk
ifeq ($(TARGET_ARCH),ia64)
OBJS := $(subst dom_mem_ops.o,,$(OBJS))
OBJS := $(subst grant_table.o,,$(OBJS))
-OBJS := $(subst page_alloc.o,,$(OBJS))
-OBJS := $(subst slab.o,,$(OBJS))
-endif
-
-ifneq ($(debugger),y)
-OBJS := $(subst debug.o,,$(OBJS))
-OBJS := $(subst debug-linux.o,,$(OBJS))
endif
ifneq ($(perfc),y)
diff --git a/xen/common/ac_timer.c b/xen/common/ac_timer.c
index a33498090b..8dac1867f4 100644
--- a/xen/common/ac_timer.c
+++ b/xen/common/ac_timer.c
@@ -1,15 +1,8 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
- * (C) 2002-2003 University of Cambridge
- ****************************************************************************
- *
- * File: ac_timer.c
- * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
- * Keir Fraser (kaf24@cl.cam.ac.uk)
- *
- * Environment: Xen Hypervisor
- * Description: Accurate timer for the Hypervisor
+/******************************************************************************
+ * ac_timer.c
+ *
+ * Copyright (c) 2002-2003 Rolf Neugebauer
+ * Copyright (c) 2002-2005 K A Fraser
*/
#include <xen/config.h>
@@ -33,16 +26,15 @@
*/
#define TIMER_SLOP (50*1000) /* ns */
-#define DEFAULT_HEAP_LIMIT 127
-
-/* A timer list per CPU */
-typedef struct ac_timers_st
-{
+struct ac_timers {
spinlock_t lock;
struct ac_timer **heap;
-} __cacheline_aligned ac_timers_t;
-static ac_timers_t ac_timers[NR_CPUS];
+ unsigned int softirqs;
+} __cacheline_aligned;
+
+struct ac_timers ac_timers[NR_CPUS];
+extern int reprogram_ac_timer(s_time_t timeout);
/****************************************************************************
* HEAP OPERATIONS.
@@ -122,23 +114,24 @@ static int remove_entry(struct ac_timer **heap, struct ac_timer *t)
/* Add new entry @t to @heap. Return TRUE if new top of heap. */
-static int add_entry(struct ac_timer **heap, struct ac_timer *t)
+static int add_entry(struct ac_timer ***pheap, struct ac_timer *t)
{
+ struct ac_timer **heap = *pheap;
int sz = GET_HEAP_SIZE(heap);
/* Copy the heap if it is full. */
if ( unlikely(sz == GET_HEAP_LIMIT(heap)) )
{
- int i, limit = (GET_HEAP_LIMIT(heap)+1) << 1;
- struct ac_timer **new_heap = xmalloc(limit*sizeof(struct ac_timer *));
- if ( new_heap == NULL ) BUG();
- memcpy(new_heap, heap, (limit>>1)*sizeof(struct ac_timer *));
- for ( i = 0; i < smp_num_cpus; i++ )
- if ( ac_timers[i].heap == heap )
- ac_timers[i].heap = new_heap;
- xfree(heap);
- heap = new_heap;
- SET_HEAP_LIMIT(heap, limit-1);
+ /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
+ int old_limit = GET_HEAP_LIMIT(heap);
+ int new_limit = ((old_limit + 1) << 4) - 1;
+ heap = xmalloc_array(struct ac_timer *, new_limit + 1);
+ BUG_ON(heap == NULL);
+ memcpy(heap, *pheap, (old_limit + 1) * sizeof(*heap));
+ SET_HEAP_LIMIT(heap, new_limit);
+ if ( old_limit != 0 )
+ xfree(*pheap);
+ *pheap = heap;
}
SET_HEAP_SIZE(heap, ++sz);
@@ -156,22 +149,10 @@ static int add_entry(struct ac_timer **heap, struct ac_timer *t)
static inline void __add_ac_timer(struct ac_timer *timer)
{
int cpu = timer->cpu;
- if ( add_entry(ac_timers[cpu].heap, timer) )
+ if ( add_entry(&ac_timers[cpu].heap, timer) )
cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ);
}
-void add_ac_timer(struct ac_timer *timer)
-{
- int cpu = timer->cpu;
- unsigned long flags;
-
- spin_lock_irqsave(&ac_timers[cpu].lock, flags);
- ASSERT(timer != NULL);
- ASSERT(!active_ac_timer(timer));
- __add_ac_timer(timer);
- spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
-}
-
static inline void __rem_ac_timer(struct ac_timer *timer)
{
@@ -180,7 +161,8 @@ static inline void __rem_ac_timer(struct ac_timer *timer)
cpu_raise_softirq(cpu, AC_TIMER_SOFTIRQ);
}
-void rem_ac_timer(struct ac_timer *timer)
+
+void set_ac_timer(struct ac_timer *timer, s_time_t expires)
{
int cpu = timer->cpu;
unsigned long flags;
@@ -189,11 +171,13 @@ void rem_ac_timer(struct ac_timer *timer)
ASSERT(timer != NULL);
if ( active_ac_timer(timer) )
__rem_ac_timer(timer);
+ timer->expires = expires;
+ __add_ac_timer(timer);
spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
}
-void mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
+void rem_ac_timer(struct ac_timer *timer)
{
int cpu = timer->cpu;
unsigned long flags;
@@ -202,8 +186,6 @@ void mod_ac_timer(struct ac_timer *timer, s_time_t new_time)
ASSERT(timer != NULL);
if ( active_ac_timer(timer) )
__rem_ac_timer(timer);
- timer->expires = new_time;
- __add_ac_timer(timer);
spin_unlock_irqrestore(&ac_timers[cpu].lock, flags);
}
@@ -213,7 +195,7 @@ static void ac_timer_softirq_action(void)
int cpu = smp_processor_id();
struct ac_timer *t, **heap;
s_time_t now;
- void (*fn)(unsigned long);
+ void (*fn)(void *);
spin_lock_irq(&ac_timers[cpu].lock);
@@ -228,7 +210,7 @@ static void ac_timer_softirq_action(void)
if ( (fn = t->function) != NULL )
{
- unsigned long data = t->data;
+ void *data = t->data;
spin_unlock_irq(&ac_timers[cpu].lock);
(*fn)(data);
spin_lock_irq(&ac_timers[cpu].lock);
@@ -254,14 +236,14 @@ static void dump_timerq(unsigned char key)
printk("Dumping ac_timer queues: NOW=0x%08X%08X\n",
(u32)(now>>32), (u32)now);
- for ( i = 0; i < smp_num_cpus; i++ )
+ for_each_online_cpu( i )
{
printk("CPU[%02d] ", i);
spin_lock_irqsave(&ac_timers[i].lock, flags);
for ( j = 1; j <= GET_HEAP_SIZE(ac_timers[i].heap); j++ )
{
t = ac_timers[i].heap[j];
- printk (" %d : %p ex=0x%08X%08X %lu\n",
+ printk (" %d : %p ex=0x%08X%08X %p\n",
j, t, (u32)(t->expires>>32), (u32)t->expires, t->data);
}
spin_unlock_irqrestore(&ac_timers[i].lock, flags);
@@ -272,19 +254,33 @@ static void dump_timerq(unsigned char key)
void __init ac_timer_init(void)
{
+ static struct ac_timer *dummy_heap;
int i;
open_softirq(AC_TIMER_SOFTIRQ, ac_timer_softirq_action);
- for ( i = 0; i < smp_num_cpus; i++ )
+ /*
+ * All CPUs initially share an empty dummy heap. Only those CPUs that
+ * are brought online will be dynamically allocated their own heap.
+ */
+ SET_HEAP_SIZE(&dummy_heap, 0);
+ SET_HEAP_LIMIT(&dummy_heap, 0);
+
+ for ( i = 0; i < NR_CPUS; i++ )
{
- ac_timers[i].heap = xmalloc(
- (DEFAULT_HEAP_LIMIT+1) * sizeof(struct ac_timer *));
- if ( ac_timers[i].heap == NULL ) BUG();
- SET_HEAP_SIZE(ac_timers[i].heap, 0);
- SET_HEAP_LIMIT(ac_timers[i].heap, DEFAULT_HEAP_LIMIT);
spin_lock_init(&ac_timers[i].lock);
+ ac_timers[i].heap = &dummy_heap;
}
register_keyhandler('a', dump_timerq, "dump ac_timer queues");
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/bitmap.c b/xen/common/bitmap.c
new file mode 100644
index 0000000000..d931eca83c
--- /dev/null
+++ b/xen/common/bitmap.c
@@ -0,0 +1,365 @@
+/*
+ * lib/bitmap.c
+ * Helper functions for bitmap.h.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/bitmap.h>
+#include <xen/bitops.h>
+#include <asm/uaccess.h>
+
+/*
+ * bitmaps provide an array of bits, implemented using an
+ * array of unsigned longs. The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'. The implementation makes
+ * no particular effort to keep them zero. It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures. See the big-endian headers
+ * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h
+ * for the best explanations of this ordering.
+ */
+
+int __bitmap_empty(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(__bitmap_empty);
+
+int __bitmap_full(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (~bitmap[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(__bitmap_full);
+
+int __bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] != bitmap2[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(__bitmap_equal);
+
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ dst[k] = ~src[k];
+
+ if (bits % BITS_PER_LONG)
+ dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+}
+EXPORT_SYMBOL(__bitmap_complement);
+
+/*
+ * __bitmap_shift_right - logical right shift of the bits in a bitmap
+ * @dst - destination bitmap
+ * @src - source bitmap
+ * @nbits - shift by this many bits
+ * @bits - bitmap size, in bits
+ *
+ * Shifting right (dividing) means moving bits in the MS -> LS bit
+ * direction. Zeros are fed into the vacated MS positions and the
+ * LS bits shifted off the bottom are lost.
+ */
+void __bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int shift, int bits)
+{
+ int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG;
+ int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
+ unsigned long mask = (1UL << left) - 1;
+ for (k = 0; off + k < lim; ++k) {
+ unsigned long upper, lower;
+
+ /*
+ * If shift is not word aligned, take lower rem bits of
+ * word above and make them the top rem bits of result.
+ */
+ if (!rem || off + k + 1 >= lim)
+ upper = 0;
+ else {
+ upper = src[off + k + 1];
+ if (off + k + 1 == lim - 1 && left)
+ upper &= mask;
+ }
+ lower = src[off + k];
+ if (left && off + k == lim - 1)
+ lower &= mask;
+ dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem;
+ if (left && k == lim - 1)
+ dst[k] &= mask;
+ }
+ if (off)
+ memset(&dst[lim - off], 0, off*sizeof(unsigned long));
+}
+EXPORT_SYMBOL(__bitmap_shift_right);
+
+
+/*
+ * __bitmap_shift_left - logical left shift of the bits in a bitmap
+ * @dst - destination bitmap
+ * @src - source bitmap
+ * @nbits - shift by this many bits
+ * @bits - bitmap size, in bits
+ *
+ * Shifting left (multiplying) means moving bits in the LS -> MS
+ * direction. Zeros are fed into the vacated LS bit positions
+ * and those MS bits shifted off the top are lost.
+ */
+
+void __bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int shift, int bits)
+{
+ int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG;
+ int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
+ for (k = lim - off - 1; k >= 0; --k) {
+ unsigned long upper, lower;
+
+ /*
+ * If shift is not word aligned, take upper rem bits of
+ * word below and make them the bottom rem bits of result.
+ */
+ if (rem && k > 0)
+ lower = src[k - 1];
+ else
+ lower = 0;
+ upper = src[k];
+ if (left && k == lim - 1)
+ upper &= (1UL << left) - 1;
+ dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem;
+ if (left && k + off == lim - 1)
+ dst[k + off] &= (1UL << left) - 1;
+ }
+ if (off)
+ memset(dst, 0, off*sizeof(unsigned long));
+}
+EXPORT_SYMBOL(__bitmap_shift_left);
+
+void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] & bitmap2[k];
+}
+EXPORT_SYMBOL(__bitmap_and);
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] | bitmap2[k];
+}
+EXPORT_SYMBOL(__bitmap_or);
+
+void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] ^ bitmap2[k];
+}
+EXPORT_SYMBOL(__bitmap_xor);
+
+void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] & ~bitmap2[k];
+}
+EXPORT_SYMBOL(__bitmap_andnot);
+
+int __bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] & bitmap2[k])
+ return 1;
+
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL(__bitmap_intersects);
+
+int __bitmap_subset(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] & ~bitmap2[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+ return 0;
+ return 1;
+}
+EXPORT_SYMBOL(__bitmap_subset);
+
+#if BITS_PER_LONG == 32
+int __bitmap_weight(const unsigned long *bitmap, int bits)
+{
+ int k, w = 0, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; k++)
+ w += hweight32(bitmap[k]);
+
+ if (bits % BITS_PER_LONG)
+ w += hweight32(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
+
+ return w;
+}
+#else
+int __bitmap_weight(const unsigned long *bitmap, int bits)
+{
+ int k, w = 0, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; k++)
+ w += hweight64(bitmap[k]);
+
+ if (bits % BITS_PER_LONG)
+ w += hweight64(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
+
+ return w;
+}
+#endif
+EXPORT_SYMBOL(__bitmap_weight);
+
+/**
+ * bitmap_find_free_region - find a contiguous aligned mem region
+ * @bitmap: an array of unsigned longs corresponding to the bitmap
+ * @bits: number of bits in the bitmap
+ * @order: region size to find (size is actually 1<<order)
+ *
+ * This is used to allocate a memory region from a bitmap. The idea is
+ * that the region has to be 1<<order sized and 1<<order aligned (this
+ * makes the search algorithm much faster).
+ *
+ * The region is marked as set bits in the bitmap if a free one is
+ * found.
+ *
+ * Returns either beginning of region or negative error
+ */
+int bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+{
+ unsigned long mask;
+ int pages = 1 << order;
+ int i;
+
+ if(pages > BITS_PER_LONG)
+ return -EINVAL;
+
+ /* make a mask of the order */
+ mask = (1ul << (pages - 1));
+ mask += mask - 1;
+
+ /* run up the bitmap pages bits at a time */
+ for (i = 0; i < bits; i += pages) {
+ int index = i/BITS_PER_LONG;
+ int offset = i - (index * BITS_PER_LONG);
+ if((bitmap[index] & (mask << offset)) == 0) {
+			/* set region in bitmap */
+ bitmap[index] |= (mask << offset);
+ return i;
+ }
+ }
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(bitmap_find_free_region);
+
+/**
+ * bitmap_release_region - release allocated bitmap region
+ * @bitmap: a pointer to the bitmap
+ * @pos: the beginning of the region
+ * @order: the order of the bits to release (number is 1<<order)
+ *
+ * This is the complement to bitmap_find_free_region and releases
+ * the found region (by clearing it in the bitmap).
+ */
+void bitmap_release_region(unsigned long *bitmap, int pos, int order)
+{
+ int pages = 1 << order;
+ unsigned long mask = (1ul << (pages - 1));
+ int index = pos/BITS_PER_LONG;
+ int offset = pos - (index * BITS_PER_LONG);
+ mask += mask - 1;
+ bitmap[index] &= ~(mask << offset);
+}
+EXPORT_SYMBOL(bitmap_release_region);
+
+int bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+{
+ int pages = 1 << order;
+ unsigned long mask = (1ul << (pages - 1));
+ int index = pos/BITS_PER_LONG;
+ int offset = pos - (index * BITS_PER_LONG);
+
+ /* We don't do regions of pages > BITS_PER_LONG. The
+ * algorithm would be a simple look for multiple zeros in the
+ * array, but there's no driver today that needs this. If you
+ * trip this BUG(), you get to code it... */
+ BUG_ON(pages > BITS_PER_LONG);
+ mask += mask - 1;
+ if (bitmap[index] & (mask << offset))
+ return -EBUSY;
+ bitmap[index] |= (mask << offset);
+ return 0;
+}
+EXPORT_SYMBOL(bitmap_allocate_region);
diff --git a/xen/common/debug-linux.c b/xen/common/debug-linux.c
deleted file mode 100644
index 03c4995eb1..0000000000
--- a/xen/common/debug-linux.c
+++ /dev/null
@@ -1,267 +0,0 @@
-
-/*
- * pervasive debugger
- * www.cl.cam.ac.uk/netos/pdb
- *
- * alex ho
- * 2004
- * university of cambridge computer laboratory
- *
- * linux specific pdb stuff
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/lib.h>
-#include <public/dom0_ops.h>
-#include <asm/pdb.h>
-
-/* from linux/sched.h */
-#define PIDHASH_SZ (4096 >> 2)
-#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
-
-/* from asm-xen/pgtable-2level.h */
-#define PGDIR_SHIFT 22
-#define PTRS_PER_PGD 1024
-
-/* from asm-xen/page.h */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
-
-#define __PAGE_OFFSET (0xC0000000)
-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
-#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
-
-/* from debug.h */
-#define ENTRIES_PER_L1_PAGETABLE 1024
-#define L1_PAGE_BITS ( (ENTRIES_PER_L1_PAGETABLE - 1) << PAGE_SHIFT )
-
-void pdb_linux_process_details (unsigned long cr3, int pid, char *buffer);
-
-/* adapted from asm-xen/page.h */
-static inline unsigned long machine_to_phys(unsigned long cr3,
- unsigned long machine)
-{
- unsigned long phys;
- pdb_get_values((u_char *) &phys, sizeof(phys), cr3,
- (unsigned long) machine_to_phys_mapping +
- (machine >> PAGE_SHIFT) * 4);
- phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
- return phys;
-}
-
-unsigned long pdb_pidhash_addr = 0xc01971e0UL;
-unsigned long pdb_init_task_union_addr = 0xc0182000UL;
-
-
-unsigned int task_struct_mm_offset = 0x2c;
-unsigned int task_struct_next_task_offset = 0x48;
-unsigned int task_struct_pid_offset = 0x7c;
-unsigned int task_struct_pidhash_next_offset = 0xb0;
-unsigned int task_struct_comm_offset = 0x23e;
-unsigned int task_struct_comm_length = 0x10;
-
-unsigned int mm_struct_pgd_offset = 0x0c;
-
-/*
- * find the task structure of a process (pid)
- * given the cr3 of the guest os.
- */
-unsigned long pdb_linux_pid_task_struct (unsigned long cr3, int pid)
-{
- unsigned long task_struct_p = (unsigned long) NULL;
- unsigned long task_struct_pid;
-
- /* find the task_struct of the given process */
- pdb_get_values((u_char *) &task_struct_p, sizeof(task_struct_p),
- cr3, pdb_pidhash_addr + pid_hashfn(pid) * 4);
-
- /* find the correct task struct */
- while (task_struct_p != (unsigned long)NULL)
- {
- pdb_get_values((u_char *) &task_struct_pid, sizeof(task_struct_pid),
- cr3, task_struct_p + task_struct_pid_offset);
- if (task_struct_pid == pid)
- {
- break;
- }
-
- pdb_get_values((u_char *) &task_struct_p, sizeof(task_struct_p),
- cr3, task_struct_p + task_struct_pidhash_next_offset);
- }
- if (task_struct_p == (unsigned long) NULL)
- {
- /* oops */
- printk ("pdb error: couldn't find process 0x%x (0x%lx)\n", pid, cr3);
- }
-
- return task_struct_p;
-}
-
-/*
- * find the ptbr of a process (pid)
- * given the cr3 of the guest os.
- */
-unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid)
-{
- unsigned long task_struct_p;
- unsigned long mm_p, pgd;
-
- task_struct_p = pdb_linux_pid_task_struct(cr3, pid);
- if (task_struct_p == (unsigned long) NULL)
- {
- return (unsigned long) NULL;
- }
-
- /* get the mm_struct within the task_struct */
- pdb_get_values((u_char *) &mm_p, sizeof(mm_p),
- cr3, task_struct_p + task_struct_mm_offset);
- /* get the page global directory (cr3) within the mm_struct */
- pdb_get_values((u_char *) &pgd, sizeof(pgd),
- cr3, mm_p + mm_struct_pgd_offset);
-
- return pgd;
-}
-
-
-
-/* read a byte from a process
- *
- * in: pid: process id
- * cr3: ptbr for the process' domain
- * addr: address to read
- */
-
-u_char pdb_linux_get_value(int pid, unsigned long cr3, unsigned long addr)
-{
- u_char result = 0;
- unsigned long pgd;
- unsigned long l2tab, page;
-
- /* get the process' pgd */
- pgd = pdb_linux_pid_ptbr(cr3, pid);
-
- /* get the l2 table entry */
- pdb_get_values((u_char *) &l2tab, sizeof(l2tab),
- cr3, pgd + (addr >> PGDIR_SHIFT) * 4);
- l2tab = (unsigned long)__va(machine_to_phys(cr3, l2tab) & PAGE_MASK);
-
- /* get the page table entry */
- pdb_get_values((u_char *) &page, sizeof(page),
- cr3, l2tab + ((addr & L1_PAGE_BITS) >> PAGE_SHIFT) * 4);
- page = (unsigned long)__va(machine_to_phys(cr3, page) & PAGE_MASK);
-
- /* get the byte */
- pdb_get_values((u_char *) &result, sizeof(result),
- cr3, page + (addr & ~PAGE_MASK));
-
- return result;
-}
-
-void pdb_linux_get_values(char *buffer, int length, unsigned long address,
- int pid, unsigned long cr3)
-{
- int loop;
-
- /* yes, this can be optimized... a lot */
- for (loop = 0; loop < length; loop++)
- {
- buffer[loop] = pdb_linux_get_value(pid, cr3, address + loop);
- }
-}
-
-
-void pdb_linux_set_value(int pid, unsigned long cr3, unsigned long addr,
- u_char *value)
-{
- unsigned long pgd;
- unsigned long l2tab, page;
-
- /* get the process' pgd */
- pgd = pdb_linux_pid_ptbr(cr3, pid);
-
- /* get the l2 table entry */
- pdb_get_values((u_char *) &l2tab, sizeof(l2tab),
- cr3, pgd + (addr >> PGDIR_SHIFT) * 4);
- l2tab = (unsigned long)__va(machine_to_phys(cr3, l2tab) & PAGE_MASK);
-
- /* get the page table entry */
- pdb_get_values((u_char *) &page, sizeof(page),
- cr3, l2tab + ((addr & L1_PAGE_BITS) >> PAGE_SHIFT) * 4);
- page = (unsigned long)__va(machine_to_phys(cr3, page) & PAGE_MASK);
-
- /* set the byte */
- pdb_set_values(value, sizeof(u_char), cr3, page + (addr & ~PAGE_MASK));
-}
-
-void pdb_linux_set_values(char *buffer, int length, unsigned long address,
- int pid, unsigned long cr3)
-{
- int loop;
-
- /* it's difficult to imagine a more inefficient algorithm */
- for (loop = 0; loop < length; loop++)
- {
- pdb_linux_set_value(pid, cr3, address + loop, &buffer[loop * 2]);
- }
-}
-
-/**********************************************************************/
-
-/*
- * return 1 if is the virtual address is in the operating system's
- * address space, else 0
- */
-int pdb_linux_address_space (unsigned long addr)
-{
- return (addr > PAGE_OFFSET);
-}
-
-/* get a list of at most "max" processes
- * return: number of threads found
- *
- * init_task -> init_task_union.task
- * while (next_task != init_task) {}
- */
-int pdb_linux_process_list (unsigned long cr3, int array[], int max)
-{
- unsigned long task_p, next_p;
- int pid;
- int count = 0;
-
- /* task_p = init_task->next_task */
- pdb_get_values((u_char *) &task_p, sizeof(task_p),
- cr3, pdb_init_task_union_addr + task_struct_next_task_offset);
-
- while (task_p != pdb_init_task_union_addr)
- {
- pdb_get_values((u_char *) &pid, sizeof(pid),
- cr3, task_p + task_struct_pid_offset);
-
- array[count % max] = pid;
- count++;
-
- pdb_get_values((u_char *) &next_p, sizeof(next_p),
- cr3, task_p + task_struct_next_task_offset);
- task_p = next_p;
- }
-
- return count;
-}
-
-/*
- * get additional details about a particular process
- */
-void pdb_linux_process_details (unsigned long cr3, int pid, char *buffer)
-{
- unsigned long task_struct_p;
-
- task_struct_p = pdb_linux_pid_task_struct(cr3, pid);
-
- pdb_get_values((u_char *) buffer, task_struct_comm_length,
- cr3, task_struct_p + task_struct_comm_offset);
- return;
-}
-
diff --git a/xen/common/debug.c b/xen/common/debug.c
deleted file mode 100644
index fa99d6bd8f..0000000000
--- a/xen/common/debug.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * debug.c
- *
- * xen pervasive debugger
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/lib.h>
-#include <public/dom0_ops.h>
-#include <xen/sched.h>
-#include <xen/event.h>
-#include <asm/page.h>
-#include <asm/pdb.h>
-#include <asm/shadow.h>
-
-#undef DEBUG_TRACE
-#ifdef DEBUG_TRACE
-#define TRC(_x) _x
-#else
-#define TRC(_x)
-#endif
-
-/****************************************************************************/
-
-extern u_char pdb_linux_get_value(int pid, unsigned long cr3,
- unsigned long addr);
-
-/*
- * interactively call pervasive debugger from a privileged domain
- */
-void pdb_do_debug (dom0_op_t *op)
-{
- op->u.debug.status = 0;
-
- TRC(printk("PDB: op:%c, dom:%llu, in1:%x, in2:%x, in3:%x, in4:%x\n",
- op->u.debug.opcode, op->u.debug.domain,
- op->u.debug.in1, op->u.debug.in2,
- op->u.debug.in3, op->u.debug.in4));
-
- /* NOT NOW
- if (op->u.debug.domain == 0)
- {
- op->u.debug.status = 1;
- return;
- }
- */
-
- switch (op->u.debug.opcode)
- {
- case 'c' :
- {
- struct domain *d = find_domain_by_id(op->u.debug.domain);
- if ( d != NULL )
- {
- domain_unpause_by_systemcontroller(d);
- put_domain(d);
- }
- else
- {
- op->u.debug.status = 2; /* invalid domain */
- }
- break;
- }
- case 'r' :
- {
- int loop;
- u_char x;
- unsigned long cr3;
- struct domain *d;
-
- d = find_domain_by_id(op->u.debug.domain);
- if ( shadow_mode(d) )
- cr3 = pagetable_val(d->mm.shadow_table);
- else
- cr3 = pagetable_val(d->mm.pagetable);
-
- for (loop = 0; loop < op->u.debug.in2; loop++) /* length */
- {
- if (loop % 8 == 0)
- {
- printk ("\n%08x ", op->u.debug.in1 + loop);
- }
- x = pdb_linux_get_value(op->u.debug.in3,
- cr3, op->u.debug.in1 + loop);
- printk (" %02x", x);
- }
- printk ("\n");
- put_domain(d);
- break;
- }
- case 's' :
- {
- struct domain *d = find_domain_by_id(op->u.debug.domain);
-
- if ( d != NULL )
- {
- domain_pause_by_systemcontroller(d);
- put_domain(d);
- }
- else
- {
- op->u.debug.status = 2; /* invalid domain */
- }
- break;
- }
- default :
- {
- printk("PDB error: unknown debug opcode %c (0x%x)\n",
- op->u.debug.opcode, op->u.debug.opcode);
- }
- }
-}
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
index 3af35e39e0..216af3854a 100644
--- a/xen/common/dom0_ops.c
+++ b/xen/common/dom0_ops.c
@@ -10,23 +10,19 @@
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
-#include <public/dom0_ops.h>
#include <xen/sched.h>
+#include <xen/domain.h>
#include <xen/event.h>
-#include <asm/domain_page.h>
-#include <asm/pdb.h>
+#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
-#include <asm/shadow.h>
+#include <asm/current.h>
+#include <public/dom0_ops.h>
#include <public/sched_ctl.h>
-#define TRC_DOM0OP_ENTER_BASE 0x00020000
-#define TRC_DOM0OP_LEAVE_BASE 0x00030000
-
-extern unsigned int alloc_new_dom_mem(struct domain *, unsigned int);
extern long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op);
extern void arch_getdomaininfo_ctxt(
- struct domain *, full_execution_context_t *);
+ struct vcpu *, struct vcpu_guest_context *);
static inline int is_free_domid(domid_t dom)
{
@@ -96,7 +92,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
long ret = 0;
dom0_op_t curop, *op = &curop;
- if ( !IS_PRIV(current) )
+ if ( !IS_PRIV(current->domain) )
return -EPERM;
if ( copy_from_user(op, u_dom0_op, sizeof(*op)) )
@@ -105,20 +101,16 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
if ( op->interface_version != DOM0_INTERFACE_VERSION )
return -EACCES;
- TRACE_5D(TRC_DOM0OP_ENTER_BASE + op->cmd,
- 0, op->u.dummy[0], op->u.dummy[1],
- op->u.dummy[2], op->u.dummy[3] );
-
switch ( op->cmd )
{
- case DOM0_BUILDDOMAIN:
+ case DOM0_SETDOMAININFO:
{
- struct domain *d = find_domain_by_id(op->u.builddomain.domain);
- ret = -EINVAL;
+ struct domain *d = find_domain_by_id(op->u.setdomaininfo.domain);
+ ret = -ESRCH;
if ( d != NULL )
{
- ret = final_setup_guestos(d, &op->u.builddomain);
+ ret = set_info_guest(d, &op->u.setdomaininfo);
put_domain(d);
}
}
@@ -131,7 +123,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
if ( d != NULL )
{
ret = -EINVAL;
- if ( d != current )
+ if ( d != current->domain )
{
domain_pause_by_systemcontroller(d);
ret = 0;
@@ -148,7 +140,8 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
if ( d != NULL )
{
ret = -EINVAL;
- if ( test_bit(DF_CONSTRUCTED, &d->flags) )
+ if ( (d != current->domain) &&
+ test_bit(_DOMF_constructed, &d->domain_flags) )
{
domain_unpause_by_systemcontroller(d);
ret = 0;
@@ -163,6 +156,9 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
struct domain *d;
unsigned int pro;
domid_t dom;
+ struct vcpu *v;
+ unsigned int i, cnt[NR_CPUS] = { 0 };
+
dom = op->u.createdomain.domain;
if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
@@ -172,46 +168,34 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
break;
}
else if ( (ret = allocate_domid(&dom)) != 0 )
- break;
-
- if ( op->u.createdomain.cpu == -1 )
{
- /* Do an initial placement. Pick the least-populated CPU. */
- struct domain *d;
- unsigned int i, ht, cnt[NR_CPUS] = { 0 };
-
- read_lock(&domlist_lock);
- for_each_domain ( d )
- cnt[d->processor]++;
- read_unlock(&domlist_lock);
-
- /* If we're on a HT system, we only use the first HT for dom0,
- other domains will all share the second HT of each CPU.
- Since dom0 is on CPU 0, we favour high numbered CPUs in
- the event of a tie */
- ht = opt_noht ? 1 : ht_per_core;
- pro = ht-1;
- for ( i = pro; i < smp_num_cpus; i += ht )
- if ( cnt[i] <= cnt[pro] )
- pro = i;
+ break;
}
- else
- pro = op->u.createdomain.cpu % smp_num_cpus;
+
+ /* Do an initial CPU placement. Pick the least-populated CPU. */
+ read_lock(&domlist_lock);
+ for_each_domain ( d )
+ for_each_vcpu ( d, v )
+ cnt[v->processor]++;
+ read_unlock(&domlist_lock);
+
+ /*
+ * If we're on a HT system, we only use the first HT for dom0, other
+ * domains will all share the second HT of each CPU. Since dom0 is on
+ * CPU 0, we favour high numbered CPUs in the event of a tie.
+ */
+ pro = ht_per_core - 1;
+ for ( i = pro; i < num_online_cpus(); i += ht_per_core )
+ if ( cnt[i] <= cnt[pro] )
+ pro = i;
ret = -ENOMEM;
if ( (d = do_createdomain(dom, pro)) == NULL )
break;
- ret = alloc_new_dom_mem(d, op->u.createdomain.memory_kb);
- if ( ret != 0 )
- {
- domain_kill(d);
- break;
- }
-
ret = 0;
- op->u.createdomain.domain = d->id;
+ op->u.createdomain.domain = d->domain_id;
copy_to_user(u_dom0_op, op, sizeof(*op));
}
break;
@@ -223,7 +207,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
if ( d != NULL )
{
ret = -EINVAL;
- if ( d != current )
+ if ( d != current->domain )
{
domain_kill(d);
ret = 0;
@@ -237,7 +221,9 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
{
domid_t dom = op->u.pincpudomain.domain;
struct domain *d = find_domain_by_id(dom);
- int cpu = op->u.pincpudomain.cpu;
+ struct vcpu *v;
+ cpumap_t cpumap;
+
if ( d == NULL )
{
@@ -245,26 +231,53 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
break;
}
- if ( d == current )
+ if ( (op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) ||
+ !d->vcpu[op->u.pincpudomain.vcpu] )
+ {
+ ret = -EINVAL;
+ put_domain(d);
+ break;
+ }
+
+ v = d->vcpu[op->u.pincpudomain.vcpu];
+ if ( v == NULL )
+ {
+ ret = -ESRCH;
+ put_domain(d);
+ break;
+ }
+
+ if ( v == current )
{
ret = -EINVAL;
put_domain(d);
break;
}
- if ( cpu == -1 )
+ if ( copy_from_user(&cpumap, op->u.pincpudomain.cpumap,
+ sizeof(cpumap)) )
{
- clear_bit(DF_CPUPINNED, &d->flags);
+ ret = -EFAULT;
+ put_domain(d);
+ break;
}
+
+ /* update cpumap for this vcpu */
+ v->cpumap = cpumap;
+
+ if ( cpumap == CPUMAP_RUNANYWHERE )
+ clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
else
{
- domain_pause(d);
- synchronise_pagetables(~0UL);
- if ( d->processor != (cpu % smp_num_cpus) )
- set_bit(DF_MIGRATED, &d->flags);
- set_bit(DF_CPUPINNED, &d->flags);
- d->processor = cpu % smp_num_cpus;
- domain_unpause(d);
+ /* pick a new cpu from the usable map */
+ int new_cpu = (int)find_first_set_bit(cpumap) % num_online_cpus();
+
+ vcpu_pause(v);
+ if ( v->processor != new_cpu )
+ set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+ set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
+ v->processor = new_cpu;
+ vcpu_unpause(v);
}
put_domain(d);
@@ -285,54 +298,19 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
}
break;
- case DOM0_GETMEMLIST:
- {
- int i;
- struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
- unsigned long max_pfns = op->u.getmemlist.max_pfns;
- unsigned long pfn;
- unsigned long *buffer = op->u.getmemlist.buffer;
- struct list_head *list_ent;
-
- ret = -EINVAL;
- if ( d != NULL )
- {
- ret = 0;
-
- spin_lock(&d->page_alloc_lock);
- list_ent = d->page_list.next;
- for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
- {
- pfn = list_entry(list_ent, struct pfn_info, list) -
- frame_table;
- if ( put_user(pfn, buffer) )
- {
- ret = -EFAULT;
- break;
- }
- buffer++;
- list_ent = frame_table[pfn].list.next;
- }
- spin_unlock(&d->page_alloc_lock);
-
- op->u.getmemlist.num_pfns = i;
- copy_to_user(u_dom0_op, op, sizeof(*op));
-
- put_domain(d);
- }
- }
- break;
-
case DOM0_GETDOMAININFO:
{
- full_execution_context_t *c;
- struct domain *d;
+ struct domain *d;
+ struct vcpu *v;
+ u64 cpu_time = 0;
+ int vcpu_count = 0;
+ int flags = DOMFLAGS_PAUSED | DOMFLAGS_BLOCKED;
read_lock(&domlist_lock);
for_each_domain ( d )
{
- if ( d->id >= op->u.getdomaininfo.domain )
+ if ( d->domain_id >= op->u.getdomaininfo.domain )
break;
}
@@ -345,48 +323,103 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
read_unlock(&domlist_lock);
- op->u.getdomaininfo.domain = d->id;
-
- op->u.getdomaininfo.flags =
- (test_bit(DF_DYING, &d->flags) ? DOMFLAGS_DYING : 0) |
- (test_bit(DF_CRASHED, &d->flags) ? DOMFLAGS_CRASHED : 0) |
- (test_bit(DF_SHUTDOWN, &d->flags) ? DOMFLAGS_SHUTDOWN : 0) |
- (test_bit(DF_CTRLPAUSE, &d->flags) ? DOMFLAGS_PAUSED : 0) |
- (test_bit(DF_BLOCKED, &d->flags) ? DOMFLAGS_BLOCKED : 0) |
- (test_bit(DF_RUNNING, &d->flags) ? DOMFLAGS_RUNNING : 0);
-
- op->u.getdomaininfo.flags |= d->processor << DOMFLAGS_CPUSHIFT;
- op->u.getdomaininfo.flags |=
+ op->u.getdomaininfo.domain = d->domain_id;
+
+ memset(&op->u.getdomaininfo.vcpu_to_cpu, -1,
+ sizeof(op->u.getdomaininfo.vcpu_to_cpu));
+ memset(&op->u.getdomaininfo.cpumap, 0,
+ sizeof(op->u.getdomaininfo.cpumap));
+
+ /*
+ * - domain is marked as paused or blocked only if all its vcpus
+ * are paused or blocked
+ * - domain is marked as running if any of its vcpus is running
+ */
+ for_each_vcpu ( d, v ) {
+ op->u.getdomaininfo.vcpu_to_cpu[v->vcpu_id] = v->processor;
+ op->u.getdomaininfo.cpumap[v->vcpu_id] = v->cpumap;
+ if ( !(v->vcpu_flags & VCPUF_ctrl_pause) )
+ flags &= ~DOMFLAGS_PAUSED;
+ if ( !(v->vcpu_flags & VCPUF_blocked) )
+ flags &= ~DOMFLAGS_BLOCKED;
+ if ( v->vcpu_flags & VCPUF_running )
+ flags |= DOMFLAGS_RUNNING;
+ if ( v->cpu_time > cpu_time )
+ cpu_time += v->cpu_time;
+ vcpu_count++;
+ }
+
+ op->u.getdomaininfo.cpu_time = cpu_time;
+ op->u.getdomaininfo.n_vcpu = vcpu_count;
+
+ op->u.getdomaininfo.flags = flags |
+ ((d->domain_flags & DOMF_dying) ? DOMFLAGS_DYING : 0) |
+ ((d->domain_flags & DOMF_shutdown) ? DOMFLAGS_SHUTDOWN : 0) |
d->shutdown_code << DOMFLAGS_SHUTDOWNSHIFT;
op->u.getdomaininfo.tot_pages = d->tot_pages;
op->u.getdomaininfo.max_pages = d->max_pages;
- op->u.getdomaininfo.cpu_time = d->cpu_time;
op->u.getdomaininfo.shared_info_frame =
__pa(d->shared_info) >> PAGE_SHIFT;
- if ( op->u.getdomaininfo.ctxt != NULL )
+ if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
+ ret = -EINVAL;
+
+ put_domain(d);
+ }
+ break;
+
+ case DOM0_GETVCPUCONTEXT:
+ {
+ struct vcpu_guest_context *c;
+ struct domain *d;
+ struct vcpu *v;
+
+ d = find_domain_by_id(op->u.getvcpucontext.domain);
+ if ( d == NULL )
+ {
+ ret = -ESRCH;
+ break;
+ }
+
+ if ( op->u.getvcpucontext.vcpu >= MAX_VIRT_CPUS )
+ {
+ ret = -EINVAL;
+ put_domain(d);
+ break;
+ }
+
+ v = d->vcpu[op->u.getvcpucontext.vcpu];
+ if ( v == NULL )
{
- if ( (c = xmalloc(sizeof(*c))) == NULL )
+ ret = -ESRCH;
+ put_domain(d);
+ break;
+ }
+
+ op->u.getvcpucontext.cpu_time = v->cpu_time;
+
+ if ( op->u.getvcpucontext.ctxt != NULL )
+ {
+ if ( (c = xmalloc(struct vcpu_guest_context)) == NULL )
{
ret = -ENOMEM;
put_domain(d);
break;
}
- if ( d != current )
- domain_pause(d);
+ if ( v != current )
+ vcpu_pause(v);
- arch_getdomaininfo_ctxt(d,c);
+ arch_getdomaininfo_ctxt(v,c);
- if ( d != current )
- domain_unpause(d);
+ if ( v != current )
+ vcpu_unpause(v);
- if ( copy_to_user(op->u.getdomaininfo.ctxt, c, sizeof(*c)) )
+ if ( copy_to_user(op->u.getvcpucontext.ctxt, c, sizeof(*c)) )
ret = -EINVAL;
- if ( c != NULL )
- xfree(c);
+ xfree(c);
}
if ( copy_to_user(u_dom0_op, op, sizeof(*op)) )
@@ -396,16 +429,6 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
}
break;
-#ifdef XEN_DEBUGGER
- case DOM0_DEBUG:
- {
- pdb_do_debug(op);
- copy_to_user(u_dom0_op, op, sizeof(*op));
- ret = 0;
- }
- break;
-#endif
-
case DOM0_SETTIME:
{
do_settime(op->u.settime.secs,
@@ -416,9 +439,9 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
break;
#ifdef TRACE_BUFFER
- case DOM0_GETTBUFS:
+ case DOM0_TBUFCONTROL:
{
- ret = get_tb_info(&op->u.gettbufs);
+ ret = tb_control(&op->u.tbufcontrol);
copy_to_user(u_dom0_op, op, sizeof(*op));
}
break;
@@ -426,20 +449,11 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
case DOM0_READCONSOLE:
{
- ret = read_console_ring(op->u.readconsole.str,
- op->u.readconsole.count,
- op->u.readconsole.cmd);
- }
- break;
-
- case DOM0_PCIDEV_ACCESS:
- {
- extern int physdev_pci_access_modify(domid_t, int, int, int, int);
- ret = physdev_pci_access_modify(op->u.pcidev_access.domain,
- op->u.pcidev_access.bus,
- op->u.pcidev_access.dev,
- op->u.pcidev_access.func,
- op->u.pcidev_access.enable);
+ ret = read_console_ring(
+ &op->u.readconsole.buffer,
+ &op->u.readconsole.count,
+ op->u.readconsole.clear);
+ copy_to_user(u_dom0_op, op, sizeof(*op));
}
break;
@@ -451,48 +465,14 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
}
break;
- case DOM0_SETDOMAININITIALMEM:
- {
- struct domain *d;
- ret = -ESRCH;
- d = find_domain_by_id(op->u.setdomaininitialmem.domain);
- if ( d != NULL )
- {
- /* should only be used *before* domain is built. */
- if ( !test_bit(DF_CONSTRUCTED, &d->flags) )
- ret = alloc_new_dom_mem(
- d, op->u.setdomaininitialmem.initial_memkb );
- else
- ret = -EINVAL;
- put_domain(d);
- }
- }
- break;
-
case DOM0_SETDOMAINMAXMEM:
{
struct domain *d;
ret = -ESRCH;
- d = find_domain_by_id( op->u.setdomainmaxmem.domain );
- if ( d != NULL )
- {
- d->max_pages =
- (op->u.setdomainmaxmem.max_memkb+PAGE_SIZE-1)>> PAGE_SHIFT;
- put_domain(d);
- ret = 0;
- }
- }
- break;
-
- case DOM0_SETDOMAINVMASSIST:
- {
- struct domain *d;
- ret = -ESRCH;
- d = find_domain_by_id( op->u.setdomainvmassist.domain );
+ d = find_domain_by_id(op->u.setdomainmaxmem.domain);
if ( d != NULL )
{
- vm_assist(d, op->u.setdomainvmassist.cmd,
- op->u.setdomainvmassist.type);
+ d->max_pages = op->u.setdomainmaxmem.max_memkb >> (PAGE_SHIFT-10);
put_domain(d);
ret = 0;
}
@@ -514,9 +494,15 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
}
- TRACE_5D(TRC_DOM0OP_LEAVE_BASE + op->cmd, ret,
- op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3]);
-
-
return ret;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/dom_mem_ops.c b/xen/common/dom_mem_ops.c
index d675603cdf..ad53b91d1c 100644
--- a/xen/common/dom_mem_ops.c
+++ b/xen/common/dom_mem_ops.c
@@ -13,7 +13,9 @@
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
-#include <asm/domain_page.h>
+#include <xen/shadow.h>
+#include <asm/current.h>
+#include <asm/hardirq.h>
/*
* To allow safe resume of do_dom_mem_op() after preemption, we need to know
@@ -24,27 +26,27 @@
#define PREEMPT_CHECK(_op) \
if ( hypercall_preempt_check() ) \
- return hypercall_create_continuation( \
- __HYPERVISOR_dom_mem_op, 5, \
+ return hypercall5_create_continuation( \
+ __HYPERVISOR_dom_mem_op, \
(_op) | (i << START_EXTENT_SHIFT), \
extent_list, nr_extents, extent_order, \
- (d == current) ? DOMID_SELF : d->id)
+ (d == current->domain) ? DOMID_SELF : d->domain_id);
static long
alloc_dom_mem(struct domain *d,
unsigned long *extent_list,
unsigned long start_extent,
- unsigned long nr_extents,
+ unsigned int nr_extents,
unsigned int extent_order)
{
struct pfn_info *page;
unsigned long i;
- if ( unlikely(!array_access_ok(VERIFY_WRITE, extent_list,
- nr_extents, sizeof(*extent_list))) )
+ if ( (extent_list != NULL) &&
+ !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
return start_extent;
- if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current) )
+ if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) )
{
DPRINTK("Only I/O-capable domains may allocate > order-0 memory.\n");
return start_extent;
@@ -61,7 +63,8 @@ alloc_dom_mem(struct domain *d,
}
/* Inform the domain of the new page's machine address. */
- if ( unlikely(__put_user(page_to_pfn(page), &extent_list[i]) != 0) )
+ if ( (extent_list != NULL) &&
+ (__put_user(page_to_pfn(page), &extent_list[i]) != 0) )
return i;
}
@@ -72,14 +75,13 @@ static long
free_dom_mem(struct domain *d,
unsigned long *extent_list,
unsigned long start_extent,
- unsigned long nr_extents,
+ unsigned int nr_extents,
unsigned int extent_order)
{
struct pfn_info *page;
unsigned long i, j, mpfn;
- if ( unlikely(!array_access_ok(VERIFY_READ, extent_list,
- nr_extents, sizeof(*extent_list))) )
+ if ( !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
return start_extent;
for ( i = start_extent; i < nr_extents; i++ )
@@ -93,15 +95,15 @@ free_dom_mem(struct domain *d,
{
if ( unlikely((mpfn + j) >= max_page) )
{
- DPRINTK("Domain %u page number out of range (%08lx>=%08lx)\n",
- d->id, mpfn + j, max_page);
+ DPRINTK("Domain %u page number out of range (%lx >= %lx)\n",
+ d->domain_id, mpfn + j, max_page);
return i;
}
page = &frame_table[mpfn + j];
if ( unlikely(!get_page(page, d)) )
{
- DPRINTK("Bad page free for domain %u\n", d->id);
+ DPRINTK("Bad page free for domain %u\n", d->domain_id);
return i;
}
@@ -111,6 +113,8 @@ free_dom_mem(struct domain *d,
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
+ shadow_sync_and_drop_references(d, page);
+
put_page(page);
}
}
@@ -121,7 +125,7 @@ free_dom_mem(struct domain *d,
long
do_dom_mem_op(unsigned long op,
unsigned long *extent_list,
- unsigned long nr_extents,
+ unsigned int nr_extents,
unsigned int extent_order,
domid_t domid)
{
@@ -132,34 +136,43 @@ do_dom_mem_op(unsigned long op,
start_extent = op >> START_EXTENT_SHIFT;
op &= (1 << START_EXTENT_SHIFT) - 1;
- if ( unlikely(start_extent > nr_extents) ||
- unlikely(nr_extents > (~0UL >> START_EXTENT_SHIFT)) )
+ if ( unlikely(start_extent > nr_extents) )
return -EINVAL;
if ( likely(domid == DOMID_SELF) )
- d = current;
- else if ( unlikely(!IS_PRIV(current)) )
+ d = current->domain;
+ else if ( unlikely(!IS_PRIV(current->domain)) )
return -EPERM;
else if ( unlikely((d = find_domain_by_id(domid)) == NULL) )
- return -ESRCH;
+ return -ESRCH;
switch ( op )
{
case MEMOP_increase_reservation:
rc = alloc_dom_mem(
d, extent_list, start_extent, nr_extents, extent_order);
- break;
+ break;
case MEMOP_decrease_reservation:
rc = free_dom_mem(
d, extent_list, start_extent, nr_extents, extent_order);
- break;
+ break;
default:
rc = -ENOSYS;
break;
}
if ( unlikely(domid != DOMID_SELF) )
- put_domain(d);
+ put_domain(d);
return rc;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 90e893899a..b11ec069fa 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -9,15 +9,15 @@
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
-#include <xen/softirq.h>
+#include <xen/domain.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/console.h>
-#include <asm/shadow.h>
+#include <xen/softirq.h>
+#include <xen/domain_page.h>
+#include <asm/debugger.h>
#include <public/dom0_ops.h>
-#include <asm/domain_page.h>
-#include <public/io/domain_controller.h>
/* Both these structures are protected by the domlist_lock. */
rwlock_t domlist_lock = RW_LOCK_UNLOCKED;
@@ -29,51 +29,52 @@ struct domain *dom0;
struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
{
struct domain *d, **pd;
+ struct vcpu *v;
if ( (d = alloc_domain_struct()) == NULL )
return NULL;
- atomic_set(&d->refcnt, 1);
- atomic_set(&d->pausecnt, 0);
+ v = d->vcpu[0];
- shadow_lock_init(d);
+ atomic_set(&d->refcnt, 1);
+ atomic_set(&v->pausecnt, 0);
- d->id = dom_id;
- d->processor = cpu;
- d->create_time = NOW();
+ d->domain_id = dom_id;
+ v->processor = cpu;
- memcpy(&d->thread, &idle0_task.thread, sizeof(d->thread));
+ spin_lock_init(&d->time_lock);
+
+ spin_lock_init(&d->big_lock);
spin_lock_init(&d->page_alloc_lock);
INIT_LIST_HEAD(&d->page_list);
INIT_LIST_HEAD(&d->xenpage_list);
- /* Per-domain PCI-device list. */
- spin_lock_init(&d->pcidev_lock);
- INIT_LIST_HEAD(&d->pcidev_list);
-
- if ( (d->id != IDLE_DOMAIN_ID) &&
- ((init_event_channels(d) != 0) || (grant_table_create(d) != 0)) )
+ if ( d->domain_id == IDLE_DOMAIN_ID )
+ set_bit(_DOMF_idle_domain, &d->domain_flags);
+
+ if ( !is_idle_task(d) &&
+ ((evtchn_init(d) != 0) || (grant_table_create(d) != 0)) )
{
- destroy_event_channels(d);
+ evtchn_destroy(d);
free_domain_struct(d);
return NULL;
}
- arch_do_createdomain(d);
+ arch_do_createdomain(v);
- sched_add_domain(d);
+ sched_add_domain(v);
- if ( d->id != IDLE_DOMAIN_ID )
+ if ( !is_idle_task(d) )
{
write_lock(&domlist_lock);
pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
- for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_list )
- if ( (*pd)->id > d->id )
+ for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
+ if ( (*pd)->domain_id > d->domain_id )
break;
- d->next_list = *pd;
+ d->next_in_list = *pd;
*pd = d;
- d->next_hash = domain_hash[DOMAIN_HASH(dom_id)];
+ d->next_in_hashbucket = domain_hash[DOMAIN_HASH(dom_id)];
domain_hash[DOMAIN_HASH(dom_id)] = d;
write_unlock(&domlist_lock);
}
@@ -90,13 +91,13 @@ struct domain *find_domain_by_id(domid_t dom)
d = domain_hash[DOMAIN_HASH(dom)];
while ( d != NULL )
{
- if ( d->id == dom )
+ if ( d->domain_id == dom )
{
if ( unlikely(!get_domain(d)) )
d = NULL;
break;
}
- d = d->next_hash;
+ d = d->next_in_hashbucket;
}
read_unlock(&domlist_lock);
@@ -104,35 +105,16 @@ struct domain *find_domain_by_id(domid_t dom)
}
-/* Return the most recently created domain. */
-struct domain *find_last_domain(void)
-{
- struct domain *d, *dlast;
-
- read_lock(&domlist_lock);
- dlast = domain_list;
- d = dlast->next_list;
- while ( d != NULL )
- {
- if ( d->create_time > dlast->create_time )
- dlast = d;
- d = d->next_list;
- }
- if ( !get_domain(dlast) )
- dlast = NULL;
- read_unlock(&domlist_lock);
-
- return dlast;
-}
-
-
void domain_kill(struct domain *d)
{
+ struct vcpu *v;
+
domain_pause(d);
- if ( !test_and_set_bit(DF_DYING, &d->flags) )
+ if ( !test_and_set_bit(_DOMF_dying, &d->domain_flags) )
{
- sched_rem_domain(d);
- domain_relinquish_memory(d);
+ for_each_vcpu(d, v)
+ sched_rem_domain(v);
+ domain_relinquish_resources(d);
put_domain(d);
}
}
@@ -140,14 +122,10 @@ void domain_kill(struct domain *d)
void domain_crash(void)
{
- if ( current->id == 0 )
- BUG();
-
- set_bit(DF_CRASHED, &current->flags);
-
- send_guest_virq(dom0, VIRQ_DOM_EXC);
-
- raise_softirq(SCHEDULE_SOFTIRQ);
+ printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
+ current->domain->domain_id, current->vcpu_id, smp_processor_id());
+ show_registers(guest_cpu_user_regs());
+ domain_shutdown(SHUTDOWN_crash);
}
@@ -158,13 +136,58 @@ void domain_crash_synchronous(void)
do_softirq();
}
+
+static struct domain *domain_shuttingdown[NR_CPUS];
+
+static void domain_shutdown_finalise(void)
+{
+ struct domain *d;
+ struct vcpu *v;
+
+ d = domain_shuttingdown[smp_processor_id()];
+ domain_shuttingdown[smp_processor_id()] = NULL;
+
+ BUG_ON(d == NULL);
+ BUG_ON(d == current->domain);
+ BUG_ON(!test_bit(_DOMF_shuttingdown, &d->domain_flags));
+ BUG_ON(test_bit(_DOMF_shutdown, &d->domain_flags));
+
+ /* Make sure that every vcpu is descheduled before we finalise. */
+ for_each_vcpu ( d, v )
+ while ( test_bit(_VCPUF_running, &v->vcpu_flags) )
+ cpu_relax();
+
+ sync_lazy_execstate_mask(d->cpumask);
+ BUG_ON(!cpus_empty(d->cpumask));
+
+ sync_pagetable_state(d);
+
+ set_bit(_DOMF_shutdown, &d->domain_flags);
+ clear_bit(_DOMF_shuttingdown, &d->domain_flags);
+
+ send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
+}
+
+static __init int domain_shutdown_finaliser_init(void)
+{
+ open_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ, domain_shutdown_finalise);
+ return 0;
+}
+__initcall(domain_shutdown_finaliser_init);
+
+
void domain_shutdown(u8 reason)
{
- if ( current->id == 0 )
+ struct domain *d = current->domain;
+ struct vcpu *v;
+
+ if ( d->domain_id == 0 )
{
extern void machine_restart(char *);
extern void machine_halt(void);
+ debugger_trap_immediate();
+
if ( reason == SHUTDOWN_poweroff )
{
printk("Domain 0 halted: halting machine.\n");
@@ -177,40 +200,40 @@ void domain_shutdown(u8 reason)
}
}
- if ( (current->shutdown_code = reason) == SHUTDOWN_crash )
- set_bit(DF_CRASHED, &current->flags);
- else
- set_bit(DF_SHUTDOWN, &current->flags);
-
- send_guest_virq(dom0, VIRQ_DOM_EXC);
+ /* Mark the domain as shutting down. */
+ d->shutdown_code = reason;
+ if ( !test_and_set_bit(_DOMF_shuttingdown, &d->domain_flags) )
+ {
+ /* This vcpu won the race to finalise the shutdown. */
+ domain_shuttingdown[smp_processor_id()] = d;
+ raise_softirq(DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ);
+ }
- raise_softirq(SCHEDULE_SOFTIRQ);
+ /* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
+ for_each_vcpu ( d, v )
+ domain_sleep_nosync(v);
}
-unsigned int alloc_new_dom_mem(struct domain *d, unsigned int kbytes)
-{
- unsigned int alloc_pfns, nr_pages;
- struct pfn_info *page;
-
- nr_pages = (kbytes + ((PAGE_SIZE-1)>>10)) >> (PAGE_SHIFT - 10);
- d->max_pages = nr_pages; /* this can now be controlled independently */
- /* Grow the allocation if necessary. */
- for ( alloc_pfns = d->tot_pages; alloc_pfns < nr_pages; alloc_pfns++ )
+void domain_pause_for_debugger(void)
+{
+ struct domain *d = current->domain;
+ struct vcpu *v;
+
+ /*
+ * NOTE: This does not synchronously pause the domain. The debugger
+ * must issue a PAUSEDOMAIN command to ensure that all execution
+ * has ceased and guest state is committed to memory.
+ */
+ for_each_vcpu ( d, v )
{
- if ( unlikely((page = alloc_domheap_page(d)) == NULL) )
- {
- domain_relinquish_memory(d);
- return list_empty(&page_scrub_list) ? -ENOMEM : -EAGAIN;
- }
-
- /* Initialise the machine-to-phys mapping for this page. */
- set_machinetophys(page_to_pfn(page), alloc_pfns);
+ set_bit(_VCPUF_ctrl_pause, &v->vcpu_flags);
+ domain_sleep_nosync(v);
}
- return 0;
+ send_guest_virq(dom0->vcpu[0], VIRQ_DEBUGGER);
}
-
+
/* Release resources belonging to task @p. */
void domain_destruct(struct domain *d)
@@ -218,8 +241,7 @@ void domain_destruct(struct domain *d)
struct domain **pd;
atomic_t old, new;
- if ( !test_bit(DF_DYING, &d->flags) )
- BUG();
+ BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags));
/* May be already destructed, or get_domain() can race us. */
_atomic_set(old, 0);
@@ -232,57 +254,176 @@ void domain_destruct(struct domain *d)
write_lock(&domlist_lock);
pd = &domain_list;
while ( *pd != d )
- pd = &(*pd)->next_list;
- *pd = d->next_list;
- pd = &domain_hash[DOMAIN_HASH(d->id)];
+ pd = &(*pd)->next_in_list;
+ *pd = d->next_in_list;
+ pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
while ( *pd != d )
- pd = &(*pd)->next_hash;
- *pd = d->next_hash;
+ pd = &(*pd)->next_in_hashbucket;
+ *pd = d->next_in_hashbucket;
write_unlock(&domlist_lock);
- destroy_event_channels(d);
+ evtchn_destroy(d);
grant_table_destroy(d);
free_perdomain_pt(d);
- free_xenheap_page((unsigned long)d->shared_info);
+ free_xenheap_page(d->shared_info);
free_domain_struct(d);
+
+ send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC);
}
+void vcpu_pause(struct vcpu *v)
+{
+ BUG_ON(v == current);
+ atomic_inc(&v->pausecnt);
+ domain_sleep_sync(v);
+}
+
+void domain_pause(struct domain *d)
+{
+ struct vcpu *v;
+
+ for_each_vcpu( d, v )
+ {
+ BUG_ON(v == current);
+ atomic_inc(&v->pausecnt);
+ domain_sleep_sync(v);
+ }
+}
+
+void vcpu_unpause(struct vcpu *v)
+{
+ BUG_ON(v == current);
+ if ( atomic_dec_and_test(&v->pausecnt) )
+ domain_wake(v);
+}
+
+void domain_unpause(struct domain *d)
+{
+ struct vcpu *v;
+
+ for_each_vcpu( d, v )
+ vcpu_unpause(v);
+}
+
+void domain_pause_by_systemcontroller(struct domain *d)
+{
+ struct vcpu *v;
+
+ for_each_vcpu ( d, v )
+ {
+ BUG_ON(v == current);
+ if ( !test_and_set_bit(_VCPUF_ctrl_pause, &v->vcpu_flags) )
+ domain_sleep_sync(v);
+ }
+}
+
+void domain_unpause_by_systemcontroller(struct domain *d)
+{
+ struct vcpu *v;
+
+ for_each_vcpu ( d, v )
+ {
+ if ( test_and_clear_bit(_VCPUF_ctrl_pause, &v->vcpu_flags) )
+ domain_wake(v);
+ }
+}
+
+
+/*
+ * set_info_guest is used for final setup, launching, and state modification
+ * of domains other than domain 0. ie. the domains that are being built by
+ * the userspace dom0 domain builder.
+ */
+int set_info_guest(struct domain *d, dom0_setdomaininfo_t *setdomaininfo)
+{
+ int rc = 0;
+ struct vcpu_guest_context *c = NULL;
+ unsigned long vcpu = setdomaininfo->vcpu;
+ struct vcpu *v;
+
+ if ( (vcpu >= MAX_VIRT_CPUS) || ((v = d->vcpu[vcpu]) == NULL) )
+ return -EINVAL;
+
+ if (test_bit(_DOMF_constructed, &d->domain_flags) &&
+ !test_bit(_VCPUF_ctrl_pause, &v->vcpu_flags))
+ return -EINVAL;
+
+ if ( (c = xmalloc(struct vcpu_guest_context)) == NULL )
+ return -ENOMEM;
+
+ if ( copy_from_user(c, setdomaininfo->ctxt, sizeof(*c)) )
+ {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ if ( (rc = arch_set_info_guest(v, c)) != 0 )
+ goto out;
+
+ set_bit(_DOMF_constructed, &d->domain_flags);
+
+ out:
+ xfree(c);
+ return rc;
+}
/*
- * final_setup_guestos is used for final setup and launching of domains other
+ * final_setup_guest is used for final setup and launching of domains other
* than domain 0. ie. the domains that are being built by the userspace dom0
* domain builder.
*/
-int final_setup_guestos(struct domain *p, dom0_builddomain_t *builddomain)
+long do_boot_vcpu(unsigned long vcpu, struct vcpu_guest_context *ctxt)
{
+ struct domain *d = current->domain;
+ struct vcpu *v;
int rc = 0;
- full_execution_context_t *c;
+ struct vcpu_guest_context *c;
- if ( (c = xmalloc(sizeof(*c))) == NULL )
+ if ( (vcpu >= MAX_VIRT_CPUS) || (d->vcpu[vcpu] != NULL) )
+ return -EINVAL;
+
+ if ( alloc_vcpu_struct(d, vcpu) == NULL )
return -ENOMEM;
- if ( test_bit(DF_CONSTRUCTED, &p->flags) )
+ if ( (c = xmalloc(struct vcpu_guest_context)) == NULL )
{
- rc = -EINVAL;
+ rc = -ENOMEM;
goto out;
}
- if ( copy_from_user(c, builddomain->ctxt, sizeof(*c)) )
+ if ( copy_from_user(c, ctxt, sizeof(*c)) )
{
rc = -EFAULT;
goto out;
}
-
- if ( (rc = arch_final_setup_guestos(p,c)) != 0 )
+
+ v = d->vcpu[vcpu];
+
+ atomic_set(&v->pausecnt, 0);
+ v->cpumap = CPUMAP_RUNANYWHERE;
+
+ memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch));
+
+ arch_do_boot_vcpu(v);
+
+ if ( (rc = arch_set_info_guest(v, c)) != 0 )
goto out;
- set_bit(DF_CONSTRUCTED, &p->flags);
+ sched_add_domain(v);
- out:
- if ( c != NULL )
- xfree(c);
+ /* domain_unpause_by_systemcontroller */
+ if ( test_and_clear_bit(_VCPUF_ctrl_pause, &v->vcpu_flags) )
+ domain_wake(v);
+
+ xfree(c);
+ return 0;
+
+ out:
+ xfree(c);
+ arch_free_vcpu_struct(d->vcpu[vcpu]);
+ d->vcpu[vcpu] = NULL;
return rc;
}
@@ -295,15 +436,21 @@ long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
{
case VMASST_CMD_enable:
set_bit(type, &p->vm_assist);
- if (vm_assist_info[type].enable)
- (*vm_assist_info[type].enable)(p);
return 0;
case VMASST_CMD_disable:
clear_bit(type, &p->vm_assist);
- if (vm_assist_info[type].disable)
- (*vm_assist_info[type].disable)(p);
return 0;
}
return -ENOSYS;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/elf.c b/xen/common/elf.c
index 7feb9095db..08ff377d7c 100644
--- a/xen/common/elf.c
+++ b/xen/common/elf.c
@@ -9,45 +9,35 @@
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/elf.h>
+#include <xen/sched.h>
-#ifdef CONFIG_X86
-#define FORCE_XENELF_IMAGE 1
-#define ELF_ADDR p_vaddr
-#elif defined(__ia64__)
-#define FORCE_XENELF_IMAGE 0
-#define ELF_ADDR p_paddr
-#endif
-
+static void loadelfsymtab(struct domain_setup_info *dsi, int doload);
static inline int is_loadable_phdr(Elf_Phdr *phdr)
{
return ((phdr->p_type == PT_LOAD) &&
((phdr->p_flags & (PF_W|PF_X)) != 0));
}
-int parseelfimage(char *elfbase,
- unsigned long elfsize,
- struct domain_setup_info *dsi)
+int parseelfimage(struct domain_setup_info *dsi)
{
- Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr;
Elf_Phdr *phdr;
Elf_Shdr *shdr;
unsigned long kernstart = ~0UL, kernend=0UL;
char *shstrtab, *guestinfo=NULL, *p;
+ char *elfbase = (char *)dsi->image_addr;
int h;
- if ( !IS_ELF(*ehdr) )
- {
- printk("Kernel image does not have an ELF header.\n");
+ if ( !elf_sanity_check(ehdr) )
return -EINVAL;
- }
- if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
+ if ( (ehdr->e_phoff + (ehdr->e_phnum*ehdr->e_phentsize)) > dsi->image_len )
{
printk("ELF program headers extend beyond end of image.\n");
return -EINVAL;
}
- if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
+ if ( (ehdr->e_shoff + (ehdr->e_shnum*ehdr->e_shentsize)) > dsi->image_len )
{
printk("ELF section headers extend beyond end of image.\n");
return -EINVAL;
@@ -81,31 +71,26 @@ int parseelfimage(char *elfbase,
return -EINVAL;
}
- if ( (strstr(guestinfo, "XEN_VER=2.0") == NULL) )
+ if ( (strstr(guestinfo, "XEN_VER=3.0") == NULL) )
{
- printk("ERROR: Xen will only load images built for Xen v2.0\n");
+ printk("ERROR: Xen will only load images built for Xen v3.0\n");
return -EINVAL;
}
break;
}
- if ( guestinfo == NULL )
- {
- printk("Not a Xen-ELF image: '__xen_guest' section not found.\n");
-#if FORCE_XENELF_IMAGE
- return -EINVAL;
-#endif
- }
+
+ dsi->xen_section_string = guestinfo;
for ( h = 0; h < ehdr->e_phnum; h++ )
{
phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
if ( !is_loadable_phdr(phdr) )
continue;
- if ( phdr->ELF_ADDR < kernstart )
- kernstart = phdr->ELF_ADDR;
- if ( (phdr->ELF_ADDR + phdr->p_memsz) > kernend )
- kernend = phdr->ELF_ADDR + phdr->p_memsz;
+ if ( phdr->p_paddr < kernstart )
+ kernstart = phdr->p_paddr;
+ if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
+ kernend = phdr->p_paddr + phdr->p_memsz;
}
if ( (kernstart > kernend) ||
@@ -122,27 +107,25 @@ int parseelfimage(char *elfbase,
{
if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
dsi->v_start = simple_strtoul(p+10, &p, 0);
-
- if ( (p = strstr(guestinfo, "PT_MODE_WRITABLE")) != NULL )
- dsi->use_writable_pagetables = 1;
if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL )
- dsi->load_bsd_symtab = 1;
-
+ dsi->load_symtab = 1;
}
dsi->v_kernstart = kernstart;
dsi->v_kernend = kernend;
dsi->v_kernentry = ehdr->e_entry;
-
dsi->v_end = dsi->v_kernend;
+ loadelfsymtab(dsi, 0);
+
return 0;
}
-int loadelfimage(char *elfbase)
+int loadelfimage(struct domain_setup_info *dsi)
{
- Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
+ char *elfbase = (char *)dsi->image_addr;
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr;
Elf_Phdr *phdr;
int h;
@@ -152,26 +135,31 @@ int loadelfimage(char *elfbase)
if ( !is_loadable_phdr(phdr) )
continue;
if ( phdr->p_filesz != 0 )
- memcpy((char *)phdr->ELF_ADDR, elfbase + phdr->p_offset,
+ memcpy((char *)phdr->p_paddr, elfbase + phdr->p_offset,
phdr->p_filesz);
if ( phdr->p_memsz > phdr->p_filesz )
- memset((char *)phdr->ELF_ADDR + phdr->p_filesz, 0,
+ memset((char *)phdr->p_paddr + phdr->p_filesz, 0,
phdr->p_memsz - phdr->p_filesz);
}
+ loadelfsymtab(dsi, 1);
+
return 0;
}
#define ELFROUND (ELFSIZE / 8)
-int loadelfsymtab(char *elfbase, int doload, struct domain_setup_info *dsi)
+static void loadelfsymtab(struct domain_setup_info *dsi, int doload)
{
- Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase, *sym_ehdr;
+ Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr, *sym_ehdr;
Elf_Shdr *shdr;
unsigned long maxva, symva;
- char *p;
+ char *p, *elfbase = (char *)dsi->image_addr;
int h, i;
+ if ( !dsi->load_symtab )
+ return;
+
maxva = (dsi->v_kernend + ELFROUND - 1) & ~(ELFROUND - 1);
symva = maxva;
maxva += sizeof(int);
@@ -179,14 +167,16 @@ int loadelfsymtab(char *elfbase, int doload, struct domain_setup_info *dsi)
dsi->symtab_len = 0;
maxva += sizeof(Elf_Ehdr) + ehdr->e_shnum * sizeof(Elf_Shdr);
maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
- if (doload) {
- p = (void *)symva;
-
- shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr));
- memcpy(shdr, elfbase + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr));
- } else {
- shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff);
- p = NULL; /* XXX: gcc */
+ if ( doload )
+ {
+ p = (void *)symva;
+ shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr));
+ memcpy(shdr, elfbase + ehdr->e_shoff, ehdr->e_shnum*sizeof(Elf_Shdr));
+ }
+ else
+ {
+ p = NULL;
+ shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff);
}
for ( h = 0; h < ehdr->e_shnum; h++ )
@@ -201,9 +191,9 @@ int loadelfsymtab(char *elfbase, int doload, struct domain_setup_info *dsi)
/* Skip symtab @h if we found no corresponding strtab @i. */
if ( i == ehdr->e_shnum )
{
- if (doload) {
- shdr[h].sh_offset = 0;
- }
+ if (doload) {
+ shdr[h].sh_offset = 0;
+ }
continue;
}
}
@@ -211,47 +201,51 @@ int loadelfsymtab(char *elfbase, int doload, struct domain_setup_info *dsi)
if ( (shdr[h].sh_type == SHT_STRTAB) ||
(shdr[h].sh_type == SHT_SYMTAB) )
{
- if (doload) {
- memcpy((void *)maxva, elfbase + shdr[h].sh_offset,
- shdr[h].sh_size);
+ if (doload) {
+ memcpy((void *)maxva, elfbase + shdr[h].sh_offset,
+ shdr[h].sh_size);
- /* Mangled to be based on ELF header location. */
- shdr[h].sh_offset = maxva - dsi->symtab_addr;
+ /* Mangled to be based on ELF header location. */
+ shdr[h].sh_offset = maxva - dsi->symtab_addr;
- }
- dsi->symtab_len += shdr[h].sh_size;
- maxva += shdr[h].sh_size;
- maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
+ }
+ dsi->symtab_len += shdr[h].sh_size;
+ maxva += shdr[h].sh_size;
+ maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
}
- if (doload) {
- shdr[h].sh_name = 0; /* Name is NULL. */
- }
+ if ( doload )
+ shdr[h].sh_name = 0; /* Name is NULL. */
}
if ( dsi->symtab_len == 0 )
{
dsi->symtab_addr = 0;
- goto out;
+ return;
}
- if (doload) {
- *(int *)p = maxva - dsi->symtab_addr;
- sym_ehdr = (Elf_Ehdr *)(p + sizeof(int));
- memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr));
- sym_ehdr->e_phoff = 0;
- sym_ehdr->e_shoff = sizeof(Elf_Ehdr);
- sym_ehdr->e_phentsize = 0;
- sym_ehdr->e_phnum = 0;
- sym_ehdr->e_shstrndx = SHN_UNDEF;
+ if ( doload )
+ {
+ *(int *)p = maxva - dsi->symtab_addr;
+ sym_ehdr = (Elf_Ehdr *)(p + sizeof(int));
+ memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr));
+ sym_ehdr->e_phoff = 0;
+ sym_ehdr->e_shoff = sizeof(Elf_Ehdr);
+ sym_ehdr->e_phentsize = 0;
+ sym_ehdr->e_phnum = 0;
+ sym_ehdr->e_shstrndx = SHN_UNDEF;
}
-#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) /* XXX */
-
dsi->symtab_len = maxva - dsi->symtab_addr;
- dsi->v_end = round_pgup(maxva);
-
- out:
-
- return 0;
+ dsi->v_end = maxva;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 40e218c731..6f6e707667 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -22,48 +22,38 @@
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/irq.h>
+#include <asm/current.h>
#include <public/xen.h>
#include <public/event_channel.h>
-#define INIT_EVENT_CHANNELS 16
-#define MAX_EVENT_CHANNELS 1024
+#define bucket_from_port(d,p) \
+ ((d)->evtchn[(p)/EVTCHNS_PER_BUCKET])
+#define port_is_valid(d,p) \
+ (((p) >= 0) && ((p) < MAX_EVTCHNS) && \
+ (bucket_from_port(d,p) != NULL))
+#define evtchn_from_port(d,p) \
+ (&(bucket_from_port(d,p))[(p)&(EVTCHNS_PER_BUCKET-1)])
+#define ERROR_EXIT(_errno) do { rc = (_errno); goto out; } while ( 0 )
static int get_free_port(struct domain *d)
{
- int max, port;
- event_channel_t *chn;
-
- max = d->max_event_channel;
- chn = d->event_channel;
+ struct evtchn *chn;
+ int port;
- for ( port = 0; port < max; port++ )
- if ( chn[port].state == ECS_FREE )
- break;
+ for ( port = 0; port_is_valid(d, port); port++ )
+ if ( evtchn_from_port(d, port)->state == ECS_FREE )
+ return port;
- if ( port == max )
- {
- if ( max == MAX_EVENT_CHANNELS )
- return -ENOSPC;
-
- max *= 2;
-
- chn = xmalloc(max * sizeof(event_channel_t));
- if ( unlikely(chn == NULL) )
- return -ENOMEM;
-
- memset(chn, 0, max * sizeof(event_channel_t));
-
- if ( d->event_channel != NULL )
- {
- memcpy(chn, d->event_channel, (max/2) * sizeof(event_channel_t));
- xfree(d->event_channel);
- }
+ if ( port == MAX_EVTCHNS )
+ return -ENOSPC;
- d->event_channel = chn;
- d->max_event_channel = max;
- }
+ chn = xmalloc_array(struct evtchn, EVTCHNS_PER_BUCKET);
+ if ( unlikely(chn == NULL) )
+ return -ENOMEM;
+ memset(chn, 0, EVTCHNS_PER_BUCKET * sizeof(*chn));
+ bucket_from_port(d, port) = chn;
return port;
}
@@ -71,45 +61,63 @@ static int get_free_port(struct domain *d)
static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
{
- struct domain *d = current;
- int port;
+ struct evtchn *chn;
+ struct domain *d = current->domain;
+ int port = alloc->port;
+ long rc = 0;
- spin_lock(&d->event_channel_lock);
+ spin_lock(&d->evtchn_lock);
- if ( (port = get_free_port(d)) >= 0 )
+ /* Obtain, or ensure that we already have, a valid <port>. */
+ if ( port == 0 )
{
- d->event_channel[port].state = ECS_UNBOUND;
- d->event_channel[port].u.unbound.remote_domid = alloc->dom;
+ if ( (port = get_free_port(d)) < 0 )
+ ERROR_EXIT(port);
}
+ else if ( !port_is_valid(d, port) )
+ ERROR_EXIT(-EINVAL);
+ chn = evtchn_from_port(d, port);
- spin_unlock(&d->event_channel_lock);
+ /* Validate channel's current state. */
+ switch ( chn->state )
+ {
+ case ECS_FREE:
+ chn->state = ECS_UNBOUND;
+ chn->u.unbound.remote_domid = alloc->dom;
+ break;
- if ( port < 0 )
- return port;
+ case ECS_UNBOUND:
+ if ( chn->u.unbound.remote_domid != alloc->dom )
+ ERROR_EXIT(-EINVAL);
+ break;
+
+ default:
+ ERROR_EXIT(-EINVAL);
+ }
+
+ out:
+ spin_unlock(&d->evtchn_lock);
alloc->port = port;
- return 0;
+ return rc;
}
static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
{
-#define ERROR_EXIT(_errno) do { rc = (_errno); goto out; } while ( 0 )
+ struct evtchn *chn1, *chn2;
struct domain *d1, *d2;
int port1 = bind->port1, port2 = bind->port2;
domid_t dom1 = bind->dom1, dom2 = bind->dom2;
long rc = 0;
- if ( !IS_PRIV(current) && (dom1 != DOMID_SELF) )
+ if ( !IS_PRIV(current->domain) && (dom1 != DOMID_SELF) )
return -EPERM;
- if ( (port1 < 0) || (port2 < 0) )
- return -EINVAL;
-
if ( dom1 == DOMID_SELF )
- dom1 = current->id;
+ dom1 = current->domain->domain_id;
if ( dom2 == DOMID_SELF )
- dom2 = current->id;
+ dom2 = current->domain->domain_id;
if ( ((d1 = find_domain_by_id(dom1)) == NULL) ||
((d2 = find_domain_by_id(dom2)) == NULL) )
@@ -122,14 +130,14 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
/* Avoid deadlock by first acquiring lock of domain with smaller id. */
if ( d1 < d2 )
{
- spin_lock(&d1->event_channel_lock);
- spin_lock(&d2->event_channel_lock);
+ spin_lock(&d1->evtchn_lock);
+ spin_lock(&d2->evtchn_lock);
}
else
{
if ( d1 != d2 )
- spin_lock(&d2->event_channel_lock);
- spin_lock(&d1->event_channel_lock);
+ spin_lock(&d2->evtchn_lock);
+ spin_lock(&d1->evtchn_lock);
}
/* Obtain, or ensure that we already have, a valid <port1>. */
@@ -138,41 +146,42 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
if ( (port1 = get_free_port(d1)) < 0 )
ERROR_EXIT(port1);
}
- else if ( port1 >= d1->max_event_channel )
+ else if ( !port_is_valid(d1, port1) )
ERROR_EXIT(-EINVAL);
+ chn1 = evtchn_from_port(d1, port1);
/* Obtain, or ensure that we already have, a valid <port2>. */
if ( port2 == 0 )
{
/* Make port1 non-free while we allocate port2 (in case dom1==dom2). */
- u16 tmp = d1->event_channel[port1].state;
- d1->event_channel[port1].state = ECS_INTERDOMAIN;
+ u16 state = chn1->state;
+ chn1->state = ECS_INTERDOMAIN;
port2 = get_free_port(d2);
- d1->event_channel[port1].state = tmp;
+ chn1->state = state;
if ( port2 < 0 )
ERROR_EXIT(port2);
}
- else if ( port2 >= d2->max_event_channel )
+ else if ( !port_is_valid(d2, port2) )
ERROR_EXIT(-EINVAL);
+ chn2 = evtchn_from_port(d2, port2);
/* Validate <dom1,port1>'s current state. */
- switch ( d1->event_channel[port1].state )
+ switch ( chn1->state )
{
case ECS_FREE:
break;
case ECS_UNBOUND:
- if ( d1->event_channel[port1].u.unbound.remote_domid != dom2 )
+ if ( chn1->u.unbound.remote_domid != dom2 )
ERROR_EXIT(-EINVAL);
break;
case ECS_INTERDOMAIN:
- if ( d1->event_channel[port1].u.interdomain.remote_dom != d2 )
+ if ( chn1->u.interdomain.remote_dom != d2 )
ERROR_EXIT(-EINVAL);
- if ( (d1->event_channel[port1].u.interdomain.remote_port != port2) &&
- (bind->port2 != 0) )
+ if ( (chn1->u.interdomain.remote_port != port2) && (bind->port2 != 0) )
ERROR_EXIT(-EINVAL);
- port2 = d1->event_channel[port1].u.interdomain.remote_port;
+ port2 = chn1->u.interdomain.remote_port;
goto out;
default:
@@ -180,25 +189,24 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
}
/* Validate <dom2,port2>'s current state. */
- switch ( d2->event_channel[port2].state )
+ switch ( chn2->state )
{
case ECS_FREE:
- if ( !IS_PRIV(current) && (dom2 != DOMID_SELF) )
+ if ( !IS_PRIV(current->domain) && (dom2 != DOMID_SELF) )
ERROR_EXIT(-EPERM);
break;
case ECS_UNBOUND:
- if ( d2->event_channel[port2].u.unbound.remote_domid != dom1 )
+ if ( chn2->u.unbound.remote_domid != dom1 )
ERROR_EXIT(-EINVAL);
break;
case ECS_INTERDOMAIN:
- if ( d2->event_channel[port2].u.interdomain.remote_dom != d1 )
+ if ( chn2->u.interdomain.remote_dom != d1 )
ERROR_EXIT(-EINVAL);
- if ( (d2->event_channel[port2].u.interdomain.remote_port != port1) &&
- (bind->port1 != 0) )
+ if ( (chn2->u.interdomain.remote_port != port1) && (bind->port1 != 0) )
ERROR_EXIT(-EINVAL);
- port1 = d2->event_channel[port2].u.interdomain.remote_port;
+ port1 = chn2->u.interdomain.remote_port;
goto out;
default:
@@ -209,18 +217,18 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
* Everything checked out okay -- bind <dom1,port1> to <dom2,port2>.
*/
- d1->event_channel[port1].u.interdomain.remote_dom = d2;
- d1->event_channel[port1].u.interdomain.remote_port = (u16)port2;
- d1->event_channel[port1].state = ECS_INTERDOMAIN;
+ chn1->u.interdomain.remote_dom = d2;
+ chn1->u.interdomain.remote_port = (u16)port2;
+ chn1->state = ECS_INTERDOMAIN;
- d2->event_channel[port2].u.interdomain.remote_dom = d1;
- d2->event_channel[port2].u.interdomain.remote_port = (u16)port1;
- d2->event_channel[port2].state = ECS_INTERDOMAIN;
+ chn2->u.interdomain.remote_dom = d1;
+ chn2->u.interdomain.remote_port = (u16)port1;
+ chn2->state = ECS_INTERDOMAIN;
out:
- spin_unlock(&d1->event_channel_lock);
+ spin_unlock(&d1->evtchn_lock);
if ( d1 != d2 )
- spin_unlock(&d2->event_channel_lock);
+ spin_unlock(&d2->evtchn_lock);
put_domain(d1);
put_domain(d2);
@@ -229,37 +237,66 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
bind->port2 = port2;
return rc;
-#undef ERROR_EXIT
}
static long evtchn_bind_virq(evtchn_bind_virq_t *bind)
{
- struct domain *d = current;
+ struct evtchn *chn;
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
int port, virq = bind->virq;
- if ( virq >= ARRAY_SIZE(d->virq_to_evtchn) )
+ if ( virq >= ARRAY_SIZE(v->virq_to_evtchn) )
return -EINVAL;
- spin_lock(&d->event_channel_lock);
+ spin_lock(&d->evtchn_lock);
/*
* Port 0 is the fallback port for VIRQs that haven't been explicitly
- * bound yet. The exception is the 'misdirect VIRQ', which is permanently
- * bound to port 0.
+ * bound yet.
*/
- if ( ((port = d->virq_to_evtchn[virq]) != 0) ||
- (virq == VIRQ_MISDIRECT) ||
+ if ( ((port = v->virq_to_evtchn[virq]) != 0) ||
((port = get_free_port(d)) < 0) )
goto out;
- d->event_channel[port].state = ECS_VIRQ;
- d->event_channel[port].u.virq = virq;
+ chn = evtchn_from_port(d, port);
+ chn->state = ECS_VIRQ;
+ chn->notify_vcpu_id = v->vcpu_id;
+ chn->u.virq = virq;
- d->virq_to_evtchn[virq] = port;
+ v->virq_to_evtchn[virq] = port;
out:
- spin_unlock(&d->event_channel_lock);
+ spin_unlock(&d->evtchn_lock);
+
+ if ( port < 0 )
+ return port;
+
+ bind->port = port;
+ return 0;
+}
+
+
+static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind)
+{
+ struct evtchn *chn;
+ struct domain *d = current->domain;
+ int port, ipi_vcpu = bind->ipi_vcpu;
+
+ if ( (ipi_vcpu >= MAX_VIRT_CPUS) || (d->vcpu[ipi_vcpu] == NULL) )
+ return -EINVAL;
+
+ spin_lock(&d->evtchn_lock);
+
+ if ( (port = get_free_port(d)) >= 0 )
+ {
+ chn = evtchn_from_port(d, port);
+ chn->state = ECS_IPI;
+ chn->notify_vcpu_id = ipi_vcpu;
+ }
+
+ spin_unlock(&d->evtchn_lock);
if ( port < 0 )
return port;
@@ -271,20 +308,23 @@ static long evtchn_bind_virq(evtchn_bind_virq_t *bind)
static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
{
- struct domain *d = current;
+ struct evtchn *chn;
+ struct domain *d = current->domain;
int port, rc, pirq = bind->pirq;
if ( pirq >= ARRAY_SIZE(d->pirq_to_evtchn) )
return -EINVAL;
- spin_lock(&d->event_channel_lock);
+ spin_lock(&d->evtchn_lock);
if ( ((rc = port = d->pirq_to_evtchn[pirq]) != 0) ||
((rc = port = get_free_port(d)) < 0) )
goto out;
+ chn = evtchn_from_port(d, port);
+
d->pirq_to_evtchn[pirq] = port;
- rc = pirq_guest_bind(d, pirq,
+ rc = pirq_guest_bind(d->vcpu[chn->notify_vcpu_id], pirq,
!!(bind->flags & BIND_PIRQ__WILL_SHARE));
if ( rc != 0 )
{
@@ -292,11 +332,11 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
goto out;
}
- d->event_channel[port].state = ECS_PIRQ;
- d->event_channel[port].u.pirq = pirq;
+ chn->state = ECS_PIRQ;
+ chn->u.pirq = pirq;
out:
- spin_unlock(&d->event_channel_lock);
+ spin_unlock(&d->evtchn_lock);
if ( rc < 0 )
return rc;
@@ -308,26 +348,26 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
static long __evtchn_close(struct domain *d1, int port1)
{
- struct domain *d2 = NULL;
- event_channel_t *chn1, *chn2;
- int port2;
- long rc = 0;
+ struct domain *d2 = NULL;
+ struct vcpu *v;
+ struct evtchn *chn1, *chn2;
+ int port2;
+ long rc = 0;
again:
- spin_lock(&d1->event_channel_lock);
-
- chn1 = d1->event_channel;
+ spin_lock(&d1->evtchn_lock);
- /* NB. Port 0 is special (VIRQ_MISDIRECT). Never let it be closed. */
- if ( (port1 <= 0) || (port1 >= d1->max_event_channel) )
+ if ( !port_is_valid(d1, port1) )
{
rc = -EINVAL;
goto out;
}
- switch ( chn1[port1].state )
+ chn1 = evtchn_from_port(d1, port1);
+ switch ( chn1->state )
{
case ECS_FREE:
+ case ECS_RESERVED:
rc = -EINVAL;
goto out;
@@ -335,18 +375,23 @@ static long __evtchn_close(struct domain *d1, int port1)
break;
case ECS_PIRQ:
- if ( (rc = pirq_guest_unbind(d1, chn1[port1].u.pirq)) == 0 )
- d1->pirq_to_evtchn[chn1[port1].u.pirq] = 0;
+ if ( (rc = pirq_guest_unbind(d1, chn1->u.pirq)) == 0 )
+ d1->pirq_to_evtchn[chn1->u.pirq] = 0;
break;
case ECS_VIRQ:
- d1->virq_to_evtchn[chn1[port1].u.virq] = 0;
+ for_each_vcpu ( d1, v )
+ if ( v->virq_to_evtchn[chn1->u.virq] == port1 )
+ v->virq_to_evtchn[chn1->u.virq] = 0;
+ break;
+
+ case ECS_IPI:
break;
case ECS_INTERDOMAIN:
if ( d2 == NULL )
{
- d2 = chn1[port1].u.interdomain.remote_dom;
+ d2 = chn1->u.interdomain.remote_dom;
/* If we unlock d1 then we could lose d2. Must get a reference. */
if ( unlikely(!get_domain(d2)) )
@@ -361,50 +406,47 @@ static long __evtchn_close(struct domain *d1, int port1)
if ( d1 < d2 )
{
- spin_lock(&d2->event_channel_lock);
+ spin_lock(&d2->evtchn_lock);
}
else if ( d1 != d2 )
{
- spin_unlock(&d1->event_channel_lock);
- spin_lock(&d2->event_channel_lock);
+ spin_unlock(&d1->evtchn_lock);
+ spin_lock(&d2->evtchn_lock);
goto again;
}
}
- else if ( d2 != chn1[port1].u.interdomain.remote_dom )
+ else if ( d2 != chn1->u.interdomain.remote_dom )
{
rc = -EINVAL;
goto out;
}
- chn2 = d2->event_channel;
- port2 = chn1[port1].u.interdomain.remote_port;
-
- if ( port2 >= d2->max_event_channel )
- BUG();
- if ( chn2[port2].state != ECS_INTERDOMAIN )
- BUG();
- if ( chn2[port2].u.interdomain.remote_dom != d1 )
- BUG();
-
- chn2[port2].state = ECS_UNBOUND;
- chn2[port2].u.unbound.remote_domid = d1->id;
+ port2 = chn1->u.interdomain.remote_port;
+ BUG_ON(!port_is_valid(d2, port2));
+
+ chn2 = evtchn_from_port(d2, port2);
+ BUG_ON(chn2->state != ECS_INTERDOMAIN);
+ BUG_ON(chn2->u.interdomain.remote_dom != d1);
+
+ chn2->state = ECS_UNBOUND;
+ chn2->u.unbound.remote_domid = d1->domain_id;
break;
default:
BUG();
}
- chn1[port1].state = ECS_FREE;
+ chn1->state = ECS_FREE;
out:
if ( d2 != NULL )
{
if ( d1 != d2 )
- spin_unlock(&d2->event_channel_lock);
+ spin_unlock(&d2->evtchn_lock);
put_domain(d2);
}
- spin_unlock(&d1->event_channel_lock);
+ spin_unlock(&d1->evtchn_lock);
return rc;
}
@@ -417,8 +459,8 @@ static long evtchn_close(evtchn_close_t *close)
domid_t dom = close->dom;
if ( dom == DOMID_SELF )
- dom = current->id;
- else if ( !IS_PRIV(current) )
+ dom = current->domain->domain_id;
+ else if ( !IS_PRIV(current->domain) )
return -EPERM;
if ( (d = find_domain_by_id(dom)) == NULL )
@@ -431,86 +473,107 @@ static long evtchn_close(evtchn_close_t *close)
}
-static long evtchn_send(int lport)
+long evtchn_send(int lport)
{
- struct domain *ld = current, *rd;
- int rport;
+ struct evtchn *lchn, *rchn;
+ struct domain *ld = current->domain, *rd;
+ int rport, ret = 0;
- spin_lock(&ld->event_channel_lock);
+ spin_lock(&ld->evtchn_lock);
- if ( unlikely(lport < 0) ||
- unlikely(lport >= ld->max_event_channel) ||
- unlikely(ld->event_channel[lport].state != ECS_INTERDOMAIN) )
+ if ( unlikely(!port_is_valid(ld, lport)) )
{
- spin_unlock(&ld->event_channel_lock);
+ spin_unlock(&ld->evtchn_lock);
return -EINVAL;
}
- rd = ld->event_channel[lport].u.interdomain.remote_dom;
- rport = ld->event_channel[lport].u.interdomain.remote_port;
-
- evtchn_set_pending(rd, rport);
+ lchn = evtchn_from_port(ld, lport);
+ switch ( lchn->state )
+ {
+ case ECS_INTERDOMAIN:
+ rd = lchn->u.interdomain.remote_dom;
+ rport = lchn->u.interdomain.remote_port;
+ rchn = evtchn_from_port(rd, rport);
+ evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
+ break;
+ case ECS_IPI:
+ evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- spin_unlock(&ld->event_channel_lock);
+ spin_unlock(&ld->evtchn_lock);
- return 0;
+ return ret;
}
+void send_guest_pirq(struct domain *d, int pirq)
+{
+ int port = d->pirq_to_evtchn[pirq];
+ struct evtchn *chn = evtchn_from_port(d, port);
+ evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port);
+}
static long evtchn_status(evtchn_status_t *status)
{
struct domain *d;
domid_t dom = status->dom;
int port = status->port;
- event_channel_t *chn;
+ struct evtchn *chn;
long rc = 0;
if ( dom == DOMID_SELF )
- dom = current->id;
- else if ( !IS_PRIV(current) )
+ dom = current->domain->domain_id;
+ else if ( !IS_PRIV(current->domain) )
return -EPERM;
if ( (d = find_domain_by_id(dom)) == NULL )
return -ESRCH;
- spin_lock(&d->event_channel_lock);
-
- chn = d->event_channel;
+ spin_lock(&d->evtchn_lock);
- if ( (port < 0) || (port >= d->max_event_channel) )
+ if ( !port_is_valid(d, port) )
{
rc = -EINVAL;
goto out;
}
- switch ( chn[port].state )
+ chn = evtchn_from_port(d, port);
+ switch ( chn->state )
{
case ECS_FREE:
+ case ECS_RESERVED:
status->status = EVTCHNSTAT_closed;
break;
case ECS_UNBOUND:
status->status = EVTCHNSTAT_unbound;
- status->u.unbound.dom = chn[port].u.unbound.remote_domid;
+ status->u.unbound.dom = chn->u.unbound.remote_domid;
break;
case ECS_INTERDOMAIN:
status->status = EVTCHNSTAT_interdomain;
- status->u.interdomain.dom = chn[port].u.interdomain.remote_dom->id;
- status->u.interdomain.port = chn[port].u.interdomain.remote_port;
+ status->u.interdomain.dom =
+ chn->u.interdomain.remote_dom->domain_id;
+ status->u.interdomain.port = chn->u.interdomain.remote_port;
break;
case ECS_PIRQ:
status->status = EVTCHNSTAT_pirq;
- status->u.pirq = chn[port].u.pirq;
+ status->u.pirq = chn->u.pirq;
break;
case ECS_VIRQ:
status->status = EVTCHNSTAT_virq;
- status->u.virq = chn[port].u.virq;
+ status->u.virq = chn->u.virq;
+ break;
+ case ECS_IPI:
+ status->status = EVTCHNSTAT_ipi;
+ status->u.ipi_vcpu = chn->notify_vcpu_id;
break;
default:
BUG();
}
out:
- spin_unlock(&d->event_channel_lock);
+ spin_unlock(&d->evtchn_lock);
put_domain(d);
return rc;
}
@@ -544,6 +607,12 @@ long do_event_channel_op(evtchn_op_t *uop)
rc = -EFAULT; /* Cleaning up here would be a mess! */
break;
+ case EVTCHNOP_bind_ipi:
+ rc = evtchn_bind_ipi(&op.u.bind_ipi);
+ if ( (rc == 0) && (copy_to_user(uop, &op, sizeof(op)) != 0) )
+ rc = -EFAULT; /* Cleaning up here would be a mess! */
+ break;
+
case EVTCHNOP_bind_pirq:
rc = evtchn_bind_pirq(&op.u.bind_pirq);
if ( (rc == 0) && (copy_to_user(uop, &op, sizeof(op)) != 0) )
@@ -573,27 +642,34 @@ long do_event_channel_op(evtchn_op_t *uop)
}
-int init_event_channels(struct domain *d)
+int evtchn_init(struct domain *d)
{
- spin_lock_init(&d->event_channel_lock);
- d->event_channel = xmalloc(INIT_EVENT_CHANNELS * sizeof(event_channel_t));
- if ( unlikely(d->event_channel == NULL) )
- return -ENOMEM;
- d->max_event_channel = INIT_EVENT_CHANNELS;
- memset(d->event_channel, 0, INIT_EVENT_CHANNELS * sizeof(event_channel_t));
- d->event_channel[0].state = ECS_VIRQ;
- d->event_channel[0].u.virq = VIRQ_MISDIRECT;
+ spin_lock_init(&d->evtchn_lock);
+ if ( get_free_port(d) != 0 )
+ return -EINVAL;
+ evtchn_from_port(d, 0)->state = ECS_RESERVED;
return 0;
}
-void destroy_event_channels(struct domain *d)
+void evtchn_destroy(struct domain *d)
{
int i;
- if ( d->event_channel != NULL )
- {
- for ( i = 0; i < d->max_event_channel; i++ )
+
+ for ( i = 0; port_is_valid(d, i); i++ )
(void)__evtchn_close(d, i);
- xfree(d->event_channel);
- }
+
+ for ( i = 0; i < NR_EVTCHN_BUCKETS; i++ )
+ if ( d->evtchn[i] != NULL )
+ xfree(d->evtchn[i]);
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index f621113342..683a051df3 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -4,6 +4,7 @@
* Mechanism for granting foreign access to page frames, and receiving
* page-ownership transfers.
*
+ * Copyright (c) 2005 Christopher Clark
* Copyright (c) 2004 K A Fraser
*
* This program is free software; you can redistribute it and/or modify
@@ -21,14 +22,20 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define GRANT_DEBUG 0
+#define GRANT_DEBUG_VERBOSE 0
+
#include <xen/config.h>
+#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/shadow.h>
+#include <xen/mm.h>
-#define PIN_FAIL(_rc, _f, _a...) \
+#define PIN_FAIL(_lbl, _rc, _f, _a...) \
do { \
DPRINTK( _f, ## _a ); \
rc = (_rc); \
- goto fail; \
+ goto _lbl; \
} while ( 0 )
static inline int
@@ -36,9 +43,10 @@ get_maptrack_handle(
grant_table_t *t)
{
unsigned int h;
- if ( unlikely((h = t->maptrack_head) == NR_MAPTRACK_ENTRIES) )
+ if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) )
return -1;
t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
+ t->map_count++;
return h;
}
@@ -48,21 +56,42 @@ put_maptrack_handle(
{
t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT;
t->maptrack_head = handle;
+ t->map_count--;
}
-static void
-__gnttab_map_grant_ref(
- gnttab_map_grant_ref_t *uop)
+static int
+__gnttab_activate_grant_ref(
+ struct domain *mapping_d, /* IN */
+ struct vcpu *mapping_ed,
+ struct domain *granting_d,
+ grant_ref_t ref,
+ u16 dev_hst_ro_flags,
+ unsigned long host_virt_addr,
+ unsigned long *pframe ) /* OUT */
{
- domid_t dom, sdom;
- grant_ref_t ref;
- struct domain *ld, *rd;
- u16 flags, sflags;
- int handle;
+ domid_t sdom;
+ u16 sflags;
active_grant_entry_t *act;
- grant_entry_t *sha;
- s16 rc = 0;
- unsigned long frame;
+ grant_entry_t *sha;
+ s16 rc = 1;
+ unsigned long frame = 0;
+ int retries = 0;
+
+ /*
+ * Objectives of this function:
+ * . Make the record ( granting_d, ref ) active, if not already.
+ * . Update shared grant entry of owner, indicating frame is mapped.
+ * . Increment the owner act->pin reference counts.
+ * . get_page on shared frame if new mapping.
+ * . get_page_type if this is first RW mapping of frame.
+ * . Add PTE to virtual address space of mapping_d, if necessary.
+ * Returns:
+ * . -ve: error
+ * . 1: ok
+ * . 0: ok and TLB invalidate of host_virt_addr needed.
+ *
+ * On success, *pframe contains mfn.
+ */
/*
* We bound the number of times we retry CMPXCHG on memory locations that
@@ -72,50 +101,12 @@ __gnttab_map_grant_ref(
* the guest to race our updates (e.g., to change the GTF_readonly flag),
* so we allow a few retries before failing.
*/
- int retries = 0;
- ld = current;
+ act = &granting_d->grant_table->active[ref];
+ sha = &granting_d->grant_table->shared[ref];
- /* Bitwise-OR avoids short-circuiting which screws control flow. */
- if ( unlikely(__get_user(dom, &uop->dom) |
- __get_user(ref, &uop->ref) |
- __get_user(flags, &uop->flags)) )
- {
- DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
- return; /* don't set status */
- }
-
- if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
- unlikely((flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
- {
- DPRINTK("Bad ref (%d) or flags (%x).\n", ref, flags);
- (void)__put_user(GNTST_bad_gntref, &uop->handle);
- return;
- }
+ spin_lock(&granting_d->grant_table->lock);
- if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
- unlikely(ld == rd) )
- {
- if ( rd != NULL )
- put_domain(rd);
- DPRINTK("Could not find domain %d\n", dom);
- (void)__put_user(GNTST_bad_domain, &uop->handle);
- return;
- }
-
- if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
- {
- put_domain(rd);
- DPRINTK("No more map handles available\n");
- (void)__put_user(GNTST_no_device_space, &uop->handle);
- return;
- }
-
- act = &rd->grant_table->active[ref];
- sha = &rd->grant_table->shared[ref];
-
- spin_lock(&rd->grant_table->lock);
-
if ( act->pin == 0 )
{
/* CASE 1: Activating a previously inactive entry. */
@@ -128,29 +119,29 @@ __gnttab_map_grant_ref(
u32 scombo, prev_scombo, new_scombo;
if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
- unlikely(sdom != ld->id) )
- PIN_FAIL(GNTST_general_error,
+ unlikely(sdom != mapping_d->domain_id) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
- sflags, sdom, ld->id);
+ sflags, sdom, mapping_d->domain_id);
/* Merge two 16-bit values into a 32-bit combined update. */
/* NB. Endianness! */
prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
new_scombo = scombo | GTF_reading;
- if ( !(flags & GNTMAP_readonly) )
+ if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
{
new_scombo |= GTF_writing;
if ( unlikely(sflags & GTF_readonly) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Attempt to write-pin a r/o grant entry.\n");
}
/* NB. prev_scombo is updated in place to seen value. */
if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
- prev_scombo,
+ prev_scombo,
new_scombo)) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Fault while modifying shared flags and domid.\n");
/* Did the combined update work (did we see what we expected?). */
@@ -158,7 +149,7 @@ __gnttab_map_grant_ref(
break;
if ( retries++ == 4 )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Shared grant entry is unstable.\n");
/* Didn't see what we expected. Split out the seen flags & dom. */
@@ -168,24 +159,26 @@ __gnttab_map_grant_ref(
}
/* rmb(); */ /* not on x86 */
- frame = sha->frame;
- if ( unlikely(!pfn_valid(frame)) ||
- unlikely(!((flags & GNTMAP_readonly) ?
- get_page(&frame_table[frame], rd) :
- get_page_and_type(&frame_table[frame], rd,
+
+ frame = __gpfn_to_mfn_foreign(granting_d, sha->frame);
+
+ if ( unlikely(!pfn_valid(frame)) ||
+ unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
+ get_page(&frame_table[frame], granting_d) :
+ get_page_and_type(&frame_table[frame], granting_d,
PGT_writable_page))) )
{
clear_bit(_GTF_writing, &sha->flags);
clear_bit(_GTF_reading, &sha->flags);
- PIN_FAIL(GNTST_general_error,
- "Could not pin the granted frame!\n");
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Could not pin the granted frame (%lx)!\n", frame);
}
- if ( flags & GNTMAP_device_map )
- act->pin += (flags & GNTMAP_readonly) ?
+ if ( dev_hst_ro_flags & GNTMAP_device_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
GNTPIN_devr_inc : GNTPIN_devw_inc;
- if ( flags & GNTMAP_host_map )
- act->pin += (flags & GNTMAP_readonly) ?
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
GNTPIN_hstr_inc : GNTPIN_hstw_inc;
act->domid = sdom;
act->frame = frame;
@@ -199,9 +192,12 @@ __gnttab_map_grant_ref(
* A more accurate check cannot be done with a single comparison.
*/
if ( (act->pin & 0x80808080U) != 0 )
- PIN_FAIL(ENOSPC, "Risk of counter overflow %08x\n", act->pin);
+ PIN_FAIL(unlock_out, ENOSPC,
+ "Risk of counter overflow %08x\n", act->pin);
+
+ frame = act->frame;
- if ( !(flags & GNTMAP_readonly) &&
+ if ( !(dev_hst_ro_flags & GNTMAP_readonly) &&
!((sflags = sha->flags) & GTF_writing) )
{
for ( ; ; )
@@ -209,7 +205,7 @@ __gnttab_map_grant_ref(
u16 prev_sflags;
if ( unlikely(sflags & GTF_readonly) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Attempt to write-pin a r/o grant entry.\n");
prev_sflags = sflags;
@@ -217,68 +213,249 @@ __gnttab_map_grant_ref(
/* NB. prev_sflags is updated in place to seen value. */
if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags,
prev_sflags | GTF_writing)) )
- PIN_FAIL(GNTST_general_error,
- "Fault while modifying shared flags.\n");
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Fault while modifying shared flags.\n");
if ( likely(prev_sflags == sflags) )
break;
if ( retries++ == 4 )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Shared grant entry is unstable.\n");
sflags = prev_sflags;
}
- if ( unlikely(!get_page_type(&frame_table[act->frame],
+ if ( unlikely(!get_page_type(&frame_table[frame],
PGT_writable_page)) )
{
clear_bit(_GTF_writing, &sha->flags);
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Attempt to write-pin a unwritable page.\n");
}
}
- if ( flags & GNTMAP_device_map )
- act->pin += (flags & GNTMAP_readonly) ?
+ if ( dev_hst_ro_flags & GNTMAP_device_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
GNTPIN_devr_inc : GNTPIN_devw_inc;
- if ( flags & GNTMAP_host_map )
- act->pin += (flags & GNTMAP_readonly) ?
+
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
GNTPIN_hstr_inc : GNTPIN_hstw_inc;
}
- ld->grant_table->maptrack[handle].domid = dom;
- ld->grant_table->maptrack[handle].ref_and_flags =
- (ref << MAPTRACK_REF_SHIFT) | (flags & MAPTRACK_GNTMAP_MASK);
+ /*
+ * At this point:
+ * act->pin updated to reflect mapping.
+ * sha->flags updated to indicate to granting domain mapping done.
+ * frame contains the mfn.
+ */
- /* Unchecked and unconditional. */
- (void)__put_user(handle, &uop->handle);
- (void)__put_user(act->frame, &uop->dev_bus_addr);
+ spin_unlock(&granting_d->grant_table->lock);
- spin_unlock(&rd->grant_table->lock);
- put_domain(rd);
- return;
+ if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
+ {
+ /* Write update into the pagetable. */
+ l1_pgentry_t pte;
+ pte = l1e_from_pfn(frame, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
+ if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
+ l1e_add_flags(pte,_PAGE_RW);
+ rc = update_grant_va_mapping( host_virt_addr, pte,
+ mapping_d, mapping_ed );
+
+ /*
+ * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
+ * This is done in the outer gnttab_map_grant_ref.
+ */
+
+ if ( rc < 0 )
+ {
+ /* Failure: undo and abort. */
+
+ spin_lock(&granting_d->grant_table->lock);
+
+ if ( dev_hst_ro_flags & GNTMAP_readonly )
+ {
+ act->pin -= GNTPIN_hstr_inc;
+ }
+ else
+ {
+ act->pin -= GNTPIN_hstw_inc;
+ if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
+ {
+ clear_bit(_GTF_writing, &sha->flags);
+ put_page_type(&frame_table[frame]);
+ }
+ }
+
+ if ( act->pin == 0 )
+ {
+ clear_bit(_GTF_reading, &sha->flags);
+ put_page(&frame_table[frame]);
+ }
+
+ spin_unlock(&granting_d->grant_table->lock);
+ }
+
+ }
+
+ *pframe = frame;
+ return rc;
+
+ unlock_out:
+ spin_unlock(&granting_d->grant_table->lock);
+ return rc;
+}
+
+/*
+ * Returns 0 if TLB flush / invalidate required by caller.
+ * va will indicate the address to be invalidated.
+ */
+static int
+__gnttab_map_grant_ref(
+ gnttab_map_grant_ref_t *uop,
+ unsigned long *va)
+{
+ domid_t dom;
+ grant_ref_t ref;
+ struct domain *ld, *rd;
+ struct vcpu *led;
+ u16 dev_hst_ro_flags;
+ int handle;
+ unsigned long frame = 0, host_virt_addr;
+ int rc;
+
+ led = current;
+ ld = led->domain;
+
+ /* Bitwise-OR avoids short-circuiting which screws control flow. */
+ if ( unlikely(__get_user(dom, &uop->dom) |
+ __get_user(ref, &uop->ref) |
+ __get_user(host_virt_addr, &uop->host_virt_addr) |
+ __get_user(dev_hst_ro_flags, &uop->flags)) )
+ {
+ DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
+ return -EFAULT; /* don't set status */
+ }
+
+
+ if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) &&
+ unlikely(!__addr_ok(host_virt_addr)))
+ {
+ DPRINTK("Bad virtual address (%lx) or flags (%x).\n",
+ host_virt_addr, dev_hst_ro_flags);
+ (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
+ return GNTST_bad_gntref;
+ }
+
+ if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
+ unlikely((dev_hst_ro_flags &
+ (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
+ {
+ DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags);
+ (void)__put_user(GNTST_bad_gntref, &uop->handle);
+ return GNTST_bad_gntref;
+ }
+
+ if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+ unlikely(ld == rd) )
+ {
+ if ( rd != NULL )
+ put_domain(rd);
+ DPRINTK("Could not find domain %d\n", dom);
+ (void)__put_user(GNTST_bad_domain, &uop->handle);
+ return GNTST_bad_domain;
+ }
+
+ /* Get a maptrack handle. */
+ if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
+ {
+ int i;
+ grant_mapping_t *new_mt;
+ grant_table_t *lgt = ld->grant_table;
+
+ /* Grow the maptrack table. */
+ new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1);
+ if ( new_mt == NULL )
+ {
+ put_domain(rd);
+ DPRINTK("No more map handles available\n");
+ (void)__put_user(GNTST_no_device_space, &uop->handle);
+ return GNTST_no_device_space;
+ }
+
+ memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order);
+ for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ )
+ new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
+
+ free_xenheap_pages(lgt->maptrack, lgt->maptrack_order);
+ lgt->maptrack = new_mt;
+ lgt->maptrack_order += 1;
+ lgt->maptrack_limit <<= 1;
+
+ printk("Doubled maptrack size\n");
+ handle = get_maptrack_handle(ld->grant_table);
+ }
+
+#if GRANT_DEBUG_VERBOSE
+ DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
+ ref, dom, dev_hst_ro_flags);
+#endif
+
+ if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
+ dev_hst_ro_flags,
+ host_virt_addr, &frame)))
+ {
+ /*
+ * Only make the maptrack live _after_ writing the pte, in case we
+ * overwrite the same frame number, causing a maptrack walk to find it
+ */
+ ld->grant_table->maptrack[handle].domid = dom;
+
+ ld->grant_table->maptrack[handle].ref_and_flags
+ = (ref << MAPTRACK_REF_SHIFT) |
+ (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
+
+ (void)__put_user(frame, &uop->dev_bus_addr);
+
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ *va = host_virt_addr;
+
+ (void)__put_user(handle, &uop->handle);
+ }
+ else
+ {
+ (void)__put_user(rc, &uop->handle);
+ put_maptrack_handle(ld->grant_table, handle);
+ }
- fail:
- (void)__put_user(rc, &uop->handle);
- spin_unlock(&rd->grant_table->lock);
put_domain(rd);
- put_maptrack_handle(ld->grant_table, handle);
+ return rc;
}
static long
gnttab_map_grant_ref(
gnttab_map_grant_ref_t *uop, unsigned int count)
{
- int i;
+ int i, flush = 0;
+ unsigned long va = 0;
+
for ( i = 0; i < count; i++ )
- __gnttab_map_grant_ref(&uop[i]);
+ if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 )
+ flush++;
+
+ if ( flush == 1 )
+ flush_tlb_one_mask(current->domain->cpumask, va);
+ else if ( flush != 0 )
+ flush_tlb_mask(current->domain->cpumask);
+
return 0;
}
-static void
+static int
__gnttab_unmap_grant_ref(
- gnttab_unmap_grant_ref_t *uop)
+ gnttab_unmap_grant_ref_t *uop,
+ unsigned long *va)
{
domid_t dom;
grant_ref_t ref;
@@ -288,10 +465,11 @@ __gnttab_unmap_grant_ref(
active_grant_entry_t *act;
grant_entry_t *sha;
grant_mapping_t *map;
- s16 rc = 0;
+ u16 flags;
+ s16 rc = 1;
unsigned long frame, virt;
- ld = current;
+ ld = current->domain;
/* Bitwise-OR avoids short-circuiting which screws control flow. */
if ( unlikely(__get_user(virt, &uop->host_virt_addr) |
@@ -299,21 +477,22 @@ __gnttab_unmap_grant_ref(
__get_user(handle, &uop->handle)) )
{
DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
- return; /* don't set status */
+ return -EFAULT; /* don't set status */
}
map = &ld->grant_table->maptrack[handle];
- if ( unlikely(handle >= NR_MAPTRACK_ENTRIES) ||
+ if ( unlikely(handle >= ld->grant_table->maptrack_limit) ||
unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) )
{
DPRINTK("Bad handle (%d).\n", handle);
(void)__put_user(GNTST_bad_handle, &uop->status);
- return;
+ return GNTST_bad_handle;
}
- dom = map->domid;
- ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+ dom = map->domid;
+ ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+ flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK;
if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
unlikely(ld == rd) )
@@ -322,60 +501,150 @@ __gnttab_unmap_grant_ref(
put_domain(rd);
DPRINTK("Could not find domain %d\n", dom);
(void)__put_user(GNTST_bad_domain, &uop->status);
- return;
+ return GNTST_bad_domain;
}
+#if GRANT_DEBUG_VERBOSE
+ DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
+ ref, dom, handle);
+#endif
+
act = &rd->grant_table->active[ref];
sha = &rd->grant_table->shared[ref];
spin_lock(&rd->grant_table->lock);
- if ( frame != 0 )
+ if ( frame == 0 )
+ {
+ frame = act->frame;
+ }
+ else if ( frame == GNTUNMAP_DEV_FROM_VIRT )
+ {
+ if ( !( flags & GNTMAP_device_map ) )
+ PIN_FAIL(unmap_out, GNTST_bad_dev_addr,
+ "Bad frame number: frame not mapped for dev access.\n");
+ frame = act->frame;
+
+ /* Frame will be unmapped for device access below if virt addr okay. */
+ }
+ else
{
if ( unlikely(frame != act->frame) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unmap_out, GNTST_general_error,
"Bad frame number doesn't match gntref.\n");
- if ( map->ref_and_flags & GNTMAP_device_map )
- act->pin -= (map->ref_and_flags & GNTMAP_readonly) ?
- GNTPIN_devr_inc : GNTPIN_devw_inc;
+ if ( flags & GNTMAP_device_map )
+ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
+ : GNTPIN_devw_inc;
+
+ map->ref_and_flags &= ~GNTMAP_device_map;
+ (void)__put_user(0, &uop->dev_bus_addr);
+
+ /* Frame is now unmapped for device access. */
}
- else
+
+ if ( (virt != 0) &&
+ (flags & GNTMAP_host_map) &&
+ ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
{
- frame = act->frame;
+ l1_pgentry_t *pl1e;
+ unsigned long _ol1e;
+
+ pl1e = &linear_pg_table[l1_linear_offset(virt)];
+
+ if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
+ {
+ DPRINTK("Could not find PTE entry for address %lx\n", virt);
+ rc = -EINVAL;
+ goto unmap_out;
+ }
+
+ /*
+ * Check that the virtual address supplied is actually mapped to
+ * act->frame.
+ */
+ if ( unlikely((_ol1e >> PAGE_SHIFT) != frame ))
+ {
+ DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
+ _ol1e, virt, frame);
+ rc = -EINVAL;
+ goto unmap_out;
+ }
+
+ /* Delete pagetable entry. */
+ if ( unlikely(__put_user(0, (unsigned long *)pl1e)))
+ {
+ DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n",
+ pl1e, virt);
+ rc = -EINVAL;
+ goto unmap_out;
+ }
+
+ map->ref_and_flags &= ~GNTMAP_host_map;
+
+ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
+ : GNTPIN_hstw_inc;
+
+ if ( frame == GNTUNMAP_DEV_FROM_VIRT )
+ {
+ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
+ : GNTPIN_devw_inc;
+
+ map->ref_and_flags &= ~GNTMAP_device_map;
+ (void)__put_user(0, &uop->dev_bus_addr);
+ }
+
+ rc = 0;
+ *va = virt;
}
- if ( (virt != 0) && (map->ref_and_flags & GNTMAP_host_map) )
+ if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
{
- act->pin -= (map->ref_and_flags & GNTMAP_readonly) ?
- GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+ map->ref_and_flags = 0;
+ put_maptrack_handle(ld->grant_table, handle);
}
- if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) &&
- !(map->ref_and_flags & GNTMAP_readonly) )
+ /* If just unmapped a writable mapping, mark as dirtied */
+ if ( unlikely(shadow_mode_log_dirty(rd)) &&
+ !( flags & GNTMAP_readonly ) )
+ mark_dirty(rd, frame);
+
+ /* If the last writable mapping has been removed, put_page_type */
+ if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) &&
+ ( !( flags & GNTMAP_readonly ) ) )
{
- put_page_type(&frame_table[frame]);
clear_bit(_GTF_writing, &sha->flags);
+ put_page_type(&frame_table[frame]);
}
if ( act->pin == 0 )
{
- put_page(&frame_table[frame]);
clear_bit(_GTF_reading, &sha->flags);
+ put_page(&frame_table[frame]);
}
- fail:
+ unmap_out:
(void)__put_user(rc, &uop->status);
spin_unlock(&rd->grant_table->lock);
put_domain(rd);
+ return rc;
}
static long
gnttab_unmap_grant_ref(
gnttab_unmap_grant_ref_t *uop, unsigned int count)
{
- int i;
+ int i, flush = 0;
+ unsigned long va = 0;
+
for ( i = 0; i < count; i++ )
- __gnttab_unmap_grant_ref(&uop[i]);
+ if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 )
+ flush++;
+
+ if ( flush == 1 )
+ flush_tlb_one_mask(current->domain->cpumask, va);
+ else if ( flush != 0 )
+ flush_tlb_mask(current->domain->cpumask);
+
return 0;
}
@@ -385,6 +654,7 @@ gnttab_setup_table(
{
gnttab_setup_table_t op;
struct domain *d;
+ int i;
if ( count != 1 )
return -EINVAL;
@@ -395,18 +665,19 @@ gnttab_setup_table(
return -EFAULT;
}
- if ( unlikely(op.nr_frames > 1) )
+ if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) )
{
- DPRINTK("Xen only supports one grant-table frame per domain.\n");
+ DPRINTK("Xen only supports up to %d grant-table frames per domain.\n",
+ NR_GRANT_FRAMES);
(void)put_user(GNTST_general_error, &uop->status);
return 0;
}
if ( op.dom == DOMID_SELF )
{
- op.dom = current->id;
+ op.dom = current->domain->domain_id;
}
- else if ( unlikely(!IS_PRIV(current)) )
+ else if ( unlikely(!IS_PRIV(current->domain)) )
{
(void)put_user(GNTST_permission_denied, &uop->status);
return 0;
@@ -419,17 +690,112 @@ gnttab_setup_table(
return 0;
}
- if ( op.nr_frames == 1 )
+ if ( op.nr_frames <= NR_GRANT_FRAMES )
{
ASSERT(d->grant_table != NULL);
(void)put_user(GNTST_okay, &uop->status);
- (void)put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT,
- &uop->frame_list[0]);
+ for ( i = 0; i < op.nr_frames; i++ )
+ (void)put_user(
+ (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
+ &uop->frame_list[i]);
+ }
+
+ put_domain(d);
+ return 0;
+}
+
+#if GRANT_DEBUG
+static int
+gnttab_dump_table(gnttab_dump_table_t *uop)
+{
+ grant_table_t *gt;
+ gnttab_dump_table_t op;
+ struct domain *d;
+ u32 shared_mfn;
+ active_grant_entry_t *act;
+ grant_entry_t sha_copy;
+ grant_mapping_t *maptrack;
+ int i;
+
+
+ if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
+ {
+ DPRINTK("Fault while reading gnttab_dump_table_t.\n");
+ return -EFAULT;
+ }
+
+ if ( op.dom == DOMID_SELF )
+ {
+ op.dom = current->domain->domain_id;
+ }
+
+ if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
+ {
+ DPRINTK("Bad domid %d.\n", op.dom);
+ (void)put_user(GNTST_bad_domain, &uop->status);
+ return 0;
+ }
+
+ ASSERT(d->grant_table != NULL);
+ gt = d->grant_table;
+ (void)put_user(GNTST_okay, &uop->status);
+
+ shared_mfn = virt_to_phys(d->grant_table->shared);
+
+ DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
+ op.dom, shared_mfn);
+
+ ASSERT(d->grant_table->active != NULL);
+ ASSERT(d->grant_table->shared != NULL);
+ ASSERT(d->grant_table->maptrack != NULL);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ {
+ sha_copy = gt->shared[i];
+
+ if ( sha_copy.flags )
+ {
+ DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) "
+ "dom:(%hu) frame:(%lx)\n",
+ op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
+ }
}
+ spin_lock(&gt->lock);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ {
+ act = &gt->active[i];
+
+ if ( act->pin )
+ {
+ DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) "
+ "dom:(%hu) frame:(%lx)\n",
+ op.dom, i, act->pin, act->domid, act->frame);
+ }
+ }
+
+ for ( i = 0; i < gt->maptrack_limit; i++ )
+ {
+ maptrack = &gt->maptrack[i];
+
+ if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK )
+ {
+ DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) "
+ "dom:(%hu)\n",
+ op.dom, i,
+ maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT,
+ maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK,
+ maptrack->domid);
+ }
+ }
+
+ spin_unlock(&gt->lock);
+
put_domain(d);
return 0;
}
+#endif
long
do_grant_table_op(
@@ -437,34 +803,42 @@ do_grant_table_op(
{
long rc;
- /* XXX stubbed out XXX */
- return -ENOSYS;
-
if ( count > 512 )
return -EINVAL;
+ LOCK_BIGLOCK(current->domain);
+
+ rc = -EFAULT;
switch ( cmd )
{
case GNTTABOP_map_grant_ref:
if ( unlikely(!array_access_ok(
- VERIFY_WRITE, uop, count, sizeof(gnttab_map_grant_ref_t))) )
- return -EFAULT;
+ uop, count, sizeof(gnttab_map_grant_ref_t))) )
+ goto out;
rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
break;
case GNTTABOP_unmap_grant_ref:
if ( unlikely(!array_access_ok(
- VERIFY_WRITE, uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
- return -EFAULT;
+ uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
+ goto out;
rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count);
break;
case GNTTABOP_setup_table:
rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
break;
+#if GRANT_DEBUG
+ case GNTTABOP_dump_table:
+ rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
+ break;
+#endif
default:
rc = -ENOSYS;
break;
}
+out:
+ UNLOCK_BIGLOCK(current->domain);
+
return rc;
}
@@ -472,41 +846,157 @@ int
gnttab_check_unmap(
struct domain *rd, struct domain *ld, unsigned long frame, int readonly)
{
- return 0;
+ /* Called when put_page is invoked on a page belonging to a foreign domain.
+ * Instead of decrementing the frame table ref count, locate the grant
+ * table entry, if any, and if found, decrement that count.
+ * Called a _lot_ at domain creation because pages mapped by priv domains
+ * also traverse this.
+ */
+
+ /* Note: If the same frame is mapped multiple times, and then one of
+ * the ptes is overwritten, which maptrack handle gets invalidated?
+ * Advice: Don't do it. Explicitly unmap.
+ */
+
+ unsigned int handle, ref, refcount;
+ grant_table_t *lgt, *rgt;
+ active_grant_entry_t *act;
+ grant_mapping_t *map;
+ int found = 0;
+
+ lgt = ld->grant_table;
+
+#if GRANT_DEBUG_VERBOSE
+ if ( ld->domain_id != 0 )
+ {
+ DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n",
+ rd->domain_id, ld->domain_id, frame, readonly);
+ }
+#endif
+
+ /* Fast exit if we're not mapping anything using grant tables */
+ if ( lgt->map_count == 0 )
+ return 0;
+
+ if ( get_domain(rd) == 0 )
+ {
+ DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
+ rd->domain_id);
+ return 0;
+ }
+
+ rgt = rd->grant_table;
+
+ for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
+ {
+ map = &lgt->maptrack[handle];
+
+ if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
+ ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly))))
+ {
+ ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT);
+ act = &rgt->active[ref];
+
+ spin_lock(&rgt->lock);
+
+ if ( act->frame != frame )
+ {
+ spin_unlock(&rgt->lock);
+ continue;
+ }
+
+ refcount = act->pin & ( readonly ? GNTPIN_hstr_mask
+ : GNTPIN_hstw_mask );
+ if ( refcount == 0 )
+ {
+ spin_unlock(&rgt->lock);
+ continue;
+ }
+
+ /* gotcha */
+ DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
+ rd->domain_id, ld->domain_id, frame, readonly);
+
+ if ( readonly )
+ act->pin -= GNTPIN_hstr_inc;
+ else
+ {
+ act->pin -= GNTPIN_hstw_inc;
+
+ /* any more granted writable mappings? */
+ if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
+ {
+ clear_bit(_GTF_writing, &rgt->shared[ref].flags);
+ put_page_type(&frame_table[frame]);
+ }
+ }
+
+ if ( act->pin == 0 )
+ {
+ clear_bit(_GTF_reading, &rgt->shared[ref].flags);
+ put_page(&frame_table[frame]);
+ }
+ spin_unlock(&rgt->lock);
+
+ clear_bit(GNTMAP_host_map, &map->ref_and_flags);
+
+ if ( !(map->ref_and_flags & GNTMAP_device_map) )
+ put_maptrack_handle(lgt, handle);
+
+ found = 1;
+ break;
+ }
+ }
+ put_domain(rd);
+
+ return found;
}
int
gnttab_prepare_for_transfer(
struct domain *rd, struct domain *ld, grant_ref_t ref)
{
- grant_table_t *t;
- grant_entry_t *e;
+ grant_table_t *rgt;
+ grant_entry_t *sha;
domid_t sdom;
u16 sflags;
u32 scombo, prev_scombo;
int retries = 0;
+ unsigned long target_pfn;
- if ( unlikely((t = rd->grant_table) == NULL) ||
+ DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
+ rd->domain_id, ld->domain_id, ref);
+
+ if ( unlikely((rgt = rd->grant_table) == NULL) ||
unlikely(ref >= NR_GRANT_ENTRIES) )
{
- DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", rd->id, ref);
+ DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n",
+ rd->domain_id, ref);
return 0;
}
- spin_lock(&t->lock);
+ spin_lock(&rgt->lock);
- e = &t->shared[ref];
+ sha = &rgt->shared[ref];
- sflags = e->flags;
- sdom = e->domid;
+ sflags = sha->flags;
+ sdom = sha->domid;
for ( ; ; )
{
+ target_pfn = sha->frame;
+
+ if ( unlikely(target_pfn >= max_page ) )
+ {
+ DPRINTK("Bad pfn (%lx)\n", target_pfn);
+ goto fail;
+ }
+
if ( unlikely(sflags != GTF_accept_transfer) ||
- unlikely(sdom != ld->id) )
+ unlikely(sdom != ld->domain_id) )
{
DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
- sflags, sdom, ld->id);
+ sflags, sdom, ld->domain_id);
goto fail;
}
@@ -515,7 +1005,7 @@ gnttab_prepare_for_transfer(
prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
/* NB. prev_scombo is updated in place to seen value. */
- if ( unlikely(cmpxchg_user((u32 *)&e->flags, prev_scombo,
+ if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo,
prev_scombo | GTF_transfer_committed)) )
{
DPRINTK("Fault while modifying shared flags and domid.\n");
@@ -538,20 +1028,50 @@ gnttab_prepare_for_transfer(
sdom = (u16)(prev_scombo >> 16);
}
- spin_unlock(&t->lock);
+ spin_unlock(&rgt->lock);
return 1;
fail:
- spin_unlock(&t->lock);
+ spin_unlock(&rgt->lock);
return 0;
}
void
gnttab_notify_transfer(
- struct domain *rd, grant_ref_t ref, unsigned long frame)
+ struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
{
- wmb(); /* Ensure that the reassignment is globally visible. */
- rd->grant_table->shared[ref].frame = frame;
+ grant_entry_t *sha;
+ unsigned long pfn;
+
+ DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
+ rd->domain_id, ld->domain_id, ref);
+
+ sha = &rd->grant_table->shared[ref];
+
+ spin_lock(&rd->grant_table->lock);
+
+ pfn = sha->frame;
+
+ if ( unlikely(pfn >= max_page ) )
+ DPRINTK("Bad pfn (%lx)\n", pfn);
+ else
+ {
+ machine_to_phys_mapping[frame] = pfn;
+
+ if ( unlikely(shadow_mode_log_dirty(ld)))
+ mark_dirty(ld, frame);
+
+ if (shadow_mode_translate(ld))
+ __phys_to_machine_mapping[pfn] = frame;
+ }
+ sha->frame = __mfn_to_gpfn(rd, frame);
+ sha->domid = rd->domain_id;
+ wmb();
+ sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
+
+ spin_unlock(&rd->grant_table->lock);
+
+ return;
}
int
@@ -561,7 +1081,7 @@ grant_table_create(
grant_table_t *t;
int i;
- if ( (t = xmalloc(sizeof(*t))) == NULL )
+ if ( (t = xmalloc(grant_table_t)) == NULL )
goto no_mem;
/* Simple stuff. */
@@ -569,22 +1089,33 @@ grant_table_create(
spin_lock_init(&t->lock);
/* Active grant table. */
- if ( (t->active = xmalloc(sizeof(active_grant_entry_t) *
- NR_GRANT_ENTRIES)) == NULL )
+ if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES))
+ == NULL )
goto no_mem;
memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES);
- if ( (t->maptrack = (void *)alloc_xenheap_page()) == NULL )
+ /* Tracking of mapped foreign frames table */
+ if ( (t->maptrack = alloc_xenheap_page()) == NULL )
goto no_mem;
+ t->maptrack_order = 0;
+ t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t);
memset(t->maptrack, 0, PAGE_SIZE);
- for ( i = 0; i < NR_MAPTRACK_ENTRIES; i++ )
+ for ( i = 0; i < t->maptrack_limit; i++ )
t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
/* Shared grant table. */
- if ( (t->shared = (void *)alloc_xenheap_page()) == NULL )
+ t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES);
+ if ( t->shared == NULL )
goto no_mem;
- memset(t->shared, 0, PAGE_SIZE);
- SHARE_PFN_WITH_DOMAIN(virt_to_page(t->shared), d);
+ memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE);
+
+ for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+ {
+ SHARE_PFN_WITH_DOMAIN(
+ virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d);
+ machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] =
+ INVALID_M2P_ENTRY;
+ }
/* Okay, install the structure. */
wmb(); /* avoid races with lock-free access to d->grant_table */
@@ -594,16 +1125,88 @@ grant_table_create(
no_mem:
if ( t != NULL )
{
- if ( t->active != NULL )
- xfree(t->active);
+ xfree(t->active);
if ( t->maptrack != NULL )
- free_xenheap_page((unsigned long)t->maptrack);
+ free_xenheap_page(t->maptrack);
xfree(t);
}
return -ENOMEM;
}
void
+gnttab_release_dev_mappings(grant_table_t *gt)
+{
+ grant_mapping_t *map;
+ domid_t dom;
+ grant_ref_t ref;
+ u16 handle;
+ struct domain *ld, *rd;
+ unsigned long frame;
+ active_grant_entry_t *act;
+ grant_entry_t *sha;
+
+ ld = current->domain;
+
+ for ( handle = 0; handle < gt->maptrack_limit; handle++ )
+ {
+ map = &gt->maptrack[handle];
+
+ if ( map->ref_and_flags & GNTMAP_device_map )
+ {
+ dom = map->domid;
+ ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+
+ DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
+ handle, ref,
+ map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);
+
+ if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+ unlikely(ld == rd) )
+ {
+ if ( rd != NULL )
+ put_domain(rd);
+
+ printk(KERN_WARNING "Grant release: No dom%d\n", dom);
+ continue;
+ }
+
+ act = &rd->grant_table->active[ref];
+ sha = &rd->grant_table->shared[ref];
+
+ spin_lock(&rd->grant_table->lock);
+
+ if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) )
+ {
+ frame = act->frame;
+
+ if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) &&
+ ( (act->pin & GNTPIN_devw_mask) > 0 ) )
+ {
+ clear_bit(_GTF_writing, &sha->flags);
+ put_page_type(&frame_table[frame]);
+ }
+
+ act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask);
+
+ if ( act->pin == 0 )
+ {
+ clear_bit(_GTF_reading, &sha->flags);
+ map->ref_and_flags = 0;
+ put_page(&frame_table[frame]);
+ }
+ else
+ map->ref_and_flags &= ~GNTMAP_device_map;
+ }
+
+ spin_unlock(&rd->grant_table->lock);
+
+ put_domain(rd);
+ }
+ }
+}
+
+
+void
grant_table_destroy(
struct domain *d)
{
@@ -613,8 +1216,8 @@ grant_table_destroy(
{
/* Free memory relating to this grant table. */
d->grant_table = NULL;
- free_xenheap_page((unsigned long)t->shared);
- free_xenheap_page((unsigned long)t->maptrack);
+ free_xenheap_pages(t->shared, ORDER_GRANT_FRAMES);
+ free_xenheap_page(t->maptrack);
xfree(t->active);
xfree(t);
}
@@ -626,3 +1229,13 @@ grant_table_init(
{
/* Nothing. */
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index cf47e3214c..3acaac8e1b 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -13,54 +13,69 @@
#include <xen/errno.h>
#include <xen/compile.h>
#include <xen/sched.h>
+#include <asm/current.h>
void cmdline_parse(char *cmdline)
{
- unsigned char *opt_end, *opt;
+ char opt[100], *optval, *p = cmdline, *q;
struct kernel_param *param;
- if ( cmdline == NULL )
+ if ( p == NULL )
return;
- while ( *cmdline == ' ' )
- cmdline++;
- cmdline = strchr(cmdline, ' '); /* skip the image name */
- while ( cmdline != NULL )
+ /* Skip whitespace and the image name. */
+ while ( *p == ' ' )
+ p++;
+ if ( (p = strchr(p, ' ')) == NULL )
+ return;
+
+ for ( ; ; )
{
- while ( *cmdline == ' ' )
- cmdline++;
- if ( *cmdline == '\0' )
+ /* Skip whitespace. */
+ while ( *p == ' ' )
+ p++;
+ if ( *p == '\0' )
break;
- opt_end = strchr(cmdline, ' ');
- if ( opt_end != NULL )
- *opt_end++ = '\0';
- opt = strchr(cmdline, '=');
- if ( opt != NULL )
- *opt++ = '\0';
+
+ /* Grab the next whitespace-delimited option. */
+ q = opt;
+ while ( (*p != ' ') && (*p != '\0') )
+ *q++ = *p++;
+ *q = '\0';
+
+ /* Search for value part of a key=value option. */
+ optval = strchr(opt, '=');
+ if ( optval != NULL )
+ *optval++ = '\0';
+
for ( param = &__setup_start; param != &__setup_end; param++ )
{
- if ( strcmp(param->name, cmdline ) != 0 )
+ if ( strcmp(param->name, opt ) != 0 )
continue;
+
switch ( param->type )
{
case OPT_STR:
- if ( opt != NULL )
+ if ( optval != NULL )
{
- strncpy(param->var, opt, param->len);
+ strncpy(param->var, optval, param->len);
((char *)param->var)[param->len-1] = '\0';
}
break;
case OPT_UINT:
- if ( opt != NULL )
+ if ( optval != NULL )
*(unsigned int *)param->var =
- simple_strtol(opt, (char **)&opt, 0);
+ simple_strtol(optval, (char **)&optval, 0);
break;
case OPT_BOOL:
*(int *)param->var = 1;
break;
+ case OPT_CUSTOM:
+ if ( optval != NULL )
+ ((void (*)(char *))param->var)(optval);
+ break;
}
}
- cmdline = opt_end;
}
}
@@ -75,10 +90,9 @@ long do_xen_version(int cmd)
return (XEN_VERSION<<16) | (XEN_SUBVERSION);
}
-vm_assist_info_t vm_assist_info[MAX_VMASST_TYPE + 1];
long do_vm_assist(unsigned int cmd, unsigned int type)
{
- return vm_assist(current, cmd, type);
+ return vm_assist(current->domain, cmd, type);
}
long do_ni_hypercall(void)
@@ -86,3 +100,13 @@ long do_ni_hypercall(void)
/* No-op hypercall. */
return -ENOSYS;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index dad82476d0..f7136addda 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -2,6 +2,7 @@
* keyhandler.c
*/
+#include <asm/regs.h>
#include <xen/keyhandler.h>
#include <xen/reboot.h>
#include <xen/event.h>
@@ -9,6 +10,8 @@
#include <xen/serial.h>
#include <xen/sched.h>
#include <xen/softirq.h>
+#include <xen/domain.h>
+#include <asm/debugger.h>
#define KEY_MAX 256
#define STR_MAX 64
@@ -34,7 +37,7 @@ static void keypress_softirq(void)
(*h)(key);
}
-void handle_keypress(unsigned char key, struct xen_regs *regs)
+void handle_keypress(unsigned char key, struct cpu_user_regs *regs)
{
irq_keyhandler_t *h;
@@ -81,13 +84,13 @@ static void show_handlers(unsigned char key)
key_table[i].desc);
}
-static void dump_registers(unsigned char key, struct xen_regs *regs)
+static void dump_registers(unsigned char key, struct cpu_user_regs *regs)
{
printk("'%c' pressed -> dumping registers\n", key);
show_registers(regs);
}
-static void halt_machine(unsigned char key, struct xen_regs *regs)
+static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
{
printk("'%c' pressed -> rebooting machine\n", key);
machine_restart(NULL);
@@ -96,6 +99,7 @@ static void halt_machine(unsigned char key, struct xen_regs *regs)
static void do_task_queues(unsigned char key)
{
struct domain *d;
+ struct vcpu *v;
s_time_t now = NOW();
printk("'%c' pressed -> dumping task queues (now=0x%X:%08X)\n", key,
@@ -105,19 +109,31 @@ static void do_task_queues(unsigned char key)
for_each_domain ( d )
{
- printk("Xen: DOM %u, CPU %d [has=%c] flags=%lx refcnt=%d nr_pages=%d "
- "xenheap_pages=%d\n",
- d->id, d->processor,
- test_bit(DF_RUNNING, &d->flags) ? 'T':'F', d->flags,
+ printk("Xen: DOM %u, flags=%lx refcnt=%d nr_pages=%d "
+ "xenheap_pages=%d\n", d->domain_id, d->domain_flags,
atomic_read(&d->refcnt), d->tot_pages, d->xenheap_pages);
dump_pageframe_info(d);
- printk("Guest: upcall_pend = %02x, upcall_mask = %02x\n",
- d->shared_info->vcpu_data[0].evtchn_upcall_pending,
- d->shared_info->vcpu_data[0].evtchn_upcall_mask);
- printk("Notifying guest...\n");
- send_guest_virq(d, VIRQ_DEBUG);
+ for_each_vcpu ( d, v ) {
+ printk("Guest: %p CPU %d [has=%c] flags=%lx "
+ "upcall_pend = %02x, upcall_mask = %02x\n", v,
+ v->processor,
+ test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F',
+ v->vcpu_flags,
+ v->vcpu_info->evtchn_upcall_pending,
+ v->vcpu_info->evtchn_upcall_mask);
+ printk("Notifying guest... %d/%d\n", d->domain_id, v->vcpu_id);
+ printk("port %d/%d stat %d %d %d\n",
+ VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
+ test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
+ &d->shared_info->evtchn_pending[0]),
+ test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
+ &d->shared_info->evtchn_mask[0]),
+ test_bit(v->virq_to_evtchn[VIRQ_DEBUG]>>5,
+ &v->vcpu_info->evtchn_pending_sel));
+ send_guest_virq(v, VIRQ_DEBUG);
+ }
}
read_unlock(&domlist_lock);
@@ -135,6 +151,24 @@ extern void perfc_printall(unsigned char key);
extern void perfc_reset(unsigned char key);
#endif
+void do_debug_key(unsigned char key, struct cpu_user_regs *regs)
+{
+ (void)debugger_trap_fatal(0xf001, regs);
+ nop(); /* Prevent the compiler doing tail call
+ optimisation, as that confuses xendbg a
+ bit. */
+}
+
+#ifndef NDEBUG
+void debugtrace_key(unsigned char key)
+{
+ debugtrace_send_to_console = !debugtrace_send_to_console;
+ debugtrace_dump();
+ printk("debugtrace_printk now writing to %s.\n",
+ debugtrace_send_to_console ? "console" : "buffer");
+}
+#endif
+
void initialize_keytable(void)
{
open_softirq(KEYPRESS_SOFTIRQ, keypress_softirq);
@@ -156,7 +190,9 @@ void initialize_keytable(void)
#ifndef NDEBUG
register_keyhandler(
- 'o', audit_domains_key, "audit domains >0 EXPERIMENTAL");
+ 'o', audit_domains_key, "audit domains >0 EXPERIMENTAL");
+ register_keyhandler(
+ 'T', debugtrace_key, "toggle debugtrace to console/buffer");
#endif
#ifdef PERF_COUNTERS
@@ -165,4 +201,16 @@ void initialize_keytable(void)
register_keyhandler(
'P', perfc_reset, "reset performance counters");
#endif
+
+ register_irq_keyhandler('%', do_debug_key, "Trap to xendbg");
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/lib.c b/xen/common/lib.c
index b01d5a1727..764ad7143d 100644
--- a/xen/common/lib.c
+++ b/xen/common/lib.c
@@ -394,9 +394,9 @@ __udivdi3(a, b)
*/
u64 __umoddi3(u64 a, u64 b)
{
- u64 rem;
- __qdivrem(a, b, &rem);
- return rem;
+ u64 rem;
+ __qdivrem(a, b, &rem);
+ return rem;
}
/*
@@ -425,19 +425,44 @@ s64 __moddi3(s64 a, s64 b)
ub = b, neg2 = 0;
__qdivrem(ua, ub, &urem);
- /* There 4 different cases: */
- if(neg1)
- {
- if(neg2)
- return -urem;
- else
- return ub - urem;
- }
- else
- if(neg2)
- return -ub + urem;
- else
- return urem;
+ /* There 4 different cases: */
+ if (neg1) {
+ if (neg2)
+ return -urem;
+ else
+ return ub - urem;
+ } else {
+ if (neg2)
+ return -ub + urem;
+ else
+ return urem;
+ }
}
#endif /* BITS_PER_LONG == 32 */
+
+unsigned long long parse_size_and_unit(char *s)
+{
+ unsigned long long ret = simple_strtoull(s, &s, 0);
+
+ switch (*s) {
+ case 'G': case 'g':
+ ret <<= 10;
+ case 'M': case 'm':
+ ret <<= 10;
+ case 'K': case 'k':
+ ret <<= 10;
+ }
+
+ return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/xen/common/multicall.c b/xen/common/multicall.c
index 04605ebb2a..a3af8ef221 100644
--- a/xen/common/multicall.c
+++ b/xen/common/multicall.c
@@ -10,6 +10,8 @@
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/multicall.h>
+#include <asm/current.h>
+#include <asm/hardirq.h>
struct mc_state mc_state[NR_CPUS];
@@ -24,11 +26,10 @@ long do_multicall(multicall_entry_t *call_list, unsigned int nr_calls)
return -EINVAL;
}
- if ( unlikely(!array_access_ok(VERIFY_WRITE, call_list,
- nr_calls, sizeof(*call_list))) )
+ if ( unlikely(!array_access_ok(call_list, nr_calls, sizeof(*call_list))) )
{
DPRINTK("Bad memory range %p for %u*%u bytes.\n",
- call_list, nr_calls, sizeof(*call_list));
+ call_list, nr_calls, (unsigned int)sizeof(*call_list));
goto fault;
}
@@ -38,13 +39,13 @@ long do_multicall(multicall_entry_t *call_list, unsigned int nr_calls)
sizeof(*call_list))) )
{
DPRINTK("Error copying from user range %p for %u bytes.\n",
- &call_list[i], sizeof(*call_list));
+ &call_list[i], (unsigned int)sizeof(*call_list));
goto fault;
}
do_multicall_call(&mcs->call);
- if ( unlikely(__put_user(mcs->call.args[5], &call_list[i].args[5])) )
+ if ( unlikely(__put_user(mcs->call.result, &call_list[i].result)) )
{
DPRINTK("Error writing result back to multicall block.\n");
goto fault;
@@ -66,8 +67,8 @@ long do_multicall(multicall_entry_t *call_list, unsigned int nr_calls)
if ( i < nr_calls )
{
mcs->flags = 0;
- return hypercall_create_continuation(
- __HYPERVISOR_multicall, 2, &call_list[i], nr_calls-i);
+ return hypercall2_create_continuation(
+ __HYPERVISOR_multicall, &call_list[i], nr_calls-i);
}
}
}
@@ -79,3 +80,13 @@ long do_multicall(multicall_entry_t *call_list, unsigned int nr_calls)
mcs->flags = 0;
return -EFAULT;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index dc5b70c843..527870de37 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -24,12 +24,15 @@
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
-#include <asm/page.h>
+#include <xen/perfc.h>
+#include <xen/sched.h>
#include <xen/spinlock.h>
-#include <xen/slab.h>
+#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/softirq.h>
-#include <asm/domain_page.h>
+#include <xen/shadow.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
/*
* Comma-separated list of hexadecimal page numbers containing bad bytes.
@@ -41,8 +44,8 @@ string_param("badpage", opt_badpage);
#define round_pgdown(_p) ((_p)&PAGE_MASK)
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-static spinlock_t page_scrub_lock;
-struct list_head page_scrub_list;
+static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED;
+LIST_HEAD(page_scrub_list);
/*********************
* ALLOCATION BITMAP
@@ -53,8 +56,9 @@ static unsigned long bitmap_size; /* in bytes */
static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
-#define allocated_in_map(_pn) \
-(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
+#define allocated_in_map(_pn) \
+( !! (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
+ (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) )
/*
* Hint regarding bitwise arithmetic in map_{alloc,free}:
@@ -83,13 +87,13 @@ static void map_alloc(unsigned long first_page, unsigned long nr_pages)
if ( curr_idx == end_idx )
{
- alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
+ alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
}
else
{
- alloc_bitmap[curr_idx] |= -(1<<start_off);
- while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
- alloc_bitmap[curr_idx] |= (1<<end_off)-1;
+ alloc_bitmap[curr_idx] |= -(1UL<<start_off);
+ while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
+ alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
}
}
@@ -112,13 +116,13 @@ static void map_free(unsigned long first_page, unsigned long nr_pages)
if ( curr_idx == end_idx )
{
- alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
+ alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
}
else
{
- alloc_bitmap[curr_idx] &= (1<<start_off)-1;
+ alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
- alloc_bitmap[curr_idx] &= -(1<<end_off);
+ alloc_bitmap[curr_idx] &= -(1UL<<end_off);
}
}
@@ -129,7 +133,7 @@ static void map_free(unsigned long first_page, unsigned long nr_pages)
*/
/* Initialise allocator to handle up to @max_page pages. */
-unsigned long init_boot_allocator(unsigned long bitmap_start)
+physaddr_t init_boot_allocator(physaddr_t bitmap_start)
{
bitmap_start = round_pgup(bitmap_start);
@@ -144,13 +148,15 @@ unsigned long init_boot_allocator(unsigned long bitmap_start)
return bitmap_start + bitmap_size;
}
-void init_boot_pages(unsigned long ps, unsigned long pe)
+void init_boot_pages(physaddr_t ps, physaddr_t pe)
{
unsigned long bad_pfn;
char *p;
ps = round_pgup(ps);
pe = round_pgdown(pe);
+ if ( pe <= ps )
+ return;
map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);
@@ -167,29 +173,26 @@ void init_boot_pages(unsigned long ps, unsigned long pe)
if ( (bad_pfn < (bitmap_size*8)) && !allocated_in_map(bad_pfn) )
{
- printk("Marking page %08lx as bad\n", bad_pfn);
+ printk("Marking page %lx as bad\n", bad_pfn);
map_alloc(bad_pfn, 1);
}
}
}
-unsigned long alloc_boot_pages(unsigned long size, unsigned long align)
+unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
{
unsigned long pg, i;
- size = round_pgup(size) >> PAGE_SHIFT;
- align = round_pgup(align) >> PAGE_SHIFT;
-
- for ( pg = 0; (pg + size) < (bitmap_size*8); pg += align )
+ for ( pg = 0; (pg + nr_pfns) < (bitmap_size*8); pg += pfn_align )
{
- for ( i = 0; i < size; i++ )
+ for ( i = 0; i < nr_pfns; i++ )
if ( allocated_in_map(pg + i) )
break;
- if ( i == size )
+ if ( i == nr_pfns )
{
- map_alloc(pg, size);
- return pg << PAGE_SHIFT;
+ map_alloc(pg, nr_pfns);
+ return pg;
}
}
@@ -206,8 +209,8 @@ unsigned long alloc_boot_pages(unsigned long size, unsigned long align)
#define MEMZONE_DOM 1
#define NR_ZONES 2
-/* Up to 2^10 pages can be allocated at once. */
-#define MAX_ORDER 10
+/* Up to 2^20 pages can be allocated at once. */
+#define MAX_ORDER 20
static struct list_head heap[NR_ZONES][MAX_ORDER+1];
static unsigned long avail[NR_ZONES];
@@ -265,8 +268,8 @@ struct pfn_info *alloc_heap_pages(unsigned int zone, unsigned int order)
/* Find smallest order which can satisfy the request. */
for ( i = order; i <= MAX_ORDER; i++ )
- if ( !list_empty(&heap[zone][i]) )
- goto found;
+ if ( !list_empty(&heap[zone][i]) )
+ goto found;
/* No suitable memory blocks. Fail the request. */
spin_unlock(&heap_lock);
@@ -351,15 +354,13 @@ void scrub_heap_pages(void)
unsigned long pfn, flags;
printk("Scrubbing Free RAM: ");
+ watchdog_disable();
for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
{
- /* Every 100MB, print a progress dot and appease the watchdog. */
+ /* Every 100MB, print a progress dot. */
if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
- {
printk(".");
- touch_nmi_watchdog();
- }
/* Quick lock-free check. */
if ( allocated_in_map(pfn) )
@@ -370,14 +371,25 @@ void scrub_heap_pages(void)
/* Re-check page status with lock held. */
if ( !allocated_in_map(pfn) )
{
- p = map_domain_mem(pfn << PAGE_SHIFT);
- clear_page(p);
- unmap_domain_mem(p);
+ if ( IS_XEN_HEAP_FRAME(pfn_to_page(pfn)) )
+ {
+ p = page_to_virt(pfn_to_page(pfn));
+ memguard_unguard_range(p, PAGE_SIZE);
+ clear_page(p);
+ memguard_guard_range(p, PAGE_SIZE);
+ }
+ else
+ {
+ p = map_domain_page(pfn);
+ clear_page(p);
+ unmap_domain_page(p);
+ }
}
spin_unlock_irqrestore(&heap_lock, flags);
}
+ watchdog_enable();
printk("done.\n");
}
@@ -387,14 +399,14 @@ void scrub_heap_pages(void)
* XEN-HEAP SUB-ALLOCATOR
*/
-void init_xenheap_pages(unsigned long ps, unsigned long pe)
+void init_xenheap_pages(physaddr_t ps, physaddr_t pe)
{
unsigned long flags;
ps = round_pgup(ps);
pe = round_pgdown(pe);
- memguard_guard_range(__va(ps), pe - ps);
+ memguard_guard_range(phys_to_virt(ps), pe - ps);
/*
* Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
@@ -409,13 +421,12 @@ void init_xenheap_pages(unsigned long ps, unsigned long pe)
}
-unsigned long alloc_xenheap_pages(unsigned int order)
+void *alloc_xenheap_pages(unsigned int order)
{
unsigned long flags;
struct pfn_info *pg;
- int i, attempts = 0;
+ int i;
- retry:
local_irq_save(flags);
pg = alloc_heap_pages(MEMZONE_XEN, order);
local_irq_restore(flags);
@@ -428,33 +439,26 @@ unsigned long alloc_xenheap_pages(unsigned int order)
for ( i = 0; i < (1 << order); i++ )
{
pg[i].count_info = 0;
- pg[i].u.inuse.domain = NULL;
+ pg[i].u.inuse._domain = 0;
pg[i].u.inuse.type_info = 0;
}
- return (unsigned long)page_to_virt(pg);
+ return page_to_virt(pg);
no_memory:
- if ( attempts++ < 8 )
- {
- xmem_cache_reap();
- goto retry;
- }
-
printk("Cannot handle page request order %d!\n", order);
- dump_slabinfo();
- return 0;
+ return NULL;
}
-void free_xenheap_pages(unsigned long p, unsigned int order)
+void free_xenheap_pages(void *v, unsigned int order)
{
unsigned long flags;
- memguard_guard_range((void *)p, 1 << (order + PAGE_SHIFT));
+ memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
local_irq_save(flags);
- free_heap_pages(MEMZONE_XEN, virt_to_page(p), order);
+ free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
local_irq_restore(flags);
}
@@ -464,7 +468,7 @@ void free_xenheap_pages(unsigned long p, unsigned int order)
* DOMAIN-HEAP SUB-ALLOCATOR
*/
-void init_domheap_pages(unsigned long ps, unsigned long pe)
+void init_domheap_pages(physaddr_t ps, physaddr_t pe)
{
ASSERT(!in_irq());
@@ -478,55 +482,52 @@ void init_domheap_pages(unsigned long ps, unsigned long pe)
struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
{
struct pfn_info *pg;
- unsigned long mask, flushed_mask, pfn_stamp, cpu_stamp;
- int i, j;
+ cpumask_t mask;
+ int i;
ASSERT(!in_irq());
if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
return NULL;
- flushed_mask = 0;
- for ( i = 0; i < (1 << order); i++ )
+ mask = pg->u.free.cpumask;
+ tlbflush_filter(mask, pg->tlbflush_timestamp);
+
+ pg->count_info = 0;
+ pg->u.inuse._domain = 0;
+ pg->u.inuse.type_info = 0;
+
+ for ( i = 1; i < (1 << order); i++ )
{
- if ( (mask = (pg[i].u.free.cpu_mask & ~flushed_mask)) != 0 )
- {
- pfn_stamp = pg[i].tlbflush_timestamp;
- for ( j = 0; (mask != 0) && (j < smp_num_cpus); j++ )
- {
- if ( mask & (1<<j) )
- {
- cpu_stamp = tlbflush_time[j];
- if ( !NEED_FLUSH(cpu_stamp, pfn_stamp) )
- mask &= ~(1<<j);
- }
- }
-
- if ( unlikely(mask != 0) )
- {
- flush_tlb_mask(mask);
- perfc_incrc(need_flush_tlb_flush);
- flushed_mask |= mask;
- }
- }
+ /* Add in any extra CPUs that need flushing because of this page. */
+ cpumask_t extra_cpus_mask;
+ cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
+ tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
+ cpus_or(mask, mask, extra_cpus_mask);
pg[i].count_info = 0;
- pg[i].u.inuse.domain = NULL;
+ pg[i].u.inuse._domain = 0;
pg[i].u.inuse.type_info = 0;
}
+ if ( unlikely(!cpus_empty(mask)) )
+ {
+ perfc_incrc(need_flush_tlb_flush);
+ flush_tlb_mask(mask);
+ }
+
if ( d == NULL )
return pg;
spin_lock(&d->page_alloc_lock);
- if ( unlikely(test_bit(DF_DYING, &d->flags)) ||
+ if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) ||
unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
{
DPRINTK("Over-allocation for domain %u: %u > %u\n",
- d->id, d->tot_pages + (1 << order), d->max_pages);
+ d->domain_id, d->tot_pages + (1 << order), d->max_pages);
DPRINTK("...or the domain is dying (%d)\n",
- !!test_bit(DF_DYING, &d->flags));
+ !!test_bit(_DOMF_dying, &d->domain_flags));
spin_unlock(&d->page_alloc_lock);
free_heap_pages(MEMZONE_DOM, pg, order);
return NULL;
@@ -539,7 +540,7 @@ struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
for ( i = 0; i < (1 << order); i++ )
{
- pg[i].u.inuse.domain = d;
+ page_set_owner(&pg[i], d);
wmb(); /* Domain pointer must be visible before updating refcnt. */
pg[i].count_info |= PGC_allocated | 1;
list_add_tail(&pg[i].list, &d->page_list);
@@ -554,13 +555,13 @@ struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
void free_domheap_pages(struct pfn_info *pg, unsigned int order)
{
int i, drop_dom_ref;
- struct domain *d = pg->u.inuse.domain;
+ struct domain *d = page_get_owner(pg);
ASSERT(!in_irq());
if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
{
- /* NB. May recursively lock from domain_relinquish_memory(). */
+ /* NB. May recursively lock from relinquish_memory(). */
spin_lock_recursive(&d->page_alloc_lock);
for ( i = 0; i < (1 << order); i++ )
@@ -573,14 +574,16 @@ void free_domheap_pages(struct pfn_info *pg, unsigned int order)
}
else if ( likely(d != NULL) )
{
- /* NB. May recursively lock from domain_relinquish_memory(). */
+ /* NB. May recursively lock from relinquish_memory(). */
spin_lock_recursive(&d->page_alloc_lock);
for ( i = 0; i < (1 << order); i++ )
{
- ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
+ shadow_drop_references(d, &pg[i]);
+ ASSERT(((pg[i].u.inuse.type_info & PGT_count_mask) == 0) ||
+ shadow_tainted_refcnts(d));
pg[i].tlbflush_timestamp = tlbflush_current_time();
- pg[i].u.free.cpu_mask = 1 << d->processor;
+ pg[i].u.free.cpumask = d->cpumask;
list_del(&pg[i].list);
}
@@ -589,7 +592,7 @@ void free_domheap_pages(struct pfn_info *pg, unsigned int order)
spin_unlock_recursive(&d->page_alloc_lock);
- if ( likely(!test_bit(DF_DYING, &d->flags)) )
+ if ( likely(!test_bit(_DOMF_dying, &d->domain_flags)) )
{
free_heap_pages(MEMZONE_DOM, pg, order);
}
@@ -668,9 +671,9 @@ static void page_scrub_softirq(void)
{
pg = list_entry(ent, struct pfn_info, list);
ent = ent->prev;
- p = map_domain_mem(page_to_phys(pg));
+ p = map_domain_page(page_to_pfn(pg));
clear_page(p);
- unmap_domain_mem(p);
+ unmap_domain_page(p);
free_heap_pages(MEMZONE_DOM, pg, 0);
}
} while ( (NOW() - start) < MILLISECS(1) );
@@ -678,9 +681,17 @@ static void page_scrub_softirq(void)
static __init int page_scrub_init(void)
{
- spin_lock_init(&page_scrub_lock);
- INIT_LIST_HEAD(&page_scrub_list);
open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
return 0;
}
__initcall(page_scrub_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/perfc.c b/xen/common/perfc.c
index d5384c4af5..7363fb98c7 100644
--- a/xen/common/perfc.c
+++ b/xen/common/perfc.c
@@ -4,6 +4,7 @@
#include <xen/time.h>
#include <xen/perfc.h>
#include <xen/keyhandler.h>
+#include <xen/spinlock.h>
#include <public/dom0_ops.h>
#include <asm/uaccess.h>
@@ -22,7 +23,7 @@
static struct {
char *name;
enum { TYPE_SINGLE, TYPE_CPU, TYPE_ARRAY,
- TYPE_S_SINGLE, TYPE_S_CPU, TYPE_S_ARRAY
+ TYPE_S_SINGLE, TYPE_S_CPU, TYPE_S_ARRAY
} type;
int nr_elements;
} perfc_info[] = {
@@ -31,7 +32,7 @@ static struct {
#define NR_PERFCTRS (sizeof(perfc_info) / sizeof(perfc_info[0]))
-struct perfcounter_t perfcounters;
+struct perfcounter perfcounters;
void perfc_printall(unsigned char key)
{
@@ -54,10 +55,11 @@ void perfc_printall(unsigned char key)
break;
case TYPE_CPU:
case TYPE_S_CPU:
- for ( j = sum = 0; j < smp_num_cpus; j++ )
+ sum = 0;
+ for_each_online_cpu ( j )
sum += atomic_read(&counters[j]);
printk("TOTAL[%10d] ", sum);
- for ( j = 0; j < smp_num_cpus; j++ )
+ for_each_online_cpu ( j )
printk("CPU%02d[%10d] ", j, atomic_read(&counters[j]));
counters += NR_CPUS;
break;
@@ -66,8 +68,14 @@ void perfc_printall(unsigned char key)
for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
sum += atomic_read(&counters[j]);
printk("TOTAL[%10d] ", sum);
+#ifdef PERF_ARRAYS
for ( j = 0; j < perfc_info[i].nr_elements; j++ )
+ {
+ if ( (j != 0) && ((j % 4) == 0) )
+ printk("\n ");
printk("ARR%02d[%10d] ", j, atomic_read(&counters[j]));
+ }
+#endif
counters += j;
break;
}
@@ -77,7 +85,7 @@ void perfc_printall(unsigned char key)
void perfc_reset(unsigned char key)
{
- int i, j, sum;
+ int i, j;
s_time_t now = NOW();
atomic_t *counters = (atomic_t *)&perfcounters;
@@ -92,19 +100,19 @@ void perfc_reset(unsigned char key)
switch ( perfc_info[i].type )
{
case TYPE_SINGLE:
- atomic_set(&counters[0],0);
+ atomic_set(&counters[0],0);
case TYPE_S_SINGLE:
counters += 1;
break;
case TYPE_CPU:
- for ( j = sum = 0; j < smp_num_cpus; j++ )
- atomic_set(&counters[j],0);
+ for ( j = 0; j < NR_CPUS; j++ )
+ atomic_set(&counters[j],0);
case TYPE_S_CPU:
counters += NR_CPUS;
break;
case TYPE_ARRAY:
- for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
- atomic_set(&counters[j],0);
+ for ( j = 0; j < NR_CPUS; j++ )
+ atomic_set(&counters[j],0);
case TYPE_S_ARRAY:
counters += perfc_info[i].nr_elements;
break;
@@ -139,7 +147,7 @@ static int perfc_copy_info(dom0_perfc_desc_t *desc)
break;
case TYPE_CPU:
case TYPE_S_CPU:
- perfc_d[i].nr_vals = smp_num_cpus;
+ perfc_d[i].nr_vals = num_online_cpus();
break;
case TYPE_ARRAY:
case TYPE_S_ARRAY:
@@ -216,3 +224,13 @@ int perfc_control(dom0_perfccontrol_t *pc)
return rc;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/physdev.c b/xen/common/physdev.c
deleted file mode 100644
index ff79c72b28..0000000000
--- a/xen/common/physdev.c
+++ /dev/null
@@ -1,746 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (c) 2004 - Rolf Neugebauer - Intel Research Cambridge
- * (c) 2004 - Keir Fraser - University of Cambridge
- ****************************************************************************
- *
- * Description: allows a domain to access devices on the PCI bus
- *
- * A guest OS may be given access to particular devices on the PCI bus.
- * For each domain a list of PCI devices is maintained, describing the
- * access mode for the domain.
- *
- * Guests can figure out the virtualised PCI space through normal PCI config
- * register access. Some of the accesses, in particular write accesses, are
- * faked. For example the sequence for detecting the IO regions, which requires
- * writes to determine the size of the region, is faked out by a very simple
- * state machine, preventing direct writes to the PCI config registers by a
- * guest.
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <xen/pci.h>
-#include <xen/irq.h>
-#include <xen/event.h>
-#include <asm/pci.h>
-#include <public/xen.h>
-#include <public/physdev.h>
-
-/* Called by PHYSDEV_PCI_INITIALISE_DEVICE to finalise IRQ routing. */
-extern void pcibios_enable_irq(struct pci_dev *dev);
-
-#if 0
-#define VERBOSE_INFO(_f, _a...) printk( _f , ## _a )
-#else
-#define VERBOSE_INFO(_f, _a...) ((void)0)
-#endif
-
-#ifdef VERBOSE
-#define INFO(_f, _a...) printk( _f, ## _a )
-#else
-#define INFO(_f, _a...) ((void)0)
-#endif
-
-#define SLOPPY_CHECKING
-
-#define ACC_READ 1
-#define ACC_WRITE 2
-
-/* Upper bounds for PCI-device addressing. */
-#define PCI_BUSMAX 255
-#define PCI_DEVMAX 31
-#define PCI_FUNCMAX 7
-#define PCI_REGMAX 255
-
-/* Bit offsets into state. */
-#define ST_BASE_ADDRESS 0 /* bits 0-5: are for base address access */
-#define ST_ROM_ADDRESS 6 /* bit 6: is for rom address access */
-
-typedef struct _phys_dev_st {
- int flags; /* flags for access etc */
- struct pci_dev *dev; /* the device */
- struct list_head node; /* link to the list */
- struct domain *owner; /* 'owner of this device' */
- int state; /* state for various checks */
-} phys_dev_t;
-
-
-/* Find a device on a per-domain device list. */
-static phys_dev_t *find_pdev(struct domain *p, struct pci_dev *dev)
-{
- phys_dev_t *t, *res = NULL;
-
- list_for_each_entry ( t, &p->pcidev_list, node )
- {
- if ( dev == t->dev )
- {
- res = t;
- break;
- }
- }
- return res;
-}
-
-/* Add a device to a per-domain device-access list. */
-static void add_dev_to_task(struct domain *p,
- struct pci_dev *dev, int acc)
-{
- phys_dev_t *pdev;
-
- if ( (pdev = find_pdev(p, dev)) )
- {
- /* Sevice already on list: update access permissions. */
- pdev->flags = acc;
- return;
- }
-
- if ( (pdev = xmalloc(sizeof(phys_dev_t))) == NULL )
- {
- INFO("Error allocating pdev structure.\n");
- return;
- }
-
- pdev->dev = dev;
- pdev->flags = acc;
- pdev->state = 0;
- list_add(&pdev->node, &p->pcidev_list);
-
- if ( acc == ACC_WRITE )
- pdev->owner = p;
-}
-
-/*
- * physdev_pci_access_modify:
- * Allow/disallow access to a specific PCI device. Guests should not be
- * allowed to see bridge devices as it needlessly complicates things (one
- * possible exception to this is the AGP bridge). If the given device is a
- * bridge, then the domain should get access to all the leaf devices below
- * that bridge (XXX this is unimplemented!).
- */
-int physdev_pci_access_modify(
- domid_t dom, int bus, int dev, int func, int enable)
-{
- struct domain *p;
- struct pci_dev *pdev;
- int i, j, rc = 0;
-
- if ( !IS_PRIV(current) )
- BUG();
-
- if ( (bus > PCI_BUSMAX) || (dev > PCI_DEVMAX) || (func > PCI_FUNCMAX) )
- return -EINVAL;
-
- if ( !enable )
- {
- INFO("Disallowing access is not yet supported.\n");
- return -EINVAL;
- }
-
- INFO("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func);
-
- if ( (p = find_domain_by_id(dom)) == NULL )
- return -ESRCH;
-
- /* Make the domain privileged. */
- set_bit(DF_PHYSDEV, &p->flags);
- /* FIXME: MAW for now make the domain REALLY privileged so that it
- * can run a backend driver (hw access should work OK otherwise) */
- set_bit(DF_PRIVILEGED, &p->flags);
-
- /* Grant write access to the specified device. */
- if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
- {
- INFO(" dev does not exist\n");
- rc = -ENODEV;
- goto out;
- }
- add_dev_to_task(p, pdev, ACC_WRITE);
-
- INFO(" add RW %02x:%02x:%02x\n", pdev->bus->number,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
-
- /* Is the device a bridge or cardbus? */
- if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL )
- INFO("XXX can't give access to bridge devices yet\n");
-
- /* Now, setup access to the IO ports and memory regions for the device. */
-
- if ( p->thread.io_bitmap == NULL )
- {
- if ( (p->thread.io_bitmap = xmalloc(IOBMP_BYTES)) == NULL )
- {
- rc = -ENOMEM;
- goto out;
- }
- memset(p->thread.io_bitmap, 0xFF, IOBMP_BYTES);
-
- p->thread.io_bitmap_sel = ~0ULL;
- }
-
- for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
- {
- struct resource *r = &pdev->resource[i];
-
- if ( r->flags & IORESOURCE_IO )
- {
- /* Give the domain access to the IO ports it needs. Currently,
- * this will allow all processes in that domain access to those
- * ports as well. This will do for now, since driver domains don't
- * run untrusted processes! */
- INFO("Giving domain %u IO resources (%lx - %lx) "
- "for device %s\n", dom, r->start, r->end, pdev->slot_name);
- for ( j = r->start; j < r->end + 1; j++ )
- {
- clear_bit(j, p->thread.io_bitmap);
- clear_bit(j / IOBMP_BITS_PER_SELBIT, &p->thread.io_bitmap_sel);
- }
- }
-
- /* rights to IO memory regions are checked when the domain maps them */
- }
- out:
- put_domain(p);
- return rc;
-}
-
-/* Check if a domain controls a device with IO memory within frame @pfn.
- * Returns: 1 if the domain should be allowed to map @pfn, 0 otherwise. */
-int domain_iomem_in_pfn(struct domain *p, unsigned long pfn)
-{
- int ret = 0;
- phys_dev_t *phys_dev;
-
- VERBOSE_INFO("Checking if physdev-capable domain %u needs access to "
- "pfn %08lx\n", p->id, pfn);
-
- spin_lock(&p->pcidev_lock);
-
- list_for_each_entry ( phys_dev, &p->pcidev_list, node )
- {
- int i;
- struct pci_dev *pci_dev = phys_dev->dev;
-
- for ( i = 0; (i < DEVICE_COUNT_RESOURCE) && (ret == 0); i++ )
- {
- struct resource *r = &pci_dev->resource[i];
-
- if ( r->flags & IORESOURCE_MEM )
- if ( (r->start >> PAGE_SHIFT) == pfn
- || (r->end >> PAGE_SHIFT) == pfn
- || ((r->start >> PAGE_SHIFT < pfn)
- && (r->end >> PAGE_SHIFT > pfn)) )
- ret = 1;
- }
-
- if ( ret != 0 ) break;
- }
-
- spin_unlock(&p->pcidev_lock);
-
- VERBOSE_INFO("Domain %u %s mapping of pfn %08lx\n",
- p->id, ret ? "allowed" : "disallowed", pfn);
-
- return ret;
-}
-
-/* check if a domain has general access to a device */
-inline static int check_dev_acc (struct domain *p,
- int bus, int dev, int func,
- phys_dev_t **pdev)
-{
- struct pci_dev *target_dev;
- phys_dev_t *target_pdev;
- unsigned int target_devfn;
-
- *pdev = NULL;
-
- if ( !IS_CAPABLE_PHYSDEV(p) )
- return -EPERM; /* no pci access permission */
-
- if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX )
- return -EINVAL;
-
- VERBOSE_INFO("b=%x d=%x f=%x ", bus, dev, func);
-
- /* check target device */
- target_devfn = PCI_DEVFN(dev, func);
- target_dev = pci_find_slot(bus, target_devfn);
- if ( !target_dev )
- {
- VERBOSE_INFO("target does not exist\n");
- return -ENODEV;
- }
-
- /* check access */
- target_pdev = find_pdev(p, target_dev);
- if ( !target_pdev )
- {
- VERBOSE_INFO("dom has no access to target\n");
- return -EPERM;
- }
-
- *pdev = target_pdev;
- return 0;
-}
-
-#ifndef SLOPPY_CHECKING
-/*
- * Base address registers contain the base address for IO regions.
- * The length can be determined by writing all 1s to the register and
- * reading the value again. The device will zero the lower unused bits.
- *
- * to work out the length of the io region a device probe typically does:
- * 1) a = read_base_addr_reg()
- * 2) write_base_addr_reg(0xffffffff)
- * 3) b = read_base_addr_reg() [device zeros lower bits]
- * 4) write_base_addr_reg(a) [restore original value]
- * this function fakes out step 2-4. *no* writes are made to the device.
- *
- * phys_dev_t contains a bit field (a bit for each base address register).
- * if the bit for a register is set the guest had writen all 1s to the
- * register and subsequent read request need to fake out the b.
- * if the guest restores the original value (step 4 above) the bit is
- * cleared again. If the guest attempts to "restores" a wrong value an
- * error is flagged.
- */
-static int do_base_address_access(phys_dev_t *pdev, int acc, int idx,
- int len, u32 *val)
-{
- int st_bit, reg = PCI_BASE_ADDRESS_0 + (idx*4), ret = -EINVAL;
- struct pci_dev *dev = pdev->dev;
- u32 orig_val, sz;
- struct resource *res;
-
- if ( len != sizeof(u32) )
- {
- /* This isn't illegal, but there doesn't seem to be a very good reason
- * to do it for normal devices (bridges are another matter). Since it
- * would complicate the code below, we don't support this for now. */
-
- /* We could set *val to some value but the guest may well be in trouble
- * anyway if this write fails. Hopefully the printk will give us a
- * clue what went wrong. */
- INFO("Guest %u attempting sub-dword %s to BASE_ADDRESS %d\n",
- pdev->owner->id, (acc == ACC_READ) ? "read" : "write", idx);
-
- return -EPERM;
- }
-
- st_bit = idx + ST_BASE_ADDRESS;
- res = &(pdev->dev->resource[idx]);
-
- if ( acc == ACC_WRITE )
- {
- if ( (*val == 0xffffffff) ||
- ((res->flags & IORESOURCE_IO) && (*val == 0xffff)) )
- {
- /* Set bit and return. */
- set_bit(st_bit, &pdev->state);
- ret = 0;
- }
- else
- {
- /* Assume guest wants to set the base address. */
- clear_bit(st_bit, &pdev->state);
-
- /* check if guest tries to restore orig value */
- ret = pci_read_config_dword(dev, reg, &orig_val);
- if ( (ret == 0) && (*val != orig_val) )
- {
- INFO("Guest attempting update to BASE_ADDRESS %d\n", idx);
- ret = -EPERM;
- }
- }
- VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
- " val=0x%08x %x\n",
- dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), reg, len, *val, pdev->state);
- }
- else if ( acc == ACC_READ )
- {
- ret = pci_read_config_dword(dev, reg, val);
- if ( (ret == 0) && test_bit(st_bit, &pdev->state) )
- {
- /* Cook the value. */
- sz = res->end - res->start;
- if ( res->flags & IORESOURCE_MEM )
- {
- /* this is written out explicitly for clarity */
- *val = 0xffffffff;
- /* bit 0 = 0 */
- /* bit 21 = memory type */
- /* bit 3 = prefetchable */
- /* bit 4-31 width */
- sz = sz >> 4; /* size in blocks of 16 byte */
- sz = ~sz; /* invert */
- *val = *val & (sz << 4); /* and in the size */
- /* use read values for low 4 bits */
- *val = *val | (orig_val & 0xf);
- }
- else if ( res->flags & IORESOURCE_IO )
- {
- *val = 0x0000ffff;
- /* bit 10 = 01 */
- /* bit 2-31 width */
- sz = sz >> 2; /* size in dwords */
- sz = ~sz & 0x0000ffff;
- *val = *val & (sz << 2);
- *val = *val | 0x1;
- }
- }
- VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x"
- " val=0x%08x %x\n",
- dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), reg, len, *val, pdev->state);
- }
-
- return ret;
-}
-
-
-static int do_rom_address_access(phys_dev_t *pdev, int acc, int len, u32 *val)
-{
- int st_bit, ret = -EINVAL;
- struct pci_dev *dev = pdev->dev;
- u32 orig_val, sz;
- struct resource *res;
-
- if ( len != sizeof(u32) )
- {
- INFO("Guest attempting sub-dword %s to ROM_ADDRESS\n",
- (acc == ACC_READ) ? "read" : "write");
- return -EPERM;
- }
-
- st_bit = ST_ROM_ADDRESS;
- res = &(pdev->dev->resource[PCI_ROM_RESOURCE]);
-
- if ( acc == ACC_WRITE )
- {
- if ( (*val == 0xffffffff) || (*val == 0xfffffffe) )
- {
- /* NB. 0xffffffff would be unusual, but we trap it anyway. */
- set_bit(st_bit, &pdev->state);
- ret = 0;
- }
- else
- {
- /* Assume guest wants simply to set the base address. */
- clear_bit(st_bit, &pdev->state);
-
- /* Check if guest tries to restore the original value. */
- ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, &orig_val);
- if ( (ret == 0) && (*val != orig_val) )
- {
- if ( (*val != 0x00000000) )
- {
- INFO("caution: guest tried to change rom address.\n");
- ret = -EPERM;
- }
- else
- {
- INFO("guest disabled rom access for %02x:%02x:%02x\n",
- dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn));
- }
- }
- }
- VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
- " val=0x%08x %x\n",
- dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), PCI_ROM_ADDRESS, len, *val, pdev->state);
- }
- else if ( acc == ACC_READ )
- {
- ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, val);
- if ( (ret == 0) && test_bit(st_bit, &pdev->state) )
- {
- /* Cook the value. */
- sz = res->end - res->start;
- *val = 0xffffffff;
- /* leave bit 0 untouched */
- /* bit 1-10 reserved, harwired to 0 */
- sz = sz >> 11; /* size is in 2KB blocks */
- sz = ~sz;
- *val = *val & (sz << 11);
- *val = *val | (orig_val & 0x1);
- }
- VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x"
- " val=0x%08x %x\n",
- dev->bus->number, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), PCI_ROM_ADDRESS, len, *val, pdev->state);
- }
-
- return ret;
-
-}
-#endif /* SLOPPY_CHECKING */
-
-/*
- * Handle a PCI config space read access if the domain has access privileges.
- */
-static long pci_cfgreg_read(int bus, int dev, int func, int reg,
- int len, u32 *val)
-{
- int ret;
- phys_dev_t *pdev;
-
- if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 )
- {
- /* PCI spec states that reads from non-existent devices should return
- * all 1s. In this case the domain has no read access, which should
- * also look like the device is non-existent. */
- *val = 0xFFFFFFFF;
- return ret;
- }
-
- /* Fake out read requests for some registers. */
- switch ( reg )
- {
-#ifndef SLOPPY_CHECKING
- case PCI_BASE_ADDRESS_0:
- ret = do_base_address_access(pdev, ACC_READ, 0, len, val);
- break;
-
- case PCI_BASE_ADDRESS_1:
- ret = do_base_address_access(pdev, ACC_READ, 1, len, val);
- break;
-
- case PCI_BASE_ADDRESS_2:
- ret = do_base_address_access(pdev, ACC_READ, 2, len, val);
- break;
-
- case PCI_BASE_ADDRESS_3:
- ret = do_base_address_access(pdev, ACC_READ, 3, len, val);
- break;
-
- case PCI_BASE_ADDRESS_4:
- ret = do_base_address_access(pdev, ACC_READ, 4, len, val);
- break;
-
- case PCI_BASE_ADDRESS_5:
- ret = do_base_address_access(pdev, ACC_READ, 5, len, val);
- break;
-
- case PCI_ROM_ADDRESS:
- ret = do_rom_address_access(pdev, ACC_READ, len, val);
- break;
-#endif
-
- case PCI_INTERRUPT_LINE:
- *val = pdev->dev->irq;
- ret = 0;
- break;
-
- default:
- ret = pci_config_read(0, bus, dev, func, reg, len, val);
- VERBOSE_INFO("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x "
- "val=0x%08x\n", bus, dev, func, reg, len, *val);
- break;
- }
-
- return ret;
-}
-
-
-/*
- * Handle a PCI config space write access if the domain has access privileges.
- */
-static long pci_cfgreg_write(int bus, int dev, int func, int reg,
- int len, u32 val)
-{
- int ret;
- phys_dev_t *pdev;
-
- if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 )
- return ret;
-
- /* special treatment for some registers */
- switch (reg)
- {
-#ifndef SLOPPY_CHECKING
- case PCI_BASE_ADDRESS_0:
- ret = do_base_address_access(pdev, ACC_WRITE, 0, len, &val);
- break;
-
- case PCI_BASE_ADDRESS_1:
- ret = do_base_address_access(pdev, ACC_WRITE, 1, len, &val);
- break;
-
- case PCI_BASE_ADDRESS_2:
- ret = do_base_address_access(pdev, ACC_WRITE, 2, len, &val);
- break;
-
- case PCI_BASE_ADDRESS_3:
- ret = do_base_address_access(pdev, ACC_WRITE, 3, len, &val);
- break;
-
- case PCI_BASE_ADDRESS_4:
- ret = do_base_address_access(pdev, ACC_WRITE, 4, len, &val);
- break;
-
- case PCI_BASE_ADDRESS_5:
- ret = do_base_address_access(pdev, ACC_WRITE, 5, len, &val);
- break;
-
- case PCI_ROM_ADDRESS:
- ret = do_rom_address_access(pdev, ACC_WRITE, len, &val);
- break;
-#endif
-
- default:
- if ( pdev->flags != ACC_WRITE )
- {
- INFO("pci write not allowed %02x:%02x:%02x: "
- "reg=0x%02x len=0x%02x val=0x%08x\n",
- bus, dev, func, reg, len, val);
- ret = -EPERM;
- }
- else
- {
- ret = pci_config_write(0, bus, dev, func, reg, len, val);
- VERBOSE_INFO("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x "
- "val=0x%08x\n", bus, dev, func, reg, len, val);
- }
- break;
- }
-
- return ret;
-}
-
-
-static long pci_probe_root_buses(u32 *busmask)
-{
- phys_dev_t *pdev;
-
- memset(busmask, 0, 256/8);
-
- list_for_each_entry ( pdev, &current->pcidev_list, node )
- set_bit(pdev->dev->bus->number, busmask);
-
- return 0;
-}
-
-
-/*
- * Demuxing hypercall.
- */
-long do_physdev_op(physdev_op_t *uop)
-{
- phys_dev_t *pdev;
- physdev_op_t op;
- long ret;
- int irq;
-
- if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
- return -EFAULT;
-
- switch ( op.cmd )
- {
- case PHYSDEVOP_PCI_CFGREG_READ:
- ret = pci_cfgreg_read(op.u.pci_cfgreg_read.bus,
- op.u.pci_cfgreg_read.dev,
- op.u.pci_cfgreg_read.func,
- op.u.pci_cfgreg_read.reg,
- op.u.pci_cfgreg_read.len,
- &op.u.pci_cfgreg_read.value);
- break;
-
- case PHYSDEVOP_PCI_CFGREG_WRITE:
- ret = pci_cfgreg_write(op.u.pci_cfgreg_write.bus,
- op.u.pci_cfgreg_write.dev,
- op.u.pci_cfgreg_write.func,
- op.u.pci_cfgreg_write.reg,
- op.u.pci_cfgreg_write.len,
- op.u.pci_cfgreg_write.value);
- break;
-
- case PHYSDEVOP_PCI_INITIALISE_DEVICE:
- if ( (ret = check_dev_acc(current,
- op.u.pci_initialise_device.bus,
- op.u.pci_initialise_device.dev,
- op.u.pci_initialise_device.func,
- &pdev)) == 0 )
- pcibios_enable_irq(pdev->dev);
- break;
-
- case PHYSDEVOP_PCI_PROBE_ROOT_BUSES:
- ret = pci_probe_root_buses(op.u.pci_probe_root_buses.busmask);
- break;
-
- case PHYSDEVOP_IRQ_UNMASK_NOTIFY:
- ret = pirq_guest_unmask(current);
- break;
-
- case PHYSDEVOP_IRQ_STATUS_QUERY:
- irq = op.u.irq_status_query.irq;
- ret = -EINVAL;
- if ( (irq < 0) || (irq >= NR_IRQS) )
- break;
- op.u.irq_status_query.flags = 0;
- /* Edge-triggered interrupts don't need an explicit unmask downcall. */
- if ( strstr(irq_desc[irq].handler->typename, "edge") == NULL )
- op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
- ret = 0;
- break;
-
- default:
- ret = -EINVAL;
- break;
- }
-
- copy_to_user(uop, &op, sizeof(op));
- return ret;
-}
-
-/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */
-/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */
-static char opt_physdev_dom0_hide[200] = "";
-string_param("physdev_dom0_hide", opt_physdev_dom0_hide);
-
-/* Test if boot params specify this device should NOT be visible to DOM0
- * (e.g. so that another domain can control it instead) */
-static int pcidev_dom0_hidden(struct pci_dev *dev)
-{
- char cmp[10] = "(.......)";
-
- strncpy(&cmp[1], dev->slot_name, 7);
-
- if ( strstr(opt_physdev_dom0_hide, dev->slot_name) == NULL )
- return 0;
-
- return 1;
-}
-
-
-/* Domain 0 has read access to all devices. */
-void physdev_init_dom0(struct domain *p)
-{
- struct pci_dev *dev;
- phys_dev_t *pdev;
-
- INFO("Give DOM0 read access to all PCI devices\n");
-
- pci_for_each_dev(dev)
- {
- if ( pcidev_dom0_hidden(dev) )
- {
- printk("Hiding PCI device %s from DOM0\n", dev->slot_name);
- continue;
- }
-
- pdev = xmalloc(sizeof(phys_dev_t));
- pdev->dev = dev;
- pdev->flags = ACC_WRITE;
- pdev->state = 0;
- pdev->owner = p;
- list_add(&pdev->node, &p->pcidev_list);
- }
-
- set_bit(DF_PHYSDEV, &p->flags);
-}
-
diff --git a/xen/common/resource.c b/xen/common/resource.c
deleted file mode 100644
index 3ce6d89c97..0000000000
--- a/xen/common/resource.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * linux/kernel/resource.c
- *
- * Copyright (C) 1999 Linus Torvalds
- * Copyright (C) 1999 Martin Mares <mj@ucw.cz>
- *
- * Arbitrary resource management.
- */
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/errno.h>
-#include <xen/ioport.h>
-#include <xen/init.h>
-#include <xen/slab.h>
-#include <xen/spinlock.h>
-#include <asm/io.h>
-
-struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO };
-struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM };
-
-static rwlock_t resource_lock = RW_LOCK_UNLOCKED;
-
-/*
- * This generates reports for /proc/ioports and /proc/iomem
- */
-static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end)
-{
- if (offset < 0)
- offset = 0;
-
- while (entry) {
- const char *name = entry->name;
- unsigned long from, to;
-
- if ((int) (end-buf) < 80)
- return buf;
-
- from = entry->start;
- to = entry->end;
- if (!name)
- name = "<BAD>";
-
- buf += sprintf(buf, fmt + offset, from, to, name);
- if (entry->child)
- buf = do_resource_list(entry->child, fmt, offset-2, buf, end);
- entry = entry->sibling;
- }
-
- return buf;
-}
-
-int get_resource_list(struct resource *root, char *buf, int size)
-{
- char *fmt;
- int retval;
-
- fmt = " %08lx-%08lx : %s\n";
- if (root->end < 0x10000)
- fmt = " %04lx-%04lx : %s\n";
- read_lock(&resource_lock);
- retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf;
- read_unlock(&resource_lock);
- return retval;
-}
-
-/* Return the conflict entry if you can't request it */
-static struct resource * __request_resource(struct resource *root, struct resource *new)
-{
- unsigned long start = new->start;
- unsigned long end = new->end;
- struct resource *tmp, **p;
-
- if (end < start)
- return root;
- if (start < root->start)
- return root;
- if (end > root->end)
- return root;
- p = &root->child;
- for (;;) {
- tmp = *p;
- if (!tmp || tmp->start > end) {
- new->sibling = tmp;
- *p = new;
- new->parent = root;
- return NULL;
- }
- p = &tmp->sibling;
- if (tmp->end < start)
- continue;
- return tmp;
- }
-}
-
-static int __release_resource(struct resource *old)
-{
- struct resource *tmp, **p;
-
- p = &old->parent->child;
- for (;;) {
- tmp = *p;
- if (!tmp)
- break;
- if (tmp == old) {
- *p = tmp->sibling;
- old->parent = NULL;
- return 0;
- }
- p = &tmp->sibling;
- }
- return -EINVAL;
-}
-
-int request_resource(struct resource *root, struct resource *new)
-{
- struct resource *conflict;
-
- write_lock(&resource_lock);
- conflict = __request_resource(root, new);
- write_unlock(&resource_lock);
- return conflict ? -EBUSY : 0;
-}
-
-int release_resource(struct resource *old)
-{
- int retval;
-
- write_lock(&resource_lock);
- retval = __release_resource(old);
- write_unlock(&resource_lock);
- return retval;
-}
-
-int check_resource(struct resource *root, unsigned long start, unsigned long len)
-{
- struct resource *conflict, tmp;
-
- tmp.start = start;
- tmp.end = start + len - 1;
- write_lock(&resource_lock);
- conflict = __request_resource(root, &tmp);
- if (!conflict)
- __release_resource(&tmp);
- write_unlock(&resource_lock);
- return conflict ? -EBUSY : 0;
-}
-
-/*
- * Find empty slot in the resource tree given range and alignment.
- */
-static int find_resource(struct resource *root, struct resource *new,
- unsigned long size,
- unsigned long min, unsigned long max,
- unsigned long align,
- void (*alignf)(void *, struct resource *,
- unsigned long, unsigned long),
- void *alignf_data)
-{
- struct resource *this = root->child;
-
- new->start = root->start;
- for(;;) {
- if (this)
- new->end = this->start;
- else
- new->end = root->end;
- if (new->start < min)
- new->start = min;
- if (new->end > max)
- new->end = max;
- new->start = (new->start + align - 1) & ~(align - 1);
- if (alignf)
- alignf(alignf_data, new, size, align);
- if (new->start < new->end && new->end - new->start + 1 >= size) {
- new->end = new->start + size - 1;
- return 0;
- }
- if (!this)
- break;
- new->start = this->end + 1;
- this = this->sibling;
- }
- return -EBUSY;
-}
-
-/*
- * Allocate empty slot in the resource tree given range and alignment.
- */
-int allocate_resource(struct resource *root, struct resource *new,
- unsigned long size,
- unsigned long min, unsigned long max,
- unsigned long align,
- void (*alignf)(void *, struct resource *,
- unsigned long, unsigned long),
- void *alignf_data)
-{
- int err;
-
- write_lock(&resource_lock);
- err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
- if (err >= 0 && __request_resource(root, new))
- err = -EBUSY;
- write_unlock(&resource_lock);
- return err;
-}
-
-/*
- * This is compatibility stuff for IO resources.
- *
- * Note how this, unlike the above, knows about
- * the IO flag meanings (busy etc).
- *
- * Request-region creates a new busy region.
- *
- * Check-region returns non-zero if the area is already busy
- *
- * Release-region releases a matching busy region.
- */
-struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
-{
- struct resource *res = xmalloc(sizeof(*res));
-
- if (res) {
- memset(res, 0, sizeof(*res));
- res->name = name;
- res->start = start;
- res->end = start + n - 1;
- res->flags = IORESOURCE_BUSY;
-
- write_lock(&resource_lock);
-
- for (;;) {
- struct resource *conflict;
-
- conflict = __request_resource(parent, res);
- if (!conflict)
- break;
- if (conflict != parent) {
- parent = conflict;
- if (!(conflict->flags & IORESOURCE_BUSY))
- continue;
- }
-
- /* Uhhuh, that didn't work out.. */
- xfree(res);
- res = NULL;
- break;
- }
- write_unlock(&resource_lock);
- }
- return res;
-}
-
-void __release_region(struct resource *parent, unsigned long start, unsigned long n)
-{
- struct resource **p;
- unsigned long end;
-
- p = &parent->child;
- end = start + n - 1;
-
- for (;;) {
- struct resource *res = *p;
-
- if (!res)
- break;
- if (res->start <= start && res->end >= end) {
- if (!(res->flags & IORESOURCE_BUSY)) {
- p = &res->child;
- continue;
- }
- if (res->start != start || res->end != end)
- break;
- *p = res->sibling;
- xfree(res);
- return;
- }
- p = &res->sibling;
- }
- printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end);
-}
-
-
-#if 0
-/*
- * Called from init/main.c to reserve IO ports.
- */
-#define MAXRESERVE 4
-static int __init reserve_setup(char *str)
-{
- static int reserved = 0;
- static struct resource reserve[MAXRESERVE];
-
- for (;;) {
- int io_start, io_num;
- int x = reserved;
-
- if (get_option (&str, &io_start) != 2)
- break;
- if (get_option (&str, &io_num) == 0)
- break;
- if (x < MAXRESERVE) {
- struct resource *res = reserve + x;
- res->name = "reserved";
- res->start = io_start;
- res->end = io_start + io_num - 1;
- res->flags = IORESOURCE_BUSY;
- res->child = NULL;
- if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
- reserved = x+1;
- }
- }
- return 1;
-}
-
-__setup("reserve=", reserve_setup);
-#endif
diff --git a/xen/common/sched_atropos.c b/xen/common/sched_atropos.c
deleted file mode 100644
index b5901f2397..0000000000
--- a/xen/common/sched_atropos.c
+++ /dev/null
@@ -1,691 +0,0 @@
-/*
- * atropos.c
- * ---------
- *
- * Copyright (c) 1994 University of Cambridge Computer Laboratory.
- * This is part of Nemesis; consult your contract for terms and conditions.
- *
- * ID : $Id: atropos.c 1.1 Tue, 13 Apr 1999 13:30:49 +0100 dr10009 $
- *
- * This is the "atropos" CPU scheduler.
- */
-
-/* Ported to Xen's generic scheduler interface by Mark Williamson
- * these modifications are (C) 2004 Intel Research Cambridge
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/time.h>
-#include <xen/sched.h>
-#include <xen/sched-if.h>
-#include <public/sched_ctl.h>
-#include <xen/trace.h>
-
-#define ATROPOS_TASK_UNBLOCKED 16
-#define ATROPOS_TASK_WAIT 32
-#define ATROPOS_TASK_BLOCKED 48
-
-/* Atropos-specific per-domain data */
-struct at_dom_info
-{
- /* MAW Xen additions */
- struct domain *owner; /* the domain this data belongs to */
- struct list_head run_list; /* runqueue */
- struct list_head waitq; /* wait queue */
-
- /* (what remains of) the original fields */
-
- s_time_t deadline; /* Next deadline */
- s_time_t prevddln; /* Previous deadline */
-
- s_time_t remain; /* Time remaining this period */
- s_time_t period; /* Current period of time allocation */
- s_time_t nat_period; /* Natural period */
- s_time_t slice; /* Current length of allocation */
- s_time_t nat_slice; /* Natural length of allocation */
- s_time_t latency; /* Unblocking latency */
-
- int xtratime; /* Prepared to accept extra time? */
- int state; /* Keeps Atropos domain state */
-};
-
-/* Atropos-specific per-CPU data */
-struct at_cpu_info
-{
- struct list_head runq;
- struct list_head waitq;
-};
-
-
-#define DOM_INFO(_p) ((struct at_dom_info *)((_p)->sched_priv))
-#define CPU_INFO(_c) ((struct at_cpu_info *)((schedule_data[_c]).sched_priv))
-#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
-#define RUNQ(cpu) (&CPU_INFO(cpu)->runq)
-#define RUNLIST(_d) (&DOM_INFO(_d)->run_list)
-
-#define BESTEFFORT_QUANTUM MILLISECS(5)
-
-static void at_dump_cpu_state(int cpu);
-
-static xmem_cache_t *dom_info_cache;
-
-static inline void __add_to_runqueue_head(struct domain *d)
-{
- list_add(RUNLIST(d), RUNQ(d->processor));
-}
-
-static inline void __add_to_runqueue_tail(struct domain *d)
-{
- list_add_tail(RUNLIST(d), RUNQ(d->processor));
-}
-
-static inline void __del_from_runqueue(struct domain *d)
-{
- struct list_head *runlist = RUNLIST(d);
- list_del(runlist);
- runlist->next = NULL;
-}
-
-static inline int __task_on_runqueue(struct domain *d)
-{
- return (RUNLIST(d))->next != NULL;
-}
-
-
-/** calculate the length of a linked list */
-static int q_len(struct list_head *q)
-{
- int i = 0;
- struct at_dom_info *tmp;
- list_for_each_entry ( tmp, q, waitq )
- i++;
- return i;
-}
-
-
-/** waitq_el - get the domain that owns a wait queue list element */
-static inline struct domain *waitq_el(struct list_head *l)
-{
- struct at_dom_info *inf;
- inf = list_entry(l, struct at_dom_info, waitq);
- return inf->owner;
-}
-
-
-/*
- * requeue
- *
- * Places the specified domain on the appropriate queue.
- * The wait queue is ordered by the time at which the domain
- * will receive more CPU time. If a domain has no guaranteed time
- * left then the domain will be placed on the WAIT queue until
- * its next period.
- *
- * Note that domains can be on the wait queue with remain > 0
- * as a result of being blocked for a short time.
- * These are scheduled in preference to domains with remain < 0
- * in an attempt to improve interactive performance.
- */
-static void requeue(struct domain *sdom)
-{
- struct at_dom_info *i, *inf = DOM_INFO(sdom);
-
- if ( !domain_runnable(sdom) )
- return;
-
- if ( (inf->state == ATROPOS_TASK_WAIT) ||
- (inf->state == ATROPOS_TASK_UNBLOCKED) )
- {
- list_for_each_entry ( i, WAITQ(sdom->processor), waitq )
- {
- if ( i->deadline > inf->deadline )
- {
- __list_add(&inf->waitq, i->waitq.prev, &i->waitq);
- break;
- }
- }
-
- if ( &i->waitq == WAITQ(sdom->processor) )
- list_add_tail(&inf->waitq, WAITQ(sdom->processor));
- }
- else if ( domain_runnable(sdom) )
- {
- list_for_each_entry ( i, RUNQ(sdom->processor), run_list )
- {
- if ( (i->deadline > inf->deadline) || is_idle_task(i->owner) )
- {
- __list_add(&inf->run_list, i->run_list.prev, &i->run_list);
- break;
- }
- }
-
- if ( &i->waitq == RUNQ(sdom->processor) )
- list_add_tail(&inf->run_list, RUNQ(sdom->processor));
- }
- /* silently ignore tasks in other states like BLOCKED, DYING, STOPPED, etc
- * - they shouldn't be on any queue */
-}
-
-/** at_alloc_task - allocate private info for a task */
-static int at_alloc_task(struct domain *p)
-{
- ASSERT(p != NULL);
-
- p->sched_priv = xmem_cache_alloc(dom_info_cache);
- if ( p->sched_priv == NULL )
- return -1;
-
- return 0;
-}
-
-
-/* prepare a task to be added to scheduling */
-static void at_add_task(struct domain *p)
-{
- s_time_t now = NOW();
-
- ASSERT( p->sched_priv != NULL );
-
- DOM_INFO(p)->owner = p;
- p->lastschd = now;
-
- /* DOM 0's parameters must be set here for it to boot the system! */
- if(p->id == 0)
- {
- DOM_INFO(p)->remain = MILLISECS(15);
- DOM_INFO(p)->nat_period =
- DOM_INFO(p)->period = MILLISECS(20);
- DOM_INFO(p)->nat_slice =
- DOM_INFO(p)->slice = MILLISECS(15);
- DOM_INFO(p)->latency = MILLISECS(5);
- DOM_INFO(p)->xtratime = 1;
- DOM_INFO(p)->deadline = now;
- DOM_INFO(p)->prevddln = now;
- }
- else /* other domains run basically best effort unless otherwise set */
- {
- DOM_INFO(p)->remain = 0;
- DOM_INFO(p)->nat_period =
- DOM_INFO(p)->period = SECONDS(10);
- DOM_INFO(p)->nat_slice =
- DOM_INFO(p)->slice = MILLISECS(10);
- DOM_INFO(p)->latency = SECONDS(10);
- DOM_INFO(p)->xtratime = 1;
- DOM_INFO(p)->deadline = now;
-// DOM_INFO(p)->deadline = now + SECONDS(10);
- DOM_INFO(p)->prevddln = 0;
- }
-
- INIT_LIST_HEAD(&(DOM_INFO(p)->run_list));
- INIT_LIST_HEAD(&(DOM_INFO(p)->waitq));
-}
-
-/**
- * dequeue - remove a domain from any queues it is on.
- * @sdom: the task to remove
- */
-static void dequeue(struct domain *sdom)
-{
- struct at_dom_info *inf = DOM_INFO(sdom);
-
- ASSERT(sdom->id != IDLE_DOMAIN_ID);
-
- /* just delete it from all the queues! */
- list_del(&inf->waitq);
- INIT_LIST_HEAD(&inf->waitq);
-
-
- if(__task_on_runqueue(sdom))
- __del_from_runqueue(sdom);
-}
-
-
-/*
- * unblock
- *
- * This function deals with updating the sdom for a domain
- * which has just been unblocked.
- *
- * Xen's Atropos treats unblocking slightly differently to Nemesis:
- *
- * - "Short blocking" domains (i.e. that unblock before their deadline has
- * expired) are treated the same as in nemesis (put on the wait queue and
- * given preferential treatment in selecting domains for extra time).
- *
- * - "Long blocking" domains do not simply have their period truncated to their
- * unblocking latency as before but also have their slice recomputed to be the
- * same fraction of their new period. Each time the domain is scheduled, the
- * period and slice are doubled until they reach their original ("natural")
- * values, as set by the user (and stored in nat_period and nat_slice). The
- * idea is to give better response times to unblocking whilst preserving QoS
- * guarantees to other domains.
- */
-static void unblock(struct domain *sdom)
-{
- s_time_t time = NOW();
- struct at_dom_info *inf = DOM_INFO(sdom);
-
- dequeue(sdom);
-
- /* We distinguish two cases... short and long blocks */
- if ( inf->deadline < time )
- {
- /* Long blocking case */
-
- /* The sdom has passed its deadline since it was blocked.
- Give it its new deadline based on the latency value. */
- inf->prevddln = time;
-
- /* Scale the scheduling parameters as requested by the latency hint. */
- inf->deadline = time + inf->latency;
- inf->slice = inf->nat_slice / ( inf->nat_period / inf->latency );
- inf->period = inf->latency;
- inf->remain = inf->slice;
- }
- else
- {
- /* Short blocking case */
-
- /* We leave REMAIN intact, but put this domain on the WAIT
- queue marked as recently unblocked. It will be given
- priority over other domains on the wait queue until while
- REMAIN>0 in a generous attempt to help it make up for its
- own foolishness. */
- if(inf->remain > 0)
- inf->state = ATROPOS_TASK_UNBLOCKED;
- else
- inf->state = ATROPOS_TASK_WAIT;
- }
-
- requeue(sdom);
-}
-
-
-static int at_init_idle_task(struct domain *p)
-{
- if(at_alloc_task(p) < 0) return -1;
-
- at_add_task(p);
-
- dequeue(p);
- requeue(p);
-
- return 0;
-}
-
-
-static void block(struct domain* sdom)
-{
- DOM_INFO(sdom)->state = ATROPOS_TASK_BLOCKED;
- dequeue(sdom);
- requeue(sdom);
-}
-
-
-/**
- * ATROPOS - main scheduler function
- */
-task_slice_t ksched_scheduler(s_time_t time)
-{
- struct domain *cur_sdom = current; /* Current sdom */
- s_time_t newtime;
- s_time_t ranfor; /* How long the domain ran */
- struct domain *sdom; /* tmp. scheduling domain */
- int cpu = cur_sdom->processor; /* current CPU */
- struct at_dom_info *cur_info;
- static unsigned long waitq_rrobin = 0;
- int i;
- task_slice_t ret;
-
-
- cur_info = DOM_INFO(cur_sdom);
-
- ASSERT( cur_sdom != NULL);
-
- /* If we were spinning in the idle loop, there is no current
- * domain to deschedule. */
- if (is_idle_task(cur_sdom))
- goto deschedule_done;
-
- /*****************************
- *
- * Deschedule the current scheduling domain
- *
- ****************************/
-
- /* Record the time the domain was preempted and for how long it
- ran. Work out if the domain is going to be blocked to save
- some pointless queue shuffling */
- cur_sdom->lastdeschd = time;
-
- ranfor = (time - cur_sdom->lastschd);
-
- dequeue(cur_sdom);
-
- if ( domain_runnable(cur_sdom) ||
- (cur_info->state == ATROPOS_TASK_UNBLOCKED) )
- {
-
- /* In this block, we are doing accounting for an sdom which has
- been running in contracted time. Note that this could now happen
- even if the domain is on the wait queue (i.e. if it blocked) */
-
- /* Deduct guaranteed time from the domain */
- cur_info->remain -= ranfor;
-
- /* If guaranteed time has run out... */
- if ( cur_info->remain <= 0 )
- {
- /* Move domain to correct position in WAIT queue */
- /* XXX sdom_unblocked doesn't need this since it is
- already in the correct place. */
- cur_info->state = ATROPOS_TASK_WAIT;
- }
- }
-
- requeue(cur_sdom);
-
- deschedule_done:
- /*****************************
- *
- * We have now successfully descheduled the current sdom.
- * The next task is the allocate CPU time to any sdom it is due to.
- *
- ****************************/
- cur_sdom = NULL;
-
- /*****************************
- *
- * Allocate CPU time to any waiting domains who have passed their
- * period deadline. If necessary, move them to run queue.
- *
- ****************************/
-
- while(!list_empty(WAITQ(cpu)) &&
- DOM_INFO(sdom = waitq_el(WAITQ(cpu)->next))->deadline <= time )
- {
-
- struct at_dom_info *inf = DOM_INFO(sdom);
- dequeue(sdom);
-
- if ( inf->period != inf->nat_period )
- {
- /* This domain has had its parameters adjusted as a result of
- * unblocking and they need to be adjusted before requeuing it */
- inf->slice *= 2;
- inf->period *= 2;
-
- if ( inf->period > inf->nat_period )
- {
- inf->period = inf->nat_period;
- inf->slice = inf->nat_slice;
- }
- }
-
- /* Domain begins a new period and receives a slice of CPU
- * If this domain has been blocking then throw away the
- * rest of it's remain - it can't be trusted */
- if (inf->remain > 0)
- inf->remain = inf->slice;
- else
- inf->remain += inf->slice;
-
- inf->prevddln = inf->deadline;
- inf->deadline += inf->period;
-
- if ( inf->remain <= 0 )
- inf->state = ATROPOS_TASK_WAIT;
-
- /* Place on the appropriate queue */
- requeue(sdom);
- }
-
- /*****************************
- *
- * Next we need to pick an sdom to run.
- * If anything is actually 'runnable', we run that.
- * If nothing is, we pick a waiting sdom to run optimistically.
- * If there aren't even any of those, we have to spin waiting for an
- * event or a suitable time condition to happen.
- *
- ****************************/
-
- /* we guarantee there's always something on the runqueue */
- cur_info = list_entry(RUNQ(cpu)->next,
- struct at_dom_info, run_list);
-
- cur_sdom = cur_info->owner;
- newtime = time + cur_info->remain;
-
- /* MAW - the idle domain is always on the run queue. We run from the
- * runqueue if it's NOT the idle domain or if there's nothing on the wait
- * queue */
- if (cur_sdom->id == IDLE_DOMAIN_ID && !list_empty(WAITQ(cpu)))
- {
- struct at_dom_info *inf;
-
- /* Try running a domain on the WAIT queue - this part of the
- scheduler isn't particularly efficient but then again, we
- don't have any guaranteed domains to worry about. */
-
- /* See if there are any unblocked domains on the WAIT
- queue who we can give preferential treatment to. */
-
- list_for_each_entry ( inf, WAITQ(cpu), waitq )
- {
- sdom = inf->owner;
-
- if (inf->state == ATROPOS_TASK_UNBLOCKED)
- {
- cur_sdom = sdom;
- cur_info = inf;
- newtime = time + inf->remain;
- goto found;
- }
- }
-
- /* init values needed to approximate round-robin for slack time */
- i = 0;
- if ( waitq_rrobin >= q_len(WAITQ(cpu)))
- waitq_rrobin = 0;
-
-
- /* Last chance: pick a domain on the wait queue with the XTRA
- flag set. The NEXT_OPTM field is used to cheaply achieve
- an approximation of round-robin order */
- list_for_each_entry ( inf, WAITQ(cpu), waitq )
- {
- sdom = inf->owner;
-
- if (inf->xtratime && i >= waitq_rrobin)
- {
- cur_sdom = sdom;
- cur_info = inf;
- newtime = time + BESTEFFORT_QUANTUM;
- waitq_rrobin = i + 1; /* set this value ready for next */
- goto found;
- }
-
- i++;
- }
- }
-
- found:
- /**********************
- *
- * We now have to work out the time when we next need to
- * make a scheduling decision. We set the alarm timer
- * to cause an interrupt at that time.
- *
- **********************/
-
-#define MIN(x,y) ( ( x < y ) ? x : y )
-#define MAX(x,y) ( ( x > y ) ? x : y )
-
- /* If we might be able to run a waiting domain before this one has */
- /* exhausted its time, cut short the time allocation */
- if (!list_empty(WAITQ(cpu)))
- {
- newtime = MIN(newtime,
- DOM_INFO(waitq_el(WAITQ(cpu)->next))->deadline);
- }
-
- /* don't allow pointlessly small time slices */
- newtime = MAX(newtime, time + BESTEFFORT_QUANTUM);
-
- ret.task = cur_sdom;
- ret.time = newtime - time;
-
- TRACE_1D(0, cur_sdom->id);
-
- return ret;
-}
-
-
-/* set up some private data structures */
-static int at_init_scheduler()
-{
- int i;
-
- for ( i = 0; i < NR_CPUS; i++ )
- {
- schedule_data[i].sched_priv = xmalloc(sizeof(struct at_cpu_info));
- if ( schedule_data[i].sched_priv == NULL )
- return -1;
- INIT_LIST_HEAD(WAITQ(i));
- INIT_LIST_HEAD(RUNQ(i));
- }
-
- dom_info_cache = xmem_cache_create("Atropos dom info",
- sizeof(struct at_dom_info),
- 0, 0, NULL, NULL);
-
- return 0;
-}
-
-
-/* print relevant per-domain info for a run queue dump */
-static void at_dump_runq_el(struct domain *p)
-{
- printk("lastschd = %llu, xtratime = %d ",
- p->lastschd, DOM_INFO(p)->xtratime);
-}
-
-
-/* dump relevant per-cpu state for a run queue dump */
-static void at_dump_cpu_state(int cpu)
-{
- struct list_head *queue;
- int loop = 0;
- struct at_dom_info *d_inf;
- struct domain *d;
-
- queue = RUNQ(cpu);
- printk("\nRUNQUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
- (unsigned long) queue->next, (unsigned long) queue->prev);
-
- list_for_each_entry ( d_inf, queue, run_list )
- {
- d = d_inf->owner;
- printk("%3d: %d has=%c ", loop++, d->id,
- test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
- at_dump_runq_el(d);
- printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
- printk(" l: %lx n: %lx p: %lx\n",
- (unsigned long)&d_inf->run_list,
- (unsigned long)d_inf->run_list.next,
- (unsigned long)d_inf->run_list.prev);
- }
-
-
- queue = WAITQ(cpu);
- printk("\nWAITQUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
- (unsigned long) queue->next, (unsigned long) queue->prev);
-
- list_for_each_entry ( d_inf, queue, waitq )
- {
- d = d_inf->owner;
- printk("%3d: %d has=%c ", loop++, d->id,
- test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
- at_dump_runq_el(d);
- printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
- printk(" l: %lx n: %lx p: %lx\n",
- (unsigned long)&d_inf->waitq,
- (unsigned long)d_inf->waitq.next,
- (unsigned long)d_inf->waitq.prev);
- }
-
-}
-
-/* set or fetch domain scheduling parameters */
-static int at_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
-{
- if ( cmd->direction == SCHED_INFO_PUT )
- {
- /* sanity checking! */
- if( cmd->u.atropos.latency > cmd->u.atropos.nat_period
- || cmd->u.atropos.latency == 0
- || cmd->u.atropos.nat_slice > cmd->u.atropos.nat_period )
- return -EINVAL;
-
- DOM_INFO(p)->nat_period = cmd->u.atropos.nat_period;
- DOM_INFO(p)->nat_slice = cmd->u.atropos.nat_slice;
- DOM_INFO(p)->latency = cmd->u.atropos.latency;
- DOM_INFO(p)->xtratime = !!cmd->u.atropos.xtratime;
- }
- else if ( cmd->direction == SCHED_INFO_GET )
- {
- cmd->u.atropos.nat_period = DOM_INFO(p)->nat_period;
- cmd->u.atropos.nat_slice = DOM_INFO(p)->nat_slice;
- cmd->u.atropos.latency = DOM_INFO(p)->latency;
- cmd->u.atropos.xtratime = DOM_INFO(p)->xtratime;
- }
-
- return 0;
-}
-
-/* free memory associated with a task */
-static void at_free_task(struct domain *p)
-{
- xmem_cache_free( dom_info_cache, DOM_INFO(p) );
-}
-
-
-/* print decoded domain private state value (if known) */
-static int at_prn_state(int state)
-{
- int ret = 0;
-
- switch(state)
- {
- case ATROPOS_TASK_UNBLOCKED:
- printk("Unblocked");
- break;
- case ATROPOS_TASK_WAIT:
- printk("Wait");
- break;
- default:
- ret = -1;
- }
-
- return ret;
-}
-
-struct scheduler sched_atropos_def = {
- .name = "Atropos Soft Real Time Scheduler",
- .opt_name = "atropos",
- .sched_id = SCHED_ATROPOS,
- .init_scheduler = at_init_scheduler,
- .init_idle_task = at_init_idle_task,
- .alloc_task = at_alloc_task,
- .add_task = at_add_task,
- .free_task = at_free_task,
- .wake = unblock,
- .sleep = block,
- .do_schedule = ksched_scheduler,
- .adjdom = at_adjdom,
- .dump_cpu_state = at_dump_cpu_state,
- .prn_state = at_prn_state,
-};
diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c
index edad59ca18..c37b4bf2f9 100644
--- a/xen/common/sched_bvt.c
+++ b/xen/common/sched_bvt.c
@@ -1,5 +1,4 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
+/****************************************************************************
* (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
* (C) 2002-2003 University of Cambridge
* (C) 2004 - Mark Williamson - Intel Research Cambridge
@@ -24,17 +23,22 @@
#include <xen/ac_timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
-#include <xen/slab.h>
#include <xen/softirq.h>
/* all per-domain BVT-specific scheduling info is stored here */
-struct bvt_dom_info
+struct bvt_vcpu_info
{
- struct domain *domain; /* domain this info belongs to */
struct list_head run_list; /* runqueue list pointers */
- u32 mcu_advance; /* inverse of weight */
u32 avt; /* actual virtual time */
u32 evt; /* effective virtual time */
+ struct vcpu *vcpu;
+ struct bvt_dom_info *inf;
+};
+
+struct bvt_dom_info
+{
+ struct domain *domain; /* domain this info belongs to */
+ u32 mcu_advance; /* inverse of weight */
int warpback; /* warp? */
int warp; /* warp set and within the warp
limits*/
@@ -43,6 +47,8 @@ struct bvt_dom_info
struct ac_timer warp_timer; /* deals with warpl */
s_time_t warpu; /* unwarp time requirement */
struct ac_timer unwarp_timer; /* deals with warpu */
+
+ struct bvt_vcpu_info vcpu_inf[MAX_VIRT_CPUS];
};
struct bvt_cpu_info
@@ -52,8 +58,9 @@ struct bvt_cpu_info
};
#define BVT_INFO(p) ((struct bvt_dom_info *)(p)->sched_priv)
+#define EBVT_INFO(p) ((struct bvt_vcpu_info *)(p)->sched_priv)
#define CPU_INFO(cpu) ((struct bvt_cpu_info *)(schedule_data[cpu]).sched_priv)
-#define RUNLIST(p) ((struct list_head *)&(BVT_INFO(p)->run_list))
+#define RUNLIST(p) ((struct list_head *)&(EBVT_INFO(p)->run_list))
#define RUNQUEUE(cpu) ((struct list_head *)&(CPU_INFO(cpu)->runqueue))
#define CPU_SVT(cpu) (CPU_INFO(cpu)->svt)
@@ -62,36 +69,34 @@ struct bvt_cpu_info
#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */
static s32 ctx_allow = (s32)MILLISECS(5); /* context switch allowance */
-static xmem_cache_t *dom_info_cache;
-
-static inline void __add_to_runqueue_head(struct domain *d)
+static inline void __add_to_runqueue_head(struct vcpu *d)
{
list_add(RUNLIST(d), RUNQUEUE(d->processor));
}
-static inline void __add_to_runqueue_tail(struct domain *d)
+static inline void __add_to_runqueue_tail(struct vcpu *d)
{
list_add_tail(RUNLIST(d), RUNQUEUE(d->processor));
}
-static inline void __del_from_runqueue(struct domain *d)
+static inline void __del_from_runqueue(struct vcpu *d)
{
struct list_head *runlist = RUNLIST(d);
list_del(runlist);
runlist->next = NULL;
}
-static inline int __task_on_runqueue(struct domain *d)
+static inline int __task_on_runqueue(struct vcpu *d)
{
return (RUNLIST(d))->next != NULL;
}
/* Warp/unwarp timer functions */
-static void warp_timer_fn(unsigned long pointer)
+static void warp_timer_fn(void *data)
{
- struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer;
- unsigned int cpu = inf->domain->processor;
+ struct bvt_dom_info *inf = data;
+ unsigned int cpu = inf->domain->vcpu[0]->processor;
spin_lock_irq(&schedule_data[cpu].schedule_lock);
@@ -104,17 +109,15 @@ static void warp_timer_fn(unsigned long pointer)
cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
}
- /* set unwarp timer */
- inf->unwarp_timer.expires = NOW() + inf->warpu;
- add_ac_timer(&inf->unwarp_timer);
+ set_ac_timer(&inf->unwarp_timer, NOW() + inf->warpu);
spin_unlock_irq(&schedule_data[cpu].schedule_lock);
}
-static void unwarp_timer_fn(unsigned long pointer)
+static void unwarp_timer_fn(void *data)
{
- struct bvt_dom_info *inf = (struct bvt_dom_info *)pointer;
- unsigned int cpu = inf->domain->processor;
+ struct bvt_dom_info *inf = data;
+ unsigned int cpu = inf->domain->vcpu[0]->processor;
spin_lock_irq(&schedule_data[cpu].schedule_lock);
@@ -127,24 +130,25 @@ static void unwarp_timer_fn(unsigned long pointer)
spin_unlock_irq(&schedule_data[cpu].schedule_lock);
}
-static inline u32 calc_avt(struct domain *d, s_time_t now)
+static inline u32 calc_avt(struct vcpu *d, s_time_t now)
{
u32 ranfor, mcus;
- struct bvt_dom_info *inf = BVT_INFO(d);
+ struct bvt_dom_info *inf = BVT_INFO(d->domain);
+ struct bvt_vcpu_info *einf = EBVT_INFO(d);
ranfor = (u32)(now - d->lastschd);
mcus = (ranfor + MCU - 1)/MCU;
- return inf->avt + mcus * inf->mcu_advance;
+ return einf->avt + mcus * inf->mcu_advance;
}
/*
* Calculate the effective virtual time for a domain. Take into account
* warping limits
*/
-static inline u32 calc_evt(struct domain *d, u32 avt)
+static inline u32 calc_evt(struct vcpu *d, u32 avt)
{
- struct bvt_dom_info *inf = BVT_INFO(d);
+ struct bvt_dom_info *inf = BVT_INFO(d->domain);
/* TODO The warp routines need to be rewritten GM */
if ( inf->warp )
@@ -159,112 +163,120 @@ static inline u32 calc_evt(struct domain *d, u32 avt)
*
* Returns non-zero on failure.
*/
-static int bvt_alloc_task(struct domain *d)
+static int bvt_alloc_task(struct vcpu *v)
{
- if ( (d->sched_priv = xmem_cache_alloc(dom_info_cache)) == NULL )
- return -1;
- memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
+ struct domain *d = v->domain;
+
+ if ( (d->sched_priv == NULL) )
+ {
+ if ( (d->sched_priv = xmalloc(struct bvt_dom_info)) == NULL )
+ return -1;
+ memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
+ }
+
+ v->sched_priv = &BVT_INFO(d)->vcpu_inf[v->vcpu_id];
+
+ BVT_INFO(d)->vcpu_inf[v->vcpu_id].inf = BVT_INFO(d);
+ BVT_INFO(d)->vcpu_inf[v->vcpu_id].vcpu = v;
+
return 0;
}
/*
* Add and remove a domain
*/
-static void bvt_add_task(struct domain *d)
+static void bvt_add_task(struct vcpu *v)
{
- struct bvt_dom_info *inf = BVT_INFO(d);
+ struct bvt_dom_info *inf = BVT_INFO(v->domain);
+ struct bvt_vcpu_info *einf = EBVT_INFO(v);
ASSERT(inf != NULL);
- ASSERT(d != NULL);
-
- inf->mcu_advance = MCU_ADVANCE;
- inf->domain = d;
- inf->warpback = 0;
- /* Set some default values here. */
- inf->warp = 0;
- inf->warp_value = 0;
- inf->warpl = MILLISECS(2000);
- inf->warpu = MILLISECS(1000);
- /* initialise the timers */
- init_ac_timer(&inf->warp_timer);
- inf->warp_timer.cpu = d->processor;
- inf->warp_timer.data = (unsigned long)inf;
- inf->warp_timer.function = &warp_timer_fn;
- init_ac_timer(&inf->unwarp_timer);
- inf->unwarp_timer.cpu = d->processor;
- inf->unwarp_timer.data = (unsigned long)inf;
- inf->unwarp_timer.function = &unwarp_timer_fn;
-
- if ( d->id == IDLE_DOMAIN_ID )
+ ASSERT(v != NULL);
+
+ /* Allocate per-CPU context if this is the first domain to be added. */
+ if ( CPU_INFO(v->processor) == NULL )
{
- inf->avt = inf->evt = ~0U;
+ schedule_data[v->processor].sched_priv = xmalloc(struct bvt_cpu_info);
+ BUG_ON(CPU_INFO(v->processor) == NULL);
+ INIT_LIST_HEAD(RUNQUEUE(v->processor));
+ CPU_SVT(v->processor) = 0;
+ }
+
+ if ( v->vcpu_id == 0 )
+ {
+ inf->mcu_advance = MCU_ADVANCE;
+ inf->domain = v->domain;
+ inf->warpback = 0;
+ /* Set some default values here. */
+ inf->warp = 0;
+ inf->warp_value = 0;
+ inf->warpl = MILLISECS(2000);
+ inf->warpu = MILLISECS(1000);
+ /* Initialise the warp timers. */
+ init_ac_timer(&inf->warp_timer, warp_timer_fn, inf, v->processor);
+ init_ac_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
+ }
+
+ einf->vcpu = v;
+
+ if ( is_idle_task(v->domain) )
+ {
+ einf->avt = einf->evt = ~0U;
+ BUG_ON(__task_on_runqueue(v));
+ __add_to_runqueue_head(v);
}
else
{
/* Set avt and evt to system virtual time. */
- inf->avt = CPU_SVT(d->processor);
- inf->evt = CPU_SVT(d->processor);
+ einf->avt = CPU_SVT(v->processor);
+ einf->evt = CPU_SVT(v->processor);
}
}
-static int bvt_init_idle_task(struct domain *p)
+static void bvt_wake(struct vcpu *v)
{
- if ( bvt_alloc_task(p) < 0 )
- return -1;
-
- bvt_add_task(p);
-
- set_bit(DF_RUNNING, &p->flags);
- if ( !__task_on_runqueue(p) )
- __add_to_runqueue_head(p);
-
- return 0;
-}
-
-static void bvt_wake(struct domain *d)
-{
- struct bvt_dom_info *inf = BVT_INFO(d);
- struct domain *curr;
+ struct bvt_vcpu_info *einf = EBVT_INFO(v);
+ struct vcpu *curr;
s_time_t now, r_time;
- int cpu = d->processor;
+ int cpu = v->processor;
u32 curr_evt;
- if ( unlikely(__task_on_runqueue(d)) )
+ if ( unlikely(__task_on_runqueue(v)) )
return;
- __add_to_runqueue_head(d);
+ __add_to_runqueue_head(v);
now = NOW();
/* Set the BVT parameters. AVT should always be updated
if CPU migration ocurred.*/
- if ( inf->avt < CPU_SVT(cpu) ||
- unlikely(test_bit(DF_MIGRATED, &d->flags)) )
- inf->avt = CPU_SVT(cpu);
+ if ( einf->avt < CPU_SVT(cpu) ||
+ unlikely(test_bit(_VCPUF_cpu_migrated, &v->vcpu_flags)) )
+ einf->avt = CPU_SVT(cpu);
/* Deal with warping here. */
- inf->evt = calc_evt(d, inf->avt);
+ einf->evt = calc_evt(v, einf->avt);
curr = schedule_data[cpu].curr;
curr_evt = calc_evt(curr, calc_avt(curr, now));
/* Calculate the time the current domain would run assuming
the second smallest evt is of the newly woken domain */
r_time = curr->lastschd +
- ((inf->evt - curr_evt) / BVT_INFO(curr)->mcu_advance) +
+ ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
ctx_allow;
- if ( is_idle_task(curr) || (inf->evt <= curr_evt) )
+ if ( is_idle_task(curr->domain) || (einf->evt <= curr_evt) )
cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
else if ( schedule_data[cpu].s_timer.expires > r_time )
- mod_ac_timer(&schedule_data[cpu].s_timer, r_time);
+ set_ac_timer(&schedule_data[cpu].s_timer, r_time);
}
-static void bvt_sleep(struct domain *d)
+static void bvt_sleep(struct vcpu *v)
{
- if ( test_bit(DF_RUNNING, &d->flags) )
- cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
- else if ( __task_on_runqueue(d) )
- __del_from_runqueue(d);
+ if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
+ cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
+ else if ( __task_on_runqueue(v) )
+ __del_from_runqueue(v);
}
/**
@@ -274,7 +286,7 @@ static void bvt_sleep(struct domain *d)
static void bvt_free_task(struct domain *d)
{
ASSERT(d->sched_priv != NULL);
- xmem_cache_free(dom_info_cache, d->sched_priv);
+ xfree(d->sched_priv);
}
/* Control the scheduler. */
@@ -345,26 +357,28 @@ static int bvt_adjdom(
* i.e., the domain with lowest EVT.
* The runqueue should be ordered by EVT so that is easy.
*/
-static task_slice_t bvt_do_schedule(s_time_t now)
+static struct task_slice bvt_do_schedule(s_time_t now)
{
- struct domain *prev = current, *next = NULL, *next_prime, *p;
+ struct domain *d;
+ struct vcpu *prev = current, *next = NULL, *next_prime, *ed;
int cpu = prev->processor;
s32 r_time; /* time for new dom to run */
u32 next_evt, next_prime_evt, min_avt;
- struct bvt_dom_info *prev_inf = BVT_INFO(prev);
- struct bvt_dom_info *p_inf = NULL;
- struct bvt_dom_info *next_inf = NULL;
- struct bvt_dom_info *next_prime_inf = NULL;
- task_slice_t ret;
+ struct bvt_dom_info *prev_inf = BVT_INFO(prev->domain);
+ struct bvt_vcpu_info *prev_einf = EBVT_INFO(prev);
+ struct bvt_vcpu_info *p_einf = NULL;
+ struct bvt_vcpu_info *next_einf = NULL;
+ struct bvt_vcpu_info *next_prime_einf = NULL;
+ struct task_slice ret;
ASSERT(prev->sched_priv != NULL);
- ASSERT(prev_inf != NULL);
+ ASSERT(prev_einf != NULL);
ASSERT(__task_on_runqueue(prev));
- if ( likely(!is_idle_task(prev)) )
+ if ( likely(!is_idle_task(prev->domain)) )
{
- prev_inf->avt = calc_avt(prev, now);
- prev_inf->evt = calc_evt(prev, prev_inf->avt);
+ prev_einf->avt = calc_avt(prev, now);
+ prev_einf->evt = calc_evt(prev, prev_einf->avt);
if(prev_inf->warpback && prev_inf->warpl > 0)
rem_ac_timer(&prev_inf->warp_timer);
@@ -384,49 +398,44 @@ static task_slice_t bvt_do_schedule(s_time_t now)
* *and* the task the second lowest evt.
* this code is O(n) but we expect n to be small.
*/
- next_inf = BVT_INFO(schedule_data[cpu].idle);
- next_prime_inf = NULL;
+ next_einf = EBVT_INFO(schedule_data[cpu].idle);
+ next_prime_einf = NULL;
next_evt = ~0U;
next_prime_evt = ~0U;
min_avt = ~0U;
- list_for_each_entry ( p_inf, RUNQUEUE(cpu), run_list )
+ list_for_each_entry ( p_einf, RUNQUEUE(cpu), run_list )
{
- if ( p_inf->evt < next_evt )
+ if ( p_einf->evt < next_evt )
{
- next_prime_inf = next_inf;
+ next_prime_einf = next_einf;
next_prime_evt = next_evt;
- next_inf = p_inf;
- next_evt = p_inf->evt;
+ next_einf = p_einf;
+ next_evt = p_einf->evt;
}
else if ( next_prime_evt == ~0U )
{
- next_prime_evt = p_inf->evt;
- next_prime_inf = p_inf;
+ next_prime_evt = p_einf->evt;
+ next_prime_einf = p_einf;
}
- else if ( p_inf->evt < next_prime_evt )
+ else if ( p_einf->evt < next_prime_evt )
{
- next_prime_evt = p_inf->evt;
- next_prime_inf = p_inf;
+ next_prime_evt = p_einf->evt;
+ next_prime_einf = p_einf;
}
/* Determine system virtual time. */
- if ( p_inf->avt < min_avt )
- min_avt = p_inf->avt;
+ if ( p_einf->avt < min_avt )
+ min_avt = p_einf->avt;
}
- if(next_inf->warp && next_inf->warpl > 0)
- {
- /* Set the timer up */
- next_inf->warp_timer.expires = now + next_inf->warpl;
- /* Add it to the heap */
- add_ac_timer(&next_inf->warp_timer);
- }
+ if ( next_einf->inf->warp && next_einf->inf->warpl > 0 )
+ set_ac_timer(&next_einf->inf->warp_timer, now + next_einf->inf->warpl);
/* Extract the domain pointers from the dom infos */
- next = next_inf->domain;
- next_prime = next_prime_inf->domain;
+ next = next_einf->vcpu;
+ next_prime = next_prime_einf->vcpu;
/* Update system virtual time. */
if ( min_avt != ~0U )
@@ -439,13 +448,15 @@ static task_slice_t bvt_do_schedule(s_time_t now)
write_lock(&domlist_lock);
- for_each_domain ( p )
+ for_each_domain ( d )
{
- if ( p->processor == cpu )
- {
- p_inf = BVT_INFO(p);
- p_inf->evt -= 0xe0000000;
- p_inf->avt -= 0xe0000000;
+ for_each_vcpu (d, ed) {
+ if ( ed->processor == cpu )
+ {
+ p_einf = EBVT_INFO(ed);
+ p_einf->evt -= 0xe0000000;
+ p_einf->avt -= 0xe0000000;
+ }
}
}
@@ -455,13 +466,13 @@ static task_slice_t bvt_do_schedule(s_time_t now)
}
/* work out time for next run through scheduler */
- if ( is_idle_task(next) )
+ if ( is_idle_task(next->domain) )
{
r_time = ctx_allow;
goto sched_done;
}
- if ( (next_prime == NULL) || is_idle_task(next_prime) )
+ if ( (next_prime == NULL) || is_idle_task(next_prime->domain) )
{
/* We have only one runnable task besides the idle task. */
r_time = 10 * ctx_allow; /* RN: random constant */
@@ -473,9 +484,9 @@ static task_slice_t bvt_do_schedule(s_time_t now)
* Work out how long 'next' can run till its evt is greater than
* 'next_prime's evt. Take context switch allowance into account.
*/
- ASSERT(next_prime_inf->evt >= next_inf->evt);
+ ASSERT(next_prime_einf->evt >= next_einf->evt);
- r_time = ((next_prime_inf->evt - next_inf->evt)/next_inf->mcu_advance)
+ r_time = ((next_prime_einf->evt - next_einf->evt)/next_einf->inf->mcu_advance)
+ ctx_allow;
ASSERT(r_time >= ctx_allow);
@@ -487,12 +498,12 @@ static task_slice_t bvt_do_schedule(s_time_t now)
}
-static void bvt_dump_runq_el(struct domain *p)
+static void bvt_dump_runq_el(struct vcpu *p)
{
- struct bvt_dom_info *inf = BVT_INFO(p);
+ struct bvt_vcpu_info *inf = EBVT_INFO(p);
printk("mcua=%d ev=0x%08X av=0x%08X ",
- inf->mcu_advance, inf->evt, inf->avt);
+ inf->inf->mcu_advance, inf->evt, inf->avt);
}
static void bvt_dump_settings(void)
@@ -504,8 +515,8 @@ static void bvt_dump_cpu_state(int i)
{
struct list_head *queue;
int loop = 0;
- struct bvt_dom_info *d_inf;
- struct domain *d;
+ struct bvt_vcpu_info *vcpu_inf;
+ struct vcpu *v;
printk("svt=0x%08lX ", CPU_SVT(i));
@@ -513,56 +524,24 @@ static void bvt_dump_cpu_state(int i)
printk("QUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
(unsigned long) queue->next, (unsigned long) queue->prev);
- list_for_each_entry ( d_inf, queue, run_list )
+ list_for_each_entry ( vcpu_inf, queue, run_list )
{
- d = d_inf->domain;
- printk("%3d: %u has=%c ", loop++, d->id,
- test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
- bvt_dump_runq_el(d);
- printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
+ v = vcpu_inf->vcpu;
+ printk("%3d: %u has=%c ", loop++, v->domain->domain_id,
+ test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F');
+ bvt_dump_runq_el(v);
+ printk("c=0x%X%08X\n", (u32)(v->cpu_time>>32), (u32)v->cpu_time);
printk(" l: %p n: %p p: %p\n",
- &d_inf->run_list, d_inf->run_list.next, d_inf->run_list.prev);
+ &vcpu_inf->run_list, vcpu_inf->run_list.next,
+ vcpu_inf->run_list.prev);
}
}
-/* Initialise the data structures. */
-static int bvt_init_scheduler(void)
-{
- int i;
-
- for ( i = 0; i < NR_CPUS; i++ )
- {
- schedule_data[i].sched_priv = xmalloc(sizeof(struct bvt_cpu_info));
-
- if ( schedule_data[i].sched_priv == NULL )
- {
- printk("Failed to allocate BVT scheduler per-CPU memory!\n");
- return -1;
- }
-
- INIT_LIST_HEAD(RUNQUEUE(i));
-
- CPU_SVT(i) = 0; /* XXX do I really need to do this? */
- }
-
- dom_info_cache = xmem_cache_create(
- "BVT dom info", sizeof(struct bvt_dom_info), 0, 0, NULL, NULL);
- if ( dom_info_cache == NULL )
- {
- printk("BVT: Failed to allocate domain info SLAB cache");
- return -1;
- }
-
- return 0;
-}
-
struct scheduler sched_bvt_def = {
.name = "Borrowed Virtual Time",
.opt_name = "bvt",
.sched_id = SCHED_BVT,
- .init_scheduler = bvt_init_scheduler,
- .init_idle_task = bvt_init_idle_task,
.alloc_task = bvt_alloc_task,
.add_task = bvt_add_task,
.free_task = bvt_free_task,
@@ -574,3 +553,13 @@ struct scheduler sched_bvt_def = {
.sleep = bvt_sleep,
.wake = bvt_wake,
};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/sched_rrobin.c b/xen/common/sched_rrobin.c
deleted file mode 100644
index 2164ce22b4..0000000000
--- a/xen/common/sched_rrobin.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/****************************************************************************
- * Round Robin Scheduler for Xen
- *
- * by Mark Williamson (C) 2004 Intel Research Cambridge
- */
-
-#include <xen/sched.h>
-#include <xen/sched-if.h>
-#include <public/sched_ctl.h>
-#include <xen/ac_timer.h>
-#include <xen/softirq.h>
-#include <xen/time.h>
-#include <xen/slab.h>
-
-#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */
-
-static s_time_t rr_slice = MILLISECS(10);
-
-/* Only runqueue pointers and domain pointer*/
-struct rrobin_dom_info
-{
- struct list_head run_list;
- struct domain *domain;
-};
-
-#define RR_INFO(d) ((struct rrobin_dom_info *)d->sched_priv)
-#define RUNLIST(d) ((struct list_head *)&(RR_INFO(d)->run_list))
-#define RUNQUEUE(cpu) RUNLIST(schedule_data[cpu].idle)
-
-static xmem_cache_t *dom_info_cache;
-
-static inline void __add_to_runqueue_head(struct domain *d)
-{
- list_add(RUNLIST(d), RUNQUEUE(d->processor));
-}
-
-static inline void __add_to_runqueue_tail(struct domain *d)
-{
- list_add_tail(RUNLIST(d), RUNQUEUE(d->processor));
-}
-
-static inline void __del_from_runqueue(struct domain *d)
-{
- struct list_head *runlist = RUNLIST(d);
- list_del(runlist);
- runlist->next = NULL;
-}
-
-static inline int __task_on_runqueue(struct domain *d)
-{
- return (RUNLIST(d))->next != NULL;
-}
-
-/* Initialises the runqueues and creates the domain info cache */
-static int rr_init_scheduler()
-{
- int i;
-
- for ( i = 0; i < NR_CPUS; i++ )
- INIT_LIST_HEAD(RUNQUEUE(i));
-
- dom_info_cache = xmem_cache_create(
- "RR dom info", sizeof(struct rrobin_dom_info), 0, 0, 0, NULL);
- if ( dom_info_cache == NULL )
- {
- printk("Could not allocate SLAB cache.\n");
- return -1;
- }
-
- return 0;
-}
-
-/* Allocates memory for per domain private scheduling data*/
-static int rr_alloc_task(struct domain *d)
-{
- if ( (d->sched_priv = xmem_cache_alloc(dom_info_cache)) == NULL )
- return -1;
- memset(d->sched_priv, 0, sizeof(struct rrobin_dom_info));
- return 0;
-}
-
-/* Setup the rr_dom_info */
-static void rr_add_task(struct domain *d)
-{
- struct rrobin_dom_info *inf;
- RR_INFO(d)->domain = d;
- inf = RR_INFO(d);
-}
-
-/* Frees memory used by domain info */
-static void rr_free_task(struct domain *d)
-{
- ASSERT(d->sched_priv != NULL);
- xmem_cache_free(dom_info_cache, d->sched_priv);
-}
-
-/* Initialises idle task */
-static int rr_init_idle_task(struct domain *d)
-{
- if ( rr_alloc_task(d) < 0 )
- return -1;
-
- rr_add_task(d);
-
- set_bit(DF_RUNNING, &d->flags);
- if ( !__task_on_runqueue(d) )
- __add_to_runqueue_head(d);
-
- return 0;
-}
-
-/* Main scheduling function */
-static task_slice_t rr_do_schedule(s_time_t now)
-{
- struct domain *prev = current;
- int cpu = current->processor;
- task_slice_t ret;
-
- if ( !is_idle_task(prev) )
- {
- __del_from_runqueue(prev);
-
- if ( domain_runnable(prev) )
- __add_to_runqueue_tail(prev);
- }
-
- ret.task = list_entry(RUNQUEUE(cpu)->next,
- struct rrobin_dom_info,
- run_list)->domain;
- ret.time = rr_slice;
- return ret;
-}
-
-/* Set/retrive control parameter(s) */
-static int rr_ctl(struct sched_ctl_cmd *cmd)
-{
- if ( cmd->direction == SCHED_INFO_PUT )
- {
- rr_slice = cmd->u.rrobin.slice;
- }
- else /* cmd->direction == SCHED_INFO_GET */
- {
- cmd->u.rrobin.slice = rr_slice;
- }
-
- return 0;
-}
-
-static void rr_dump_settings()
-{
- printk("rr_slice = %llu ", rr_slice);
-}
-
-static void rr_sleep(struct domain *d)
-{
- if ( test_bit(DF_RUNNING, &d->flags) )
- cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
- else if ( __task_on_runqueue(d) )
- __del_from_runqueue(d);
-}
-
-void rr_wake(struct domain *d)
-{
- struct domain *curr;
- s_time_t now;
- int cpu = d->processor;
-
- if ( unlikely(__task_on_runqueue(d)) )
- return;
-
- __add_to_runqueue_head(d);
-
- now = NOW();
-
- curr = schedule_data[cpu].curr;
- if ( is_idle_task(curr) )
- cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
-}
-
-
-static void rr_dump_domain(struct domain *d)
-{
- printk("%u has=%c ", d->id,
- test_bit(DF_RUNNING, &d->flags) ? 'T':'F');
- printk("c=0x%X%08X\n", (u32)(d->cpu_time>>32), (u32)d->cpu_time);
-}
-
-static void rr_dump_cpu_state(int i)
-{
- struct list_head *queue;
- int loop = 0;
- struct rrobin_dom_info *d_inf;
-
- queue = RUNQUEUE(i);
- printk("QUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
- (unsigned long) queue->next, (unsigned long) queue->prev);
-
- printk("%3d: ",loop++);
- d_inf = list_entry(queue, struct rrobin_dom_info, run_list);
- rr_dump_domain(d_inf->domain);
-
- list_for_each_entry ( d_inf, queue, run_list )
- {
- printk("%3d: ",loop++);
- rr_dump_domain(d_inf->domain);
- }
-}
-
-
-struct scheduler sched_rrobin_def = {
- .name = "Round-Robin Scheduler",
- .opt_name = "rrobin",
- .sched_id = SCHED_RROBIN,
-
- .init_idle_task = rr_init_idle_task,
- .alloc_task = rr_alloc_task,
- .add_task = rr_add_task,
- .free_task = rr_free_task,
- .init_scheduler = rr_init_scheduler,
- .do_schedule = rr_do_schedule,
- .control = rr_ctl,
- .dump_settings = rr_dump_settings,
- .dump_cpu_state = rr_dump_cpu_state,
- .sleep = rr_sleep,
- .wake = rr_wake,
-};
-
-
diff --git a/xen/common/sched_sedf.c b/xen/common/sched_sedf.c
new file mode 100644
index 0000000000..ab64af3a64
--- /dev/null
+++ b/xen/common/sched_sedf.c
@@ -0,0 +1,1453 @@
+/******************************************************************************
+ * Simple EDF scheduler for xen
+ *
+ * by Stephan Diestelhorst (C) 2004 Cambridge University
+ * based on code by Mark Williamson (C) 2004 Intel Research Cambridge
+ */
+
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+#include <public/sched_ctl.h>
+#include <xen/ac_timer.h>
+#include <xen/softirq.h>
+#include <xen/time.h>
+
+/*verbosity settings*/
+#define SEDFLEVEL 0
+#define PRINT(_f, _a...) \
+ if ((_f)<=SEDFLEVEL) printk(_a );
+
+#ifndef NDEBUG
+#define SEDF_STATS
+#define CHECK(_p) if ( !(_p) ) \
+ { printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__,\
+ __FILE__);}
+#else
+#define CHECK(_p) ((void)0)
+#endif
+
+/*various ways of unblocking domains*/
+#define UNBLOCK_ISOCHRONOUS_EDF 1
+#define UNBLOCK_EDF 2
+#define UNBLOCK_ATROPOS 3
+#define UNBLOCK_SHORT_RESUME 4
+#define UNBLOCK_BURST 5
+#define UNBLOCK_EXTRA_SUPPORT 6
+#define UNBLOCK UNBLOCK_EXTRA_SUPPORT
+
+/*various ways of treating extra-time*/
+#define EXTRA_OFF 1
+#define EXTRA_ROUNDR 2
+#define EXTRA_SLICE_WEIGHT 3
+#define EXTRA_BLOCK_WEIGHT 4
+
+#define EXTRA EXTRA_BLOCK_WEIGHT
+
+#define EXTRA_NONE (0)
+#define EXTRA_AWARE (1)
+#define EXTRA_RUN_PEN (2)
+#define EXTRA_RUN_UTIL (4)
+#define EXTRA_WANT_PEN_Q (8)
+#define EXTRA_PEN_Q (0)
+#define EXTRA_UTIL_Q (1)
+#define SEDF_ASLEEP (16)
+
+#define EXTRA_QUANTUM (MICROSECS(500))
+#define WEIGHT_PERIOD (MILLISECS(100))
+#define WEIGHT_SAFETY (MILLISECS(5))
+
+#define IMPLY(a, b) (!(a) || (b))
+#define EQ(a, b) ((!!(a)) == (!!(b)))
+
+
+struct sedf_dom_info {
+ struct domain *domain;
+};
+struct sedf_vcpu_info
+{
+ struct vcpu *vcpu;
+ struct list_head list;
+ struct list_head extralist[2];
+
+ /*Parameters for EDF*/
+ s_time_t period; /*=(relative deadline)*/
+ s_time_t slice; /*=worst case execution time*/
+
+ /*Advaced Parameters*/
+ /*Latency Scaling*/
+ s_time_t period_orig;
+ s_time_t slice_orig;
+ s_time_t latency;
+
+ /*status of domain*/
+ int status;
+ /*weights for "Scheduling for beginners/ lazy/ etc." ;)*/
+ short weight;
+ short extraweight;
+ /*Bookkeeping*/
+ s_time_t deadl_abs;
+ s_time_t sched_start_abs;
+ s_time_t cputime;
+ /* times the domain un-/blocked */
+ s_time_t block_abs;
+ s_time_t unblock_abs;
+
+ /*scores for {util, block penalty}-weighted extratime distribution*/
+ int score[2];
+ s_time_t short_block_lost_tot;
+
+ /*Statistics*/
+ s_time_t extra_time_tot;
+
+#ifdef SEDF_STATS
+ s_time_t block_time_tot;
+ s_time_t penalty_time_tot;
+ int block_tot;
+ int short_block_tot;
+ int long_block_tot;
+ int short_cont;
+ int pen_extra_blocks;
+ int pen_extra_slices;
+#endif
+};
+
+struct sedf_cpu_info {
+ struct list_head runnableq;
+ struct list_head waitq;
+ struct list_head extraq[2];
+ s_time_t current_slice_expires;
+};
+
+#define EDOM_INFO(d) ((struct sedf_vcpu_info *)((d)->sched_priv))
+#define CPU_INFO(cpu) ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
+#define LIST(d) (&EDOM_INFO(d)->list)
+#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
+#define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq)
+#define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
+#define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i]))
+#define IDLETASK(cpu) ((struct vcpu *)schedule_data[cpu].idle)
+
+#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+#define DIV_UP(x,y) (((x) + (y) - 1) / y)
+
+#define extra_runs(inf) ((inf->status) & 6)
+#define extra_get_cur_q(inf) (((inf->status & 6) >> 1)-1)
+#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
+
+
+static void sedf_dump_cpu_state(int i);
+
+static inline int extraq_on(struct vcpu *d, int i) {
+ return ((EXTRALIST(d,i)->next != NULL) &&
+ (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
+}
+
+static inline void extraq_add_head(struct vcpu *d, int i)
+{
+ list_add(EXTRALIST(d,i), EXTRAQ(d->processor,i));
+ ASSERT(extraq_on(d, i));
+}
+
+static inline void extraq_add_tail(struct vcpu *d, int i)
+{
+ list_add_tail(EXTRALIST(d,i), EXTRAQ(d->processor,i));
+ ASSERT(extraq_on(d, i));
+}
+
+static inline void extraq_del(struct vcpu *d, int i)
+{
+ struct list_head *list = EXTRALIST(d,i);
+ ASSERT(extraq_on(d,i));
+ PRINT(3, "Removing domain %i.%i from L%i extraq\n", d->domain->domain_id,
+ d->vcpu_id, i);
+ list_del(list);
+ list->next = NULL;
+ ASSERT(!extraq_on(d, i));
+}
+
+/* adds a domain to the queue of processes which are aware of extra time. List
+ is sorted by score, where a lower score means higher priority for an extra
+ slice. It also updates the score, by simply subtracting a fixed value from
+ each entry, in order to avoid overflow. The algorithm works by simply
+ charging each domain that recieved extratime with an inverse of its weight.
+ */
+static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub) {
+ struct list_head *cur;
+ struct sedf_vcpu_info *curinf;
+
+ ASSERT(!extraq_on(d,i));
+ PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
+ " to L%i extraq\n",
+ d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
+ EDOM_INFO(d)->short_block_lost_tot, i);
+ /*iterate through all elements to find our "hole" and on our way
+ update all the other scores*/
+ list_for_each(cur,EXTRAQ(d->processor,i)){
+ curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]);
+ curinf->score[i] -= sub;
+ if (EDOM_INFO(d)->score[i] < curinf->score[i])
+ break;
+ else
+ PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
+ curinf->vcpu->domain->domain_id,
+ curinf->vcpu->vcpu_id, curinf->score[i]);
+ }
+ /*cur now contains the element, before which we'll enqueue*/
+ PRINT(3, "\tlist_add to %p\n", cur->prev);
+ list_add(EXTRALIST(d,i),cur->prev);
+
+ /*continue updating the extraq*/
+ if ((cur != EXTRAQ(d->processor,i)) && sub)
+ for (cur = cur->next; cur != EXTRAQ(d->processor,i);
+ cur = cur-> next) {
+ curinf = list_entry(cur,struct sedf_vcpu_info,
+ extralist[i]);
+ curinf->score[i] -= sub;
+ PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
+ curinf->vcpu->domain->domain_id,
+ curinf->vcpu->vcpu_id, curinf->score[i]);
+ }
+ ASSERT(extraq_on(d,i));
+}
+static inline void extraq_check(struct vcpu *d) {
+ if (extraq_on(d, EXTRA_UTIL_Q)) {
+ PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id);
+ if (!(EDOM_INFO(d)->status & EXTRA_AWARE) &&
+ !extra_runs(EDOM_INFO(d))) {
+ extraq_del(d, EXTRA_UTIL_Q);
+ PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
+ d->domain->domain_id, d->vcpu_id);
+ }
+ } else {
+ PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id,
+ d->vcpu_id);
+ if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d))
+ {
+#if (EXTRA == EXTRA_ROUNDR)
+ extraq_add_tail(d, EXTRA_UTIL_Q);
+#elif (EXTRA == EXTRA_SLICE_WEIGHT || \
+ EXTRA == EXTRA_BLOCK_WEIGHT)
+ extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
+#elif
+ ;
+#endif
+ PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id,
+ d->vcpu_id);
+ }
+ }
+}
+
+static inline void extraq_check_add_unblocked(struct vcpu *d,
+ int priority) {
+ struct sedf_vcpu_info *inf = EDOM_INFO(d);
+ if (inf->status & EXTRA_AWARE)
+#if (EXTRA == EXTRA_ROUNDR)
+ if (priority)
+ extraq_add_head(d,EXTRA_UTIL_Q);
+ else
+ extraq_add_tail(d,EXTRA_UTIL_Q);
+#elif (EXTRA == EXTRA_SLICE_WEIGHT \
+ || EXTRA == EXTRA_BLOCK_WEIGHT)
+ /*put in on the weighted extraq,
+ without updating any scores*/
+ extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
+#else
+ ;
+#endif
+}
+
+static inline int __task_on_queue(struct vcpu *d) {
+ return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
+}
+static inline void __del_from_queue(struct vcpu *d)
+{
+ struct list_head *list = LIST(d);
+ ASSERT(__task_on_queue(d));
+ PRINT(3,"Removing domain %i.%i (bop= %"PRIu64") from runq/waitq\n",
+ d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
+ list_del(list);
+ list->next = NULL;
+ ASSERT(!__task_on_queue(d));
+}
+
+typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
+
+static inline void list_insert_sort(struct list_head *list,
+ struct list_head *element, list_comparer comp) {
+ struct list_head *cur;
+ /*iterate through all elements to find our "hole"*/
+ list_for_each(cur,list){
+ if (comp(element, cur) < 0)
+ break;
+ }
+ /*cur now contains the element, before which we'll enqueue*/
+ PRINT(3,"\tlist_add to %p\n",cur->prev);
+ list_add(element, cur->prev);
+}
+#define DOMAIN_COMPARER(name, field, comp1, comp2) \
+int name##_comp(struct list_head* el1, struct list_head* el2) \
+{ \
+ struct sedf_vcpu_info *d1, *d2; \
+ d1 = list_entry(el1,struct sedf_vcpu_info, field); \
+ d2 = list_entry(el2,struct sedf_vcpu_info, field); \
+ if ((comp1) == (comp2)) \
+ return 0; \
+ if ((comp1) < (comp2)) \
+ return -1; \
+ else \
+ return 1; \
+}
+/* adds a domain to the queue of processes which wait for the beginning of the
+ next period; this list is therefore sortet by this time, which is simply
+ absol. deadline - period
+ */
+DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2))
+ static inline void __add_to_waitqueue_sort(struct vcpu *d) {
+ ASSERT(!__task_on_queue(d));
+ PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
+ d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
+ list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp);
+ ASSERT(__task_on_queue(d));
+}
+
+/* adds a domain to the queue of processes which have started their current
+ period and are runnable (i.e. not blocked, dieing,...). The first element
+ on this list is running on the processor, if the list is empty the idle
+ task will run. As we are implementing EDF, this list is sorted by deadlines.
+ */
+DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs)
+ static inline void __add_to_runqueue_sort(struct vcpu *d) {
+ PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
+ d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs);
+ list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
+}
+
+/* Allocates memory for per domain private scheduling data*/
+static int sedf_alloc_task(struct vcpu *d) {
+ PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
+ d->vcpu_id);
+ if (d->domain->sched_priv == NULL) {
+ if ((d->domain->sched_priv =
+ xmalloc(struct sedf_dom_info)) == NULL )
+ return -1;
+ memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
+ }
+ if ((d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
+ return -1;
+ memset(d->sched_priv, 0, sizeof(struct sedf_vcpu_info));
+ return 0;
+}
+
+/* Setup the sedf_dom_info */
+static void sedf_add_task(struct vcpu *d)
+{
+ struct sedf_vcpu_info *inf = EDOM_INFO(d);
+ inf->vcpu = d;
+
+ PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",d->domain->domain_id,
+ d->vcpu_id);
+
+ /* Allocate per-CPU context if this is the first domain to be added. */
+ if ( unlikely(schedule_data[d->processor].sched_priv == NULL) )
+ {
+ schedule_data[d->processor].sched_priv =
+ xmalloc(struct sedf_cpu_info);
+ BUG_ON(schedule_data[d->processor].sched_priv == NULL);
+ memset(CPU_INFO(d->processor), 0, sizeof(*CPU_INFO(d->processor)));
+ INIT_LIST_HEAD(WAITQ(d->processor));
+ INIT_LIST_HEAD(RUNQ(d->processor));
+ INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_PEN_Q));
+ INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
+ }
+
+ if (d->domain->domain_id==0) {
+ /*set dom0 to something useful to boot the machine*/
+ inf->period = MILLISECS(20);
+ inf->slice = MILLISECS(15);
+ inf->latency = 0;
+ inf->deadl_abs = 0;
+ inf->status = EXTRA_NONE | SEDF_ASLEEP;/*EXTRA_AWARE; */
+ } else {
+ /*other domains run in best effort mode*/
+ inf->period = WEIGHT_PERIOD;
+ inf->slice = 0;
+ inf->deadl_abs = 0;
+ inf->latency = 0;
+ inf->status = EXTRA_AWARE | SEDF_ASLEEP;
+ inf->extraweight = 1;
+ }
+ inf->period_orig = inf->period; inf->slice_orig = inf->slice;
+ INIT_LIST_HEAD(&(inf->list));
+ INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
+ INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
+
+ if (!is_idle_task(d->domain)) {
+ extraq_check(d);
+ } else {
+ EDOM_INFO(d)->deadl_abs = 0;
+ EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
+ }
+}
+
+/* Frees memory used by domain info */
+static void sedf_free_task(struct domain *d)
+{
+ int i;
+ PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
+ ASSERT(d->sched_priv != NULL);
+ xfree(d->sched_priv);
+
+ for (i = 0; i < MAX_VIRT_CPUS; i++)
+ if ( d->vcpu[i] ) {
+ ASSERT(d->vcpu[i]->sched_priv != NULL);
+ xfree(d->vcpu[i]->sched_priv);
+ }
+}
+
+/* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
+static inline void desched_edf_dom (s_time_t now, struct vcpu* d) {
+ struct sedf_vcpu_info* inf = EDOM_INFO(d);
+ /*current domain is running in real time mode*/
+
+ ASSERT(__task_on_queue(d));
+ /*update the domains cputime*/
+ inf->cputime += now - inf->sched_start_abs;
+
+ /*scheduling decisions, which don't remove the running domain
+ from the runq*/
+ if ((inf->cputime < inf->slice) && sedf_runnable(d))
+ return;
+
+ __del_from_queue(d);
+
+ /*manage bookkeeping (i.e. calculate next deadline,
+ memorize overun-time of slice) of finished domains*/
+ if (inf->cputime >= inf->slice) {
+ inf->cputime -= inf->slice;
+
+ if (inf->period < inf->period_orig) {
+ /*this domain runs in latency scaling or burst mode*/
+#if (UNBLOCK == UNBLOCK_BURST)
+ /*if we are runnig in burst scaling wait for two periods
+ before scaling periods up again*/
+ if (now - inf->unblock_abs >= 2 * inf->period)
+#endif
+ {
+ inf->period *= 2; inf->slice *= 2;
+ if ((inf->period > inf->period_orig) ||
+ (inf->slice > inf->slice_orig)) {
+ /*reset slice & period*/
+ inf->period = inf->period_orig;
+ inf->slice = inf->slice_orig;
+ }
+ }
+ }
+ /*set next deadline*/
+ inf->deadl_abs += inf->period;
+ }
+
+ /*add a runnable domain to the waitqueue*/
+ if (sedf_runnable(d))
+ __add_to_waitqueue_sort(d);
+ else {
+ /*we have a blocked realtime task -> remove it from exqs too*/
+#if (EXTRA > EXTRA_OFF)
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
+#endif
+ if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
+#endif
+ }
+ ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
+ ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
+ sedf_runnable(d)));
+}
+
+/* Update all elements on the queues */
+static inline void update_queues(s_time_t now, struct list_head* runq,
+ struct list_head* waitq) {
+ struct list_head *cur,*tmp;
+ struct sedf_vcpu_info *curinf;
+
+ PRINT(3,"Updating waitq..\n");
+ /*check for the first elements of the waitqueue, whether their
+ next period has already started*/
+ list_for_each_safe(cur, tmp, waitq) {
+ curinf = list_entry(cur, struct sedf_vcpu_info, list);
+ PRINT(4,"\tLooking @ dom %i.%i\n",
+ curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
+ if (PERIOD_BEGIN(curinf) <= now) {
+ __del_from_queue(curinf->vcpu);
+ __add_to_runqueue_sort(curinf->vcpu);
+ }
+ else
+ break;
+ }
+
+ PRINT(3,"Updating runq..\n");
+ /*process the runq, find domains that are on
+ the runqueue which shouldn't be there*/
+ list_for_each_safe(cur, tmp, runq) {
+ curinf = list_entry(cur,struct sedf_vcpu_info,list);
+ PRINT(4,"\tLooking @ dom %i.%i\n",
+ curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
+ if (unlikely(curinf->slice == 0)) {
+ /*ignore domains with empty slice*/
+ PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
+ curinf->vcpu->domain->domain_id,
+ curinf->vcpu->vcpu_id);
+ __del_from_queue(curinf->vcpu);
+
+ /*move them to their next period*/
+ curinf->deadl_abs += curinf->period;
+ /*and put them back into the queue*/
+ __add_to_waitqueue_sort(curinf->vcpu);
+ continue;
+ }
+ if (unlikely((curinf->deadl_abs < now) ||
+ (curinf->cputime > curinf->slice))) {
+ /*we missed the deadline or the slice was
+ already finished... might happen because
+ of dom_adj.*/
+ PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
+ "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
+ " cputime: %"PRIu64"\n",
+ curinf->vcpu->domain->domain_id,
+ curinf->vcpu->vcpu_id,
+ curinf->deadl_abs, curinf->slice, now,
+ curinf->cputime);
+ __del_from_queue(curinf->vcpu);
+ /*common case: we miss one period!*/
+ curinf->deadl_abs += curinf->period;
+
+ /*if we are still behind: modulo arithmetic,
+ force deadline to be in future and
+ aligned to period borders!*/
+ if (unlikely(curinf->deadl_abs < now))
+ curinf->deadl_abs +=
+ DIV_UP(now - curinf->deadl_abs,
+ curinf->period) * curinf->period;
+ ASSERT(curinf->deadl_abs > now);
+ /*give a fresh slice*/
+ curinf->cputime = 0;
+ if (PERIOD_BEGIN(curinf) > now)
+ __add_to_waitqueue_sort(curinf->vcpu);
+ else
+ __add_to_runqueue_sort(curinf->vcpu);
+ }
+ else
+ break;
+ }
+ PRINT(3,"done updating the queues\n");
+}
+
+#if (EXTRA > EXTRA_OFF)
+/* removes a domain from the head of the according extraQ and
+ requeues it at a specified position:
+ round-robin extratime: end of extraQ
+ weighted ext.: insert in sorted list by score
+ if the domain is blocked / has regained its short-block-loss
+ time it is not put on any queue */
+static inline void desched_extra_dom(s_time_t now, struct vcpu* d) {
+ struct sedf_vcpu_info *inf = EDOM_INFO(d);
+ int i = extra_get_cur_q(inf);
+
+#if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
+ unsigned long oldscore;
+#endif
+ ASSERT(extraq_on(d, i));
+ /*unset all running flags*/
+ inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
+ /*fresh slice for the next run*/
+ inf->cputime = 0;
+ /*accumulate total extratime*/
+ inf->extra_time_tot += now - inf->sched_start_abs;
+ /*remove extradomain from head of the queue*/
+ extraq_del(d, i);
+
+#if (EXTRA == EXTRA_ROUNDR)
+ if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
+ /*add to the tail if it is runnable => round-robin*/
+ extraq_add_tail(d, EXTRA_UTIL_Q);
+#elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
+ /*update the score*/
+ oldscore = inf->score[i];
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ if (i == EXTRA_PEN_Q) {
+ /*domain was running in L0 extraq*/
+ /*reduce block lost, probably more sophistication here!*/
+ /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
+ inf->short_block_lost_tot -= now - inf->sched_start_abs;
+ PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n",
+ inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id,
+ inf->short_block_lost_tot);
+ if (inf->short_block_lost_tot <= 0) {
+ PRINT(4,"Domain %i.%i compensated short block loss!\n",
+ inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id);
+ /*we have (over-)compensated our block penalty*/
+ inf->short_block_lost_tot = 0;
+ /*we don't want a place on the penalty queue anymore!*/
+ inf->status &= ~EXTRA_WANT_PEN_Q;
+ goto check_extra_queues;
+ }
+ /*we have to go again for another try in the block-extraq,
+ the score is not used incrementally here, as this is
+ already done by recalculating the block_lost*/
+ inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
+ inf->short_block_lost_tot;
+ oldscore = 0;
+ } else
+#endif
+ {
+ /*domain was running in L1 extraq => score is inverse of
+ utilization and is used somewhat incremental!*/
+ if (!inf->extraweight)
+ /*NB: use fixed point arithmetic with 10 bits*/
+ inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
+ inf->slice;
+ else
+ /*give a domain w/ exweight = 1 as much as a domain with
+ util = 1/128*/
+ inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
+ }
+ check_extra_queues:
+ /* Adding a runnable domain to the right queue and removing blocked ones*/
+ if (sedf_runnable(d)) {
+ /*add according to score: weighted round robin*/
+ if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q))
+ extraq_add_sort_update(d, i, oldscore);
+ }
+ else {
+ /*remove this blocked domain from the waitq!*/
+ __del_from_queue(d);
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ /*make sure that we remove a blocked domain from the other
+ extraq too*/
+ if (i == EXTRA_PEN_Q) {
+ if (extraq_on(d, EXTRA_UTIL_Q))
+ extraq_del(d, EXTRA_UTIL_Q);
+ }
+ else {
+ if (extraq_on(d, EXTRA_PEN_Q))
+ extraq_del(d, EXTRA_PEN_Q);
+ }
+#endif
+ }
+#endif
+ ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
+ ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
+ sedf_runnable(d)));
+}
+#endif
+
+static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
+ s_time_t end_xt, struct list_head *extraq[], int cpu) {
+ struct task_slice ret;
+ struct sedf_vcpu_info *runinf;
+
+ /* Enough time left to use for extratime? */
+ if (end_xt - now < EXTRA_QUANTUM)
+ goto return_idle;
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ if (!list_empty(extraq[EXTRA_PEN_Q])) {
+ /*we still have elements on the level 0 extraq
+ => let those run first!*/
+ runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
+ struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
+ runinf->status |= EXTRA_RUN_PEN;
+ ret.task = runinf->vcpu;
+ ret.time = EXTRA_QUANTUM;
+#ifdef SEDF_STATS
+ runinf->pen_extra_slices++;
+#endif
+ } else
+#endif
+ if (!list_empty(extraq[EXTRA_UTIL_Q])) {
+ /*use elements from the normal extraqueue*/
+ runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
+ struct sedf_vcpu_info, extralist[EXTRA_UTIL_Q]);
+ runinf->status |= EXTRA_RUN_UTIL;
+ ret.task = runinf->vcpu;
+ ret.time = EXTRA_QUANTUM;
+ }
+ else
+ goto return_idle;
+
+ ASSERT(ret.time > 0);
+ ASSERT(sedf_runnable(ret.task));
+ return ret;
+
+ return_idle:
+ ret.task = IDLETASK(cpu);
+ ret.time = end_xt - now;
+ ASSERT(ret.time > 0);
+ ASSERT(sedf_runnable(ret.task));
+ return ret;
+}
+/* Main scheduling function
+ Reasons for calling this function are:
+ -timeslice for the current period used up
+ -domain on waitqueue has started its period
+ -and various others ;) in general: determine which domain to run next*/
+static struct task_slice sedf_do_schedule(s_time_t now)
+{
+ int cpu = current->processor;
+ struct list_head *runq = RUNQ(cpu);
+ struct list_head *waitq = WAITQ(cpu);
+#if (EXTRA > EXTRA_OFF)
+ struct sedf_vcpu_info *inf = EDOM_INFO(current);
+ struct list_head *extraq[] = {EXTRAQ(cpu, EXTRA_PEN_Q),
+ EXTRAQ(cpu, EXTRA_UTIL_Q)};
+#endif
+ struct task_slice ret;
+ /*int i = 0;*/
+ /*idle tasks don't need any of the following stuff*/
+ if (is_idle_task(current->domain))
+ goto check_waitq;
+
+ /* create local state of the status of the domain, in order to avoid
+ inconsistent state during scheduling decisions, because data for
+ domain_runnable is not protected by the scheduling lock!*/
+ if(!domain_runnable(current))
+ inf->status |= SEDF_ASLEEP;
+
+ if (inf->status & SEDF_ASLEEP)
+ inf->block_abs = now;
+
+#if (EXTRA > EXTRA_OFF)
+ if (unlikely(extra_runs(inf))) {
+ /*special treatment of domains running in extra time*/
+ desched_extra_dom(now, current);
+ }
+ else
+#endif
+ {
+ desched_edf_dom(now, current);
+ }
+ check_waitq:
+ update_queues(now, runq, waitq);
+
+ /*now simply pick the first domain from the runqueue, which has the
+ earliest deadline, because the list is sorted*/
+ struct sedf_vcpu_info *runinf, *waitinf;
+
+ if (!list_empty(runq)) {
+ runinf = list_entry(runq->next,struct sedf_vcpu_info,list);
+ ret.task = runinf->vcpu;
+ if (!list_empty(waitq)) {
+ waitinf = list_entry(waitq->next,
+ struct sedf_vcpu_info,list);
+ /*rerun scheduler, when scheduled domain reaches its
+ end of slice or the first domain from the waitqueue
+ gets ready*/
+ ret.time = MIN(now + runinf->slice - runinf->cputime,
+ PERIOD_BEGIN(waitinf)) - now;
+ }
+ else {
+ ret.time = runinf->slice - runinf->cputime;
+ }
+ CHECK(ret.time > 0);
+ goto sched_done;
+ }
+
+ if (!list_empty(waitq)) {
+ waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list);
+ /*we could not find any suitable domain
+ => look for domains that are aware of extratime*/
+#if (EXTRA > EXTRA_OFF)
+ ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
+ extraq, cpu);
+#else
+ ret.task = IDLETASK(cpu);
+ ret.time = PERIOD_BEGIN(waitinf) - now;
+#endif
+ CHECK(ret.time > 0);
+ }
+ else {
+ /*this could probably never happen, but one never knows...*/
+ /*it can... imagine a second CPU, which is pure scifi ATM,
+ but one never knows ;)*/
+ ret.task = IDLETASK(cpu);
+ ret.time = SECONDS(1);
+ }
+
+ sched_done:
+ /*TODO: Do something USEFUL when this happens and find out, why it
+ still can happen!!!*/
+ if (ret.time<0) {
+ printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
+ ret.time);
+ ret.time = EXTRA_QUANTUM;
+ }
+ EDOM_INFO(ret.task)->sched_start_abs = now;
+ CHECK(ret.time > 0);
+ ASSERT(sedf_runnable(ret.task));
+ CPU_INFO(cpu)->current_slice_expires = now + ret.time;
+ return ret;
+}
+
+static void sedf_sleep(struct vcpu *d) {
+ PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
+
+ if (is_idle_task(d->domain))
+ return;
+
+ EDOM_INFO(d)->status |= SEDF_ASLEEP;
+
+ if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
+ cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
+ }
+ else {
+ if ( __task_on_queue(d) )
+ __del_from_queue(d);
+#if (EXTRA > EXTRA_OFF)
+ if (extraq_on(d, EXTRA_UTIL_Q))
+ extraq_del(d, EXTRA_UTIL_Q);
+#endif
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ if (extraq_on(d, EXTRA_PEN_Q))
+ extraq_del(d, EXTRA_PEN_Q);
+#endif
+ }
+}
+
+/* This function wakes up a domain, i.e. moves them into the waitqueue
+ * things to mention are: admission control is taking place nowhere at
+ * the moment, so we can't be sure, whether it is safe to wake the domain
+ * up at all. Anyway, even if it is safe (total cpu usage <=100%) there are
+ * some considerations on when to allow the domain to wake up and have its
+ * first deadline...
+ * I detected 3 cases, which could describe the possible behaviour of the
+ * scheduler,
+ * and I'll try to make them more clear:
+ *
+ * 1. Very conservative
+ * -when a blocked domain unblocks, it is allowed to start execution at
+ * the beginning of the next complete period
+ * (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up
+ *
+ * DRRB_____D__U_____DRRRRR___D________ ...
+ *
+ * -this causes the domain to miss a period (and a deadline)
+ * -doesn't disturb the schedule at all
+ * -deadlines keep occurring isochronously
+ *
+ * 2. Conservative Part 1: Short Unblocking
+ * -when a domain unblocks in the same period as it was blocked it
+ * unblocks and may consume the rest of its original time-slice minus
+ * the time it was blocked
+ * (assume period=9, slice=5)
+ *
+ * DRB_UR___DRRRRR___D...
+ *
+ * -this also doesn't disturb scheduling, but might lead to the fact, that
+ * the domain can't finish its workload in the period
+ * -in addition to that the domain can be treated prioritised when
+ * extratime is available
+ * -addition: experiments have shown that this may have a HUGE impact on
+ * performance of other domains, because it can lead to excessive context
+ * switches
+
+ * Part2: Long Unblocking
+ * Part 2a
+ * -it is obvious that such accounting of block time, applied when
+ * unblocking is happening in later periods, works fine as well
+ * -the domain is treated as if it would have been running since the start
+ * of its new period
+ *
+ * DRB______D___UR___D...
+ *
+ * Part 2b
+ * -if one needs the full slice in the next period, it is necessary to
+ * treat the unblocking time as the start of the new period, i.e. move
+ * the deadline further back (later)
+ * -this doesn't disturb scheduling as well, because for EDF periods can
+ * be treated as minimal inter-release times and scheduling stays
+ * correct, when deadlines are kept relative to the time the process
+ * unblocks
+ *
+ * DRB______D___URRRR___D...
+ * (D) <- old deadline was here
+ * -problem: deadlines don't occur isochronously anymore
+ * Part 2c (Improved Atropos design)
+ * -when a domain unblocks it is given a very short period (=latency hint)
+ * and slice length scaled accordingly
+ * -both rise again to the original value (e.g. get doubled every period)
+ *
+ * 3. Unconservative (i.e. incorrect)
+ * -to boost the performance of I/O dependent domains it would be possible
+ * to put the domain into the runnable queue immediately, and let it run
+ * for the remainder of the slice of the current period
+ * (or even worse: allocate a new full slice for the domain)
+ * -either behaviour can lead to missed deadlines in other domains as
+ * opposed to approaches 1,2a,2b
+ */
+static inline void unblock_short_vcons
+(struct sedf_vcpu_info* inf, s_time_t now) {
+ inf->deadl_abs += inf->period;
+ inf->cputime = 0;
+}
+
+static inline void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
+{
+ /*treat blocked time as consumed by the domain*/
+ inf->cputime += now - inf->block_abs;
+ if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+ /*we don't have a reasonable amount of time in
+ our slice left :( => start in next period!*/
+ unblock_short_vcons(inf, now);
+ }
+#ifdef SEDF_STATS
+ else
+ inf->short_cont++;
+#endif
+}
+static inline void unblock_short_extra_support (struct sedf_vcpu_info* inf,
+ s_time_t now) {
+ /*this unblocking scheme tries to support the domain, by assigning it
+ a priority in extratime distribution according to the loss of time
+ in this slice due to blocking*/
+ s_time_t pen;
+
+ /*no more realtime execution in this period!*/
+ inf->deadl_abs += inf->period;
+ if (likely(inf->block_abs)) {
+ /*treat blocked time as consumed by the domain*/
+ /*inf->cputime += now - inf->block_abs;*/
+ /*penalty is time the domain would have
+ had if it continued to run */
+ pen = (inf->slice - inf->cputime);
+ if (pen < 0) pen = 0;
+ /*accumulate all penalties over the periods*/
+ /*inf->short_block_lost_tot += pen;*/
+ /*set penalty to the current value*/
+ inf->short_block_lost_tot = pen;
+ /*not sure which one is better.. but seems to work well...*/
+
+ if (inf->short_block_lost_tot) {
+ inf->score[0] = (inf->period << 10) /
+ inf->short_block_lost_tot;
+#ifdef SEDF_STATS
+ inf->pen_extra_blocks++;
+#endif
+ if (extraq_on(inf->vcpu, EXTRA_PEN_Q))
+ /*remove domain for possible resorting!*/
+ extraq_del(inf->vcpu, EXTRA_PEN_Q);
+ else
+ /*remember that we want to be on the penalty q
+ so that we can continue when we (un-)block
+ in penalty-extratime*/
+ inf->status |= EXTRA_WANT_PEN_Q;
+
+ /*(re-)add domain to the penalty extraq*/
+ extraq_add_sort_update(inf->vcpu,
+ EXTRA_PEN_Q, 0);
+ }
+ }
+ /*give it a fresh slice in the next period!*/
+ inf->cputime = 0;
+}
+static inline void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
+{
+ /* align to next future period */
+ inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
+ * inf->period;
+ inf->cputime = 0;
+}
+
+static inline void unblock_long_cons_a (struct sedf_vcpu_info* inf,
+ s_time_t now) {
+ /*treat the time the domain was blocked in the
+ CURRENT period as consumed by the domain*/
+ inf->cputime = (now - inf->deadl_abs) % inf->period;
+ if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
+ /*we don't have a reasonable amount of time in our slice
+ left :( => start in next period!*/
+ unblock_long_vcons(inf, now);
+ }
+}
+static inline void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now) {
+ /*Conservative 2b*/
+ /*Treat the unblocking time as a start of a new period */
+ inf->deadl_abs = now + inf->period;
+ inf->cputime = 0;
+}
+static inline void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now) {
+ if (likely(inf->latency)) {
+ /*scale the slice and period accordingly to the latency hint*/
+ /*reduce period temporarily to the latency hint*/
+ inf->period = inf->latency;
+ /*this results in max. 4s slice/period length*/
+ ASSERT((inf->period < ULONG_MAX)
+ && (inf->slice_orig < ULONG_MAX));
+ /*scale slice accordingly, so that utilisation stays the same*/
+ inf->slice = (inf->period * inf->slice_orig)
+ / inf->period_orig;
+ inf->deadl_abs = now + inf->period;
+ inf->cputime = 0;
+ }
+ else {
+ /*we don't have a latency hint.. use some other technique*/
+ unblock_long_cons_b(inf, now);
+ }
+}
+/*a new idea of dealing with short blocks: burst period scaling*/
+static inline void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
+{
+ /*treat blocked time as consumed by the domain*/
+ inf->cputime += now - inf->block_abs;
+
+ if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
+ /*if we can still use some time in the current slice
+ then use it!*/
+#ifdef SEDF_STATS
+ /*we let the domain run in the current period*/
+ inf->short_cont++;
+#endif
+ }
+ else {
+ /*we don't have a reasonable amount of time in
+ our slice left => switch to burst mode*/
+ if (likely(inf->unblock_abs)) {
+ /*set the period-length to the current blocking
+ interval, possible enhancements: average over last
+ blocking intervals, user-specified minimum,...*/
+ inf->period = now - inf->unblock_abs;
+ /*check for overflow on multiplication*/
+ ASSERT((inf->period < ULONG_MAX)
+ && (inf->slice_orig < ULONG_MAX));
+ /*scale slice accordingly, so that utilisation
+ stays the same*/
+ inf->slice = (inf->period * inf->slice_orig)
+ / inf->period_orig;
+ /*set new (shorter) deadline*/
+ inf->deadl_abs += inf->period;
+ }
+ else {
+ /*in case we haven't unblocked before
+ start in next period!*/
+ inf->cputime=0;
+ inf->deadl_abs += inf->period;
+ }
+ }
+ inf->unblock_abs = now;
+}
+static inline void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now) {
+ if (unlikely(inf->latency && (inf->period > inf->latency))) {
+ /*scale the slice and period accordingly to the latency hint*/
+ inf->period = inf->latency;
+ /*check for overflows on multiplication*/
+ ASSERT((inf->period < ULONG_MAX)
+ && (inf->slice_orig < ULONG_MAX));
+ /*scale slice accordingly, so that utilisation stays the same*/
+ inf->slice = (inf->period * inf->slice_orig)
+ / inf->period_orig;
+ inf->deadl_abs = now + inf->period;
+ inf->cputime = 0;
+ }
+ else {
+ /*we don't have a latency hint.. or we are currently in
+ "burst mode": use some other technique
+ NB: this should be in fact the normal way of operation,
+ when we are in sync with the device!*/
+ unblock_long_cons_b(inf, now);
+ }
+ inf->unblock_abs = now;
+}
+
+#define DOMAIN_EDF 1
+#define DOMAIN_EXTRA_PEN 2
+#define DOMAIN_EXTRA_UTIL 3
+#define DOMAIN_IDLE 4
+static inline int get_run_type(struct vcpu* d) {
+ struct sedf_vcpu_info* inf = EDOM_INFO(d);
+ if (is_idle_task(d->domain))
+ return DOMAIN_IDLE;
+ if (inf->status & EXTRA_RUN_PEN)
+ return DOMAIN_EXTRA_PEN;
+ if (inf->status & EXTRA_RUN_UTIL)
+ return DOMAIN_EXTRA_UTIL;
+ return DOMAIN_EDF;
+}
+/*Compares two domains in the relation of whether the one is allowed to
+ interrupt the others execution.
+ It returns true (!=0) if a switch to the other domain is good.
+ Current Priority scheme is as follows:
+ EDF > L0 (penalty based) extra-time >
+ L1 (utilization) extra-time > idle-domain
+ In the same class priorities are assigned as following:
+ EDF: early deadline > late deadline
+ L0 extra-time: lower score > higher score*/
+static inline int should_switch(struct vcpu* cur,
+ struct vcpu* other, s_time_t now) {
+ struct sedf_vcpu_info *cur_inf, *other_inf;
+ cur_inf = EDOM_INFO(cur);
+ other_inf = EDOM_INFO(other);
+
+ /*check whether we need to make an earlier sched-decision*/
+ if (PERIOD_BEGIN(other_inf) <
+ CPU_INFO(other->processor)->current_slice_expires)
+ return 1;
+ /*no timing-based switches need to be taken into account here*/
+ switch (get_run_type(cur)) {
+ case DOMAIN_EDF:
+ /* do not interrupt a running EDF domain */
+ return 0;
+ case DOMAIN_EXTRA_PEN:
+ /*check whether we also want
+ the L0 ex-q with lower score*/
+ if ((other_inf->status & EXTRA_WANT_PEN_Q)
+ && (other_inf->score[EXTRA_PEN_Q] <
+ cur_inf->score[EXTRA_PEN_Q]))
+ return 1;
+ else return 0;
+ case DOMAIN_EXTRA_UTIL:
+ /*check whether we want the L0 extraq, don't
+ switch if both domains want L1 extraq */
+ if (other_inf->status & EXTRA_WANT_PEN_Q)
+ return 1;
+ else return 0;
+ case DOMAIN_IDLE:
+ return 1;
+ }
+ return 1;
+}
+void sedf_wake(struct vcpu *d) {
+ s_time_t now = NOW();
+ struct sedf_vcpu_info* inf = EDOM_INFO(d);
+
+ PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
+
+ if (unlikely(is_idle_task(d->domain)))
+ return;
+
+ if ( unlikely(__task_on_queue(d)) ) {
+ PRINT(3,"\tdomain %i.%i is already in some queue\n",
+ d->domain->domain_id, d->vcpu_id);
+ return;
+ }
+ ASSERT(!sedf_runnable(d));
+ inf->status &= ~SEDF_ASLEEP;
+ ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
+ ASSERT(!extraq_on(d, EXTRA_PEN_Q));
+
+ if (unlikely(inf->deadl_abs == 0))
+ /*initial setup of the deadline*/
+ inf->deadl_abs = now + inf->slice;
+
+ PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
+ "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
+ inf->period, now);
+#ifdef SEDF_STATS
+ inf->block_tot++;
+#endif
+ if (unlikely(now < PERIOD_BEGIN(inf))) {
+ PRINT(4,"extratime unblock\n");
+ /* unblocking in extra-time! */
+#if (EXTRA == EXTRA_BLOCK_WEIGHT)
+ if (inf->status & EXTRA_WANT_PEN_Q) {
+ /*we have a domain that wants compensation
+ for block penalty and did just block in
+ its compensation time. Give it another
+ chance!*/
+ extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
+ }
+#endif
+ extraq_check_add_unblocked(d, 0);
+ }
+ else {
+ if (now < inf->deadl_abs) {
+ PRINT(4,"short unblocking\n");
+ /*short blocking*/
+#ifdef SEDF_STATS
+ inf->short_block_tot++;
+#endif
+#if (UNBLOCK <= UNBLOCK_ATROPOS)
+ unblock_short_vcons(inf, now);
+#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
+ unblock_short_cons(inf, now);
+#elif (UNBLOCK == UNBLOCK_BURST)
+ unblock_short_burst(inf, now);
+#elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
+ unblock_short_extra_support(inf, now);
+#endif
+
+ extraq_check_add_unblocked(d, 1);
+ }
+ else {
+ PRINT(4,"long unblocking\n");
+ /*long unblocking*/
+#ifdef SEDF_STATS
+ inf->long_block_tot++;
+#endif
+#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
+ unblock_long_vcons(inf, now);
+#elif (UNBLOCK == UNBLOCK_EDF \
+ || UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
+ unblock_long_cons_b(inf, now);
+#elif (UNBLOCK == UNBLOCK_ATROPOS)
+ unblock_long_cons_c(inf, now);
+#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
+ unblock_long_cons_b(inf, now);
+ /*unblock_short_cons_c(inf, now);*/
+#elif (UNBLOCK == UNBLOCK_BURST)
+ unblock_long_burst(inf, now);
+#endif
+
+ extraq_check_add_unblocked(d, 1);
+ }
+ }
+ PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
+ "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
+ inf->period, now);
+ if (PERIOD_BEGIN(inf) > now) {
+ __add_to_waitqueue_sort(d);
+ PRINT(3,"added to waitq\n");
+ }
+ else {
+ __add_to_runqueue_sort(d);
+ PRINT(3,"added to runq\n");
+ }
+
+#ifdef SEDF_STATS
+ /*do some statistics here...*/
+ if (inf->block_abs != 0) {
+ inf->block_time_tot += now - inf->block_abs;
+ inf->penalty_time_tot +=
+ PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
+ }
+#endif
+ /*sanity check: make sure each extra-aware domain IS on the util-q!*/
+ ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
+ ASSERT(__task_on_queue(d));
+ /*check whether the awakened task needs to invoke the do_schedule
+ routine. Try to avoid unnecessary runs but:
+ Save approximation: Always switch to scheduler!*/
+ if (should_switch(schedule_data[d->processor].curr, d, now))
+ cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
+}
+
+/*Print a lot of use-{full, less} information about a domains in the system*/
+static void sedf_dump_domain(struct vcpu *d) {
+ printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
+ test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
+ printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
+ EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
+ EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
+ (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
+ EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
+ if (d->cpu_time !=0)
+ printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
+ / d->cpu_time);
+#ifdef SEDF_STATS
+ if (EDOM_INFO(d)->block_time_tot!=0)
+ printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
+ EDOM_INFO(d)->block_time_tot);
+ if (EDOM_INFO(d)->block_tot!=0)
+ printf("\n blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
+ "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
+ EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
+ (EDOM_INFO(d)->short_block_tot * 100)
+ / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
+ (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
+ EDOM_INFO(d)->pen_extra_blocks,
+ EDOM_INFO(d)->pen_extra_slices,
+ EDOM_INFO(d)->long_block_tot,
+ (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
+ (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
+ (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
+#endif
+ printf("\n");
+}
+
+ /*dumps all domains on the specified cpu*/
+static void sedf_dump_cpu_state(int i)
+{
+ struct list_head *list, *queue, *tmp;
+ struct sedf_vcpu_info *d_inf;
+ struct domain *d;
+ struct vcpu *ed;
+ int loop = 0;
+
+ printk("now=%"PRIu64"\n",NOW());
+ queue = RUNQ(i);
+ printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue ) {
+ printk("%3d: ",loop++);
+ d_inf = list_entry(list, struct sedf_vcpu_info, list);
+ sedf_dump_domain(d_inf->vcpu);
+ }
+
+ queue = WAITQ(i); loop = 0;
+ printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
+ (unsigned long) queue->next, (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue ) {
+ printk("%3d: ",loop++);
+ d_inf = list_entry(list, struct sedf_vcpu_info, list);
+ sedf_dump_domain(d_inf->vcpu);
+ }
+
+ queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
+ printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
+ (unsigned long)queue, (unsigned long) queue->next,
+ (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue ) {
+ d_inf = list_entry(list, struct sedf_vcpu_info,
+ extralist[EXTRA_PEN_Q]);
+ printk("%3d: ",loop++);
+ sedf_dump_domain(d_inf->vcpu);
+ }
+
+ queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
+ printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
+ (unsigned long)queue, (unsigned long) queue->next,
+ (unsigned long) queue->prev);
+ list_for_each_safe ( list, tmp, queue ) {
+ d_inf = list_entry(list, struct sedf_vcpu_info,
+ extralist[EXTRA_UTIL_Q]);
+ printk("%3d: ",loop++);
+ sedf_dump_domain(d_inf->vcpu);
+ }
+
+ loop = 0;
+ printk("\nnot on Q\n");
+ for_each_domain(d)
+ for_each_vcpu(d, ed)
+ {
+ if (!__task_on_queue(ed) && (ed->processor == i)) {
+ printk("%3d: ",loop++);
+ sedf_dump_domain(ed);
+ }
+ }
+}
+/*Adjusts periods and slices of the domains accordingly to their weights*/
+static inline int sedf_adjust_weights(struct sched_adjdom_cmd *cmd) {
+ struct vcpu *p;
+ struct domain *d;
+ int sumw[NR_CPUS];
+ s_time_t sumt[NR_CPUS];
+ int cpu;
+
+ for (cpu=0; cpu < NR_CPUS; cpu++) {
+ sumw[cpu] = 0;
+ sumt[cpu] = 0;
+ }
+ /*sum up all weights*/
+ for_each_domain(d)
+ for_each_vcpu(d, p) {
+ if (EDOM_INFO(p)->weight)
+ sumw[p->processor] += EDOM_INFO(p)->weight;
+ else {
+ /*don't modify domains who don't have a weight, but sum
+ up the time they need, projected to a WEIGHT_PERIOD,
+ so that this time is not given to the weight-driven
+ domains*/
+ /*check for overflows*/
+ ASSERT((WEIGHT_PERIOD < ULONG_MAX)
+ && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
+ sumt[p->processor] +=
+ (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
+ EDOM_INFO(p)->period_orig;
+ }
+ }
+ /*adjust all slices (and periods) to the new weight*/
+ for_each_domain(d)
+ for_each_vcpu(d, p) {
+ if (EDOM_INFO(p)->weight) {
+ EDOM_INFO(p)->period_orig =
+ EDOM_INFO(p)->period = WEIGHT_PERIOD;
+ EDOM_INFO(p)->slice_orig =
+ EDOM_INFO(p)->slice =
+ (EDOM_INFO(p)->weight *
+ (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
+ sumw[p->processor];
+ }
+ }
+ return 0;
+}
+
+/* set or fetch domain scheduling parameters */
+static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd) {
+ struct vcpu *v;
+
+ PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
+ "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
+ p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
+ cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
+ if ( cmd->direction == SCHED_INFO_PUT )
+ {
+ /*check for sane parameters*/
+ if (!cmd->u.sedf.period && !cmd->u.sedf.weight)
+ return -EINVAL;
+ if (cmd->u.sedf.weight) {
+ if ((cmd->u.sedf.extratime & EXTRA_AWARE) &&
+ (! cmd->u.sedf.period)) {
+ /*weight driven domains with xtime ONLY!*/
+ for_each_vcpu(p, v) {
+ EDOM_INFO(v)->extraweight = cmd->u.sedf.weight;
+ EDOM_INFO(v)->weight = 0;
+ EDOM_INFO(v)->slice = 0;
+ EDOM_INFO(v)->period = WEIGHT_PERIOD;
+ }
+ } else {
+ /*weight driven domains with real-time execution*/
+ for_each_vcpu(p, v)
+ EDOM_INFO(v)->weight = cmd->u.sedf.weight;
+ }
+ }
+ else {
+ /*time driven domains*/
+ for_each_vcpu(p, v) {
+ /* sanity checking! */
+ if(cmd->u.sedf.slice > cmd->u.sedf.period )
+ return -EINVAL;
+ EDOM_INFO(v)->weight = 0;
+ EDOM_INFO(v)->extraweight = 0;
+ EDOM_INFO(v)->period_orig =
+ EDOM_INFO(v)->period = cmd->u.sedf.period;
+ EDOM_INFO(v)->slice_orig =
+ EDOM_INFO(v)->slice = cmd->u.sedf.slice;
+ }
+ }
+ if (sedf_adjust_weights(cmd))
+ return -EINVAL;
+
+ for_each_vcpu(p, v) {
+ EDOM_INFO(v)->status =
+ (EDOM_INFO(v)->status &
+ ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
+ EDOM_INFO(v)->latency = cmd->u.sedf.latency;
+ extraq_check(v);
+ }
+ }
+ else if ( cmd->direction == SCHED_INFO_GET )
+ {
+ cmd->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
+ cmd->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
+ cmd->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status
+ & EXTRA_AWARE;
+ cmd->u.sedf.latency = EDOM_INFO(p->vcpu[0])->latency;
+ cmd->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
+ }
+ PRINT(2,"sedf_adjdom_finished\n");
+ return 0;
+}
+
+struct scheduler sched_sedf_def = {
+ .name = "Simple EDF Scheduler",
+ .opt_name = "sedf",
+ .sched_id = SCHED_SEDF,
+
+ .alloc_task = sedf_alloc_task,
+ .add_task = sedf_add_task,
+ .free_task = sedf_free_task,
+ .do_schedule = sedf_do_schedule,
+ .dump_cpu_state = sedf_dump_cpu_state,
+ .sleep = sedf_sleep,
+ .wake = sedf_wake,
+ .adjdom = sedf_adjdom,
+};
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 0c437d8834..d3273a80e8 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1,5 +1,4 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
+/****************************************************************************
* (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
* (C) 2002-2003 University of Cambridge
* (C) 2004 - Mark Williamson - Intel Research Cambridge
@@ -14,10 +13,20 @@
*
*/
+/*#define WAKE_HISTO*/
+/*#define BLOCKTIME_HISTO*/
+
+#if defined(WAKE_HISTO)
+#define BUCKETS 31
+#elif defined(BLOCKTIME_HISTO)
+#define BUCKETS 200
+#endif
+
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/domain.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
@@ -26,15 +35,13 @@
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <xen/trace.h>
+#include <xen/mm.h>
#include <public/sched_ctl.h>
/* opt_sched: scheduler - default to Borrowed Virtual Time */
static char opt_sched[10] = "bvt";
string_param("sched", opt_sched);
-/*#define WAKE_HISTO*/
-/*#define BLOCKTIME_HISTO*/
-
#if defined(WAKE_HISTO)
#define BUCKETS 31
#elif defined(BLOCKTIME_HISTO)
@@ -43,39 +50,19 @@ string_param("sched", opt_sched);
#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */
-/*
- * TODO MAW pull trace-related #defines out of here and into an auto-generated
- * header file later on!
- */
-#define TRC_SCHED_DOM_ADD 0x00010000
-#define TRC_SCHED_DOM_REM 0x00010001
-#define TRC_SCHED_WAKE 0x00010002
-#define TRC_SCHED_BLOCK 0x00010003
-#define TRC_SCHED_YIELD 0x00010004
-#define TRC_SCHED_SET_TIMER 0x00010005
-#define TRC_SCHED_CTL 0x00010006
-#define TRC_SCHED_ADJDOM 0x00010007
-#define TRC_SCHED_RESCHED 0x00010008
-#define TRC_SCHED_SWITCH 0x00010009
-#define TRC_SCHED_S_TIMER_FN 0x0001000A
-#define TRC_SCHED_T_TIMER_FN 0x0001000B
-#define TRC_SCHED_DOM_TIMER_FN 0x0001000C
-
/* Various timer handlers. */
-static void s_timer_fn(unsigned long unused);
-static void t_timer_fn(unsigned long unused);
-static void dom_timer_fn(unsigned long data);
+static void s_timer_fn(void *unused);
+static void t_timer_fn(void *unused);
+static void dom_timer_fn(void *data);
/* This is global for now so that private implementations can reach it */
-schedule_data_t schedule_data[NR_CPUS];
+struct schedule_data schedule_data[NR_CPUS];
extern struct scheduler sched_bvt_def;
-extern struct scheduler sched_rrobin_def;
-extern struct scheduler sched_atropos_def;
+extern struct scheduler sched_sedf_def;
static struct scheduler *schedulers[] = {
&sched_bvt_def,
- &sched_rrobin_def,
- &sched_atropos_def,
+ &sched_sedf_def,
NULL
};
@@ -92,118 +79,184 @@ static struct ac_timer t_timer[NR_CPUS];
void free_domain_struct(struct domain *d)
{
+ int i;
+
SCHED_OP(free_task, d);
- arch_free_domain_struct(d);
+ for (i = 0; i < MAX_VIRT_CPUS; i++)
+ if ( d->vcpu[i] )
+ arch_free_vcpu_struct(d->vcpu[i]);
+
+ xfree(d);
+}
+
+struct vcpu *alloc_vcpu_struct(
+ struct domain *d, unsigned long vcpu)
+{
+ struct vcpu *v, *vc;
+
+ ASSERT( d->vcpu[vcpu] == NULL );
+
+ if ( (v = arch_alloc_vcpu_struct()) == NULL )
+ return NULL;
+
+ memset(v, 0, sizeof(*v));
+
+ d->vcpu[vcpu] = v;
+ v->domain = d;
+ v->vcpu_id = vcpu;
+
+ if ( SCHED_OP(alloc_task, v) < 0 )
+ goto out;
+
+ if ( vcpu != 0 )
+ {
+ v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
+
+ for_each_vcpu( d, vc )
+ {
+ if ( (vc->next_in_list == NULL) ||
+ (vc->next_in_list->vcpu_id > vcpu) )
+ break;
+ }
+ v->next_in_list = vc->next_in_list;
+ vc->next_in_list = v;
+
+ if (test_bit(_VCPUF_cpu_pinned, &vc->vcpu_flags)) {
+ v->processor = (vc->processor + 1) % num_online_cpus();
+ set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags);
+ } else {
+ v->processor = (vc->processor + 1) % num_online_cpus();
+ }
+ }
+
+ return v;
+
+ out:
+ d->vcpu[vcpu] = NULL;
+ arch_free_vcpu_struct(v);
+
+ return NULL;
}
struct domain *alloc_domain_struct(void)
{
struct domain *d;
- if ( (d = arch_alloc_domain_struct()) == NULL )
+ if ( (d = xmalloc(struct domain)) == NULL )
return NULL;
memset(d, 0, sizeof(*d));
- if ( SCHED_OP(alloc_task, d) < 0 )
- {
- arch_free_domain_struct(d);
- return NULL;
- }
+ if ( alloc_vcpu_struct(d, 0) == NULL )
+ goto out;
return d;
+
+ out:
+ xfree(d);
+ return NULL;
}
/*
* Add and remove a domain
*/
-void sched_add_domain(struct domain *d)
+void sched_add_domain(struct vcpu *v)
{
- /* Must be unpaused by control software to start execution. */
- set_bit(DF_CTRLPAUSE, &d->flags);
+ struct domain *d = v->domain;
+
+ /* Initialise the per-domain timer. */
+ init_ac_timer(&v->timer, dom_timer_fn, v, v->processor);
- if ( d->id != IDLE_DOMAIN_ID )
+ if ( is_idle_task(d) )
{
- /* Initialise the per-domain timer. */
- init_ac_timer(&d->timer);
- d->timer.cpu = d->processor;
- d->timer.data = (unsigned long)d;
- d->timer.function = &dom_timer_fn;
+ schedule_data[v->processor].curr = v;
+ schedule_data[v->processor].idle = v;
+ set_bit(_VCPUF_running, &v->vcpu_flags);
}
else
{
- schedule_data[d->processor].idle = d;
+ /* Must be unpaused by control software to start execution. */
+ set_bit(_VCPUF_ctrl_pause, &v->vcpu_flags);
}
- SCHED_OP(add_task, d);
-
- TRACE_2D(TRC_SCHED_DOM_ADD, d->id, d);
+ SCHED_OP(add_task, v);
+ TRACE_2D(TRC_SCHED_DOM_ADD, d->domain_id, v->vcpu_id);
}
-void sched_rem_domain(struct domain *d)
+void sched_rem_domain(struct vcpu *v)
{
- rem_ac_timer(&d->timer);
- SCHED_OP(rem_task, d);
- TRACE_2D(TRC_SCHED_DOM_REM, d->id, d);
+ rem_ac_timer(&v->timer);
+ SCHED_OP(rem_task, v);
+ TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
}
-void init_idle_task(void)
-{
- if ( SCHED_OP(init_idle_task, current) < 0 )
- BUG();
-}
-
-void domain_sleep(struct domain *d)
+void domain_sleep_nosync(struct vcpu *v)
{
unsigned long flags;
- spin_lock_irqsave(&schedule_data[d->processor].schedule_lock, flags);
+ spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
+ if ( likely(!domain_runnable(v)) )
+ SCHED_OP(sleep, v);
+ spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
+
+ TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
+}
- if ( likely(!domain_runnable(d)) )
- SCHED_OP(sleep, d);
+void domain_sleep_sync(struct vcpu *v)
+{
+ domain_sleep_nosync(v);
- spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags);
-
- /* Synchronous. */
- while ( test_bit(DF_RUNNING, &d->flags) && !domain_runnable(d) )
+ while ( test_bit(_VCPUF_running, &v->vcpu_flags) && !domain_runnable(v) )
cpu_relax();
+
+ if ( cpu_isset(v->processor, v->domain->cpumask) )
+ sync_lazy_execstate_cpu(v->processor);
}
-void domain_wake(struct domain *d)
+void domain_wake(struct vcpu *v)
{
unsigned long flags;
- spin_lock_irqsave(&schedule_data[d->processor].schedule_lock, flags);
-
- if ( likely(domain_runnable(d)) )
+ spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
+ if ( likely(domain_runnable(v)) )
{
- TRACE_2D(TRC_SCHED_WAKE, d->id, d);
- SCHED_OP(wake, d);
+ SCHED_OP(wake, v);
#ifdef WAKE_HISTO
- d->wokenup = NOW();
+ v->wokenup = NOW();
#endif
}
-
- clear_bit(DF_MIGRATED, &d->flags);
-
- spin_unlock_irqrestore(&schedule_data[d->processor].schedule_lock, flags);
+ clear_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+ spin_unlock_irqrestore(&schedule_data[v->processor].schedule_lock, flags);
+
+ TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
}
/* Block the currently-executing domain until a pertinent event occurs. */
long do_block(void)
{
- ASSERT(current->id != IDLE_DOMAIN_ID);
- current->shared_info->vcpu_data[0].evtchn_upcall_mask = 0;
- set_bit(DF_BLOCKED, &current->flags);
- TRACE_2D(TRC_SCHED_BLOCK, current->id, current);
- __enter_scheduler();
+ struct vcpu *v = current;
+
+ v->vcpu_info->evtchn_upcall_mask = 0;
+ set_bit(_VCPUF_blocked, &v->vcpu_flags);
+
+ /* Check for events /after/ blocking: avoids wakeup waiting race. */
+ if ( event_pending(v) )
+ {
+ clear_bit(_VCPUF_blocked, &v->vcpu_flags);
+ }
+ else
+ {
+ TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id);
+ __enter_scheduler();
+ }
+
return 0;
}
/* Voluntarily yield the processor for this allocation. */
static long do_yield(void)
{
- TRACE_2D(TRC_SCHED_YIELD, current->id, current);
+ TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
__enter_scheduler();
return 0;
}
@@ -217,7 +270,6 @@ long do_sched_op(unsigned long op)
switch ( op & SCHEDOP_cmdmask )
{
-
case SCHEDOP_yield:
{
ret = do_yield();
@@ -232,6 +284,9 @@ long do_sched_op(unsigned long op)
case SCHEDOP_shutdown:
{
+ TRACE_3D(TRC_SCHED_SHUTDOWN,
+ current->domain->domain_id, current->vcpu_id,
+ (op >> SCHEDOP_reasonshift));
domain_shutdown((u8)(op >> SCHEDOP_reasonshift));
break;
}
@@ -244,19 +299,14 @@ long do_sched_op(unsigned long op)
}
/* Per-domain one-shot-timer hypercall. */
-long do_set_timer_op(unsigned long timeout_hi, unsigned long timeout_lo)
+long do_set_timer_op(s_time_t timeout)
{
- struct domain *p = current;
+ struct vcpu *v = current;
- rem_ac_timer(&p->timer);
-
- if ( (timeout_hi != 0) || (timeout_lo != 0) )
- {
- p->timer.expires = ((s_time_t)timeout_hi<<32) | ((s_time_t)timeout_lo);
- add_ac_timer(&p->timer);
- }
-
- TRACE_4D(TRC_SCHED_SET_TIMER, p->id, p, timeout_hi, timeout_lo);
+ if ( timeout == 0 )
+ rem_ac_timer(&v->timer);
+ else
+ set_ac_timer(&v->timer, timeout);
return 0;
}
@@ -269,12 +319,12 @@ int sched_id()
long sched_ctl(struct sched_ctl_cmd *cmd)
{
- TRACE_0D(TRC_SCHED_CTL);
-
if ( cmd->sched_id != ops.sched_id )
return -EINVAL;
- return SCHED_OP(control, cmd);
+ SCHED_OP(control, cmd);
+ TRACE_0D(TRC_SCHED_CTL);
+ return 0;
}
@@ -282,10 +332,22 @@ long sched_ctl(struct sched_ctl_cmd *cmd)
long sched_adjdom(struct sched_adjdom_cmd *cmd)
{
struct domain *d;
+ struct vcpu *v;
+ int cpu;
+#if NR_CPUS <=32
+ unsigned long have_lock;
+ #else
+ unsigned long long have_lock;
+#endif
+ int succ;
+ #define __set_cpu_bit(cpu, data) data |= ((typeof(data))1)<<cpu
+ #define __get_cpu_bit(cpu, data) (data & ((typeof(data))1)<<cpu)
+ #define __clear_cpu_bits(data) data = ((typeof(data))0)
+
if ( cmd->sched_id != ops.sched_id )
return -EINVAL;
-
+
if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET )
return -EINVAL;
@@ -293,12 +355,40 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd)
if ( d == NULL )
return -ESRCH;
- TRACE_1D(TRC_SCHED_ADJDOM, d->id);
-
- spin_lock_irq(&schedule_data[d->processor].schedule_lock);
+ /* acquire locks on all CPUs on which vcpus of this domain run */
+ do {
+ succ = 0;
+ __clear_cpu_bits(have_lock);
+ for_each_vcpu(d, v) {
+ cpu = v->processor;
+ if (!__get_cpu_bit(cpu, have_lock)) {
+ /* if we don't have a lock on this CPU: acquire it*/
+ if (spin_trylock(&schedule_data[cpu].schedule_lock)) {
+ /*we have this lock!*/
+ __set_cpu_bit(cpu, have_lock);
+ succ = 1;
+ } else {
+ /*we didn,t get this lock -> free all other locks too!*/
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (__get_cpu_bit(cpu, have_lock))
+ spin_unlock(&schedule_data[cpu].schedule_lock);
+ /* and start from the beginning! */
+ succ = 0;
+ /* leave the "for_each_domain_loop" */
+ break;
+ }
+ }
+ }
+ } while (!succ);
+ //spin_lock_irq(&schedule_data[d->vcpu[0]->processor].schedule_lock);
SCHED_OP(adjdom, d, cmd);
- spin_unlock_irq(&schedule_data[d->processor].schedule_lock);
+ //spin_unlock_irq(&schedule_data[d->vcpu[0]->processor].schedule_lock);
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (__get_cpu_bit(cpu, have_lock))
+ spin_unlock(&schedule_data[cpu].schedule_lock);
+ __clear_cpu_bits(have_lock);
+ TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
put_domain(d);
return 0;
}
@@ -310,31 +400,22 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd)
*/
static void __enter_scheduler(void)
{
- struct domain *prev = current, *next = NULL;
+ struct vcpu *prev = current, *next = NULL;
int cpu = prev->processor;
s_time_t now;
- task_slice_t next_slice;
+ struct task_slice next_slice;
s32 r_time; /* time for new dom to run */
perfc_incrc(sched_run);
spin_lock_irq(&schedule_data[cpu].schedule_lock);
-
+
now = NOW();
rem_ac_timer(&schedule_data[cpu].s_timer);
ASSERT(!in_irq());
- if ( test_bit(DF_BLOCKED, &prev->flags) )
- {
- /* This check is needed to avoid a race condition. */
- if ( event_pending(prev) )
- clear_bit(DF_BLOCKED, &prev->flags);
- else
- SCHED_OP(do_block, prev);
- }
-
prev->cpu_time += now - prev->lastschd;
/* get policy-specific decision on scheduling... */
@@ -347,21 +428,17 @@ static void __enter_scheduler(void)
next->lastschd = now;
- /* reprogramm the timer */
- schedule_data[cpu].s_timer.expires = now + r_time;
- add_ac_timer(&schedule_data[cpu].s_timer);
+ set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
/* Must be protected by the schedule_lock! */
- set_bit(DF_RUNNING, &next->flags);
+ set_bit(_VCPUF_running, &next->vcpu_flags);
spin_unlock_irq(&schedule_data[cpu].schedule_lock);
if ( unlikely(prev == next) )
- return;
-
- perfc_incrc(sched_ctx);
+ return continue_running(prev);
- cleanup_writable_pagetable(prev);
+ perfc_incrc(sched_ctx);
#if defined(WAKE_HISTO)
if ( !is_idle_task(next->domain) && next->wokenup ) {
@@ -381,31 +458,27 @@ static void __enter_scheduler(void)
}
#endif
- TRACE_2D(TRC_SCHED_SWITCH, next->id, next);
-
- switch_to(prev, next);
-
- /*
- * We do this late on because it doesn't need to be protected by the
- * schedule_lock, and because we want this to be the very last use of
- * 'prev' (after this point, a dying domain's info structure may be freed
- * without warning).
- */
- clear_bit(DF_RUNNING, &prev->flags);
+ prev->sleep_tick = schedule_data[cpu].tick;
/* Ensure that the domain has an up-to-date time base. */
- if ( !is_idle_task(next) && update_dom_time(next) )
- send_guest_virq(next, VIRQ_TIMER);
+ if ( !is_idle_task(next->domain) )
+ {
+ update_dom_time(next);
+ if ( next->sleep_tick != schedule_data[cpu].tick )
+ send_guest_virq(next, VIRQ_TIMER);
+ }
- schedule_tail(next);
+ TRACE_4D(TRC_SCHED_SWITCH,
+ prev->domain->domain_id, prev->vcpu_id,
+ next->domain->domain_id, next->vcpu_id);
- BUG();
+ context_switch(prev, next);
}
/* No locking needed -- pointer comparison is safe :-) */
int idle_cpu(int cpu)
{
- struct domain *p = schedule_data[cpu].curr;
+ struct vcpu *p = schedule_data[cpu].curr;
return p == idle_task[cpu];
}
@@ -417,37 +490,39 @@ int idle_cpu(int cpu)
* - dom_timer: per domain timer to specifiy timeout values
****************************************************************************/
-/* The scheduler timer: force a run through the scheduler*/
-static void s_timer_fn(unsigned long unused)
+/* The scheduler timer: force a run through the scheduler */
+static void s_timer_fn(void *unused)
{
- TRACE_0D(TRC_SCHED_S_TIMER_FN);
raise_softirq(SCHEDULE_SOFTIRQ);
perfc_incrc(sched_irq);
}
-/* Periodic tick timer: send timer event to current domain*/
-static void t_timer_fn(unsigned long unused)
+/* Periodic tick timer: send timer event to current domain */
+static void t_timer_fn(void *unused)
{
- struct domain *d = current;
+ struct vcpu *v = current;
+ unsigned int cpu = v->processor;
- TRACE_0D(TRC_SCHED_T_TIMER_FN);
+ schedule_data[cpu].tick++;
- if ( !is_idle_task(d) && update_dom_time(d) )
- send_guest_virq(d, VIRQ_TIMER);
+ if ( !is_idle_task(v->domain) )
+ {
+ update_dom_time(v);
+ send_guest_virq(v, VIRQ_TIMER);
+ }
page_scrub_schedule_work();
- t_timer[d->processor].expires = NOW() + MILLISECS(10);
- add_ac_timer(&t_timer[d->processor]);
+ set_ac_timer(&t_timer[cpu], NOW() + MILLISECS(10));
}
/* Domain timer function, sends a virtual timer interrupt to domain */
-static void dom_timer_fn(unsigned long data)
+static void dom_timer_fn(void *data)
{
- struct domain *d = (struct domain *)data;
- TRACE_0D(TRC_SCHED_DOM_TIMER_FN);
- (void)update_dom_time(d);
- send_guest_virq(d, VIRQ_TIMER);
+ struct vcpu *v = data;
+
+ update_dom_time(v);
+ send_guest_virq(v, VIRQ_TIMER);
}
/* Initialise the data structures. */
@@ -460,20 +535,12 @@ void __init scheduler_init(void)
for ( i = 0; i < NR_CPUS; i++ )
{
spin_lock_init(&schedule_data[i].schedule_lock);
- schedule_data[i].curr = &idle0_task;
-
- init_ac_timer(&schedule_data[i].s_timer);
- schedule_data[i].s_timer.cpu = i;
- schedule_data[i].s_timer.data = 2;
- schedule_data[i].s_timer.function = &s_timer_fn;
-
- init_ac_timer(&t_timer[i]);
- t_timer[i].cpu = i;
- t_timer[i].data = 3;
- t_timer[i].function = &t_timer_fn;
+ init_ac_timer(&schedule_data[i].s_timer, s_timer_fn, NULL, i);
+ init_ac_timer(&t_timer[i], t_timer_fn, NULL, i);
}
- schedule_data[0].idle = &idle0_task;
+ schedule_data[0].curr = idle_task[0];
+ schedule_data[0].idle = idle_task[0];
for ( i = 0; schedulers[i] != NULL; i++ )
{
@@ -487,8 +554,8 @@ void __init scheduler_init(void)
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- if ( SCHED_OP(init_scheduler) < 0 )
- panic("Initialising scheduler failed!");
+ BUG_ON(SCHED_OP(alloc_task, idle_task[0]) < 0);
+ sched_add_domain(idle_task[0]);
}
/*
@@ -497,14 +564,10 @@ void __init scheduler_init(void)
*/
void schedulers_start(void)
{
- s_timer_fn(0);
- smp_call_function((void *)s_timer_fn, NULL, 1, 1);
-
t_timer_fn(0);
smp_call_function((void *)t_timer_fn, NULL, 1, 1);
}
-
void dump_runq(unsigned char key)
{
s_time_t now = NOW();
@@ -517,7 +580,7 @@ void dump_runq(unsigned char key)
SCHED_OP(dump_settings);
printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
- for ( i = 0; i < smp_num_cpus; i++ )
+ for_each_online_cpu ( i )
{
spin_lock(&schedule_data[i].schedule_lock);
printk("CPU[%02d] ", i);
@@ -529,10 +592,11 @@ void dump_runq(unsigned char key)
}
#if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
+
void print_sched_histo(unsigned char key)
{
int i, j, k;
- for ( k = 0; k < smp_num_cpus; k++ )
+ for_each_online_cpu ( k )
{
j = 0;
printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
@@ -552,14 +616,28 @@ void print_sched_histo(unsigned char key)
}
}
+
void reset_sched_histo(unsigned char key)
{
int i, j;
- for ( j = 0; j < smp_num_cpus; j++ )
+ for ( j = 0; j < NR_CPUS; j++ )
for ( i=0; i < BUCKETS; i++ )
schedule_data[j].hist[i] = 0;
}
+
#else
+
void print_sched_histo(unsigned char key) { }
void reset_sched_histo(unsigned char key) { }
+
#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/slab.c b/xen/common/slab.c
deleted file mode 100644
index cfbf402261..0000000000
--- a/xen/common/slab.c
+++ /dev/null
@@ -1,1844 +0,0 @@
-/*
- * linux/mm/slab.c
- * Written by Mark Hemment, 1996/97.
- * (markhe@nextd.demon.co.uk)
- *
- * xmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
- *
- * Major cleanup, different bufctl logic, per-cpu arrays
- * (c) 2000 Manfred Spraul
- *
- * An implementation of the Slab Allocator as described in outline in;
- * UNIX Internals: The New Frontiers by Uresh Vahalia
- * Pub: Prentice Hall ISBN 0-13-101908-2
- * or with a little more detail in;
- * The Slab Allocator: An Object-Caching Kernel Memory Allocator
- * Jeff Bonwick (Sun Microsystems).
- * Presented at: USENIX Summer 1994 Technical Conference
- *
- *
- * The memory is organized in caches, one cache for each object type.
- * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
- * Each cache consists out of many slabs (they are small (usually one
- * page long) and always contiguous), and each slab contains multiple
- * initialized objects.
- *
- * In order to reduce fragmentation, the slabs are sorted in 3 groups:
- * full slabs with 0 free objects
- * partial slabs
- * empty slabs with no allocated objects
- *
- * If partial slabs exist, then new allocations come from these slabs,
- * otherwise from empty slabs or new slabs are allocated.
- *
- * xmem_cache_destroy() CAN CRASH if you try to allocate from the cache
- * during xmem_cache_destroy(). The caller must prevent concurrent allocs.
- *
- * On SMP systems, each cache has a short per-cpu head array, most allocs
- * and frees go into that array, and if that array overflows, then 1/2
- * of the entries in the array are given back into the global cache.
- * This reduces the number of spinlock operations.
- *
- * The c_cpuarray may not be read with enabled local interrupts.
- *
- * SMP synchronization:
- * constructors and destructors are called without any locking.
- * Several members in xmem_cache_t and slab_t never change, they
- * are accessed without any locking.
- * The per-cpu arrays are never accessed from the wrong cpu, no locking.
- * The non-constant members are protected with a per-cache irq spinlock.
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/types.h>
-#include <xen/lib.h>
-#include <xen/slab.h>
-#include <xen/list.h>
-#include <xen/spinlock.h>
-#include <xen/errno.h>
-#include <xen/smp.h>
-#include <xen/sched.h>
-
-/*
- * DEBUG - 1 for xmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
- * SLAB_RED_ZONE & SLAB_POISON.
- * 0 for faster, smaller code (especially in the critical paths).
- *
- * STATS - 1 to collect stats for /proc/slabinfo.
- * 0 for faster, smaller code (especially in the critical paths).
- *
- * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
- */
-#ifdef CONFIG_DEBUG_SLAB
-#define DEBUG 1
-#define STATS 1
-#define FORCED_DEBUG 1
-#else
-#define DEBUG 0
-#define STATS 0
-#define FORCED_DEBUG 0
-#endif
-
-/*
- * Parameters for xmem_cache_reap
- */
-#define REAP_SCANLEN 10
-#define REAP_PERFECT 10
-
-/* Shouldn't this be in a header file somewhere? */
-#define BYTES_PER_WORD sizeof(void *)
-
-/* Legal flag mask for xmem_cache_create(). */
-#if DEBUG
-#define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
- SLAB_POISON | SLAB_HWCACHE_ALIGN | \
- SLAB_NO_REAP)
-#else
-#define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP)
-#endif
-
-/*
- * xmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementaion relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-#define BUFCTL_END (((xmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE (((xmem_bufctl_t)(~0U))-1)
-#define SLAB_LIMIT (((xmem_bufctl_t)(~0U))-2)
-
-/* Max number of objs-per-slab for caches which use off-slab slabs.
- * Needed to avoid a possible looping condition in xmem_cache_grow().
- */
-static unsigned long offslab_limit;
-
-/*
- * slab_t
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-typedef struct slab_s {
- struct list_head list;
- unsigned long colouroff;
- void *s_mem; /* including colour offset */
- unsigned int inuse; /* num of objs active in slab */
- xmem_bufctl_t free;
-} slab_t;
-
-#define slab_bufctl(slabp) \
- ((xmem_bufctl_t *)(((slab_t*)slabp)+1))
-
-/*
- * cpucache_t
- *
- * Per cpu structures
- * The limit is stored in the per-cpu structure to reduce the data cache
- * footprint.
- */
-typedef struct cpucache_s {
- unsigned int avail;
- unsigned int limit;
-} cpucache_t;
-
-#define cc_entry(cpucache) \
- ((void **)(((cpucache_t*)(cpucache))+1))
-#define cc_data(cachep) \
- ((cachep)->cpudata[smp_processor_id()])
-/*
- * xmem_cache_t
- *
- * manages a cache.
- */
-
-#define CACHE_NAMELEN 20 /* max name length for a slab cache */
-
-struct xmem_cache_s {
-/* 1) each alloc & free */
- /* full, partial first, then free */
- struct list_head slabs_full;
- struct list_head slabs_partial;
- struct list_head slabs_free;
- unsigned int objsize;
- unsigned int flags; /* constant flags */
- unsigned int num; /* # of objs per slab */
- spinlock_t spinlock;
-#ifdef CONFIG_SMP
- unsigned int batchcount;
-#endif
-
-/* 2) slab additions /removals */
- /* order of pgs per slab (2^n) */
- unsigned int gfporder;
- size_t colour; /* cache colouring range */
- unsigned int colour_off; /* colour offset */
- unsigned int colour_next; /* cache colouring */
- xmem_cache_t *slabp_cache;
- unsigned int growing;
- unsigned int dflags; /* dynamic flags */
-
- /* constructor func */
- void (*ctor)(void *, xmem_cache_t *, unsigned long);
-
- /* de-constructor func */
- void (*dtor)(void *, xmem_cache_t *, unsigned long);
-
- unsigned long failures;
-
-/* 3) cache creation/removal */
- char name[CACHE_NAMELEN];
- struct list_head next;
-#ifdef CONFIG_SMP
-/* 4) per-cpu data */
- cpucache_t *cpudata[NR_CPUS];
-#endif
-#if STATS
- unsigned long num_active;
- unsigned long num_allocations;
- unsigned long high_mark;
- unsigned long grown;
- unsigned long reaped;
- unsigned long errors;
-#ifdef CONFIG_SMP
- atomic_t allochit;
- atomic_t allocmiss;
- atomic_t freehit;
- atomic_t freemiss;
-#endif
-#endif
-};
-
-/* internal c_flags */
-#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
-#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */
-
-/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
-#define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */
-
-#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
-#define OPTIMIZE(x) ((x)->flags & CFLGS_OPTIMIZE)
-#define GROWN(x) ((x)->dlags & DFLGS_GROWN)
-
-#if STATS
-#define STATS_INC_ACTIVE(x) ((x)->num_active++)
-#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
-#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
-#define STATS_INC_GROWN(x) ((x)->grown++)
-#define STATS_INC_REAPED(x) ((x)->reaped++)
-#define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \
- (x)->high_mark = (x)->num_active; \
- } while (0)
-#define STATS_INC_ERR(x) ((x)->errors++)
-#else
-#define STATS_INC_ACTIVE(x) do { } while (0)
-#define STATS_DEC_ACTIVE(x) do { } while (0)
-#define STATS_INC_ALLOCED(x) do { } while (0)
-#define STATS_INC_GROWN(x) do { } while (0)
-#define STATS_INC_REAPED(x) do { } while (0)
-#define STATS_SET_HIGH(x) do { } while (0)
-#define STATS_INC_ERR(x) do { } while (0)
-#endif
-
-#if STATS && defined(CONFIG_SMP)
-#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
-#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
-#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
-#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
-#else
-#define STATS_INC_ALLOCHIT(x) do { } while (0)
-#define STATS_INC_ALLOCMISS(x) do { } while (0)
-#define STATS_INC_FREEHIT(x) do { } while (0)
-#define STATS_INC_FREEMISS(x) do { } while (0)
-#endif
-
-#if DEBUG
-/* Magic nums for obj red zoning.
- * Placed in the first word before and the first word after an obj.
- */
-#define RED_MAGIC1 0x5A2CF071UL /* when obj is active */
-#define RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
-
-/* ...and for poisoning */
-#define POISON_BYTE 0x5a /* byte value for poisoning */
-#define POISON_END 0xa5 /* end-byte of poisoning */
-
-#endif
-
-/* maximum size of an obj (in 2^order pages) */
-#define MAX_OBJ_ORDER 5 /* 32 pages */
-
-/*
- * Do not go above this order unless 0 objects fit into the slab.
- */
-#define BREAK_GFP_ORDER_HI 2
-#define BREAK_GFP_ORDER_LO 1
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
-
-/*
- * Absolute limit for the gfp order
- */
-#define MAX_GFP_ORDER 5 /* 32 pages */
-
-
-/* Macros for storing/retrieving the cachep and or slab from the
- * global 'mem_map'. These are used to find the slab an obj belongs to.
- * With xfree(), these are used to find the cache which an obj belongs to.
- */
-#define SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x))
-#define GET_PAGE_CACHE(pg) ((xmem_cache_t *)(pg)->list.next)
-#define SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
-#define GET_PAGE_SLAB(pg) ((slab_t *)(pg)->list.prev)
-
-/* Size description struct for general caches. */
-typedef struct cache_sizes {
- size_t cs_size;
- xmem_cache_t *cs_cachep;
-} cache_sizes_t;
-
-static cache_sizes_t cache_sizes[] = {
- { 32, NULL},
- { 64, NULL},
- { 128, NULL},
- { 256, NULL},
- { 512, NULL},
- { 1024, NULL},
- { 2048, NULL},
- { 4096, NULL},
- { 8192, NULL},
- { 16384, NULL},
- { 32768, NULL},
- { 65536, NULL},
- { 0, NULL}
-};
-
-/* internal cache of cache description objs */
-static xmem_cache_t cache_cache = {
- slabs_full: LIST_HEAD_INIT(cache_cache.slabs_full),
- slabs_partial: LIST_HEAD_INIT(cache_cache.slabs_partial),
- slabs_free: LIST_HEAD_INIT(cache_cache.slabs_free),
- objsize: sizeof(xmem_cache_t),
- flags: SLAB_NO_REAP,
- spinlock: SPIN_LOCK_UNLOCKED,
- colour_off: L1_CACHE_BYTES,
- name: "xmem_cache"
-};
-
-/* Guard access to the cache-chain. */
-/* KAF: No semaphores, as we'll never wait around for I/O. */
-static spinlock_t cache_chain_sem;
-#define init_MUTEX(_m) spin_lock_init(_m)
-#define down(_m) spin_lock_irqsave(_m,spin_flags)
-#define up(_m) spin_unlock_irqrestore(_m,spin_flags)
-
-/* Place maintainer for reaping. */
-static xmem_cache_t *clock_searchp = &cache_cache;
-
-#define cache_chain (cache_cache.next)
-
-#ifdef CONFIG_SMP
-/*
- * chicken and egg problem: delay the per-cpu array allocation
- * until the general caches are up.
- */
-static int g_cpucache_up;
-
-static void enable_cpucache (xmem_cache_t *cachep);
-static void enable_all_cpucaches (void);
-#endif
-
-/* Cal the num objs, wastage, and bytes left over for a given slab size. */
-static void xmem_cache_estimate (unsigned long gfporder, size_t size,
- int flags, size_t *left_over, unsigned int *num)
-{
- int i;
- size_t wastage = PAGE_SIZE<<gfporder;
- size_t extra = 0;
- size_t base = 0;
-
- if (!(flags & CFLGS_OFF_SLAB)) {
- base = sizeof(slab_t);
- extra = sizeof(xmem_bufctl_t);
- }
- i = 0;
- while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
- i++;
- if (i > 0)
- i--;
-
- if (i > SLAB_LIMIT)
- i = SLAB_LIMIT;
-
- *num = i;
- wastage -= i*size;
- wastage -= L1_CACHE_ALIGN(base+i*extra);
- *left_over = wastage;
-}
-
-/* Initialisation - setup the `cache' cache. */
-void __init xmem_cache_init(void)
-{
- size_t left_over;
-
- init_MUTEX(&cache_chain_sem);
- INIT_LIST_HEAD(&cache_chain);
-
- xmem_cache_estimate(0, cache_cache.objsize, 0,
- &left_over, &cache_cache.num);
- if (!cache_cache.num)
- BUG();
-
- cache_cache.colour = left_over/cache_cache.colour_off;
- cache_cache.colour_next = 0;
-}
-
-
-/* Initialisation - setup remaining internal and general caches.
- * Called after the gfp() functions have been enabled, and before smp_init().
- */
-void __init xmem_cache_sizes_init(unsigned long num_physpages)
-{
- cache_sizes_t *sizes = cache_sizes;
- char name[20];
- /*
- * Fragmentation resistance on low memory - only use bigger
- * page orders on machines with more than 32MB of memory.
- */
- if (num_physpages > (32 << 20) >> PAGE_SHIFT)
- slab_break_gfp_order = BREAK_GFP_ORDER_HI;
- do {
- /* For performance, all the general caches are L1 aligned.
- * This should be particularly beneficial on SMP boxes, as it
- * eliminates "false sharing".
- * Note for systems short on memory removing the alignment will
- * allow tighter packing of the smaller caches. */
- sprintf(name,"size-%Zd",sizes->cs_size);
- if (!(sizes->cs_cachep =
- xmem_cache_create(name, sizes->cs_size,
- 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
- BUG();
- }
-
- /* Inc off-slab bufctl limit until the ceiling is hit. */
- if (!(OFF_SLAB(sizes->cs_cachep))) {
- offslab_limit = sizes->cs_size-sizeof(slab_t);
- offslab_limit /= 2;
- }
- sizes++;
- } while (sizes->cs_size);
-}
-
-int __init xmem_cpucache_init(void)
-{
-#ifdef CONFIG_SMP
- g_cpucache_up = 1;
- enable_all_cpucaches();
-#endif
- return 0;
-}
-
-/*__initcall(xmem_cpucache_init);*/
-
-/* Interface to system's page allocator. No need to hold the cache-lock.
- */
-static inline void *xmem_getpages(xmem_cache_t *cachep)
-{
- void *addr;
-
- addr = (void*) alloc_xenheap_pages(cachep->gfporder);
- /* Assume that now we have the pages no one else can legally
- * messes with the 'struct page's.
- * However vm_scan() might try to test the structure to see if
- * it is a named-page or buffer-page. The members it tests are
- * of no interest here.....
- */
- return addr;
-}
-
-/* Interface to system's page release. */
-static inline void xmem_freepages (xmem_cache_t *cachep, void *addr)
-{
- unsigned long i = (1<<cachep->gfporder);
- struct pfn_info *page = virt_to_page(addr);
-
- /* free_xenheap_pages() does not clear the type bit - we do that.
- * The pages have been unlinked from their cache-slab,
- * but their 'struct page's might be accessed in
- * vm_scan(). Shouldn't be a worry.
- */
- while (i--) {
- PageClearSlab(page);
- page++;
- }
-
- free_xenheap_pages((unsigned long)addr, cachep->gfporder);
-}
-
-#if DEBUG
-static inline void xmem_poison_obj (xmem_cache_t *cachep, void *addr)
-{
- int size = cachep->objsize;
- if (cachep->flags & SLAB_RED_ZONE) {
- addr += BYTES_PER_WORD;
- size -= 2*BYTES_PER_WORD;
- }
- memset(addr, POISON_BYTE, size);
- *(unsigned char *)(addr+size-1) = POISON_END;
-}
-
-static inline int xmem_check_poison_obj (xmem_cache_t *cachep, void *addr)
-{
- int size = cachep->objsize;
- void *end;
- if (cachep->flags & SLAB_RED_ZONE) {
- addr += BYTES_PER_WORD;
- size -= 2*BYTES_PER_WORD;
- }
- end = memchr(addr, POISON_END, size);
- if (end != (addr+size-1))
- return 1;
- return 0;
-}
-#endif
-
-/* Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked from the cache.
- * The cache-lock is not held/needed.
- */
-static void xmem_slab_destroy (xmem_cache_t *cachep, slab_t *slabp)
-{
- if (cachep->dtor
-#if DEBUG
- || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
-#endif
- ) {
- int i;
- for (i = 0; i < cachep->num; i++) {
- void* objp = slabp->s_mem+cachep->objsize*i;
-#if DEBUG
- if (cachep->flags & SLAB_RED_ZONE) {
- if (*((unsigned long*)(objp)) != RED_MAGIC1)
- BUG();
- if (*((unsigned long*)(objp + cachep->objsize
- -BYTES_PER_WORD)) != RED_MAGIC1)
- BUG();
- objp += BYTES_PER_WORD;
- }
-#endif
- if (cachep->dtor)
- (cachep->dtor)(objp, cachep, 0);
-#if DEBUG
- if (cachep->flags & SLAB_RED_ZONE) {
- objp -= BYTES_PER_WORD;
- }
- if ((cachep->flags & SLAB_POISON) &&
- xmem_check_poison_obj(cachep, objp))
- BUG();
-#endif
- }
- }
-
- xmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
- if (OFF_SLAB(cachep))
- xmem_cache_free(cachep->slabp_cache, slabp);
-}
-
-/**
- * xmem_cache_create - Create a cache.
- * @name: A string which is used in /proc/slabinfo to identify this cache.
- * @size: The size of objects to be created in this cache.
- * @offset: The offset to use within the page.
- * @flags: SLAB flags
- * @ctor: A constructor for the objects.
- * @dtor: A destructor for the objects.
- *
- * Returns a ptr to the cache on success, NULL on failure.
- * Cannot be called within a int, but can be interrupted.
- * The @ctor is run when new pages are allocated by the cache
- * and the @dtor is run before the pages are handed back.
- * The flags are
- *
- * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
- * to catch references to uninitialised memory.
- *
- * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
- * for buffer overruns.
- *
- * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
- * memory pressure.
- *
- * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
- * cacheline. This can be beneficial if you're counting cycles as closely
- * as davem.
- */
-xmem_cache_t *
-xmem_cache_create (const char *name, size_t size, size_t offset,
- unsigned long flags,
- void (*ctor)(void*, xmem_cache_t *, unsigned long),
- void (*dtor)(void*, xmem_cache_t *, unsigned long))
-{
- const char *func_nm = KERN_ERR "xmem_create: ";
- size_t left_over, align, slab_size;
- xmem_cache_t *cachep = NULL;
- unsigned long spin_flags;
-
- /*
- * Sanity checks... these are all serious usage bugs.
- */
- if ((!name) ||
- ((strlen(name) >= CACHE_NAMELEN - 1)) ||
- (size < BYTES_PER_WORD) ||
- (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
- (dtor && !ctor) ||
- (offset < 0 || offset > size))
- BUG();
-
-#if DEBUG
- if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
- /* No constructor, but inital state check requested */
- printk("%sNo con, but init state check requested - %s\n",
- func_nm, name);
- flags &= ~SLAB_DEBUG_INITIAL;
- }
-
- if ((flags & SLAB_POISON) && ctor) {
- /* request for poisoning, but we can't do that with a constructor */
- printk("%sPoisoning requested, but con given - %s\n",
- func_nm, name);
- flags &= ~SLAB_POISON;
- }
-#if FORCED_DEBUG
- if (size < (PAGE_SIZE>>3))
- /*
- * do not red zone large object, causes severe
- * fragmentation.
- */
- flags |= SLAB_RED_ZONE;
- if (!ctor)
- flags |= SLAB_POISON;
-#endif
-#endif
-
- /*
- * Always checks flags, a caller might be expecting debug
- * support which isn't available.
- */
- if (flags & ~CREATE_MASK)
- BUG();
-
- /* Get cache's description obj. */
- cachep = (xmem_cache_t *)xmem_cache_alloc(&cache_cache);
- if (!cachep)
- goto opps;
- memset(cachep, 0, sizeof(xmem_cache_t));
-
- /* Check that size is in terms of words. This is needed to avoid
- * unaligned accesses for some archs when redzoning is used, and makes
- * sure any on-slab bufctl's are also correctly aligned.
- */
- if (size & (BYTES_PER_WORD-1)) {
- size += (BYTES_PER_WORD-1);
- size &= ~(BYTES_PER_WORD-1);
- printk("%sForcing size word alignment - %s\n", func_nm, name);
- }
-
-#if DEBUG
- if (flags & SLAB_RED_ZONE) {
- /*
- * There is no point trying to honour cache alignment
- * when redzoning.
- */
- flags &= ~SLAB_HWCACHE_ALIGN;
- size += 2*BYTES_PER_WORD; /* words for redzone */
- }
-#endif
- align = BYTES_PER_WORD;
- if (flags & SLAB_HWCACHE_ALIGN)
- align = L1_CACHE_BYTES;
-
- /* Determine if the slab management is 'on' or 'off' slab. */
- if (size >= (PAGE_SIZE>>3))
- /*
- * Size is large, assume best to place the slab management obj
- * off-slab (should allow better packing of objs).
- */
- flags |= CFLGS_OFF_SLAB;
-
- if (flags & SLAB_HWCACHE_ALIGN) {
- /* Need to adjust size so that objs are cache aligned. */
- /* Small obj size, can get at least two per cache line. */
- /* FIXME: only power of 2 supported, was better */
- while (size < align/2)
- align /= 2;
- size = (size+align-1)&(~(align-1));
- }
-
- /* Cal size (in pages) of slabs, and the num of objs per slab.
- * This could be made much more intelligent. For now, try to avoid
- * using high page-orders for slabs. When the gfp() funcs are more
- * friendly towards high-order requests, this should be changed.
- */
- do {
- unsigned int break_flag = 0;
- cal_wastage:
- xmem_cache_estimate(cachep->gfporder, size, flags,
- &left_over, &cachep->num);
- if (break_flag)
- break;
- if (cachep->gfporder >= MAX_GFP_ORDER)
- break;
- if (!cachep->num)
- goto next;
- if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
- /* Oops, this num of objs will cause problems. */
- cachep->gfporder--;
- break_flag++;
- goto cal_wastage;
- }
-
- /*
- * Large num of objs is good, but v. large slabs are currently
- * bad for the gfp()s.
- */
- if (cachep->gfporder >= slab_break_gfp_order)
- break;
-
- if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
- break; /* Acceptable internal fragmentation. */
- next:
- cachep->gfporder++;
- } while (1);
-
- if (!cachep->num) {
- printk("xmem_cache_create: couldn't create cache %s.\n", name);
- xmem_cache_free(&cache_cache, cachep);
- cachep = NULL;
- goto opps;
- }
- slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(xmem_bufctl_t) +
- sizeof(slab_t));
-
- /*
- * If the slab has been placed off-slab, and we have enough space then
- * move it on-slab. This is at the expense of any extra colouring.
- */
- if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
- flags &= ~CFLGS_OFF_SLAB;
- left_over -= slab_size;
- }
-
- /* Offset must be a multiple of the alignment. */
- offset += (align-1);
- offset &= ~(align-1);
- if (!offset)
- offset = L1_CACHE_BYTES;
- cachep->colour_off = offset;
- cachep->colour = left_over/offset;
-
- /* init remaining fields */
- if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
- flags |= CFLGS_OPTIMIZE;
-
- cachep->flags = flags;
- spin_lock_init(&cachep->spinlock);
- cachep->objsize = size;
- INIT_LIST_HEAD(&cachep->slabs_full);
- INIT_LIST_HEAD(&cachep->slabs_partial);
- INIT_LIST_HEAD(&cachep->slabs_free);
-
- if (flags & CFLGS_OFF_SLAB)
- cachep->slabp_cache = xmem_find_general_cachep(slab_size);
- cachep->ctor = ctor;
- cachep->dtor = dtor;
- /* Copy name over so we don't have problems with unloaded modules */
- strcpy(cachep->name, name);
-
-#ifdef CONFIG_SMP
- if (g_cpucache_up)
- enable_cpucache(cachep);
-#endif
- /* Need the semaphore to access the chain. */
- down(&cache_chain_sem);
- {
- xmem_cache_t *pc;
-
- list_for_each_entry(pc, &cache_chain, next) {
- /* The name field is constant - no lock needed. */
- if (!strcmp(pc->name, name))
- BUG();
- }
- }
-
- /* There is no reason to lock our new cache before we
- * link it in - no one knows about it yet...
- */
- list_add(&cachep->next, &cache_chain);
- up(&cache_chain_sem);
- opps:
- return cachep;
-}
-
-
-#if DEBUG
-/*
- * This check if the xmem_cache_t pointer is chained in the cache_cache
- * list. -arca
- */
-static int is_chained_xmem_cache(xmem_cache_t * cachep)
-{
- xmem_cache_t *pc;
- int ret = 0;
- unsigned long spin_flags;
-
- /* Find the cache in the chain of caches. */
- down(&cache_chain_sem);
- list_for_each_entry(pc, &cache_chain, next) {
- if (pc == &cachep) {
- ret = 1;
- break;
- }
- }
- up(&cache_chain_sem);
-
- return ret;
-}
-#else
-#define is_chained_xmem_cache(x) 1
-#endif
-
-#ifdef CONFIG_SMP
-/*
- * Waits for all CPUs to execute func().
- */
-static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
-{
- local_irq_disable();
- func(arg);
- local_irq_enable();
-
- if (smp_call_function(func, arg, 1, 1))
- BUG();
-}
-typedef struct ccupdate_struct_s
-{
- xmem_cache_t *cachep;
- cpucache_t *new[NR_CPUS];
-} ccupdate_struct_t;
-
-static void do_ccupdate_local(void *info)
-{
- ccupdate_struct_t *new = (ccupdate_struct_t *)info;
- cpucache_t *old = cc_data(new->cachep);
-
- cc_data(new->cachep) = new->new[smp_processor_id()];
- new->new[smp_processor_id()] = old;
-}
-
-static void free_block (xmem_cache_t* cachep, void** objpp, int len);
-
-static void drain_cpu_caches(xmem_cache_t *cachep)
-{
- ccupdate_struct_t new;
- int i;
- unsigned long spin_flags;
-
- memset(&new.new,0,sizeof(new.new));
-
- new.cachep = cachep;
-
- down(&cache_chain_sem);
- smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
-
- for (i = 0; i < smp_num_cpus; i++) {
- cpucache_t* ccold = new.new[cpu_logical_map(i)];
- if (!ccold || (ccold->avail == 0))
- continue;
- local_irq_disable();
- free_block(cachep, cc_entry(ccold), ccold->avail);
- local_irq_enable();
- ccold->avail = 0;
- }
- smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
- up(&cache_chain_sem);
-}
-
-#else
-#define drain_cpu_caches(cachep) do { } while (0)
-#endif
-
-static int __xmem_cache_shrink(xmem_cache_t *cachep)
-{
- slab_t *slabp;
- int ret;
-
- drain_cpu_caches(cachep);
-
- spin_lock_irq(&cachep->spinlock);
-
- /* If the cache is growing, stop shrinking. */
- while (!cachep->growing) {
- struct list_head *p;
-
- p = cachep->slabs_free.prev;
- if (p == &cachep->slabs_free)
- break;
-
- slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
-#if DEBUG
- if (slabp->inuse)
- BUG();
-#endif
- list_del(&slabp->list);
-
- spin_unlock_irq(&cachep->spinlock);
- xmem_slab_destroy(cachep, slabp);
- spin_lock_irq(&cachep->spinlock);
- }
- ret = (!list_empty(&cachep->slabs_full) ||
- !list_empty(&cachep->slabs_partial));
- spin_unlock_irq(&cachep->spinlock);
- return ret;
-}
-
-/**
- * xmem_cache_shrink - Shrink a cache.
- * @cachep: The cache to shrink.
- *
- * Releases as many slabs as possible for a cache.
- * To help debugging, a zero exit status indicates all slabs were released.
- */
-int xmem_cache_shrink(xmem_cache_t *cachep)
-{
- if (!cachep || !is_chained_xmem_cache(cachep))
- BUG();
-
- return __xmem_cache_shrink(cachep);
-}
-
-/**
- * xmem_cache_destroy - delete a cache
- * @cachep: the cache to destroy
- *
- * Remove a xmem_cache_t object from the slab cache.
- * Returns 0 on success.
- *
- * It is expected this function will be called by a module when it is
- * unloaded. This will remove the cache completely, and avoid a duplicate
- * cache being allocated each time a module is loaded and unloaded, if the
- * module doesn't have persistent in-kernel storage across loads and unloads.
- *
- * The caller must guarantee that noone will allocate memory from the cache
- * during the xmem_cache_destroy().
- */
-int xmem_cache_destroy (xmem_cache_t * cachep)
-{
- unsigned long spin_flags;
-
- if (!cachep || cachep->growing)
- BUG();
-
- /* Find the cache in the chain of caches. */
- down(&cache_chain_sem);
- /* the chain is never empty, cache_cache is never destroyed */
- if (clock_searchp == cachep)
- clock_searchp = list_entry(cachep->next.next,
- xmem_cache_t, next);
- list_del(&cachep->next);
- up(&cache_chain_sem);
-
- if (__xmem_cache_shrink(cachep)) {
- printk(KERN_ERR "xmem_cache_destroy: Can't free all objects %p\n",
- cachep);
- down(&cache_chain_sem);
- list_add(&cachep->next,&cache_chain);
- up(&cache_chain_sem);
- return 1;
- }
-#ifdef CONFIG_SMP
- {
- int i;
- for (i = 0; i < NR_CPUS; i++)
- xfree(cachep->cpudata[i]);
- }
-#endif
- xmem_cache_free(&cache_cache, cachep);
-
- return 0;
-}
-
-/* Get the memory for a slab management obj. */
-static inline slab_t *xmem_cache_slabmgmt(xmem_cache_t *cachep,
- void *objp, int colour_off,
- int local_flags)
-{
- slab_t *slabp;
-
- if (OFF_SLAB(cachep)) {
- /* Slab management obj is off-slab. */
- slabp = xmem_cache_alloc(cachep->slabp_cache);
- if (!slabp)
- return NULL;
- } else {
- /* FIXME: change to
- slabp = objp
- * if you enable OPTIMIZE
- */
- slabp = objp+colour_off;
- colour_off += L1_CACHE_ALIGN(cachep->num *
- sizeof(xmem_bufctl_t) + sizeof(slab_t));
- }
- slabp->inuse = 0;
- slabp->colouroff = colour_off;
- slabp->s_mem = objp+colour_off;
-
- return slabp;
-}
-
-static inline void xmem_cache_init_objs(xmem_cache_t *cachep,
- slab_t *slabp,
- unsigned long ctor_flags)
-{
- int i;
-
- for (i = 0; i < cachep->num; i++) {
- void* objp = slabp->s_mem+cachep->objsize*i;
-#if DEBUG
- if (cachep->flags & SLAB_RED_ZONE) {
- *((unsigned long*)(objp)) = RED_MAGIC1;
- *((unsigned long*)(objp + cachep->objsize -
- BYTES_PER_WORD)) = RED_MAGIC1;
- objp += BYTES_PER_WORD;
- }
-#endif
-
- /*
- * Constructors are not allowed to allocate memory from
- * the same cache which they are a constructor for.
- * Otherwise, deadlock. They must also be threaded.
- */
- if (cachep->ctor)
- cachep->ctor(objp, cachep, ctor_flags);
-#if DEBUG
- if (cachep->flags & SLAB_RED_ZONE)
- objp -= BYTES_PER_WORD;
- if (cachep->flags & SLAB_POISON)
- /* need to poison the objs */
- xmem_poison_obj(cachep, objp);
- if (cachep->flags & SLAB_RED_ZONE) {
- if (*((unsigned long*)(objp)) != RED_MAGIC1)
- BUG();
- if (*((unsigned long*)(objp + cachep->objsize -
- BYTES_PER_WORD)) != RED_MAGIC1)
- BUG();
- }
-#endif
- slab_bufctl(slabp)[i] = i+1;
- }
- slab_bufctl(slabp)[i-1] = BUFCTL_END;
- slabp->free = 0;
-}
-
-/*
- * Grow (by 1) the number of slabs within a cache. This is called by
- * xmem_cache_alloc() when there are no active objs left in a cache.
- */
-static int xmem_cache_grow(xmem_cache_t * cachep)
-{
- slab_t *slabp;
- struct pfn_info *page; unsigned int i;
- void *objp;
- size_t offset;
- unsigned long ctor_flags;
- unsigned long save_flags;
-
- ctor_flags = SLAB_CTOR_CONSTRUCTOR;
-
- /* About to mess with non-constant members - lock. */
- spin_lock_irqsave(&cachep->spinlock, save_flags);
-
- /* Get colour for the slab, and cal the next value. */
- offset = cachep->colour_next;
- cachep->colour_next++;
- if (cachep->colour_next >= cachep->colour)
- cachep->colour_next = 0;
- offset *= cachep->colour_off;
- cachep->dflags |= DFLGS_GROWN;
-
- cachep->growing++;
- spin_unlock_irqrestore(&cachep->spinlock, save_flags);
-
- /* A series of memory allocations for a new slab.
- * Neither the cache-chain semaphore, or cache-lock, are
- * held, but the incrementing c_growing prevents this
- * cache from being reaped or shrunk.
- * Note: The cache could be selected in for reaping in
- * xmem_cache_reap(), but when the final test is made the
- * growing value will be seen.
- */
-
- /* Get mem for the objs. */
- if (!(objp = xmem_getpages(cachep)))
- goto failed;
-
- /* Get slab management. */
- if (!(slabp = xmem_cache_slabmgmt(cachep, objp, offset, 0)))
- goto opps1;
-
- /* Nasty!!!!!! I hope this is OK. */
- i = 1 << cachep->gfporder;
- page = virt_to_page(objp);
- do {
- SET_PAGE_CACHE(page, cachep);
- SET_PAGE_SLAB(page, slabp);
- PageSetSlab(page);
- page++;
- } while (--i);
-
- xmem_cache_init_objs(cachep, slabp, ctor_flags);
-
- spin_lock_irqsave(&cachep->spinlock, save_flags);
- cachep->growing--;
-
- /* Make slab active. */
- list_add_tail(&slabp->list, &cachep->slabs_free);
- STATS_INC_GROWN(cachep);
- cachep->failures = 0;
-
- spin_unlock_irqrestore(&cachep->spinlock, save_flags);
- return 1;
- opps1:
- xmem_freepages(cachep, objp);
- failed:
- spin_lock_irqsave(&cachep->spinlock, save_flags);
- cachep->growing--;
- spin_unlock_irqrestore(&cachep->spinlock, save_flags);
- return 0;
-}
-
-/*
- * Perform extra freeing checks:
- * - detect double free
- * - detect bad pointers.
- * Called with the cache-lock held.
- */
-
-#if DEBUG
-static int xmem_extra_free_checks (xmem_cache_t * cachep,
- slab_t *slabp, void * objp)
-{
- int i;
- unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
-
- if (objnr >= cachep->num)
- BUG();
- if (objp != slabp->s_mem + objnr*cachep->objsize)
- BUG();
-
- /* Check slab's freelist to see if this obj is there. */
- for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
- if (i == objnr)
- BUG();
- }
- return 0;
-}
-#endif
-
-static inline void * xmem_cache_alloc_one_tail (xmem_cache_t *cachep,
- slab_t *slabp)
-{
- void *objp;
-
- STATS_INC_ALLOCED(cachep);
- STATS_INC_ACTIVE(cachep);
- STATS_SET_HIGH(cachep);
-
- /* get obj pointer */
- slabp->inuse++;
- objp = slabp->s_mem + slabp->free*cachep->objsize;
- slabp->free=slab_bufctl(slabp)[slabp->free];
-
- if (unlikely(slabp->free == BUFCTL_END)) {
- list_del(&slabp->list);
- list_add(&slabp->list, &cachep->slabs_full);
- }
-#if DEBUG
- if (cachep->flags & SLAB_POISON)
- if (xmem_check_poison_obj(cachep, objp))
- BUG();
- if (cachep->flags & SLAB_RED_ZONE) {
- /* Set alloc red-zone, and check old one. */
- if (xchg((unsigned long *)objp, RED_MAGIC2) !=
- RED_MAGIC1)
- BUG();
- if (xchg((unsigned long *)(objp+cachep->objsize -
- BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
- BUG();
- objp += BYTES_PER_WORD;
- }
-#endif
- return objp;
-}
-
-/*
- * Returns a ptr to an obj in the given cache.
- * caller must guarantee synchronization
- * #define for the goto optimization 8-)
- */
-#define xmem_cache_alloc_one(cachep) \
-({ \
- struct list_head * slabs_partial, * entry; \
- slab_t *slabp; \
- \
- slabs_partial = &(cachep)->slabs_partial; \
- entry = slabs_partial->next; \
- if (unlikely(entry == slabs_partial)) { \
- struct list_head * slabs_free; \
- slabs_free = &(cachep)->slabs_free; \
- entry = slabs_free->next; \
- if (unlikely(entry == slabs_free)) \
- goto alloc_new_slab; \
- list_del(entry); \
- list_add(entry, slabs_partial); \
- } \
- \
- slabp = list_entry(entry, slab_t, list); \
- xmem_cache_alloc_one_tail(cachep, slabp); \
-})
-
-#ifdef CONFIG_SMP
-void* xmem_cache_alloc_batch(xmem_cache_t* cachep)
-{
- int batchcount = cachep->batchcount;
- cpucache_t* cc = cc_data(cachep);
-
- spin_lock(&cachep->spinlock);
- while (batchcount--) {
- struct list_head * slabs_partial, * entry;
- slab_t *slabp;
- /* Get slab alloc is to come from. */
- slabs_partial = &(cachep)->slabs_partial;
- entry = slabs_partial->next;
- if (unlikely(entry == slabs_partial)) {
- struct list_head * slabs_free;
- slabs_free = &(cachep)->slabs_free;
- entry = slabs_free->next;
- if (unlikely(entry == slabs_free))
- break;
- list_del(entry);
- list_add(entry, slabs_partial);
- }
-
- slabp = list_entry(entry, slab_t, list);
- cc_entry(cc)[cc->avail++] =
- xmem_cache_alloc_one_tail(cachep, slabp);
- }
- spin_unlock(&cachep->spinlock);
-
- if (cc->avail)
- return cc_entry(cc)[--cc->avail];
- return NULL;
-}
-#endif
-
-static inline void *__xmem_cache_alloc(xmem_cache_t *cachep)
-{
- unsigned long flags;
- void* objp;
-
- try_again:
- local_irq_save(flags);
-#ifdef CONFIG_SMP
- {
- cpucache_t *cc = cc_data(cachep);
-
- if (cc) {
- if (cc->avail) {
- STATS_INC_ALLOCHIT(cachep);
- objp = cc_entry(cc)[--cc->avail];
- } else {
- STATS_INC_ALLOCMISS(cachep);
- objp = xmem_cache_alloc_batch(cachep);
- if (!objp)
- goto alloc_new_slab_nolock;
- }
- } else {
- spin_lock(&cachep->spinlock);
- objp = xmem_cache_alloc_one(cachep);
- spin_unlock(&cachep->spinlock);
- }
- }
-#else
- objp = xmem_cache_alloc_one(cachep);
-#endif
- local_irq_restore(flags);
- return objp;
- alloc_new_slab:
-#ifdef CONFIG_SMP
- spin_unlock(&cachep->spinlock);
- alloc_new_slab_nolock:
-#endif
- local_irq_restore(flags);
- if (xmem_cache_grow(cachep))
- /* Someone may have stolen our objs. Doesn't matter, we'll
- * just come back here again.
- */
- goto try_again;
- return NULL;
-}
-
-/*
- * Release an obj back to its cache. If the obj has a constructed
- * state, it should be in this state _before_ it is released.
- * - caller is responsible for the synchronization
- */
-
-#if DEBUG
-# define CHECK_NR(pg) \
- do { \
- if (!VALID_PAGE(pg)) { \
- printk(KERN_ERR "xfree: out of range ptr %lxh.\n", \
- (unsigned long)objp); \
- BUG(); \
- } \
- } while (0)
-# define CHECK_PAGE(page) \
- do { \
- CHECK_NR(page); \
- if (!PageSlab(page)) { \
- printk(KERN_ERR "xfree: bad ptr %lxh.\n", \
- (unsigned long)objp); \
- BUG(); \
- } \
- } while (0)
-
-#else
-# define CHECK_PAGE(pg) do { } while (0)
-#endif
-
-static inline void xmem_cache_free_one(xmem_cache_t *cachep, void *objp)
-{
- slab_t* slabp;
-
- CHECK_PAGE(virt_to_page(objp));
- /* reduces memory footprint
- *
- if (OPTIMIZE(cachep))
- slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
- else
- */
- slabp = GET_PAGE_SLAB(virt_to_page(objp));
-
-#if DEBUG
- if (cachep->flags & SLAB_DEBUG_INITIAL)
- /* Need to call the slab's constructor so the
- * caller can perform a verify of its state (debugging).
- * Called without the cache-lock held.
- */
- cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
-
- if (cachep->flags & SLAB_RED_ZONE) {
- objp -= BYTES_PER_WORD;
- if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
- /* Either write before start, or a double free. */
- BUG();
- if (xchg((unsigned long *)(objp+cachep->objsize -
- BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
- /* Either write past end, or a double free. */
- BUG();
- }
- if (cachep->flags & SLAB_POISON)
- xmem_poison_obj(cachep, objp);
- if (xmem_extra_free_checks(cachep, slabp, objp))
- return;
-#endif
- {
- unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
-
- slab_bufctl(slabp)[objnr] = slabp->free;
- slabp->free = objnr;
- }
- STATS_DEC_ACTIVE(cachep);
-
- /* fixup slab chains */
- {
- int inuse = slabp->inuse;
- if (unlikely(!--slabp->inuse)) {
- /* Was partial or full, now empty. */
- list_del(&slabp->list);
- list_add(&slabp->list, &cachep->slabs_free);
- } else if (unlikely(inuse == cachep->num)) {
- /* Was full. */
- list_del(&slabp->list);
- list_add(&slabp->list, &cachep->slabs_partial);
- }
- }
-}
-
-#ifdef CONFIG_SMP
-static inline void __free_block (xmem_cache_t* cachep,
- void** objpp, int len)
-{
- for ( ; len > 0; len--, objpp++)
- xmem_cache_free_one(cachep, *objpp);
-}
-
-static void free_block (xmem_cache_t* cachep, void** objpp, int len)
-{
- spin_lock(&cachep->spinlock);
- __free_block(cachep, objpp, len);
- spin_unlock(&cachep->spinlock);
-}
-#endif
-
-/*
- * __xmem_cache_free
- * called with disabled ints
- */
-static inline void __xmem_cache_free (xmem_cache_t *cachep, void* objp)
-{
-#ifdef CONFIG_SMP
- cpucache_t *cc = cc_data(cachep);
-
- CHECK_PAGE(virt_to_page(objp));
- if (cc) {
- int batchcount;
- if (cc->avail < cc->limit) {
- STATS_INC_FREEHIT(cachep);
- cc_entry(cc)[cc->avail++] = objp;
- return;
- }
- STATS_INC_FREEMISS(cachep);
- batchcount = cachep->batchcount;
- cc->avail -= batchcount;
- free_block(cachep,
- &cc_entry(cc)[cc->avail],batchcount);
- cc_entry(cc)[cc->avail++] = objp;
- return;
- } else {
- free_block(cachep, &objp, 1);
- }
-#else
- xmem_cache_free_one(cachep, objp);
-#endif
-}
-
-/**
- * xmem_cache_alloc - Allocate an object
- * @cachep: The cache to allocate from.
- *
- * Allocate an object from this cache. The flags are only relevant
- * if the cache has no available objects.
- */
-void *xmem_cache_alloc(xmem_cache_t *cachep)
-{
- return __xmem_cache_alloc(cachep);
-}
-
-/**
- * xmalloc - allocate memory
- * @size: how many bytes of memory are required.
- */
-void *xmalloc(size_t size)
-{
- cache_sizes_t *csizep = cache_sizes;
-
- for (; csizep->cs_size; csizep++) {
- if (size > csizep->cs_size)
- continue;
- return __xmem_cache_alloc(csizep->cs_cachep);
- }
- return NULL;
-}
-
-/**
- * xmem_cache_free - Deallocate an object
- * @cachep: The cache the allocation was from.
- * @objp: The previously allocated object.
- *
- * Free an object which was previously allocated from this
- * cache.
- */
-void xmem_cache_free (xmem_cache_t *cachep, void *objp)
-{
- unsigned long flags;
-#if DEBUG
- CHECK_PAGE(virt_to_page(objp));
- if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
- BUG();
-#endif
-
- local_irq_save(flags);
- __xmem_cache_free(cachep, objp);
- local_irq_restore(flags);
-}
-
-/**
- * xfree - free previously allocated memory
- * @objp: pointer returned by xmalloc.
- *
- * Don't free memory not originally allocated by xmalloc()
- * or you will run into trouble.
- */
-void xfree (const void *objp)
-{
- xmem_cache_t *c;
- unsigned long flags;
-
- if (!objp)
- return;
- local_irq_save(flags);
- CHECK_PAGE(virt_to_page(objp));
- c = GET_PAGE_CACHE(virt_to_page(objp));
- __xmem_cache_free(c, (void*)objp);
- local_irq_restore(flags);
-}
-
-xmem_cache_t *xmem_find_general_cachep(size_t size)
-{
- cache_sizes_t *csizep = cache_sizes;
-
- /* This function could be moved to the header file, and
- * made inline so consumers can quickly determine what
- * cache pointer they require.
- */
- for ( ; csizep->cs_size; csizep++) {
- if (size > csizep->cs_size)
- continue;
- break;
- }
- return csizep->cs_cachep;
-}
-
-#ifdef CONFIG_SMP
-
-/* called with cache_chain_sem acquired. */
-static int xmem_tune_cpucache (xmem_cache_t* cachep, int limit, int batchcount)
-{
- ccupdate_struct_t new;
- int i;
-
- /*
- * These are admin-provided, so we are more graceful.
- */
- if (limit < 0)
- return -EINVAL;
- if (batchcount < 0)
- return -EINVAL;
- if (batchcount > limit)
- return -EINVAL;
- if (limit != 0 && !batchcount)
- return -EINVAL;
-
- memset(&new.new,0,sizeof(new.new));
- if (limit) {
- for (i = 0; i< smp_num_cpus; i++) {
- cpucache_t* ccnew;
-
- ccnew = xmalloc(sizeof(void*)*limit+sizeof(cpucache_t));
- if (!ccnew)
- goto oom;
- ccnew->limit = limit;
- ccnew->avail = 0;
- new.new[cpu_logical_map(i)] = ccnew;
- }
- }
- new.cachep = cachep;
- spin_lock_irq(&cachep->spinlock);
- cachep->batchcount = batchcount;
- spin_unlock_irq(&cachep->spinlock);
-
- smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
-
- for (i = 0; i < smp_num_cpus; i++) {
- cpucache_t* ccold = new.new[cpu_logical_map(i)];
- if (!ccold)
- continue;
- local_irq_disable();
- free_block(cachep, cc_entry(ccold), ccold->avail);
- local_irq_enable();
- xfree(ccold);
- }
- return 0;
- oom:
- for (i--; i >= 0; i--)
- xfree(new.new[cpu_logical_map(i)]);
- return -ENOMEM;
-}
-
-static void enable_cpucache (xmem_cache_t *cachep)
-{
- int err;
- int limit;
-
- /* FIXME: optimize */
- if (cachep->objsize > PAGE_SIZE)
- return;
- if (cachep->objsize > 1024)
- limit = 60;
- else if (cachep->objsize > 256)
- limit = 124;
- else
- limit = 252;
-
- err = xmem_tune_cpucache(cachep, limit, limit/2);
- if (err)
- printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
- cachep->name, -err);
-}
-
-static void enable_all_cpucaches (void)
-{
- struct list_head* p;
- unsigned long spin_flags;
-
- down(&cache_chain_sem);
-
- p = &cache_cache.next;
- do {
- xmem_cache_t* cachep = list_entry(p, xmem_cache_t, next);
-
- enable_cpucache(cachep);
- p = cachep->next.next;
- } while (p != &cache_cache.next);
-
- up(&cache_chain_sem);
-}
-#endif
-
-/**
- * xmem_cache_reap - Reclaim memory from caches.
- */
-int xmem_cache_reap(void)
-{
- slab_t *slabp;
- xmem_cache_t *searchp;
- xmem_cache_t *best_cachep;
- unsigned int best_pages;
- unsigned int best_len;
- unsigned int scan;
- int ret = 0;
- unsigned long spin_flags;
-
- down(&cache_chain_sem);
-
- scan = REAP_SCANLEN;
- best_len = 0;
- best_pages = 0;
- best_cachep = NULL;
- searchp = clock_searchp;
- do {
- unsigned int pages;
- struct list_head* p;
- unsigned int full_free;
-
- /* It's safe to test this without holding the cache-lock. */
- if (searchp->flags & SLAB_NO_REAP)
- goto next;
- spin_lock_irq(&searchp->spinlock);
- if (searchp->growing)
- goto next_unlock;
- if (searchp->dflags & DFLGS_GROWN) {
- searchp->dflags &= ~DFLGS_GROWN;
- goto next_unlock;
- }
-#ifdef CONFIG_SMP
- {
- cpucache_t *cc = cc_data(searchp);
- if (cc && cc->avail) {
- __free_block(searchp, cc_entry(cc), cc->avail);
- cc->avail = 0;
- }
- }
-#endif
-
- full_free = 0;
- p = searchp->slabs_free.next;
- while (p != &searchp->slabs_free) {
- slabp = list_entry(p, slab_t, list);
-#if DEBUG
- if (slabp->inuse)
- BUG();
-#endif
- full_free++;
- p = p->next;
- }
-
- /*
- * Try to avoid slabs with constructors and/or
- * more than one page per slab (as it can be difficult
- * to get high orders from gfp()).
- */
- pages = full_free * (1<<searchp->gfporder);
- if (searchp->ctor)
- pages = (pages*4+1)/5;
- if (searchp->gfporder)
- pages = (pages*4+1)/5;
- if (pages > best_pages) {
- best_cachep = searchp;
- best_len = full_free;
- best_pages = pages;
- if (pages >= REAP_PERFECT) {
- clock_searchp = list_entry(searchp->next.next,
- xmem_cache_t,next);
- goto perfect;
- }
- }
- next_unlock:
- spin_unlock_irq(&searchp->spinlock);
- next:
- searchp = list_entry(searchp->next.next,xmem_cache_t,next);
- } while (--scan && searchp != clock_searchp);
-
- clock_searchp = searchp;
-
- if (!best_cachep)
- /* couldn't find anything to reap */
- goto out;
-
- spin_lock_irq(&best_cachep->spinlock);
- perfect:
- /* free only 50% of the free slabs */
- best_len = (best_len + 1)/2;
- for (scan = 0; scan < best_len; scan++) {
- struct list_head *p;
-
- if (best_cachep->growing)
- break;
- p = best_cachep->slabs_free.prev;
- if (p == &best_cachep->slabs_free)
- break;
- slabp = list_entry(p,slab_t,list);
-#if DEBUG
- if (slabp->inuse)
- BUG();
-#endif
- list_del(&slabp->list);
- STATS_INC_REAPED(best_cachep);
-
- /* Safe to drop the lock. The slab is no longer linked to the
- * cache.
- */
- spin_unlock_irq(&best_cachep->spinlock);
- xmem_slab_destroy(best_cachep, slabp);
- spin_lock_irq(&best_cachep->spinlock);
- }
- spin_unlock_irq(&best_cachep->spinlock);
- ret = scan * (1 << best_cachep->gfporder);
- out:
- up(&cache_chain_sem);
- return ret;
-}
-
-void dump_slabinfo()
-{
- struct list_head *p;
- unsigned long spin_flags;
-
- /* Output format version, so at least we can change it without _too_
- * many complaints.
- */
- printk( "slabinfo - version: 1.1"
-#if STATS
- " (statistics)"
-#endif
-#ifdef CONFIG_SMP
- " (SMP)"
-#endif
- "\n");
- down(&cache_chain_sem);
- p = &cache_cache.next;
- do {
- xmem_cache_t *cachep;
- slab_t *slabp;
- unsigned long active_objs;
- unsigned long num_objs;
- unsigned long active_slabs = 0;
- unsigned long num_slabs;
- cachep = list_entry(p, xmem_cache_t, next);
-
- spin_lock_irq(&cachep->spinlock);
- active_objs = 0;
- num_slabs = 0;
- list_for_each_entry(slabp, &cachep->slabs_full, list) {
- if (slabp->inuse != cachep->num)
- BUG();
- active_objs += cachep->num;
- active_slabs++;
- }
- list_for_each_entry(slabp, &cachep->slabs_partial, list) {
- if (slabp->inuse == cachep->num || !slabp->inuse)
- BUG();
- active_objs += slabp->inuse;
- active_slabs++;
- }
- list_for_each_entry(slabp, &cachep->slabs_free, list) {
- if (slabp->inuse)
- BUG();
- num_slabs++;
- }
- num_slabs+=active_slabs;
- num_objs = num_slabs*cachep->num;
-
- printk("%-17s %6lu %6lu %6u %4lu %4lu %4u",
- cachep->name, active_objs, num_objs, cachep->objsize,
- active_slabs, num_slabs, (1<<cachep->gfporder));
-
-#if STATS
- {
- unsigned long errors = cachep->errors;
- unsigned long high = cachep->high_mark;
- unsigned long grown = cachep->grown;
- unsigned long reaped = cachep->reaped;
- unsigned long allocs = cachep->num_allocations;
-
- printk(" : %6lu %7lu %5lu %4lu %4lu",
- high, allocs, grown, reaped, errors);
- }
-#endif
-#ifdef CONFIG_SMP
- {
- unsigned int batchcount = cachep->batchcount;
- unsigned int limit;
-
- if (cc_data(cachep))
- limit = cc_data(cachep)->limit;
- else
- limit = 0;
- printk(" : %4u %4u",
- limit, batchcount);
- }
-#endif
-#if STATS && defined(CONFIG_SMP)
- {
- unsigned long allochit = atomic_read(&cachep->allochit);
- unsigned long allocmiss = atomic_read(&cachep->allocmiss);
- unsigned long freehit = atomic_read(&cachep->freehit);
- unsigned long freemiss = atomic_read(&cachep->freemiss);
- printk(" : %6lu %6lu %6lu %6lu",
- allochit, allocmiss, freehit, freemiss);
- }
-#endif
- printk("\n");
- spin_unlock_irq(&cachep->spinlock);
-
- p = cachep->next.next;
- } while (p != &cache_cache.next);
-
- up(&cache_chain_sem);
-
- return;
-}
diff --git a/xen/common/softirq.c b/xen/common/softirq.c
index 17e850b3f6..5a7cd2dabc 100644
--- a/xen/common/softirq.c
+++ b/xen/common/softirq.c
@@ -39,3 +39,13 @@ void open_softirq(int nr, softirq_handler handler)
{
softirq_handlers[nr] = handler;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/string.c b/xen/common/string.c
index 1f51b65ecb..7c6e74df7e 100644
--- a/xen/common/string.c
+++ b/xen/common/string.c
@@ -557,3 +557,13 @@ void *memchr(const void *s, int c, size_t n)
}
#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/xen/common/trace.c b/xen/common/trace.c
index 83bf5ce55e..034dbb4d5d 100644
--- a/xen/common/trace.c
+++ b/xen/common/trace.c
@@ -8,6 +8,8 @@
* Author: Mark Williamson, mark.a.williamson@intel.com
* Date: January 2004
*
+ * Copyright (C) 2005 Bin Ren
+ *
* The trace buffer code is designed to allow debugging traces of Xen to be
* generated on UP / SMP machines. Each trace entry is timestamped so that
* it's possible to reconstruct a chronological record of trace events.
@@ -21,7 +23,6 @@
#include <asm/io.h>
#include <xen/lib.h>
#include <xen/sched.h>
-#include <xen/slab.h>
#include <xen/smp.h>
#include <xen/trace.h>
#include <xen/errno.h>
@@ -39,6 +40,11 @@ struct t_buf *t_bufs[NR_CPUS];
/* a flag recording whether initialisation has been done */
int tb_init_done = 0;
+/* which CPUs tracing is enabled on */
+unsigned long tb_cpu_mask = (~0UL);
+
+/* which tracing events are enabled */
+u32 tb_event_mask = TRC_ALL;
/**
* init_trace_bufs - performs initialisation of the per-cpu trace buffers.
*
@@ -59,35 +65,28 @@ void init_trace_bufs(void)
return;
}
- nr_pages = smp_num_cpus * opt_tbuf_size;
+ nr_pages = num_online_cpus() * opt_tbuf_size;
order = get_order(nr_pages * PAGE_SIZE);
- if ( (rawbuf = (char *)alloc_xenheap_pages(order)) == NULL )
+ if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
{
printk("Xen trace buffers: memory allocation failed\n");
return;
}
/* Share pages so that xentrace can map them. */
-
for ( i = 0; i < nr_pages; i++ )
- SHARE_PFN_WITH_DOMAIN(virt_to_page(rawbuf+(i*PAGE_SIZE)), dom0);
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(rawbuf + i * PAGE_SIZE), dom0);
- for ( i = 0; i < smp_num_cpus; i++ )
+ for_each_online_cpu ( i )
{
buf = t_bufs[i] = (struct t_buf *)&rawbuf[i*opt_tbuf_size*PAGE_SIZE];
- /* For use in Xen. */
- buf->vdata = (struct t_rec *)(buf+1);
- buf->head_ptr = buf->vdata;
-
- /* For use in user space. */
- buf->data = __pa(buf->vdata);
- buf->head = 0;
-
- /* For use in both. */
- buf->size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf))
- / sizeof(struct t_rec);
+ _atomic_set(buf->rec_idx, 0);
+ buf->rec_num = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf))
+ / sizeof(struct t_rec);
+ buf->rec = (struct t_rec *)(buf + 1);
+ buf->rec_addr = __pa(buf->rec);
}
printk("Xen trace buffers: initialised\n");
@@ -98,25 +97,48 @@ void init_trace_bufs(void)
}
/**
- * get_tb_info - get trace buffer details
- * @st: a pointer to a dom0_gettbufs_t to be filled out
- *
- * Called by the %DOM0_GETTBUFS dom0 op to fetch the machine address of the
- * trace buffers.
+ * tb_control - DOM0 operations on trace buffers.
+ * @tbc: a pointer to a dom0_tbufcontrol_t to be filled out
*/
-int get_tb_info(dom0_gettbufs_t *st)
+int tb_control(dom0_tbufcontrol_t *tbc)
{
- if ( tb_init_done )
- {
- st->mach_addr = __pa(t_bufs[0]);
- st->size = opt_tbuf_size * PAGE_SIZE;
-
- return 0;
- }
- else
+ static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+ int rc = 0;
+
+ if ( !tb_init_done )
+ return -EINVAL;
+
+ spin_lock(&lock);
+
+ switch ( tbc->op)
{
- st->mach_addr = 0;
- st->size = 0;
- return -ENODATA;
+ case DOM0_TBUF_GET_INFO:
+ tbc->cpu_mask = tb_cpu_mask;
+ tbc->evt_mask = tb_event_mask;
+ tbc->mach_addr = __pa(t_bufs[0]);
+ tbc->size = opt_tbuf_size * PAGE_SIZE;
+ break;
+ case DOM0_TBUF_SET_CPU_MASK:
+ tb_cpu_mask = tbc->cpu_mask;
+ break;
+ case DOM0_TBUF_SET_EVT_MASK:
+ tb_event_mask = tbc->evt_mask;
+ break;
+ default:
+ rc = -EINVAL;
}
+
+ spin_unlock(&lock);
+
+ return rc;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/vsprintf.c b/xen/common/vsprintf.c
index 906e7734db..55d538563d 100644
--- a/xen/common/vsprintf.c
+++ b/xen/common/vsprintf.c
@@ -115,13 +115,13 @@ static int skip_atoi(const char **s)
return i;
}
-#define ZEROPAD 1 /* pad with zero */
-#define SIGN 2 /* unsigned/signed long */
-#define PLUS 4 /* show plus */
-#define SPACE 8 /* space if plus */
-#define LEFT 16 /* left justified */
-#define SPECIAL 32 /* 0x */
-#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SPECIAL 32 /* 0x */
+#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
{
@@ -239,14 +239,14 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
char *str, *end, c;
const char *s;
- int flags; /* flags to number() */
+ int flags; /* flags to number() */
- int field_width; /* width of output field */
- int precision; /* min. # of digits for integers; max
- number of chars for from string */
- int qualifier; /* 'h', 'l', or 'L' for integer fields */
- /* 'z' support added 23/7/1999 S.H. */
- /* 'z' changed to 'Z' --davidm 1/25/99 */
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars for from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+ /* 'z' support added 23/7/1999 S.H. */
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
str = buf;
end = buf + size - 1;
@@ -267,7 +267,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
/* process flags */
flags = 0;
repeat:
- ++fmt; /* this also skips first '%' */
+ ++fmt; /* this also skips first '%' */
switch (*fmt) {
case '-': flags |= LEFT; goto repeat;
case '+': flags |= PLUS; goto repeat;
@@ -293,12 +293,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
/* get the precision */
precision = -1;
if (*fmt == '.') {
- ++fmt;
+ ++fmt;
if (isdigit(*fmt))
precision = skip_atoi(&fmt);
else if (*fmt == '*') {
++fmt;
- /* it's the next argument */
+ /* it's the next argument */
precision = va_arg(args, int);
}
if (precision < 0)
@@ -381,8 +381,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
case 'n':
- /* FIXME:
- * What does C99 say about the overflow case here? */
+ /* FIXME:
+ * What does C99 say about the overflow case here? */
if (qualifier == 'l') {
long * ip = va_arg(args, long *);
*ip = (str - buf);
@@ -401,7 +401,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
++str;
continue;
- /* integer number formats - set up the flags and "break" */
+ /* integer number formats - set up the flags and "break" */
case 'o':
base = 8;
break;
@@ -513,3 +513,13 @@ int sprintf(char * buf, const char *fmt, ...)
return i;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/xmalloc.c b/xen/common/xmalloc.c
new file mode 100644
index 0000000000..3cfea23101
--- /dev/null
+++ b/xen/common/xmalloc.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * Simple allocator for Xen. If larger than a page, simply use the
+ * page-order allocator.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * TODO (Keir, 17/2/05):
+ * 1. Use space in pfn_info to avoid xmalloc_hdr in allocated blocks.
+ * 2. pfn_info points into free list to make xfree() O(1) complexity.
+ * 3. Perhaps make this a sub-page buddy allocator? xmalloc() == O(1).
+ * (Disadvantage is potentially greater internal fragmentation).
+ */
+
+#include <xen/config.h>
+#include <xen/mm.h>
+#include <xen/spinlock.h>
+#include <xen/ac_timer.h>
+#include <xen/cache.h>
+#include <xen/prefetch.h>
+
+static LIST_HEAD(freelist);
+static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED;
+
+struct xmalloc_hdr
+{
+ /* Total including this hdr. */
+ size_t size;
+ struct list_head freelist;
+} __cacheline_aligned;
+
+static void maybe_split(struct xmalloc_hdr *hdr, size_t size, size_t block)
+{
+ struct xmalloc_hdr *extra;
+ size_t leftover = block - size;
+
+ /* If enough is left to make a block, put it on free list. */
+ if ( leftover >= (2 * sizeof(struct xmalloc_hdr)) )
+ {
+ extra = (struct xmalloc_hdr *)((unsigned long)hdr + size);
+ extra->size = leftover;
+ list_add(&extra->freelist, &freelist);
+ }
+ else
+ {
+ size = block;
+ }
+
+ hdr->size = size;
+ /* Debugging aid. */
+ hdr->freelist.next = hdr->freelist.prev = NULL;
+}
+
+static void *xmalloc_new_page(size_t size)
+{
+ struct xmalloc_hdr *hdr;
+ unsigned long flags;
+
+ hdr = alloc_xenheap_page();
+ if ( hdr == NULL )
+ return NULL;
+
+ spin_lock_irqsave(&freelist_lock, flags);
+ maybe_split(hdr, size, PAGE_SIZE);
+ spin_unlock_irqrestore(&freelist_lock, flags);
+
+ return hdr+1;
+}
+
+/* Big object? Just use the page allocator. */
+static void *xmalloc_whole_pages(size_t size)
+{
+ struct xmalloc_hdr *hdr;
+ unsigned int pageorder = get_order(size);
+
+ hdr = alloc_xenheap_pages(pageorder);
+ if ( hdr == NULL )
+ return NULL;
+
+ hdr->size = (1 << (pageorder + PAGE_SHIFT));
+ /* Debugging aid. */
+ hdr->freelist.next = hdr->freelist.prev = NULL;
+
+ return hdr+1;
+}
+
+/* Return size, increased to alignment with align. */
+static inline size_t align_up(size_t size, size_t align)
+{
+ return (size + align - 1) & ~(align - 1);
+}
+
+void *_xmalloc(size_t size, size_t align)
+{
+ struct xmalloc_hdr *i;
+ unsigned long flags;
+
+ /* We currently always return cacheline aligned. */
+ BUG_ON(align > SMP_CACHE_BYTES);
+
+ /* Add room for header, pad to align next header. */
+ size += sizeof(struct xmalloc_hdr);
+ size = align_up(size, __alignof__(struct xmalloc_hdr));
+
+ /* For big allocs, give them whole pages. */
+ if ( size >= PAGE_SIZE )
+ return xmalloc_whole_pages(size);
+
+ /* Search free list. */
+ spin_lock_irqsave(&freelist_lock, flags);
+ list_for_each_entry( i, &freelist, freelist )
+ {
+ if ( i->size < size )
+ continue;
+ list_del(&i->freelist);
+ maybe_split(i, size, i->size);
+ spin_unlock_irqrestore(&freelist_lock, flags);
+ return i+1;
+ }
+ spin_unlock_irqrestore(&freelist_lock, flags);
+
+ /* Alloc a new page and return from that. */
+ return xmalloc_new_page(size);
+}
+
+void xfree(const void *p)
+{
+ unsigned long flags;
+ struct xmalloc_hdr *i, *tmp, *hdr;
+
+ if ( p == NULL )
+ return;
+
+ hdr = (struct xmalloc_hdr *)p - 1;
+
+ /* We know hdr will be on same page. */
+ BUG_ON(((long)p & PAGE_MASK) != ((long)hdr & PAGE_MASK));
+
+ /* Not previously freed. */
+ BUG_ON(hdr->freelist.next || hdr->freelist.prev);
+
+ /* Big allocs free directly. */
+ if ( hdr->size >= PAGE_SIZE )
+ {
+ free_xenheap_pages(hdr, get_order(hdr->size));
+ return;
+ }
+
+ /* Merge with other free block, or put in list. */
+ spin_lock_irqsave(&freelist_lock, flags);
+ list_for_each_entry_safe( i, tmp, &freelist, freelist )
+ {
+ unsigned long _i = (unsigned long)i;
+ unsigned long _hdr = (unsigned long)hdr;
+
+ /* Do not merge across page boundaries. */
+ if ( ((_i ^ _hdr) & PAGE_MASK) != 0 )
+ continue;
+
+ /* We follow this block? Swallow it. */
+ if ( (_i + i->size) == _hdr )
+ {
+ list_del(&i->freelist);
+ i->size += hdr->size;
+ hdr = i;
+ }
+
+ /* We precede this block? Swallow it. */
+ if ( (_hdr + hdr->size) == _i )
+ {
+ list_del(&i->freelist);
+ hdr->size += i->size;
+ }
+ }
+
+ /* Did we merge an entire page? */
+ if ( hdr->size == PAGE_SIZE )
+ {
+ BUG_ON((((unsigned long)hdr) & (PAGE_SIZE-1)) != 0);
+ free_xenheap_pages(hdr, 0);
+ }
+ else
+ {
+ list_add(&hdr->freelist, &freelist);
+ }
+
+ spin_unlock_irqrestore(&freelist_lock, flags);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/drivers/Makefile b/xen/drivers/Makefile
index ac1f14ca6a..e0a67e5dc2 100644
--- a/xen/drivers/Makefile
+++ b/xen/drivers/Makefile
@@ -2,9 +2,7 @@
default:
$(MAKE) -C char
$(MAKE) -C acpi
- $(MAKE) -C pci
clean:
$(MAKE) -C char clean
$(MAKE) -C acpi clean
- $(MAKE) -C pci clean
diff --git a/xen/drivers/acpi/Makefile b/xen/drivers/acpi/Makefile
index fec92eb405..21e5243004 100644
--- a/xen/drivers/acpi/Makefile
+++ b/xen/drivers/acpi/Makefile
@@ -1,10 +1,7 @@
-#
-# Makefile for the Linux ACPI interpreter
-#
include $(BASEDIR)/Rules.mk
-OBJS := acpi_ksyms.o tables.o
+OBJS := tables.o
default: driver.o
driver.o: $(OBJS)
@@ -12,53 +9,3 @@ driver.o: $(OBJS)
clean:
rm -f *.o *~ core
-
-#export ACPI_CFLAGS
-#
-#ACPI_CFLAGS := -Os
-#
-#ifdef CONFIG_ACPI_DEBUG
-# ACPI_CFLAGS += -DACPI_DEBUG_OUTPUT
-#endif
-#
-#EXTRA_CFLAGS += $(ACPI_CFLAGS)
-#
-#export-objs := acpi_ksyms.o processor.o
-#
-#obj-$(CONFIG_ACPI) := acpi_ksyms.o
-#
-#
-# ACPI Boot-Time Table Parsing
-#
-#obj-$(CONFIG_ACPI_BOOT) += tables.o
-#obj-$(CONFIG_ACPI_INTERPRETER) += blacklist.o
-#
-#
-# ACPI Core Subsystem (Interpreter)
-#
-#ifeq ($(CONFIG_ACPI_INTERPRETER),y)
-# obj-y += osl.o utils.o
-# subdir-y += dispatcher events executer hardware namespace parser \
-# resources tables utilities
-# obj-y += $(foreach dir,$(subdir-y),$(dir)/$(dir).o)
-#endif
-#
-#
-# ACPI Bus and Device Drivers
-#
-#ifeq ($(CONFIG_ACPI_BUS),y)
-# obj-y += bus.o
-# obj-$(CONFIG_ACPI_AC) += ac.o
-# obj-$(CONFIG_ACPI_BATTERY) += battery.o
-# obj-$(CONFIG_ACPI_BUTTON) += button.o
-# obj-$(CONFIG_ACPI_EC) += ec.o
-# obj-$(CONFIG_ACPI_FAN) += fan.o
-# obj-$(CONFIG_ACPI_PCI) += pci_root.o pci_link.o pci_irq.o pci_bind.o
-# obj-$(CONFIG_ACPI_POWER) += power.o
-# obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
-# obj-$(CONFIG_ACPI_THERMAL) += thermal.o
-# obj-$(CONFIG_ACPI_SYSTEM) += system.o
-# obj-$(CONFIG_ACPI_NUMA) += numa.o
-# obj-$(CONFIG_ACPI_ASUS) += asus_acpi.o
-# obj-$(CONFIG_ACPI_TOSHIBA) += toshiba_acpi.o
-#endif
diff --git a/xen/drivers/acpi/acpi_ksyms.c b/xen/drivers/acpi/acpi_ksyms.c
deleted file mode 100644
index 4a86156556..0000000000
--- a/xen/drivers/acpi/acpi_ksyms.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * acpi_ksyms.c - ACPI Kernel Symbols ($Revision: 15 $)
- *
- * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
- * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <xen/config.h>
-#include <xen/acpi.h>
-
-#ifdef CONFIG_ACPI_INTERPRETER
-
-/* ACPI Debugger */
-
-#ifdef ENABLE_DEBUGGER
-
-extern int acpi_in_debugger;
-
-EXPORT_SYMBOL(acpi_in_debugger);
-EXPORT_SYMBOL(acpi_db_user_commands);
-
-#endif /* ENABLE_DEBUGGER */
-
-/* ACPI Core Subsystem */
-
-#ifdef ACPI_DEBUG_OUTPUT
-EXPORT_SYMBOL(acpi_dbg_layer);
-EXPORT_SYMBOL(acpi_dbg_level);
-EXPORT_SYMBOL(acpi_ut_debug_print_raw);
-EXPORT_SYMBOL(acpi_ut_debug_print);
-EXPORT_SYMBOL(acpi_ut_status_exit);
-EXPORT_SYMBOL(acpi_ut_value_exit);
-EXPORT_SYMBOL(acpi_ut_exit);
-EXPORT_SYMBOL(acpi_ut_trace);
-#endif /*ACPI_DEBUG_OUTPUT*/
-
-EXPORT_SYMBOL(acpi_get_handle);
-EXPORT_SYMBOL(acpi_get_parent);
-EXPORT_SYMBOL(acpi_get_type);
-EXPORT_SYMBOL(acpi_get_name);
-EXPORT_SYMBOL(acpi_get_object_info);
-EXPORT_SYMBOL(acpi_get_next_object);
-EXPORT_SYMBOL(acpi_evaluate_object);
-EXPORT_SYMBOL(acpi_get_table);
-EXPORT_SYMBOL(acpi_get_firmware_table);
-EXPORT_SYMBOL(acpi_install_notify_handler);
-EXPORT_SYMBOL(acpi_remove_notify_handler);
-EXPORT_SYMBOL(acpi_install_gpe_handler);
-EXPORT_SYMBOL(acpi_remove_gpe_handler);
-EXPORT_SYMBOL(acpi_install_address_space_handler);
-EXPORT_SYMBOL(acpi_remove_address_space_handler);
-EXPORT_SYMBOL(acpi_install_fixed_event_handler);
-EXPORT_SYMBOL(acpi_remove_fixed_event_handler);
-EXPORT_SYMBOL(acpi_acquire_global_lock);
-EXPORT_SYMBOL(acpi_release_global_lock);
-EXPORT_SYMBOL(acpi_install_gpe_block);
-EXPORT_SYMBOL(acpi_remove_gpe_block);
-EXPORT_SYMBOL(acpi_get_current_resources);
-EXPORT_SYMBOL(acpi_get_possible_resources);
-EXPORT_SYMBOL(acpi_walk_resources);
-EXPORT_SYMBOL(acpi_set_current_resources);
-EXPORT_SYMBOL(acpi_enable_event);
-EXPORT_SYMBOL(acpi_disable_event);
-EXPORT_SYMBOL(acpi_clear_event);
-EXPORT_SYMBOL(acpi_get_timer_duration);
-EXPORT_SYMBOL(acpi_get_timer);
-EXPORT_SYMBOL(acpi_get_sleep_type_data);
-EXPORT_SYMBOL(acpi_get_register);
-EXPORT_SYMBOL(acpi_set_register);
-EXPORT_SYMBOL(acpi_enter_sleep_state);
-EXPORT_SYMBOL(acpi_enter_sleep_state_s4bios);
-EXPORT_SYMBOL(acpi_get_system_info);
-EXPORT_SYMBOL(acpi_get_devices);
-
-/* ACPI OS Services Layer (acpi_osl.c) */
-
-EXPORT_SYMBOL(acpi_os_free);
-EXPORT_SYMBOL(acpi_os_printf);
-EXPORT_SYMBOL(acpi_os_sleep);
-EXPORT_SYMBOL(acpi_os_stall);
-EXPORT_SYMBOL(acpi_os_signal);
-EXPORT_SYMBOL(acpi_os_queue_for_execution);
-EXPORT_SYMBOL(acpi_os_signal_semaphore);
-EXPORT_SYMBOL(acpi_os_create_semaphore);
-EXPORT_SYMBOL(acpi_os_delete_semaphore);
-EXPORT_SYMBOL(acpi_os_wait_semaphore);
-
-EXPORT_SYMBOL(acpi_os_read_pci_configuration);
-
-/* ACPI Utilities (acpi_utils.c) */
-
-EXPORT_SYMBOL(acpi_extract_package);
-EXPORT_SYMBOL(acpi_evaluate_integer);
-EXPORT_SYMBOL(acpi_evaluate_reference);
-
-#endif /*CONFIG_ACPI_INTERPRETER*/
-
-
-/* ACPI Bus Driver (acpi_bus.c) */
-
-#ifdef CONFIG_ACPI_BUS
-
-EXPORT_SYMBOL(acpi_fadt);
-EXPORT_SYMBOL(acpi_walk_namespace);
-EXPORT_SYMBOL(acpi_root_dir);
-EXPORT_SYMBOL(acpi_bus_get_device);
-EXPORT_SYMBOL(acpi_bus_get_status);
-EXPORT_SYMBOL(acpi_bus_get_power);
-EXPORT_SYMBOL(acpi_bus_set_power);
-EXPORT_SYMBOL(acpi_bus_generate_event);
-EXPORT_SYMBOL(acpi_bus_receive_event);
-EXPORT_SYMBOL(acpi_bus_register_driver);
-EXPORT_SYMBOL(acpi_bus_unregister_driver);
-EXPORT_SYMBOL(acpi_bus_scan);
-EXPORT_SYMBOL(acpi_init);
-
-#endif /*CONFIG_ACPI_BUS*/
-
-
-/* ACPI PCI Driver (pci_irq.c) */
-
-#ifdef CONFIG_ACPI_PCI
-
-#include <xen/pci.h>
-extern int acpi_pci_irq_enable(struct pci_dev *dev);
-EXPORT_SYMBOL(acpi_pci_irq_enable);
-extern int acpi_pci_irq_lookup (int segment, int bus, int device, int pin);
-EXPORT_SYMBOL(acpi_pci_irq_lookup);
-EXPORT_SYMBOL(acpi_pci_register_driver);
-EXPORT_SYMBOL(acpi_pci_unregister_driver);
-#endif /*CONFIG_ACPI_PCI */
-
-#ifdef CONFIG_ACPI_EC
-/* ACPI EC driver (ec.c) */
-
-EXPORT_SYMBOL(ec_read);
-EXPORT_SYMBOL(ec_write);
-#endif
-
diff --git a/xen/drivers/acpi/tables.c b/xen/drivers/acpi/tables.c
index 64a05061a6..1c718efc88 100644
--- a/xen/drivers/acpi/tables.c
+++ b/xen/drivers/acpi/tables.c
@@ -58,6 +58,7 @@ static char *acpi_table_signatures[ACPI_TABLE_COUNT] = {
[ACPI_SSDT] = "SSDT",
[ACPI_SPMI] = "SPMI",
[ACPI_HPET] = "HPET",
+ [ACPI_MCFG] = "MCFG",
};
static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
@@ -100,7 +101,7 @@ acpi_table_print (
else
name = header->signature;
- printk(KERN_INFO PREFIX "%.4s (v%3.3d %6.6s %8.8s 0x%08x %.4s 0x%08x) @ 0x%p\n",
+ printk(KERN_DEBUG PREFIX "%.4s (v%3.3d %6.6s %8.8s 0x%08x %.4s 0x%08x) @ 0x%p\n",
name, header->revision, header->oem_id,
header->oem_table_id, header->oem_revision,
header->asl_compiler_id, header->asl_compiler_revision,
@@ -130,7 +131,7 @@ acpi_table_print_madt_entry (
{
struct acpi_table_ioapic *p =
(struct acpi_table_ioapic*) header;
- printk(KERN_INFO PREFIX "IOAPIC (id[0x%02x] address[0x%08x] global_irq_base[0x%x])\n",
+ printk(KERN_INFO PREFIX "IOAPIC (id[0x%02x] address[0x%08x] gsi_base[%d])\n",
p->id, p->address, p->global_irq_base);
}
break;
@@ -184,8 +185,8 @@ acpi_table_print_madt_entry (
{
struct acpi_table_iosapic *p =
(struct acpi_table_iosapic*) header;
- printk(KERN_INFO PREFIX "IOSAPIC (id[0x%x] global_irq_base[0x%x] address[%p])\n",
- p->id, p->global_irq_base, (void *) (unsigned long) p->address);
+ printk(KERN_INFO PREFIX "IOSAPIC (id[0x%x] address[%p] gsi_base[%d])\n",
+ p->id, (void *) (unsigned long) p->address, p->global_irq_base);
}
break;
@@ -285,7 +286,7 @@ acpi_get_table_header_early (
*header = (void *) __acpi_map_table(fadt->V1_dsdt,
sizeof(struct acpi_table_header));
} else
- *header = 0;
+ *header = NULL;
if (!*header) {
printk(KERN_WARNING PREFIX "Unable to map DSDT\n");
@@ -302,13 +303,14 @@ acpi_table_parse_madt_family (
enum acpi_table_id id,
unsigned long madt_size,
int entry_id,
- acpi_madt_entry_handler handler)
+ acpi_madt_entry_handler handler,
+ unsigned int max_entries)
{
void *madt = NULL;
- acpi_table_entry_header *entry = NULL;
- unsigned long count = 0;
- unsigned long madt_end = 0;
- unsigned int i = 0;
+ acpi_table_entry_header *entry;
+ unsigned int count = 0;
+ unsigned long madt_end;
+ unsigned int i;
if (!handler)
return -EINVAL;
@@ -341,14 +343,20 @@ acpi_table_parse_madt_family (
entry = (acpi_table_entry_header *)
((unsigned long) madt + madt_size);
- while (((unsigned long) entry) < madt_end) {
- if (entry->type == entry_id) {
- count++;
- handler(entry);
- }
+ while (((unsigned long) entry) + sizeof(acpi_table_entry_header) < madt_end) {
+ if (entry->type == entry_id &&
+ (!max_entries || count++ < max_entries))
+ if (handler(entry, madt_end))
+ return -EINVAL;
+
entry = (acpi_table_entry_header *)
((unsigned long) entry + entry->length);
}
+ if (max_entries && count > max_entries) {
+ printk(KERN_WARNING PREFIX "[%s:0x%02x] ignored %i entries of "
+ "%i found\n", acpi_table_signatures[id], entry_id,
+ count - max_entries, count);
+ }
return count;
}
@@ -357,10 +365,11 @@ acpi_table_parse_madt_family (
int __init
acpi_table_parse_madt (
enum acpi_madt_entry_id id,
- acpi_madt_entry_handler handler)
+ acpi_madt_entry_handler handler,
+ unsigned int max_entries)
{
return acpi_table_parse_madt_family(ACPI_APIC, sizeof(struct acpi_table_madt),
- id, handler);
+ id, handler, max_entries);
}
@@ -378,8 +387,13 @@ acpi_table_parse (
for (i = 0; i < sdt_count; i++) {
if (sdt_entry[i].id != id)
continue;
- handler(sdt_entry[i].pa, sdt_entry[i].size);
count++;
+ if (count == 1)
+ handler(sdt_entry[i].pa, sdt_entry[i].size);
+
+ else
+ printk(KERN_WARNING PREFIX "%d duplicate %s table ignored.\n",
+ count, acpi_table_signatures[id]);
}
return count;
@@ -543,6 +557,14 @@ acpi_table_get_sdt (
return 0;
}
+/*
+ * acpi_table_init()
+ *
+ * find RSDP, find and checksum SDT/XSDT.
+ * checksum all tables, print SDT/XSDT
+ *
+ * result: sdt_entry[] is initialized
+ */
int __init
acpi_table_init (void)
@@ -565,7 +587,7 @@ acpi_table_init (void)
return -ENODEV;
}
- printk(KERN_INFO PREFIX "RSDP (v%3.3d %6.6s ) @ 0x%p\n",
+ printk(KERN_DEBUG PREFIX "RSDP (v%3.3d %6.6s ) @ 0x%p\n",
rsdp->revision, rsdp->oem_id, (void *) rsdp_phys);
if (rsdp->revision < 2)
@@ -585,4 +607,3 @@ acpi_table_init (void)
return 0;
}
-
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index e2fa7efc26..f8fe1d69e0 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -16,31 +16,37 @@
#include <xen/spinlock.h>
#include <xen/console.h>
#include <xen/serial.h>
+#include <xen/softirq.h>
#include <xen/keyhandler.h>
+#include <xen/mm.h>
+#include <xen/delay.h>
+#include <asm/current.h>
#include <asm/uaccess.h>
-#include <asm/mm.h>
+#include <asm/debugger.h>
+#include <asm/io.h>
-/* opt_console: comma-separated list of console outputs. */
-static unsigned char opt_console[30] = "com1,vga";
+/* console: comma-separated list of console outputs. */
+static char opt_console[30] = OPT_CONSOLE_STR;
string_param("console", opt_console);
-/* opt_conswitch: a character pair controlling console switching. */
+/* conswitch: a character pair controlling console switching. */
/* Char 1: CTRL+<char1> is used to switch console input between Xen and DOM0 */
/* Char 2: If this character is 'x', then do not auto-switch to DOM0 when it */
/* boots. Any other value, or omitting the char, enables auto-switch */
static unsigned char opt_conswitch[5] = "a";
string_param("conswitch", opt_conswitch);
+/* sync_console: force synchronous console output (useful for debugging). */
+static int opt_sync_console;
+boolean_param("sync_console", opt_sync_console);
+
static int xpos, ypos;
static unsigned char *video;
-#define CONSOLE_RING_SIZE 16392
-typedef struct console_ring_st
-{
- char buf[CONSOLE_RING_SIZE];
- unsigned int len;
-} console_ring_t;
-static console_ring_t console_ring;
+#define CONRING_SIZE 16384
+#define CONRING_IDX_MASK(i) ((i)&(CONRING_SIZE-1))
+static char conring[CONRING_SIZE];
+static unsigned int conringc, conringp;
static char printk_prefix[16] = "";
@@ -49,7 +55,6 @@ static int vgacon_enabled = 0;
spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
-
/*
* *******************************************************
* *************** OUTPUT TO VGA CONSOLE *****************
@@ -211,23 +216,33 @@ static void putchar_console(int c)
static void putchar_console_ring(int c)
{
- if ( console_ring.len < CONSOLE_RING_SIZE )
- console_ring.buf[console_ring.len++] = (char)c;
+ conring[CONRING_IDX_MASK(conringp++)] = c;
+ if ( (conringp - conringc) > CONRING_SIZE )
+ conringc = conringp - CONRING_SIZE;
}
-long read_console_ring(unsigned long str, unsigned int count, unsigned cmd)
+long read_console_ring(char **pstr, u32 *pcount, int clear)
{
- unsigned int len;
-
- len = (console_ring.len < count) ? console_ring.len : count;
-
- if ( copy_to_user((char *)str, console_ring.buf, len) )
- return -EFAULT;
+ char *str = *pstr;
+ u32 count = *pcount;
+ unsigned int p, q;
+ unsigned long flags;
- if ( cmd & CONSOLE_RING_CLEAR )
- console_ring.len = 0;
-
- return len;
+ /* Start of buffer may get overwritten during copy. So copy backwards. */
+ for ( p = conringp, q = count; (p > conringc) && (q > 0); p--, q-- )
+ if ( put_user(conring[CONRING_IDX_MASK(p-1)], (char *)str+q-1) )
+ return -EFAULT;
+
+ if ( clear )
+ {
+ spin_lock_irqsave(&console_lock, flags);
+ conringc = conringp;
+ spin_unlock_irqrestore(&console_lock, flags);
+ }
+
+ *pstr = str + q;
+ *pcount = count - q;
+ return 0;
}
@@ -252,12 +267,14 @@ static void switch_serial_input(void)
static char *input_str[2] = { "DOM0", "Xen" };
xen_rx = !xen_rx;
if ( SWITCH_CODE != 0 )
+ {
printk("*** Serial input -> %s "
"(type 'CTRL-%c' three times to switch input to %s).\n",
input_str[xen_rx], opt_conswitch[0], input_str[!xen_rx]);
+ }
}
-static void __serial_rx(unsigned char c, struct xen_regs *regs)
+static void __serial_rx(char c, struct cpu_user_regs *regs)
{
if ( xen_rx )
return handle_keypress(c, regs);
@@ -266,10 +283,10 @@ static void __serial_rx(unsigned char c, struct xen_regs *regs)
if ( (serial_rx_prod-serial_rx_cons) != SERIAL_RX_SIZE )
serial_rx_ring[SERIAL_RX_MASK(serial_rx_prod++)] = c;
/* Always notify the guest: prevents receive path from getting stuck. */
- send_guest_virq(dom0, VIRQ_CONSOLE);
+ send_guest_virq(dom0->vcpu[0], VIRQ_CONSOLE);
}
-static void serial_rx(unsigned char c, struct xen_regs *regs)
+static void serial_rx(char c, struct cpu_user_regs *regs)
{
static int switch_code_count = 0;
@@ -292,31 +309,52 @@ static void serial_rx(unsigned char c, struct xen_regs *regs)
__serial_rx(c, regs);
}
+long guest_console_write(char *buffer, int count)
+{
+ char kbuf[128];
+ int kcount;
+
+ while ( count > 0 )
+ {
+ while ( serial_tx_space(sercon_handle) < (SERIAL_TXBUFSZ / 2) )
+ {
+ if ( hypercall_preempt_check() )
+ break;
+ cpu_relax();
+ }
+
+ if ( hypercall_preempt_check() )
+ return hypercall3_create_continuation(
+ __HYPERVISOR_console_io, CONSOLEIO_write, count, buffer);
+
+ kcount = min_t(int, count, sizeof(kbuf)-1);
+ if ( copy_from_user(kbuf, buffer, kcount) )
+ return -EFAULT;
+ kbuf[kcount] = '\0';
+
+ serial_puts(sercon_handle, kbuf);
+
+ buffer += kcount;
+ count -= kcount;
+ }
+
+ return 0;
+}
+
long do_console_io(int cmd, int count, char *buffer)
{
- char *kbuf;
- long rc;
+ long rc;
#ifndef VERBOSE
- /* Only domain-0 may access the emergency console. */
- if ( current->id != 0 )
+ /* Only domain 0 may access the emergency console. */
+ if ( current->domain->domain_id != 0 )
return -EPERM;
#endif
switch ( cmd )
{
case CONSOLEIO_write:
- if ( count > (PAGE_SIZE-1) )
- count = PAGE_SIZE-1;
- if ( (kbuf = (char *)alloc_xenheap_page()) == NULL )
- return -ENOMEM;
- kbuf[count] = '\0';
- rc = count;
- if ( copy_from_user(kbuf, buffer, count) )
- rc = -EFAULT;
- else
- serial_puts(sercon_handle, kbuf);
- free_xenheap_page((unsigned long)kbuf);
+ rc = guest_console_write(buffer, count);
break;
case CONSOLEIO_read:
rc = 0;
@@ -350,7 +388,9 @@ long do_console_io(int cmd, int count, char *buffer)
static inline void __putstr(const char *str)
{
int c;
+
serial_puts(sercon_handle, str);
+
while ( (c = *str++) != '\0' )
{
putchar_console(c);
@@ -403,7 +443,7 @@ void set_printk_prefix(const char *prefix)
void init_console(void)
{
- unsigned char *p;
+ char *p;
/* Where should console output go? */
for ( p = opt_console; p != NULL; p = strchr(p, ',') )
@@ -411,7 +451,7 @@ void init_console(void)
if ( *p == ',' )
p++;
if ( strncmp(p, "com", 3) == 0 )
- sercon_handle = parse_serial_handle(p);
+ sercon_handle = serial_parse_handle(p);
else if ( strncmp(p, "vga", 3) == 0 )
vgacon_enabled = 1;
}
@@ -430,6 +470,12 @@ void init_console(void)
XEN_COMPILER, XEN_COMPILE_DATE);
printk(" Latest ChangeSet: %s\n\n", XEN_CHANGESET);
set_printk_prefix("(XEN) ");
+
+ if ( opt_sync_console )
+ {
+ serial_start_sync(sercon_handle);
+ printk("Console output is synchronous.\n");
+ }
}
void console_endboot(int disable_vga)
@@ -460,6 +506,16 @@ void console_force_lock(void)
spin_lock(&console_lock);
}
+void console_start_sync(void)
+{
+ serial_start_sync(sercon_handle);
+}
+
+void console_end_sync(void)
+{
+ serial_end_sync(sercon_handle);
+}
+
void console_putc(char c)
{
serial_putc(sercon_handle, c);
@@ -470,10 +526,123 @@ int console_getc(void)
return serial_getc(sercon_handle);
}
-int irq_console_getc(void)
+
+/*
+ * **************************************************************
+ * *************** Serial console ring buffer *******************
+ * **************************************************************
+ */
+
+#ifndef NDEBUG
+
+/* Send output direct to console, or buffer it? */
+int debugtrace_send_to_console;
+
+static char *debugtrace_buf; /* Debug-trace buffer */
+static unsigned int debugtrace_prd; /* Producer index */
+static unsigned int debugtrace_kilobytes = 128, debugtrace_bytes;
+static unsigned int debugtrace_used;
+static spinlock_t debugtrace_lock = SPIN_LOCK_UNLOCKED;
+integer_param("debugtrace", debugtrace_kilobytes);
+
+void debugtrace_dump(void)
+{
+ unsigned long flags;
+
+ if ( (debugtrace_bytes == 0) || !debugtrace_used )
+ return;
+
+ watchdog_disable();
+
+ spin_lock_irqsave(&debugtrace_lock, flags);
+
+ printk("debugtrace_dump() starting\n");
+
+ /* Print oldest portion of the ring. */
+ ASSERT(debugtrace_buf[debugtrace_bytes - 1] == 0);
+ serial_puts(sercon_handle, &debugtrace_buf[debugtrace_prd]);
+
+ /* Print youngest portion of the ring. */
+ debugtrace_buf[debugtrace_prd] = '\0';
+ serial_puts(sercon_handle, &debugtrace_buf[0]);
+
+ memset(debugtrace_buf, '\0', debugtrace_bytes);
+
+ printk("debugtrace_dump() finished\n");
+
+ spin_unlock_irqrestore(&debugtrace_lock, flags);
+
+ watchdog_enable();
+}
+
+void debugtrace_printk(const char *fmt, ...)
+{
+ static char buf[1024];
+
+ va_list args;
+ char *p;
+ unsigned long flags;
+
+ if ( debugtrace_bytes == 0 )
+ return;
+
+ debugtrace_used = 1;
+
+ spin_lock_irqsave(&debugtrace_lock, flags);
+
+ ASSERT(debugtrace_buf[debugtrace_bytes - 1] == 0);
+
+ va_start(args, fmt);
+ (void)vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ if ( debugtrace_send_to_console )
+ {
+ serial_puts(sercon_handle, buf);
+ }
+ else
+ {
+ for ( p = buf; *p != '\0'; p++ )
+ {
+ debugtrace_buf[debugtrace_prd++] = *p;
+ /* Always leave a nul byte at the end of the buffer. */
+ if ( debugtrace_prd == (debugtrace_bytes - 1) )
+ debugtrace_prd = 0;
+ }
+ }
+
+ spin_unlock_irqrestore(&debugtrace_lock, flags);
+}
+
+static int __init debugtrace_init(void)
{
- return irq_serial_getc(sercon_handle);
+ int order;
+ unsigned int kbytes, bytes;
+
+ /* Round size down to next power of two. */
+ while ( (kbytes = (debugtrace_kilobytes & (debugtrace_kilobytes-1))) != 0 )
+ debugtrace_kilobytes = kbytes;
+
+ bytes = debugtrace_kilobytes << 10;
+ if ( bytes == 0 )
+ return 0;
+
+ order = get_order(bytes);
+ debugtrace_buf = alloc_xenheap_pages(order);
+ ASSERT(debugtrace_buf != NULL);
+
+ memset(debugtrace_buf, '\0', bytes);
+
+ debugtrace_bytes = bytes;
+
+ memset(debugtrace_buf, '\0', debugtrace_bytes);
+
+ return 0;
}
+__initcall(debugtrace_init);
+
+#endif /* !NDEBUG */
+
/*
@@ -485,35 +654,42 @@ int irq_console_getc(void)
void panic(const char *fmt, ...)
{
va_list args;
- char buf[128];
+ char buf[128], cpustr[10];
unsigned long flags;
extern void machine_restart(char *);
+ debugtrace_dump();
+
va_start(args, fmt);
(void)vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
-
+
+ debugger_trap_immediate();
+
/* Spit out multiline message in one go. */
spin_lock_irqsave(&console_lock, flags);
__putstr("\n****************************************\n");
+ __putstr("Panic on CPU");
+ sprintf(cpustr, "%d", smp_processor_id());
+ __putstr(cpustr);
+ __putstr(":\n");
__putstr(buf);
- __putstr("Aieee! CPU");
- sprintf(buf, "%d", smp_processor_id());
- __putstr(buf);
- __putstr(" is toast...\n");
__putstr("****************************************\n\n");
__putstr("Reboot in five seconds...\n");
spin_unlock_irqrestore(&console_lock, flags);
- watchdog_on = 0;
+ watchdog_disable();
mdelay(5000);
machine_restart(0);
}
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-void __out_of_line_bug(int line)
-{
- printk("kernel BUG in header file at line %d\n", line);
- BUG();
- for ( ; ; ) ;
-}
diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c
new file mode 100644
index 0000000000..680a6ef1bc
--- /dev/null
+++ b/xen/drivers/char/ns16550.c
@@ -0,0 +1,297 @@
+/******************************************************************************
+ * ns16550.c
+ *
+ * Driver for 16550-series UARTs. This driver is to be kept within Xen as
+ * it permits debugging of seriously-toasted machines (e.g., in situations
+ * where a device driver within a guest OS would be inaccessible).
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/irq.h>
+#include <xen/sched.h>
+#include <xen/serial.h>
+#include <asm/io.h>
+
+/* Config serial port with a string <baud>,DPS,<io-base>,<irq>. */
+char opt_com1[30] = "", opt_com2[30] = "";
+string_param("com1", opt_com1);
+string_param("com2", opt_com2);
+
+static struct ns16550 {
+ int baud, data_bits, parity, stop_bits, irq;
+ unsigned long io_base; /* I/O port or memory-mapped I/O address. */
+ char *remapped_io_base; /* Remapped virtual address of mmap I/O. */
+ struct irqaction irqaction;
+} ns16550_com[2] = {
+ { 0, 0, 0, 0, 4, 0x3f8 },
+ { 0, 0, 0, 0, 3, 0x2f8 }
+};
+
+/* Register offsets */
+#define RBR 0x00 /* receive buffer */
+#define THR 0x00 /* transmit holding */
+#define IER 0x01 /* interrupt enable */
+#define IIR 0x02 /* interrupt identity */
+#define FCR 0x02 /* FIFO control */
+#define LCR 0x03 /* line control */
+#define MCR 0x04 /* Modem control */
+#define LSR 0x05 /* line status */
+#define MSR 0x06 /* Modem status */
+#define DLL 0x00 /* divisor latch (ls) (DLAB=1) */
+#define DLM 0x01 /* divisor latch (ms) (DLAB=1) */
+
+/* Interrupt Enable Register */
+#define IER_ERDAI 0x01 /* rx data recv'd */
+#define IER_ETHREI 0x02 /* tx reg. empty */
+#define IER_ELSI 0x04 /* rx line status */
+#define IER_EMSI 0x08 /* MODEM status */
+
+/* Interrupt Identification Register */
+#define IIR_NOINT 0x01 /* no interrupt pending */
+#define IIR_IMASK 0x06 /* interrupt identity: */
+#define IIR_LSI 0x06 /* - rx line status */
+#define IIR_RDAI 0x04 /* - rx data recv'd */
+#define IIR_THREI 0x02 /* - tx reg. empty */
+#define IIR_MSI 0x00 /* - MODEM status */
+
+/* FIFO Control Register */
+#define FCR_ENABLE 0x01 /* enable FIFO */
+#define FCR_CLRX 0x02 /* clear Rx FIFO */
+#define FCR_CLTX 0x04 /* clear Tx FIFO */
+#define FCR_DMA 0x10 /* enter DMA mode */
+#define FCR_TRG1 0x00 /* Rx FIFO trig lev 1 */
+#define FCR_TRG4 0x40 /* Rx FIFO trig lev 4 */
+#define FCR_TRG8 0x80 /* Rx FIFO trig lev 8 */
+#define FCR_TRG14 0xc0 /* Rx FIFO trig lev 14 */
+
+/* Line Control Register */
+#define LCR_DLAB 0x80 /* Divisor Latch Access */
+
+/* Modem Control Register */
+#define MCR_DTR 0x01 /* Data Terminal Ready */
+#define MCR_RTS 0x02 /* Request to Send */
+#define MCR_OUT2 0x08 /* OUT2: interrupt mask */
+
+/* Line Status Register */
+#define LSR_DR 0x01 /* Data ready */
+#define LSR_OE 0x02 /* Overrun */
+#define LSR_PE 0x04 /* Parity error */
+#define LSR_FE 0x08 /* Framing error */
+#define LSR_BI 0x10 /* Break */
+#define LSR_THRE 0x20 /* Xmit hold reg empty */
+#define LSR_TEMT 0x40 /* Xmitter empty */
+#define LSR_ERR 0x80 /* Error */
+
+/* These parity settings can be ORed directly into the LCR. */
+#define PARITY_NONE (0<<3)
+#define PARITY_ODD (1<<3)
+#define PARITY_EVEN (3<<3)
+#define PARITY_MARK (5<<3)
+#define PARITY_SPACE (7<<3)
+
+static char ns_read_reg(struct ns16550 *uart, int reg)
+{
+ if ( uart->remapped_io_base == NULL )
+ return inb(uart->io_base + reg);
+ return readb(uart->remapped_io_base + reg);
+}
+
+static void ns_write_reg(struct ns16550 *uart, int reg, char c)
+{
+ if ( uart->remapped_io_base == NULL )
+ return outb(c, uart->io_base + reg);
+ writeb(c, uart->remapped_io_base + reg);
+}
+
+static void ns16550_interrupt(
+ int irq, void *dev_id, struct cpu_user_regs *regs)
+{
+ struct serial_port *port = dev_id;
+ struct ns16550 *uart = port->uart;
+
+ while ( !(ns_read_reg(uart, IIR) & IIR_NOINT) )
+ {
+ serial_tx_interrupt(port, regs);
+ serial_rx_interrupt(port, regs);
+ }
+}
+
+static int ns16550_tx_empty(struct serial_port *port)
+{
+ struct ns16550 *uart = port->uart;
+ return !!(ns_read_reg(uart, LSR) & LSR_THRE);
+}
+
+static void ns16550_putc(struct serial_port *port, char c)
+{
+ struct ns16550 *uart = port->uart;
+ ns_write_reg(uart, THR, c);
+}
+
+static int ns16550_getc(struct serial_port *port, char *pc)
+{
+ struct ns16550 *uart = port->uart;
+
+ if ( !(ns_read_reg(uart, LSR) & LSR_DR) )
+ return 0;
+
+ *pc = ns_read_reg(uart, RBR);
+ return 1;
+}
+
+static void ns16550_init_preirq(struct serial_port *port)
+{
+ struct ns16550 *uart = port->uart;
+ unsigned char lcr;
+
+ /* I/O ports are distinguished by their size (16 bits). */
+ if ( uart->io_base >= 0x10000 )
+ uart->remapped_io_base = (char *)ioremap(uart->io_base, 8);
+
+ lcr = (uart->data_bits - 5) | ((uart->stop_bits - 1) << 2) | uart->parity;
+
+ /* No interrupts. */
+ ns_write_reg(uart, IER, 0);
+
+ /* Line control and baud-rate generator. */
+ ns_write_reg(uart, LCR, lcr | LCR_DLAB);
+ ns_write_reg(uart, DLL, 115200/uart->baud); /* baud lo */
+ ns_write_reg(uart, DLM, 0); /* baud hi */
+ ns_write_reg(uart, LCR, lcr); /* parity, data, stop */
+
+ /* No flow ctrl: DTR and RTS are both wedged high to keep remote happy. */
+ ns_write_reg(uart, MCR, MCR_DTR | MCR_RTS);
+
+ /* Enable and clear the FIFOs. Set a large trigger threshold. */
+ ns_write_reg(uart, FCR, FCR_ENABLE | FCR_CLRX | FCR_CLTX | FCR_TRG14);
+
+ /* Check this really is a 16550+. Otherwise we have no FIFOs. */
+ if ( (ns_read_reg(uart, IIR) & 0xc0) == 0xc0 )
+ port->tx_fifo_size = 16;
+}
+
+static void ns16550_init_postirq(struct serial_port *port)
+{
+ struct ns16550 *uart = port->uart;
+ int rc;
+
+ serial_async_transmit(port);
+
+ uart->irqaction.handler = ns16550_interrupt;
+ uart->irqaction.name = "ns16550";
+ uart->irqaction.dev_id = port;
+ if ( (rc = setup_irq(uart->irq, &uart->irqaction)) != 0 )
+ printk("ERROR: Failed to allocate na16550 IRQ %d\n", uart->irq);
+
+ /* Master interrupt enable; also keep DTR/RTS asserted. */
+ ns_write_reg(uart, MCR, MCR_OUT2 | MCR_DTR | MCR_RTS);
+
+ /* Enable receive and transmit interrupts. */
+ ns_write_reg(uart, IER, IER_ERDAI | IER_ETHREI);
+}
+
+#ifdef CONFIG_X86
+#include <asm/physdev.h>
+static void ns16550_endboot(struct serial_port *port)
+{
+ struct ns16550 *uart = port->uart;
+ physdev_modify_ioport_access_range(dom0, 0, uart->io_base, 8);
+}
+#else
+#define ns16550_endboot NULL
+#endif
+
+static struct uart_driver ns16550_driver = {
+ .init_preirq = ns16550_init_preirq,
+ .init_postirq = ns16550_init_postirq,
+ .endboot = ns16550_endboot,
+ .tx_empty = ns16550_tx_empty,
+ .putc = ns16550_putc,
+ .getc = ns16550_getc
+};
+
+#define PARSE_ERR(_f, _a...) \
+ do { \
+ printk( "ERROR: " _f "\n" , ## _a ); \
+ return; \
+ } while ( 0 )
+
+static void ns16550_parse_port_config(struct ns16550 *uart, char *conf)
+{
+ if ( *conf == '\0' )
+ return;
+
+ uart->baud = simple_strtol(conf, &conf, 10);
+ if ( (uart->baud < 1200) || (uart->baud > 115200) )
+ PARSE_ERR("Baud rate %d outside supported range.", uart->baud);
+
+ if ( *conf != ',' )
+ PARSE_ERR("Missing data/parity/stop specifiers.");
+
+ conf++;
+
+ uart->data_bits = simple_strtol(conf, &conf, 10);
+ if ( (uart->data_bits < 5) || (uart->data_bits > 8) )
+ PARSE_ERR("%d data bits are unsupported.", uart->data_bits);
+
+ switch ( *conf )
+ {
+ case 'n':
+ uart->parity = PARITY_NONE;
+ break;
+ case 'o':
+ uart->parity = PARITY_ODD;
+ break;
+ case 'e':
+ uart->parity = PARITY_EVEN;
+ break;
+ case 'm':
+ uart->parity = PARITY_MARK;
+ break;
+ case 's':
+ uart->parity = PARITY_SPACE;
+ break;
+
+ default:
+ PARSE_ERR("Invalid parity specifier '%c'.", *conf);
+ }
+
+ conf++;
+
+ uart->stop_bits = simple_strtol(conf, &conf, 10);
+ if ( (uart->stop_bits < 1) || (uart->stop_bits > 2) )
+ PARSE_ERR("%d stop bits are unsupported.", uart->stop_bits);
+
+ if ( *conf == ',' )
+ {
+ conf++;
+ uart->io_base = simple_strtol(conf, &conf, 0);
+
+ if ( *conf == ',' )
+ {
+ conf++;
+ uart->irq = simple_strtol(conf, &conf, 10);
+ }
+ }
+
+ serial_register_uart(uart - ns16550_com, &ns16550_driver, uart);
+}
+
+void ns16550_init(void)
+{
+ ns16550_parse_port_config(&ns16550_com[0], opt_com1);
+ ns16550_parse_port_config(&ns16550_com[1], opt_com2);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/drivers/char/serial.c b/xen/drivers/char/serial.c
index 2c6fc895b4..bec789e084 100644
--- a/xen/drivers/char/serial.c
+++ b/xen/drivers/char/serial.c
@@ -1,314 +1,198 @@
/******************************************************************************
* serial.c
*
- * Driver for 16550-series UARTs. This driver is to be kept within Xen as
- * it permits debugging of seriously-toasted machines (e.g., in situations
- * where a device driver within a guest OS would be inaccessible).
+ * Framework for serial device drivers.
*
- * Copyright (c) 2003-2004, K A Fraser
+ * Copyright (c) 2003-2005, K A Fraser
*/
#include <xen/config.h>
#include <xen/init.h>
#include <xen/irq.h>
#include <xen/keyhandler.h>
-#include <asm/pdb.h>
#include <xen/reboot.h>
#include <xen/sched.h>
#include <xen/serial.h>
-#include <asm/io.h>
-
-/* opt_com[12]: Config serial port with a string <baud>,DPS,<io-base>,<irq>. */
-static unsigned char opt_com1[30] = "", opt_com2[30] = "";
-string_param("com1", opt_com1);
-string_param("com2", opt_com2);
-
-/* Register offsets */
-#define RBR 0x00 /* receive buffer */
-#define THR 0x00 /* transmit holding */
-#define IER 0x01 /* interrupt enable */
-#define IIR 0x02 /* interrupt identity */
-#define FCR 0x02 /* FIFO control */
-#define LCR 0x03 /* line control */
-#define MCR 0x04 /* Modem control */
-#define LSR 0x05 /* line status */
-#define MSR 0x06 /* Modem status */
-#define DLL 0x00 /* divisor latch (ls) ( DLAB=1) */
-#define DLM 0x01 /* divisor latch (ms) ( DLAB=1) */
-
-/* Interrupt Enable Register */
-#define IER_ERDAI 0x01 /* rx data recv'd */
-#define IER_ETHREI 0x02 /* tx reg. empty */
-#define IER_ELSI 0x04 /* rx line status */
-#define IER_EMSI 0x08 /* MODEM status */
-
-/* FIFO control register */
-#define FCR_ENABLE 0x01 /* enable FIFO */
-#define FCR_CLRX 0x02 /* clear Rx FIFO */
-#define FCR_CLTX 0x04 /* clear Tx FIFO */
-#define FCR_DMA 0x10 /* enter DMA mode */
-#define FCR_TRG1 0x00 /* Rx FIFO trig lev 1 */
-#define FCR_TRG4 0x40 /* Rx FIFO trig lev 4 */
-#define FCR_TRG8 0x80 /* Rx FIFO trig lev 8 */
-#define FCR_TRG14 0xc0 /* Rx FIFO trig lev 14 */
-
-/* Line control register */
-#define LCR_DLAB 0x80 /* Divisor Latch Access */
-
-/* Modem Control Register */
-#define MCR_DTR 0x01 /* Data Terminal Ready */
-#define MCR_RTS 0x02 /* Request to Send */
-#define MCR_OUT2 0x08 /* OUT2: interrupt mask */
-
-/* Line Status Register */
-#define LSR_DR 0x01 /* Data ready */
-#define LSR_OE 0x02 /* Overrun */
-#define LSR_PE 0x04 /* Parity error */
-#define LSR_FE 0x08 /* Framing error */
-#define LSR_BI 0x10 /* Break */
-#define LSR_THRE 0x20 /* Xmit hold reg empty */
-#define LSR_TEMT 0x40 /* Xmitter empty */
-#define LSR_ERR 0x80 /* Error */
-
-/* These parity settings can be ORed directly into the LCR. */
-#define PARITY_NONE (0<<3)
-#define PARITY_ODD (1<<3)
-#define PARITY_EVEN (3<<3)
-#define PARITY_MARK (5<<3)
-#define PARITY_SPACE (7<<3)
-
-#define RXBUFSZ 32
-#define MASK_RXBUF_IDX(_i) ((_i)&(RXBUFSZ-1))
-typedef struct {
- int baud, data_bits, parity, stop_bits, io_base, irq;
- serial_rx_fn rx_lo, rx_hi, rx;
- spinlock_t lock;
- unsigned char rxbuf[RXBUFSZ];
- unsigned int rxbufp, rxbufc;
- struct irqaction irqaction;
-} uart_t;
-
-static uart_t com[2] = {
- { 0, 0, 0, 0, 0x3f8, 4,
- NULL, NULL, NULL,
- SPIN_LOCK_UNLOCKED },
- { 0, 0, 0, 0, 0x2f8, 3,
- NULL, NULL, NULL,
- SPIN_LOCK_UNLOCKED }
-};
-
-#define UART_ENABLED(_u) ((_u)->baud != 0)
-#define DISABLE_UART(_u) ((_u)->baud = 0)
-#ifdef CONFIG_X86
-static inline int arch_serial_putc(uart_t *uart, unsigned char c)
-{
- int space;
- if ( (space = (inb(uart->io_base + LSR) & LSR_THRE)) )
- outb(c, uart->io_base + THR);
- return space;
-}
-#endif
-
-
-/***********************
- * PRIVATE FUNCTIONS
- */
+static struct serial_port com[2] = {
+ { .lock = SPIN_LOCK_UNLOCKED },
+ { .lock = SPIN_LOCK_UNLOCKED }
+};
-static void uart_rx(uart_t *uart, struct xen_regs *regs)
+void serial_rx_interrupt(struct serial_port *port, struct cpu_user_regs *regs)
{
- unsigned char c;
+ char c;
+ serial_rx_fn fn = NULL;
+ unsigned long flags;
- if ( !UART_ENABLED(uart) )
- return;
+ spin_lock_irqsave(&port->lock, flags);
- /*
- * No need for the uart spinlock here. Only the uart's own interrupt
- * handler will read from the RBR and the handler isn't reentrant.
- * Calls to serial_getc() will disable this handler before proceeding.
- */
- while ( inb(uart->io_base + LSR) & LSR_DR )
+ if ( port->driver->getc(port, &c) )
{
- c = inb(uart->io_base + RBR);
- if ( uart->rx != NULL )
- uart->rx(c, regs);
- else if ( (c & 0x80) && (uart->rx_hi != NULL) )
- uart->rx_hi(c&0x7f, regs);
- else if ( !(c & 0x80) && (uart->rx_lo != NULL) )
- uart->rx_lo(c&0x7f, regs);
- else if ( (uart->rxbufp - uart->rxbufc) != RXBUFSZ )
- uart->rxbuf[MASK_RXBUF_IDX(uart->rxbufp++)] = c;
+ if ( port->rx != NULL )
+ fn = port->rx;
+ else if ( (c & 0x80) && (port->rx_hi != NULL) )
+ fn = port->rx_hi;
+ else if ( !(c & 0x80) && (port->rx_lo != NULL) )
+ fn = port->rx_lo;
+ else if ( (port->rxbufp - port->rxbufc) != SERIAL_RXBUFSZ )
+ port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufp++)] = c;
}
-}
-static void serial_interrupt(int irq, void *dev_id, struct xen_regs *regs)
-{
- uart_rx((uart_t *)dev_id, regs);
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ if ( fn != NULL )
+ (*fn)(c & 0x7f, regs);
}
-static inline void __serial_putc(uart_t *uart, int handle, unsigned char c)
+void serial_tx_interrupt(struct serial_port *port, struct cpu_user_regs *regs)
{
+ int i;
unsigned long flags;
- int space;
- if ( (c == '\n') && (handle & SERHND_COOKED) )
- __serial_putc(uart, handle, '\r');
+ spin_lock_irqsave(&port->lock, flags);
- if ( handle & SERHND_HI )
- c |= 0x80;
- else if ( handle & SERHND_LO )
- c &= 0x7f;
-
- do {
- spin_lock_irqsave(&uart->lock, flags);
- space = arch_serial_putc(uart, c);
- spin_unlock_irqrestore(&uart->lock, flags);
+ if ( port->driver->tx_empty(port) )
+ {
+ for ( i = 0; i < port->tx_fifo_size; i++ )
+ {
+ if ( port->txbufc == port->txbufp )
+ break;
+ port->driver->putc(
+ port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
+ }
}
- while ( !space );
+
+ spin_unlock_irqrestore(&port->lock, flags);
}
-#define PARSE_ERR(_f, _a...) \
- do { \
- printk( "ERROR: " _f "\n" , ## _a ); \
- DISABLE_UART(uart); \
- return; \
-} while ( 0 )
-
-static void parse_port_config(char *conf, uart_t *uart)
+static void __serial_putc(struct serial_port *port, char c)
{
- if ( *conf == '\0' )
- return;
-
- uart->baud = simple_strtol(conf, &conf, 10);
- if ( (uart->baud < 1200) || (uart->baud > 115200) )
- PARSE_ERR("Baud rate %d outside supported range.", uart->baud);
-
- if ( *conf != ',' )
- PARSE_ERR("Missing data/parity/stop specifiers.");
-
- conf++;
-
- uart->data_bits = simple_strtol(conf, &conf, 10);
- if ( (uart->data_bits < 5) || (uart->data_bits > 8) )
- PARSE_ERR("%d data bits are unsupported.", uart->data_bits);
+ int i;
- switch ( *conf )
+ if ( (port->txbuf != NULL) && !port->sync )
{
- case 'n':
- uart->parity = PARITY_NONE;
- break;
- case 'o':
- uart->parity = PARITY_ODD;
- break;
- case 'e':
- uart->parity = PARITY_EVEN;
- break;
- case 'm':
- uart->parity = PARITY_MARK;
- break;
- case 's':
- uart->parity = PARITY_SPACE;
- break;
-
- default:
- PARSE_ERR("Invalid parity specifier '%c'.", *conf);
+ /* Interrupt-driven (asynchronous) transmitter. */
+ if ( (port->txbufp - port->txbufc) == SERIAL_TXBUFSZ )
+ {
+ /* Buffer is full: we spin, but could alternatively drop chars. */
+ while ( !port->driver->tx_empty(port) )
+ cpu_relax();
+ for ( i = 0; i < port->tx_fifo_size; i++ )
+ port->driver->putc(
+ port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
+ port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c;
+ }
+ else if ( ((port->txbufp - port->txbufc) == 0) &&
+ port->driver->tx_empty(port) )
+ {
+ /* Buffer and UART FIFO are both empty. */
+ port->driver->putc(port, c);
+ }
+ else
+ {
+ /* Normal case: buffer the character. */
+ port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c;
+ }
}
-
- conf++;
-
- uart->stop_bits = simple_strtol(conf, &conf, 10);
- if ( (uart->stop_bits < 1) || (uart->stop_bits > 2) )
- PARSE_ERR("%d stop bits are unsupported.", uart->stop_bits);
-
- if ( *conf == ',' )
+ else if ( port->driver->tx_empty )
{
- conf++;
-
- uart->io_base = simple_strtol(conf, &conf, 0);
- if ( (uart->io_base <= 0x0000) || (uart->io_base > 0xfff0) )
- PARSE_ERR("I/O port base 0x%x is outside the supported range.",
- uart->io_base);
-
- if ( *conf != ',' )
- PARSE_ERR("Missing IRQ specifier.");
-
- conf++;
-
- uart->irq = simple_strtol(conf, &conf, 10);
- if ( (uart->irq <= 0) || (uart->irq >= 32) )
- PARSE_ERR("IRQ %d is outside the supported range.", uart->irq);
+ /* Synchronous finite-capacity transmitter. */
+ while ( !port->driver->tx_empty(port) )
+ cpu_relax();
+ port->driver->putc(port, c);
+ }
+ else
+ {
+ /* Simple synchronous transmitter. */
+ port->driver->putc(port, c);
}
}
-static void uart_config_stage1(uart_t *uart)
+void serial_putc(int handle, char c)
{
- unsigned char lcr;
+ struct serial_port *port = &com[handle & SERHND_IDX];
+ unsigned long flags;
- if ( !UART_ENABLED(uart) )
+ if ( (handle == -1) || !port->driver || !port->driver->putc )
return;
- lcr = (uart->data_bits - 5) | ((uart->stop_bits - 1) << 2) | uart->parity;
+ spin_lock_irqsave(&port->lock, flags);
- /* No interrupts. */
- outb(0, uart->io_base + IER);
+ if ( (c == '\n') && (handle & SERHND_COOKED) )
+ __serial_putc(port, '\r');
- /* Line control and baud-rate generator. */
- outb(lcr | LCR_DLAB, uart->io_base + LCR);
- outb(115200/uart->baud, uart->io_base + DLL); /* baud lo */
- outb(0, uart->io_base + DLM); /* baud hi */
- outb(lcr, uart->io_base + LCR); /* parity, data, stop */
+ if ( handle & SERHND_HI )
+ c |= 0x80;
+ else if ( handle & SERHND_LO )
+ c &= 0x7f;
- /* No flow ctrl: DTR and RTS are both wedged high to keep remote happy. */
- outb(MCR_DTR | MCR_RTS, uart->io_base + MCR);
+ __serial_putc(port, c);
- /* Enable and clear the FIFOs. Set a large trigger threshold. */
- outb(FCR_ENABLE | FCR_CLRX | FCR_CLTX | FCR_TRG14, uart->io_base + FCR);
+ spin_unlock_irqrestore(&port->lock, flags);
}
-static void uart_config_stage2(uart_t *uart)
+void serial_puts(int handle, const char *s)
{
- int rc;
+ struct serial_port *port = &com[handle & SERHND_IDX];
+ unsigned long flags;
+ char c;
- if ( !UART_ENABLED(uart) )
+ if ( (handle == -1) || !port->driver || !port->driver->putc )
return;
- uart->irqaction.handler = serial_interrupt;
- uart->irqaction.name = "serial";
- uart->irqaction.dev_id = uart;
- if ( (rc = setup_irq(uart->irq, &uart->irqaction)) != 0 )
- printk("ERROR: Failed to allocate serial IRQ %d\n", uart->irq);
+ spin_lock_irqsave(&port->lock, flags);
- /* For sanity, clear the receive FIFO. */
- outb(FCR_ENABLE | FCR_CLRX | FCR_TRG14, uart->io_base + FCR);
+ while ( (c = *s++) != '\0' )
+ {
+ if ( (c == '\n') && (handle & SERHND_COOKED) )
+ __serial_putc(port, '\r');
- /* Master interrupt enable; also keep DTR/RTS asserted. */
- outb(MCR_OUT2 | MCR_DTR | MCR_RTS, uart->io_base + MCR);
+ if ( handle & SERHND_HI )
+ c |= 0x80;
+ else if ( handle & SERHND_LO )
+ c &= 0x7f;
- /* Enable receive interrupts. */
- outb(IER_ERDAI, uart->io_base + IER);
+ __serial_putc(port, c);
+ }
+
+ spin_unlock_irqrestore(&port->lock, flags);
}
+char serial_getc(int handle)
+{
+ struct serial_port *port = &com[handle & SERHND_IDX];
+ char c;
+ unsigned long flags;
-/***********************
- * PUBLIC FUNCTIONS
- */
+ if ( (handle == -1) || !port->driver || !port->driver->getc )
+ return '\0';
-void serial_init_stage1(void)
-{
- parse_port_config(opt_com1, &com[0]);
- parse_port_config(opt_com2, &com[1]);
+ do {
+ for ( ; ; )
+ {
+ spin_lock_irqsave(&port->lock, flags);
+
+ if ( port->rxbufp != port->rxbufc )
+ {
+ c = port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufc++)];
+ break;
+ }
+
+ if ( port->driver->getc(port, &c) )
+ break;
- uart_config_stage1(&com[0]);
- uart_config_stage1(&com[1]);
-}
+ spin_unlock_irqrestore(&port->lock, flags);
-void serial_init_stage2(void)
-{
- uart_config_stage2(&com[0]);
- uart_config_stage2(&com[1]);
+ cpu_relax();
+ }
+ } while ( ((handle & SERHND_LO) && (c & 0x80)) ||
+ ((handle & SERHND_HI) && !(c & 0x80)) );
+
+ return c & 0x7f;
}
-int parse_serial_handle(char *conf)
+int serial_parse_handle(char *conf)
{
int handle;
@@ -331,12 +215,6 @@ int parse_serial_handle(char *conf)
goto fail;
}
- if ( !UART_ENABLED(&com[handle]) )
- {
- printk("ERROR: cannot use unconfigured serial port COM%d\n", handle+1);
- return -1;
- }
-
if ( conf[4] == 'H' )
handle |= SERHND_HI;
else if ( conf[4] == 'L' )
@@ -353,134 +231,147 @@ int parse_serial_handle(char *conf)
void serial_set_rx_handler(int handle, serial_rx_fn fn)
{
- uart_t *uart = &com[handle & SERHND_IDX];
+ struct serial_port *port = &com[handle & SERHND_IDX];
unsigned long flags;
if ( handle == -1 )
return;
- spin_lock_irqsave(&uart->lock, flags);
+ spin_lock_irqsave(&port->lock, flags);
- if ( uart->rx != NULL )
+ if ( port->rx != NULL )
goto fail;
if ( handle & SERHND_LO )
{
- if ( uart->rx_lo != NULL )
+ if ( port->rx_lo != NULL )
goto fail;
- uart->rx_lo = fn;
+ port->rx_lo = fn;
}
else if ( handle & SERHND_HI )
{
- if ( uart->rx_hi != NULL )
+ if ( port->rx_hi != NULL )
goto fail;
- uart->rx_hi = fn;
+ port->rx_hi = fn;
}
else
{
- if ( (uart->rx_hi != NULL) || (uart->rx_lo != NULL) )
+ if ( (port->rx_hi != NULL) || (port->rx_lo != NULL) )
goto fail;
- uart->rx = fn;
+ port->rx = fn;
}
- spin_unlock_irqrestore(&uart->lock, flags);
+ spin_unlock_irqrestore(&port->lock, flags);
return;
fail:
- spin_unlock_irqrestore(&uart->lock, flags);
+ spin_unlock_irqrestore(&port->lock, flags);
printk("ERROR: Conflicting receive handlers for COM%d\n",
handle & SERHND_IDX);
}
-void serial_putc(int handle, unsigned char c)
+void serial_force_unlock(int handle)
{
- uart_t *uart = &com[handle & SERHND_IDX];
+ struct serial_port *port = &com[handle & SERHND_IDX];
+ if ( handle != -1 )
+ port->lock = SPIN_LOCK_UNLOCKED;
+ serial_start_sync(handle);
+}
+
+void serial_start_sync(int handle)
+{
+ struct serial_port *port = &com[handle & SERHND_IDX];
+ unsigned long flags;
if ( handle == -1 )
return;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ if ( port->sync++ == 0 )
+ {
+ while ( (port->txbufp - port->txbufc) != 0 )
+ {
+ while ( !port->driver->tx_empty(port) )
+ cpu_relax();
+ port->driver->putc(
+ port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
+ }
+ }
- __serial_putc(uart, handle, c);
+ spin_unlock_irqrestore(&port->lock, flags);
}
-void serial_puts(int handle, const unsigned char *s)
+void serial_end_sync(int handle)
{
- uart_t *uart = &com[handle & SERHND_IDX];
+ struct serial_port *port = &com[handle & SERHND_IDX];
+ unsigned long flags;
if ( handle == -1 )
return;
+
+ spin_lock_irqsave(&port->lock, flags);
- while ( *s != '\0' )
- __serial_putc(uart, handle, *s++);
+ port->sync--;
+
+ spin_unlock_irqrestore(&port->lock, flags);
}
-/* Returns TRUE if given character (*pc) matches the serial handle. */
-static int byte_matches(int handle, unsigned char *pc)
+int serial_tx_space(int handle)
{
- if ( !(handle & SERHND_HI) )
- {
- if ( !(handle & SERHND_LO) || !(*pc & 0x80) )
- return 1;
- }
- else if ( *pc & 0x80 )
- {
- *pc &= 0x7f;
- return 1;
- }
- return 0;
+ struct serial_port *port = &com[handle & SERHND_IDX];
+ if ( handle == -1 )
+ return SERIAL_TXBUFSZ;
+ return SERIAL_TXBUFSZ - (port->txbufp - port->txbufc);
}
-unsigned char irq_serial_getc(int handle)
+void serial_init_preirq(void)
{
- uart_t *uart = &com[handle & SERHND_IDX];
- unsigned char c;
-
- while ( uart->rxbufp != uart->rxbufc )
- {
- c = uart->rxbuf[MASK_RXBUF_IDX(uart->rxbufc++)];
- if ( byte_matches(handle, &c) )
- goto out;
- }
-
- /* We now wait for the UART to receive a suitable character. */
- do {
- while ( (inb(uart->io_base + LSR) & LSR_DR) == 0 )
- barrier();
- c = inb(uart->io_base + RBR);
- }
- while ( !byte_matches(handle, &c) );
-
- out:
- return c;
+ int i;
+ for ( i = 0; i < ARRAY_SIZE(com); i++ )
+ if ( com[i].driver && com[i].driver->init_preirq )
+ com[i].driver->init_preirq(&com[i]);
}
-unsigned char serial_getc(int handle)
+void serial_init_postirq(void)
{
- uart_t *uart = &com[handle & SERHND_IDX];
- unsigned char c;
- unsigned long flags;
+ int i;
+ for ( i = 0; i < ARRAY_SIZE(com); i++ )
+ if ( com[i].driver && com[i].driver->init_postirq )
+ com[i].driver->init_postirq(&com[i]);
+}
- spin_lock_irqsave(&uart->lock, flags);
+void serial_endboot(void)
+{
+ int i;
+ for ( i = 0; i < ARRAY_SIZE(com); i++ )
+ if ( com[i].driver && com[i].driver->endboot )
+ com[i].driver->endboot(&com[i]);
+}
- while ( uart->rxbufp != uart->rxbufc )
- {
- c = uart->rxbuf[MASK_RXBUF_IDX(uart->rxbufc++)];
- if ( byte_matches(handle, &c) )
- goto out;
- }
-
- disable_irq(uart->irq);
+void serial_register_uart(int idx, struct uart_driver *driver, void *uart)
+{
+ /* Store UART-specific info. */
+ com[idx].driver = driver;
+ com[idx].uart = uart;
- c = irq_serial_getc(handle);
-
- enable_irq(uart->irq);
- out:
- spin_unlock_irqrestore(&uart->lock, flags);
- return c;
+ /* Default is no transmit FIFO. */
+ com[idx].tx_fifo_size = 1;
}
-void serial_force_unlock(int handle)
+void serial_async_transmit(struct serial_port *port)
{
- uart_t *uart = &com[handle & SERHND_IDX];
- if ( handle != -1 )
- uart->lock = SPIN_LOCK_UNLOCKED;
+ BUG_ON(!port->driver->tx_empty);
+ if ( !port->txbuf )
+ port->txbuf = alloc_xenheap_pages(get_order(SERIAL_TXBUFSZ));
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/drivers/pci/Makefile b/xen/drivers/pci/Makefile
deleted file mode 100644
index f2580105df..0000000000
--- a/xen/drivers/pci/Makefile
+++ /dev/null
@@ -1,45 +0,0 @@
-#
-# Makefile for the PCI bus specific drivers.
-#
-
-include $(BASEDIR)/Rules.mk
-
-OBJS := pci.o quirks.o names.o setup-res.o
-
-#obj-$(CONFIG_PCI) += pci.o quirks.o compat.o names.o
-#obj-$(CONFIG_PROC_FS) += proc.o
-
-#ifndef CONFIG_SPARC64
-#obj-$(CONFIG_PCI) += setup-res.o
-#endif
-
-#
-# Some architectures use the generic PCI setup functions
-#
-#obj-$(CONFIG_ALPHA) += setup-bus.o setup-irq.o
-#obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o
-#obj-$(CONFIG_PARISC) += setup-bus.o
-#obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o
-#obj-$(CONFIG_ALL_PPC) += setup-bus.o
-#obj-$(CONFIG_DDB5476) += setup-bus.o
-#obj-$(CONFIG_SGI_IP27) += setup-irq.o
-
-#ifndef CONFIG_X86
-#obj-y += syscall.o
-#endif
-
-default: driver.o
-driver.o: $(OBJS)
- $(LD) $(LDFLAGS) -r -o driver.o $(OBJS)
-
-clean:
- rm -f *.o *~ core gen-devlist classlist.h devlist.h
-
-names.o: names.c devlist.h classlist.h
-
-devlist.h classlist.h: pci.ids gen-devlist
- ./gen-devlist <pci.ids
-
-gen-devlist: gen-devlist.c
- $(HOSTCC) $(HOSTCFLAGS) -o gen-devlist gen-devlist.c
-
diff --git a/xen/drivers/pci/gen-devlist.c b/xen/drivers/pci/gen-devlist.c
deleted file mode 100644
index c0c242010e..0000000000
--- a/xen/drivers/pci/gen-devlist.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Generate devlist.h and classlist.h from the PCI ID file.
- *
- * (c) 1999--2002 Martin Mares <mj@ucw.cz>
- */
-
-#include <stdio.h>
-#include <string.h>
-
-#define MAX_NAME_SIZE 79
-
-static void
-pq(FILE *f, const char *c)
-{
- while (*c) {
- if (*c == '"')
- fprintf(f, "\\\"");
- else {
- fputc(*c, f);
- if (*c == '?' && c[1] == '?') {
- /* Avoid trigraphs */
- fprintf(f, "\" \"");
- }
- }
- c++;
- }
-}
-
-int
-main(void)
-{
- char line[1024], *c, *bra, vend[8];
- int vendors = 0;
- int mode = 0;
- int lino = 0;
- int vendor_len = 0;
- FILE *devf, *clsf;
-
- devf = fopen("devlist.h", "w");
- clsf = fopen("classlist.h", "w");
- if (!devf || !clsf) {
- fprintf(stderr, "Cannot create output file!\n");
- return 1;
- }
-
- while (fgets(line, sizeof(line)-1, stdin)) {
- lino++;
- if ((c = strchr(line, '\n')))
- *c = 0;
- if (!line[0] || line[0] == '#')
- continue;
- if (line[1] == ' ') {
- if (line[0] == 'C' && strlen(line) > 4 && line[4] == ' ') {
- vend[0] = line[2];
- vend[1] = line[3];
- vend[2] = 0;
- mode = 2;
- } else goto err;
- }
- else if (line[0] == '\t') {
- if (line[1] == '\t')
- continue;
- switch (mode) {
- case 1:
- if (strlen(line) > 5 && line[5] == ' ') {
- c = line + 5;
- while (*c == ' ')
- *c++ = 0;
- if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
- /* Too long, try cutting off long description */
- bra = strchr(c, '[');
- if (bra && bra > c && bra[-1] == ' ')
- bra[-1] = 0;
- if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
- fprintf(stderr, "Line %d: Device name too long\n", lino);
- fprintf(stderr, "%s\n", c);
- return 1;
- }
- }
- fprintf(devf, "\tDEVICE(%s,%s,\"", vend, line+1);
- pq(devf, c);
- fputs("\")\n", devf);
- } else goto err;
- break;
- case 2:
- if (strlen(line) > 3 && line[3] == ' ') {
- c = line + 3;
- while (*c == ' ')
- *c++ = 0;
- fprintf(clsf, "CLASS(%s%s, \"%s\")\n", vend, line+1, c);
- } else goto err;
- break;
- default:
- goto err;
- }
- } else if (strlen(line) > 4 && line[4] == ' ') {
- c = line + 4;
- while (*c == ' ')
- *c++ = 0;
- if (vendors)
- fputs("ENDVENDOR()\n\n", devf);
- vendors++;
- strcpy(vend, line);
- vendor_len = strlen(c);
- if (vendor_len + 24 > MAX_NAME_SIZE) {
- fprintf(stderr, "Line %d: Vendor name too long\n", lino);
- return 1;
- }
- fprintf(devf, "VENDOR(%s,\"", vend);
- pq(devf, c);
- fputs("\")\n", devf);
- mode = 1;
- } else {
- err:
- fprintf(stderr, "Line %d: Syntax error in mode %d: %s\n", lino, mode, line);
- return 1;
- }
- }
- fputs("ENDVENDOR()\n\
-\n\
-#undef VENDOR\n\
-#undef DEVICE\n\
-#undef ENDVENDOR\n", devf);
- fputs("\n#undef CLASS\n", clsf);
-
- fclose(devf);
- fclose(clsf);
-
- return 0;
-}
diff --git a/xen/drivers/pci/names.c b/xen/drivers/pci/names.c
deleted file mode 100644
index efb95b8d0c..0000000000
--- a/xen/drivers/pci/names.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * PCI Class and Device Name Tables
- *
- * Copyright 1993--1999 Drew Eckhardt, Frederic Potter,
- * David Mosberger-Tang, Martin Mares
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-/*#include <xen/kernel.h>*/
-#include <xen/pci.h>
-#include <xen/init.h>
-
-#ifdef CONFIG_PCI_NAMES
-
-struct pci_device_info {
- unsigned short device;
- unsigned short seen;
- const char *name;
-};
-
-struct pci_vendor_info {
- unsigned short vendor;
- unsigned short nr;
- const char *name;
- struct pci_device_info *devices;
-};
-
-/*
- * This is ridiculous, but we want the strings in
- * the .init section so that they don't take up
- * real memory.. Parse the same file multiple times
- * to get all the info.
- */
-#define VENDOR( vendor, name ) static char __vendorstr_##vendor[] __devinitdata = name;
-#define ENDVENDOR()
-#define DEVICE( vendor, device, name ) static char __devicestr_##vendor##device[] __devinitdata = name;
-#include "devlist.h"
-
-
-#define VENDOR( vendor, name ) static struct pci_device_info __devices_##vendor[] __devinitdata = {
-#define ENDVENDOR() };
-#define DEVICE( vendor, device, name ) { 0x##device, 0, __devicestr_##vendor##device },
-#include "devlist.h"
-
-static struct pci_vendor_info __devinitdata pci_vendor_list[] = {
-#define VENDOR( vendor, name ) { 0x##vendor, sizeof(__devices_##vendor) / sizeof(struct pci_device_info), __vendorstr_##vendor, __devices_##vendor },
-#define ENDVENDOR()
-#define DEVICE( vendor, device, name )
-#include "devlist.h"
-};
-
-#define VENDORS (sizeof(pci_vendor_list)/sizeof(struct pci_vendor_info))
-
-void __devinit pci_name_device(struct pci_dev *dev)
-{
- const struct pci_vendor_info *vendor_p = pci_vendor_list;
- int i = VENDORS;
- char *name = dev->name;
-
- do {
- if (vendor_p->vendor == dev->vendor)
- goto match_vendor;
- vendor_p++;
- } while (--i);
-
- /* Couldn't find either the vendor nor the device */
- sprintf(name, "PCI device %04x:%04x", dev->vendor, dev->device);
- return;
-
- match_vendor: {
- struct pci_device_info *device_p = vendor_p->devices;
- int i = vendor_p->nr;
-
- while (i > 0) {
- if (device_p->device == dev->device)
- goto match_device;
- device_p++;
- i--;
- }
-
- /* Ok, found the vendor, but unknown device */
- sprintf(name, "PCI device %04x:%04x (%s)", dev->vendor, dev->device, vendor_p->name);
- return;
-
- /* Full match */
- match_device: {
- char *n = name + sprintf(name, "%s %s", vendor_p->name, device_p->name);
- int nr = device_p->seen + 1;
- device_p->seen = nr;
- if (nr > 1)
- sprintf(n, " (#%d)", nr);
- }
- }
-}
-
-/*
- * Class names. Not in .init section as they are needed in runtime.
- */
-
-static u16 pci_class_numbers[] = {
-#define CLASS(x,y) 0x##x,
-#include "classlist.h"
-};
-
-static char *pci_class_names[] = {
-#define CLASS(x,y) y,
-#include "classlist.h"
-};
-
-char *
-pci_class_name(u32 class)
-{
- int i;
-
- for(i=0; i<sizeof(pci_class_numbers)/sizeof(pci_class_numbers[0]); i++)
- if (pci_class_numbers[i] == class)
- return pci_class_names[i];
- return NULL;
-}
-
-#else
-
-void __devinit pci_name_device(struct pci_dev *dev)
-{
-}
-
-char *
-pci_class_name(u32 class)
-{
- return NULL;
-}
-
-#endif /* CONFIG_PCI_NAMES */
-
diff --git a/xen/drivers/pci/pci.c b/xen/drivers/pci/pci.c
deleted file mode 100644
index 50a4ebb5e0..0000000000
--- a/xen/drivers/pci/pci.c
+++ /dev/null
@@ -1,1773 +0,0 @@
-/*
- * $Id: pci.c,v 1.91 1999/01/21 13:34:01 davem Exp $
- *
- * PCI Bus Services, see include/linux/pci.h for further explanation.
- *
- * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter,
- * David Mosberger-Tang
- *
- * Copyright 1997 -- 2000 Martin Mares <mj@ucw.cz>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/types.h>
-#include <xen/pci.h>
-#include <xen/init.h>
-#include <xen/slab.h>
-#include <xen/ioport.h>
-#include <xen/spinlock.h>
-#include <xen/delay.h>
-#include <xen/cache.h>
-#include <asm/page.h>
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(x...) printk(x)
-#else
-#define DBG(x...)
-#endif
-
-LIST_HEAD(pci_root_buses);
-LIST_HEAD(pci_devices);
-
-/**
- * pci_find_slot - locate PCI device from a given PCI slot
- * @bus: number of PCI bus on which desired PCI device resides
- * @devfn: encodes number of PCI slot in which the desired PCI
- * device resides and the logical device number within that slot
- * in case of multi-function devices.
- *
- * Given a PCI bus and slot/function number, the desired PCI device
- * is located in system global list of PCI devices. If the device
- * is found, a pointer to its data structure is returned. If no
- * device is found, %NULL is returned.
- */
-struct pci_dev *
-pci_find_slot(unsigned int bus, unsigned int devfn)
-{
- struct pci_dev *dev;
-
- pci_for_each_dev(dev) {
- if (dev->bus->number == bus && dev->devfn == devfn)
- return dev;
- }
- return NULL;
-}
-
-/**
- * pci_find_subsys - begin or continue searching for a PCI device by vendor/subvendor/device/subdevice id
- * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
- * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
- * @ss_vendor: PCI subsystem vendor id to match, or %PCI_ANY_ID to match all vendor ids
- * @ss_device: PCI subsystem device id to match, or %PCI_ANY_ID to match all device ids
- * @from: Previous PCI device found in search, or %NULL for new search.
- *
- * Iterates through the list of known PCI devices. If a PCI device is
- * found with a matching @vendor, @device, @ss_vendor and @ss_device, a pointer to its
- * device structure is returned. Otherwise, %NULL is returned.
- * A new search is initiated by passing %NULL to the @from argument.
- * Otherwise if @from is not %NULL, searches continue from next device on the global list.
- */
-struct pci_dev *
-pci_find_subsys(unsigned int vendor, unsigned int device,
- unsigned int ss_vendor, unsigned int ss_device,
- const struct pci_dev *from)
-{
- struct list_head *n = from ? from->global_list.next : pci_devices.next;
-
- while (n != &pci_devices) {
- struct pci_dev *dev = pci_dev_g(n);
- if ((vendor == PCI_ANY_ID || dev->vendor == vendor) &&
- (device == PCI_ANY_ID || dev->device == device) &&
- (ss_vendor == PCI_ANY_ID || dev->subsystem_vendor == ss_vendor) &&
- (ss_device == PCI_ANY_ID || dev->subsystem_device == ss_device))
- return dev;
- n = n->next;
- }
- return NULL;
-}
-
-
-/**
- * pci_find_device - begin or continue searching for a PCI device by vendor/device id
- * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
- * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
- * @from: Previous PCI device found in search, or %NULL for new search.
- *
- * Iterates through the list of known PCI devices. If a PCI device is
- * found with a matching @vendor and @device, a pointer to its device structure is
- * returned. Otherwise, %NULL is returned.
- * A new search is initiated by passing %NULL to the @from argument.
- * Otherwise if @from is not %NULL, searches continue from next device on the global list.
- */
-struct pci_dev *
-pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
-{
- return pci_find_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from);
-}
-
-
-/**
- * pci_find_class - begin or continue searching for a PCI device by class
- * @class: search for a PCI device with this class designation
- * @from: Previous PCI device found in search, or %NULL for new search.
- *
- * Iterates through the list of known PCI devices. If a PCI device is
- * found with a matching @class, a pointer to its device structure is
- * returned. Otherwise, %NULL is returned.
- * A new search is initiated by passing %NULL to the @from argument.
- * Otherwise if @from is not %NULL, searches continue from next device
- * on the global list.
- */
-struct pci_dev *
-pci_find_class(unsigned int class, const struct pci_dev *from)
-{
- struct list_head *n = from ? from->global_list.next : pci_devices.next;
-
- while (n != &pci_devices) {
- struct pci_dev *dev = pci_dev_g(n);
- if (dev->class == class)
- return dev;
- n = n->next;
- }
- return NULL;
-}
-
-/**
- * pci_find_capability - query for devices' capabilities
- * @dev: PCI device to query
- * @cap: capability code
- *
- * Tell if a device supports a given PCI capability.
- * Returns the address of the requested capability structure within the
- * device's PCI configuration space or 0 in case the device does not
- * support it. Possible values for @cap:
- *
- * %PCI_CAP_ID_PM Power Management
- *
- * %PCI_CAP_ID_AGP Accelerated Graphics Port
- *
- * %PCI_CAP_ID_VPD Vital Product Data
- *
- * %PCI_CAP_ID_SLOTID Slot Identification
- *
- * %PCI_CAP_ID_MSI Message Signalled Interrupts
- *
- * %PCI_CAP_ID_CHSWP CompactPCI HotSwap
- *
- * %PCI_CAP_ID_PCIX PCI-X
- */
-int
-pci_find_capability(struct pci_dev *dev, int cap)
-{
- u16 status;
- u8 pos, id;
- int ttl = 48;
-
- pci_read_config_word(dev, PCI_STATUS, &status);
- if (!(status & PCI_STATUS_CAP_LIST))
- return 0;
- switch (dev->hdr_type) {
- case PCI_HEADER_TYPE_NORMAL:
- case PCI_HEADER_TYPE_BRIDGE:
- pci_read_config_byte(dev, PCI_CAPABILITY_LIST, &pos);
- break;
- case PCI_HEADER_TYPE_CARDBUS:
- pci_read_config_byte(dev, PCI_CB_CAPABILITY_LIST, &pos);
- break;
- default:
- return 0;
- }
- while (ttl-- && pos >= 0x40) {
- pos &= ~3;
- pci_read_config_byte(dev, pos + PCI_CAP_LIST_ID, &id);
- if (id == 0xff)
- break;
- if (id == cap)
- return pos;
- pci_read_config_byte(dev, pos + PCI_CAP_LIST_NEXT, &pos);
- }
- return 0;
-}
-
-
-/**
- * pci_find_parent_resource - return resource region of parent bus of given region
- * @dev: PCI device structure contains resources to be searched
- * @res: child resource record for which parent is sought
- *
- * For given resource region of given device, return the resource
- * region of parent bus the given region is contained in or where
- * it should be allocated from.
- */
-struct resource *
-pci_find_parent_resource(const struct pci_dev *dev, struct resource *res)
-{
- const struct pci_bus *bus = dev->bus;
- int i;
- struct resource *best = NULL;
-
- for(i=0; i<4; i++) {
- struct resource *r = bus->resource[i];
- if (!r)
- continue;
- if (res->start && !(res->start >= r->start && res->end <= r->end))
- continue; /* Not contained */
- if ((res->flags ^ r->flags) & (IORESOURCE_IO | IORESOURCE_MEM))
- continue; /* Wrong type */
- if (!((res->flags ^ r->flags) & IORESOURCE_PREFETCH))
- return r; /* Exact match */
- if ((res->flags & IORESOURCE_PREFETCH) && !(r->flags & IORESOURCE_PREFETCH))
- best = r; /* Approximating prefetchable by non-prefetchable */
- }
- return best;
-}
-
-/**
- * pci_set_power_state - Set the power state of a PCI device
- * @dev: PCI device to be suspended
- * @state: Power state we're entering
- *
- * Transition a device to a new power state, using the Power Management
- * Capabilities in the device's config space.
- *
- * RETURN VALUE:
- * -EINVAL if trying to enter a lower state than we're already in.
- * 0 if we're already in the requested state.
- * -EIO if device does not support PCI PM.
- * 0 if we can successfully change the power state.
- */
-
-int
-pci_set_power_state(struct pci_dev *dev, int state)
-{
- int pm;
- u16 pmcsr;
-
- /* bound the state we're entering */
- if (state > 3) state = 3;
-
- /* Validate current state:
- * Can enter D0 from any state, but if we can only go deeper
- * to sleep if we're already in a low power state
- */
- if (state > 0 && dev->current_state > state)
- return -EINVAL;
- else if (dev->current_state == state)
- return 0; /* we're already there */
-
- /* find PCI PM capability in list */
- pm = pci_find_capability(dev, PCI_CAP_ID_PM);
-
- /* abort if the device doesn't support PM capabilities */
- if (!pm) return -EIO;
-
- /* check if this device supports the desired state */
- if (state == 1 || state == 2) {
- u16 pmc;
- pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc);
- if (state == 1 && !(pmc & PCI_PM_CAP_D1)) return -EIO;
- else if (state == 2 && !(pmc & PCI_PM_CAP_D2)) return -EIO;
- }
-
- /* If we're in D3, force entire word to 0.
- * This doesn't affect PME_Status, disables PME_En, and
- * sets PowerState to 0.
- */
- if (dev->current_state >= 3)
- pmcsr = 0;
- else {
- pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr);
- pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
- pmcsr |= state;
- }
-
- /* enter specified state */
- pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr);
-
- /* Mandatory power management transition delays */
- /* see PCI PM 1.1 5.6.1 table 18 */
- if(state == 3 || dev->current_state == 3)
- mdelay(10);
- else if(state == 2 || dev->current_state == 2)
- udelay(200);
- dev->current_state = state;
-
- return 0;
-}
-
-/**
- * pci_save_state - save the PCI configuration space of a device before suspending
- * @dev: - PCI device that we're dealing with
- * @buffer: - buffer to hold config space context
- *
- * @buffer must be large enough to hold the entire PCI 2.2 config space
- * (>= 64 bytes).
- */
-int
-pci_save_state(struct pci_dev *dev, u32 *buffer)
-{
- int i;
- if (buffer) {
- /* XXX: 100% dword access ok here? */
- for (i = 0; i < 16; i++)
- pci_read_config_dword(dev, i * 4,&buffer[i]);
- }
- return 0;
-}
-
-/**
- * pci_restore_state - Restore the saved state of a PCI device
- * @dev: - PCI device that we're dealing with
- * @buffer: - saved PCI config space
- *
- */
-int
-pci_restore_state(struct pci_dev *dev, u32 *buffer)
-{
- int i;
-
- if (buffer) {
- for (i = 0; i < 16; i++)
- pci_write_config_dword(dev,i * 4, buffer[i]);
- }
- /*
- * otherwise, write the context information we know from bootup.
- * This works around a problem where warm-booting from Windows
- * combined with a D3(hot)->D0 transition causes PCI config
- * header data to be forgotten.
- */
- else {
- for (i = 0; i < 6; i ++)
- pci_write_config_dword(dev,
- PCI_BASE_ADDRESS_0 + (i * 4),
- dev->resource[i].start);
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
- }
- return 0;
-}
-
-/**
- * pci_enable_device_bars - Initialize some of a device for use
- * @dev: PCI device to be initialized
- * @bars: bitmask of BAR's that must be configured
- *
- * Initialize device before it's used by a driver. Ask low-level code
- * to enable selected I/O and memory resources. Wake up the device if it
- * was suspended. Beware, this function can fail.
- */
-
-int
-pci_enable_device_bars(struct pci_dev *dev, int bars)
-{
- int err;
-
- pci_set_power_state(dev, 0);
- if ((err = pcibios_enable_device(dev, bars)) < 0)
- return err;
- return 0;
-}
-
-/**
- * pci_enable_device - Initialize device before it's used by a driver.
- * @dev: PCI device to be initialized
- *
- * Initialize device before it's used by a driver. Ask low-level code
- * to enable I/O and memory. Wake up the device if it was suspended.
- * Beware, this function can fail.
- */
-int
-pci_enable_device(struct pci_dev *dev)
-{
- return pci_enable_device_bars(dev, 0x3F);
-}
-
-/**
- * pci_disable_device - Disable PCI device after use
- * @dev: PCI device to be disabled
- *
- * Signal to the system that the PCI device is not in use by the system
- * anymore. This only involves disabling PCI bus-mastering, if active.
- */
-void
-pci_disable_device(struct pci_dev *dev)
-{
- u16 pci_command;
-
- pci_read_config_word(dev, PCI_COMMAND, &pci_command);
- if (pci_command & PCI_COMMAND_MASTER) {
- pci_command &= ~PCI_COMMAND_MASTER;
- pci_write_config_word(dev, PCI_COMMAND, pci_command);
- }
-}
-
-/**
- * pci_enable_wake - enable device to generate PME# when suspended
- * @dev: - PCI device to operate on
- * @state: - Current state of device.
- * @enable: - Flag to enable or disable generation
- *
- * Set the bits in the device's PM Capabilities to generate PME# when
- * the system is suspended.
- *
- * -EIO is returned if device doesn't have PM Capabilities.
- * -EINVAL is returned if device supports it, but can't generate wake events.
- * 0 if operation is successful.
- *
- */
-int pci_enable_wake(struct pci_dev *dev, u32 state, int enable)
-{
- int pm;
- u16 value;
-
- /* find PCI PM capability in list */
- pm = pci_find_capability(dev, PCI_CAP_ID_PM);
-
- /* If device doesn't support PM Capabilities, but request is to disable
- * wake events, it's a nop; otherwise fail */
- if (!pm)
- return enable ? -EIO : 0;
-
- /* Check device's ability to generate PME# */
- pci_read_config_word(dev,pm+PCI_PM_PMC,&value);
-
- value &= PCI_PM_CAP_PME_MASK;
- value >>= ffs(value); /* First bit of mask */
-
- /* Check if it can generate PME# from requested state. */
- if (!value || !(value & (1 << state)))
- return enable ? -EINVAL : 0;
-
- pci_read_config_word(dev, pm + PCI_PM_CTRL, &value);
-
- /* Clear PME_Status by writing 1 to it and enable PME# */
- value |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE;
-
- if (!enable)
- value &= ~PCI_PM_CTRL_PME_ENABLE;
-
- pci_write_config_word(dev, pm + PCI_PM_CTRL, value);
-
- return 0;
-}
-
-int
-pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
-{
- u8 pin;
-
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (!pin)
- return -1;
- pin--;
- while (dev->bus->self) {
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
- dev = dev->bus->self;
- }
- *bridge = dev;
- return pin;
-}
-
-/**
- * pci_release_region - Release a PCI bar
- * @pdev: PCI device whose resources were previously reserved by pci_request_region
- * @bar: BAR to release
- *
- * Releases the PCI I/O and memory resources previously reserved by a
- * successful call to pci_request_region. Call this function only
- * after all use of the PCI regions has ceased.
- */
-void pci_release_region(struct pci_dev *pdev, int bar)
-{
- if (pci_resource_len(pdev, bar) == 0)
- return;
- if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
- release_region(pci_resource_start(pdev, bar),
- pci_resource_len(pdev, bar));
- else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
- release_mem_region(pci_resource_start(pdev, bar),
- pci_resource_len(pdev, bar));
-}
-
-/**
- * pci_request_region - Reserved PCI I/O and memory resource
- * @pdev: PCI device whose resources are to be reserved
- * @bar: BAR to be reserved
- * @res_name: Name to be associated with resource.
- *
- * Mark the PCI region associated with PCI device @pdev BR @bar as
- * being reserved by owner @res_name. Do not access any
- * address inside the PCI regions unless this call returns
- * successfully.
- *
- * Returns 0 on success, or %EBUSY on error. A warning
- * message is also printed on failure.
- */
-int pci_request_region(struct pci_dev *pdev, int bar, char *res_name)
-{
- if (pci_resource_len(pdev, bar) == 0)
- return 0;
-
- if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) {
- if (!request_region(pci_resource_start(pdev, bar),
- pci_resource_len(pdev, bar), res_name))
- goto err_out;
- }
- else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
- if (!request_mem_region(pci_resource_start(pdev, bar),
- pci_resource_len(pdev, bar), res_name))
- goto err_out;
- }
-
- return 0;
-
-err_out:
- printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%lx@%lx for device %s\n",
- pci_resource_flags(pdev, bar) & IORESOURCE_IO ? "I/O" : "mem",
- bar + 1, /* PCI BAR # */
- pci_resource_len(pdev, bar), pci_resource_start(pdev, bar),
- pdev->slot_name);
- return -EBUSY;
-}
-
-
-/**
- * pci_release_regions - Release reserved PCI I/O and memory resources
- * @pdev: PCI device whose resources were previously reserved by pci_request_regions
- *
- * Releases all PCI I/O and memory resources previously reserved by a
- * successful call to pci_request_regions. Call this function only
- * after all use of the PCI regions has ceased.
- */
-
-void pci_release_regions(struct pci_dev *pdev)
-{
- int i;
-
- for (i = 0; i < 6; i++)
- pci_release_region(pdev, i);
-}
-
-/**
- * pci_request_regions - Reserved PCI I/O and memory resources
- * @pdev: PCI device whose resources are to be reserved
- * @res_name: Name to be associated with resource.
- *
- * Mark all PCI regions associated with PCI device @pdev as
- * being reserved by owner @res_name. Do not access any
- * address inside the PCI regions unless this call returns
- * successfully.
- *
- * Returns 0 on success, or %EBUSY on error. A warning
- * message is also printed on failure.
- */
-int pci_request_regions(struct pci_dev *pdev, char *res_name)
-{
- int i;
-
- for (i = 0; i < 6; i++)
- if(pci_request_region(pdev, i, res_name))
- goto err_out;
- return 0;
-
-err_out:
- printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%lx@%lx for device %s\n",
- pci_resource_flags(pdev, i) & IORESOURCE_IO ? "I/O" : "mem",
- i + 1, /* PCI BAR # */
- pci_resource_len(pdev, i), pci_resource_start(pdev, i),
- pdev->slot_name);
- while(--i >= 0)
- pci_release_region(pdev, i);
-
- return -EBUSY;
-}
-
-
-/*
- * Registration of PCI drivers and handling of hot-pluggable devices.
- */
-
-static LIST_HEAD(pci_drivers);
-
-/**
- * pci_match_device - Tell if a PCI device structure has a matching PCI device id structure
- * @ids: array of PCI device id structures to search in
- * @dev: the PCI device structure to match against
- *
- * Used by a driver to check whether a PCI device present in the
- * system is in its list of supported devices.Returns the matching
- * pci_device_id structure or %NULL if there is no match.
- */
-const struct pci_device_id *
-pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev)
-{
- while (ids->vendor || ids->subvendor || ids->class_mask) {
- if ((ids->vendor == PCI_ANY_ID || ids->vendor == dev->vendor) &&
- (ids->device == PCI_ANY_ID || ids->device == dev->device) &&
- (ids->subvendor == PCI_ANY_ID || ids->subvendor == dev->subsystem_vendor) &&
- (ids->subdevice == PCI_ANY_ID || ids->subdevice == dev->subsystem_device) &&
- !((ids->class ^ dev->class) & ids->class_mask))
- return ids;
- ids++;
- }
- return NULL;
-}
-
-#if 0 /* NOT IN XEN */
-static int pci_announce_device(struct pci_driver *drv, struct pci_dev *dev)
-int pci_register_driver(struct pci_driver *drv)
-void pci_unregister_driver(struct pci_driver *drv)
-#endif
-
-#ifdef CONFIG_HOTPLUG
-
-#ifndef FALSE
-#define FALSE (0)
-#define TRUE (!FALSE)
-#endif
-
-static void
-run_sbin_hotplug(struct pci_dev *pdev, int insert)
-{
- int i;
- char *argv[3], *envp[8];
- char id[20], sub_id[24], bus_id[24], class_id[20];
-
- if (!hotplug_path[0])
- return;
-
- sprintf(class_id, "PCI_CLASS=%04X", pdev->class);
- sprintf(id, "PCI_ID=%04X:%04X", pdev->vendor, pdev->device);
- sprintf(sub_id, "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor, pdev->subsystem_device);
- sprintf(bus_id, "PCI_SLOT_NAME=%s", pdev->slot_name);
-
- i = 0;
- argv[i++] = hotplug_path;
- argv[i++] = "pci";
- argv[i] = 0;
-
- i = 0;
- /* minimal command environment */
- envp[i++] = "HOME=/";
- envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-
- /* other stuff we want to pass to /sbin/hotplug */
- envp[i++] = class_id;
- envp[i++] = id;
- envp[i++] = sub_id;
- envp[i++] = bus_id;
- if (insert)
- envp[i++] = "ACTION=add";
- else
- envp[i++] = "ACTION=remove";
- envp[i] = 0;
-
- call_usermodehelper (argv [0], argv, envp);
-}
-
-/**
- * pci_announce_device_to_drivers - tell the drivers a new device has appeared
- * @dev: the device that has shown up
- *
- * Notifys the drivers that a new device has appeared, and also notifys
- * userspace through /sbin/hotplug.
- */
-void
-pci_announce_device_to_drivers(struct pci_dev *dev)
-{
- struct list_head *ln;
-
- for(ln=pci_drivers.next; ln != &pci_drivers; ln=ln->next) {
- struct pci_driver *drv = list_entry(ln, struct pci_driver, node);
- if (drv->remove && pci_announce_device(drv, dev))
- break;
- }
-
- /* notify userspace of new hotplug device */
- run_sbin_hotplug(dev, TRUE);
-}
-
-/**
- * pci_insert_device - insert a hotplug device
- * @dev: the device to insert
- * @bus: where to insert it
- *
- * Add a new device to the device lists and notify userspace (/sbin/hotplug).
- */
-void
-pci_insert_device(struct pci_dev *dev, struct pci_bus *bus)
-{
- list_add_tail(&dev->bus_list, &bus->devices);
- list_add_tail(&dev->global_list, &pci_devices);
-#ifdef CONFIG_PROC_FS
- pci_proc_attach_device(dev);
-#endif
- pci_announce_device_to_drivers(dev);
-}
-
-static void
-pci_free_resources(struct pci_dev *dev)
-{
- int i;
-
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- struct resource *res = dev->resource + i;
- if (res->parent)
- release_resource(res);
- }
-}
-
-/**
- * pci_remove_device - remove a hotplug device
- * @dev: the device to remove
- *
- * Delete the device structure from the device lists and
- * notify userspace (/sbin/hotplug).
- */
-void
-pci_remove_device(struct pci_dev *dev)
-{
- if (dev->driver) {
- if (dev->driver->remove)
- dev->driver->remove(dev);
- dev->driver = NULL;
- }
- list_del(&dev->bus_list);
- list_del(&dev->global_list);
- pci_free_resources(dev);
-#ifdef CONFIG_PROC_FS
- pci_proc_detach_device(dev);
-#endif
-
- /* notify userspace of hotplug device removal */
- run_sbin_hotplug(dev, FALSE);
-}
-
-#endif
-
-static struct pci_driver pci_compat_driver = {
- name: "compat"
-};
-
-/**
- * pci_dev_driver - get the pci_driver of a device
- * @dev: the device to query
- *
- * Returns the appropriate pci_driver structure or %NULL if there is no
- * registered driver for the device.
- */
-struct pci_driver *
-pci_dev_driver(const struct pci_dev *dev)
-{
- if (dev->driver)
- return dev->driver;
- else {
- int i;
- for(i=0; i<=PCI_ROM_RESOURCE; i++)
- if (dev->resource[i].flags & IORESOURCE_BUSY)
- return &pci_compat_driver;
- }
- return NULL;
-}
-
-
-/*
- * This interrupt-safe spinlock protects all accesses to PCI
- * configuration space.
- */
-
-static spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
-
-/*
- * Wrappers for all PCI configuration access functions. They just check
- * alignment, do locking and call the low-level functions pointed to
- * by pci_dev->ops.
- */
-
-#define PCI_byte_BAD 0
-#define PCI_word_BAD (pos & 1)
-#define PCI_dword_BAD (pos & 3)
-
-#define PCI_OP(rw,size,type) \
-int pci_##rw##_config_##size (struct pci_dev *dev, int pos, type value) \
-{ \
- int res; \
- unsigned long flags; \
- if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
- spin_lock_irqsave(&pci_lock, flags); \
- res = dev->bus->ops->rw##_##size(dev, pos, value); \
- spin_unlock_irqrestore(&pci_lock, flags); \
- return res; \
-}
-
-PCI_OP(read, byte, u8 *)
-PCI_OP(read, word, u16 *)
-PCI_OP(read, dword, u32 *)
-PCI_OP(write, byte, u8)
-PCI_OP(write, word, u16)
-PCI_OP(write, dword, u32)
-
-/**
- * pci_set_master - enables bus-mastering for device dev
- * @dev: the PCI device to enable
- *
- * Enables bus-mastering on the device and calls pcibios_set_master()
- * to do the needed arch specific settings.
- */
-void
-pci_set_master(struct pci_dev *dev)
-{
- u16 cmd;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- if (! (cmd & PCI_COMMAND_MASTER)) {
- DBG("PCI: Enabling bus mastering for device %s\n", dev->slot_name);
- cmd |= PCI_COMMAND_MASTER;
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- pcibios_set_master(dev);
-}
-
-#ifndef HAVE_ARCH_PCI_MWI
-/* This can be overridden by arch code. */
-u8 pci_cache_line_size = L1_CACHE_BYTES >> 2;
-
-/**
- * pci_generic_prep_mwi - helper function for pci_set_mwi
- * @dev: the PCI device for which MWI is enabled
- *
- * Helper function for implementation the arch-specific pcibios_set_mwi
- * function. Originally copied from drivers/net/acenic.c.
- * Copyright 1998-2001 by Jes Sorensen, <jes@trained-monkey.org>.
- *
- * RETURNS: An appriopriate -ERRNO error value on eror, or zero for success.
- */
-static int
-pci_generic_prep_mwi(struct pci_dev *dev)
-{
- u8 cacheline_size;
-
- if (!pci_cache_line_size)
- return -EINVAL; /* The system doesn't support MWI. */
-
- /* Validate current setting: the PCI_CACHE_LINE_SIZE must be
- equal to or multiple of the right value. */
- pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &cacheline_size);
- if (cacheline_size >= pci_cache_line_size &&
- (cacheline_size % pci_cache_line_size) == 0)
- return 0;
-
- /* Write the correct value. */
- pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, pci_cache_line_size);
- /* Read it back. */
- pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &cacheline_size);
- if (cacheline_size == pci_cache_line_size)
- return 0;
-
- printk(KERN_WARNING "PCI: cache line size of %d is not supported "
- "by device %s\n", pci_cache_line_size << 2, dev->slot_name);
-
- return -EINVAL;
-}
-#endif /* !HAVE_ARCH_PCI_MWI */
-
-/**
- * pci_set_mwi - enables memory-write-invalidate PCI transaction
- * @dev: the PCI device for which MWI is enabled
- *
- * Enables the Memory-Write-Invalidate transaction in %PCI_COMMAND,
- * and then calls @pcibios_set_mwi to do the needed arch specific
- * operations or a generic mwi-prep function.
- *
- * RETURNS: An appriopriate -ERRNO error value on eror, or zero for success.
- */
-int
-pci_set_mwi(struct pci_dev *dev)
-{
- int rc;
- u16 cmd;
-
-#ifdef HAVE_ARCH_PCI_MWI
- rc = pcibios_prep_mwi(dev);
-#else
- rc = pci_generic_prep_mwi(dev);
-#endif
-
- if (rc)
- return rc;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- if (! (cmd & PCI_COMMAND_INVALIDATE)) {
- DBG("PCI: Enabling Mem-Wr-Inval for device %s\n", dev->slot_name);
- cmd |= PCI_COMMAND_INVALIDATE;
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
-
- return 0;
-}
-
-/**
- * pci_clear_mwi - disables Memory-Write-Invalidate for device dev
- * @dev: the PCI device to disable
- *
- * Disables PCI Memory-Write-Invalidate transaction on the device
- */
-void
-pci_clear_mwi(struct pci_dev *dev)
-{
- u16 cmd;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- if (cmd & PCI_COMMAND_INVALIDATE) {
- cmd &= ~PCI_COMMAND_INVALIDATE;
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
-}
-
-#if 0 /* NOT IN XEN */
-int pci_set_dma_mask(struct pci_dev *dev, u64 mask)
-int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask)
-#endif
-
-/*
- * Translate the low bits of the PCI base
- * to the resource type
- */
-static inline unsigned int pci_calc_resource_flags(unsigned int flags)
-{
- if (flags & PCI_BASE_ADDRESS_SPACE_IO)
- return IORESOURCE_IO;
-
- if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
- return IORESOURCE_MEM | IORESOURCE_PREFETCH;
-
- return IORESOURCE_MEM;
-}
-
-/*
- * Find the extent of a PCI decode, do sanity checks.
- */
-static u32 pci_size(u32 base, u32 maxbase, unsigned long mask)
-{
- u32 size = mask & maxbase; /* Find the significant bits */
- if (!size)
- return 0;
- size = size & ~(size-1); /* Get the lowest of them to find the decode size */
- size -= 1; /* extent = size - 1 */
- if (base == maxbase && ((base | size) & mask) != mask)
- return 0; /* base == maxbase can be valid only
- if the BAR has been already
- programmed with all 1s */
- return size;
-}
-
-static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
-{
- unsigned int pos, reg, next;
- u32 l, sz;
- struct resource *res;
-
- for(pos=0; pos<howmany; pos = next) {
- next = pos+1;
- res = &dev->resource[pos];
- res->name = dev->name;
- reg = PCI_BASE_ADDRESS_0 + (pos << 2);
- pci_read_config_dword(dev, reg, &l);
- pci_write_config_dword(dev, reg, ~0);
- pci_read_config_dword(dev, reg, &sz);
- pci_write_config_dword(dev, reg, l);
- if (!sz || sz == 0xffffffff)
- continue;
- if (l == 0xffffffff)
- l = 0;
- if ((l & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY) {
- sz = pci_size(l, sz, PCI_BASE_ADDRESS_MEM_MASK);
- if (!sz)
- continue;
- res->start = l & PCI_BASE_ADDRESS_MEM_MASK;
- res->flags |= l & ~PCI_BASE_ADDRESS_MEM_MASK;
- } else {
- sz = pci_size(l, sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff);
- if (!sz)
- continue;
- res->start = l & PCI_BASE_ADDRESS_IO_MASK;
- res->flags |= l & ~PCI_BASE_ADDRESS_IO_MASK;
- }
- res->end = res->start + (unsigned long) sz;
- res->flags |= pci_calc_resource_flags(l);
- if ((l & (PCI_BASE_ADDRESS_SPACE | PCI_BASE_ADDRESS_MEM_TYPE_MASK))
- == (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64)) {
- pci_read_config_dword(dev, reg+4, &l);
- next++;
-#if BITS_PER_LONG == 64
- res->start |= ((unsigned long) l) << 32;
- res->end = res->start + sz;
- pci_write_config_dword(dev, reg+4, ~0);
- pci_read_config_dword(dev, reg+4, &sz);
- pci_write_config_dword(dev, reg+4, l);
- if (~sz)
- res->end = res->start + 0xffffffff +
- (((unsigned long) ~sz) << 32);
-#else
- if (l) {
- printk(KERN_ERR "PCI: Unable to handle 64-bit address for device %s\n", dev->slot_name);
- res->start = 0;
- res->flags = 0;
- continue;
- }
-#endif
- }
- }
- if (rom) {
- dev->rom_base_reg = rom;
- res = &dev->resource[PCI_ROM_RESOURCE];
- res->name = dev->name;
- pci_read_config_dword(dev, rom, &l);
- pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE);
- pci_read_config_dword(dev, rom, &sz);
- pci_write_config_dword(dev, rom, l);
- if (l == 0xffffffff)
- l = 0;
- if (sz && sz != 0xffffffff) {
- sz = pci_size(l, sz, PCI_ROM_ADDRESS_MASK);
- if (!sz)
- return;
- res->flags = (l & PCI_ROM_ADDRESS_ENABLE) |
- IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
- res->start = l & PCI_ROM_ADDRESS_MASK;
- res->end = res->start + (unsigned long) sz;
- }
- }
-}
-
-void __devinit pci_read_bridge_bases(struct pci_bus *child)
-{
- struct pci_dev *dev = child->self;
- u8 io_base_lo, io_limit_lo;
- u16 mem_base_lo, mem_limit_lo;
- unsigned long base, limit;
- struct resource *res;
- int i;
-
- if (!dev) /* It's a host bus, nothing to read */
- return;
-
- if (dev->transparent) {
- printk("Transparent bridge - %s\n", dev->name);
- for(i = 0; i < 4; i++)
- child->resource[i] = child->parent->resource[i];
- return;
- }
-
- for(i=0; i<3; i++)
- child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
-
- res = child->resource[0];
- pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
- pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
- base = (io_base_lo & PCI_IO_RANGE_MASK) << 8;
- limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8;
-
- if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
- u16 io_base_hi, io_limit_hi;
- pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
- pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
- base |= (io_base_hi << 16);
- limit |= (io_limit_hi << 16);
- }
-
- if (base && base <= limit) {
- res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
- res->start = base;
- res->end = limit + 0xfff;
- }
-
- res = child->resource[1];
- pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
- pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
- base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
- limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
- if (base && base <= limit) {
- res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
- res->start = base;
- res->end = limit + 0xfffff;
- }
-
- res = child->resource[2];
- pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
- pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
- base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
- limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
-
- if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
- u32 mem_base_hi, mem_limit_hi;
- pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
- pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
-#if BITS_PER_LONG == 64
- base |= ((long) mem_base_hi) << 32;
- limit |= ((long) mem_limit_hi) << 32;
-#else
- if (mem_base_hi || mem_limit_hi) {
- printk(KERN_ERR "PCI: Unable to handle 64-bit address space for %s\n", child->name);
- return;
- }
-#endif
- }
- if (base && base <= limit) {
- res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
- res->start = base;
- res->end = limit + 0xfffff;
- }
-}
-
-static struct pci_bus * __devinit pci_alloc_bus(void)
-{
- struct pci_bus *b;
-
- b = xmalloc(sizeof(*b));
- if (b) {
- memset(b, 0, sizeof(*b));
- INIT_LIST_HEAD(&b->children);
- INIT_LIST_HEAD(&b->devices);
- }
- return b;
-}
-
-struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr)
-{
- struct pci_bus *child;
- int i;
-
- /*
- * Allocate a new bus, and inherit stuff from the parent..
- */
- child = pci_alloc_bus();
-
- list_add_tail(&child->node, &parent->children);
- child->self = dev;
- dev->subordinate = child;
- child->parent = parent;
- child->ops = parent->ops;
- child->sysdata = parent->sysdata;
-
- /*
- * Set up the primary, secondary and subordinate
- * bus numbers.
- */
- child->number = child->secondary = busnr;
- child->primary = parent->secondary;
- child->subordinate = 0xff;
-
- /* Set up default resource pointers and names.. */
- for (i = 0; i < 4; i++) {
- child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
- child->resource[i]->name = child->name;
- }
-
- return child;
-}
-
-/*
- * If it's a bridge, configure it and scan the bus behind it.
- * For CardBus bridges, we don't scan behind as the devices will
- * be handled by the bridge driver itself.
- *
- * We need to process bridges in two passes -- first we scan those
- * already configured by the BIOS and after we are done with all of
- * them, we proceed to assigning numbers to the remaining buses in
- * order to avoid overlaps between old and new bus numbers.
- */
-static int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass)
-{
- unsigned int buses;
- unsigned short cr;
- struct pci_bus *child;
- int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS);
-
- pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
- DBG("Scanning behind PCI bridge %s, config %06x, pass %d\n", dev->slot_name, buses & 0xffffff, pass);
- if ((buses & 0xffff00) && !pcibios_assign_all_busses()) {
- /*
- * Bus already configured by firmware, process it in the first
- * pass and just note the configuration.
- */
- if (pass)
- return max;
- child = pci_add_new_bus(bus, dev, 0);
- child->primary = buses & 0xFF;
- child->secondary = (buses >> 8) & 0xFF;
- child->subordinate = (buses >> 16) & 0xFF;
- child->number = child->secondary;
- if (!is_cardbus) {
- unsigned int cmax = pci_do_scan_bus(child);
- if (cmax > max) max = cmax;
- } else {
- unsigned int cmax = child->subordinate;
- if (cmax > max) max = cmax;
- }
- } else {
- /*
- * We need to assign a number to this bus which we always
- * do in the second pass. We also keep all address decoders
- * on the bridge disabled during scanning. FIXME: Why?
- */
- if (!pass)
- return max;
- pci_read_config_word(dev, PCI_COMMAND, &cr);
- pci_write_config_word(dev, PCI_COMMAND, 0x0000);
- pci_write_config_word(dev, PCI_STATUS, 0xffff);
-
- child = pci_add_new_bus(bus, dev, ++max);
- buses = (buses & 0xff000000)
- | ((unsigned int)(child->primary) << 0)
- | ((unsigned int)(child->secondary) << 8)
- | ((unsigned int)(child->subordinate) << 16);
- /*
- * We need to blast all three values with a single write.
- */
- pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);
- if (!is_cardbus) {
- /* Now we can scan all subordinate buses... */
- max = pci_do_scan_bus(child);
- } else {
- /*
- * For CardBus bridges, we leave 4 bus numbers
- * as cards with a PCI-to-PCI bridge can be
- * inserted later.
- */
- max += 3;
- }
- /*
- * Set the subordinate bus number to its real value.
- */
- child->subordinate = max;
- pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
- pci_write_config_word(dev, PCI_COMMAND, cr);
- }
- sprintf(child->name, (is_cardbus ? "PCI CardBus #%02x" : "PCI Bus #%02x"), child->number);
- return max;
-}
-
-/*
- * Read interrupt line and base address registers.
- * The architecture-dependent code can tweak these, of course.
- */
-static void pci_read_irq(struct pci_dev *dev)
-{
- unsigned char irq;
-
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
- if (irq)
- pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
- dev->irq = irq;
-}
-
-/**
- * pci_setup_device - fill in class and map information of a device
- * @dev: the device structure to fill
- *
- * Initialize the device structure with information about the device's
- * vendor,class,memory and IO-space addresses,IRQ lines etc.
- * Called at initialisation of the PCI subsystem and by CardBus services.
- * Returns 0 on success and -1 if unknown type of device (not normal, bridge
- * or CardBus).
- */
-int pci_setup_device(struct pci_dev * dev)
-{
- u32 class;
-
- sprintf(dev->slot_name, "%02x:%02x.%d", dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
- sprintf(dev->name, "PCI device %04x:%04x", dev->vendor, dev->device);
-
- pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
- class >>= 8; /* upper 3 bytes */
- dev->class = class;
- class >>= 8;
-
- DBG("Found %02x:%02x [%04x/%04x] %06x %02x\n", dev->bus->number, dev->devfn, dev->vendor, dev->device, class, dev->hdr_type);
-
- /* "Unknown power state" */
- dev->current_state = 4;
-
- switch (dev->hdr_type) { /* header type */
- case PCI_HEADER_TYPE_NORMAL: /* standard header */
- if (class == PCI_CLASS_BRIDGE_PCI)
- goto bad;
- pci_read_irq(dev);
- pci_read_bases(dev, 6, PCI_ROM_ADDRESS);
- pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
- pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device);
- break;
-
- case PCI_HEADER_TYPE_BRIDGE: /* bridge header */
- if (class != PCI_CLASS_BRIDGE_PCI)
- goto bad;
- /* The PCI-to-PCI bridge spec requires that subtractive
- decoding (i.e. transparent) bridge must have programming
- interface code of 0x01. */
- dev->transparent = ((dev->class & 0xff) == 1);
- pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
- break;
-
- case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */
- if (class != PCI_CLASS_BRIDGE_CARDBUS)
- goto bad;
- pci_read_irq(dev);
- pci_read_bases(dev, 1, 0);
- pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
- pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
- break;
-
- default: /* unknown header */
- printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n",
- dev->slot_name, dev->hdr_type);
- return -1;
-
- bad:
- printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n",
- dev->slot_name, class, dev->hdr_type);
- dev->class = PCI_CLASS_NOT_DEFINED;
- }
-
- /* We found a fine healthy device, go go go... */
- return 0;
-}
-
-/*
- * Read the config data for a PCI device, sanity-check it
- * and fill in the dev structure...
- */
-struct pci_dev * __devinit pci_scan_device(struct pci_dev *temp)
-{
- struct pci_dev *dev;
- u32 l;
-
- if (pci_read_config_dword(temp, PCI_VENDOR_ID, &l))
- return NULL;
-
- /* some broken boards return 0 or ~0 if a slot is empty: */
- if (l == 0xffffffff || l == 0x00000000 || l == 0x0000ffff || l == 0xffff0000)
- return NULL;
-
- dev = xmalloc(sizeof(*dev));
- if (!dev)
- return NULL;
-
- memcpy(dev, temp, sizeof(*dev));
- dev->vendor = l & 0xffff;
- dev->device = (l >> 16) & 0xffff;
-
- /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
- set this higher, assuming the system even supports it. */
- dev->dma_mask = 0xffffffff;
- if (pci_setup_device(dev) < 0) {
- xfree(dev);
- dev = NULL;
- }
- return dev;
-}
-
-struct pci_dev * __devinit pci_scan_slot(struct pci_dev *temp)
-{
- struct pci_bus *bus = temp->bus;
- struct pci_dev *dev;
- struct pci_dev *first_dev = NULL;
- int func = 0;
- int is_multi = 0;
- u8 hdr_type;
-
- for (func = 0; func < 8; func++, temp->devfn++) {
- if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type))
- continue;
- temp->hdr_type = hdr_type & 0x7f;
-
- dev = pci_scan_device(temp);
- if (!pcibios_scan_all_fns(0,0) && func == 0) {
- if (!dev)
- break;
- } else {
- if (!dev)
- continue;
- is_multi = 1;
- }
-
- pci_name_device(dev);
- if (!first_dev) {
- is_multi = hdr_type & 0x80;
- first_dev = dev;
- }
-
- /*
- * Link the device to both the global PCI device chain and
- * the per-bus list of devices.
- */
- list_add_tail(&dev->global_list, &pci_devices);
- list_add_tail(&dev->bus_list, &bus->devices);
-
- /* Fix up broken headers */
- pci_fixup_device(PCI_FIXUP_HEADER, dev);
-
- /*
- * If this is a single function device
- * don't scan past the first function.
- */
- if (!is_multi)
- break;
-
- }
- return first_dev;
-}
-
-unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus)
-{
- unsigned int devfn, max, pass;
- struct list_head *ln;
- /* XEN MODIFICATION: Allocate dev0 on heap to avoid stack overflow. */
- struct pci_dev *dev, *dev0;
-
- DBG("Scanning bus %02x\n", bus->number);
- max = bus->secondary;
-
- /* Create a device template */
- dev0 = xmalloc(sizeof(struct pci_dev));
- if(!dev0) {
- panic("Out of memory scanning PCI bus!\n");
- }
- memset(dev0, 0, sizeof(struct pci_dev));
- dev0->bus = bus;
- dev0->sysdata = bus->sysdata;
-
- /* Go find them, Rover! */
- for (devfn = 0; devfn < 0x100; devfn += 8) {
- dev0->devfn = devfn;
- pci_scan_slot(dev0);
- }
- xfree(dev0);
-
- /*
- * After performing arch-dependent fixup of the bus, look behind
- * all PCI-to-PCI bridges on this bus.
- */
- DBG("Fixups for bus %02x\n", bus->number);
- pcibios_fixup_bus(bus);
- for (pass=0; pass < 2; pass++)
- for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
- dev = pci_dev_b(ln);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
- max = pci_scan_bridge(bus, dev, max, pass);
- }
-
- /*
- * We've scanned the bus and so we know all about what's on
- * the other side of any bridges that may be on this bus plus
- * any devices.
- *
- * Return how far we've got finding sub-buses.
- */
- DBG("Bus scan for %02x returning with max=%02x\n", bus->number, max);
- return max;
-}
-
-int __devinit pci_bus_exists(const struct list_head *list, int nr)
-{
- const struct list_head *l;
-
- for(l=list->next; l != list; l = l->next) {
- const struct pci_bus *b = pci_bus_b(l);
- if (b->number == nr || pci_bus_exists(&b->children, nr))
- return 1;
- }
- return 0;
-}
-
-struct pci_bus * __devinit pci_alloc_primary_bus(int bus)
-{
- struct pci_bus *b;
-
- if (pci_bus_exists(&pci_root_buses, bus)) {
- /* If we already got to this bus through a different bridge, ignore it */
- DBG("PCI: Bus %02x already known\n", bus);
- return NULL;
- }
-
- b = pci_alloc_bus();
- list_add_tail(&b->node, &pci_root_buses);
-
- b->number = b->secondary = bus;
- b->resource[0] = &ioport_resource;
- b->resource[1] = &iomem_resource;
- return b;
-}
-
-struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata)
-{
- struct pci_bus *b = pci_alloc_primary_bus(bus);
- if (b) {
- b->sysdata = sysdata;
- b->ops = ops;
- b->subordinate = pci_do_scan_bus(b);
- }
- return b;
-}
-
-#ifdef CONFIG_PM
-
-/*
- * PCI Power management..
- *
- * This needs to be done centralized, so that we power manage PCI
- * devices in the right order: we should not shut down PCI bridges
- * before we've shut down the devices behind them, and we should
- * not wake up devices before we've woken up the bridge to the
- * device.. Eh?
- *
- * We do not touch devices that don't have a driver that exports
- * a suspend/resume function. That is just too dangerous. If the default
- * PCI suspend/resume functions work for a device, the driver can
- * easily implement them (ie just have a suspend function that calls
- * the pci_set_power_state() function).
- */
-
-static int pci_pm_save_state_device(struct pci_dev *dev, u32 state)
-{
- int error = 0;
- if (dev) {
- struct pci_driver *driver = dev->driver;
- if (driver && driver->save_state)
- error = driver->save_state(dev,state);
- }
- return error;
-}
-
-static int pci_pm_suspend_device(struct pci_dev *dev, u32 state)
-{
- int error = 0;
- if (dev) {
- struct pci_driver *driver = dev->driver;
- if (driver && driver->suspend)
- error = driver->suspend(dev,state);
- }
- return error;
-}
-
-static int pci_pm_resume_device(struct pci_dev *dev)
-{
- int error = 0;
- if (dev) {
- struct pci_driver *driver = dev->driver;
- if (driver && driver->resume)
- error = driver->resume(dev);
- }
- return error;
-}
-
-static int pci_pm_save_state_bus(struct pci_bus *bus, u32 state)
-{
- struct pci_bus *i;
- int error = 0;
-
- list_for_each_entry(i, &bus->children, node) {
- error = pci_pm_save_state_bus(i, state);
- if (error) return error;
- }
- list_for_each_entry(i, &bus->devices, node) {
- error = pci_pm_save_state_device(i, state);
- if (error) return error;
- }
- return 0;
-}
-
-static int pci_pm_suspend_bus(struct pci_bus *bus, u32 state)
-{
- struct pci_bus *i;
-
- /* Walk the bus children list */
- list_for_each_entry(i, &bus->children, node)
- pci_pm_suspend_bus(i, state);
-
- /* Walk the device children list */
- list_for_each_entry(i, &bus->devices, node)
- pci_pm_suspend_device(i, state);
- return 0;
-}
-
-static int pci_pm_resume_bus(struct pci_bus *bus)
-{
- struct pci_bus *i;
-
- /* Walk the device children list */
- list_for_each_entry(i, &bus->devices, node)
- pci_pm_resume_device(i);
-
- /* And then walk the bus children */
- list_for_each_entry(i, &bus->children, node)
- pci_pm_resume_bus(i);
- return 0;
-}
-
-static int pci_pm_save_state(u32 state)
-{
- struct pci_bus *bus;
- int error = 0;
-
- list_for_each_entry(bus, &pci_root_buses, node) {
- error = pci_pm_save_state_bus(bus,state);
- if (!error)
- error = pci_pm_save_state_device(bus->self,state);
- }
- return error;
-}
-
-static int pci_pm_suspend(u32 state)
-{
- struct pci_bus *bus;
-
- list_for_each_entry(bus, &pci_root_buses, node) {
- pci_pm_suspend_bus(bus,state);
- pci_pm_suspend_device(bus->self,state);
- }
- return 0;
-}
-
-int pci_pm_resume(void)
-{
- struct pci_bus *bus;
-
- list_for_each_entry(bus, &pci_root_buses, node) {
- pci_pm_resume_device(bus->self);
- pci_pm_resume_bus(bus);
- }
- return 0;
-}
-
-static int
-pci_pm_callback(struct pm_dev *pm_device, pm_request_t rqst, void *data)
-{
- int error = 0;
-
- switch (rqst) {
- case PM_SAVE_STATE:
- error = pci_pm_save_state((unsigned long)data);
- break;
- case PM_SUSPEND:
- error = pci_pm_suspend((unsigned long)data);
- break;
- case PM_RESUME:
- error = pci_pm_resume();
- break;
- default: break;
- }
- return error;
-}
-
-#endif
-
-/* NB. Xen doesn't include the pool allocator. */
-
-void __devinit pci_init(void)
-{
- struct pci_dev *dev;
-
- pcibios_init();
-
- pci_for_each_dev(dev) {
- pci_fixup_device(PCI_FIXUP_FINAL, dev);
- }
-
-#ifdef CONFIG_PM
- pm_register(PM_PCI_DEV, 0, pci_pm_callback);
-#endif
-}
-
-static int __devinit pci_setup(char *str)
-{
- while (str) {
- char *k = strchr(str, ',');
- if (k)
- *k++ = 0;
- if (*str && (str = pcibios_setup(str)) && *str) {
- /* PCI layer options should be handled here */
- printk(KERN_ERR "PCI: Unknown option `%s'\n", str);
- }
- str = k;
- }
- return 1;
-}
-
-__setup("pci=", pci_setup);
-
-EXPORT_SYMBOL(pci_read_config_byte);
-EXPORT_SYMBOL(pci_read_config_word);
-EXPORT_SYMBOL(pci_read_config_dword);
-EXPORT_SYMBOL(pci_write_config_byte);
-EXPORT_SYMBOL(pci_write_config_word);
-EXPORT_SYMBOL(pci_write_config_dword);
-EXPORT_SYMBOL(pci_devices);
-EXPORT_SYMBOL(pci_root_buses);
-EXPORT_SYMBOL(pci_enable_device_bars);
-EXPORT_SYMBOL(pci_enable_device);
-EXPORT_SYMBOL(pci_disable_device);
-EXPORT_SYMBOL(pci_find_capability);
-EXPORT_SYMBOL(pci_release_regions);
-EXPORT_SYMBOL(pci_request_regions);
-EXPORT_SYMBOL(pci_release_region);
-EXPORT_SYMBOL(pci_request_region);
-EXPORT_SYMBOL(pci_find_class);
-EXPORT_SYMBOL(pci_find_device);
-EXPORT_SYMBOL(pci_find_slot);
-EXPORT_SYMBOL(pci_find_subsys);
-EXPORT_SYMBOL(pci_set_master);
-EXPORT_SYMBOL(pci_set_mwi);
-EXPORT_SYMBOL(pci_clear_mwi);
-EXPORT_SYMBOL(pci_set_dma_mask);
-EXPORT_SYMBOL(pci_dac_set_dma_mask);
-EXPORT_SYMBOL(pci_assign_resource);
-EXPORT_SYMBOL(pci_register_driver);
-EXPORT_SYMBOL(pci_unregister_driver);
-EXPORT_SYMBOL(pci_dev_driver);
-EXPORT_SYMBOL(pci_match_device);
-EXPORT_SYMBOL(pci_find_parent_resource);
-
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pci_setup_device);
-EXPORT_SYMBOL(pci_insert_device);
-EXPORT_SYMBOL(pci_remove_device);
-EXPORT_SYMBOL(pci_announce_device_to_drivers);
-EXPORT_SYMBOL(pci_add_new_bus);
-EXPORT_SYMBOL(pci_do_scan_bus);
-EXPORT_SYMBOL(pci_scan_slot);
-EXPORT_SYMBOL(pci_scan_bus);
-EXPORT_SYMBOL(pci_scan_device);
-EXPORT_SYMBOL(pci_read_bridge_bases);
-#ifdef CONFIG_PROC_FS
-EXPORT_SYMBOL(pci_proc_attach_device);
-EXPORT_SYMBOL(pci_proc_detach_device);
-EXPORT_SYMBOL(pci_proc_attach_bus);
-EXPORT_SYMBOL(pci_proc_detach_bus);
-EXPORT_SYMBOL(proc_bus_pci_dir);
-#endif
-#endif
-
-EXPORT_SYMBOL(pci_set_power_state);
-EXPORT_SYMBOL(pci_save_state);
-EXPORT_SYMBOL(pci_restore_state);
-EXPORT_SYMBOL(pci_enable_wake);
-
-/* Obsolete functions */
-
-EXPORT_SYMBOL(pcibios_present);
-EXPORT_SYMBOL(pcibios_read_config_byte);
-EXPORT_SYMBOL(pcibios_read_config_word);
-EXPORT_SYMBOL(pcibios_read_config_dword);
-EXPORT_SYMBOL(pcibios_write_config_byte);
-EXPORT_SYMBOL(pcibios_write_config_word);
-EXPORT_SYMBOL(pcibios_write_config_dword);
-EXPORT_SYMBOL(pcibios_find_class);
-EXPORT_SYMBOL(pcibios_find_device);
-
-/* Quirk info */
-
-EXPORT_SYMBOL(isa_dma_bridge_buggy);
-EXPORT_SYMBOL(pci_pci_problems);
diff --git a/xen/drivers/pci/pci.ids b/xen/drivers/pci/pci.ids
deleted file mode 100644
index 29843852e9..0000000000
--- a/xen/drivers/pci/pci.ids
+++ /dev/null
@@ -1,7514 +0,0 @@
-#
-# List of PCI ID's
-#
-# Maintained by Martin Mares <mj@ucw.cz> and other volunteers from the
-# Linux PCI ID's Project at http://pciids.sf.net/. New data are always
-# welcome (if they are accurate), we're eagerly expecting new entries,
-# so if you have anything to contribute, please visit the home page or
-# send a diff -u against the most recent pci.ids to pci-ids@ucw.cz.
-#
-# Daily snapshot on Thu 2003-05-29 10:00:04
-# Modded on Fri 2003-05-30 03:13:05
-#
-
-# Vendors, devices and subsystems. Please keep sorted.
-
-# Syntax:
-# vendor vendor_name
-# device device_name <-- single tab
-# subvendor subdevice subsystem_name <-- two tabs
-
-0000 Gammagraphx, Inc.
-001a Ascend Communications, Inc.
-0033 Paradyne corp.
-003d Lockheed Martin-Marietta Corp
-# Real TJN ID is e159, but they got it wrong several times --mj
-0059 Tiger Jet Network Inc. (Wrong ID)
-0070 Hauppauge computer works Inc.
-0100 Ncipher Corp Ltd
-0675 Dynalink
- 1700 IS64PH ISDN Adapter
- 1702 IS64PH ISDN Adapter
-# Wrong ID used in subsystem ID of VIA USB controllers.
-0925 VIA Technologies, Inc. (Wrong ID)
-09c1 Arris
- 0704 CM 200E Cable Modem
-0a89 BREA Technologies Inc
-0e11 Compaq Computer Corporation
- 0001 PCI to EISA Bridge
- 0002 PCI to ISA Bridge
- 0049 NC7132 Gigabit Upgrade Module
- 004a NC6136 Gigabit Server Adapter
- 0508 Netelligent 4/16 Token Ring
- 1000 Triflex/Pentium Bridge, Model 1000
- 2000 Triflex/Pentium Bridge, Model 2000
- 3032 QVision 1280/p
- 3033 QVision 1280/p
- 3034 QVision 1280/p
- 4000 4000 [Triflex]
- 6010 HotPlug PCI Bridge 6010
- 7020 USB Controller
- a0ec Fibre Channel Host Controller
- a0f0 Advanced System Management Controller
- a0f3 Triflex PCI to ISA Bridge
- a0f7 PCI Hotplug Controller
- 8086 002a PCI Hotplug Controller A
- 8086 002b PCI Hotplug Controller B
- a0f8 ZFMicro Chipset USB
- a0fc Fibre Channel Host Controller
- ae10 Smart-2/P RAID Controller
- 0e11 4030 Smart-2/P Array Controller
- 0e11 4031 Smart-2SL Array Controller
- 0e11 4032 Smart Array Controller
- 0e11 4033 Smart 3100ES Array Controller
- ae29 MIS-L
- ae2a MPC
- ae2b MIS-E
- ae31 System Management Controller
- ae32 Netelligent 10/100
- ae33 Triflex Dual EIDE Controller
- ae34 Netelligent 10
- ae35 Integrated NetFlex-3/P
- ae40 Netelligent 10/100 Dual
- ae43 ProLiant Integrated Netelligent 10/100
- ae69 CETUS-L
- ae6c Northstar
- ae6d NorthStar CPU to PCI Bridge
- b011 Integrated Netelligent 10/100
- b012 Netelligent 10 T/2
- b01e NC3120 Fast Ethernet NIC
- b01f NC3122 Fast Ethernet NIC
- b02f NC1120 Ethernet NIC
- b030 Netelligent WS 5100
- b04a 10/100 TX PCI Intel WOL UTP Controller
- b060 Smart Array 5300 Controller
- b0c6 NC3161 Fast Ethernet NIC
- b0c7 NC3160 Fast Ethernet NIC
- b0d7 NC3121 Fast Ethernet NIC
- b0dd NC3131 Fast Ethernet NIC
- b0de NC3132 Fast Ethernet Module
- b0df NC6132 Gigabit Module
- b0e0 NC6133 Gigabit Module
- b0e1 NC3133 Fast Ethernet Module
- b123 NC6134 Gigabit NIC
- b134 NC3163 Fast Ethernet NIC
- b13c NC3162 Fast Ethernet NIC
- b144 NC3123 Fast Ethernet NIC
- b163 NC3134 Fast Ethernet NIC
- b164 NC3165 Fast Ethernet Upgrade Module
- b178 Smart Array 5i/532
- b1a4 NC7131 Gigabit Server Adapter
- f130 NetFlex-3/P ThunderLAN 1.0
- f150 NetFlex-3/P ThunderLAN 2.3
-0e55 HaSoTec GmbH
-# Formerly NCR
-1000 LSI Logic / Symbios Logic
- 0001 53c810
- 1000 1000 8100S
- 0002 53c820
- 0003 53c825
- 0004 53c815
- 0005 53c810AP
- 0006 53c860
- 000a 53c1510
- 000b 53c896
- 000c 53c895
- 1de1 3907 DC-390U2W
- 000d 53c885
- 000f 53c875
- 0e11 7004 Embedded Ultra Wide SCSI Controller
- 1092 8760 FirePort 40 Dual SCSI Controller
- 1de1 3904 DC390F Ultra Wide SCSI Controller
- 0010 53c895
- 0e11 4040 Integrated Array Controller
- 0e11 4048 Integrated Array Controller
- 0012 53c895a
- 0013 53c875a
- 0020 53c1010 Ultra3 SCSI Adapter
- 1de1 1020 DC-390U3W
- 0021 53c1010 66MHz Ultra3 SCSI Adapter
- 0030 53c1030 PCI-X Fusion-MPT Dual Ultra320 SCSI
- 1028 1010 LSI U320 SCSI Controller
- 0040 53c1035
- 008f 53c875J
- 1092 8000 FirePort 40 SCSI Controller
- 1092 8760 FirePort 40 Dual SCSI Host Adapter
- 0621 FC909
- 0622 FC929
- 0623 FC929 LAN
- 0624 FC919
- 0625 FC919 LAN
- 0626 FC929X
- 0627 FC929X LAN
- 0628 FC919X
- 0629 FC919X LAN
- 0701 83C885 NT50 DigitalScape Fast Ethernet
- 0702 Yellowfin G-NIC gigabit ethernet
- 1318 0000 PEI100X
- 0901 61C102
- 1000 63C815
- 1960 PowerEdge Expandable RAID Controller 4
- 1028 0518 PowerEdge Expandable RAID Controller 4/DC
- 1028 0520 PowerEdge Expandable RAID Controller 4/SC
- 1028 0531 PowerEdge Expandable RAID Controller 4/QC
-1001 Kolter Electronic
- 0010 PCI 1616 Measurement card with 32 digital I/O lines
- 0011 OPTO-PCI Opto-Isolated digital I/O board
- 0012 PCI-AD/DA Analogue I/O board
- 0013 PCI-OPTO-RELAIS Digital I/O board with relay outputs
- 0014 PCI-Counter/Timer Counter Timer board
- 0015 PCI-DAC416 Analogue output board
- 0016 PCI-MFB Analogue I/O board
- 0017 PROTO-3 PCI Prototyping board
- 9100 INI-9100/9100W SCSI Host
-1002 ATI Technologies Inc
- 4136 Radeon IGP 320 M
-# New support forthcoming in XFree86 4.3.0
- 4144 Radeon R300 AD [Radeon 9500 Pro]
-# New support forthcoming in XFree86 4.3.0
- 4145 Radeon R300 AE [Radeon 9500 Pro]
-# New support forthcoming in XFree86 4.3.0
- 4146 Radeon R300 AF [Radeon 9500 Pro]
-# Update: Oops, AF was a typo above for 4147, should be AG
- 4147 Radeon R300 AG [FireGL Z1/X1]
- 4158 68800AX [Mach32]
- 4242 Radeon R200 BB [Radeon All in Wonder 8500DV]
- 1002 02aa Radeon 8500 AIW DV Edition
- 4336 Radeon Mobility U1
- 4337 Radeon IGP 340M
- 4354 215CT [Mach64 CT]
- 4358 210888CX [Mach64 CX]
- 4554 210888ET [Mach64 ET]
- 4654 Mach64 VT
- 4742 3D Rage Pro AGP 1X/2X
- 1002 0040 Rage Pro Turbo AGP 2X
- 1002 0044 Rage Pro Turbo AGP 2X
- 1002 0061 Rage Pro AIW AGP 2X
- 1002 0062 Rage Pro AIW AGP 2X
- 1002 0063 Rage Pro AIW AGP 2X
- 1002 0080 Rage Pro Turbo AGP 2X
- 1002 0084 Rage Pro Turbo AGP 2X
- 1002 4742 Rage Pro Turbo AGP 2X
- 1002 8001 Rage Pro Turbo AGP 2X
- 1028 0082 Rage Pro Turbo AGP 2X
- 1028 4082 Optiplex GX1 Onboard Display Adapter
- 1028 8082 Rage Pro Turbo AGP 2X
- 1028 c082 Rage Pro Turbo AGP 2X
- 8086 4152 Xpert 98D AGP 2X
- 8086 464a Rage Pro Turbo AGP 2X
- 4744 3D Rage Pro AGP 1X
- 1002 4744 Rage Pro Turbo AGP
- 4747 3D Rage Pro
- 4749 3D Rage Pro
- 1002 0061 Rage Pro AIW
- 1002 0062 Rage Pro AIW
- 474c Rage XC
- 474d Rage XL AGP 2X
- 1002 0004 Xpert 98 RXL AGP 2X
- 1002 0008 Xpert 98 RXL AGP 2X
- 1002 0080 Rage XL AGP 2X
- 1002 0084 Xpert 98 AGP 2X
- 1002 474d Rage XL AGP
- 1033 806a Rage XL AGP
- 474e Rage XC AGP
- 1002 474e Rage XC AGP
- 474f Rage XL
- 1002 0008 Rage XL
- 1002 474f Rage XL
- 4750 3D Rage Pro 215GP
- 1002 0040 Rage Pro Turbo
- 1002 0044 Rage Pro Turbo
- 1002 0080 Rage Pro Turbo
- 1002 0084 Rage Pro Turbo
- 1002 4750 Rage Pro Turbo
- 4751 3D Rage Pro 215GQ
- 4752 Rage XL
- 1002 0008 Rage XL
- 1002 4752 Rage XL
- 1002 8008 Rage XL
- 1028 00d1 PowerEdge 2550
- 4753 Rage XC
- 1002 4753 Rage XC
- 4754 3D Rage I/II 215GT [Mach64 GT]
- 4755 3D Rage II+ 215GTB [Mach64 GTB]
- 4756 3D Rage IIC 215IIC [Mach64 GT IIC]
- 1002 4756 Rage IIC
- 4757 3D Rage IIC AGP
- 1002 4757 Rage IIC AGP
- 1028 0089 Rage 3D IIC
- 1028 4082 Rage 3D IIC
- 1028 8082 Rage 3D IIC
- 1028 c082 Rage 3D IIC
- 4758 210888GX [Mach64 GX]
- 4759 3D Rage IIC
- 475a 3D Rage IIC AGP
- 1002 0087 Rage 3D IIC
- 1002 475a Rage IIC AGP
- 4964 Radeon R250 Id [Radeon 9000]
- 4965 Radeon R250 Ie [Radeon 9000]
- 4966 Radeon R250 If [Radeon 9000]
- 10f1 0002 R250 If [Tachyon G9000 PRO]
- 148c 2039 R250 If [Radeon 9000 Pro "Evil Commando"]
- 1509 9a00 R250 If [Radeon 9000 "AT009"]
-# New subdevice - 3D Prophet 9000 PCI by Hercules. AGP version probably would have same ID, so not specified.
- 1681 0040 R250 If [3D prophet 9000]
- 174b 7176 R250 If [Sapphire Radeon 9000 Pro]
- 174b 7192 R250 If [Radeon 9000 "Atlantis"]
- 17af 2005 R250 If [Excalibur Radeon 9000 Pro]
- 17af 2006 R250 If [Excalibur Radeon 9000]
- 4967 Radeon R250 Ig [Radeon 9000]
- 496e Radeon R250 [Radeon 9000] (Secondary)
- 4c42 3D Rage LT Pro AGP-133
- 0e11 b0e8 Rage 3D LT Pro
- 0e11 b10e 3D Rage LT Pro (Compaq Armada 1750)
- 1002 0040 Rage LT Pro AGP 2X
- 1002 0044 Rage LT Pro AGP 2X
- 1002 4c42 Rage LT Pro AGP 2X
- 1002 8001 Rage LT Pro AGP 2X
- 1028 0085 Rage 3D LT Pro
- 4c44 3D Rage LT Pro AGP-66
- 4c45 Rage Mobility M3 AGP
- 4c46 Rage Mobility M3 AGP 2x
- 4c47 3D Rage LT-G 215LG
- 4c49 3D Rage LT Pro
- 1002 0004 Rage LT Pro
- 1002 0040 Rage LT Pro
- 1002 0044 Rage LT Pro
- 1002 4c49 Rage LT Pro
- 4c4d Rage Mobility P/M AGP 2x
- 0e11 b111 Armada M700
- 1002 0084 Xpert 98 AGP 2X (Mobility)
- 1014 0154 ThinkPad A20m
- 4c4e Rage Mobility L AGP 2x
- 4c50 3D Rage LT Pro
- 1002 4c50 Rage LT Pro
- 4c51 3D Rage LT Pro
- 4c52 Rage Mobility P/M
- 4c53 Rage Mobility L
- 4c54 264LT [Mach64 LT]
- 4c57 Radeon Mobility M7 LW [Radeon Mobility 7500]
- 1014 0517 ThinkPad T30
- 1028 00e6 Radeon Mobility M7 LW (Dell Inspiron 8100)
- 144d c006 Radeon Mobility M7 LW in vpr Matrix 170B4
-# Update: More correct labelling for this FireGL chipset
- 4c58 Radeon RV200 LX [Mobility FireGL 7800 M7]
- 4c59 Radeon Mobility M6 LY
- 1014 0235 ThinkPad A30p (2653-64G)
- 1014 0239 ThinkPad X22/X23/X24
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 4c5a Radeon Mobility M6 LZ
-# Update: Add M9 to product name
- 4c64 Radeon R250 Ld [Radeon Mobility 9000 M9]
-# Update: Add M9 to product name
- 4c65 Radeon R250 Le [Radeon Mobility 9000 M9]
-# Update: Add M9 to product name
- 4c66 Radeon R250 Lf [Radeon Mobility 9000 M9]
-# Update: Add M9 to product name
- 4c67 Radeon R250 Lg [Radeon Mobility 9000 M9]
- 4d46 Rage Mobility M4 AGP
- 4d4c Rage Mobility M4 AGP
- 4e44 Radeon R300 ND [Radeon 9700 Pro]
- 4e45 Radeon R300 NE [Radeon 9500 Pro]
- 1002 0002 Radeon R300 NE [Radeon 9500 Pro]
- 4e46 Radeon R300 NF [Radeon 9700]
-# Update: This is FireGL X1, not Radeon 9700
- 4e47 Radeon R300 NG [FireGL X1]
- 4e48 Radeon R350 [Radeon 9800]
- 4e64 Radeon R300 [Radeon 9700 Pro] (Secondary)
- 4e65 Radeon R300 [Radeon 9500 Pro] (Secondary)
- 4e66 Radeon R300 [Radeon 9700] (Secondary)
- 4e67 Radeon R300 [FireGL X1] (Secondary)
- 4e68 Radeon R350 [Radeon 9800] (Secondary)
- 5041 Rage 128 PA/PRO
- 5042 Rage 128 PB/PRO AGP 2x
- 5043 Rage 128 PC/PRO AGP 4x
- 5044 Rage 128 PD/PRO TMDS
- 1002 0028 Rage 128 AIW
- 1002 0029 Rage 128 AIW
- 5045 Rage 128 PE/PRO AGP 2x TMDS
- 5046 Rage 128 PF/PRO AGP 4x TMDS
- 1002 0004 Rage Fury Pro
- 1002 0008 Rage Fury Pro/Xpert 2000 Pro
- 1002 0014 Rage Fury Pro
- 1002 0018 Rage Fury Pro/Xpert 2000 Pro
- 1002 0028 Rage 128 Pro AIW AGP
- 1002 002a Rage 128 Pro AIW AGP
- 1002 0048 Rage Fury Pro
- 1002 2000 Rage Fury MAXX AGP 4x (TMDS) (VGA device)
- 1002 2001 Rage Fury MAXX AGP 4x (TMDS) (Extra device?!)
- 5047 Rage 128 PG/PRO
- 5048 Rage 128 PH/PRO AGP 2x
- 5049 Rage 128 PI/PRO AGP 4x
- 504a Rage 128 PJ/PRO TMDS
- 504b Rage 128 PK/PRO AGP 2x TMDS
- 504c Rage 128 PL/PRO AGP 4x TMDS
- 504d Rage 128 PM/PRO
- 504e Rage 128 PN/PRO AGP 2x
- 504f Rage 128 PO/PRO AGP 4x
- 5050 Rage 128 PP/PRO TMDS [Xpert 128]
- 1002 0008 Xpert 128
- 5051 Rage 128 PQ/PRO AGP 2x TMDS
- 5052 Rage 128 PR/PRO AGP 4x TMDS
- 5053 Rage 128 PS/PRO
- 5054 Rage 128 PT/PRO AGP 2x
- 5055 Rage 128 PU/PRO AGP 4x
- 5056 Rage 128 PV/PRO TMDS
- 5057 Rage 128 PW/PRO AGP 2x TMDS
- 5058 Rage 128 PX/PRO AGP 4x TMDS
-# Update: This same chip is used in all 32Mb and 64Mb SDR/DDR orig Radeons, and is now known as 7200
- 5144 Radeon R100 QD [Radeon 7200]
- 1002 0008 Radeon 7000/Radeon VE
- 1002 0009 Radeon 7000/Radeon
- 1002 000a Radeon 7000/Radeon
- 1002 001a Radeon 7000/Radeon
- 1002 0029 Radeon AIW
- 1002 0038 Radeon 7000/Radeon
- 1002 0039 Radeon 7000/Radeon
- 1002 008a Radeon 7000/Radeon
- 1002 00ba Radeon 7000/Radeon
- 1002 0139 Radeon 7000/Radeon
- 1002 028a Radeon 7000/Radeon
- 1002 02aa Radeon AIW
- 1002 053a Radeon 7000/Radeon
- 5145 Radeon R100 QE
- 5146 Radeon R100 QF
- 5147 Radeon R100 QG
- 5148 Radeon R200 QH [Radeon 8500]
- 1002 010a FireGL 8800 64Mb
- 1002 0152 FireGL 8800 128Mb
- 1002 0162 FireGL 8700 32Mb
- 1002 0172 FireGL 8700 64Mb
- 5149 Radeon R200 QI
- 514a Radeon R200 QJ
- 514b Radeon R200 QK
- 514c Radeon R200 QL [Radeon 8500 LE]
- 1002 003a Radeon R200 QL [Radeon 8500 LE]
- 1002 013a Radeon 8500
- 148c 2026 R200 QL [Radeon 8500 Evil Master II Multi Display Edition]
- 174b 7149 Radeon R200 QL [Sapphire Radeon 8500 LE]
-# New: Radeon 9100 is basically a Radeon 8500LE branded as 9100 by Sapphire
- 514d Radeon R200 QM [Radeon 9100]
-# New: Radeon 8500LE chip
- 514e Radeon R200 QN [Radeon 8500LE]
-# New: Radeon 8500LE chip
- 514f Radeon R200 QO [Radeon 8500LE]
- 5157 Radeon RV200 QW [Radeon 7500]
- 1002 013a Radeon 7500
- 1458 4000 RV200 QW [RADEON 7500 PRO MAYA AR]
- 148c 2024 RV200 QW [Radeon 7500LE Dual Display]
- 148c 2025 RV200 QW [Radeon 7500 Evil Master Multi Display Edition]
- 148c 2036 RV200 QW [Radeon 7500 PCI Dual Display]
- 174b 7147 RV200 QW [Sapphire Radeon 7500LE]
- 174b 7161 Radeon RV200 QW [Radeon 7500 LE]
- 17af 0202 RV200 QW [Excalibur Radeon 7500LE]
- 5158 Radeon RV200 QX [Radeon 7500]
-# Update: More correct name
- 5159 Radeon RV100 QY [Radeon 7000/VE]
- 1002 000a Radeon 7000/Radeon VE
- 1002 000b Radeon 7000
- 1002 0038 Radeon 7000/Radeon VE
- 1002 003a Radeon 7000/Radeon VE
- 1002 00ba Radeon 7000/Radeon VE
- 1002 013a Radeon 7000/Radeon VE
- 1458 4002 RV100 QY [RADEON 7000 PRO MAYA AV Series]
- 148c 2003 RV100 QY [Radeon 7000 Multi-Display Edition]
- 148c 2023 RV100 QY [Radeon 7000 Evil Master Multi-Display]
- 174b 7112 RV100 QY [Sapphire Radeon VE 7000]
- 1787 0202 RV100 QY [Excalibur Radeon 7000]
-# Update: More correct name
- 515a Radeon RV100 QZ [Radeon 7000/VE]
- 5168 Radeon R200 Qh
- 5169 Radeon R200 Qi
- 516a Radeon R200 Qj
- 516b Radeon R200 Qk
-# new: This one is not in ATI documentation, but is in XFree86 source code
- 516c Radeon R200 Ql
- 5245 Rage 128 RE/SG
- 1002 0008 Xpert 128
- 1002 0028 Rage 128 AIW
- 1002 0029 Rage 128 AIW
- 1002 0068 Rage 128 AIW
- 5246 Rage 128 RF/SG AGP
- 1002 0004 Magnum/Xpert 128/Xpert 99
- 1002 0008 Magnum/Xpert128/X99/Xpert2000
- 1002 0028 Rage 128 AIW AGP
- 1002 0044 Rage Fury/Xpert 128/Xpert 2000
- 1002 0068 Rage 128 AIW AGP
- 1002 0448 Rage Fury
- 5247 Rage 128 RG
- 524b Rage 128 RK/VR
- 524c Rage 128 RL/VR AGP
- 1002 0008 Xpert 99/Xpert 2000
- 1002 0088 Xpert 99
- 5345 Rage 128 SE/4x
- 5346 Rage 128 SF/4x AGP 2x
- 1002 0048 RAGE 128 16MB VGA TVOUT AMC PAL
- 5347 Rage 128 SG/4x AGP 4x
- 5348 Rage 128 SH
- 534b Rage 128 SK/4x
- 534c Rage 128 SL/4x AGP 2x
- 534d Rage 128 SM/4x AGP 4x
- 1002 0008 Xpert 99/Xpert 2000
- 1002 0018 Xpert 2000
- 534e Rage 128 4x
- 5354 Mach 64 VT
- 1002 5654 Mach 64 reference
- 5446 Rage 128 Pro Ultra TF
- 1002 0004 Rage Fury Pro
- 1002 0008 Rage Fury Pro/Xpert 2000 Pro
- 1002 0018 Rage Fury Pro/Xpert 2000 Pro
- 1002 0028 Rage 128 AIW Pro AGP
- 1002 0029 Rage 128 AIW
- 1002 002a Rage 128 AIW Pro AGP
- 1002 002b Rage 128 AIW
- 1002 0048 Xpert 2000 Pro
- 544c Rage 128 Pro Ultra TL
- 5452 Rage 128 Pro Ultra TR
- 1002 001c Rage 128 Pro 4XL
- 103c 1279 Rage 128 Pro 4XL
- 5453 Rage 128 Pro Ultra TS
- 5454 Rage 128 Pro Ultra TT
- 5455 Rage 128 Pro Ultra TU
- 5654 264VT [Mach64 VT]
- 1002 5654 Mach64VT Reference
- 5655 264VT3 [Mach64 VT3]
- 5656 264VT4 [Mach64 VT4]
- 700f PCI Bridge [IGP 320M]
- 7010 PCI Bridge [IGP 340M]
- cab2 RS200/RS200M AGP Bridge [IGP 340M]
-1003 ULSI Systems
- 0201 US201
-1004 VLSI Technology Inc
- 0005 82C592-FC1
- 0006 82C593-FC1
- 0007 82C594-AFC2
- 0008 82C596/7 [Wildcat]
- 0009 82C597-AFC2
- 000c 82C541 [Lynx]
- 000d 82C543 [Lynx]
- 0101 82C532
- 0102 82C534 [Eagle]
- 0103 82C538
- 0104 82C535
- 0105 82C147
- 0200 82C975
- 0280 82C925
- 0304 QSound ThunderBird PCI Audio
- 1004 0304 QSound ThunderBird PCI Audio
- 122d 1206 DSP368 Audio
- 1483 5020 XWave Thunder 3D Audio
- 0305 QSound ThunderBird PCI Audio Gameport
- 1004 0305 QSound ThunderBird PCI Audio Gameport
- 122d 1207 DSP368 Audio Gameport
- 1483 5021 XWave Thunder 3D Audio Gameport
- 0306 QSound ThunderBird PCI Audio Support Registers
- 1004 0306 QSound ThunderBird PCI Audio Support Registers
- 122d 1208 DSP368 Audio Support Registers
- 1483 5022 XWave Thunder 3D Audio Support Registers
- 0307 Thunderbird
- 0308 Thunderbird
- 0702 VAS96011 [Golden Gate II]
- 0703 Tollgate
-1005 Avance Logic Inc. [ALI]
- 2064 ALG2032/2064
- 2128 ALG2364A
- 2301 ALG2301
- 2302 ALG2302
- 2364 ALG2364
- 2464 ALG2364A
- 2501 ALG2564A/25128A
-1006 Reply Group
-1007 NetFrame Systems Inc
-1008 Epson
-100a Phoenix Technologies
-100b National Semiconductor Corporation
- 0001 DP83810
- 0002 87415/87560 IDE
- 000e 87560 Legacy I/O
- 000f FireWire Controller
- 0011 NS87560 National PCI System I/O
- 0012 USB Controller
- 0020 DP83815 (MacPhyter) Ethernet Controller
- 0022 DP83820 10/100/1000 Ethernet Controller
- 0500 SCx200 Bridge
- 0501 SCx200 SMI
- 0502 SCx200 IDE
- 0503 SCx200 Audio
- 0504 SCx200 Video
- 0505 SCx200 XBus
- d001 87410 IDE
-100c Tseng Labs Inc
- 3202 ET4000/W32p rev A
- 3205 ET4000/W32p rev B
- 3206 ET4000/W32p rev C
- 3207 ET4000/W32p rev D
- 3208 ET6000
- 4702 ET6300
-100d AST Research Inc
-100e Weitek
- 9000 P9000 Viper
- 9001 P9000 Viper
- 9002 P9000 Viper
- 9100 P9100 Viper Pro/SE
-1010 Video Logic, Ltd.
-1011 Digital Equipment Corporation
- 0001 DECchip 21050
- 0002 DECchip 21040 [Tulip]
- 0004 DECchip 21030 [TGA]
- 0007 NVRAM [Zephyr NVRAM]
- 0008 KZPSA [KZPSA]
- 0009 DECchip 21140 [FasterNet]
- 1025 0310 21140 Fast Ethernet
- 10b8 2001 SMC9332BDT EtherPower 10/100
- 10b8 2002 SMC9332BVT EtherPower T4 10/100
- 10b8 2003 SMC9334BDT EtherPower 10/100 (1-port)
- 1109 2400 ANA-6944A/TX Fast Ethernet
- 1112 2300 RNS2300 Fast Ethernet
- 1112 2320 RNS2320 Fast Ethernet
- 1112 2340 RNS2340 Fast Ethernet
- 1113 1207 EN-1207-TX Fast Ethernet
- 1186 1100 DFE-500TX Fast Ethernet
- 1186 1112 DFE-570TX Fast Ethernet
- 1186 1140 DFE-660 Cardbus Ethernet 10/100
- 1186 1142 DFE-660 Cardbus Ethernet 10/100
- 11f6 0503 Freedomline Fast Ethernet
- 1282 9100 AEF-380TXD Fast Ethernet
- 1385 1100 FA310TX Fast Ethernet
- 2646 0001 KNE100TX Fast Ethernet
- 000a 21230 Video Codec
- 000d PBXGB [TGA2]
- 000f DEFPA
- 0014 DECchip 21041 [Tulip Pass 3]
- 1186 0100 DE-530+
- 0016 DGLPB [OPPO]
- 0017 PV-PCI Graphics Controller (ZLXp-L)
- 0019 DECchip 21142/43
- 1011 500a DE500A Fast Ethernet
- 1011 500b DE500B Fast Ethernet
- 1014 0001 10/100 EtherJet Cardbus
- 1025 0315 ALN315 Fast Ethernet
- 1033 800c PC-9821-CS01 100BASE-TX Interface Card
- 1033 800d PC-9821NR-B06 100BASE-TX Interface Card
- 108d 0016 Rapidfire 2327 10/100 Ethernet
- 108d 0017 GoCard 2250 Ethernet 10/100 Cardbus
- 10b8 2005 SMC8032DT Extreme Ethernet 10/100
- 10b8 8034 SMC8034 Extreme Ethernet 10/100
- 10ef 8169 Cardbus Fast Ethernet
- 1109 2a00 ANA-6911A/TX Fast Ethernet
- 1109 2b00 ANA-6911A/TXC Fast Ethernet
- 1109 3000 ANA-6922/TX Fast Ethernet
- 1113 1207 Cheetah Fast Ethernet
- 1113 2220 Cardbus Fast Ethernet
- 115d 0002 Cardbus Ethernet 10/100
- 1179 0203 Fast Ethernet
- 1179 0204 Cardbus Fast Ethernet
- 1186 1100 DFE-500TX Fast Ethernet
- 1186 1101 DFE-500TX Fast Ethernet
- 1186 1102 DFE-500TX Fast Ethernet
- 1259 2800 AT-2800Tx Fast Ethernet
- 1266 0004 Eagle Fast EtherMAX
- 12af 0019 NetFlyer Cardbus Fast Ethernet
- 1374 0001 Cardbus Ethernet Card 10/100
- 1374 0002 Cardbus Ethernet Card 10/100
- 1374 0007 Cardbus Ethernet Card 10/100
- 1374 0008 Cardbus Ethernet Card 10/100
- 1385 2100 FA510
- 1395 0001 10/100 Ethernet CardBus PC Card
- 13d1 ab01 EtherFast 10/100 Cardbus (PCMPC200)
- 8086 0001 EtherExpress PRO/100 Mobile CardBus 32
- 001a Farallon PN9000SX
- 0021 DECchip 21052
- 0022 DECchip 21150
- 0023 DECchip 21150
- 0024 DECchip 21152
- 0025 DECchip 21153
- 0026 DECchip 21154
- 0034 56k Modem Cardbus
- 1374 0003 56k Modem Cardbus
- 0045 DECchip 21553
- 0046 DECchip 21554
- 0e11 4050 Integrated Smart Array
- 0e11 4051 Integrated Smart Array
- 0e11 4058 Integrated Smart Array
- 103c 10c2 Hewlett-Packard NetRAID-4M
- 12d9 000a VoIP PCI Gateway
- 9005 0365 Adaptec 5400S
- 9005 1364 Dell PowerEdge RAID Controller 2
- 9005 1365 Dell PowerEdge RAID Controller 2
- e4bf 1000 CC8-1-BLUES
- 1065 StrongARM DC21285
- 1069 0020 DAC960P / DAC1164P
-1012 Micronics Computers Inc
-1013 Cirrus Logic
- 0038 GD 7548
- 0040 GD 7555 Flat Panel GUI Accelerator
- 004c GD 7556 Video/Graphics LCD/CRT Ctrlr
- 00a0 GD 5430/40 [Alpine]
- 00a2 GD 5432 [Alpine]
- 00a4 GD 5434-4 [Alpine]
- 00a8 GD 5434-8 [Alpine]
- 00ac GD 5436 [Alpine]
- 00b0 GD 5440
- 00b8 GD 5446
- 00bc GD 5480
- 1013 00bc CL-GD5480
- 00d0 GD 5462
- 00d2 GD 5462 [Laguna I]
- 00d4 GD 5464 [Laguna]
- 00d5 GD 5464 BD [Laguna]
- 00d6 GD 5465 [Laguna]
- 13ce 8031 Barco Metheus 2 Megapixel, Dual Head
- 13cf 8031 Barco Metheus 2 Megapixel, Dual Head
- 00e8 GD 5436U
- 1100 CL 6729
- 1110 PD 6832 PCMCIA/CardBus Ctrlr
- 1112 PD 6834 PCMCIA/CardBus Ctrlr
- 1113 PD 6833 PCMCIA/CardBus Ctrlr
- 1200 GD 7542 [Nordic]
- 1202 GD 7543 [Viking]
- 1204 GD 7541 [Nordic Light]
- 4400 CD 4400
- 6001 CS 4610/11 [CrystalClear SoundFusion Audio Accelerator]
- 1014 1010 CS4610 SoundFusion Audio Accelerator
- 6003 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator]
- 1013 4280 Crystal SoundFusion PCI Audio Accelerator
- 1681 0050 Game Theater XP
- 1681 a011 Fortissimo III 7.1
- 6004 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator]
- 6005 Crystal CS4281 PCI Audio
- 1013 4281 Crystal CS4281 PCI Audio
- 10cf 10a8 Crystal CS4281 PCI Audio
- 10cf 10a9 Crystal CS4281 PCI Audio
- 10cf 10aa Crystal CS4281 PCI Audio
- 10cf 10ab Crystal CS4281 PCI Audio
- 10cf 10ac Crystal CS4281 PCI Audio
- 10cf 10ad Crystal CS4281 PCI Audio
- 10cf 10b4 Crystal CS4281 PCI Audio
- 1179 0001 Crystal CS4281 PCI Audio
- 14c0 000c Crystal CS4281 PCI Audio
-1014 IBM
- 0002 PCI to MCA Bridge
- 0005 Alta Lite
- 0007 Alta MP
- 000a Fire Coral
- 0017 CPU to PCI Bridge
- 0018 TR Auto LANstreamer
- 001b GXT-150P
- 001c Carrera
- 001d 82G2675
- 0020 MCA
- 0022 IBM27-82351
- 002d Python
- 002e ServeRAID Controller
- 1014 002e ServeRAID-3x
- 1014 022e ServeRAID-4H
- 0036 Miami
- 003a CPU to PCI Bridge
- 003e 16/4 Token ring UTP/STP controller
- 1014 003e Token-Ring Adapter
- 1014 00cd Token-Ring Adapter + Wake-On-LAN
- 1014 00ce 16/4 Token-Ring Adapter 2
- 1014 00cf 16/4 Token-Ring Adapter Special
- 1014 00e4 High-Speed 100/16/4 Token-Ring Adapter
- 1014 00e5 16/4 Token-Ring Adapter 2 + Wake-On-LAN
- 1014 016d iSeries 2744 Card
- 0045 SSA Adapter
- 0046 MPIC interrupt controller
- 0047 PCI to PCI Bridge
- 0048 PCI to PCI Bridge
- 0049 Warhead SCSI Controller
- 004e ATM Controller (14104e00)
- 004f ATM Controller (14104f00)
- 0050 ATM Controller (14105000)
- 0053 25 MBit ATM Controller
- 0057 MPEG PCI Bridge
- 005c i82557B 10/100
- 007c ATM Controller (14107c00)
- 007d 3780IDSP [MWave]
- 0090 GXT 3000P
- 1014 008e GXT-3000P
- 0095 20H2999 PCI Docking Bridge
- 0096 Chukar chipset SCSI controller
- 1014 0097 iSeries 2778 DASD IOA
- 1014 0098 iSeries 2763 DASD IOA
- 1014 0099 iSeries 2748 DASD IOA
- 00a5 ATM Controller (1410a500)
- 00a6 ATM 155MBPS MM Controller (1410a600)
- 00b7 256-bit Graphics Rasterizer [Fire GL1]
- 1902 00b8 Fire GL1
- 00be ATM 622MBPS Controller (1410be00)
- 00dc Advanced Systems Management Adapter (ASMA)
- 00fc CPC710 Dual Bridge and Memory Controller (PCI-64)
- 0105 CPC710 Dual Bridge and Memory Controller (PCI-32)
- 010f Remote Supervisor Adapter (RSA)
- 0142 Yotta Video Compositor Input
- 1014 0143 Yotta Input Controller (ytin)
- 0144 Yotta Video Compositor Output
- 1014 0145 Yotta Output Controller (ytout)
- 0156 405GP PLB to PCI Bridge
- 01a7 PCI-X to PCI-X Bridge
- 01bd ServeRAID Controller
- 1014 01be ServeRAID-4M
- 1014 01bf ServeRAID-4L
- 1014 0208 ServeRAID-4Mx
- 1014 020e ServeRAID-4Lx
- 1014 022e ServeRAID-4H
- 1014 0258 ServeRAID-5i
- 1014 0259 ServeRAID-5i
- 0302 XA-32 chipset [Summit]
- ffff MPIC-2 interrupt controller
-1015 LSI Logic Corp of Canada
-1016 ICL Personal Systems
-1017 SPEA Software AG
- 5343 SPEA 3D Accelerator
-1018 Unisys Systems
-1019 Elitegroup Computer Systems
-101a AT&T GIS (NCR)
- 0005 100VG ethernet
-101b Vitesse Semiconductor
-101c Western Digital
- 0193 33C193A
- 0196 33C196A
- 0197 33C197A
- 0296 33C296A
- 3193 7193
- 3197 7197
- 3296 33C296A
- 4296 34C296
- 9710 Pipeline 9710
- 9712 Pipeline 9712
- c24a 90C
-101e American Megatrends Inc.
- 1960 MegaRAID
- 101e 0471 MegaRAID 471 Enterprise 1600 RAID Controller
- 101e 0475 MegaRAID 475 Express 500 RAID Controller
- 101e 0493 MegaRAID 493 Elite 1600 RAID Controller
- 1028 0471 PowerEdge RAID Controller 3/QC
- 1028 0475 PowerEdge RAID Controller 3/SC
- 1028 0493 PowerEdge RAID Controller 3/DC
- 1028 0511 PowerEdge Cost Effective RAID Controller ATA100/4Ch
- 9010 MegaRAID 428 Ultra RAID Controller
- 9030 EIDE Controller
- 9031 EIDE Controller
- 9032 EIDE & SCSI Controller
- 9033 SCSI Controller
- 9040 Multimedia card
- 9060 MegaRAID 434 Ultra GT RAID Controller
- 9063 MegaRAC
- 101e 0767 Dell Remote Assistant Card 2
-101f PictureTel
-1020 Hitachi Computer Products
-1021 OKI Electric Industry Co. Ltd.
-1022 Advanced Micro Devices [AMD]
- 1100 K8 NorthBridge
- 1101 K8 NorthBridge
- 1102 K8 NorthBridge
- 1103 K8 NorthBridge
- 2000 79c970 [PCnet32 LANCE]
- 1014 2000 NetFinity 10/100 Fast Ethernet
- 103c 104c Ethernet with LAN remote power Adapter
- 103c 1064 Ethernet with LAN remote power Adapter
- 103c 1065 Ethernet with LAN remote power Adapter
- 103c 106c Ethernet with LAN remote power Adapter
- 103c 106e Ethernet with LAN remote power Adapter
- 103c 10ea Ethernet with LAN remote power Adapter
- 1113 1220 EN1220 10/100 Fast Ethernet
- 1259 2450 AT-2450 10/100 Fast Ethernet
- 1259 2454 AT-2450v4 10Mb Ethernet Adapter
- 1259 2700 AT-2700TX 10/100 Fast Ethernet
- 1259 2701 AT-2700FX 100Mb Ethernet
- 2001 79c978 [HomePNA]
- 1092 0a78 Multimedia Home Network Adapter
- 1668 0299 ActionLink Home Network Adapter
- 2020 53c974 [PCscsi]
- 2040 79c974
- 3000 ELanSC520 Microcontroller
- 7006 AMD-751 [Irongate] System Controller
- 7007 AMD-751 [Irongate] AGP Bridge
- 700c AMD-760 MP [IGD4-2P] System Controller
- 700d AMD-760 MP [IGD4-2P] AGP Bridge
- 700e AMD-760 [IGD4-1P] System Controller
- 700f AMD-760 [IGD4-1P] AGP Bridge
- 7400 AMD-755 [Cobra] ISA
- 7401 AMD-755 [Cobra] IDE
- 7403 AMD-755 [Cobra] ACPI
- 7404 AMD-755 [Cobra] USB
- 7408 AMD-756 [Viper] ISA
- 7409 AMD-756 [Viper] IDE
- 740b AMD-756 [Viper] ACPI
- 740c AMD-756 [Viper] USB
- 7410 AMD-766 [ViperPlus] ISA
- 7411 AMD-766 [ViperPlus] IDE
- 7413 AMD-766 [ViperPlus] ACPI
- 7414 AMD-766 [ViperPlus] USB
- 7440 AMD-768 [Opus] ISA
- 1043 8044 A7M-D Mainboard
- 7441 AMD-768 [Opus] IDE
- 7443 AMD-768 [Opus] ACPI
- 1043 8044 A7M-D Mainboard
- 7445 AMD-768 [Opus] Audio
- 7446 AMD-768 [Opus] MC97 Modem (Smart Link HAMR5600 compatible)
- 7448 AMD-768 [Opus] PCI
- 7449 AMD-768 [Opus] USB
- 7450 AMD-8131 PCI-X Bridge
- 7451 AMD-8131 PCI-X APIC
- 7454 AMD-8151 System Controller
- 7455 AMD-8151 AGP Bridge
- 7460 AMD-8111 PCI
- 7461 AMD-8111 USB
- 7462 AMD-8111 Ethernet
- 7464 AMD-8111 USB
- 7468 AMD-8111 LPC
- 7469 AMD-8111 IDE
- 746a AMD-8111 SMBus 2.0
- 746b AMD-8111 ACPI
- 746d AMD-8111 AC97 Audio
- 746e AMD-8111 MC97 Modem
-1023 Trident Microsystems
- 0194 82C194
- 2000 4DWave DX
- 2001 4DWave NX
- 8400 CyberBlade/i7
- 1023 8400 CyberBlade i7 AGP
- 8420 CyberBlade/i7d
- 0e11 b15a CyberBlade i7 AGP
- 8500 CyberBlade/i1
- 8520 CyberBlade i1
- 0e11 b16e CyberBlade i1 AGP
- 1023 8520 CyberBlade i1 AGP
- 8620 CyberBlade/i1
- 1014 0502 ThinkPad T30
- 8820 CyberBlade XPAi1
- 9320 TGUI 9320
- 9350 GUI Accelerator
- 9360 Flat panel GUI Accelerator
- 9382 Cyber 9382 [Reference design]
- 9383 Cyber 9383 [Reference design]
- 9385 Cyber 9385 [Reference design]
- 9386 Cyber 9386
- 9388 Cyber 9388
- 9397 Cyber 9397
- 939a Cyber 9397DVD
- 9420 TGUI 9420
- 9430 TGUI 9430
- 9440 TGUI 9440
- 9460 TGUI 9460
- 9470 TGUI 9470
- 9520 Cyber 9520
- 9525 Cyber 9525
- 10cf 1094 Lifebook C6155
- 9540 Cyber 9540
- 9660 TGUI 9660/938x/968x
- 9680 TGUI 9680
- 9682 TGUI 9682
- 9683 TGUI 9683
- 9685 ProVIDIA 9685
- 9750 3DImage 9750
- 1014 9750 3DImage 9750
- 1023 9750 3DImage 9750
- 9753 TGUI 9753
- 9754 TGUI 9754
- 9759 TGUI 975
- 9783 TGUI 9783
- 9785 TGUI 9785
- 9850 3DImage 9850
- 9880 Blade 3D PCI/AGP
- 1023 9880 Blade 3D
- 9910 CyberBlade/XP
- 9930 CyberBlade/XPm
-1024 Zenith Data Systems
-1025 Acer Incorporated [ALI]
- 1435 M1435
- 1445 M1445
- 1449 M1449
- 1451 M1451
- 1461 M1461
- 1489 M1489
- 1511 M1511
- 1512 ALI M1512 Aladdin
- 1513 M1513
- 1521 ALI M1521 Aladdin III CPU Bridge
- 10b9 1521 ALI M1521 Aladdin III CPU Bridge
- 1523 ALI M1523 ISA Bridge
- 10b9 1523 ALI M1523 ISA Bridge
- 1531 M1531 Northbridge [Aladdin IV/IV+]
- 1533 M1533 PCI-to-ISA Bridge
- 10b9 1533 ALI M1533 Aladdin IV/V ISA South Bridge
- 1535 M1535 PCI Bridge + Super I/O + FIR
- 1541 M1541 Northbridge [Aladdin V]
- 10b9 1541 ALI M1541 Aladdin V/V+ AGP+PCI North Bridge
- 1542 M1542 Northbridge [Aladdin V]
- 1543 M1543 PCI-to-ISA Bridge + Super I/O + FIR
- 1561 M1561 Northbridge [Aladdin 7]
- 1621 M1621 Northbridge [Aladdin-Pro II]
- 1631 M1631 Northbridge+3D Graphics [Aladdin TNT2]
- 1641 M1641 Northbridge [Aladdin-Pro IV]
- 1647 M1647 [MaGiK1] PCI North Bridge
- 3141 M3141
- 3143 M3143
- 3145 M3145
- 3147 M3147
- 3149 M3149
- 3151 M3151
- 3307 M3307 MPEG-I Video Controller
- 3309 M3309 MPEG-II Video w/ Software Audio Decoder
- 3321 M3321 MPEG-II Audio/Video Decoder
- 5212 M4803
- 5215 ALI PCI EIDE Controller
- 5217 M5217H
- 5219 M5219
- 5225 M5225
- 5229 M5229
- 5235 M5235
- 5237 M5237 PCI USB Host Controller
- 5240 EIDE Controller
- 5241 PCMCIA Bridge
- 5242 General Purpose Controller
- 5243 PCI to PCI Bridge Controller
- 5244 Floppy Disk Controller
- 5247 M1541 PCI to PCI Bridge
- 5251 M5251 P1394 Controller
- 5427 PCI to AGP Bridge
- 5451 M5451 PCI AC-Link Controller Audio Device
- 5453 M5453 PCI AC-Link Controller Modem Device
- 7101 M7101 PCI PMU Power Management Controller
- 10b9 7101 M7101 PCI PMU Power Management Controller
-1028 Dell Computer Corporation
- 0001 PowerEdge Expandable RAID Controller 2/Si
- 1028 0001 PowerEdge Expandable RAID Controller 2/Si
- 0002 PowerEdge Expandable RAID Controller 3
- 1028 0002 PowerEdge Expandable RAID Controller 3/Di
- 1028 00d1 PowerEdge Expandable RAID Controller 3/Di
- 1028 00d9 PowerEdge Expandable RAID Controller 3/Di
- 0003 PowerEdge Expandable RAID Controller 3/Si
- 1028 0003 PowerEdge Expandable RAID Controller 3/Si
- 0004 PowerEdge Expandable RAID Controller 3/Si
- 1028 00d0 PowerEdge Expandable RAID Controller 3/Si
- 0005 PowerEdge Expandable RAID Controller 3/Di
- 0006 PowerEdge Expandable RAID Controller 3/Di
- 0007 Remote Access Controller:DRAC III
- 0008 Remote Access Controller
- 0009 BMC/SMIC device not present
- 000a PowerEdge Expandable RAID Controller 3
- 1028 0106 PowerEdge Expandable RAID Controller 3/Di
- 1028 011b PowerEdge Expandable RAID Controller 3/Di
- 1028 0121 PowerEdge Expandable RAID Controller 3/Di
- 000c Remote Access Controller:ERA or ERA/O
- 000d BMC/SMIC device
- 000e PowerEdge Expandable RAID Controller
- 000f PowerEdge Expandable RAID Controller 4/Di
-1029 Siemens Nixdorf IS
-102a LSI Logic
- 0000 HYDRA
- 0010 ASPEN
-102b Matrox Graphics, Inc.
-# DJ: I've a suspicion that 0010 is a duplicate of 0d10.
- 0010 MGA-I [Impression?]
- 0100 MGA 1064SG [Mystique]
- 0518 MGA-II [Athena]
- 0519 MGA 2064W [Millennium]
- 051a MGA 1064SG [Mystique]
- 102b 0100 MGA-1064SG Mystique
- 102b 1100 MGA-1084SG Mystique
- 102b 1200 MGA-1084SG Mystique
- 102b 1100 MGA-1084SG Mystique
- 110a 0018 Scenic Pro C5 (D1025)
- 051b MGA 2164W [Millennium II]
- 102b 051b MGA-2164W Millennium II
- 102b 1100 MGA-2164W Millennium II
- 102b 1200 MGA-2164W Millennium II
- 051e MGA 1064SG [Mystique] AGP
- 051f MGA 2164W [Millennium II] AGP
- 0520 MGA G200
- 102b dbc2 G200 Multi-Monitor
- 102b dbc8 G200 Multi-Monitor
- 102b dbe2 G200 Multi-Monitor
- 102b dbe8 G200 Multi-Monitor
- 102b ff03 Millennium G200 SD
- 102b ff04 Marvel G200
- 0521 MGA G200 AGP
- 1014 ff03 Millennium G200 AGP
- 102b 48e9 Mystique G200 AGP
- 102b 48f8 Millennium G200 SD AGP
- 102b 4a60 Millennium G200 LE AGP
- 102b 4a64 Millennium G200 AGP
- 102b c93c Millennium G200 AGP
- 102b c9b0 Millennium G200 AGP
- 102b c9bc Millennium G200 AGP
- 102b ca60 Millennium G250 LE AGP
- 102b ca6c Millennium G250 AGP
- 102b dbbc Millennium G200 AGP
- 102b dbc2 Millennium G200 MMS (Dual G200)
- 102b dbc3 G200 Multi-Monitor
- 102b dbc8 Millennium G200 MMS (Dual G200)
- 102b dbd2 G200 Multi-Monitor
- 102b dbd3 G200 Multi-Monitor
- 102b dbd4 G200 Multi-Monitor
- 102b dbd5 G200 Multi-Monitor
- 102b dbd8 G200 Multi-Monitor
- 102b dbd9 G200 Multi-Monitor
- 102b dbe2 Millennium G200 MMS (Quad G200)
- 102b dbe3 G200 Multi-Monitor
- 102b dbe8 Millennium G200 MMS (Quad G200)
- 102b dbf2 G200 Multi-Monitor
- 102b dbf3 G200 Multi-Monitor
- 102b dbf4 G200 Multi-Monitor
- 102b dbf5 G200 Multi-Monitor
- 102b dbf8 G200 Multi-Monitor
- 102b dbf9 G200 Multi-Monitor
- 102b f806 Mystique G200 Video AGP
- 102b ff00 MGA-G200 AGP
- 102b ff02 Mystique G200 AGP
- 102b ff03 Millennium G200 AGP
- 102b ff04 Marvel G200 AGP
- 110a 0032 MGA-G200 AGP
- 0525 MGA G400 AGP
- 0e11 b16f MGA-G400 AGP
- 102b 0328 Millennium G400 16Mb SDRAM
- 102b 0338 Millennium G400 16Mb SDRAM
- 102b 0378 Millennium G400 32Mb SDRAM
- 102b 0541 Millennium G450 Dual Head
- 102b 0542 Millennium G450 Dual Head LX
- 102b 0543 Millennium G450 Single Head LX
- 102b 0641 Millennium G450 32Mb SDRAM Dual Head
- 102b 0642 Millennium G450 32Mb SDRAM Dual Head LX
- 102b 0643 Millennium G450 32Mb SDRAM Single Head LX
- 102b 07c0 Millennium G450 Dual Head LE
- 102b 07c1 Millennium G450 SDR Dual Head LE
- 102b 0d41 Millennium G450 Dual Head PCI
- 102b 0d42 Millennium G450 Dual Head LX PCI
- 102b 0e00 Marvel G450 eTV
- 102b 0e01 Marvel G450 eTV
- 102b 0e02 Marvel G450 eTV
- 102b 0e03 Marvel G450 eTV
- 102b 0f80 Millennium G450 Low Profile
- 102b 0f81 Millennium G450 Low Profile
- 102b 0f82 Millennium G450 Low Profile DVI
- 102b 0f83 Millennium G450 Low Profile DVI
- 102b 19d8 Millennium G400 16Mb SGRAM
- 102b 19f8 Millennium G400 32Mb SGRAM
- 102b 2159 Millennium G400 Dual Head 16Mb
- 102b 2179 Millennium G400 MAX/Dual Head 32Mb
- 102b 217d Millennium G400 Dual Head Max
- 102b 23c0 Millennium G450
- 102b 23c1 Millennium G450
- 102b 23c2 Millennium G450 DVI
- 102b 23c3 Millennium G450 DVI
- 102b 2f58 Millennium G400
- 102b 2f78 Millennium G400
- 102b 3693 Marvel G400 AGP
- 102b 5dd0 4Sight II
- 102b 5f50 4Sight II
- 102b 5f51 4Sight II
- 102b 5f52 4Sight II
- 102b 9010 Millennium G400 Dual Head
- 1458 0400 GA-G400
- 1705 0001 Millennium G450 32MB SGRAM
- 1705 0002 Millennium G450 16MB SGRAM
- 1705 0003 Millennium G450 32MB
- 1705 0004 Millennium G450 16MB
- 0e11 b16f MGA-G400 AGP
- 0527 MGA Parhelia AGP
- 102b 0840 Parhelia 128Mb
- 0d10 MGA Ultima/Impression
- 1000 MGA G100 [Productiva]
- 102b ff01 Productiva G100
- 102b ff05 Productiva G100 Multi-Monitor
- 1001 MGA G100 [Productiva] AGP
- 102b 1001 MGA-G100 AGP
- 102b ff00 MGA-G100 AGP
- 102b ff01 MGA-G100 Productiva AGP
- 102b ff03 Millennium G100 AGP
- 102b ff04 MGA-G100 AGP
- 102b ff05 MGA-G100 Productiva AGP Multi-Monitor
- 110a 001e MGA-G100 AGP
- 2007 MGA Mistral
- 2527 MGA G550 AGP
- 102b 0f83 Millennium G550
- 102b 0f84 Millennium G550 Dual Head DDR 32Mb
- 102b 1e41 Millennium G550
- 4536 VIA Framegrabber
- 6573 Shark 10/100 Multiport SwitchNIC
-102c Chips and Technologies
- 00b8 F64310
- 00c0 F69000 HiQVideo
- 102c 00c0 F69000 HiQVideo
- 00d0 F65545
- 00d8 F65545
- 00dc F65548
- 00e0 F65550
- 00e4 F65554
- 00e5 F65555 HiQVPro
- 0e11 b049 Armada 1700 Laptop Display Controller
- 00f0 F68554
- 00f4 F68554 HiQVision
- 00f5 F68555
- 0c30 F69030
-102d Wyse Technology Inc.
- 50dc 3328 Audio
-102e Olivetti Advanced Technology
-102f Toshiba America
- 0009 r4x00
- 0020 ATM Meteor 155
- 102f 00f8 ATM Meteor 155
- 0180 TX4927
-1030 TMC Research
-1031 Miro Computer Products AG
- 5601 DC20 ASIC
- 5607 Video I/O & motion JPEG compressor
- 5631 Media 3D
- 6057 MiroVideo DC10/DC30+
-1032 Compaq
-1033 NEC Corporation
- 0001 PCI to 486-like bus Bridge
- 0002 PCI to VL98 Bridge
- 0003 ATM Controller
- 0004 R4000 PCI Bridge
- 0005 PCI to 486-like bus Bridge
- 0006 PC-9800 Graphic Accelerator
- 0007 PCI to UX-Bus Bridge
- 0008 PC-9800 Graphic Accelerator
- 0009 PCI to PC9800 Core-Graph Bridge
- 0016 PCI to VL Bridge
- 001a [Nile II]
- 0021 Vrc4373 [Nile I]
- 0029 PowerVR PCX1
- 002a PowerVR 3D
- 002c Star Alpha 2
- 002d PCI to C-bus Bridge
- 0035 USB
- 1179 0001 USB
- 12ee 7000 Root Hub
- 1799 0001 Root Hub
- 003b PCI to C-bus Bridge
- 003e NAPCCARD Cardbus Controller
- 0046 PowerVR PCX2 [midas]
- 005a Vrc5074 [Nile 4]
- 0063 Firewarden
- 0067 PowerVR Neon 250 Chipset
- 1010 0020 PowerVR Neon 250 AGP 32Mb
- 1010 0080 PowerVR Neon 250 AGP 16Mb
- 1010 0088 PowerVR Neon 250 16Mb
- 1010 0090 PowerVR Neon 250 AGP 16Mb
- 1010 0098 PowerVR Neon 250 16Mb
- 1010 00a0 PowerVR Neon 250 AGP 32Mb
- 1010 00a8 PowerVR Neon 250 32Mb
- 1010 0120 PowerVR Neon 250 AGP 32Mb
- 0074 56k Voice Modem
- 1033 8014 RCV56ACF 56k Voice Modem
- 009b Vrc5476
- 00a5 VRC4173
- 00a6 VRC5477 AC97
- 00cd IEEE 1394 [OrangeLink] Host Controller
- 12ee 8011 Root hub
- 00e0 USB 2.0
- 12ee 7001 Root hub
- 1799 0002 Root Hub
-1034 Framatome Connectors USA Inc.
-1035 Comp. & Comm. Research Lab
-1036 Future Domain Corp.
- 0000 TMC-18C30 [36C70]
-1037 Hitachi Micro Systems
-1038 AMP, Inc
-1039 Silicon Integrated Systems [SiS]
-# This is what all my tests report. I don't know if this is equivalent to "5591/5592 AGP".
- 0001 SiS 530 Virtual PCI-to-PCI bridge (AGP)
- 0002 SG86C202
- 0006 85C501/2/3
- 0008 85C503/5513
- 0009 ACPI
- 0018 SiS85C503/5513 (LPC Bridge)
- 0200 5597/5598/6326 VGA
- 1039 0000 SiS5597 SVGA (Shared RAM)
- 0204 82C204
- 0205 SG86C205
- 0300 SiS300/305 PCI/AGP VGA Display Adapter
- 107d 2720 Leadtek WinFast VR300
- 0310 SiS315H PCI/AGP VGA Display Adapter
- 0315 SiS315 PCI/AGP VGA Display Adapter
- 0325 SiS315PRO PCI/AGP VGA Display Adapter
- 0330 SiS330 [Xabre] PCI/AGP VGA Display Adapter
- 0406 85C501/2
- 0496 85C496
- 0530 530 Host
- 0540 540 Host
- 0597 5513C
- 0601 85C601
- 0620 620 Host
- 0630 630 Host
- 0633 633 Host
- 0635 635 Host
- 0645 SiS645 Host & Memory & AGP Controller
- 0646 SiS645DX Host & Memory & AGP Controller
- 0648 SiS 645xx
- 0650 650 Host
- 0651 SiS651 Host
- 0730 730 Host
- 0733 733 Host
- 0735 735 Host
- 0740 740 Host
- 0745 745 Host
- 0746 746 Host
- 0755 SiS 755 Host Bridge
- 0900 SiS900 10/100 Ethernet
- 1039 0900 SiS900 10/100 Ethernet Adapter
- 0961 SiS961 [MuTIOL Media IO]
- 0962 SiS962 [MuTIOL Media IO]
- 3602 83C602
- 5107 5107
- 5300 SiS540 PCI Display Adapter
- 5315 SiS550 AGP/VGA VGA Display Adapter
- 5401 486 PCI Chipset
- 5511 5511/5512
- 5513 5513 [IDE]
- 1019 0970 P6STP-FL motherboard
- 1039 5513 SiS5513 EIDE Controller (A,B step)
- 5517 5517
- 5571 5571
- 5581 5581 Pentium Chipset
- 5582 5582
- 5591 5591/5592 Host
- 5596 5596 Pentium Chipset
- 5597 5597 [SiS5582]
- 5600 5600 Host
- 6204 Video decoder & MPEG interface
- 6205 VGA Controller
- 6236 6236 3D-AGP
- 6300 SiS630 GUI Accelerator+3D
- 1019 0970 P6STP-FL motherboard
- 6306 SiS530 3D PCI/AGP
- 1039 6306 SiS530,620 GUI Accelerator+3D
- 6325 SiS65x/M650/740 PCI/AGP VGA Display Adapter
- 6326 86C326 5598/6326
- 1039 6326 SiS6326 GUI Accelerator
- 1092 0a50 SpeedStar A50
- 1092 0a70 SpeedStar A70
- 1092 4910 SpeedStar A70
- 1092 4920 SpeedStar A70
- 1569 6326 SiS6326 GUI Accelerator
- 7001 USB 1.0 Controller
- 1039 7000 Onboard USB Controller
- 7002 USB 2.0 Controller
- 1509 7002 Onboard USB Controller
- 7007 FireWire Controller
- 7012 Sound Controller
- 7013 Intel 537 [56k Winmodem]
- 7016 10/100 Ethernet Adapter
- 1039 7016 SiS7016 10/100 Ethernet Adapter
- 7018 SiS PCI Audio Accelerator
- 1014 01b6 SiS PCI Audio Accelerator
- 1014 01b7 SiS PCI Audio Accelerator
- 1019 7018 SiS PCI Audio Accelerator
- 1025 000e SiS PCI Audio Accelerator
- 1025 0018 SiS PCI Audio Accelerator
- 1039 7018 SiS PCI Audio Accelerator
- 1043 800b SiS PCI Audio Accelerator
- 1054 7018 SiS PCI Audio Accelerator
- 107d 5330 SiS PCI Audio Accelerator
- 107d 5350 SiS PCI Audio Accelerator
- 1170 3209 SiS PCI Audio Accelerator
- 1462 400a SiS PCI Audio Accelerator
- 14a4 2089 SiS PCI Audio Accelerator
- 14cd 2194 SiS PCI Audio Accelerator
- 14ff 1100 SiS PCI Audio Accelerator
- 152d 8808 SiS PCI Audio Accelerator
- 1558 1103 SiS PCI Audio Accelerator
- 1558 2200 SiS PCI Audio Accelerator
- 1563 7018 SiS PCI Audio Accelerator
- 15c5 0111 SiS PCI Audio Accelerator
- 270f a171 SiS PCI Audio Accelerator
- a0a0 0022 SiS PCI Audio Accelerator
-103a Seiko Epson Corporation
-103b Tatung Co. of America
-103c Hewlett-Packard Company
- 1005 A4977A Visualize EG
- 1006 Visualize FX6
- 1008 Visualize FX4
- 100a Visualize FX2
- 1028 Tach TL Fibre Channel Host Adapter
- 1029 Tach XL2 Fibre Channel Host Adapter
- 107e 000f Interphase 5560 Fibre Channel Adapter
- 9004 9210 1Gb/2Gb Family Fibre Channel Controller
- 9004 9211 1Gb/2Gb Family Fibre Channel Controller
- 102a Tach TS Fibre Channel Host Adapter
- 107e 000e Interphase 5540/5541 Fibre Channel Adapter
- 9004 9110 1Gb/2Gb Family Fibre Channel Controller
- 9004 9111 1Gb/2Gb Family Fibre Channel Controller
- 1030 J2585A DeskDirect 10/100VG NIC
- 1031 J2585B HP 10/100VG PCI LAN Adapter
- 103c 1040 J2973A DeskDirect 10BaseT NIC
- 103c 1041 J2585B DeskDirect 10/100VG NIC
- 103c 1042 J2970A DeskDirect 10BaseT/2 NIC
- 1040 J2973A DeskDirect 10BaseT NIC
- 1041 J2585B DeskDirect 10/100 NIC
- 1042 J2970A DeskDirect 10BaseT/2 NIC
- 1048 Diva Serial [GSP] Multiport UART
- 103c 1049 Tosca Console
- 103c 104a Tosca Secondary
- 103c 104b Maestro SP2
- 103c 1223 Halfdome Console
- 103c 1226 Keystone SP2
- 103c 1227 Powerbar SP2
- 103c 1282 Everest SP2
- 1054 PCI Local Bus Adapter
- 1064 79C970 PCnet Ethernet Controller
- 108b Visualize FXe
- 10c1 NetServer Smart IRQ Router
- 10ed TopTools Remote Control
- 1200 82557B 10/100 NIC
- 1219 NetServer PCI Hot-Plug Controller
- 121a NetServer SMIC Controller
- 121b NetServer Legacy COM Port Decoder
- 121c NetServer PCI COM Port Decoder
- 1229 zx1 System Bus Adapter
- 122a zx1 I/O Controller
- 122e PCI-X/AGP Local Bus Adapter
- 127c sx1000 I/O Controller
- 1290 Auxiliary Diva Serial Port
- 2910 E2910A PCIBus Exerciser
- 2925 E2925A 32 Bit, 33 MHz PCI Exerciser & Analyzer
-103e Solliday Engineering
-103f Synopsys/Logic Modeling Group
-1040 Accelgraphics Inc.
-1041 Computrend
-1042 Micron
- 1000 PC Tech RZ1000
- 1001 PC Tech RZ1001
- 3000 Samurai_0
- 3010 Samurai_1
- 3020 Samurai_IDE
-1043 Asustek Computer, Inc.
- 0675 ISDNLink P-IN100-ST-D
- 4021 v7100 Combo Deluxe [GeForce2 MX + TV tuner]
- 4057 V8200 GeForce 3
-1044 Distributed Processing Technology
- 1012 Domino RAID Engine
- a400 SmartCache/Raid I-IV Controller
- a500 PCI Bridge
- a501 SmartRAID V Controller
- 1044 c001 PM1554U2 Ultra2 Single Channel
- 1044 c002 PM1654U2 Ultra2 Single Channel
- 1044 c003 PM1564U3 Ultra3 Single Channel
- 1044 c004 PM1564U3 Ultra3 Dual Channel
- 1044 c005 PM1554U2 Ultra2 Single Channel (NON ACPI)
- 1044 c00a PM2554U2 Ultra2 Single Channel
- 1044 c00b PM2654U2 Ultra2 Single Channel
- 1044 c00c PM2664U3 Ultra3 Single Channel
- 1044 c00d PM2664U3 Ultra3 Dual Channel
- 1044 c00e PM2554U2 Ultra2 Single Channel (NON ACPI)
- 1044 c00f PM2654U2 Ultra2 Single Channel (NON ACPI)
- 1044 c014 PM3754U2 Ultra2 Single Channel (NON ACPI)
- 1044 c015 PM3755U2B Ultra2 Single Channel (NON ACPI)
- 1044 c016 PM3755F Fibre Channel (NON ACPI)
- 1044 c01e PM3757U2 Ultra2 Single Channel
- 1044 c01f PM3757U2 Ultra2 Dual Channel
- 1044 c020 PM3767U3 Ultra3 Dual Channel
- 1044 c021 PM3767U3 Ultra3 Quad Channel
- 1044 c028 PM2865U3 Ultra3 Single Channel
- 1044 c029 PM2865U3 Ultra3 Dual Channel
- 1044 c02a PM2865F Fibre Channel
- 1044 c03c 2000S Ultra3 Single Channel
- 1044 c03d 2000S Ultra3 Dual Channel
- 1044 c03e 2000F Fibre Channel
- 1044 c046 3000S Ultra3 Single Channel
- 1044 c047 3000S Ultra3 Dual Channel
- 1044 c048 3000F Fibre Channel
- 1044 c050 5000S Ultra3 Single Channel
- 1044 c051 5000S Ultra3 Dual Channel
- 1044 c052 5000F Fibre Channel
- 1044 c05a 2400A UDMA Four Channel
- 1044 c05b 2400A UDMA Four Channel DAC
- 1044 c064 3010S Ultra3 Dual Channel
- 1044 c065 3010S Ultra3 Four Channel
- 1044 c066 3010S Fibre Channel
- a511 SmartRAID V Controller
-1045 OPTi Inc.
- a0f8 82C750 [Vendetta] USB Controller
- c101 92C264
- c178 92C178
- c556 82X556 [Viper]
- c557 82C557 [Viper-M]
- c558 82C558 [Viper-M ISA+IDE]
- c567 82C750 [Vendetta], device 0
- c568 82C750 [Vendetta], device 1
- c569 82C579 [Viper XPress+ Chipset]
- c621 82C621 [Viper-M/N+]
- c700 82C700 [FireStar]
- c701 82C701 [FireStar Plus]
- c814 82C814 [Firebridge 1]
- c822 82C822
- c824 82C824
- c825 82C825 [Firebridge 2]
- c832 82C832
- c861 82C861
- c895 82C895
- c935 EV1935 ECTIVA MachOne PCI Audio
- d568 82C825 [Firebridge 2]
- d721 IDE [FireStar]
-1046 IPC Corporation, Ltd.
-1047 Genoa Systems Corp
-1048 Elsa AG
- 0d22 Quadro4 900XGL [ELSA GLoria4 900XGL]
- 1000 QuickStep 1000
- 3000 QuickStep 3000
-1049 Fountain Technologies, Inc.
-104a SGS Thomson Microelectronics
- 0008 STG 2000X
- 0009 STG 1764X
- 0010 STG4000 [3D Prophet Kyro Series]
-# From <http://gatekeeper.dec.com/pub/BSD/FreeBSD/FreeBSD-stable/src/share/misc/pci_vendors>
- 0210 STPC Atlas ISA Bridge
- 0981 DEC-Tulip compatible 10/100 Ethernet
- 1746 STG 1764X
- 2774 DEC-Tulip compatible 10/100 Ethernet
- 3520 MPEG-II decoder card
-104b BusLogic
- 0140 BT-946C (old) [multimaster 01]
- 1040 BT-946C (BA80C30) [MultiMaster 10]
- 8130 Flashpoint LT
-104c Texas Instruments
- 0500 100 MBit LAN Controller
- 0508 TMS380C2X Compressor Interface
- 1000 Eagle i/f AS
- 104c PCI1510 PC card Cardbus Controller
- 3d04 TVP4010 [Permedia]
- 3d07 TVP4020 [Permedia 2]
- 1011 4d10 Comet
- 1040 000f AccelStar II
- 1040 0011 AccelStar II
- 1048 0a31 WINNER 2000
- 1048 0a32 GLoria Synergy
- 1048 0a35 GLoria Synergy
- 107d 2633 WinFast 3D L2300
- 1092 0127 FIRE GL 1000 PRO
- 1092 0136 FIRE GL 1000 PRO
- 1092 0141 FIRE GL 1000 PRO
- 1092 0146 FIRE GL 1000 PRO
- 1092 0148 FIRE GL 1000 PRO
- 1092 0149 FIRE GL 1000 PRO
- 1092 0152 FIRE GL 1000 PRO
- 1092 0154 FIRE GL 1000 PRO
- 1092 0155 FIRE GL 1000 PRO
- 1092 0156 FIRE GL 1000 PRO
- 1092 0157 FIRE GL 1000 PRO
- 1097 3d01 Jeronimo Pro
- 1102 100f Graphics Blaster Extreme
- 3d3d 0100 Reference Permedia 2 3D
- 8000 PCILynx/PCILynx2 IEEE 1394 Link Layer Controller
- e4bf 1010 CF1-1-SNARE
- e4bf 1020 CF1-2-SNARE
- 8009 FireWire Controller
- 104d 8032 8032 OHCI i.LINK (IEEE 1394) Controller
- 8017 PCI4410 FireWire Controller
- 8019 TSB12LV23 IEEE-1394 Controller
- 11bd 000a Studio DV500-1394
- 11bd 000e Studio DV
- e4bf 1010 CF2-1-CYMBAL
- 8020 TSB12LV26 IEEE-1394 Controller (Link)
- 8021 TSB43AA22 IEEE-1394 Controller (PHY/Link Integrated)
- 104d 80df Vaio PCG-FX403
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 8022 TSB43AB22 IEEE-1394a-2000 Controller (PHY/Link)
- 8023 TSB43AB22/A IEEE-1394a-2000 Controller (PHY/Link)
- 8024 TSB43AB23 IEEE-1394a-2000 Controller (PHY/Link)
- 8026 TSB43AB21 IEEE-1394a-2000 Controller (PHY/Link)
- 8027 PCI4451 IEEE-1394 Controller
- 1028 00e6 PCI4451 IEEE-1394 Controller (Dell Inspiron 8100)
- 8029 PCI4510 IEEE-1394 Controller
- 8400 ACX 100 22Mbps Wireless Interface
- a001 TDC1570
- a100 TDC1561
- a102 TNETA1575 HyperSAR Plus w/PCI Host i/f & UTOPIA i/f
- a106 TMS320C6205
- ac10 PCI1050
- ac11 PCI1053
- ac12 PCI1130
- ac13 PCI1031
- ac15 PCI1131
- ac16 PCI1250
- ac17 PCI1220
- ac18 PCI1260
- ac19 PCI1221
- ac1a PCI1210
- ac1b PCI1450
- 0e11 b113 Armada M700
- ac1c PCI1225
- ac1d PCI1251A
- ac1e PCI1211
- ac1f PCI1251B
- ac20 TI 2030
- ac21 PCI2031
- ac22 PCI2032 PCI Docking Bridge
- ac23 PCI2250 PCI-to-PCI Bridge
- ac28 PCI2050 PCI-to-PCI Bridge
- ac30 PCI1260 PC card Cardbus Controller
- ac40 PCI4450 PC card Cardbus Controller
- ac41 PCI4410 PC card Cardbus Controller
- ac42 PCI4451 PC card Cardbus Controller
- 1028 00e6 PCI4451 PC card CardBus Controller (Dell Inspiron 8100)
- ac44 PCI4510 PC card Cardbus Controller
- ac50 PCI1410 PC card Cardbus Controller
- ac51 PCI1420
- 1014 023b ThinkPad T23 (2647-4MG)
- 10cf 1095 Lifebook C6155
- e4bf 1000 CP2-2-HIPHOP
- ac52 PCI1451 PC card Cardbus Controller
- ac53 PCI1421 PC card Cardbus Controller
- ac55 PCI1250 PC card Cardbus Controller
- 1014 0512 ThinkPad T30
- ac56 PCI1510 PC card Cardbus Controller
- ac60 PCI2040 PCI to DSP Bridge Controller
- fe00 FireWire Host Controller
- fe03 12C01A FireWire Host Controller
-104d Sony Corporation
- 8009 CXD1947Q i.LINK Controller
- 8039 CXD3222 i.LINK Controller
- 8056 Rockwell HCF 56K modem
- 808a Memory Stick Controller
-104e Oak Technology, Inc
- 0017 OTI-64017
- 0107 OTI-107 [Spitfire]
- 0109 Video Adapter
- 0111 OTI-64111 [Spitfire]
- 0217 OTI-64217
- 0317 OTI-64317
-104f Co-time Computer Ltd
-1050 Winbond Electronics Corp
- 0000 NE2000
- 0001 W83769F
- 0105 W82C105
- 0840 W89C840
- 1050 0001 W89C840 Ethernet Adapter
- 1050 0840 W89C840 Ethernet Adapter
- 0940 W89C940
- 5a5a W89C940F
- 6692 W6692
- 9970 W9970CF
-1051 Anigma, Inc.
-1052 ?Young Micro Systems
-1053 Young Micro Systems
-1054 Hitachi, Ltd
-1055 Efar Microsystems
- 9130 SLC90E66 [Victory66] IDE
- 9460 SLC90E66 [Victory66] ISA
- 9462 SLC90E66 [Victory66] USB
- 9463 SLC90E66 [Victory66] ACPI
-1056 ICL
-# Motorola made a mistake and used 1507 instead of 1057 in some chips. Please look at the 1507 entry as well when updating this.
-1057 Motorola
- 0001 MPC105 [Eagle]
- 0002 MPC106 [Grackle]
- 0003 MPC8240 [Kahlua]
- 0004 MPC107
- 0006 MPC8245 [Unity]
- 0100 MC145575 [HFC-PCI]
- 0431 KTI829c 100VG
- 1801 Audio I/O Controller (MIDI)
- ecc0 0030 Layla
- 18c0 MPC8265A/MPC8266
- 4801 Raven
- 4802 Falcon
- 4803 Hawk
- 4806 CPX8216
- 4d68 20268
- 5600 SM56 PCI Modem
- 1057 0300 SM56 PCI Speakerphone Modem
- 1057 0301 SM56 PCI Voice Modem
- 1057 0302 SM56 PCI Fax Modem
- 1057 5600 SM56 PCI Voice modem
- 13d2 0300 SM56 PCI Speakerphone Modem
- 13d2 0301 SM56 PCI Voice modem
- 13d2 0302 SM56 PCI Fax Modem
- 1436 0300 SM56 PCI Speakerphone Modem
- 1436 0301 SM56 PCI Voice modem
- 1436 0302 SM56 PCI Fax Modem
- 144f 100c SM56 PCI Fax Modem
- 1494 0300 SM56 PCI Speakerphone Modem
- 1494 0301 SM56 PCI Voice modem
- 14c8 0300 SM56 PCI Speakerphone Modem
- 14c8 0302 SM56 PCI Fax Modem
- 1668 0300 SM56 PCI Speakerphone Modem
- 1668 0302 SM56 PCI Fax Modem
- 6400 MPC190 Security Processor (S1 family, encryption)
-1058 Electronics & Telecommunications RSH
-1059 Teknor Industrial Computers Inc
-105a Promise Technology, Inc.
- 0d30 20265
- 105a 4d33 Ultra100
- 0d38 20263
- 105a 4d39 Fasttrak66
- 1275 20275
- 3376 PDC20376
- 1043 809e A7V8X motherboard
- 4d30 20267
- 105a 4d33 Ultra100
- 105a 4d39 Fasttrak100
- 4d33 20246
- 105a 4d33 20246 IDE Controller
- 4d38 20262
- 105a 4d30 Ultra Device on SuperTrak
- 105a 4d33 Ultra66
- 105a 4d39 Fasttrak66
- 4d68 20268
- 105a 4d68 Ultra100TX2
- 4d69 20269
- 105a 4d68 Ultra133TX2
- 5275 PDC20276 IDE
- 105a 0275 SuperTrak SX6000 IDE
- 5300 DC5300
- 6268 20268R
- 6269 PDC20271
- 105a 6269 FastTrak TX2/TX2000
- 6621 PDC20621 [SX4000] 4 Channel IDE RAID Controller
- 7275 PDC20277
-105b Foxconn International, Inc.
-105c Wipro Infotech Limited
-105d Number 9 Computer Company
- 2309 Imagine 128
- 2339 Imagine 128-II
- 105d 0000 Imagine 128 series 2 4Mb VRAM
- 105d 0001 Imagine 128 series 2 4Mb VRAM
- 105d 0002 Imagine 128 series 2 4Mb VRAM
- 105d 0003 Imagine 128 series 2 4Mb VRAM
- 105d 0004 Imagine 128 series 2 4Mb VRAM
- 105d 0005 Imagine 128 series 2 4Mb VRAM
- 105d 0006 Imagine 128 series 2 4Mb VRAM
- 105d 0007 Imagine 128 series 2 4Mb VRAM
- 105d 0008 Imagine 128 series 2e 4Mb DRAM
- 105d 0009 Imagine 128 series 2e 4Mb DRAM
- 105d 000a Imagine 128 series 2 8Mb VRAM
- 105d 000b Imagine 128 series 2 8Mb H-VRAM
- 11a4 000a Barco Metheus 5 Megapixel
- 13cc 0000 Barco Metheus 5 Megapixel
- 13cc 0004 Barco Metheus 5 Megapixel
- 13cc 0005 Barco Metheus 5 Megapixel
- 13cc 0006 Barco Metheus 5 Megapixel
- 13cc 0008 Barco Metheus 5 Megapixel
- 13cc 0009 Barco Metheus 5 Megapixel
- 13cc 000a Barco Metheus 5 Megapixel
- 13cc 000c Barco Metheus 5 Megapixel
- 493d Imagine 128 T2R [Ticket to Ride]
- 11a4 000a Barco Metheus 5 Megapixel, Dual Head
- 11a4 000b Barco Metheus 5 Megapixel, Dual Head
- 13cc 0002 Barco Metheus 4 Megapixel, Dual Head
- 13cc 0003 Barco Metheus 5 Megapixel, Dual Head
- 13cc 0007 Barco Metheus 5 Megapixel, Dual Head
- 13cc 0008 Barco Metheus 5 Megapixel, Dual Head
- 13cc 0009 Barco Metheus 5 Megapixel, Dual Head
- 13cc 000a Barco Metheus 5 Megapixel, Dual Head
- 5348 Revolution 4
- 105d 0037 Revolution IV-FP AGP (For SGI 1600SW)
-105e Vtech Computers Ltd
-105f Infotronic America Inc
-1060 United Microelectronics [UMC]
- 0001 UM82C881
- 0002 UM82C886
- 0101 UM8673F
- 0881 UM8881
- 0886 UM8886F
- 0891 UM8891A
- 1001 UM886A
- 673a UM8886BF
- 673b EIDE Master/DMA
- 8710 UM8710
- 886a UM8886A
- 8881 UM8881F
- 8886 UM8886F
- 888a UM8886A
- 8891 UM8891A
- 9017 UM9017F
- 9018 UM9018
- 9026 UM9026
- e881 UM8881N
- e886 UM8886N
- e88a UM8886N
- e891 UM8891N
-1061 I.I.T.
- 0001 AGX016
- 0002 IIT3204/3501
-1062 Maspar Computer Corp
-1063 Ocean Office Automation
-1064 Alcatel
-1065 Texas Microsystems
-1066 PicoPower Technology
- 0000 PT80C826
- 0001 PT86C521 [Vesuvius v1] Host Bridge
- 0002 PT86C523 [Vesuvius v3] PCI-ISA Bridge Master
- 0003 PT86C524 [Nile] PCI-to-PCI Bridge
- 0004 PT86C525 [Nile-II] PCI-to-PCI Bridge
- 0005 National PC87550 System Controller
- 8002 PT86C523 [Vesuvius v3] PCI-ISA Bridge Slave
-1067 Mitsubishi Electric
- 1002 VG500 [VolumePro Volume Rendering Accelerator]
-1068 Diversified Technology
-1069 Mylex Corporation
- 0001 DAC960P
- 0002 DAC960PD
- 0010 DAC960PX
- 0050 AcceleRAID 352/170/160 support Device
- ba55 eXtremeRAID 1100 support Device
- ba56 eXtremeRAID 2000/3000 support Device
-106a Aten Research Inc
-106b Apple Computer Inc.
- 0001 Bandit PowerPC host bridge
- 0002 Grand Central I/O
- 0003 Control Video
- 0004 PlanB Video-In
- 0007 O'Hare I/O
- 000e Hydra Mac I/O
- 0010 Heathrow Mac I/O
- 0017 Paddington Mac I/O
- 0018 UniNorth FireWire
- 0019 KeyLargo USB
- 001e UniNorth Internal PCI
- 001f UniNorth PCI
- 0020 UniNorth AGP
- 0021 UniNorth GMAC (Sun GEM)
- 0022 KeyLargo Mac I/O
- 0024 UniNorth/Pangea GMAC (Sun GEM)
- 0025 KeyLargo/Pangea Mac I/O
- 0026 KeyLargo/Pangea USB
- 0027 UniNorth/Pangea AGP
- 0028 UniNorth/Pangea PCI
- 0029 UniNorth/Pangea Internal PCI
- 002d UniNorth 1.5 AGP
- 002e UniNorth 1.5 PCI
- 002f UniNorth 1.5 Internal PCI
- 0030 UniNorth/Pangea FireWire
- 0031 UniNorth 2 FireWire
- 0032 UniNorth 2 GMAC (Sun GEM)
- 0033 UniNorth 2 ATA/100
- 0034 UniNorth 2 AGP
- 1645 Tigon3 Gigabit Ethernet NIC (BCM5701)
-106c Hyundai Electronics America
- 8801 Dual Pentium ISA/PCI Motherboard
- 8802 PowerPC ISA/PCI Motherboard
- 8803 Dual Window Graphics Accelerator
- 8804 LAN Controller
- 8805 100-BaseT LAN
-106d Sequent Computer Systems
-106e DFI, Inc
-106f City Gate Development Ltd
-1070 Daewoo Telecom Ltd
-1071 Mitac
-1072 GIT Co Ltd
-1073 Yamaha Corporation
- 0001 3D GUI Accelerator
- 0002 YGV615 [RPA3 3D-Graphics Controller]
- 0003 YMF-740
- 0004 YMF-724
- 1073 0004 YMF724-Based PCI Audio Adapter
- 0005 DS1 Audio
- 1073 0005 DS-XG PCI Audio CODEC
- 0006 DS1 Audio
- 0008 DS1 Audio
- 1073 0008 DS-XG PCI Audio CODEC
- 000a DS1L Audio
- 1073 0004 DS-XG PCI Audio CODEC
- 1073 000a DS-XG PCI Audio CODEC
- 000c YMF-740C [DS-1L Audio Controller]
- 107a 000c DS-XG PCI Audio CODEC
- 000d YMF-724F [DS-1 Audio Controller]
- 1073 000d DS-XG PCI Audio CODEC
- 0010 YMF-744B [DS-1S Audio Controller]
- 1073 0006 DS-XG PCI Audio CODEC
- 1073 0010 DS-XG PCI Audio CODEC
- 0012 YMF-754 [DS-1E Audio Controller]
- 1073 0012 DS-XG PCI Audio Codec
- 0020 DS-1 Audio
- 2000 DS2416 Digital Mixing Card
- 1073 2000 DS2416 Digital Mixing Card
-1074 NexGen Microsystems
- 4e78 82c500/1
-1075 Advanced Integrations Research
-1076 Chaintech Computer Co. Ltd
-1077 QLogic Corp.
- 1016 ISP10160 Single Channel Ultra3 SCSI Processor
- 1020 ISP1020 Fast-wide SCSI
- 1022 ISP1022 Fast-wide SCSI
- 1080 ISP1080 SCSI Host Adapter
- 1216 ISP12160 Dual Channel Ultra3 SCSI Processor
- 101e 8471 QLA12160 on AMI MegaRAID
- 101e 8493 QLA12160 on AMI MegaRAID
- 1240 ISP1240 SCSI Host Adapter
- 1280 ISP1280
- 2020 ISP2020A Fast!SCSI Basic Adapter
- 2100 QLA2100 64-bit Fibre Channel Adapter
- 1077 0001 QLA2100 64-bit Fibre Channel Adapter
- 2200 QLA2200
- 1077 0002 QLA2200
- 2300 QLA2300 64-bit FC-AL Adapter
- 2312 QLA2312 Fibre Channel Adapter
-1078 Cyrix Corporation
- 0000 5510 [Grappa]
- 0001 PCI Master
- 0002 5520 [Cognac]
- 0100 5530 Legacy [Kahlua]
- 0101 5530 SMI [Kahlua]
- 0102 5530 IDE [Kahlua]
- 0103 5530 Audio [Kahlua]
- 0104 5530 Video [Kahlua]
- 0400 ZFMicro PCI Bridge
- 0401 ZFMicro Chipset SMI
- 0402 ZFMicro Chipset IDE
- 0403 ZFMicro Expansion Bus
-1079 I-Bus
-107a NetWorth
-107b Gateway 2000
-107c LG Electronics [Lucky Goldstar Co. Ltd]
-107d LeadTek Research Inc.
- 0000 P86C850
-107e Interphase Corporation
- 0001 5515 ATM Adapter [Flipper]
- 0002 100 VG AnyLan Controller
- 0004 5526 Fibre Channel Host Adapter
- 0005 x526 Fibre Channel Host Adapter
- 0008 5525/5575 ATM Adapter (155 Mbit) [Atlantic]
- 9003 5535-4P-BRI-ST
- 9007 5535-4P-BRI-U
- 9008 5535-1P-SR
- 900c 5535-1P-SR-ST
- 900e 5535-1P-SR-U
- 9011 5535-1P-PRI
- 9013 5535-2P-PRI
- 9023 5536-4P-BRI-ST
- 9027 5536-4P-BRI-U
- 9031 5536-1P-PRI
- 9033 5536-2P-PRI
-107f Data Technology Corporation
- 0802 SL82C105
-1080 Contaq Microsystems
- 0600 82C599
- c691 Cypress CY82C691
- c693 82c693
-1081 Supermac Technology
- 0d47 Radius PCI to NuBUS Bridge
-1082 EFA Corporation of America
-1083 Forex Computer Corporation
- 0001 FR710
-1084 Parador
-1085 Tulip Computers Int.B.V.
-1086 J. Bond Computer Systems
-1087 Cache Computer
-1088 Microcomputer Systems (M) Son
-1089 Data General Corporation
-# Formerly Bit3 Computer Corp.
-108a SBS Technologies
- 0001 VME Bridge Model 617
- 0010 VME Bridge Model 618
- 0040 dataBLIZZARD
- 3000 VME Bridge Model 2706
-108c Oakleigh Systems Inc.
-108d Olicom
- 0001 Token-Ring 16/4 PCI Adapter (3136/3137)
- 0002 16/4 Token Ring
- 0004 RapidFire 3139 Token-Ring 16/4 PCI Adapter
- 108d 0004 OC-3139/3140 RapidFire Token-Ring 16/4 Adapter
- 0005 GoCard 3250 Token-Ring 16/4 CardBus PC Card
- 0006 OC-3530 RapidFire Token-Ring 100
- 0007 RapidFire 3141 Token-Ring 16/4 PCI Fiber Adapter
- 108d 0007 OC-3141 RapidFire Token-Ring 16/4 Adapter
- 0008 RapidFire 3540 HSTR 100/16/4 PCI Adapter
- 108d 0008 OC-3540 RapidFire HSTR 100/16/4 Adapter
- 0011 OC-2315
- 0012 OC-2325
- 0013 OC-2183/2185
- 0014 OC-2326
- 0019 OC-2327/2250 10/100 Ethernet Adapter
- 108d 0016 OC-2327 Rapidfire 10/100 Ethernet Adapter
- 108d 0017 OC-2250 GoCard 10/100 Ethernet Adapter
- 0021 OC-6151/6152 [RapidFire ATM 155]
- 0022 ATM Adapter
-108e Sun Microsystems Computer Corp.
- 0001 EBUS
- 1000 EBUS
- 1001 Happy Meal
- 1100 RIO EBUS
- 1101 RIO GEM
- 1102 RIO 1394
- 1103 RIO USB
- 2bad GEM
- 5000 Simba Advanced PCI Bridge
- 5043 SunPCI Co-processor
- 8000 Psycho PCI Bus Module
- 8001 Schizo PCI Bus Module
- a000 Ultra IIi
- a001 Ultra IIe
- a801 Tomatillo PCI Bus Module
-108f Systemsoft
-1090 Encore Computer Corporation
-1091 Intergraph Corporation
- 0020 3D graphics processor
- 0021 3D graphics processor w/Texturing
- 0040 3D graphics frame buffer
- 0041 3D graphics frame buffer
- 0060 Proprietary bus bridge
- 00e4 Powerstorm 4D50T
- 0720 Motion JPEG codec
-1092 Diamond Multimedia Systems
- 00a0 Speedstar Pro SE
- 00a8 Speedstar 64
- 0550 Viper V550
- 08d4 Supra 2260 Modem
- 094c SupraExpress 56i Pro
- 1092 Viper V330
- 6120 Maximum DVD
- 8810 Stealth SE
- 8811 Stealth 64/SE
- 8880 Stealth
- 8881 Stealth
- 88b0 Stealth 64
- 88b1 Stealth 64
- 88c0 Stealth 64
- 88c1 Stealth 64
- 88d0 Stealth 64
- 88d1 Stealth 64
- 88f0 Stealth 64
- 88f1 Stealth 64
- 9999 DMD-I0928-1 "Monster sound" sound chip
-1093 National Instruments
- 0160 PCI-DIO-96
- 0162 PCI-MIO-16XE-50
- 1170 PCI-MIO-16XE-10
- 1180 PCI-MIO-16E-1
- 1190 PCI-MIO-16E-4
- 1330 PCI-6031E
- 1350 PCI-6071E
- 2a60 PCI-6023E
- b001 IMAQ-PCI-1408
- b011 IMAQ-PXI-1408
- b021 IMAQ-PCI-1424
- b031 IMAQ-PCI-1413
- b041 IMAQ-PCI-1407
- b051 IMAQ-PXI-1407
- b061 IMAQ-PCI-1411
- b071 IMAQ-PCI-1422
- b081 IMAQ-PXI-1422
- b091 IMAQ-PXI-1411
- c801 PCI-GPIB
- c831 PCI-GPIB bridge
-1094 First International Computers [FIC]
-1095 CMD Technology Inc
- 0240 Adaptec AAR-1210SA SATA HostRAID Controller
- 0640 PCI0640
- 0643 PCI0643
- 0646 PCI0646
- 0647 PCI0647
- 0648 PCI0648
- 0649 PCI0649
- 0e11 005d Integrated Ultra ATA-100 Dual Channel Controller
- 0e11 007e Integrated Ultra ATA-100 IDE RAID Controller
- 101e 0649 AMI MegaRAID IDE 100 Controller
- 0650 PBC0650A
- 0670 USB0670
- 1095 0670 USB0670
- 0673 USB0673
- 0680 PCI0680
- 3112 Silicon Image SiI 3112 SATARaid Controller
-1096 Alacron
-1097 Appian Technology
-1098 Quantum Designs (H.K.) Ltd
- 0001 QD-8500
- 0002 QD-8580
-1099 Samsung Electronics Co., Ltd
-109a Packard Bell
-109b Gemlight Computer Ltd.
-109c Megachips Corporation
-109d Zida Technologies Ltd.
-109e Brooktree Corporation
- 0350 Bt848 Video Capture
- 0351 Bt849A Video capture
- 0369 Bt878 Video Capture
- 1002 0001 TV-Wonder
- 1002 0003 TV-Wonder/VE
- 036c Bt879(??) Video Capture
- 13e9 0070 Win/TV (Video Section)
- 036e Bt878 Video Capture
- 0070 13eb WinTV Series
- 0070 ff01 Viewcast Osprey 200
- 107d 6606 WinFast TV 2000
- 11bd 0012 PCTV pro (TV + FM stereo receiver)
- 11bd 001c PCTV Sat (DBC receiver)
- 127a 0001 Bt878 Mediastream Controller NTSC
- 127a 0002 Bt878 Mediastream Controller PAL BG
- 127a 0003 Bt878a Mediastream Controller PAL BG
- 127a 0048 Bt878/832 Mediastream Controller
- 144f 3000 MagicTView CPH060 - Video
- 1461 0004 AVerTV WDM Video Capture
- 14f1 0001 Bt878 Mediastream Controller NTSC
- 14f1 0002 Bt878 Mediastream Controller PAL BG
- 14f1 0003 Bt878a Mediastream Controller PAL BG
- 14f1 0048 Bt878/832 Mediastream Controller
- 1851 1850 FlyVideo'98 - Video
- 1851 1851 FlyVideo II
- 1852 1852 FlyVideo'98 - Video (with FM Tuner)
- bd11 1200 PCTV pro (TV + FM stereo receiver)
- 036f Bt879 Video Capture
- 127a 0044 Bt879 Video Capture NTSC
- 127a 0122 Bt879 Video Capture PAL I
- 127a 0144 Bt879 Video Capture NTSC
- 127a 0222 Bt879 Video Capture PAL BG
- 127a 0244 Bt879a Video Capture NTSC
- 127a 0322 Bt879 Video Capture NTSC
- 127a 0422 Bt879 Video Capture NTSC
- 127a 1122 Bt879 Video Capture PAL I
- 127a 1222 Bt879 Video Capture PAL BG
- 127a 1322 Bt879 Video Capture NTSC
- 127a 1522 Bt879a Video Capture PAL I
- 127a 1622 Bt879a Video Capture PAL BG
- 127a 1722 Bt879a Video Capture NTSC
- 14f1 0044 Bt879 Video Capture NTSC
- 14f1 0122 Bt879 Video Capture PAL I
- 14f1 0144 Bt879 Video Capture NTSC
- 14f1 0222 Bt879 Video Capture PAL BG
- 14f1 0244 Bt879a Video Capture NTSC
- 14f1 0322 Bt879 Video Capture NTSC
- 14f1 0422 Bt879 Video Capture NTSC
- 14f1 1122 Bt879 Video Capture PAL I
- 14f1 1222 Bt879 Video Capture PAL BG
- 14f1 1322 Bt879 Video Capture NTSC
- 14f1 1522 Bt879a Video Capture PAL I
- 14f1 1622 Bt879a Video Capture PAL BG
- 14f1 1722 Bt879a Video Capture NTSC
- 1851 1850 FlyVideo'98 - Video
- 1851 1851 FlyVideo II
- 1852 1852 FlyVideo'98 - Video (with FM Tuner)
- 0370 Bt880 Video Capture
- 1851 1850 FlyVideo'98
- 1851 1851 FlyVideo'98 EZ - video
- 1852 1852 FlyVideo'98 (with FM Tuner)
- 0878 Bt878 Audio Capture
- 0070 13eb WinTV Series
- 0070 ff01 Viewcast Osprey 200
- 1002 0001 TV-Wonder
- 1002 0003 TV-Wonder/VE
- 11bd 0012 PCTV pro (TV + FM stereo receiver, audio section)
- 11bd 001c PCTV Sat (DBC receiver)
- 127a 0001 Bt878 Video Capture (Audio Section)
- 127a 0002 Bt878 Video Capture (Audio Section)
- 127a 0003 Bt878 Video Capture (Audio Section)
- 127a 0048 Bt878 Video Capture (Audio Section)
- 13e9 0070 Win/TV (Audio Section)
- 144f 3000 MagicTView CPH060 - Audio
- 1461 0004 AVerTV WDM Audio Capture
- 14f1 0001 Bt878 Video Capture (Audio Section)
- 14f1 0002 Bt878 Video Capture (Audio Section)
- 14f1 0003 Bt878 Video Capture (Audio Section)
- 14f1 0048 Bt878 Video Capture (Audio Section)
- bd11 1200 PCTV pro (TV + FM stereo receiver, audio section)
- 0879 Bt879 Audio Capture
- 127a 0044 Bt879 Video Capture (Audio Section)
- 127a 0122 Bt879 Video Capture (Audio Section)
- 127a 0144 Bt879 Video Capture (Audio Section)
- 127a 0222 Bt879 Video Capture (Audio Section)
- 127a 0244 Bt879 Video Capture (Audio Section)
- 127a 0322 Bt879 Video Capture (Audio Section)
- 127a 0422 Bt879 Video Capture (Audio Section)
- 127a 1122 Bt879 Video Capture (Audio Section)
- 127a 1222 Bt879 Video Capture (Audio Section)
- 127a 1322 Bt879 Video Capture (Audio Section)
- 127a 1522 Bt879 Video Capture (Audio Section)
- 127a 1622 Bt879 Video Capture (Audio Section)
- 127a 1722 Bt879 Video Capture (Audio Section)
- 14f1 0044 Bt879 Video Capture (Audio Section)
- 14f1 0122 Bt879 Video Capture (Audio Section)
- 14f1 0144 Bt879 Video Capture (Audio Section)
- 14f1 0222 Bt879 Video Capture (Audio Section)
- 14f1 0244 Bt879 Video Capture (Audio Section)
- 14f1 0322 Bt879 Video Capture (Audio Section)
- 14f1 0422 Bt879 Video Capture (Audio Section)
- 14f1 1122 Bt879 Video Capture (Audio Section)
- 14f1 1222 Bt879 Video Capture (Audio Section)
- 14f1 1322 Bt879 Video Capture (Audio Section)
- 14f1 1522 Bt879 Video Capture (Audio Section)
- 14f1 1622 Bt879 Video Capture (Audio Section)
- 14f1 1722 Bt879 Video Capture (Audio Section)
- 0880 Bt880 Audio Capture
- 2115 BtV 2115 Mediastream controller
- 2125 BtV 2125 Mediastream controller
- 2164 BtV 2164
- 2165 BtV 2165
- 8230 Bt8230 ATM Segment/Reassembly Ctrlr (SRC)
- 8472 Bt8472
- 8474 Bt8474
-109f Trigem Computer Inc.
-10a0 Meidensha Corporation
-10a1 Juko Electronics Ind. Co. Ltd
-10a2 Quantum Corporation
-10a3 Everex Systems Inc
-10a4 Globe Manufacturing Sales
-10a5 Smart Link Ltd.
- 3052 SmartPCI562 56K Modem
- 5449 SmartPCI561 modem
-10a6 Informtech Industrial Ltd.
-10a7 Benchmarq Microelectronics
-10a8 Sierra Semiconductor
- 0000 STB Horizon 64
-10a9 Silicon Graphics, Inc.
- 0001 Crosstalk to PCI Bridge
- 0002 Linc I/O controller
- 0003 IOC3 I/O controller
- 0004 O2 MACE
- 0005 RAD Audio
- 0006 HPCEX
- 0007 RPCEX
- 0008 DiVO VIP
- 0009 Alteon Gigabit Ethernet
- 10a9 8002 Acenic Gigabit Ethernet
- 0010 AMP Video I/O
- 0011 GRIP
- 0012 SGH PSHAC GSN
- 1001 Magic Carpet
- 1002 Lithium
- 1003 Dual JPEG 1
- 1004 Dual JPEG 2
- 1005 Dual JPEG 3
- 1006 Dual JPEG 4
- 1007 Dual JPEG 5
- 1008 Cesium
- 100a IOC4 I/O controller
- 2001 Fibre Channel
- 2002 ASDE
- 8001 O2 1394
- 8002 G-net NT
-10aa ACC Microelectronics
- 0000 ACCM 2188
-10ab Digicom
-10ac Honeywell IAC
-10ad Symphony Labs
- 0001 W83769F
- 0003 SL82C103
- 0005 SL82C105
- 0103 SL82c103
- 0105 SL82c105
- 0565 W83C553
-10ae Cornerstone Technology
-10af Micro Computer Systems Inc
-10b0 CardExpert Technology
-10b1 Cabletron Systems Inc
-10b2 Raytheon Company
-10b3 Databook Inc
- 3106 DB87144
- b106 DB87144
-10b4 STB Systems Inc
- 1b1d Velocity 128 3D
- 10b4 237e Velocity 4400
-10b5 PLX Technology, Inc.
- 0001 i960 PCI bus interface
- 1076 VScom 800 8 port serial adaptor
- 1077 VScom 400 4 port serial adaptor
- 1078 VScom 210 2 port serial and 1 port parallel adaptor
- 1103 VScom 200 2 port serial adaptor
- 1146 VScom 010 1 port parallel adaptor
- 1147 VScom 020 2 port parallel adaptor
- 2724 Thales PCSM Security Card
- 9030 PCI <-> IOBus Bridge Hot Swap
- 15ed 1002 MCCS 8-port Serial Hot Swap
- 15ed 1003 MCCS 16-port Serial Hot Swap
- 9036 9036
- 9050 PCI <-> IOBus Bridge
- 10b5 2036 SatPak GPS
- 10b5 2273 SH-ARC SoHard ARCnet card
- 10b5 9050 MP9050
- 1522 0001 RockForce 4 Port V.90 Data/Fax/Voice Modem
- 1522 0002 RockForce 2 Port V.90 Data/Fax/Voice Modem
- 1522 0003 RockForce 6 Port V.90 Data/Fax/Voice Modem
- 1522 0004 RockForce 8 Port V.90 Data/Fax/Voice Modem
- 1522 0010 RockForce2000 4 Port V.90 Data/Fax/Voice Modem
- 1522 0020 RockForce2000 2 Port V.90 Data/Fax/Voice Modem
- 15ed 1000 Macrolink MCCS 8-port Serial
- 15ed 1001 Macrolink MCCS 16-port Serial
- 15ed 1002 Macrolink MCCS 8-port Serial Hot Swap
- 15ed 1003 Macrolink MCCS 16-port Serial Hot Swap
- 5654 5634 OpenLine4 Telephony Card
- d531 c002 PCIntelliCAN 2xSJA1000 CAN bus
- d84d 4006 EX-4006 1P
- d84d 4008 EX-4008 1P EPP/ECP
- d84d 4014 EX-4014 2P
- d84d 4018 EX-4018 3P EPP/ECP
- d84d 4025 EX-4025 1S(16C550) RS-232
- d84d 4027 EX-4027 1S(16C650) RS-232
- d84d 4028 EX-4028 1S(16C850) RS-232
- d84d 4036 EX-4036 2S(16C650) RS-232
- d84d 4037 EX-4037 2S(16C650) RS-232
- d84d 4038 EX-4038 2S(16C850) RS-232
- d84d 4052 EX-4052 1S(16C550) RS-422/485
- d84d 4053 EX-4053 2S(16C550) RS-422/485
- d84d 4055 EX-4055 4S(16C550) RS-232
- d84d 4058 EX-4055 4S(16C650) RS-232
- d84d 4065 EX-4065 8S(16C550) RS-232
- d84d 4068 EX-4068 8S(16C650) RS-232
- d84d 4078 EX-4078 2S(16C552) RS-232+1P
- 9054 PCI <-> IOBus Bridge
- 10b5 2455 Wessex Technology PHIL-PCI
- 12d9 0002 PCI Prosody Card rev 1.5
- 9060 9060
- 906d 9060SD
- 125c 0640 Aries 16000P
- 906e 9060ES
- 9080 9080
- 10b5 9080 9080 [real subsystem ID not set]
- 129d 0002 Aculab PCI Prosody card
- 12d9 0002 PCI Prosody Card
-10b6 Madge Networks
- 0001 Smart 16/4 PCI Ringnode
- 0002 Smart 16/4 PCI Ringnode Mk2
- 10b6 0002 Smart 16/4 PCI Ringnode Mk2
- 10b6 0006 16/4 CardBus Adapter
- 0003 Smart 16/4 PCI Ringnode Mk3
- 0e11 b0fd Compaq NC4621 PCI, 4/16, WOL
- 10b6 0003 Smart 16/4 PCI Ringnode Mk3
- 10b6 0007 Presto PCI Plus Adapter
- 0004 Smart 16/4 PCI Ringnode Mk1
- 0006 16/4 Cardbus Adapter
- 10b6 0006 16/4 CardBus Adapter
- 0007 Presto PCI Adapter
- 10b6 0007 Presto PCI
- 0009 Smart 100/16/4 PCI-HS Ringnode
- 10b6 0009 Smart 100/16/4 PCI-HS Ringnode
- 000a Smart 100/16/4 PCI Ringnode
- 10b6 000a Smart 100/16/4 PCI Ringnode
- 000b 16/4 CardBus Adapter Mk2
- 10b6 0008 16/4 CardBus Adapter Mk2
- 10b6 000b 16/4 Cardbus Adapter Mk2
- 000c RapidFire 3140V2 16/4 TR Adapter
- 10b6 000c RapidFire 3140V2 16/4 TR Adapter
- 1000 Collage 25/155 ATM Client Adapter
- 1001 Collage 155 ATM Server Adapter
-10b7 3Com Corporation
- 0001 3c985 1000BaseSX (SX/TX)
- 0910 3C910-A01
- 1006 MINI PCI type 3B Data Fax Modem
- 1007 Mini PCI 56k Winmodem
- 10b7 615c Mini PCI 56K Modem
- 1700 Gigabit Ethernet Adapter
- 10b7 0010 3Com 3C940 Gigabit LOM Ethernet Adapter
- 10b7 0020 3Com 3C941 Gigabit LOM Ethernet Adapter
- 3390 3c339 TokenLink Velocity
- 3590 3c359 TokenLink Velocity XL
- 10b7 3590 TokenLink Velocity XL Adapter (3C359/359B)
- 4500 3c450 Cyclone/unknown
- 5055 3c555 Laptop Hurricane
- 5057 3c575 [Megahertz] 10/100 LAN CardBus
- 10b7 5a57 3C575 Megahertz 10/100 LAN Cardbus PC Card
- 5157 3c575 [Megahertz] 10/100 LAN CardBus
- 10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card
- 5257 3CCFE575CT Cyclone CardBus
- 10b7 5c57 FE575C-3Com 10/100 LAN CardBus-Fast Ethernet
- 5900 3c590 10BaseT [Vortex]
- 5920 3c592 EISA 10mbps Demon/Vortex
- 5950 3c595 100BaseTX [Vortex]
- 5951 3c595 100BaseT4 [Vortex]
- 5952 3c595 100Base-MII [Vortex]
- 5970 3c597 EISA Fast Demon/Vortex
- 5b57 3c595 [Megahertz] 10/100 LAN CardBus
- 10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card
- 6055 3c556 Hurricane CardBus
- 6056 3c556B Hurricane CardBus
- 10b7 6556 10/100 Mini PCI Ethernet Adapter
- 6560 3CCFE656 Cyclone CardBus
- 10b7 656a 3CCFEM656 10/100 LAN+56K Modem CardBus
- 6561 3CCFEM656 10/100 LAN+56K Modem CardBus
- 10b7 656b 3CCFEM656 10/100 LAN+56K Modem CardBus
- 6562 3CCFEM656 [id 6562] Cyclone CardBus
- 10b7 656b 3CCFEM656B 10/100 LAN+56K Modem CardBus
- 6563 3CCFEM656B 10/100 LAN+56K Modem CardBus
- 10b7 656b 3CCFEM656 10/100 LAN+56K Modem CardBus
- 6564 3CCFEM656 [id 6564] Cyclone CardBus
- 7646 3cSOHO100-TX Hurricane
- 7940 3c803 FDDILink UTP Controller
- 7980 3c804 FDDILink SAS Controller
- 7990 3c805 FDDILink DAS Controller
- 8811 Token ring
- 9000 3c900 10BaseT [Boomerang]
- 9001 3c900 Combo [Boomerang]
- 9004 3c900B-TPO [Etherlink XL TPO]
- 10b7 9004 3C900B-TPO Etherlink XL TPO 10Mb
- 9005 3c900B-Combo [Etherlink XL Combo]
- 10b7 9005 3C900B-Combo Etherlink XL Combo
- 9006 3c900B-TPC [Etherlink XL TPC]
- 900a 3c900B-FL [Etherlink XL FL]
- 9050 3c905 100BaseTX [Boomerang]
- 9051 3c905 100BaseT4 [Boomerang]
- 9055 3c905B 100BaseTX [Cyclone]
- 1028 0080 3C905B Fast Etherlink XL 10/100
- 1028 0081 3C905B Fast Etherlink XL 10/100
- 1028 0082 3C905B Fast Etherlink XL 10/100
- 1028 0083 3C905B Fast Etherlink XL 10/100
- 1028 0084 3C905B Fast Etherlink XL 10/100
- 1028 0085 3C905B Fast Etherlink XL 10/100
- 1028 0086 3C905B Fast Etherlink XL 10/100
- 1028 0087 3C905B Fast Etherlink XL 10/100
- 1028 0088 3C905B Fast Etherlink XL 10/100
- 1028 0089 3C905B Fast Etherlink XL 10/100
- 1028 0090 3C905B Fast Etherlink XL 10/100
- 1028 0091 3C905B Fast Etherlink XL 10/100
- 1028 0092 3C905B Fast Etherlink XL 10/100
- 1028 0093 3C905B Fast Etherlink XL 10/100
- 1028 0094 3C905B Fast Etherlink XL 10/100
- 1028 0095 3C905B Fast Etherlink XL 10/100
- 1028 0096 3C905B Fast Etherlink XL 10/100
- 1028 0097 3C905B Fast Etherlink XL 10/100
- 1028 0098 3C905B Fast Etherlink XL 10/100
- 1028 0099 3C905B Fast Etherlink XL 10/100
- 10b7 9055 3C905B Fast Etherlink XL 10/100
- 9056 3c905B-T4 [Fast EtherLink XL 10/100]
- 9058 3c905B-Combo [Deluxe Etherlink XL 10/100]
- 905a 3c905B-FX [Fast Etherlink XL FX 10/100]
- 9200 3c905C-TX/TX-M [Tornado]
- 1028 0095 Integrated 3C905C-TX Fast Etherlink for PC Management NIC
- 10b7 1000 3C905C-TX Fast Etherlink for PC Management NIC
- 10b7 7000 10/100 Mini PCI Ethernet Adapter
- 9201 3C920B-EMB Integrated Fast Ethernet Controller
- 9300 3CSOHO100B-TX [910-A01]
- 9800 3c980-TX [Fast Etherlink XL Server Adapter]
- 10b7 9800 3c980-TX Fast Etherlink XL Server Adapter
- 9805 3c980-TX 10/100baseTX NIC [Python-T]
- 10b7 1201 3c982-TXM 10/100baseTX Dual Port A [Hydra]
- 10b7 1202 3c982-TXM 10/100baseTX Dual Port B [Hydra]
- 10b7 9805 3c980 10/100baseTX NIC [Python-T]
- 10f1 2462 Thunder K7 S2462
- 9900 3C990-TX [Typhoon]
- 9902 3CR990-TX-95 [Typhoon 56-bit]
- 9903 3CR990-TX-97 [Typhoon 168-bit]
- 9904 3C990B-TX-M/3C990BSVR [Typhoon2]
- 10b7 1000 3CR990B-TX-M [Typhoon2]
- 10b7 2000 3CR990BSVR [Typhoon2 Server]
- 9905 3CR990-FX-95/97/95 [Typhoon Fiber]
- 10b7 1101 3CR990-FX-95 [Typhoon Fiber 56-bit]
- 10b7 1102 3CR990-FX-97 [Typhoon Fiber 168-bit]
- 10b7 2101 3CR990-FX-95 Server [Typhoon Fiber 56-bit]
- 10b7 2102 3CR990-FX-97 Server [Typhoon Fiber 168-bit]
- 9908 3CR990SVR95 [Typhoon Server 56-bit]
- 9909 3CR990SVR97 [Typhoon Server 168-bit]
- 990b 3C990SVR [Typhoon Server]
-10b8 Standard Microsystems Corp [SMC]
- 0005 83C170QF
- 1055 e000 LANEPIC 10/100 [EVB171Q-PCI]
- 1055 e002 LANEPIC 10/100 [EVB171G-PCI]
- 10b8 a011 EtherPower II 10/100
- 10b8 a014 EtherPower II 10/100
- 10b8 a015 EtherPower II 10/100
- 10b8 a016 EtherPower II 10/100
- 10b8 a017 EtherPower II 10/100
- 0006 LANEPIC
- 1055 e100 LANEPIC Cardbus Fast Ethernet Adapter
- 1055 e102 LANEPIC Cardbus Fast Ethernet Adapter
- 1055 e300 LANEPIC Cardbus Fast Ethernet Adapter
- 1055 e302 LANEPIC Cardbus Fast Ethernet Adapter
- 10b8 a012 LANEPIC Cardbus Fast Ethernet Adapter
- 13a2 8002 LANEPIC Cardbus Fast Ethernet Adapter
- 13a2 8006 LANEPIC Cardbus Fast Ethernet Adapter
- 1000 FDC 37c665
- 1001 FDC 37C922
- a011 83C170QF
- b106 SMC34C90
-10b9 ALi Corporation
- 0111 C-Media CMI8738/C3DX Audio Device (OEM)
- 10b9 0111 C-Media CMI8738/C3DX Audio Device (OEM)
- 1435 M1435
- 1445 M1445
- 1449 M1449
- 1451 M1451
- 1461 M1461
- 1489 M1489
- 1511 M1511 [Aladdin]
- 1512 M1512 [Aladdin]
- 1513 M1513 [Aladdin]
- 1521 M1521 [Aladdin III]
- 10b9 1521 ALI M1521 Aladdin III CPU Bridge
- 1523 M1523
- 10b9 1523 ALI M1523 ISA Bridge
- 1531 M1531 [Aladdin IV]
- 1533 M1533 PCI to ISA Bridge [Aladdin IV]
- 10b9 1533 ALI M1533 Aladdin IV ISA Bridge
- 1541 M1541
- 10b9 1541 ALI M1541 Aladdin V/V+ AGP System Controller
- 1543 M1543
- 1563 M1563 HyperTransport South Bridge
- 1621 M1621
- 1631 ALI M1631 PCI North Bridge Aladdin Pro III
- 1632 M1632M Northbridge+Trident
- 1641 ALI M1641 PCI North Bridge Aladdin Pro IV
- 1644 M1644/M1644T Northbridge+Trident
- 1646 M1646 Northbridge+Trident
- 1647 M1647 Northbridge [MAGiK 1 / MobileMAGiK 1]
- 1651 M1651/M1651T Northbridge [Aladdin-Pro 5/5M,Aladdin-Pro 5T/5TM]
- 1671 M1671 Super P4 Northbridge [AGP4X,PCI and SDR/DDR]
- 1681 M1681 P4 Northbridge [AGP8X,HyperTransport and SDR/DDR]
- 1687 M1687 K8 Northbridge [AGP8X and HyperTransport]
- 3141 M3141
- 3143 M3143
- 3145 M3145
- 3147 M3147
- 3149 M3149
- 3151 M3151
- 3307 M3307
- 3309 M3309
- 5212 M4803
- 5215 MS4803
- 5217 M5217H
- 5219 M5219
- 5225 M5225
- 5229 M5229 IDE
- 1043 8053 A7A266 Motherboard IDE
- 5235 M5225
- 5237 USB 1.1 Controller
- 5239 USB 2.0 Controller
- 5243 M1541 PCI to AGP Controller
- 5247 PCI to AGP Controller
- 5249 M5249 HTT to PCI Bridge
- 5251 M5251 P1394 OHCI 1.0 Controller
- 5253 M5253 P1394 OHCI 1.1 Controller
- 5261 M5261 Ethernet Controller
- 5450 Lucent Technologies Soft Modem AMR
- 5451 M5451 PCI AC-Link Controller Audio Device
- 1014 0506 ThinkPad R30
- 5453 M5453 PCI AC-Link Controller Modem Device
- 5455 M5455 PCI AC-Link Controller Audio Device
- 5457 Intel 537 [M5457 AC-Link Modem]
-# Same, but more useful for driver lookup
- 5459 SmartLink SmartPCI561 56K Modem
-# SmartLink PCI SoftModem
- 545a SmartLink SmartPCI563 56K Modem
- 5471 M5471 Memory Stick Controller
- 5473 M5473 SD-MMC Controller
- 7101 M7101 PMU
- 10b9 7101 ALI M7101 Power Management Controller
-10ba Mitsubishi Electric Corp.
- 0301 AccelGraphics AccelECLIPSE
-10bb Dapha Electronics Corporation
-10bc Advanced Logic Research
-10bd Surecom Technology
- 0e34 NE-34
-10be Tseng Labs International Co.
-10bf Most Inc
-10c0 Boca Research Inc.
-10c1 ICM Co., Ltd.
-10c2 Auspex Systems Inc.
-10c3 Samsung Semiconductors, Inc.
- 1100 Smartether100 SC1100 LAN Adapter (i82557B)
-10c4 Award Software International Inc.
-10c5 Xerox Corporation
-10c6 Rambus Inc.
-10c7 Media Vision
-10c8 Neomagic Corporation
- 0001 NM2070 [MagicGraph 128]
- 0002 NM2090 [MagicGraph 128V]
- 0003 NM2093 [MagicGraph 128ZV]
- 0004 NM2160 [MagicGraph 128XD]
- 1014 00ba MagicGraph 128XD
- 1025 1007 MagicGraph 128XD
- 1028 0074 MagicGraph 128XD
- 1028 0075 MagicGraph 128XD
- 1028 007d MagicGraph 128XD
- 1028 007e MagicGraph 128XD
- 1033 802f MagicGraph 128XD
- 104d 801b MagicGraph 128XD
- 104d 802f MagicGraph 128XD
- 104d 830b MagicGraph 128XD
- 10ba 0e00 MagicGraph 128XD
- 10c8 0004 MagicGraph 128XD
- 10cf 1029 MagicGraph 128XD
- 10f7 8308 MagicGraph 128XD
- 10f7 8309 MagicGraph 128XD
- 10f7 830b MagicGraph 128XD
- 10f7 830d MagicGraph 128XD
- 10f7 8312 MagicGraph 128XD
- 0005 NM2200 [MagicGraph 256AV]
- 1014 00dd ThinkPad 570
- 0006 NM2360 [MagicMedia 256ZX]
- 0016 NM2380 [MagicMedia 256XL+]
- 10c8 0016 MagicMedia 256XL+
- 0025 NM2230 [MagicGraph 256AV+]
- 0083 NM2093 [MagicGraph 128ZV+]
- 8005 NM2200 [MagicMedia 256AV Audio]
- 0e11 b0d1 MagicMedia 256AV Audio Device on Discovery
- 0e11 b126 MagicMedia 256AV Audio Device on Durango
- 1014 00dd MagicMedia 256AV Audio Device on BlackTip Thinkpad
- 1025 1003 MagicMedia 256AV Audio Device on TravelMate 720
- 1028 008f MagicMedia 256AV Audio Device on Colorado Inspiron
- 103c 0007 MagicMedia 256AV Audio Device on Voyager II
- 103c 0008 MagicMedia 256AV Audio Device on Voyager III
- 103c 000d MagicMedia 256AV Audio Device on Omnibook 900
- 10c8 8005 MagicMedia 256AV Audio Device on FireAnt
- 110a 8005 MagicMedia 256AV Audio Device
- 14c0 0004 MagicMedia 256AV Audio Device
- 8006 NM2360 [MagicMedia 256ZX Audio]
- 8016 NM2380 [MagicMedia 256XL+ Audio]
-10c9 Dataexpert Corporation
-10ca Fujitsu Microelectr., Inc.
-10cb Omron Corporation
-10cc Mentor ARC Inc
-10cd Advanced System Products, Inc
- 1100 ASC1100
- 1200 ASC1200 [(abp940) Fast SCSI-II]
- 1300 ABP940-U / ABP960-U
- 10cd 1310 ASC1300 SCSI Adapter
- 2300 ABP940-UW
- 2500 ABP940-U2W
-10ce Radius
-10cf Citicorp TTI
- 2001 mb86605
-10d0 Fujitsu Limited
-10d1 FuturePlus Systems Corp.
-10d2 Molex Incorporated
-10d3 Jabil Circuit Inc
-10d4 Hualon Microelectronics
-10d5 Autologic Inc.
-10d6 Cetia
-10d7 BCM Advanced Research
-10d8 Advanced Peripherals Labs
-10d9 Macronix, Inc. [MXIC]
- 0512 MX98713
- 0531 MX987x5
- 1186 1200 DFE-540TX ProFAST 10/100 Adapter
- 8625 MX86250
- 8888 MX86200
-10da Compaq IPG-Austin
- 0508 TC4048 Token Ring 4/16
- 3390 Tl3c3x9
-10db Rohm LSI Systems, Inc.
-10dc CERN/ECP/EDU
- 0001 STAR/RD24 SCI-PCI (PMC)
- 0002 TAR/RD24 SCI-PCI (PMC)
- 0021 HIPPI destination
- 0022 HIPPI source
- 10dc ATT2C15-3 FPGA
-10dd Evans & Sutherland
-10de nVidia Corporation
- 0008 NV1 [EDGE 3D]
- 0009 NV1 [EDGE 3D]
- 0010 NV2 [Mutara V08]
- 0020 NV4 [RIVA TNT]
- 1043 0200 V3400 TNT
- 1048 0c18 Erazor II SGRAM
- 1048 0c1b Erazor II
- 1092 0550 Viper V550
- 1092 0552 Viper V550
- 1092 4804 Viper V550
- 1092 4808 Viper V550
- 1092 4810 Viper V550
- 1092 4812 Viper V550
- 1092 4815 Viper V550
- 1092 4820 Viper V550 with TV out
- 1092 4822 Viper V550
- 1092 4904 Viper V550
- 1092 4914 Viper V550
- 1092 8225 Viper V550
- 10b4 273d Velocity 4400
- 10b4 273e Velocity 4400
- 10b4 2740 Velocity 4400
- 10de 0020 Riva TNT
- 1102 1015 Graphics Blaster CT6710
- 1102 1016 Graphics Blaster RIVA TNT
- 0028 NV5 [RIVA TNT2/TNT2 Pro]
- 1043 0200 AGP-V3800 SGRAM
- 1043 0201 AGP-V3800 SDRAM
- 1043 0205 PCI-V3800
- 1043 4000 AGP-V3800PRO
- 1048 0c21 Synergy II
- 1092 4804 Viper V770
- 1092 4a00 Viper V770
- 1092 4a02 Viper V770 Ultra
- 1092 5a00 RIVA TNT2/TNT2 Pro
- 1092 6a02 Viper V770 Ultra
- 1092 7a02 Viper V770 Ultra
- 10de 0005 RIVA TNT2 Pro
- 10de 000f Compaq NVIDIA TNT2 Pro
- 1102 1020 3D Blaster RIVA TNT2
- 1102 1026 3D Blaster RIVA TNT2 Digital
- 14af 5810 Maxi Gamer Xentor
- 0029 NV5 [RIVA TNT2 Ultra]
- 1043 0200 AGP-V3800 Deluxe
- 1043 0201 AGP-V3800 Ultra SDRAM
- 1043 0205 PCI-V3800 Ultra
- 1102 1021 3D Blaster RIVA TNT2 Ultra
- 1102 1029 3D Blaster RIVA TNT2 Ultra
- 1102 102f 3D Blaster RIVA TNT2 Ultra
- 14af 5820 Maxi Gamer Xentor 32
- 002a NV5 [Riva TnT2]
- 002b NV5 [Riva TnT2]
- 002c NV6 [Vanta/Vanta LT]
- 1043 0200 AGP-V3800 Combat SDRAM
- 1043 0201 AGP-V3800 Combat
- 1092 6820 Viper V730
- 1102 1031 CT6938 VANTA 8MB
- 1102 1034 CT6894 VANTA 16MB
- 14af 5008 Maxi Gamer Phoenix 2
- 002d NV5M64 [RIVA TNT2 Model 64/Model 64 Pro]
- 1043 0200 AGP-V3800M
- 1043 0201 AGP-V3800M
- 1048 0c3a Erazor III LT
- 10de 001e M64 AGP4x
- 1102 1023 CT6892 RIVA TNT2 Value
- 1102 1024 CT6932 RIVA TNT2 Value 32Mb
- 1102 102c CT6931 RIVA TNT2 Value [Jumper]
- 1462 8808 MSI-8808
- 1554 1041 PixelView RIVA TNT2 M64 32MB
- 002e NV6 [Vanta]
- 002f NV6 [Vanta]
- 0060 nForce2 ISA Bridge
- 1043 80ad A7N8X Mainboard
- 0064 nForce2 SMBus (MCP)
- 0065 nForce2 IDE
- 0066 nForce2 Ethernet Controller
- 0067 nForce2 USB Controller
- 1043 0c11 A7N8X Mainboard
- 0068 nForce2 USB Controller
- 1043 0c11 A7N8X Mainboard
- 006a nForce2 AC97 Audio Controller (MCP)
- 006b nForce MultiMedia audio [Via VT82C686B]
- 006e nForce2 FireWire (IEEE 1394) Controller
- 00a0 NV5 [Aladdin TNT2]
- 14af 5810 Maxi Gamer Xentor
- 0100 NV10 [GeForce 256 SDR]
- 1043 0200 AGP-V6600 SGRAM
- 1043 0201 AGP-V6600 SDRAM
- 1043 4008 AGP-V6600 SGRAM
- 1043 4009 AGP-V6600 SDRAM
- 1102 102d CT6941 GeForce 256
- 14af 5022 3D Prophet SE
- 0101 NV10DDR [GeForce 256 DDR]
- 1043 0202 AGP-V6800 DDR
- 1043 400a AGP-V6800 DDR SGRAM
- 1043 400b AGP-V6800 DDR SDRAM
- 1102 102e CT6971 GeForce 256 DDR
- 14af 5021 3D Prophet DDR-DVI
- 0103 NV10GL [Quadro]
- 0110 NV11 [GeForce2 MX/MX 400]
- 1043 4015 AGP-V7100 Pro
- 1043 4031 V7100 Pro with TV output
- 1462 8817 MSI GeForce2 MX400 Pro32S [MS-8817]
- 14af 7102 3D Prophet II MX
- 14af 7103 3D Prophet II MX Dual-Display
- 0111 NV11DDR [GeForce2 MX 100 DDR/200 DDR]
- 0112 NV11 [GeForce2 Go]
- 0113 NV11GL [Quadro2 MXR/EX]
- 0150 NV15 [GeForce2 GTS/Pro]
- 1043 4016 V7700 AGP Video Card
- 107d 2840 WinFast GeForce2 GTS with TV output
- 1462 8831 Creative GeForce2 Pro
- 0151 NV15DDR [GeForce2 Ti]
- 1043 405f V7700Ti
- 0152 NV15BR [GeForce2 Ultra, Bladerunner]
- 1048 0c56 GLADIAC Ultra
- 0153 NV15GL [Quadro2 Pro]
- 0170 NV17 [GeForce4 MX 460]
- 0171 NV17 [GeForce4 MX 440]
- 10b0 0002 Gainward Pro/600 TV
- 1462 8661 G4MX440-VTP
- 1462 8730 MX440SES-T (MS-8873)
- 147b 8f00 Abit Siluro GeForce4MX440
- 0172 NV17 [GeForce4 MX 420]
- 0173 NV17 [GeForce4 MX 440-SE]
- 0174 NV17 [GeForce4 440 Go]
- 0175 NV17 [GeForce4 420 Go]
- 0176 NV17 [GeForce4 420 Go 32M]
- 0178 NV17GL [Quadro4 550 XGL]
- 0179 NV17 [GeForce4 440 Go 64M]
- 017a NV17GL [Quadro4 200/400 NVS]
- 017b NV17GL [Quadro4 550 XGL]
- 017c NV17GL [Quadro4 550 GoGL]
- 0181 NV18 [GeForce4 MX 440 AGP 8x]
- 0182 NV18 [GeForce4 MX 440SE AGP 8x]
- 0183 NV18 [GeForce4 MX 420 AGP 8x]
- 0188 NV18GL [Quadro4 580 XGL]
- 018a NV18GL [Quadro4 NVS]
- 018b NV18GL [Quadro4 380 XGL]
- 01a0 NV15 [GeForce2 - nForce GPU]
- 01a4 nForce CPU bridge
- 01ab nForce 420 Memory Controller (DDR)
- 01ac nForce 220/420 Memory Controller
- 01ad nForce 220/420 Memory Controller
- 01b1 nForce Audio
- 01b2 nForce ISA Bridge
- 01b4 nForce PCI System Management
- 01b7 nForce AGP to PCI Bridge
- 01b8 nForce PCI-to-PCI bridge
- 01bc nForce IDE
- 01c1 Intel 537 [nForce MC97 Modem]
- 01c2 nForce USB Controller
- 01c3 nForce Ethernet Controller
- 01e8 nForce2 AGP
- 01f0 NV18 [GeForce4 MX - nForce GPU]
- 0200 NV20 [GeForce3]
- 1043 402f AGP-V8200 DDR
- 0201 NV20 [GeForce3 Ti 200]
- 0202 NV20 [GeForce3 Ti 500]
- 1043 405b V8200 T5
- 1545 002f Xtasy 6964
- 0203 NV20DCC [Quadro DCC]
- 0250 NV25 [GeForce4 Ti 4600]
- 0251 NV25 [GeForce4 Ti 4400]
- 0252 NV25 [GeForce4 Ti]
- 0253 NV25 [GeForce4 Ti 4200]
- 107d 2896 WinFast A250 LE TD (Dual VGA/TV-out/DVI)
- 147b 8f09 Siluro (Dual VGA/TV-out/DVI)
- 0258 NV25GL [Quadro4 900 XGL]
- 0259 NV25GL [Quadro4 750 XGL]
- 025b NV25GL [Quadro4 700 XGL]
- 0280 NV28 [GeForce4 Ti 4800]
- 0281 NV28 [GeForce4 Ti 4200 AGP 8x]
- 0282 NV28 [GeForce4 Ti 4800 SE]
- 0286 NV28 [GeForce4 Ti 4200 Go AGP 8x]
- 0288 NV28GL [Quadro4 980 XGL]
- 0289 NV28GL [Quadro4 780 XGL]
- 0300 NV30 [GeForce FX]
- 0301 NV30 [GeForce FX 5800 Ultra]
- 0302 NV30 [GeForce FX 5800]
- 0308 NV30GL [Quadro FX 2000]
- 0309 NV30GL [Quadro FX 1000]
-10df Emulex Corporation
- 1ae5 LP6000 Fibre Channel Host Adapter
- f085 LP850 Fibre Channel Adapter
- f095 LP952 Fibre Channel Adapter
- f098 LP982 Fibre Channel Adapter
- f700 LP7000 Fibre Channel Host Adapter
- f800 LP8000 Fibre Channel Host Adapter
- f900 LP9000 Fibre Channel Host Adapter
- f980 LP9802 Fibre Channel Adapter
-10e0 Integrated Micro Solutions Inc.
- 5026 IMS5026/27/28
- 5027 IMS5027
- 5028 IMS5028
- 8849 IMS8849
- 8853 IMS8853
- 9128 IMS9128 [Twin turbo 128]
-10e1 Tekram Technology Co.,Ltd.
- 0391 TRM-S1040
- 10e1 0391 DC-315U SCSI-3 Host Adapter
- 690c DC-690c
- dc29 DC-290
-10e2 Aptix Corporation
-10e3 Tundra Semiconductor Corp.
- 0000 CA91C042 [Universe]
- 0860 CA91C860 [QSpan]
- 0862 CA91C862A [QSpan-II]
-10e4 Tandem Computers
-10e5 Micro Industries Corporation
-10e6 Gainbery Computer Products Inc.
-10e7 Vadem
-10e8 Applied Micro Circuits Corp.
- 2011 Q-Motion Video Capture/Edit board
- 4750 S5930 [Matchmaker]
- 5920 S5920
- 8043 LANai4.x [Myrinet LANai interface chip]
- 8062 S5933_PARASTATION
- 807d S5933 [Matchmaker]
- 8088 Kongsberg Spacetec Format Synchronizer
- 8089 Kongsberg Spacetec Serial Output Board
- 809c S5933_HEPC3
- 80d7 PCI-9112
- 80d9 PCI-9118
- 80da PCI-9812
- 811a PCI-IEEE1355-DS-DE Interface
- 8170 S5933 [Matchmaker] (Chipset Development Tool)
- 82db AJA HDNTV HD SDI Framestore
-10e9 Alps Electric Co., Ltd.
-10ea Intergraphics Systems
- 1680 IGA-1680
- 1682 IGA-1682
- 1683 IGA-1683
- 2000 CyberPro 2000
- 2010 CyberPro 2000A
- 5000 CyberPro 5000
- 5050 CyberPro 5050
- 5202 CyberPro 5202
-10eb Artists Graphics
- 0101 3GA
- 8111 Twist3 Frame Grabber
-10ec Realtek Semiconductor Co., Ltd.
- 8029 RTL-8029(AS)
- 10b8 2011 EZ-Card (SMC1208)
- 10ec 8029 RTL-8029(AS)
- 1113 1208 EN1208
- 1186 0300 DE-528
- 1259 2400 AT-2400
- 8129 RTL-8129
- 10ec 8129 RT8129 Fast Ethernet Adapter
- 8138 RT8139 (B/C) Cardbus Fast Ethernet Adapter
- 10ec 8138 RT8139 (B/C) Fast Ethernet Adapter
- 8139 RTL-8139/8139C/8139C+
- 1025 8920 ALN-325
- 1025 8921 ALN-325
- 10bd 0320 EP-320X-R
- 10ec 8139 RT8139
- 1186 1300 DFE-538TX
- 1186 1320 SN5200
- 1186 8139 DRN-32TX
- 11f6 8139 FN22-3(A) LinxPRO Ethernet Adapter
- 1259 2500 AT-2500TX
- 1259 2503 AT-2500TX/ACPI
- 1429 d010 ND010
- 1432 9130 EN-9130TX
- 1436 8139 RT8139
- 146c 1439 FE-1439TX
- 1489 6001 GF100TXRII
- 1489 6002 GF100TXRA
- 149c 139a LFE-8139ATX
- 149c 8139 LFE-8139TX
- 2646 0001 EtheRx
- 8e2e 7000 KF-230TX
- 8e2e 7100 KF-230TX/2
- a0a0 0007 ALN-325C
- 8169 RTL-8169
- 1371 434e ProG-2000L
- 8197 SmartLAN56 56K Modem
-10ed Ascii Corporation
- 7310 V7310
-10ee Xilinx Corporation
- 3fc0 RME Digi96
- 3fc1 RME Digi96/8
- 3fc2 RME Digi96/8 Pro
- 3fc3 RME Digi96/8 Pad
- 3fc4 RME Digi9652 (Hammerfall)
- 3fc5 RME Hammerfall DSP
-10ef Racore Computer Products, Inc.
- 8154 M815x Token Ring Adapter
-10f0 Peritek Corporation
-10f1 Tyan Computer
-10f2 Achme Computer, Inc.
-10f3 Alaris, Inc.
-10f4 S-MOS Systems, Inc.
-10f5 NKK Corporation
- a001 NDR4000 [NR4600 Bridge]
-10f6 Creative Electronic Systems SA
-10f7 Matsushita Electric Industrial Co., Ltd.
-10f8 Altos India Ltd
-10f9 PC Direct
-10fa Truevision
- 000c TARGA 1000
-10fb Thesys Gesellschaft für Mikroelektronik mbH
- 186f TH 6255
-10fc I-O Data Device, Inc.
-# What's in the cardbus end of a Sony ACR-A01 card, comes with newer Vaio CD-RW drives
- 0003 Cardbus IDE Controller
- 0005 Cardbus SCSI CBSC II
-10fd Soyo Computer, Inc
-10fe Fast Multimedia AG
-10ff NCube
-1100 Jazz Multimedia
-1101 Initio Corporation
- 1060 INI-A100U2W
- 9100 INI-9100/9100W
- 9400 INI-940
- 9401 INI-950
- 9500 360P
-1102 Creative Labs
- 0002 SB Live! EMU10k1
- 1102 0020 CT4850 SBLive! Value
- 1102 0021 CT4620 SBLive!
- 1102 002f SBLive! mainboard implementation
- 1102 4001 E-mu APS
- 1102 8022 CT4780 SBLive! Value
- 1102 8023 CT4790 SoundBlaster PCI512
- 1102 8024 CT4760 SBLive!
- 1102 8025 SBLive! Mainboard Implementation
- 1102 8026 CT4830 SBLive! Value
- 1102 8027 CT4832 SBLive! Value
- 1102 8028 CT4760 SBLive! OEM version
- 1102 8031 CT4831 SBLive! Value
- 1102 8040 CT4760 SBLive!
- 1102 8051 CT4850 SBLive! Value
- 1102 8061 SBLive! Player 5.1
- 1102 8064 SB Live! 5.1 Model SB0100
- 1102 8065 SBLive! 5.1 Digital Model SB0220
- 0004 SB Audigy
- 1102 0051 SB0090 Audigy Player
- 1102 0053 SB0090 Audigy Player/OEM
- 0006 [SB Live! Value] EMU10k1X
- 4001 SB Audigy FireWire Port
- 1102 0010 SB Audigy FireWire Port
- 7002 SB Live! MIDI/Game Port
- 1102 0020 Gameport Joystick
- 7003 SB Audigy MIDI/Game port
- 1102 0040 SB Audigy MIDI/Game Port
- 7004 [SB Live! Value] Input device controller
- 8064 SB0100 [SBLive! 5.1 OEM]
- 8938 Ectiva EV1938
-1103 Triones Technologies, Inc.
- 0003 HPT343
-# Revisions: 01=HPT366, 03=HPT370, 04=HPT370A, 05=HPT372
- 0004 HPT366/368/370/370A/372
- 1103 0001 HPT370A
- 1103 0005 HPT370 UDMA100
- 0005 HPT372A
- 0006 HPT302
- 0007 HPT371
- 0008 HPT374
- 0009 HPT372N
-1104 RasterOps Corp.
-1105 Sigma Designs, Inc.
- 1105 REALmagic Xcard MPEG 1/2/3/4 DVD Decoder
- 8300 REALmagic Hollywood Plus DVD Decoder
- 8400 EM840x REALmagic DVD/MPEG-2 Audio/Video Decoder
-1106 VIA Technologies, Inc.
- 0102 Embedded VIA Ethernet Controller
- 0130 VT6305 1394.A Controller
- 0305 VT8363/8365 [KT133/KM133]
- 1043 8033 A7V Mainboard
- 1043 803e A7V-E Mainboard
- 1043 8042 A7V133/A7V133-C Mainboard
- 147b a401 KT7/KT7-RAID/KT7A/KT7A-RAID Mainboard
- 0391 VT8371 [KX133]
- 0501 VT8501 [Apollo MVP4]
- 0505 VT82C505
-# Shares chip with :0576. The VT82C576M has :1571 instead of :0561.
- 0561 VT82C576MV
- 0571 VT82C586A/B/VT82C686/A/B/VT823x/A/C PIPC Bus Master IDE
- 1019 0985 P6VXA Motherboard
- 1043 8052 VT8233A Bus Master ATA100/66/33 IDE
- 1043 808c A7V8X motherboard
- 1106 0571 VT82C586/B/VT82C686/A/B/VT8233/A/C/VT8235 PIPC Bus Master IDE
- 1179 0001 Magnia Z310
- 1458 5002 GA-7VAX Mainboard
- 0576 VT82C576 3V [Apollo Master]
- 0585 VT82C585VP [Apollo VP1/VPX]
- 0586 VT82C586/A/B PCI-to-ISA [Apollo VP]
- 1106 0000 MVP3 ISA Bridge
- 0595 VT82C595 [Apollo VP2]
- 0596 VT82C596 ISA [Mobile South]
- 1106 0000 VT82C596/A/B PCI to ISA Bridge
- 1458 0596 VT82C596/A/B PCI to ISA Bridge
- 0597 VT82C597 [Apollo VP3]
- 0598 VT82C598 [Apollo MVP3]
- 0601 VT8601 [Apollo ProMedia]
- 0605 VT8605 [ProSavage PM133]
- 1043 802c CUV4X mainboard
- 0680 VT82C680 [Apollo P6]
- 0686 VT82C686 [Apollo Super South]
- 1019 0985 P6VXA Motherboard
- 1043 802c CUV4X mainboard
- 1043 8033 A7V Mainboard
- 1043 803e A7V-E Mainboard
- 1043 8040 A7M266 Mainboard
- 1043 8042 A7V133/A7V133-C Mainboard
- 1106 0000 VT82C686/A PCI to ISA Bridge
- 1106 0686 VT82C686/A PCI to ISA Bridge
- 1179 0001 Magnia Z310
- 147b a702 KG7-Lite Mainboard
- 0691 VT82C693A/694x [Apollo PRO133x]
- 1019 0985 P6VXA Motherboard
- 1179 0001 Magnia Z310
- 1458 0691 VT82C691 Apollo Pro System Controller
- 0693 VT82C693 [Apollo Pro Plus]
- 0698 VT82C693A [Apollo Pro133 AGP]
- 0926 VT82C926 [Amazon]
- 1000 VT82C570MV
- 1106 VT82C570MV
- 1571 VT82C576M/VT82C586
- 1595 VT82C595/97 [Apollo VP2/97]
- 3038 USB
- 0925 1234 USB Controller
- 1019 0985 P6VXA Motherboard
- 1043 808c A7V8X motherboard
- 1179 0001 Magnia Z310
- 3040 VT82C586B ACPI
- 3043 VT86C100A [Rhine]
- 10bd 0000 VT86C100A Fast Ethernet Adapter
- 1106 0100 VT86C100A Fast Ethernet Adapter
- 1186 1400 DFE-530TX rev A
- 3044 IEEE 1394 Host Controller
- 3050 VT82C596 Power Management
- 3051 VT82C596 Power Management
- 3057 VT82C686 [Apollo Super ACPI]
- 1019 0985 P6VXA Motherboard
- 1043 8033 A7V Mainboard
- 1043 803e A7V-E Mainboard
- 1043 8040 A7M266 Mainboard
- 1043 8042 A7V133/A7V133-C Mainboard
- 1179 0001 Magnia Z310
- 3058 VT82C686 AC97 Audio Controller
- 0e11 b194 Soundmax integrated digital audio
- 1019 0985 P6VXA Motherboard
- 1106 4511 Onboard Audio on EP7KXA
- 1458 7600 Onboard Audio
- 1462 3091 MS-6309 Onboard Audio
- 15dd 7609 Onboard Audio
- 3059 VT8233/A/8235 AC97 Audio Controller
- 1043 8095 A7V8X Motherboard (Realtek ALC650 codec)
- 1458 a002 GA-7VAX Onboard Audio (Realtek ALC650)
- 3065 VT6102 [Rhine-II]
- 1106 0102 VT6102 [Rhine II] Embedded Ethernet Controller on VT8235
- 1186 1400 DFE-530TX rev A
- 1186 1401 DFE-530TX rev B
- 13b9 1421 LD-10/100AL PCI Fast Ethernet Adapter (rev.B)
- 3068 Intel 537 [AC97 Modem]
- 3074 VT8233 PCI to ISA Bridge
- 1043 8052 VT8233A
- 3091 VT8633 [Apollo Pro266]
- 3099 VT8366/A/7 [Apollo KT266/A/333]
- 1043 8064 A7V266-E Mainboard
- 1043 807f A7V333 Mainboard
- 3101 VT8653 Host Bridge
- 3102 VT8662 Host Bridge
- 3103 VT8615 Host Bridge
- 3104 USB 2.0
- 1043 808c A7V8X motherboard
- 1458 5004 GA-7VAX Mainboard
- 3106 VT6105 [Rhine-III]
- 3109 VT8233C PCI to ISA Bridge
- 3112 VT8361 [KLE133] Host Bridge
- 3116 VT8375 [KM266/KL266] Host Bridge
-# found on EPIA M6000/9000 mainboard
- 3122 VT8623 [Apollo CLE266] integrated CastleRock graphics
-# found on EPIA M6000/9000 mainboard
- 3123 VT8623 [Apollo CLE266]
- 3128 VT8753 [P4X266 AGP]
- 3133 VT3133 Host Bridge
- 3147 VT8233A ISA Bridge
- 3148 P4M266 Host Bridge
- 3156 P/KN266 Host Bridge
- 3168 VT8374 P4X400 Host Controller/AGP Bridge
- 3177 VT8235 ISA Bridge
- 1043 808c A7V8X motherboard
- 1458 5001 GA-7VAX Mainboard
- 3189 VT8377 [KT400 AGP] Host Bridge
- 1043 807f A7V8X motherboard
- 1458 5000 GA-7VAX Mainboard
- 5030 VT82C596 ACPI [Apollo PRO]
- 6100 VT85C100A [Rhine II]
- 8231 VT8231 [PCI-to-ISA Bridge]
- 8235 VT8235 ACPI
- 8305 VT8363/8365 [KT133/KM133 AGP]
- 8391 VT8371 [KX133 AGP]
- 8501 VT8501 [Apollo MVP4 AGP]
- 8596 VT82C596 [Apollo PRO AGP]
- 8597 VT82C597 [Apollo VP3 AGP]
- 8598 VT82C598/694x [Apollo MVP3/Pro133x AGP]
- 1019 0985 P6VXA Motherboard
- 8601 VT8601 [Apollo ProMedia AGP]
- 8605 VT8605 [PM133 AGP]
- 8691 VT82C691 [Apollo Pro]
- 8693 VT82C693 [Apollo Pro Plus] PCI Bridge
- b091 VT8633 [Apollo Pro266 AGP]
- b099 VT8366/A/7 [Apollo KT266/A/333 AGP]
- b101 VT8653 AGP Bridge
- b102 VT8362 AGP Bridge
- b103 VT8615 AGP Bridge
- b112 VT8361 [KLE133] AGP Bridge
- b168 VT8235 PCI Bridge
-1107 Stratus Computers
- 0576 VIA VT82C570MV [Apollo] (Wrong vendor ID!)
-1108 Proteon, Inc.
- 0100 p1690plus_AA
- 0101 p1690plus_AB
- 0105 P1690Plus
- 0108 P1690Plus
- 0138 P1690Plus
- 0139 P1690Plus
- 013c P1690Plus
- 013d P1690Plus
-1109 Cogent Data Technologies, Inc.
- 1400 EM110TX [EX110TX]
-110a Siemens Nixdorf AG
- 0002 Pirahna 2-port
- 0005 Tulip controller, power management, switch extender
- 0006 FSC PINC (I/O-APIC)
- 0015 FSC Multiprocessor Interrupt Controller
- 001d FSC Copernicus Management Controller
- 007b FSC Remote Service Controller, mailbox device
- 007c FSC Remote Service Controller, shared memory device
- 007d FSC Remote Service Controller, SMIC device
- 2102 DSCC4 WAN adapter
- 4942 FPGA I-Bus Tracer for MBD
- 6120 SZB6120
-110b Chromatic Research Inc.
- 0001 Mpact Media Processor
- 0004 Mpact 2
-110c Mini-Max Technology, Inc.
-110d Znyx Advanced Systems
-110e CPU Technology
-110f Ross Technology
-1110 Powerhouse Systems
- 6037 Firepower Powerized SMP I/O ASIC
- 6073 Firepower Powerized SMP I/O ASIC
-1111 Santa Cruz Operation
-# Also claimed to be RNS or Rockwell International, current PCISIG records list Osicom
-1112 Osicom Technologies Inc
- 2200 FDDI Adapter
- 2300 Fast Ethernet Adapter
- 2340 4 Port Fast Ethernet Adapter
- 2400 ATM Adapter
-1113 Accton Technology Corporation
- 1211 SMC2-1211TX
- 103c 1207 EN-1207D Fast Ethernet Adapter
- 1113 1211 EN-1207D Fast Ethernet Adapter
- 1216 EN-1216 Ethernet Adapter
- 111a 1020 SpeedStream 1020 PCI 10/100 Ethernet Adaptor [EN-1207F-TX ?]
- 1217 EN-1217 Ethernet Adapter
- 5105 10Mbps Network card
- 9211 EN-1207D Fast Ethernet Adapter
- 1113 9211 EN-1207D Fast Ethernet Adapter
- 9511 Fast Ethernet Adapter
-1114 Atmel Corporation
-1115 3D Labs
-1116 Data Translation
- 0022 DT3001
- 0023 DT3002
- 0024 DT3003
- 0025 DT3004
- 0026 DT3005
- 0027 DT3001-PGL
- 0028 DT3003-PGL
-1117 Datacube, Inc
- 9500 Max-1C SVGA card
- 9501 Max-1C image processing
-1118 Berg Electronics
-1119 ICP Vortex Computersysteme GmbH
- 0000 GDT 6000/6020/6050
- 0001 GDT 6000B/6010
- 0002 GDT 6110/6510
- 0003 GDT 6120/6520
- 0004 GDT 6530
- 0005 GDT 6550
- 0006 GDT 6x17
- 0007 GDT 6x27
- 0008 GDT 6537
- 0009 GDT 6557
- 000a GDT 6115/6515
- 000b GDT 6125/6525
- 000c GDT 6535
- 000d GDT 6555
- 0100 GDT 6117RP/6517RP
- 0101 GDT 6127RP/6527RP
- 0102 GDT 6537RP
- 0103 GDT 6557RP
- 0104 GDT 6111RP/6511RP
- 0105 GDT 6121RP/6521RP
- 0110 GDT 6117RD/6517RD
- 0111 GDT 6127RD/6527RD
- 0112 GDT 6537RD
- 0113 GDT 6557RD
- 0114 GDT 6111RD/6511RD
- 0115 GDT 6121RD/6521RD
- 0118 GDT 6118RD/6518RD/6618RD
- 0119 GDT 6128RD/6528RD/6628RD
- 011a GDT 6538RD/6638RD
- 011b GDT 6558RD/6658RD
- 0120 GDT 6117RP2/6517RP2
- 0121 GDT 6127RP2/6527RP2
- 0122 GDT 6537RP2
- 0123 GDT 6557RP2
- 0124 GDT 6111RP2/6511RP2
- 0125 GDT 6121RP2/6521RP2
- 0136 GDT 6113RS/6513RS
- 0137 GDT 6123RS/6523RS
- 0138 GDT 6118RS/6518RS/6618RS
- 0139 GDT 6128RS/6528RS/6628RS
- 013a GDT 6538RS/6638RS
- 013b GDT 6558RS/6658RS
- 013c GDT 6533RS/6633RS
- 013d GDT 6543RS/6643RS
- 013e GDT 6553RS/6653RS
- 013f GDT 6563RS/6663RS
- 0166 GDT 7113RN/7513RN/7613RN
- 0167 GDT 7123RN/7523RN/7623RN
- 0168 GDT 7118RN/7518RN/7518RN
- 0169 GDT 7128RN/7528RN/7628RN
- 016a GDT 7538RN/7638RN
- 016b GDT 7558RN/7658RN
- 016c GDT 7533RN/7633RN
- 016d GDT 7543RN/7643RN
- 016e GDT 7553RN/7653RN
- 016f GDT 7563RN/7663RN
- 01d6 GDT 4x13RZ
- 01d7 GDT 4x23RZ
- 01f6 GDT 8x13RZ
- 01f7 GDT 8x23RZ
- 01fc GDT 8x33RZ
- 01fd GDT 8x43RZ
- 01fe GDT 8x53RZ
- 01ff GDT 8x63RZ
- 0210 GDT 6519RD/6619RD
- 0211 GDT 6529RD/6629RD
- 0260 GDT 7519RN/7619RN
- 0261 GDT 7529RN/7629RN
- 0300 GDT Raid Controller
-111a Efficient Networks, Inc
- 0000 155P-MF1 (FPGA)
- 0002 155P-MF1 (ASIC)
- 0003 ENI-25P ATM
- 111a 0000 ENI-25p Miniport ATM Adapter
- 0005 SpeedStream (LANAI)
- 111a 0001 ENI-3010 ATM
- 111a 0009 ENI-3060 ADSL (VPI=0)
- 111a 0101 ENI-3010 ATM
- 111a 0109 ENI-3060CO ADSL (VPI=0)
- 111a 0809 ENI-3060 ADSL (VPI=0 or 8)
- 111a 0909 ENI-3060CO ADSL (VPI=0 or 8)
- 111a 0a09 ENI-3060 ADSL (VPI=<0..15>)
- 0007 SpeedStream ADSL
- 111a 1001 ENI-3061 ADSL [ASIC]
-111b Teledyne Electronic Systems
-111c Tricord Systems Inc.
- 0001 Powerbis Bridge
-111d Integrated Device Tech
- 0001 IDT77211 ATM Adapter
- 0003 IDT77252 ATM network controller
-111e Eldec
-111f Precision Digital Images
- 4a47 Precision MX Video engine interface
- 5243 Frame capture bus interface
-1120 EMC Corporation
-1121 Zilog
-1122 Multi-tech Systems, Inc.
-1123 Excellent Design, Inc.
-1124 Leutron Vision AG
-1125 Eurocore
-1126 Vigra
-1127 FORE Systems Inc
- 0200 ForeRunner PCA-200 ATM
- 0210 PCA-200PC
- 0250 ATM
- 0300 ForeRunner PCA-200EPC ATM
- 0310 ATM
- 0400 ForeRunnerHE ATM Adapter
- 1127 0400 ForeRunnerHE ATM
-1129 Firmworks
-112a Hermes Electronics Company, Ltd.
-112b Linotype - Hell AG
-112c Zenith Data Systems
-112d Ravicad
-112e Infomedia Microelectronics Inc.
-112f Imaging Technology Inc
- 0000 MVC IC-PCI
- 0001 MVC IM-PCI Video frame grabber/processor
-1130 Computervision
-1131 Philips Semiconductors
- 1561 USB 1.1 Host Controller
- 1562 USB 2.0 Host Controller
- 3400 SmartPCI56(UCB1500) 56K Modem
- 7130 SAA7130 Video Broadcast Decoder
- 7133 SAA7133 Audio+video broadcast decoder
-# PCI audio and video broadcast decoder (http://www.semiconductors.philips.com/pip/saa7134hl)
- 7134 SAA7134
- 7135 SAA7135 Audio+video broadcast decoder
- 7145 SAA7145
- 7146 SAA7146
- 114b 2003 DVRaptor Video Edit/Capture Card
- 11bd 0006 DV500 Overlay
- 11bd 000a DV500 Overlay
-1132 Mitel Corp.
-1133 Eicon Technology Corporation
- 7901 EiconCard S90
- 7902 EiconCard S90
- 7911 EiconCard S91
- 7912 EiconCard S91
- 7941 EiconCard S94
- 7942 EiconCard S94
- 7943 EiconCard S94
- 7944 EiconCard S94
- b921 EiconCard P92
- b922 EiconCard P92
- b923 EiconCard P92
- e001 DIVA 20PRO
- 1133 e001 DIVA Pro 2.0 S/T
- e002 DIVA 20
- 1133 e002 DIVA 2.0 S/T
- e003 DIVA 20PRO_U
- 1133 e003 DIVA Pro 2.0 U
- e004 DIVA 20_U
- 1133 e004 DIVA 2.0 U
- e005 DIVA LOW
- 1133 e005 DIVA 2.01 S/T
- e00b DIVA 2.02
- e010 DIVA Server BRI-2M
- 1133 e010 DIVA Server BRI-2M
- e012 DIVA Server BRI-8M
- 1133 e012 DIVA Server BRI-8M
- e014 DIVA Server PRI-30M
- 1133 e014 DIVA Server PRI-30M
- e018 DIVA Server BRI-2M/-2F
-1134 Mercury Computer Systems
- 0001 Raceway Bridge
-1135 Fuji Xerox Co Ltd
- 0001 Printer controller
-1136 Momentum Data Systems
-1137 Cisco Systems Inc
-1138 Ziatech Corporation
- 8905 8905 [STD 32 Bridge]
-1139 Dynamic Pictures, Inc
- 0001 VGA Compatible 3D Graphics
-113a FWB Inc
-113b Network Computing Devices
-113c Cyclone Microsystems, Inc.
- 0000 PCI-9060 i960 Bridge
- 0001 PCI-SDK [PCI i960 Evaluation Platform]
- 0911 PCI-911 [i960Jx-based Intelligent I/O Controller]
- 0912 PCI-912 [i960CF-based Intelligent I/O Controller]
- 0913 PCI-913
- 0914 PCI-914 [I/O Controller w/ secondary PCI bus]
-113d Leading Edge Products Inc
-113e Sanyo Electric Co - Computer Engineering Dept
-113f Equinox Systems, Inc.
- 0808 SST-64P Adapter
- 1010 SST-128P Adapter
- 80c0 SST-16P DB Adapter
- 80c4 SST-16P RJ Adapter
- 80c8 SST-16P Adapter
- 8888 SST-4P Adapter
- 9090 SST-8P Adapter
-1140 Intervoice Inc
-1141 Crest Microsystem Inc
-1142 Alliance Semiconductor Corporation
- 3210 AP6410
- 6422 ProVideo 6422
- 6424 ProVideo 6424
- 6425 ProMotion AT25
- 643d ProMotion AT3D
-1143 NetPower, Inc
-1144 Cincinnati Milacron
- 0001 Noservo controller
-1145 Workbit Corporation
- 8007 NinjaSCSI-32 Workbit
- f007 NinjaSCSI-32 KME
- f010 NinjaSCSI-32 Workbit
- f012 NinjaSCSI-32 Logitec
- f013 NinjaSCSI-32 Logitec
- f015 NinjaSCSI-32 Melco
-1146 Force Computers
-1147 Interface Corp
-1148 Syskonnect (Schneider & Koch)
- 4000 FDDI Adapter
- 0e11 b03b Netelligent 100 FDDI DAS Fibre SC
- 0e11 b03c Netelligent 100 FDDI SAS Fibre SC
- 0e11 b03d Netelligent 100 FDDI DAS UTP
- 0e11 b03e Netelligent 100 FDDI SAS UTP
- 0e11 b03f Netelligent 100 FDDI SAS Fibre MIC
- 1148 5521 FDDI SK-5521 (SK-NET FDDI-UP)
- 1148 5522 FDDI SK-5522 (SK-NET FDDI-UP DAS)
- 1148 5541 FDDI SK-5541 (SK-NET FDDI-FP)
- 1148 5543 FDDI SK-5543 (SK-NET FDDI-LP)
- 1148 5544 FDDI SK-5544 (SK-NET FDDI-LP DAS)
- 1148 5821 FDDI SK-5821 (SK-NET FDDI-UP64)
- 1148 5822 FDDI SK-5822 (SK-NET FDDI-UP64 DAS)
- 1148 5841 FDDI SK-5841 (SK-NET FDDI-FP64)
- 1148 5843 FDDI SK-5843 (SK-NET FDDI-LP64)
- 1148 5844 FDDI SK-5844 (SK-NET FDDI-LP64 DAS)
- 4200 Token Ring adapter
- 4300 SK-98xx Gigabit Ethernet Server Adapter
- 1148 9821 SK-9821 Gigabit Ethernet Server Adapter (SK-NET GE-T)
- 1148 9822 SK-9822 Gigabit Ethernet Server Adapter (SK-NET GE-T dual link)
- 1148 9841 SK-9841 Gigabit Ethernet Server Adapter (SK-NET GE-LX)
- 1148 9842 SK-9842 Gigabit Ethernet Server Adapter (SK-NET GE-LX dual link)
- 1148 9843 SK-9843 Gigabit Ethernet Server Adapter (SK-NET GE-SX)
- 1148 9844 SK-9844 Gigabit Ethernet Server Adapter (SK-NET GE-SX dual link)
- 1148 9861 SK-9861 Gigabit Ethernet Server Adapter (SK-NET GE-SX Volition)
- 1148 9862 SK-9862 Gigabit Ethernet Server Adapter (SK-NET GE-SX Volition dual link)
-# Information obtained from SysKonnect
- 1148 9871 SK-9871 Gigabit Ethernet Server Adapter (SK-NET GE-ZX)
-# Information obtained from SysKonnect
- 1148 9872 SK-9872 Gigabit Ethernet Server Adapter (SK-NET GE-ZX dual link)
- 1259 2970 Allied Telesyn AT-2970SX Gigabit Ethernet Adapter
- 1259 2971 Allied Telesyn AT-2970LX Gigabit Ethernet Adapter
- 1259 2972 Allied Telesyn AT-2970TX Gigabit Ethernet Adapter
- 1259 2973 Allied Telesyn AT-2971SX Gigabit Ethernet Adapter
- 1259 2974 Allied Telesyn AT-2971T Gigabit Ethernet Adapter
- 1259 2975 Allied Telesyn AT-2970SX/2SC Gigabit Ethernet Adapter
- 1259 2976 Allied Telesyn AT-2970LX/2SC Gigabit Ethernet Adapter
- 1259 2977 Allied Telesyn AT-2970TX/2TX Gigabit Ethernet Adapter
- 4320 SK-98xx V2.0 Gigabit Ethernet Adapter
- 1148 0121 Marvell RDK-8001 Adapter
- 1148 0221 Marvell RDK-8002 Adapter
- 1148 0321 Marvell RDK-8003 Adapter
- 1148 0421 Marvell RDK-8004 Adapter
- 1148 0621 Marvell RDK-8006 Adapter
- 1148 0721 Marvell RDK-8007 Adapter
- 1148 0821 Marvell RDK-8008 Adapter
- 1148 0921 Marvell RDK-8009 Adapter
- 1148 1121 Marvell RDK-8011 Adapter
- 1148 1221 Marvell RDK-8012 Adapter
- 1148 3221 SK-9521 V2.0 10/100/1000Base-T Adapter
- 1148 5021 SK-9821 V2.0 Gigabit Ethernet 10/100/1000Base-T Adapter
- 1148 5041 SK-9841 V2.0 Gigabit Ethernet 1000Base-LX Adapter
- 1148 5043 SK-9843 V2.0 Gigabit Ethernet 1000Base-SX Adapter
- 1148 5051 SK-9851 V2.0 Gigabit Ethernet 1000Base-SX Adapter
- 1148 5061 SK-9861 V2.0 Gigabit Ethernet 1000Base-SX Adapter
- 1148 5071 SK-9871 V2.0 Gigabit Ethernet 1000Base-ZX Adapter
- 1148 9521 SK-9521 10/100/1000Base-T Adapter
-1149 Win System Corporation
-114a VMIC
- 5579 VMIPCI-5579 (Reflective Memory Card)
- 5587 VMIPCI-5587 (Reflective Memory Card)
- 6504 VMIC PCI 7755 FPGA
- 7587 VMIVME-7587
-114b Canopus Co., Ltd
-114c Annabooks
-114d IC Corporation
-114e Nikon Systems Inc
-114f Digi International
- 0002 AccelePort EPC
- 0003 RightSwitch SE-6
- 0004 AccelePort Xem
- 0005 AccelePort Xr
- 0006 AccelePort Xr,C/X
- 0009 AccelePort Xr/J
- 000a AccelePort EPC/J
- 000c DataFirePRIme T1 (1-port)
- 000d SyncPort 2-Port (x.25/FR)
- 0011 AccelePort 8r EIA-232 (IBM)
- 0012 AccelePort 8r EIA-422
- 0013 AccelePort Xr
- 0014 AccelePort 8r EIA-422
- 0015 AccelePort Xem
- 0016 AccelePort EPC/X
- 0017 AccelePort C/X
- 001a DataFirePRIme E1 (1-port)
- 001b AccelePort C/X (IBM)
- 001d DataFire RAS T1/E1/PRI
- 114f 0050 DataFire RAS E1 Adapter
- 114f 0051 DataFire RAS Dual E1 Adapter
- 114f 0052 DataFire RAS T1 Adapter
- 114f 0053 DataFire RAS Dual T1 Adapter
- 0023 AccelePort RAS
- 0024 DataFire RAS B4 ST/U
- 114f 0030 DataFire RAS BRI U Adapter
- 114f 0031 DataFire RAS BRI S/T Adapter
- 0026 AccelePort 4r 920
- 0027 AccelePort Xr 920
- 0034 AccelePort 2r 920
- 0035 DataFire DSP T1/E1/PRI cPCI
- 0040 AccelePort Xp
- 0042 AccelePort 2p PCI
- 0070 Datafire Micro V IOM2 (Europe)
- 0071 Datafire Micro V (Europe)
- 0072 Datafire Micro V IOM2 (North America)
- 0073 Datafire Micro V (North America)
- 6001 Avanstar
-1150 Thinking Machines Corp
-1151 JAE Electronics Inc.
-1152 Megatek
-1153 Land Win Electronic Corp
-1154 Melco Inc
-1155 Pine Technology Ltd
-1156 Periscope Engineering
-1157 Avsys Corporation
-1158 Voarx R & D Inc
- 3011 Tokenet/vg 1001/10m anylan
- 9050 Lanfleet/Truevalue
- 9051 Lanfleet/Truevalue
-1159 Mutech Corp
- 0001 MV-1000
-115a Harlequin Ltd
-115b Parallax Graphics
-115c Photron Ltd.
-115d Xircom
- 0003 Cardbus Ethernet 10/100
- 1014 0181 10/100 EtherJet Cardbus Adapter
- 1014 1181 10/100 EtherJet Cardbus Adapter
- 1014 8181 10/100 EtherJet Cardbus Adapter
- 1014 9181 10/100 EtherJet Cardbus Adapter
- 115d 0181 Cardbus Ethernet 10/100
- 115d 1181 Cardbus Ethernet 10/100
- 1179 0181 Cardbus Ethernet 10/100
- 8086 8181 EtherExpress PRO/100 Mobile CardBus 32 Adapter
- 8086 9181 EtherExpress PRO/100 Mobile CardBus 32 Adapter
- 0005 Cardbus Ethernet 10/100
- 1014 0182 10/100 EtherJet Cardbus Adapter
- 1014 1182 10/100 EtherJet Cardbus Adapter
- 115d 0182 Cardbus Ethernet 10/100
- 115d 1182 Cardbus Ethernet 10/100
- 0007 Cardbus Ethernet 10/100
- 1014 0182 10/100 EtherJet Cardbus Adapter
- 1014 1182 10/100 EtherJet Cardbus Adapter
- 115d 0182 Cardbus Ethernet 10/100
- 115d 1182 Cardbus Ethernet 10/100
- 000b Cardbus Ethernet 10/100
- 1014 0183 10/100 EtherJet Cardbus Adapter
- 115d 0183 Cardbus Ethernet 10/100
- 000c Mini-PCI V.90 56k Modem
- 000f Cardbus Ethernet 10/100
- 1014 0183 10/100 EtherJet Cardbus Adapter
- 115d 0183 Cardbus Ethernet 10/100
- 0101 Cardbus 56k modem
- 115d 1081 Cardbus 56k Modem
- 0103 Cardbus Ethernet + 56k Modem
- 1014 9181 Cardbus 56k Modem
- 1115 1181 Cardbus Ethernet 100 + 56k Modem
- 115d 1181 CBEM56G-100 Ethernet + 56k Modem
- 8086 9181 PRO/100 LAN + Modem56 CardBus
-115e Peer Protocols Inc
-115f Maxtor Corporation
-1160 Megasoft Inc
-1161 PFU Limited
-1162 OA Laboratory Co Ltd
-1163 Rendition
- 0001 Verite 1000
- 2000 Verite V2000/V2100/V2200
- 1092 2000 Stealth II S220
-1164 Advanced Peripherals Technologies
-1165 Imagraph Corporation
- 0001 Motion TPEG Recorder/Player with audio
-1166 ServerWorks
- 0005 CNB20-LE Host Bridge
- 0006 CNB20HE Host Bridge
- 0007 CNB20-LE Host Bridge
- 0008 CNB20HE Host Bridge
- 0009 CNB20LE Host Bridge
- 0010 CIOB30
- 0011 CMIC-HE
- 0012 CMIC-LE
- 0013 CNB20-HE Host Bridge
- 0014 CNB20-HE Host Bridge
- 0015 CMIC-GC Host Bridge
- 0016 CMIC-GC Host Bridge
- 0017 GCNB-LE Host Bridge
- 0200 OSB4 South Bridge
- 0201 CSB5 South Bridge
- 0203 CSB6 South Bridge
- 0211 OSB4 IDE Controller
- 0212 CSB5 IDE Controller
- 0213 CSB6 RAID/IDE Controller
- 0220 OSB4/CSB5 OHCI USB Controller
- 0221 CSB6 OHCI USB Controller
- 0225 GCLE Host Bridge
- 0227 GCLE-2 Host Bridge
-1167 Mutoh Industries Inc
-1168 Thine Electronics Inc
-1169 Centre for Development of Advanced Computing
-116a Polaris Communications
- 6100 Bus/Tag Channel
- 6800 Escon Channel
- 7100 Bus/Tag Channel
- 7800 Escon Channel
-116b Connectware Inc
-116c Intelligent Resources Integrated Systems
-116d Martin-Marietta
-116e Electronics for Imaging
-116f Workstation Technology
-1170 Inventec Corporation
-1171 Loughborough Sound Images Plc
-1172 Altera Corporation
-1173 Adobe Systems, Inc
-1174 Bridgeport Machines
-1175 Mitron Computer Inc.
-1176 SBE Incorporated
-1177 Silicon Engineering
-1178 Alfa, Inc.
- afa1 Fast Ethernet Adapter
-1179 Toshiba America Info Systems
- 0103 EX-IDE Type-B
- 0404 DVD Decoder card
- 0406 Tecra Video Capture device
- 0407 DVD Decoder card (Version 2)
- 0601 601
- 0603 ToPIC95 PCI to CardBus Bridge for Notebooks
- 060a ToPIC95
- 060f ToPIC97
- 0617 ToPIC95 PCI to Cardbus Bridge with ZV Support
- 0618 CPU to PCI and PCI to ISA bridge
-# Claimed to be Lucent DSP1645 [Mars], but that's apparently incorrect. Does anyone know the correct ID?
- 0701 FIR Port
- 0804 TC6371AF SmartMedia Controller
- 0805 SD TypA Controller
- 0d01 FIR Port Type-DO
- 1179 0001 FIR Port Type-DO
-117a A-Trend Technology
-117b L G Electronics, Inc.
-117c Atto Technology
-117d Becton & Dickinson
-117e T/R Systems
-117f Integrated Circuit Systems
-1180 Ricoh Co Ltd
- 0465 RL5c465
- 0466 RL5c466
- 0475 RL5c475
- 144d c006 vpr Matrix 170B4 CardBus bridge
- 0476 RL5c476 II
- 1014 0185 ThinkPad A/T/X Series
- 104d 80df Vaio PCG-FX403
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 0477 RL5c477
- 0478 RL5c478
- 1014 0184 ThinkPad A30p (2653-64G)
- 0522 R5C522 IEEE 1394 Controller
- 1014 01cf ThinkPad A30p (2653-64G)
- 0551 R5C551 IEEE 1394 Controller
- 144d c006 vpr Matrix 170B4
- 0552 R5C552 IEEE 1394 Controller
- 1014 0511 ThinkPad A/T/X Series
-1181 Telmatics International
-1183 Fujikura Ltd
-1184 Forks Inc
-1185 Dataworld International Ltd
-1186 D-Link System Inc
- 0100 DC21041
- 1002 DL10050 Sundance Ethernet
- 1186 1002 DFE-550TX
- 1186 1012 DFE-580TX
- 1300 RTL8139 Ethernet
- 1186 1300 DFE-538TX 10/100 Ethernet Adapter
- 1186 1301 DFE-530TX+ 10/100 Ethernet Adapter
- 1340 DFE-690TXD CardBus PC Card
- 1561 DRP-32TXD Cardbus PC Card
- 4000 DL2K Ethernet
- 4c00 Gigabit Ethernet Adapter
- 1186 4c00 DGE-530T Gigabit Ethernet Adapter
-1187 Advanced Technology Laboratories, Inc.
-1188 Shima Seiki Manufacturing Ltd.
-1189 Matsushita Electronics Co Ltd
-118a Hilevel Technology
-118b Hypertec Pty Limited
-118c Corollary, Inc
- 0014 PCIB [C-bus II to PCI bus host bridge chip]
- 1117 Intel 8-way XEON Profusion Chipset [Cache Coherency Filter]
-118d BitFlow Inc
- 0001 Raptor-PCI framegrabber
- 0012 Model 12 Road Runner Frame Grabber
- 0014 Model 14 Road Runner Frame Grabber
- 0024 Model 24 Road Runner Frame Grabber
- 0044 Model 44 Road Runner Frame Grabber
- 0112 Model 12 Road Runner Frame Grabber
- 0114 Model 14 Road Runner Frame Grabber
- 0124 Model 24 Road Runner Frame Grabber
- 0144 Model 44 Road Runner Frame Grabber
- 0212 Model 12 Road Runner Frame Grabber
- 0214 Model 14 Road Runner Frame Grabber
- 0224 Model 24 Road Runner Frame Grabber
- 0244 Model 44 Road Runner Frame Grabber
- 0312 Model 12 Road Runner Frame Grabber
- 0314 Model 14 Road Runner Frame Grabber
- 0324 Model 24 Road Runner Frame Grabber
- 0344 Model 44 Road Runner Frame Grabber
-118e Hermstedt GmbH
-118f Green Logic
-1190 Tripace
- c731 TP-910/920/940 PCI Ultra(Wide) SCSI Adapter
-1191 Artop Electronic Corp
- 0003 SCSI Cache Host Adapter
- 0004 ATP8400
- 0005 ATP850UF
- 0006 ATP860 NO-BIOS
- 0007 ATP860
- 0008 ATP865 NO-ROM
- 0009 ATP865
- 8002 AEC6710 SCSI-2 Host Adapter
- 8010 AEC6712UW SCSI
- 8020 AEC6712U SCSI
- 8030 AEC6712S SCSI
- 8040 AEC6712D SCSI
- 8050 AEC6712SUW SCSI
-1192 Densan Company Ltd
-1193 Zeitnet Inc.
- 0001 1221
- 0002 1225
-1194 Toucan Technology
-1195 Ratoc System Inc
-1196 Hytec Electronics Ltd
-1197 Gage Applied Sciences, Inc.
-1198 Lambda Systems Inc
-1199 Attachmate Corporation
-119a Mind Share, Inc.
-119b Omega Micro Inc.
- 1221 82C092G
-119c Information Technology Inst.
-119d Bug, Inc. Sapporo Japan
-119e Fujitsu Microelectronics Ltd.
- 0001 FireStream 155
- 0003 FireStream 50
-119f Bull HN Information Systems
-11a0 Convex Computer Corporation
-11a1 Hamamatsu Photonics K.K.
-11a2 Sierra Research and Technology
-11a3 Deuretzbacher GmbH & Co. Eng. KG
-11a4 Barco Graphics NV
-11a5 Microunity Systems Eng. Inc
-11a6 Pure Data Ltd.
-11a7 Power Computing Corp.
-11a8 Systech Corp.
-11a9 InnoSys Inc.
- 4240 AMCC S933Q Intelligent Serial Card
-11aa Actel
-11ab Galileo Technology Ltd.
- 0146 GT-64010/64010A System Controller
- 4146 GT-64011/GT-64111 System Controller
- 4320 Gigabit Ethernet Adapter
- 1019 0f38 Marvell 88E8001 Gigabit LOM Ethernet Adapter (ECS)
- 1019 8001 Marvell 88E8001 Gigabit LOM Ethernet Adapter (ECS)
- 1043 173c Marvell 88E8001 Gigabit LOM Ethernet Adapter (Asus)
- 1043 811a Marvell 88E8001 Gigabit LOM Ethernet Adapter (Asus)
- 105b 0c19 Marvell 88E8001 Gigabit LOM Ethernet Adapter (Foxconn)
- 10b8 b452 SMC EZ Card 1000 (SMC9452TXV.2)
- 11ab 0121 Marvell RDK-8001 Adapter
- 11ab 0321 Marvell RDK-8003 Adapter
- 11ab 1021 Marvell RDK-8010 Adapter
- 11ab 5021 Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Adapter (64 bit)
- 11ab 9521 Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Adapter (32 bit)
- 1458 e000 Marvell 88E8001 Gigabit LOM Ethernet Adapter (Gigabyte)
- 147b 1406 Marvell 88E8001 Gigabit LOM Ethernet Adapter (Abit)
- 15d4 0047 Marvell 88E8001 Gigabit LOM Ethernet Adapter (Iwill)
- 1695 9025 Marvell 88E8001 Gigabit LOM Ethernet Adapter (Epox)
- 17f2 1c03 Marvell 88E8001 Gigabit LOM Ethernet Adapter (Albatron)
- 4611 GT-64115 System Controller
- 4620 GT-64120/64120A/64121A System Controller
- 4801 GT-48001
- f003 GT-64010 Primary Image Piranha Image Generator
-11ac Canon Information Systems Research Aust.
-11ad Lite-On Communications Inc
- 0002 LNE100TX
- 11ad 0002 LNE100TX
- 11ad 0003 LNE100TX
- 11ad f003 LNE100TX
- 11ad ffff LNE100TX
- 1385 f004 FA310TX
- c115 LNE100TX [Linksys EtherFast 10/100]
- 11ad c001 LNE100TX [ver 2.0]
-11ae Aztech System Ltd
-11af Avid Technology Inc.
-11b0 V3 Semiconductor Inc.
- 0002 V300PSC
- 0292 V292PBC [Am29030/40 Bridge]
- 0960 V96xPBC
- c960 V96DPC
-11b1 Apricot Computers
-11b2 Eastman Kodak
-11b3 Barr Systems Inc.
-11b4 Leitch Technology International
-11b5 Radstone Technology Plc
-11b6 United Video Corp
-11b7 Motorola
-11b8 XPoint Technologies, Inc
- 0001 Quad PeerMaster
-11b9 Pathlight Technology Inc.
- c0ed SSA Controller
-11ba Videotron Corp
-11bb Pyramid Technology
-11bc Network Peripherals Inc
- 0001 NP-PCI
-11bd Pinnacle Systems Inc.
-11be International Microcircuits Inc
-11bf Astrodesign, Inc.
-11c0 Hewlett Packard
-11c1 Lucent Microelectronics
- 0440 56k WinModem
- 0001 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
- 1033 8015 LT WinModem 56k Data+Fax+Voice+Dsvd
- 1033 8047 LT WinModem 56k Data+Fax+Voice+Dsvd
- 1033 804f LT WinModem 56k Data+Fax+Voice+Dsvd
- 10cf 102c LB LT Modem V.90 56k
- 10cf 104a BIBLO LT Modem 56k
- 10cf 105f LB2 LT Modem V.90 56k
- 1179 0001 Internal V.90 Modem
- 11c1 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
- 122d 4101 MDP7800-U Modem
- 122d 4102 MDP7800SP-U Modem
- 13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd
- 13e0 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
- 13e0 0441 LT WinModem 56k Data+Fax+Voice+Dsvd
- 13e0 0450 LT WinModem 56k Data+Fax+Voice+Dsvd
- 13e0 f100 LT WinModem 56k Data+Fax+Voice+Dsvd
- 13e0 f101 LT WinModem 56k Data+Fax+Voice+Dsvd
- 144d 2101 LT56PV Modem
- 149f 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
- 0441 56k WinModem
- 1033 804d LT WinModem 56k Data+Fax
- 1033 8065 LT WinModem 56k Data+Fax
- 1092 0440 Supra 56i
- 1179 0001 Internal V.90 Modem
- 11c1 0440 LT WinModem 56k Data+Fax
- 11c1 0441 LT WinModem 56k Data+Fax
- 122d 4100 MDP7800-U Modem
- 13e0 0040 LT WinModem 56k Data+Fax
- 13e0 0100 LT WinModem 56k Data+Fax
- 13e0 0410 LT WinModem 56k Data+Fax
- 13e0 0420 TelePath Internet 56k WinModem
- 13e0 0440 LT WinModem 56k Data+Fax
- 13e0 0443 LT WinModem 56k Data+Fax
- 13e0 f102 LT WinModem 56k Data+Fax
- 1416 9804 CommWave 56k Modem
- 141d 0440 LT WinModem 56k Data+Fax
- 144f 0441 Lucent 56k V.90 DF Modem
- 144f 0449 Lucent 56k V.90 DF Modem
- 144f 110d Lucent Win Modem
- 1468 0441 Presario 56k V.90 DF Modem
- 1668 0440 Lucent Win Modem
- 0442 56k WinModem
- 0001 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 11c1 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 11c1 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 13e0 0412 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 13e0 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 13fc 2471 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 144d 2104 LT56PT Modem
- 144f 1104 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 149f 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 1668 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 0443 LT WinModem
- 0444 LT WinModem
- 0445 LT WinModem
- 8086 2203 PRO/100+ MiniPCI (probably an Ambit U98.003.C.00 combo card)
- 0446 LT WinModem
- 0447 LT WinModem
- 0448 WinModem 56k
- 1014 0131 Lucent Win Modem
- 1033 8066 LT WinModem 56k Data+Fax+Voice+Dsvd
- 13e0 0030 56k Voice Modem
- 13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd
-# Actiontech eth+modem card as used by Dell &c.
- 1668 2400 LT WinModem 56k (MiniPCI Ethernet+Modem)
- 0449 WinModem 56k
- 0e11 b14d 56k V.90 Modem
- 13e0 0020 LT WinModem 56k Data+Fax
- 13e0 0041 TelePath Internet 56k WinModem
- 1436 0440 Lucent Win Modem
- 144f 0449 Lucent 56k V.90 DFi Modem
- 1468 0410 IBM ThinkPad T23 (2647-4MG)
- 1468 0440 Lucent Win Modem
- 1468 0449 Presario 56k V.90 DFi Modem
- 044a F-1156IV WinModem (V90, 56KFlex)
- 10cf 1072 LB Global LT Modem
- 13e0 0012 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 13e0 0042 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 144f 1005 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
- 044b LT WinModem
- 044c LT WinModem
- 044d LT WinModem
- 044e LT WinModem
- 044f V90 WildWire Modem
- 0450 LT WinModem
- 144f 4005 Magnia SG20
- 0451 LT WinModem
- 0452 LT WinModem
- 0453 LT WinModem
- 0454 LT WinModem
- 0455 LT WinModem
- 0456 LT WinModem
- 0457 LT WinModem
- 0458 LT WinModem
- 0459 LT WinModem
- 045a LT WinModem
- 045c LT WinModem
- 0461 V90 WildWire Modem
- 0462 V90 WildWire Modem
- 0480 Venus Modem (V90, 56KFlex)
- 5801 USB
- 5802 USS-312 USB Controller
-# 4 port PCI USB Controller made by Agere (formerly Lucent)
- 5803 USS-344S USB Controller
- 5811 FW323
- dead 0800 FireWire Host Bus Adapter
-11c2 Sand Microelectronics
-11c3 NEC Corporation
-11c4 Document Technologies, Inc
-11c5 Shiva Corporation
-11c6 Dainippon Screen Mfg. Co. Ltd
-11c7 D.C.M. Data Systems
-11c8 Dolphin Interconnect Solutions AS
- 0658 PSB32 SCI-Adapter D31x
- d665 PSB64 SCI-Adapter D32x
- d667 PSB66 SCI-Adapter D33x
-11c9 Magma
- 0010 16-line serial port w/- DMA
- 0011 4-line serial port w/- DMA
-11ca LSI Systems, Inc
-11cb Specialix Research Ltd.
- 2000 PCI_9050
- 11cb 0200 SX
- 11cb b008 I/O8+
- 4000 SUPI_1
- 8000 T225
-11cc Michels & Kleberhoff Computer GmbH
-11cd HAL Computer Systems, Inc.
-11ce Netaccess
-11cf Pioneer Electronic Corporation
-11d0 Lockheed Martin Federal Systems-Manassas
-11d1 Auravision
- 01f7 VxP524
-11d2 Intercom Inc.
-11d3 Trancell Systems Inc
-11d4 Analog Devices
- 1805 SM56 PCI modem
- 1889 AD1889 sound chip
-11d5 Ikon Corporation
- 0115 10115
- 0117 10117
-11d6 Tekelec Telecom
-11d7 Trenton Technology, Inc.
-11d8 Image Technologies Development
-11d9 TEC Corporation
-11da Novell
-11db Sega Enterprises Ltd
-11dc Questra Corporation
-11dd Crosfield Electronics Limited
-11de Zoran Corporation
- 6057 ZR36057PQC Video cutting chipset
- 1031 7efe DC10 Plus
- 1031 fc00 MiroVIDEO DC50, Motion JPEG Capture/CODEC Board
- 13ca 4231 JPEG/TV Card
- 6120 ZR36120
- 1328 f001 Cinemaster C DVD Decoder
-11df New Wave PDG
-11e0 Cray Communications A/S
-11e1 GEC Plessey Semi Inc.
-11e2 Samsung Information Systems America
-11e3 Quicklogic Corporation
- 5030 PC Watchdog
-11e4 Second Wave Inc
-11e5 IIX Consulting
-11e6 Mitsui-Zosen System Research
-11e7 Toshiba America, Elec. Company
-11e8 Digital Processing Systems Inc.
-11e9 Highwater Designs Ltd.
-11ea Elsag Bailey
-11eb Formation Inc.
-11ec Coreco Inc
-11ed Mediamatics
-11ee Dome Imaging Systems Inc
-11ef Nicolet Technologies B.V.
-11f0 Compu-Shack
- 4231 FDDI
- 4232 FASTline UTP Quattro
- 4233 FASTline FO
- 4234 FASTline UTP
- 4235 FASTline-II UTP
- 4236 FASTline-II FO
- 4731 GIGAline
-11f1 Symbios Logic Inc
-11f2 Picture Tel Japan K.K.
-11f3 Keithley Metrabyte
-11f4 Kinetic Systems Corporation
- 2915 CAMAC controller
-11f5 Computing Devices International
-11f6 Compex
- 0112 ENet100VG4
- 0113 FreedomLine 100
- 1401 ReadyLink 2000
- 2011 RL100-ATX 10/100
- 11f6 2011 RL100-ATX
- 2201 ReadyLink 100TX (Winbond W89C840)
- 11f6 2011 ReadyLink 100TX
- 9881 RL100TX
-11f7 Scientific Atlanta
-11f8 PMC-Sierra Inc.
- 7375 PM7375 [LASAR-155 ATM SAR]
-11f9 I-Cube Inc
-11fa Kasan Electronics Company, Ltd.
-11fb Datel Inc
-11fc Silicon Magic
-11fd High Street Consultants
-11fe Comtrol Corporation
- 0001 RocketPort 8 Oct
- 0002 RocketPort 8 Intf
- 0003 RocketPort 16 Intf
- 0004 RocketPort 32 Intf
- 0005 RocketPort Octacable
- 0006 RocketPort 8J
- 0007 RocketPort 4-port
- 0008 RocketPort 8-port
- 0009 RocketPort 16-port
- 000a RocketPort Plus Quadcable
- 000b RocketPort Plus Octacable
- 000c RocketPort 8-port Modem
- 8015 RocketPort 4-port UART 16954
-11ff Scion Corporation
-1200 CSS Corporation
-1201 Vista Controls Corp
-1202 Network General Corp.
- 4300 Gigabit Ethernet Adapter
- 1202 9841 SK-9841 LX
- 1202 9842 SK-9841 LX dual link
- 1202 9843 SK-9843 SX
- 1202 9844 SK-9843 SX dual link
-1203 Bayer Corporation, Agfa Division
-1204 Lattice Semiconductor Corporation
-1205 Array Corporation
-1206 Amdahl Corporation
-1208 Parsytec GmbH
- 4853 HS-Link Device
-1209 SCI Systems Inc
-120a Synaptel
-120b Adaptive Solutions
-120c Technical Corp.
-120d Compression Labs, Inc.
-120e Cyclades Corporation
- 0100 Cyclom-Y below first megabyte
- 0101 Cyclom-Y above first megabyte
- 0102 Cyclom-4Y below first megabyte
- 0103 Cyclom-4Y above first megabyte
- 0104 Cyclom-8Y below first megabyte
- 0105 Cyclom-8Y above first megabyte
- 0200 Cyclades-Z below first megabyte
- 0201 Cyclades-Z above first megabyte
- 0300 PC300/RSV or /X21 (2 ports)
- 0301 PC300/RSV or /X21 (1 port)
- 0310 PC300/TE (2 ports)
- 0311 PC300/TE (1 port)
- 0320 PC300/TE-M (2 ports)
- 0321 PC300/TE-M (1 port)
- 0400 PC400
-120f Essential Communications
- 0001 Roadrunner serial HIPPI
-1210 Hyperparallel Technologies
-1211 Braintech Inc
-1212 Kingston Technology Corp.
-1213 Applied Intelligent Systems, Inc.
-1214 Performance Technologies, Inc.
-1215 Interware Co., Ltd
-1216 Purup Prepress A/S
-1217 O2 Micro, Inc.
- 6729 OZ6729
- 673a OZ6730
- 6832 OZ6832/6833 Cardbus Controller
- 6836 OZ6836/6860 Cardbus Controller
- 6872 OZ6812 Cardbus Controller
- 6925 OZ6922 Cardbus Controller
- 6933 OZ6933 Cardbus Controller
- 1025 1016 Travelmate 612 TX
- 6972 OZ6912 Cardbus Controller
- 1179 0001 Magnia Z310
-1218 Hybricon Corp.
-1219 First Virtual Corporation
-121a 3Dfx Interactive, Inc.
- 0001 Voodoo
- 0002 Voodoo 2
- 0003 Voodoo Banshee
- 1092 0003 Monster Fusion
- 1092 4000 Monster Fusion
- 1092 4002 Monster Fusion
- 1092 4801 Monster Fusion AGP
- 1092 4803 Monster Fusion AGP
- 1092 8030 Monster Fusion
- 1092 8035 Monster Fusion AGP
- 10b0 0001 Dragon 4000
- 1102 1018 3D Blaster Banshee VE
- 121a 0001 Voodoo Banshee AGP
- 121a 0003 Voodoo Banshee AGP SGRAM
- 121a 0004 Voodoo Banshee
- 139c 0016 Raven
- 139c 0017 Raven
- 14af 0002 Maxi Gamer Phoenix
- 0004 Voodoo Banshee [Velocity 100]
- 0005 Voodoo 3
- 121a 0004 Voodoo3 AGP
- 121a 0030 Voodoo3 AGP
- 121a 0031 Voodoo3 AGP
- 121a 0034 Voodoo3 AGP
- 121a 0036 Voodoo3
- 121a 0037 Voodoo3 AGP
- 121a 0038 Voodoo3 AGP
- 121a 003a Voodoo3 AGP
- 121a 0044 Voodoo3
- 121a 004b Velocity 100
- 121a 004c Velocity 200
- 121a 004d Voodoo3 AGP
- 121a 004e Voodoo3 AGP
- 121a 0051 Voodoo3 AGP
- 121a 0052 Voodoo3 AGP
- 121a 0060 Voodoo3 3500 TV (NTSC)
- 121a 0061 Voodoo3 3500 TV (PAL)
- 121a 0062 Voodoo3 3500 TV (SECAM)
- 0009 Voodoo 4 / Voodoo 5
- 121a 0009 Voodoo5 AGP 5500/6000
- 0057 Voodoo 3/3000 [Avenger]
-121b Advanced Telecommunications Modules
-121c Nippon Texaco., Ltd
-121d Lippert Automationstechnik GmbH
-121e CSPI
-121f Arcus Technology, Inc.
-1220 Ariel Corporation
- 1220 AMCC 5933 TMS320C80 DSP/Imaging board
-1221 Contec Co., Ltd
-1222 Ancor Communications, Inc.
-1223 Artesyn Communication Products
- 0003 PM/Link
- 0004 PM/T1
- 0005 PM/E1
- 0008 PM/SLS
- 0009 BajaSpan Resource Target
- 000a BajaSpan Section 0
- 000b BajaSpan Section 1
- 000c BajaSpan Section 2
- 000d BajaSpan Section 3
- 000e PM/PPC
-1224 Interactive Images
-1225 Power I/O, Inc.
-1227 Tech-Source
-1228 Norsk Elektro Optikk A/S
-1229 Data Kinesis Inc.
-122a Integrated Telecom
-122b LG Industrial Systems Co., Ltd
-122c Sican GmbH
-122d Aztech System Ltd
- 1206 368DSP
- 50dc 3328 Audio
- 122d 0001 3328 Audio
- 80da 3328 Audio
- 122d 0001 3328 Audio
-122e Xyratex
-122f Andrew Corporation
-1230 Fishcamp Engineering
-1231 Woodward McCoach, Inc.
-1232 GPT Limited
-1233 Bus-Tech, Inc.
-1234 Technical Corp.
-1235 Risq Modular Systems, Inc.
-1236 Sigma Designs Corporation
- 0000 RealMagic64/GX
- 6401 REALmagic 64/GX (SD 6425)
-1237 Alta Technology Corporation
-1238 Adtran
-1239 3DO Company
-123a Visicom Laboratories, Inc.
-123b Seeq Technology, Inc.
-123c Century Systems, Inc.
-123d Engineering Design Team, Inc.
- 0000 EasyConnect 8/32
- 0002 EasyConnect 8/64
- 0003 EasyIO
-123e Simutech, Inc.
-123f C-Cube Microsystems
- 00e4 MPEG
- 8120 E4?
- 11bd 0006 DV500 E4
- 11bd 000a DV500 E4
- 8888 Cinemaster C 3.0 DVD Decoder
- 1002 0001 Cinemaster C 3.0 DVD Decoder
- 1002 0002 Cinemaster C 3.0 DVD Decoder
- 1328 0001 Cinemaster C 3.0 DVD Decoder
-1240 Marathon Technologies Corp.
-1241 DSC Communications
-1242 Jaycor Networks, Inc.
- 1242 JNI Corporation (former Jaycor Networks, Inc.)
- 4643 FCI-1063 Fibre Channel Adapter
- 6562 FCX2-6562 Dual Channel PCI-X Fibre Channel Adapter
- 656a FCX-6562 PCI-X Fibre Channel Adapter
-1243 Delphax
-1244 AVM Audiovisuelles MKTG & Computer System GmbH
- 0700 B1 ISDN
- 0800 C4 ISDN
- 0a00 A1 ISDN [Fritz]
- 1244 0a00 FRITZ!Card ISDN Controller
- 0e00 Fritz!PCI v2.0 ISDN
- 1100 C2 ISDN
- 1200 T1 ISDN
-1245 A.P.D., S.A.
-1246 Dipix Technologies, Inc.
-1247 Xylon Research, Inc.
-1248 Central Data Corporation
-1249 Samsung Electronics Co., Ltd.
-124a AEG Electrocom GmbH
-124b SBS/Greenspring Modular I/O
- 0040 PCI-40A or cPCI-200 Quad IndustryPack carrier
- 124b 9080 PCI9080 Bridge
-124c Solitron Technologies, Inc.
-124d Stallion Technologies, Inc.
- 0000 EasyConnection 8/32
- 0002 EasyConnection 8/64
- 0003 EasyIO
- 0004 EasyConnection/RA
-124e Cylink
-124f Infotrend Technology, Inc.
- 0041 IFT-2000 Series RAID Controller
-1250 Hitachi Microcomputer System Ltd
-1251 VLSI Solutions Oy
-1253 Guzik Technical Enterprises
-1254 Linear Systems Ltd.
-1255 Optibase Ltd
- 1110 MPEG Forge
- 1210 MPEG Fusion
- 2110 VideoPlex
- 2120 VideoPlex CC
- 2130 VideoQuest
-1256 Perceptive Solutions, Inc.
- 4201 PCI-2220I
- 4401 PCI-2240I
- 5201 PCI-2000
-1257 Vertex Networks, Inc.
-1258 Gilbarco, Inc.
-1259 Allied Telesyn International
- 2560 AT-2560 Fast Ethernet Adapter (i82557B)
-125a ABB Power Systems
-125b Asix Electronics Corporation
- 1400 ALFA GFC2204
-125c Aurora Technologies, Inc.
- 0640 Aries 16000P
-125d ESS Technology
- 0000 ES336H Fax Modem (Early Model)
- 1948 Solo?
- 1968 ES1968 Maestro 2
- 1028 0085 ES1968 Maestro-2 PCI
- 1033 8051 ES1968 Maestro-2 Audiodrive
- 1969 ES1969 Solo-1 Audiodrive
- 1014 0166 ES1969 SOLO-1 AudioDrive on IBM Aptiva Mainboard
- 125d 8888 Solo-1 Audio Adapter
- 525f c888 ES1969 SOLO-1 AudioDrive (+ES1938)
- 1978 ES1978 Maestro 2E
- 0e11 b112 Armada M700
- 1033 803c ES1978 Maestro-2E Audiodrive
- 1033 8058 ES1978 Maestro-2E Audiodrive
- 1092 4000 Monster Sound MX400
- 1179 0001 ES1978 Maestro-2E Audiodrive
- 1988 ES1988 Allegro-1
- 1092 4100 Sonic Impact S100
- 125d 1988 ESS Allegro-1 Audiodrive
- 1989 ESS Modem
- 125d 1989 ESS Modem
- 1998 ES1983S Maestro-3i PCI Audio Accelerator
- 1028 00e6 ES1983S Maestro-3i (Dell Inspiron 8100)
- 1999 ES1983S Maestro-3i PCI Modem Accelerator
- 199a ES1983S Maestro-3i PCI Audio Accelerator
- 199b ES1983S Maestro-3i PCI Modem Accelerator
- 2808 ES336H Fax Modem (Later Model)
- 2838 ES2838/2839 SuperLink Modem
- 2898 ES2898 Modem
- 125d 0424 ES56-PI Data Fax Modem
- 125d 0425 ES56T-PI Data Fax Modem
- 125d 0426 ES56V-PI Data Fax Modem
- 125d 0427 VW-PI Data Fax Modem
- 125d 0428 ES56ST-PI Data Fax Modem
- 125d 0429 ES56SV-PI Data Fax Modem
- 147a c001 ES56-PI Data Fax Modem
- 14fe 0428 ES56-PI Data Fax Modem
- 14fe 0429 ES56-PI Data Fax Modem
-125e Specialvideo Engineering SRL
-125f Concurrent Technologies, Inc.
-1260 Harris Semiconductor
- 3873 Prism 2.5 Wavelan chipset
- 1186 3501 DWL-520 Wireless PCI Adapter
- 1668 0414 HWP01170-01 802.11b PCI Wireless Adapter
- 1737 3874 WMP11 Wireless 802.11b PCI Adapter
- 8086 2513 Wireless 802.11b MiniPCI Adapter
- 3890 D-Links DWL-g650 A1
- 8130 HMP8130 NTSC/PAL Video Decoder
- 8131 HMP8131 NTSC/PAL Video Decoder
-1261 Matsushita-Kotobuki Electronics Industries, Ltd.
-1262 ES Computer Company, Ltd.
-1263 Sonic Solutions
-1264 Aval Nagasaki Corporation
-1265 Casio Computer Co., Ltd.
-1266 Microdyne Corporation
- 0001 NE10/100 Adapter (i82557B)
- 1910 NE2000Plus (RT8029) Ethernet Adapter
- 1266 1910 NE2000Plus Ethernet Adapter
-1267 S. A. Telecommunications
- 5352 PCR2101
- 5a4b Telsat Turbo
-1268 Tektronix
-1269 Thomson-CSF/TTM
-126a Lexmark International, Inc.
-126b Adax, Inc.
-126c Northern Telecom
-126d Splash Technology, Inc.
-126e Sumitomo Metal Industries, Ltd.
-126f Silicon Motion, Inc.
- 0710 SM710 LynxEM
- 0712 SM712 LynxEM+
- 0720 SM720 Lynx3DM
- 0810 SM810 LynxE
- 0811 SM811 LynxE
- 0820 SM820 Lynx3D
- 0910 SM910
-1270 Olympus Optical Co., Ltd.
-1271 GW Instruments
-1272 Telematics International
-1273 Hughes Network Systems
- 0002 DirecPC
-1274 Ensoniq
- 1171 ES1373 [AudioPCI] (also Creative Labs CT5803)
- 1371 ES1371 [AudioPCI-97]
- 0e11 0024 AudioPCI on Motherboard Compaq Deskpro
- 0e11 b1a7 ES1371, ES1373 AudioPCI
- 1033 80ac ES1371, ES1373 AudioPCI
- 1042 1854 Tazer
- 107b 8054 Tabor2
- 1274 1371 Creative Sound Blaster AudioPCI64V, AudioPCI128
- 1462 6470 ES1371, ES1373 AudioPCI On Motherboard MS-6147 1.1A
- 1462 6560 ES1371, ES1373 AudioPCI On Motherboard MS-6156 1.10
- 1462 6630 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 1.0A
- 1462 6631 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 1.0A
- 1462 6632 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 2.0A
- 1462 6633 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 2.0A
- 1462 6820 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00
- 1462 6822 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00A
- 1462 6830 ES1371, ES1373 AudioPCI On Motherboard MS-6183 1.00
- 1462 6880 ES1371, ES1373 AudioPCI On Motherboard MS-6188 1.00
- 1462 6900 ES1371, ES1373 AudioPCI On Motherboard MS-6190 1.00
- 1462 6910 ES1371, ES1373 AudioPCI On Motherboard MS-6191
- 1462 6930 ES1371, ES1373 AudioPCI On Motherboard MS-6193
- 1462 6990 ES1371, ES1373 AudioPCI On Motherboard MS-6199BX 2.0A
- 1462 6991 ES1371, ES1373 AudioPCI On Motherboard MS-6199VIA 2.0A
- 14a4 2077 ES1371, ES1373 AudioPCI On Motherboard KR639
- 14a4 2105 ES1371, ES1373 AudioPCI On Motherboard MR800
- 14a4 2107 ES1371, ES1373 AudioPCI On Motherboard MR801
- 14a4 2172 ES1371, ES1373 AudioPCI On Motherboard DR739
- 1509 9902 ES1371, ES1373 AudioPCI On Motherboard KW11
- 1509 9903 ES1371, ES1373 AudioPCI On Motherboard KW31
- 1509 9904 ES1371, ES1373 AudioPCI On Motherboard KA11
- 1509 9905 ES1371, ES1373 AudioPCI On Motherboard KC13
- 152d 8801 ES1371, ES1373 AudioPCI On Motherboard CP810E
- 152d 8802 ES1371, ES1373 AudioPCI On Motherboard CP810
- 152d 8803 ES1371, ES1373 AudioPCI On Motherboard P3810E
- 152d 8804 ES1371, ES1373 AudioPCI On Motherboard P3810-S
- 152d 8805 ES1371, ES1373 AudioPCI On Motherboard P3820-S
- 270f 2001 ES1371, ES1373 AudioPCI On Motherboard 6CTR
- 270f 2200 ES1371, ES1373 AudioPCI On Motherboard 6WTX
- 270f 3000 ES1371, ES1373 AudioPCI On Motherboard 6WSV
- 270f 3100 ES1371, ES1373 AudioPCI On Motherboard 6WIV2
- 270f 3102 ES1371, ES1373 AudioPCI On Motherboard 6WIV
- 270f 7060 ES1371, ES1373 AudioPCI On Motherboard 6ASA2
- 8086 4249 ES1371, ES1373 AudioPCI On Motherboard BI440ZX
- 8086 424c ES1371, ES1373 AudioPCI On Motherboard BL440ZX
- 8086 425a ES1371, ES1373 AudioPCI On Motherboard BZ440ZX
- 8086 4341 ES1371, ES1373 AudioPCI On Motherboard Cayman
- 8086 4343 ES1371, ES1373 AudioPCI On Motherboard Cape Cod
- 8086 4649 ES1371, ES1373 AudioPCI On Motherboard Fire Island
- 8086 464a ES1371, ES1373 AudioPCI On Motherboard FJ440ZX
- 8086 4d4f ES1371, ES1373 AudioPCI On Motherboard Montreal
- 8086 4f43 ES1371, ES1373 AudioPCI On Motherboard OC440LX
- 8086 5243 ES1371, ES1373 AudioPCI On Motherboard RC440BX
- 8086 5352 ES1371, ES1373 AudioPCI On Motherboard SunRiver
- 8086 5643 ES1371, ES1373 AudioPCI On Motherboard Vancouver
- 8086 5753 ES1371, ES1373 AudioPCI On Motherboard WS440BX
- 5000 ES1370 [AudioPCI]
- 4942 4c4c Creative Sound Blaster AudioPCI128
- 5880 5880 AudioPCI
- 1274 2000 Creative Sound Blaster AudioPCI128
- 1274 2003 Creative SoundBlaster AudioPCI 128
- 1274 5880 Creative Sound Blaster AudioPCI128
- 1458 a000 5880 AudioPCI On Motherboard 6OXET
- 1462 6880 5880 AudioPCI On Motherboard MS-6188 1.00
- 270f 2001 5880 AudioPCI On Motherboard 6CTR
- 270f 2200 5880 AudioPCI On Motherboard 6WTX
- 270f 7040 5880 AudioPCI On Motherboard 6ATA4
-1275 Network Appliance Corporation
-1276 Switched Network Technologies, Inc.
-1277 Comstream
-1278 Transtech Parallel Systems Ltd.
- 0701 TPE3/TM3 PowerPC Node
- 0710 TPE5 PowerPC PCI board
-1279 Transmeta Corporation
- 0295 Northbridge
- 0395 LongRun Northbridge
- 0396 SDRAM controller
- 0397 BIOS scratchpad
-127a Rockwell International
- 1002 HCF 56k Data/Fax Modem
- 1092 094c SupraExpress 56i PRO [Diamond SUP2380]
- 122d 4002 HPG / MDP3858-U
- 122d 4005 MDP3858-E
- 122d 4007 MDP3858-A/-NZ
- 122d 4012 MDP3858-SA
- 122d 4017 MDP3858-W
- 122d 4018 MDP3858-W
- 127a 1002 Rockwell 56K D/F HCF Modem
- 1003 HCF 56k Data/Fax Modem
- 0e11 b0bc 229-DF Zephyr
- 0e11 b114 229-DF Cheetah
- 1033 802b 229-DF
- 13df 1003 PCI56RX Modem
- 13e0 0117 IBM
- 13e0 0147 IBM F-1156IV+/R3 Spain V.90 Modem
- 13e0 0197 IBM
- 13e0 01c7 IBM F-1156IV+/R3 WW V.90 Modem
- 13e0 01f7 IBM
- 1436 1003 IBM
- 1436 1103 IBM 5614PM3G V.90 Modem
- 1436 1602 Compaq 229-DF Ducati
- 1004 HCF 56k Data/Fax/Voice Modem
- 1048 1500 MicroLink 56k Modem
- 10cf 1059 Fujitsu 229-DFRT
- 1005 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 1033 8029 229-DFSV
- 1033 8054 Modem
- 10cf 103c Fujitsu
- 10cf 1055 Fujitsu 229-DFSV
- 10cf 1056 Fujitsu 229-DFSV
- 122d 4003 MDP3858SP-U
- 122d 4006 Packard Bell MDP3858V-E
- 122d 4008 MDP3858SP-A/SP-NZ
- 122d 4009 MDP3858SP-E
- 122d 4010 MDP3858V-U
- 122d 4011 MDP3858SP-SA
- 122d 4013 MDP3858V-A/V-NZ
- 122d 4015 MDP3858SP-W
- 122d 4016 MDP3858V-W
- 122d 4019 MDP3858V-SA
- 13df 1005 PCI56RVP Modem
- 13e0 0187 IBM
- 13e0 01a7 IBM
- 13e0 01b7 IBM DF-1156IV+/R3 Spain V.90 Modem
- 13e0 01d7 IBM DF-1156IV+/R3 WW V.90 Modem
- 1436 1005 IBM
- 1436 1105 IBM
- 1437 1105 IBM 5614PS3G V.90 Modem
- 1022 HCF 56k Modem
- 1436 1303 M3-5614PM3G V.90 Modem
- 1023 HCF 56k Data/Fax Modem
- 122d 4020 Packard Bell MDP3858-WE
- 122d 4023 MDP3858-UE
- 13e0 0247 IBM F-1156IV+/R6 Spain V.90 Modem
- 13e0 0297 IBM
- 13e0 02c7 IBM F-1156IV+/R6 WW V.90 Modem
- 1436 1203 IBM
- 1436 1303 IBM
- 1024 HCF 56k Data/Fax/Voice Modem
- 1025 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 10cf 106a Fujitsu 235-DFSV
- 122d 4021 Packard Bell MDP3858V-WE
- 122d 4022 MDP3858SP-WE
- 122d 4024 MDP3858V-UE
- 122d 4025 MDP3858SP-UE
- 1026 HCF 56k PCI Speakerphone Modem
- 1032 HCF 56k Modem
- 1033 HCF 56k Modem
- 1034 HCF 56k Modem
- 1035 HCF 56k PCI Speakerphone Modem
- 1036 HCF 56k Modem
- 1085 HCF 56k Volcano PCI Modem
- 2005 HCF 56k Data/Fax Modem
- 104d 8044 229-DFSV
- 104d 8045 229-DFSV
- 104d 8055 PBE/Aztech 235W-DFSV
- 104d 8056 235-DFSV
- 104d 805a Modem
- 104d 805f Modem
- 104d 8074 Modem
- 2013 HSF 56k Data/Fax Modem
- 1179 0001 Modem
- 1179 ff00 Modem
- 2014 HSF 56k Data/Fax/Voice Modem
- 10cf 1057 Fujitsu Citicorp III
- 122d 4050 MSP3880-U
- 122d 4055 MSP3880-W
- 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 10cf 1063 Fujitsu
- 10cf 1064 Fujitsu
- 1468 2015 Fujitsu
- 2016 HSF 56k Data/Fax/Voice/Spkp Modem
- 122d 4051 MSP3880V-W
- 122d 4052 MSP3880SP-W
- 122d 4054 MSP3880V-U
- 122d 4056 MSP3880SP-U
- 122d 4057 MSP3880SP-A
- 4311 Riptide HSF 56k PCI Modem
- 127a 4311 Ring Modular? Riptide HSF RT HP Dom
- 13e0 0210 HP-GVC
- 4320 Riptide PCI Audio Controller
- 1235 4320 Riptide PCI Audio Controller
- 4321 Riptide HCF 56k PCI Modem
- 1235 4321 Hewlett Packard DF
- 1235 4324 Hewlett Packard DF
- 13e0 0210 Hewlett Packard DF
- 144d 2321 Riptide
- 4322 Riptide PCI Game Controller
- 1235 4322 Riptide PCI Game Controller
- 8234 RapidFire 616X ATM155 Adapter
- 108d 0022 RapidFire 616X ATM155 Adapter
- 108d 0027 RapidFire 616X ATM155 Adapter
-127b Pixera Corporation
-127c Crosspoint Solutions, Inc.
-127d Vela Research
-127e Winnov, L.P.
-127f Fujifilm
-1280 Photoscript Group Ltd.
-1281 Yokogawa Electric Corporation
-1282 Davicom Semiconductor, Inc.
- 9009 Ethernet 100/10 MBit
- 9100 Ethernet 100/10 MBit
- 9102 Ethernet 100/10 MBit
- 9132 Ethernet 100/10 MBit
-1283 Integrated Technology Express, Inc.
- 673a IT8330G
- 8181 IT8181E/F LCD/VGA Controller
- 8330 IT8330G
- 8888 IT8888F PCI to ISA Bridge with SMB
- 8889 IT8889F PCI to ISA Bridge
- e886 IT8330G
-1284 Sahara Networks, Inc.
-1285 Platform Technologies, Inc.
- 0100 AGOGO sound chip (aka ESS Maestro 1)
-1286 Mazet GmbH
-1287 M-Pact, Inc.
- 001e LS220D DVD Decoder
- 001f LS220C DVD Decoder
-1288 Timestep Corporation
-1289 AVC Technology, Inc.
-128a Asante Technologies, Inc.
-128b Transwitch Corporation
-128c Retix Corporation
-128d G2 Networks, Inc.
- 0021 ATM155 Adapter
-128e Hoontech Corporation/Samho Multi Tech Ltd.
- 0008 ST128 WSS/SB
- 0009 ST128 SAM9407
- 000a ST128 Game Port
- 000b ST128 MPU Port
- 000c ST128 Ctrl Port
-128f Tateno Dennou, Inc.
-1290 Sord Computer Corporation
-1291 NCS Computer Italia
-1292 Tritech Microelectronics Inc
-1293 Media Reality Technology
-1294 Rhetorex, Inc.
-1295 Imagenation Corporation
-1296 Kofax Image Products
-1297 Holco Enterprise Co, Ltd/Shuttle Computer
-1298 Spellcaster Telecommunications Inc.
-1299 Knowledge Technology Lab.
-129a VMetro, inc.
- 0615 PBT-615 PCI-X Bus Analyzer
-129b Image Access
-129c Jaycor
-129d Compcore Multimedia, Inc.
-129e Victor Company of Japan, Ltd.
-129f OEC Medical Systems, Inc.
-12a0 Allen-Bradley Company
-12a1 Simpact Associates, Inc.
-12a2 Newgen Systems Corporation
-12a3 Lucent Technologies
-12a4 NTT Electronics Technology Company
-12a5 Vision Dynamics Ltd.
-12a6 Scalable Networks, Inc.
-12a7 AMO GmbH
-12a8 News Datacom
-12a9 Xiotech Corporation
-12aa SDL Communications, Inc.
-12ab Yuan Yuan Enterprise Co., Ltd.
- 3000 MPG-200C PCI DVD Decoder Card
-12ac Measurex Corporation
-12ad Multidata GmbH
-12ae Alteon Networks Inc.
- 0001 AceNIC Gigabit Ethernet
- 12ae 0001 Gigabit Ethernet-SX (Universal)
- 1410 0104 Gigabit Ethernet-SX PCI Adapter
- 0002 AceNIC Gigabit Ethernet (Copper)
- 10a9 8002 Acenic Gigabit Ethernet
- 12ae 0002 Gigabit Ethernet-T (3C986-T)
-12af TDK USA Corp
-12b0 Jorge Scientific Corp
-12b1 GammaLink
-12b2 General Signal Networks
-12b3 Inter-Face Co Ltd
-12b4 FutureTel Inc
-12b5 Granite Systems Inc.
-12b6 Natural Microsystems
-12b7 Cognex Modular Vision Systems Div. - Acumen Inc.
-12b8 Korg
-12b9 US Robotics/3Com
- 1006 WinModem
- 12b9 005c USR 56k Internal Voice WinModem (Model 3472)
- 12b9 005e USR 56k Internal WinModem (Models 662975)
- 12b9 0062 USR 56k Internal Voice WinModem (Model 662978)
- 12b9 0068 USR 56k Internal Voice WinModem (Model 5690)
- 12b9 007a USR 56k Internal Voice WinModem (Model 662974)
- 12b9 007f USR 56k Internal WinModem (Models 5698, 5699)
- 12b9 0080 USR 56k Internal WinModem (Models 2975, 3528)
- 12b9 0081 USR 56k Internal Voice WinModem (Models 2974, 3529)
- 12b9 0091 USR 56k Internal Voice WinModem (Model 2978)
- 1007 USR 56k Internal WinModem
- 12b9 00a3 USR 56k Internal WinModem (Model 3595)
- 1008 56K FaxModem Model 5610
- 12b9 00a2 USR 56k Internal FAX Modem (Model 2977)
- 12b9 00aa USR 56k Internal Voice Modem (Model 2976)
- 12b9 00ab USR 56k Internal Voice Modem (Model 5609)
- 12b9 00ac USR 56k Internal Voice Modem (Model 3298)
- 12b9 00ad USR 56k Internal FAX Modem (Model 5610)
-12ba BittWare, Inc.
-12bb Nippon Unisoft Corporation
-12bc Array Microsystems
-12bd Computerm Corp.
-12be Anchor Chips Inc.
- 3041 AN3041Q CO-MEM
- 3042 AN3042Q CO-MEM Lite
- 12be 3042 Anchor Chips Lite Evaluation Board
-12bf Fujifilm Microdevices
-12c0 Infimed
-12c1 GMM Research Corp
-12c2 Mentec Limited
-12c3 Holtek Microelectronics Inc
- 0058 PCI NE2K Ethernet
- 5598 PCI NE2K Ethernet
-12c4 Connect Tech Inc
-12c5 Picture Elements Incorporated
- 007e Imaging/Scanning Subsystem Engine
- 007f Imaging/Scanning Subsystem Engine
- 0081 PCIVST [Grayscale Thresholding Engine]
- 0085 Video Simulator/Sender
- 0086 THR2 Multi-scale Thresholder
-12c6 Mitani Corporation
-12c7 Dialogic Corp
-12c8 G Force Co, Ltd
-12c9 Gigi Operations
-12ca Integrated Computing Engines
-12cb Antex Electronics Corporation
-12cc Pluto Technologies International
-12cd Aims Lab
-12ce Netspeed Inc.
-12cf Prophet Systems, Inc.
-12d0 GDE Systems, Inc.
-12d1 PSITech
-12d2 NVidia / SGS Thomson (Joint Venture)
- 0008 NV1
- 0009 DAC64
- 0018 Riva128
- 1048 0c10 VICTORY Erazor
- 107b 8030 STB Velocity 128
- 1092 0350 Viper V330
- 1092 1092 Viper V330
- 10b4 1b1b STB Velocity 128
- 10b4 1b1d STB Velocity 128
- 10b4 1b1e STB Velocity 128, PAL TV-Out
- 10b4 1b20 STB Velocity 128 Sapphire
- 10b4 1b21 STB Velocity 128
- 10b4 1b22 STB Velocity 128 AGP, NTSC TV-Out
- 10b4 1b23 STB Velocity 128 AGP, PAL TV-Out
- 10b4 1b27 STB Velocity 128 DVD
- 10b4 1b88 MVP Pro 128
- 10b4 222a STB Velocity 128 AGP
- 10b4 2230 STB Velocity 128
- 10b4 2232 STB Velocity 128
- 10b4 2235 STB Velocity 128 AGP
- 2a15 54a3 3DVision-SAGP / 3DexPlorer 3000
- 0019 Riva128ZX
- 0020 TNT
- 0028 TNT2
- 0029 UTNT2
- 002c VTNT2
- 00a0 ITNT2
-12d3 Vingmed Sound A/S
-12d4 Ulticom (Formerly DGM&S)
- 0200 T1 Card
-12d5 Equator Technologies
-12d6 Analogic Corp
-12d7 Biotronic SRL
-12d8 Pericom Semiconductor
-12d9 Aculab PLC
- 0002 PCI Prosody
- 0004 cPCI Prosody
-12da True Time Inc.
-12db Annapolis Micro Systems, Inc
-12dc Symicron Computer Communication Ltd.
-12dd Management Graphics
-12de Rainbow Technologies
-12df SBS Technologies Inc
-12e0 Chase Research
- 0010 ST16C654 Quad UART
- 0020 ST16C654 Quad UART
- 0030 ST16C654 Quad UART
-12e1 Nintendo Co, Ltd
-12e2 Datum Inc. Bancomm-Timing Division
-12e3 Imation Corp - Medical Imaging Systems
-12e4 Brooktrout Technology Inc
-12e5 Apex Semiconductor Inc
-12e6 Cirel Systems
-12e7 Sunsgroup Corporation
-12e8 Crisc Corp
-12e9 GE Spacenet
-12ea Zuken
-12eb Aureal Semiconductor
- 0001 Vortex 1
- 104d 8036 AU8820 Vortex Digital Audio Processor
- 1092 2000 Sonic Impact A3D
- 1092 2100 Sonic Impact A3D
- 1092 2110 Sonic Impact A3D
- 1092 2200 Sonic Impact A3D
- 122d 1002 AU8820 Vortex Digital Audio Processor
- 12eb 0001 AU8820 Vortex Digital Audio Processor
- 5053 3355 Montego
- 0002 Vortex 2
- 104d 8049 AU8830 Vortex 3D Digital Audio Processor
- 104d 807b AU8830 Vortex 3D Digital Audio Processor
- 1092 3000 Monster Sound II
- 1092 3001 Monster Sound II
- 1092 3002 Monster Sound II
- 1092 3003 Monster Sound II
- 1092 3004 Monster Sound II
- 12eb 0001 AU8830 Vortex 3D Digital Audio Processor
- 12eb 0002 AU8830 Vortex 3D Digital Audio Processor
- 12eb 0088 AU8830 Vortex 3D Digital Audio Processor
- 144d 3510 AU8830 Vortex 3D Digital Audio Processor
- 5053 3356 Montego II
- 0003 AU8810 Vortex Digital Audio Processor
- 104d 8049 AU8810 Vortex Digital Audio Processor
- 104d 8077 AU8810 Vortex Digital Audio Processor
- 109f 1000 AU8810 Vortex Digital Audio Processor
- 12eb 0003 AU8810 Vortex Digital Audio Processor
- 1462 6780 AU8810 Vortex Digital Audio Processor
- 14a4 2073 AU8810 Vortex Digital Audio Processor
- 14a4 2091 AU8810 Vortex Digital Audio Processor
- 14a4 2104 AU8810 Vortex Digital Audio Processor
- 14a4 2106 AU8810 Vortex Digital Audio Processor
- 8803 Vortex 56k Software Modem
- 12eb 8803 Vortex 56k Software Modem
-12ec 3A International, Inc.
-12ed Optivision Inc.
-12ee Orange Micro
-12ef Vienna Systems
-12f0 Pentek
-12f1 Sorenson Vision Inc
-12f2 Gammagraphx, Inc.
-12f3 Radstone Technology
-12f4 Megatel
-12f5 Forks
-12f6 Dawson France
-12f7 Cognex
-12f8 Electronic Design GmbH
- 0002 VideoMaker
-12f9 Four Fold Ltd
-12fb Spectrum Signal Processing
-12fc Capital Equipment Corp
-12fd I2S
-12fe ESD Electronic System Design GmbH
-12ff Lexicon
-1300 Harman International Industries Inc
-1302 Computer Sciences Corp
-1303 Innovative Integration
-1304 Juniper Networks
-1305 Netphone, Inc
-1306 Duet Technologies
-1307 Computer Boards
- 0001 PCI-DAS1602/16
- 000b PCI-DIO48H
- 000c PCI-PDISO8
- 000d PCI-PDISO16
- 000f PCI-DAS1200
- 0010 PCI-DAS1602/12
- 0014 PCI-DIO24H
- 0015 PCI-DIO24H/CTR3
- 0016 PCI-DIO48H/CTR15
- 0017 PCI-DIO96H
- 0018 PCI-CTR05
- 0019 PCI-DAS1200/JR
- 001a PCI-DAS1001
- 001b PCI-DAS1002
- 001c PCI-DAS1602JR/16
- 001d PCI-DAS6402/16
- 001e PCI-DAS6402/12
- 001f PCI-DAS16/M1
- 0020 PCI-DDA02/12
- 0021 PCI-DDA04/12
- 0022 PCI-DDA08/12
- 0023 PCI-DDA02/16
- 0024 PCI-DDA04/16
- 0025 PCI-DDA08/16
- 0026 PCI-DAC04/12-HS
- 0027 PCI-DAC04/16-HS
- 0028 PCI-DIO24
- 0029 PCI-DAS08
- 002c PCI-INT32
- 0033 PCI-DUAL-AC5
- 0034 PCI-DAS-TC
- 0035 PCI-DAS64/M1/16
- 0036 PCI-DAS64/M2/16
- 0037 PCI-DAS64/M3/16
- 004c PCI-DAS1000
- 004d PCI-QUAD04
-1308 Jato Technologies Inc.
- 0001 NetCelerator Adapter
- 1308 0001 NetCelerator Adapter
-1309 AB Semiconductor Ltd
-130a Mitsubishi Electric Microcomputer
-130b Colorgraphic Communications Corp
-130c Ambex Technologies, Inc
-130d Accelerix Inc
-130e Yamatake-Honeywell Co. Ltd
-130f Advanet Inc
-1310 Gespac
-1311 Videoserver, Inc
-1312 Acuity Imaging, Inc
-1313 Yaskawa Electric Co.
-1316 Teradyne Inc
-1317 Linksys
- 0981 Fast Ethernet 10/100
- 0985 Network Everywhere Fast Ethernet 10/100 model NC100
- 1985 Fast Ethernet 10/100
-1318 Packet Engines Inc.
- 0911 PCI Ethernet Adapter
-1319 Fortemedia, Inc
- 0801 Xwave QS3000A [FM801]
- 0802 Xwave QS3000A [FM801 game port]
- 1000 FM801 PCI Audio
- 1001 FM801 PCI Joystick
-131a Finisar Corp.
-131c Nippon Electro-Sensory Devices Corp
-131d Sysmic, Inc.
-131e Xinex Networks Inc
-131f Siig Inc
- 1000 CyberSerial (1-port) 16550
- 1001 CyberSerial (1-port) 16650
- 1002 CyberSerial (1-port) 16850
- 1010 Duet 1S(16550)+1P
- 1011 Duet 1S(16650)+1P
- 1012 Duet 1S(16850)+1P
- 1020 CyberParallel (1-port)
- 1021 CyberParallel (2-port)
- 1030 CyberSerial (2-port) 16550
- 1031 CyberSerial (2-port) 16650
- 1032 CyberSerial (2-port) 16850
- 1034 Trio 2S(16550)+1P
- 1035 Trio 2S(16650)+1P
- 1036 Trio 2S(16850)+1P
- 1050 CyberSerial (4-port) 16550
- 1051 CyberSerial (4-port) 16650
- 1052 CyberSerial (4-port) 16850
- 2000 CyberSerial (1-port) 16550
- 2001 CyberSerial (1-port) 16650
- 2002 CyberSerial (1-port) 16850
- 2010 Duet 1S(16550)+1P
- 2011 Duet 1S(16650)+1P
- 2012 Duet 1S(16850)+1P
- 2020 CyberParallel (1-port)
- 2021 CyberParallel (2-port)
- 2030 CyberSerial (2-port) 16550
- 131f 2030 PCI Serial Card
- 2031 CyberSerial (2-port) 16650
- 2032 CyberSerial (2-port) 16850
- 2040 Trio 1S(16550)+2P
- 2041 Trio 1S(16650)+2P
- 2042 Trio 1S(16850)+2P
- 2050 CyberSerial (4-port) 16550
- 2051 CyberSerial (4-port) 16650
- 2052 CyberSerial (4-port) 16850
- 2060 Trio 2S(16550)+1P
- 2061 Trio 2S(16650)+1P
- 2062 Trio 2S(16850)+1P
-1320 Crypto AG
-1321 Arcobel Graphics BV
-1322 MTT Co., Ltd
-1323 Dome Inc
-1324 Sphere Communications
-1325 Salix Technologies, Inc
-1326 Seachange international
-1327 Voss scientific
-1328 quadrant international
-1329 Productivity Enhancement
-132a Microcom Inc.
-132b Broadband Technologies
-132c Micrel Inc
-132d Integrated Silicon Solution, Inc.
-1330 MMC Networks
-1331 Radisys Corp.
-1332 Micro Memory
- 5415 MM-5415CN PCI Memory Module with Battery Backup
- 5425 MM-5425CN PCI 64/66 Memory Module with Battery Backup
-1334 Redcreek Communications, Inc
-1335 Videomail, Inc
-1337 Third Planet Publishing
-1338 BT Electronics
-133a Vtel Corp
-133b Softcom Microsystems
-133c Holontech Corp
-133d SS Technologies
-133e Virtual Computer Corp
-133f SCM Microsystems
-1340 Atalla Corp
-1341 Kyoto Microcomputer Co
-1342 Promax Systems Inc
-1343 Phylon Communications Inc
-1344 Crucial Technology
-1345 Arescom Inc
-1347 Odetics
-1349 Sumitomo Electric Industries, Ltd.
-134a DTC Technology Corp.
- 0001 Domex 536
- 0002 Domex DMX3194UP SCSI Adapter
-134b ARK Research Corp.
-134c Chori Joho System Co. Ltd
-134d PCTel Inc
- 7890 HSP MicroModem 56
- 7891 HSP MicroModem 56
- 134d 0001 HSP MicroModem 56
- 7892 HSP MicroModem 56
- 7893 HSP MicroModem 56
- 7894 HSP MicroModem 56
- 7895 HSP MicroModem 56
- 7896 HSP MicroModem 56
- 7897 HSP MicroModem 56
-134e CSTI
-134f Algo System Co Ltd
-1350 Systec Co. Ltd
-1351 Sonix Inc
-1353 Thales Idatys
- 0002 Proserver
- 0003 PCI-FUT
- 0004 PCI-S0
- 0005 PCI-FUT-S0
-1354 Dwave System Inc
-1355 Kratos Analytical Ltd
-1356 The Logical Co
-1359 Prisa Networks
-135a Brain Boxes
-135b Giganet Inc
-135c Quatech Inc
- 0010 QSC-100
- 0020 DSC-100
- 0030 DSC-200/300
- 0040 QSC-200/300
- 0050 ESC-100D
- 0060 ESC-100M
- 00f0 MPAC-100 Synchronous Serial Card (Zilog 85230)
- 0170 QSCLP-100
- 0180 DSCLP-100
- 0190 SSCLP-100
- 01a0 QSCLP-200/300
- 01b0 DSCLP-200/300
- 01c0 SSCLP-200/300
-135d ABB Network Partner AB
-135e Sealevel Systems Inc
- 7101 Single Port RS-232/422/485/530
- 7201 Dual Port RS-232/422/485 Interface
- 7202 Dual Port RS-232 Interface
- 7401 Four Port RS-232 Interface
- 7402 Four Port RS-422/485 Interface
- 7801 Eight Port RS-232 Interface
- 8001 8001 Digital I/O Adapter
-135f I-Data International A-S
-1360 Meinberg Funkuhren
-1361 Soliton Systems K.K.
-1362 Fujifacom Corporation
-1363 Phoenix Technology Ltd
-1364 ATM Communications Inc
-1365 Hypercope GmbH
-1366 Teijin Seiki Co. Ltd
-1367 Hitachi Zosen Corporation
-1368 Skyware Corporation
-1369 Digigram
-136a High Soft Tech
-136b Kawasaki Steel Corporation
-136c Adtek System Science Co Ltd
-136d Gigalabs Inc
-136f Applied Magic Inc
-1370 ATL Products
-1371 CNet Technology Inc
- 434e GigaCard Network Adapter
- 1371 434e N-Way PCI-Bus Giga-Card 1000/100/10Mbps(L)
-1373 Silicon Vision Inc
-1374 Silicom Ltd
-1375 Argosystems Inc
-1376 LMC
-1377 Electronic Equipment Production & Distribution GmbH
-1378 Telemann Co. Ltd
-1379 Asahi Kasei Microsystems Co Ltd
-137a Mark of the Unicorn Inc
-137b PPT Vision
-137c Iwatsu Electric Co Ltd
-137d Dynachip Corporation
-137e Patriot Scientific Corporation
-137f Japan Satellite Systems Inc
-1380 Sanritz Automation Co Ltd
-1381 Brains Co. Ltd
-1382 Marian - Electronic & Software
-1383 Controlnet Inc
-1384 Reality Simulation Systems Inc
-1385 Netgear
- 4100 802.11b Wireless Adapter (MA301)
- 4105 MA311 802.11b wireless adapter
- 620a GA620
- 622a GA622
- 630a GA630
- f311 FA311
-1386 Video Domain Technologies
-1387 Systran Corp
-1388 Hitachi Information Technology Co Ltd
-1389 Applicom International
- 0001 PCI1500PFB [Intelligent fieldbus adaptor]
-138a Fusion Micromedia Corp
-138b Tokimec Inc
-138c Silicon Reality
-138d Future Techno Designs pte Ltd
-138e Basler GmbH
-138f Patapsco Designs Inc
-1390 Concept Development Inc
-1391 Development Concepts Inc
-1392 Medialight Inc
-1393 Moxa Technologies Co Ltd
- 1040 Smartio C104H/PCI
- 1680 Smartio C168H/PCI
- 2040 Intellio CP-204J
- 2180 Intellio C218 Turbo PCI
- 3200 Intellio C320 Turbo PCI
-1394 Level One Communications
- 0001 LXT1001 Gigabit Ethernet
- 1394 0001 NetCelerator Adapter
-1395 Ambicom Inc
-1396 Cipher Systems Inc
-1397 Cologne Chip Designs GmbH
- 2bd0 ISDN network controller [HFC-PCI]
- 1397 2bd0 ISDN Board
- e4bf 1000 CI1-1-Harp
-1398 Clarion co. Ltd
-1399 Rios systems Co Ltd
-139a Alacritech Inc
- 0001 Quad Port 10/100 Server Accelerator
- 0003 Single Port 10/100 Server Accelerator
- 0005 Single Port Gigabit Server Accelerator
-139b Mediasonic Multimedia Systems Ltd
-139c Quantum 3d Inc
-139d EPL limited
-139e Media4
-139f Aethra s.r.l.
-13a0 Crystal Group Inc
-13a1 Kawasaki Heavy Industries Ltd
-13a2 Ositech Communications Inc
-13a3 Hifn Inc.
- 0005 7751 Security Processor
- 0006 6500 Public Key Processor
- 0007 7811 Security Processor
- 0012 7951 Security Processor
- 0014 78XX Security Processor
- 0016 8065 Security Processor
- 0017 8165 Security Processor
- 0018 8154 Security Processor
-13a4 Rascom Inc
-13a5 Audio Digital Imaging Inc
-13a6 Videonics Inc
-13a7 Teles AG
-13a8 Exar Corp.
- 0158 XR17C158 Octal UART
-13a9 Siemens Medical Systems, Ultrasound Group
-13aa Broadband Networks Inc
-13ab Arcom Control Systems Ltd
-13ac Motion Media Technology Ltd
-13ad Nexus Inc
-13ae ALD Technology Ltd
-13af T.Sqware
-13b0 Maxspeed Corp
-13b1 Tamura corporation
-13b2 Techno Chips Co. Ltd
-13b3 Lanart Corporation
-13b4 Wellbean Co Inc
-13b5 ARM
-13b6 Dlog GmbH
-13b7 Logic Devices Inc
-13b8 Nokia Telecommunications oy
-13b9 Elecom Co Ltd
-13ba Oxford Instruments
-13bb Sanyo Technosound Co Ltd
-13bc Bitran Corporation
-13bd Sharp corporation
-13be Miroku Jyoho Service Co. Ltd
-13bf Sharewave Inc
-13c0 Microgate Corporation
- 0010 SyncLink WAN Adapter
-13c1 3ware Inc
- 1000 3ware ATA-RAID
- 1001 3ware 7000-series ATA-RAID
- 1002 3ware ATA-RAID
-13c2 Technotrend Systemtechnik GmbH
-13c3 Janz Computer AG
-13c4 Phase Metrics
-13c5 Alphi Technology Corp
-13c6 Condor Engineering Inc
-13c7 Blue Chip Technology Ltd
-13c8 Apptech Inc
-13c9 Eaton Corporation
-13ca Iomega Corporation
-13cb Yano Electric Co Ltd
-13cc Metheus Corporation
-13cd Compatible Systems Corporation
-13ce Cocom A/S
-13cf Studio Audio & Video Ltd
-13d0 Techsan Electronics Co Ltd
-# http://www.b2c2inc.com/products/pc-specs.html
- 2103 B2C2 Sky2PC PCI [SkyStar2]
-13d1 Abocom Systems Inc
- ab02 ADMtek Centaur-C rev 17 [D-Link DFE-680TX] CardBus Fast Ethernet Adapter
- ab06 RTL8139 [FE2000VX] CardBus Fast Ethernet Attached Port Adapter
-13d2 Shark Multimedia Inc
-13d3 IMC Networks
-13d4 Graphics Microsystems Inc
-13d5 Media 100 Inc
-13d6 K.I. Technology Co Ltd
-13d7 Toshiba Engineering Corporation
-13d8 Phobos corporation
-13d9 Apex PC Solutions Inc
-13da Intresource Systems pte Ltd
-13db Janich & Klass Computertechnik GmbH
-13dc Netboost Corporation
-13dd Multimedia Bundle Inc
-13de ABB Robotics Products AB
-13df E-Tech Inc
- 0001 PCI56RVP Modem
- 13df 0001 PCI56RVP Modem
-13e0 GVC Corporation
-13e1 Silicom Multimedia Systems Inc
-13e2 Dynamics Research Corporation
-13e3 Nest Inc
-13e4 Calculex Inc
-13e5 Telesoft Design Ltd
-13e6 Argosy research Inc
-13e7 NAC Incorporated
-13e8 Chip Express Corporation
-13e9 Intraserver Technology Inc
-13ea Dallas Semiconductor
-13eb Hauppauge Computer Works Inc
-13ec Zydacron Inc
-13ed Raytheon E-Systems
-13ee Hayes Microcomputer Products Inc
-13ef Coppercom Inc
-13f0 Sundance Technology Inc
- 0201 ST201 Sundance Ethernet
-13f1 Oce' - Technologies B.V.
-13f2 Ford Microelectronics Inc
-13f3 Mcdata Corporation
-13f4 Troika Networks, Inc.
- 1401 Zentai Fibre Channel Adapter
-13f5 Kansai Electric Co. Ltd
-13f6 C-Media Electronics Inc
- 0100 CM8338A
- 13f6 ffff CMI8338/C3DX PCI Audio Device
- 0101 CM8338B
- 13f6 0101 CMI8338-031 PCI Audio Device
- 0111 CM8738
- 1019 0970 P6STP-FL motherboard
- 1043 8077 CMI8738 6-channel audio controller
- 1043 80e2 CMI8738 6ch-MX
- 13f6 0111 CMI8738/C3DX PCI Audio Device
- 1681 a000 Gamesurround MUSE XL
- 0211 CM8738
-13f7 Wildfire Communications
-13f8 Ad Lib Multimedia Inc
-13f9 NTT Advanced Technology Corp.
-13fa Pentland Systems Ltd
-13fb Aydin Corp
-13fc Computer Peripherals International
-13fd Micro Science Inc
-13fe Advantech Co. Ltd
- 1756 PCI-1756
-13ff Silicon Spice Inc
-1400 Artx Inc
- 1401 9432 TX
-1401 CR-Systems A/S
-1402 Meilhaus Electronic GmbH
-1403 Ascor Inc
-1404 Fundamental Software Inc
-1405 Excalibur Systems Inc
-1406 Oce' Printing Systems GmbH
-1407 Lava Computer mfg Inc
- 0100 Lava Dual Serial
- 0101 Lava Quatro A
- 0102 Lava Quatro B
- 0200 Lava Port Plus
- 0201 Lava Quad A
- 0202 Lava Quad B
- 0500 Lava Single Serial
- 0600 Lava Port 650
- 8000 Lava Parallel
- 8001 Dual parallel port controller A
- 8002 Lava Dual Parallel port A
- 8003 Lava Dual Parallel port B
- 8800 BOCA Research IOPPAR
-1408 Aloka Co. Ltd
-1409 Timedia Technology Co Ltd
- 7168 PCI2S550 (Dual 16550 UART)
-140a DSP Research Inc
-140b Ramix Inc
-140c Elmic Systems Inc
-140d Matsushita Electric Works Ltd
-140e Goepel Electronic GmbH
-140f Salient Systems Corp
-1410 Midas lab Inc
-1411 Ikos Systems Inc
-1412 IC Ensemble Inc
- 1712 ICE1712 [Envy24]
- 1724 ICE1724 [Envy24HT]
-1413 Addonics
-1414 Microsoft Corporation
-1415 Oxford Semiconductor Ltd
- 8403 VScom 011H-EP1 1 port parallel adaptor
- 9501 OX16PCI954 (Quad 16950 UART) function 0
- 15ed 2000 MCCR Serial p0-3 of 8
- 15ed 2001 MCCR Serial p0-3 of 16
- 950a EXSYS EX-41092 Dual 16950 Serial adapter
- 950b OXCB950 Cardbus 16950 UART
- 9511 OX16PCI954 (Quad 16950 UART) function 1
- 15ed 2000 MCCR Serial p4-7 of 8
- 15ed 2001 MCCR Serial p4-15 of 16
- 9521 OX16PCI952 (Dual 16950 UART)
-1416 Multiwave Innovation pte Ltd
-1417 Convergenet Technologies Inc
-1418 Kyushu electronics systems Inc
-1419 Excel Switching Corp
-141a Apache Micro Peripherals Inc
-141b Zoom Telephonics Inc
-141d Digitan Systems Inc
-141e Fanuc Ltd
-141f Visiontech Ltd
-1420 Psion Dacom plc
-1421 Ads Technologies Inc
-1422 Ygrec Systems Co Ltd
-1423 Custom Technology Corp.
-1424 Videoserver Connections
-1425 ASIC Designers Inc
-1426 Storage Technology Corp.
-1427 Better On-Line Solutions
-1428 Edec Co Ltd
-1429 Unex Technology Corp.
-142a Kingmax Technology Inc
-142b Radiolan
-142c Minton Optic Industry Co Ltd
-142d Pix stream Inc
-142e Vitec Multimedia
-142f Radicom Research Inc
-1430 ITT Aerospace/Communications Division
-1431 Gilat Satellite Networks
-1432 Edimax Computer Co.
-1433 Eltec Elektronik GmbH
-1435 Real Time Devices US Inc.
-1436 CIS Technology Inc
-1437 Nissin Inc Co
-1438 Atmel-dream
-1439 Outsource Engineering & Mfg. Inc
-143a Stargate Solutions Inc
-143b Canon Research Center, America
-143c Amlogic Inc
-143d Tamarack Microelectronics Inc
-143e Jones Futurex Inc
-143f Lightwell Co Ltd - Zax Division
-1440 ALGOL Corp.
-1441 AGIE Ltd
-1442 Phoenix Contact GmbH & Co.
-1443 Unibrain S.A.
-1444 TRW
-1445 Logical DO Ltd
-1446 Graphin Co Ltd
-1447 AIM GmBH
-1448 Alesis Studio Electronics
-1449 TUT Systems Inc
-144a Adlink Technology
- 7296 PCI-7296
- 7432 PCI-7432
- 7433 PCI-7433
- 7434 PCI-7434
- 7841 PCI-7841
- 8133 PCI-8133
- 8554 PCI-8554
- 9111 PCI-9111
- 9113 PCI-9113
- 9114 PCI-9114
-144b Loronix Information Systems Inc
-144c Catalina Research Inc
-144d Samsung Electronics Co Ltd
-144e OLITEC
-144f Askey Computer Corp.
-1450 Octave Communications Ind.
-1451 SP3D Chip Design GmBH
-1453 MYCOM Inc
-1454 Altiga Networks
-1455 Logic Plus Plus Inc
-1456 Advanced Hardware Architectures
-1457 Nuera Communications Inc
-1458 Giga-byte Technology
-1459 DOOIN Electronics
-145a Escalate Networks Inc
-145b PRAIM SRL
-145c Cryptek
-145d Gallant Computer Inc
-145e Aashima Technology B.V.
-145f Baldor Electric Company
- 0001 NextMove PCI
-1460 DYNARC INC
-1461 Avermedia Technologies Inc
-1462 Micro-Star International Co., Ltd.
-1463 Fast Corporation
-1464 Interactive Circuits & Systems Ltd
-1465 GN NETTEST Telecom DIV.
-1466 Designpro Inc.
-1467 DIGICOM SPA
-1468 AMBIT Microsystem Corp.
-1469 Cleveland Motion Controls
-146a IFR
-146b Parascan Technologies Ltd
-146c Ruby Tech Corp.
- 1430 FE-1430TX Fast Ethernet PCI Adapter
-146d Tachyon, INC.
-146e Williams Electronics Games, Inc.
-146f Multi Dimensional Consulting Inc
-1470 Bay Networks
-1471 Integrated Telecom Express Inc
-1472 DAIKIN Industries, Ltd
-1473 ZAPEX Technologies Inc
-1474 Doug Carson & Associates
-1475 PICAZO Communications
-1476 MORTARA Instrument Inc
-1477 Net Insight
-1478 DIATREND Corporation
-1479 TORAY Industries Inc
-147a FORMOSA Industrial Computing
-147b ABIT Computer Corp.
-147c AWARE, Inc.
-147d Interworks Computer Products
-147e Matsushita Graphic Communication Systems, Inc.
-147f NIHON UNISYS, Ltd.
-1480 SCII Telecom
-1481 BIOPAC Systems Inc
-1482 ISYTEC - Integrierte Systemtechnik GmBH
-1483 LABWAY Corporation
-1484 Logic Corporation
-1485 ERMA - Electronic GmBH
-1486 L3 Communications Telemetry & Instrumentation
-1487 MARQUETTE Medical Systems
-1488 KONTRON Electronik GmBH
-1489 KYE Systems Corporation
-148a OPTO
-148b INNOMEDIALOGIC Inc.
-148c C.P. Technology Co. Ltd
-148d DIGICOM Systems, Inc.
- 1003 HCF 56k Data/Fax Modem
-148e OSI Plus Corporation
-148f Plant Equipment, Inc.
-1490 Stone Microsystems PTY Ltd.
-1491 ZEAL Corporation
-1492 Time Logic Corporation
-1493 MAKER Communications
-1494 WINTOP Technology, Inc.
-1495 TOKAI Communications Industry Co. Ltd
-1496 JOYTECH Computer Co., Ltd.
-1497 SMA Regelsysteme GmBH
-1498 TEWS Datentechnik GmBH
-1499 EMTEC CO., Ltd
-149a ANDOR Technology Ltd
-149b SEIKO Instruments Inc
-149c OVISLINK Corp.
-149d NEWTEK Inc
- 0001 Video Toaster for PC
-149e Mapletree Networks Inc.
-149f LECTRON Co Ltd
-14a0 SOFTING GmBH
-14a1 Systembase Co Ltd
-14a2 Millennium Engineering Inc
-14a3 Maverick Networks
-14a4 GVC/BCM Advanced Research
-14a5 XIONICS Document Technologies Inc
-14a6 INOVA Computers GmBH & Co KG
-14a7 MYTHOS Systems Inc
-14a8 FEATRON Technologies Corporation
-14a9 HIVERTEC Inc
-14aa Advanced MOS Technology Inc
-14ab Mentor Graphics Corp.
-14ac Novaweb Technologies Inc
-14ad Time Space Radio AB
-14ae CTI, Inc
-14af Guillemot Corporation
- 7102 3D Prophet II MX
-14b0 BST Communication Technology Ltd
-14b1 Nextcom K.K.
-14b2 ENNOVATE Networks Inc
-14b3 XPEED Inc
- 0000 DSL NIC
-14b4 PHILIPS Business Electronics B.V.
-14b5 Creamware GmBH
- 0200 Scope
- 0300 Pulsar
- 0400 Pulsar2
- 0600 Pulsar2
- 0800 DSP-Board
- 0900 DSP-Board
- 0a00 DSP-Board
- 0b00 DSP-Board
-14b6 Quantum Data Corp.
-14b7 PROXIM Inc
- 0001 Symphony 4110
-14b8 Techsoft Technology Co Ltd
-14b9 AIRONET Wireless Communications
- 0001 PC4800
- 0340 PC4800
- 0350 PC4800
- 4500 PC4500
- 4800 PC4800
- a504 Cisco Aironet Wireless 802.11b
-14ba INTERNIX Inc.
-14bb SEMTECH Corporation
-14bc Globespan Semiconductor Inc.
-14bd CARDIO Control N.V.
-14be L3 Communications
-14bf SPIDER Communications Inc.
-14c0 COMPAL Electronics Inc
-14c1 MYRICOM Inc.
-14c2 DTK Computer
-14c3 MEDIATEK Corp.
-14c4 IWASAKI Information Systems Co Ltd
-14c5 Automation Products AB
-14c6 Data Race Inc
-14c7 Modular Technology Holdings Ltd
-14c8 Turbocomm Tech. Inc.
-14c9 ODIN Telesystems Inc
-14ca PE Logic Corp.
-14cb Billionton Systems Inc
-14cc NAKAYO Telecommunications Inc
-14cd Universal Scientific Ind.
-14ce Whistle Communications
-14cf TEK Microsystems Inc.
-14d0 Ericsson Axe R & D
-14d1 Computer Hi-Tech Co Ltd
-14d2 Titan Electronics Inc
- 8001 VScom 010L 1 port parallel adaptor
- 8002 VScom 020L 2 port parallel adaptor
- 8010 VScom 100L 1 port serial adaptor
- 8011 VScom 110L 1 port serial and 1 port parallel adaptor
- 8020 VScom 200L 1 port serial adaptor
- 8021 VScom 210L 2 port serial and 1 port parallel adaptor
- 8040 VScom 400L 4 port serial adaptor
- 8080 VScom 800L 8 port serial adaptor
- a000 VScom 010H 1 port parallel adaptor
- a001 VScom 100H 1 port serial adaptor
- a003 VScom 400H 4 port serial adaptor
- a004 VScom 400HF1 4 port serial adaptor
- a005 VScom 200H 2 port serial adaptor
- e001 VScom 010HV2 1 port parallel adaptor
- e010 VScom 100HV2 1 port serial adaptor
- e020 VScom 200HV2 2 port serial adaptor
-14d3 CIRTECH (UK) Ltd
-14d4 Panacom Technology Corp
-14d5 Nitsuko Corporation
-14d6 Accusys Inc
-14d7 Hirakawa Hewtech Corp
-14d8 HOPF Elektronik GmBH
-14d9 Alpha Processor Inc
-14da National Aerospace Laboratories
-14db AFAVLAB Technology Inc
- 2120 TK9902
-14dc Amplicon Liveline Ltd
- 0000 PCI230
- 0001 PCI242
- 0002 PCI244
- 0003 PCI247
- 0004 PCI248
- 0005 PCI249
- 0006 PCI260
- 0007 PCI224
- 0008 PCI234
- 0009 PCI236
- 000a PCI272
- 000b PCI215
-14dd Boulder Design Labs Inc
-14de Applied Integration Corporation
-14df ASIC Communications Corp
-14e1 INVERTEX
-14e2 INFOLIBRIA
-14e3 AMTELCO
-14e4 Broadcom Corporation
- 1644 NetXtreme BCM5700 Gigabit Ethernet
- 1014 0277 Broadcom Vigil B5700 1000Base-T
- 1028 00d1 Broadcom BCM5700
- 1028 0106 Broadcom BCM5700
- 1028 0109 Broadcom BCM5700 1000Base-T
- 1028 010a Broadcom BCM5700 1000BaseTX
- 10b7 1000 3C996-T 1000Base-T
- 10b7 1001 3C996B-T 1000Base-T
- 10b7 1002 3C996C-T 1000Base-T
- 10b7 1003 3C997-T 1000Base-T Dual Port
- 10b7 1004 3C996-SX 1000Base-SX
- 10b7 1005 3C997-SX 1000Base-SX Dual Port
- 10b7 1008 3C942 Gigabit LOM (31X31)
- 14e4 0002 NetXtreme 1000Base-SX
- 14e4 0003 NetXtreme 1000Base-SX
- 14e4 0004 NetXtreme 1000Base-T
- 14e4 1028 NetXtreme 1000BaseTX
- 14e4 1644 BCM5700 1000Base-T
- 1645 NetXtreme BCM5701 Gigabit Ethernet
- 0e11 007c NC7770 Gigabit Server Adapter (PCI-X, 10/100/1000-T)
- 0e11 007d NC6770 Gigabit Server Adapter (PCI-X, 1000-SX)
- 0e11 0085 NC7780 Gigabit Server Adapter (embedded, WOL)
- 0e11 0099 NC7780 Gigabit Server Adapter (embedded, WOL)
- 0e11 009a NC7770 Gigabit Server Adapter (PCI-X, 10/100/1000-T)
- 0e11 00c1 NC6770 Gigabit Server Adapter (PCI-X, 1000-SX)
- 1028 0121 Broadcom BCM5701 1000Base-T
- 10a9 8010 SGI IO9 Gigabit Ethernet (Copper)
- 10a9 8011 SGI Gigabit Ethernet (Copper)
- 10a9 8012 SGI Gigabit Ethernet (Fiber)
- 10b7 1004 3C996-SX 1000Base-SX
- 10b7 1006 3C996B-T 1000Base-T
- 10b7 1007 3C1000-T 1000Base-T
- 10b7 1008 3C940-BR01 1000Base-T
- 14e4 0001 BCM5701 1000Base-T
- 14e4 0005 BCM5701 1000Base-T
- 14e4 0006 BCM5701 1000Base-T
- 14e4 0007 BCM5701 1000Base-SX
- 14e4 0008 BCM5701 1000Base-T
- 14e4 8008 BCM5701 1000Base-T
- 1646 NetXtreme BCM5702 Gigabit Ethernet
- 0e11 00bb NC7760 1000BaseTX
- 1028 0126 Broadcom BCM5702 1000BaseTX
- 14e4 8009 BCM5702 1000BaseTX
- 1647 NetXtreme BCM5703 Gigabit Ethernet
- 0e11 0099 NC7780 1000BaseTX
- 0e11 009a NC7770 1000BaseTX
- 10a9 8010 SGI IO9 Gigabit Ethernet (Copper)
- 14e4 0009 BCM5703 1000BaseTX
- 14e4 000a BCM5703 1000BaseSX
- 14e4 000b BCM5703 1000BaseTX
- 14e4 8009 BCM5703 1000BaseTX
- 14e4 800a BCM5703 1000BaseTX
- 1648 NetXtreme BCM5704 Gigabit Ethernet
- 0e11 00cf NC7772 Gigabit Server Adapter (PCI-X, 10,100,1000-T)
- 0e11 00d0 NC7782 Gigabit Server Adapter (PCI-X, 10,100,1000-T)
- 0e11 00d1 NC7783 Gigabit Server Adapter (PCI-X, 10,100,1000-T)
- 10b7 2000 3C998-T Dual Port 10/100/1000 PCI-X
- 10b7 3000 3C999-T Quad Port 10/100/1000 PCI-X
- 1166 1648 NetXtreme CIOB-E 1000Base-T
- 1649 NetXtreme BCM5704S Gigabit Ethernet
- 164d NetXtreme BCM5702FE Gigabit Ethernet
- 1653 NetXtreme BCM5705 Gigabit Ethernet
- 1654 NetXtreme BCM5705 Gigabit Ethernet
- 165d NetXtreme BCM5705M Gigabit Ethernet
- 165e NetXtreme BCM5705M Gigabit Ethernet
- 166e NetXtreme BCM5705F Gigabit Ethernet
- 1696 NetXtreme BCM5782 Gigabit Ethernet
- 14e4 000d NetXtreme BCM5782 1000Base-T
- 169c NetXtreme BCM5788 Gigabit Ethernet
- 16a6 NetXtreme BCM5702 Gigabit Ethernet
- 0e11 00bb NC7760 Gigabit Server Adapter (PCI-X, 10/100/1000-T)
- 1028 0126 BCM5702 1000Base-T
- 14e4 000c BCM5702 1000Base-T
- 14e4 8009 BCM5702 1000Base-T
- 16a7 NetXtreme BCM5703 Gigabit Ethernet
- 0e11 00ca NC7771 Gigabit Server Adapter (PCI-X, 10,100,1000-T)
- 0e11 00cb NC7781 Gigabit Server Adapter (PCI-X, 10,100,1000-T)
- 14e4 0009 NetXtreme BCM5703 1000Base-T
- 14e4 000a NetXtreme BCM5703 1000Base-SX
- 14e4 000b NetXtreme BCM5703 1000Base-T
- 14e4 800a NetXtreme BCM5703 1000Base-T
- 16a8 NetXtreme BCM5704S Gigabit Ethernet
- 10b7 2001 3C998-SX Dual Port 1000-SX PCI-X
- 16c6 NetXtreme BCM5702 Gigabit Ethernet
- 10b7 1100 3C1000B-T 10/100/1000 PCI
- 14e4 000c BCM5702 1000Base-T
- 14e4 8009 BCM5702 1000Base-T
- 16c7 NetXtreme BCM5703 Gigabit Ethernet
- 14e4 0009 NetXtreme BCM5703 1000Base-T
- 14e4 000a NetXtreme BCM5703 1000Base-SX
- 170d NetXtreme BCM5901 Gigabit Ethernet
- 170e NetXtreme BCM5901 Gigabit Ethernet
- 4210 BCM4210 iLine10 HomePNA 2.0
- 4211 BCM4211 iLine10 HomePNA 2.0 + V.90 56k modem
- 4212 BCM4212 v.90 56k modem
- 4301 BCM4301 802.11b
- 4320 BCM94306 802.11g
- 4401 BCM4401 100Base-T
- 1043 80a8 A7V8X motherboard
- 4402 BCM4402 Integrated 10/100BaseT
- 4410 BCM4413 iLine32 HomePNA 2.0
- 4411 BCM4413 V.90 56k modem
- 4412 BCM4413 10/100BaseT
- 5820 BCM5820 Crypto Accelerator
- 5821 BCM5821 Crypto Accelerator
-14e5 Pixelfusion Ltd
-14e6 SHINING Technology Inc
-14e7 3CX
-14e8 RAYCER Inc
-14e9 GARNETS System CO Ltd
-14ea Planex Communications, Inc
- ab06 FNW-3603-TX CardBus Fast Ethernet
-14eb SEIKO EPSON Corp
-14ec ACQIRIS
-14ed DATAKINETICS Ltd
-14ee MASPRO KENKOH Corp
-14ef CARRY Computer ENG. CO Ltd
-14f0 CANON RESEACH CENTRE FRANCE
-14f1 Conexant
- 1002 HCF 56k Modem
- 1003 HCF 56k Modem
- 1004 HCF 56k Modem
- 1005 HCF 56k Modem
- 1006 HCF 56k Modem
- 1022 HCF 56k Modem
- 1023 HCF 56k Modem
- 1024 HCF 56k Modem
- 1025 HCF 56k Modem
- 1026 HCF 56k Modem
- 1032 HCF 56k Modem
- 1033 HCF 56k Data/Fax Modem
- 1033 8077 NEC
- 122d 4027 Dell Zeus - MDP3880-W(B) Data Fax Modem
- 122d 4030 Dell Mercury - MDP3880-U(B) Data Fax Modem
- 122d 4034 Dell Thor - MDP3880-W(U) Data Fax Modem
- 13e0 020d Dell Copper
- 13e0 020e Dell Silver
- 13e0 0261 IBM
- 13e0 0290 Compaq Goldwing
- 13e0 02a0 IBM
- 13e0 02b0 IBM
- 13e0 02c0 Compaq Scooter
- 13e0 02d0 IBM
- 144f 1500 IBM P85-DF (1)
- 144f 1501 IBM P85-DF (2)
- 144f 150a IBM P85-DF (3)
- 144f 150b IBM P85-DF Low Profile (1)
- 144f 1510 IBM P85-DF Low Profile (2)
- 1034 HCF 56k Data/Fax/Voice Modem
- 1035 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 10cf 1098 Fujitsu P85-DFSV
- 1036 HCF 56k Data/Fax/Voice/Spkp Modem
- 104d 8067 HCF 56k Modem
- 122d 4029 MDP3880SP-W
- 122d 4031 MDP3880SP-U
- 13e0 0209 Dell Titanium
- 13e0 020a Dell Graphite
- 13e0 0260 Gateway Red Owl
- 13e0 0270 Gateway White Horse
- 1052 HCF 56k Data/Fax Modem (Worldwide)
- 1053 HCF 56k Data/Fax Modem (Worldwide)
- 1054 HCF 56k Data/Fax/Voice Modem (Worldwide)
- 1055 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (Worldwide)
- 1056 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide)
- 1057 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide)
- 1059 HCF 56k Data/Fax/Voice Modem (Worldwide)
- 1063 HCF 56k Data/Fax Modem
- 1064 HCF 56k Data/Fax/Voice Modem
- 1065 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 1066 HCF 56k Data/Fax/Voice/Spkp Modem
- 122d 4033 Dell Athena - MDP3900V-U
- 1433 HCF 56k Data/Fax Modem
- 1434 HCF 56k Data/Fax/Voice Modem
- 1435 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 1436 HCF 56k Data/Fax Modem
- 1453 HCF 56k Data/Fax Modem
- 13e0 0240 IBM
- 13e0 0250 IBM
- 144f 1502 IBM P95-DF (1)
- 144f 1503 IBM P95-DF (2)
- 1454 HCF 56k Data/Fax/Voice Modem
- 1455 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 1456 HCF 56k Data/Fax/Voice/Spkp Modem
- 122d 4035 Dell Europa - MDP3900V-W
- 122d 4302 Dell MP3930V-W(C) MiniPCI
- 1610 ADSL AccessRunner PCI Arbitration Device
- 1611 AccessRunner PCI ADSL Interface Device
- 1620 ADSL AccessRunner V2 PCI Arbitration Device
- 1621 AccessRunner V2 PCI ADSL Interface Device
- 1622 AccessRunner V2 PCI ADSL Yukon WAN Adapter
- 1803 HCF 56k Modem
- 0e11 0023 623-LAN Grizzly
- 0e11 0043 623-LAN Yogi
- 1815 HCF 56k Modem
- 0e11 0022 Grizzly
- 0e11 0042 Yogi
- 2003 HSF 56k Data/Fax Modem
- 2004 HSF 56k Data/Fax/Voice Modem
- 2005 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 2006 HSF 56k Data/Fax/Voice/Spkp Modem
- 2013 HSF 56k Data/Fax Modem
- 0e11 b195 Bear
- 0e11 b196 Seminole 1
- 0e11 b1be Seminole 2
- 1025 8013 Acer
- 1033 809d NEC
- 1033 80bc NEC
- 155d 6793 HP
- 155d 8850 E Machines
- 2014 HSF 56k Data/Fax/Voice Modem
- 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
- 2016 HSF 56k Data/Fax/Voice/Spkp Modem
- 2043 HSF 56k Data/Fax Modem (WorldW SmartDAA)
- 2044 HSF 56k Data/Fax/Voice Modem (WorldW SmartDAA)
- 2045 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (WorldW SmartDAA)
- 2046 HSF 56k Data/Fax/Voice/Spkp Modem (WorldW SmartDAA)
- 2063 HSF 56k Data/Fax Modem (SmartDAA)
- 2064 HSF 56k Data/Fax/Voice Modem (SmartDAA)
- 2065 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (SmartDAA)
- 2066 HSF 56k Data/Fax/Voice/Spkp Modem (SmartDAA)
- 2093 HSF 56k Modem
- 155d 2f07 Legend
- 2143 HSF 56k Data/Fax/Cell Modem (Mob WorldW SmartDAA)
- 2144 HSF 56k Data/Fax/Voice/Cell Modem (Mob WorldW SmartDAA)
- 2145 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob WorldW SmartDAA)
- 2146 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob WorldW SmartDAA)
- 2163 HSF 56k Data/Fax/Cell Modem (Mob SmartDAA)
- 2164 HSF 56k Data/Fax/Voice/Cell Modem (Mob SmartDAA)
- 2165 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob SmartDAA)
- 2166 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob SmartDAA)
- 2343 HSF 56k Data/Fax CardBus Modem (Mob WorldW SmartDAA)
- 2344 HSF 56k Data/Fax/Voice CardBus Modem (Mob WorldW SmartDAA)
- 2345 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob WorldW SmartDAA)
- 2346 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob WorldW SmartDAA)
- 2363 HSF 56k Data/Fax CardBus Modem (Mob SmartDAA)
- 2364 HSF 56k Data/Fax/Voice CardBus Modem (Mob SmartDAA)
- 2365 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob SmartDAA)
- 2366 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob SmartDAA)
- 2443 HSF 56k Data/Fax Modem (Mob WorldW SmartDAA)
- 104d 8075 Modem
- 104d 8083 Modem
- 104d 8097 Modem
- 2444 HSF 56k Data/Fax/Voice Modem (Mob WorldW SmartDAA)
- 2445 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob WorldW SmartDAA)
- 2446 HSF 56k Data/Fax/Voice/Spkp Modem (Mob WorldW SmartDAA)
- 2463 HSF 56k Data/Fax Modem (Mob SmartDAA)
- 2464 HSF 56k Data/Fax/Voice Modem (Mob SmartDAA)
- 2465 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob SmartDAA)
- 2466 HSF 56k Data/Fax/Voice/Spkp Modem (Mob SmartDAA)
- 2f00 HSF 56k HSFi Modem
- 13e0 8d84 IBM HSFi V.90
- 13e0 8d85 Compaq Stinger
- 14f1 2004 Dynalink 56PMi
- 8234 RS8234 ATM SAR Controller [ServiceSAR Plus]
-14f2 MOBILITY Electronics
-14f3 BROADLOGIC
-14f4 TOKYO Electronic Industry CO Ltd
-14f5 SOPAC Ltd
-14f6 COYOTE Technologies LLC
-14f7 WOLF Technology Inc
-14f8 AUDIOCODES Inc
-14f9 AG COMMUNICATIONS
-14fa WANDEL & GOCHERMANN
-14fb TRANSAS MARINE (UK) Ltd
-14fc Quadrics Ltd
- 0000 QsNet Cluster Interconnect
- 0001 QsNetII Cluster Interconnect
-14fd JAPAN Computer Industry Inc
-14fe ARCHTEK TELECOM Corp
-14ff TWINHEAD INTERNATIONAL Corp
-1500 DELTA Electronics, Inc
-1501 BANKSOFT CANADA Ltd
-1502 MITSUBISHI ELECTRIC LOGISTICS SUPPORT Co Ltd
-1503 KAWASAKI LSI USA Inc
-1504 KAISER Electronics
-1505 ITA INGENIEURBURO FUR TESTAUFGABEN GmbH
-1506 CHAMELEON Systems Inc
-# Should be HTEC Ltd, but there are no known HTEC chips and 1507 is already used by mistake by Motorola (see vendor ID 1057).
-1507 Motorola ?? / HTEC
- 0001 MPC105 [Eagle]
- 0002 MPC106 [Grackle]
- 0003 MPC8240 [Kahlua]
- 0100 MC145575 [HFC-PCI]
- 0431 KTI829c 100VG
- 4801 Raven
- 4802 Falcon
- 4803 Hawk
- 4806 CPX8216
-1508 HONDA CONNECTORS/MHOTRONICS Inc
-1509 FIRST INTERNATIONAL Computer Inc
-150a FORVUS RESEARCH Inc
-150b YAMASHITA Systems Corp
-150c KYOPAL CO Ltd
-150d WARPSPPED Inc
-150e C-PORT Corp
-150f INTEC GmbH
-1510 BEHAVIOR TECH Computer Corp
-1511 CENTILLIUM Technology Corp
-1512 ROSUN Technologies Inc
-1513 Raychem
-1514 TFL LAN Inc
-1515 Advent design
-1516 MYSON Technology Inc
- 0803 SURECOM EP-320X-S 100/10M Ethernet PCI Adapter
- 1320 10bd SURECOM EP-320X-S 100/10M Ethernet PCI Adapter
-1517 ECHOTEK Corp
-1518 PEP MODULAR Computers GmbH
-1519 TELEFON AKTIEBOLAGET LM Ericsson
-151a Globetek
- 1002 PCI-1002
- 1004 PCI-1004
- 1008 PCI-1008
-151b COMBOX Ltd
-151c DIGITAL AUDIO LABS Inc
-151d Fujitsu Computer Products Of America
-151e MATRIX Corp
-151f TOPIC SEMICONDUCTOR Corp
- 0000 TP560 Data/Fax/Voice 56k modem
-1520 CHAPLET System Inc
-1521 BELL Corp
-1522 MainPine Ltd
- 0100 PCI <-> IOBus Bridge
- 1522 0200 RockForceDUO 2 Port V.92/V.44 Data/Fax/Voice Modem
- 1522 0300 RockForceQUATRO 4 Port V.92/V.44 Data/Fax/Voice Modem
- 1522 0400 RockForceDUO+ 2 Port V.92/V.44 Data/Fax/Voice Modem
- 1522 0500 RockForceQUATRO+ 4 Port V.92/V.44 Data/Fax/Voice Modem
- 1522 0600 RockForce+ 2 Port V.90 Data/Fax/Voice Modem
- 1522 0700 RockForce+ 4 Port V.90 Data/Fax/Voice Modem
- 1522 0800 RockForceOCTO+ 8 Port V.92/V.44 Data/Fax/Voice Modem
-1523 MUSIC Semiconductors
-1524 ENE Technology Inc
- 1211 CB1211 Cardbus Controller
- 1225 CB1225 Cardbus Controller
- 1410 CB1410 Cardbus Controller
- 1420 CB1420 Cardbus Controller
-1525 IMPACT Technologies
-1526 ISS, Inc
-1527 SOLECTRON
-1528 ACKSYS
-1529 AMERICAN MICROSystems Inc
-152a QUICKTURN DESIGN Systems
-152b FLYTECH Technology CO Ltd
-152c MACRAIGOR Systems LLC
-152d QUANTA Computer Inc
-152e MELEC Inc
-152f PHILIPS - CRYPTO
-1530 ACQIS Technology Inc
-1531 CHRYON Corp
-1532 ECHELON Corp
-1533 BALTIMORE
-1534 ROAD Corp
-1535 EVERGREEN Technologies Inc
-1537 DATALEX COMMUNCATIONS
-1538 ARALION Inc
-1539 ATELIER INFORMATIQUES et ELECTRONIQUE ETUDES S.A.
-153a ONO SOKKI
-153b TERRATEC Electronic GmbH
-153c ANTAL Electronic
-153d FILANET Corp
-153e TECHWELL Inc
-153f MIPS DENMARK
-1540 PROVIDEO MULTIMEDIA Co Ltd
-1541 MACHONE Communications
-1542 VIVID Technology Inc
-1543 SILICON Laboratories
- 3052 Intel 537 [Winmodem]
- 4c22 Si3036 MC'97 DAA
-1544 DCM DATA Systems
-1545 VISIONTEK
-1546 IOI Technology Corp
-1547 MITUTOYO Corp
-1548 JET PROPULSION Laboratory
-1549 INTERCONNECT Systems Solutions
-154a MAX Technologies Inc
-154b COMPUTEX Co Ltd
-154c VISUAL Technology Inc
-154d PAN INTERNATIONAL Industrial Corp
-154e SERVOTEST Ltd
-154f STRATABEAM Technology
-1550 OPEN NETWORK Co Ltd
-1551 SMART Electronic DEVELOPMENT GmBH
-1552 RACAL AIRTECH Ltd
-1553 CHICONY Electronics Co Ltd
-1554 PROLINK Microsystems Corp
-1555 GESYTEC GmBH
-1556 PLD APPLICATIONS
-1557 MEDIASTAR Co Ltd
-1558 CLEVO/KAPOK Computer
-1559 SI LOGIC Ltd
-155a INNOMEDIA Inc
-155b PROTAC INTERNATIONAL Corp
-155c Cemax-Icon Inc
-155d Mac System Co Ltd
-155e LP Elektronik GmbH
-155f Perle Systems Ltd
-1560 Terayon Communications Systems
-1561 Viewgraphics Inc
-1562 Symbol Technologies
-1563 A-Trend Technology Co Ltd
-1564 Yamakatsu Electronics Industry Co Ltd
-1565 Biostar Microtech Int'l Corp
-1566 Ardent Technologies Inc
-1567 Jungsoft
-1568 DDK Electronics Inc
-1569 Palit Microsystems Inc.
-156a Avtec Systems
-156b 2wire Inc
-156c Vidac Electronics GmbH
-156d Alpha-Top Corp
-156e Alfa Inc
-156f M-Systems Flash Disk Pioneers Ltd
-1570 Lecroy Corp
-1571 Contemporary Controls
- a001 CCSI PCI20-485 ARCnet
- a002 CCSI PCI20-485D ARCnet
- a003 CCSI PCI20-485X ARCnet
- a004 CCSI PCI20-CXB ARCnet
- a005 CCSI PCI20-CXS ARCnet
- a006 CCSI PCI20-FOG-SMA ARCnet
- a007 CCSI PCI20-FOG-ST ARCnet
- a008 CCSI PCI20-TB5 ARCnet
- a009 CCSI PCI20-5-485 5Mbit ARCnet
- a00a CCSI PCI20-5-485D 5Mbit ARCnet
- a00b CCSI PCI20-5-485X 5Mbit ARCnet
- a00c CCSI PCI20-5-FOG-ST 5Mbit ARCnet
- a00d CCSI PCI20-5-FOG-SMA 5Mbit ARCnet
- a201 CCSI PCI22-485 10Mbit ARCnet
- a202 CCSI PCI22-485D 10Mbit ARCnet
- a203 CCSI PCI22-485X 10Mbit ARCnet
- a204 CCSI PCI22-CHB 10Mbit ARCnet
- a205 CCSI PCI22-FOG_ST 10Mbit ARCnet
- a206 CCSI PCI22-THB 10Mbit ARCnet
-1572 Otis Elevator Company
-1573 Lattice - Vantis
-1574 Fairchild Semiconductor
-1575 Voltaire Advanced Data Security Ltd
-1576 Viewcast COM
-1578 HITT
-1579 Dual Technology Corp
-157a Japan Elecronics Ind Inc
-157b Star Multimedia Corp
-157c Eurosoft (UK)
- 8001 Fix2000 PCI Y2K Compliance Card
-157d Gemflex Networks
-157e Transition Networks
-157f PX Instruments Technology Ltd
-1580 Primex Aerospace Co
-1581 SEH Computertechnik GmbH
-1582 Cytec Corp
-1583 Inet Technologies Inc
-1584 Uniwill Computer Corp
-1585 Logitron
-1586 Lancast Inc
-1587 Konica Corp
-1588 Solidum Systems Corp
-1589 Atlantek Microsystems Pty Ltd
-158a Digalog Systems Inc
-158b Allied Data Technologies
-158c Hitachi Semiconductor & Devices Sales Co Ltd
-158d Point Multimedia Systems
-158e Lara Technology Inc
-158f Ditect Coop
-1590 3pardata Inc
-1591 ARN
-1592 Syba Tech Ltd
- 0781 Multi-IO Card
- 0782 Parallel Port Card 2xEPP
- 0783 Multi-IO Card
- 0785 Multi-IO Card
- 0786 Multi-IO Card
- 0787 Multi-IO Card
- 0788 Multi-IO Card
- 078a Multi-IO Card
-1593 Bops Inc
-1594 Netgame Ltd
-1595 Diva Systems Corp
-1596 Folsom Research Inc
-1597 Memec Design Services
-1598 Granite Microsystems
-1599 Delta Electronics Inc
-159a General Instrument
-159b Faraday Technology Corp
-159c Stratus Computer Systems
-159d Ningbo Harrison Electronics Co Ltd
-159e A-Max Technology Co Ltd
-159f Galea Network Security
-15a0 Compumaster SRL
-15a1 Geocast Network Systems
-15a2 Catalyst Enterprises Inc
- 0001 TA700 PCI Bus Analyzer/Exerciser
-15a3 Italtel
-15a4 X-Net OY
-15a5 Toyota Macs Inc
-15a6 Sunlight Ultrasound Technologies Ltd
-15a7 SSE Telecom Inc
-15a8 Shanghai Communications Technologies Center
-15aa Moreton Bay
-15ab Bluesteel Networks Inc
-15ac North Atlantic Instruments
-15ad VMWare Inc
- 0710 Virtual SVGA
-15ae Amersham Pharmacia Biotech
-15b0 Zoltrix International Ltd
-15b1 Source Technology Inc
-15b2 Mosaid Technologies Inc
-15b3 Mellanox Technology
- 5274 MT21108 InfiniBridge
-15b4 CCI/TRIAD
-15b5 Cimetrics Inc
-15b6 Texas Memory Systems Inc
-15b7 Sandisk Corp
-15b8 ADDI-DATA GmbH
-15b9 Maestro Digital Communications
-15ba Impacct Technology Corp
-15bb Portwell Inc
-15bc Agilent Technologies
- 2929 E2929A PCI/PCI-X Bus Analyzer
-15bd DFI Inc
-15be Sola Electronics
-15bf High Tech Computer Corp (HTC)
-15c0 BVM Ltd
-15c1 Quantel
-15c2 Newer Technology Inc
-15c3 Taiwan Mycomp Co Ltd
-15c4 EVSX Inc
-15c5 Procomp Informatics Ltd
-15c6 Technical University of Budapest
-15c7 Tateyama System Laboratory Co Ltd
- 0349 Tateyama C-PCI PLC/NC card Rev.01A
-15c8 Penta Media Co Ltd
-15c9 Serome Technology Inc
-15ca Bitboys OY
-15cb AG Electronics Ltd
-15cc Hotrail Inc
-15cd Dreamtech Co Ltd
-15ce Genrad Inc
-15cf Hilscher GmbH
-15d1 Infineon Technologies AG
-15d2 FIC (First International Computer Inc)
-15d3 NDS Technologies Israel Ltd
-15d4 Iwill Corp
-15d5 Tatung Co
-15d6 Entridia Corp
-15d7 Rockwell-Collins Inc
-15d8 Cybernetics Technology Co Ltd
-15d9 Super Micro Computer Inc
-15da Cyberfirm Inc
-15db Applied Computing Systems Inc
-15dc Litronic Inc
- 0001 Argus 300 PCI Cryptography Module
-15dd Sigmatel Inc
-15de Malleable Technologies Inc
-15df Infinilink Corp
-15e0 Cacheflow Inc
-15e1 Voice Technologies Group Inc
-15e2 Quicknet Technologies Inc
-15e3 Networth Technologies Inc
-15e4 VSN Systemen BV
-15e5 Valley technologies Inc
-15e6 Agere Inc
-15e7 Get Engineering Corp
-15e8 National Datacomm Corp
- 0130 Wireless PCI Card
-15e9 Pacific Digital Corp
- 1841 ADMA-100 DiscStaQ ATA Controller
-15ea Tokyo Denshi Sekei K.K.
-15eb Drsearch GmbH
-15ec Beckhoff GmbH
-15ed Macrolink Inc
-15ee In Win Development Inc
-15ef Intelligent Paradigm Inc
-15f0 B-Tree Systems Inc
-15f1 Times N Systems Inc
-15f2 Diagnostic Instruments Inc
-15f3 Digitmedia Corp
-15f4 Valuesoft
-15f5 Power Micro Research
-15f6 Extreme Packet Device Inc
-15f7 Banctec
-15f8 Koga Electronics Co
-15f9 Zenith Electronics Corp
-15fa J.P. Axzam Corp
-15fb Zilog Inc
-15fc Techsan Electronics Co Ltd
-15fd N-CUBED.NET
-15fe Kinpo Electronics Inc
-15ff Fastpoint Technologies Inc
-1600 Northrop Grumman - Canada Ltd
-1601 Tenta Technology
-1602 Prosys-tec Inc
-1603 Nokia Wireless Communications
-1604 Central System Research Co Ltd
-1605 Pairgain Technologies
-1606 Europop AG
-1607 Lava Semiconductor Manufacturing Inc
-1608 Automated Wagering International
-1609 Scimetric Instruments Inc
-1612 Telesynergy Research Inc.
-1619 FarSite Communications Ltd
- 0400 FarSync T2P (2 port X.21/V.35/V.24)
- 0440 FarSync T4P (4 port X.21/V.35/V.24)
-1629 Kongsberg Spacetec AS
- 1003 Format synchronizer v3.0
- 2002 Fast Universal Data Output
-1638 Standard Microsystems Corp [SMC]
- 1100 SMC2602W EZConnect / Addtron AWA-100
-163c Smart Link Ltd.
- 3052 SmartLink SmartPCI562 56K Modem
- 5449 SmartPCI561 Modem
-1657 Brocade Communications Systems, Inc.
-165a Epix Inc
- c100 PIXCI(R) CL1 Camera Link Video Capture Board [custom QL5232]
- d200 PIXCI(R) D2X Digital Video Capture Board [custom QL5232]
- d300 PIXCI(R) D3X Digital Video Capture Board [custom QL5232]
-165d Hsing Tech. Enterprise Co., Ltd.
-1661 Worldspace Corp.
-1668 Actiontec Electronics Inc
-1681 Hercules
-16ab Global Sun Technology Inc
- 1102 PCMCIA-to-PCI Wireless Network Bridge
-16be Creatix Polymedia GmbH
-16ca CENATEK Inc
- 0001 Rocket Drive DL
-16ec U.S. Robotics
- 3685 Wireless Access PCI Adapter Model 022415
-16f6 VideoTele.com, Inc.
-1705 Digital First, Inc.
-170b NetOctave Inc
-170c YottaYotta Inc.
-172a Accelerated Encryption
-1737 Linksys
- 1032 Gigabit Network Adapter
- 1737 0015 EG1032 v2 Instant Gigabit Network Adapter
- 1064 Gigabit Network Adapter
- 1737 0016 EG1064 v2 Instant Gigabit Network Adapter
-173b Altima (nee Broadcom)
- 03e8 AC1000 Gigabit Ethernet
- 03e9 AC1001 Gigabit Ethernet
- 03ea AC9100 Gigabit Ethernet
- 173b 0001 AC1002
- 03eb AC1003 Gigabit Ethernet
-1743 Peppercon AG
- 8139 ROL/F-100 Fast Ethernet Adapter with ROL
-174b PC Partner Limited
-175e Sanera Systems, Inc.
-1787 Hightech Information System Ltd.
-# also used by Struck Innovative Systeme for joint developments
-1796 Research Centre Juelich
- 0001 SIS1100 [Gigabit link]
- 0002 HOTlink
- 0003 Counter Timer
- 0004 CAMAC Controller
- 0005 PROFIBUS
- 0006 AMCC HOTlink
-1799 Belkin
-17af Hightech Information System Ltd.
-17cc NetChip Technology, Inc
- 2280 USB 2.0
-1813 Ambient Technologies Inc
- 4000 HaM controllerless modem
- 16be 0001 V9x HAM Data Fax Modem
- 4100 HaM plus Data Fax Modem
- 16be 0002 V9x HAM 1394
-1851 Microtune, Inc.
-1852 Anritsu Corp.
-1888 Varisys Ltd
- 0301 VMFX1 FPGA PMC module
- 0601 VSM2 dual PMC carrier
- 0710 VS14x series PowerPC PCI board
- 0720 VS24x series PowerPC PCI board
-1a08 Sierra semiconductor
- 0000 SC15064
-1b13 Jaton Corp
-1c1c Symphony
- 0001 82C101
-1d44 DPT
- a400 PM2x24/PM3224
-1de1 Tekram Technology Co.,Ltd.
- 0391 TRM-S1040
- 2020 DC-390
- 690c 690c
- dc29 DC290
-1fc0 Tumsan Oy
- 0300 E2200 Dual E1/Rawpipe Card
-2000 Smart Link Ltd.
-2001 Temporal Research Ltd
-2003 Smart Link Ltd.
-2004 Smart Link Ltd.
-21c3 21st Century Computer Corp.
-2348 Racore
- 2010 8142 100VG/AnyLAN
-2646 Kingston Technologies
-270b Xantel Corporation
-270f Chaintech Computer Co. Ltd
-2711 AVID Technology Inc.
-2a15 3D Vision(???)
-3000 Hansol Electronics Inc.
-3142 Post Impression Systems.
-3388 Hint Corp
- 0013 HiNT HC4 PCI to ISDN bridge, Multimedia audio controller
- 0014 HiNT HC4 PCI to ISDN bridge, Network controller
- 0021 HB1-SE33 PCI-PCI Bridge
- 101a E.Band [AudioTrak Inca88]
- 101b E.Band [AudioTrak Inca88]
- 8011 VXPro II Chipset
- 3388 8011 VXPro II Chipset CPU to PCI Bridge
- 8012 VXPro II Chipset
- 3388 8012 VXPro II Chipset PCI to ISA Bridge
- 8013 VXPro II IDE
- 3388 8013 VXPro II Chipset EIDE Controller
-3411 Quantum Designs (H.K.) Inc
-3513 ARCOM Control Systems Ltd
-38ef 4Links
-3d3d 3DLabs
- 0001 GLINT 300SX
- 0002 GLINT 500TX
- 0003 GLINT Delta
- 0004 Permedia
- 0005 Permedia
- 0006 GLINT MX
- 0007 3D Extreme
- 0008 GLINT Gamma G1
- 0009 Permedia II 2D+3D
- 1040 0011 AccelStar II
- 3d3d 0100 AccelStar II 3D Accelerator
- 3d3d 0111 Permedia 3:16
- 3d3d 0114 Santa Ana
- 3d3d 0116 Oxygen GVX1
- 3d3d 0119 Scirocco
- 3d3d 0120 Santa Ana PCL
- 3d3d 0125 Oxygen VX1
- 3d3d 0127 Permedia3 Create!
- 000a GLINT R3
- 3d3d 0121 Oxygen VX1
- 000c GLINT R3 [Oxygen VX1]
- 3d3d 0144 Oxygen VX1-4X AGP [Permedia 4]
- 0100 Permedia II 2D+3D
- 1004 Permedia
- 3d04 Permedia
- ffff Glint VGA
-4005 Avance Logic Inc.
- 0300 ALS300 PCI Audio Device
- 0308 ALS300+ PCI Audio Device
- 0309 PCI Input Controller
- 1064 ALG-2064
- 2064 ALG-2064i
- 2128 ALG-2364A GUI Accelerator
- 2301 ALG-2301
- 2302 ALG-2302
- 2303 AVG-2302 GUI Accelerator
- 2364 ALG-2364A
- 2464 ALG-2464
- 2501 ALG-2564A/25128A
- 4000 ALS4000 Audio Chipset
- 4005 4000 ALS4000 Audio Chipset
- 4710 ALC200/200P
-4033 Addtron Technology Co, Inc.
- 1360 RTL8139 Ethernet
-4143 Digital Equipment Corp
-416c Aladdin Knowledge Systems
- 0100 AladdinCARD
- 0200 CPC
-4444 Internext Compression Inc
- 0803 iTVC15 MPEG-2 Encoder
-4468 Bridgeport machines
-4594 Cogetec Informatique Inc
-45fb Baldor Electric Company
-4680 Umax Computer Corp
-4843 Hercules Computer Technology Inc
-4916 RedCreek Communications Inc
- 1960 RedCreek PCI adapter
-4943 Growth Networks
-4978 Axil Computer Inc
-4a14 NetVin
- 5000 NV5000SC
- 4a14 5000 RT8029-Based Ethernet Adapter
-4b10 Buslogic Inc.
-4c48 LUNG HWA Electronics
-4c53 SBS Technologies
-4ca1 Seanix Technology Inc
-4d51 MediaQ Inc.
- 0200 MQ-200
-4d54 Microtechnica Co Ltd
-4ddc ILC Data Device Corp
- 0100 DD-42924I5-300 (ARINC 429 Data Bus)
- 0801 BU-65570I1 MIL-STD-1553 Test and Simulation
- 0802 BU-65570I2 MIL-STD-1553 Test and Simulation
- 0811 BU-65572I1 MIL-STD-1553 Test and Simulation
- 0812 BU-65572I2 MIL-STD-1553 Test and Simulation
- 0881 BU-65570T1 MIL-STD-1553 Test and Simulation
- 0882 BU-65570T2 MIL-STD-1553 Test and Simulation
- 0891 BU-65572T1 MIL-STD-1553 Test and Simulation
- 0892 BU-65572T2 MIL-STD-1553 Test and Simulation
- 0901 BU-65565C1 MIL-STD-1553 Data Bus
- 0902 BU-65565C2 MIL-STD-1553 Data Bus
- 0903 BU-65565C3 MIL-STD-1553 Data Bus
- 0904 BU-65565C4 MIL-STD-1553 Data Bus
- 0b01 BU-65569I1 MIL-STD-1553 Data Bus
- 0b02 BU-65569I2 MIL-STD-1553 Data Bus
- 0b03 BU-65569I3 MIL-STD-1553 Data Bus
- 0b04 BU-65569I4 MIL-STD-1553 Data Bus
-5046 GemTek Technology Corporation
- 1001 PCI Radio
-5053 Voyetra Technologies
- 2010 Daytona Audio Adapter
-5136 S S Technologies
-5143 Qualcomm Inc
-5145 Ensoniq (Old)
- 3031 Concert AudioPCI
-5168 Animation Technologies Inc.
-5301 Alliance Semiconductor Corp.
- 0001 ProMotion aT3D
-5333 S3 Inc.
- 0551 Plato/PX (system)
- 5631 86c325 [ViRGE]
- 8800 86c866 [Vision 866]
- 8801 86c964 [Vision 964]
- 8810 86c764_0 [Trio 32 vers 0]
- 8811 86c764/765 [Trio32/64/64V+]
- 8812 86cM65 [Aurora64V+]
- 8813 86c764_3 [Trio 32/64 vers 3]
- 8814 86c767 [Trio 64UV+]
- 8815 86cM65 [Aurora 128]
- 883d 86c988 [ViRGE/VX]
- 8870 FireGL
- 8880 86c868 [Vision 868 VRAM] vers 0
- 8881 86c868 [Vision 868 VRAM] vers 1
- 8882 86c868 [Vision 868 VRAM] vers 2
- 8883 86c868 [Vision 868 VRAM] vers 3
- 88b0 86c928 [Vision 928 VRAM] vers 0
- 88b1 86c928 [Vision 928 VRAM] vers 1
- 88b2 86c928 [Vision 928 VRAM] vers 2
- 88b3 86c928 [Vision 928 VRAM] vers 3
- 88c0 86c864 [Vision 864 DRAM] vers 0
- 88c1 86c864 [Vision 864 DRAM] vers 1
- 88c2 86c864 [Vision 864-P DRAM] vers 2
- 88c3 86c864 [Vision 864-P DRAM] vers 3
- 88d0 86c964 [Vision 964 VRAM] vers 0
- 88d1 86c964 [Vision 964 VRAM] vers 1
- 88d2 86c964 [Vision 964-P VRAM] vers 2
- 88d3 86c964 [Vision 964-P VRAM] vers 3
- 88f0 86c968 [Vision 968 VRAM] rev 0
- 88f1 86c968 [Vision 968 VRAM] rev 1
- 88f2 86c968 [Vision 968 VRAM] rev 2
- 88f3 86c968 [Vision 968 VRAM] rev 3
- 8900 86c755 [Trio 64V2/DX]
- 5333 8900 86C775 Trio64V2/DX
- 8901 86c775/86c785 [Trio 64V2/DX or /GX]
- 5333 8901 86C775 Trio64V2/DX, 86C785 Trio64V2/GX
- 8902 Plato/PX
- 8903 Trio 3D business multimedia
- 8904 Trio 64 3D
- 1014 00db Integrated Trio3D
- 5333 8904 86C365 Trio3D AGP
- 8905 Trio 64V+ family
- 8906 Trio 64V+ family
- 8907 Trio 64V+ family
- 8908 Trio 64V+ family
- 8909 Trio 64V+ family
- 890a Trio 64V+ family
- 890b Trio 64V+ family
- 890c Trio 64V+ family
- 890d Trio 64V+ family
- 890e Trio 64V+ family
- 890f Trio 64V+ family
- 8a01 ViRGE/DX or /GX
- 0e11 b032 ViRGE/GX
- 10b4 1617 Nitro 3D
- 10b4 1717 Nitro 3D
- 5333 8a01 ViRGE/DX
- 8a10 ViRGE/GX2
- 1092 8a10 Stealth 3D 4000
- 8a13 86c368 [Trio 3D/2X]
- 5333 8a13 Trio3D/2X
- 8a20 86c794 [Savage 3D]
- 5333 8a20 86C391 Savage3D
- 8a21 86c390 [Savage 3D/MV]
- 5333 8a21 86C390 Savage3D/MV
- 8a22 Savage 4
- 1033 8068 Savage 4
- 1033 8069 Savage 4
- 105d 0018 SR9 8Mb SDRAM
- 105d 002a SR9 Pro 16Mb SDRAM
- 105d 003a SR9 Pro 32Mb SDRAM
- 105d 092f SR9 Pro+ 16Mb SGRAM
- 1092 4207 Stealth III S540
- 1092 4800 Stealth III S540
- 1092 4807 SpeedStar A90
- 1092 4808 Stealth III S540
- 1092 4809 Stealth III S540
- 1092 480e Stealth III S540
- 1092 4904 Stealth III S520
- 1092 4905 SpeedStar A200
- 1092 4a09 Stealth III S540
- 1092 4a0b Stealth III S540 Xtreme
- 1092 4a0f Stealth III S540
- 1092 4e01 Stealth III S540
- 1102 101d 3d Blaster Savage 4
- 1102 101e 3d Blaster Savage 4
- 5333 8100 86C394-397 Savage4 SDRAM 100
- 5333 8110 86C394-397 Savage4 SDRAM 110
- 5333 8125 86C394-397 Savage4 SDRAM 125
- 5333 8143 86C394-397 Savage4 SDRAM 143
- 5333 8a22 86C394-397 Savage4
- 5333 8a2e 86C394-397 Savage4 32bit
- 5333 9125 86C394-397 Savage4 SGRAM 125
- 5333 9143 86C394-397 Savage4 SGRAM 143
- 8a23 Savage 4
- 8a25 ProSavage PM133
- 8a26 ProSavage KM133
- 8c00 ViRGE/M3
- 8c01 ViRGE/MX
- 1179 0001 ViRGE/MX
- 8c02 ViRGE/MX+
- 8c03 ViRGE/MX+MV
- 8c10 86C270-294 Savage/MX-MV
- 8c11 82C270-294 Savage/MX
- 8c12 86C270-294 Savage/IX-MV
- 1014 017f ThinkPad T20
- 8c13 86C270-294 Savage/IX
- 1179 0001 Magnia Z310
- 8c22 SuperSavage MX/128
- 8c24 SuperSavage MX/64
- 8c26 SuperSavage MX/64C
- 8c2a SuperSavage IX/128 SDR
- 8c2b SuperSavage IX/128 DDR
- 8c2c SuperSavage IX/64 SDR
- 8c2d SuperSavage IX/64 DDR
- 8c2e SuperSavage IX/C SDR
- 1014 01fc ThinkPad T23 (2647-4MG)
- 8c2f SuperSavage IX/C DDR
- 8d01 86C380 [ProSavageDDR K4M266]
- 8d02 VT8636A [ProSavage KN133] AGP4X VGA Controller (TwisterK)
- 8d03 VT8751 [ProSavageDDR P4M266]
- 8d04 VT8375 [ProSavage8 KM266/KL266]
- 9102 86C410 Savage 2000
- 1092 5932 Viper II Z200
- 1092 5934 Viper II Z200
- 1092 5952 Viper II Z200
- 1092 5954 Viper II Z200
- 1092 5a35 Viper II Z200
- 1092 5a37 Viper II Z200
- 1092 5a55 Viper II Z200
- 1092 5a57 Viper II Z200
- ca00 SonicVibes
-544c Teralogic Inc
- 0350 TL880-based HDTV/ATSC tuner
-5455 Technische University Berlin
- 4458 S5933
-5519 Cnet Technologies, Inc.
-5544 Dunord Technologies
- 0001 I-30xx Scanner Interface
-5555 Genroco, Inc
- 0003 TURBOstor HFP-832 [HiPPI NIC]
-5654 VoiceTronix Pty Ltd
-5700 Netpower
-6356 UltraStor
-6374 c't Magazin für Computertechnik
- 6773 GPPCI
-6409 Logitec Corp.
-6666 Decision Computer International Co.
- 0001 PCCOM4
- 0002 PCCOM8
-7604 O.N. Electronic Co Ltd.
-7bde MIDAC Corporation
-7fed PowerTV
-8008 Quancom Electronic GmbH
- 0010 WDOG1 [PCI-Watchdog 1]
- 0011 PWDOG2 [PCI-Watchdog 2]
-8086 Intel Corp.
- 0007 82379AB
- 0008 Extended Express System Support Controller
- 0039 21145
- 0122 82437FX
- 0482 82375EB
- 0483 82424ZX [Saturn]
- 0484 82378IB [SIO ISA Bridge]
- 0486 82430ZX [Aries]
- 04a3 82434LX [Mercury/Neptune]
- 04d0 82437FX [Triton FX]
- 0600 RAID Controller
- 0960 80960RP [i960 RP Microprocessor/Bridge]
- 0962 80960RM [i960RM Bridge]
- 0964 80960RP [i960 RP Microprocessor/Bridge]
- 1000 82542 Gigabit Ethernet Controller
- 0e11 b0df NC1632 Gigabit Ethernet Adapter (1000-SX)
- 0e11 b0e0 NC1633 Gigabit Ethernet Adapter (1000-LX)
- 0e11 b123 NC1634 Gigabit Ethernet Adapter (1000-SX)
- 1014 0119 Netfinity Gigabit Ethernet SX Adapter
- 8086 1000 PRO/1000 Gigabit Server Adapter
- 1001 82543GC Gigabit Ethernet Controller (Fiber)
- 0e11 004a NC6136 Gigabit Server Adapter
- 1014 01ea Netfinity Gigabit Ethernet SX Adapter
- 8086 1003 PRO/1000 F Server Adapter
- 1002 Pro 100 LAN+Modem 56 Cardbus II
- 8086 200e Pro 100 LAN+Modem 56 Cardbus II
- 8086 2013 Pro 100 SR Mobile Combo Adapter
- 8086 2017 Pro 100 S Combo Mobile Adapter
- 1004 82543GC Gigabit Ethernet Controller (Copper)
- 0e11 0049 NC7132 Gigabit Upgrade Module
- 0e11 b1a4 NC7131 Gigabit Server Adapter
- 1014 10f2 Gigabit Ethernet Server Adapter
- 8086 1004 PRO/1000 T Server Adapter
- 8086 2004 PRO/1000 T Server Adapter
- 1008 82544EI Gigabit Ethernet Controller (Copper)
- 8086 1107 PRO/1000 XT Server Adapter
- 8086 2107 PRO/1000 XT Server Adapter
- 8086 2110 PRO/1000 XT Server Adapter
- 1009 82544EI Gigabit Ethernet Controller (Fiber)
- 8086 1109 PRO/1000 XF Server Adapter
- 8086 2109 PRO/1000 XF Server Adapter
- 100c 82544GC Gigabit Ethernet Controller (Copper)
- 8086 1112 PRO/1000 T Desktop Adapter
- 8086 2112 PRO/1000 T Desktop Adapter
- 100d 82544GC Gigabit Ethernet Controller (LOM)
- 100e 82540EM Gigabit Ethernet Controller
- 8086 001e PRO/1000 MT Desktop Adapter
- 8086 002e PRO/1000 MT Desktop Adapter
- 100f 82545EM Gigabit Ethernet Controller (Copper)
- 8086 1001 PRO/1000 MT Server Adapter
- 1010 82546EB Gigabit Ethernet Controller (Copper)
- 8086 1011 PRO/1000 MT Dual Port Server Adapter
- 1011 82545EM Gigabit Ethernet Controller (Fiber)
- 8086 1002 PRO/1000 MF Server Adapter
- 1012 82546EB Gigabit Ethernet Controller (Fiber)
- 8086 1012 PRO/1000 MF Dual Port Server Adapter
- 1015 82540EM Gigabit Ethernet Controller (LOM)
- 1029 82559 Ethernet Controller
- 1030 82559 InBusiness 10/100
- 1031 82801CAM (ICH3) PRO/100 VE (LOM) Ethernet Controller
- 1014 0209 ThinkPad A/T/X Series
- 104d 80e7 Vaio PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 107b 5350 EtherExpress PRO/100 VE
- 1179 0001 EtherExpress PRO/100 VE
- 144d c000 EtherExpress PRO/100 VE
- 144d c001 EtherExpress PRO/100 VE
- 144d c003 EtherExpress PRO/100 VE
- 144d c006 vpr Matrix 170B4
- 1032 82801CAM (ICH3) PRO/100 VE Ethernet Controller
- 1033 82801CAM (ICH3) PRO/100 VM (LOM) Ethernet Controller
- 1034 82801CAM (ICH3) PRO/100 VM Ethernet Controller
- 1035 82801CAM (ICH3)/82562EH (LOM) Ethernet Controller
- 1036 82801CAM (ICH3) 82562EH Ethernet Controller
- 1037 82801CAM (ICH3) Chipset Ethernet Controller
- 1038 82801CAM (ICH3) PRO/100 VM (KM) Ethernet Controller
- 1039 82801BD PRO/100 VE (LOM) Ethernet Controller
- 103a 82801BD PRO/100 VE (CNR) Ethernet Controller
- 103b 82801BD PRO/100 VM (LOM) Ethernet Controller
- 103c 82801BD PRO/100 VM (CNR) Ethernet Controller
- 103d 82801BD PRO/100 VE (MOB) Ethernet Controller
- 103e 82801BD PRO/100 VM (MOB) Ethernet Controller
- 1040 536EP Data Fax Modem
- 16be 1040 V.9X DSP Data Fax Modem
- 1043 PRO/Wireless LAN 2100 3B Mini PCI Adapter
- 1059 82551QM Ethernet Controller
- 1130 82815 815 Chipset Host Bridge and Memory Controller Hub
- 1025 1016 Travelmate 612 TX
- 1043 8027 TUSL2-C Mainboard
- 104d 80df Vaio PCG-FX403
- 8086 4532 D815EEA2 mainboard
- 8086 4557 D815EGEW Mainboard
- 1131 82815 815 Chipset AGP Bridge
- 1132 82815 CGC [Chipset Graphics Controller]
- 1025 1016 Travelmate 612 TX
- 104d 80df Vaio PCG-FX403
- 8086 4532 D815EEA2 Mainboard
- 8086 4557 D815EGEW Mainboard
- 1161 82806AA PCI64 Hub Advanced Programmable Interrupt Controller
- 8086 1161 82806AA PCI64 Hub APIC
- 1162 Xscale 80200 Big Endian Companion Chip
- 1200 Intel IXP1200 Network Processor
- 172a 0000 AEP SSL Accelerator
- 1209 82559ER
- 1221 82092AA_0
- 1222 82092AA_1
- 1223 SAA7116
- 1225 82452KX/GX [Orion]
- 1226 82596 PRO/10 PCI
- 1227 82865 EtherExpress PRO/100A
- 1228 82556 EtherExpress PRO/100 Smart
-# the revision field differentiates between them (1-3 is 82557, 4-5 is 82558, 6-8 is 82559, 9 is 82559ER)
- 1229 82557/8/9 [Ethernet Pro 100]
- 0e11 3001 82559 Fast Ethernet LOM with Alert on LAN*
- 0e11 3002 82559 Fast Ethernet LOM with Alert on LAN*
- 0e11 3003 82559 Fast Ethernet LOM with Alert on LAN*
- 0e11 3004 82559 Fast Ethernet LOM with Alert on LAN*
- 0e11 3005 82559 Fast Ethernet LOM with Alert on LAN*
- 0e11 3006 82559 Fast Ethernet LOM with Alert on LAN*
- 0e11 3007 82559 Fast Ethernet LOM with Alert on LAN*
- 0e11 b01e NC3120 Fast Ethernet NIC
- 0e11 b01f NC3122 Fast Ethernet NIC (dual port)
- 0e11 b02f NC1120 Ethernet NIC
- 0e11 b04a Netelligent 10/100TX NIC with Wake on LAN
- 0e11 b0c6 NC3161 Fast Ethernet NIC (embedded, WOL)
- 0e11 b0c7 NC3160 Fast Ethernet NIC (embedded)
- 0e11 b0d7 NC3121 Fast Ethernet NIC (WOL)
- 0e11 b0dd NC3131 Fast Ethernet NIC (dual port)
- 0e11 b0de NC3132 Fast Ethernet Module (dual port)
- 0e11 b0e1 NC3133 Fast Ethernet Module (100-FX)
- 0e11 b134 NC3163 Fast Ethernet NIC (embedded, WOL)
- 0e11 b13c NC3162 Fast Ethernet NIC (embedded)
- 0e11 b144 NC3123 Fast Ethernet NIC (WOL)
- 0e11 b163 NC3134 Fast Ethernet NIC (dual port)
- 0e11 b164 NC3135 Fast Ethernet Upgrade Module (dual port)
- 0e11 b1a4 NC7131 Gigabit Server Adapter
- 1014 005c 82558B Ethernet Pro 10/100
- 1014 01bc 82559 Fast Ethernet LAN On Motherboard
- 1014 01f1 10/100 Ethernet Server Adapter
- 1014 01f2 10/100 Ethernet Server Adapter
- 1014 0207 Ethernet Pro/100 S
- 1014 0232 10/100 Dual Port Server Adapter
- 1014 023a ThinkPad R30
- 1014 105c Netfinity 10/100
- 1014 2205 ThinkPad A22p
- 1014 305c 10/100 EtherJet Management Adapter
- 1014 405c 10/100 EtherJet Adapter with Alert on LAN
- 1014 505c 10/100 EtherJet Secure Management Adapter
- 1014 605c 10/100 EtherJet Secure Management Adapter
- 1014 705c 10/100 Netfinity 10/100 Ethernet Security Adapter
- 1014 805c 10/100 Netfinity 10/100 Ethernet Security Adapter
- 1028 009b PowerEdge 2550
- 1033 8000 PC-9821X-B06
- 1033 8016 PK-UG-X006
- 1033 801f PK-UG-X006
- 1033 8026 PK-UG-X006
- 1033 8063 82559-based Fast Ethernet Adapter
- 1033 8064 82559-based Fast Ethernet Adapter
- 103c 10c0 NetServer 10/100TX
- 103c 10c3 NetServer 10/100TX
- 103c 10ca NetServer 10/100TX
- 103c 10cb NetServer 10/100TX
- 103c 10e3 NetServer 10/100TX
- 103c 10e4 NetServer 10/100TX
- 103c 1200 NetServer 10/100TX
- 10c3 1100 SmartEther100 SC1100
- 10cf 1115 8255x-based Ethernet Adapter (10/100)
- 10cf 1143 8255x-based Ethernet Adapter (10/100)
- 1179 0001 8255x-based Ethernet Adapter (10/100)
- 1179 0002 PCI FastEther LAN on Docker
- 1179 0003 8255x-based Fast Ethernet
- 1259 2560 AT-2560 100
- 1259 2561 AT-2560 100 FX Ethernet Adapter
- 1266 0001 NE10/100 Adapter
- 144d 2501 SEM-2000 MiniPCI LAN Adapter
- 144d 2502 SEM-2100IL MiniPCI LAN Adapter
- 1668 1100 EtherExpress PRO/100B (TX) (MiniPCI Ethernet+Modem)
- 8086 0001 EtherExpress PRO/100B (TX)
- 8086 0002 EtherExpress PRO/100B (T4)
- 8086 0003 EtherExpress PRO/10+
- 8086 0004 EtherExpress PRO/100 WfM
- 8086 0005 82557 10/100
- 8086 0006 82557 10/100 with Wake on LAN
- 8086 0007 82558 10/100 Adapter
- 8086 0008 82558 10/100 with Wake on LAN
- 8086 0009 EtherExpress PRO/100+
- 8086 000a EtherExpress PRO/100+ Management Adapter
- 8086 000b EtherExpress PRO/100+
- 8086 000c EtherExpress PRO/100+ Management Adapter
- 8086 000d EtherExpress PRO/100+ Alert On LAN II* Adapter
- 8086 000e EtherExpress PRO/100+ Management Adapter with Alert On LAN*
- 8086 000f EtherExpress PRO/100 Desktop Adapter
- 8086 0010 EtherExpress PRO/100 S Management Adapter
- 8086 0011 EtherExpress PRO/100 S Management Adapter
- 8086 0012 EtherExpress PRO/100 S Advanced Management Adapter (D)
- 8086 0013 EtherExpress PRO/100 S Advanced Management Adapter (E)
- 8086 0030 EtherExpress PRO/100 Management Adapter with Alert On LAN* GC
- 8086 0031 EtherExpress PRO/100 Desktop Adapter
- 8086 0040 EtherExpress PRO/100 S Desktop Adapter
- 8086 0041 EtherExpress PRO/100 S Desktop Adapter
- 8086 0042 EtherExpress PRO/100 Desktop Adapter
- 8086 0050 EtherExpress PRO/100 S Desktop Adapter
- 8086 1009 EtherExpress PRO/100+ Server Adapter
- 8086 100c EtherExpress PRO/100+ Server Adapter (PILA8470B)
- 8086 1012 EtherExpress PRO/100 S Server Adapter (D)
- 8086 1013 EtherExpress PRO/100 S Server Adapter (E)
- 8086 1015 EtherExpress PRO/100 S Dual Port Server Adapter
- 8086 1017 EtherExpress PRO/100+ Dual Port Server Adapter
- 8086 1030 EtherExpress PRO/100+ Management Adapter with Alert On LAN* G Server
- 8086 1040 EtherExpress PRO/100 S Server Adapter
- 8086 1041 EtherExpress PRO/100 S Server Adapter
- 8086 1042 EtherExpress PRO/100 Server Adapter
- 8086 1050 EtherExpress PRO/100 S Server Adapter
- 8086 1051 EtherExpress PRO/100 Server Adapter
- 8086 1052 EtherExpress PRO/100 Server Adapter
- 8086 10f0 EtherExpress PRO/100+ Dual Port Adapter
- 8086 2009 EtherExpress PRO/100 S Mobile Adapter
- 8086 200d EtherExpress PRO/100 Cardbus
- 8086 200e EtherExpress PRO/100 LAN+V90 Cardbus Modem
- 8086 200f EtherExpress PRO/100 SR Mobile Adapter
- 8086 2010 EtherExpress PRO/100 S Mobile Combo Adapter
- 8086 2013 EtherExpress PRO/100 SR Mobile Combo Adapter
- 8086 2016 EtherExpress PRO/100 S Mobile Adapter
- 8086 2017 EtherExpress PRO/100 S Combo Mobile Adapter
- 8086 2018 EtherExpress PRO/100 SR Mobile Adapter
- 8086 2019 EtherExpress PRO/100 SR Combo Mobile Adapter
- 8086 2101 EtherExpress PRO/100 P Mobile Adapter
- 8086 2102 EtherExpress PRO/100 SP Mobile Adapter
- 8086 2103 EtherExpress PRO/100 SP Mobile Adapter
- 8086 2104 EtherExpress PRO/100 SP Mobile Adapter
- 8086 2105 EtherExpress PRO/100 SP Mobile Adapter
- 8086 2106 EtherExpress PRO/100 P Mobile Adapter
- 8086 2107 EtherExpress PRO/100 Network Connection
- 8086 2108 EtherExpress PRO/100 Network Connection
- 8086 2200 EtherExpress PRO/100 P Mobile Combo Adapter
- 8086 2201 EtherExpress PRO/100 P Mobile Combo Adapter
- 8086 2202 EtherExpress PRO/100 SP Mobile Combo Adapter
- 8086 2203 EtherExpress PRO/100+ MiniPCI
- 8086 2204 EtherExpress PRO/100+ MiniPCI
- 8086 2205 EtherExpress PRO/100 SP Mobile Combo Adapter
- 8086 2206 EtherExpress PRO/100 SP Mobile Combo Adapter
- 8086 2207 EtherExpress PRO/100 SP Mobile Combo Adapter
- 8086 2208 EtherExpress PRO/100 P Mobile Combo Adapter
- 8086 2402 EtherExpress PRO/100+ MiniPCI
- 8086 2407 EtherExpress PRO/100+ MiniPCI
- 8086 2408 EtherExpress PRO/100+ MiniPCI
- 8086 2409 EtherExpress PRO/100+ MiniPCI
- 8086 240f EtherExpress PRO/100+ MiniPCI
- 8086 2410 EtherExpress PRO/100+ MiniPCI
- 8086 2411 EtherExpress PRO/100+ MiniPCI
- 8086 2412 EtherExpress PRO/100+ MiniPCI
- 8086 2413 EtherExpress PRO/100+ MiniPCI
- 8086 3000 82559 Fast Ethernet LAN on Motherboard
- 8086 3001 82559 Fast Ethernet LOM with Basic Alert on LAN*
- 8086 3002 82559 Fast Ethernet LOM with Alert on LAN II*
- 8086 3006 EtherExpress PRO/100 S Network Connection
- 8086 3007 EtherExpress PRO/100 S Network Connection
- 8086 3008 EtherExpress PRO/100 Network Connection
- 8086 3010 EtherExpress PRO/100 S Network Connection
- 8086 3011 EtherExpress PRO/100 S Network Connection
- 8086 3012 EtherExpress PRO/100 Network Connection
- 122d 430FX - 82437FX TSC [Triton I]
- 122e 82371FB PIIX ISA [Triton I]
- 1230 82371FB PIIX IDE [Triton I]
- 1231 DSVD Modem
- 1234 430MX - 82371MX Mobile PCI I/O IDE Xcelerator (MPIIX)
- 1235 430MX - 82437MX Mob. System Ctrlr (MTSC) & 82438MX Data Path (MTDP)
- 1237 440FX - 82441FX PMC [Natoma]
- 1239 82371FB
- 123b 82380PB
- 123c 82380AB
- 123d 683053 Programmable Interrupt Device
- 123f 82466GX Integrated Hot-Plug Controller (IHPC)
- 1240 752 AGP
- 124b 82380FB
- 1250 430HX - 82439HX TXC [Triton II]
- 1360 82806AA PCI64 Hub PCI Bridge
- 1361 82806AA PCI64 Hub Controller (HRes)
- 8086 1361 82806AA PCI64 Hub Controller (HRes)
- 8086 8000 82806AA PCI64 Hub Controller (HRes)
- 1460 82870P2 P64H2 Hub PCI Bridge
- 1461 82870P2 P64H2 I/OxAPIC
- 15d9 3480 P4DP6
- 1462 82870P2 P64H2 Hot Plug Controller
- 1960 80960RP [i960RP Microprocessor]
- 101e 0431 MegaRAID 431 RAID Controller
- 101e 0438 MegaRAID 438 Ultra2 LVD RAID Controller
- 101e 0466 MegaRAID 466 Express Plus RAID Controller
- 101e 0467 MegaRAID 467 Enterprise 1500 RAID Controller
- 101e 0490 MegaRAID 490 Express 300 RAID Controller
- 101e 0762 MegaRAID 762 Express RAID Controller
- 101e 09a0 PowerEdge Expandable RAID Controller 2/SC
- 1028 0467 PowerEdge Expandable RAID Controller 2/DC
- 1028 1111 PowerEdge Expandable RAID Controller 2/SC
- 103c 03a2 MegaRAID
- 103c 10c6 MegaRAID 438, HP NetRAID-3Si
- 103c 10c7 MegaRAID T5, Integrated HP NetRAID
- 103c 10cc MegaRAID, Integrated HP NetRAID
- 103c 10cd HP NetRAID-1Si
- 105a 0000 SuperTrak
- 105a 2168 SuperTrak Pro
- 105a 5168 SuperTrak66/100
- 1111 1111 MegaRAID 466, PowerEdge Expandable RAID Controller 2/SC
- 1111 1112 PowerEdge Expandable RAID Controller 2/SC
- 113c 03a2 MegaRAID
- 1962 80960RM [i960RM Microprocessor]
- 105a 0000 SuperTrak SX6000 I2O CPU
- 1a21 82840 840 (Carmel) Chipset Host Bridge (Hub A)
- 1a23 82840 840 (Carmel) Chipset AGP Bridge
- 1a24 82840 840 (Carmel) Chipset PCI Bridge (Hub B)
- 1a30 82845 845 (Brookdale) Chipset Host Bridge
- 1a31 82845 845 (Brookdale) Chipset AGP Bridge
- 2410 82801AA ISA Bridge (LPC)
- 2411 82801AA IDE
- 2412 82801AA USB
- 2413 82801AA SMBus
- 2415 82801AA AC'97 Audio
- 1028 0095 Precision Workstation 220 Integrated Digital Audio
- 11d4 0040 SoundMAX Integrated Digital Audio
- 11d4 0048 SoundMAX Integrated Digital Audio
- 11d4 5340 SoundMAX Integrated Digital Audio
- 2416 82801AA AC'97 Modem
- 2418 82801AA PCI Bridge
- 2420 82801AB ISA Bridge (LPC)
- 2421 82801AB IDE
- 2422 82801AB USB
- 2423 82801AB SMBus
- 2425 82801AB AC'97 Audio
- 11d4 0040 SoundMAX Integrated Digital Audio
- 11d4 0048 SoundMAX Integrated Digital Audio
- 2426 82801AB AC'97 Modem
- 2428 82801AB PCI Bridge
- 2440 82801BA ISA Bridge (LPC)
- 2442 82801BA/BAM USB (Hub #1)
- 1014 01c6 Netvista A40/A40p
- 1025 1016 Travelmate 612 TX
- 104d 80df Vaio PCG-FX403
- 147b 0507 TH7II-RAID
- 8086 4532 D815EEA2 mainboard
- 8086 4557 D815EGEW Mainboard
- 2443 82801BA/BAM SMBus
- 1014 01c6 Netvista A40/A40p
- 1025 1016 Travelmate 612 TX
- 1043 8027 TUSL2-C Mainboard
- 104d 80df Vaio PCG-FX403
- 147b 0507 TH7II-RAID
- 8086 4532 D815EEA2 mainboard
- 8086 4557 D815EGEW Mainboard
- 2444 82801BA/BAM USB (Hub #2)
- 1025 1016 Travelmate 612 TX
- 104d 80df Vaio PCG-FX403
- 147b 0507 TH7II-RAID
- 8086 4532 D815EEA2 mainboard
- 2445 82801BA/BAM AC'97 Audio
- 1014 01c6 Netvista A40/A40p
- 1025 1016 Travelmate 612 TX
- 104d 80df Vaio PCG-FX403
- 1462 3370 STAC9721 AC
- 147b 0507 TH7II-RAID
- 8086 4557 D815EGEW Mainboard
- 2446 Intel 537 [82801BA/BAM AC'97 Modem]
- 1025 1016 Travelmate 612 TX
- 104d 80df Vaio PCG-FX403
- 2448 82801BAM/CAM PCI Bridge
- 2449 82801BA/BAM/CA/CAM Ethernet Controller
- 0e11 0012 EtherExpress PRO/100 VM
- 0e11 0091 EtherExpress PRO/100 VE
- 1014 01ce EtherExpress PRO/100 VE
- 1014 01dc EtherExpress PRO/100 VE
- 1014 01eb EtherExpress PRO/100 VE
- 1014 01ec EtherExpress PRO/100 VE
- 1014 0202 EtherExpress PRO/100 VE
- 1014 0205 EtherExpress PRO/100 VE
- 1014 0217 EtherExpress PRO/100 VE
- 1014 0234 EtherExpress PRO/100 VE
- 1014 023d EtherExpress PRO/100 VE
- 1014 0244 EtherExpress PRO/100 VE
- 1014 0245 EtherExpress PRO/100 VE
- 1014 0265 PRO/100 VE Desktop Connection
- 1014 0267 PRO/100 VE Desktop Connection
- 1014 026a PRO/100 VE Desktop Connection
- 109f 315d EtherExpress PRO/100 VE
- 109f 3181 EtherExpress PRO/100 VE
- 1179 ff01 PRO/100 VE Network Connection
- 1186 7801 EtherExpress PRO/100 VE
- 144d 2602 HomePNA 1M CNR
- 8086 3010 EtherExpress PRO/100 VE
- 8086 3011 EtherExpress PRO/100 VM
- 8086 3012 82562EH based Phoneline
- 8086 3013 EtherExpress PRO/100 VE
- 8086 3014 EtherExpress PRO/100 VM
- 8086 3015 82562EH based Phoneline
- 8086 3016 EtherExpress PRO/100 P Mobile Combo
- 8086 3017 EtherExpress PRO/100 P Mobile
- 8086 3018 EtherExpress PRO/100
- 244a 82801BAM IDE U100
- 1025 1016 Travelmate 612TX
- 104d 80df Vaio PCG-FX403
- 244b 82801BA IDE U100
- 1014 01c6 Netvista A40/A40p
- 1043 8027 TUSL2-C Mainboard
- 147b 0507 TH7II-RAID
- 8086 4532 D815EEA2 mainboard
- 8086 4557 D815EGEW Mainboard
- 244c 82801BAM ISA Bridge (LPC)
- 244e 82801BA/CA/DB/EB PCI Bridge
- 2450 82801E ISA Bridge (LPC)
- 2452 82801E USB
- 2453 82801E SMBus
- 2459 82801E Ethernet Controller 0
- 245b 82801E IDE U100
- 245d 82801E Ethernet Controller 1
- 245e 82801E PCI Bridge
- 2480 82801CA LPC Interface Controller
- 2482 82801CA/CAM USB (Hub #1)
- 1014 0220 ThinkPad A/T/X Series
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 15d9 3480 P4DP6
- 8086 1958 vpr Matrix 170B4
- 2483 82801CA/CAM SMBus Controller
- 1014 0220 ThinkPad A/T/X Series
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 15d9 3480 P4DP6
- 8086 1958 vpr Matrix 170B4
- 2484 82801CA/CAM USB (Hub #2)
- 1014 0220 ThinkPad A/T/X Series
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 15d9 3480 P4DP6
- 8086 1958 vpr Matrix 170B4
- 2485 82801CA/CAM AC'97 Audio Controller
- 1014 0222 ThinkPad T23 (2647-4MG) or A30p (2653-64G)
- 1014 0508 ThinkPad T30
- 1014 051c ThinkPad A/T/X Series
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 144d c006 vpr Matrix 170B4
- 2486 82801CA/CAM AC'97 Modem Controller
- 1014 0223 ThinkPad A/T/X Series
- 1014 0503 ThinkPad R31 2656BBG
- 1014 051a ThinkPad A/T/X Series
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 1179 0001 Toshiba Satellite 1110 Z15 internal Modem
- 134d 4c21 Dell Inspiron 2100 internal modem
- 144d 2115 vpr Matrix 170B4 internal modem
- 14f1 5421 MD56ORD V.92 MDC Modem
- 2487 82801CA/CAM USB (Hub #3)
- 1014 0220 ThinkPad A/T/X Series
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 15d9 3480 P4DP6
- 8086 1958 vpr Matrix 170B4
- 248a 82801CAM IDE U100
- 1014 0220 ThinkPad A/T/X Series
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 8086 1958 vpr Matrix 170B4
- 248b 82801CA Ultra ATA Storage Controller
- 15d9 3480 P4DP6
- 248c 82801CAM ISA Bridge (LPC)
- 24c0 82801DB LPC Interface Controller
- 1462 5800 845PE Max (MS-6580)
- 24c2 82801DB USB (Hub #1)
- 1462 5800 845PE Max (MS-6580)
- 24c3 82801DB/DBM SMBus Controller
- 1462 5800 845PE Max (MS-6580)
- 24c4 82801DB USB (Hub #2)
- 1462 5800 845PE Max (MS-6580)
- 24c5 82801DB AC'97 Audio Controller
- 1462 5800 845PE Max (MS-6580)
- 24c6 82801DB AC'97 Modem Controller
- 24c7 82801DB USB (Hub #3)
- 1462 5800 845PE Max (MS-6580)
- 24ca 82801DBM Ultra ATA Storage Controller
- 24cb 82801DB Ultra ATA Storage Controller
- 1462 5800 845PE Max (MS-6580)
- 24cc 82801DBM LPC Interface Controller
- 24cd 82801DB USB2
- 1462 3981 845PE Max (MS-6580) Onboard USB EHCI Controller
- 24d0 82801EB LPC Interface Controller
- 24d1 82801EB Ultra ATA Storage Controller
- 24d2 82801EB USB
- 24d3 82801EB SMBus Controller
- 24d4 82801EB USB
- 24d5 82801EB AC'97 Audio Controller
- 24d6 82801EB AC'97 Modem Controller
- 24d7 82801EB USB
- 24db 82801EB Ultra ATA Storage Controller
- 24dc 82801EB LPC Interface Controller
- 24dd 82801EB USB2
- 24de 82801EB USB
- 2500 82820 820 (Camino) Chipset Host Bridge (MCH)
- 1028 0095 Precision Workstation 220 Chipset
- 1043 801c P3C-2000 system chipset
- 2501 82820 820 (Camino) Chipset Host Bridge (MCH)
- 1043 801c P3C-2000 system chipset
- 250b 82820 820 (Camino) Chipset Host Bridge
- 250f 82820 820 (Camino) Chipset AGP Bridge
- 2520 82805AA MTH Memory Translator Hub
- 2521 82804AA MRH-S Memory Repeater Hub for SDRAM
- 2530 82850 850 (Tehama) Chipset Host Bridge (MCH)
- 147b 0507 TH7II-RAID
- 2531 82860 860 (Wombat) Chipset Host Bridge (MCH)
- 2532 82850 850 (Tehama) Chipset AGP Bridge
- 2533 82860 860 (Wombat) Chipset AGP Bridge
- 2534 82860 860 (Wombat) Chipset PCI Bridge
- 2540 E7500 Memory Controller Hub
- 15d9 3480 P4DP6
- 2541 E7000 Series Host RASUM Controller
- 15d9 3480 P4DP6
- 2543 E7000 Series Hub Interface B PCI-to-PCI Bridge
- 2544 E7000 Series Hub Interface B RASUM Controller
- 2545 E7000 Series Hub Interface C PCI-to-PCI Bridge
- 2546 E7000 Series Hub Interface C RASUM Controller
- 2547 E7000 Series Hub Interface D PCI-to-PCI Bridge
- 2548 E7000 Series Hub Interface D RASUM Controller
- 254c E7501 Memory Controller Hub
- 2550 E7505 Memory Controller Hub
- 2551 E7000 Series RAS Controller
- 2552 E7000 Series Processor to AGP Controller
- 2553 E7000 Series Hub Interface B PCI-to-PCI Bridge
- 2554 E7000 Series Hub Interface B PCI-to-PCI Bridge RAS Controller
- 255d E7205 Memory Controller Hub
- 2560 82845G/GL [Brookdale-G] Chipset Host Bridge
- 1462 5800 845PE Max (MS-6580)
- 2561 82845G/GL [Brookdale-G] Chipset AGP Bridge
- 2562 82845G/GL [Brookdale-G] Chipset Integrated Graphics Device
- 2570 82865G/PE/P Processor to I/O Controller
- 2571 82865G/PE/P Processor to AGP Controller
- 2572 82865G Integrated Graphics Device
- 2573 82865G/PE/P Processor to PCI to CSA Bridge
- 2576 82864G/PE/P Processor to I/O Memory Interface
- 2578 82875P Memory Controller Hub
- 2579 82875P Processor to AGP Controller
- 257b 82875P Processor to PCI to CSA Bridge
- 257e 82875P Processor to I/O Memory Interface
- 3092 Integrated RAID
- 3340 82855PM Processor to I/O Controller
- 3341 82855PM Processor to AGP Controller
- 3575 82830 830 Chipset Host Bridge
- 1014 021d ThinkPad A/T/X Series
- 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP
- 3576 82830 830 Chipset AGP Bridge
- 3577 82830 CGC [Chipset Graphics Controller]
- 1014 0513 ThinkPad A/T/X Series
- 3578 82830 830 Chipset Host Bridge
- 3580 82852/855GM Host Bridge
- 3582 82852/855GM Integrated Graphics Device
- 5200 EtherExpress PRO/100 Intelligent Server
- 5201 EtherExpress PRO/100 Intelligent Server
- 8086 0001 EtherExpress PRO/100 Server Ethernet Adapter
- 530d 80310 IOP [IO Processor]
- 7000 82371SB PIIX3 ISA [Natoma/Triton II]
- 7010 82371SB PIIX3 IDE [Natoma/Triton II]
- 7020 82371SB PIIX3 USB [Natoma/Triton II]
- 7030 430VX - 82437VX TVX [Triton VX]
- 7100 430TX - 82439TX MTXC
- 7110 82371AB/EB/MB PIIX4 ISA
- 7111 82371AB/EB/MB PIIX4 IDE
- 7112 82371AB/EB/MB PIIX4 USB
- 7113 82371AB/EB/MB PIIX4 ACPI
- 7120 82810 GMCH [Graphics Memory Controller Hub]
- 7121 82810 CGC [Chipset Graphics Controller]
- 8086 4341 Cayman (CA810) Mainboard
- 7122 82810 DC-100 GMCH [Graphics Memory Controller Hub]
- 7123 82810 DC-100 CGC [Chipset Graphics Controller]
- 7124 82810E DC-133 GMCH [Graphics Memory Controller Hub]
- 7125 82810E DC-133 CGC [Chipset Graphics Controller]
- 7126 82810 DC-133 System and Graphics Controller
- 7128 82810-M DC-100 System and Graphics Controller
- 712a 82810-M DC-133 System and Graphics Controller
- 7180 440LX/EX - 82443LX/EX Host bridge
- 7181 440LX/EX - 82443LX/EX AGP bridge
- 7190 440BX/ZX/DX - 82443BX/ZX/DX Host bridge
- 0e11 0500 Armada 1750 Laptop System Chipset
- 0e11 b110 Armada M700
- 1179 0001 Toshiba Tecra 8100 Laptop System Chipset
- 7191 440BX/ZX/DX - 82443BX/ZX/DX AGP bridge
- 7192 440BX/ZX/DX - 82443BX/ZX/DX Host bridge (AGP disabled)
- 0e11 0460 Armada 1700 Laptop System Chipset
- 7194 82440MX Host Bridge
- 7195 82440MX AC'97 Audio Controller
- 10cf 1099 QSound_SigmaTel Stac97 PCI Audio
- 11d4 0040 SoundMAX Integrated Digital Audio
- 11d4 0048 SoundMAX Integrated Digital Audio
- 7196 82440MX AC'97 Modem Controller
- 7198 82440MX ISA Bridge
- 7199 82440MX EIDE Controller
- 719a 82440MX USB Universal Host Controller
- 719b 82440MX Power Management Controller
- 71a0 440GX - 82443GX Host bridge
- 71a1 440GX - 82443GX AGP bridge
- 71a2 440GX - 82443GX Host bridge (AGP disabled)
- 7600 82372FB PIIX5 ISA
- 7601 82372FB PIIX5 IDE
- 7602 82372FB PIIX5 USB
- 7603 82372FB PIIX5 SMBus
- 7800 i740
- 003d 0008 Starfighter AGP
- 003d 000b Starfighter AGP
- 1092 0100 Stealth II G460
- 10b4 201a Lightspeed 740
- 10b4 202f Lightspeed 740
- 8086 0000 Terminator 2x/i
- 8086 0100 Intel740 Graphics Accelerator
- 84c4 450KX/GX [Orion] - 82454KX/GX PCI bridge
- 84c5 450KX/GX [Orion] - 82453KX/GX Memory controller
- 84ca 450NX - 82451NX Memory & I/O Controller
- 84cb 450NX - 82454NX/84460GX PCI Expander Bridge
- 84e0 460GX - 84460GX System Address Controller (SAC)
- 84e1 460GX - 84460GX System Data Controller (SDC)
- 84e2 460GX - 84460GX AGP Bridge (GXB function 2)
- 84e3 460GX - 84460GX Memory Address Controller (MAC)
- 84e4 460GX - 84460GX Memory Data Controller (MDC)
- 84e6 460GX - 82466GX Wide and fast PCI eXpander Bridge (WXB)
- 84ea 460GX - 84460GX AGP Bridge (GXB function 1)
- 9621 Integrated RAID
- 9622 Integrated RAID
- 9641 Integrated RAID
- 96a1 Integrated RAID
- b152 21152 PCI-to-PCI Bridge
-# observed, and documented in Intel revision note; new mask of 1011:0026
- b154 21154 PCI-to-PCI Bridge
- b555 21555 Non transparent PCI-to-PCI Bridge
- e4bf 1000 CC8-1-BLUES
- ffff 450NX/GX [Orion] - 82453KX/GX Memory controller [BUG]
-8800 Trigem Computer Inc.
- 2008 Video assistent component
-8866 T-Square Design Inc.
-8888 Silicon Magic
-8e0e Computone Corporation
-8e2e KTI
- 3000 ET32P2
-9004 Adaptec
- 1078 AIC-7810
- 1160 AIC-1160 [Family Fibre Channel Adapter]
- 2178 AIC-7821
- 3860 AHA-2930CU
- 3b78 AHA-4844W/4844UW
- 5075 AIC-755x
- 5078 AHA-7850
- 9004 7850 AHA-2904/Integrated AIC-7850
- 5175 AIC-755x
- 5178 AIC-7851
- 5275 AIC-755x
- 5278 AIC-7852
- 5375 AIC-755x
- 5378 AIC-7850
- 5475 AIC-755x
- 5478 AIC-7850
- 5575 AVA-2930
- 5578 AIC-7855
- 5647 ANA-7711 TCP Offload Engine
- 5675 AIC-755x
- 5678 AIC-7856
- 5775 AIC-755x
- 5778 AIC-7850
- 5800 AIC-5800
- 5900 ANA-5910/5930/5940 ATM155 & 25 LAN Adapter
- 5905 ANA-5910A/5930A/5940A ATM Adapter
- 6038 AIC-3860
- 6075 AIC-1480 / APA-1480
- 9004 7560 AIC-1480 / APA-1480 Cardbus
- 6078 AIC-7860
- 6178 AIC-7861
- 9004 7861 AHA-2940AU Single
- 6278 AIC-7860
- 6378 AIC-7860
- 6478 AIC-786x
- 6578 AIC-786x
- 6678 AIC-786x
- 6778 AIC-786x
- 6915 ANA620xx/ANA69011A
- 9004 0008 ANA69011A/TX 10/100
- 9004 0009 ANA69011A/TX 10/100
- 9004 0010 ANA62022 2-port 10/100
- 9004 0018 ANA62044 4-port 10/100
- 9004 0019 ANA62044 4-port 10/100
- 9004 0020 ANA62022 2-port 10/100
- 9004 0028 ANA69011A/TX 10/100
- 9004 8008 ANA69011A/TX 64 bit 10/100
- 9004 8009 ANA69011A/TX 64 bit 10/100
- 9004 8010 ANA62022 2-port 64 bit 10/100
- 9004 8018 ANA62044 4-port 64 bit 10/100
- 9004 8019 ANA62044 4-port 64 bit 10/100
- 9004 8020 ANA62022 2-port 64 bit 10/100
- 9004 8028 ANA69011A/TX 64 bit 10/100
- 7078 AHA-294x / AIC-7870
- 7178 AHA-2940/2940W / AIC-7871
- 7278 AHA-3940/3940W / AIC-7872
- 7378 AHA-3985 / AIC-7873
- 7478 AHA-2944/2944W / AIC-7874
- 7578 AHA-3944/3944W / AIC-7875
- 7678 AHA-4944W/UW / AIC-7876
- 7710 ANA-7711F Network Accelerator Card (NAC) - Optical
- 7711 ANA-7711C Network Accelerator Card (NAC) - Copper
- 7778 AIC-787x
- 7810 AIC-7810
- 7815 AIC-7815 RAID+Memory Controller IC
- 9004 7815 ARO-1130U2 RAID Controller
- 9004 7840 AIC-7815 RAID+Memory Controller IC
- 7850 AIC-7850
- 7855 AHA-2930
- 7860 AIC-7860
- 7870 AIC-7870
- 7871 AHA-2940
- 7872 AHA-3940
- 7873 AHA-3980
- 7874 AHA-2944
- 7880 AIC-7880P
- 7890 AIC-7890
- 7891 AIC-789x
- 7892 AIC-789x
- 7893 AIC-789x
- 7894 AIC-789x
- 7895 AHA-2940U/UW / AHA-39xx / AIC-7895
- 9004 7890 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
- 9004 7891 AHA-2940U/2940UW Dual
- 9004 7892 AHA-3940AU/AUW/AUWD/UWD
- 9004 7894 AHA-3944AUWD
- 9004 7895 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
- 9004 7896 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
- 9004 7897 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
- 7896 AIC-789x
- 7897 AIC-789x
- 8078 AIC-7880U
- 9004 7880 AIC-7880P Ultra/Ultra Wide SCSI Chipset
- 8178 AHA-2940U/UW/D / AIC-7881U
- 9004 7881 AHA-2940UW SCSI Host Adapter
- 8278 AHA-3940U/UW/UWD / AIC-7882U
- 8378 AHA-3940U/UW / AIC-7883U
- 8478 AHA-2944UW / AIC-7884U
- 8578 AHA-3944U/UWD / AIC-7885
- 8678 AHA-4944UW / AIC-7886
- 8778 AHA-2940UW Pro / AIC-788x
- 9004 7887 2940UW Pro Ultra-Wide SCSI Controller
- 8878 AHA-2930UW / AIC-7888
- 9004 7888 AHA-2930UW SCSI Controller
- 8b78 ABA-1030
- ec78 AHA-4944W/UW
-9005 Adaptec
- 0010 AHA-2940U2/U2W
- 9005 2180 AHA-2940U2 SCSI Controller
- 9005 8100 AHA-2940U2B SCSI Controller
- 9005 a180 AHA-2940U2W SCSI Controller
- 9005 e100 AHA-2950U2B SCSI Controller
- 0011 AHA-2930U2
- 0013 78902
- 9005 0003 AAA-131U2 Array1000 1 Channel RAID Controller
- 001f AHA-2940U2/U2W / 7890/7891
- 9005 000f 2940U2W SCSI Controller
- 9005 a180 2940U2W SCSI Controller
- 0020 AIC-7890
- 002f AIC-7890
- 0030 AIC-7890
- 003f AIC-7890
- 0050 AHA-3940U2x/395U2x
- 9005 f500 AHA-3950U2B
- 0051 AHA-3950U2D
- 9005 b500 AHA-3950U2D
- 0053 AIC-7896 SCSI Controller
- 9005 ffff AIC-7896 SCSI Controller mainboard implementation
- 005f AIC-7896U2/7897U2
- 0080 AIC-7892A U160/m
- 0e11 e2a0 Compaq 64-Bit/66MHz Wide Ultra3 SCSI Adapter
- 9005 62a0 29160N Ultra160 SCSI Controller
- 9005 e220 29160LP Low Profile Ultra160 SCSI Controller
- 9005 e2a0 29160 Ultra160 SCSI Controller
- 0081 AIC-7892B U160/m
- 9005 62a1 19160 Ultra160 SCSI Controller
- 0083 AIC-7892D U160/m
- 008f AIC-7892P U160/m
- 1179 0001 Magnia Z310
- 15d9 9005 Onboard SCSI Host Adapter
- 00c0 AHA-3960D / AIC-7899A U160/m
- 0e11 f620 Compaq 64-Bit/66MHz Dual Channel Wide Ultra3 SCSI Adapter
- 9005 f620 AHA-3960D U160/m
- 00c1 AIC-7899B U160/m
- 00c3 AIC-7899D U160/m
- 00c5 RAID subsystem HBA
- 1028 00c5 PowerEdge 2550
- 00cf AIC-7899P U160/m
- 1028 00d1 PowerEdge 2550
- 10f1 2462 Thunder K7 S2462
- 15d9 9005 Onboard SCSI Host Adapter
- 0250 ServeRAID Controller
- 1014 0279 ServeRAID-xx
- 1014 028c ServeRAID-xx
- 0285 AAC-RAID
- 1028 0287 PowerEdge Expandable RAID Controller 320/DC
- 8000 ASC-29320A U320
- 800f AIC-7901 U320
- 8010 ASC-39320 U320
- 8011 ASC-32320D U320
- 0e11 00ac U320
- 9005 0041 ASC-39320D U320
- 8012 ASC-29320 U320
- 8013 ASC-29320B U320
- 8014 ASC-29320LP U320
- 801e AIC-7901A U320
- 801f AIC-7902 U320
- 8080 ASC-29320A U320 w/HostRAID
- 808f AIC-7901 U320 w/HostRAID
- 8090 ASC-39320 U320 w/HostRAID
- 8091 ASC-39320D U320 w/HostRAID
- 8092 ASC-29320 U320 w/HostRAID
- 8093 ASC-29320B U320 w/HostRAID
- 8094 ASC-29320LP U320 w/HostRAID
- 8095 ASC-39320(B) U320 w/HostRAID
- 8096 ASC-39320A U320 w/HostRAID
- 8097 ASC-29320ALP U320 w/HostRAID
- 809c ASC-39320D(B) U320 w/HostRAID
- 809d AIC-7902(B) U320 w/HostRAID
- 809e AIC-7901A U320 w/HostRAID
- 809f AIC-7902 U320 w/HostRAID
-907f Atronics
- 2015 IDE-2015PL
-919a Gigapixel Corp
-9412 Holtek
- 6565 6565
-9699 Omni Media Technology Inc
- 6565 6565
-9710 NetMos Technology
- 9815 VScom 021H-EP2 2 port parallel adaptor
- 9835 222N-2 I/O Card (2S+1P)
-a0a0 AOPEN Inc.
-a0f1 UNISYS Corporation
-a200 NEC Corporation
-a259 Hewlett Packard
-a25b Hewlett Packard GmbH PL24-MKT
-a304 Sony
-a727 3Com Corporation
-aa42 Scitex Digital Video
-ac1e Digital Receiver Technology Inc
-b1b3 Shiva Europe Limited
-# Pinnacle should be 11bd, but they got it wrong several times --mj
-bd11 Pinnacle Systems, Inc. (Wrong ID)
-c001 TSI Telsys
-c0a9 Micron/Crucial Technology
-c0de Motorola
-c0fe Motion Engineering, Inc.
-ca50 Varian Australia Pty Ltd
-cafe Chrysalis-ITS
-cccc Catapult Communications
-cddd Tyzx, Inc.
- 0101 DeepSea 1 High Speed Stereo Vision Frame Grabber
- 0200 DeepSea 2 High Speed Stereo Vision Frame Grabber
-d4d4 Dy4 Systems Inc
- 0601 PCI Mezzanine Card
-d531 I+ME ACTIA GmbH
-d84d Exsys
-dead Indigita Corporation
-e000 Winbond
- e000 W89C940
-e159 Tiger Jet Network Inc.
- 0001 Intel 537
- 0059 0001 128k ISDN-S/T Adapter
- 0059 0003 128k ISDN-U Adapter
- 0002 Tiger100APC ISDN chipset
-e4bf EKF Elektronik GmbH
-ea01 Eagle Technology
-# The main chip of all these devices is by Xilinx -> It could also be a Xilinx ID.
-ea60 RME
- 9896 Digi32
- 9897 Digi32 Pro
- 9898 Digi32/8
-eabb Aashima Technology B.V.
-eace Endace Measurement Systems, Ltd
- 3100 DAG 3.10 OC-3/OC-12
- 3200 DAG 3.2x OC-3/OC-12
- 320e DAG 3.2E Fast Ethernet
- 340e DAG 3.4E Fast Ethernet
- 341e DAG 3.41E Fast Ethernet
- 3500 DAG 3.5 OC-3/OC-12
- 351c DAG 3.5ECM Fast Ethernet
- 4100 DAG 4.10 OC-48
- 4110 DAG 4.11 OC-48
- 4220 DAG 4.2 OC-48
- 422e DAG 4.2E Dual Gigabit Ethernet
-ec80 Belkin Corporation
- ec00 F5D6000
-ecc0 Echo Digital Audio Corporation
- 0050 Gina24_301
- 0051 Gina24_361
- 0060 Layla24
- 0070 Mona_301_80
- 0071 Mona_301_66
- 0072 Mona_361
- 0080 Mia
-edd8 ARK Logic Inc
- a091 1000PV [Stingray]
- a099 2000PV [Stingray]
- a0a1 2000MT
- a0a9 2000MI
-f1d0 AJA Video
-# All boards I have seen have this ID not efac, though all docs say efac...
- cafe KONA SD SMPTE 259M I/O
- efac KONA SD SMPTE 259M I/O
- facd KONA HD SMPTE 292M I/O
-fa57 Fast Search & Transfer ASA
-febd Ultraview Corp.
-feda Broadcom Inc (nee Epigram)
- a0fa BCM4210 iLine10 HomePNA 2.0
- a10e BCM4230 iLine10 HomePNA 2.0
-fffe VMWare Inc
- 0710 Virtual SVGA
-ffff Illegal Vendor ID
-
-
-# List of known device classes, subclasses and programming interfaces
-
-# Syntax:
-# C class class_name
-# subclass subclass_name <-- single tab
-# prog-if prog-if_name <-- two tabs
-
-C 00 Unclassified device
- 00 Non-VGA unclassified device
- 01 VGA compatible unclassified device
-C 01 Mass storage controller
- 00 SCSI storage controller
- 01 IDE interface
- 02 Floppy disk controller
- 03 IPI bus controller
- 04 RAID bus controller
- 80 Unknown mass storage controller
-C 02 Network controller
- 00 Ethernet controller
- 01 Token ring network controller
- 02 FDDI network controller
- 03 ATM network controller
- 04 ISDN controller
- 80 Network controller
-C 03 Display controller
- 00 VGA compatible controller
- 00 VGA
- 01 8514
- 01 XGA compatible controller
- 02 3D controller
- 80 Display controller
-C 04 Multimedia controller
- 00 Multimedia video controller
- 01 Multimedia audio controller
- 02 Computer telephony device
- 80 Multimedia controller
-C 05 Memory controller
- 00 RAM memory
- 01 FLASH memory
- 80 Memory controller
-C 06 Bridge
- 00 Host bridge
- 01 ISA bridge
- 02 EISA bridge
- 03 MicroChannel bridge
- 04 PCI bridge
- 00 Normal decode
- 01 Subtractive decode
- 05 PCMCIA bridge
- 06 NuBus bridge
- 07 CardBus bridge
- 08 RACEway bridge
- 00 Transparent mode
- 01 Endpoint mode
- 09 Semi-transparent PCI-to-PCI bridge
- 40 Primary bus towards host CPU
- 80 Secondary bus towards host CPU
- 0a InfiniBand to PCI host bridge
- 80 Bridge
-C 07 Communication controller
- 00 Serial controller
- 00 8250
- 01 16450
- 02 16550
- 03 16650
- 04 16750
- 05 16850
- 06 16950
- 01 Parallel controller
- 00 SPP
- 01 BiDir
- 02 ECP
- 03 IEEE1284
- fe IEEE1284 Target
- 02 Multiport serial controller
- 03 Modem
- 00 Generic
- 01 Hayes/16450
- 02 Hayes/16550
- 03 Hayes/16650
- 04 Hayes/16750
- 80 Communication controller
-C 08 Generic system peripheral
- 00 PIC
- 00 8259
- 01 ISA PIC
- 02 EISA PIC
- 10 IO-APIC
- 20 IO(X)-APIC
- 01 DMA controller
- 00 8237
- 01 ISA DMA
- 02 EISA DMA
- 02 Timer
- 00 8254
- 01 ISA Timer
- 02 EISA Timers
- 03 RTC
- 00 Generic
- 01 ISA RTC
- 04 PCI Hot-plug controller
- 80 System peripheral
-C 09 Input device controller
- 00 Keyboard controller
- 01 Digitizer Pen
- 02 Mouse controller
- 03 Scanner controller
- 04 Gameport controller
- 00 Generic
- 10 Extended
- 80 Input device controller
-C 0a Docking station
- 00 Generic Docking Station
- 80 Docking Station
-C 0b Processor
- 00 386
- 01 486
- 02 Pentium
- 10 Alpha
- 20 Power PC
- 30 MIPS
- 40 Co-processor
-C 0c Serial bus controller
- 00 FireWire (IEEE 1394)
- 00 Generic
- 10 OHCI
- 01 ACCESS Bus
- 02 SSA
- 03 USB Controller
- 00 UHCI
- 10 OHCI
- 20 EHCI
- 80 Unspecified
- fe USB Device
- 04 Fibre Channel
- 05 SMBus
- 06 InfiniBand
-C 0d Wireless controller
- 00 IRDA controller
- 01 Consumer IR controller
- 10 RF controller
- 80 Wireless controller
-C 0e Intelligent controller
- 00 I2O
-C 0f Satellite communications controller
- 00 Satellite TV controller
- 01 Satellite audio communication controller
- 03 Satellite voice communication controller
- 04 Satellite data communication controller
-C 10 Encryption controller
- 00 Network and computing encryption device
- 10 Entertainment encryption device
- 80 Encryption controller
-C 11 Signal processing controller
- 00 DPIO module
- 01 Performance counters
- 10 Communication synchronizer
- 80 Signal processing controller
diff --git a/xen/drivers/pci/quirks.c b/xen/drivers/pci/quirks.c
deleted file mode 100644
index 25de96ba44..0000000000
--- a/xen/drivers/pci/quirks.c
+++ /dev/null
@@ -1,835 +0,0 @@
-/*
- * $Id: quirks.c,v 1.5 1998/05/02 19:24:14 mj Exp $
- *
- * This file contains work-arounds for many known PCI hardware
- * bugs. Devices present only on certain architectures (host
- * bridges et cetera) should be handled in arch-specific code.
- *
- * Copyright (c) 1999 Martin Mares <mj@ucw.cz>
- *
- * The bridge optimization stuff has been removed. If you really
- * have a silly BIOS which is unable to set your host bridge right,
- * use the PowerTweak utility (see http://powertweak.sourceforge.net).
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/pci.h>
-#include <xen/init.h>
-#include <xen/delay.h>
-
-#undef DEBUG
-
-/* Deal with broken BIOS'es that neglect to enable passive release,
- which can cause problems in combination with the 82441FX/PPro MTRRs */
-static void __init quirk_passive_release(struct pci_dev *dev)
-{
- struct pci_dev *d = NULL;
- unsigned char dlc;
-
- /* We have to make sure a particular bit is set in the PIIX3
- ISA bridge, so we have to go out and find it. */
- while ((d = pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, d))) {
- pci_read_config_byte(d, 0x82, &dlc);
- if (!(dlc & 1<<1)) {
- printk(KERN_ERR "PCI: PIIX3: Enabling Passive Release on %s\n", d->slot_name);
- dlc |= 1<<1;
- pci_write_config_byte(d, 0x82, dlc);
- }
- }
-}
-
-/* The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a workaround
- but VIA don't answer queries. If you happen to have good contacts at VIA
- ask them for me please -- Alan
-
- This appears to be BIOS not version dependent. So presumably there is a
- chipset level fix */
-
-
-int isa_dma_bridge_buggy; /* Exported */
-
-static void __init quirk_isa_dma_hangs(struct pci_dev *dev)
-{
- if (!isa_dma_bridge_buggy) {
- isa_dma_bridge_buggy=1;
- printk(KERN_INFO "Activating ISA DMA hang workarounds.\n");
- }
-}
-
-int pci_pci_problems;
-
-/*
- * Chipsets where PCI->PCI transfers vanish or hang
- */
-
-static void __init quirk_nopcipci(struct pci_dev *dev)
-{
- if((pci_pci_problems&PCIPCI_FAIL)==0)
- {
- printk(KERN_INFO "Disabling direct PCI/PCI transfers.\n");
- pci_pci_problems|=PCIPCI_FAIL;
- }
-}
-
-/*
- * Triton requires workarounds to be used by the drivers
- */
-
-static void __init quirk_triton(struct pci_dev *dev)
-{
- if((pci_pci_problems&PCIPCI_TRITON)==0)
- {
- printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
- pci_pci_problems|=PCIPCI_TRITON;
- }
-}
-
-/*
- * VIA Apollo KT133 needs PCI latency patch
- * Made according to a windows driver based patch by George E. Breese
- * see PCI Latency Adjust on http://www.viahardware.com/download/viatweak.shtm
- * Also see http://www.au-ja.org/review-kt133a-1-en.phtml for the info on which
- * Mr Breese based his work.
- *
- * Updated based on further information from the site and also on
- * information provided by VIA
- */
-static void __init quirk_vialatency(struct pci_dev *dev)
-{
- struct pci_dev *p;
- u8 rev;
- u8 busarb;
- /* Ok we have a potential problem chipset here. Now see if we have
- a buggy southbridge */
-
- p=pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL);
- if(p!=NULL)
- {
- pci_read_config_byte(p, PCI_CLASS_REVISION, &rev);
- /* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */
- /* Check for buggy part revisions */
- if (rev < 0x40 || rev > 0x42)
- return;
- }
- else
- {
- p = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL);
- if(p==NULL) /* No problem parts */
- return;
- pci_read_config_byte(p, PCI_CLASS_REVISION, &rev);
- /* Check for buggy part revisions */
- if (rev < 0x10 || rev > 0x12)
- return;
- }
-
- /*
- * Ok we have the problem. Now set the PCI master grant to
- * occur every master grant. The apparent bug is that under high
- * PCI load (quite common in Linux of course) you can get data
- * loss when the CPU is held off the bus for 3 bus master requests
- * This happens to include the IDE controllers....
- *
- * VIA only apply this fix when an SB Live! is present but under
- * both Linux and Windows this isnt enough, and we have seen
- * corruption without SB Live! but with things like 3 UDMA IDE
- * controllers. So we ignore that bit of the VIA recommendation..
- */
-
- pci_read_config_byte(dev, 0x76, &busarb);
- /* Set bit 4 and bi 5 of byte 76 to 0x01
- "Master priority rotation on every PCI master grant */
- busarb &= ~(1<<5);
- busarb |= (1<<4);
- pci_write_config_byte(dev, 0x76, busarb);
- printk(KERN_INFO "Applying VIA southbridge workaround.\n");
-}
-
-/*
- * VIA Apollo VP3 needs ETBF on BT848/878
- */
-
-static void __init quirk_viaetbf(struct pci_dev *dev)
-{
- if((pci_pci_problems&PCIPCI_VIAETBF)==0)
- {
- printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
- pci_pci_problems|=PCIPCI_VIAETBF;
- }
-}
-static void __init quirk_vsfx(struct pci_dev *dev)
-{
- if((pci_pci_problems&PCIPCI_VSFX)==0)
- {
- printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
- pci_pci_problems|=PCIPCI_VSFX;
- }
-}
-
-/*
- * Ali Magik requires workarounds to be used by the drivers
- * that DMA to AGP space. Latency must be set to 0xA and triton
- * workaround applied too
- * [Info kindly provided by ALi]
- */
-
-static void __init quirk_alimagik(struct pci_dev *dev)
-{
- if((pci_pci_problems&PCIPCI_ALIMAGIK)==0)
- {
- printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
- pci_pci_problems|=PCIPCI_ALIMAGIK|PCIPCI_TRITON;
- }
-}
-
-/*
- * Natoma has some interesting boundary conditions with Zoran stuff
- * at least
- */
-
-static void __init quirk_natoma(struct pci_dev *dev)
-{
- if((pci_pci_problems&PCIPCI_NATOMA)==0)
- {
- printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
- pci_pci_problems|=PCIPCI_NATOMA;
- }
-}
-
-/*
- * S3 868 and 968 chips report region size equal to 32M, but they decode 64M.
- * If it's needed, re-allocate the region.
- */
-
-static void __init quirk_s3_64M(struct pci_dev *dev)
-{
- struct resource *r = &dev->resource[0];
-
- if ((r->start & 0x3ffffff) || r->end != r->start + 0x3ffffff) {
- r->start = 0;
- r->end = 0x3ffffff;
- }
-}
-
-static void __init quirk_io_region(struct pci_dev *dev, unsigned region, unsigned size, int nr)
-{
- region &= ~(size-1);
- if (region) {
- struct resource *res = dev->resource + nr;
-
- res->name = dev->name;
- res->start = region;
- res->end = region + size - 1;
- res->flags = IORESOURCE_IO;
- pci_claim_resource(dev, nr);
- }
-}
-
-/*
- * ATI Northbridge setups MCE the processor if you even
- * read somewhere between 0x3b0->0x3bb or read 0x3d3
- */
-
-static void __devinit quirk_ati_exploding_mce(struct pci_dev *dev)
-{
- printk(KERN_INFO "ATI Northbridge, reserving I/O ports 0x3b0 to 0x3bb.\n");
- request_region(0x3b0, 0x0C, "RadeonIGP");
- request_region(0x3d3, 0x01, "RadeonIGP");
-}
-
-/*
- * Let's make the southbridge information explicit instead
- * of having to worry about people probing the ACPI areas,
- * for example.. (Yes, it happens, and if you read the wrong
- * ACPI register it will put the machine to sleep with no
- * way of waking it up again. Bummer).
- *
- * ALI M7101: Two IO regions pointed to by words at
- * 0xE0 (64 bytes of ACPI registers)
- * 0xE2 (32 bytes of SMB registers)
- */
-static void __init quirk_ali7101_acpi(struct pci_dev *dev)
-{
- u16 region;
-
- pci_read_config_word(dev, 0xE0, &region);
- quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES);
- pci_read_config_word(dev, 0xE2, &region);
- quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1);
-}
-
-/*
- * PIIX4 ACPI: Two IO regions pointed to by longwords at
- * 0x40 (64 bytes of ACPI registers)
- * 0x90 (32 bytes of SMB registers)
- */
-static void __init quirk_piix4_acpi(struct pci_dev *dev)
-{
- u32 region;
-
- pci_read_config_dword(dev, 0x40, &region);
- quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES);
- pci_read_config_dword(dev, 0x90, &region);
- quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1);
-}
-
-/*
- * ICH4, ICH4-M, ICH5, ICH5-M ACPI: Three IO regions pointed to by longwords at
- * 0x40 (128 bytes of ACPI, GPIO & TCO registers)
- * 0x58 (64 bytes of GPIO I/O space)
- */
-static void __devinit quirk_ich4_lpc_acpi(struct pci_dev *dev)
-{
- u32 region;
-
- pci_read_config_dword(dev, 0x40, &region);
- quirk_io_region(dev, region, 128, PCI_BRIDGE_RESOURCES);
-
- pci_read_config_dword(dev, 0x58, &region);
- quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1);
-}
-
-/*
- * VIA ACPI: One IO region pointed to by longword at
- * 0x48 or 0x20 (256 bytes of ACPI registers)
- */
-static void __init quirk_vt82c586_acpi(struct pci_dev *dev)
-{
- u8 rev;
- u32 region;
-
- pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
- if (rev & 0x10) {
- pci_read_config_dword(dev, 0x48, &region);
- region &= PCI_BASE_ADDRESS_IO_MASK;
- quirk_io_region(dev, region, 256, PCI_BRIDGE_RESOURCES);
- }
-}
-
-/*
- * VIA VT82C686 ACPI: Three IO region pointed to by (long)words at
- * 0x48 (256 bytes of ACPI registers)
- * 0x70 (128 bytes of hardware monitoring register)
- * 0x90 (16 bytes of SMB registers)
- */
-static void __init quirk_vt82c686_acpi(struct pci_dev *dev)
-{
- u16 hm;
- u32 smb;
-
- quirk_vt82c586_acpi(dev);
-
- pci_read_config_word(dev, 0x70, &hm);
- hm &= PCI_BASE_ADDRESS_IO_MASK;
- quirk_io_region(dev, hm, 128, PCI_BRIDGE_RESOURCES + 1);
-
- pci_read_config_dword(dev, 0x90, &smb);
- smb &= PCI_BASE_ADDRESS_IO_MASK;
- quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 2);
-}
-
-
-#ifdef CONFIG_X86_IO_APIC
-
-#include <asm/io_apic.h>
-
-/*
- * VIA 686A/B: If an IO-APIC is active, we need to route all on-chip
- * devices to the external APIC.
- *
- * TODO: When we have device-specific interrupt routers,
- * this code will go away from quirks.
- */
-static void __init quirk_via_ioapic(struct pci_dev *dev)
-{
- u8 tmp;
-
- if (nr_ioapics < 1)
- tmp = 0; /* nothing routed to external APIC */
- else
- tmp = 0x1f; /* all known bits (4-0) routed to external APIC */
-
- printk(KERN_INFO "PCI: %sbling Via external APIC routing\n",
- tmp == 0 ? "Disa" : "Ena");
-
- /* Offset 0x58: External APIC IRQ output control */
- pci_write_config_byte (dev, 0x58, tmp);
-}
-
-#endif /* CONFIG_X86_IO_APIC */
-
-
-/*
- * Via 686A/B: The PCI_INTERRUPT_LINE register for the on-chip
- * devices, USB0/1, AC97, MC97, and ACPI, has an unusual feature:
- * when written, it makes an internal connection to the PIC.
- * For these devices, this register is defined to be 4 bits wide.
- * Normally this is fine. However for IO-APIC motherboards, or
- * non-x86 architectures (yes Via exists on PPC among other places),
- * we must mask the PCI_INTERRUPT_LINE value versus 0xf to get
- * interrupts delivered properly.
- *
- * TODO: When we have device-specific interrupt routers,
- * quirk_via_irqpic will go away from quirks.
- */
-
-/*
- * FIXME: it is questionable that quirk_via_acpi
- * is needed. It shows up as an ISA bridge, and does not
- * support the PCI_INTERRUPT_LINE register at all. Therefore
- * it seems like setting the pci_dev's 'irq' to the
- * value of the ACPI SCI interrupt is only done for convenience.
- * -jgarzik
- */
-static void __init quirk_via_acpi(struct pci_dev *d)
-{
- /*
- * VIA ACPI device: SCI IRQ line in PCI config byte 0x42
- */
- u8 irq;
- pci_read_config_byte(d, 0x42, &irq);
- irq &= 0xf;
- if (irq && (irq != 2))
- d->irq = irq;
-}
-
-static void __init quirk_via_irqpic(struct pci_dev *dev)
-{
- u8 irq, new_irq = dev->irq & 0xf;
-
- pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
-
- if (new_irq != irq) {
- printk(KERN_INFO "PCI: Via IRQ fixup for %s, from %d to %d\n",
- dev->slot_name, irq, new_irq);
-
- udelay(15);
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, new_irq);
- }
-}
-
-
-/*
- * PIIX3 USB: We have to disable USB interrupts that are
- * hardwired to PIRQD# and may be shared with an
- * external device.
- *
- * Legacy Support Register (LEGSUP):
- * bit13: USB PIRQ Enable (USBPIRQDEN),
- * bit4: Trap/SMI On IRQ Enable (USBSMIEN).
- *
- * We mask out all r/wc bits, too.
- */
-static void __init quirk_piix3_usb(struct pci_dev *dev)
-{
- u16 legsup;
-
- pci_read_config_word(dev, 0xc0, &legsup);
- legsup &= 0x50ef;
- pci_write_config_word(dev, 0xc0, legsup);
-}
-
-/*
- * VIA VT82C598 has its device ID settable and many BIOSes
- * set it to the ID of VT82C597 for backward compatibility.
- * We need to switch it off to be able to recognize the real
- * type of the chip.
- */
-static void __init quirk_vt82c598_id(struct pci_dev *dev)
-{
- pci_write_config_byte(dev, 0xfc, 0);
- pci_read_config_word(dev, PCI_DEVICE_ID, &dev->device);
-}
-
-/*
- * CardBus controllers have a legacy base address that enables them
- * to respond as i82365 pcmcia controllers. We don't want them to
- * do this even if the Linux CardBus driver is not loaded, because
- * the Linux i82365 driver does not (and should not) handle CardBus.
- */
-static void __init quirk_cardbus_legacy(struct pci_dev *dev)
-{
- if ((PCI_CLASS_BRIDGE_CARDBUS << 8) ^ dev->class)
- return;
- pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0);
-}
-
-/*
- * The AMD io apic can hang the box when an apic irq is masked.
- * We check all revs >= B0 (yet not in the pre production!) as the bug
- * is currently marked NoFix
- *
- * We have multiple reports of hangs with this chipset that went away with
- * noapic specified. For the moment we assume its the errata. We may be wrong
- * of course. However the advice is demonstrably good even if so..
- */
-
-static void __init quirk_amd_ioapic(struct pci_dev *dev)
-{
- u8 rev;
-
- pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
- if(rev >= 0x02)
- {
- printk(KERN_WARNING "I/O APIC: AMD Errata #22 may be present. In the event of instability try\n");
- printk(KERN_WARNING " : booting with the \"noapic\" option.\n");
- }
-}
-
-/*
- * Following the PCI ordering rules is optional on the AMD762. I'm not
- * sure what the designers were smoking but let's not inhale...
- *
- * To be fair to AMD, it follows the spec by default, its BIOS people
- * who turn it off!
- */
-
-static void __init quirk_amd_ordering(struct pci_dev *dev)
-{
- u32 pcic;
- pci_read_config_dword(dev, 0x4C, &pcic);
- if((pcic&6)!=6)
- {
- pcic |= 6;
- printk(KERN_WARNING "BIOS failed to enable PCI standards compliance, fixing this error.\n");
- pci_write_config_dword(dev, 0x4C, pcic);
- pci_read_config_dword(dev, 0x84, &pcic);
- pcic |= (1<<23); /* Required in this mode */
- pci_write_config_dword(dev, 0x84, pcic);
- }
-}
-
-#ifdef CONFIG_X86_IO_APIC
-
-#define AMD8131_revA0 0x01
-#define AMD8131_revB0 0x11
-#define AMD8131_MISC 0x40
-#define AMD8131_NIOAMODE_BIT 0
-
-static void __init quirk_amd_8131_ioapic(struct pci_dev *dev)
-{
- unsigned char revid, tmp;
-
- if (nr_ioapics == 0)
- return;
-
- pci_read_config_byte(dev, PCI_REVISION_ID, &revid);
- if (revid == AMD8131_revA0 || revid == AMD8131_revB0) {
- printk(KERN_INFO "Fixing up AMD8131 IOAPIC mode\n");
- pci_read_config_byte( dev, AMD8131_MISC, &tmp);
- tmp &= ~(1 << AMD8131_NIOAMODE_BIT);
- pci_write_config_byte( dev, AMD8131_MISC, tmp);
- }
-}
-#endif
-
-
-/*
- * DreamWorks provided workaround for Dunord I-3000 problem
- *
- * This card decodes and responds to addresses not apparently
- * assigned to it. We force a larger allocation to ensure that
- * nothing gets put too close to it.
- */
-
-static void __init quirk_dunord ( struct pci_dev * dev )
-{
- struct resource * r = & dev -> resource [ 1 ];
- r -> start = 0;
- r -> end = 0xffffff;
-}
-
-static void __init quirk_transparent_bridge(struct pci_dev *dev)
-{
- dev->transparent = 1;
-}
-
-/*
- * Common misconfiguration of the MediaGX/Geode PCI master that will
- * reduce PCI bandwidth from 70MB/s to 25MB/s. See the GXM/GXLV/GX1
- * datasheets found at http://www.national.com/ds/GX for info on what
- * these bits do. <christer@weinigel.se>
- */
-
-static void __init quirk_mediagx_master(struct pci_dev *dev)
-{
- u8 reg;
- pci_read_config_byte(dev, 0x41, &reg);
- if (reg & 2) {
- reg &= ~2;
- printk(KERN_INFO "PCI: Fixup for MediaGX/Geode Slave Disconnect Boundary (0x41=0x%02x)\n", reg);
- pci_write_config_byte(dev, 0x41, reg);
- }
-}
-
-/*
- * As per PCI spec, ignore base address registers 0-3 of the IDE controllers
- * running in Compatible mode (bits 0 and 2 in the ProgIf for primary and
- * secondary channels respectively). If the device reports Compatible mode
- * but does use BAR0-3 for address decoding, we assume that firmware has
- * programmed these BARs with standard values (0x1f0,0x3f4 and 0x170,0x374).
- * Exceptions (if they exist) must be handled in chip/architecture specific
- * fixups.
- *
- * Note: for non x86 people. You may need an arch specific quirk to handle
- * moving IDE devices to native mode as well. Some plug in card devices power
- * up in compatible mode and assume the BIOS will adjust them.
- *
- * Q: should we load the 0x1f0,0x3f4 into the registers or zap them as
- * we do now ? We don't want is pci_enable_device to come along
- * and assign new resources. Both approaches work for that.
- */
-
-static void __devinit quirk_ide_bases(struct pci_dev *dev)
-{
- struct resource *res;
- int first_bar = 2, last_bar = 0;
-
- if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
- return;
-
- res = &dev->resource[0];
-
- /* primary channel: ProgIf bit 0, BAR0, BAR1 */
- if (!(dev->class & 1) && (res[0].flags || res[1].flags)) {
- res[0].start = res[0].end = res[0].flags = 0;
- res[1].start = res[1].end = res[1].flags = 0;
- first_bar = 0;
- last_bar = 1;
- }
-
- /* secondary channel: ProgIf bit 2, BAR2, BAR3 */
- if (!(dev->class & 4) && (res[2].flags || res[3].flags)) {
- res[2].start = res[2].end = res[2].flags = 0;
- res[3].start = res[3].end = res[3].flags = 0;
- last_bar = 3;
- }
-
- if (!last_bar)
- return;
-
- printk(KERN_INFO "PCI: Ignoring BAR%d-%d of IDE controller %s\n",
- first_bar, last_bar, dev->slot_name);
-}
-
-/*
- * Ensure C0 rev restreaming is off. This is normally done by
- * the BIOS but in the odd case it is not the results are corruption
- * hence the presence of a Linux check
- */
-
-static void __init quirk_disable_pxb(struct pci_dev *pdev)
-{
- u16 config;
- u8 rev;
-
- pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
- if(rev != 0x04) /* Only C0 requires this */
- return;
- pci_read_config_word(pdev, 0x40, &config);
- if(config & (1<<6))
- {
- config &= ~(1<<6);
- pci_write_config_word(pdev, 0x40, config);
- printk(KERN_INFO "PCI: C0 revision 450NX. Disabling PCI restreaming.\n");
- }
-}
-
-/*
- * VIA northbridges care about PCI_INTERRUPT_LINE
- */
-
-int interrupt_line_quirk;
-
-static void __init quirk_via_bridge(struct pci_dev *pdev)
-{
- if(pdev->devfn == 0)
- interrupt_line_quirk = 1;
-}
-
-/*
- * Serverworks CSB5 IDE does not fully support native mode
- */
-static void __init quirk_svwks_csb5ide(struct pci_dev *pdev)
-{
- u8 prog;
- pci_read_config_byte(pdev, PCI_CLASS_PROG, &prog);
- if (prog & 5) {
- prog &= ~5;
- pdev->class &= ~5;
- pci_write_config_byte(pdev, PCI_CLASS_PROG, prog);
- /* need to re-assign BARs for compat mode */
- quirk_ide_bases(pdev);
- }
-}
-
-/*
- * On ASUS P4B boards, the SMBus PCI Device within the ICH2/4 southbridge
- * is not activated. The myth is that Asus said that they do not want the
- * users to be irritated by just another PCI Device in the Win98 device
- * manager. (see the file prog/hotplug/README.p4b in the lm_sensors
- * package 2.7.0 for details)
- *
- * The SMBus PCI Device can be activated by setting a bit in the ICH LPC
- * bridge. Unfortunately, this device has no subvendor/subdevice ID. So it
- * becomes necessary to do this tweak in two steps -- I've chosen the Host
- * bridge as trigger.
- */
-
-static int __initdata asus_hides_smbus = 0;
-
-static void __init asus_hides_smbus_hostbridge(struct pci_dev *dev)
-{
- if (likely(dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK))
- return;
-
- if (dev->device == PCI_DEVICE_ID_INTEL_82845_HB)
- switch(dev->subsystem_device) {
- case 0x8070: /* P4B */
- case 0x8088: /* P4B533 */
- asus_hides_smbus = 1;
- }
- if ((dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) &&
- (dev->subsystem_device == 0x80b2)) /* P4PE */
- asus_hides_smbus = 1;
- if ((dev->device == PCI_DEVICE_ID_INTEL_82850_HB) &&
- (dev->subsystem_device == 0x8030)) /* P4T533 */
- asus_hides_smbus = 1;
- if ((dev->device == PCI_DEVICE_ID_INTEL_7205_0) &&
- (dev->subsystem_device == 0x8070)) /* P4G8X Deluxe */
- asus_hides_smbus = 1;
- return;
-}
-
-static void __init asus_hides_smbus_lpc(struct pci_dev *dev)
-{
- u16 val;
-
- if (likely(!asus_hides_smbus))
- return;
-
- pci_read_config_word(dev, 0xF2, &val);
- if (val & 0x8) {
- pci_write_config_word(dev, 0xF2, val & (~0x8));
- pci_read_config_word(dev, 0xF2, &val);
- if(val & 0x8)
- printk(KERN_INFO "PCI: i801 SMBus device continues to play 'hide and seek'! 0x%x\n", val);
- else
- printk(KERN_INFO "PCI: Enabled i801 SMBus device\n");
- }
-}
-
-/*
- * The main table of quirks.
- */
-
-static struct pci_fixup pci_fixups[] __initdata = {
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_DUNORD, PCI_DEVICE_ID_DUNORD_I3000, quirk_dunord },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_passive_release },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_passive_release },
- /*
- * Its not totally clear which chipsets are the problematic ones
- * We know 82C586 and 82C596 variants are affected.
- */
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, quirk_isa_dma_hangs },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, quirk_isa_dma_hangs },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, quirk_isa_dma_hangs },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454NX, quirk_disable_pxb },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_868, quirk_s3_64M },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_968, quirk_s3_64M },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437, quirk_triton },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437VX, quirk_triton },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439, quirk_triton },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quirk_triton },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_natoma },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_0, quirk_natoma },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_1, quirk_natoma },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0, quirk_natoma },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_1, quirk_natoma },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2, quirk_natoma },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1647, quirk_alimagik },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1651, quirk_alimagik },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, quirk_nopcipci },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496, quirk_nopcipci },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, quirk_vialatency },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8371_1, quirk_vialatency },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, quirk_vialatency },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576, quirk_vsfx },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_viaetbf },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_vt82c598_id },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_vt82c586_acpi },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_vt82c686_acpi },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, quirk_piix4_acpi },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, quirk_ich4_lpc_acpi },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101, quirk_ali7101_acpi },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_2, quirk_piix3_usb },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_2, quirk_piix3_usb },
- { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, quirk_ide_bases },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_ANY_ID, quirk_via_bridge },
- { PCI_FIXUP_FINAL, PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy },
-
-#ifdef CONFIG_X86_IO_APIC
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic },
-#endif
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_via_acpi },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_via_acpi },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_2, quirk_via_irqpic },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, quirk_via_irqpic },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_6, quirk_via_irqpic },
-
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410, quirk_amd_ioapic },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering },
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_IGP, quirk_ati_exploding_mce },
- /*
- * i82380FB mobile docking controller: its PCI-to-PCI bridge
- * is subtractive decoding (transparent), and does indicate this
- * in the ProgIf. Unfortunately, the ProgIf value is wrong - 0x80
- * instead of 0x01.
- */
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82380FB, quirk_transparent_bridge },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_TOSHIBA, 0x605, quirk_transparent_bridge },
-
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_PCI_MASTER, quirk_mediagx_master },
-
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE, quirk_svwks_csb5ide },
-
-#ifdef CONFIG_X86_IO_APIC
- { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_APIC,
- quirk_amd_8131_ioapic },
-#endif
-
- /*
- * on Asus P4B boards, the i801SMBus device is disabled at startup.
- */
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845_HB, asus_hides_smbus_hostbridge },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_HB, asus_hides_smbus_hostbridge },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82850_HB, asus_hides_smbus_hostbridge },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_7205_0, asus_hides_smbus_hostbridge },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, asus_hides_smbus_lpc },
- { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, asus_hides_smbus_lpc },
-
- { 0 }
-};
-
-
-static void pci_do_fixups(struct pci_dev *dev, int pass, struct pci_fixup *f)
-{
- while (f->pass) {
- if (f->pass == pass &&
- (f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
- (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
-#ifdef DEBUG
- printk(KERN_INFO "PCI: Calling quirk %p for %s\n", f->hook, dev->slot_name);
-#endif
- f->hook(dev);
- }
- f++;
- }
-}
-
-void pci_fixup_device(int pass, struct pci_dev *dev)
-{
- pci_do_fixups(dev, pass, pcibios_fixups);
- pci_do_fixups(dev, pass, pci_fixups);
-}
diff --git a/xen/drivers/pci/setup-res.c b/xen/drivers/pci/setup-res.c
deleted file mode 100644
index 3435b2ac9c..0000000000
--- a/xen/drivers/pci/setup-res.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * drivers/pci/setup-res.c
- *
- * Extruded from code written by
- * Dave Rusling (david.rusling@reo.mts.dec.com)
- * David Mosberger (davidm@cs.arizona.edu)
- * David Miller (davem@redhat.com)
- *
- * Support routines for initializing a PCI subsystem.
- */
-
-/* fixed for multiple pci buses, 1999 Andrea Arcangeli <andrea@suse.de> */
-
-/*
- * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
- * Resource sorting
- */
-
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/pci.h>
-#include <xen/errno.h>
-#include <xen/ioport.h>
-#include <xen/cache.h>
-#include <xen/slab.h>
-
-
-#define DEBUG_CONFIG 0
-#if DEBUG_CONFIG
-# define DBGC(args) printk args
-#else
-# define DBGC(args)
-#endif
-
-
-int __init
-pci_claim_resource(struct pci_dev *dev, int resource)
-{
- struct resource *res = &dev->resource[resource];
- struct resource *root = pci_find_parent_resource(dev, res);
- int err;
-
- err = -EINVAL;
- if (root != NULL) {
- err = request_resource(root, res);
- if (err) {
- printk(KERN_ERR "PCI: Address space collision on "
- "region %d of device %s [%lx:%lx]\n",
- resource, dev->name, res->start, res->end);
- }
- } else {
- printk(KERN_ERR "PCI: No parent found for region %d "
- "of device %s\n", resource, dev->name);
- }
-
- return err;
-}
-
-/*
- * Given the PCI bus a device resides on, try to
- * find an acceptable resource allocation for a
- * specific device resource..
- */
-static int pci_assign_bus_resource(const struct pci_bus *bus,
- struct pci_dev *dev,
- struct resource *res,
- unsigned long size,
- unsigned long min,
- unsigned int type_mask,
- int resno)
-{
- unsigned long align;
- int i;
-
- type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
- for (i = 0 ; i < 4; i++) {
- struct resource *r = bus->resource[i];
- if (!r)
- continue;
-
- /* type_mask must match */
- if ((res->flags ^ r->flags) & type_mask)
- continue;
-
- /* We cannot allocate a non-prefetching resource
- from a pre-fetching area */
- if ((r->flags & IORESOURCE_PREFETCH) &&
- !(res->flags & IORESOURCE_PREFETCH))
- continue;
-
- /* The bridge resources are special, as their
- size != alignment. Sizing routines return
- required alignment in the "start" field. */
- align = (resno < PCI_BRIDGE_RESOURCES) ? size : res->start;
-
- /* Ok, try it out.. */
- if (allocate_resource(r, res, size, min, -1, align,
- pcibios_align_resource, dev) < 0)
- continue;
-
- /* Update PCI config space. */
- pcibios_update_resource(dev, r, res, resno);
- return 0;
- }
- return -EBUSY;
-}
-
-int
-pci_assign_resource(struct pci_dev *dev, int i)
-{
- const struct pci_bus *bus = dev->bus;
- struct resource *res = dev->resource + i;
- unsigned long size, min;
-
- size = res->end - res->start + 1;
- min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
-
- /* First, try exact prefetching match.. */
- if (pci_assign_bus_resource(bus, dev, res, size, min, IORESOURCE_PREFETCH, i) < 0) {
- /*
- * That failed.
- *
- * But a prefetching area can handle a non-prefetching
- * window (it will just not perform as well).
- */
- if (!(res->flags & IORESOURCE_PREFETCH) || pci_assign_bus_resource(bus, dev, res, size, min, 0, i) < 0) {
- printk(KERN_ERR "PCI: Failed to allocate resource %d(%lx-%lx) for %s\n",
- i, res->start, res->end, dev->slot_name);
- return -EBUSY;
- }
- }
-
- DBGC((KERN_ERR " got res[%lx:%lx] for resource %d of %s\n", res->start,
- res->end, i, dev->name));
-
- return 0;
-}
-
-/* Sort resources by alignment */
-void __init
-pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
-{
- int i;
-
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- struct resource *r;
- struct resource_list *list, *tmp;
- unsigned long r_align;
-
- r = &dev->resource[i];
- r_align = r->end - r->start;
-
- if (!(r->flags) || r->parent)
- continue;
- if (!r_align) {
- printk(KERN_WARNING "PCI: Ignore bogus resource %d "
- "[%lx:%lx] of %s\n",
- i, r->start, r->end, dev->name);
- continue;
- }
- r_align = (i < PCI_BRIDGE_RESOURCES) ? r_align + 1 : r->start;
- for (list = head; ; list = list->next) {
- unsigned long align = 0;
- struct resource_list *ln = list->next;
- int idx;
-
- if (ln) {
- idx = ln->res - &ln->dev->resource[0];
- align = (idx < PCI_BRIDGE_RESOURCES) ?
- ln->res->end - ln->res->start + 1 :
- ln->res->start;
- }
- if (r_align > align) {
- tmp = xmalloc(sizeof(*tmp));
- if (!tmp)
- panic("pdev_sort_resources(): "
- "xmalloc() failed!\n");
- tmp->next = ln;
- tmp->res = r;
- tmp->dev = dev;
- list->next = tmp;
- break;
- }
- }
- }
-}
-
-void __init
-pdev_enable_device(struct pci_dev *dev)
-{
- u32 reg;
- u16 cmd;
- int i;
-
- DBGC((KERN_ERR "PCI enable device: (%s)\n", dev->name));
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
-
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- struct resource *res = &dev->resource[i];
-
- if (res->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- else if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
-
- /* Special case, disable the ROM. Several devices act funny
- (ie. do not respond to memory space writes) when it is left
- enabled. A good example are QlogicISP adapters. */
-
- if (dev->rom_base_reg) {
- pci_read_config_dword(dev, dev->rom_base_reg, &reg);
- reg &= ~PCI_ROM_ADDRESS_ENABLE;
- pci_write_config_dword(dev, dev->rom_base_reg, reg);
- dev->resource[PCI_ROM_RESOURCE].flags &= ~PCI_ROM_ADDRESS_ENABLE;
- }
-
- /* All of these (may) have I/O scattered all around and may not
- use I/O base address registers at all. So we just have to
- always enable IO to these devices. */
- if ((dev->class >> 8) == PCI_CLASS_NOT_DEFINED
- || (dev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA
- || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE
- || (dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
- cmd |= PCI_COMMAND_IO;
- }
-
- /* ??? Always turn on bus mastering. If the device doesn't support
- it, the bit will go into the bucket. */
- cmd |= PCI_COMMAND_MASTER;
-
- /* Set the cache line and default latency (32). */
- pci_write_config_word(dev, PCI_CACHE_LINE_SIZE,
- (32 << 8) | (L1_CACHE_BYTES / sizeof(u32)));
-
- /* Enable the appropriate bits in the PCI command register. */
- pci_write_config_word(dev, PCI_COMMAND, cmd);
-
- DBGC((KERN_ERR " cmd reg 0x%x\n", cmd));
-}
diff --git a/xen/include/acpi/acconfig.h b/xen/include/acpi/acconfig.h
index 8879c2c830..6701c11b40 100644
--- a/xen/include/acpi/acconfig.h
+++ b/xen/include/acpi/acconfig.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -64,11 +64,21 @@
/* Version string */
-#define ACPI_CA_VERSION 0x20040116
+#define ACPI_CA_VERSION 0x20050211
+
+/*
+ * OS name, used for the _OS object. The _OS object is essentially obsolete,
+ * but there is a large base of ASL/AML code in existing machines that check
+ * for the string below. The use of this string usually guarantees that
+ * the ASL will execute down the most tested code path. Also, there is some
+ * code that will not execute the _OSI method unless _OS matches the string
+ * below. Therefore, change this string at your own risk.
+ */
+#define ACPI_OS_NAME "Microsoft Windows NT"
/* Maximum objects in the various object caches */
-#define ACPI_MAX_STATE_CACHE_DEPTH 64 /* State objects for stacks */
+#define ACPI_MAX_STATE_CACHE_DEPTH 64 /* State objects */
#define ACPI_MAX_PARSE_CACHE_DEPTH 96 /* Parse tree objects */
#define ACPI_MAX_EXTPARSE_CACHE_DEPTH 64 /* Parse tree objects */
#define ACPI_MAX_OBJECT_CACHE_DEPTH 64 /* Interpreter operand objects */
@@ -89,7 +99,7 @@
/* Version of ACPI supported */
-#define ACPI_CA_SUPPORT_LEVEL 2
+#define ACPI_CA_SUPPORT_LEVEL 3
/* String size constants */
@@ -152,10 +162,11 @@
/* Constants used in searching for the RSDP in low memory */
-#define ACPI_LO_RSDP_WINDOW_BASE 0 /* Physical Address */
-#define ACPI_HI_RSDP_WINDOW_BASE 0xE0000 /* Physical Address */
-#define ACPI_LO_RSDP_WINDOW_SIZE 0x400
-#define ACPI_HI_RSDP_WINDOW_SIZE 0x20000
+#define ACPI_EBDA_PTR_LOCATION 0x0000040E /* Physical Address */
+#define ACPI_EBDA_PTR_LENGTH 2
+#define ACPI_EBDA_WINDOW_SIZE 1024
+#define ACPI_HI_RSDP_WINDOW_BASE 0x000E0000 /* Physical Address */
+#define ACPI_HI_RSDP_WINDOW_SIZE 0x00020000
#define ACPI_RSDP_SCAN_STEP 16
/* Operation regions */
@@ -185,6 +196,10 @@
#define ACPI_SMBUS_BUFFER_SIZE 34
+/* Number of strings associated with the _OSI reserved method */
+
+#define ACPI_NUM_OSI_STRINGS 9
+
/******************************************************************************
*
diff --git a/xen/include/acpi/acdebug.h b/xen/include/acpi/acdebug.h
deleted file mode 100644
index bb8411e89d..0000000000
--- a/xen/include/acpi/acdebug.h
+++ /dev/null
@@ -1,469 +0,0 @@
-/******************************************************************************
- *
- * Name: acdebug.h - ACPI/AML debugger
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACDEBUG_H__
-#define __ACDEBUG_H__
-
-
-#define ACPI_DEBUG_BUFFER_SIZE 4196
-
-struct command_info
-{
- char *name; /* Command Name */
- u8 min_args; /* Minimum arguments required */
-};
-
-
-struct argument_info
-{
- char *name; /* Argument Name */
-};
-
-
-#define PARAM_LIST(pl) pl
-
-#define DBTEST_OUTPUT_LEVEL(lvl) if (acpi_gbl_db_opt_verbose)
-
-#define VERBOSE_PRINT(fp) DBTEST_OUTPUT_LEVEL(lvl) {\
- acpi_os_printf PARAM_LIST(fp);}
-
-#define EX_NO_SINGLE_STEP 1
-#define EX_SINGLE_STEP 2
-
-
-/* Prototypes */
-
-
-/*
- * dbxface - external debugger interfaces
- */
-
-acpi_status
-acpi_db_initialize (
- void);
-
-void
-acpi_db_terminate (
- void);
-
-acpi_status
-acpi_db_single_step (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- u32 op_type);
-
-acpi_status
-acpi_db_start_command (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-void
-acpi_db_method_end (
- struct acpi_walk_state *walk_state);
-
-
-/*
- * dbcmds - debug commands and output routines
- */
-
-void
-acpi_db_display_table_info (
- char *table_arg);
-
-void
-acpi_db_unload_acpi_table (
- char *table_arg,
- char *instance_arg);
-
-void
-acpi_db_set_method_breakpoint (
- char *location,
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-void
-acpi_db_set_method_call_breakpoint (
- union acpi_parse_object *op);
-
-void
-acpi_db_disassemble_aml (
- char *statements,
- union acpi_parse_object *op);
-
-void
-acpi_db_dump_namespace (
- char *start_arg,
- char *depth_arg);
-
-void
-acpi_db_dump_namespace_by_owner (
- char *owner_arg,
- char *depth_arg);
-
-void
-acpi_db_send_notify (
- char *name,
- u32 value);
-
-void
-acpi_db_set_method_data (
- char *type_arg,
- char *index_arg,
- char *value_arg);
-
-acpi_status
-acpi_db_display_objects (
- char *obj_type_arg,
- char *display_count_arg);
-
-acpi_status
-acpi_db_find_name_in_namespace (
- char *name_arg);
-
-void
-acpi_db_set_scope (
- char *name);
-
-void
-acpi_db_find_references (
- char *object_arg);
-
-void
-acpi_db_display_locks (void);
-
-
-void
-acpi_db_display_resources (
- char *object_arg);
-
-void
-acpi_db_display_gpes (void);
-
-void
-acpi_db_check_integrity (
- void);
-
-acpi_status
-acpi_db_integrity_walk (
- acpi_handle obj_handle,
- u32 nesting_level,
- void *context,
- void **return_value);
-
-acpi_status
-acpi_db_walk_and_match_name (
- acpi_handle obj_handle,
- u32 nesting_level,
- void *context,
- void **return_value);
-
-acpi_status
-acpi_db_walk_for_references (
- acpi_handle obj_handle,
- u32 nesting_level,
- void *context,
- void **return_value);
-
-acpi_status
-acpi_db_walk_for_specific_objects (
- acpi_handle obj_handle,
- u32 nesting_level,
- void *context,
- void **return_value);
-
-void
-acpi_db_generate_gpe (
- char *gpe_arg,
- char *block_arg);
-
-/*
- * dbdisply - debug display commands
- */
-
-void
-acpi_db_display_method_info (
- union acpi_parse_object *op);
-
-void
-acpi_db_decode_and_display_object (
- char *target,
- char *output_type);
-
-void
-acpi_db_display_result_object (
- union acpi_operand_object *obj_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_db_display_all_methods (
- char *display_count_arg);
-
-void
-acpi_db_display_arguments (
- void);
-
-void
-acpi_db_display_locals (
- void);
-
-void
-acpi_db_display_results (
- void);
-
-void
-acpi_db_display_calling_tree (
- void);
-
-void
-acpi_db_display_object_type (
- char *object_arg);
-
-void
-acpi_db_display_argument_object (
- union acpi_operand_object *obj_desc,
- struct acpi_walk_state *walk_state);
-
-void
-acpi_db_dump_parser_descriptor (
- union acpi_parse_object *op);
-
-void *
-acpi_db_get_pointer (
- void *target);
-
-
-/*
- * dbexec - debugger control method execution
- */
-
-void
-acpi_db_execute (
- char *name,
- char **args,
- u32 flags);
-
-void
-acpi_db_create_execution_threads (
- char *num_threads_arg,
- char *num_loops_arg,
- char *method_name_arg);
-
-acpi_status
-acpi_db_execute_method (
- struct acpi_db_method_info *info,
- struct acpi_buffer *return_obj);
-
-void
-acpi_db_execute_setup (
- struct acpi_db_method_info *info);
-
-u32
-acpi_db_get_outstanding_allocations (
- void);
-
-void ACPI_SYSTEM_XFACE
-acpi_db_method_thread (
- void *context);
-
-acpi_status
-acpi_db_execution_walk (
- acpi_handle obj_handle,
- u32 nesting_level,
- void *context,
- void **return_value);
-
-
-/*
- * dbfileio - Debugger file I/O commands
- */
-
-acpi_object_type
-acpi_db_match_argument (
- char *user_argument,
- struct argument_info *arguments);
-
-acpi_status
-ae_local_load_table (
- struct acpi_table_header *table_ptr);
-
-void
-acpi_db_close_debug_file (
- void);
-
-void
-acpi_db_open_debug_file (
- char *name);
-
-acpi_status
-acpi_db_load_acpi_table (
- char *filename);
-
-acpi_status
-acpi_db_get_table_from_file (
- char *filename,
- struct acpi_table_header **table);
-
-acpi_status
-acpi_db_read_table_from_file (
- char *filename,
- struct acpi_table_header **table);
-
-/*
- * dbhistry - debugger HISTORY command
- */
-
-void
-acpi_db_add_to_history (
- char *command_line);
-
-void
-acpi_db_display_history (void);
-
-char *
-acpi_db_get_from_history (
- char *command_num_arg);
-
-
-/*
- * dbinput - user front-end to the AML debugger
- */
-
-acpi_status
-acpi_db_command_dispatch (
- char *input_buffer,
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-void ACPI_SYSTEM_XFACE
-acpi_db_execute_thread (
- void *context);
-
-acpi_status
-acpi_db_user_commands (
- char prompt,
- union acpi_parse_object *op);
-
-void
-acpi_db_display_help (
- char *help_type);
-
-char *
-acpi_db_get_next_token (
- char *string,
- char **next);
-
-u32
-acpi_db_get_line (
- char *input_buffer);
-
-u32
-acpi_db_match_command (
- char *user_command);
-
-void
-acpi_db_single_thread (
- void);
-
-
-/*
- * dbstats - Generation and display of ACPI table statistics
- */
-
-void
-acpi_db_generate_statistics (
- union acpi_parse_object *root,
- u8 is_method);
-
-
-acpi_status
-acpi_db_display_statistics (
- char *type_arg);
-
-acpi_status
-acpi_db_classify_one_object (
- acpi_handle obj_handle,
- u32 nesting_level,
- void *context,
- void **return_value);
-
-void
-acpi_db_count_namespace_objects (
- void);
-
-void
-acpi_db_enumerate_object (
- union acpi_operand_object *obj_desc);
-
-
-/*
- * dbutils - AML debugger utilities
- */
-
-void
-acpi_db_set_output_destination (
- u32 where);
-
-void
-acpi_db_dump_buffer (
- u32 address);
-
-void
-acpi_db_dump_object (
- union acpi_object *obj_desc,
- u32 level);
-
-void
-acpi_db_prep_namestring (
- char *name);
-
-
-acpi_status
-acpi_db_second_pass_parse (
- union acpi_parse_object *root);
-
-struct acpi_namespace_node *
-acpi_db_local_ns_lookup (
- char *name);
-
-
-#endif /* __ACDEBUG_H__ */
diff --git a/xen/include/acpi/acdisasm.h b/xen/include/acpi/acdisasm.h
deleted file mode 100644
index 220bd1b35f..0000000000
--- a/xen/include/acpi/acdisasm.h
+++ /dev/null
@@ -1,402 +0,0 @@
-/******************************************************************************
- *
- * Name: acdisasm.h - AML disassembler
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACDISASM_H__
-#define __ACDISASM_H__
-
-#include "amlresrc.h"
-
-
-#define BLOCK_NONE 0
-#define BLOCK_PAREN 1
-#define BLOCK_BRACE 2
-#define BLOCK_COMMA_LIST 4
-
-extern const char *acpi_gbl_io_decode[2];
-extern const char *acpi_gbl_word_decode[4];
-extern const char *acpi_gbl_consume_decode[2];
-extern const char *acpi_gbl_min_decode[2];
-extern const char *acpi_gbl_max_decode[2];
-extern const char *acpi_gbl_DECdecode[2];
-extern const char *acpi_gbl_RNGdecode[4];
-extern const char *acpi_gbl_MEMdecode[4];
-extern const char *acpi_gbl_RWdecode[2];
-extern const char *acpi_gbl_irq_decode[2];
-extern const char *acpi_gbl_HEdecode[2];
-extern const char *acpi_gbl_LLdecode[2];
-extern const char *acpi_gbl_SHRdecode[2];
-extern const char *acpi_gbl_TYPdecode[4];
-extern const char *acpi_gbl_BMdecode[2];
-extern const char *acpi_gbl_SIZdecode[4];
-extern const char *acpi_gbl_lock_rule[ACPI_NUM_LOCK_RULES];
-extern const char *acpi_gbl_access_types[ACPI_NUM_ACCESS_TYPES];
-extern const char *acpi_gbl_update_rules[ACPI_NUM_UPDATE_RULES];
-extern const char *acpi_gbl_match_ops[ACPI_NUM_MATCH_OPS];
-
-
-struct acpi_op_walk_info
-{
- u32 level;
- u32 bit_offset;
-};
-
-typedef
-acpi_status (*asl_walk_callback) (
- union acpi_parse_object *op,
- u32 level,
- void *context);
-
-
-/*
- * dmwalk
- */
-
-void
-acpi_dm_walk_parse_tree (
- union acpi_parse_object *op,
- asl_walk_callback descending_callback,
- asl_walk_callback ascending_callback,
- void *context);
-
-acpi_status
-acpi_dm_descending_op (
- union acpi_parse_object *op,
- u32 level,
- void *context);
-
-acpi_status
-acpi_dm_ascending_op (
- union acpi_parse_object *op,
- u32 level,
- void *context);
-
-
-/*
- * dmopcode
- */
-
-void
-acpi_dm_validate_name (
- char *name,
- union acpi_parse_object *op);
-
-u32
-acpi_dm_dump_name (
- char *name);
-
-void
-acpi_dm_unicode (
- union acpi_parse_object *op);
-
-void
-acpi_dm_disassemble (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *origin,
- u32 num_opcodes);
-
-void
-acpi_dm_namestring (
- char *name);
-
-void
-acpi_dm_display_path (
- union acpi_parse_object *op);
-
-void
-acpi_dm_disassemble_one_op (
- struct acpi_walk_state *walk_state,
- struct acpi_op_walk_info *info,
- union acpi_parse_object *op);
-
-void
-acpi_dm_decode_internal_object (
- union acpi_operand_object *obj_desc);
-
-u32
-acpi_dm_block_type (
- union acpi_parse_object *op);
-
-u32
-acpi_dm_list_type (
- union acpi_parse_object *op);
-
-acpi_status
-acpi_ps_display_object_pathname (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-void
-acpi_dm_method_flags (
- union acpi_parse_object *op);
-
-void
-acpi_dm_field_flags (
- union acpi_parse_object *op);
-
-void
-acpi_dm_address_space (
- u8 space_id);
-
-void
-acpi_dm_region_flags (
- union acpi_parse_object *op);
-
-void
-acpi_dm_match_op (
- union acpi_parse_object *op);
-
-void
-acpi_dm_match_keyword (
- union acpi_parse_object *op);
-
-u8
-acpi_dm_comma_if_list_member (
- union acpi_parse_object *op);
-
-void
-acpi_dm_comma_if_field_member (
- union acpi_parse_object *op);
-
-
-/*
- * dmobject
- */
-
-void
-acpi_dm_decode_node (
- struct acpi_namespace_node *node);
-
-void
-acpi_dm_display_internal_object (
- union acpi_operand_object *obj_desc,
- struct acpi_walk_state *walk_state);
-
-void
-acpi_dm_display_arguments (
- struct acpi_walk_state *walk_state);
-
-void
-acpi_dm_display_locals (
- struct acpi_walk_state *walk_state);
-
-void
-acpi_dm_dump_method_info (
- acpi_status status,
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-
-/*
- * dmbuffer
- */
-
-void
-acpi_is_eisa_id (
- union acpi_parse_object *op);
-
-void
-acpi_dm_eisa_id (
- u32 encoded_id);
-
-u8
-acpi_dm_is_unicode_buffer (
- union acpi_parse_object *op);
-
-u8
-acpi_dm_is_string_buffer (
- union acpi_parse_object *op);
-
-
-/*
- * dmresrc
- */
-
-void
-acpi_dm_disasm_byte_list (
- u32 level,
- u8 *byte_data,
- u32 byte_count);
-
-void
-acpi_dm_byte_list (
- struct acpi_op_walk_info *info,
- union acpi_parse_object *op);
-
-void
-acpi_dm_resource_descriptor (
- struct acpi_op_walk_info *info,
- u8 *byte_data,
- u32 byte_count);
-
-u8
-acpi_dm_is_resource_descriptor (
- union acpi_parse_object *op);
-
-void
-acpi_dm_indent (
- u32 level);
-
-void
-acpi_dm_bit_list (
- u16 mask);
-
-void
-acpi_dm_decode_attribute (
- u8 attribute);
-
-/*
- * dmresrcl
- */
-
-void
-acpi_dm_io_flags (
- u8 flags);
-
-void
-acpi_dm_memory_flags (
- u8 flags,
- u8 specific_flags);
-
-void
-acpi_dm_word_descriptor (
- struct asl_word_address_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_dword_descriptor (
- struct asl_dword_address_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_qword_descriptor (
- struct asl_qword_address_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_memory24_descriptor (
- struct asl_memory_24_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_memory32_descriptor (
- struct asl_memory_32_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_fixed_mem32_descriptor (
- struct asl_fixed_memory_32_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_generic_register_descriptor (
- struct asl_general_register_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_interrupt_descriptor (
- struct asl_extended_xrupt_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_vendor_large_descriptor (
- struct asl_large_vendor_desc *resource,
- u32 length,
- u32 level);
-
-
-/*
- * dmresrcs
- */
-
-void
-acpi_dm_irq_descriptor (
- struct asl_irq_format_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_dma_descriptor (
- struct asl_dma_format_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_io_descriptor (
- struct asl_io_port_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_fixed_io_descriptor (
- struct asl_fixed_io_port_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_start_dependent_descriptor (
- struct asl_start_dependent_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_end_dependent_descriptor (
- struct asl_start_dependent_desc *resource,
- u32 length,
- u32 level);
-
-void
-acpi_dm_vendor_small_descriptor (
- struct asl_small_vendor_desc *resource,
- u32 length,
- u32 level);
-
-
-#endif /* __ACDISASM_H__ */
diff --git a/xen/include/acpi/acdispat.h b/xen/include/acpi/acdispat.h
deleted file mode 100644
index 2a09e926cc..0000000000
--- a/xen/include/acpi/acdispat.h
+++ /dev/null
@@ -1,513 +0,0 @@
-/******************************************************************************
- *
- * Name: acdispat.h - dispatcher (parser to interpreter interface)
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-
-#ifndef _ACDISPAT_H_
-#define _ACDISPAT_H_
-
-
-#define NAMEOF_LOCAL_NTE "__L0"
-#define NAMEOF_ARG_NTE "__A0"
-
-
-/* Common interfaces */
-
-acpi_status
-acpi_ds_obj_stack_push (
- void *object,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_obj_stack_pop (
- u32 pop_count,
- struct acpi_walk_state *walk_state);
-
-void *
-acpi_ds_obj_stack_get_value (
- u32 index,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_obj_stack_pop_object (
- union acpi_operand_object **object,
- struct acpi_walk_state *walk_state);
-
-
-/* dsopcode - support for late evaluation */
-
-acpi_status
-acpi_ds_execute_arguments (
- struct acpi_namespace_node *node,
- struct acpi_namespace_node *scope_node,
- u32 aml_length,
- u8 *aml_start);
-
-acpi_status
-acpi_ds_get_buffer_field_arguments (
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ds_get_region_arguments (
- union acpi_operand_object *rgn_desc);
-
-acpi_status
-acpi_ds_get_buffer_arguments (
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ds_get_package_arguments (
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ds_init_buffer_field (
- u16 aml_opcode,
- union acpi_operand_object *obj_desc,
- union acpi_operand_object *buffer_desc,
- union acpi_operand_object *offset_desc,
- union acpi_operand_object *length_desc,
- union acpi_operand_object *result_desc);
-
-acpi_status
-acpi_ds_eval_buffer_field_operands (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-acpi_status
-acpi_ds_eval_region_operands (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-acpi_status
-acpi_ds_eval_data_object_operands (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ds_initialize_region (
- acpi_handle obj_handle);
-
-
-/* dsctrl - Parser/Interpreter interface, control stack routines */
-
-
-acpi_status
-acpi_ds_exec_begin_control_op (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-acpi_status
-acpi_ds_exec_end_control_op (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-
-/* dsexec - Parser/Interpreter interface, method execution callbacks */
-
-
-acpi_status
-acpi_ds_get_predicate_value (
- struct acpi_walk_state *walk_state,
- union acpi_operand_object *result_obj);
-
-acpi_status
-acpi_ds_exec_begin_op (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object **out_op);
-
-acpi_status
-acpi_ds_exec_end_op (
- struct acpi_walk_state *state);
-
-
-/* dsfield - Parser/Interpreter interface for AML fields */
-
-acpi_status
-acpi_ds_get_field_names (
- struct acpi_create_field_info *info,
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *arg);
-
-acpi_status
-acpi_ds_create_field (
- union acpi_parse_object *op,
- struct acpi_namespace_node *region_node,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_create_bank_field (
- union acpi_parse_object *op,
- struct acpi_namespace_node *region_node,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_create_index_field (
- union acpi_parse_object *op,
- struct acpi_namespace_node *region_node,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_create_buffer_field (
- union acpi_parse_object *op,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_init_field_objects (
- union acpi_parse_object *op,
- struct acpi_walk_state *walk_state);
-
-
-/* dsload - Parser/Interpreter interface, namespace load callbacks */
-
-acpi_status
-acpi_ds_load1_begin_op (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object **out_op);
-
-acpi_status
-acpi_ds_load1_end_op (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_load2_begin_op (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object **out_op);
-
-acpi_status
-acpi_ds_load2_end_op (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_init_callbacks (
- struct acpi_walk_state *walk_state,
- u32 pass_number);
-
-
-/* dsmthdat - method data (locals/args) */
-
-
-acpi_status
-acpi_ds_store_object_to_local (
- u16 opcode,
- u32 index,
- union acpi_operand_object *src_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_method_data_get_entry (
- u16 opcode,
- u32 index,
- struct acpi_walk_state *walk_state,
- union acpi_operand_object ***node);
-
-void
-acpi_ds_method_data_delete_all (
- struct acpi_walk_state *walk_state);
-
-u8
-acpi_ds_is_method_value (
- union acpi_operand_object *obj_desc);
-
-acpi_object_type
-acpi_ds_method_data_get_type (
- u16 opcode,
- u32 index,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_method_data_get_value (
- u16 opcode,
- u32 index,
- struct acpi_walk_state *walk_state,
- union acpi_operand_object **dest_desc);
-
-void
-acpi_ds_method_data_delete_value (
- u16 opcode,
- u32 index,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_method_data_init_args (
- union acpi_operand_object **params,
- u32 max_param_count,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_method_data_get_node (
- u16 opcode,
- u32 index,
- struct acpi_walk_state *walk_state,
- struct acpi_namespace_node **node);
-
-void
-acpi_ds_method_data_init (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_method_data_set_value (
- u16 opcode,
- u32 index,
- union acpi_operand_object *object,
- struct acpi_walk_state *walk_state);
-
-
-/* dsmethod - Parser/Interpreter interface - control method parsing */
-
-acpi_status
-acpi_ds_parse_method (
- acpi_handle obj_handle);
-
-acpi_status
-acpi_ds_call_control_method (
- struct acpi_thread_state *thread,
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-acpi_status
-acpi_ds_restart_control_method (
- struct acpi_walk_state *walk_state,
- union acpi_operand_object *return_desc);
-
-acpi_status
-acpi_ds_terminate_control_method (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_begin_method_execution (
- struct acpi_namespace_node *method_node,
- union acpi_operand_object *obj_desc,
- struct acpi_namespace_node *calling_method_node);
-
-
-/* dsobj - Parser/Interpreter interface - object initialization and conversion */
-
-acpi_status
-acpi_ds_init_one_object (
- acpi_handle obj_handle,
- u32 level,
- void *context,
- void **return_value);
-
-acpi_status
-acpi_ds_initialize_objects (
- struct acpi_table_desc *table_desc,
- struct acpi_namespace_node *start_node);
-
-acpi_status
-acpi_ds_build_internal_buffer_obj (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- u32 buffer_length,
- union acpi_operand_object **obj_desc_ptr);
-
-acpi_status
-acpi_ds_build_internal_package_obj (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- u32 package_length,
- union acpi_operand_object **obj_desc);
-
-acpi_status
-acpi_ds_build_internal_object (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- union acpi_operand_object **obj_desc_ptr);
-
-acpi_status
-acpi_ds_init_object_from_op (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- u16 opcode,
- union acpi_operand_object **obj_desc);
-
-acpi_status
-acpi_ds_create_node (
- struct acpi_walk_state *walk_state,
- struct acpi_namespace_node *node,
- union acpi_parse_object *op);
-
-
-/* dsutils - Parser/Interpreter interface utility routines */
-
-u8
-acpi_ds_is_result_used (
- union acpi_parse_object *op,
- struct acpi_walk_state *walk_state);
-
-void
-acpi_ds_delete_result_if_not_used (
- union acpi_parse_object *op,
- union acpi_operand_object *result_obj,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_create_operand (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *arg,
- u32 args_remaining);
-
-acpi_status
-acpi_ds_create_operands (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *first_arg);
-
-acpi_status
-acpi_ds_resolve_operands (
- struct acpi_walk_state *walk_state);
-
-void
-acpi_ds_clear_operands (
- struct acpi_walk_state *walk_state);
-
-
-/*
- * dswscope - Scope Stack manipulation
- */
-
-acpi_status
-acpi_ds_scope_stack_push (
- struct acpi_namespace_node *node,
- acpi_object_type type,
- struct acpi_walk_state *walk_state);
-
-
-acpi_status
-acpi_ds_scope_stack_pop (
- struct acpi_walk_state *walk_state);
-
-void
-acpi_ds_scope_stack_clear (
- struct acpi_walk_state *walk_state);
-
-
-/* dswstate - parser WALK_STATE management routines */
-
-struct acpi_walk_state *
-acpi_ds_create_walk_state (
- acpi_owner_id owner_id,
- union acpi_parse_object *origin,
- union acpi_operand_object *mth_desc,
- struct acpi_thread_state *thread);
-
-acpi_status
-acpi_ds_init_aml_walk (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- struct acpi_namespace_node *method_node,
- u8 *aml_start,
- u32 aml_length,
- union acpi_operand_object **params,
- union acpi_operand_object **return_obj_desc,
- u32 pass_number);
-
-acpi_status
-acpi_ds_obj_stack_delete_all (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_obj_stack_pop_and_delete (
- u32 pop_count,
- struct acpi_walk_state *walk_state);
-
-void
-acpi_ds_delete_walk_state (
- struct acpi_walk_state *walk_state);
-
-struct acpi_walk_state *
-acpi_ds_pop_walk_state (
- struct acpi_thread_state *thread);
-
-void
-acpi_ds_push_walk_state (
- struct acpi_walk_state *walk_state,
- struct acpi_thread_state *thread);
-
-acpi_status
-acpi_ds_result_stack_pop (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_result_stack_push (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_result_stack_clear (
- struct acpi_walk_state *walk_state);
-
-struct acpi_walk_state *
-acpi_ds_get_current_walk_state (
- struct acpi_thread_state *thread);
-
-void
-acpi_ds_delete_walk_state_cache (
- void);
-
-acpi_status
-acpi_ds_result_insert (
- void *object,
- u32 index,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_result_remove (
- union acpi_operand_object **object,
- u32 index,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_result_pop (
- union acpi_operand_object **object,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_result_push (
- union acpi_operand_object *object,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ds_result_pop_from_bottom (
- union acpi_operand_object **object,
- struct acpi_walk_state *walk_state);
-
-#endif /* _ACDISPAT_H_ */
diff --git a/xen/include/acpi/acevents.h b/xen/include/acpi/acevents.h
deleted file mode 100644
index 4ab99b2fd6..0000000000
--- a/xen/include/acpi/acevents.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/******************************************************************************
- *
- * Name: acevents.h - Event subcomponent prototypes and defines
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACEVENTS_H__
-#define __ACEVENTS_H__
-
-
-acpi_status
-acpi_ev_initialize (
- void);
-
-acpi_status
-acpi_ev_handler_initialize (
- void);
-
-
-/*
- * Evfixed - Fixed event handling
- */
-
-acpi_status
-acpi_ev_fixed_event_initialize (
- void);
-
-u32
-acpi_ev_fixed_event_detect (
- void);
-
-u32
-acpi_ev_fixed_event_dispatch (
- u32 event);
-
-
-/*
- * Evmisc
- */
-
-u8
-acpi_ev_is_notify_object (
- struct acpi_namespace_node *node);
-
-acpi_status
-acpi_ev_acquire_global_lock(
- u16 timeout);
-
-acpi_status
-acpi_ev_release_global_lock(
- void);
-
-acpi_status
-acpi_ev_init_global_lock_handler (
- void);
-
-u32
-acpi_ev_get_gpe_number_index (
- u32 gpe_number);
-
-acpi_status
-acpi_ev_queue_notify_request (
- struct acpi_namespace_node *node,
- u32 notify_value);
-
-void ACPI_SYSTEM_XFACE
-acpi_ev_notify_dispatch (
- void *context);
-
-
-/*
- * Evgpe - GPE handling and dispatch
- */
-
-acpi_status
-acpi_ev_walk_gpe_list (
- ACPI_GPE_CALLBACK gpe_walk_callback);
-
-u8
-acpi_ev_valid_gpe_event (
- struct acpi_gpe_event_info *gpe_event_info);
-
-struct acpi_gpe_event_info *
-acpi_ev_get_gpe_event_info (
- acpi_handle gpe_device,
- u32 gpe_number);
-
-acpi_status
-acpi_ev_gpe_initialize (
- void);
-
-acpi_status
-acpi_ev_create_gpe_block (
- struct acpi_namespace_node *gpe_device,
- struct acpi_generic_address *gpe_block_address,
- u32 register_count,
- u8 gpe_block_base_number,
- u32 interrupt_level,
- struct acpi_gpe_block_info **return_gpe_block);
-
-acpi_status
-acpi_ev_delete_gpe_block (
- struct acpi_gpe_block_info *gpe_block);
-
-u32
-acpi_ev_gpe_dispatch (
- struct acpi_gpe_event_info *gpe_event_info,
- u32 gpe_number);
-
-u32
-acpi_ev_gpe_detect (
- struct acpi_gpe_xrupt_info *gpe_xrupt_list);
-
-/*
- * Evregion - Address Space handling
- */
-
-acpi_status
-acpi_ev_init_address_spaces (
- void);
-
-acpi_status
-acpi_ev_address_space_dispatch (
- union acpi_operand_object *region_obj,
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- void *value);
-
-acpi_status
-acpi_ev_install_handler (
- acpi_handle obj_handle,
- u32 level,
- void *context,
- void **return_value);
-
-acpi_status
-acpi_ev_attach_region (
- union acpi_operand_object *handler_obj,
- union acpi_operand_object *region_obj,
- u8 acpi_ns_is_locked);
-
-void
-acpi_ev_detach_region (
- union acpi_operand_object *region_obj,
- u8 acpi_ns_is_locked);
-
-acpi_status
-acpi_ev_execute_reg_method (
- union acpi_operand_object *region_obj,
- u32 function);
-
-acpi_status
-acpi_ev_reg_run (
- acpi_handle obj_handle,
- u32 level,
- void *context,
- void **return_value);
-
-/*
- * Evregini - Region initialization and setup
- */
-
-acpi_status
-acpi_ev_system_memory_region_setup (
- acpi_handle handle,
- u32 function,
- void *handler_context,
- void **region_context);
-
-acpi_status
-acpi_ev_io_space_region_setup (
- acpi_handle handle,
- u32 function,
- void *handler_context,
- void **region_context);
-
-acpi_status
-acpi_ev_pci_config_region_setup (
- acpi_handle handle,
- u32 function,
- void *handler_context,
- void **region_context);
-
-acpi_status
-acpi_ev_cmos_region_setup (
- acpi_handle handle,
- u32 function,
- void *handler_context,
- void **region_context);
-
-acpi_status
-acpi_ev_pci_bar_region_setup (
- acpi_handle handle,
- u32 function,
- void *handler_context,
- void **region_context);
-
-acpi_status
-acpi_ev_default_region_setup (
- acpi_handle handle,
- u32 function,
- void *handler_context,
- void **region_context);
-
-acpi_status
-acpi_ev_initialize_region (
- union acpi_operand_object *region_obj,
- u8 acpi_ns_locked);
-
-
-/*
- * Evsci - SCI (System Control Interrupt) handling/dispatch
- */
-
-u32 ACPI_SYSTEM_XFACE
-acpi_ev_gpe_xrupt_handler (
- void *context);
-
-u32
-acpi_ev_install_sci_handler (
- void);
-
-acpi_status
-acpi_ev_remove_sci_handler (
- void);
-
-u32
-acpi_ev_initialize_sCI (
- u32 program_sCI);
-
-void
-acpi_ev_terminate (
- void);
-
-
-#endif /* __ACEVENTS_H__ */
diff --git a/xen/include/acpi/acexcep.h b/xen/include/acpi/acexcep.h
index 9441f5c1a7..53f8b50fac 100644
--- a/xen/include/acpi/acexcep.h
+++ b/xen/include/acpi/acexcep.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,8 +95,9 @@
#define AE_LOGICAL_ADDRESS (acpi_status) (0x001B | AE_CODE_ENVIRONMENTAL)
#define AE_ABORT_METHOD (acpi_status) (0x001C | AE_CODE_ENVIRONMENTAL)
#define AE_SAME_HANDLER (acpi_status) (0x001D | AE_CODE_ENVIRONMENTAL)
+#define AE_WAKE_ONLY_GPE (acpi_status) (0x001E | AE_CODE_ENVIRONMENTAL)
-#define AE_CODE_ENV_MAX 0x001D
+#define AE_CODE_ENV_MAX 0x001E
/*
* Programmer exceptions
@@ -165,7 +166,7 @@
#define AE_AML_CIRCULAR_REFERENCE (acpi_status) (0x0020 | AE_CODE_AML)
#define AE_AML_BAD_RESOURCE_LENGTH (acpi_status) (0x0021 | AE_CODE_AML)
-#define AE_CODE_AML_MAX 0x0020
+#define AE_CODE_AML_MAX 0x0021
/*
* Internal exceptions used for control
@@ -222,7 +223,8 @@ char const *acpi_gbl_exception_names_env[] =
"AE_NO_GLOBAL_LOCK",
"AE_LOGICAL_ADDRESS",
"AE_ABORT_METHOD",
- "AE_SAME_HANDLER"
+ "AE_SAME_HANDLER",
+ "AE_WAKE_ONLY_GPE"
};
char const *acpi_gbl_exception_names_pgm[] =
diff --git a/xen/include/acpi/acglobal.h b/xen/include/acpi/acglobal.h
index 9cf587e950..c7f387a972 100644
--- a/xen/include/acpi/acglobal.h
+++ b/xen/include/acpi/acglobal.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,17 +46,26 @@
/*
- * Ensure that the globals are actually defined only once.
+ * Ensure that the globals are actually defined and initialized only once.
*
- * The use of these defines allows a single list of globals (here) in order
+ * The use of these macros allows a single list of globals (here) in order
* to simplify maintenance of the code.
*/
#ifdef DEFINE_ACPI_GLOBALS
#define ACPI_EXTERN
+#define ACPI_INIT_GLOBAL(a,b) a=b
#else
#define ACPI_EXTERN extern
+#define ACPI_INIT_GLOBAL(a,b) a
#endif
+/*
+ * Keep local copies of these FADT-based registers. NOTE: These globals
+ * are first in this file for alignment reasons on 64-bit systems.
+ */
+ACPI_EXTERN struct acpi_generic_address acpi_gbl_xpm1a_enable;
+ACPI_EXTERN struct acpi_generic_address acpi_gbl_xpm1b_enable;
+
/*****************************************************************************
*
@@ -76,6 +85,46 @@ extern u32 acpi_gbl_nesting_level;
/*****************************************************************************
*
+ * Runtime configuration (static defaults that can be overriden at runtime)
+ *
+ ****************************************************************************/
+
+/*
+ * Enable "slack" in the AML interpreter? Default is FALSE, and the
+ * interpreter strictly follows the ACPI specification. Setting to TRUE
+ * allows the interpreter to forgive certain bad AML constructs. Currently:
+ * 1) Allow "implicit return" of last value in a control method
+ * 2) Allow access beyond end of operation region
+ * 3) Allow access to uninitialized locals/args (auto-init to integer 0)
+ * 4) Allow ANY object type to be a source operand for the Store() operator
+ */
+ACPI_EXTERN u8 ACPI_INIT_GLOBAL (acpi_gbl_enable_interpreter_slack, FALSE);
+
+/*
+ * Automatically serialize ALL control methods? Default is FALSE, meaning
+ * to use the Serialized/not_serialized method flags on a per method basis.
+ * Only change this if the ASL code is poorly written and cannot handle
+ * reentrancy even though methods are marked "not_serialized".
+ */
+ACPI_EXTERN u8 ACPI_INIT_GLOBAL (acpi_gbl_all_methods_serialized, FALSE);
+
+/*
+ * Create the predefined _OSI method in the namespace? Default is TRUE
+ * because ACPI CA is fully compatible with other ACPI implementations.
+ * Changing this will revert ACPI CA (and machine ASL) to pre-OSI behavior.
+ */
+ACPI_EXTERN u8 ACPI_INIT_GLOBAL (acpi_gbl_create_osi_method, TRUE);
+
+/*
+ * Disable wakeup GPEs during runtime? Default is TRUE because WAKE and
+ * RUNTIME GPEs should never be shared, and WAKE GPEs should typically only
+ * be enabled just before going to sleep.
+ */
+ACPI_EXTERN u8 ACPI_INIT_GLOBAL (acpi_gbl_leave_wake_gpes_disabled, TRUE);
+
+
+/*****************************************************************************
+ *
* ACPI Table globals
*
****************************************************************************/
@@ -87,7 +136,6 @@ extern u32 acpi_gbl_nesting_level;
*
* These tables are single-table only; meaning that there can be at most one
* of each in the system. Each global points to the actual table.
- *
*/
ACPI_EXTERN u32 acpi_gbl_table_flags;
ACPI_EXTERN u32 acpi_gbl_rsdt_table_count;
@@ -97,6 +145,11 @@ ACPI_EXTERN FADT_DESCRIPTOR *acpi_gbl_FADT;
ACPI_EXTERN struct acpi_table_header *acpi_gbl_DSDT;
ACPI_EXTERN FACS_DESCRIPTOR *acpi_gbl_FACS;
ACPI_EXTERN struct acpi_common_facs acpi_gbl_common_fACS;
+/*
+ * Since there may be multiple SSDTs and PSDTS, a single pointer is not
+ * sufficient; Therefore, there isn't one!
+ */
+
/*
* Handle both ACPI 1.0 and ACPI 2.0 Integer widths
@@ -107,17 +160,6 @@ ACPI_EXTERN u8 acpi_gbl_integer_bit_width;
ACPI_EXTERN u8 acpi_gbl_integer_byte_width;
ACPI_EXTERN u8 acpi_gbl_integer_nybble_width;
-/* Keep local copies of these FADT-based registers */
-
-ACPI_EXTERN struct acpi_generic_address acpi_gbl_xpm1a_enable;
-ACPI_EXTERN struct acpi_generic_address acpi_gbl_xpm1b_enable;
-
-/*
- * Since there may be multiple SSDTs and PSDTS, a single pointer is not
- * sufficient; Therefore, there isn't one!
- */
-
-
/*
* ACPI Table info arrays
*/
@@ -142,6 +184,7 @@ ACPI_EXTERN struct acpi_mutex_info acpi_gbl_mutex_info[NUM_MUTEX];
ACPI_EXTERN struct acpi_memory_list acpi_gbl_memory_lists[ACPI_NUM_MEM_LISTS];
ACPI_EXTERN struct acpi_object_notify_handler acpi_gbl_device_notify;
ACPI_EXTERN struct acpi_object_notify_handler acpi_gbl_system_notify;
+ACPI_EXTERN acpi_exception_handler acpi_gbl_exception_handler;
ACPI_EXTERN acpi_init_handler acpi_gbl_init_handler;
ACPI_EXTERN struct acpi_walk_state *acpi_gbl_breakpoint_walk;
ACPI_EXTERN acpi_handle acpi_gbl_global_lock_semaphore;
@@ -161,13 +204,16 @@ ACPI_EXTERN u8 acpi_gbl_step_to_next_call;
ACPI_EXTERN u8 acpi_gbl_acpi_hardware_present;
ACPI_EXTERN u8 acpi_gbl_global_lock_present;
ACPI_EXTERN u8 acpi_gbl_events_initialized;
+ACPI_EXTERN u8 acpi_gbl_system_awake_and_running;
extern u8 acpi_gbl_shutdown;
extern u32 acpi_gbl_startup_flags;
extern const u8 acpi_gbl_decode_to8bit[8];
-extern const char *acpi_gbl_db_sleep_states[ACPI_S_STATE_COUNT];
+extern const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT];
+extern const char *acpi_gbl_highest_dstate_names[4];
extern const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES];
extern const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS];
+extern const char *acpi_gbl_valid_osi_strings[ACPI_NUM_OSI_STRINGS];
/*****************************************************************************
@@ -178,7 +224,7 @@ extern const char *acpi_gbl_region_types[ACPI_NUM_P
#define NUM_NS_TYPES ACPI_TYPE_INVALID+1
-#if defined (ACPI_NO_METHOD_EXECUTION) || defined (ACPI_CONSTANT_EVAL_ONLY)
+#if !defined (ACPI_NO_METHOD_EXECUTION) || defined (ACPI_CONSTANT_EVAL_ONLY)
#define NUM_PREDEFINED_NAMES 10
#else
#define NUM_PREDEFINED_NAMES 9
@@ -186,6 +232,7 @@ extern const char *acpi_gbl_region_types[ACPI_NUM_P
ACPI_EXTERN struct acpi_namespace_node acpi_gbl_root_node_struct;
ACPI_EXTERN struct acpi_namespace_node *acpi_gbl_root_node;
+ACPI_EXTERN struct acpi_namespace_node *acpi_gbl_fadt_gpe_device;
extern const u8 acpi_gbl_ns_properties[NUM_NS_TYPES];
extern const struct acpi_predefined_names acpi_gbl_pre_defined_names [NUM_PREDEFINED_NAMES];
diff --git a/xen/include/acpi/achware.h b/xen/include/acpi/achware.h
index cfcff91f6d..28ad1398c1 100644
--- a/xen/include/acpi/achware.h
+++ b/xen/include/acpi/achware.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -114,15 +114,7 @@ acpi_hw_clear_acpi_status (
/* GPE support */
acpi_status
-acpi_hw_enable_gpe (
- struct acpi_gpe_event_info *gpe_event_info);
-
-void
-acpi_hw_enable_gpe_for_wakeup (
- struct acpi_gpe_event_info *gpe_event_info);
-
-acpi_status
-acpi_hw_disable_gpe (
+acpi_hw_write_gpe_enable_reg (
struct acpi_gpe_event_info *gpe_event_info);
acpi_status
@@ -130,10 +122,6 @@ acpi_hw_disable_gpe_block (
struct acpi_gpe_xrupt_info *gpe_xrupt_info,
struct acpi_gpe_block_info *gpe_block);
-void
-acpi_hw_disable_gpe_for_wakeup (
- struct acpi_gpe_event_info *gpe_event_info);
-
acpi_status
acpi_hw_clear_gpe (
struct acpi_gpe_event_info *gpe_event_info);
@@ -143,22 +131,39 @@ acpi_hw_clear_gpe_block (
struct acpi_gpe_xrupt_info *gpe_xrupt_info,
struct acpi_gpe_block_info *gpe_block);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_hw_get_gpe_status (
struct acpi_gpe_event_info *gpe_event_info,
acpi_event_status *event_status);
+#endif
acpi_status
-acpi_hw_disable_non_wakeup_gpes (
- void);
+acpi_hw_disable_all_gpes (
+ u32 flags);
acpi_status
-acpi_hw_enable_non_wakeup_gpes (
- void);
+acpi_hw_enable_all_runtime_gpes (
+ u32 flags);
+
+acpi_status
+acpi_hw_enable_all_wakeup_gpes (
+ u32 flags);
+
+acpi_status
+acpi_hw_enable_runtime_gpe_block (
+ struct acpi_gpe_xrupt_info *gpe_xrupt_info,
+ struct acpi_gpe_block_info *gpe_block);
+
+acpi_status
+acpi_hw_enable_wakeup_gpe_block (
+ struct acpi_gpe_xrupt_info *gpe_xrupt_info,
+ struct acpi_gpe_block_info *gpe_block);
/* ACPI Timer prototypes */
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_get_timer_resolution (
u32 *resolution);
@@ -172,6 +177,6 @@ acpi_get_timer_duration (
u32 start_ticks,
u32 end_ticks,
u32 *time_elapsed);
-
+#endif /* ACPI_FUTURE_USAGE */
#endif /* __ACHWARE_H__ */
diff --git a/xen/include/acpi/acinterp.h b/xen/include/acpi/acinterp.h
deleted file mode 100644
index 0bcb194c0f..0000000000
--- a/xen/include/acpi/acinterp.h
+++ /dev/null
@@ -1,738 +0,0 @@
-/******************************************************************************
- *
- * Name: acinterp.h - Interpreter subcomponent prototypes and defines
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACINTERP_H__
-#define __ACINTERP_H__
-
-
-#define ACPI_WALK_OPERANDS (&(walk_state->operands [walk_state->num_operands -1]))
-
-
-acpi_status
-acpi_ex_resolve_operands (
- u16 opcode,
- union acpi_operand_object **stack_ptr,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_check_object_type (
- acpi_object_type type_needed,
- acpi_object_type this_type,
- void *object);
-
-/*
- * exxface - External interpreter interfaces
- */
-
-acpi_status
-acpi_ex_load_table (
- acpi_table_type table_id);
-
-acpi_status
-acpi_ex_execute_method (
- struct acpi_namespace_node *method_node,
- union acpi_operand_object **params,
- union acpi_operand_object **return_obj_desc);
-
-
-/*
- * exconvrt - object conversion
- */
-
-acpi_status
-acpi_ex_convert_to_integer (
- union acpi_operand_object *obj_desc,
- union acpi_operand_object **result_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_convert_to_buffer (
- union acpi_operand_object *obj_desc,
- union acpi_operand_object **result_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_convert_to_string (
- union acpi_operand_object *obj_desc,
- union acpi_operand_object **result_desc,
- u32 base,
- u32 max_length,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_convert_to_target_type (
- acpi_object_type destination_type,
- union acpi_operand_object *source_desc,
- union acpi_operand_object **result_desc,
- struct acpi_walk_state *walk_state);
-
-u32
-acpi_ex_convert_to_ascii (
- acpi_integer integer,
- u32 base,
- u8 *string,
- u8 max_length);
-
-/*
- * exfield - ACPI AML (p-code) execution - field manipulation
- */
-
-acpi_status
-acpi_ex_extract_from_field (
- union acpi_operand_object *obj_desc,
- void *buffer,
- u32 buffer_length);
-
-acpi_status
-acpi_ex_insert_into_field (
- union acpi_operand_object *obj_desc,
- void *buffer,
- u32 buffer_length);
-
-acpi_status
-acpi_ex_setup_region (
- union acpi_operand_object *obj_desc,
- u32 field_datum_byte_offset);
-
-acpi_status
-acpi_ex_access_region (
- union acpi_operand_object *obj_desc,
- u32 field_datum_byte_offset,
- acpi_integer *value,
- u32 read_write);
-
-u8
-acpi_ex_register_overflow (
- union acpi_operand_object *obj_desc,
- acpi_integer value);
-
-acpi_status
-acpi_ex_field_datum_io (
- union acpi_operand_object *obj_desc,
- u32 field_datum_byte_offset,
- acpi_integer *value,
- u32 read_write);
-
-acpi_status
-acpi_ex_write_with_update_rule (
- union acpi_operand_object *obj_desc,
- acpi_integer mask,
- acpi_integer field_value,
- u32 field_datum_byte_offset);
-
-void
-acpi_ex_get_buffer_datum(
- acpi_integer *datum,
- void *buffer,
- u32 buffer_length,
- u32 byte_granularity,
- u32 buffer_offset);
-
-void
-acpi_ex_set_buffer_datum (
- acpi_integer merged_datum,
- void *buffer,
- u32 buffer_length,
- u32 byte_granularity,
- u32 buffer_offset);
-
-acpi_status
-acpi_ex_read_data_from_field (
- struct acpi_walk_state *walk_state,
- union acpi_operand_object *obj_desc,
- union acpi_operand_object **ret_buffer_desc);
-
-acpi_status
-acpi_ex_write_data_to_field (
- union acpi_operand_object *source_desc,
- union acpi_operand_object *obj_desc,
- union acpi_operand_object **result_desc);
-
-/*
- * exmisc - ACPI AML (p-code) execution - specific opcodes
- */
-
-acpi_status
-acpi_ex_opcode_3A_0T_0R (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_opcode_3A_1T_1R (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_opcode_6A_0T_1R (
- struct acpi_walk_state *walk_state);
-
-u8
-acpi_ex_do_match (
- u32 match_op,
- acpi_integer package_value,
- acpi_integer match_value);
-
-acpi_status
-acpi_ex_get_object_reference (
- union acpi_operand_object *obj_desc,
- union acpi_operand_object **return_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_resolve_multiple (
- struct acpi_walk_state *walk_state,
- union acpi_operand_object *operand,
- acpi_object_type *return_type,
- union acpi_operand_object **return_desc);
-
-acpi_status
-acpi_ex_concat_template (
- union acpi_operand_object *obj_desc,
- union acpi_operand_object *obj_desc2,
- union acpi_operand_object **actual_return_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_do_concatenate (
- union acpi_operand_object *obj_desc,
- union acpi_operand_object *obj_desc2,
- union acpi_operand_object **actual_return_desc,
- struct acpi_walk_state *walk_state);
-
-u8
-acpi_ex_do_logical_op (
- u16 opcode,
- acpi_integer operand0,
- acpi_integer operand1);
-
-acpi_integer
-acpi_ex_do_math_op (
- u16 opcode,
- acpi_integer operand0,
- acpi_integer operand1);
-
-acpi_status
-acpi_ex_create_mutex (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_create_processor (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_create_power_resource (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_create_region (
- u8 *aml_start,
- u32 aml_length,
- u8 region_space,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_create_table_region (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_create_event (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_create_alias (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_create_method (
- u8 *aml_start,
- u32 aml_length,
- struct acpi_walk_state *walk_state);
-
-
-/*
- * exconfig - dynamic table load/unload
- */
-
-acpi_status
-acpi_ex_add_table (
- struct acpi_table_header *table,
- struct acpi_namespace_node *parent_node,
- union acpi_operand_object **ddb_handle);
-
-acpi_status
-acpi_ex_load_op (
- union acpi_operand_object *obj_desc,
- union acpi_operand_object *target,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_load_table_op (
- struct acpi_walk_state *walk_state,
- union acpi_operand_object **return_desc);
-
-acpi_status
-acpi_ex_unload_table (
- union acpi_operand_object *ddb_handle);
-
-
-/*
- * exmutex - mutex support
- */
-
-acpi_status
-acpi_ex_acquire_mutex (
- union acpi_operand_object *time_desc,
- union acpi_operand_object *obj_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_release_mutex (
- union acpi_operand_object *obj_desc,
- struct acpi_walk_state *walk_state);
-
-void
-acpi_ex_release_all_mutexes (
- struct acpi_thread_state *thread);
-
-void
-acpi_ex_unlink_mutex (
- union acpi_operand_object *obj_desc);
-
-void
-acpi_ex_link_mutex (
- union acpi_operand_object *obj_desc,
- struct acpi_thread_state *thread);
-
-/*
- * exprep - ACPI AML (p-code) execution - prep utilities
- */
-
-acpi_status
-acpi_ex_prep_common_field_object (
- union acpi_operand_object *obj_desc,
- u8 field_flags,
- u8 field_attribute,
- u32 field_bit_position,
- u32 field_bit_length);
-
-acpi_status
-acpi_ex_prep_field_value (
- struct acpi_create_field_info *info);
-
-/*
- * exsystem - Interface to OS services
- */
-
-acpi_status
-acpi_ex_system_do_notify_op (
- union acpi_operand_object *value,
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ex_system_do_suspend(
- u32 time);
-
-acpi_status
-acpi_ex_system_do_stall (
- u32 time);
-
-acpi_status
-acpi_ex_system_acquire_mutex(
- union acpi_operand_object *time,
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ex_system_release_mutex(
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ex_system_signal_event(
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ex_system_wait_event(
- union acpi_operand_object *time,
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ex_system_reset_event(
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ex_system_wait_semaphore (
- acpi_handle semaphore,
- u16 timeout);
-
-
-/*
- * exmonadic - ACPI AML (p-code) execution, monadic operators
- */
-
-acpi_status
-acpi_ex_opcode_1A_0T_0R (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_opcode_1A_0T_1R (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_opcode_1A_1T_1R (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_opcode_1A_1T_0R (
- struct acpi_walk_state *walk_state);
-
-/*
- * exdyadic - ACPI AML (p-code) execution, dyadic operators
- */
-
-acpi_status
-acpi_ex_opcode_2A_0T_0R (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_opcode_2A_0T_1R (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_opcode_2A_1T_1R (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_opcode_2A_2T_1R (
- struct acpi_walk_state *walk_state);
-
-
-/*
- * exresolv - Object resolution and get value functions
- */
-
-acpi_status
-acpi_ex_resolve_to_value (
- union acpi_operand_object **stack_ptr,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_resolve_node_to_value (
- struct acpi_namespace_node **stack_ptr,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_resolve_object_to_value (
- union acpi_operand_object **stack_ptr,
- struct acpi_walk_state *walk_state);
-
-
-/*
- * exdump - Scanner debug output routines
- */
-
-void
-acpi_ex_dump_operand (
- union acpi_operand_object *entry_desc);
-
-void
-acpi_ex_dump_operands (
- union acpi_operand_object **operands,
- acpi_interpreter_mode interpreter_mode,
- char *ident,
- u32 num_levels,
- char *note,
- char *module_name,
- u32 line_number);
-
-void
-acpi_ex_dump_object_descriptor (
- union acpi_operand_object *object,
- u32 flags);
-
-void
-acpi_ex_dump_node (
- struct acpi_namespace_node *node,
- u32 flags);
-
-void
-acpi_ex_out_string (
- char *title,
- char *value);
-
-void
-acpi_ex_out_pointer (
- char *title,
- void *value);
-
-void
-acpi_ex_out_integer (
- char *title,
- u32 value);
-
-void
-acpi_ex_out_address (
- char *title,
- acpi_physical_address value);
-
-
-/*
- * exnames - interpreter/scanner name load/execute
- */
-
-char *
-acpi_ex_allocate_name_string (
- u32 prefix_count,
- u32 num_name_segs);
-
-u32
-acpi_ex_good_char (
- u32 character);
-
-acpi_status
-acpi_ex_name_segment (
- u8 **in_aml_address,
- char *name_string);
-
-acpi_status
-acpi_ex_get_name_string (
- acpi_object_type data_type,
- u8 *in_aml_address,
- char **out_name_string,
- u32 *out_name_length);
-
-acpi_status
-acpi_ex_do_name (
- acpi_object_type data_type,
- acpi_interpreter_mode load_exec_mode);
-
-
-/*
- * exstore - Object store support
- */
-
-acpi_status
-acpi_ex_store (
- union acpi_operand_object *val_desc,
- union acpi_operand_object *dest_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_store_object_to_index (
- union acpi_operand_object *val_desc,
- union acpi_operand_object *dest_desc,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_store_object_to_node (
- union acpi_operand_object *source_desc,
- struct acpi_namespace_node *node,
- struct acpi_walk_state *walk_state);
-
-
-/*
- * exstoren
- */
-
-acpi_status
-acpi_ex_resolve_object (
- union acpi_operand_object **source_desc_ptr,
- acpi_object_type target_type,
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ex_store_object_to_object (
- union acpi_operand_object *source_desc,
- union acpi_operand_object *dest_desc,
- union acpi_operand_object **new_desc,
- struct acpi_walk_state *walk_state);
-
-
-/*
- * excopy - object copy
- */
-
-acpi_status
-acpi_ex_store_buffer_to_buffer (
- union acpi_operand_object *source_desc,
- union acpi_operand_object *target_desc);
-
-acpi_status
-acpi_ex_store_string_to_string (
- union acpi_operand_object *source_desc,
- union acpi_operand_object *target_desc);
-
-acpi_status
-acpi_ex_copy_integer_to_index_field (
- union acpi_operand_object *source_desc,
- union acpi_operand_object *target_desc);
-
-acpi_status
-acpi_ex_copy_integer_to_bank_field (
- union acpi_operand_object *source_desc,
- union acpi_operand_object *target_desc);
-
-acpi_status
-acpi_ex_copy_data_to_named_field (
- union acpi_operand_object *source_desc,
- struct acpi_namespace_node *node);
-
-acpi_status
-acpi_ex_copy_integer_to_buffer_field (
- union acpi_operand_object *source_desc,
- union acpi_operand_object *target_desc);
-
-/*
- * exutils - interpreter/scanner utilities
- */
-
-acpi_status
-acpi_ex_enter_interpreter (
- void);
-
-void
-acpi_ex_exit_interpreter (
- void);
-
-void
-acpi_ex_truncate_for32bit_table (
- union acpi_operand_object *obj_desc);
-
-u8
-acpi_ex_acquire_global_lock (
- u32 rule);
-
-void
-acpi_ex_release_global_lock (
- u8 locked);
-
-u32
-acpi_ex_digits_needed (
- acpi_integer value,
- u32 base);
-
-void
-acpi_ex_eisa_id_to_string (
- u32 numeric_id,
- char *out_string);
-
-void
-acpi_ex_unsigned_integer_to_string (
- acpi_integer value,
- char *out_string);
-
-
-/*
- * exregion - default op_region handlers
- */
-
-acpi_status
-acpi_ex_system_memory_space_handler (
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- acpi_integer *value,
- void *handler_context,
- void *region_context);
-
-acpi_status
-acpi_ex_system_io_space_handler (
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- acpi_integer *value,
- void *handler_context,
- void *region_context);
-
-acpi_status
-acpi_ex_pci_config_space_handler (
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- acpi_integer *value,
- void *handler_context,
- void *region_context);
-
-acpi_status
-acpi_ex_cmos_space_handler (
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- acpi_integer *value,
- void *handler_context,
- void *region_context);
-
-acpi_status
-acpi_ex_pci_bar_space_handler (
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- acpi_integer *value,
- void *handler_context,
- void *region_context);
-
-acpi_status
-acpi_ex_embedded_controller_space_handler (
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- acpi_integer *value,
- void *handler_context,
- void *region_context);
-
-acpi_status
-acpi_ex_sm_bus_space_handler (
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- acpi_integer *value,
- void *handler_context,
- void *region_context);
-
-
-acpi_status
-acpi_ex_data_table_space_handler (
- u32 function,
- acpi_physical_address address,
- u32 bit_width,
- acpi_integer *value,
- void *handler_context,
- void *region_context);
-
-#endif /* __INTERP_H__ */
diff --git a/xen/include/acpi/aclocal.h b/xen/include/acpi/aclocal.h
index 62c743bb8e..f2b96f6be4 100644
--- a/xen/include/acpi/aclocal.h
+++ b/xen/include/acpi/aclocal.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,7 +53,7 @@ typedef u32 acpi_mutex_handle;
/* Total number of aml opcodes defined */
-#define AML_NUM_OPCODES 0x7E
+#define AML_NUM_OPCODES 0x7F
/*****************************************************************************
@@ -189,8 +189,6 @@ struct acpi_namespace_node
u8 type; /* Type associated with this name */
u16 owner_id;
union acpi_name_union name; /* ACPI Name, always 4 chars per ACPI spec */
-
-
union acpi_operand_object *object; /* Pointer to attached ACPI object (optional) */
struct acpi_namespace_node *child; /* First child */
struct acpi_namespace_node *peer; /* Next peer*/
@@ -211,10 +209,8 @@ struct acpi_namespace_node
#define ANOBJ_METHOD_LOCAL 0x10
#define ANOBJ_METHOD_NO_RETVAL 0x20
#define ANOBJ_METHOD_SOME_NO_RETVAL 0x40
-
#define ANOBJ_IS_BIT_OFFSET 0x80
-
/*
* ACPI Table Descriptor. One per ACPI table
*/
@@ -309,16 +305,31 @@ struct acpi_create_field_info
*
****************************************************************************/
-/* Information about a GPE, one per each GPE in an array */
+/* Dispatch info for each GPE -- either a method or handler, cannot be both */
-struct acpi_gpe_event_info
+struct acpi_handler_info
{
- struct acpi_namespace_node *method_node; /* Method node for this GPE level */
- acpi_gpe_handler handler; /* Address of handler, if any */
+ acpi_event_handler address; /* Address of handler, if any */
void *context; /* Context to be passed to handler */
+ struct acpi_namespace_node *method_node; /* Method node for this GPE level (saved) */
+};
+
+union acpi_gpe_dispatch_info
+{
+ struct acpi_namespace_node *method_node; /* Method node for this GPE level */
+ struct acpi_handler_info *handler;
+};
+
+/*
+ * Information about a GPE, one per each GPE in an array.
+ * NOTE: Important to keep this struct as small as possible.
+ */
+struct acpi_gpe_event_info
+{
+ union acpi_gpe_dispatch_info dispatch; /* Either Method or Handler */
struct acpi_gpe_register_info *register_info; /* Backpointer to register info */
- u8 flags; /* Level or Edge */
- u8 bit_mask; /* This GPE within the register */
+ u8 flags; /* Misc info about this GPE */
+ u8 register_bit; /* This GPE bit within the register */
};
/* Information about a GPE register pair, one per each status/enable pair in an array */
@@ -327,9 +338,8 @@ struct acpi_gpe_register_info
{
struct acpi_generic_address status_address; /* Address of status reg */
struct acpi_generic_address enable_address; /* Address of enable reg */
- u8 status; /* Current value of status reg */
- u8 enable; /* Current value of enable reg */
- u8 wake_enable; /* Mask of bits to keep enabled when sleeping */
+ u8 enable_for_wake; /* GPEs to keep enabled when sleeping */
+ u8 enable_for_run; /* GPEs to keep enabled when running */
u8 base_gpe_number; /* Base GPE number for this register */
};
@@ -339,6 +349,7 @@ struct acpi_gpe_register_info
*/
struct acpi_gpe_block_info
{
+ struct acpi_namespace_node *node;
struct acpi_gpe_block_info *previous;
struct acpi_gpe_block_info *next;
struct acpi_gpe_xrupt_info *xrupt_block; /* Backpointer to interrupt block */
@@ -360,6 +371,13 @@ struct acpi_gpe_xrupt_info
};
+struct acpi_gpe_walk_info
+{
+ struct acpi_namespace_node *gpe_device;
+ struct acpi_gpe_block_info *gpe_block;
+};
+
+
typedef acpi_status (*ACPI_GPE_CALLBACK) (
struct acpi_gpe_xrupt_info *gpe_xrupt_info,
struct acpi_gpe_block_info *gpe_block);
@@ -495,7 +513,7 @@ struct acpi_thread_state
struct acpi_walk_state *walk_state_list; /* Head of list of walk_states for this thread */
union acpi_operand_object *acquired_mutex_list; /* List of all currently acquired mutexes */
u32 thread_id; /* Running thread ID */
- u16 current_sync_level; /* Mutex Sync (nested acquire) level */
+ u8 current_sync_level; /* Mutex Sync (nested acquire) level */
};
@@ -844,7 +862,6 @@ struct acpi_bit_register_info
/*
* Large resource descriptor types
*/
-
#define ACPI_RDESC_TYPE_MEMORY_24 0x81
#define ACPI_RDESC_TYPE_GENERAL_REGISTER 0x82
#define ACPI_RDESC_TYPE_LARGE_VENDOR 0x84
@@ -854,6 +871,7 @@ struct acpi_bit_register_info
#define ACPI_RDESC_TYPE_WORD_ADDRESS_SPACE 0x88
#define ACPI_RDESC_TYPE_EXTENDED_XRUPT 0x89
#define ACPI_RDESC_TYPE_QWORD_ADDRESS_SPACE 0x8A
+#define ACPI_RDESC_TYPE_EXTENDED_ADDRESS_SPACE 0x8B
/*****************************************************************************
diff --git a/xen/include/acpi/acmacros.h b/xen/include/acpi/acmacros.h
index e1abf964bf..fcaced16b1 100644
--- a/xen/include/acpi/acmacros.h
+++ b/xen/include/acpi/acmacros.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,6 +53,10 @@
#define ACPI_LOBYTE(l) ((u8)(u16)(l))
#define ACPI_HIBYTE(l) ((u8)((((u16)(l)) >> 8) & 0xFF))
+#define ACPI_SET_BIT(target,bit) ((target) |= (bit))
+#define ACPI_CLEAR_BIT(target,bit) ((target) &= ~(bit))
+#define ACPI_MIN(a,b) (((a)<(b))?(a):(b))
+
#if ACPI_MACHINE_WIDTH == 16
@@ -97,7 +101,7 @@
* printf() format helpers
*/
-/* Split 64-bit integer into two 32-bit values. use with %8,8_x%8.8X */
+/* Split 64-bit integer into two 32-bit values. Use with %8.8X%8.8X */
#define ACPI_FORMAT_UINT64(i) ACPI_HIDWORD(i),ACPI_LODWORD(i)
@@ -361,24 +365,6 @@
#define ACPI_IS_OCTAL_DIGIT(d) (((char)(d) >= '0') && ((char)(d) <= '7'))
-/* Macros for GAS addressing */
-
-#if ACPI_MACHINE_WIDTH != 16
-
-#define ACPI_PCI_DEVICE(a) (u16) ((ACPI_HIDWORD ((a))) & 0x0000FFFF)
-#define ACPI_PCI_FUNCTION(a) (u16) ((ACPI_LODWORD ((a))) >> 16)
-#define ACPI_PCI_REGISTER(a) (u16) ((ACPI_LODWORD ((a))) & 0x0000FFFF)
-
-#else
-
-/* No support for GAS and PCI IDs in 16-bit mode */
-
-#define ACPI_PCI_FUNCTION(a) (u16) ((a) & 0xFFFF0000)
-#define ACPI_PCI_DEVICE(a) (u16) ((a) & 0x0000FFFF)
-#define ACPI_PCI_REGISTER(a) (u16) ((a) & 0x0000FFFF)
-
-#endif
-
/* Bitfields within ACPI registers */
@@ -502,19 +488,19 @@
* The first parameter should be the procedure name as a quoted string. This is declared
* as a local string ("_proc_name) so that it can be also used by the function exit macros below.
*/
-#define ACPI_FUNCTION_NAME(a) struct acpi_debug_print_info _dbg; \
- _dbg.component_id = _COMPONENT; \
- _dbg.proc_name = a; \
- _dbg.module_name = _THIS_MODULE;
+#define ACPI_FUNCTION_NAME(a) struct acpi_debug_print_info _debug_info; \
+ _debug_info.component_id = _COMPONENT; \
+ _debug_info.proc_name = a; \
+ _debug_info.module_name = _THIS_MODULE;
#define ACPI_FUNCTION_TRACE(a) ACPI_FUNCTION_NAME(a) \
- acpi_ut_trace(__LINE__,&_dbg)
+ acpi_ut_trace(__LINE__,&_debug_info)
#define ACPI_FUNCTION_TRACE_PTR(a,b) ACPI_FUNCTION_NAME(a) \
- acpi_ut_trace_ptr(__LINE__,&_dbg,(void *)b)
+ acpi_ut_trace_ptr(__LINE__,&_debug_info,(void *)b)
#define ACPI_FUNCTION_TRACE_U32(a,b) ACPI_FUNCTION_NAME(a) \
- acpi_ut_trace_u32(__LINE__,&_dbg,(u32)b)
+ acpi_ut_trace_u32(__LINE__,&_debug_info,(u32)b)
#define ACPI_FUNCTION_TRACE_STR(a,b) ACPI_FUNCTION_NAME(a) \
- acpi_ut_trace_str(__LINE__,&_dbg,(char *)b)
+ acpi_ut_trace_str(__LINE__,&_debug_info,(char *)b)
#define ACPI_FUNCTION_ENTRY() acpi_ut_track_stack_ptr()
@@ -531,10 +517,10 @@
#define ACPI_DO_WHILE0(a) a
#endif
-#define return_VOID ACPI_DO_WHILE0 ({acpi_ut_exit(__LINE__,&_dbg);return;})
-#define return_ACPI_STATUS(s) ACPI_DO_WHILE0 ({acpi_ut_status_exit(__LINE__,&_dbg,(s));return((s));})
-#define return_VALUE(s) ACPI_DO_WHILE0 ({acpi_ut_value_exit(__LINE__,&_dbg,(acpi_integer)(s));return((s));})
-#define return_PTR(s) ACPI_DO_WHILE0 ({acpi_ut_ptr_exit(__LINE__,&_dbg,(u8 *)(s));return((s));})
+#define return_VOID ACPI_DO_WHILE0 ({acpi_ut_exit(__LINE__,&_debug_info);return;})
+#define return_ACPI_STATUS(s) ACPI_DO_WHILE0 ({acpi_ut_status_exit(__LINE__,&_debug_info,(s));return((s));})
+#define return_VALUE(s) ACPI_DO_WHILE0 ({acpi_ut_value_exit(__LINE__,&_debug_info,(acpi_integer)(s));return((s));})
+#define return_PTR(s) ACPI_DO_WHILE0 ({acpi_ut_ptr_exit(__LINE__,&_debug_info,(u8 *)(s));return((s));})
/* Conditional execution */
@@ -548,12 +534,16 @@
/* Stack and buffer dumping */
-#define ACPI_DUMP_STACK_ENTRY(a) acpi_ex_dump_operand(a)
+#define ACPI_DUMP_STACK_ENTRY(a) acpi_ex_dump_operand((a),0)
#define ACPI_DUMP_OPERANDS(a,b,c,d,e) acpi_ex_dump_operands(a,b,c,d,e,_THIS_MODULE,__LINE__)
#define ACPI_DUMP_ENTRY(a,b) acpi_ns_dump_entry (a,b)
+
+#ifdef ACPI_FUTURE_USAGE
#define ACPI_DUMP_TABLES(a,b) acpi_ns_dump_tables(a,b)
+#endif
+
#define ACPI_DUMP_PATHNAME(a,b,c,d) acpi_ns_dump_pathname(a,b,c,d)
#define ACPI_DUMP_RESOURCE_LIST(a) acpi_rs_dump_resource_list(a)
#define ACPI_DUMP_BUFFER(a,b) acpi_ut_dump_buffer((u8 *)a,b,DB_BYTE_DISPLAY,_COMPONENT)
@@ -606,7 +596,11 @@
#define ACPI_DUMP_STACK_ENTRY(a)
#define ACPI_DUMP_OPERANDS(a,b,c,d,e)
#define ACPI_DUMP_ENTRY(a,b)
+
+#ifdef ACPI_FUTURE_USAGE
#define ACPI_DUMP_TABLES(a,b)
+#endif
+
#define ACPI_DUMP_PATHNAME(a,b,c,d)
#define ACPI_DUMP_RESOURCE_LIST(a)
#define ACPI_DUMP_BUFFER(a,b)
@@ -681,7 +675,4 @@
#endif /* ACPI_DBG_TRACK_ALLOCATIONS */
-
-#define ACPI_GET_STACK_POINTER _asm {mov eax, ebx}
-
#endif /* ACMACROS_H */
diff --git a/xen/include/acpi/acnamesp.h b/xen/include/acpi/acnamesp.h
deleted file mode 100644
index de54ba7ba6..0000000000
--- a/xen/include/acpi/acnamesp.h
+++ /dev/null
@@ -1,513 +0,0 @@
-/******************************************************************************
- *
- * Name: acnamesp.h - Namespace subcomponent prototypes and defines
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACNAMESP_H__
-#define __ACNAMESP_H__
-
-
-/* To search the entire name space, pass this as search_base */
-
-#define ACPI_NS_ALL ((acpi_handle)0)
-
-/*
- * Elements of acpi_ns_properties are bit significant
- * and should be one-to-one with values of acpi_object_type
- */
-#define ACPI_NS_NORMAL 0
-#define ACPI_NS_NEWSCOPE 1 /* a definition of this type opens a name scope */
-#define ACPI_NS_LOCAL 2 /* suppress search of enclosing scopes */
-
-
-/* Definitions of the predefined namespace names */
-
-#define ACPI_UNKNOWN_NAME (u32) 0x3F3F3F3F /* Unknown name is "????" */
-#define ACPI_ROOT_NAME (u32) 0x5F5F5F5C /* Root name is "\___" */
-#define ACPI_SYS_BUS_NAME (u32) 0x5F53425F /* Sys bus name is "_SB_" */
-
-#define ACPI_NS_ROOT_PATH "\\"
-#define ACPI_NS_SYSTEM_BUS "_SB_"
-
-
-/* Flags for acpi_ns_lookup, acpi_ns_search_and_enter */
-
-#define ACPI_NS_NO_UPSEARCH 0
-#define ACPI_NS_SEARCH_PARENT 0x01
-#define ACPI_NS_DONT_OPEN_SCOPE 0x02
-#define ACPI_NS_NO_PEER_SEARCH 0x04
-#define ACPI_NS_ERROR_IF_FOUND 0x08
-
-#define ACPI_NS_WALK_UNLOCK TRUE
-#define ACPI_NS_WALK_NO_UNLOCK FALSE
-
-
-acpi_status
-acpi_ns_load_namespace (
- void);
-
-acpi_status
-acpi_ns_initialize_objects (
- void);
-
-acpi_status
-acpi_ns_initialize_devices (
- void);
-
-
-/* Namespace init - nsxfinit */
-
-acpi_status
-acpi_ns_init_one_device (
- acpi_handle obj_handle,
- u32 nesting_level,
- void *context,
- void **return_value);
-
-acpi_status
-acpi_ns_init_one_object (
- acpi_handle obj_handle,
- u32 level,
- void *context,
- void **return_value);
-
-
-acpi_status
-acpi_ns_walk_namespace (
- acpi_object_type type,
- acpi_handle start_object,
- u32 max_depth,
- u8 unlock_before_callback,
- acpi_walk_callback user_function,
- void *context,
- void **return_value);
-
-struct acpi_namespace_node *
-acpi_ns_get_next_node (
- acpi_object_type type,
- struct acpi_namespace_node *parent,
- struct acpi_namespace_node *child);
-
-void
-acpi_ns_delete_namespace_by_owner (
- u16 table_id);
-
-
-/* Namespace loading - nsload */
-
-acpi_status
-acpi_ns_one_complete_parse (
- u32 pass_number,
- struct acpi_table_desc *table_desc);
-
-acpi_status
-acpi_ns_parse_table (
- struct acpi_table_desc *table_desc,
- struct acpi_namespace_node *scope);
-
-acpi_status
-acpi_ns_load_table (
- struct acpi_table_desc *table_desc,
- struct acpi_namespace_node *node);
-
-acpi_status
-acpi_ns_load_table_by_type (
- acpi_table_type table_type);
-
-
-/*
- * Top-level namespace access - nsaccess
- */
-
-acpi_status
-acpi_ns_root_initialize (
- void);
-
-acpi_status
-acpi_ns_lookup (
- union acpi_generic_state *scope_info,
- char *name,
- acpi_object_type type,
- acpi_interpreter_mode interpreter_mode,
- u32 flags,
- struct acpi_walk_state *walk_state,
- struct acpi_namespace_node **ret_node);
-
-
-/*
- * Named object allocation/deallocation - nsalloc
- */
-
-struct acpi_namespace_node *
-acpi_ns_create_node (
- u32 name);
-
-void
-acpi_ns_delete_node (
- struct acpi_namespace_node *node);
-
-void
-acpi_ns_delete_namespace_subtree (
- struct acpi_namespace_node *parent_handle);
-
-void
-acpi_ns_detach_object (
- struct acpi_namespace_node *node);
-
-void
-acpi_ns_delete_children (
- struct acpi_namespace_node *parent);
-
-int
-acpi_ns_compare_names (
- char *name1,
- char *name2);
-
-void
-acpi_ns_remove_reference (
- struct acpi_namespace_node *node);
-
-
-/*
- * Namespace modification - nsmodify
- */
-
-acpi_status
-acpi_ns_unload_namespace (
- acpi_handle handle);
-
-acpi_status
-acpi_ns_delete_subtree (
- acpi_handle start_handle);
-
-
-/*
- * Namespace dump/print utilities - nsdump
- */
-
-void
-acpi_ns_dump_tables (
- acpi_handle search_base,
- u32 max_depth);
-
-void
-acpi_ns_dump_entry (
- acpi_handle handle,
- u32 debug_level);
-
-void
-acpi_ns_dump_pathname (
- acpi_handle handle,
- char *msg,
- u32 level,
- u32 component);
-
-void
-acpi_ns_print_pathname (
- u32 num_segments,
- char *pathname);
-
-acpi_status
-acpi_ns_dump_one_device (
- acpi_handle obj_handle,
- u32 level,
- void *context,
- void **return_value);
-
-void
-acpi_ns_dump_root_devices (
- void);
-
-acpi_status
-acpi_ns_dump_one_object (
- acpi_handle obj_handle,
- u32 level,
- void *context,
- void **return_value);
-
-void
-acpi_ns_dump_objects (
- acpi_object_type type,
- u8 display_type,
- u32 max_depth,
- u32 ownder_id,
- acpi_handle start_handle);
-
-
-/*
- * Namespace evaluation functions - nseval
- */
-
-acpi_status
-acpi_ns_evaluate_by_handle (
- struct acpi_namespace_node *prefix_node,
- union acpi_operand_object **params,
- union acpi_operand_object **return_object);
-
-acpi_status
-acpi_ns_evaluate_by_name (
- char *pathname,
- union acpi_operand_object **params,
- union acpi_operand_object **return_object);
-
-acpi_status
-acpi_ns_evaluate_relative (
- struct acpi_namespace_node *prefix_node,
- char *pathname,
- union acpi_operand_object **params,
- union acpi_operand_object **return_object);
-
-acpi_status
-acpi_ns_execute_control_method (
- struct acpi_namespace_node *method_node,
- union acpi_operand_object **params,
- union acpi_operand_object **return_obj_desc);
-
-acpi_status
-acpi_ns_get_object_value (
- struct acpi_namespace_node *object_node,
- union acpi_operand_object **return_obj_desc);
-
-
-/*
- * Parent/Child/Peer utility functions
- */
-
-acpi_name
-acpi_ns_find_parent_name (
- struct acpi_namespace_node *node_to_search);
-
-
-/*
- * Name and Scope manipulation - nsnames
- */
-
-u32
-acpi_ns_opens_scope (
- acpi_object_type type);
-
-void
-acpi_ns_build_external_path (
- struct acpi_namespace_node *node,
- acpi_size size,
- char *name_buffer);
-
-char *
-acpi_ns_get_external_pathname (
- struct acpi_namespace_node *node);
-
-char *
-acpi_ns_name_of_current_scope (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ns_handle_to_pathname (
- acpi_handle target_handle,
- struct acpi_buffer *buffer);
-
-u8
-acpi_ns_pattern_match (
- struct acpi_namespace_node *obj_node,
- char *search_for);
-
-acpi_status
-acpi_ns_get_node_by_path (
- char *external_pathname,
- struct acpi_namespace_node *in_prefix_node,
- u32 flags,
- struct acpi_namespace_node **out_node);
-
-acpi_size
-acpi_ns_get_pathname_length (
- struct acpi_namespace_node *node);
-
-
-/*
- * Object management for namespace nodes - nsobject
- */
-
-acpi_status
-acpi_ns_attach_object (
- struct acpi_namespace_node *node,
- union acpi_operand_object *object,
- acpi_object_type type);
-
-union acpi_operand_object *
-acpi_ns_get_attached_object (
- struct acpi_namespace_node *node);
-
-union acpi_operand_object *
-acpi_ns_get_secondary_object (
- union acpi_operand_object *obj_desc);
-
-acpi_status
-acpi_ns_attach_data (
- struct acpi_namespace_node *node,
- acpi_object_handler handler,
- void *data);
-
-acpi_status
-acpi_ns_detach_data (
- struct acpi_namespace_node *node,
- acpi_object_handler handler);
-
-acpi_status
-acpi_ns_get_attached_data (
- struct acpi_namespace_node *node,
- acpi_object_handler handler,
- void **data);
-
-
-/*
- * Namespace searching and entry - nssearch
- */
-
-acpi_status
-acpi_ns_search_and_enter (
- u32 entry_name,
- struct acpi_walk_state *walk_state,
- struct acpi_namespace_node *node,
- acpi_interpreter_mode interpreter_mode,
- acpi_object_type type,
- u32 flags,
- struct acpi_namespace_node **ret_node);
-
-acpi_status
-acpi_ns_search_node (
- u32 entry_name,
- struct acpi_namespace_node *node,
- acpi_object_type type,
- struct acpi_namespace_node **ret_node);
-
-void
-acpi_ns_install_node (
- struct acpi_walk_state *walk_state,
- struct acpi_namespace_node *parent_node,
- struct acpi_namespace_node *node,
- acpi_object_type type);
-
-
-/*
- * Utility functions - nsutils
- */
-
-u8
-acpi_ns_valid_root_prefix (
- char prefix);
-
-u8
-acpi_ns_valid_path_separator (
- char sep);
-
-acpi_object_type
-acpi_ns_get_type (
- struct acpi_namespace_node *node);
-
-u32
-acpi_ns_local (
- acpi_object_type type);
-
-void
-acpi_ns_report_error (
- char *module_name,
- u32 line_number,
- u32 component_id,
- char *internal_name,
- acpi_status lookup_status);
-
-void
-acpi_ns_report_method_error (
- char *module_name,
- u32 line_number,
- u32 component_id,
- char *message,
- struct acpi_namespace_node *node,
- char *path,
- acpi_status lookup_status);
-
-void
-acpi_ns_print_node_pathname (
- struct acpi_namespace_node *node,
- char *msg);
-
-acpi_status
-acpi_ns_build_internal_name (
- struct acpi_namestring_info *info);
-
-void
-acpi_ns_get_internal_name_length (
- struct acpi_namestring_info *info);
-
-acpi_status
-acpi_ns_internalize_name (
- char *dotted_name,
- char **converted_name);
-
-acpi_status
-acpi_ns_externalize_name (
- u32 internal_name_length,
- char *internal_name,
- u32 *converted_name_length,
- char **converted_name);
-
-struct acpi_namespace_node *
-acpi_ns_map_handle_to_node (
- acpi_handle handle);
-
-acpi_handle
-acpi_ns_convert_entry_to_handle(
- struct acpi_namespace_node *node);
-
-void
-acpi_ns_terminate (
- void);
-
-struct acpi_namespace_node *
-acpi_ns_get_parent_node (
- struct acpi_namespace_node *node);
-
-
-struct acpi_namespace_node *
-acpi_ns_get_next_valid_node (
- struct acpi_namespace_node *node);
-
-
-#endif /* __ACNAMESP_H__ */
diff --git a/xen/include/acpi/acobject.h b/xen/include/acpi/acobject.h
index 6caaf6ade2..036023a940 100644
--- a/xen/include/acpi/acobject.h
+++ b/xen/include/acpi/acobject.h
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -94,9 +94,7 @@
u32 bit_length; /* Length of field in bits */\
u32 base_byte_offset; /* Byte offset within containing object */\
u8 start_field_bit_offset;/* Bit offset within first field datum (0-63) */\
- u8 datum_valid_bits; /* Valid bit in first "Field datum" */\
- u8 end_field_valid_bits; /* Valid bits in the last "field datum" */\
- u8 end_buffer_valid_bits; /* Valid bits in the last "buffer datum" */\
+ u8 access_bit_width; /* Read/Write size in bits (8-64) */\
u32 value; /* Value to store into the Bank or Index register */\
struct acpi_namespace_node *node; /* Link back to parent node */
@@ -135,7 +133,10 @@ struct acpi_object_integer
acpi_integer value;
};
-
+/*
+ * Note: The String and Buffer object must be identical through the Pointer
+ * element. There is code that depends on this.
+ */
struct acpi_object_string /* Null terminated, ASCII characters only */
{
ACPI_OBJECT_COMMON_HEADER
@@ -180,7 +181,11 @@ struct acpi_object_event
};
-#define INFINITE_CONCURRENCY 0xFF
+#define ACPI_INFINITE_CONCURRENCY 0xFF
+
+typedef
+acpi_status (*ACPI_INTERNAL_METHOD) (
+ struct acpi_walk_state *walk_state);
struct acpi_object_method
{
@@ -190,6 +195,7 @@ struct acpi_object_method
u32 aml_length;
void *semaphore;
u8 *aml_start;
+ ACPI_INTERNAL_METHOD implementation;
u8 concurrency;
u8 thread_count;
acpi_owner_id owning_id;
@@ -199,13 +205,14 @@ struct acpi_object_method
struct acpi_object_mutex
{
ACPI_OBJECT_COMMON_HEADER
- u16 sync_level;
- u16 acquisition_depth;
- struct acpi_thread_state *owner_thread;
- void *semaphore;
+ u8 sync_level; /* 0-15, specified in Mutex() call */
+ u16 acquisition_depth; /* Allow multiple Acquires, same thread */
+ struct acpi_thread_state *owner_thread; /* Current owner of the mutex */
+ void *semaphore; /* Actual OS synchronization object */
union acpi_operand_object *prev; /* Link for list of acquired mutexes */
union acpi_operand_object *next; /* Link for list of acquired mutexes */
- struct acpi_namespace_node *node; /* containing object */
+ struct acpi_namespace_node *node; /* Containing namespace node */
+ u8 original_sync_level; /* Owner's original sync level (0-15) */
};
@@ -215,7 +222,7 @@ struct acpi_object_region
u8 space_id;
union acpi_operand_object *handler; /* Handler for region access */
- struct acpi_namespace_node *node; /* containing object */
+ struct acpi_namespace_node *node; /* Containing namespace node */
union acpi_operand_object *next;
u32 length;
acpi_physical_address address;
diff --git a/xen/include/acpi/acoutput.h b/xen/include/acpi/acoutput.h
index 0ed98ae4d3..2fbe180fee 100644
--- a/xen/include/acpi/acoutput.h
+++ b/xen/include/acpi/acoutput.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -136,7 +136,7 @@
/*
* Debug level macros that are used in the DEBUG_PRINT macros
*/
-#define ACPI_DEBUG_LEVEL(dl) (u32) dl,__LINE__,&_dbg
+#define ACPI_DEBUG_LEVEL(dl) (u32) dl,__LINE__,&_debug_info
/* Exception level -- used in the global "debug_level" */
diff --git a/xen/include/acpi/acparser.h b/xen/include/acpi/acparser.h
deleted file mode 100644
index 562e66002f..0000000000
--- a/xen/include/acpi/acparser.h
+++ /dev/null
@@ -1,347 +0,0 @@
-/******************************************************************************
- *
- * Module Name: acparser.h - AML Parser subcomponent prototypes and defines
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-
-#ifndef __ACPARSER_H__
-#define __ACPARSER_H__
-
-
-#define OP_HAS_RETURN_VALUE 1
-
-/* variable # arguments */
-
-#define ACPI_VAR_ARGS ACPI_UINT32_MAX
-
-
-#define ACPI_PARSE_DELETE_TREE 0x0001
-#define ACPI_PARSE_NO_TREE_DELETE 0x0000
-#define ACPI_PARSE_TREE_MASK 0x0001
-
-#define ACPI_PARSE_LOAD_PASS1 0x0010
-#define ACPI_PARSE_LOAD_PASS2 0x0020
-#define ACPI_PARSE_EXECUTE 0x0030
-#define ACPI_PARSE_MODE_MASK 0x0030
-
-#define ACPI_PARSE_DEFERRED_OP 0x0100
-
-/* Parser external interfaces */
-
-acpi_status
-acpi_psx_load_table (
- u8 *pcode_addr,
- u32 pcode_length);
-
-acpi_status
-acpi_psx_execute (
- struct acpi_namespace_node *method_node,
- union acpi_operand_object **params,
- union acpi_operand_object **return_obj_desc);
-
-
-/******************************************************************************
- *
- * Parser interfaces
- *
- *****************************************************************************/
-
-
-/* psargs - Parse AML opcode arguments */
-
-u8 *
-acpi_ps_get_next_package_end (
- struct acpi_parse_state *parser_state);
-
-u32
-acpi_ps_get_next_package_length (
- struct acpi_parse_state *parser_state);
-
-char *
-acpi_ps_get_next_namestring (
- struct acpi_parse_state *parser_state);
-
-void
-acpi_ps_get_next_simple_arg (
- struct acpi_parse_state *parser_state,
- u32 arg_type,
- union acpi_parse_object *arg);
-
-acpi_status
-acpi_ps_get_next_namepath (
- struct acpi_walk_state *walk_state,
- struct acpi_parse_state *parser_state,
- union acpi_parse_object *arg,
- u8 method_call);
-
-union acpi_parse_object *
-acpi_ps_get_next_field (
- struct acpi_parse_state *parser_state);
-
-acpi_status
-acpi_ps_get_next_arg (
- struct acpi_walk_state *walk_state,
- struct acpi_parse_state *parser_state,
- u32 arg_type,
- union acpi_parse_object **return_arg);
-
-
-/* psfind */
-
-union acpi_parse_object *
-acpi_ps_find_name (
- union acpi_parse_object *scope,
- u32 name,
- u32 opcode);
-
-union acpi_parse_object*
-acpi_ps_get_parent (
- union acpi_parse_object *op);
-
-
-/* psopcode - AML Opcode information */
-
-const struct acpi_opcode_info *
-acpi_ps_get_opcode_info (
- u16 opcode);
-
-char *
-acpi_ps_get_opcode_name (
- u16 opcode);
-
-
-/* psparse - top level parsing routines */
-
-u32
-acpi_ps_get_opcode_size (
- u32 opcode);
-
-void
-acpi_ps_complete_this_op (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op);
-
-acpi_status
-acpi_ps_next_parse_state (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- acpi_status callback_status);
-
-acpi_status
-acpi_ps_find_object (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object **out_op);
-
-void
-acpi_ps_delete_parse_tree (
- union acpi_parse_object *root);
-
-acpi_status
-acpi_ps_parse_loop (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ps_parse_aml (
- struct acpi_walk_state *walk_state);
-
-acpi_status
-acpi_ps_parse_table (
- u8 *aml,
- u32 aml_size,
- acpi_parse_downwards descending_callback,
- acpi_parse_upwards ascending_callback,
- union acpi_parse_object **root_object);
-
-u16
-acpi_ps_peek_opcode (
- struct acpi_parse_state *state);
-
-
-/* psscope - Scope stack management routines */
-
-
-acpi_status
-acpi_ps_init_scope (
- struct acpi_parse_state *parser_state,
- union acpi_parse_object *root);
-
-union acpi_parse_object *
-acpi_ps_get_parent_scope (
- struct acpi_parse_state *state);
-
-u8
-acpi_ps_has_completed_scope (
- struct acpi_parse_state *parser_state);
-
-void
-acpi_ps_pop_scope (
- struct acpi_parse_state *parser_state,
- union acpi_parse_object **op,
- u32 *arg_list,
- u32 *arg_count);
-
-acpi_status
-acpi_ps_push_scope (
- struct acpi_parse_state *parser_state,
- union acpi_parse_object *op,
- u32 remaining_args,
- u32 arg_count);
-
-void
-acpi_ps_cleanup_scope (
- struct acpi_parse_state *state);
-
-
-/* pstree - parse tree manipulation routines */
-
-void
-acpi_ps_append_arg(
- union acpi_parse_object *op,
- union acpi_parse_object *arg);
-
-union acpi_parse_object*
-acpi_ps_find (
- union acpi_parse_object *scope,
- char *path,
- u16 opcode,
- u32 create);
-
-union acpi_parse_object *
-acpi_ps_get_arg(
- union acpi_parse_object *op,
- u32 argn);
-
-union acpi_parse_object *
-acpi_ps_get_child (
- union acpi_parse_object *op);
-
-union acpi_parse_object *
-acpi_ps_get_depth_next (
- union acpi_parse_object *origin,
- union acpi_parse_object *op);
-
-
-/* pswalk - parse tree walk routines */
-
-acpi_status
-acpi_ps_walk_parsed_aml (
- union acpi_parse_object *start_op,
- union acpi_parse_object *end_op,
- union acpi_operand_object *mth_desc,
- struct acpi_namespace_node *start_node,
- union acpi_operand_object **params,
- union acpi_operand_object **caller_return_desc,
- acpi_owner_id owner_id,
- acpi_parse_downwards descending_callback,
- acpi_parse_upwards ascending_callback);
-
-acpi_status
-acpi_ps_get_next_walk_op (
- struct acpi_walk_state *walk_state,
- union acpi_parse_object *op,
- acpi_parse_upwards ascending_callback);
-
-acpi_status
-acpi_ps_delete_completed_op (
- struct acpi_walk_state *walk_state);
-
-
-/* psutils - parser utilities */
-
-union acpi_parse_object *
-acpi_ps_create_scope_op (
- void);
-
-void
-acpi_ps_init_op (
- union acpi_parse_object *op,
- u16 opcode);
-
-union acpi_parse_object *
-acpi_ps_alloc_op (
- u16 opcode);
-
-void
-acpi_ps_free_op (
- union acpi_parse_object *op);
-
-void
-acpi_ps_delete_parse_cache (
- void);
-
-u8
-acpi_ps_is_leading_char (
- u32 c);
-
-u8
-acpi_ps_is_prefix_char (
- u32 c);
-
-u32
-acpi_ps_get_name(
- union acpi_parse_object *op);
-
-void
-acpi_ps_set_name(
- union acpi_parse_object *op,
- u32 name);
-
-
-/* psdump - display parser tree */
-
-u32
-acpi_ps_sprint_path (
- char *buffer_start,
- u32 buffer_size,
- union acpi_parse_object *op);
-
-u32
-acpi_ps_sprint_op (
- char *buffer_start,
- u32 buffer_size,
- union acpi_parse_object *op);
-
-void
-acpi_ps_show (
- union acpi_parse_object *op);
-
-
-#endif /* __ACPARSER_H__ */
diff --git a/xen/include/acpi/acpi.h b/xen/include/acpi/acpi.h
index bb5bd8f03e..ad53252dd4 100644
--- a/xen/include/acpi/acpi.h
+++ b/xen/include/acpi/acpi.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/xen/include/acpi/acpi_bus.h b/xen/include/acpi/acpi_bus.h
index 4326bc934b..a4197e9abe 100644
--- a/xen/include/acpi/acpi_bus.h
+++ b/xen/include/acpi/acpi_bus.h
@@ -26,16 +26,10 @@
#ifndef __ACPI_BUS_H__
#define __ACPI_BUS_H__
-#if 0
-#include <xen/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,4))
-#include <xen/device.h>
-#define CONFIG_LDM
-#endif
-#endif /* 0 */
-
#include <acpi/acpi.h>
+#define PREFIX "ACPI: "
+
/* TBD: Make dynamic */
#define ACPI_MAX_HANDLES 10
struct acpi_handle_list {
@@ -66,10 +60,10 @@ acpi_evaluate_reference (
#ifdef CONFIG_ACPI_BUS
-/*#include <xen/proc_fs.h>*/
+#include <linux/proc_fs.h>
#define ACPI_BUS_FILE_ROOT "acpi"
-/*extern struct proc_dir_entry *acpi_root_dir;*/
+extern struct proc_dir_entry *acpi_root_dir;
extern FADT_DESCRIPTOR acpi_fadt;
enum acpi_bus_removal_type {
@@ -108,6 +102,9 @@ typedef int (*acpi_op_suspend) (struct acpi_device *device, int state);
typedef int (*acpi_op_resume) (struct acpi_device *device, int state);
typedef int (*acpi_op_scan) (struct acpi_device *device);
typedef int (*acpi_op_bind) (struct acpi_device *device);
+typedef int (*acpi_op_unbind) (struct acpi_device *device);
+typedef int (*acpi_op_match) (struct acpi_device *device,
+ struct acpi_driver *driver);
struct acpi_device_ops {
acpi_op_add add;
@@ -119,13 +116,15 @@ struct acpi_device_ops {
acpi_op_resume resume;
acpi_op_scan scan;
acpi_op_bind bind;
+ acpi_op_unbind unbind;
+ acpi_op_match match;
};
struct acpi_driver {
struct list_head node;
char name[80];
char class[80];
- int references;
+ atomic_t references;
char *ids; /* Supported Hardware IDs */
struct acpi_device_ops ops;
};
@@ -161,7 +160,8 @@ struct acpi_device_flags {
u32 suprise_removal_ok:1;
u32 power_manageable:1;
u32 performance_manageable:1;
- u32 reserved:21;
+ u32 wake_capable:1; /* Wakeup(_PRW) supported? */
+ u32 reserved:20;
};
@@ -207,10 +207,8 @@ struct acpi_device_power_flags {
u32 explicit_get:1; /* _PSC present? */
u32 power_resources:1; /* Power resources */
u32 inrush_current:1; /* Serialize Dx->D0 */
- u32 wake_capable:1; /* Wakeup supported? */
- u32 wake_enabled:1; /* Enabled for wakeup */
u32 power_removed:1; /* Optimize Dx->D0 */
- u32 reserved:26;
+ u32 reserved:28;
};
struct acpi_device_power_state {
@@ -254,6 +252,25 @@ struct acpi_device_perf {
struct acpi_device_perf_state *states;
};
+/* Wakeup Management */
+struct acpi_device_wakeup_flags {
+ u8 valid:1; /* Can successfully enable wakeup? */
+ u8 run_wake:1; /* Run-Wake GPE devices */
+};
+
+struct acpi_device_wakeup_state {
+ u8 enabled:1;
+ u8 active:1;
+};
+
+struct acpi_device_wakeup {
+ acpi_handle gpe_device;
+ acpi_integer gpe_number;
+ acpi_integer sleep_state;
+ struct acpi_handle_list resources;
+ struct acpi_device_wakeup_state state;
+ struct acpi_device_wakeup_flags flags;
+};
/* Device */
@@ -262,18 +279,19 @@ struct acpi_device {
struct acpi_device *parent;
struct list_head children;
struct list_head node;
+ struct list_head wakeup_list;
+ struct list_head g_list;
struct acpi_device_status status;
struct acpi_device_flags flags;
struct acpi_device_pnp pnp;
struct acpi_device_power power;
+ struct acpi_device_wakeup wakeup;
struct acpi_device_perf performance;
struct acpi_device_dir dir;
struct acpi_device_ops ops;
struct acpi_driver *driver;
void *driver_data;
-#ifdef CONFIG_LDM
- struct device dev;
-#endif
+ struct kobject kobj;
};
#define acpi_driver_data(d) ((d)->driver_data)
@@ -292,12 +310,14 @@ struct acpi_bus_event {
u32 data;
};
+extern struct subsystem acpi_subsys;
/*
* External Functions
*/
-int acpi_bus_get_device(acpi_handle, struct acpi_device **device);
+int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device);
+void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context);
int acpi_bus_get_status (struct acpi_device *device);
int acpi_bus_get_power (acpi_handle handle, int *state);
int acpi_bus_set_power (acpi_handle handle, int state);
@@ -305,10 +325,15 @@ int acpi_bus_generate_event (struct acpi_device *device, u8 type, int data);
int acpi_bus_receive_event (struct acpi_bus_event *event);
int acpi_bus_register_driver (struct acpi_driver *driver);
int acpi_bus_unregister_driver (struct acpi_driver *driver);
-int acpi_bus_scan (struct acpi_device *device);
-int acpi_init (void);
-void acpi_exit (void);
+int acpi_bus_scan (struct acpi_device *start);
+int acpi_bus_trim(struct acpi_device *start, int rmdevice);
+int acpi_bus_add (struct acpi_device **child, struct acpi_device *parent,
+ acpi_handle handle, int type);
+
+int acpi_match_ids (struct acpi_device *device, char *ids);
+int acpi_create_dir(struct acpi_device *);
+void acpi_remove_dir(struct acpi_device *);
#endif /*CONFIG_ACPI_BUS*/
diff --git a/xen/include/acpi/acpi_drivers.h b/xen/include/acpi/acpi_drivers.h
index 3b5273a230..c26d5ea5b1 100644
--- a/xen/include/acpi/acpi_drivers.h
+++ b/xen/include/acpi/acpi_drivers.h
@@ -1,5 +1,5 @@
/*
- * acpi_drivers.h ($Revision: 32 $)
+ * acpi_drivers.h ($Revision: 31 $)
*
* Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
* Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@ -27,122 +27,22 @@
#define __ACPI_DRIVERS_H__
#include <xen/acpi.h>
-#include "acpi_bus.h"
+#include <acpi/acpi_bus.h>
#define ACPI_MAX_STRING 80
-
-/* --------------------------------------------------------------------------
- ACPI Bus
- -------------------------------------------------------------------------- */
-
#define ACPI_BUS_COMPONENT 0x00010000
-#define ACPI_BUS_CLASS "system_bus"
-#define ACPI_BUS_HID "ACPI_BUS"
-#define ACPI_BUS_DRIVER_NAME "ACPI Bus Driver"
-#define ACPI_BUS_DEVICE_NAME "System Bus"
-
-
-/* --------------------------------------------------------------------------
- AC Adapter
- -------------------------------------------------------------------------- */
-
-#define ACPI_AC_COMPONENT 0x00020000
-#define ACPI_AC_CLASS "ac_adapter"
-#define ACPI_AC_HID "ACPI0003"
-#define ACPI_AC_DRIVER_NAME "ACPI AC Adapter Driver"
-#define ACPI_AC_DEVICE_NAME "AC Adapter"
-#define ACPI_AC_FILE_STATE "state"
-#define ACPI_AC_NOTIFY_STATUS 0x80
-#define ACPI_AC_STATUS_OFFLINE 0x00
-#define ACPI_AC_STATUS_ONLINE 0x01
-#define ACPI_AC_STATUS_UNKNOWN 0xFF
-
-
-/* --------------------------------------------------------------------------
- Battery
- -------------------------------------------------------------------------- */
-
-#define ACPI_BATTERY_COMPONENT 0x00040000
-#define ACPI_BATTERY_CLASS "battery"
-#define ACPI_BATTERY_HID "PNP0C0A"
-#define ACPI_BATTERY_DRIVER_NAME "ACPI Battery Driver"
-#define ACPI_BATTERY_DEVICE_NAME "Battery"
-#define ACPI_BATTERY_FILE_INFO "info"
-#define ACPI_BATTERY_FILE_STATUS "state"
-#define ACPI_BATTERY_FILE_ALARM "alarm"
-#define ACPI_BATTERY_NOTIFY_STATUS 0x80
-#define ACPI_BATTERY_NOTIFY_INFO 0x81
-#define ACPI_BATTERY_UNITS_WATTS "mW"
-#define ACPI_BATTERY_UNITS_AMPS "mA"
-
-
-/* --------------------------------------------------------------------------
- Button
- -------------------------------------------------------------------------- */
+#define ACPI_SYSTEM_COMPONENT 0x02000000
-#define ACPI_BUTTON_COMPONENT 0x00080000
-#define ACPI_BUTTON_DRIVER_NAME "ACPI Button Driver"
-#define ACPI_BUTTON_CLASS "button"
-#define ACPI_BUTTON_FILE_INFO "info"
-#define ACPI_BUTTON_FILE_STATE "state"
-#define ACPI_BUTTON_TYPE_UNKNOWN 0x00
-#define ACPI_BUTTON_NOTIFY_STATUS 0x80
+/* _HID definitions */
-#define ACPI_BUTTON_SUBCLASS_POWER "power"
-#define ACPI_BUTTON_HID_POWER "PNP0C0C"
+#define ACPI_POWER_HID "ACPI_PWR"
+#define ACPI_PROCESSOR_HID "ACPI_CPU"
+#define ACPI_SYSTEM_HID "ACPI_SYS"
+#define ACPI_THERMAL_HID "ACPI_THM"
#define ACPI_BUTTON_HID_POWERF "ACPI_FPB"
-#define ACPI_BUTTON_DEVICE_NAME_POWER "Power Button (CM)"
-#define ACPI_BUTTON_DEVICE_NAME_POWERF "Power Button (FF)"
-#define ACPI_BUTTON_TYPE_POWER 0x01
-#define ACPI_BUTTON_TYPE_POWERF 0x02
-
-#define ACPI_BUTTON_SUBCLASS_SLEEP "sleep"
-#define ACPI_BUTTON_HID_SLEEP "PNP0C0E"
#define ACPI_BUTTON_HID_SLEEPF "ACPI_FSB"
-#define ACPI_BUTTON_DEVICE_NAME_SLEEP "Sleep Button (CM)"
-#define ACPI_BUTTON_DEVICE_NAME_SLEEPF "Sleep Button (FF)"
-#define ACPI_BUTTON_TYPE_SLEEP 0x03
-#define ACPI_BUTTON_TYPE_SLEEPF 0x04
-
-#define ACPI_BUTTON_SUBCLASS_LID "lid"
-#define ACPI_BUTTON_HID_LID "PNP0C0D"
-#define ACPI_BUTTON_DEVICE_NAME_LID "Lid Switch"
-#define ACPI_BUTTON_TYPE_LID 0x05
-
-
-/* --------------------------------------------------------------------------
- Embedded Controller
- -------------------------------------------------------------------------- */
-
-#define ACPI_EC_COMPONENT 0x00100000
-#define ACPI_EC_CLASS "embedded_controller"
-#define ACPI_EC_HID "PNP0C09"
-#define ACPI_EC_DRIVER_NAME "ACPI Embedded Controller Driver"
-#define ACPI_EC_DEVICE_NAME "Embedded Controller"
-#define ACPI_EC_FILE_INFO "info"
-
-#ifdef CONFIG_ACPI_EC
-
-int acpi_ec_ecdt_probe (void);
-int acpi_ec_init (void);
-void acpi_ec_exit (void);
-
-#endif
-
-
-/* --------------------------------------------------------------------------
- Fan
- -------------------------------------------------------------------------- */
-
-#define ACPI_FAN_COMPONENT 0x00200000
-#define ACPI_FAN_CLASS "fan"
-#define ACPI_FAN_HID "PNP0C0B"
-#define ACPI_FAN_DRIVER_NAME "ACPI Fan Driver"
-#define ACPI_FAN_DEVICE_NAME "Fan"
-#define ACPI_FAN_FILE_STATE "state"
-#define ACPI_FAN_NOTIFY_STATUS 0x80
/* --------------------------------------------------------------------------
@@ -153,41 +53,28 @@ void acpi_ec_exit (void);
#define ACPI_PCI_COMPONENT 0x00400000
-/* ACPI PCI Root Bridge (pci_root.c) */
-
-#define ACPI_PCI_ROOT_CLASS "pci_bridge"
-#define ACPI_PCI_ROOT_HID "PNP0A03"
-#define ACPI_PCI_ROOT_DRIVER_NAME "ACPI PCI Root Bridge Driver"
-#define ACPI_PCI_ROOT_DEVICE_NAME "PCI Root Bridge"
-
-int acpi_pci_root_init (void);
-void acpi_pci_root_exit (void);
-
/* ACPI PCI Interrupt Link (pci_link.c) */
-#define ACPI_PCI_LINK_CLASS "pci_irq_routing"
-#define ACPI_PCI_LINK_HID "PNP0C0F"
-#define ACPI_PCI_LINK_DRIVER_NAME "ACPI PCI Interrupt Link Driver"
-#define ACPI_PCI_LINK_DEVICE_NAME "PCI Interrupt Link"
-#define ACPI_PCI_LINK_FILE_INFO "info"
-#define ACPI_PCI_LINK_FILE_STATUS "state"
-
-int acpi_pci_link_check (void);
+int acpi_irq_penalty_init (void);
int acpi_pci_link_get_irq (acpi_handle handle, int index, int* edge_level, int* active_high_low);
-int acpi_pci_link_init (void);
-void acpi_pci_link_exit (void);
/* ACPI PCI Interrupt Routing (pci_irq.c) */
int acpi_pci_irq_add_prt (acpi_handle handle, int segment, int bus);
+void acpi_pci_irq_del_prt (int segment, int bus);
/* ACPI PCI Device Binding (pci_bind.c) */
struct pci_bus;
int acpi_pci_bind (struct acpi_device *device);
+int acpi_pci_unbind (struct acpi_device *device);
int acpi_pci_bind_root (struct acpi_device *device, struct acpi_pci_id *id, struct pci_bus *bus);
+/* Arch-defined function to add a bus to the system */
+
+struct pci_bus *pci_acpi_scan_root(struct acpi_device *device, int domain, int bus);
+
#endif /*CONFIG_ACPI_PCI*/
@@ -195,43 +82,25 @@ int acpi_pci_bind_root (struct acpi_device *device, struct acpi_pci_id *id, stru
Power Resource
-------------------------------------------------------------------------- */
-#define ACPI_POWER_COMPONENT 0x00800000
-#define ACPI_POWER_CLASS "power_resource"
-#define ACPI_POWER_HID "ACPI_PWR"
-#define ACPI_POWER_DRIVER_NAME "ACPI Power Resource Driver"
-#define ACPI_POWER_DEVICE_NAME "Power Resource"
-#define ACPI_POWER_FILE_INFO "info"
-#define ACPI_POWER_FILE_STATUS "state"
-#define ACPI_POWER_RESOURCE_STATE_OFF 0x00
-#define ACPI_POWER_RESOURCE_STATE_ON 0x01
-#define ACPI_POWER_RESOURCE_STATE_UNKNOWN 0xFF
-
#ifdef CONFIG_ACPI_POWER
-
+int acpi_enable_wakeup_device_power (struct acpi_device *dev);
+int acpi_disable_wakeup_device_power (struct acpi_device *dev);
int acpi_power_get_inferred_state (struct acpi_device *device);
int acpi_power_transition (struct acpi_device *device, int state);
-int acpi_power_init (void);
-void acpi_power_exit (void);
-
#endif
/* --------------------------------------------------------------------------
+ Embedded Controller
+ -------------------------------------------------------------------------- */
+#ifdef CONFIG_ACPI_EC
+int acpi_ec_ecdt_probe (void);
+#endif
+
+/* --------------------------------------------------------------------------
Processor
-------------------------------------------------------------------------- */
-#define ACPI_PROCESSOR_COMPONENT 0x01000000
-#define ACPI_PROCESSOR_CLASS "processor"
-#define ACPI_PROCESSOR_HID "ACPI_CPU"
-#define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver"
-#define ACPI_PROCESSOR_DEVICE_NAME "Processor"
-#define ACPI_PROCESSOR_FILE_INFO "info"
-#define ACPI_PROCESSOR_FILE_POWER "power"
-#define ACPI_PROCESSOR_FILE_PERFORMANCE "performance"
-#define ACPI_PROCESSOR_FILE_THROTTLING "throttling"
-#define ACPI_PROCESSOR_FILE_LIMIT "limit"
-#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
-#define ACPI_PROCESSOR_NOTIFY_POWER 0x81
#define ACPI_PROCESSOR_LIMIT_NONE 0x00
#define ACPI_PROCESSOR_LIMIT_INCREMENT 0x01
#define ACPI_PROCESSOR_LIMIT_DECREMENT 0x02
@@ -239,109 +108,4 @@ void acpi_power_exit (void);
int acpi_processor_set_thermal_limit(acpi_handle handle, int type);
-/* --------------------------------------------------------------------------
- System
- -------------------------------------------------------------------------- */
-
-#define ACPI_SYSTEM_COMPONENT 0x02000000
-#define ACPI_SYSTEM_CLASS "system"
-#define ACPI_SYSTEM_HID "ACPI_SYS"
-#define ACPI_SYSTEM_DRIVER_NAME "ACPI System Driver"
-#define ACPI_SYSTEM_DEVICE_NAME "System"
-#define ACPI_SYSTEM_FILE_INFO "info"
-#define ACPI_SYSTEM_FILE_EVENT "event"
-#define ACPI_SYSTEM_FILE_ALARM "alarm"
-#define ACPI_SYSTEM_FILE_DSDT "dsdt"
-#define ACPI_SYSTEM_FILE_FADT "fadt"
-#define ACPI_SYSTEM_FILE_SLEEP "sleep"
-#define ACPI_SYSTEM_FILE_DEBUG_LAYER "debug_layer"
-#define ACPI_SYSTEM_FILE_DEBUG_LEVEL "debug_level"
-
-#ifdef CONFIG_ACPI_SYSTEM
-
-int acpi_system_init (void);
-void acpi_system_exit (void);
-
-#endif
-
-
-/* --------------------------------------------------------------------------
- Thermal Zone
- -------------------------------------------------------------------------- */
-
-#define ACPI_THERMAL_COMPONENT 0x04000000
-#define ACPI_THERMAL_CLASS "thermal_zone"
-#define ACPI_THERMAL_HID "ACPI_THM"
-#define ACPI_THERMAL_DRIVER_NAME "ACPI Thermal Zone Driver"
-#define ACPI_THERMAL_DEVICE_NAME "Thermal Zone"
-#define ACPI_THERMAL_FILE_STATE "state"
-#define ACPI_THERMAL_FILE_TEMPERATURE "temperature"
-#define ACPI_THERMAL_FILE_TRIP_POINTS "trip_points"
-#define ACPI_THERMAL_FILE_COOLING_MODE "cooling_mode"
-#define ACPI_THERMAL_FILE_POLLING_FREQ "polling_frequency"
-#define ACPI_THERMAL_NOTIFY_TEMPERATURE 0x80
-#define ACPI_THERMAL_NOTIFY_THRESHOLDS 0x81
-#define ACPI_THERMAL_NOTIFY_DEVICES 0x82
-#define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0
-#define ACPI_THERMAL_NOTIFY_HOT 0xF1
-#define ACPI_THERMAL_MODE_ACTIVE 0x00
-#define ACPI_THERMAL_MODE_PASSIVE 0x01
-#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff"
-
-
-/* --------------------------------------------------------------------------
- Debug Support
- -------------------------------------------------------------------------- */
-
-#define ACPI_DEBUG_RESTORE 0
-#define ACPI_DEBUG_LOW 1
-#define ACPI_DEBUG_MEDIUM 2
-#define ACPI_DEBUG_HIGH 3
-#define ACPI_DEBUG_DRIVERS 4
-
-/*extern u32 acpi_dbg_level;*/
-/*extern u32 acpi_dbg_layer;*/
-
-static inline void
-acpi_set_debug (
- u32 flag)
-{
- static u32 layer_save;
- static u32 level_save;
-
- switch (flag) {
- case ACPI_DEBUG_RESTORE:
- acpi_dbg_layer = layer_save;
- acpi_dbg_level = level_save;
- break;
- case ACPI_DEBUG_LOW:
- case ACPI_DEBUG_MEDIUM:
- case ACPI_DEBUG_HIGH:
- case ACPI_DEBUG_DRIVERS:
- layer_save = acpi_dbg_layer;
- level_save = acpi_dbg_level;
- break;
- }
-
- switch (flag) {
- case ACPI_DEBUG_LOW:
- acpi_dbg_layer = ACPI_COMPONENT_DEFAULT | ACPI_ALL_DRIVERS;
- acpi_dbg_level = ACPI_DEBUG_DEFAULT;
- break;
- case ACPI_DEBUG_MEDIUM:
- acpi_dbg_layer = ACPI_COMPONENT_DEFAULT | ACPI_ALL_DRIVERS;
- acpi_dbg_level = ACPI_LV_FUNCTIONS | ACPI_LV_ALL_EXCEPTIONS;
- break;
- case ACPI_DEBUG_HIGH:
- acpi_dbg_layer = 0xFFFFFFFF;
- acpi_dbg_level = 0xFFFFFFFF;
- break;
- case ACPI_DEBUG_DRIVERS:
- acpi_dbg_layer = ACPI_ALL_DRIVERS;
- acpi_dbg_level = 0xFFFFFFFF;
- break;
- }
-}
-
-
#endif /*__ACPI_DRIVERS_H__*/
diff --git a/xen/include/acpi/acpiosxf.h b/xen/include/acpi/acpiosxf.h
index 6f485231ac..57779919c1 100644
--- a/xen/include/acpi/acpiosxf.h
+++ b/xen/include/acpi/acpiosxf.h
@@ -9,7 +9,7 @@
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -169,17 +169,19 @@ acpi_status
acpi_os_map_memory (
acpi_physical_address physical_address,
acpi_size size,
- void **logical_address);
+ void __iomem **logical_address);
void
acpi_os_unmap_memory (
- void *logical_address,
+ void __iomem *logical_address,
acpi_size size);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_os_get_physical_address (
void *logical_address,
acpi_physical_address *physical_address);
+#endif
/*
@@ -188,14 +190,14 @@ acpi_os_get_physical_address (
acpi_status
acpi_os_install_interrupt_handler (
- u32 interrupt_number,
- OSD_HANDLER service_routine,
+ u32 gsi,
+ acpi_osd_handler service_routine,
void *context);
acpi_status
acpi_os_remove_interrupt_handler (
- u32 interrupt_number,
- OSD_HANDLER service_routine);
+ u32 gsi,
+ acpi_osd_handler service_routine);
/*
@@ -209,13 +211,16 @@ acpi_os_get_thread_id (
acpi_status
acpi_os_queue_for_execution (
u32 priority,
- OSD_EXECUTION_CALLBACK function,
+ acpi_osd_exec_callback function,
+ void *context);
+
+void
+acpi_os_wait_events_complete (
void *context);
void
acpi_os_sleep (
- u32 seconds,
- u32 milliseconds);
+ acpi_integer milliseconds);
void
acpi_os_stall (
@@ -258,25 +263,28 @@ acpi_os_write_memory (
/*
* Platform and hardware-independent PCI configuration space access
+ * Note: Can't use "Register" as a parameter, changed to "Reg" --
+ * certain compilers complain.
*/
acpi_status
acpi_os_read_pci_configuration (
struct acpi_pci_id *pci_id,
- u32 register,
+ u32 reg,
void *value,
u32 width);
acpi_status
acpi_os_write_pci_configuration (
struct acpi_pci_id *pci_id,
- u32 register,
+ u32 reg,
acpi_integer value,
u32 width);
/*
* Interim function needed for PCI IRQ routing
*/
+
void
acpi_os_derive_pci_id(
acpi_handle rhandle,
@@ -292,12 +300,14 @@ acpi_os_readable (
void *pointer,
acpi_size length);
+#ifdef ACPI_FUTURE_USAGE
u8
acpi_os_writable (
void *pointer,
acpi_size length);
+#endif
-u32
+u64
acpi_os_get_timer (
void);
@@ -329,9 +339,11 @@ acpi_os_redirect_output (
* Debug input
*/
+#ifdef ACPI_FUTURE_USAGE
u32
acpi_os_get_line (
char *buffer);
+#endif
/*
diff --git a/xen/include/acpi/acpixf.h b/xen/include/acpi/acpixf.h
index fcebceb3e2..00d78b7965 100644
--- a/xen/include/acpi/acpixf.h
+++ b/xen/include/acpi/acpixf.h
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -70,9 +70,11 @@ acpi_status
acpi_terminate (
void);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_subsystem_status (
void);
+#endif
acpi_status
acpi_enable (
@@ -82,9 +84,11 @@ acpi_status
acpi_disable (
void);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_get_system_info (
struct acpi_buffer *ret_buffer);
+#endif
const char *
acpi_format_exception (
@@ -94,10 +98,12 @@ acpi_status
acpi_purge_cached_objects (
void);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_install_initialization_handler (
acpi_init_handler handler,
u32 function);
+#endif
/*
* ACPI Memory manager
@@ -129,6 +135,7 @@ acpi_status
acpi_load_tables (
void);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_load_table (
struct acpi_table_header *table_ptr);
@@ -142,6 +149,7 @@ acpi_get_table_header (
acpi_table_type table_type,
u32 instance,
struct acpi_table_header *out_table_header);
+#endif /* ACPI_FUTURE_USAGE */
acpi_status
acpi_get_table (
@@ -218,6 +226,7 @@ acpi_evaluate_object (
struct acpi_object_list *parameter_objects,
struct acpi_buffer *return_object_buffer);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_evaluate_object_typed (
acpi_handle object,
@@ -225,6 +234,7 @@ acpi_evaluate_object_typed (
struct acpi_object_list *external_params,
struct acpi_buffer *return_buffer,
acpi_object_type return_type);
+#endif
acpi_status
acpi_get_object_info (
@@ -296,9 +306,20 @@ acpi_install_gpe_handler (
acpi_handle gpe_device,
u32 gpe_number,
u32 type,
- acpi_gpe_handler handler,
+ acpi_event_handler address,
void *context);
+#ifdef ACPI_FUTURE_USAGE
+acpi_status
+acpi_install_exception_handler (
+ acpi_exception_handler handler);
+#endif
+
+
+/*
+ * Event interfaces
+ */
+
acpi_status
acpi_acquire_global_lock (
u16 timeout,
@@ -312,7 +333,7 @@ acpi_status
acpi_remove_gpe_handler (
acpi_handle gpe_device,
u32 gpe_number,
- acpi_gpe_handler handler);
+ acpi_event_handler address);
acpi_status
acpi_enable_event (
@@ -328,10 +349,18 @@ acpi_status
acpi_clear_event (
u32 event);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_get_event_status (
u32 event,
acpi_event_status *event_status);
+#endif /* ACPI_FUTURE_USAGE */
+
+acpi_status
+acpi_set_gpe_type (
+ acpi_handle gpe_device,
+ u32 gpe_number,
+ u8 type);
acpi_status
acpi_enable_gpe (
@@ -351,12 +380,14 @@ acpi_clear_gpe (
u32 gpe_number,
u32 flags);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_get_gpe_status (
acpi_handle gpe_device,
u32 gpe_number,
u32 flags,
acpi_event_status *event_status);
+#endif /* ACPI_FUTURE_USAGE */
acpi_status
acpi_install_gpe_block (
@@ -385,10 +416,12 @@ acpi_get_current_resources(
acpi_handle device_handle,
struct acpi_buffer *ret_buffer);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_get_possible_resources(
acpi_handle device_handle,
struct acpi_buffer *ret_buffer);
+#endif
acpi_status
acpi_walk_resources (
@@ -432,9 +465,11 @@ acpi_status
acpi_set_firmware_waking_vector (
acpi_physical_address physical_address);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_get_firmware_waking_vector (
acpi_physical_address *physical_address);
+#endif
acpi_status
acpi_get_sleep_type_data (
@@ -446,11 +481,11 @@ acpi_status
acpi_enter_sleep_state_prep (
u8 sleep_state);
-acpi_status
+acpi_status asmlinkage
acpi_enter_sleep_state (
u8 sleep_state);
-acpi_status
+acpi_status asmlinkage
acpi_enter_sleep_state_s4bios (
void);
diff --git a/xen/include/acpi/acresrc.h b/xen/include/acpi/acresrc.h
deleted file mode 100644
index e1a9187505..0000000000
--- a/xen/include/acpi/acresrc.h
+++ /dev/null
@@ -1,391 +0,0 @@
-/******************************************************************************
- *
- * Name: acresrc.h - Resource Manager function prototypes
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACRESRC_H__
-#define __ACRESRC_H__
-
-
-/*
- * Function prototypes called from Acpi* APIs
- */
-
-acpi_status
-acpi_rs_get_prt_method_data (
- acpi_handle handle,
- struct acpi_buffer *ret_buffer);
-
-
-acpi_status
-acpi_rs_get_crs_method_data (
- acpi_handle handle,
- struct acpi_buffer *ret_buffer);
-
-acpi_status
-acpi_rs_get_prs_method_data (
- acpi_handle handle,
- struct acpi_buffer *ret_buffer);
-
-acpi_status
-acpi_rs_get_method_data (
- acpi_handle handle,
- char *path,
- struct acpi_buffer *ret_buffer);
-
-acpi_status
-acpi_rs_set_srs_method_data (
- acpi_handle handle,
- struct acpi_buffer *ret_buffer);
-
-acpi_status
-acpi_rs_create_resource_list (
- union acpi_operand_object *byte_stream_buffer,
- struct acpi_buffer *output_buffer);
-
-acpi_status
-acpi_rs_create_byte_stream (
- struct acpi_resource *linked_list_buffer,
- struct acpi_buffer *output_buffer);
-
-acpi_status
-acpi_rs_create_pci_routing_table (
- union acpi_operand_object *package_object,
- struct acpi_buffer *output_buffer);
-
-
-/*
- * Function prototypes called from acpi_rs_create*
- */
-void
-acpi_rs_dump_irq (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_address16 (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_address32 (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_address64 (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_dma (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_io (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_extended_irq (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_fixed_io (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_fixed_memory32 (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_memory24 (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_memory32 (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_start_depend_fns (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_vendor_specific (
- union acpi_resource_data *data);
-
-void
-acpi_rs_dump_resource_list (
- struct acpi_resource *resource);
-
-void
-acpi_rs_dump_irq_list (
- u8 *route_table);
-
-acpi_status
-acpi_rs_get_byte_stream_start (
- u8 *byte_stream_buffer,
- u8 **byte_stream_start,
- u32 *size);
-
-acpi_status
-acpi_rs_get_list_length (
- u8 *byte_stream_buffer,
- u32 byte_stream_buffer_length,
- acpi_size *size_needed);
-
-acpi_status
-acpi_rs_get_byte_stream_length (
- struct acpi_resource *linked_list_buffer,
- acpi_size *size_needed);
-
-acpi_status
-acpi_rs_get_pci_routing_table_length (
- union acpi_operand_object *package_object,
- acpi_size *buffer_size_needed);
-
-acpi_status
-acpi_rs_byte_stream_to_list (
- u8 *byte_stream_buffer,
- u32 byte_stream_buffer_length,
- u8 *output_buffer);
-
-acpi_status
-acpi_rs_list_to_byte_stream (
- struct acpi_resource *linked_list,
- acpi_size byte_stream_size_needed,
- u8 *output_buffer);
-
-acpi_status
-acpi_rs_io_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_fixed_io_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_io_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_fixed_io_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_irq_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_irq_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_dma_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_dma_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_address16_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_address16_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_address32_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_address32_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_address64_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_address64_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_start_depend_fns_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_end_depend_fns_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_start_depend_fns_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_end_depend_fns_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_memory24_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_memory24_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_memory32_range_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_fixed_memory32_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_memory32_range_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_fixed_memory32_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_extended_irq_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_extended_irq_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_end_tag_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_end_tag_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-acpi_status
-acpi_rs_vendor_resource (
- u8 *byte_stream_buffer,
- acpi_size *bytes_consumed,
- u8 **output_buffer,
- acpi_size *structure_size);
-
-acpi_status
-acpi_rs_vendor_stream (
- struct acpi_resource *linked_list,
- u8 **output_buffer,
- acpi_size *bytes_consumed);
-
-u8
-acpi_rs_get_resource_type (
- u8 resource_start_byte);
-
-#endif /* __ACRESRC_H__ */
diff --git a/xen/include/acpi/acstruct.h b/xen/include/acpi/acstruct.h
index 56a3782cdb..6b2284c9e8 100644
--- a/xen/include/acpi/acstruct.h
+++ b/xen/include/acpi/acstruct.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -69,13 +69,14 @@
struct acpi_walk_state
{
u8 data_type; /* To differentiate various internal objs MUST BE FIRST!*/\
+ u8 walk_type;
acpi_owner_id owner_id; /* Owner of objects created during the walk */
u8 last_predicate; /* Result of last predicate */
+ u8 reserved; /* For alignment */
u8 current_result; /* */
u8 next_op_info; /* Info about next_op */
u8 num_operands; /* Stack pointer for Operands[] array */
u8 return_used;
- u8 walk_type;
u16 opcode; /* Current AML opcode */
u8 scope_depth;
u8 reserved1;
@@ -91,7 +92,8 @@ struct acpi_walk_state
struct acpi_namespace_node arguments[ACPI_METHOD_NUM_ARGS]; /* Control method arguments */
union acpi_operand_object **caller_return_desc;
union acpi_generic_state *control_state; /* List of control states (nested IFs) */
- struct acpi_namespace_node *deferred_node; /* Used when executing deferred opcodes */
+ struct acpi_namespace_node *deferred_node; /* Used when executing deferred opcodes */
+ struct acpi_gpe_event_info *gpe_event_info; /* Info for GPE (_Lxx/_Exx methods only */
struct acpi_namespace_node local_variables[ACPI_METHOD_NUM_LOCALS]; /* Control method locals */
struct acpi_namespace_node *method_call_node; /* Called method Node*/
union acpi_parse_object *method_call_op; /* method_call Op if running a method */
@@ -200,4 +202,21 @@ union acpi_aml_operands
};
+/* Internal method parameter list */
+
+struct acpi_parameter_info
+{
+ struct acpi_namespace_node *node;
+ union acpi_operand_object **parameters;
+ union acpi_operand_object *return_object;
+ u8 parameter_type;
+ u8 return_object_type;
+};
+
+/* Types for parameter_type above */
+
+#define ACPI_PARAM_ARGS 0
+#define ACPI_PARAM_GPE 1
+
+
#endif
diff --git a/xen/include/acpi/actables.h b/xen/include/acpi/actables.h
deleted file mode 100644
index 43112abd6a..0000000000
--- a/xen/include/acpi/actables.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/******************************************************************************
- *
- * Name: actables.h - ACPI table management
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACTABLES_H__
-#define __ACTABLES_H__
-
-
-/* Used in acpi_tb_map_acpi_table for size parameter if table header is to be used */
-
-#define SIZE_IN_HEADER 0
-
-
-acpi_status
-acpi_tb_handle_to_object (
- u16 table_id,
- struct acpi_table_desc **table_desc);
-
-/*
- * tbconvrt - Table conversion routines
- */
-
-acpi_status
-acpi_tb_convert_to_xsdt (
- struct acpi_table_desc *table_info);
-
-acpi_status
-acpi_tb_convert_table_fadt (
- void);
-
-acpi_status
-acpi_tb_build_common_facs (
- struct acpi_table_desc *table_info);
-
-u32
-acpi_tb_get_table_count (
- struct rsdp_descriptor *RSDP,
- struct acpi_table_header *RSDT);
-
-/*
- * tbget - Table "get" routines
- */
-
-acpi_status
-acpi_tb_get_table (
- struct acpi_pointer *address,
- struct acpi_table_desc *table_info);
-
-acpi_status
-acpi_tb_get_table_header (
- struct acpi_pointer *address,
- struct acpi_table_header *return_header);
-
-acpi_status
-acpi_tb_get_table_body (
- struct acpi_pointer *address,
- struct acpi_table_header *header,
- struct acpi_table_desc *table_info);
-
-acpi_status
-acpi_tb_get_this_table (
- struct acpi_pointer *address,
- struct acpi_table_header *header,
- struct acpi_table_desc *table_info);
-
-acpi_status
-acpi_tb_table_override (
- struct acpi_table_header *header,
- struct acpi_table_desc *table_info);
-
-acpi_status
-acpi_tb_get_table_ptr (
- acpi_table_type table_type,
- u32 instance,
- struct acpi_table_header **table_ptr_loc);
-
-acpi_status
-acpi_tb_verify_rsdp (
- struct acpi_pointer *address);
-
-void
-acpi_tb_get_rsdt_address (
- struct acpi_pointer *out_address);
-
-acpi_status
-acpi_tb_validate_rsdt (
- struct acpi_table_header *table_ptr);
-
-acpi_status
-acpi_tb_get_required_tables (
- void);
-
-acpi_status
-acpi_tb_get_primary_table (
- struct acpi_pointer *address,
- struct acpi_table_desc *table_info);
-
-acpi_status
-acpi_tb_get_secondary_table (
- struct acpi_pointer *address,
- acpi_string signature,
- struct acpi_table_desc *table_info);
-
-/*
- * tbinstall - Table installation
- */
-
-acpi_status
-acpi_tb_install_table (
- struct acpi_table_desc *table_info);
-
-acpi_status
-acpi_tb_match_signature (
- char *signature,
- struct acpi_table_desc *table_info,
- u8 search_type);
-
-acpi_status
-acpi_tb_recognize_table (
- struct acpi_table_desc *table_info,
- u8 search_type);
-
-acpi_status
-acpi_tb_init_table_descriptor (
- acpi_table_type table_type,
- struct acpi_table_desc *table_info);
-
-
-/*
- * tbremove - Table removal and deletion
- */
-
-void
-acpi_tb_delete_all_tables (
- void);
-
-void
-acpi_tb_delete_tables_by_type (
- acpi_table_type type);
-
-void
-acpi_tb_delete_single_table (
- struct acpi_table_desc *table_desc);
-
-struct acpi_table_desc *
-acpi_tb_uninstall_table (
- struct acpi_table_desc *table_desc);
-
-
-/*
- * tbrsd - RSDP, RSDT utilities
- */
-
-acpi_status
-acpi_tb_get_table_rsdt (
- void);
-
-u8 *
-acpi_tb_scan_memory_for_rsdp (
- u8 *start_address,
- u32 length);
-
-acpi_status
-acpi_tb_find_rsdp (
- struct acpi_table_desc *table_info,
- u32 flags);
-
-
-/*
- * tbutils - common table utilities
- */
-
-acpi_status
-acpi_tb_find_table (
- char *signature,
- char *oem_id,
- char *oem_table_id,
- struct acpi_table_header **table_ptr);
-
-acpi_status
-acpi_tb_verify_table_checksum (
- struct acpi_table_header *table_header);
-
-u8
-acpi_tb_checksum (
- void *buffer,
- u32 length);
-
-acpi_status
-acpi_tb_validate_table_header (
- struct acpi_table_header *table_header);
-
-
-#endif /* __ACTABLES_H__ */
diff --git a/xen/include/acpi/actbl.h b/xen/include/acpi/actbl.h
index 55680ad34b..f39ed91b5b 100644
--- a/xen/include/acpi/actbl.h
+++ b/xen/include/acpi/actbl.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -288,19 +288,6 @@ struct smart_battery_table
};
-/*
- * High performance timer
- */
-struct hpet_table
-{
- ACPI_TABLE_HEADER_DEF
- u32 hardware_id;
- u32 base_address [3];
- u8 hpet_number;
- u16 clock_tick;
- u8 attributes;
-};
-
#pragma pack()
@@ -343,5 +330,23 @@ struct acpi_table_support
#include "actbl1.h" /* Acpi 1.0 table definitions */
#include "actbl2.h" /* Acpi 2.0 table definitions */
+extern u8 acpi_fadt_is_v1; /* is set to 1 if FADT is revision 1,
+ * needed for certain workarounds */
+
+#pragma pack(1)
+/*
+ * High performance timer
+ */
+struct hpet_table
+{
+ ACPI_TABLE_HEADER_DEF
+ u32 hardware_id;
+ struct acpi_generic_address base_address;
+ u8 hpet_number;
+ u16 clock_tick;
+ u8 attributes;
+};
+
+#pragma pack()
#endif /* __ACTBL_H__ */
diff --git a/xen/include/acpi/actbl1.h b/xen/include/acpi/actbl1.h
index 0db07c3291..33de5f4d2c 100644
--- a/xen/include/acpi/actbl1.h
+++ b/xen/include/acpi/actbl1.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/xen/include/acpi/actbl2.h b/xen/include/acpi/actbl2.h
index 188cedb529..ea3ca08d88 100644
--- a/xen/include/acpi/actbl2.h
+++ b/xen/include/acpi/actbl2.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -62,6 +62,7 @@
#define BAF_8042_KEYBOARD_CONTROLLER 0x0002
#define FADT2_REVISION_ID 3
+#define FADT2_MINUS_REVISION_ID 2
#pragma pack(1)
@@ -114,53 +115,56 @@ struct acpi_generic_address
u8 address_space_id; /* Address space where struct or register exists. */
u8 register_bit_width; /* Size in bits of given register */
u8 register_bit_offset; /* Bit offset within the register */
- u8 reserved; /* Must be 0 */
+ u8 access_width; /* Minimum Access size (ACPI 3.0) */
u64 address; /* 64-bit address of struct or register */
};
+#define FADT_REV2_COMMON \
+ u32 V1_firmware_ctrl; /* 32-bit physical address of FACS */ \
+ u32 V1_dsdt; /* 32-bit physical address of DSDT */ \
+ u8 reserved1; /* System Interrupt Model isn't used in ACPI 2.0*/ \
+ u8 prefer_PM_profile; /* Conveys preferred power management profile to OSPM. */ \
+ u16 sci_int; /* System vector of SCI interrupt */ \
+ u32 smi_cmd; /* Port address of SMI command port */ \
+ u8 acpi_enable; /* Value to write to smi_cmd to enable ACPI */ \
+ u8 acpi_disable; /* Value to write to smi_cmd to disable ACPI */ \
+ u8 S4bios_req; /* Value to write to SMI CMD to enter S4BIOS state */ \
+ u8 pstate_cnt; /* Processor performance state control*/ \
+ u32 V1_pm1a_evt_blk; /* Port address of Power Mgt 1a acpi_event Reg Blk */ \
+ u32 V1_pm1b_evt_blk; /* Port address of Power Mgt 1b acpi_event Reg Blk */ \
+ u32 V1_pm1a_cnt_blk; /* Port address of Power Mgt 1a Control Reg Blk */ \
+ u32 V1_pm1b_cnt_blk; /* Port address of Power Mgt 1b Control Reg Blk */ \
+ u32 V1_pm2_cnt_blk; /* Port address of Power Mgt 2 Control Reg Blk */ \
+ u32 V1_pm_tmr_blk; /* Port address of Power Mgt Timer Ctrl Reg Blk */ \
+ u32 V1_gpe0_blk; /* Port addr of General Purpose acpi_event 0 Reg Blk */ \
+ u32 V1_gpe1_blk; /* Port addr of General Purpose acpi_event 1 Reg Blk */ \
+ u8 pm1_evt_len; /* Byte length of ports at pm1_x_evt_blk */ \
+ u8 pm1_cnt_len; /* Byte length of ports at pm1_x_cnt_blk */ \
+ u8 pm2_cnt_len; /* Byte Length of ports at pm2_cnt_blk */ \
+ u8 pm_tm_len; /* Byte Length of ports at pm_tm_blk */ \
+ u8 gpe0_blk_len; /* Byte Length of ports at gpe0_blk */ \
+ u8 gpe1_blk_len; /* Byte Length of ports at gpe1_blk */ \
+ u8 gpe1_base; /* Offset in gpe model where gpe1 events start */ \
+ u8 cst_cnt; /* Support for the _CST object and C States change notification.*/ \
+ u16 plvl2_lat; /* Worst case HW latency to enter/exit C2 state */ \
+ u16 plvl3_lat; /* Worst case HW latency to enter/exit C3 state */ \
+ u16 flush_size; /* Number of flush strides that need to be read */ \
+ u16 flush_stride; /* Processor's memory cache line width, in bytes */ \
+ u8 duty_offset; /* Processor's duty cycle index in processor's P_CNT reg*/ \
+ u8 duty_width; /* Processor's duty cycle value bit width in P_CNT register.*/ \
+ u8 day_alrm; /* Index to day-of-month alarm in RTC CMOS RAM */ \
+ u8 mon_alrm; /* Index to month-of-year alarm in RTC CMOS RAM */ \
+ u8 century; /* Index to century in RTC CMOS RAM */ \
+ u16 iapc_boot_arch; /* IA-PC Boot Architecture Flags. See Table 5-10 for description*/
+
/*
* ACPI 2.0 Fixed ACPI Description Table (FADT)
*/
struct fadt_descriptor_rev2
{
ACPI_TABLE_HEADER_DEF /* ACPI common table header */
- u32 V1_firmware_ctrl; /* 32-bit physical address of FACS */
- u32 V1_dsdt; /* 32-bit physical address of DSDT */
- u8 reserved1; /* System Interrupt Model isn't used in ACPI 2.0*/
- u8 prefer_PM_profile; /* Conveys preferred power management profile to OSPM. */
- u16 sci_int; /* System vector of SCI interrupt */
- u32 smi_cmd; /* Port address of SMI command port */
- u8 acpi_enable; /* Value to write to smi_cmd to enable ACPI */
- u8 acpi_disable; /* Value to write to smi_cmd to disable ACPI */
- u8 S4bios_req; /* Value to write to SMI CMD to enter S4BIOS state */
- u8 pstate_cnt; /* Processor performance state control*/
- u32 V1_pm1a_evt_blk; /* Port address of Power Mgt 1a acpi_event Reg Blk */
- u32 V1_pm1b_evt_blk; /* Port address of Power Mgt 1b acpi_event Reg Blk */
- u32 V1_pm1a_cnt_blk; /* Port address of Power Mgt 1a Control Reg Blk */
- u32 V1_pm1b_cnt_blk; /* Port address of Power Mgt 1b Control Reg Blk */
- u32 V1_pm2_cnt_blk; /* Port address of Power Mgt 2 Control Reg Blk */
- u32 V1_pm_tmr_blk; /* Port address of Power Mgt Timer Ctrl Reg Blk */
- u32 V1_gpe0_blk; /* Port addr of General Purpose acpi_event 0 Reg Blk */
- u32 V1_gpe1_blk; /* Port addr of General Purpose acpi_event 1 Reg Blk */
- u8 pm1_evt_len; /* Byte length of ports at pm1_x_evt_blk */
- u8 pm1_cnt_len; /* Byte length of ports at pm1_x_cnt_blk */
- u8 pm2_cnt_len; /* Byte Length of ports at pm2_cnt_blk */
- u8 pm_tm_len; /* Byte Length of ports at pm_tm_blk */
- u8 gpe0_blk_len; /* Byte Length of ports at gpe0_blk */
- u8 gpe1_blk_len; /* Byte Length of ports at gpe1_blk */
- u8 gpe1_base; /* Offset in gpe model where gpe1 events start */
- u8 cst_cnt; /* Support for the _CST object and C States change notification.*/
- u16 plvl2_lat; /* Worst case HW latency to enter/exit C2 state */
- u16 plvl3_lat; /* Worst case HW latency to enter/exit C3 state */
- u16 flush_size; /* Number of flush strides that need to be read */
- u16 flush_stride; /* Processor's memory cache line width, in bytes */
- u8 duty_offset; /* Processor's duty cycle index in processor's P_CNT reg*/
- u8 duty_width; /* Processor's duty cycle value bit width in P_CNT register.*/
- u8 day_alrm; /* Index to day-of-month alarm in RTC CMOS RAM */
- u8 mon_alrm; /* Index to month-of-year alarm in RTC CMOS RAM */
- u8 century; /* Index to century in RTC CMOS RAM */
- u16 iapc_boot_arch; /* IA-PC Boot Architecture Flags. See Table 5-10 for description*/
+ FADT_REV2_COMMON
u8 reserved2; /* Reserved */
u32 wb_invd : 1; /* The wbinvd instruction works properly */
u32 wb_invd_flush : 1; /* The wbinvd flushes but does not invalidate */
@@ -195,6 +199,20 @@ struct fadt_descriptor_rev2
};
+/* "Downrevved" ACPI 2.0 FADT descriptor */
+
+struct fadt_descriptor_rev2_minus
+{
+ ACPI_TABLE_HEADER_DEF /* ACPI common table header */
+ FADT_REV2_COMMON
+ u8 reserved2; /* Reserved */
+ u32 flags;
+ struct acpi_generic_address reset_register; /* Reset register address in GAS format */
+ u8 reset_value; /* Value to write to the reset_register port to reset the system. */
+ u8 reserved7[3]; /* These three bytes must be zero */
+};
+
+
/* Embedded Controller */
struct ec_boot_resources
diff --git a/xen/include/acpi/actbl71.h b/xen/include/acpi/actbl71.h
deleted file mode 100644
index 7b4fb44261..0000000000
--- a/xen/include/acpi/actbl71.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/******************************************************************************
- *
- * Name: actbl71.h - IA-64 Extensions to the ACPI Spec Rev. 0.71
- * This file includes tables specific to this
- * specification revision.
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2003, R. Byron Moore
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __ACTBL71_H__
-#define __ACTBL71_H__
-
-
-/* 0.71 FADT address_space data item bitmasks defines */
-/* If the associated bit is zero then it is in memory space else in io space */
-
-#define SMI_CMD_ADDRESS_SPACE 0x01
-#define PM1_BLK_ADDRESS_SPACE 0x02
-#define PM2_CNT_BLK_ADDRESS_SPACE 0x04
-#define PM_TMR_BLK_ADDRESS_SPACE 0x08
-#define GPE0_BLK_ADDRESS_SPACE 0x10
-#define GPE1_BLK_ADDRESS_SPACE 0x20
-
-/* Only for clarity in declarations */
-
-typedef u64 IO_ADDRESS;
-
-
-#pragma pack(1)
-struct /* Root System Descriptor Pointer */
-{
- NATIVE_CHAR signature [8]; /* contains "RSD PTR " */
- u8 checksum; /* to make sum of struct == 0 */
- NATIVE_CHAR oem_id [6]; /* OEM identification */
- u8 reserved; /* Must be 0 for 1.0, 2 for 2.0 */
- u64 rsdt_physical_address; /* 64-bit physical address of RSDT */
-};
-
-
-/*****************************************/
-/* IA64 Extensions to ACPI Spec Rev 0.71 */
-/* for the Root System Description Table */
-/*****************************************/
-struct
-{
- struct acpi_table_header header; /* Table header */
- u32 reserved_pad; /* IA64 alignment, must be 0 */
- u64 table_offset_entry [1]; /* Array of pointers to other */
- /* tables' headers */
-};
-
-
-/*******************************************/
-/* IA64 Extensions to ACPI Spec Rev 0.71 */
-/* for the Firmware ACPI Control Structure */
-/*******************************************/
-struct
-{
- NATIVE_CHAR signature[4]; /* signature "FACS" */
- u32 length; /* length of structure, in bytes */
- u32 hardware_signature; /* hardware configuration signature */
- u32 reserved4; /* must be 0 */
- u64 firmware_waking_vector; /* ACPI OS waking vector */
- u64 global_lock; /* Global Lock */
- u32 S4bios_f : 1; /* Indicates if S4BIOS support is present */
- u32 reserved1 : 31; /* must be 0 */
- u8 reserved3 [28]; /* reserved - must be zero */
-};
-
-
-/******************************************/
-/* IA64 Extensions to ACPI Spec Rev 0.71 */
-/* for the Fixed ACPI Description Table */
-/******************************************/
-struct
-{
- struct acpi_table_header header; /* table header */
- u32 reserved_pad; /* IA64 alignment, must be 0 */
- u64 firmware_ctrl; /* 64-bit Physical address of FACS */
- u64 dsdt; /* 64-bit Physical address of DSDT */
- u8 model; /* System Interrupt Model */
- u8 address_space; /* Address Space Bitmask */
- u16 sci_int; /* System vector of SCI interrupt */
- u8 acpi_enable; /* value to write to smi_cmd to enable ACPI */
- u8 acpi_disable; /* value to write to smi_cmd to disable ACPI */
- u8 S4bios_req; /* Value to write to SMI CMD to enter S4BIOS state */
- u8 reserved2; /* reserved - must be zero */
- u64 smi_cmd; /* Port address of SMI command port */
- u64 pm1a_evt_blk; /* Port address of Power Mgt 1a acpi_event Reg Blk */
- u64 pm1b_evt_blk; /* Port address of Power Mgt 1b acpi_event Reg Blk */
- u64 pm1a_cnt_blk; /* Port address of Power Mgt 1a Control Reg Blk */
- u64 pm1b_cnt_blk; /* Port address of Power Mgt 1b Control Reg Blk */
- u64 pm2_cnt_blk; /* Port address of Power Mgt 2 Control Reg Blk */
- u64 pm_tmr_blk; /* Port address of Power Mgt Timer Ctrl Reg Blk */
- u64 gpe0_blk; /* Port addr of General Purpose acpi_event 0 Reg Blk */
- u64 gpe1_blk; /* Port addr of General Purpose acpi_event 1 Reg Blk */
- u8 pm1_evt_len; /* Byte length of ports at pm1_x_evt_blk */
- u8 pm1_cnt_len; /* Byte length of ports at pm1_x_cnt_blk */
- u8 pm2_cnt_len; /* Byte Length of ports at pm2_cnt_blk */
- u8 pm_tm_len; /* Byte Length of ports at pm_tm_blk */
- u8 gpe0_blk_len; /* Byte Length of ports at gpe0_blk */
- u8 gpe1_blk_len; /* Byte Length of ports at gpe1_blk */
- u8 gpe1_base; /* offset in gpe model where gpe1 events start */
- u8 reserved3; /* reserved */
- u16 plvl2_lat; /* worst case HW latency to enter/exit C2 state */
- u16 plvl3_lat; /* worst case HW latency to enter/exit C3 state */
- u8 day_alrm; /* index to day-of-month alarm in RTC CMOS RAM */
- u8 mon_alrm; /* index to month-of-year alarm in RTC CMOS RAM */
- u8 century; /* index to century in RTC CMOS RAM */
- u8 reserved4; /* reserved */
- u32 flush_cash : 1; /* PAL_FLUSH_CACHE is correctly supported */
- u32 reserved5 : 1; /* reserved - must be zero */
- u32 proc_c1 : 1; /* all processors support C1 state */
- u32 plvl2_up : 1; /* C2 state works on MP system */
- u32 pwr_button : 1; /* Power button is handled as a generic feature */
- u32 sleep_button : 1; /* Sleep button is handled as a generic feature, or not present */
- u32 fixed_rTC : 1; /* RTC wakeup stat not in fixed register space */
- u32 rtcs4 : 1; /* RTC wakeup stat not possible from S4 */
- u32 tmr_val_ext : 1; /* tmr_val is 32 bits */
- u32 dock_cap : 1; /* Supports Docking */
- u32 reserved6 : 22; /* reserved - must be zero */
-};
-
-#pragma pack()
-
-#endif /* __ACTBL71_H__ */
-
diff --git a/xen/include/acpi/actypes.h b/xen/include/acpi/actypes.h
index ae1a73e617..51cb780c24 100644
--- a/xen/include/acpi/actypes.h
+++ b/xen/include/acpi/actypes.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -303,7 +303,7 @@ struct uint32_struct
typedef u32 acpi_integer;
#define ACPI_INTEGER_MAX ACPI_UINT32_MAX
#define ACPI_INTEGER_BIT_SIZE 32
-#define ACPI_MAX_DECIMAL_DIGITS 10
+#define ACPI_MAX_DECIMAL_DIGITS 10 /* 2^32 = 4,294,967,296 */
#define ACPI_USE_NATIVE_DIVIDE /* Use compiler native 32-bit divide */
@@ -315,13 +315,18 @@ typedef u32 acpi_integer;
typedef u64 acpi_integer;
#define ACPI_INTEGER_MAX ACPI_UINT64_MAX
#define ACPI_INTEGER_BIT_SIZE 64
-#define ACPI_MAX_DECIMAL_DIGITS 19
+#define ACPI_MAX_DECIMAL_DIGITS 20 /* 2^64 = 18,446,744,073,709,551,616 */
+
#if ACPI_MACHINE_WIDTH == 64
#define ACPI_USE_NATIVE_DIVIDE /* Use compiler native 64-bit divide */
#endif
#endif
+#define ACPI_MAX64_DECIMAL_DIGITS 20
+#define ACPI_MAX32_DECIMAL_DIGITS 10
+#define ACPI_MAX16_DECIMAL_DIGITS 5
+#define ACPI_MAX8_DECIMAL_DIGITS 3
/*
* Constants with special meanings
@@ -349,7 +354,6 @@ typedef u64 acpi_integer;
/*
* Power state values
*/
-
#define ACPI_STATE_UNKNOWN (u8) 0xFF
#define ACPI_STATE_S0 (u8) 0
@@ -393,7 +397,6 @@ typedef u64 acpi_integer;
#define ACPI_NOTIFY_BUS_MODE_MISMATCH (u8) 6
#define ACPI_NOTIFY_POWER_FAULT (u8) 7
-
/*
* Table types. These values are passed to the table related APIs
*/
@@ -409,14 +412,13 @@ typedef u32 acpi_table_type;
#define ACPI_TABLE_MAX 6
#define NUM_ACPI_TABLE_TYPES (ACPI_TABLE_MAX+1)
-
/*
* Types associated with ACPI names and objects. The first group of
* values (up to ACPI_TYPE_EXTERNAL_MAX) correspond to the definition
* of the ACPI object_type() operator (See the ACPI Spec). Therefore,
* only add to the first group if the spec changes.
*
- * Types must be kept in sync with the global acpi_ns_properties
+ * NOTE: Types must be kept in sync with the global acpi_ns_properties
* and acpi_ns_type_names arrays.
*/
typedef u32 acpi_object_type;
@@ -453,26 +455,27 @@ typedef u32 acpi_object_type;
#define ACPI_TYPE_LOCAL_INDEX_FIELD 0x13
#define ACPI_TYPE_LOCAL_REFERENCE 0x14 /* Arg#, Local#, Name, Debug, ref_of, Index */
#define ACPI_TYPE_LOCAL_ALIAS 0x15
-#define ACPI_TYPE_LOCAL_NOTIFY 0x16
-#define ACPI_TYPE_LOCAL_ADDRESS_HANDLER 0x17
-#define ACPI_TYPE_LOCAL_RESOURCE 0x18
-#define ACPI_TYPE_LOCAL_RESOURCE_FIELD 0x19
-#define ACPI_TYPE_LOCAL_SCOPE 0x1A /* 1 Name, multiple object_list Nodes */
+#define ACPI_TYPE_LOCAL_METHOD_ALIAS 0x16
+#define ACPI_TYPE_LOCAL_NOTIFY 0x17
+#define ACPI_TYPE_LOCAL_ADDRESS_HANDLER 0x18
+#define ACPI_TYPE_LOCAL_RESOURCE 0x19
+#define ACPI_TYPE_LOCAL_RESOURCE_FIELD 0x1A
+#define ACPI_TYPE_LOCAL_SCOPE 0x1B /* 1 Name, multiple object_list Nodes */
-#define ACPI_TYPE_NS_NODE_MAX 0x1A /* Last typecode used within a NS Node */
+#define ACPI_TYPE_NS_NODE_MAX 0x1B /* Last typecode used within a NS Node */
/*
* These are special object types that never appear in
* a Namespace node, only in an union acpi_operand_object
*/
-#define ACPI_TYPE_LOCAL_EXTRA 0x1B
-#define ACPI_TYPE_LOCAL_DATA 0x1C
+#define ACPI_TYPE_LOCAL_EXTRA 0x1C
+#define ACPI_TYPE_LOCAL_DATA 0x1D
-#define ACPI_TYPE_LOCAL_MAX 0x1C
+#define ACPI_TYPE_LOCAL_MAX 0x1D
/* All types above here are invalid */
-#define ACPI_TYPE_INVALID 0x1D
+#define ACPI_TYPE_INVALID 0x1E
#define ACPI_TYPE_NOT_FOUND 0xFF
@@ -514,9 +517,8 @@ typedef u32 acpi_object_type;
#define ACPI_WRITE 1
#define ACPI_IO_MASK 1
-
/*
- * Acpi Event Types: Fixed & General Purpose
+ * Event Types: Fixed & General Purpose
*/
typedef u32 acpi_event_type;
@@ -531,25 +533,8 @@ typedef u32 acpi_event_type;
#define ACPI_EVENT_MAX 4
#define ACPI_NUM_FIXED_EVENTS ACPI_EVENT_MAX + 1
-#define ACPI_GPE_INVALID 0xFF
-#define ACPI_GPE_MAX 0xFF
-#define ACPI_NUM_GPE 256
-
-#define ACPI_EVENT_LEVEL_TRIGGERED 1
-#define ACPI_EVENT_EDGE_TRIGGERED 2
-
-/*
- * Flags for GPE and Lock interfaces
- */
-#define ACPI_EVENT_WAKE_ENABLE 0x2
-#define ACPI_EVENT_WAKE_DISABLE 0x2
-
-#define ACPI_NOT_ISR 0x1
-#define ACPI_ISR 0x0
-
-
/*
- * acpi_event Status:
+ * Event Status - Per event
* -------------
* The encoding of acpi_event_status is illustrated below.
* Note that a set bit (1) indicates the property is TRUE
@@ -570,12 +555,74 @@ typedef u32 acpi_event_status;
#define ACPI_EVENT_FLAG_WAKE_ENABLED (acpi_event_status) 0x02
#define ACPI_EVENT_FLAG_SET (acpi_event_status) 0x04
+/*
+ * General Purpose Events (GPE)
+ */
+#define ACPI_GPE_INVALID 0xFF
+#define ACPI_GPE_MAX 0xFF
+#define ACPI_NUM_GPE 256
+
+#define ACPI_GPE_ENABLE 0
+#define ACPI_GPE_DISABLE 1
+
+
+/*
+ * GPE info flags - Per GPE
+ * +-+-+-+---+---+-+
+ * |7|6|5|4:3|2:1|0|
+ * +-+-+-+---+---+-+
+ * | | | | | |
+ * | | | | | +--- Interrupt type: Edge or Level Triggered
+ * | | | | +--- Type: Wake-only, Runtime-only, or wake/runtime
+ * | | | +--- Type of dispatch -- to method, handler, or none
+ * | | +--- Enabled for runtime?
+ * | +--- Enabled for wake?
+ * +--- System state when GPE ocurred (running/waking)
+ */
+#define ACPI_GPE_XRUPT_TYPE_MASK (u8) 0x01
+#define ACPI_GPE_LEVEL_TRIGGERED (u8) 0x01
+#define ACPI_GPE_EDGE_TRIGGERED (u8) 0x00
+
+#define ACPI_GPE_TYPE_MASK (u8) 0x06
+#define ACPI_GPE_TYPE_WAKE_RUN (u8) 0x06
+#define ACPI_GPE_TYPE_WAKE (u8) 0x02
+#define ACPI_GPE_TYPE_RUNTIME (u8) 0x04 /* Default */
+
+#define ACPI_GPE_DISPATCH_MASK (u8) 0x18
+#define ACPI_GPE_DISPATCH_HANDLER (u8) 0x08
+#define ACPI_GPE_DISPATCH_METHOD (u8) 0x10
+#define ACPI_GPE_DISPATCH_NOT_USED (u8) 0x00 /* Default */
+
+#define ACPI_GPE_RUN_ENABLE_MASK (u8) 0x20
+#define ACPI_GPE_RUN_ENABLED (u8) 0x20
+#define ACPI_GPE_RUN_DISABLED (u8) 0x00 /* Default */
+
+#define ACPI_GPE_WAKE_ENABLE_MASK (u8) 0x40
+#define ACPI_GPE_WAKE_ENABLED (u8) 0x40
+#define ACPI_GPE_WAKE_DISABLED (u8) 0x00 /* Default */
+
+#define ACPI_GPE_ENABLE_MASK (u8) 0x60 /* Both run/wake */
+
+#define ACPI_GPE_SYSTEM_MASK (u8) 0x80
+#define ACPI_GPE_SYSTEM_RUNNING (u8) 0x80
+#define ACPI_GPE_SYSTEM_WAKING (u8) 0x00
+
+/*
+ * Flags for GPE and Lock interfaces
+ */
+#define ACPI_EVENT_WAKE_ENABLE 0x2 /* acpi_gpe_enable */
+#define ACPI_EVENT_WAKE_DISABLE 0x2 /* acpi_gpe_disable */
+
+#define ACPI_NOT_ISR 0x1
+#define ACPI_ISR 0x0
+
/* Notify types */
-#define ACPI_SYSTEM_NOTIFY 0
-#define ACPI_DEVICE_NOTIFY 1
-#define ACPI_MAX_NOTIFY_HANDLER_TYPE 1
+#define ACPI_SYSTEM_NOTIFY 0x1
+#define ACPI_DEVICE_NOTIFY 0x2
+#define ACPI_ALL_NOTIFY 0x3
+#define ACPI_MAX_NOTIFY_HANDLER_TYPE 0x3
#define ACPI_MAX_SYS_NOTIFY 0x7f
@@ -756,11 +803,11 @@ struct acpi_system_info
*/
typedef u32
-(ACPI_SYSTEM_XFACE *OSD_HANDLER) (
+(ACPI_SYSTEM_XFACE *acpi_osd_handler) (
void *context);
typedef void
-(ACPI_SYSTEM_XFACE *OSD_EXECUTION_CALLBACK) (
+(ACPI_SYSTEM_XFACE *acpi_osd_exec_callback) (
void *context);
/*
@@ -771,10 +818,6 @@ u32 (*acpi_event_handler) (
void *context);
typedef
-void (*acpi_gpe_handler) (
- void *context);
-
-typedef
void (*acpi_notify_handler) (
acpi_handle device,
u32 value,
@@ -793,8 +836,16 @@ acpi_status (*acpi_init_handler) (
#define ACPI_INIT_DEVICE_INI 1
+typedef
+acpi_status (*acpi_exception_handler) (
+ acpi_status aml_status,
+ acpi_name name,
+ u16 opcode,
+ u32 aml_offset,
+ void *context);
+
-/* Address Spaces (Operation Regions */
+/* Address Spaces (For Operation Regions) */
typedef
acpi_status (*acpi_adr_space_handler) (
@@ -861,6 +912,7 @@ struct acpi_compatible_id_list
#define ACPI_VALID_HID 0x0004
#define ACPI_VALID_UID 0x0008
#define ACPI_VALID_CID 0x0010
+#define ACPI_VALID_SXDS 0x0020
#define ACPI_COMMON_OBJ_INFO \
@@ -880,11 +932,12 @@ struct acpi_device_info
{
ACPI_COMMON_OBJ_INFO;
- u32 valid; /* Indicates which fields are valid */
+ u32 valid; /* Indicates which fields below are valid */
u32 current_status; /* _STA value */
acpi_integer address; /* _ADR value if any */
struct acpi_device_id hardware_id; /* _HID value if any */
struct acpi_device_id unique_id; /* _UID value if any */
+ u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */
struct acpi_compatible_id_list compatibility_id; /* List of _CIDs if any */
};
diff --git a/xen/include/acpi/acutils.h b/xen/include/acpi/acutils.h
index 4927d6beab..0de26b8f10 100644
--- a/xen/include/acpi/acutils.h
+++ b/xen/include/acpi/acutils.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -52,7 +52,6 @@ acpi_status (*acpi_pkg_callback) (
union acpi_generic_state *state,
void *context);
-
acpi_status
acpi_ut_walk_package_tree (
union acpi_operand_object *source_object,
@@ -60,7 +59,6 @@ acpi_ut_walk_package_tree (
acpi_pkg_callback walk_callback,
void *context);
-
struct acpi_pkg_info
{
u8 *free_space;
@@ -180,6 +178,12 @@ acpi_ut_strncpy (
acpi_size count);
int
+acpi_ut_memcmp (
+ const char *buffer1,
+ const char *buffer2,
+ acpi_size count);
+
+int
acpi_ut_strncmp (
const char *string1,
const char *string2,
@@ -473,9 +477,14 @@ acpi_ut_delete_internal_object_list (
#define METHOD_NAME__PRT "_PRT"
#define METHOD_NAME__CRS "_CRS"
#define METHOD_NAME__PRS "_PRS"
+#define METHOD_NAME__PRW "_PRW"
acpi_status
+acpi_ut_osi_implementation (
+ struct acpi_walk_state *walk_state);
+
+acpi_status
acpi_ut_evaluate_object (
struct acpi_namespace_node *prefix_node,
char *path,
@@ -508,6 +517,10 @@ acpi_ut_execute_UID (
struct acpi_namespace_node *device_node,
struct acpi_device_id *uid);
+acpi_status
+acpi_ut_execute_sxds (
+ struct acpi_namespace_node *device_node,
+ u8 *highest);
/*
* ut_mutex - mutual exclusion interfaces
@@ -570,6 +583,10 @@ union acpi_operand_object *
acpi_ut_create_buffer_object (
acpi_size buffer_size);
+union acpi_operand_object *
+acpi_ut_create_string_object (
+ acpi_size string_size);
+
/*
* ut_ref_cnt - Object reference count management
@@ -649,12 +666,14 @@ acpi_ut_create_update_state_and_push (
u16 action,
union acpi_generic_state **state_list);
+#ifdef ACPI_FUTURE_USAGE
acpi_status
acpi_ut_create_pkg_state_and_push (
void *internal_object,
void *external_object,
u16 index,
union acpi_generic_state **state_list);
+#endif
union acpi_generic_state *
acpi_ut_create_control_state (
@@ -664,6 +683,7 @@ void
acpi_ut_delete_generic_state (
union acpi_generic_state *state);
+#ifdef ACPI_ENABLE_OBJECT_CACHE
void
acpi_ut_delete_generic_state_cache (
void);
@@ -671,6 +691,7 @@ acpi_ut_delete_generic_state_cache (
void
acpi_ut_delete_object_cache (
void);
+#endif
/*
* utmisc
@@ -683,14 +704,14 @@ acpi_ut_print_string (
acpi_status
acpi_ut_divide (
- acpi_integer *in_dividend,
- acpi_integer *in_divisor,
+ acpi_integer in_dividend,
+ acpi_integer in_divisor,
acpi_integer *out_quotient,
acpi_integer *out_remainder);
acpi_status
acpi_ut_short_divide (
- acpi_integer *in_dividend,
+ acpi_integer in_dividend,
u32 divisor,
acpi_integer *out_quotient,
u32 *out_remainder);
@@ -709,9 +730,15 @@ acpi_ut_strtoul64 (
u32 base,
acpi_integer *ret_integer);
+/* Values for Base above (16=Hex, 10=Decimal) */
+
+#define ACPI_ANY_BASE 0
+
+#ifdef ACPI_FUTURE_USAGE
char *
acpi_ut_strupr (
char *src_string);
+#endif
u8 *
acpi_ut_get_resource_end_tag (
@@ -753,9 +780,11 @@ acpi_ut_release_to_cache (
u32 list_id,
void *object);
+#ifdef ACPI_ENABLE_OBJECT_CACHE
void
acpi_ut_delete_generic_cache (
u32 list_id);
+#endif
acpi_status
acpi_ut_validate_buffer (
@@ -830,9 +859,11 @@ acpi_ut_remove_allocation (
char *module,
u32 line);
+#ifdef ACPI_FUTURE_USAGE
void
acpi_ut_dump_allocation_info (
void);
+#endif
void
acpi_ut_dump_allocations (
diff --git a/xen/include/acpi/amlcode.h b/xen/include/acpi/amlcode.h
deleted file mode 100644
index 512071acbc..0000000000
--- a/xen/include/acpi/amlcode.h
+++ /dev/null
@@ -1,506 +0,0 @@
-/******************************************************************************
- *
- * Name: amlcode.h - Definitions for AML, as included in "definition blocks"
- * Declarations and definitions contained herein are derived
- * directly from the ACPI specification.
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __AMLCODE_H__
-#define __AMLCODE_H__
-
-/* primary opcodes */
-
-#define AML_NULL_CHAR (u16) 0x00
-
-#define AML_ZERO_OP (u16) 0x00
-#define AML_ONE_OP (u16) 0x01
-#define AML_UNASSIGNED (u16) 0x02
-#define AML_ALIAS_OP (u16) 0x06
-#define AML_NAME_OP (u16) 0x08
-#define AML_BYTE_OP (u16) 0x0a
-#define AML_WORD_OP (u16) 0x0b
-#define AML_DWORD_OP (u16) 0x0c
-#define AML_STRING_OP (u16) 0x0d
-#define AML_QWORD_OP (u16) 0x0e /* ACPI 2.0 */
-#define AML_SCOPE_OP (u16) 0x10
-#define AML_BUFFER_OP (u16) 0x11
-#define AML_PACKAGE_OP (u16) 0x12
-#define AML_VAR_PACKAGE_OP (u16) 0x13 /* ACPI 2.0 */
-#define AML_METHOD_OP (u16) 0x14
-#define AML_DUAL_NAME_PREFIX (u16) 0x2e
-#define AML_MULTI_NAME_PREFIX_OP (u16) 0x2f
-#define AML_NAME_CHAR_SUBSEQ (u16) 0x30
-#define AML_NAME_CHAR_FIRST (u16) 0x41
-#define AML_OP_PREFIX (u16) 0x5b
-#define AML_ROOT_PREFIX (u16) 0x5c
-#define AML_PARENT_PREFIX (u16) 0x5e
-#define AML_LOCAL_OP (u16) 0x60
-#define AML_LOCAL0 (u16) 0x60
-#define AML_LOCAL1 (u16) 0x61
-#define AML_LOCAL2 (u16) 0x62
-#define AML_LOCAL3 (u16) 0x63
-#define AML_LOCAL4 (u16) 0x64
-#define AML_LOCAL5 (u16) 0x65
-#define AML_LOCAL6 (u16) 0x66
-#define AML_LOCAL7 (u16) 0x67
-#define AML_ARG_OP (u16) 0x68
-#define AML_ARG0 (u16) 0x68
-#define AML_ARG1 (u16) 0x69
-#define AML_ARG2 (u16) 0x6a
-#define AML_ARG3 (u16) 0x6b
-#define AML_ARG4 (u16) 0x6c
-#define AML_ARG5 (u16) 0x6d
-#define AML_ARG6 (u16) 0x6e
-#define AML_STORE_OP (u16) 0x70
-#define AML_REF_OF_OP (u16) 0x71
-#define AML_ADD_OP (u16) 0x72
-#define AML_CONCAT_OP (u16) 0x73
-#define AML_SUBTRACT_OP (u16) 0x74
-#define AML_INCREMENT_OP (u16) 0x75
-#define AML_DECREMENT_OP (u16) 0x76
-#define AML_MULTIPLY_OP (u16) 0x77
-#define AML_DIVIDE_OP (u16) 0x78
-#define AML_SHIFT_LEFT_OP (u16) 0x79
-#define AML_SHIFT_RIGHT_OP (u16) 0x7a
-#define AML_BIT_AND_OP (u16) 0x7b
-#define AML_BIT_NAND_OP (u16) 0x7c
-#define AML_BIT_OR_OP (u16) 0x7d
-#define AML_BIT_NOR_OP (u16) 0x7e
-#define AML_BIT_XOR_OP (u16) 0x7f
-#define AML_BIT_NOT_OP (u16) 0x80
-#define AML_FIND_SET_LEFT_BIT_OP (u16) 0x81
-#define AML_FIND_SET_RIGHT_BIT_OP (u16) 0x82
-#define AML_DEREF_OF_OP (u16) 0x83
-#define AML_CONCAT_RES_OP (u16) 0x84 /* ACPI 2.0 */
-#define AML_MOD_OP (u16) 0x85 /* ACPI 2.0 */
-#define AML_NOTIFY_OP (u16) 0x86
-#define AML_SIZE_OF_OP (u16) 0x87
-#define AML_INDEX_OP (u16) 0x88
-#define AML_MATCH_OP (u16) 0x89
-#define AML_CREATE_DWORD_FIELD_OP (u16) 0x8a
-#define AML_CREATE_WORD_FIELD_OP (u16) 0x8b
-#define AML_CREATE_BYTE_FIELD_OP (u16) 0x8c
-#define AML_CREATE_BIT_FIELD_OP (u16) 0x8d
-#define AML_TYPE_OP (u16) 0x8e
-#define AML_CREATE_QWORD_FIELD_OP (u16) 0x8f /* ACPI 2.0 */
-#define AML_LAND_OP (u16) 0x90
-#define AML_LOR_OP (u16) 0x91
-#define AML_LNOT_OP (u16) 0x92
-#define AML_LEQUAL_OP (u16) 0x93
-#define AML_LGREATER_OP (u16) 0x94
-#define AML_LLESS_OP (u16) 0x95
-#define AML_TO_BUFFER_OP (u16) 0x96 /* ACPI 2.0 */
-#define AML_TO_DECSTRING_OP (u16) 0x97 /* ACPI 2.0 */
-#define AML_TO_HEXSTRING_OP (u16) 0x98 /* ACPI 2.0 */
-#define AML_TO_INTEGER_OP (u16) 0x99 /* ACPI 2.0 */
-#define AML_TO_STRING_OP (u16) 0x9c /* ACPI 2.0 */
-#define AML_COPY_OP (u16) 0x9d /* ACPI 2.0 */
-#define AML_MID_OP (u16) 0x9e /* ACPI 2.0 */
-#define AML_CONTINUE_OP (u16) 0x9f /* ACPI 2.0 */
-#define AML_IF_OP (u16) 0xa0
-#define AML_ELSE_OP (u16) 0xa1
-#define AML_WHILE_OP (u16) 0xa2
-#define AML_NOOP_OP (u16) 0xa3
-#define AML_RETURN_OP (u16) 0xa4
-#define AML_BREAK_OP (u16) 0xa5
-#define AML_BREAK_POINT_OP (u16) 0xcc
-#define AML_ONES_OP (u16) 0xff
-
-/* prefixed opcodes */
-
-#define AML_EXTOP (u16) 0x005b
-
-
-#define AML_MUTEX_OP (u16) 0x5b01
-#define AML_EVENT_OP (u16) 0x5b02
-#define AML_SHIFT_RIGHT_BIT_OP (u16) 0x5b10
-#define AML_SHIFT_LEFT_BIT_OP (u16) 0x5b11
-#define AML_COND_REF_OF_OP (u16) 0x5b12
-#define AML_CREATE_FIELD_OP (u16) 0x5b13
-#define AML_LOAD_TABLE_OP (u16) 0x5b1f /* ACPI 2.0 */
-#define AML_LOAD_OP (u16) 0x5b20
-#define AML_STALL_OP (u16) 0x5b21
-#define AML_SLEEP_OP (u16) 0x5b22
-#define AML_ACQUIRE_OP (u16) 0x5b23
-#define AML_SIGNAL_OP (u16) 0x5b24
-#define AML_WAIT_OP (u16) 0x5b25
-#define AML_RESET_OP (u16) 0x5b26
-#define AML_RELEASE_OP (u16) 0x5b27
-#define AML_FROM_BCD_OP (u16) 0x5b28
-#define AML_TO_BCD_OP (u16) 0x5b29
-#define AML_UNLOAD_OP (u16) 0x5b2a
-#define AML_REVISION_OP (u16) 0x5b30
-#define AML_DEBUG_OP (u16) 0x5b31
-#define AML_FATAL_OP (u16) 0x5b32
-#define AML_REGION_OP (u16) 0x5b80
-#define AML_FIELD_OP (u16) 0x5b81
-#define AML_DEVICE_OP (u16) 0x5b82
-#define AML_PROCESSOR_OP (u16) 0x5b83
-#define AML_POWER_RES_OP (u16) 0x5b84
-#define AML_THERMAL_ZONE_OP (u16) 0x5b85
-#define AML_INDEX_FIELD_OP (u16) 0x5b86
-#define AML_BANK_FIELD_OP (u16) 0x5b87
-#define AML_DATA_REGION_OP (u16) 0x5b88 /* ACPI 2.0 */
-
-
-/* Bogus opcodes (they are actually two separate opcodes) */
-
-#define AML_LGREATEREQUAL_OP (u16) 0x9295
-#define AML_LLESSEQUAL_OP (u16) 0x9294
-#define AML_LNOTEQUAL_OP (u16) 0x9293
-
-
-/*
- * Internal opcodes
- * Use only "Unknown" AML opcodes, don't attempt to use
- * any valid ACPI ASCII values (A-Z, 0-9, '-')
- */
-
-#define AML_INT_NAMEPATH_OP (u16) 0x002d
-#define AML_INT_NAMEDFIELD_OP (u16) 0x0030
-#define AML_INT_RESERVEDFIELD_OP (u16) 0x0031
-#define AML_INT_ACCESSFIELD_OP (u16) 0x0032
-#define AML_INT_BYTELIST_OP (u16) 0x0033
-#define AML_INT_STATICSTRING_OP (u16) 0x0034
-#define AML_INT_METHODCALL_OP (u16) 0x0035
-#define AML_INT_RETURN_VALUE_OP (u16) 0x0036
-#define AML_INT_EVAL_SUBTREE_OP (u16) 0x0037
-
-
-#define ARG_NONE 0x0
-
-/*
- * Argument types for the AML Parser
- * Each field in the arg_types u32 is 5 bits, allowing for a maximum of 6 arguments.
- * There can be up to 31 unique argument types
- * Zero is reserved as end-of-list indicator
- */
-
-#define ARGP_BYTEDATA 0x01
-#define ARGP_BYTELIST 0x02
-#define ARGP_CHARLIST 0x03
-#define ARGP_DATAOBJ 0x04
-#define ARGP_DATAOBJLIST 0x05
-#define ARGP_DWORDDATA 0x06
-#define ARGP_FIELDLIST 0x07
-#define ARGP_NAME 0x08
-#define ARGP_NAMESTRING 0x09
-#define ARGP_OBJLIST 0x0A
-#define ARGP_PKGLENGTH 0x0B
-#define ARGP_SUPERNAME 0x0C
-#define ARGP_TARGET 0x0D
-#define ARGP_TERMARG 0x0E
-#define ARGP_TERMLIST 0x0F
-#define ARGP_WORDDATA 0x10
-#define ARGP_QWORDDATA 0x11
-#define ARGP_SIMPLENAME 0x12
-
-/*
- * Resolved argument types for the AML Interpreter
- * Each field in the arg_types u32 is 5 bits, allowing for a maximum of 6 arguments.
- * There can be up to 31 unique argument types (0 is end-of-arg-list indicator)
- *
- * Note1: These values are completely independent from the ACPI_TYPEs
- * i.e., ARGI_INTEGER != ACPI_TYPE_INTEGER
- *
- * Note2: If and when 5 bits becomes insufficient, it would probably be best
- * to convert to a 6-byte array of argument types, allowing 8 bits per argument.
- */
-
-/* Single, simple types */
-
-#define ARGI_ANYTYPE 0x01 /* Don't care */
-#define ARGI_PACKAGE 0x02
-#define ARGI_EVENT 0x03
-#define ARGI_MUTEX 0x04
-#define ARGI_DDBHANDLE 0x05
-
-/* Interchangeable types (via implicit conversion) */
-
-#define ARGI_INTEGER 0x06
-#define ARGI_STRING 0x07
-#define ARGI_BUFFER 0x08
-#define ARGI_BUFFER_OR_STRING 0x09 /* Used by MID op only */
-#define ARGI_COMPUTEDATA 0x0A /* Buffer, String, or Integer */
-
-/* Reference objects */
-
-#define ARGI_INTEGER_REF 0x0B
-#define ARGI_OBJECT_REF 0x0C
-#define ARGI_DEVICE_REF 0x0D
-#define ARGI_REFERENCE 0x0E
-#define ARGI_TARGETREF 0x0F /* Target, subject to implicit conversion */
-#define ARGI_FIXED_TARGET 0x10 /* Target, no implicit conversion */
-#define ARGI_SIMPLE_TARGET 0x11 /* Name, Local, Arg -- no implicit conversion */
-
-/* Multiple/complex types */
-
-#define ARGI_DATAOBJECT 0x12 /* Buffer, String, package or reference to a Node - Used only by size_of operator*/
-#define ARGI_COMPLEXOBJ 0x13 /* Buffer, String, or package (Used by INDEX op only) */
-#define ARGI_REF_OR_STRING 0x14 /* Reference or String (Used by DEREFOF op only) */
-#define ARGI_REGION_OR_FIELD 0x15 /* Used by LOAD op only */
-
-/* Note: types above can expand to 0x1F maximum */
-
-#define ARGI_INVALID_OPCODE 0xFFFFFFFF
-
-
-/*
- * hash offsets
- */
-#define AML_EXTOP_HASH_OFFSET 22
-#define AML_LNOT_HASH_OFFSET 19
-
-
-/*
- * opcode groups and types
- */
-
-#define OPGRP_NAMED 0x01
-#define OPGRP_FIELD 0x02
-#define OPGRP_BYTELIST 0x04
-
-
-/*
- * Opcode information
- */
-
-/* Opcode flags */
-
-#define AML_HAS_ARGS 0x0800
-#define AML_HAS_TARGET 0x0400
-#define AML_HAS_RETVAL 0x0200
-#define AML_NSOBJECT 0x0100
-#define AML_NSOPCODE 0x0080
-#define AML_NSNODE 0x0040
-#define AML_NAMED 0x0020
-#define AML_DEFER 0x0010
-#define AML_FIELD 0x0008
-#define AML_CREATE 0x0004
-#define AML_MATH 0x0002
-#define AML_LOGICAL 0x0001
-#define AML_CONSTANT 0x1000
-
-/* Convenient flag groupings */
-
-#define AML_FLAGS_EXEC_1A_0T_0R AML_HAS_ARGS /* Monadic1 */
-#define AML_FLAGS_EXEC_1A_0T_1R AML_HAS_ARGS | AML_HAS_RETVAL /* Monadic2 */
-#define AML_FLAGS_EXEC_1A_1T_0R AML_HAS_ARGS | AML_HAS_TARGET
-#define AML_FLAGS_EXEC_1A_1T_1R AML_HAS_ARGS | AML_HAS_TARGET | AML_HAS_RETVAL /* monadic2_r */
-#define AML_FLAGS_EXEC_2A_0T_0R AML_HAS_ARGS /* Dyadic1 */
-#define AML_FLAGS_EXEC_2A_0T_1R AML_HAS_ARGS | AML_HAS_RETVAL /* Dyadic2 */
-#define AML_FLAGS_EXEC_2A_1T_1R AML_HAS_ARGS | AML_HAS_TARGET | AML_HAS_RETVAL /* dyadic2_r */
-#define AML_FLAGS_EXEC_2A_2T_1R AML_HAS_ARGS | AML_HAS_TARGET | AML_HAS_RETVAL
-#define AML_FLAGS_EXEC_3A_0T_0R AML_HAS_ARGS
-#define AML_FLAGS_EXEC_3A_1T_1R AML_HAS_ARGS | AML_HAS_TARGET | AML_HAS_RETVAL
-#define AML_FLAGS_EXEC_6A_0T_1R AML_HAS_ARGS | AML_HAS_RETVAL
-
-
-/*
- * The opcode Type is used in a dispatch table, do not change
- * without updating the table.
- */
-#define AML_TYPE_EXEC_1A_0T_0R 0x00 /* Monadic1 */
-#define AML_TYPE_EXEC_1A_0T_1R 0x01 /* Monadic2 */
-#define AML_TYPE_EXEC_1A_1T_0R 0x02
-#define AML_TYPE_EXEC_1A_1T_1R 0x03 /* monadic2_r */
-#define AML_TYPE_EXEC_2A_0T_0R 0x04 /* Dyadic1 */
-#define AML_TYPE_EXEC_2A_0T_1R 0x05 /* Dyadic2 */
-#define AML_TYPE_EXEC_2A_1T_1R 0x06 /* dyadic2_r */
-#define AML_TYPE_EXEC_2A_2T_1R 0x07
-#define AML_TYPE_EXEC_3A_0T_0R 0x08
-#define AML_TYPE_EXEC_3A_1T_1R 0x09
-#define AML_TYPE_EXEC_6A_0T_1R 0x0A
-/* End of types used in dispatch table */
-
-#define AML_TYPE_LITERAL 0x0B
-#define AML_TYPE_CONSTANT 0x0C
-#define AML_TYPE_METHOD_ARGUMENT 0x0D
-#define AML_TYPE_LOCAL_VARIABLE 0x0E
-#define AML_TYPE_DATA_TERM 0x0F
-
-/* Generic for an op that returns a value */
-
-#define AML_TYPE_METHOD_CALL 0x10
-
-/* Misc */
-
-#define AML_TYPE_CREATE_FIELD 0x11
-#define AML_TYPE_CREATE_OBJECT 0x12
-#define AML_TYPE_CONTROL 0x13
-#define AML_TYPE_NAMED_NO_OBJ 0x14
-#define AML_TYPE_NAMED_FIELD 0x15
-#define AML_TYPE_NAMED_SIMPLE 0x16
-#define AML_TYPE_NAMED_COMPLEX 0x17
-#define AML_TYPE_RETURN 0x18
-
-#define AML_TYPE_UNDEFINED 0x19
-#define AML_TYPE_BOGUS 0x1A
-
-
-/*
- * Opcode classes
- */
-#define AML_CLASS_EXECUTE 0x00
-#define AML_CLASS_CREATE 0x01
-#define AML_CLASS_ARGUMENT 0x02
-#define AML_CLASS_NAMED_OBJECT 0x03
-#define AML_CLASS_CONTROL 0x04
-#define AML_CLASS_ASCII 0x05
-#define AML_CLASS_PREFIX 0x06
-#define AML_CLASS_INTERNAL 0x07
-#define AML_CLASS_RETURN_VALUE 0x08
-#define AML_CLASS_METHOD_CALL 0x09
-#define AML_CLASS_UNKNOWN 0x0A
-
-
-/* Predefined Operation Region space_iDs */
-
-typedef enum
-{
- REGION_MEMORY = 0,
- REGION_IO,
- REGION_PCI_CONFIG,
- REGION_EC,
- REGION_SMBUS,
- REGION_CMOS,
- REGION_PCI_BAR,
- REGION_DATA_TABLE, /* Internal use only */
- REGION_FIXED_HW = 0x7F
-
-} AML_REGION_TYPES;
-
-
-/* Comparison operation codes for match_op operator */
-
-typedef enum
-{
- MATCH_MTR = 0,
- MATCH_MEQ = 1,
- MATCH_MLE = 2,
- MATCH_MLT = 3,
- MATCH_MGE = 4,
- MATCH_MGT = 5
-
-} AML_MATCH_OPERATOR;
-
-#define MAX_MATCH_OPERATOR 5
-
-
-/*
- * field_flags
- *
- * This byte is extracted from the AML and includes three separate
- * pieces of information about the field:
- * 1) The field access type
- * 2) The field update rule
- * 3) The lock rule for the field
- *
- * Bits 00 - 03 : access_type (any_acc, byte_acc, etc.)
- * 04 : lock_rule (1 == Lock)
- * 05 - 06 : update_rule
- */
-#define AML_FIELD_ACCESS_TYPE_MASK 0x0F
-#define AML_FIELD_LOCK_RULE_MASK 0x10
-#define AML_FIELD_UPDATE_RULE_MASK 0x60
-
-
-/* 1) Field Access Types */
-
-typedef enum
-{
- AML_FIELD_ACCESS_ANY = 0x00,
- AML_FIELD_ACCESS_BYTE = 0x01,
- AML_FIELD_ACCESS_WORD = 0x02,
- AML_FIELD_ACCESS_DWORD = 0x03,
- AML_FIELD_ACCESS_QWORD = 0x04, /* ACPI 2.0 */
- AML_FIELD_ACCESS_BUFFER = 0x05 /* ACPI 2.0 */
-
-} AML_ACCESS_TYPE;
-
-
-/* 2) Field Lock Rules */
-
-typedef enum
-{
- AML_FIELD_LOCK_NEVER = 0x00,
- AML_FIELD_LOCK_ALWAYS = 0x10
-
-} AML_LOCK_RULE;
-
-
-/* 3) Field Update Rules */
-
-typedef enum
-{
- AML_FIELD_UPDATE_PRESERVE = 0x00,
- AML_FIELD_UPDATE_WRITE_AS_ONES = 0x20,
- AML_FIELD_UPDATE_WRITE_AS_ZEROS = 0x40
-
-} AML_UPDATE_RULE;
-
-
-/*
- * Field Access Attributes.
- * This byte is extracted from the AML via the
- * access_as keyword
- */
-typedef enum
-{
- AML_FIELD_ATTRIB_SMB_QUICK = 0x02,
- AML_FIELD_ATTRIB_SMB_SEND_RCV = 0x04,
- AML_FIELD_ATTRIB_SMB_BYTE = 0x06,
- AML_FIELD_ATTRIB_SMB_WORD = 0x08,
- AML_FIELD_ATTRIB_SMB_BLOCK = 0x0A,
- AML_FIELD_ATTRIB_SMB_WORD_CALL = 0x0C,
- AML_FIELD_ATTRIB_SMB_BLOCK_CALL = 0x0D
-
-} AML_ACCESS_ATTRIBUTE;
-
-
-/* bit fields in method_flags byte */
-
-#define METHOD_FLAGS_ARG_COUNT 0x07
-#define METHOD_FLAGS_SERIALIZED 0x08
-#define METHOD_FLAGS_SYNCH_LEVEL 0xF0
-
-
-#endif /* __AMLCODE_H__ */
diff --git a/xen/include/acpi/amlresrc.h b/xen/include/acpi/amlresrc.h
deleted file mode 100644
index b28b6905b7..0000000000
--- a/xen/include/acpi/amlresrc.h
+++ /dev/null
@@ -1,329 +0,0 @@
-
-/******************************************************************************
- *
- * Module Name: amlresrc.h - AML resource descriptors
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-
-#ifndef __AMLRESRC_H
-#define __AMLRESRC_H
-
-
-#define ASL_RESNAME_ADDRESS "_ADR"
-#define ASL_RESNAME_ALIGNMENT "_ALN"
-#define ASL_RESNAME_ADDRESSSPACE "_ASI"
-#define ASL_RESNAME_BASEADDRESS "_BAS"
-#define ASL_RESNAME_BUSMASTER "_BM_" /* Master(1), Slave(0) */
-#define ASL_RESNAME_DECODE "_DEC"
-#define ASL_RESNAME_DMA "_DMA"
-#define ASL_RESNAME_DMATYPE "_TYP" /* Compatible(0), A(1), B(2), F(3) */
-#define ASL_RESNAME_GRANULARITY "_GRA"
-#define ASL_RESNAME_INTERRUPT "_INT"
-#define ASL_RESNAME_INTERRUPTLEVEL "_LL_" /* active_lo(1), active_hi(0) */
-#define ASL_RESNAME_INTERRUPTSHARE "_SHR" /* Shareable(1), no_share(0) */
-#define ASL_RESNAME_INTERRUPTTYPE "_HE_" /* Edge(1), Level(0) */
-#define ASL_RESNAME_LENGTH "_LEN"
-#define ASL_RESNAME_MEMATTRIBUTES "_MTP" /* Memory(0), Reserved(1), ACPI(2), NVS(3) */
-#define ASL_RESNAME_MEMTYPE "_MEM" /* non_cache(0), Cacheable(1) Cache+combine(2), Cache+prefetch(3) */
-#define ASL_RESNAME_MAXADDR "_MAX"
-#define ASL_RESNAME_MINADDR "_MIN"
-#define ASL_RESNAME_MAXTYPE "_MAF"
-#define ASL_RESNAME_MINTYPE "_MIF"
-#define ASL_RESNAME_REGISTERBITOFFSET "_RBO"
-#define ASL_RESNAME_REGISTERBITWIDTH "_RBW"
-#define ASL_RESNAME_RANGETYPE "_RNG"
-#define ASL_RESNAME_READWRITETYPE "_RW_" /* read_only(0), Writeable (1) */
-#define ASL_RESNAME_TRANSLATION "_TRA"
-#define ASL_RESNAME_TRANSTYPE "_TRS" /* Sparse(1), Dense(0) */
-#define ASL_RESNAME_TYPE "_TTP" /* Translation(1), Static (0) */
-#define ASL_RESNAME_XFERTYPE "_SIz" /* 8(0), 8_and16(1), 16(2) */
-
-
-/* Default sizes for "small" resource descriptors */
-
-#define ASL_RDESC_IRQ_SIZE 0x02
-#define ASL_RDESC_DMA_SIZE 0x02
-#define ASL_RDESC_ST_DEPEND_SIZE 0x00
-#define ASL_RDESC_END_DEPEND_SIZE 0x00
-#define ASL_RDESC_IO_SIZE 0x07
-#define ASL_RDESC_FIXED_IO_SIZE 0x03
-#define ASL_RDESC_END_TAG_SIZE 0x01
-
-
-struct asl_resource_node
-{
- u32 buffer_length;
- void *buffer;
- struct asl_resource_node *next;
-};
-
-
-/*
- * Resource descriptors defined in the ACPI specification.
- *
- * Alignment must be BYTE because these descriptors
- * are used to overlay the AML byte stream.
- */
-#pragma pack(1)
-
-struct asl_irq_format_desc
-{
- u8 descriptor_type;
- u16 irq_mask;
- u8 flags;
-};
-
-
-struct asl_irq_noflags_desc
-{
- u8 descriptor_type;
- u16 irq_mask;
-};
-
-
-struct asl_dma_format_desc
-{
- u8 descriptor_type;
- u8 dma_channel_mask;
- u8 flags;
-};
-
-
-struct asl_start_dependent_desc
-{
- u8 descriptor_type;
- u8 flags;
-};
-
-
-struct asl_start_dependent_noprio_desc
-{
- u8 descriptor_type;
-};
-
-
-struct asl_end_dependent_desc
-{
- u8 descriptor_type;
-};
-
-
-struct asl_io_port_desc
-{
- u8 descriptor_type;
- u8 information;
- u16 address_min;
- u16 address_max;
- u8 alignment;
- u8 length;
-};
-
-
-struct asl_fixed_io_port_desc
-{
- u8 descriptor_type;
- u16 base_address;
- u8 length;
-};
-
-
-struct asl_small_vendor_desc
-{
- u8 descriptor_type;
- u8 vendor_defined[7];
-};
-
-
-struct asl_end_tag_desc
-{
- u8 descriptor_type;
- u8 checksum;
-};
-
-
-/* LARGE descriptors */
-
-struct asl_memory_24_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 information;
- u16 address_min;
- u16 address_max;
- u16 alignment;
- u16 range_length;
-};
-
-
-struct asl_large_vendor_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 vendor_defined[1];
-};
-
-
-struct asl_memory_32_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 information;
- u32 address_min;
- u32 address_max;
- u32 alignment;
- u32 range_length;
-};
-
-
-struct asl_fixed_memory_32_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 information;
- u32 base_address;
- u32 range_length;
-};
-
-
-struct asl_qword_address_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 resource_type;
- u8 flags;
- u8 specific_flags;
- u64 granularity;
- u64 address_min;
- u64 address_max;
- u64 translation_offset;
- u64 address_length;
- u8 optional_fields[2];
-};
-
-
-struct asl_dword_address_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 resource_type;
- u8 flags;
- u8 specific_flags;
- u32 granularity;
- u32 address_min;
- u32 address_max;
- u32 translation_offset;
- u32 address_length;
- u8 optional_fields[2];
-};
-
-
-struct asl_word_address_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 resource_type;
- u8 flags;
- u8 specific_flags;
- u16 granularity;
- u16 address_min;
- u16 address_max;
- u16 translation_offset;
- u16 address_length;
- u8 optional_fields[2];
-};
-
-
-struct asl_extended_xrupt_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 flags;
- u8 table_length;
- u32 interrupt_number[1];
- /* res_source_index, res_source optional fields follow */
-};
-
-
-struct asl_general_register_desc
-{
- u8 descriptor_type;
- u16 length;
- u8 address_space_id;
- u8 bit_width;
- u8 bit_offset;
- u8 reserved;
- u64 address;
-};
-
-/* restore default alignment */
-
-#pragma pack()
-
-/* Union of all resource descriptors, sow we can allocate the worst case */
-
-union asl_resource_desc
-{
- struct asl_irq_format_desc irq;
- struct asl_dma_format_desc dma;
- struct asl_start_dependent_desc std;
- struct asl_end_dependent_desc end;
- struct asl_io_port_desc iop;
- struct asl_fixed_io_port_desc fio;
- struct asl_small_vendor_desc smv;
- struct asl_end_tag_desc et;
-
- struct asl_memory_24_desc M24;
- struct asl_large_vendor_desc lgv;
- struct asl_memory_32_desc M32;
- struct asl_fixed_memory_32_desc F32;
- struct asl_qword_address_desc qas;
- struct asl_dword_address_desc das;
- struct asl_word_address_desc was;
- struct asl_extended_xrupt_desc exx;
- struct asl_general_register_desc grg;
- u32 u32_item;
- u16 u16_item;
- u8 U8item;
-};
-
-
-#endif
-
diff --git a/xen/include/acpi/platform/acenv.h b/xen/include/acpi/platform/acenv.h
index 9a0cd41084..5a956b549b 100644
--- a/xen/include/acpi/platform/acenv.h
+++ b/xen/include/acpi/platform/acenv.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,6 +57,7 @@
#define ACPI_DISASSEMBLER
#define ACPI_NO_METHOD_EXECUTION
#define ACPI_USE_SYSTEM_CLIBRARY
+#define ACPI_ENABLE_OBJECT_CACHE
#endif
#ifdef _ACPI_EXEC_APP
@@ -67,6 +68,7 @@
#define ACPI_DEBUGGER
#define ACPI_DISASSEMBLER
#define ACPI_USE_SYSTEM_CLIBRARY
+#define ACPI_ENABLE_OBJECT_CACHE
#endif
#ifdef _ACPI_ASL_COMPILER
@@ -75,6 +77,7 @@
#define ACPI_DISASSEMBLER
#define ACPI_CONSTANT_EVAL_ONLY
#define ACPI_USE_SYSTEM_CLIBRARY
+#define ACPI_ENABLE_OBJECT_CACHE
#endif
/*
@@ -152,12 +155,8 @@
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
-
-/* Name of host operating system (returned by the _OS_ namespace object) */
-
-#define ACPI_OS_NAME "Intel ACPI/CA Core Subsystem"
-
-/* This macro is used to tag functions as "printf-like" because
+/*
+ * This macro is used to tag functions as "printf-like" because
* some compilers can catch printf format string problems. MSVC
* doesn't, so this is proprocessed away.
*/
@@ -227,7 +226,11 @@
*/
#define ACPI_STRSTR(s1,s2) strstr((s1), (s2))
+
+#ifdef ACPI_FUTURE_USAGE
#define ACPI_STRUPR(s) (void) acpi_ut_strupr ((s))
+#endif
+
#define ACPI_STRLEN(s) (acpi_size) strlen((s))
#define ACPI_STRCPY(d,s) (void) strcpy((d), (s))
#define ACPI_STRNCPY(d,s,n) (void) strncpy((d), (s), (acpi_size)(n))
@@ -236,6 +239,7 @@
#define ACPI_STRCAT(d,s) (void) strcat((d), (s))
#define ACPI_STRNCAT(d,s,n) strncat((d), (s), (acpi_size)(n))
#define ACPI_STRTOUL(d,s,n) strtoul((d), (s), (acpi_size)(n))
+#define ACPI_MEMCMP(s1,s2,n) memcmp((s1), (s2), (acpi_size)(n))
#define ACPI_MEMCPY(d,s,n) (void) memcpy((d), (s), (acpi_size)(n))
#define ACPI_MEMSET(d,s,n) (void) memset((d), (s), (acpi_size)(n))
@@ -290,7 +294,11 @@ typedef char *va_list;
#define ACPI_STRSTR(s1,s2) acpi_ut_strstr ((s1), (s2))
+
+#ifdef ACPI_FUTURE_USAGE
#define ACPI_STRUPR(s) (void) acpi_ut_strupr ((s))
+#endif
+
#define ACPI_STRLEN(s) (acpi_size) acpi_ut_strlen ((s))
#define ACPI_STRCPY(d,s) (void) acpi_ut_strcpy ((d), (s))
#define ACPI_STRNCPY(d,s,n) (void) acpi_ut_strncpy ((d), (s), (acpi_size)(n))
@@ -299,6 +307,7 @@ typedef char *va_list;
#define ACPI_STRCAT(d,s) (void) acpi_ut_strcat ((d), (s))
#define ACPI_STRNCAT(d,s,n) acpi_ut_strncat ((d), (s), (acpi_size)(n))
#define ACPI_STRTOUL(d,s,n) acpi_ut_strtoul ((d), (s), (acpi_size)(n))
+#define ACPI_MEMCMP(s1,s2,n) acpi_ut_memcmp((s1), (s2), (acpi_size)(n))
#define ACPI_MEMCPY(d,s,n) (void) acpi_ut_memcpy ((d), (s), (acpi_size)(n))
#define ACPI_MEMSET(d,v,n) (void) acpi_ut_memset ((d), (v), (acpi_size)(n))
#define ACPI_TOUPPER acpi_ut_to_upper
diff --git a/xen/include/acpi/platform/acgcc.h b/xen/include/acpi/platform/acgcc.h
index ac15b08460..91fda36b04 100644
--- a/xen/include/acpi/platform/acgcc.h
+++ b/xen/include/acpi/platform/acgcc.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -54,6 +54,6 @@
* to to tell the compiler warning in a per-variable manner that a variable
* is unused.
*/
-#define ACPI_UNUSED_VAR __attribute_used__
+#define ACPI_UNUSED_VAR __attribute__ ((unused))
#endif /* __ACGCC_H__ */
diff --git a/xen/include/acpi/platform/aclinux.h b/xen/include/acpi/platform/aclinux.h
index eecd6c888c..4006b0ce01 100644
--- a/xen/include/acpi/platform/aclinux.h
+++ b/xen/include/acpi/platform/aclinux.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2004, R. Byron Moore
+ * Copyright (C) 2000 - 2005, R. Byron Moore
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -44,8 +44,6 @@
#ifndef __ACLINUX_H__
#define __ACLINUX_H__
-#define ACPI_OS_NAME "Linux"
-
#define ACPI_USE_SYSTEM_CLIBRARY
#define ACPI_USE_DO_WHILE_0
@@ -83,6 +81,8 @@
#define ACPI_USE_NATIVE_DIVIDE
#endif
+#define __cdecl
+#define ACPI_FLUSH_CPU_CACHE()
#endif /* __KERNEL__ */
/* Linux uses GCC */
diff --git a/xen/include/asm-ia64/config.h b/xen/include/asm-ia64/config.h
new file mode 100644
index 0000000000..442d49a382
--- /dev/null
+++ b/xen/include/asm-ia64/config.h
@@ -0,0 +1,281 @@
+#ifndef _IA64_CONFIG_H_
+#define _IA64_CONFIG_H_
+
+// control flags for turning on/off features under test
+#undef CLONE_DOMAIN0
+//#define CLONE_DOMAIN0 1
+#define DOMU_BUILD_STAGING
+#define VHPT_GLOBAL
+#define DOMU_AUTO_RESTART
+
+// manufactured from component pieces
+
+// defined in linux/arch/ia64/defconfig
+//#define CONFIG_IA64_GENERIC
+#define CONFIG_IA64_HP_SIM
+#define CONFIG_IA64_L1_CACHE_SHIFT 7
+// needed by include/asm-ia64/page.h
+#define CONFIG_IA64_PAGE_SIZE_16KB // 4KB doesn't work?!?
+#define CONFIG_IA64_GRANULE_16MB
+
+#define CONFIG_EFI_PCDP
+#define CONFIG_SERIAL_SGI_L1_CONSOLE
+
+#ifndef __ASSEMBLY__
+
+// can't find where this typedef was before?!?
+// needed by include/asm-ia64/processor.h (and other places)
+typedef int pid_t;
+
+// now needed for xen/include/mm.h
+typedef unsigned long physaddr_t;
+// from include/linux/kernel.h
+#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+
+//////////////////////////////////////
+
+#define FASTCALL(x) x // see linux/include/linux/linkage.h
+#define fastcall // " "
+
+#define watchdog_disable() ((void)0)
+#define watchdog_enable() ((void)0)
+// from linux/include/linux/types.h
+#define CLEAR_BITMAP(name,bits) \
+ memset(name, 0, BITS_TO_LONGS(bits)*sizeof(unsigned long))
+
+// FIXME?: x86-ism used in xen/mm.h
+#define LOCK_PREFIX
+
+extern unsigned long xenheap_phys_end;
+extern unsigned long xen_pstart;
+extern unsigned long xenheap_size;
+extern unsigned long dom0_start;
+extern unsigned long dom0_size;
+
+// from linux/include/linux/mm.h
+extern struct page *mem_map;
+
+// xen/include/asm/config.h
+extern char _end[]; /* standard ELF symbol */
+
+// linux/include/linux/compiler.h
+#define __attribute_const__
+#define __user
+//#define __kernel
+//#define __safe
+#define __force
+#define __chk_user_ptr(x) (void)0
+//#define __chk_io_ptr(x) (void)0
+//#define __builtin_warning(x, y...) (1)
+//#define __acquires(x)
+//#define __releases(x)
+//#define __acquire(x) (void)0
+//#define __release(x) (void)0
+//#define __cond_lock(x) (x)
+#define __must_check
+#define __deprecated
+
+// xen/include/asm/config.h
+#define HZ 100
+// leave SMP for a later time
+#define NR_CPUS 1
+//#define NR_CPUS 16
+//#define CONFIG_NR_CPUS 16
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+///////////////////////////////////////////////////////////////
+// xen/include/asm/config.h
+// Natural boundary upon TR size to define xenheap space
+#define XENHEAP_DEFAULT_MB (1 << (KERNEL_TR_PAGE_SHIFT - 20))
+#define XENHEAP_DEFAULT_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
+#define ELFSIZE 64
+
+///////////////////////////////////////////////////////////////
+
+// get rid of difficult circular include dependency
+#define CMPXCHG_BUGCHECK(v)
+#define CMPXCHG_BUGCHECK_DECL
+
+// from include/asm-ia64/smp.h
+#ifdef CONFIG_SMP
+#error "Lots of things to fix to enable CONFIG_SMP!"
+#endif
+#define get_cpu() 0
+#define put_cpu() do {} while(0)
+
+// from linux/include/linux/mm.h
+struct page;
+
+// function calls; see decl in xen/include/xen/sched.h
+#undef free_task_struct
+#undef alloc_task_struct
+
+// initial task has a different name in Xen
+//#define idle0_task init_task
+#define idle0_vcpu init_task
+
+// avoid redefining task_t in asm/thread_info.h
+#define task_t struct domain
+
+// avoid redefining task_struct in asm/current.h
+#define task_struct vcpu
+
+// linux/include/asm-ia64/machvec.h (linux/arch/ia64/lib/io.c)
+#define platform_inb __ia64_inb
+#define platform_inw __ia64_inw
+#define platform_inl __ia64_inl
+#define platform_outb __ia64_outb
+#define platform_outw __ia64_outw
+#define platform_outl __ia64_outl
+
+// FIXME: This just overrides a use in a typedef (not allowed in ia64,
+// or maybe just in older gcc's?) used in ac_timer.c but should be OK
+// (and indeed is probably required!) elsewhere
+#undef __cacheline_aligned
+#undef ____cacheline_aligned
+#undef ____cacheline_aligned_in_smp
+#define __cacheline_aligned
+#define __cacheline_aligned_in_smp
+#define ____cacheline_aligned
+#define ____cacheline_aligned_in_smp
+#define ____cacheline_maxaligned_in_smp
+
+#include "asm/types.h" // for u64
+
+// warning: unless search_extable is declared, the return value gets
+// truncated to 32-bits, causing a very strange error in privop handling
+struct exception_table_entry;
+
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+ const struct exception_table_entry *last,
+ unsigned long value);
+void sort_extable(struct exception_table_entry *start,
+ struct exception_table_entry *finish);
+void sort_main_extable(void);
+
+#define printk printf
+
+#undef __ARCH_IRQ_STAT
+
+#define find_first_set_bit(x) (ffs(x)-1) // FIXME: Is this right???
+
+// from include/asm-x86/*/uaccess.h
+#define array_access_ok(addr,count,size) \
+ (likely(sizeof(count) <= 4) /* disallow 64-bit counts */ && \
+ access_ok(type,addr,count*size))
+
+// see drivers/char/console.c
+#ifndef CONFIG_VTI
+#define OPT_CONSOLE_STR "com1"
+#else // CONFIG_VTI
+#define OPT_CONSOLE_STR "com2"
+#endif // CONFIG_VTI
+
+#define __attribute_used__ __attribute__ ((unused))
+
+// see include/asm-x86/atomic.h (different from standard linux)
+#define _atomic_set(v,i) (((v).counter) = (i))
+#define _atomic_read(v) ((v).counter)
+// FIXME following needs work
+#define atomic_compareandswap(old, new, v) old
+
+// see include/asm-ia64/mm.h, handle remaining pfn_info uses until gone
+#define pfn_info page
+
+// see common/keyhandler.c
+#define nop() asm volatile ("nop 0")
+
+// from include/linux/preempt.h (needs including from interrupt.h or smp.h)
+#define preempt_enable() do { } while (0)
+#define preempt_disable() do { } while (0)
+
+// needed for include/xen/linuxtime.h
+typedef s64 time_t;
+typedef s64 suseconds_t;
+
+// needed for include/linux/jiffies.h
+typedef long clock_t;
+
+// from include/linux/kernel.h, needed by jiffies.h
+#define typecheck(type,x) \
+({ type __dummy; \
+ typeof(x) __dummy2; \
+ (void)(&__dummy == &__dummy2); \
+ 1; \
+})
+
+// from include/linux/timex.h, needed by arch/ia64/time.c
+#define TIME_SOURCE_CPU 0
+
+// used in common code
+#define softirq_pending(cpu) (cpu_data(cpu)->softirq_pending)
+
+// dup'ed from signal.h to avoid changes to includes
+#define SA_SHIRQ 0x04000000
+#define SA_INTERRUPT 0x20000000
+
+// needed for setup.c
+extern unsigned long loops_per_jiffy;
+extern char saved_command_line[];
+struct screen_info { };
+#define seq_printf(a,b...) printf(b)
+#define CONFIG_BLK_DEV_INITRD // needed to reserve memory for domain0
+
+// needed for newer ACPI code
+#define asmlinkage
+
+#define FORCE_CRASH() asm("break 0;;");
+
+// these declarations got moved at some point, find a better place for them
+extern int ht_per_core;
+
+// needed for include/xen/smp.h
+#define __smp_processor_id() 0
+
+// xen/include/asm/config.h
+/******************************************************************************
+ * config.h
+ *
+ * A Linux-style configuration list.
+ */
+
+#ifndef __XEN_IA64_CONFIG_H__
+#define __XEN_IA64_CONFIG_H__
+
+#undef CONFIG_X86
+
+#define CONFIG_MCKINLEY
+
+//#define CONFIG_SMP 1
+//#define CONFIG_NR_CPUS 2
+//leave SMP for a later time
+#undef CONFIG_SMP
+#undef CONFIG_X86_LOCAL_APIC
+#undef CONFIG_X86_IO_APIC
+#undef CONFIG_X86_L1_CACHE_SHIFT
+
+// this needs to be on to run on hp zx1 with more than 4GB
+// it is hacked around for now though
+//#define CONFIG_VIRTUAL_MEM_MAP
+
+//#ifndef CONFIG_IA64_HP_SIM
+// looks like this is hard to turn off for Xen
+#define CONFIG_ACPI 1
+#define CONFIG_ACPI_BOOT 1
+//#endif
+
+#define CONFIG_XEN_ATTENTION_KEY 1
+#endif /* __ASSEMBLY__ */
+#endif /* __XEN_IA64_CONFIG_H__ */
+
+// FOLLOWING ADDED FOR XEN POST-NGIO and/or LINUX 2.6.7
+
+// following derived from linux/include/linux/compiler-gcc3.h
+// problem because xen (over?)simplifies include/xen/compiler.h
+#if __GNUC_MAJOR < 3 || __GNUC_MINOR__ >= 3
+# define __attribute_used__ __attribute__((__used__))
+#else
+# define __attribute_used__ __attribute__((__unused__))
+#endif
+#endif /* _IA64_CONFIG_H_ */
diff --git a/xen/include/asm-ia64/debugger.h b/xen/include/asm-ia64/debugger.h
new file mode 100644
index 0000000000..e933b11487
--- /dev/null
+++ b/xen/include/asm-ia64/debugger.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * asm/debugger.h
+ *
+ * Generic hooks into arch-dependent Xen.
+ *
+ * Each debugger should define two functions here:
+ *
+ * 1. debugger_trap_entry():
+ * Called at start of any synchronous fault or trap, before any other work
+ * is done. The idea is that if your debugger deliberately caused the trap
+ * (e.g. to implement breakpoints or data watchpoints) then you can take
+ * appropriate action and return a non-zero value to cause early exit from
+ * the trap function.
+ *
+ * 2. debugger_trap_fatal():
+ * Called when Xen is about to give up and crash. Typically you will use this
+ * hook to drop into a debug session. It can also be used to hook off
+ * deliberately caused traps (which you then handle and return non-zero)
+ * but really these should be hooked off 'debugger_trap_entry'.
+ */
+
+#ifndef __ASM_DEBUGGER_H__
+#define __ASM_DEBUGGER_H__
+
+#include <xen/softirq.h>
+
+/* The main trap handlers use these helper macros which include early bail. */
+static inline int debugger_trap_entry(
+ unsigned int vector, struct cpu_user_regs *regs)
+{
+ return 0;
+}
+
+static inline int debugger_trap_fatal(
+ unsigned int vector, struct cpu_user_regs *regs)
+{
+ return 0;
+}
+
+#define debugger_trap_immediate() do {} while(0)
+
+#endif /* __ASM_DEBUGGER_H__ */
diff --git a/xen/include/asm-ia64/dom_fw.h b/xen/include/asm-ia64/dom_fw.h
new file mode 100644
index 0000000000..7603473079
--- /dev/null
+++ b/xen/include/asm-ia64/dom_fw.h
@@ -0,0 +1,119 @@
+/*
+ * Xen domain firmware emulation
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ */
+
+extern unsigned long dom_pa(unsigned long);
+extern unsigned long dom_fw_setup(struct domain *, char *, int);
+
+#ifndef MB
+#define MB (1024*1024)
+#endif
+
+/* This is used to determined the portion of a domain's metaphysical memory
+ space reserved for the hypercall patch table. */
+//FIXME: experiment with smaller sizes
+#define HYPERCALL_START 1*MB
+#define HYPERCALL_END 2*MB
+
+#define FW_HYPERCALL_BASE_PADDR HYPERCALL_START
+#define FW_HYPERCALL_END_PADDR HYPERCALL_END
+#define FW_HYPERCALL_PADDR(index) (FW_HYPERCALL_BASE_PADDR + (16UL * index))
+
+/*
+ * PAL can be called in physical or virtual mode simply by
+ * branching to pal_entry_point, which is found in one of the
+ * SAL system table entrypoint descriptors (type=0). Parameters
+ * may be passed in r28-r31 (static) or r32-r35 (stacked); which
+ * convention is used depends on which procedure is being called.
+ * r28 contains the PAL index, the indicator of which PAL procedure
+ * is to be called: Index=0 is reserved, 1-255 indicates static
+ * parameters, 256-511 indicates stacked parameters. 512-1023
+ * are implementation-specific and 1024+ are reserved.
+ * rp=b0 indicates the return point.
+ *
+ * A single hypercall is used for all PAL calls.
+ */
+
+#define FW_HYPERCALL_PAL_CALL_INDEX 0x80UL
+#define FW_HYPERCALL_PAL_CALL_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_PAL_CALL_INDEX)
+#define FW_HYPERCALL_PAL_CALL 0x1000UL
+
+/*
+ * SAL consists of a table of descriptors, one of which (type=0)
+ * contains a sal_entry_point which provides access to a number of
+ * functions. Parameters are passed in r33-r39; r32 contains the
+ * index of the SAL function being called. At entry, r1=gp contains
+ * a global pointer which may be needed by the function. rp=b0
+ * indicates the return point. SAL may not be re-entrant; an
+ * OS must ensure it is called by one processor at a time.
+ *
+ * A single hypercall is used for all SAL calls.
+ */
+
+#define FW_HYPERCALL_SAL_CALL_INDEX 0x81UL
+#define FW_HYPERCALL_SAL_CALL_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_SAL_CALL_INDEX)
+#define FW_HYPERCALL_SAL_CALL 0x1001UL
+
+/*
+ * EFI is accessed via the EFI system table, which contains:
+ * - a header which contains version info
+ * - console information (stdin,stdout,stderr)
+ * as well as pointers to:
+ * - the EFI configuration table, which contains GUID/pointer pairs,
+ * one of which is a pointer to the SAL system table; another is
+ * a pointer to the ACPI table
+ * - the runtime services table, which contains a header followed by
+ * a list of (11) unique "runtime" entry points. EFI runtime entry
+ * points are real function descriptors so contain both a (physical)
+ * address and a global pointer. They are entered (at first) in
+ * physical mode, though it is possible (optionally... requests can
+ * be ignored and calls still must be OK) to call one entry point
+ * which switches the others so they are capable of being called in
+ * virtual mode. Parameters are passed in stacked registers, and
+ * rp=b0 indicates the return point.
+ * - the boot services table, which contains bootloader-related
+ * entry points (ADD MORE HERE LATER)
+ *
+ * Each runtime (and boot) entry point requires a unique hypercall.
+ */
+
+/* these are indexes into the runtime services table */
+#define FW_HYPERCALL_EFI_BASE
+#define FW_HYPERCALL_EFI_GET_TIME_INDEX 0UL
+#define FW_HYPERCALL_EFI_SET_TIME_INDEX 1UL
+#define FW_HYPERCALL_EFI_GET_WAKEUP_TIME_INDEX 2UL
+#define FW_HYPERCALL_EFI_SET_WAKEUP_TIME_INDEX 3UL
+#define FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP_INDEX 4UL
+#define FW_HYPERCALL_EFI_GET_VARIABLE_INDEX 5UL
+#define FW_HYPERCALL_EFI_GET_NEXT_VARIABLE_INDEX 6UL
+#define FW_HYPERCALL_EFI_SET_VARIABLE_INDEX 7UL
+#define FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT_INDEX 8UL
+#define FW_HYPERCALL_EFI_RESET_SYSTEM_INDEX 9UL
+
+/* these are hypercall numbers */
+#define FW_HYPERCALL_EFI_GET_TIME 0x300UL
+#define FW_HYPERCALL_EFI_SET_TIME 0x301UL
+#define FW_HYPERCALL_EFI_GET_WAKEUP_TIME 0x302UL
+#define FW_HYPERCALL_EFI_SET_WAKEUP_TIME 0x303UL
+#define FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP 0x304UL
+#define FW_HYPERCALL_EFI_GET_VARIABLE 0x305UL
+#define FW_HYPERCALL_EFI_GET_NEXT_VARIABLE 0x306UL
+#define FW_HYPERCALL_EFI_SET_VARIABLE 0x307UL
+#define FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT 0x308UL
+#define FW_HYPERCALL_EFI_RESET_SYSTEM 0x309UL
+
+/* these are the physical addresses of the pseudo-entry points that
+ * contain the hypercalls */
+#define FW_HYPERCALL_EFI_GET_TIME_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_GET_TIME_INDEX)
+#define FW_HYPERCALL_EFI_SET_TIME_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_SET_TIME_INDEX)
+#define FW_HYPERCALL_EFI_GET_WAKEUP_TIME_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_GET_WAKEUP_TIME_INDEX)
+#define FW_HYPERCALL_EFI_SET_WAKEUP_TIME_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_SET_WAKEUP_TIME_INDEX)
+#define FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP_INDEX)
+#define FW_HYPERCALL_EFI_GET_VARIABLE_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_GET_VARIABLE_INDEX)
+#define FW_HYPERCALL_EFI_GET_NEXT_VARIABLE_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_GET_NEXT_VARIABLE_INDEX)
+#define FW_HYPERCALL_EFI_SET_VARIABLE_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_SET_VARIABLE_INDEX)
+#define FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT_INDEX)
+#define FW_HYPERCALL_EFI_RESET_SYSTEM_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_EFI_RESET_SYSTEM_INDEX)
diff --git a/xen/include/asm-ia64/domain.h b/xen/include/asm-ia64/domain.h
new file mode 100644
index 0000000000..0f0e37895b
--- /dev/null
+++ b/xen/include/asm-ia64/domain.h
@@ -0,0 +1,164 @@
+#ifndef __ASM_DOMAIN_H__
+#define __ASM_DOMAIN_H__
+
+#include <linux/thread_info.h>
+#ifdef CONFIG_VTI
+#include <asm/vmx_vpd.h>
+#include <asm/vmmu.h>
+#include <asm/regionreg.h>
+#endif // CONFIG_VTI
+#include <xen/list.h>
+
+extern void arch_do_createdomain(struct vcpu *);
+
+extern int arch_final_setup_guestos(
+ struct vcpu *, struct vcpu_guest_context *);
+
+extern void domain_relinquish_resources(struct domain *);
+
+#ifdef CONFIG_VTI
+struct trap_bounce {
+ // TO add, FIXME Eddie
+};
+
+#define PMT_SIZE (32L*1024*1024) // 32M for PMT
+#endif // CONFIG_VTI
+
+struct arch_domain {
+ struct mm_struct *active_mm;
+ struct mm_struct *mm;
+ int metaphysical_rr0;
+ int starting_rid; /* first RID assigned to domain */
+ int ending_rid; /* one beyond highest RID assigned to domain */
+ int rid_bits; /* number of virtual rid bits (default: 18) */
+ int breakimm;
+#ifdef CONFIG_VTI
+ int imp_va_msb;
+ ia64_rr emul_phy_rr0;
+ ia64_rr emul_phy_rr4;
+ u64 *pmt; /* physical to machine table */
+#endif //CONFIG_VTI
+ u64 xen_vastart;
+ u64 xen_vaend;
+ u64 shared_info_va;
+#ifdef DOMU_AUTO_RESTART
+ u64 image_start;
+ u64 image_len;
+ u64 entry;
+#endif
+};
+#define starting_rid arch.starting_rid
+#define ending_rid arch.ending_rid
+#define rid_bits arch.rid_bits
+#define xen_vastart arch.xen_vastart
+#define xen_vaend arch.xen_vaend
+#define shared_info_va arch.shared_info_va
+
+struct arch_vcpu {
+#if 1
+ TR_ENTRY itrs[NITRS];
+ TR_ENTRY dtrs[NDTRS];
+ TR_ENTRY itlb;
+ TR_ENTRY dtlb;
+ unsigned long itlb_pte;
+ unsigned long dtlb_pte;
+ unsigned long irr[4];
+ unsigned long insvc[4];
+ unsigned long iva;
+ unsigned long dcr;
+ unsigned long itc;
+ unsigned long domain_itm;
+ unsigned long domain_itm_last;
+ unsigned long xen_itm;
+ unsigned long xen_timer_interval;
+#endif
+ void *regs; /* temporary until find a better way to do privops */
+ int metaphysical_rr0; // from arch_domain (so is pinned)
+ int metaphysical_saved_rr0; // from arch_domain (so is pinned)
+ int breakimm; // from arch_domain (so is pinned)
+ struct mm_struct *active_mm;
+ struct thread_struct _thread; // this must be last
+#ifdef CONFIG_VTI
+ void (*schedule_tail) (struct vcpu *);
+ struct trap_bounce trap_bounce;
+ thash_cb_t *vtlb;
+ //for phycial emulation
+ unsigned long old_rsc;
+ int mode_flags;
+
+ struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
+#endif // CONFIG_VTI
+};
+
+#define active_mm arch.active_mm
+//#define thread arch._thread
+
+// FOLLOWING FROM linux-2.6.7/include/sched.h
+
+struct mm_struct {
+ struct vm_area_struct * mmap; /* list of VMAs */
+#ifndef XEN
+ struct rb_root mm_rb;
+#endif
+ struct vm_area_struct * mmap_cache; /* last find_vma result */
+ unsigned long free_area_cache; /* first hole */
+ pgd_t * pgd;
+ atomic_t mm_users; /* How many users with user space? */
+ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
+ int map_count; /* number of VMAs */
+#ifndef XEN
+ struct rw_semaphore mmap_sem;
+#endif
+ spinlock_t page_table_lock; /* Protects task page tables and mm->rss */
+
+ struct list_head mmlist; /* List of all active mm's. These are globally strung
+ * together off init_mm.mmlist, and are protected
+ * by mmlist_lock
+ */
+
+ unsigned long start_code, end_code, start_data, end_data;
+ unsigned long start_brk, brk, start_stack;
+ unsigned long arg_start, arg_end, env_start, env_end;
+ unsigned long rss, total_vm, locked_vm;
+ unsigned long def_flags;
+
+ unsigned long saved_auxv[40]; /* for /proc/PID/auxv */
+
+ unsigned dumpable:1;
+#ifdef CONFIG_HUGETLB_PAGE
+ int used_hugetlb;
+#endif
+#ifndef XEN
+ cpumask_t cpu_vm_mask;
+
+ /* Architecture-specific MM context */
+ mm_context_t context;
+
+ /* coredumping support */
+ int core_waiters;
+ struct completion *core_startup_done, core_done;
+
+ /* aio bits */
+ rwlock_t ioctx_list_lock;
+ struct kioctx *ioctx_list;
+
+ struct kioctx default_kioctx;
+#endif
+};
+
+extern struct mm_struct init_mm;
+
+#include <asm/uaccess.h> /* for KERNEL_DS */
+#include <asm/pgtable.h>
+
+#endif /* __ASM_DOMAIN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-ia64/flushtlb.h b/xen/include/asm-ia64/flushtlb.h
new file mode 100644
index 0000000000..2f5ecead31
--- /dev/null
+++ b/xen/include/asm-ia64/flushtlb.h
@@ -0,0 +1,14 @@
+#ifndef __FLUSHTLB_H__
+#define __FLUSHTLB_H__
+
+/* The current time as shown by the virtual TLB clock. */
+extern u32 tlbflush_clock;
+
+/* Time at which each CPU's TLB was last flushed. */
+extern u32 tlbflush_time[NR_CPUS];
+
+#define tlbflush_current_time() tlbflush_clock
+#define tlbflush_filter(x,y) ((void)0)
+#define NEED_FLUSH(x, y) (0)
+
+#endif
diff --git a/xen/include/asm-ia64/ia64_int.h b/xen/include/asm-ia64/ia64_int.h
new file mode 100644
index 0000000000..2e4f61b6b1
--- /dev/null
+++ b/xen/include/asm-ia64/ia64_int.h
@@ -0,0 +1,56 @@
+#ifndef _ASM_IA64_INT_H
+#define _ASM_IA64_INT_H
+
+//#include "ia64.h"
+
+#define IA64_VHPT_TRANS_VECTOR 0x0000 /* UNUSED */
+#define IA64_INST_TLB_VECTOR 0x0400
+#define IA64_DATA_TLB_VECTOR 0x0800
+#define IA64_ALT_INST_TLB_VECTOR 0x0c00 /* UNUSED */
+#define IA64_ALT_DATA_TLB_VECTOR 0x1000 /* UNUSED */
+#define IA64_DATA_NESTED_TLB_VECTOR 0x1400
+#define IA64_INST_KEY_MISS_VECTOR 0x1800
+#define IA64_DATA_KEY_MISS_VECTOR 0x1c00
+#define IA64_DIRTY_BIT_VECTOR 0x2000
+#define IA64_INST_ACCESS_BIT_VECTOR 0x2400
+#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800
+#define IA64_BREAK_VECTOR 0x2c00
+#define IA64_EXTINT_VECTOR 0x3000
+#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000
+#define IA64_KEY_PERMISSION_VECTOR 0x5100
+#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200
+#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300
+#define IA64_GENEX_VECTOR 0x5400
+#define IA64_DISABLED_FPREG_VECTOR 0x5500
+#define IA64_NAT_CONSUMPTION_VECTOR 0x5600
+#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */
+#define IA64_DEBUG_VECTOR 0x5900
+#define IA64_UNALIGNED_REF_VECTOR 0x5a00
+#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00
+#define IA64_FP_FAULT_VECTOR 0x5c00
+#define IA64_FP_TRAP_VECTOR 0x5d00
+#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00
+#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00
+#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000
+
+#define IA64_NO_FAULT 0x0000
+#define IA64_RFI_IN_PROGRESS 0x0001
+#define IA64_RETRY 0x0002
+#ifdef CONFIG_VTI
+#define IA64_FAULT 0x0001
+#define IA64_INJ_FAULT 0x0005
+#endif //CONFIG_VTI
+#define IA64_FORCED_IFA 0x0004
+#define IA64_ILLOP_FAULT (IA64_GENEX_VECTOR | 0x00)
+#define IA64_PRIVOP_FAULT (IA64_GENEX_VECTOR | 0x10)
+#define IA64_PRIVREG_FAULT (IA64_GENEX_VECTOR | 0x20)
+#define IA64_RSVDREG_FAULT (IA64_GENEX_VECTOR | 0x30)
+#define IA64_DISIST_FAULT (IA64_GENEX_VECTOR | 0x40)
+#define IA64_ILLDEP_FAULT (IA64_GENEX_VECTOR | 0x80)
+#define IA64_DTLB_FAULT (IA64_DATA_TLB_VECTOR)
+
+#if !defined(__ASSEMBLY__)
+typedef unsigned long IA64FAULT;
+typedef unsigned long IA64INTVECTOR;
+#endif /* !ASSEMBLY */
+#endif
diff --git a/xen/include/asm-ia64/init.h b/xen/include/asm-ia64/init.h
new file mode 100644
index 0000000000..7e5df20da6
--- /dev/null
+++ b/xen/include/asm-ia64/init.h
@@ -0,0 +1,29 @@
+#ifndef _XEN_ASM_INIT_H
+#define _XEN_ASM_INIT_H
+
+/*
+ * Mark functions and data as being only used at initialization
+ * or exit time.
+ */
+#define __init \
+ __attribute__ ((__section__ (".init.text")))
+#define __exit \
+ __attribute_used__ __attribute__ ((__section__(".text.exit")))
+#define __initdata \
+ __attribute__ ((__section__ (".init.data")))
+#define __exitdata \
+ __attribute_used__ __attribute__ ((__section__ (".data.exit")))
+#define __initsetup \
+ __attribute_used__ __attribute__ ((__section__ (".init.setup")))
+#define __init_call \
+ __attribute_used__ __attribute__ ((__section__ (".initcall1.init")))
+#define __exit_call \
+ __attribute_used__ __attribute__ ((__section__ (".exitcall.exit")))
+
+/* For assembly routines
+#define __INIT .section ".text.init","ax"
+#define __FINIT .previous
+#define __INITDATA .section ".data.init","aw"
+*/
+
+#endif /* _XEN_ASM_INIT_H */
diff --git a/xen/include/asm-ia64/mm.h b/xen/include/asm-ia64/mm.h
new file mode 100644
index 0000000000..a762ec6318
--- /dev/null
+++ b/xen/include/asm-ia64/mm.h
@@ -0,0 +1,380 @@
+#ifndef __ASM_IA64_MM_H__
+#define __ASM_IA64_MM_H__
+
+#include <xen/config.h>
+#ifdef LINUX_2_6
+#include <xen/gfp.h>
+#endif
+#include <xen/list.h>
+#include <xen/spinlock.h>
+#include <xen/perfc.h>
+#include <xen/sched.h>
+
+#include <linux/rbtree.h>
+
+#include <asm/processor.h>
+#include <asm/atomic.h>
+#include <asm/flushtlb.h>
+#include <asm/io.h>
+
+#include <public/xen.h>
+
+/*
+ * The following is for page_alloc.c.
+ */
+
+typedef unsigned long page_flags_t;
+
+/*
+ * Per-page-frame information.
+ */
+
+//FIXME: This can go away when common/dom0_ops.c is fully arch-independent
+#if 0
+struct pfn_info
+{
+ /* Each frame can be threaded onto a doubly-linked list. */
+ struct list_head list;
+ /* Context-dependent fields follow... */
+ union {
+
+ /* Page is in use by a domain. */
+ struct {
+ /* Owner of this page. */
+ struct domain *domain;
+ /* Reference count and various PGC_xxx flags and fields. */
+ u32 count_info;
+ /* Type reference count and various PGT_xxx flags and fields. */
+ u32 type_info;
+ } inuse;
+
+ /* Page is on a free list. */
+ struct {
+ /* Mask of possibly-tainted TLBs. */
+ unsigned long cpu_mask;
+ /* Must be at same offset as 'u.inuse.count_flags'. */
+ u32 __unavailable;
+ /* Order-size of the free chunk this page is the head of. */
+ u8 order;
+ } free;
+
+ } u;
+
+ /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
+ u32 tlbflush_timestamp;
+};
+#endif
+
+struct page
+{
+ /* Each frame can be threaded onto a doubly-linked list. */
+ struct list_head list;
+
+ /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
+ u32 tlbflush_timestamp;
+
+ /* Reference count and various PGC_xxx flags and fields. */
+ u32 count_info;
+
+ /* Context-dependent fields follow... */
+ union {
+
+ /* Page is in use by a domain. */
+ struct {
+ /* Owner of this page. */
+ u64 _domain;
+ /* Type reference count and various PGT_xxx flags and fields. */
+ u32 type_info;
+ } inuse;
+
+ /* Page is on a free list. */
+ struct {
+ /* Mask of possibly-tainted TLBs. */
+ cpumask_t cpumask;
+ /* Order-size of the free chunk this page is the head of. */
+ u8 order;
+ } free;
+
+ } u;
+// following added for Linux compiling
+ page_flags_t flags;
+ atomic_t _count;
+ struct list_head lru; // is this the same as above "list"?
+};
+
+#define set_page_count(p,v) atomic_set(&(p)->_count, v - 1)
+
+//FIXME: These can go away when common/dom0_ops.c is fully arch-independent
+ /* The following page types are MUTUALLY EXCLUSIVE. */
+#define PGT_none (0<<29) /* no special uses of this page */
+#define PGT_l1_page_table (1<<29) /* using this page as an L1 page table? */
+#define PGT_l2_page_table (2<<29) /* using this page as an L2 page table? */
+#define PGT_l3_page_table (3<<29) /* using this page as an L3 page table? */
+#define PGT_l4_page_table (4<<29) /* using this page as an L4 page table? */
+#define PGT_gdt_page (5<<29) /* using this page in a GDT? */
+#define PGT_ldt_page (6<<29) /* using this page in an LDT? */
+#define PGT_writeable_page (7<<29) /* has writable mappings of this page? */
+#define PGT_type_mask (7<<29) /* Bits 29-31. */
+ /* Has this page been validated for use as its current type? */
+#define _PGT_validated 28
+#define PGT_validated (1<<_PGT_validated)
+ /* 28-bit count of uses of this frame as its current type. */
+#define PGT_count_mask ((1<<28)-1)
+
+/* Cleared when the owning guest 'frees' this page. */
+#define _PGC_allocated 31
+#define PGC_allocated (1U<<_PGC_allocated)
+#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
+
+#define IS_XEN_HEAP_FRAME(_pfn) ((page_to_phys(_pfn) < xenheap_phys_end) \
+ && (page_to_phys(_pfn) >= xen_pstart))
+
+#define pickle_domptr(_d) ((u64)(_d))
+#define unpickle_domptr(_d) ((struct domain*)(_d))
+
+#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
+#define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+
+extern struct pfn_info *frame_table;
+extern unsigned long frame_table_size;
+extern struct list_head free_list;
+extern spinlock_t free_list_lock;
+extern unsigned int free_pfns;
+extern unsigned long max_page;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+void __init init_frametable(void *frametable_vstart, unsigned long nr_pages);
+#else
+extern void __init init_frametable(void);
+#endif
+void add_to_domain_alloc_list(unsigned long ps, unsigned long pe);
+
+static inline void put_page(struct pfn_info *page)
+{
+ dummy();
+}
+
+
+static inline int get_page(struct pfn_info *page,
+ struct domain *domain)
+{
+ dummy();
+}
+
+#define set_machinetophys(_mfn, _pfn) do { } while(0);
+
+#ifdef MEMORY_GUARD
+void *memguard_init(void *heap_start);
+void memguard_guard_stack(void *p);
+void memguard_guard_range(void *p, unsigned long l);
+void memguard_unguard_range(void *p, unsigned long l);
+#else
+#define memguard_init(_s) (_s)
+#define memguard_guard_stack(_p) ((void)0)
+#define memguard_guard_range(_p,_l) ((void)0)
+#define memguard_unguard_range(_p,_l) ((void)0)
+#endif
+
+// FOLLOWING FROM linux-2.6.7/include/mm.h
+
+/*
+ * This struct defines a memory VMM memory area. There is one of these
+ * per VM-area/task. A VM area is any part of the process virtual memory
+ * space that has a special rule for the page-fault handlers (ie a shared
+ * library, the executable area etc).
+ */
+struct vm_area_struct {
+ struct mm_struct * vm_mm; /* The address space we belong to. */
+ unsigned long vm_start; /* Our start address within vm_mm. */
+ unsigned long vm_end; /* The first byte after our end address
+ within vm_mm. */
+
+ /* linked list of VM areas per task, sorted by address */
+ struct vm_area_struct *vm_next;
+
+ pgprot_t vm_page_prot; /* Access permissions of this VMA. */
+ unsigned long vm_flags; /* Flags, listed below. */
+
+#ifndef XEN
+ struct rb_node vm_rb;
+
+// XEN doesn't need all the backing store stuff
+ /*
+ * For areas with an address space and backing store,
+ * linkage into the address_space->i_mmap prio tree, or
+ * linkage to the list of like vmas hanging off its node, or
+ * linkage of vma in the address_space->i_mmap_nonlinear list.
+ */
+ union {
+ struct {
+ struct list_head list;
+ void *parent; /* aligns with prio_tree_node parent */
+ struct vm_area_struct *head;
+ } vm_set;
+
+ struct prio_tree_node prio_tree_node;
+ } shared;
+
+ /*
+ * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
+ * list, after a COW of one of the file pages. A MAP_SHARED vma
+ * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
+ * or brk vma (with NULL file) can only be in an anon_vma list.
+ */
+ struct list_head anon_vma_node; /* Serialized by anon_vma->lock */
+ struct anon_vma *anon_vma; /* Serialized by page_table_lock */
+
+ /* Function pointers to deal with this struct. */
+ struct vm_operations_struct * vm_ops;
+
+ /* Information about our backing store: */
+ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
+ units, *not* PAGE_CACHE_SIZE */
+ struct file * vm_file; /* File we map to (can be NULL). */
+ void * vm_private_data; /* was vm_pte (shared mem) */
+
+#ifdef CONFIG_NUMA
+ struct mempolicy *vm_policy; /* NUMA policy for the VMA */
+#endif
+#endif
+};
+/*
+ * vm_flags..
+ */
+#define VM_READ 0x00000001 /* currently active flags */
+#define VM_WRITE 0x00000002
+#define VM_EXEC 0x00000004
+#define VM_SHARED 0x00000008
+
+#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */
+#define VM_MAYWRITE 0x00000020
+#define VM_MAYEXEC 0x00000040
+#define VM_MAYSHARE 0x00000080
+
+#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
+#define VM_GROWSUP 0x00000200
+#define VM_SHM 0x00000400 /* shared memory area, don't swap out */
+#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
+
+#define VM_EXECUTABLE 0x00001000
+#define VM_LOCKED 0x00002000
+#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
+
+ /* Used by sys_madvise() */
+#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
+#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
+
+#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
+#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
+#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */
+#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
+#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
+#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
+
+#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
+#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+#endif
+
+#ifdef CONFIG_STACK_GROWSUP
+#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#else
+#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#endif
+
+/*
+ * The zone field is never updated after free_area_init_core()
+ * sets it, so none of the operations on it need to be atomic.
+ * We'll have up to (MAX_NUMNODES * MAX_NR_ZONES) zones total,
+ * so we use (MAX_NODES_SHIFT + MAX_ZONES_SHIFT) here to get enough bits.
+ */
+#define NODEZONE_SHIFT (sizeof(page_flags_t)*8 - MAX_NODES_SHIFT - MAX_ZONES_SHIFT)
+#define NODEZONE(node, zone) ((node << ZONES_SHIFT) | zone)
+
+static inline unsigned long page_zonenum(struct page *page)
+{
+ return (page->flags >> NODEZONE_SHIFT) & (~(~0UL << ZONES_SHIFT));
+}
+static inline unsigned long page_to_nid(struct page *page)
+{
+ return (page->flags >> (NODEZONE_SHIFT + ZONES_SHIFT));
+}
+
+struct zone;
+extern struct zone *zone_table[];
+
+static inline struct zone *page_zone(struct page *page)
+{
+ return zone_table[page->flags >> NODEZONE_SHIFT];
+}
+
+static inline void set_page_zone(struct page *page, unsigned long nodezone_num)
+{
+ page->flags &= ~(~0UL << NODEZONE_SHIFT);
+ page->flags |= nodezone_num << NODEZONE_SHIFT;
+}
+
+#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */
+extern unsigned long max_mapnr;
+#endif
+
+static inline void *lowmem_page_address(struct page *page)
+{
+ return __va(page_to_pfn(page) << PAGE_SHIFT);
+}
+
+#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
+#define HASHED_PAGE_VIRTUAL
+#endif
+
+#if defined(WANT_PAGE_VIRTUAL)
+#define page_address(page) ((page)->virtual)
+#define set_page_address(page, address) \
+ do { \
+ (page)->virtual = (address); \
+ } while(0)
+#define page_address_init() do { } while(0)
+#endif
+
+#if defined(HASHED_PAGE_VIRTUAL)
+void *page_address(struct page *page);
+void set_page_address(struct page *page, void *virtual);
+void page_address_init(void);
+#endif
+
+#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
+#define page_address(page) lowmem_page_address(page)
+#define set_page_address(page, address) do { } while(0)
+#define page_address_init() do { } while(0)
+#endif
+
+
+#ifndef CONFIG_DEBUG_PAGEALLOC
+static inline void
+kernel_map_pages(struct page *page, int numpages, int enable)
+{
+}
+#endif
+
+extern unsigned long num_physpages;
+extern unsigned long totalram_pages;
+extern int nr_swap_pages;
+
+#ifdef CONFIG_VTI
+extern unsigned long *mpt_table;
+#undef machine_to_phys_mapping
+#define machine_to_phys_mapping mpt_table
+
+/* If pmt table is provided by control pannel later, we need __get_user
+* here. However if it's allocated by HV, we should access it directly
+*/
+#define phys_to_machine_mapping(d, gpfn) \
+ ((d) == dom0 ? gpfn : (d)->arch.pmt[(gpfn)])
+
+#define __mfn_to_gpfn(_d, mfn) \
+ machine_to_phys_mapping[(mfn)]
+
+#define __gpfn_to_mfn(_d, gpfn) \
+ phys_to_machine_mapping((_d), (gpfn))
+#endif // CONFIG_VTI
+
+#endif /* __ASM_IA64_MM_H__ */
diff --git a/xen/include/asm-ia64/mmu_context.h b/xen/include/asm-ia64/mmu_context.h
new file mode 100644
index 0000000000..4f51c65756
--- /dev/null
+++ b/xen/include/asm-ia64/mmu_context.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_MMU_CONTEXT_H
+#define __ASM_MMU_CONTEXT_H
+//dummy file to resolve non-arch-indep include
+#ifdef XEN
+#ifndef CONFIG_VTI
+#define IA64_REGION_ID_KERNEL 0
+#else // CONFIG_VTI
+#define IA64_REGION_ID_KERNEL 0x1e0000 /* Start from all 1 in highest 4 bits */
+#endif // CONFIG_VTI
+#define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61))
+
+#ifndef __ASSEMBLY__
+struct ia64_ctx {
+ spinlock_t lock;
+ unsigned int next; /* next context number to use */
+ unsigned int limit; /* next >= limit => must call wrap_mmu_context() */
+ unsigned int max_ctx; /* max. context value supported by all CPUs */
+};
+
+extern struct ia64_ctx ia64_ctx;
+#endif /* ! __ASSEMBLY__ */
+#endif
+#endif /* ! __ASM_MMU_CONTEXT_H */
diff --git a/xen/include/asm-ia64/multicall.h b/xen/include/asm-ia64/multicall.h
new file mode 100644
index 0000000000..8e7953bece
--- /dev/null
+++ b/xen/include/asm-ia64/multicall.h
@@ -0,0 +1,5 @@
+#ifndef __ASM_IA64_MULTICALL_H__
+#define __ASM_IA64_MULTICALL_H__
+
+#define do_multicall_call(_call) BUG()
+#endif /* __ASM_IA64_MULTICALL_H__ */
diff --git a/xen/include/asm-ia64/offsets.h b/xen/include/asm-ia64/offsets.h
new file mode 100644
index 0000000000..0973b319d9
--- /dev/null
+++ b/xen/include/asm-ia64/offsets.h
@@ -0,0 +1,2 @@
+//dummy file to resolve non-arch-indep include
+#include <asm/asm-offsets.h>
diff --git a/xen/include/asm-ia64/privop.h b/xen/include/asm-ia64/privop.h
new file mode 100644
index 0000000000..4cad79ee3a
--- /dev/null
+++ b/xen/include/asm-ia64/privop.h
@@ -0,0 +1,177 @@
+#ifndef _XEN_IA64_PRIVOP_H
+#define _XEN_IA64_PRIVOP_H
+
+#include <asm/ia64_int.h>
+#ifdef CONFIG_VTI
+#include <asm/vmx_vcpu.h>
+#else //CONFIG_VTI
+#include <asm/vcpu.h>
+#endif //CONFIG_VTI
+
+typedef unsigned long IA64_INST;
+
+extern IA64FAULT priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr);
+
+typedef union U_IA64_BUNDLE {
+ unsigned long i64[2];
+ struct { unsigned long template:5,slot0:41,slot1a:18,slot1b:23,slot2:41; };
+ // NOTE: following doesn't work because bitfields can't cross natural
+ // size boundaries
+ //struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; };
+} IA64_BUNDLE;
+
+typedef enum E_IA64_SLOT_TYPE { I, M, F, B, L, ILLEGAL } IA64_SLOT_TYPE;
+
+typedef union U_INST64_A5 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, imm9d:9, s:1, major:4; };
+} INST64_A5;
+
+typedef union U_INST64_B4 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, wh:2, d:1, un1:1, major:4; };
+} INST64_B4;
+
+typedef union U_INST64_B8 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
+} INST64_B8;
+
+typedef union U_INST64_B9 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
+} INST64_B9;
+
+typedef union U_INST64_I19 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
+} INST64_I19;
+
+typedef union U_INST64_I26 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4;};
+} INST64_I26;
+
+typedef union U_INST64_I27 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4;};
+} INST64_I27;
+
+typedef union U_INST64_I28 { // not privileged (mov from AR)
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4;};
+} INST64_I28;
+
+typedef union U_INST64_M28 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4;};
+} INST64_M28;
+
+typedef union U_INST64_M29 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4;};
+} INST64_M29;
+
+typedef union U_INST64_M30 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, imm:7, ar3:7,x4:4,x2:2,x3:3,s:1,major:4;};
+} INST64_M30;
+
+typedef union U_INST64_M31 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4;};
+} INST64_M31;
+
+typedef union U_INST64_M32 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4;};
+} INST64_M32;
+
+typedef union U_INST64_M33 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M33;
+
+typedef union U_INST64_M35 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+
+} INST64_M35;
+
+typedef union U_INST64_M36 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
+} INST64_M36;
+
+typedef union U_INST64_M41 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+} INST64_M41;
+
+typedef union U_INST64_M42 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M42;
+
+typedef union U_INST64_M43 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M43;
+
+typedef union U_INST64_M44 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
+} INST64_M44;
+
+typedef union U_INST64_M45 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M45;
+
+typedef union U_INST64_M46 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, x3:3, un1:1, major:4; };
+} INST64_M46;
+
+#ifdef CONFIG_VTI
+typedef union U_INST64_M47 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
+} INST64_M47;
+#endif // CONFIG_VTI
+
+typedef union U_INST64 {
+ IA64_INST inst;
+ struct { unsigned long :37, major:4; } generic;
+ INST64_A5 A5; // used in build_hypercall_bundle only
+ INST64_B4 B4; // used in build_hypercall_bundle only
+ INST64_B8 B8; // rfi, bsw.[01]
+ INST64_B9 B9; // break.b
+ INST64_I19 I19; // used in build_hypercall_bundle only
+ INST64_I26 I26; // mov register to ar (I unit)
+ INST64_I27 I27; // mov immediate to ar (I unit)
+ INST64_I28 I28; // mov from ar (I unit)
+ INST64_M28 M28; // purge translation cache entry
+ INST64_M29 M29; // mov register to ar (M unit)
+ INST64_M30 M30; // mov immediate to ar (M unit)
+ INST64_M31 M31; // mov from ar (M unit)
+ INST64_M32 M32; // mov reg to cr
+ INST64_M33 M33; // mov from cr
+ INST64_M35 M35; // mov to psr
+ INST64_M36 M36; // mov from psr
+ INST64_M41 M41; // translation cache insert
+ INST64_M42 M42; // mov to indirect reg/translation reg insert
+ INST64_M43 M43; // mov from indirect reg
+ INST64_M44 M44; // set/reset system mask
+ INST64_M45 M45; // translation purge
+ INST64_M46 M46; // translation access (tpa,tak)
+#ifdef CONFIG_VTI
+ INST64_M47 M47; // purge translation entry
+#endif // CONFIG_VTI
+} INST64;
+
+#define MASK_41 ((UINT64)0x1ffffffffff)
+
+extern void privify_memory(void *start, UINT64 len);
+
+#endif
diff --git a/xen/include/asm-ia64/regionreg.h b/xen/include/asm-ia64/regionreg.h
new file mode 100644
index 0000000000..aa590dfbf9
--- /dev/null
+++ b/xen/include/asm-ia64/regionreg.h
@@ -0,0 +1,42 @@
+#ifndef _REGIONREG_H_
+#define _REGIONREG_H_
+#ifdef CONFIG_VTI
+#define XEN_DEFAULT_RID 0xf00000
+#define DOMAIN_RID_SHIFT 20
+#define DOMAIN_RID_MASK (~(1U<<DOMAIN_RID_SHIFT -1))
+#else //CONFIG_VTI
+#define XEN_DEFAULT_RID 7
+#endif // CONFIG_VTI
+#define IA64_MIN_IMPL_RID_MSB 17
+#define _REGION_ID(x) ({ia64_rr _v; _v.rrval = (long) (x); _v.rid;})
+#define _REGION_PAGE_SIZE(x) ({ia64_rr _v; _v.rrval = (long) (x); _v.ps;})
+#define _REGION_HW_WALKER(x) ({ia64_rr _v; _v.rrval = (long) (x); _v.ve;})
+#define _MAKE_RR(r, sz, v) ({ia64_rr _v; _v.rrval=0;_v.rid=(r);_v.ps=(sz);_v.ve=(v);_v.rrval;})
+
+typedef union ia64_rr {
+ struct {
+ unsigned long ve : 1; /* enable hw walker */
+ unsigned long reserved0 : 1; /* reserved */
+ unsigned long ps : 6; /* log page size */
+ unsigned long rid: 24; /* region id */
+ unsigned long reserved1 : 32; /* reserved */
+ };
+ unsigned long rrval;
+} ia64_rr;
+
+//
+// region register macros
+//
+#define RR_TO_VE(arg) (((arg) >> 0) & 0x0000000000000001)
+#define RR_VE(arg) (((arg) & 0x0000000000000001) << 0)
+#define RR_VE_MASK 0x0000000000000001L
+#define RR_VE_SHIFT 0
+#define RR_TO_PS(arg) (((arg) >> 2) & 0x000000000000003f)
+#define RR_PS(arg) (((arg) & 0x000000000000003f) << 2)
+#define RR_PS_MASK 0x00000000000000fcL
+#define RR_PS_SHIFT 2
+#define RR_TO_RID(arg) (((arg) >> 8) & 0x0000000000ffffff)
+#define RR_RID(arg) (((arg) & 0x0000000000ffffff) << 8)
+#define RR_RID_MASK 0x00000000ffffff00L
+
+#endif /* !_REGIONREG_H_ */
diff --git a/xen/include/asm-ia64/regs.h b/xen/include/asm-ia64/regs.h
new file mode 100644
index 0000000000..3f7b97d996
--- /dev/null
+++ b/xen/include/asm-ia64/regs.h
@@ -0,0 +1,3 @@
+#include <asm/ptrace.h>
+#define cpu_user_regs pt_regs
+#define xen_regs pt_regs
diff --git a/xen/include/asm-ia64/serial.h b/xen/include/asm-ia64/serial.h
new file mode 100644
index 0000000000..4acf7a77f1
--- /dev/null
+++ b/xen/include/asm-ia64/serial.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_SERIAL_H__
+#define __ASM_SERIAL_H__
+
+#include <asm/regs.h>
+#include <asm/irq.h>
+#include <xen/serial.h>
+#include <asm/hpsim_ssc.h>
+
+#ifndef CONFIG_VTI
+#define arch_serial_putc(_uart, _c) \
+ ( platform_is_hp_ski() ? (ia64_ssc(c,0,0,0,SSC_PUTCHAR), 1) : \
+ ( longs_peak_putc(c), 1 ))
+#else
+#define arch_serial_putc(_uart, _c) \
+ ( platform_is_hp_ski() ? (ia64_ssc(c,0,0,0,SSC_PUTCHAR), 1) : \
+ ( (inb((_uart)->io_base + LSR) & LSR_THRE) ? \
+ (outb((_c), (_uart)->io_base + THR), 1) : 0 ))
+#endif
+
+#endif /* __ASM_SERIAL_H__ */
diff --git a/xen/include/asm-ia64/slab.h b/xen/include/asm-ia64/slab.h
new file mode 100644
index 0000000000..a3239a4cbc
--- /dev/null
+++ b/xen/include/asm-ia64/slab.h
@@ -0,0 +1,3 @@
+#include <xen/xmalloc.h>
+#include <linux/gfp.h>
+#include <asm/delay.h>
diff --git a/xen/include/asm-ia64/time.h b/xen/include/asm-ia64/time.h
new file mode 100644
index 0000000000..0c6e7732e2
--- /dev/null
+++ b/xen/include/asm-ia64/time.h
@@ -0,0 +1 @@
+#include <xen/linuxtime.h>
diff --git a/xen/include/asm-ia64/tlb.h b/xen/include/asm-ia64/tlb.h
new file mode 100644
index 0000000000..7947bf3dcc
--- /dev/null
+++ b/xen/include/asm-ia64/tlb.h
@@ -0,0 +1,51 @@
+#ifndef XEN_ASM_IA64_TLB_H
+#define XEN_ASM_IA64_TLB_H
+
+#define NITRS 8
+#define NDTRS 8
+
+typedef struct {
+ union {
+ struct {
+ unsigned long p : 1; // 0
+ unsigned long : 1; // 1
+ unsigned long ma : 3; // 2-4
+ unsigned long a : 1; // 5
+ unsigned long d : 1; // 6
+ unsigned long pl : 2; // 7-8
+ unsigned long ar : 3; // 9-11
+ unsigned long ppn : 38; // 12-49
+ unsigned long : 2; // 50-51
+ unsigned long ed : 1; // 52
+ };
+ unsigned long page_flags;
+ };
+
+ union {
+ struct {
+ unsigned long : 2; // 0-1
+ unsigned long ps : 6; // 2-7
+ unsigned long key : 24; // 8-31
+ unsigned long : 32; // 32-63
+ };
+ unsigned long itir;
+ };
+
+ unsigned long vadr;
+ unsigned long rid;
+} TR_ENTRY;
+
+#ifdef CONFIG_VTI
+typedef union {
+ unsigned long value;
+ struct {
+ uint64_t ve : 1;
+ uint64_t rv1 : 1;
+ uint64_t ps : 6;
+ uint64_t rid : 24;
+ uint64_t rv2 : 32;
+ };
+} rr_t;
+#endif // CONFIG_VTI
+
+#endif
diff --git a/xen/include/asm-ia64/vcpu.h b/xen/include/asm-ia64/vcpu.h
new file mode 100644
index 0000000000..d3ef4c229b
--- /dev/null
+++ b/xen/include/asm-ia64/vcpu.h
@@ -0,0 +1,154 @@
+#ifndef _XEN_IA64_VCPU_H
+#define _XEN_IA64_VCPU_H
+
+// TODO: Many (or perhaps most) of these should eventually be
+// static inline functions
+
+//#include "thread.h"
+#include <asm/ia64_int.h>
+
+typedef unsigned long UINT64;
+typedef unsigned int UINT;
+typedef int BOOLEAN;
+struct vcpu;
+typedef struct vcpu VCPU;
+
+// NOTE: The actual VCPU structure (struct virtualcpu) is defined in
+// thread.h. Moving it to here caused a lot of files to change, so
+// for now, we'll leave well enough alone.
+typedef struct pt_regs REGS;
+//#define PSCB(vcpu) (((struct spk_thread_t *)vcpu)->pscb)
+//#define vcpu_regs(vcpu) &((struct spk_thread_t *)vcpu)->thread_regs
+//#define vcpu_thread(vcpu) ((struct spk_thread_t *)vcpu)
+
+#define PRIVOP_ADDR_COUNT
+#ifdef PRIVOP_ADDR_COUNT
+#define _RSM 0
+#define _SSM 1
+#define PRIVOP_COUNT_NINSTS 2
+#define PRIVOP_COUNT_NADDRS 30
+
+struct privop_addr_count {
+ char *instname;
+ unsigned long addr[PRIVOP_COUNT_NADDRS];
+ unsigned long count[PRIVOP_COUNT_NADDRS];
+ unsigned long overflow;
+};
+#endif
+
+/* general registers */
+extern UINT64 vcpu_get_gr(VCPU *vcpu, unsigned reg);
+extern IA64FAULT vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value);
+/* application registers */
+extern IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val);
+/* psr */
+extern BOOLEAN vcpu_get_psr_ic(VCPU *vcpu);
+extern UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr);
+extern IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm);
+extern IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm);
+extern IA64FAULT vcpu_set_psr_l(VCPU *vcpu, UINT64 val);
+/* control registers */
+extern IA64FAULT vcpu_set_dcr(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_pta(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_ipsr(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_isr(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_iip(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_ifa(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_itir(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_iipa(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_ifs(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_iim(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_iha(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_lid(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_tpr(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_lrr0(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_lrr1(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_get_dcr(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_itm(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_pta(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_isr(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_iip(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_increment_iip(VCPU *vcpu);
+extern IA64FAULT vcpu_get_ifa(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_itir(VCPU *vcpu, UINT64 *pval);
+extern unsigned long vcpu_get_itir_on_fault(VCPU *vcpu, UINT64 ifa);
+extern IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_ifs(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_iim(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_iha(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_tpr(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_irr0(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_irr1(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_irr2(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_irr3(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval);
+/* interrupt registers */
+extern IA64FAULT vcpu_get_itv(VCPU *vcpu,UINT64 *pval);
+extern IA64FAULT vcpu_get_pmv(VCPU *vcpu,UINT64 *pval);
+extern IA64FAULT vcpu_get_cmcv(VCPU *vcpu,UINT64 *pval);
+extern IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval);
+extern IA64FAULT vcpu_set_itv(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_pmv(VCPU *vcpu, UINT64 val);
+extern IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT64 val);
+/* interval timer registers */
+extern IA64FAULT vcpu_set_itm(VCPU *vcpu,UINT64 val);
+extern IA64FAULT vcpu_set_itc(VCPU *vcpu,UINT64 val);
+/* debug breakpoint registers */
+extern IA64FAULT vcpu_set_ibr(VCPU *vcpu,UINT64 reg,UINT64 val);
+extern IA64FAULT vcpu_set_dbr(VCPU *vcpu,UINT64 reg,UINT64 val);
+extern IA64FAULT vcpu_get_ibr(VCPU *vcpu,UINT64 reg,UINT64 *pval);
+extern IA64FAULT vcpu_get_dbr(VCPU *vcpu,UINT64 reg,UINT64 *pval);
+/* performance monitor registers */
+extern IA64FAULT vcpu_set_pmc(VCPU *vcpu,UINT64 reg,UINT64 val);
+extern IA64FAULT vcpu_set_pmd(VCPU *vcpu,UINT64 reg,UINT64 val);
+extern IA64FAULT vcpu_get_pmc(VCPU *vcpu,UINT64 reg,UINT64 *pval);
+extern IA64FAULT vcpu_get_pmd(VCPU *vcpu,UINT64 reg,UINT64 *pval);
+/* banked general registers */
+extern IA64FAULT vcpu_bsw0(VCPU *vcpu);
+extern IA64FAULT vcpu_bsw1(VCPU *vcpu);
+/* region registers */
+extern IA64FAULT vcpu_set_rr(VCPU *vcpu,UINT64 reg,UINT64 val);
+extern IA64FAULT vcpu_get_rr(VCPU *vcpu,UINT64 reg,UINT64 *pval);
+extern IA64FAULT vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr);
+/* protection key registers */
+extern IA64FAULT vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval);
+extern IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val);
+extern IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key);
+/* TLB */
+extern IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 padr,
+ UINT64 itir, UINT64 ifa);
+extern IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 padr,
+ UINT64 itir, UINT64 ifa);
+extern IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 padr, UINT64 itir, UINT64 ifa);
+extern IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 padr, UINT64 itir, UINT64 ifa);
+extern IA64FAULT vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 addr_range);
+extern IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr);
+extern IA64FAULT vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 addr_range);
+extern IA64FAULT vcpu_ptc_ga(VCPU *vcpu, UINT64 vadr, UINT64 addr_range);
+extern IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
+extern IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr, UINT64 addr_range);
+extern IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr);
+/* misc */
+extern IA64FAULT vcpu_rfi(VCPU *vcpu);
+extern IA64FAULT vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval);
+
+extern void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector);
+extern void vcpu_pend_timer(VCPU *vcpu);
+extern void vcpu_poke_timer(VCPU *vcpu);
+extern void vcpu_set_next_timer(VCPU *vcpu);
+extern BOOLEAN vcpu_timer_expired(VCPU *vcpu);
+extern UINT64 vcpu_deliverable_interrupts(VCPU *vcpu);
+extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64);
+extern UINT64 vcpu_get_tmp(VCPU *, UINT64);
+extern void vcpu_set_tmp(VCPU *, UINT64, UINT64);
+
+
+#endif
diff --git a/xen/include/asm-ia64/vhpt.h b/xen/include/asm-ia64/vhpt.h
new file mode 100644
index 0000000000..e4268f0947
--- /dev/null
+++ b/xen/include/asm-ia64/vhpt.h
@@ -0,0 +1,494 @@
+#ifndef ASM_VHPT_H
+#define ASM_VHPT_H
+
+#define VHPT_ENABLED 1
+#define VHPT_ENABLED_REGION_0_TO_6 1
+#define VHPT_ENABLED_REGION_7 0
+
+
+#if 0
+#define VHPT_CACHE_ENTRY_SIZE 64
+#define VHPT_CACHE_MASK 2097151
+#define VHPT_CACHE_NUM_ENTRIES 32768
+#define VHPT_NUM_ENTRIES 2097152
+#define VHPT_CACHE_ENTRY_SIZE_LOG2 6
+#define VHPT_SIZE_LOG2 26 //????
+#define VHPT_PAGE_SHIFT 26 //????
+#else
+//#define VHPT_CACHE_NUM_ENTRIES 2048
+//#define VHPT_NUM_ENTRIES 131072
+//#define VHPT_CACHE_MASK 131071
+//#define VHPT_SIZE_LOG2 22 //????
+#define VHPT_CACHE_ENTRY_SIZE 64
+#define VHPT_CACHE_NUM_ENTRIES 8192
+#define VHPT_NUM_ENTRIES 524288
+#define VHPT_CACHE_MASK 524287
+#define VHPT_SIZE_LOG2 24 //????
+#define VHPT_PAGE_SHIFT 24 //????
+#endif
+
+// FIXME: These should be automatically generated
+
+#define VLE_PGFLAGS_OFFSET 0
+#define VLE_ITIR_OFFSET 8
+#define VLE_TITAG_OFFSET 16
+#define VLE_CCHAIN_OFFSET 24
+
+#define VCE_TITAG_OFFSET 0
+#define VCE_CCNEXT_OFFSET 8
+#define VCE_CCPREV_OFFSET 16
+#define VCE_PGFLAGS_OFFSET 24
+#define VCE_ITIR_OFFSET 32
+#define VCE_FNEXT_OFFSET 32
+#define VCE_CCHEAD_OFFSET 40
+#define VCE_VADDR_OFFSET 48
+
+//FIXME: change and declare elsewhere
+#define CAUSE_VHPT_CC_HANDLED 0
+
+#ifndef __ASSEMBLY__
+
+//
+// VHPT collision chain entry (part of the "V-Cache")
+// DO NOT CHANGE THE SIZE OF THIS STRUCTURE (see vhpt.S banked regs calculations)
+//
+typedef struct vcache_entry {
+ union {
+ struct {
+ unsigned long tag : 63; // 0-62
+ unsigned long ti : 1; // 63
+ };
+ unsigned long ti_tag;
+ };
+
+ struct vcache_entry *CCNext; // collision chain next
+ struct vcache_entry *CCPrev; // collision chain previous
+
+ union {
+ struct {
+ unsigned long p : 1; // 0
+ unsigned long : 1; // 1
+ unsigned long ma : 3; // 2-4
+ unsigned long a : 1; // 5
+ unsigned long d : 1; // 6
+ unsigned long pl : 2; // 7-8
+ unsigned long ar : 3; // 9-11
+ unsigned long ppn : 38; // 12-49
+ unsigned long : 2; // 50-51
+ unsigned long ed : 1; // 52
+
+ unsigned long translation_type : 2; // 53-54 -- hack
+ unsigned long Counter : 9; // 55-63
+ };
+ unsigned long page_flags;
+ };
+
+ union {
+ struct {
+ unsigned long : 2; // 0-1
+ unsigned long ps : 6; // 2-7
+ unsigned long key : 24; // 8-31
+ unsigned long : 32; // 32-63
+ };
+ unsigned long itir;
+
+ //
+ // the free list pointer when entry not in use
+ //
+ struct vcache_entry *FNext; // free list
+ };
+
+ //
+// store head of collision chain for removal since thash will only work if
+ // current RID is same as when element was added to chain.
+ //
+ struct vhpt_lf_entry *CCHead;
+
+ unsigned long virtual_address;
+
+ unsigned int CChainCnt;
+ unsigned int Signature;
+};
+
+
+//
+// VHPT Long Format Entry (as recognized by hw)
+//
+struct vhpt_lf_entry {
+ unsigned long page_flags;
+ unsigned long itir;
+ unsigned long ti_tag;
+ struct vcache_entry *CChain;
+};
+
+#define INVALID_TI_TAG 0x8000000000000000L
+
+#endif /* !__ASSEMBLY */
+
+#if !VHPT_ENABLED
+#define VHPT_CCHAIN_LOOKUP(Name, i_or_d)
+#else
+#ifdef CONFIG_SMP
+#error "VHPT_CCHAIN_LOOKUP needs a semaphore on the VHPT!"
+#endif
+
+// VHPT_CCHAIN_LOOKUP is intended to run with psr.i+ic off
+#define VHPT_CCHAIN_LOOKUP(Name, i_or_d) \
+ \
+CC_##Name:; \
+ mov r31 = pr; \
+ mov r16 = cr.ifa; \
+ movl r30 = int_counts; \
+ ;; \
+ thash r28 = r16; \
+ adds r30 = CAUSE_VHPT_CC_HANDLED << 3, r30; \
+ ;; \
+ ttag r19 = r16; \
+ ld8 r27 = [r30]; \
+ adds r17 = VLE_CCHAIN_OFFSET, r28; \
+ ;; \
+ ld8 r17 = [r17]; \
+ ;; \
+ cmp.eq p6,p0 = 0, r17; \
+ mov r21 = r17; \
+ adds r22 = VCE_CCNEXT_OFFSET, r17; \
+ adds r28 = VLE_ITIR_OFFSET, r28; \
+(p6) br .Out_##Name; \
+ ;; \
+ \
+.loop_##Name:; \
+ ld8 r20 = [r21]; \
+ ld8 r18 = [r22]; \
+ adds r23 = VCE_PGFLAGS_OFFSET, r21; \
+ adds r24 = VCE_ITIR_OFFSET, r21; \
+ cmp.eq p6,p0 = r17, r21; \
+ cmp.eq p7,p0 = r0, r0; \
+ ;; \
+ lfetch [r18]; \
+ cmp.eq.andcm p6,p7 = r19, r20; \
+ mov r21 = r18; \
+ adds r22 = VCE_CCNEXT_OFFSET, r18; \
+(p6) br.spnt .Out_##Name; \
+(p7) br.sptk .loop_##Name; \
+ ;; \
+ \
+ ld8 r26 = [r23]; \
+ ld8 r25 = [r24]; \
+ adds r29 = VLE_TITAG_OFFSET - VLE_ITIR_OFFSET, r28; \
+ adds r27 = 1, r27; \
+ ;; \
+ mov cr.itir = r25; \
+ st8 [r28] = r25, VLE_PGFLAGS_OFFSET - VLE_ITIR_OFFSET; \
+ or r26 = 1, r26; \
+ st8 [r30] = r27; \
+ ;; \
+ itc.i_or_d r26; \
+ ;; \
+ srlz.i_or_d; \
+ ;; \
+ st8 [r28] = r26; \
+ mov pr = r31, 0x1ffff; \
+ st8 [r29] = r20; \
+ rfi; \
+ ;; \
+ \
+.Out_##Name:; \
+ mov pr = r31, 0x1ffff; \
+ ;; \
+.End_##Name:;
+
+// br.cond.sptk.few dorfi;
+
+
+
+#define VHPT_INSERT() \
+ {.mmi;\
+ thash r17 = r16;\
+ or r26 = 1, r26;\
+ nop 0;\
+ ;;\
+ };\
+ {.mii;\
+ ttag r21 = r16;\
+ adds r18 = VLE_ITIR_OFFSET, r17;\
+ adds r19 = VLE_PGFLAGS_OFFSET, r17;\
+ ;;\
+ };\
+ {.mmi;\
+\
+ st8[r18] = r27;\
+ adds r20 = VLE_TITAG_OFFSET, r17;\
+ nop 0;\
+ ;;\
+ };\
+ {.mmb;\
+ st8[r19] = r26;\
+ st8[r20] = r21;\
+ nop 0;\
+ ;;\
+ };\
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#define VHPT_INSERT1() \
+VCacheInsert:;\
+ mov r18 = 1;\
+ extr.u r17 = r27, 2, 6;\
+ ;;\
+\
+\
+ shl r17 = r18, r17;\
+ ;;\
+\
+\
+ add r30 = r16, r17;\
+ ;;\
+\
+.MainLoop:;\
+ thash r18 = r16;\
+ ;;\
+\
+ ttag r24 = r16;\
+ adds r29 = VLE_CCHAIN_OFFSET, r18;\
+ ;;\
+\
+\
+ ld8 r21 = [r29];\
+ ;;\
+\
+ adds r19 = VCE_CCNEXT_OFFSET, r21;\
+ adds r20 = VCE_TITAG_OFFSET, r21;\
+ mov r28 = r21;\
+\
+ cmp.eq p11, p4 = r0, r21;\
+(p11) br FindOne;\
+ ;;\
+\
+\
+.find_loop:;\
+\
+ ld8 r17 = [r19];\
+ ld8 r18 = [r20];\
+ ;;\
+\
+ adds r19 = VCE_CCNEXT_OFFSET, r17;\
+ adds r20 = VCE_TITAG_OFFSET, r17;\
+ cmp.eq.unc p10, p8 = r18, r24;\
+\
+\
+\
+ cmp.eq.unc p1, p2 = r17, r21;\
+\
+\
+(p10) br .FillVce;\
+ ;;\
+\
+\
+(p8) mov r28 = r17;\
+\
+ lfetch [r19];\
+\
+(p2) br .find_loop;\
+ ;;\
+\
+FindOne:;\
+\
+\
+\
+ movl r22 = G_VCacheRpl;\
+ ;;\
+\
+\
+ ld8 r23 = [r22];\
+ ;;\
+\
+\
+ mov r28 = r23;\
+\
+\
+ adds r17 = VCE_FNEXT_OFFSET, r23;\
+\
+\
+ cmp.eq p14, p3 = r0, r23;\
+ ;;\
+\
+(p3) ld8 r23 = [r17];\
+ ;;\
+\
+\
+(p3) st8 [r22] = r23;\
+(p3) br .AddChain;\
+ ;;\
+\
+\
+\
+\
+ movl r24 = VHPT_CACHE_MASK;\
+\
+\
+ adds r25 = 8, r22;\
+ ;;\
+\
+\
+ ld8 r23 = [r25];\
+ ;;\
+\
+\
+ adds r23 = VHPT_CACHE_ENTRY_SIZE, r23;\
+ ;;\
+\
+\
+ and r23 = r23, r24;\
+\
+\
+ movl r17 = VHPT_ADDR;\
+ ;;\
+\
+\
+ st8 [r25] = r23;\
+\
+\
+ add r28 = r17, r23;\
+ ;;\
+\
+\
+ adds r22 = VCE_CCHEAD_OFFSET, r28;\
+ ;;\
+\
+ ld8 r17 = [r22], VLE_PGFLAGS_OFFSET - VLE_CCHAIN_OFFSET;\
+\
+ adds r19 = VCE_CCNEXT_OFFSET, r28;\
+ adds r20 = VCE_CCPREV_OFFSET, r28;\
+ ;;\
+\
+ ld8 r20 = [r20];\
+ ld8 r19 = [r19];\
+\
+ adds r21 = VLE_CCHAIN_OFFSET, r17;\
+ ;;\
+\
+ ld8 r18 = [r21];\
+\
+\
+ cmp.eq.unc p9, p7 = r19, r28;\
+\
+\
+ adds r23 = VLE_TITAG_OFFSET + 7, r17;\
+\
+\
+ mov r17 = 0x80;\
+ ;;\
+\
+\
+(p9) st8 [r21] = r0;\
+\
+\
+(p9) st1 [r23] = r17;\
+\
+ adds r24 = VCE_CCPREV_OFFSET, r19;\
+ adds r25 = VCE_CCNEXT_OFFSET, r20;\
+\
+\
+(p7) cmp.eq.unc p13, p6 = r18, r28;\
+ ;;\
+\
+(p7) st8 [r24] = r20;\
+(p7) st8 [r25] = r19;\
+\
+ adds r17 = VCE_PGFLAGS_OFFSET, r28;\
+ ;;\
+\
+(p13) st8 [r21] = r19;\
+(p13) ld8 r18 = [r17], VCE_ITIR_OFFSET - VCE_PGFLAGS_OFFSET;\
+ ;;\
+(p13) st8 [r22] = r18, VLE_ITIR_OFFSET - VLE_PGFLAGS_OFFSET;\
+\
+ ;;\
+(p13) ld8 r18 = [r17], VCE_TITAG_OFFSET - VCE_ITIR_OFFSET;\
+ ;;\
+\
+(p13) st8 [r22] = r18, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET;\
+ ;;\
+\
+.AddChain:;\
+\
+\
+ ld8 r24 = [r29];\
+ ;;\
+\
+\
+ st8 [r29] = r28, 0 - VLE_CCHAIN_OFFSET;\
+\
+ adds r25 = VCE_CCNEXT_OFFSET, r28;\
+ adds r19 = VCE_CCPREV_OFFSET, r28;\
+ adds r20 = VCE_CCHEAD_OFFSET, r28;\
+ ;;\
+\
+\
+ st8 [r20] = r29;\
+\
+ cmp.eq p12, p5 = r0, r24;\
+\
+ adds r23 = VCE_CCPREV_OFFSET, r24;\
+ ;;\
+\
+(p12) st8 [r25] = r28;\
+(p12) st8 [r19] = r28;\
+\
+(p5)ld8 r21 = [r23];\
+ adds r29 = VLE_CCHAIN_OFFSET, r29;\
+ ;;\
+\
+(p5)st8 [r25] = r24;\
+(p5)st8 [r19] = r21;\
+\
+ adds r22 = VCE_CCNEXT_OFFSET, r21;\
+ ;;\
+\
+(p5)st8 [r22] = r28;\
+(p5)st8 [r23] = r28;\
+ ;;\
+\
+.FillVce:;\
+ ttag r24 = r16;\
+\
+\
+ adds r29 = 0 - VLE_CCHAIN_OFFSET, r29;\
+ adds r17 = VCE_PGFLAGS_OFFSET, r28;\
+ movl r19 = PAGE_SIZE_OFFSET;\
+ ;;\
+\
+ st8 [r29] = r26, VLE_ITIR_OFFSET - VLE_PGFLAGS_OFFSET;\
+ st8 [r17] = r26, VCE_ITIR_OFFSET - VCE_PGFLAGS_OFFSET;\
+ add r16 = r16, r19;\
+ ;;\
+\
+ st8 [r29] = r27, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET;\
+ st8 [r17] = r27, VCE_TITAG_OFFSET - VCE_ITIR_OFFSET;\
+ ;;\
+\
+ st8 [r29] = r24;\
+ st8 [r17] = r24;\
+\
+ cmp.lt p15, p0 = r16, r30;\
+(p15) br .MainLoop;\
+ ;;\
+
+
+
+
+#endif /* VHPT_ENABLED */
+#endif
diff --git a/xen/include/asm-ia64/virt_event.h b/xen/include/asm-ia64/virt_event.h
new file mode 100644
index 0000000000..d0b66afd7e
--- /dev/null
+++ b/xen/include/asm-ia64/virt_event.h
@@ -0,0 +1,114 @@
+#ifndef __VIRT_EVENT_H__
+#define __VIRT_EVENT_H__
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * virt_event.h:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Shaofan Li (Susie Li) (susie.li@intel.com)
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+
+#define EVENT_MOV_TO_AR 1
+#define EVENT_MOV_TO_AR_IMM 2
+#define EVENT_MOV_FROM_AR 3
+#define EVENT_MOV_TO_CR 4
+#define EVENT_MOV_FROM_CR 5
+#define EVENT_MOV_TO_PSR 6
+#define EVENT_MOV_FROM_PSR 7
+#define EVENT_ITC_D 8
+#define EVENT_ITC_I 9
+#define EVENT_MOV_TO_RR 10
+#define EVENT_MOV_TO_DBR 11
+#define EVENT_MOV_TO_IBR 12
+#define EVENT_MOV_TO_PKR 13
+#define EVENT_MOV_TO_PMC 14
+#define EVENT_MOV_TO_PMD 15
+#define EVENT_ITR_D 16
+#define EVENT_ITR_I 17
+#define EVENT_MOV_FROM_RR 18
+#define EVENT_MOV_FROM_DBR 19
+#define EVENT_MOV_FROM_IBR 20
+#define EVENT_MOV_FROM_PKR 21
+#define EVENT_MOV_FROM_PMC 22
+#define EVENT_MOV_FROM_CPUID 23
+#define EVENT_SSM 24
+#define EVENT_RSM 25
+#define EVENT_PTC_L 26
+#define EVENT_PTC_G 27
+#define EVENT_PTC_GA 28
+#define EVENT_PTR_D 29
+#define EVENT_PTR_I 30
+#define EVENT_THASH 31
+#define EVENT_TTAG 32
+#define EVENT_TPA 33
+#define EVENT_TAK 34
+#define EVENT_PTC_E 35
+#define EVENT_COVER 36
+#define EVENT_RFI 37
+#define EVENT_BSW_0 38
+#define EVENT_BSW_1 39
+#define EVENT_VMSW 40
+
+#if 0
+/* VMAL 1.0 */
+#define EVENT_MOV_TO_AR 1
+#define EVENT_MOV_TO_AR_IMM 2
+#define EVENT_MOV_FROM_AR 3
+#define EVENT_MOV_TO_CR 4
+#define EVENT_MOV_FROM_CR 5
+#define EVENT_MOV_TO_PSR 6
+#define EVENT_MOV_FROM_PSR 7
+#define EVENT_ITC_D 8
+#define EVENT_ITC_I 9
+#define EVENT_MOV_TO_RR 10
+#define EVENT_MOV_TO_DBR 11
+#define EVENT_MOV_TO_IBR 12
+#define EVENT_MOV_TO_PKR 13
+#define EVENT_MOV_TO_PMC 14
+#define EVENT_MOV_TO_PMD 15
+#define EVENT_ITR_D 16
+#define EVENT_ITR_I 17
+#define EVENT_MOV_FROM_RR 18
+#define EVENT_MOV_FROM_DBR 19
+#define EVENT_MOV_FROM_IBR 20
+#define EVENT_MOV_FROM_PKR 21
+#define EVENT_MOV_FROM_PMC 22
+#define EVENT_MOV_FROM_PMD 23
+#define EVENT_MOV_FROM_CPUID 24
+#define EVENT_SSM 25
+#define EVENT_RSM 26
+#define EVENT_PTC_L 27
+#define EVENT_PTC_G 28
+#define EVENT_PTC_GA 29
+#define EVENT_PTR_D 30
+#define EVENT_PTR_I 31
+#define EVENT_THASH 32
+#define EVENT_TTAG 33
+#define EVENT_TPA 34
+#define EVENT_TAK 35
+#define EVENT_PTC_E 36
+#define EVENT_COVER 37
+#define EVENT_RFI 38
+#define EVENT_BSW_0 39
+#define EVENT_BSW_1 40
+#define EVENT_VMSW 41
+
+
+#endif /* VMAL 2.0 */
+#endif /* __VIRT_EVENT_H__ */
diff --git a/xen/include/asm-ia64/vmmu.h b/xen/include/asm-ia64/vmmu.h
new file mode 100644
index 0000000000..cee7d89a90
--- /dev/null
+++ b/xen/include/asm-ia64/vmmu.h
@@ -0,0 +1,344 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmmu.h: virtual memory management unit related APIs and data structure.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ */
+
+#ifndef XEN_TLBthash_H
+#define XEN_TLBthash_H
+
+#include "xen/config.h"
+#include "xen/types.h"
+#include "public/xen.h"
+#include "asm/tlb.h"
+
+#define THASH_TLB_TR 0
+#define THASH_TLB_TC 1
+#define THASH_TLB_FM 2 // foreign map
+
+#define THASH_SECTION_TR (1<<0)
+#define THASH_SECTION_TC (1<<1)
+#define THASH_SECTION_FM (1<<2)
+
+/*
+ * Next bit definition must be same with THASH_TLB_XX
+ */
+typedef union search_section {
+ struct {
+ u32 tr : 1;
+ u32 tc : 1;
+ u32 fm : 1;
+ u32 rsv: 29;
+ };
+ u32 v;
+} search_section_t;
+
+#define MAX_CCN_DEPTH 4 // collision chain depth
+#define VCPU_TLB_SHIFT (22)
+#define VCPU_TLB_SIZE (1UL<<VCPU_TLB_SHIFT)
+#define VCPU_TLB_ORDER (VCPU_TLB_SHIFT - PAGE_SHIFT)
+#define PTA_BASE_SHIFT (15)
+
+#ifndef __ASSEMBLY__
+#define HIGH_32BITS(x) bits(x,32,63)
+#define LOW_32BITS(x) bits(x,0,31)
+
+typedef enum {
+ ISIDE_TLB=0,
+ DSIDE_TLB=1
+} CACHE_LINE_TYPE;
+
+typedef struct thash_data {
+ union {
+ struct {
+ u64 p : 1; // 0
+ u64 rv1 : 1; // 1
+ u64 ma : 3; // 2-4
+ u64 a : 1; // 5
+ u64 d : 1; // 6
+ u64 pl : 2; // 7-8
+ u64 ar : 3; // 9-11
+ u64 ppn : 38; // 12-49
+ u64 rv2 : 2; // 50-51
+ u64 ed : 1; // 52
+ u64 ig1 : 11; //53-63
+ };
+ struct {
+ u64 __rv1 : 12;
+ // sizeof(domid_t) must be less than 38!!! Refer to its definition
+ u64 fm_dom : 38; // 12-49 foreign map domain ID
+ u64 __rv2 : 3; // 50-52
+ // next extension to ig1, only for TLB instance
+ u64 section : 2; // 53-54 TR, TC or FM (thash_TLB_XX)
+ CACHE_LINE_TYPE cl : 1; // I side or D side cache line
+            u64 nomap : 1;   // entry cannot be inserted into machine TLB.
+ u64 __ig1 : 5; // 56-61
+ u64 checked : 1; // for VTLB/VHPT sanity check
+ u64 invalid : 1; // invalid entry
+ };
+ u64 page_flags;
+ }; // same for VHPT and TLB
+
+ union {
+ struct {
+ u64 rv3 : 2; // 0-1
+ u64 ps : 6; // 2-7
+ u64 key : 24; // 8-31
+ u64 rv4 : 32; // 32-63
+ };
+ struct {
+ u64 __rv3 : 32; // 0-31
+ // next extension to rv4
+ u64 rid : 24; // 32-55
+ u64 __rv4 : 8; // 56-63
+ };
+ u64 itir;
+ };
+ union {
+ struct { // For TLB
+ u64 ig2 : 12; // 0-11
+ u64 vpn : 49; // 12-60
+ u64 vrn : 3; // 61-63
+ };
+ u64 vadr;
+ u64 ifa;
+ struct { // For VHPT
+ u64 tag : 63; // 0-62
+ u64 ti : 1; // 63, invalid entry for VHPT
+ };
+ u64 etag; // extended tag for VHPT
+ };
+ union {
+ struct thash_data *next;
+ u64 tr_idx;
+ };
+} thash_data_t;
+
+#define INVALID_VHPT(hdata) ((hdata)->ti)
+#define INVALID_TLB(hdata) ((hdata)->invalid)
+#define INVALID_ENTRY(hcb, hdata) \
+ ((hcb)->ht==THASH_TLB ? INVALID_TLB(hdata) : INVALID_VHPT(hdata))
+
+typedef enum {
+ THASH_TLB=0,
+ THASH_VHPT
+} THASH_TYPE;
+
+struct thash_cb;
+typedef union thash_cch_mem {
+ thash_data_t data;
+ union thash_cch_mem *next;
+} thash_cch_mem_t;
+
+
+/*
+ * Use to calculate the HASH index of thash_data_t.
+ */
+typedef u64 *(THASH_FN)(PTA pta, u64 va, u64 rid, u64 ps);
+typedef u64 *(TTAG_FN)(PTA pta, u64 va, u64 rid, u64 ps);
+typedef u64 *(GET_MFN_FN)(domid_t d, u64 gpfn, u64 pages);
+typedef void *(REM_NOTIFIER_FN)(struct thash_cb *hcb, thash_data_t *entry);
+typedef void (RECYCLE_FN)(struct thash_cb *hc, u64 para);
+typedef rr_t (GET_RR_FN)(struct vcpu *vcpu, u64 reg);
+typedef thash_data_t *(FIND_OVERLAP_FN)(struct thash_cb *hcb,
+ u64 va, u64 ps, int rid, char cl, search_section_t s_sect);
+typedef thash_data_t *(FIND_NEXT_OVL_FN)(struct thash_cb *hcb);
+typedef void (REM_THASH_FN)(struct thash_cb *hcb, thash_data_t *entry);
+typedef void (INS_THASH_FN)(struct thash_cb *hcb, thash_data_t *entry, u64 va);
+
+typedef struct tlb_special {
+ thash_data_t itr[NITRS];
+ thash_data_t dtr[NDTRS];
+ struct thash_cb *vhpt;
+} tlb_special_t;
+
+typedef struct vhpt_cb {
+ //u64 pta; // pta value.
+ GET_MFN_FN *get_mfn;
+ TTAG_FN *tag_func;
+} vhpt_special;
+
+typedef struct thash_internal {
+ thash_data_t *hash_base;
+ thash_data_t *cur_cch; // head of overlap search
+ int rid;
+ int ps;
+ union {
+ u64 tag; // for VHPT
+ struct { // for TLB
+ char _tr_idx; // -1: means done of TR search
+ char cl;
+ search_section_t s_sect; // search section combinations
+ };
+ };
+ u64 _curva; // current address to search
+ u64 _eva;
+} thash_internal_t;
+
+#define THASH_CB_MAGIC 0x55aa00aa55aa55aaUL
+typedef struct thash_cb {
+ /* THASH base information */
+ THASH_TYPE ht; // For TLB or VHPT
+ u64 magic;
+ thash_data_t *hash; // hash table pointer, aligned at thash_sz.
+ u64 hash_sz; // size of above data.
+ void *cch_buf; // base address of collision chain.
+ u64 cch_sz; // size of above data.
+ THASH_FN *hash_func;
+ GET_RR_FN *get_rr_fn;
+ RECYCLE_FN *recycle_notifier;
+ thash_cch_mem_t *cch_freelist;
+ struct vcpu *vcpu;
+ PTA pta;
+ /* VTLB/VHPT common information */
+ FIND_OVERLAP_FN *find_overlap;
+ FIND_NEXT_OVL_FN *next_overlap;
+ REM_THASH_FN *rem_hash; // remove hash entry.
+ INS_THASH_FN *ins_hash; // insert hash entry.
+ REM_NOTIFIER_FN *remove_notifier;
+ /* private information */
+ thash_internal_t priv;
+ union {
+ tlb_special_t *ts;
+ vhpt_special *vs;
+ };
+    // Internal position information, buffer and storage etc. TBD
+} thash_cb_t;
+
+#define ITR(hcb,id) ((hcb)->ts->itr[id])
+#define DTR(hcb,id) ((hcb)->ts->dtr[id])
+#define INVALIDATE_HASH(hcb,hash) { \
+ INVALID_ENTRY(hcb, hash) = 1; \
+ hash->next = NULL; }
+
+#define PURGABLE_ENTRY(hcb,en) \
+ ((hcb)->ht == THASH_VHPT || (en)->section == THASH_TLB_TC)
+
+
+/*
+ * Initialize internal control data before service.
+ */
+extern void thash_init(thash_cb_t *hcb, u64 sz);
+
+/*
+ * Insert an entry to hash table.
+ * NOTES:
+ * 1: TLB entry may be TR, TC or Foreign Map. For TR entry,
+ * itr[]/dtr[] need to be updated too.
+ * 2: Inserting to collision chain may trigger recycling if
+ * the buffer for collision chain is empty.
+ * 3: The new entry is inserted at the hash table.
+ * (I.e. head of the collision chain)
+ * 4: Return the entry in hash table or collision chain.
+ *
+ */
+extern void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va);
+extern void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx);
+
+/*
+ * Force to delete a found entry no matter TR or foreign map for TLB.
+ * NOTES:
+ * 1: TLB entry may be TR, TC or Foreign Map. For TR entry,
+ * itr[]/dtr[] need to be updated too.
+ * 2: This API must be called after thash_find_overlap() or
+ * thash_find_next_overlap().
+ * 3: Return TRUE or FALSE
+ *
+ */
+extern void thash_remove(thash_cb_t *hcb, thash_data_t *entry);
+extern void thash_tr_remove(thash_cb_t *hcb, thash_data_t *entry/*, int idx*/);
+
+/*
+ * Find an overlap entry in hash table and its collision chain.
+ * Refer to SDM2 4.1.1.4 for overlap definition.
+ * PARAS:
+ * 1: in: TLB format entry, rid:ps must be same with vrr[].
+ * va & ps identify the address space for overlap lookup
+ * 2: section can be combination of TR, TC and FM. (THASH_SECTION_XX)
+ * 3: cl means I side or D side.
+ * RETURNS:
+ * NULL to indicate the end of findings.
+ * NOTES:
+ *
+ */
+extern thash_data_t *thash_find_overlap(thash_cb_t *hcb,
+ thash_data_t *in, search_section_t s_sect);
+extern thash_data_t *thash_find_overlap_ex(thash_cb_t *hcb,
+ u64 va, u64 ps, int rid, char cl, search_section_t s_sect);
+
+
+/*
+ * Similar with thash_find_overlap but find next entry.
+ * NOTES:
+ * Intermediate position information is stored in hcb->priv.
+ */
+extern thash_data_t *thash_find_next_overlap(thash_cb_t *hcb);
+
+/*
+ * Find and purge overlap entries in hash table and its collision chain.
+ * PARAS:
+ * 1: in: TLB format entry, rid:ps must be same with vrr[].
+ * rid, va & ps identify the address space for purge
+ * 2: section can be combination of TR, TC and FM. (thash_SECTION_XX)
+ * 3: cl means I side or D side.
+ * NOTES:
+ *
+ */
+extern void thash_purge_entries(thash_cb_t *hcb,
+ thash_data_t *in, search_section_t p_sect);
+extern void thash_purge_entries_ex(thash_cb_t *hcb,
+ u64 rid, u64 va, u64 sz,
+ search_section_t p_sect,
+ CACHE_LINE_TYPE cl);
+extern thash_cb_t *init_domain_tlb(struct vcpu *d);
+
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+extern void thash_purge_all(thash_cb_t *hcb);
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va.
+ *
+ */
+extern thash_data_t *vtlb_lookup(thash_cb_t *hcb,
+ thash_data_t *in);
+extern thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb,
+ u64 rid, u64 va,CACHE_LINE_TYPE cl);
+
+
+#define ITIR_RV_MASK (((1UL<<32)-1)<<32 | 0x3)
+#define PAGE_FLAGS_RV_MASK (0x2 | (0x3UL<<50)|(((1UL<<11)-1)<<53))
+extern u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps);
+extern u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps);
+extern void purge_machine_tc_by_domid(domid_t domid);
+extern void machine_tlb_insert(struct vcpu *d, thash_data_t *tlb);
+extern rr_t vmmu_get_rr(struct vcpu *vcpu, u64 va);
+
+#define VTLB_DEBUG
+#ifdef VTLB_DEBUG
+extern void check_vtlb_sanity(thash_cb_t *vtlb);
+extern void dump_vtlb(thash_cb_t *vtlb);
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* XEN_TLBthash_H */
diff --git a/xen/include/asm-ia64/vmx.h b/xen/include/asm-ia64/vmx.h
new file mode 100644
index 0000000000..3610bd5cd6
--- /dev/null
+++ b/xen/include/asm-ia64/vmx.h
@@ -0,0 +1,38 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx.h: prototype for general vmx related interface
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ */
+
+#ifndef _ASM_IA64_VT_H
+#define _ASM_IA64_VT_H
+
+#define RR7_SWITCH_SHIFT 12 /* 4k enough */
+
+extern void identify_vmx_feature(void);
+extern unsigned int vmx_enabled;
+extern void vmx_init_env(void);
+extern void vmx_final_setup_domain(struct domain *d);
+extern void vmx_init_double_mapping_stub(void);
+extern void vmx_save_state(struct vcpu *v);
+extern void vmx_load_state(struct vcpu *v);
+extern void vmx_insert_double_mapping(u64,u64,u64,u64,u64);
+extern void vmx_purge_double_mapping(u64, u64, u64);
+extern void vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7);
+
+#endif /* _ASM_IA64_VT_H */
diff --git a/xen/include/asm-ia64/vmx_mm_def.h b/xen/include/asm-ia64/vmx_mm_def.h
new file mode 100644
index 0000000000..3ea642d898
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_mm_def.h
@@ -0,0 +1,176 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_mm_def.h:
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ */
+#ifndef _MM_DEF_H_
+#define _MM_DEF_H_
+
+
+/* VHPT size 4M */
+//#define VHPT_SIZE_PS 22
+//#define VHPT_SIZE (1 << VHPT_SIZE_PS)
+#define ARCH_PAGE_SHIFT 12
+#define ARCH_PAGE_SIZE PSIZE(ARCH_PAGE_SHIFT)
+#define INVALID_MFN (-1)
+
+#define MAX_PHYS_ADDR_BITS 50
+#define PMASK(size) (~((size) - 1))
+#define PSIZE(size) (1UL<<(size))
+//#define PAGE_SIZE_4K PSIZE(12)
+#define POFFSET(vaddr, ps) ((vaddr) & (PSIZE(ps) - 1))
+#define PPN_2_PA(ppn) ((ppn)<<12)
+#define CLEARLSB(ppn, nbits) ((((uint64_t)ppn) >> (nbits)) << (nbits))
+#define PAGEALIGN(va, ps) ((va) & ~(PSIZE(ps)-1))
+
+#define TLB_AR_R 0
+#define TLB_AR_RX 1
+#define TLB_AR_RW 2
+#define TLB_AR_RWX 3
+#define TLB_AR_R_RW 4
+#define TLB_AR_RX_RWX 5
+#define TLB_AR_RWX_RW 6
+#define TLB_AR_XP 7
+
+#define IA64_ISR_CODE_MASK0 0xf
+#define IA64_UNIMPL_DADDR_FAULT 0x30
+#define IA64_UNIMPL_IADDR_TRAP 0x10
+#define IA64_RESERVED_REG_FAULT 0x30
+#define IA64_REG_NAT_CONSUMPTION_FAULT 0x10
+#define IA64_NAT_CONSUMPTION_FAULT 0x20
+#define IA64_PRIV_OP_FAULT 0x10
+
+#define DEFER_NONE 0
+#define DEFER_ALWAYS 0x1
+#define DEFER_DM 0x100 /* bit 8 */
+#define DEFER_DP 0X200 /* bit 9 */
+#define DEFER_DK 0x400 /* bit 10 */
+#define DEFER_DX 0x800 /* bit 11 */
+#define DEFER_DR 0x1000 /* bit 12 */
+#define DEFER_DA 0x2000 /* bit 13 */
+#define DEFER_DD 0x4000 /* bit 14 */
+
+#define ACCESS_RIGHT(a) ((a) & (ACCESS_FETCHADD - 1))
+
+#define ACCESS_READ 0x1
+#define ACCESS_WRITE 0x2
+#define ACCESS_EXECUTE 0x4
+#define ACCESS_XP0 0x8
+#define ACCESS_XP1 0x10
+#define ACCESS_XP2 0x20
+#define ACCESS_FETCHADD 0x40
+#define ACCESS_XCHG 0x80
+#define ACCESS_CMPXCHG 0x100
+
+#define ACCESS_SIZE_1 0x10000
+#define ACCESS_SIZE_2 0x20000
+#define ACCESS_SIZE_4 0x40000
+#define ACCESS_SIZE_8 0x80000
+#define ACCESS_SIZE_10 0x100000
+#define ACCESS_SIZE_16 0x200000
+
+#define STLB_TC 0
+#define STLB_TR 1
+
+#define VMM_RR_MASK 0xfffff
+#define VMM_RR_SHIFT 20
+
+#define IA64_RR_SHIFT 61
+
+#define PHYS_PAGE_SHIFT PPN_SHIFT
+
+#define STLB_SZ_SHIFT 8 // 256
+#define STLB_SIZE (1UL<<STLB_SZ_SHIFT)
+#define STLB_PPS_SHIFT 12
+#define STLB_PPS (1UL<<STLB_PPS_SHIFT)
+#define GUEST_TRNUM 8
+
+/* Virtual address memory attributes encoding */
+#define VA_MATTR_WB 0x0
+#define VA_MATTR_UC 0x4
+#define VA_MATTR_UCE 0x5
+#define VA_MATTR_WC 0x6
+#define VA_MATTR_NATPAGE 0x7
+
+#define VRN_MASK 0xe000000000000000L
+#define PTA_BASE_MASK 0x3fffffffffffL
+#define PTA_BASE_SHIFT 15
+#define VHPT_OFFSET_MASK 0x7fff
+
+#define BITS_SHIFT_256MB 28
+#define SIZE_256MB (1UL<<BITS_SHIFT_256MB)
+#define TLB_GR_RV_BITS ((1UL<<1) | (3UL<<50))
+#define HPA_MAPPING_ATTRIBUTE 0x61 //ED:0;AR:0;PL:0;D:1;A:1;P:1
+#define VPN_2_VRN(vpn) ((vpn << PPN_SHIFT) >> IA64_VRN_SHIFT)
+
+typedef enum { INSTRUCTION, DATA, REGISTER } miss_type;
+
+//typedef enum { MVHPT, STLB } vtlb_loc_type_t;
+typedef enum { DATA_REF, NA_REF, INST_REF, RSE_REF } vhpt_ref_t;
+
+typedef enum {
+ PIB_MMIO=0,
+ VGA_BUFF,
+ CHIPSET_IO,
+ LOW_MMIO,
+ LEGACY_IO,
+ IO_SAPIC,
+ NOT_IO
+} mmio_type_t;
+
+typedef struct mmio_list {
+ mmio_type_t iot;
+ u64 start; // start address of this memory IO block
+ u64 end; // end address (include this one)
+} mmio_list_t;
+
+static __inline__ uint64_t
+bits_v(uint64_t v, uint32_t bs, uint32_t be)
+{
+    uint64_t result;
+    __asm __volatile("shl %0=%1, %2;; shr.u %0=%0, %3;;" : "=r" (result): "r"(v), "r"(63-be), "r" (bs+63-be) );
+    return result;
+}
+
+#define bits(val, bs, be) \
+({ \
+ u64 ret; \
+ \
+ __asm __volatile("extr.u %0=%1, %2, %3" \
+ : "=r" (ret): "r"(val), \
+ "M" ((bs)), \
+ "M" ((be) - (bs) + 1) ); \
+ ret; \
+})
+
+/*
+ * clear bits (pos, len) from v.
+ *
+ */
+#define clearbits(v, pos, len) \
+({ \
+ u64 ret; \
+ \
+ __asm __volatile("dep.z %0=%1, %2, %3" \
+ : "=r" (ret): "r"(v), \
+ "M" ((pos)), \
+ "M" ((len))); \
+ ret; \
+ })
+
+#endif
diff --git a/xen/include/asm-ia64/vmx_pal.h b/xen/include/asm-ia64/vmx_pal.h
new file mode 100644
index 0000000000..de1c7ccd4a
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_pal.h
@@ -0,0 +1,120 @@
+#ifndef _ASM_IA64_VT_PAL_H
+#define _ASM_IA64_VT_PAL_H
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_pal.h: VT-I specific PAL (Processor Abstraction Layer) definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Fred Yang (fred.yang@intel.com)
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ */
+
+#include <xen/types.h>
+/* PAL PROCEDURE FOR VIRTUALIZATION */
+#define PAL_VP_CREATE 265
+/* Stacked Virt. Initializes a new VPD for the operation of
+ * a new virtual processor in the virtual environment.
+*/
+#define PAL_VP_ENV_INFO 266
+/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
+#define PAL_VP_EXIT_ENV 267
+/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
+#define PAL_VP_INIT_ENV 268
+/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
+#define PAL_VP_REGISTER 269
+/*Stacked Virt. Register a different host IVT for the virtual processor.*/
+#define PAL_VP_RESUME 270
+/* Renamed from PAL_VP_RESUME */
+#define PAL_VP_RESTORE 270
+/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
+#define PAL_VP_SUSPEND 271
+/* Renamed from PAL_VP_SUSPEND */
+#define PAL_VP_SAVE 271
+/* Stacked Virt. Suspends operation for the specified virtual processor on
+ * the logical processor.
+ */
+#define PAL_VP_TERMINATE 272
+/* Stacked Virt. Terminates operation for the specified virtual processor.*/
+
+static inline s64
+ia64_pal_vp_env_info(u64 *buffer_size, u64 *vp_env_info)
+{
+ struct ia64_pal_retval iprv;
+ PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
+ *buffer_size=iprv.v0;
+ *vp_env_info=iprv.v1;
+ return iprv.status;
+}
+
+static inline s64
+ia64_pal_vp_exit_env(u64 iva)
+{
+ struct ia64_pal_retval iprv;
+ PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
+ return iprv.status;
+}
+
+/* config_options in pal_vp_init_env */
+#define VP_INITIALIZE 1UL
+#define VP_FR_PMC (1UL<<1)
+#define VP_OPCODE (1UL<<8)
+#define VP_CAUSE (1UL<<9)
+/* init vp env with initializing vm_buffer */
+#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE|VP_FR_PMC|VP_OPCODE|VP_CAUSE)
+/* init vp env without initializing vm_buffer */
+#define VP_INIT_ENV (VP_FR_PMC|VP_OPCODE|VP_CAUSE)
+
+static inline s64
+ia64_pal_vp_init_env (u64 config_options, u64 pbase_addr, \
+ u64 vbase_addr, u64 * vsa_base)
+{
+ struct ia64_pal_retval iprv;
+ PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,\
+ vbase_addr);
+ *vsa_base=iprv.v0;
+ return iprv.status;
+}
+
+static inline s64
+ia64_pal_vp_create (u64 *vpd, u64 *host_iva, u64* opt_handler)
+{
+ struct ia64_pal_retval iprv;
+ PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
+ (u64)opt_handler);
+ return iprv.status;
+}
+
+static inline s64
+ia64_pal_vp_restore (u64 *vpd, u64 pal_proc_vector)
+{
+ struct ia64_pal_retval iprv;
+ PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
+ return iprv.status;
+}
+
+static inline s64
+ia64_pal_vp_save (u64 *vpd, u64 pal_proc_vector)
+{
+ struct ia64_pal_retval iprv;
+ PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
+ return iprv.status;
+}
+
+#define PAL_PROC_VM_BIT (1UL << 40)
+#define PAL_PROC_VMSW_BIT (1UL << 54)
+#endif /* _ASM_IA64_VT_PAL_H */
diff --git a/xen/include/asm-ia64/vmx_pal_vsa.h b/xen/include/asm-ia64/vmx_pal_vsa.h
new file mode 100644
index 0000000000..72ad1e6ca7
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_pal_vsa.h
@@ -0,0 +1,44 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+
+
+#ifndef _PAL_VSA_H_
+#define _PAL_VSA_H_
+
+/* PAL virtualization services */
+
+#ifndef __ASSEMBLY__
+extern UINT64 ia64_call_vsa(UINT64 proc,UINT64 arg1, UINT64 arg2,
+ UINT64 arg3, UINT64 arg4, UINT64 arg5,
+ UINT64 arg6, UINT64 arg7);
+extern UINT64 __vsa_base;
+#endif /* __ASSEMBLY__ */
+
+#define PAL_VPS_RESUME_NORMAL 0x0000
+#define PAL_VPS_RESUME_HANDLER 0x0400
+#define PAL_VPS_SYNC_READ 0x0800
+#define PAL_VPS_SYNC_WRITE 0x0c00
+#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000
+#define PAL_VPS_THASH 0x1400
+#define PAL_VPS_TTAG 0x1800
+
+#endif /* _PAL_VSA_H_ */
+
diff --git a/xen/include/asm-ia64/vmx_phy_mode.h b/xen/include/asm-ia64/vmx_phy_mode.h
new file mode 100644
index 0000000000..20c669e69c
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_phy_mode.h
@@ -0,0 +1,126 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_phy_mode.h:
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef _PHY_MODE_H_
+#define _PHY_MODE_H_
+
+/*
+ * Guest Physical Mode is emulated by GVMM, which is actually running
+ * in virtual mode.
+ *
+ * For all combinations of (it,dt,rt), only three were taken into
+ * account:
+ * (0,0,0): some firmware and kernel start code execute in this mode;
+ * (1,1,1): most kernel C code execute in this mode;
+ * (1,0,1): some low level TLB miss handler code execute in this mode;
+ * Till now, no other kind of combinations were found.
+ *
+ * Because all physical addresses fall into two categories:
+ * 0x0xxxxxxxxxxxxxxx, which is cacheable, and 0x8xxxxxxxxxxxxxxx, which
+ * is uncacheable. These two kinds of addresses reside in region 0 and 4
+ * of the virtual mode. Therefore, we load two different Region IDs
+ * (A, B) into RR0 and RR4, respectively, when guest is entering physical
+ * mode. These two RIDs are totally different from the RIDs used in
+ * virtual mode. So, the aliasness between physical addresses and virtual
+ * addresses can be disambiguated by different RIDs.
+ *
+ * RID A and B are stolen from the cpu ulm region id. In linux, each
+ * process is allocated 8 RIDs:
+ * mmu_context << 3 + 0
+ * mmu_context << 3 + 1
+ * mmu_context << 3 + 2
+ * mmu_context << 3 + 3
+ * mmu_context << 3 + 4
+ * mmu_context << 3 + 5
+ * mmu_context << 3 + 6
+ * mmu_context << 3 + 7
+ * Because all processes share region 5~7, the last 3 are left untouched.
+ * So, we steal "mmu_context << 3 + 5" and "mmu_context << 3 + 6" from
+ * ulm and use them as RID A and RID B.
+ *
+ * When guest is running in (1,0,1) mode, the instructions been accessed
+ * reside in region 5~7, not in region 0 or 4. So, instruction can be
+ * accessed in virtual mode without interfering with physical data access.
+ *
+ * When dt!=rt, it is rarely to perform "load/store" and "RSE" operation
+ * at the same time. No need to consider such a case. We consider (0,1)
+ * as (0,0).
+ *
+ */
+
+
+#include <asm/vmx_vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/gcc_intrin.h>
+#include <asm/pgtable.h>
+/* Due to change of ia64_set_rr interface */
+
+#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
+#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
+
+#ifdef PHY_16M /* 16M: large granule for test*/
+#define EMUL_PHY_PAGE_SHIFT 24
+#else /* 4K: emulated physical page granule */
+#define EMUL_PHY_PAGE_SHIFT 12
+#endif
+#define IA64_RSC_MODE 0x0000000000000003
+#define XEN_RR7_RID (0xf00010)
+#define GUEST_IN_PHY 0x1
+extern int valid_mm_mode[];
+extern int mm_switch_table[][8];
+extern void physical_mode_init(VCPU *);
+extern void switch_to_physical_rid(VCPU *);
+extern void switch_to_virtual_rid(VCPU *vcpu);
+extern void switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr);
+extern void stlb_phys_lookup(VCPU *vcpu, UINT64 paddr, UINT64 type);
+extern void check_mm_mode_switch (VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr);
+extern void prepare_if_physical_mode(VCPU *vcpu);
+extern void recover_if_physical_mode(VCPU *vcpu);
+extern void vmx_init_all_rr(VCPU *vcpu);
+extern void vmx_load_all_rr(VCPU *vcpu);
+/*
+ * No sanity check here, since all psr changes have been
+ * checked in switch_mm_mode().
+ */
+#define is_physical_mode(v) \
+ ((v->arch.mode_flags) & GUEST_IN_PHY)
+
+#define is_virtual_mode(v) \
+ (!is_physical_mode(v))
+
+#define MODE_IND(psr) \
+ (((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
+
+#define SW_BAD 0 /* Bad mode transition */
+#define SW_V2P 1 /* Physical emulation is activated */
+#define SW_P2V 2 /* Exit physical mode emulation */
+#define SW_SELF 3 /* No mode transition */
+#define SW_NOP 4 /* Mode transition, but without action required */
+
+#define INV_MODE 0 /* Invalid mode */
+#define GUEST_VIRT 1 /* Guest in virtual mode */
+#define GUEST_PHYS 2 /* Guest in physical mode, requiring emulation */
+
+
+
+#endif /* _PHY_MODE_H_ */
+
+
+
diff --git a/xen/include/asm-ia64/vmx_platform.h b/xen/include/asm-ia64/vmx_platform.h
new file mode 100644
index 0000000000..bf59e61fec
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_platform.h
@@ -0,0 +1,37 @@
+/*
+ * vmx_platform.h: VMX platform support
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_IA64_VMX_PLATFORM_H__
+#define __ASM_IA64_VMX_PLATFORM_H__
+
+#include <public/xen.h>
+
+
+struct mmio_list;
+typedef struct virutal_platform_def {
+ //unsigned long *real_mode_data; /* E820, etc. */
+ //unsigned long shared_page_va;
+ //struct vmx_virpit_t vmx_pit;
+ //struct vmx_handler_t vmx_handler;
+ //struct mi_per_cpu_info mpci; /* MMIO */
+ unsigned long pib_base;
+ unsigned char xtp;
+ struct mmio_list *mmio;
+} vir_plat_t;
+
+#endif
diff --git a/xen/include/asm-ia64/vmx_ptrace.h b/xen/include/asm-ia64/vmx_ptrace.h
new file mode 100644
index 0000000000..4065c097f4
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_ptrace.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2003 Intel Co
+ * Suresh Siddha <suresh.b.siddha@intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ * Arun Sharma <arun.sharma@intel.com>
+ *
+ * 12/07/98 S. Eranian added pt_regs & switch_stack
+ * 12/21/98 D. Mosberger updated to match latest code
+ * 6/17/99 D. Mosberger added second unat member to "struct switch_stack"
+ * 4/28/05 Anthony Xu ported to Xen
+ *
+ */
+
+struct pt_regs {
+ /* The following registers are saved by SAVE_MIN: */
+ unsigned long b6; /* scratch */
+ unsigned long b7; /* scratch */
+
+ unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
+ unsigned long ar_ssd; /* reserved for future use (scratch) */
+
+ unsigned long r8; /* scratch (return value register 0) */
+ unsigned long r9; /* scratch (return value register 1) */
+ unsigned long r10; /* scratch (return value register 2) */
+ unsigned long r11; /* scratch (return value register 3) */
+
+ unsigned long cr_ipsr; /* interrupted task's psr */
+ unsigned long cr_iip; /* interrupted task's instruction pointer */
+ unsigned long cr_ifs; /* interrupted task's function state */
+
+ unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
+ unsigned long ar_pfs; /* prev function state */
+ unsigned long ar_rsc; /* RSE configuration */
+ /* The following two are valid only if cr_ipsr.cpl > 0: */
+ unsigned long ar_rnat; /* RSE NaT */
+ unsigned long ar_bspstore; /* RSE bspstore */
+
+ unsigned long pr; /* 64 predicate registers (1 bit each) */
+ unsigned long b0; /* return pointer (bp) */
+ unsigned long loadrs; /* size of dirty partition << 16 */
+
+ unsigned long r1; /* the gp pointer */
+ unsigned long r12; /* interrupted task's memory stack pointer */
+ unsigned long r13; /* thread pointer */
+
+ unsigned long ar_fpsr; /* floating point status (preserved) */
+ unsigned long r15; /* scratch */
+
+ /* The remaining registers are NOT saved for system calls. */
+
+ unsigned long r14; /* scratch */
+ unsigned long r2; /* scratch */
+ unsigned long r3; /* scratch */
+ unsigned long r4; /* preserved */
+ unsigned long r5; /* preserved */
+ unsigned long r6; /* preserved */
+ unsigned long r7; /* preserved */
+ unsigned long cr_iipa; /* for emulation */
+ unsigned long cr_isr; /* for emulation */
+ unsigned long eml_unat; /* used for emulating instruction */
+    unsigned long rfi_pfs; /* used for emulating rfi */
+
+ /* The following registers are saved by SAVE_REST: */
+ unsigned long r16; /* scratch */
+ unsigned long r17; /* scratch */
+ unsigned long r18; /* scratch */
+ unsigned long r19; /* scratch */
+ unsigned long r20; /* scratch */
+ unsigned long r21; /* scratch */
+ unsigned long r22; /* scratch */
+ unsigned long r23; /* scratch */
+ unsigned long r24; /* scratch */
+ unsigned long r25; /* scratch */
+ unsigned long r26; /* scratch */
+ unsigned long r27; /* scratch */
+ unsigned long r28; /* scratch */
+ unsigned long r29; /* scratch */
+ unsigned long r30; /* scratch */
+ unsigned long r31; /* scratch */
+
+ unsigned long ar_ccv; /* compare/exchange value (scratch) */
+
+ /*
+ * Floating point registers that the kernel considers scratch:
+ */
+ struct ia64_fpreg f6; /* scratch */
+ struct ia64_fpreg f7; /* scratch */
+ struct ia64_fpreg f8; /* scratch */
+ struct ia64_fpreg f9; /* scratch */
+ struct ia64_fpreg f10; /* scratch */
+ struct ia64_fpreg f11; /* scratch */
+};
+
+
diff --git a/xen/include/asm-ia64/vmx_vcpu.h b/xen/include/asm-ia64/vmx_vcpu.h
new file mode 100644
index 0000000000..dc4e0977b7
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_vcpu.h
@@ -0,0 +1,598 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vcpu.h:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ */
+
+#ifndef _XEN_IA64_VMX_VCPU_H
+#define _XEN_IA64_VMX_VCPU_H
+
+
+#include <xen/sched.h>
+#include <asm/ia64_int.h>
+#include <asm/vmx_vpd.h>
+#include <asm/ptrace.h>
+#include <asm/regs.h>
+#include <asm/regionreg.h>
+#include <asm/types.h>
+#include <asm/vcpu.h>
+
+#define VRN_SHIFT 61
+#define VRN0 0x0UL
+#define VRN1 0x1UL
+#define VRN2 0x2UL
+#define VRN3 0x3UL
+#define VRN4 0x4UL
+#define VRN5 0x5UL
+#define VRN6 0x6UL
+#define VRN7 0x7UL
+
+// this def for vcpu_regs won't work if kernel stack is present
+#define vcpu_regs(vcpu) (((struct pt_regs *) ((char *) (vcpu) + IA64_STK_OFFSET)) - 1)
+#define VMX_VPD(x,y) ((x)->arch.arch_vmx.vpd->y)
+
+#define VMX(x,y) ((x)->arch.arch_vmx.y)
+
+#define VPD_CR(x,y) (((cr_t*)VMX_VPD(x,vcr))->y)
+
+#define VMM_RR_SHIFT 20
+#define VMM_RR_MASK ((1UL<<VMM_RR_SHIFT)-1)
+#define VRID_2_MRID(vcpu,rid) ((rid) & VMM_RR_MASK) | \
+ ((vcpu->domain->domain_id) << VMM_RR_SHIFT)
+extern u64 indirect_reg_igfld_MASK ( int type, int index, u64 value);
+extern u64 cr_igfld_mask (int index, u64 value);
+extern int check_indirect_reg_rsv_fields ( int type, int index, u64 value );
+extern u64 set_isr_ei_ni (VCPU *vcpu);
+extern u64 set_isr_for_na_inst(VCPU *vcpu, int op);
+
+
+/* All of the following are API definitions for CONFIG_VTI */
+extern void vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value);
+extern UINT64 vmx_vcpu_sync_mpsr(UINT64 mipsr, UINT64 value);
+extern void vmx_vcpu_set_psr_sync_mpsr(VCPU * vcpu, UINT64 value);
+extern IA64FAULT vmx_vcpu_cover(VCPU *vcpu);
+extern thash_cb_t *vmx_vcpu_get_vtlb(VCPU *vcpu);
+extern thash_cb_t *vmx_vcpu_get_vhpt(VCPU *vcpu);
+ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr);
+extern IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val);
+extern IA64FAULT vmx_vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval);
+extern IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval);
+IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val);
+extern IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa);
+extern IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa);
+extern IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx);
+extern IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx);
+extern IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps);
+extern IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps);
+extern IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps);
+extern IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr);
+extern IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps);
+extern IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps);
+extern IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval);
+extern u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa);
+extern IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval);
+extern IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr);
+extern IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key);
+extern IA64FAULT vmx_vcpu_rfi(VCPU *vcpu);
+extern UINT64 vmx_vcpu_get_psr(VCPU *vcpu);
+extern IA64FAULT vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val);
+extern IA64FAULT vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val,int nat);
+extern IA64FAULT vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 * val);
+extern IA64FAULT vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int nat);
+extern IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24);
+extern IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24);
+extern IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val);
+extern void vtm_init(VCPU *vcpu);
+extern uint64_t vtm_get_itc(VCPU *vcpu);
+extern void vtm_set_itc(VCPU *vcpu, uint64_t new_itc);
+extern void vtm_set_itv(VCPU *vcpu);
+extern void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm);
+extern void vtm_domain_out(VCPU *vcpu);
+extern void vtm_domain_in(VCPU *vcpu);
+extern void vlsapic_reset(VCPU *vcpu);
+extern int vmx_check_pending_irq(VCPU *vcpu);
+extern void guest_write_eoi(VCPU *vcpu);
+extern uint64_t guest_read_vivr(VCPU *vcpu);
+extern void vmx_inject_vhpi(VCPU *vcpu, u8 vec);
+extern void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector);
+extern struct virutal_platform_def *vmx_vcpu_get_plat(VCPU *vcpu);
+extern void memread_p(VCPU *vcpu, void *src, void *dest, size_t s);
+extern void memread_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s);
+extern void memwrite_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s);
+extern void memwrite_p(VCPU *vcpu, void *src, void *dest, size_t s);
+
+
+/**************************************************************************
+ VCPU control register access routines
+**************************************************************************/
+
+static inline
+IA64FAULT vmx_vcpu_get_dcr(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,dcr);
+ return (IA64_NO_FAULT);
+}
+
+static inline
+IA64FAULT vmx_vcpu_get_itm(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,itm);
+ return (IA64_NO_FAULT);
+}
+
+static inline
+IA64FAULT vmx_vcpu_get_iva(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,iva);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_pta(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,pta);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,ipsr);
+ return (IA64_NO_FAULT);
+}
+
+static inline
+IA64FAULT vmx_vcpu_get_isr(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,isr);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_iip(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,iip);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_ifa(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,ifa);
+ return (IA64_NO_FAULT);
+}
+
+static inline
+IA64FAULT vmx_vcpu_get_itir(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,itir);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_iipa(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,iipa);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_ifs(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,ifs);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_iim(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,iim);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_iha(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,iha);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_lid(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,lid);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_ivr(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = guest_read_vivr(vcpu);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_tpr(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,tpr);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_eoi(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = 0L; // reads of eoi always return 0
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_irr0(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,irr[0]);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_irr1(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,irr[1]);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_irr2(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,irr[2]);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_irr3(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,irr[3]);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_itv(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,itv);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_pmv(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,pmv);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,cmcv);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = VPD_CR(vcpu,lrr0);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval)
+{ *pval = VPD_CR(vcpu,lrr1);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_dcr(VCPU *vcpu, u64 val)
+{
+ u64 mdcr, mask;
+ VPD_CR(vcpu,dcr)=val;
+ /* All vDCR bits will go to mDCR, except for be/pp bit */
+ mdcr = ia64_get_dcr();
+ mask = IA64_DCR_BE | IA64_DCR_PP;
+ mdcr = ( mdcr & mask ) | ( val & (~mask) );
+ ia64_set_dcr( mdcr);
+
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_itm(VCPU *vcpu, u64 val)
+{
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ VPD_CR(vcpu,itm)=val;
+ vtm_interruption_update(vcpu, vtm);
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_iva(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,iva)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_pta(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,pta)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_ipsr(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,ipsr)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_isr(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,isr)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_iip(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,iip)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_ifa(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,ifa)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_itir(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,itir)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_iipa(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,iipa)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_ifs(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,ifs)=val;
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_iim(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,iim)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_iha(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,iha)=val;
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_lid(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,lid)=val;
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_tpr(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,tpr)=val;
+ //TODO
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_eoi(VCPU *vcpu, u64 val)
+{
+ guest_write_eoi(vcpu);
+ return IA64_NO_FAULT;
+}
+
+static inline
+IA64FAULT
+vmx_vcpu_set_itv(VCPU *vcpu, u64 val)
+{
+
+ VPD_CR(vcpu,itv)=val;
+ vtm_set_itv(vcpu);
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_pmv(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,pmv)=val;
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_cmcv(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,cmcv)=val;
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_lrr0(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,lrr0)=val;
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT
+vmx_vcpu_set_lrr1(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,lrr1)=val;
+ return IA64_NO_FAULT;
+}
+
+
+
+
+/**************************************************************************
+ VCPU privileged application register access routines
+**************************************************************************/
+static inline
+IA64FAULT vmx_vcpu_set_itc(VCPU *vcpu, UINT64 val)
+{
+ vtm_set_itc(vcpu, val);
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT vmx_vcpu_get_itc(VCPU *vcpu,UINT64 *val)
+{
+ *val = vtm_get_itc(vcpu);
+ return IA64_NO_FAULT;
+}
+static inline
+IA64FAULT vmx_vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ *pval = VMX(vcpu,vrr[reg>>61]);
+ return (IA64_NO_FAULT);
+}
+/**************************************************************************
+ VCPU debug breakpoint register access routines
+**************************************************************************/
+
+static inline
+IA64FAULT vmx_vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // TODO: unimplemented DBRs return a reserved register fault
+ // TODO: Should set Logical CPU state, not just physical
+ if(reg > 4){
+ panic("there are only five cpuid registers");
+ }
+ *pval=VMX_VPD(vcpu,vcpuid[reg]);
+ return (IA64_NO_FAULT);
+}
+
+
+static inline
+IA64FAULT vmx_vcpu_set_dbr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ // TODO: unimplemented DBRs return a reserved register fault
+ // TODO: Should set Logical CPU state, not just physical
+ ia64_set_dbr(reg,val);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_set_ibr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ // TODO: unimplemented IBRs return a reserved register fault
+ // TODO: Should set Logical CPU state, not just physical
+ ia64_set_ibr(reg,val);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_dbr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // TODO: unimplemented DBRs return a reserved register fault
+ UINT64 val = ia64_get_dbr(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_ibr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // TODO: unimplemented IBRs return a reserved register fault
+ UINT64 val = ia64_get_ibr(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU performance monitor register access routines
+**************************************************************************/
+static inline
+IA64FAULT vmx_vcpu_set_pmc(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ // TODO: Should set Logical CPU state, not just physical
+ // NOTE: Writes to unimplemented PMC registers are discarded
+ ia64_set_pmc(reg,val);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_set_pmd(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ // TODO: Should set Logical CPU state, not just physical
+ // NOTE: Writes to unimplemented PMD registers are discarded
+ ia64_set_pmd(reg,val);
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_pmc(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // NOTE: Reads from unimplemented PMC registers return zero
+ UINT64 val = (UINT64)ia64_get_pmc(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // NOTE: Reads from unimplemented PMD registers return zero
+ UINT64 val = (UINT64)ia64_get_pmd(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU banked general register access routines
+**************************************************************************/
+static inline
+IA64FAULT vmx_vcpu_bsw0(VCPU *vcpu)
+{
+
+ VMX_VPD(vcpu,vpsr) &= ~IA64_PSR_BN;
+ return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_bsw1(VCPU *vcpu)
+{
+
+ VMX_VPD(vcpu,vpsr) |= IA64_PSR_BN;
+ return (IA64_NO_FAULT);
+}
+
+#define redistribute_rid(rid) (((rid) & ~0xffff) | (((rid) << 8) & 0xff00) | (((rid) >> 8) & 0xff))
+static inline unsigned long
+vmx_vrrtomrr(VCPU *vcpu,unsigned long val)
+{
+ ia64_rr rr;
+ u64 rid;
+ rr.rrval=val;
+ rid=(((u64)vcpu->domain->domain_id)<<DOMAIN_RID_SHIFT) + rr.rid;
+ rr.rid = redistribute_rid(rid);
+ rr.ve=1;
+ return rr.rrval;
+}
+#endif /* _XEN_IA64_VMX_VCPU_H */
diff --git a/xen/include/asm-ia64/vmx_vpd.h b/xen/include/asm-ia64/vmx_vpd.h
new file mode 100644
index 0000000000..78149ba31f
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_vpd.h
@@ -0,0 +1,193 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vpd.h: prototype for general vmx related interface
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ */
+
+#ifndef _VPD_H_
+#define _VPD_H_
+
+#ifndef __ASSEMBLY__
+
+#include <asm/vtm.h>
+#include <asm/vmx_platform.h>
+
+#define VPD_SHIFT 17 /* 128K requirement */
+#define VPD_SIZE (1 << VPD_SHIFT)
+typedef union {
+ unsigned long value;
+ struct {
+ int a_int:1;
+ int a_from_int_cr:1;
+ int a_to_int_cr:1;
+ int a_from_psr:1;
+ int a_from_cpuid:1;
+ int a_cover:1;
+ int a_bsw:1;
+ long reserved:57;
+ };
+} vac_t;
+
+typedef union {
+ unsigned long value;
+ struct {
+ int d_vmsw:1;
+ int d_extint:1;
+ int d_ibr_dbr:1;
+ int d_pmc:1;
+ int d_to_pmd:1;
+ int d_itm:1;
+ long reserved:58;
+ };
+} vdc_t;
+
+typedef struct {
+ unsigned long dcr; // CR0
+ unsigned long itm;
+ unsigned long iva;
+ unsigned long rsv1[5];
+ unsigned long pta; // CR8
+ unsigned long rsv2[7];
+ unsigned long ipsr; // CR16
+ unsigned long isr;
+ unsigned long rsv3;
+ unsigned long iip;
+ unsigned long ifa;
+ unsigned long itir;
+ unsigned long iipa;
+ unsigned long ifs;
+ unsigned long iim; // CR24
+ unsigned long iha;
+ unsigned long rsv4[38];
+ unsigned long lid; // CR64
+ unsigned long ivr;
+ unsigned long tpr;
+ unsigned long eoi;
+ unsigned long irr[4];
+ unsigned long itv; // CR72
+ unsigned long pmv;
+ unsigned long cmcv;
+ unsigned long rsv5[5];
+ unsigned long lrr0; // CR80
+ unsigned long lrr1;
+ unsigned long rsv6[46];
+} cr_t;
+
+typedef struct vpd {
+ vac_t vac;
+ vdc_t vdc;
+ unsigned long virt_env_vaddr;
+ unsigned long reserved1[29];
+ unsigned long vhpi;
+ unsigned long reserved2[95];
+ unsigned long vgr[16];
+ unsigned long vbgr[16];
+ unsigned long vnat;
+ unsigned long vbnat;
+ unsigned long vcpuid[5];
+ unsigned long reserved3[11];
+ unsigned long vpsr;
+ unsigned long vpr;
+ unsigned long reserved4[76];
+ unsigned long vcr[128];
+ unsigned long reserved5[128];
+ unsigned long reserved6[3456];
+ unsigned long vmm_avail[128];
+ unsigned long reserved7[4096];
+} vpd_t;
+
+void vmx_enter_scheduler(void);
+
+//FIXME: Map for LID to vcpu, Eddie
+#define MAX_NUM_LPS (1UL<<16)
+extern struct vcpu *lid_edt[MAX_NUM_LPS];
+
+struct arch_vmx_struct {
+// struct virutal_platform_def vmx_platform;
+ vpd_t *vpd;
+ vtime_t vtm;
+ unsigned long vrr[8];
+ unsigned long mrr5;
+ unsigned long mrr6;
+ unsigned long mrr7;
+ unsigned long mpta;
+ unsigned long rfi_pfs;
+ unsigned long rfi_iip;
+ unsigned long rfi_ipsr;
+ unsigned long rfi_ifs;
+ unsigned long in_service[4]; // vLsapic inservice IRQ bits
+ struct virutal_platform_def vmx_platform;
+ unsigned long flags;
+};
+
+#define vmx_schedule_tail(next) \
+ (next)->thread.arch_vmx.arch_vmx_schedule_tail((next))
+
+#define VMX_DOMAIN(d) d->arch.arch_vmx.flags
+
+#define ARCH_VMX_VMCS_LOADED 0 /* VMCS has been loaded and active */
+#define ARCH_VMX_VMCS_LAUNCH 1 /* Needs VMCS launch */
+#define ARCH_VMX_VMCS_RESUME 2 /* Needs VMCS resume */
+#define ARCH_VMX_IO_WAIT 3 /* Waiting for I/O completion */
+
+
+#define VMX_DEBUG 1
+#if VMX_DEBUG
+#define DBG_LEVEL_0 (1 << 0)
+#define DBG_LEVEL_1 (1 << 1)
+#define DBG_LEVEL_2 (1 << 2)
+#define DBG_LEVEL_3 (1 << 3)
+#define DBG_LEVEL_IO (1 << 4)
+#define DBG_LEVEL_VMMU (1 << 5)
+
+extern unsigned int opt_vmx_debug_level;
+#define VMX_DBG_LOG(level, _f, _a...) \
+ if ((level) & opt_vmx_debug_level) \
+ printk("[VMX]" _f "\n", ## _a )
+#else
+#define VMX_DBG_LOG(level, _f, _a...)
+#endif
+
+#define __vmx_bug(regs) \
+ do { \
+ printk("__vmx_bug at %s:%d\n", __FILE__, __LINE__); \
+ show_registers(regs); \
+ domain_crash(); \
+ } while (0)
+
+#endif //__ASSEMBLY__
+
+
+// VPD field offset
+#define VPD_VAC_START_OFFSET 0
+#define VPD_VDC_START_OFFSET 8
+#define VPD_VHPI_START_OFFSET 256
+#define VPD_VGR_START_OFFSET 1024
+#define VPD_VBGR_START_OFFSET 1152
+#define VPD_VNAT_START_OFFSET 1280
+#define VPD_VBNAT_START_OFFSET 1288
+#define VPD_VCPUID_START_OFFSET 1296
+#define VPD_VPSR_START_OFFSET 1424
+#define VPD_VPR_START_OFFSET 1432
+#define VPD_VRSE_CFLE_START_OFFSET 1440
+#define VPD_VCR_START_OFFSET 2048
+#define VPD_VRR_START_OFFSET 3072
+#define VPD_VMM_VAIL_START_OFFSET 31744
+
+
+#endif /* _VPD_H_ */
diff --git a/xen/include/asm-ia64/vtm.h b/xen/include/asm-ia64/vtm.h
new file mode 100644
index 0000000000..92564b9b6c
--- /dev/null
+++ b/xen/include/asm-ia64/vtm.h
@@ -0,0 +1,67 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vtm.h: virtual timer header file.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ */
+
+#ifndef _VTM_H_
+#define _VTM_H_
+
+#include <xen/ac_timer.h>
+#include <xen/types.h>
+
+#define MAX_JUMP_STEP (5000) /* 500ms, max jump step */
+#define MIN_GUEST_RUNNING_TIME (0) /* min guest running time since last jump; 0 = no minimum enforced */
+#define ITV_VECTOR_MASK (0xff)
+
+typedef struct vtime {
+ long vtm_offset; // guest ITC = host ITC + vtm_offset
+ uint64_t vtm_local_drift;
+ uint64_t last_itc;
+ /*
+ * Local drift (temporary) after guest suspension
+ * In case of long jump amount of ITC after suspension,
+ * guest ITC = host ITC + vtm_offset - vtm_local_drift;
+ * so that the elapsed time seen in guest ITC is limited to
+ * cfg_max_jump that will make all kind of device driver happy.
+ */
+
+ // all fields below use ITC ticks as the unit
+ uint64_t cfg_max_jump; // max jump within one suspension
+ uint64_t cfg_min_grun; // min guest running time since last jump
+// uint64_t latest_read_itc; // latest guest read ITC
+ struct ac_timer vtm_timer;
+// int triggered;
+
+
+ uint64_t guest_running_time; // guest running time since last switch
+ //uint64_t vtm_last_suspending_time;
+ //uint64_t switch_in_time;
+ //uint64_t switch_out_time;
+ //uint64_t itc_freq;
+
+} vtime_t;
+
+#define ITV_VECTOR(itv) (itv&0xff)
+#define ITV_IRQ_MASK(itv) (itv&(1<<16))
+
+#define VTM_FIRED(vtm) ((vtm)->triggered)
+
+extern void vtm_init();
+#endif /* _VTM_H_ */
diff --git a/xen/include/asm-ia64/xenprocessor.h b/xen/include/asm-ia64/xenprocessor.h
new file mode 100644
index 0000000000..abc13dacee
--- /dev/null
+++ b/xen/include/asm-ia64/xenprocessor.h
@@ -0,0 +1,213 @@
+#ifndef _ASM_IA64_XENPROCESSOR_H
+#define _ASM_IA64_XENPROCESSOR_H
+/*
+ * xen specific processor definition
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ * Copyright (C) 2005 Intel Co.
+ * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
+ *
+ */
+
+
+#define ia64_is_local_fpu_owner(t) 0
+
+/* like above but expressed as bitfields for more efficient access: */
+struct ia64_psr {
+ __u64 reserved0 : 1;
+ __u64 be : 1;
+ __u64 up : 1;
+ __u64 ac : 1;
+ __u64 mfl : 1;
+ __u64 mfh : 1;
+ __u64 reserved1 : 7;
+ __u64 ic : 1;
+ __u64 i : 1;
+ __u64 pk : 1;
+ __u64 reserved2 : 1;
+ __u64 dt : 1;
+ __u64 dfl : 1;
+ __u64 dfh : 1;
+ __u64 sp : 1;
+ __u64 pp : 1;
+ __u64 di : 1;
+ __u64 si : 1;
+ __u64 db : 1;
+ __u64 lp : 1;
+ __u64 tb : 1;
+ __u64 rt : 1;
+ __u64 reserved3 : 4;
+ __u64 cpl : 2;
+ __u64 is : 1;
+ __u64 mc : 1;
+ __u64 it : 1;
+ __u64 id : 1;
+ __u64 da : 1;
+ __u64 dd : 1;
+ __u64 ss : 1;
+ __u64 ri : 2;
+ __u64 ed : 1;
+ __u64 bn : 1;
+#ifdef CONFIG_VTI
+ __u64 ia : 1;
+ __u64 vm : 1;
+ __u64 reserved5 : 17;
+#else // CONFIG_VTI
+ __u64 reserved4 : 19;
+#endif // CONFIG_VTI
+};
+
+#ifdef CONFIG_VTI
+/* vmx like above but expressed as bitfields for more efficient access: */
+typedef union{
+ __u64 val;
+ struct{
+ __u64 reserved0 : 1;
+ __u64 be : 1;
+ __u64 up : 1;
+ __u64 ac : 1;
+ __u64 mfl : 1;
+ __u64 mfh : 1;
+ __u64 reserved1 : 7;
+ __u64 ic : 1;
+ __u64 i : 1;
+ __u64 pk : 1;
+ __u64 reserved2 : 1;
+ __u64 dt : 1;
+ __u64 dfl : 1;
+ __u64 dfh : 1;
+ __u64 sp : 1;
+ __u64 pp : 1;
+ __u64 di : 1;
+ __u64 si : 1;
+ __u64 db : 1;
+ __u64 lp : 1;
+ __u64 tb : 1;
+ __u64 rt : 1;
+ __u64 reserved3 : 4;
+ __u64 cpl : 2;
+ __u64 is : 1;
+ __u64 mc : 1;
+ __u64 it : 1;
+ __u64 id : 1;
+ __u64 da : 1;
+ __u64 dd : 1;
+ __u64 ss : 1;
+ __u64 ri : 2;
+ __u64 ed : 1;
+ __u64 bn : 1;
+ __u64 reserved4 : 19;
+ };
+} IA64_PSR;
+
+typedef union {
+ __u64 val;
+ struct {
+ __u64 code : 16;
+ __u64 vector : 8;
+ __u64 reserved1 : 8;
+ __u64 x : 1;
+ __u64 w : 1;
+ __u64 r : 1;
+ __u64 na : 1;
+ __u64 sp : 1;
+ __u64 rs : 1;
+ __u64 ir : 1;
+ __u64 ni : 1;
+ __u64 so : 1;
+ __u64 ei : 2;
+ __u64 ed : 1;
+ __u64 reserved2 : 20;
+ };
+} ISR;
+
+
+typedef union {
+ __u64 val;
+ struct {
+ __u64 ve : 1;
+ __u64 reserved0 : 1;
+ __u64 size : 6;
+ __u64 vf : 1;
+ __u64 reserved1 : 6;
+ __u64 base : 49;
+ };
+} PTA;
+
+typedef union {
+ __u64 val;
+ struct {
+ __u64 rv : 16;
+ __u64 eid : 8;
+ __u64 id : 8;
+ __u64 ig : 32;
+ };
+} LID;
+
+typedef union{
+ __u64 val;
+ struct {
+ __u64 rv : 3;
+ __u64 ir : 1;
+ __u64 eid : 8;
+ __u64 id : 8;
+ __u64 ib_base : 44;
+ };
+} ipi_a_t;
+
+typedef union{
+ __u64 val;
+ struct {
+ __u64 vector : 8;
+ __u64 dm : 3;
+ __u64 ig : 53;
+ };
+} ipi_d_t;
+
+
+#define IA64_ISR_CODE_MASK0 0xf
+#define IA64_UNIMPL_DADDR_FAULT 0x30
+#define IA64_UNIMPL_IADDR_TRAP 0x10
+#define IA64_RESERVED_REG_FAULT 0x30
+#define IA64_REG_NAT_CONSUMPTION_FAULT 0x10
+#define IA64_NAT_CONSUMPTION_FAULT 0x20
+#define IA64_PRIV_OP_FAULT 0x10
+
+/* indirect register type */
+enum {
+ IA64_CPUID, /* cpuid */
+ IA64_DBR, /* dbr */
+ IA64_IBR, /* ibr */
+ IA64_PKR, /* pkr */
+ IA64_PMC, /* pmc */
+ IA64_PMD, /* pmd */
+ IA64_RR /* rr */
+};
+
+/* instruction type */
+enum {
+ IA64_INST_TPA=1,
+ IA64_INST_TAK
+};
+
+/* Generate Mask
+ * Parameter:
+ * bit -- starting bit
+ * len -- how many bits
+ */
+#define MASK(bit,len) \
+({ \
+ __u64 ret; \
+ \
+ __asm __volatile("dep %0=-1, r0, %1, %2" \
+ : "=r" (ret): \
+ "M" (bit), \
+ "M" (len) ); \
+ ret; \
+})
+
+#endif // CONFIG_VTI
+
+#endif // _ASM_IA64_XENPROCESSOR_H
diff --git a/xen/drivers/pci/compat.c b/xen/include/asm-ia64/xenserial.h
index e69de29bb2..e69de29bb2 100644
--- a/xen/drivers/pci/compat.c
+++ b/xen/include/asm-ia64/xenserial.h
diff --git a/xen/include/asm-ia64/xensystem.h b/xen/include/asm-ia64/xensystem.h
new file mode 100644
index 0000000000..c7c9771fee
--- /dev/null
+++ b/xen/include/asm-ia64/xensystem.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_IA64_XENSYSTEM_H
+#define _ASM_IA64_XENSYSTEM_H
+/*
+ * xen specific context definition
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@hp.com)
+ *
+ * Copyright (C) 2005 Intel Co.
+ * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
+ *
+ */
+#include <asm/config.h>
+#include <linux/kernel.h>
+
+/* Define HV space hierarchy */
+#define XEN_VIRT_SPACE_LOW 0xe800000000000000
+#define XEN_VIRT_SPACE_HIGH 0xf800000000000000
+/* This is address to mapping rr7 switch stub, in region 5 */
+#ifdef CONFIG_VTI
+#define XEN_RR7_SWITCH_STUB 0xb700000000000000
+#endif // CONFIG_VTI
+
+#define KERNEL_START 0xf000000004000000
+#define PERCPU_ADDR 0xf100000000000000-PERCPU_PAGE_SIZE
+#define SHAREDINFO_ADDR 0xf100000000000000
+#define VHPT_ADDR 0xf200000000000000
+
+#ifndef __ASSEMBLY__
+
+#define IA64_HAS_EXTRA_STATE(t) 0
+
+#ifdef CONFIG_VTI
+extern struct task_struct *vmx_ia64_switch_to (void *next_task);
+#define __switch_to(prev,next,last) do { \
+ if (VMX_DOMAIN(prev)) \
+ vmx_save_state(prev); \
+ else { \
+ if (IA64_HAS_EXTRA_STATE(prev)) \
+ ia64_save_extra(prev); \
+ } \
+ if (VMX_DOMAIN(next)) \
+ vmx_load_state(next); \
+ else { \
+ if (IA64_HAS_EXTRA_STATE(next)) \
+ ia64_save_extra(next); \
+ } \
+ ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \
+ (last) = vmx_ia64_switch_to((next)); \
+} while (0)
+#else // CONFIG_VTI
+#define __switch_to(prev,next,last) do { \
+ ia64_save_fpu(prev->arch._thread.fph); \
+ ia64_load_fpu(next->arch._thread.fph); \
+ if (IA64_HAS_EXTRA_STATE(prev)) \
+ ia64_save_extra(prev); \
+ if (IA64_HAS_EXTRA_STATE(next)) \
+ ia64_load_extra(next); \
+ ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \
+ (last) = ia64_switch_to((next)); \
+} while (0)
+#endif // CONFIG_VTI
+
+#endif // __ASSEMBLY__
+#endif // _ASM_IA64_XENSYSTEM_H
diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h
index 3a15181819..b13f8ff79a 100644
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -27,7 +27,7 @@
#define _ASM_ACPI_H
#include <xen/config.h>
-#include <asm/system.h>
+#include <asm/system.h> /* defines cmpxchg */
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
@@ -49,8 +49,8 @@
#define ACPI_ASM_MACROS
#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() __cli()
-#define ACPI_ENABLE_IRQS() __sti()
+#define ACPI_DISABLE_IRQS() local_irq_disable()
+#define ACPI_ENABLE_IRQS() local_irq_enable()
#define ACPI_FLUSH_CPU_CACHE() wbinvd()
@@ -100,6 +100,11 @@ __acpi_release_global_lock (unsigned int *lock)
:"=r"(n_hi), "=r"(n_lo) \
:"0"(n_hi), "1"(n_lo))
+/*
+ * Refer Intel ACPI _PDC support document for bit definitions
+ */
+#define ACPI_PDC_EST_CAPABILITY_SMP 0xa
+#define ACPI_PDC_EST_CAPABILITY_MSR 0x1
#ifdef CONFIG_ACPI_BOOT
extern int acpi_lapic;
@@ -108,46 +113,52 @@ extern int acpi_noirq;
extern int acpi_strict;
extern int acpi_disabled;
extern int acpi_ht;
-static inline void disable_acpi(void) { acpi_disabled = 1; acpi_ht = 0; }
+extern int acpi_pci_disabled;
+static inline void disable_acpi(void)
+{
+ acpi_disabled = 1;
+ acpi_ht = 0;
+ acpi_pci_disabled = 1;
+ acpi_noirq = 1;
+}
/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */
#define FIX_ACPI_PAGES 4
-#else /* !CONFIG_ACPI_BOOT */
-# define acpi_lapic 0
-# define acpi_ioapic 0
-
-#endif /* !CONFIG_ACPI_BOOT */
+extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
-#ifdef CONFIG_ACPI_PCI
-static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
-extern int acpi_irq_balance_set(char *str);
-#else
-static inline void acpi_noirq_set(void) { }
-static inline int acpi_irq_balance_set(char *str) { return 0; }
-#endif
+#ifdef CONFIG_X86_IO_APIC
+extern int skip_ioapic_setup;
+extern int acpi_skip_timer_override;
-#ifdef CONFIG_ACPI_SLEEP
+extern void check_acpi_pci(void);
-extern unsigned long saved_eip;
-extern unsigned long saved_esp;
-extern unsigned long saved_ebp;
-extern unsigned long saved_ebx;
-extern unsigned long saved_esi;
-extern unsigned long saved_edi;
+static inline void disable_ioapic_setup(void)
+{
+ skip_ioapic_setup = 1;
+}
-static inline void acpi_save_register_state(unsigned long return_point)
+static inline int ioapic_setup_disabled(void)
{
- saved_eip = return_point;
- asm volatile ("movl %%esp,(%0)" : "=m" (saved_esp));
- asm volatile ("movl %%ebp,(%0)" : "=m" (saved_ebp));
- asm volatile ("movl %%ebx,(%0)" : "=m" (saved_ebx));
- asm volatile ("movl %%edi,(%0)" : "=m" (saved_edi));
- asm volatile ("movl %%esi,(%0)" : "=m" (saved_esi));
+ return skip_ioapic_setup;
}
-#define acpi_restore_register_state() do {} while (0)
+#else
+static inline void disable_ioapic_setup(void) { }
+static inline void check_acpi_pci(void) { }
+#endif
+
+#else /* CONFIG_ACPI_BOOT */
+# define acpi_lapic 0
+# define acpi_ioapic 0
+
+#endif
+
+static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
+static inline int acpi_irq_balance_set(char *str) { return 0; }
+
+#ifdef CONFIG_ACPI_SLEEP
/* routines for saving/restoring kernel state */
extern int acpi_save_state_mem(void);
@@ -156,11 +167,11 @@ extern void acpi_restore_state_mem(void);
extern unsigned long acpi_wakeup_address;
-extern void do_suspend_lowlevel_s4bios(int resume);
-
/* early initialization routine */
extern void acpi_reserve_bootmem(void);
#endif /*CONFIG_ACPI_SLEEP*/
+extern u8 x86_acpiid_to_apicid[];
+
#endif /*_ASM_ACPI_H*/
diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
index 54289910ab..2dc0ce8c27 100644
--- a/xen/include/asm-x86/apic.h
+++ b/xen/include/asm-x86/apic.h
@@ -2,19 +2,35 @@
#define __ASM_APIC_H
#include <xen/config.h>
-#include <asm/regs.h>
+#include <asm/fixmap.h>
#include <asm/apicdef.h>
+#include <asm/processor.h>
#include <asm/system.h>
-#ifdef CONFIG_X86_LOCAL_APIC
+#define Dprintk(x...)
-#define APIC_DEBUG 0
+/*
+ * Debugging macros
+ */
+#define APIC_QUIET 0
+#define APIC_VERBOSE 1
+#define APIC_DEBUG 2
-#if APIC_DEBUG
-#define Dprintk(x...) printk(x)
-#else
-#define Dprintk(x...)
-#endif
+extern int apic_verbosity;
+
+/*
+ * Define the default level of output to be very little
+ * This can be turned up by using apic=verbose for more
+ * information and apic=debug for _lots_ of information.
+ * apic_verbosity is defined in apic.c
+ */
+#define apic_printk(v, s, a...) do { \
+ if ((v) <= apic_verbosity) \
+ printk(s, ##a); \
+ } while (0)
+
+
+#ifdef CONFIG_X86_LOCAL_APIC
/*
* Basic functions accessing APICs.
@@ -37,9 +53,12 @@ static __inline u32 apic_read(unsigned long reg)
static __inline__ void apic_wait_icr_idle(void)
{
- do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
+ while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
+ cpu_relax();
}
+int get_physical_broadcast(void);
+
#ifdef CONFIG_X86_GOOD_APIC
# define FORCE_READ_AROUND_WRITE 0
# define apic_read_around(x)
@@ -63,33 +82,38 @@ static inline void ack_APIC_irq(void)
apic_write_around(APIC_EOI, 0);
}
+extern void (*wait_timer_tick)(void);
+
extern int get_maxlvt(void);
extern void clear_local_APIC(void);
extern void connect_bsp_APIC (void);
extern void disconnect_bsp_APIC (void);
extern void disable_local_APIC (void);
+extern void lapic_shutdown (void);
extern int verify_local_APIC (void);
extern void cache_APIC_registers (void);
extern void sync_Arb_IDs (void);
extern void init_bsp_APIC (void);
extern void setup_local_APIC (void);
extern void init_apic_mappings (void);
-extern void smp_local_timer_interrupt (struct xen_regs * regs);
-extern void setup_APIC_clocks (void);
+extern void smp_local_timer_interrupt (struct cpu_user_regs *regs);
+extern void setup_boot_APIC_clock (void);
+extern void setup_secondary_APIC_clock (void);
extern void setup_apic_nmi_watchdog (void);
-extern void nmi_watchdog_tick (struct xen_regs * regs);
-extern void touch_nmi_watchdog(void);
+extern int reserve_lapic_nmi(void);
+extern void release_lapic_nmi(void);
+extern void disable_timer_nmi_watchdog(void);
+extern void enable_timer_nmi_watchdog(void);
+extern void nmi_watchdog_tick (struct cpu_user_regs *regs);
extern int APIC_init_uniprocessor (void);
extern void disable_APIC_timer(void);
extern void enable_APIC_timer(void);
-/*extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);*/
-/*extern void apic_pm_unregister(struct pm_dev*);*/
-
-extern unsigned int watchdog_on;
-
-extern unsigned int apic_timer_irqs [NR_CPUS];
extern int check_nmi_watchdog (void);
+extern void enable_NMI_through_LVT0 (void * dummy);
+
+extern void watchdog_disable(void);
+extern void watchdog_enable(void);
extern unsigned int nmi_watchdog;
#define NMI_NONE 0
@@ -97,6 +121,9 @@ extern unsigned int nmi_watchdog;
#define NMI_LOCAL_APIC 2
#define NMI_INVALID 3
-#endif /* CONFIG_X86_LOCAL_APIC */
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+
+#endif /* !CONFIG_X86_LOCAL_APIC */
#endif /* __ASM_APIC_H */
diff --git a/xen/include/asm-x86/apicdef.h b/xen/include/asm-x86/apicdef.h
index 8d7f3aa3d7..911a55ef82 100644
--- a/xen/include/asm-x86/apicdef.h
+++ b/xen/include/asm-x86/apicdef.h
@@ -11,14 +11,11 @@
#define APIC_DEFAULT_PHYS_BASE 0xfee00000
#define APIC_ID 0x20
-#define APIC_ID_MASK (0x0F<<24)
-#define GET_APIC_ID(x) (((x)>>24)&0x0F)
#define APIC_LVR 0x30
#define APIC_LVR_MASK 0xFF00FF
#define GET_APIC_VERSION(x) ((x)&0xFF)
#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF)
#define APIC_INTEGRATED(x) ((x)&0xF0)
-#define APIC_XAPIC_SUPPORT(x) ((x)>=0x14)
#define APIC_TASKPRI 0x80
#define APIC_TPRI_MASK 0xFF
#define APIC_ARBPRI 0x90
@@ -33,8 +30,8 @@
#define SET_APIC_LOGICAL_ID(x) (((x)<<24))
#define APIC_ALL_CPUS 0xFF
#define APIC_DFR 0xE0
-#define APIC_DFR_CLUSTER 0x0FFFFFFFul /* Clustered */
-#define APIC_DFR_FLAT 0xFFFFFFFFul /* Flat mode */
+#define APIC_DFR_CLUSTER 0x0FFFFFFFul
+#define APIC_DFR_FLAT 0xFFFFFFFFul
#define APIC_SPIV 0xF0
#define APIC_SPIV_FOCUS_DISABLED (1<<9)
#define APIC_SPIV_APIC_ENABLED (1<<8)
@@ -60,7 +57,6 @@
#define APIC_INT_LEVELTRIG 0x08000
#define APIC_INT_ASSERT 0x04000
#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_PHYSICAL 0x00000
#define APIC_DEST_LOGICAL 0x00800
#define APIC_DM_FIXED 0x00000
#define APIC_DM_LOWEST 0x00100
@@ -75,6 +71,7 @@
#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF)
#define SET_APIC_DEST_FIELD(x) ((x)<<24)
#define APIC_LVTT 0x320
+#define APIC_LVTTHMR 0x330
#define APIC_LVTPC 0x340
#define APIC_LVT0 0x350
#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
@@ -111,18 +108,272 @@
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-#ifdef CONFIG_X86_CLUSTERED_APIC
-#define MAX_IO_APICS 32
+#ifdef CONFIG_NUMA
+ #define MAX_IO_APICS 32
#else
-#define MAX_IO_APICS 8
+ #define MAX_IO_APICS 8
#endif
-
/*
- * The broadcast ID is 0xF for old APICs and 0xFF for xAPICs. SAPICs
- * don't broadcast (yet?), but if they did, they might use 0xFFFF.
+ * the local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use this one in the future - the
+ * problem why we cannot use it right now is the P5 APIC, it has an
+ * errata which cannot take 8-bit reads and writes, only 32-bit ones ...
*/
-#define APIC_BROADCAST_ID_XAPIC (0xFF)
-#define APIC_BROADCAST_ID_APIC (0x0F)
+#define u32 unsigned int
+
+#define lapic ((volatile struct local_apic *)APIC_BASE)
+
+#ifndef __ASSEMBLY__
+struct local_apic {
+
+/*000*/ struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/ struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/ struct { /* APIC ID Register */
+ u32 __reserved_1 : 24,
+ phys_apic_id : 4,
+ __reserved_2 : 4;
+ u32 __reserved[3];
+ } id;
+
+/*030*/ const
+ struct { /* APIC Version Register */
+ u32 version : 8,
+ __reserved_1 : 8,
+ max_lvt : 8,
+ __reserved_2 : 8;
+ u32 __reserved[3];
+ } version;
+
+/*040*/ struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/ struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/ struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/ struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/ struct { /* Task Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } tpr;
+
+/*090*/ const
+ struct { /* Arbitration Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } apr;
+
+/*0A0*/ const
+ struct { /* Processor Priority Register */
+ u32 priority : 8,
+ __reserved_1 : 24;
+ u32 __reserved_2[3];
+ } ppr;
+
+/*0B0*/ struct { /* End Of Interrupt Register */
+ u32 eoi;
+ u32 __reserved[3];
+ } eoi;
+
+/*0C0*/ struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/ struct { /* Logical Destination Register */
+ u32 __reserved_1 : 24,
+ logical_dest : 8;
+ u32 __reserved_2[3];
+ } ldr;
+
+/*0E0*/ struct { /* Destination Format Register */
+ u32 __reserved_1 : 28,
+ model : 4;
+ u32 __reserved_2[3];
+ } dfr;
+
+/*0F0*/ struct { /* Spurious Interrupt Vector Register */
+ u32 spurious_vector : 8,
+ apic_enabled : 1,
+ focus_cpu : 1,
+ __reserved_2 : 22;
+ u32 __reserved_3[3];
+ } svr;
+
+/*100*/ struct { /* In Service Register */
+/*170*/ u32 bitfield;
+ u32 __reserved[3];
+ } isr [8];
+
+/*180*/ struct { /* Trigger Mode Register */
+/*1F0*/ u32 bitfield;
+ u32 __reserved[3];
+ } tmr [8];
+
+/*200*/ struct { /* Interrupt Request Register */
+/*270*/ u32 bitfield;
+ u32 __reserved[3];
+ } irr [8];
+
+/*280*/ union { /* Error Status Register */
+ struct {
+ u32 send_cs_error : 1,
+ receive_cs_error : 1,
+ send_accept_error : 1,
+ receive_accept_error : 1,
+ __reserved_1 : 1,
+ send_illegal_vector : 1,
+ receive_illegal_vector : 1,
+ illegal_register_address : 1,
+ __reserved_2 : 24;
+ u32 __reserved_3[3];
+ } error_bits;
+ struct {
+ u32 errors;
+ u32 __reserved_3[3];
+ } all_errors;
+ } esr;
+
+/*290*/ struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/ struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/ struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/ struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/ struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/ struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/ struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/ struct { /* Interrupt Command Register 1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ destination_mode : 1,
+ delivery_status : 1,
+ __reserved_1 : 1,
+ level : 1,
+ trigger : 1,
+ __reserved_2 : 2,
+ shorthand : 2,
+ __reserved_3 : 12;
+ u32 __reserved_4[3];
+ } icr1;
+
+/*310*/ struct { /* Interrupt Command Register 2 */
+ union {
+ u32 __reserved_1 : 24,
+ phys_dest : 4,
+ __reserved_2 : 4;
+ u32 __reserved_3 : 24,
+ logical_dest : 8;
+ } dest;
+ u32 __reserved_4[3];
+ } icr2;
+
+/*320*/ struct { /* LVT - Timer */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ timer_mode : 1,
+ __reserved_3 : 14;
+ u32 __reserved_4[3];
+ } lvt_timer;
+
+/*330*/ struct { /* LVT - Thermal Sensor */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_thermal;
+
+/*340*/ struct { /* LVT - Performance Counter */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_pc;
+
+/*350*/ struct { /* LVT - LINT0 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint0;
+
+/*360*/ struct { /* LVT - LINT1 */
+ u32 vector : 8,
+ delivery_mode : 3,
+ __reserved_1 : 1,
+ delivery_status : 1,
+ polarity : 1,
+ remote_irr : 1,
+ trigger : 1,
+ mask : 1,
+ __reserved_2 : 15;
+ u32 __reserved_3[3];
+ } lvt_lint1;
+
+/*370*/ struct { /* LVT - Error */
+ u32 vector : 8,
+ __reserved_1 : 4,
+ delivery_status : 1,
+ __reserved_2 : 3,
+ mask : 1,
+ __reserved_3 : 15;
+ u32 __reserved_4[3];
+ } lvt_error;
+
+/*380*/ struct { /* Timer Initial Count Register */
+ u32 initial_count;
+ u32 __reserved_2[3];
+ } timer_icr;
+
+/*390*/ const
+ struct { /* Timer Current Count Register */
+ u32 curr_count;
+ u32 __reserved_2[3];
+ } timer_ccr;
+
+/*3A0*/ struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/ struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/ struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/ struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/ struct { /* Timer Divide Configuration Register */
+ u32 divisor : 4,
+ __reserved_1 : 28;
+ u32 __reserved_2[3];
+ } timer_dcr;
+
+/*3F0*/ struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+#endif /* !__ASSEMBLY__ */
+
+#undef u32
#endif
diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 0fb3e44727..17777ad123 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -6,8 +6,10 @@
#include <asm/asm-offsets.h>
#include <asm/processor.h>
+#ifndef STR
#define __STR(x) #x
#define STR(x) __STR(x)
+#endif
#ifdef __x86_64__
#include <asm/x86_64/asm_defns.h>
diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h
index cf98f2e118..9682ec96da 100644
--- a/xen/include/asm-x86/bitops.h
+++ b/xen/include/asm-x86/bitops.h
@@ -7,6 +7,11 @@
#include <xen/config.h>
+#ifndef STR
+#define __STR(x) #x
+#define STR(x) __STR(x)
+#endif
+
/*
* These have to be done with inline assembly: that way the bit-setting
* is guaranteed to be atomic. All bit operations return 0 if the bit
@@ -33,10 +38,10 @@
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static __inline__ void set_bit(long nr, volatile void * addr)
+static __inline__ void set_bit(int nr, volatile void * addr)
{
__asm__ __volatile__( LOCK_PREFIX
- "bts"__OS" %1,%0"
+ "btsl %1,%0"
:"=m" (ADDR)
:"dIr" (nr));
}
@@ -50,10 +55,10 @@ static __inline__ void set_bit(long nr, volatile void * addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static __inline__ void __set_bit(long nr, volatile void * addr)
+static __inline__ void __set_bit(int nr, volatile void * addr)
{
__asm__(
- "bts"__OS" %1,%0"
+ "btsl %1,%0"
:"=m" (ADDR)
:"dIr" (nr));
}
@@ -68,10 +73,10 @@ static __inline__ void __set_bit(long nr, volatile void * addr)
* you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
* in order to ensure changes are visible on other processors.
*/
-static __inline__ void clear_bit(long nr, volatile void * addr)
+static __inline__ void clear_bit(int nr, volatile void * addr)
{
__asm__ __volatile__( LOCK_PREFIX
- "btr"__OS" %1,%0"
+ "btrl %1,%0"
:"=m" (ADDR)
:"dIr" (nr));
}
@@ -87,10 +92,10 @@ static __inline__ void clear_bit(long nr, volatile void * addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static __inline__ void __change_bit(long nr, volatile void * addr)
+static __inline__ void __change_bit(int nr, volatile void * addr)
{
__asm__ __volatile__(
- "btc"__OS" %1,%0"
+ "btcl %1,%0"
:"=m" (ADDR)
:"dIr" (nr));
}
@@ -104,10 +109,10 @@ static __inline__ void __change_bit(long nr, volatile void * addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static __inline__ void change_bit(long nr, volatile void * addr)
+static __inline__ void change_bit(int nr, volatile void * addr)
{
__asm__ __volatile__( LOCK_PREFIX
- "btc"__OS" %1,%0"
+ "btcl %1,%0"
:"=m" (ADDR)
:"dIr" (nr));
}
@@ -120,12 +125,12 @@ static __inline__ void change_bit(long nr, volatile void * addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static __inline__ int test_and_set_bit(long nr, volatile void * addr)
+static __inline__ int test_and_set_bit(int nr, volatile void * addr)
{
- long oldbit;
+ int oldbit;
__asm__ __volatile__( LOCK_PREFIX
- "bts"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ "btsl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"dIr" (nr) : "memory");
return oldbit;
@@ -140,12 +145,12 @@ static __inline__ int test_and_set_bit(long nr, volatile void * addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static __inline__ int __test_and_set_bit(long nr, volatile void * addr)
+static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
{
- long oldbit;
+ int oldbit;
__asm__(
- "bts"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ "btsl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"dIr" (nr));
return oldbit;
@@ -159,12 +164,12 @@ static __inline__ int __test_and_set_bit(long nr, volatile void * addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static __inline__ int test_and_clear_bit(long nr, volatile void * addr)
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
{
- long oldbit;
+ int oldbit;
__asm__ __volatile__( LOCK_PREFIX
- "btr"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ "btrl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"dIr" (nr) : "memory");
return oldbit;
@@ -179,24 +184,24 @@ static __inline__ int test_and_clear_bit(long nr, volatile void * addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static __inline__ int __test_and_clear_bit(long nr, volatile void * addr)
+static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
{
- long oldbit;
+ int oldbit;
__asm__(
- "btr"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ "btrl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"dIr" (nr));
return oldbit;
}
/* WARNING: non atomic and it can be reordered! */
-static __inline__ int __test_and_change_bit(long nr, volatile void * addr)
+static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
{
- long oldbit;
+ int oldbit;
__asm__ __volatile__(
- "btc"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ "btcl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"dIr" (nr) : "memory");
return oldbit;
@@ -210,29 +215,29 @@ static __inline__ int __test_and_change_bit(long nr, volatile void * addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static __inline__ int test_and_change_bit(long nr, volatile void * addr)
+static __inline__ int test_and_change_bit(int nr, volatile void * addr)
{
- long oldbit;
+ int oldbit;
__asm__ __volatile__( LOCK_PREFIX
- "btc"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ "btcl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit),"=m" (ADDR)
:"dIr" (nr) : "memory");
return oldbit;
}
-static __inline__ int constant_test_bit(long nr, const volatile void * addr)
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
{
return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
}
-static __inline__ int variable_test_bit(long nr, volatile void * addr)
+static __inline__ int variable_test_bit(int nr, volatile void * addr)
{
- long oldbit;
+ int oldbit;
__asm__ __volatile__(
- "bt"__OS" %2,%1\n\tsbb"__OS" %0,%0"
+ "btl %2,%1\n\tsbbl %0,%0"
:"=r" (oldbit)
:"m" (ADDR),"dIr" (nr));
return oldbit;
@@ -243,122 +248,82 @@ static __inline__ int variable_test_bit(long nr, volatile void * addr)
constant_test_bit((nr),(addr)) : \
variable_test_bit((nr),(addr)))
+extern unsigned int __find_first_bit(
+ const unsigned long *addr, unsigned int size);
+extern unsigned int __find_next_bit(
+ const unsigned long *addr, unsigned int size, unsigned int offset);
+extern unsigned int __find_first_zero_bit(
+ const unsigned long *addr, unsigned int size);
+extern unsigned int __find_next_zero_bit(
+ const unsigned long *addr, unsigned int size, unsigned int offset);
+
+/* return index of first bit set in val or BITS_PER_LONG when no bit is set */
+static inline unsigned int __scanbit(unsigned long val)
+{
+ __asm__ ( "bsf %1,%0" : "=r" (val) : "r" (val), "0" (BITS_PER_LONG) );
+ return (unsigned int)val;
+}
+
/**
- * find_first_zero_bit - find the first zero bit in a memory region
+ * find_first_bit - find the first set bit in a memory region
* @addr: The address to start the search at
- * @size: The maximum bitnumber to search
+ * @size: The maximum size to search
*
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit. -1 when none found.
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
*/
-static __inline__ int find_first_zero_bit(void * addr, unsigned size)
-{
- int d0, d1, d2;
- int res;
-
- if (!size)
- return 0;
- __asm__ __volatile__(
- "movl $-1,%%eax\n\t"
- "xorl %%edx,%%edx\n\t"
- "repe; scasl\n\t"
- "je 1f\n\t"
- "xorl -4(%%"__OP"di),%%eax\n\t"
- "sub"__OS" $4,%%"__OP"di\n\t"
- "bsfl %%eax,%%edx\n"
- "1:\tsub"__OS" %%"__OP"bx,%%"__OP"di\n\t"
- "shl"__OS" $3,%%"__OP"di\n\t"
- "add"__OS" %%"__OP"di,%%"__OP"dx"
- :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
- :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
- return res;
-}
+#define find_first_bit(addr,size) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+ (__scanbit(*(unsigned long *)addr)) : \
+ __find_first_bit(addr,size)))
/**
- * find_next_zero_bit - find the first zero bit in a memory region
+ * find_next_bit - find the first set bit in a memory region
* @addr: The address to base the search on
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
*/
-static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
-{
- unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
- int set = 0, bit = offset & 31, res;
-
- if (bit) {
- /*
- * Look for zero in first byte
- */
- __asm__("bsfl %1,%0\n\t"
- "jne 1f\n\t"
- "movl $32, %0\n"
- "1:"
- : "=r" (set)
- : "r" (~(*p >> bit)));
- if (set < (32 - bit))
- return set + offset;
- set = 32 - bit;
- p++;
- }
- /*
- * No zero yet, search remaining full bytes for a zero
- */
- res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr));
- return (offset + set + res);
-}
+#define find_next_bit(addr,size,off) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+ ((off) + (__scanbit((*(unsigned long *)addr) >> (off)))) : \
+ __find_next_bit(addr,size,off)))
/**
- * ffz - find first zero in word.
- * @word: The word to search
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
*
- * Undefined if no zero exists, so code should check against ~0UL first.
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
*/
-static __inline__ unsigned long ffz(unsigned long word)
-{
- __asm__("bsf"__OS" %1,%0"
- :"=r" (word)
- :"r" (~word));
- return word;
-}
+#define find_first_zero_bit(addr,size) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+ (__scanbit(~*(unsigned long *)addr)) : \
+ __find_first_zero_bit(addr,size)))
/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
+ * find_next_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
*/
-static __inline__ int ffs(int x)
-{
- int r;
+#define find_next_zero_bit(addr,size,off) \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
+ ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off))))) : \
+ __find_next_zero_bit(addr,size,off)))
- __asm__("bsfl %1,%0\n\t"
- "jnz 1f\n\t"
- "movl $-1,%0\n"
- "1:" : "=r" (r) : "g" (x));
- return r+1;
-}
-/*
- * These are the preferred 'find first' functions in Xen.
- * Both return the appropriate bit index, with the l.s.b. having index 0.
- * If an appropriate bit is not found then the result is undefined.
+/**
+ * find_first_set_bit - find the first set bit in @word
+ * @word: the word to search
+ *
+ * Returns the bit-number of the first set bit. If no bits are set then the
+ * result is undefined.
*/
-static __inline__ unsigned long find_first_clear_bit(unsigned long word)
-{
- __asm__("bsf"__OS" %1,%0"
- :"=r" (word)
- :"r" (~word));
- return word;
-}
-
-static __inline__ unsigned long find_first_set_bit(unsigned long word)
+static __inline__ unsigned int find_first_set_bit(unsigned long word)
{
- __asm__("bsf"__OS" %1,%0"
- :"=r" (word)
- :"r" (word));
- return word;
+ __asm__ ( "bsf %1,%0" : "=r" (word) : "r" (word) );
+ return (unsigned int)word;
}
/**
@@ -367,22 +332,9 @@ static __inline__ unsigned long find_first_set_bit(unsigned long word)
*
* The Hamming Weight of a number is the total number of bits set in it.
*/
-
+#define hweight64(x) generic_hweight64(x)
#define hweight32(x) generic_hweight32(x)
#define hweight16(x) generic_hweight16(x)
#define hweight8(x) generic_hweight8(x)
-#define ext2_set_bit __test_and_set_bit
-#define ext2_clear_bit __test_and_clear_bit
-#define ext2_test_bit test_bit
-#define ext2_find_first_zero_bit find_first_zero_bit
-#define ext2_find_next_zero_bit find_next_zero_bit
-
-/* Bitmap functions for the minix filesystem. */
-#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
-#define minix_set_bit(nr,addr) __set_bit(nr,addr)
-#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
-#define minix_test_bit(nr,addr) test_bit(nr,addr)
-#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
-
#endif /* _X86_BITOPS_H */
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 5166c3f484..bf196d0b17 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -4,72 +4,53 @@
* A Linux-style configuration list.
*/
-#ifndef __XEN_I386_CONFIG_H__
-#define __XEN_I386_CONFIG_H__
+#ifndef __X86_CONFIG_H__
+#define __X86_CONFIG_H__
-#define CONFIG_X86 1
+#if defined(__i386__)
+// # define CONFIG_X86_PAE 1 /* yes */
+ # undef CONFIG_X86_PAE /* no */
+#endif
+
+#if defined(__x86_64__)
+# define CONFIG_PAGING_LEVELS 4
+#elif defined(CONFIG_X86_PAE)
+# define CONFIG_PAGING_LEVELS 3
+#else
+# define CONFIG_PAGING_LEVELS 2
+#endif
+#define CONFIG_X86 1
+#define CONFIG_X86_HT 1
+#define CONFIG_SHADOW 1
+#define CONFIG_VMX 1
#define CONFIG_SMP 1
#define CONFIG_X86_LOCAL_APIC 1
#define CONFIG_X86_GOOD_APIC 1
#define CONFIG_X86_IO_APIC 1
-#define CONFIG_X86_L1_CACHE_SHIFT 5
+
+/* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
+#define CONFIG_X86_L1_CACHE_SHIFT 7
#define CONFIG_ACPI 1
#define CONFIG_ACPI_BOOT 1
-#define CONFIG_PCI 1
-#define CONFIG_PCI_DIRECT 1
-#if defined(__i386__)
-#define CONFIG_PCI_BIOS 1
-#endif
-
-#define CONFIG_IDE 1
-#define CONFIG_BLK_DEV_IDE 1
-#define CONFIG_BLK_DEV_IDEDMA 1
-#define CONFIG_BLK_DEV_IDEPCI 1
-#define CONFIG_IDEDISK_MULTI_MODE 1
-#define CONFIG_IDEDISK_STROKE 1
-#define CONFIG_IDEPCI_SHARE_IRQ 1
-#define CONFIG_BLK_DEV_IDEDMA_PCI 1
-#define CONFIG_IDEDMA_PCI_AUTO 1
-#define CONFIG_IDEDMA_AUTO 1
-#define CONFIG_IDEDMA_ONLYDISK 1
-#define CONFIG_BLK_DEV_IDE_MODES 1
-#define CONFIG_BLK_DEV_PIIX 1
-
-#define CONFIG_SCSI 1
-#define CONFIG_SCSI_LOGGING 1
-#define CONFIG_BLK_DEV_SD 1
-#define CONFIG_SD_EXTRA_DEVS 40
-#define CONFIG_SCSI_MULTI_LUN 1
-
-#define CONFIG_XEN_ATTENTION_KEY 1
-
#define HZ 100
-/*
- * Just to keep compiler happy.
- * NB. DO NOT CHANGE SMP_CACHE_BYTES WITHOUT FIXING arch/i386/entry.S!!!
- * It depends on size of irq_cpustat_t, for example, being 64 bytes. :-)
- */
-#define SMP_CACHE_BYTES 64
-#define NR_CPUS 16
+#define OPT_CONSOLE_STR "com1,vga"
+
+#define NR_CPUS 32
/* Linkage for x86 */
-#define asmlinkage __attribute__((regparm(0)))
#define __ALIGN .align 16,0x90
#define __ALIGN_STR ".align 16,0x90"
-#define SYMBOL_NAME_STR(X) #X
-#define SYMBOL_NAME(X) X
-#define SYMBOL_NAME_LABEL(X) X##:
#ifdef __ASSEMBLY__
#define ALIGN __ALIGN
#define ALIGN_STR __ALIGN_STR
-#define ENTRY(name) \
- .globl SYMBOL_NAME(name); \
- ALIGN; \
- SYMBOL_NAME_LABEL(name)
+#define ENTRY(name) \
+ .globl name; \
+ ALIGN; \
+ name:
#endif
#define barrier() __asm__ __volatile__("": : :"memory")
@@ -78,25 +59,41 @@
#ifndef NDEBUG
#define MEMORY_GUARD
+#ifdef __x86_64__
+#define STACK_ORDER 2
+#endif
+#endif
+
+#ifndef STACK_ORDER
+#define STACK_ORDER 1
#endif
+#define STACK_SIZE (PAGE_SIZE << STACK_ORDER)
#ifndef __ASSEMBLY__
extern unsigned long _end; /* standard ELF symbol */
-extern void __out_of_line_bug(int line) __attribute__((noreturn));
-#define out_of_line_bug() __out_of_line_bug(__LINE__)
#endif /* __ASSEMBLY__ */
-#define BUG() do { \
- printk("BUG at %s:%d\n", __FILE__, __LINE__); \
- __asm__ __volatile__("ud2"); \
-} while (0)
+#define FORCE_CRASH() __asm__ __volatile__ ( "ud2" )
#if defined(__x86_64__)
+#define CONFIG_X86_64 1
+
+#define asmlinkage
+
#define XENHEAP_DEFAULT_MB (16)
#define PML4_ENTRY_BITS 39
-#define PML4_ENTRY_BYTES (1UL<<PML4_ENTRY_BITS)
+#ifndef __ASSEMBLY__
+#define PML4_ENTRY_BYTES (1UL << PML4_ENTRY_BITS)
+#define PML4_ADDR(_slot) \
+ ((((_slot ## UL) >> 8) * 0xffff000000000000UL) | \
+ (_slot ## UL << PML4_ENTRY_BITS))
+#else
+#define PML4_ENTRY_BYTES (1 << PML4_ENTRY_BITS)
+#define PML4_ADDR(_slot) \
+ (((_slot >> 8) * 0xffff000000000000) | (_slot << PML4_ENTRY_BITS))
+#endif
/*
* Memory layout:
@@ -116,7 +113,13 @@ extern void __out_of_line_bug(int line) __attribute__((noreturn));
* Shadow linear page table.
* 0xffff820000000000 - 0xffff827fffffffff [512GB, 2^39 bytes, PML4:260]
* Per-domain mappings (e.g., GDT, LDT).
- * 0xffff828000000000 - 0xffff8287ffffffff [512GB, 2^39 bytes, PML4:261]
+ * 0xffff828000000000 - 0xffff8283ffffffff [16GB, 2^34 bytes, PML4:261]
+ * Machine-to-phys translation table.
+ * 0xffff828400000000 - 0xffff8287ffffffff [16GB, 2^34 bytes, PML4:261]
+ * Page-frame information array.
+ * 0xffff828800000000 - 0xffff828bffffffff [16GB, 2^34 bytes, PML4:261]
+ * ioremap()/fixmap area.
+ * 0xffff828c00000000 - 0xffff82ffffffffff [464GB, PML4:261]
* Reserved for future use.
* 0xffff830000000000 - 0xffff83ffffffffff [1TB, 2^40 bytes, PML4:262-263]
* 1:1 direct mapping of all physical memory. Xen and its heap live here.
@@ -126,88 +129,154 @@ extern void __out_of_line_bug(int line) __attribute__((noreturn));
* Guest-defined use.
*/
+
+#define ROOT_PAGETABLE_FIRST_XEN_SLOT 256
+#define ROOT_PAGETABLE_LAST_XEN_SLOT 271
+#define ROOT_PAGETABLE_XEN_SLOTS \
+ (ROOT_PAGETABLE_LAST_XEN_SLOT - ROOT_PAGETABLE_FIRST_XEN_SLOT + 1)
+
/* Hypervisor reserves PML4 slots 256 to 271 inclusive. */
-#define HYPERVISOR_VIRT_START (0xFFFF800000000000UL)
-#define HYPERVISOR_VIRT_END (0xFFFF880000000000UL)
+#define HYPERVISOR_VIRT_START (PML4_ADDR(256))
+#define HYPERVISOR_VIRT_END (HYPERVISOR_VIRT_START + PML4_ENTRY_BYTES*16)
/* Slot 256: read-only guest-accessible machine-to-phys translation table. */
-#define RO_MPT_VIRT_START (HYPERVISOR_VIRT_START)
+#define RO_MPT_VIRT_START (PML4_ADDR(256))
#define RO_MPT_VIRT_END (RO_MPT_VIRT_START + PML4_ENTRY_BYTES/2)
/* Slot 257: read-only guest-accessible linear page table. */
-#define RO_LINEAR_PT_VIRT_START (RO_MPT_VIRT_END + PML4_ENTRY_BYTES/2)
+#define RO_LINEAR_PT_VIRT_START (PML4_ADDR(257))
#define RO_LINEAR_PT_VIRT_END (RO_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
/* Slot 258: linear page table (guest table). */
-#define LINEAR_PT_VIRT_START (RO_LINEAR_PT_VIRT_END)
+#define LINEAR_PT_VIRT_START (PML4_ADDR(258))
#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
/* Slot 259: linear page table (shadow table). */
-#define SH_LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_END)
+#define SH_LINEAR_PT_VIRT_START (PML4_ADDR(259))
#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
/* Slot 260: per-domain mappings. */
-#define PERDOMAIN_VIRT_START (SH_LINEAR_PT_VIRT_END)
+#define PERDOMAIN_VIRT_START (PML4_ADDR(260))
#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + PML4_ENTRY_BYTES)
+/* Slot 261: machine-to-phys conversion table (16GB). */
+#define RDWR_MPT_VIRT_START (PML4_ADDR(261))
+#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (16UL<<30))
+/* Slot 261: page-frame information array (16GB). */
+#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
+#define FRAMETABLE_VIRT_END (FRAMETABLE_VIRT_START + (16UL<<30))
+/* Slot 261: ioremap()/fixmap area (16GB). */
+#define IOREMAP_VIRT_START (FRAMETABLE_VIRT_END)
+#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (16UL<<30))
/* Slot 262-263: A direct 1:1 mapping of all of physical memory. */
-#define DIRECTMAP_VIRT_START (PERDOMAIN_VIRT_END + PML4_ENTRY_BYTES)
+#define DIRECTMAP_VIRT_START (PML4_ADDR(262))
#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
#define PGT_base_page_table PGT_l4_page_table
-#define __HYPERVISOR_CS64 0x0810
-#define __HYPERVISOR_CS32 0x0808
-#define __HYPERVISOR_DS 0x0818
+#define __HYPERVISOR_CS64 0xe010
+#define __HYPERVISOR_CS32 0xe008
+#define __HYPERVISOR_CS __HYPERVISOR_CS64
+#define __HYPERVISOR_DS64 0x0000
+#define __HYPERVISOR_DS32 0xe018
+#define __HYPERVISOR_DS __HYPERVISOR_DS64
+
+#define __GUEST_CS64 0xe033
+#define __GUEST_CS32 0xe023
+#define __GUEST_CS __GUEST_CS64
+#define __GUEST_DS 0x0000
+#define __GUEST_SS 0xe02b
/* For generic assembly code: use macros to define operation/operand sizes. */
-#define __OS "q" /* Operation Suffix */
-#define __OP "r" /* Operand Prefix */
+#define __OS "q" /* Operation Suffix */
+#define __OP "r" /* Operand Prefix */
+#define __FIXUP_ALIGN ".align 8"
+#define __FIXUP_WORD ".quad"
#elif defined(__i386__)
-#define XENHEAP_DEFAULT_MB (12)
-#define DIRECTMAP_PHYS_END (12*1024*1024)
+#define CONFIG_X86_32 1
+#define CONFIG_DOMAIN_PAGE 1
-/* Hypervisor owns top 64MB of virtual address space. */
-#define __HYPERVISOR_VIRT_START 0xFC000000
-#define HYPERVISOR_VIRT_START (0xFC000000UL)
+#define asmlinkage __attribute__((regparm(0)))
/*
- * First 4MB are mapped read-only for all. It's for the machine->physical
- * mapping table (MPT table). The following are virtual addresses.
+ * Memory layout (high to low): SIZE PAE-SIZE
+ * ------ ------
+ * I/O remapping area ( 4MB)
+ * Direct-map (1:1) area [Xen code/data/heap] (12MB)
+ * map_domain_page cache ( 4MB)
+ * Per-domain mappings ( 4MB)
+ * Shadow linear pagetable ( 4MB) ( 8MB)
+ * Guest linear pagetable ( 4MB) ( 8MB)
+ * Machine-to-physical translation table [writable] ( 4MB) (16MB)
+ * Frame-info table (24MB) (96MB)
+ * * Start of guest inaccessible area
+ * Machine-to-physical translation table [read-only] ( 4MB)
+ * * Start of guest unmodifiable area
*/
-#define RO_MPT_VIRT_START (HYPERVISOR_VIRT_START)
-#define RO_MPT_VIRT_END (RO_MPT_VIRT_START + (4*1024*1024))
-/* Xen heap extends to end of 1:1 direct-mapped memory region. */
-#define DIRECTMAP_VIRT_START (RO_MPT_VIRT_END)
-#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + DIRECTMAP_PHYS_END)
-#define XENHEAP_VIRT_START (DIRECTMAP_VIRT_START)
-#define XENHEAP_VIRT_END (DIRECTMAP_VIRT_END)
-/* Machine-to-phys conversion table. */
-#define RDWR_MPT_VIRT_START (XENHEAP_VIRT_END)
-#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + (4*1024*1024))
-/* Variable-length page-frame information array. */
-#define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
-#define FRAMETABLE_VIRT_END (FRAMETABLE_VIRT_START + (24*1024*1024))
-/* Next 4MB of virtual address space is used as a linear p.t. mapping. */
-#define LINEAR_PT_VIRT_START (FRAMETABLE_VIRT_END)
-#define LINEAR_PT_VIRT_END (LINEAR_PT_VIRT_START + (4*1024*1024))
-/* Next 4MB of virtual address space is used as a shadow linear p.t. map. */
-#define SH_LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_END)
-#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + (4*1024*1024))
-/* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
-#define PERDOMAIN_VIRT_START (SH_LINEAR_PT_VIRT_END)
-#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (4*1024*1024))
-/* Penultimate 4MB of virtual address space used for domain page mappings. */
-#define MAPCACHE_VIRT_START (PERDOMAIN_VIRT_END)
-#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + (4*1024*1024))
-/* Final 4MB of virtual address space used for ioremap(). */
-#define IOREMAP_VIRT_START (MAPCACHE_VIRT_END)
-#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + (4*1024*1024))
+
+#define IOREMAP_MBYTES 4
+#define DIRECTMAP_MBYTES 12
+#define MAPCACHE_MBYTES 4
+#define PERDOMAIN_MBYTES 4
+
+#ifdef CONFIG_X86_PAE
+# define LINEARPT_MBYTES 8
+# define MACHPHYS_MBYTES 16 /* 1 MB needed per 1 GB memory */
+# define FRAMETABLE_MBYTES (MACHPHYS_MBYTES * 6)
+#else
+# define LINEARPT_MBYTES 4
+# define MACHPHYS_MBYTES 4
+# define FRAMETABLE_MBYTES 24
+#endif
+
+#define IOREMAP_VIRT_END 0UL
+#define IOREMAP_VIRT_START (IOREMAP_VIRT_END - (IOREMAP_MBYTES<<20))
+#define DIRECTMAP_VIRT_END IOREMAP_VIRT_START
+#define DIRECTMAP_VIRT_START (DIRECTMAP_VIRT_END - (DIRECTMAP_MBYTES<<20))
+#define MAPCACHE_VIRT_END DIRECTMAP_VIRT_START
+#define MAPCACHE_VIRT_START (MAPCACHE_VIRT_END - (MAPCACHE_MBYTES<<20))
+#define PERDOMAIN_VIRT_END MAPCACHE_VIRT_START
+#define PERDOMAIN_VIRT_START (PERDOMAIN_VIRT_END - (PERDOMAIN_MBYTES<<20))
+#define SH_LINEAR_PT_VIRT_END PERDOMAIN_VIRT_START
+#define SH_LINEAR_PT_VIRT_START (SH_LINEAR_PT_VIRT_END - (LINEARPT_MBYTES<<20))
+#define LINEAR_PT_VIRT_END SH_LINEAR_PT_VIRT_START
+#define LINEAR_PT_VIRT_START (LINEAR_PT_VIRT_END - (LINEARPT_MBYTES<<20))
+#define RDWR_MPT_VIRT_END LINEAR_PT_VIRT_START
+#define RDWR_MPT_VIRT_START (RDWR_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
+#define FRAMETABLE_VIRT_END RDWR_MPT_VIRT_START
+#define FRAMETABLE_VIRT_START (FRAMETABLE_VIRT_END - (FRAMETABLE_MBYTES<<20))
+#define RO_MPT_VIRT_END FRAMETABLE_VIRT_START
+#define RO_MPT_VIRT_START (RO_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
+
+#define XENHEAP_DEFAULT_MB (DIRECTMAP_MBYTES)
+#define DIRECTMAP_PHYS_END (DIRECTMAP_MBYTES<<20)
+
+/* Maximum linear address accessible via guest memory segments. */
+#define GUEST_SEGMENT_MAX_ADDR RO_MPT_VIRT_END
+
+#ifdef CONFIG_X86_PAE
+/* Hypervisor owns top 168MB of virtual address space. */
+# define __HYPERVISOR_VIRT_START 0xF5800000
+# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#else
+/* Hypervisor owns top 64MB of virtual address space. */
+# define __HYPERVISOR_VIRT_START 0xFC000000
+# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#endif
+
+#define L2_PAGETABLE_FIRST_XEN_SLOT \
+ (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
+#define L2_PAGETABLE_LAST_XEN_SLOT \
+ (~0UL >> L2_PAGETABLE_SHIFT)
+#define L2_PAGETABLE_XEN_SLOTS \
+ (L2_PAGETABLE_LAST_XEN_SLOT - L2_PAGETABLE_FIRST_XEN_SLOT + 1)
#define PGT_base_page_table PGT_l2_page_table
-#define __HYPERVISOR_CS 0x0808
-#define __HYPERVISOR_DS 0x0810
+#define __HYPERVISOR_CS 0xe008
+#define __HYPERVISOR_DS 0xe010
/* For generic assembly code: use macros to define operation/operand sizes. */
-#define __OS "l" /* Operation Suffix */
-#define __OP "e" /* Operand Prefix */
+#define __OS "l" /* Operation Suffix */
+#define __OP "e" /* Operand Prefix */
+#define __FIXUP_ALIGN ".align 4"
+#define __FIXUP_WORD ".long"
#endif /* __i386__ */
@@ -215,10 +284,13 @@ extern void __out_of_line_bug(int line) __attribute__((noreturn));
extern unsigned long xenheap_phys_end; /* user-configurable */
#endif
-#define GDT_VIRT_START (PERDOMAIN_VIRT_START)
-#define GDT_VIRT_END (GDT_VIRT_START + (64*1024))
-#define LDT_VIRT_START (GDT_VIRT_END)
-#define LDT_VIRT_END (LDT_VIRT_START + (64*1024))
+#define GDT_VIRT_START(ed) \
+ (PERDOMAIN_VIRT_START + ((ed)->vcpu_id << PDPT_VCPU_VA_SHIFT))
+#define LDT_VIRT_START(ed) \
+ (GDT_VIRT_START(ed) + (64*1024))
+
+#define PDPT_VCPU_SHIFT 5
+#define PDPT_VCPU_VA_SHIFT (PDPT_VCPU_SHIFT + PAGE_SHIFT)
#if defined(__x86_64__)
#define ELFSIZE 64
@@ -226,4 +298,4 @@ extern unsigned long xenheap_phys_end; /* user-configurable */
#define ELFSIZE 32
#endif
-#endif /* __XEN_I386_CONFIG_H__ */
+#endif /* __X86_CONFIG_H__ */
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 8b2e913bff..e695162de4 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -4,13 +4,12 @@
* Defines x86 CPU feature bits
*/
-#ifndef __ASM_X86_CPUFEATURE_H
-#define __ASM_X86_CPUFEATURE_H
+#ifndef __ASM_I386_CPUFEATURE_H
+#define __ASM_I386_CPUFEATURE_H
-/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
-#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT)
+#include <xen/bitops.h>
-#define NCAPINTS 6 /* Currently we have 6 32-bit words worth of info */
+#define NCAPINTS 7 /* N 32-bit words worth of info */
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
@@ -48,6 +47,7 @@
/* Don't duplicate feature flags which are redundant with Intel! */
#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
@@ -71,11 +71,25 @@
#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
+#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */
+#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */
+#define X86_FEATURE_VMXE (4*32+ 5) /* Virtual Machine Extensions */
#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_CID (4*32+10) /* Context ID */
+#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
+#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */
+#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */
+#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM (5*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY (5*32+ 1) /* If yes HyperThreading not valid */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
@@ -87,18 +101,30 @@
#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE)
#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_sse2 boot_cpu_has(X86_FEATURE_XMM2)
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR)
#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX)
#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
+#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
+#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
+#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR)
#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR)
#define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE)
+#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN)
+#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT)
+#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN)
-#endif /* __ASM_X86_CPUFEATURE_H */
+#endif /* __ASM_I386_CPUFEATURE_H */
+
+/*
+ * Local Variables:
+ * mode:c
+ * comment-column:42
+ * End:
+ */
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index 1c27b9d7c8..7916b57ae9 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -1,6 +1,56 @@
+/******************************************************************************
+ * current.h
+ *
+ * Information structure that lives at the bottom of the per-cpu Xen stack.
+ */
-#ifdef __x86_64__
-#include <asm/x86_64/current.h>
-#else
-#include <asm/x86_32/current.h>
-#endif
+#ifndef __X86_CURRENT_H__
+#define __X86_CURRENT_H__
+
+#include <xen/config.h>
+#include <public/xen.h>
+#include <asm/page.h>
+
+struct vcpu;
+
+struct cpu_info {
+ struct cpu_user_regs guest_cpu_user_regs;
+ unsigned int processor_id;
+ struct vcpu *current_ed;
+};
+
+static inline struct cpu_info *get_cpu_info(void)
+{
+ struct cpu_info *cpu_info;
+ __asm__ ( "and %%"__OP"sp,%0; or %2,%0"
+ : "=r" (cpu_info)
+ : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-sizeof(struct cpu_info))
+ );
+ return cpu_info;
+}
+
+#define get_current() (get_cpu_info()->current_ed)
+#define set_current(_ed) (get_cpu_info()->current_ed = (_ed))
+#define current (get_current())
+
+#define get_processor_id() (get_cpu_info()->processor_id)
+#define set_processor_id(_id) (get_cpu_info()->processor_id = (_id))
+
+#define guest_cpu_user_regs() (&get_cpu_info()->guest_cpu_user_regs)
+
+/*
+ * Get the bottom-of-stack, as stored in the per-CPU TSS. This actually points
+ * into the middle of cpu_info.guest_cpu_user_regs, at the section that
+ * precisely corresponds to a CPU trap frame.
+ */
+#define get_stack_bottom() \
+ ((unsigned long)&get_cpu_info()->guest_cpu_user_regs.es)
+
+#define reset_stack_and_jump(__fn) \
+ __asm__ __volatile__ ( \
+ "mov %0,%%"__OP"sp; jmp "STR(__fn) \
+ : : "r" (guest_cpu_user_regs()) )
+
+#define schedule_tail(_ed) (((_ed)->arch.schedule_tail)(_ed))
+
+#endif /* __X86_CURRENT_H__ */
diff --git a/xen/include/asm-x86/debugger.h b/xen/include/asm-x86/debugger.h
index d44b2d5e62..aa2050f849 100644
--- a/xen/include/asm-x86/debugger.h
+++ b/xen/include/asm-x86/debugger.h
@@ -17,6 +17,14 @@
* hook to drop into a debug session. It can also be used to hook off
* deliberately caused traps (which you then handle and return non-zero)
* but really these should be hooked off 'debugger_trap_entry'.
+ *
+ * 3. debugger_trap_immediate():
+ * Called if we want to drop into a debugger now. This is essentially the
+ * same as debugger_trap_fatal, except that we use the current register state
+ * rather than the state which was in effect when we took the trap.
+ * Essentially, if we're dying because of an unhandled exception, we call
+ * debugger_trap_fatal; if we're dying because of a panic() we call
+ * debugger_trap_immediate().
*/
#ifndef __X86_DEBUGGER_H__
@@ -30,89 +38,72 @@
#define DEBUGGER_trap_fatal(_v, _r) \
if ( debugger_trap_fatal(_v, _r) ) return EXCRET_fault_fixed;
-#ifdef XEN_DEBUGGER
+#if defined(CRASH_DEBUG)
-#include <asm/pdb.h>
+extern int __trap_to_cdb(struct cpu_user_regs *r);
+#define debugger_trap_entry(_v, _r) (0)
-static inline int debugger_trap_entry(
- unsigned int vector, struct xen_regs *regs)
+static inline int debugger_trap_fatal(
+ unsigned int vector, struct cpu_user_regs *regs)
{
- int ret = 0;
+ (void)__trap_to_cdb(regs);
+ return (vector == TRAP_int3); /* int3 is harmless */
+}
- switch ( vector )
- {
- case TRAP_debug:
- if ( pdb_initialized )
- {
- pdb_handle_debug_trap(regs, regs->error_code);
- ret = 1; /* early exit */
- }
- break;
+/* Int3 is a trivial way to gather cpu_user_regs context. */
+#define debugger_trap_immediate() __asm__ __volatile__ ( "int3" );
- case TRAP_int3:
- if ( pdb_initialized && (pdb_handle_exception(vector, regs) == 0) )
- ret = 1; /* early exit */
- break;
-
- case TRAP_gp_fault:
- if ( (VM86_MODE(regs) || !RING_0(regs)) &&
- ((regs->error_code & 3) == 2) &&
- pdb_initialized && (pdb_ctx.system_call != 0) )
- {
- unsigned long cr3 = read_cr3();
- if ( cr3 == pdb_ctx.ptbr )
- pdb_linux_syscall_enter_bkpt(
- regs, regs->error_code,
- current->thread.traps + (regs->error_code>>3));
- }
- break;
- }
+#elif defined(DOMU_DEBUG)
- return ret;
-}
+#include <xen/sched.h>
+#include <asm/regs.h>
-static inline int debugger_trap_fatal(
- unsigned int vector, struct xen_regs *regs)
+static inline int debugger_trap_entry(
+ unsigned int vector, struct cpu_user_regs *regs)
{
- int ret = 0;
+ struct vcpu *v = current;
+ if ( !KERNEL_MODE(v, regs) || (v->domain->domain_id == 0) )
+ return 0;
+
switch ( vector )
{
- case TRAP_page_fault:
- if ( pdb_page_fault_possible )
- {
- pdb_page_fault = 1;
- /* make eax & edx valid to complete the instruction */
- regs->eax = (long)&pdb_page_fault_scratch;
- regs->edx = (long)&pdb_page_fault_scratch;
- ret = 1; /* exit - do not crash! */
- }
- break;
+ case TRAP_int3:
+ case TRAP_debug:
+ domain_pause_for_debugger();
+ return 1;
}
- return ret;
+ return 0;
}
+#define debugger_trap_fatal(_v, _r) (0)
+#define debugger_trap_immediate()
+
#elif 0
-extern int kdb_trap(int, int, struct xen_regs *);
+extern int kdb_trap(int, int, struct cpu_user_regs *);
static inline int debugger_trap_entry(
- unsigned int vector, struct xen_regs *regs)
+ unsigned int vector, struct cpu_user_regs *regs)
{
return 0;
}
static inline int debugger_trap_fatal(
- unsigned int vector, struct xen_regs *regs)
+ unsigned int vector, struct cpu_user_regs *regs)
{
return kdb_trap(vector, 0, regs);
}
+/* Int3 is a trivial way to gather cpu_user_regs context. */
+#define debugger_trap_immediate() __asm__ __volatile__ ( "int3" );
+
#else
#define debugger_trap_entry(_v, _r) (0)
#define debugger_trap_fatal(_v, _r) (0)
+#define debugger_trap_immediate()
#endif
diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h
index 0d6fc65b32..6d45d0cb2b 100644
--- a/xen/include/asm-x86/desc.h
+++ b/xen/include/asm-x86/desc.h
@@ -1,31 +1,41 @@
#ifndef __ARCH_DESC_H
#define __ARCH_DESC_H
-#define LDT_ENTRY_SIZE 8
+/*
+ * Xen reserves a memory page of GDT entries.
+ * No guest GDT entries exist beyond the Xen reserved area.
+ */
+#define NR_RESERVED_GDT_PAGES 1
+#define NR_RESERVED_GDT_BYTES (NR_RESERVED_GDT_PAGES * PAGE_SIZE)
+#define NR_RESERVED_GDT_ENTRIES (NR_RESERVED_GDT_BYTES / 8)
-#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
+#define LAST_RESERVED_GDT_PAGE \
+ (FIRST_RESERVED_GDT_PAGE + NR_RESERVED_GDT_PAGES - 1)
+#define LAST_RESERVED_GDT_BYTE \
+ (FIRST_RESERVED_GDT_BYTE + NR_RESERVED_GDT_BYTES - 1)
+#define LAST_RESERVED_GDT_ENTRY \
+ (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1)
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
-
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+#define LDT_ENTRY_SIZE 8
#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) )
+#if defined(__x86_64__)
+#define GUEST_KERNEL_RPL 3
+#elif defined(__i386__)
+#define GUEST_KERNEL_RPL 1
+#endif
+
/*
- * Guest OS must provide its own code selectors, or use the one we provide. The
- * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector
- * value is okay. Note that checking only the RPL is insufficient: if the
- * selector is poked into an interrupt, trap or call gate then the RPL is
- * ignored when the gate is accessed.
+ * Guest OS must provide its own code selectors, or use the one we provide. Any
+ * LDT selector value is okay. Note that checking only the RPL is insufficient:
+ * if the selector is poked into an interrupt, trap or call gate then the RPL
+ * is ignored when the gate is accessed.
*/
#define VALID_SEL(_s) \
- (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \
- (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \
- ((_s)&4)) && \
- (((_s)&3) == 1))
-#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s))
+ (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || ((_s)&4)) && \
+ (((_s)&3) == GUEST_KERNEL_RPL))
+#define VALID_CODESEL(_s) ((_s) == FLAT_KERNEL_CS || VALID_SEL(_s))
/* These are bitmasks for the high 32 bits of a descriptor table entry. */
#define _SEGMENT_TYPE (15<< 8)
@@ -39,24 +49,103 @@
#define _SEGMENT_G ( 1<<23) /* Granularity */
#ifndef __ASSEMBLY__
+
struct desc_struct {
- unsigned long a,b;
+ u32 a, b;
};
+#if defined(__x86_64__)
+
+#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
+
+#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
+#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+
+typedef struct {
+ u64 a, b;
+} idt_entry_t;
+
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+ (gate_addr)->a = \
+ (((unsigned long)(addr) & 0xFFFF0000UL) << 32) | \
+ ((unsigned long)(dpl) << 45) | \
+ ((unsigned long)(type) << 40) | \
+ ((unsigned long)(addr) & 0xFFFFUL) | \
+ ((unsigned long)__HYPERVISOR_CS64 << 16) | \
+ (1UL << 47); \
+ (gate_addr)->b = \
+ ((unsigned long)(addr) >> 32); \
+} while (0)
+
+#define _set_tssldt_desc(desc,addr,limit,type) \
+do { \
+ (desc)[0].a = \
+ ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF); \
+ (desc)[0].b = \
+ ((u32)(addr) & 0xFF000000U) | \
+ ((u32)(type) << 8) | 0x8000U | \
+ (((u32)(addr) & 0x00FF0000U) >> 16); \
+ (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32); \
+ (desc)[1].b = 0; \
+} while (0)
+
+#elif defined(__i386__)
+
+#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
+
+#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
+
+#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
+#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+
+typedef struct desc_struct idt_entry_t;
+
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+ "movw %4,%%dx\n\t" \
+ "movl %%eax,%0\n\t" \
+ "movl %%edx,%1" \
+ :"=m" (*((long *) (gate_addr))), \
+ "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+ :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+ "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
+} while (0)
+
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+ "movw %%ax,2(%2)\n\t" \
+ "rorl $16,%%eax\n\t" \
+ "movb %%al,4(%2)\n\t" \
+ "movb %4,5(%2)\n\t" \
+ "movb $0,6(%2)\n\t" \
+ "movb %%ah,7(%2)\n\t" \
+ "rorl $16,%%eax" \
+ : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type|0x80))
+
+#endif
+
extern struct desc_struct gdt_table[];
-extern struct desc_struct *idt, *gdt;
+extern struct desc_struct *gdt;
+extern idt_entry_t *idt;
struct Xgt_desc_struct {
- unsigned short size;
- unsigned long address __attribute__((packed));
+ unsigned short size;
+ unsigned long address __attribute__((packed));
};
#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
extern void set_intr_gate(unsigned int irq, void * addr);
+extern void set_system_gate(unsigned int n, void *addr);
+extern void set_task_gate(unsigned int n, unsigned int sel);
extern void set_tss_desc(unsigned int n, void *addr);
#endif /* !__ASSEMBLY__ */
-#endif
+#endif /* __ARCH_DESC_H */
diff --git a/xen/include/asm-x86/div64.h b/xen/include/asm-x86/div64.h
index ef915df700..b43f1ab486 100644
--- a/xen/include/asm-x86/div64.h
+++ b/xen/include/asm-x86/div64.h
@@ -1,17 +1,46 @@
#ifndef __I386_DIV64
#define __I386_DIV64
+#include <xen/types.h>
+
+#if BITS_PER_LONG == 64
+
+# define do_div(n,base) ({ \
+ uint32_t __base = (base); \
+ uint32_t __rem; \
+ __rem = ((uint64_t)(n)) % __base; \
+ (n) = ((uint64_t)(n)) / __base; \
+ __rem; \
+ })
+
+#else
+
+/*
+ * do_div() is NOT a C function. It wants to return
+ * two values (the quotient and the remainder), but
+ * since that doesn't work very well in C, what it
+ * does is:
+ *
+ * - modifies the 64-bit dividend _in_place_
+ * - returns the 32-bit remainder
+ *
+ * This ends up being the most efficient "calling
+ * convention" on x86.
+ */
#define do_div(n,base) ({ \
- unsigned long __upper, __low, __high, __mod; \
+ unsigned long __upper, __low, __high, __mod, __base; \
+ __base = (base); \
asm("":"=a" (__low), "=d" (__high):"A" (n)); \
__upper = __high; \
if (__high) { \
- __upper = __high % (base); \
- __high = __high / (base); \
+ __upper = __high % (__base); \
+ __high = __high / (__base); \
} \
- asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \
+ asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
asm("":"=A" (n):"a" (__low),"d" (__high)); \
__mod; \
})
#endif
+
+#endif
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
new file mode 100644
index 0000000000..397b65d031
--- /dev/null
+++ b/xen/include/asm-x86/domain.h
@@ -0,0 +1,132 @@
+
+#ifndef __ASM_DOMAIN_H__
+#define __ASM_DOMAIN_H__
+
+#include <xen/config.h>
+#include <xen/mm.h>
+#include <asm/vmx_vmcs.h>
+
+struct trap_bounce {
+ unsigned long error_code;
+ unsigned long cr2;
+ unsigned short flags; /* TBF_ */
+ unsigned short cs;
+ unsigned long eip;
+};
+
+struct arch_domain
+{
+ l1_pgentry_t *mm_perdomain_pt;
+#ifdef CONFIG_X86_64
+ l2_pgentry_t *mm_perdomain_l2;
+ l3_pgentry_t *mm_perdomain_l3;
+#endif
+
+ /* Writable pagetables. */
+ struct ptwr_info ptwr[2];
+
+ /* I/O-port access bitmap mask. */
+ u8 *iobmp_mask; /* Address of IO bitmap mask, or NULL. */
+
+ /* Shadow mode status and controls. */
+ unsigned int shadow_mode; /* flags to control shadow table operation */
+ unsigned int shadow_nest; /* Recursive depth of shadow_lock() nesting */
+ /* Shadow mode has tainted page reference counts? */
+ unsigned int shadow_tainted_refcnts;
+
+ /* shadow hashtable */
+ struct shadow_status *shadow_ht;
+ struct shadow_status *shadow_ht_free;
+ struct shadow_status *shadow_ht_extras; /* extra allocation units */
+ unsigned int shadow_extras_count;
+
+ /* shadow dirty bitmap */
+ unsigned long *shadow_dirty_bitmap;
+ unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */
+
+ /* shadow mode stats */
+ unsigned int shadow_page_count;
+ unsigned int hl2_page_count;
+ unsigned int snapshot_page_count;
+
+ unsigned int shadow_fault_count;
+ unsigned int shadow_dirty_count;
+ unsigned int shadow_dirty_net_count;
+ unsigned int shadow_dirty_block_count;
+
+ /* full shadow mode */
+ struct out_of_sync_entry *out_of_sync; /* list of out-of-sync pages */
+ struct out_of_sync_entry *out_of_sync_free;
+ struct out_of_sync_entry *out_of_sync_extras;
+ unsigned int out_of_sync_extras_count;
+
+ struct list_head free_shadow_frames;
+
+ pagetable_t phys_table; /* guest 1:1 pagetable */
+
+} __cacheline_aligned;
+
+struct arch_vcpu
+{
+ struct vcpu_guest_context guest_context;
+
+ unsigned long flags; /* TF_ */
+
+ void (*schedule_tail) (struct vcpu *);
+
+ /* Bounce information for propagating an exception to guest OS. */
+ struct trap_bounce trap_bounce;
+
+ /* I/O-port access bitmap. */
+ u8 *iobmp; /* Guest kernel virtual address of the bitmap. */
+ int iobmp_limit; /* Number of ports represented in the bitmap. */
+ int iopl; /* Current IOPL for this VCPU. */
+
+#ifdef CONFIG_X86_32
+ struct desc_struct int80_desc;
+#endif
+
+ /* Virtual Machine Extensions */
+ struct arch_vmx_struct arch_vmx;
+
+ /*
+ * Every domain has a L1 pagetable of its own. Per-domain mappings
+ * are put in this table (eg. the current GDT is mapped here).
+ */
+ l1_pgentry_t *perdomain_ptes;
+
+ pagetable_t guest_table_user; /* x86/64: user-space pagetable. */
+ pagetable_t guest_table; /* (MA) guest notion of cr3 */
+ pagetable_t shadow_table; /* (MA) shadow of guest */
+ pagetable_t monitor_table; /* (MA) used in hypervisor */
+
+ l2_pgentry_t *guest_vtable; /* virtual address of pagetable */
+ l2_pgentry_t *shadow_vtable; /* virtual address of shadow_table */
+ l2_pgentry_t *monitor_vtable; /* virtual address of monitor_table */
+ l1_pgentry_t *hl2_vtable; /* virtual address of hl2_table */
+
+#ifdef CONFIG_X86_64
+ l3_pgentry_t *guest_vl3table;
+ l4_pgentry_t *guest_vl4table;
+#endif
+
+ unsigned long monitor_shadow_ref;
+
+ /* Virtual CR2 value. Can be read/written by guest. */
+ unsigned long guest_cr2;
+
+ /* Current LDT details. */
+ unsigned long shadow_ldt_mapcnt;
+} __cacheline_aligned;
+
+#endif /* __ASM_DOMAIN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/domain_page.h b/xen/include/asm-x86/domain_page.h
deleted file mode 100644
index d8cdf0b74e..0000000000
--- a/xen/include/asm-x86/domain_page.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/******************************************************************************
- * domain_page.h
- *
- * Allow temporary mapping of domain page frames into Xen space.
- */
-
-#ifndef __ASM_DOMAIN_PAGE_H__
-#define __ASM_DOMAIN_PAGE_H__
-
-#include <xen/config.h>
-#include <xen/sched.h>
-
-extern unsigned long *mapcache;
-#define MAPCACHE_ENTRIES 1024
-
-/*
- * Maps a given physical address, returning corresponding virtual address.
- * The entire page containing that VA is now accessible until a
- * corresponding call to unmap_domain_mem().
- */
-extern void *map_domain_mem(unsigned long pa);
-
-/*
- * Pass a VA within a page previously mapped with map_domain_mem().
- * That page will then be removed from the mapping lists.
- */
-extern void unmap_domain_mem(void *va);
-
-#endif /* __ASM_DOMAIN_PAGE_H__ */
diff --git a/xen/include/asm-x86/e820.h b/xen/include/asm-x86/e820.h
index 52d342b523..f510c7c12b 100644
--- a/xen/include/asm-x86/e820.h
+++ b/xen/include/asm-x86/e820.h
@@ -5,10 +5,12 @@
#define E820MAX 32
-#define E820_RAM 1
-#define E820_RESERVED 2
-#define E820_ACPI 3
-#define E820_NVS 4
+#define E820_RAM 1
+#define E820_RESERVED 2
+#define E820_ACPI 3
+#define E820_NVS 4
+#define E820_IO 16
+#define E820_SHARED_PAGE 17
#ifndef __ASSEMBLY__
struct e820entry {
@@ -22,9 +24,13 @@ struct e820map {
struct e820entry map[E820MAX];
};
-extern unsigned long init_e820(struct e820entry *, int);
+extern unsigned long init_e820(struct e820entry *, int *);
extern struct e820map e820;
+#ifndef NDEBUG
+extern void print_e820_memory_map(struct e820entry *map, int entries);
+#endif
+
#endif /*!__ASSEMBLY__*/
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h
index 211d4cf895..ca01c687fe 100644
--- a/xen/include/asm-x86/fixmap.h
+++ b/xen/include/asm-x86/fixmap.h
@@ -13,8 +13,8 @@
#define _ASM_FIXMAP_H
#include <xen/config.h>
-#include <asm/acpi.h>
#include <asm/apicdef.h>
+#include <asm/acpi.h>
#include <asm/page.h>
/*
@@ -25,33 +25,36 @@
* from the end of virtual memory backwards.
*/
enum fixed_addresses {
-#ifdef CONFIG_X86_LOCAL_APIC
- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
-#endif
-#ifdef CONFIG_X86_IO_APIC
+ FIX_APIC_BASE,
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
-#endif
-#ifdef CONFIG_ACPI_BOOT
FIX_ACPI_BEGIN,
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
-#endif
__end_of_fixed_addresses
};
-#define FIXADDR_TOP (0xffffe000UL)
+#define FIXADDR_TOP (IOREMAP_VIRT_END - PAGE_SIZE)
#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-extern void __set_fixmap(enum fixed_addresses idx,
- l1_pgentry_t entry);
+extern void __set_fixmap(
+ enum fixed_addresses idx, unsigned long p, unsigned long flags);
#define set_fixmap(idx, phys) \
- __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR))
+ __set_fixmap(idx, phys, PAGE_HYPERVISOR)
#define set_fixmap_nocache(idx, phys) \
- __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE))
+ __set_fixmap(idx, phys, PAGE_HYPERVISOR_NOCACHE)
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+#define fix_to_virt(x) (__fix_to_virt(x))
-#define fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+ BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+ return __virt_to_fix(vaddr);
+}
#endif
diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h
index cebb78ebb5..9db2de7443 100644
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -43,6 +43,18 @@ static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp)
(lastuse_stamp <= curr_time)));
}
+/*
+ * Filter the given set of CPUs, removing those that definitely flushed their
+ * TLB since @page_timestamp.
+ */
+#define tlbflush_filter(mask, page_timestamp) \
+do { \
+ unsigned int cpu; \
+ for_each_cpu_mask ( cpu, mask ) \
+ if ( !NEED_FLUSH(tlbflush_time[cpu], page_timestamp) ) \
+ cpu_clear(cpu, mask); \
+} while ( 0 )
+
extern void new_tlbflush_clock_period(void);
/* Read pagetable base. */
@@ -50,53 +62,42 @@ static inline unsigned long read_cr3(void)
{
unsigned long cr3;
__asm__ __volatile__ (
- "mov"__OS" %%cr3, %0" : "=r" (cr3) : );
+ "mov %%cr3, %0" : "=r" (cr3) : );
return cr3;
}
/* Write pagetable base and implicitly tick the tlbflush clock. */
extern void write_cr3(unsigned long cr3);
-/*
- * TLB flushing:
- *
- * - flush_tlb() flushes the current mm struct TLBs
- * - flush_tlb_all() flushes all processes TLBs
- * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
- *
- * ..but the i386 has somewhat limited tlb flushing capabilities,
- * and page-granular flushes are available only on i486 and up.
- */
-
-#define __flush_tlb() \
+#define local_flush_tlb() \
do { \
unsigned long cr3 = read_cr3(); \
write_cr3(cr3); \
} while ( 0 )
-#ifndef CONFIG_SMP
+#define local_flush_tlb_pge() \
+ do { \
+ __pge_off(); \
+ local_flush_tlb(); \
+ __pge_on(); \
+ } while ( 0 )
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb()
-#define flush_tlb_all_pge() __flush_tlb_pge()
-#define local_flush_tlb() __flush_tlb()
-#define flush_tlb_cpu(_cpu) __flush_tlb()
-#define flush_tlb_mask(_mask) __flush_tlb()
-#define try_flush_tlb_mask(_mask) __flush_tlb()
+#define local_flush_tlb_one(__addr) \
+ __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
-#else
+#define flush_tlb_all() flush_tlb_mask(cpu_online_map)
+#ifndef CONFIG_SMP
+#define flush_tlb_all_pge() local_flush_tlb_pge()
+#define flush_tlb_mask(mask) local_flush_tlb()
+#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(_v)
+#else
#include <xen/smp.h>
-
-extern int try_flush_tlb_mask(unsigned long mask);
-extern void flush_tlb_mask(unsigned long mask);
+#define FLUSHVA_ALL (~0UL)
extern void flush_tlb_all_pge(void);
-
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() flush_tlb_mask((1 << smp_num_cpus) - 1)
-#define local_flush_tlb() __flush_tlb()
-#define flush_tlb_cpu(_cpu) flush_tlb_mask(1 << (_cpu))
-
+extern void __flush_tlb_mask(cpumask_t mask, unsigned long va);
+#define flush_tlb_mask(mask) __flush_tlb_mask(mask,FLUSHVA_ALL)
+#define flush_tlb_one_mask(mask,v) __flush_tlb_mask(mask,v)
#endif
#endif /* __FLUSHTLB_H__ */
diff --git a/xen/include/asm-x86/genapic.h b/xen/include/asm-x86/genapic.h
new file mode 100644
index 0000000000..fc813b2e82
--- /dev/null
+++ b/xen/include/asm-x86/genapic.h
@@ -0,0 +1,115 @@
+#ifndef _ASM_GENAPIC_H
+#define _ASM_GENAPIC_H 1
+
+/*
+ * Generic APIC driver interface.
+ *
+ * A straightforward mapping of the APIC-related parts of the
+ * x86 subarchitecture interface to a dynamic object.
+ *
+ * This is used by the "generic" x86 subarchitecture.
+ *
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ */
+
+struct mpc_config_translation;
+struct mpc_config_bus;
+struct mp_config_table;
+struct mpc_config_processor;
+
+struct genapic {
+ char *name;
+ int (*probe)(void);
+
+ int (*apic_id_registered)(void);
+ cpumask_t (*target_cpus)(void);
+ int int_delivery_mode;
+ int int_dest_mode;
+ int ESR_DISABLE;
+ int apic_destination_logical;
+ unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid);
+ unsigned long (*check_apicid_present)(int apicid);
+ int no_balance_irq;
+ int no_ioapic_check;
+ void (*init_apic_ldr)(void);
+ physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map);
+
+ void (*clustered_apic_check)(void);
+ int (*multi_timer_check)(int apic, int irq);
+ int (*apicid_to_node)(int logical_apicid);
+ int (*cpu_to_logical_apicid)(int cpu);
+ int (*cpu_present_to_apicid)(int mps_cpu);
+ physid_mask_t (*apicid_to_cpu_present)(int phys_apicid);
+ int (*mpc_apic_id)(struct mpc_config_processor *m,
+ struct mpc_config_translation *t);
+ void (*setup_portio_remap)(void);
+ int (*check_phys_apicid_present)(int boot_cpu_physical_apicid);
+ void (*enable_apic_mode)(void);
+ u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb);
+
+ /* mpparse */
+ void (*mpc_oem_bus_info)(struct mpc_config_bus *, char *,
+ struct mpc_config_translation *);
+ void (*mpc_oem_pci_bus)(struct mpc_config_bus *,
+ struct mpc_config_translation *);
+
+ /* When one of the next two hooks returns 1 the genapic
+ is switched to this. Essentially they are additional probe
+ functions. */
+ int (*mps_oem_check)(struct mp_config_table *mpc, char *oem,
+ char *productid);
+ int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
+
+ unsigned (*get_apic_id)(unsigned long x);
+ unsigned long apic_id_mask;
+ unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+
+ /* ipi */
+ void (*send_IPI_mask)(cpumask_t mask, int vector);
+ void (*send_IPI_allbutself)(int vector);
+ void (*send_IPI_all)(int vector);
+};
+
+#define APICFUNC(x) .x = x
+
+#define APIC_INIT(aname, aprobe) { \
+ .name = aname, \
+ .probe = aprobe, \
+ .int_delivery_mode = INT_DELIVERY_MODE, \
+ .int_dest_mode = INT_DEST_MODE, \
+ .no_balance_irq = NO_BALANCE_IRQ, \
+ .no_ioapic_check = NO_IOAPIC_CHECK, \
+ .ESR_DISABLE = esr_disable, \
+ .apic_destination_logical = APIC_DEST_LOGICAL, \
+ APICFUNC(apic_id_registered), \
+ APICFUNC(target_cpus), \
+ APICFUNC(check_apicid_used), \
+ APICFUNC(check_apicid_present), \
+ APICFUNC(init_apic_ldr), \
+ APICFUNC(ioapic_phys_id_map), \
+ APICFUNC(clustered_apic_check), \
+ APICFUNC(multi_timer_check), \
+ APICFUNC(apicid_to_node), \
+ APICFUNC(cpu_to_logical_apicid), \
+ APICFUNC(cpu_present_to_apicid), \
+ APICFUNC(apicid_to_cpu_present), \
+ APICFUNC(mpc_apic_id), \
+ APICFUNC(setup_portio_remap), \
+ APICFUNC(check_phys_apicid_present), \
+ APICFUNC(mpc_oem_bus_info), \
+ APICFUNC(mpc_oem_pci_bus), \
+ APICFUNC(mps_oem_check), \
+ APICFUNC(get_apic_id), \
+ .apic_id_mask = APIC_ID_MASK, \
+ APICFUNC(cpu_mask_to_apicid), \
+ APICFUNC(acpi_madt_oem_check), \
+ APICFUNC(send_IPI_mask), \
+ APICFUNC(send_IPI_allbutself), \
+ APICFUNC(send_IPI_all), \
+ APICFUNC(enable_apic_mode), \
+ APICFUNC(phys_pkg_id), \
+ }
+
+extern struct genapic *genapic;
+
+#endif
diff --git a/xen/include/asm-x86/hardirq.h b/xen/include/asm-x86/hardirq.h
index 576efd3c7c..04fa38b4be 100644
--- a/xen/include/asm-x86/hardirq.h
+++ b/xen/include/asm-x86/hardirq.h
@@ -15,7 +15,7 @@ typedef struct {
#define in_irq() (local_irq_count(smp_processor_id()) != 0)
-#define irq_enter(cpu, irq) (local_irq_count(cpu)++)
-#define irq_exit(cpu, irq) (local_irq_count(cpu)--)
+#define irq_enter(cpu) (local_irq_count(cpu)++)
+#define irq_exit(cpu) (local_irq_count(cpu)--)
#endif /* __ASM_HARDIRQ_H */
diff --git a/xen/include/asm-x86/i387.h b/xen/include/asm-x86/i387.h
index 5221da49db..f9216605ed 100644
--- a/xen/include/asm-x86/i387.h
+++ b/xen/include/asm-x86/i387.h
@@ -15,24 +15,29 @@
#include <asm/processor.h>
extern void init_fpu(void);
-extern void save_init_fpu( struct domain *tsk );
-extern void restore_fpu( struct domain *tsk );
+extern void save_init_fpu(struct vcpu *tsk);
+extern void restore_fpu(struct vcpu *tsk);
-#define unlazy_fpu( tsk ) do { \
- if ( test_bit(DF_USEDFPU, &tsk->flags) ) \
- save_init_fpu( tsk ); \
-} while (0)
-
-#define clear_fpu( tsk ) do { \
- if ( test_and_clear_bit(DF_USEDFPU, &tsk->flags) ) { \
- asm volatile("fwait"); \
- stts(); \
- } \
-} while (0)
+#define unlazy_fpu(_tsk) do { \
+ if ( test_bit(_VCPUF_fpu_dirtied, &(_tsk)->vcpu_flags) ) \
+ save_init_fpu(_tsk); \
+} while ( 0 )
#define load_mxcsr( val ) do { \
- unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
- asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
-} while (0)
+ unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
+ __asm__ __volatile__ ( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+} while ( 0 )
+
+/* Make domain the FPU owner */
+static inline void setup_fpu(struct vcpu *v)
+{
+ if ( !test_and_set_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
+ {
+ if ( test_bit(_VCPUF_fpu_initialised, &v->vcpu_flags) )
+ restore_fpu(v);
+ else
+ init_fpu();
+ }
+}
#endif /* __ASM_I386_I387_H */
diff --git a/xen/include/asm-x86/init.h b/xen/include/asm-x86/init.h
new file mode 100644
index 0000000000..8f1e764080
--- /dev/null
+++ b/xen/include/asm-x86/init.h
@@ -0,0 +1,29 @@
+#ifndef _XEN_ASM_INIT_H
+#define _XEN_ASM_INIT_H
+
+/*
+ * Mark functions and data as being only used at initialization
+ * or exit time.
+ */
+#define __init \
+ __attribute__ ((__section__ (".init.text")))
+#define __exit \
+ __attribute_used__ __attribute__ ((__section__(".text.exit")))
+#define __initdata \
+ __attribute__ ((__section__ (".init.data")))
+#define __exitdata \
+ __attribute_used__ __attribute__ ((__section__ (".data.exit")))
+#define __initsetup \
+ __attribute_used__ __attribute__ ((__section__ (".setup.init")))
+#define __init_call \
+ __attribute_used__ __attribute__ ((__section__ (".initcall.init")))
+#define __exit_call \
+ __attribute_used__ __attribute__ ((__section__ (".exitcall.exit")))
+
+/* For assembly routines
+#define __INIT .section ".text.init","ax"
+#define __FINIT .previous
+#define __INITDATA .section ".data.init","aw"
+*/
+
+#endif /* _XEN_ASM_INIT_H */
diff --git a/xen/include/asm-x86/io.h b/xen/include/asm-x86/io.h
index 2d92fc9234..2b733e4e86 100644
--- a/xen/include/asm-x86/io.h
+++ b/xen/include/asm-x86/io.h
@@ -54,6 +54,15 @@ static inline void * phys_to_virt(unsigned long address)
#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table))
#define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
+/* We don't need real ioremap() on Xen/x86. */
+#define ioremap(x,l) (__va(x))
+
+#define readb(x) (*(volatile char *)(x))
+#define readw(x) (*(volatile short *)(x))
+#define readl(x) (*(volatile int *)(x))
+#define writeb(d,x) (*(volatile char *)(x) = (d))
+#define writew(d,x) (*(volatile short *)(x) = (d))
+#define writel(d,x) (*(volatile int *)(x) = (d))
/*
* IO bus memory addresses are also 1:1 with the physical address
diff --git a/xen/include/asm-x86/io_apic.h b/xen/include/asm-x86/io_apic.h
index 1c2b24085a..4cacb8b419 100644
--- a/xen/include/asm-x86/io_apic.h
+++ b/xen/include/asm-x86/io_apic.h
@@ -2,7 +2,8 @@
#define __ASM_IO_APIC_H
#include <xen/config.h>
-#include <xen/types.h>
+#include <asm/fixmap.h>
+#include <asm/types.h>
#include <asm/mpspec.h>
/*
@@ -13,42 +14,51 @@
#ifdef CONFIG_X86_IO_APIC
-#define APIC_MISMATCH_DEBUG
-
#define IO_APIC_BASE(idx) \
- ((volatile int *)(fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
+ ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
/*
* The structure of the IO-APIC:
*/
-struct IO_APIC_reg_00 {
- __u32 __reserved_2 : 14,
- LTS : 1,
- delivery_type : 1,
- __reserved_1 : 8,
- ID : 4,
- __reserved_0 : 4;
-} __attribute__ ((packed));
+union IO_APIC_reg_00 {
+ u32 raw;
+ struct {
+ u32 __reserved_2 : 14,
+ LTS : 1,
+ delivery_type : 1,
+ __reserved_1 : 8,
+ ID : 8;
+ } __attribute__ ((packed)) bits;
+};
-struct IO_APIC_reg_01 {
- __u32 version : 8,
- __reserved_2 : 7,
- PRQ : 1,
- entries : 8,
- __reserved_1 : 8;
-} __attribute__ ((packed));
+union IO_APIC_reg_01 {
+ u32 raw;
+ struct {
+ u32 version : 8,
+ __reserved_2 : 7,
+ PRQ : 1,
+ entries : 8,
+ __reserved_1 : 8;
+ } __attribute__ ((packed)) bits;
+};
-struct IO_APIC_reg_02 {
- __u32 __reserved_2 : 24,
- arbitration : 4,
- __reserved_1 : 4;
-} __attribute__ ((packed));
+union IO_APIC_reg_02 {
+ u32 raw;
+ struct {
+ u32 __reserved_2 : 24,
+ arbitration : 4,
+ __reserved_1 : 4;
+ } __attribute__ ((packed)) bits;
+};
-struct IO_APIC_reg_03 {
- __u32 boot_DT : 1,
- __reserved_1 : 31;
-} __attribute__ ((packed));
+union IO_APIC_reg_03 {
+ u32 raw;
+ struct {
+ u32 boot_DT : 1,
+ __reserved_1 : 31;
+ } __attribute__ ((packed)) bits;
+};
/*
* # of IO-APICs and # of IRQ routing registers
@@ -106,7 +116,7 @@ extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
extern int mp_irq_entries;
/* MP IRQ source entries */
-extern struct mpc_config_intsrc *mp_irqs;
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* non-0 if default (table-less) MP configuration */
extern int mpc_default_type;
@@ -124,45 +134,41 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
}
/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
*/
-static inline void io_apic_sync(unsigned int apic)
+#define sis_apic_bug 0 /* This may need propagating from domain0. */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
- (void) *(IO_APIC_BASE(apic)+4);
+ if (sis_apic_bug)
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = value;
}
+/* 1 if "noapic" boot option passed */
+extern int skip_ioapic_setup;
+
/*
* If we use the IO-APIC for IRQ routing, disable automatic
* assignment of PCI IRQ's.
*/
-#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
+#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
#ifdef CONFIG_ACPI_BOOT
extern int io_apic_get_unique_id (int ioapic, int apic_id);
extern int io_apic_get_version (int ioapic);
extern int io_apic_get_redir_entries (int ioapic);
extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low);
-#endif
-
-extern int skip_ioapic_setup; /* 1 for "noapic" */
-
-static inline void disable_ioapic_setup(void)
-{
- skip_ioapic_setup = 1;
-}
+#endif /*CONFIG_ACPI_BOOT*/
-static inline int ioapic_setup_disabled(void)
-{
- return skip_ioapic_setup;
-}
+extern int (*ioapic_renumber_irq)(int ioapic, int irq);
-#else /* !CONFIG_X86_IO_APIC */
+#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
+#endif
-static inline void disable_ioapic_setup(void)
-{ }
-
-#endif /* !CONFIG_X86_IO_APIC */
+extern int assign_irq_vector(int irq);
#endif
diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h
index 2779282659..07f9d32ccf 100644
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -6,147 +6,37 @@
#include <xen/config.h>
#include <asm/atomic.h>
#include <asm/asm_defns.h>
+#include <irq_vectors.h>
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
+#define IO_APIC_IRQ(irq) (((irq) >= 16) || ((1<<(irq)) & io_apic_irqs))
+#define IO_APIC_VECTOR(irq) (irq_vector[irq])
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define FIRST_EXTERNAL_VECTOR 0x30
+#define LEGACY_VECTOR(irq) ((irq) + FIRST_EXTERNAL_VECTOR)
+#define LEGACY_IRQ_FROM_VECTOR(vec) ((vec) - FIRST_EXTERNAL_VECTOR)
-#define NR_IRQS (256 - FIRST_EXTERNAL_VECTOR)
+#define irq_to_vector(irq) \
+ (IO_APIC_IRQ(irq) ? IO_APIC_VECTOR(irq) : LEGACY_VECTOR(irq))
+#define vector_to_irq(vec) (vector_irq[vec])
-#define HYPERCALL_VECTOR 0x82
+extern int vector_irq[NR_VECTORS];
+extern u8 irq_vector[NR_IRQ_VECTORS];
+#define AUTO_ASSIGN -1
-/*
- * Vectors 0x30-0x3f are used for ISA interrupts.
- */
+#define platform_legacy_irq(irq) ((irq) < 16)
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- */
-#define SPURIOUS_APIC_VECTOR 0xff
-#define ERROR_APIC_VECTOR 0xfe
-#define INVALIDATE_TLB_VECTOR 0xfd
-#define EVENT_CHECK_VECTOR 0xfc
-#define CALL_FUNCTION_VECTOR 0xfb
-#define KDB_VECTOR 0xfa
+void disable_8259A_irq(unsigned int irq);
+void enable_8259A_irq(unsigned int irq);
+int i8259A_irq_pending(unsigned int irq);
+void init_8259A(int aeoi);
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR 0xef
-
-/*
- * First APIC vector available to drivers: (vectors 0x40-0xee)
- * we start at 0x41 to spread out vectors evenly between priority
- * levels. (0x82 is the hypercall vector)
- */
-#define FIRST_DEVICE_VECTOR 0x41
-#define FIRST_SYSTEM_VECTOR 0xef
-
-extern int irq_vector[NR_IRQS];
-#define IO_APIC_VECTOR(irq) irq_vector[irq]
-
-/*
- * Various low-level irq details needed by irq.c, process.c,
- * time.c, io_apic.c and smp.c
- *
- * Interrupt entry/exit code at both C and assembly level
- */
-
-extern void mask_irq(unsigned int irq);
-extern void unmask_irq(unsigned int irq);
-extern void disable_8259A_irq(unsigned int irq);
-extern void enable_8259A_irq(unsigned int irq);
-extern int i8259A_irq_pending(unsigned int irq);
-extern void make_8259A_irq(unsigned int irq);
-extern void init_8259A(int aeoi);
-extern void send_IPI_self(int vector);
-extern void init_VISWS_APIC_irqs(void);
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-extern void print_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-extern void send_IPI(int dest, int vector);
+void setup_IO_APIC(void);
+void disable_IO_APIC(void);
+void print_IO_APIC(void);
+void setup_ioapic_dest(void);
extern unsigned long io_apic_irqs;
extern atomic_t irq_err_count;
extern atomic_t irq_mis_count;
-extern char _stext, _etext;
-
-#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
-
-#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
-#define XBUILD_SMP_INTERRUPT(x,v)\
-asmlinkage void x(void); \
-asmlinkage void call_##x(void); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(x) ":\n\t" \
- "push"__OS" $"#v"<<16\n\t" \
- SAVE_ALL(a) \
- SYMBOL_NAME_STR(call_##x)":\n\t" \
- "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
- "jmp ret_from_intr\n");
-
-#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
-#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
-asmlinkage void x(struct xen_regs * regs); \
-asmlinkage void call_##x(void); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(x) ":\n\t" \
- "push"__OS" $"#v"<<16\n\t" \
- SAVE_ALL(a) \
- "mov %"__OP"sp,%"__OP"ax\n\t" \
- "push %"__OP"ax\n\t" \
- SYMBOL_NAME_STR(call_##x)":\n\t" \
- "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
- "add $4,%"__OP"sp\n\t" \
- "jmp ret_from_intr\n");
-
-#define BUILD_COMMON_IRQ() \
-asmlinkage void call_do_IRQ(void); \
-__asm__( \
- "\n" __ALIGN_STR"\n" \
- "common_interrupt:\n\t" \
- SAVE_ALL(a) \
- SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \
- "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \
- "jmp ret_from_intr\n");
-
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-
-#define BUILD_IRQ(nr) \
-asmlinkage void IRQ_NAME(nr); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
- "push"__OS" $"#nr"<<16\n\t" \
- "jmp common_interrupt");
-
-extern unsigned long prof_cpu_mask;
-extern unsigned int *prof_buffer;
-extern unsigned long prof_len;
-extern unsigned long prof_shift;
-
-#include <xen/irq.h>
-
-static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
-{
-#if defined(CONFIG_X86_IO_APIC)
- if (IO_APIC_IRQ(i))
- send_IPI_self(IO_APIC_VECTOR(i));
-#endif
-}
-
#endif /* _ASM_HW_IRQ_H */
diff --git a/xen/include/asm-x86/ldt.h b/xen/include/asm-x86/ldt.h
index 5b13bec994..8288ffa118 100644
--- a/xen/include/asm-x86/ldt.h
+++ b/xen/include/asm-x86/ldt.h
@@ -1,25 +1,26 @@
+
#ifndef __ARCH_LDT_H
#define __ARCH_LDT_H
#ifndef __ASSEMBLY__
-static inline void load_LDT(struct domain *p)
+static inline void load_LDT(struct vcpu *v)
{
unsigned int cpu;
struct desc_struct *desc;
unsigned long ents;
-
- if ( (ents = p->mm.ldt_ents) == 0 )
+
+ if ( (ents = v->arch.guest_context.ldt_ents) == 0 )
{
__asm__ __volatile__ ( "lldt %%ax" : : "a" (0) );
}
else
{
cpu = smp_processor_id();
- desc = (struct desc_struct *)GET_GDT_ADDRESS(p) + __LDT(cpu);
- desc->a = ((LDT_VIRT_START&0xffff)<<16) | (ents*8-1);
- desc->b = (LDT_VIRT_START&(0xff<<24)) | 0x8200 |
- ((LDT_VIRT_START&0xff0000)>>16);
+ desc = gdt_table + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
+ desc->a = ((LDT_VIRT_START(v)&0xffff)<<16) | (ents*8-1);
+ desc->b = (LDT_VIRT_START(v)&(0xff<<24)) | 0x8200 |
+ ((LDT_VIRT_START(v)&0xff0000)>>16);
__asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
}
}
@@ -27,3 +28,13 @@ static inline void load_LDT(struct domain *p)
#endif /* !__ASSEMBLY__ */
#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/mach-bigsmp/mach_apic.h b/xen/include/asm-x86/mach-bigsmp/mach_apic.h
new file mode 100644
index 0000000000..1540c1f934
--- /dev/null
+++ b/xen/include/asm-x86/mach-bigsmp/mach_apic.h
@@ -0,0 +1,167 @@
+#ifndef __ASM_MACH_APIC_H
+#define __ASM_MACH_APIC_H
+#include <asm/smp.h>
+
+#define SEQUENTIAL_APICID
+#ifdef SEQUENTIAL_APICID
+#define xapic_phys_to_log_apicid(phys_apic) ( (1ul << ((phys_apic) & 0x3)) |\
+ ((phys_apic<<2) & (~0xf)) )
+#elif CLUSTERED_APICID
+#define xapic_phys_to_log_apicid(phys_apic) ( (1ul << ((phys_apic) & 0x3)) |\
+ ((phys_apic) & (~0xf)) )
+#endif
+
+#define NO_BALANCE_IRQ (1)
+#define esr_disable (1)
+
+#define NO_IOAPIC_CHECK (0)
+
+static inline int apic_id_registered(void)
+{
+ return (1);
+}
+
+#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+/* Round-robin the irqs among the online cpus */
+static inline cpumask_t target_cpus(void)
+{
+ static unsigned long cpu = NR_CPUS;
+ do {
+ if (cpu >= NR_CPUS)
+ cpu = first_cpu(cpu_online_map);
+ else
+ cpu = next_cpu(cpu, cpu_online_map);
+ } while (cpu >= NR_CPUS);
+ return cpumask_of_cpu(cpu);
+}
+#define TARGET_CPUS (target_cpus())
+
+#define INT_DELIVERY_MODE dest_Fixed
+#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
+
+static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return 0;
+}
+
+/* we don't use the phys_cpu_present_map to indicate apicid presence */
+static inline unsigned long check_apicid_present(int bit)
+{
+ return 1;
+}
+
+#define apicid_cluster(apicid) (apicid & 0xF0)
+
+static inline unsigned long calculate_ldr(unsigned long old)
+{
+ unsigned long id;
+ id = xapic_phys_to_log_apicid(hard_smp_processor_id());
+ return ((old & ~APIC_LDR_MASK) | SET_APIC_LOGICAL_ID(id));
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+static inline void init_apic_ldr(void)
+{
+ unsigned long val;
+
+ apic_write_around(APIC_DFR, APIC_DFR_VALUE);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val = calculate_ldr(val);
+ apic_write_around(APIC_LDR, val);
+}
+
+static inline void clustered_apic_check(void)
+{
+ printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
+ "Cluster", nr_ioapics);
+}
+
+static inline int multi_timer_check(int apic, int irq)
+{
+ return 0;
+}
+
+static inline int apicid_to_node(int logical_apicid)
+{
+ return 0;
+}
+
+extern u8 bios_cpu_apicid[];
+
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < NR_CPUS)
+ return (int)bios_cpu_apicid[mps_cpu];
+ else
+ return BAD_APICID;
+}
+
+static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
+{
+ return physid_mask_of_physid(phys_apicid);
+}
+
+extern u8 cpu_2_logical_apicid[];
+/* Mapping from cpu number to logical apicid */
+static inline int cpu_to_logical_apicid(int cpu)
+{
+ if (cpu >= NR_CPUS)
+ return BAD_APICID;
+ return (int)cpu_2_logical_apicid[cpu];
+ }
+
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+ struct mpc_config_translation *translation_record)
+{
+ printk("Processor #%d %d:%d APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
+ return m->mpc_apicid;
+}
+
+static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ return physids_promote(0xFUL);
+}
+
+#define WAKE_SECONDARY_VIA_INIT
+
+static inline void setup_portio_remap(void)
+{
+}
+
+static inline void enable_apic_mode(void)
+{
+}
+
+static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return (1);
+}
+
+/* As we are using single CPU as destination, pick only one CPU here */
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+ int apicid;
+
+ cpu = first_cpu(cpumask);
+ apicid = cpu_to_logical_apicid(cpu);
+ return apicid;
+}
+
+static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+#endif /* __ASM_MACH_APIC_H */
diff --git a/xen/include/asm-x86/mach-bigsmp/mach_apicdef.h b/xen/include/asm-x86/mach-bigsmp/mach_apicdef.h
new file mode 100644
index 0000000000..23e58b317c
--- /dev/null
+++ b/xen/include/asm-x86/mach-bigsmp/mach_apicdef.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_MACH_APICDEF_H
+#define __ASM_MACH_APICDEF_H
+
+#define APIC_ID_MASK (0x0F<<24)
+
+static inline unsigned get_apic_id(unsigned long x)
+{
+ return (((x)>>24)&0x0F);
+}
+
+#define GET_APIC_ID(x) get_apic_id(x)
+
+#endif
diff --git a/xen/include/asm-x86/mach-bigsmp/mach_ipi.h b/xen/include/asm-x86/mach-bigsmp/mach_ipi.h
new file mode 100644
index 0000000000..9404c535b7
--- /dev/null
+++ b/xen/include/asm-x86/mach-bigsmp/mach_ipi.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_MACH_IPI_H
+#define __ASM_MACH_IPI_H
+
+void send_IPI_mask_sequence(cpumask_t mask, int vector);
+
+static inline void send_IPI_mask(cpumask_t mask, int vector)
+{
+ send_IPI_mask_sequence(mask, vector);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ send_IPI_mask(mask, vector);
+}
+
+static inline void send_IPI_all(int vector)
+{
+ send_IPI_mask(cpu_online_map, vector);
+}
+
+#endif /* __ASM_MACH_IPI_H */
diff --git a/xen/include/asm-x86/mach-default/bios_ebda.h b/xen/include/asm-x86/mach-default/bios_ebda.h
new file mode 100644
index 0000000000..9cbd9a668a
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/bios_ebda.h
@@ -0,0 +1,15 @@
+#ifndef _MACH_BIOS_EBDA_H
+#define _MACH_BIOS_EBDA_H
+
+/*
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E.
+ */
+static inline unsigned int get_bios_ebda(void)
+{
+ unsigned int address = *(unsigned short *)phys_to_virt(0x40E);
+ address <<= 4;
+ return address; /* 0 means none */
+}
+
+#endif /* _MACH_BIOS_EBDA_H */
diff --git a/xen/include/asm-x86/mach-default/io_ports.h b/xen/include/asm-x86/mach-default/io_ports.h
new file mode 100644
index 0000000000..a96d9f6604
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/io_ports.h
@@ -0,0 +1,30 @@
+/*
+ * arch/i386/mach-generic/io_ports.h
+ *
+ * Machine specific IO port address definition for generic.
+ * Written by Osamu Tomita <tomita@cinet.co.jp>
+ */
+#ifndef _MACH_IO_PORTS_H
+#define _MACH_IO_PORTS_H
+
+/* i8253A PIT registers */
+#define PIT_MODE 0x43
+#define PIT_CH0 0x40
+#define PIT_CH2 0x42
+
+/* i8259A PIC registers */
+#define PIC_MASTER_CMD 0x20
+#define PIC_MASTER_IMR 0x21
+#define PIC_MASTER_ISR PIC_MASTER_CMD
+#define PIC_MASTER_POLL PIC_MASTER_ISR
+#define PIC_MASTER_OCW3 PIC_MASTER_ISR
+#define PIC_SLAVE_CMD 0xa0
+#define PIC_SLAVE_IMR 0xa1
+
+/* i8259A PIC related value */
+#define PIC_CASCADE_IR 2
+#define MASTER_ICW4_DEFAULT 0x01
+#define SLAVE_ICW4_DEFAULT 0x01
+#define PIC_ICW4_AEOI 2
+
+#endif /* !_MACH_IO_PORTS_H */
diff --git a/xen/include/asm-x86/mach-default/irq_vectors.h b/xen/include/asm-x86/mach-default/irq_vectors.h
new file mode 100644
index 0000000000..de16b5bc9c
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h
@@ -0,0 +1,96 @@
+/*
+ * This file should contain #defines for all of the interrupt vector
+ * numbers used by this architecture.
+ *
+ * In addition, there are some standard defines:
+ *
+ * FIRST_EXTERNAL_VECTOR:
+ * The first free place for external interrupts
+ *
+ * SYSCALL_VECTOR:
+ * The IRQ vector under which a syscall makes the
+ * user-to-kernel transition.
+ *
+ * TIMER_IRQ:
+ * The IRQ number the timer interrupt comes in at.
+ *
+ * NR_IRQS:
+ * The total number of interrupt vectors (including all the
+ * architecture specific interrupts) needed.
+ *
+ */
+#ifndef _ASM_IRQ_VECTORS_H
+#define _ASM_IRQ_VECTORS_H
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+#define FIRST_EXTERNAL_VECTOR 0x20
+
+#define HYPERCALL_VECTOR 0x82
+
+/*
+ * Vectors 0x20-0x2f are used for ISA interrupts.
+ */
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ * some of the following vectors are 'rare', they are merged
+ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ * TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ * Vectors 0xf0-0xfa are free (reserved for future Linux use).
+ */
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
+#define INVALIDATE_TLB_VECTOR 0xfd
+#define EVENT_CHECK_VECTOR 0xfc
+#define CALL_FUNCTION_VECTOR 0xfb
+
+#define THERMAL_APIC_VECTOR 0xf0
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x30-0xee)
+ * we start at 0x31 to spread out vectors evenly between priority
+ * levels. (0x80 is the syscall vector)
+ */
+#define FIRST_DEVICE_VECTOR 0x31
+#define FIRST_SYSTEM_VECTOR 0xef
+
+#define TIMER_IRQ 0
+
+/*
+ * 16 8259A IRQ's, 208 potential APIC interrupt sources.
+ * Right now the APIC is mostly only used for SMP.
+ * 256 vectors is an architectural limit. (we can have
+ * more than 256 devices theoretically, but they will
+ * have to use shared interrupts)
+ * Since vectors 0x00-0x1f are used/reserved for the CPU,
+ * the usable vector space is 0x20-0xff (224 vectors)
+ */
+
+/*
+ * The maximum number of vectors supported by i386 processors
+ * is limited to 256. For processors other than i386, NR_VECTORS
+ * should be changed accordingly.
+ */
+#define NR_VECTORS 256
+
+#include "irq_vectors_limits.h"
+
+#define FPU_IRQ 13
+
+#define FIRST_VM86_IRQ 3
+#define LAST_VM86_IRQ 15
+#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
+
+
+#endif /* _ASM_IRQ_VECTORS_H */
diff --git a/xen/include/asm-x86/mach-default/irq_vectors_limits.h b/xen/include/asm-x86/mach-default/irq_vectors_limits.h
new file mode 100644
index 0000000000..11e263cb18
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/irq_vectors_limits.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_IRQ_VECTORS_LIMITS_H
+#define _ASM_IRQ_VECTORS_LIMITS_H
+
+/* Limited by number of trap vectors. */
+#define NR_IRQS FIRST_SYSTEM_VECTOR
+#define NR_IRQ_VECTORS NR_IRQS
+
+#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/xen/include/asm-x86/mach-default/mach_apic.h b/xen/include/asm-x86/mach-default/mach_apic.h
new file mode 100644
index 0000000000..4bf3c5ae34
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/mach_apic.h
@@ -0,0 +1,133 @@
+#ifndef __ASM_MACH_APIC_H
+#define __ASM_MACH_APIC_H
+
+#include <mach_apicdef.h>
+#include <asm/smp.h>
+
+#define APIC_DFR_VALUE (APIC_DFR_FLAT)
+
+static inline cpumask_t target_cpus(void)
+{
+#ifdef CONFIG_SMP
+ return cpu_online_map;
+#else
+ return cpumask_of_cpu(0);
+#endif
+}
+#define TARGET_CPUS (target_cpus())
+
+#define NO_BALANCE_IRQ (0)
+#define esr_disable (0)
+
+#define NO_IOAPIC_CHECK (0)
+
+#define INT_DELIVERY_MODE dest_LowestPrio
+#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
+
+static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return physid_isset(apicid, bitmap);
+}
+
+static inline unsigned long check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+static inline void init_apic_ldr(void)
+{
+ unsigned long val;
+
+ apic_write_around(APIC_DFR, APIC_DFR_VALUE);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+ apic_write_around(APIC_LDR, val);
+}
+
+static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
+{
+ return phys_map;
+}
+
+static inline void clustered_apic_check(void)
+{
+ printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
+ "Flat", nr_ioapics);
+}
+
+static inline int multi_timer_check(int apic, int irq)
+{
+ return 0;
+}
+
+static inline int apicid_to_node(int logical_apicid)
+{
+ return 0;
+}
+
+/* Mapping from cpu number to logical apicid */
+static inline int cpu_to_logical_apicid(int cpu)
+{
+ return 1 << cpu;
+}
+
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < get_physical_broadcast())
+ return mps_cpu;
+ else
+ return BAD_APICID;
+}
+
+static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
+{
+ return physid_mask_of_physid(phys_apicid);
+}
+
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+ struct mpc_config_translation *translation_record)
+{
+ printk("Processor #%d %d:%d APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
+ return (m->mpc_apicid);
+}
+
+static inline void setup_portio_remap(void)
+{
+}
+
+static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map);
+}
+
+static inline int apic_id_registered(void)
+{
+ return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map);
+}
+
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ return cpus_addr(cpumask)[0];
+}
+
+static inline void enable_apic_mode(void)
+{
+}
+
+static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+#endif /* __ASM_MACH_APIC_H */
diff --git a/xen/include/asm-x86/mach-default/mach_apicdef.h b/xen/include/asm-x86/mach-default/mach_apicdef.h
new file mode 100644
index 0000000000..7bcb350c3e
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/mach_apicdef.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_MACH_APICDEF_H
+#define __ASM_MACH_APICDEF_H
+
+#define APIC_ID_MASK (0xF<<24)
+
+static inline unsigned get_apic_id(unsigned long x)
+{
+ return (((x)>>24)&0xF);
+}
+
+#define GET_APIC_ID(x) get_apic_id(x)
+
+#endif
diff --git a/xen/include/asm-x86/mach-default/mach_ipi.h b/xen/include/asm-x86/mach-default/mach_ipi.h
new file mode 100644
index 0000000000..6f2b17a200
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/mach_ipi.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_MACH_IPI_H
+#define __ASM_MACH_IPI_H
+
+void send_IPI_mask_bitmask(cpumask_t mask, int vector);
+void __send_IPI_shortcut(unsigned int shortcut, int vector);
+
+static inline void send_IPI_mask(cpumask_t mask, int vector)
+{
+ send_IPI_mask_bitmask(mask, vector);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+ /*
+ * if there are no other CPUs in the system then we get an APIC send
+ * error if we try to broadcast, thus avoid sending IPIs in this case.
+ */
+ if (!(num_online_cpus() > 1))
+ return;
+
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+ return;
+}
+
+static inline void send_IPI_all(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
+#endif /* __ASM_MACH_IPI_H */
diff --git a/xen/include/asm-x86/mach-default/mach_mpparse.h b/xen/include/asm-x86/mach-default/mach_mpparse.h
new file mode 100644
index 0000000000..1d38324825
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/mach_mpparse.h
@@ -0,0 +1,28 @@
+#ifndef __ASM_MACH_MPPARSE_H
+#define __ASM_MACH_MPPARSE_H
+
+static inline void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
+ struct mpc_config_translation *translation)
+{
+// Dprintk("Bus #%d is %s\n", m->mpc_busid, name);
+}
+
+static inline void mpc_oem_pci_bus(struct mpc_config_bus *m,
+ struct mpc_config_translation *translation)
+{
+}
+
+static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
+ char *productid)
+{
+ return 0;
+}
+
+/* Hook from generic ACPI tables.c */
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 0;
+}
+
+
+#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/xen/include/asm-x86/mach-default/mach_wakecpu.h b/xen/include/asm-x86/mach-default/mach_wakecpu.h
new file mode 100644
index 0000000000..673b85c9b2
--- /dev/null
+++ b/xen/include/asm-x86/mach-default/mach_wakecpu.h
@@ -0,0 +1,41 @@
+#ifndef __ASM_MACH_WAKECPU_H
+#define __ASM_MACH_WAKECPU_H
+
+/*
+ * This file copes with machines that wakeup secondary CPUs by the
+ * INIT, INIT, STARTUP sequence.
+ */
+
+#define WAKE_SECONDARY_VIA_INIT
+
+#define TRAMPOLINE_LOW phys_to_virt(0x467)
+#define TRAMPOLINE_HIGH phys_to_virt(0x469)
+
+#define boot_cpu_apicid boot_cpu_physical_apicid
+
+static inline void wait_for_init_deassert(atomic_t *deassert)
+{
+ while (!atomic_read(deassert));
+ return;
+}
+
+/* Nothing to do for most platforms, since cleared by the INIT cycle */
+static inline void smp_callin_clear_local_apic(void)
+{
+}
+
+static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
+{
+}
+
+static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
+{
+}
+
+#if APIC_DEBUG
+ #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid)
+#else
+ #define inquire_remote_apic(apicid) {}
+#endif
+
+#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/xen/include/asm-x86/mach-es7000/mach_apic.h b/xen/include/asm-x86/mach-es7000/mach_apic.h
new file mode 100644
index 0000000000..4cf0e75a6d
--- /dev/null
+++ b/xen/include/asm-x86/mach-es7000/mach_apic.h
@@ -0,0 +1,207 @@
+#ifndef __ASM_MACH_APIC_H
+#define __ASM_MACH_APIC_H
+
+extern u8 bios_cpu_apicid[];
+
+#define xapic_phys_to_log_apicid(cpu) (bios_cpu_apicid[cpu])
+#define esr_disable (1)
+
+static inline int apic_id_registered(void)
+{
+ return (1);
+}
+
+static inline cpumask_t target_cpus(void)
+{
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+ return CPU_MASK_ALL;
+#else
+ return cpumask_of_cpu(smp_processor_id());
+#endif
+}
+#define TARGET_CPUS (target_cpus())
+
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+#define INT_DELIVERY_MODE (dest_LowestPrio)
+#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */
+#define NO_BALANCE_IRQ (1)
+#undef WAKE_SECONDARY_VIA_INIT
+#define WAKE_SECONDARY_VIA_MIP
+#else
+#define APIC_DFR_VALUE (APIC_DFR_FLAT)
+#define INT_DELIVERY_MODE (dest_Fixed)
+#define INT_DEST_MODE (0) /* phys delivery to target procs */
+#define NO_BALANCE_IRQ (0)
+#undef APIC_DEST_LOGICAL
+#define APIC_DEST_LOGICAL 0x0
+#define WAKE_SECONDARY_VIA_INIT
+#endif
+
+#define NO_IOAPIC_CHECK (1)
+
+static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return 0;
+}
+static inline unsigned long check_apicid_present(int bit)
+{
+ return physid_isset(bit, phys_cpu_present_map);
+}
+
+#define apicid_cluster(apicid) (apicid & 0xF0)
+
+static inline unsigned long calculate_ldr(int cpu)
+{
+ unsigned long id;
+ id = xapic_phys_to_log_apicid(cpu);
+ return (SET_APIC_LOGICAL_ID(id));
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LdR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+static inline void init_apic_ldr(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write_around(APIC_DFR, APIC_DFR_VALUE);
+ val = calculate_ldr(cpu);
+ apic_write_around(APIC_LDR, val);
+}
+
+extern void es7000_sw_apic(void);
+static inline void enable_apic_mode(void)
+{
+ es7000_sw_apic();
+ return;
+}
+
+extern int apic_version [MAX_APICS];
+static inline void clustered_apic_check(void)
+{
+ int apic = bios_cpu_apicid[smp_processor_id()];
+ printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
+ (apic_version[apic] == 0x14) ?
+ "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
+}
+
+static inline int multi_timer_check(int apic, int irq)
+{
+ return 0;
+}
+
+static inline int apicid_to_node(int logical_apicid)
+{
+ return 0;
+}
+
+
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (!mps_cpu)
+ return boot_cpu_physical_apicid;
+ else if (mps_cpu < NR_CPUS)
+ return (int) bios_cpu_apicid[mps_cpu];
+ else
+ return BAD_APICID;
+}
+
+static inline physid_mask_t apicid_to_cpu_present(int phys_apicid)
+{
+ static int id = 0;
+ physid_mask_t mask;
+ mask = physid_mask_of_physid(id);
+ ++id;
+ return mask;
+}
+
+extern u8 cpu_2_logical_apicid[];
+/* Mapping from cpu number to logical apicid */
+static inline int cpu_to_logical_apicid(int cpu)
+{
+ if (cpu >= NR_CPUS)
+ return BAD_APICID;
+ return (int)cpu_2_logical_apicid[cpu];
+}
+
+static inline int mpc_apic_id(struct mpc_config_processor *m, struct mpc_config_translation *unused)
+{
+ printk("Processor #%d %d:%d APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
+ return (m->mpc_apicid);
+}
+
+static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ return physids_promote(0xff);
+}
+
+
+static inline void setup_portio_remap(void)
+{
+}
+
+extern unsigned int boot_cpu_physical_apicid;
+static inline int check_phys_apicid_present(int cpu_physical_apicid)
+{
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+ return (1);
+}
+
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int num_bits_set;
+ int cpus_found = 0;
+ int cpu;
+ int apicid;
+
+ num_bits_set = cpus_weight(cpumask);
+ /* Return id to all */
+ if (num_bits_set == NR_CPUS)
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+ return 0xFF;
+#else
+ return cpu_to_logical_apicid(0);
+#endif
+ /*
+ * The cpus in the mask must all be on the apic cluster. If are not
+ * on the same apicid cluster return default value of TARGET_CPUS.
+ */
+ cpu = first_cpu(cpumask);
+ apicid = cpu_to_logical_apicid(cpu);
+ while (cpus_found < num_bits_set) {
+ if (cpu_isset(cpu, cpumask)) {
+ int new_apicid = cpu_to_logical_apicid(cpu);
+ if (apicid_cluster(apicid) !=
+ apicid_cluster(new_apicid)){
+ printk ("%s: Not a valid mask!\n",__FUNCTION__);
+#if defined CONFIG_ES7000_CLUSTERED_APIC
+ return 0xFF;
+#else
+ return cpu_to_logical_apicid(0);
+#endif
+ }
+ apicid = new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
+static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
+{
+ return cpuid_apic >> index_msb;
+}
+
+#endif /* __ASM_MACH_APIC_H */
diff --git a/xen/include/asm-x86/mach-es7000/mach_apicdef.h b/xen/include/asm-x86/mach-es7000/mach_apicdef.h
new file mode 100644
index 0000000000..a58ab5a75c
--- /dev/null
+++ b/xen/include/asm-x86/mach-es7000/mach_apicdef.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_MACH_APICDEF_H
+#define __ASM_MACH_APICDEF_H
+
+#define APIC_ID_MASK (0xFF<<24)
+
+static inline unsigned get_apic_id(unsigned long x)
+{
+ return (((x)>>24)&0xFF);
+}
+
+#define GET_APIC_ID(x) get_apic_id(x)
+
+#endif
diff --git a/xen/include/asm-x86/mach-es7000/mach_ipi.h b/xen/include/asm-x86/mach-es7000/mach_ipi.h
new file mode 100644
index 0000000000..5e61bd220b
--- /dev/null
+++ b/xen/include/asm-x86/mach-es7000/mach_ipi.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_MACH_IPI_H
+#define __ASM_MACH_IPI_H
+
+void send_IPI_mask_sequence(cpumask_t mask, int vector);
+
+static inline void send_IPI_mask(cpumask_t mask, int vector)
+{
+ send_IPI_mask_sequence(mask, vector);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+ cpu_clear(smp_processor_id(), mask);
+ if (!cpus_empty(mask))
+ send_IPI_mask(mask, vector);
+}
+
+static inline void send_IPI_all(int vector)
+{
+ send_IPI_mask(cpu_online_map, vector);
+}
+
+#endif /* __ASM_MACH_IPI_H */
diff --git a/xen/include/asm-x86/mach-es7000/mach_mpparse.h b/xen/include/asm-x86/mach-es7000/mach_mpparse.h
new file mode 100644
index 0000000000..a7c2658e3c
--- /dev/null
+++ b/xen/include/asm-x86/mach-es7000/mach_mpparse.h
@@ -0,0 +1,41 @@
+#ifndef __ASM_MACH_MPPARSE_H
+#define __ASM_MACH_MPPARSE_H
+
+static inline void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
+ struct mpc_config_translation *translation)
+{
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, name);
+}
+
+static inline void mpc_oem_pci_bus(struct mpc_config_bus *m,
+ struct mpc_config_translation *translation)
+{
+}
+
+extern int parse_unisys_oem (char *oemptr, int oem_entries);
+extern int find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length);
+
+static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
+ char *productid)
+{
+ if (mpc->mpc_oemptr) {
+ struct mp_config_oemtable *oem_table =
+ (struct mp_config_oemtable *)(long)mpc->mpc_oemptr;
+ if (!strncmp(oem, "UNISYS", 6))
+ return parse_unisys_oem((char *)oem_table, oem_table->oem_length);
+ }
+ return 0;
+}
+
+/* Hook from generic ACPI tables.c */
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ unsigned long oem_addr;
+ int oem_entries;
+ if (!find_unisys_acpi_oem_table(&oem_addr, &oem_entries))
+ return parse_unisys_oem((char *)oem_addr, oem_entries);
+ return 0;
+}
+
+
+#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/xen/include/asm-x86/mach-es7000/mach_wakecpu.h b/xen/include/asm-x86/mach-es7000/mach_wakecpu.h
new file mode 100644
index 0000000000..efc903b734
--- /dev/null
+++ b/xen/include/asm-x86/mach-es7000/mach_wakecpu.h
@@ -0,0 +1,58 @@
+#ifndef __ASM_MACH_WAKECPU_H
+#define __ASM_MACH_WAKECPU_H
+
+/*
+ * This file copes with machines that wakeup secondary CPUs by the
+ * INIT, INIT, STARTUP sequence.
+ */
+
+#ifdef CONFIG_ES7000_CLUSTERED_APIC
+#define WAKE_SECONDARY_VIA_MIP
+#else
+#define WAKE_SECONDARY_VIA_INIT
+#endif
+
+#ifdef WAKE_SECONDARY_VIA_MIP
+extern int es7000_start_cpu(int cpu, unsigned long eip);
+static inline int
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+{
+ int boot_error = 0;
+ boot_error = es7000_start_cpu(phys_apicid, start_eip);
+ return boot_error;
+}
+#endif
+
+#define TRAMPOLINE_LOW phys_to_virt(0x467)
+#define TRAMPOLINE_HIGH phys_to_virt(0x469)
+
+#define boot_cpu_apicid boot_cpu_physical_apicid
+
+static inline void wait_for_init_deassert(atomic_t *deassert)
+{
+#ifdef WAKE_SECONDARY_VIA_INIT
+ while (!atomic_read(deassert));
+#endif
+ return;
+}
+
+/* Nothing to do for most platforms, since cleared by the INIT cycle */
+static inline void smp_callin_clear_local_apic(void)
+{
+}
+
+static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
+{
+}
+
+static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
+{
+}
+
+#if APIC_DEBUG
+ #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid)
+#else
+ #define inquire_remote_apic(apicid) {}
+#endif
+
+#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/xen/include/asm-x86/mach-generic/mach_apic.h b/xen/include/asm-x86/mach-generic/mach_apic.h
new file mode 100644
index 0000000000..ab36d02ebe
--- /dev/null
+++ b/xen/include/asm-x86/mach-generic/mach_apic.h
@@ -0,0 +1,32 @@
+#ifndef __ASM_MACH_APIC_H
+#define __ASM_MACH_APIC_H
+
+#include <asm/genapic.h>
+
+#define esr_disable (genapic->ESR_DISABLE)
+#define NO_BALANCE_IRQ (genapic->no_balance_irq)
+#define NO_IOAPIC_CHECK (genapic->no_ioapic_check)
+#define INT_DELIVERY_MODE (genapic->int_delivery_mode)
+#define INT_DEST_MODE (genapic->int_dest_mode)
+#undef APIC_DEST_LOGICAL
+#define APIC_DEST_LOGICAL (genapic->apic_destination_logical)
+#define TARGET_CPUS (genapic->target_cpus())
+#define apic_id_registered (genapic->apic_id_registered)
+#define init_apic_ldr (genapic->init_apic_ldr)
+#define ioapic_phys_id_map (genapic->ioapic_phys_id_map)
+#define clustered_apic_check (genapic->clustered_apic_check)
+#define multi_timer_check (genapic->multi_timer_check)
+#define apicid_to_node (genapic->apicid_to_node)
+#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid)
+#define cpu_present_to_apicid (genapic->cpu_present_to_apicid)
+#define apicid_to_cpu_present (genapic->apicid_to_cpu_present)
+#define mpc_apic_id (genapic->mpc_apic_id)
+#define setup_portio_remap (genapic->setup_portio_remap)
+#define check_apicid_present (genapic->check_apicid_present)
+#define check_phys_apicid_present (genapic->check_phys_apicid_present)
+#define check_apicid_used (genapic->check_apicid_used)
+#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define enable_apic_mode (genapic->enable_apic_mode)
+#define phys_pkg_id (genapic->phys_pkg_id)
+
+#endif /* __ASM_MACH_APIC_H */
diff --git a/xen/include/asm-x86/mach-generic/mach_apicdef.h b/xen/include/asm-x86/mach-generic/mach_apicdef.h
new file mode 100644
index 0000000000..28ed98972c
--- /dev/null
+++ b/xen/include/asm-x86/mach-generic/mach_apicdef.h
@@ -0,0 +1,11 @@
+#ifndef _GENAPIC_MACH_APICDEF_H
+#define _GENAPIC_MACH_APICDEF_H 1
+
+#ifndef APIC_DEFINITION
+#include <asm/genapic.h>
+
+#define GET_APIC_ID (genapic->get_apic_id)
+#define APIC_ID_MASK (genapic->apic_id_mask)
+#endif
+
+#endif
diff --git a/xen/include/asm-x86/mach-generic/mach_ipi.h b/xen/include/asm-x86/mach-generic/mach_ipi.h
new file mode 100644
index 0000000000..441b0fe3ed
--- /dev/null
+++ b/xen/include/asm-x86/mach-generic/mach_ipi.h
@@ -0,0 +1,10 @@
+#ifndef _MACH_IPI_H
+#define _MACH_IPI_H 1
+
+#include <asm/genapic.h>
+
+#define send_IPI_mask (genapic->send_IPI_mask)
+#define send_IPI_allbutself (genapic->send_IPI_allbutself)
+#define send_IPI_all (genapic->send_IPI_all)
+
+#endif
diff --git a/xen/include/asm-x86/mach-generic/mach_mpparse.h b/xen/include/asm-x86/mach-generic/mach_mpparse.h
new file mode 100644
index 0000000000..dbd9fce54f
--- /dev/null
+++ b/xen/include/asm-x86/mach-generic/mach_mpparse.h
@@ -0,0 +1,12 @@
+#ifndef _MACH_MPPARSE_H
+#define _MACH_MPPARSE_H 1
+
+#include <asm/genapic.h>
+
+#define mpc_oem_bus_info (genapic->mpc_oem_bus_info)
+#define mpc_oem_pci_bus (genapic->mpc_oem_pci_bus)
+
+int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid);
+int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
+
+#endif
diff --git a/xen/include/asm-x86/mach-generic/mach_mpspec.h b/xen/include/asm-x86/mach-generic/mach_mpspec.h
new file mode 100644
index 0000000000..9ef0b941bb
--- /dev/null
+++ b/xen/include/asm-x86/mach-generic/mach_mpspec.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_MACH_MPSPEC_H
+#define __ASM_MACH_MPSPEC_H
+
+#define MAX_IRQ_SOURCES 256
+
+/* Summit or generic (i.e. installer) kernels need lots of bus entries. */
+/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */
+#define MAX_MP_BUSSES 260
+
+#endif /* __ASM_MACH_MPSPEC_H */
diff --git a/xen/include/asm-x86/mach-summit/mach_apic.h b/xen/include/asm-x86/mach-summit/mach_apic.h
new file mode 100644
index 0000000000..61cac6b453
--- /dev/null
+++ b/xen/include/asm-x86/mach-summit/mach_apic.h
@@ -0,0 +1,189 @@
+#ifndef __ASM_MACH_APIC_H
+#define __ASM_MACH_APIC_H
+
+#include <xen/config.h>
+#include <asm/smp.h>
+
+#define esr_disable (1)
+#define NO_BALANCE_IRQ (0)
+
+#define NO_IOAPIC_CHECK (1) /* Don't check I/O APIC ID for xAPIC */
+
+/* In clustered mode, the high nibble of APIC ID is a cluster number.
+ * The low nibble is a 4-bit bitmap. */
+#define XAPIC_DEST_CPUS_SHIFT 4
+#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
+#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
+
+#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+
+static inline cpumask_t target_cpus(void)
+{
+ /* CPU_MASK_ALL (0xff) has undefined behaviour with
+ * dest_LowestPrio mode logical clustered apic interrupt routing
+ * Just start on cpu 0. IRQ balancing will spread load
+ */
+ return cpumask_of_cpu(0);
+}
+#define TARGET_CPUS (target_cpus())
+
+#define INT_DELIVERY_MODE (dest_LowestPrio)
+#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
+
+static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+ return 0;
+}
+
+/* we don't use the phys_cpu_present_map to indicate apicid presence */
+static inline unsigned long check_apicid_present(int bit)
+{
+ return 1;
+}
+
+#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK)
+
+extern u8 bios_cpu_apicid[];
+extern u8 cpu_2_logical_apicid[];
+
+static inline void init_apic_ldr(void)
+{
+ unsigned long val, id;
+ int i, count;
+ u8 lid;
+ u8 my_id = (u8)hard_smp_processor_id();
+ u8 my_cluster = (u8)apicid_cluster(my_id);
+
+ /* Create logical APIC IDs by counting CPUs already in cluster. */
+ for (count = 0, i = NR_CPUS; --i >= 0; ) {
+ lid = cpu_2_logical_apicid[i];
+ if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
+ ++count;
+ }
+ /* We only have a 4 wide bitmap in cluster mode. If a deranged
+ * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
+ BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
+ id = my_cluster | (1UL << count);
+ apic_write_around(APIC_DFR, APIC_DFR_VALUE);
+ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+ val |= SET_APIC_LOGICAL_ID(id);
+ apic_write_around(APIC_LDR, val);
+}
+
+static inline int multi_timer_check(int apic, int irq)
+{
+ return 0;
+}
+
+static inline int apic_id_registered(void)
+{
+ return 1;
+}
+
+static inline void clustered_apic_check(void)
+{
+ printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
+ nr_ioapics);
+}
+
+static inline int apicid_to_node(int logical_apicid)
+{
+ return logical_apicid >> 5; /* 2 clusterids per CEC */
+}
+
+/* Mapping from cpu number to logical apicid */
+static inline int cpu_to_logical_apicid(int cpu)
+{
+ if (cpu >= NR_CPUS)
+ return BAD_APICID;
+ return (int)cpu_2_logical_apicid[cpu];
+}
+
+static inline int cpu_present_to_apicid(int mps_cpu)
+{
+ if (mps_cpu < NR_CPUS)
+ return (int)bios_cpu_apicid[mps_cpu];
+ else
+ return BAD_APICID;
+}
+
+static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map)
+{
+ /* For clustered we don't have a good way to do this yet - hack */
+ return physids_promote(0x0F);
+}
+
+static inline physid_mask_t apicid_to_cpu_present(int apicid)
+{
+ return physid_mask_of_physid(0);
+}
+
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+ struct mpc_config_translation *translation_record)
+{
+ printk("Processor #%d %d:%d APIC version %d\n",
+ m->mpc_apicid,
+ (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+ (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+ m->mpc_apicver);
+ return (m->mpc_apicid);
+}
+
+static inline void setup_portio_remap(void)
+{
+}
+
+static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
+{
+ return 1;
+}
+
+static inline void enable_apic_mode(void)
+{
+}
+
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int num_bits_set;
+ int cpus_found = 0;
+ int cpu;
+ int apicid;
+
+ num_bits_set = cpus_weight(cpumask);
+ /* Return id to all */
+ if (num_bits_set == NR_CPUS)
+ return (int) 0xFF;
+ /*
+ * The cpus in the mask must all be on the apic cluster. If are not
+ * on the same apicid cluster return default value of TARGET_CPUS.
+ */
+ cpu = first_cpu(cpumask);
+ apicid = cpu_to_logical_apicid(cpu);
+ while (cpus_found < num_bits_set) {
+ if (cpu_isset(cpu, cpumask)) {
+ int new_apicid = cpu_to_logical_apicid(cpu);
+ if (apicid_cluster(apicid) !=
+ apicid_cluster(new_apicid)){
+ printk ("%s: Not a valid mask!\n",__FUNCTION__);
+ return 0xFF;
+ }
+ apicid = apicid | new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
+/* cpuid returns the value latched in the HW at reset, not the APIC ID
+ * register's value. For any box whose BIOS changes APIC IDs, like
+ * clustered APIC systems, we must use hard_smp_processor_id.
+ *
+ * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
+ */
+static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
+{
+ return hard_smp_processor_id() >> index_msb;
+}
+
+#endif /* __ASM_MACH_APIC_H */
diff --git a/xen/include/asm-x86/mach-summit/mach_apicdef.h b/xen/include/asm-x86/mach-summit/mach_apicdef.h
new file mode 100644
index 0000000000..a58ab5a75c
--- /dev/null
+++ b/xen/include/asm-x86/mach-summit/mach_apicdef.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_MACH_APICDEF_H
+#define __ASM_MACH_APICDEF_H
+
+#define APIC_ID_MASK (0xFF<<24)
+
+static inline unsigned get_apic_id(unsigned long x)
+{
+ return (((x)>>24)&0xFF);
+}
+
+#define GET_APIC_ID(x) get_apic_id(x)
+
+#endif
diff --git a/xen/include/asm-x86/mach-summit/mach_ipi.h b/xen/include/asm-x86/mach-summit/mach_ipi.h
new file mode 100644
index 0000000000..9404c535b7
--- /dev/null
+++ b/xen/include/asm-x86/mach-summit/mach_ipi.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_MACH_IPI_H
+#define __ASM_MACH_IPI_H
+
+void send_IPI_mask_sequence(cpumask_t mask, int vector);
+
+static inline void send_IPI_mask(cpumask_t mask, int vector)
+{
+ send_IPI_mask_sequence(mask, vector);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ send_IPI_mask(mask, vector);
+}
+
+static inline void send_IPI_all(int vector)
+{
+ send_IPI_mask(cpu_online_map, vector);
+}
+
+#endif /* __ASM_MACH_IPI_H */
diff --git a/xen/include/asm-x86/mach-summit/mach_mpparse.h b/xen/include/asm-x86/mach-summit/mach_mpparse.h
new file mode 100644
index 0000000000..88f6c50da2
--- /dev/null
+++ b/xen/include/asm-x86/mach-summit/mach_mpparse.h
@@ -0,0 +1,121 @@
+#ifndef __ASM_MACH_MPPARSE_H
+#define __ASM_MACH_MPPARSE_H
+
+#include <mach_apic.h>
+
+extern int use_cyclone;
+
+#ifdef CONFIG_X86_SUMMIT_NUMA
+extern void setup_summit(void);
+#else
+#define setup_summit() {}
+#endif
+
+static inline void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
+ struct mpc_config_translation *translation)
+{
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, name);
+}
+
+static inline void mpc_oem_pci_bus(struct mpc_config_bus *m,
+ struct mpc_config_translation *translation)
+{
+}
+
+extern int usb_early_handoff;
+static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
+ char *productid)
+{
+ if (!strncmp(oem, "IBM ENSW", 8) &&
+ (!strncmp(productid, "VIGIL SMP", 9)
+ || !strncmp(productid, "EXA", 3)
+ || !strncmp(productid, "RUTHLESS SMP", 12))){
+ /*use_cyclone = 1;*/ /*enable cyclone-timer*/
+ setup_summit();
+ /*usb_early_handoff = 1;*/
+ return 1;
+ }
+ return 0;
+}
+
+/* Hook from generic ACPI tables.c */
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strncmp(oem_id, "IBM", 3) &&
+ (!strncmp(oem_table_id, "SERVIGIL", 8)
+ || !strncmp(oem_table_id, "EXA", 3))){
+ /*use_cyclone = 1;*/ /*enable cyclone-timer*/
+ setup_summit();
+ /*usb_early_handoff = 1;*/
+ return 1;
+ }
+ return 0;
+}
+
+struct rio_table_hdr {
+ unsigned char version; /* Version number of this data structure */
+ /* Version 3 adds chassis_num & WP_index */
+ unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
+ unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
+} __attribute__((packed));
+
+struct scal_detail {
+ unsigned char node_id; /* Scalability Node ID */
+ unsigned long CBAR; /* Address of 1MB register space */
+ unsigned char port0node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port1node; /* Node ID port connected to: 0xFF = None */
+ unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port2node; /* Node ID port connected to: 0xFF = None */
+ unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
+} __attribute__((packed));
+
+struct rio_detail {
+ unsigned char node_id; /* RIO Node ID */
+ unsigned long BBAR; /* Address of 1MB register space */
+ unsigned char type; /* Type of device */
+ unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
+ /* For CYC: Node ID of Twister that owns this CYC */
+ unsigned char port0node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char port1node; /* Node ID port connected to: 0xFF=None */
+ unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
+ unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
+ /* For CYC: 0 */
+ unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
+ /* = 0 : the XAPIC is not used, ie:*/
+ /* ints fwded to another XAPIC */
+ /* Bits1:7 Reserved */
+ /* For CYC: Bits0:7 Reserved */
+ unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
+ /* lower slot numbers/PCI bus numbers */
+ /* For CYC: No meaning */
+ unsigned char chassis_num; /* 1 based Chassis number */
+ /* For LookOut WPEGs this field indicates the */
+ /* Expansion Chassis #, enumerated from Boot */
+ /* Node WPEG external port, then Boot Node CYC */
+ /* external port, then Next Vigil chassis WPEG */
+ /* external port, etc. */
+ /* Shared Lookouts have only 1 chassis number (the */
+ /* first one assigned) */
+} __attribute__((packed));
+
+
+typedef enum {
+ CompatTwister = 0, /* Compatibility Twister */
+ AltTwister = 1, /* Alternate Twister of internal 8-way */
+ CompatCyclone = 2, /* Compatibility Cyclone */
+ AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
+ CompatWPEG = 4, /* Compatibility WPEG */
+ AltWPEG = 5, /* Second Planar WPEG */
+ LookOutAWPEG = 6, /* LookOut WPEG */
+ LookOutBWPEG = 7, /* LookOut WPEG */
+} node_type;
+
+static inline int is_WPEG(struct rio_detail *rio){
+ return (rio->type == CompatWPEG || rio->type == AltWPEG ||
+ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
+}
+
+#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 2af462e37d..47793cb2d3 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -3,18 +3,10 @@
#define __ASM_X86_MM_H__
#include <xen/config.h>
+#include <xen/cpumask.h>
#include <xen/list.h>
-#include <xen/spinlock.h>
-#include <xen/perfc.h>
-#include <xen/sched.h>
-
-#include <asm/processor.h>
-#include <asm/atomic.h>
-#include <asm/desc.h>
-#include <asm/flushtlb.h>
#include <asm/io.h>
-
-#include <public/xen.h>
+#include <asm/uaccess.h>
/*
* Per-page-frame information.
@@ -30,6 +22,9 @@ struct pfn_info
/* Each frame can be threaded onto a doubly-linked list. */
struct list_head list;
+ /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
+ u32 tlbflush_timestamp;
+
/* Reference count and various PGC_xxx flags and fields. */
u32 count_info;
@@ -39,24 +34,22 @@ struct pfn_info
/* Page is in use: ((count_info & PGC_count_mask) != 0). */
struct {
/* Owner of this page (NULL if page is anonymous). */
- struct domain *domain;
+ u32 _domain; /* pickled format */
/* Type reference count and various PGT_xxx flags and fields. */
u32 type_info;
- } inuse;
+ } PACKED inuse;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
/* Mask of possibly-tainted TLBs. */
- unsigned long cpu_mask;
+ cpumask_t cpumask;
/* Order-size of the free chunk this page is the head of. */
u8 order;
- } free;
+ } PACKED free;
- } u;
+ } PACKED u;
- /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
- u32 tlbflush_timestamp;
-};
+} PACKED;
/* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none (0<<29) /* no special uses of this page */
@@ -67,28 +60,49 @@ struct pfn_info
#define PGT_gdt_page (5<<29) /* using this page in a GDT? */
#define PGT_ldt_page (6<<29) /* using this page in an LDT? */
#define PGT_writable_page (7<<29) /* has writable mappings of this page? */
+
+#define PGT_l1_shadow PGT_l1_page_table
+#define PGT_l2_shadow PGT_l2_page_table
+#define PGT_l3_shadow PGT_l3_page_table
+#define PGT_l4_shadow PGT_l4_page_table
+#define PGT_hl2_shadow (5<<29)
+#define PGT_snapshot (6<<29)
+#define PGT_writable_pred (7<<29) /* predicted gpfn with writable ref */
+
#define PGT_type_mask (7<<29) /* Bits 29-31. */
+
/* Has this page been validated for use as its current type? */
#define _PGT_validated 28
#define PGT_validated (1U<<_PGT_validated)
/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned 27
#define PGT_pinned (1U<<_PGT_pinned)
- /* The 10 most significant bits of virt address if this is a page table. */
-#define PGT_va_shift 17
-#define PGT_va_mask (((1U<<10)-1)<<PGT_va_shift)
+ /* The 11 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 16
+#define PGT_va_mask (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
-#define PGT_va_mutable (((1U<<10)-1)<<PGT_va_shift)
+#define PGT_va_mutable (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
-#define PGT_va_unknown (((1U<<10)-2)<<PGT_va_shift)
- /* 17-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1U<<17)-1)
+#define PGT_va_unknown (((1U<<11)-2)<<PGT_va_shift)
+ /* 16-bit count of uses of this frame as its current type. */
+#define PGT_count_mask ((1U<<16)-1)
+
+#define PGT_mfn_mask ((1U<<20)-1) /* mfn mask for shadow types */
+
+#define PGT_score_shift 20
+#define PGT_score_mask (((1U<<4)-1)<<PGT_score_shift)
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 31
#define PGC_allocated (1U<<_PGC_allocated)
- /* 31-bit count of references to this frame. */
-#define PGC_count_mask ((1U<<31)-1)
+ /* Set when fullshadow mode marks a page out-of-sync */
+#define _PGC_out_of_sync 30
+#define PGC_out_of_sync (1U<<_PGC_out_of_sync)
+ /* Set when fullshadow mode is using a page as a page table */
+#define _PGC_page_table 29
+#define PGC_page_table (1U<<_PGC_page_table)
+ /* 29-bit count of references to this frame. */
+#define PGC_count_mask ((1U<<29)-1)
/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page) (1)
@@ -97,9 +111,22 @@ struct pfn_info
#define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)
+#if defined(__i386__)
+#define pickle_domptr(_d) ((u32)(unsigned long)(_d))
+#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
+#elif defined(__x86_64__)
+static inline struct domain *unpickle_domptr(u32 _domain)
+{ return (_domain == 0) ? NULL : __va(_domain); }
+static inline u32 pickle_domptr(struct domain *domain)
+{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
+#endif
+
+#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
+#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+
#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \
do { \
- (_pfn)->u.inuse.domain = (_dom); \
+ page_set_owner((_pfn), (_dom)); \
/* The incremented type count is intended to pin to 'writable'. */ \
(_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \
wmb(); /* install valid domain ptr before updating refcnt. */ \
@@ -113,15 +140,17 @@ struct pfn_info
spin_unlock(&(_dom)->page_alloc_lock); \
} while ( 0 )
-#define INVALID_P2M_ENTRY (~0UL)
-
extern struct pfn_info *frame_table;
-extern unsigned long frame_table_size;
extern unsigned long max_page;
void init_frametable(void);
int alloc_page_type(struct pfn_info *page, unsigned int type);
void free_page_type(struct pfn_info *page, unsigned int type);
+extern void invalidate_shadow_ldt(struct vcpu *d);
+extern int shadow_remove_all_write_access(
+ struct domain *d, unsigned long gpfn, unsigned long gmfn);
+extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
+extern int _shadow_mode_refcounts(struct domain *d);
static inline void put_page(struct pfn_info *page)
{
@@ -142,7 +171,8 @@ static inline int get_page(struct pfn_info *page,
struct domain *domain)
{
u32 x, nx, y = page->count_info;
- struct domain *d, *nd = page->u.inuse.domain;
+ u32 d, nd = page->u.inuse._domain;
+ u32 _domain = pickle_domptr(domain);
do {
x = y;
@@ -150,11 +180,12 @@ static inline int get_page(struct pfn_info *page,
d = nd;
if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
- unlikely(d != domain) ) /* Wrong owner? */
+ unlikely(d != _domain) ) /* Wrong owner? */
{
- DPRINTK("Error pfn %08lx: ed=%p, sd=%p, caf=%08x, taf=%08x\n",
- page_to_pfn(page), domain, d,
- x, page->u.inuse.type_info);
+ if ( !_shadow_mode_refcounts(domain) )
+ DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%08x\n",
+ page_to_pfn(page), domain, unpickle_domptr(d),
+ x, page->u.inuse.type_info);
return 0;
}
__asm__ __volatile__(
@@ -170,6 +201,8 @@ static inline int get_page(struct pfn_info *page,
void put_page_type(struct pfn_info *page);
int get_page_type(struct pfn_info *page, u32 type);
+int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
+void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
static inline void put_page_and_type(struct pfn_info *page)
{
@@ -198,15 +231,9 @@ static inline int get_page_and_type(struct pfn_info *page,
ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d) \
ASSERT(((_p)->count_info & PGC_count_mask) != 0); \
- ASSERT((_p)->u.inuse.domain == (_d))
+ ASSERT(page_get_owner(_p) == (_d))
-int check_descriptor(unsigned long *d);
-
-/*
- * Use currently-executing domain's pagetables on the specified CPUs.
- * i.e., stop borrowing someone else's tables if you are the idle domain.
- */
-void synchronise_pagetables(unsigned long cpu_mask);
+int check_descriptor(struct desc_struct *d);
/*
* The MPT (machine->physical mapping table) is an array of word-sized
@@ -215,42 +242,52 @@ void synchronise_pagetables(unsigned long cpu_mask);
* contiguous (or near contiguous) physical memory.
*/
#undef machine_to_phys_mapping
-#ifdef __x86_64__
-extern unsigned long *machine_to_phys_mapping;
-#else
-/* Don't call virt_to_phys on this: it isn't direct mapped. Using
- m2p_start_mfn instead. */
-#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
-extern unsigned long m2p_start_mfn;
-#endif
+#define machine_to_phys_mapping ((u32 *)RDWR_MPT_VIRT_START)
+#define INVALID_M2P_ENTRY (~0U)
+#define VALID_M2P(_e) (!((_e) & (1U<<31)))
+#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
-#define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn)
+/*
+ * The phys_to_machine_mapping is the reversed mapping of MPT for full
+ * virtualization. It is only used by shadow_mode_translate()==true
+ * guests, so we steal the address space that would have normally
+ * been used by the read-only MPT map.
+ */
+#define __phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
+#define INVALID_MFN (~0UL)
+#define VALID_MFN(_mfn) (!((_mfn) & (1U<<31)))
-#define DEFAULT_GDT_ENTRIES (LAST_RESERVED_GDT_ENTRY+1)
-#define DEFAULT_GDT_ADDRESS ((unsigned long)gdt_table)
+/* Returns the machine frame number (mfn) for the given guest pfn. */
+static inline unsigned long phys_to_machine_mapping(unsigned long pfn)
+{
+ unsigned long mfn;
+ l1_pgentry_t pte;
+
+ if ( (__copy_from_user(&pte, &__phys_to_machine_mapping[pfn],
+ sizeof(pte)) == 0) &&
+ (l1e_get_flags(pte) & _PAGE_PRESENT) )
+ mfn = l1e_get_pfn(pte);
+ else
+ mfn = INVALID_MFN;
+
+ return mfn;
+}
+#define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn)
#ifdef MEMORY_GUARD
-void *memguard_init(void *heap_start);
+void memguard_init(void);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
-int memguard_is_guarded(void *p);
#else
-#define memguard_init(_s) (_s)
+#define memguard_init() ((void)0)
#define memguard_guard_range(_p,_l) ((void)0)
#define memguard_unguard_range(_p,_l) ((void)0)
-#define memguard_is_guarded(_p) (0)
#endif
-
-typedef struct {
- void (*enable)(struct domain *);
- void (*disable)(struct domain *);
-} vm_assist_info_t;
-extern vm_assist_info_t vm_assist_info[];
-
+void memguard_guard_stack(void *p);
/* Writable Pagetables */
-typedef struct {
+struct ptwr_info {
/* Linear address where the guest is updating the p.t. page. */
unsigned long l1va;
/* Copy of the p.t. page, taken before guest is given write access. */
@@ -259,13 +296,11 @@ typedef struct {
l1_pgentry_t *pl1e;
/* Index in L2 page table where this L1 p.t. is always hooked. */
unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
-} ptwr_ptinfo_t;
-
-typedef struct {
- ptwr_ptinfo_t ptinfo[2];
-} __cacheline_aligned ptwr_info_t;
-
-extern ptwr_info_t ptwr_info[];
+ /* Info about last ptwr update batch. */
+ unsigned int prev_nr_updates;
+ /* Exec domain which created writable mapping. */
+ struct vcpu *vcpu;
+};
#define PTWR_PT_ACTIVE 0
#define PTWR_PT_INACTIVE 1
@@ -273,35 +308,45 @@ extern ptwr_info_t ptwr_info[];
#define PTWR_CLEANUP_ACTIVE 1
#define PTWR_CLEANUP_INACTIVE 2
-void ptwr_flush(const int);
-int ptwr_do_page_fault(unsigned long);
-
-#define __cleanup_writable_pagetable(_what) \
-do { \
- int cpu = smp_processor_id(); \
- if ((_what) & PTWR_CLEANUP_ACTIVE) \
- if (ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) \
- ptwr_flush(PTWR_PT_ACTIVE); \
- if ((_what) & PTWR_CLEANUP_INACTIVE) \
- if (ptwr_info[cpu].ptinfo[PTWR_PT_INACTIVE].l1va) \
- ptwr_flush(PTWR_PT_INACTIVE); \
-} while ( 0 )
-
-#define cleanup_writable_pagetable(_d) \
- do { \
- if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) \
- __cleanup_writable_pagetable(PTWR_CLEANUP_ACTIVE | \
- PTWR_CLEANUP_INACTIVE); \
- } while ( 0 )
+int ptwr_init(struct domain *);
+void ptwr_destroy(struct domain *);
+void ptwr_flush(struct domain *, const int);
+int ptwr_do_page_fault(struct domain *, unsigned long);
+int revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
+
+void cleanup_writable_pagetable(struct domain *d);
+#define sync_pagetable_state(d) cleanup_writable_pagetable(d)
+
+int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
#ifndef NDEBUG
-void audit_domain(struct domain *d);
+
+#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
+#define AUDIT_ERRORS_OK ( 1u << 1 )
+#define AUDIT_QUIET ( 1u << 2 )
+
+void _audit_domain(struct domain *d, int flags);
+#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
void audit_domains(void);
+
#else
-#define audit_domain(_d) ((void)0)
-#define audit_domains() ((void)0)
+
+#define _audit_domain(_d, _f) ((void)0)
+#define audit_domain(_d) ((void)0)
+#define audit_domains() ((void)0)
+
#endif
+int new_guest_cr3(unsigned long pfn);
+
void propagate_page_fault(unsigned long addr, u16 error_code);
+/*
+ * Caller must own d's BIGLOCK, is responsible for flushing the TLB, and must
+ * hold a reference to the page.
+ */
+int update_grant_va_mapping(unsigned long va,
+ l1_pgentry_t _nl1e,
+ struct domain *d,
+ struct vcpu *v);
#endif /* __ASM_X86_MM_H__ */
diff --git a/xen/include/asm-x86/mpspec.h b/xen/include/asm-x86/mpspec.h
index 1e73671c25..7add527173 100644
--- a/xen/include/asm-x86/mpspec.h
+++ b/xen/include/asm-x86/mpspec.h
@@ -1,242 +1,84 @@
#ifndef __ASM_MPSPEC_H
#define __ASM_MPSPEC_H
-#include <xen/config.h>
-#include <xen/types.h>
+#include <xen/cpumask.h>
+#include <asm/mpspec_def.h>
+#include <mach_mpspec.h>
-/*
- * Structure definitions for SMP machines following the
- * Intel Multiprocessing Specification 1.1 and 1.4.
- */
-
-/*
- * This tag identifies where the SMP configuration
- * information is.
- */
-
-#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
-
-/*
- * a maximum of 16 APICs with the current APIC ID architecture.
- * xAPICs can have up to 256. SAPICs have 16 ID bits.
- */
-#ifdef CONFIG_X86_CLUSTERED_APIC
-#define MAX_APICS 256
-#else
-#define MAX_APICS 16
-#endif
-
-#define MAX_MPC_ENTRY 1024
-
-struct intel_mp_floating
-{
- char mpf_signature[4]; /* "_MP_" */
- unsigned int mpf_physptr; /* Configuration table address */
- unsigned char mpf_length; /* Our length (paragraphs) */
- unsigned char mpf_specification;/* Specification version */
- unsigned char mpf_checksum; /* Checksum (makes sum 0) */
- unsigned char mpf_feature1; /* Standard or configuration ? */
- unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
- unsigned char mpf_feature3; /* Unused (0) */
- unsigned char mpf_feature4; /* Unused (0) */
- unsigned char mpf_feature5; /* Unused (0) */
-};
-
-struct mp_config_table
-{
- char mpc_signature[4];
-#define MPC_SIGNATURE "PCMP"
- unsigned short mpc_length; /* Size of table */
- char mpc_spec; /* 0x01 */
- char mpc_checksum;
- char mpc_oem[8];
- char mpc_productid[12];
- unsigned int mpc_oemptr; /* 0 if not present */
- unsigned short mpc_oemsize; /* 0 if not present */
- unsigned short mpc_oemcount;
- unsigned int mpc_lapic; /* APIC address */
- unsigned int reserved;
-};
-
-/* Followed by entries */
-
-#define MP_PROCESSOR 0
-#define MP_BUS 1
-#define MP_IOAPIC 2
-#define MP_INTSRC 3
-#define MP_LINTSRC 4
-#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */
-
-struct mpc_config_processor
-{
- unsigned char mpc_type;
- unsigned char mpc_apicid; /* Local APIC number */
- unsigned char mpc_apicver; /* Its versions */
- unsigned char mpc_cpuflag;
-#define CPU_ENABLED 1 /* Processor is available */
-#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
- unsigned int mpc_cpufeature;
-#define CPU_STEPPING_MASK 0x0F
-#define CPU_MODEL_MASK 0xF0
-#define CPU_FAMILY_MASK 0xF00
- unsigned int mpc_featureflag; /* CPUID feature value */
- unsigned int mpc_reserved[2];
-};
-
-struct mpc_config_bus
-{
- unsigned char mpc_type;
- unsigned char mpc_busid;
- unsigned char mpc_bustype[6] __attribute((packed));
-};
-
-/* List of Bus Type string values, Intel MP Spec. */
-#define BUSTYPE_EISA "EISA"
-#define BUSTYPE_ISA "ISA"
-#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
-#define BUSTYPE_MCA "MCA"
-#define BUSTYPE_VL "VL" /* Local bus */
-#define BUSTYPE_PCI "PCI"
-#define BUSTYPE_PCMCIA "PCMCIA"
-#define BUSTYPE_CBUS "CBUS"
-#define BUSTYPE_CBUSII "CBUSII"
-#define BUSTYPE_FUTURE "FUTURE"
-#define BUSTYPE_MBI "MBI"
-#define BUSTYPE_MBII "MBII"
-#define BUSTYPE_MPI "MPI"
-#define BUSTYPE_MPSA "MPSA"
-#define BUSTYPE_NUBUS "NUBUS"
-#define BUSTYPE_TC "TC"
-#define BUSTYPE_VME "VME"
-#define BUSTYPE_XPRESS "XPRESS"
-
-struct mpc_config_ioapic
-{
- unsigned char mpc_type;
- unsigned char mpc_apicid;
- unsigned char mpc_apicver;
- unsigned char mpc_flags;
-#define MPC_APIC_USABLE 0x01
- unsigned int mpc_apicaddr;
-};
-
-struct mpc_config_intsrc
-{
- unsigned char mpc_type;
- unsigned char mpc_irqtype;
- unsigned short mpc_irqflag;
- unsigned char mpc_srcbus;
- unsigned char mpc_srcbusirq;
- unsigned char mpc_dstapic;
- unsigned char mpc_dstirq;
-};
-
-enum mp_irq_source_types {
- mp_INT = 0,
- mp_NMI = 1,
- mp_SMI = 2,
- mp_ExtINT = 3
-};
-
-#define MP_IRQDIR_DEFAULT 0
-#define MP_IRQDIR_HIGH 1
-#define MP_IRQDIR_LOW 3
-
-
-struct mpc_config_lintsrc
-{
- unsigned char mpc_type;
- unsigned char mpc_irqtype;
- unsigned short mpc_irqflag;
- unsigned char mpc_srcbusid;
- unsigned char mpc_srcbusirq;
- unsigned char mpc_destapic;
-#define MP_APIC_ALL 0xFF
- unsigned char mpc_destapiclint;
-};
-
-struct mp_config_oemtable
-{
- char oem_signature[4];
-#define MPC_OEM_SIGNATURE "_OEM"
- unsigned short oem_length; /* Size of table */
- char oem_rev; /* 0x01 */
- char oem_checksum;
- char mpc_oem[8];
-};
-
-struct mpc_config_translation
-{
- unsigned char mpc_type;
- unsigned char trans_len;
- unsigned char trans_type;
- unsigned char trans_quad;
- unsigned char trans_global;
- unsigned char trans_local;
- unsigned short trans_reserved;
-};
-
-/*
- * Default configurations
- *
- * 1 2 CPU ISA 82489DX
- * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
- * 3 2 CPU EISA 82489DX
- * 4 2 CPU MCA 82489DX
- * 5 2 CPU ISA+PCI
- * 6 2 CPU EISA+PCI
- * 7 2 CPU MCA+PCI
- */
-
-#ifdef CONFIG_MULTIQUAD
-#define MAX_IRQ_SOURCES 512
-#else /* !CONFIG_MULTIQUAD */
-#define MAX_IRQ_SOURCES 256
-#endif /* CONFIG_MULTIQUAD */
-
-#define MAX_MP_BUSSES 32
-enum mp_bustype {
- MP_BUS_ISA = 1,
- MP_BUS_EISA,
- MP_BUS_PCI,
- MP_BUS_MCA
-};
-extern int *mp_bus_id_to_type;
-extern int *mp_bus_id_to_node;
-extern int *mp_bus_id_to_local;
-extern int *mp_bus_id_to_pci_bus;
+extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_bus_id_to_node [MAX_MP_BUSSES];
+extern int mp_bus_id_to_local [MAX_MP_BUSSES];
extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
extern unsigned int boot_cpu_physical_apicid;
-/*extern unsigned long phys_cpu_present_map;*/
extern int smp_found_config;
extern void find_smp_config (void);
extern void get_smp_config (void);
-/*extern int nr_ioapics;*/
+extern int nr_ioapics;
extern int apic_version [MAX_APICS];
-/*extern int mp_irq_entries;*/
-/*extern struct mpc_config_intsrc *mp_irqs;*/
-/*extern int mpc_default_type;*/
+extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_irq_entries;
+extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+extern int mpc_default_type;
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
extern int mp_current_pci_id;
extern unsigned long mp_lapic_addr;
-/*extern int pic_mode;*/
+extern int pic_mode;
extern int using_apic_timer;
#ifdef CONFIG_ACPI_BOOT
extern void mp_register_lapic (u8 id, u8 enabled);
extern void mp_register_lapic_address (u64 address);
-
-#ifdef CONFIG_X86_IO_APIC
-extern void mp_register_ioapic (u8 id, u32 address, u32 irq_base);
-extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 global_irq);
+extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
+extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi);
extern void mp_config_acpi_legacy_irqs (void);
-extern void mp_config_ioapic_for_sci(int irq);
-extern void mp_parse_prt (void);
-#else /*!CONFIG_X86_IO_APIC*/
-static inline void mp_config_ioapic_for_sci(int irq) { }
-#endif /*!CONFIG_X86_IO_APIC*/
-
+extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low);
#endif /*CONFIG_ACPI_BOOT*/
+#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS)
+
+struct physid_mask
+{
+ unsigned long mask[PHYSID_ARRAY_SIZE];
+};
+
+typedef struct physid_mask physid_mask_t;
+
+#define physid_set(physid, map) set_bit(physid, (map).mask)
+#define physid_clear(physid, map) clear_bit(physid, (map).mask)
+#define physid_isset(physid, map) test_bit(physid, (map).mask)
+#define physid_test_and_set(physid, map) test_and_set_bit(physid, (map).mask)
+
+#define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+#define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+#define physids_clear(map) bitmap_zero((map).mask, MAX_APICS)
+#define physids_complement(dst, src) bitmap_complement((dst).mask,(src).mask, MAX_APICS)
+#define physids_empty(map) bitmap_empty((map).mask, MAX_APICS)
+#define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
+#define physids_weight(map) bitmap_weight((map).mask, MAX_APICS)
+#define physids_shift_right(d, s, n) bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
+#define physids_shift_left(d, s, n) bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
+#define physids_coerce(map) ((map).mask[0])
+
+#define physids_promote(physids) \
+ ({ \
+ physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
+ __physid_mask.mask[0] = physids; \
+ __physid_mask; \
+ })
+
+#define physid_mask_of_physid(physid) \
+ ({ \
+ physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
+ physid_set(physid, __physid_mask); \
+ __physid_mask; \
+ })
+
+#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
+#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
+
+extern physid_mask_t phys_cpu_present_map;
+
#endif
diff --git a/xen/include/asm-x86/mpspec_def.h b/xen/include/asm-x86/mpspec_def.h
new file mode 100644
index 0000000000..902c030f7f
--- /dev/null
+++ b/xen/include/asm-x86/mpspec_def.h
@@ -0,0 +1,188 @@
+#ifndef __ASM_MPSPEC_DEF_H
+#define __ASM_MPSPEC_DEF_H
+
+/*
+ * Structure definitions for SMP machines following the
+ * Intel Multiprocessing Specification 1.1 and 1.4.
+ */
+
+/*
+ * This tag identifies where the SMP configuration
+ * information is.
+ */
+
+#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_')
+
+#define MAX_MPC_ENTRY 1024
+#define MAX_APICS 256
+
+struct intel_mp_floating
+{
+ char mpf_signature[4]; /* "_MP_" */
+ unsigned int mpf_physptr; /* Configuration table address */
+ unsigned char mpf_length; /* Our length (paragraphs) */
+ unsigned char mpf_specification;/* Specification version */
+ unsigned char mpf_checksum; /* Checksum (makes sum 0) */
+ unsigned char mpf_feature1; /* Standard or configuration ? */
+ unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */
+ unsigned char mpf_feature3; /* Unused (0) */
+ unsigned char mpf_feature4; /* Unused (0) */
+ unsigned char mpf_feature5; /* Unused (0) */
+};
+
+struct mp_config_table
+{
+ char mpc_signature[4];
+#define MPC_SIGNATURE "PCMP"
+ unsigned short mpc_length; /* Size of table */
+ char mpc_spec; /* 0x01 */
+ char mpc_checksum;
+ char mpc_oem[8];
+ char mpc_productid[12];
+ unsigned int mpc_oemptr; /* 0 if not present */
+ unsigned short mpc_oemsize; /* 0 if not present */
+ unsigned short mpc_oemcount;
+ unsigned int mpc_lapic; /* APIC address */
+ unsigned int reserved;
+};
+
+/* Followed by entries */
+
+#define MP_PROCESSOR 0
+#define MP_BUS 1
+#define MP_IOAPIC 2
+#define MP_INTSRC 3
+#define MP_LINTSRC 4
+#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */
+
+struct mpc_config_processor
+{
+ unsigned char mpc_type;
+ unsigned char mpc_apicid; /* Local APIC number */
+ unsigned char mpc_apicver; /* Its version */
+ unsigned char mpc_cpuflag;
+#define CPU_ENABLED 1 /* Processor is available */
+#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */
+ unsigned int mpc_cpufeature;
+#define CPU_STEPPING_MASK 0x0F
+#define CPU_MODEL_MASK 0xF0
+#define CPU_FAMILY_MASK 0xF00
+ unsigned int mpc_featureflag; /* CPUID feature value */
+ unsigned int mpc_reserved[2];
+};
+
+struct mpc_config_bus
+{
+ unsigned char mpc_type;
+ unsigned char mpc_busid;
+ unsigned char mpc_bustype[6] __attribute((packed));
+};
+
+/* List of Bus Type string values, Intel MP Spec. */
+#define BUSTYPE_EISA "EISA"
+#define BUSTYPE_ISA "ISA"
+#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
+#define BUSTYPE_MCA "MCA"
+#define BUSTYPE_VL "VL" /* Local bus */
+#define BUSTYPE_PCI "PCI"
+#define BUSTYPE_PCMCIA "PCMCIA"
+#define BUSTYPE_CBUS "CBUS"
+#define BUSTYPE_CBUSII "CBUSII"
+#define BUSTYPE_FUTURE "FUTURE"
+#define BUSTYPE_MBI "MBI"
+#define BUSTYPE_MBII "MBII"
+#define BUSTYPE_MPI "MPI"
+#define BUSTYPE_MPSA "MPSA"
+#define BUSTYPE_NUBUS "NUBUS"
+#define BUSTYPE_TC "TC"
+#define BUSTYPE_VME "VME"
+#define BUSTYPE_XPRESS "XPRESS"
+#define BUSTYPE_NEC98 "NEC98"
+
+struct mpc_config_ioapic
+{
+ unsigned char mpc_type;
+ unsigned char mpc_apicid;
+ unsigned char mpc_apicver;
+ unsigned char mpc_flags;
+#define MPC_APIC_USABLE 0x01
+ unsigned int mpc_apicaddr;
+};
+
+struct mpc_config_intsrc
+{
+ unsigned char mpc_type;
+ unsigned char mpc_irqtype;
+ unsigned short mpc_irqflag;
+ unsigned char mpc_srcbus;
+ unsigned char mpc_srcbusirq;
+ unsigned char mpc_dstapic;
+ unsigned char mpc_dstirq;
+};
+
+enum mp_irq_source_types {
+ mp_INT = 0,
+ mp_NMI = 1,
+ mp_SMI = 2,
+ mp_ExtINT = 3
+};
+
+#define MP_IRQDIR_DEFAULT 0
+#define MP_IRQDIR_HIGH 1
+#define MP_IRQDIR_LOW 3
+
+
+struct mpc_config_lintsrc
+{
+ unsigned char mpc_type;
+ unsigned char mpc_irqtype;
+ unsigned short mpc_irqflag;
+ unsigned char mpc_srcbusid;
+ unsigned char mpc_srcbusirq;
+ unsigned char mpc_destapic;
+#define MP_APIC_ALL 0xFF
+ unsigned char mpc_destapiclint;
+};
+
+struct mp_config_oemtable
+{
+ char oem_signature[4];
+#define MPC_OEM_SIGNATURE "_OEM"
+ unsigned short oem_length; /* Size of table */
+ char oem_rev; /* 0x01 */
+ char oem_checksum;
+ char mpc_oem[8];
+};
+
+struct mpc_config_translation
+{
+ unsigned char mpc_type;
+ unsigned char trans_len;
+ unsigned char trans_type;
+ unsigned char trans_quad;
+ unsigned char trans_global;
+ unsigned char trans_local;
+ unsigned short trans_reserved;
+};
+
+/*
+ * Default configurations
+ *
+ * 1 2 CPU ISA 82489DX
+ * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
+ * 3 2 CPU EISA 82489DX
+ * 4 2 CPU MCA 82489DX
+ * 5 2 CPU ISA+PCI
+ * 6 2 CPU EISA+PCI
+ * 7 2 CPU MCA+PCI
+ */
+
+enum mp_bustype {
+ MP_BUS_ISA = 1,
+ MP_BUS_EISA,
+ MP_BUS_PCI,
+ MP_BUS_MCA,
+ MP_BUS_NEC98
+};
+#endif
+
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index a412963fd9..35163029e5 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -1,12 +1,6 @@
#ifndef __ASM_MSR_H
#define __ASM_MSR_H
-/*
- * Access to machine-specific registers (available on 586 and better only)
- * Note: the rd* operations modify the parameters directly (without using
- * pointer indirection), this allows gcc to optimize better
- */
-
#define rdmsr(msr,val1,val2) \
__asm__ __volatile__("rdmsr" \
: "=a" (val1), "=d" (val2) \
@@ -24,6 +18,36 @@
: /* no outputs */ \
: "c" (msr), "a" (val1), "d" (val2))
+#define rdmsr_user(msr,val1,val2) ({\
+ int _rc; \
+ __asm__ __volatile__( \
+ "1: rdmsr\n2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $1,%2\n; jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " "__FIXUP_ALIGN"\n" \
+ " "__FIXUP_WORD" 1b,3b\n" \
+ ".previous\n" \
+ : "=a" (val1), "=d" (val2), "=&r" (_rc) \
+ : "c" (msr), "2" (0)); \
+ _rc; })
+
+#define wrmsr_user(msr,val1,val2) ({\
+ int _rc; \
+ __asm__ __volatile__( \
+ "1: wrmsr\n2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $1,%0\n; jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " "__FIXUP_ALIGN"\n" \
+ " "__FIXUP_WORD" 1b,3b\n" \
+ ".previous\n" \
+ : "=&r" (_rc) \
+ : "c" (msr), "a" (val1), "d" (val2), "0" (0)); \
+ _rc; })
+
#define rdtsc(low,high) \
__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
@@ -55,6 +79,23 @@
#define MSR_IA32_PLATFORM_ID 0x17
#define MSR_IA32_EBL_CR_POWERON 0x2a
+#define MSR_IA32_APICBASE 0x1b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_IA32_UCODE_WRITE 0x79
+#define MSR_IA32_UCODE_REV 0x8b
+
+#define MSR_P6_PERFCTR0 0xc1
+#define MSR_P6_PERFCTR1 0xc2
+
+/* MSRs & bits used for VMX enabling */
+#define MSR_IA32_VMX_BASIC_MSR 0x480
+#define IA32_FEATURE_CONTROL_MSR 0x3a
+#define IA32_FEATURE_CONTROL_MSR_LOCK 0x1
+#define IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON 0x4
+
/* AMD/K8 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
@@ -63,7 +104,7 @@
#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
#define MSR_FS_BASE 0xc0000100 /* 64bit GS base */
#define MSR_GS_BASE 0xc0000101 /* 64bit FS base */
-#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */
+#define MSR_SHADOW_GS_BASE 0xc0000102 /* SwapGS GS shadow */
/* EFER bits: */
#define _EFER_SCE 0 /* SYSCALL/SYSRET */
#define _EFER_LME 8 /* Long mode enable */
@@ -78,25 +119,17 @@
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_PLATFORM_ID 0x17
-#define MSR_IA32_PERFCTR0 0xc1
-#define MSR_IA32_PERFCTR1 0xc2
-
#define MSR_MTRRcap 0x0fe
#define MSR_IA32_BBL_CR_CTL 0x119
+#define MSR_IA32_SYSENTER_CS 0x174
+#define MSR_IA32_SYSENTER_ESP 0x175
+#define MSR_IA32_SYSENTER_EIP 0x176
+
#define MSR_IA32_MCG_CAP 0x179
#define MSR_IA32_MCG_STATUS 0x17a
#define MSR_IA32_MCG_CTL 0x17b
-#define MSR_IA32_EVNTSEL0 0x186
-#define MSR_IA32_EVNTSEL1 0x187
-
-#define MSR_IA32_DEBUGCTLMSR 0x1d9
-#define MSR_IA32_LASTBRANCHFROMIP 0x1db
-#define MSR_IA32_LASTBRANCHTOIP 0x1dc
-#define MSR_IA32_LASTINTFROMIP 0x1dd
-#define MSR_IA32_LASTINTTOIP 0x1de
-
#define MSR_MTRRfix64K_00000 0x250
#define MSR_MTRRfix16K_80000 0x258
#define MSR_MTRRfix16K_A0000 0x259
@@ -115,13 +148,7 @@
#define MSR_IA32_MC0_ADDR 0x402
#define MSR_IA32_MC0_MISC 0x403
-#define MSR_IA32_APICBASE 0x1b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-
-#define MSR_IA32_UCODE_WRITE 0x79
-#define MSR_IA32_UCODE_REV 0x8b
+#define MSR_IA32_DS_AREA 0x600
#define MSR_IA32_BBL_CR_CTL 0x119
@@ -134,11 +161,22 @@
#define MSR_IA32_THERM_STATUS 0x19c
#define MSR_IA32_MISC_ENABLE 0x1a0
+#define MSR_IA32_MISC_ENABLE_PERF_AVAIL (1<<7)
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1<<11)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
+
#define MSR_IA32_DEBUGCTLMSR 0x1d9
-#define MSR_IA32_LASTBRANCHFROMIP 0x1db
-#define MSR_IA32_LASTBRANCHTOIP 0x1dc
-#define MSR_IA32_LASTINTFROMIP 0x1dd
-#define MSR_IA32_LASTINTTOIP 0x1de
+#define MSR_IA32_DEBUGCTLMSR_LBR (1<<0)
+#define MSR_IA32_DEBUGCTLMSR_BTF (1<<1)
+#define MSR_IA32_DEBUGCTLMSR_TR (1<<2)
+#define MSR_IA32_DEBUGCTLMSR_BTS (1<<3)
+#define MSR_IA32_DEBUGCTLMSR_BTINT (1<<4)
+
+#define MSR_IA32_LASTBRANCH_TOS 0x1da
+#define MSR_IA32_LASTBRANCH_0 0x1db
+#define MSR_IA32_LASTBRANCH_1 0x1dc
+#define MSR_IA32_LASTBRANCH_2 0x1dd
+#define MSR_IA32_LASTBRANCH_3 0x1de
#define MSR_IA32_MC0_CTL 0x400
#define MSR_IA32_MC0_STATUS 0x401
@@ -150,6 +188,7 @@
#define MSR_P6_EVNTSEL0 0x186
#define MSR_P6_EVNTSEL1 0x187
+
/* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
#define MSR_K7_EVNTSEL0 0xC0010000
#define MSR_K7_PERFCTR0 0xC0010004
@@ -195,6 +234,7 @@
/* VIA Cyrix defined MSRs*/
#define MSR_VIA_FCR 0x1107
#define MSR_VIA_LONGHAUL 0x110a
+#define MSR_VIA_RNG 0x110b
#define MSR_VIA_BCR2 0x1147
/* Transmeta defined MSRs */
diff --git a/xen/include/asm-x86/multicall.h b/xen/include/asm-x86/multicall.h
index d03ac9ffb1..23d7c8e5ba 100644
--- a/xen/include/asm-x86/multicall.h
+++ b/xen/include/asm-x86/multicall.h
@@ -9,7 +9,25 @@
#ifdef __x86_64__
-#define do_multicall_call(_call) BUG()
+#define do_multicall_call(_call) \
+ do { \
+ __asm__ __volatile__ ( \
+ "movq "STR(MULTICALL_op)"(%0),%%rax; " \
+ "andq $("STR(NR_hypercalls)"-1),%%rax; " \
+ "leaq "STR(hypercall_table)"(%%rip),%%rdi; "\
+ "leaq (%%rdi,%%rax,8),%%rax; " \
+ "movq "STR(MULTICALL_arg0)"(%0),%%rdi; " \
+ "movq "STR(MULTICALL_arg1)"(%0),%%rsi; " \
+ "movq "STR(MULTICALL_arg2)"(%0),%%rdx; " \
+ "movq "STR(MULTICALL_arg3)"(%0),%%rcx; " \
+ "movq "STR(MULTICALL_arg4)"(%0),%%r8; " \
+ "callq *(%%rax); " \
+ "movq %%rax,"STR(MULTICALL_result)"(%0); " \
+ : : "b" (_call) \
+ /* all the caller-saves registers */ \
+ : "rax", "rcx", "rdx", "rsi", "rdi", \
+ "r8", "r9", "r10", "r11" ); \
+ } while ( 0 )
#else
@@ -26,7 +44,9 @@
"call *hypercall_table(,%%eax,4); " \
"movl %%eax,"STR(MULTICALL_result)"(%0); "\
"addl $20,%%esp; " \
- : : "b" (_call) : "eax", "ecx", "edx" ); \
+ : : "b" (_call) \
+ /* all the caller-saves registers */ \
+ : "eax", "ecx", "edx" ); \
} while ( 0 )
#endif
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 103455846a..87a47f8667 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -1,184 +1,269 @@
-/******************************************************************************
- * asm-x86/page.h
- *
- * Definitions relating to page tables.
- */
#ifndef __X86_PAGE_H__
#define __X86_PAGE_H__
-#if defined(__x86_64__)
-
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 21
-#define L3_PAGETABLE_SHIFT 30
-#define L4_PAGETABLE_SHIFT 39
-
-#define ENTRIES_PER_L1_PAGETABLE 512
-#define ENTRIES_PER_L2_PAGETABLE 512
-#define ENTRIES_PER_L3_PAGETABLE 512
-#define ENTRIES_PER_L4_PAGETABLE 512
-
-#define __PAGE_OFFSET (0xFFFF830000000000)
-
-#elif defined(__i386__)
-
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 22
-
-#define ENTRIES_PER_L1_PAGETABLE 1024
-#define ENTRIES_PER_L2_PAGETABLE 1024
-
-#define __PAGE_OFFSET (0xFC400000)
-
+#ifndef __ASSEMBLY__
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#else
+#define PAGE_SIZE (1 << PAGE_SHIFT)
#endif
+#define PAGE_MASK (~(intpte_t)(PAGE_SIZE-1))
+#define PAGE_FLAG_MASK (~0U)
-#define PAGE_SHIFT L1_PAGETABLE_SHIFT
-#define PAGE_SIZE (1UL << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#ifndef __ASSEMBLY__
+# include <asm/types.h>
+# include <xen/lib.h>
+#endif
-#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
-#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+#if defined(__i386__)
+# include <asm/x86_32/page.h>
+#elif defined(__x86_64__)
+# include <asm/x86_64/page.h>
+#endif
+/* Get direct integer representation of a pte's contents (intpte_t). */
+#define l1e_get_intpte(x) ((x).l1)
+#define l2e_get_intpte(x) ((x).l2)
+#define l3e_get_intpte(x) ((x).l3)
+#define l4e_get_intpte(x) ((x).l4)
+
+/* Get pfn mapped by pte (unsigned long). */
+#define l1e_get_pfn(x) \
+ ((unsigned long)(((x).l1 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+#define l2e_get_pfn(x) \
+ ((unsigned long)(((x).l2 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+#define l3e_get_pfn(x) \
+ ((unsigned long)(((x).l3 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+#define l4e_get_pfn(x) \
+ ((unsigned long)(((x).l4 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+
+/* Get physical address of page mapped by pte (physaddr_t). */
+#define l1e_get_paddr(x) \
+ ((physaddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
+#define l2e_get_paddr(x) \
+ ((physaddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
+#define l3e_get_paddr(x) \
+ ((physaddr_t)(((x).l3 & (PADDR_MASK&PAGE_MASK))))
+#define l4e_get_paddr(x) \
+ ((physaddr_t)(((x).l4 & (PADDR_MASK&PAGE_MASK))))
+
+/* Get pointer to info structure of page mapped by pte (struct pfn_info *). */
+#define l1e_get_page(x) (pfn_to_page(l1e_get_pfn(x)))
+#define l2e_get_page(x) (pfn_to_page(l2e_get_pfn(x)))
+#define l3e_get_page(x) (pfn_to_page(l3e_get_pfn(x)))
+#define l4e_get_page(x) (pfn_to_page(l4e_get_pfn(x)))
+
+/* Get pte access flags (unsigned int). */
+#define l1e_get_flags(x) (get_pte_flags((x).l1))
+#define l2e_get_flags(x) (get_pte_flags((x).l2))
+#define l3e_get_flags(x) (get_pte_flags((x).l3))
+#define l4e_get_flags(x) (get_pte_flags((x).l4))
+
+/* Construct an empty pte. */
+#define l1e_empty() ((l1_pgentry_t) { 0 })
+#define l2e_empty() ((l2_pgentry_t) { 0 })
+#define l3e_empty() ((l3_pgentry_t) { 0 })
+#define l4e_empty() ((l4_pgentry_t) { 0 })
+
+/* Construct a pte from a pfn and access flags. */
+#define l1e_from_pfn(pfn, flags) \
+ ((l1_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
+#define l2e_from_pfn(pfn, flags) \
+ ((l2_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
+#define l3e_from_pfn(pfn, flags) \
+ ((l3_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
+#define l4e_from_pfn(pfn, flags) \
+ ((l4_pgentry_t) { ((intpte_t)(pfn) << PAGE_SHIFT) | put_pte_flags(flags) })
+
+/* Construct a pte from a physical address and access flags. */
#ifndef __ASSEMBLY__
-#include <xen/config.h>
-typedef struct { unsigned long l1_lo; } l1_pgentry_t;
-typedef struct { unsigned long l2_lo; } l2_pgentry_t;
-typedef struct { unsigned long l3_lo; } l3_pgentry_t;
-typedef struct { unsigned long l4_lo; } l4_pgentry_t;
-typedef struct { unsigned long pt_lo; } pagetable_t;
+static inline l1_pgentry_t l1e_from_paddr(physaddr_t pa, unsigned int flags)
+{
+ ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+ return (l1_pgentry_t) { pa | put_pte_flags(flags) };
+}
+static inline l2_pgentry_t l2e_from_paddr(physaddr_t pa, unsigned int flags)
+{
+ ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+ return (l2_pgentry_t) { pa | put_pte_flags(flags) };
+}
+#if CONFIG_PAGING_LEVELS >= 3
+static inline l3_pgentry_t l3e_from_paddr(physaddr_t pa, unsigned int flags)
+{
+ ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+ return (l3_pgentry_t) { pa | put_pte_flags(flags) };
+}
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
+static inline l4_pgentry_t l4e_from_paddr(physaddr_t pa, unsigned int flags)
+{
+ ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+ return (l4_pgentry_t) { pa | put_pte_flags(flags) };
+}
+#endif
#endif /* !__ASSEMBLY__ */
-/* Strip type from a table entry. */
-#define l1_pgentry_val(_x) ((_x).l1_lo)
-#define l2_pgentry_val(_x) ((_x).l2_lo)
-#define l3_pgentry_val(_x) ((_x).l3_lo)
-#define l4_pgentry_val(_x) ((_x).l4_lo)
-#define pagetable_val(_x) ((_x).pt_lo)
-
-/* Add type to a table entry. */
-#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
-#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
-#define mk_l3_pgentry(_x) ( (l3_pgentry_t) { (_x) } )
-#define mk_l4_pgentry(_x) ( (l4_pgentry_t) { (_x) } )
-#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
-
-/* Turn a typed table entry into a page index. */
-#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
-#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
-#define l3_pgentry_to_pagenr(_x) (l3_pgentry_val(_x) >> PAGE_SHIFT)
-#define l4_pgentry_to_pagenr(_x) (l4_pgentry_val(_x) >> PAGE_SHIFT)
-
-/* Turn a typed table entry into a physical address. */
-#define l1_pgentry_to_phys(_x) (l1_pgentry_val(_x) & PAGE_MASK)
-#define l2_pgentry_to_phys(_x) (l2_pgentry_val(_x) & PAGE_MASK)
-#define l3_pgentry_to_phys(_x) (l3_pgentry_val(_x) & PAGE_MASK)
-#define l4_pgentry_to_phys(_x) (l4_pgentry_val(_x) & PAGE_MASK)
+/* Construct a pte from its direct integer representation. */
+#define l1e_from_intpte(intpte) ((l1_pgentry_t) { (intpte_t)(intpte) })
+#define l2e_from_intpte(intpte) ((l2_pgentry_t) { (intpte_t)(intpte) })
+#define l3e_from_intpte(intpte) ((l3_pgentry_t) { (intpte_t)(intpte) })
+#define l4e_from_intpte(intpte) ((l4_pgentry_t) { (intpte_t)(intpte) })
+
+/* Construct a pte from a page pointer and access flags. */
+#define l1e_from_page(page, flags) (l1e_from_pfn(page_to_pfn(page),(flags)))
+#define l2e_from_page(page, flags) (l2e_from_pfn(page_to_pfn(page),(flags)))
+#define l3e_from_page(page, flags) (l3e_from_pfn(page_to_pfn(page),(flags)))
+#define l4e_from_page(page, flags) (l4e_from_pfn(page_to_pfn(page),(flags)))
+
+/* Add extra flags to an existing pte. */
+#define l1e_add_flags(x, flags) ((x).l1 |= put_pte_flags(flags))
+#define l2e_add_flags(x, flags) ((x).l2 |= put_pte_flags(flags))
+#define l3e_add_flags(x, flags) ((x).l3 |= put_pte_flags(flags))
+#define l4e_add_flags(x, flags) ((x).l4 |= put_pte_flags(flags))
+
+/* Remove flags from an existing pte. */
+#define l1e_remove_flags(x, flags) ((x).l1 &= ~put_pte_flags(flags))
+#define l2e_remove_flags(x, flags) ((x).l2 &= ~put_pte_flags(flags))
+#define l3e_remove_flags(x, flags) ((x).l3 &= ~put_pte_flags(flags))
+#define l4e_remove_flags(x, flags) ((x).l4 &= ~put_pte_flags(flags))
+
+/* Check if a pte's page mapping or significant access flags have changed. */
+#define l1e_has_changed(x,y,flags) \
+ ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+#define l2e_has_changed(x,y,flags) \
+ ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+#define l3e_has_changed(x,y,flags) \
+ ( !!(((x).l3 ^ (y).l3) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+#define l4e_has_changed(x,y,flags) \
+ ( !!(((x).l4 ^ (y).l4) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
/* Pagetable walking. */
-#define l2_pgentry_to_l1(_x) \
- ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
-#define l3_pgentry_to_l2(_x) \
- ((l2_pgentry_t *)__va(l3_pgentry_val(_x) & PAGE_MASK))
-#define l4_pgentry_to_l3(_x) \
- ((l3_pgentry_t *)__va(l4_pgentry_val(_x) & PAGE_MASK))
+#define l2e_to_l1e(x) ((l1_pgentry_t *)__va(l2e_get_paddr(x)))
+#define l3e_to_l2e(x) ((l2_pgentry_t *)__va(l3e_get_paddr(x)))
+#define l4e_to_l3e(x) ((l3_pgentry_t *)__va(l4e_get_paddr(x)))
/* Given a virtual address, get an entry offset into a page table. */
-#define l1_table_offset(_a) \
- (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
-#if defined(__i386__)
-#define l2_table_offset(_a) \
- ((_a) >> L2_PAGETABLE_SHIFT)
-#elif defined(__x86_64__)
-#define l2_table_offset(_a) \
- (((_a) >> L2_PAGETABLE_SHIFT) & (ENTRIES_PER_L2_PAGETABLE -1))
-#define l3_table_offset(_a) \
- (((_a) >> L3_PAGETABLE_SHIFT) & (ENTRIES_PER_L3_PAGETABLE -1))
-#define l4_table_offset(_a) \
- ((_a) >> L4_PAGETABLE_SHIFT)
+#define l1_table_offset(a) \
+ (((a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
+#define l2_table_offset(a) \
+ (((a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#define l3_table_offset(a) \
+ (((a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1))
+#define l4_table_offset(a) \
+ (((a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1))
+
+/* Convert a pointer to a page-table entry into pagetable slot index. */
+#define pgentry_ptr_to_slot(_p) \
+ (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))
+
+/* Page-table type. */
+#ifndef __ASSEMBLY__
+#if CONFIG_PAGING_LEVELS == 2
+/* x86_32 default */
+typedef struct { u32 pfn; } pagetable_t;
+#elif CONFIG_PAGING_LEVELS == 3
+/* x86_32 PAE */
+typedef struct { u32 pfn; } pagetable_t;
+#elif CONFIG_PAGING_LEVELS == 4
+/* x86_64 */
+typedef struct { u64 pfn; } pagetable_t;
+#endif
+#define pagetable_get_paddr(x) ((physaddr_t)(x).pfn << PAGE_SHIFT)
+#define pagetable_get_pfn(x) ((x).pfn)
+#define mk_pagetable(pa) \
+ ({ pagetable_t __p; __p.pfn = (pa) >> PAGE_SHIFT; __p; })
#endif
-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
-#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
-#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT))
-#define pfn_to_page(_pfn) (frame_table + (_pfn))
-#define phys_to_page(kaddr) (frame_table + ((kaddr) >> PAGE_SHIFT))
-#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
-#define pfn_valid(_pfn) ((_pfn) < max_page)
+#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+
+#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define pfn_to_page(_pfn) (frame_table + (_pfn))
+#define phys_to_page(kaddr) (frame_table + ((kaddr) >> PAGE_SHIFT))
+#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
+#define pfn_valid(_pfn) ((_pfn) < max_page)
/* High table entries are reserved by the hypervisor. */
-#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
+#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
(HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
- (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
-
-#ifndef __ASSEMBLY__
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-#include <asm/bitops.h>
-#include <asm/flushtlb.h>
-
-#define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START)
-#define linear_l2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START+(LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
+ (L2_PAGETABLE_ENTRIES - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
+#else
+#define DOMAIN_ENTRIES_PER_L2_PAGETABLE 0
+#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE 0
+#endif
-#define va_to_l1mfn(_va) (l2_pgentry_val(linear_l2_table[_va>>L2_PAGETABLE_SHIFT]) >> PAGE_SHIFT)
+#define linear_l1_table \
+ ((l1_pgentry_t *)(LINEAR_PT_VIRT_START))
+#define __linear_l2_table \
+ ((l2_pgentry_t *)(LINEAR_PT_VIRT_START + \
+ (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0))))
+#define __linear_l3_table \
+ ((l3_pgentry_t *)(LINEAR_PT_VIRT_START + \
+ (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)) + \
+ (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<1))))
+#define __linear_l4_table \
+ ((l4_pgentry_t *)(LINEAR_PT_VIRT_START + \
+ (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<0)) + \
+ (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<1)) + \
+ (LINEAR_PT_VIRT_START >> (PAGETABLE_ORDER<<2))))
+
+#define linear_pg_table linear_l1_table
+#define linear_l2_table(_ed) ((_ed)->arch.guest_vtable)
+#define linear_l3_table(_ed) ((_ed)->arch.guest_vl3table)
+#define linear_l4_table(_ed) ((_ed)->arch.guest_vl4table)
+
+#define va_to_l1mfn(_ed, _va) \
+ (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
-extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
+#ifndef __ASSEMBLY__
+#if CONFIG_PAGING_LEVELS == 3
+extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
+extern l3_pgentry_t idle_pg_table_l3[ROOT_PAGETABLE_ENTRIES];
+extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRIES];
+#else
+extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
+extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
+#endif
extern void paging_init(void);
-
-/* Flush global pages as well. */
+#endif
#define __pge_off() \
- do { \
- __asm__ __volatile__( \
- "mov %0, %%cr4; # turn off PGE " \
- :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \
- } while (0)
+ do { \
+ __asm__ __volatile__( \
+ "mov %0, %%cr4; # turn off PGE " \
+ : : "r" (mmu_cr4_features & ~X86_CR4_PGE) ); \
+ } while ( 0 )
#define __pge_on() \
- do { \
- __asm__ __volatile__( \
- "mov %0, %%cr4; # turn off PGE " \
- :: "r" (mmu_cr4_features)); \
- } while (0)
-
-
-#define __flush_tlb_pge() \
- do { \
- __pge_off(); \
- __flush_tlb(); \
- __pge_on(); \
- } while (0)
-
-#define __flush_tlb_one(__addr) \
-__asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
-
-#endif /* !__ASSEMBLY__ */
-
-
-#define _PAGE_PRESENT 0x001
-#define _PAGE_RW 0x002
-#define _PAGE_USER 0x004
-#define _PAGE_PWT 0x008
-#define _PAGE_PCD 0x010
-#define _PAGE_ACCESSED 0x020
-#define _PAGE_DIRTY 0x040
-#define _PAGE_PAT 0x080
-#define _PAGE_PSE 0x080
-#define _PAGE_GLOBAL 0x100
+ do { \
+ __asm__ __volatile__( \
+            "mov %0, %%cr4; # turn on PGE "                 \
+ : : "r" (mmu_cr4_features) ); \
+ } while ( 0 )
+
+#define _PAGE_PRESENT 0x001U
+#define _PAGE_RW 0x002U
+#define _PAGE_USER 0x004U
+#define _PAGE_PWT 0x008U
+#define _PAGE_PCD 0x010U
+#define _PAGE_ACCESSED 0x020U
+#define _PAGE_DIRTY 0x040U
+#define _PAGE_PAT 0x080U
+#define _PAGE_PSE 0x080U
+#define _PAGE_GLOBAL 0x100U
+#define _PAGE_AVAIL 0xE00U
#define __PAGE_HYPERVISOR \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
#define __PAGE_HYPERVISOR_NOCACHE \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
-
-#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL)
-
-#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR)
-#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE)
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
#ifndef __ASSEMBLY__
+
static __inline__ int get_order(unsigned long size)
{
int order;
@@ -192,7 +277,30 @@ static __inline__ int get_order(unsigned long size)
return order;
}
-extern void zap_low_mappings(void);
-#endif
+/* Allocator functions for Xen pagetables. */
+struct pfn_info *alloc_xen_pagetable(void);
+void free_xen_pagetable(struct pfn_info *pg);
+l2_pgentry_t *virt_to_xen_l2e(unsigned long v);
+
+/* Map physical page range in Xen virtual address space. */
+#define MAP_SMALL_PAGES (1UL<<16) /* don't use superpages for the mapping */
+int
+map_pages_to_xen(
+ unsigned long virt,
+ unsigned long pfn,
+ unsigned long nr_pfns,
+ unsigned long flags);
+
+#endif /* !__ASSEMBLY__ */
#endif /* __I386_PAGE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/pci.h b/xen/include/asm-x86/pci.h
deleted file mode 100644
index c5f72c0fae..0000000000
--- a/xen/include/asm-x86/pci.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef __X86_PCI_H
-#define __X86_PCI_H
-
-#include <xen/config.h>
-
-/* Can be used to override the logic in pci_scan_bus for skipping
- already-configured bus numbers - to be used for buggy BIOSes
- or architectures with incomplete PCI setup by the loader */
-
-#ifdef CONFIG_PCI
-extern unsigned int pcibios_assign_all_busses(void);
-#else
-#define pcibios_assign_all_busses() 0
-#endif
-#define pcibios_scan_all_fns(a,b) 0
-
-extern unsigned long pci_mem_start;
-#define PCIBIOS_MIN_IO 0x1000
-#define PCIBIOS_MIN_MEM (pci_mem_start)
-
-void pcibios_config_init(void);
-struct pci_bus * pcibios_scan_root(int bus);
-extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
-extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
-
-void pcibios_set_master(struct pci_dev *dev);
-void pcibios_penalize_isa_irq(int irq);
-struct irq_routing_table *pcibios_get_irq_routing_table(void);
-int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
-
-#include <xen/types.h>
-#include <xen/slab.h>
-#include <asm/io.h>
-
-#endif /* __X86_PCI_H */
diff --git a/xen/include/asm-x86/pdb.h b/xen/include/asm-x86/pdb.h
deleted file mode 100644
index 79b0c4183b..0000000000
--- a/xen/include/asm-x86/pdb.h
+++ /dev/null
@@ -1,89 +0,0 @@
-
-/*
- * pervasive debugger
- * www.cl.cam.ac.uk/netos/pdb
- *
- * alex ho
- * 2004
- * university of cambridge computer laboratory
- */
-
-
-#ifndef __PDB_H__
-#define __PDB_H__
-
-#include <asm/regs.h>
-#include <xen/list.h>
-#include <public/dom0_ops.h>
-#include <public/xen.h> /* for domain id */
-
-extern int pdb_initialized;
-extern int pdb_com_port;
-extern int pdb_high_bit;
-extern int pdb_page_fault_possible;
-extern int pdb_page_fault_scratch;
-extern int pdb_page_fault;
-
-extern void initialize_pdb(void);
-
-/* Get/set values from generic debug interface. */
-extern int pdb_set_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr);
-extern int pdb_get_values(u_char *buffer, int length,
- unsigned long cr3, unsigned long addr);
-
-/* External entry points. */
-extern int pdb_handle_exception(int exceptionVector,
- struct xen_regs *xen_regs);
-extern void pdb_do_debug(dom0_op_t *op);
-
-/* PDB Context. */
-struct pdb_context
-{
- int valid;
- int domain;
- int process;
- int system_call; /* 0x01 break on enter, 0x02 break on exit */
- unsigned long ptbr;
-};
-extern struct pdb_context pdb_ctx;
-
-/* Breakpoints. */
-struct pdb_breakpoint
-{
- struct list_head list;
- unsigned long address;
- unsigned long cr3;
- domid_t domain;
-};
-extern void pdb_bkpt_add (unsigned long cr3, unsigned long address);
-extern struct pdb_breakpoint* pdb_bkpt_search (unsigned long cr3,
- unsigned long address);
-extern int pdb_bkpt_remove (unsigned long cr3, unsigned long address);
-
-/* Conversions. */
-extern int hex (char);
-extern char *mem2hex (char *, char *, int);
-extern char *hex2mem (char *, char *, int);
-extern int hexToInt (char **ptr, int *intValue);
-
-/* Temporary Linux specific definitions */
-extern int pdb_system_call;
-extern unsigned char pdb_system_call_enter_instr; /* original enter instr */
-extern unsigned char pdb_system_call_leave_instr; /* original next instr */
-extern unsigned long pdb_system_call_next_addr; /* instr after int 0x80 */
-extern unsigned long pdb_system_call_eflags_addr; /* saved eflags on stack */
-
-unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
-void pdb_linux_get_values(char *buffer, int length, unsigned long address,
- int pid, unsigned long cr3);
-void pdb_linux_set_values(char *buffer, int length, unsigned long address,
- int pid, unsigned long cr3);
-void pdb_linux_syscall_enter_bkpt (struct xen_regs *regs, long error_code,
- trap_info_t *ti);
-void pdb_linux_syscall_exit_bkpt (struct xen_regs *regs,
- struct pdb_context *pdb_ctx);
-
-void pdb_handle_debug_trap(struct xen_regs *regs, long error_code);
-
-#endif /* __PDB_H__ */
diff --git a/xen/include/asm-x86/physdev.h b/xen/include/asm-x86/physdev.h
new file mode 100644
index 0000000000..0b004d4958
--- /dev/null
+++ b/xen/include/asm-x86/physdev.h
@@ -0,0 +1,17 @@
+/******************************************************************************
+ * physdev.h
+ */
+
+#ifndef __XEN_PHYSDEV_H__
+#define __XEN_PHYSDEV_H__
+
+#include <public/physdev.h>
+
+void physdev_modify_ioport_access_range(
+ struct domain *d, int enable, int port, int num );
+void physdev_destroy_state(struct domain *d);
+int domain_iomem_in_pfn(struct domain *p, unsigned long pfn);
+long do_physdev_op(physdev_op_t *uop);
+void physdev_init_dom0(struct domain *d);
+
+#endif /* __XEN_PHYSDEV_H__ */
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 00b3259f98..bec90dbab0 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -1,22 +1,17 @@
-/*
- * include/asm-x86/processor.h
- *
- * Copyright (C) 1994 Linus Torvalds
- */
+
+/* Portions are: Copyright (c) 1994 Linus Torvalds */
#ifndef __ASM_X86_PROCESSOR_H
#define __ASM_X86_PROCESSOR_H
#ifndef __ASSEMBLY__
-#include <asm/page.h>
+#include <xen/config.h>
+#include <xen/cache.h>
+#include <xen/types.h>
+#include <public/xen.h>
#include <asm/types.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
-#include <asm/flushtlb.h>
-#include <asm/pdb.h>
-#include <xen/config.h>
-#include <xen/spinlock.h>
-#include <public/xen.h>
#endif
/*
@@ -31,8 +26,7 @@
#define X86_VENDOR_RISE 6
#define X86_VENDOR_TRANSMETA 7
#define X86_VENDOR_NSC 8
-#define X86_VENDOR_SIS 9
-#define X86_VENDOR_NUM 10
+#define X86_VENDOR_NUM 9
#define X86_VENDOR_UNKNOWN 0xff
/*
@@ -84,31 +78,36 @@
#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+#define X86_CR4_VMXE 0x2000 /* enable VMX */
/*
* Trap/fault mnemonics.
*/
-#define TRAP_divide_error 0
-#define TRAP_debug 1
-#define TRAP_nmi 2
-#define TRAP_int3 3
-#define TRAP_overflow 4
-#define TRAP_bounds 5
-#define TRAP_invalid_op 6
-#define TRAP_no_device 7
-#define TRAP_double_fault 8
-#define TRAP_copro_seg 9
-#define TRAP_invalid_tss 10
-#define TRAP_no_segment 11
-#define TRAP_stack_error 12
-#define TRAP_gp_fault 13
-#define TRAP_page_fault 14
-#define TRAP_spurious_int 15
-#define TRAP_copro_error 16
-#define TRAP_alignment_check 17
-#define TRAP_machine_check 18
-#define TRAP_simd_error 19
-#define TRAP_deferred_nmi 31
+#define TRAP_divide_error 0
+#define TRAP_debug 1
+#define TRAP_nmi 2
+#define TRAP_int3 3
+#define TRAP_overflow 4
+#define TRAP_bounds 5
+#define TRAP_invalid_op 6
+#define TRAP_no_device 7
+#define TRAP_double_fault 8
+#define TRAP_copro_seg 9
+#define TRAP_invalid_tss 10
+#define TRAP_no_segment 11
+#define TRAP_stack_error 12
+#define TRAP_gp_fault 13
+#define TRAP_page_fault 14
+#define TRAP_spurious_int 15
+#define TRAP_copro_error 16
+#define TRAP_alignment_check 17
+#define TRAP_machine_check 18
+#define TRAP_simd_error 19
+#define TRAP_deferred_nmi 31
+
+/* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
+/* NB. Same as VGCF_IN_SYSCALL. No bits in common with any other TRAP_ defn. */
+#define TRAP_syscall 256
/*
* Non-fatal fault/trap handlers return an error code to the caller. If the
@@ -119,23 +118,21 @@
#define EXCRET_not_a_fault 1 /* It was a trap. No instruction replay needed. */
#define EXCRET_fault_fixed 1 /* It was fault that we fixed: try a replay. */
-/*
- * 'trap_bounce' flags values.
- */
+/* 'trap_bounce' flags values */
#define TBF_EXCEPTION 1
#define TBF_EXCEPTION_ERRCODE 2
#define TBF_EXCEPTION_CR2 4
#define TBF_INTERRUPT 8
#define TBF_FAILSAFE 16
-/*
- * thread.flags values.
- */
-#define TF_failsafe_return 1
+/* 'arch_vcpu' flags values */
+#define _TF_kernel_mode 0
+#define TF_kernel_mode (1<<_TF_kernel_mode)
#ifndef __ASSEMBLY__
struct domain;
+struct vcpu;
/*
* Default implementation of macro that returns current
@@ -148,31 +145,32 @@ struct domain;
({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
#endif
-/*
- * CPU type and hardware bug flags. Kept separately for each CPU.
- * Members of this structure are referenced in head.S, so think twice
- * before touching them. [mj]
- */
-
struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
- __u8 x86_mask;
- int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
- __u32 x86_capability[NCAPINTS];
- char x86_vendor_id[16];
- int x86_cache_size; /* in KB - for CPUS that support this call */
- int x86_clflush_size;
- int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined */
-} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+ char wp_works_ok; /* It doesn't on 386's */
+ char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
+ char hard_math;
+ char rfu;
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ unsigned int x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+ int x86_cache_size; /* in KB - valid for CPUS which support this call */
+ int x86_cache_alignment; /* In bytes */
+ int fdiv_bug;
+ int f00f_bug;
+ int coma_bug;
+ unsigned char x86_num_cores;
+} __cacheline_aligned;
/*
* capabilities of CPUs
*/
extern struct cpuinfo_x86 boot_cpu_data;
-extern struct tss_struct init_tss[NR_CPUS];
#ifdef CONFIG_SMP
extern struct cpuinfo_x86 cpu_data[];
@@ -182,24 +180,31 @@ extern struct cpuinfo_x86 cpu_data[];
#define current_cpu_data boot_cpu_data
#endif
-extern char ignore_irq13;
+extern int phys_proc_id[NR_CPUS];
extern void identify_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
+extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void dodgy_tsc(void);
+#ifdef CONFIG_X86_HT
+extern void detect_ht(struct cpuinfo_x86 *c);
+#else
+static inline void detect_ht(struct cpuinfo_x86 *c) {}
+#endif
+
/*
* Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+ * resulting in stale register contents being returned.
*/
-static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op));
-}
+#define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+ __asm__("cpuid" \
+ : "=a" (*(int *)(_eax)), \
+ "=b" (*(int *)(_ebx)), \
+ "=c" (*(int *)(_ecx)), \
+ "=d" (*(int *)(_edx)) \
+ : "0" (_op), "2" (0))
/*
* CPUID functions returning a single datum
@@ -249,24 +254,24 @@ static inline unsigned int cpuid_edx(unsigned int op)
#define read_cr0() ({ \
unsigned long __dummy; \
__asm__( \
- "mov"__OS" %%cr0,%0\n\t" \
+ "mov %%cr0,%0\n\t" \
:"=r" (__dummy)); \
__dummy; \
})
#define write_cr0(x) \
- __asm__("mov"__OS" %0,%%cr0": :"r" ((unsigned long)x));
+ __asm__("mov %0,%%cr0": :"r" ((unsigned long)x));
#define read_cr4() ({ \
unsigned long __dummy; \
__asm__( \
- "mov"__OS" %%cr4,%0\n\t" \
+ "mov %%cr4,%0\n\t" \
:"=r" (__dummy)); \
__dummy; \
})
#define write_cr4(x) \
- __asm__("mov"__OS" %0,%%cr4": :"r" ((unsigned long)x));
+ __asm__("mov %0,%%cr4": :"r" ((unsigned long)x));
/*
* Save the cr4 feature set we're using (ie
@@ -278,22 +283,24 @@ extern unsigned long mmu_cr4_features;
static inline void set_in_cr4 (unsigned long mask)
{
+ unsigned long dummy;
mmu_cr4_features |= mask;
- __asm__("mov"__OS" %%cr4,%%"__OP"ax\n\t"
- "or"__OS" %0,%%"__OP"ax\n\t"
- "mov"__OS" %%"__OP"ax,%%cr4\n"
- : : "irg" (mask)
- :"ax");
+ __asm__ __volatile__ (
+ "mov %%cr4,%0\n\t"
+ "or %1,%0\n\t"
+ "mov %0,%%cr4\n"
+ : "=&r" (dummy) : "irg" (mask) );
}
static inline void clear_in_cr4 (unsigned long mask)
{
+ unsigned long dummy;
mmu_cr4_features &= ~mask;
- __asm__("mov"__OS" %%cr4,%%"__OP"ax\n\t"
- "and"__OS" %0,%%"__OP"ax\n\t"
- "mov"__OS" %%"__OP"ax,%%cr4\n"
- : : "irg" (~mask)
- :"ax");
+ __asm__ __volatile__ (
+ "mov %%cr4,%0\n\t"
+ "and %1,%0\n\t"
+ "mov %0,%%cr4\n"
+ : "=&r" (dummy) : "irg" (~mask) );
}
/*
@@ -327,16 +334,26 @@ static inline void clear_in_cr4 (unsigned long mask)
outb((data), 0x23); \
} while (0)
+static inline void __monitor(const void *eax, unsigned long ecx,
+ unsigned long edx)
+{
+ /* "monitor %eax,%ecx,%edx;" */
+ asm volatile(
+ ".byte 0x0f,0x01,0xc8;"
+ : :"a" (eax), "c" (ecx), "d"(edx));
+}
+
+static inline void __mwait(unsigned long eax, unsigned long ecx)
+{
+ /* "mwait %eax,%ecx;" */
+ asm volatile(
+ ".byte 0x0f,0x01,0xc9;"
+ : :"a" (eax), "c" (ecx));
+}
+
#define IOBMP_BYTES 8192
-#define IOBMP_BYTES_PER_SELBIT (IOBMP_BYTES / 64)
-#define IOBMP_BITS_PER_SELBIT (IOBMP_BYTES_PER_SELBIT * 8)
-#define IOBMP_OFFSET offsetof(struct tss_struct, io_bitmap)
#define IOBMP_INVALID_OFFSET 0x8000
-struct i387_state {
- u8 state[512]; /* big enough for FXSAVE */
-} __attribute__ ((aligned (16)));
-
struct tss_struct {
unsigned short back_link,__blh;
#ifdef __x86_64__
@@ -372,166 +389,40 @@ struct tss_struct {
u16 trace;
#endif
u16 bitmap;
- u8 io_bitmap[IOBMP_BYTES+1];
- /* Pads the TSS to be cacheline-aligned (total size is 0x2080). */
- u8 __cacheline_filler[23];
-};
-
-struct trap_bounce {
- unsigned long error_code;
- unsigned long cr2;
- unsigned short flags; /* TBF_ */
- unsigned short cs;
- unsigned long eip;
-};
-
-struct thread_struct {
- unsigned long guestos_sp;
- unsigned long guestos_ss;
-
- unsigned long flags; /* TF_ */
-
- /* Hardware debugging registers */
- unsigned long debugreg[8]; /* %%db0-7 debug registers */
-
- /* floating point info */
- struct i387_state i387;
-
- /* general user-visible register state */
- execution_context_t user_ctxt;
-
- void (*schedule_tail) (struct domain *);
-
- /*
- * Return vectors pushed to us by guest OS.
- * The stack frame for events is exactly that of an x86 hardware interrupt.
- * The stack frame for a failsafe callback is augmented with saved values
- * for segment registers %ds, %es, %fs and %gs:
- * %ds, %es, %fs, %gs, %eip, %cs, %eflags [, %oldesp, %oldss]
- */
- unsigned long event_selector; /* 08: entry CS */
- unsigned long event_address; /* 12: entry EIP */
-
- unsigned long failsafe_selector; /* 16: entry CS */
- unsigned long failsafe_address; /* 20: entry EIP */
-
- /* Bounce information for propagating an exception to guest OS. */
- struct trap_bounce trap_bounce;
-
- /* I/O-port access bitmap. */
- u64 io_bitmap_sel; /* Selector to tell us which part of the IO bitmap are
- * "interesting" (i.e. have clear bits) */
- u8 *io_bitmap; /* Pointer to task's IO bitmap or NULL */
-
- /* Trap info. */
-#ifdef __i386__
- int fast_trap_idx;
- struct desc_struct fast_trap_desc;
-#endif
- trap_info_t traps[256];
-};
+ /* Pads the TSS to be cacheline-aligned (total size is 0x80). */
+ u8 __cacheline_filler[24];
+} __cacheline_aligned __attribute__((packed));
#define IDT_ENTRIES 256
-extern struct desc_struct idt_table[];
-extern struct desc_struct *idt_tables[];
+extern idt_entry_t idt_table[];
+extern idt_entry_t *idt_tables[];
-#if defined(__i386__)
+extern struct tss_struct init_tss[NR_CPUS];
-#define SET_DEFAULT_FAST_TRAP(_p) \
- (_p)->fast_trap_idx = 0x20; \
- (_p)->fast_trap_desc.a = 0; \
- (_p)->fast_trap_desc.b = 0;
+#ifdef CONFIG_X86_32
-#define CLEAR_FAST_TRAP(_p) \
- (memset(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
- 0, 8))
+extern void init_int80_direct_trap(struct vcpu *v);
+#define set_int80_direct_trap(_ed) \
+ (memcpy(idt_tables[(_ed)->processor] + 0x80, \
+ &((_ed)->arch.int80_desc), 8))
-#ifdef XEN_DEBUGGER
-#define SET_FAST_TRAP(_p) \
- (pdb_initialized ? (void *) 0 : \
- (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
- &((_p)->fast_trap_desc), 8)))
#else
-#define SET_FAST_TRAP(_p) \
- (memcpy(idt_tables[smp_processor_id()] + (_p)->fast_trap_idx, \
- &((_p)->fast_trap_desc), 8))
-#endif
-long set_fast_trap(struct domain *p, int idx);
+#define init_int80_direct_trap(_ed) ((void)0)
+#define set_int80_direct_trap(_ed) ((void)0)
#endif
-#define INIT_THREAD { 0 }
-
-extern int gpf_emulate_4gb(struct xen_regs *regs);
-
-struct mm_struct {
- /*
- * Every domain has a L1 pagetable of its own. Per-domain mappings
- * are put in this table (eg. the current GDT is mapped here).
- */
- l1_pgentry_t *perdomain_pt;
- pagetable_t pagetable;
-
- /* shadow mode status and controls */
- unsigned int shadow_mode; /* flags to control shadow table operation */
- pagetable_t shadow_table;
- spinlock_t shadow_lock;
- unsigned int shadow_max_page_count; // currently unused
-
- /* shadow hashtable */
- struct shadow_status *shadow_ht;
- struct shadow_status *shadow_ht_free;
- struct shadow_status *shadow_ht_extras; /* extra allocation units */
- unsigned int shadow_extras_count;
-
- /* shadow dirty bitmap */
- unsigned long *shadow_dirty_bitmap;
- unsigned int shadow_dirty_bitmap_size; /* in pages, bit per page */
-
- /* shadow mode stats */
- unsigned int shadow_page_count;
- unsigned int shadow_fault_count;
- unsigned int shadow_dirty_count;
- unsigned int shadow_dirty_net_count;
- unsigned int shadow_dirty_block_count;
-
- /* Current LDT details. */
- unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
- /* Next entry is passed to LGDT on domain switch. */
- char gdt[10]; /* NB. 10 bytes needed for x86_64. Use 6 bytes for x86_32. */
-};
-
-static inline void write_ptbase(struct mm_struct *mm)
-{
- unsigned long pa;
-
- if ( unlikely(mm->shadow_mode) )
- pa = pagetable_val(mm->shadow_table);
- else
- pa = pagetable_val(mm->pagetable);
+extern int gpf_emulate_4gb(struct cpu_user_regs *regs);
- write_cr3(pa);
-}
-
-#define IDLE0_MM \
-{ \
- perdomain_pt: 0, \
- pagetable: mk_pagetable(__pa(idle_pg_table)) \
-}
-
-/* Convenient accessor for mm.gdt. */
-#define SET_GDT_ENTRIES(_p, _e) ((*(u16 *)((_p)->mm.gdt + 0)) = (((_e)<<3)-1))
-#define SET_GDT_ADDRESS(_p, _a) ((*(unsigned long *)((_p)->mm.gdt + 2)) = (_a))
-#define GET_GDT_ENTRIES(_p) (((*(u16 *)((_p)->mm.gdt + 0))+1)>>3)
-#define GET_GDT_ADDRESS(_p) (*(unsigned long *)((_p)->mm.gdt + 2))
+extern void write_ptbase(struct vcpu *v);
-void destroy_gdt(struct domain *d);
-long set_gdt(struct domain *d,
+void destroy_gdt(struct vcpu *d);
+long set_gdt(struct vcpu *d,
unsigned long *frames,
unsigned int entries);
-long set_debugreg(struct domain *p, int reg, unsigned long value);
+long set_debugreg(struct vcpu *p, int reg, unsigned long value);
struct microcode_header {
unsigned int hdrver;
@@ -607,9 +498,20 @@ extern inline void prefetchw(const void *x)
void show_guest_stack();
void show_trace(unsigned long *esp);
void show_stack(unsigned long *esp);
-void show_registers(struct xen_regs *regs);
-asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs);
+void show_registers(struct cpu_user_regs *regs);
+void show_page_walk(unsigned long addr);
+asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/regs.h b/xen/include/asm-x86/regs.h
index 1f4ccdddf7..5d99125d6c 100644
--- a/xen/include/asm-x86/regs.h
+++ b/xen/include/asm-x86/regs.h
@@ -1,6 +1,42 @@
+#ifndef __X86_REGS_H__
+#define __X86_REGS_H__
+
#ifdef __x86_64__
#include <asm/x86_64/regs.h>
#else
#include <asm/x86_32/regs.h>
#endif
+
+enum EFLAGS {
+ EF_CF = 0x00000001,
+ EF_PF = 0x00000004,
+ EF_AF = 0x00000010,
+ EF_ZF = 0x00000040,
+ EF_SF = 0x00000080,
+ EF_TF = 0x00000100,
+ EF_IE = 0x00000200,
+ EF_DF = 0x00000400,
+ EF_OF = 0x00000800,
+ EF_IOPL = 0x00003000,
+ EF_IOPL_RING0 = 0x00000000,
+ EF_IOPL_RING1 = 0x00001000,
+ EF_IOPL_RING2 = 0x00002000,
+ EF_NT = 0x00004000, /* nested task */
+ EF_RF = 0x00010000, /* resume */
+ EF_VM = 0x00020000, /* virtual mode */
+ EF_AC = 0x00040000, /* alignment */
+ EF_VIF = 0x00080000, /* virtual interrupt */
+ EF_VIP = 0x00100000, /* virtual interrupt pending */
+ EF_ID = 0x00200000, /* id */
+};
+
+#define GUEST_MODE(_r) (likely(VM86_MODE(_r) || !RING_0(_r)))
+
+#ifdef CONFIG_VMX
+#define GUEST_CONTEXT(_ed, _r) ( (VMX_DOMAIN(_ed) && ((_r)->eflags == 0)) || GUEST_MODE(_r) )
+#else
+#define GUEST_CONTEXT(_ed, _r) GUEST_MODE(_r)
+#endif
+
+#endif /* __X86_REGS_H__ */
diff --git a/xen/include/asm-x86/rwlock.h b/xen/include/asm-x86/rwlock.h
index 7519f32713..e8c8846cb5 100644
--- a/xen/include/asm-x86/rwlock.h
+++ b/xen/include/asm-x86/rwlock.h
@@ -35,10 +35,10 @@
"js 2f\n" \
"1:\n" \
".section .text.lock,\"ax\"\n" \
- "2:\tpush"__OS" %%"__OP"ax\n\t" \
- "lea"__OS" %0,%%"__OP"ax\n\t" \
+ "2:\tpush %%"__OP"ax\n\t" \
+ "lea %0,%%"__OP"ax\n\t" \
"call " helper "\n\t" \
- "pop"__OS" %%"__OP"ax\n\t" \
+ "pop %%"__OP"ax\n\t" \
"jmp 1b\n" \
".previous" \
:"=m" (*(volatile int *)rw) : : "memory")
@@ -65,10 +65,10 @@
"jnz 2f\n" \
"1:\n" \
".section .text.lock,\"ax\"\n" \
- "2:\tpush"__OS" %%"__OP"ax\n\t" \
- "lea"__OS" %0,%%"__OP"ax\n\t" \
+ "2:\tpush %%"__OP"ax\n\t" \
+ "lea %0,%%"__OP"ax\n\t" \
"call " helper "\n\t" \
- "pop"__OS" %%"__OP"ax\n\t" \
+ "pop %%"__OP"ax\n\t" \
"jmp 1b\n" \
".previous" \
:"=m" (*(volatile int *)rw) : : "memory")
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 97082e5964..e4788054e0 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -1,4 +1,23 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */
+/******************************************************************************
+ * include/asm-x86/shadow.h
+ *
+ * Copyright (c) 2005 Michael A Fetterman
+ * Based on an earlier implementation by Ian Pratt et al
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
#ifndef _XEN_SHADOW_H
#define _XEN_SHADOW_H
@@ -6,94 +25,436 @@
#include <xen/config.h>
#include <xen/types.h>
#include <xen/perfc.h>
+#include <xen/sched.h>
+#include <xen/mm.h>
+#include <xen/domain_page.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
#include <asm/processor.h>
+#include <asm/vmx.h>
+#include <public/dom0_ops.h>
+
+/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
-/* Shadow PT flag bits in pfn_info */
-#define PSH_shadowed (1<<31) /* page has a shadow. PFN points to shadow */
-#define PSH_pfn_mask ((1<<21)-1)
+#define SHM_enable (1<<0) /* we're in one of the shadow modes */
+#define SHM_refcounts (1<<1) /* refcounts based on shadow tables instead of
+ guest tables */
+#define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
+ regardless of pte write permissions */
+#define SHM_log_dirty (1<<3) /* enable log dirty mode */
+#define SHM_translate (1<<4) /* do p2m translation on guest tables */
+#define SHM_external (1<<5) /* external page table, not used by Xen */
-/* Shadow PT operation mode : shadowmode variable in mm_struct */
-#define SHM_test (1) /* just run domain on shadow PTs */
-#define SHM_logdirty (2) /* log pages that are dirtied */
-#define SHM_translate (3) /* lookup machine pages in translation table */
-#define SHM_cow (4) /* copy on write all dirtied pages */
+#define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode)
+#define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
+#define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
+#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
+#define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
+#define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external)
+
+#define shadow_tainted_refcnts(_d) ((_d)->arch.shadow_tainted_refcnts)
#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
-#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
+#define __shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
(SH_LINEAR_PT_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
+#define shadow_linear_l2_table(_v) ((_v)->arch.shadow_vtable)
+
+// easy access to the hl2 table (for translated but not external modes only)
+#define __linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \
+ (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
+
+/*
+ * For now we use the per-domain BIGLOCK rather than a shadow-specific lock.
+ * We usually have the BIGLOCK already acquired anyway, so this is unlikely
+ * to cause much unnecessary extra serialisation. Also it's a recursive
+ * lock, and there are some code paths containing nested shadow_lock().
+ * The #if0'ed code below is therefore broken until such nesting is removed.
+ */
+#if 0
+#define shadow_lock_init(_d) \
+ spin_lock_init(&(_d)->arch.shadow_lock)
+#define shadow_lock_is_acquired(_d) \
+ spin_is_locked(&(_d)->arch.shadow_lock)
+#define shadow_lock(_d) \
+do { \
+ ASSERT(!shadow_lock_is_acquired(_d)); \
+ spin_lock(&(_d)->arch.shadow_lock); \
+} while (0)
+#define shadow_unlock(_d) \
+do { \
+ ASSERT(!shadow_lock_is_acquired(_d)); \
+ spin_unlock(&(_d)->arch.shadow_lock); \
+} while (0)
+#else
+#define shadow_lock_init(_d) \
+ ((_d)->arch.shadow_nest = 0)
+#define shadow_lock_is_acquired(_d) \
+ (spin_is_locked(&(_d)->big_lock) && ((_d)->arch.shadow_nest != 0))
+#define shadow_lock(_d) \
+do { \
+ LOCK_BIGLOCK(_d); \
+ (_d)->arch.shadow_nest++; \
+} while (0)
+#define shadow_unlock(_d) \
+do { \
+ ASSERT(shadow_lock_is_acquired(_d)); \
+ (_d)->arch.shadow_nest--; \
+ UNLOCK_BIGLOCK(_d); \
+} while (0)
+#endif
-#define shadow_mode(_d) ((_d)->mm.shadow_mode)
-#define shadow_lock_init(_d) spin_lock_init(&(_d)->mm.shadow_lock)
-#define shadow_lock(_m) spin_lock(&(_m)->shadow_lock)
-#define shadow_unlock(_m) spin_unlock(&(_m)->shadow_lock)
+#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
+#define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
+#define SHADOW_MAX(_encoded) ((L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
extern void shadow_mode_init(void);
extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
-extern int shadow_fault(unsigned long va, long error_code);
-extern void shadow_l1_normal_pt_update(
- unsigned long pa, unsigned long gpte,
- unsigned long *prev_spfn_ptr, l1_pgentry_t **prev_spl1e_ptr);
-extern void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte);
-extern void unshadow_table(unsigned long gpfn, unsigned int type);
+extern int shadow_fault(unsigned long va, struct cpu_user_regs *regs);
extern int shadow_mode_enable(struct domain *p, unsigned int mode);
+extern void shadow_invlpg(struct vcpu *, unsigned long);
+extern struct out_of_sync_entry *shadow_mark_mfn_out_of_sync(
+ struct vcpu *v, unsigned long gpfn, unsigned long mfn);
+extern void free_monitor_pagetable(struct vcpu *v);
+extern void __shadow_sync_all(struct domain *d);
+extern int __shadow_out_of_sync(struct vcpu *v, unsigned long va);
+extern int set_p2m_entry(
+ struct domain *d, unsigned long pfn, unsigned long mfn,
+ struct domain_mmap_cache *l2cache,
+ struct domain_mmap_cache *l1cache);
+extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype);
+
+extern void shadow_l1_normal_pt_update(struct domain *d,
+ unsigned long pa, l1_pgentry_t l1e,
+ struct domain_mmap_cache *cache);
+extern void shadow_l2_normal_pt_update(struct domain *d,
+ unsigned long pa, l2_pgentry_t l2e,
+ struct domain_mmap_cache *cache);
+#if CONFIG_PAGING_LEVELS >= 3
+extern void shadow_l3_normal_pt_update(struct domain *d,
+ unsigned long pa, l3_pgentry_t l3e,
+ struct domain_mmap_cache *cache);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
+extern void shadow_l4_normal_pt_update(struct domain *d,
+ unsigned long pa, l4_pgentry_t l4e,
+ struct domain_mmap_cache *cache);
+#endif
+extern int shadow_do_update_va_mapping(unsigned long va,
+ l1_pgentry_t val,
+ struct vcpu *v);
+
+
+static inline unsigned long __shadow_status(
+ struct domain *d, unsigned long gpfn, unsigned long stype);
+static inline void update_hl2e(struct vcpu *v, unsigned long va);
+
+extern void vmx_shadow_clear_state(struct domain *);
+
+static inline int page_is_page_table(struct pfn_info *page)
+{
+ struct domain *owner = page_get_owner(page);
+
+ if ( owner && shadow_mode_refcounts(owner) )
+ return page->count_info & PGC_page_table;
+
+ u32 type_info = page->u.inuse.type_info & PGT_type_mask;
+ return type_info && (type_info <= PGT_l4_page_table);
+}
+
+static inline int mfn_is_page_table(unsigned long mfn)
+{
+ if ( !pfn_valid(mfn) )
+ return 0;
+
+ return page_is_page_table(pfn_to_page(mfn));
+}
+
+static inline int page_out_of_sync(struct pfn_info *page)
+{
+ return page->count_info & PGC_out_of_sync;
+}
+
+static inline int mfn_out_of_sync(unsigned long mfn)
+{
+ if ( !pfn_valid(mfn) )
+ return 0;
+
+ return page_out_of_sync(pfn_to_page(mfn));
+}
+
+
+/************************************************************************/
+
+static void inline
+__shadow_sync_mfn(struct domain *d, unsigned long mfn)
+{
+ if ( d->arch.out_of_sync )
+ {
+ // XXX - could be smarter
+ //
+ __shadow_sync_all(d);
+ }
+}
+
+static void inline
+__shadow_sync_va(struct vcpu *v, unsigned long va)
+{
+ struct domain *d = v->domain;
+
+ if ( d->arch.out_of_sync && __shadow_out_of_sync(v, va) )
+ {
+ perfc_incrc(shadow_sync_va);
+
+ // XXX - could be smarter
+ //
+ __shadow_sync_all(v->domain);
+ }
+
+ // Also make sure the HL2 is up-to-date for this address.
+ //
+ if ( unlikely(shadow_mode_translate(v->domain)) )
+ update_hl2e(v, va);
+}
+
+static void inline
+shadow_sync_all(struct domain *d)
+{
+ if ( unlikely(shadow_mode_enabled(d)) )
+ {
+ shadow_lock(d);
+
+ if ( d->arch.out_of_sync )
+ __shadow_sync_all(d);
+
+ ASSERT(d->arch.out_of_sync == NULL);
+
+ shadow_unlock(d);
+ }
+}
+
+// SMP BUG: This routine can't ever be used properly in an SMP context.
+// It should be something like get_shadow_and_sync_va().
+// This probably shouldn't exist.
+//
+static void inline
+shadow_sync_va(struct vcpu *v, unsigned long gva)
+{
+ struct domain *d = v->domain;
+ if ( unlikely(shadow_mode_enabled(d)) )
+ {
+ shadow_lock(d);
+ __shadow_sync_va(v, gva);
+ shadow_unlock(d);
+ }
+}
extern void __shadow_mode_disable(struct domain *d);
static inline void shadow_mode_disable(struct domain *d)
{
- if ( shadow_mode(d) )
+ if ( unlikely(shadow_mode_enabled(d)) )
+ {
+ shadow_lock(d);
__shadow_mode_disable(d);
+ shadow_unlock(d);
+ }
}
-extern unsigned long shadow_l2_table(
- struct mm_struct *m, unsigned long gpfn);
+/************************************************************************/
-#define SHADOW_DEBUG 0
-#define SHADOW_HASH_DEBUG 0
+#define __mfn_to_gpfn(_d, mfn) \
+ ( (shadow_mode_translate(_d)) \
+ ? machine_to_phys_mapping[(mfn)] \
+ : (mfn) )
+
+#define __gpfn_to_mfn(_d, gpfn) \
+ ({ \
+ ASSERT(current->domain == (_d)); \
+ (shadow_mode_translate(_d)) \
+ ? phys_to_machine_mapping(gpfn) \
+ : (gpfn); \
+ })
+
+#define __gpfn_to_mfn_foreign(_d, gpfn) \
+ ( (shadow_mode_translate(_d)) \
+ ? gpfn_to_mfn_foreign(_d, gpfn) \
+ : (gpfn) )
+
+extern unsigned long gpfn_to_mfn_foreign(
+ struct domain *d, unsigned long gpfn);
+
+/************************************************************************/
struct shadow_status {
- unsigned long pfn; /* Guest pfn. */
- unsigned long spfn_and_flags; /* Shadow pfn plus flags. */
- struct shadow_status *next; /* Pull-to-front list. */
+ struct shadow_status *next; /* Pull-to-front list per hash bucket. */
+ unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
+ unsigned long smfn; /* Shadow mfn. */
};
#define shadow_ht_extra_size 128
#define shadow_ht_buckets 256
+struct out_of_sync_entry {
+ struct out_of_sync_entry *next;
+ unsigned long gpfn; /* why is this here? */
+ unsigned long gmfn;
+ unsigned long snapshot_mfn;
+ unsigned long writable_pl1e; /* NB: this is a machine address */
+};
+
+#define out_of_sync_extra_size 127
+
+#define SHADOW_SNAPSHOT_ELSEWHERE (-1L)
+
+/************************************************************************/
+#define SHADOW_DEBUG 0
+#define SHADOW_VERBOSE_DEBUG 0
+#define SHADOW_VVERBOSE_DEBUG 0
+#define SHADOW_VVVERBOSE_DEBUG 0
+#define SHADOW_HASH_DEBUG 0
+#define FULLSHADOW_DEBUG 0
+
+#if SHADOW_DEBUG
+extern int shadow_status_noswap;
+#define _SHADOW_REFLECTS_SNAPSHOT ( 9)
+#define SHADOW_REFLECTS_SNAPSHOT (1u << _SHADOW_REFLECTS_SNAPSHOT)
+#endif
+
#ifdef VERBOSE
-#define SH_LOG(_f, _a...) \
-printk("DOM%u: (file=shadow.c, line=%d) " _f "\n", \
- current->id , __LINE__ , ## _a )
+#define SH_LOG(_f, _a...) \
+ printk("DOM%uP%u: SH_LOG(%d): " _f "\n", \
+ current->domain->domain_id , current->processor, __LINE__ , ## _a )
#else
-#define SH_LOG(_f, _a...)
+#define SH_LOG(_f, _a...) ((void)0)
#endif
-#if SHADOW_DEBUG
-#define SH_VLOG(_f, _a...) \
- printk("DOM%u: (file=shadow.c, line=%d) " _f "\n", \
- current->id , __LINE__ , ## _a )
+#if SHADOW_VERBOSE_DEBUG
+#define SH_VLOG(_f, _a...) \
+ printk("DOM%uP%u: SH_VLOG(%d): " _f "\n", \
+ current->domain->domain_id, current->processor, __LINE__ , ## _a )
#else
-#define SH_VLOG(_f, _a...)
+#define SH_VLOG(_f, _a...) ((void)0)
#endif
-#if 0
-#define SH_VVLOG(_f, _a...) \
- printk("DOM%u: (file=shadow.c, line=%d) " _f "\n", \
- current->id , __LINE__ , ## _a )
+#if SHADOW_VVERBOSE_DEBUG
+#define SH_VVLOG(_f, _a...) \
+ printk("DOM%uP%u: SH_VVLOG(%d): " _f "\n", \
+ current->domain->domain_id, current->processor, __LINE__ , ## _a )
#else
-#define SH_VVLOG(_f, _a...)
+#define SH_VVLOG(_f, _a...) ((void)0)
+#endif
+
+#if SHADOW_VVVERBOSE_DEBUG
+#define SH_VVVLOG(_f, _a...) \
+ printk("DOM%uP%u: SH_VVVLOG(%d): " _f "\n", \
+ current->domain->domain_id, current->processor, __LINE__ , ## _a )
+#else
+#define SH_VVVLOG(_f, _a...) ((void)0)
+#endif
+
+#if FULLSHADOW_DEBUG
+#define FSH_LOG(_f, _a...) \
+ printk("DOM%uP%u: FSH_LOG(%d): " _f "\n", \
+ current->domain->domain_id, current->processor, __LINE__ , ## _a )
+#else
+#define FSH_LOG(_f, _a...) ((void)0)
#endif
/************************************************************************/
-static inline int __mark_dirty( struct mm_struct *m, unsigned int mfn)
+static inline int
+shadow_get_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
+{
+ l1_pgentry_t nl1e;
+ int res;
+ unsigned long mfn;
+ struct domain *owner;
+
+ ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
+
+ if ( !shadow_mode_refcounts(d) )
+ return 1;
+
+ nl1e = l1e;
+ l1e_remove_flags(nl1e, _PAGE_GLOBAL);
+ res = get_page_from_l1e(nl1e, d);
+
+ if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
+ !(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) &&
+ (mfn = l1e_get_pfn(nl1e)) &&
+ pfn_valid(mfn) &&
+ (owner = page_get_owner(pfn_to_page(mfn))) &&
+ (d != owner) )
+ {
+ res = get_page_from_l1e(nl1e, owner);
+ printk("tried to map mfn %lx from domain %d into shadow page tables "
+ "of domain %d; %s\n",
+ mfn, owner->domain_id, d->domain_id,
+ res ? "success" : "failed");
+ }
+
+ if ( unlikely(!res) )
+ {
+ perfc_incrc(shadow_get_page_fail);
+ FSH_LOG("%s failed to get ref l1e=%" PRIpte "\n",
+ __func__, l1e_get_intpte(l1e));
+ }
+
+ return res;
+}
+
+static inline void
+shadow_put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return;
+
+ put_page_from_l1e(l1e, d);
+}
+
+static inline void
+shadow_put_page_type(struct domain *d, struct pfn_info *page)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return;
+
+ put_page_type(page);
+}
+
+static inline int shadow_get_page(struct domain *d,
+ struct pfn_info *page,
+ struct domain *owner)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return 1;
+ return get_page(page, owner);
+}
+
+static inline void shadow_put_page(struct domain *d,
+ struct pfn_info *page)
+{
+ if ( !shadow_mode_refcounts(d) )
+ return;
+ put_page(page);
+}
+
+/************************************************************************/
+
+static inline int __mark_dirty(struct domain *d, unsigned int mfn)
{
unsigned long pfn;
int rc = 0;
- ASSERT(spin_is_locked(&m->shadow_lock));
- ASSERT(m->shadow_dirty_bitmap != NULL);
+ ASSERT(shadow_lock_is_acquired(d));
+ ASSERT(d->arch.shadow_dirty_bitmap != NULL);
+
+ if ( !VALID_MFN(mfn) )
+ return rc;
+ // N.B. This doesn't use __mfn_to_gpfn().
+ // This wants the nice compact set of PFNs from 0..domain's max,
+ // which __mfn_to_gpfn() only returns for translated domains.
+ //
pfn = machine_to_phys_mapping[mfn];
/*
@@ -101,30 +462,27 @@ static inline int __mark_dirty( struct mm_struct *m, unsigned int mfn)
* domain's pseudo-physical memory map (e.g., the shared info frame).
* Nothing to do here...
*/
- if ( unlikely(pfn & 0x80000000UL) )
+ if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
return rc;
- if ( likely(pfn < m->shadow_dirty_bitmap_size) )
+ if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) )
{
/* N.B. Can use non-atomic TAS because protected by shadow_lock. */
- if ( !__test_and_set_bit(pfn, m->shadow_dirty_bitmap) )
+ if ( !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
{
- m->shadow_dirty_count++;
+ d->arch.shadow_dirty_count++;
rc = 1;
}
}
#ifndef NDEBUG
else if ( mfn < max_page )
{
- unsigned long *esp;
- SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (mm %p)",
- mfn, pfn, m->shadow_dirty_bitmap_size, m );
- SH_LOG("dom=%p caf=%08x taf=%08x\n",
- frame_table[mfn].u.inuse.domain,
+ SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
+ mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
+ SH_LOG("dom=%p caf=%08x taf=%08x",
+ page_get_owner(&frame_table[mfn]),
frame_table[mfn].count_info,
frame_table[mfn].u.inuse.type_info );
- __asm__ __volatile__ ("movl %%esp,%0" : "=r" (esp) : );
- show_trace(esp);
}
#endif
@@ -132,195 +490,656 @@ static inline int __mark_dirty( struct mm_struct *m, unsigned int mfn)
}
-static inline int mark_dirty(struct mm_struct *m, unsigned int mfn)
+static inline int mark_dirty(struct domain *d, unsigned int mfn)
{
int rc;
- shadow_lock(m);
- rc = __mark_dirty(m, mfn);
- shadow_unlock(m);
+ shadow_lock(d);
+ rc = __mark_dirty(d, mfn);
+ shadow_unlock(d);
return rc;
}
/************************************************************************/
-static inline void l1pte_write_fault(
- struct mm_struct *m, unsigned long *gpte_p, unsigned long *spte_p)
-{
- unsigned long gpte = *gpte_p;
- unsigned long spte = *spte_p;
+static inline void
+__shadow_get_l2e(
+ struct vcpu *v, unsigned long va, l2_pgentry_t *psl2e)
+{
+ ASSERT(shadow_mode_enabled(v->domain));
+
+ *psl2e = v->arch.shadow_vtable[l2_table_offset(va)];
+}
+
+static inline void
+__shadow_set_l2e(
+ struct vcpu *v, unsigned long va, l2_pgentry_t value)
+{
+ ASSERT(shadow_mode_enabled(v->domain));
+
+ v->arch.shadow_vtable[l2_table_offset(va)] = value;
+}
+
+static inline void
+__guest_get_l2e(
+ struct vcpu *v, unsigned long va, l2_pgentry_t *pl2e)
+{
+ *pl2e = v->arch.guest_vtable[l2_table_offset(va)];
+}
+
+static inline void
+__guest_set_l2e(
+ struct vcpu *v, unsigned long va, l2_pgentry_t value)
+{
+ struct domain *d = v->domain;
+
+ v->arch.guest_vtable[l2_table_offset(va)] = value;
+
+ if ( unlikely(shadow_mode_translate(d)) )
+ update_hl2e(v, va);
+
+ if ( unlikely(shadow_mode_log_dirty(d)) )
+ __mark_dirty(d, pagetable_get_pfn(v->arch.guest_table));
+}
+
+static inline void
+update_hl2e(struct vcpu *v, unsigned long va)
+{
+ int index = l2_table_offset(va);
+ unsigned long mfn;
+ l2_pgentry_t gl2e = v->arch.guest_vtable[index];
+ l1_pgentry_t old_hl2e, new_hl2e;
+ int need_flush = 0;
+
+ ASSERT(shadow_mode_translate(v->domain));
+
+ old_hl2e = v->arch.hl2_vtable[index];
+
+ if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) &&
+ VALID_MFN(mfn = phys_to_machine_mapping(l2e_get_pfn(gl2e))) )
+ new_hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
+ else
+ new_hl2e = l1e_empty();
+
+ // only do the ref counting if something has changed.
+ //
+ if ( (l1e_has_changed(old_hl2e, new_hl2e, PAGE_FLAG_MASK)) )
+ {
+ if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
+ !shadow_get_page(v->domain, pfn_to_page(l1e_get_pfn(new_hl2e)),
+ v->domain) )
+ new_hl2e = l1e_empty();
+ if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
+ {
+ shadow_put_page(v->domain, pfn_to_page(l1e_get_pfn(old_hl2e)));
+ need_flush = 1;
+ }
+
+ v->arch.hl2_vtable[l2_table_offset(va)] = new_hl2e;
+
+ if ( need_flush )
+ {
+ perfc_incrc(update_hl2e_invlpg);
+ // SMP BUG???
+ local_flush_tlb_one(&linear_pg_table[l1_linear_offset(va)]);
+ }
+ }
+}
+
+static inline void shadow_drop_references(
+ struct domain *d, struct pfn_info *page)
+{
+ if ( likely(!shadow_mode_refcounts(d)) ||
+ ((page->u.inuse.type_info & PGT_count_mask) == 0) )
+ return;
+
+ /* XXX This needs more thought... */
+ printk("%s: needing to call shadow_remove_all_access for mfn=%lx\n",
+ __func__, page_to_pfn(page));
+ printk("Before: mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+ page->count_info, page->u.inuse.type_info);
+
+ shadow_lock(d);
+ shadow_remove_all_access(d, page_to_pfn(page));
+ shadow_unlock(d);
+
+ printk("After: mfn=%lx c=%08x t=%08x\n", page_to_pfn(page),
+ page->count_info, page->u.inuse.type_info);
+}
+
+/* XXX Needs more thought. Neither pretty nor fast: a place holder. */
+static inline void shadow_sync_and_drop_references(
+ struct domain *d, struct pfn_info *page)
+{
+ if ( likely(!shadow_mode_refcounts(d)) )
+ return;
+
+ shadow_lock(d);
+
+ if ( page_out_of_sync(page) )
+ __shadow_sync_mfn(d, page_to_pfn(page));
+
+ shadow_remove_all_access(d, page_to_pfn(page));
+
+ shadow_unlock(d);
+}
+
+/************************************************************************/
+
+/*
+ * Add another shadow reference to smfn.
+ */
+static inline int
+get_shadow_ref(unsigned long smfn)
+{
+ u32 x, nx;
+
+ ASSERT(pfn_valid(smfn));
+
+ x = frame_table[smfn].count_info;
+ nx = x + 1;
+
+ if ( unlikely(nx == 0) )
+ {
+ printk("get_shadow_ref overflow, gmfn=%x smfn=%lx\n",
+ frame_table[smfn].u.inuse.type_info & PGT_mfn_mask,
+ smfn);
+ BUG();
+ }
+
+ // Guarded by the shadow lock...
+ //
+ frame_table[smfn].count_info = nx;
- ASSERT(gpte & _PAGE_RW);
+ return 1;
+}
+
+extern void free_shadow_page(unsigned long smfn);
+
+/*
+ * Drop a shadow reference to smfn.
+ */
+static inline void
+put_shadow_ref(unsigned long smfn)
+{
+ u32 x, nx;
+
+ ASSERT(pfn_valid(smfn));
- gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
+ x = frame_table[smfn].count_info;
+ nx = x - 1;
- switch ( m->shadow_mode )
+ if ( unlikely(x == 0) )
{
- case SHM_test:
- spte = gpte | _PAGE_RW;
- break;
+ printk("put_shadow_ref underflow, smfn=%lx oc=%08x t=%08x\n",
+ smfn,
+ frame_table[smfn].count_info,
+ frame_table[smfn].u.inuse.type_info);
+ BUG();
+ }
+
+ // Guarded by the shadow lock...
+ //
+ frame_table[smfn].count_info = nx;
+
+ if ( unlikely(nx == 0) )
+ {
+ free_shadow_page(smfn);
+ }
+}
+
+static inline void
+shadow_pin(unsigned long smfn)
+{
+ ASSERT( !(frame_table[smfn].u.inuse.type_info & PGT_pinned) );
+
+ frame_table[smfn].u.inuse.type_info |= PGT_pinned;
+ if ( unlikely(!get_shadow_ref(smfn)) )
+ BUG();
+}
+
+static inline void
+shadow_unpin(unsigned long smfn)
+{
+ ASSERT( (frame_table[smfn].u.inuse.type_info & PGT_pinned) );
+
+ frame_table[smfn].u.inuse.type_info &= ~PGT_pinned;
+ put_shadow_ref(smfn);
+}
+
+
+/************************************************************************/
- case SHM_logdirty:
- spte = gpte | _PAGE_RW;
- __mark_dirty(m, gpte >> PAGE_SHIFT);
- break;
+extern void shadow_mark_va_out_of_sync(
+ struct vcpu *v, unsigned long gpfn, unsigned long mfn,
+ unsigned long va);
+
+static inline int l1pte_write_fault(
+ struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p,
+ unsigned long va)
+{
+ struct domain *d = v->domain;
+ l1_pgentry_t gpte = *gpte_p;
+ l1_pgentry_t spte;
+ unsigned long gpfn = l1e_get_pfn(gpte);
+ unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
+
+ //printk("l1pte_write_fault gmfn=%lx\n", gmfn);
+
+ if ( unlikely(!VALID_MFN(gmfn)) )
+ {
+ SH_LOG("l1pte_write_fault: invalid gpfn=%lx", gpfn);
+ *spte_p = l1e_empty();
+ return 0;
}
+ ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
+ l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
+ spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
+
+ SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
+ l1e_get_intpte(spte), l1e_get_intpte(gpte));
+
+ if ( shadow_mode_log_dirty(d) )
+ __mark_dirty(d, gmfn);
+
+ if ( mfn_is_page_table(gmfn) )
+ shadow_mark_va_out_of_sync(v, gpfn, gmfn, va);
+
*gpte_p = gpte;
*spte_p = spte;
+
+ return 1;
}
-static inline void l1pte_read_fault(
- struct mm_struct *m, unsigned long *gpte_p, unsigned long *spte_p)
+static inline int l1pte_read_fault(
+ struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p)
{
- unsigned long gpte = *gpte_p;
- unsigned long spte = *spte_p;
+ l1_pgentry_t gpte = *gpte_p;
+ l1_pgentry_t spte = *spte_p;
+ unsigned long pfn = l1e_get_pfn(gpte);
+ unsigned long mfn = __gpfn_to_mfn(d, pfn);
- gpte |= _PAGE_ACCESSED;
-
- switch ( m->shadow_mode )
+ if ( unlikely(!VALID_MFN(mfn)) )
{
- case SHM_test:
- spte = (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
- break;
+ SH_LOG("l1pte_read_fault: invalid gpfn=%lx", pfn);
+ *spte_p = l1e_empty();
+ return 0;
+ }
+
+ l1e_add_flags(gpte, _PAGE_ACCESSED);
+ spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
- case SHM_logdirty:
- spte = gpte & ~_PAGE_RW;
- break;
+ if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
+ mfn_is_page_table(mfn) )
+ {
+ l1e_remove_flags(spte, _PAGE_RW);
}
+ SH_VVLOG("l1pte_read_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte,
+ l1e_get_intpte(spte), l1e_get_intpte(gpte));
*gpte_p = gpte;
*spte_p = spte;
+
+ return 1;
}
static inline void l1pte_propagate_from_guest(
- struct mm_struct *m, unsigned long *gpte_p, unsigned long *spte_p)
+ struct domain *d, l1_pgentry_t gpte, l1_pgentry_t *spte_p)
{
- unsigned long gpte = *gpte_p;
- unsigned long spte = *spte_p;
+ unsigned long mfn;
+ l1_pgentry_t spte;
- switch ( m->shadow_mode )
+ spte = l1e_empty();
+
+ if ( ((l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
+ (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
+ VALID_MFN(mfn = __gpfn_to_mfn(d, l1e_get_pfn(gpte))) )
{
- case SHM_test:
- spte = 0;
- if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
- (_PAGE_PRESENT|_PAGE_ACCESSED) )
- spte = (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
- break;
+ spte = l1e_from_pfn(
+ mfn, l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL));
- case SHM_logdirty:
- spte = 0;
- if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
- (_PAGE_PRESENT|_PAGE_ACCESSED) )
- spte = gpte & ~_PAGE_RW;
- break;
+ if ( shadow_mode_log_dirty(d) ||
+ !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
+ mfn_is_page_table(mfn) )
+ {
+ l1e_remove_flags(spte, _PAGE_RW);
+ }
}
- *gpte_p = gpte;
+ if ( l1e_get_intpte(spte) || l1e_get_intpte(gpte) )
+ SH_VVVLOG("%s: gpte=%" PRIpte ", new spte=%" PRIpte,
+ __func__, l1e_get_intpte(gpte), l1e_get_intpte(spte));
+
*spte_p = spte;
}
-static inline void l2pde_general(
- struct mm_struct *m,
- unsigned long *gpde_p,
- unsigned long *spde_p,
- unsigned long sl1pfn)
+static inline void hl2e_propagate_from_guest(
+ struct domain *d, l2_pgentry_t gpde, l1_pgentry_t *hl2e_p)
{
- unsigned long gpde = *gpde_p;
- unsigned long spde = *spde_p;
+ unsigned long pfn = l2e_get_pfn(gpde);
+ unsigned long mfn;
+ l1_pgentry_t hl2e;
+
+ hl2e = l1e_empty();
+
+ if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
+ {
+ if ( unlikely((current->domain != d) && !shadow_mode_external(d)) )
+ {
+ // Can't use __gpfn_to_mfn() if we don't have one of this domain's
+ // page tables currently installed.
+ // This isn't common -- it only happens during shadow mode setup
+ // and mode changes.
+ //
+ mfn = gpfn_to_mfn_foreign(d, pfn);
+ }
+ else
+ mfn = __gpfn_to_mfn(d, pfn);
- spde = 0;
+ if ( VALID_MFN(mfn) && (mfn < max_page) )
+ hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
+ }
+
+ if ( l1e_get_intpte(hl2e) || l2e_get_intpte(gpde) )
+ SH_VVLOG("%s: gpde=%" PRIpte " hl2e=%" PRIpte, __func__,
+ l2e_get_intpte(gpde), l1e_get_intpte(hl2e));
+
+ *hl2e_p = hl2e;
+}
+
+static inline void l2pde_general(
+ struct domain *d,
+ l2_pgentry_t *gpde_p,
+ l2_pgentry_t *spde_p,
+ unsigned long sl1mfn)
+{
+ l2_pgentry_t gpde = *gpde_p;
+ l2_pgentry_t spde;
- if ( sl1pfn != 0 )
+ spde = l2e_empty();
+ if ( (l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
{
- spde = (gpde & ~PAGE_MASK) | (sl1pfn << PAGE_SHIFT) |
- _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
- gpde |= _PAGE_ACCESSED | _PAGE_DIRTY;
+ spde = l2e_from_pfn(
+ sl1mfn,
+ (l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL);
- /* Detect linear p.t. mappings and write-protect them. */
- if ( (frame_table[sl1pfn].u.inuse.type_info & PGT_type_mask) ==
- PGT_l2_page_table )
- spde = gpde & ~_PAGE_RW;
+ /* N.B. PDEs do not have a dirty bit. */
+ l2e_add_flags(gpde, _PAGE_ACCESSED);
+
+ *gpde_p = gpde;
}
- *gpde_p = gpde;
+ if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) )
+ SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__,
+ l2e_get_intpte(gpde), l2e_get_intpte(spde));
+
*spde_p = spde;
}
+static inline void l2pde_propagate_from_guest(
+ struct domain *d, l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p)
+{
+ l2_pgentry_t gpde = *gpde_p;
+ unsigned long sl1mfn = 0;
+
+ if ( l2e_get_flags(gpde) & _PAGE_PRESENT )
+ sl1mfn = __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow);
+ l2pde_general(d, gpde_p, spde_p, sl1mfn);
+}
+
+/************************************************************************/
+
+// returns true if a tlb flush is needed
+//
+static int inline
+validate_pte_change(
+ struct domain *d,
+ l1_pgentry_t new_pte,
+ l1_pgentry_t *shadow_pte_p)
+{
+ l1_pgentry_t old_spte, new_spte;
+ int need_flush = 0;
+
+ perfc_incrc(validate_pte_calls);
+
+ l1pte_propagate_from_guest(d, new_pte, &new_spte);
+
+ if ( shadow_mode_refcounts(d) )
+ {
+ old_spte = *shadow_pte_p;
+
+ if ( l1e_get_intpte(old_spte) == l1e_get_intpte(new_spte) )
+ {
+ // No accounting required...
+ //
+ perfc_incrc(validate_pte_changes1);
+ }
+ else if ( l1e_get_intpte(old_spte) == (l1e_get_intpte(new_spte)|_PAGE_RW) )
+ {
+ // Fast path for PTEs that have merely been write-protected
+ // (e.g., during a Unix fork()). A strict reduction in privilege.
+ //
+ perfc_incrc(validate_pte_changes2);
+ if ( likely(l1e_get_flags(new_spte) & _PAGE_PRESENT) )
+ shadow_put_page_type(d, &frame_table[l1e_get_pfn(new_spte)]);
+ }
+ else if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) &
+ _PAGE_PRESENT ) &&
+ l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
+ {
+ // only do the ref counting if something important changed.
+ //
+ perfc_incrc(validate_pte_changes3);
+
+ if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(new_spte, d) )
+ new_spte = l1e_empty();
+ if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
+ {
+ shadow_put_page_from_l1e(old_spte, d);
+ need_flush = 1;
+ }
+ }
+ else
+ {
+ perfc_incrc(validate_pte_changes4);
+ }
+ }
+
+ *shadow_pte_p = new_spte;
+
+ return need_flush;
+}
+
+// returns true if a tlb flush is needed
+//
+static int inline
+validate_hl2e_change(
+ struct domain *d,
+ l2_pgentry_t new_gpde,
+ l1_pgentry_t *shadow_hl2e_p)
+{
+ l1_pgentry_t old_hl2e, new_hl2e;
+ int need_flush = 0;
+
+ perfc_incrc(validate_hl2e_calls);
+
+ old_hl2e = *shadow_hl2e_p;
+ hl2e_propagate_from_guest(d, new_gpde, &new_hl2e);
+
+ // Only do the ref counting if something important changed.
+ //
+ if ( ((l1e_get_flags(old_hl2e) | l1e_get_flags(new_hl2e)) & _PAGE_PRESENT) &&
+ l1e_has_changed(old_hl2e, new_hl2e, _PAGE_PRESENT) )
+ {
+ perfc_incrc(validate_hl2e_changes);
+
+ if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
+ !get_page(pfn_to_page(l1e_get_pfn(new_hl2e)), d) )
+ new_hl2e = l1e_empty();
+ if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
+ {
+ put_page(pfn_to_page(l1e_get_pfn(old_hl2e)));
+ need_flush = 1;
+ }
+ }
+
+ *shadow_hl2e_p = new_hl2e;
+
+ return need_flush;
+}
+
+// returns true if a tlb flush is needed
+//
+static int inline
+validate_pde_change(
+ struct domain *d,
+ l2_pgentry_t new_gpde,
+ l2_pgentry_t *shadow_pde_p)
+{
+ l2_pgentry_t old_spde, new_spde;
+ int need_flush = 0;
+
+ perfc_incrc(validate_pde_calls);
+
+ old_spde = *shadow_pde_p;
+ l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
+
+ // Only do the ref counting if something important changed.
+ //
+ if ( ((l2e_get_intpte(old_spde) | l2e_get_intpte(new_spde)) & _PAGE_PRESENT) &&
+ l2e_has_changed(old_spde, new_spde, _PAGE_PRESENT) )
+ {
+ perfc_incrc(validate_pde_changes);
+
+ if ( (l2e_get_flags(new_spde) & _PAGE_PRESENT) &&
+ !get_shadow_ref(l2e_get_pfn(new_spde)) )
+ BUG();
+ if ( l2e_get_flags(old_spde) & _PAGE_PRESENT )
+ {
+ put_shadow_ref(l2e_get_pfn(old_spde));
+ need_flush = 1;
+ }
+ }
+
+ *shadow_pde_p = new_spde;
+
+ return need_flush;
+}
+
/*********************************************************************/
#if SHADOW_HASH_DEBUG
-static void shadow_audit(struct mm_struct *m, int print)
+
+static void shadow_audit(struct domain *d, int print)
{
int live = 0, free = 0, j = 0, abs;
struct shadow_status *a;
for ( j = 0; j < shadow_ht_buckets; j++ )
{
- a = &m->shadow_ht[j];
- if ( a->pfn ) { live++; ASSERT(a->spfn_and_flags & PSH_pfn_mask); }
- ASSERT(a->pfn < 0x00100000UL);
+ a = &d->arch.shadow_ht[j];
+ if ( a->gpfn_and_flags )
+ {
+ live++;
+ ASSERT(a->smfn);
+ }
+ else
+ ASSERT(!a->next);
+
a = a->next;
while ( a && (live < 9999) )
{
live++;
- if ( (a->pfn == 0) || (a->spfn_and_flags == 0) )
+ if ( (a->gpfn_and_flags == 0) || (a->smfn == 0) )
{
- printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
- live, a->pfn, a->spfn_and_flags, a->next);
+ printk("XXX live=%d gpfn+flags=%lx sp=%lx next=%p\n",
+ live, a->gpfn_and_flags, a->smfn, a->next);
BUG();
}
- ASSERT(a->pfn < 0x00100000UL);
- ASSERT(a->spfn_and_flags & PSH_pfn_mask);
+ ASSERT(a->smfn);
a = a->next;
}
ASSERT(live < 9999);
}
- for ( a = m->shadow_ht_free; a != NULL; a = a->next )
+ for ( a = d->arch.shadow_ht_free; a != NULL; a = a->next )
free++;
- if ( print)
- printk("Xlive=%d free=%d\n",live,free);
-
- abs = (perfc_value(shadow_l1_pages) + perfc_value(shadow_l2_pages)) - live;
+ if ( print )
+ printk("Xlive=%d free=%d\n", live, free);
+
+ // BUG: this only works if there's only a single domain which is
+ // using shadow tables.
+ //
+ abs = (
+ perfc_value(shadow_l1_pages) +
+ perfc_value(shadow_l2_pages) +
+ perfc_value(hl2_table_pages) +
+ perfc_value(snapshot_pages) +
+ perfc_value(writable_pte_predictions)
+ ) - live;
+#ifdef PERF_COUNTERS
if ( (abs < -1) || (abs > 1) )
{
- printk("live=%d free=%d l1=%d l2=%d\n",live,free,
- perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
+ printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n",
+ live, free,
+ perfc_value(shadow_l1_pages),
+ perfc_value(shadow_l2_pages),
+ perfc_value(hl2_table_pages),
+ perfc_value(snapshot_pages),
+ perfc_value(writable_pte_predictions));
BUG();
}
+#endif
+
+ // XXX ought to add some code to audit the out-of-sync entries, too.
+ //
}
#else
#define shadow_audit(p, print) ((void)0)
#endif
-
static inline struct shadow_status *hash_bucket(
- struct mm_struct *m, unsigned int gpfn)
+ struct domain *d, unsigned int gpfn)
{
- return &m->shadow_ht[gpfn % shadow_ht_buckets];
+ return &d->arch.shadow_ht[gpfn % shadow_ht_buckets];
}
-static inline unsigned long __shadow_status(
- struct mm_struct *m, unsigned int gpfn)
+/*
+ * N.B. This takes a guest pfn (i.e. a pfn in the guest's namespace,
+ * which, depending on full shadow mode, may or may not equal
+ * its mfn).
+ * It returns the shadow's mfn, or zero if it doesn't exist.
+ */
+
+static inline unsigned long ___shadow_status(
+ struct domain *d, unsigned long gpfn, unsigned long stype)
{
struct shadow_status *p, *x, *head;
+ unsigned long key = gpfn | stype;
+
+ perfc_incrc(shadow_status_calls);
- x = head = hash_bucket(m, gpfn);
+ x = head = hash_bucket(d, gpfn);
p = NULL;
- SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, x);
- shadow_audit(m, 0);
+ //SH_VVLOG("lookup gpfn=%08x type=%08x bucket=%p", gpfn, stype, x);
+ shadow_audit(d, 0);
do
{
- ASSERT(x->pfn || ((x == head) && (x->next == NULL)));
+ ASSERT(x->gpfn_and_flags || ((x == head) && (x->next == NULL)));
- if ( x->pfn == gpfn )
+ if ( x->gpfn_and_flags == key )
{
+#if SHADOW_DEBUG
+ if ( unlikely(shadow_status_noswap) )
+ return x->smfn;
+#endif
/* Pull-to-front if 'x' isn't already the head item. */
if ( unlikely(x != head) )
{
@@ -330,11 +1149,16 @@ static inline unsigned long __shadow_status(
head->next = x;
/* Swap 'x' contents with head contents. */
- SWAP(head->pfn, x->pfn);
- SWAP(head->spfn_and_flags, x->spfn_and_flags);
+ SWAP(head->gpfn_and_flags, x->gpfn_and_flags);
+ SWAP(head->smfn, x->smfn);
+ }
+ else
+ {
+ perfc_incrc(shadow_status_hit_head);
}
- return head->spfn_and_flags;
+ //SH_VVLOG("lookup gpfn=%p => status=%p", key, head->smfn);
+ return head->smfn;
}
p = x;
@@ -342,20 +1166,126 @@ static inline unsigned long __shadow_status(
}
while ( x != NULL );
+ //SH_VVLOG("lookup gpfn=%p => status=0", key);
+ perfc_incrc(shadow_status_miss);
return 0;
}
+static inline unsigned long __shadow_status(
+ struct domain *d, unsigned long gpfn, unsigned long stype)
+{
+ unsigned long gmfn = ((current->domain == d)
+ ? __gpfn_to_mfn(d, gpfn)
+ : INVALID_MFN);
+
+ ASSERT(shadow_lock_is_acquired(d));
+ ASSERT(gpfn == (gpfn & PGT_mfn_mask));
+ ASSERT(stype && !(stype & ~PGT_type_mask));
+
+ if ( VALID_MFN(gmfn) && (gmfn < max_page) &&
+ (stype != PGT_writable_pred) &&
+ ((stype == PGT_snapshot)
+ ? !mfn_out_of_sync(gmfn)
+ : !mfn_is_page_table(gmfn)) )
+ {
+ perfc_incrc(shadow_status_shortcut);
+#ifndef NDEBUG
+ if ( ___shadow_status(d, gpfn, stype) != 0 )
+ {
+ printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x "
+ "mfn_out_of_sync(gmfn)=%d mfn_is_page_table(gmfn)=%d\n",
+ d->domain_id, gpfn, gmfn, stype,
+ frame_table[gmfn].count_info,
+ frame_table[gmfn].u.inuse.type_info,
+ mfn_out_of_sync(gmfn), mfn_is_page_table(gmfn));
+ BUG();
+ }
+
+ // Undo the affects of the above call to ___shadow_status()'s perf
+ // counters, since that call is really just part of an assertion.
+ //
+ perfc_decrc(shadow_status_calls);
+ perfc_decrc(shadow_status_miss);
+#endif
+ return 0;
+ }
+
+ return ___shadow_status(d, gpfn, stype);
+}
+
+/*
+ * Not clear if pull-to-front is worth while for this or not,
+ * as it generally needs to scan the entire bucket anyway.
+ * Much simpler without.
+ *
+ * Either returns PGT_none, or PGT_l{1,2,3,4}_page_table.
+ */
+static inline u32
+shadow_max_pgtable_type(struct domain *d, unsigned long gpfn,
+ unsigned long *smfn)
+{
+ struct shadow_status *x;
+ u32 pttype = PGT_none, type;
+
+ ASSERT(shadow_lock_is_acquired(d));
+ ASSERT(gpfn == (gpfn & PGT_mfn_mask));
+
+ perfc_incrc(shadow_max_type);
+
+ x = hash_bucket(d, gpfn);
+
+ while ( x && x->gpfn_and_flags )
+ {
+ if ( (x->gpfn_and_flags & PGT_mfn_mask) == gpfn )
+ {
+ type = x->gpfn_and_flags & PGT_type_mask;
+
+ switch ( type )
+ {
+ case PGT_hl2_shadow:
+ // Treat an HL2 as if it's an L1
+ //
+ type = PGT_l1_shadow;
+ break;
+ case PGT_snapshot:
+ case PGT_writable_pred:
+ // Ignore snapshots -- they don't in and of themselves constitute
+ // treating a page as a page table
+ //
+ goto next;
+ case PGT_base_page_table:
+ // Early exit if we found the max possible value
+ //
+ return type;
+ default:
+ break;
+ }
+
+ if ( type > pttype )
+ {
+ pttype = type;
+ if ( smfn )
+ *smfn = x->smfn;
+ }
+ }
+ next:
+ x = x->next;
+ }
+
+ return pttype;
+}
+
/*
* N.B. We can make this locking more fine grained (e.g., per shadow page) if
* it ever becomes a problem, but since we need a spin lock on the hash table
* anyway it's probably not worth being too clever.
*/
static inline unsigned long get_shadow_status(
- struct mm_struct *m, unsigned int gpfn )
+ struct domain *d, unsigned long gpfn, unsigned long stype)
{
unsigned long res;
- ASSERT(m->shadow_mode);
+ ASSERT(shadow_mode_enabled(d));
/*
* If we get here we know that some sort of update has happened to the
@@ -363,63 +1293,66 @@ static inline unsigned long get_shadow_status(
* has changed type. If we're in log dirty mode, we should set the
* appropriate bit in the dirty bitmap.
* N.B. The VA update path doesn't use this and is handled independently.
+ *
+ * XXX need to think this through for vmx guests, but probably OK
*/
- shadow_lock(m);
+ shadow_lock(d);
- if ( m->shadow_mode == SHM_logdirty )
- __mark_dirty( m, gpfn );
+ if ( shadow_mode_log_dirty(d) )
+ __mark_dirty(d, __gpfn_to_mfn(d, gpfn));
- if ( !(res = __shadow_status(m, gpfn)) )
- shadow_unlock(m);
+ if ( !(res = __shadow_status(d, gpfn, stype)) )
+ shadow_unlock(d);
return res;
}
-static inline void put_shadow_status(
- struct mm_struct *m)
+static inline void put_shadow_status(struct domain *d)
{
- shadow_unlock(m);
+ shadow_unlock(d);
}
static inline void delete_shadow_status(
- struct mm_struct *m, unsigned int gpfn)
+ struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
{
struct shadow_status *p, *x, *n, *head;
+ unsigned long key = gpfn | stype;
- ASSERT(spin_is_locked(&m->shadow_lock));
- ASSERT(gpfn != 0);
+ ASSERT(shadow_lock_is_acquired(d));
+ ASSERT(!(gpfn & ~PGT_mfn_mask));
+ ASSERT(stype && !(stype & ~PGT_type_mask));
- head = hash_bucket(m, gpfn);
+ head = hash_bucket(d, gpfn);
- SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b);
- shadow_audit(m, 0);
+ SH_VLOG("delete gpfn=%lx t=%08x bucket=%p", gpfn, stype, head);
+ shadow_audit(d, 0);
/* Match on head item? */
- if ( head->pfn == gpfn )
+ if ( head->gpfn_and_flags == key )
{
if ( (n = head->next) != NULL )
{
/* Overwrite head with contents of following node. */
- head->pfn = n->pfn;
- head->spfn_and_flags = n->spfn_and_flags;
+ head->gpfn_and_flags = n->gpfn_and_flags;
+ head->smfn = n->smfn;
/* Delete following node. */
head->next = n->next;
/* Add deleted node to the free list. */
- n->pfn = 0;
- n->spfn_and_flags = 0;
- n->next = m->shadow_ht_free;
- m->shadow_ht_free = n;
+ n->gpfn_and_flags = 0;
+ n->smfn = 0;
+ n->next = d->arch.shadow_ht_free;
+ d->arch.shadow_ht_free = n;
}
else
{
/* This bucket is now empty. Initialise the head node. */
- head->pfn = 0;
- head->spfn_and_flags = 0;
+ head->gpfn_and_flags = 0;
+ head->smfn = 0;
}
goto found;
@@ -430,16 +1363,16 @@ static inline void delete_shadow_status(
do
{
- if ( x->pfn == gpfn )
+ if ( x->gpfn_and_flags == key )
{
/* Delete matching node. */
p->next = x->next;
/* Add deleted node to the free list. */
- x->pfn = 0;
- x->spfn_and_flags = 0;
- x->next = m->shadow_ht_free;
- m->shadow_ht_free = x;
+ x->gpfn_and_flags = 0;
+ x->smfn = 0;
+ x->next = d->arch.shadow_ht_free;
+ d->arch.shadow_ht_free = x;
goto found;
}
@@ -453,34 +1386,60 @@ static inline void delete_shadow_status(
BUG();
found:
- shadow_audit(m, 0);
-}
+ // release ref to page
+ if ( stype != PGT_writable_pred )
+ put_page(pfn_to_page(gmfn));
+ shadow_audit(d, 0);
+}
static inline void set_shadow_status(
- struct mm_struct *m, unsigned int gpfn, unsigned long s)
+ struct domain *d, unsigned long gpfn, unsigned long gmfn,
+ unsigned long smfn, unsigned long stype)
{
struct shadow_status *x, *head, *extra;
int i;
+ unsigned long key = gpfn | stype;
- ASSERT(spin_is_locked(&m->shadow_lock));
- ASSERT(gpfn != 0);
- ASSERT(s & PSH_shadowed);
+ SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
- x = head = hash_bucket(m, gpfn);
+ ASSERT(shadow_lock_is_acquired(d));
+
+ ASSERT(shadow_mode_translate(d) || gpfn);
+ ASSERT(!(gpfn & ~PGT_mfn_mask));
+
+ // XXX - need to be more graceful.
+ ASSERT(VALID_MFN(gmfn));
+
+ ASSERT(stype && !(stype & ~PGT_type_mask));
+
+ x = head = hash_bucket(d, gpfn);
- SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next);
- shadow_audit(m, 0);
+ SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)",
+ gpfn, smfn, stype, x, x->next);
+ shadow_audit(d, 0);
+
+ // grab a reference to the guest page to represent the entry in the shadow
+ // hash table
+ //
+ // XXX - Should PGT_writable_pred grab a page ref?
+ // - Who/how are these hash table entry refs flushed if/when a page
+ // is given away by the domain?
+ //
+ if ( stype != PGT_writable_pred )
+ get_page(pfn_to_page(gmfn), d);
/*
* STEP 1. If page is already in the table, update it in place.
*/
-
do
{
- if ( x->pfn == gpfn )
+ if ( unlikely(x->gpfn_and_flags == key) )
{
- x->spfn_and_flags = s;
+ if ( stype != PGT_writable_pred )
+ BUG(); // we should never replace entries into the hash table
+ x->smfn = smfn;
+ put_page(pfn_to_page(gmfn)); // already had a ref...
goto done;
}
@@ -493,20 +1452,20 @@ static inline void set_shadow_status(
*/
/* If the bucket is empty then insert the new page as the head item. */
- if ( head->pfn == 0 )
+ if ( head->gpfn_and_flags == 0 )
{
- head->pfn = gpfn;
- head->spfn_and_flags = s;
+ head->gpfn_and_flags = key;
+ head->smfn = smfn;
ASSERT(head->next == NULL);
goto done;
}
/* We need to allocate a new node. Ensure the quicklist is non-empty. */
- if ( unlikely(m->shadow_ht_free == NULL) )
+ if ( unlikely(d->arch.shadow_ht_free == NULL) )
{
SH_LOG("Allocate more shadow hashtable blocks.");
- extra = xmalloc(
+ extra = xmalloc_bytes(
sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
/* XXX Should be more graceful here. */
@@ -516,10 +1475,10 @@ static inline void set_shadow_status(
memset(extra, 0, sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
/* Record the allocation block so it can be correctly freed later. */
- m->shadow_extras_count++;
+ d->arch.shadow_extras_count++;
*((struct shadow_status **)&extra[shadow_ht_extra_size]) =
- m->shadow_ht_extras;
- m->shadow_ht_extras = &extra[0];
+ d->arch.shadow_ht_extras;
+ d->arch.shadow_ht_extras = &extra[0];
/* Thread a free chain through the newly-allocated nodes. */
for ( i = 0; i < (shadow_ht_extra_size - 1); i++ )
@@ -527,56 +1486,250 @@ static inline void set_shadow_status(
extra[i].next = NULL;
/* Add the new nodes to the free list. */
- m->shadow_ht_free = &extra[0];
+ d->arch.shadow_ht_free = &extra[0];
}
/* Allocate a new node from the quicklist. */
- x = m->shadow_ht_free;
- m->shadow_ht_free = x->next;
+ x = d->arch.shadow_ht_free;
+ d->arch.shadow_ht_free = x->next;
/* Initialise the new node and insert directly after the head item. */
- x->pfn = gpfn;
- x->spfn_and_flags = s;
+ x->gpfn_and_flags = key;
+ x->smfn = smfn;
x->next = head->next;
head->next = x;
done:
- shadow_audit(m, 0);
+ shadow_audit(d, 0);
+
+ if ( stype <= PGT_l4_shadow )
+ {
+ // add to front of list of pages to check when removing write
+ // permissions for a page...
+ //
+ }
+}
+
+/************************************************************************/
+
+void static inline
+shadow_update_min_max(unsigned long smfn, int index)
+{
+ struct pfn_info *sl1page = pfn_to_page(smfn);
+ u32 min_max = sl1page->tlbflush_timestamp;
+ int min = SHADOW_MIN(min_max);
+ int max = SHADOW_MAX(min_max);
+ int update = 0;
+
+ if ( index < min )
+ {
+ min = index;
+ update = 1;
+ }
+ if ( index > max )
+ {
+ max = index;
+ update = 1;
+ }
+ if ( update )
+ sl1page->tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max);
+}
+
+extern void shadow_map_l1_into_current_l2(unsigned long va);
+
+void static inline
+shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow)
+{
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ l2_pgentry_t sl2e;
+
+ __shadow_get_l2e(v, va, &sl2e);
+ if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
+ {
+ /*
+ * Either the L1 is not shadowed, or the shadow isn't linked into
+ * the current shadow L2.
+ */
+ if ( create_l1_shadow )
+ {
+ perfc_incrc(shadow_set_l1e_force_map);
+ shadow_map_l1_into_current_l2(va);
+ }
+ else /* check to see if it exists; if so, link it in */
+ {
+ l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)];
+ unsigned long gl1pfn = l2e_get_pfn(gpde);
+ unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow);
+
+ ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT );
+
+ if ( sl1mfn )
+ {
+ perfc_incrc(shadow_set_l1e_unlinked);
+ if ( !get_shadow_ref(sl1mfn) )
+ BUG();
+ l2pde_general(d, &gpde, &sl2e, sl1mfn);
+ __guest_set_l2e(v, va, gpde);
+ __shadow_set_l2e(v, va, sl2e);
+ }
+ else
+ {
+ // no shadow exists, so there's nothing to do.
+ perfc_incrc(shadow_set_l1e_fail);
+ return;
+ }
+ }
+ }
+
+ if ( shadow_mode_refcounts(d) )
+ {
+ l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
+
+ // only do the ref counting if something important changed.
+ //
+ if ( l1e_has_changed(old_spte, new_spte, _PAGE_RW | _PAGE_PRESENT) )
+ {
+ if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
+ !shadow_get_page_from_l1e(new_spte, d) )
+ new_spte = l1e_empty();
+ if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
+ shadow_put_page_from_l1e(old_spte, d);
+ }
+ }
+
+ shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
+
+ shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va));
+}
+
+/************************************************************************/
+
+static inline int
+shadow_mode_page_writable(struct domain *d, unsigned long gpfn)
+{
+ unsigned long mfn = __gpfn_to_mfn(d, gpfn);
+ u32 type = frame_table[mfn].u.inuse.type_info & PGT_type_mask;
+
+ if ( shadow_mode_refcounts(d) &&
+ (type == PGT_writable_page) )
+ type = shadow_max_pgtable_type(d, gpfn, NULL);
+
+ if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
+ (type == PGT_l1_page_table) )
+ return 1;
+
+ if ( shadow_mode_write_all(d) &&
+ type && (type <= PGT_l4_page_table) )
+ return 1;
+
+ return 0;
}
-static inline void __shadow_mk_pagetable(struct mm_struct *mm)
+static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
{
- unsigned long gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
- unsigned long spfn = __shadow_status(mm, gpfn);
+ l2_pgentry_t gpde;
+ l1_pgentry_t gpte;
+ struct vcpu *v = current;
+
+ ASSERT( shadow_mode_translate(current->domain) );
- if ( unlikely(spfn == 0) )
- spfn = shadow_l2_table(mm, gpfn);
+ __guest_get_l2e(v, gva, &gpde);
+ if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
+ return l1e_empty();;
- mm->shadow_table = mk_pagetable(spfn << PAGE_SHIFT);
+ // This is actually overkill - we only need to make sure the hl2
+ // is in-sync.
+ //
+ shadow_sync_va(v, gva);
+
+ if ( unlikely(__copy_from_user(&gpte,
+ &linear_pg_table[gva >> PAGE_SHIFT],
+ sizeof(gpte))) )
+ {
+ FSH_LOG("gva_to_gpte got a fault on gva=%lx", gva);
+ return l1e_empty();
+ }
+
+ return gpte;
}
-static inline void shadow_mk_pagetable(struct mm_struct *mm)
+static inline unsigned long gva_to_gpa(unsigned long gva)
+{
+ l1_pgentry_t gpte;
+
+ gpte = gva_to_gpte(gva);
+ if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
+ return 0;
+
+ return l1e_get_paddr(gpte) + (gva & ~PAGE_MASK);
+}
+
+/************************************************************************/
+
+extern void __update_pagetables(struct vcpu *v);
+static inline void update_pagetables(struct vcpu *v)
{
- SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
- pagetable_val(mm->pagetable), mm->shadow_mode );
+ struct domain *d = v->domain;
+ int paging_enabled;
+
+#ifdef CONFIG_VMX
+ if ( VMX_DOMAIN(v) )
+ paging_enabled = vmx_paging_enabled(v);
+
+ else
+#endif
+ // HACK ALERT: there's currently no easy way to figure out if a domU
+ // has set its arch.guest_table to zero, vs not yet initialized it.
+ //
+ paging_enabled = !!pagetable_get_paddr(v->arch.guest_table);
- if ( unlikely(mm->shadow_mode) )
+ /*
+ * We don't call __update_pagetables() when vmx guest paging is
+ * disabled as we want the linear_pg_table to be inaccessible so that
+ * we bail out early of shadow_fault() if the vmx guest tries illegal
+ * accesses while it thinks paging is turned off.
+ */
+ if ( unlikely(shadow_mode_enabled(d)) && paging_enabled )
{
- shadow_lock(mm);
- __shadow_mk_pagetable(mm);
- shadow_unlock(mm);
+ shadow_lock(d);
+ __update_pagetables(v);
+ shadow_unlock(d);
}
- SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx",
- pagetable_val(mm->pagetable), mm->shadow_mode,
- pagetable_val(mm->shadow_table) );
+ if ( likely(!shadow_mode_external(d)) )
+ {
+#ifdef __x86_64__
+ if ( !(v->arch.flags & TF_kernel_mode) )
+ v->arch.monitor_table = v->arch.guest_table_user;
+ else
+#endif
+ if ( shadow_mode_enabled(d) )
+ v->arch.monitor_table = v->arch.shadow_table;
+ else
+ v->arch.monitor_table = v->arch.guest_table;
+ }
}
#if SHADOW_DEBUG
-extern int _check_pagetable(struct mm_struct *m, pagetable_t pt, char *s);
-#define check_pagetable(m, pt, s) _check_pagetable(m, pt, s)
+extern int _check_pagetable(struct vcpu *v, char *s);
+extern int _check_all_pagetables(struct vcpu *v, char *s);
+
+#define check_pagetable(_v, _s) _check_pagetable(_v, _s)
+//#define check_pagetable(_v, _s) _check_all_pagetables(_v, _s)
+
#else
-#define check_pagetable(m, pt, s) ((void)0)
+#define check_pagetable(_v, _s) ((void)0)
#endif
#endif /* XEN_SHADOW_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index b4d79087c5..c70f4d90fc 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -1,14 +1,28 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+/*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
#ifndef __ASSEMBLY__
#include <xen/config.h>
+#include <xen/kernel.h>
+#include <xen/cpumask.h>
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#ifndef __ASSEMBLY__
#include <asm/fixmap.h>
+#include <asm/bitops.h>
#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
#include <asm/io_apic.h>
+#endif
#include <asm/apic.h>
#endif
+#endif
+#define BAD_APICID 0xFFu
#ifdef CONFIG_SMP
#ifndef __ASSEMBLY__
@@ -17,84 +31,66 @@
*/
extern void smp_alloc_memory(void);
-extern unsigned long phys_cpu_present_map;
-extern unsigned long cpu_online_map;
-extern volatile unsigned long smp_invalidate_needed;
extern int pic_mode;
extern int smp_num_siblings;
-extern int cpu_sibling_map[];
+extern cpumask_t cpu_sibling_map[];
-/*
- * On x86 all CPUs are mapped 1:1 to the APIC space.
- * This simplifies scheduling and IPI sending and
- * compresses data structures.
- */
-static inline int cpu_logical_map(int cpu)
-{
- return cpu;
-}
-static inline int cpu_number_map(int cpu)
-{
- return cpu;
-}
+extern void smp_flush_tlb(void);
+extern void smp_invalidate_rcv(void); /* Process an NMI */
+extern void (*mtrr_hook) (void);
-/*
- * Some lowlevel functions might want to know about
- * the real APIC ID <-> CPU # mapping.
- */
-#define MAX_APICID 256
-extern volatile int cpu_to_physical_apicid[NR_CPUS];
-extern volatile int physical_apicid_to_cpu[MAX_APICID];
-extern volatile int cpu_to_logical_apicid[NR_CPUS];
-extern volatile int logical_apicid_to_cpu[MAX_APICID];
+#ifdef CONFIG_X86_64
+extern void zap_low_mappings(void);
+#else
+extern void zap_low_mappings(l2_pgentry_t *base);
+#endif
-/*
- * General functions that each host system must provide.
- */
-
-/*extern void smp_boot_cpus(void);*/
-extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */
+#define MAX_APICID 256
+extern u8 x86_cpu_to_apicid[];
/*
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
+#define __smp_processor_id() (get_processor_id())
-#if defined(__i386__)
-#define smp_processor_id() (current->processor)
-#elif defined(__x86_64__)
-#include <asm/pda.h>
-#define smp_processor_id() read_pda(cpunumber)
-#endif
+extern cpumask_t cpu_callout_map;
+extern cpumask_t cpu_callin_map;
+#define cpu_possible_map cpu_callout_map
+
+/* We don't mark CPUs online until __cpu_up(), so we need another measure */
+static inline int num_booting_cpus(void)
+{
+ return cpus_weight(cpu_callout_map);
+}
-static __inline int hard_smp_processor_id(void)
+extern void map_cpu_to_logical_apicid(void);
+extern void unmap_cpu_to_logical_apicid(int cpu);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#ifdef APIC_DEFINITION
+extern int hard_smp_processor_id(void);
+#else
+#include <mach_apicdef.h>
+static inline int hard_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_ID(*(unsigned *)(APIC_BASE+APIC_ID));
+ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
}
+#endif
static __inline int logical_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(*(unsigned *)(APIC_BASE+APIC_LDR));
+ return GET_APIC_LOGICAL_ID(*(unsigned int *)(APIC_BASE+APIC_LDR));
}
+#endif
#endif /* !__ASSEMBLY__ */
#define NO_PROC_ID 0xFF /* No processor magic marker */
-/*
- * This magic constant controls our willingness to transfer
- * a process across CPUs. Such a transfer incurs misses on the L1
- * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
- * gut feeling is this will vary by board in value. For a board
- * with separate L2 cache it probably depends also on the RSS, and
- * for a board with shared L2 cache it ought to decay fast as other
- * processes are run.
- */
-
-#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */
-
#endif
#endif
diff --git a/xen/include/asm-x86/smpboot.h b/xen/include/asm-x86/smpboot.h
index 7a0b157114..6def59963b 100644
--- a/xen/include/asm-x86/smpboot.h
+++ b/xen/include/asm-x86/smpboot.h
@@ -1,132 +1,16 @@
#ifndef __ASM_SMPBOOT_H
#define __ASM_SMPBOOT_H
-/*emum for clustered_apic_mode values*/
-enum{
- CLUSTERED_APIC_NONE = 0,
- CLUSTERED_APIC_XAPIC,
- CLUSTERED_APIC_NUMAQ
-};
-
-#ifdef CONFIG_X86_CLUSTERED_APIC
-extern unsigned int apic_broadcast_id;
-extern unsigned char clustered_apic_mode;
-extern unsigned char esr_disable;
-extern unsigned char int_delivery_mode;
-extern unsigned int int_dest_addr_mode;
-extern int cyclone_setup(char*);
-
-static inline void detect_clustered_apic(char* oem, char* prod)
-{
- /*
- * Can't recognize Summit xAPICs at present, so use the OEM ID.
- */
- if (!strncmp(oem, "IBM ENSW", 8) && !strncmp(prod, "VIGIL SMP", 9)){
- clustered_apic_mode = CLUSTERED_APIC_XAPIC;
- apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
- int_dest_addr_mode = APIC_DEST_PHYSICAL;
- int_delivery_mode = dest_Fixed;
- esr_disable = 1;
- /*Start cyclone clock*/
- cyclone_setup(0);
- /* check for ACPI tables */
- } else if (!strncmp(oem, "IBM", 3) &&
- (!strncmp(prod, "SERVIGIL", 8) ||
- !strncmp(prod, "EXA", 3) ||
- !strncmp(prod, "RUTHLESS", 8))){
- clustered_apic_mode = CLUSTERED_APIC_XAPIC;
- apic_broadcast_id = APIC_BROADCAST_ID_XAPIC;
- int_dest_addr_mode = APIC_DEST_PHYSICAL;
- int_delivery_mode = dest_Fixed;
- esr_disable = 1;
- /*Start cyclone clock*/
- cyclone_setup(0);
- } else if (!strncmp(oem, "IBM NUMA", 8)){
- clustered_apic_mode = CLUSTERED_APIC_NUMAQ;
- apic_broadcast_id = APIC_BROADCAST_ID_APIC;
- int_dest_addr_mode = APIC_DEST_LOGICAL;
- int_delivery_mode = dest_LowestPrio;
- esr_disable = 1;
- }
-}
-#define INT_DEST_ADDR_MODE (int_dest_addr_mode)
-#define INT_DELIVERY_MODE (int_delivery_mode)
-#else /* CONFIG_X86_CLUSTERED_APIC */
-#define apic_broadcast_id (APIC_BROADCAST_ID_APIC)
-#define clustered_apic_mode (CLUSTERED_APIC_NONE)
-#define esr_disable (0)
-#define detect_clustered_apic(x,y)
-#define INT_DEST_ADDR_MODE (APIC_DEST_LOGICAL) /* logical delivery */
-#define INT_DELIVERY_MODE (dest_LowestPrio)
-#endif /* CONFIG_X86_CLUSTERED_APIC */
-#define BAD_APICID 0xFFu
-
-#define TRAMPOLINE_LOW phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0x8:0x467)
-#define TRAMPOLINE_HIGH phys_to_virt((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?0xa:0x469)
-
-#define boot_cpu_apicid ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ)?boot_cpu_logical_apicid:boot_cpu_physical_apicid)
-
-extern unsigned char raw_phys_apicid[NR_CPUS];
-
-/*
- * How to map from the cpu_present_map
- */
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
- if (clustered_apic_mode == CLUSTERED_APIC_XAPIC)
- return raw_phys_apicid[mps_cpu];
- if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
- return (mps_cpu/4)*16 + (1<<(mps_cpu%4));
- return mps_cpu;
-}
-
static inline unsigned long apicid_to_phys_cpu_present(int apicid)
{
- if(clustered_apic_mode)
- return 1UL << (((apicid >> 4) << 2) + (apicid & 0x3));
return 1UL << apicid;
}
-#define physical_to_logical_apicid(phys_apic) ( (1ul << (phys_apic & 0x3)) | (phys_apic & 0xF0u) )
-
-/*
- * Mappings between logical cpu number and logical / physical apicid
- * The first four macros are trivial, but it keeps the abstraction consistent
- */
extern volatile int logical_apicid_2_cpu[];
extern volatile int cpu_2_logical_apicid[];
extern volatile int physical_apicid_2_cpu[];
extern volatile int cpu_2_physical_apicid[];
-#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
-#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
-#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
-#ifdef CONFIG_MULTIQUAD /* use logical IDs to bootstrap */
-#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
-#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu]
-#else /* !CONFIG_MULTIQUAD */ /* use physical IDs to bootstrap */
#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
-#endif /* CONFIG_MULTIQUAD */
-#ifdef CONFIG_X86_CLUSTERED_APIC
-static inline int target_cpus(void)
-{
- static int cpu;
- switch(clustered_apic_mode){
- case CLUSTERED_APIC_NUMAQ:
- /* Broadcast intrs to local quad only. */
- return APIC_BROADCAST_ID_APIC;
- case CLUSTERED_APIC_XAPIC:
- /*round robin the interrupts*/
- cpu = (cpu+1)%smp_num_cpus;
- return cpu_to_physical_apicid(cpu);
- default:
- }
- return cpu_online_map;
-}
-#else
-#define target_cpus() (cpu_online_map)
-#endif
#endif
diff --git a/xen/include/asm-x86/string.h b/xen/include/asm-x86/string.h
index fd7ae02a85..6dee130fa7 100644
--- a/xen/include/asm-x86/string.h
+++ b/xen/include/asm-x86/string.h
@@ -1,5 +1,397 @@
-#ifdef __x86_64__
-#include <asm/x86_64/string.h>
+#ifndef __X86_STRING_H__
+#define __X86_STRING_H__
+
+#include <xen/config.h>
+
+#define __HAVE_ARCH_STRCPY
+static inline char *strcpy(char *dest, const char *src)
+{
+ long d0, d1, d2;
+ __asm__ __volatile__ (
+ "1: lodsb \n"
+ " stosb \n"
+ " test %%al,%%al \n"
+ " jne 1b \n"
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2)
+ : "0" (src), "1" (dest) : "memory" );
+ return dest;
+}
+
+#define __HAVE_ARCH_STRNCPY
+static inline char *strncpy(char *dest, const char *src, size_t count)
+{
+ long d0, d1, d2, d3;
+ __asm__ __volatile__ (
+ "1: dec %2 \n"
+ " js 2f \n"
+ " lodsb \n"
+ " stosb \n"
+ " test %%al,%%al \n"
+ " jne 1b \n"
+ " rep ; stosb \n"
+ "2: \n"
+ : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
+ : "0" (src), "1" (dest), "2" (count) : "memory" );
+ return dest;
+}
+
+#define __HAVE_ARCH_STRCAT
+static inline char *strcat(char *dest, const char *src)
+{
+ long d0, d1, d2, d3;
+ __asm__ __volatile__ (
+ " repne ; scasb \n"
+ " dec %1 \n"
+ "1: lodsb \n"
+ " stosb \n"
+ " test %%al,%%al \n"
+ " jne 1b \n"
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+ : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" );
+ return dest;
+}
+
+#define __HAVE_ARCH_STRNCAT
+static inline char *strncat(char *dest, const char *src, size_t count)
+{
+ long d0, d1, d2, d3;
+ __asm__ __volatile__ (
+ " repne ; scasb \n"
+ " dec %1 \n"
+ " mov %8,%3 \n"
+ "1: dec %3 \n"
+ " js 2f \n"
+ " lodsb \n"
+ " stosb \n"
+ " test %%al,%%al \n"
+ " jne 1b \n"
+ "2: xor %%eax,%%eax\n"
+ " stosb"
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+ : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count)
+ : "memory" );
+ return dest;
+}
+
+#define __HAVE_ARCH_STRCMP
+static inline int strcmp(const char *cs, const char *ct)
+{
+ long d0, d1;
+ register int __res;
+ __asm__ __volatile__ (
+ "1: lodsb \n"
+ " scasb \n"
+ " jne 2f \n"
+ " test %%al,%%al \n"
+ " jne 1b \n"
+ " xor %%eax,%%eax\n"
+ " jmp 3f \n"
+ "2: sbb %%eax,%%eax\n"
+ " or $1,%%al \n"
+ "3: \n"
+ : "=a" (__res), "=&S" (d0), "=&D" (d1)
+ : "1" (cs), "2" (ct) );
+ return __res;
+}
+
+#define __HAVE_ARCH_STRNCMP
+static inline int strncmp(const char *cs, const char *ct, size_t count)
+{
+ long d0, d1, d2;
+ register int __res;
+ __asm__ __volatile__ (
+ "1: dec %3 \n"
+ " js 2f \n"
+ " lodsb \n"
+ " scasb \n"
+ " jne 3f \n"
+ " test %%al,%%al \n"
+ " jne 1b \n"
+ "2: xor %%eax,%%eax\n"
+ " jmp 4f \n"
+ "3: sbb %%eax,%%eax\n"
+ " or $1,%%al \n"
+ "4: \n"
+ : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+ : "1" (cs), "2" (ct), "3" (count) );
+ return __res;
+}
+
+#define __HAVE_ARCH_STRCHR
+static inline char *strchr(const char *s, int c)
+{
+ long d0;
+ register char *__res;
+ __asm__ __volatile__ (
+ " mov %%al,%%ah \n"
+ "1: lodsb \n"
+ " cmp %%ah,%%al \n"
+ " je 2f \n"
+ " test %%al,%%al \n"
+ " jne 1b \n"
+ " mov $1,%1 \n"
+ "2: mov %1,%0 \n"
+ " dec %0 \n"
+ : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) );
+ return __res;
+}
+
+#define __HAVE_ARCH_STRLEN
+static inline size_t strlen(const char *s)
+{
+ long d0;
+ register int __res;
+ __asm__ __volatile__ (
+ " repne ; scasb \n"
+ " notl %0 \n"
+ " decl %0 \n"
+ : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) );
+ return __res;
+}
+
+static inline void *__variable_memcpy(void *to, const void *from, size_t n)
+{
+ long d0, d1, d2;
+ __asm__ __volatile__ (
+ " rep ; movs"__OS"\n"
+ " mov %4,%3 \n"
+ " rep ; movsb \n"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n/BYTES_PER_LONG), "r" (n%BYTES_PER_LONG), "1" (to), "2" (from)
+ : "memory" );
+ return to;
+}
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as the count is constant.
+ */
+static always_inline void * __constant_memcpy(
+ void * to, const void * from, size_t n)
+{
+ switch ( n )
+ {
+ case 0:
+ return to;
+ case 1:
+ *(u8 *)to = *(const u8 *)from;
+ return to;
+ case 2:
+ *(u16 *)to = *(const u16 *)from;
+ return to;
+ case 3:
+ *(u16 *)to = *(const u16 *)from;
+ *(2+(u8 *)to) = *(2+(const u8 *)from);
+ return to;
+ case 4:
+ *(u32 *)to = *(const u32 *)from;
+ return to;
+ case 5:
+ *(u32 *)to = *(const u32 *)from;
+ *(4+(u8 *)to) = *(4+(const u8 *)from);
+ return to;
+ case 6:
+ *(u32 *)to = *(const u32 *)from;
+ *(2+(u16 *)to) = *(2+(const u16 *)from);
+ return to;
+ case 7:
+ *(u32 *)to = *(const u32 *)from;
+ *(2+(u16 *)to) = *(2+(const u16 *)from);
+ *(6+(u8 *)to) = *(6+(const u8 *)from);
+ return to;
+ case 8:
+ *(u64 *)to = *(const u64 *)from;
+ return to;
+ case 12:
+ *(u64 *)to = *(const u64 *)from;
+ *(2+(u32 *)to) = *(2+(const u32 *)from);
+ return to;
+ case 16:
+ *(u64 *)to = *(const u64 *)from;
+ *(1+(u64 *)to) = *(1+(const u64 *)from);
+ return to;
+ case 20:
+ *(u64 *)to = *(const u64 *)from;
+ *(1+(u64 *)to) = *(1+(const u64 *)from);
+ *(4+(u32 *)to) = *(4+(const u32 *)from);
+ return to;
+ }
+#define COMMON(x) \
+ __asm__ __volatile__ ( \
+ "rep ; movs"__OS \
+ x \
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+ : "0" (n/BYTES_PER_LONG), "1" (to), "2" (from) \
+ : "memory" );
+ {
+ long d0, d1, d2;
+ switch ( n % BYTES_PER_LONG )
+ {
+ case 0: COMMON(""); return to;
+ case 1: COMMON("\n\tmovsb"); return to;
+ case 2: COMMON("\n\tmovsw"); return to;
+ case 3: COMMON("\n\tmovsw\n\tmovsb"); return to;
+ case 4: COMMON("\n\tmovsl"); return to;
+ case 5: COMMON("\n\tmovsl\n\tmovsb"); return to;
+ case 6: COMMON("\n\tmovsl\n\tmovsw"); return to;
+ case 7: COMMON("\n\tmovsl\n\tmovsw\n\tmovsb"); return to;
+ }
+ }
+#undef COMMON
+ return to;
+}
+
+#define __HAVE_ARCH_MEMCPY
+#define memcpy(t,f,n) (__memcpy((t),(f),(n)))
+static always_inline
+void *__memcpy(void *t, const void *f, size_t n)
+{
+ return (__builtin_constant_p(n) ?
+ __constant_memcpy((t),(f),(n)) :
+ __variable_memcpy((t),(f),(n)));
+}
+
+/* Some version of gcc don't have this builtin. It's non-critical anyway. */
+#define __HAVE_ARCH_MEMMOVE
+extern void *memmove(void *dest, const void *src, size_t n);
+
+#define __HAVE_ARCH_MEMCMP
+#define memcmp __builtin_memcmp
+
+#define __HAVE_ARCH_MEMCHR
+static inline void *memchr(const void *cs, int c, size_t count)
+{
+ long d0;
+ register void *__res;
+ if ( count == 0 )
+ return NULL;
+ __asm__ __volatile__ (
+ " repne ; scasb\n"
+ " je 1f \n"
+ " mov $1,%0 \n"
+ "1: dec %0 \n"
+ : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) );
+ return __res;
+}
+
+static inline void *__memset_generic(void *s, char c, size_t count)
+{
+ long d0, d1;
+ __asm__ __volatile__ (
+ "rep ; stosb"
+ : "=&c" (d0), "=&D" (d1) : "a" (c), "1" (s), "0" (count) : "memory" );
+ return s;
+}
+
+/* we might want to write optimized versions of these later */
+#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
+
+/*
+ * memset(x,0,y) is a reasonably common thing to do, so we want to fill
+ * things 32 bits at a time even when we don't know the size of the
+ * area at compile-time..
+ */
+static inline void *__constant_c_memset(void *s, unsigned long c, size_t count)
+{
+ long d0, d1;
+ __asm__ __volatile__(
+ " rep ; stos"__OS"\n"
+ " mov %3,%4 \n"
+ " rep ; stosb \n"
+ : "=&c" (d0), "=&D" (d1)
+ : "a" (c), "r" (count%BYTES_PER_LONG),
+ "0" (count/BYTES_PER_LONG), "1" (s)
+ : "memory" );
+ return s;
+}
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as we by now know that both pattern and count is constant..
+ */
+static always_inline void *__constant_c_and_count_memset(
+ void *s, unsigned long pattern, size_t count)
+{
+ switch ( count )
+ {
+ case 0:
+ return s;
+ case 1:
+ *(u8 *)s = pattern;
+ return s;
+ case 2:
+ *(u16 *)s = pattern;
+ return s;
+ case 3:
+ *(u16 *)s = pattern;
+ *(2+(u8 *)s) = pattern;
+ return s;
+ case 4:
+ *(u32 *)s = pattern;
+ return s;
+ case 5:
+ *(u32 *)s = pattern;
+ *(4+(u8 *)s) = pattern;
+ return s;
+ case 6:
+ *(u32 *)s = pattern;
+ *(2+(u16 *)s) = pattern;
+ return s;
+ case 7:
+ *(u32 *)s = pattern;
+ *(2+(u16 *)s) = pattern;
+ *(6+(u8 *)s) = pattern;
+ return s;
+ case 8:
+ *(u64 *)s = pattern;
+ return s;
+ }
+#define COMMON(x) \
+ __asm__ __volatile__ ( \
+ "rep ; stos"__OS \
+ x \
+ : "=&c" (d0), "=&D" (d1) \
+ : "a" (pattern), "0" (count/BYTES_PER_LONG), "1" (s) \
+ : "memory" )
+ {
+ long d0, d1;
+ switch ( count % BYTES_PER_LONG )
+ {
+ case 0: COMMON(""); return s;
+ case 1: COMMON("\n\tstosb"); return s;
+ case 2: COMMON("\n\tstosw"); return s;
+ case 3: COMMON("\n\tstosw\n\tstosb"); return s;
+ case 4: COMMON("\n\tstosl"); return s;
+ case 5: COMMON("\n\tstosl\n\tstosb"); return s;
+ case 6: COMMON("\n\tstosl\n\tstosw"); return s;
+ case 7: COMMON("\n\tstosl\n\tstosw\n\tstosb"); return s;
+ }
+ }
+#undef COMMON
+ return s;
+}
+
+#define __constant_c_x_memset(s, c, count) \
+(__builtin_constant_p(count) ? \
+ __constant_c_and_count_memset((s),(c),(count)) : \
+ __constant_c_memset((s),(c),(count)))
+
+#define __var_x_memset(s, c, count) \
+(__builtin_constant_p(count) ? \
+ __constant_count_memset((s),(c),(count)) : \
+ __memset_generic((s),(c),(count)))
+
+#ifdef CONFIG_X86_64
+#define MEMSET_PATTERN_MUL 0x0101010101010101UL
#else
-#include <asm/x86_32/string.h>
+#define MEMSET_PATTERN_MUL 0x01010101UL
#endif
+
+#define __HAVE_ARCH_MEMSET
+#define memset(s, c, count) (__memset((s),(c),(count)))
+#define __memset(s, c, count) \
+(__builtin_constant_p(c) ? \
+ __constant_c_x_memset((s),(MEMSET_PATTERN_MUL*(unsigned char)(c)),(count)) : \
+ __var_x_memset((s),(c),(count)))
+
+#endif /* __X86_STRING_H__ */
diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h
index 18854edc3f..cb5f64d001 100644
--- a/xen/include/asm-x86/system.h
+++ b/xen/include/asm-x86/system.h
@@ -2,6 +2,7 @@
#define __ASM_SYSTEM_H
#include <xen/config.h>
+#include <xen/types.h>
#include <asm/bitops.h>
/* Clear and set 'TS' bit respectively */
@@ -70,8 +71,8 @@ static always_inline unsigned long __xchg(unsigned long x, volatile void * ptr,
* indicated by comparing RETURN with OLD.
*/
-static always_inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
+static always_inline unsigned long __cmpxchg(
+ volatile void *ptr, unsigned long old, unsigned long new, int size)
{
unsigned long prev;
switch (size) {
@@ -112,9 +113,50 @@ static always_inline unsigned long __cmpxchg(volatile void *ptr, unsigned long o
return old;
}
-#define cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
+#define __HAVE_ARCH_CMPXCHG
+
+#if BITS_PER_LONG == 64
+
+#define cmpxchg(ptr,o,n) \
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o), \
+ (unsigned long)(n),sizeof(*(ptr))))
+#else
+
+static always_inline unsigned long long __cmpxchg8b(
+ volatile void *ptr, unsigned long long old, unsigned long long new)
+{
+ unsigned long long prev;
+ __asm__ __volatile__ (
+ LOCK_PREFIX "cmpxchg8b %3"
+ : "=A" (prev)
+ : "c" ((u32)(new>>32)), "b" ((u32)new),
+ "m" (*__xg((volatile void *)ptr)), "0" (old)
+ : "memory" );
+ return prev;
+}
+
+#define cmpxchg(ptr,o,n) \
+({ \
+ __typeof__(*(ptr)) __prev; \
+ switch ( sizeof(*(ptr)) ) { \
+ case 8: \
+ __prev = ((__typeof__(*(ptr)))__cmpxchg8b( \
+ (ptr), \
+ (unsigned long long)(o), \
+ (unsigned long long)(n))); \
+ break; \
+ default: \
+ __prev = ((__typeof__(*(ptr)))__cmpxchg( \
+ (ptr), \
+ (unsigned long)(o), \
+ (unsigned long)(n), \
+ sizeof(*(ptr)))); \
+ break; \
+ } \
+ __prev; \
+})
+
+#endif
/*
@@ -152,6 +194,23 @@ static always_inline unsigned long __cmpxchg(volatile void *ptr, unsigned long o
case 4: \
__cmpxchg_user(_p,_o,_n,"l","","r"); \
break; \
+ case 8: \
+ __asm__ __volatile__ ( \
+ "1: " LOCK_PREFIX "cmpxchg8b %4\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $1,%1\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,3b\n" \
+ ".previous" \
+ : "=A" (_o), "=r" (_rc) \
+ : "c" ((u32)((u64)(_n)>>32)), "b" ((u32)(_n)), \
+ "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0) \
+ : "memory"); \
+ break; \
} \
_rc; \
})
@@ -274,4 +333,6 @@ static inline int local_irq_is_enabled(void)
#define BROKEN_ACPI_Sx 0x0001
#define BROKEN_INIT_AFTER_S1 0x0002
+extern int es7000_plat;
+
#endif
diff --git a/xen/include/asm-x86/time.h b/xen/include/asm-x86/time.h
new file mode 100644
index 0000000000..8f48cd31dc
--- /dev/null
+++ b/xen/include/asm-x86/time.h
@@ -0,0 +1,7 @@
+
+#ifndef __X86_TIME_H__
+#define __X86_TIME_H__
+
+extern int timer_ack;
+
+#endif /* __X86_TIME_H__ */
diff --git a/xen/include/asm-x86/types.h b/xen/include/asm-x86/types.h
index 9bb1f6ec85..9cec42e7a7 100644
--- a/xen/include/asm-x86/types.h
+++ b/xen/include/asm-x86/types.h
@@ -1,8 +1,5 @@
-#ifndef _X86_TYPES_H
-#define _X86_TYPES_H
-
-typedef unsigned short umode_t;
-
+#ifndef __X86_TYPES_H__
+#define __X86_TYPES_H__
/*
* __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
@@ -43,19 +40,22 @@ typedef unsigned int u32;
typedef signed long long s64;
typedef unsigned long long u64;
#define BITS_PER_LONG 32
-typedef unsigned int size_t;
+#define BYTES_PER_LONG 4
+#define LONG_BYTEORDER 2
+#if defined(CONFIG_X86_PAE)
+typedef u64 physaddr_t;
+#else
+typedef u32 physaddr_t;
+#endif
#elif defined(__x86_64__)
typedef signed long s64;
typedef unsigned long u64;
#define BITS_PER_LONG 64
-typedef unsigned long size_t;
+#define BYTES_PER_LONG 8
+#define LONG_BYTEORDER 3
+typedef u64 physaddr_t;
#endif
-/* DMA addresses come in generic and 64-bit flavours. */
-
-typedef unsigned long dma_addr_t;
-typedef u64 dma64_addr_t;
-
-typedef unsigned short xmem_bufctl_t;
+typedef unsigned long size_t;
-#endif
+#endif /* __X86_TYPES_H__ */
diff --git a/xen/include/asm-x86/uaccess.h b/xen/include/asm-x86/uaccess.h
index 46c02ecef4..e5e32d0938 100644
--- a/xen/include/asm-x86/uaccess.h
+++ b/xen/include/asm-x86/uaccess.h
@@ -2,12 +2,265 @@
#ifndef __X86_UACCESS_H__
#define __X86_UACCESS_H__
+#include <xen/config.h>
+#include <xen/compiler.h>
+#include <xen/errno.h>
+#include <xen/prefetch.h>
+#include <asm/page.h>
+
+#define __user
+
#ifdef __x86_64__
#include <asm/x86_64/uaccess.h>
#else
#include <asm/x86_32/uaccess.h>
#endif
+unsigned long copy_to_user(void *to, const void *from, unsigned len);
+unsigned long copy_from_user(void *to, const void *from, unsigned len);
+/* Handles exceptions in both to and from, but doesn't do access_ok */
+unsigned long __copy_to_user_ll(void *to, const void *from, unsigned n);
+unsigned long __copy_from_user_ll(void *to, const void *from, unsigned n);
+
+extern long __get_user_bad(void);
+extern void __put_user_bad(void);
+
+/**
+ * get_user: - Get a simple variable from user space.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define get_user(x,ptr) \
+ __get_user_check((x),(ptr),sizeof(*(ptr)))
+
+/**
+ * put_user: - Write a simple value into user space.
+ * @x: Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define put_user(x,ptr) \
+ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+/**
+ * __get_user: - Get a simple variable from user space, with less checking.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define __get_user(x,ptr) \
+ __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+
+/**
+ * __put_user: - Write a simple value into user space, with less checking.
+ * @x: Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Returns zero on success, or -EFAULT on error.
+ */
+#define __put_user(x,ptr) \
+ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __put_user_nocheck(x,ptr,size) \
+({ \
+ long __pu_err; \
+ __put_user_size((x),(ptr),(size),__pu_err,-EFAULT); \
+ __pu_err; \
+})
+
+#define __put_user_check(x,ptr,size) \
+({ \
+ long __pu_err = -EFAULT; \
+ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
+ if (__addr_ok(__pu_addr)) \
+ __put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \
+ __pu_err; \
+})
+
+#define __get_user_nocheck(x,ptr,size) \
+({ \
+ long __gu_err, __gu_val; \
+ __get_user_size(__gu_val,(ptr),(size),__gu_err,-EFAULT);\
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ __gu_err; \
+})
+
+#define __get_user_check(x,ptr,size) \
+({ \
+ long __gu_err, __gu_val; \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __get_user_size(__gu_val,__gu_addr,(size),__gu_err,-EFAULT); \
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ if (!__addr_ok(__gu_addr)) __gu_err = -EFAULT; \
+ __gu_err; \
+})
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %"rtype"1,%2\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: mov %3,%0\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " "__FIXUP_ALIGN"\n" \
+ " "__FIXUP_WORD" 1b,3b\n" \
+ ".previous" \
+ : "=r"(err) \
+ : ltype (x), "m"(__m(addr)), "i"(errret), "0"(err))
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
+ __asm__ __volatile__( \
+ "1: mov"itype" %2,%"rtype"1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: mov %3,%0\n" \
+ " xor"itype" %"rtype"1,%"rtype"1\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " "__FIXUP_ALIGN"\n" \
+ " "__FIXUP_WORD" 1b,3b\n" \
+ ".previous" \
+ : "=r"(err), ltype (x) \
+ : "m"(__m(addr)), "i"(errret), "0"(err))
+
+/**
+ * __copy_to_user: - Copy a block of data into user space, with less checking
+ * @to: Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from kernel space to user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+static always_inline unsigned long
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (__builtin_constant_p(n)) {
+ unsigned long ret;
+
+ switch (n) {
+ case 1:
+ __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret, 1);
+ return ret;
+ case 2:
+ __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret, 2);
+ return ret;
+ case 4:
+ __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4);
+ return ret;
+ case 8:
+ __put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret, 8);
+ return ret;
+ }
+ }
+ return __copy_to_user_ll(to, from, n);
+}
+
+/**
+ * __copy_from_user: - Copy a block of data from user space, with less checking
+ * @to: Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep.
+ *
+ * Copy data from user space to kernel space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+static always_inline unsigned long
+__copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ if (__builtin_constant_p(n)) {
+ unsigned long ret;
+
+ switch (n) {
+ case 1:
+ __get_user_size(*(u8 *)to, from, 1, ret, 1);
+ return ret;
+ case 2:
+ __get_user_size(*(u16 *)to, from, 2, ret, 2);
+ return ret;
+ case 4:
+ __get_user_size(*(u32 *)to, from, 4, ret, 4);
+ return ret;
+ case 8:
+ __get_user_size(*(u64*)to, from, 8, ret, 8);
+ return ret;
+ }
+ }
+ return __copy_from_user_ll(to, from, n);
+}
+
/*
* The exception table consists of pairs of addresses: the first is the
* address of an instruction that is allowed to fault, and the second is
diff --git a/xen/include/asm-x86/vmx.h b/xen/include/asm-x86/vmx.h
new file mode 100644
index 0000000000..1aab977981
--- /dev/null
+++ b/xen/include/asm-x86/vmx.h
@@ -0,0 +1,340 @@
+/*
+ * vmx.h: VMX Architecture related definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_VMX_H__
+#define __ASM_X86_VMX_H__
+
+#include <xen/sched.h>
+#include <asm/types.h>
+#include <asm/regs.h>
+#include <asm/processor.h>
+#include <asm/vmx_vmcs.h>
+#include <asm/i387.h>
+
+extern void vmx_asm_vmexit_handler(struct cpu_user_regs);
+extern void vmx_asm_do_resume(void);
+extern void vmx_asm_do_launch(void);
+extern void vmx_intr_assist(struct vcpu *d);
+
+extern void arch_vmx_do_launch(struct vcpu *);
+extern void arch_vmx_do_resume(struct vcpu *);
+
+extern int vmcs_size;
+extern unsigned int cpu_rev;
+
+/*
+ * Need fill bits for SENTER
+ */
+
+#define MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x00000016
+
+#define MONITOR_PIN_BASED_EXEC_CONTROLS \
+ MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE | \
+ PIN_BASED_EXT_INTR_MASK | \
+ PIN_BASED_NMI_EXITING
+
+#define MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE 0x0401e172
+
+#define MONITOR_CPU_BASED_EXEC_CONTROLS \
+ MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE | \
+ CPU_BASED_HLT_EXITING | \
+ CPU_BASED_INVDPG_EXITING | \
+ CPU_BASED_MWAIT_EXITING | \
+ CPU_BASED_MOV_DR_EXITING | \
+ CPU_BASED_UNCOND_IO_EXITING | \
+ CPU_BASED_CR8_LOAD_EXITING | \
+ CPU_BASED_CR8_STORE_EXITING
+
+#define MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE 0x0003edff
+
+#define VM_EXIT_CONTROLS_IA_32E_MODE 0x00000200
+
+#define MONITOR_VM_EXIT_CONTROLS \
+ MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE |\
+ VM_EXIT_ACK_INTR_ON_EXIT
+
+#define VM_ENTRY_CONTROLS_RESERVED_VALUE 0x000011ff
+#define VM_ENTRY_CONTROLS_IA_32E_MODE 0x00000200
+#define MONITOR_VM_ENTRY_CONTROLS VM_ENTRY_CONTROLS_RESERVED_VALUE
+/*
+ * Exit Reasons
+ */
+#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+
+#define EXIT_REASON_PENDING_INTERRUPT 7
+
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+
+/*
+ * Interruption-information format
+ */
+#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */
+#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */
+#define INTR_INFO_DELIEVER_CODE_MASK 0x800 /* 11 */
+#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
+
+#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
+#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
+
+/*
+ * Exit Qualifications for MOV for Control Register Access
+ */
+#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control register */
+#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */
+#define TYPE_MOV_TO_CR (0 << 4)
+#define TYPE_MOV_FROM_CR (1 << 4)
+#define TYPE_CLTS (2 << 4)
+#define TYPE_LMSW (3 << 4)
+#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose register */
+#define LMSW_SOURCE_DATA (0xFFFF << 16) /* 16:31 lmsw source */
+#define REG_EAX (0 << 8)
+#define REG_ECX (1 << 8)
+#define REG_EDX (2 << 8)
+#define REG_EBX (3 << 8)
+#define REG_ESP (4 << 8)
+#define REG_EBP (5 << 8)
+#define REG_ESI (6 << 8)
+#define REG_EDI (7 << 8)
+#define REG_R8 (8 << 8)
+#define REG_R9 (9 << 8)
+#define REG_R10 (10 << 8)
+#define REG_R11 (11 << 8)
+#define REG_R12 (12 << 8)
+#define REG_R13 (13 << 8)
+#define REG_R14 (14 << 8)
+#define REG_R15 (15 << 8)
+
+/*
+ * Exit Qualifications for MOV for Debug Register Access
+ */
+#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug register */
+#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
+#define TYPE_MOV_TO_DR (0 << 4)
+#define TYPE_MOV_FROM_DR (1 << 4)
+#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose register */
+
+#define EXCEPTION_BITMAP_DE (1 << 0) /* Divide Error */
+#define EXCEPTION_BITMAP_DB (1 << 1) /* Debug */
+#define EXCEPTION_BITMAP_NMI (1 << 2) /* NMI */
+#define EXCEPTION_BITMAP_BP (1 << 3) /* Breakpoint */
+#define EXCEPTION_BITMAP_OF (1 << 4) /* Overflow */
+#define EXCEPTION_BITMAP_BR (1 << 5) /* BOUND Range Exceeded */
+#define EXCEPTION_BITMAP_UD (1 << 6) /* Invalid Opcode */
+#define EXCEPTION_BITMAP_NM (1 << 7) /* Device Not Available */
+#define EXCEPTION_BITMAP_DF (1 << 8) /* Double Fault */
+/* reserved */
+#define EXCEPTION_BITMAP_TS (1 << 10) /* Invalid TSS */
+#define EXCEPTION_BITMAP_NP (1 << 11) /* Segment Not Present */
+#define EXCEPTION_BITMAP_SS (1 << 12) /* Stack-Segment Fault */
+#define EXCEPTION_BITMAP_GP (1 << 13) /* General Protection */
+#define EXCEPTION_BITMAP_PG (1 << 14) /* Page Fault */
+#define EXCEPTION_BITMAP_MF (1 << 16) /* x87 FPU Floating-Point Error (Math Fault) */
+#define EXCEPTION_BITMAP_AC (1 << 17) /* Alignment Check */
+#define EXCEPTION_BITMAP_MC (1 << 18) /* Machine Check */
+#define EXCEPTION_BITMAP_XF (1 << 19) /* SIMD Floating-Point Exception */
+
+/* Pending Debug exceptions */
+
+#define PENDING_DEBUG_EXC_BP (1 << 12) /* break point */
+#define PENDING_DEBUG_EXC_BS (1 << 14) /* Single step */
+
+#ifdef XEN_DEBUGGER
+#define MONITOR_DEFAULT_EXCEPTION_BITMAP \
+ ( EXCEPTION_BITMAP_PG | \
+ EXCEPTION_BITMAP_DB | \
+ EXCEPTION_BITMAP_BP | \
+ EXCEPTION_BITMAP_GP )
+#else
+#define MONITOR_DEFAULT_EXCEPTION_BITMAP \
+ ( EXCEPTION_BITMAP_PG | \
+ EXCEPTION_BITMAP_GP )
+#endif
+
+#define VMCALL_OPCODE ".byte 0x0f,0x01,0xc1\n"
+#define VMCLEAR_OPCODE ".byte 0x66,0x0f,0xc7\n" /* reg/opcode: /6 */
+#define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
+#define VMPTRLD_OPCODE ".byte 0x0f,0xc7\n" /* reg/opcode: /6 */
+#define VMPTRST_OPCODE ".byte 0x0f,0xc7\n" /* reg/opcode: /7 */
+#define VMREAD_OPCODE ".byte 0x0f,0x78\n"
+#define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
+#define VMWRITE_OPCODE ".byte 0x0f,0x79\n"
+#define VMXOFF_OPCODE ".byte 0x0f,0x01,0xc4\n"
+#define VMXON_OPCODE ".byte 0xf3,0x0f,0xc7\n"
+
+#define MODRM_EAX_06 ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
+#define MODRM_EAX_07 ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
+#define MODRM_EAX_ECX ".byte 0xc1\n" /* [EAX], [ECX] */
+
+static inline int __vmptrld (u64 addr)
+{
+ unsigned long eflags;
+ __asm__ __volatile__ ( VMPTRLD_OPCODE
+ MODRM_EAX_06
+ :
+ : "a" (&addr)
+ : "memory");
+
+ __save_flags(eflags);
+ if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+ return -1;
+ return 0;
+}
+
+static inline void __vmptrst (u64 addr)
+{
+ __asm__ __volatile__ ( VMPTRST_OPCODE
+ MODRM_EAX_07
+ :
+ : "a" (&addr)
+ : "memory");
+}
+
+static inline int __vmpclear (u64 addr)
+{
+ unsigned long eflags;
+
+ __asm__ __volatile__ ( VMCLEAR_OPCODE
+ MODRM_EAX_06
+ :
+ : "a" (&addr)
+ : "memory");
+ __save_flags(eflags);
+ if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+ return -1;
+ return 0;
+}
+
+static inline int __vmread (unsigned long field, void *value)
+{
+ unsigned long eflags;
+ unsigned long ecx = 0;
+
+ __asm__ __volatile__ ( VMREAD_OPCODE
+ MODRM_EAX_ECX
+ : "=c" (ecx)
+ : "a" (field)
+ : "memory");
+
+ *((long *) value) = ecx;
+
+ __save_flags(eflags);
+ if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+ return -1;
+ return 0;
+}
+
+static inline int __vmwrite (unsigned long field, unsigned long value)
+{
+ unsigned long eflags;
+
+ __asm__ __volatile__ ( VMWRITE_OPCODE
+ MODRM_EAX_ECX
+ :
+ : "a" (field) , "c" (value)
+ : "memory");
+ __save_flags(eflags);
+ if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+ return -1;
+ return 0;
+}
+
+static inline int __vm_set_bit(unsigned long field, unsigned long mask)
+{
+ unsigned long tmp;
+ int err = 0;
+
+ err |= __vmread(field, &tmp);
+ tmp |= mask;
+ err |= __vmwrite(field, tmp);
+
+ return err;
+}
+
+static inline int __vm_clear_bit(unsigned long field, unsigned long mask)
+{
+ unsigned long tmp;
+ int err = 0;
+
+ err |= __vmread(field, &tmp);
+ tmp &= ~mask;
+ err |= __vmwrite(field, tmp);
+
+ return err;
+}
+
+static inline void __vmxoff (void)
+{
+ __asm__ __volatile__ ( VMXOFF_OPCODE
+ ::: "memory");
+}
+
+static inline int __vmxon (u64 addr)
+{
+ unsigned long eflags;
+
+ __asm__ __volatile__ ( VMXON_OPCODE
+ MODRM_EAX_06
+ :
+ : "a" (&addr)
+ : "memory");
+ __save_flags(eflags);
+ if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+ return -1;
+ return 0;
+}
+
+/* Make sure that xen intercepts any FP accesses from current */
+static inline void vmx_stts(void)
+{
+ unsigned long cr0;
+
+ __vmread(GUEST_CR0, &cr0);
+ if (!(cr0 & X86_CR0_TS))
+ __vmwrite(GUEST_CR0, cr0 | X86_CR0_TS);
+
+ __vmread(CR0_READ_SHADOW, &cr0);
+ if (!(cr0 & X86_CR0_TS))
+ __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
+}
+
+/* Works only for v == current */
+static inline int vmx_paging_enabled(struct vcpu *v)
+{
+ unsigned long cr0;
+
+ __vmread(CR0_READ_SHADOW, &cr0);
+ return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
+}
+
+#endif /* __ASM_X86_VMX_H__ */
diff --git a/xen/include/asm-x86/vmx_cpu.h b/xen/include/asm-x86/vmx_cpu.h
new file mode 100644
index 0000000000..2cccc151dd
--- /dev/null
+++ b/xen/include/asm-x86/vmx_cpu.h
@@ -0,0 +1,35 @@
+/*
+ * vmx_cpu.h: Virtual CPU state
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_VMX_CPU_H__
+#define __ASM_X86_VMX_CPU_H__
+
+/*
+ * Virtual CPU
+ */
+struct arch_state_struct {
+ unsigned long mode_flags; /* vm86, 32-bit, 64-bit, etc. */
+ /* debug registers */
+ /* MSRs */
+};
+
+#define VMX_MF_VM86 0
+#define VMX_MF_32 1
+#define VMX_MF_64 2
+
+#endif
diff --git a/xen/include/asm-x86/vmx_intercept.h b/xen/include/asm-x86/vmx_intercept.h
new file mode 100644
index 0000000000..54f118ce39
--- /dev/null
+++ b/xen/include/asm-x86/vmx_intercept.h
@@ -0,0 +1,31 @@
+
+#ifndef _VMX_INTERCEPT_H
+#define _VMX_INTERCEPT_H
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/time.h>
+#include <xen/errno.h>
+#include <public/io/ioreq.h>
+
+
+#define MAX_IO_HANDLER 6
+
+typedef int (*intercept_action_t)(ioreq_t*);
+
+struct vmx_handler_t {
+ int num_slot;
+ struct {
+ unsigned long addr;
+ unsigned long offset;
+ intercept_action_t action;
+ } hdl_list[MAX_IO_HANDLER];
+};
+
+/* global io interception point in HV */
+extern int vmx_io_intercept(ioreq_t*);
+extern int register_io_handler(unsigned long, unsigned long, intercept_action_t);
+
+
+#endif /* _VMX_INTERCEPT_H */
diff --git a/xen/include/asm-x86/vmx_platform.h b/xen/include/asm-x86/vmx_platform.h
new file mode 100644
index 0000000000..2382ebbc7a
--- /dev/null
+++ b/xen/include/asm-x86/vmx_platform.h
@@ -0,0 +1,94 @@
+/*
+ * vmx_platform.h: VMX platform support
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_VMX_PLATFORM_H__
+#define __ASM_X86_VMX_PLATFORM_H__
+
+#include <public/xen.h>
+#include <asm/e820.h>
+#include <asm/vmx_virpit.h>
+#include <asm/vmx_intercept.h>
+
+#define MAX_OPERAND_NUM 3
+#define I_NAME_LEN 16
+
+#define mk_operand(size, index, seg, flag) \
+ (((size) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
+
+#define operand_size(operand) \
+ ((operand >> 24) & 0xFF)
+
+#define operand_index(operand) \
+ ((operand >> 16) & 0xFF)
+ //For instruction.operand[].size
+#define BYTE 1
+#define WORD 2
+#define LONG 4
+#define QUAD 8
+
+ //For instruction.operand[].flag
+#define REGISTER 0x1
+#define MEMORY 0x2
+#define IMMEDIATE 0x4
+#define WZEROEXTEND 0x8
+
+ //For instruction.flags
+#define REPZ 0x1
+#define REPNZ 0x2
+
+struct instruction {
+ __s8 i_name[I_NAME_LEN]; //Instruction's name
+ __s16 op_size; //The operand's bit size, e.g. 16-bit or 32-bit.
+
+ __u64 offset; //The effective address
+ //offset = Base + (Index * Scale) + Displacement
+
+ __u64 immediate;
+
+ __u16 seg_sel; //Segmentation selector
+
+ __u32 operand[MAX_OPERAND_NUM]; //The order of operand is from AT&T Assembly
+ __s16 op_num; //The operand numbers
+
+ __u32 flags; //
+};
+
+#define MAX_INST_LEN 32
+
+struct mi_per_cpu_info
+{
+ unsigned long mmio_target;
+ struct cpu_user_regs *inst_decoder_regs;
+};
+
+struct virutal_platform_def {
+ unsigned long *real_mode_data; /* E820, etc. */
+ unsigned long shared_page_va;
+ struct vmx_virpit_t vmx_pit;
+ struct vmx_handler_t vmx_handler;
+ struct mi_per_cpu_info mpci; /* MMIO */
+};
+
+extern void handle_mmio(unsigned long, unsigned long);
+extern void vmx_wait_io(void);
+extern int vmx_setup_platform(struct vcpu *, struct cpu_user_regs *);
+
+// XXX - think about this -- maybe use bit 30 of the mfn to signify an MMIO frame.
+#define mmio_space(gpa) (!VALID_MFN(phys_to_machine_mapping((gpa) >> PAGE_SHIFT)))
+
+#endif
diff --git a/xen/include/asm-x86/vmx_virpit.h b/xen/include/asm-x86/vmx_virpit.h
new file mode 100644
index 0000000000..1ba907cf3c
--- /dev/null
+++ b/xen/include/asm-x86/vmx_virpit.h
@@ -0,0 +1,42 @@
+#ifndef _VMX_VIRPIT_H
+#define _VMX_VIRPIT_H
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/time.h>
+#include <xen/errno.h>
+#include <xen/ac_timer.h>
+#include <asm/vmx_vmcs.h>
+
+#define PIT_FREQ 1193181
+
+#define LSByte 0
+#define MSByte 1
+#define LSByte_multiple 2
+#define MSByte_multiple 3
+
+struct vmx_virpit_t {
+ /* for simulation of counter 0 in mode 2*/
+ int vector; /* the pit irq vector */
+ unsigned int period; /* the frequency. e.g. 10ms*/
+ unsigned int channel; /* the pit channel, counter 0~2 */
+ unsigned long *intr_bitmap;
+    unsigned int pending_intr_nr; /* the counter for pending timer interrupts */
+ unsigned long long inject_point; /* the time inject virt intr */
+ struct ac_timer pit_timer; /* periodic timer for mode 2*/
+ int first_injected; /* flag to prevent shadow window */
+
+ /* virtual PIT state for handle related I/O */
+ int read_state;
+ int count_LSB_latched;
+ int count_MSB_latched;
+
+ unsigned int count; /* the 16 bit channel count */
+ unsigned int init_val; /* the init value for the counter */
+
+} ;
+
+/* to hook the ioreq packet to get the PIT initialization info */
+extern void vmx_hooks_assist(struct vcpu *d);
+
+#endif /* _VMX_VIRPIT_H */
diff --git a/xen/include/asm-x86/vmx_vmcs.h b/xen/include/asm-x86/vmx_vmcs.h
new file mode 100644
index 0000000000..571a4b05b8
--- /dev/null
+++ b/xen/include/asm-x86/vmx_vmcs.h
@@ -0,0 +1,284 @@
+/*
+ * vmx_vmcs.h: VMCS related definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_VMX_VMCS_H__
+#define __ASM_X86_VMX_VMCS_H__
+
+#include <asm/config.h>
+#include <asm/vmx_cpu.h>
+#include <asm/vmx_platform.h>
+#include <public/vmx_assist.h>
+
+extern int start_vmx(void);
+extern void stop_vmx(void);
+
+void vmx_enter_scheduler(void);
+
+enum {
+ VMX_CPU_STATE_PG_ENABLED=0,
+ VMX_CPU_STATE_PAE_ENABLED,
+ VMX_CPU_STATE_LME_ENABLED,
+ VMX_CPU_STATE_LMA_ENABLED,
+ VMX_CPU_STATE_ASSIST_ENABLED,
+};
+
+#define VMX_LONG_GUEST(ed) \
+ (test_bit(VMX_CPU_STATE_LMA_ENABLED, &ed->arch.arch_vmx.cpu_state))
+
+struct vmcs_struct {
+ u32 vmcs_revision_id;
+ unsigned char data [0]; /* vmcs size is read from MSR */
+};
+
+enum {
+ VMX_INDEX_MSR_LSTAR = 0,
+ VMX_INDEX_MSR_STAR,
+ VMX_INDEX_MSR_CSTAR,
+ VMX_INDEX_MSR_SYSCALL_MASK,
+ VMX_INDEX_MSR_EFER,
+
+ VMX_MSR_COUNT,
+};
+
+struct msr_state{
+ unsigned long flags;
+ unsigned long msr_items[VMX_MSR_COUNT];
+ unsigned long shadow_gs;
+};
+
+struct arch_vmx_struct {
+ struct vmcs_struct *vmcs; /* VMCS pointer in virtual */
+ unsigned long flags; /* VMCS flags */
+ unsigned long cpu_cr2; /* save CR2 */
+ unsigned long cpu_cr3;
+ unsigned long cpu_state;
+ struct msr_state msr_content;
+ struct virutal_platform_def vmx_platform;
+};
+
+#define vmx_schedule_tail(next) \
+ (next)->thread.arch_vmx.arch_vmx_schedule_tail((next))
+
+#define VMX_DOMAIN(ed) ((ed)->arch.arch_vmx.flags)
+
+#define ARCH_VMX_VMCS_LOADED 0 /* VMCS has been loaded and active */
+#define ARCH_VMX_VMCS_LAUNCH 1 /* Needs VMCS launch */
+#define ARCH_VMX_VMCS_RESUME 2 /* Needs VMCS resume */
+#define ARCH_VMX_IO_WAIT 3 /* Waiting for I/O completion */
+
+void vmx_do_launch(struct vcpu *);
+void vmx_do_resume(struct vcpu *);
+
+struct vmcs_struct *alloc_vmcs(void);
+void free_vmcs(struct vmcs_struct *);
+int load_vmcs(struct arch_vmx_struct *, u64);
+int store_vmcs(struct arch_vmx_struct *, u64);
+void dump_vmcs(void);
+int construct_vmcs(struct arch_vmx_struct *, struct cpu_user_regs *,
+ struct vcpu_guest_context *, int);
+
+#define VMCS_USE_HOST_ENV 1
+#define VMCS_USE_SEPARATE_ENV 0
+
+/* this works for both 32bit & 64bit eflags filtering done in construct_init_vmcs_guest() */
+#define VMCS_EFLAGS_RESERVED_0 0xffc08028 /* bitmap for 0 */
+#define VMCS_EFLAGS_RESERVED_1 0x00000002 /* bitmap for 1 */
+
+extern int vmcs_version;
+
+#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+#define CPU_BASED_HLT_EXITING 0x00000080
+#define CPU_BASED_INVDPG_EXITING 0x00000200
+#define CPU_BASED_MWAIT_EXITING 0x00000400
+#define CPU_BASED_RDPMC_EXITING 0x00000800
+#define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+#define CPU_BASED_CR8_STORE_EXITING 0x00100000
+#define CPU_BASED_TPR_SHADOW 0x00200000
+#define CPU_BASED_MOV_DR_EXITING 0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+#define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000
+#define CPU_BASED_MONITOR_EXITING 0x20000000
+#define CPU_BASED_PAUSE_EXITING 0x40000000
+#define PIN_BASED_EXT_INTR_MASK 0x1
+#define PIN_BASED_NMI_EXITING 0x8
+
+#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200
+
+
+/* VMCS Encodings */
+enum vmcs_field {
+ GUEST_ES_SELECTOR = 0x00000800,
+ GUEST_CS_SELECTOR = 0x00000802,
+ GUEST_SS_SELECTOR = 0x00000804,
+ GUEST_DS_SELECTOR = 0x00000806,
+ GUEST_FS_SELECTOR = 0x00000808,
+ GUEST_GS_SELECTOR = 0x0000080a,
+ GUEST_LDTR_SELECTOR = 0x0000080c,
+ GUEST_TR_SELECTOR = 0x0000080e,
+ HOST_ES_SELECTOR = 0x00000c00,
+ HOST_CS_SELECTOR = 0x00000c02,
+ HOST_SS_SELECTOR = 0x00000c04,
+ HOST_DS_SELECTOR = 0x00000c06,
+ HOST_FS_SELECTOR = 0x00000c08,
+ HOST_GS_SELECTOR = 0x00000c0a,
+ HOST_TR_SELECTOR = 0x00000c0c,
+ IO_BITMAP_A = 0x00002000,
+ IO_BITMAP_A_HIGH = 0x00002001,
+ IO_BITMAP_B = 0x00002002,
+ IO_BITMAP_B_HIGH = 0x00002003,
+ VM_EXIT_MSR_STORE_ADDR = 0x00002006,
+ VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
+ VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
+ VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
+ VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
+ VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ TSC_OFFSET = 0x00002010,
+ TSC_OFFSET_HIGH = 0x00002011,
+ VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
+ VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
+ VMCS_LINK_POINTER = 0x00002800,
+ VMCS_LINK_POINTER_HIGH = 0x00002801,
+ GUEST_IA32_DEBUGCTL = 0x00002802,
+ GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
+ CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
+ EXCEPTION_BITMAP = 0x00004004,
+ PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
+ PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
+ CR3_TARGET_COUNT = 0x0000400a,
+ VM_EXIT_CONTROLS = 0x0000400c,
+ VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
+ VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
+ VM_ENTRY_CONTROLS = 0x00004012,
+ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
+ VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
+ VM_ENTRY_INSTRUCTION_LENGTH = 0x0000401a,
+ TPR_THRESHOLD = 0x0000401c,
+ SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ VM_INSTRUCTION_ERROR = 0x00004400,
+ VM_EXIT_REASON = 0x00004402,
+ VM_EXIT_INTR_INFO = 0x00004404,
+ VM_EXIT_INTR_ERROR_CODE = 0x00004406,
+ IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_ERROR_CODE = 0x0000440a,
+ INSTRUCTION_LEN = 0x0000440c,
+ VMX_INSTRUCTION_INFO = 0x0000440e,
+ GUEST_ES_LIMIT = 0x00004800,
+ GUEST_CS_LIMIT = 0x00004802,
+ GUEST_SS_LIMIT = 0x00004804,
+ GUEST_DS_LIMIT = 0x00004806,
+ GUEST_FS_LIMIT = 0x00004808,
+ GUEST_GS_LIMIT = 0x0000480a,
+ GUEST_LDTR_LIMIT = 0x0000480c,
+ GUEST_TR_LIMIT = 0x0000480e,
+ GUEST_GDTR_LIMIT = 0x00004810,
+ GUEST_IDTR_LIMIT = 0x00004812,
+ GUEST_ES_AR_BYTES = 0x00004814,
+ GUEST_CS_AR_BYTES = 0x00004816,
+ GUEST_SS_AR_BYTES = 0x00004818,
+ GUEST_DS_AR_BYTES = 0x0000481a,
+ GUEST_FS_AR_BYTES = 0x0000481c,
+ GUEST_GS_AR_BYTES = 0x0000481e,
+ GUEST_LDTR_AR_BYTES = 0x00004820,
+ GUEST_TR_AR_BYTES = 0x00004822,
+ GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
+ GUEST_SYSENTER_CS = 0x0000482A,
+ HOST_IA32_SYSENTER_CS = 0x00004c00,
+ CR0_GUEST_HOST_MASK = 0x00006000,
+ CR4_GUEST_HOST_MASK = 0x00006002,
+ CR0_READ_SHADOW = 0x00006004,
+ CR4_READ_SHADOW = 0x00006006,
+ CR3_TARGET_VALUE0 = 0x00006008,
+ CR3_TARGET_VALUE1 = 0x0000600a,
+ CR3_TARGET_VALUE2 = 0x0000600c,
+ CR3_TARGET_VALUE3 = 0x0000600e,
+ EXIT_QUALIFICATION = 0x00006400,
+ GUEST_LINEAR_ADDRESS = 0x0000640a,
+ GUEST_CR0 = 0x00006800,
+ GUEST_CR3 = 0x00006802,
+ GUEST_CR4 = 0x00006804,
+ GUEST_ES_BASE = 0x00006806,
+ GUEST_CS_BASE = 0x00006808,
+ GUEST_SS_BASE = 0x0000680a,
+ GUEST_DS_BASE = 0x0000680c,
+ GUEST_FS_BASE = 0x0000680e,
+ GUEST_GS_BASE = 0x00006810,
+ GUEST_LDTR_BASE = 0x00006812,
+ GUEST_TR_BASE = 0x00006814,
+ GUEST_GDTR_BASE = 0x00006816,
+ GUEST_IDTR_BASE = 0x00006818,
+ GUEST_DR7 = 0x0000681a,
+ GUEST_RSP = 0x0000681c,
+ GUEST_RIP = 0x0000681e,
+ GUEST_RFLAGS = 0x00006820,
+ GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
+ GUEST_SYSENTER_ESP = 0x00006824,
+ GUEST_SYSENTER_EIP = 0x00006826,
+ HOST_CR0 = 0x00006c00,
+ HOST_CR3 = 0x00006c02,
+ HOST_CR4 = 0x00006c04,
+ HOST_FS_BASE = 0x00006c06,
+ HOST_GS_BASE = 0x00006c08,
+ HOST_TR_BASE = 0x00006c0a,
+ HOST_GDTR_BASE = 0x00006c0c,
+ HOST_IDTR_BASE = 0x00006c0e,
+ HOST_IA32_SYSENTER_ESP = 0x00006c10,
+ HOST_IA32_SYSENTER_EIP = 0x00006c12,
+ HOST_RSP = 0x00006c14,
+ HOST_RIP = 0x00006c16,
+};
+
+#define VMX_DEBUG 1
+#if VMX_DEBUG
+#define DBG_LEVEL_0 (1 << 0)
+#define DBG_LEVEL_1 (1 << 1)
+#define DBG_LEVEL_2 (1 << 2)
+#define DBG_LEVEL_3 (1 << 3)
+#define DBG_LEVEL_IO (1 << 4)
+#define DBG_LEVEL_VMMU (1 << 5)
+
+extern unsigned int opt_vmx_debug_level;
+#define VMX_DBG_LOG(level, _f, _a...) \
+ if ((level) & opt_vmx_debug_level) \
+ printk("[VMX]" _f "\n", ## _a )
+#else
+#define VMX_DBG_LOG(level, _f, _a...)
+#endif
+
+#define __vmx_bug(regs) \
+ do { \
+ printk("__vmx_bug at %s:%d\n", __FILE__, __LINE__); \
+ show_registers(regs); \
+ domain_crash_synchronous(); \
+ } while (0)
+
+#endif /* __ASM_X86_VMX_VMCS_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/x86_32/asm_defns.h b/xen/include/asm-x86/x86_32/asm_defns.h
index e11ea34964..7ec44ec290 100644
--- a/xen/include/asm-x86/x86_32/asm_defns.h
+++ b/xen/include/asm-x86/x86_32/asm_defns.h
@@ -4,78 +4,132 @@
/* Maybe auto-generate the following two cases (quoted vs. unquoted). */
#ifndef __ASSEMBLY__
-#define __SAVE_ALL_PRE \
- "cld;" \
- "pushl %eax;" \
- "pushl %ebp;" \
- "pushl %edi;" \
- "pushl %esi;" \
- "pushl %edx;" \
- "pushl %ecx;" \
- "pushl %ebx;" \
- "testl $"STR(X86_EFLAGS_VM)","STR(XREGS_eflags)"(%esp);" \
- "jz 2f;" \
- "call setup_vm86_frame;" \
- "jmp 3f;" \
- "2:testb $3,"STR(XREGS_cs)"(%esp);" \
- "jz 1f;" \
- "movl %ds,"STR(XREGS_ds)"(%esp);" \
- "movl %es,"STR(XREGS_es)"(%esp);" \
- "movl %fs,"STR(XREGS_fs)"(%esp);" \
- "movl %gs,"STR(XREGS_gs)"(%esp);" \
+#define __SAVE_ALL_PRE \
+ "cld;" \
+ "pushl %eax;" \
+ "pushl %ebp;" \
+ "pushl %edi;" \
+ "pushl %esi;" \
+ "pushl %edx;" \
+ "pushl %ecx;" \
+ "pushl %ebx;" \
+ "testl $"STR(X86_EFLAGS_VM)","STR(UREGS_eflags)"(%esp);" \
+ "jz 2f;" \
+ "call setup_vm86_frame;" \
+ "jmp 3f;" \
+ "2:testb $3,"STR(UREGS_cs)"(%esp);" \
+ "jz 1f;" \
+ "mov %ds,"STR(UREGS_ds)"(%esp);" \
+ "mov %es,"STR(UREGS_es)"(%esp);" \
+ "mov %fs,"STR(UREGS_fs)"(%esp);" \
+ "mov %gs,"STR(UREGS_gs)"(%esp);" \
"3:"
-#define SAVE_ALL_NOSEGREGS(_reg) \
- __SAVE_ALL_PRE \
+#define SAVE_ALL_NOSEGREGS(_reg) \
+ __SAVE_ALL_PRE \
"1:"
-#define SET_XEN_SEGMENTS(_reg) \
- "movl $("STR(__HYPERVISOR_DS)"),%e"STR(_reg)"x;" \
- "movl %e"STR(_reg)"x,%ds;" \
- "movl %e"STR(_reg)"x,%es;"
+#define SET_XEN_SEGMENTS(_reg) \
+ "movl $("STR(__HYPERVISOR_DS)"),%e"STR(_reg)"x;" \
+ "mov %e"STR(_reg)"x,%ds;" \
+ "mov %e"STR(_reg)"x,%es;"
-#define SAVE_ALL(_reg) \
- __SAVE_ALL_PRE \
- SET_XEN_SEGMENTS(_reg) \
+#define SAVE_ALL(_reg) \
+ __SAVE_ALL_PRE \
+ SET_XEN_SEGMENTS(_reg) \
"1:"
#else
-#define __SAVE_ALL_PRE \
- cld; \
- pushl %eax; \
- pushl %ebp; \
- pushl %edi; \
- pushl %esi; \
- pushl %edx; \
- pushl %ecx; \
- pushl %ebx; \
- testl $X86_EFLAGS_VM,XREGS_eflags(%esp); \
- jz 2f; \
- call setup_vm86_frame; \
- jmp 3f; \
- 2:testb $3,XREGS_cs(%esp); \
- jz 1f; \
- movl %ds,XREGS_ds(%esp); \
- movl %es,XREGS_es(%esp); \
- movl %fs,XREGS_fs(%esp); \
- movl %gs,XREGS_gs(%esp); \
+#define __SAVE_ALL_PRE \
+ cld; \
+ pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx; \
+ testl $X86_EFLAGS_VM,UREGS_eflags(%esp); \
+ jz 2f; \
+ call setup_vm86_frame; \
+ jmp 3f; \
+ 2:testb $3,UREGS_cs(%esp); \
+ jz 1f; \
+ mov %ds,UREGS_ds(%esp); \
+ mov %es,UREGS_es(%esp); \
+ mov %fs,UREGS_fs(%esp); \
+ mov %gs,UREGS_gs(%esp); \
3:
-#define SAVE_ALL_NOSEGREGS(_reg) \
- __SAVE_ALL_PRE \
+#define SAVE_ALL_NOSEGREGS(_reg) \
+ __SAVE_ALL_PRE \
1:
-#define SET_XEN_SEGMENTS(_reg) \
- movl $(__HYPERVISOR_DS),%e ## _reg ## x; \
- movl %e ## _reg ## x,%ds; \
- movl %e ## _reg ## x,%es;
+#define SET_XEN_SEGMENTS(_reg) \
+ movl $(__HYPERVISOR_DS),%e ## _reg ## x; \
+ mov %e ## _reg ## x,%ds; \
+ mov %e ## _reg ## x,%es;
-#define SAVE_ALL(_reg) \
- __SAVE_ALL_PRE \
- SET_XEN_SEGMENTS(_reg) \
+#define SAVE_ALL(_reg) \
+ __SAVE_ALL_PRE \
+ SET_XEN_SEGMENTS(_reg) \
1:
+#ifdef PERF_COUNTERS
+#define PERFC_INCR(_name,_idx) \
+ lock incl perfcounters+_name(,_idx,4)
+#else
+#define PERFC_INCR(_name,_idx)
+#endif
+
#endif
+#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
+#define XBUILD_SMP_INTERRUPT(x,v) \
+asmlinkage void x(void); \
+__asm__( \
+ "\n"__ALIGN_STR"\n" \
+ STR(x) ":\n\t" \
+ "pushl $"#v"<<16\n\t" \
+ SAVE_ALL(a) \
+ "call "STR(smp_##x)"\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
+#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
+asmlinkage void x(struct cpu_user_regs * regs); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+STR(x) ":\n\t" \
+ "pushl $"#v"<<16\n\t" \
+ SAVE_ALL(a) \
+ "movl %esp,%eax\n\t" \
+ "pushl %eax\n\t" \
+ "call "STR(smp_##x)"\n\t" \
+ "addl $4,%esp\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_COMMON_IRQ() \
+__asm__( \
+ "\n" __ALIGN_STR"\n" \
+ "common_interrupt:\n\t" \
+ SAVE_ALL(a) \
+ "movl %esp,%eax\n\t" \
+ "pushl %eax\n\t" \
+ "call " STR(do_IRQ) "\n\t" \
+ "addl $4,%esp\n\t" \
+ "jmp ret_from_intr\n");
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+STR(IRQ) #nr "_interrupt:\n\t" \
+ "pushl $"#nr"<<16\n\t" \
+ "jmp common_interrupt");
+
#endif /* __X86_32_ASM_DEFNS_H__ */
diff --git a/xen/include/asm-x86/x86_32/current.h b/xen/include/asm-x86/x86_32/current.h
deleted file mode 100644
index 2c76a133aa..0000000000
--- a/xen/include/asm-x86/x86_32/current.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef _X86_CURRENT_H
-#define _X86_CURRENT_H
-
-struct domain;
-
-#define STACK_RESERVED \
- (sizeof(execution_context_t) + sizeof(struct domain *))
-
-static inline struct domain * get_current(void)
-{
- struct domain *current;
- __asm__ ( "orl %%esp,%0; andl $~3,%0; movl (%0),%0"
- : "=r" (current) : "0" (STACK_SIZE-4) );
- return current;
-}
-
-#define current get_current()
-
-static inline void set_current(struct domain *p)
-{
- __asm__ ( "orl %%esp,%0; andl $~3,%0; movl %1,(%0)"
- : : "r" (STACK_SIZE-4), "r" (p) );
-}
-
-static inline execution_context_t *get_execution_context(void)
-{
- execution_context_t *execution_context;
- __asm__ ( "andl %%esp,%0; addl %2,%0"
- : "=r" (execution_context)
- : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) );
- return execution_context;
-}
-
-/*
- * Get the top-of-stack, as stored in the per-CPU TSS. This is actually
- * 20 bytes below the real top of the stack to allow space for:
- * domain pointer, DS, ES, FS, GS.
- */
-static inline unsigned long get_stack_top(void)
-{
- unsigned long p;
- __asm__ ( "andl %%esp,%0; addl %2,%0"
- : "=r" (p)
- : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-20) );
- return p;
-}
-
-#define reset_stack_and_jump(__fn) \
- __asm__ __volatile__ ( \
- "movl %0,%%esp; jmp "STR(__fn) \
- : : "r" (get_execution_context()) )
-
-#define schedule_tail(_d) ((_d)->thread.schedule_tail)(_d)
-
-#endif /* _X86_CURRENT_H */
diff --git a/xen/include/asm-x86/x86_32/page-2level.h b/xen/include/asm-x86/x86_32/page-2level.h
new file mode 100644
index 0000000000..34128f24fa
--- /dev/null
+++ b/xen/include/asm-x86/x86_32/page-2level.h
@@ -0,0 +1,56 @@
+#ifndef __X86_32_PAGE_2L_H__
+#define __X86_32_PAGE_2L_H__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define ROOT_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT
+
+#define PAGETABLE_ORDER 10
+#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define ROOT_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES
+
+#define PADDR_BITS 32
+#define PADDR_MASK (~0UL)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+
+/* read access (should only be used for debug printk's) */
+typedef u32 intpte_t;
+#define PRIpte "08x"
+
+typedef struct { intpte_t l1; } l1_pgentry_t;
+typedef struct { intpte_t l2; } l2_pgentry_t;
+typedef l2_pgentry_t root_pgentry_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/* root table */
+#define root_get_pfn l2e_get_pfn
+#define root_get_flags l2e_get_flags
+#define root_get_intpte l2e_get_intpte
+#define root_empty l2e_empty
+#define root_from_paddr l2e_from_paddr
+#define PGT_root_page_table PGT_l2_page_table
+
+/* misc */
+#define is_guest_l1_slot(_s) (1)
+#define is_guest_l2_slot(_t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT)
+
+/*
+ * PTE pfn and flags:
+ * 20-bit pfn = (pte[31:12])
+ * 12-bit flags = (pte[11:0])
+ */
+
+/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
+#define get_pte_flags(x) ((int)(x) & 0xFFF)
+#define put_pte_flags(x) ((intpte_t)((x) & 0xFFF))
+
+#define L1_DISALLOW_MASK (0xFFFFF180U) /* PAT/GLOBAL */
+#define L2_DISALLOW_MASK (0xFFFFF180U) /* PSE/GLOBAL */
+
+#endif /* __X86_32_PAGE_2L_H__ */
diff --git a/xen/include/asm-x86/x86_32/page-3level.h b/xen/include/asm-x86/x86_32/page-3level.h
new file mode 100644
index 0000000000..1fc423c073
--- /dev/null
+++ b/xen/include/asm-x86/x86_32/page-3level.h
@@ -0,0 +1,70 @@
+#ifndef __X86_32_PAGE_3L_H__
+#define __X86_32_PAGE_3L_H__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT
+
+#define PAGETABLE_ORDER 9
+#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L3_PAGETABLE_ENTRIES 4
+#define ROOT_PAGETABLE_ENTRIES L3_PAGETABLE_ENTRIES
+
+/*
+ * Architecturally, physical addresses may be up to 52 bits. However, the
+ * page-frame number (pfn) of a 52-bit address will not fit into a 32-bit
+ * word. Instead we treat bits 44-51 of a pte as flag bits which are never
+ * allowed to be set by a guest kernel. This 'limits' us to addressing 16TB
+ * of physical memory on a 32-bit PAE system.
+ */
+#define PADDR_BITS 44
+#define PADDR_MASK ((1ULL << PADDR_BITS)-1)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/types.h>
+
+/* read access (should only be used for debug printk's) */
+typedef u64 intpte_t;
+#define PRIpte "016llx"
+
+typedef struct { intpte_t l1; } l1_pgentry_t;
+typedef struct { intpte_t l2; } l2_pgentry_t;
+typedef struct { intpte_t l3; } l3_pgentry_t;
+typedef l3_pgentry_t root_pgentry_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/* root table */
+#define root_get_pfn l3e_get_pfn
+#define root_get_flags l3e_get_flags
+#define root_get_intpte l3e_get_intpte
+#define root_empty l3e_empty
+#define root_from_paddr l3e_from_paddr
+#define PGT_root_page_table PGT_l3_page_table
+
+/* misc */
+#define is_guest_l1_slot(s) (1)
+#define is_guest_l2_slot(t,s) \
+ ( ((((t) & PGT_va_mask) >> PGT_va_shift) != 3) || \
+ ((s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
+#define is_guest_l3_slot(s) (1)
+
+/*
+ * PTE pfn and flags:
+ * 32-bit pfn = (pte[43:12])
+ * 32-bit flags = (pte[63:44],pte[11:0])
+ */
+
+/* Extract flags into 32-bit integer, or turn 32-bit flags into a pte mask. */
+#define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
+#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF))
+
+#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */
+#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
+#define L3_DISALLOW_MASK (0xFFFFF1E6U) /* must-be-zero */
+
+#endif /* __X86_32_PAGE_3L_H__ */
diff --git a/xen/include/asm-x86/x86_32/page.h b/xen/include/asm-x86/x86_32/page.h
new file mode 100644
index 0000000000..9546706876
--- /dev/null
+++ b/xen/include/asm-x86/x86_32/page.h
@@ -0,0 +1,38 @@
+
+#ifndef __X86_32_PAGE_H__
+#define __X86_32_PAGE_H__
+
+#define __PAGE_OFFSET (0xFF000000)
+
+#define VADDR_BITS 32
+#define VADDR_MASK (~0UL)
+
+#define _PAGE_NX 0U
+
+#include <xen/config.h>
+#ifdef CONFIG_X86_PAE
+# include <asm/x86_32/page-3level.h>
+#else
+# include <asm/x86_32/page-2level.h>
+#endif
+
+/* Given a virtual address, get an entry offset into a linear page table. */
+#define l1_linear_offset(_a) ((_a) >> L1_PAGETABLE_SHIFT)
+#define l2_linear_offset(_a) ((_a) >> L2_PAGETABLE_SHIFT)
+
+#ifndef __ASSEMBLY__
+extern unsigned int PAGE_HYPERVISOR;
+extern unsigned int PAGE_HYPERVISOR_NOCACHE;
+#endif
+
+#endif /* __X86_32_PAGE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/x86_32/regs.h b/xen/include/asm-x86/x86_32/regs.h
index 57d21e3cb6..f2bdb3606e 100644
--- a/xen/include/asm-x86/x86_32/regs.h
+++ b/xen/include/asm-x86/x86_32/regs.h
@@ -1,55 +1,8 @@
#ifndef _I386_REGS_H
#define _I386_REGS_H
-#include <asm/types.h>
-
-struct xen_regs
-{
- /* All saved activations contain the following fields. */
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 esi;
- u32 edi;
- u32 ebp;
- u32 eax;
- u16 error_code;
- u16 entry_vector;
- u32 eip;
- u32 cs;
- u32 eflags;
-
- /* Only saved guest activations contain the following fields. */
- u32 esp;
- u32 ss;
- u32 es;
- u32 ds;
- u32 fs;
- u32 gs;
-} __attribute__ ((packed));
-
-enum EFLAGS {
- EF_CF = 0x00000001,
- EF_PF = 0x00000004,
- EF_AF = 0x00000010,
- EF_ZF = 0x00000040,
- EF_SF = 0x00000080,
- EF_TF = 0x00000100,
- EF_IE = 0x00000200,
- EF_DF = 0x00000400,
- EF_OF = 0x00000800,
- EF_IOPL = 0x00003000,
- EF_IOPL_RING0 = 0x00000000,
- EF_IOPL_RING1 = 0x00001000,
- EF_IOPL_RING2 = 0x00002000,
- EF_NT = 0x00004000, /* nested task */
- EF_RF = 0x00010000, /* resume */
- EF_VM = 0x00020000, /* virtual mode */
- EF_AC = 0x00040000, /* alignment */
- EF_VIF = 0x00080000, /* virtual interrupt */
- EF_VIP = 0x00100000, /* virtual interrupt pending */
- EF_ID = 0x00200000, /* id */
-};
+#include <xen/types.h>
+#include <public/xen.h>
#define VM86_MODE(_r) ((_r)->eflags & EF_VM)
#define RING_0(_r) (((_r)->cs & 3) == 0)
@@ -57,4 +10,12 @@ enum EFLAGS {
#define RING_2(_r) (((_r)->cs & 3) == 2)
#define RING_3(_r) (((_r)->cs & 3) == 3)
+#define KERNEL_MODE(_e, _r) (!VM86_MODE(_r) && RING_1(_r))
+
+#define PERMIT_SOFTINT(_dpl, _e, _r) \
+ ((_dpl) >= (VM86_MODE(_r) ? 3 : ((_r)->cs & 3)))
+
+/* Number of bytes of on-stack execution state to be context-switched. */
+#define CTXT_SWITCH_STACK_BYTES (sizeof(struct cpu_user_regs))
+
#endif
diff --git a/xen/include/asm-x86/x86_32/string.h b/xen/include/asm-x86/x86_32/string.h
deleted file mode 100644
index 43fad09eea..0000000000
--- a/xen/include/asm-x86/x86_32/string.h
+++ /dev/null
@@ -1,486 +0,0 @@
-#ifndef _I386_STRING_H_
-#define _I386_STRING_H_
-
-#include <xen/config.h>
-
-/*
- * This string-include defines all string functions as inline
- * functions. Use gcc. It also assumes ds=es=data space, this should be
- * normal. Most of the string-functions are rather heavily hand-optimized,
- * see especially strtok,strstr,str[c]spn. They should work, but are not
- * very easy to understand. Everything is done entirely within the register
- * set, making the functions fast and clean. String instructions have been
- * used through-out, making for "slightly" unclear code :-)
- *
- * NO Copyright (C) 1991, 1992 Linus Torvalds,
- * consider these trivial functions to be PD.
- */
-
-
-#define __HAVE_ARCH_STRCPY
-static inline char * strcpy(char * dest,const char *src)
-{
-int d0, d1, d2;
-__asm__ __volatile__(
- "1:\tlodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2)
- :"0" (src),"1" (dest) : "memory");
-return dest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static inline char * strncpy(char * dest,const char *src,size_t count)
-{
-int d0, d1, d2, d3;
-__asm__ __volatile__(
- "1:\tdecl %2\n\t"
- "js 2f\n\t"
- "lodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "rep\n\t"
- "stosb\n"
- "2:"
- : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
- :"0" (src),"1" (dest),"2" (count) : "memory");
-return dest;
-}
-
-#define __HAVE_ARCH_STRCAT
-static inline char * strcat(char * dest,const char * src)
-{
-int d0, d1, d2, d3;
-__asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "decl %1\n"
- "1:\tlodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory");
-return dest;
-}
-
-#define __HAVE_ARCH_STRNCAT
-static inline char * strncat(char * dest,const char * src,size_t count)
-{
-int d0, d1, d2, d3;
-__asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "decl %1\n\t"
- "movl %8,%3\n"
- "1:\tdecl %3\n\t"
- "js 2f\n\t"
- "lodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n"
- "2:\txorl %2,%2\n\t"
- "stosb"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count)
- : "memory");
-return dest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static inline int strcmp(const char * cs,const char * ct)
-{
-int d0, d1;
-register int __res;
-__asm__ __volatile__(
- "1:\tlodsb\n\t"
- "scasb\n\t"
- "jne 2f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "xorl %%eax,%%eax\n\t"
- "jmp 3f\n"
- "2:\tsbbl %%eax,%%eax\n\t"
- "orb $1,%%al\n"
- "3:"
- :"=a" (__res), "=&S" (d0), "=&D" (d1)
- :"1" (cs),"2" (ct));
-return __res;
-}
-
-#define __HAVE_ARCH_STRNCMP
-static inline int strncmp(const char * cs,const char * ct,size_t count)
-{
-register int __res;
-int d0, d1, d2;
-__asm__ __volatile__(
- "1:\tdecl %3\n\t"
- "js 2f\n\t"
- "lodsb\n\t"
- "scasb\n\t"
- "jne 3f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n"
- "2:\txorl %%eax,%%eax\n\t"
- "jmp 4f\n"
- "3:\tsbbl %%eax,%%eax\n\t"
- "orb $1,%%al\n"
- "4:"
- :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- :"1" (cs),"2" (ct),"3" (count));
-return __res;
-}
-
-#define __HAVE_ARCH_STRCHR
-static inline char * strchr(const char * s, int c)
-{
-int d0;
-register char * __res;
-__asm__ __volatile__(
- "movb %%al,%%ah\n"
- "1:\tlodsb\n\t"
- "cmpb %%ah,%%al\n\t"
- "je 2f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "movl $1,%1\n"
- "2:\tmovl %1,%0\n\t"
- "decl %0"
- :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
-return __res;
-}
-
-#define __HAVE_ARCH_STRRCHR
-static inline char * strrchr(const char * s, int c)
-{
-int d0, d1;
-register char * __res;
-__asm__ __volatile__(
- "movb %%al,%%ah\n"
- "1:\tlodsb\n\t"
- "cmpb %%ah,%%al\n\t"
- "jne 2f\n\t"
- "leal -1(%%esi),%0\n"
- "2:\ttestb %%al,%%al\n\t"
- "jne 1b"
- :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
-return __res;
-}
-
-#define __HAVE_ARCH_STRLEN
-static inline size_t strlen(const char * s)
-{
-int d0;
-register int __res;
-__asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "notl %0\n\t"
- "decl %0"
- :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff));
-return __res;
-}
-
-static inline void * __memcpy(void * to, const void * from, size_t n)
-{
-int d0, d1, d2;
-__asm__ __volatile__(
- "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t"
- "movsb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
- : "memory");
-return (to);
-}
-
-/*
- * This looks horribly ugly, but the compiler can optimize it totally,
- * as the count is constant.
- */
-static always_inline void * __constant_memcpy(void * to, const void * from, size_t n)
-{
- switch (n) {
- case 0:
- return to;
- case 1:
- *(unsigned char *)to = *(const unsigned char *)from;
- return to;
- case 2:
- *(unsigned short *)to = *(const unsigned short *)from;
- return to;
- case 3:
- *(unsigned short *)to = *(const unsigned short *)from;
- *(2+(unsigned char *)to) = *(2+(const unsigned char *)from);
- return to;
- case 4:
- *(unsigned long *)to = *(const unsigned long *)from;
- return to;
- case 6: /* for Ethernet addresses */
- *(unsigned long *)to = *(const unsigned long *)from;
- *(2+(unsigned short *)to) = *(2+(const unsigned short *)from);
- return to;
- case 8:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- return to;
- case 12:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
- return to;
- case 16:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
- *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
- return to;
- case 20:
- *(unsigned long *)to = *(const unsigned long *)from;
- *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
- *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
- *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
- *(4+(unsigned long *)to) = *(4+(const unsigned long *)from);
- return to;
- }
-#define COMMON(x) \
-__asm__ __volatile__( \
- "rep ; movsl" \
- x \
- : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
- : "0" (n/4),"1" ((long) to),"2" ((long) from) \
- : "memory");
-{
- int d0, d1, d2;
- switch (n % 4) {
- case 0: COMMON(""); return to;
- case 1: COMMON("\n\tmovsb"); return to;
- case 2: COMMON("\n\tmovsw"); return to;
- default: COMMON("\n\tmovsw\n\tmovsb"); return to;
- }
-}
-
-#undef COMMON
-}
-
-#define __HAVE_ARCH_MEMCPY
-static always_inline __attribute_used__
-void memcpy(void *t, const void *f, size_t n)
-{
- (__builtin_constant_p(n) ?
- __constant_memcpy((t),(f),(n)) :
- __memcpy((t),(f),(n)));
-}
-
-/*
- * struct_cpy(x,y), copy structure *x into (matching structure) *y.
- *
- * We get link-time errors if the structure sizes do not match.
- * There is no runtime overhead, it's all optimized away at
- * compile time.
- */
-//extern void __struct_cpy_bug (void);
-
-/*
-#define struct_cpy(x,y) \
-({ \
- if (sizeof(*(x)) != sizeof(*(y))) \
- __struct_cpy_bug; \
- memcpy(x, y, sizeof(*(x))); \
-})
-*/
-
-#define __HAVE_ARCH_MEMMOVE
-static inline void * memmove(void * dest,const void * src, size_t n)
-{
-int d0, d1, d2;
-if (dest<src)
-__asm__ __volatile__(
- "rep\n\t"
- "movsb"
- : "=&c" (d0), "=&S" (d1), "=&D" (d2)
- :"0" (n),"1" (src),"2" (dest)
- : "memory");
-else
-__asm__ __volatile__(
- "std\n\t"
- "rep\n\t"
- "movsb\n\t"
- "cld"
- : "=&c" (d0), "=&S" (d1), "=&D" (d2)
- :"0" (n),
- "1" (n-1+(const char *)src),
- "2" (n-1+(char *)dest)
- :"memory");
-return dest;
-}
-
-#define __HAVE_ARCH_MEMCMP
-#define memcmp __builtin_memcmp
-
-#define __HAVE_ARCH_MEMCHR
-static inline void * memchr(const void * cs,int c,size_t count)
-{
-int d0;
-register void * __res;
-if (!count)
- return NULL;
-__asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "je 1f\n\t"
- "movl $1,%0\n"
- "1:\tdecl %0"
- :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
-return __res;
-}
-
-static inline void * __memset_generic(void * s, char c,size_t count)
-{
-int d0, d1;
-__asm__ __volatile__(
- "rep\n\t"
- "stosb"
- : "=&c" (d0), "=&D" (d1)
- :"a" (c),"1" (s),"0" (count)
- :"memory");
-return s;
-}
-
-/* we might want to write optimized versions of these later */
-#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
-
-/*
- * memset(x,0,y) is a reasonably common thing to do, so we want to fill
- * things 32 bits at a time even when we don't know the size of the
- * area at compile-time..
- */
-static inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
-{
-int d0, d1;
-__asm__ __volatile__(
- "rep ; stosl\n\t"
- "testb $2,%b3\n\t"
- "je 1f\n\t"
- "stosw\n"
- "1:\ttestb $1,%b3\n\t"
- "je 2f\n\t"
- "stosb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1)
- :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
- :"memory");
-return (s);
-}
-
-/* Added by Gertjan van Wingerde to make minix and sysv module work */
-#define __HAVE_ARCH_STRNLEN
-static inline size_t strnlen(const char * s, size_t count)
-{
-int d0;
-register int __res;
-__asm__ __volatile__(
- "movl %2,%0\n\t"
- "jmp 2f\n"
- "1:\tcmpb $0,(%0)\n\t"
- "je 3f\n\t"
- "incl %0\n"
- "2:\tdecl %1\n\t"
- "cmpl $-1,%1\n\t"
- "jne 1b\n"
- "3:\tsubl %2,%0"
- :"=a" (__res), "=&d" (d0)
- :"c" (s),"1" (count));
-return __res;
-}
-/* end of additional stuff */
-
-//#define __HAVE_ARCH_STRSTR
-
-//extern char *strstr(const char *cs, const char *ct);
-
-/*
- * This looks horribly ugly, but the compiler can optimize it totally,
- * as we by now know that both pattern and count is constant..
- */
-static always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
-{
- switch (count) {
- case 0:
- return s;
- case 1:
- *(unsigned char *)s = pattern;
- return s;
- case 2:
- *(unsigned short *)s = pattern;
- return s;
- case 3:
- *(unsigned short *)s = pattern;
- *(2+(unsigned char *)s) = pattern;
- return s;
- case 4:
- *(unsigned long *)s = pattern;
- return s;
- }
-#define COMMON(x) \
-__asm__ __volatile__( \
- "rep ; stosl" \
- x \
- : "=&c" (d0), "=&D" (d1) \
- : "a" (pattern),"0" (count/4),"1" ((long) s) \
- : "memory")
-{
- int d0, d1;
- switch (count % 4) {
- case 0: COMMON(""); return s;
- case 1: COMMON("\n\tstosb"); return s;
- case 2: COMMON("\n\tstosw"); return s;
- default: COMMON("\n\tstosw\n\tstosb"); return s;
- }
-}
-
-#undef COMMON
-}
-
-#define __constant_c_x_memset(s, c, count) \
-(__builtin_constant_p(count) ? \
- __constant_c_and_count_memset((s),(c),(count)) : \
- __constant_c_memset((s),(c),(count)))
-
-#define __memset(s, c, count) \
-(__builtin_constant_p(count) ? \
- __constant_count_memset((s),(c),(count)) : \
- __memset_generic((s),(c),(count)))
-
-#define __HAVE_ARCH_MEMSET
-#define memset(s, c, count) \
-(__builtin_constant_p(c) ? \
- __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
- __memset((s),(c),(count)))
-
-/*
- * find the first occurrence of byte 'c', or 1 past the area if none
- */
-#define __HAVE_ARCH_MEMSCAN
-static inline void * memscan(void * addr, int c, size_t size)
-{
- if (!size)
- return addr;
- __asm__("repnz; scasb\n\t"
- "jnz 1f\n\t"
- "dec %%edi\n"
- "1:"
- : "=D" (addr), "=c" (size)
- : "0" (addr), "1" (size), "a" (c));
- return addr;
-}
-
-#endif
diff --git a/xen/include/asm-x86/x86_32/uaccess.h b/xen/include/asm-x86/x86_32/uaccess.h
index b202a1a12b..eb9b87ceb1 100644
--- a/xen/include/asm-x86/x86_32/uaccess.h
+++ b/xen/include/asm-x86/x86_32/uaccess.h
@@ -1,31 +1,6 @@
#ifndef __i386_UACCESS_H
#define __i386_UACCESS_H
-/*
- * User space memory access functions
- */
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/prefetch.h>
-#include <xen/string.h>
-#include <xen/sched.h>
-
-/* No user-pointer checking. */
-#define __user
-#define __chk_user_ptr(_p) ((void)0)
-
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
-/*
- * movsl can be slow when source and dest are not both 8-byte aligned
- */
-#ifdef CONFIG_X86_INTEL_USERCOPY
-extern struct movsl_mask {
- int mask;
-} __cacheline_aligned movsl_mask;
-#endif
-
#define __addr_ok(addr) ((unsigned long)(addr) < HYPERVISOR_VIRT_START)
/*
@@ -34,146 +9,20 @@ extern struct movsl_mask {
*
* This is equivalent to the following test:
* (u33)addr + (u33)size >= (u33)HYPERVISOR_VIRT_START
- *
- * This needs 33-bit arithmetic. We have a carry...
*/
-#define __range_ok(addr,size) ({ \
+#define __range_not_ok(addr,size) ({ \
unsigned long flag,sum; \
- __chk_user_ptr(addr); \
asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
:"=&r" (flag), "=r" (sum) \
:"1" (addr),"g" ((int)(size)),"r" (HYPERVISOR_VIRT_START)); \
flag; })
-/**
- * access_ok: - Checks if a user space pointer is valid
- * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
- * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
- * to write to a block, it is always safe to read from it.
- * @addr: User space pointer to start of block to check
- * @size: Size of block to check
- *
- * Context: User context only. This function may sleep.
- *
- * Checks if a pointer to a block of memory in user space is valid.
- *
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
- * Note that, depending on architecture, this function probably just
- * checks that the pointer is in the user space range - after calling
- * this function, memory access functions may still return -EFAULT.
- */
-#define access_ok(type,addr,size) (likely(__range_ok(addr,size) == 0))
-
-#define array_access_ok(type,addr,count,size) \
- (likely(count < (~0UL/size)) && access_ok(type,addr,count*size))
-
-/**
- * get_user: - Get a simple variable from user space.
- * @x: Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only. This function may sleep.
- *
- * This macro copies a single simple variable from user space to kernel
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Returns zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
-#define get_user(x,ptr) \
- __get_user_check((x),(ptr),sizeof(*(ptr)))
+#define access_ok(addr,size) (likely(__range_not_ok(addr,size) == 0))
-extern void __put_user_bad(void);
+#define array_access_ok(addr,count,size) \
+ (likely(count < (~0UL/size)) && access_ok(addr,count*size))
-/**
- * put_user: - Write a simple value into user space.
- * @x: Value to copy to user space.
- * @ptr: Destination address, in user space.
- *
- * Context: User context only. This function may sleep.
- *
- * This macro copies a single simple value from kernel space to user
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
- *
- * Returns zero on success, or -EFAULT on error.
- */
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
-
-/**
- * __get_user: - Get a simple variable from user space, with less checking.
- * @x: Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only. This function may sleep.
- *
- * This macro copies a single simple variable from user space to kernel
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Returns zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
-#define __get_user(x,ptr) \
- __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
-
-
-/**
- * __put_user: - Write a simple value into user space, with less checking.
- * @x: Value to copy to user space.
- * @ptr: Destination address, in user space.
- *
- * Context: User context only. This function may sleep.
- *
- * This macro copies a single simple value from kernel space to user
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Returns zero on success, or -EFAULT on error.
- */
-#define __put_user(x,ptr) \
- __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
-#define __put_user_nocheck(x,ptr,size) \
-({ \
- long __pu_err; \
- __put_user_size((x),(ptr),(size),__pu_err,-EFAULT); \
- __pu_err; \
-})
-
-#define __put_user_check(x,ptr,size) \
-({ \
- long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- if (__addr_ok(__pu_addr)) \
- __put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \
- __pu_err; \
-})
-
-#define __put_user_u64(x, addr, err) \
+#define __put_user_u64(x, addr, retval, errret) \
__asm__ __volatile__( \
"1: movl %%eax,0(%2)\n" \
"2: movl %%edx,4(%2)\n" \
@@ -187,216 +36,50 @@ extern void __put_user_bad(void);
" .long 1b,4b\n" \
" .long 2b,4b\n" \
".previous" \
- : "=r"(err) \
- : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
-
-#ifdef CONFIG_X86_WP_WORKS_OK
+ : "=r"(retval) \
+ : "A" (x), "r" (addr), "i"(errret), "0"(retval))
#define __put_user_size(x,ptr,size,retval,errret) \
do { \
retval = 0; \
- __chk_user_ptr(ptr); \
switch (size) { \
case 1: __put_user_asm(x,ptr,retval,"b","b","iq",errret);break; \
case 2: __put_user_asm(x,ptr,retval,"w","w","ir",errret);break; \
case 4: __put_user_asm(x,ptr,retval,"l","","ir",errret); break; \
- case 8: __put_user_u64((__typeof__(*ptr))(x),ptr,retval); break;\
- default: __put_user_bad(); \
+ case 8: __put_user_u64((__typeof__(*ptr))(x),ptr,retval,errret);break;\
+ default: __put_user_bad(); \
} \
} while (0)
-#else
-
-#define __put_user_size(x,ptr,size,retval,errret) \
-do { \
- __typeof__(*(ptr)) __pus_tmp = x; \
- retval = 0; \
- \
- if(unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
- retval = errret; \
-} while (0)
-
-#endif
-struct __large_struct { unsigned long buf[100]; };
-#define __m(x) (*(struct __large_struct *)(x))
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
- __asm__ __volatile__( \
- "1: mov"itype" %"rtype"1,%2\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl %3,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
- : "=r"(err) \
- : ltype (x), "m"(__m(addr)), "i"(errret), "0"(err))
-
-
-#define __get_user_nocheck(x,ptr,size) \
-({ \
- long __gu_err, __gu_val; \
- __get_user_size(__gu_val,(ptr),(size),__gu_err,-EFAULT);\
- (x) = (__typeof__(*(ptr)))__gu_val; \
- __gu_err; \
-})
-
-#define __get_user_check(x,ptr,size) \
-({ \
- long __gu_err, __gu_val; \
- __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
- __get_user_size(__gu_val,__gu_addr,(size),__gu_err,-EFAULT); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
- if (!__addr_ok(__gu_addr)) __gu_err = -EFAULT; \
- __gu_err; \
-})
-
-extern long __get_user_bad(void);
+#define __get_user_u64(x, addr, retval, errret) \
+ __asm__ __volatile__( \
+ "1: movl 0(%2),%%eax\n" \
+ "2: movl 4(%2),%%edx\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: movl %3,%0\n" \
+ " xorl %%eax,%%eax\n" \
+ " xorl %%edx,%%edx\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,4b\n" \
+ " .long 2b,4b\n" \
+ ".previous" \
+ : "=r" (retval), "=A" (x) \
+ : "r" (addr), "i"(errret), "0"(retval))
#define __get_user_size(x,ptr,size,retval,errret) \
do { \
retval = 0; \
- __chk_user_ptr(ptr); \
switch (size) { \
case 1: __get_user_asm(x,ptr,retval,"b","b","=q",errret);break; \
case 2: __get_user_asm(x,ptr,retval,"w","w","=r",errret);break; \
case 4: __get_user_asm(x,ptr,retval,"l","","=r",errret);break; \
+ case 8: __get_user_u64(x,ptr,retval,errret);break; \
default: (x) = __get_user_bad(); \
} \
} while (0)
-#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
- __asm__ __volatile__( \
- "1: mov"itype" %2,%"rtype"1\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl %3,%0\n" \
- " xor"itype" %"rtype"1,%"rtype"1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
- : "=r"(err), ltype (x) \
- : "m"(__m(addr)), "i"(errret), "0"(err))
-
-
-unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n);
-unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n);
-
-/*
- * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
- * we return the initial request size (1, 2 or 4), as copy_*_user should do.
- * If a store crosses a page boundary and gets a fault, the x86 will not write
- * anything, so this is accurate.
- */
-
-/**
- * __copy_to_user: - Copy a block of data into user space, with less checking.
- * @to: Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep.
- *
- * Copy data from kernel space to user space. Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-static always_inline unsigned long
-__copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- if (__builtin_constant_p(n)) {
- unsigned long ret;
-
- switch (n) {
- case 1:
- __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret, 1);
- return ret;
- case 2:
- __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret, 2);
- return ret;
- case 4:
- __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4);
- return ret;
- }
- }
- return __copy_to_user_ll(to, from, n);
-}
-
-/**
- * __copy_from_user: - Copy a block of data from user space, with less checking.
- * @to: Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep.
- *
- * Copy data from user space to kernel space. Caller must check
- * the specified block with access_ok() before calling this function.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-static always_inline unsigned long
-__copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- if (__builtin_constant_p(n)) {
- unsigned long ret;
-
- switch (n) {
- case 1:
- __get_user_size(*(u8 *)to, from, 1, ret, 1);
- return ret;
- case 2:
- __get_user_size(*(u16 *)to, from, 2, ret, 2);
- return ret;
- case 4:
- __get_user_size(*(u32 *)to, from, 4, ret, 4);
- return ret;
- }
- }
- return __copy_from_user_ll(to, from, n);
-}
-
-unsigned long copy_to_user(void __user *to, const void *from, unsigned long n);
-unsigned long copy_from_user(void *to,
- const void __user *from, unsigned long n);
-long strncpy_from_user(char *dst, const char __user *src, long count);
-long __strncpy_from_user(char *dst, const char __user *src, long count);
-
-/**
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- *
- * Context: User context only. This function may sleep.
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- *
- * If there is a limit on the length of a valid string, you may wish to
- * consider using strnlen_user() instead.
- */
-#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
-
-long strnlen_user(const char __user *str, long n);
-unsigned long clear_user(void __user *mem, unsigned long len);
-unsigned long __clear_user(void __user *mem, unsigned long len);
-
#endif /* __i386_UACCESS_H */
diff --git a/xen/include/asm-x86/x86_64/asm_defns.h b/xen/include/asm-x86/x86_64/asm_defns.h
index fa0b978304..60fd76d14a 100644
--- a/xen/include/asm-x86/x86_64/asm_defns.h
+++ b/xen/include/asm-x86/x86_64/asm_defns.h
@@ -1,6 +1,141 @@
#ifndef __X86_64_ASM_DEFNS_H__
#define __X86_64_ASM_DEFNS_H__
-#define SAVE_ALL(_r) ""
+/* Maybe auto-generate the following two cases (quoted vs. unquoted). */
+#ifndef __ASSEMBLY__
+
+#define SAVE_ALL \
+ "cld;" \
+ "pushq %rdi;" \
+ "pushq %rsi;" \
+ "pushq %rdx;" \
+ "pushq %rcx;" \
+ "pushq %rax;" \
+ "pushq %r8;" \
+ "pushq %r9;" \
+ "pushq %r10;" \
+ "pushq %r11;" \
+ "pushq %rbx;" \
+ "pushq %rbp;" \
+ "pushq %r12;" \
+ "pushq %r13;" \
+ "pushq %r14;" \
+ "pushq %r15;"
+
+#define RESTORE_ALL \
+ "popq %r15;" \
+ "popq %r14;" \
+ "popq %r13;" \
+ "popq %r12;" \
+ "popq %rbp;" \
+ "popq %rbx;" \
+ "popq %r11;" \
+ "popq %r10;" \
+ "popq %r9;" \
+ "popq %r8;" \
+ "popq %rax;" \
+ "popq %rcx;" \
+ "popq %rdx;" \
+ "popq %rsi;" \
+ "popq %rdi;"
+
+/* Work around AMD erratum #88 */
+#define safe_swapgs \
+ "mfence; swapgs;"
+
+#else
+
+#define SAVE_ALL \
+ cld; \
+ pushq %rdi; \
+ pushq %rsi; \
+ pushq %rdx; \
+ pushq %rcx; \
+ pushq %rax; \
+ pushq %r8; \
+ pushq %r9; \
+ pushq %r10; \
+ pushq %r11; \
+ pushq %rbx; \
+ pushq %rbp; \
+ pushq %r12; \
+ pushq %r13; \
+ pushq %r14; \
+ pushq %r15;
+
+#define RESTORE_ALL \
+ popq %r15; \
+ popq %r14; \
+ popq %r13; \
+ popq %r12; \
+ popq %rbp; \
+ popq %rbx; \
+ popq %r11; \
+ popq %r10; \
+ popq %r9; \
+ popq %r8; \
+ popq %rax; \
+ popq %rcx; \
+ popq %rdx; \
+ popq %rsi; \
+ popq %rdi;
+
+#ifdef PERF_COUNTERS
+#define PERFC_INCR(_name,_idx) \
+ pushq %rdx; \
+ leaq perfcounters+_name(%rip),%rdx; \
+ lock incl (%rdx,_idx,4); \
+ popq %rdx;
+#else
+#define PERFC_INCR(_name,_idx)
+#endif
+
+#endif
+
+#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
+#define XBUILD_SMP_INTERRUPT(x,v) \
+asmlinkage void x(void); \
+__asm__( \
+ "\n"__ALIGN_STR"\n" \
+ STR(x) ":\n\t" \
+ "pushq $0\n\t" \
+ "movl $"#v",4(%rsp)\n\t" \
+ SAVE_ALL \
+ "callq "STR(smp_##x)"\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
+#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
+asmlinkage void x(struct cpu_user_regs * regs); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+STR(x) ":\n\t" \
+ "pushq $0\n\t" \
+ "movl $"#v",4(%rsp)\n\t" \
+ SAVE_ALL \
+ "movq %rsp,%rdi\n\t" \
+ "callq "STR(smp_##x)"\n\t" \
+ "jmp ret_from_intr\n");
+
+#define BUILD_COMMON_IRQ() \
+__asm__( \
+ "\n" __ALIGN_STR"\n" \
+ "common_interrupt:\n\t" \
+ SAVE_ALL \
+ "movq %rsp,%rdi\n\t" \
+ "callq " STR(do_IRQ) "\n\t" \
+ "jmp ret_from_intr\n");
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+STR(IRQ) #nr "_interrupt:\n\t" \
+ "pushq $0\n\t" \
+ "movl $"#nr",4(%rsp)\n\t" \
+ "jmp common_interrupt");
#endif /* __X86_64_ASM_DEFNS_H__ */
diff --git a/xen/include/asm-x86/x86_64/current.h b/xen/include/asm-x86/x86_64/current.h
deleted file mode 100644
index 2ee550643b..0000000000
--- a/xen/include/asm-x86/x86_64/current.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _X86_64_CURRENT_H
-#define _X86_64_CURRENT_H
-
-#if !defined(__ASSEMBLY__)
-struct domain;
-
-#include <asm/pda.h>
-
-#define STACK_RESERVED \
- (sizeof(execution_context_t))
-
-static inline struct domain * get_current(void)
-{
- struct domain *current;
- current = read_pda(pcurrent);
- return current;
-}
-
-#define current get_current()
-
-static inline void set_current(struct domain *p)
-{
- write_pda(pcurrent,p);
-}
-
-static inline execution_context_t *get_execution_context(void)
-{
- execution_context_t *execution_context;
- __asm__( "andq %%rsp,%0; addq %2,%0"
- : "=r" (execution_context)
- : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) );
- return execution_context;
-}
-
-static inline unsigned long get_stack_top(void)
-{
- unsigned long p;
- __asm__ ( "orq %%rsp,%0; andq $~7,%0"
- : "=r" (p) : "0" (STACK_SIZE-8) );
- return p;
-}
-
-#define reset_stack_and_jump(__fn) \
- __asm__ __volatile__ ( \
- "movq %0,%%rsp; jmp "STR(__fn) \
- : : "r" (get_execution_context()) )
-
-#define schedule_tail(_d) ((_d)->thread.schedule_tail)(_d)
-
-#else
-
-#ifndef ASM_OFFSET_H
-#include <asm/offset.h>
-#endif
-
-#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
-
-#endif
-
-#endif /* !(_X86_64_CURRENT_H) */
diff --git a/xen/include/asm-x86/x86_64/desc.h b/xen/include/asm-x86/x86_64/desc.h
deleted file mode 100644
index d1171de39d..0000000000
--- a/xen/include/asm-x86/x86_64/desc.h
+++ /dev/null
@@ -1,118 +0,0 @@
-#ifndef __ARCH_DESC_H
-#define __ARCH_DESC_H
-
-#define LDT_ENTRY_SIZE 16
-
-#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
-
-#define __FIRST_PER_CPU_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-
-#define __CPU_DESC_INDEX(x,field) \
- ((x) * sizeof(struct per_cpu_gdt) + offsetof(struct per_cpu_gdt, field) + (__FIRST_PER_CPU_ENTRY*8))
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
-
-#define load_TR(cpu) asm volatile("ltr %w0"::"r" (__CPU_DESC_INDEX(cpu, tss)));
-#define __load_LDT(cpu) asm volatile("lldt %w0"::"r" (__CPU_DESC_INDEX(cpu, ldt)));
-#define clear_LDT(n) asm volatile("lldt %w0"::"r" (0))
-
-/*
- * Guest OS must provide its own code selectors, or use the one we provide. The
- * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector
- * value is okay. Note that checking only the RPL is insufficient: if the
- * selector is poked into an interrupt, trap or call gate then the RPL is
- * ignored when the gate is accessed.
- */
-#define VALID_SEL(_s) \
- (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || \
- (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \
- ((_s)&4)) && \
- (((_s)&3) == 0))
-#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS || VALID_SEL(_s))
-
-/* These are bitmasks for the first 32 bits of a descriptor table entry. */
-#define _SEGMENT_TYPE (15<< 8)
-#define _SEGMENT_S ( 1<<12) /* System descriptor (yes iff S==0) */
-#define _SEGMENT_DPL ( 3<<13) /* Descriptor Privilege Level */
-#define _SEGMENT_P ( 1<<15) /* Segment Present */
-#define _SEGMENT_G ( 1<<23) /* Granularity */
-
-#ifndef __ASSEMBLY__
-
-enum {
- GATE_INTERRUPT = 0xE,
- GATE_TRAP = 0xF,
- GATE_CALL = 0xC,
-};
-
-// 16byte gate
-struct gate_struct {
- u16 offset_low;
- u16 segment;
- unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
- u16 offset_middle;
- u32 offset_high;
- u32 zero1;
-} __attribute__((packed));
-
-// 8 byte segment descriptor
-struct desc_struct {
- u16 limit0;
- u16 base0;
- unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
- unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
-} __attribute__((packed));
-
-// LDT or TSS descriptor in the GDT. 16 bytes.
-struct ldttss_desc {
- u16 limit0;
- u16 base0;
- unsigned base1 : 8, type : 5, dpl : 2, p : 1;
- unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
- u32 base3;
- u32 zero1;
-} __attribute__((packed));
-
-// Union of above structures
-union desc_union {
- struct desc_struct seg;
- struct ldttss_desc ldttss;
- struct gate_struct gate;
-};
-
-struct per_cpu_gdt {
- struct ldttss_desc tss;
- struct ldttss_desc ldt;
-} __cacheline_aligned;
-
-
-struct Xgt_desc_struct {
- unsigned short size;
- unsigned long address;
-} __attribute__((packed));
-
-extern __u8 gdt_table[];
-extern __u8 gdt_end[];
-extern union desc_union *gdt;
-
-extern struct per_cpu_gdt gdt_cpu_table[];
-
-#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
-
-enum {
- DESC_TSS = 0x9,
- DESC_LDT = 0x2,
-};
-
-extern struct gate_struct *idt;
-
-#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
-#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
-
-extern void set_intr_gate(unsigned int irq, void * addr);
-extern void set_tss_desc(unsigned int n, void *addr);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
diff --git a/xen/include/asm-x86/x86_64/ldt.h b/xen/include/asm-x86/x86_64/ldt.h
deleted file mode 100644
index 1e09163867..0000000000
--- a/xen/include/asm-x86/x86_64/ldt.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __ARCH_LDT_H
-#define __ARCH_LDT_H
-
-#ifndef __ASSEMBLY__
-
-static inline void load_LDT(struct domain *p)
-{
- unsigned long ents;
-
- if ( (ents = p->mm.ldt_ents) == 0 )
- {
- __asm__ __volatile__ ( "lldt %w0" : : "r" (0) );
- }
- else
- {
- unsigned int cpu;
- struct ldttss_desc *desc;
-
- cpu = smp_processor_id();
- desc = (struct ldttss_desc *)((char *)GET_GDT_ADDRESS(p) + __CPU_DESC_INDEX(cpu, ldt));
- desc->limit0 = ents*8-1;
- desc->base0 = LDT_VIRT_START&0xffff;
- desc->base1 = (LDT_VIRT_START&0xff0000)>>16;
- desc->type = DESC_LDT;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0;
- desc->zero0 = 0;
- desc->g = 0;
- desc->base2 = (LDT_VIRT_START&0xff000000)>>24;
- desc->base3 = LDT_VIRT_START>>32;
- desc->zero1 = 0;
- __load_LDT(cpu);
- }
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
new file mode 100644
index 0000000000..eeafe4fea5
--- /dev/null
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -0,0 +1,92 @@
+
+#ifndef __X86_64_PAGE_H__
+#define __X86_64_PAGE_H__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define L4_PAGETABLE_SHIFT 39
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define ROOT_PAGETABLE_SHIFT L4_PAGETABLE_SHIFT
+
+#define PAGETABLE_ORDER 9
+#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L3_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L4_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define ROOT_PAGETABLE_ENTRIES L4_PAGETABLE_ENTRIES
+
+#define __PAGE_OFFSET (0xFFFF830000000000)
+
+/* These are architectural limits. Current CPUs support only 40-bit phys. */
+#define PADDR_BITS 52
+#define VADDR_BITS 48
+#define PADDR_MASK ((1UL << PADDR_BITS)-1)
+#define VADDR_MASK ((1UL << VADDR_BITS)-1)
+
+#ifndef __ASSEMBLY__
+
+#include <xen/config.h>
+#include <asm/types.h>
+
+/* read access (should only be used for debug printk's) */
+typedef u64 intpte_t;
+#define PRIpte "016lx"
+
+typedef struct { intpte_t l1; } l1_pgentry_t;
+typedef struct { intpte_t l2; } l2_pgentry_t;
+typedef struct { intpte_t l3; } l3_pgentry_t;
+typedef struct { intpte_t l4; } l4_pgentry_t;
+typedef l4_pgentry_t root_pgentry_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/* Given a virtual address, get an entry offset into a linear page table. */
+#define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> PAGE_SHIFT)
+
+#define is_guest_l1_slot(_s) (1)
+#define is_guest_l2_slot(_t, _s) (1)
+#define is_guest_l3_slot(_s) (1)
+#define is_guest_l4_slot(_s) \
+ (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \
+ ((_s) > ROOT_PAGETABLE_LAST_XEN_SLOT))
+
+#define root_get_pfn l4e_get_pfn
+#define root_get_flags l4e_get_flags
+#define root_get_intpte l4e_get_intpte
+#define root_empty l4e_empty
+#define root_from_paddr l4e_from_paddr
+#define PGT_root_page_table PGT_l4_page_table
+
+/*
+ * PTE pfn and flags:
+ * 40-bit pfn = (pte[51:12])
+ * 24-bit flags = (pte[63:52],pte[11:0])
+ */
+
+/* Extract flags into 24-bit integer, or turn 24-bit flags into a pte mask. */
+#define get_pte_flags(x) (((int)((x) >> 40) & ~0xFFF) | ((int)(x) & 0xFFF))
+#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF))
+
+/* Bit 23 of a 24-bit flag mask. This corresponds to bit 63 of a pte.*/
+#define _PAGE_NX (cpu_has_nx ? (1U<<23) : 0U)
+
+#define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */
+#define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
+#define L3_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* must-be-zero */
+#define L4_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* must-be-zero */
+
+#define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL)
+#define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
+
+#endif /* __X86_64_PAGE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/x86_64/regs.h b/xen/include/asm-x86/x86_64/regs.h
index 7daea0f0ed..38d31db1bb 100644
--- a/xen/include/asm-x86/x86_64/regs.h
+++ b/xen/include/asm-x86/x86_64/regs.h
@@ -1,114 +1,22 @@
#ifndef _X86_64_REGS_H
#define _X86_64_REGS_H
-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
-#define R15 0
-#define R14 8
-#define R13 16
-#define R12 24
-#define RBP 36
-#define RBX 40
-/* arguments: interrupts/hypercalls only save upto here*/
-#define R11 48
-#define R10 56
-#define R9 64
-#define R8 72
-#define RAX 80
-#define RCX 88
-#define RDX 96
-#define RSI 104
-#define RDI 112
-#define ORIG_RAX 120 /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast hypercall. */
-#define RIP 128
-#define CS 136
-#define EFLAGS 144
-#define RSP 152
-#define SS 160
-#define ARGOFFSET R11
-#endif /* __ASSEMBLY__ */
+#include <xen/types.h>
+#include <public/xen.h>
-/* top of stack page */
-#define FRAME_SIZE 168
+#define VM86_MODE(_r) (0) /* No VM86 support in long mode. */
+#define RING_0(_r) (((_r)->cs & 3) == 0)
+#define RING_1(_r) (((_r)->cs & 3) == 1)
+#define RING_2(_r) (((_r)->cs & 3) == 2)
+#define RING_3(_r) (((_r)->cs & 3) == 3)
-#define PTRACE_SETOPTIONS 21
+#define KERNEL_MODE(_e, _r) ((_e)->arch.flags & TF_kernel_mode)
-/* options set using PTRACE_SETOPTIONS */
-#define PTRACE_O_TRACESYSGOOD 0x00000001
+#define PERMIT_SOFTINT(_dpl, _e, _r) \
+ ((_dpl) >= (KERNEL_MODE(_e, _r) ? 1 : 3))
-/* Dummy values for ptrace */
-#define FS 1000
-#define GS 1008
-
-#ifndef __ASSEMBLY__
-
-struct xen_regs {
- unsigned long r15;
- unsigned long r14;
- unsigned long r13;
- unsigned long r12;
- unsigned long rbp;
- unsigned long rbx;
-/* arguments: non interrupts/hypercalls only save upto here*/
- unsigned long r11;
- unsigned long r10;
- unsigned long r9;
- unsigned long r8;
- unsigned long rax;
- unsigned long rcx;
- unsigned long rdx;
- unsigned long rsi;
- unsigned long rdi;
- unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
- unsigned long rip;
- unsigned long cs;
- unsigned long eflags;
- unsigned long rsp;
- unsigned long ss;
-/* top of stack page */
-};
-
-#endif
-
-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
-#define PTRACE_GETFPXREGS 18
-#define PTRACE_SETFPXREGS 19
-
-#if !defined(__ASSEMBLY__)
-
-#define instruction_pointer(regs) ((regs)->rip)
-extern void show_regs(struct xen_regs *);
-
-enum {
- EF_CF = 0x00000001,
- EF_PF = 0x00000004,
- EF_AF = 0x00000010,
- EF_ZF = 0x00000040,
- EF_SF = 0x00000080,
- EF_TF = 0x00000100,
- EF_IE = 0x00000200,
- EF_DF = 0x00000400,
- EF_OF = 0x00000800,
- EF_IOPL = 0x00003000,
- EF_IOPL_RING0 = 0x00000000,
- EF_IOPL_RING1 = 0x00001000,
- EF_IOPL_RING2 = 0x00002000,
- EF_NT = 0x00004000, /* nested task */
- EF_RF = 0x00010000, /* resume */
- EF_VM = 0x00020000, /* virtual mode */
- EF_AC = 0x00040000, /* alignment */
- EF_VIF = 0x00080000, /* virtual interrupt */
- EF_VIP = 0x00100000, /* virtual interrupt pending */
- EF_ID = 0x00200000, /* id */
-};
-
-#endif
+/* Number of bytes of on-stack execution state to be context-switched. */
+/* NB. Segment registers and bases are not saved/restored on x86/64 stack. */
+#define CTXT_SWITCH_STACK_BYTES (offsetof(struct cpu_user_regs, es))
#endif
diff --git a/xen/include/asm-x86/x86_64/string.h b/xen/include/asm-x86/x86_64/string.h
deleted file mode 100644
index 27876b9da0..0000000000
--- a/xen/include/asm-x86/x86_64/string.h
+++ /dev/null
@@ -1,69 +0,0 @@
-#ifndef _X86_64_STRING_H_
-#define _X86_64_STRING_H_
-
-#ifdef __KERNEL__
-
-#define struct_cpy(x,y) (*(x)=*(y))
-
-/* Written 2002 by Andi Kleen */
-
-/* Only used for special circumstances. Stolen from i386/string.h */
-static inline void * __inline_memcpy(void * to, const void * from, size_t n)
-{
-unsigned long d0, d1, d2;
-__asm__ __volatile__(
- "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t"
- "movsb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
- : "memory");
-return (to);
-}
-
-/* Even with __builtin_ the compiler may decide to use the out of line
- function. */
-
-#define __HAVE_ARCH_MEMCPY 1
-extern void *__memcpy(void *to, const void *from, size_t len);
-#define memcpy(dst,src,len) \
- ({ size_t __len = (len); \
- void *__ret; \
- if (__builtin_constant_p(len) && __len >= 64) \
- __ret = __memcpy((dst),(src),__len); \
- else \
- __ret = __builtin_memcpy((dst),(src),__len); \
- __ret; })
-
-
-#define __HAVE_ARCH_MEMSET
-#define memset __builtin_memset
-
-#define __HAVE_ARCH_MEMMOVE
-void * memmove(void * dest,const void *src,size_t count);
-
-/* Use C out of line version for memcmp */
-#define memcmp __builtin_memcmp
-int memcmp(const void * cs,const void * ct,size_t count);
-
-/* out of line string functions use always C versions */
-#define strlen __builtin_strlen
-size_t strlen(const char * s);
-
-#define strcpy __builtin_strcpy
-char * strcpy(char * dest,const char *src);
-
-#define strcat __builtin_strcat
-char * strcat(char * dest, const char * src);
-
-#define strcmp __builtin_strcmp
-int strcmp(const char * cs,const char * ct);
-
-#endif /* __KERNEL__ */
-
-#endif
diff --git a/xen/include/asm-x86/x86_64/uaccess.h b/xen/include/asm-x86/x86_64/uaccess.h
index f965c87d32..4d5f65c890 100644
--- a/xen/include/asm-x86/x86_64/uaccess.h
+++ b/xen/include/asm-x86/x86_64/uaccess.h
@@ -2,323 +2,41 @@
#define __X86_64_UACCESS_H
/*
- * User space memory access functions
+ * Valid if in +ve half of 48-bit address space, or above Xen-reserved area.
+ * This is also valid for range checks (addr, addr+size). As long as the
+ * start address is outside the Xen-reserved area then we will access a
+ * non-canonical address (and thus fault) before ever reaching VIRT_START.
*/
-#include <xen/config.h>
-#include <xen/compiler.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/prefetch.h>
-#include <asm/page.h>
+#define __addr_ok(addr) \
+ (((unsigned long)(addr) < (1UL<<48)) || \
+ ((unsigned long)(addr) >= HYPERVISOR_VIRT_END))
-/* No user-pointer checking. */
-#define __user
-#define __force
-#define __chk_user_ptr(_p) ((void)0)
+#define access_ok(addr, size) (__addr_ok(addr))
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
+#define array_access_ok(addr, count, size) (__addr_ok(addr))
-#define __addr_ok(addr) ((unsigned long)(addr) < HYPERVISOR_VIRT_START)
-
-/*
- * Uhhuh, this needs 65-bit arithmetic. We have a carry..
- */
-#define __range_not_ok(addr,size) ({ \
- unsigned long flag,sum; \
- __chk_user_ptr(addr); \
- asm("# range_ok\n\r" \
- "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \
- :"=&r" (flag), "=r" (sum) \
- :"1" (addr),"g" ((long)(size)),"r" (HYPERVISOR_VIRT_START)); \
- flag; })
-
-#define access_ok(type, addr, size) (__range_not_ok(addr,size) == 0)
-
-#define array_access_ok(type,addr,count,size) \
- (likely(sizeof(count) <= 4) /* disallow 64-bit counts */ && \
- access_ok(type,addr,count*size))
-
-extern inline int verify_area(int type, const void __user * addr, unsigned long size)
-{
- return access_ok(type,addr,size) ? 0 : -EFAULT;
-}
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- *
- * This gets kind of ugly. We want to return _two_ values in "get_user()"
- * and yet we don't want to do any pointers, because that is too much
- * of a performance impact. Thus we have a few rather ugly macros here,
- * and hide all the ugliness from the user.
- *
- * The "__xxx" versions of the user access functions are versions that
- * do not verify the address space, that must have been done previously
- * with a separate "access_ok()" call (this is used when we do multiple
- * accesses to the same area of user memory).
- */
-
-extern void __get_user_1(void);
-extern void __get_user_2(void);
-extern void __get_user_4(void);
-extern void __get_user_8(void);
-
-#define __get_user_x(size,ret,x,ptr) \
- __asm__ __volatile__("call __get_user_" #size \
- :"=a" (ret),"=d" (x) \
- :"0" (ptr) \
- :"rbx")
-
-/* Careful: we have to cast the result to the type of the pointer for sign reasons */
-#define get_user(x,ptr) \
-({ long __val_gu; \
- int __ret_gu; \
- __chk_user_ptr(ptr); \
- switch(sizeof (*(ptr))) { \
- case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \
- case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \
- case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \
- case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break; \
- default: __get_user_bad(); break; \
- } \
- (x) = (__typeof__(*(ptr)))__val_gu; \
- __ret_gu; \
-})
-
-extern void __put_user_1(void);
-extern void __put_user_2(void);
-extern void __put_user_4(void);
-extern void __put_user_8(void);
-
-extern void __put_user_bad(void);
-
-#define __put_user_x(size,ret,x,ptr) \
- __asm__ __volatile__("call __put_user_" #size \
- :"=a" (ret) \
- :"0" (ptr),"d" (x) \
- :"rbx")
-
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
-#define __get_user(x,ptr) \
- __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
-#define __put_user(x,ptr) \
- __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
-#define __put_user_nocheck(x,ptr,size) \
-({ \
- int __pu_err; \
- __put_user_size((x),(ptr),(size),__pu_err); \
- __pu_err; \
-})
-
-
-#define __put_user_check(x,ptr,size) \
-({ \
- int __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- if (likely(access_ok(VERIFY_WRITE,__pu_addr,size))) \
- __put_user_size((x),__pu_addr,(size),__pu_err); \
- __pu_err; \
-})
-
-#define __put_user_size(x,ptr,size,retval) \
+#define __put_user_size(x,ptr,size,retval,errret) \
do { \
retval = 0; \
- __chk_user_ptr(ptr); \
switch (size) { \
- case 1: __put_user_asm(x,ptr,retval,"b","b","iq",-EFAULT); break;\
- case 2: __put_user_asm(x,ptr,retval,"w","w","ir",-EFAULT); break;\
- case 4: __put_user_asm(x,ptr,retval,"l","k","ir",-EFAULT); break;\
- case 8: __put_user_asm(x,ptr,retval,"q","","ir",-EFAULT); break;\
- default: __put_user_bad(); \
+ case 1: __put_user_asm(x,ptr,retval,"b","b","iq",errret);break; \
+ case 2: __put_user_asm(x,ptr,retval,"w","w","ir",errret);break; \
+ case 4: __put_user_asm(x,ptr,retval,"l","k","ir",errret);break; \
+ case 8: __put_user_asm(x,ptr,retval,"q","","ir",errret);break; \
+ default: __put_user_bad(); \
} \
} while (0)
-/* FIXME: this hack is definitely wrong -AK */
-struct __large_struct { unsigned long buf[100]; };
-#define __m(x) (*(struct __large_struct *)(x))
-
-/*
- * Tell gcc we read from memory instead of writing: this is because
- * we do not write to any memory gcc knows about, so there are no
- * aliasing issues.
- */
-#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \
- __asm__ __volatile__( \
- "1: mov"itype" %"rtype"1,%2\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: mov %3,%0\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 1b,3b\n" \
- ".previous" \
- : "=r"(err) \
- : ltype (x), "m"(__m(addr)), "i"(errno), "0"(err))
-
-
-#define __get_user_nocheck(x,ptr,size) \
-({ \
- int __gu_err; \
- long __gu_val; \
- __get_user_size(__gu_val,(ptr),(size),__gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
- __gu_err; \
-})
-
-extern int __get_user_bad(void);
-
-#define __get_user_size(x,ptr,size,retval) \
+#define __get_user_size(x,ptr,size,retval,errret) \
do { \
retval = 0; \
- __chk_user_ptr(ptr); \
switch (size) { \
- case 1: __get_user_asm(x,ptr,retval,"b","b","=q",-EFAULT); break;\
- case 2: __get_user_asm(x,ptr,retval,"w","w","=r",-EFAULT); break;\
- case 4: __get_user_asm(x,ptr,retval,"l","k","=r",-EFAULT); break;\
- case 8: __get_user_asm(x,ptr,retval,"q","","=r",-EFAULT); break;\
- default: (x) = __get_user_bad(); \
+ case 1: __get_user_asm(x,ptr,retval,"b","b","=q",errret);break; \
+ case 2: __get_user_asm(x,ptr,retval,"w","w","=r",errret);break; \
+ case 4: __get_user_asm(x,ptr,retval,"l","k","=r",errret);break; \
+ case 8: __get_user_asm(x,ptr,retval,"q","","=r",errret); break; \
+ default: (x) = __get_user_bad(); \
} \
} while (0)
-#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \
- __asm__ __volatile__( \
- "1: mov"itype" %2,%"rtype"1\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: mov %3,%0\n" \
- " xor"itype" %"rtype"1,%"rtype"1\n" \
- " jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 1b,3b\n" \
- ".previous" \
- : "=r"(err), ltype (x) \
- : "m"(__m(addr)), "i"(errno), "0"(err))
-
-/*
- * Copy To/From Userspace
- */
-
-/* Handles exceptions in both to and from, but doesn't do access_ok */
-extern unsigned long copy_user_generic(void *to, const void *from, unsigned len);
-
-extern unsigned long copy_to_user(void __user *to, const void *from, unsigned len);
-extern unsigned long copy_from_user(void *to, const void __user *from, unsigned len);
-extern unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len);
-
-static always_inline int __copy_from_user(void *dst, const void __user *src, unsigned size)
-{
- int ret = 0;
- if (!__builtin_constant_p(size))
- return copy_user_generic(dst,(__force void *)src,size);
- switch (size) {
- case 1:__get_user_asm(*(u8*)dst,(u8 __user *)src,ret,"b","b","=q",1);
- return ret;
- case 2:__get_user_asm(*(u16*)dst,(u16 __user *)src,ret,"w","w","=r",2);
- return ret;
- case 4:__get_user_asm(*(u32*)dst,(u32 __user *)src,ret,"l","k","=r",4);
- return ret;
- case 8:__get_user_asm(*(u64*)dst,(u64 __user *)src,ret,"q","","=r",8);
- return ret;
- case 10:
- __get_user_asm(*(u64*)dst,(u64 __user *)src,ret,"q","","=r",16);
- if (unlikely(ret)) return ret;
- __get_user_asm(*(u16*)(8+(char*)dst),(u16 __user *)(8+(char __user *)src),ret,"w","w","=r",2);
- return ret;
- case 16:
- __get_user_asm(*(u64*)dst,(u64 __user *)src,ret,"q","","=r",16);
- if (unlikely(ret)) return ret;
- __get_user_asm(*(u64*)(8+(char*)dst),(u64 __user *)(8+(char __user *)src),ret,"q","","=r",8);
- return ret;
- default:
- return copy_user_generic(dst,(__force void *)src,size);
- }
-}
-
-static always_inline int __copy_to_user(void __user *dst, const void *src, unsigned size)
-{
- int ret = 0;
- if (!__builtin_constant_p(size))
- return copy_user_generic((__force void *)dst,src,size);
- switch (size) {
- case 1:__put_user_asm(*(u8*)src,(u8 __user *)dst,ret,"b","b","iq",1);
- return ret;
- case 2:__put_user_asm(*(u16*)src,(u16 __user *)dst,ret,"w","w","ir",2);
- return ret;
- case 4:__put_user_asm(*(u32*)src,(u32 __user *)dst,ret,"l","k","ir",4);
- return ret;
- case 8:__put_user_asm(*(u64*)src,(u64 __user *)dst,ret,"q","","ir",8);
- return ret;
- case 10:
- __put_user_asm(*(u64*)src,(u64 __user *)dst,ret,"q","","ir",10);
- if (unlikely(ret)) return ret;
- asm("":::"memory");
- __put_user_asm(4[(u16*)src],4+(u16 __user *)dst,ret,"w","w","ir",2);
- return ret;
- case 16:
- __put_user_asm(*(u64*)src,(u64 __user *)dst,ret,"q","","ir",16);
- if (unlikely(ret)) return ret;
- asm("":::"memory");
- __put_user_asm(1[(u64*)src],1+(u64 __user *)dst,ret,"q","","ir",8);
- return ret;
- default:
- return copy_user_generic((__force void *)dst,src,size);
- }
-}
-
-
-static always_inline int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
-{
- int ret = 0;
- if (!__builtin_constant_p(size))
- return copy_user_generic((__force void *)dst,(__force void *)src,size);
- switch (size) {
- case 1: {
- u8 tmp;
- __get_user_asm(tmp,(u8 __user *)src,ret,"b","b","=q",1);
- if (likely(!ret))
- __put_user_asm(tmp,(u8 __user *)dst,ret,"b","b","iq",1);
- return ret;
- }
- case 2: {
- u16 tmp;
- __get_user_asm(tmp,(u16 __user *)src,ret,"w","w","=r",2);
- if (likely(!ret))
- __put_user_asm(tmp,(u16 __user *)dst,ret,"w","w","ir",2);
- return ret;
- }
-
- case 4: {
- u32 tmp;
- __get_user_asm(tmp,(u32 __user *)src,ret,"l","k","=r",4);
- if (likely(!ret))
- __put_user_asm(tmp,(u32 __user *)dst,ret,"l","k","ir",4);
- return ret;
- }
- case 8: {
- u64 tmp;
- __get_user_asm(tmp,(u64 __user *)src,ret,"q","","=r",8);
- if (likely(!ret))
- __put_user_asm(tmp,(u64 __user *)dst,ret,"q","","ir",8);
- return ret;
- }
- default:
- return copy_user_generic((__force void *)dst,(__force void *)src,size);
- }
-}
-
-long strncpy_from_user(char *dst, const char __user *src, long count);
-long __strncpy_from_user(char *dst, const char __user *src, long count);
-long strnlen_user(const char __user *str, long n);
-long strlen_user(const char __user *str);
-unsigned long clear_user(void __user *mem, unsigned long len);
-unsigned long __clear_user(void __user *mem, unsigned long len);
-
#endif /* __X86_64_UACCESS_H */
diff --git a/xen/include/asm-x86/x86_emulate.h b/xen/include/asm-x86/x86_emulate.h
new file mode 100644
index 0000000000..bfcc987f57
--- /dev/null
+++ b/xen/include/asm-x86/x86_emulate.h
@@ -0,0 +1,169 @@
+/******************************************************************************
+ * x86_emulate.h
+ *
+ * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
+ *
+ * Copyright (c) 2005 Keir Fraser
+ */
+
+#ifndef __X86_EMULATE_H__
+#define __X86_EMULATE_H__
+
+/*
+ * x86_mem_emulator:
+ *
+ * These operations represent the instruction emulator's interface to memory.
+ * There are two categories of operation: those that act on ordinary memory
+ * regions (*_std), and those that act on memory regions known to require
+ * special treatment or emulation (*_emulated).
+ *
+ * The emulator assumes that an instruction accesses only one 'emulated memory'
+ * location, and that this is one of its data operands. Instruction fetches and
+ * stack operations are assumed never to access emulated memory. The emulator
+ * automatically deduces which operand of a string-move operation is accessing
+ * emulated memory, and requires that the other operand accesses normal memory.
+ *
+ * NOTES:
+ * 1. The emulator isn't very smart about emulated vs. standard memory.
+ * 'Emulated memory' access addresses should be checked for sanity.
+ * 'Normal memory' accesses may fault, and the caller must arrange to
+ * detect and handle reentrancy into the emulator via recursive faults.
+ * Accesses may be unaligned and may cross page boundaries.
+ * 2. If the access fails (cannot emulate, or a standard access faults) then
+ * it is up to the memop to propagate the fault to the guest VM via
+ * some out-of-band mechanism, unknown to the emulator. The memop signals
+ * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will
+ * then immediately bail.
+ * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
+ * cmpxchg8b_emulated need support 8-byte accesses.
+ */
+/* Access completed successfully: continue emulation as normal. */
+#define X86EMUL_CONTINUE 0
+/* Access is unhandleable: bail from emulation and return error to caller. */
+#define X86EMUL_UNHANDLEABLE 1
+/* Terminate emulation but return success to the caller. */
+#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
+#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */
+#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */
+struct x86_mem_emulator
+{
+ /*
+ * read_std: Read bytes of standard (non-emulated/special) memory.
+ * Used for instruction fetch, stack operations, and others.
+ * @addr: [IN ] Linear address from which to read.
+ * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
+ * @bytes: [IN ] Number of bytes to read from memory.
+ */
+ int (*read_std)(
+ unsigned long addr,
+ unsigned long *val,
+ unsigned int bytes);
+
+ /*
+ * write_std: Write bytes of standard (non-emulated/special) memory.
+ * Used for stack operations, and others.
+ * @addr: [IN ] Linear address to which to write.
+ * @val: [IN ] Value to write to memory (low-order bytes used as req'd).
+ * @bytes: [IN ] Number of bytes to write to memory.
+ */
+ int (*write_std)(
+ unsigned long addr,
+ unsigned long val,
+ unsigned int bytes);
+
+ /*
+ * read_emulated: Read bytes from emulated/special memory area.
+ * @addr: [IN ] Linear address from which to read.
+ * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
+ * @bytes: [IN ] Number of bytes to read from memory.
+ */
+ int (*read_emulated)(
+ unsigned long addr,
+ unsigned long *val,
+ unsigned int bytes);
+
+ /*
+     * write_emulated: Write bytes to emulated/special memory area.
+ * @addr: [IN ] Linear address to which to write.
+ * @val: [IN ] Value to write to memory (low-order bytes used as req'd).
+ * @bytes: [IN ] Number of bytes to write to memory.
+ */
+ int (*write_emulated)(
+ unsigned long addr,
+ unsigned long val,
+ unsigned int bytes);
+
+ /*
+ * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
+ * emulated/special memory area.
+ * @addr: [IN ] Linear address to access.
+ * @old: [IN ] Value expected to be current at @addr.
+ * @new: [IN ] Value to write to @addr.
+ * @bytes: [IN ] Number of bytes to access using CMPXCHG.
+ */
+ int (*cmpxchg_emulated)(
+ unsigned long addr,
+ unsigned long old,
+ unsigned long new,
+ unsigned int bytes);
+
+ /*
+     * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
+ * emulated/special memory area.
+ * @addr: [IN ] Linear address to access.
+ * @old: [IN ] Value expected to be current at @addr.
+ * @new: [IN ] Value to write to @addr.
+ * NOTES:
+ * 1. This function is only ever called when emulating a real CMPXCHG8B.
+ * 2. This function is *never* called on x86/64 systems.
+     *  3. Not defining this function (i.e., specifying NULL) is equivalent
+ * to defining a function that always returns X86EMUL_UNHANDLEABLE.
+ */
+ int (*cmpxchg8b_emulated)(
+ unsigned long addr,
+ unsigned long old_lo,
+ unsigned long old_hi,
+ unsigned long new_lo,
+ unsigned long new_hi);
+};
+
+/* Standard reader/writer functions that callers may wish to use. */
+extern int
+x86_emulate_read_std(
+ unsigned long addr,
+ unsigned long *val,
+ unsigned int bytes);
+extern int
+x86_emulate_write_std(
+ unsigned long addr,
+ unsigned long val,
+ unsigned int bytes);
+
+struct cpu_user_regs;
+
+/*
+ * x86_emulate_memop: Emulate an instruction that faulted attempting to
+ * read/write a 'special' memory area.
+ * @regs: Register state at time of fault.
+ * @cr2: Linear faulting address.
+ * @ops: Interface to access special memory.
+ * @mode: Current execution mode, represented by the default size of memory
+ * addresses, in bytes. Valid values are 2, 4 and 8 (x86/64 only).
+ */
+extern int
+x86_emulate_memop(
+ struct cpu_user_regs *regs,
+ unsigned long cr2,
+ struct x86_mem_emulator *ops,
+ int mode);
+
+/*
+ * Given the 'reg' portion of a ModRM byte, and a register block, return a
+ * pointer into the block that addresses the relevant register.
+ * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
+ */
+extern void *
+decode_register(
+ u8 modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
+
+#endif /* __X86_EMULATE_H__ */
diff --git a/xen/include/public/arch-ia64.h b/xen/include/public/arch-ia64.h
new file mode 100644
index 0000000000..ec00554959
--- /dev/null
+++ b/xen/include/public/arch-ia64.h
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * arch-ia64/hypervisor-if.h
+ *
+ * Guest OS interface to IA64 Xen.
+ */
+
+#ifndef __HYPERVISOR_IF_IA64_H__
+#define __HYPERVISOR_IF_IA64_H__
+
+// "packed" generates awful code
+#define PACKED
+
+/* Pointers are naturally 64 bits in this architecture; no padding needed. */
+#define _MEMORY_PADDING(_X)
+#define MEMORY_PADDING
+
+#ifndef __ASSEMBLY__
+
+/* NB. Both the following are 64 bits each. */
+typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */
+
+typedef struct
+{
+} PACKED cpu_user_regs;
+
+/*
+ * NB. This may become a 64-bit count with no shift. If this happens then the
+ * structure size will still be 8 bytes, so no other alignments will change.
+ */
+typedef struct {
+ u32 tsc_bits; /* 0: 32 bits read from the CPU's TSC. */
+ u32 tsc_bitshift; /* 4: 'tsc_bits' uses N:N+31 of TSC. */
+} PACKED tsc_timestamp_t; /* 8 bytes */
+
+#include <asm/tlb.h> /* TR_ENTRY */
+
+typedef struct {
+ unsigned long ipsr;
+ unsigned long iip;
+ unsigned long ifs;
+ unsigned long precover_ifs;
+ unsigned long isr;
+ unsigned long ifa;
+ unsigned long iipa;
+ unsigned long iim;
+ unsigned long unat; // not sure if this is needed until NaT arch is done
+ unsigned long tpr;
+ unsigned long iha;
+ unsigned long itir;
+ unsigned long itv;
+ unsigned long pmv;
+ unsigned long cmcv;
+ unsigned long pta;
+ int interrupt_collection_enabled; // virtual psr.ic
+ int interrupt_delivery_enabled; // virtual psr.i
+ int pending_interruption;
+ int incomplete_regframe; // see SDM vol2 6.8
+ unsigned long delivery_mask[4];
+ int metaphysical_mode; // 1 = use metaphys mapping, 0 = use virtual
+ int banknum; // 0 or 1, which virtual register bank is active
+ unsigned long bank0_regs[16]; // bank0 regs (r16-r31) when bank1 active
+ unsigned long bank1_regs[16]; // bank1 regs (r16-r31) when bank0 active
+ unsigned long rrs[8]; // region registers
+ unsigned long krs[8]; // kernel registers
+ unsigned long pkrs[8]; // protection key registers
+ unsigned long tmp[8]; // temp registers (e.g. for hyperprivops)
+//} PACKED arch_shared_info_t;
+} arch_vcpu_info_t; // DON'T PACK
+
+typedef struct {
+} arch_shared_info_t; // DON'T PACK
+
+/*
+ * The following is all CPU context. Note that the i387_ctxt block is filled
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ */
+typedef struct vcpu_guest_context {
+ //unsigned long flags;
+} PACKED vcpu_guest_context_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#define XEN_HYPER_RFI 1
+#define XEN_HYPER_RSM_DT 2
+#define XEN_HYPER_SSM_DT 3
+#define XEN_HYPER_COVER 4
+#define XEN_HYPER_ITC_D 5
+#define XEN_HYPER_ITC_I 6
+#define XEN_HYPER_SSM_I 7
+
+#endif /* __HYPERVISOR_IF_IA64_H__ */
diff --git a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h
index f3d402d2b3..21f97669d5 100644
--- a/xen/include/public/arch-x86_32.h
+++ b/xen/include/public/arch-x86_32.h
@@ -31,31 +31,30 @@
* A number of GDT entries are reserved by Xen. These are not situated at the
* start of the GDT because some stupid OSes export hard-coded selector values
* in their ABI. These hard-coded values are always near the start of the GDT,
- * so Xen places itself out of the way.
- *
- * NB. The reserved range is inclusive (that is, both FIRST_RESERVED_GDT_ENTRY
- * and LAST_RESERVED_GDT_ENTRY are reserved).
+ * so Xen places itself out of the way, at the far end of the GDT.
*/
-#define NR_RESERVED_GDT_ENTRIES 40
-#define FIRST_RESERVED_GDT_ENTRY 256
-#define LAST_RESERVED_GDT_ENTRY \
- (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1)
-
+#define FIRST_RESERVED_GDT_PAGE 14
+#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
/*
* These flat segments are in the Xen-private section of every GDT. Since these
* are also present in the initial GDT, many OSes will be able to avoid
* installing their own GDT.
*/
-#define FLAT_RING1_CS 0x0819 /* GDT index 259 */
-#define FLAT_RING1_DS 0x0821 /* GDT index 260 */
-#define FLAT_RING3_CS 0x082b /* GDT index 261 */
-#define FLAT_RING3_DS 0x0833 /* GDT index 262 */
-
-#define FLAT_GUESTOS_CS FLAT_RING1_CS
-#define FLAT_GUESTOS_DS FLAT_RING1_DS
+#define FLAT_RING1_CS 0xe019 /* GDT index 259 */
+#define FLAT_RING1_DS 0xe021 /* GDT index 260 */
+#define FLAT_RING1_SS 0xe021 /* GDT index 260 */
+#define FLAT_RING3_CS 0xe02b /* GDT index 261 */
+#define FLAT_RING3_DS 0xe033 /* GDT index 262 */
+#define FLAT_RING3_SS 0xe033 /* GDT index 262 */
+
+#define FLAT_KERNEL_CS FLAT_RING1_CS
+#define FLAT_KERNEL_DS FLAT_RING1_DS
+#define FLAT_KERNEL_SS FLAT_RING1_SS
#define FLAT_USER_CS FLAT_RING3_CS
#define FLAT_USER_DS FLAT_RING3_DS
+#define FLAT_USER_SS FLAT_RING3_SS
/* And the trap vector is... */
#define TRAP_INSTR "int $0x82"
@@ -65,16 +64,19 @@
* Virtual addresses beyond this are not modifiable by guest OSes. The
* machine->physical mapping table starts at this address, read-only.
*/
-#define HYPERVISOR_VIRT_START (0xFC000000UL)
+#ifdef CONFIG_X86_PAE
+# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#else
+# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#endif
#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#define machine_to_phys_mapping ((u32 *)HYPERVISOR_VIRT_START)
#endif
#ifndef __ASSEMBLY__
/* NB. Both the following are 32 bits each. */
typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */
-typedef unsigned long cpureg_t; /* Full-sized register. */
/*
* Send an array of these to HYPERVISOR_set_trap_table()
@@ -90,58 +92,63 @@ typedef struct {
memory_t address; /* 4: code address */
} PACKED trap_info_t; /* 8 bytes */
-typedef struct
-{
- unsigned long ebx;
- unsigned long ecx;
- unsigned long edx;
- unsigned long esi;
- unsigned long edi;
- unsigned long ebp;
- unsigned long eax;
- unsigned long _unused;
- unsigned long eip;
- unsigned long cs;
- unsigned long eflags;
- unsigned long esp;
- unsigned long ss;
- unsigned long es;
- unsigned long ds;
- unsigned long fs;
- unsigned long gs;
-} PACKED execution_context_t;
+typedef struct cpu_user_regs {
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ u32 esi;
+ u32 edi;
+ u32 ebp;
+ u32 eax;
+ u16 error_code; /* private */
+ u16 entry_vector; /* private */
+ u32 eip;
+ u16 cs;
+ u8 saved_upcall_mask;
+ u8 _pad0;
+ u32 eflags;
+ u32 esp;
+ u16 ss, _pad1;
+ u16 es, _pad2;
+ u16 ds, _pad3;
+ u16 fs, _pad4;
+ u16 gs, _pad5;
+} cpu_user_regs_t;
typedef u64 tsc_timestamp_t; /* RDTSC timestamp */
/*
- * The following is all CPU context. Note that the i387_ctxt block is filled
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
*/
-typedef struct {
-#define ECF_I387_VALID (1<<0)
- unsigned long flags;
- execution_context_t cpu_ctxt; /* User-level CPU registers */
- char fpu_ctxt[256]; /* User-level FPU registers */
+typedef struct vcpu_guest_context {
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_VMX_GUEST (1<<1)
+#define VGCF_IN_KERNEL (1<<2)
+ unsigned long flags; /* VGCF_* flags */
+ cpu_user_regs_t user_regs; /* User-level CPU registers */
+ struct { char x[512]; } fpu_ctxt /* User-level FPU registers */
+ __attribute__((__aligned__(16))); /* (needs 16-byte alignment) */
trap_info_t trap_ctxt[256]; /* Virtual IDT */
- unsigned int fast_trap_idx; /* "Fast trap" vector offset */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
- unsigned long guestos_ss, guestos_esp; /* Virtual TSS (only SS1/ESP1) */
+ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
unsigned long pt_base; /* CR3 (pagetable base) */
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
unsigned long event_callback_cs; /* CS:EIP of event callback */
unsigned long event_callback_eip;
unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
unsigned long failsafe_callback_eip;
-} PACKED full_execution_context_t;
+ unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+} vcpu_guest_context_t;
typedef struct {
- u64 mfn_to_pfn_start; /* MFN of start of m2p table */
- u64 pfn_to_mfn_frame_list; /* MFN of a table of MFNs that
- make up p2m table */
-} PACKED arch_shared_info_t;
+ /* MFN of a table of MFNs that make up p2m table */
+ u64 pfn_to_mfn_frame_list;
+} arch_shared_info_t;
-#define ARCH_HAS_FAST_TRAP
+typedef struct {
+} arch_vcpu_info_t;
#endif
diff --git a/xen/include/public/arch-x86_64.h b/xen/include/public/arch-x86_64.h
index abba7bdf12..a4f4ac2fcf 100644
--- a/xen/include/public/arch-x86_64.h
+++ b/xen/include/public/arch-x86_64.h
@@ -25,15 +25,11 @@
* A number of GDT entries are reserved by Xen. These are not situated at the
* start of the GDT because some stupid OSes export hard-coded selector values
* in their ABI. These hard-coded values are always near the start of the GDT,
- * so Xen places itself out of the way.
- *
- * NB. The reserved range is inclusive (that is, both FIRST_RESERVED_GDT_ENTRY
- * and LAST_RESERVED_GDT_ENTRY are reserved).
+ * so Xen places itself out of the way, at the far end of the GDT.
*/
-#define NR_RESERVED_GDT_ENTRIES 40
-#define FIRST_RESERVED_GDT_ENTRY 256
-#define LAST_RESERVED_GDT_ENTRY \
- (FIRST_RESERVED_GDT_ENTRY + NR_RESERVED_GDT_ENTRIES - 1)
+#define FIRST_RESERVED_GDT_PAGE 14
+#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
/*
* 64-bit segment selectors
@@ -42,34 +38,89 @@
* installing their own GDT.
*/
-#define FLAT_RING3_CS32 0x0823 /* GDT index 260 */
-#define FLAT_RING3_CS64 0x082b /* GDT index 261 */
-#define FLAT_RING3_DS 0x0833 /* GDT index 262 */
-
-#define FLAT_GUESTOS_DS FLAT_RING3_DS
-#define FLAT_GUESTOS_CS FLAT_RING3_CS64
-#define FLAT_GUESTOS_CS32 FLAT_RING3_CS32
-
-#define FLAT_USER_DS FLAT_RING3_DS
-#define FLAT_USER_CS FLAT_RING3_CS64
-#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */
+#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */
+#define FLAT_RING3_DS64 0x0000 /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */
+#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS FLAT_USER_SS64
/* And the trap vector is... */
#define TRAP_INSTR "syscall"
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START (0xFFFF800000000000UL)
+#define HYPERVISOR_VIRT_END (0xFFFF880000000000UL)
+#endif
+
+#ifndef __ASSEMBLY__
+
/* The machine->physical mapping table starts at this address, read-only. */
#ifndef machine_to_phys_mapping
-#define machine_to_phys_mapping ((unsigned long *)0xffff810000000000ULL)
+#define machine_to_phys_mapping ((u32 *)HYPERVISOR_VIRT_START)
#endif
-#ifndef __ASSEMBLY__
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ * @which == SEGBASE_* ; @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS 0
+#define SEGBASE_GS_USER 1
+#define SEGBASE_GS_KERNEL 2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_switch_to_user(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * If flags contains VGCF_IN_SYSCALL:
+ * Restore RAX, RIP, RFLAGS, RSP.
+ * Discard R11, RCX, CS, SS.
+ * Otherwise:
+ * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define VGCF_IN_SYSCALL (1<<8)
+struct switch_to_user {
+ /* Top of stack (%rsp at point of hypercall). */
+ u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+ /* Bottom of switch_to_user stack frame. */
+} PACKED;
/* NB. Both the following are 64 bits each. */
typedef unsigned long memory_t; /* Full-sized pointer/address/memory-size. */
-typedef unsigned long cpureg_t; /* Full-sized register. */
/*
- * Send an array of these to HYPERVISOR_set_trap_table()
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * N.B. As in x86/32 mode, the privilege level specifies which modes may enter
+ * a trap via a software interrupt. Since rings 1 and 2 are unavailable, we
+ * allocate privilege levels as follows:
+ *  Level == 0: No one may enter
+ * Level == 1: Kernel may enter
+ * Level == 2: Kernel may enter
+ * Level == 3: Everyone may enter
*/
#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
#define TI_GET_IF(_ti) ((_ti)->flags & 4)
@@ -83,58 +134,74 @@ typedef struct {
memory_t address; /* 8: code address */
} PACKED trap_info_t; /* 16 bytes */
-typedef struct
-{
- unsigned long r15;
- unsigned long r14;
- unsigned long r13;
- unsigned long r12;
- unsigned long rbp;
- unsigned long rbx;
- unsigned long r11;
- unsigned long r10;
- unsigned long r9;
- unsigned long r8;
- unsigned long rax;
- unsigned long rcx;
- unsigned long rdx;
- unsigned long rsi;
- unsigned long rdi;
- unsigned long rip;
- unsigned long cs;
- unsigned long eflags;
- unsigned long rsp;
- unsigned long ss;
-} PACKED execution_context_t;
+typedef struct cpu_user_regs {
+ u64 r15;
+ u64 r14;
+ u64 r13;
+ u64 r12;
+ union { u64 rbp, ebp; };
+ union { u64 rbx, ebx; };
+ u64 r11;
+ u64 r10;
+ u64 r9;
+ u64 r8;
+ union { u64 rax, eax; };
+ union { u64 rcx, ecx; };
+ union { u64 rdx, edx; };
+ union { u64 rsi, esi; };
+ union { u64 rdi, edi; };
+ u32 error_code; /* private */
+ u32 entry_vector; /* private */
+ union { u64 rip, eip; };
+ u16 cs, _pad0[1];
+ u8 saved_upcall_mask;
+ u8 _pad1[3];
+ union { u64 rflags, eflags; };
+ union { u64 rsp, esp; };
+ u16 ss, _pad2[3];
+ u16 es, _pad3[3];
+ u16 ds, _pad4[3];
+ u16 fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */
+ u16 gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_user. */
+} cpu_user_regs_t;
typedef u64 tsc_timestamp_t; /* RDTSC timestamp */
/*
- * The following is all CPU context. Note that the i387_ctxt block is filled
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
*/
-typedef struct {
-#define ECF_I387_VALID (1<<0)
- unsigned long flags;
- execution_context_t cpu_ctxt; /* User-level CPU registers */
- char fpu_ctxt[512]; /* User-level FPU registers */
+typedef struct vcpu_guest_context {
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_VMX_GUEST (1<<1)
+#define VGCF_IN_KERNEL (1<<2)
+ unsigned long flags; /* VGCF_* flags */
+ cpu_user_regs_t user_regs; /* User-level CPU registers */
+ struct { char x[512]; } fpu_ctxt /* User-level FPU registers */
+ __attribute__((__aligned__(16))); /* (needs 16-byte alignment) */
trap_info_t trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
- unsigned long guestos_ss, guestos_esp; /* Virtual TSS (only SS1/ESP1) */
+ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
unsigned long pt_base; /* CR3 (pagetable base) */
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
- unsigned long event_callback_cs; /* CS:EIP of event callback */
unsigned long event_callback_eip;
- unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
unsigned long failsafe_callback_eip;
-} PACKED full_execution_context_t;
+ unsigned long syscall_callback_eip;
+ unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+ /* Segment base addresses. */
+ u64 fs_base;
+ u64 gs_base_kernel;
+ u64 gs_base_user;
+} vcpu_guest_context_t;
+
+typedef struct {
+ /* MFN of a table of MFNs that make up p2m table */
+ u64 pfn_to_mfn_frame_list;
+} arch_shared_info_t;
typedef struct {
- u64 mfn_to_pfn_start; /* MFN of start of m2p table */
- u64 pfn_to_mfn_frame_list; /* MFN of a table of MFNs that
- make up p2m table */
-} PACKED arch_shared_info_t;
+} arch_vcpu_info_t;
#endif /* !__ASSEMBLY__ */
diff --git a/xen/include/public/dom0_ops.h b/xen/include/public/dom0_ops.h
index eb4766192d..0768b8c6ae 100644
--- a/xen/include/public/dom0_ops.h
+++ b/xen/include/public/dom0_ops.h
@@ -19,24 +19,19 @@
* This makes sure that old versions of dom0 tools will stop working in a
* well-defined way (rather than crashing the machine, for instance).
*/
-#define DOM0_INTERFACE_VERSION 0xAAAA001A
+#define DOM0_INTERFACE_VERSION 0xAAAA1006
/************************************************************************/
#define DOM0_GETMEMLIST 2
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad0;
- u32 __pad1;
- memory_t max_pfns; /* 8 */
- MEMORY_PADDING;
- void *buffer; /* 16 */
- MEMORY_PADDING;
+ domid_t domain;
+ memory_t max_pfns;
+ void *buffer;
/* OUT variables. */
- memory_t num_pfns; /* 24 */
- MEMORY_PADDING;
-} PACKED dom0_getmemlist_t; /* 32 bytes */
+ memory_t num_pfns;
+} dom0_getmemlist_t;
#define DOM0_SCHEDCTL 6
/* struct sched_ctl_cmd is from sched-ctl.h */
@@ -48,48 +43,36 @@ typedef struct sched_adjdom_cmd dom0_adjustdom_t;
#define DOM0_CREATEDOMAIN 8
typedef struct {
- /* IN parameters. */
- memory_t memory_kb; /* 0 */
- MEMORY_PADDING;
- u32 cpu; /* 8 */
- u32 __pad0; /* 12 */
/* IN/OUT parameters. */
- /* If 0, domain is allocated. If non-zero use it unless in use. */
- domid_t domain; /* 16 */
- u16 __pad1;
- /* OUT parameters. */
-} PACKED dom0_createdomain_t; /* 20 bytes */
+ /* Identifier for new domain (auto-allocate if zero is specified). */
+ domid_t domain;
+} dom0_createdomain_t;
#define DOM0_DESTROYDOMAIN 9
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad;
-} PACKED dom0_destroydomain_t; /* 4 bytes */
+ domid_t domain;
+} dom0_destroydomain_t;
#define DOM0_PAUSEDOMAIN 10
typedef struct {
/* IN parameters. */
- domid_t domain; /* 0 */
- u16 __pad;
-} PACKED dom0_pausedomain_t; /* 4 bytes */
+ domid_t domain;
+} dom0_pausedomain_t;
#define DOM0_UNPAUSEDOMAIN 11
typedef struct {
/* IN parameters. */
- domid_t domain; /* 0 */
- u16 __pad;
-} PACKED dom0_unpausedomain_t; /* 4 bytes */
+ domid_t domain;
+} dom0_unpausedomain_t;
#define DOM0_GETDOMAININFO 12
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */ /* NB. IN/OUT variable. */
- u16 __pad;
+ domid_t domain; /* NB. IN/OUT variable. */
/* OUT variables. */
#define DOMFLAGS_DYING (1<<0) /* Domain is scheduled to die. */
-#define DOMFLAGS_CRASHED (1<<1) /* Crashed domain; frozen for postmortem. */
-#define DOMFLAGS_SHUTDOWN (1<<2) /* The guest OS has shut itself down. */
+#define DOMFLAGS_SHUTDOWN (1<<2) /* The guest OS has shut down. */
#define DOMFLAGS_PAUSED (1<<3) /* Currently paused by control software. */
#define DOMFLAGS_BLOCKED (1<<4) /* Currently blocked pending an event. */
#define DOMFLAGS_RUNNING (1<<5) /* Domain is currently running. */
@@ -97,64 +80,52 @@ typedef struct {
#define DOMFLAGS_CPUSHIFT 8
#define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code. */
#define DOMFLAGS_SHUTDOWNSHIFT 16
- u32 flags; /* 4 */
- full_execution_context_t *ctxt; /* 8 */ /* NB. IN/OUT variable. */
- MEMORY_PADDING;
- memory_t tot_pages; /* 16 */
- MEMORY_PADDING;
- memory_t max_pages; /* 24 */
- MEMORY_PADDING;
- memory_t shared_info_frame; /* 32: MFN of shared_info struct */
- MEMORY_PADDING;
- u64 cpu_time; /* 40 */
-} PACKED dom0_getdomaininfo_t; /* 48 bytes */
-
-#define DOM0_BUILDDOMAIN 13
+ u32 flags;
+ memory_t tot_pages;
+ memory_t max_pages;
+ memory_t shared_info_frame; /* MFN of shared_info struct */
+ u64 cpu_time;
+ u32 n_vcpu;
+ s32 vcpu_to_cpu[MAX_VIRT_CPUS]; /* current mapping */
+ cpumap_t cpumap[MAX_VIRT_CPUS]; /* allowable mapping */
+} dom0_getdomaininfo_t;
+
+#define DOM0_SETDOMAININFO 13
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad0; /* 2 */
- u32 __pad1; /* 4 */
+ domid_t domain;
+ u16 vcpu;
/* IN/OUT parameters */
- full_execution_context_t *ctxt; /* 8 */
- MEMORY_PADDING;
-} PACKED dom0_builddomain_t; /* 16 bytes */
-
-#define DOM0_IOPL 14
-typedef struct {
- domid_t domain; /* 0 */
- u16 __pad;
- u32 iopl; /* 4 */
-} PACKED dom0_iopl_t; /* 8 bytes */
+ vcpu_guest_context_t *ctxt;
+} dom0_setdomaininfo_t;
#define DOM0_MSR 15
typedef struct {
/* IN variables. */
- u32 write; /* 0 */
- u32 cpu_mask; /* 4 */
- u32 msr; /* 8 */
- u32 in1; /* 12 */
- u32 in2; /* 16 */
+ u32 write;
+ u32 cpu_mask;
+ u32 msr;
+ u32 in1;
+ u32 in2;
/* OUT variables. */
- u32 out1; /* 20 */
- u32 out2; /* 24 */
-} PACKED dom0_msr_t; /* 28 bytes */
+ u32 out1;
+ u32 out2;
+} dom0_msr_t;
#define DOM0_DEBUG 16
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */
- u8 opcode; /* 2 */
- u8 __pad;
- u32 in1; /* 4 */
- u32 in2; /* 8 */
- u32 in3; /* 12 */
- u32 in4; /* 16 */
+ domid_t domain;
+ u8 opcode;
+ u32 in1;
+ u32 in2;
+ u32 in3;
+ u32 in4;
/* OUT variables. */
- u32 status; /* 20 */
- u32 out1; /* 24 */
- u32 out2; /* 28 */
-} PACKED dom0_debug_t; /* 32 bytes */
+ u32 status;
+ u32 out1;
+ u32 out2;
+} dom0_debug_t;
/*
* Set clock such that it would read <secs,usecs> after 00:00:00 UTC,
@@ -163,10 +134,10 @@ typedef struct {
#define DOM0_SETTIME 17
typedef struct {
/* IN variables. */
- u32 secs; /* 0 */
- u32 usecs; /* 4 */
- u64 system_time; /* 8 */
-} PACKED dom0_settime_t; /* 16 bytes */
+ u32 secs;
+ u32 usecs;
+ u64 system_time;
+} dom0_settime_t;
#define DOM0_GETPAGEFRAMEINFO 18
#define NOTAB 0 /* normal page */
@@ -181,74 +152,63 @@ typedef struct {
typedef struct {
/* IN variables. */
- memory_t pfn; /* 0: Machine page frame number to query. */
- MEMORY_PADDING;
- domid_t domain; /* 8: To which domain does the frame belong? */
- u16 __pad;
+ memory_t pfn; /* Machine page frame number to query. */
+ domid_t domain; /* To which domain does the frame belong? */
/* OUT variables. */
/* Is the page PINNED to a type? */
- u32 type; /* 12: see above type defs */
-} PACKED dom0_getpageframeinfo_t; /* 16 bytes */
+ u32 type; /* see above type defs */
+} dom0_getpageframeinfo_t;
/*
* Read console content from Xen buffer ring.
*/
#define DOM0_READCONSOLE 19
typedef struct {
- memory_t str; /* 0 */
- MEMORY_PADDING;
- u32 count; /* 8 */
- u32 cmd; /* 12 */
-} PACKED dom0_readconsole_t; /* 16 bytes */
+ /* IN variables. */
+ u32 clear; /* Non-zero -> clear after reading. */
+ /* IN/OUT variables. */
+ char *buffer; /* In: Buffer start; Out: Used buffer start */
+ u32 count; /* In: Buffer size; Out: Used buffer size */
+} dom0_readconsole_t;
/*
- * Pin Domain to a particular CPU (use -1 to unpin)
+ * Set which physical cpus a vcpu can execute on.
*/
#define DOM0_PINCPUDOMAIN 20
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad;
- s32 cpu; /* 4: -1 implies unpin */
-} PACKED dom0_pincpudomain_t; /* 8 bytes */
+ domid_t domain;
+ u16 vcpu;
+ cpumap_t *cpumap;
+} dom0_pincpudomain_t;
/* Get trace buffers machine base address */
-#define DOM0_GETTBUFS 21
+#define DOM0_TBUFCONTROL 21
typedef struct {
+ /* IN variables */
+#define DOM0_TBUF_GET_INFO 0
+#define DOM0_TBUF_SET_CPU_MASK 1
+#define DOM0_TBUF_SET_EVT_MASK 2
+ u8 op;
+ /* IN/OUT variables */
+ unsigned long cpu_mask;
+ u32 evt_mask;
/* OUT variables */
- memory_t mach_addr; /* 0: location of the trace buffers */
- MEMORY_PADDING;
- u32 size; /* 8: size of each trace buffer, in bytes */
-} PACKED dom0_gettbufs_t; /* 12 bytes */
+ memory_t mach_addr;
+ u32 size;
+} dom0_tbufcontrol_t;
/*
* Get physical information about the host machine
*/
#define DOM0_PHYSINFO 22
typedef struct {
- u32 ht_per_core; /* 0 */
- u32 cores; /* 4 */
- u32 cpu_khz; /* 8 */
- u32 __pad; /* 12 */
- memory_t total_pages; /* 16 */
- MEMORY_PADDING;
- memory_t free_pages; /* 24 */
- MEMORY_PADDING;
-} PACKED dom0_physinfo_t; /* 32 bytes */
-
-/*
- * Allow a domain access to a physical PCI device
- */
-#define DOM0_PCIDEV_ACCESS 23
-typedef struct {
- /* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad;
- u32 bus; /* 4 */
- u32 dev; /* 8 */
- u32 func; /* 12 */
- u32 enable; /* 16 */
-} PACKED dom0_pcidev_access_t; /* 20 bytes */
+ u32 ht_per_core;
+ u32 cores;
+ u32 cpu_khz;
+ memory_t total_pages;
+ memory_t free_pages;
+} dom0_physinfo_t;
/*
* Get the ID of the current scheduler.
@@ -256,8 +216,8 @@ typedef struct {
#define DOM0_SCHED_ID 24
typedef struct {
/* OUT variable */
- u32 sched_id; /* 0 */
-} PACKED dom0_sched_id_t; /* 4 bytes */
+ u32 sched_id;
+} dom0_sched_id_t;
/*
* Control shadow pagetables operation
@@ -268,6 +228,7 @@ typedef struct {
#define DOM0_SHADOW_CONTROL_OP_ENABLE_TEST 1
#define DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY 2
#define DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE 3
+
#define DOM0_SHADOW_CONTROL_OP_FLUSH 10 /* table ops */
#define DOM0_SHADOW_CONTROL_OP_CLEAN 11
#define DOM0_SHADOW_CONTROL_OP_PEEK 12
@@ -282,59 +243,30 @@ typedef struct dom0_shadow_control
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad;
- u32 op; /* 4 */
- unsigned long *dirty_bitmap; /* 8: pointer to locked buffer */
- MEMORY_PADDING;
+ domid_t domain;
+ u32 op;
+ unsigned long *dirty_bitmap; /* pointer to locked buffer */
/* IN/OUT variables. */
- memory_t pages; /* 16: size of buffer, updated with actual size */
- MEMORY_PADDING;
+ memory_t pages; /* size of buffer, updated with actual size */
/* OUT variables. */
dom0_shadow_control_stats_t stats;
-} PACKED dom0_shadow_control_t;
-
-#define DOM0_SETDOMAININITIALMEM 27
-typedef struct {
- /* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad0;
- u32 __pad1;
- memory_t initial_memkb; /* 8 */
- MEMORY_PADDING;
-} PACKED dom0_setdomaininitialmem_t; /* 16 bytes */
+} dom0_shadow_control_t;
#define DOM0_SETDOMAINMAXMEM 28
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad0;
- u32 __pad1;
- memory_t max_memkb; /* 8 */
- MEMORY_PADDING;
-} PACKED dom0_setdomainmaxmem_t; /* 16 bytes */
+ domid_t domain;
+ memory_t max_memkb;
+} dom0_setdomainmaxmem_t;
#define DOM0_GETPAGEFRAMEINFO2 29 /* batched interface */
typedef struct {
/* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad0;
- u32 __pad1;
- memory_t num; /* 8 */
- MEMORY_PADDING;
+ domid_t domain;
+ memory_t num;
/* IN/OUT variables. */
- unsigned long *array; /* 16 */
- MEMORY_PADDING;
-} PACKED dom0_getpageframeinfo2_t; /* 24 bytes */
-
-#define DOM0_SETDOMAINVMASSIST 30
-typedef struct {
- /* IN variables. */
- domid_t domain; /* 0 */
- u16 __pad0;
- u32 cmd; /* 4: vm_assist cmd */
- u32 type; /* 8: vm_assist cmd */
-} PACKED dom0_setdomainvmassist_t; /* 12 bytes */
+ unsigned long *array;
+} dom0_getpageframeinfo2_t;
/*
* Request memory range (@pfn, @pfn+@nr_pfns-1) to have type @type.
@@ -346,16 +278,13 @@ typedef struct {
#define DOM0_ADD_MEMTYPE 31
typedef struct {
/* IN variables. */
- memory_t pfn; /* 0 */
- MEMORY_PADDING;
- memory_t nr_pfns; /* 8 */
- MEMORY_PADDING;
- u32 type; /* 16 */
- u32 __pad0;
+ memory_t pfn;
+ memory_t nr_pfns;
+ u32 type;
/* OUT variables. */
- u32 handle; /* 24 */
- u32 reg; /* 28 */
-} PACKED dom0_add_memtype_t; /* 32 bytes */
+ u32 handle;
+ u32 reg;
+} dom0_add_memtype_t;
/*
* Tear down an existing memory-range type. If @handle is remembered then it
@@ -367,24 +296,20 @@ typedef struct {
#define DOM0_DEL_MEMTYPE 32
typedef struct {
/* IN variables. */
- u32 handle; /* 0 */
- u32 reg; /* 4 */
-} PACKED dom0_del_memtype_t; /* 8 bytes */
+ u32 handle;
+ u32 reg;
+} dom0_del_memtype_t;
/* Read current type of an MTRR (x86-specific). */
#define DOM0_READ_MEMTYPE 33
typedef struct {
/* IN variables. */
- u32 reg; /* 0 */
- u32 __pad0;
+ u32 reg;
/* OUT variables. */
- memory_t pfn; /* 8 */
- MEMORY_PADDING;
- memory_t nr_pfns; /* 16 */
- MEMORY_PADDING;
- u32 type; /* 24 */
- u32 __pad1;
-} PACKED dom0_read_memtype_t; /* 32 bytes */
+ memory_t pfn;
+ memory_t nr_pfns;
+ u32 type;
+} dom0_read_memtype_t;
/* Interface for controlling Xen software performance counters. */
#define DOM0_PERFCCONTROL 34
@@ -392,33 +317,45 @@ typedef struct {
#define DOM0_PERFCCONTROL_OP_RESET 1 /* Reset all counters to zero. */
#define DOM0_PERFCCONTROL_OP_QUERY 2 /* Get perfctr information. */
typedef struct {
- u8 name[80]; /* 0: name of perf counter */
- u32 nr_vals; /* 80: number of values for this counter */
- u32 vals[64]; /* 84: array of values */
-} PACKED dom0_perfc_desc_t; /* 340 bytes */
+ u8 name[80]; /* name of perf counter */
+ u32 nr_vals; /* number of values for this counter */
+ u32 vals[64]; /* array of values */
+} dom0_perfc_desc_t;
typedef struct {
/* IN variables. */
- u32 op; /* 0: DOM0_PERFCCONTROL_OP_??? */
+ u32 op; /* DOM0_PERFCCONTROL_OP_??? */
/* OUT variables. */
- u32 nr_counters; /* 4: number of counters */
- dom0_perfc_desc_t *desc; /* 8: counter information (or NULL) */
- MEMORY_PADDING;
-} PACKED dom0_perfccontrol_t; /* 16 bytes */
+ u32 nr_counters; /* number of counters */
+ dom0_perfc_desc_t *desc; /* counter information (or NULL) */
+} dom0_perfccontrol_t;
#define DOM0_MICROCODE 35
typedef struct {
/* IN variables. */
- void *data; /* 0: Pointer to microcode data */
- MEMORY_PADDING;
- u32 length; /* 8: Length of microcode data. */
- u32 _pad0;
-} PACKED dom0_microcode_t; /* 16 bytes */
+ void *data; /* Pointer to microcode data */
+ u32 length; /* Length of microcode data. */
+} dom0_microcode_t;
+
+#define DOM0_IOPORT_PERMISSION 36
+typedef struct {
+ domid_t domain; /* domain to be affected */
+ u16 first_port; /* first port in range */
+ u16 nr_ports; /* size of port range */
+ u16 allow_access; /* allow or deny access to range? */
+} dom0_ioport_permission_t;
+
+#define DOM0_GETVCPUCONTEXT 37
+typedef struct {
+ domid_t domain; /* domain to be affected */
+ u16 vcpu; /* vcpu # */
+ vcpu_guest_context_t *ctxt; /* NB. IN/OUT variable. */
+ u64 cpu_time;
+} dom0_getvcpucontext_t;
typedef struct {
- u32 cmd; /* 0 */
- u32 interface_version; /* 4 */ /* DOM0_INTERFACE_VERSION */
- union { /* 8 */
- u32 dummy[18]; /* 72 bytes */
+ u32 cmd;
+ u32 interface_version; /* DOM0_INTERFACE_VERSION */
+ union {
dom0_createdomain_t createdomain;
dom0_pausedomain_t pausedomain;
dom0_unpausedomain_t unpausedomain;
@@ -426,30 +363,28 @@ typedef struct {
dom0_getmemlist_t getmemlist;
dom0_schedctl_t schedctl;
dom0_adjustdom_t adjustdom;
- dom0_builddomain_t builddomain;
+ dom0_setdomaininfo_t setdomaininfo;
dom0_getdomaininfo_t getdomaininfo;
dom0_getpageframeinfo_t getpageframeinfo;
- dom0_iopl_t iopl;
- dom0_msr_t msr;
- dom0_debug_t debug;
- dom0_settime_t settime;
- dom0_readconsole_t readconsole;
- dom0_pincpudomain_t pincpudomain;
- dom0_gettbufs_t gettbufs;
+ dom0_msr_t msr;
+ dom0_debug_t debug;
+ dom0_settime_t settime;
+ dom0_readconsole_t readconsole;
+ dom0_pincpudomain_t pincpudomain;
+ dom0_tbufcontrol_t tbufcontrol;
dom0_physinfo_t physinfo;
- dom0_pcidev_access_t pcidev_access;
dom0_sched_id_t sched_id;
- dom0_shadow_control_t shadow_control;
- dom0_setdomaininitialmem_t setdomaininitialmem;
- dom0_setdomainmaxmem_t setdomainmaxmem;
- dom0_getpageframeinfo2_t getpageframeinfo2;
- dom0_setdomainvmassist_t setdomainvmassist;
+ dom0_shadow_control_t shadow_control;
+ dom0_setdomainmaxmem_t setdomainmaxmem;
+ dom0_getpageframeinfo2_t getpageframeinfo2;
dom0_add_memtype_t add_memtype;
dom0_del_memtype_t del_memtype;
dom0_read_memtype_t read_memtype;
dom0_perfccontrol_t perfccontrol;
dom0_microcode_t microcode;
- } PACKED u;
-} PACKED dom0_op_t; /* 80 bytes */
+ dom0_ioport_permission_t ioport_permission;
+ dom0_getvcpucontext_t getvcpucontext;
+ } u;
+} dom0_op_t;
#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
diff --git a/xen/include/public/event_channel.h b/xen/include/public/event_channel.h
index 284326d9b6..e787a0524a 100644
--- a/xen/include/public/event_channel.h
+++ b/xen/include/public/event_channel.h
@@ -10,15 +10,16 @@
#define __XEN_PUBLIC_EVENT_CHANNEL_H__
/*
- * EVTCHNOP_alloc_unbound: Allocate a fresh local port and prepare
- * it for binding to <dom>.
+ * EVTCHNOP_alloc_unbound: Prepare a local port for binding to <dom>.
+ * <port> may be wildcarded by setting to zero, in which case a fresh port
+ * will be allocated, and the field filled in on return.
*/
#define EVTCHNOP_alloc_unbound 6
typedef struct {
/* IN parameters */
domid_t dom; /* 0 */
u16 __pad;
- /* OUT parameters */
+ /* IN/OUT parameters */
u32 port; /* 4 */
} PACKED evtchn_alloc_unbound_t; /* 8 bytes */
@@ -51,9 +52,11 @@ typedef struct {
} PACKED evtchn_bind_interdomain_t; /* 12 bytes */
/*
- * EVTCHNOP_bind_virq: Bind a local event channel to IRQ <irq>.
+ * EVTCHNOP_bind_virq: Bind a local event channel to IRQ <irq> on calling vcpu.
* NOTES:
- * 1. A virtual IRQ may be bound to at most one event channel per domain.
+ * 1. A virtual IRQ may be bound to at most one event channel per vcpu.
+ * 2. The allocated event channel is bound to the calling vcpu. The binding
+ * may not be changed.
*/
#define EVTCHNOP_bind_virq 1
typedef struct {
@@ -80,6 +83,20 @@ typedef struct {
} PACKED evtchn_bind_pirq_t; /* 12 bytes */
/*
+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
+ * NOTES:
+ * 1. The allocated event channel is bound to the calling vcpu. The binding
+ * may not be changed.
+ */
+#define EVTCHNOP_bind_ipi 7
+typedef struct {
+ /* IN parameters. */
+ u32 ipi_vcpu; /* 0 */
+ /* OUT parameters. */
+ u32 port; /* 4 */
+} PACKED evtchn_bind_ipi_t; /* 8 bytes */
+
+/*
* EVTCHNOP_close: Close the communication channel which has an endpoint at
* <dom, port>. If the channel is interdomain then the remote end is placed in
* the unbound state (EVTCHNSTAT_unbound), awaiting a new connection.
@@ -128,6 +145,7 @@ typedef struct {
#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */
#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */
#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */
u32 status; /* 8 */
union { /* 12 */
struct {
@@ -140,6 +158,7 @@ typedef struct {
} PACKED interdomain; /* EVTCHNSTAT_interdomain */
u32 pirq; /* EVTCHNSTAT_pirq */ /* 12 */
u32 virq; /* EVTCHNSTAT_virq */ /* 12 */
+ u32 ipi_vcpu; /* EVTCHNSTAT_ipi */ /* 12 */
} PACKED u;
} PACKED evtchn_status_t; /* 20 bytes */
@@ -151,6 +170,7 @@ typedef struct {
evtchn_bind_interdomain_t bind_interdomain;
evtchn_bind_virq_t bind_virq;
evtchn_bind_pirq_t bind_pirq;
+ evtchn_bind_ipi_t bind_ipi;
evtchn_close_t close;
evtchn_send_t send;
evtchn_status_t status;
diff --git a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
index 389e082646..79c87a7cff 100644
--- a/xen/include/public/grant_table.h
+++ b/xen/include/public/grant_table.h
@@ -185,6 +185,8 @@ typedef struct {
u32 __pad;
} PACKED gnttab_unmap_grant_ref_t; /* 24 bytes */
+#define GNTUNMAP_DEV_FROM_VIRT (~0U)
+
/*
* GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
* <nr_frames> pages. The frame addresses are written to the <frame_list>.
@@ -207,6 +209,19 @@ typedef struct {
} PACKED gnttab_setup_table_t; /* 16 bytes */
/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+#define GNTTABOP_dump_table 3
+typedef struct {
+ /* IN parameters. */
+ domid_t dom; /* 0 */
+ /* OUT parameters. */
+ s16 status; /* 2: GNTST_* */
+} PACKED gnttab_dump_table_t; /* 4 bytes */
+
+
+/*
* Bitfield values for update_pin_status.flags.
*/
/* Map the grant entry for access by I/O devices. */
@@ -233,9 +248,11 @@ typedef struct {
#define GNTST_general_error (-1) /* General undefined error. */
#define GNTST_bad_domain (-2) /* Unrecognsed domain id. */
#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */
-#define GNTST_bad_handle (-3) /* Unrecognised or inappropriate handle. */
-#define GNTST_no_device_space (-4) /* Out of space in I/O MMU. */
-#define GNTST_permission_denied (-5) /* Not enough privilege for operation. */
+#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
#define GNTTABOP_error_msgs { \
"okay", \
@@ -243,9 +260,21 @@ typedef struct {
"unrecognised domain id", \
"invalid grant reference", \
"invalid mapping handle", \
+ "invalid virtual address", \
+ "invalid device address", \
"no spare translation slot in the I/O MMU", \
"permission denied" \
}
+
+typedef struct {
+ union { /* 0 */
+ gnttab_map_grant_ref_t map_grant_ref;
+ gnttab_unmap_grant_ref_t unmap_grant_ref;
+ gnttab_setup_table_t setup_table;
+ gnttab_dump_table_t dump_table;
+ u8 __dummy[24];
+ } PACKED u;
+} PACKED gnttab_op_t; /* 32 bytes */
#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
diff --git a/xen/include/public/io/blkif.h b/xen/include/public/io/blkif.h
index 8cd3696eb6..9bc465cf1b 100644
--- a/xen/include/public/io/blkif.h
+++ b/xen/include/public/io/blkif.h
@@ -9,6 +9,8 @@
#ifndef __XEN_PUBLIC_IO_BLKIF_H__
#define __XEN_PUBLIC_IO_BLKIF_H__
+#include "ring.h"
+
#define blkif_vdev_t u16
#define blkif_sector_t u64
@@ -32,16 +34,24 @@ typedef struct {
blkif_vdev_t device; /* 2: only for read/write requests */
unsigned long id; /* 4: private guest value, echoed in resp */
blkif_sector_t sector_number; /* start sector idx on disk (r/w only) */
- /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect ; @f_a_s[:12]=frame. */
+ /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect */
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* @f_a_s[:16]= grant reference (16 bits) */
+#else
+ /* @f_a_s[:12]=@frame: machine page frame number. */
+#endif
/* @first_sect: first sector in frame to transfer (inclusive). */
/* @last_sect: last sector in frame to transfer (inclusive). */
- /* @frame: machine page frame number. */
unsigned long frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} PACKED blkif_request_t;
#define blkif_first_sect(_fas) (((_fas)>>3)&7)
#define blkif_last_sect(_fas) ((_fas)&7)
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#define blkif_gref_from_fas(_fas) ((_fas)>>16)
+#endif
+
typedef struct {
unsigned long id; /* copied from request */
u8 operation; /* copied from request */
@@ -52,27 +62,10 @@ typedef struct {
#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */
/*
- * We use a special capitalised type name because it is _essential_ that all
- * arithmetic on indexes is done on an integer type of the correct size.
- */
-typedef u32 BLKIF_RING_IDX;
-
-/*
- * Ring indexes are 'free running'. That is, they are not stored modulo the
- * size of the ring buffer. The following macro converts a free-running counter
- * into a value that can directly index a ring-buffer array.
+ * Generate blkif ring structures and types.
*/
-#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
-
-typedef struct {
- BLKIF_RING_IDX req_prod; /* 0: Request producer. Updated by front-end. */
- BLKIF_RING_IDX resp_prod; /* 4: Response producer. Updated by back-end. */
- union { /* 8 */
- blkif_request_t req;
- blkif_response_t resp;
- } PACKED ring[BLKIF_RING_SIZE];
-} PACKED blkif_ring_t;
+DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
/*
* BLKIF_OP_PROBE:
@@ -90,26 +83,16 @@ typedef struct {
* of vdisk_t elements.
*/
-/* XXX SMH: Type values below are chosen to match ide_xxx in Linux ide.h. */
-#define VDISK_TYPE_FLOPPY 0x00
-#define VDISK_TYPE_TAPE 0x01
-#define VDISK_TYPE_CDROM 0x05
-#define VDISK_TYPE_OPTICAL 0x07
-#define VDISK_TYPE_DISK 0x20
-
-#define VDISK_TYPE_MASK 0x3F
-#define VDISK_TYPE(_x) ((_x) & VDISK_TYPE_MASK)
+#define VDISK_CDROM 0x1
+#define VDISK_REMOVABLE 0x2
+#define VDISK_READONLY 0x4
-/* The top two bits of the type field encode various flags. */
-#define VDISK_FLAG_RO 0x40
-#define VDISK_FLAG_VIRT 0x80
-#define VDISK_READONLY(_x) ((_x) & VDISK_FLAG_RO)
-#define VDISK_VIRTUAL(_x) ((_x) & VDISK_FLAG_VIRT)
-
-typedef struct {
+typedef struct vdisk {
blkif_sector_t capacity; /* 0: Size in terms of 512-byte sectors. */
blkif_vdev_t device; /* 8: Device number (opaque 16 bit value). */
u16 info; /* 10: Device type and flags (VDISK_*). */
-} PACKED vdisk_t; /* 12 bytes */
+ u16 sector_size; /* 12: Minimum alignment for requests. */
+ u16 _pad;
+} PACKED vdisk_t; /* 16 bytes */
#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/xen/include/public/io/domain_controller.h b/xen/include/public/io/domain_controller.h
index 58652a530e..69a8359aa9 100644
--- a/xen/include/public/io/domain_controller.h
+++ b/xen/include/public/io/domain_controller.h
@@ -10,6 +10,8 @@
#ifndef __XEN_PUBLIC_IO_DOMAIN_CONTROLLER_H__
#define __XEN_PUBLIC_IO_DOMAIN_CONTROLLER_H__
+#include "ring.h"
+
/*
* CONTROLLER MESSAGING INTERFACE.
*/
@@ -22,15 +24,29 @@ typedef struct {
u8 msg[60]; /* 4: type-specific message data */
} PACKED control_msg_t; /* 64 bytes */
+/* These are used by the control message deferred ring. */
#define CONTROL_RING_SIZE 8
typedef u32 CONTROL_RING_IDX;
#define MASK_CONTROL_IDX(_i) ((_i)&(CONTROL_RING_SIZE-1))
+/*
+ * Generate control ring structures and types.
+ *
+ * CONTROL_RING_MEM is currently an 8-slot ring of ctrl_msg_t structs and
+ * two 32-bit counters: (64 * 8) + (2 * 4) = 520
+ */
+#define CONTROL_RING_MEM 520
+DEFINE_RING_TYPES(ctrl, control_msg_t, control_msg_t);
+
typedef struct {
- control_msg_t tx_ring[CONTROL_RING_SIZE]; /* 0: guest -> controller */
- control_msg_t rx_ring[CONTROL_RING_SIZE]; /* 512: controller -> guest */
- CONTROL_RING_IDX tx_req_prod, tx_resp_prod; /* 1024, 1028 */
- CONTROL_RING_IDX rx_req_prod, rx_resp_prod; /* 1032, 1036 */
+ union {
+ ctrl_sring_t tx_ring; /* 0: guest -> controller */
+ char __x[CONTROL_RING_MEM];
+ } PACKED;
+ union {
+ ctrl_sring_t rx_ring; /* 520: controller -> guest */
+ char __y[CONTROL_RING_MEM];
+ } PACKED;
} PACKED control_if_t; /* 1040 bytes */
/*
@@ -43,7 +59,8 @@ typedef struct {
#define CMSG_NETIF_FE 4 /* Network-device frontend */
#define CMSG_SHUTDOWN 6 /* Shutdown messages */
#define CMSG_MEM_REQUEST 7 /* Memory reservation reqs */
-
+#define CMSG_USBIF_BE 8 /* USB controller backend */
+#define CMSG_USBIF_FE 9 /* USB controller frontend */
/******************************************************************************
* CONSOLE DEFINITIONS
@@ -158,8 +175,6 @@ typedef struct {
#define CMSG_BLKIF_BE_DISCONNECT 3 /* Disconnect i/f from remote driver. */
#define CMSG_BLKIF_BE_VBD_CREATE 4 /* Create a new VBD for an interface. */
#define CMSG_BLKIF_BE_VBD_DESTROY 5 /* Delete a VBD from an interface. */
-#define CMSG_BLKIF_BE_VBD_GROW 6 /* Append an extent to a given VBD. */
-#define CMSG_BLKIF_BE_VBD_SHRINK 7 /* Remove last extent from a given VBD. */
/* Messages to domain controller. */
#define CMSG_BLKIF_BE_DRIVER_STATUS 32
@@ -168,12 +183,6 @@ typedef struct {
* Message request/response definitions for block-device messages.
*/
-typedef struct {
- blkif_sector_t sector_start; /* 0 */
- blkif_sector_t sector_length; /* 8 */
- blkif_pdev_t device; /* 16 */
-} PACKED blkif_extent_t; /* 20 bytes */
-
/* Non-specific 'okay' return. */
#define BLKIF_BE_STATUS_OKAY 0
/* Non-specific 'error' return. */
@@ -185,7 +194,7 @@ typedef struct {
#define BLKIF_BE_STATUS_VBD_EXISTS 5
#define BLKIF_BE_STATUS_VBD_NOT_FOUND 6
#define BLKIF_BE_STATUS_OUT_OF_MEMORY 7
-#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND 8
+#define BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND 8
#define BLKIF_BE_STATUS_MAPPING_ERROR 9
/* This macro can be used to create an array of descriptive error strings. */
@@ -270,11 +279,13 @@ typedef struct {
domid_t domid; /* 0: Identify blkdev interface. */
u16 __pad;
u32 blkif_handle; /* 4: ...ditto... */
- blkif_vdev_t vdevice; /* 8: Interface-specific id for this VBD. */
- u16 readonly; /* 10: Non-zero -> VBD isn't writable. */
+ blkif_pdev_t pdevice; /* 8 */
+ u32 dev_handle; /* 12: Extended device id field. */
+ blkif_vdev_t vdevice; /* 16: Interface-specific id for this VBD. */
+ u16 readonly; /* 18: Non-zero -> VBD isn't writable. */
/* OUT */
- u32 status; /* 12 */
-} PACKED blkif_be_vbd_create_t; /* 16 bytes */
+ u32 status; /* 20 */
+} PACKED blkif_be_vbd_create_t; /* 24 bytes */
/* CMSG_BLKIF_BE_VBD_DESTROY */
typedef struct {
@@ -288,31 +299,6 @@ typedef struct {
u32 status; /* 12 */
} PACKED blkif_be_vbd_destroy_t; /* 16 bytes */
-/* CMSG_BLKIF_BE_VBD_GROW */
-typedef struct {
- /* IN */
- domid_t domid; /* 0: Identify blkdev interface. */
- u16 __pad0; /* 2 */
- u32 blkif_handle; /* 4: ...ditto... */
- blkif_extent_t extent; /* 8: Physical extent to append to VBD. */
- blkif_vdev_t vdevice; /* 28: Interface-specific id of the VBD. */
- u16 __pad1; /* 30 */
- /* OUT */
- u32 status; /* 32 */
-} PACKED blkif_be_vbd_grow_t; /* 36 bytes */
-
-/* CMSG_BLKIF_BE_VBD_SHRINK */
-typedef struct {
- /* IN */
- domid_t domid; /* 0: Identify blkdev interface. */
- u16 __pad0; /* 2 */
- u32 blkif_handle; /* 4: ...ditto... */
- blkif_vdev_t vdevice; /* 8: Interface-specific id of the VBD. */
- u16 __pad1; /* 10 */
- /* OUT */
- u32 status; /* 12 */
-} PACKED blkif_be_vbd_shrink_t; /* 16 bytes */
-
/*
* CMSG_BLKIF_BE_DRIVER_STATUS:
* Notify the domain controller that the back-end driver is DOWN or UP.
@@ -424,6 +410,7 @@ typedef struct {
#define CMSG_NETIF_BE_DESTROY 1 /* Destroy a net-device interface. */
#define CMSG_NETIF_BE_CONNECT 2 /* Connect i/f to remote driver. */
#define CMSG_NETIF_BE_DISCONNECT 3 /* Disconnect i/f from remote driver. */
+#define CMSG_NETIF_BE_CREDITLIMIT 4 /* Limit i/f to a given credit limit. */
/* Messages to domain controller. */
#define CMSG_NETIF_BE_DRIVER_STATUS 32
@@ -466,9 +453,11 @@ typedef struct {
u32 netif_handle; /* 4: Domain-specific interface handle. */
u8 mac[6]; /* 8 */
u16 __pad1; /* 14 */
+ u8 be_mac[6]; /* 16 */
+ u16 __pad2; /* 22 */
/* OUT */
- u32 status; /* 16 */
-} PACKED netif_be_create_t; /* 20 bytes */
+ u32 status; /* 24 */
+} PACKED netif_be_create_t; /* 28 bytes */
/*
* CMSG_NETIF_BE_DESTROY:
@@ -486,6 +475,22 @@ typedef struct {
} PACKED netif_be_destroy_t; /* 12 bytes */
/*
+ * CMSG_NETIF_BE_CREDITLIMIT:
+ * Limit a virtual interface to "credit_bytes" bytes per "period_usec"
+ * microseconds.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* 0: Domain attached to new interface. */
+ u16 __pad0; /* 2 */
+ u32 netif_handle; /* 4: Domain-specific interface handle. */
+ u32 credit_bytes; /* 8: Vifs credit of bytes per period. */
+ u32 period_usec; /* 12: Credit replenishment period. */
+ /* OUT */
+ u32 status; /* 16 */
+} PACKED netif_be_creditlimit_t; /* 20 bytes */
+
+/*
* CMSG_NETIF_BE_CONNECT:
* When the driver sends a successful response then the interface is fully
* connected. The controller will send a CONNECTED notification to the
@@ -532,6 +537,208 @@ typedef struct {
} PACKED netif_be_driver_status_t; /* 4 bytes */
+
+/******************************************************************************
+ * USB-INTERFACE FRONTEND DEFINITIONS
+ */
+
+/* Messages from domain controller to guest. */
+#define CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED 0
+
+/* Messages from guest to domain controller. */
+#define CMSG_USBIF_FE_DRIVER_STATUS_CHANGED 32
+#define CMSG_USBIF_FE_INTERFACE_CONNECT 33
+#define CMSG_USBIF_FE_INTERFACE_DISCONNECT 34
+/*
+ * CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED:
+ * Notify a guest about a status change on one of its block interfaces.
+ * If the interface is DESTROYED or DOWN then the interface is disconnected:
+ * 1. The shared-memory frame is available for reuse.
+ * 2. Any unacknowledged messages pending on the interface were dropped.
+ */
+#define USBIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist. */
+#define USBIF_INTERFACE_STATUS_DISCONNECTED 1 /* Exists but is disconnected. */
+#define USBIF_INTERFACE_STATUS_CONNECTED 2 /* Exists and is connected. */
+typedef struct {
+ u32 status; /* 0 */
+ u16 evtchn; /* 4: (only if status == USBIF_INTERFACE_STATUS_CONNECTED). */
+ domid_t domid; /* 6: status != USBIF_INTERFACE_STATUS_DESTROYED */
+ u32 bandwidth; /* 8 */
+ u32 num_ports; /* 12 */
+} PACKED usbif_fe_interface_status_changed_t; /* 16 bytes */
+
+/*
+ * CMSG_USBIF_FE_DRIVER_STATUS_CHANGED:
+ * Notify the domain controller that the front-end driver is DOWN or UP.
+ * When the driver goes DOWN then the controller will send no more
+ * status-change notifications.
+ * If the driver goes DOWN while interfaces are still UP, the domain
+ * will automatically take the interfaces DOWN.
+ *
+ * NB. The controller should not send an INTERFACE_STATUS_CHANGED message
+ * for interfaces that are active when it receives an UP notification. We
+ * expect that the frontend driver will query those interfaces itself.
+ */
+#define USBIF_DRIVER_STATUS_DOWN 0
+#define USBIF_DRIVER_STATUS_UP 1
+typedef struct {
+ /* IN */
+ u32 status; /* 0: USBIF_DRIVER_STATUS_??? */
+} PACKED usbif_fe_driver_status_changed_t; /* 4 bytes */
+
+/*
+ * CMSG_USBIF_FE_INTERFACE_CONNECT:
+ * If successful, the domain controller will acknowledge with a
+ * STATUS_CONNECTED message.
+ */
+typedef struct {
+ u32 __pad;
+ memory_t shmem_frame; /* 8 */
+ MEMORY_PADDING;
+} PACKED usbif_fe_interface_connect_t; /* 16 bytes */
+
+/*
+ * CMSG_BLKIF_FE_INTERFACE_DISCONNECT:
+ * If successful, the domain controller will acknowledge with a
+ * STATUS_DISCONNECTED message.
+ */
+typedef struct {} PACKED usbif_fe_interface_disconnect_t; /* 4 bytes */
+
+
+/******************************************************************************
+ * USB-INTERFACE BACKEND DEFINITIONS
+ */
+
+/* Messages from domain controller. */
+#define CMSG_USBIF_BE_CREATE 0 /* Create a new block-device interface. */
+#define CMSG_USBIF_BE_DESTROY 1 /* Destroy a block-device interface. */
+#define CMSG_USBIF_BE_CONNECT 2 /* Connect i/f to remote driver. */
+#define CMSG_USBIF_BE_DISCONNECT 3 /* Disconnect i/f from remote driver. */
+#define CMSG_USBIF_BE_CLAIM_PORT 4 /* Claim host port for a domain. */
+#define CMSG_USBIF_BE_RELEASE_PORT 5 /* Release host port. */
+/* Messages to domain controller. */
+#define CMSG_USBIF_BE_DRIVER_STATUS_CHANGED 32
+
+/* Non-specific 'okay' return. */
+#define USBIF_BE_STATUS_OKAY 0
+/* Non-specific 'error' return. */
+#define USBIF_BE_STATUS_ERROR 1
+/* The following are specific error returns. */
+#define USBIF_BE_STATUS_INTERFACE_EXISTS 2
+#define USBIF_BE_STATUS_INTERFACE_NOT_FOUND 3
+#define USBIF_BE_STATUS_INTERFACE_CONNECTED 4
+#define USBIF_BE_STATUS_OUT_OF_MEMORY 7
+#define USBIF_BE_STATUS_MAPPING_ERROR 9
+
+/* This macro can be used to create an array of descriptive error strings. */
+#define USBIF_BE_STATUS_ERRORS { \
+ "Okay", \
+ "Non-specific error", \
+ "Interface already exists", \
+ "Interface not found", \
+ "Interface is still connected", \
+ "Out of memory", \
+ "Could not map domain memory" }
+
+/*
+ * CMSG_USBIF_BE_CREATE:
+ * When the driver sends a successful response then the interface is fully
+ * created. The controller will send a DOWN notification to the front-end
+ * driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* 0: Domain attached to new interface. */
+ u16 __pad;
+ /* OUT */
+ u32 status; /* 8 */
+} PACKED usbif_be_create_t; /* 12 bytes */
+
+/*
+ * CMSG_USBIF_BE_DESTROY:
+ * When the driver sends a successful response then the interface is fully
+ * torn down. The controller will send a DESTROYED notification to the
+ * front-end driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* 0: Identify interface to be destroyed. */
+ u16 __pad;
+ /* OUT */
+ u32 status; /* 8 */
+} PACKED usbif_be_destroy_t; /* 12 bytes */
+
+/*
+ * CMSG_USBIF_BE_CONNECT:
+ * When the driver sends a successful response then the interface is fully
+ * connected. The controller will send a CONNECTED notification to the
+ * front-end driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* 0: Domain attached to new interface. */
+ u16 __pad;
+ memory_t shmem_frame; /* 8: Page cont. shared comms window. */
+ MEMORY_PADDING;
+ u32 evtchn; /* 16: Event channel for notifications. */
+ u32 bandwidth; /* 20: Bandwidth allocated for isoch / int - us
+ * per 1ms frame (ie between 0 and 900 or 800
+ * depending on USB version). */
+ /* OUT */
+ u32 status; /* 24 */
+} PACKED usbif_be_connect_t; /* 28 bytes */
+
+/*
+ * CMSG_USBIF_BE_DISCONNECT:
+ * When the driver sends a successful response then the interface is fully
+ * disconnected. The controller will send a DOWN notification to the front-end
+ * driver.
+ */
+typedef struct {
+ /* IN */
+ domid_t domid; /* 0: Domain attached to new interface. */
+ u16 __pad;
+ /* OUT */
+ u32 status; /* 8 */
+} PACKED usbif_be_disconnect_t; /* 12 bytes */
+
+/*
+ * CMSG_USBIF_BE_DRIVER_STATUS_CHANGED:
+ * Notify the domain controller that the back-end driver is DOWN or UP.
+ * If the driver goes DOWN while interfaces are still UP, the controller
+ * will automatically send DOWN notifications.
+ */
+typedef struct {
+ u32 status; /* 0: USBIF_DRIVER_STATUS_??? */
+} PACKED usbif_be_driver_status_changed_t; /* 4 bytes */
+
+#define USB_PATH_LEN 16
+
+/*
+ * CMSG_USBIF_BE_CLAIM_PORT:
+ * Instruct the backend driver to claim any device plugged into the specified
+ * host port and to allow the specified domain to control that port.
+ */
+typedef struct
+{
+ /* IN */
+ domid_t domid; /* 0: which domain */
+ u32 usbif_port; /* 6: port on the virtual root hub */
+ u32 status; /* 10: status of operation */
+ char path[USB_PATH_LEN]; /* Currently specified in the Linux style - may need to be
+ * converted to some OS-independent format at some stage. */
+} PACKED usbif_be_claim_port_t;
+
+/*
+ * CMSG_USBIF_BE_RELEASE_PORT:
+ * Instruct the backend driver to release any device plugged into the specified
+ * host port.
+ */
+typedef struct
+{
+ char path[USB_PATH_LEN];
+} PACKED usbif_be_release_port_t;
+
/******************************************************************************
* SHUTDOWN DEFINITIONS
*/
diff --git a/xen/include/public/io/ioreq.h b/xen/include/public/io/ioreq.h
new file mode 100644
index 0000000000..2f01d88385
--- /dev/null
+++ b/xen/include/public/io/ioreq.h
@@ -0,0 +1,61 @@
+/*
+ * ioreq.h: I/O request definitions for device models
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef _IOREQ_H_
+#define _IOREQ_H_
+
+#define IOREQ_READ 1
+#define IOREQ_WRITE 0
+
+#define STATE_INVALID 0
+#define STATE_IOREQ_READY 1
+#define STATE_IOREQ_INPROCESS 2
+#define STATE_IORESP_READY 3
+#define STATE_IORESP_HOOK 4
+
+#define IOPACKET_PORT 2
+
+/* VMExit dispatcher should cooperate with instruction decoder to
+ prepare this structure and notify service OS and DM by sending
+ virq */
+typedef struct {
+ u64 addr; /* physical address */
+ u64 size; /* size in bytes */
+ u64 count; /* for rep prefixes */
+ union {
+ u64 data; /* data */
+ void *pdata; /* pointer to data */
+ } u;
+ u8 state:4;
+ u8 pdata_valid:1; /* if 1, use pdata above */
+ u8 dir:1; /* 1=read, 0=write */
+ u8 port_mm:1; /* 0=portio, 1=mmio */
+ u8 df:1;
+} ioreq_t;
+
+#define MAX_VECTOR 256
+#define BITS_PER_BYTE 8
+#define INTR_LEN (MAX_VECTOR/(BITS_PER_BYTE * sizeof(unsigned long)))
+
+typedef struct {
+ ioreq_t vp_ioreq;
+ unsigned long vp_intr[INTR_LEN];
+} vcpu_iodata_t;
+
+#endif /* _IOREQ_H_ */
diff --git a/xen/include/public/io/netif.h b/xen/include/public/io/netif.h
index 839cc8dade..87a5ce0a32 100644
--- a/xen/include/public/io/netif.h
+++ b/xen/include/public/io/netif.h
@@ -12,7 +12,8 @@
typedef struct {
memory_t addr; /* 0: Machine address of packet. */
MEMORY_PADDING;
- u16 id; /* 8: Echoed in response message. */
+ u16 csum_blank:1; /* Proto csum field blank? */
+ u16 id:15; /* 8: Echoed in response message. */
u16 size; /* 10: Packet size in bytes. */
} PACKED netif_tx_request_t; /* 12 bytes */
@@ -29,7 +30,8 @@ typedef struct {
typedef struct {
memory_t addr; /* 0: Machine address of packet. */
MEMORY_PADDING;
- u16 id; /* 8: */
+ u16 csum_valid:1; /* Protocol checksum is validated? */
+ u16 id:15; /* 8: */
s16 status; /* 10: -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
} PACKED netif_rx_response_t; /* 12 bytes */
diff --git a/xen/include/public/io/ring.h b/xen/include/public/io/ring.h
new file mode 100644
index 0000000000..68970f0399
--- /dev/null
+++ b/xen/include/public/io/ring.h
@@ -0,0 +1,199 @@
+/*
+ * Shared producer-consumer ring macros.
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1))
+#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x))
+#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __RING_SIZE(_s, _sz) \
+ (__RD32(((_sz) - 2*sizeof(RING_IDX)) / sizeof((_s)->ring[0])))
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ *
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ * DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ *
+ * mytag_sring_t - The shared ring.
+ * mytag_front_ring_t - The 'front' half of the ring.
+ * mytag_back_ring_t - The 'back' half of the ring.
+ *
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ * mytag_front_ring_t front_ring;
+ *
+ * SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly...
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \
+ \
+/* Shared ring entry */ \
+union __name##_sring_entry { \
+ __req_t req; \
+ __rsp_t rsp; \
+}; \
+ \
+/* Shared ring page */ \
+struct __name##_sring { \
+ RING_IDX req_prod; \
+ RING_IDX rsp_prod; \
+ union __name##_sring_entry ring[1]; /* variable-length */ \
+}; \
+ \
+/* "Front" end's private variables */ \
+struct __name##_front_ring { \
+ RING_IDX req_prod_pvt; \
+ RING_IDX rsp_cons; \
+ unsigned int nr_ents; \
+ struct __name##_sring *sring; \
+}; \
+ \
+/* "Back" end's private variables */ \
+struct __name##_back_ring { \
+ RING_IDX rsp_prod_pvt; \
+ RING_IDX req_cons; \
+ unsigned int nr_ents; \
+ struct __name##_sring *sring; \
+}; \
+ \
+/* Syntactic sugar */ \
+typedef struct __name##_sring __name##_sring_t; \
+typedef struct __name##_front_ring __name##_front_ring_t; \
+typedef struct __name##_back_ring __name##_back_ring_t;
+
+/*
+ * Macros for manipulating rings.
+ *
+ * FRONT_RING_whatever works on the "front end" of a ring: here
+ * requests are pushed on to the ring and responses taken off it.
+ *
+ * BACK_RING_whatever works on the "back end" of a ring: here
+ * requests are taken off the ring and responses put on.
+ *
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
+ * This is OK in 1-for-1 request-response situations where the
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do { \
+ (_s)->req_prod = 0; \
+ (_s)->rsp_prod = 0; \
+} while(0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do { \
+ (_r)->req_prod_pvt = 0; \
+ (_r)->rsp_cons = 0; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+ (_r)->sring = (_s); \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do { \
+ (_r)->rsp_prod_pvt = 0; \
+ (_r)->req_cons = 0; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+ (_r)->sring = (_s); \
+} while (0)
+
+/* Initialize to existing shared indexes -- for recovery */
+#define FRONT_RING_ATTACH(_r, _s, __size) do { \
+ (_r)->sring = (_s); \
+ (_r)->req_prod_pvt = (_s)->req_prod; \
+ (_r)->rsp_cons = (_s)->rsp_prod; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+#define BACK_RING_ATTACH(_r, _s, __size) do { \
+ (_r)->sring = (_s); \
+ (_r)->rsp_prod_pvt = (_s)->rsp_prod; \
+ (_r)->req_cons = (_s)->req_prod; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+/* How big is this ring? */
+#define RING_SIZE(_r) \
+ ((_r)->nr_ents)
+
+/* How many empty slots are on a ring? */
+#define RING_PENDING_REQUESTS(_r) \
+ ( ((_r)->req_prod_pvt - (_r)->rsp_cons) )
+
+/* Test if there is an empty slot available on the front ring.
+ * (This is only meaningful from the front. )
+ */
+#define RING_FULL(_r) \
+ (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r))
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
+ ( (_r)->rsp_cons != (_r)->sring->rsp_prod )
+
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) \
+ ( ((_r)->req_cons != (_r)->sring->req_prod ) && \
+ (((_r)->req_cons - (_r)->rsp_prod_pvt) != \
+ RING_SIZE(_r)) )
+
+/* Test if there are messages waiting to be pushed. */
+#define RING_HAS_UNPUSHED_REQUESTS(_r) \
+ ( (_r)->req_prod_pvt != (_r)->sring->req_prod )
+
+#define RING_HAS_UNPUSHED_RESPONSES(_r) \
+ ( (_r)->rsp_prod_pvt != (_r)->sring->rsp_prod )
+
+/* Copy the private producer pointer into the shared ring so the other end
+ * can see the updates we've made. */
+#define RING_PUSH_REQUESTS(_r) do { \
+ wmb(); \
+ (_r)->sring->req_prod = (_r)->req_prod_pvt; \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do { \
+ wmb(); \
+ (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \
+} while (0)
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx) \
+ (&((_r)->sring->ring[ \
+ ((_idx) & (RING_SIZE(_r) - 1)) \
+ ].req))
+
+#define RING_GET_RESPONSE(_r, _idx) \
+ (&((_r)->sring->ring[ \
+ ((_idx) & (RING_SIZE(_r) - 1)) \
+ ].rsp))
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
+ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
diff --git a/xen/include/public/io/usbif.h b/xen/include/public/io/usbif.h
new file mode 100644
index 0000000000..ff4e1480f2
--- /dev/null
+++ b/xen/include/public/io/usbif.h
@@ -0,0 +1,66 @@
+/******************************************************************************
+ * usbif.h
+ *
+ * Unified USB device I/O interface for Xen guest OSes.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __SHARED_USBIF_H__
+#define __SHARED_USBIF_H__
+
+#define usbif_vdev_t u16
+#define usbif_sector_t u64
+
+#define USBIF_OP_IO 0 /* Request IO to a device */
+#define USBIF_OP_PROBE 1 /* Is there a device on this port? */
+#define USBIF_OP_RESET 2 /* Reset a virtual USB port. */
+
+typedef struct {
+ unsigned long id; /* private guest value, echoed in resp */
+ u8 operation; /* USBIF_OP_??? */
+ u8 __pad1;
+ usbif_vdev_t port; /* guest virtual USB port */
+ unsigned long devnum :7; /* Device address, as seen by the guest.*/
+ unsigned long endpoint :4; /* Device endpoint. */
+ unsigned long direction :1; /* Pipe direction. */
+ unsigned long speed :1; /* Pipe speed. */
+ unsigned long pipe_type :2; /* Pipe type (iso, bulk, int, ctrl) */
+ unsigned long __pad2 :18;
+ unsigned long transfer_buffer; /* Machine address */
+ unsigned long length; /* Buffer length */
+ unsigned long transfer_flags; /* For now just pass Linux transfer
+ * flags - this may change. */
+ unsigned char setup[8]; /* Embed setup packets directly. */
+ unsigned long iso_schedule; /* Machine address of transfer sched (iso
+ * only) */
+ unsigned long num_iso; /* length of iso schedule */
+ unsigned long timeout; /* timeout in ms */
+} usbif_request_t;
+
+/* Data we need to pass:
+ * - Transparently handle short packets or complain at us?
+ */
+
+typedef struct {
+ unsigned long id; /* copied from request */
+ u8 operation; /* copied from request */
+ u8 data; /* Small chunk of in-band data */
+ s16 status; /* USBIF_RSP_??? */
+ unsigned long transfer_mutex; /* Used for cancelling requests atomically. */
+ unsigned long length; /* How much data we really got */
+} usbif_response_t;
+
+#define USBIF_RSP_ERROR -1 /* non-specific 'error' */
+#define USBIF_RSP_OKAY 0 /* non-specific 'okay' */
+
+DEFINE_RING_TYPES(usbif, usbif_request_t, usbif_response_t);
+
+typedef struct {
+ unsigned long length; /* IN = expected, OUT = actual */
+ unsigned long buffer_offset; /* IN offset in buffer specified in main
+ packet */
+ unsigned long status; /* OUT Status for this packet. */
+} usbif_iso_t;
+
+#endif /* __SHARED_USBIF_H__ */
diff --git a/xen/include/public/physdev.h b/xen/include/public/physdev.h
index ab1af9998a..41ad23bb9f 100644
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -1,80 +1,75 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (c) 2004 - Rolf Neugebauer - Intel Research Cambridge
- * (c) 2004 - Keir Fraser - University of Cambridge
- ****************************************************************************
- * Description: Interface for domains to access physical devices on the PCI bus
- */
#ifndef __XEN_PUBLIC_PHYSDEV_H__
#define __XEN_PUBLIC_PHYSDEV_H__
/* Commands to HYPERVISOR_physdev_op() */
-#define PHYSDEVOP_PCI_CFGREG_READ 0
-#define PHYSDEVOP_PCI_CFGREG_WRITE 1
-#define PHYSDEVOP_PCI_INITIALISE_DEVICE 2
-#define PHYSDEVOP_PCI_PROBE_ROOT_BUSES 3
#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4
#define PHYSDEVOP_IRQ_STATUS_QUERY 5
+#define PHYSDEVOP_SET_IOPL 6
+#define PHYSDEVOP_SET_IOBITMAP 7
+#define PHYSDEVOP_APIC_READ 8
+#define PHYSDEVOP_APIC_WRITE 9
+#define PHYSDEVOP_ASSIGN_VECTOR 10
-/* Read from PCI configuration space. */
typedef struct {
/* IN */
- u32 bus; /* 0 */
- u32 dev; /* 4 */
- u32 func; /* 8 */
- u32 reg; /* 12 */
- u32 len; /* 16 */
+ u32 irq; /* 0 */
/* OUT */
- u32 value; /* 20 */
-} PACKED physdevop_pci_cfgreg_read_t; /* 24 bytes */
+/* Need to call PHYSDEVOP_IRQ_UNMASK_NOTIFY when the IRQ has been serviced? */
+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY (1<<0)
+ u32 flags; /* 4 */
+} PACKED physdevop_irq_status_query_t; /* 8 bytes */
-/* Write to PCI configuration space. */
typedef struct {
/* IN */
- u32 bus; /* 0 */
- u32 dev; /* 4 */
- u32 func; /* 8 */
- u32 reg; /* 12 */
- u32 len; /* 16 */
- u32 value; /* 20 */
-} PACKED physdevop_pci_cfgreg_write_t; /* 24 bytes */
+ u32 iopl; /* 0 */
+} PACKED physdevop_set_iopl_t; /* 4 bytes */
-/* Do final initialisation of a PCI device (e.g., last-moment IRQ routing). */
typedef struct {
/* IN */
- u32 bus; /* 0 */
- u32 dev; /* 4 */
- u32 func; /* 8 */
-} PACKED physdevop_pci_initialise_device_t; /* 12 bytes */
+ memory_t bitmap; /* 0 */
+ MEMORY_PADDING;
+ u32 nr_ports; /* 8 */
+ u32 __pad0; /* 12 */
+} PACKED physdevop_set_iobitmap_t; /* 16 bytes */
-/* Find the root buses for subsequent scanning. */
typedef struct {
- /* OUT */
- u32 busmask[256/32]; /* 0 */
-} PACKED physdevop_pci_probe_root_buses_t; /* 32 bytes */
+ /* IN */
+ u32 apic; /* 0 */
+ u32 offset;
+ /* IN or OUT */
+ u32 value;
+} PACKED physdevop_apic_t;
typedef struct {
/* IN */
u32 irq; /* 0 */
/* OUT */
-/* Need to call PHYSDEVOP_IRQ_UNMASK_NOTIFY when the IRQ has been serviced? */
-#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY (1<<0)
- u32 flags; /* 4 */
-} PACKED physdevop_irq_status_query_t; /* 8 bytes */
+ u32 vector;
+} PACKED physdevop_irq_t;
typedef struct _physdev_op_st
{
u32 cmd; /* 0 */
u32 __pad; /* 4 */
union { /* 8 */
- physdevop_pci_cfgreg_read_t pci_cfgreg_read;
- physdevop_pci_cfgreg_write_t pci_cfgreg_write;
- physdevop_pci_initialise_device_t pci_initialise_device;
- physdevop_pci_probe_root_buses_t pci_probe_root_buses;
physdevop_irq_status_query_t irq_status_query;
+ physdevop_set_iopl_t set_iopl;
+ physdevop_set_iobitmap_t set_iobitmap;
+ physdevop_apic_t apic_op;
+ physdevop_irq_t irq_op;
u8 __dummy[32];
} PACKED u;
} PACKED physdev_op_t; /* 40 bytes */
#endif /* __XEN_PUBLIC_PHYSDEV_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/public/sched_ctl.h b/xen/include/public/sched_ctl.h
index bd6a717521..01d6481c6e 100644
--- a/xen/include/public/sched_ctl.h
+++ b/xen/include/public/sched_ctl.h
@@ -1,4 +1,4 @@
-/**
+/******************************************************************************
* Generic scheduler control interface.
*
* Mark Williamson, (C) 2004 Intel Research Cambridge
@@ -7,13 +7,11 @@
#ifndef __XEN_PUBLIC_SCHED_CTL_H__
#define __XEN_PUBLIC_SCHED_CTL_H__
-/* Scheduler types */
+/* Scheduler types. */
#define SCHED_BVT 0
-#define SCHED_ATROPOS 2
-#define SCHED_RROBIN 3
+#define SCHED_SEDF 4
-/* these describe the intended direction used for a scheduler control or domain
- * command */
+/* Set or get info? */
#define SCHED_INFO_PUT 0
#define SCHED_INFO_GET 1
@@ -23,22 +21,14 @@
*/
struct sched_ctl_cmd
{
- u32 sched_id; /* 0 */
- u32 direction; /* 4 */
- union { /* 8 */
- struct bvt_ctl
- {
- /* IN variables. */
- u32 ctx_allow; /* 8: context switch allowance */
- } PACKED bvt;
-
- struct rrobin_ctl
- {
- /* IN variables */
- u64 slice; /* 8: round robin time slice */
- } PACKED rrobin;
- } PACKED u;
-} PACKED; /* 16 bytes */
+ u32 sched_id;
+ u32 direction;
+ union {
+ struct bvt_ctl {
+ u32 ctx_allow;
+ } bvt;
+ } u;
+};
struct sched_adjdom_cmd
{
@@ -56,14 +46,16 @@ struct sched_adjdom_cmd
long long warpl; /* 32: warp limit */
long long warpu; /* 40: unwarp time requirement */
} PACKED bvt;
-
- struct atropos_adjdom
+
+ struct sedf_adjdom
{
- u64 nat_period; /* 16 */
- u64 nat_slice; /* 24 */
+ u64 period; /* 16 */
+ u64 slice; /* 24 */
u64 latency; /* 32 */
- u32 xtratime; /* 36 */
- } PACKED atropos;
+ u16 extratime; /* 36 */
+ u16 weight; /* 38 */
+ } PACKED sedf;
+
} PACKED u;
} PACKED; /* 40 bytes */
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index e4ee78b975..1b15314cce 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -1,17 +1,45 @@
/******************************************************************************
- * trace.h
+ * include/public/trace.h
*
* Mark Williamson, (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2005 Bin Ren
*/
#ifndef __XEN_PUBLIC_TRACE_H__
#define __XEN_PUBLIC_TRACE_H__
+/* Trace classes */
+#define TRC_GEN 0x00010000 /* General trace */
+#define TRC_SCHED 0x00020000 /* Xen Scheduler trace */
+#define TRC_DOM0OP 0x00040000 /* Xen DOM0 operation trace */
+#define TRC_VMX 0x00080000 /* Xen VMX trace */
+#define TRC_ALL 0xffff0000
+
+/* Trace events per class */
+
+#define TRC_SCHED_DOM_ADD (TRC_SCHED + 1)
+#define TRC_SCHED_DOM_REM (TRC_SCHED + 2)
+#define TRC_SCHED_SLEEP (TRC_SCHED + 3)
+#define TRC_SCHED_WAKE (TRC_SCHED + 4)
+#define TRC_SCHED_YIELD (TRC_SCHED + 5)
+#define TRC_SCHED_BLOCK (TRC_SCHED + 6)
+#define TRC_SCHED_SHUTDOWN (TRC_SCHED + 7)
+#define TRC_SCHED_CTL (TRC_SCHED + 8)
+#define TRC_SCHED_ADJDOM (TRC_SCHED + 9)
+#define TRC_SCHED_SWITCH (TRC_SCHED + 10)
+#define TRC_SCHED_S_TIMER_FN (TRC_SCHED + 11)
+#define TRC_SCHED_T_TIMER_FN (TRC_SCHED + 12)
+#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED + 13)
+
+#define TRC_VMX_VMEXIT (TRC_VMX + 1)
+#define TRC_VMX_VECTOR (TRC_VMX + 2)
+#define TRC_VMX_INT (TRC_VMX + 3)
+
/* This structure represents a single trace buffer record. */
struct t_rec {
- u64 cycles; /* 64 bit cycle counter timestamp */
- u32 event; /* 32 bit event ID */
- u32 d1, d2, d3, d4, d5; /* event data items */
+ u64 cycles; /* cycle counter timestamp */
+ u32 event; /* event ID */
+ unsigned long data[5]; /* event data items */
};
/*
@@ -19,15 +47,13 @@ struct t_rec {
* field, indexes into an array of struct t_rec's.
*/
struct t_buf {
- unsigned long data; /* pointer to data area. machine address
- * for convenience in user space code */
-
- unsigned long size; /* size of the data area, in t_recs */
- unsigned long head; /* array index of the most recent record */
-
- /* Xen-private elements follow... */
- struct t_rec *head_ptr; /* pointer to the head record */
- struct t_rec *vdata; /* virtual address pointer to data */
+ /* Used by both Xen and user space. */
+ atomic_t rec_idx; /* the next record to save to */
+ unsigned int rec_num; /* number of records in this trace buffer */
+ /* Used by Xen only. */
+ struct t_rec *rec; /* start of records */
+ /* Used by user space only. */
+ unsigned long rec_addr; /* machine address of the start of records */
};
#endif /* __XEN_PUBLIC_TRACE_H__ */
diff --git a/xen/include/public/vmx_assist.h b/xen/include/public/vmx_assist.h
new file mode 100644
index 0000000000..087391e1ff
--- /dev/null
+++ b/xen/include/public/vmx_assist.h
@@ -0,0 +1,101 @@
+/*
+ * vmx_assist.h: Context definitions for the VMXASSIST world switch.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef _VMX_ASSIST_H_
+#define _VMX_ASSIST_H_
+
+#define VMXASSIST_BASE 0xD0000
+#define VMXASSIST_MAGIC 0x17101966
+#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
+
+#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
+#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
+
+#ifndef __ASSEMBLY__
+
+union vmcs_arbytes {
+ struct arbyte_fields {
+ unsigned int seg_type : 4,
+ s : 1,
+ dpl : 2,
+ p : 1,
+ reserved0 : 4,
+ avl : 1,
+ reserved1 : 1,
+ default_ops_size: 1,
+ g : 1,
+ null_bit : 1,
+ reserved2 : 15;
+ } __attribute__((packed)) fields;
+ unsigned int bytes;
+};
+
+/*
+ * World switch state
+ */
+typedef struct vmx_assist_context {
+ unsigned long eip; /* execution pointer */
+ unsigned long esp; /* stack point */
+ unsigned long eflags; /* flags register */
+ unsigned long cr0;
+ unsigned long cr3; /* page table directory */
+ unsigned long cr4;
+ unsigned long idtr_limit; /* idt */
+ unsigned long idtr_base;
+ unsigned long gdtr_limit; /* gdt */
+ unsigned long gdtr_base;
+ unsigned long cs_sel; /* cs selector */
+ unsigned long cs_limit;
+ unsigned long cs_base;
+ union vmcs_arbytes cs_arbytes;
+ unsigned long ds_sel; /* ds selector */
+ unsigned long ds_limit;
+ unsigned long ds_base;
+ union vmcs_arbytes ds_arbytes;
+ unsigned long es_sel; /* es selector */
+ unsigned long es_limit;
+ unsigned long es_base;
+ union vmcs_arbytes es_arbytes;
+ unsigned long ss_sel; /* ss selector */
+ unsigned long ss_limit;
+ unsigned long ss_base;
+ union vmcs_arbytes ss_arbytes;
+ unsigned long fs_sel; /* fs selector */
+ unsigned long fs_limit;
+ unsigned long fs_base;
+ union vmcs_arbytes fs_arbytes;
+ unsigned long gs_sel; /* gs selector */
+ unsigned long gs_limit;
+ unsigned long gs_base;
+ union vmcs_arbytes gs_arbytes;
+ unsigned long tr_sel; /* task selector */
+ unsigned long tr_limit;
+ unsigned long tr_base;
+ union vmcs_arbytes tr_arbytes;
+ unsigned long ldtr_sel; /* ldtr selector */
+ unsigned long ldtr_limit;
+ unsigned long ldtr_base;
+ union vmcs_arbytes ldtr_arbytes;
+} vmx_assist_context_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _VMX_ASSIST_H_ */
+
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index f226416a94..d46472c16c 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -23,7 +23,14 @@
* XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
*/
-/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. */
+/*
+ * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
+ * EAX = return value
+ * (argument registers may be clobbered on return)
+ * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
+ * RAX = return value
+ * (argument registers not clobbered on return; RCX, R11 are)
+ */
#define __HYPERVISOR_set_trap_table 0
#define __HYPERVISOR_mmu_update 1
#define __HYPERVISOR_set_gdt 2
@@ -35,7 +42,6 @@
#define __HYPERVISOR_set_debugreg 8
#define __HYPERVISOR_get_debugreg 9
#define __HYPERVISOR_update_descriptor 10
-#define __HYPERVISOR_set_fast_trap 11
#define __HYPERVISOR_dom_mem_op 12
#define __HYPERVISOR_multicall 13
#define __HYPERVISOR_update_va_mapping 14
@@ -47,44 +53,34 @@
#define __HYPERVISOR_grant_table_op 20
#define __HYPERVISOR_vm_assist 21
#define __HYPERVISOR_update_va_mapping_otherdomain 22
-#define __HYPERVISOR_switch_vm86 23
-
-/*
- * MULTICALLS
- *
- * Multicalls are listed in an array, with each element being a fixed size
- * (BYTES_PER_MULTICALL_ENTRY). Each is of the form (op, arg1, ..., argN)
- * where each element of the tuple is a machine word.
- */
-#define ARGS_PER_MULTICALL_ENTRY 8
-
+#define __HYPERVISOR_switch_vm86 23 /* x86/32 only */
+#define __HYPERVISOR_switch_to_user 23 /* x86/64 only */
+#define __HYPERVISOR_boot_vcpu 24
+#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op 26
/*
* VIRTUAL INTERRUPTS
*
* Virtual interrupts that a guest OS may receive from Xen.
*/
-#define VIRQ_MISDIRECT 0 /* Catch-all interrupt for unbound VIRQs. */
-#define VIRQ_TIMER 1 /* Timebase update, and/or requested timeout. */
-#define VIRQ_DEBUG 2 /* Request guest to dump debug info. */
-#define VIRQ_CONSOLE 3 /* (DOM0) bytes received on emergency console. */
-#define VIRQ_DOM_EXC 4 /* (DOM0) Exceptional event for some domain. */
-#define VIRQ_PARITY_ERR 5 /* (DOM0) NMI parity error. */
-#define VIRQ_IO_ERR 6 /* (DOM0) NMI I/O error. */
+#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */
+#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */
+#define VIRQ_CONSOLE 2 /* (DOM0) bytes received on emergency console. */
+#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
+#define VIRQ_PARITY_ERR 4 /* (DOM0) NMI parity error. */
+#define VIRQ_IO_ERR 5 /* (DOM0) NMI I/O error. */
+#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
#define NR_VIRQS 7
/*
* MMU-UPDATE REQUESTS
*
* HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
* ptr[1:0] specifies the appropriate MMU_* command.
*
- * FOREIGN DOMAIN (FD)
- * -------------------
- * Some commands recognise an explicitly-declared foreign domain,
- * in which case they will operate with respect to the foreigner rather than
- * the calling domain. Where the FD has some effect, it is described below.
- *
* ptr[1:0] == MMU_NORMAL_PT_UPDATE:
* Updates an entry in a page table. If updating an L1 table, and the new
* table entry is valid/present, the mapped frame must belong to the FD, if
@@ -100,72 +96,103 @@
* ptr[:2] -- Machine address within the frame whose mapping to modify.
* The frame must belong to the FD, if one is specified.
* val -- Value to write into the mapping entry.
- *
- * ptr[1:0] == MMU_EXTENDED_COMMAND:
- * val[7:0] -- MMUEXT_* command.
+ */
+#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
+#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
+
+/*
+ * MMU EXTENDED OPERATIONS
*
- * val[7:0] == MMUEXT_(UN)PIN_*_TABLE:
- * ptr[:2] -- Machine address of frame to be (un)pinned as a p.t. page.
- * The frame must belong to the FD, if one is specified.
+ * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
*
- * val[7:0] == MMUEXT_NEW_BASEPTR:
- * ptr[:2] -- Machine address of new page-table base to install in MMU.
+ * cmd: MMUEXT_(UN)PIN_*_TABLE
+ * mfn: Machine frame number to be (un)pinned as a p.t. page.
+ * The frame must belong to the FD, if one is specified.
*
- * val[7:0] == MMUEXT_TLB_FLUSH:
- * No additional arguments.
+ * cmd: MMUEXT_NEW_BASEPTR
+ * mfn: Machine frame number of new page-table base to install in MMU.
*
- * val[7:0] == MMUEXT_INVLPG:
- * ptr[:2] -- Linear address to be flushed from the TLB.
+ * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
+ * mfn: Machine frame number of new page-table base to install in MMU
+ * when in user space.
*
- * val[7:0] == MMUEXT_FLUSH_CACHE:
- * No additional arguments. Writes back and flushes cache contents.
+ * cmd: MMUEXT_TLB_FLUSH_LOCAL
+ * No additional arguments. Flushes local TLB.
*
- * val[7:0] == MMUEXT_SET_LDT:
- * ptr[:2] -- Linear address of LDT base (NB. must be page-aligned).
- * val[:8] -- Number of entries in LDT.
+ * cmd: MMUEXT_INVLPG_LOCAL
+ * linear_addr: Linear address to be flushed from the local TLB.
*
- * val[7:0] == MMUEXT_TRANSFER_PAGE:
- * val[31:16] -- Domain to whom page is to be transferred.
- * (val[15:8],ptr[9:2]) -- 16-bit reference into transferee's grant table.
- * ptr[:12] -- Page frame to be reassigned to the FD.
- * (NB. The frame must currently belong to the calling domain).
+ * cmd: MMUEXT_TLB_FLUSH_MULTI
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
*
- * val[7:0] == MMUEXT_SET_FOREIGNDOM:
- * val[31:16] -- Domain to set as the Foreign Domain (FD).
- * (NB. DOMID_SELF is not recognised)
- * If FD != DOMID_IO then the caller must be privileged.
+ * cmd: MMUEXT_INVLPG_MULTI
+ * linear_addr: Linear address to be flushed.
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
*
- * val[7:0] == MMUEXT_CLEAR_FOREIGNDOM:
- * Clears the FD.
+ * cmd: MMUEXT_TLB_FLUSH_ALL
+ * No additional arguments. Flushes all VCPUs' TLBs.
*
- * val[7:0] == MMUEXT_REASSIGN_PAGE:
- * ptr[:2] -- A machine address within the page to be reassigned to the FD.
- * (NB. page must currently belong to the calling domain).
+ * cmd: MMUEXT_INVLPG_ALL
+ * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_FLUSH_CACHE
+ * No additional arguments. Writes back and flushes cache contents.
+ *
+ * cmd: MMUEXT_SET_LDT
+ * linear_addr: Linear address of LDT base (NB. must be page-aligned).
+ * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_REASSIGN_PAGE
+ * mfn: Machine frame number to be reassigned to the FD.
+ * (NB. page must currently belong to the calling domain).
*/
-#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
-#define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */
-#define MMU_EXTENDED_COMMAND 3 /* least 8 bits of val demux further */
-#define MMUEXT_PIN_L1_TABLE 0 /* ptr = MA of frame to pin */
-#define MMUEXT_PIN_L2_TABLE 1 /* ptr = MA of frame to pin */
-#define MMUEXT_PIN_L3_TABLE 2 /* ptr = MA of frame to pin */
-#define MMUEXT_PIN_L4_TABLE 3 /* ptr = MA of frame to pin */
-#define MMUEXT_UNPIN_TABLE 4 /* ptr = MA of frame to unpin */
-#define MMUEXT_NEW_BASEPTR 5 /* ptr = MA of new pagetable base */
-#define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */
-#define MMUEXT_INVLPG 7 /* ptr = VA to invalidate */
-#define MMUEXT_FLUSH_CACHE 8
-#define MMUEXT_SET_LDT 9 /* ptr = VA of table; val = # entries */
-#define MMUEXT_SET_FOREIGNDOM 10 /* val[31:16] = dom */
-#define MMUEXT_CLEAR_FOREIGNDOM 11
-#define MMUEXT_TRANSFER_PAGE 12 /* ptr = MA of frame; val[31:16] = dom */
-#define MMUEXT_REASSIGN_PAGE 13
-#define MMUEXT_CMD_MASK 255
-#define MMUEXT_CMD_SHIFT 8
+#define MMUEXT_PIN_L1_TABLE 0
+#define MMUEXT_PIN_L2_TABLE 1
+#define MMUEXT_PIN_L3_TABLE 2
+#define MMUEXT_PIN_L4_TABLE 3
+#define MMUEXT_UNPIN_TABLE 4
+#define MMUEXT_NEW_BASEPTR 5
+#define MMUEXT_TLB_FLUSH_LOCAL 6
+#define MMUEXT_INVLPG_LOCAL 7
+#define MMUEXT_TLB_FLUSH_MULTI 8
+#define MMUEXT_INVLPG_MULTI 9
+#define MMUEXT_TLB_FLUSH_ALL 10
+#define MMUEXT_INVLPG_ALL 11
+#define MMUEXT_FLUSH_CACHE 12
+#define MMUEXT_SET_LDT 13
+#define MMUEXT_REASSIGN_PAGE 14
+#define MMUEXT_NEW_USER_BASEPTR 15
-/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
-#define UVMF_FLUSH_TLB 1 /* Flush entire TLB. */
-#define UVMF_INVLPG 2 /* Flush the VA mapping being updated. */
+#ifndef __ASSEMBLY__
+struct mmuext_op {
+ unsigned int cmd;
+ union {
+ /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR, REASSIGN_PAGE */
+ memory_t mfn;
+ /* INVLPG_LOCAL, INVLPG_MULTI, INVLPG_ALL, SET_LDT */
+ memory_t linear_addr;
+ };
+ union {
+ /* SET_LDT */
+ unsigned int nr_ents;
+ /* TLB_FLUSH_MULTI, INVLPG_MULTI */
+ void *vcpumask;
+ };
+};
+#endif
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */
+/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */
+#define UVMF_NONE (0UL<<0) /* No flushing at all. */
+#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */
+#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */
+#define UVMF_FLUSHTYPE_MASK (3UL<<0)
+#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */
+#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */
+#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */
/*
* Commands to HYPERVISOR_sched_op().
@@ -243,9 +270,9 @@ typedef u16 domid_t;
*/
typedef struct
{
- memory_t ptr; /* Machine address of PTE. */
- memory_t val; /* New contents of PTE. */
-} PACKED mmu_update_t;
+ memory_t ptr; /* Machine address of PTE. */
+ memory_t val; /* New contents of PTE. */
+} mmu_update_t;
/*
* Send an array of these to HYPERVISOR_multicall().
@@ -253,56 +280,63 @@ typedef struct
*/
typedef struct
{
- cpureg_t op;
- cpureg_t args[7];
-} PACKED multicall_entry_t;
+ unsigned long op, result;
+ unsigned long args[6];
+} multicall_entry_t;
/* Event channel endpoints per domain. */
#define NR_EVENT_CHANNELS 1024
-/* No support for multi-processor guests. */
-#define MAX_VIRT_CPUS 1
+/* Support for multi-processor guests. */
+#define MAX_VIRT_CPUS 32
/*
- * Xen/guestos shared data -- pointer provided in start_info.
- * NB. We expect that this struct is smaller than a page.
+ * Per-VCPU information goes here. This will be cleaned up more when Xen
+ * actually supports multi-VCPU guests.
*/
-typedef struct shared_info_st
+typedef struct
{
/*
- * Per-VCPU information goes here. This will be cleaned up more when Xen
- * actually supports multi-VCPU guests.
+ * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+ * a pending notification for a particular VCPU. It is then cleared
+ * by the guest OS /before/ checking for pending work, thus avoiding
+ * a set-and-check race. Note that the mask is only accessed by Xen
+ * on the CPU that is currently hosting the VCPU. This means that the
+ * pending and mask flags can be updated by the guest without special
+ * synchronisation (i.e., no need for the x86 LOCK prefix).
+ * This may seem suboptimal because if the pending flag is set by
+ * a different CPU then an IPI may be scheduled even when the mask
+ * is set. However, note:
+ * 1. The task of 'interrupt holdoff' is covered by the per-event-
+ * channel mask bits. A 'noisy' event that is continually being
+ * triggered can be masked at source at this very precise
+ * granularity.
+ * 2. The main purpose of the per-VCPU mask is therefore to restrict
+ * reentrant execution: whether for concurrency control, or to
+ * prevent unbounded stack usage. Whatever the purpose, we expect
+ * that the mask will be asserted only for short periods at a time,
+ * and so the likelihood of a 'spurious' IPI is suitably small.
+ * The mask is read before making an event upcall to the guest: a
+ * non-zero mask therefore guarantees that the VCPU will not receive
+ * an upcall activation. The mask is cleared when the VCPU requests
+ * to block: this avoids wakeup-waiting races.
*/
- struct {
- /*
- * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
- * a pending notification for a particular VCPU. It is then cleared
- * by the guest OS /before/ checking for pending work, thus avoiding
- * a set-and-check race. Note that the mask is only accessed by Xen
- * on the CPU that is currently hosting the VCPU. This means that the
- * pending and mask flags can be updated by the guest without special
- * synchronisation (i.e., no need for the x86 LOCK prefix).
- * This may seem suboptimal because if the pending flag is set by
- * a different CPU then an IPI may be scheduled even when the mask
- * is set. However, note:
- * 1. The task of 'interrupt holdoff' is covered by the per-event-
- * channel mask bits. A 'noisy' event that is continually being
- * triggered can be masked at source at this very precise
- * granularity.
- * 2. The main purpose of the per-VCPU mask is therefore to restrict
- * reentrant execution: whether for concurrency control, or to
- * prevent unbounded stack usage. Whatever the purpose, we expect
- * that the mask will be asserted only for short periods at a time,
- * and so the likelihood of a 'spurious' IPI is suitably small.
- * The mask is read before making an event upcall to the guest: a
- * non-zero mask therefore guarantees that the VCPU will not receive
- * an upcall activation. The mask is cleared when the VCPU requests
- * to block: this avoids wakeup-waiting races.
- */
- u8 evtchn_upcall_pending;
- u8 evtchn_upcall_mask;
- u8 pad0, pad1;
- } PACKED vcpu_data[MAX_VIRT_CPUS]; /* 0 */
+ u8 evtchn_upcall_pending; /* 0 */
+ u8 evtchn_upcall_mask; /* 1 */
+ u8 pad0, pad1;
+ u32 evtchn_pending_sel; /* 4 */
+ arch_vcpu_info_t arch; /* 8 */
+} PACKED vcpu_info_t; /* 8 + arch */
+
+/*
+ * Xen/kernel shared data -- pointer provided in start_info.
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct shared_info_st
+{
+ vcpu_info_t vcpu_data[MAX_VIRT_CPUS]; /* 0 */
+
+ u32 n_vcpu;
/*
* A domain can have up to 1024 "event channels" on which it can send
@@ -336,7 +370,6 @@ typedef struct shared_info_st
* word in the PENDING bitfield array.
*/
u32 evtchn_pending[32]; /* 4 */
- u32 evtchn_pending_sel; /* 132 */
u32 evtchn_mask[32]; /* 136 */
/*
@@ -397,39 +430,44 @@ typedef struct shared_info_st
* extended by an extra 4MB to ensure this.
*/
-#define MAX_CMDLINE 256
+#define MAX_GUEST_CMDLINE 1024
typedef struct {
- /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
- memory_t nr_pages; /* 0: Total pages allocated to this domain. */
+ /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
+ memory_t nr_pages; /* 0: Total pages allocated to this domain. */
_MEMORY_PADDING(A);
- memory_t shared_info; /* 8: MACHINE address of shared info struct.*/
+ memory_t shared_info; /* 8: MACHINE address of shared info struct. */
_MEMORY_PADDING(B);
- u32 flags; /* 16: SIF_xxx flags. */
+ u32 flags; /* 16: SIF_xxx flags. */
u16 domain_controller_evtchn; /* 20 */
u16 __pad;
- /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
- memory_t pt_base; /* 24: VIRTUAL address of page directory. */
+ /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
+ memory_t pt_base; /* 24: VIRTUAL address of page directory. */
_MEMORY_PADDING(C);
- memory_t nr_pt_frames; /* 32: Number of bootstrap p.t. frames. */
+ memory_t nr_pt_frames; /* 32: Number of bootstrap p.t. frames. */
_MEMORY_PADDING(D);
- memory_t mfn_list; /* 40: VIRTUAL address of page-frame list. */
+ memory_t mfn_list; /* 40: VIRTUAL address of page-frame list. */
_MEMORY_PADDING(E);
- memory_t mod_start; /* 48: VIRTUAL address of pre-loaded module. */
+ memory_t mod_start; /* 48: VIRTUAL address of pre-loaded module. */
_MEMORY_PADDING(F);
- memory_t mod_len; /* 56: Size (bytes) of pre-loaded module. */
+ memory_t mod_len; /* 56: Size (bytes) of pre-loaded module. */
_MEMORY_PADDING(G);
- u8 cmd_line[MAX_CMDLINE]; /* 64 */
-} PACKED start_info_t; /* 320 bytes */
+ s8 cmd_line[MAX_GUEST_CMDLINE]; /* 64 */
+ memory_t store_page; /* 1088: VIRTUAL address of store page. */
+ _MEMORY_PADDING(H);
+ u16 store_evtchn; /* 1096: Event channel for store communication. */
+} PACKED start_info_t; /* 1098 bytes */
/* These flags are passed in the 'flags' field of start_info_t. */
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
#define SIF_BLK_BE_DOMAIN (1<<4) /* Is this a block backend domain? */
#define SIF_NET_BE_DOMAIN (1<<5) /* Is this a net backend domain? */
-
+#define SIF_USB_BE_DOMAIN (1<<6) /* Is this a usb backend domain? */
/* For use in guest OSes. */
extern shared_info_t *HYPERVISOR_shared_info;
+typedef u64 cpumap_t;
+
#endif /* !__ASSEMBLY__ */
#endif /* __XEN_PUBLIC_XEN_H__ */
diff --git a/xen/include/xen/ac_timer.h b/xen/include/xen/ac_timer.h
index c561272b8b..a1d3f7095d 100644
--- a/xen/include/xen/ac_timer.h
+++ b/xen/include/xen/ac_timer.h
@@ -1,91 +1,75 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
- ****************************************************************************
- *
- * File: ac_timer.h
- * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
- * Changes:
- *
- * Date: Nov 2002
+/******************************************************************************
+ * ac_timer.h
*
- * Environment: Xen Hypervisor
- * Description: Accurate timer for the Hypervisor
- *
- ****************************************************************************
- * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
- ****************************************************************************
+ * Copyright (c) 2002-2003 Rolf Neugebauer
+ * Copyright (c) 2002-2005 K A Fraser
*/
#ifndef _AC_TIMER_H_
#define _AC_TIMER_H_
+#include <xen/spinlock.h>
#include <xen/time.h>
struct ac_timer {
- /*
- * PUBLIC FIELDS
- */
/* System time expiry value (nanoseconds since boot). */
- s_time_t expires;
+ s_time_t expires;
/* CPU on which this timer will be installed and executed. */
- unsigned int cpu;
+ unsigned int cpu;
/* On expiry, '(*function)(data)' will be executed in softirq context. */
- unsigned long data;
- void (*function)(unsigned long);
-
- /*
- * PRIVATE FIELDS
- */
- unsigned int heap_offset;
+ void (*function)(void *);
+ void *data;
+ /* Timer-heap offset. */
+ unsigned int heap_offset;
};
/*
- * This function can be called for any CPU from any CPU in any context.
- * It initialises the private fields of the ac_timer structure.
+ * All functions below can be called for any CPU from any CPU in any context.
*/
-static __inline__ void init_ac_timer(struct ac_timer *timer)
-{
- timer->heap_offset = 0;
-}
-/*
- * This function can be called for any CPU from any CPU in any context.
- * It returns TRUE if the given timer is on a timer list.
- */
+/* Returns TRUE if the given timer is on a timer list. */
static __inline__ int active_ac_timer(struct ac_timer *timer)
{
return (timer->heap_offset != 0);
}
/*
- * This function can be called for any CPU from any CPU in any context, BUT:
- * -- The private fields must have been initialised (ac_timer_init).
- * -- All public fields must be initialised.
- * -- The timer must not currently be on a timer list.
+ * It initialises the static fields of the ac_timer structure.
+ * It can be called multiple times to reinitialise a single (inactive) timer.
*/
-extern void add_ac_timer(struct ac_timer *timer);
+static __inline__ void init_ac_timer(
+ struct ac_timer *timer,
+ void (*function)(void *),
+ void *data,
+ unsigned int cpu)
+{
+ memset(timer, 0, sizeof(*timer));
+ timer->function = function;
+ timer->data = data;
+ timer->cpu = cpu;
+}
/*
- * This function can be called for any CPU from any CPU in any context, BUT:
- * -- The private fields must have been initialised (ac_timer_init).
- * -- All public fields must be initialised.
- * -- The timer must currently be on a timer list.
+ * Set the expiry time and activate a timer (which must previously have been
+ * initialised by init_ac_timer).
*/
-extern void rem_ac_timer(struct ac_timer *timer);
+extern void set_ac_timer(struct ac_timer *timer, s_time_t expires);
/*
- * This function can be called for any CPU from any CPU in any context, BUT:
- * -- The private fields must have been initialised (ac_timer_init).
- * -- All public fields must be initialised.
+ * Deactivate a timer (which must previously have been initialised by
+ * init_ac_timer). This function has no effect if the timer is not currently
+ * active.
*/
-extern void mod_ac_timer(struct ac_timer *timer, s_time_t new_time);
+extern void rem_ac_timer(struct ac_timer *timer);
+#endif /* _AC_TIMER_H_ */
/*
- * PRIVATE DEFINITIONS
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
*/
-
-extern int reprogram_ac_timer(s_time_t timeout);
-
-#endif /* _AC_TIMER_H_ */
diff --git a/xen/include/xen/acpi.h b/xen/include/xen/acpi.h
index feda09036a..b84f047651 100644
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -80,7 +80,7 @@ typedef struct {
struct acpi_table_rsdt {
struct acpi_table_header header;
- u32 entry[1];
+ u32 entry[8];
} __attribute__ ((packed));
/* Extended System Description Table (XSDT) */
@@ -233,8 +233,27 @@ struct acpi_table_hpet {
} __attribute__ ((packed));
/*
+ * Simple Boot Flags
+ * http://www.microsoft.com/whdc/hwdev/resources/specs/simp_bios.mspx
+ */
+struct acpi_table_sbf
+{
+ u8 sbf_signature[4];
+ u32 sbf_len;
+ u8 sbf_revision;
+ u8 sbf_csum;
+ u8 sbf_oemid[6];
+ u8 sbf_oemtable[8];
+ u8 sbf_revdata[4];
+ u8 sbf_creator[4];
+ u8 sbf_crearev[4];
+ u8 sbf_cmos;
+ u8 sbf_spare[3];
+} __attribute__ ((packed));
+
+/*
* System Resource Affinity Table (SRAT)
- * see http://www.microsoft.com/hwdev/design/srat.htm
+ * http://www.microsoft.com/whdc/hwdev/platform/proc/SRAT.mspx
*/
struct acpi_table_srat {
@@ -309,7 +328,7 @@ struct acpi_table_sbst {
/* Embedded Controller Boot Resources Table (ECDT) */
struct acpi_table_ecdt {
- struct acpi_table_header header;
+ struct acpi_table_header header;
struct acpi_generic_address ec_control;
struct acpi_generic_address ec_data;
u32 uid;
@@ -317,6 +336,15 @@ struct acpi_table_ecdt {
char ec_id[0];
} __attribute__ ((packed));
+/* PCI MMCONFIG */
+
+struct acpi_table_mcfg {
+ struct acpi_table_header header;
+ u8 reserved[8];
+ u32 base_address;
+ u32 base_reserved;
+} __attribute__ ((packed));
+
/* Table Handlers */
enum acpi_table_id {
@@ -338,6 +366,7 @@ enum acpi_table_id {
ACPI_SSDT,
ACPI_SPMI,
ACPI_HPET,
+ ACPI_MCFG,
ACPI_TABLE_COUNT
};
@@ -345,18 +374,19 @@ typedef int (*acpi_table_handler) (unsigned long phys_addr, unsigned long size);
extern acpi_table_handler acpi_table_ops[ACPI_TABLE_COUNT];
-typedef int (*acpi_madt_entry_handler) (acpi_table_entry_header *header);
+typedef int (*acpi_madt_entry_handler) (acpi_table_entry_header *header, const unsigned long end);
char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
unsigned long acpi_find_rsdp (void);
int acpi_boot_init (void);
+int acpi_boot_table_init (void);
int acpi_numa_init (void);
int acpi_table_init (void);
int acpi_table_parse (enum acpi_table_id id, acpi_table_handler handler);
int acpi_get_table_header_early (enum acpi_table_id id, struct acpi_table_header **header);
-int acpi_table_parse_madt (enum acpi_madt_entry_id id, acpi_madt_entry_handler handler);
-int acpi_table_parse_srat (enum acpi_srat_entry_id id, acpi_madt_entry_handler handler);
+int acpi_table_parse_madt (enum acpi_madt_entry_id id, acpi_madt_entry_handler handler, unsigned int max_entries);
+int acpi_table_parse_srat (enum acpi_srat_entry_id id, acpi_madt_entry_handler handler, unsigned int max_entries);
void acpi_table_print (struct acpi_table_header *header, unsigned long phys_addr);
void acpi_table_print_madt_entry (acpi_table_entry_header *madt);
void acpi_table_print_srat_entry (acpi_table_entry_header *srat);
@@ -367,15 +397,45 @@ void acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa
void acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma);
void acpi_numa_arch_fixup(void);
-#else /*!CONFIG_ACPI_BOOT*/
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+/* Arch dependent functions for cpu hotplug support */
+int acpi_map_lsapic(acpi_handle handle, int *pcpu);
+int acpi_unmap_lsapic(int cpu);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
+extern int acpi_mp_config;
+
+extern u32 pci_mmcfg_base_addr;
+
+extern int sbf_port ;
+
+#else /*!CONFIG_ACPI_BOOT*/
+
+#define acpi_mp_config 0
static inline int acpi_boot_init(void)
{
return 0;
}
-#endif /*!CONFIG_ACPI_BOOT*/
+static inline int acpi_boot_table_init(void)
+{
+ return 0;
+}
+
+#endif /*!CONFIG_ACPI_BOOT*/
+unsigned int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low);
+int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+
+/*
+ * This function undoes the effect of one call to acpi_register_gsi().
+ * If this matches the last registration, any IRQ resources for gsi
+ * are freed.
+ */
+#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
+void acpi_unregister_gsi (u32 gsi);
+#endif
#ifdef CONFIG_ACPI_PCI
@@ -400,7 +460,11 @@ extern struct acpi_prt_list acpi_prt;
struct pci_dev;
int acpi_pci_irq_enable (struct pci_dev *dev);
-int acpi_pci_irq_init (void);
+void acpi_penalize_isa_irq(int irq);
+
+#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
+void acpi_pci_irq_disable (struct pci_dev *dev);
+#endif
struct acpi_pci_driver {
struct acpi_pci_driver *next;
@@ -415,14 +479,15 @@ void acpi_pci_unregister_driver(struct acpi_pci_driver *driver);
#ifdef CONFIG_ACPI_EC
-int ec_read(u8 addr, u8 *val);
-int ec_write(u8 addr, u8 val);
+extern int ec_read(u8 addr, u8 *val);
+extern int ec_write(u8 addr, u8 val);
#endif /*CONFIG_ACPI_EC*/
#ifdef CONFIG_ACPI_INTERPRETER
-int acpi_blacklisted(void);
+extern int acpi_blacklisted(void);
+extern void acpi_bios_year(char *s);
#else /*!CONFIG_ACPI_INTERPRETER*/
@@ -433,4 +498,41 @@ static inline int acpi_blacklisted(void)
#endif /*!CONFIG_ACPI_INTERPRETER*/
+#define ACPI_CSTATE_LIMIT_DEFINED /* for driver builds */
+#ifdef CONFIG_ACPI
+
+/*
+ * Set highest legal C-state
+ * 0: C0 okay, but not C1
+ * 1: C1 okay, but not C2
+ * 2: C2 okay, but not C3 etc.
+ */
+
+extern unsigned int max_cstate;
+
+static inline unsigned int acpi_get_cstate_limit(void)
+{
+ return max_cstate;
+}
+static inline void acpi_set_cstate_limit(unsigned int new_limit)
+{
+ max_cstate = new_limit;
+ return;
+}
+#else
+static inline unsigned int acpi_get_cstate_limit(void) { return 0; }
+static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+int acpi_get_pxm(acpi_handle handle);
+#else
+static inline int acpi_get_pxm(acpi_handle handle)
+{
+ return 0;
+}
+#endif
+
+extern int pnpacpi_disabled;
+
#endif /*_LINUX_ACPI_H*/
diff --git a/xen/include/xen/bitmap.h b/xen/include/xen/bitmap.h
new file mode 100644
index 0000000000..3703384c3d
--- /dev/null
+++ b/xen/include/xen/bitmap.h
@@ -0,0 +1,250 @@
+#ifndef __XEN_BITMAP_H
+#define __XEN_BITMAP_H
+
+#ifndef __ASSEMBLY__
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/types.h>
+#include <xen/bitops.h>
+
+/*
+ * bitmaps provide bit arrays that consume one or more unsigned
+ * longs. The bitmap interface and available operations are listed
+ * here, in bitmap.h
+ *
+ * Function implementations generic to all architectures are in
+ * lib/bitmap.c. Functions implementations that are architecture
+ * specific are in various include/asm-<arch>/bitops.h headers
+ * and other arch/<arch> specific files.
+ *
+ * See lib/bitmap.c for more details.
+ */
+
+/*
+ * The available bitmap operations and their rough meaning in the
+ * case that the bitmap is a single unsigned long are thus:
+ *
+ * bitmap_zero(dst, nbits) *dst = 0UL
+ * bitmap_fill(dst, nbits) *dst = ~0UL
+ * bitmap_copy(dst, src, nbits) *dst = *src
+ * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2
+ * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2
+ * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2
+ * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2)
+ * bitmap_complement(dst, src, nbits) *dst = ~(*src)
+ * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal?
+ * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap?
+ * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2?
+ * bitmap_empty(src, nbits) Are all bits zero in *src?
+ * bitmap_full(src, nbits) Are all bits set in *src?
+ * bitmap_weight(src, nbits) Hamming Weight: number set bits
+ * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
+ * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
+ */
+
+/*
+ * Also the following operations in asm/bitops.h apply to bitmaps.
+ *
+ * set_bit(bit, addr) *addr |= bit
+ * clear_bit(bit, addr) *addr &= ~bit
+ * change_bit(bit, addr) *addr ^= bit
+ * test_bit(bit, addr) Is bit set in *addr?
+ * test_and_set_bit(bit, addr) Set bit and return old value
+ * test_and_clear_bit(bit, addr) Clear bit and return old value
+ * test_and_change_bit(bit, addr) Change bit and return old value
+ * find_first_zero_bit(addr, nbits) Position first zero bit in *addr
+ * find_first_bit(addr, nbits) Position first set bit in *addr
+ * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit
+ * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit
+ */
+
+/*
+ * The DECLARE_BITMAP(name,bits) macro, in xen/types.h, can be used
+ * to declare an array named 'name' of just enough unsigned longs to
+ * contain all bit positions from 0 to 'bits' - 1.
+ */
+
+/*
+ * lib/bitmap.c provides these functions:
+ */
+
+extern int __bitmap_empty(const unsigned long *bitmap, int bits);
+extern int __bitmap_full(const unsigned long *bitmap, int bits);
+extern int __bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
+ int bits);
+extern void __bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int shift, int bits);
+extern void __bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int shift, int bits);
+extern void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+extern void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+extern int __bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+extern int __bitmap_subset(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+extern int __bitmap_weight(const unsigned long *bitmap, int bits);
+
+extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
+extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
+extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
+
+#define BITMAP_LAST_WORD_MASK(nbits) \
+( \
+ ((nbits) % BITS_PER_LONG) ? \
+ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
+)
+
+static inline void bitmap_zero(unsigned long *dst, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = 0UL;
+ else {
+ int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ memset(dst, 0, len);
+ }
+}
+
+static inline void bitmap_fill(unsigned long *dst, int nbits)
+{
+ size_t nlongs = BITS_TO_LONGS(nbits);
+ if (nlongs > 1) {
+ int len = (nlongs - 1) * sizeof(unsigned long);
+ memset(dst, 0xff, len);
+ }
+ dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
+}
+
+static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
+ int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = *src;
+ else {
+ int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ memcpy(dst, src, len);
+ }
+}
+
+static inline void bitmap_and(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = *src1 & *src2;
+ else
+ __bitmap_and(dst, src1, src2, nbits);
+}
+
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = *src1 | *src2;
+ else
+ __bitmap_or(dst, src1, src2, nbits);
+}
+
+static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = *src1 ^ *src2;
+ else
+ __bitmap_xor(dst, src1, src2, nbits);
+}
+
+static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = *src1 & ~(*src2);
+ else
+ __bitmap_andnot(dst, src1, src2, nbits);
+}
+
+static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
+ int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+ else
+ __bitmap_complement(dst, src, nbits);
+}
+
+static inline int bitmap_equal(const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
+ else
+ return __bitmap_equal(src1, src2, nbits);
+}
+
+static inline int bitmap_intersects(const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+ else
+ return __bitmap_intersects(src1, src2, nbits);
+}
+
+static inline int bitmap_subset(const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
+ else
+ return __bitmap_subset(src1, src2, nbits);
+}
+
+static inline int bitmap_empty(const unsigned long *src, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
+ else
+ return __bitmap_empty(src, nbits);
+}
+
+static inline int bitmap_full(const unsigned long *src, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
+ else
+ return __bitmap_full(src, nbits);
+}
+
+static inline int bitmap_weight(const unsigned long *src, int nbits)
+{
+ return __bitmap_weight(src, nbits);
+}
+
+static inline void bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int n, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = *src >> n;
+ else
+ __bitmap_shift_right(dst, src, n, nbits);
+}
+
+static inline void bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int n, int nbits)
+{
+ if (nbits <= BITS_PER_LONG)
+ *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits);
+ else
+ __bitmap_shift_left(dst, src, n, nbits);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_BITMAP_H */
diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
new file mode 100644
index 0000000000..e743c0059d
--- /dev/null
+++ b/xen/include/xen/bitops.h
@@ -0,0 +1,129 @@
+#ifndef _LINUX_BITOPS_H
+#define _LINUX_BITOPS_H
+#include <asm/types.h>
+
+/*
+ * ffs: find first bit set. This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+
+static inline int generic_ffs(int x)
+{
+ int r = 1;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xffff)) {
+ x >>= 16;
+ r += 16;
+ }
+ if (!(x & 0xff)) {
+ x >>= 8;
+ r += 8;
+ }
+ if (!(x & 0xf)) {
+ x >>= 4;
+ r += 4;
+ }
+ if (!(x & 3)) {
+ x >>= 2;
+ r += 2;
+ }
+ if (!(x & 1)) {
+ x >>= 1;
+ r += 1;
+ }
+ return r;
+}
+
+/*
+ * fls: find last bit set.
+ */
+
+static __inline__ int generic_fls(int x)
+{
+ int r = 32;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xffff0000u)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xff000000u)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xf0000000u)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xc0000000u)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x80000000u)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
+
+/*
+ * Include this here because some architectures need generic_ffs/fls in
+ * scope
+ */
+#include <asm/bitops.h>
+
+/*
+ * hweightN: returns the hamming weight (i.e. the number
+ * of bits set) of a N-bit word
+ */
+
+static inline unsigned int generic_hweight32(unsigned int w)
+{
+ unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
+ res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
+ return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+}
+
+static inline unsigned int generic_hweight16(unsigned int w)
+{
+ unsigned int res = (w & 0x5555) + ((w >> 1) & 0x5555);
+ res = (res & 0x3333) + ((res >> 2) & 0x3333);
+ res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F);
+ return (res & 0x00FF) + ((res >> 8) & 0x00FF);
+}
+
+static inline unsigned int generic_hweight8(unsigned int w)
+{
+ unsigned int res = (w & 0x55) + ((w >> 1) & 0x55);
+ res = (res & 0x33) + ((res >> 2) & 0x33);
+ return (res & 0x0F) + ((res >> 4) & 0x0F);
+}
+
+static inline unsigned long generic_hweight64(__u64 w)
+{
+#if BITS_PER_LONG < 64
+ return generic_hweight32((unsigned int)(w >> 32)) +
+ generic_hweight32((unsigned int)w);
+#else
+ u64 res;
+ res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
+ res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+ res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
+ res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
+ res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
+ return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
+#endif
+}
+
+static inline unsigned long hweight_long(unsigned long w)
+{
+ return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w);
+}
+
+#endif
diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h
index 95101174ba..ca3650f80f 100644
--- a/xen/include/xen/config.h
+++ b/xen/include/xen/config.h
@@ -15,7 +15,7 @@
#define always_inline __inline__ __attribute__ ((always_inline))
-/* syslog levels ==> nothing! */
+/* Linux syslog levels. */
#define KERN_NOTICE ""
#define KERN_WARNING ""
#define KERN_DEBUG ""
@@ -25,6 +25,10 @@
#define KERN_EMERG ""
#define KERN_ALERT ""
+/* Linux 'checker' project. */
+#define __iomem
+#define __user
+
#ifdef VERBOSE
#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
__FILE__ , __LINE__ , ## _a )
diff --git a/xen/include/xen/console.h b/xen/include/xen/console.h
index abcb2fa1d8..12028cdc97 100644
--- a/xen/include/xen/console.h
+++ b/xen/include/xen/console.h
@@ -13,8 +13,7 @@ extern spinlock_t console_lock;
void set_printk_prefix(const char *prefix);
-#define CONSOLE_RING_CLEAR 1
-long read_console_ring(unsigned long, unsigned int, unsigned int);
+long read_console_ring(char **, u32 *, int);
void init_console(void);
void console_endboot(int disable_vga);
@@ -22,8 +21,7 @@ void console_endboot(int disable_vga);
void console_force_unlock(void);
void console_force_lock(void);
-void console_putc(char c);
-int console_getc(void);
-int irq_console_getc(void);
+void console_start_sync(void);
+void console_end_sync(void);
-#endif
+#endif /* __CONSOLE_H__ */
diff --git a/xen/include/xen/cpumask.h b/xen/include/xen/cpumask.h
new file mode 100644
index 0000000000..9ccafc7999
--- /dev/null
+++ b/xen/include/xen/cpumask.h
@@ -0,0 +1,378 @@
+#ifndef __XEN_CPUMASK_H
+#define __XEN_CPUMASK_H
+
+/*
+ * Cpumasks provide a bitmap suitable for representing the
+ * set of CPU's in a system, one bit position per CPU number.
+ *
+ * See detailed comments in the file xen/bitmap.h describing the
+ * data type on which these cpumasks are based.
+ *
+ * For details of cpumask_scnprintf() and cpumask_parse(),
+ * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
+ *
+ * The available cpumask operations are:
+ *
+ * void cpu_set(cpu, mask) turn on bit 'cpu' in mask
+ * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask
+ * void cpus_setall(mask) set all bits
+ * void cpus_clear(mask) clear all bits
+ * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask
+ * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask
+ *
+ * void cpus_and(dst, src1, src2) dst = src1 & src2 [intersection]
+ * void cpus_or(dst, src1, src2) dst = src1 | src2 [union]
+ * void cpus_xor(dst, src1, src2) dst = src1 ^ src2
+ * void cpus_andnot(dst, src1, src2) dst = src1 & ~src2
+ * void cpus_complement(dst, src) dst = ~src
+ *
+ * int cpus_equal(mask1, mask2) Does mask1 == mask2?
+ * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect?
+ * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2?
+ * int cpus_empty(mask) Is mask empty (no bits sets)?
+ * int cpus_full(mask) Is mask full (all bits sets)?
+ * int cpus_weight(mask) Hamming weight - number of set bits
+ *
+ * void cpus_shift_right(dst, src, n) Shift right
+ * void cpus_shift_left(dst, src, n) Shift left
+ *
+ * int first_cpu(mask) Number lowest set bit, or >= NR_CPUS
+ * int next_cpu(cpu, mask) Next cpu past 'cpu', or >= NR_CPUS
+ *
+ * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
+ * CPU_MASK_ALL Initializer - all bits set
+ * CPU_MASK_NONE Initializer - no bits set
+ * unsigned long *cpus_addr(mask) Array of unsigned long's in mask
+ *
+ * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
+ * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask
+ *
+ * for_each_cpu_mask(cpu, mask) for-loop cpu over mask
+ *
+ * int num_online_cpus() Number of online CPUs
+ * int num_possible_cpus() Number of all possible CPUs
+ * int num_present_cpus() Number of present CPUs
+ *
+ * int cpu_online(cpu) Is some cpu online?
+ * int cpu_possible(cpu) Is some cpu possible?
+ * int cpu_present(cpu) Is some cpu present (can schedule)?
+ *
+ * int any_online_cpu(mask) First online cpu in mask, or NR_CPUS
+ *
+ * for_each_cpu(cpu) for-loop cpu over cpu_possible_map
+ * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map
+ * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map
+ *
+ * Subtlety:
+ * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway)
+ * to generate slightly worse code. Note for example the additional
+ * 40 lines of assembly code compiling the "for each possible cpu"
+ * loops buried in the disk_stat_read() macros calls when compiling
+ * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple
+ * one-line #define for cpu_isset(), instead of wrapping an inline
+ * inside a macro, the way we do the other calls.
+ */
+
+#include <xen/config.h>
+#include <xen/bitmap.h>
+#include <xen/kernel.h>
+
+typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
+extern cpumask_t _unused_cpumask_arg_;
+
+#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
+static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
+{
+ set_bit(cpu, dstp->bits);
+}
+
+#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
+static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
+{
+ clear_bit(cpu, dstp->bits);
+}
+
+#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
+static inline void __cpus_setall(cpumask_t *dstp, int nbits)
+{
+ bitmap_fill(dstp->bits, nbits);
+}
+
+#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
+static inline void __cpus_clear(cpumask_t *dstp, int nbits)
+{
+ bitmap_zero(dstp->bits, nbits);
+}
+
+/* No static inline type checking - see Subtlety (1) above. */
+#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
+
+#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
+static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
+{
+ return test_and_set_bit(cpu, addr->bits);
+}
+
+#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_andnot(dst, src1, src2) \
+ __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
+static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS)
+static inline void __cpus_complement(cpumask_t *dstp,
+ const cpumask_t *srcp, int nbits)
+{
+ bitmap_complement(dstp->bits, srcp->bits, nbits);
+}
+
+#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_equal(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_equal(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_intersects(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_intersects(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
+static inline int __cpus_subset(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_subset(src1p->bits, src2p->bits, nbits);
+}
+
+#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
+static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_empty(srcp->bits, nbits);
+}
+
+#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS)
+static inline int __cpus_full(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_full(srcp->bits, nbits);
+}
+
+#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
+static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_weight(srcp->bits, nbits);
+}
+
+#define cpus_shift_right(dst, src, n) \
+ __cpus_shift_right(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_right(cpumask_t *dstp,
+ const cpumask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define cpus_shift_left(dst, src, n) \
+ __cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
+static inline void __cpus_shift_left(cpumask_t *dstp,
+ const cpumask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
+}
+
+#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
+static inline int __first_cpu(const cpumask_t *srcp, int nbits)
+{
+ return find_first_bit(srcp->bits, nbits);
+}
+
+#define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS)
+static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
+{
+ return find_next_bit(srcp->bits, nbits, n+1);
+}
+
+#define cpumask_of_cpu(cpu) \
+({ \
+ typeof(_unused_cpumask_arg_) m; \
+ if (sizeof(m) == sizeof(unsigned long)) { \
+ m.bits[0] = 1UL<<(cpu); \
+ } else { \
+ cpus_clear(m); \
+ cpu_set((cpu), m); \
+ } \
+ m; \
+})
+
+#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
+
+#if NR_CPUS <= BITS_PER_LONG
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#else
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#endif
+
+#define CPU_MASK_NONE \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
+} }
+
+#define CPU_MASK_CPU0 \
+(cpumask_t) { { \
+ [0] = 1UL \
+} }
+
+#define cpus_addr(src) ((src).bits)
+
+/*
+#define cpumask_scnprintf(buf, len, src) \
+ __cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
+static inline int __cpumask_scnprintf(char *buf, int len,
+ const cpumask_t *srcp, int nbits)
+{
+ return bitmap_scnprintf(buf, len, srcp->bits, nbits);
+}
+
+#define cpumask_parse(ubuf, ulen, src) \
+ __cpumask_parse((ubuf), (ulen), &(src), NR_CPUS)
+static inline int __cpumask_parse(const char __user *buf, int len,
+ cpumask_t *dstp, int nbits)
+{
+ return bitmap_parse(buf, len, dstp->bits, nbits);
+}
+*/
+
+#if NR_CPUS > 1
+#define for_each_cpu_mask(cpu, mask) \
+ for ((cpu) = first_cpu(mask); \
+ (cpu) < NR_CPUS; \
+ (cpu) = next_cpu((cpu), (mask)))
+#else /* NR_CPUS == 1 */
+#define for_each_cpu_mask(cpu, mask) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+#endif /* NR_CPUS */
+
+/*
+ * The following particular system cpumasks and operations manage
+ * possible, present and online cpus. Each of them is a fixed size
+ * bitmap of size NR_CPUS.
+ *
+ * #ifdef CONFIG_HOTPLUG_CPU
+ * cpu_possible_map - all NR_CPUS bits set
+ * cpu_present_map - has bit 'cpu' set iff cpu is populated
+ * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler
+ * #else
+ * cpu_possible_map - has bit 'cpu' set iff cpu is populated
+ * cpu_present_map - copy of cpu_possible_map
+ * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler
+ * #endif
+ *
+ * In either case, NR_CPUS is fixed at compile time, as the static
+ * size of these bitmaps. The cpu_possible_map is fixed at boot
+ * time, as the set of CPU id's that it is possible might ever
+ * be plugged in at anytime during the life of that system boot.
+ * The cpu_present_map is dynamic(*), representing which CPUs
+ * are currently plugged in. And cpu_online_map is the dynamic
+ * subset of cpu_present_map, indicating those CPUs available
+ * for scheduling.
+ *
+ * If HOTPLUG is enabled, then cpu_possible_map is forced to have
+ * all NR_CPUS bits set, otherwise it is just the set of CPUs that
+ * ACPI reports present at boot.
+ *
+ * If HOTPLUG is enabled, then cpu_present_map varies dynamically,
+ * depending on what ACPI reports as currently plugged in, otherwise
+ * cpu_present_map is just a copy of cpu_possible_map.
+ *
+ * (*) Well, cpu_present_map is dynamic in the hotplug case. If not
+ * hotplug, it's a copy of cpu_possible_map, hence fixed at boot.
+ *
+ * Subtleties:
+ * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
+ * assumption that their single CPU is online. The UP
+ * cpu_{online,possible,present}_maps are placebos. Changing them
+ * will have no useful effect on the following num_*_cpus()
+ * and cpu_*() macros in the UP case. This ugliness is a UP
+ * optimization - don't waste any instructions or memory references
+ * asking if you're online or how many CPUs there are if there is
+ * only one CPU.
+ * 2) Most SMP arch's #define some of these maps to be some
+ * other map specific to that arch. Therefore, the following
+ * must be #define macros, not inlines. To see why, examine
+ * the assembly code produced by the following. Note that
+ * set1() writes phys_x_map, but set2() writes x_map:
+ * int x_map, phys_x_map;
+ * #define set1(a) x_map = a
+ * inline void set2(int a) { x_map = a; }
+ * #define x_map phys_x_map
+ * main(){ set1(3); set2(5); }
+ */
+
+extern cpumask_t cpu_possible_map;
+extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_present_map;
+
+#if NR_CPUS > 1
+#define num_online_cpus() cpus_weight(cpu_online_map)
+#define num_possible_cpus() cpus_weight(cpu_possible_map)
+#define num_present_cpus() cpus_weight(cpu_present_map)
+#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
+#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
+#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
+#else
+#define num_online_cpus() 1
+#define num_possible_cpus() 1
+#define num_present_cpus() 1
+#define cpu_online(cpu) ((cpu) == 0)
+#define cpu_possible(cpu) ((cpu) == 0)
+#define cpu_present(cpu) ((cpu) == 0)
+#endif
+
+#define any_online_cpu(mask) \
+({ \
+ int cpu; \
+ for_each_cpu_mask(cpu, (mask)) \
+ if (cpu_online(cpu)) \
+ break; \
+ min_t(int, NR_CPUS, cpu); \
+})
+
+#define for_each_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map)
+#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map)
+#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
+
+#endif /* __XEN_CPUMASK_H */
diff --git a/xen/include/xen/dmi.h b/xen/include/xen/dmi.h
new file mode 100644
index 0000000000..ba42456c50
--- /dev/null
+++ b/xen/include/xen/dmi.h
@@ -0,0 +1,38 @@
+#ifndef __DMI_H__
+#define __DMI_H__
+
+enum dmi_field {
+ DMI_NONE,
+ DMI_BIOS_VENDOR,
+ DMI_BIOS_VERSION,
+ DMI_BIOS_DATE,
+ DMI_SYS_VENDOR,
+ DMI_PRODUCT_NAME,
+ DMI_PRODUCT_VERSION,
+ DMI_BOARD_VENDOR,
+ DMI_BOARD_NAME,
+ DMI_BOARD_VERSION,
+ DMI_STRING_MAX,
+};
+
+/*
+ * DMI callbacks for problem boards
+ */
+struct dmi_strmatch {
+ u8 slot;
+ char *substr;
+};
+
+struct dmi_system_id {
+ int (*callback)(struct dmi_system_id *);
+ char *ident;
+ struct dmi_strmatch matches[4];
+ void *driver_data;
+};
+
+#define DMI_MATCH(a,b) { a, b }
+
+extern int dmi_check_system(struct dmi_system_id *list);
+extern char * dmi_get_system_info(int field);
+
+#endif /* __DMI_H__ */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index 94d1e01635..46c2e4bb5d 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -6,20 +6,20 @@
* Arch-specifics.
*/
-extern void domain_startofday(void);
+struct vcpu *arch_alloc_vcpu_struct(void);
-extern struct domain *arch_alloc_domain_struct(void);
+extern void arch_free_vcpu_struct(struct vcpu *v);
-extern void arch_free_domain_struct(struct domain *d);
+extern void arch_do_createdomain(struct vcpu *v);
-extern void arch_do_createdomain(struct domain *d);
+extern void arch_do_boot_vcpu(struct vcpu *v);
-extern int arch_final_setup_guestos(
- struct domain *d, full_execution_context_t *c);
+extern int arch_set_info_guest(
+ struct vcpu *d, struct vcpu_guest_context *c);
extern void free_perdomain_pt(struct domain *d);
-extern void domain_relinquish_memory(struct domain *d);
+extern void domain_relinquish_resources(struct domain *d);
extern void dump_pageframe_info(struct domain *d);
diff --git a/xen/include/xen/domain_page.h b/xen/include/xen/domain_page.h
new file mode 100644
index 0000000000..f20e3f28c3
--- /dev/null
+++ b/xen/include/xen/domain_page.h
@@ -0,0 +1,100 @@
+/******************************************************************************
+ * domain_page.h
+ *
+ * Allow temporary mapping of domain page frames into Xen space.
+ */
+
+#ifndef __XEN_DOMAIN_PAGE_H__
+#define __XEN_DOMAIN_PAGE_H__
+
+#include <xen/config.h>
+#include <xen/mm.h>
+
+#ifdef CONFIG_DOMAIN_PAGE
+
+/*
+ * Maps a given page frame, returning the mmap'ed virtual address. The page is
+ * now accessible until a corresponding call to unmap_domain_page().
+ */
+extern void *map_domain_page(unsigned long pfn);
+
+/*
+ * Pass a VA within a page previously mapped with map_domain_page().
+ * That page will then be removed from the mapping lists.
+ */
+extern void unmap_domain_page(void *va);
+
+#define DMCACHE_ENTRY_VALID 1U
+#define DMCACHE_ENTRY_HELD 2U
+
+struct domain_mmap_cache {
+ unsigned long pfn;
+ void *va;
+ unsigned int flags;
+};
+
+static inline void
+domain_mmap_cache_init(struct domain_mmap_cache *cache)
+{
+ ASSERT(cache != NULL);
+ cache->flags = 0;
+}
+
+static inline void *
+map_domain_page_with_cache(unsigned long pfn, struct domain_mmap_cache *cache)
+{
+ ASSERT(cache != NULL);
+ BUG_ON(cache->flags & DMCACHE_ENTRY_HELD);
+
+ if ( likely(cache->flags & DMCACHE_ENTRY_VALID) )
+ {
+ cache->flags |= DMCACHE_ENTRY_HELD;
+ if ( likely(pfn == cache->pfn) )
+ goto done;
+ unmap_domain_page(cache->va);
+ }
+
+ cache->pfn = pfn;
+ cache->va = map_domain_page(pfn);
+ cache->flags = DMCACHE_ENTRY_HELD | DMCACHE_ENTRY_VALID;
+
+ done:
+ return cache->va;
+}
+
+static inline void
+unmap_domain_page_with_cache(void *va, struct domain_mmap_cache *cache)
+{
+ ASSERT(cache != NULL);
+ cache->flags &= ~DMCACHE_ENTRY_HELD;
+}
+
+static inline void
+domain_mmap_cache_destroy(struct domain_mmap_cache *cache)
+{
+ ASSERT(cache != NULL);
+ BUG_ON(cache->flags & DMCACHE_ENTRY_HELD);
+
+ if ( likely(cache->flags & DMCACHE_ENTRY_VALID) )
+ {
+ unmap_domain_page(cache->va);
+ cache->flags = 0;
+ }
+}
+
+#else /* !CONFIG_DOMAIN_PAGE */
+
+#define map_domain_page(pfn) phys_to_virt((pfn)<<PAGE_SHIFT)
+#define unmap_domain_page(va) ((void)(va))
+
+struct domain_mmap_cache {
+};
+
+#define domain_mmap_cache_init(c) ((void)(c))
+#define map_domain_page_with_cache(pfn,c) (map_domain_page(pfn))
+#define unmap_domain_page_with_cache(va,c) ((void)(va))
+#define domain_mmap_cache_destroy(c) ((void)(c))
+
+#endif /* !CONFIG_DOMAIN_PAGE */
+
+#endif /* __XEN_DOMAIN_PAGE_H__ */
diff --git a/xen/include/xen/elf.h b/xen/include/xen/elf.h
index 3a62173b1e..1d2ac6919a 100644
--- a/xen/include/xen/elf.h
+++ b/xen/include/xen/elf.h
@@ -173,6 +173,7 @@ typedef struct {
#define EM_PARISC 15 /* HPPA */
#define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */
#define EM_PPC 20 /* PowerPC */
+#define EM_PPC64 21 /* PowerPC 64-bit */
#define EM_ARM 40 /* Advanced RISC Machines ARM */
#define EM_ALPHA 41 /* DEC ALPHA */
#define EM_SPARCV9 43 /* SPARC version 9 */
@@ -525,8 +526,11 @@ typedef struct {
#endif
struct domain_setup_info;
-extern int loadelfimage(char *);
-extern int loadelfsymtab(char *, int, struct domain_setup_info *);
-extern int parseelfimage(char *, unsigned long, struct domain_setup_info *);
+extern int loadelfimage(struct domain_setup_info *);
+extern int parseelfimage(struct domain_setup_info *);
+
+#ifdef Elf_Ehdr
+extern int elf_sanity_check(Elf_Ehdr *ehdr);
+#endif
#endif /* __XEN_ELF_H__ */
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 1fcef0f03e..734427266b 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -11,6 +11,7 @@
#include <xen/config.h>
#include <xen/sched.h>
+#include <xen/smp.h>
#include <asm/bitops.h>
/*
@@ -20,43 +21,47 @@
* may require explicit memory barriers.
*/
-static inline void evtchn_set_pending(struct domain *d, int port)
+static inline void evtchn_set_pending(struct vcpu *v, int port)
{
+ struct domain *d = v->domain;
shared_info_t *s = d->shared_info;
int running;
/* These three operations must happen in strict order. */
if ( !test_and_set_bit(port, &s->evtchn_pending[0]) &&
!test_bit (port, &s->evtchn_mask[0]) &&
- !test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
+ !test_and_set_bit(port>>5, &v->vcpu_info->evtchn_pending_sel) )
{
/* The VCPU pending flag must be set /after/ update to evtchn-pend. */
- set_bit(0, &s->vcpu_data[0].evtchn_upcall_pending);
+ set_bit(0, &v->vcpu_info->evtchn_upcall_pending);
/*
- * NB1. 'flags' and 'processor' must be checked /after/ update of
+ * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
* pending flag. These values may fluctuate (after all, we hold no
* locks) but the key insight is that each change will cause
* evtchn_upcall_pending to be polled.
*
- * NB2. We save DF_RUNNING across the unblock to avoid a needless
+ * NB2. We save VCPUF_running across the unblock to avoid a needless
* IPI for domains that we IPI'd to unblock.
*/
- running = test_bit(DF_RUNNING, &d->flags);
- domain_unblock(d);
+ running = test_bit(_VCPUF_running, &v->vcpu_flags);
+ vcpu_unblock(v);
if ( running )
- smp_send_event_check_cpu(d->processor);
+ smp_send_event_check_cpu(v->processor);
}
}
/*
* send_guest_virq:
- * @d: Domain to which virtual IRQ should be sent
+ * @v: VCPU to which virtual IRQ should be sent
* @virq: Virtual IRQ number (VIRQ_*)
*/
-static inline void send_guest_virq(struct domain *d, int virq)
+static inline void send_guest_virq(struct vcpu *v, int virq)
{
- evtchn_set_pending(d, d->virq_to_evtchn[virq]);
+ int port = v->virq_to_evtchn[virq];
+
+ if ( likely(port != 0) )
+ evtchn_set_pending(v, port);
}
/*
@@ -64,13 +69,10 @@ static inline void send_guest_virq(struct domain *d, int virq)
* @d: Domain to which physical IRQ should be sent
* @pirq: Physical IRQ number
*/
-static inline void send_guest_pirq(struct domain *d, int pirq)
-{
- evtchn_set_pending(d, d->pirq_to_evtchn[pirq]);
-}
+extern void send_guest_pirq(struct domain *d, int pirq);
#define event_pending(_d) \
- ((_d)->shared_info->vcpu_data[0].evtchn_upcall_pending && \
- !(_d)->shared_info->vcpu_data[0].evtchn_upcall_mask)
+ ((_d)->vcpu_info->evtchn_upcall_pending && \
+ !(_d)->vcpu_info->evtchn_upcall_mask)
#endif /* __XEN_EVENT_H__ */
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index c161667cdf..c273eacada 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -51,7 +51,10 @@ typedef struct {
#define GNTPIN_devr_inc (1 << GNTPIN_devr_shift)
#define GNTPIN_devr_mask (0xFFU << GNTPIN_devr_shift)
-#define NR_GRANT_ENTRIES (PAGE_SIZE / sizeof(grant_entry_t))
+#define ORDER_GRANT_FRAMES 2
+#define NR_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES)
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+
/*
* Tracks a mapping of another domain's grant reference. Each domain has a
@@ -63,7 +66,6 @@ typedef struct {
} grant_mapping_t;
#define MAPTRACK_GNTMAP_MASK 7
#define MAPTRACK_REF_SHIFT 3
-#define NR_MAPTRACK_ENTRIES (PAGE_SIZE / sizeof(grant_mapping_t))
/* Per-domain grant information. */
typedef struct {
@@ -74,6 +76,9 @@ typedef struct {
/* Mapping tracking table. */
grant_mapping_t *maptrack;
unsigned int maptrack_head;
+ unsigned int maptrack_order;
+ unsigned int maptrack_limit;
+ unsigned int map_count;
/* Lock protecting updates to active and shared grant tables. */
spinlock_t lock;
} grant_table_t;
@@ -103,6 +108,10 @@ gnttab_prepare_for_transfer(
/* Notify 'rd' of a completed transfer via an already-locked grant entry. */
void
gnttab_notify_transfer(
- struct domain *rd, grant_ref_t ref, unsigned long frame);
+ struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame);
+
+/* Pre-domain destruction release of granted device mappings of other domains.*/
+void
+gnttab_release_dev_mappings(grant_table_t *gt);
#endif /* __XEN_GRANT_H__ */
diff --git a/xen/include/xen/init.h b/xen/include/xen/init.h
index 9e1e62e48f..0709c9abd8 100644
--- a/xen/include/xen/init.h
+++ b/xen/include/xen/init.h
@@ -2,6 +2,7 @@
#define _LINUX_INIT_H
#include <xen/config.h>
+#include <asm/init.h>
/* These macros are used to mark some functions or
* initialized data (doesn't apply to uninitialized data)
@@ -58,13 +59,17 @@ extern initcall_t __initcall_start, __initcall_end;
*/
struct kernel_param {
const char *name;
- enum { OPT_STR, OPT_UINT, OPT_BOOL } type;
+ enum { OPT_STR, OPT_UINT, OPT_BOOL, OPT_CUSTOM } type;
void *var;
unsigned int len;
};
extern struct kernel_param __setup_start, __setup_end;
+#define custom_param(_name, _var) \
+ static char __setup_str_##_var[] __initdata = _name; \
+ static struct kernel_param __setup_##_var __attribute_used__ \
+ __initsetup = { __setup_str_##_var, OPT_CUSTOM, &_var, 0 }
#define boolean_param(_name, _var) \
static char __setup_str_##_var[] __initdata = _name; \
static struct kernel_param __setup_##_var __attribute_used__ \
@@ -83,30 +88,6 @@ extern struct kernel_param __setup_start, __setup_end;
#endif /* __ASSEMBLY__ */
-/*
- * Mark functions and data as being only used at initialization
- * or exit time.
- */
-#define __init \
- __attribute__ ((__section__ (".text.init")))
-#define __exit \
- __attribute_used__ __attribute__ ((__section__(".text.exit")))
-#define __initdata \
- __attribute__ ((__section__ (".data.init")))
-#define __exitdata \
- __attribute_used__ __attribute__ ((__section__ (".data.exit")))
-#define __initsetup \
- __attribute_used__ __attribute__ ((__section__ (".setup.init")))
-#define __init_call \
- __attribute_used__ __attribute__ ((__section__ (".initcall.init")))
-#define __exit_call \
- __attribute_used__ __attribute__ ((__section__ (".exitcall.exit")))
-
-/* For assembly routines */
-#define __INIT .section ".text.init","ax"
-#define __FINIT .previous
-#define __INITDATA .section ".data.init","aw"
-
#ifdef CONFIG_HOTPLUG
#define __devinit
#define __devinitdata
diff --git a/xen/include/xen/inttypes.h b/xen/include/xen/inttypes.h
new file mode 100644
index 0000000000..2163244326
--- /dev/null
+++ b/xen/include/xen/inttypes.h
@@ -0,0 +1,251 @@
+/* Copyright (C) 1997-2001, 2004 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/*
+ * ISO C99: 7.8 Format conversion of integer types <inttypes.h>
+ */
+
+#ifndef _XEN_INTTYPES_H
+#define _XEN_INTTYPES_H 1
+
+#include <xen/config.h>
+#include <xen/types.h>
+
+# if BITS_PER_LONG == 64
+# define __PRI64_PREFIX "l"
+# define __PRIPTR_PREFIX "l"
+# else
+# define __PRI64_PREFIX "ll"
+# define __PRIPTR_PREFIX
+# endif
+
+/* Macros for printing format specifiers. */
+
+/* Decimal notation. */
+# define PRId8 "d"
+# define PRId16 "d"
+# define PRId32 "d"
+# define PRId64 __PRI64_PREFIX "d"
+
+# define PRIdLEAST8 "d"
+# define PRIdLEAST16 "d"
+# define PRIdLEAST32 "d"
+# define PRIdLEAST64 __PRI64_PREFIX "d"
+
+# define PRIdFAST8 "d"
+# define PRIdFAST16 __PRIPTR_PREFIX "d"
+# define PRIdFAST32 __PRIPTR_PREFIX "d"
+# define PRIdFAST64 __PRI64_PREFIX "d"
+
+
+# define PRIi8 "i"
+# define PRIi16 "i"
+# define PRIi32 "i"
+# define PRIi64 __PRI64_PREFIX "i"
+
+# define PRIiLEAST8 "i"
+# define PRIiLEAST16 "i"
+# define PRIiLEAST32 "i"
+# define PRIiLEAST64 __PRI64_PREFIX "i"
+
+# define PRIiFAST8 "i"
+# define PRIiFAST16 __PRIPTR_PREFIX "i"
+# define PRIiFAST32 __PRIPTR_PREFIX "i"
+# define PRIiFAST64 __PRI64_PREFIX "i"
+
+/* Octal notation. */
+# define PRIo8 "o"
+# define PRIo16 "o"
+# define PRIo32 "o"
+# define PRIo64 __PRI64_PREFIX "o"
+
+# define PRIoLEAST8 "o"
+# define PRIoLEAST16 "o"
+# define PRIoLEAST32 "o"
+# define PRIoLEAST64 __PRI64_PREFIX "o"
+
+# define PRIoFAST8 "o"
+# define PRIoFAST16 __PRIPTR_PREFIX "o"
+# define PRIoFAST32 __PRIPTR_PREFIX "o"
+# define PRIoFAST64 __PRI64_PREFIX "o"
+
+/* Unsigned integers. */
+# define PRIu8 "u"
+# define PRIu16 "u"
+# define PRIu32 "u"
+# define PRIu64 __PRI64_PREFIX "u"
+
+# define PRIuLEAST8 "u"
+# define PRIuLEAST16 "u"
+# define PRIuLEAST32 "u"
+# define PRIuLEAST64 __PRI64_PREFIX "u"
+
+# define PRIuFAST8 "u"
+# define PRIuFAST16 __PRIPTR_PREFIX "u"
+# define PRIuFAST32 __PRIPTR_PREFIX "u"
+# define PRIuFAST64 __PRI64_PREFIX "u"
+
+/* lowercase hexadecimal notation. */
+# define PRIx8 "x"
+# define PRIx16 "x"
+# define PRIx32 "x"
+# define PRIx64 __PRI64_PREFIX "x"
+
+# define PRIxLEAST8 "x"
+# define PRIxLEAST16 "x"
+# define PRIxLEAST32 "x"
+# define PRIxLEAST64 __PRI64_PREFIX "x"
+
+# define PRIxFAST8 "x"
+# define PRIxFAST16 __PRIPTR_PREFIX "x"
+# define PRIxFAST32 __PRIPTR_PREFIX "x"
+# define PRIxFAST64 __PRI64_PREFIX "x"
+
+/* UPPERCASE hexadecimal notation. */
+# define PRIX8 "X"
+# define PRIX16 "X"
+# define PRIX32 "X"
+# define PRIX64 __PRI64_PREFIX "X"
+
+# define PRIXLEAST8 "X"
+# define PRIXLEAST16 "X"
+# define PRIXLEAST32 "X"
+# define PRIXLEAST64 __PRI64_PREFIX "X"
+
+# define PRIXFAST8 "X"
+# define PRIXFAST16 __PRIPTR_PREFIX "X"
+# define PRIXFAST32 __PRIPTR_PREFIX "X"
+# define PRIXFAST64 __PRI64_PREFIX "X"
+
+
+/* Macros for printing `intmax_t' and `uintmax_t'. */
+# define PRIdMAX __PRI64_PREFIX "d"
+# define PRIiMAX __PRI64_PREFIX "i"
+# define PRIoMAX __PRI64_PREFIX "o"
+# define PRIuMAX __PRI64_PREFIX "u"
+# define PRIxMAX __PRI64_PREFIX "x"
+# define PRIXMAX __PRI64_PREFIX "X"
+
+
+/* Macros for printing `intptr_t' and `uintptr_t'. */
+# define PRIdPTR __PRIPTR_PREFIX "d"
+# define PRIiPTR __PRIPTR_PREFIX "i"
+# define PRIoPTR __PRIPTR_PREFIX "o"
+# define PRIuPTR __PRIPTR_PREFIX "u"
+# define PRIxPTR __PRIPTR_PREFIX "x"
+# define PRIXPTR __PRIPTR_PREFIX "X"
+
+
+/* Macros for scanning format specifiers. */
+
+/* Signed decimal notation. */
+# define SCNd8 "hhd"
+# define SCNd16 "hd"
+# define SCNd32 "d"
+# define SCNd64 __PRI64_PREFIX "d"
+
+# define SCNdLEAST8 "hhd"
+# define SCNdLEAST16 "hd"
+# define SCNdLEAST32 "d"
+# define SCNdLEAST64 __PRI64_PREFIX "d"
+
+# define SCNdFAST8 "hhd"
+# define SCNdFAST16 __PRIPTR_PREFIX "d"
+# define SCNdFAST32 __PRIPTR_PREFIX "d"
+# define SCNdFAST64 __PRI64_PREFIX "d"
+
+/* Signed decimal notation. */
+# define SCNi8 "hhi"
+# define SCNi16 "hi"
+# define SCNi32 "i"
+# define SCNi64 __PRI64_PREFIX "i"
+
+# define SCNiLEAST8 "hhi"
+# define SCNiLEAST16 "hi"
+# define SCNiLEAST32 "i"
+# define SCNiLEAST64 __PRI64_PREFIX "i"
+
+# define SCNiFAST8 "hhi"
+# define SCNiFAST16 __PRIPTR_PREFIX "i"
+# define SCNiFAST32 __PRIPTR_PREFIX "i"
+# define SCNiFAST64 __PRI64_PREFIX "i"
+
+/* Unsigned decimal notation. */
+# define SCNu8 "hhu"
+# define SCNu16 "hu"
+# define SCNu32 "u"
+# define SCNu64 __PRI64_PREFIX "u"
+
+# define SCNuLEAST8 "hhu"
+# define SCNuLEAST16 "hu"
+# define SCNuLEAST32 "u"
+# define SCNuLEAST64 __PRI64_PREFIX "u"
+
+# define SCNuFAST8 "hhu"
+# define SCNuFAST16 __PRIPTR_PREFIX "u"
+# define SCNuFAST32 __PRIPTR_PREFIX "u"
+# define SCNuFAST64 __PRI64_PREFIX "u"
+
+/* Octal notation. */
+# define SCNo8 "hho"
+# define SCNo16 "ho"
+# define SCNo32 "o"
+# define SCNo64 __PRI64_PREFIX "o"
+
+# define SCNoLEAST8 "hho"
+# define SCNoLEAST16 "ho"
+# define SCNoLEAST32 "o"
+# define SCNoLEAST64 __PRI64_PREFIX "o"
+
+# define SCNoFAST8 "hho"
+# define SCNoFAST16 __PRIPTR_PREFIX "o"
+# define SCNoFAST32 __PRIPTR_PREFIX "o"
+# define SCNoFAST64 __PRI64_PREFIX "o"
+
+/* Hexadecimal notation. */
+# define SCNx8 "hhx"
+# define SCNx16 "hx"
+# define SCNx32 "x"
+# define SCNx64 __PRI64_PREFIX "x"
+
+# define SCNxLEAST8 "hhx"
+# define SCNxLEAST16 "hx"
+# define SCNxLEAST32 "x"
+# define SCNxLEAST64 __PRI64_PREFIX "x"
+
+# define SCNxFAST8 "hhx"
+# define SCNxFAST16 __PRIPTR_PREFIX "x"
+# define SCNxFAST32 __PRIPTR_PREFIX "x"
+# define SCNxFAST64 __PRI64_PREFIX "x"
+
+
+/* Macros for scanning `intmax_t' and `uintmax_t'. */
+# define SCNdMAX __PRI64_PREFIX "d"
+# define SCNiMAX __PRI64_PREFIX "i"
+# define SCNoMAX __PRI64_PREFIX "o"
+# define SCNuMAX __PRI64_PREFIX "u"
+# define SCNxMAX __PRI64_PREFIX "x"
+
+/* Macros for scanning `intptr_t' and `uintptr_t'. */
+# define SCNdPTR __PRIPTR_PREFIX "d"
+# define SCNiPTR __PRIPTR_PREFIX "i"
+# define SCNoPTR __PRIPTR_PREFIX "o"
+# define SCNuPTR __PRIPTR_PREFIX "u"
+# define SCNxPTR __PRIPTR_PREFIX "x"
+
+#endif /* _XEN_INTTYPES_H */
diff --git a/xen/include/xen/ioport.h b/xen/include/xen/ioport.h
deleted file mode 100644
index b39d07ae32..0000000000
--- a/xen/include/xen/ioport.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * ioport.h Definitions of routines for detecting, reserving and
- * allocating system resources.
- *
- * Authors: Linus Torvalds
- */
-
-#ifndef _LINUX_IOPORT_H
-#define _LINUX_IOPORT_H
-
-/*
- * Resources are tree-like, allowing
- * nesting etc..
- */
-struct resource {
- const char *name;
- unsigned long start, end;
- unsigned long flags;
- struct resource *parent, *sibling, *child;
-};
-
-struct resource_list {
- struct resource_list *next;
- struct resource *res;
- struct pci_dev *dev;
-};
-
-/*
- * IO resources have these defined flags.
- */
-#define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */
-
-#define IORESOURCE_IO 0x00000100 /* Resource type */
-#define IORESOURCE_MEM 0x00000200
-#define IORESOURCE_IRQ 0x00000400
-#define IORESOURCE_DMA 0x00000800
-
-#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */
-#define IORESOURCE_READONLY 0x00002000
-#define IORESOURCE_CACHEABLE 0x00004000
-#define IORESOURCE_RANGELENGTH 0x00008000
-#define IORESOURCE_SHADOWABLE 0x00010000
-#define IORESOURCE_BUS_HAS_VGA 0x00080000
-
-#define IORESOURCE_UNSET 0x20000000
-#define IORESOURCE_AUTO 0x40000000
-#define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */
-
-/* ISA PnP IRQ specific bits (IORESOURCE_BITS) */
-#define IORESOURCE_IRQ_HIGHEDGE (1<<0)
-#define IORESOURCE_IRQ_LOWEDGE (1<<1)
-#define IORESOURCE_IRQ_HIGHLEVEL (1<<2)
-#define IORESOURCE_IRQ_LOWLEVEL (1<<3)
-
-/* ISA PnP DMA specific bits (IORESOURCE_BITS) */
-#define IORESOURCE_DMA_TYPE_MASK (3<<0)
-#define IORESOURCE_DMA_8BIT (0<<0)
-#define IORESOURCE_DMA_8AND16BIT (1<<0)
-#define IORESOURCE_DMA_16BIT (2<<0)
-
-#define IORESOURCE_DMA_MASTER (1<<2)
-#define IORESOURCE_DMA_BYTE (1<<3)
-#define IORESOURCE_DMA_WORD (1<<4)
-
-#define IORESOURCE_DMA_SPEED_MASK (3<<6)
-#define IORESOURCE_DMA_COMPATIBLE (0<<6)
-#define IORESOURCE_DMA_TYPEA (1<<6)
-#define IORESOURCE_DMA_TYPEB (2<<6)
-#define IORESOURCE_DMA_TYPEF (3<<6)
-
-/* ISA PnP memory I/O specific bits (IORESOURCE_BITS) */
-#define IORESOURCE_MEM_WRITEABLE (1<<0) /* dup: IORESOURCE_READONLY */
-#define IORESOURCE_MEM_CACHEABLE (1<<1) /* dup: IORESOURCE_CACHEABLE */
-#define IORESOURCE_MEM_RANGELENGTH (1<<2) /* dup: IORESOURCE_RANGELENGTH */
-#define IORESOURCE_MEM_TYPE_MASK (3<<3)
-#define IORESOURCE_MEM_8BIT (0<<3)
-#define IORESOURCE_MEM_16BIT (1<<3)
-#define IORESOURCE_MEM_8AND16BIT (2<<3)
-#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */
-#define IORESOURCE_MEM_EXPANSIONROM (1<<6)
-
-/* PC/ISA/whatever - the normal PC address spaces: IO and memory */
-extern struct resource ioport_resource;
-extern struct resource iomem_resource;
-
-extern int get_resource_list(struct resource *, char *buf, int size);
-
-extern int check_resource(struct resource *root, unsigned long, unsigned long);
-extern int request_resource(struct resource *root, struct resource *new);
-extern int release_resource(struct resource *new);
-extern int allocate_resource(struct resource *root, struct resource *new,
- unsigned long size,
- unsigned long min, unsigned long max,
- unsigned long align,
- void (*alignf)(void *, struct resource *,
- unsigned long, unsigned long),
- void *alignf_data);
-
-/* Convenience shorthand with allocation */
-#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name))
-#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name))
-
-#define release_region(start,n) __release_region(&ioport_resource, (start), (n))
-#define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n))
-
-extern void __release_region(struct resource *, unsigned long, unsigned long);
-
-extern struct resource * __request_region(struct resource *, unsigned long start, unsigned long n, const char *name);
-
-#define get_ioport_list(buf) get_resource_list(&ioport_resource, buf, PAGE_SIZE)
-#define get_mem_list(buf) get_resource_list(&iomem_resource, buf, PAGE_SIZE)
-
-#define HAVE_AUTOIRQ
-extern void autoirq_setup(int waittime);
-extern int autoirq_report(int waittime);
-
-#endif /* _LINUX_IOPORT_H */
diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h
index 42e6d266a4..7b1f826198 100644
--- a/xen/include/xen/irq.h
+++ b/xen/include/xen/irq.h
@@ -2,13 +2,14 @@
#define __XEN_IRQ_H__
#include <xen/config.h>
+#include <xen/cpumask.h>
#include <xen/spinlock.h>
#include <asm/regs.h>
#include <asm/hardirq.h>
struct irqaction
{
- void (*handler)(int, void *, struct xen_regs *);
+ void (*handler)(int, void *, struct cpu_user_regs *);
const char *name;
void *dev_id;
};
@@ -35,7 +36,7 @@ struct hw_interrupt_type {
void (*disable)(unsigned int irq);
void (*ack)(unsigned int irq);
void (*end)(unsigned int irq);
- void (*set_affinity)(unsigned int irq, unsigned long mask);
+ void (*set_affinity)(unsigned int irq, cpumask_t mask);
};
typedef struct hw_interrupt_type hw_irq_controller;
@@ -63,12 +64,12 @@ extern int setup_irq(unsigned int, struct irqaction *);
extern void free_irq(unsigned int);
extern hw_irq_controller no_irq_type;
-extern void no_action(int cpl, void *dev_id, struct xen_regs *regs);
+extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs);
struct domain;
+struct vcpu;
extern int pirq_guest_unmask(struct domain *p);
-extern int pirq_guest_bind(struct domain *p, int irq, int will_share);
+extern int pirq_guest_bind(struct vcpu *p, int irq, int will_share);
extern int pirq_guest_unbind(struct domain *p, int irq);
-extern int pirq_guest_bindable(int irq, int will_share);
#endif /* __XEN_IRQ_H__ */
diff --git a/xen/include/xen/irq_cpustat.h b/xen/include/xen/irq_cpustat.h
index 9d09ad3fb8..6465c8a0fe 100644
--- a/xen/include/xen/irq_cpustat.h
+++ b/xen/include/xen/irq_cpustat.h
@@ -20,11 +20,7 @@
extern irq_cpustat_t irq_stat[];
-#ifdef CONFIG_SMP
#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
-#else
-#define __IRQ_STAT(cpu, member) ((void)(cpu), irq_stat[0].member)
-#endif
/* arch independent irq_stat fields */
#define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending)
diff --git a/xen/include/xen/kernel.h b/xen/include/xen/kernel.h
index 993a6c19cf..53a7251838 100644
--- a/xen/include/xen/kernel.h
+++ b/xen/include/xen/kernel.h
@@ -33,5 +33,29 @@
#define max_t(type,x,y) \
({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ *
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({ type __dummy; \
+ typeof(x) __dummy2; \
+ (void)(&__dummy == &__dummy2); \
+ 1; \
+})
+
+
#endif /* _LINUX_KERNEL_H */
diff --git a/xen/include/xen/keyhandler.h b/xen/include/xen/keyhandler.h
index 3c7c36ef66..112ab3475b 100644
--- a/xen/include/xen/keyhandler.h
+++ b/xen/include/xen/keyhandler.h
@@ -10,8 +10,6 @@
#ifndef __XEN_KEYHANDLER_H__
#define __XEN_KEYHANDLER_H__
-struct xen_regs;
-
/*
* Register a callback function for key @key. The callback occurs in
* softirq context with no locks held and interrupts enabled.
@@ -25,11 +23,11 @@ extern void register_keyhandler(
* synchronously in hard-IRQ context with interrupts disabled. The @regs
* callback parameter points at the interrupted register context.
*/
-typedef void irq_keyhandler_t(unsigned char key, struct xen_regs *regs);
+typedef void irq_keyhandler_t(unsigned char key, struct cpu_user_regs *regs);
extern void register_irq_keyhandler(
unsigned char key, irq_keyhandler_t *handler, char *desc);
/* Inject a keypress into the key-handling subsystem. */
-extern void handle_keypress(unsigned char key, struct xen_regs *regs);
+extern void handle_keypress(unsigned char key, struct cpu_user_regs *regs);
#endif /* __XEN_KEYHANDLER_H__ */
diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h
index 6ce764b7fc..2c77b43056 100644
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -1,12 +1,23 @@
#ifndef __LIB_H__
#define __LIB_H__
+#include <xen/inttypes.h>
#include <stdarg.h>
+#include <xen/config.h>
#include <xen/types.h>
+#include <xen/xmalloc.h>
#include <xen/string.h>
+#define BUG() do { \
+ debugtrace_dump(); \
+ printk("BUG at %s:%d\n", __FILE__, __LINE__); \
+ FORCE_CRASH(); \
+} while ( 0 )
+
+#define BUG_ON(_p) do { if (_p) BUG(); } while ( 0 )
+
#ifndef NDEBUG
-#define ASSERT(_p) if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p , __LINE__, __FILE__); *(int*)0=0; }
+#define ASSERT(_p) { if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p , __LINE__, __FILE__); BUG(); } }
#else
#define ASSERT(_p) ((void)0)
#endif
@@ -14,27 +25,48 @@
#define SWAP(_a, _b) \
do { typeof(_a) _t = (_a); (_a) = (_b); (_b) = _t; } while ( 0 )
+#define DIV_ROUND(x, y) (((x) + (y) / 2) / (y))
+
#define reserve_bootmem(_p,_l) ((void)0)
struct domain;
void cmdline_parse(char *cmdline);
-#define printk printf
-void printf(const char *format, ...);
-void panic(const char *format, ...);
-long vm_assist(struct domain *, unsigned int, unsigned int);
+#ifndef NDEBUG
+extern int debugtrace_send_to_console;
+extern void debugtrace_dump(void);
+extern void debugtrace_printk(const char *fmt, ...);
+#else
+#define debugtrace_dump() ((void)0)
+#define debugtrace_printk(_f, ...) ((void)0)
+#endif
+
+/* Allows us to use '%p' as general-purpose machine-word format char. */
+#define _p(_x) ((void *)(unsigned long)(_x))
+#define printk(_f , _a...) printf( _f , ## _a )
+extern void printf(const char *format, ...)
+ __attribute__ ((format (printf, 1, 2)));
+extern void panic(const char *format, ...);
+extern long vm_assist(struct domain *, unsigned int, unsigned int);
/* vsprintf.c */
extern int sprintf(char * buf, const char * fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
+ __attribute__ ((format (printf, 2, 3)));
extern int vsprintf(char *buf, const char *, va_list);
extern int snprintf(char * buf, size_t size, const char * fmt, ...)
- __attribute__ ((format (printf, 3, 4)));
+ __attribute__ ((format (printf, 3, 4)));
extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
-long simple_strtol(const char *cp,char **endp,unsigned int base);
-unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base);
-long long simple_strtoll(const char *cp,char **endp,unsigned int base);
+long simple_strtol(
+ const char *cp,char **endp, unsigned int base);
+unsigned long simple_strtoul(
+ const char *cp,char **endp, unsigned int base);
+long long simple_strtoll(
+ const char *cp,char **endp, unsigned int base);
+unsigned long long simple_strtoull(
+ const char *cp,char **endp, unsigned int base);
+
+unsigned long long parse_size_and_unit(char *s);
#endif /* __LIB_H__ */
diff --git a/xen/include/xen/list.h b/xen/include/xen/list.h
index 7b19bb4650..93d9f987e7 100644
--- a/xen/include/xen/list.h
+++ b/xen/include/xen/list.h
@@ -174,5 +174,17 @@ static __inline__ void list_splice(struct list_head *list, struct list_head *hea
pos = list_entry(pos->member.next, typeof(*pos), member), \
prefetch(pos->member.next))
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
#endif /* _LINUX_LIST_H */
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 807987045f..1919b5e9e7 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -3,6 +3,7 @@
#define __XEN_MM_H__
#include <xen/config.h>
+#include <xen/types.h>
#include <xen/list.h>
#include <xen/spinlock.h>
@@ -10,9 +11,9 @@ struct domain;
struct pfn_info;
/* Boot-time allocator. Turns into generic allocator after bootstrap. */
-unsigned long init_boot_allocator(unsigned long bitmap_start);
-void init_boot_pages(unsigned long ps, unsigned long pe);
-unsigned long alloc_boot_pages(unsigned long size, unsigned long align);
+physaddr_t init_boot_allocator(physaddr_t bitmap_start);
+void init_boot_pages(physaddr_t ps, physaddr_t pe);
+unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align);
void end_boot_allocator(void);
/* Generic allocator. These functions are *not* interrupt-safe. */
@@ -24,19 +25,19 @@ void free_heap_pages(
void scrub_heap_pages(void);
/* Xen suballocator. These functions are interrupt-safe. */
-void init_xenheap_pages(unsigned long ps, unsigned long pe);
-unsigned long alloc_xenheap_pages(unsigned int order);
-void free_xenheap_pages(unsigned long p, unsigned int order);
+void init_xenheap_pages(physaddr_t ps, physaddr_t pe);
+void *alloc_xenheap_pages(unsigned int order);
+void free_xenheap_pages(void *v, unsigned int order);
#define alloc_xenheap_page() (alloc_xenheap_pages(0))
-#define free_xenheap_page(_p) (free_xenheap_pages(_p,0))
+#define free_xenheap_page(v) (free_xenheap_pages(v,0))
/* Domain suballocator. These functions are *not* interrupt-safe.*/
-void init_domheap_pages(unsigned long ps, unsigned long pe);
+void init_domheap_pages(physaddr_t ps, physaddr_t pe);
struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order);
void free_domheap_pages(struct pfn_info *pg, unsigned int order);
unsigned long avail_domheap_pages(void);
-#define alloc_domheap_page(_d) (alloc_domheap_pages(_d,0))
-#define free_domheap_page(_p) (free_domheap_pages(_p,0))
+#define alloc_domheap_page(d) (alloc_domheap_pages(d,0))
+#define free_domheap_page(p) (free_domheap_pages(p,0))
/* Automatic page scrubbing for dead domains. */
extern struct list_head page_scrub_list;
@@ -48,4 +49,8 @@ extern struct list_head page_scrub_list;
#include <asm/mm.h>
+#ifndef sync_pagetable_state
+#define sync_pagetable_state(d) ((void)0)
+#endif
+
#endif /* __XEN_MM_H__ */
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
deleted file mode 100644
index 5f8740a31c..0000000000
--- a/xen/include/xen/pci.h
+++ /dev/null
@@ -1,838 +0,0 @@
-/*
- * $Id: pci.h,v 1.87 1998/10/11 15:13:12 mj Exp $
- *
- * PCI defines and function prototypes
- * Copyright 1994, Drew Eckhardt
- * Copyright 1997--1999 Martin Mares <mj@ucw.cz>
- *
- * For more information, please consult the following manuals (look at
- * http://www.pcisig.com/ for how to get them):
- *
- * PCI BIOS Specification
- * PCI Local Bus Specification
- * PCI to PCI Bridge Specification
- * PCI System Design Guide
- */
-
-#ifndef LINUX_PCI_H
-#define LINUX_PCI_H
-
-/*
- * Under PCI, each device has 256 bytes of configuration address space,
- * of which the first 64 bytes are standardized as follows:
- */
-#define PCI_VENDOR_ID 0x00 /* 16 bits */
-#define PCI_DEVICE_ID 0x02 /* 16 bits */
-#define PCI_COMMAND 0x04 /* 16 bits */
-#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */
-#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */
-#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */
-#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */
-#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */
-#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */
-#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */
-#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */
-#define PCI_COMMAND_SERR 0x100 /* Enable SERR */
-#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */
-
-#define PCI_STATUS 0x06 /* 16 bits */
-#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */
-#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */
-#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */
-#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */
-#define PCI_STATUS_PARITY 0x100 /* Detected parity error */
-#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */
-#define PCI_STATUS_DEVSEL_FAST 0x000
-#define PCI_STATUS_DEVSEL_MEDIUM 0x200
-#define PCI_STATUS_DEVSEL_SLOW 0x400
-#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */
-#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */
-#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */
-#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */
-#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */
-
-#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8
- revision */
-#define PCI_REVISION_ID 0x08 /* Revision ID */
-#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */
-#define PCI_CLASS_DEVICE 0x0a /* Device class */
-
-#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */
-#define PCI_LATENCY_TIMER 0x0d /* 8 bits */
-#define PCI_HEADER_TYPE 0x0e /* 8 bits */
-#define PCI_HEADER_TYPE_NORMAL 0
-#define PCI_HEADER_TYPE_BRIDGE 1
-#define PCI_HEADER_TYPE_CARDBUS 2
-
-#define PCI_BIST 0x0f /* 8 bits */
-#define PCI_BIST_CODE_MASK 0x0f /* Return result */
-#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */
-#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */
-
-/*
- * Base addresses specify locations in memory or I/O space.
- * Decoded size can be determined by writing a value of
- * 0xffffffff to the register, and reading it back. Only
- * 1 bits are decoded.
- */
-#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */
-#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */
-#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */
-#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */
-#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */
-#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */
-#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */
-#define PCI_BASE_ADDRESS_SPACE_IO 0x01
-#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00
-#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06
-#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */
-#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */
-#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */
-#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */
-#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL)
-#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL)
-/* bit 1 is reserved if address_space = 1 */
-
-/* Header type 0 (normal devices) */
-#define PCI_CARDBUS_CIS 0x28
-#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
-#define PCI_SUBSYSTEM_ID 0x2e
-#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */
-#define PCI_ROM_ADDRESS_ENABLE 0x01
-#define PCI_ROM_ADDRESS_MASK (~0x7ffUL)
-
-#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */
-
-/* 0x35-0x3b are reserved */
-#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */
-#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */
-#define PCI_MIN_GNT 0x3e /* 8 bits */
-#define PCI_MAX_LAT 0x3f /* 8 bits */
-
-/* Header type 1 (PCI-to-PCI bridges) */
-#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */
-#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */
-#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */
-#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */
-#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */
-#define PCI_IO_LIMIT 0x1d
-#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */
-#define PCI_IO_RANGE_TYPE_16 0x00
-#define PCI_IO_RANGE_TYPE_32 0x01
-#define PCI_IO_RANGE_MASK (~0x0fUL)
-#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */
-#define PCI_MEMORY_BASE 0x20 /* Memory range behind */
-#define PCI_MEMORY_LIMIT 0x22
-#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
-#define PCI_MEMORY_RANGE_MASK (~0x0fUL)
-#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */
-#define PCI_PREF_MEMORY_LIMIT 0x26
-#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL
-#define PCI_PREF_RANGE_TYPE_32 0x00
-#define PCI_PREF_RANGE_TYPE_64 0x01
-#define PCI_PREF_RANGE_MASK (~0x0fUL)
-#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */
-#define PCI_PREF_LIMIT_UPPER32 0x2c
-#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */
-#define PCI_IO_LIMIT_UPPER16 0x32
-/* 0x34 same as for htype 0 */
-/* 0x35-0x3b is reserved */
-#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */
-/* 0x3c-0x3d are same as for htype 0 */
-#define PCI_BRIDGE_CONTROL 0x3e
-#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */
-#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */
-#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */
-#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */
-#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
-#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
-#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
-
-/* Header type 2 (CardBus bridges) */
-#define PCI_CB_CAPABILITY_LIST 0x14
-/* 0x15 reserved */
-#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */
-#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */
-#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */
-#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */
-#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */
-#define PCI_CB_MEMORY_BASE_0 0x1c
-#define PCI_CB_MEMORY_LIMIT_0 0x20
-#define PCI_CB_MEMORY_BASE_1 0x24
-#define PCI_CB_MEMORY_LIMIT_1 0x28
-#define PCI_CB_IO_BASE_0 0x2c
-#define PCI_CB_IO_BASE_0_HI 0x2e
-#define PCI_CB_IO_LIMIT_0 0x30
-#define PCI_CB_IO_LIMIT_0_HI 0x32
-#define PCI_CB_IO_BASE_1 0x34
-#define PCI_CB_IO_BASE_1_HI 0x36
-#define PCI_CB_IO_LIMIT_1 0x38
-#define PCI_CB_IO_LIMIT_1_HI 0x3a
-#define PCI_CB_IO_RANGE_MASK (~0x03UL)
-/* 0x3c-0x3d are same as for htype 0 */
-#define PCI_CB_BRIDGE_CONTROL 0x3e
-#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */
-#define PCI_CB_BRIDGE_CTL_SERR 0x02
-#define PCI_CB_BRIDGE_CTL_ISA 0x04
-#define PCI_CB_BRIDGE_CTL_VGA 0x08
-#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20
-#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */
-#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */
-#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */
-#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
-#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400
-#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40
-#define PCI_CB_SUBSYSTEM_ID 0x42
-#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */
-/* 0x48-0x7f reserved */
-
-/* Capability lists */
-
-#define PCI_CAP_LIST_ID 0 /* Capability ID */
-#define PCI_CAP_ID_PM 0x01 /* Power Management */
-#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */
-#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */
-#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */
-#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */
-#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */
-#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */
-#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
-#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */
-#define PCI_CAP_SIZEOF 4
-
-/* Power Management Registers */
-
-#define PCI_PM_PMC 2 /* PM Capabilities Register */
-#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */
-#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */
-#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */
-#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */
-#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxilliary power support mask */
-#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */
-#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */
-#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */
-#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */
-#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */
-#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */
-#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */
-#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */
-#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
-#define PCI_PM_CTRL 4 /* PM control and status register */
-#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
-#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
-#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
-#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
-#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */
-#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */
-#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */
-#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */
-#define PCI_PM_DATA_REGISTER 7 /* (??) */
-#define PCI_PM_SIZEOF 8
-
-/* AGP registers */
-
-#define PCI_AGP_VERSION 2 /* BCD version number */
-#define PCI_AGP_RFU 3 /* Rest of capability flags */
-#define PCI_AGP_STATUS 4 /* Status register */
-#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */
-#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */
-#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */
-#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */
-#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */
-#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */
-#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */
-#define PCI_AGP_COMMAND 8 /* Control register */
-#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */
-#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */
-#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */
-#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */
-#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */
-#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */
-#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */
-#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */
-#define PCI_AGP_SIZEOF 12
-
-/* Slot Identification */
-
-#define PCI_SID_ESR 2 /* Expansion Slot Register */
-#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */
-#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */
-#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */
-
-/* Message Signalled Interrupts registers */
-
-#define PCI_MSI_FLAGS 2 /* Various flags */
-#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */
-#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */
-#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */
-#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */
-#define PCI_MSI_RFU 3 /* Rest of capability flags */
-#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */
-#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
-#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */
-#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */
-
-/* CompactPCI Hotswap Register */
-
-#define PCI_CHSWP_CSR 2 /* Control and Status Register */
-#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */
-#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */
-#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */
-#define PCI_CHSWP_LOO 0x08 /* LED On / Off */
-#define PCI_CHSWP_PI 0x30 /* Programming Interface */
-#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */
-#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */
-
-/* PCI-X registers */
-
-#define PCI_X_CMD 2 /* Modes & Features */
-#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */
-#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */
-#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */
-#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */
-#define PCI_X_DEVFN 4 /* A copy of devfn. */
-#define PCI_X_BUSNR 5 /* Bus segment number */
-#define PCI_X_STATUS 6 /* PCI-X capabilities */
-#define PCI_X_STATUS_64BIT 0x0001 /* 64-bit device */
-#define PCI_X_STATUS_133MHZ 0x0002 /* 133 MHz capable */
-#define PCI_X_STATUS_SPL_DISC 0x0004 /* Split Completion Discarded */
-#define PCI_X_STATUS_UNX_SPL 0x0008 /* Unexpected Split Completion */
-#define PCI_X_STATUS_COMPLEX 0x0010 /* Device Complexity */
-#define PCI_X_STATUS_MAX_READ 0x0060 /* Designed Maximum Memory Read Count */
-#define PCI_X_STATUS_MAX_SPLIT 0x0380 /* Design Max Outstanding Split Trans */
-#define PCI_X_STATUS_MAX_CUM 0x1c00 /* Designed Max Cumulative Read Size */
-#define PCI_X_STATUS_SPL_ERR 0x2000 /* Rcvd Split Completion Error Msg */
-
-/* Include the ID list */
-
-#include <xen/pci_ids.h>
-
-/*
- * The PCI interface treats multi-function devices as independent
- * devices. The slot/function address of each device is encoded
- * in a single byte as follows:
- *
- * 7:3 = slot
- * 2:0 = function
- */
-#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
-#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
-#define PCI_FUNC(devfn) ((devfn) & 0x07)
-
-/* Ioctls for /proc/bus/pci/X/Y nodes. */
-#define PCIIOC_BASE ('P' << 24 | 'C' << 16 | 'I' << 8)
-#define PCIIOC_CONTROLLER (PCIIOC_BASE | 0x00) /* Get controller for PCI device. */
-#define PCIIOC_MMAP_IS_IO (PCIIOC_BASE | 0x01) /* Set mmap state to I/O space. */
-#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */
-#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */
-
-#include <xen/types.h>
-#include <xen/config.h>
-#include <xen/ioport.h>
-#include <xen/list.h>
-#include <xen/errno.h>
-
-/* File state for mmap()s on /proc/bus/pci/X/Y */
-enum pci_mmap_state {
- pci_mmap_io,
- pci_mmap_mem
-};
-
-/* This defines the direction arg to the DMA mapping routines. */
-#define PCI_DMA_BIDIRECTIONAL 0
-#define PCI_DMA_TODEVICE 1
-#define PCI_DMA_FROMDEVICE 2
-#define PCI_DMA_NONE 3
-
-#define DEVICE_COUNT_COMPATIBLE 4
-#define DEVICE_COUNT_IRQ 2
-#define DEVICE_COUNT_DMA 2
-#define DEVICE_COUNT_RESOURCE 12
-
-#define PCI_ANY_ID (~0)
-
-#define pci_present pcibios_present
-
-
-#define pci_for_each_dev_reverse(dev) \
- for(dev = pci_dev_g(pci_devices.prev); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.prev))
-
-#define pci_for_each_bus(bus) \
- list_for_each_entry(bus, &pci_root_buses, node)
-
-/*
- * The pci_dev structure is used to describe both PCI and ISAPnP devices.
- */
-struct pci_dev {
- struct list_head global_list; /* node in list of all PCI devices */
- struct list_head bus_list; /* node in per-bus list */
- struct pci_bus *bus; /* bus this device is on */
- struct pci_bus *subordinate; /* bus this device bridges to */
-
- void *sysdata; /* hook for sys-specific extension */
- struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
-
- unsigned int devfn; /* encoded device & function index */
- unsigned short vendor;
- unsigned short device;
- unsigned short subsystem_vendor;
- unsigned short subsystem_device;
- unsigned int class; /* 3 bytes: (base,sub,prog-if) */
- u8 hdr_type; /* PCI header type (`multi' flag masked out) */
- u8 rom_base_reg; /* which config register controls the ROM */
-
- struct pci_driver *driver; /* which driver has allocated this device */
- void *driver_data; /* data private to the driver */
- u64 dma_mask; /* Mask of the bits of bus address this
- device implements. Normally this is
- 0xffffffff. You only need to change
- this if your device has broken DMA
- or supports 64-bit transfers. */
-
- u32 current_state; /* Current operating state. In ACPI-speak,
- this is D0-D3, D0 being fully functional,
- and D3 being off. */
-
-#ifdef LINUX_2_6
- struct device dev; /* Generic device interface */
-#endif
-
- /* device is compatible with these IDs */
- unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
- unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
-
- /*
- * Instead of touching interrupt line and base address registers
- * directly, use the values stored here. They might be different!
- */
- unsigned int irq;
- struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
- struct resource dma_resource[DEVICE_COUNT_DMA];
- struct resource irq_resource[DEVICE_COUNT_IRQ];
-
- char name[90]; /* device name */
- char slot_name[8]; /* slot name */
- int active; /* ISAPnP: device is active */
- int ro; /* ISAPnP: read only */
- unsigned short regs; /* ISAPnP: supported registers */
-
- /* These fields are used by common fixups */
- unsigned short transparent:1; /* Transparent PCI bridge */
-
- int (*prepare)(struct pci_dev *dev); /* ISAPnP hooks */
- int (*activate)(struct pci_dev *dev);
- int (*deactivate)(struct pci_dev *dev);
-};
-
-#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
-#define pci_dev_b(n) list_entry(n, struct pci_dev, bus_list)
-
-/*
- * For PCI devices, the region numbers are assigned this way:
- *
- * 0-5 standard PCI regions
- * 6 expansion ROM
- * 7-10 bridges: address space assigned to buses behind the bridge
- */
-
-#define PCI_ROM_RESOURCE 6
-#define PCI_BRIDGE_RESOURCES 7
-#define PCI_NUM_RESOURCES 11
-
-#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */
-
-struct pci_bus {
- struct list_head node; /* node in list of buses */
- struct pci_bus *parent; /* parent bus this bridge is on */
- struct list_head children; /* list of child buses */
- struct list_head devices; /* list of devices on this bus */
- struct pci_dev *self; /* bridge device as seen by parent */
- struct resource *resource[4]; /* address space routed to this bus */
-
- struct pci_ops *ops; /* configuration access functions */
- void *sysdata; /* hook for sys-specific extension */
- struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */
-
- unsigned char number; /* bus number */
- unsigned char primary; /* number of primary bridge */
- unsigned char secondary; /* number of secondary bridge */
- unsigned char subordinate; /* max number of subordinate buses */
-
- char name[48];
- unsigned short vendor;
- unsigned short device;
- unsigned int serial; /* serial number */
- unsigned char pnpver; /* Plug & Play version */
- unsigned char productver; /* product version */
- unsigned char checksum; /* if zero - checksum passed */
- unsigned char pad1;
-};
-
-#define pci_bus_b(n) list_entry(n, struct pci_bus, node)
-
-extern struct list_head pci_root_buses; /* list of all known PCI buses */
-extern struct list_head pci_devices; /* list of all devices */
-
-extern struct proc_dir_entry *proc_bus_pci_dir;
-/*
- * Error values that may be returned by PCI functions.
- */
-#define PCIBIOS_SUCCESSFUL 0x00
-#define PCIBIOS_FUNC_NOT_SUPPORTED 0x81
-#define PCIBIOS_BAD_VENDOR_ID 0x83
-#define PCIBIOS_DEVICE_NOT_FOUND 0x86
-#define PCIBIOS_BAD_REGISTER_NUMBER 0x87
-#define PCIBIOS_SET_FAILED 0x88
-#define PCIBIOS_BUFFER_TOO_SMALL 0x89
-
-/* Low-level architecture-dependent routines */
-
-struct pci_ops {
- int (*read_byte)(struct pci_dev *, int where, u8 *val);
- int (*read_word)(struct pci_dev *, int where, u16 *val);
- int (*read_dword)(struct pci_dev *, int where, u32 *val);
- int (*write_byte)(struct pci_dev *, int where, u8 val);
- int (*write_word)(struct pci_dev *, int where, u16 val);
- int (*write_dword)(struct pci_dev *, int where, u32 val);
-};
-
-struct pbus_set_ranges_data
-{
- unsigned long io_start, io_end;
- unsigned long mem_start, mem_end;
- unsigned long prefetch_start, prefetch_end;
-};
-
-struct pci_device_id {
- unsigned int vendor, device; /* Vendor and device ID or PCI_ANY_ID */
- unsigned int subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
- unsigned int class, class_mask; /* (class,subclass,prog-if) triplet */
- unsigned long driver_data; /* Data private to the driver */
-};
-
-struct pci_driver {
- struct list_head node;
- char *name;
- const struct pci_device_id *id_table; /* NULL if wants all devices */
- int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
- void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */
- int (*save_state) (struct pci_dev *dev, u32 state); /* Save Device Context */
- int (*suspend) (struct pci_dev *dev, u32 state); /* Device suspended */
- int (*resume) (struct pci_dev *dev); /* Device woken up */
- int (*enable_wake) (struct pci_dev *dev, u32 state, int enable); /* Enable wake event */
-};
-
-/**
- * PCI_DEVICE - macro used to describe a specific pci device
- * @vend: the 16 bit PCI Vendor ID
- * @dev: the 16 bit PCI Device ID
- *
- * This macro is used to create a struct pci_device_id that matches a
- * specific device. The subvendor and subdevice fields will be set to
- * PCI_ANY_ID.
- */
-#define PCI_DEVICE(vend,dev) \
- .vendor = (vend), .device = (dev), \
- .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
-
-/**
- * PCI_DEVICE_CLASS - macro used to describe a specific pci device class
- * @dev_class: the class, subclass, prog-if triple for this device
- * @dev_class_mask: the class mask for this device
- *
- * This macro is used to create a struct pci_device_id that matches a
- * specific PCI class. The vendor, device, subvendor, and subdevice
- * fields will be set to PCI_ANY_ID.
- */
-#define PCI_DEVICE_CLASS(dev_class,dev_class_mask) \
- .class = (dev_class), .class_mask = (dev_class_mask), \
- .vendor = PCI_ANY_ID, .device = PCI_ANY_ID, \
- .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
-
-/* these external functions are only available when PCI support is enabled */
-#ifdef CONFIG_PCI
-
-#define pci_for_each_dev(dev) \
- for(dev = pci_dev_g(pci_devices.next); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.next))
-
-void pcibios_init(void);
-void pcibios_fixup_bus(struct pci_bus *);
-int pcibios_enable_device(struct pci_dev *, int mask);
-char *pcibios_setup (char *str);
-
-/* Used only when drivers/pci/setup.c is used */
-void pcibios_align_resource(void *, struct resource *,
- unsigned long, unsigned long);
-void pcibios_update_resource(struct pci_dev *, struct resource *,
- struct resource *, int);
-void pcibios_update_irq(struct pci_dev *, int irq);
-void pcibios_fixup_pbus_ranges(struct pci_bus *, struct pbus_set_ranges_data *);
-
-/* Backward compatibility, don't use in new code! */
-
-int pcibios_present(void);
-int pcibios_read_config_byte (unsigned char bus, unsigned char dev_fn,
- unsigned char where, unsigned char *val);
-int pcibios_read_config_word (unsigned char bus, unsigned char dev_fn,
- unsigned char where, unsigned short *val);
-int pcibios_read_config_dword (unsigned char bus, unsigned char dev_fn,
- unsigned char where, unsigned int *val);
-int pcibios_write_config_byte (unsigned char bus, unsigned char dev_fn,
- unsigned char where, unsigned char val);
-int pcibios_write_config_word (unsigned char bus, unsigned char dev_fn,
- unsigned char where, unsigned short val);
-int pcibios_write_config_dword (unsigned char bus, unsigned char dev_fn,
- unsigned char where, unsigned int val);
-int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn);
-int pcibios_find_device (unsigned short vendor, unsigned short dev_id,
- unsigned short index, unsigned char *bus,
- unsigned char *dev_fn);
-
-/* Generic PCI functions used internally */
-
-void pci_init(void);
-int pci_bus_exists(const struct list_head *list, int nr);
-struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
-struct pci_bus *pci_alloc_primary_bus(int bus);
-struct pci_dev *pci_scan_slot(struct pci_dev *temp);
-int pci_proc_attach_device(struct pci_dev *dev);
-int pci_proc_detach_device(struct pci_dev *dev);
-int pci_proc_attach_bus(struct pci_bus *bus);
-int pci_proc_detach_bus(struct pci_bus *bus);
-void pci_name_device(struct pci_dev *dev);
-char *pci_class_name(u32 class);
-void pci_read_bridge_bases(struct pci_bus *child);
-struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res);
-int pci_setup_device(struct pci_dev *dev);
-int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
-
-/* Generic PCI functions exported to card drivers */
-
-struct pci_dev *pci_find_device (unsigned int vendor, unsigned int device, const struct pci_dev *from);
-struct pci_dev *pci_find_subsys (unsigned int vendor, unsigned int device,
- unsigned int ss_vendor, unsigned int ss_device,
- const struct pci_dev *from);
-struct pci_dev *pci_find_class (unsigned int class, const struct pci_dev *from);
-struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn);
-int pci_find_capability (struct pci_dev *dev, int cap);
-
-int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val);
-int pci_read_config_word(struct pci_dev *dev, int where, u16 *val);
-int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val);
-int pci_write_config_byte(struct pci_dev *dev, int where, u8 val);
-int pci_write_config_word(struct pci_dev *dev, int where, u16 val);
-int pci_write_config_dword(struct pci_dev *dev, int where, u32 val);
-
-int pci_enable_device(struct pci_dev *dev);
-int pci_enable_device_bars(struct pci_dev *dev, int mask);
-void pci_disable_device(struct pci_dev *dev);
-void pci_set_master(struct pci_dev *dev);
-#define HAVE_PCI_SET_MWI
-int pci_set_mwi(struct pci_dev *dev);
-void pci_clear_mwi(struct pci_dev *dev);
-int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
-int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask);
-int pci_assign_resource(struct pci_dev *dev, int i);
-
-/* Power management related routines */
-int pci_save_state(struct pci_dev *dev, u32 *buffer);
-int pci_restore_state(struct pci_dev *dev, u32 *buffer);
-int pci_set_power_state(struct pci_dev *dev, int state);
-int pci_enable_wake(struct pci_dev *dev, u32 state, int enable);
-
-/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
-
-int pci_claim_resource(struct pci_dev *, int);
-void pci_assign_unassigned_resources(void);
-void pdev_enable_device(struct pci_dev *);
-void pdev_sort_resources(struct pci_dev *, struct resource_list *);
-unsigned long pci_bridge_check_io(struct pci_dev *);
-void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
- int (*)(struct pci_dev *, u8, u8));
-#define HAVE_PCI_REQ_REGIONS 2
-int pci_request_regions(struct pci_dev *, char *);
-void pci_release_regions(struct pci_dev *);
-int pci_request_region(struct pci_dev *, int, char *);
-void pci_release_region(struct pci_dev *, int);
-
-/* New-style probing supporting hot-pluggable devices */
-int pci_register_driver(struct pci_driver *);
-void pci_unregister_driver(struct pci_driver *);
-void pci_insert_device(struct pci_dev *, struct pci_bus *);
-void pci_remove_device(struct pci_dev *);
-struct pci_driver *pci_dev_driver(const struct pci_dev *);
-const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev);
-void pci_announce_device_to_drivers(struct pci_dev *);
-unsigned int pci_do_scan_bus(struct pci_bus *bus);
-struct pci_bus * pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr);
-
-#if 0
-/* xmem_cache style wrapper around pci_alloc_consistent() */
-struct pci_pool *pci_pool_create (const char *name, struct pci_dev *dev,
- size_t size, size_t align, size_t allocation, int flags);
-void pci_pool_destroy (struct pci_pool *pool);
-
-void *pci_pool_alloc (struct pci_pool *pool, int flags, dma_addr_t *handle);
-void pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t addr);
-#endif
-
-#endif /* CONFIG_PCI */
-
-/* Include architecture-dependent settings and functions */
-
-#include <asm/pci.h>
-
-/*
- * If the system does not have PCI, clearly these return errors. Define
- * these as simple inline functions to avoid hair in drivers.
- */
-
-#ifndef CONFIG_PCI
-static inline int pcibios_present(void) { return 0; }
-static inline int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn)
-{ return PCIBIOS_DEVICE_NOT_FOUND; }
-
-#define _PCI_NOP(o,s,t) \
- static inline int pcibios_##o##_config_##s (u8 bus, u8 dfn, u8 where, t val) \
- { return PCIBIOS_FUNC_NOT_SUPPORTED; } \
- static inline int pci_##o##_config_##s (struct pci_dev *dev, int where, t val) \
- { return PCIBIOS_FUNC_NOT_SUPPORTED; }
-#define _PCI_NOP_ALL(o,x) _PCI_NOP(o,byte,u8 x) \
- _PCI_NOP(o,word,u16 x) \
- _PCI_NOP(o,dword,u32 x)
-_PCI_NOP_ALL(read, *)
-_PCI_NOP_ALL(write,)
-
-static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
-{ return NULL; }
-
-static inline struct pci_dev *pci_find_class(unsigned int class, const struct pci_dev *from)
-{ return NULL; }
-
-static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn)
-{ return NULL; }
-
-static inline struct pci_dev *pci_find_subsys(unsigned int vendor, unsigned int device,
-unsigned int ss_vendor, unsigned int ss_device, const struct pci_dev *from)
-{ return NULL; }
-
-static inline void pci_set_master(struct pci_dev *dev) { }
-static inline int pci_enable_device_bars(struct pci_dev *dev, int mask) { return -EBUSY; }
-static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; }
-static inline void pci_disable_device(struct pci_dev *dev) { }
-static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; }
-static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
-static inline int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
-static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;}
-static inline int pci_register_driver(struct pci_driver *drv) { return 0;}
-static inline void pci_unregister_driver(struct pci_driver *drv) { }
-static inline int scsi_to_pci_dma_dir(unsigned char scsi_dir) { return scsi_dir; }
-static inline int pci_find_capability (struct pci_dev *dev, int cap) {return 0; }
-static inline const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) { return NULL; }
-
-/* Power management related routines */
-static inline int pci_save_state(struct pci_dev *dev, u32 *buffer) { return 0; }
-static inline int pci_restore_state(struct pci_dev *dev, u32 *buffer) { return 0; }
-static inline int pci_set_power_state(struct pci_dev *dev, int state) { return 0; }
-static inline int pci_enable_wake(struct pci_dev *dev, u32 state, int enable) { return 0; }
-
-#define pci_for_each_dev(dev) \
- for(dev = NULL; 0; )
-
-#else
-
-/*
- * a helper function which helps ensure correct pci_driver
- * setup and cleanup for commonly-encountered hotplug/modular cases
- *
- * This MUST stay in a header, as it checks for -DMODULE
- */
-static inline int pci_module_init(struct pci_driver *drv)
-{
- int rc = pci_register_driver (drv);
-
- if (rc > 0)
- return 0;
-
- /* iff CONFIG_HOTPLUG and built into kernel, we should
- * leave the driver around for future hotplug events.
- * For the module case, a hotplug daemon of some sort
- * should load a module in response to an insert event. */
-#if defined(CONFIG_HOTPLUG) && !defined(MODULE)
- if (rc == 0)
- return 0;
-#else
- if (rc == 0)
- rc = -ENODEV;
-#endif
-
- /* if we get here, we need to clean up pci driver instance
- * and return some sort of error */
- pci_unregister_driver (drv);
-
- return rc;
-}
-
-#endif /* !CONFIG_PCI */
-
-/* these helpers provide future and backwards compatibility
- * for accessing popular PCI BAR info */
-#define pci_resource_start(dev,bar) ((dev)->resource[(bar)].start)
-#define pci_resource_end(dev,bar) ((dev)->resource[(bar)].end)
-#define pci_resource_flags(dev,bar) ((dev)->resource[(bar)].flags)
-#define pci_resource_len(dev,bar) \
- ((pci_resource_start((dev),(bar)) == 0 && \
- pci_resource_end((dev),(bar)) == \
- pci_resource_start((dev),(bar))) ? 0 : \
- \
- (pci_resource_end((dev),(bar)) - \
- pci_resource_start((dev),(bar)) + 1))
-
-/* Similar to the helpers above, these manipulate per-pci_dev
- * driver-specific data. Currently stored as pci_dev::driver_data,
- * a void pointer, but it is not present on older kernels.
- */
-static inline void *pci_get_drvdata (struct pci_dev *pdev)
-{
- return pdev->driver_data;
-}
-
-static inline void pci_set_drvdata (struct pci_dev *pdev, void *data)
-{
- pdev->driver_data = data;
-}
-
-static inline char *pci_name(struct pci_dev *pdev)
-{
- return pdev->slot_name;
-}
-
-/*
- * The world is not perfect and supplies us with broken PCI devices.
- * For at least a part of these bugs we need a work-around, so both
- * generic (drivers/pci/quirks.c) and per-architecture code can define
- * fixup hooks to be called for particular buggy devices.
- */
-
-struct pci_fixup {
- int pass;
- u16 vendor, device; /* You can use PCI_ANY_ID here of course */
- void (*hook)(struct pci_dev *dev);
-};
-
-extern struct pci_fixup pcibios_fixups[];
-
-#define PCI_FIXUP_HEADER 1 /* Called immediately after reading configuration header */
-#define PCI_FIXUP_FINAL 2 /* Final phase of device fixups */
-
-void pci_fixup_device(int pass, struct pci_dev *dev);
-
-extern int pci_pci_problems;
-#define PCIPCI_FAIL 1
-#define PCIPCI_TRITON 2
-#define PCIPCI_NATOMA 4
-#define PCIPCI_VIAETBF 8
-#define PCIPCI_VSFX 16
-#define PCIPCI_ALIMAGIK 32
-
-#endif /* LINUX_PCI_H */
diff --git a/xen/include/xen/pci_ids.h b/xen/include/xen/pci_ids.h
deleted file mode 100644
index f91e7d9677..0000000000
--- a/xen/include/xen/pci_ids.h
+++ /dev/null
@@ -1,2017 +0,0 @@
-/*
- * PCI Class, Vendor and Device IDs
- *
- * Please keep sorted.
- */
-
-/* Device classes and subclasses */
-
-#define PCI_CLASS_NOT_DEFINED 0x0000
-#define PCI_CLASS_NOT_DEFINED_VGA 0x0001
-
-#define PCI_BASE_CLASS_STORAGE 0x01
-#define PCI_CLASS_STORAGE_SCSI 0x0100
-#define PCI_CLASS_STORAGE_IDE 0x0101
-#define PCI_CLASS_STORAGE_FLOPPY 0x0102
-#define PCI_CLASS_STORAGE_IPI 0x0103
-#define PCI_CLASS_STORAGE_RAID 0x0104
-#define PCI_CLASS_STORAGE_OTHER 0x0180
-
-#define PCI_BASE_CLASS_NETWORK 0x02
-#define PCI_CLASS_NETWORK_ETHERNET 0x0200
-#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201
-#define PCI_CLASS_NETWORK_FDDI 0x0202
-#define PCI_CLASS_NETWORK_ATM 0x0203
-#define PCI_CLASS_NETWORK_OTHER 0x0280
-
-#define PCI_BASE_CLASS_DISPLAY 0x03
-#define PCI_CLASS_DISPLAY_VGA 0x0300
-#define PCI_CLASS_DISPLAY_XGA 0x0301
-#define PCI_CLASS_DISPLAY_3D 0x0302
-#define PCI_CLASS_DISPLAY_OTHER 0x0380
-
-#define PCI_BASE_CLASS_MULTIMEDIA 0x04
-#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400
-#define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401
-#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402
-#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480
-
-#define PCI_BASE_CLASS_MEMORY 0x05
-#define PCI_CLASS_MEMORY_RAM 0x0500
-#define PCI_CLASS_MEMORY_FLASH 0x0501
-#define PCI_CLASS_MEMORY_OTHER 0x0580
-
-#define PCI_BASE_CLASS_BRIDGE 0x06
-#define PCI_CLASS_BRIDGE_HOST 0x0600
-#define PCI_CLASS_BRIDGE_ISA 0x0601
-#define PCI_CLASS_BRIDGE_EISA 0x0602
-#define PCI_CLASS_BRIDGE_MC 0x0603
-#define PCI_CLASS_BRIDGE_PCI 0x0604
-#define PCI_CLASS_BRIDGE_PCMCIA 0x0605
-#define PCI_CLASS_BRIDGE_NUBUS 0x0606
-#define PCI_CLASS_BRIDGE_CARDBUS 0x0607
-#define PCI_CLASS_BRIDGE_RACEWAY 0x0608
-#define PCI_CLASS_BRIDGE_OTHER 0x0680
-
-#define PCI_BASE_CLASS_COMMUNICATION 0x07
-#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700
-#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
-#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
-#define PCI_CLASS_COMMUNICATION_MODEM 0x0703
-#define PCI_CLASS_COMMUNICATION_OTHER 0x0780
-
-#define PCI_BASE_CLASS_SYSTEM 0x08
-#define PCI_CLASS_SYSTEM_PIC 0x0800
-#define PCI_CLASS_SYSTEM_DMA 0x0801
-#define PCI_CLASS_SYSTEM_TIMER 0x0802
-#define PCI_CLASS_SYSTEM_RTC 0x0803
-#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
-#define PCI_CLASS_SYSTEM_OTHER 0x0880
-
-#define PCI_BASE_CLASS_INPUT 0x09
-#define PCI_CLASS_INPUT_KEYBOARD 0x0900
-#define PCI_CLASS_INPUT_PEN 0x0901
-#define PCI_CLASS_INPUT_MOUSE 0x0902
-#define PCI_CLASS_INPUT_SCANNER 0x0903
-#define PCI_CLASS_INPUT_GAMEPORT 0x0904
-#define PCI_CLASS_INPUT_OTHER 0x0980
-
-#define PCI_BASE_CLASS_DOCKING 0x0a
-#define PCI_CLASS_DOCKING_GENERIC 0x0a00
-#define PCI_CLASS_DOCKING_OTHER 0x0a80
-
-#define PCI_BASE_CLASS_PROCESSOR 0x0b
-#define PCI_CLASS_PROCESSOR_386 0x0b00
-#define PCI_CLASS_PROCESSOR_486 0x0b01
-#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02
-#define PCI_CLASS_PROCESSOR_ALPHA 0x0b10
-#define PCI_CLASS_PROCESSOR_POWERPC 0x0b20
-#define PCI_CLASS_PROCESSOR_MIPS 0x0b30
-#define PCI_CLASS_PROCESSOR_CO 0x0b40
-
-#define PCI_BASE_CLASS_SERIAL 0x0c
-#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00
-#define PCI_CLASS_SERIAL_ACCESS 0x0c01
-#define PCI_CLASS_SERIAL_SSA 0x0c02
-#define PCI_CLASS_SERIAL_USB 0x0c03
-#define PCI_CLASS_SERIAL_FIBER 0x0c04
-#define PCI_CLASS_SERIAL_SMBUS 0x0c05
-
-#define PCI_BASE_CLASS_INTELLIGENT 0x0e
-#define PCI_CLASS_INTELLIGENT_I2O 0x0e00
-
-#define PCI_BASE_CLASS_SATELLITE 0x0f
-#define PCI_CLASS_SATELLITE_TV 0x0f00
-#define PCI_CLASS_SATELLITE_AUDIO 0x0f01
-#define PCI_CLASS_SATELLITE_VOICE 0x0f03
-#define PCI_CLASS_SATELLITE_DATA 0x0f04
-
-#define PCI_BASE_CLASS_CRYPT 0x10
-#define PCI_CLASS_CRYPT_NETWORK 0x1000
-#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001
-#define PCI_CLASS_CRYPT_OTHER 0x1080
-
-#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11
-#define PCI_CLASS_SP_DPIO 0x1100
-#define PCI_CLASS_SP_OTHER 0x1180
-
-#define PCI_CLASS_OTHERS 0xff
-
-/* Vendors and devices. Sort key: vendor first, device next. */
-
-#define PCI_VENDOR_ID_DYNALINK 0x0675
-#define PCI_DEVICE_ID_DYNALINK_IS64PH 0x1702
-
-#define PCI_VENDOR_ID_BERKOM 0x0871
-#define PCI_DEVICE_ID_BERKOM_A1T 0xffa1
-#define PCI_DEVICE_ID_BERKOM_T_CONCEPT 0xffa2
-#define PCI_DEVICE_ID_BERKOM_A4T 0xffa4
-#define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8
-
-#define PCI_VENDOR_ID_COMPAQ 0x0e11
-#define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508
-#define PCI_DEVICE_ID_COMPAQ_1280 0x3033
-#define PCI_DEVICE_ID_COMPAQ_TRIFLEX 0x4000
-#define PCI_DEVICE_ID_COMPAQ_6010 0x6010
-#define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc
-#define PCI_DEVICE_ID_COMPAQ_SMART2P 0xae10
-#define PCI_DEVICE_ID_COMPAQ_NETEL100 0xae32
-#define PCI_DEVICE_ID_COMPAQ_TRIFLEX_IDE 0xae33
-#define PCI_DEVICE_ID_COMPAQ_NETEL10 0xae34
-#define PCI_DEVICE_ID_COMPAQ_NETFLEX3I 0xae35
-#define PCI_DEVICE_ID_COMPAQ_NETEL100D 0xae40
-#define PCI_DEVICE_ID_COMPAQ_NETEL100PI 0xae43
-#define PCI_DEVICE_ID_COMPAQ_NETEL100I 0xb011
-#define PCI_DEVICE_ID_COMPAQ_CISS 0xb060
-#define PCI_DEVICE_ID_COMPAQ_CISSB 0xb178
-#define PCI_DEVICE_ID_COMPAQ_CISSC 0x0046
-#define PCI_DEVICE_ID_COMPAQ_THUNDER 0xf130
-#define PCI_DEVICE_ID_COMPAQ_NETFLEX3B 0xf150
-
-#define PCI_VENDOR_ID_NCR 0x1000
-#define PCI_VENDOR_ID_LSI_LOGIC 0x1000
-#define PCI_DEVICE_ID_NCR_53C810 0x0001
-#define PCI_DEVICE_ID_NCR_53C820 0x0002
-#define PCI_DEVICE_ID_NCR_53C825 0x0003
-#define PCI_DEVICE_ID_NCR_53C815 0x0004
-#define PCI_DEVICE_ID_LSI_53C810AP 0x0005
-#define PCI_DEVICE_ID_NCR_53C860 0x0006
-#define PCI_DEVICE_ID_LSI_53C1510 0x000a
-#define PCI_DEVICE_ID_NCR_53C896 0x000b
-#define PCI_DEVICE_ID_NCR_53C895 0x000c
-#define PCI_DEVICE_ID_NCR_53C885 0x000d
-#define PCI_DEVICE_ID_NCR_53C875 0x000f
-#define PCI_DEVICE_ID_NCR_53C1510 0x0010
-#define PCI_DEVICE_ID_LSI_53C895A 0x0012
-#define PCI_DEVICE_ID_LSI_53C875A 0x0013
-#define PCI_DEVICE_ID_LSI_53C1010_33 0x0020
-#define PCI_DEVICE_ID_LSI_53C1010_66 0x0021
-#define PCI_DEVICE_ID_LSI_53C1030 0x0030
-#define PCI_DEVICE_ID_LSI_53C1035 0x0040
-#define PCI_DEVICE_ID_NCR_53C875J 0x008f
-#define PCI_DEVICE_ID_LSI_FC909 0x0621
-#define PCI_DEVICE_ID_LSI_FC929 0x0622
-#define PCI_DEVICE_ID_LSI_FC929_LAN 0x0623
-#define PCI_DEVICE_ID_LSI_FC919 0x0624
-#define PCI_DEVICE_ID_LSI_FC919_LAN 0x0625
-#define PCI_DEVICE_ID_LSI_FC929X 0x0626
-#define PCI_DEVICE_ID_LSI_FC919X 0x0628
-#define PCI_DEVICE_ID_NCR_YELLOWFIN 0x0701
-#define PCI_DEVICE_ID_LSI_61C102 0x0901
-#define PCI_DEVICE_ID_LSI_63C815 0x1000
-
-#define PCI_VENDOR_ID_ATI 0x1002
-/* Mach64 */
-#define PCI_DEVICE_ID_ATI_68800 0x4158
-#define PCI_DEVICE_ID_ATI_215CT222 0x4354
-#define PCI_DEVICE_ID_ATI_210888CX 0x4358
-#define PCI_DEVICE_ID_ATI_215ET222 0x4554
-/* Mach64 / Rage */
-#define PCI_DEVICE_ID_ATI_215GB 0x4742
-#define PCI_DEVICE_ID_ATI_215GD 0x4744
-#define PCI_DEVICE_ID_ATI_215GI 0x4749
-#define PCI_DEVICE_ID_ATI_215GP 0x4750
-#define PCI_DEVICE_ID_ATI_215GQ 0x4751
-#define PCI_DEVICE_ID_ATI_215XL 0x4752
-#define PCI_DEVICE_ID_ATI_215GT 0x4754
-#define PCI_DEVICE_ID_ATI_215GTB 0x4755
-#define PCI_DEVICE_ID_ATI_215_IV 0x4756
-#define PCI_DEVICE_ID_ATI_215_IW 0x4757
-#define PCI_DEVICE_ID_ATI_215_IZ 0x475A
-#define PCI_DEVICE_ID_ATI_210888GX 0x4758
-#define PCI_DEVICE_ID_ATI_215_LB 0x4c42
-#define PCI_DEVICE_ID_ATI_215_LD 0x4c44
-#define PCI_DEVICE_ID_ATI_215_LG 0x4c47
-#define PCI_DEVICE_ID_ATI_215_LI 0x4c49
-#define PCI_DEVICE_ID_ATI_215_LM 0x4c4D
-#define PCI_DEVICE_ID_ATI_215_LN 0x4c4E
-#define PCI_DEVICE_ID_ATI_215_LR 0x4c52
-#define PCI_DEVICE_ID_ATI_215_LS 0x4c53
-#define PCI_DEVICE_ID_ATI_264_LT 0x4c54
-/* Mach64 VT */
-#define PCI_DEVICE_ID_ATI_264VT 0x5654
-#define PCI_DEVICE_ID_ATI_264VU 0x5655
-#define PCI_DEVICE_ID_ATI_264VV 0x5656
-/* Rage128 Pro GL */
-#define PCI_DEVICE_ID_ATI_Rage128_PA 0x5041
-#define PCI_DEVICE_ID_ATI_Rage128_PB 0x5042
-#define PCI_DEVICE_ID_ATI_Rage128_PC 0x5043
-#define PCI_DEVICE_ID_ATI_Rage128_PD 0x5044
-#define PCI_DEVICE_ID_ATI_Rage128_PE 0x5045
-#define PCI_DEVICE_ID_ATI_RAGE128_PF 0x5046
-/* Rage128 Pro VR */
-#define PCI_DEVICE_ID_ATI_RAGE128_PG 0x5047
-#define PCI_DEVICE_ID_ATI_RAGE128_PH 0x5048
-#define PCI_DEVICE_ID_ATI_RAGE128_PI 0x5049
-#define PCI_DEVICE_ID_ATI_RAGE128_PJ 0x504A
-#define PCI_DEVICE_ID_ATI_RAGE128_PK 0x504B
-#define PCI_DEVICE_ID_ATI_RAGE128_PL 0x504C
-#define PCI_DEVICE_ID_ATI_RAGE128_PM 0x504D
-#define PCI_DEVICE_ID_ATI_RAGE128_PN 0x504E
-#define PCI_DEVICE_ID_ATI_RAGE128_PO 0x504F
-#define PCI_DEVICE_ID_ATI_RAGE128_PP 0x5050
-#define PCI_DEVICE_ID_ATI_RAGE128_PQ 0x5051
-#define PCI_DEVICE_ID_ATI_RAGE128_PR 0x5052
-#define PCI_DEVICE_ID_ATI_RAGE128_TR 0x5452
-#define PCI_DEVICE_ID_ATI_RAGE128_PS 0x5053
-#define PCI_DEVICE_ID_ATI_RAGE128_PT 0x5054
-#define PCI_DEVICE_ID_ATI_RAGE128_PU 0x5055
-#define PCI_DEVICE_ID_ATI_RAGE128_PV 0x5056
-#define PCI_DEVICE_ID_ATI_RAGE128_PW 0x5057
-#define PCI_DEVICE_ID_ATI_RAGE128_PX 0x5058
-/* Rage128 GL */
-#define PCI_DEVICE_ID_ATI_RAGE128_RE 0x5245
-#define PCI_DEVICE_ID_ATI_RAGE128_RF 0x5246
-#define PCI_DEVICE_ID_ATI_RAGE128_RG 0x534b
-#define PCI_DEVICE_ID_ATI_RAGE128_RH 0x534c
-#define PCI_DEVICE_ID_ATI_RAGE128_RI 0x534d
-/* Rage128 VR */
-#define PCI_DEVICE_ID_ATI_RAGE128_RK 0x524b
-#define PCI_DEVICE_ID_ATI_RAGE128_RL 0x524c
-#define PCI_DEVICE_ID_ATI_RAGE128_RM 0x5345
-#define PCI_DEVICE_ID_ATI_RAGE128_RN 0x5346
-#define PCI_DEVICE_ID_ATI_RAGE128_RO 0x5347
-/* Rage128 M3 */
-#define PCI_DEVICE_ID_ATI_RAGE128_LE 0x4c45
-#define PCI_DEVICE_ID_ATI_RAGE128_LF 0x4c46
-/* Rage128 Pro Ultra */
-#define PCI_DEVICE_ID_ATI_RAGE128_U1 0x5446
-#define PCI_DEVICE_ID_ATI_RAGE128_U2 0x544C
-#define PCI_DEVICE_ID_ATI_RAGE128_U3 0x5452
-/* Rage M4 */
-#define PCI_DEVICE_ID_ATI_RADEON_LE 0x4d45
-#define PCI_DEVICE_ID_ATI_RADEON_LF 0x4d46
-/* Radeon R100 */
-#define PCI_DEVICE_ID_ATI_RADEON_QD 0x5144
-#define PCI_DEVICE_ID_ATI_RADEON_QE 0x5145
-#define PCI_DEVICE_ID_ATI_RADEON_QF 0x5146
-#define PCI_DEVICE_ID_ATI_RADEON_QG 0x5147
-/* Radeon RV100 (VE) */
-#define PCI_DEVICE_ID_ATI_RADEON_QY 0x5159
-#define PCI_DEVICE_ID_ATI_RADEON_QZ 0x515a
-/* Radeon R200 (8500) */
-#define PCI_DEVICE_ID_ATI_RADEON_QL 0x514c
-#define PCI_DEVICE_ID_ATI_RADEON_QN 0x514e
-#define PCI_DEVICE_ID_ATI_RADEON_QO 0x514f
-#define PCI_DEVICE_ID_ATI_RADEON_Ql 0x516c
-#define PCI_DEVICE_ID_ATI_RADEON_BB 0x4242
-/* Radeon R200 (9100) */
-#define PCI_DEVICE_ID_ATI_RADEON_QM 0x514d
-/* Radeon RV200 (7500) */
-#define PCI_DEVICE_ID_ATI_RADEON_QW 0x5157
-#define PCI_DEVICE_ID_ATI_RADEON_QX 0x5158
-/* Radeon RV250 (9000) */
-#define PCI_DEVICE_ID_ATI_RADEON_Id 0x4964
-#define PCI_DEVICE_ID_ATI_RADEON_Ie 0x4965
-#define PCI_DEVICE_ID_ATI_RADEON_If 0x4966
-#define PCI_DEVICE_ID_ATI_RADEON_Ig 0x4967
-/* Radeon RV280 (9200) */
-#define PCI_DEVICE_ID_ATI_RADEON_Y_ 0x5960
-#define PCI_DEVICE_ID_ATI_RADEON_Ya 0x5961
-#define PCI_DEVICE_ID_ATI_RADEON_Yd 0x5964
-/* Radeon R300 (9700) */
-#define PCI_DEVICE_ID_ATI_RADEON_ND 0x4e44
-#define PCI_DEVICE_ID_ATI_RADEON_NE 0x4e45
-#define PCI_DEVICE_ID_ATI_RADEON_NF 0x4e46
-#define PCI_DEVICE_ID_ATI_RADEON_NG 0x4e47
-#define PCI_DEVICE_ID_ATI_RADEON_AE 0x4145
-#define PCI_DEVICE_ID_ATI_RADEON_AF 0x4146
-/* Radeon R300 (9500) */
-#define PCI_DEVICE_ID_ATI_RADEON_AD 0x4144
-/* Radeon R350 (9800) */
-#define PCI_DEVICE_ID_ATI_RADEON_NH 0x4e48
-#define PCI_DEVICE_ID_ATI_RADEON_NI 0x4e49
-/* Radeon RV350 (9600) */
-#define PCI_DEVICE_ID_ATI_RADEON_AP 0x4150
-#define PCI_DEVICE_ID_ATI_RADEON_AR 0x4152
-/* Radeon M6 */
-#define PCI_DEVICE_ID_ATI_RADEON_LY 0x4c59
-#define PCI_DEVICE_ID_ATI_RADEON_LZ 0x4c5a
-/* Radeon M7 */
-#define PCI_DEVICE_ID_ATI_RADEON_LW 0x4c57
-#define PCI_DEVICE_ID_ATI_RADEON_LX 0x4c58
-/* Radeon M9 */
-#define PCI_DEVICE_ID_ATI_RADEON_Ld 0x4c64
-#define PCI_DEVICE_ID_ATI_RADEON_Le 0x4c65
-#define PCI_DEVICE_ID_ATI_RADEON_Lf 0x4c66
-#define PCI_DEVICE_ID_ATI_RADEON_Lg 0x4c67
-/* RadeonIGP */
-#define PCI_DEVICE_ID_ATI_RADEON_IGP 0xCAB0
-/* ATI IXP Chipset */
-#define PCI_DEVICE_ID_ATI_IXP_IDE 0x4349
-
-#define PCI_VENDOR_ID_VLSI 0x1004
-#define PCI_DEVICE_ID_VLSI_82C592 0x0005
-#define PCI_DEVICE_ID_VLSI_82C593 0x0006
-#define PCI_DEVICE_ID_VLSI_82C594 0x0007
-#define PCI_DEVICE_ID_VLSI_82C597 0x0009
-#define PCI_DEVICE_ID_VLSI_82C541 0x000c
-#define PCI_DEVICE_ID_VLSI_82C543 0x000d
-#define PCI_DEVICE_ID_VLSI_82C532 0x0101
-#define PCI_DEVICE_ID_VLSI_82C534 0x0102
-#define PCI_DEVICE_ID_VLSI_82C535 0x0104
-#define PCI_DEVICE_ID_VLSI_82C147 0x0105
-#define PCI_DEVICE_ID_VLSI_VAS96011 0x0702
-
-#define PCI_VENDOR_ID_ADL 0x1005
-#define PCI_DEVICE_ID_ADL_2301 0x2301
-
-#define PCI_VENDOR_ID_NS 0x100b
-#define PCI_DEVICE_ID_NS_87415 0x0002
-#define PCI_DEVICE_ID_NS_87560_LIO 0x000e
-#define PCI_DEVICE_ID_NS_87560_USB 0x0012
-#define PCI_DEVICE_ID_NS_83815 0x0020
-#define PCI_DEVICE_ID_NS_83820 0x0022
-#define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500
-#define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501
-#define PCI_DEVICE_ID_NS_SCx200_IDE 0x0502
-#define PCI_DEVICE_ID_NS_SCx200_AUDIO 0x0503
-#define PCI_DEVICE_ID_NS_SCx200_VIDEO 0x0504
-#define PCI_DEVICE_ID_NS_SCx200_XBUS 0x0505
-#define PCI_DEVICE_ID_NS_87410 0xd001
-
-#define PCI_VENDOR_ID_TSENG 0x100c
-#define PCI_DEVICE_ID_TSENG_W32P_2 0x3202
-#define PCI_DEVICE_ID_TSENG_W32P_b 0x3205
-#define PCI_DEVICE_ID_TSENG_W32P_c 0x3206
-#define PCI_DEVICE_ID_TSENG_W32P_d 0x3207
-#define PCI_DEVICE_ID_TSENG_ET6000 0x3208
-
-#define PCI_VENDOR_ID_WEITEK 0x100e
-#define PCI_DEVICE_ID_WEITEK_P9000 0x9001
-#define PCI_DEVICE_ID_WEITEK_P9100 0x9100
-
-#define PCI_VENDOR_ID_DEC 0x1011
-#define PCI_DEVICE_ID_DEC_BRD 0x0001
-#define PCI_DEVICE_ID_DEC_TULIP 0x0002
-#define PCI_DEVICE_ID_DEC_TGA 0x0004
-#define PCI_DEVICE_ID_DEC_TULIP_FAST 0x0009
-#define PCI_DEVICE_ID_DEC_TGA2 0x000D
-#define PCI_DEVICE_ID_DEC_FDDI 0x000F
-#define PCI_DEVICE_ID_DEC_TULIP_PLUS 0x0014
-#define PCI_DEVICE_ID_DEC_21142 0x0019
-#define PCI_DEVICE_ID_DEC_21052 0x0021
-#define PCI_DEVICE_ID_DEC_21150 0x0022
-#define PCI_DEVICE_ID_DEC_21152 0x0024
-#define PCI_DEVICE_ID_DEC_21153 0x0025
-#define PCI_DEVICE_ID_DEC_21154 0x0026
-#define PCI_DEVICE_ID_DEC_21285 0x1065
-#define PCI_DEVICE_ID_COMPAQ_42XX 0x0046
-
-#define PCI_VENDOR_ID_CIRRUS 0x1013
-#define PCI_DEVICE_ID_CIRRUS_7548 0x0038
-#define PCI_DEVICE_ID_CIRRUS_5430 0x00a0
-#define PCI_DEVICE_ID_CIRRUS_5434_4 0x00a4
-#define PCI_DEVICE_ID_CIRRUS_5434_8 0x00a8
-#define PCI_DEVICE_ID_CIRRUS_5436 0x00ac
-#define PCI_DEVICE_ID_CIRRUS_5446 0x00b8
-#define PCI_DEVICE_ID_CIRRUS_5480 0x00bc
-#define PCI_DEVICE_ID_CIRRUS_5462 0x00d0
-#define PCI_DEVICE_ID_CIRRUS_5464 0x00d4
-#define PCI_DEVICE_ID_CIRRUS_5465 0x00d6
-#define PCI_DEVICE_ID_CIRRUS_6729 0x1100
-#define PCI_DEVICE_ID_CIRRUS_6832 0x1110
-#define PCI_DEVICE_ID_CIRRUS_7542 0x1200
-#define PCI_DEVICE_ID_CIRRUS_7543 0x1202
-#define PCI_DEVICE_ID_CIRRUS_7541 0x1204
-
-#define PCI_VENDOR_ID_IBM 0x1014
-#define PCI_DEVICE_ID_IBM_FIRE_CORAL 0x000a
-#define PCI_DEVICE_ID_IBM_TR 0x0018
-#define PCI_DEVICE_ID_IBM_82G2675 0x001d
-#define PCI_DEVICE_ID_IBM_MCA 0x0020
-#define PCI_DEVICE_ID_IBM_82351 0x0022
-#define PCI_DEVICE_ID_IBM_PYTHON 0x002d
-#define PCI_DEVICE_ID_IBM_SERVERAID 0x002e
-#define PCI_DEVICE_ID_IBM_TR_WAKE 0x003e
-#define PCI_DEVICE_ID_IBM_MPIC 0x0046
-#define PCI_DEVICE_ID_IBM_3780IDSP 0x007d
-#define PCI_DEVICE_ID_IBM_CHUKAR 0x0096
-#define PCI_DEVICE_ID_IBM_CPC710_PCI64 0x00fc
-#define PCI_DEVICE_ID_IBM_CPC710_PCI32 0x0105
-#define PCI_DEVICE_ID_IBM_405GP 0x0156
-#define PCI_DEVICE_ID_IBM_SERVERAIDI960 0x01bd
-#define PCI_DEVICE_ID_IBM_MPIC_2 0xffff
-
-#define PCI_VENDOR_ID_COMPEX2 0x101a // pci.ids says "AT&T GIS (NCR)"
-#define PCI_DEVICE_ID_COMPEX2_100VG 0x0005
-
-#define PCI_VENDOR_ID_WD 0x101c
-#define PCI_DEVICE_ID_WD_7197 0x3296
-#define PCI_DEVICE_ID_WD_90C 0xc24a
-
-#define PCI_VENDOR_ID_AMI 0x101e
-#define PCI_DEVICE_ID_AMI_MEGARAID3 0x1960
-#define PCI_DEVICE_ID_AMI_MEGARAID 0x9010
-#define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060
-
-#define PCI_VENDOR_ID_AMD 0x1022
-#define PCI_DEVICE_ID_AMD_LANCE 0x2000
-#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
-#define PCI_DEVICE_ID_AMD_SCSI 0x2020
-#define PCI_DEVICE_ID_AMD_SERENADE 0x36c0
-#define PCI_DEVICE_ID_AMD_FE_GATE_7006 0x7006
-#define PCI_DEVICE_ID_AMD_FE_GATE_7007 0x7007
-#define PCI_DEVICE_ID_AMD_FE_GATE_700C 0x700C
-#define PCI_DEVICE_ID_AMD_FE_GATE_700D 0x700D
-#define PCI_DEVICE_ID_AMD_FE_GATE_700E 0x700E
-#define PCI_DEVICE_ID_AMD_FE_GATE_700F 0x700F
-#define PCI_DEVICE_ID_AMD_COBRA_7400 0x7400
-#define PCI_DEVICE_ID_AMD_COBRA_7401 0x7401
-#define PCI_DEVICE_ID_AMD_COBRA_7403 0x7403
-#define PCI_DEVICE_ID_AMD_COBRA_7404 0x7404
-#define PCI_DEVICE_ID_AMD_VIPER_7408 0x7408
-#define PCI_DEVICE_ID_AMD_VIPER_7409 0x7409
-#define PCI_DEVICE_ID_AMD_VIPER_740B 0x740B
-#define PCI_DEVICE_ID_AMD_VIPER_740C 0x740C
-#define PCI_DEVICE_ID_AMD_VIPER_7410 0x7410
-#define PCI_DEVICE_ID_AMD_VIPER_7411 0x7411
-#define PCI_DEVICE_ID_AMD_VIPER_7413 0x7413
-#define PCI_DEVICE_ID_AMD_VIPER_7414 0x7414
-#define PCI_DEVICE_ID_AMD_OPUS_7440 0x7440
-# define PCI_DEVICE_ID_AMD_VIPER_7440 PCI_DEVICE_ID_AMD_OPUS_7440
-#define PCI_DEVICE_ID_AMD_OPUS_7441 0x7441
-# define PCI_DEVICE_ID_AMD_VIPER_7441 PCI_DEVICE_ID_AMD_OPUS_7441
-#define PCI_DEVICE_ID_AMD_OPUS_7443 0x7443
-# define PCI_DEVICE_ID_AMD_VIPER_7443 PCI_DEVICE_ID_AMD_OPUS_7443
-#define PCI_DEVICE_ID_AMD_OPUS_7448 0x7448
-# define PCI_DEVICE_ID_AMD_VIPER_7448 PCI_DEVICE_ID_AMD_OPUS_7448
-#define PCI_DEVICE_ID_AMD_OPUS_7449 0x7449
-# define PCI_DEVICE_ID_AMD_VIPER_7449 PCI_DEVICE_ID_AMD_OPUS_7449
-#define PCI_DEVICE_ID_AMD_8111_LAN 0x7462
-#define PCI_DEVICE_ID_AMD_8111_IDE 0x7469
-#define PCI_DEVICE_ID_AMD_8111_AC97 0x746d
-#define PCI_DEVICE_ID_AMD_8131_APIC 0x7450
-
-#define PCI_VENDOR_ID_TRIDENT 0x1023
-#define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX 0x2000
-#define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX 0x2001
-#define PCI_DEVICE_ID_TRIDENT_9320 0x9320
-#define PCI_DEVICE_ID_TRIDENT_9388 0x9388
-#define PCI_DEVICE_ID_TRIDENT_9397 0x9397
-#define PCI_DEVICE_ID_TRIDENT_939A 0x939A
-#define PCI_DEVICE_ID_TRIDENT_9520 0x9520
-#define PCI_DEVICE_ID_TRIDENT_9525 0x9525
-#define PCI_DEVICE_ID_TRIDENT_9420 0x9420
-#define PCI_DEVICE_ID_TRIDENT_9440 0x9440
-#define PCI_DEVICE_ID_TRIDENT_9660 0x9660
-#define PCI_DEVICE_ID_TRIDENT_9750 0x9750
-#define PCI_DEVICE_ID_TRIDENT_9850 0x9850
-#define PCI_DEVICE_ID_TRIDENT_9880 0x9880
-#define PCI_DEVICE_ID_TRIDENT_8400 0x8400
-#define PCI_DEVICE_ID_TRIDENT_8420 0x8420
-#define PCI_DEVICE_ID_TRIDENT_8500 0x8500
-
-#define PCI_VENDOR_ID_AI 0x1025
-#define PCI_DEVICE_ID_AI_M1435 0x1435
-
-#define PCI_VENDOR_ID_DELL 0x1028
-
-#define PCI_VENDOR_ID_MATROX 0x102B
-#define PCI_DEVICE_ID_MATROX_MGA_2 0x0518
-#define PCI_DEVICE_ID_MATROX_MIL 0x0519
-#define PCI_DEVICE_ID_MATROX_MYS 0x051A
-#define PCI_DEVICE_ID_MATROX_MIL_2 0x051b
-#define PCI_DEVICE_ID_MATROX_MIL_2_AGP 0x051f
-#define PCI_DEVICE_ID_MATROX_MGA_IMP 0x0d10
-#define PCI_DEVICE_ID_MATROX_G100_MM 0x1000
-#define PCI_DEVICE_ID_MATROX_G100_AGP 0x1001
-#define PCI_DEVICE_ID_MATROX_G200_PCI 0x0520
-#define PCI_DEVICE_ID_MATROX_G200_AGP 0x0521
-#define PCI_DEVICE_ID_MATROX_G400 0x0525
-#define PCI_DEVICE_ID_MATROX_G550 0x2527
-#define PCI_DEVICE_ID_MATROX_VIA 0x4536
-
-#define PCI_VENDOR_ID_CT 0x102c
-#define PCI_DEVICE_ID_CT_65545 0x00d8
-#define PCI_DEVICE_ID_CT_65548 0x00dc
-#define PCI_DEVICE_ID_CT_65550 0x00e0
-#define PCI_DEVICE_ID_CT_65554 0x00e4
-#define PCI_DEVICE_ID_CT_65555 0x00e5
-
-#define PCI_VENDOR_ID_MIRO 0x1031
-#define PCI_DEVICE_ID_MIRO_36050 0x5601
-
-#define PCI_VENDOR_ID_NEC 0x1033
-#define PCI_DEVICE_ID_NEC_NAPCCARD 0x003e
-#define PCI_DEVICE_ID_NEC_PCX2 0x0046
-#define PCI_DEVICE_ID_NEC_NILE4 0x005a
-#define PCI_DEVICE_ID_NEC_VRC5476 0x009b
-#define PCI_DEVICE_ID_NEC_VRC4173 0x00a5
-#define PCI_DEVICE_ID_NEC_VRC5477_AC97 0x00a6
-
-#define PCI_VENDOR_ID_FD 0x1036
-#define PCI_DEVICE_ID_FD_36C70 0x0000
-
-#define PCI_VENDOR_ID_SI 0x1039
-#define PCI_DEVICE_ID_SI_5591_AGP 0x0001
-#define PCI_DEVICE_ID_SI_6202 0x0002
-#define PCI_DEVICE_ID_SI_503 0x0008
-#define PCI_DEVICE_ID_SI_ACPI 0x0009
-#define PCI_DEVICE_ID_SI_180 0x0180
-#define PCI_DEVICE_ID_SI_5597_VGA 0x0200
-#define PCI_DEVICE_ID_SI_6205 0x0205
-#define PCI_DEVICE_ID_SI_501 0x0406
-#define PCI_DEVICE_ID_SI_496 0x0496
-#define PCI_DEVICE_ID_SI_300 0x0300
-#define PCI_DEVICE_ID_SI_315H 0x0310
-#define PCI_DEVICE_ID_SI_315 0x0315
-#define PCI_DEVICE_ID_SI_315PRO 0x0325
-#define PCI_DEVICE_ID_SI_530 0x0530
-#define PCI_DEVICE_ID_SI_540 0x0540
-#define PCI_DEVICE_ID_SI_550 0x0550
-#define PCI_DEVICE_ID_SI_540_VGA 0x5300
-#define PCI_DEVICE_ID_SI_550_VGA 0x5315
-#define PCI_DEVICE_ID_SI_601 0x0601
-#define PCI_DEVICE_ID_SI_620 0x0620
-#define PCI_DEVICE_ID_SI_630 0x0630
-#define PCI_DEVICE_ID_SI_633 0x0633
-#define PCI_DEVICE_ID_SI_635 0x0635
-#define PCI_DEVICE_ID_SI_640 0x0640
-#define PCI_DEVICE_ID_SI_645 0x0645
-#define PCI_DEVICE_ID_SI_646 0x0646
-#define PCI_DEVICE_ID_SI_648 0x0648
-#define PCI_DEVICE_ID_SI_650 0x0650
-#define PCI_DEVICE_ID_SI_651 0x0651
-#define PCI_DEVICE_ID_SI_652 0x0652
-#define PCI_DEVICE_ID_SI_655 0x0655
-#define PCI_DEVICE_ID_SI_730 0x0730
-#define PCI_DEVICE_ID_SI_733 0x0733
-#define PCI_DEVICE_ID_SI_630_VGA 0x6300
-#define PCI_DEVICE_ID_SI_730_VGA 0x7300
-#define PCI_DEVICE_ID_SI_735 0x0735
-#define PCI_DEVICE_ID_SI_740 0x0740
-#define PCI_DEVICE_ID_SI_745 0x0745
-#define PCI_DEVICE_ID_SI_746 0x0746
-#define PCI_DEVICE_ID_SI_748 0x0748
-#define PCI_DEVICE_ID_SI_750 0x0750
-#define PCI_DEVICE_ID_SI_751 0x0751
-#define PCI_DEVICE_ID_SI_752 0x0752
-#define PCI_DEVICE_ID_SI_755 0x0755
-#define PCI_DEVICE_ID_SI_900 0x0900
-#define PCI_DEVICE_ID_SI_5107 0x5107
-#define PCI_DEVICE_ID_SI_5300 0x5300
-#define PCI_DEVICE_ID_SI_5511 0x5511
-#define PCI_DEVICE_ID_SI_5513 0x5513
-#define PCI_DEVICE_ID_SI_5518 0x5518
-#define PCI_DEVICE_ID_SI_5571 0x5571
-#define PCI_DEVICE_ID_SI_5581 0x5581
-#define PCI_DEVICE_ID_SI_5582 0x5582
-#define PCI_DEVICE_ID_SI_5591 0x5591
-#define PCI_DEVICE_ID_SI_5596 0x5596
-#define PCI_DEVICE_ID_SI_5597 0x5597
-#define PCI_DEVICE_ID_SI_5598 0x5598
-#define PCI_DEVICE_ID_SI_5600 0x5600
-#define PCI_DEVICE_ID_SI_6300 0x6300
-#define PCI_DEVICE_ID_SI_6306 0x6306
-#define PCI_DEVICE_ID_SI_6326 0x6326
-#define PCI_DEVICE_ID_SI_7001 0x7001
-#define PCI_DEVICE_ID_SI_7016 0x7016
-
-#define PCI_VENDOR_ID_HP 0x103c
-#define PCI_DEVICE_ID_HP_DONNER_GFX 0x1008
-#define PCI_DEVICE_ID_HP_TACHYON 0x1028
-#define PCI_DEVICE_ID_HP_TACHLITE 0x1029
-#define PCI_DEVICE_ID_HP_J2585A 0x1030
-#define PCI_DEVICE_ID_HP_J2585B 0x1031
-#define PCI_DEVICE_ID_HP_SAS 0x1048
-#define PCI_DEVICE_ID_HP_DIVA1 0x1049
-#define PCI_DEVICE_ID_HP_DIVA2 0x104A
-#define PCI_DEVICE_ID_HP_SP2_0 0x104B
-#define PCI_DEVICE_ID_HP_PCI_LBA 0x1054
-#define PCI_DEVICE_ID_HP_REO_SBA 0x10f0
-#define PCI_DEVICE_ID_HP_REO_IOC 0x10f1
-#define PCI_DEVICE_ID_HP_ZX1_SBA 0x1229
-#define PCI_DEVICE_ID_HP_ZX1_IOC 0x122a
-#define PCI_DEVICE_ID_HP_PCIX_LBA 0x122e
-#define PCI_DEVICE_ID_HP_SX1000_IOC 0x127c
-
-#define PCI_VENDOR_ID_PCTECH 0x1042
-#define PCI_DEVICE_ID_PCTECH_RZ1000 0x1000
-#define PCI_DEVICE_ID_PCTECH_RZ1001 0x1001
-#define PCI_DEVICE_ID_PCTECH_SAMURAI_0 0x3000
-#define PCI_DEVICE_ID_PCTECH_SAMURAI_1 0x3010
-#define PCI_DEVICE_ID_PCTECH_SAMURAI_IDE 0x3020
-
-#define PCI_VENDOR_ID_ASUSTEK 0x1043
-#define PCI_DEVICE_ID_ASUSTEK_0675 0x0675
-
-#define PCI_VENDOR_ID_DPT 0x1044
-#define PCI_DEVICE_ID_DPT 0xa400
-
-#define PCI_VENDOR_ID_OPTI 0x1045
-#define PCI_DEVICE_ID_OPTI_92C178 0xc178
-#define PCI_DEVICE_ID_OPTI_82C557 0xc557
-#define PCI_DEVICE_ID_OPTI_82C558 0xc558
-#define PCI_DEVICE_ID_OPTI_82C621 0xc621
-#define PCI_DEVICE_ID_OPTI_82C700 0xc700
-#define PCI_DEVICE_ID_OPTI_82C701 0xc701
-#define PCI_DEVICE_ID_OPTI_82C814 0xc814
-#define PCI_DEVICE_ID_OPTI_82C822 0xc822
-#define PCI_DEVICE_ID_OPTI_82C861 0xc861
-#define PCI_DEVICE_ID_OPTI_82C825 0xd568
-
-#define PCI_VENDOR_ID_ELSA 0x1048
-#define PCI_DEVICE_ID_ELSA_MICROLINK 0x1000
-#define PCI_DEVICE_ID_ELSA_QS3000 0x3000
-
-#define PCI_VENDOR_ID_SGS 0x104a
-#define PCI_DEVICE_ID_SGS_2000 0x0008
-#define PCI_DEVICE_ID_SGS_1764 0x0009
-
-#define PCI_VENDOR_ID_BUSLOGIC 0x104B
-#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
-#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER 0x1040
-#define PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT 0x8130
-
-#define PCI_VENDOR_ID_TI 0x104c
-#define PCI_DEVICE_ID_TI_TVP4010 0x3d04
-#define PCI_DEVICE_ID_TI_TVP4020 0x3d07
-#define PCI_DEVICE_ID_TI_1130 0xac12
-#define PCI_DEVICE_ID_TI_1031 0xac13
-#define PCI_DEVICE_ID_TI_1131 0xac15
-#define PCI_DEVICE_ID_TI_1250 0xac16
-#define PCI_DEVICE_ID_TI_1220 0xac17
-#define PCI_DEVICE_ID_TI_1221 0xac19
-#define PCI_DEVICE_ID_TI_1210 0xac1a
-#define PCI_DEVICE_ID_TI_1410 0xac50
-#define PCI_DEVICE_ID_TI_1450 0xac1b
-#define PCI_DEVICE_ID_TI_1225 0xac1c
-#define PCI_DEVICE_ID_TI_1251A 0xac1d
-#define PCI_DEVICE_ID_TI_1211 0xac1e
-#define PCI_DEVICE_ID_TI_1251B 0xac1f
-#define PCI_DEVICE_ID_TI_4410 0xac41
-#define PCI_DEVICE_ID_TI_4451 0xac42
-#define PCI_DEVICE_ID_TI_1420 0xac51
-#define PCI_DEVICE_ID_TI_1520 0xac55
-#define PCI_DEVICE_ID_TI_1510 0xac56
-
-#define PCI_VENDOR_ID_SONY 0x104d
-#define PCI_DEVICE_ID_SONY_CXD3222 0x8039
-
-#define PCI_VENDOR_ID_OAK 0x104e
-#define PCI_DEVICE_ID_OAK_OTI107 0x0107
-
-/* Winbond have two vendor IDs! See 0x10ad as well */
-#define PCI_VENDOR_ID_WINBOND2 0x1050
-#define PCI_DEVICE_ID_WINBOND2_89C940 0x0940
-#define PCI_DEVICE_ID_WINBOND2_89C940F 0x5a5a
-#define PCI_DEVICE_ID_WINBOND2_6692 0x6692
-
-#define PCI_VENDOR_ID_ANIGMA 0x1051
-#define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100
-
-#define PCI_VENDOR_ID_EFAR 0x1055
-#define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130
-#define PCI_DEVICE_ID_EFAR_SLC90E66_0 0x9460
-#define PCI_DEVICE_ID_EFAR_SLC90E66_2 0x9462
-#define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463
-
-#define PCI_VENDOR_ID_MOTOROLA 0x1057
-#define PCI_VENDOR_ID_MOTOROLA_OOPS 0x1507
-#define PCI_DEVICE_ID_MOTOROLA_MPC105 0x0001
-#define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002
-#define PCI_DEVICE_ID_MOTOROLA_MPC107 0x0004
-#define PCI_DEVICE_ID_MOTOROLA_RAVEN 0x4801
-#define PCI_DEVICE_ID_MOTOROLA_FALCON 0x4802
-#define PCI_DEVICE_ID_MOTOROLA_HAWK 0x4803
-#define PCI_DEVICE_ID_MOTOROLA_CPX8216 0x4806
-
-#define PCI_VENDOR_ID_PROMISE 0x105a
-#define PCI_DEVICE_ID_PROMISE_20265 0x0d30
-#define PCI_DEVICE_ID_PROMISE_20267 0x4d30
-#define PCI_DEVICE_ID_PROMISE_20246 0x4d33
-#define PCI_DEVICE_ID_PROMISE_20262 0x4d38
-#define PCI_DEVICE_ID_PROMISE_20263 0x0D38
-#define PCI_DEVICE_ID_PROMISE_20268 0x4d68
-#define PCI_DEVICE_ID_PROMISE_20270 0x6268
-#define PCI_DEVICE_ID_PROMISE_20269 0x4d69
-#define PCI_DEVICE_ID_PROMISE_20271 0x6269
-#define PCI_DEVICE_ID_PROMISE_20275 0x1275
-#define PCI_DEVICE_ID_PROMISE_20276 0x5275
-#define PCI_DEVICE_ID_PROMISE_20277 0x7275
-#define PCI_DEVICE_ID_PROMISE_5300 0x5300
-
-#define PCI_VENDOR_ID_N9 0x105d
-#define PCI_DEVICE_ID_N9_I128 0x2309
-#define PCI_DEVICE_ID_N9_I128_2 0x2339
-#define PCI_DEVICE_ID_N9_I128_T2R 0x493d
-
-#define PCI_VENDOR_ID_UMC 0x1060
-#define PCI_DEVICE_ID_UMC_UM8673F 0x0101
-#define PCI_DEVICE_ID_UMC_UM8891A 0x0891
-#define PCI_DEVICE_ID_UMC_UM8886BF 0x673a
-#define PCI_DEVICE_ID_UMC_UM8886A 0x886a
-#define PCI_DEVICE_ID_UMC_UM8881F 0x8881
-#define PCI_DEVICE_ID_UMC_UM8886F 0x8886
-#define PCI_DEVICE_ID_UMC_UM9017F 0x9017
-#define PCI_DEVICE_ID_UMC_UM8886N 0xe886
-#define PCI_DEVICE_ID_UMC_UM8891N 0xe891
-
-#define PCI_VENDOR_ID_X 0x1061
-#define PCI_DEVICE_ID_X_AGX016 0x0001
-
-#define PCI_VENDOR_ID_MYLEX 0x1069
-#define PCI_DEVICE_ID_MYLEX_DAC960_P 0x0001
-#define PCI_DEVICE_ID_MYLEX_DAC960_PD 0x0002
-#define PCI_DEVICE_ID_MYLEX_DAC960_PG 0x0010
-#define PCI_DEVICE_ID_MYLEX_DAC960_LA 0x0020
-#define PCI_DEVICE_ID_MYLEX_DAC960_LP 0x0050
-#define PCI_DEVICE_ID_MYLEX_DAC960_BA 0xBA56
-
-#define PCI_VENDOR_ID_PICOP 0x1066
-#define PCI_DEVICE_ID_PICOP_PT86C52X 0x0001
-#define PCI_DEVICE_ID_PICOP_PT80C524 0x8002
-
-#define PCI_VENDOR_ID_APPLE 0x106b
-#define PCI_DEVICE_ID_APPLE_BANDIT 0x0001
-#define PCI_DEVICE_ID_APPLE_GC 0x0002
-#define PCI_DEVICE_ID_APPLE_HYDRA 0x000e
-#define PCI_DEVICE_ID_APPLE_UNI_N_FW 0x0018
-#define PCI_DEVICE_ID_APPLE_KL_USB 0x0019
-#define PCI_DEVICE_ID_APPLE_UNI_N_AGP 0x0020
-#define PCI_DEVICE_ID_APPLE_UNI_N_GMAC 0x0021
-#define PCI_DEVICE_ID_APPLE_KEYLARGO 0x0022
-#define PCI_DEVICE_ID_APPLE_UNI_N_GMACP 0x0024
-#define PCI_DEVICE_ID_APPLE_KEYLARGO_P 0x0025
-#define PCI_DEVICE_ID_APPLE_KL_USB_P 0x0026
-#define PCI_DEVICE_ID_APPLE_UNI_N_AGP_P 0x0027
-#define PCI_DEVICE_ID_APPLE_UNI_N_AGP15 0x002d
-#define PCI_DEVICE_ID_APPLE_UNI_N_FW2 0x0030
-#define PCI_DEVICE_ID_APPLE_TIGON3 0x1645
-
-#define PCI_VENDOR_ID_YAMAHA 0x1073
-#define PCI_DEVICE_ID_YAMAHA_724 0x0004
-#define PCI_DEVICE_ID_YAMAHA_724F 0x000d
-#define PCI_DEVICE_ID_YAMAHA_740 0x000a
-#define PCI_DEVICE_ID_YAMAHA_740C 0x000c
-#define PCI_DEVICE_ID_YAMAHA_744 0x0010
-#define PCI_DEVICE_ID_YAMAHA_754 0x0012
-
-#define PCI_VENDOR_ID_NEXGEN 0x1074
-#define PCI_DEVICE_ID_NEXGEN_82C501 0x4e78
-
-#define PCI_VENDOR_ID_QLOGIC 0x1077
-#define PCI_DEVICE_ID_QLOGIC_ISP1020 0x1020
-#define PCI_DEVICE_ID_QLOGIC_ISP1022 0x1022
-#define PCI_DEVICE_ID_QLOGIC_ISP2100 0x2100
-#define PCI_DEVICE_ID_QLOGIC_ISP2200 0x2200
-
-#define PCI_VENDOR_ID_CYRIX 0x1078
-#define PCI_DEVICE_ID_CYRIX_5510 0x0000
-#define PCI_DEVICE_ID_CYRIX_PCI_MASTER 0x0001
-#define PCI_DEVICE_ID_CYRIX_5520 0x0002
-#define PCI_DEVICE_ID_CYRIX_5530_LEGACY 0x0100
-#define PCI_DEVICE_ID_CYRIX_5530_SMI 0x0101
-#define PCI_DEVICE_ID_CYRIX_5530_IDE 0x0102
-#define PCI_DEVICE_ID_CYRIX_5530_AUDIO 0x0103
-#define PCI_DEVICE_ID_CYRIX_5530_VIDEO 0x0104
-
-#define PCI_VENDOR_ID_LEADTEK 0x107d
-#define PCI_DEVICE_ID_LEADTEK_805 0x0000
-
-#define PCI_VENDOR_ID_INTERPHASE 0x107e
-#define PCI_DEVICE_ID_INTERPHASE_5526 0x0004
-#define PCI_DEVICE_ID_INTERPHASE_55x6 0x0005
-#define PCI_DEVICE_ID_INTERPHASE_5575 0x0008
-
-#define PCI_VENDOR_ID_CONTAQ 0x1080
-#define PCI_DEVICE_ID_CONTAQ_82C599 0x0600
-#define PCI_DEVICE_ID_CONTAQ_82C693 0xc693
-
-#define PCI_VENDOR_ID_FOREX 0x1083
-
-#define PCI_VENDOR_ID_OLICOM 0x108d
-#define PCI_DEVICE_ID_OLICOM_OC3136 0x0001
-#define PCI_DEVICE_ID_OLICOM_OC2315 0x0011
-#define PCI_DEVICE_ID_OLICOM_OC2325 0x0012
-#define PCI_DEVICE_ID_OLICOM_OC2183 0x0013
-#define PCI_DEVICE_ID_OLICOM_OC2326 0x0014
-#define PCI_DEVICE_ID_OLICOM_OC6151 0x0021
-
-#define PCI_VENDOR_ID_SUN 0x108e
-#define PCI_DEVICE_ID_SUN_EBUS 0x1000
-#define PCI_DEVICE_ID_SUN_HAPPYMEAL 0x1001
-#define PCI_DEVICE_ID_SUN_RIO_EBUS 0x1100
-#define PCI_DEVICE_ID_SUN_RIO_GEM 0x1101
-#define PCI_DEVICE_ID_SUN_RIO_1394 0x1102
-#define PCI_DEVICE_ID_SUN_RIO_USB 0x1103
-#define PCI_DEVICE_ID_SUN_GEM 0x2bad
-#define PCI_DEVICE_ID_SUN_SIMBA 0x5000
-#define PCI_DEVICE_ID_SUN_PBM 0x8000
-#define PCI_DEVICE_ID_SUN_SCHIZO 0x8001
-#define PCI_DEVICE_ID_SUN_SABRE 0xa000
-#define PCI_DEVICE_ID_SUN_HUMMINGBIRD 0xa001
-#define PCI_DEVICE_ID_SUN_TOMATILLO 0xa801
-
-#define PCI_VENDOR_ID_CMD 0x1095
-#define PCI_DEVICE_ID_SII_1210SA 0x0240
-
-#define PCI_DEVICE_ID_CMD_640 0x0640
-#define PCI_DEVICE_ID_CMD_643 0x0643
-#define PCI_DEVICE_ID_CMD_646 0x0646
-#define PCI_DEVICE_ID_CMD_647 0x0647
-#define PCI_DEVICE_ID_CMD_648 0x0648
-#define PCI_DEVICE_ID_CMD_649 0x0649
-#define PCI_DEVICE_ID_CMD_670 0x0670
-
-#define PCI_DEVICE_ID_SII_680 0x0680
-#define PCI_DEVICE_ID_SII_3112 0x3112
-
-#define PCI_VENDOR_ID_VISION 0x1098
-#define PCI_DEVICE_ID_VISION_QD8500 0x0001
-#define PCI_DEVICE_ID_VISION_QD8580 0x0002
-
-#define PCI_VENDOR_ID_BROOKTREE 0x109e
-#define PCI_DEVICE_ID_BROOKTREE_848 0x0350
-#define PCI_DEVICE_ID_BROOKTREE_849A 0x0351
-#define PCI_DEVICE_ID_BROOKTREE_878_1 0x036e
-#define PCI_DEVICE_ID_BROOKTREE_878 0x0878
-#define PCI_DEVICE_ID_BROOKTREE_8474 0x8474
-
-#define PCI_VENDOR_ID_SIERRA 0x10a8
-#define PCI_DEVICE_ID_SIERRA_STB 0x0000
-
-#define PCI_VENDOR_ID_SGI 0x10a9
-#define PCI_DEVICE_ID_SGI_IOC3 0x0003
-#define PCI_DEVICE_ID_SGI_IOC4 0x100a
-
-#define PCI_VENDOR_ID_ACC 0x10aa
-#define PCI_DEVICE_ID_ACC_2056 0x0000
-
-#define PCI_VENDOR_ID_WINBOND 0x10ad
-#define PCI_DEVICE_ID_WINBOND_83769 0x0001
-#define PCI_DEVICE_ID_WINBOND_82C105 0x0105
-#define PCI_DEVICE_ID_WINBOND_83C553 0x0565
-
-#define PCI_VENDOR_ID_DATABOOK 0x10b3
-#define PCI_DEVICE_ID_DATABOOK_87144 0xb106
-
-#define PCI_VENDOR_ID_PLX 0x10b5
-#define PCI_DEVICE_ID_PLX_R685 0x1030
-#define PCI_DEVICE_ID_PLX_ROMULUS 0x106a
-#define PCI_DEVICE_ID_PLX_SPCOM800 0x1076
-#define PCI_DEVICE_ID_PLX_1077 0x1077
-#define PCI_DEVICE_ID_PLX_SPCOM200 0x1103
-#define PCI_DEVICE_ID_PLX_DJINN_ITOO 0x1151
-#define PCI_DEVICE_ID_PLX_R753 0x1152
-#define PCI_DEVICE_ID_PLX_9050 0x9050
-#define PCI_DEVICE_ID_PLX_9060 0x9060
-#define PCI_DEVICE_ID_PLX_9060ES 0x906E
-#define PCI_DEVICE_ID_PLX_9060SD 0x906D
-#define PCI_DEVICE_ID_PLX_9080 0x9080
-#define PCI_DEVICE_ID_PLX_GTEK_SERIAL2 0xa001
-
-#define PCI_VENDOR_ID_MADGE 0x10b6
-#define PCI_DEVICE_ID_MADGE_MK2 0x0002
-#define PCI_DEVICE_ID_MADGE_C155S 0x1001
-
-#define PCI_VENDOR_ID_3COM 0x10b7
-#define PCI_DEVICE_ID_3COM_3C985 0x0001
-#define PCI_DEVICE_ID_3COM_3C339 0x3390
-#define PCI_DEVICE_ID_3COM_3C590 0x5900
-#define PCI_DEVICE_ID_3COM_3C595TX 0x5950
-#define PCI_DEVICE_ID_3COM_3C595T4 0x5951
-#define PCI_DEVICE_ID_3COM_3C595MII 0x5952
-#define PCI_DEVICE_ID_3COM_3C900TPO 0x9000
-#define PCI_DEVICE_ID_3COM_3C900COMBO 0x9001
-#define PCI_DEVICE_ID_3COM_3C905TX 0x9050
-#define PCI_DEVICE_ID_3COM_3C905T4 0x9051
-#define PCI_DEVICE_ID_3COM_3C905B_TX 0x9055
-#define PCI_DEVICE_ID_3COM_3CR990 0x9900
-#define PCI_DEVICE_ID_3COM_3CR990_TX_95 0x9902
-#define PCI_DEVICE_ID_3COM_3CR990_TX_97 0x9903
-#define PCI_DEVICE_ID_3COM_3CR990B 0x9904
-#define PCI_DEVICE_ID_3COM_3CR990_FX 0x9905
-#define PCI_DEVICE_ID_3COM_3CR990SVR95 0x9908
-#define PCI_DEVICE_ID_3COM_3CR990SVR97 0x9909
-#define PCI_DEVICE_ID_3COM_3CR990SVR 0x990a
-
-#define PCI_VENDOR_ID_SMC 0x10b8
-#define PCI_DEVICE_ID_SMC_EPIC100 0x0005
-
-#define PCI_VENDOR_ID_AL 0x10b9
-#define PCI_DEVICE_ID_AL_M1445 0x1445
-#define PCI_DEVICE_ID_AL_M1449 0x1449
-#define PCI_DEVICE_ID_AL_M1451 0x1451
-#define PCI_DEVICE_ID_AL_M1461 0x1461
-#define PCI_DEVICE_ID_AL_M1489 0x1489
-#define PCI_DEVICE_ID_AL_M1511 0x1511
-#define PCI_DEVICE_ID_AL_M1513 0x1513
-#define PCI_DEVICE_ID_AL_M1521 0x1521
-#define PCI_DEVICE_ID_AL_M1523 0x1523
-#define PCI_DEVICE_ID_AL_M1531 0x1531
-#define PCI_DEVICE_ID_AL_M1533 0x1533
-#define PCI_DEVICE_ID_AL_M1535 0x1535
-#define PCI_DEVICE_ID_AL_M1541 0x1541
-#define PCI_DEVICE_ID_AL_M1621 0x1621
-#define PCI_DEVICE_ID_AL_M1631 0x1631
-#define PCI_DEVICE_ID_AL_M1641 0x1641
-#define PCI_DEVICE_ID_AL_M1644 0x1644
-#define PCI_DEVICE_ID_AL_M1647 0x1647
-#define PCI_DEVICE_ID_AL_M1651 0x1651
-#define PCI_DEVICE_ID_AL_M1543 0x1543
-#define PCI_DEVICE_ID_AL_M3307 0x3307
-#define PCI_DEVICE_ID_AL_M4803 0x5215
-#define PCI_DEVICE_ID_AL_M5219 0x5219
-#define PCI_DEVICE_ID_AL_M5229 0x5229
-#define PCI_DEVICE_ID_AL_M5237 0x5237
-#define PCI_DEVICE_ID_AL_M5243 0x5243
-#define PCI_DEVICE_ID_AL_M5451 0x5451
-#define PCI_DEVICE_ID_AL_M7101 0x7101
-
-#define PCI_VENDOR_ID_MITSUBISHI 0x10ba
-
-#define PCI_VENDOR_ID_SURECOM 0x10bd
-#define PCI_DEVICE_ID_SURECOM_NE34 0x0e34
-
-#define PCI_VENDOR_ID_NEOMAGIC 0x10c8
-#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2070 0x0001
-#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128V 0x0002
-#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZV 0x0003
-#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2160 0x0004
-#define PCI_DEVICE_ID_NEOMAGIC_MAGICMEDIA_256AV 0x0005
-#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZVPLUS 0x0083
-
-#define PCI_VENDOR_ID_ASP 0x10cd
-#define PCI_DEVICE_ID_ASP_ABP940 0x1200
-#define PCI_DEVICE_ID_ASP_ABP940U 0x1300
-#define PCI_DEVICE_ID_ASP_ABP940UW 0x2300
-
-#define PCI_VENDOR_ID_MACRONIX 0x10d9
-#define PCI_DEVICE_ID_MACRONIX_MX98713 0x0512
-#define PCI_DEVICE_ID_MACRONIX_MX987x5 0x0531
-
-#define PCI_VENDOR_ID_TCONRAD 0x10da
-#define PCI_DEVICE_ID_TCONRAD_TOKENRING 0x0508
-
-#define PCI_VENDOR_ID_CERN 0x10dc
-#define PCI_DEVICE_ID_CERN_SPSB_PMC 0x0001
-#define PCI_DEVICE_ID_CERN_SPSB_PCI 0x0002
-#define PCI_DEVICE_ID_CERN_HIPPI_DST 0x0021
-#define PCI_DEVICE_ID_CERN_HIPPI_SRC 0x0022
-
-#define PCI_VENDOR_ID_NVIDIA 0x10de
-#define PCI_DEVICE_ID_NVIDIA_TNT 0x0020
-#define PCI_DEVICE_ID_NVIDIA_TNT2 0x0028
-#define PCI_DEVICE_ID_NVIDIA_UTNT2 0x0029
-#define PCI_DEVICE_ID_NVIDIA_VTNT2 0x002C
-#define PCI_DEVICE_ID_NVIDIA_UVTNT2 0x002D
-#define PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE 0x0065
-#define PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE 0x0085
-#define PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA 0x008e
-#define PCI_DEVICE_ID_NVIDIA_ITNT2 0x00A0
-#define PCI_DEVICE_ID_NVIDIA_NFORCE3 0x00d1
-#define PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE 0x00d5
-#define PCI_DEVICE_ID_NVIDIA_NFORCE3S 0x00e1
-#define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA 0x00e3
-#define PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE 0x00e5
-#define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2 0x00ee
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR 0x0100
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR 0x0101
-#define PCI_DEVICE_ID_NVIDIA_QUADRO 0x0103
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX 0x0110
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX2 0x0111
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GO 0x0112
-#define PCI_DEVICE_ID_NVIDIA_QUADRO2_MXR 0x0113
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS 0x0150
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS2 0x0151
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_ULTRA 0x0152
-#define PCI_DEVICE_ID_NVIDIA_QUADRO2_PRO 0x0153
-#define PCI_DEVICE_ID_NVIDIA_IGEFORCE2 0x01a0
-#define PCI_DEVICE_ID_NVIDIA_NFORCE 0x01a4
-#define PCI_DEVICE_ID_NVIDIA_NFORCE_IDE 0x01bc
-#define PCI_DEVICE_ID_NVIDIA_NFORCE2 0x01e0
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE3 0x0200
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1 0x0201
-#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_2 0x0202
-#define PCI_DEVICE_ID_NVIDIA_QUADRO_DDC 0x0203
-
-#define PCI_VENDOR_ID_IMS 0x10e0
-#define PCI_DEVICE_ID_IMS_8849 0x8849
-#define PCI_DEVICE_ID_IMS_TT128 0x9128
-#define PCI_DEVICE_ID_IMS_TT3D 0x9135
-
-#define PCI_VENDOR_ID_TEKRAM2 0x10e1
-#define PCI_DEVICE_ID_TEKRAM2_690c 0x690c
-
-#define PCI_VENDOR_ID_TUNDRA 0x10e3
-#define PCI_DEVICE_ID_TUNDRA_CA91C042 0x0000
-
-#define PCI_VENDOR_ID_AMCC 0x10e8
-#define PCI_DEVICE_ID_AMCC_MYRINET 0x8043
-#define PCI_DEVICE_ID_AMCC_PARASTATION 0x8062
-#define PCI_DEVICE_ID_AMCC_S5933 0x807d
-#define PCI_DEVICE_ID_AMCC_S5933_HEPC3 0x809c
-
-#define PCI_VENDOR_ID_INTERG 0x10ea
-#define PCI_DEVICE_ID_INTERG_1680 0x1680
-#define PCI_DEVICE_ID_INTERG_1682 0x1682
-#define PCI_DEVICE_ID_INTERG_2000 0x2000
-#define PCI_DEVICE_ID_INTERG_2010 0x2010
-#define PCI_DEVICE_ID_INTERG_5000 0x5000
-#define PCI_DEVICE_ID_INTERG_5050 0x5050
-
-#define PCI_VENDOR_ID_REALTEK 0x10ec
-#define PCI_DEVICE_ID_REALTEK_8029 0x8029
-#define PCI_DEVICE_ID_REALTEK_8129 0x8129
-#define PCI_DEVICE_ID_REALTEK_8139 0x8139
-#define PCI_DEVICE_ID_REALTEK_8169 0x8169
-
-#define PCI_VENDOR_ID_XILINX 0x10ee
-#define PCI_DEVICE_ID_TURBOPAM 0x4020
-
-#define PCI_VENDOR_ID_TRUEVISION 0x10fa
-#define PCI_DEVICE_ID_TRUEVISION_T1000 0x000c
-
-#define PCI_VENDOR_ID_INIT 0x1101
-#define PCI_DEVICE_ID_INIT_320P 0x9100
-#define PCI_DEVICE_ID_INIT_360P 0x9500
-
-#define PCI_VENDOR_ID_CREATIVE 0x1102 // duplicate: ECTIVA
-#define PCI_DEVICE_ID_CREATIVE_EMU10K1 0x0002
-
-#define PCI_VENDOR_ID_ECTIVA 0x1102 // duplicate: CREATIVE
-#define PCI_DEVICE_ID_ECTIVA_EV1938 0x8938
-
-#define PCI_VENDOR_ID_TTI 0x1103
-#define PCI_DEVICE_ID_TTI_HPT343 0x0003
-#define PCI_DEVICE_ID_TTI_HPT366 0x0004
-#define PCI_DEVICE_ID_TTI_HPT372 0x0005
-#define PCI_DEVICE_ID_TTI_HPT302 0x0006
-#define PCI_DEVICE_ID_TTI_HPT371 0x0007
-#define PCI_DEVICE_ID_TTI_HPT374 0x0008
-#define PCI_DEVICE_ID_TTI_HPT372N 0x0009 // appoarently a 372N variant?
-
-#define PCI_VENDOR_ID_VIA 0x1106
-#define PCI_DEVICE_ID_VIA_8363_0 0x0305
-#define PCI_DEVICE_ID_VIA_8371_0 0x0391
-#define PCI_DEVICE_ID_VIA_8501_0 0x0501
-#define PCI_DEVICE_ID_VIA_82C505 0x0505
-#define PCI_DEVICE_ID_VIA_82C561 0x0561
-#define PCI_DEVICE_ID_VIA_82C586_1 0x0571
-#define PCI_DEVICE_ID_VIA_82C576 0x0576
-#define PCI_DEVICE_ID_VIA_82C585 0x0585
-#define PCI_DEVICE_ID_VIA_82C586_0 0x0586
-#define PCI_DEVICE_ID_VIA_82C595 0x0595
-#define PCI_DEVICE_ID_VIA_82C596 0x0596
-#define PCI_DEVICE_ID_VIA_82C597_0 0x0597
-#define PCI_DEVICE_ID_VIA_82C598_0 0x0598
-#define PCI_DEVICE_ID_VIA_8601_0 0x0601
-#define PCI_DEVICE_ID_VIA_8605_0 0x0605
-#define PCI_DEVICE_ID_VIA_82C680 0x0680
-#define PCI_DEVICE_ID_VIA_82C686 0x0686
-#define PCI_DEVICE_ID_VIA_82C691 0x0691
-#define PCI_DEVICE_ID_VIA_82C693 0x0693
-#define PCI_DEVICE_ID_VIA_82C693_1 0x0698
-#define PCI_DEVICE_ID_VIA_82C926 0x0926
-#define PCI_DEVICE_ID_VIA_82C576_1 0x1571
-#define PCI_DEVICE_ID_VIA_82C595_97 0x1595
-#define PCI_DEVICE_ID_VIA_82C586_2 0x3038
-#define PCI_DEVICE_ID_VIA_82C586_3 0x3040
-#define PCI_DEVICE_ID_VIA_6305 0x3044
-#define PCI_DEVICE_ID_VIA_82C596_3 0x3050
-#define PCI_DEVICE_ID_VIA_82C596B_3 0x3051
-#define PCI_DEVICE_ID_VIA_82C686_4 0x3057
-#define PCI_DEVICE_ID_VIA_82C686_5 0x3058
-#define PCI_DEVICE_ID_VIA_8233_5 0x3059
-#define PCI_DEVICE_ID_VIA_8233_7 0x3065
-#define PCI_DEVICE_ID_VIA_82C686_6 0x3068
-#define PCI_DEVICE_ID_VIA_8233_0 0x3074
-#define PCI_DEVICE_ID_VIA_8633_0 0x3091
-#define PCI_DEVICE_ID_VIA_8367_0 0x3099
-#define PCI_DEVICE_ID_VIA_8622 0x3102
-#define PCI_DEVICE_ID_VIA_8233C_0 0x3109
-#define PCI_DEVICE_ID_VIA_8361 0x3112
-#define PCI_DEVICE_ID_VIA_8375 0x3116
-#define PCI_DEVICE_ID_VIA_CLE266 0x3123
-#define PCI_DEVICE_ID_VIA_8233A 0x3147
-#define PCI_DEVICE_ID_VIA_P4M266 0x3148
-#define PCI_DEVICE_ID_VIA_8237_SATA 0x3149
-#define PCI_DEVICE_ID_VIA_P4X333 0x3168
-#define PCI_DEVICE_ID_VIA_8235 0x3177
-#define PCI_DEVICE_ID_VIA_8377_0 0x3189
-#define PCI_DEVICE_ID_VIA_K8T400M_0 0x3188
-#define PCI_DEVICE_ID_VIA_8237 0x3227
-#define PCI_DEVICE_ID_VIA_86C100A 0x6100
-#define PCI_DEVICE_ID_VIA_8231 0x8231
-#define PCI_DEVICE_ID_VIA_8231_4 0x8235
-#define PCI_DEVICE_ID_VIA_8365_1 0x8305
-#define PCI_DEVICE_ID_VIA_8371_1 0x8391
-#define PCI_DEVICE_ID_VIA_8501_1 0x8501
-#define PCI_DEVICE_ID_VIA_82C597_1 0x8597
-#define PCI_DEVICE_ID_VIA_82C598_1 0x8598
-#define PCI_DEVICE_ID_VIA_8601_1 0x8601
-#define PCI_DEVICE_ID_VIA_8505_1 0x8605
-#define PCI_DEVICE_ID_VIA_8633_1 0xB091
-#define PCI_DEVICE_ID_VIA_8367_1 0xB099
-
-#define PCI_VENDOR_ID_SIEMENS 0x110A
-#define PCI_DEVICE_ID_SIEMENS_DSCC4 0x2102
-
-#define PCI_VENDOR_ID_SMC2 0x1113
-#define PCI_DEVICE_ID_SMC2_1211TX 0x1211
-
-#define PCI_VENDOR_ID_VORTEX 0x1119
-#define PCI_DEVICE_ID_VORTEX_GDT60x0 0x0000
-#define PCI_DEVICE_ID_VORTEX_GDT6000B 0x0001
-#define PCI_DEVICE_ID_VORTEX_GDT6x10 0x0002
-#define PCI_DEVICE_ID_VORTEX_GDT6x20 0x0003
-#define PCI_DEVICE_ID_VORTEX_GDT6530 0x0004
-#define PCI_DEVICE_ID_VORTEX_GDT6550 0x0005
-#define PCI_DEVICE_ID_VORTEX_GDT6x17 0x0006
-#define PCI_DEVICE_ID_VORTEX_GDT6x27 0x0007
-#define PCI_DEVICE_ID_VORTEX_GDT6537 0x0008
-#define PCI_DEVICE_ID_VORTEX_GDT6557 0x0009
-#define PCI_DEVICE_ID_VORTEX_GDT6x15 0x000a
-#define PCI_DEVICE_ID_VORTEX_GDT6x25 0x000b
-#define PCI_DEVICE_ID_VORTEX_GDT6535 0x000c
-#define PCI_DEVICE_ID_VORTEX_GDT6555 0x000d
-#define PCI_DEVICE_ID_VORTEX_GDT6x17RP 0x0100
-#define PCI_DEVICE_ID_VORTEX_GDT6x27RP 0x0101
-#define PCI_DEVICE_ID_VORTEX_GDT6537RP 0x0102
-#define PCI_DEVICE_ID_VORTEX_GDT6557RP 0x0103
-#define PCI_DEVICE_ID_VORTEX_GDT6x11RP 0x0104
-#define PCI_DEVICE_ID_VORTEX_GDT6x21RP 0x0105
-#define PCI_DEVICE_ID_VORTEX_GDT6x17RP1 0x0110
-#define PCI_DEVICE_ID_VORTEX_GDT6x27RP1 0x0111
-#define PCI_DEVICE_ID_VORTEX_GDT6537RP1 0x0112
-#define PCI_DEVICE_ID_VORTEX_GDT6557RP1 0x0113
-#define PCI_DEVICE_ID_VORTEX_GDT6x11RP1 0x0114
-#define PCI_DEVICE_ID_VORTEX_GDT6x21RP1 0x0115
-#define PCI_DEVICE_ID_VORTEX_GDT6x17RP2 0x0120
-#define PCI_DEVICE_ID_VORTEX_GDT6x27RP2 0x0121
-#define PCI_DEVICE_ID_VORTEX_GDT6537RP2 0x0122
-#define PCI_DEVICE_ID_VORTEX_GDT6557RP2 0x0123
-#define PCI_DEVICE_ID_VORTEX_GDT6x11RP2 0x0124
-#define PCI_DEVICE_ID_VORTEX_GDT6x21RP2 0x0125
-
-#define PCI_VENDOR_ID_EF 0x111a
-#define PCI_DEVICE_ID_EF_ATM_FPGA 0x0000
-#define PCI_DEVICE_ID_EF_ATM_ASIC 0x0002
-
-#define PCI_VENDOR_ID_IDT 0x111d
-#define PCI_DEVICE_ID_IDT_IDT77201 0x0001
-
-#define PCI_VENDOR_ID_FORE 0x1127
-#define PCI_DEVICE_ID_FORE_PCA200PC 0x0210
-#define PCI_DEVICE_ID_FORE_PCA200E 0x0300
-
-#define PCI_VENDOR_ID_IMAGINGTECH 0x112f
-#define PCI_DEVICE_ID_IMAGINGTECH_ICPCI 0x0000
-
-#define PCI_VENDOR_ID_PHILIPS 0x1131
-#define PCI_DEVICE_ID_PHILIPS_SAA7145 0x7145
-#define PCI_DEVICE_ID_PHILIPS_SAA7146 0x7146
-#define PCI_DEVICE_ID_PHILIPS_SAA9730 0x9730
-
-#define PCI_VENDOR_ID_EICON 0x1133
-#define PCI_DEVICE_ID_EICON_DIVA20PRO 0xe001
-#define PCI_DEVICE_ID_EICON_DIVA20 0xe002
-#define PCI_DEVICE_ID_EICON_DIVA20PRO_U 0xe003
-#define PCI_DEVICE_ID_EICON_DIVA20_U 0xe004
-#define PCI_DEVICE_ID_EICON_DIVA201 0xe005
-#define PCI_DEVICE_ID_EICON_DIVA202 0xe00b
-#define PCI_DEVICE_ID_EICON_MAESTRA 0xe010
-#define PCI_DEVICE_ID_EICON_MAESTRAQ 0xe012
-#define PCI_DEVICE_ID_EICON_MAESTRAQ_U 0xe013
-#define PCI_DEVICE_ID_EICON_MAESTRAP 0xe014
-
-#define PCI_VENDOR_ID_CYCLONE 0x113c
-#define PCI_DEVICE_ID_CYCLONE_SDK 0x0001
-
-#define PCI_VENDOR_ID_ALLIANCE 0x1142
-#define PCI_DEVICE_ID_ALLIANCE_PROMOTIO 0x3210
-#define PCI_DEVICE_ID_ALLIANCE_PROVIDEO 0x6422
-#define PCI_DEVICE_ID_ALLIANCE_AT24 0x6424
-#define PCI_DEVICE_ID_ALLIANCE_AT3D 0x643d
-
-#define PCI_VENDOR_ID_SYSKONNECT 0x1148
-#define PCI_DEVICE_ID_SYSKONNECT_FP 0x4000
-#define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200
-#define PCI_DEVICE_ID_SYSKONNECT_GE 0x4300
-#define PCI_DEVICE_ID_SYSKONNECT_YU 0x4320
-#define PCI_DEVICE_ID_SYSKONNECT_9DXX 0x4400
-#define PCI_DEVICE_ID_SYSKONNECT_9MXX 0x4500
-
-#define PCI_VENDOR_ID_VMIC 0x114a
-#define PCI_DEVICE_ID_VMIC_VME 0x7587
-
-#define PCI_VENDOR_ID_DIGI 0x114f
-#define PCI_DEVICE_ID_DIGI_EPC 0x0002
-#define PCI_DEVICE_ID_DIGI_RIGHTSWITCH 0x0003
-#define PCI_DEVICE_ID_DIGI_XEM 0x0004
-#define PCI_DEVICE_ID_DIGI_XR 0x0005
-#define PCI_DEVICE_ID_DIGI_CX 0x0006
-#define PCI_DEVICE_ID_DIGI_XRJ 0x0009
-#define PCI_DEVICE_ID_DIGI_EPCJ 0x000a
-#define PCI_DEVICE_ID_DIGI_XR_920 0x0027
-#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_E 0x0070
-#define PCI_DEVICE_ID_DIGI_DF_M_E 0x0071
-#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A 0x0072
-#define PCI_DEVICE_ID_DIGI_DF_M_A 0x0073
-
-#define PCI_VENDOR_ID_MUTECH 0x1159
-#define PCI_DEVICE_ID_MUTECH_MV1000 0x0001
-
-#define PCI_VENDOR_ID_XIRCOM 0x115d
-#define PCI_DEVICE_ID_XIRCOM_X3201_ETH 0x0003
-#define PCI_DEVICE_ID_XIRCOM_X3201_MDM 0x0103
-
-#define PCI_VENDOR_ID_RENDITION 0x1163
-#define PCI_DEVICE_ID_RENDITION_VERITE 0x0001
-#define PCI_DEVICE_ID_RENDITION_VERITE2100 0x2000
-
-#define PCI_VENDOR_ID_SERVERWORKS 0x1166
-#define PCI_DEVICE_ID_SERVERWORKS_HE 0x0008
-#define PCI_DEVICE_ID_SERVERWORKS_LE 0x0009
-#define PCI_DEVICE_ID_SERVERWORKS_CIOB30 0x0010
-#define PCI_DEVICE_ID_SERVERWORKS_CMIC_HE 0x0011
-#define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017
-#define PCI_DEVICE_ID_SERVERWORKS_OSB4 0x0200
-#define PCI_DEVICE_ID_SERVERWORKS_CSB5 0x0201
-#define PCI_DEVICE_ID_SERVERWORKS_CSB6 0x0203
-#define PCI_DEVICE_ID_SERVERWORKS_OSB4IDE 0x0211
-#define PCI_DEVICE_ID_SERVERWORKS_CSB5IDE 0x0212
-#define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE 0x0213
-#define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2 0x0217
-#define PCI_DEVICE_ID_SERVERWORKS_OSB4USB 0x0220
-#define PCI_DEVICE_ID_SERVERWORKS_CSB5USB PCI_DEVICE_ID_SERVERWORKS_OSB4USB
-#define PCI_DEVICE_ID_SERVERWORKS_CSB6USB 0x0221
-#define PCI_DEVICE_ID_SERVERWORKS_GCLE 0x0225
-#define PCI_DEVICE_ID_SERVERWORKS_GCLE2 0x0227
-#define PCI_DEVICE_ID_SERVERWORKS_CSB5ISA 0x0230
-
-#define PCI_VENDOR_ID_SBE 0x1176
-#define PCI_DEVICE_ID_SBE_WANXL100 0x0301
-#define PCI_DEVICE_ID_SBE_WANXL200 0x0302
-#define PCI_DEVICE_ID_SBE_WANXL400 0x0104
-
-#define PCI_VENDOR_ID_TOSHIBA 0x1179
-#define PCI_DEVICE_ID_TOSHIBA_PICCOLO 0x0102
-#define PCI_DEVICE_ID_TOSHIBA_601 0x0601
-#define PCI_DEVICE_ID_TOSHIBA_TOPIC95 0x060a
-#define PCI_DEVICE_ID_TOSHIBA_TOPIC97 0x060f
-
-#define PCI_VENDOR_ID_TOSHIBA_2 0x102f
-#define PCI_DEVICE_ID_TOSHIBA_TX3927 0x000a
-#define PCI_DEVICE_ID_TOSHIBA_TC35815CF 0x0030
-#define PCI_DEVICE_ID_TOSHIBA_TX4927 0x0180
-
-#define PCI_VENDOR_ID_RICOH 0x1180
-#define PCI_DEVICE_ID_RICOH_RL5C465 0x0465
-#define PCI_DEVICE_ID_RICOH_RL5C466 0x0466
-#define PCI_DEVICE_ID_RICOH_RL5C475 0x0475
-#define PCI_DEVICE_ID_RICOH_RL5C476 0x0476
-#define PCI_DEVICE_ID_RICOH_RL5C478 0x0478
-
-#define PCI_VENDOR_ID_ARTOP 0x1191
-#define PCI_DEVICE_ID_ARTOP_ATP8400 0x0004
-#define PCI_DEVICE_ID_ARTOP_ATP850UF 0x0005
-#define PCI_DEVICE_ID_ARTOP_ATP860 0x0006
-#define PCI_DEVICE_ID_ARTOP_ATP860R 0x0007
-#define PCI_DEVICE_ID_ARTOP_ATP865 0x0008
-#define PCI_DEVICE_ID_ARTOP_ATP865R 0x0009
-#define PCI_DEVICE_ID_ARTOP_AEC7610 0x8002
-#define PCI_DEVICE_ID_ARTOP_AEC7612UW 0x8010
-#define PCI_DEVICE_ID_ARTOP_AEC7612U 0x8020
-#define PCI_DEVICE_ID_ARTOP_AEC7612S 0x8030
-#define PCI_DEVICE_ID_ARTOP_AEC7612D 0x8040
-#define PCI_DEVICE_ID_ARTOP_AEC7612SUW 0x8050
-#define PCI_DEVICE_ID_ARTOP_8060 0x8060
-
-#define PCI_VENDOR_ID_ZEITNET 0x1193
-#define PCI_DEVICE_ID_ZEITNET_1221 0x0001
-#define PCI_DEVICE_ID_ZEITNET_1225 0x0002
-
-#define PCI_VENDOR_ID_OMEGA 0x119b
-#define PCI_DEVICE_ID_OMEGA_82C092G 0x1221
-
-#define PCI_VENDOR_ID_FUJITSU_ME 0x119e
-#define PCI_DEVICE_ID_FUJITSU_FS155 0x0001
-#define PCI_DEVICE_ID_FUJITSU_FS50 0x0003
-
-#define PCI_SUBVENDOR_ID_KEYSPAN 0x11a9
-#define PCI_SUBDEVICE_ID_KEYSPAN_SX2 0x5334
-
-#define PCI_VENDOR_ID_GALILEO 0x11ab
-#define PCI_DEVICE_ID_GALILEO_GT64011 0x4146
-#define PCI_DEVICE_ID_GALILEO_GT64111 0x4146
-#define PCI_DEVICE_ID_GALILEO_GT96100 0x9652
-#define PCI_DEVICE_ID_GALILEO_GT96100A 0x9653
-
-#define PCI_VENDOR_ID_LITEON 0x11ad
-#define PCI_DEVICE_ID_LITEON_LNE100TX 0x0002
-
-#define PCI_VENDOR_ID_V3 0x11b0
-#define PCI_DEVICE_ID_V3_V960 0x0001
-#define PCI_DEVICE_ID_V3_V350 0x0001
-#define PCI_DEVICE_ID_V3_V961 0x0002
-#define PCI_DEVICE_ID_V3_V351 0x0002
-
-#define PCI_VENDOR_ID_NP 0x11bc
-#define PCI_DEVICE_ID_NP_PCI_FDDI 0x0001
-
-#define PCI_VENDOR_ID_ATT 0x11c1
-#define PCI_DEVICE_ID_ATT_L56XMF 0x0440
-#define PCI_DEVICE_ID_ATT_VENUS_MODEM 0x480
-
-#define PCI_VENDOR_ID_SPECIALIX 0x11cb
-#define PCI_DEVICE_ID_SPECIALIX_IO8 0x2000
-#define PCI_DEVICE_ID_SPECIALIX_XIO 0x4000
-#define PCI_DEVICE_ID_SPECIALIX_RIO 0x8000
-#define PCI_SUBDEVICE_ID_SPECIALIX_SPEED4 0xa004
-
-#define PCI_VENDOR_ID_AURAVISION 0x11d1
-#define PCI_DEVICE_ID_AURAVISION_VXP524 0x01f7
-
-#define PCI_VENDOR_ID_ANALOG_DEVICES 0x11d4
-#define PCI_DEVICE_ID_AD1889JS 0x1889
-
-#define PCI_VENDOR_ID_IKON 0x11d5
-#define PCI_DEVICE_ID_IKON_10115 0x0115
-#define PCI_DEVICE_ID_IKON_10117 0x0117
-
-#define PCI_VENDOR_ID_ZORAN 0x11de
-#define PCI_DEVICE_ID_ZORAN_36057 0x6057
-#define PCI_DEVICE_ID_ZORAN_36120 0x6120
-
-#define PCI_VENDOR_ID_KINETIC 0x11f4
-#define PCI_DEVICE_ID_KINETIC_2915 0x2915
-
-#define PCI_VENDOR_ID_COMPEX 0x11f6
-#define PCI_DEVICE_ID_COMPEX_ENET100VG4 0x0112
-#define PCI_DEVICE_ID_COMPEX_RL2000 0x1401
-
-#define PCI_VENDOR_ID_RP 0x11fe
-#define PCI_DEVICE_ID_RP32INTF 0x0001
-#define PCI_DEVICE_ID_RP8INTF 0x0002
-#define PCI_DEVICE_ID_RP16INTF 0x0003
-#define PCI_DEVICE_ID_RP4QUAD 0x0004
-#define PCI_DEVICE_ID_RP8OCTA 0x0005
-#define PCI_DEVICE_ID_RP8J 0x0006
-#define PCI_DEVICE_ID_RPP4 0x000A
-#define PCI_DEVICE_ID_RPP8 0x000B
-#define PCI_DEVICE_ID_RP8M 0x000C
-
-#define PCI_VENDOR_ID_CYCLADES 0x120e
-#define PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100
-#define PCI_DEVICE_ID_CYCLOM_Y_Hi 0x0101
-#define PCI_DEVICE_ID_CYCLOM_4Y_Lo 0x0102
-#define PCI_DEVICE_ID_CYCLOM_4Y_Hi 0x0103
-#define PCI_DEVICE_ID_CYCLOM_8Y_Lo 0x0104
-#define PCI_DEVICE_ID_CYCLOM_8Y_Hi 0x0105
-#define PCI_DEVICE_ID_CYCLOM_Z_Lo 0x0200
-#define PCI_DEVICE_ID_CYCLOM_Z_Hi 0x0201
-#define PCI_DEVICE_ID_PC300_RX_2 0x0300
-#define PCI_DEVICE_ID_PC300_RX_1 0x0301
-#define PCI_DEVICE_ID_PC300_TE_2 0x0310
-#define PCI_DEVICE_ID_PC300_TE_1 0x0311
-
-#define PCI_VENDOR_ID_ESSENTIAL 0x120f
-#define PCI_DEVICE_ID_ESSENTIAL_ROADRUNNER 0x0001
-
-#define PCI_VENDOR_ID_O2 0x1217
-#define PCI_DEVICE_ID_O2_6729 0x6729
-#define PCI_DEVICE_ID_O2_6730 0x673a
-#define PCI_DEVICE_ID_O2_6832 0x6832
-#define PCI_DEVICE_ID_O2_6836 0x6836
-
-#define PCI_VENDOR_ID_3DFX 0x121a
-#define PCI_DEVICE_ID_3DFX_VOODOO 0x0001
-#define PCI_DEVICE_ID_3DFX_VOODOO2 0x0002
-#define PCI_DEVICE_ID_3DFX_BANSHEE 0x0003
-#define PCI_DEVICE_ID_3DFX_VOODOO3 0x0005
-
-#define PCI_VENDOR_ID_SIGMADES 0x1236
-#define PCI_DEVICE_ID_SIGMADES_6425 0x6401
-
-#define PCI_VENDOR_ID_CCUBE 0x123f
-
-#define PCI_VENDOR_ID_AVM 0x1244
-#define PCI_DEVICE_ID_AVM_B1 0x0700
-#define PCI_DEVICE_ID_AVM_C4 0x0800
-#define PCI_DEVICE_ID_AVM_A1 0x0a00
-#define PCI_DEVICE_ID_AVM_A1_V2 0x0e00
-#define PCI_DEVICE_ID_AVM_C2 0x1100
-#define PCI_DEVICE_ID_AVM_T1 0x1200
-
-#define PCI_VENDOR_ID_DIPIX 0x1246
-
-#define PCI_VENDOR_ID_STALLION 0x124d
-#define PCI_DEVICE_ID_STALLION_ECHPCI832 0x0000
-#define PCI_DEVICE_ID_STALLION_ECHPCI864 0x0002
-#define PCI_DEVICE_ID_STALLION_EIOPCI 0x0003
-
-#define PCI_VENDOR_ID_OPTIBASE 0x1255
-#define PCI_DEVICE_ID_OPTIBASE_FORGE 0x1110
-#define PCI_DEVICE_ID_OPTIBASE_FUSION 0x1210
-#define PCI_DEVICE_ID_OPTIBASE_VPLEX 0x2110
-#define PCI_DEVICE_ID_OPTIBASE_VPLEXCC 0x2120
-#define PCI_DEVICE_ID_OPTIBASE_VQUEST 0x2130
-
-#define PCI_VENDOR_ID_ESS 0x125d
-#define PCI_DEVICE_ID_ESS_ESS1968 0x1968
-#define PCI_DEVICE_ID_ESS_AUDIOPCI 0x1969
-#define PCI_DEVICE_ID_ESS_ESS1978 0x1978
-
-#define PCI_VENDOR_ID_SATSAGEM 0x1267
-#define PCI_DEVICE_ID_SATSAGEM_NICCY 0x1016
-#define PCI_DEVICE_ID_SATSAGEM_PCR2101 0x5352
-#define PCI_DEVICE_ID_SATSAGEM_TELSATTURBO 0x5a4b
-
-#define PCI_VENDOR_ID_HUGHES 0x1273
-#define PCI_DEVICE_ID_HUGHES_DIRECPC 0x0002
-
-#define PCI_VENDOR_ID_ENSONIQ 0x1274
-#define PCI_DEVICE_ID_ENSONIQ_CT5880 0x5880
-#define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000
-#define PCI_DEVICE_ID_ENSONIQ_ES1371 0x1371
-
-#define PCI_VENDOR_ID_ROCKWELL 0x127A
-
-#define PCI_VENDOR_ID_ITE 0x1283
-#define PCI_DEVICE_ID_ITE_IT8172G 0x8172
-#define PCI_DEVICE_ID_ITE_IT8172G_AUDIO 0x0801
-#define PCI_DEVICE_ID_ITE_IT8181 0x8181
-#define PCI_DEVICE_ID_ITE_8872 0x8872
-
-#define PCI_DEVICE_ID_ITE_IT8330G_0 0xe886
-
-/* formerly Platform Tech */
-#define PCI_VENDOR_ID_ESS_OLD 0x1285
-#define PCI_DEVICE_ID_ESS_ESS0100 0x0100
-
-#define PCI_VENDOR_ID_ALTEON 0x12ae
-#define PCI_DEVICE_ID_ALTEON_ACENIC 0x0001
-
-#define PCI_VENDOR_ID_USR 0x12B9
-
-#define PCI_SUBVENDOR_ID_CONNECT_TECH 0x12c4
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_232 0x0001
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_232 0x0002
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_232 0x0003
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485 0x0004
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_4_4 0x0005
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485 0x0006
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485_2_2 0x0007
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_485 0x0008
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_2_6 0x0009
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH081101V1 0x000A
-#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1 0x000B
-
-#define PCI_VENDOR_ID_PICTUREL 0x12c5
-#define PCI_DEVICE_ID_PICTUREL_PCIVST 0x0081
-
-#define PCI_VENDOR_ID_NVIDIA_SGS 0x12d2
-#define PCI_DEVICE_ID_NVIDIA_SGS_RIVA128 0x0018
-
-#define PCI_SUBVENDOR_ID_CHASE_PCIFAST 0x12E0
-#define PCI_SUBDEVICE_ID_CHASE_PCIFAST4 0x0031
-#define PCI_SUBDEVICE_ID_CHASE_PCIFAST8 0x0021
-#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16 0x0011
-#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16FMC 0x0041
-#define PCI_SUBVENDOR_ID_CHASE_PCIRAS 0x124D
-#define PCI_SUBDEVICE_ID_CHASE_PCIRAS4 0xF001
-#define PCI_SUBDEVICE_ID_CHASE_PCIRAS8 0xF010
-
-#define PCI_VENDOR_ID_AUREAL 0x12eb
-#define PCI_DEVICE_ID_AUREAL_VORTEX_1 0x0001
-#define PCI_DEVICE_ID_AUREAL_VORTEX_2 0x0002
-
-#define PCI_VENDOR_ID_CBOARDS 0x1307
-#define PCI_DEVICE_ID_CBOARDS_DAS1602_16 0x0001
-
-#define PCI_VENDOR_ID_SIIG 0x131f
-#define PCI_DEVICE_ID_SIIG_1S_10x_550 0x1000
-#define PCI_DEVICE_ID_SIIG_1S_10x_650 0x1001
-#define PCI_DEVICE_ID_SIIG_1S_10x_850 0x1002
-#define PCI_DEVICE_ID_SIIG_1S1P_10x_550 0x1010
-#define PCI_DEVICE_ID_SIIG_1S1P_10x_650 0x1011
-#define PCI_DEVICE_ID_SIIG_1S1P_10x_850 0x1012
-#define PCI_DEVICE_ID_SIIG_1P_10x 0x1020
-#define PCI_DEVICE_ID_SIIG_2P_10x 0x1021
-#define PCI_DEVICE_ID_SIIG_2S_10x_550 0x1030
-#define PCI_DEVICE_ID_SIIG_2S_10x_650 0x1031
-#define PCI_DEVICE_ID_SIIG_2S_10x_850 0x1032
-#define PCI_DEVICE_ID_SIIG_2S1P_10x_550 0x1034
-#define PCI_DEVICE_ID_SIIG_2S1P_10x_650 0x1035
-#define PCI_DEVICE_ID_SIIG_2S1P_10x_850 0x1036
-#define PCI_DEVICE_ID_SIIG_4S_10x_550 0x1050
-#define PCI_DEVICE_ID_SIIG_4S_10x_650 0x1051
-#define PCI_DEVICE_ID_SIIG_4S_10x_850 0x1052
-#define PCI_DEVICE_ID_SIIG_1S_20x_550 0x2000
-#define PCI_DEVICE_ID_SIIG_1S_20x_650 0x2001
-#define PCI_DEVICE_ID_SIIG_1S_20x_850 0x2002
-#define PCI_DEVICE_ID_SIIG_1P_20x 0x2020
-#define PCI_DEVICE_ID_SIIG_2P_20x 0x2021
-#define PCI_DEVICE_ID_SIIG_2S_20x_550 0x2030
-#define PCI_DEVICE_ID_SIIG_2S_20x_650 0x2031
-#define PCI_DEVICE_ID_SIIG_2S_20x_850 0x2032
-#define PCI_DEVICE_ID_SIIG_2P1S_20x_550 0x2040
-#define PCI_DEVICE_ID_SIIG_2P1S_20x_650 0x2041
-#define PCI_DEVICE_ID_SIIG_2P1S_20x_850 0x2042
-#define PCI_DEVICE_ID_SIIG_1S1P_20x_550 0x2010
-#define PCI_DEVICE_ID_SIIG_1S1P_20x_650 0x2011
-#define PCI_DEVICE_ID_SIIG_1S1P_20x_850 0x2012
-#define PCI_DEVICE_ID_SIIG_4S_20x_550 0x2050
-#define PCI_DEVICE_ID_SIIG_4S_20x_650 0x2051
-#define PCI_DEVICE_ID_SIIG_4S_20x_850 0x2052
-#define PCI_DEVICE_ID_SIIG_2S1P_20x_550 0x2060
-#define PCI_DEVICE_ID_SIIG_2S1P_20x_650 0x2061
-#define PCI_DEVICE_ID_SIIG_2S1P_20x_850 0x2062
-
-#define PCI_VENDOR_ID_DOMEX 0x134a
-#define PCI_DEVICE_ID_DOMEX_DMX3191D 0x0001
-
-#define PCI_VENDOR_ID_QUATECH 0x135C
-#define PCI_DEVICE_ID_QUATECH_QSC100 0x0010
-#define PCI_DEVICE_ID_QUATECH_DSC100 0x0020
-#define PCI_DEVICE_ID_QUATECH_DSC200 0x0030
-#define PCI_DEVICE_ID_QUATECH_QSC200 0x0040
-#define PCI_DEVICE_ID_QUATECH_ESC100D 0x0050
-#define PCI_DEVICE_ID_QUATECH_ESC100M 0x0060
-
-#define PCI_VENDOR_ID_SEALEVEL 0x135e
-#define PCI_DEVICE_ID_SEALEVEL_U530 0x7101
-#define PCI_DEVICE_ID_SEALEVEL_UCOMM2 0x7201
-#define PCI_DEVICE_ID_SEALEVEL_UCOMM422 0x7402
-#define PCI_DEVICE_ID_SEALEVEL_UCOMM232 0x7202
-#define PCI_DEVICE_ID_SEALEVEL_COMM4 0x7401
-#define PCI_DEVICE_ID_SEALEVEL_COMM8 0x7801
-
-#define PCI_VENDOR_ID_HYPERCOPE 0x1365
-#define PCI_DEVICE_ID_HYPERCOPE_PLX 0x9050
-#define PCI_SUBDEVICE_ID_HYPERCOPE_OLD_ERGO 0x0104
-#define PCI_SUBDEVICE_ID_HYPERCOPE_ERGO 0x0106
-#define PCI_SUBDEVICE_ID_HYPERCOPE_METRO 0x0107
-#define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2 0x0108
-#define PCI_SUBDEVICE_ID_HYPERCOPE_PLEXUS 0x0109
-
-#define PCI_VENDOR_ID_KAWASAKI 0x136b
-#define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01
-
-#define PCI_VENDOR_ID_LMC 0x1376
-#define PCI_DEVICE_ID_LMC_HSSI 0x0003
-#define PCI_DEVICE_ID_LMC_DS3 0x0004
-#define PCI_DEVICE_ID_LMC_SSI 0x0005
-#define PCI_DEVICE_ID_LMC_T1 0x0006
-
-#define PCI_VENDOR_ID_NETGEAR 0x1385
-#define PCI_DEVICE_ID_NETGEAR_GA620 0x620a
-#define PCI_DEVICE_ID_NETGEAR_GA622 0x622a
-
-#define PCI_VENDOR_ID_APPLICOM 0x1389
-#define PCI_DEVICE_ID_APPLICOM_PCIGENERIC 0x0001
-#define PCI_DEVICE_ID_APPLICOM_PCI2000IBS_CAN 0x0002
-#define PCI_DEVICE_ID_APPLICOM_PCI2000PFB 0x0003
-
-#define PCI_VENDOR_ID_MOXA 0x1393
-#define PCI_DEVICE_ID_MOXA_C104 0x1040
-#define PCI_DEVICE_ID_MOXA_C168 0x1680
-#define PCI_DEVICE_ID_MOXA_CP204J 0x2040
-#define PCI_DEVICE_ID_MOXA_C218 0x2180
-#define PCI_DEVICE_ID_MOXA_C320 0x3200
-
-#define PCI_VENDOR_ID_CCD 0x1397
-#define PCI_DEVICE_ID_CCD_2BD0 0x2bd0
-#define PCI_DEVICE_ID_CCD_B000 0xb000
-#define PCI_DEVICE_ID_CCD_B006 0xb006
-#define PCI_DEVICE_ID_CCD_B007 0xb007
-#define PCI_DEVICE_ID_CCD_B008 0xb008
-#define PCI_DEVICE_ID_CCD_B009 0xb009
-#define PCI_DEVICE_ID_CCD_B00A 0xb00a
-#define PCI_DEVICE_ID_CCD_B00B 0xb00b
-#define PCI_DEVICE_ID_CCD_B00C 0xb00c
-#define PCI_DEVICE_ID_CCD_B100 0xb100
-
-#define PCI_VENDOR_ID_3WARE 0x13C1
-#define PCI_DEVICE_ID_3WARE_1000 0x1000
-
-#define PCI_VENDOR_ID_ABOCOM 0x13D1
-#define PCI_DEVICE_ID_ABOCOM_2BD1 0x2BD1
-
-#define PCI_VENDOR_ID_CMEDIA 0x13f6
-#define PCI_DEVICE_ID_CMEDIA_CM8338A 0x0100
-#define PCI_DEVICE_ID_CMEDIA_CM8338B 0x0101
-#define PCI_DEVICE_ID_CMEDIA_CM8738 0x0111
-#define PCI_DEVICE_ID_CMEDIA_CM8738B 0x0112
-
-#define PCI_VENDOR_ID_LAVA 0x1407
-#define PCI_DEVICE_ID_LAVA_DSERIAL 0x0100 /* 2x 16550 */
-#define PCI_DEVICE_ID_LAVA_QUATRO_A 0x0101 /* 2x 16550, half of 4 port */
-#define PCI_DEVICE_ID_LAVA_QUATRO_B 0x0102 /* 2x 16550, half of 4 port */
-#define PCI_DEVICE_ID_LAVA_OCTO_A 0x0180 /* 4x 16550A, half of 8 port */
-#define PCI_DEVICE_ID_LAVA_OCTO_B 0x0181 /* 4x 16550A, half of 8 port */
-#define PCI_DEVICE_ID_LAVA_PORT_PLUS 0x0200 /* 2x 16650 */
-#define PCI_DEVICE_ID_LAVA_QUAD_A 0x0201 /* 2x 16650, half of 4 port */
-#define PCI_DEVICE_ID_LAVA_QUAD_B 0x0202 /* 2x 16650, half of 4 port */
-#define PCI_DEVICE_ID_LAVA_SSERIAL 0x0500 /* 1x 16550 */
-#define PCI_DEVICE_ID_LAVA_PORT_650 0x0600 /* 1x 16650 */
-#define PCI_DEVICE_ID_LAVA_PARALLEL 0x8000
-#define PCI_DEVICE_ID_LAVA_DUAL_PAR_A 0x8002 /* The Lava Dual Parallel is */
-#define PCI_DEVICE_ID_LAVA_DUAL_PAR_B 0x8003 /* two PCI devices on a card */
-#define PCI_DEVICE_ID_LAVA_BOCA_IOPPAR 0x8800
-
-#define PCI_VENDOR_ID_TIMEDIA 0x1409
-#define PCI_DEVICE_ID_TIMEDIA_1889 0x7168
-
-#define PCI_VENDOR_ID_OXSEMI 0x1415
-#define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403
-#define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501
-#define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511
-#define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513
-#define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521
-
-#define PCI_VENDOR_ID_AIRONET 0x14b9
-#define PCI_DEVICE_ID_AIRONET_4800_1 0x0001
-#define PCI_DEVICE_ID_AIRONET_4800 0x4500 // values switched? see
-#define PCI_DEVICE_ID_AIRONET_4500 0x4800 // drivers/net/aironet4500_card.c
-
-#define PCI_VENDOR_ID_TITAN 0x14D2
-#define PCI_DEVICE_ID_TITAN_010L 0x8001
-#define PCI_DEVICE_ID_TITAN_100L 0x8010
-#define PCI_DEVICE_ID_TITAN_110L 0x8011
-#define PCI_DEVICE_ID_TITAN_200L 0x8020
-#define PCI_DEVICE_ID_TITAN_210L 0x8021
-#define PCI_DEVICE_ID_TITAN_400L 0x8040
-#define PCI_DEVICE_ID_TITAN_800L 0x8080
-#define PCI_DEVICE_ID_TITAN_100 0xA001
-#define PCI_DEVICE_ID_TITAN_200 0xA005
-#define PCI_DEVICE_ID_TITAN_400 0xA003
-#define PCI_DEVICE_ID_TITAN_800B 0xA004
-
-#define PCI_VENDOR_ID_PANACOM 0x14d4
-#define PCI_DEVICE_ID_PANACOM_QUADMODEM 0x0400
-#define PCI_DEVICE_ID_PANACOM_DUALMODEM 0x0402
-
-#define PCI_VENDOR_ID_AFAVLAB 0x14db
-#define PCI_DEVICE_ID_AFAVLAB_P028 0x2180
-
-#define PCI_VENDOR_ID_BROADCOM 0x14e4
-#define PCI_DEVICE_ID_TIGON3_5700 0x1644
-#define PCI_DEVICE_ID_TIGON3_5701 0x1645
-#define PCI_DEVICE_ID_TIGON3_5702 0x1646
-#define PCI_DEVICE_ID_TIGON3_5703 0x1647
-#define PCI_DEVICE_ID_TIGON3_5704 0x1648
-#define PCI_DEVICE_ID_TIGON3_5704S_2 0x1649
-#define PCI_DEVICE_ID_TIGON3_5702FE 0x164d
-#define PCI_DEVICE_ID_TIGON3_5705 0x1653
-#define PCI_DEVICE_ID_TIGON3_5705_2 0x1654
-#define PCI_DEVICE_ID_TIGON3_5705M 0x165d
-#define PCI_DEVICE_ID_TIGON3_5705M_2 0x165e
-#define PCI_DEVICE_ID_TIGON3_5705F 0x166e
-#define PCI_DEVICE_ID_TIGON3_5782 0x1696
-#define PCI_DEVICE_ID_TIGON3_5788 0x169c
-#define PCI_DEVICE_ID_TIGON3_5702X 0x16a6
-#define PCI_DEVICE_ID_TIGON3_5703X 0x16a7
-#define PCI_DEVICE_ID_TIGON3_5704S 0x16a8
-#define PCI_DEVICE_ID_TIGON3_5702A3 0x16c6
-#define PCI_DEVICE_ID_TIGON3_5703A3 0x16c7
-#define PCI_DEVICE_ID_TIGON3_5901 0x170d
-#define PCI_DEVICE_ID_TIGON3_5901_2 0x170e
-#define PCI_DEVICE_ID_BCM4401 0x4401
-
-#define PCI_VENDOR_ID_ENE 0x1524
-#define PCI_DEVICE_ID_ENE_1211 0x1211
-#define PCI_DEVICE_ID_ENE_1225 0x1225
-#define PCI_DEVICE_ID_ENE_1410 0x1410
-#define PCI_DEVICE_ID_ENE_1420 0x1420
-
-#define PCI_VENDOR_ID_SYBA 0x1592
-#define PCI_DEVICE_ID_SYBA_2P_EPP 0x0782
-#define PCI_DEVICE_ID_SYBA_1P_ECP 0x0783
-
-#define PCI_VENDOR_ID_MORETON 0x15aa
-#define PCI_DEVICE_ID_RASTEL_2PORT 0x2000
-
-#define PCI_VENDOR_ID_ZOLTRIX 0x15b0
-#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0
-
-#define PCI_VENDOR_ID_PDC 0x15e9
-#define PCI_DEVICE_ID_PDC_ADMA100 0x1841
-
-#define PCI_VENDOR_ID_ALTIMA 0x173b
-#define PCI_DEVICE_ID_ALTIMA_AC1000 0x03e8
-#define PCI_DEVICE_ID_ALTIMA_AC1001 0x03e9
-#define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea
-#define PCI_DEVICE_ID_ALTIMA_AC1003 0x03eb
-
-#define PCI_VENDOR_ID_SYMPHONY 0x1c1c
-#define PCI_DEVICE_ID_SYMPHONY_101 0x0001
-
-#define PCI_VENDOR_ID_TEKRAM 0x1de1
-#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
-
-#define PCI_VENDOR_ID_HINT 0x3388
-#define PCI_DEVICE_ID_HINT_VXPROII_IDE 0x8013
-
-#define PCI_VENDOR_ID_3DLABS 0x3d3d
-#define PCI_DEVICE_ID_3DLABS_300SX 0x0001
-#define PCI_DEVICE_ID_3DLABS_500TX 0x0002
-#define PCI_DEVICE_ID_3DLABS_DELTA 0x0003
-#define PCI_DEVICE_ID_3DLABS_PERMEDIA 0x0004
-#define PCI_DEVICE_ID_3DLABS_MX 0x0006
-#define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007
-#define PCI_DEVICE_ID_3DLABS_GAMMA 0x0008
-#define PCI_DEVICE_ID_3DLABS_PERMEDIA2V 0x0009
-
-#define PCI_VENDOR_ID_AVANCE 0x4005
-#define PCI_DEVICE_ID_AVANCE_ALG2064 0x2064
-#define PCI_DEVICE_ID_AVANCE_2302 0x2302
-
-#define PCI_VENDOR_ID_AKS 0x416c
-#define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100
-#define PCI_DEVICE_ID_AKS_CPC 0x0200
-
-#define PCI_VENDOR_ID_REDCREEK 0x4916
-#define PCI_DEVICE_ID_RC45 0x1960
-
-#define PCI_VENDOR_ID_NETVIN 0x4a14
-#define PCI_DEVICE_ID_NETVIN_NV5000SC 0x5000
-
-#define PCI_VENDOR_ID_S3 0x5333
-#define PCI_DEVICE_ID_S3_PLATO_PXS 0x0551
-#define PCI_DEVICE_ID_S3_ViRGE 0x5631
-#define PCI_DEVICE_ID_S3_TRIO 0x8811
-#define PCI_DEVICE_ID_S3_AURORA64VP 0x8812
-#define PCI_DEVICE_ID_S3_TRIO64UVP 0x8814
-#define PCI_DEVICE_ID_S3_ViRGE_VX 0x883d
-#define PCI_DEVICE_ID_S3_868 0x8880
-#define PCI_DEVICE_ID_S3_928 0x88b0
-#define PCI_DEVICE_ID_S3_864_1 0x88c0
-#define PCI_DEVICE_ID_S3_864_2 0x88c1
-#define PCI_DEVICE_ID_S3_964_1 0x88d0
-#define PCI_DEVICE_ID_S3_964_2 0x88d1
-#define PCI_DEVICE_ID_S3_968 0x88f0
-#define PCI_DEVICE_ID_S3_TRIO64V2 0x8901
-#define PCI_DEVICE_ID_S3_PLATO_PXG 0x8902
-#define PCI_DEVICE_ID_S3_ViRGE_DXGX 0x8a01
-#define PCI_DEVICE_ID_S3_ViRGE_GX2 0x8a10
-#define PCI_DEVICE_ID_S3_ViRGE_MX 0x8c01
-#define PCI_DEVICE_ID_S3_ViRGE_MXP 0x8c02
-#define PCI_DEVICE_ID_S3_ViRGE_MXPMV 0x8c03
-#define PCI_DEVICE_ID_S3_SONICVIBES 0xca00
-
-#define PCI_VENDOR_ID_DUNORD 0x5544
-#define PCI_DEVICE_ID_DUNORD_I3000 0x0001
-#define PCI_VENDOR_ID_GENROCO 0x5555
-#define PCI_DEVICE_ID_GENROCO_HFP832 0x0003
-
-#define PCI_VENDOR_ID_DCI 0x6666
-#define PCI_DEVICE_ID_DCI_PCCOM4 0x0001
-#define PCI_DEVICE_ID_DCI_PCCOM8 0x0002
-
-#define PCI_VENDOR_ID_INTEL 0x8086
-#define PCI_DEVICE_ID_INTEL_21145 0x0039
-#define PCI_DEVICE_ID_INTEL_82375 0x0482
-#define PCI_DEVICE_ID_INTEL_82424 0x0483
-#define PCI_DEVICE_ID_INTEL_82378 0x0484
-#define PCI_DEVICE_ID_INTEL_82430 0x0486
-#define PCI_DEVICE_ID_INTEL_82434 0x04a3
-#define PCI_DEVICE_ID_INTEL_I960 0x0960
-#define PCI_DEVICE_ID_INTEL_I960RM 0x0962
-#define PCI_DEVICE_ID_INTEL_82562ET 0x1031
-
-#define PCI_DEVICE_ID_INTEL_82815_MC 0x1130
-
-#define PCI_DEVICE_ID_INTEL_82559ER 0x1209
-#define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221
-#define PCI_DEVICE_ID_INTEL_82092AA_1 0x1222
-#define PCI_DEVICE_ID_INTEL_7116 0x1223
-#define PCI_DEVICE_ID_INTEL_7205_0 0x255d
-#define PCI_DEVICE_ID_INTEL_82596 0x1226
-#define PCI_DEVICE_ID_INTEL_82865 0x1227
-#define PCI_DEVICE_ID_INTEL_82557 0x1229
-#define PCI_DEVICE_ID_INTEL_82437 0x122d
-#define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e
-#define PCI_DEVICE_ID_INTEL_82371FB_1 0x1230
-#define PCI_DEVICE_ID_INTEL_82371MX 0x1234
-#define PCI_DEVICE_ID_INTEL_82437MX 0x1235
-#define PCI_DEVICE_ID_INTEL_82441 0x1237
-#define PCI_DEVICE_ID_INTEL_82380FB 0x124b
-#define PCI_DEVICE_ID_INTEL_82439 0x1250
-#define PCI_DEVICE_ID_INTEL_80960_RP 0x1960
-#define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30
-#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000
-#define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010
-#define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020
-#define PCI_DEVICE_ID_INTEL_82437VX 0x7030
-#define PCI_DEVICE_ID_INTEL_82439TX 0x7100
-#define PCI_DEVICE_ID_INTEL_82371AB_0 0x7110
-#define PCI_DEVICE_ID_INTEL_82371AB 0x7111
-#define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112
-#define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113
-#define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410
-#define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411
-#define PCI_DEVICE_ID_INTEL_82801AA_2 0x2412
-#define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413
-#define PCI_DEVICE_ID_INTEL_82801AA_5 0x2415
-#define PCI_DEVICE_ID_INTEL_82801AA_6 0x2416
-#define PCI_DEVICE_ID_INTEL_82801AA_8 0x2418
-#define PCI_DEVICE_ID_INTEL_82801AB_0 0x2420
-#define PCI_DEVICE_ID_INTEL_82801AB_1 0x2421
-#define PCI_DEVICE_ID_INTEL_82801AB_2 0x2422
-#define PCI_DEVICE_ID_INTEL_82801AB_3 0x2423
-#define PCI_DEVICE_ID_INTEL_82801AB_5 0x2425
-#define PCI_DEVICE_ID_INTEL_82801AB_6 0x2426
-#define PCI_DEVICE_ID_INTEL_82801AB_8 0x2428
-#define PCI_DEVICE_ID_INTEL_82801BA_0 0x2440
-#define PCI_DEVICE_ID_INTEL_82801BA_1 0x2442
-#define PCI_DEVICE_ID_INTEL_82801BA_2 0x2443
-#define PCI_DEVICE_ID_INTEL_82801BA_3 0x2444
-#define PCI_DEVICE_ID_INTEL_82801BA_4 0x2445
-#define PCI_DEVICE_ID_INTEL_82801BA_5 0x2446
-#define PCI_DEVICE_ID_INTEL_82801BA_6 0x2448
-#define PCI_DEVICE_ID_INTEL_82801BA_7 0x2449
-#define PCI_DEVICE_ID_INTEL_82801BA_8 0x244a
-#define PCI_DEVICE_ID_INTEL_82801BA_9 0x244b
-#define PCI_DEVICE_ID_INTEL_82801BA_10 0x244c
-#define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e
-#define PCI_DEVICE_ID_INTEL_82801E_0 0x2450
-#define PCI_DEVICE_ID_INTEL_82801E_2 0x2452
-#define PCI_DEVICE_ID_INTEL_82801E_3 0x2453
-#define PCI_DEVICE_ID_INTEL_82801E_9 0x2459
-#define PCI_DEVICE_ID_INTEL_82801E_11 0x245B
-#define PCI_DEVICE_ID_INTEL_82801E_14 0x245D
-#define PCI_DEVICE_ID_INTEL_82801E_15 0x245E
-#define PCI_DEVICE_ID_INTEL_82801CA_0 0x2480
-#define PCI_DEVICE_ID_INTEL_82801CA_2 0x2482
-#define PCI_DEVICE_ID_INTEL_82801CA_3 0x2483
-#define PCI_DEVICE_ID_INTEL_82801CA_4 0x2484
-#define PCI_DEVICE_ID_INTEL_82801CA_5 0x2485
-#define PCI_DEVICE_ID_INTEL_82801CA_6 0x2486
-#define PCI_DEVICE_ID_INTEL_82801CA_7 0x2487
-#define PCI_DEVICE_ID_INTEL_82801CA_10 0x248a
-#define PCI_DEVICE_ID_INTEL_82801CA_11 0x248b
-#define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c
-#define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0
-#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2
-#define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3
-#define PCI_DEVICE_ID_INTEL_82801DB_4 0x24c4
-#define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5
-#define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6
-#define PCI_DEVICE_ID_INTEL_82801DB_7 0x24c7
-#define PCI_DEVICE_ID_INTEL_82801DB_10 0x24ca
-#define PCI_DEVICE_ID_INTEL_82801DB_11 0x24cb
-#define PCI_DEVICE_ID_INTEL_82801DB_12 0x24cc
-#define PCI_DEVICE_ID_INTEL_82801DB_13 0x24cd
-#define PCI_DEVICE_ID_INTEL_82801EB_0 0x24d0
-#define PCI_DEVICE_ID_INTEL_82801EB_1 0x24d1
-#define PCI_DEVICE_ID_INTEL_82801EB_2 0x24d2
-#define PCI_DEVICE_ID_INTEL_82801EB_3 0x24d3
-#define PCI_DEVICE_ID_INTEL_82801EB_4 0x24d4
-#define PCI_DEVICE_ID_INTEL_82801EB_5 0x24d5
-#define PCI_DEVICE_ID_INTEL_82801EB_6 0x24d6
-#define PCI_DEVICE_ID_INTEL_82801EB_7 0x24d7
-#define PCI_DEVICE_ID_INTEL_82801DB_10 0x24ca
-#define PCI_DEVICE_ID_INTEL_82801EB_11 0x24db
-#define PCI_DEVICE_ID_INTEL_82801EB_13 0x24dd
-#define PCI_DEVICE_ID_INTEL_ESB_0 0x25a0
-#define PCI_DEVICE_ID_INTEL_ESB_1 0x25a1
-#define PCI_DEVICE_ID_INTEL_ESB_2 0x25a2
-#define PCI_DEVICE_ID_INTEL_ESB_3 0x25a3
-#define PCI_DEVICE_ID_INTEL_ESB_31 0x25b0
-#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4
-#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6
-#define PCI_DEVICE_ID_INTEL_ESB_6 0x25a7
-#define PCI_DEVICE_ID_INTEL_ESB_7 0x25a9
-#define PCI_DEVICE_ID_INTEL_ESB_8 0x25aa
-#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab
-#define PCI_DEVICE_ID_INTEL_ESB_11 0x25ac
-#define PCI_DEVICE_ID_INTEL_ESB_12 0x25ad
-#define PCI_DEVICE_ID_INTEL_ESB_13 0x25ae
-#define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640
-#define PCI_DEVICE_ID_INTEL_ICH6_1 0x2641
-#define PCI_DEVICE_ID_INTEL_ICH6_2 0x266f
-#define PCI_DEVICE_ID_INTEL_ICH6_3 0x266e
-#define PCI_DEVICE_ID_INTEL_82850_HB 0x2530
-#define PCI_DEVICE_ID_INTEL_82845G_HB 0x2560
-#define PCI_DEVICE_ID_INTEL_80310 0x530d
-#define PCI_DEVICE_ID_INTEL_82810_MC1 0x7120
-#define PCI_DEVICE_ID_INTEL_82810_IG1 0x7121
-#define PCI_DEVICE_ID_INTEL_82810_MC3 0x7122
-#define PCI_DEVICE_ID_INTEL_82810_IG3 0x7123
-#define PCI_DEVICE_ID_INTEL_82443LX_0 0x7180
-#define PCI_DEVICE_ID_INTEL_82443LX_1 0x7181
-#define PCI_DEVICE_ID_INTEL_82443BX_0 0x7190
-#define PCI_DEVICE_ID_INTEL_82443BX_1 0x7191
-#define PCI_DEVICE_ID_INTEL_82443BX_2 0x7192
-#define PCI_DEVICE_ID_INTEL_82443MX_0 0x7198
-#define PCI_DEVICE_ID_INTEL_82443MX_1 0x7199
-#define PCI_DEVICE_ID_INTEL_82443MX_2 0x719a
-#define PCI_DEVICE_ID_INTEL_82443MX_3 0x719b
-#define PCI_DEVICE_ID_INTEL_82443GX_0 0x71a0
-#define PCI_DEVICE_ID_INTEL_82443GX_1 0x71a1
-#define PCI_DEVICE_ID_INTEL_82443GX_2 0x71a2
-#define PCI_DEVICE_ID_INTEL_82372FB_0 0x7600
-#define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601
-#define PCI_DEVICE_ID_INTEL_82372FB_2 0x7602
-#define PCI_DEVICE_ID_INTEL_82372FB_3 0x7603
-#define PCI_DEVICE_ID_INTEL_82454GX 0x84c4
-#define PCI_DEVICE_ID_INTEL_82450GX 0x84c5
-#define PCI_DEVICE_ID_INTEL_82451NX 0x84ca
-#define PCI_DEVICE_ID_INTEL_82454NX 0x84cb
-
-#define PCI_VENDOR_ID_COMPUTONE 0x8e0e
-#define PCI_DEVICE_ID_COMPUTONE_IP2EX 0x0291
-#define PCI_DEVICE_ID_COMPUTONE_PG 0x0302
-#define PCI_SUBVENDOR_ID_COMPUTONE 0x8e0e
-#define PCI_SUBDEVICE_ID_COMPUTONE_PG4 0x0001
-#define PCI_SUBDEVICE_ID_COMPUTONE_PG8 0x0002
-#define PCI_SUBDEVICE_ID_COMPUTONE_PG6 0x0003
-
-#define PCI_VENDOR_ID_KTI 0x8e2e
-#define PCI_DEVICE_ID_KTI_ET32P2 0x3000
-
-#define PCI_VENDOR_ID_ADAPTEC 0x9004
-#define PCI_DEVICE_ID_ADAPTEC_7810 0x1078
-#define PCI_DEVICE_ID_ADAPTEC_7821 0x2178
-#define PCI_DEVICE_ID_ADAPTEC_38602 0x3860
-#define PCI_DEVICE_ID_ADAPTEC_7850 0x5078
-#define PCI_DEVICE_ID_ADAPTEC_7855 0x5578
-#define PCI_DEVICE_ID_ADAPTEC_5800 0x5800
-#define PCI_DEVICE_ID_ADAPTEC_3860 0x6038
-#define PCI_DEVICE_ID_ADAPTEC_1480A 0x6075
-#define PCI_DEVICE_ID_ADAPTEC_7860 0x6078
-#define PCI_DEVICE_ID_ADAPTEC_7861 0x6178
-#define PCI_DEVICE_ID_ADAPTEC_7870 0x7078
-#define PCI_DEVICE_ID_ADAPTEC_7871 0x7178
-#define PCI_DEVICE_ID_ADAPTEC_7872 0x7278
-#define PCI_DEVICE_ID_ADAPTEC_7873 0x7378
-#define PCI_DEVICE_ID_ADAPTEC_7874 0x7478
-#define PCI_DEVICE_ID_ADAPTEC_7895 0x7895
-#define PCI_DEVICE_ID_ADAPTEC_7880 0x8078
-#define PCI_DEVICE_ID_ADAPTEC_7881 0x8178
-#define PCI_DEVICE_ID_ADAPTEC_7882 0x8278
-#define PCI_DEVICE_ID_ADAPTEC_7883 0x8378
-#define PCI_DEVICE_ID_ADAPTEC_7884 0x8478
-#define PCI_DEVICE_ID_ADAPTEC_7885 0x8578
-#define PCI_DEVICE_ID_ADAPTEC_7886 0x8678
-#define PCI_DEVICE_ID_ADAPTEC_7887 0x8778
-#define PCI_DEVICE_ID_ADAPTEC_7888 0x8878
-#define PCI_DEVICE_ID_ADAPTEC_1030 0x8b78
-
-#define PCI_VENDOR_ID_ADAPTEC2 0x9005
-#define PCI_DEVICE_ID_ADAPTEC2_2940U2 0x0010
-#define PCI_DEVICE_ID_ADAPTEC2_2930U2 0x0011
-#define PCI_DEVICE_ID_ADAPTEC2_7890B 0x0013
-#define PCI_DEVICE_ID_ADAPTEC2_7890 0x001f
-#define PCI_DEVICE_ID_ADAPTEC2_3940U2 0x0050
-#define PCI_DEVICE_ID_ADAPTEC2_3950U2D 0x0051
-#define PCI_DEVICE_ID_ADAPTEC2_7896 0x005f
-#define PCI_DEVICE_ID_ADAPTEC2_7892A 0x0080
-#define PCI_DEVICE_ID_ADAPTEC2_7892B 0x0081
-#define PCI_DEVICE_ID_ADAPTEC2_7892D 0x0083
-#define PCI_DEVICE_ID_ADAPTEC2_7892P 0x008f
-#define PCI_DEVICE_ID_ADAPTEC2_7899A 0x00c0
-#define PCI_DEVICE_ID_ADAPTEC2_7899B 0x00c1
-#define PCI_DEVICE_ID_ADAPTEC2_7899D 0x00c3
-#define PCI_DEVICE_ID_ADAPTEC2_7899P 0x00cf
-
-#define PCI_VENDOR_ID_ATRONICS 0x907f
-#define PCI_DEVICE_ID_ATRONICS_2015 0x2015
-
-#define PCI_VENDOR_ID_HOLTEK 0x9412
-#define PCI_DEVICE_ID_HOLTEK_6565 0x6565
-
-#define PCI_VENDOR_ID_NETMOS 0x9710
-#define PCI_DEVICE_ID_NETMOS_9735 0x9735
-#define PCI_DEVICE_ID_NETMOS_9835 0x9835
-
-#define PCI_SUBVENDOR_ID_EXSYS 0xd84d
-#define PCI_SUBDEVICE_ID_EXSYS_4014 0x4014
-
-#define PCI_VENDOR_ID_TIGERJET 0xe159
-#define PCI_DEVICE_ID_TIGERJET_300 0x0001
-#define PCI_DEVICE_ID_TIGERJET_100 0x0002
-
-#define PCI_VENDOR_ID_ARK 0xedd8
-#define PCI_DEVICE_ID_ARK_STING 0xa091
-#define PCI_DEVICE_ID_ARK_STINGARK 0xa099
-#define PCI_DEVICE_ID_ARK_2000MT 0xa0a1
-
-#define PCI_VENDOR_ID_MICROGATE 0x13c0
-#define PCI_DEVICE_ID_MICROGATE_USC 0x0010
-#define PCI_DEVICE_ID_MICROGATE_SCC 0x0020
-#define PCI_DEVICE_ID_MICROGATE_SCA 0x0030
diff --git a/xen/include/xen/perfc.h b/xen/include/xen/perfc.h
index ccf8d2ef9f..48c7e90fb4 100644
--- a/xen/include/xen/perfc.h
+++ b/xen/include/xen/perfc.h
@@ -46,27 +46,64 @@
#define PERFSTATUS_ARRAY( var, name, size ) \
atomic_t var[size];
-struct perfcounter_t
-{
+struct perfcounter {
#include <xen/perfc_defn.h>
};
-extern struct perfcounter_t perfcounters;
+extern struct perfcounter perfcounters;
#define perfc_value(x) atomic_read(&perfcounters.x[0])
#define perfc_valuec(x) atomic_read(&perfcounters.x[smp_processor_id()])
-#define perfc_valuea(x,y) atomic_read(&perfcounters.x[y])
+#define perfc_valuea(x,y) \
+ do { \
+ if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
+ atomic_read(&perfcounters.x[y]); \
+ } while ( 0 )
#define perfc_set(x,v) atomic_set(&perfcounters.x[0], v)
#define perfc_setc(x,v) atomic_set(&perfcounters.x[smp_processor_id()], v)
-#define perfc_seta(x,y,v) atomic_set(&perfcounters.x[y], v)
+#define perfc_seta(x,y,v) \
+ do { \
+ if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
+ atomic_set(&perfcounters.x[y], v); \
+ } while ( 0 )
#define perfc_incr(x) atomic_inc(&perfcounters.x[0])
#define perfc_decr(x) atomic_dec(&perfcounters.x[0])
#define perfc_incrc(x) atomic_inc(&perfcounters.x[smp_processor_id()])
-#define perfc_incra(x,y) atomic_inc(&perfcounters.x[y])
+#define perfc_decrc(x) atomic_dec(&perfcounters.x[smp_processor_id()])
+#define perfc_incra(x,y) \
+ do { \
+ if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
+ atomic_inc(&perfcounters.x[y]); \
+ } while ( 0 )
#define perfc_add(x,y) atomic_add((y), &perfcounters.x[0])
#define perfc_addc(x,y) atomic_add((y), &perfcounters.x[smp_processor_id()])
-#define perfc_adda(x,y,z) atomic_add((z), &perfcounters.x[y])
+#define perfc_adda(x,y,z) \
+ do { \
+ if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
+ atomic_add((z), &perfcounters.x[y]); \
+ } while ( 0 )
+/*
+ * Histogram: special treatment for 0 and 1 count. After that equally spaced
+ * with last bucket taking the rest.
+ */
+#ifdef PERFC_ARRAYS
+#define perfc_incr_histo(_x,_v,_n) \
+ do { \
+ if ( (_v) == 0 ) \
+ perfc_incra(_x, 0); \
+ else if ( (_v) == 1 ) \
+ perfc_incra(_x, 1); \
+ else if ( (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) < \
+ (PERFC_MAX_ ## _n - 3) ) \
+ perfc_incra(_x, (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) + 2); \
+ else \
+ perfc_incra(_x, PERFC_MAX_ ## _n - 1); \
+ } while ( 0 )
+#else
+#define perfc_incr_histo(_x,_v,_n) ((void)0)
+#endif
+
#else /* PERF_COUNTERS */
#define perfc_value(x) (0)
@@ -78,10 +115,13 @@ extern struct perfcounter_t perfcounters;
#define perfc_incr(x) ((void)0)
#define perfc_decr(x) ((void)0)
#define perfc_incrc(x) ((void)0)
+#define perfc_decrc(x) ((void)0)
#define perfc_incra(x,y) ((void)0)
+#define perfc_decra(x,y) ((void)0)
#define perfc_add(x,y) ((void)0)
#define perfc_addc(x,y) ((void)0)
#define perfc_adda(x,y,z) ((void)0)
+#define perfc_incr_histo(x,y,z) ((void)0)
#endif /* PERF_COUNTERS */
diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h
index d5dc9436ff..abc17e0b68 100644
--- a/xen/include/xen/perfc_defn.h
+++ b/xen/include/xen/perfc_defn.h
@@ -1,33 +1,127 @@
+#ifndef __XEN_PERFC_DEFN_H__
+#define __XEN_PERFC_DEFN_H__
-PERFCOUNTER_CPU (seg_fixups, "segmentation fixups" )
+#define PERFC_MAX_PT_UPDATES 64
+#define PERFC_PT_UPDATES_BUCKET_SIZE 3
+PERFCOUNTER_ARRAY(wpt_updates, "writable pt updates",
+ PERFC_MAX_PT_UPDATES)
+PERFCOUNTER_ARRAY(bpt_updates, "batched pt updates",
+ PERFC_MAX_PT_UPDATES)
+PERFCOUNTER_ARRAY(l1_entries_checked, "l1 entries checked",
+ PERFC_MAX_PT_UPDATES)
+PERFCOUNTER_ARRAY(shm_l2_updates, "shadow mode L2 pt updates",
+ PERFC_MAX_PT_UPDATES)
+PERFCOUNTER_ARRAY(shm_hl2_updates, "shadow mode HL2 pt updates",
+ PERFC_MAX_PT_UPDATES)
+PERFCOUNTER_ARRAY(snapshot_copies, "entries copied per snapshot",
+ PERFC_MAX_PT_UPDATES)
-PERFCOUNTER_CPU( irqs, "#interrupts" )
-PERFCOUNTER_CPU( ipis, "#IPIs" )
-PERFCOUNTER_CPU( irq_time, "cycles spent in irq handler" )
+PERFCOUNTER_ARRAY(hypercalls, "hypercalls", NR_hypercalls)
+PERFCOUNTER_ARRAY(exceptions, "exceptions", 32)
-PERFCOUNTER_CPU( apic_timer, "apic timer interrupts" )
-PERFCOUNTER_CPU( ac_timer_max, "ac_timer max error (ns)" )
-PERFCOUNTER_CPU( sched_irq, "sched: timer" )
-PERFCOUNTER_CPU( sched_run, "sched: runs through scheduler" )
-PERFCOUNTER_CPU( sched_ctx, "sched: context switches" )
+#define VMX_PERF_EXIT_REASON_SIZE 37
+#define VMX_PERF_VECTOR_SIZE 0x20
+PERFCOUNTER_ARRAY(vmexits, "vmexits", VMX_PERF_EXIT_REASON_SIZE)
+PERFCOUNTER_ARRAY(cause_vector, "cause vector", VMX_PERF_VECTOR_SIZE)
-PERFCOUNTER_CPU( domain_page_tlb_flush, "domain page tlb flushes" )
-PERFCOUNTER_CPU( need_flush_tlb_flush, "PG_need_flush tlb flushes" )
+PERFCOUNTER_CPU(seg_fixups, "segmentation fixups")
-PERFCOUNTER_CPU( calls_to_mmu_update, "calls_to_mmu_update" )
-PERFCOUNTER_CPU( num_page_updates, "num_page_updates" )
-PERFCOUNTER_CPU( calls_to_update_va, "calls_to_update_va_map" )
-PERFCOUNTER_CPU( page_faults, "page faults" )
-PERFCOUNTER_CPU( copy_user_faults, "copy_user faults" )
-PERFCOUNTER_CPU( map_domain_mem_count, "map_domain_mem count" )
+PERFCOUNTER_CPU(irqs, "#interrupts")
+PERFCOUNTER_CPU(ipis, "#IPIs")
+PERFCOUNTER_CPU(irq_time, "cycles spent in irq handler")
-PERFCOUNTER_CPU( shadow_l2_table_count, "shadow_l2_table count" )
-PERFCOUNTER_CPU( shadow_l1_table_count, "shadow_l1_table count" )
-PERFCOUNTER_CPU( unshadow_table_count, "unshadow_table count" )
-PERFCOUNTER_CPU( shadow_fixup_count, "shadow_fixup count" )
-PERFCOUNTER_CPU( shadow_update_va_fail1, "shadow_update_va_fail1" )
-PERFCOUNTER_CPU( shadow_update_va_fail2, "shadow_update_va_fail2" )
+PERFCOUNTER_CPU(apic_timer, "apic timer interrupts")
+PERFCOUNTER_CPU(ac_timer_max, "ac_timer max error (ns)")
+PERFCOUNTER_CPU(sched_irq, "sched: timer")
+PERFCOUNTER_CPU(sched_run, "sched: runs through scheduler")
+PERFCOUNTER_CPU(sched_ctx, "sched: context switches")
+
+PERFCOUNTER_CPU(domain_page_tlb_flush, "domain page tlb flushes")
+PERFCOUNTER_CPU(need_flush_tlb_flush, "PG_need_flush tlb flushes")
+
+PERFCOUNTER_CPU(calls_to_mmu_update, "calls_to_mmu_update")
+PERFCOUNTER_CPU(num_page_updates, "num_page_updates")
+PERFCOUNTER_CPU(calls_to_update_va, "calls_to_update_va_map")
+PERFCOUNTER_CPU(page_faults, "page faults")
+PERFCOUNTER_CPU(copy_user_faults, "copy_user faults")
+
+PERFCOUNTER_CPU(shadow_fault_calls, "calls to shadow_fault")
+PERFCOUNTER_CPU(shadow_fault_bail_pde_not_present,
+ "sf bailed due to pde not present")
+PERFCOUNTER_CPU(shadow_fault_bail_pte_not_present,
+ "sf bailed due to pte not present")
+PERFCOUNTER_CPU(shadow_fault_bail_ro_mapping,
+ "sf bailed due to a ro mapping")
+PERFCOUNTER_CPU(shadow_fault_fixed, "sf fixed the pgfault")
+PERFCOUNTER_CPU(write_fault_bail, "sf bailed due to write_fault")
+PERFCOUNTER_CPU(read_fault_bail, "sf bailed due to read_fault")
+
+PERFCOUNTER_CPU(map_domain_page_count, "map_domain_page count")
+PERFCOUNTER_CPU(ptwr_emulations, "writable pt emulations")
+
+PERFCOUNTER_CPU(shadow_l2_table_count, "shadow_l2_table count")
+PERFCOUNTER_CPU(shadow_l1_table_count, "shadow_l1_table count")
+PERFCOUNTER_CPU(unshadow_table_count, "unshadow_table count")
+PERFCOUNTER_CPU(shadow_fixup_count, "shadow_fixup count")
+PERFCOUNTER_CPU(shadow_update_va_fail1, "shadow_update_va_fail1")
+PERFCOUNTER_CPU(shadow_update_va_fail2, "shadow_update_va_fail2")
/* STATUS counters do not reset when 'P' is hit */
-PERFSTATUS( shadow_l2_pages, "current # shadow L2 pages" )
-PERFSTATUS( shadow_l1_pages, "current # shadow L1 pages" )
+PERFSTATUS(shadow_l2_pages, "current # shadow L2 pages")
+PERFSTATUS(shadow_l1_pages, "current # shadow L1 pages")
+PERFSTATUS(hl2_table_pages, "current # hl2 pages")
+PERFSTATUS(snapshot_pages, "current # fshadow snapshot pages")
+PERFSTATUS(writable_pte_predictions, "# writable pte predictions")
+PERFSTATUS(free_l1_pages, "current # free shadow L1 pages")
+
+PERFCOUNTER_CPU(check_pagetable, "calls to check_pagetable")
+PERFCOUNTER_CPU(check_all_pagetables, "calls to check_all_pagetables")
+
+PERFCOUNTER_CPU(shadow_hl2_table_count, "shadow_hl2_table count")
+PERFCOUNTER_CPU(shadow_set_l1e_force_map, "shadow_set_l1e forced to map l1")
+PERFCOUNTER_CPU(shadow_set_l1e_unlinked, "shadow_set_l1e found unlinked l1")
+PERFCOUNTER_CPU(shadow_set_l1e_fail, "shadow_set_l1e failed (no sl1)")
+PERFCOUNTER_CPU(shadow_invlpg_faults, "shadow_invlpg's get_user faulted")
+PERFCOUNTER_CPU(unshadow_l2_count, "unpinned L2 count")
+
+PERFCOUNTER_CPU(shadow_status_shortcut, "fastpath miss on shadow cache")
+PERFCOUNTER_CPU(shadow_status_calls, "calls to ___shadow_status")
+PERFCOUNTER_CPU(shadow_status_miss, "missed shadow cache")
+PERFCOUNTER_CPU(shadow_status_hit_head, "hits on head of bucket")
+PERFCOUNTER_CPU(shadow_max_type, "calls to shadow_max_type")
+
+PERFCOUNTER_CPU(shadow_sync_all, "calls to shadow_sync_all")
+PERFCOUNTER_CPU(shadow_sync_va, "calls to shadow_sync_va")
+PERFCOUNTER_CPU(resync_l1, "resync L1 page")
+PERFCOUNTER_CPU(resync_l2, "resync L2 page")
+PERFCOUNTER_CPU(resync_hl2, "resync HL2 page")
+PERFCOUNTER_CPU(shadow_make_snapshot, "snapshots created")
+PERFCOUNTER_CPU(shadow_mark_mfn_out_of_sync_calls,
+ "calls to shadow_mk_out_of_sync")
+PERFCOUNTER_CPU(shadow_out_of_sync_calls, "calls to shadow_out_of_sync")
+PERFCOUNTER_CPU(snapshot_entry_matches_calls, "calls to ss_entry_matches")
+PERFCOUNTER_CPU(snapshot_entry_matches_true, "ss_entry_matches returns true")
+
+PERFCOUNTER_CPU(validate_pte_calls, "calls to validate_pte_change")
+PERFCOUNTER_CPU(validate_pte_changes1, "validate_pte makes changes1")
+PERFCOUNTER_CPU(validate_pte_changes2, "validate_pte makes changes2")
+PERFCOUNTER_CPU(validate_pte_changes3, "validate_pte makes changes3")
+PERFCOUNTER_CPU(validate_pte_changes4, "validate_pte makes changes4")
+PERFCOUNTER_CPU(validate_pde_calls, "calls to validate_pde_change")
+PERFCOUNTER_CPU(validate_pde_changes, "validate_pde makes changes")
+PERFCOUNTER_CPU(shadow_get_page_fail, "shadow_get_page_from_l1e fails")
+PERFCOUNTER_CPU(validate_hl2e_calls, "calls to validate_hl2e_change")
+PERFCOUNTER_CPU(validate_hl2e_changes, "validate_hl2e makes changes")
+PERFCOUNTER_CPU(exception_fixed, "pre-exception fixed")
+PERFCOUNTER_CPU(gpfn_to_mfn_foreign, "calls to gpfn_to_mfn_foreign")
+PERFCOUNTER_CPU(remove_all_access, "calls to remove_all_access")
+PERFCOUNTER_CPU(remove_write_access, "calls to remove_write_access")
+PERFCOUNTER_CPU(remove_write_access_easy, "easy outs of remove_write_access")
+PERFCOUNTER_CPU(remove_write_no_work, "no work in remove_write_access")
+PERFCOUNTER_CPU(remove_write_not_writable, "remove_write non-writable page")
+PERFCOUNTER_CPU(remove_write_fast_exit, "remove_write hit predicted entry")
+PERFCOUNTER_CPU(remove_write_predicted, "remove_write predict hit&exit")
+PERFCOUNTER_CPU(remove_write_bad_prediction, "remove_write bad prediction")
+PERFCOUNTER_CPU(update_hl2e_invlpg, "update_hl2e calls invlpg")
+
+#endif /* __XEN_PERFC_DEFN_H__ */
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 15f992614a..58c33e8b3c 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -1,59 +1,52 @@
-#include <asm/types.h>
-
-/*
+/******************************************************************************
* Additional declarations for the generic scheduler interface. This should
* only be included by files that implement conforming schedulers.
*
* Portions by Mark Williamson are (C) 2004 Intel Research Cambridge
*/
-#define BUCKETS 10
+#ifndef __XEN_SCHED_IF_H__
+#define __XEN_SCHED_IF_H__
+
+#define BUCKETS 10
+/*300*/
-typedef struct schedule_data_st
-{
- spinlock_t schedule_lock; /* spinlock protecting curr pointer
- TODO check this */
- struct domain *curr; /* current task */
- struct domain *idle; /* idle task for this cpu */
- void * sched_priv;
- struct ac_timer s_timer; /* scheduling timer */
+struct schedule_data {
+ spinlock_t schedule_lock; /* spinlock protecting curr */
+ struct vcpu *curr; /* current task */
+ struct vcpu *idle; /* idle task for this cpu */
+ void *sched_priv;
+ struct ac_timer s_timer; /* scheduling timer */
+ unsigned long tick; /* current periodic 'tick' */
#ifdef BUCKETS
u32 hist[BUCKETS]; /* for scheduler latency histogram */
#endif
-} __cacheline_aligned schedule_data_t;
-
+} __cacheline_aligned;
-typedef struct task_slice_st
-{
- struct domain *task;
+struct task_slice {
+ struct vcpu *task;
s_time_t time;
-} task_slice_t;
+};
-struct scheduler
-{
+struct scheduler {
char *name; /* full name for this scheduler */
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
- int (*init_scheduler) ();
- int (*init_idle_task) (struct domain *);
- int (*alloc_task) (struct domain *);
- void (*add_task) (struct domain *);
+ int (*alloc_task) (struct vcpu *);
+ void (*add_task) (struct vcpu *);
void (*free_task) (struct domain *);
- void (*rem_task) (struct domain *);
- void (*sleep) (struct domain *);
- void (*wake) (struct domain *);
- void (*do_block) (struct domain *);
- task_slice_t (*do_schedule) (s_time_t);
+ void (*rem_task) (struct vcpu *);
+ void (*sleep) (struct vcpu *);
+ void (*wake) (struct vcpu *);
+ struct task_slice (*do_schedule) (s_time_t);
int (*control) (struct sched_ctl_cmd *);
int (*adjdom) (struct domain *,
struct sched_adjdom_cmd *);
void (*dump_settings) (void);
void (*dump_cpu_state) (int);
- int (*prn_state) (int);
};
-/* per CPU scheduler information */
-extern schedule_data_t schedule_data[];
-
+extern struct schedule_data schedule_data[];
+#endif /* __XEN_SCHED_IF_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 4b7e7080c8..35a3c36cab 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -1,96 +1,121 @@
+
#ifndef __SCHED_H__
#define __SCHED_H__
-#define STACK_SIZE (2*PAGE_SIZE)
-
#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
-#include <xen/cache.h>
-#include <asm/regs.h>
#include <xen/smp.h>
-#include <asm/page.h>
-#include <asm/processor.h>
#include <public/xen.h>
#include <public/dom0_ops.h>
-#include <xen/list.h>
#include <xen/time.h>
#include <xen/ac_timer.h>
-#include <xen/delay.h>
-#include <asm/atomic.h>
-#include <asm/current.h>
-#include <xen/spinlock.h>
#include <xen/grant_table.h>
-#include <xen/irq_cpustat.h>
+#include <asm/domain.h>
extern unsigned long volatile jiffies;
extern rwlock_t domlist_lock;
-struct domain;
-
/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;
-typedef struct event_channel_st
+#define MAX_EVTCHNS 1024
+#define EVTCHNS_PER_BUCKET 128
+#define NR_EVTCHN_BUCKETS (MAX_EVTCHNS / EVTCHNS_PER_BUCKET)
+
+struct evtchn
{
#define ECS_FREE 0 /* Channel is available for use. */
-#define ECS_UNBOUND 1 /* Channel is waiting to bind to a remote domain. */
-#define ECS_INTERDOMAIN 2 /* Channel is bound to another domain. */
-#define ECS_PIRQ 3 /* Channel is bound to a physical IRQ line. */
-#define ECS_VIRQ 4 /* Channel is bound to a virtual IRQ line. */
- u16 state;
+#define ECS_RESERVED 1 /* Channel is reserved. */
+#define ECS_UNBOUND 2 /* Channel is waiting to bind to a remote domain. */
+#define ECS_INTERDOMAIN 3 /* Channel is bound to another domain. */
+#define ECS_PIRQ 4 /* Channel is bound to a physical IRQ line. */
+#define ECS_VIRQ 5 /* Channel is bound to a virtual IRQ line. */
+#define ECS_IPI 6 /* Channel is bound to a virtual IPI line. */
+ u16 state; /* ECS_* */
+ u16 notify_vcpu_id; /* VCPU for local delivery notification */
union {
struct {
domid_t remote_domid;
- } __attribute__ ((packed)) unbound; /* state == ECS_UNBOUND */
+ } unbound; /* state == ECS_UNBOUND */
struct {
u16 remote_port;
struct domain *remote_dom;
- } __attribute__ ((packed)) interdomain; /* state == ECS_INTERDOMAIN */
- u16 pirq; /* state == ECS_PIRQ */
- u16 virq; /* state == ECS_VIRQ */
+ } interdomain; /* state == ECS_INTERDOMAIN */
+ u16 pirq; /* state == ECS_PIRQ */
+ u16 virq; /* state == ECS_VIRQ */
} u;
-} event_channel_t;
+};
+
+int evtchn_init(struct domain *d);
+void evtchn_destroy(struct domain *d);
+
+#define CPUMAP_RUNANYWHERE 0xFFFFFFFF
+
+struct vcpu
+{
+ int vcpu_id;
+
+ int processor;
+
+ vcpu_info_t *vcpu_info;
+
+ struct domain *domain;
+ struct vcpu *next_in_list;
+
+ struct ac_timer timer; /* one-shot timer for timeout values */
+ unsigned long sleep_tick; /* tick at which this vcpu started sleep */
-int init_event_channels(struct domain *d);
-void destroy_event_channels(struct domain *d);
+ s_time_t lastschd; /* time this domain was last scheduled */
+ s_time_t lastdeschd; /* time this domain was last descheduled */
+ s_time_t cpu_time; /* total CPU time received till now */
+ s_time_t wokenup; /* time domain got woken up */
+ void *sched_priv; /* scheduler-specific data */
+
+ unsigned long vcpu_flags;
+
+ u16 virq_to_evtchn[NR_VIRQS];
+
+ atomic_t pausecnt;
-struct domain
+ cpumap_t cpumap; /* which cpus this domain can run on */
+
+ struct arch_vcpu arch;
+};
+
+/* Per-domain lock can be recursively acquired in fault handlers. */
+#define LOCK_BIGLOCK(_d) spin_lock_recursive(&(_d)->big_lock)
+#define UNLOCK_BIGLOCK(_d) spin_unlock_recursive(&(_d)->big_lock)
+
+struct domain
{
- u32 processor;
+ domid_t domain_id;
- shared_info_t *shared_info;
+ shared_info_t *shared_info; /* shared data area */
+ spinlock_t time_lock;
- domid_t id;
- s_time_t create_time;
+ spinlock_t big_lock;
spinlock_t page_alloc_lock; /* protects all the following fields */
struct list_head page_list; /* linked list, of size tot_pages */
struct list_head xenpage_list; /* linked list, of size xenheap_pages */
unsigned int tot_pages; /* number of pages currently possesed */
unsigned int max_pages; /* maximum value for tot_pages */
+ unsigned int next_io_page; /* next io pfn to give to domain */
unsigned int xenheap_pages; /* # pages allocated from Xen heap */
/* Scheduling. */
- int shutdown_code; /* code value from OS (if DF_SHUTDOWN). */
- s_time_t lastschd; /* time this domain was last scheduled */
- s_time_t lastdeschd; /* time this domain was last descheduled */
- s_time_t cpu_time; /* total CPU time received till now */
- s_time_t wokenup; /* time domain got woken up */
- struct ac_timer timer; /* one-shot timer for timeout values */
+ int shutdown_code; /* code value from OS (if DOMF_shutdown) */
void *sched_priv; /* scheduler-specific data */
- struct mm_struct mm;
-
- struct thread_struct thread;
- struct domain *next_list, *next_hash;
+ struct domain *next_in_list;
+ struct domain *next_in_hashbucket;
/* Event channel information. */
- event_channel_t *event_channel;
- unsigned int max_event_channel;
- spinlock_t event_channel_lock;
+ struct evtchn *evtchn[NR_EVTCHN_BUCKETS];
+ spinlock_t evtchn_lock;
- grant_table_t *grant_table;
+ grant_table_t *grant_table;
/*
* Interrupt to event-channel mappings. Updates should be protected by the
@@ -98,46 +123,50 @@ struct domain
* the lock, but races don't usually matter.
*/
#define NR_PIRQS 128 /* Put this somewhere sane! */
- u16 pirq_to_evtchn[NR_PIRQS];
- u16 virq_to_evtchn[NR_VIRQS];
- u32 pirq_mask[NR_PIRQS/32];
+ u16 pirq_to_evtchn[NR_PIRQS];
+ u32 pirq_mask[NR_PIRQS/32];
+
+ unsigned long domain_flags;
+ unsigned long vm_assist;
- /* Last point at which timestamp info was propagated to the guest. */
- u64 last_propagated_timestamp;
+ atomic_t refcnt;
- /* Physical I/O */
- spinlock_t pcidev_lock;
- struct list_head pcidev_list;
+ struct vcpu *vcpu[MAX_VIRT_CPUS];
- unsigned long flags;
- unsigned long vm_assist;
+ /* Bitmask of CPUs which are holding onto this domain's state. */
+ cpumask_t cpumask;
- atomic_t refcnt;
- atomic_t pausecnt;
+ struct arch_domain arch;
};
struct domain_setup_info
{
+ /* Initialised by caller. */
+ unsigned long image_addr;
+ unsigned long image_len;
+ /* Initialised by loader: Public. */
unsigned long v_start;
unsigned long v_end;
unsigned long v_kernstart;
unsigned long v_kernend;
unsigned long v_kernentry;
-
- unsigned int use_writable_pagetables;
- unsigned int load_bsd_symtab;
-
+ /* Initialised by loader: Private. */
+ unsigned int load_symtab;
unsigned long symtab_addr;
unsigned long symtab_len;
+ /* Indicate whether it's xen specific image */
+ char *xen_section_string;
};
-#include <asm/uaccess.h> /* for KERNEL_DS */
+extern struct domain idle0_domain;
+extern struct vcpu idle0_vcpu;
-extern struct domain idle0_task;
-
-extern struct domain *idle_task[NR_CPUS];
+extern struct vcpu *idle_task[NR_CPUS];
#define IDLE_DOMAIN_ID (0x7FFFU)
-#define is_idle_task(_p) (test_bit(DF_IDLETASK, &(_p)->flags))
+#define is_idle_task(_d) (test_bit(_DOMF_idle_domain, &(_d)->domain_flags))
+
+struct vcpu *alloc_vcpu_struct(struct domain *d,
+ unsigned long vcpu);
void free_domain_struct(struct domain *d);
struct domain *alloc_domain_struct();
@@ -174,22 +203,21 @@ static inline void get_knownalive_domain(struct domain *d)
atomic_inc(&d->refcnt);
ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTRUCTED));
}
-
+
extern struct domain *do_createdomain(
domid_t dom_id, unsigned int cpu);
-extern int construct_dom0(struct domain *d,
- unsigned long alloc_start,
- unsigned long alloc_end,
- char *image_start, unsigned long image_len,
- char *initrd_start, unsigned long initrd_len,
- char *cmdline);
-extern int final_setup_guestos(struct domain *d, dom0_builddomain_t *);
+extern int construct_dom0(
+ struct domain *d,
+ unsigned long image_start, unsigned long image_len,
+ unsigned long initrd_start, unsigned long initrd_len,
+ char *cmdline);
+extern int set_info_guest(struct domain *d, dom0_setdomaininfo_t *);
struct domain *find_domain_by_id(domid_t dom);
-struct domain *find_last_domain(void);
extern void domain_destruct(struct domain *d);
extern void domain_kill(struct domain *d);
extern void domain_shutdown(u8 reason);
+extern void domain_pause_for_debugger(void);
/*
* Mark current domain as crashed. This function returns: the domain is not
@@ -203,39 +231,76 @@ extern void domain_crash(void);
*/
extern void domain_crash_synchronous(void) __attribute__((noreturn));
-void new_thread(struct domain *d,
+void new_thread(struct vcpu *d,
unsigned long start_pc,
unsigned long start_stack,
unsigned long start_info);
-extern unsigned long wait_init_idle;
-#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
-
#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
void schedulers_start(void);
-void sched_add_domain(struct domain *d);
-void sched_rem_domain(struct domain *d);
+void sched_add_domain(struct vcpu *);
+void sched_rem_domain(struct vcpu *);
long sched_ctl(struct sched_ctl_cmd *);
long sched_adjdom(struct sched_adjdom_cmd *);
int sched_id();
-void init_idle_task(void);
-void domain_wake(struct domain *d);
-void domain_sleep(struct domain *d);
+void domain_wake(struct vcpu *d);
+void domain_sleep_nosync(struct vcpu *d);
+void domain_sleep_sync(struct vcpu *d);
-extern void switch_to(struct domain *prev,
- struct domain *next);
+/*
+ * Force loading of currently-executing domain state on the specified set
+ * of CPUs. This is used to counteract lazy state switching where required.
+ */
+extern void sync_lazy_execstate_cpu(unsigned int cpu);
+extern void sync_lazy_execstate_mask(cpumask_t mask);
+extern void sync_lazy_execstate_all(void);
+extern int __sync_lazy_execstate(void);
-void domain_init(void);
+/* Called by the scheduler to switch to another vcpu. */
+extern void context_switch(
+ struct vcpu *prev,
+ struct vcpu *next);
+
+/* Called by the scheduler to continue running the current vcpu. */
+extern void continue_running(
+ struct vcpu *same);
int idle_cpu(int cpu); /* Is CPU 'cpu' idle right now? */
void startup_cpu_idle_loop(void);
-unsigned long hypercall_create_continuation(
+unsigned long __hypercall_create_continuation(
unsigned int op, unsigned int nr_args, ...);
-#define hypercall_preempt_check() \
- (unlikely(softirq_pending(smp_processor_id())))
+#define hypercall0_create_continuation(_op) \
+ __hypercall_create_continuation((_op), 0)
+#define hypercall1_create_continuation(_op, _a1) \
+ __hypercall_create_continuation((_op), 1, \
+ (unsigned long)(_a1))
+#define hypercall2_create_continuation(_op, _a1, _a2) \
+ __hypercall_create_continuation((_op), 2, \
+ (unsigned long)(_a1), (unsigned long)(_a2))
+#define hypercall3_create_continuation(_op, _a1, _a2, _a3) \
+ __hypercall_create_continuation((_op), 3, \
+ (unsigned long)(_a1), (unsigned long)(_a2), (unsigned long)(_a3))
+#define hypercall4_create_continuation(_op, _a1, _a2, _a3, _a4) \
+ __hypercall_create_continuation((_op), 4, \
+ (unsigned long)(_a1), (unsigned long)(_a2), (unsigned long)(_a3), \
+ (unsigned long)(_a4))
+#define hypercall5_create_continuation(_op, _a1, _a2, _a3, _a4, _a5) \
+ __hypercall_create_continuation((_op), 5, \
+ (unsigned long)(_a1), (unsigned long)(_a2), (unsigned long)(_a3), \
+ (unsigned long)(_a4), (unsigned long)(_a5))
+#define hypercall6_create_continuation(_op, _a1, _a2, _a3, _a4, _a5, _a6) \
+ __hypercall_create_continuation((_op), 6, \
+ (unsigned long)(_a1), (unsigned long)(_a2), (unsigned long)(_a3), \
+ (unsigned long)(_a4), (unsigned long)(_a5), (unsigned long)(_a6))
+
+#define hypercall_preempt_check() (unlikely( \
+ softirq_pending(smp_processor_id()) | \
+ (!!current->vcpu_info->evtchn_upcall_pending & \
+ !current->vcpu_info->evtchn_upcall_mask) \
+ ))
/* This domain_hash and domain_list are protected by the domlist_lock. */
#define DOMAIN_HASH_SIZE 256
@@ -243,72 +308,105 @@ unsigned long hypercall_create_continuation(
extern struct domain *domain_hash[DOMAIN_HASH_SIZE];
extern struct domain *domain_list;
-#define for_each_domain(_p) \
- for ( (_p) = domain_list; (_p) != NULL; (_p) = (_p)->next_list )
-
-#define DF_DONEFPUINIT 0 /* Has the FPU been initialised for this task? */
-#define DF_USEDFPU 1 /* Has this task used the FPU since last save? */
-#define DF_GUEST_STTS 2 /* Has the guest OS requested 'stts'? */
-#define DF_CONSTRUCTED 3 /* Has the guest OS been fully built yet? */
-#define DF_IDLETASK 4 /* Is this one of the per-CPU idle domains? */
-#define DF_PRIVILEGED 5 /* Is this domain privileged? */
-#define DF_PHYSDEV 6 /* May this domain do IO to physical devices? */
-#define DF_BLOCKED 7 /* Domain is blocked waiting for an event. */
-#define DF_CTRLPAUSE 8 /* Domain is paused by controller software. */
-#define DF_SHUTDOWN 9 /* Guest shut itself down for some reason. */
-#define DF_CRASHED 10 /* Domain crashed inside Xen, cannot continue. */
-#define DF_DYING 11 /* Death rattle. */
-#define DF_RUNNING 12 /* Currently running on a CPU. */
-#define DF_CPUPINNED 13 /* Disables auto-migration. */
-#define DF_MIGRATED 14 /* Domain migrated between CPUs. */
-
-static inline int domain_runnable(struct domain *d)
-{
- return ( (atomic_read(&d->pausecnt) == 0) &&
- !(d->flags & ((1<<DF_BLOCKED)|(1<<DF_CTRLPAUSE)|
- (1<<DF_SHUTDOWN)|(1<<DF_CRASHED))) );
-}
+#define for_each_domain(_d) \
+ for ( (_d) = domain_list; (_d) != NULL; (_d) = (_d)->next_in_list )
-static inline void domain_pause(struct domain *d)
-{
- ASSERT(d != current);
- atomic_inc(&d->pausecnt);
- domain_sleep(d);
-}
+#define for_each_vcpu(_d,_ed) \
+ for ( (_ed) = (_d)->vcpu[0]; \
+ (_ed) != NULL; \
+ (_ed) = (_ed)->next_in_list )
-static inline void domain_unpause(struct domain *d)
-{
- ASSERT(d != current);
- if ( atomic_dec_and_test(&d->pausecnt) )
- domain_wake(d);
-}
+/*
+ * Per-VCPU flags (vcpu_flags).
+ */
+ /* Has the FPU been initialised? */
+#define _VCPUF_fpu_initialised 0
+#define VCPUF_fpu_initialised (1UL<<_VCPUF_fpu_initialised)
+ /* Has the FPU been used since it was last saved? */
+#define _VCPUF_fpu_dirtied 1
+#define VCPUF_fpu_dirtied (1UL<<_VCPUF_fpu_dirtied)
+ /* Has the guest OS requested 'stts'? */
+#define _VCPUF_guest_stts 2
+#define VCPUF_guest_stts (1UL<<_VCPUF_guest_stts)
+ /* Domain is blocked waiting for an event. */
+#define _VCPUF_blocked 3
+#define VCPUF_blocked (1UL<<_VCPUF_blocked)
+ /* Domain is paused by controller software. */
+#define _VCPUF_ctrl_pause 4
+#define VCPUF_ctrl_pause (1UL<<_VCPUF_ctrl_pause)
+ /* Currently running on a CPU? */
+#define _VCPUF_running 5
+#define VCPUF_running (1UL<<_VCPUF_running)
+ /* Disables auto-migration between CPUs. */
+#define _VCPUF_cpu_pinned 6
+#define VCPUF_cpu_pinned (1UL<<_VCPUF_cpu_pinned)
+ /* Domain migrated between CPUs. */
+#define _VCPUF_cpu_migrated 7
+#define VCPUF_cpu_migrated (1UL<<_VCPUF_cpu_migrated)
+ /* Initialization completed. */
+#define _VCPUF_initialised 8
+#define VCPUF_initialised (1UL<<_VCPUF_initialised)
-static inline void domain_unblock(struct domain *d)
+/*
+ * Per-domain flags (domain_flags).
+ */
+ /* Has the guest OS been fully built yet? */
+#define _DOMF_constructed 0
+#define DOMF_constructed (1UL<<_DOMF_constructed)
+ /* Is this one of the per-CPU idle domains? */
+#define _DOMF_idle_domain 1
+#define DOMF_idle_domain (1UL<<_DOMF_idle_domain)
+ /* Is this domain privileged? */
+#define _DOMF_privileged 2
+#define DOMF_privileged (1UL<<_DOMF_privileged)
+ /* May this domain do IO to physical devices? */
+#define _DOMF_physdev_access 3
+#define DOMF_physdev_access (1UL<<_DOMF_physdev_access)
+ /* Guest shut itself down for some reason. */
+#define _DOMF_shutdown 4
+#define DOMF_shutdown (1UL<<_DOMF_shutdown)
+ /* Guest is in process of shutting itself down (becomes DOMF_shutdown). */
+#define _DOMF_shuttingdown 5
+#define DOMF_shuttingdown (1UL<<_DOMF_shuttingdown)
+ /* Death rattle. */
+#define _DOMF_dying 6
+#define DOMF_dying (1UL<<_DOMF_dying)
+
+static inline int domain_runnable(struct vcpu *v)
{
- if ( test_and_clear_bit(DF_BLOCKED, &d->flags) )
- domain_wake(d);
+ return ( (atomic_read(&v->pausecnt) == 0) &&
+ !(v->vcpu_flags & (VCPUF_blocked|VCPUF_ctrl_pause)) &&
+ !(v->domain->domain_flags & (DOMF_shutdown|DOMF_shuttingdown)) );
}
-static inline void domain_pause_by_systemcontroller(struct domain *d)
-{
- ASSERT(d != current);
- if ( !test_and_set_bit(DF_CTRLPAUSE, &d->flags) )
- domain_sleep(d);
-}
+void vcpu_pause(struct vcpu *v);
+void domain_pause(struct domain *d);
+void vcpu_unpause(struct vcpu *v);
+void domain_unpause(struct domain *d);
+void domain_pause_by_systemcontroller(struct domain *d);
+void domain_unpause_by_systemcontroller(struct domain *d);
-static inline void domain_unpause_by_systemcontroller(struct domain *d)
+static inline void vcpu_unblock(struct vcpu *v)
{
- if ( test_and_clear_bit(DF_CTRLPAUSE, &d->flags) )
- domain_wake(d);
+ if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
+ domain_wake(v);
}
-
-#define IS_PRIV(_d) (test_bit(DF_PRIVILEGED, &(_d)->flags))
-#define IS_CAPABLE_PHYSDEV(_d) (test_bit(DF_PHYSDEV, &(_d)->flags))
+#define IS_PRIV(_d) \
+ (test_bit(_DOMF_privileged, &(_d)->domain_flags))
+#define IS_CAPABLE_PHYSDEV(_d) \
+ (test_bit(_DOMF_physdev_access, &(_d)->domain_flags))
#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))
-#include <xen/slab.h>
-#include <xen/domain.h>
-
#endif /* __SCHED_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/serial.h b/xen/include/xen/serial.h
index 5c40db3e7d..8d5f246ab2 100644
--- a/xen/include/xen/serial.h
+++ b/xen/include/xen/serial.h
@@ -1,52 +1,128 @@
/******************************************************************************
* serial.h
*
- * Driver for 16550-series UARTs. This driver is to be kept within Xen as
- * it permits debugging of seriously-toasted machines (e.g., in situations
- * where a device driver within a guest OS would be inaccessible).
+ * Framework for serial device drivers.
*
- * Copyright (c) 2003-2004, K A Fraser
+ * Copyright (c) 2003-2005, K A Fraser
*/
#ifndef __XEN_SERIAL_H__
#define __XEN_SERIAL_H__
-#include <asm/regs.h>
+struct cpu_user_regs;
-/* 'Serial handles' are comprise the following fields. */
+/* Register a character-receive hook on the specified COM port. */
+typedef void (*serial_rx_fn)(char, struct cpu_user_regs *);
+void serial_set_rx_handler(int handle, serial_rx_fn fn);
+
+/* Number of characters we buffer for a polling receiver. */
+#define SERIAL_RXBUFSZ 32
+#define MASK_SERIAL_RXBUF_IDX(_i) ((_i)&(SERIAL_RXBUFSZ-1))
+
+/* Number of characters we buffer for an interrupt-driven transmitter. */
+#define SERIAL_TXBUFSZ 16384
+#define MASK_SERIAL_TXBUF_IDX(_i) ((_i)&(SERIAL_TXBUFSZ-1))
+
+struct uart_driver;
+
+struct serial_port {
+ /* Uart-driver parameters. */
+ struct uart_driver *driver;
+ void *uart;
+ /* Number of characters the port can hold for transmit. */
+ int tx_fifo_size;
+ /* Transmit data buffer (interrupt-driven uart). */
+ char *txbuf;
+ unsigned int txbufp, txbufc;
+ /* Force synchronous transmit. */
+ int sync;
+ /* Receiver callback functions (asynchronous receivers). */
+ serial_rx_fn rx_lo, rx_hi, rx;
+ /* Receive data buffer (polling receivers). */
+ char rxbuf[SERIAL_RXBUFSZ];
+ unsigned int rxbufp, rxbufc;
+ /* Serial I/O is concurrency-safe. */
+ spinlock_t lock;
+};
+
+struct uart_driver {
+ /* Driver initialisation (pre- and post-IRQ subsystem setup). */
+ void (*init_preirq)(struct serial_port *);
+ void (*init_postirq)(struct serial_port *);
+ /* Hook to clean up after Xen bootstrap (before domain 0 runs). */
+ void (*endboot)(struct serial_port *);
+ /* Transmit FIFO ready to receive up to @tx_fifo_size characters? */
+ int (*tx_empty)(struct serial_port *);
+ /* Put a character onto the serial line. */
+ void (*putc)(struct serial_port *, char);
+ /* Get a character from the serial line: returns 0 if none available. */
+ int (*getc)(struct serial_port *, char *);
+};
+
+/* 'Serial handles' are composed from the following fields. */
#define SERHND_IDX (1<<0) /* COM1 or COM2? */
#define SERHND_HI (1<<1) /* Mux/demux each transferred char by MSB. */
#define SERHND_LO (1<<2) /* Ditto, except that the MSB is cleared. */
#define SERHND_COOKED (1<<3) /* Newline/carriage-return translation? */
/* Two-stage initialisation (before/after IRQ-subsystem initialisation). */
-void serial_init_stage1(void);
-void serial_init_stage2(void);
+void serial_init_preirq(void);
+void serial_init_postirq(void);
-/* Takes a config string and creates a numeric handle on the COM port. */
-int parse_serial_handle(char *conf);
+/* Clean-up hook before domain 0 runs. */
+void serial_endboot(void);
-/* Register a character-receive hook on the specified COM port. */
-typedef void (*serial_rx_fn)(unsigned char, struct xen_regs *);
-void serial_set_rx_handler(int handle, serial_rx_fn fn);
+/* Takes a config string and creates a numeric handle on the COM port. */
+int serial_parse_handle(char *conf);
/* Transmit a single character via the specified COM port. */
-void serial_putc(int handle, unsigned char c);
+void serial_putc(int handle, char c);
 /* Transmit a NUL-terminated string via the specified COM port. */
-void serial_puts(int handle, const unsigned char *s);
+void serial_puts(int handle, const char *s);
/*
* An alternative to registering a character-receive hook. This function
* will not return until a character is available. It can safely be
* called with interrupts disabled.
*/
-unsigned char serial_getc(int handle);
-/*
- * Same as serial_getc but can also be called from interrupt handlers.
- */
-unsigned char irq_serial_getc(int handle);
+char serial_getc(int handle);
+/* Forcibly prevent serial lockup when the system is in a bad way. */
+/* (NB. This also forces an implicit serial_start_sync()). */
void serial_force_unlock(int handle);
+/* Start/end a synchronous region (temporarily disable interrupt-driven tx). */
+void serial_start_sync(int handle);
+void serial_end_sync(int handle);
+
+/* Return number of bytes of headroom in the transmit buffer. */
+int serial_tx_space(int handle);
+
+/*
+ * Initialisation and helper functions for uart drivers.
+ */
+/* Register a uart on serial port @idx (e.g., @idx==0 is COM1). */
+void serial_register_uart(int idx, struct uart_driver *driver, void *uart);
+/* Place the serial port into asynchronous transmit mode. */
+void serial_async_transmit(struct serial_port *port);
+/* Process work in interrupt context. */
+void serial_rx_interrupt(struct serial_port *port, struct cpu_user_regs *regs);
+void serial_tx_interrupt(struct serial_port *port, struct cpu_user_regs *regs);
+
+/*
+ * Initialisers for individual uart drivers.
+ */
+void ns16550_init(void);
+
#endif /* __XEN_SERIAL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/shadow.h b/xen/include/xen/shadow.h
new file mode 100644
index 0000000000..4248b30f80
--- /dev/null
+++ b/xen/include/xen/shadow.h
@@ -0,0 +1,19 @@
+
+#ifndef __XEN_SHADOW_H__
+#define __XEN_SHADOW_H__
+
+#include <xen/config.h>
+
+#ifdef CONFIG_SHADOW
+
+#include <asm/shadow.h>
+
+#else
+
+#define shadow_drop_references(_d, _p) ((void)0)
+#define shadow_sync_and_drop_references(_d, _p) ((void)0)
+#define shadow_tainted_refcnts(_d) (0)
+
+#endif
+
+#endif /* __XEN_SHADOW_H__ */
diff --git a/xen/include/xen/slab.h b/xen/include/xen/slab.h
deleted file mode 100644
index 692b3b63f3..0000000000
--- a/xen/include/xen/slab.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Written by Mark Hemment, 1996.
- * (markhe@nextd.demon.co.uk)
- */
-
-#ifndef __SLAB_H__
-#define __SLAB_H__
-
-#include <xen/config.h>
-
-#ifdef __ARCH_HAS_SLAB_ALLOCATOR
-
-#include <asm/slab.h>
-
-#else
-
-typedef struct xmem_cache_s xmem_cache_t;
-
-#include <xen/mm.h>
-#include <xen/cache.h>
-
-/* Flags to pass to xmem_cache_create(). */
-/* NB. The first 3 are only valid when built with SLAB_DEBUG_SUPPORT. */
-#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor */
-#define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */
-#define SLAB_POISON 0x00000800UL /* Poison objects */
-#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */
-#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align obj on a cache line */
-
-/* Flags passed to a constructor function. */
-#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
-#define SLAB_CTOR_ATOMIC 0x002UL /* tell cons. it can't sleep */
-#define SLAB_CTOR_VERIFY 0x004UL /* tell cons. it's a verify call */
-
-extern void xmem_cache_init(void);
-extern void xmem_cache_sizes_init(unsigned long);
-
-extern xmem_cache_t *xmem_find_general_cachep(size_t);
-extern xmem_cache_t *xmem_cache_create(
- const char *, size_t, size_t, unsigned long,
- void (*)(void *, xmem_cache_t *, unsigned long),
- void (*)(void *, xmem_cache_t *, unsigned long));
-extern int xmem_cache_destroy(xmem_cache_t *);
-extern int xmem_cache_shrink(xmem_cache_t *);
-extern void *xmem_cache_alloc(xmem_cache_t *);
-extern void xmem_cache_free(xmem_cache_t *, void *);
-
-extern void *xmalloc(size_t);
-extern void xfree(const void *);
-
-extern int xmem_cache_reap(void);
-
-extern void dump_slabinfo();
-
-#endif /* __ARCH_HAS_SLAB_ALLOCATOR */
-
-#endif /* __SLAB_H__ */
diff --git a/xen/include/xen/smp.h b/xen/include/xen/smp.h
index 13e370cdca..2004211589 100644
--- a/xen/include/xen/smp.h
+++ b/xen/include/xen/smp.h
@@ -22,38 +22,43 @@
*/
extern void smp_send_stop(void);
-extern void smp_send_event_check_mask(unsigned long cpu_mask);
-#define smp_send_event_check_cpu(_cpu) smp_send_event_check_mask(1<<(_cpu))
+extern void smp_send_event_check_mask(cpumask_t mask);
+#define smp_send_event_check_cpu(cpu) \
+ smp_send_event_check_mask(cpumask_of_cpu(cpu))
/*
- * Boot processor call to load the other CPU's
+ * Prepare machine for booting other CPUs.
*/
-extern void smp_boot_cpus(void);
+extern void smp_prepare_cpus(unsigned int max_cpus);
/*
- * Processor call in. Must hold processors until ..
+ * Bring a CPU up
*/
-extern void smp_callin(void);
+extern int __cpu_up(unsigned int cpunum);
/*
- * Multiprocessors may now schedule
+ * Final polishing of CPUs
*/
-extern void smp_commence(void);
+extern void smp_cpus_done(unsigned int max_cpus);
/*
* Call a function on all other processors
*/
-extern int smp_call_function (void (*func) (void *info), void *info,
- int retry, int wait);
+extern int smp_call_function(
+ void (*func) (void *info), void *info, int retry, int wait);
/*
- * True once the per process idle is forked
+ * Call a function on all processors
*/
-extern int smp_threads_ready;
+static inline int on_each_cpu(void (*func) (void *info), void *info,
+ int retry, int wait)
+{
+ int ret = smp_call_function(func, info, retry, wait);
+ func(info);
+ return ret;
+}
-extern int smp_num_cpus;
extern int ht_per_core;
-extern int opt_noht;
extern volatile unsigned long smp_msg_data;
extern volatile int smp_src_cpu;
@@ -69,23 +74,35 @@ extern volatile int smp_msg_id;
#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/
#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */
+/*
+ * Mark the boot cpu "online" so that it can call console drivers in
+ * printk() and can access its per-cpu storage.
+ */
+void smp_prepare_boot_cpu(void);
+
#else
/*
* These macros fold the SMP functionality into a single CPU system
*/
-#define smp_send_event_check_mask(_m) ((void)0)
-#define smp_send_event_check_cpu(_p) ((void)0)
-#define smp_num_cpus 1
+#define smp_send_event_check_mask(m) ((void)0)
+#define smp_send_event_check_cpu(p) ((void)0)
+#ifndef __smp_processor_id
#define smp_processor_id() 0
+#endif
#define hard_smp_processor_id() 0
-#define smp_threads_ready 1
-#define kernel_lock()
-#define cpu_logical_map(cpu) 0
-#define cpu_number_map(cpu) 0
-#define smp_call_function(func,info,retry,wait) ({ 0; })
-#define cpu_online_map 1
+#define smp_call_function(func,info,retry,wait) 0
+#define on_each_cpu(func,info,retry,wait) ({ func(info); 0; })
+#define num_booting_cpus() 1
+#define smp_prepare_boot_cpu() do {} while (0)
#endif
+
+#ifdef __smp_processor_id
+#define smp_processor_id() __smp_processor_id()
+#else
+extern unsigned int smp_processor_id(void);
+#endif
+
#endif
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index a538540247..87d6c807ee 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -8,7 +8,8 @@
#define KEYPRESS_SOFTIRQ 3
#define NMI_SOFTIRQ 4
#define PAGE_SCRUB_SOFTIRQ 5
-#define NR_SOFTIRQS 6
+#define DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ 6
+#define NR_SOFTIRQS 7
#ifndef __ASSEMBLY__
@@ -16,6 +17,7 @@
#include <xen/lib.h>
#include <xen/smp.h>
#include <asm/bitops.h>
+#include <asm/current.h>
#include <asm/hardirq.h>
typedef void (*softirq_handler)(void);
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index ef58ba591b..73184daad8 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -82,4 +82,7 @@ typedef struct { int gcc_is_buggy; } rwlock_t;
#define write_lock(_lock) _raw_write_lock(_lock)
#define write_unlock(_lock) _raw_write_unlock(_lock)
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED
+
#endif /* __SPINLOCK_H__ */
diff --git a/xen/include/xen/time.h b/xen/include/xen/time.h
index 4df2e96a55..d0091b6a10 100644
--- a/xen/include/xen/time.h
+++ b/xen/include/xen/time.h
@@ -1,5 +1,4 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
+/****************************************************************************
* (C) 2002 - Rolf Neugebauer - Intel Research Cambridge
****************************************************************************
*
@@ -29,11 +28,14 @@
#include <xen/types.h>
#include <public/xen.h>
+#include <asm/time.h>
extern int init_xen_time();
extern unsigned long cpu_khz;
+struct domain;
+
/*
* System Time
* 64 bit value containing the nanoseconds elapsed since boot time.
@@ -52,9 +54,18 @@ s_time_t get_s_time(void);
#define MILLISECS(_ms) (((s_time_t)(_ms)) * 1000000ULL )
#define MICROSECS(_us) (((s_time_t)(_us)) * 1000ULL )
-struct domain;
-extern int update_dom_time(struct domain *d);
-extern void do_settime(unsigned long secs, unsigned long usecs,
- u64 system_time_base);
+extern void update_dom_time(struct vcpu *v);
+extern void do_settime(
+ unsigned long secs, unsigned long usecs, u64 system_time_base);
#endif /* __XEN_TIME_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/trace.h b/xen/include/xen/trace.h
index 88c0b23d39..68b51cbfb7 100644
--- a/xen/include/xen/trace.h
+++ b/xen/include/xen/trace.h
@@ -8,6 +8,8 @@
* Author: Mark Williamson, mark.a.williamson@intel.com
* Date: January 2004
*
+ * Copyright (C) 2005 Bin Ren
+ *
* The trace buffer code is designed to allow debugging traces of Xen to be
* generated on UP / SMP machines. Each trace entry is timestamped so that
* it's possible to reconstruct a chronological record of trace events.
@@ -21,11 +23,8 @@
#ifndef __XEN_TRACE_H__
#define __XEN_TRACE_H__
-#include <public/trace.h>
-
#ifdef TRACE_BUFFER
-#include <xen/spinlock.h>
#include <asm/page.h>
#include <xen/types.h>
#include <xen/sched.h>
@@ -33,12 +32,18 @@
#include <asm/current.h>
#include <asm/msr.h>
#include <public/dom0_ops.h>
+#include <public/trace.h>
+
+extern struct t_buf *t_bufs[];
+extern int tb_init_done;
+extern unsigned long tb_cpu_mask;
+extern u32 tb_event_mask;
/* Used to initialise trace buffer functionality */
void init_trace_bufs(void);
/* used to retrieve the physical address of the trace buffers */
-int get_tb_info(dom0_gettbufs_t *st);
+int tb_control(dom0_tbufcontrol_t *tbc);
/**
* trace - Enters a trace tuple into the trace buffer for the current CPU.
@@ -49,42 +54,43 @@ int get_tb_info(dom0_gettbufs_t *st);
* failure, otherwise 0. Failure occurs only if the trace buffers are not yet
* initialised.
*/
-static inline int trace(u32 event, u32 d1, u32 d2, u32 d3, u32 d4, u32 d5)
+static inline int trace(u32 event, unsigned long d1, unsigned long d2,
+ unsigned long d3, unsigned long d4, unsigned long d5)
{
- extern struct t_buf *t_bufs[]; /* global array of pointers to bufs */
- extern int tb_init_done; /* set when buffers are initialised */
- unsigned long flags; /* for saving interrupt flags */
- struct t_buf *buf; /* the buffer we're working on */
- struct t_rec *rec; /* next record to fill out */
-
+ atomic_t old, new, seen;
+ struct t_buf *buf;
+ struct t_rec *rec;
if ( !tb_init_done )
return -1;
+ if ( (tb_event_mask & event) == 0 )
+ return 0;
+
+ if ( (tb_cpu_mask & (1UL << smp_processor_id())) == 0 )
+ return 0;
buf = t_bufs[smp_processor_id()];
- local_irq_save(flags);
+ do
+ {
+ old = buf->rec_idx;
+ _atomic_set(new, (_atomic_read(old) + 1) % buf->rec_num);
+ seen = atomic_compareandswap(old, new, &buf->rec_idx);
+ }
+ while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
- rec = buf->head_ptr;
+ wmb();
+ rec = &buf->rec[_atomic_read(old)];
rdtscll(rec->cycles);
- rec->event = event;
- rec->d1 = d1;
- rec->d2 = d2;
- rec->d3 = d3;
- rec->d4 = d4;
- rec->d5 = d5;
-
- wmb(); /* above must be visible before reader sees index updated */
-
- buf->head_ptr++;
- buf->head++;
- if ( buf->head_ptr == (buf->vdata + buf->size) )
- buf->head_ptr = buf->vdata;
-
- local_irq_restore(flags);
-
+ rec->event = event;
+ rec->data[0] = d1;
+ rec->data[1] = d2;
+ rec->data[2] = d3;
+ rec->data[3] = d4;
+ rec->data[4] = d5;
+
return 0;
}
diff --git a/xen/include/xen/types.h b/xen/include/xen/types.h
index 0299f74136..ab045d9387 100644
--- a/xen/include/xen/types.h
+++ b/xen/include/xen/types.h
@@ -1,8 +1,14 @@
#ifndef __TYPES_H__
#define __TYPES_H__
+#include <xen/config.h>
#include <asm/types.h>
+#define BITS_TO_LONGS(bits) \
+ (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
#ifndef NULL
#define NULL ((void*)0)
#endif
@@ -44,5 +50,7 @@ typedef __u32 uint32_t;
typedef __u64 uint64_t;
+struct domain;
+struct vcpu;
#endif /* __TYPES_H__ */
diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h
new file mode 100644
index 0000000000..893627f04a
--- /dev/null
+++ b/xen/include/xen/xmalloc.h
@@ -0,0 +1,27 @@
+
+#ifndef __XMALLOC_H__
+#define __XMALLOC_H__
+
+/* Allocate space for typed object. */
+#define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type)))
+
+/* Allocate space for array of typed objects. */
+#define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
+
+/* Allocate untyped storage. */
+#define xmalloc_bytes(_bytes) (_xmalloc(_bytes, SMP_CACHE_BYTES))
+
+/* Free any of the above. */
+extern void xfree(const void *);
+
+/* Underlying functions */
+extern void *_xmalloc(size_t size, size_t align);
+static inline void *_xmalloc_array(size_t size, size_t align, size_t num)
+{
+ /* Check for overflow. */
+ if (size && num > UINT_MAX / size)
+ return NULL;
+ return _xmalloc(size * num, align);
+}
+
+#endif /* __XMALLOC_H__ */
diff --git a/xen/tools/figlet/Makefile b/xen/tools/figlet/Makefile
index 9ed8fdff9c..bb9c64fa79 100644
--- a/xen/tools/figlet/Makefile
+++ b/xen/tools/figlet/Makefile
@@ -1,8 +1,8 @@
-CC := gcc
+include $(BASEDIR)/../Config.mk
figlet: figlet.c
- $(CC) -o $@ $<
+ $(HOSTCC) -o $@ $<
clean:
rm -f *.o figlet